1 /////////////////////////////////////////////////////////////////////////////// 2 // 3 /// \file xzdec.c 4 /// \brief Simple single-threaded tool to uncompress .xz or .lzma files 5 // 6 // Author: Lasse Collin 7 // 8 // This file has been put into the public domain. 9 // You can do whatever you want with this file. 10 // 11 /////////////////////////////////////////////////////////////////////////////// 12 13 #include "sysdefs.h" 14 #include "lzma.h" 15 16 #include <stdarg.h> 17 #include <errno.h> 18 #include <stdio.h> 19 #include <unistd.h> 20 21 #include "getopt.h" 22 #include "tuklib_progname.h" 23 #include "tuklib_exit.h" 24 25 #ifdef TUKLIB_DOSLIKE 26 # include <fcntl.h> 27 # include <io.h> 28 #endif 29 30 31 #ifdef LZMADEC 32 # define TOOL_FORMAT "lzma" 33 #else 34 # define TOOL_FORMAT "xz" 35 #endif 36 37 38 /// Number of bytes to use memory at maximum 39 static uint64_t memlimit; 40 41 /// Total amount of physical RAM 42 static uint64_t total_ram; 43 44 /// Error messages are suppressed if this is zero, which is the case when 45 /// --quiet has been given at least twice. 46 static unsigned int display_errors = 2; 47 48 49 static void lzma_attribute((format(printf, 1, 2))) 50 my_errorf(const char *fmt, ...) 51 { 52 va_list ap; 53 va_start(ap, fmt); 54 55 if (display_errors) { 56 fprintf(stderr, "%s: ", progname); 57 vfprintf(stderr, fmt, ap); 58 fprintf(stderr, "\n"); 59 } 60 61 va_end(ap); 62 return; 63 } 64 65 66 static void lzma_attribute((noreturn)) 67 help(void) 68 { 69 // Round up to the next MiB and do it correctly also with UINT64_MAX. 70 const uint64_t mem_mib = (memlimit >> 20) 71 + ((memlimit & ((UINT32_C(1) << 20) - 1)) != 0); 72 73 printf( 74 "Usage: %s [OPTION]... [FILE]...\n" 75 "Uncompress files in the ." TOOL_FORMAT " format to the standard output.\n" 76 "\n" 77 " -c, --stdout (ignored)\n" 78 " -d, --decompress (ignored)\n" 79 " -k, --keep (ignored)\n" 80 " -M, --memory=NUM use NUM bytes of memory at maximum (0 means default)\n" 81 " -q, --quiet specify *twice* to suppress errors\n" 82 " -Q, --no-warn (ignored)\n" 83 " -h, --help display this help and exit\n" 84 " -V, --version display the version number and exit\n" 85 "\n" 86 "With no FILE, or when FILE is -, read standard input.\n" 87 "\n" 88 "On this system and configuration, this program will use a maximum of roughly\n" 89 "%" PRIu64 " MiB RAM.\n" 90 "\n" 91 "Report bugs to <" PACKAGE_BUGREPORT "> (in English or Finnish).\n" 92 PACKAGE_NAME " home page: <" PACKAGE_URL ">\n", progname, mem_mib); 93 tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors); 94 } 95 96 97 static void lzma_attribute((noreturn)) 98 version(void) 99 { 100 printf(TOOL_FORMAT "dec (" PACKAGE_NAME ") " LZMA_VERSION_STRING "\n" 101 "liblzma %s\n", lzma_version_string()); 102 103 tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors); 104 } 105 106 107 /// Find out the amount of physical memory (RAM) in the system, and set 108 /// the memory usage limit to the given percentage of RAM. 109 static void 110 memlimit_set_percentage(uint32_t percentage) 111 { 112 memlimit = percentage * total_ram / 100; 113 return; 114 } 115 116 117 /// Set the memory usage limit to give number of bytes. Zero is a special 118 /// value to indicate the default limit. 119 static void 120 memlimit_set(uint64_t new_memlimit) 121 { 122 if (new_memlimit != 0) { 123 memlimit = new_memlimit; 124 } else { 125 memlimit = 40 * total_ram / 100; 126 if (memlimit < UINT64_C(80) * 1024 * 1024) { 127 memlimit = 80 * total_ram / 100; 128 if (memlimit > UINT64_C(80) * 1024 * 1024) 129 memlimit = UINT64_C(80) * 1024 * 1024; 130 } 131 } 132 133 return; 134 } 135 136 137 /// Get the total amount of physical RAM and set the memory usage limit 138 /// to the default value. 139 static void 140 memlimit_init(void) 141 { 142 // If we cannot determine the amount of RAM, use the assumption 143 // defined by the configure script. 144 total_ram = lzma_physmem(); 145 if (total_ram == 0) 146 total_ram = (uint64_t)(ASSUME_RAM) * 1024 * 1024; 147 148 memlimit_set(0); 149 return; 150 } 151 152 153 /// \brief Convert a string to uint64_t 154 /// 155 /// This is rudely copied from src/xz/util.c and modified a little. :-( 156 /// 157 /// \param max Return value when the string "max" was specified. 158 /// 159 static uint64_t 160 str_to_uint64(const char *value, uint64_t max) 161 { 162 uint64_t result = 0; 163 164 // Accept special value "max". 165 if (strcmp(value, "max") == 0) 166 return max; 167 168 if (*value < '0' || *value > '9') { 169 my_errorf("%s: Value is not a non-negative decimal integer", 170 value); 171 exit(EXIT_FAILURE); 172 } 173 174 do { 175 // Don't overflow. 176 if (result > (UINT64_MAX - 9) / 10) 177 return UINT64_MAX; 178 179 result *= 10; 180 result += *value - '0'; 181 ++value; 182 } while (*value >= '0' && *value <= '9'); 183 184 if (*value != '\0') { 185 // Look for suffix. 186 uint64_t multiplier = 0; 187 if (*value == 'k' || *value == 'K') 188 multiplier = UINT64_C(1) << 10; 189 else if (*value == 'm' || *value == 'M') 190 multiplier = UINT64_C(1) << 20; 191 else if (*value == 'g' || *value == 'G') 192 multiplier = UINT64_C(1) << 30; 193 194 ++value; 195 196 // Allow also e.g. Ki, KiB, and KB. 197 if (*value != '\0' && strcmp(value, "i") != 0 198 && strcmp(value, "iB") != 0 199 && strcmp(value, "B") != 0) 200 multiplier = 0; 201 202 if (multiplier == 0) { 203 my_errorf("%s: Invalid suffix", value - 1); 204 exit(EXIT_FAILURE); 205 } 206 207 // Don't overflow here either. 208 if (result > UINT64_MAX / multiplier) 209 result = UINT64_MAX; 210 else 211 result *= multiplier; 212 } 213 214 return result; 215 } 216 217 218 /// Parses command line options. 219 static void 220 parse_options(int argc, char **argv) 221 { 222 static const char short_opts[] = "cdkM:hqQV"; 223 static const struct option long_opts[] = { 224 { "stdout", no_argument, NULL, 'c' }, 225 { "to-stdout", no_argument, NULL, 'c' }, 226 { "decompress", no_argument, NULL, 'd' }, 227 { "uncompress", no_argument, NULL, 'd' }, 228 { "keep", no_argument, NULL, 'k' }, 229 { "memory", required_argument, NULL, 'M' }, 230 { "quiet", no_argument, NULL, 'q' }, 231 { "no-warn", no_argument, NULL, 'Q' }, 232 { "help", no_argument, NULL, 'h' }, 233 { "version", no_argument, NULL, 'V' }, 234 { NULL, 0, NULL, 0 } 235 }; 236 237 int c; 238 239 while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL)) 240 != -1) { 241 switch (c) { 242 case 'c': 243 case 'd': 244 case 'k': 245 case 'Q': 246 break; 247 248 case 'M': { 249 // Support specifying the limit as a percentage of 250 // installed physical RAM. 251 const size_t len = strlen(optarg); 252 if (len > 0 && optarg[len - 1] == '%') { 253 // Memory limit is a percentage of total 254 // installed RAM. 255 optarg[len - 1] = '\0'; 256 const uint64_t percentage 257 = str_to_uint64(optarg, 100); 258 if (percentage < 1 || percentage > 100) { 259 my_errorf("Percentage must be in " 260 "the range [1, 100]"); 261 exit(EXIT_FAILURE); 262 } 263 264 memlimit_set_percentage(percentage); 265 } else { 266 memlimit_set(str_to_uint64( 267 optarg, UINT64_MAX)); 268 } 269 270 break; 271 } 272 273 case 'q': 274 if (display_errors > 0) 275 --display_errors; 276 277 break; 278 279 case 'h': 280 help(); 281 282 case 'V': 283 version(); 284 285 default: 286 exit(EXIT_FAILURE); 287 } 288 } 289 290 return; 291 } 292 293 294 static void 295 uncompress(lzma_stream *strm, FILE *file, const char *filename) 296 { 297 lzma_ret ret; 298 299 // Initialize the decoder 300 #ifdef LZMADEC 301 ret = lzma_alone_decoder(strm, memlimit); 302 #else 303 ret = lzma_stream_decoder(strm, memlimit, LZMA_CONCATENATED); 304 #endif 305 306 // The only reasonable error here is LZMA_MEM_ERROR. 307 // FIXME: Maybe also LZMA_MEMLIMIT_ERROR in future? 308 if (ret != LZMA_OK) { 309 my_errorf("%s", ret == LZMA_MEM_ERROR ? strerror(ENOMEM) 310 : "Internal error (bug)"); 311 exit(EXIT_FAILURE); 312 } 313 314 // Input and output buffers 315 uint8_t in_buf[BUFSIZ]; 316 uint8_t out_buf[BUFSIZ]; 317 318 strm->avail_in = 0; 319 strm->next_out = out_buf; 320 strm->avail_out = BUFSIZ; 321 322 lzma_action action = LZMA_RUN; 323 324 while (true) { 325 if (strm->avail_in == 0) { 326 strm->next_in = in_buf; 327 strm->avail_in = fread(in_buf, 1, BUFSIZ, file); 328 329 if (ferror(file)) { 330 // POSIX says that fread() sets errno if 331 // an error occurred. ferror() doesn't 332 // touch errno. 333 my_errorf("%s: Error reading input file: %s", 334 filename, strerror(errno)); 335 exit(EXIT_FAILURE); 336 } 337 338 #ifndef LZMADEC 339 // When using LZMA_CONCATENATED, we need to tell 340 // liblzma when it has got all the input. 341 if (feof(file)) 342 action = LZMA_FINISH; 343 #endif 344 } 345 346 ret = lzma_code(strm, action); 347 348 // Write and check write error before checking decoder error. 349 // This way as much data as possible gets written to output 350 // even if decoder detected an error. 351 if (strm->avail_out == 0 || ret != LZMA_OK) { 352 const size_t write_size = BUFSIZ - strm->avail_out; 353 354 if (fwrite(out_buf, 1, write_size, stdout) 355 != write_size) { 356 // Wouldn't be a surprise if writing to stderr 357 // would fail too but at least try to show an 358 // error message. 359 my_errorf("Cannot write to standard output: " 360 "%s", strerror(errno)); 361 exit(EXIT_FAILURE); 362 } 363 364 strm->next_out = out_buf; 365 strm->avail_out = BUFSIZ; 366 } 367 368 if (ret != LZMA_OK) { 369 if (ret == LZMA_STREAM_END) { 370 #ifdef LZMADEC 371 // Check that there's no trailing garbage. 372 if (strm->avail_in != 0 373 || fread(in_buf, 1, 1, file) 374 != 0 375 || !feof(file)) 376 ret = LZMA_DATA_ERROR; 377 else 378 return; 379 #else 380 // lzma_stream_decoder() already guarantees 381 // that there's no trailing garbage. 382 assert(strm->avail_in == 0); 383 assert(action == LZMA_FINISH); 384 assert(feof(file)); 385 return; 386 #endif 387 } 388 389 const char *msg; 390 switch (ret) { 391 case LZMA_MEM_ERROR: 392 msg = strerror(ENOMEM); 393 break; 394 395 case LZMA_MEMLIMIT_ERROR: 396 msg = "Memory usage limit reached"; 397 break; 398 399 case LZMA_FORMAT_ERROR: 400 msg = "File format not recognized"; 401 break; 402 403 case LZMA_OPTIONS_ERROR: 404 // FIXME: Better message? 405 msg = "Unsupported compression options"; 406 break; 407 408 case LZMA_DATA_ERROR: 409 msg = "File is corrupt"; 410 break; 411 412 case LZMA_BUF_ERROR: 413 msg = "Unexpected end of input"; 414 break; 415 416 default: 417 msg = "Internal error (bug)"; 418 break; 419 } 420 421 my_errorf("%s: %s", filename, msg); 422 exit(EXIT_FAILURE); 423 } 424 } 425 } 426 427 428 int 429 main(int argc, char **argv) 430 { 431 // Initialize progname which we will be used in error messages. 432 tuklib_progname_init(argv); 433 434 // Set the default memory usage limit. This is needed before parsing 435 // the command line arguments. 436 memlimit_init(); 437 438 // Parse the command line options. 439 parse_options(argc, argv); 440 441 // The same lzma_stream is used for all files that we decode. This way 442 // we don't need to reallocate memory for every file if they use same 443 // compression settings. 444 lzma_stream strm = LZMA_STREAM_INIT; 445 446 // Some systems require setting stdin and stdout to binary mode. 447 #ifdef TUKLIB_DOSLIKE 448 setmode(fileno(stdin), O_BINARY); 449 setmode(fileno(stdout), O_BINARY); 450 #endif 451 452 if (optind == argc) { 453 // No filenames given, decode from stdin. 454 uncompress(&strm, stdin, "(stdin)"); 455 } else { 456 // Loop through the filenames given on the command line. 457 do { 458 // "-" indicates stdin. 459 if (strcmp(argv[optind], "-") == 0) { 460 uncompress(&strm, stdin, "(stdin)"); 461 } else { 462 FILE *file = fopen(argv[optind], "rb"); 463 if (file == NULL) { 464 my_errorf("%s: %s", argv[optind], 465 strerror(errno)); 466 exit(EXIT_FAILURE); 467 } 468 469 uncompress(&strm, file, argv[optind]); 470 fclose(file); 471 } 472 } while (++optind < argc); 473 } 474 475 #ifndef NDEBUG 476 // Free the memory only when debugging. Freeing wastes some time, 477 // but allows detecting possible memory leaks with Valgrind. 478 lzma_end(&strm); 479 #endif 480 481 tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors); 482 } 483