1 /* $NetBSD: unxz.c,v 1.8 2018/10/06 16:36:45 martin Exp $ */ 2 3 /*- 4 * SPDX-License-Identifier: BSD-2-Clause 5 * 6 * Copyright (c) 2011 The NetBSD Foundation, Inc. 7 * All rights reserved. 8 * 9 * This code is derived from software contributed to The NetBSD Foundation 10 * by Christos Zoulas. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 */ 33 #include <sys/cdefs.h> 34 #include <stdarg.h> 35 #include <errno.h> 36 #include <stdio.h> 37 #include <unistd.h> 38 #include <lzma.h> 39 40 static off_t 41 unxz(int i, int o, char *pre, size_t prelen, off_t *bytes_in) 42 { 43 lzma_stream strm = LZMA_STREAM_INIT; 44 static const int flags = LZMA_TELL_UNSUPPORTED_CHECK|LZMA_CONCATENATED; 45 lzma_ret ret; 46 lzma_action action = LZMA_RUN; 47 off_t bytes_out, bp; 48 uint8_t ibuf[BUFSIZ]; 49 uint8_t obuf[BUFSIZ]; 50 51 if (bytes_in == NULL) 52 bytes_in = &bp; 53 54 strm.next_in = ibuf; 55 memcpy(ibuf, pre, prelen); 56 strm.avail_in = read(i, ibuf + prelen, sizeof(ibuf) - prelen); 57 if (strm.avail_in == (size_t)-1) 58 maybe_err("read failed"); 59 infile_newdata(strm.avail_in); 60 strm.avail_in += prelen; 61 *bytes_in = strm.avail_in; 62 63 if ((ret = lzma_stream_decoder(&strm, UINT64_MAX, flags)) != LZMA_OK) 64 maybe_errx("Can't initialize decoder (%d)", ret); 65 66 strm.next_out = NULL; 67 strm.avail_out = 0; 68 if ((ret = lzma_code(&strm, LZMA_RUN)) != LZMA_OK) 69 maybe_errx("Can't read headers (%d)", ret); 70 71 bytes_out = 0; 72 strm.next_out = obuf; 73 strm.avail_out = sizeof(obuf); 74 75 for (;;) { 76 check_siginfo(); 77 if (strm.avail_in == 0) { 78 strm.next_in = ibuf; 79 strm.avail_in = read(i, ibuf, sizeof(ibuf)); 80 switch (strm.avail_in) { 81 case (size_t)-1: 82 maybe_err("read failed"); 83 /*NOTREACHED*/ 84 case 0: 85 action = LZMA_FINISH; 86 break; 87 default: 88 infile_newdata(strm.avail_in); 89 *bytes_in += strm.avail_in; 90 break; 91 } 92 } 93 94 ret = lzma_code(&strm, action); 95 96 // Write and check write error before checking decoder error. 97 // This way as much data as possible gets written to output 98 // even if decoder detected an error. 99 if (strm.avail_out == 0 || ret != LZMA_OK) { 100 const size_t write_size = sizeof(obuf) - strm.avail_out; 101 102 if (write(o, obuf, write_size) != (ssize_t)write_size) 103 maybe_err("write failed"); 104 105 strm.next_out = obuf; 106 strm.avail_out = sizeof(obuf); 107 bytes_out += write_size; 108 } 109 110 if (ret != LZMA_OK) { 111 if (ret == LZMA_STREAM_END) { 112 // Check that there's no trailing garbage. 113 if (strm.avail_in != 0 || read(i, ibuf, 1)) 114 ret = LZMA_DATA_ERROR; 115 else { 116 lzma_end(&strm); 117 return bytes_out; 118 } 119 } 120 121 const char *msg; 122 switch (ret) { 123 case LZMA_MEM_ERROR: 124 msg = strerror(ENOMEM); 125 break; 126 127 case LZMA_FORMAT_ERROR: 128 msg = "File format not recognized"; 129 break; 130 131 case LZMA_OPTIONS_ERROR: 132 // FIXME: Better message? 133 msg = "Unsupported compression options"; 134 break; 135 136 case LZMA_DATA_ERROR: 137 msg = "File is corrupt"; 138 break; 139 140 case LZMA_BUF_ERROR: 141 msg = "Unexpected end of input"; 142 break; 143 144 case LZMA_MEMLIMIT_ERROR: 145 msg = "Reached memory limit"; 146 break; 147 148 default: 149 maybe_errx("Unknown error (%d)", ret); 150 break; 151 } 152 maybe_errx("%s", msg); 153 154 } 155 } 156 } 157 158 #include <stdbool.h> 159 160 /* 161 * Copied various bits and pieces from xz support code or brute force 162 * replacements. 163 */ 164 165 #define my_min(A,B) ((A)<(B)?(A):(B)) 166 167 // Some systems have suboptimal BUFSIZ. Use a bit bigger value on them. 168 // We also need that IO_BUFFER_SIZE is a multiple of 8 (sizeof(uint64_t)) 169 #if BUFSIZ <= 1024 170 # define IO_BUFFER_SIZE 8192 171 #else 172 # define IO_BUFFER_SIZE (BUFSIZ & ~7U) 173 #endif 174 175 /// is_sparse() accesses the buffer as uint64_t for maximum speed. 176 /// Use an union to make sure that the buffer is properly aligned. 177 typedef union { 178 uint8_t u8[IO_BUFFER_SIZE]; 179 uint32_t u32[IO_BUFFER_SIZE / sizeof(uint32_t)]; 180 uint64_t u64[IO_BUFFER_SIZE / sizeof(uint64_t)]; 181 } io_buf; 182 183 184 static bool 185 io_pread(int fd, io_buf *buf, size_t size, off_t pos) 186 { 187 // Using lseek() and read() is more portable than pread() and 188 // for us it is as good as real pread(). 189 if (lseek(fd, pos, SEEK_SET) != pos) { 190 return true; 191 } 192 193 const size_t amount = read(fd, buf, size); 194 if (amount == SIZE_MAX) 195 return true; 196 197 if (amount != size) { 198 return true; 199 } 200 201 return false; 202 } 203 204 /* 205 * Most of the following is copied (mostly verbatim) from the xz 206 * distribution, from file src/xz/list.c 207 */ 208 209 /////////////////////////////////////////////////////////////////////////////// 210 // 211 /// \file list.c 212 /// \brief Listing information about .xz files 213 // 214 // Author: Lasse Collin 215 // 216 // This file has been put into the public domain. 217 // You can do whatever you want with this file. 218 // 219 /////////////////////////////////////////////////////////////////////////////// 220 221 222 /// Information about a .xz file 223 typedef struct { 224 /// Combined Index of all Streams in the file 225 lzma_index *idx; 226 227 /// Total amount of Stream Padding 228 uint64_t stream_padding; 229 230 /// Highest memory usage so far 231 uint64_t memusage_max; 232 233 /// True if all Blocks so far have Compressed Size and 234 /// Uncompressed Size fields 235 bool all_have_sizes; 236 237 /// Oldest XZ Utils version that will decompress the file 238 uint32_t min_version; 239 240 } xz_file_info; 241 242 #define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 } 243 244 245 /// \brief Parse the Index(es) from the given .xz file 246 /// 247 /// \param xfi Pointer to structure where the decoded information 248 /// is stored. 249 /// \param pair Input file 250 /// 251 /// \return On success, false is returned. On error, true is returned. 252 /// 253 // TODO: This function is pretty big. liblzma should have a function that 254 // takes a callback function to parse the Index(es) from a .xz file to make 255 // it easy for applications. 256 static bool 257 parse_indexes(xz_file_info *xfi, int src_fd) 258 { 259 struct stat st; 260 261 if (fstat(src_fd, &st) != 0) { 262 return true; 263 } 264 265 if (st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) { 266 return true; 267 } 268 269 io_buf buf; 270 lzma_stream_flags header_flags; 271 lzma_stream_flags footer_flags; 272 lzma_ret ret; 273 274 // lzma_stream for the Index decoder 275 lzma_stream strm = LZMA_STREAM_INIT; 276 277 // All Indexes decoded so far 278 lzma_index *combined_index = NULL; 279 280 // The Index currently being decoded 281 lzma_index *this_index = NULL; 282 283 // Current position in the file. We parse the file backwards so 284 // initialize it to point to the end of the file. 285 off_t pos = st.st_size; 286 287 // Each loop iteration decodes one Index. 288 do { 289 // Check that there is enough data left to contain at least 290 // the Stream Header and Stream Footer. This check cannot 291 // fail in the first pass of this loop. 292 if (pos < 2 * LZMA_STREAM_HEADER_SIZE) { 293 goto error; 294 } 295 296 pos -= LZMA_STREAM_HEADER_SIZE; 297 lzma_vli stream_padding = 0; 298 299 // Locate the Stream Footer. There may be Stream Padding which 300 // we must skip when reading backwards. 301 while (true) { 302 if (pos < LZMA_STREAM_HEADER_SIZE) { 303 goto error; 304 } 305 306 if (io_pread(src_fd, &buf, 307 LZMA_STREAM_HEADER_SIZE, pos)) 308 goto error; 309 310 // Stream Padding is always a multiple of four bytes. 311 int i = 2; 312 if (buf.u32[i] != 0) 313 break; 314 315 // To avoid calling io_pread() for every four bytes 316 // of Stream Padding, take advantage that we read 317 // 12 bytes (LZMA_STREAM_HEADER_SIZE) already and 318 // check them too before calling io_pread() again. 319 do { 320 stream_padding += 4; 321 pos -= 4; 322 --i; 323 } while (i >= 0 && buf.u32[i] == 0); 324 } 325 326 // Decode the Stream Footer. 327 ret = lzma_stream_footer_decode(&footer_flags, buf.u8); 328 if (ret != LZMA_OK) { 329 goto error; 330 } 331 332 // Check that the Stream Footer doesn't specify something 333 // that we don't support. This can only happen if the xz 334 // version is older than liblzma and liblzma supports 335 // something new. 336 // 337 // It is enough to check Stream Footer. Stream Header must 338 // match when it is compared against Stream Footer with 339 // lzma_stream_flags_compare(). 340 if (footer_flags.version != 0) { 341 goto error; 342 } 343 344 // Check that the size of the Index field looks sane. 345 lzma_vli index_size = footer_flags.backward_size; 346 if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) { 347 goto error; 348 } 349 350 // Set pos to the beginning of the Index. 351 pos -= index_size; 352 353 // Decode the Index. 354 ret = lzma_index_decoder(&strm, &this_index, UINT64_MAX); 355 if (ret != LZMA_OK) { 356 goto error; 357 } 358 359 do { 360 // Don't give the decoder more input than the 361 // Index size. 362 strm.avail_in = my_min(IO_BUFFER_SIZE, index_size); 363 if (io_pread(src_fd, &buf, strm.avail_in, pos)) 364 goto error; 365 366 pos += strm.avail_in; 367 index_size -= strm.avail_in; 368 369 strm.next_in = buf.u8; 370 ret = lzma_code(&strm, LZMA_RUN); 371 372 } while (ret == LZMA_OK); 373 374 // If the decoding seems to be successful, check also that 375 // the Index decoder consumed as much input as indicated 376 // by the Backward Size field. 377 if (ret == LZMA_STREAM_END) 378 if (index_size != 0 || strm.avail_in != 0) 379 ret = LZMA_DATA_ERROR; 380 381 if (ret != LZMA_STREAM_END) { 382 // LZMA_BUFFER_ERROR means that the Index decoder 383 // would have liked more input than what the Index 384 // size should be according to Stream Footer. 385 // The message for LZMA_DATA_ERROR makes more 386 // sense in that case. 387 if (ret == LZMA_BUF_ERROR) 388 ret = LZMA_DATA_ERROR; 389 390 goto error; 391 } 392 393 // Decode the Stream Header and check that its Stream Flags 394 // match the Stream Footer. 395 pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE; 396 if ((lzma_vli)(pos) < lzma_index_total_size(this_index)) { 397 goto error; 398 } 399 400 pos -= lzma_index_total_size(this_index); 401 if (io_pread(src_fd, &buf, LZMA_STREAM_HEADER_SIZE, pos)) 402 goto error; 403 404 ret = lzma_stream_header_decode(&header_flags, buf.u8); 405 if (ret != LZMA_OK) { 406 goto error; 407 } 408 409 ret = lzma_stream_flags_compare(&header_flags, &footer_flags); 410 if (ret != LZMA_OK) { 411 goto error; 412 } 413 414 // Store the decoded Stream Flags into this_index. This is 415 // needed so that we can print which Check is used in each 416 // Stream. 417 ret = lzma_index_stream_flags(this_index, &footer_flags); 418 if (ret != LZMA_OK) 419 goto error; 420 421 // Store also the size of the Stream Padding field. It is 422 // needed to show the offsets of the Streams correctly. 423 ret = lzma_index_stream_padding(this_index, stream_padding); 424 if (ret != LZMA_OK) 425 goto error; 426 427 if (combined_index != NULL) { 428 // Append the earlier decoded Indexes 429 // after this_index. 430 ret = lzma_index_cat( 431 this_index, combined_index, NULL); 432 if (ret != LZMA_OK) { 433 goto error; 434 } 435 } 436 437 combined_index = this_index; 438 this_index = NULL; 439 440 xfi->stream_padding += stream_padding; 441 442 } while (pos > 0); 443 444 lzma_end(&strm); 445 446 // All OK. Make combined_index available to the caller. 447 xfi->idx = combined_index; 448 return false; 449 450 error: 451 // Something went wrong, free the allocated memory. 452 lzma_end(&strm); 453 lzma_index_end(combined_index, NULL); 454 lzma_index_end(this_index, NULL); 455 return true; 456 } 457 458 /***************** end of copy form list.c *************************/ 459 460 /* 461 * Small wrapper to extract total length of a file 462 */ 463 off_t 464 unxz_len(int fd) 465 { 466 xz_file_info xfi = XZ_FILE_INFO_INIT; 467 if (!parse_indexes(&xfi, fd)) { 468 off_t res = lzma_index_uncompressed_size(xfi.idx); 469 lzma_index_end(xfi.idx, NULL); 470 return res; 471 } 472 return 0; 473 } 474 475