/*	$NetBSD: unxz.c,v 1.8 2018/10/06 16:36:45 martin Exp $	*/

/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Christos Zoulas.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
32 */ 33 #include <sys/cdefs.h> 34 __FBSDID("$FreeBSD$"); 35 36 #include <stdarg.h> 37 #include <errno.h> 38 #include <stdio.h> 39 #include <unistd.h> 40 #include <lzma.h> 41 42 static off_t 43 unxz(int i, int o, char *pre, size_t prelen, off_t *bytes_in) 44 { 45 lzma_stream strm = LZMA_STREAM_INIT; 46 static const int flags = LZMA_TELL_UNSUPPORTED_CHECK|LZMA_CONCATENATED; 47 lzma_ret ret; 48 lzma_action action = LZMA_RUN; 49 off_t bytes_out, bp; 50 uint8_t ibuf[BUFSIZ]; 51 uint8_t obuf[BUFSIZ]; 52 53 if (bytes_in == NULL) 54 bytes_in = &bp; 55 56 strm.next_in = ibuf; 57 memcpy(ibuf, pre, prelen); 58 strm.avail_in = read(i, ibuf + prelen, sizeof(ibuf) - prelen); 59 if (strm.avail_in == (size_t)-1) 60 maybe_err("read failed"); 61 infile_newdata(strm.avail_in); 62 strm.avail_in += prelen; 63 *bytes_in = strm.avail_in; 64 65 if ((ret = lzma_stream_decoder(&strm, UINT64_MAX, flags)) != LZMA_OK) 66 maybe_errx("Can't initialize decoder (%d)", ret); 67 68 strm.next_out = NULL; 69 strm.avail_out = 0; 70 if ((ret = lzma_code(&strm, LZMA_RUN)) != LZMA_OK) 71 maybe_errx("Can't read headers (%d)", ret); 72 73 bytes_out = 0; 74 strm.next_out = obuf; 75 strm.avail_out = sizeof(obuf); 76 77 for (;;) { 78 check_siginfo(); 79 if (strm.avail_in == 0) { 80 strm.next_in = ibuf; 81 strm.avail_in = read(i, ibuf, sizeof(ibuf)); 82 switch (strm.avail_in) { 83 case (size_t)-1: 84 maybe_err("read failed"); 85 /*NOTREACHED*/ 86 case 0: 87 action = LZMA_FINISH; 88 break; 89 default: 90 infile_newdata(strm.avail_in); 91 *bytes_in += strm.avail_in; 92 break; 93 } 94 } 95 96 ret = lzma_code(&strm, action); 97 98 // Write and check write error before checking decoder error. 99 // This way as much data as possible gets written to output 100 // even if decoder detected an error. 
101 if (strm.avail_out == 0 || ret != LZMA_OK) { 102 const size_t write_size = sizeof(obuf) - strm.avail_out; 103 104 if (write(o, obuf, write_size) != (ssize_t)write_size) 105 maybe_err("write failed"); 106 107 strm.next_out = obuf; 108 strm.avail_out = sizeof(obuf); 109 bytes_out += write_size; 110 } 111 112 if (ret != LZMA_OK) { 113 if (ret == LZMA_STREAM_END) { 114 // Check that there's no trailing garbage. 115 if (strm.avail_in != 0 || read(i, ibuf, 1)) 116 ret = LZMA_DATA_ERROR; 117 else { 118 lzma_end(&strm); 119 return bytes_out; 120 } 121 } 122 123 const char *msg; 124 switch (ret) { 125 case LZMA_MEM_ERROR: 126 msg = strerror(ENOMEM); 127 break; 128 129 case LZMA_FORMAT_ERROR: 130 msg = "File format not recognized"; 131 break; 132 133 case LZMA_OPTIONS_ERROR: 134 // FIXME: Better message? 135 msg = "Unsupported compression options"; 136 break; 137 138 case LZMA_DATA_ERROR: 139 msg = "File is corrupt"; 140 break; 141 142 case LZMA_BUF_ERROR: 143 msg = "Unexpected end of input"; 144 break; 145 146 case LZMA_MEMLIMIT_ERROR: 147 msg = "Reached memory limit"; 148 break; 149 150 default: 151 maybe_errx("Unknown error (%d)", ret); 152 break; 153 } 154 maybe_errx("%s", msg); 155 156 } 157 } 158 } 159 160 #include <stdbool.h> 161 162 /* 163 * Copied various bits and pieces from xz support code or brute force 164 * replacements. 165 */ 166 167 #define my_min(A,B) ((A)<(B)?(A):(B)) 168 169 // Some systems have suboptimal BUFSIZ. Use a bit bigger value on them. 170 // We also need that IO_BUFFER_SIZE is a multiple of 8 (sizeof(uint64_t)) 171 #if BUFSIZ <= 1024 172 # define IO_BUFFER_SIZE 8192 173 #else 174 # define IO_BUFFER_SIZE (BUFSIZ & ~7U) 175 #endif 176 177 /// is_sparse() accesses the buffer as uint64_t for maximum speed. 178 /// Use an union to make sure that the buffer is properly aligned. 
179 typedef union { 180 uint8_t u8[IO_BUFFER_SIZE]; 181 uint32_t u32[IO_BUFFER_SIZE / sizeof(uint32_t)]; 182 uint64_t u64[IO_BUFFER_SIZE / sizeof(uint64_t)]; 183 } io_buf; 184 185 186 static bool 187 io_pread(int fd, io_buf *buf, size_t size, off_t pos) 188 { 189 // Using lseek() and read() is more portable than pread() and 190 // for us it is as good as real pread(). 191 if (lseek(fd, pos, SEEK_SET) != pos) { 192 return true; 193 } 194 195 const size_t amount = read(fd, buf, size); 196 if (amount == SIZE_MAX) 197 return true; 198 199 if (amount != size) { 200 return true; 201 } 202 203 return false; 204 } 205 206 /* 207 * Most of the following is copied (mostly verbatim) from the xz 208 * distribution, from file src/xz/list.c 209 */ 210 211 /////////////////////////////////////////////////////////////////////////////// 212 // 213 /// \file list.c 214 /// \brief Listing information about .xz files 215 // 216 // Author: Lasse Collin 217 // 218 // This file has been put into the public domain. 219 // You can do whatever you want with this file. 220 // 221 /////////////////////////////////////////////////////////////////////////////// 222 223 224 /// Information about a .xz file 225 typedef struct { 226 /// Combined Index of all Streams in the file 227 lzma_index *idx; 228 229 /// Total amount of Stream Padding 230 uint64_t stream_padding; 231 232 /// Highest memory usage so far 233 uint64_t memusage_max; 234 235 /// True if all Blocks so far have Compressed Size and 236 /// Uncompressed Size fields 237 bool all_have_sizes; 238 239 /// Oldest XZ Utils version that will decompress the file 240 uint32_t min_version; 241 242 } xz_file_info; 243 244 #define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 } 245 246 247 /// \brief Parse the Index(es) from the given .xz file 248 /// 249 /// \param xfi Pointer to structure where the decoded information 250 /// is stored. 251 /// \param pair Input file 252 /// 253 /// \return On success, false is returned. 
///             On error, true is returned.
///
/// The file is walked backwards from its end, one Stream per iteration of
/// the outer loop.  On success xfi->idx holds the combined Index and the
/// caller releases it with lzma_index_end(); on error nothing is stored
/// in xfi->idx.  src_fd is repositioned by the io_pread() calls.
///
// TODO: This function is pretty big. liblzma should have a function that
// takes a callback function to parse the Index(es) from a .xz file to make
// it easy for applications.
static bool
parse_indexes(xz_file_info *xfi, int src_fd)
{
	struct stat st;

	if (fstat(src_fd, &st) != 0) {
		return true;
	}

	// A file shorter than a Stream Header plus a Stream Footer cannot
	// be handled by the loop below.
	if (st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) {
		return true;
	}

	io_buf buf;
	lzma_stream_flags header_flags;
	lzma_stream_flags footer_flags;
	lzma_ret ret;

	// lzma_stream for the Index decoder
	lzma_stream strm = LZMA_STREAM_INIT;

	// All Indexes decoded so far
	lzma_index *combined_index = NULL;

	// The Index currently being decoded
	lzma_index *this_index = NULL;

	// Current position in the file. We parse the file backwards so
	// initialize it to point to the end of the file.
	off_t pos = st.st_size;

	// Each loop iteration decodes one Index.
	do {
		// Check that there is enough data left to contain at least
		// the Stream Header and Stream Footer. This check cannot
		// fail in the first pass of this loop.
		if (pos < 2 * LZMA_STREAM_HEADER_SIZE) {
			goto error;
		}

		pos -= LZMA_STREAM_HEADER_SIZE;
		lzma_vli stream_padding = 0;

		// Locate the Stream Footer. There may be Stream Padding which
		// we must skip when reading backwards.
		while (true) {
			if (pos < LZMA_STREAM_HEADER_SIZE) {
				goto error;
			}

			if (io_pread(src_fd, &buf,
					LZMA_STREAM_HEADER_SIZE, pos))
				goto error;

			// Stream Padding is always a multiple of four bytes.
			// Start with the last 32-bit word that was read; a
			// non-zero word there ends the search.
			int i = 2;
			if (buf.u32[i] != 0)
				break;

			// To avoid calling io_pread() for every four bytes
			// of Stream Padding, take advantage that we read
			// 12 bytes (LZMA_STREAM_HEADER_SIZE) already and
			// check them too before calling io_pread() again.
			do {
				stream_padding += 4;
				pos -= 4;
				--i;
			} while (i >= 0 && buf.u32[i] == 0);
		}

		// Decode the Stream Footer.
		ret = lzma_stream_footer_decode(&footer_flags, buf.u8);
		if (ret != LZMA_OK) {
			goto error;
		}

		// Check that the Stream Footer doesn't specify something
		// that we don't support. This can only happen if the xz
		// version is older than liblzma and liblzma supports
		// something new.
		//
		// It is enough to check Stream Footer. Stream Header must
		// match when it is compared against Stream Footer with
		// lzma_stream_flags_compare().
		if (footer_flags.version != 0) {
			goto error;
		}

		// Check that the size of the Index field looks sane.
		lzma_vli index_size = footer_flags.backward_size;
		if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) {
			goto error;
		}

		// Set pos to the beginning of the Index.
		pos -= index_size;

		// Decode the Index.
		ret = lzma_index_decoder(&strm, &this_index, UINT64_MAX);
		if (ret != LZMA_OK) {
			goto error;
		}

		// Feed the Index to the decoder one buffer at a time; pos
		// moves forward and index_size counts down as input is read.
		do {
			// Don't give the decoder more input than the
			// Index size.
			strm.avail_in = my_min(IO_BUFFER_SIZE, index_size);
			if (io_pread(src_fd, &buf, strm.avail_in, pos))
				goto error;

			pos += strm.avail_in;
			index_size -= strm.avail_in;

			strm.next_in = buf.u8;
			ret = lzma_code(&strm, LZMA_RUN);

		} while (ret == LZMA_OK);

		// If the decoding seems to be successful, check also that
		// the Index decoder consumed as much input as indicated
		// by the Backward Size field.
		if (ret == LZMA_STREAM_END)
			if (index_size != 0 || strm.avail_in != 0)
				ret = LZMA_DATA_ERROR;

		if (ret != LZMA_STREAM_END) {
			// LZMA_BUF_ERROR means that the Index decoder
			// would have liked more input than what the Index
			// size should be according to Stream Footer.
			// The message for LZMA_DATA_ERROR makes more
			// sense in that case.
			//
			// Note that ret is not examined after the jump:
			// the error path only returns true.
			if (ret == LZMA_BUF_ERROR)
				ret = LZMA_DATA_ERROR;

			goto error;
		}

		// Decode the Stream Header and check that its Stream Flags
		// match the Stream Footer.
		//
		// pos is just past the Index here; step back over the Index
		// and one header-sized unit first, then over the Blocks.
		pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE;
		if ((lzma_vli)(pos) < lzma_index_total_size(this_index)) {
			goto error;
		}

		pos -= lzma_index_total_size(this_index);
		if (io_pread(src_fd, &buf, LZMA_STREAM_HEADER_SIZE, pos))
			goto error;

		ret = lzma_stream_header_decode(&header_flags, buf.u8);
		if (ret != LZMA_OK) {
			goto error;
		}

		ret = lzma_stream_flags_compare(&header_flags, &footer_flags);
		if (ret != LZMA_OK) {
			goto error;
		}

		// Store the decoded Stream Flags into this_index. This is
		// needed so that we can print which Check is used in each
		// Stream.
		ret = lzma_index_stream_flags(this_index, &footer_flags);
		if (ret != LZMA_OK)
			goto error;

		// Store also the size of the Stream Padding field. It is
		// needed to show the offsets of the Streams correctly.
		ret = lzma_index_stream_padding(this_index, stream_padding);
		if (ret != LZMA_OK)
			goto error;

		if (combined_index != NULL) {
			// Append the earlier decoded Indexes
			// after this_index.
			ret = lzma_index_cat(
					this_index, combined_index, NULL);
			if (ret != LZMA_OK) {
				goto error;
			}
		}

		// this_index now carries everything decoded so far; clear
		// the pointer so the error path cannot free it twice.
		combined_index = this_index;
		this_index = NULL;

		xfi->stream_padding += stream_padding;

	} while (pos > 0);

	lzma_end(&strm);

	// All OK. Make combined_index available to the caller.
	xfi->idx = combined_index;
	return false;

error:
	// Something went wrong, free the allocated memory.
	lzma_end(&strm);
	lzma_index_end(combined_index, NULL);
	lzma_index_end(this_index, NULL);
	return true;
}

/***************** end of copy from list.c *************************/

/*
 * Small wrapper to extract total length of a file
 *
 * Returns the value of lzma_index_uncompressed_size() for the combined
 * Index of `fd', or 0 when parse_indexes() fails, so a failure cannot be
 * told apart from a size of 0.  parse_indexes() seeks on `fd'.
 */
off_t
unxz_len(int fd)
{
	xz_file_info xfi = XZ_FILE_INFO_INIT;
	if (!parse_indexes(&xfi, fd)) {
		off_t res = lzma_index_uncompressed_size(xfi.idx);
		// The Index was only needed for the size; release it.
		lzma_index_end(xfi.idx, NULL);
		return res;
	}
	return 0;
}