1 /* $NetBSD: unxz.c,v 1.8 2018/10/06 16:36:45 martin Exp $ */ 2 3 /*- 4 * SPDX-License-Identifier: BSD-2-Clause-NetBSD 5 * 6 * Copyright (c) 2011 The NetBSD Foundation, Inc. 7 * All rights reserved. 8 * 9 * This code is derived from software contributed to The NetBSD Foundation 10 * by Christos Zoulas. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 */ 33 #include <sys/cdefs.h> 34 __FBSDID("$FreeBSD$"); 35 36 #include <stdarg.h> 37 #include <errno.h> 38 #include <stdio.h> 39 #include <unistd.h> 40 #include <lzma.h> 41 42 static off_t 43 unxz(int i, int o, char *pre, size_t prelen, off_t *bytes_in) 44 { 45 lzma_stream strm = LZMA_STREAM_INIT; 46 static const int flags = LZMA_TELL_UNSUPPORTED_CHECK|LZMA_CONCATENATED; 47 lzma_ret ret; 48 lzma_action action = LZMA_RUN; 49 off_t bytes_out, bp; 50 uint8_t ibuf[BUFSIZ]; 51 uint8_t obuf[BUFSIZ]; 52 53 if (bytes_in == NULL) 54 bytes_in = &bp; 55 56 strm.next_in = ibuf; 57 memcpy(ibuf, pre, prelen); 58 strm.avail_in = read(i, ibuf + prelen, sizeof(ibuf) - prelen); 59 if (strm.avail_in == (size_t)-1) 60 maybe_err("read failed"); 61 infile_newdata(strm.avail_in); 62 strm.avail_in += prelen; 63 *bytes_in = strm.avail_in; 64 65 if ((ret = lzma_stream_decoder(&strm, UINT64_MAX, flags)) != LZMA_OK) 66 maybe_errx("Can't initialize decoder (%d)", ret); 67 68 strm.next_out = NULL; 69 strm.avail_out = 0; 70 if ((ret = lzma_code(&strm, LZMA_RUN)) != LZMA_OK) 71 maybe_errx("Can't read headers (%d)", ret); 72 73 bytes_out = 0; 74 strm.next_out = obuf; 75 strm.avail_out = sizeof(obuf); 76 77 for (;;) { 78 check_siginfo(); 79 if (strm.avail_in == 0) { 80 strm.next_in = ibuf; 81 strm.avail_in = read(i, ibuf, sizeof(ibuf)); 82 switch (strm.avail_in) { 83 case (size_t)-1: 84 maybe_err("read failed"); 85 /*NOTREACHED*/ 86 case 0: 87 action = LZMA_FINISH; 88 break; 89 default: 90 infile_newdata(strm.avail_in); 91 *bytes_in += strm.avail_in; 92 break; 93 } 94 } 95 96 ret = lzma_code(&strm, action); 97 98 // Write and check write error before checking decoder error. 99 // This way as much data as possible gets written to output 100 // even if decoder detected an error. 101 if (strm.avail_out == 0 || ret != LZMA_OK) { 102 const size_t write_size = sizeof(obuf) - strm.avail_out; 103 104 if (write(o, obuf, write_size) != (ssize_t)write_size) 105 maybe_err("write failed"); 106 107 strm.next_out = obuf; 108 strm.avail_out = sizeof(obuf); 109 bytes_out += write_size; 110 } 111 112 if (ret != LZMA_OK) { 113 if (ret == LZMA_STREAM_END) { 114 // Check that there's no trailing garbage. 115 if (strm.avail_in != 0 || read(i, ibuf, 1)) 116 ret = LZMA_DATA_ERROR; 117 else { 118 lzma_end(&strm); 119 return bytes_out; 120 } 121 } 122 123 const char *msg; 124 switch (ret) { 125 case LZMA_MEM_ERROR: 126 msg = strerror(ENOMEM); 127 break; 128 129 case LZMA_FORMAT_ERROR: 130 msg = "File format not recognized"; 131 break; 132 133 case LZMA_OPTIONS_ERROR: 134 // FIXME: Better message? 135 msg = "Unsupported compression options"; 136 break; 137 138 case LZMA_DATA_ERROR: 139 msg = "File is corrupt"; 140 break; 141 142 case LZMA_BUF_ERROR: 143 msg = "Unexpected end of input"; 144 break; 145 146 case LZMA_MEMLIMIT_ERROR: 147 msg = "Reached memory limit"; 148 break; 149 150 default: 151 maybe_errx("Unknown error (%d)", ret); 152 break; 153 } 154 maybe_errx("%s", msg); 155 156 } 157 } 158 } 159 160 #include <stdbool.h> 161 162 /* 163 * Copied various bits and pieces from xz support code or brute force 164 * replacements. 165 */ 166 167 #define my_min(A,B) ((A)<(B)?(A):(B)) 168 169 // Some systems have suboptimal BUFSIZ. Use a bit bigger value on them. 170 // We also need that IO_BUFFER_SIZE is a multiple of 8 (sizeof(uint64_t)) 171 #if BUFSIZ <= 1024 172 # define IO_BUFFER_SIZE 8192 173 #else 174 # define IO_BUFFER_SIZE (BUFSIZ & ~7U) 175 #endif 176 177 /// is_sparse() accesses the buffer as uint64_t for maximum speed. 178 /// Use an union to make sure that the buffer is properly aligned. 179 typedef union { 180 uint8_t u8[IO_BUFFER_SIZE]; 181 uint32_t u32[IO_BUFFER_SIZE / sizeof(uint32_t)]; 182 uint64_t u64[IO_BUFFER_SIZE / sizeof(uint64_t)]; 183 } io_buf; 184 185 186 static bool 187 io_pread(int fd, io_buf *buf, size_t size, off_t pos) 188 { 189 // Using lseek() and read() is more portable than pread() and 190 // for us it is as good as real pread(). 191 if (lseek(fd, pos, SEEK_SET) != pos) { 192 return true; 193 } 194 195 const size_t amount = read(fd, buf, size); 196 if (amount == SIZE_MAX) 197 return true; 198 199 if (amount != size) { 200 return true; 201 } 202 203 return false; 204 } 205 206 /* 207 * Most of the following is copied (mostly verbatim) from the xz 208 * distribution, from file src/xz/list.c 209 */ 210 211 /////////////////////////////////////////////////////////////////////////////// 212 // 213 /// \file list.c 214 /// \brief Listing information about .xz files 215 // 216 // Author: Lasse Collin 217 // 218 // This file has been put into the public domain. 219 // You can do whatever you want with this file. 220 // 221 /////////////////////////////////////////////////////////////////////////////// 222 223 224 /// Information about a .xz file 225 typedef struct { 226 /// Combined Index of all Streams in the file 227 lzma_index *idx; 228 229 /// Total amount of Stream Padding 230 uint64_t stream_padding; 231 232 /// Highest memory usage so far 233 uint64_t memusage_max; 234 235 /// True if all Blocks so far have Compressed Size and 236 /// Uncompressed Size fields 237 bool all_have_sizes; 238 239 /// Oldest XZ Utils version that will decompress the file 240 uint32_t min_version; 241 242 } xz_file_info; 243 244 #define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 } 245 246 247 /// \brief Parse the Index(es) from the given .xz file 248 /// 249 /// \param xfi Pointer to structure where the decoded information 250 /// is stored. 251 /// \param pair Input file 252 /// 253 /// \return On success, false is returned. On error, true is returned. 254 /// 255 // TODO: This function is pretty big. liblzma should have a function that 256 // takes a callback function to parse the Index(es) from a .xz file to make 257 // it easy for applications. 258 static bool 259 parse_indexes(xz_file_info *xfi, int src_fd) 260 { 261 struct stat st; 262 263 fstat(src_fd, &st); 264 if (st.st_size <= 0) { 265 return true; 266 } 267 268 if (st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) { 269 return true; 270 } 271 272 io_buf buf; 273 lzma_stream_flags header_flags; 274 lzma_stream_flags footer_flags; 275 lzma_ret ret; 276 277 // lzma_stream for the Index decoder 278 lzma_stream strm = LZMA_STREAM_INIT; 279 280 // All Indexes decoded so far 281 lzma_index *combined_index = NULL; 282 283 // The Index currently being decoded 284 lzma_index *this_index = NULL; 285 286 // Current position in the file. We parse the file backwards so 287 // initialize it to point to the end of the file. 288 off_t pos = st.st_size; 289 290 // Each loop iteration decodes one Index. 291 do { 292 // Check that there is enough data left to contain at least 293 // the Stream Header and Stream Footer. This check cannot 294 // fail in the first pass of this loop. 295 if (pos < 2 * LZMA_STREAM_HEADER_SIZE) { 296 goto error; 297 } 298 299 pos -= LZMA_STREAM_HEADER_SIZE; 300 lzma_vli stream_padding = 0; 301 302 // Locate the Stream Footer. There may be Stream Padding which 303 // we must skip when reading backwards. 304 while (true) { 305 if (pos < LZMA_STREAM_HEADER_SIZE) { 306 goto error; 307 } 308 309 if (io_pread(src_fd, &buf, 310 LZMA_STREAM_HEADER_SIZE, pos)) 311 goto error; 312 313 // Stream Padding is always a multiple of four bytes. 314 int i = 2; 315 if (buf.u32[i] != 0) 316 break; 317 318 // To avoid calling io_pread() for every four bytes 319 // of Stream Padding, take advantage that we read 320 // 12 bytes (LZMA_STREAM_HEADER_SIZE) already and 321 // check them too before calling io_pread() again. 322 do { 323 stream_padding += 4; 324 pos -= 4; 325 --i; 326 } while (i >= 0 && buf.u32[i] == 0); 327 } 328 329 // Decode the Stream Footer. 330 ret = lzma_stream_footer_decode(&footer_flags, buf.u8); 331 if (ret != LZMA_OK) { 332 goto error; 333 } 334 335 // Check that the Stream Footer doesn't specify something 336 // that we don't support. This can only happen if the xz 337 // version is older than liblzma and liblzma supports 338 // something new. 339 // 340 // It is enough to check Stream Footer. Stream Header must 341 // match when it is compared against Stream Footer with 342 // lzma_stream_flags_compare(). 343 if (footer_flags.version != 0) { 344 goto error; 345 } 346 347 // Check that the size of the Index field looks sane. 348 lzma_vli index_size = footer_flags.backward_size; 349 if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) { 350 goto error; 351 } 352 353 // Set pos to the beginning of the Index. 354 pos -= index_size; 355 356 // Decode the Index. 357 ret = lzma_index_decoder(&strm, &this_index, UINT64_MAX); 358 if (ret != LZMA_OK) { 359 goto error; 360 } 361 362 do { 363 // Don't give the decoder more input than the 364 // Index size. 365 strm.avail_in = my_min(IO_BUFFER_SIZE, index_size); 366 if (io_pread(src_fd, &buf, strm.avail_in, pos)) 367 goto error; 368 369 pos += strm.avail_in; 370 index_size -= strm.avail_in; 371 372 strm.next_in = buf.u8; 373 ret = lzma_code(&strm, LZMA_RUN); 374 375 } while (ret == LZMA_OK); 376 377 // If the decoding seems to be successful, check also that 378 // the Index decoder consumed as much input as indicated 379 // by the Backward Size field. 380 if (ret == LZMA_STREAM_END) 381 if (index_size != 0 || strm.avail_in != 0) 382 ret = LZMA_DATA_ERROR; 383 384 if (ret != LZMA_STREAM_END) { 385 // LZMA_BUFFER_ERROR means that the Index decoder 386 // would have liked more input than what the Index 387 // size should be according to Stream Footer. 388 // The message for LZMA_DATA_ERROR makes more 389 // sense in that case. 390 if (ret == LZMA_BUF_ERROR) 391 ret = LZMA_DATA_ERROR; 392 393 goto error; 394 } 395 396 // Decode the Stream Header and check that its Stream Flags 397 // match the Stream Footer. 398 pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE; 399 if ((lzma_vli)(pos) < lzma_index_total_size(this_index)) { 400 goto error; 401 } 402 403 pos -= lzma_index_total_size(this_index); 404 if (io_pread(src_fd, &buf, LZMA_STREAM_HEADER_SIZE, pos)) 405 goto error; 406 407 ret = lzma_stream_header_decode(&header_flags, buf.u8); 408 if (ret != LZMA_OK) { 409 goto error; 410 } 411 412 ret = lzma_stream_flags_compare(&header_flags, &footer_flags); 413 if (ret != LZMA_OK) { 414 goto error; 415 } 416 417 // Store the decoded Stream Flags into this_index. This is 418 // needed so that we can print which Check is used in each 419 // Stream. 420 ret = lzma_index_stream_flags(this_index, &footer_flags); 421 if (ret != LZMA_OK) 422 goto error; 423 424 // Store also the size of the Stream Padding field. It is 425 // needed to show the offsets of the Streams correctly. 426 ret = lzma_index_stream_padding(this_index, stream_padding); 427 if (ret != LZMA_OK) 428 goto error; 429 430 if (combined_index != NULL) { 431 // Append the earlier decoded Indexes 432 // after this_index. 433 ret = lzma_index_cat( 434 this_index, combined_index, NULL); 435 if (ret != LZMA_OK) { 436 goto error; 437 } 438 } 439 440 combined_index = this_index; 441 this_index = NULL; 442 443 xfi->stream_padding += stream_padding; 444 445 } while (pos > 0); 446 447 lzma_end(&strm); 448 449 // All OK. Make combined_index available to the caller. 450 xfi->idx = combined_index; 451 return false; 452 453 error: 454 // Something went wrong, free the allocated memory. 455 lzma_end(&strm); 456 lzma_index_end(combined_index, NULL); 457 lzma_index_end(this_index, NULL); 458 return true; 459 } 460 461 /***************** end of copy form list.c *************************/ 462 463 /* 464 * Small wrapper to extract total length of a file 465 */ 466 off_t 467 unxz_len(int fd) 468 { 469 xz_file_info xfi = XZ_FILE_INFO_INIT; 470 if (!parse_indexes(&xfi, fd)) { 471 off_t res = lzma_index_uncompressed_size(xfi.idx); 472 lzma_index_end(xfi.idx, NULL); 473 return res; 474 } 475 return 0; 476 } 477 478