1 /* gzread.c -- zlib functions for reading gzip files 2 * Copyright (C) 2004-2026 Mark Adler 3 * For conditions of distribution and use, see copyright notice in zlib.h 4 */ 5 6 #include "gzguts.h" 7 #include <unistd.h> 8 9 /* Use read() to load a buffer -- return -1 on error, otherwise 0. Read from 10 state->fd, and update state->eof, state->err, and state->msg as appropriate. 11 This function needs to loop on read(), since read() is not guaranteed to 12 read the number of bytes requested, depending on the type of descriptor. It 13 also needs to loop to manage the fact that read() returns an int. If the 14 descriptor is non-blocking and read() returns with no data in order to avoid 15 blocking, then gz_load() will return 0 if some data has been read, or -1 if 16 no data has been read. Either way, state->again is set true to indicate a 17 non-blocking event. If errno is non-zero on return, then there was an error 18 signaled from read(). *have is set to the number of bytes read. */ 19 local int gz_load(gz_statep state, unsigned char *buf, unsigned len, 20 unsigned *have) { 21 int ret; 22 unsigned get, max = ((unsigned)-1 >> 2) + 1; 23 24 state->again = 0; 25 errno = 0; 26 *have = 0; 27 do { 28 get = len - *have; 29 if (get > max) 30 get = max; 31 ret = (int)read(state->fd, buf + *have, get); 32 if (ret <= 0) 33 break; 34 *have += (unsigned)ret; 35 } while (*have < len); 36 if (ret < 0) { 37 if (errno == EAGAIN || errno == EWOULDBLOCK) { 38 state->again = 1; 39 if (*have != 0) 40 return 0; 41 } 42 gz_error(state, Z_ERRNO, zstrerror()); 43 return -1; 44 } 45 if (ret == 0) 46 state->eof = 1; 47 return 0; 48 } 49 50 /* Load up input buffer and set eof flag if last data loaded -- return -1 on 51 error, 0 otherwise. Note that the eof flag is set when the end of the input 52 file is reached, even though there may be unused data in the buffer. Once 53 that data has been used, no more attempts will be made to read the file. 54 If strm->avail_in != 0, then the current data is moved to the beginning of 55 the input buffer, and then the remainder of the buffer is loaded with the 56 available data from the input file. */ 57 local int gz_avail(gz_statep state) { 58 unsigned got; 59 z_streamp strm = &(state->strm); 60 61 if (state->err != Z_OK && state->err != Z_BUF_ERROR) 62 return -1; 63 if (state->eof == 0) { 64 if (strm->avail_in) { /* copy what's there to the start */ 65 unsigned char *p = state->in; 66 unsigned const char *q = strm->next_in; 67 68 if (q != p) { 69 unsigned n = strm->avail_in; 70 71 do { 72 *p++ = *q++; 73 } while (--n); 74 } 75 } 76 if (gz_load(state, state->in + strm->avail_in, 77 state->size - strm->avail_in, &got) == -1) 78 return -1; 79 strm->avail_in += got; 80 strm->next_in = state->in; 81 } 82 return 0; 83 } 84 85 /* Look for gzip header, set up for inflate or copy. state->x.have must be 0. 86 If this is the first time in, allocate required memory. state->how will be 87 left unchanged if there is no more input data available, will be set to COPY 88 if there is no gzip header and direct copying will be performed, or it will 89 be set to GZIP for decompression. If direct copying, then leftover input 90 data from the input buffer will be copied to the output buffer. In that 91 case, all further file reads will be directly to either the output buffer or 92 a user buffer. If decompressing, the inflate state will be initialized. 93 gz_look() will return 0 on success or -1 on failure. */ 94 local int gz_look(gz_statep state) { 95 z_streamp strm = &(state->strm); 96 97 /* allocate read buffers and inflate memory */ 98 if (state->size == 0) { 99 /* allocate buffers */ 100 state->in = (unsigned char *)malloc(state->want); 101 state->out = (unsigned char *)malloc(state->want << 1); 102 if (state->in == NULL || state->out == NULL) { 103 free(state->out); 104 free(state->in); 105 gz_error(state, Z_MEM_ERROR, "out of memory"); 106 return -1; 107 } 108 state->size = state->want; 109 110 /* allocate inflate memory */ 111 state->strm.zalloc = Z_NULL; 112 state->strm.zfree = Z_NULL; 113 state->strm.opaque = Z_NULL; 114 state->strm.avail_in = 0; 115 state->strm.next_in = Z_NULL; 116 if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) { /* gunzip */ 117 free(state->out); 118 free(state->in); 119 state->size = 0; 120 gz_error(state, Z_MEM_ERROR, "out of memory"); 121 return -1; 122 } 123 } 124 125 /* if transparent reading is disabled, which would only be at the start, or 126 if we're looking for a gzip member after the first one, which is not at 127 the start, then proceed directly to look for a gzip member next */ 128 if (state->direct == -1 || state->junk == 0) { 129 inflateReset(strm); 130 state->how = GZIP; 131 state->junk = state->junk != -1; 132 state->direct = 0; 133 return 0; 134 } 135 136 /* otherwise we're at the start with auto-detect -- we check to see if the 137 first four bytes could be gzip header in order to decide whether or not 138 this will be a transparent read */ 139 140 /* load any header bytes into the input buffer -- if the input is empty, 141 then it's not an error as this is a transparent read of zero bytes */ 142 if (gz_avail(state) == -1) 143 return -1; 144 if (strm->avail_in == 0 || (state->again && strm->avail_in < 4)) 145 /* if non-blocking input stalled before getting four bytes, then 146 return and wait until a later call has accumulated enough */ 147 return 0; 148 149 /* see if this is (likely) gzip input -- if the first four bytes are 150 consistent with a gzip header, then go look for the first gzip member, 151 otherwise proceed to copy the input transparently */ 152 if (strm->avail_in > 3 && 153 strm->next_in[0] == 31 && strm->next_in[1] == 139 && 154 strm->next_in[2] == 8 && strm->next_in[3] < 32) { 155 inflateReset(strm); 156 state->how = GZIP; 157 state->junk = 1; 158 state->direct = 0; 159 return 0; 160 } 161 162 /* doing raw i/o: copy any leftover input to output -- this assumes that 163 the output buffer is larger than the input buffer, which also assures 164 space for gzungetc() */ 165 state->x.next = state->out; 166 memcpy(state->x.next, strm->next_in, strm->avail_in); 167 state->x.have = strm->avail_in; 168 strm->avail_in = 0; 169 state->how = COPY; 170 return 0; 171 } 172 173 /* Decompress from input to the provided next_out and avail_out in the state. 174 On return, state->x.have and state->x.next point to the just decompressed 175 data. If the gzip stream completes, state->how is reset to LOOK to look for 176 the next gzip stream or raw data, once state->x.have is depleted. Returns 0 177 on success, -1 on failure. If EOF is reached when looking for more input to 178 complete the gzip member, then an unexpected end of file error is raised. 179 If there is no more input, but state->again is true, then EOF has not been 180 reached, and no error is raised. */ 181 local int gz_decomp(gz_statep state) { 182 int ret = Z_OK; 183 unsigned had; 184 z_streamp strm = &(state->strm); 185 186 /* fill output buffer up to end of deflate stream */ 187 had = strm->avail_out; 188 do { 189 /* get more input for inflate() */ 190 if (strm->avail_in == 0 && gz_avail(state) == -1) { 191 ret = state->err; 192 break; 193 } 194 if (strm->avail_in == 0) { 195 if (!state->again) 196 gz_error(state, Z_BUF_ERROR, "unexpected end of file"); 197 break; 198 } 199 200 /* decompress and handle errors */ 201 ret = inflate(strm, Z_NO_FLUSH); 202 if (strm->avail_out < had) 203 /* any decompressed data marks this as a real gzip stream */ 204 state->junk = 0; 205 if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) { 206 gz_error(state, Z_STREAM_ERROR, 207 "internal error: inflate stream corrupt"); 208 break; 209 } 210 if (ret == Z_MEM_ERROR) { 211 gz_error(state, Z_MEM_ERROR, "out of memory"); 212 break; 213 } 214 if (ret == Z_DATA_ERROR) { /* deflate stream invalid */ 215 if (state->junk == 1) { /* trailing garbage is ok */ 216 strm->avail_in = 0; 217 state->eof = 1; 218 state->how = LOOK; 219 ret = Z_OK; 220 break; 221 } 222 gz_error(state, Z_DATA_ERROR, 223 strm->msg == NULL ? "compressed data error" : strm->msg); 224 break; 225 } 226 } while (strm->avail_out && ret != Z_STREAM_END); 227 228 /* update available output */ 229 state->x.have = had - strm->avail_out; 230 state->x.next = strm->next_out - state->x.have; 231 232 /* if the gzip stream completed successfully, look for another */ 233 if (ret == Z_STREAM_END) { 234 state->junk = 0; 235 state->how = LOOK; 236 return 0; 237 } 238 239 /* return decompression status */ 240 return ret != Z_OK ? -1 : 0; 241 } 242 243 /* Fetch data and put it in the output buffer. Assumes state->x.have is 0. 244 Data is either copied from the input file or decompressed from the input 245 file depending on state->how. If state->how is LOOK, then a gzip header is 246 looked for to determine whether to copy or decompress. Returns -1 on error, 247 otherwise 0. gz_fetch() will leave state->how as COPY or GZIP unless the 248 end of the input file has been reached and all data has been processed. */ 249 local int gz_fetch(gz_statep state) { 250 z_streamp strm = &(state->strm); 251 252 do { 253 switch(state->how) { 254 case LOOK: /* -> LOOK, COPY (only if never GZIP), or GZIP */ 255 if (gz_look(state) == -1) 256 return -1; 257 if (state->how == LOOK) 258 return 0; 259 break; 260 case COPY: /* -> COPY */ 261 if (gz_load(state, state->out, state->size << 1, &(state->x.have)) 262 == -1) 263 return -1; 264 state->x.next = state->out; 265 return 0; 266 case GZIP: /* -> GZIP or LOOK (if end of gzip stream) */ 267 strm->avail_out = state->size << 1; 268 strm->next_out = state->out; 269 if (gz_decomp(state) == -1) 270 return -1; 271 break; 272 default: 273 gz_error(state, Z_STREAM_ERROR, "state corrupt"); 274 return -1; 275 } 276 } while (state->x.have == 0 && (!state->eof || strm->avail_in)); 277 return 0; 278 } 279 280 /* Skip state->skip (> 0) uncompressed bytes of output. Return -1 on error, 0 281 on success. */ 282 local int gz_skip(gz_statep state) { 283 unsigned n; 284 285 /* skip over len bytes or reach end-of-file, whichever comes first */ 286 do { 287 /* skip over whatever is in output buffer */ 288 if (state->x.have) { 289 n = GT_OFF(state->x.have) || 290 (z_off64_t)state->x.have > state->skip ? 291 (unsigned)state->skip : state->x.have; 292 state->x.have -= n; 293 state->x.next += n; 294 state->x.pos += n; 295 state->skip -= n; 296 } 297 298 /* output buffer empty -- return if we're at the end of the input */ 299 else if (state->eof && state->strm.avail_in == 0) 300 break; 301 302 /* need more data to skip -- load up output buffer */ 303 else { 304 /* get more output, looking for header if required */ 305 if (gz_fetch(state) == -1) 306 return -1; 307 } 308 } while (state->skip); 309 return 0; 310 } 311 312 /* Read len bytes into buf from file, or less than len up to the end of the 313 input. Return the number of bytes read. If zero is returned, either the end 314 of file was reached, or there was an error. state->err must be consulted in 315 that case to determine which. If there was an error, but some uncompressed 316 bytes were read before the error, then that count is returned. The error is 317 still recorded, and so is deferred until the next call. */ 318 local z_size_t gz_read(gz_statep state, voidp buf, z_size_t len) { 319 z_size_t got; 320 unsigned n; 321 int err; 322 323 /* if len is zero, avoid unnecessary operations */ 324 if (len == 0) 325 return 0; 326 327 /* process a skip request */ 328 if (state->skip && gz_skip(state) == -1) 329 return 0; 330 331 /* get len bytes to buf, or less than len if at the end */ 332 got = 0; 333 err = 0; 334 do { 335 /* set n to the maximum amount of len that fits in an unsigned int */ 336 n = (unsigned)-1; 337 if (n > len) 338 n = (unsigned)len; 339 340 /* first just try copying data from the output buffer */ 341 if (state->x.have) { 342 if (state->x.have < n) 343 n = state->x.have; 344 memcpy(buf, state->x.next, n); 345 state->x.next += n; 346 state->x.have -= n; 347 if (state->err != Z_OK) 348 /* caught deferred error from gz_fetch() */ 349 err = -1; 350 } 351 352 /* output buffer empty -- return if we're at the end of the input */ 353 else if (state->eof && state->strm.avail_in == 0) 354 break; 355 356 /* need output data -- for small len or new stream load up our output 357 buffer, so that gzgetc() can be fast */ 358 else if (state->how == LOOK || n < (state->size << 1)) { 359 /* get more output, looking for header if required */ 360 if (gz_fetch(state) == -1 && state->x.have == 0) 361 /* if state->x.have != 0, error will be caught after copy */ 362 err = -1; 363 continue; /* no progress yet -- go back to copy above */ 364 /* the copy above assures that we will leave with space in the 365 output buffer, allowing at least one gzungetc() to succeed */ 366 } 367 368 /* large len -- read directly into user buffer */ 369 else if (state->how == COPY) /* read directly */ 370 err = gz_load(state, (unsigned char *)buf, n, &n); 371 372 /* large len -- decompress directly into user buffer */ 373 else { /* state->how == GZIP */ 374 state->strm.avail_out = n; 375 state->strm.next_out = (unsigned char *)buf; 376 err = gz_decomp(state); 377 n = state->x.have; 378 state->x.have = 0; 379 } 380 381 /* update progress */ 382 len -= n; 383 buf = (char *)buf + n; 384 got += n; 385 state->x.pos += n; 386 } while (len && !err); 387 388 /* note read past eof */ 389 if (len && state->eof) 390 state->past = 1; 391 392 /* return number of bytes read into user buffer */ 393 return got; 394 } 395 396 /* -- see zlib.h -- */ 397 int ZEXPORT gzread(gzFile file, voidp buf, unsigned len) { 398 gz_statep state; 399 400 /* get internal structure and check that it's for reading */ 401 if (file == NULL) 402 return -1; 403 state = (gz_statep)file; 404 if (state->mode != GZ_READ) 405 return -1; 406 407 /* check that there was no (serious) error */ 408 if (state->err != Z_OK && state->err != Z_BUF_ERROR && !state->again) 409 return -1; 410 gz_error(state, Z_OK, NULL); 411 412 /* since an int is returned, make sure len fits in one, otherwise return 413 with an error (this avoids a flaw in the interface) */ 414 if ((int)len < 0) { 415 gz_error(state, Z_STREAM_ERROR, "request does not fit in an int"); 416 return -1; 417 } 418 419 /* read len or fewer bytes to buf */ 420 len = (unsigned)gz_read(state, buf, len); 421 422 /* check for an error */ 423 if (len == 0) { 424 if (state->err != Z_OK && state->err != Z_BUF_ERROR) 425 return -1; 426 if (state->again) { 427 /* non-blocking input stalled after some input was read, but no 428 uncompressed bytes were produced -- let the application know 429 this isn't EOF */ 430 gz_error(state, Z_ERRNO, zstrerror()); 431 return -1; 432 } 433 } 434 435 /* return the number of bytes read */ 436 return (int)len; 437 } 438 439 /* -- see zlib.h -- */ 440 z_size_t ZEXPORT gzfread(voidp buf, z_size_t size, z_size_t nitems, 441 gzFile file) { 442 z_size_t len; 443 gz_statep state; 444 445 /* get internal structure and check that it's for reading */ 446 if (file == NULL) 447 return 0; 448 state = (gz_statep)file; 449 if (state->mode != GZ_READ) 450 return 0; 451 452 /* check that there was no (serious) error */ 453 if (state->err != Z_OK && state->err != Z_BUF_ERROR && !state->again) 454 return 0; 455 gz_error(state, Z_OK, NULL); 456 457 /* compute bytes to read -- error on overflow */ 458 len = nitems * size; 459 if (size && len / size != nitems) { 460 gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t"); 461 return 0; 462 } 463 464 /* read len or fewer bytes to buf, return the number of full items read */ 465 return len ? gz_read(state, buf, len) / size : 0; 466 } 467 468 /* -- see zlib.h -- */ 469 #ifdef Z_PREFIX_SET 470 # undef z_gzgetc 471 #else 472 # undef gzgetc 473 #endif 474 int ZEXPORT gzgetc(gzFile file) { 475 unsigned char buf[1]; 476 gz_statep state; 477 478 /* get internal structure and check that it's for reading */ 479 if (file == NULL) 480 return -1; 481 state = (gz_statep)file; 482 if (state->mode != GZ_READ) 483 return -1; 484 485 /* check that there was no (serious) error */ 486 if (state->err != Z_OK && state->err != Z_BUF_ERROR && !state->again) 487 return -1; 488 gz_error(state, Z_OK, NULL); 489 490 /* try output buffer (no need to check for skip request) */ 491 if (state->x.have) { 492 state->x.have--; 493 state->x.pos++; 494 return *(state->x.next)++; 495 } 496 497 /* nothing there -- try gz_read() */ 498 return gz_read(state, buf, 1) < 1 ? -1 : buf[0]; 499 } 500 501 int ZEXPORT gzgetc_(gzFile file) { 502 return gzgetc(file); 503 } 504 505 /* -- see zlib.h -- */ 506 int ZEXPORT gzungetc(int c, gzFile file) { 507 gz_statep state; 508 509 /* get internal structure and check that it's for reading */ 510 if (file == NULL) 511 return -1; 512 state = (gz_statep)file; 513 if (state->mode != GZ_READ) 514 return -1; 515 516 /* in case this was just opened, set up the input buffer */ 517 if (state->how == LOOK && state->x.have == 0) 518 (void)gz_look(state); 519 520 /* check that there was no (serious) error */ 521 if (state->err != Z_OK && state->err != Z_BUF_ERROR && !state->again) 522 return -1; 523 gz_error(state, Z_OK, NULL); 524 525 /* process a skip request */ 526 if (state->skip && gz_skip(state) == -1) 527 return -1; 528 529 /* can't push EOF */ 530 if (c < 0) 531 return -1; 532 533 /* if output buffer empty, put byte at end (allows more pushing) */ 534 if (state->x.have == 0) { 535 state->x.have = 1; 536 state->x.next = state->out + (state->size << 1) - 1; 537 state->x.next[0] = (unsigned char)c; 538 state->x.pos--; 539 state->past = 0; 540 return c; 541 } 542 543 /* if no room, give up (must have already done a gzungetc()) */ 544 if (state->x.have == (state->size << 1)) { 545 gz_error(state, Z_DATA_ERROR, "out of room to push characters"); 546 return -1; 547 } 548 549 /* slide output data if needed and insert byte before existing data */ 550 if (state->x.next == state->out) { 551 unsigned char *src = state->out + state->x.have; 552 unsigned char *dest = state->out + (state->size << 1); 553 554 while (src > state->out) 555 *--dest = *--src; 556 state->x.next = dest; 557 } 558 state->x.have++; 559 state->x.next--; 560 state->x.next[0] = (unsigned char)c; 561 state->x.pos--; 562 state->past = 0; 563 return c; 564 } 565 566 /* -- see zlib.h -- */ 567 char * ZEXPORT gzgets(gzFile file, char *buf, int len) { 568 unsigned left, n; 569 char *str; 570 unsigned char *eol; 571 gz_statep state; 572 573 /* check parameters, get internal structure, and check that it's for 574 reading */ 575 if (file == NULL || buf == NULL || len < 1) 576 return NULL; 577 state = (gz_statep)file; 578 if (state->mode != GZ_READ) 579 return NULL; 580 581 /* check that there was no (serious) error */ 582 if (state->err != Z_OK && state->err != Z_BUF_ERROR && !state->again) 583 return NULL; 584 gz_error(state, Z_OK, NULL); 585 586 /* process a skip request */ 587 if (state->skip && gz_skip(state) == -1) 588 return NULL; 589 590 /* copy output up to a new line, len-1 bytes, or there is no more output, 591 whichever comes first */ 592 str = buf; 593 left = (unsigned)len - 1; 594 if (left) do { 595 /* assure that something is in the output buffer */ 596 if (state->x.have == 0 && gz_fetch(state) == -1) 597 break; /* error */ 598 if (state->x.have == 0) { /* end of file */ 599 state->past = 1; /* read past end */ 600 break; /* return what we have */ 601 } 602 603 /* look for end-of-line in current output buffer */ 604 n = state->x.have > left ? left : state->x.have; 605 eol = (unsigned char *)memchr(state->x.next, '\n', n); 606 if (eol != NULL) 607 n = (unsigned)(eol - state->x.next) + 1; 608 609 /* copy through end-of-line, or remainder if not found */ 610 memcpy(buf, state->x.next, n); 611 state->x.have -= n; 612 state->x.next += n; 613 state->x.pos += n; 614 left -= n; 615 buf += n; 616 } while (left && eol == NULL); 617 618 /* append a terminating zero to the string (we don't check for a zero in 619 the contents, let the user worry about that) -- return the terminated 620 string, or if nothing was read, NULL */ 621 if (buf == str) 622 return NULL; 623 buf[0] = 0; 624 return str; 625 } 626 627 /* -- see zlib.h -- */ 628 int ZEXPORT gzdirect(gzFile file) { 629 gz_statep state; 630 631 /* get internal structure */ 632 if (file == NULL) 633 return 0; 634 state = (gz_statep)file; 635 636 /* if the state is not known, but we can find out, then do so (this is 637 mainly for right after a gzopen() or gzdopen()) */ 638 if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0) 639 (void)gz_look(state); 640 641 /* return 1 if transparent, 0 if processing a gzip stream */ 642 return state->direct == 1; 643 } 644 645 /* -- see zlib.h -- */ 646 int ZEXPORT gzclose_r(gzFile file) { 647 int ret, err; 648 gz_statep state; 649 650 /* get internal structure and check that it's for reading */ 651 if (file == NULL) 652 return Z_STREAM_ERROR; 653 state = (gz_statep)file; 654 if (state->mode != GZ_READ) 655 return Z_STREAM_ERROR; 656 657 /* free memory and close file */ 658 if (state->size) { 659 inflateEnd(&(state->strm)); 660 free(state->out); 661 free(state->in); 662 } 663 err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK; 664 gz_error(state, Z_OK, NULL); 665 free(state->path); 666 ret = close(state->fd); 667 free(state); 668 return ret ? Z_ERRNO : err; 669 } 670