xref: /freebsd/sys/contrib/zstd/zlibWrapper/gzread.c (revision 6f63e88c0166ed3e5f2805a9e667c7d24d304cf1)
1 /* gzread.c contains minimal changes required to be compiled with zlibWrapper:
2  * - gz_statep was converted to union to work with -Wstrict-aliasing=1      */
3 
4  /* gzread.c -- zlib functions for reading gzip files
5  * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler
6  * For conditions of distribution and use, see http://www.zlib.net/zlib_license.html
7  */
8 
9 #include "gzguts.h"
10 
/* Visual Studio does not provide the ssize_t type; define it from SSIZE_T
 * (declared in <BaseTsd.h>) unless ssize_t already exists as a macro.
 * see https://github.com/facebook/zstd/issues/1800#issuecomment-545945050 */
#ifdef _MSC_VER
#  ifndef ssize_t
#    include <BaseTsd.h>
     typedef SSIZE_T ssize_t;
#  endif
#endif
17 
18 
19 /* Local functions */
20 local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *));
21 local int gz_avail OF((gz_statep));
22 local int gz_look OF((gz_statep));
23 local int gz_decomp OF((gz_statep));
24 local int gz_fetch OF((gz_statep));
25 local int gz_skip OF((gz_statep, z_off64_t));
26 local z_size_t gz_read OF((gz_statep, voidp, z_size_t));
27 
28 /* Use read() to load a buffer -- return -1 on error, otherwise 0.  Read from
29    state.state->fd, and update state.state->eof, state.state->err, and state.state->msg as appropriate.
30    This function needs to loop on read(), since read() is not guaranteed to
31    read the number of bytes requested, depending on the type of descriptor. */
32 local int gz_load(state, buf, len, have)
33     gz_statep state;
34     unsigned char *buf;
35     unsigned len;
36     unsigned *have;
37 {
38     ssize_t ret;
39     unsigned get, max = ((unsigned)-1 >> 2) + 1;
40 
41     *have = 0;
42     do {
43         get = len - *have;
44         if (get > max)
45             get = max;
46         ret = read(state.state->fd, buf + *have, get);
47         if (ret <= 0)
48             break;
49         *have += (unsigned)ret;
50     } while (*have < len);
51     if (ret < 0) {
52         gz_error(state, Z_ERRNO, zstrerror());
53         return -1;
54     }
55     if (ret == 0)
56         state.state->eof = 1;
57     return 0;
58 }
59 
60 /* Load up input buffer and set eof flag if last data loaded -- return -1 on
61    error, 0 otherwise.  Note that the eof flag is set when the end of the input
62    file is reached, even though there may be unused data in the buffer.  Once
63    that data has been used, no more attempts will be made to read the file.
64    If strm->avail_in != 0, then the current data is moved to the beginning of
65    the input buffer, and then the remainder of the buffer is loaded with the
66    available data from the input file. */
67 local int gz_avail(state)
68     gz_statep state;
69 {
70     unsigned got;
71     z_streamp strm = &(state.state->strm);
72 
73     if (state.state->err != Z_OK && state.state->err != Z_BUF_ERROR)
74         return -1;
75     if (state.state->eof == 0) {
76         if (strm->avail_in) {       /* copy what's there to the start */
77             unsigned char *p = state.state->in;
78             unsigned const char *q = strm->next_in;
79             unsigned n = strm->avail_in;
80             do {
81                 *p++ = *q++;
82             } while (--n);
83         }
84         if (gz_load(state, state.state->in + strm->avail_in,
85                     state.state->size - strm->avail_in, &got) == -1)
86             return -1;
87         strm->avail_in += got;
88         strm->next_in = state.state->in;
89     }
90     return 0;
91 }
92 
93 /* Look for gzip header, set up for inflate or copy.  state.state->x.have must be 0.
94    If this is the first time in, allocate required memory.  state.state->how will be
95    left unchanged if there is no more input data available, will be set to COPY
96    if there is no gzip header and direct copying will be performed, or it will
97    be set to GZIP for decompression.  If direct copying, then leftover input
98    data from the input buffer will be copied to the output buffer.  In that
99    case, all further file reads will be directly to either the output buffer or
100    a user buffer.  If decompressing, the inflate state will be initialized.
101    gz_look() will return 0 on success or -1 on failure. */
102 local int gz_look(state)
103     gz_statep state;
104 {
105     z_streamp strm = &(state.state->strm);
106 
107     /* allocate read buffers and inflate memory */
108     if (state.state->size == 0) {
109         /* allocate buffers */
110         state.state->in = (unsigned char *)malloc(state.state->want);
111         state.state->out = (unsigned char *)malloc(state.state->want << 1);
112         if (state.state->in == NULL || state.state->out == NULL) {
113             free(state.state->out);
114             free(state.state->in);
115             gz_error(state, Z_MEM_ERROR, "out of memory");
116             return -1;
117         }
118         state.state->size = state.state->want;
119 
120         /* allocate inflate memory */
121         state.state->strm.zalloc = Z_NULL;
122         state.state->strm.zfree = Z_NULL;
123         state.state->strm.opaque = Z_NULL;
124         state.state->strm.avail_in = 0;
125         state.state->strm.next_in = Z_NULL;
126         if (inflateInit2(&(state.state->strm), 15 + 16) != Z_OK) {    /* gunzip */
127             free(state.state->out);
128             free(state.state->in);
129             state.state->size = 0;
130             gz_error(state, Z_MEM_ERROR, "out of memory");
131             return -1;
132         }
133     }
134 
135     /* get at least the magic bytes in the input buffer */
136     if (strm->avail_in < 2) {
137         if (gz_avail(state) == -1)
138             return -1;
139         if (strm->avail_in == 0)
140             return 0;
141     }
142 
143     /* look for gzip magic bytes -- if there, do gzip decoding (note: there is
144        a logical dilemma here when considering the case of a partially written
145        gzip file, to wit, if a single 31 byte is written, then we cannot tell
146        whether this is a single-byte file, or just a partially written gzip
147        file -- for here we assume that if a gzip file is being written, then
148        the header will be written in a single operation, so that reading a
149        single byte is sufficient indication that it is not a gzip file) */
150     if (strm->avail_in > 1 &&
151             ((strm->next_in[0] == 31 && strm->next_in[1] == 139) /* gz header */
152             || (strm->next_in[0] == 40 && strm->next_in[1] == 181))) { /* zstd header */
153         inflateReset(strm);
154         state.state->how = GZIP;
155         state.state->direct = 0;
156         return 0;
157     }
158 
159     /* no gzip header -- if we were decoding gzip before, then this is trailing
160        garbage.  Ignore the trailing garbage and finish. */
161     if (state.state->direct == 0) {
162         strm->avail_in = 0;
163         state.state->eof = 1;
164         state.state->x.have = 0;
165         return 0;
166     }
167 
168     /* doing raw i/o, copy any leftover input to output -- this assumes that
169        the output buffer is larger than the input buffer, which also assures
170        space for gzungetc() */
171     state.state->x.next = state.state->out;
172     if (strm->avail_in) {
173         memcpy(state.state->x.next, strm->next_in, strm->avail_in);
174         state.state->x.have = strm->avail_in;
175         strm->avail_in = 0;
176     }
177     state.state->how = COPY;
178     state.state->direct = 1;
179     return 0;
180 }
181 
182 /* Decompress from input to the provided next_out and avail_out in the state.
183    On return, state.state->x.have and state.state->x.next point to the just decompressed
184    data.  If the gzip stream completes, state.state->how is reset to LOOK to look for
185    the next gzip stream or raw data, once state.state->x.have is depleted.  Returns 0
186    on success, -1 on failure. */
187 local int gz_decomp(state)
188     gz_statep state;
189 {
190     int ret = Z_OK;
191     unsigned had;
192     z_streamp strm = &(state.state->strm);
193 
194     /* fill output buffer up to end of deflate stream */
195     had = strm->avail_out;
196     do {
197         /* get more input for inflate() */
198         if (strm->avail_in == 0 && gz_avail(state) == -1)
199             return -1;
200         if (strm->avail_in == 0) {
201             gz_error(state, Z_BUF_ERROR, "unexpected end of file");
202             break;
203         }
204 
205         /* decompress and handle errors */
206         ret = inflate(strm, Z_NO_FLUSH);
207         if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
208             gz_error(state, Z_STREAM_ERROR,
209                      "internal error: inflate stream corrupt");
210             return -1;
211         }
212         if (ret == Z_MEM_ERROR) {
213             gz_error(state, Z_MEM_ERROR, "out of memory");
214             return -1;
215         }
216         if (ret == Z_DATA_ERROR) {              /* deflate stream invalid */
217             gz_error(state, Z_DATA_ERROR,
218                      strm->msg == NULL ? "compressed data error" : strm->msg);
219             return -1;
220         }
221     } while (strm->avail_out && ret != Z_STREAM_END);
222 
223     /* update available output */
224     state.state->x.have = had - strm->avail_out;
225     state.state->x.next = strm->next_out - state.state->x.have;
226 
227     /* if the gzip stream completed successfully, look for another */
228     if (ret == Z_STREAM_END)
229         state.state->how = LOOK;
230 
231     /* good decompression */
232     return 0;
233 }
234 
235 /* Fetch data and put it in the output buffer.  Assumes state.state->x.have is 0.
236    Data is either copied from the input file or decompressed from the input
237    file depending on state.state->how.  If state.state->how is LOOK, then a gzip header is
238    looked for to determine whether to copy or decompress.  Returns -1 on error,
239    otherwise 0.  gz_fetch() will leave state.state->how as COPY or GZIP unless the
240    end of the input file has been reached and all data has been processed.  */
241 local int gz_fetch(state)
242     gz_statep state;
243 {
244     z_streamp strm = &(state.state->strm);
245 
246     do {
247         switch(state.state->how) {
248         case LOOK:      /* -> LOOK, COPY (only if never GZIP), or GZIP */
249             if (gz_look(state) == -1)
250                 return -1;
251             if (state.state->how == LOOK)
252                 return 0;
253             break;
254         case COPY:      /* -> COPY */
255             if (gz_load(state, state.state->out, state.state->size << 1, &(state.state->x.have))
256                     == -1)
257                 return -1;
258             state.state->x.next = state.state->out;
259             return 0;
260         case GZIP:      /* -> GZIP or LOOK (if end of gzip stream) */
261             strm->avail_out = state.state->size << 1;
262             strm->next_out = state.state->out;
263             if (gz_decomp(state) == -1)
264                 return -1;
265         }
266     } while (state.state->x.have == 0 && (!state.state->eof || strm->avail_in));
267     return 0;
268 }
269 
270 /* Skip len uncompressed bytes of output.  Return -1 on error, 0 on success. */
271 local int gz_skip(state, len)
272     gz_statep state;
273     z_off64_t len;
274 {
275     unsigned n;
276 
277     /* skip over len bytes or reach end-of-file, whichever comes first */
278     while (len)
279         /* skip over whatever is in output buffer */
280         if (state.state->x.have) {
281             n = GT_OFF(state.state->x.have) || (z_off64_t)state.state->x.have > len ?
282                 (unsigned)len : state.state->x.have;
283             state.state->x.have -= n;
284             state.state->x.next += n;
285             state.state->x.pos += n;
286             len -= n;
287         }
288 
289         /* output buffer empty -- return if we're at the end of the input */
290         else if (state.state->eof && state.state->strm.avail_in == 0)
291             break;
292 
293         /* need more data to skip -- load up output buffer */
294         else {
295             /* get more output, looking for header if required */
296             if (gz_fetch(state) == -1)
297                 return -1;
298         }
299     return 0;
300 }
301 
302 /* Read len bytes into buf from file, or less than len up to the end of the
303    input.  Return the number of bytes read.  If zero is returned, either the
304    end of file was reached, or there was an error.  state.state->err must be
305    consulted in that case to determine which. */
306 local z_size_t gz_read(state, buf, len)
307     gz_statep state;
308     voidp buf;
309     z_size_t len;
310 {
311     z_size_t got;
312     unsigned n;
313 
314     /* if len is zero, avoid unnecessary operations */
315     if (len == 0)
316         return 0;
317 
318     /* process a skip request */
319     if (state.state->seek) {
320         state.state->seek = 0;
321         if (gz_skip(state, state.state->skip) == -1)
322             return 0;
323     }
324 
325     /* get len bytes to buf, or less than len if at the end */
326     got = 0;
327     do {
328         /* set n to the maximum amount of len that fits in an unsigned int */
329         n = -1;
330         if (n > len)
331             n = (unsigned)len;
332 
333         /* first just try copying data from the output buffer */
334         if (state.state->x.have) {
335             if (state.state->x.have < n)
336                 n = state.state->x.have;
337             memcpy(buf, state.state->x.next, n);
338             state.state->x.next += n;
339             state.state->x.have -= n;
340         }
341 
342         /* output buffer empty -- return if we're at the end of the input */
343         else if (state.state->eof && state.state->strm.avail_in == 0) {
344             state.state->past = 1;        /* tried to read past end */
345             break;
346         }
347 
348         /* need output data -- for small len or new stream load up our output
349            buffer */
350         else if (state.state->how == LOOK || n < (state.state->size << 1)) {
351             /* get more output, looking for header if required */
352             if (gz_fetch(state) == -1)
353                 return 0;
354             continue;       /* no progress yet -- go back to copy above */
355             /* the copy above assures that we will leave with space in the
356                output buffer, allowing at least one gzungetc() to succeed */
357         }
358 
359         /* large len -- read directly into user buffer */
360         else if (state.state->how == COPY) {      /* read directly */
361             if (gz_load(state, (unsigned char *)buf, n, &n) == -1)
362                 return 0;
363         }
364 
365         /* large len -- decompress directly into user buffer */
366         else {  /* state.state->how == GZIP */
367             state.state->strm.avail_out = n;
368             state.state->strm.next_out = (unsigned char *)buf;
369             if (gz_decomp(state) == -1)
370                 return 0;
371             n = state.state->x.have;
372             state.state->x.have = 0;
373         }
374 
375         /* update progress */
376         len -= n;
377         buf = (char *)buf + n;
378         got += n;
379         state.state->x.pos += n;
380     } while (len);
381 
382     /* return number of bytes read into user buffer */
383     return got;
384 }
385 
386 /* -- see zlib.h -- */
387 int ZEXPORT gzread(file, buf, len)
388     gzFile file;
389     voidp buf;
390     unsigned len;
391 {
392     gz_statep state;
393 
394     /* get internal structure */
395     if (file == NULL)
396         return -1;
397     state.file = file;
398 
399     /* check that we're reading and that there's no (serious) error */
400     if (state.state->mode != GZ_READ ||
401             (state.state->err != Z_OK && state.state->err != Z_BUF_ERROR))
402         return -1;
403 
404     /* since an int is returned, make sure len fits in one, otherwise return
405        with an error (this avoids a flaw in the interface) */
406     if ((int)len < 0) {
407         gz_error(state, Z_STREAM_ERROR, "request does not fit in an int");
408         return -1;
409     }
410 
411     /* read len or fewer bytes to buf */
412     len = (unsigned)gz_read(state, buf, len);
413 
414     /* check for an error */
415     if (len == 0 && state.state->err != Z_OK && state.state->err != Z_BUF_ERROR)
416         return -1;
417 
418     /* return the number of bytes read (this is assured to fit in an int) */
419     return (int)len;
420 }
421 
422 /* -- see zlib.h -- */
423 z_size_t ZEXPORT gzfread(buf, size, nitems, file)
424     voidp buf;
425     z_size_t size;
426     z_size_t nitems;
427     gzFile file;
428 {
429     z_size_t len;
430     gz_statep state;
431 
432     /* get internal structure */
433     if (file == NULL)
434         return 0;
435     state.file = file;
436 
437     /* check that we're reading and that there's no (serious) error */
438     if (state.state->mode != GZ_READ ||
439             (state.state->err != Z_OK && state.state->err != Z_BUF_ERROR))
440         return 0;
441 
442     /* compute bytes to read -- error on overflow */
443     len = nitems * size;
444     if (size && len / size != nitems) {
445         gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t");
446         return 0;
447     }
448 
449     /* read len or fewer bytes to buf, return the number of full items read */
450     return len ? gz_read(state, buf, len) / size : 0;
451 }
452 
453 /* -- see zlib.h -- */
454 #if ZLIB_VERNUM >= 0x1261
455 #ifdef Z_PREFIX_SET
456 #  undef z_gzgetc
457 #else
458 #  undef gzgetc
459 #endif
460 #endif
461 
462 #if ZLIB_VERNUM == 0x1260
463 #  undef gzgetc
464 #endif
465 
466 #if ZLIB_VERNUM <= 0x1250
467 ZEXTERN int ZEXPORT gzgetc OF((gzFile file));
468 ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file));
469 #endif
470 
471 int ZEXPORT gzgetc(file)
472     gzFile file;
473 {
474     int ret;
475     unsigned char buf[1];
476     gz_statep state;
477 
478     /* get internal structure */
479     if (file == NULL)
480         return -1;
481     state.file = file;
482 
483     /* check that we're reading and that there's no (serious) error */
484     if (state.state->mode != GZ_READ ||
485         (state.state->err != Z_OK && state.state->err != Z_BUF_ERROR))
486         return -1;
487 
488     /* try output buffer (no need to check for skip request) */
489     if (state.state->x.have) {
490         state.state->x.have--;
491         state.state->x.pos++;
492         return *(state.state->x.next)++;
493     }
494 
495     /* nothing there -- try gz_read() */
496     ret = (int)gz_read(state, buf, 1);
497     return ret < 1 ? -1 : buf[0];
498 }
499 
500 int ZEXPORT gzgetc_(file)
501 gzFile file;
502 {
503     return gzgetc(file);
504 }
505 
506 /* -- see zlib.h -- */
507 int ZEXPORT gzungetc(c, file)
508     int c;
509     gzFile file;
510 {
511     gz_statep state;
512 
513     /* get internal structure */
514     if (file == NULL)
515         return -1;
516     state.file = file;
517 
518     /* check that we're reading and that there's no (serious) error */
519     if (state.state->mode != GZ_READ ||
520         (state.state->err != Z_OK && state.state->err != Z_BUF_ERROR))
521         return -1;
522 
523     /* process a skip request */
524     if (state.state->seek) {
525         state.state->seek = 0;
526         if (gz_skip(state, state.state->skip) == -1)
527             return -1;
528     }
529 
530     /* can't push EOF */
531     if (c < 0)
532         return -1;
533 
534     /* if output buffer empty, put byte at end (allows more pushing) */
535     if (state.state->x.have == 0) {
536         state.state->x.have = 1;
537         state.state->x.next = state.state->out + (state.state->size << 1) - 1;
538         state.state->x.next[0] = (unsigned char)c;
539         state.state->x.pos--;
540         state.state->past = 0;
541         return c;
542     }
543 
544     /* if no room, give up (must have already done a gzungetc()) */
545     if (state.state->x.have == (state.state->size << 1)) {
546         gz_error(state, Z_DATA_ERROR, "out of room to push characters");
547         return -1;
548     }
549 
550     /* slide output data if needed and insert byte before existing data */
551     if (state.state->x.next == state.state->out) {
552         unsigned char *src = state.state->out + state.state->x.have;
553         unsigned char *dest = state.state->out + (state.state->size << 1);
554         while (src > state.state->out)
555             *--dest = *--src;
556         state.state->x.next = dest;
557     }
558     state.state->x.have++;
559     state.state->x.next--;
560     state.state->x.next[0] = (unsigned char)c;
561     state.state->x.pos--;
562     state.state->past = 0;
563     return c;
564 }
565 
566 /* -- see zlib.h -- */
567 char * ZEXPORT gzgets(file, buf, len)
568     gzFile file;
569     char *buf;
570     int len;
571 {
572     unsigned left, n;
573     char *str;
574     unsigned char *eol;
575     gz_statep state;
576 
577     /* check parameters and get internal structure */
578     if (file == NULL || buf == NULL || len < 1)
579         return NULL;
580     state.file = file;
581 
582     /* check that we're reading and that there's no (serious) error */
583     if (state.state->mode != GZ_READ ||
584         (state.state->err != Z_OK && state.state->err != Z_BUF_ERROR))
585         return NULL;
586 
587     /* process a skip request */
588     if (state.state->seek) {
589         state.state->seek = 0;
590         if (gz_skip(state, state.state->skip) == -1)
591             return NULL;
592     }
593 
594     /* copy output bytes up to new line or len - 1, whichever comes first --
595        append a terminating zero to the string (we don't check for a zero in
596        the contents, let the user worry about that) */
597     str = buf;
598     left = (unsigned)len - 1;
599     if (left) do {
600         /* assure that something is in the output buffer */
601         if (state.state->x.have == 0 && gz_fetch(state) == -1)
602             return NULL;                /* error */
603         if (state.state->x.have == 0) {       /* end of file */
604             state.state->past = 1;            /* read past end */
605             break;                      /* return what we have */
606         }
607 
608         /* look for end-of-line in current output buffer */
609         n = state.state->x.have > left ? left : state.state->x.have;
610         eol = (unsigned char *)memchr(state.state->x.next, '\n', n);
611         if (eol != NULL)
612             n = (unsigned)(eol - state.state->x.next) + 1;
613 
614         /* copy through end-of-line, or remainder if not found */
615         memcpy(buf, state.state->x.next, n);
616         state.state->x.have -= n;
617         state.state->x.next += n;
618         state.state->x.pos += n;
619         left -= n;
620         buf += n;
621     } while (left && eol == NULL);
622 
623     /* return terminated string, or if nothing, end of file */
624     if (buf == str)
625         return NULL;
626     buf[0] = 0;
627     return str;
628 }
629 
630 /* -- see zlib.h -- */
631 int ZEXPORT gzdirect(file)
632     gzFile file;
633 {
634     gz_statep state;
635 
636     /* get internal structure */
637     if (file == NULL)
638         return 0;
639     state.file = file;
640 
641     /* if the state is not known, but we can find out, then do so (this is
642        mainly for right after a gzopen() or gzdopen()) */
643     if (state.state->mode == GZ_READ && state.state->how == LOOK && state.state->x.have == 0)
644         (void)gz_look(state);
645 
646     /* return 1 if transparent, 0 if processing a gzip stream */
647     return state.state->direct;
648 }
649 
650 /* -- see zlib.h -- */
651 int ZEXPORT gzclose_r(file)
652     gzFile file;
653 {
654     int ret, err;
655     gz_statep state;
656 
657     /* get internal structure */
658     if (file == NULL)
659         return Z_STREAM_ERROR;
660     state.file = file;
661 
662     /* check that we're reading */
663     if (state.state->mode != GZ_READ)
664         return Z_STREAM_ERROR;
665 
666     /* free memory and close file */
667     if (state.state->size) {
668         inflateEnd(&(state.state->strm));
669         free(state.state->out);
670         free(state.state->in);
671     }
672     err = state.state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK;
673     gz_error(state, Z_OK, NULL);
674     free(state.state->path);
675     ret = close(state.state->fd);
676     free(state.state);
677     return ret ? Z_ERRNO : err;
678 }
679