xref: /freebsd/sys/contrib/zlib/gzread.c (revision 51015e6d0f570239b0c2088dc6cf2b018928375d)
1 /* gzread.c -- zlib functions for reading gzip files
2  * Copyright (C) 2004-2017 Mark Adler
3  * For conditions of distribution and use, see copyright notice in zlib.h
4  */
5 
6 /* $FreeBSD$ */
7 
8 #include "gzguts.h"
9 #include <unistd.h>
10 
11 /* Local functions */
12 local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *));
13 local int gz_avail OF((gz_statep));
14 local int gz_look OF((gz_statep));
15 local int gz_decomp OF((gz_statep));
16 local int gz_fetch OF((gz_statep));
17 local int gz_skip OF((gz_statep, z_off64_t));
18 local z_size_t gz_read OF((gz_statep, voidp, z_size_t));
19 
20 /* Use read() to load a buffer -- return -1 on error, otherwise 0.  Read from
21    state->fd, and update state->eof, state->err, and state->msg as appropriate.
22    This function needs to loop on read(), since read() is not guaranteed to
23    read the number of bytes requested, depending on the type of descriptor. */
24 local int gz_load(state, buf, len, have)
25     gz_statep state;
26     unsigned char *buf;
27     unsigned len;
28     unsigned *have;
29 {
30     int ret;
31     unsigned get, max = ((unsigned)-1 >> 2) + 1;
32 
33     *have = 0;
34     do {
35         get = len - *have;
36         if (get > max)
37             get = max;
38         ret = read(state->fd, buf + *have, get);
39         if (ret <= 0)
40             break;
41         *have += (unsigned)ret;
42     } while (*have < len);
43     if (ret < 0) {
44         gz_error(state, Z_ERRNO, zstrerror());
45         return -1;
46     }
47     if (ret == 0)
48         state->eof = 1;
49     return 0;
50 }
51 
52 /* Load up input buffer and set eof flag if last data loaded -- return -1 on
53    error, 0 otherwise.  Note that the eof flag is set when the end of the input
54    file is reached, even though there may be unused data in the buffer.  Once
55    that data has been used, no more attempts will be made to read the file.
56    If strm->avail_in != 0, then the current data is moved to the beginning of
57    the input buffer, and then the remainder of the buffer is loaded with the
58    available data from the input file. */
59 local int gz_avail(state)
60     gz_statep state;
61 {
62     unsigned got;
63     z_streamp strm = &(state->strm);
64 
65     if (state->err != Z_OK && state->err != Z_BUF_ERROR)
66         return -1;
67     if (state->eof == 0) {
68         if (strm->avail_in) {       /* copy what's there to the start */
69             unsigned char *p = state->in;
70             unsigned const char *q = strm->next_in;
71             unsigned n = strm->avail_in;
72             do {
73                 *p++ = *q++;
74             } while (--n);
75         }
76         if (gz_load(state, state->in + strm->avail_in,
77                     state->size - strm->avail_in, &got) == -1)
78             return -1;
79         strm->avail_in += got;
80         strm->next_in = state->in;
81     }
82     return 0;
83 }
84 
85 /* Look for gzip header, set up for inflate or copy.  state->x.have must be 0.
86    If this is the first time in, allocate required memory.  state->how will be
87    left unchanged if there is no more input data available, will be set to COPY
88    if there is no gzip header and direct copying will be performed, or it will
89    be set to GZIP for decompression.  If direct copying, then leftover input
90    data from the input buffer will be copied to the output buffer.  In that
91    case, all further file reads will be directly to either the output buffer or
92    a user buffer.  If decompressing, the inflate state will be initialized.
93    gz_look() will return 0 on success or -1 on failure. */
94 local int gz_look(state)
95     gz_statep state;
96 {
97     z_streamp strm = &(state->strm);
98 
99     /* allocate read buffers and inflate memory */
100     if (state->size == 0) {
101         /* allocate buffers */
102         state->in = (unsigned char *)malloc(state->want);
103         state->out = (unsigned char *)malloc(state->want << 1);
104         if (state->in == NULL || state->out == NULL) {
105             free(state->out);
106             free(state->in);
107             gz_error(state, Z_MEM_ERROR, "out of memory");
108             return -1;
109         }
110         state->size = state->want;
111 
112         /* allocate inflate memory */
113         state->strm.zalloc = Z_NULL;
114         state->strm.zfree = Z_NULL;
115         state->strm.opaque = Z_NULL;
116         state->strm.avail_in = 0;
117         state->strm.next_in = Z_NULL;
118         if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) {    /* gunzip */
119             free(state->out);
120             free(state->in);
121             state->size = 0;
122             gz_error(state, Z_MEM_ERROR, "out of memory");
123             return -1;
124         }
125     }
126 
127     /* get at least the magic bytes in the input buffer */
128     if (strm->avail_in < 2) {
129         if (gz_avail(state) == -1)
130             return -1;
131         if (strm->avail_in == 0)
132             return 0;
133     }
134 
135     /* look for gzip magic bytes -- if there, do gzip decoding (note: there is
136        a logical dilemma here when considering the case of a partially written
137        gzip file, to wit, if a single 31 byte is written, then we cannot tell
138        whether this is a single-byte file, or just a partially written gzip
139        file -- for here we assume that if a gzip file is being written, then
140        the header will be written in a single operation, so that reading a
141        single byte is sufficient indication that it is not a gzip file) */
142     if (strm->avail_in > 1 &&
143             strm->next_in[0] == 31 && strm->next_in[1] == 139) {
144         inflateReset(strm);
145         state->how = GZIP;
146         state->direct = 0;
147         return 0;
148     }
149 
150     /* no gzip header -- if we were decoding gzip before, then this is trailing
151        garbage.  Ignore the trailing garbage and finish. */
152     if (state->direct == 0) {
153         strm->avail_in = 0;
154         state->eof = 1;
155         state->x.have = 0;
156         return 0;
157     }
158 
159     /* doing raw i/o, copy any leftover input to output -- this assumes that
160        the output buffer is larger than the input buffer, which also assures
161        space for gzungetc() */
162     state->x.next = state->out;
163     memcpy(state->x.next, strm->next_in, strm->avail_in);
164     state->x.have = strm->avail_in;
165     strm->avail_in = 0;
166     state->how = COPY;
167     state->direct = 1;
168     return 0;
169 }
170 
171 /* Decompress from input to the provided next_out and avail_out in the state.
172    On return, state->x.have and state->x.next point to the just decompressed
173    data.  If the gzip stream completes, state->how is reset to LOOK to look for
174    the next gzip stream or raw data, once state->x.have is depleted.  Returns 0
175    on success, -1 on failure. */
176 local int gz_decomp(state)
177     gz_statep state;
178 {
179     int ret = Z_OK;
180     unsigned had;
181     z_streamp strm = &(state->strm);
182 
183     /* fill output buffer up to end of deflate stream */
184     had = strm->avail_out;
185     do {
186         /* get more input for inflate() */
187         if (strm->avail_in == 0 && gz_avail(state) == -1)
188             return -1;
189         if (strm->avail_in == 0) {
190             gz_error(state, Z_BUF_ERROR, "unexpected end of file");
191             break;
192         }
193 
194         /* decompress and handle errors */
195         ret = inflate(strm, Z_NO_FLUSH);
196         if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
197             gz_error(state, Z_STREAM_ERROR,
198                      "internal error: inflate stream corrupt");
199             return -1;
200         }
201         if (ret == Z_MEM_ERROR) {
202             gz_error(state, Z_MEM_ERROR, "out of memory");
203             return -1;
204         }
205         if (ret == Z_DATA_ERROR) {              /* deflate stream invalid */
206             gz_error(state, Z_DATA_ERROR,
207                      strm->msg == NULL ? "compressed data error" : strm->msg);
208             return -1;
209         }
210     } while (strm->avail_out && ret != Z_STREAM_END);
211 
212     /* update available output */
213     state->x.have = had - strm->avail_out;
214     state->x.next = strm->next_out - state->x.have;
215 
216     /* if the gzip stream completed successfully, look for another */
217     if (ret == Z_STREAM_END)
218         state->how = LOOK;
219 
220     /* good decompression */
221     return 0;
222 }
223 
224 /* Fetch data and put it in the output buffer.  Assumes state->x.have is 0.
225    Data is either copied from the input file or decompressed from the input
226    file depending on state->how.  If state->how is LOOK, then a gzip header is
227    looked for to determine whether to copy or decompress.  Returns -1 on error,
228    otherwise 0.  gz_fetch() will leave state->how as COPY or GZIP unless the
229    end of the input file has been reached and all data has been processed.  */
230 local int gz_fetch(state)
231     gz_statep state;
232 {
233     z_streamp strm = &(state->strm);
234 
235     do {
236         switch(state->how) {
237         case LOOK:      /* -> LOOK, COPY (only if never GZIP), or GZIP */
238             if (gz_look(state) == -1)
239                 return -1;
240             if (state->how == LOOK)
241                 return 0;
242             break;
243         case COPY:      /* -> COPY */
244             if (gz_load(state, state->out, state->size << 1, &(state->x.have))
245                     == -1)
246                 return -1;
247             state->x.next = state->out;
248             return 0;
249         case GZIP:      /* -> GZIP or LOOK (if end of gzip stream) */
250             strm->avail_out = state->size << 1;
251             strm->next_out = state->out;
252             if (gz_decomp(state) == -1)
253                 return -1;
254         }
255     } while (state->x.have == 0 && (!state->eof || strm->avail_in));
256     return 0;
257 }
258 
259 /* Skip len uncompressed bytes of output.  Return -1 on error, 0 on success. */
260 local int gz_skip(state, len)
261     gz_statep state;
262     z_off64_t len;
263 {
264     unsigned n;
265 
266     /* skip over len bytes or reach end-of-file, whichever comes first */
267     while (len)
268         /* skip over whatever is in output buffer */
269         if (state->x.have) {
270             n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ?
271                 (unsigned)len : state->x.have;
272             state->x.have -= n;
273             state->x.next += n;
274             state->x.pos += n;
275             len -= n;
276         }
277 
278         /* output buffer empty -- return if we're at the end of the input */
279         else if (state->eof && state->strm.avail_in == 0)
280             break;
281 
282         /* need more data to skip -- load up output buffer */
283         else {
284             /* get more output, looking for header if required */
285             if (gz_fetch(state) == -1)
286                 return -1;
287         }
288     return 0;
289 }
290 
291 /* Read len bytes into buf from file, or less than len up to the end of the
292    input.  Return the number of bytes read.  If zero is returned, either the
293    end of file was reached, or there was an error.  state->err must be
294    consulted in that case to determine which. */
295 local z_size_t gz_read(state, buf, len)
296     gz_statep state;
297     voidp buf;
298     z_size_t len;
299 {
300     z_size_t got;
301     unsigned n;
302 
303     /* if len is zero, avoid unnecessary operations */
304     if (len == 0)
305         return 0;
306 
307     /* process a skip request */
308     if (state->seek) {
309         state->seek = 0;
310         if (gz_skip(state, state->skip) == -1)
311             return 0;
312     }
313 
314     /* get len bytes to buf, or less than len if at the end */
315     got = 0;
316     do {
317         /* set n to the maximum amount of len that fits in an unsigned int */
318         n = (unsigned)-1;
319         if (n > len)
320             n = (unsigned)len;
321 
322         /* first just try copying data from the output buffer */
323         if (state->x.have) {
324             if (state->x.have < n)
325                 n = state->x.have;
326             memcpy(buf, state->x.next, n);
327             state->x.next += n;
328             state->x.have -= n;
329         }
330 
331         /* output buffer empty -- return if we're at the end of the input */
332         else if (state->eof && state->strm.avail_in == 0) {
333             state->past = 1;        /* tried to read past end */
334             break;
335         }
336 
337         /* need output data -- for small len or new stream load up our output
338            buffer */
339         else if (state->how == LOOK || n < (state->size << 1)) {
340             /* get more output, looking for header if required */
341             if (gz_fetch(state) == -1)
342                 return 0;
343             continue;       /* no progress yet -- go back to copy above */
344             /* the copy above assures that we will leave with space in the
345                output buffer, allowing at least one gzungetc() to succeed */
346         }
347 
348         /* large len -- read directly into user buffer */
349         else if (state->how == COPY) {      /* read directly */
350             if (gz_load(state, (unsigned char *)buf, n, &n) == -1)
351                 return 0;
352         }
353 
354         /* large len -- decompress directly into user buffer */
355         else {  /* state->how == GZIP */
356             state->strm.avail_out = n;
357             state->strm.next_out = (unsigned char *)buf;
358             if (gz_decomp(state) == -1)
359                 return 0;
360             n = state->x.have;
361             state->x.have = 0;
362         }
363 
364         /* update progress */
365         len -= n;
366         buf = (char *)buf + n;
367         got += n;
368         state->x.pos += n;
369     } while (len);
370 
371     /* return number of bytes read into user buffer */
372     return got;
373 }
374 
375 /* -- see zlib.h -- */
376 int ZEXPORT gzread(file, buf, len)
377     gzFile file;
378     voidp buf;
379     unsigned len;
380 {
381     gz_statep state;
382 
383     /* get internal structure */
384     if (file == NULL)
385         return -1;
386     state = (gz_statep)file;
387 
388     /* check that we're reading and that there's no (serious) error */
389     if (state->mode != GZ_READ ||
390             (state->err != Z_OK && state->err != Z_BUF_ERROR))
391         return -1;
392 
393     /* since an int is returned, make sure len fits in one, otherwise return
394        with an error (this avoids a flaw in the interface) */
395     if ((int)len < 0) {
396         gz_error(state, Z_STREAM_ERROR, "request does not fit in an int");
397         return -1;
398     }
399 
400     /* read len or fewer bytes to buf */
401     len = (unsigned)gz_read(state, buf, len);
402 
403     /* check for an error */
404     if (len == 0 && state->err != Z_OK && state->err != Z_BUF_ERROR)
405         return -1;
406 
407     /* return the number of bytes read (this is assured to fit in an int) */
408     return (int)len;
409 }
410 
411 /* -- see zlib.h -- */
412 z_size_t ZEXPORT gzfread(buf, size, nitems, file)
413     voidp buf;
414     z_size_t size;
415     z_size_t nitems;
416     gzFile file;
417 {
418     z_size_t len;
419     gz_statep state;
420 
421     /* get internal structure */
422     if (file == NULL)
423         return 0;
424     state = (gz_statep)file;
425 
426     /* check that we're reading and that there's no (serious) error */
427     if (state->mode != GZ_READ ||
428             (state->err != Z_OK && state->err != Z_BUF_ERROR))
429         return 0;
430 
431     /* compute bytes to read -- error on overflow */
432     len = nitems * size;
433     if (size && len / size != nitems) {
434         gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t");
435         return 0;
436     }
437 
438     /* read len or fewer bytes to buf, return the number of full items read */
439     return len ? gz_read(state, buf, len) / size : 0;
440 }
441 
442 /* -- see zlib.h -- */
443 #ifdef Z_PREFIX_SET
444 #  undef z_gzgetc
445 #else
446 #  undef gzgetc
447 #endif
448 int ZEXPORT gzgetc(file)
449     gzFile file;
450 {
451     unsigned char buf[1];
452     gz_statep state;
453 
454     /* get internal structure */
455     if (file == NULL)
456         return -1;
457     state = (gz_statep)file;
458 
459     /* check that we're reading and that there's no (serious) error */
460     if (state->mode != GZ_READ ||
461         (state->err != Z_OK && state->err != Z_BUF_ERROR))
462         return -1;
463 
464     /* try output buffer (no need to check for skip request) */
465     if (state->x.have) {
466         state->x.have--;
467         state->x.pos++;
468         return *(state->x.next)++;
469     }
470 
471     /* nothing there -- try gz_read() */
472     return gz_read(state, buf, 1) < 1 ? -1 : buf[0];
473 }
474 
475 int ZEXPORT gzgetc_(file)
476 gzFile file;
477 {
478     return gzgetc(file);
479 }
480 
481 /* -- see zlib.h -- */
482 int ZEXPORT gzungetc(c, file)
483     int c;
484     gzFile file;
485 {
486     gz_statep state;
487 
488     /* get internal structure */
489     if (file == NULL)
490         return -1;
491     state = (gz_statep)file;
492 
493     /* check that we're reading and that there's no (serious) error */
494     if (state->mode != GZ_READ ||
495         (state->err != Z_OK && state->err != Z_BUF_ERROR))
496         return -1;
497 
498     /* process a skip request */
499     if (state->seek) {
500         state->seek = 0;
501         if (gz_skip(state, state->skip) == -1)
502             return -1;
503     }
504 
505     /* can't push EOF */
506     if (c < 0)
507         return -1;
508 
509     /* if output buffer empty, put byte at end (allows more pushing) */
510     if (state->x.have == 0) {
511         state->x.have = 1;
512         state->x.next = state->out + (state->size << 1) - 1;
513         state->x.next[0] = (unsigned char)c;
514         state->x.pos--;
515         state->past = 0;
516         return c;
517     }
518 
519     /* if no room, give up (must have already done a gzungetc()) */
520     if (state->x.have == (state->size << 1)) {
521         gz_error(state, Z_DATA_ERROR, "out of room to push characters");
522         return -1;
523     }
524 
525     /* slide output data if needed and insert byte before existing data */
526     if (state->x.next == state->out) {
527         unsigned char *src = state->out + state->x.have;
528         unsigned char *dest = state->out + (state->size << 1);
529         while (src > state->out)
530             *--dest = *--src;
531         state->x.next = dest;
532     }
533     state->x.have++;
534     state->x.next--;
535     state->x.next[0] = (unsigned char)c;
536     state->x.pos--;
537     state->past = 0;
538     return c;
539 }
540 
541 /* -- see zlib.h -- */
542 char * ZEXPORT gzgets(file, buf, len)
543     gzFile file;
544     char *buf;
545     int len;
546 {
547     unsigned left, n;
548     char *str;
549     unsigned char *eol;
550     gz_statep state;
551 
552     /* check parameters and get internal structure */
553     if (file == NULL || buf == NULL || len < 1)
554         return NULL;
555     state = (gz_statep)file;
556 
557     /* check that we're reading and that there's no (serious) error */
558     if (state->mode != GZ_READ ||
559         (state->err != Z_OK && state->err != Z_BUF_ERROR))
560         return NULL;
561 
562     /* process a skip request */
563     if (state->seek) {
564         state->seek = 0;
565         if (gz_skip(state, state->skip) == -1)
566             return NULL;
567     }
568 
569     /* copy output bytes up to new line or len - 1, whichever comes first --
570        append a terminating zero to the string (we don't check for a zero in
571        the contents, let the user worry about that) */
572     str = buf;
573     left = (unsigned)len - 1;
574     if (left) do {
575         /* assure that something is in the output buffer */
576         if (state->x.have == 0 && gz_fetch(state) == -1)
577             return NULL;                /* error */
578         if (state->x.have == 0) {       /* end of file */
579             state->past = 1;            /* read past end */
580             break;                      /* return what we have */
581         }
582 
583         /* look for end-of-line in current output buffer */
584         n = state->x.have > left ? left : state->x.have;
585         eol = (unsigned char *)memchr(state->x.next, '\n', n);
586         if (eol != NULL)
587             n = (unsigned)(eol - state->x.next) + 1;
588 
589         /* copy through end-of-line, or remainder if not found */
590         memcpy(buf, state->x.next, n);
591         state->x.have -= n;
592         state->x.next += n;
593         state->x.pos += n;
594         left -= n;
595         buf += n;
596     } while (left && eol == NULL);
597 
598     /* return terminated string, or if nothing, end of file */
599     if (buf == str)
600         return NULL;
601     buf[0] = 0;
602     return str;
603 }
604 
605 /* -- see zlib.h -- */
606 int ZEXPORT gzdirect(file)
607     gzFile file;
608 {
609     gz_statep state;
610 
611     /* get internal structure */
612     if (file == NULL)
613         return 0;
614     state = (gz_statep)file;
615 
616     /* if the state is not known, but we can find out, then do so (this is
617        mainly for right after a gzopen() or gzdopen()) */
618     if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
619         (void)gz_look(state);
620 
621     /* return 1 if transparent, 0 if processing a gzip stream */
622     return state->direct;
623 }
624 
625 /* -- see zlib.h -- */
626 int ZEXPORT gzclose_r(file)
627     gzFile file;
628 {
629     int ret, err;
630     gz_statep state;
631 
632     /* get internal structure */
633     if (file == NULL)
634         return Z_STREAM_ERROR;
635     state = (gz_statep)file;
636 
637     /* check that we're reading */
638     if (state->mode != GZ_READ)
639         return Z_STREAM_ERROR;
640 
641     /* free memory and close file */
642     if (state->size) {
643         inflateEnd(&(state->strm));
644         free(state->out);
645         free(state->in);
646     }
647     err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK;
648     gz_error(state, Z_OK, NULL);
649     free(state->path);
650     ret = close(state->fd);
651     free(state);
652     return ret ? Z_ERRNO : err;
653 }
654