xref: /freebsd/sys/contrib/zlib/inffast.c (revision 4717628ed859513a3262ea68259d0605f39de0b3)
1c9083b85SXin LI /* inffast.c -- fast decoding
2c9083b85SXin LI  * Copyright (C) 1995-2017 Mark Adler
3c9083b85SXin LI  * For conditions of distribution and use, see copyright notice in zlib.h
4c9083b85SXin LI  */
5c9083b85SXin LI 
6c9083b85SXin LI #include "zutil.h"
7c9083b85SXin LI #include "inftrees.h"
8c9083b85SXin LI #include "inflate.h"
9c9083b85SXin LI #include "inffast.h"
10c9083b85SXin LI 
11c9083b85SXin LI #ifdef ASMINF
12c9083b85SXin LI #  pragma message("Assembler code may have bugs -- use at your own risk")
13c9083b85SXin LI #else
14c9083b85SXin LI 
15c9083b85SXin LI /*
16c9083b85SXin LI    Decode literal, length, and distance codes and write out the resulting
17c9083b85SXin LI    literal and match bytes until either not enough input or output is
18c9083b85SXin LI    available, an end-of-block is encountered, or a data error is encountered.
19c9083b85SXin LI    When large enough input and output buffers are supplied to inflate(), for
20c9083b85SXin LI    example, a 16K input buffer and a 64K output buffer, more than 95% of the
21c9083b85SXin LI    inflate execution time is spent in this routine.
22c9083b85SXin LI 
23c9083b85SXin LI    Entry assumptions:
24c9083b85SXin LI 
25c9083b85SXin LI         state->mode == LEN
26c9083b85SXin LI         strm->avail_in >= 6
27c9083b85SXin LI         strm->avail_out >= 258
28c9083b85SXin LI         start >= strm->avail_out
29c9083b85SXin LI         state->bits < 8
30c9083b85SXin LI 
31c9083b85SXin LI    On return, state->mode is one of:
32c9083b85SXin LI 
33c9083b85SXin LI         LEN -- ran out of enough output space or enough available input
34c9083b85SXin LI         TYPE -- reached end of block code, inflate() to interpret next block
35c9083b85SXin LI         BAD -- error in block data
36c9083b85SXin LI 
37c9083b85SXin LI    Notes:
38c9083b85SXin LI 
39c9083b85SXin LI     - The maximum input bits used by a length/distance pair is 15 bits for the
40c9083b85SXin LI       length code, 5 bits for the length extra, 15 bits for the distance code,
41c9083b85SXin LI       and 13 bits for the distance extra.  This totals 48 bits, or six bytes.
42c9083b85SXin LI       Therefore if strm->avail_in >= 6, then there is enough input to avoid
43c9083b85SXin LI       checking for available input while decoding.
44c9083b85SXin LI 
45c9083b85SXin LI     - The maximum bytes that a single length/distance pair can output is 258
46c9083b85SXin LI       bytes, which is the maximum length that can be coded.  inflate_fast()
47c9083b85SXin LI       requires strm->avail_out >= 258 for each loop to avoid checking for
48c9083b85SXin LI       output space.
49c9083b85SXin LI  */
inflate_fast(z_streamp strm,unsigned start)50*4717628eSXin LI void ZLIB_INTERNAL inflate_fast(z_streamp strm, unsigned start) {
51c9083b85SXin LI     struct inflate_state FAR *state;
52c9083b85SXin LI     z_const unsigned char FAR *in;      /* local strm->next_in */
53c9083b85SXin LI     z_const unsigned char FAR *last;    /* have enough input while in < last */
54c9083b85SXin LI     unsigned char FAR *out;     /* local strm->next_out */
55c9083b85SXin LI     unsigned char FAR *beg;     /* inflate()'s initial strm->next_out */
56c9083b85SXin LI     unsigned char FAR *end;     /* while out < end, enough space available */
57c9083b85SXin LI #ifdef INFLATE_STRICT
58c9083b85SXin LI     unsigned dmax;              /* maximum distance from zlib header */
59c9083b85SXin LI #endif
60c9083b85SXin LI     unsigned wsize;             /* window size or zero if not using window */
61c9083b85SXin LI     unsigned whave;             /* valid bytes in the window */
62c9083b85SXin LI     unsigned wnext;             /* window write index */
63c9083b85SXin LI     unsigned char FAR *window;  /* allocated sliding window, if wsize != 0 */
64c9083b85SXin LI     unsigned long hold;         /* local strm->hold */
65c9083b85SXin LI     unsigned bits;              /* local strm->bits */
66c9083b85SXin LI     code const FAR *lcode;      /* local strm->lencode */
67c9083b85SXin LI     code const FAR *dcode;      /* local strm->distcode */
68c9083b85SXin LI     unsigned lmask;             /* mask for first level of length codes */
69c9083b85SXin LI     unsigned dmask;             /* mask for first level of distance codes */
70cd882207SXin LI     code const *here;           /* retrieved table entry */
71c9083b85SXin LI     unsigned op;                /* code bits, operation, extra bits, or */
72c9083b85SXin LI                                 /*  window position, window bytes to copy */
73c9083b85SXin LI     unsigned len;               /* match length, unused bytes */
74c9083b85SXin LI     unsigned dist;              /* match distance */
75c9083b85SXin LI     unsigned char FAR *from;    /* where to copy match from */
76c9083b85SXin LI 
77c9083b85SXin LI     /* copy state to local variables */
78c9083b85SXin LI     state = (struct inflate_state FAR *)strm->state;
79c9083b85SXin LI     in = strm->next_in;
80c9083b85SXin LI     last = in + (strm->avail_in - 5);
81c9083b85SXin LI     out = strm->next_out;
82c9083b85SXin LI     beg = out - (start - strm->avail_out);
83c9083b85SXin LI     end = out + (strm->avail_out - 257);
84c9083b85SXin LI #ifdef INFLATE_STRICT
85c9083b85SXin LI     dmax = state->dmax;
86c9083b85SXin LI #endif
87c9083b85SXin LI     wsize = state->wsize;
88c9083b85SXin LI     whave = state->whave;
89c9083b85SXin LI     wnext = state->wnext;
90c9083b85SXin LI     window = state->window;
91c9083b85SXin LI     hold = state->hold;
92c9083b85SXin LI     bits = state->bits;
93c9083b85SXin LI     lcode = state->lencode;
94c9083b85SXin LI     dcode = state->distcode;
95c9083b85SXin LI     lmask = (1U << state->lenbits) - 1;
96c9083b85SXin LI     dmask = (1U << state->distbits) - 1;
97c9083b85SXin LI 
98c9083b85SXin LI     /* decode literals and length/distances until end-of-block or not enough
99c9083b85SXin LI        input data or output space */
100c9083b85SXin LI     do {
101c9083b85SXin LI         if (bits < 15) {
102c9083b85SXin LI             hold += (unsigned long)(*in++) << bits;
103c9083b85SXin LI             bits += 8;
104c9083b85SXin LI             hold += (unsigned long)(*in++) << bits;
105c9083b85SXin LI             bits += 8;
106c9083b85SXin LI         }
107cd882207SXin LI         here = lcode + (hold & lmask);
108c9083b85SXin LI       dolen:
109cd882207SXin LI         op = (unsigned)(here->bits);
110c9083b85SXin LI         hold >>= op;
111c9083b85SXin LI         bits -= op;
112cd882207SXin LI         op = (unsigned)(here->op);
113c9083b85SXin LI         if (op == 0) {                          /* literal */
114cd882207SXin LI             Tracevv((stderr, here->val >= 0x20 && here->val < 0x7f ?
115c9083b85SXin LI                     "inflate:         literal '%c'\n" :
116cd882207SXin LI                     "inflate:         literal 0x%02x\n", here->val));
117cd882207SXin LI             *out++ = (unsigned char)(here->val);
118c9083b85SXin LI         }
119c9083b85SXin LI         else if (op & 16) {                     /* length base */
120cd882207SXin LI             len = (unsigned)(here->val);
121c9083b85SXin LI             op &= 15;                           /* number of extra bits */
122c9083b85SXin LI             if (op) {
123c9083b85SXin LI                 if (bits < op) {
124c9083b85SXin LI                     hold += (unsigned long)(*in++) << bits;
125c9083b85SXin LI                     bits += 8;
126c9083b85SXin LI                 }
127c9083b85SXin LI                 len += (unsigned)hold & ((1U << op) - 1);
128c9083b85SXin LI                 hold >>= op;
129c9083b85SXin LI                 bits -= op;
130c9083b85SXin LI             }
131c9083b85SXin LI             Tracevv((stderr, "inflate:         length %u\n", len));
132c9083b85SXin LI             if (bits < 15) {
133c9083b85SXin LI                 hold += (unsigned long)(*in++) << bits;
134c9083b85SXin LI                 bits += 8;
135c9083b85SXin LI                 hold += (unsigned long)(*in++) << bits;
136c9083b85SXin LI                 bits += 8;
137c9083b85SXin LI             }
138cd882207SXin LI             here = dcode + (hold & dmask);
139c9083b85SXin LI           dodist:
140cd882207SXin LI             op = (unsigned)(here->bits);
141c9083b85SXin LI             hold >>= op;
142c9083b85SXin LI             bits -= op;
143cd882207SXin LI             op = (unsigned)(here->op);
144c9083b85SXin LI             if (op & 16) {                      /* distance base */
145cd882207SXin LI                 dist = (unsigned)(here->val);
146c9083b85SXin LI                 op &= 15;                       /* number of extra bits */
147c9083b85SXin LI                 if (bits < op) {
148c9083b85SXin LI                     hold += (unsigned long)(*in++) << bits;
149c9083b85SXin LI                     bits += 8;
150c9083b85SXin LI                     if (bits < op) {
151c9083b85SXin LI                         hold += (unsigned long)(*in++) << bits;
152c9083b85SXin LI                         bits += 8;
153c9083b85SXin LI                     }
154c9083b85SXin LI                 }
155c9083b85SXin LI                 dist += (unsigned)hold & ((1U << op) - 1);
156c9083b85SXin LI #ifdef INFLATE_STRICT
157c9083b85SXin LI                 if (dist > dmax) {
158c9083b85SXin LI                     strm->msg = (char *)"invalid distance too far back";
159c9083b85SXin LI                     state->mode = BAD;
160c9083b85SXin LI                     break;
161c9083b85SXin LI                 }
162c9083b85SXin LI #endif
163c9083b85SXin LI                 hold >>= op;
164c9083b85SXin LI                 bits -= op;
165c9083b85SXin LI                 Tracevv((stderr, "inflate:         distance %u\n", dist));
166c9083b85SXin LI                 op = (unsigned)(out - beg);     /* max distance in output */
167c9083b85SXin LI                 if (dist > op) {                /* see if copy from window */
168c9083b85SXin LI                     op = dist - op;             /* distance back in window */
169c9083b85SXin LI                     if (op > whave) {
170c9083b85SXin LI                         if (state->sane) {
171c9083b85SXin LI                             strm->msg =
172c9083b85SXin LI                                 (char *)"invalid distance too far back";
173c9083b85SXin LI                             state->mode = BAD;
174c9083b85SXin LI                             break;
175c9083b85SXin LI                         }
176c9083b85SXin LI #ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
177c9083b85SXin LI                         if (len <= op - whave) {
178c9083b85SXin LI                             do {
179c9083b85SXin LI                                 *out++ = 0;
180c9083b85SXin LI                             } while (--len);
181c9083b85SXin LI                             continue;
182c9083b85SXin LI                         }
183c9083b85SXin LI                         len -= op - whave;
184c9083b85SXin LI                         do {
185c9083b85SXin LI                             *out++ = 0;
186c9083b85SXin LI                         } while (--op > whave);
187c9083b85SXin LI                         if (op == 0) {
188c9083b85SXin LI                             from = out - dist;
189c9083b85SXin LI                             do {
190c9083b85SXin LI                                 *out++ = *from++;
191c9083b85SXin LI                             } while (--len);
192c9083b85SXin LI                             continue;
193c9083b85SXin LI                         }
194c9083b85SXin LI #endif
195c9083b85SXin LI                     }
196c9083b85SXin LI                     from = window;
197c9083b85SXin LI                     if (wnext == 0) {           /* very common case */
198c9083b85SXin LI                         from += wsize - op;
199c9083b85SXin LI                         if (op < len) {         /* some from window */
200c9083b85SXin LI                             len -= op;
201c9083b85SXin LI                             do {
202c9083b85SXin LI                                 *out++ = *from++;
203c9083b85SXin LI                             } while (--op);
204c9083b85SXin LI                             from = out - dist;  /* rest from output */
205c9083b85SXin LI                         }
206c9083b85SXin LI                     }
207c9083b85SXin LI                     else if (wnext < op) {      /* wrap around window */
208c9083b85SXin LI                         from += wsize + wnext - op;
209c9083b85SXin LI                         op -= wnext;
210c9083b85SXin LI                         if (op < len) {         /* some from end of window */
211c9083b85SXin LI                             len -= op;
212c9083b85SXin LI                             do {
213c9083b85SXin LI                                 *out++ = *from++;
214c9083b85SXin LI                             } while (--op);
215c9083b85SXin LI                             from = window;
216c9083b85SXin LI                             if (wnext < len) {  /* some from start of window */
217c9083b85SXin LI                                 op = wnext;
218c9083b85SXin LI                                 len -= op;
219c9083b85SXin LI                                 do {
220c9083b85SXin LI                                     *out++ = *from++;
221c9083b85SXin LI                                 } while (--op);
222c9083b85SXin LI                                 from = out - dist;      /* rest from output */
223c9083b85SXin LI                             }
224c9083b85SXin LI                         }
225c9083b85SXin LI                     }
226c9083b85SXin LI                     else {                      /* contiguous in window */
227c9083b85SXin LI                         from += wnext - op;
228c9083b85SXin LI                         if (op < len) {         /* some from window */
229c9083b85SXin LI                             len -= op;
230c9083b85SXin LI                             do {
231c9083b85SXin LI                                 *out++ = *from++;
232c9083b85SXin LI                             } while (--op);
233c9083b85SXin LI                             from = out - dist;  /* rest from output */
234c9083b85SXin LI                         }
235c9083b85SXin LI                     }
236c9083b85SXin LI                     while (len > 2) {
237c9083b85SXin LI                         *out++ = *from++;
238c9083b85SXin LI                         *out++ = *from++;
239c9083b85SXin LI                         *out++ = *from++;
240c9083b85SXin LI                         len -= 3;
241c9083b85SXin LI                     }
242c9083b85SXin LI                     if (len) {
243c9083b85SXin LI                         *out++ = *from++;
244c9083b85SXin LI                         if (len > 1)
245c9083b85SXin LI                             *out++ = *from++;
246c9083b85SXin LI                     }
247c9083b85SXin LI                 }
248c9083b85SXin LI                 else {
249c9083b85SXin LI                     from = out - dist;          /* copy direct from output */
250c9083b85SXin LI                     do {                        /* minimum length is three */
251c9083b85SXin LI                         *out++ = *from++;
252c9083b85SXin LI                         *out++ = *from++;
253c9083b85SXin LI                         *out++ = *from++;
254c9083b85SXin LI                         len -= 3;
255c9083b85SXin LI                     } while (len > 2);
256c9083b85SXin LI                     if (len) {
257c9083b85SXin LI                         *out++ = *from++;
258c9083b85SXin LI                         if (len > 1)
259c9083b85SXin LI                             *out++ = *from++;
260c9083b85SXin LI                     }
261c9083b85SXin LI                 }
262c9083b85SXin LI             }
263c9083b85SXin LI             else if ((op & 64) == 0) {          /* 2nd level distance code */
264cd882207SXin LI                 here = dcode + here->val + (hold & ((1U << op) - 1));
265c9083b85SXin LI                 goto dodist;
266c9083b85SXin LI             }
267c9083b85SXin LI             else {
268c9083b85SXin LI                 strm->msg = (char *)"invalid distance code";
269c9083b85SXin LI                 state->mode = BAD;
270c9083b85SXin LI                 break;
271c9083b85SXin LI             }
272c9083b85SXin LI         }
273c9083b85SXin LI         else if ((op & 64) == 0) {              /* 2nd level length code */
274cd882207SXin LI             here = lcode + here->val + (hold & ((1U << op) - 1));
275c9083b85SXin LI             goto dolen;
276c9083b85SXin LI         }
277c9083b85SXin LI         else if (op & 32) {                     /* end-of-block */
278c9083b85SXin LI             Tracevv((stderr, "inflate:         end of block\n"));
279c9083b85SXin LI             state->mode = TYPE;
280c9083b85SXin LI             break;
281c9083b85SXin LI         }
282c9083b85SXin LI         else {
283c9083b85SXin LI             strm->msg = (char *)"invalid literal/length code";
284c9083b85SXin LI             state->mode = BAD;
285c9083b85SXin LI             break;
286c9083b85SXin LI         }
287c9083b85SXin LI     } while (in < last && out < end);
288c9083b85SXin LI 
289c9083b85SXin LI     /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
290c9083b85SXin LI     len = bits >> 3;
291c9083b85SXin LI     in -= len;
292c9083b85SXin LI     bits -= len << 3;
293c9083b85SXin LI     hold &= (1U << bits) - 1;
294c9083b85SXin LI 
295c9083b85SXin LI     /* update state and return */
296c9083b85SXin LI     strm->next_in = in;
297c9083b85SXin LI     strm->next_out = out;
298c9083b85SXin LI     strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
299c9083b85SXin LI     strm->avail_out = (unsigned)(out < end ?
300c9083b85SXin LI                                  257 + (end - out) : 257 - (out - end));
301c9083b85SXin LI     state->hold = hold;
302c9083b85SXin LI     state->bits = bits;
303c9083b85SXin LI     return;
304c9083b85SXin LI }
305c9083b85SXin LI 
306c9083b85SXin LI /*
307c9083b85SXin LI    inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
308c9083b85SXin LI    - Using bit fields for code structure
309c9083b85SXin LI    - Different op definition to avoid & for extra bits (do & for table bits)
310c9083b85SXin LI    - Three separate decoding do-loops for direct, window, and wnext == 0
311c9083b85SXin LI    - Special case for distance > 1 copies to do overlapped load and store copy
312c9083b85SXin LI    - Explicit branch predictions (based on measured branch probabilities)
313c9083b85SXin LI    - Deferring match copy and interspersed it with decoding subsequent codes
314c9083b85SXin LI    - Swapping literal/length else
315c9083b85SXin LI    - Swapping window/direct else
316c9083b85SXin LI    - Larger unrolled copy loops (three is about right)
317c9083b85SXin LI    - Moving len -= 3 statement into middle of loop
318c9083b85SXin LI  */
319c9083b85SXin LI 
320c9083b85SXin LI #endif /* !ASMINF */
321