xref: /freebsd/sys/contrib/zlib/inffast.c (revision cd8822075a38d0734e74b1735e4b5dbef9789170)
1c9083b85SXin LI /* inffast.c -- fast decoding
2c9083b85SXin LI  * Copyright (C) 1995-2017 Mark Adler
3c9083b85SXin LI  * For conditions of distribution and use, see copyright notice in zlib.h
4c9083b85SXin LI  */
5c9083b85SXin LI 
6c9083b85SXin LI #include "zutil.h"
7c9083b85SXin LI #include "inftrees.h"
8c9083b85SXin LI #include "inflate.h"
9c9083b85SXin LI #include "inffast.h"
10c9083b85SXin LI 
11c9083b85SXin LI #ifdef ASMINF
12c9083b85SXin LI #  pragma message("Assembler code may have bugs -- use at your own risk")
13c9083b85SXin LI #else
14c9083b85SXin LI 
15c9083b85SXin LI /*
16c9083b85SXin LI    Decode literal, length, and distance codes and write out the resulting
17c9083b85SXin LI    literal and match bytes until either not enough input or output is
18c9083b85SXin LI    available, an end-of-block is encountered, or a data error is encountered.
19c9083b85SXin LI    When large enough input and output buffers are supplied to inflate(), for
20c9083b85SXin LI    example, a 16K input buffer and a 64K output buffer, more than 95% of the
21c9083b85SXin LI    inflate execution time is spent in this routine.
22c9083b85SXin LI 
23c9083b85SXin LI    Entry assumptions:
24c9083b85SXin LI 
25c9083b85SXin LI         state->mode == LEN
26c9083b85SXin LI         strm->avail_in >= 6
27c9083b85SXin LI         strm->avail_out >= 258
28c9083b85SXin LI         start >= strm->avail_out
29c9083b85SXin LI         state->bits < 8
30c9083b85SXin LI 
31c9083b85SXin LI    On return, state->mode is one of:
32c9083b85SXin LI 
33c9083b85SXin LI         LEN -- ran out of enough output space or enough available input
34c9083b85SXin LI         TYPE -- reached end of block code, inflate() to interpret next block
35c9083b85SXin LI         BAD -- error in block data
36c9083b85SXin LI 
37c9083b85SXin LI    Notes:
38c9083b85SXin LI 
39c9083b85SXin LI     - The maximum input bits used by a length/distance pair is 15 bits for the
40c9083b85SXin LI       length code, 5 bits for the length extra, 15 bits for the distance code,
41c9083b85SXin LI       and 13 bits for the distance extra.  This totals 48 bits, or six bytes.
42c9083b85SXin LI       Therefore if strm->avail_in >= 6, then there is enough input to avoid
43c9083b85SXin LI       checking for available input while decoding.
44c9083b85SXin LI 
45c9083b85SXin LI     - The maximum bytes that a single length/distance pair can output is 258
46c9083b85SXin LI       bytes, which is the maximum length that can be coded.  inflate_fast()
47c9083b85SXin LI       requires strm->avail_out >= 258 for each loop to avoid checking for
48c9083b85SXin LI       output space.
49c9083b85SXin LI  */
50c9083b85SXin LI void ZLIB_INTERNAL inflate_fast(strm, start)
51c9083b85SXin LI z_streamp strm;
52c9083b85SXin LI unsigned start;         /* inflate()'s starting value for strm->avail_out */
53c9083b85SXin LI {
54c9083b85SXin LI     struct inflate_state FAR *state;
55c9083b85SXin LI     z_const unsigned char FAR *in;      /* local strm->next_in */
56c9083b85SXin LI     z_const unsigned char FAR *last;    /* have enough input while in < last */
57c9083b85SXin LI     unsigned char FAR *out;     /* local strm->next_out */
58c9083b85SXin LI     unsigned char FAR *beg;     /* inflate()'s initial strm->next_out */
59c9083b85SXin LI     unsigned char FAR *end;     /* while out < end, enough space available */
60c9083b85SXin LI #ifdef INFLATE_STRICT
61c9083b85SXin LI     unsigned dmax;              /* maximum distance from zlib header */
62c9083b85SXin LI #endif
63c9083b85SXin LI     unsigned wsize;             /* window size or zero if not using window */
64c9083b85SXin LI     unsigned whave;             /* valid bytes in the window */
65c9083b85SXin LI     unsigned wnext;             /* window write index */
66c9083b85SXin LI     unsigned char FAR *window;  /* allocated sliding window, if wsize != 0 */
67c9083b85SXin LI     unsigned long hold;         /* local strm->hold */
68c9083b85SXin LI     unsigned bits;              /* local strm->bits */
69c9083b85SXin LI     code const FAR *lcode;      /* local strm->lencode */
70c9083b85SXin LI     code const FAR *dcode;      /* local strm->distcode */
71c9083b85SXin LI     unsigned lmask;             /* mask for first level of length codes */
72c9083b85SXin LI     unsigned dmask;             /* mask for first level of distance codes */
73*cd882207SXin LI     code const *here;           /* retrieved table entry */
74c9083b85SXin LI     unsigned op;                /* code bits, operation, extra bits, or */
75c9083b85SXin LI                                 /*  window position, window bytes to copy */
76c9083b85SXin LI     unsigned len;               /* match length, unused bytes */
77c9083b85SXin LI     unsigned dist;              /* match distance */
78c9083b85SXin LI     unsigned char FAR *from;    /* where to copy match from */
79c9083b85SXin LI 
80c9083b85SXin LI     /* copy state to local variables */
81c9083b85SXin LI     state = (struct inflate_state FAR *)strm->state;
82c9083b85SXin LI     in = strm->next_in;
83c9083b85SXin LI     last = in + (strm->avail_in - 5);
84c9083b85SXin LI     out = strm->next_out;
85c9083b85SXin LI     beg = out - (start - strm->avail_out);
86c9083b85SXin LI     end = out + (strm->avail_out - 257);
87c9083b85SXin LI #ifdef INFLATE_STRICT
88c9083b85SXin LI     dmax = state->dmax;
89c9083b85SXin LI #endif
90c9083b85SXin LI     wsize = state->wsize;
91c9083b85SXin LI     whave = state->whave;
92c9083b85SXin LI     wnext = state->wnext;
93c9083b85SXin LI     window = state->window;
94c9083b85SXin LI     hold = state->hold;
95c9083b85SXin LI     bits = state->bits;
96c9083b85SXin LI     lcode = state->lencode;
97c9083b85SXin LI     dcode = state->distcode;
98c9083b85SXin LI     lmask = (1U << state->lenbits) - 1;
99c9083b85SXin LI     dmask = (1U << state->distbits) - 1;
100c9083b85SXin LI 
101c9083b85SXin LI     /* decode literals and length/distances until end-of-block or not enough
102c9083b85SXin LI        input data or output space */
103c9083b85SXin LI     do {
104c9083b85SXin LI         if (bits < 15) {
105c9083b85SXin LI             hold += (unsigned long)(*in++) << bits;
106c9083b85SXin LI             bits += 8;
107c9083b85SXin LI             hold += (unsigned long)(*in++) << bits;
108c9083b85SXin LI             bits += 8;
109c9083b85SXin LI         }
110*cd882207SXin LI         here = lcode + (hold & lmask);
111c9083b85SXin LI       dolen:
112*cd882207SXin LI         op = (unsigned)(here->bits);
113c9083b85SXin LI         hold >>= op;
114c9083b85SXin LI         bits -= op;
115*cd882207SXin LI         op = (unsigned)(here->op);
116c9083b85SXin LI         if (op == 0) {                          /* literal */
117*cd882207SXin LI             Tracevv((stderr, here->val >= 0x20 && here->val < 0x7f ?
118c9083b85SXin LI                     "inflate:         literal '%c'\n" :
119*cd882207SXin LI                     "inflate:         literal 0x%02x\n", here->val));
120*cd882207SXin LI             *out++ = (unsigned char)(here->val);
121c9083b85SXin LI         }
122c9083b85SXin LI         else if (op & 16) {                     /* length base */
123*cd882207SXin LI             len = (unsigned)(here->val);
124c9083b85SXin LI             op &= 15;                           /* number of extra bits */
125c9083b85SXin LI             if (op) {
126c9083b85SXin LI                 if (bits < op) {
127c9083b85SXin LI                     hold += (unsigned long)(*in++) << bits;
128c9083b85SXin LI                     bits += 8;
129c9083b85SXin LI                 }
130c9083b85SXin LI                 len += (unsigned)hold & ((1U << op) - 1);
131c9083b85SXin LI                 hold >>= op;
132c9083b85SXin LI                 bits -= op;
133c9083b85SXin LI             }
134c9083b85SXin LI             Tracevv((stderr, "inflate:         length %u\n", len));
135c9083b85SXin LI             if (bits < 15) {
136c9083b85SXin LI                 hold += (unsigned long)(*in++) << bits;
137c9083b85SXin LI                 bits += 8;
138c9083b85SXin LI                 hold += (unsigned long)(*in++) << bits;
139c9083b85SXin LI                 bits += 8;
140c9083b85SXin LI             }
141*cd882207SXin LI             here = dcode + (hold & dmask);
142c9083b85SXin LI           dodist:
143*cd882207SXin LI             op = (unsigned)(here->bits);
144c9083b85SXin LI             hold >>= op;
145c9083b85SXin LI             bits -= op;
146*cd882207SXin LI             op = (unsigned)(here->op);
147c9083b85SXin LI             if (op & 16) {                      /* distance base */
148*cd882207SXin LI                 dist = (unsigned)(here->val);
149c9083b85SXin LI                 op &= 15;                       /* number of extra bits */
150c9083b85SXin LI                 if (bits < op) {
151c9083b85SXin LI                     hold += (unsigned long)(*in++) << bits;
152c9083b85SXin LI                     bits += 8;
153c9083b85SXin LI                     if (bits < op) {
154c9083b85SXin LI                         hold += (unsigned long)(*in++) << bits;
155c9083b85SXin LI                         bits += 8;
156c9083b85SXin LI                     }
157c9083b85SXin LI                 }
158c9083b85SXin LI                 dist += (unsigned)hold & ((1U << op) - 1);
159c9083b85SXin LI #ifdef INFLATE_STRICT
160c9083b85SXin LI                 if (dist > dmax) {
161c9083b85SXin LI                     strm->msg = (char *)"invalid distance too far back";
162c9083b85SXin LI                     state->mode = BAD;
163c9083b85SXin LI                     break;
164c9083b85SXin LI                 }
165c9083b85SXin LI #endif
166c9083b85SXin LI                 hold >>= op;
167c9083b85SXin LI                 bits -= op;
168c9083b85SXin LI                 Tracevv((stderr, "inflate:         distance %u\n", dist));
169c9083b85SXin LI                 op = (unsigned)(out - beg);     /* max distance in output */
170c9083b85SXin LI                 if (dist > op) {                /* see if copy from window */
171c9083b85SXin LI                     op = dist - op;             /* distance back in window */
172c9083b85SXin LI                     if (op > whave) {
173c9083b85SXin LI                         if (state->sane) {
174c9083b85SXin LI                             strm->msg =
175c9083b85SXin LI                                 (char *)"invalid distance too far back";
176c9083b85SXin LI                             state->mode = BAD;
177c9083b85SXin LI                             break;
178c9083b85SXin LI                         }
179c9083b85SXin LI #ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
180c9083b85SXin LI                         if (len <= op - whave) {
181c9083b85SXin LI                             do {
182c9083b85SXin LI                                 *out++ = 0;
183c9083b85SXin LI                             } while (--len);
184c9083b85SXin LI                             continue;
185c9083b85SXin LI                         }
186c9083b85SXin LI                         len -= op - whave;
187c9083b85SXin LI                         do {
188c9083b85SXin LI                             *out++ = 0;
189c9083b85SXin LI                         } while (--op > whave);
190c9083b85SXin LI                         if (op == 0) {
191c9083b85SXin LI                             from = out - dist;
192c9083b85SXin LI                             do {
193c9083b85SXin LI                                 *out++ = *from++;
194c9083b85SXin LI                             } while (--len);
195c9083b85SXin LI                             continue;
196c9083b85SXin LI                         }
197c9083b85SXin LI #endif
198c9083b85SXin LI                     }
199c9083b85SXin LI                     from = window;
200c9083b85SXin LI                     if (wnext == 0) {           /* very common case */
201c9083b85SXin LI                         from += wsize - op;
202c9083b85SXin LI                         if (op < len) {         /* some from window */
203c9083b85SXin LI                             len -= op;
204c9083b85SXin LI                             do {
205c9083b85SXin LI                                 *out++ = *from++;
206c9083b85SXin LI                             } while (--op);
207c9083b85SXin LI                             from = out - dist;  /* rest from output */
208c9083b85SXin LI                         }
209c9083b85SXin LI                     }
210c9083b85SXin LI                     else if (wnext < op) {      /* wrap around window */
211c9083b85SXin LI                         from += wsize + wnext - op;
212c9083b85SXin LI                         op -= wnext;
213c9083b85SXin LI                         if (op < len) {         /* some from end of window */
214c9083b85SXin LI                             len -= op;
215c9083b85SXin LI                             do {
216c9083b85SXin LI                                 *out++ = *from++;
217c9083b85SXin LI                             } while (--op);
218c9083b85SXin LI                             from = window;
219c9083b85SXin LI                             if (wnext < len) {  /* some from start of window */
220c9083b85SXin LI                                 op = wnext;
221c9083b85SXin LI                                 len -= op;
222c9083b85SXin LI                                 do {
223c9083b85SXin LI                                     *out++ = *from++;
224c9083b85SXin LI                                 } while (--op);
225c9083b85SXin LI                                 from = out - dist;      /* rest from output */
226c9083b85SXin LI                             }
227c9083b85SXin LI                         }
228c9083b85SXin LI                     }
229c9083b85SXin LI                     else {                      /* contiguous in window */
230c9083b85SXin LI                         from += wnext - op;
231c9083b85SXin LI                         if (op < len) {         /* some from window */
232c9083b85SXin LI                             len -= op;
233c9083b85SXin LI                             do {
234c9083b85SXin LI                                 *out++ = *from++;
235c9083b85SXin LI                             } while (--op);
236c9083b85SXin LI                             from = out - dist;  /* rest from output */
237c9083b85SXin LI                         }
238c9083b85SXin LI                     }
239c9083b85SXin LI                     while (len > 2) {
240c9083b85SXin LI                         *out++ = *from++;
241c9083b85SXin LI                         *out++ = *from++;
242c9083b85SXin LI                         *out++ = *from++;
243c9083b85SXin LI                         len -= 3;
244c9083b85SXin LI                     }
245c9083b85SXin LI                     if (len) {
246c9083b85SXin LI                         *out++ = *from++;
247c9083b85SXin LI                         if (len > 1)
248c9083b85SXin LI                             *out++ = *from++;
249c9083b85SXin LI                     }
250c9083b85SXin LI                 }
251c9083b85SXin LI                 else {
252c9083b85SXin LI                     from = out - dist;          /* copy direct from output */
253c9083b85SXin LI                     do {                        /* minimum length is three */
254c9083b85SXin LI                         *out++ = *from++;
255c9083b85SXin LI                         *out++ = *from++;
256c9083b85SXin LI                         *out++ = *from++;
257c9083b85SXin LI                         len -= 3;
258c9083b85SXin LI                     } while (len > 2);
259c9083b85SXin LI                     if (len) {
260c9083b85SXin LI                         *out++ = *from++;
261c9083b85SXin LI                         if (len > 1)
262c9083b85SXin LI                             *out++ = *from++;
263c9083b85SXin LI                     }
264c9083b85SXin LI                 }
265c9083b85SXin LI             }
266c9083b85SXin LI             else if ((op & 64) == 0) {          /* 2nd level distance code */
267*cd882207SXin LI                 here = dcode + here->val + (hold & ((1U << op) - 1));
268c9083b85SXin LI                 goto dodist;
269c9083b85SXin LI             }
270c9083b85SXin LI             else {
271c9083b85SXin LI                 strm->msg = (char *)"invalid distance code";
272c9083b85SXin LI                 state->mode = BAD;
273c9083b85SXin LI                 break;
274c9083b85SXin LI             }
275c9083b85SXin LI         }
276c9083b85SXin LI         else if ((op & 64) == 0) {              /* 2nd level length code */
277*cd882207SXin LI             here = lcode + here->val + (hold & ((1U << op) - 1));
278c9083b85SXin LI             goto dolen;
279c9083b85SXin LI         }
280c9083b85SXin LI         else if (op & 32) {                     /* end-of-block */
281c9083b85SXin LI             Tracevv((stderr, "inflate:         end of block\n"));
282c9083b85SXin LI             state->mode = TYPE;
283c9083b85SXin LI             break;
284c9083b85SXin LI         }
285c9083b85SXin LI         else {
286c9083b85SXin LI             strm->msg = (char *)"invalid literal/length code";
287c9083b85SXin LI             state->mode = BAD;
288c9083b85SXin LI             break;
289c9083b85SXin LI         }
290c9083b85SXin LI     } while (in < last && out < end);
291c9083b85SXin LI 
292c9083b85SXin LI     /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
293c9083b85SXin LI     len = bits >> 3;
294c9083b85SXin LI     in -= len;
295c9083b85SXin LI     bits -= len << 3;
296c9083b85SXin LI     hold &= (1U << bits) - 1;
297c9083b85SXin LI 
298c9083b85SXin LI     /* update state and return */
299c9083b85SXin LI     strm->next_in = in;
300c9083b85SXin LI     strm->next_out = out;
301c9083b85SXin LI     strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
302c9083b85SXin LI     strm->avail_out = (unsigned)(out < end ?
303c9083b85SXin LI                                  257 + (end - out) : 257 - (out - end));
304c9083b85SXin LI     state->hold = hold;
305c9083b85SXin LI     state->bits = bits;
306c9083b85SXin LI     return;
307c9083b85SXin LI }
308c9083b85SXin LI 
309c9083b85SXin LI /*
310c9083b85SXin LI    inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
311c9083b85SXin LI    - Using bit fields for code structure
312c9083b85SXin LI    - Different op definition to avoid & for extra bits (do & for table bits)
313c9083b85SXin LI    - Three separate decoding do-loops for direct, window, and wnext == 0
314c9083b85SXin LI    - Special case for distance > 1 copies to do overlapped load and store copy
315c9083b85SXin LI    - Explicit branch predictions (based on measured branch probabilities)
316c9083b85SXin LI    - Deferring match copy and interspersed it with decoding subsequent codes
317c9083b85SXin LI    - Swapping literal/length else
318c9083b85SXin LI    - Swapping window/direct else
319c9083b85SXin LI    - Larger unrolled copy loops (three is about right)
320c9083b85SXin LI    - Moving len -= 3 statement into middle of loop
321c9083b85SXin LI  */
322c9083b85SXin LI 
323c9083b85SXin LI #endif /* !ASMINF */
324