xref: /freebsd/sys/contrib/zlib/inffast.c (revision c9083b850aed19fff781fd82084e8308c9ae6df6)
1*c9083b85SXin LI /* inffast.c -- fast decoding
2*c9083b85SXin LI  * Copyright (C) 1995-2017 Mark Adler
3*c9083b85SXin LI  * For conditions of distribution and use, see copyright notice in zlib.h
4*c9083b85SXin LI  */
5*c9083b85SXin LI 
6*c9083b85SXin LI #include "zutil.h"
7*c9083b85SXin LI #include "inftrees.h"
8*c9083b85SXin LI #include "inflate.h"
9*c9083b85SXin LI #include "inffast.h"
10*c9083b85SXin LI 
11*c9083b85SXin LI #ifdef ASMINF
12*c9083b85SXin LI #  pragma message("Assembler code may have bugs -- use at your own risk")
13*c9083b85SXin LI #else
14*c9083b85SXin LI 
15*c9083b85SXin LI /*
16*c9083b85SXin LI    Decode literal, length, and distance codes and write out the resulting
17*c9083b85SXin LI    literal and match bytes until either not enough input or output is
18*c9083b85SXin LI    available, an end-of-block is encountered, or a data error is encountered.
19*c9083b85SXin LI    When large enough input and output buffers are supplied to inflate(), for
20*c9083b85SXin LI    example, a 16K input buffer and a 64K output buffer, more than 95% of the
21*c9083b85SXin LI    inflate execution time is spent in this routine.
22*c9083b85SXin LI 
23*c9083b85SXin LI    Entry assumptions:
24*c9083b85SXin LI 
25*c9083b85SXin LI         state->mode == LEN
26*c9083b85SXin LI         strm->avail_in >= 6
27*c9083b85SXin LI         strm->avail_out >= 258
28*c9083b85SXin LI         start >= strm->avail_out
29*c9083b85SXin LI         state->bits < 8
30*c9083b85SXin LI 
31*c9083b85SXin LI    On return, state->mode is one of:
32*c9083b85SXin LI 
33*c9083b85SXin LI         LEN -- ran out of enough output space or enough available input
34*c9083b85SXin LI         TYPE -- reached end of block code, inflate() to interpret next block
35*c9083b85SXin LI         BAD -- error in block data
36*c9083b85SXin LI 
37*c9083b85SXin LI    Notes:
38*c9083b85SXin LI 
39*c9083b85SXin LI     - The maximum input bits used by a length/distance pair is 15 bits for the
40*c9083b85SXin LI       length code, 5 bits for the length extra, 15 bits for the distance code,
41*c9083b85SXin LI       and 13 bits for the distance extra.  This totals 48 bits, or six bytes.
42*c9083b85SXin LI       Therefore if strm->avail_in >= 6, then there is enough input to avoid
43*c9083b85SXin LI       checking for available input while decoding.
44*c9083b85SXin LI 
45*c9083b85SXin LI     - The maximum bytes that a single length/distance pair can output is 258
46*c9083b85SXin LI       bytes, which is the maximum length that can be coded.  inflate_fast()
47*c9083b85SXin LI       requires strm->avail_out >= 258 for each loop to avoid checking for
48*c9083b85SXin LI       output space.
49*c9083b85SXin LI  */
50*c9083b85SXin LI void ZLIB_INTERNAL inflate_fast(strm, start)
51*c9083b85SXin LI z_streamp strm;
52*c9083b85SXin LI unsigned start;         /* inflate()'s starting value for strm->avail_out */
53*c9083b85SXin LI {
54*c9083b85SXin LI     struct inflate_state FAR *state;
55*c9083b85SXin LI     z_const unsigned char FAR *in;      /* local strm->next_in */
56*c9083b85SXin LI     z_const unsigned char FAR *last;    /* have enough input while in < last */
57*c9083b85SXin LI     unsigned char FAR *out;     /* local strm->next_out */
58*c9083b85SXin LI     unsigned char FAR *beg;     /* inflate()'s initial strm->next_out */
59*c9083b85SXin LI     unsigned char FAR *end;     /* while out < end, enough space available */
60*c9083b85SXin LI #ifdef INFLATE_STRICT
61*c9083b85SXin LI     unsigned dmax;              /* maximum distance from zlib header */
62*c9083b85SXin LI #endif
63*c9083b85SXin LI     unsigned wsize;             /* window size or zero if not using window */
64*c9083b85SXin LI     unsigned whave;             /* valid bytes in the window */
65*c9083b85SXin LI     unsigned wnext;             /* window write index */
66*c9083b85SXin LI     unsigned char FAR *window;  /* allocated sliding window, if wsize != 0 */
67*c9083b85SXin LI     unsigned long hold;         /* local strm->hold */
68*c9083b85SXin LI     unsigned bits;              /* local strm->bits */
69*c9083b85SXin LI     code const FAR *lcode;      /* local strm->lencode */
70*c9083b85SXin LI     code const FAR *dcode;      /* local strm->distcode */
71*c9083b85SXin LI     unsigned lmask;             /* mask for first level of length codes */
72*c9083b85SXin LI     unsigned dmask;             /* mask for first level of distance codes */
73*c9083b85SXin LI     code here;                  /* retrieved table entry */
74*c9083b85SXin LI     unsigned op;                /* code bits, operation, extra bits, or */
75*c9083b85SXin LI                                 /*  window position, window bytes to copy */
76*c9083b85SXin LI     unsigned len;               /* match length, unused bytes */
77*c9083b85SXin LI     unsigned dist;              /* match distance */
78*c9083b85SXin LI     unsigned char FAR *from;    /* where to copy match from */
79*c9083b85SXin LI 
80*c9083b85SXin LI     /* copy state to local variables */
81*c9083b85SXin LI     state = (struct inflate_state FAR *)strm->state;
82*c9083b85SXin LI     in = strm->next_in;
83*c9083b85SXin LI     last = in + (strm->avail_in - 5);
84*c9083b85SXin LI     out = strm->next_out;
85*c9083b85SXin LI     beg = out - (start - strm->avail_out);
86*c9083b85SXin LI     end = out + (strm->avail_out - 257);
87*c9083b85SXin LI #ifdef INFLATE_STRICT
88*c9083b85SXin LI     dmax = state->dmax;
89*c9083b85SXin LI #endif
90*c9083b85SXin LI     wsize = state->wsize;
91*c9083b85SXin LI     whave = state->whave;
92*c9083b85SXin LI     wnext = state->wnext;
93*c9083b85SXin LI     window = state->window;
94*c9083b85SXin LI     hold = state->hold;
95*c9083b85SXin LI     bits = state->bits;
96*c9083b85SXin LI     lcode = state->lencode;
97*c9083b85SXin LI     dcode = state->distcode;
98*c9083b85SXin LI     lmask = (1U << state->lenbits) - 1;
99*c9083b85SXin LI     dmask = (1U << state->distbits) - 1;
100*c9083b85SXin LI 
101*c9083b85SXin LI     /* decode literals and length/distances until end-of-block or not enough
102*c9083b85SXin LI        input data or output space */
103*c9083b85SXin LI     do {
104*c9083b85SXin LI         if (bits < 15) {
105*c9083b85SXin LI             hold += (unsigned long)(*in++) << bits;
106*c9083b85SXin LI             bits += 8;
107*c9083b85SXin LI             hold += (unsigned long)(*in++) << bits;
108*c9083b85SXin LI             bits += 8;
109*c9083b85SXin LI         }
110*c9083b85SXin LI         here = lcode[hold & lmask];
111*c9083b85SXin LI       dolen:
112*c9083b85SXin LI         op = (unsigned)(here.bits);
113*c9083b85SXin LI         hold >>= op;
114*c9083b85SXin LI         bits -= op;
115*c9083b85SXin LI         op = (unsigned)(here.op);
116*c9083b85SXin LI         if (op == 0) {                          /* literal */
117*c9083b85SXin LI             Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
118*c9083b85SXin LI                     "inflate:         literal '%c'\n" :
119*c9083b85SXin LI                     "inflate:         literal 0x%02x\n", here.val));
120*c9083b85SXin LI             *out++ = (unsigned char)(here.val);
121*c9083b85SXin LI         }
122*c9083b85SXin LI         else if (op & 16) {                     /* length base */
123*c9083b85SXin LI             len = (unsigned)(here.val);
124*c9083b85SXin LI             op &= 15;                           /* number of extra bits */
125*c9083b85SXin LI             if (op) {
126*c9083b85SXin LI                 if (bits < op) {
127*c9083b85SXin LI                     hold += (unsigned long)(*in++) << bits;
128*c9083b85SXin LI                     bits += 8;
129*c9083b85SXin LI                 }
130*c9083b85SXin LI                 len += (unsigned)hold & ((1U << op) - 1);
131*c9083b85SXin LI                 hold >>= op;
132*c9083b85SXin LI                 bits -= op;
133*c9083b85SXin LI             }
134*c9083b85SXin LI             Tracevv((stderr, "inflate:         length %u\n", len));
135*c9083b85SXin LI             if (bits < 15) {
136*c9083b85SXin LI                 hold += (unsigned long)(*in++) << bits;
137*c9083b85SXin LI                 bits += 8;
138*c9083b85SXin LI                 hold += (unsigned long)(*in++) << bits;
139*c9083b85SXin LI                 bits += 8;
140*c9083b85SXin LI             }
141*c9083b85SXin LI             here = dcode[hold & dmask];
142*c9083b85SXin LI           dodist:
143*c9083b85SXin LI             op = (unsigned)(here.bits);
144*c9083b85SXin LI             hold >>= op;
145*c9083b85SXin LI             bits -= op;
146*c9083b85SXin LI             op = (unsigned)(here.op);
147*c9083b85SXin LI             if (op & 16) {                      /* distance base */
148*c9083b85SXin LI                 dist = (unsigned)(here.val);
149*c9083b85SXin LI                 op &= 15;                       /* number of extra bits */
150*c9083b85SXin LI                 if (bits < op) {
151*c9083b85SXin LI                     hold += (unsigned long)(*in++) << bits;
152*c9083b85SXin LI                     bits += 8;
153*c9083b85SXin LI                     if (bits < op) {
154*c9083b85SXin LI                         hold += (unsigned long)(*in++) << bits;
155*c9083b85SXin LI                         bits += 8;
156*c9083b85SXin LI                     }
157*c9083b85SXin LI                 }
158*c9083b85SXin LI                 dist += (unsigned)hold & ((1U << op) - 1);
159*c9083b85SXin LI #ifdef INFLATE_STRICT
160*c9083b85SXin LI                 if (dist > dmax) {
161*c9083b85SXin LI                     strm->msg = (char *)"invalid distance too far back";
162*c9083b85SXin LI                     state->mode = BAD;
163*c9083b85SXin LI                     break;
164*c9083b85SXin LI                 }
165*c9083b85SXin LI #endif
166*c9083b85SXin LI                 hold >>= op;
167*c9083b85SXin LI                 bits -= op;
168*c9083b85SXin LI                 Tracevv((stderr, "inflate:         distance %u\n", dist));
169*c9083b85SXin LI                 op = (unsigned)(out - beg);     /* max distance in output */
170*c9083b85SXin LI                 if (dist > op) {                /* see if copy from window */
171*c9083b85SXin LI                     op = dist - op;             /* distance back in window */
172*c9083b85SXin LI                     if (op > whave) {
173*c9083b85SXin LI                         if (state->sane) {
174*c9083b85SXin LI                             strm->msg =
175*c9083b85SXin LI                                 (char *)"invalid distance too far back";
176*c9083b85SXin LI                             state->mode = BAD;
177*c9083b85SXin LI                             break;
178*c9083b85SXin LI                         }
179*c9083b85SXin LI #ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
180*c9083b85SXin LI                         if (len <= op - whave) {
181*c9083b85SXin LI                             do {
182*c9083b85SXin LI                                 *out++ = 0;
183*c9083b85SXin LI                             } while (--len);
184*c9083b85SXin LI                             continue;
185*c9083b85SXin LI                         }
186*c9083b85SXin LI                         len -= op - whave;
187*c9083b85SXin LI                         do {
188*c9083b85SXin LI                             *out++ = 0;
189*c9083b85SXin LI                         } while (--op > whave);
190*c9083b85SXin LI                         if (op == 0) {
191*c9083b85SXin LI                             from = out - dist;
192*c9083b85SXin LI                             do {
193*c9083b85SXin LI                                 *out++ = *from++;
194*c9083b85SXin LI                             } while (--len);
195*c9083b85SXin LI                             continue;
196*c9083b85SXin LI                         }
197*c9083b85SXin LI #endif
198*c9083b85SXin LI                     }
199*c9083b85SXin LI                     from = window;
200*c9083b85SXin LI                     if (wnext == 0) {           /* very common case */
201*c9083b85SXin LI                         from += wsize - op;
202*c9083b85SXin LI                         if (op < len) {         /* some from window */
203*c9083b85SXin LI                             len -= op;
204*c9083b85SXin LI                             do {
205*c9083b85SXin LI                                 *out++ = *from++;
206*c9083b85SXin LI                             } while (--op);
207*c9083b85SXin LI                             from = out - dist;  /* rest from output */
208*c9083b85SXin LI                         }
209*c9083b85SXin LI                     }
210*c9083b85SXin LI                     else if (wnext < op) {      /* wrap around window */
211*c9083b85SXin LI                         from += wsize + wnext - op;
212*c9083b85SXin LI                         op -= wnext;
213*c9083b85SXin LI                         if (op < len) {         /* some from end of window */
214*c9083b85SXin LI                             len -= op;
215*c9083b85SXin LI                             do {
216*c9083b85SXin LI                                 *out++ = *from++;
217*c9083b85SXin LI                             } while (--op);
218*c9083b85SXin LI                             from = window;
219*c9083b85SXin LI                             if (wnext < len) {  /* some from start of window */
220*c9083b85SXin LI                                 op = wnext;
221*c9083b85SXin LI                                 len -= op;
222*c9083b85SXin LI                                 do {
223*c9083b85SXin LI                                     *out++ = *from++;
224*c9083b85SXin LI                                 } while (--op);
225*c9083b85SXin LI                                 from = out - dist;      /* rest from output */
226*c9083b85SXin LI                             }
227*c9083b85SXin LI                         }
228*c9083b85SXin LI                     }
229*c9083b85SXin LI                     else {                      /* contiguous in window */
230*c9083b85SXin LI                         from += wnext - op;
231*c9083b85SXin LI                         if (op < len) {         /* some from window */
232*c9083b85SXin LI                             len -= op;
233*c9083b85SXin LI                             do {
234*c9083b85SXin LI                                 *out++ = *from++;
235*c9083b85SXin LI                             } while (--op);
236*c9083b85SXin LI                             from = out - dist;  /* rest from output */
237*c9083b85SXin LI                         }
238*c9083b85SXin LI                     }
239*c9083b85SXin LI                     while (len > 2) {
240*c9083b85SXin LI                         *out++ = *from++;
241*c9083b85SXin LI                         *out++ = *from++;
242*c9083b85SXin LI                         *out++ = *from++;
243*c9083b85SXin LI                         len -= 3;
244*c9083b85SXin LI                     }
245*c9083b85SXin LI                     if (len) {
246*c9083b85SXin LI                         *out++ = *from++;
247*c9083b85SXin LI                         if (len > 1)
248*c9083b85SXin LI                             *out++ = *from++;
249*c9083b85SXin LI                     }
250*c9083b85SXin LI                 }
251*c9083b85SXin LI                 else {
252*c9083b85SXin LI                     from = out - dist;          /* copy direct from output */
253*c9083b85SXin LI                     do {                        /* minimum length is three */
254*c9083b85SXin LI                         *out++ = *from++;
255*c9083b85SXin LI                         *out++ = *from++;
256*c9083b85SXin LI                         *out++ = *from++;
257*c9083b85SXin LI                         len -= 3;
258*c9083b85SXin LI                     } while (len > 2);
259*c9083b85SXin LI                     if (len) {
260*c9083b85SXin LI                         *out++ = *from++;
261*c9083b85SXin LI                         if (len > 1)
262*c9083b85SXin LI                             *out++ = *from++;
263*c9083b85SXin LI                     }
264*c9083b85SXin LI                 }
265*c9083b85SXin LI             }
266*c9083b85SXin LI             else if ((op & 64) == 0) {          /* 2nd level distance code */
267*c9083b85SXin LI                 here = dcode[here.val + (hold & ((1U << op) - 1))];
268*c9083b85SXin LI                 goto dodist;
269*c9083b85SXin LI             }
270*c9083b85SXin LI             else {
271*c9083b85SXin LI                 strm->msg = (char *)"invalid distance code";
272*c9083b85SXin LI                 state->mode = BAD;
273*c9083b85SXin LI                 break;
274*c9083b85SXin LI             }
275*c9083b85SXin LI         }
276*c9083b85SXin LI         else if ((op & 64) == 0) {              /* 2nd level length code */
277*c9083b85SXin LI             here = lcode[here.val + (hold & ((1U << op) - 1))];
278*c9083b85SXin LI             goto dolen;
279*c9083b85SXin LI         }
280*c9083b85SXin LI         else if (op & 32) {                     /* end-of-block */
281*c9083b85SXin LI             Tracevv((stderr, "inflate:         end of block\n"));
282*c9083b85SXin LI             state->mode = TYPE;
283*c9083b85SXin LI             break;
284*c9083b85SXin LI         }
285*c9083b85SXin LI         else {
286*c9083b85SXin LI             strm->msg = (char *)"invalid literal/length code";
287*c9083b85SXin LI             state->mode = BAD;
288*c9083b85SXin LI             break;
289*c9083b85SXin LI         }
290*c9083b85SXin LI     } while (in < last && out < end);
291*c9083b85SXin LI 
292*c9083b85SXin LI     /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
293*c9083b85SXin LI     len = bits >> 3;
294*c9083b85SXin LI     in -= len;
295*c9083b85SXin LI     bits -= len << 3;
296*c9083b85SXin LI     hold &= (1U << bits) - 1;
297*c9083b85SXin LI 
298*c9083b85SXin LI     /* update state and return */
299*c9083b85SXin LI     strm->next_in = in;
300*c9083b85SXin LI     strm->next_out = out;
301*c9083b85SXin LI     strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
302*c9083b85SXin LI     strm->avail_out = (unsigned)(out < end ?
303*c9083b85SXin LI                                  257 + (end - out) : 257 - (out - end));
304*c9083b85SXin LI     state->hold = hold;
305*c9083b85SXin LI     state->bits = bits;
306*c9083b85SXin LI     return;
307*c9083b85SXin LI }
308*c9083b85SXin LI 
309*c9083b85SXin LI /*
310*c9083b85SXin LI    inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
311*c9083b85SXin LI    - Using bit fields for code structure
312*c9083b85SXin LI    - Different op definition to avoid & for extra bits (do & for table bits)
313*c9083b85SXin LI    - Three separate decoding do-loops for direct, window, and wnext == 0
314*c9083b85SXin LI    - Special case for distance > 1 copies to do overlapped load and store copy
315*c9083b85SXin LI    - Explicit branch predictions (based on measured branch probabilities)
316*c9083b85SXin LI    - Deferring match copy and interspersed it with decoding subsequent codes
317*c9083b85SXin LI    - Swapping literal/length else
318*c9083b85SXin LI    - Swapping window/direct else
319*c9083b85SXin LI    - Larger unrolled copy loops (three is about right)
320*c9083b85SXin LI    - Moving len -= 3 statement into middle of loop
321*c9083b85SXin LI  */
322*c9083b85SXin LI 
323*c9083b85SXin LI #endif /* !ASMINF */
324