1*4a5d661aSToomas Soome /* inffast.c -- fast decoding
2*4a5d661aSToomas Soome * Copyright (C) 1995-2008, 2010, 2013 Mark Adler
3*4a5d661aSToomas Soome * For conditions of distribution and use, see copyright notice in zlib.h
4*4a5d661aSToomas Soome */
5*4a5d661aSToomas Soome
6*4a5d661aSToomas Soome #include "zutil.h"
7*4a5d661aSToomas Soome #include "inftrees.h"
8*4a5d661aSToomas Soome #include "inflate.h"
9*4a5d661aSToomas Soome #include "inffast.h"
10*4a5d661aSToomas Soome
11*4a5d661aSToomas Soome #ifndef ASMINF
12*4a5d661aSToomas Soome
13*4a5d661aSToomas Soome /* Allow machine dependent optimization for post-increment or pre-increment.
14*4a5d661aSToomas Soome Based on testing to date,
15*4a5d661aSToomas Soome Pre-increment preferred for:
16*4a5d661aSToomas Soome - PowerPC G3 (Adler)
17*4a5d661aSToomas Soome - MIPS R5000 (Randers-Pehrson)
18*4a5d661aSToomas Soome Post-increment preferred for:
19*4a5d661aSToomas Soome - none
20*4a5d661aSToomas Soome No measurable difference:
21*4a5d661aSToomas Soome - Pentium III (Anderson)
22*4a5d661aSToomas Soome - M68060 (Nikl)
23*4a5d661aSToomas Soome */
24*4a5d661aSToomas Soome #ifdef POSTINC
25*4a5d661aSToomas Soome # define OFF 0
26*4a5d661aSToomas Soome # define PUP(a) *(a)++
27*4a5d661aSToomas Soome #else
28*4a5d661aSToomas Soome # define OFF 1
29*4a5d661aSToomas Soome # define PUP(a) *++(a)
30*4a5d661aSToomas Soome #endif
31*4a5d661aSToomas Soome
32*4a5d661aSToomas Soome /*
33*4a5d661aSToomas Soome Decode literal, length, and distance codes and write out the resulting
34*4a5d661aSToomas Soome literal and match bytes until either not enough input or output is
35*4a5d661aSToomas Soome available, an end-of-block is encountered, or a data error is encountered.
36*4a5d661aSToomas Soome When large enough input and output buffers are supplied to inflate(), for
37*4a5d661aSToomas Soome example, a 16K input buffer and a 64K output buffer, more than 95% of the
38*4a5d661aSToomas Soome inflate execution time is spent in this routine.
39*4a5d661aSToomas Soome
40*4a5d661aSToomas Soome Entry assumptions:
41*4a5d661aSToomas Soome
42*4a5d661aSToomas Soome state->mode == LEN
43*4a5d661aSToomas Soome strm->avail_in >= 6
44*4a5d661aSToomas Soome strm->avail_out >= 258
45*4a5d661aSToomas Soome start >= strm->avail_out
46*4a5d661aSToomas Soome state->bits < 8
47*4a5d661aSToomas Soome
48*4a5d661aSToomas Soome On return, state->mode is one of:
49*4a5d661aSToomas Soome
50*4a5d661aSToomas Soome LEN -- ran out of enough output space or enough available input
51*4a5d661aSToomas Soome TYPE -- reached end of block code, inflate() to interpret next block
52*4a5d661aSToomas Soome BAD -- error in block data
53*4a5d661aSToomas Soome
54*4a5d661aSToomas Soome Notes:
55*4a5d661aSToomas Soome
56*4a5d661aSToomas Soome - The maximum input bits used by a length/distance pair is 15 bits for the
57*4a5d661aSToomas Soome length code, 5 bits for the length extra, 15 bits for the distance code,
58*4a5d661aSToomas Soome and 13 bits for the distance extra. This totals 48 bits, or six bytes.
59*4a5d661aSToomas Soome Therefore if strm->avail_in >= 6, then there is enough input to avoid
60*4a5d661aSToomas Soome checking for available input while decoding.
61*4a5d661aSToomas Soome
62*4a5d661aSToomas Soome - The maximum bytes that a single length/distance pair can output is 258
63*4a5d661aSToomas Soome bytes, which is the maximum length that can be coded. inflate_fast()
64*4a5d661aSToomas Soome requires strm->avail_out >= 258 for each loop to avoid checking for
65*4a5d661aSToomas Soome output space.
66*4a5d661aSToomas Soome */
inflate_fast(strm,start)67*4a5d661aSToomas Soome void ZLIB_INTERNAL inflate_fast(strm, start)
68*4a5d661aSToomas Soome z_streamp strm;
69*4a5d661aSToomas Soome unsigned start; /* inflate()'s starting value for strm->avail_out */
70*4a5d661aSToomas Soome {
71*4a5d661aSToomas Soome struct inflate_state FAR *state;
72*4a5d661aSToomas Soome z_const unsigned char FAR *in; /* local strm->next_in */
73*4a5d661aSToomas Soome z_const unsigned char FAR *last; /* have enough input while in < last */
74*4a5d661aSToomas Soome unsigned char FAR *out; /* local strm->next_out */
75*4a5d661aSToomas Soome unsigned char FAR *beg; /* inflate()'s initial strm->next_out */
76*4a5d661aSToomas Soome unsigned char FAR *end; /* while out < end, enough space available */
77*4a5d661aSToomas Soome #ifdef INFLATE_STRICT
78*4a5d661aSToomas Soome unsigned dmax; /* maximum distance from zlib header */
79*4a5d661aSToomas Soome #endif
80*4a5d661aSToomas Soome unsigned wsize; /* window size or zero if not using window */
81*4a5d661aSToomas Soome unsigned whave; /* valid bytes in the window */
82*4a5d661aSToomas Soome unsigned wnext; /* window write index */
83*4a5d661aSToomas Soome unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */
84*4a5d661aSToomas Soome unsigned long hold; /* local strm->hold */
85*4a5d661aSToomas Soome unsigned bits; /* local strm->bits */
86*4a5d661aSToomas Soome code const FAR *lcode; /* local strm->lencode */
87*4a5d661aSToomas Soome code const FAR *dcode; /* local strm->distcode */
88*4a5d661aSToomas Soome unsigned lmask; /* mask for first level of length codes */
89*4a5d661aSToomas Soome unsigned dmask; /* mask for first level of distance codes */
90*4a5d661aSToomas Soome code here; /* retrieved table entry */
91*4a5d661aSToomas Soome unsigned op; /* code bits, operation, extra bits, or */
92*4a5d661aSToomas Soome /* window position, window bytes to copy */
93*4a5d661aSToomas Soome unsigned len; /* match length, unused bytes */
94*4a5d661aSToomas Soome unsigned dist; /* match distance */
95*4a5d661aSToomas Soome unsigned char FAR *from; /* where to copy match from */
96*4a5d661aSToomas Soome
97*4a5d661aSToomas Soome /* copy state to local variables */
98*4a5d661aSToomas Soome state = (struct inflate_state FAR *)strm->state;
99*4a5d661aSToomas Soome in = strm->next_in - OFF;
100*4a5d661aSToomas Soome last = in + (strm->avail_in - 5);
101*4a5d661aSToomas Soome out = strm->next_out - OFF;
102*4a5d661aSToomas Soome beg = out - (start - strm->avail_out);
103*4a5d661aSToomas Soome end = out + (strm->avail_out - 257);
104*4a5d661aSToomas Soome #ifdef INFLATE_STRICT
105*4a5d661aSToomas Soome dmax = state->dmax;
106*4a5d661aSToomas Soome #endif
107*4a5d661aSToomas Soome wsize = state->wsize;
108*4a5d661aSToomas Soome whave = state->whave;
109*4a5d661aSToomas Soome wnext = state->wnext;
110*4a5d661aSToomas Soome window = state->window;
111*4a5d661aSToomas Soome hold = state->hold;
112*4a5d661aSToomas Soome bits = state->bits;
113*4a5d661aSToomas Soome lcode = state->lencode;
114*4a5d661aSToomas Soome dcode = state->distcode;
115*4a5d661aSToomas Soome lmask = (1U << state->lenbits) - 1;
116*4a5d661aSToomas Soome dmask = (1U << state->distbits) - 1;
117*4a5d661aSToomas Soome
118*4a5d661aSToomas Soome /* decode literals and length/distances until end-of-block or not enough
119*4a5d661aSToomas Soome input data or output space */
120*4a5d661aSToomas Soome do {
121*4a5d661aSToomas Soome if (bits < 15) {
122*4a5d661aSToomas Soome hold += (unsigned long)(PUP(in)) << bits;
123*4a5d661aSToomas Soome bits += 8;
124*4a5d661aSToomas Soome hold += (unsigned long)(PUP(in)) << bits;
125*4a5d661aSToomas Soome bits += 8;
126*4a5d661aSToomas Soome }
127*4a5d661aSToomas Soome here = lcode[hold & lmask];
128*4a5d661aSToomas Soome dolen:
129*4a5d661aSToomas Soome op = (unsigned)(here.bits);
130*4a5d661aSToomas Soome hold >>= op;
131*4a5d661aSToomas Soome bits -= op;
132*4a5d661aSToomas Soome op = (unsigned)(here.op);
133*4a5d661aSToomas Soome if (op == 0) { /* literal */
134*4a5d661aSToomas Soome Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
135*4a5d661aSToomas Soome "inflate: literal '%c'\n" :
136*4a5d661aSToomas Soome "inflate: literal 0x%02x\n", here.val));
137*4a5d661aSToomas Soome PUP(out) = (unsigned char)(here.val);
138*4a5d661aSToomas Soome }
139*4a5d661aSToomas Soome else if (op & 16) { /* length base */
140*4a5d661aSToomas Soome len = (unsigned)(here.val);
141*4a5d661aSToomas Soome op &= 15; /* number of extra bits */
142*4a5d661aSToomas Soome if (op) {
143*4a5d661aSToomas Soome if (bits < op) {
144*4a5d661aSToomas Soome hold += (unsigned long)(PUP(in)) << bits;
145*4a5d661aSToomas Soome bits += 8;
146*4a5d661aSToomas Soome }
147*4a5d661aSToomas Soome len += (unsigned)hold & ((1U << op) - 1);
148*4a5d661aSToomas Soome hold >>= op;
149*4a5d661aSToomas Soome bits -= op;
150*4a5d661aSToomas Soome }
151*4a5d661aSToomas Soome Tracevv((stderr, "inflate: length %u\n", len));
152*4a5d661aSToomas Soome if (bits < 15) {
153*4a5d661aSToomas Soome hold += (unsigned long)(PUP(in)) << bits;
154*4a5d661aSToomas Soome bits += 8;
155*4a5d661aSToomas Soome hold += (unsigned long)(PUP(in)) << bits;
156*4a5d661aSToomas Soome bits += 8;
157*4a5d661aSToomas Soome }
158*4a5d661aSToomas Soome here = dcode[hold & dmask];
159*4a5d661aSToomas Soome dodist:
160*4a5d661aSToomas Soome op = (unsigned)(here.bits);
161*4a5d661aSToomas Soome hold >>= op;
162*4a5d661aSToomas Soome bits -= op;
163*4a5d661aSToomas Soome op = (unsigned)(here.op);
164*4a5d661aSToomas Soome if (op & 16) { /* distance base */
165*4a5d661aSToomas Soome dist = (unsigned)(here.val);
166*4a5d661aSToomas Soome op &= 15; /* number of extra bits */
167*4a5d661aSToomas Soome if (bits < op) {
168*4a5d661aSToomas Soome hold += (unsigned long)(PUP(in)) << bits;
169*4a5d661aSToomas Soome bits += 8;
170*4a5d661aSToomas Soome if (bits < op) {
171*4a5d661aSToomas Soome hold += (unsigned long)(PUP(in)) << bits;
172*4a5d661aSToomas Soome bits += 8;
173*4a5d661aSToomas Soome }
174*4a5d661aSToomas Soome }
175*4a5d661aSToomas Soome dist += (unsigned)hold & ((1U << op) - 1);
176*4a5d661aSToomas Soome #ifdef INFLATE_STRICT
177*4a5d661aSToomas Soome if (dist > dmax) {
178*4a5d661aSToomas Soome strm->msg = (char *)"invalid distance too far back";
179*4a5d661aSToomas Soome state->mode = BAD;
180*4a5d661aSToomas Soome break;
181*4a5d661aSToomas Soome }
182*4a5d661aSToomas Soome #endif
183*4a5d661aSToomas Soome hold >>= op;
184*4a5d661aSToomas Soome bits -= op;
185*4a5d661aSToomas Soome Tracevv((stderr, "inflate: distance %u\n", dist));
186*4a5d661aSToomas Soome op = (unsigned)(out - beg); /* max distance in output */
187*4a5d661aSToomas Soome if (dist > op) { /* see if copy from window */
188*4a5d661aSToomas Soome op = dist - op; /* distance back in window */
189*4a5d661aSToomas Soome if (op > whave) {
190*4a5d661aSToomas Soome if (state->sane) {
191*4a5d661aSToomas Soome strm->msg =
192*4a5d661aSToomas Soome (char *)"invalid distance too far back";
193*4a5d661aSToomas Soome state->mode = BAD;
194*4a5d661aSToomas Soome break;
195*4a5d661aSToomas Soome }
196*4a5d661aSToomas Soome #ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
197*4a5d661aSToomas Soome if (len <= op - whave) {
198*4a5d661aSToomas Soome do {
199*4a5d661aSToomas Soome PUP(out) = 0;
200*4a5d661aSToomas Soome } while (--len);
201*4a5d661aSToomas Soome continue;
202*4a5d661aSToomas Soome }
203*4a5d661aSToomas Soome len -= op - whave;
204*4a5d661aSToomas Soome do {
205*4a5d661aSToomas Soome PUP(out) = 0;
206*4a5d661aSToomas Soome } while (--op > whave);
207*4a5d661aSToomas Soome if (op == 0) {
208*4a5d661aSToomas Soome from = out - dist;
209*4a5d661aSToomas Soome do {
210*4a5d661aSToomas Soome PUP(out) = PUP(from);
211*4a5d661aSToomas Soome } while (--len);
212*4a5d661aSToomas Soome continue;
213*4a5d661aSToomas Soome }
214*4a5d661aSToomas Soome #endif
215*4a5d661aSToomas Soome }
216*4a5d661aSToomas Soome from = window - OFF;
217*4a5d661aSToomas Soome if (wnext == 0) { /* very common case */
218*4a5d661aSToomas Soome from += wsize - op;
219*4a5d661aSToomas Soome if (op < len) { /* some from window */
220*4a5d661aSToomas Soome len -= op;
221*4a5d661aSToomas Soome do {
222*4a5d661aSToomas Soome PUP(out) = PUP(from);
223*4a5d661aSToomas Soome } while (--op);
224*4a5d661aSToomas Soome from = out - dist; /* rest from output */
225*4a5d661aSToomas Soome }
226*4a5d661aSToomas Soome }
227*4a5d661aSToomas Soome else if (wnext < op) { /* wrap around window */
228*4a5d661aSToomas Soome from += wsize + wnext - op;
229*4a5d661aSToomas Soome op -= wnext;
230*4a5d661aSToomas Soome if (op < len) { /* some from end of window */
231*4a5d661aSToomas Soome len -= op;
232*4a5d661aSToomas Soome do {
233*4a5d661aSToomas Soome PUP(out) = PUP(from);
234*4a5d661aSToomas Soome } while (--op);
235*4a5d661aSToomas Soome from = window - OFF;
236*4a5d661aSToomas Soome if (wnext < len) { /* some from start of window */
237*4a5d661aSToomas Soome op = wnext;
238*4a5d661aSToomas Soome len -= op;
239*4a5d661aSToomas Soome do {
240*4a5d661aSToomas Soome PUP(out) = PUP(from);
241*4a5d661aSToomas Soome } while (--op);
242*4a5d661aSToomas Soome from = out - dist; /* rest from output */
243*4a5d661aSToomas Soome }
244*4a5d661aSToomas Soome }
245*4a5d661aSToomas Soome }
246*4a5d661aSToomas Soome else { /* contiguous in window */
247*4a5d661aSToomas Soome from += wnext - op;
248*4a5d661aSToomas Soome if (op < len) { /* some from window */
249*4a5d661aSToomas Soome len -= op;
250*4a5d661aSToomas Soome do {
251*4a5d661aSToomas Soome PUP(out) = PUP(from);
252*4a5d661aSToomas Soome } while (--op);
253*4a5d661aSToomas Soome from = out - dist; /* rest from output */
254*4a5d661aSToomas Soome }
255*4a5d661aSToomas Soome }
256*4a5d661aSToomas Soome while (len > 2) {
257*4a5d661aSToomas Soome PUP(out) = PUP(from);
258*4a5d661aSToomas Soome PUP(out) = PUP(from);
259*4a5d661aSToomas Soome PUP(out) = PUP(from);
260*4a5d661aSToomas Soome len -= 3;
261*4a5d661aSToomas Soome }
262*4a5d661aSToomas Soome if (len) {
263*4a5d661aSToomas Soome PUP(out) = PUP(from);
264*4a5d661aSToomas Soome if (len > 1)
265*4a5d661aSToomas Soome PUP(out) = PUP(from);
266*4a5d661aSToomas Soome }
267*4a5d661aSToomas Soome }
268*4a5d661aSToomas Soome else {
269*4a5d661aSToomas Soome from = out - dist; /* copy direct from output */
270*4a5d661aSToomas Soome do { /* minimum length is three */
271*4a5d661aSToomas Soome PUP(out) = PUP(from);
272*4a5d661aSToomas Soome PUP(out) = PUP(from);
273*4a5d661aSToomas Soome PUP(out) = PUP(from);
274*4a5d661aSToomas Soome len -= 3;
275*4a5d661aSToomas Soome } while (len > 2);
276*4a5d661aSToomas Soome if (len) {
277*4a5d661aSToomas Soome PUP(out) = PUP(from);
278*4a5d661aSToomas Soome if (len > 1)
279*4a5d661aSToomas Soome PUP(out) = PUP(from);
280*4a5d661aSToomas Soome }
281*4a5d661aSToomas Soome }
282*4a5d661aSToomas Soome }
283*4a5d661aSToomas Soome else if ((op & 64) == 0) { /* 2nd level distance code */
284*4a5d661aSToomas Soome here = dcode[here.val + (hold & ((1U << op) - 1))];
285*4a5d661aSToomas Soome goto dodist;
286*4a5d661aSToomas Soome }
287*4a5d661aSToomas Soome else {
288*4a5d661aSToomas Soome strm->msg = (char *)"invalid distance code";
289*4a5d661aSToomas Soome state->mode = BAD;
290*4a5d661aSToomas Soome break;
291*4a5d661aSToomas Soome }
292*4a5d661aSToomas Soome }
293*4a5d661aSToomas Soome else if ((op & 64) == 0) { /* 2nd level length code */
294*4a5d661aSToomas Soome here = lcode[here.val + (hold & ((1U << op) - 1))];
295*4a5d661aSToomas Soome goto dolen;
296*4a5d661aSToomas Soome }
297*4a5d661aSToomas Soome else if (op & 32) { /* end-of-block */
298*4a5d661aSToomas Soome Tracevv((stderr, "inflate: end of block\n"));
299*4a5d661aSToomas Soome state->mode = TYPE;
300*4a5d661aSToomas Soome break;
301*4a5d661aSToomas Soome }
302*4a5d661aSToomas Soome else {
303*4a5d661aSToomas Soome strm->msg = (char *)"invalid literal/length code";
304*4a5d661aSToomas Soome state->mode = BAD;
305*4a5d661aSToomas Soome break;
306*4a5d661aSToomas Soome }
307*4a5d661aSToomas Soome } while (in < last && out < end);
308*4a5d661aSToomas Soome
309*4a5d661aSToomas Soome /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
310*4a5d661aSToomas Soome len = bits >> 3;
311*4a5d661aSToomas Soome in -= len;
312*4a5d661aSToomas Soome bits -= len << 3;
313*4a5d661aSToomas Soome hold &= (1U << bits) - 1;
314*4a5d661aSToomas Soome
315*4a5d661aSToomas Soome /* update state and return */
316*4a5d661aSToomas Soome strm->next_in = in + OFF;
317*4a5d661aSToomas Soome strm->next_out = out + OFF;
318*4a5d661aSToomas Soome strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
319*4a5d661aSToomas Soome strm->avail_out = (unsigned)(out < end ?
320*4a5d661aSToomas Soome 257 + (end - out) : 257 - (out - end));
321*4a5d661aSToomas Soome state->hold = hold;
322*4a5d661aSToomas Soome state->bits = bits;
323*4a5d661aSToomas Soome return;
324*4a5d661aSToomas Soome }
325*4a5d661aSToomas Soome
326*4a5d661aSToomas Soome /*
327*4a5d661aSToomas Soome inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
328*4a5d661aSToomas Soome - Using bit fields for code structure
329*4a5d661aSToomas Soome - Different op definition to avoid & for extra bits (do & for table bits)
330*4a5d661aSToomas Soome - Three separate decoding do-loops for direct, window, and wnext == 0
331*4a5d661aSToomas Soome - Special case for distance > 1 copies to do overlapped load and store copy
332*4a5d661aSToomas Soome - Explicit branch predictions (based on measured branch probabilities)
333*4a5d661aSToomas Soome - Deferring match copy and interspersed it with decoding subsequent codes
334*4a5d661aSToomas Soome - Swapping literal/length else
335*4a5d661aSToomas Soome - Swapping window/direct else
336*4a5d661aSToomas Soome - Larger unrolled copy loops (three is about right)
337*4a5d661aSToomas Soome - Moving len -= 3 statement into middle of loop
338*4a5d661aSToomas Soome */
339*4a5d661aSToomas Soome
340*4a5d661aSToomas Soome #endif /* !ASMINF */
341