xref: /titanic_51/usr/src/boot/lib/libz/inffast.c (revision 4a5d661a82b942b6538acd26209d959ce98b593a)
1*4a5d661aSToomas Soome /* inffast.c -- fast decoding
2*4a5d661aSToomas Soome  * Copyright (C) 1995-2008, 2010, 2013 Mark Adler
3*4a5d661aSToomas Soome  * For conditions of distribution and use, see copyright notice in zlib.h
4*4a5d661aSToomas Soome  */
5*4a5d661aSToomas Soome 
6*4a5d661aSToomas Soome #include "zutil.h"
7*4a5d661aSToomas Soome #include "inftrees.h"
8*4a5d661aSToomas Soome #include "inflate.h"
9*4a5d661aSToomas Soome #include "inffast.h"
10*4a5d661aSToomas Soome 
11*4a5d661aSToomas Soome #ifndef ASMINF
12*4a5d661aSToomas Soome 
13*4a5d661aSToomas Soome /* Allow machine dependent optimization for post-increment or pre-increment.
14*4a5d661aSToomas Soome    Based on testing to date,
15*4a5d661aSToomas Soome    Pre-increment preferred for:
16*4a5d661aSToomas Soome    - PowerPC G3 (Adler)
17*4a5d661aSToomas Soome    - MIPS R5000 (Randers-Pehrson)
18*4a5d661aSToomas Soome    Post-increment preferred for:
19*4a5d661aSToomas Soome    - none
20*4a5d661aSToomas Soome    No measurable difference:
21*4a5d661aSToomas Soome    - Pentium III (Anderson)
22*4a5d661aSToomas Soome    - M68060 (Nikl)
23*4a5d661aSToomas Soome  */
24*4a5d661aSToomas Soome #ifdef POSTINC
25*4a5d661aSToomas Soome #  define OFF 0
26*4a5d661aSToomas Soome #  define PUP(a) *(a)++
27*4a5d661aSToomas Soome #else
28*4a5d661aSToomas Soome #  define OFF 1
29*4a5d661aSToomas Soome #  define PUP(a) *++(a)
30*4a5d661aSToomas Soome #endif
31*4a5d661aSToomas Soome 
32*4a5d661aSToomas Soome /*
33*4a5d661aSToomas Soome    Decode literal, length, and distance codes and write out the resulting
34*4a5d661aSToomas Soome    literal and match bytes until either not enough input or output is
35*4a5d661aSToomas Soome    available, an end-of-block is encountered, or a data error is encountered.
36*4a5d661aSToomas Soome    When large enough input and output buffers are supplied to inflate(), for
37*4a5d661aSToomas Soome    example, a 16K input buffer and a 64K output buffer, more than 95% of the
38*4a5d661aSToomas Soome    inflate execution time is spent in this routine.
39*4a5d661aSToomas Soome 
40*4a5d661aSToomas Soome    Entry assumptions:
41*4a5d661aSToomas Soome 
42*4a5d661aSToomas Soome         state->mode == LEN
43*4a5d661aSToomas Soome         strm->avail_in >= 6
44*4a5d661aSToomas Soome         strm->avail_out >= 258
45*4a5d661aSToomas Soome         start >= strm->avail_out
46*4a5d661aSToomas Soome         state->bits < 8
47*4a5d661aSToomas Soome 
48*4a5d661aSToomas Soome    On return, state->mode is one of:
49*4a5d661aSToomas Soome 
50*4a5d661aSToomas Soome         LEN -- ran out of enough output space or enough available input
51*4a5d661aSToomas Soome         TYPE -- reached end of block code, inflate() to interpret next block
52*4a5d661aSToomas Soome         BAD -- error in block data
53*4a5d661aSToomas Soome 
54*4a5d661aSToomas Soome    Notes:
55*4a5d661aSToomas Soome 
56*4a5d661aSToomas Soome     - The maximum input bits used by a length/distance pair is 15 bits for the
57*4a5d661aSToomas Soome       length code, 5 bits for the length extra, 15 bits for the distance code,
58*4a5d661aSToomas Soome       and 13 bits for the distance extra.  This totals 48 bits, or six bytes.
59*4a5d661aSToomas Soome       Therefore if strm->avail_in >= 6, then there is enough input to avoid
60*4a5d661aSToomas Soome       checking for available input while decoding.
61*4a5d661aSToomas Soome 
62*4a5d661aSToomas Soome     - The maximum bytes that a single length/distance pair can output is 258
63*4a5d661aSToomas Soome       bytes, which is the maximum length that can be coded.  inflate_fast()
64*4a5d661aSToomas Soome       requires strm->avail_out >= 258 for each loop to avoid checking for
65*4a5d661aSToomas Soome       output space.
66*4a5d661aSToomas Soome  */
67*4a5d661aSToomas Soome void ZLIB_INTERNAL inflate_fast(strm, start)
68*4a5d661aSToomas Soome z_streamp strm;
69*4a5d661aSToomas Soome unsigned start;         /* inflate()'s starting value for strm->avail_out */
70*4a5d661aSToomas Soome {
71*4a5d661aSToomas Soome     struct inflate_state FAR *state;
72*4a5d661aSToomas Soome     z_const unsigned char FAR *in;      /* local strm->next_in */
73*4a5d661aSToomas Soome     z_const unsigned char FAR *last;    /* have enough input while in < last */
74*4a5d661aSToomas Soome     unsigned char FAR *out;     /* local strm->next_out */
75*4a5d661aSToomas Soome     unsigned char FAR *beg;     /* inflate()'s initial strm->next_out */
76*4a5d661aSToomas Soome     unsigned char FAR *end;     /* while out < end, enough space available */
77*4a5d661aSToomas Soome #ifdef INFLATE_STRICT
78*4a5d661aSToomas Soome     unsigned dmax;              /* maximum distance from zlib header */
79*4a5d661aSToomas Soome #endif
80*4a5d661aSToomas Soome     unsigned wsize;             /* window size or zero if not using window */
81*4a5d661aSToomas Soome     unsigned whave;             /* valid bytes in the window */
82*4a5d661aSToomas Soome     unsigned wnext;             /* window write index */
83*4a5d661aSToomas Soome     unsigned char FAR *window;  /* allocated sliding window, if wsize != 0 */
84*4a5d661aSToomas Soome     unsigned long hold;         /* local strm->hold */
85*4a5d661aSToomas Soome     unsigned bits;              /* local strm->bits */
86*4a5d661aSToomas Soome     code const FAR *lcode;      /* local strm->lencode */
87*4a5d661aSToomas Soome     code const FAR *dcode;      /* local strm->distcode */
88*4a5d661aSToomas Soome     unsigned lmask;             /* mask for first level of length codes */
89*4a5d661aSToomas Soome     unsigned dmask;             /* mask for first level of distance codes */
90*4a5d661aSToomas Soome     code here;                  /* retrieved table entry */
91*4a5d661aSToomas Soome     unsigned op;                /* code bits, operation, extra bits, or */
92*4a5d661aSToomas Soome                                 /*  window position, window bytes to copy */
93*4a5d661aSToomas Soome     unsigned len;               /* match length, unused bytes */
94*4a5d661aSToomas Soome     unsigned dist;              /* match distance */
95*4a5d661aSToomas Soome     unsigned char FAR *from;    /* where to copy match from */
96*4a5d661aSToomas Soome 
97*4a5d661aSToomas Soome     /* copy state to local variables */
98*4a5d661aSToomas Soome     state = (struct inflate_state FAR *)strm->state;
99*4a5d661aSToomas Soome     in = strm->next_in - OFF;
100*4a5d661aSToomas Soome     last = in + (strm->avail_in - 5);
101*4a5d661aSToomas Soome     out = strm->next_out - OFF;
102*4a5d661aSToomas Soome     beg = out - (start - strm->avail_out);
103*4a5d661aSToomas Soome     end = out + (strm->avail_out - 257);
104*4a5d661aSToomas Soome #ifdef INFLATE_STRICT
105*4a5d661aSToomas Soome     dmax = state->dmax;
106*4a5d661aSToomas Soome #endif
107*4a5d661aSToomas Soome     wsize = state->wsize;
108*4a5d661aSToomas Soome     whave = state->whave;
109*4a5d661aSToomas Soome     wnext = state->wnext;
110*4a5d661aSToomas Soome     window = state->window;
111*4a5d661aSToomas Soome     hold = state->hold;
112*4a5d661aSToomas Soome     bits = state->bits;
113*4a5d661aSToomas Soome     lcode = state->lencode;
114*4a5d661aSToomas Soome     dcode = state->distcode;
115*4a5d661aSToomas Soome     lmask = (1U << state->lenbits) - 1;
116*4a5d661aSToomas Soome     dmask = (1U << state->distbits) - 1;
117*4a5d661aSToomas Soome 
118*4a5d661aSToomas Soome     /* decode literals and length/distances until end-of-block or not enough
119*4a5d661aSToomas Soome        input data or output space */
120*4a5d661aSToomas Soome     do {
121*4a5d661aSToomas Soome         if (bits < 15) {
122*4a5d661aSToomas Soome             hold += (unsigned long)(PUP(in)) << bits;
123*4a5d661aSToomas Soome             bits += 8;
124*4a5d661aSToomas Soome             hold += (unsigned long)(PUP(in)) << bits;
125*4a5d661aSToomas Soome             bits += 8;
126*4a5d661aSToomas Soome         }
127*4a5d661aSToomas Soome         here = lcode[hold & lmask];
128*4a5d661aSToomas Soome       dolen:
129*4a5d661aSToomas Soome         op = (unsigned)(here.bits);
130*4a5d661aSToomas Soome         hold >>= op;
131*4a5d661aSToomas Soome         bits -= op;
132*4a5d661aSToomas Soome         op = (unsigned)(here.op);
133*4a5d661aSToomas Soome         if (op == 0) {                          /* literal */
134*4a5d661aSToomas Soome             Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
135*4a5d661aSToomas Soome                     "inflate:         literal '%c'\n" :
136*4a5d661aSToomas Soome                     "inflate:         literal 0x%02x\n", here.val));
137*4a5d661aSToomas Soome             PUP(out) = (unsigned char)(here.val);
138*4a5d661aSToomas Soome         }
139*4a5d661aSToomas Soome         else if (op & 16) {                     /* length base */
140*4a5d661aSToomas Soome             len = (unsigned)(here.val);
141*4a5d661aSToomas Soome             op &= 15;                           /* number of extra bits */
142*4a5d661aSToomas Soome             if (op) {
143*4a5d661aSToomas Soome                 if (bits < op) {
144*4a5d661aSToomas Soome                     hold += (unsigned long)(PUP(in)) << bits;
145*4a5d661aSToomas Soome                     bits += 8;
146*4a5d661aSToomas Soome                 }
147*4a5d661aSToomas Soome                 len += (unsigned)hold & ((1U << op) - 1);
148*4a5d661aSToomas Soome                 hold >>= op;
149*4a5d661aSToomas Soome                 bits -= op;
150*4a5d661aSToomas Soome             }
151*4a5d661aSToomas Soome             Tracevv((stderr, "inflate:         length %u\n", len));
152*4a5d661aSToomas Soome             if (bits < 15) {
153*4a5d661aSToomas Soome                 hold += (unsigned long)(PUP(in)) << bits;
154*4a5d661aSToomas Soome                 bits += 8;
155*4a5d661aSToomas Soome                 hold += (unsigned long)(PUP(in)) << bits;
156*4a5d661aSToomas Soome                 bits += 8;
157*4a5d661aSToomas Soome             }
158*4a5d661aSToomas Soome             here = dcode[hold & dmask];
159*4a5d661aSToomas Soome           dodist:
160*4a5d661aSToomas Soome             op = (unsigned)(here.bits);
161*4a5d661aSToomas Soome             hold >>= op;
162*4a5d661aSToomas Soome             bits -= op;
163*4a5d661aSToomas Soome             op = (unsigned)(here.op);
164*4a5d661aSToomas Soome             if (op & 16) {                      /* distance base */
165*4a5d661aSToomas Soome                 dist = (unsigned)(here.val);
166*4a5d661aSToomas Soome                 op &= 15;                       /* number of extra bits */
167*4a5d661aSToomas Soome                 if (bits < op) {
168*4a5d661aSToomas Soome                     hold += (unsigned long)(PUP(in)) << bits;
169*4a5d661aSToomas Soome                     bits += 8;
170*4a5d661aSToomas Soome                     if (bits < op) {
171*4a5d661aSToomas Soome                         hold += (unsigned long)(PUP(in)) << bits;
172*4a5d661aSToomas Soome                         bits += 8;
173*4a5d661aSToomas Soome                     }
174*4a5d661aSToomas Soome                 }
175*4a5d661aSToomas Soome                 dist += (unsigned)hold & ((1U << op) - 1);
176*4a5d661aSToomas Soome #ifdef INFLATE_STRICT
177*4a5d661aSToomas Soome                 if (dist > dmax) {
178*4a5d661aSToomas Soome                     strm->msg = (char *)"invalid distance too far back";
179*4a5d661aSToomas Soome                     state->mode = BAD;
180*4a5d661aSToomas Soome                     break;
181*4a5d661aSToomas Soome                 }
182*4a5d661aSToomas Soome #endif
183*4a5d661aSToomas Soome                 hold >>= op;
184*4a5d661aSToomas Soome                 bits -= op;
185*4a5d661aSToomas Soome                 Tracevv((stderr, "inflate:         distance %u\n", dist));
186*4a5d661aSToomas Soome                 op = (unsigned)(out - beg);     /* max distance in output */
187*4a5d661aSToomas Soome                 if (dist > op) {                /* see if copy from window */
188*4a5d661aSToomas Soome                     op = dist - op;             /* distance back in window */
189*4a5d661aSToomas Soome                     if (op > whave) {
190*4a5d661aSToomas Soome                         if (state->sane) {
191*4a5d661aSToomas Soome                             strm->msg =
192*4a5d661aSToomas Soome                                 (char *)"invalid distance too far back";
193*4a5d661aSToomas Soome                             state->mode = BAD;
194*4a5d661aSToomas Soome                             break;
195*4a5d661aSToomas Soome                         }
196*4a5d661aSToomas Soome #ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
197*4a5d661aSToomas Soome                         if (len <= op - whave) {
198*4a5d661aSToomas Soome                             do {
199*4a5d661aSToomas Soome                                 PUP(out) = 0;
200*4a5d661aSToomas Soome                             } while (--len);
201*4a5d661aSToomas Soome                             continue;
202*4a5d661aSToomas Soome                         }
203*4a5d661aSToomas Soome                         len -= op - whave;
204*4a5d661aSToomas Soome                         do {
205*4a5d661aSToomas Soome                             PUP(out) = 0;
206*4a5d661aSToomas Soome                         } while (--op > whave);
207*4a5d661aSToomas Soome                         if (op == 0) {
208*4a5d661aSToomas Soome                             from = out - dist;
209*4a5d661aSToomas Soome                             do {
210*4a5d661aSToomas Soome                                 PUP(out) = PUP(from);
211*4a5d661aSToomas Soome                             } while (--len);
212*4a5d661aSToomas Soome                             continue;
213*4a5d661aSToomas Soome                         }
214*4a5d661aSToomas Soome #endif
215*4a5d661aSToomas Soome                     }
216*4a5d661aSToomas Soome                     from = window - OFF;
217*4a5d661aSToomas Soome                     if (wnext == 0) {           /* very common case */
218*4a5d661aSToomas Soome                         from += wsize - op;
219*4a5d661aSToomas Soome                         if (op < len) {         /* some from window */
220*4a5d661aSToomas Soome                             len -= op;
221*4a5d661aSToomas Soome                             do {
222*4a5d661aSToomas Soome                                 PUP(out) = PUP(from);
223*4a5d661aSToomas Soome                             } while (--op);
224*4a5d661aSToomas Soome                             from = out - dist;  /* rest from output */
225*4a5d661aSToomas Soome                         }
226*4a5d661aSToomas Soome                     }
227*4a5d661aSToomas Soome                     else if (wnext < op) {      /* wrap around window */
228*4a5d661aSToomas Soome                         from += wsize + wnext - op;
229*4a5d661aSToomas Soome                         op -= wnext;
230*4a5d661aSToomas Soome                         if (op < len) {         /* some from end of window */
231*4a5d661aSToomas Soome                             len -= op;
232*4a5d661aSToomas Soome                             do {
233*4a5d661aSToomas Soome                                 PUP(out) = PUP(from);
234*4a5d661aSToomas Soome                             } while (--op);
235*4a5d661aSToomas Soome                             from = window - OFF;
236*4a5d661aSToomas Soome                             if (wnext < len) {  /* some from start of window */
237*4a5d661aSToomas Soome                                 op = wnext;
238*4a5d661aSToomas Soome                                 len -= op;
239*4a5d661aSToomas Soome                                 do {
240*4a5d661aSToomas Soome                                     PUP(out) = PUP(from);
241*4a5d661aSToomas Soome                                 } while (--op);
242*4a5d661aSToomas Soome                                 from = out - dist;      /* rest from output */
243*4a5d661aSToomas Soome                             }
244*4a5d661aSToomas Soome                         }
245*4a5d661aSToomas Soome                     }
246*4a5d661aSToomas Soome                     else {                      /* contiguous in window */
247*4a5d661aSToomas Soome                         from += wnext - op;
248*4a5d661aSToomas Soome                         if (op < len) {         /* some from window */
249*4a5d661aSToomas Soome                             len -= op;
250*4a5d661aSToomas Soome                             do {
251*4a5d661aSToomas Soome                                 PUP(out) = PUP(from);
252*4a5d661aSToomas Soome                             } while (--op);
253*4a5d661aSToomas Soome                             from = out - dist;  /* rest from output */
254*4a5d661aSToomas Soome                         }
255*4a5d661aSToomas Soome                     }
256*4a5d661aSToomas Soome                     while (len > 2) {
257*4a5d661aSToomas Soome                         PUP(out) = PUP(from);
258*4a5d661aSToomas Soome                         PUP(out) = PUP(from);
259*4a5d661aSToomas Soome                         PUP(out) = PUP(from);
260*4a5d661aSToomas Soome                         len -= 3;
261*4a5d661aSToomas Soome                     }
262*4a5d661aSToomas Soome                     if (len) {
263*4a5d661aSToomas Soome                         PUP(out) = PUP(from);
264*4a5d661aSToomas Soome                         if (len > 1)
265*4a5d661aSToomas Soome                             PUP(out) = PUP(from);
266*4a5d661aSToomas Soome                     }
267*4a5d661aSToomas Soome                 }
268*4a5d661aSToomas Soome                 else {
269*4a5d661aSToomas Soome                     from = out - dist;          /* copy direct from output */
270*4a5d661aSToomas Soome                     do {                        /* minimum length is three */
271*4a5d661aSToomas Soome                         PUP(out) = PUP(from);
272*4a5d661aSToomas Soome                         PUP(out) = PUP(from);
273*4a5d661aSToomas Soome                         PUP(out) = PUP(from);
274*4a5d661aSToomas Soome                         len -= 3;
275*4a5d661aSToomas Soome                     } while (len > 2);
276*4a5d661aSToomas Soome                     if (len) {
277*4a5d661aSToomas Soome                         PUP(out) = PUP(from);
278*4a5d661aSToomas Soome                         if (len > 1)
279*4a5d661aSToomas Soome                             PUP(out) = PUP(from);
280*4a5d661aSToomas Soome                     }
281*4a5d661aSToomas Soome                 }
282*4a5d661aSToomas Soome             }
283*4a5d661aSToomas Soome             else if ((op & 64) == 0) {          /* 2nd level distance code */
284*4a5d661aSToomas Soome                 here = dcode[here.val + (hold & ((1U << op) - 1))];
285*4a5d661aSToomas Soome                 goto dodist;
286*4a5d661aSToomas Soome             }
287*4a5d661aSToomas Soome             else {
288*4a5d661aSToomas Soome                 strm->msg = (char *)"invalid distance code";
289*4a5d661aSToomas Soome                 state->mode = BAD;
290*4a5d661aSToomas Soome                 break;
291*4a5d661aSToomas Soome             }
292*4a5d661aSToomas Soome         }
293*4a5d661aSToomas Soome         else if ((op & 64) == 0) {              /* 2nd level length code */
294*4a5d661aSToomas Soome             here = lcode[here.val + (hold & ((1U << op) - 1))];
295*4a5d661aSToomas Soome             goto dolen;
296*4a5d661aSToomas Soome         }
297*4a5d661aSToomas Soome         else if (op & 32) {                     /* end-of-block */
298*4a5d661aSToomas Soome             Tracevv((stderr, "inflate:         end of block\n"));
299*4a5d661aSToomas Soome             state->mode = TYPE;
300*4a5d661aSToomas Soome             break;
301*4a5d661aSToomas Soome         }
302*4a5d661aSToomas Soome         else {
303*4a5d661aSToomas Soome             strm->msg = (char *)"invalid literal/length code";
304*4a5d661aSToomas Soome             state->mode = BAD;
305*4a5d661aSToomas Soome             break;
306*4a5d661aSToomas Soome         }
307*4a5d661aSToomas Soome     } while (in < last && out < end);
308*4a5d661aSToomas Soome 
309*4a5d661aSToomas Soome     /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
310*4a5d661aSToomas Soome     len = bits >> 3;
311*4a5d661aSToomas Soome     in -= len;
312*4a5d661aSToomas Soome     bits -= len << 3;
313*4a5d661aSToomas Soome     hold &= (1U << bits) - 1;
314*4a5d661aSToomas Soome 
315*4a5d661aSToomas Soome     /* update state and return */
316*4a5d661aSToomas Soome     strm->next_in = in + OFF;
317*4a5d661aSToomas Soome     strm->next_out = out + OFF;
318*4a5d661aSToomas Soome     strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
319*4a5d661aSToomas Soome     strm->avail_out = (unsigned)(out < end ?
320*4a5d661aSToomas Soome                                  257 + (end - out) : 257 - (out - end));
321*4a5d661aSToomas Soome     state->hold = hold;
322*4a5d661aSToomas Soome     state->bits = bits;
323*4a5d661aSToomas Soome     return;
324*4a5d661aSToomas Soome }
325*4a5d661aSToomas Soome 
326*4a5d661aSToomas Soome /*
327*4a5d661aSToomas Soome    inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
328*4a5d661aSToomas Soome    - Using bit fields for code structure
329*4a5d661aSToomas Soome    - Different op definition to avoid & for extra bits (do & for table bits)
330*4a5d661aSToomas Soome    - Three separate decoding do-loops for direct, window, and wnext == 0
331*4a5d661aSToomas Soome    - Special case for distance > 1 copies to do overlapped load and store copy
332*4a5d661aSToomas Soome    - Explicit branch predictions (based on measured branch probabilities)
333*4a5d661aSToomas Soome    - Deferring match copy and interspersed it with decoding subsequent codes
334*4a5d661aSToomas Soome    - Swapping literal/length else
335*4a5d661aSToomas Soome    - Swapping window/direct else
336*4a5d661aSToomas Soome    - Larger unrolled copy loops (three is about right)
337*4a5d661aSToomas Soome    - Moving len -= 3 statement into middle of loop
338*4a5d661aSToomas Soome  */
339*4a5d661aSToomas Soome 
340*4a5d661aSToomas Soome #endif /* !ASMINF */
341