1*4a5d661aSToomas Soome /* inffast.c -- fast decoding 2*4a5d661aSToomas Soome * Copyright (C) 1995-2008, 2010, 2013 Mark Adler 3*4a5d661aSToomas Soome * For conditions of distribution and use, see copyright notice in zlib.h 4*4a5d661aSToomas Soome */ 5*4a5d661aSToomas Soome 6*4a5d661aSToomas Soome #include "zutil.h" 7*4a5d661aSToomas Soome #include "inftrees.h" 8*4a5d661aSToomas Soome #include "inflate.h" 9*4a5d661aSToomas Soome #include "inffast.h" 10*4a5d661aSToomas Soome 11*4a5d661aSToomas Soome #ifndef ASMINF 12*4a5d661aSToomas Soome 13*4a5d661aSToomas Soome /* Allow machine dependent optimization for post-increment or pre-increment. 14*4a5d661aSToomas Soome Based on testing to date, 15*4a5d661aSToomas Soome Pre-increment preferred for: 16*4a5d661aSToomas Soome - PowerPC G3 (Adler) 17*4a5d661aSToomas Soome - MIPS R5000 (Randers-Pehrson) 18*4a5d661aSToomas Soome Post-increment preferred for: 19*4a5d661aSToomas Soome - none 20*4a5d661aSToomas Soome No measurable difference: 21*4a5d661aSToomas Soome - Pentium III (Anderson) 22*4a5d661aSToomas Soome - M68060 (Nikl) 23*4a5d661aSToomas Soome */ 24*4a5d661aSToomas Soome #ifdef POSTINC 25*4a5d661aSToomas Soome # define OFF 0 26*4a5d661aSToomas Soome # define PUP(a) *(a)++ 27*4a5d661aSToomas Soome #else 28*4a5d661aSToomas Soome # define OFF 1 29*4a5d661aSToomas Soome # define PUP(a) *++(a) 30*4a5d661aSToomas Soome #endif 31*4a5d661aSToomas Soome 32*4a5d661aSToomas Soome /* 33*4a5d661aSToomas Soome Decode literal, length, and distance codes and write out the resulting 34*4a5d661aSToomas Soome literal and match bytes until either not enough input or output is 35*4a5d661aSToomas Soome available, an end-of-block is encountered, or a data error is encountered. 36*4a5d661aSToomas Soome When large enough input and output buffers are supplied to inflate(), for 37*4a5d661aSToomas Soome example, a 16K input buffer and a 64K output buffer, more than 95% of the 38*4a5d661aSToomas Soome inflate execution time is spent in this routine. 39*4a5d661aSToomas Soome 40*4a5d661aSToomas Soome Entry assumptions: 41*4a5d661aSToomas Soome 42*4a5d661aSToomas Soome state->mode == LEN 43*4a5d661aSToomas Soome strm->avail_in >= 6 44*4a5d661aSToomas Soome strm->avail_out >= 258 45*4a5d661aSToomas Soome start >= strm->avail_out 46*4a5d661aSToomas Soome state->bits < 8 47*4a5d661aSToomas Soome 48*4a5d661aSToomas Soome On return, state->mode is one of: 49*4a5d661aSToomas Soome 50*4a5d661aSToomas Soome LEN -- ran out of enough output space or enough available input 51*4a5d661aSToomas Soome TYPE -- reached end of block code, inflate() to interpret next block 52*4a5d661aSToomas Soome BAD -- error in block data 53*4a5d661aSToomas Soome 54*4a5d661aSToomas Soome Notes: 55*4a5d661aSToomas Soome 56*4a5d661aSToomas Soome - The maximum input bits used by a length/distance pair is 15 bits for the 57*4a5d661aSToomas Soome length code, 5 bits for the length extra, 15 bits for the distance code, 58*4a5d661aSToomas Soome and 13 bits for the distance extra. This totals 48 bits, or six bytes. 59*4a5d661aSToomas Soome Therefore if strm->avail_in >= 6, then there is enough input to avoid 60*4a5d661aSToomas Soome checking for available input while decoding. 61*4a5d661aSToomas Soome 62*4a5d661aSToomas Soome - The maximum bytes that a single length/distance pair can output is 258 63*4a5d661aSToomas Soome bytes, which is the maximum length that can be coded. inflate_fast() 64*4a5d661aSToomas Soome requires strm->avail_out >= 258 for each loop to avoid checking for 65*4a5d661aSToomas Soome output space. 66*4a5d661aSToomas Soome */ 67*4a5d661aSToomas Soome void ZLIB_INTERNAL inflate_fast(strm, start) 68*4a5d661aSToomas Soome z_streamp strm; 69*4a5d661aSToomas Soome unsigned start; /* inflate()'s starting value for strm->avail_out */ 70*4a5d661aSToomas Soome { 71*4a5d661aSToomas Soome struct inflate_state FAR *state; 72*4a5d661aSToomas Soome z_const unsigned char FAR *in; /* local strm->next_in */ 73*4a5d661aSToomas Soome z_const unsigned char FAR *last; /* have enough input while in < last */ 74*4a5d661aSToomas Soome unsigned char FAR *out; /* local strm->next_out */ 75*4a5d661aSToomas Soome unsigned char FAR *beg; /* inflate()'s initial strm->next_out */ 76*4a5d661aSToomas Soome unsigned char FAR *end; /* while out < end, enough space available */ 77*4a5d661aSToomas Soome #ifdef INFLATE_STRICT 78*4a5d661aSToomas Soome unsigned dmax; /* maximum distance from zlib header */ 79*4a5d661aSToomas Soome #endif 80*4a5d661aSToomas Soome unsigned wsize; /* window size or zero if not using window */ 81*4a5d661aSToomas Soome unsigned whave; /* valid bytes in the window */ 82*4a5d661aSToomas Soome unsigned wnext; /* window write index */ 83*4a5d661aSToomas Soome unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */ 84*4a5d661aSToomas Soome unsigned long hold; /* local strm->hold */ 85*4a5d661aSToomas Soome unsigned bits; /* local strm->bits */ 86*4a5d661aSToomas Soome code const FAR *lcode; /* local strm->lencode */ 87*4a5d661aSToomas Soome code const FAR *dcode; /* local strm->distcode */ 88*4a5d661aSToomas Soome unsigned lmask; /* mask for first level of length codes */ 89*4a5d661aSToomas Soome unsigned dmask; /* mask for first level of distance codes */ 90*4a5d661aSToomas Soome code here; /* retrieved table entry */ 91*4a5d661aSToomas Soome unsigned op; /* code bits, operation, extra bits, or */ 92*4a5d661aSToomas Soome /* window position, window bytes to copy */ 93*4a5d661aSToomas Soome unsigned len; /* match length, unused bytes */ 94*4a5d661aSToomas Soome unsigned dist; /* match distance */ 95*4a5d661aSToomas Soome unsigned char FAR *from; /* where to copy match from */ 96*4a5d661aSToomas Soome 97*4a5d661aSToomas Soome /* copy state to local variables */ 98*4a5d661aSToomas Soome state = (struct inflate_state FAR *)strm->state; 99*4a5d661aSToomas Soome in = strm->next_in - OFF; 100*4a5d661aSToomas Soome last = in + (strm->avail_in - 5); 101*4a5d661aSToomas Soome out = strm->next_out - OFF; 102*4a5d661aSToomas Soome beg = out - (start - strm->avail_out); 103*4a5d661aSToomas Soome end = out + (strm->avail_out - 257); 104*4a5d661aSToomas Soome #ifdef INFLATE_STRICT 105*4a5d661aSToomas Soome dmax = state->dmax; 106*4a5d661aSToomas Soome #endif 107*4a5d661aSToomas Soome wsize = state->wsize; 108*4a5d661aSToomas Soome whave = state->whave; 109*4a5d661aSToomas Soome wnext = state->wnext; 110*4a5d661aSToomas Soome window = state->window; 111*4a5d661aSToomas Soome hold = state->hold; 112*4a5d661aSToomas Soome bits = state->bits; 113*4a5d661aSToomas Soome lcode = state->lencode; 114*4a5d661aSToomas Soome dcode = state->distcode; 115*4a5d661aSToomas Soome lmask = (1U << state->lenbits) - 1; 116*4a5d661aSToomas Soome dmask = (1U << state->distbits) - 1; 117*4a5d661aSToomas Soome 118*4a5d661aSToomas Soome /* decode literals and length/distances until end-of-block or not enough 119*4a5d661aSToomas Soome input data or output space */ 120*4a5d661aSToomas Soome do { 121*4a5d661aSToomas Soome if (bits < 15) { 122*4a5d661aSToomas Soome hold += (unsigned long)(PUP(in)) << bits; 123*4a5d661aSToomas Soome bits += 8; 124*4a5d661aSToomas Soome hold += (unsigned long)(PUP(in)) << bits; 125*4a5d661aSToomas Soome bits += 8; 126*4a5d661aSToomas Soome } 127*4a5d661aSToomas Soome here = lcode[hold & lmask]; 128*4a5d661aSToomas Soome dolen: 129*4a5d661aSToomas Soome op = (unsigned)(here.bits); 130*4a5d661aSToomas Soome hold >>= op; 131*4a5d661aSToomas Soome bits -= op; 132*4a5d661aSToomas Soome op = (unsigned)(here.op); 133*4a5d661aSToomas Soome if (op == 0) { /* literal */ 134*4a5d661aSToomas Soome Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? 135*4a5d661aSToomas Soome "inflate: literal '%c'\n" : 136*4a5d661aSToomas Soome "inflate: literal 0x%02x\n", here.val)); 137*4a5d661aSToomas Soome PUP(out) = (unsigned char)(here.val); 138*4a5d661aSToomas Soome } 139*4a5d661aSToomas Soome else if (op & 16) { /* length base */ 140*4a5d661aSToomas Soome len = (unsigned)(here.val); 141*4a5d661aSToomas Soome op &= 15; /* number of extra bits */ 142*4a5d661aSToomas Soome if (op) { 143*4a5d661aSToomas Soome if (bits < op) { 144*4a5d661aSToomas Soome hold += (unsigned long)(PUP(in)) << bits; 145*4a5d661aSToomas Soome bits += 8; 146*4a5d661aSToomas Soome } 147*4a5d661aSToomas Soome len += (unsigned)hold & ((1U << op) - 1); 148*4a5d661aSToomas Soome hold >>= op; 149*4a5d661aSToomas Soome bits -= op; 150*4a5d661aSToomas Soome } 151*4a5d661aSToomas Soome Tracevv((stderr, "inflate: length %u\n", len)); 152*4a5d661aSToomas Soome if (bits < 15) { 153*4a5d661aSToomas Soome hold += (unsigned long)(PUP(in)) << bits; 154*4a5d661aSToomas Soome bits += 8; 155*4a5d661aSToomas Soome hold += (unsigned long)(PUP(in)) << bits; 156*4a5d661aSToomas Soome bits += 8; 157*4a5d661aSToomas Soome } 158*4a5d661aSToomas Soome here = dcode[hold & dmask]; 159*4a5d661aSToomas Soome dodist: 160*4a5d661aSToomas Soome op = (unsigned)(here.bits); 161*4a5d661aSToomas Soome hold >>= op; 162*4a5d661aSToomas Soome bits -= op; 163*4a5d661aSToomas Soome op = (unsigned)(here.op); 164*4a5d661aSToomas Soome if (op & 16) { /* distance base */ 165*4a5d661aSToomas Soome dist = (unsigned)(here.val); 166*4a5d661aSToomas Soome op &= 15; /* number of extra bits */ 167*4a5d661aSToomas Soome if (bits < op) { 168*4a5d661aSToomas Soome hold += (unsigned long)(PUP(in)) << bits; 169*4a5d661aSToomas Soome bits += 8; 170*4a5d661aSToomas Soome if (bits < op) { 171*4a5d661aSToomas Soome hold += (unsigned long)(PUP(in)) << bits; 172*4a5d661aSToomas Soome bits += 8; 173*4a5d661aSToomas Soome } 174*4a5d661aSToomas Soome } 175*4a5d661aSToomas Soome dist += (unsigned)hold & ((1U << op) - 1); 176*4a5d661aSToomas Soome #ifdef INFLATE_STRICT 177*4a5d661aSToomas Soome if (dist > dmax) { 178*4a5d661aSToomas Soome strm->msg = (char *)"invalid distance too far back"; 179*4a5d661aSToomas Soome state->mode = BAD; 180*4a5d661aSToomas Soome break; 181*4a5d661aSToomas Soome } 182*4a5d661aSToomas Soome #endif 183*4a5d661aSToomas Soome hold >>= op; 184*4a5d661aSToomas Soome bits -= op; 185*4a5d661aSToomas Soome Tracevv((stderr, "inflate: distance %u\n", dist)); 186*4a5d661aSToomas Soome op = (unsigned)(out - beg); /* max distance in output */ 187*4a5d661aSToomas Soome if (dist > op) { /* see if copy from window */ 188*4a5d661aSToomas Soome op = dist - op; /* distance back in window */ 189*4a5d661aSToomas Soome if (op > whave) { 190*4a5d661aSToomas Soome if (state->sane) { 191*4a5d661aSToomas Soome strm->msg = 192*4a5d661aSToomas Soome (char *)"invalid distance too far back"; 193*4a5d661aSToomas Soome state->mode = BAD; 194*4a5d661aSToomas Soome break; 195*4a5d661aSToomas Soome } 196*4a5d661aSToomas Soome #ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR 197*4a5d661aSToomas Soome if (len <= op - whave) { 198*4a5d661aSToomas Soome do { 199*4a5d661aSToomas Soome PUP(out) = 0; 200*4a5d661aSToomas Soome } while (--len); 201*4a5d661aSToomas Soome continue; 202*4a5d661aSToomas Soome } 203*4a5d661aSToomas Soome len -= op - whave; 204*4a5d661aSToomas Soome do { 205*4a5d661aSToomas Soome PUP(out) = 0; 206*4a5d661aSToomas Soome } while (--op > whave); 207*4a5d661aSToomas Soome if (op == 0) { 208*4a5d661aSToomas Soome from = out - dist; 209*4a5d661aSToomas Soome do { 210*4a5d661aSToomas Soome PUP(out) = PUP(from); 211*4a5d661aSToomas Soome } while (--len); 212*4a5d661aSToomas Soome continue; 213*4a5d661aSToomas Soome } 214*4a5d661aSToomas Soome #endif 215*4a5d661aSToomas Soome } 216*4a5d661aSToomas Soome from = window - OFF; 217*4a5d661aSToomas Soome if (wnext == 0) { /* very common case */ 218*4a5d661aSToomas Soome from += wsize - op; 219*4a5d661aSToomas Soome if (op < len) { /* some from window */ 220*4a5d661aSToomas Soome len -= op; 221*4a5d661aSToomas Soome do { 222*4a5d661aSToomas Soome PUP(out) = PUP(from); 223*4a5d661aSToomas Soome } while (--op); 224*4a5d661aSToomas Soome from = out - dist; /* rest from output */ 225*4a5d661aSToomas Soome } 226*4a5d661aSToomas Soome } 227*4a5d661aSToomas Soome else if (wnext < op) { /* wrap around window */ 228*4a5d661aSToomas Soome from += wsize + wnext - op; 229*4a5d661aSToomas Soome op -= wnext; 230*4a5d661aSToomas Soome if (op < len) { /* some from end of window */ 231*4a5d661aSToomas Soome len -= op; 232*4a5d661aSToomas Soome do { 233*4a5d661aSToomas Soome PUP(out) = PUP(from); 234*4a5d661aSToomas Soome } while (--op); 235*4a5d661aSToomas Soome from = window - OFF; 236*4a5d661aSToomas Soome if (wnext < len) { /* some from start of window */ 237*4a5d661aSToomas Soome op = wnext; 238*4a5d661aSToomas Soome len -= op; 239*4a5d661aSToomas Soome do { 240*4a5d661aSToomas Soome PUP(out) = PUP(from); 241*4a5d661aSToomas Soome } while (--op); 242*4a5d661aSToomas Soome from = out - dist; /* rest from output */ 243*4a5d661aSToomas Soome } 244*4a5d661aSToomas Soome } 245*4a5d661aSToomas Soome } 246*4a5d661aSToomas Soome else { /* contiguous in window */ 247*4a5d661aSToomas Soome from += wnext - op; 248*4a5d661aSToomas Soome if (op < len) { /* some from window */ 249*4a5d661aSToomas Soome len -= op; 250*4a5d661aSToomas Soome do { 251*4a5d661aSToomas Soome PUP(out) = PUP(from); 252*4a5d661aSToomas Soome } while (--op); 253*4a5d661aSToomas Soome from = out - dist; /* rest from output */ 254*4a5d661aSToomas Soome } 255*4a5d661aSToomas Soome } 256*4a5d661aSToomas Soome while (len > 2) { 257*4a5d661aSToomas Soome PUP(out) = PUP(from); 258*4a5d661aSToomas Soome PUP(out) = PUP(from); 259*4a5d661aSToomas Soome PUP(out) = PUP(from); 260*4a5d661aSToomas Soome len -= 3; 261*4a5d661aSToomas Soome } 262*4a5d661aSToomas Soome if (len) { 263*4a5d661aSToomas Soome PUP(out) = PUP(from); 264*4a5d661aSToomas Soome if (len > 1) 265*4a5d661aSToomas Soome PUP(out) = PUP(from); 266*4a5d661aSToomas Soome } 267*4a5d661aSToomas Soome } 268*4a5d661aSToomas Soome else { 269*4a5d661aSToomas Soome from = out - dist; /* copy direct from output */ 270*4a5d661aSToomas Soome do { /* minimum length is three */ 271*4a5d661aSToomas Soome PUP(out) = PUP(from); 272*4a5d661aSToomas Soome PUP(out) = PUP(from); 273*4a5d661aSToomas Soome PUP(out) = PUP(from); 274*4a5d661aSToomas Soome len -= 3; 275*4a5d661aSToomas Soome } while (len > 2); 276*4a5d661aSToomas Soome if (len) { 277*4a5d661aSToomas Soome PUP(out) = PUP(from); 278*4a5d661aSToomas Soome if (len > 1) 279*4a5d661aSToomas Soome PUP(out) = PUP(from); 280*4a5d661aSToomas Soome } 281*4a5d661aSToomas Soome } 282*4a5d661aSToomas Soome } 283*4a5d661aSToomas Soome else if ((op & 64) == 0) { /* 2nd level distance code */ 284*4a5d661aSToomas Soome here = dcode[here.val + (hold & ((1U << op) - 1))]; 285*4a5d661aSToomas Soome goto dodist; 286*4a5d661aSToomas Soome } 287*4a5d661aSToomas Soome else { 288*4a5d661aSToomas Soome strm->msg = (char *)"invalid distance code"; 289*4a5d661aSToomas Soome state->mode = BAD; 290*4a5d661aSToomas Soome break; 291*4a5d661aSToomas Soome } 292*4a5d661aSToomas Soome } 293*4a5d661aSToomas Soome else if ((op & 64) == 0) { /* 2nd level length code */ 294*4a5d661aSToomas Soome here = lcode[here.val + (hold & ((1U << op) - 1))]; 295*4a5d661aSToomas Soome goto dolen; 296*4a5d661aSToomas Soome } 297*4a5d661aSToomas Soome else if (op & 32) { /* end-of-block */ 298*4a5d661aSToomas Soome Tracevv((stderr, "inflate: end of block\n")); 299*4a5d661aSToomas Soome state->mode = TYPE; 300*4a5d661aSToomas Soome break; 301*4a5d661aSToomas Soome } 302*4a5d661aSToomas Soome else { 303*4a5d661aSToomas Soome strm->msg = (char *)"invalid literal/length code"; 304*4a5d661aSToomas Soome state->mode = BAD; 305*4a5d661aSToomas Soome break; 306*4a5d661aSToomas Soome } 307*4a5d661aSToomas Soome } while (in < last && out < end); 308*4a5d661aSToomas Soome 309*4a5d661aSToomas Soome /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ 310*4a5d661aSToomas Soome len = bits >> 3; 311*4a5d661aSToomas Soome in -= len; 312*4a5d661aSToomas Soome bits -= len << 3; 313*4a5d661aSToomas Soome hold &= (1U << bits) - 1; 314*4a5d661aSToomas Soome 315*4a5d661aSToomas Soome /* update state and return */ 316*4a5d661aSToomas Soome strm->next_in = in + OFF; 317*4a5d661aSToomas Soome strm->next_out = out + OFF; 318*4a5d661aSToomas Soome strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last)); 319*4a5d661aSToomas Soome strm->avail_out = (unsigned)(out < end ? 320*4a5d661aSToomas Soome 257 + (end - out) : 257 - (out - end)); 321*4a5d661aSToomas Soome state->hold = hold; 322*4a5d661aSToomas Soome state->bits = bits; 323*4a5d661aSToomas Soome return; 324*4a5d661aSToomas Soome } 325*4a5d661aSToomas Soome 326*4a5d661aSToomas Soome /* 327*4a5d661aSToomas Soome inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): 328*4a5d661aSToomas Soome - Using bit fields for code structure 329*4a5d661aSToomas Soome - Different op definition to avoid & for extra bits (do & for table bits) 330*4a5d661aSToomas Soome - Three separate decoding do-loops for direct, window, and wnext == 0 331*4a5d661aSToomas Soome - Special case for distance > 1 copies to do overlapped load and store copy 332*4a5d661aSToomas Soome - Explicit branch predictions (based on measured branch probabilities) 333*4a5d661aSToomas Soome - Deferring match copy and interspersed it with decoding subsequent codes 334*4a5d661aSToomas Soome - Swapping literal/length else 335*4a5d661aSToomas Soome - Swapping window/direct else 336*4a5d661aSToomas Soome - Larger unrolled copy loops (three is about right) 337*4a5d661aSToomas Soome - Moving len -= 3 statement into middle of loop 338*4a5d661aSToomas Soome */ 339*4a5d661aSToomas Soome 340*4a5d661aSToomas Soome #endif /* !ASMINF */ 341