1*ab9e68a2SToomas Soome /* inffast.c -- fast decoding 2*ab9e68a2SToomas Soome * Copyright (C) 1995-2017 Mark Adler 3*ab9e68a2SToomas Soome * For conditions of distribution and use, see copyright notice in zlib.h 4*ab9e68a2SToomas Soome */ 5*ab9e68a2SToomas Soome 6*ab9e68a2SToomas Soome #include "zutil.h" 7*ab9e68a2SToomas Soome #include "inftrees.h" 8*ab9e68a2SToomas Soome #include "inflate.h" 9*ab9e68a2SToomas Soome #include "inffast.h" 10*ab9e68a2SToomas Soome 11*ab9e68a2SToomas Soome #ifdef ASMINF 12*ab9e68a2SToomas Soome # pragma message("Assembler code may have bugs -- use at your own risk") 13*ab9e68a2SToomas Soome #else 14*ab9e68a2SToomas Soome 15*ab9e68a2SToomas Soome /* 16*ab9e68a2SToomas Soome Decode literal, length, and distance codes and write out the resulting 17*ab9e68a2SToomas Soome literal and match bytes until either not enough input or output is 18*ab9e68a2SToomas Soome available, an end-of-block is encountered, or a data error is encountered. 19*ab9e68a2SToomas Soome When large enough input and output buffers are supplied to inflate(), for 20*ab9e68a2SToomas Soome example, a 16K input buffer and a 64K output buffer, more than 95% of the 21*ab9e68a2SToomas Soome inflate execution time is spent in this routine. 22*ab9e68a2SToomas Soome 23*ab9e68a2SToomas Soome Entry assumptions: 24*ab9e68a2SToomas Soome 25*ab9e68a2SToomas Soome state->mode == LEN 26*ab9e68a2SToomas Soome strm->avail_in >= 6 27*ab9e68a2SToomas Soome strm->avail_out >= 258 28*ab9e68a2SToomas Soome start >= strm->avail_out 29*ab9e68a2SToomas Soome state->bits < 8 30*ab9e68a2SToomas Soome 31*ab9e68a2SToomas Soome On return, state->mode is one of: 32*ab9e68a2SToomas Soome 33*ab9e68a2SToomas Soome LEN -- ran out of enough output space or enough available input 34*ab9e68a2SToomas Soome TYPE -- reached end of block code, inflate() to interpret next block 35*ab9e68a2SToomas Soome BAD -- error in block data 36*ab9e68a2SToomas Soome 37*ab9e68a2SToomas Soome Notes: 38*ab9e68a2SToomas Soome 39*ab9e68a2SToomas Soome - The maximum input bits used by a length/distance pair is 15 bits for the 40*ab9e68a2SToomas Soome length code, 5 bits for the length extra, 15 bits for the distance code, 41*ab9e68a2SToomas Soome and 13 bits for the distance extra. This totals 48 bits, or six bytes. 42*ab9e68a2SToomas Soome Therefore if strm->avail_in >= 6, then there is enough input to avoid 43*ab9e68a2SToomas Soome checking for available input while decoding. 44*ab9e68a2SToomas Soome 45*ab9e68a2SToomas Soome - The maximum bytes that a single length/distance pair can output is 258 46*ab9e68a2SToomas Soome bytes, which is the maximum length that can be coded. inflate_fast() 47*ab9e68a2SToomas Soome requires strm->avail_out >= 258 for each loop to avoid checking for 48*ab9e68a2SToomas Soome output space. 49*ab9e68a2SToomas Soome */ 50*ab9e68a2SToomas Soome void ZLIB_INTERNAL inflate_fast(z_streamp strm, unsigned start) 51*ab9e68a2SToomas Soome { 52*ab9e68a2SToomas Soome struct inflate_state FAR *state; 53*ab9e68a2SToomas Soome z_const unsigned char FAR *in; /* local strm->next_in */ 54*ab9e68a2SToomas Soome z_const unsigned char FAR *last; /* have enough input while in < last */ 55*ab9e68a2SToomas Soome unsigned char FAR *out; /* local strm->next_out */ 56*ab9e68a2SToomas Soome unsigned char FAR *beg; /* inflate()'s initial strm->next_out */ 57*ab9e68a2SToomas Soome unsigned char FAR *end; /* while out < end, enough space available */ 58*ab9e68a2SToomas Soome #ifdef INFLATE_STRICT 59*ab9e68a2SToomas Soome unsigned dmax; /* maximum distance from zlib header */ 60*ab9e68a2SToomas Soome #endif 61*ab9e68a2SToomas Soome unsigned wsize; /* window size or zero if not using window */ 62*ab9e68a2SToomas Soome unsigned whave; /* valid bytes in the window */ 63*ab9e68a2SToomas Soome unsigned wnext; /* window write index */ 64*ab9e68a2SToomas Soome unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */ 65*ab9e68a2SToomas Soome unsigned long hold; /* local strm->hold */ 66*ab9e68a2SToomas Soome unsigned bits; /* local strm->bits */ 67*ab9e68a2SToomas Soome code const FAR *lcode; /* local strm->lencode */ 68*ab9e68a2SToomas Soome code const FAR *dcode; /* local strm->distcode */ 69*ab9e68a2SToomas Soome unsigned lmask; /* mask for first level of length codes */ 70*ab9e68a2SToomas Soome unsigned dmask; /* mask for first level of distance codes */ 71*ab9e68a2SToomas Soome code here; /* retrieved table entry */ 72*ab9e68a2SToomas Soome unsigned op; /* code bits, operation, extra bits, or */ 73*ab9e68a2SToomas Soome /* window position, window bytes to copy */ 74*ab9e68a2SToomas Soome unsigned len; /* match length, unused bytes */ 75*ab9e68a2SToomas Soome unsigned dist; /* match distance */ 76*ab9e68a2SToomas Soome unsigned char FAR *from; /* where to copy match from */ 77*ab9e68a2SToomas Soome 78*ab9e68a2SToomas Soome /* copy state to local variables */ 79*ab9e68a2SToomas Soome state = (struct inflate_state FAR *)strm->state; 80*ab9e68a2SToomas Soome in = strm->next_in; 81*ab9e68a2SToomas Soome last = in + (strm->avail_in - 5); 82*ab9e68a2SToomas Soome out = strm->next_out; 83*ab9e68a2SToomas Soome beg = out - (start - strm->avail_out); 84*ab9e68a2SToomas Soome end = out + (strm->avail_out - 257); 85*ab9e68a2SToomas Soome #ifdef INFLATE_STRICT 86*ab9e68a2SToomas Soome dmax = state->dmax; 87*ab9e68a2SToomas Soome #endif 88*ab9e68a2SToomas Soome wsize = state->wsize; 89*ab9e68a2SToomas Soome whave = state->whave; 90*ab9e68a2SToomas Soome wnext = state->wnext; 91*ab9e68a2SToomas Soome window = state->window; 92*ab9e68a2SToomas Soome hold = state->hold; 93*ab9e68a2SToomas Soome bits = state->bits; 94*ab9e68a2SToomas Soome lcode = state->lencode; 95*ab9e68a2SToomas Soome dcode = state->distcode; 96*ab9e68a2SToomas Soome lmask = (1U << state->lenbits) - 1; 97*ab9e68a2SToomas Soome dmask = (1U << state->distbits) - 1; 98*ab9e68a2SToomas Soome 99*ab9e68a2SToomas Soome /* decode literals and length/distances until end-of-block or not enough 100*ab9e68a2SToomas Soome input data or output space */ 101*ab9e68a2SToomas Soome do { 102*ab9e68a2SToomas Soome if (bits < 15) { 103*ab9e68a2SToomas Soome hold += (unsigned long)(*in++) << bits; 104*ab9e68a2SToomas Soome bits += 8; 105*ab9e68a2SToomas Soome hold += (unsigned long)(*in++) << bits; 106*ab9e68a2SToomas Soome bits += 8; 107*ab9e68a2SToomas Soome } 108*ab9e68a2SToomas Soome here = lcode[hold & lmask]; 109*ab9e68a2SToomas Soome dolen: 110*ab9e68a2SToomas Soome op = (unsigned)(here.bits); 111*ab9e68a2SToomas Soome hold >>= op; 112*ab9e68a2SToomas Soome bits -= op; 113*ab9e68a2SToomas Soome op = (unsigned)(here.op); 114*ab9e68a2SToomas Soome if (op == 0) { /* literal */ 115*ab9e68a2SToomas Soome Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? 116*ab9e68a2SToomas Soome "inflate: literal '%c'\n" : 117*ab9e68a2SToomas Soome "inflate: literal 0x%02x\n", here.val)); 118*ab9e68a2SToomas Soome *out++ = (unsigned char)(here.val); 119*ab9e68a2SToomas Soome } 120*ab9e68a2SToomas Soome else if (op & 16) { /* length base */ 121*ab9e68a2SToomas Soome len = (unsigned)(here.val); 122*ab9e68a2SToomas Soome op &= 15; /* number of extra bits */ 123*ab9e68a2SToomas Soome if (op) { 124*ab9e68a2SToomas Soome if (bits < op) { 125*ab9e68a2SToomas Soome hold += (unsigned long)(*in++) << bits; 126*ab9e68a2SToomas Soome bits += 8; 127*ab9e68a2SToomas Soome } 128*ab9e68a2SToomas Soome len += (unsigned)hold & ((1U << op) - 1); 129*ab9e68a2SToomas Soome hold >>= op; 130*ab9e68a2SToomas Soome bits -= op; 131*ab9e68a2SToomas Soome } 132*ab9e68a2SToomas Soome Tracevv((stderr, "inflate: length %u\n", len)); 133*ab9e68a2SToomas Soome if (bits < 15) { 134*ab9e68a2SToomas Soome hold += (unsigned long)(*in++) << bits; 135*ab9e68a2SToomas Soome bits += 8; 136*ab9e68a2SToomas Soome hold += (unsigned long)(*in++) << bits; 137*ab9e68a2SToomas Soome bits += 8; 138*ab9e68a2SToomas Soome } 139*ab9e68a2SToomas Soome here = dcode[hold & dmask]; 140*ab9e68a2SToomas Soome dodist: 141*ab9e68a2SToomas Soome op = (unsigned)(here.bits); 142*ab9e68a2SToomas Soome hold >>= op; 143*ab9e68a2SToomas Soome bits -= op; 144*ab9e68a2SToomas Soome op = (unsigned)(here.op); 145*ab9e68a2SToomas Soome if (op & 16) { /* distance base */ 146*ab9e68a2SToomas Soome dist = (unsigned)(here.val); 147*ab9e68a2SToomas Soome op &= 15; /* number of extra bits */ 148*ab9e68a2SToomas Soome if (bits < op) { 149*ab9e68a2SToomas Soome hold += (unsigned long)(*in++) << bits; 150*ab9e68a2SToomas Soome bits += 8; 151*ab9e68a2SToomas Soome if (bits < op) { 152*ab9e68a2SToomas Soome hold += (unsigned long)(*in++) << bits; 153*ab9e68a2SToomas Soome bits += 8; 154*ab9e68a2SToomas Soome } 155*ab9e68a2SToomas Soome } 156*ab9e68a2SToomas Soome dist += (unsigned)hold & ((1U << op) - 1); 157*ab9e68a2SToomas Soome #ifdef INFLATE_STRICT 158*ab9e68a2SToomas Soome if (dist > dmax) { 159*ab9e68a2SToomas Soome strm->msg = (char *)"invalid distance too far back"; 160*ab9e68a2SToomas Soome state->mode = BAD; 161*ab9e68a2SToomas Soome break; 162*ab9e68a2SToomas Soome } 163*ab9e68a2SToomas Soome #endif 164*ab9e68a2SToomas Soome hold >>= op; 165*ab9e68a2SToomas Soome bits -= op; 166*ab9e68a2SToomas Soome Tracevv((stderr, "inflate: distance %u\n", dist)); 167*ab9e68a2SToomas Soome op = (unsigned)(out - beg); /* max distance in output */ 168*ab9e68a2SToomas Soome if (dist > op) { /* see if copy from window */ 169*ab9e68a2SToomas Soome op = dist - op; /* distance back in window */ 170*ab9e68a2SToomas Soome if (op > whave) { 171*ab9e68a2SToomas Soome if (state->sane) { 172*ab9e68a2SToomas Soome strm->msg = 173*ab9e68a2SToomas Soome (char *)"invalid distance too far back"; 174*ab9e68a2SToomas Soome state->mode = BAD; 175*ab9e68a2SToomas Soome break; 176*ab9e68a2SToomas Soome } 177*ab9e68a2SToomas Soome #ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR 178*ab9e68a2SToomas Soome if (len <= op - whave) { 179*ab9e68a2SToomas Soome do { 180*ab9e68a2SToomas Soome *out++ = 0; 181*ab9e68a2SToomas Soome } while (--len); 182*ab9e68a2SToomas Soome continue; 183*ab9e68a2SToomas Soome } 184*ab9e68a2SToomas Soome len -= op - whave; 185*ab9e68a2SToomas Soome do { 186*ab9e68a2SToomas Soome *out++ = 0; 187*ab9e68a2SToomas Soome } while (--op > whave); 188*ab9e68a2SToomas Soome if (op == 0) { 189*ab9e68a2SToomas Soome from = out - dist; 190*ab9e68a2SToomas Soome do { 191*ab9e68a2SToomas Soome *out++ = *from++; 192*ab9e68a2SToomas Soome } while (--len); 193*ab9e68a2SToomas Soome continue; 194*ab9e68a2SToomas Soome } 195*ab9e68a2SToomas Soome #endif 196*ab9e68a2SToomas Soome } 197*ab9e68a2SToomas Soome from = window; 198*ab9e68a2SToomas Soome if (wnext == 0) { /* very common case */ 199*ab9e68a2SToomas Soome from += wsize - op; 200*ab9e68a2SToomas Soome if (op < len) { /* some from window */ 201*ab9e68a2SToomas Soome len -= op; 202*ab9e68a2SToomas Soome do { 203*ab9e68a2SToomas Soome *out++ = *from++; 204*ab9e68a2SToomas Soome } while (--op); 205*ab9e68a2SToomas Soome from = out - dist; /* rest from output */ 206*ab9e68a2SToomas Soome } 207*ab9e68a2SToomas Soome } 208*ab9e68a2SToomas Soome else if (wnext < op) { /* wrap around window */ 209*ab9e68a2SToomas Soome from += wsize + wnext - op; 210*ab9e68a2SToomas Soome op -= wnext; 211*ab9e68a2SToomas Soome if (op < len) { /* some from end of window */ 212*ab9e68a2SToomas Soome len -= op; 213*ab9e68a2SToomas Soome do { 214*ab9e68a2SToomas Soome *out++ = *from++; 215*ab9e68a2SToomas Soome } while (--op); 216*ab9e68a2SToomas Soome from = window; 217*ab9e68a2SToomas Soome if (wnext < len) { /* some from start of window */ 218*ab9e68a2SToomas Soome op = wnext; 219*ab9e68a2SToomas Soome len -= op; 220*ab9e68a2SToomas Soome do { 221*ab9e68a2SToomas Soome *out++ = *from++; 222*ab9e68a2SToomas Soome } while (--op); 223*ab9e68a2SToomas Soome from = out - dist; /* rest from output */ 224*ab9e68a2SToomas Soome } 225*ab9e68a2SToomas Soome } 226*ab9e68a2SToomas Soome } 227*ab9e68a2SToomas Soome else { /* contiguous in window */ 228*ab9e68a2SToomas Soome from += wnext - op; 229*ab9e68a2SToomas Soome if (op < len) { /* some from window */ 230*ab9e68a2SToomas Soome len -= op; 231*ab9e68a2SToomas Soome do { 232*ab9e68a2SToomas Soome *out++ = *from++; 233*ab9e68a2SToomas Soome } while (--op); 234*ab9e68a2SToomas Soome from = out - dist; /* rest from output */ 235*ab9e68a2SToomas Soome } 236*ab9e68a2SToomas Soome } 237*ab9e68a2SToomas Soome while (len > 2) { 238*ab9e68a2SToomas Soome *out++ = *from++; 239*ab9e68a2SToomas Soome *out++ = *from++; 240*ab9e68a2SToomas Soome *out++ = *from++; 241*ab9e68a2SToomas Soome len -= 3; 242*ab9e68a2SToomas Soome } 243*ab9e68a2SToomas Soome if (len) { 244*ab9e68a2SToomas Soome *out++ = *from++; 245*ab9e68a2SToomas Soome if (len > 1) 246*ab9e68a2SToomas Soome *out++ = *from++; 247*ab9e68a2SToomas Soome } 248*ab9e68a2SToomas Soome } 249*ab9e68a2SToomas Soome else { 250*ab9e68a2SToomas Soome from = out - dist; /* copy direct from output */ 251*ab9e68a2SToomas Soome do { /* minimum length is three */ 252*ab9e68a2SToomas Soome *out++ = *from++; 253*ab9e68a2SToomas Soome *out++ = *from++; 254*ab9e68a2SToomas Soome *out++ = *from++; 255*ab9e68a2SToomas Soome len -= 3; 256*ab9e68a2SToomas Soome } while (len > 2); 257*ab9e68a2SToomas Soome if (len) { 258*ab9e68a2SToomas Soome *out++ = *from++; 259*ab9e68a2SToomas Soome if (len > 1) 260*ab9e68a2SToomas Soome *out++ = *from++; 261*ab9e68a2SToomas Soome } 262*ab9e68a2SToomas Soome } 263*ab9e68a2SToomas Soome } 264*ab9e68a2SToomas Soome else if ((op & 64) == 0) { /* 2nd level distance code */ 265*ab9e68a2SToomas Soome here = dcode[here.val + (hold & ((1U << op) - 1))]; 266*ab9e68a2SToomas Soome goto dodist; 267*ab9e68a2SToomas Soome } 268*ab9e68a2SToomas Soome else { 269*ab9e68a2SToomas Soome strm->msg = (char *)"invalid distance code"; 270*ab9e68a2SToomas Soome state->mode = BAD; 271*ab9e68a2SToomas Soome break; 272*ab9e68a2SToomas Soome } 273*ab9e68a2SToomas Soome } 274*ab9e68a2SToomas Soome else if ((op & 64) == 0) { /* 2nd level length code */ 275*ab9e68a2SToomas Soome here = lcode[here.val + (hold & ((1U << op) - 1))]; 276*ab9e68a2SToomas Soome goto dolen; 277*ab9e68a2SToomas Soome } 278*ab9e68a2SToomas Soome else if (op & 32) { /* end-of-block */ 279*ab9e68a2SToomas Soome Tracevv((stderr, "inflate: end of block\n")); 280*ab9e68a2SToomas Soome state->mode = TYPE; 281*ab9e68a2SToomas Soome break; 282*ab9e68a2SToomas Soome } 283*ab9e68a2SToomas Soome else { 284*ab9e68a2SToomas Soome strm->msg = (char *)"invalid literal/length code"; 285*ab9e68a2SToomas Soome state->mode = BAD; 286*ab9e68a2SToomas Soome break; 287*ab9e68a2SToomas Soome } 288*ab9e68a2SToomas Soome } while (in < last && out < end); 289*ab9e68a2SToomas Soome 290*ab9e68a2SToomas Soome /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ 291*ab9e68a2SToomas Soome len = bits >> 3; 292*ab9e68a2SToomas Soome in -= len; 293*ab9e68a2SToomas Soome bits -= len << 3; 294*ab9e68a2SToomas Soome hold &= (1U << bits) - 1; 295*ab9e68a2SToomas Soome 296*ab9e68a2SToomas Soome /* update state and return */ 297*ab9e68a2SToomas Soome strm->next_in = in; 298*ab9e68a2SToomas Soome strm->next_out = out; 299*ab9e68a2SToomas Soome strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last)); 300*ab9e68a2SToomas Soome strm->avail_out = (unsigned)(out < end ? 301*ab9e68a2SToomas Soome 257 + (end - out) : 257 - (out - end)); 302*ab9e68a2SToomas Soome state->hold = hold; 303*ab9e68a2SToomas Soome state->bits = bits; 304*ab9e68a2SToomas Soome return; 305*ab9e68a2SToomas Soome } 306*ab9e68a2SToomas Soome 307*ab9e68a2SToomas Soome /* 308*ab9e68a2SToomas Soome inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): 309*ab9e68a2SToomas Soome - Using bit fields for code structure 310*ab9e68a2SToomas Soome - Different op definition to avoid & for extra bits (do & for table bits) 311*ab9e68a2SToomas Soome - Three separate decoding do-loops for direct, window, and wnext == 0 312*ab9e68a2SToomas Soome - Special case for distance > 1 copies to do overlapped load and store copy 313*ab9e68a2SToomas Soome - Explicit branch predictions (based on measured branch probabilities) 314*ab9e68a2SToomas Soome - Deferring match copy and interspersed it with decoding subsequent codes 315*ab9e68a2SToomas Soome - Swapping literal/length else 316*ab9e68a2SToomas Soome - Swapping window/direct else 317*ab9e68a2SToomas Soome - Larger unrolled copy loops (three is about right) 318*ab9e68a2SToomas Soome - Moving len -= 3 statement into middle of loop 319*ab9e68a2SToomas Soome */ 320*ab9e68a2SToomas Soome 321*ab9e68a2SToomas Soome #endif /* !ASMINF */ 322