1*c9083b85SXin LI /* inffast.c -- fast decoding 2*c9083b85SXin LI * Copyright (C) 1995-2017 Mark Adler 3*c9083b85SXin LI * For conditions of distribution and use, see copyright notice in zlib.h 4*c9083b85SXin LI */ 5*c9083b85SXin LI 6*c9083b85SXin LI #include "zutil.h" 7*c9083b85SXin LI #include "inftrees.h" 8*c9083b85SXin LI #include "inflate.h" 9*c9083b85SXin LI #include "inffast.h" 10*c9083b85SXin LI 11*c9083b85SXin LI #ifdef ASMINF 12*c9083b85SXin LI # pragma message("Assembler code may have bugs -- use at your own risk") 13*c9083b85SXin LI #else 14*c9083b85SXin LI 15*c9083b85SXin LI /* 16*c9083b85SXin LI Decode literal, length, and distance codes and write out the resulting 17*c9083b85SXin LI literal and match bytes until either not enough input or output is 18*c9083b85SXin LI available, an end-of-block is encountered, or a data error is encountered. 19*c9083b85SXin LI When large enough input and output buffers are supplied to inflate(), for 20*c9083b85SXin LI example, a 16K input buffer and a 64K output buffer, more than 95% of the 21*c9083b85SXin LI inflate execution time is spent in this routine. 22*c9083b85SXin LI 23*c9083b85SXin LI Entry assumptions: 24*c9083b85SXin LI 25*c9083b85SXin LI state->mode == LEN 26*c9083b85SXin LI strm->avail_in >= 6 27*c9083b85SXin LI strm->avail_out >= 258 28*c9083b85SXin LI start >= strm->avail_out 29*c9083b85SXin LI state->bits < 8 30*c9083b85SXin LI 31*c9083b85SXin LI On return, state->mode is one of: 32*c9083b85SXin LI 33*c9083b85SXin LI LEN -- ran out of enough output space or enough available input 34*c9083b85SXin LI TYPE -- reached end of block code, inflate() to interpret next block 35*c9083b85SXin LI BAD -- error in block data 36*c9083b85SXin LI 37*c9083b85SXin LI Notes: 38*c9083b85SXin LI 39*c9083b85SXin LI - The maximum input bits used by a length/distance pair is 15 bits for the 40*c9083b85SXin LI length code, 5 bits for the length extra, 15 bits for the distance code, 41*c9083b85SXin LI and 13 bits for the distance extra. This totals 48 bits, or six bytes. 42*c9083b85SXin LI Therefore if strm->avail_in >= 6, then there is enough input to avoid 43*c9083b85SXin LI checking for available input while decoding. 44*c9083b85SXin LI 45*c9083b85SXin LI - The maximum bytes that a single length/distance pair can output is 258 46*c9083b85SXin LI bytes, which is the maximum length that can be coded. inflate_fast() 47*c9083b85SXin LI requires strm->avail_out >= 258 for each loop to avoid checking for 48*c9083b85SXin LI output space. 49*c9083b85SXin LI */ 50*c9083b85SXin LI void ZLIB_INTERNAL inflate_fast(strm, start) 51*c9083b85SXin LI z_streamp strm; 52*c9083b85SXin LI unsigned start; /* inflate()'s starting value for strm->avail_out */ 53*c9083b85SXin LI { 54*c9083b85SXin LI struct inflate_state FAR *state; 55*c9083b85SXin LI z_const unsigned char FAR *in; /* local strm->next_in */ 56*c9083b85SXin LI z_const unsigned char FAR *last; /* have enough input while in < last */ 57*c9083b85SXin LI unsigned char FAR *out; /* local strm->next_out */ 58*c9083b85SXin LI unsigned char FAR *beg; /* inflate()'s initial strm->next_out */ 59*c9083b85SXin LI unsigned char FAR *end; /* while out < end, enough space available */ 60*c9083b85SXin LI #ifdef INFLATE_STRICT 61*c9083b85SXin LI unsigned dmax; /* maximum distance from zlib header */ 62*c9083b85SXin LI #endif 63*c9083b85SXin LI unsigned wsize; /* window size or zero if not using window */ 64*c9083b85SXin LI unsigned whave; /* valid bytes in the window */ 65*c9083b85SXin LI unsigned wnext; /* window write index */ 66*c9083b85SXin LI unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */ 67*c9083b85SXin LI unsigned long hold; /* local strm->hold */ 68*c9083b85SXin LI unsigned bits; /* local strm->bits */ 69*c9083b85SXin LI code const FAR *lcode; /* local strm->lencode */ 70*c9083b85SXin LI code const FAR *dcode; /* local strm->distcode */ 71*c9083b85SXin LI unsigned lmask; /* mask for first level of length codes */ 72*c9083b85SXin LI unsigned dmask; /* mask for first level of distance codes */ 73*c9083b85SXin LI code here; /* retrieved table entry */ 74*c9083b85SXin LI unsigned op; /* code bits, operation, extra bits, or */ 75*c9083b85SXin LI /* window position, window bytes to copy */ 76*c9083b85SXin LI unsigned len; /* match length, unused bytes */ 77*c9083b85SXin LI unsigned dist; /* match distance */ 78*c9083b85SXin LI unsigned char FAR *from; /* where to copy match from */ 79*c9083b85SXin LI 80*c9083b85SXin LI /* copy state to local variables */ 81*c9083b85SXin LI state = (struct inflate_state FAR *)strm->state; 82*c9083b85SXin LI in = strm->next_in; 83*c9083b85SXin LI last = in + (strm->avail_in - 5); 84*c9083b85SXin LI out = strm->next_out; 85*c9083b85SXin LI beg = out - (start - strm->avail_out); 86*c9083b85SXin LI end = out + (strm->avail_out - 257); 87*c9083b85SXin LI #ifdef INFLATE_STRICT 88*c9083b85SXin LI dmax = state->dmax; 89*c9083b85SXin LI #endif 90*c9083b85SXin LI wsize = state->wsize; 91*c9083b85SXin LI whave = state->whave; 92*c9083b85SXin LI wnext = state->wnext; 93*c9083b85SXin LI window = state->window; 94*c9083b85SXin LI hold = state->hold; 95*c9083b85SXin LI bits = state->bits; 96*c9083b85SXin LI lcode = state->lencode; 97*c9083b85SXin LI dcode = state->distcode; 98*c9083b85SXin LI lmask = (1U << state->lenbits) - 1; 99*c9083b85SXin LI dmask = (1U << state->distbits) - 1; 100*c9083b85SXin LI 101*c9083b85SXin LI /* decode literals and length/distances until end-of-block or not enough 102*c9083b85SXin LI input data or output space */ 103*c9083b85SXin LI do { 104*c9083b85SXin LI if (bits < 15) { 105*c9083b85SXin LI hold += (unsigned long)(*in++) << bits; 106*c9083b85SXin LI bits += 8; 107*c9083b85SXin LI hold += (unsigned long)(*in++) << bits; 108*c9083b85SXin LI bits += 8; 109*c9083b85SXin LI } 110*c9083b85SXin LI here = lcode[hold & lmask]; 111*c9083b85SXin LI dolen: 112*c9083b85SXin LI op = (unsigned)(here.bits); 113*c9083b85SXin LI hold >>= op; 114*c9083b85SXin LI bits -= op; 115*c9083b85SXin LI op = (unsigned)(here.op); 116*c9083b85SXin LI if (op == 0) { /* literal */ 117*c9083b85SXin LI Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? 118*c9083b85SXin LI "inflate: literal '%c'\n" : 119*c9083b85SXin LI "inflate: literal 0x%02x\n", here.val)); 120*c9083b85SXin LI *out++ = (unsigned char)(here.val); 121*c9083b85SXin LI } 122*c9083b85SXin LI else if (op & 16) { /* length base */ 123*c9083b85SXin LI len = (unsigned)(here.val); 124*c9083b85SXin LI op &= 15; /* number of extra bits */ 125*c9083b85SXin LI if (op) { 126*c9083b85SXin LI if (bits < op) { 127*c9083b85SXin LI hold += (unsigned long)(*in++) << bits; 128*c9083b85SXin LI bits += 8; 129*c9083b85SXin LI } 130*c9083b85SXin LI len += (unsigned)hold & ((1U << op) - 1); 131*c9083b85SXin LI hold >>= op; 132*c9083b85SXin LI bits -= op; 133*c9083b85SXin LI } 134*c9083b85SXin LI Tracevv((stderr, "inflate: length %u\n", len)); 135*c9083b85SXin LI if (bits < 15) { 136*c9083b85SXin LI hold += (unsigned long)(*in++) << bits; 137*c9083b85SXin LI bits += 8; 138*c9083b85SXin LI hold += (unsigned long)(*in++) << bits; 139*c9083b85SXin LI bits += 8; 140*c9083b85SXin LI } 141*c9083b85SXin LI here = dcode[hold & dmask]; 142*c9083b85SXin LI dodist: 143*c9083b85SXin LI op = (unsigned)(here.bits); 144*c9083b85SXin LI hold >>= op; 145*c9083b85SXin LI bits -= op; 146*c9083b85SXin LI op = (unsigned)(here.op); 147*c9083b85SXin LI if (op & 16) { /* distance base */ 148*c9083b85SXin LI dist = (unsigned)(here.val); 149*c9083b85SXin LI op &= 15; /* number of extra bits */ 150*c9083b85SXin LI if (bits < op) { 151*c9083b85SXin LI hold += (unsigned long)(*in++) << bits; 152*c9083b85SXin LI bits += 8; 153*c9083b85SXin LI if (bits < op) { 154*c9083b85SXin LI hold += (unsigned long)(*in++) << bits; 155*c9083b85SXin LI bits += 8; 156*c9083b85SXin LI } 157*c9083b85SXin LI } 158*c9083b85SXin LI dist += (unsigned)hold & ((1U << op) - 1); 159*c9083b85SXin LI #ifdef INFLATE_STRICT 160*c9083b85SXin LI if (dist > dmax) { 161*c9083b85SXin LI strm->msg = (char *)"invalid distance too far back"; 162*c9083b85SXin LI state->mode = BAD; 163*c9083b85SXin LI break; 164*c9083b85SXin LI } 165*c9083b85SXin LI #endif 166*c9083b85SXin LI hold >>= op; 167*c9083b85SXin LI bits -= op; 168*c9083b85SXin LI Tracevv((stderr, "inflate: distance %u\n", dist)); 169*c9083b85SXin LI op = (unsigned)(out - beg); /* max distance in output */ 170*c9083b85SXin LI if (dist > op) { /* see if copy from window */ 171*c9083b85SXin LI op = dist - op; /* distance back in window */ 172*c9083b85SXin LI if (op > whave) { 173*c9083b85SXin LI if (state->sane) { 174*c9083b85SXin LI strm->msg = 175*c9083b85SXin LI (char *)"invalid distance too far back"; 176*c9083b85SXin LI state->mode = BAD; 177*c9083b85SXin LI break; 178*c9083b85SXin LI } 179*c9083b85SXin LI #ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR 180*c9083b85SXin LI if (len <= op - whave) { 181*c9083b85SXin LI do { 182*c9083b85SXin LI *out++ = 0; 183*c9083b85SXin LI } while (--len); 184*c9083b85SXin LI continue; 185*c9083b85SXin LI } 186*c9083b85SXin LI len -= op - whave; 187*c9083b85SXin LI do { 188*c9083b85SXin LI *out++ = 0; 189*c9083b85SXin LI } while (--op > whave); 190*c9083b85SXin LI if (op == 0) { 191*c9083b85SXin LI from = out - dist; 192*c9083b85SXin LI do { 193*c9083b85SXin LI *out++ = *from++; 194*c9083b85SXin LI } while (--len); 195*c9083b85SXin LI continue; 196*c9083b85SXin LI } 197*c9083b85SXin LI #endif 198*c9083b85SXin LI } 199*c9083b85SXin LI from = window; 200*c9083b85SXin LI if (wnext == 0) { /* very common case */ 201*c9083b85SXin LI from += wsize - op; 202*c9083b85SXin LI if (op < len) { /* some from window */ 203*c9083b85SXin LI len -= op; 204*c9083b85SXin LI do { 205*c9083b85SXin LI *out++ = *from++; 206*c9083b85SXin LI } while (--op); 207*c9083b85SXin LI from = out - dist; /* rest from output */ 208*c9083b85SXin LI } 209*c9083b85SXin LI } 210*c9083b85SXin LI else if (wnext < op) { /* wrap around window */ 211*c9083b85SXin LI from += wsize + wnext - op; 212*c9083b85SXin LI op -= wnext; 213*c9083b85SXin LI if (op < len) { /* some from end of window */ 214*c9083b85SXin LI len -= op; 215*c9083b85SXin LI do { 216*c9083b85SXin LI *out++ = *from++; 217*c9083b85SXin LI } while (--op); 218*c9083b85SXin LI from = window; 219*c9083b85SXin LI if (wnext < len) { /* some from start of window */ 220*c9083b85SXin LI op = wnext; 221*c9083b85SXin LI len -= op; 222*c9083b85SXin LI do { 223*c9083b85SXin LI *out++ = *from++; 224*c9083b85SXin LI } while (--op); 225*c9083b85SXin LI from = out - dist; /* rest from output */ 226*c9083b85SXin LI } 227*c9083b85SXin LI } 228*c9083b85SXin LI } 229*c9083b85SXin LI else { /* contiguous in window */ 230*c9083b85SXin LI from += wnext - op; 231*c9083b85SXin LI if (op < len) { /* some from window */ 232*c9083b85SXin LI len -= op; 233*c9083b85SXin LI do { 234*c9083b85SXin LI *out++ = *from++; 235*c9083b85SXin LI } while (--op); 236*c9083b85SXin LI from = out - dist; /* rest from output */ 237*c9083b85SXin LI } 238*c9083b85SXin LI } 239*c9083b85SXin LI while (len > 2) { 240*c9083b85SXin LI *out++ = *from++; 241*c9083b85SXin LI *out++ = *from++; 242*c9083b85SXin LI *out++ = *from++; 243*c9083b85SXin LI len -= 3; 244*c9083b85SXin LI } 245*c9083b85SXin LI if (len) { 246*c9083b85SXin LI *out++ = *from++; 247*c9083b85SXin LI if (len > 1) 248*c9083b85SXin LI *out++ = *from++; 249*c9083b85SXin LI } 250*c9083b85SXin LI } 251*c9083b85SXin LI else { 252*c9083b85SXin LI from = out - dist; /* copy direct from output */ 253*c9083b85SXin LI do { /* minimum length is three */ 254*c9083b85SXin LI *out++ = *from++; 255*c9083b85SXin LI *out++ = *from++; 256*c9083b85SXin LI *out++ = *from++; 257*c9083b85SXin LI len -= 3; 258*c9083b85SXin LI } while (len > 2); 259*c9083b85SXin LI if (len) { 260*c9083b85SXin LI *out++ = *from++; 261*c9083b85SXin LI if (len > 1) 262*c9083b85SXin LI *out++ = *from++; 263*c9083b85SXin LI } 264*c9083b85SXin LI } 265*c9083b85SXin LI } 266*c9083b85SXin LI else if ((op & 64) == 0) { /* 2nd level distance code */ 267*c9083b85SXin LI here = dcode[here.val + (hold & ((1U << op) - 1))]; 268*c9083b85SXin LI goto dodist; 269*c9083b85SXin LI } 270*c9083b85SXin LI else { 271*c9083b85SXin LI strm->msg = (char *)"invalid distance code"; 272*c9083b85SXin LI state->mode = BAD; 273*c9083b85SXin LI break; 274*c9083b85SXin LI } 275*c9083b85SXin LI } 276*c9083b85SXin LI else if ((op & 64) == 0) { /* 2nd level length code */ 277*c9083b85SXin LI here = lcode[here.val + (hold & ((1U << op) - 1))]; 278*c9083b85SXin LI goto dolen; 279*c9083b85SXin LI } 280*c9083b85SXin LI else if (op & 32) { /* end-of-block */ 281*c9083b85SXin LI Tracevv((stderr, "inflate: end of block\n")); 282*c9083b85SXin LI state->mode = TYPE; 283*c9083b85SXin LI break; 284*c9083b85SXin LI } 285*c9083b85SXin LI else { 286*c9083b85SXin LI strm->msg = (char *)"invalid literal/length code"; 287*c9083b85SXin LI state->mode = BAD; 288*c9083b85SXin LI break; 289*c9083b85SXin LI } 290*c9083b85SXin LI } while (in < last && out < end); 291*c9083b85SXin LI 292*c9083b85SXin LI /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ 293*c9083b85SXin LI len = bits >> 3; 294*c9083b85SXin LI in -= len; 295*c9083b85SXin LI bits -= len << 3; 296*c9083b85SXin LI hold &= (1U << bits) - 1; 297*c9083b85SXin LI 298*c9083b85SXin LI /* update state and return */ 299*c9083b85SXin LI strm->next_in = in; 300*c9083b85SXin LI strm->next_out = out; 301*c9083b85SXin LI strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last)); 302*c9083b85SXin LI strm->avail_out = (unsigned)(out < end ? 303*c9083b85SXin LI 257 + (end - out) : 257 - (out - end)); 304*c9083b85SXin LI state->hold = hold; 305*c9083b85SXin LI state->bits = bits; 306*c9083b85SXin LI return; 307*c9083b85SXin LI } 308*c9083b85SXin LI 309*c9083b85SXin LI /* 310*c9083b85SXin LI inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): 311*c9083b85SXin LI - Using bit fields for code structure 312*c9083b85SXin LI - Different op definition to avoid & for extra bits (do & for table bits) 313*c9083b85SXin LI - Three separate decoding do-loops for direct, window, and wnext == 0 314*c9083b85SXin LI - Special case for distance > 1 copies to do overlapped load and store copy 315*c9083b85SXin LI - Explicit branch predictions (based on measured branch probabilities) 316*c9083b85SXin LI - Deferring match copy and interspersed it with decoding subsequent codes 317*c9083b85SXin LI - Swapping literal/length else 318*c9083b85SXin LI - Swapping window/direct else 319*c9083b85SXin LI - Larger unrolled copy loops (three is about right) 320*c9083b85SXin LI - Moving len -= 3 statement into middle of loop 321*c9083b85SXin LI */ 322*c9083b85SXin LI 323*c9083b85SXin LI #endif /* !ASMINF */ 324