1*ab9e68a2SToomas Soome /* inftrees.c -- generate Huffman trees for efficient decoding 2*ab9e68a2SToomas Soome * Copyright (C) 1995-2017 Mark Adler 3*ab9e68a2SToomas Soome * For conditions of distribution and use, see copyright notice in zlib.h 4*ab9e68a2SToomas Soome */ 5*ab9e68a2SToomas Soome 6*ab9e68a2SToomas Soome #include "zutil.h" 7*ab9e68a2SToomas Soome #include "inftrees.h" 8*ab9e68a2SToomas Soome 9*ab9e68a2SToomas Soome #define MAXBITS 15 10*ab9e68a2SToomas Soome 11*ab9e68a2SToomas Soome const char inflate_copyright[] = 12*ab9e68a2SToomas Soome " inflate 1.2.11 Copyright 1995-2017 Mark Adler "; 13*ab9e68a2SToomas Soome /* 14*ab9e68a2SToomas Soome If you use the zlib library in a product, an acknowledgment is welcome 15*ab9e68a2SToomas Soome in the documentation of your product. If for some reason you cannot 16*ab9e68a2SToomas Soome include such an acknowledgment, I would appreciate that you keep this 17*ab9e68a2SToomas Soome copyright string in the executable of your product. 18*ab9e68a2SToomas Soome */ 19*ab9e68a2SToomas Soome 20*ab9e68a2SToomas Soome /* 21*ab9e68a2SToomas Soome Build a set of tables to decode the provided canonical Huffman code. 22*ab9e68a2SToomas Soome The code lengths are lens[0..codes-1]. The result starts at *table, 23*ab9e68a2SToomas Soome whose indices are 0..2^bits-1. work is a writable array of at least 24*ab9e68a2SToomas Soome lens shorts, which is used as a work area. type is the type of code 25*ab9e68a2SToomas Soome to be generated, CODES, LENS, or DISTS. On return, zero is success, 26*ab9e68a2SToomas Soome -1 is an invalid code, and +1 means that ENOUGH isn't enough. table 27*ab9e68a2SToomas Soome on return points to the next available entry's address. bits is the 28*ab9e68a2SToomas Soome requested root table index bits, and on return it is the actual root 29*ab9e68a2SToomas Soome table index bits. It will differ if the request is greater than the 30*ab9e68a2SToomas Soome longest code or if it is less than the shortest code. 31*ab9e68a2SToomas Soome */ 32*ab9e68a2SToomas Soome int ZLIB_INTERNAL inflate_table(codetype type, unsigned short FAR *lens, 33*ab9e68a2SToomas Soome unsigned codes, code FAR * FAR *table, unsigned FAR *bits, 34*ab9e68a2SToomas Soome unsigned short FAR *work) 35*ab9e68a2SToomas Soome { 36*ab9e68a2SToomas Soome unsigned len; /* a code's length in bits */ 37*ab9e68a2SToomas Soome unsigned sym; /* index of code symbols */ 38*ab9e68a2SToomas Soome unsigned min, max; /* minimum and maximum code lengths */ 39*ab9e68a2SToomas Soome unsigned root; /* number of index bits for root table */ 40*ab9e68a2SToomas Soome unsigned curr; /* number of index bits for current table */ 41*ab9e68a2SToomas Soome unsigned drop; /* code bits to drop for sub-table */ 42*ab9e68a2SToomas Soome int left; /* number of prefix codes available */ 43*ab9e68a2SToomas Soome unsigned used; /* code entries in table used */ 44*ab9e68a2SToomas Soome unsigned huff; /* Huffman code */ 45*ab9e68a2SToomas Soome unsigned incr; /* for incrementing code, index */ 46*ab9e68a2SToomas Soome unsigned fill; /* index for replicating entries */ 47*ab9e68a2SToomas Soome unsigned low; /* low bits for current root entry */ 48*ab9e68a2SToomas Soome unsigned mask; /* mask for low root bits */ 49*ab9e68a2SToomas Soome code here; /* table entry for duplication */ 50*ab9e68a2SToomas Soome code FAR *next; /* next available space in table */ 51*ab9e68a2SToomas Soome const unsigned short FAR *base; /* base value table to use */ 52*ab9e68a2SToomas Soome const unsigned short FAR *extra; /* extra bits table to use */ 53*ab9e68a2SToomas Soome unsigned match; /* use base and extra for symbol >= match */ 54*ab9e68a2SToomas Soome unsigned short count[MAXBITS+1]; /* number of codes of each length */ 55*ab9e68a2SToomas Soome unsigned short offs[MAXBITS+1]; /* offsets in table for each length */ 56*ab9e68a2SToomas Soome static const unsigned short lbase[31] = { /* Length codes 257..285 base */ 57*ab9e68a2SToomas Soome 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 58*ab9e68a2SToomas Soome 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; 59*ab9e68a2SToomas Soome static const unsigned short lext[31] = { /* Length codes 257..285 extra */ 60*ab9e68a2SToomas Soome 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 61*ab9e68a2SToomas Soome 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 77, 202}; 62*ab9e68a2SToomas Soome static const unsigned short dbase[32] = { /* Distance codes 0..29 base */ 63*ab9e68a2SToomas Soome 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 64*ab9e68a2SToomas Soome 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 65*ab9e68a2SToomas Soome 8193, 12289, 16385, 24577, 0, 0}; 66*ab9e68a2SToomas Soome static const unsigned short dext[32] = { /* Distance codes 0..29 extra */ 67*ab9e68a2SToomas Soome 16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 68*ab9e68a2SToomas Soome 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 69*ab9e68a2SToomas Soome 28, 28, 29, 29, 64, 64}; 70*ab9e68a2SToomas Soome 71*ab9e68a2SToomas Soome /* 72*ab9e68a2SToomas Soome Process a set of code lengths to create a canonical Huffman code. The 73*ab9e68a2SToomas Soome code lengths are lens[0..codes-1]. Each length corresponds to the 74*ab9e68a2SToomas Soome symbols 0..codes-1. The Huffman code is generated by first sorting the 75*ab9e68a2SToomas Soome symbols by length from short to long, and retaining the symbol order 76*ab9e68a2SToomas Soome for codes with equal lengths. Then the code starts with all zero bits 77*ab9e68a2SToomas Soome for the first code of the shortest length, and the codes are integer 78*ab9e68a2SToomas Soome increments for the same length, and zeros are appended as the length 79*ab9e68a2SToomas Soome increases. For the deflate format, these bits are stored backwards 80*ab9e68a2SToomas Soome from their more natural integer increment ordering, and so when the 81*ab9e68a2SToomas Soome decoding tables are built in the large loop below, the integer codes 82*ab9e68a2SToomas Soome are incremented backwards. 83*ab9e68a2SToomas Soome 84*ab9e68a2SToomas Soome This routine assumes, but does not check, that all of the entries in 85*ab9e68a2SToomas Soome lens[] are in the range 0..MAXBITS. The caller must assure this. 86*ab9e68a2SToomas Soome 1..MAXBITS is interpreted as that code length. zero means that that 87*ab9e68a2SToomas Soome symbol does not occur in this code. 88*ab9e68a2SToomas Soome 89*ab9e68a2SToomas Soome The codes are sorted by computing a count of codes for each length, 90*ab9e68a2SToomas Soome creating from that a table of starting indices for each length in the 91*ab9e68a2SToomas Soome sorted table, and then entering the symbols in order in the sorted 92*ab9e68a2SToomas Soome table. The sorted table is work[], with that space being provided by 93*ab9e68a2SToomas Soome the caller. 94*ab9e68a2SToomas Soome 95*ab9e68a2SToomas Soome The length counts are used for other purposes as well, i.e. finding 96*ab9e68a2SToomas Soome the minimum and maximum length codes, determining if there are any 97*ab9e68a2SToomas Soome codes at all, checking for a valid set of lengths, and looking ahead 98*ab9e68a2SToomas Soome at length counts to determine sub-table sizes when building the 99*ab9e68a2SToomas Soome decoding tables. 100*ab9e68a2SToomas Soome */ 101*ab9e68a2SToomas Soome 102*ab9e68a2SToomas Soome /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */ 103*ab9e68a2SToomas Soome for (len = 0; len <= MAXBITS; len++) 104*ab9e68a2SToomas Soome count[len] = 0; 105*ab9e68a2SToomas Soome for (sym = 0; sym < codes; sym++) 106*ab9e68a2SToomas Soome count[lens[sym]]++; 107*ab9e68a2SToomas Soome 108*ab9e68a2SToomas Soome /* bound code lengths, force root to be within code lengths */ 109*ab9e68a2SToomas Soome root = *bits; 110*ab9e68a2SToomas Soome for (max = MAXBITS; max >= 1; max--) 111*ab9e68a2SToomas Soome if (count[max] != 0) break; 112*ab9e68a2SToomas Soome if (root > max) root = max; 113*ab9e68a2SToomas Soome if (max == 0) { /* no symbols to code at all */ 114*ab9e68a2SToomas Soome here.op = (unsigned char)64; /* invalid code marker */ 115*ab9e68a2SToomas Soome here.bits = (unsigned char)1; 116*ab9e68a2SToomas Soome here.val = (unsigned short)0; 117*ab9e68a2SToomas Soome *(*table)++ = here; /* make a table to force an error */ 118*ab9e68a2SToomas Soome *(*table)++ = here; 119*ab9e68a2SToomas Soome *bits = 1; 120*ab9e68a2SToomas Soome return 0; /* no symbols, but wait for decoding to report error */ 121*ab9e68a2SToomas Soome } 122*ab9e68a2SToomas Soome for (min = 1; min < max; min++) 123*ab9e68a2SToomas Soome if (count[min] != 0) break; 124*ab9e68a2SToomas Soome if (root < min) root = min; 125*ab9e68a2SToomas Soome 126*ab9e68a2SToomas Soome /* check for an over-subscribed or incomplete set of lengths */ 127*ab9e68a2SToomas Soome left = 1; 128*ab9e68a2SToomas Soome for (len = 1; len <= MAXBITS; len++) { 129*ab9e68a2SToomas Soome left <<= 1; 130*ab9e68a2SToomas Soome left -= count[len]; 131*ab9e68a2SToomas Soome if (left < 0) return -1; /* over-subscribed */ 132*ab9e68a2SToomas Soome } 133*ab9e68a2SToomas Soome if (left > 0 && (type == CODES || max != 1)) 134*ab9e68a2SToomas Soome return -1; /* incomplete set */ 135*ab9e68a2SToomas Soome 136*ab9e68a2SToomas Soome /* generate offsets into symbol table for each length for sorting */ 137*ab9e68a2SToomas Soome offs[1] = 0; 138*ab9e68a2SToomas Soome for (len = 1; len < MAXBITS; len++) 139*ab9e68a2SToomas Soome offs[len + 1] = offs[len] + count[len]; 140*ab9e68a2SToomas Soome 141*ab9e68a2SToomas Soome /* sort symbols by length, by symbol order within each length */ 142*ab9e68a2SToomas Soome for (sym = 0; sym < codes; sym++) 143*ab9e68a2SToomas Soome if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym; 144*ab9e68a2SToomas Soome 145*ab9e68a2SToomas Soome /* 146*ab9e68a2SToomas Soome Create and fill in decoding tables. In this loop, the table being 147*ab9e68a2SToomas Soome filled is at next and has curr index bits. The code being used is huff 148*ab9e68a2SToomas Soome with length len. That code is converted to an index by dropping drop 149*ab9e68a2SToomas Soome bits off of the bottom. For codes where len is less than drop + curr, 150*ab9e68a2SToomas Soome those top drop + curr - len bits are incremented through all values to 151*ab9e68a2SToomas Soome fill the table with replicated entries. 152*ab9e68a2SToomas Soome 153*ab9e68a2SToomas Soome root is the number of index bits for the root table. When len exceeds 154*ab9e68a2SToomas Soome root, sub-tables are created pointed to by the root entry with an index 155*ab9e68a2SToomas Soome of the low root bits of huff. This is saved in low to check for when a 156*ab9e68a2SToomas Soome new sub-table should be started. drop is zero when the root table is 157*ab9e68a2SToomas Soome being filled, and drop is root when sub-tables are being filled. 158*ab9e68a2SToomas Soome 159*ab9e68a2SToomas Soome When a new sub-table is needed, it is necessary to look ahead in the 160*ab9e68a2SToomas Soome code lengths to determine what size sub-table is needed. The length 161*ab9e68a2SToomas Soome counts are used for this, and so count[] is decremented as codes are 162*ab9e68a2SToomas Soome entered in the tables. 163*ab9e68a2SToomas Soome 164*ab9e68a2SToomas Soome used keeps track of how many table entries have been allocated from the 165*ab9e68a2SToomas Soome provided *table space. It is checked for LENS and DIST tables against 166*ab9e68a2SToomas Soome the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in 167*ab9e68a2SToomas Soome the initial root table size constants. See the comments in inftrees.h 168*ab9e68a2SToomas Soome for more information. 169*ab9e68a2SToomas Soome 170*ab9e68a2SToomas Soome sym increments through all symbols, and the loop terminates when 171*ab9e68a2SToomas Soome all codes of length max, i.e. all codes, have been processed. This 172*ab9e68a2SToomas Soome routine permits incomplete codes, so another loop after this one fills 173*ab9e68a2SToomas Soome in the rest of the decoding tables with invalid code markers. 174*ab9e68a2SToomas Soome */ 175*ab9e68a2SToomas Soome 176*ab9e68a2SToomas Soome /* set up for code type */ 177*ab9e68a2SToomas Soome switch (type) { 178*ab9e68a2SToomas Soome case CODES: 179*ab9e68a2SToomas Soome base = extra = work; /* dummy value--not used */ 180*ab9e68a2SToomas Soome match = 20; 181*ab9e68a2SToomas Soome break; 182*ab9e68a2SToomas Soome case LENS: 183*ab9e68a2SToomas Soome base = lbase; 184*ab9e68a2SToomas Soome extra = lext; 185*ab9e68a2SToomas Soome match = 257; 186*ab9e68a2SToomas Soome break; 187*ab9e68a2SToomas Soome default: /* DISTS */ 188*ab9e68a2SToomas Soome base = dbase; 189*ab9e68a2SToomas Soome extra = dext; 190*ab9e68a2SToomas Soome match = 0; 191*ab9e68a2SToomas Soome } 192*ab9e68a2SToomas Soome 193*ab9e68a2SToomas Soome /* initialize state for loop */ 194*ab9e68a2SToomas Soome huff = 0; /* starting code */ 195*ab9e68a2SToomas Soome sym = 0; /* starting code symbol */ 196*ab9e68a2SToomas Soome len = min; /* starting code length */ 197*ab9e68a2SToomas Soome next = *table; /* current table to fill in */ 198*ab9e68a2SToomas Soome curr = root; /* current table index bits */ 199*ab9e68a2SToomas Soome drop = 0; /* current bits to drop from code for index */ 200*ab9e68a2SToomas Soome low = (unsigned)(-1); /* trigger new sub-table when len > root */ 201*ab9e68a2SToomas Soome used = 1U << root; /* use root table entries */ 202*ab9e68a2SToomas Soome mask = used - 1; /* mask for comparing low */ 203*ab9e68a2SToomas Soome 204*ab9e68a2SToomas Soome /* check available table space */ 205*ab9e68a2SToomas Soome if ((type == LENS && used > ENOUGH_LENS) || 206*ab9e68a2SToomas Soome (type == DISTS && used > ENOUGH_DISTS)) 207*ab9e68a2SToomas Soome return 1; 208*ab9e68a2SToomas Soome 209*ab9e68a2SToomas Soome /* process all codes and make table entries */ 210*ab9e68a2SToomas Soome for (;;) { 211*ab9e68a2SToomas Soome /* create table entry */ 212*ab9e68a2SToomas Soome here.bits = (unsigned char)(len - drop); 213*ab9e68a2SToomas Soome if (work[sym] + 1U < match) { 214*ab9e68a2SToomas Soome here.op = (unsigned char)0; 215*ab9e68a2SToomas Soome here.val = work[sym]; 216*ab9e68a2SToomas Soome } 217*ab9e68a2SToomas Soome else if (work[sym] >= match) { 218*ab9e68a2SToomas Soome here.op = (unsigned char)(extra[work[sym] - match]); 219*ab9e68a2SToomas Soome here.val = base[work[sym] - match]; 220*ab9e68a2SToomas Soome } 221*ab9e68a2SToomas Soome else { 222*ab9e68a2SToomas Soome here.op = (unsigned char)(32 + 64); /* end of block */ 223*ab9e68a2SToomas Soome here.val = 0; 224*ab9e68a2SToomas Soome } 225*ab9e68a2SToomas Soome 226*ab9e68a2SToomas Soome /* replicate for those indices with low len bits equal to huff */ 227*ab9e68a2SToomas Soome incr = 1U << (len - drop); 228*ab9e68a2SToomas Soome fill = 1U << curr; 229*ab9e68a2SToomas Soome min = fill; /* save offset to next table */ 230*ab9e68a2SToomas Soome do { 231*ab9e68a2SToomas Soome fill -= incr; 232*ab9e68a2SToomas Soome next[(huff >> drop) + fill] = here; 233*ab9e68a2SToomas Soome } while (fill != 0); 234*ab9e68a2SToomas Soome 235*ab9e68a2SToomas Soome /* backwards increment the len-bit code huff */ 236*ab9e68a2SToomas Soome incr = 1U << (len - 1); 237*ab9e68a2SToomas Soome while (huff & incr) 238*ab9e68a2SToomas Soome incr >>= 1; 239*ab9e68a2SToomas Soome if (incr != 0) { 240*ab9e68a2SToomas Soome huff &= incr - 1; 241*ab9e68a2SToomas Soome huff += incr; 242*ab9e68a2SToomas Soome } 243*ab9e68a2SToomas Soome else 244*ab9e68a2SToomas Soome huff = 0; 245*ab9e68a2SToomas Soome 246*ab9e68a2SToomas Soome /* go to next symbol, update count, len */ 247*ab9e68a2SToomas Soome sym++; 248*ab9e68a2SToomas Soome if (--(count[len]) == 0) { 249*ab9e68a2SToomas Soome if (len == max) break; 250*ab9e68a2SToomas Soome len = lens[work[sym]]; 251*ab9e68a2SToomas Soome } 252*ab9e68a2SToomas Soome 253*ab9e68a2SToomas Soome /* create new sub-table if needed */ 254*ab9e68a2SToomas Soome if (len > root && (huff & mask) != low) { 255*ab9e68a2SToomas Soome /* if first time, transition to sub-tables */ 256*ab9e68a2SToomas Soome if (drop == 0) 257*ab9e68a2SToomas Soome drop = root; 258*ab9e68a2SToomas Soome 259*ab9e68a2SToomas Soome /* increment past last table */ 260*ab9e68a2SToomas Soome next += min; /* here min is 1 << curr */ 261*ab9e68a2SToomas Soome 262*ab9e68a2SToomas Soome /* determine length of next table */ 263*ab9e68a2SToomas Soome curr = len - drop; 264*ab9e68a2SToomas Soome left = (int)(1 << curr); 265*ab9e68a2SToomas Soome while (curr + drop < max) { 266*ab9e68a2SToomas Soome left -= count[curr + drop]; 267*ab9e68a2SToomas Soome if (left <= 0) break; 268*ab9e68a2SToomas Soome curr++; 269*ab9e68a2SToomas Soome left <<= 1; 270*ab9e68a2SToomas Soome } 271*ab9e68a2SToomas Soome 272*ab9e68a2SToomas Soome /* check for enough space */ 273*ab9e68a2SToomas Soome used += 1U << curr; 274*ab9e68a2SToomas Soome if ((type == LENS && used > ENOUGH_LENS) || 275*ab9e68a2SToomas Soome (type == DISTS && used > ENOUGH_DISTS)) 276*ab9e68a2SToomas Soome return 1; 277*ab9e68a2SToomas Soome 278*ab9e68a2SToomas Soome /* point entry in root table to sub-table */ 279*ab9e68a2SToomas Soome low = huff & mask; 280*ab9e68a2SToomas Soome (*table)[low].op = (unsigned char)curr; 281*ab9e68a2SToomas Soome (*table)[low].bits = (unsigned char)root; 282*ab9e68a2SToomas Soome (*table)[low].val = (unsigned short)(next - *table); 283*ab9e68a2SToomas Soome } 284*ab9e68a2SToomas Soome } 285*ab9e68a2SToomas Soome 286*ab9e68a2SToomas Soome /* fill in remaining table entry if code is incomplete (guaranteed to have 287*ab9e68a2SToomas Soome at most one remaining entry, since if the code is incomplete, the 288*ab9e68a2SToomas Soome maximum code length that was allowed to get this far is one bit) */ 289*ab9e68a2SToomas Soome if (huff != 0) { 290*ab9e68a2SToomas Soome here.op = (unsigned char)64; /* invalid code marker */ 291*ab9e68a2SToomas Soome here.bits = (unsigned char)(len - drop); 292*ab9e68a2SToomas Soome here.val = (unsigned short)0; 293*ab9e68a2SToomas Soome next[huff] = here; 294*ab9e68a2SToomas Soome } 295*ab9e68a2SToomas Soome 296*ab9e68a2SToomas Soome /* set return parameters */ 297*ab9e68a2SToomas Soome *table += used; 298*ab9e68a2SToomas Soome *bits = root; 299*ab9e68a2SToomas Soome return 0; 300*ab9e68a2SToomas Soome } 301