xref: /titanic_52/usr/src/contrib/zlib/inftrees.c (revision ab9e68a238043b4ee046d74742fe13407f00e542)
1*ab9e68a2SToomas Soome /* inftrees.c -- generate Huffman trees for efficient decoding
2*ab9e68a2SToomas Soome  * Copyright (C) 1995-2017 Mark Adler
3*ab9e68a2SToomas Soome  * For conditions of distribution and use, see copyright notice in zlib.h
4*ab9e68a2SToomas Soome  */
5*ab9e68a2SToomas Soome 
6*ab9e68a2SToomas Soome #include "zutil.h"
7*ab9e68a2SToomas Soome #include "inftrees.h"
8*ab9e68a2SToomas Soome 
9*ab9e68a2SToomas Soome #define MAXBITS 15
10*ab9e68a2SToomas Soome 
11*ab9e68a2SToomas Soome const char inflate_copyright[] =
12*ab9e68a2SToomas Soome    " inflate 1.2.11 Copyright 1995-2017 Mark Adler ";
13*ab9e68a2SToomas Soome /*
14*ab9e68a2SToomas Soome   If you use the zlib library in a product, an acknowledgment is welcome
15*ab9e68a2SToomas Soome   in the documentation of your product. If for some reason you cannot
16*ab9e68a2SToomas Soome   include such an acknowledgment, I would appreciate that you keep this
17*ab9e68a2SToomas Soome   copyright string in the executable of your product.
18*ab9e68a2SToomas Soome  */
19*ab9e68a2SToomas Soome 
20*ab9e68a2SToomas Soome /*
21*ab9e68a2SToomas Soome    Build a set of tables to decode the provided canonical Huffman code.
22*ab9e68a2SToomas Soome    The code lengths are lens[0..codes-1].  The result starts at *table,
23*ab9e68a2SToomas Soome    whose indices are 0..2^bits-1.  work is a writable array of at least
24*ab9e68a2SToomas Soome    lens shorts, which is used as a work area.  type is the type of code
25*ab9e68a2SToomas Soome    to be generated, CODES, LENS, or DISTS.  On return, zero is success,
26*ab9e68a2SToomas Soome    -1 is an invalid code, and +1 means that ENOUGH isn't enough.  table
27*ab9e68a2SToomas Soome    on return points to the next available entry's address.  bits is the
28*ab9e68a2SToomas Soome    requested root table index bits, and on return it is the actual root
29*ab9e68a2SToomas Soome    table index bits.  It will differ if the request is greater than the
30*ab9e68a2SToomas Soome    longest code or if it is less than the shortest code.
31*ab9e68a2SToomas Soome  */
32*ab9e68a2SToomas Soome int ZLIB_INTERNAL inflate_table(codetype type, unsigned short FAR *lens,
33*ab9e68a2SToomas Soome     unsigned codes, code FAR * FAR *table, unsigned FAR *bits,
34*ab9e68a2SToomas Soome     unsigned short FAR *work)
35*ab9e68a2SToomas Soome {
36*ab9e68a2SToomas Soome     unsigned len;               /* a code's length in bits */
37*ab9e68a2SToomas Soome     unsigned sym;               /* index of code symbols */
38*ab9e68a2SToomas Soome     unsigned min, max;          /* minimum and maximum code lengths */
39*ab9e68a2SToomas Soome     unsigned root;              /* number of index bits for root table */
40*ab9e68a2SToomas Soome     unsigned curr;              /* number of index bits for current table */
41*ab9e68a2SToomas Soome     unsigned drop;              /* code bits to drop for sub-table */
42*ab9e68a2SToomas Soome     int left;                   /* number of prefix codes available */
43*ab9e68a2SToomas Soome     unsigned used;              /* code entries in table used */
44*ab9e68a2SToomas Soome     unsigned huff;              /* Huffman code */
45*ab9e68a2SToomas Soome     unsigned incr;              /* for incrementing code, index */
46*ab9e68a2SToomas Soome     unsigned fill;              /* index for replicating entries */
47*ab9e68a2SToomas Soome     unsigned low;               /* low bits for current root entry */
48*ab9e68a2SToomas Soome     unsigned mask;              /* mask for low root bits */
49*ab9e68a2SToomas Soome     code here;                  /* table entry for duplication */
50*ab9e68a2SToomas Soome     code FAR *next;             /* next available space in table */
51*ab9e68a2SToomas Soome     const unsigned short FAR *base;     /* base value table to use */
52*ab9e68a2SToomas Soome     const unsigned short FAR *extra;    /* extra bits table to use */
53*ab9e68a2SToomas Soome     unsigned match;             /* use base and extra for symbol >= match */
54*ab9e68a2SToomas Soome     unsigned short count[MAXBITS+1];    /* number of codes of each length */
55*ab9e68a2SToomas Soome     unsigned short offs[MAXBITS+1];     /* offsets in table for each length */
56*ab9e68a2SToomas Soome     static const unsigned short lbase[31] = { /* Length codes 257..285 base */
57*ab9e68a2SToomas Soome         3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
58*ab9e68a2SToomas Soome         35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
59*ab9e68a2SToomas Soome     static const unsigned short lext[31] = { /* Length codes 257..285 extra */
60*ab9e68a2SToomas Soome         16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
61*ab9e68a2SToomas Soome         19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 77, 202};
62*ab9e68a2SToomas Soome     static const unsigned short dbase[32] = { /* Distance codes 0..29 base */
63*ab9e68a2SToomas Soome         1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
64*ab9e68a2SToomas Soome         257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
65*ab9e68a2SToomas Soome         8193, 12289, 16385, 24577, 0, 0};
66*ab9e68a2SToomas Soome     static const unsigned short dext[32] = { /* Distance codes 0..29 extra */
67*ab9e68a2SToomas Soome         16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22,
68*ab9e68a2SToomas Soome         23, 23, 24, 24, 25, 25, 26, 26, 27, 27,
69*ab9e68a2SToomas Soome         28, 28, 29, 29, 64, 64};
70*ab9e68a2SToomas Soome 
71*ab9e68a2SToomas Soome     /*
72*ab9e68a2SToomas Soome        Process a set of code lengths to create a canonical Huffman code.  The
73*ab9e68a2SToomas Soome        code lengths are lens[0..codes-1].  Each length corresponds to the
74*ab9e68a2SToomas Soome        symbols 0..codes-1.  The Huffman code is generated by first sorting the
75*ab9e68a2SToomas Soome        symbols by length from short to long, and retaining the symbol order
76*ab9e68a2SToomas Soome        for codes with equal lengths.  Then the code starts with all zero bits
77*ab9e68a2SToomas Soome        for the first code of the shortest length, and the codes are integer
78*ab9e68a2SToomas Soome        increments for the same length, and zeros are appended as the length
79*ab9e68a2SToomas Soome        increases.  For the deflate format, these bits are stored backwards
80*ab9e68a2SToomas Soome        from their more natural integer increment ordering, and so when the
81*ab9e68a2SToomas Soome        decoding tables are built in the large loop below, the integer codes
82*ab9e68a2SToomas Soome        are incremented backwards.
83*ab9e68a2SToomas Soome 
84*ab9e68a2SToomas Soome        This routine assumes, but does not check, that all of the entries in
85*ab9e68a2SToomas Soome        lens[] are in the range 0..MAXBITS.  The caller must assure this.
86*ab9e68a2SToomas Soome        1..MAXBITS is interpreted as that code length.  zero means that that
87*ab9e68a2SToomas Soome        symbol does not occur in this code.
88*ab9e68a2SToomas Soome 
89*ab9e68a2SToomas Soome        The codes are sorted by computing a count of codes for each length,
90*ab9e68a2SToomas Soome        creating from that a table of starting indices for each length in the
91*ab9e68a2SToomas Soome        sorted table, and then entering the symbols in order in the sorted
92*ab9e68a2SToomas Soome        table.  The sorted table is work[], with that space being provided by
93*ab9e68a2SToomas Soome        the caller.
94*ab9e68a2SToomas Soome 
95*ab9e68a2SToomas Soome        The length counts are used for other purposes as well, i.e. finding
96*ab9e68a2SToomas Soome        the minimum and maximum length codes, determining if there are any
97*ab9e68a2SToomas Soome        codes at all, checking for a valid set of lengths, and looking ahead
98*ab9e68a2SToomas Soome        at length counts to determine sub-table sizes when building the
99*ab9e68a2SToomas Soome        decoding tables.
100*ab9e68a2SToomas Soome      */
101*ab9e68a2SToomas Soome 
102*ab9e68a2SToomas Soome     /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */
103*ab9e68a2SToomas Soome     for (len = 0; len <= MAXBITS; len++)
104*ab9e68a2SToomas Soome         count[len] = 0;
105*ab9e68a2SToomas Soome     for (sym = 0; sym < codes; sym++)
106*ab9e68a2SToomas Soome         count[lens[sym]]++;
107*ab9e68a2SToomas Soome 
108*ab9e68a2SToomas Soome     /* bound code lengths, force root to be within code lengths */
109*ab9e68a2SToomas Soome     root = *bits;
110*ab9e68a2SToomas Soome     for (max = MAXBITS; max >= 1; max--)
111*ab9e68a2SToomas Soome         if (count[max] != 0) break;
112*ab9e68a2SToomas Soome     if (root > max) root = max;
113*ab9e68a2SToomas Soome     if (max == 0) {                     /* no symbols to code at all */
114*ab9e68a2SToomas Soome         here.op = (unsigned char)64;    /* invalid code marker */
115*ab9e68a2SToomas Soome         here.bits = (unsigned char)1;
116*ab9e68a2SToomas Soome         here.val = (unsigned short)0;
117*ab9e68a2SToomas Soome         *(*table)++ = here;             /* make a table to force an error */
118*ab9e68a2SToomas Soome         *(*table)++ = here;
119*ab9e68a2SToomas Soome         *bits = 1;
120*ab9e68a2SToomas Soome         return 0;     /* no symbols, but wait for decoding to report error */
121*ab9e68a2SToomas Soome     }
122*ab9e68a2SToomas Soome     for (min = 1; min < max; min++)
123*ab9e68a2SToomas Soome         if (count[min] != 0) break;
124*ab9e68a2SToomas Soome     if (root < min) root = min;
125*ab9e68a2SToomas Soome 
126*ab9e68a2SToomas Soome     /* check for an over-subscribed or incomplete set of lengths */
127*ab9e68a2SToomas Soome     left = 1;
128*ab9e68a2SToomas Soome     for (len = 1; len <= MAXBITS; len++) {
129*ab9e68a2SToomas Soome         left <<= 1;
130*ab9e68a2SToomas Soome         left -= count[len];
131*ab9e68a2SToomas Soome         if (left < 0) return -1;        /* over-subscribed */
132*ab9e68a2SToomas Soome     }
133*ab9e68a2SToomas Soome     if (left > 0 && (type == CODES || max != 1))
134*ab9e68a2SToomas Soome         return -1;                      /* incomplete set */
135*ab9e68a2SToomas Soome 
136*ab9e68a2SToomas Soome     /* generate offsets into symbol table for each length for sorting */
137*ab9e68a2SToomas Soome     offs[1] = 0;
138*ab9e68a2SToomas Soome     for (len = 1; len < MAXBITS; len++)
139*ab9e68a2SToomas Soome         offs[len + 1] = offs[len] + count[len];
140*ab9e68a2SToomas Soome 
141*ab9e68a2SToomas Soome     /* sort symbols by length, by symbol order within each length */
142*ab9e68a2SToomas Soome     for (sym = 0; sym < codes; sym++)
143*ab9e68a2SToomas Soome         if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym;
144*ab9e68a2SToomas Soome 
145*ab9e68a2SToomas Soome     /*
146*ab9e68a2SToomas Soome        Create and fill in decoding tables.  In this loop, the table being
147*ab9e68a2SToomas Soome        filled is at next and has curr index bits.  The code being used is huff
148*ab9e68a2SToomas Soome        with length len.  That code is converted to an index by dropping drop
149*ab9e68a2SToomas Soome        bits off of the bottom.  For codes where len is less than drop + curr,
150*ab9e68a2SToomas Soome        those top drop + curr - len bits are incremented through all values to
151*ab9e68a2SToomas Soome        fill the table with replicated entries.
152*ab9e68a2SToomas Soome 
153*ab9e68a2SToomas Soome        root is the number of index bits for the root table.  When len exceeds
154*ab9e68a2SToomas Soome        root, sub-tables are created pointed to by the root entry with an index
155*ab9e68a2SToomas Soome        of the low root bits of huff.  This is saved in low to check for when a
156*ab9e68a2SToomas Soome        new sub-table should be started.  drop is zero when the root table is
157*ab9e68a2SToomas Soome        being filled, and drop is root when sub-tables are being filled.
158*ab9e68a2SToomas Soome 
159*ab9e68a2SToomas Soome        When a new sub-table is needed, it is necessary to look ahead in the
160*ab9e68a2SToomas Soome        code lengths to determine what size sub-table is needed.  The length
161*ab9e68a2SToomas Soome        counts are used for this, and so count[] is decremented as codes are
162*ab9e68a2SToomas Soome        entered in the tables.
163*ab9e68a2SToomas Soome 
164*ab9e68a2SToomas Soome        used keeps track of how many table entries have been allocated from the
165*ab9e68a2SToomas Soome        provided *table space.  It is checked for LENS and DIST tables against
166*ab9e68a2SToomas Soome        the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in
167*ab9e68a2SToomas Soome        the initial root table size constants.  See the comments in inftrees.h
168*ab9e68a2SToomas Soome        for more information.
169*ab9e68a2SToomas Soome 
170*ab9e68a2SToomas Soome        sym increments through all symbols, and the loop terminates when
171*ab9e68a2SToomas Soome        all codes of length max, i.e. all codes, have been processed.  This
172*ab9e68a2SToomas Soome        routine permits incomplete codes, so another loop after this one fills
173*ab9e68a2SToomas Soome        in the rest of the decoding tables with invalid code markers.
174*ab9e68a2SToomas Soome      */
175*ab9e68a2SToomas Soome 
176*ab9e68a2SToomas Soome     /* set up for code type */
177*ab9e68a2SToomas Soome     switch (type) {
178*ab9e68a2SToomas Soome     case CODES:
179*ab9e68a2SToomas Soome         base = extra = work;    /* dummy value--not used */
180*ab9e68a2SToomas Soome         match = 20;
181*ab9e68a2SToomas Soome         break;
182*ab9e68a2SToomas Soome     case LENS:
183*ab9e68a2SToomas Soome         base = lbase;
184*ab9e68a2SToomas Soome         extra = lext;
185*ab9e68a2SToomas Soome         match = 257;
186*ab9e68a2SToomas Soome         break;
187*ab9e68a2SToomas Soome     default:    /* DISTS */
188*ab9e68a2SToomas Soome         base = dbase;
189*ab9e68a2SToomas Soome         extra = dext;
190*ab9e68a2SToomas Soome         match = 0;
191*ab9e68a2SToomas Soome     }
192*ab9e68a2SToomas Soome 
193*ab9e68a2SToomas Soome     /* initialize state for loop */
194*ab9e68a2SToomas Soome     huff = 0;                   /* starting code */
195*ab9e68a2SToomas Soome     sym = 0;                    /* starting code symbol */
196*ab9e68a2SToomas Soome     len = min;                  /* starting code length */
197*ab9e68a2SToomas Soome     next = *table;              /* current table to fill in */
198*ab9e68a2SToomas Soome     curr = root;                /* current table index bits */
199*ab9e68a2SToomas Soome     drop = 0;                   /* current bits to drop from code for index */
200*ab9e68a2SToomas Soome     low = (unsigned)(-1);       /* trigger new sub-table when len > root */
201*ab9e68a2SToomas Soome     used = 1U << root;          /* use root table entries */
202*ab9e68a2SToomas Soome     mask = used - 1;            /* mask for comparing low */
203*ab9e68a2SToomas Soome 
204*ab9e68a2SToomas Soome     /* check available table space */
205*ab9e68a2SToomas Soome     if ((type == LENS && used > ENOUGH_LENS) ||
206*ab9e68a2SToomas Soome         (type == DISTS && used > ENOUGH_DISTS))
207*ab9e68a2SToomas Soome         return 1;
208*ab9e68a2SToomas Soome 
209*ab9e68a2SToomas Soome     /* process all codes and make table entries */
210*ab9e68a2SToomas Soome     for (;;) {
211*ab9e68a2SToomas Soome         /* create table entry */
212*ab9e68a2SToomas Soome         here.bits = (unsigned char)(len - drop);
213*ab9e68a2SToomas Soome         if (work[sym] + 1U < match) {
214*ab9e68a2SToomas Soome             here.op = (unsigned char)0;
215*ab9e68a2SToomas Soome             here.val = work[sym];
216*ab9e68a2SToomas Soome         }
217*ab9e68a2SToomas Soome         else if (work[sym] >= match) {
218*ab9e68a2SToomas Soome             here.op = (unsigned char)(extra[work[sym] - match]);
219*ab9e68a2SToomas Soome             here.val = base[work[sym] - match];
220*ab9e68a2SToomas Soome         }
221*ab9e68a2SToomas Soome         else {
222*ab9e68a2SToomas Soome             here.op = (unsigned char)(32 + 64);         /* end of block */
223*ab9e68a2SToomas Soome             here.val = 0;
224*ab9e68a2SToomas Soome         }
225*ab9e68a2SToomas Soome 
226*ab9e68a2SToomas Soome         /* replicate for those indices with low len bits equal to huff */
227*ab9e68a2SToomas Soome         incr = 1U << (len - drop);
228*ab9e68a2SToomas Soome         fill = 1U << curr;
229*ab9e68a2SToomas Soome         min = fill;                 /* save offset to next table */
230*ab9e68a2SToomas Soome         do {
231*ab9e68a2SToomas Soome             fill -= incr;
232*ab9e68a2SToomas Soome             next[(huff >> drop) + fill] = here;
233*ab9e68a2SToomas Soome         } while (fill != 0);
234*ab9e68a2SToomas Soome 
235*ab9e68a2SToomas Soome         /* backwards increment the len-bit code huff */
236*ab9e68a2SToomas Soome         incr = 1U << (len - 1);
237*ab9e68a2SToomas Soome         while (huff & incr)
238*ab9e68a2SToomas Soome             incr >>= 1;
239*ab9e68a2SToomas Soome         if (incr != 0) {
240*ab9e68a2SToomas Soome             huff &= incr - 1;
241*ab9e68a2SToomas Soome             huff += incr;
242*ab9e68a2SToomas Soome         }
243*ab9e68a2SToomas Soome         else
244*ab9e68a2SToomas Soome             huff = 0;
245*ab9e68a2SToomas Soome 
246*ab9e68a2SToomas Soome         /* go to next symbol, update count, len */
247*ab9e68a2SToomas Soome         sym++;
248*ab9e68a2SToomas Soome         if (--(count[len]) == 0) {
249*ab9e68a2SToomas Soome             if (len == max) break;
250*ab9e68a2SToomas Soome             len = lens[work[sym]];
251*ab9e68a2SToomas Soome         }
252*ab9e68a2SToomas Soome 
253*ab9e68a2SToomas Soome         /* create new sub-table if needed */
254*ab9e68a2SToomas Soome         if (len > root && (huff & mask) != low) {
255*ab9e68a2SToomas Soome             /* if first time, transition to sub-tables */
256*ab9e68a2SToomas Soome             if (drop == 0)
257*ab9e68a2SToomas Soome                 drop = root;
258*ab9e68a2SToomas Soome 
259*ab9e68a2SToomas Soome             /* increment past last table */
260*ab9e68a2SToomas Soome             next += min;            /* here min is 1 << curr */
261*ab9e68a2SToomas Soome 
262*ab9e68a2SToomas Soome             /* determine length of next table */
263*ab9e68a2SToomas Soome             curr = len - drop;
264*ab9e68a2SToomas Soome             left = (int)(1 << curr);
265*ab9e68a2SToomas Soome             while (curr + drop < max) {
266*ab9e68a2SToomas Soome                 left -= count[curr + drop];
267*ab9e68a2SToomas Soome                 if (left <= 0) break;
268*ab9e68a2SToomas Soome                 curr++;
269*ab9e68a2SToomas Soome                 left <<= 1;
270*ab9e68a2SToomas Soome             }
271*ab9e68a2SToomas Soome 
272*ab9e68a2SToomas Soome             /* check for enough space */
273*ab9e68a2SToomas Soome             used += 1U << curr;
274*ab9e68a2SToomas Soome             if ((type == LENS && used > ENOUGH_LENS) ||
275*ab9e68a2SToomas Soome                 (type == DISTS && used > ENOUGH_DISTS))
276*ab9e68a2SToomas Soome                 return 1;
277*ab9e68a2SToomas Soome 
278*ab9e68a2SToomas Soome             /* point entry in root table to sub-table */
279*ab9e68a2SToomas Soome             low = huff & mask;
280*ab9e68a2SToomas Soome             (*table)[low].op = (unsigned char)curr;
281*ab9e68a2SToomas Soome             (*table)[low].bits = (unsigned char)root;
282*ab9e68a2SToomas Soome             (*table)[low].val = (unsigned short)(next - *table);
283*ab9e68a2SToomas Soome         }
284*ab9e68a2SToomas Soome     }
285*ab9e68a2SToomas Soome 
286*ab9e68a2SToomas Soome     /* fill in remaining table entry if code is incomplete (guaranteed to have
287*ab9e68a2SToomas Soome        at most one remaining entry, since if the code is incomplete, the
288*ab9e68a2SToomas Soome        maximum code length that was allowed to get this far is one bit) */
289*ab9e68a2SToomas Soome     if (huff != 0) {
290*ab9e68a2SToomas Soome         here.op = (unsigned char)64;            /* invalid code marker */
291*ab9e68a2SToomas Soome         here.bits = (unsigned char)(len - drop);
292*ab9e68a2SToomas Soome         here.val = (unsigned short)0;
293*ab9e68a2SToomas Soome         next[huff] = here;
294*ab9e68a2SToomas Soome     }
295*ab9e68a2SToomas Soome 
296*ab9e68a2SToomas Soome     /* set return parameters */
297*ab9e68a2SToomas Soome     *table += used;
298*ab9e68a2SToomas Soome     *bits = root;
299*ab9e68a2SToomas Soome     return 0;
300*ab9e68a2SToomas Soome }
301