1*4a5d661aSToomas Soome /* deflate.h -- internal compression state 2*4a5d661aSToomas Soome * Copyright (C) 1995-2012 Jean-loup Gailly 3*4a5d661aSToomas Soome * For conditions of distribution and use, see copyright notice in zlib.h 4*4a5d661aSToomas Soome */ 5*4a5d661aSToomas Soome 6*4a5d661aSToomas Soome /* WARNING: this file should *not* be used by applications. It is 7*4a5d661aSToomas Soome part of the implementation of the compression library and is 8*4a5d661aSToomas Soome subject to change. Applications should only use zlib.h. 9*4a5d661aSToomas Soome */ 10*4a5d661aSToomas Soome 11*4a5d661aSToomas Soome /* @(#) $Id$ */ 12*4a5d661aSToomas Soome 13*4a5d661aSToomas Soome #ifndef DEFLATE_H 14*4a5d661aSToomas Soome #define DEFLATE_H 15*4a5d661aSToomas Soome 16*4a5d661aSToomas Soome #include "zutil.h" 17*4a5d661aSToomas Soome 18*4a5d661aSToomas Soome /* define NO_GZIP when compiling if you want to disable gzip header and 19*4a5d661aSToomas Soome trailer creation by deflate(). NO_GZIP would be used to avoid linking in 20*4a5d661aSToomas Soome the crc code when it is not needed. For shared libraries, gzip encoding 21*4a5d661aSToomas Soome should be left enabled. */ 22*4a5d661aSToomas Soome #ifndef NO_GZIP 23*4a5d661aSToomas Soome # define GZIP 24*4a5d661aSToomas Soome #endif 25*4a5d661aSToomas Soome 26*4a5d661aSToomas Soome /* =========================================================================== 27*4a5d661aSToomas Soome * Internal compression state. 28*4a5d661aSToomas Soome */ 29*4a5d661aSToomas Soome 30*4a5d661aSToomas Soome #define LENGTH_CODES 29 31*4a5d661aSToomas Soome /* number of length codes, not counting the special END_BLOCK code */ 32*4a5d661aSToomas Soome 33*4a5d661aSToomas Soome #define LITERALS 256 34*4a5d661aSToomas Soome /* number of literal bytes 0..255 */ 35*4a5d661aSToomas Soome 36*4a5d661aSToomas Soome #define L_CODES (LITERALS+1+LENGTH_CODES) 37*4a5d661aSToomas Soome /* number of Literal or Length codes, including the END_BLOCK code */ 38*4a5d661aSToomas Soome 39*4a5d661aSToomas Soome #define D_CODES 30 40*4a5d661aSToomas Soome /* number of distance codes */ 41*4a5d661aSToomas Soome 42*4a5d661aSToomas Soome #define BL_CODES 19 43*4a5d661aSToomas Soome /* number of codes used to transfer the bit lengths */ 44*4a5d661aSToomas Soome 45*4a5d661aSToomas Soome #define HEAP_SIZE (2*L_CODES+1) 46*4a5d661aSToomas Soome /* maximum heap size */ 47*4a5d661aSToomas Soome 48*4a5d661aSToomas Soome #define MAX_BITS 15 49*4a5d661aSToomas Soome /* All codes must not exceed MAX_BITS bits */ 50*4a5d661aSToomas Soome 51*4a5d661aSToomas Soome #define Buf_size 16 52*4a5d661aSToomas Soome /* size of bit buffer in bi_buf */ 53*4a5d661aSToomas Soome 54*4a5d661aSToomas Soome #define INIT_STATE 42 55*4a5d661aSToomas Soome #define EXTRA_STATE 69 56*4a5d661aSToomas Soome #define NAME_STATE 73 57*4a5d661aSToomas Soome #define COMMENT_STATE 91 58*4a5d661aSToomas Soome #define HCRC_STATE 103 59*4a5d661aSToomas Soome #define BUSY_STATE 113 60*4a5d661aSToomas Soome #define FINISH_STATE 666 61*4a5d661aSToomas Soome /* Stream status */ 62*4a5d661aSToomas Soome 63*4a5d661aSToomas Soome 64*4a5d661aSToomas Soome /* Data structure describing a single value and its code string. */ 65*4a5d661aSToomas Soome typedef struct ct_data_s { 66*4a5d661aSToomas Soome union { 67*4a5d661aSToomas Soome ush freq; /* frequency count */ 68*4a5d661aSToomas Soome ush code; /* bit string */ 69*4a5d661aSToomas Soome } fc; 70*4a5d661aSToomas Soome union { 71*4a5d661aSToomas Soome ush dad; /* father node in Huffman tree */ 72*4a5d661aSToomas Soome ush len; /* length of bit string */ 73*4a5d661aSToomas Soome } dl; 74*4a5d661aSToomas Soome } FAR ct_data; 75*4a5d661aSToomas Soome 76*4a5d661aSToomas Soome #define Freq fc.freq 77*4a5d661aSToomas Soome #define Code fc.code 78*4a5d661aSToomas Soome #define Dad dl.dad 79*4a5d661aSToomas Soome #define Len dl.len 80*4a5d661aSToomas Soome 81*4a5d661aSToomas Soome typedef struct static_tree_desc_s static_tree_desc; 82*4a5d661aSToomas Soome 83*4a5d661aSToomas Soome typedef struct tree_desc_s { 84*4a5d661aSToomas Soome ct_data *dyn_tree; /* the dynamic tree */ 85*4a5d661aSToomas Soome int max_code; /* largest code with non zero frequency */ 86*4a5d661aSToomas Soome static_tree_desc *stat_desc; /* the corresponding static tree */ 87*4a5d661aSToomas Soome } FAR tree_desc; 88*4a5d661aSToomas Soome 89*4a5d661aSToomas Soome typedef ush Pos; 90*4a5d661aSToomas Soome typedef Pos FAR Posf; 91*4a5d661aSToomas Soome typedef unsigned IPos; 92*4a5d661aSToomas Soome 93*4a5d661aSToomas Soome /* A Pos is an index in the character window. We use short instead of int to 94*4a5d661aSToomas Soome * save space in the various tables. IPos is used only for parameter passing. 95*4a5d661aSToomas Soome */ 96*4a5d661aSToomas Soome 97*4a5d661aSToomas Soome typedef struct internal_state { 98*4a5d661aSToomas Soome z_streamp strm; /* pointer back to this zlib stream */ 99*4a5d661aSToomas Soome int status; /* as the name implies */ 100*4a5d661aSToomas Soome Bytef *pending_buf; /* output still pending */ 101*4a5d661aSToomas Soome ulg pending_buf_size; /* size of pending_buf */ 102*4a5d661aSToomas Soome Bytef *pending_out; /* next pending byte to output to the stream */ 103*4a5d661aSToomas Soome uInt pending; /* nb of bytes in the pending buffer */ 104*4a5d661aSToomas Soome int wrap; /* bit 0 true for zlib, bit 1 true for gzip */ 105*4a5d661aSToomas Soome gz_headerp gzhead; /* gzip header information to write */ 106*4a5d661aSToomas Soome uInt gzindex; /* where in extra, name, or comment */ 107*4a5d661aSToomas Soome Byte method; /* can only be DEFLATED */ 108*4a5d661aSToomas Soome int last_flush; /* value of flush param for previous deflate call */ 109*4a5d661aSToomas Soome 110*4a5d661aSToomas Soome /* used by deflate.c: */ 111*4a5d661aSToomas Soome 112*4a5d661aSToomas Soome uInt w_size; /* LZ77 window size (32K by default) */ 113*4a5d661aSToomas Soome uInt w_bits; /* log2(w_size) (8..16) */ 114*4a5d661aSToomas Soome uInt w_mask; /* w_size - 1 */ 115*4a5d661aSToomas Soome 116*4a5d661aSToomas Soome Bytef *window; 117*4a5d661aSToomas Soome /* Sliding window. Input bytes are read into the second half of the window, 118*4a5d661aSToomas Soome * and move to the first half later to keep a dictionary of at least wSize 119*4a5d661aSToomas Soome * bytes. With this organization, matches are limited to a distance of 120*4a5d661aSToomas Soome * wSize-MAX_MATCH bytes, but this ensures that IO is always 121*4a5d661aSToomas Soome * performed with a length multiple of the block size. Also, it limits 122*4a5d661aSToomas Soome * the window size to 64K, which is quite useful on MSDOS. 123*4a5d661aSToomas Soome * To do: use the user input buffer as sliding window. 124*4a5d661aSToomas Soome */ 125*4a5d661aSToomas Soome 126*4a5d661aSToomas Soome ulg window_size; 127*4a5d661aSToomas Soome /* Actual size of window: 2*wSize, except when the user input buffer 128*4a5d661aSToomas Soome * is directly used as sliding window. 129*4a5d661aSToomas Soome */ 130*4a5d661aSToomas Soome 131*4a5d661aSToomas Soome Posf *prev; 132*4a5d661aSToomas Soome /* Link to older string with same hash index. To limit the size of this 133*4a5d661aSToomas Soome * array to 64K, this link is maintained only for the last 32K strings. 134*4a5d661aSToomas Soome * An index in this array is thus a window index modulo 32K. 135*4a5d661aSToomas Soome */ 136*4a5d661aSToomas Soome 137*4a5d661aSToomas Soome Posf *head; /* Heads of the hash chains or NIL. */ 138*4a5d661aSToomas Soome 139*4a5d661aSToomas Soome uInt ins_h; /* hash index of string to be inserted */ 140*4a5d661aSToomas Soome uInt hash_size; /* number of elements in hash table */ 141*4a5d661aSToomas Soome uInt hash_bits; /* log2(hash_size) */ 142*4a5d661aSToomas Soome uInt hash_mask; /* hash_size-1 */ 143*4a5d661aSToomas Soome 144*4a5d661aSToomas Soome uInt hash_shift; 145*4a5d661aSToomas Soome /* Number of bits by which ins_h must be shifted at each input 146*4a5d661aSToomas Soome * step. It must be such that after MIN_MATCH steps, the oldest 147*4a5d661aSToomas Soome * byte no longer takes part in the hash key, that is: 148*4a5d661aSToomas Soome * hash_shift * MIN_MATCH >= hash_bits 149*4a5d661aSToomas Soome */ 150*4a5d661aSToomas Soome 151*4a5d661aSToomas Soome long block_start; 152*4a5d661aSToomas Soome /* Window position at the beginning of the current output block. Gets 153*4a5d661aSToomas Soome * negative when the window is moved backwards. 154*4a5d661aSToomas Soome */ 155*4a5d661aSToomas Soome 156*4a5d661aSToomas Soome uInt match_length; /* length of best match */ 157*4a5d661aSToomas Soome IPos prev_match; /* previous match */ 158*4a5d661aSToomas Soome int match_available; /* set if previous match exists */ 159*4a5d661aSToomas Soome uInt strstart; /* start of string to insert */ 160*4a5d661aSToomas Soome uInt match_start; /* start of matching string */ 161*4a5d661aSToomas Soome uInt lookahead; /* number of valid bytes ahead in window */ 162*4a5d661aSToomas Soome 163*4a5d661aSToomas Soome uInt prev_length; 164*4a5d661aSToomas Soome /* Length of the best match at previous step. Matches not greater than this 165*4a5d661aSToomas Soome * are discarded. This is used in the lazy match evaluation. 166*4a5d661aSToomas Soome */ 167*4a5d661aSToomas Soome 168*4a5d661aSToomas Soome uInt max_chain_length; 169*4a5d661aSToomas Soome /* To speed up deflation, hash chains are never searched beyond this 170*4a5d661aSToomas Soome * length. A higher limit improves compression ratio but degrades the 171*4a5d661aSToomas Soome * speed. 172*4a5d661aSToomas Soome */ 173*4a5d661aSToomas Soome 174*4a5d661aSToomas Soome uInt max_lazy_match; 175*4a5d661aSToomas Soome /* Attempt to find a better match only when the current match is strictly 176*4a5d661aSToomas Soome * smaller than this value. This mechanism is used only for compression 177*4a5d661aSToomas Soome * levels >= 4. 178*4a5d661aSToomas Soome */ 179*4a5d661aSToomas Soome # define max_insert_length max_lazy_match 180*4a5d661aSToomas Soome /* Insert new strings in the hash table only if the match length is not 181*4a5d661aSToomas Soome * greater than this length. This saves time but degrades compression. 182*4a5d661aSToomas Soome * max_insert_length is used only for compression levels <= 3. 183*4a5d661aSToomas Soome */ 184*4a5d661aSToomas Soome 185*4a5d661aSToomas Soome int level; /* compression level (1..9) */ 186*4a5d661aSToomas Soome int strategy; /* favor or force Huffman coding*/ 187*4a5d661aSToomas Soome 188*4a5d661aSToomas Soome uInt good_match; 189*4a5d661aSToomas Soome /* Use a faster search when the previous match is longer than this */ 190*4a5d661aSToomas Soome 191*4a5d661aSToomas Soome int nice_match; /* Stop searching when current match exceeds this */ 192*4a5d661aSToomas Soome 193*4a5d661aSToomas Soome /* used by trees.c: */ 194*4a5d661aSToomas Soome /* Didn't use ct_data typedef below to suppress compiler warning */ 195*4a5d661aSToomas Soome struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */ 196*4a5d661aSToomas Soome struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */ 197*4a5d661aSToomas Soome struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */ 198*4a5d661aSToomas Soome 199*4a5d661aSToomas Soome struct tree_desc_s l_desc; /* desc. for literal tree */ 200*4a5d661aSToomas Soome struct tree_desc_s d_desc; /* desc. for distance tree */ 201*4a5d661aSToomas Soome struct tree_desc_s bl_desc; /* desc. for bit length tree */ 202*4a5d661aSToomas Soome 203*4a5d661aSToomas Soome ush bl_count[MAX_BITS+1]; 204*4a5d661aSToomas Soome /* number of codes at each bit length for an optimal tree */ 205*4a5d661aSToomas Soome 206*4a5d661aSToomas Soome int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */ 207*4a5d661aSToomas Soome int heap_len; /* number of elements in the heap */ 208*4a5d661aSToomas Soome int heap_max; /* element of largest frequency */ 209*4a5d661aSToomas Soome /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. 210*4a5d661aSToomas Soome * The same heap array is used to build all trees. 211*4a5d661aSToomas Soome */ 212*4a5d661aSToomas Soome 213*4a5d661aSToomas Soome uch depth[2*L_CODES+1]; 214*4a5d661aSToomas Soome /* Depth of each subtree used as tie breaker for trees of equal frequency 215*4a5d661aSToomas Soome */ 216*4a5d661aSToomas Soome 217*4a5d661aSToomas Soome uchf *l_buf; /* buffer for literals or lengths */ 218*4a5d661aSToomas Soome 219*4a5d661aSToomas Soome uInt lit_bufsize; 220*4a5d661aSToomas Soome /* Size of match buffer for literals/lengths. There are 4 reasons for 221*4a5d661aSToomas Soome * limiting lit_bufsize to 64K: 222*4a5d661aSToomas Soome * - frequencies can be kept in 16 bit counters 223*4a5d661aSToomas Soome * - if compression is not successful for the first block, all input 224*4a5d661aSToomas Soome * data is still in the window so we can still emit a stored block even 225*4a5d661aSToomas Soome * when input comes from standard input. (This can also be done for 226*4a5d661aSToomas Soome * all blocks if lit_bufsize is not greater than 32K.) 227*4a5d661aSToomas Soome * - if compression is not successful for a file smaller than 64K, we can 228*4a5d661aSToomas Soome * even emit a stored file instead of a stored block (saving 5 bytes). 229*4a5d661aSToomas Soome * This is applicable only for zip (not gzip or zlib). 230*4a5d661aSToomas Soome * - creating new Huffman trees less frequently may not provide fast 231*4a5d661aSToomas Soome * adaptation to changes in the input data statistics. (Take for 232*4a5d661aSToomas Soome * example a binary file with poorly compressible code followed by 233*4a5d661aSToomas Soome * a highly compressible string table.) Smaller buffer sizes give 234*4a5d661aSToomas Soome * fast adaptation but have of course the overhead of transmitting 235*4a5d661aSToomas Soome * trees more frequently. 236*4a5d661aSToomas Soome * - I can't count above 4 237*4a5d661aSToomas Soome */ 238*4a5d661aSToomas Soome 239*4a5d661aSToomas Soome uInt last_lit; /* running index in l_buf */ 240*4a5d661aSToomas Soome 241*4a5d661aSToomas Soome ushf *d_buf; 242*4a5d661aSToomas Soome /* Buffer for distances. To simplify the code, d_buf and l_buf have 243*4a5d661aSToomas Soome * the same number of elements. To use different lengths, an extra flag 244*4a5d661aSToomas Soome * array would be necessary. 245*4a5d661aSToomas Soome */ 246*4a5d661aSToomas Soome 247*4a5d661aSToomas Soome ulg opt_len; /* bit length of current block with optimal trees */ 248*4a5d661aSToomas Soome ulg static_len; /* bit length of current block with static trees */ 249*4a5d661aSToomas Soome uInt matches; /* number of string matches in current block */ 250*4a5d661aSToomas Soome uInt insert; /* bytes at end of window left to insert */ 251*4a5d661aSToomas Soome 252*4a5d661aSToomas Soome #ifdef DEBUG 253*4a5d661aSToomas Soome ulg compressed_len; /* total bit length of compressed file mod 2^32 */ 254*4a5d661aSToomas Soome ulg bits_sent; /* bit length of compressed data sent mod 2^32 */ 255*4a5d661aSToomas Soome #endif 256*4a5d661aSToomas Soome 257*4a5d661aSToomas Soome ush bi_buf; 258*4a5d661aSToomas Soome /* Output buffer. bits are inserted starting at the bottom (least 259*4a5d661aSToomas Soome * significant bits). 260*4a5d661aSToomas Soome */ 261*4a5d661aSToomas Soome int bi_valid; 262*4a5d661aSToomas Soome /* Number of valid bits in bi_buf. All bits above the last valid bit 263*4a5d661aSToomas Soome * are always zero. 264*4a5d661aSToomas Soome */ 265*4a5d661aSToomas Soome 266*4a5d661aSToomas Soome ulg high_water; 267*4a5d661aSToomas Soome /* High water mark offset in window for initialized bytes -- bytes above 268*4a5d661aSToomas Soome * this are set to zero in order to avoid memory check warnings when 269*4a5d661aSToomas Soome * longest match routines access bytes past the input. This is then 270*4a5d661aSToomas Soome * updated to the new high water mark. 271*4a5d661aSToomas Soome */ 272*4a5d661aSToomas Soome 273*4a5d661aSToomas Soome } FAR deflate_state; 274*4a5d661aSToomas Soome 275*4a5d661aSToomas Soome /* Output a byte on the stream. 276*4a5d661aSToomas Soome * IN assertion: there is enough room in pending_buf. 277*4a5d661aSToomas Soome */ 278*4a5d661aSToomas Soome #define put_byte(s, c) {s->pending_buf[s->pending++] = (c);} 279*4a5d661aSToomas Soome 280*4a5d661aSToomas Soome 281*4a5d661aSToomas Soome #define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) 282*4a5d661aSToomas Soome /* Minimum amount of lookahead, except at the end of the input file. 283*4a5d661aSToomas Soome * See deflate.c for comments about the MIN_MATCH+1. 284*4a5d661aSToomas Soome */ 285*4a5d661aSToomas Soome 286*4a5d661aSToomas Soome #define MAX_DIST(s) ((s)->w_size-MIN_LOOKAHEAD) 287*4a5d661aSToomas Soome /* In order to simplify the code, particularly on 16 bit machines, match 288*4a5d661aSToomas Soome * distances are limited to MAX_DIST instead of WSIZE. 289*4a5d661aSToomas Soome */ 290*4a5d661aSToomas Soome 291*4a5d661aSToomas Soome #define WIN_INIT MAX_MATCH 292*4a5d661aSToomas Soome /* Number of bytes after end of data in window to initialize in order to avoid 293*4a5d661aSToomas Soome memory checker errors from longest match routines */ 294*4a5d661aSToomas Soome 295*4a5d661aSToomas Soome /* in trees.c */ 296*4a5d661aSToomas Soome void ZLIB_INTERNAL _tr_init OF((deflate_state *s)); 297*4a5d661aSToomas Soome int ZLIB_INTERNAL _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc)); 298*4a5d661aSToomas Soome void ZLIB_INTERNAL _tr_flush_block OF((deflate_state *s, charf *buf, 299*4a5d661aSToomas Soome ulg stored_len, int last)); 300*4a5d661aSToomas Soome void ZLIB_INTERNAL _tr_flush_bits OF((deflate_state *s)); 301*4a5d661aSToomas Soome void ZLIB_INTERNAL _tr_align OF((deflate_state *s)); 302*4a5d661aSToomas Soome void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf, 303*4a5d661aSToomas Soome ulg stored_len, int last)); 304*4a5d661aSToomas Soome 305*4a5d661aSToomas Soome #define d_code(dist) \ 306*4a5d661aSToomas Soome ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)]) 307*4a5d661aSToomas Soome /* Mapping from a distance to a distance code. dist is the distance - 1 and 308*4a5d661aSToomas Soome * must not have side effects. _dist_code[256] and _dist_code[257] are never 309*4a5d661aSToomas Soome * used. 310*4a5d661aSToomas Soome */ 311*4a5d661aSToomas Soome 312*4a5d661aSToomas Soome #ifndef DEBUG 313*4a5d661aSToomas Soome /* Inline versions of _tr_tally for speed: */ 314*4a5d661aSToomas Soome 315*4a5d661aSToomas Soome #if defined(GEN_TREES_H) || !defined(STDC) 316*4a5d661aSToomas Soome extern uch ZLIB_INTERNAL _length_code[]; 317*4a5d661aSToomas Soome extern uch ZLIB_INTERNAL _dist_code[]; 318*4a5d661aSToomas Soome #else 319*4a5d661aSToomas Soome extern const uch ZLIB_INTERNAL _length_code[]; 320*4a5d661aSToomas Soome extern const uch ZLIB_INTERNAL _dist_code[]; 321*4a5d661aSToomas Soome #endif 322*4a5d661aSToomas Soome 323*4a5d661aSToomas Soome # define _tr_tally_lit(s, c, flush) \ 324*4a5d661aSToomas Soome { uch cc = (c); \ 325*4a5d661aSToomas Soome s->d_buf[s->last_lit] = 0; \ 326*4a5d661aSToomas Soome s->l_buf[s->last_lit++] = cc; \ 327*4a5d661aSToomas Soome s->dyn_ltree[cc].Freq++; \ 328*4a5d661aSToomas Soome flush = (s->last_lit == s->lit_bufsize-1); \ 329*4a5d661aSToomas Soome } 330*4a5d661aSToomas Soome # define _tr_tally_dist(s, distance, length, flush) \ 331*4a5d661aSToomas Soome { uch len = (length); \ 332*4a5d661aSToomas Soome ush dist = (distance); \ 333*4a5d661aSToomas Soome s->d_buf[s->last_lit] = dist; \ 334*4a5d661aSToomas Soome s->l_buf[s->last_lit++] = len; \ 335*4a5d661aSToomas Soome dist--; \ 336*4a5d661aSToomas Soome s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ 337*4a5d661aSToomas Soome s->dyn_dtree[d_code(dist)].Freq++; \ 338*4a5d661aSToomas Soome flush = (s->last_lit == s->lit_bufsize-1); \ 339*4a5d661aSToomas Soome } 340*4a5d661aSToomas Soome #else 341*4a5d661aSToomas Soome # define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) 342*4a5d661aSToomas Soome # define _tr_tally_dist(s, distance, length, flush) \ 343*4a5d661aSToomas Soome flush = _tr_tally(s, distance, length) 344*4a5d661aSToomas Soome #endif 345*4a5d661aSToomas Soome 346*4a5d661aSToomas Soome #endif /* DEFLATE_H */ 347