1ca3e8d88SDave Plauger 2ca3e8d88SDave Plauger /*-------------------------------------------------------------*/ 3ca3e8d88SDave Plauger /*--- Huffman coding low-level stuff ---*/ 4ca3e8d88SDave Plauger /*--- huffman.c ---*/ 5ca3e8d88SDave Plauger /*-------------------------------------------------------------*/ 6ca3e8d88SDave Plauger 7ca3e8d88SDave Plauger /* ------------------------------------------------------------------ 8ca3e8d88SDave Plauger This file is part of bzip2/libbzip2, a program and library for 9ca3e8d88SDave Plauger lossless, block-sorting data compression. 10ca3e8d88SDave Plauger 11*b9071c34SGordon Ross bzip2/libbzip2 version 1.0.6 of 6 September 2010 12*b9071c34SGordon Ross Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org> 13ca3e8d88SDave Plauger 14ca3e8d88SDave Plauger Please read the WARNING, DISCLAIMER and PATENTS sections in the 15ca3e8d88SDave Plauger README file. 16ca3e8d88SDave Plauger 17ca3e8d88SDave Plauger This program is released under the terms of the license contained 18ca3e8d88SDave Plauger in the file LICENSE. 19ca3e8d88SDave Plauger ------------------------------------------------------------------ */ 20ca3e8d88SDave Plauger 21ca3e8d88SDave Plauger 22ca3e8d88SDave Plauger #include "bzlib_private.h" 23ca3e8d88SDave Plauger 24ca3e8d88SDave Plauger /*---------------------------------------------------*/ 25ca3e8d88SDave Plauger #define WEIGHTOF(zz0) ((zz0) & 0xffffff00) 26ca3e8d88SDave Plauger #define DEPTHOF(zz1) ((zz1) & 0x000000ff) 27ca3e8d88SDave Plauger #define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3)) 28ca3e8d88SDave Plauger 29ca3e8d88SDave Plauger #define ADDWEIGHTS(zw1,zw2) \ 30ca3e8d88SDave Plauger (WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \ 31ca3e8d88SDave Plauger (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2))) 32ca3e8d88SDave Plauger 33ca3e8d88SDave Plauger #define UPHEAP(z) \ 34ca3e8d88SDave Plauger { \ 35ca3e8d88SDave Plauger Int32 zz, tmp; \ 36ca3e8d88SDave Plauger zz = z; tmp = heap[zz]; \ 37ca3e8d88SDave Plauger while (weight[tmp] < weight[heap[zz >> 1]]) { \ 38ca3e8d88SDave Plauger heap[zz] = heap[zz >> 1]; \ 39ca3e8d88SDave Plauger zz >>= 1; \ 40ca3e8d88SDave Plauger } \ 41ca3e8d88SDave Plauger heap[zz] = tmp; \ 42ca3e8d88SDave Plauger } 43ca3e8d88SDave Plauger 44ca3e8d88SDave Plauger #define DOWNHEAP(z) \ 45ca3e8d88SDave Plauger { \ 46ca3e8d88SDave Plauger Int32 zz, yy, tmp; \ 47ca3e8d88SDave Plauger zz = z; tmp = heap[zz]; \ 48ca3e8d88SDave Plauger while (True) { \ 49ca3e8d88SDave Plauger yy = zz << 1; \ 50ca3e8d88SDave Plauger if (yy > nHeap) break; \ 51ca3e8d88SDave Plauger if (yy < nHeap && \ 52ca3e8d88SDave Plauger weight[heap[yy+1]] < weight[heap[yy]]) \ 53ca3e8d88SDave Plauger yy++; \ 54ca3e8d88SDave Plauger if (weight[tmp] < weight[heap[yy]]) break; \ 55ca3e8d88SDave Plauger heap[zz] = heap[yy]; \ 56ca3e8d88SDave Plauger zz = yy; \ 57ca3e8d88SDave Plauger } \ 58ca3e8d88SDave Plauger heap[zz] = tmp; \ 59ca3e8d88SDave Plauger } 60ca3e8d88SDave Plauger 61ca3e8d88SDave Plauger 62ca3e8d88SDave Plauger /*---------------------------------------------------*/ 63ca3e8d88SDave Plauger void BZ2_hbMakeCodeLengths ( UChar *len, 64ca3e8d88SDave Plauger Int32 *freq, 65ca3e8d88SDave Plauger Int32 alphaSize, 66ca3e8d88SDave Plauger Int32 maxLen ) 67ca3e8d88SDave Plauger { 68ca3e8d88SDave Plauger /*-- 69ca3e8d88SDave Plauger Nodes and heap entries run from 1. Entry 0 70ca3e8d88SDave Plauger for both the heap and nodes is a sentinel. 71ca3e8d88SDave Plauger --*/ 72ca3e8d88SDave Plauger Int32 nNodes, nHeap, n1, n2, i, j, k; 73ca3e8d88SDave Plauger Bool tooLong; 74ca3e8d88SDave Plauger 75ca3e8d88SDave Plauger Int32 heap [ BZ_MAX_ALPHA_SIZE + 2 ]; 76ca3e8d88SDave Plauger Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ]; 77ca3e8d88SDave Plauger Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ]; 78ca3e8d88SDave Plauger 79ca3e8d88SDave Plauger for (i = 0; i < alphaSize; i++) 80ca3e8d88SDave Plauger weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8; 81ca3e8d88SDave Plauger 82ca3e8d88SDave Plauger while (True) { 83ca3e8d88SDave Plauger 84ca3e8d88SDave Plauger nNodes = alphaSize; 85ca3e8d88SDave Plauger nHeap = 0; 86ca3e8d88SDave Plauger 87ca3e8d88SDave Plauger heap[0] = 0; 88ca3e8d88SDave Plauger weight[0] = 0; 89ca3e8d88SDave Plauger parent[0] = -2; 90ca3e8d88SDave Plauger 91ca3e8d88SDave Plauger for (i = 1; i <= alphaSize; i++) { 92ca3e8d88SDave Plauger parent[i] = -1; 93ca3e8d88SDave Plauger nHeap++; 94ca3e8d88SDave Plauger heap[nHeap] = i; 95ca3e8d88SDave Plauger UPHEAP(nHeap); 96ca3e8d88SDave Plauger } 97ca3e8d88SDave Plauger 98ca3e8d88SDave Plauger AssertH( nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001 ); 99ca3e8d88SDave Plauger 100ca3e8d88SDave Plauger while (nHeap > 1) { 101ca3e8d88SDave Plauger n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1); 102ca3e8d88SDave Plauger n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1); 103ca3e8d88SDave Plauger nNodes++; 104ca3e8d88SDave Plauger parent[n1] = parent[n2] = nNodes; 105ca3e8d88SDave Plauger weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]); 106ca3e8d88SDave Plauger parent[nNodes] = -1; 107ca3e8d88SDave Plauger nHeap++; 108ca3e8d88SDave Plauger heap[nHeap] = nNodes; 109ca3e8d88SDave Plauger UPHEAP(nHeap); 110ca3e8d88SDave Plauger } 111ca3e8d88SDave Plauger 112ca3e8d88SDave Plauger AssertH( nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002 ); 113ca3e8d88SDave Plauger 114ca3e8d88SDave Plauger tooLong = False; 115ca3e8d88SDave Plauger for (i = 1; i <= alphaSize; i++) { 116ca3e8d88SDave Plauger j = 0; 117ca3e8d88SDave Plauger k = i; 118ca3e8d88SDave Plauger while (parent[k] >= 0) { k = parent[k]; j++; } 119ca3e8d88SDave Plauger len[i-1] = j; 120ca3e8d88SDave Plauger if (j > maxLen) tooLong = True; 121ca3e8d88SDave Plauger } 122ca3e8d88SDave Plauger 123ca3e8d88SDave Plauger if (! tooLong) break; 124ca3e8d88SDave Plauger 125ca3e8d88SDave Plauger /* 17 Oct 04: keep-going condition for the following loop used 126ca3e8d88SDave Plauger to be 'i < alphaSize', which missed the last element, 127ca3e8d88SDave Plauger theoretically leading to the possibility of the compressor 128ca3e8d88SDave Plauger looping. However, this count-scaling step is only needed if 129ca3e8d88SDave Plauger one of the generated Huffman code words is longer than 130ca3e8d88SDave Plauger maxLen, which up to and including version 1.0.2 was 20 bits, 131ca3e8d88SDave Plauger which is extremely unlikely. In version 1.0.3 maxLen was 132ca3e8d88SDave Plauger changed to 17 bits, which has minimal effect on compression 133ca3e8d88SDave Plauger ratio, but does mean this scaling step is used from time to 134ca3e8d88SDave Plauger time, enough to verify that it works. 135ca3e8d88SDave Plauger 136ca3e8d88SDave Plauger This means that bzip2-1.0.3 and later will only produce 137ca3e8d88SDave Plauger Huffman codes with a maximum length of 17 bits. However, in 138ca3e8d88SDave Plauger order to preserve backwards compatibility with bitstreams 139ca3e8d88SDave Plauger produced by versions pre-1.0.3, the decompressor must still 140ca3e8d88SDave Plauger handle lengths of up to 20. */ 141ca3e8d88SDave Plauger 142ca3e8d88SDave Plauger for (i = 1; i <= alphaSize; i++) { 143ca3e8d88SDave Plauger j = weight[i] >> 8; 144ca3e8d88SDave Plauger j = 1 + (j / 2); 145ca3e8d88SDave Plauger weight[i] = j << 8; 146ca3e8d88SDave Plauger } 147ca3e8d88SDave Plauger } 148ca3e8d88SDave Plauger } 149ca3e8d88SDave Plauger 150ca3e8d88SDave Plauger 151ca3e8d88SDave Plauger /*---------------------------------------------------*/ 152ca3e8d88SDave Plauger void BZ2_hbAssignCodes ( Int32 *code, 153ca3e8d88SDave Plauger UChar *length, 154ca3e8d88SDave Plauger Int32 minLen, 155ca3e8d88SDave Plauger Int32 maxLen, 156ca3e8d88SDave Plauger Int32 alphaSize ) 157ca3e8d88SDave Plauger { 158ca3e8d88SDave Plauger Int32 n, vec, i; 159ca3e8d88SDave Plauger 160ca3e8d88SDave Plauger vec = 0; 161ca3e8d88SDave Plauger for (n = minLen; n <= maxLen; n++) { 162ca3e8d88SDave Plauger for (i = 0; i < alphaSize; i++) 163ca3e8d88SDave Plauger if (length[i] == n) { code[i] = vec; vec++; }; 164ca3e8d88SDave Plauger vec <<= 1; 165ca3e8d88SDave Plauger } 166ca3e8d88SDave Plauger } 167ca3e8d88SDave Plauger 168ca3e8d88SDave Plauger 169ca3e8d88SDave Plauger /*---------------------------------------------------*/ 170ca3e8d88SDave Plauger void BZ2_hbCreateDecodeTables ( Int32 *limit, 171ca3e8d88SDave Plauger Int32 *base, 172ca3e8d88SDave Plauger Int32 *perm, 173ca3e8d88SDave Plauger UChar *length, 174ca3e8d88SDave Plauger Int32 minLen, 175ca3e8d88SDave Plauger Int32 maxLen, 176ca3e8d88SDave Plauger Int32 alphaSize ) 177ca3e8d88SDave Plauger { 178ca3e8d88SDave Plauger Int32 pp, i, j, vec; 179ca3e8d88SDave Plauger 180ca3e8d88SDave Plauger pp = 0; 181ca3e8d88SDave Plauger for (i = minLen; i <= maxLen; i++) 182ca3e8d88SDave Plauger for (j = 0; j < alphaSize; j++) 183ca3e8d88SDave Plauger if (length[j] == i) { perm[pp] = j; pp++; }; 184ca3e8d88SDave Plauger 185ca3e8d88SDave Plauger for (i = 0; i < BZ_MAX_CODE_LEN; i++) base[i] = 0; 186ca3e8d88SDave Plauger for (i = 0; i < alphaSize; i++) base[length[i]+1]++; 187ca3e8d88SDave Plauger 188ca3e8d88SDave Plauger for (i = 1; i < BZ_MAX_CODE_LEN; i++) base[i] += base[i-1]; 189ca3e8d88SDave Plauger 190ca3e8d88SDave Plauger for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = 0; 191ca3e8d88SDave Plauger vec = 0; 192ca3e8d88SDave Plauger 193ca3e8d88SDave Plauger for (i = minLen; i <= maxLen; i++) { 194ca3e8d88SDave Plauger vec += (base[i+1] - base[i]); 195ca3e8d88SDave Plauger limit[i] = vec-1; 196ca3e8d88SDave Plauger vec <<= 1; 197ca3e8d88SDave Plauger } 198ca3e8d88SDave Plauger for (i = minLen + 1; i <= maxLen; i++) 199ca3e8d88SDave Plauger base[i] = ((limit[i-1] + 1) << 1) - base[i]; 200ca3e8d88SDave Plauger } 201ca3e8d88SDave Plauger 202ca3e8d88SDave Plauger 203ca3e8d88SDave Plauger /*-------------------------------------------------------------*/ 204ca3e8d88SDave Plauger /*--- end huffman.c ---*/ 205ca3e8d88SDave Plauger /*-------------------------------------------------------------*/ 206