1 /* ****************************************************************** 2 Huffman encoder, part of New Generation Entropy library 3 Copyright (C) 2013-2016, Yann Collet. 4 5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 6 7 Redistribution and use in source and binary forms, with or without 8 modification, are permitted provided that the following conditions are 9 met: 10 11 * Redistributions of source code must retain the above copyright 12 notice, this list of conditions and the following disclaimer. 13 * Redistributions in binary form must reproduce the above 14 copyright notice, this list of conditions and the following disclaimer 15 in the documentation and/or other materials provided with the 16 distribution. 17 18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
   You can contact the author at :
   - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
   - Public forum : https://groups.google.com/forum/#!forum/lz4c
****************************************************************** */

/* **************************************************************
*  Compiler specifics
****************************************************************/
#ifdef _MSC_VER    /* Visual Studio */
#  pragma warning(disable : 4127)   /* disable: C4127: conditional expression is constant */
#endif


/* **************************************************************
*  Includes
****************************************************************/
#include <string.h>     /* memcpy, memset */
#include <stdio.h>      /* printf (debug) */
#include "compiler.h"
#include "bitstream.h"
#include "hist.h"
#define FSE_STATIC_LINKING_ONLY   /* FSE_optimalTableLog_internal */
#include "fse.h"        /* header compression */
#define HUF_STATIC_LINKING_ONLY
#include "huf.h"
#include "error_private.h"


/* **************************************************************
*  Error Management
****************************************************************/
#define HUF_isError ERR_isError
#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)   /* use only *after* variable declarations */
/* CHECK_V_F : declares `e`, assigns `f` to it, and returns `e` from the
 * enclosing function if it is an error code. CHECK_F discards the value. */
#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
#define CHECK_F(f)   { CHECK_V_F(_var_err__, f); }


/* **************************************************************
*  Utils
****************************************************************/
/*! HUF_optimalTableLog() :
 *  Thin wrapper over FSE_optimalTableLog_internal() with minus==1,
 *  selecting a table log suited to Huffman header compression. */
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
{
    return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
}


/* *******************************************************
*  HUF : Huffman block compression
*********************************************************/
/* HUF_compressWeights() :
 * Same as FSE_compress(), but dedicated to huff0's weights compression.
 * The use case needs much less stack memory.
 * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
 * @return : compressed size of weights written at `dst`,
 *           1 if the input is a single repeated symbol (rle),
 *           0 if not compressible, or an error code.
 */
#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
{
    BYTE* const ostart = (BYTE*) dst;
    BYTE* op = ostart;
    BYTE* const oend = ostart + dstSize;

    U32 maxSymbolValue = HUF_TABLELOG_MAX;
    U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;

    FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
    BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER];

    U32 count[HUF_TABLELOG_MAX+1];
    S16 norm[HUF_TABLELOG_MAX+1];

    /* init conditions */
    if (wtSize <= 1) return 0;  /* Not compressible */

    /* Scan input and build symbol stats */
    {   unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize);   /* never fails */
        if (maxCount == wtSize) return 1;   /* only a single symbol in src : rle */
        if (maxCount == 1) return 0;        /* each symbol present maximum once => not compressible */
    }

    tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
    CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) );

    /* Write table description header */
    {   CHECK_V_F(hSize, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
        op += hSize;
    }

    /* Compress */
    CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
    {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable) );
        if (cSize == 0) return 0;   /* not enough space for compressed data */
        op += cSize;
    }

    return op-ostart;
}


/* One cell of the Huffman encoding table : code value + code length in bits */
struct HUF_CElt_s {
    U16  val;
    BYTE nbBits;
};   /* typedef'd to HUF_CElt within "huf.h" */

/*! HUF_writeCTable() :
    `CTable` : Huffman tree to save, using huf representation.
    Weights are first compressed with FSE (via HUF_compressWeights);
    if that does not pay off, they are stored raw as 4-bit pairs.
    @return : size of saved CTable, or an error code */
size_t HUF_writeCTable (void* dst, size_t maxDstSize,
                        const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog)
{
    BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
    BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
    BYTE* op = (BYTE*)dst;
    U32 n;

    /* check conditions */
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);

    /* convert to weight : weight = huffLog + 1 - nbBits (0 means unused symbol) */
    bitsToWeight[0] = 0;
    for (n=1; n<huffLog+1; n++)
        bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
    for (n=0; n<maxSymbolValue; n++)
        huffWeight[n] = bitsToWeight[CTable[n].nbBits];

    /* attempt weights compression by FSE */
    {   CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) );
        if ((hSize>1) & (hSize < maxSymbolValue/2)) {   /* FSE compressed */
            op[0] = (BYTE)hSize;   /* header byte < 128 signals FSE-compressed weights */
            return hSize+1;
        }   }

    /* write raw values as 4-bits (max : 15) */
    if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen : likely means source cannot be compressed */
    if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
    op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));   /* header byte >= 128 signals raw weights */
    huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination */
    for (n=0; n<maxSymbolValue; n+=2)
        op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
    return ((maxSymbolValue+1)/2) + 1;
}


/*! HUF_readCTable() :
 *  Rebuilds a Huffman encoding table (CTable) from a saved description
 *  produced by HUF_writeCTable(). On success, *maxSymbolValuePtr is
 *  updated to the largest symbol value actually present.
 *  @return : number of bytes read from `src`, or an error code */
size_t HUF_readCTable (HUF_CElt* CTable, U32* maxSymbolValuePtr, const void* src, size_t srcSize)
{
    BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];   /* init not required, even though some static analyzer may complain */
    U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];  /* large enough for values from 0 to 16 */
    U32 tableLog = 0;
    U32 nbSymbols = 0;

    /* get symbol weights */
    CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));

    /* check result */
    if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);

    /* Prepare base value per rank */
    {   U32 n, nextRankStart = 0;
        for (n=1; n<=tableLog; n++) {
            U32 current = nextRankStart;
            nextRankStart += (rankVal[n] << (n-1));
            rankVal[n] = current;
    }   }

    /* fill nbBits : nbBits = tableLog + 1 - weight */
    {   U32 n; for (n=0; n<nbSymbols; n++) {
            const U32 w = huffWeight[n];
            CTable[n].nbBits = (BYTE)(tableLog + 1 - w);
    }   }

    /* fill val : assign canonical code values, rank by rank */
    {   U16 nbPerRank[HUF_TABLELOG_MAX+2]  = {0};   /* support w=0=>n=tableLog+1 */
        U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
        { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
        /* determine starting value per rank */
        valPerRank[tableLog+1] = 0;   /* for w==0 */
        {   U16 min = 0;
            U32 n; for (n=tableLog; n>0; n--) {   /* start at n=tablelog <-> w=1 */
                valPerRank[n] = min;     /* get starting value within each rank */
                min += nbPerRank[n];
                min >>= 1;
        }   }
        /* assign value within rank, symbol order */
        { U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; }
    }

    *maxSymbolValuePtr = nbSymbols - 1;
    return readSize;
}

/*! HUF_getNbBits() :
 *  @return : code length (in bits) of `symbolValue` within `symbolTable`,
 *            which must be a HUF_CElt table. 0 means the symbol is unused. */
U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue)
{
    const HUF_CElt* table = (const HUF_CElt*)symbolTable;
    assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
    return table[symbolValue].nbBits;
}


/* Node of the (flattened) Huffman tree used during construction */
typedef struct nodeElt_s {
    U32 count;    /* symbol frequency, then subtree weight for internal nodes */
    U16 parent;   /* index of parent node within huffNode[] */
    BYTE byte;    /* symbol value (leaves only) */
    BYTE nbBits;  /* resulting code length */
} nodeElt;

/*! HUF_setMaxHeight() :
 *  Enforces maxNbBits as the largest code length, redistributing the
 *  bit-budget ("cost") freed by truncating over-long codes onto shorter ones,
 *  while keeping the Kraft sum exact.
 *  `huffNode` must be sorted by increasing count; `lastNonNull` is the index
 *  of the last symbol with a non-zero count.
 *  @return : effective max number of bits (<= maxNbBits) */
static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
{
    const U32 largestBits = huffNode[lastNonNull].nbBits;
    if (largestBits <= maxNbBits) return largestBits;   /* early exit : no elt > maxNbBits */

    /* there are several too large elements (at least >= 2) */
    {   int totalCost = 0;
        const U32 baseCost = 1 << (largestBits - maxNbBits);
        U32 n = lastNonNull;

        /* truncate all over-long codes to maxNbBits, accumulating the debt */
        while (huffNode[n].nbBits > maxNbBits) {
            totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
            huffNode[n].nbBits = (BYTE)maxNbBits;
            n --;
        }  /* n stops at huffNode[n].nbBits <= maxNbBits */
        while (huffNode[n].nbBits == maxNbBits) n--;   /* n end at index of smallest symbol using < maxNbBits */

        /* renorm totalCost */
        totalCost >>= (largestBits - maxNbBits);  /* note : totalCost is necessarily a multiple of baseCost */

        /* repay normalized cost */
        {   U32 const noSymbol = 0xF0F0F0F0;
            U32 rankLast[HUF_TABLELOG_MAX+2];
            int pos;

            /* Get pos of last (smallest) symbol per rank */
            memset(rankLast, 0xF0, sizeof(rankLast));
            {   U32 currentNbBits = maxNbBits;
                for (pos=n ; pos >= 0; pos--) {
                    if (huffNode[pos].nbBits >= currentNbBits) continue;
                    currentNbBits = huffNode[pos].nbBits;   /* < maxNbBits */
                    rankLast[maxNbBits-currentNbBits] = pos;
            }   }

            while (totalCost > 0) {
                /* pay the debt from the deepest affordable rank first */
                U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1;
                for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
                    U32 highPos = rankLast[nBitsToDecrease];
                    U32 lowPos = rankLast[nBitsToDecrease-1];
                    if (highPos == noSymbol) continue;
                    if (lowPos == noSymbol) break;
                    {   U32 const highTotal = huffNode[highPos].count;
                        U32 const lowTotal = 2 * huffNode[lowPos].count;
                        if (highTotal <= lowTotal) break;
                }   }
                /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
                /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
                while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
                    nBitsToDecrease ++;
                totalCost -= 1 << (nBitsToDecrease-1);
                if (rankLast[nBitsToDecrease-1] == noSymbol)
                    rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];   /* this rank is no longer empty */
                huffNode[rankLast[nBitsToDecrease]].nbBits ++;
                if (rankLast[nBitsToDecrease] == 0)    /* special case, reached largest symbol */
                    rankLast[nBitsToDecrease] = noSymbol;
                else {
                    rankLast[nBitsToDecrease]--;
                    if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
                        rankLast[nBitsToDecrease] = noSymbol;   /* this rank is now empty */
            }   }   /* while (totalCost > 0) */

            while (totalCost < 0) {  /* Sometimes, cost correction overshoot */
                if (rankLast[1] == noSymbol) {  /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
                    while (huffNode[n].nbBits == maxNbBits) n--;
                    huffNode[n+1].nbBits--;
                    rankLast[1] = n+1;
                    totalCost++;
                    continue;
                }
                huffNode[ rankLast[1] + 1 ].nbBits--;
                rankLast[1]++;
                totalCost ++;
    }   }   }   /* there are several too large elements (at least >= 2) */

    return maxNbBits;
}


/* rank bucket descriptor used by HUF_sort() */
typedef struct {
    U32 base;      /* first position of this rank within huffNode[] */
    U32 current;   /* next free position within this rank */
} rankPos;

/*! HUF_sort() :
 *  Sorts symbols into huffNode[] by decreasing count, using one bucket per
 *  magnitude rank (BIT_highbit32 of count) followed by an insertion sort
 *  within each bucket. */
static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
{
    rankPos rank[32];
    U32 n;

    memset(rank, 0, sizeof(rank));
    /* histogram of ranks */
    for (n=0; n<=maxSymbolValue; n++) {
        U32 r = BIT_highbit32(count[n] + 1);
        rank[r].base ++;
    }
    /* prefix sums : higher rank => earlier position (decreasing order) */
    for (n=30; n>0; n--) rank[n-1].base += rank[n].base;
    for (n=0; n<32; n++) rank[n].current = rank[n].base;
    for (n=0; n<=maxSymbolValue; n++) {
        U32 const c = count[n];
        U32 const r = BIT_highbit32(c+1) + 1;
        U32 pos = rank[r].current++;
        /* insertion sort within the bucket, keeping decreasing count order */
        while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) {
            huffNode[pos] = huffNode[pos-1];
            pos--;
        }
        huffNode[pos].count = c;
        huffNode[pos].byte  = (BYTE)n;
    }
}


/** HUF_buildCTable_wksp() :
 *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
 *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of HUF_CTABLE_WORKSPACE_SIZE_U32 unsigned.
 *  @return : effective max number of bits used by the table (<= maxNbBits),
 *            or an error code */
#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
{
    nodeElt* const huffNode0 = (nodeElt*)workSpace;
    nodeElt* const huffNode = huffNode0+1;   /* huffNode0[0] is a sentinel (see below) */
    U32 n, nonNullRank;
    int lowS, lowN;
    U16 nodeNb = STARTNODE;
    U32 nodeRoot;

    /* safety checks */
    if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);   /* must be aligned on 4-bytes boundaries */
    if (wkspSize < sizeof(huffNodeTable)) return ERROR(workSpace_tooSmall);
    if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
    memset(huffNode0, 0, sizeof(huffNodeTable));

    /* sort, decreasing order */
    HUF_sort(huffNode, count, maxSymbolValue);

    /* init for parents : merge the two smallest leaves into the first internal node */
    nonNullRank = maxSymbolValue;
    while(huffNode[nonNullRank].count == 0) nonNullRank--;
    lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
    huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
    huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb;
    nodeNb++; lowS-=2;
    for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
    huffNode0[0].count = (U32)(1U<<31);  /* fake entry, strong barrier */

    /* create parents : repeatedly merge the two smallest remaining nodes */
    while (nodeNb <= nodeRoot) {
        U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
        U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
        huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
        huffNode[n1].parent = huffNode[n2].parent = nodeNb;
        nodeNb++;
    }

    /* distribute weights (unlimited tree height) */
    huffNode[nodeRoot].nbBits = 0;
    for (n=nodeRoot-1; n>=STARTNODE; n--)
        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
    for (n=0; n<=nonNullRank; n++)
        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;

    /* enforce maxTableLog */
    maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);

    /* fill result into tree (val, nbBits) */
    {   U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
        U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
        if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */
        for (n=0; n<=nonNullRank; n++)
            nbPerRank[huffNode[n].nbBits]++;
        /* determine starting value per rank */
        {   U16 min = 0;
            for (n=maxNbBits; n>0; n--) {
                valPerRank[n] = min;      /* get starting value within each rank */
                min += nbPerRank[n];
                min >>= 1;
        }   }
        for (n=0; n<=maxSymbolValue; n++)
            tree[huffNode[n].byte].nbBits = huffNode[n].nbBits;   /* push nbBits per symbol, symbol order */
        for (n=0; n<=maxSymbolValue; n++)
            tree[n].val = valPerRank[tree[n].nbBits]++;   /* assign value within rank, symbol order */
    }

    return maxNbBits;
}

/** HUF_buildCTable() :
 * @return : maxNbBits
 *  Note : count is used before tree is written, so they can safely overlap
 */
size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits)
{
    huffNodeTable nodeTable;   /* stack-allocated workspace */
    return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable));
}

/*! HUF_estimateCompressedSize() :
 *  @return : estimated compressed size (in bytes) of `count` statistics
 *            encoded with `CTable`, ignoring header and padding. */
static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
{
    size_t nbBits = 0;
    int s;
    for (s = 0; s <= (int)maxSymbolValue; ++s) {
        nbBits += CTable[s].nbBits * count[s];
    }
    return nbBits >> 3;
}

/*! HUF_validateCTable() :
 *  @return : 1 if `CTable` can encode every symbol present in `count`
 *            (i.e. no present symbol has a 0-bit code), 0 otherwise. */
static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
  int bad = 0;
  int s;
  for (s = 0; s <= (int)maxSymbolValue; ++s) {
    bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
  }
  return !bad;
}

/*! HUF_compressBound() :
 *  @return : maximum compressed size for an input of `size` bytes */
size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }

/* Pushes one symbol's code into the bitstream (no flush; caller flushes). */
FORCE_INLINE_TEMPLATE void
HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
{
    BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
}

#define HUF_FLUSHBITS(s)  BIT_flushBits(s)

/* Conditional flushes : only needed when the bit container is too small to
 * accumulate 2 (resp. 4) max-length codes between unconditional flushes.
 * On 64-bit targets the condition is false and these compile away. */
#define HUF_FLUSHBITS_1(stream) \
    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)

#define HUF_FLUSHBITS_2(stream) \
    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)

/*! HUF_compress1X_usingCTable_internal_body() :
 *  Encodes `src` as a single Huffman bitstream into `dst` using `CTable`.
 *  Symbols are emitted back-to-front, 4 at a time, after a prologue handling
 *  srcSize % 4 leading symbols.
 *  @return : compressed size, or 0 if dst is too small */
FORCE_INLINE_TEMPLATE size_t
HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
                                   const void* src, size_t srcSize,
                                   const HUF_CElt* CTable)
{
    const BYTE* ip = (const BYTE*) src;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;
    size_t n;
    BIT_CStream_t bitC;

    /* init */
    if (dstSize < 8) return 0;   /* not enough space to compress */
    { size_t const initErr = BIT_initCStream(&bitC, op, oend-op);
      if (HUF_isError(initErr)) return 0; }

    n = srcSize & ~3;  /* join to mod 4 */
    switch (srcSize & 3)
    {
        case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
                 HUF_FLUSHBITS_2(&bitC);
                 /* fall-through */
        case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
                 HUF_FLUSHBITS_1(&bitC);
                 /* fall-through */
        case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
                 HUF_FLUSHBITS(&bitC);
                 /* fall-through */
        case 0 : /* fall-through */
        default: break;
    }

    for (; n>0; n-=4) {  /* note : n&3==0 at this stage */
        HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
        HUF_FLUSHBITS_1(&bitC);
        HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
        HUF_FLUSHBITS_2(&bitC);
        HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
        HUF_FLUSHBITS_1(&bitC);
        HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
        HUF_FLUSHBITS(&bitC);
    }

    return BIT_closeCStream(&bitC);
}

#if DYNAMIC_BMI2

/* BMI2-targeted instantiation of the shared body (selected at runtime) */
static TARGET_ATTRIBUTE("bmi2") size_t
HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
                                   const void* src, size_t srcSize,
                                   const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

static size_t
HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
                                      const void* src, size_t srcSize,
                                      const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

/* Runtime dispatch between the bmi2 and default instantiations */
static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, const int bmi2)
{
    if (bmi2) {
        return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
    }
    return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
}

#else

/* No dynamic dispatch : single instantiation, bmi2 flag ignored */
static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, const int bmi2)
{
    (void)bmi2;
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

#endif

size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
}


/*! HUF_compress4X_usingCTable_internal() :
 *  Splits `src` into 4 segments compressed as independent streams; the sizes
 *  of the first 3 streams are stored as a 6-byte little-endian jump table at
 *  the start of `dst`.
 *  @return : compressed size, or 0 if not compressible / dst too small */
static size_t
HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, int bmi2)
{
    size_t const segmentSize = (srcSize+3)/4;   /* first 3 segments */
    const BYTE* ip = (const BYTE*) src;
    const BYTE* const iend = ip + srcSize;
    BYTE* const ostart = (BYTE*) dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;

    if (dstSize < 6 + 1 + 1 + 1 + 8) return 0;   /* minimum space to compress successfully */
    if (srcSize < 12) return 0;   /* no saving possible : too small input */
    op += 6;   /* jumpTable */

    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
        if (cSize==0) return 0;
        assert(cSize <= 65535);
        MEM_writeLE16(ostart, (U16)cSize);
        op += cSize;
    }

    ip += segmentSize;
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
        if (cSize==0) return 0;
        assert(cSize <= 65535);
        MEM_writeLE16(ostart+2, (U16)cSize);
        op += cSize;
    }

    ip += segmentSize;
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
        if (cSize==0) return 0;
        assert(cSize <= 65535);
        MEM_writeLE16(ostart+4, (U16)cSize);
        op += cSize;
    }

    /* last segment takes the remainder (its size is implied, not stored) */
    ip += segmentSize;
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, iend-ip, CTable, bmi2) );
        if (cSize==0) return 0;
        op += cSize;
    }

    return op-ostart;
}

size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
{
    return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
}


/*! HUF_compressCTable_internal() :
 *  Compresses with an already-built CTable (1 or 4 streams) and checks that
 *  the result is actually smaller than the input.
 *  @return : total compressed size from `ostart`, 0 if not compressible,
 *            or an error code */
static size_t HUF_compressCTable_internal(
                BYTE* const ostart, BYTE* op, BYTE* const oend,
                const void* src, size_t srcSize,
                unsigned singleStream, const HUF_CElt* CTable, const int bmi2)
{
    size_t const cSize = singleStream ?
                         HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) :
                         HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2);
    if (HUF_isError(cSize)) { return cSize; }
    if (cSize==0) { return 0; }   /* uncompressible */
    op += cSize;
    /* check compressibility */
    if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
    return op-ostart;
}

/* All scratch tables needed by HUF_compress_internal(), mapped over workSpace */
typedef struct {
    U32 count[HUF_SYMBOLVALUE_MAX + 1];
    HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
    huffNodeTable nodeTable;
} HUF_compress_tables_t;

/* HUF_compress_internal() :
 * Full compression pipeline : histogram, tree build, header write, encode.
 * Optionally re-uses / updates a previous table via `oldHufTable`/`repeat`.
 * `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
static size_t HUF_compress_internal (
                void* dst, size_t dstSize,
                const void* src, size_t srcSize,
                unsigned maxSymbolValue, unsigned huffLog,
                unsigned singleStream,
                void* workSpace, size_t wkspSize,
                HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
                const int bmi2)
{
    HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;

    /* checks & inits */
    if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
    if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
    if (!srcSize) return 0;  /* Uncompressed */
    if (!dstSize) return 0;  /* cannot fit anything within dst budget */
    if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);   /* current block size limit */
    if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
    if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
    if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;

    /* Heuristic : If old table is valid, use it for small inputs */
    if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
        return HUF_compressCTable_internal(ostart, op, oend,
                                           src, srcSize,
                                           singleStream, oldHufTable, bmi2);
    }

    /* Scan input and build symbol stats */
    {   CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->count) );
        if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
        if (largest <= (srcSize >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
    }

    /* Check validity of previous table */
    if ( repeat
      && *repeat == HUF_repeat_check
      && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) {
        *repeat = HUF_repeat_none;
    }
    /* Heuristic : use existing table for small inputs */
    if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
        return HUF_compressCTable_internal(ostart, op, oend,
                                           src, srcSize,
                                           singleStream, oldHufTable, bmi2);
    }

    /* Build Huffman Tree */
    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
    {   CHECK_V_F(maxBits, HUF_buildCTable_wksp(table->CTable, table->count,
                                                maxSymbolValue, huffLog,
                                                table->nodeTable, sizeof(table->nodeTable)) );
        huffLog = (U32)maxBits;
        /* Zero unused symbols in CTable, so we can check it for validity */
        memset(table->CTable + (maxSymbolValue + 1), 0,
               sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
    }

    /* Write table description header */
    {   CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) );
        /* Check if using previous huffman table is beneficial */
        if (repeat && *repeat != HUF_repeat_none) {
            size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
            size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue);
            if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
                return HUF_compressCTable_internal(ostart, op, oend,
                                                   src, srcSize,
                                                   singleStream, oldHufTable, bmi2);
        }   }

        /* Use the new huffman table */
        if (hSize + 12ul >= srcSize) { return 0; }   /* header alone would eat the gains */
        op += hSize;
        if (repeat) { *repeat = HUF_repeat_none; }
        if (oldHufTable)
            memcpy(oldHufTable, table->CTable, sizeof(table->CTable));  /* Save new table */
    }
    return HUF_compressCTable_internal(ostart, op, oend,
                                       src, srcSize,
                                       singleStream, table->CTable, bmi2);
}


/*! HUF_compress1X_wksp() :
 *  Single-stream compression, caller-provided workspace, no table re-use. */
size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, 1 /*single stream*/,
                                 workSpace, wkspSize,
                                 NULL, NULL, 0, 0 /*bmi2*/);
}

/*! HUF_compress1X_repeat() :
 *  Single-stream compression, with optional re-use of `hufTable`. */
size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize,
                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, 1 /*single stream*/,
                                 workSpace, wkspSize, hufTable,
                                 repeat, preferRepeat, bmi2);
}

/*! HUF_compress1X() :
 *  Convenience wrapper : single-stream compression with stack workspace. */
size_t HUF_compress1X (void* dst, size_t dstSize,
                 const void* src, size_t srcSize,
                 unsigned maxSymbolValue, unsigned huffLog)
{
    unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
    return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
}

/* HUF_compress4X_wksp():
 * compress input using 4 streams.
 * provide workspace to generate compression tables */
size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, 0 /*4 streams*/,
                                 workSpace, wkspSize,
                                 NULL, NULL, 0, 0 /*bmi2*/);
}

/* HUF_compress4X_repeat():
 * compress input using 4 streams.
 * re-use an existing huffman compression table */
size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize,
                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, 0 /* 4 streams */,
                                 workSpace, wkspSize,
                                 hufTable, repeat, preferRepeat, bmi2);
}

/*! HUF_compress2() :
 *  Convenience wrapper : 4-stream compression with stack workspace. */
size_t HUF_compress2 (void* dst, size_t dstSize,
                const void* src, size_t srcSize,
                unsigned maxSymbolValue, unsigned huffLog)
{
    unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
    return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
}

/*! HUF_compress() :
 *  Simplest entry point : full byte alphabet (255), default table log. */
size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
    return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
}