/* ******************************************************************
 * Huffman encoder, part of New Generation Entropy library
 * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
 *
 *  You can contact the author at :
 *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
 *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
****************************************************************** */

/* **************************************************************
*  Compiler specifics
****************************************************************/
#ifdef _MSC_VER    /* Visual Studio */
#  pragma warning(disable : 4127)   /* disable: C4127: conditional expression is constant */
#endif


/* **************************************************************
*  Includes
****************************************************************/
#include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memset */
#include "../common/compiler.h"
#include "../common/bitstream.h"
#include "hist.h"
#define FSE_STATIC_LINKING_ONLY    /* FSE_optimalTableLog_internal */
#include "../common/fse.h"         /* header compression */
#define HUF_STATIC_LINKING_ONLY
#include "../common/huf.h"
#include "../common/error_private.h"


/* **************************************************************
*  Error Management
****************************************************************/
#define HUF_isError ERR_isError
#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)   /* use only *after* variable declarations */


/* **************************************************************
*  Utils
****************************************************************/
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
{
    return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
}
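
/* Illustrative sketch, not part of the library : how a caller typically derives
 * a table depth before building a CTable. The HUF_EXAMPLES guard is hypothetical,
 * so this never builds by default; the final clamp is a defensive assumption,
 * mirroring the HUF_TABLELOG_MAX check done later by HUF_buildCTable_wksp().
 */
#ifdef HUF_EXAMPLES
static unsigned HUF_example_pickTableLog(size_t srcSize, unsigned maxSymbolValue)
{
    /* start from the library default depth; the heuristic shrinks it for small inputs */
    unsigned tableLog = HUF_optimalTableLog(HUF_TABLELOG_DEFAULT, srcSize, maxSymbolValue);
    if (tableLog > HUF_TABLELOG_MAX) tableLog = HUF_TABLELOG_MAX;   /* defensive clamp */
    return tableLog;
}
#endif /* HUF_EXAMPLES */
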
/* *******************************************************
*  HUF : Huffman block compression
*********************************************************/
/* HUF_compressWeights() :
 * Same as FSE_compress(), but dedicated to huff0's weights compression.
 * The use case needs much less stack memory.
 * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
 */
#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
{
    BYTE* const ostart = (BYTE*) dst;
    BYTE* op = ostart;
    BYTE* const oend = ostart + dstSize;

    unsigned maxSymbolValue = HUF_TABLELOG_MAX;
    U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;

    FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
    U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];

    unsigned count[HUF_TABLELOG_MAX+1];
    S16 norm[HUF_TABLELOG_MAX+1];

    /* init conditions */
    if (wtSize <= 1) return 0;   /* Not compressible */

    /* Scan input and build symbol stats */
    {   unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize);   /* never fails */
        if (maxCount == wtSize) return 1;   /* only a single symbol in src : rle */
        if (maxCount == 1) return 0;        /* each symbol present maximum once => not compressible */
    }

    tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
    CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );

    /* Write table description header */
    {   CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), norm, maxSymbolValue, tableLog) );
        op += hSize;
    }

    /* Compress */
    CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
    {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, CTable) );
        if (cSize == 0) return 0;   /* not enough space for compressed data */
        op += cSize;
    }

    return (size_t)(op-ostart);
}
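
/* Illustrative sketch, not part of the library (hypothetical HUF_EXAMPLES guard) :
 * what HUF_compressWeights() consumes. The input is a table of Huffman weights,
 * each in [0, HUF_TABLELOG_MAX], which is why a small FSE tableLog (<= 6) and a
 * small stack footprint suffice. Buffer sizes below are arbitrary toy values.
 */
#ifdef HUF_EXAMPLES
static size_t HUF_example_compressWeights(void)
{
    BYTE const weights[8] = { 1, 1, 2, 2, 3, 3, 4, 4 };   /* toy weight table */
    BYTE header[64];                                      /* ample for 8 weights */
    /* returns 0 (not compressible), 1 (rle), an error code, or the compressed size */
    return HUF_compressWeights(header, sizeof(header), weights, sizeof(weights));
}
#endif /* HUF_EXAMPLES */
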
/*! HUF_writeCTable() :
    `CTable` : Huffman tree to save, using huf representation.
    @return : size of saved CTable */
size_t HUF_writeCTable (void* dst, size_t maxDstSize,
                        const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
{
    BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
    BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
    BYTE* op = (BYTE*)dst;
    U32 n;

    /* check conditions */
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);

    /* convert to weight */
    bitsToWeight[0] = 0;
    for (n=1; n<huffLog+1; n++)
        bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
    for (n=0; n<maxSymbolValue; n++)
        huffWeight[n] = bitsToWeight[CTable[n].nbBits];

    /* attempt weights compression by FSE */
    {   CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) );
        if ((hSize>1) & (hSize < maxSymbolValue/2)) {   /* FSE compressed */
            op[0] = (BYTE)hSize;
            return hSize+1;
    }   }

    /* write raw values as 4-bits (max : 15) */
    if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen : likely means source cannot be compressed */
    if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
    op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
    huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination */
    for (n=0; n<maxSymbolValue; n+=2)
        op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
    return ((maxSymbolValue+1)/2) + 1;
}


size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
{
    BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];   /* init not required, even though some static analyzer may complain */
    U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];  /* large enough for values from 0 to 16 */
    U32 tableLog = 0;
    U32 nbSymbols = 0;

    /* get symbol weights */
    CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
    *hasZeroWeights = (rankVal[0] > 0);

    /* check result */
    if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);

    /* Prepare base value per rank */
    {   U32 n, nextRankStart = 0;
        for (n=1; n<=tableLog; n++) {
            U32 curr = nextRankStart;
            nextRankStart += (rankVal[n] << (n-1));
            rankVal[n] = curr;
    }   }

    /* fill nbBits */
    {   U32 n; for (n=0; n<nbSymbols; n++) {
            const U32 w = huffWeight[n];
            CTable[n].nbBits = (BYTE)(tableLog + 1 - w) & -(w != 0);
    }   }

    /* fill val */
    {   U16 nbPerRank[HUF_TABLELOG_MAX+2]  = {0};   /* support w=0=>n=tableLog+1 */
        U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
        { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
        /* determine starting value per rank */
        valPerRank[tableLog+1] = 0;   /* for w==0 */
        {   U16 min = 0;
            U32 n; for (n=tableLog; n>0; n--) {   /* start at n=tablelog <-> w=1 */
                valPerRank[n] = min;   /* get starting value within each rank */
                min += nbPerRank[n];
                min >>= 1;
        }   }
        /* assign value within rank, symbol order */
        { U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; }
    }

    *maxSymbolValuePtr = nbSymbols - 1;
    return readSize;
}

U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue)
{
    const HUF_CElt* table = (const HUF_CElt*)symbolTable;
    assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
    return table[symbolValue].nbBits;
}
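
/* Illustrative round-trip sketch, not part of the library (hypothetical
 * HUF_EXAMPLES guard) : a table serialized by HUF_writeCTable() can be rebuilt
 * by HUF_readCTable(). HUF_CTABLEBOUND bounds the serialized header size.
 */
#ifdef HUF_EXAMPLES
static size_t HUF_example_ctableRoundTrip(const HUF_CElt* ctable,
                                          unsigned maxSymbolValue, unsigned huffLog)
{
    BYTE header[HUF_CTABLEBOUND];
    HUF_CElt rebuilt[HUF_SYMBOLVALUE_MAX + 1];
    unsigned maxSV = maxSymbolValue;
    unsigned hasZeroWeights = 0;
    size_t const hSize = HUF_writeCTable(header, sizeof(header), ctable, maxSymbolValue, huffLog);
    if (HUF_isError(hSize)) return hSize;
    /* on success, rebuilt[] matches ctable[] for all symbols <= maxSV */
    return HUF_readCTable(rebuilt, &maxSV, header, hSize, &hasZeroWeights);
}
#endif /* HUF_EXAMPLES */
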

typedef struct nodeElt_s {
    U32 count;
    U16 parent;
    BYTE byte;
    BYTE nbBits;
} nodeElt;

/**
 * HUF_setMaxHeight():
 * Enforces maxNbBits on the Huffman tree described in huffNode.
 *
 * It sets all nodes with nbBits > maxNbBits to be maxNbBits. Then it adjusts
 * the tree so that it is a valid canonical Huffman tree.
 *
 * @pre               The sum of the ranks of each symbol == 2^largestBits,
 *                    where largestBits == huffNode[lastNonNull].nbBits.
 * @post              The sum of the ranks of each symbol == 2^largestBits,
 *                    where largestBits is the return value <= maxNbBits.
 *
 * @param huffNode    The Huffman tree modified in place to enforce maxNbBits.
 * @param lastNonNull The symbol with the lowest count in the Huffman tree.
 * @param maxNbBits   The maximum allowed number of bits, which the Huffman tree
 *                    may not respect. After this function the Huffman tree will
 *                    respect maxNbBits.
 * @return            The maximum number of bits of the Huffman tree after adjustment,
 *                    necessarily no more than maxNbBits.
 */
static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
{
    const U32 largestBits = huffNode[lastNonNull].nbBits;
    /* early exit : no elt > maxNbBits, so the tree is already valid. */
    if (largestBits <= maxNbBits) return largestBits;

    /* there are several too large elements (at least >= 2) */
    {   int totalCost = 0;
        const U32 baseCost = 1 << (largestBits - maxNbBits);
        int n = (int)lastNonNull;

        /* Adjust any ranks > maxNbBits to maxNbBits.
         * Compute totalCost, which is how far the sum of the ranks is
         * over 2^largestBits after adjusting the offending ranks.
         */
        while (huffNode[n].nbBits > maxNbBits) {
            totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
            huffNode[n].nbBits = (BYTE)maxNbBits;
            n--;
        }
        /* n stops at huffNode[n].nbBits <= maxNbBits */
        assert(huffNode[n].nbBits <= maxNbBits);
        /* n ends at the index of the smallest symbol using < maxNbBits */
        while (huffNode[n].nbBits == maxNbBits) --n;

        /* renorm totalCost from 2^largestBits to 2^maxNbBits
         * note : totalCost is necessarily a multiple of baseCost */
        assert((totalCost & (baseCost - 1)) == 0);
        totalCost >>= (largestBits - maxNbBits);
        assert(totalCost > 0);

        /* repay normalized cost */
        {   U32 const noSymbol = 0xF0F0F0F0;
            U32 rankLast[HUF_TABLELOG_MAX+2];

            /* Get pos of last (smallest = lowest cum. count) symbol per rank */
            ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
            {   U32 currentNbBits = maxNbBits;
                int pos;
                for (pos=n ; pos >= 0; pos--) {
                    if (huffNode[pos].nbBits >= currentNbBits) continue;
                    currentNbBits = huffNode[pos].nbBits;   /* < maxNbBits */
                    rankLast[maxNbBits-currentNbBits] = (U32)pos;
            }   }

            while (totalCost > 0) {
                /* Try to reduce the next power of 2 above totalCost because we
                 * gain back half the rank.
                 */
                U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1;
                for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
                    U32 const highPos = rankLast[nBitsToDecrease];
                    U32 const lowPos = rankLast[nBitsToDecrease-1];
                    if (highPos == noSymbol) continue;
                    /* Decrease highPos if no symbols of lowPos or if it is
                     * not cheaper to remove 2 lowPos than highPos.
                     */
                    if (lowPos == noSymbol) break;
                    {   U32 const highTotal = huffNode[highPos].count;
                        U32 const lowTotal = 2 * huffNode[lowPos].count;
                        if (highTotal <= lowTotal) break;
                }   }
                /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
                assert(rankLast[nBitsToDecrease] != noSymbol || nBitsToDecrease == 1);
                /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
                while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
                    nBitsToDecrease++;
                assert(rankLast[nBitsToDecrease] != noSymbol);
                /* Increase the number of bits to gain back half the rank cost. */
                totalCost -= 1 << (nBitsToDecrease-1);
                huffNode[rankLast[nBitsToDecrease]].nbBits++;

                /* Fix up the new rank.
                 * If the new rank was empty, this symbol is now its smallest.
                 * Otherwise, this symbol will be the largest in the new rank so no adjustment.
                 */
                if (rankLast[nBitsToDecrease-1] == noSymbol)
                    rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];
                /* Fix up the old rank.
                 * If the symbol was at position 0, meaning it was the highest weight symbol in the tree,
                 * it must be the only symbol in its rank, so the old rank now has no symbols.
                 * Otherwise, since the Huffman nodes are sorted by count, the previous position is now
                 * the smallest node in the rank. If the previous position belongs to a different rank,
                 * then the rank is now empty.
                 */
                if (rankLast[nBitsToDecrease] == 0)   /* special case, reached largest symbol */
                    rankLast[nBitsToDecrease] = noSymbol;
                else {
                    rankLast[nBitsToDecrease]--;
                    if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
                        rankLast[nBitsToDecrease] = noSymbol;   /* this rank is now empty */
                }
            }   /* while (totalCost > 0) */

            /* If we've removed too much weight, then we have to add it back.
             * To avoid overshooting again, we only adjust the smallest rank.
             * We take the largest nodes from the lowest rank 0 and move them
             * to rank 1. There's guaranteed to be enough rank 0 symbols because
             * TODO.
             */
            while (totalCost < 0) {   /* Sometimes, cost correction overshoots */
                /* special case : no rank 1 symbol (using maxNbBits-1);
                 * let's create one from largest rank 0 (using maxNbBits).
                 */
                if (rankLast[1] == noSymbol) {
                    while (huffNode[n].nbBits == maxNbBits) n--;
                    huffNode[n+1].nbBits--;
                    assert(n >= 0);
                    rankLast[1] = (U32)(n+1);
                    totalCost++;
                    continue;
                }
                huffNode[ rankLast[1] + 1 ].nbBits--;
                rankLast[1]++;
                totalCost++;
            }
        }   /* repay normalized cost */
    }   /* there are several too large elements (at least >= 2) */

    return maxNbBits;
}
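
/* Illustrative sketch, not part of the library (hypothetical HUF_EXAMPLES guard) :
 * the invariant HUF_setMaxHeight() preserves. The "rank" of a symbol of depth
 * nbBits is 2^(maxNbBits - nbBits); a valid tree has ranks summing to exactly
 * 2^maxNbBits (Kraft equality). totalCost above measures the deviation from
 * this sum while depths are being adjusted.
 */
#ifdef HUF_EXAMPLES
static int HUF_example_kraftSumIsExact(const nodeElt* huffNode, int lastNonNull, U32 maxNbBits)
{
    U32 sum = 0;
    int n;
    for (n = 0; n <= lastNonNull; n++) {
        assert(huffNode[n].nbBits >= 1 && huffNode[n].nbBits <= maxNbBits);
        sum += 1u << (maxNbBits - huffNode[n].nbBits);
    }
    return sum == (1u << maxNbBits);
}
#endif /* HUF_EXAMPLES */
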
typedef struct {
    U32 base;
    U32 curr;
} rankPos;

typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];

#define RANK_POSITION_TABLE_SIZE 32

typedef struct {
  huffNodeTable huffNodeTbl;
  rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
} HUF_buildCTable_wksp_tables;

/**
 * HUF_sort():
 * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.
 *
 * @param[out] huffNode       Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.
 *                            Must have (maxSymbolValue + 1) entries.
 * @param[in]  count          Histogram of the symbols.
 * @param[in]  maxSymbolValue Maximum symbol value.
 * @param      rankPosition   This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.
 */
static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition)
{
    int n;
    int const maxSymbolValue1 = (int)maxSymbolValue + 1;

    /* Compute base and set curr to base.
     * For each symbol n let lowerRank = BIT_highbit32(count[n]+1) and rank = lowerRank + 1.
     * Then 2^lowerRank <= count[n]+1 < 2^rank.
     * We attribute each symbol to lowerRank's base value, because we want to know where
     * each rank begins in the output, so for rank R we want to count ranks R+1 and above.
     */
    ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
    for (n = 0; n < maxSymbolValue1; ++n) {
        U32 lowerRank = BIT_highbit32(count[n] + 1);
        rankPosition[lowerRank].base++;
    }
    assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0);
    for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) {
        rankPosition[n-1].base += rankPosition[n].base;
        rankPosition[n-1].curr = rankPosition[n-1].base;
    }
    /* Sort */
    for (n = 0; n < maxSymbolValue1; ++n) {
        U32 const c = count[n];
        U32 const r = BIT_highbit32(c+1) + 1;
        U32 pos = rankPosition[r].curr++;
        /* Insert into the correct position in the rank.
         * We have at most 256 symbols, so this insertion should be fine.
         */
        while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) {
            huffNode[pos] = huffNode[pos-1];
            pos--;
        }
        huffNode[pos].count = c;
        huffNode[pos].byte = (BYTE)n;
    }
}
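
/* Illustrative sketch, not part of the library (hypothetical HUF_EXAMPLES guard) :
 * the bucketing HUF_sort() relies on. count+1 in [2^r, 2^(r+1)) lands in bucket
 * r, so e.g. counts 0, 1, 2, 3, 7 map to lowerRanks 0, 1, 1, 2, 3.
 */
#ifdef HUF_EXAMPLES
static U32 HUF_example_lowerRank(U32 count)
{
    return BIT_highbit32(count + 1);   /* == floor(log2(count+1)) */
}
#endif /* HUF_EXAMPLES */
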

/** HUF_buildCTable_wksp() :
 *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
 *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables).
 */
#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)

/* HUF_buildTree():
 * Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree.
 *
 * @param huffNode        The array sorted by HUF_sort(). Builds the Huffman tree in this array.
 * @param maxSymbolValue  The maximum symbol value.
 * @return                The smallest node in the Huffman tree (by count).
 */
static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
{
    nodeElt* const huffNode0 = huffNode - 1;
    int nonNullRank;
    int lowS, lowN;
    int nodeNb = STARTNODE;
    int n, nodeRoot;
    /* init for parents */
    nonNullRank = (int)maxSymbolValue;
    while(huffNode[nonNullRank].count == 0) nonNullRank--;
    lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
    huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
    huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb;
    nodeNb++; lowS-=2;
    for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
    huffNode0[0].count = (U32)(1U<<31);   /* fake entry, strong barrier */

    /* create parents */
    while (nodeNb <= nodeRoot) {
        int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
        int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
        huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
        huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb;
        nodeNb++;
    }

    /* distribute weights (unlimited tree height) */
    huffNode[nodeRoot].nbBits = 0;
    for (n=nodeRoot-1; n>=STARTNODE; n--)
        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
    for (n=0; n<=nonNullRank; n++)
        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;

    return nonNullRank;
}

/**
 * HUF_buildCTableFromTree():
 * Build the CTable given the Huffman tree in huffNode.
 *
 * @param[out] CTable         The output Huffman CTable.
 * @param      huffNode       The Huffman tree.
 * @param      nonNullRank    The last and smallest node in the Huffman tree.
 * @param      maxSymbolValue The maximum symbol value.
 * @param      maxNbBits      The exact maximum number of bits used in the Huffman tree.
 */
static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
{
    /* fill result into ctable (val, nbBits) */
    int n;
    U16 nbPerRank[HUF_TABLELOG_MAX+1]  = {0};
    U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
    int const alphabetSize = (int)(maxSymbolValue + 1);
    for (n=0; n<=nonNullRank; n++)
        nbPerRank[huffNode[n].nbBits]++;
    /* determine starting value per rank */
    {   U16 min = 0;
        for (n=(int)maxNbBits; n>0; n--) {
            valPerRank[n] = min;   /* get starting value within each rank */
            min += nbPerRank[n];
            min >>= 1;
    }   }
    for (n=0; n<alphabetSize; n++)
        CTable[huffNode[n].byte].nbBits = huffNode[n].nbBits;   /* push nbBits per symbol, symbol order */
    for (n=0; n<alphabetSize; n++)
        CTable[n].val = valPerRank[CTable[n].nbBits]++;   /* assign value within rank, symbol order */
}

size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
{
    HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)workSpace;
    nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
    nodeElt* const huffNode = huffNode0+1;
    int nonNullRank;

    /* safety checks */
    if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);   /* must be aligned on 4-bytes boundaries */
    if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
        return ERROR(workSpace_tooSmall);
    if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
        return ERROR(maxSymbolValue_tooLarge);
    ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));

    /* sort, decreasing order */
    HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);

    /* build tree */
    nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);

    /* enforce maxTableLog */
    maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
    if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */

    HUF_buildCTableFromTree(tree, huffNode, nonNullRank, maxSymbolValue, maxNbBits);

    return maxNbBits;
}
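
/* Illustrative sketch, not part of the library (hypothetical HUF_EXAMPLES guard) :
 * building a CTable from raw bytes. HIST_count() (hist.h) fills the histogram
 * consumed by HUF_buildCTable_wksp(); the return value is the realized maximum
 * depth. Passing maxNbBits==0 selects HUF_TABLELOG_DEFAULT, as coded above.
 */
#ifdef HUF_EXAMPLES
static size_t HUF_example_buildFromSrc(HUF_CElt* ctable, const void* src, size_t srcSize)
{
    unsigned count[HUF_SYMBOLVALUE_MAX + 1];
    unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;
    HUF_buildCTable_wksp_tables wksp;   /* sizeable : prefer a heap workspace in real code */
    size_t const largest = HIST_count(count, &maxSymbolValue, src, srcSize);
    if (HIST_isError(largest)) return largest;
    return HUF_buildCTable_wksp(ctable, count, maxSymbolValue, 0 /* default depth */,
                                &wksp, sizeof(wksp));
}
#endif /* HUF_EXAMPLES */
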

size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
{
    size_t nbBits = 0;
    int s;
    for (s = 0; s <= (int)maxSymbolValue; ++s) {
        nbBits += CTable[s].nbBits * count[s];
    }
    return nbBits >> 3;
}

int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
    int bad = 0;
    int s;
    for (s = 0; s <= (int)maxSymbolValue; ++s) {
        bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
    }
    return !bad;
}

size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }

FORCE_INLINE_TEMPLATE void
HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
{
    BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
}

#define HUF_FLUSHBITS(s)  BIT_flushBits(s)

#define HUF_FLUSHBITS_1(stream) \
    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)

#define HUF_FLUSHBITS_2(stream) \
    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)

FORCE_INLINE_TEMPLATE size_t
HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
                                   const void* src, size_t srcSize,
                                   const HUF_CElt* CTable)
{
    const BYTE* ip = (const BYTE*) src;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;
    size_t n;
    BIT_CStream_t bitC;

    /* init */
    if (dstSize < 8) return 0;   /* not enough space to compress */
    { size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op));
      if (HUF_isError(initErr)) return 0; }

    n = srcSize & ~3;   /* join to mod 4 */
    switch (srcSize & 3)
    {
        case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
                 HUF_FLUSHBITS_2(&bitC);
                 /* fall-through */
        case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
                 HUF_FLUSHBITS_1(&bitC);
                 /* fall-through */
        case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
                 HUF_FLUSHBITS(&bitC);
                 /* fall-through */
        case 0 : /* fall-through */
        default: break;
    }

    for (; n>0; n-=4) {   /* note : n&3==0 at this stage */
        HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
        HUF_FLUSHBITS_1(&bitC);
        HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
        HUF_FLUSHBITS_2(&bitC);
        HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
        HUF_FLUSHBITS_1(&bitC);
        HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
        HUF_FLUSHBITS(&bitC);
    }

    return BIT_closeCStream(&bitC);
}

#if DYNAMIC_BMI2

static TARGET_ATTRIBUTE("bmi2") size_t
HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
                                   const void* src, size_t srcSize,
                                   const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

static size_t
HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
                                      const void* src, size_t srcSize,
                                      const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, const int bmi2)
{
    if (bmi2) {
        return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
    }
    return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
}

#else

static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, const int bmi2)
{
    (void)bmi2;
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

#endif

size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
}
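
/* Illustrative sketch, not part of the library (hypothetical HUF_EXAMPLES guard) :
 * single-stream compression with a prebuilt table. A return of 0 is not an
 * error here : it means the output did not fit (dstSize < 8, or data that does
 * not compress), and the caller should store the block raw instead.
 */
#ifdef HUF_EXAMPLES
static size_t HUF_example_compress1X(void* dst, size_t dstSize,
                                     const void* src, size_t srcSize,
                                     const HUF_CElt* ctable)
{
    size_t const cSize = HUF_compress1X_usingCTable(dst, dstSize, src, srcSize, ctable);
    if (cSize == 0) {
        /* fall back : emit the block uncompressed (framing left to the caller) */
    }
    return cSize;
}
#endif /* HUF_EXAMPLES */
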

static size_t
HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, int bmi2)
{
    size_t const segmentSize = (srcSize+3)/4;   /* first 3 segments */
    const BYTE* ip = (const BYTE*) src;
    const BYTE* const iend = ip + srcSize;
    BYTE* const ostart = (BYTE*) dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;

    if (dstSize < 6 + 1 + 1 + 1 + 8) return 0;   /* minimum space to compress successfully */
    if (srcSize < 12) return 0;                  /* no saving possible : too small input */
    op += 6;   /* jumpTable */

    assert(op <= oend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
        if (cSize==0) return 0;
        assert(cSize <= 65535);
        MEM_writeLE16(ostart, (U16)cSize);
        op += cSize;
    }

    ip += segmentSize;
    assert(op <= oend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
        if (cSize==0) return 0;
        assert(cSize <= 65535);
        MEM_writeLE16(ostart+2, (U16)cSize);
        op += cSize;
    }

    ip += segmentSize;
    assert(op <= oend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
        if (cSize==0) return 0;
        assert(cSize <= 65535);
        MEM_writeLE16(ostart+4, (U16)cSize);
        op += cSize;
    }

    ip += segmentSize;
    assert(op <= oend);
    assert(ip <= iend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
        if (cSize==0) return 0;
        op += cSize;
    }

    return (size_t)(op-ostart);
}

size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
{
    return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
}
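
/* Illustrative sketch, not part of the library (hypothetical HUF_EXAMPLES guard) :
 * the 4-stream framing written above. The 6-byte jump table holds three
 * little-endian U16 sizes for streams 1-3; stream 4 occupies the remainder of
 * the compressed block.
 */
#ifdef HUF_EXAMPLES
static void HUF_example_readJumpTable(const void* cSrc, size_t cSrcSize, size_t sizes[4])
{
    const BYTE* const istart = (const BYTE*)cSrc;
    assert(cSrcSize >= 6);
    sizes[0] = MEM_readLE16(istart);
    sizes[1] = MEM_readLE16(istart + 2);
    sizes[2] = MEM_readLE16(istart + 4);
    sizes[3] = cSrcSize - 6 - (sizes[0] + sizes[1] + sizes[2]);   /* remainder */
}
#endif /* HUF_EXAMPLES */
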
typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;

static size_t HUF_compressCTable_internal(
                BYTE* const ostart, BYTE* op, BYTE* const oend,
                const void* src, size_t srcSize,
                HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
{
    size_t const cSize = (nbStreams==HUF_singleStream) ?
                         HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) :
                         HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2);
    if (HUF_isError(cSize)) { return cSize; }
    if (cSize==0) { return 0; }   /* uncompressible */
    op += cSize;
    /* check compressibility */
    assert(op >= ostart);
    if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
    return (size_t)(op-ostart);
}

typedef struct {
    unsigned count[HUF_SYMBOLVALUE_MAX + 1];
    HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
    HUF_buildCTable_wksp_tables buildCTable_wksp;
} HUF_compress_tables_t;

/* HUF_compress_internal() :
 * `workSpace_align4` must be aligned on 4-bytes boundaries,
 * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U32 unsigned */
static size_t
HUF_compress_internal (void* dst, size_t dstSize,
                 const void* src, size_t srcSize,
                 unsigned maxSymbolValue, unsigned huffLog,
                 HUF_nbStreams_e nbStreams,
                 void* workSpace_align4, size_t wkspSize,
                 HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
                 const int bmi2)
{
    HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace_align4;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;

    HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE);
    assert(((size_t)workSpace_align4 & 3) == 0);   /* must be aligned on 4-bytes boundaries */

    /* checks & inits */
    if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
    if (!srcSize) return 0;   /* Uncompressed */
    if (!dstSize) return 0;   /* cannot fit anything within dst budget */
    if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);   /* current block size limit */
    if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
    if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
    if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;

    /* Heuristic : If old table is valid, use it for small inputs */
    if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
        return HUF_compressCTable_internal(ostart, op, oend,
                                           src, srcSize,
                                           nbStreams, oldHufTable, bmi2);
    }

    /* Scan input and build symbol stats */
    {   CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace_align4, wkspSize) );
        if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
        if (largest <= (srcSize >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
    }

    /* Check validity of previous table */
    if ( repeat
      && *repeat == HUF_repeat_check
      && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) {
        *repeat = HUF_repeat_none;
    }
    /* Heuristic : use existing table for small inputs */
    if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
        return HUF_compressCTable_internal(ostart, op, oend,
                                           src, srcSize,
                                           nbStreams, oldHufTable, bmi2);
    }

    /* Build Huffman Tree */
    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
    {   size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
                                            maxSymbolValue, huffLog,
                                            &table->buildCTable_wksp, sizeof(table->buildCTable_wksp));
        CHECK_F(maxBits);
        huffLog = (U32)maxBits;
        /* Zero unused symbols in CTable, so we can check it for validity */
        ZSTD_memset(table->CTable + (maxSymbolValue + 1), 0,
                    sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
    }

    /* Write table description header */
    {   CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) );
        /* Check if using previous huffman table is beneficial */
        if (repeat && *repeat != HUF_repeat_none) {
            size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
            size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue);
            if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
                return HUF_compressCTable_internal(ostart, op, oend,
                                                   src, srcSize,
                                                   nbStreams, oldHufTable, bmi2);
        }   }

        /* Use the new huffman table */
        if (hSize + 12ul >= srcSize) { return 0; }
        op += hSize;
        if (repeat) { *repeat = HUF_repeat_none; }
        if (oldHufTable)
            ZSTD_memcpy(oldHufTable, table->CTable, sizeof(table->CTable));   /* Save new table */
    }
    return HUF_compressCTable_internal(ostart, op, oend,
                                       src, srcSize,
                                       nbStreams, table->CTable, bmi2);
}


size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_singleStream,
                                 workSpace, wkspSize,
                                 NULL, NULL, 0, 0 /*bmi2*/);
}

size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize,
                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_singleStream,
                                 workSpace, wkspSize, hufTable,
                                 repeat, preferRepeat, bmi2);
}

/* HUF_compress4X_wksp():
 * compress input using 4 streams.
 * provide workspace to generate compression tables */
size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_fourStreams,
                                 workSpace, wkspSize,
                                 NULL, NULL, 0, 0 /*bmi2*/);
}
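
/* Illustrative sketch, not part of the library (hypothetical HUF_EXAMPLES guard) :
 * interpreting the return conventions of HUF_compress4X_wksp(). The workspace
 * must be 4-byte aligned and at least HUF_WORKSPACE_SIZE bytes.
 */
#ifdef HUF_EXAMPLES
static size_t HUF_example_compress4X(void* dst, size_t dstSize,
                                     const void* src, size_t srcSize,
                                     void* workSpace, size_t wkspSize)
{
    size_t const cSize = HUF_compress4X_wksp(dst, dstSize, src, srcSize,
                                             HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT,
                                             workSpace, wkspSize);
    if (HUF_isError(cSize)) return cSize;    /* hard error */
    if (cSize == 0) { /* not compressible : store the block raw */ }
    if (cSize == 1) { /* single-symbol input : emit as RLE */ }
    return cSize;
}
#endif /* HUF_EXAMPLES */
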
/* HUF_compress4X_repeat():
 * compress input using 4 streams.
 * re-use an existing huffman compression table */
size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize,
                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_fourStreams,
                                 workSpace, wkspSize,
                                 hufTable, repeat, preferRepeat, bmi2);
}

#ifndef ZSTD_NO_UNUSED_FUNCTIONS
/** HUF_buildCTable() :
 * @return : maxNbBits
 *  Note : count is used before tree is written, so they can safely overlap
 */
size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits)
{
    HUF_buildCTable_wksp_tables workspace;
    return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, &workspace, sizeof(workspace));
}

size_t HUF_compress1X (void* dst, size_t dstSize,
                 const void* src, size_t srcSize,
                 unsigned maxSymbolValue, unsigned huffLog)
{
    unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
    return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
}

size_t HUF_compress2 (void* dst, size_t dstSize,
                const void* src, size_t srcSize,
                unsigned maxSymbolValue, unsigned huffLog)
{
    unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
    return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
}

size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
    return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
}
#endif
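
/* Illustrative sketch, not part of the library (hypothetical HUF_EXAMPLES guard) :
 * a simplified multi-block loop around HUF_compress4X_repeat(). The function
 * saves any newly built table into hufTable, and validates/reuses it when
 * *repeat is HUF_repeat_check; the caller-side state management below is an
 * assumption for illustration (zstd's real callers track more state), and block
 * framing for the raw/RLE cases is omitted.
 */
#ifdef HUF_EXAMPLES
static size_t HUF_example_compressBlocks(void* dst, size_t dstCapacity,
                                         const void* src, size_t srcSize, size_t blockSize,
                                         void* workSpace, size_t wkspSize)
{
    HUF_CElt hufTable[HUF_SYMBOLVALUE_MAX + 1];
    HUF_repeat repeat = HUF_repeat_none;
    BYTE* op = (BYTE*)dst;
    const BYTE* ip = (const BYTE*)src;
    while (srcSize > 0) {
        size_t const thisBlock = (srcSize < blockSize) ? srcSize : blockSize;
        size_t const cSize = HUF_compress4X_repeat(op, dstCapacity, ip, thisBlock,
                                        HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT,
                                        workSpace, wkspSize,
                                        hufTable, &repeat, 0 /* preferRepeat */, 0 /* bmi2 */);
        if (HUF_isError(cSize)) return cSize;
        if (cSize > 1) repeat = HUF_repeat_check;   /* a table was saved : allow reuse next block */
        op += cSize; dstCapacity -= cSize;
        ip += thisBlock; srcSize -= thisBlock;
    }
    return (size_t)(op - (BYTE*)dst);
}
#endif /* HUF_EXAMPLES */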