/* ******************************************************************
 * Huffman encoder, part of New Generation Entropy library
 * Copyright (c) Yann Collet, Facebook, Inc.
 *
 *  You can contact the author at :
 *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
 *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
****************************************************************** */

/* **************************************************************
*  Compiler specifics
****************************************************************/
#ifdef _MSC_VER    /* Visual Studio */
#  pragma warning(disable : 4127)  /* disable: C4127: conditional expression is constant */
#endif


/* **************************************************************
*  Includes
****************************************************************/
#include "../common/zstd_deps.h"     /* ZSTD_memcpy, ZSTD_memset */
#include "../common/compiler.h"
#include "../common/bitstream.h"
#include "hist.h"
#define FSE_STATIC_LINKING_ONLY   /* FSE_optimalTableLog_internal */
#include "../common/fse.h"        /* header compression */
#define HUF_STATIC_LINKING_ONLY
#include "../common/huf.h"
#include "../common/error_private.h"


/* **************************************************************
*  Error Management
****************************************************************/
#define HUF_isError ERR_isError
#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)   /* use only *after* variable declarations */


/* **************************************************************
*  Utils
****************************************************************/
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
{
    return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
}


/* *******************************************************
*  HUF : Huffman block compression
*********************************************************/
#define HUF_WORKSPACE_MAX_ALIGNMENT 8

static void* HUF_alignUpWorkspace(void* workspace, size_t* workspaceSizePtr, size_t align)
{
    size_t const mask = align - 1;
    size_t const rem = (size_t)workspace & mask;
    size_t const add = (align - rem) & mask;
    BYTE* const aligned = (BYTE*)workspace + add;
    assert((align & (align - 1)) == 0);   /* pow 2 */
    assert(align <= HUF_WORKSPACE_MAX_ALIGNMENT);
    if (*workspaceSizePtr >= add) {
        assert(add < align);
        assert(((size_t)aligned & mask) == 0);
        *workspaceSizePtr -= add;
        return aligned;
    } else {
        *workspaceSizePtr = 0;
        return NULL;
    }
}
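
/* Example : with workspace == 0x1003 and align == 4 (a power of 2),
 * mask == 3, rem == 3, add == 1, so the returned pointer is 0x1004 and
 * *workspaceSizePtr shrinks by 1. If the workspace is already aligned,
 * add == 0 and the pointer is returned unchanged. */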

/* HUF_compressWeights() :
 * Same as FSE_compress(), but dedicated to huff0's weights compression.
 * The use case needs much less stack memory.
 * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
 */
#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6

typedef struct {
    FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
    U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
    unsigned count[HUF_TABLELOG_MAX+1];
    S16 norm[HUF_TABLELOG_MAX+1];
} HUF_CompressWeightsWksp;

static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightTable, size_t wtSize, void* workspace, size_t workspaceSize)
{
    BYTE* const ostart = (BYTE*) dst;
    BYTE* op = ostart;
    BYTE* const oend = ostart + dstSize;

    unsigned maxSymbolValue = HUF_TABLELOG_MAX;
    U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
    HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));

    if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);

    /* init conditions */
    if (wtSize <= 1) return 0;   /* Not compressible */

    /* Scan input and build symbol stats */
    {   unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize);   /* never fails */
        if (maxCount == wtSize) return 1;   /* only a single symbol in src : rle */
        if (maxCount == 1) return 0;        /* each symbol present maximum once => not compressible */
    }

    tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
    CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );

    /* Write table description header */
    {   CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) );
        op += hSize;
    }

    /* Compress */
    CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) );
    {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) );
        if (cSize == 0) return 0;   /* not enough space for compressed data */
        op += cSize;
    }

    return (size_t)(op-ostart);
}

static size_t HUF_getNbBits(HUF_CElt elt)
{
    return elt & 0xFF;
}

static size_t HUF_getNbBitsFast(HUF_CElt elt)
{
    return elt;
}

static size_t HUF_getValue(HUF_CElt elt)
{
    return elt & ~0xFF;
}

static size_t HUF_getValueFast(HUF_CElt elt)
{
    return elt;
}

static void HUF_setNbBits(HUF_CElt* elt, size_t nbBits)
{
    assert(nbBits <= HUF_TABLELOG_ABSOLUTEMAX);
    *elt = nbBits;
}

static void HUF_setValue(HUF_CElt* elt, size_t value)
{
    size_t const nbBits = HUF_getNbBits(*elt);
    if (nbBits > 0) {
        assert((value >> nbBits) == 0);
        *elt |= value << (sizeof(HUF_CElt) * 8 - nbBits);
    }
}

typedef struct {
    HUF_CompressWeightsWksp wksp;
    BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
    BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
} HUF_WriteCTableWksp;
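
/* Weight conversion used by HUF_writeCTable_wksp() below :
 * a symbol coded on n bits (1 <= n <= huffLog) is stored as weight huffLog + 1 - n,
 * and an unused symbol (0 bits) is stored as weight 0.
 * Example : with huffLog == 11, a 1-bit symbol has weight 11 and an 11-bit symbol has weight 1. */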

size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
                            const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
                            void* workspace, size_t workspaceSize)
{
    HUF_CElt const* const ct = CTable + 1;
    BYTE* op = (BYTE*)dst;
    U32 n;
    HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));

    /* check conditions */
    if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);

    /* convert to weight */
    wksp->bitsToWeight[0] = 0;
    for (n=1; n<huffLog+1; n++)
        wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
    for (n=0; n<maxSymbolValue; n++)
        wksp->huffWeight[n] = wksp->bitsToWeight[HUF_getNbBits(ct[n])];

    /* attempt weights compression by FSE */
    if (maxDstSize < 1) return ERROR(dstSize_tooSmall);
    {   CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
        if ((hSize>1) & (hSize < maxSymbolValue/2)) {   /* FSE compressed */
            op[0] = (BYTE)hSize;
            return hSize+1;
    }   }

    /* write raw values as 4-bits (max : 15) */
    if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen : likely means source cannot be compressed */
    if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
    op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
    wksp->huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination */
    for (n=0; n<maxSymbolValue; n+=2)
        op[(n/2)+1] = (BYTE)((wksp->huffWeight[n] << 4) + wksp->huffWeight[n+1]);
    return ((maxSymbolValue+1)/2) + 1;
}

/*! HUF_writeCTable() :
    `CTable` : Huffman tree to save, using huf representation.
    @return : size of saved CTable */
size_t HUF_writeCTable (void* dst, size_t maxDstSize,
                        const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
{
    HUF_WriteCTableWksp wksp;
    return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, &wksp, sizeof(wksp));
}


size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
{
    BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];   /* init not required, even though some static analyzer may complain */
    U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];  /* large enough for values from 0 to 16 */
    U32 tableLog = 0;
    U32 nbSymbols = 0;
    HUF_CElt* const ct = CTable + 1;

    /* get symbol weights */
    CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
    *hasZeroWeights = (rankVal[0] > 0);

    /* check result */
    if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);

    CTable[0] = tableLog;

    /* Prepare base value per rank */
    {   U32 n, nextRankStart = 0;
        for (n=1; n<=tableLog; n++) {
            U32 curr = nextRankStart;
            nextRankStart += (rankVal[n] << (n-1));
            rankVal[n] = curr;
    }   }

    /* fill nbBits */
    {   U32 n; for (n=0; n<nbSymbols; n++) {
            const U32 w = huffWeight[n];
            HUF_setNbBits(ct + n, (BYTE)(tableLog + 1 - w) & -(w != 0));
    }   }

    /* fill val */
    {   U16 nbPerRank[HUF_TABLELOG_MAX+2]  = {0};   /* support w=0=>n=tableLog+1 */
        U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
        { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[HUF_getNbBits(ct[n])]++; }
        /* determine starting value per rank */
        valPerRank[tableLog+1] = 0;   /* for w==0 */
        {   U16 min = 0;
            U32 n; for (n=tableLog; n>0; n--) {   /* start at n=tablelog <-> w=1 */
                valPerRank[n] = min;   /* get starting value within each rank */
                min += nbPerRank[n];
                min >>= 1;
        }   }
        /* assign value within rank, symbol order */
        { U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); }
    }

    *maxSymbolValuePtr = nbSymbols - 1;
    return readSize;
}
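
/* Note on HUF_readCTable() :
 * the expression (BYTE)(tableLog + 1 - w) & -(w != 0) above converts a weight back into
 * a bit count without a branch : for w != 0 the mask is all-ones and the result is
 * tableLog + 1 - w, while w == 0 (unused symbol) yields 0 bits. */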

U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
{
    const HUF_CElt* ct = CTable + 1;
    assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
    return (U32)HUF_getNbBits(ct[symbolValue]);
}


typedef struct nodeElt_s {
    U32 count;
    U16 parent;
    BYTE byte;
    BYTE nbBits;
} nodeElt;

/**
 * HUF_setMaxHeight():
 * Enforces maxNbBits on the Huffman tree described in huffNode.
 *
 * It sets all nodes with nbBits > maxNbBits to be maxNbBits. Then it adjusts
 * the tree so that it is a valid canonical Huffman tree.
 *
 * @pre               The sum of the ranks of each symbol == 2^largestBits,
 *                    where largestBits == huffNode[lastNonNull].nbBits.
 * @post              The sum of the ranks of each symbol == 2^largestBits,
 *                    where largestBits is the return value <= maxNbBits.
 *
 * @param huffNode    The Huffman tree modified in place to enforce maxNbBits.
 * @param lastNonNull The symbol with the lowest count in the Huffman tree.
 * @param maxNbBits   The maximum allowed number of bits, which the Huffman tree
 *                    may not respect. After this function the Huffman tree will
 *                    respect maxNbBits.
 * @return            The maximum number of bits of the Huffman tree after adjustment,
 *                    necessarily no more than maxNbBits.
 */
static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
{
    const U32 largestBits = huffNode[lastNonNull].nbBits;
    /* early exit : no elt > maxNbBits, so the tree is already valid. */
    if (largestBits <= maxNbBits) return largestBits;

    /* there are several too large elements (at least >= 2) */
    {   int totalCost = 0;
        const U32 baseCost = 1 << (largestBits - maxNbBits);
        int n = (int)lastNonNull;

        /* Adjust any ranks > maxNbBits to maxNbBits.
         * Compute totalCost, which is how far over 2^largestBits the sum of the
         * ranks is after adjusting the offending ranks.
         */
        while (huffNode[n].nbBits > maxNbBits) {
            totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
            huffNode[n].nbBits = (BYTE)maxNbBits;
            n--;
        }
        /* n stops at huffNode[n].nbBits <= maxNbBits */
        assert(huffNode[n].nbBits <= maxNbBits);
        /* n ends at the index of the smallest symbol using < maxNbBits */
        while (huffNode[n].nbBits == maxNbBits) --n;

        /* renorm totalCost from 2^largestBits to 2^maxNbBits
         * note : totalCost is necessarily a multiple of baseCost */
        assert((totalCost & (baseCost - 1)) == 0);
        totalCost >>= (largestBits - maxNbBits);
        assert(totalCost > 0);

        /* repay normalized cost */
        {   U32 const noSymbol = 0xF0F0F0F0;
            U32 rankLast[HUF_TABLELOG_MAX+2];

            /* Get pos of last (smallest = lowest cum. count) symbol per rank */
            ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
            {   U32 currentNbBits = maxNbBits;
                int pos;
                for (pos=n ; pos >= 0; pos--) {
                    if (huffNode[pos].nbBits >= currentNbBits) continue;
                    currentNbBits = huffNode[pos].nbBits;   /* < maxNbBits */
                    rankLast[maxNbBits-currentNbBits] = (U32)pos;
            }   }
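
            /* Worked example : with largestBits == 13 and maxNbBits == 11, baseCost == 4.
             * Each 13-bit leaf truncated to 11 bits adds 4 - 1 == 3 to totalCost and each
             * 12-bit leaf adds 4 - 2 == 2 ; the grand total is a multiple of baseCost, and
             * after the >> 2 renormalization it measures the excess in units of 2^-maxNbBits. */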

            while (totalCost > 0) {
                /* Try to reduce the next power of 2 above totalCost because we
                 * gain back half the rank.
                 */
                U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1;
                for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
                    U32 const highPos = rankLast[nBitsToDecrease];
                    U32 const lowPos = rankLast[nBitsToDecrease-1];
                    if (highPos == noSymbol) continue;
                    /* Decrease highPos if no symbols of lowPos or if it is
                     * not cheaper to remove 2 lowPos than highPos.
                     */
                    if (lowPos == noSymbol) break;
                    {   U32 const highTotal = huffNode[highPos].count;
                        U32 const lowTotal = 2 * huffNode[lowPos].count;
                        if (highTotal <= lowTotal) break;
                }   }
                /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
                assert(rankLast[nBitsToDecrease] != noSymbol || nBitsToDecrease == 1);
                /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
                while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
                    nBitsToDecrease++;
                assert(rankLast[nBitsToDecrease] != noSymbol);
                /* Increase the number of bits to gain back half the rank cost. */
                totalCost -= 1 << (nBitsToDecrease-1);
                huffNode[rankLast[nBitsToDecrease]].nbBits++;

                /* Fix up the new rank.
                 * If the new rank was empty, this symbol is now its smallest.
                 * Otherwise, this symbol will be the largest in the new rank so no adjustment.
                 */
                if (rankLast[nBitsToDecrease-1] == noSymbol)
                    rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];
                /* Fix up the old rank.
                 * If the symbol was at position 0, meaning it was the highest weight symbol in the tree,
                 * it must be the only symbol in its rank, so the old rank now has no symbols.
                 * Otherwise, since the Huffman nodes are sorted by count, the previous position is now
                 * the smallest node in the rank. If the previous position belongs to a different rank,
                 * then the rank is now empty.
                 */
                if (rankLast[nBitsToDecrease] == 0)   /* special case, reached largest symbol */
                    rankLast[nBitsToDecrease] = noSymbol;
                else {
                    rankLast[nBitsToDecrease]--;
                    if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
                        rankLast[nBitsToDecrease] = noSymbol;   /* this rank is now empty */
                }
            }   /* while (totalCost > 0) */

            /* If we've removed too much weight, then we have to add it back.
             * To avoid overshooting again, we only adjust the smallest rank.
             * We take the largest nodes from the lowest rank 0 and move them
             * to rank 1. There's guaranteed to be enough rank 0 symbols because
             * TODO.
             */
            while (totalCost < 0) {   /* Sometimes, cost correction overshoot */
                /* special case : no rank 1 symbol (using maxNbBits-1);
                 * let's create one from largest rank 0 (using maxNbBits).
                 */
                if (rankLast[1] == noSymbol) {
                    while (huffNode[n].nbBits == maxNbBits) n--;
                    huffNode[n+1].nbBits--;
                    assert(n >= 0);
                    rankLast[1] = (U32)(n+1);
                    totalCost++;
                    continue;
                }
                huffNode[ rankLast[1] + 1 ].nbBits--;
                rankLast[1]++;
                totalCost ++;
            }
        }   /* repay normalized cost */
    }   /* there are several too large elements (at least >= 2) */

    return maxNbBits;
}

typedef struct {
    U16 base;
    U16 curr;
} rankPos;

typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];

/* Number of buckets available for HUF_sort() */
#define RANK_POSITION_TABLE_SIZE 192

typedef struct {
  huffNodeTable huffNodeTbl;
  rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
} HUF_buildCTable_wksp_tables;

/* RANK_POSITION_DISTINCT_COUNT_CUTOFF == Cutoff point in HUF_sort() buckets for which we use log2 bucketing.
 * Strategy is to use as many buckets as possible for representing distinct
 * counts while using the remainder to represent all "large" counts.
 *
 * To satisfy this requirement for 192 buckets, we can do the following:
 * Let buckets 0-166 represent distinct counts of [0, 166]
 * Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing.
 */
#define RANK_POSITION_MAX_COUNT_LOG 32
#define RANK_POSITION_LOG_BUCKETS_BEGIN ((RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */)
#define RANK_POSITION_DISTINCT_COUNT_CUTOFF (RANK_POSITION_LOG_BUCKETS_BEGIN + BIT_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */)

/* Return the appropriate bucket index for a given count. See definition of
 * RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
 */
static U32 HUF_getIndex(U32 const count) {
    return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF)
        ? count
        : BIT_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
}
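
/* Example : a symbol seen 7 times lands in bucket 7 (one bucket per distinct small count),
 * while a symbol seen 1000 times lands in bucket
 * BIT_highbit32(1000) + RANK_POSITION_LOG_BUCKETS_BEGIN == 9 + 158 == 167,
 * which it shares with every count in [512, 1023]. */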

/* Helper swap function for HUF_quickSortPartition() */
static void HUF_swapNodes(nodeElt* a, nodeElt* b) {
    nodeElt tmp = *a;
    *a = *b;
    *b = tmp;
}

/* Returns 0 if the huffNode array is not sorted by descending count */
MEM_STATIC int HUF_isSorted(nodeElt huffNode[], U32 const maxSymbolValue1) {
    U32 i;
    for (i = 1; i < maxSymbolValue1; ++i) {
        if (huffNode[i].count > huffNode[i-1].count) {
            return 0;
        }
    }
    return 1;
}

/* Insertion sort by descending order */
HINT_INLINE void HUF_insertionSort(nodeElt huffNode[], int const low, int const high) {
    int i;
    int const size = high-low+1;
    huffNode += low;
    for (i = 1; i < size; ++i) {
        nodeElt const key = huffNode[i];
        int j = i - 1;
        while (j >= 0 && huffNode[j].count < key.count) {
            huffNode[j + 1] = huffNode[j];
            j--;
        }
        huffNode[j + 1] = key;
    }
}

/* Pivot helper function for quicksort. */
static int HUF_quickSortPartition(nodeElt arr[], int const low, int const high) {
    /* Simply select rightmost element as pivot. "Better" selectors like
     * median-of-three don't experimentally appear to have any benefit.
     */
    U32 const pivot = arr[high].count;
    int i = low - 1;
    int j = low;

    for ( ; j < high; j++) {
        if (arr[j].count > pivot) {
            i++;
            HUF_swapNodes(&arr[i], &arr[j]);
        }
    }
    HUF_swapNodes(&arr[i + 1], &arr[high]);
    return i + 1;
}

/* Classic quicksort by descending with partially iterative calls
 * to reduce worst case callstack size.
 */
static void HUF_simpleQuickSort(nodeElt arr[], int low, int high) {
    int const kInsertionSortThreshold = 8;
    if (high - low < kInsertionSortThreshold) {
        HUF_insertionSort(arr, low, high);
        return;
    }
    while (low < high) {
        int const idx = HUF_quickSortPartition(arr, low, high);
        if (idx - low < high - idx) {
            HUF_simpleQuickSort(arr, low, idx - 1);
            low = idx + 1;
        } else {
            HUF_simpleQuickSort(arr, idx + 1, high);
            high = idx - 1;
        }
    }
}

/**
 * HUF_sort():
 * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.
 * This is a typical bucket sorting strategy that uses either quicksort or insertion sort to sort each bucket.
 *
 * @param[out] huffNode       Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.
 *                            Must have (maxSymbolValue + 1) entries.
 * @param[in]  count          Histogram of the symbols.
 * @param[in]  maxSymbolValue Maximum symbol value.
 * @param      rankPosition   This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.
 */
static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSymbolValue, rankPos rankPosition[]) {
    U32 n;
    U32 const maxSymbolValue1 = maxSymbolValue+1;

    /* Compute base and set curr to base.
     * For symbol s let lowerRank = HUF_getIndex(count[s]) and rank = lowerRank + 1.
     * See HUF_getIndex to see bucketing strategy.
     * We attribute each symbol to lowerRank's base value, because we want to know where
     * each rank begins in the output, so for rank R we want to count ranks R+1 and above.
     */
    ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
    for (n = 0; n < maxSymbolValue1; ++n) {
        U32 lowerRank = HUF_getIndex(count[n]);
        assert(lowerRank < RANK_POSITION_TABLE_SIZE - 1);
        rankPosition[lowerRank].base++;
    }

    assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0);
    /* Set up the rankPosition table */
    for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) {
        rankPosition[n-1].base += rankPosition[n].base;
        rankPosition[n-1].curr = rankPosition[n-1].base;
    }

    /* Insert each symbol into their appropriate bucket, setting up rankPosition table. */
    for (n = 0; n < maxSymbolValue1; ++n) {
        U32 const c = count[n];
        U32 const r = HUF_getIndex(c) + 1;
        U32 const pos = rankPosition[r].curr++;
        assert(pos < maxSymbolValue1);
        huffNode[pos].count = c;
        huffNode[pos].byte  = (BYTE)n;
    }

    /* Sort each bucket. */
    for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) {
        U32 const bucketSize = rankPosition[n].curr-rankPosition[n].base;
        U32 const bucketStartIdx = rankPosition[n].base;
        if (bucketSize > 1) {
            assert(bucketStartIdx < maxSymbolValue1);
            HUF_simpleQuickSort(huffNode + bucketStartIdx, 0, bucketSize-1);
        }
    }

    assert(HUF_isSorted(huffNode, maxSymbolValue1));
}

/** HUF_buildCTable_wksp() :
 *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
 *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables).
 */
#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)

/* HUF_buildTree():
 * Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree.
 *
 * @param huffNode        The array sorted by HUF_sort(). Builds the Huffman tree in this array.
 * @param maxSymbolValue  The maximum symbol value.
 * @return                The smallest node in the Huffman tree (by count).
 */
static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
{
    nodeElt* const huffNode0 = huffNode - 1;
    int nonNullRank;
    int lowS, lowN;
    int nodeNb = STARTNODE;
    int n, nodeRoot;
    /* init for parents */
    nonNullRank = (int)maxSymbolValue;
    while(huffNode[nonNullRank].count == 0) nonNullRank--;
    lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
    huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
    huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb;
    nodeNb++; lowS-=2;
    for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
    huffNode0[0].count = (U32)(1U<<31);   /* fake entry, strong barrier */

    /* create parents */
    while (nodeNb <= nodeRoot) {
        int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
        int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
        huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
        huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb;
        nodeNb++;
    }

    /* distribute weights (unlimited tree height) */
    huffNode[nodeRoot].nbBits = 0;
    for (n=nodeRoot-1; n>=STARTNODE; n--)
        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
    for (n=0; n<=nonNullRank; n++)
        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;

    return nonNullRank;
}
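
/* Canonical value assignment example (the same scheme is used by HUF_buildCTableFromTree()
 * below and by HUF_readCTable() above) : with nbPerRank = { [1]=1, [2]=1, [3]=2 } and
 * maxNbBits == 3, walking n from 3 down to 1 gives valPerRank = { [3]=0, [2]=1, [1]=1 },
 * i.e. codes 000 and 001 for the two 3-bit symbols, 01 for the 2-bit symbol and 1 for the
 * 1-bit symbol : a complete prefix code. */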

/**
 * HUF_buildCTableFromTree():
 * Build the CTable given the Huffman tree in huffNode.
 *
 * @param[out] CTable         The output Huffman CTable.
 * @param      huffNode       The Huffman tree.
 * @param      nonNullRank    The last and smallest node in the Huffman tree.
 * @param      maxSymbolValue The maximum symbol value.
 * @param      maxNbBits      The exact maximum number of bits used in the Huffman tree.
 */
static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
{
    HUF_CElt* const ct = CTable + 1;
    /* fill result into ctable (val, nbBits) */
    int n;
    U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
    U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
    int const alphabetSize = (int)(maxSymbolValue + 1);
    for (n=0; n<=nonNullRank; n++)
        nbPerRank[huffNode[n].nbBits]++;
    /* determine starting value per rank */
    {   U16 min = 0;
        for (n=(int)maxNbBits; n>0; n--) {
            valPerRank[n] = min;   /* get starting value within each rank */
            min += nbPerRank[n];
            min >>= 1;
    }   }
    for (n=0; n<alphabetSize; n++)
        HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits);   /* push nbBits per symbol, symbol order */
    for (n=0; n<alphabetSize; n++)
        HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++);   /* assign value within rank, symbol order */
    CTable[0] = maxNbBits;
}

size_t HUF_buildCTable_wksp (HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
{
    HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
    nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
    nodeElt* const huffNode = huffNode0+1;
    int nonNullRank;

    /* safety checks */
    if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
        return ERROR(workSpace_tooSmall);
    if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
        return ERROR(maxSymbolValue_tooLarge);
    ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));

    /* sort, decreasing order */
    HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);

    /* build tree */
    nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);

    /* enforce maxTableLog */
    maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
    if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */

    HUF_buildCTableFromTree(CTable, huffNode, nonNullRank, maxSymbolValue, maxNbBits);

    return maxNbBits;
}

size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
{
    HUF_CElt const* ct = CTable + 1;
    size_t nbBits = 0;
    int s;
    for (s = 0; s <= (int)maxSymbolValue; ++s) {
        nbBits += HUF_getNbBits(ct[s]) * count[s];
    }
    return nbBits >> 3;
}

int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
    HUF_CElt const* ct = CTable + 1;
    int bad = 0;
    int s;
    for (s = 0; s <= (int)maxSymbolValue; ++s) {
        bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
    }
    return !bad;
}

size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }

/** HUF_CStream_t:
 * Huffman uses its own BIT_CStream_t implementation.
 * There are three major differences from BIT_CStream_t:
 *   1. HUF_addBits() takes a HUF_CElt (size_t) which is
 *      the pair (nbBits, value) in the format:
 *      - Bits [0, 4)            = nbBits
 *      - Bits [4, 64 - nbBits)  = 0
 *      - Bits [64 - nbBits, 64) = value
 *   2. The bitContainer is built from the upper bits and
 *      right shifted. E.g. to add a new value of N bits
 *      you right shift the bitContainer by N, then or in
 *      the new value into the N upper bits.
 *   3. The bitstream has two bit containers. You can add
 *      bits to the second container and merge them into
 *      the first container.
 */

#define HUF_BITS_IN_CONTAINER (sizeof(size_t) * 8)

typedef struct {
    size_t bitContainer[2];
    size_t bitPos[2];

    BYTE* startPtr;
    BYTE* ptr;
    BYTE* endPtr;
} HUF_CStream_t;
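
/* Example : a symbol coded on 5 bits with value 0b10110 is stored by HUF_setNbBits()/HUF_setValue()
 * as elt == ((size_t)0x16 << 59) | 5 on a 64-bit target : the bit count sits in the low byte and
 * the value is pre-shifted into the top bits, ready to be OR'ed into the bit container by HUF_addBits(). */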

/**! HUF_initCStream():
 * Initializes the bitstream.
 * @returns 0 or an error code.
 */
static size_t HUF_initCStream(HUF_CStream_t* bitC,
                              void* startPtr, size_t dstCapacity)
{
    ZSTD_memset(bitC, 0, sizeof(*bitC));
    bitC->startPtr = (BYTE*)startPtr;
    bitC->ptr = bitC->startPtr;
    bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer[0]);
    if (dstCapacity <= sizeof(bitC->bitContainer[0])) return ERROR(dstSize_tooSmall);
    return 0;
}

/*! HUF_addBits():
 * Adds the symbol stored in HUF_CElt elt to the bitstream.
 *
 * @param elt   The element we're adding. This is a (nbBits, value) pair.
 *              See the HUF_CStream_t docs for the format.
 * @param idx   Insert into the bitstream at this idx.
 * @param kFast This is a template parameter. If the bitstream is guaranteed
 *              to have at least 4 unused bits after this call it may be 1,
 *              otherwise it must be 0. HUF_addBits() is faster when fast is set.
 */
FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int idx, int kFast)
{
    assert(idx <= 1);
    assert(HUF_getNbBits(elt) <= HUF_TABLELOG_ABSOLUTEMAX);
    /* This is efficient on x86-64 with BMI2 because shrx
     * only reads the low 6 bits of the register. The compiler
     * knows this and elides the mask. When fast is set,
     * every operation can use the same value loaded from elt.
     */
    bitC->bitContainer[idx] >>= HUF_getNbBits(elt);
    bitC->bitContainer[idx] |= kFast ? HUF_getValueFast(elt) : HUF_getValue(elt);
    /* We only read the low 8 bits of bitC->bitPos[idx] so it
     * doesn't matter that the high bits have noise from the value.
     */
    bitC->bitPos[idx] += HUF_getNbBitsFast(elt);
    assert((bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
    /* The last 4-bits of elt are dirty if fast is set,
     * so we must not be overwriting bits that have already been
     * inserted into the bit container.
     */
#if DEBUGLEVEL >= 1
    {
        size_t const nbBits = HUF_getNbBits(elt);
        size_t const dirtyBits = nbBits == 0 ? 0 : BIT_highbit32((U32)nbBits) + 1;
        (void)dirtyBits;
        /* Middle bits are 0. */
        assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);
        /* We didn't overwrite any bits in the bit container. */
        assert(!kFast || (bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
        (void)dirtyBits;
    }
#endif
}

FORCE_INLINE_TEMPLATE void HUF_zeroIndex1(HUF_CStream_t* bitC)
{
    bitC->bitContainer[1] = 0;
    bitC->bitPos[1] = 0;
}

/*! HUF_mergeIndex1() :
 * Merges the bit container @ index 1 into the bit container @ index 0.
 * The caller must re-zero index 1 (HUF_zeroIndex1()) before filling it again.
 */
FORCE_INLINE_TEMPLATE void HUF_mergeIndex1(HUF_CStream_t* bitC)
{
    assert((bitC->bitPos[1] & 0xFF) < HUF_BITS_IN_CONTAINER);
    bitC->bitContainer[0] >>= (bitC->bitPos[1] & 0xFF);
    bitC->bitContainer[0] |= bitC->bitContainer[1];
    bitC->bitPos[0] += bitC->bitPos[1];
    assert((bitC->bitPos[0] & 0xFF) <= HUF_BITS_IN_CONTAINER);
}

/*! HUF_flushBits() :
 * Flushes the bits in the bit container @ index 0.
 *
 * @post bitPos will be < 8.
 * @param kFast If kFast is set then we must know a-priori that
 *              the bit container will not overflow.
 */
FORCE_INLINE_TEMPLATE void HUF_flushBits(HUF_CStream_t* bitC, int kFast)
{
    /* The upper bits of bitPos are noisy, so we must mask by 0xFF. */
    size_t const nbBits = bitC->bitPos[0] & 0xFF;
    size_t const nbBytes = nbBits >> 3;
    /* The top nbBits bits of bitContainer are the ones we need. */
    size_t const bitContainer = bitC->bitContainer[0] >> (HUF_BITS_IN_CONTAINER - nbBits);
    /* Mask bitPos to account for the bytes we consumed. */
    bitC->bitPos[0] &= 7;
    assert(nbBits > 0);
    assert(nbBits <= sizeof(bitC->bitContainer[0]) * 8);
    assert(bitC->ptr <= bitC->endPtr);
    MEM_writeLEST(bitC->ptr, bitContainer);
    bitC->ptr += nbBytes;
    assert(!kFast || bitC->ptr <= bitC->endPtr);
    if (!kFast && bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
    /* bitContainer doesn't need to be modified because the leftover
     * bits are already the top bitPos bits. And we don't care about
     * noise in the lower values.
     */
}

/*! HUF_endMark()
 * @returns The Huffman stream end mark: A 1-bit value = 1.
 */
static HUF_CElt HUF_endMark(void)
{
    HUF_CElt endMark;
    HUF_setNbBits(&endMark, 1);
    HUF_setValue(&endMark, 1);
    return endMark;
}

/*! HUF_closeCStream() :
 *  @return Size of CStream, in bytes,
 *          or 0 if it could not fit into dstBuffer */
static size_t HUF_closeCStream(HUF_CStream_t* bitC)
{
    HUF_addBits(bitC, HUF_endMark(), /* idx */ 0, /* kFast */ 0);
    HUF_flushBits(bitC, /* kFast */ 0);
    {
        size_t const nbBits = bitC->bitPos[0] & 0xFF;
        if (bitC->ptr >= bitC->endPtr) return 0;   /* overflow detected */
        return (bitC->ptr - bitC->startPtr) + (nbBits > 0);
    }
}

FORCE_INLINE_TEMPLATE void
HUF_encodeSymbol(HUF_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable, int idx, int fast)
{
    HUF_addBits(bitCPtr, CTable[symbol], idx, fast);
}

FORCE_INLINE_TEMPLATE void
HUF_compress1X_usingCTable_internal_body_loop(HUF_CStream_t* bitC,
                                   const BYTE* ip, size_t srcSize,
                                   const HUF_CElt* ct,
                                   int kUnroll, int kFastFlush, int kLastFast)
{
    /* Join to kUnroll */
    int n = (int)srcSize;
    int rem = n % kUnroll;
    if (rem > 0) {
        for (; rem > 0; --rem) {
            HUF_encodeSymbol(bitC, ip[--n], ct, 0, /* fast */ 0);
        }
        HUF_flushBits(bitC, kFastFlush);
    }
    assert(n % kUnroll == 0);

    /* Join to 2 * kUnroll */
    if (n % (2 * kUnroll)) {
        int u;
        for (u = 1; u < kUnroll; ++u) {
            HUF_encodeSymbol(bitC, ip[n - u], ct, 0, 1);
        }
        HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, 0, kLastFast);
        HUF_flushBits(bitC, kFastFlush);
        n -= kUnroll;
    }
    assert(n % (2 * kUnroll) == 0);
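
    /* Main loop : symbols are encoded from the end of the block toward the front,
     * 2*kUnroll at a time. Each iteration fills bit container 0, then fills
     * container 1 independently and merges it in, flushing once per container. */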
    for (; n>0; n-= 2 * kUnroll) {
        /* Encode kUnroll symbols into the bitstream @ index 0. */
        int u;
        for (u = 1; u < kUnroll; ++u) {
            HUF_encodeSymbol(bitC, ip[n - u], ct, /* idx */ 0, /* fast */ 1);
        }
        HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, /* idx */ 0, /* fast */ kLastFast);
        HUF_flushBits(bitC, kFastFlush);
        /* Encode kUnroll symbols into the bitstream @ index 1.
         * This allows us to start filling the bit container
         * without any data dependencies.
         */
        HUF_zeroIndex1(bitC);
        for (u = 1; u < kUnroll; ++u) {
            HUF_encodeSymbol(bitC, ip[n - kUnroll - u], ct, /* idx */ 1, /* fast */ 1);
        }
        HUF_encodeSymbol(bitC, ip[n - kUnroll - kUnroll], ct, /* idx */ 1, /* fast */ kLastFast);
        /* Merge bitstream @ index 1 into the bitstream @ index 0 */
        HUF_mergeIndex1(bitC);
        HUF_flushBits(bitC, kFastFlush);
    }
    assert(n == 0);
}

/**
 * Returns a tight upper bound on the output space needed by Huffman
 * with 8 bytes buffer to handle over-writes. If the output is at least
 * this large we don't need to do bounds checks during Huffman encoding.
 */
static size_t HUF_tightCompressBound(size_t srcSize, size_t tableLog)
{
    return ((srcSize * tableLog) >> 3) + 8;
}


FORCE_INLINE_TEMPLATE size_t
HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
                                   const void* src, size_t srcSize,
                                   const HUF_CElt* CTable)
{
    U32 const tableLog = (U32)CTable[0];
    HUF_CElt const* ct = CTable + 1;
    const BYTE* ip = (const BYTE*) src;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;
    HUF_CStream_t bitC;

    /* init */
    if (dstSize < 8) return 0;   /* not enough space to compress */
    { size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
      if (HUF_isError(initErr)) return 0; }

    if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11)
        HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ MEM_32bits() ? 2 : 4, /* kFast */ 0, /* kLastFast */ 0);
    else {
        if (MEM_32bits()) {
            switch (tableLog) {
            case 11:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 10: ZSTD_FALLTHROUGH;
            case 9: ZSTD_FALLTHROUGH;
            case 8:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            case 7: ZSTD_FALLTHROUGH;
            default:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 3, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            }
        } else {
            switch (tableLog) {
            case 11:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 10:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            case 9:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 6, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 8:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 7, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 7:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 8, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 6: ZSTD_FALLTHROUGH;
            default:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 9, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            }
        }
    }
    assert(bitC.ptr <= bitC.endPtr);

    return HUF_closeCStream(&bitC);
}
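
/* When DYNAMIC_BMI2 is enabled, the same encoding body is compiled twice : once with the
 * BMI2 target attribute and once without, and HUF_compress1X_usingCTable_internal() picks
 * the variant at run time from the bmi2 flag. Otherwise a single variant is built and the
 * flag is ignored. */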

#if DYNAMIC_BMI2

static BMI2_TARGET_ATTRIBUTE size_t
HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
                                   const void* src, size_t srcSize,
                                   const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

static size_t
HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
                                      const void* src, size_t srcSize,
                                      const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, const int bmi2)
{
    if (bmi2) {
        return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
    }
    return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
}

#else

static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, const int bmi2)
{
    (void)bmi2;
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

#endif

size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
}

size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
{
    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
}

static size_t
HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, int bmi2)
{
    size_t const segmentSize = (srcSize+3)/4;   /* first 3 segments */
    const BYTE* ip = (const BYTE*) src;
    const BYTE* const iend = ip + srcSize;
    BYTE* const ostart = (BYTE*) dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;

    if (dstSize < 6 + 1 + 1 + 1 + 8) return 0;   /* minimum space to compress successfully */
    if (srcSize < 12) return 0;   /* no saving possible : too small input */
    op += 6;   /* jumpTable */

    assert(op <= oend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
        if (cSize == 0 || cSize > 65535) return 0;
        MEM_writeLE16(ostart, (U16)cSize);
        op += cSize;
    }

    ip += segmentSize;
    assert(op <= oend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
        if (cSize == 0 || cSize > 65535) return 0;
        MEM_writeLE16(ostart+2, (U16)cSize);
        op += cSize;
    }

    ip += segmentSize;
    assert(op <= oend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
        if (cSize == 0 || cSize > 65535) return 0;
        MEM_writeLE16(ostart+4, (U16)cSize);
        op += cSize;
    }

    ip += segmentSize;
    assert(op <= oend);
    assert(ip <= iend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
        if (cSize == 0 || cSize > 65535) return 0;
        op += cSize;
    }

    return (size_t)(op-ostart);
}
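
/* 4-stream layout produced above : a 6-byte jump table holding the three LE16 compressed
 * sizes of the first three segments (each (srcSize+3)/4 input bytes), followed by the four
 * compressed streams back to back ; the size of the last stream is implied by the block size. */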

size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
{
    return HUF_compress4X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
}

size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
{
    return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
}

typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;

static size_t HUF_compressCTable_internal(
                BYTE* const ostart, BYTE* op, BYTE* const oend,
                const void* src, size_t srcSize,
                HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
{
    size_t const cSize = (nbStreams==HUF_singleStream) ?
                         HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) :
                         HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2);
    if (HUF_isError(cSize)) { return cSize; }
    if (cSize==0) { return 0; }   /* uncompressible */
    op += cSize;
    /* check compressibility */
    assert(op >= ostart);
    if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
    return (size_t)(op-ostart);
}

typedef struct {
    unsigned count[HUF_SYMBOLVALUE_MAX + 1];
    HUF_CElt CTable[HUF_CTABLE_SIZE_ST(HUF_SYMBOLVALUE_MAX)];
    union {
        HUF_buildCTable_wksp_tables buildCTable_wksp;
        HUF_WriteCTableWksp writeCTable_wksp;
        U32 hist_wksp[HIST_WKSP_SIZE_U32];
    } wksps;
} HUF_compress_tables_t;

#define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096
#define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10  /* Must be >= 2 */

/* HUF_compress_internal() :
 * `workSpace` must be aligned on 4-bytes boundaries,
 * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */
static size_t
HUF_compress_internal (void* dst, size_t dstSize,
                 const void* src, size_t srcSize,
                       unsigned maxSymbolValue, unsigned huffLog,
                       HUF_nbStreams_e nbStreams,
                       void* workSpace, size_t wkspSize,
                       HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
                 const int bmi2, unsigned suspectUncompressible)
{
    HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t));
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;

    HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE);

    /* checks & inits */
    if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
    if (!srcSize) return 0;   /* Uncompressed */
    if (!dstSize) return 0;   /* cannot fit anything within dst budget */
    if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);   /* current block size limit */
    if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
    if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
    if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;

    /* Heuristic : If old table is valid, use it for small inputs */
    if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
        return HUF_compressCTable_internal(ostart, op, oend,
                                           src, srcSize,
                                           nbStreams, oldHufTable, bmi2);
    }
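
    /* With SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE == 4096, the check below gives up when the two
     * sampled 4 KiB regions have a summed maximum symbol frequency of at most
     * (2*4096 >> 7) + 4 == 68, i.e. when even the most common byte covers roughly 0.8% or
     * less of the sampled data, a strong sign the block is not compressible. */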

    /* If uncompressible data is suspected, do a smaller sampling first */
    DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2);
    if (suspectUncompressible && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
        size_t largestTotal = 0;
        {   unsigned maxSymbolValueBegin = maxSymbolValue;
            CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
            largestTotal += largestBegin;
        }
        {   unsigned maxSymbolValueEnd = maxSymbolValue;
            CHECK_V_F(largestEnd, HIST_count_simple (table->count, &maxSymbolValueEnd, (const BYTE*)src + srcSize - SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
            largestTotal += largestEnd;
        }
        if (largestTotal <= ((2 * SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
    }

    /* Scan input and build symbol stats */
    {   CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->wksps.hist_wksp, sizeof(table->wksps.hist_wksp)) );
        if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
        if (largest <= (srcSize >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
    }

    /* Check validity of previous table */
    if ( repeat
      && *repeat == HUF_repeat_check
      && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) {
        *repeat = HUF_repeat_none;
    }
    /* Heuristic : use existing table for small inputs */
    if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
        return HUF_compressCTable_internal(ostart, op, oend,
                                           src, srcSize,
                                           nbStreams, oldHufTable, bmi2);
    }

    /* Build Huffman Tree */
    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
    {   size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
                                            maxSymbolValue, huffLog,
                                            &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
        CHECK_F(maxBits);
        huffLog = (U32)maxBits;
    }
    /* Zero unused symbols in CTable, so we can check it for validity */
    {
        size_t const ctableSize = HUF_CTABLE_SIZE_ST(maxSymbolValue);
        size_t const unusedSize = sizeof(table->CTable) - ctableSize * sizeof(HUF_CElt);
        ZSTD_memset(table->CTable + ctableSize, 0, unusedSize);
    }

    /* Write table description header */
    {   CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,
                                              &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) );
        /* Check if using previous huffman table is beneficial */
        if (repeat && *repeat != HUF_repeat_none) {
            size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
            size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue);
            if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
                return HUF_compressCTable_internal(ostart, op, oend,
                                                   src, srcSize,
                                                   nbStreams, oldHufTable, bmi2);
        }   }

        /* Use the new huffman table */
        if (hSize + 12ul >= srcSize) { return 0; }
        op += hSize;
        if (repeat) { *repeat = HUF_repeat_none; }
        if (oldHufTable)
            ZSTD_memcpy(oldHufTable, table->CTable, sizeof(table->CTable));   /* Save new table */
    }
    return HUF_compressCTable_internal(ostart, op, oend,
                                       src, srcSize,
                                       nbStreams, table->CTable, bmi2);
}
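
/* Typical usage sketch for the workspace variants below (error handling elided) :
 *     U64 wksp[HUF_WORKSPACE_SIZE_U64];
 *     size_t const cSize = HUF_compress4X_wksp(dst, dstCapacity, src, srcSize,
 *                                              255, HUF_TABLELOG_DEFAULT,
 *                                              wksp, sizeof(wksp));
 *     // HUF_isError(cSize) => error ; cSize == 0 => not compressible, store the block raw.
 */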

size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_singleStream,
                                 workSpace, wkspSize,
                                 NULL, NULL, 0, 0 /*bmi2*/, 0);
}

size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize,
                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat,
                      int bmi2, unsigned suspectUncompressible)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_singleStream,
                                 workSpace, wkspSize, hufTable,
                                 repeat, preferRepeat, bmi2, suspectUncompressible);
}

/* HUF_compress4X_wksp():
 * compress input using 4 streams.
 * provide workspace to generate compression tables */
size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_fourStreams,
                                 workSpace, wkspSize,
                                 NULL, NULL, 0, 0 /*bmi2*/, 0);
}

/* HUF_compress4X_repeat():
 * compress input using 4 streams.
 * consider skipping quickly
 * re-use an existing huffman compression table */
size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize,
                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_fourStreams,
                                 workSpace, wkspSize,
                                 hufTable, repeat, preferRepeat, bmi2, suspectUncompressible);
}

#ifndef ZSTD_NO_UNUSED_FUNCTIONS
/** HUF_buildCTable() :
 * @return : maxNbBits
 *  Note : count is used before tree is written, so they can safely overlap
 */
size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits)
{
    HUF_buildCTable_wksp_tables workspace;
    return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, &workspace, sizeof(workspace));
}

size_t HUF_compress1X (void* dst, size_t dstSize,
                 const void* src, size_t srcSize,
                 unsigned maxSymbolValue, unsigned huffLog)
{
    U64 workSpace[HUF_WORKSPACE_SIZE_U64];
    return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
}

size_t HUF_compress2 (void* dst, size_t dstSize,
                const void* src, size_t srcSize,
                unsigned maxSymbolValue, unsigned huffLog)
{
    U64 workSpace[HUF_WORKSPACE_SIZE_U64];
    return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
}

size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
    return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
}
#endif