/* ******************************************************************
 * Huffman encoder, part of New Generation Entropy library
 * Copyright (c) Yann Collet, Facebook, Inc.
 *
 *  You can contact the author at :
 *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
 *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
****************************************************************** */

/* **************************************************************
*  Compiler specifics
****************************************************************/


/* **************************************************************
*  Includes
****************************************************************/
#include "../common/zstd_deps.h"     /* ZSTD_memcpy, ZSTD_memset */
#include "../common/compiler.h"
#include "../common/bitstream.h"
#include "hist.h"
#define FSE_STATIC_LINKING_ONLY   /* FSE_optimalTableLog_internal */
#include "../common/fse.h"        /* header compression */
#define HUF_STATIC_LINKING_ONLY
#include "../common/huf.h"
#include "../common/error_private.h"


/* **************************************************************
*  Error Management
****************************************************************/
#define HUF_isError ERR_isError
#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)   /* use only *after* variable declarations */


/* **************************************************************
*  Utils
****************************************************************/
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
{
    return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
}


/* *******************************************************
*  HUF : Huffman block compression
*********************************************************/
#define HUF_WORKSPACE_MAX_ALIGNMENT 8

static void* HUF_alignUpWorkspace(void* workspace, size_t* workspaceSizePtr, size_t align)
{
    size_t const mask = align - 1;
    size_t const rem = (size_t)workspace & mask;
    size_t const add = (align - rem) & mask;
    BYTE* const aligned = (BYTE*)workspace + add;
    assert((align & (align - 1)) == 0);   /* pow 2 */
    assert(align <= HUF_WORKSPACE_MAX_ALIGNMENT);
    if (*workspaceSizePtr >= add) {
        assert(add < align);
        assert(((size_t)aligned & mask) == 0);
        *workspaceSizePtr -= add;
        return aligned;
    } else {
        *workspaceSizePtr = 0;
        return NULL;
    }
}

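/* Illustration (not part of the original source): with align == 8 and a workspace
 * pointer whose address ends in 0x4, rem == 4 and add == (8 - 4) & 7 == 4, so the
 * returned pointer is advanced by 4 bytes and *workspaceSizePtr shrinks by 4.
 * If the pointer is already 8-byte aligned, add == 0 and the workspace is returned unchanged. */
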
/* HUF_compressWeights() :
 * Same as FSE_compress(), but dedicated to huff0's weights compression.
 * The use case needs much less stack memory.
 * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
 */
#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6

typedef struct {
    FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
    U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
    unsigned count[HUF_TABLELOG_MAX+1];
    S16 norm[HUF_TABLELOG_MAX+1];
} HUF_CompressWeightsWksp;

static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightTable, size_t wtSize, void* workspace, size_t workspaceSize)
{
    BYTE* const ostart = (BYTE*) dst;
    BYTE* op = ostart;
    BYTE* const oend = ostart + dstSize;

    unsigned maxSymbolValue = HUF_TABLELOG_MAX;
    U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
    HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));

    if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);

    /* init conditions */
    if (wtSize <= 1) return 0;   /* Not compressible */

    /* Scan input and build symbol stats */
    {   unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize);   /* never fails */
        if (maxCount == wtSize) return 1;   /* only a single symbol in src : rle */
        if (maxCount == 1) return 0;        /* each symbol present maximum once => not compressible */
    }

    tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
    CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );

    /* Write table description header */
    {   CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) );
        op += hSize;
    }

    /* Compress */
    CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) );
    {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) );
        if (cSize == 0) return 0;   /* not enough space for compressed data */
        op += cSize;
    }

    return (size_t)(op-ostart);
}

/* HUF_CElt layout: nbBits is stored in the low 8 bits, and the code value is
 * left-aligned in the top nbBits bits (see the HUF_CStream_t notes further below). */

static size_t HUF_getNbBits(HUF_CElt elt)
{
    return elt & 0xFF;
}

static size_t HUF_getNbBitsFast(HUF_CElt elt)
{
    return elt;
}

static size_t HUF_getValue(HUF_CElt elt)
{
    return elt & ~0xFF;
}

static size_t HUF_getValueFast(HUF_CElt elt)
{
    return elt;
}

static void HUF_setNbBits(HUF_CElt* elt, size_t nbBits)
{
    assert(nbBits <= HUF_TABLELOG_ABSOLUTEMAX);
    *elt = nbBits;
}

static void HUF_setValue(HUF_CElt* elt, size_t value)
{
    size_t const nbBits = HUF_getNbBits(*elt);
    if (nbBits > 0) {
        assert((value >> nbBits) == 0);
        *elt |= value << (sizeof(HUF_CElt) * 8 - nbBits);
    }
}

typedef struct {
    HUF_CompressWeightsWksp wksp;
    BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
    BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
} HUF_WriteCTableWksp;

size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
                            const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
                            void* workspace, size_t workspaceSize)
{
    HUF_CElt const* const ct = CTable + 1;
    BYTE* op = (BYTE*)dst;
    U32 n;
    HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));

    /* check conditions */
    if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);

    /* convert to weight */
    wksp->bitsToWeight[0] = 0;
    for (n=1; n<huffLog+1; n++)
        wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
    for (n=0; n<maxSymbolValue; n++)
        wksp->huffWeight[n] = wksp->bitsToWeight[HUF_getNbBits(ct[n])];

    /* attempt weights compression by FSE */
    if (maxDstSize < 1) return ERROR(dstSize_tooSmall);
    {   CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
        if ((hSize>1) & (hSize < maxSymbolValue/2)) {   /* FSE compressed */
            op[0] = (BYTE)hSize;
            return hSize+1;
    }   }

    /* write raw values as 4-bits (max : 15) */
    if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen : likely means source cannot be compressed */
    if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
    op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
    wksp->huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination */
    for (n=0; n<maxSymbolValue; n+=2)
        op[(n/2)+1] = (BYTE)((wksp->huffWeight[n] << 4) + wksp->huffWeight[n+1]);
    return ((maxSymbolValue+1)/2) + 1;
}

/*! HUF_writeCTable() :
    `CTable` : Huffman tree to save, using huf representation.
    @return : size of saved CTable */
size_t HUF_writeCTable (void* dst, size_t maxDstSize,
                        const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
{
    HUF_WriteCTableWksp wksp;
    return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, &wksp, sizeof(wksp));
}


size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
{
    BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];   /* init not required, even though some static analyzer may complain */
    U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];  /* large enough for values from 0 to 16 */
    U32 tableLog = 0;
    U32 nbSymbols = 0;
    HUF_CElt* const ct = CTable + 1;

    /* get symbol weights */
    CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
    *hasZeroWeights = (rankVal[0] > 0);

    /* check result */
    if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);

    CTable[0] = tableLog;

    /* Prepare base value per rank */
    {   U32 n, nextRankStart = 0;
        for (n=1; n<=tableLog; n++) {
            U32 curr = nextRankStart;
            nextRankStart += (rankVal[n] << (n-1));
            rankVal[n] = curr;
    }   }

    /* fill nbBits */
    {   U32 n; for (n=0; n<nbSymbols; n++) {
            const U32 w = huffWeight[n];
            HUF_setNbBits(ct + n, (BYTE)(tableLog + 1 - w) & -(w != 0));
    }   }

    /* fill val */
    {   U16 nbPerRank[HUF_TABLELOG_MAX+2]  = {0};   /* support w=0=>n=tableLog+1 */
        U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
        { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[HUF_getNbBits(ct[n])]++; }
        /* determine starting value per rank */
        valPerRank[tableLog+1] = 0;   /* for w==0 */
        {   U16 min = 0;
            U32 n; for (n=tableLog; n>0; n--) {   /* start at n=tablelog <-> w=1 */
                valPerRank[n] = min;   /* get starting value within each rank */
                min += nbPerRank[n];
                min >>= 1;
        }   }
        /* assign value within rank, symbol order */
        { U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); }
    }

    *maxSymbolValuePtr = nbSymbols - 1;
    return readSize;
}

U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
{
    const HUF_CElt* ct = CTable + 1;
    assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
    return (U32)HUF_getNbBits(ct[symbolValue]);
}


typedef struct nodeElt_s {
    U32 count;
    U16 parent;
    BYTE byte;
    BYTE nbBits;
} nodeElt;

/*
 * HUF_setMaxHeight():
 * Enforces maxNbBits on the Huffman tree described in huffNode.
 *
 * It sets all nodes with nbBits > maxNbBits to be maxNbBits. Then it adjusts
 * the tree so that it is a valid canonical Huffman tree.
 *
 * @pre               The sum of the ranks of each symbol == 2^largestBits,
 *                    where largestBits == huffNode[lastNonNull].nbBits.
 * @post              The sum of the ranks of each symbol == 2^largestBits,
 *                    where largestBits is the return value <= maxNbBits.
 *
 * @param huffNode    The Huffman tree modified in place to enforce maxNbBits.
 * @param lastNonNull The symbol with the lowest count in the Huffman tree.
 * @param maxNbBits   The maximum allowed number of bits, which the Huffman tree
 *                    may not respect. After this function the Huffman tree will
 *                    respect maxNbBits.
 * @return            The maximum number of bits of the Huffman tree after adjustment,
 *                    necessarily no more than maxNbBits.
 */
static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
{
    const U32 largestBits = huffNode[lastNonNull].nbBits;
    /* early exit : no elt > maxNbBits, so the tree is already valid. */
    if (largestBits <= maxNbBits) return largestBits;

    /* there are several too large elements (at least 2) */
    {   int totalCost = 0;
        const U32 baseCost = 1 << (largestBits - maxNbBits);
        int n = (int)lastNonNull;

        /* Adjust any ranks > maxNbBits to maxNbBits.
         * Compute totalCost, which is how far the sum of the ranks is
         * over 2^largestBits after adjusting the offending ranks.
         */
        while (huffNode[n].nbBits > maxNbBits) {
            totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
            huffNode[n].nbBits = (BYTE)maxNbBits;
            n--;
        }
        /* n stops at huffNode[n].nbBits <= maxNbBits */
        assert(huffNode[n].nbBits <= maxNbBits);
        /* n ends at the index of the smallest symbol using < maxNbBits */
        while (huffNode[n].nbBits == maxNbBits) --n;

        /* renorm totalCost from 2^largestBits to 2^maxNbBits
         * note : totalCost is necessarily a multiple of baseCost */
        assert((totalCost & (baseCost - 1)) == 0);
        totalCost >>= (largestBits - maxNbBits);
        assert(totalCost > 0);

        /* repay normalized cost */
        {   U32 const noSymbol = 0xF0F0F0F0;
            U32 rankLast[HUF_TABLELOG_MAX+2];

            /* Get pos of last (smallest = lowest cum. count) symbol per rank */
            ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
            {   U32 currentNbBits = maxNbBits;
                int pos;
                for (pos=n ; pos >= 0; pos--) {
                    if (huffNode[pos].nbBits >= currentNbBits) continue;
                    currentNbBits = huffNode[pos].nbBits;   /* < maxNbBits */
                    rankLast[maxNbBits-currentNbBits] = (U32)pos;
            }   }

            while (totalCost > 0) {
                /* Try to reduce the next power of 2 above totalCost because we
                 * gain back half the rank.
                 */
                U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1;
                for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
                    U32 const highPos = rankLast[nBitsToDecrease];
                    U32 const lowPos = rankLast[nBitsToDecrease-1];
                    if (highPos == noSymbol) continue;
                    /* Decrease highPos if no symbols of lowPos or if it is
                     * not cheaper to remove 2 lowPos than highPos.
                     */
                    if (lowPos == noSymbol) break;
                    {   U32 const highTotal = huffNode[highPos].count;
                        U32 const lowTotal = 2 * huffNode[lowPos].count;
                        if (highTotal <= lowTotal) break;
                }   }
                /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
                assert(rankLast[nBitsToDecrease] != noSymbol || nBitsToDecrease == 1);
                /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
                while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
                    nBitsToDecrease++;
                assert(rankLast[nBitsToDecrease] != noSymbol);
                /* Increase the number of bits to gain back half the rank cost. */
                totalCost -= 1 << (nBitsToDecrease-1);
                huffNode[rankLast[nBitsToDecrease]].nbBits++;

                /* Fix up the new rank.
                 * If the new rank was empty, this symbol is now its smallest.
                 * Otherwise, this symbol will be the largest in the new rank so no adjustment.
                 */
                if (rankLast[nBitsToDecrease-1] == noSymbol)
                    rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];
                /* Fix up the old rank.
                 * If the symbol was at position 0, meaning it was the highest weight symbol in the tree,
                 * it must be the only symbol in its rank, so the old rank now has no symbols.
                 * Otherwise, since the Huffman nodes are sorted by count, the previous position is now
                 * the smallest node in the rank. If the previous position belongs to a different rank,
                 * then the rank is now empty.
                 */
                if (rankLast[nBitsToDecrease] == 0)   /* special case, reached largest symbol */
                    rankLast[nBitsToDecrease] = noSymbol;
                else {
                    rankLast[nBitsToDecrease]--;
                    if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
                        rankLast[nBitsToDecrease] = noSymbol;   /* this rank is now empty */
                }
            }   /* while (totalCost > 0) */

            /* If we've removed too much weight, then we have to add it back.
             * To avoid overshooting again, we only adjust the smallest rank.
             * We take the largest nodes from the lowest rank 0 and move them
             * to rank 1. There's guaranteed to be enough rank 0 symbols because
             * TODO.
             */
            while (totalCost < 0) {   /* Sometimes, cost correction overshoot */
                /* special case : no rank 1 symbol (using maxNbBits-1);
                 * let's create one from largest rank 0 (using maxNbBits).
                 */
                if (rankLast[1] == noSymbol) {
                    while (huffNode[n].nbBits == maxNbBits) n--;
                    huffNode[n+1].nbBits--;
                    assert(n >= 0);
                    rankLast[1] = (U32)(n+1);
                    totalCost++;
                    continue;
                }
                huffNode[ rankLast[1] + 1 ].nbBits--;
                rankLast[1]++;
                totalCost ++;
            }
        }   /* repay normalized cost */
    }   /* there are several too large elements (at least 2) */

    return maxNbBits;
}

typedef struct {
    U16 base;
    U16 curr;
} rankPos;

typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];

/* Number of buckets available for HUF_sort() */
#define RANK_POSITION_TABLE_SIZE 192

typedef struct {
    huffNodeTable huffNodeTbl;
    rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
} HUF_buildCTable_wksp_tables;

/* RANK_POSITION_DISTINCT_COUNT_CUTOFF == Cutoff point in HUF_sort() buckets for which we use log2 bucketing.
 * Strategy is to use as many buckets as possible for representing distinct
 * counts while using the remainder to represent all "large" counts.
 *
 * To satisfy this requirement for 192 buckets, we can do the following:
 * Let buckets 0-166 represent distinct counts of [0, 166]
 * Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing.
 */
#define RANK_POSITION_MAX_COUNT_LOG 32
#define RANK_POSITION_LOG_BUCKETS_BEGIN (RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */
#define RANK_POSITION_DISTINCT_COUNT_CUTOFF RANK_POSITION_LOG_BUCKETS_BEGIN + BIT_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */

/* Return the appropriate bucket index for a given count. See definition of
 * RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
 */
static U32 HUF_getIndex(U32 const count) {
    return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF)
        ? count
        : BIT_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
}

/* Helper swap function for HUF_quickSortPartition() */
static void HUF_swapNodes(nodeElt* a, nodeElt* b) {
    nodeElt tmp = *a;
    *a = *b;
    *b = tmp;
}

/* Returns 0 if the huffNode array is not sorted by descending count */
MEM_STATIC int HUF_isSorted(nodeElt huffNode[], U32 const maxSymbolValue1) {
    U32 i;
    for (i = 1; i < maxSymbolValue1; ++i) {
        if (huffNode[i].count > huffNode[i-1].count) {
            return 0;
        }
    }
    return 1;
}

/* Insertion sort by descending order */
HINT_INLINE void HUF_insertionSort(nodeElt huffNode[], int const low, int const high) {
    int i;
    int const size = high-low+1;
    huffNode += low;
    for (i = 1; i < size; ++i) {
        nodeElt const key = huffNode[i];
        int j = i - 1;
        while (j >= 0 && huffNode[j].count < key.count) {
            huffNode[j + 1] = huffNode[j];
            j--;
        }
        huffNode[j + 1] = key;
    }
}

/* Pivot helper function for quicksort. */
static int HUF_quickSortPartition(nodeElt arr[], int const low, int const high) {
    /* Simply select rightmost element as pivot. "Better" selectors like
     * median-of-three don't experimentally appear to have any benefit.
     */
    U32 const pivot = arr[high].count;
    int i = low - 1;
    int j = low;
    for ( ; j < high; j++) {
        if (arr[j].count > pivot) {
            i++;
            HUF_swapNodes(&arr[i], &arr[j]);
        }
    }
    HUF_swapNodes(&arr[i + 1], &arr[high]);
    return i + 1;
}

/* Classic quicksort by descending with partially iterative calls
 * to reduce worst case callstack size.
 */
static void HUF_simpleQuickSort(nodeElt arr[], int low, int high) {
    int const kInsertionSortThreshold = 8;
    if (high - low < kInsertionSortThreshold) {
        HUF_insertionSort(arr, low, high);
        return;
    }
    while (low < high) {
        int const idx = HUF_quickSortPartition(arr, low, high);
        if (idx - low < high - idx) {
            HUF_simpleQuickSort(arr, low, idx - 1);
            low = idx + 1;
        } else {
            HUF_simpleQuickSort(arr, idx + 1, high);
            high = idx - 1;
        }
    }
}

/*
 * HUF_sort():
 * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.
 * This is a typical bucket sorting strategy that uses either quicksort or insertion sort to sort each bucket.
 *
 * @param[out] huffNode       Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.
 *                            Must have (maxSymbolValue + 1) entries.
 * @param[in]  count          Histogram of the symbols.
 * @param[in]  maxSymbolValue Maximum symbol value.
 * @param      rankPosition   This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.
 */
static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSymbolValue, rankPos rankPosition[]) {
    U32 n;
    U32 const maxSymbolValue1 = maxSymbolValue+1;

    /* Compute base and set curr to base.
     * For symbol s let lowerRank = HUF_getIndex(count[s]) and rank = lowerRank + 1.
     * See HUF_getIndex to see bucketing strategy.
     * We attribute each symbol to lowerRank's base value, because we want to know where
     * each rank begins in the output, so for rank R we want to count ranks R+1 and above.
     */
    ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
    for (n = 0; n < maxSymbolValue1; ++n) {
        U32 lowerRank = HUF_getIndex(count[n]);
        assert(lowerRank < RANK_POSITION_TABLE_SIZE - 1);
        rankPosition[lowerRank].base++;
    }

    assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0);
    /* Set up the rankPosition table */
    for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) {
        rankPosition[n-1].base += rankPosition[n].base;
        rankPosition[n-1].curr = rankPosition[n-1].base;
    }

    /* Insert each symbol into their appropriate bucket, setting up rankPosition table. */
    for (n = 0; n < maxSymbolValue1; ++n) {
        U32 const c = count[n];
        U32 const r = HUF_getIndex(c) + 1;
        U32 const pos = rankPosition[r].curr++;
        assert(pos < maxSymbolValue1);
        huffNode[pos].count = c;
        huffNode[pos].byte = (BYTE)n;
    }

    /* Sort each bucket. */
    for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) {
        U32 const bucketSize = rankPosition[n].curr-rankPosition[n].base;
        U32 const bucketStartIdx = rankPosition[n].base;
        if (bucketSize > 1) {
            assert(bucketStartIdx < maxSymbolValue1);
            HUF_simpleQuickSort(huffNode + bucketStartIdx, 0, bucketSize-1);
        }
    }

    assert(HUF_isSorted(huffNode, maxSymbolValue1));
}

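/* Worked example for HUF_sort() above (illustrative, not part of the original source):
 * with counts {A:5, B:5, C:1, D:0}, every count lies below the distinct-count cutoff,
 * so each count maps to its own bucket. The first pass records bucket sizes
 * (bucket 5 -> 2 symbols, bucket 1 -> 1, bucket 0 -> 1), the suffix-sum pass turns them
 * into start offsets, and the insertion pass produces
 * huffNode = [A(5), B(5), C(1), D(0)], i.e. symbols ordered by non-increasing count. */
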
/* HUF_buildCTable_wksp() :
 *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
 *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables).
 */
#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)

/* HUF_buildTree():
 * Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree.
 *
 * @param huffNode       The array sorted by HUF_sort(). Builds the Huffman tree in this array.
 * @param maxSymbolValue The maximum symbol value.
 * @return               The smallest node in the Huffman tree (by count).
 */
static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
{
    nodeElt* const huffNode0 = huffNode - 1;
    int nonNullRank;
    int lowS, lowN;
    int nodeNb = STARTNODE;
    int n, nodeRoot;
    /* init for parents */
    nonNullRank = (int)maxSymbolValue;
    while(huffNode[nonNullRank].count == 0) nonNullRank--;
    lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
    huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
    huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb;
    nodeNb++; lowS-=2;
    for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
    huffNode0[0].count = (U32)(1U<<31);   /* fake entry, strong barrier */

    /* create parents */
    while (nodeNb <= nodeRoot) {
        int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
        int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
        huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
        huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb;
        nodeNb++;
    }

    /* distribute weights (unlimited tree height) */
    huffNode[nodeRoot].nbBits = 0;
    for (n=nodeRoot-1; n>=STARTNODE; n--)
        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
    for (n=0; n<=nonNullRank; n++)
        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;

    return nonNullRank;
}

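/* Worked example of the canonical value assignment used below (illustrative, not part
 * of the original source): with maxNbBits == 3, nbPerRank[3] == 2, nbPerRank[2] == 1 and
 * nbPerRank[1] == 1, the valPerRank loop yields valPerRank[3] = 0, valPerRank[2] = 1 and
 * valPerRank[1] = 1, so the two 3-bit symbols receive codes 000 and 001, the 2-bit symbol
 * receives 01, and the 1-bit symbol receives 1 : a prefix-free canonical code. */
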
/*
 * HUF_buildCTableFromTree():
 * Build the CTable given the Huffman tree in huffNode.
 *
 * @param[out] CTable         The output Huffman CTable.
 * @param      huffNode       The Huffman tree.
 * @param      nonNullRank    The last and smallest node in the Huffman tree.
 * @param      maxSymbolValue The maximum symbol value.
 * @param      maxNbBits      The exact maximum number of bits used in the Huffman tree.
 */
static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
{
    HUF_CElt* const ct = CTable + 1;
    /* fill result into ctable (val, nbBits) */
    int n;
    U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
    U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
    int const alphabetSize = (int)(maxSymbolValue + 1);
    for (n=0; n<=nonNullRank; n++)
        nbPerRank[huffNode[n].nbBits]++;
    /* determine starting value per rank */
    {   U16 min = 0;
        for (n=(int)maxNbBits; n>0; n--) {
            valPerRank[n] = min;   /* get starting value within each rank */
            min += nbPerRank[n];
            min >>= 1;
    }   }
    for (n=0; n<alphabetSize; n++)
        HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits);   /* push nbBits per symbol, symbol order */
    for (n=0; n<alphabetSize; n++)
        HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++);   /* assign value within rank, symbol order */
    CTable[0] = maxNbBits;
}

size_t HUF_buildCTable_wksp (HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
{
    HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
    nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
    nodeElt* const huffNode = huffNode0+1;
    int nonNullRank;

    /* safety checks */
    if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
        return ERROR(workSpace_tooSmall);
    if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
        return ERROR(maxSymbolValue_tooLarge);
    ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));

    /* sort, decreasing order */
    HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);

    /* build tree */
    nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);

    /* enforce maxTableLog */
    maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
    if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */

    HUF_buildCTableFromTree(CTable, huffNode, nonNullRank, maxSymbolValue, maxNbBits);

    return maxNbBits;
}

size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
{
    HUF_CElt const* ct = CTable + 1;
    size_t nbBits = 0;
    int s;
    for (s = 0; s <= (int)maxSymbolValue; ++s) {
        nbBits += HUF_getNbBits(ct[s]) * count[s];
    }
    return nbBits >> 3;
}

int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
    HUF_CElt const* ct = CTable + 1;
    int bad = 0;
    int s;
    for (s = 0; s <= (int)maxSymbolValue; ++s) {
        bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
    }
    return !bad;
}

size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }

/* HUF_CStream_t:
 * Huffman uses its own BIT_CStream_t implementation.
 * There are three major differences from BIT_CStream_t:
 *   1. HUF_addBits() takes a HUF_CElt (size_t) which is
 *      the pair (nbBits, value) in the format:
 *        - Bits [0, 4)            = nbBits
 *        - Bits [4, 64 - nbBits)  = 0
 *        - Bits [64 - nbBits, 64) = value
 *   2. The bitContainer is built from the upper bits and
 *      right shifted. E.g. to add a new value of N bits
 *      you right shift the bitContainer by N, then or in
 *      the new value into the N upper bits.
 *   3. The bitstream has two bit containers. You can add
 *      bits to the second container and merge them into
 *      the first container.
 */
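/* Illustration of the HUF_CElt packing described above (not part of the original
 * source), assuming a 64-bit size_t: a symbol coded on nbBits == 3 with value == 5
 * (binary 101) is stored as elt == 0xA000000000000003 -- the value occupies the top
 * 3 bits, nbBits the low 8 bits, and the bits in between are zero. */
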
#define HUF_BITS_IN_CONTAINER (sizeof(size_t) * 8)

typedef struct {
    size_t bitContainer[2];
    size_t bitPos[2];

    BYTE* startPtr;
    BYTE* ptr;
    BYTE* endPtr;
} HUF_CStream_t;

/*! HUF_initCStream():
 * Initializes the bitstream.
 * @returns 0 or an error code.
 */
static size_t HUF_initCStream(HUF_CStream_t* bitC,
                              void* startPtr, size_t dstCapacity)
{
    ZSTD_memset(bitC, 0, sizeof(*bitC));
    bitC->startPtr = (BYTE*)startPtr;
    bitC->ptr = bitC->startPtr;
    bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer[0]);
    if (dstCapacity <= sizeof(bitC->bitContainer[0])) return ERROR(dstSize_tooSmall);
    return 0;
}

/*! HUF_addBits():
 * Adds the symbol stored in HUF_CElt elt to the bitstream.
 *
 * @param elt   The element we're adding. This is a (nbBits, value) pair.
 *              See the HUF_CStream_t docs for the format.
 * @param idx   Insert into the bitstream at this idx.
 * @param kFast This is a template parameter. If the bitstream is guaranteed
 *              to have at least 4 unused bits after this call it may be 1,
 *              otherwise it must be 0. HUF_addBits() is faster when fast is set.
 */
FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int idx, int kFast)
{
    assert(idx <= 1);
    assert(HUF_getNbBits(elt) <= HUF_TABLELOG_ABSOLUTEMAX);
    /* This is efficient on x86-64 with BMI2 because shrx
     * only reads the low 6 bits of the register. The compiler
     * knows this and elides the mask. When fast is set,
     * every operation can use the same value loaded from elt.
     */
    bitC->bitContainer[idx] >>= HUF_getNbBits(elt);
    bitC->bitContainer[idx] |= kFast ? HUF_getValueFast(elt) : HUF_getValue(elt);
    /* We only read the low 8 bits of bitC->bitPos[idx] so it
     * doesn't matter that the high bits have noise from the value.
     */
    bitC->bitPos[idx] += HUF_getNbBitsFast(elt);
    assert((bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
    /* The last 4-bits of elt are dirty if fast is set,
     * so we must not be overwriting bits that have already been
     * inserted into the bit container.
     */
#if DEBUGLEVEL >= 1
    {
        size_t const nbBits = HUF_getNbBits(elt);
        size_t const dirtyBits = nbBits == 0 ? 0 : BIT_highbit32((U32)nbBits) + 1;
        (void)dirtyBits;
        /* Middle bits are 0. */
        assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);
        /* We didn't overwrite any bits in the bit container. */
        assert(!kFast || (bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
        (void)dirtyBits;
    }
#endif
}

FORCE_INLINE_TEMPLATE void HUF_zeroIndex1(HUF_CStream_t* bitC)
{
    bitC->bitContainer[1] = 0;
    bitC->bitPos[1] = 0;
}

/*! HUF_mergeIndex1() :
 * Merges the bit container @ index 1 into the bit container @ index 0.
 * The bit container @ index 1 is cleared separately, by HUF_zeroIndex1().
 */
FORCE_INLINE_TEMPLATE void HUF_mergeIndex1(HUF_CStream_t* bitC)
{
    assert((bitC->bitPos[1] & 0xFF) < HUF_BITS_IN_CONTAINER);
    bitC->bitContainer[0] >>= (bitC->bitPos[1] & 0xFF);
    bitC->bitContainer[0] |= bitC->bitContainer[1];
    bitC->bitPos[0] += bitC->bitPos[1];
    assert((bitC->bitPos[0] & 0xFF) <= HUF_BITS_IN_CONTAINER);
}

/*! HUF_flushBits() :
 * Flushes the bits in the bit container @ index 0.
 *
 * @post bitPos will be < 8.
 * @param kFast If kFast is set then we must know a-priori that
 *              the bit container will not overflow.
 */
FORCE_INLINE_TEMPLATE void HUF_flushBits(HUF_CStream_t* bitC, int kFast)
{
    /* The upper bits of bitPos are noisy, so we must mask by 0xFF. */
    size_t const nbBits = bitC->bitPos[0] & 0xFF;
    size_t const nbBytes = nbBits >> 3;
    /* The top nbBits bits of bitContainer are the ones we need. */
    size_t const bitContainer = bitC->bitContainer[0] >> (HUF_BITS_IN_CONTAINER - nbBits);
    /* Mask bitPos to account for the bytes we consumed. */
    bitC->bitPos[0] &= 7;
    assert(nbBits > 0);
    assert(nbBits <= sizeof(bitC->bitContainer[0]) * 8);
    assert(bitC->ptr <= bitC->endPtr);
    MEM_writeLEST(bitC->ptr, bitContainer);
    bitC->ptr += nbBytes;
    assert(!kFast || bitC->ptr <= bitC->endPtr);
    if (!kFast && bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
    /* bitContainer doesn't need to be modified because the leftover
     * bits are already the top bitPos bits. And we don't care about
     * noise in the lower values.
     */
}

/*! HUF_endMark()
 * @returns The Huffman stream end mark: A 1-bit value = 1.
 */
static HUF_CElt HUF_endMark(void)
{
    HUF_CElt endMark;
    HUF_setNbBits(&endMark, 1);
    HUF_setValue(&endMark, 1);
    return endMark;
}

/*! HUF_closeCStream() :
 * @return Size of CStream, in bytes,
 *         or 0 if it could not fit into dstBuffer */
static size_t HUF_closeCStream(HUF_CStream_t* bitC)
{
    HUF_addBits(bitC, HUF_endMark(), /* idx */ 0, /* kFast */ 0);
    HUF_flushBits(bitC, /* kFast */ 0);
    {
        size_t const nbBits = bitC->bitPos[0] & 0xFF;
        if (bitC->ptr >= bitC->endPtr) return 0;   /* overflow detected */
        return (bitC->ptr - bitC->startPtr) + (nbBits > 0);
    }
}

FORCE_INLINE_TEMPLATE void
HUF_encodeSymbol(HUF_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable, int idx, int fast)
{
    HUF_addBits(bitCPtr, CTable[symbol], idx, fast);
}

FORCE_INLINE_TEMPLATE void
HUF_compress1X_usingCTable_internal_body_loop(HUF_CStream_t* bitC,
                                   const BYTE* ip, size_t srcSize,
                                   const HUF_CElt* ct,
                                   int kUnroll, int kFastFlush, int kLastFast)
{
    /* Join to kUnroll */
    int n = (int)srcSize;
    int rem = n % kUnroll;
    if (rem > 0) {
        for (; rem > 0; --rem) {
            HUF_encodeSymbol(bitC, ip[--n], ct, 0, /* fast */ 0);
        }
        HUF_flushBits(bitC, kFastFlush);
    }
    assert(n % kUnroll == 0);

    /* Join to 2 * kUnroll */
    if (n % (2 * kUnroll)) {
        int u;
        for (u = 1; u < kUnroll; ++u) {
            HUF_encodeSymbol(bitC, ip[n - u], ct, 0, 1);
        }
        HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, 0, kLastFast);
        HUF_flushBits(bitC, kFastFlush);
        n -= kUnroll;
    }
    assert(n % (2 * kUnroll) == 0);

    for (; n>0; n-= 2 * kUnroll) {
        /* Encode kUnroll symbols into the bitstream @ index 0. */
        int u;
        for (u = 1; u < kUnroll; ++u) {
            HUF_encodeSymbol(bitC, ip[n - u], ct, /* idx */ 0, /* fast */ 1);
        }
        HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, /* idx */ 0, /* fast */ kLastFast);
        HUF_flushBits(bitC, kFastFlush);
        /* Encode kUnroll symbols into the bitstream @ index 1.
         * This allows us to start filling the bit container
         * without any data dependencies.
         */
        HUF_zeroIndex1(bitC);
        for (u = 1; u < kUnroll; ++u) {
            HUF_encodeSymbol(bitC, ip[n - kUnroll - u], ct, /* idx */ 1, /* fast */ 1);
        }
        HUF_encodeSymbol(bitC, ip[n - kUnroll - kUnroll], ct, /* idx */ 1, /* fast */ kLastFast);
        /* Merge bitstream @ index 1 into the bitstream @ index 0 */
        HUF_mergeIndex1(bitC);
        HUF_flushBits(bitC, kFastFlush);
    }
    assert(n == 0);
}

/*
 * Returns a tight upper bound on the output space needed by Huffman
 * with an 8-byte buffer to handle over-writes. If the output is at least
 * this large we don't need to do bounds checks during Huffman encoding.
 */
static size_t HUF_tightCompressBound(size_t srcSize, size_t tableLog)
{
    return ((srcSize * tableLog) >> 3) + 8;
}


FORCE_INLINE_TEMPLATE size_t
HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
                                   const void* src, size_t srcSize,
                                   const HUF_CElt* CTable)
{
    U32 const tableLog = (U32)CTable[0];
    HUF_CElt const* ct = CTable + 1;
    const BYTE* ip = (const BYTE*) src;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;
    HUF_CStream_t bitC;

    /* init */
    if (dstSize < 8) return 0;   /* not enough space to compress */
    { size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
      if (HUF_isError(initErr)) return 0; }

    if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11)
        HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ MEM_32bits() ? 2 : 4, /* kFast */ 0, /* kLastFast */ 0);
    else {
        if (MEM_32bits()) {
            switch (tableLog) {
            case 11:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 10: ZSTD_FALLTHROUGH;
            case 9: ZSTD_FALLTHROUGH;
            case 8:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            case 7: ZSTD_FALLTHROUGH;
            default:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 3, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            }
        } else {
            switch (tableLog) {
            case 11:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 10:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            case 9:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 6, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 8:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 7, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 7:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 8, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 6: ZSTD_FALLTHROUGH;
            default:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 9, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            }
        }
    }
    assert(bitC.ptr <= bitC.endPtr);

    return HUF_closeCStream(&bitC);
}

#if DYNAMIC_BMI2

static BMI2_TARGET_ATTRIBUTE size_t
HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
                                   const void* src, size_t srcSize,
                                   const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

static size_t
HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
                                   const void* src, size_t srcSize,
                                   const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, const int bmi2)
{
    if (bmi2) {
        return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
    }
    return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
}

#else

static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, const int bmi2)
{
    (void)bmi2;
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

#endif

size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
}

size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
{
    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
}

static size_t
HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, int bmi2)
{
    size_t const segmentSize = (srcSize+3)/4;   /* first 3 segments */
    const BYTE* ip = (const BYTE*) src;
    const BYTE* const iend = ip + srcSize;
    BYTE* const ostart = (BYTE*) dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;

    if (dstSize < 6 + 1 + 1 + 1 + 8) return 0;   /* minimum space to compress successfully */
    if (srcSize < 12) return 0;   /* no saving possible : too small input */
    op += 6;   /* jumpTable */

    assert(op <= oend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
        if (cSize == 0 || cSize > 65535) return 0;
        MEM_writeLE16(ostart, (U16)cSize);
        op += cSize;
    }

    ip += segmentSize;
    assert(op <= oend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
        if (cSize == 0 || cSize > 65535) return 0;
        MEM_writeLE16(ostart+2, (U16)cSize);
        op += cSize;
    }

    ip += segmentSize;
    assert(op <= oend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
        if (cSize == 0 || cSize > 65535) return 0;
        MEM_writeLE16(ostart+4, (U16)cSize);
        op += cSize;
    }

    ip += segmentSize;
    assert(op <= oend);
    assert(ip <= iend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
        if (cSize == 0 || cSize > 65535) return 0;
        op += cSize;
    }

    return (size_t)(op-ostart);
}

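/* Note on the 4-stream layout produced above (descriptive, not part of the original
 * source): the output starts with a 6-byte jump table holding the little-endian
 * 16-bit compressed sizes of the first three streams; the size of the fourth stream
 * is implied by the total block size. Each stream compresses roughly a quarter of the
 * input (segmentSize = (srcSize+3)/4), the last one taking the remainder. */
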
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
{
    return HUF_compress4X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
}

size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
{
    return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
}

typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;

static size_t HUF_compressCTable_internal(
                BYTE* const ostart, BYTE* op, BYTE* const oend,
                const void* src, size_t srcSize,
                HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
{
    size_t const cSize = (nbStreams==HUF_singleStream) ?
                         HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) :
                         HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2);
    if (HUF_isError(cSize)) { return cSize; }
    if (cSize==0) { return 0; }   /* uncompressible */
    op += cSize;
    /* check compressibility */
    assert(op >= ostart);
    if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
    return (size_t)(op-ostart);
}

typedef struct {
    unsigned count[HUF_SYMBOLVALUE_MAX + 1];
    HUF_CElt CTable[HUF_CTABLE_SIZE_ST(HUF_SYMBOLVALUE_MAX)];
    union {
        HUF_buildCTable_wksp_tables buildCTable_wksp;
        HUF_WriteCTableWksp writeCTable_wksp;
        U32 hist_wksp[HIST_WKSP_SIZE_U32];
    } wksps;
} HUF_compress_tables_t;

#define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096
#define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10  /* Must be >= 2 */

/* HUF_compress_internal() :
 * `workSpace_align4` must be aligned on 4-bytes boundaries,
 * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */
static size_t
HUF_compress_internal (void* dst, size_t dstSize,
                 const void* src, size_t srcSize,
                 unsigned maxSymbolValue, unsigned huffLog,
                 HUF_nbStreams_e nbStreams,
                 void* workSpace, size_t wkspSize,
                 HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
                 const int bmi2, unsigned suspectUncompressible)
{
    HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t));
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;

    HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE);

    /* checks & inits */
    if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
    if (!srcSize) return 0;   /* Uncompressed */
    if (!dstSize) return 0;   /* cannot fit anything within dst budget */
    if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);   /* current block size limit */
    if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
    if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
    if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;

    /* Heuristic : If old table is valid, use it for small inputs */
    if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
        return HUF_compressCTable_internal(ostart, op, oend,
                                           src, srcSize,
                                           nbStreams, oldHufTable, bmi2);
    }

    /* If uncompressible data is suspected, do a smaller sampling first */
    DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2);
    if (suspectUncompressible && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
        size_t largestTotal = 0;
        {   unsigned maxSymbolValueBegin = maxSymbolValue;
            CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
            largestTotal += largestBegin;
        }
        {   unsigned maxSymbolValueEnd = maxSymbolValue;
            CHECK_V_F(largestEnd, HIST_count_simple (table->count, &maxSymbolValueEnd, (const BYTE*)src + srcSize - SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
            largestTotal += largestEnd;
        }
        if (largestTotal <= ((2 * SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
    }

    /* Scan input and build symbol stats */
    {   CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->wksps.hist_wksp, sizeof(table->wksps.hist_wksp)) );
        if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
        if (largest <= (srcSize >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
    }

    /* Check validity of previous table */
    if ( repeat
      && *repeat == HUF_repeat_check
      && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) {
        *repeat = HUF_repeat_none;
    }
    /* Heuristic : use existing table for small inputs */
    if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
        return HUF_compressCTable_internal(ostart, op, oend,
                                           src, srcSize,
                                           nbStreams, oldHufTable, bmi2);
    }

    /* Build Huffman Tree */
    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
    {   size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
                                            maxSymbolValue, huffLog,
                                            &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
        CHECK_F(maxBits);
        huffLog = (U32)maxBits;
    }
    /* Zero unused symbols in CTable, so we can check it for validity */
    {
        size_t const ctableSize = HUF_CTABLE_SIZE_ST(maxSymbolValue);
        size_t const unusedSize = sizeof(table->CTable) - ctableSize * sizeof(HUF_CElt);
        ZSTD_memset(table->CTable + ctableSize, 0, unusedSize);
    }

    /* Write table description header */
    {   CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,
                                              &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) );
        /* Check if using previous huffman table is beneficial */
        if (repeat && *repeat != HUF_repeat_none) {
            size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
            size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue);
            if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
                return HUF_compressCTable_internal(ostart, op, oend,
                                                   src, srcSize,
                                                   nbStreams, oldHufTable, bmi2);
        }   }

        /* Use the new huffman table */
        if (hSize + 12ul >= srcSize) { return 0; }
        op += hSize;
        if (repeat) { *repeat = HUF_repeat_none; }
        if (oldHufTable)
            ZSTD_memcpy(oldHufTable, table->CTable, sizeof(table->CTable));   /* Save new table */
    }
    return HUF_compressCTable_internal(ostart, op, oend,
                                       src, srcSize,
                                       nbStreams, table->CTable, bmi2);
}


size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_singleStream,
                                 workSpace, wkspSize,
                                 NULL, NULL, 0, 0 /*bmi2*/, 0);
}

size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize,
                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat,
                      int bmi2, unsigned suspectUncompressible)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_singleStream,
                                 workSpace, wkspSize, hufTable,
                                 repeat, preferRepeat, bmi2, suspectUncompressible);
}

/* HUF_compress4X_wksp():
 * compress input using 4 streams.
 * provide workspace to generate compression tables */
size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_fourStreams,
                                 workSpace, wkspSize,
                                 NULL, NULL, 0, 0 /*bmi2*/, 0);
}

/* HUF_compress4X_repeat():
 * compress input using 4 streams.
 * consider skipping quickly
 * re-use an existing huffman compression table */
size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize,
                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_fourStreams,
                                 workSpace, wkspSize,
                                 hufTable, repeat, preferRepeat, bmi2, suspectUncompressible);
}
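
/* Usage sketch (illustrative, not part of the original source):
 *
 *     U64 wksp[HUF_WORKSPACE_SIZE_U64];
 *     size_t const cSize = HUF_compress4X_wksp(dst, dstCapacity, src, srcSize,
 *                                              HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT,
 *                                              wksp, sizeof(wksp));
 *     if (HUF_isError(cSize)) { ... }        handle error
 *     else if (cSize == 0)    { ... }        not compressible : store src uncompressed
 *     else if (cSize == 1)    { ... }        single-symbol input : RLE
 */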