// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* ******************************************************************
 * Huffman encoder, part of New Generation Entropy library
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 *  You can contact the author at :
 *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
 *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
****************************************************************** */

/* **************************************************************
*  Compiler specifics
****************************************************************/


/* **************************************************************
*  Includes
****************************************************************/
#include "../common/zstd_deps.h"     /* ZSTD_memcpy, ZSTD_memset */
#include "../common/compiler.h"
#include "../common/bitstream.h"
#include "hist.h"
#define FSE_STATIC_LINKING_ONLY   /* FSE_optimalTableLog_internal */
#include "../common/fse.h"        /* header compression */
#include "../common/huf.h"
#include "../common/error_private.h"
#include "../common/bits.h"       /* ZSTD_highbit32 */


/* **************************************************************
*  Error Management
****************************************************************/
#define HUF_isError ERR_isError
#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)   /* use only *after* variable declarations */


/* **************************************************************
*  Required declarations
****************************************************************/
typedef struct nodeElt_s {
    U32 count;
    U16 parent;
    BYTE byte;
    BYTE nbBits;
} nodeElt;


/* **************************************************************
*  Debug Traces
****************************************************************/

#if DEBUGLEVEL >= 2

static size_t showU32(const U32* arr, size_t size)
{
    size_t u;
    for (u=0; u<size; u++) {
        RAWLOG(6, " %u", arr[u]); (void)arr;
    }
    RAWLOG(6, " \n");
    return size;
}

static size_t HUF_getNbBits(HUF_CElt elt);

static size_t showCTableBits(const HUF_CElt* ctable, size_t size)
{
    size_t u;
    for (u=0; u<size; u++) {
        RAWLOG(6, " %zu", HUF_getNbBits(ctable[u])); (void)ctable;
    }
    RAWLOG(6, " \n");
    return size;

}

static size_t showHNodeSymbols(const nodeElt* hnode, size_t size)
{
    size_t u;
    for (u=0; u<size; u++) {
        RAWLOG(6, " %u", hnode[u].byte); (void)hnode;
    }
    RAWLOG(6, " \n");
    return size;
}

static size_t showHNodeBits(const nodeElt* hnode, size_t size)
{
    size_t u;
    for (u=0; u<size; u++) {
        RAWLOG(6, " %u", hnode[u].nbBits); (void)hnode;
    }
    RAWLOG(6, " \n");
    return size;
}

#endif


/* *******************************************************
*  HUF : Huffman block compression
*********************************************************/
#define HUF_WORKSPACE_MAX_ALIGNMENT 8

static void* HUF_alignUpWorkspace(void* workspace, size_t* workspaceSizePtr, size_t align)
{
    size_t const mask = align - 1;
    size_t const rem = (size_t)workspace & mask;
    size_t const add = (align - rem) & mask;
    BYTE* const aligned = (BYTE*)workspace + add;
    assert((align & (align - 1)) == 0); /* pow 2 */
    assert(align <= HUF_WORKSPACE_MAX_ALIGNMENT);
    if (*workspaceSizePtr >= add) {
        assert(add < align);
        assert(((size_t)aligned & mask) == 0);
        *workspaceSizePtr -= add;
        return aligned;
    } else {
        *workspaceSizePtr = 0;
        return NULL;
    }
}
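
/*
 * Illustrative sketch (for exposition only; not part of the original source):
 * with align = 4 and a workspace pointer at address 4k+1, the math above
 * gives rem = 1 and add = 3, so three bytes are skipped and charged to the
 * workspace:
 *
 *     size_t wkspSize = 64;
 *     BYTE buffer[64];                       // assume 4-byte aligned
 *     void* p = HUF_alignUpWorkspace(buffer + 1, &wkspSize, sizeof(U32));
 *     // p == buffer + 4, wkspSize == 61; NULL if fewer than 3 bytes remained
 */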


/* HUF_compressWeights() :
 * Same as FSE_compress(), but dedicated to huff0's weights compression.
 * The use case needs much less stack memory.
 * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
 */
#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6

typedef struct {
    FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
    U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
    unsigned count[HUF_TABLELOG_MAX+1];
    S16 norm[HUF_TABLELOG_MAX+1];
} HUF_CompressWeightsWksp;

static size_t
HUF_compressWeights(void* dst, size_t dstSize,
              const void* weightTable, size_t wtSize,
                    void* workspace, size_t workspaceSize)
{
    BYTE* const ostart = (BYTE*) dst;
    BYTE* op = ostart;
    BYTE* const oend = ostart + dstSize;

    unsigned maxSymbolValue = HUF_TABLELOG_MAX;
    U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
    HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));

    if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);

    /* init conditions */
    if (wtSize <= 1) return 0;  /* Not compressible */

    /* Scan input and build symbol stats */
    {   unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize);   /* never fails */
        if (maxCount == wtSize) return 1;   /* only a single symbol in src : rle */
        if (maxCount == 1) return 0;        /* each symbol present maximum once => not compressible */
    }

    tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
    CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );

    /* Write table description header */
    {   CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) );
        op += hSize;
    }

    /* Compress */
    CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) );
    {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) );
        if (cSize == 0) return 0;   /* not enough space for compressed data */
        op += cSize;
    }

    return (size_t)(op-ostart);
}
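
/*
 * Illustrative sketch (hypothetical caller; not from the original source):
 * compressing a table of per-symbol weights into `header`, using a stack
 * workspace sized for the dedicated FSE tables above:
 *
 *     HUF_CompressWeightsWksp wksp;
 *     BYTE header[128];
 *     BYTE weights[64];                      // each entry <= HUF_TABLELOG_MAX
 *     size_t const hSize = HUF_compressWeights(header, sizeof(header),
 *                                              weights, sizeof(weights),
 *                                              &wksp, sizeof(wksp));
 *     // hSize == 0 : store raw; hSize == 1 : RLE; otherwise FSE-compressed
 */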

static size_t HUF_getNbBits(HUF_CElt elt)
{
    return elt & 0xFF;
}

static size_t HUF_getNbBitsFast(HUF_CElt elt)
{
    return elt;
}

static size_t HUF_getValue(HUF_CElt elt)
{
    return elt & ~(size_t)0xFF;
}

static size_t HUF_getValueFast(HUF_CElt elt)
{
    return elt;
}

static void HUF_setNbBits(HUF_CElt* elt, size_t nbBits)
{
    assert(nbBits <= HUF_TABLELOG_ABSOLUTEMAX);
    *elt = nbBits;
}

static void HUF_setValue(HUF_CElt* elt, size_t value)
{
    size_t const nbBits = HUF_getNbBits(*elt);
    if (nbBits > 0) {
        assert((value >> nbBits) == 0);
        *elt |= value << (sizeof(HUF_CElt) * 8 - nbBits);
    }
}
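
/*
 * Worked example (for exposition; not from the original source): a HUF_CElt
 * stores nbBits in its low 8 bits and the code value left-justified in its
 * top nbBits. On a 64-bit size_t, with nbBits = 3 and value = 0b101:
 *
 *     HUF_CElt elt = 0;
 *     HUF_setNbBits(&elt, 3);    // elt == 0x0000000000000003
 *     HUF_setValue(&elt, 5);     // elt |= 5 << (64 - 3)
 *                                // elt == 0xA000000000000003
 *     // HUF_getNbBits(elt) == 3, HUF_getValue(elt) == 0xA000000000000000
 */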

HUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable)
{
    HUF_CTableHeader header;
    ZSTD_memcpy(&header, ctable, sizeof(header));
    return header;
}

static void HUF_writeCTableHeader(HUF_CElt* ctable, U32 tableLog, U32 maxSymbolValue)
{
    HUF_CTableHeader header;
    HUF_STATIC_ASSERT(sizeof(ctable[0]) == sizeof(header));
    ZSTD_memset(&header, 0, sizeof(header));
    assert(tableLog < 256);
    header.tableLog = (BYTE)tableLog;
    assert(maxSymbolValue < 256);
    header.maxSymbolValue = (BYTE)maxSymbolValue;
    ZSTD_memcpy(ctable, &header, sizeof(header));
}

typedef struct {
    HUF_CompressWeightsWksp wksp;
    BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
    BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
} HUF_WriteCTableWksp;

size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
                            const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
                            void* workspace, size_t workspaceSize)
{
    HUF_CElt const* const ct = CTable + 1;
    BYTE* op = (BYTE*)dst;
    U32 n;
    HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));

    HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE >= sizeof(HUF_WriteCTableWksp));

    assert(HUF_readCTableHeader(CTable).maxSymbolValue == maxSymbolValue);
    assert(HUF_readCTableHeader(CTable).tableLog == huffLog);

    /* check conditions */
    if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);

    /* convert to weight */
    wksp->bitsToWeight[0] = 0;
    for (n=1; n<huffLog+1; n++)
        wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
    for (n=0; n<maxSymbolValue; n++)
        wksp->huffWeight[n] = wksp->bitsToWeight[HUF_getNbBits(ct[n])];

    /* attempt weights compression by FSE */
    if (maxDstSize < 1) return ERROR(dstSize_tooSmall);
    {   CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
        if ((hSize>1) & (hSize < maxSymbolValue/2)) {   /* FSE compressed */
            op[0] = (BYTE)hSize;
            return hSize+1;
    }   }

    /* write raw values as 4-bits (max : 15) */
    if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen : likely means source cannot be compressed */
    if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
    op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
    wksp->huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination */
    for (n=0; n<maxSymbolValue; n+=2)
        op[(n/2)+1] = (BYTE)((wksp->huffWeight[n] << 4) + wksp->huffWeight[n+1]);
    return ((maxSymbolValue+1)/2) + 1;
}
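
/*
 * Worked example (for exposition; not from the original source): with
 * maxSymbolValue = 3 and weights {w0=2, w1=1, w2=1} for symbols 0..2, the
 * raw 4-bit path packs two weights per byte behind a header byte that
 * flags the raw format and encodes the symbol count:
 *
 *     op[0] = 128 + (3 - 1)               // raw-format marker + count
 *     op[1] = (2 << 4) + 1  == 0x21       // weights of symbols 0 and 1
 *     op[2] = (1 << 4) + 0  == 0x10       // weight of symbol 2, zero pad
 *
 * Total written: ((3+1)/2) + 1 == 3 bytes. The last symbol's weight is not
 * transmitted; the decoder deduces it by completing the rank sum to a
 * power of 2.
 */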


size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
{
    BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];   /* init not required, even though some static analyzer may complain */
    U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];   /* large enough for values from 0 to 16 */
    U32 tableLog = 0;
    U32 nbSymbols = 0;
    HUF_CElt* const ct = CTable + 1;

    /* get symbol weights */
    CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
    *hasZeroWeights = (rankVal[0] > 0);

    /* check result */
    if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);

    *maxSymbolValuePtr = nbSymbols - 1;

    HUF_writeCTableHeader(CTable, tableLog, *maxSymbolValuePtr);

    /* Prepare base value per rank */
    {   U32 n, nextRankStart = 0;
        for (n=1; n<=tableLog; n++) {
            U32 curr = nextRankStart;
            nextRankStart += (rankVal[n] << (n-1));
            rankVal[n] = curr;
    }   }

    /* fill nbBits */
    {   U32 n; for (n=0; n<nbSymbols; n++) {
            const U32 w = huffWeight[n];
            HUF_setNbBits(ct + n, (BYTE)(tableLog + 1 - w) & -(w != 0));
    }   }

    /* fill val */
    {   U16 nbPerRank[HUF_TABLELOG_MAX+2]  = {0};  /* support w=0=>n=tableLog+1 */
        U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
        { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[HUF_getNbBits(ct[n])]++; }
        /* determine starting value per rank */
        valPerRank[tableLog+1] = 0;   /* for w==0 */
        {   U16 min = 0;
            U32 n; for (n=tableLog; n>0; n--) {  /* start at n=tableLog <-> w=1 */
                valPerRank[n] = min;     /* get starting value within each rank */
                min += nbPerRank[n];
                min >>= 1;
        }   }
        /* assign value within rank, symbol order */
        { U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); }
    }

    return readSize;
}
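
/*
 * Worked example (for exposition; not from the original source): canonical
 * value assignment with tableLog = 3 and code lengths {A:2, B:2, C:2, D:3, E:3},
 * so nbPerRank[3] = 2 and nbPerRank[2] = 3. Walking ranks from longest to
 * shortest:
 *
 *     n = 3 : valPerRank[3] = 0, then min = (0 + 2) >> 1 = 1
 *     n = 2 : valPerRank[2] = 1, then min = (1 + 3) >> 1 = 2
 *
 * Yields D = 000, E = 001, A = 01, B = 10, C = 11 : a complete prefix-free
 * set, with shorter codes occupying the higher values.
 */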

U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
{
    const HUF_CElt* const ct = CTable + 1;
    assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
    if (symbolValue > HUF_readCTableHeader(CTable).maxSymbolValue)
        return 0;
    return (U32)HUF_getNbBits(ct[symbolValue]);
}


/*
 * HUF_setMaxHeight():
 * Try to enforce @targetNbBits on the Huffman tree described in @huffNode.
 *
 * It attempts to convert all nodes with nbBits > @targetNbBits
 * to employ @targetNbBits instead. Then it adjusts the tree
 * so that it remains a valid canonical Huffman tree.
 *
 * @pre               The sum of the ranks of each symbol == 2^largestBits,
 *                    where largestBits == huffNode[lastNonNull].nbBits.
 * @post              The sum of the ranks of each symbol == 2^largestBits,
 *                    where largestBits is the return value (expected <= targetNbBits).
 *
 * @param huffNode    The Huffman tree modified in place to enforce targetNbBits.
 *                    It's presumed sorted, from most frequent to rarest symbol.
 * @param lastNonNull The symbol with the lowest count in the Huffman tree.
 * @param targetNbBits  The allowed number of bits, which the Huffman tree
 *                    may not respect. After this function the Huffman tree will
 *                    respect targetNbBits.
 * @return            The maximum number of bits of the Huffman tree after adjustment.
 */
static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 targetNbBits)
{
    const U32 largestBits = huffNode[lastNonNull].nbBits;
    /* early exit : no elt > targetNbBits, so the tree is already valid. */
    if (largestBits <= targetNbBits) return largestBits;

    DEBUGLOG(5, "HUF_setMaxHeight (targetNbBits = %u)", targetNbBits);

    /* there are several too large elements (at least 2) */
    {   int totalCost = 0;
        const U32 baseCost = 1 << (largestBits - targetNbBits);
        int n = (int)lastNonNull;

        /* Adjust any ranks > targetNbBits to targetNbBits.
         * Compute totalCost, which measures how far the sum of the ranks
         * overshoots 2^largestBits once the offending ranks are adjusted.
         */
        while (huffNode[n].nbBits > targetNbBits) {
            totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
            huffNode[n].nbBits = (BYTE)targetNbBits;
            n--;
        }
        /* n stops at huffNode[n].nbBits <= targetNbBits */
        assert(huffNode[n].nbBits <= targetNbBits);
        /* n ends at the index of the smallest symbol using < targetNbBits */
        while (huffNode[n].nbBits == targetNbBits) --n;

        /* renorm totalCost from 2^largestBits to 2^targetNbBits
         * note : totalCost is necessarily a multiple of baseCost */
        assert(((U32)totalCost & (baseCost - 1)) == 0);
        totalCost >>= (largestBits - targetNbBits);
        assert(totalCost > 0);

        /* repay normalized cost */
        {   U32 const noSymbol = 0xF0F0F0F0;
            U32 rankLast[HUF_TABLELOG_MAX+2];

            /* Get pos of last (smallest = lowest cum. count) symbol per rank */
            ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
            {   U32 currentNbBits = targetNbBits;
                int pos;
                for (pos=n ; pos >= 0; pos--) {
                    if (huffNode[pos].nbBits >= currentNbBits) continue;
                    currentNbBits = huffNode[pos].nbBits;   /* < targetNbBits */
                    rankLast[targetNbBits-currentNbBits] = (U32)pos;
            }   }

            while (totalCost > 0) {
                /* Try to reduce the next power of 2 above totalCost because we
                 * gain back half the rank.
                 */
                U32 nBitsToDecrease = ZSTD_highbit32((U32)totalCost) + 1;
                for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
                    U32 const highPos = rankLast[nBitsToDecrease];
                    U32 const lowPos = rankLast[nBitsToDecrease-1];
                    if (highPos == noSymbol) continue;
                    /* Decrease highPos if no symbols of lowPos or if it is
                     * not cheaper to remove 2 lowPos than highPos.
                     */
                    if (lowPos == noSymbol) break;
                    {   U32 const highTotal = huffNode[highPos].count;
                        U32 const lowTotal = 2 * huffNode[lowPos].count;
                        if (highTotal <= lowTotal) break;
                }   }
                /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
                assert(rankLast[nBitsToDecrease] != noSymbol || nBitsToDecrease == 1);
                /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
                while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
                    nBitsToDecrease++;
                assert(rankLast[nBitsToDecrease] != noSymbol);
                /* Increase the number of bits to gain back half the rank cost. */
                totalCost -= 1 << (nBitsToDecrease-1);
                huffNode[rankLast[nBitsToDecrease]].nbBits++;

                /* Fix up the new rank.
                 * If the new rank was empty, this symbol is now its smallest.
                 * Otherwise, this symbol will be the largest in the new rank so no adjustment.
                 */
                if (rankLast[nBitsToDecrease-1] == noSymbol)
                    rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];
                /* Fix up the old rank.
                 * If the symbol was at position 0, meaning it was the highest weight symbol in the tree,
                 * it must be the only symbol in its rank, so the old rank now has no symbols.
                 * Otherwise, since the Huffman nodes are sorted by count, the previous position is now
                 * the smallest node in the rank. If the previous position belongs to a different rank,
                 * then the rank is now empty.
                 */
                if (rankLast[nBitsToDecrease] == 0)    /* special case, reached largest symbol */
                    rankLast[nBitsToDecrease] = noSymbol;
                else {
                    rankLast[nBitsToDecrease]--;
                    if (huffNode[rankLast[nBitsToDecrease]].nbBits != targetNbBits-nBitsToDecrease)
                        rankLast[nBitsToDecrease] = noSymbol;   /* this rank is now empty */
                }
            }   /* while (totalCost > 0) */

            /* If we've removed too much weight, then we have to add it back.
             * To avoid overshooting again, we only adjust the smallest rank.
             * We take the largest nodes from the lowest rank 0 and move them
             * to rank 1. There's guaranteed to be enough rank 0 symbols because
             * TODO.
             */
            while (totalCost < 0) {  /* Sometimes, cost correction overshoots */
                /* special case : no rank 1 symbol (using targetNbBits-1);
                 * let's create one from largest rank 0 (using targetNbBits).
                 */
                if (rankLast[1] == noSymbol) {
                    while (huffNode[n].nbBits == targetNbBits) n--;
                    assert(n >= 0);
                    huffNode[n+1].nbBits--;
                    rankLast[1] = (U32)(n+1);
                    totalCost++;
                    continue;
                }
                huffNode[ rankLast[1] + 1 ].nbBits--;
                rankLast[1]++;
                totalCost++;
            }
        }   /* repay normalized cost */
    }   /* there are several too large elements (at least 2) */

    return targetNbBits;
}
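
/*
 * Worked example (for exposition; not from the original source): suppose
 * largestBits = 13 and targetNbBits = 11, so baseCost = 1 << 2 = 4. Each
 * 13-bit leaf forced down to 11 bits contributes baseCost - (1 << 0) = 3 to
 * totalCost; the Kraft invariant guarantees the sum is a multiple of 4, and
 * the renormalization (>> 2) expresses it in 2^-11 slots. Each pass of the
 * repayment loop then demotes one symbol from (targetNbBits - n) bits to
 * (targetNbBits - n + 1) bits, paying back 2^(n-1) slots, until totalCost
 * reaches 0 (or dips below and is topped back up from rank 0).
 */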

typedef struct {
    U16 base;
    U16 curr;
} rankPos;

typedef nodeElt huffNodeTable[2 * (HUF_SYMBOLVALUE_MAX + 1)];

/* Number of buckets available for HUF_sort() */
#define RANK_POSITION_TABLE_SIZE 192

typedef struct {
  huffNodeTable huffNodeTbl;
  rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
} HUF_buildCTable_wksp_tables;

/* RANK_POSITION_DISTINCT_COUNT_CUTOFF == Cutoff point in HUF_sort() buckets for which we use log2 bucketing.
 * Strategy is to use as many buckets as possible for representing distinct
 * counts while using the remainder to represent all "large" counts.
 *
 * To satisfy this requirement for 192 buckets, we can do the following:
 * Let buckets 0-166 represent distinct counts of [0, 166]
 * Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing.
 */
#define RANK_POSITION_MAX_COUNT_LOG 32
#define RANK_POSITION_LOG_BUCKETS_BEGIN ((RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */)
#define RANK_POSITION_DISTINCT_COUNT_CUTOFF (RANK_POSITION_LOG_BUCKETS_BEGIN + ZSTD_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */)

/* Return the appropriate bucket index for a given count. See definition of
 * RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
 */
static U32 HUF_getIndex(U32 const count) {
    return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF)
        ? count
        : ZSTD_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
}
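
/*
 * Worked example (for exposition; not from the original source): small
 * counts below the cutoff get their own bucket, while large counts share
 * one bucket per power of 2:
 *
 *     HUF_getIndex(5)        == 5                  // distinct bucket per count
 *     HUF_getIndex(1000)     == 9 + 158 == 167     // ZSTD_highbit32(1000) == 9
 *     HUF_getIndex(1u << 20) == 20 + 158 == 178    // all of [2^20, 2^21) share it
 */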

/* Helper swap function for HUF_quickSortPartition() */
static void HUF_swapNodes(nodeElt* a, nodeElt* b) {
    nodeElt tmp = *a;
    *a = *b;
    *b = tmp;
}

/* Returns 0 if the huffNode array is not sorted by descending count */
MEM_STATIC int HUF_isSorted(nodeElt huffNode[], U32 const maxSymbolValue1) {
    U32 i;
    for (i = 1; i < maxSymbolValue1; ++i) {
        if (huffNode[i].count > huffNode[i-1].count) {
            return 0;
        }
    }
    return 1;
}

/* Insertion sort by descending order */
HINT_INLINE void HUF_insertionSort(nodeElt huffNode[], int const low, int const high) {
    int i;
    int const size = high-low+1;
    huffNode += low;
    for (i = 1; i < size; ++i) {
        nodeElt const key = huffNode[i];
        int j = i - 1;
        while (j >= 0 && huffNode[j].count < key.count) {
            huffNode[j + 1] = huffNode[j];
            j--;
        }
        huffNode[j + 1] = key;
    }
}

/* Pivot helper function for quicksort. */
static int HUF_quickSortPartition(nodeElt arr[], int const low, int const high) {
    /* Simply select rightmost element as pivot. "Better" selectors like
     * median-of-three don't experimentally appear to have any benefit.
     */
    U32 const pivot = arr[high].count;
    int i = low - 1;
    int j = low;
    for ( ; j < high; j++) {
        if (arr[j].count > pivot) {
            i++;
            HUF_swapNodes(&arr[i], &arr[j]);
        }
    }
    HUF_swapNodes(&arr[i + 1], &arr[high]);
    return i + 1;
}

/* Classic quicksort by descending order, with partially iterative calls
 * to reduce worst case callstack size.
 */
static void HUF_simpleQuickSort(nodeElt arr[], int low, int high) {
    int const kInsertionSortThreshold = 8;
    if (high - low < kInsertionSortThreshold) {
        HUF_insertionSort(arr, low, high);
        return;
    }
    while (low < high) {
        int const idx = HUF_quickSortPartition(arr, low, high);
        if (idx - low < high - idx) {
            HUF_simpleQuickSort(arr, low, idx - 1);
            low = idx + 1;
        } else {
            HUF_simpleQuickSort(arr, idx + 1, high);
            high = idx - 1;
        }
    }
}

/*
 * HUF_sort():
 * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.
 * This is a typical bucket sorting strategy that uses either quicksort or insertion sort to sort each bucket.
 *
 * @param[out] huffNode       Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.
 *                            Must have (maxSymbolValue + 1) entries.
 * @param[in]  count          Histogram of the symbols.
 * @param[in]  maxSymbolValue Maximum symbol value.
 * @param      rankPosition   This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.
 */
static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSymbolValue, rankPos rankPosition[]) {
    U32 n;
    U32 const maxSymbolValue1 = maxSymbolValue+1;

    /* Compute base and set curr to base.
     * For symbol s let lowerRank = HUF_getIndex(count[s]) and rank = lowerRank + 1.
     * See HUF_getIndex to see bucketing strategy.
     * We attribute each symbol to lowerRank's base value, because we want to know where
     * each rank begins in the output, so for rank R we want to count ranks R+1 and above.
     */
    ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
    for (n = 0; n < maxSymbolValue1; ++n) {
        U32 lowerRank = HUF_getIndex(count[n]);
        assert(lowerRank < RANK_POSITION_TABLE_SIZE - 1);
        rankPosition[lowerRank].base++;
    }

    assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0);
    /* Set up the rankPosition table */
    for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) {
        rankPosition[n-1].base += rankPosition[n].base;
        rankPosition[n-1].curr = rankPosition[n-1].base;
    }

    /* Insert each symbol into its appropriate bucket, setting up the rankPosition table. */
    for (n = 0; n < maxSymbolValue1; ++n) {
        U32 const c = count[n];
        U32 const r = HUF_getIndex(c) + 1;
        U32 const pos = rankPosition[r].curr++;
        assert(pos < maxSymbolValue1);
        huffNode[pos].count = c;
        huffNode[pos].byte  = (BYTE)n;
    }

    /* Sort each bucket. */
    for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) {
        int const bucketSize = rankPosition[n].curr - rankPosition[n].base;
        U32 const bucketStartIdx = rankPosition[n].base;
        if (bucketSize > 1) {
            assert(bucketStartIdx < maxSymbolValue1);
            HUF_simpleQuickSort(huffNode + bucketStartIdx, 0, bucketSize-1);
        }
    }

    assert(HUF_isSorted(huffNode, maxSymbolValue1));
}


/* HUF_buildCTable_wksp() :
 *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
 *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables).
 */
#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)

/* HUF_buildTree():
 * Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree.
 *
 * @param huffNode        The array sorted by HUF_sort(). Builds the Huffman tree in this array.
 * @param maxSymbolValue  The maximum symbol value.
 * @return                The smallest node in the Huffman tree (by count).
 */
static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
{
    nodeElt* const huffNode0 = huffNode - 1;
    int nonNullRank;
    int lowS, lowN;
    int nodeNb = STARTNODE;
    int n, nodeRoot;
    DEBUGLOG(5, "HUF_buildTree (alphabet size = %u)", maxSymbolValue + 1);
    /* init for parents */
    nonNullRank = (int)maxSymbolValue;
    while(huffNode[nonNullRank].count == 0) nonNullRank--;
    lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
    huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
    huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb;
    nodeNb++; lowS-=2;
    for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
    huffNode0[0].count = (U32)(1U<<31);  /* fake entry, strong barrier */

    /* create parents */
    while (nodeNb <= nodeRoot) {
        int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
        int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
        huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
        huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb;
        nodeNb++;
    }

    /* distribute weights (unlimited tree height) */
    huffNode[nodeRoot].nbBits = 0;
    for (n=nodeRoot-1; n>=STARTNODE; n--)
        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
    for (n=0; n<=nonNullRank; n++)
        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;

    DEBUGLOG(6, "Initial distribution of bits completed (%zu sorted symbols)", showHNodeBits(huffNode, maxSymbolValue+1));

    return nonNullRank;
}
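
/*
 * Worked example (for exposition; not from the original source): four
 * symbols sorted by descending count {A:5, B:3, C:2, D:1}. Leaves sit in
 * huffNode[0..3]; internal nodes are appended from index STARTNODE upward:
 *
 *     node N0 = C + D   (count 3)
 *     node N1 = B + N0  (count 6)
 *     root N2 = A + N1  (count 11)
 *
 * Walking parent links then gives nbBits: A = 1, B = 2, C = D = 3 : an
 * unlimited-depth tree that HUF_setMaxHeight() may later flatten.
 */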

/*
 * HUF_buildCTableFromTree():
 * Build the CTable given the Huffman tree in huffNode.
 *
 * @param[out] CTable         The output Huffman CTable.
 * @param      huffNode       The Huffman tree.
 * @param      nonNullRank    The last and smallest node in the Huffman tree.
 * @param      maxSymbolValue The maximum symbol value.
 * @param      maxNbBits      The exact maximum number of bits used in the Huffman tree.
 */
static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
{
    HUF_CElt* const ct = CTable + 1;
    /* fill result into ctable (val, nbBits) */
    int n;
    U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
    U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
    int const alphabetSize = (int)(maxSymbolValue + 1);
    for (n=0; n<=nonNullRank; n++)
        nbPerRank[huffNode[n].nbBits]++;
    /* determine starting value per rank */
    {   U16 min = 0;
        for (n=(int)maxNbBits; n>0; n--) {
            valPerRank[n] = min;      /* get starting value within each rank */
            min += nbPerRank[n];
            min >>= 1;
    }   }
    for (n=0; n<alphabetSize; n++)
        HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits);   /* push nbBits per symbol, symbol order */
    for (n=0; n<alphabetSize; n++)
        HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++);   /* assign value within rank, symbol order */

    HUF_writeCTableHeader(CTable, maxNbBits, maxSymbolValue);
}

size_t
HUF_buildCTable_wksp(HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
                     void* workSpace, size_t wkspSize)
{
    HUF_buildCTable_wksp_tables* const wksp_tables =
        (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
    nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
    nodeElt* const huffNode = huffNode0+1;
    int nonNullRank;

    HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE == sizeof(HUF_buildCTable_wksp_tables));

    DEBUGLOG(5, "HUF_buildCTable_wksp (alphabet size = %u)", maxSymbolValue+1);

    /* safety checks */
    if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
        return ERROR(workSpace_tooSmall);
    if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
        return ERROR(maxSymbolValue_tooLarge);
    ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));

    /* sort, decreasing order */
    HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
    DEBUGLOG(6, "sorted symbols completed (%zu symbols)", showHNodeSymbols(huffNode, maxSymbolValue+1));

    /* build tree */
    nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);

    /* determine and enforce maxTableLog */
    maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
    if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */

    HUF_buildCTableFromTree(CTable, huffNode, nonNullRank, maxSymbolValue, maxNbBits);

    return maxNbBits;
}

size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
{
    HUF_CElt const* ct = CTable + 1;
    size_t nbBits = 0;
    int s;
    for (s = 0; s <= (int)maxSymbolValue; ++s) {
        nbBits += HUF_getNbBits(ct[s]) * count[s];
    }
    return nbBits >> 3;
}

int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
    HUF_CTableHeader header = HUF_readCTableHeader(CTable);
    HUF_CElt const* ct = CTable + 1;
    int bad = 0;
    int s;

    assert(header.tableLog <= HUF_TABLELOG_ABSOLUTEMAX);

    if (header.maxSymbolValue < maxSymbolValue)
        return 0;

    for (s = 0; s <= (int)maxSymbolValue; ++s) {
        bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
    }
    return !bad;
}

size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }

/* HUF_CStream_t:
 * Huffman uses its own BIT_CStream_t implementation.
 * There are three major differences from BIT_CStream_t:
 *   1. HUF_addBits() takes a HUF_CElt (size_t) which is
 *      the pair (nbBits, value) in the format:
 *        - Bits [0, 4)            = nbBits
 *        - Bits [4, 64 - nbBits)  = 0
 *        - Bits [64 - nbBits, 64) = value
 *   2. The bitContainer is built from the upper bits and
 *      right shifted. E.g. to add a new value of N bits
 *      you right shift the bitContainer by N, then or in
 *      the new value into the N upper bits.
 *   3. The bitstream has two bit containers. You can add
 *      bits to the second container and merge them into
 *      the first container.
 */

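/*
 * Worked example (for exposition; not from the original source): starting
 * from an empty 64-bit container and adding a 3-bit code A then a 2-bit
 * code B:
 *
 *     container >>= 3; container |= A << 61;   // A in bits [61, 64)
 *     container >>= 2; container |= B << 62;   // A moves to [59, 62), B on top
 *
 * The newest code always occupies the topmost bits, so HUF_flushBits() can
 * recover all pending bits with a single right shift by (64 - bitPos).
 */
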
#define HUF_BITS_IN_CONTAINER (sizeof(size_t) * 8)

typedef struct {
    size_t bitContainer[2];
    size_t bitPos[2];

    BYTE* startPtr;
    BYTE* ptr;
    BYTE* endPtr;
} HUF_CStream_t;

/*! HUF_initCStream():
 * Initializes the bitstream.
 * @returns 0 or an error code.
 */
static size_t HUF_initCStream(HUF_CStream_t* bitC,
                                  void* startPtr, size_t dstCapacity)
{
    ZSTD_memset(bitC, 0, sizeof(*bitC));
    bitC->startPtr = (BYTE*)startPtr;
    bitC->ptr = bitC->startPtr;
    bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer[0]);
    if (dstCapacity <= sizeof(bitC->bitContainer[0])) return ERROR(dstSize_tooSmall);
    return 0;
}

/*! HUF_addBits():
 * Adds the symbol stored in HUF_CElt elt to the bitstream.
 *
 * @param elt   The element we're adding. This is a (nbBits, value) pair.
 *              See the HUF_CStream_t docs for the format.
 * @param idx   Insert into the bitstream at this idx.
 * @param kFast This is a template parameter. If the bitstream is guaranteed
 *              to have at least 4 unused bits after this call it may be 1,
 *              otherwise it must be 0. HUF_addBits() is faster when fast is set.
 */
FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int idx, int kFast)
{
    assert(idx <= 1);
    assert(HUF_getNbBits(elt) <= HUF_TABLELOG_ABSOLUTEMAX);
    /* This is efficient on x86-64 with BMI2 because shrx
     * only reads the low 6 bits of the register. The compiler
     * knows this and elides the mask. When fast is set,
     * every operation can use the same value loaded from elt.
     */
    bitC->bitContainer[idx] >>= HUF_getNbBits(elt);
    bitC->bitContainer[idx] |= kFast ? HUF_getValueFast(elt) : HUF_getValue(elt);
    /* We only read the low 8 bits of bitC->bitPos[idx] so it
     * doesn't matter that the high bits have noise from the value.
     */
    bitC->bitPos[idx] += HUF_getNbBitsFast(elt);
    assert((bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
    /* The last 4 bits of elt are dirty if fast is set,
     * so we must not be overwriting bits that have already been
     * inserted into the bit container.
     */
#if DEBUGLEVEL >= 1
    {
        size_t const nbBits = HUF_getNbBits(elt);
        size_t const dirtyBits = nbBits == 0 ? 0 : ZSTD_highbit32((U32)nbBits) + 1;
        (void)dirtyBits;
        /* Middle bits are 0. */
        assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);
        /* We didn't overwrite any bits in the bit container. */
        assert(!kFast || (bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
        (void)dirtyBits;
    }
#endif
}

FORCE_INLINE_TEMPLATE void HUF_zeroIndex1(HUF_CStream_t* bitC)
{
    bitC->bitContainer[1] = 0;
    bitC->bitPos[1] = 0;
}

/*! HUF_mergeIndex1() :
 * Merges the bit container @ index 1 into the bit container @ index 0
 * and zeros the bit container @ index 1.
 */
FORCE_INLINE_TEMPLATE void HUF_mergeIndex1(HUF_CStream_t* bitC)
{
    assert((bitC->bitPos[1] & 0xFF) < HUF_BITS_IN_CONTAINER);
    bitC->bitContainer[0] >>= (bitC->bitPos[1] & 0xFF);
    bitC->bitContainer[0] |= bitC->bitContainer[1];
    bitC->bitPos[0] += bitC->bitPos[1];
    assert((bitC->bitPos[0] & 0xFF) <= HUF_BITS_IN_CONTAINER);
}

/*! HUF_flushBits() :
* Flushes the bits in the bit container @ index 0.
*
* @post bitPos will be < 8.
* @param kFast If kFast is set then we must know a-priori that
*              the bit container will not overflow.
*/
FORCE_INLINE_TEMPLATE void HUF_flushBits(HUF_CStream_t* bitC, int kFast)
{
    /* The upper bits of bitPos are noisy, so we must mask by 0xFF. */
    size_t const nbBits = bitC->bitPos[0] & 0xFF;
    size_t const nbBytes = nbBits >> 3;
    /* The top nbBits bits of bitContainer are the ones we need. */
    size_t const bitContainer = bitC->bitContainer[0] >> (HUF_BITS_IN_CONTAINER - nbBits);
    /* Mask bitPos to account for the bytes we consumed. */
    bitC->bitPos[0] &= 7;
    assert(nbBits > 0);
    assert(nbBits <= sizeof(bitC->bitContainer[0]) * 8);
    assert(bitC->ptr <= bitC->endPtr);
    MEM_writeLEST(bitC->ptr, bitContainer);
    bitC->ptr += nbBytes;
    assert(!kFast || bitC->ptr <= bitC->endPtr);
    if (!kFast && bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
    /* bitContainer doesn't need to be modified because the leftover
     * bits are already the top bitPos bits. And we don't care about
     * noise in the lower values.
     */
}

/*! HUF_endMark()
 * @returns The Huffman stream end mark: a 1-bit value = 1.
 */
static HUF_CElt HUF_endMark(void)
{
    HUF_CElt endMark;
    HUF_setNbBits(&endMark, 1);
    HUF_setValue(&endMark, 1);
    return endMark;
}

/*! HUF_closeCStream() :
 *  @return Size of CStream, in bytes,
 *          or 0 if it could not fit into dstBuffer */
static size_t HUF_closeCStream(HUF_CStream_t* bitC)
{
    HUF_addBits(bitC, HUF_endMark(), /* idx */ 0, /* kFast */ 0);
    HUF_flushBits(bitC, /* kFast */ 0);
    {
        size_t const nbBits = bitC->bitPos[0] & 0xFF;
        if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
        return (size_t)(bitC->ptr - bitC->startPtr) + (nbBits > 0);
    }
}

FORCE_INLINE_TEMPLATE void
HUF_encodeSymbol(HUF_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable, int idx, int fast)
{
    HUF_addBits(bitCPtr, CTable[symbol], idx, fast);
}

FORCE_INLINE_TEMPLATE void
HUF_compress1X_usingCTable_internal_body_loop(HUF_CStream_t* bitC,
                                   const BYTE* ip, size_t srcSize,
                                   const HUF_CElt* ct,
                                   int kUnroll, int kFastFlush, int kLastFast)
{
    /* Join to kUnroll */
    int n = (int)srcSize;
    int rem = n % kUnroll;
    if (rem > 0) {
        for (; rem > 0; --rem) {
            HUF_encodeSymbol(bitC, ip[--n], ct, 0, /* fast */ 0);
        }
        HUF_flushBits(bitC, kFastFlush);
    }
    assert(n % kUnroll == 0);

    /* Join to 2 * kUnroll */
    if (n % (2 * kUnroll)) {
        int u;
        for (u = 1; u < kUnroll; ++u) {
            HUF_encodeSymbol(bitC, ip[n - u], ct, 0, 1);
        }
        HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, 0, kLastFast);
        HUF_flushBits(bitC, kFastFlush);
        n -= kUnroll;
    }
    assert(n % (2 * kUnroll) == 0);

    for (; n>0; n-= 2 * kUnroll) {
        /* Encode kUnroll symbols into the bitstream @ index 0. */
        int u;
        for (u = 1; u < kUnroll; ++u) {
            HUF_encodeSymbol(bitC, ip[n - u], ct, /* idx */ 0, /* fast */ 1);
        }
        HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, /* idx */ 0, /* fast */ kLastFast);
        HUF_flushBits(bitC, kFastFlush);
        /* Encode kUnroll symbols into the bitstream @ index 1.
         * This allows us to start filling the bit container
         * without any data dependencies.
         */
        HUF_zeroIndex1(bitC);
        for (u = 1; u < kUnroll; ++u) {
            HUF_encodeSymbol(bitC, ip[n - kUnroll - u], ct, /* idx */ 1, /* fast */ 1);
        }
        HUF_encodeSymbol(bitC, ip[n - kUnroll - kUnroll], ct, /* idx */ 1, /* fast */ kLastFast);
        /* Merge bitstream @ index 1 into the bitstream @ index 0 */
        HUF_mergeIndex1(bitC);
        HUF_flushBits(bitC, kFastFlush);
    }
    assert(n == 0);

}

/*
 * Returns a tight upper bound on the output space needed by Huffman,
 * plus 8 bytes of buffer to absorb over-writes. If the output is at
 * least this large we don't need to do bounds checks during Huffman
 * encoding.
 */
static size_t HUF_tightCompressBound(size_t srcSize, size_t tableLog)
{
    return ((srcSize * tableLog) >> 3) + 8;
}
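
/*
 * Worked example (for exposition; not from the original source): with
 * srcSize = 1000 and tableLog = 11, every symbol costs at most 11 bits, so
 * (1000 * 11) >> 3 = 1375 bytes of payload plus 8 spare bytes = 1383. If
 * dstSize >= 1383, the encoder can skip per-flush bounds checks
 * (kFastFlush = 1 below).
 */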


FORCE_INLINE_TEMPLATE size_t
HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
                                   const void* src, size_t srcSize,
                                   const HUF_CElt* CTable)
{
    U32 const tableLog = HUF_readCTableHeader(CTable).tableLog;
    HUF_CElt const* ct = CTable + 1;
    const BYTE* ip = (const BYTE*) src;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstSize;
    HUF_CStream_t bitC;

    /* init */
    if (dstSize < 8) return 0;   /* not enough space to compress */
    { BYTE* op = ostart;
      size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
      if (HUF_isError(initErr)) return 0; }

    if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11)
        HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ MEM_32bits() ? 2 : 4, /* kFast */ 0, /* kLastFast */ 0);
    else {
        if (MEM_32bits()) {
            switch (tableLog) {
            case 11:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 10: ZSTD_FALLTHROUGH;
            case 9: ZSTD_FALLTHROUGH;
            case 8:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            case 7: ZSTD_FALLTHROUGH;
            default:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 3, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            }
        } else {
            switch (tableLog) {
            case 11:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 10:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            case 9:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 6, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 8:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 7, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 7:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 8, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 6: ZSTD_FALLTHROUGH;
            default:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 9, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            }
        }
    }
    assert(bitC.ptr <= bitC.endPtr);

    return HUF_closeCStream(&bitC);
}

#if DYNAMIC_BMI2

static BMI2_TARGET_ATTRIBUTE size_t
HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
                                   const void* src, size_t srcSize,
                                   const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

static size_t
HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
                                      const void* src, size_t srcSize,
                                      const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, const int flags)
{
    if (flags & HUF_flags_bmi2) {
        return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
    }
    return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
}

#else

static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, const int flags)
{
    (void)flags;
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

#endif

size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
{
    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
}

static size_t
HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, int flags)
{
    size_t const segmentSize = (srcSize+3)/4;   /* first 3 segments */
    const BYTE* ip = (const BYTE*) src;
    const BYTE* const iend = ip + srcSize;
    BYTE* const ostart = (BYTE*) dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;

    if (dstSize < 6 + 1 + 1 + 1 + 8) return 0;   /* minimum space to compress successfully */
    if (srcSize < 12) return 0;   /* no saving possible : too small input */
    op += 6;   /* jumpTable */

    assert(op <= oend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
        if (cSize == 0 || cSize > 65535) return 0;
        MEM_writeLE16(ostart, (U16)cSize);
        op += cSize;
    }

    ip += segmentSize;
    assert(op <= oend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
        if (cSize == 0 || cSize > 65535) return 0;
        MEM_writeLE16(ostart+2, (U16)cSize);
        op += cSize;
    }

    ip += segmentSize;
    assert(op <= oend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
        if (cSize == 0 || cSize > 65535) return 0;
        MEM_writeLE16(ostart+4, (U16)cSize);
        op += cSize;
    }

    ip += segmentSize;
    assert(op <= oend);
    assert(ip <= iend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, flags) );
        if (cSize == 0 || cSize > 65535) return 0;
        op += cSize;
    }

    return (size_t)(op-ostart);
}
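
/*
 * Worked example (for exposition; not from the original source): the
 * 4-stream format splits the input into 4 segments and prefixes a 6-byte
 * jump table holding the compressed sizes of the first 3 streams (the 4th
 * is implicit from the total):
 *
 *     ostart[0..1] = LE16 size of stream 1
 *     ostart[2..3] = LE16 size of stream 2
 *     ostart[4..5] = LE16 size of stream 3
 *     ostart[6.. ] = stream 1 | stream 2 | stream 3 | stream 4
 *
 * A decoder seeks to stream N by summing the preceding sizes, which is why
 * each stream must fit in 16 bits (the cSize > 65535 checks above).
 */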
1214 
HUF_compress4X_usingCTable(void * dst,size_t dstSize,const void * src,size_t srcSize,const HUF_CElt * CTable,int flags)1215 size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
1216 {
1217     return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
1218 }
1219 
1220 typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
1221 
static size_t HUF_compressCTable_internal(
                BYTE* const ostart, BYTE* op, BYTE* const oend,
                const void* src, size_t srcSize,
                HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int flags)
{
    size_t const cSize = (nbStreams==HUF_singleStream) ?
                         HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags) :
                         HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags);
    if (HUF_isError(cSize)) { return cSize; }
    if (cSize==0) { return 0; }   /* incompressible */
    op += cSize;
    /* check compressibility */
    assert(op >= ostart);
    if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
    return (size_t)(op-ostart);
}

typedef struct {
    unsigned count[HUF_SYMBOLVALUE_MAX + 1];
    HUF_CElt CTable[HUF_CTABLE_SIZE_ST(HUF_SYMBOLVALUE_MAX)];
    union {
        HUF_buildCTable_wksp_tables buildCTable_wksp;
        HUF_WriteCTableWksp writeCTable_wksp;
        U32 hist_wksp[HIST_WKSP_SIZE_U32];
    } wksps;
} HUF_compress_tables_t;

#define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096
#define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10  /* Must be >= 2 */
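/* With the values above : sampling triggers for inputs of at least
 * 4096 * 10 = 40960 bytes; 4096 bytes are histogrammed at the head and
 * another 4096 at the tail, and the block is deemed incompressible when
 * the most frequent symbol of each sample, summed over both samples,
 * covers at most ((2*4096) >> 7) + 4 = 68 of the 8192 sampled bytes
 * (see the sampling stage in HUF_compress_internal() below). */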

unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue)
{
    unsigned cardinality = 0;
    unsigned i;

    for (i = 0; i < maxSymbolValue + 1; i++) {
        if (count[i] != 0) cardinality += 1;
    }

    return cardinality;
}

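/* HUF_minTableLog() :
 * a prefix tree of depth d has at most 2^d leaves, so encoding
 * `symbolCardinality` distinct symbols needs a max code length of at
 * least ceil(log2(symbolCardinality)). ZSTD_highbit32(card) + 1 is a
 * convenient bound (one bit conservative when card is a power of 2),
 * used as the starting point of the search in HUF_optimalTableLog(). */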
unsigned HUF_minTableLog(unsigned symbolCardinality)
{
    U32 minBitsSymbols = ZSTD_highbit32(symbolCardinality) + 1;
    return minBitsSymbols;
}

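/* HUF_optimalTableLog() :
 * with HUF_flags_optimalDepth set, probes each tableLog candidate from
 * HUF_minTableLog() upward : builds a CTable, serializes its header, and
 * keeps the log minimizing (header size + estimated compressed size).
 * The loop exits early once the total grows beyond the best by more than
 * 1 byte, treating the size curve as approximately unimodal.
 * Without the flag, a cheap FSE-based estimate is returned instead. */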
unsigned HUF_optimalTableLog(
            unsigned maxTableLog,
            size_t srcSize,
            unsigned maxSymbolValue,
            void* workSpace, size_t wkspSize,
            HUF_CElt* table,
      const unsigned* count,
            int flags)
{
    assert(srcSize > 1); /* Not supported, RLE should be used instead */
    assert(wkspSize >= sizeof(HUF_buildCTable_wksp_tables));

    if (!(flags & HUF_flags_optimalDepth)) {
        /* cheap evaluation, based on FSE */
        return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
    }

    {   BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp);
        size_t dstSize = wkspSize - sizeof(HUF_WriteCTableWksp);
        size_t hSize, newSize;
        const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue);
        const unsigned minTableLog = HUF_minTableLog(symbolCardinality);
        size_t optSize = ((size_t) ~0) - 1;
        unsigned optLog = maxTableLog, optLogGuess;

        DEBUGLOG(6, "HUF_optimalTableLog: probing huf depth (srcSize=%zu)", srcSize);

        /* Search until size increases */
        for (optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) {
            DEBUGLOG(7, "checking for huffLog=%u", optLogGuess);

            {   size_t maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize);
                if (ERR_isError(maxBits)) continue;

                if (maxBits < optLogGuess && optLogGuess > minTableLog) break;

                hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize);
            }

            if (ERR_isError(hSize)) continue;

            newSize = HUF_estimateCompressedSize(table, count, maxSymbolValue) + hSize;

            if (newSize > optSize + 1) {
                break;
            }

            if (newSize < optSize) {
                optSize = newSize;
                optLog = optLogGuess;
            }
        }
        assert(optLog <= HUF_TABLELOG_MAX);
        return optLog;
    }
}

/* HUF_compress_internal() :
 * `workSpace` is aligned up internally to the alignment of size_t ;
 * after alignment it must still hold a full HUF_compress_tables_t,
 * which a buffer of HUF_WORKSPACE_SIZE bytes is guaranteed to provide. */
static size_t
HUF_compress_internal (void* dst, size_t dstSize,
                 const void* src, size_t srcSize,
                       unsigned maxSymbolValue, unsigned huffLog,
                       HUF_nbStreams_e nbStreams,
                       void* workSpace, size_t wkspSize,
                       HUF_CElt* oldHufTable, HUF_repeat* repeat, int flags)
{
    HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t));
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;

    DEBUGLOG(5, "HUF_compress_internal (srcSize=%zu)", srcSize);
    HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE);

    /* checks & inits */
    if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
    if (!srcSize) return 0;  /* Uncompressed */
    if (!dstSize) return 0;  /* cannot fit anything within dst budget */
    if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);   /* current block size limit */
    if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
    if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
    if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;

    /* Heuristic : If old table is valid, use it for small inputs */
    if ((flags & HUF_flags_preferRepeat) && repeat && *repeat == HUF_repeat_valid) {
        return HUF_compressCTable_internal(ostart, op, oend,
                                           src, srcSize,
                                           nbStreams, oldHufTable, flags);
    }

    /* If incompressible data is suspected, do a smaller sampling first */
    DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2);
    if ((flags & HUF_flags_suspectUncompressible) && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
        size_t largestTotal = 0;
        DEBUGLOG(5, "input suspected incompressible : sampling to check");
        {   unsigned maxSymbolValueBegin = maxSymbolValue;
            CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
            largestTotal += largestBegin;
        }
        {   unsigned maxSymbolValueEnd = maxSymbolValue;
            CHECK_V_F(largestEnd, HIST_count_simple (table->count, &maxSymbolValueEnd, (const BYTE*)src + srcSize - SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
            largestTotal += largestEnd;
        }
        if (largestTotal <= ((2 * SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
    }

    /* Scan input and build symbol stats */
    {   CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->wksps.hist_wksp, sizeof(table->wksps.hist_wksp)) );
        if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
        if (largest <= (srcSize >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
    }
    DEBUGLOG(6, "histogram detail completed (%zu symbols)", showU32(table->count, maxSymbolValue+1));
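    /* Note on the heuristic above : requiring largest > (srcSize >> 7) + 4
     * means the most frequent symbol must cover more than ~1/128th of the
     * input. A uniformly random byte stream peaks near srcSize/256 and
     * therefore fails the test, getting reported as incompressible. */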

    /* Check validity of previous table */
    if ( repeat
      && *repeat == HUF_repeat_check
      && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) {
        *repeat = HUF_repeat_none;
    }
    /* Heuristic : use existing table for small inputs */
    if ((flags & HUF_flags_preferRepeat) && repeat && *repeat != HUF_repeat_none) {
        return HUF_compressCTable_internal(ostart, op, oend,
                                           src, srcSize,
                                           nbStreams, oldHufTable, flags);
    }

    /* Build Huffman Tree */
    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, &table->wksps, sizeof(table->wksps), table->CTable, table->count, flags);
    {   size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
                                            maxSymbolValue, huffLog,
                                            &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
        CHECK_F(maxBits);
        huffLog = (U32)maxBits;
        DEBUGLOG(6, "bit distribution completed (%zu symbols)", showCTableBits(table->CTable + 1, maxSymbolValue+1));
    }

    /* Write table description header */
    {   CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,
                                              &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) );
        /* Check if using previous huffman table is beneficial */
        if (repeat && *repeat != HUF_repeat_none) {
            size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
            size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue);
            if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
                return HUF_compressCTable_internal(ostart, op, oend,
                                                   src, srcSize,
                                                   nbStreams, oldHufTable, flags);
        }   }

        /* Use the new huffman table */
        if (hSize + 12ul >= srcSize) { return 0; }
        op += hSize;
        if (repeat) { *repeat = HUF_repeat_none; }
        if (oldHufTable)
            ZSTD_memcpy(oldHufTable, table->CTable, sizeof(table->CTable));  /* Save new table */
    }
    return HUF_compressCTable_internal(ostart, op, oend,
                                       src, srcSize,
                                       nbStreams, table->CTable, flags);
}

size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize,
                      HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
{
    DEBUGLOG(5, "HUF_compress1X_repeat (srcSize = %zu)", srcSize);
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_singleStream,
                                 workSpace, wkspSize, hufTable,
                                 repeat, flags);
}

/* HUF_compress4X_repeat():
 * compress input using 4 streams.
 * may skip the input quickly if it looks incompressible.
 * can reuse an existing huffman compression table. */
size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize,
                      HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
{
    DEBUGLOG(5, "HUF_compress4X_repeat (srcSize = %zu)", srcSize);
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_fourStreams,
                                 workSpace, wkspSize,
                                 hufTable, repeat, flags);
}
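
/* Illustrative usage sketch (hypothetical caller, not part of this file) :
 *
 *   U64 wksp[HUF_WORKSPACE_SIZE_U64];
 *   HUF_CElt ctable[HUF_CTABLE_SIZE_ST(HUF_SYMBOLVALUE_MAX)];
 *   HUF_repeat rep = HUF_repeat_none;
 *   size_t const cSize = HUF_compress4X_repeat(dst, dstCapacity, src, srcSize,
 *                                  HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT,
 *                                  wksp, sizeof(wksp), ctable, &rep, 0);
 *   if (HUF_isError(cSize)) { ... }   (failure)
 *   else if (cSize == 0) { ... }      (not compressible : store raw)
 *   else if (cSize == 1) { ... }      (single-symbol input : store RLE)
 *   else { ... }                      (cSize compressed bytes written)
 */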