xref: /freebsd/sys/contrib/zstd/lib/compress/huf_compress.c (revision e9b1dc32c9bd2ebae5f9e140bfa0e0321bc366b5)
1 /* ******************************************************************
2    Huffman encoder, part of New Generation Entropy library
3    Copyright (C) 2013-2016, Yann Collet.
4 
5    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6 
7    Redistribution and use in source and binary forms, with or without
8    modification, are permitted provided that the following conditions are
9    met:
10 
11        * Redistributions of source code must retain the above copyright
12    notice, this list of conditions and the following disclaimer.
13        * Redistributions in binary form must reproduce the above
14    copyright notice, this list of conditions and the following disclaimer
15    in the documentation and/or other materials provided with the
16    distribution.
17 
18    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30     You can contact the author at :
31     - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
32     - Public forum : https://groups.google.com/forum/#!forum/lz4c
33 ****************************************************************** */
34 
35 /* **************************************************************
36 *  Compiler specifics
37 ****************************************************************/
38 #ifdef _MSC_VER    /* Visual Studio */
39 #  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
40 #endif
41 
42 
43 /* **************************************************************
44 *  Includes
45 ****************************************************************/
46 #include <string.h>     /* memcpy, memset */
47 #include <stdio.h>      /* printf (debug) */
48 #include "compiler.h"
49 #include "bitstream.h"
50 #include "hist.h"
51 #define FSE_STATIC_LINKING_ONLY   /* FSE_optimalTableLog_internal */
52 #include "fse.h"        /* header compression */
53 #define HUF_STATIC_LINKING_ONLY
54 #include "huf.h"
55 #include "error_private.h"
56 
57 
58 /* **************************************************************
59 *  Error Management
60 ****************************************************************/
61 #define HUF_isError ERR_isError
62 #define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)   /* use only *after* variable declarations */
63 #define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
64 #define CHECK_F(f)   { CHECK_V_F(_var_err__, f); }
65 
66 
67 /* **************************************************************
68 *  Utils
69 ****************************************************************/
70 unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
71 {
72     return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
73 }
74 
75 
76 /* *******************************************************
77 *  HUF : Huffman block compression
78 *********************************************************/
79 /* HUF_compressWeights() :
80  * Same as FSE_compress(), but dedicated to huff0's weights compression.
81  * The use case needs much less stack memory.
82  * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
83  */
84 #define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
85 static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
86 {
87     BYTE* const ostart = (BYTE*) dst;
88     BYTE* op = ostart;
89     BYTE* const oend = ostart + dstSize;
90 
91     U32 maxSymbolValue = HUF_TABLELOG_MAX;
92     U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
93 
94     FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
95     BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER];
96 
97     U32 count[HUF_TABLELOG_MAX+1];
98     S16 norm[HUF_TABLELOG_MAX+1];
99 
100     /* init conditions */
101     if (wtSize <= 1) return 0;  /* Not compressible */
102 
103     /* Scan input and build symbol stats */
104     {   unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize);   /* never fails */
105         if (maxCount == wtSize) return 1;   /* only a single symbol in src : rle */
106         if (maxCount == 1) return 0;        /* each symbol present maximum once => not compressible */
107     }
108 
109     tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
110     CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) );
111 
112     /* Write table description header */
113     {   CHECK_V_F(hSize, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
114         op += hSize;
115     }
116 
117     /* Compress */
118     CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
119     {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable) );
120         if (cSize == 0) return 0;   /* not enough space for compressed data */
121         op += cSize;
122     }
123 
124     return op-ostart;
125 }
126 
127 
128 struct HUF_CElt_s {
129   U16  val;
130   BYTE nbBits;
131 };   /* typedef'd to HUF_CElt within "huf.h" */
132 
133 /*! HUF_writeCTable() :
134     `CTable` : Huffman tree to save, using huf representation.
135     @return : size of saved CTable */
136 size_t HUF_writeCTable (void* dst, size_t maxDstSize,
137                         const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog)
138 {
139     BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
140     BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
141     BYTE* op = (BYTE*)dst;
142     U32 n;
143 
144      /* check conditions */
145     if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
146 
147     /* convert to weight */
148     bitsToWeight[0] = 0;
149     for (n=1; n<huffLog+1; n++)
150         bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
151     for (n=0; n<maxSymbolValue; n++)
152         huffWeight[n] = bitsToWeight[CTable[n].nbBits];
153 
154     /* attempt weights compression by FSE */
155     {   CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) );
156         if ((hSize>1) & (hSize < maxSymbolValue/2)) {   /* FSE compressed */
157             op[0] = (BYTE)hSize;
158             return hSize+1;
159     }   }
160 
161     /* write raw values as 4-bits (max : 15) */
162     if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen : likely means source cannot be compressed */
163     if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
164     op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
165     huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination */
166     for (n=0; n<maxSymbolValue; n+=2)
167         op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
168     return ((maxSymbolValue+1)/2) + 1;
169 }
170 
171 
172 size_t HUF_readCTable (HUF_CElt* CTable, U32* maxSymbolValuePtr, const void* src, size_t srcSize)
173 {
174     BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];   /* init not required, even though some static analyzer may complain */
175     U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];   /* large enough for values from 0 to 16 */
176     U32 tableLog = 0;
177     U32 nbSymbols = 0;
178 
179     /* get symbol weights */
180     CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
181 
182     /* check result */
183     if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
184     if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
185 
186     /* Prepare base value per rank */
187     {   U32 n, nextRankStart = 0;
188         for (n=1; n<=tableLog; n++) {
189             U32 current = nextRankStart;
190             nextRankStart += (rankVal[n] << (n-1));
191             rankVal[n] = current;
192     }   }
193 
194     /* fill nbBits */
195     {   U32 n; for (n=0; n<nbSymbols; n++) {
196             const U32 w = huffWeight[n];
197             CTable[n].nbBits = (BYTE)(tableLog + 1 - w);
198     }   }
199 
200     /* fill val */
201     {   U16 nbPerRank[HUF_TABLELOG_MAX+2]  = {0};  /* support w=0=>n=tableLog+1 */
202         U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
203         { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
204         /* determine stating value per rank */
205         valPerRank[tableLog+1] = 0;   /* for w==0 */
206         {   U16 min = 0;
207             U32 n; for (n=tableLog; n>0; n--) {  /* start at n=tablelog <-> w=1 */
208                 valPerRank[n] = min;     /* get starting value within each rank */
209                 min += nbPerRank[n];
210                 min >>= 1;
211         }   }
212         /* assign value within rank, symbol order */
213         { U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; }
214     }
215 
216     *maxSymbolValuePtr = nbSymbols - 1;
217     return readSize;
218 }
219 
220 U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue)
221 {
222     const HUF_CElt* table = (const HUF_CElt*)symbolTable;
223     assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
224     return table[symbolValue].nbBits;
225 }
226 
227 
228 typedef struct nodeElt_s {
229     U32 count;
230     U16 parent;
231     BYTE byte;
232     BYTE nbBits;
233 } nodeElt;
234 
235 static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
236 {
237     const U32 largestBits = huffNode[lastNonNull].nbBits;
238     if (largestBits <= maxNbBits) return largestBits;   /* early exit : no elt > maxNbBits */
239 
240     /* there are several too large elements (at least >= 2) */
241     {   int totalCost = 0;
242         const U32 baseCost = 1 << (largestBits - maxNbBits);
243         U32 n = lastNonNull;
244 
245         while (huffNode[n].nbBits > maxNbBits) {
246             totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
247             huffNode[n].nbBits = (BYTE)maxNbBits;
248             n --;
249         }  /* n stops at huffNode[n].nbBits <= maxNbBits */
250         while (huffNode[n].nbBits == maxNbBits) n--;   /* n end at index of smallest symbol using < maxNbBits */
251 
252         /* renorm totalCost */
253         totalCost >>= (largestBits - maxNbBits);  /* note : totalCost is necessarily a multiple of baseCost */
254 
255         /* repay normalized cost */
256         {   U32 const noSymbol = 0xF0F0F0F0;
257             U32 rankLast[HUF_TABLELOG_MAX+2];
258             int pos;
259 
260             /* Get pos of last (smallest) symbol per rank */
261             memset(rankLast, 0xF0, sizeof(rankLast));
262             {   U32 currentNbBits = maxNbBits;
263                 for (pos=n ; pos >= 0; pos--) {
264                     if (huffNode[pos].nbBits >= currentNbBits) continue;
265                     currentNbBits = huffNode[pos].nbBits;   /* < maxNbBits */
266                     rankLast[maxNbBits-currentNbBits] = pos;
267             }   }
268 
269             while (totalCost > 0) {
270                 U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1;
271                 for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
272                     U32 highPos = rankLast[nBitsToDecrease];
273                     U32 lowPos = rankLast[nBitsToDecrease-1];
274                     if (highPos == noSymbol) continue;
275                     if (lowPos == noSymbol) break;
276                     {   U32 const highTotal = huffNode[highPos].count;
277                         U32 const lowTotal = 2 * huffNode[lowPos].count;
278                         if (highTotal <= lowTotal) break;
279                 }   }
280                 /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
281                 /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
282                 while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
283                     nBitsToDecrease ++;
284                 totalCost -= 1 << (nBitsToDecrease-1);
285                 if (rankLast[nBitsToDecrease-1] == noSymbol)
286                     rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];   /* this rank is no longer empty */
287                 huffNode[rankLast[nBitsToDecrease]].nbBits ++;
288                 if (rankLast[nBitsToDecrease] == 0)    /* special case, reached largest symbol */
289                     rankLast[nBitsToDecrease] = noSymbol;
290                 else {
291                     rankLast[nBitsToDecrease]--;
292                     if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
293                         rankLast[nBitsToDecrease] = noSymbol;   /* this rank is now empty */
294             }   }   /* while (totalCost > 0) */
295 
296             while (totalCost < 0) {  /* Sometimes, cost correction overshoot */
297                 if (rankLast[1] == noSymbol) {  /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
298                     while (huffNode[n].nbBits == maxNbBits) n--;
299                     huffNode[n+1].nbBits--;
300                     rankLast[1] = n+1;
301                     totalCost++;
302                     continue;
303                 }
304                 huffNode[ rankLast[1] + 1 ].nbBits--;
305                 rankLast[1]++;
306                 totalCost ++;
307     }   }   }   /* there are several too large elements (at least >= 2) */
308 
309     return maxNbBits;
310 }
311 
312 
313 typedef struct {
314     U32 base;
315     U32 current;
316 } rankPos;
317 
318 static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
319 {
320     rankPos rank[32];
321     U32 n;
322 
323     memset(rank, 0, sizeof(rank));
324     for (n=0; n<=maxSymbolValue; n++) {
325         U32 r = BIT_highbit32(count[n] + 1);
326         rank[r].base ++;
327     }
328     for (n=30; n>0; n--) rank[n-1].base += rank[n].base;
329     for (n=0; n<32; n++) rank[n].current = rank[n].base;
330     for (n=0; n<=maxSymbolValue; n++) {
331         U32 const c = count[n];
332         U32 const r = BIT_highbit32(c+1) + 1;
333         U32 pos = rank[r].current++;
334         while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) {
335             huffNode[pos] = huffNode[pos-1];
336             pos--;
337         }
338         huffNode[pos].count = c;
339         huffNode[pos].byte  = (BYTE)n;
340     }
341 }
342 
343 
344 /** HUF_buildCTable_wksp() :
345  *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
346  *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of HUF_CTABLE_WORKSPACE_SIZE_U32 unsigned.
347  */
348 #define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
349 typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
350 size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
351 {
352     nodeElt* const huffNode0 = (nodeElt*)workSpace;
353     nodeElt* const huffNode = huffNode0+1;
354     U32 n, nonNullRank;
355     int lowS, lowN;
356     U16 nodeNb = STARTNODE;
357     U32 nodeRoot;
358 
359     /* safety checks */
360     if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
361     if (wkspSize < sizeof(huffNodeTable)) return ERROR(workSpace_tooSmall);
362     if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
363     if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
364     memset(huffNode0, 0, sizeof(huffNodeTable));
365 
366     /* sort, decreasing order */
367     HUF_sort(huffNode, count, maxSymbolValue);
368 
369     /* init for parents */
370     nonNullRank = maxSymbolValue;
371     while(huffNode[nonNullRank].count == 0) nonNullRank--;
372     lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
373     huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
374     huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb;
375     nodeNb++; lowS-=2;
376     for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
377     huffNode0[0].count = (U32)(1U<<31);  /* fake entry, strong barrier */
378 
379     /* create parents */
380     while (nodeNb <= nodeRoot) {
381         U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
382         U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
383         huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
384         huffNode[n1].parent = huffNode[n2].parent = nodeNb;
385         nodeNb++;
386     }
387 
388     /* distribute weights (unlimited tree height) */
389     huffNode[nodeRoot].nbBits = 0;
390     for (n=nodeRoot-1; n>=STARTNODE; n--)
391         huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
392     for (n=0; n<=nonNullRank; n++)
393         huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
394 
395     /* enforce maxTableLog */
396     maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);
397 
398     /* fill result into tree (val, nbBits) */
399     {   U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
400         U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
401         if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */
402         for (n=0; n<=nonNullRank; n++)
403             nbPerRank[huffNode[n].nbBits]++;
404         /* determine stating value per rank */
405         {   U16 min = 0;
406             for (n=maxNbBits; n>0; n--) {
407                 valPerRank[n] = min;      /* get starting value within each rank */
408                 min += nbPerRank[n];
409                 min >>= 1;
410         }   }
411         for (n=0; n<=maxSymbolValue; n++)
412             tree[huffNode[n].byte].nbBits = huffNode[n].nbBits;   /* push nbBits per symbol, symbol order */
413         for (n=0; n<=maxSymbolValue; n++)
414             tree[n].val = valPerRank[tree[n].nbBits]++;   /* assign value within rank, symbol order */
415     }
416 
417     return maxNbBits;
418 }
419 
420 /** HUF_buildCTable() :
421  * @return : maxNbBits
422  *  Note : count is used before tree is written, so they can safely overlap
423  */
424 size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits)
425 {
426     huffNodeTable nodeTable;
427     return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable));
428 }
429 
430 static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
431 {
432     size_t nbBits = 0;
433     int s;
434     for (s = 0; s <= (int)maxSymbolValue; ++s) {
435         nbBits += CTable[s].nbBits * count[s];
436     }
437     return nbBits >> 3;
438 }
439 
440 static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
441   int bad = 0;
442   int s;
443   for (s = 0; s <= (int)maxSymbolValue; ++s) {
444     bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
445   }
446   return !bad;
447 }
448 
449 size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
450 
451 FORCE_INLINE_TEMPLATE void
452 HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
453 {
454     BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
455 }
456 
457 #define HUF_FLUSHBITS(s)  BIT_flushBits(s)
458 
459 #define HUF_FLUSHBITS_1(stream) \
460     if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)
461 
462 #define HUF_FLUSHBITS_2(stream) \
463     if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
464 
465 FORCE_INLINE_TEMPLATE size_t
466 HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
467                                    const void* src, size_t srcSize,
468                                    const HUF_CElt* CTable)
469 {
470     const BYTE* ip = (const BYTE*) src;
471     BYTE* const ostart = (BYTE*)dst;
472     BYTE* const oend = ostart + dstSize;
473     BYTE* op = ostart;
474     size_t n;
475     BIT_CStream_t bitC;
476 
477     /* init */
478     if (dstSize < 8) return 0;   /* not enough space to compress */
479     { size_t const initErr = BIT_initCStream(&bitC, op, oend-op);
480       if (HUF_isError(initErr)) return 0; }
481 
482     n = srcSize & ~3;  /* join to mod 4 */
483     switch (srcSize & 3)
484     {
485         case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
486                  HUF_FLUSHBITS_2(&bitC);
487 		 /* fall-through */
488         case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
489                  HUF_FLUSHBITS_1(&bitC);
490 		 /* fall-through */
491         case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
492                  HUF_FLUSHBITS(&bitC);
493 		 /* fall-through */
494         case 0 : /* fall-through */
495         default: break;
496     }
497 
498     for (; n>0; n-=4) {  /* note : n&3==0 at this stage */
499         HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
500         HUF_FLUSHBITS_1(&bitC);
501         HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
502         HUF_FLUSHBITS_2(&bitC);
503         HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
504         HUF_FLUSHBITS_1(&bitC);
505         HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
506         HUF_FLUSHBITS(&bitC);
507     }
508 
509     return BIT_closeCStream(&bitC);
510 }
511 
512 #if DYNAMIC_BMI2
513 
514 static TARGET_ATTRIBUTE("bmi2") size_t
515 HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
516                                    const void* src, size_t srcSize,
517                                    const HUF_CElt* CTable)
518 {
519     return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
520 }
521 
522 static size_t
523 HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
524                                       const void* src, size_t srcSize,
525                                       const HUF_CElt* CTable)
526 {
527     return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
528 }
529 
530 static size_t
531 HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
532                               const void* src, size_t srcSize,
533                               const HUF_CElt* CTable, const int bmi2)
534 {
535     if (bmi2) {
536         return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
537     }
538     return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
539 }
540 
541 #else
542 
543 static size_t
544 HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
545                               const void* src, size_t srcSize,
546                               const HUF_CElt* CTable, const int bmi2)
547 {
548     (void)bmi2;
549     return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
550 }
551 
552 #endif
553 
554 size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
555 {
556     return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
557 }
558 
559 
560 static size_t
561 HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
562                               const void* src, size_t srcSize,
563                               const HUF_CElt* CTable, int bmi2)
564 {
565     size_t const segmentSize = (srcSize+3)/4;   /* first 3 segments */
566     const BYTE* ip = (const BYTE*) src;
567     const BYTE* const iend = ip + srcSize;
568     BYTE* const ostart = (BYTE*) dst;
569     BYTE* const oend = ostart + dstSize;
570     BYTE* op = ostart;
571 
572     if (dstSize < 6 + 1 + 1 + 1 + 8) return 0;   /* minimum space to compress successfully */
573     if (srcSize < 12) return 0;   /* no saving possible : too small input */
574     op += 6;   /* jumpTable */
575 
576     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
577         if (cSize==0) return 0;
578         assert(cSize <= 65535);
579         MEM_writeLE16(ostart, (U16)cSize);
580         op += cSize;
581     }
582 
583     ip += segmentSize;
584     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
585         if (cSize==0) return 0;
586         assert(cSize <= 65535);
587         MEM_writeLE16(ostart+2, (U16)cSize);
588         op += cSize;
589     }
590 
591     ip += segmentSize;
592     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
593         if (cSize==0) return 0;
594         assert(cSize <= 65535);
595         MEM_writeLE16(ostart+4, (U16)cSize);
596         op += cSize;
597     }
598 
599     ip += segmentSize;
600     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, iend-ip, CTable, bmi2) );
601         if (cSize==0) return 0;
602         op += cSize;
603     }
604 
605     return op-ostart;
606 }
607 
608 size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
609 {
610     return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
611 }
612 
613 
614 static size_t HUF_compressCTable_internal(
615                 BYTE* const ostart, BYTE* op, BYTE* const oend,
616                 const void* src, size_t srcSize,
617                 unsigned singleStream, const HUF_CElt* CTable, const int bmi2)
618 {
619     size_t const cSize = singleStream ?
620                          HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) :
621                          HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2);
622     if (HUF_isError(cSize)) { return cSize; }
623     if (cSize==0) { return 0; }   /* uncompressible */
624     op += cSize;
625     /* check compressibility */
626     if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
627     return op-ostart;
628 }
629 
630 typedef struct {
631     U32 count[HUF_SYMBOLVALUE_MAX + 1];
632     HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
633     huffNodeTable nodeTable;
634 } HUF_compress_tables_t;
635 
636 /* HUF_compress_internal() :
637  * `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
638 static size_t HUF_compress_internal (
639                 void* dst, size_t dstSize,
640                 const void* src, size_t srcSize,
641                 unsigned maxSymbolValue, unsigned huffLog,
642                 unsigned singleStream,
643                 void* workSpace, size_t wkspSize,
644                 HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
645                 const int bmi2)
646 {
647     HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
648     BYTE* const ostart = (BYTE*)dst;
649     BYTE* const oend = ostart + dstSize;
650     BYTE* op = ostart;
651 
652     /* checks & inits */
653     if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
654     if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
655     if (!srcSize) return 0;  /* Uncompressed */
656     if (!dstSize) return 0;  /* cannot fit anything within dst budget */
657     if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);   /* current block size limit */
658     if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
659     if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
660     if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
661     if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
662 
663     /* Heuristic : If old table is valid, use it for small inputs */
664     if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
665         return HUF_compressCTable_internal(ostart, op, oend,
666                                            src, srcSize,
667                                            singleStream, oldHufTable, bmi2);
668     }
669 
670     /* Scan input and build symbol stats */
671     {   CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->count) );
672         if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
673         if (largest <= (srcSize >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
674     }
675 
676     /* Check validity of previous table */
677     if ( repeat
678       && *repeat == HUF_repeat_check
679       && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) {
680         *repeat = HUF_repeat_none;
681     }
682     /* Heuristic : use existing table for small inputs */
683     if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
684         return HUF_compressCTable_internal(ostart, op, oend,
685                                            src, srcSize,
686                                            singleStream, oldHufTable, bmi2);
687     }
688 
689     /* Build Huffman Tree */
690     huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
691     {   CHECK_V_F(maxBits, HUF_buildCTable_wksp(table->CTable, table->count,
692                                                 maxSymbolValue, huffLog,
693                                                 table->nodeTable, sizeof(table->nodeTable)) );
694         huffLog = (U32)maxBits;
695         /* Zero unused symbols in CTable, so we can check it for validity */
696         memset(table->CTable + (maxSymbolValue + 1), 0,
697                sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
698     }
699 
700     /* Write table description header */
701     {   CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) );
702         /* Check if using previous huffman table is beneficial */
703         if (repeat && *repeat != HUF_repeat_none) {
704             size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
705             size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue);
706             if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
707                 return HUF_compressCTable_internal(ostart, op, oend,
708                                                    src, srcSize,
709                                                    singleStream, oldHufTable, bmi2);
710         }   }
711 
712         /* Use the new huffman table */
713         if (hSize + 12ul >= srcSize) { return 0; }
714         op += hSize;
715         if (repeat) { *repeat = HUF_repeat_none; }
716         if (oldHufTable)
717             memcpy(oldHufTable, table->CTable, sizeof(table->CTable));  /* Save new table */
718     }
719     return HUF_compressCTable_internal(ostart, op, oend,
720                                        src, srcSize,
721                                        singleStream, table->CTable, bmi2);
722 }
723 
724 
725 size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
726                       const void* src, size_t srcSize,
727                       unsigned maxSymbolValue, unsigned huffLog,
728                       void* workSpace, size_t wkspSize)
729 {
730     return HUF_compress_internal(dst, dstSize, src, srcSize,
731                                  maxSymbolValue, huffLog, 1 /*single stream*/,
732                                  workSpace, wkspSize,
733                                  NULL, NULL, 0, 0 /*bmi2*/);
734 }
735 
736 size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
737                       const void* src, size_t srcSize,
738                       unsigned maxSymbolValue, unsigned huffLog,
739                       void* workSpace, size_t wkspSize,
740                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
741 {
742     return HUF_compress_internal(dst, dstSize, src, srcSize,
743                                  maxSymbolValue, huffLog, 1 /*single stream*/,
744                                  workSpace, wkspSize, hufTable,
745                                  repeat, preferRepeat, bmi2);
746 }
747 
748 size_t HUF_compress1X (void* dst, size_t dstSize,
749                  const void* src, size_t srcSize,
750                  unsigned maxSymbolValue, unsigned huffLog)
751 {
752     unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
753     return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
754 }
755 
756 /* HUF_compress4X_repeat():
757  * compress input using 4 streams.
758  * provide workspace to generate compression tables */
759 size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
760                       const void* src, size_t srcSize,
761                       unsigned maxSymbolValue, unsigned huffLog,
762                       void* workSpace, size_t wkspSize)
763 {
764     return HUF_compress_internal(dst, dstSize, src, srcSize,
765                                  maxSymbolValue, huffLog, 0 /*4 streams*/,
766                                  workSpace, wkspSize,
767                                  NULL, NULL, 0, 0 /*bmi2*/);
768 }
769 
770 /* HUF_compress4X_repeat():
771  * compress input using 4 streams.
772  * re-use an existing huffman compression table */
773 size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
774                       const void* src, size_t srcSize,
775                       unsigned maxSymbolValue, unsigned huffLog,
776                       void* workSpace, size_t wkspSize,
777                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
778 {
779     return HUF_compress_internal(dst, dstSize, src, srcSize,
780                                  maxSymbolValue, huffLog, 0 /* 4 streams */,
781                                  workSpace, wkspSize,
782                                  hufTable, repeat, preferRepeat, bmi2);
783 }
784 
785 size_t HUF_compress2 (void* dst, size_t dstSize,
786                 const void* src, size_t srcSize,
787                 unsigned maxSymbolValue, unsigned huffLog)
788 {
789     unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
790     return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
791 }
792 
793 size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
794 {
795     return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
796 }
797