xref: /freebsd/sys/contrib/zstd/lib/compress/huf_compress.c (revision bcce9a2b33a8e9187a63f435726a7a801e89f326)
1 /* ******************************************************************
2    Huffman encoder, part of New Generation Entropy library
3    Copyright (C) 2013-2016, Yann Collet.
4 
5    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6 
7    Redistribution and use in source and binary forms, with or without
8    modification, are permitted provided that the following conditions are
9    met:
10 
11        * Redistributions of source code must retain the above copyright
12    notice, this list of conditions and the following disclaimer.
13        * Redistributions in binary form must reproduce the above
14    copyright notice, this list of conditions and the following disclaimer
15    in the documentation and/or other materials provided with the
16    distribution.
17 
18    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30     You can contact the author at :
31     - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
32     - Public forum : https://groups.google.com/forum/#!forum/lz4c
33 ****************************************************************** */
34 
35 /* **************************************************************
36 *  Compiler specifics
37 ****************************************************************/
38 #ifdef _MSC_VER    /* Visual Studio */
39 #  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
40 #endif
41 
42 
43 /* **************************************************************
44 *  Includes
45 ****************************************************************/
46 #include <string.h>     /* memcpy, memset */
47 #include <stdio.h>      /* printf (debug) */
48 #include "bitstream.h"
49 #include "compiler.h"
50 #define FSE_STATIC_LINKING_ONLY   /* FSE_optimalTableLog_internal */
51 #include "fse.h"        /* header compression */
52 #define HUF_STATIC_LINKING_ONLY
53 #include "huf.h"
54 #include "error_private.h"
55 
56 
57 /* **************************************************************
58 *  Error Management
59 ****************************************************************/
60 #define HUF_isError ERR_isError
61 #define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
62 #define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
63 #define CHECK_F(f)   { CHECK_V_F(_var_err__, f); }
64 
65 
66 /* **************************************************************
67 *  Utils
68 ****************************************************************/
69 unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
70 {
71     return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
72 }
73 
74 
75 /* *******************************************************
76 *  HUF : Huffman block compression
77 *********************************************************/
78 /* HUF_compressWeights() :
79  * Same as FSE_compress(), but dedicated to huff0's weights compression.
80  * The use case needs much less stack memory.
81  * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
82  */
83 #define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
84 size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
85 {
86     BYTE* const ostart = (BYTE*) dst;
87     BYTE* op = ostart;
88     BYTE* const oend = ostart + dstSize;
89 
90     U32 maxSymbolValue = HUF_TABLELOG_MAX;
91     U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
92 
93     FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
94     BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER];
95 
96     U32 count[HUF_TABLELOG_MAX+1];
97     S16 norm[HUF_TABLELOG_MAX+1];
98 
99     /* init conditions */
100     if (wtSize <= 1) return 0;  /* Not compressible */
101 
102     /* Scan input and build symbol stats */
103     {   CHECK_V_F(maxCount, FSE_count_simple(count, &maxSymbolValue, weightTable, wtSize) );
104         if (maxCount == wtSize) return 1;   /* only a single symbol in src : rle */
105         if (maxCount == 1) return 0;         /* each symbol present maximum once => not compressible */
106     }
107 
108     tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
109     CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) );
110 
111     /* Write table description header */
112     {   CHECK_V_F(hSize, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
113         op += hSize;
114     }
115 
116     /* Compress */
117     CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
118     {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable) );
119         if (cSize == 0) return 0;   /* not enough space for compressed data */
120         op += cSize;
121     }
122 
123     return op-ostart;
124 }
125 
126 
127 struct HUF_CElt_s {
128   U16  val;
129   BYTE nbBits;
130 };   /* typedef'd to HUF_CElt within "huf.h" */
131 
132 /*! HUF_writeCTable() :
133     `CTable` : Huffman tree to save, using huf representation.
134     @return : size of saved CTable */
135 size_t HUF_writeCTable (void* dst, size_t maxDstSize,
136                         const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog)
137 {
138     BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
139     BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
140     BYTE* op = (BYTE*)dst;
141     U32 n;
142 
143      /* check conditions */
144     if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
145 
146     /* convert to weight */
147     bitsToWeight[0] = 0;
148     for (n=1; n<huffLog+1; n++)
149         bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
150     for (n=0; n<maxSymbolValue; n++)
151         huffWeight[n] = bitsToWeight[CTable[n].nbBits];
152 
153     /* attempt weights compression by FSE */
154     {   CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) );
155         if ((hSize>1) & (hSize < maxSymbolValue/2)) {   /* FSE compressed */
156             op[0] = (BYTE)hSize;
157             return hSize+1;
158     }   }
159 
160     /* write raw values as 4-bits (max : 15) */
161     if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen : likely means source cannot be compressed */
162     if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
163     op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
164     huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination */
165     for (n=0; n<maxSymbolValue; n+=2)
166         op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
167     return ((maxSymbolValue+1)/2) + 1;
168 }
169 
170 
171 size_t HUF_readCTable (HUF_CElt* CTable, U32* maxSymbolValuePtr, const void* src, size_t srcSize)
172 {
173     BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];   /* init not required, even though some static analyzer may complain */
174     U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];   /* large enough for values from 0 to 16 */
175     U32 tableLog = 0;
176     U32 nbSymbols = 0;
177 
178     /* get symbol weights */
179     CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
180 
181     /* check result */
182     if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
183     if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
184 
185     /* Prepare base value per rank */
186     {   U32 n, nextRankStart = 0;
187         for (n=1; n<=tableLog; n++) {
188             U32 current = nextRankStart;
189             nextRankStart += (rankVal[n] << (n-1));
190             rankVal[n] = current;
191     }   }
192 
193     /* fill nbBits */
194     {   U32 n; for (n=0; n<nbSymbols; n++) {
195             const U32 w = huffWeight[n];
196             CTable[n].nbBits = (BYTE)(tableLog + 1 - w);
197     }   }
198 
199     /* fill val */
200     {   U16 nbPerRank[HUF_TABLELOG_MAX+2]  = {0};  /* support w=0=>n=tableLog+1 */
201         U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
202         { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
203         /* determine stating value per rank */
204         valPerRank[tableLog+1] = 0;   /* for w==0 */
205         {   U16 min = 0;
206             U32 n; for (n=tableLog; n>0; n--) {  /* start at n=tablelog <-> w=1 */
207                 valPerRank[n] = min;     /* get starting value within each rank */
208                 min += nbPerRank[n];
209                 min >>= 1;
210         }   }
211         /* assign value within rank, symbol order */
212         { U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; }
213     }
214 
215     *maxSymbolValuePtr = nbSymbols - 1;
216     return readSize;
217 }
218 
219 
220 typedef struct nodeElt_s {
221     U32 count;
222     U16 parent;
223     BYTE byte;
224     BYTE nbBits;
225 } nodeElt;
226 
227 static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
228 {
229     const U32 largestBits = huffNode[lastNonNull].nbBits;
230     if (largestBits <= maxNbBits) return largestBits;   /* early exit : no elt > maxNbBits */
231 
232     /* there are several too large elements (at least >= 2) */
233     {   int totalCost = 0;
234         const U32 baseCost = 1 << (largestBits - maxNbBits);
235         U32 n = lastNonNull;
236 
237         while (huffNode[n].nbBits > maxNbBits) {
238             totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
239             huffNode[n].nbBits = (BYTE)maxNbBits;
240             n --;
241         }  /* n stops at huffNode[n].nbBits <= maxNbBits */
242         while (huffNode[n].nbBits == maxNbBits) n--;   /* n end at index of smallest symbol using < maxNbBits */
243 
244         /* renorm totalCost */
245         totalCost >>= (largestBits - maxNbBits);  /* note : totalCost is necessarily a multiple of baseCost */
246 
247         /* repay normalized cost */
248         {   U32 const noSymbol = 0xF0F0F0F0;
249             U32 rankLast[HUF_TABLELOG_MAX+2];
250             int pos;
251 
252             /* Get pos of last (smallest) symbol per rank */
253             memset(rankLast, 0xF0, sizeof(rankLast));
254             {   U32 currentNbBits = maxNbBits;
255                 for (pos=n ; pos >= 0; pos--) {
256                     if (huffNode[pos].nbBits >= currentNbBits) continue;
257                     currentNbBits = huffNode[pos].nbBits;   /* < maxNbBits */
258                     rankLast[maxNbBits-currentNbBits] = pos;
259             }   }
260 
261             while (totalCost > 0) {
262                 U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1;
263                 for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
264                     U32 highPos = rankLast[nBitsToDecrease];
265                     U32 lowPos = rankLast[nBitsToDecrease-1];
266                     if (highPos == noSymbol) continue;
267                     if (lowPos == noSymbol) break;
268                     {   U32 const highTotal = huffNode[highPos].count;
269                         U32 const lowTotal = 2 * huffNode[lowPos].count;
270                         if (highTotal <= lowTotal) break;
271                 }   }
272                 /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
273                 /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
274                 while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
275                     nBitsToDecrease ++;
276                 totalCost -= 1 << (nBitsToDecrease-1);
277                 if (rankLast[nBitsToDecrease-1] == noSymbol)
278                     rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];   /* this rank is no longer empty */
279                 huffNode[rankLast[nBitsToDecrease]].nbBits ++;
280                 if (rankLast[nBitsToDecrease] == 0)    /* special case, reached largest symbol */
281                     rankLast[nBitsToDecrease] = noSymbol;
282                 else {
283                     rankLast[nBitsToDecrease]--;
284                     if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
285                         rankLast[nBitsToDecrease] = noSymbol;   /* this rank is now empty */
286             }   }   /* while (totalCost > 0) */
287 
288             while (totalCost < 0) {  /* Sometimes, cost correction overshoot */
289                 if (rankLast[1] == noSymbol) {  /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
290                     while (huffNode[n].nbBits == maxNbBits) n--;
291                     huffNode[n+1].nbBits--;
292                     rankLast[1] = n+1;
293                     totalCost++;
294                     continue;
295                 }
296                 huffNode[ rankLast[1] + 1 ].nbBits--;
297                 rankLast[1]++;
298                 totalCost ++;
299     }   }   }   /* there are several too large elements (at least >= 2) */
300 
301     return maxNbBits;
302 }
303 
304 
305 typedef struct {
306     U32 base;
307     U32 current;
308 } rankPos;
309 
310 static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
311 {
312     rankPos rank[32];
313     U32 n;
314 
315     memset(rank, 0, sizeof(rank));
316     for (n=0; n<=maxSymbolValue; n++) {
317         U32 r = BIT_highbit32(count[n] + 1);
318         rank[r].base ++;
319     }
320     for (n=30; n>0; n--) rank[n-1].base += rank[n].base;
321     for (n=0; n<32; n++) rank[n].current = rank[n].base;
322     for (n=0; n<=maxSymbolValue; n++) {
323         U32 const c = count[n];
324         U32 const r = BIT_highbit32(c+1) + 1;
325         U32 pos = rank[r].current++;
326         while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) {
327             huffNode[pos] = huffNode[pos-1];
328             pos--;
329         }
330         huffNode[pos].count = c;
331         huffNode[pos].byte  = (BYTE)n;
332     }
333 }
334 
335 
336 /** HUF_buildCTable_wksp() :
337  *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
338  *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of HUF_CTABLE_WORKSPACE_SIZE_U32 unsigned.
339  */
340 #define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
341 typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
342 size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
343 {
344     nodeElt* const huffNode0 = (nodeElt*)workSpace;
345     nodeElt* const huffNode = huffNode0+1;
346     U32 n, nonNullRank;
347     int lowS, lowN;
348     U16 nodeNb = STARTNODE;
349     U32 nodeRoot;
350 
351     /* safety checks */
352     if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
353     if (wkspSize < sizeof(huffNodeTable)) return ERROR(workSpace_tooSmall);
354     if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
355     if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
356     memset(huffNode0, 0, sizeof(huffNodeTable));
357 
358     /* sort, decreasing order */
359     HUF_sort(huffNode, count, maxSymbolValue);
360 
361     /* init for parents */
362     nonNullRank = maxSymbolValue;
363     while(huffNode[nonNullRank].count == 0) nonNullRank--;
364     lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
365     huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
366     huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb;
367     nodeNb++; lowS-=2;
368     for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
369     huffNode0[0].count = (U32)(1U<<31);  /* fake entry, strong barrier */
370 
371     /* create parents */
372     while (nodeNb <= nodeRoot) {
373         U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
374         U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
375         huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
376         huffNode[n1].parent = huffNode[n2].parent = nodeNb;
377         nodeNb++;
378     }
379 
380     /* distribute weights (unlimited tree height) */
381     huffNode[nodeRoot].nbBits = 0;
382     for (n=nodeRoot-1; n>=STARTNODE; n--)
383         huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
384     for (n=0; n<=nonNullRank; n++)
385         huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
386 
387     /* enforce maxTableLog */
388     maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);
389 
390     /* fill result into tree (val, nbBits) */
391     {   U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
392         U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
393         if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */
394         for (n=0; n<=nonNullRank; n++)
395             nbPerRank[huffNode[n].nbBits]++;
396         /* determine stating value per rank */
397         {   U16 min = 0;
398             for (n=maxNbBits; n>0; n--) {
399                 valPerRank[n] = min;      /* get starting value within each rank */
400                 min += nbPerRank[n];
401                 min >>= 1;
402         }   }
403         for (n=0; n<=maxSymbolValue; n++)
404             tree[huffNode[n].byte].nbBits = huffNode[n].nbBits;   /* push nbBits per symbol, symbol order */
405         for (n=0; n<=maxSymbolValue; n++)
406             tree[n].val = valPerRank[tree[n].nbBits]++;   /* assign value within rank, symbol order */
407     }
408 
409     return maxNbBits;
410 }
411 
412 /** HUF_buildCTable() :
413  * @return : maxNbBits
414  *  Note : count is used before tree is written, so they can safely overlap
415  */
416 size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits)
417 {
418     huffNodeTable nodeTable;
419     return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable));
420 }
421 
422 static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
423 {
424     size_t nbBits = 0;
425     int s;
426     for (s = 0; s <= (int)maxSymbolValue; ++s) {
427         nbBits += CTable[s].nbBits * count[s];
428     }
429     return nbBits >> 3;
430 }
431 
432 static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
433   int bad = 0;
434   int s;
435   for (s = 0; s <= (int)maxSymbolValue; ++s) {
436     bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
437   }
438   return !bad;
439 }
440 
441 size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
442 
443 FORCE_INLINE_TEMPLATE void
444 HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
445 {
446     BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
447 }
448 
449 #define HUF_FLUSHBITS(s)  BIT_flushBits(s)
450 
451 #define HUF_FLUSHBITS_1(stream) \
452     if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)
453 
454 #define HUF_FLUSHBITS_2(stream) \
455     if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
456 
457 FORCE_INLINE_TEMPLATE size_t
458 HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
459                                    const void* src, size_t srcSize,
460                                    const HUF_CElt* CTable)
461 {
462     const BYTE* ip = (const BYTE*) src;
463     BYTE* const ostart = (BYTE*)dst;
464     BYTE* const oend = ostart + dstSize;
465     BYTE* op = ostart;
466     size_t n;
467     BIT_CStream_t bitC;
468 
469     /* init */
470     if (dstSize < 8) return 0;   /* not enough space to compress */
471     { size_t const initErr = BIT_initCStream(&bitC, op, oend-op);
472       if (HUF_isError(initErr)) return 0; }
473 
474     n = srcSize & ~3;  /* join to mod 4 */
475     switch (srcSize & 3)
476     {
477         case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
478                  HUF_FLUSHBITS_2(&bitC);
479 		 /* fall-through */
480         case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
481                  HUF_FLUSHBITS_1(&bitC);
482 		 /* fall-through */
483         case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
484                  HUF_FLUSHBITS(&bitC);
485 		 /* fall-through */
486         case 0 : /* fall-through */
487         default: break;
488     }
489 
490     for (; n>0; n-=4) {  /* note : n&3==0 at this stage */
491         HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
492         HUF_FLUSHBITS_1(&bitC);
493         HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
494         HUF_FLUSHBITS_2(&bitC);
495         HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
496         HUF_FLUSHBITS_1(&bitC);
497         HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
498         HUF_FLUSHBITS(&bitC);
499     }
500 
501     return BIT_closeCStream(&bitC);
502 }
503 
504 #if DYNAMIC_BMI2
505 
506 static TARGET_ATTRIBUTE("bmi2") size_t
507 HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
508                                    const void* src, size_t srcSize,
509                                    const HUF_CElt* CTable)
510 {
511     return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
512 }
513 
514 static size_t
515 HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
516                                       const void* src, size_t srcSize,
517                                       const HUF_CElt* CTable)
518 {
519     return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
520 }
521 
522 static size_t
523 HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
524                               const void* src, size_t srcSize,
525                               const HUF_CElt* CTable, const int bmi2)
526 {
527     if (bmi2) {
528         return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
529     }
530     return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
531 }
532 
533 #else
534 
535 static size_t
536 HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
537                               const void* src, size_t srcSize,
538                               const HUF_CElt* CTable, const int bmi2)
539 {
540     (void)bmi2;
541     return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
542 }
543 
544 #endif
545 
546 size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
547 {
548     return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
549 }
550 
551 
552 static size_t
553 HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
554                               const void* src, size_t srcSize,
555                               const HUF_CElt* CTable, int bmi2)
556 {
557     size_t const segmentSize = (srcSize+3)/4;   /* first 3 segments */
558     const BYTE* ip = (const BYTE*) src;
559     const BYTE* const iend = ip + srcSize;
560     BYTE* const ostart = (BYTE*) dst;
561     BYTE* const oend = ostart + dstSize;
562     BYTE* op = ostart;
563 
564     if (dstSize < 6 + 1 + 1 + 1 + 8) return 0;   /* minimum space to compress successfully */
565     if (srcSize < 12) return 0;   /* no saving possible : too small input */
566     op += 6;   /* jumpTable */
567 
568     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
569         if (cSize==0) return 0;
570         assert(cSize <= 65535);
571         MEM_writeLE16(ostart, (U16)cSize);
572         op += cSize;
573     }
574 
575     ip += segmentSize;
576     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
577         if (cSize==0) return 0;
578         assert(cSize <= 65535);
579         MEM_writeLE16(ostart+2, (U16)cSize);
580         op += cSize;
581     }
582 
583     ip += segmentSize;
584     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
585         if (cSize==0) return 0;
586         assert(cSize <= 65535);
587         MEM_writeLE16(ostart+4, (U16)cSize);
588         op += cSize;
589     }
590 
591     ip += segmentSize;
592     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, iend-ip, CTable, bmi2) );
593         if (cSize==0) return 0;
594         op += cSize;
595     }
596 
597     return op-ostart;
598 }
599 
600 size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
601 {
602     return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
603 }
604 
605 
606 static size_t HUF_compressCTable_internal(
607                 BYTE* const ostart, BYTE* op, BYTE* const oend,
608                 const void* src, size_t srcSize,
609                 unsigned singleStream, const HUF_CElt* CTable, const int bmi2)
610 {
611     size_t const cSize = singleStream ?
612                          HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) :
613                          HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2);
614     if (HUF_isError(cSize)) { return cSize; }
615     if (cSize==0) { return 0; }   /* uncompressible */
616     op += cSize;
617     /* check compressibility */
618     if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
619     return op-ostart;
620 }
621 
622 typedef struct {
623     U32 count[HUF_SYMBOLVALUE_MAX + 1];
624     HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
625     huffNodeTable nodeTable;
626 } HUF_compress_tables_t;
627 
628 /* HUF_compress_internal() :
629  * `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
630 static size_t HUF_compress_internal (
631                 void* dst, size_t dstSize,
632                 const void* src, size_t srcSize,
633                 unsigned maxSymbolValue, unsigned huffLog,
634                 unsigned singleStream,
635                 void* workSpace, size_t wkspSize,
636                 HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
637                 const int bmi2)
638 {
639     HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
640     BYTE* const ostart = (BYTE*)dst;
641     BYTE* const oend = ostart + dstSize;
642     BYTE* op = ostart;
643 
644     /* checks & inits */
645     if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
646     if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
647     if (!srcSize) return 0;  /* Uncompressed */
648     if (!dstSize) return 0;  /* cannot fit anything within dst budget */
649     if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);   /* current block size limit */
650     if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
651     if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
652     if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
653     if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
654 
655     /* Heuristic : If old table is valid, use it for small inputs */
656     if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
657         return HUF_compressCTable_internal(ostart, op, oend,
658                                            src, srcSize,
659                                            singleStream, oldHufTable, bmi2);
660     }
661 
662     /* Scan input and build symbol stats */
663     {   CHECK_V_F(largest, FSE_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->count) );
664         if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
665         if (largest <= (srcSize >> 7)+1) return 0;   /* heuristic : probably not compressible enough */
666     }
667 
668     /* Check validity of previous table */
669     if ( repeat
670       && *repeat == HUF_repeat_check
671       && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) {
672         *repeat = HUF_repeat_none;
673     }
674     /* Heuristic : use existing table for small inputs */
675     if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
676         return HUF_compressCTable_internal(ostart, op, oend,
677                                            src, srcSize,
678                                            singleStream, oldHufTable, bmi2);
679     }
680 
681     /* Build Huffman Tree */
682     huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
683     {   CHECK_V_F(maxBits, HUF_buildCTable_wksp(table->CTable, table->count,
684                                                 maxSymbolValue, huffLog,
685                                                 table->nodeTable, sizeof(table->nodeTable)) );
686         huffLog = (U32)maxBits;
687         /* Zero unused symbols in CTable, so we can check it for validity */
688         memset(table->CTable + (maxSymbolValue + 1), 0,
689                sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
690     }
691 
692     /* Write table description header */
693     {   CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) );
694         /* Check if using previous huffman table is beneficial */
695         if (repeat && *repeat != HUF_repeat_none) {
696             size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
697             size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue);
698             if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
699                 return HUF_compressCTable_internal(ostart, op, oend,
700                                                    src, srcSize,
701                                                    singleStream, oldHufTable, bmi2);
702         }   }
703 
704         /* Use the new huffman table */
705         if (hSize + 12ul >= srcSize) { return 0; }
706         op += hSize;
707         if (repeat) { *repeat = HUF_repeat_none; }
708         if (oldHufTable)
709             memcpy(oldHufTable, table->CTable, sizeof(table->CTable));  /* Save new table */
710     }
711     return HUF_compressCTable_internal(ostart, op, oend,
712                                        src, srcSize,
713                                        singleStream, table->CTable, bmi2);
714 }
715 
716 
717 size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
718                       const void* src, size_t srcSize,
719                       unsigned maxSymbolValue, unsigned huffLog,
720                       void* workSpace, size_t wkspSize)
721 {
722     return HUF_compress_internal(dst, dstSize, src, srcSize,
723                                  maxSymbolValue, huffLog, 1 /*single stream*/,
724                                  workSpace, wkspSize,
725                                  NULL, NULL, 0, 0 /*bmi2*/);
726 }
727 
728 size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
729                       const void* src, size_t srcSize,
730                       unsigned maxSymbolValue, unsigned huffLog,
731                       void* workSpace, size_t wkspSize,
732                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
733 {
734     return HUF_compress_internal(dst, dstSize, src, srcSize,
735                                  maxSymbolValue, huffLog, 1 /*single stream*/,
736                                  workSpace, wkspSize, hufTable,
737                                  repeat, preferRepeat, bmi2);
738 }
739 
740 size_t HUF_compress1X (void* dst, size_t dstSize,
741                  const void* src, size_t srcSize,
742                  unsigned maxSymbolValue, unsigned huffLog)
743 {
744     unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
745     return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
746 }
747 
748 /* HUF_compress4X_repeat():
749  * compress input using 4 streams.
750  * provide workspace to generate compression tables */
751 size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
752                       const void* src, size_t srcSize,
753                       unsigned maxSymbolValue, unsigned huffLog,
754                       void* workSpace, size_t wkspSize)
755 {
756     return HUF_compress_internal(dst, dstSize, src, srcSize,
757                                  maxSymbolValue, huffLog, 0 /*4 streams*/,
758                                  workSpace, wkspSize,
759                                  NULL, NULL, 0, 0 /*bmi2*/);
760 }
761 
762 /* HUF_compress4X_repeat():
763  * compress input using 4 streams.
764  * re-use an existing huffman compression table */
765 size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
766                       const void* src, size_t srcSize,
767                       unsigned maxSymbolValue, unsigned huffLog,
768                       void* workSpace, size_t wkspSize,
769                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
770 {
771     return HUF_compress_internal(dst, dstSize, src, srcSize,
772                                  maxSymbolValue, huffLog, 0 /* 4 streams */,
773                                  workSpace, wkspSize,
774                                  hufTable, repeat, preferRepeat, bmi2);
775 }
776 
777 size_t HUF_compress2 (void* dst, size_t dstSize,
778                 const void* src, size_t srcSize,
779                 unsigned maxSymbolValue, unsigned huffLog)
780 {
781     unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
782     return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
783 }
784 
785 size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
786 {
787     return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
788 }
789