xref: /freebsd/sys/contrib/zstd/lib/compress/huf_compress.c (revision a0483764f3d68669e9b7db074bcbd45b050166bb)
10c16b537SWarner Losh /* ******************************************************************
20c16b537SWarner Losh    Huffman encoder, part of New Generation Entropy library
30c16b537SWarner Losh    Copyright (C) 2013-2016, Yann Collet.
40c16b537SWarner Losh 
50c16b537SWarner Losh    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
60c16b537SWarner Losh 
70c16b537SWarner Losh    Redistribution and use in source and binary forms, with or without
80c16b537SWarner Losh    modification, are permitted provided that the following conditions are
90c16b537SWarner Losh    met:
100c16b537SWarner Losh 
110c16b537SWarner Losh        * Redistributions of source code must retain the above copyright
120c16b537SWarner Losh    notice, this list of conditions and the following disclaimer.
130c16b537SWarner Losh        * Redistributions in binary form must reproduce the above
140c16b537SWarner Losh    copyright notice, this list of conditions and the following disclaimer
150c16b537SWarner Losh    in the documentation and/or other materials provided with the
160c16b537SWarner Losh    distribution.
170c16b537SWarner Losh 
180c16b537SWarner Losh    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
190c16b537SWarner Losh    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
200c16b537SWarner Losh    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
210c16b537SWarner Losh    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
220c16b537SWarner Losh    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
230c16b537SWarner Losh    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
240c16b537SWarner Losh    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
250c16b537SWarner Losh    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
260c16b537SWarner Losh    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
270c16b537SWarner Losh    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
280c16b537SWarner Losh    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
290c16b537SWarner Losh 
300c16b537SWarner Losh     You can contact the author at :
310c16b537SWarner Losh     - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
320c16b537SWarner Losh     - Public forum : https://groups.google.com/forum/#!forum/lz4c
330c16b537SWarner Losh ****************************************************************** */
340c16b537SWarner Losh 
350c16b537SWarner Losh /* **************************************************************
360c16b537SWarner Losh *  Compiler specifics
370c16b537SWarner Losh ****************************************************************/
380c16b537SWarner Losh #ifdef _MSC_VER    /* Visual Studio */
390c16b537SWarner Losh #  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
400c16b537SWarner Losh #endif
410c16b537SWarner Losh 
420c16b537SWarner Losh 
430c16b537SWarner Losh /* **************************************************************
440c16b537SWarner Losh *  Includes
450c16b537SWarner Losh ****************************************************************/
460c16b537SWarner Losh #include <string.h>     /* memcpy, memset */
470c16b537SWarner Losh #include <stdio.h>      /* printf (debug) */
4819fcbaf1SConrad Meyer #include "compiler.h"
490f743729SConrad Meyer #include "bitstream.h"
500f743729SConrad Meyer #include "hist.h"
510c16b537SWarner Losh #define FSE_STATIC_LINKING_ONLY   /* FSE_optimalTableLog_internal */
520c16b537SWarner Losh #include "fse.h"        /* header compression */
530c16b537SWarner Losh #define HUF_STATIC_LINKING_ONLY
540c16b537SWarner Losh #include "huf.h"
550c16b537SWarner Losh #include "error_private.h"
560c16b537SWarner Losh 
570c16b537SWarner Losh 
580c16b537SWarner Losh /* **************************************************************
590c16b537SWarner Losh *  Error Management
600c16b537SWarner Losh ****************************************************************/
610c16b537SWarner Losh #define HUF_isError ERR_isError
620f743729SConrad Meyer #define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)   /* use only *after* variable declarations */
630c16b537SWarner Losh #define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
640c16b537SWarner Losh #define CHECK_F(f)   { CHECK_V_F(_var_err__, f); }
650c16b537SWarner Losh 
660c16b537SWarner Losh 
670c16b537SWarner Losh /* **************************************************************
680c16b537SWarner Losh *  Utils
690c16b537SWarner Losh ****************************************************************/
700c16b537SWarner Losh unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
710c16b537SWarner Losh {
720c16b537SWarner Losh     return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
730c16b537SWarner Losh }
740c16b537SWarner Losh 
750c16b537SWarner Losh 
760c16b537SWarner Losh /* *******************************************************
770c16b537SWarner Losh *  HUF : Huffman block compression
780c16b537SWarner Losh *********************************************************/
790c16b537SWarner Losh /* HUF_compressWeights() :
800c16b537SWarner Losh  * Same as FSE_compress(), but dedicated to huff0's weights compression.
810c16b537SWarner Losh  * The use case needs much less stack memory.
820c16b537SWarner Losh  * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
830c16b537SWarner Losh  */
840c16b537SWarner Losh #define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
850f743729SConrad Meyer static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
860c16b537SWarner Losh {
870c16b537SWarner Losh     BYTE* const ostart = (BYTE*) dst;
880c16b537SWarner Losh     BYTE* op = ostart;
890c16b537SWarner Losh     BYTE* const oend = ostart + dstSize;
900c16b537SWarner Losh 
91*a0483764SConrad Meyer     unsigned maxSymbolValue = HUF_TABLELOG_MAX;
920c16b537SWarner Losh     U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
930c16b537SWarner Losh 
940c16b537SWarner Losh     FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
950c16b537SWarner Losh     BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER];
960c16b537SWarner Losh 
97*a0483764SConrad Meyer     unsigned count[HUF_TABLELOG_MAX+1];
980c16b537SWarner Losh     S16 norm[HUF_TABLELOG_MAX+1];
990c16b537SWarner Losh 
1000c16b537SWarner Losh     /* init conditions */
1010c16b537SWarner Losh     if (wtSize <= 1) return 0;  /* Not compressible */
1020c16b537SWarner Losh 
1030c16b537SWarner Losh     /* Scan input and build symbol stats */
1040f743729SConrad Meyer     {   unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize);   /* never fails */
1050c16b537SWarner Losh         if (maxCount == wtSize) return 1;   /* only a single symbol in src : rle */
1060c16b537SWarner Losh         if (maxCount == 1) return 0;        /* each symbol present maximum once => not compressible */
1070c16b537SWarner Losh     }
1080c16b537SWarner Losh 
1090c16b537SWarner Losh     tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
1100c16b537SWarner Losh     CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) );
1110c16b537SWarner Losh 
1120c16b537SWarner Losh     /* Write table description header */
1130c16b537SWarner Losh     {   CHECK_V_F(hSize, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
1140c16b537SWarner Losh         op += hSize;
1150c16b537SWarner Losh     }
1160c16b537SWarner Losh 
1170c16b537SWarner Losh     /* Compress */
1180c16b537SWarner Losh     CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
1190c16b537SWarner Losh     {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable) );
1200c16b537SWarner Losh         if (cSize == 0) return 0;   /* not enough space for compressed data */
1210c16b537SWarner Losh         op += cSize;
1220c16b537SWarner Losh     }
1230c16b537SWarner Losh 
1240c16b537SWarner Losh     return op-ostart;
1250c16b537SWarner Losh }
1260c16b537SWarner Losh 
1270c16b537SWarner Losh 
1280c16b537SWarner Losh struct HUF_CElt_s {
1290c16b537SWarner Losh   U16  val;
1300c16b537SWarner Losh   BYTE nbBits;
1310c16b537SWarner Losh };   /* typedef'd to HUF_CElt within "huf.h" */
1320c16b537SWarner Losh 
1330c16b537SWarner Losh /*! HUF_writeCTable() :
1340c16b537SWarner Losh     `CTable` : Huffman tree to save, using huf representation.
1350c16b537SWarner Losh     @return : size of saved CTable */
1360c16b537SWarner Losh size_t HUF_writeCTable (void* dst, size_t maxDstSize,
137*a0483764SConrad Meyer                         const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
1380c16b537SWarner Losh {
1390c16b537SWarner Losh     BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
1400c16b537SWarner Losh     BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
1410c16b537SWarner Losh     BYTE* op = (BYTE*)dst;
1420c16b537SWarner Losh     U32 n;
1430c16b537SWarner Losh 
1440c16b537SWarner Losh      /* check conditions */
1450c16b537SWarner Losh     if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
1460c16b537SWarner Losh 
1470c16b537SWarner Losh     /* convert to weight */
1480c16b537SWarner Losh     bitsToWeight[0] = 0;
1490c16b537SWarner Losh     for (n=1; n<huffLog+1; n++)
1500c16b537SWarner Losh         bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
1510c16b537SWarner Losh     for (n=0; n<maxSymbolValue; n++)
1520c16b537SWarner Losh         huffWeight[n] = bitsToWeight[CTable[n].nbBits];
1530c16b537SWarner Losh 
1540c16b537SWarner Losh     /* attempt weights compression by FSE */
1550c16b537SWarner Losh     {   CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) );
1560c16b537SWarner Losh         if ((hSize>1) & (hSize < maxSymbolValue/2)) {   /* FSE compressed */
1570c16b537SWarner Losh             op[0] = (BYTE)hSize;
1580c16b537SWarner Losh             return hSize+1;
1590c16b537SWarner Losh     }   }
1600c16b537SWarner Losh 
1610c16b537SWarner Losh     /* write raw values as 4-bits (max : 15) */
1620c16b537SWarner Losh     if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen : likely means source cannot be compressed */
1630c16b537SWarner Losh     if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
1640c16b537SWarner Losh     op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
1650c16b537SWarner Losh     huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination */
1660c16b537SWarner Losh     for (n=0; n<maxSymbolValue; n+=2)
1670c16b537SWarner Losh         op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
1680c16b537SWarner Losh     return ((maxSymbolValue+1)/2) + 1;
1690c16b537SWarner Losh }
1700c16b537SWarner Losh 
1710c16b537SWarner Losh 
172*a0483764SConrad Meyer size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize)
1730c16b537SWarner Losh {
1740c16b537SWarner Losh     BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];   /* init not required, even though some static analyzer may complain */
1750c16b537SWarner Losh     U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];   /* large enough for values from 0 to 16 */
1760c16b537SWarner Losh     U32 tableLog = 0;
1770c16b537SWarner Losh     U32 nbSymbols = 0;
1780c16b537SWarner Losh 
1790c16b537SWarner Losh     /* get symbol weights */
1800c16b537SWarner Losh     CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
1810c16b537SWarner Losh 
1820c16b537SWarner Losh     /* check result */
1830c16b537SWarner Losh     if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
1840c16b537SWarner Losh     if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
1850c16b537SWarner Losh 
1860c16b537SWarner Losh     /* Prepare base value per rank */
1870c16b537SWarner Losh     {   U32 n, nextRankStart = 0;
1880c16b537SWarner Losh         for (n=1; n<=tableLog; n++) {
1890c16b537SWarner Losh             U32 current = nextRankStart;
1900c16b537SWarner Losh             nextRankStart += (rankVal[n] << (n-1));
1910c16b537SWarner Losh             rankVal[n] = current;
1920c16b537SWarner Losh     }   }
1930c16b537SWarner Losh 
1940c16b537SWarner Losh     /* fill nbBits */
1950c16b537SWarner Losh     {   U32 n; for (n=0; n<nbSymbols; n++) {
1960c16b537SWarner Losh             const U32 w = huffWeight[n];
1970c16b537SWarner Losh             CTable[n].nbBits = (BYTE)(tableLog + 1 - w);
1980c16b537SWarner Losh     }   }
1990c16b537SWarner Losh 
2000c16b537SWarner Losh     /* fill val */
2010c16b537SWarner Losh     {   U16 nbPerRank[HUF_TABLELOG_MAX+2]  = {0};  /* support w=0=>n=tableLog+1 */
2020c16b537SWarner Losh         U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
2030c16b537SWarner Losh         { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
2040c16b537SWarner Losh         /* determine stating value per rank */
2050c16b537SWarner Losh         valPerRank[tableLog+1] = 0;   /* for w==0 */
2060c16b537SWarner Losh         {   U16 min = 0;
2070c16b537SWarner Losh             U32 n; for (n=tableLog; n>0; n--) {  /* start at n=tablelog <-> w=1 */
2080c16b537SWarner Losh                 valPerRank[n] = min;     /* get starting value within each rank */
2090c16b537SWarner Losh                 min += nbPerRank[n];
2100c16b537SWarner Losh                 min >>= 1;
2110c16b537SWarner Losh         }   }
2120c16b537SWarner Losh         /* assign value within rank, symbol order */
2130c16b537SWarner Losh         { U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; }
2140c16b537SWarner Losh     }
2150c16b537SWarner Losh 
2160c16b537SWarner Losh     *maxSymbolValuePtr = nbSymbols - 1;
2170c16b537SWarner Losh     return readSize;
2180c16b537SWarner Losh }
2190c16b537SWarner Losh 
2200f743729SConrad Meyer U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue)
2210f743729SConrad Meyer {
2220f743729SConrad Meyer     const HUF_CElt* table = (const HUF_CElt*)symbolTable;
2230f743729SConrad Meyer     assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
2240f743729SConrad Meyer     return table[symbolValue].nbBits;
2250f743729SConrad Meyer }
2260f743729SConrad Meyer 
2270c16b537SWarner Losh 
2280c16b537SWarner Losh typedef struct nodeElt_s {
2290c16b537SWarner Losh     U32 count;
2300c16b537SWarner Losh     U16 parent;
2310c16b537SWarner Losh     BYTE byte;
2320c16b537SWarner Losh     BYTE nbBits;
2330c16b537SWarner Losh } nodeElt;
2340c16b537SWarner Losh 
2350c16b537SWarner Losh static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
2360c16b537SWarner Losh {
2370c16b537SWarner Losh     const U32 largestBits = huffNode[lastNonNull].nbBits;
2380c16b537SWarner Losh     if (largestBits <= maxNbBits) return largestBits;   /* early exit : no elt > maxNbBits */
2390c16b537SWarner Losh 
2400c16b537SWarner Losh     /* there are several too large elements (at least >= 2) */
2410c16b537SWarner Losh     {   int totalCost = 0;
2420c16b537SWarner Losh         const U32 baseCost = 1 << (largestBits - maxNbBits);
2430c16b537SWarner Losh         U32 n = lastNonNull;
2440c16b537SWarner Losh 
2450c16b537SWarner Losh         while (huffNode[n].nbBits > maxNbBits) {
2460c16b537SWarner Losh             totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
2470c16b537SWarner Losh             huffNode[n].nbBits = (BYTE)maxNbBits;
2480c16b537SWarner Losh             n --;
2490c16b537SWarner Losh         }  /* n stops at huffNode[n].nbBits <= maxNbBits */
2500c16b537SWarner Losh         while (huffNode[n].nbBits == maxNbBits) n--;   /* n end at index of smallest symbol using < maxNbBits */
2510c16b537SWarner Losh 
2520c16b537SWarner Losh         /* renorm totalCost */
2530c16b537SWarner Losh         totalCost >>= (largestBits - maxNbBits);  /* note : totalCost is necessarily a multiple of baseCost */
2540c16b537SWarner Losh 
2550c16b537SWarner Losh         /* repay normalized cost */
2560c16b537SWarner Losh         {   U32 const noSymbol = 0xF0F0F0F0;
2570c16b537SWarner Losh             U32 rankLast[HUF_TABLELOG_MAX+2];
2580c16b537SWarner Losh             int pos;
2590c16b537SWarner Losh 
2600c16b537SWarner Losh             /* Get pos of last (smallest) symbol per rank */
2610c16b537SWarner Losh             memset(rankLast, 0xF0, sizeof(rankLast));
2620c16b537SWarner Losh             {   U32 currentNbBits = maxNbBits;
2630c16b537SWarner Losh                 for (pos=n ; pos >= 0; pos--) {
2640c16b537SWarner Losh                     if (huffNode[pos].nbBits >= currentNbBits) continue;
2650c16b537SWarner Losh                     currentNbBits = huffNode[pos].nbBits;   /* < maxNbBits */
2660c16b537SWarner Losh                     rankLast[maxNbBits-currentNbBits] = pos;
2670c16b537SWarner Losh             }   }
2680c16b537SWarner Losh 
2690c16b537SWarner Losh             while (totalCost > 0) {
2700c16b537SWarner Losh                 U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1;
2710c16b537SWarner Losh                 for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
2720c16b537SWarner Losh                     U32 highPos = rankLast[nBitsToDecrease];
2730c16b537SWarner Losh                     U32 lowPos = rankLast[nBitsToDecrease-1];
2740c16b537SWarner Losh                     if (highPos == noSymbol) continue;
2750c16b537SWarner Losh                     if (lowPos == noSymbol) break;
2760c16b537SWarner Losh                     {   U32 const highTotal = huffNode[highPos].count;
2770c16b537SWarner Losh                         U32 const lowTotal = 2 * huffNode[lowPos].count;
2780c16b537SWarner Losh                         if (highTotal <= lowTotal) break;
2790c16b537SWarner Losh                 }   }
2800c16b537SWarner Losh                 /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
2810c16b537SWarner Losh                 /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
2820c16b537SWarner Losh                 while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
2830c16b537SWarner Losh                     nBitsToDecrease ++;
2840c16b537SWarner Losh                 totalCost -= 1 << (nBitsToDecrease-1);
2850c16b537SWarner Losh                 if (rankLast[nBitsToDecrease-1] == noSymbol)
2860c16b537SWarner Losh                     rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];   /* this rank is no longer empty */
2870c16b537SWarner Losh                 huffNode[rankLast[nBitsToDecrease]].nbBits ++;
2880c16b537SWarner Losh                 if (rankLast[nBitsToDecrease] == 0)    /* special case, reached largest symbol */
2890c16b537SWarner Losh                     rankLast[nBitsToDecrease] = noSymbol;
2900c16b537SWarner Losh                 else {
2910c16b537SWarner Losh                     rankLast[nBitsToDecrease]--;
2920c16b537SWarner Losh                     if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
2930c16b537SWarner Losh                         rankLast[nBitsToDecrease] = noSymbol;   /* this rank is now empty */
2940c16b537SWarner Losh             }   }   /* while (totalCost > 0) */
2950c16b537SWarner Losh 
2960c16b537SWarner Losh             while (totalCost < 0) {  /* Sometimes, cost correction overshoot */
2970c16b537SWarner Losh                 if (rankLast[1] == noSymbol) {  /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
2980c16b537SWarner Losh                     while (huffNode[n].nbBits == maxNbBits) n--;
2990c16b537SWarner Losh                     huffNode[n+1].nbBits--;
3000c16b537SWarner Losh                     rankLast[1] = n+1;
3010c16b537SWarner Losh                     totalCost++;
3020c16b537SWarner Losh                     continue;
3030c16b537SWarner Losh                 }
3040c16b537SWarner Losh                 huffNode[ rankLast[1] + 1 ].nbBits--;
3050c16b537SWarner Losh                 rankLast[1]++;
3060c16b537SWarner Losh                 totalCost ++;
3070c16b537SWarner Losh     }   }   }   /* there are several too large elements (at least >= 2) */
3080c16b537SWarner Losh 
3090c16b537SWarner Losh     return maxNbBits;
3100c16b537SWarner Losh }
3110c16b537SWarner Losh 
3120c16b537SWarner Losh 
3130c16b537SWarner Losh typedef struct {
3140c16b537SWarner Losh     U32 base;
3150c16b537SWarner Losh     U32 current;
3160c16b537SWarner Losh } rankPos;
3170c16b537SWarner Losh 
318*a0483764SConrad Meyer static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue)
3190c16b537SWarner Losh {
3200c16b537SWarner Losh     rankPos rank[32];
3210c16b537SWarner Losh     U32 n;
3220c16b537SWarner Losh 
3230c16b537SWarner Losh     memset(rank, 0, sizeof(rank));
3240c16b537SWarner Losh     for (n=0; n<=maxSymbolValue; n++) {
3250c16b537SWarner Losh         U32 r = BIT_highbit32(count[n] + 1);
3260c16b537SWarner Losh         rank[r].base ++;
3270c16b537SWarner Losh     }
3280c16b537SWarner Losh     for (n=30; n>0; n--) rank[n-1].base += rank[n].base;
3290c16b537SWarner Losh     for (n=0; n<32; n++) rank[n].current = rank[n].base;
3300c16b537SWarner Losh     for (n=0; n<=maxSymbolValue; n++) {
3310c16b537SWarner Losh         U32 const c = count[n];
3320c16b537SWarner Losh         U32 const r = BIT_highbit32(c+1) + 1;
3330c16b537SWarner Losh         U32 pos = rank[r].current++;
33419fcbaf1SConrad Meyer         while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) {
33519fcbaf1SConrad Meyer             huffNode[pos] = huffNode[pos-1];
33619fcbaf1SConrad Meyer             pos--;
33719fcbaf1SConrad Meyer         }
3380c16b537SWarner Losh         huffNode[pos].count = c;
3390c16b537SWarner Losh         huffNode[pos].byte  = (BYTE)n;
3400c16b537SWarner Losh     }
3410c16b537SWarner Losh }
3420c16b537SWarner Losh 
3430c16b537SWarner Losh 
3440c16b537SWarner Losh /** HUF_buildCTable_wksp() :
3450c16b537SWarner Losh  *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
34619fcbaf1SConrad Meyer  *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of HUF_CTABLE_WORKSPACE_SIZE_U32 unsigned.
3470c16b537SWarner Losh  */
3480c16b537SWarner Losh #define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
34919fcbaf1SConrad Meyer typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
350*a0483764SConrad Meyer size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
3510c16b537SWarner Losh {
3520c16b537SWarner Losh     nodeElt* const huffNode0 = (nodeElt*)workSpace;
3530c16b537SWarner Losh     nodeElt* const huffNode = huffNode0+1;
3540c16b537SWarner Losh     U32 n, nonNullRank;
3550c16b537SWarner Losh     int lowS, lowN;
3560c16b537SWarner Losh     U16 nodeNb = STARTNODE;
3570c16b537SWarner Losh     U32 nodeRoot;
3580c16b537SWarner Losh 
3590c16b537SWarner Losh     /* safety checks */
36019fcbaf1SConrad Meyer     if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
36119fcbaf1SConrad Meyer     if (wkspSize < sizeof(huffNodeTable)) return ERROR(workSpace_tooSmall);
3620c16b537SWarner Losh     if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
36319fcbaf1SConrad Meyer     if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
3640c16b537SWarner Losh     memset(huffNode0, 0, sizeof(huffNodeTable));
3650c16b537SWarner Losh 
3660c16b537SWarner Losh     /* sort, decreasing order */
3670c16b537SWarner Losh     HUF_sort(huffNode, count, maxSymbolValue);
3680c16b537SWarner Losh 
3690c16b537SWarner Losh     /* init for parents */
3700c16b537SWarner Losh     nonNullRank = maxSymbolValue;
3710c16b537SWarner Losh     while(huffNode[nonNullRank].count == 0) nonNullRank--;
3720c16b537SWarner Losh     lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
3730c16b537SWarner Losh     huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
3740c16b537SWarner Losh     huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb;
3750c16b537SWarner Losh     nodeNb++; lowS-=2;
3760c16b537SWarner Losh     for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
3770c16b537SWarner Losh     huffNode0[0].count = (U32)(1U<<31);  /* fake entry, strong barrier */
3780c16b537SWarner Losh 
3790c16b537SWarner Losh     /* create parents */
3800c16b537SWarner Losh     while (nodeNb <= nodeRoot) {
3810c16b537SWarner Losh         U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
3820c16b537SWarner Losh         U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
3830c16b537SWarner Losh         huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
3840c16b537SWarner Losh         huffNode[n1].parent = huffNode[n2].parent = nodeNb;
3850c16b537SWarner Losh         nodeNb++;
3860c16b537SWarner Losh     }
3870c16b537SWarner Losh 
3880c16b537SWarner Losh     /* distribute weights (unlimited tree height) */
3890c16b537SWarner Losh     huffNode[nodeRoot].nbBits = 0;
3900c16b537SWarner Losh     for (n=nodeRoot-1; n>=STARTNODE; n--)
3910c16b537SWarner Losh         huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
3920c16b537SWarner Losh     for (n=0; n<=nonNullRank; n++)
3930c16b537SWarner Losh         huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
3940c16b537SWarner Losh 
3950c16b537SWarner Losh     /* enforce maxTableLog */
3960c16b537SWarner Losh     maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);
3970c16b537SWarner Losh 
3980c16b537SWarner Losh     /* fill result into tree (val, nbBits) */
3990c16b537SWarner Losh     {   U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
4000c16b537SWarner Losh         U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
4010c16b537SWarner Losh         if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */
4020c16b537SWarner Losh         for (n=0; n<=nonNullRank; n++)
4030c16b537SWarner Losh             nbPerRank[huffNode[n].nbBits]++;
4040c16b537SWarner Losh         /* determine stating value per rank */
4050c16b537SWarner Losh         {   U16 min = 0;
4060c16b537SWarner Losh             for (n=maxNbBits; n>0; n--) {
4070c16b537SWarner Losh                 valPerRank[n] = min;      /* get starting value within each rank */
4080c16b537SWarner Losh                 min += nbPerRank[n];
4090c16b537SWarner Losh                 min >>= 1;
4100c16b537SWarner Losh         }   }
4110c16b537SWarner Losh         for (n=0; n<=maxSymbolValue; n++)
4120c16b537SWarner Losh             tree[huffNode[n].byte].nbBits = huffNode[n].nbBits;   /* push nbBits per symbol, symbol order */
4130c16b537SWarner Losh         for (n=0; n<=maxSymbolValue; n++)
4140c16b537SWarner Losh             tree[n].val = valPerRank[tree[n].nbBits]++;   /* assign value within rank, symbol order */
4150c16b537SWarner Losh     }
4160c16b537SWarner Losh 
4170c16b537SWarner Losh     return maxNbBits;
4180c16b537SWarner Losh }
4190c16b537SWarner Losh 
4200c16b537SWarner Losh /** HUF_buildCTable() :
42119fcbaf1SConrad Meyer  * @return : maxNbBits
4220c16b537SWarner Losh  *  Note : count is used before tree is written, so they can safely overlap
4230c16b537SWarner Losh  */
424*a0483764SConrad Meyer size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits)
4250c16b537SWarner Losh {
4260c16b537SWarner Losh     huffNodeTable nodeTable;
4270c16b537SWarner Losh     return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable));
4280c16b537SWarner Losh }
4290c16b537SWarner Losh 
4300c16b537SWarner Losh static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
4310c16b537SWarner Losh {
4320c16b537SWarner Losh     size_t nbBits = 0;
4330c16b537SWarner Losh     int s;
4340c16b537SWarner Losh     for (s = 0; s <= (int)maxSymbolValue; ++s) {
4350c16b537SWarner Losh         nbBits += CTable[s].nbBits * count[s];
4360c16b537SWarner Losh     }
4370c16b537SWarner Losh     return nbBits >> 3;
4380c16b537SWarner Losh }
4390c16b537SWarner Losh 
4400c16b537SWarner Losh static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
4410c16b537SWarner Losh   int bad = 0;
4420c16b537SWarner Losh   int s;
4430c16b537SWarner Losh   for (s = 0; s <= (int)maxSymbolValue; ++s) {
4440c16b537SWarner Losh     bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
4450c16b537SWarner Losh   }
4460c16b537SWarner Losh   return !bad;
4470c16b537SWarner Losh }
4480c16b537SWarner Losh 
44919fcbaf1SConrad Meyer size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
45019fcbaf1SConrad Meyer 
45119fcbaf1SConrad Meyer FORCE_INLINE_TEMPLATE void
45219fcbaf1SConrad Meyer HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
4530c16b537SWarner Losh {
4540c16b537SWarner Losh     BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
4550c16b537SWarner Losh }
4560c16b537SWarner Losh 
4570c16b537SWarner Losh #define HUF_FLUSHBITS(s)  BIT_flushBits(s)
4580c16b537SWarner Losh 
4590c16b537SWarner Losh #define HUF_FLUSHBITS_1(stream) \
4600c16b537SWarner Losh     if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)
4610c16b537SWarner Losh 
4620c16b537SWarner Losh #define HUF_FLUSHBITS_2(stream) \
4630c16b537SWarner Losh     if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
4640c16b537SWarner Losh 
46519fcbaf1SConrad Meyer FORCE_INLINE_TEMPLATE size_t
46619fcbaf1SConrad Meyer HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
46719fcbaf1SConrad Meyer                                    const void* src, size_t srcSize,
46819fcbaf1SConrad Meyer                                    const HUF_CElt* CTable)
4690c16b537SWarner Losh {
4700c16b537SWarner Losh     const BYTE* ip = (const BYTE*) src;
4710c16b537SWarner Losh     BYTE* const ostart = (BYTE*)dst;
4720c16b537SWarner Losh     BYTE* const oend = ostart + dstSize;
4730c16b537SWarner Losh     BYTE* op = ostart;
4740c16b537SWarner Losh     size_t n;
4750c16b537SWarner Losh     BIT_CStream_t bitC;
4760c16b537SWarner Losh 
4770c16b537SWarner Losh     /* init */
4780c16b537SWarner Losh     if (dstSize < 8) return 0;   /* not enough space to compress */
4790c16b537SWarner Losh     { size_t const initErr = BIT_initCStream(&bitC, op, oend-op);
4800c16b537SWarner Losh       if (HUF_isError(initErr)) return 0; }
4810c16b537SWarner Losh 
4820c16b537SWarner Losh     n = srcSize & ~3;  /* join to mod 4 */
4830c16b537SWarner Losh     switch (srcSize & 3)
4840c16b537SWarner Losh     {
4850c16b537SWarner Losh         case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
4860c16b537SWarner Losh                  HUF_FLUSHBITS_2(&bitC);
4870c16b537SWarner Losh 		 /* fall-through */
4880c16b537SWarner Losh         case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
4890c16b537SWarner Losh                  HUF_FLUSHBITS_1(&bitC);
4900c16b537SWarner Losh 		 /* fall-through */
4910c16b537SWarner Losh         case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
4920c16b537SWarner Losh                  HUF_FLUSHBITS(&bitC);
4930c16b537SWarner Losh 		 /* fall-through */
4940c16b537SWarner Losh         case 0 : /* fall-through */
4950c16b537SWarner Losh         default: break;
4960c16b537SWarner Losh     }
4970c16b537SWarner Losh 
4980c16b537SWarner Losh     for (; n>0; n-=4) {  /* note : n&3==0 at this stage */
4990c16b537SWarner Losh         HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
5000c16b537SWarner Losh         HUF_FLUSHBITS_1(&bitC);
5010c16b537SWarner Losh         HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
5020c16b537SWarner Losh         HUF_FLUSHBITS_2(&bitC);
5030c16b537SWarner Losh         HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
5040c16b537SWarner Losh         HUF_FLUSHBITS_1(&bitC);
5050c16b537SWarner Losh         HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
5060c16b537SWarner Losh         HUF_FLUSHBITS(&bitC);
5070c16b537SWarner Losh     }
5080c16b537SWarner Losh 
5090c16b537SWarner Losh     return BIT_closeCStream(&bitC);
5100c16b537SWarner Losh }
5110c16b537SWarner Losh 
51219fcbaf1SConrad Meyer #if DYNAMIC_BMI2
5130c16b537SWarner Losh 
51419fcbaf1SConrad Meyer static TARGET_ATTRIBUTE("bmi2") size_t
51519fcbaf1SConrad Meyer HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
51619fcbaf1SConrad Meyer                                    const void* src, size_t srcSize,
51719fcbaf1SConrad Meyer                                    const HUF_CElt* CTable)
51819fcbaf1SConrad Meyer {
51919fcbaf1SConrad Meyer     return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
52019fcbaf1SConrad Meyer }
52119fcbaf1SConrad Meyer 
52219fcbaf1SConrad Meyer static size_t
52319fcbaf1SConrad Meyer HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
52419fcbaf1SConrad Meyer                                       const void* src, size_t srcSize,
52519fcbaf1SConrad Meyer                                       const HUF_CElt* CTable)
52619fcbaf1SConrad Meyer {
52719fcbaf1SConrad Meyer     return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
52819fcbaf1SConrad Meyer }
52919fcbaf1SConrad Meyer 
53019fcbaf1SConrad Meyer static size_t
53119fcbaf1SConrad Meyer HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
53219fcbaf1SConrad Meyer                               const void* src, size_t srcSize,
53319fcbaf1SConrad Meyer                               const HUF_CElt* CTable, const int bmi2)
53419fcbaf1SConrad Meyer {
53519fcbaf1SConrad Meyer     if (bmi2) {
53619fcbaf1SConrad Meyer         return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
53719fcbaf1SConrad Meyer     }
53819fcbaf1SConrad Meyer     return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
53919fcbaf1SConrad Meyer }
54019fcbaf1SConrad Meyer 
54119fcbaf1SConrad Meyer #else
54219fcbaf1SConrad Meyer 
54319fcbaf1SConrad Meyer static size_t
54419fcbaf1SConrad Meyer HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
54519fcbaf1SConrad Meyer                               const void* src, size_t srcSize,
54619fcbaf1SConrad Meyer                               const HUF_CElt* CTable, const int bmi2)
54719fcbaf1SConrad Meyer {
54819fcbaf1SConrad Meyer     (void)bmi2;
54919fcbaf1SConrad Meyer     return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
55019fcbaf1SConrad Meyer }
55119fcbaf1SConrad Meyer 
55219fcbaf1SConrad Meyer #endif
55319fcbaf1SConrad Meyer 
55419fcbaf1SConrad Meyer size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
55519fcbaf1SConrad Meyer {
55619fcbaf1SConrad Meyer     return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
55719fcbaf1SConrad Meyer }
55819fcbaf1SConrad Meyer 
55919fcbaf1SConrad Meyer 
56019fcbaf1SConrad Meyer static size_t
56119fcbaf1SConrad Meyer HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
56219fcbaf1SConrad Meyer                               const void* src, size_t srcSize,
56319fcbaf1SConrad Meyer                               const HUF_CElt* CTable, int bmi2)
5640c16b537SWarner Losh {
5650c16b537SWarner Losh     size_t const segmentSize = (srcSize+3)/4;   /* first 3 segments */
5660c16b537SWarner Losh     const BYTE* ip = (const BYTE*) src;
5670c16b537SWarner Losh     const BYTE* const iend = ip + srcSize;
5680c16b537SWarner Losh     BYTE* const ostart = (BYTE*) dst;
5690c16b537SWarner Losh     BYTE* const oend = ostart + dstSize;
5700c16b537SWarner Losh     BYTE* op = ostart;
5710c16b537SWarner Losh 
5720c16b537SWarner Losh     if (dstSize < 6 + 1 + 1 + 1 + 8) return 0;   /* minimum space to compress successfully */
5730c16b537SWarner Losh     if (srcSize < 12) return 0;   /* no saving possible : too small input */
5740c16b537SWarner Losh     op += 6;   /* jumpTable */
5750c16b537SWarner Losh 
57619fcbaf1SConrad Meyer     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
5770c16b537SWarner Losh         if (cSize==0) return 0;
57819fcbaf1SConrad Meyer         assert(cSize <= 65535);
5790c16b537SWarner Losh         MEM_writeLE16(ostart, (U16)cSize);
5800c16b537SWarner Losh         op += cSize;
5810c16b537SWarner Losh     }
5820c16b537SWarner Losh 
5830c16b537SWarner Losh     ip += segmentSize;
58419fcbaf1SConrad Meyer     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
5850c16b537SWarner Losh         if (cSize==0) return 0;
58619fcbaf1SConrad Meyer         assert(cSize <= 65535);
5870c16b537SWarner Losh         MEM_writeLE16(ostart+2, (U16)cSize);
5880c16b537SWarner Losh         op += cSize;
5890c16b537SWarner Losh     }
5900c16b537SWarner Losh 
5910c16b537SWarner Losh     ip += segmentSize;
59219fcbaf1SConrad Meyer     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
5930c16b537SWarner Losh         if (cSize==0) return 0;
59419fcbaf1SConrad Meyer         assert(cSize <= 65535);
5950c16b537SWarner Losh         MEM_writeLE16(ostart+4, (U16)cSize);
5960c16b537SWarner Losh         op += cSize;
5970c16b537SWarner Losh     }
5980c16b537SWarner Losh 
5990c16b537SWarner Losh     ip += segmentSize;
60019fcbaf1SConrad Meyer     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, iend-ip, CTable, bmi2) );
6010c16b537SWarner Losh         if (cSize==0) return 0;
6020c16b537SWarner Losh         op += cSize;
6030c16b537SWarner Losh     }
6040c16b537SWarner Losh 
6050c16b537SWarner Losh     return op-ostart;
6060c16b537SWarner Losh }
6070c16b537SWarner Losh 
60819fcbaf1SConrad Meyer size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
60919fcbaf1SConrad Meyer {
61019fcbaf1SConrad Meyer     return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
61119fcbaf1SConrad Meyer }
61219fcbaf1SConrad Meyer 
613*a0483764SConrad Meyer typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
6140c16b537SWarner Losh 
6150c16b537SWarner Losh static size_t HUF_compressCTable_internal(
6160c16b537SWarner Losh                 BYTE* const ostart, BYTE* op, BYTE* const oend,
6170c16b537SWarner Losh                 const void* src, size_t srcSize,
618*a0483764SConrad Meyer                 HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
6190c16b537SWarner Losh {
620*a0483764SConrad Meyer     size_t const cSize = (nbStreams==HUF_singleStream) ?
62119fcbaf1SConrad Meyer                          HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) :
62219fcbaf1SConrad Meyer                          HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2);
6230c16b537SWarner Losh     if (HUF_isError(cSize)) { return cSize; }
6240c16b537SWarner Losh     if (cSize==0) { return 0; }   /* uncompressible */
6250c16b537SWarner Losh     op += cSize;
6260c16b537SWarner Losh     /* check compressibility */
6270c16b537SWarner Losh     if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
6280c16b537SWarner Losh     return op-ostart;
6290c16b537SWarner Losh }
6300c16b537SWarner Losh 
63119fcbaf1SConrad Meyer typedef struct {
632*a0483764SConrad Meyer     unsigned count[HUF_SYMBOLVALUE_MAX + 1];
63319fcbaf1SConrad Meyer     HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
63419fcbaf1SConrad Meyer     huffNodeTable nodeTable;
63519fcbaf1SConrad Meyer } HUF_compress_tables_t;
6360c16b537SWarner Losh 
63719fcbaf1SConrad Meyer /* HUF_compress_internal() :
63819fcbaf1SConrad Meyer  * `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
639*a0483764SConrad Meyer static size_t
640*a0483764SConrad Meyer HUF_compress_internal (void* dst, size_t dstSize,
6410c16b537SWarner Losh                  const void* src, size_t srcSize,
6420c16b537SWarner Losh                        unsigned maxSymbolValue, unsigned huffLog,
643*a0483764SConrad Meyer                        HUF_nbStreams_e nbStreams,
6440c16b537SWarner Losh                        void* workSpace, size_t wkspSize,
64519fcbaf1SConrad Meyer                        HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
64619fcbaf1SConrad Meyer                  const int bmi2)
6470c16b537SWarner Losh {
64819fcbaf1SConrad Meyer     HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
6490c16b537SWarner Losh     BYTE* const ostart = (BYTE*)dst;
6500c16b537SWarner Losh     BYTE* const oend = ostart + dstSize;
6510c16b537SWarner Losh     BYTE* op = ostart;
6520c16b537SWarner Losh 
6530c16b537SWarner Losh     /* checks & inits */
65419fcbaf1SConrad Meyer     if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
655*a0483764SConrad Meyer     if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
65619fcbaf1SConrad Meyer     if (!srcSize) return 0;  /* Uncompressed */
65719fcbaf1SConrad Meyer     if (!dstSize) return 0;  /* cannot fit anything within dst budget */
6580c16b537SWarner Losh     if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);   /* current block size limit */
6590c16b537SWarner Losh     if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
66019fcbaf1SConrad Meyer     if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
6610c16b537SWarner Losh     if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
6620c16b537SWarner Losh     if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
6630c16b537SWarner Losh 
66419fcbaf1SConrad Meyer     /* Heuristic : If old table is valid, use it for small inputs */
6650c16b537SWarner Losh     if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
66619fcbaf1SConrad Meyer         return HUF_compressCTable_internal(ostart, op, oend,
66719fcbaf1SConrad Meyer                                            src, srcSize,
668*a0483764SConrad Meyer                                            nbStreams, oldHufTable, bmi2);
6690c16b537SWarner Losh     }
6700c16b537SWarner Losh 
6710c16b537SWarner Losh     /* Scan input and build symbol stats */
672*a0483764SConrad Meyer     {   CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace, wkspSize) );
6730c16b537SWarner Losh         if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
6740f743729SConrad Meyer         if (largest <= (srcSize >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
6750c16b537SWarner Losh     }
6760c16b537SWarner Losh 
6770c16b537SWarner Losh     /* Check validity of previous table */
67819fcbaf1SConrad Meyer     if ( repeat
67919fcbaf1SConrad Meyer       && *repeat == HUF_repeat_check
68019fcbaf1SConrad Meyer       && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) {
6810c16b537SWarner Losh         *repeat = HUF_repeat_none;
6820c16b537SWarner Losh     }
6830c16b537SWarner Losh     /* Heuristic : use existing table for small inputs */
6840c16b537SWarner Losh     if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
68519fcbaf1SConrad Meyer         return HUF_compressCTable_internal(ostart, op, oend,
68619fcbaf1SConrad Meyer                                            src, srcSize,
687*a0483764SConrad Meyer                                            nbStreams, oldHufTable, bmi2);
6880c16b537SWarner Losh     }
6890c16b537SWarner Losh 
6900c16b537SWarner Losh     /* Build Huffman Tree */
6910c16b537SWarner Losh     huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
692*a0483764SConrad Meyer     {   size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
69319fcbaf1SConrad Meyer                                             maxSymbolValue, huffLog,
694*a0483764SConrad Meyer                                             table->nodeTable, sizeof(table->nodeTable));
695*a0483764SConrad Meyer         CHECK_F(maxBits);
6960c16b537SWarner Losh         huffLog = (U32)maxBits;
69719fcbaf1SConrad Meyer         /* Zero unused symbols in CTable, so we can check it for validity */
69819fcbaf1SConrad Meyer         memset(table->CTable + (maxSymbolValue + 1), 0,
69919fcbaf1SConrad Meyer                sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
7000c16b537SWarner Losh     }
7010c16b537SWarner Losh 
7020c16b537SWarner Losh     /* Write table description header */
70319fcbaf1SConrad Meyer     {   CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) );
70419fcbaf1SConrad Meyer         /* Check if using previous huffman table is beneficial */
7050c16b537SWarner Losh         if (repeat && *repeat != HUF_repeat_none) {
70619fcbaf1SConrad Meyer             size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
70719fcbaf1SConrad Meyer             size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue);
7080c16b537SWarner Losh             if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
70919fcbaf1SConrad Meyer                 return HUF_compressCTable_internal(ostart, op, oend,
71019fcbaf1SConrad Meyer                                                    src, srcSize,
711*a0483764SConrad Meyer                                                    nbStreams, oldHufTable, bmi2);
71219fcbaf1SConrad Meyer         }   }
71319fcbaf1SConrad Meyer 
71419fcbaf1SConrad Meyer         /* Use the new huffman table */
7150c16b537SWarner Losh         if (hSize + 12ul >= srcSize) { return 0; }
7160c16b537SWarner Losh         op += hSize;
7170c16b537SWarner Losh         if (repeat) { *repeat = HUF_repeat_none; }
71819fcbaf1SConrad Meyer         if (oldHufTable)
71919fcbaf1SConrad Meyer             memcpy(oldHufTable, table->CTable, sizeof(table->CTable));  /* Save new table */
7200c16b537SWarner Losh     }
72119fcbaf1SConrad Meyer     return HUF_compressCTable_internal(ostart, op, oend,
72219fcbaf1SConrad Meyer                                        src, srcSize,
723*a0483764SConrad Meyer                                        nbStreams, table->CTable, bmi2);
7240c16b537SWarner Losh }
7250c16b537SWarner Losh 
7260c16b537SWarner Losh 
7270c16b537SWarner Losh size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
7280c16b537SWarner Losh                       const void* src, size_t srcSize,
7290c16b537SWarner Losh                       unsigned maxSymbolValue, unsigned huffLog,
7300c16b537SWarner Losh                       void* workSpace, size_t wkspSize)
7310c16b537SWarner Losh {
73219fcbaf1SConrad Meyer     return HUF_compress_internal(dst, dstSize, src, srcSize,
733*a0483764SConrad Meyer                                  maxSymbolValue, huffLog, HUF_singleStream,
73419fcbaf1SConrad Meyer                                  workSpace, wkspSize,
73519fcbaf1SConrad Meyer                                  NULL, NULL, 0, 0 /*bmi2*/);
7360c16b537SWarner Losh }
7370c16b537SWarner Losh 
7380c16b537SWarner Losh size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
7390c16b537SWarner Losh                       const void* src, size_t srcSize,
7400c16b537SWarner Losh                       unsigned maxSymbolValue, unsigned huffLog,
7410c16b537SWarner Losh                       void* workSpace, size_t wkspSize,
74219fcbaf1SConrad Meyer                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
7430c16b537SWarner Losh {
74419fcbaf1SConrad Meyer     return HUF_compress_internal(dst, dstSize, src, srcSize,
745*a0483764SConrad Meyer                                  maxSymbolValue, huffLog, HUF_singleStream,
74619fcbaf1SConrad Meyer                                  workSpace, wkspSize, hufTable,
74719fcbaf1SConrad Meyer                                  repeat, preferRepeat, bmi2);
7480c16b537SWarner Losh }
7490c16b537SWarner Losh 
7500c16b537SWarner Losh size_t HUF_compress1X (void* dst, size_t dstSize,
7510c16b537SWarner Losh                  const void* src, size_t srcSize,
7520c16b537SWarner Losh                  unsigned maxSymbolValue, unsigned huffLog)
7530c16b537SWarner Losh {
75419fcbaf1SConrad Meyer     unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
7550c16b537SWarner Losh     return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
7560c16b537SWarner Losh }
7570c16b537SWarner Losh 
75819fcbaf1SConrad Meyer /* HUF_compress4X_repeat():
75919fcbaf1SConrad Meyer  * compress input using 4 streams.
76019fcbaf1SConrad Meyer  * provide workspace to generate compression tables */
7610c16b537SWarner Losh size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
7620c16b537SWarner Losh                       const void* src, size_t srcSize,
7630c16b537SWarner Losh                       unsigned maxSymbolValue, unsigned huffLog,
7640c16b537SWarner Losh                       void* workSpace, size_t wkspSize)
7650c16b537SWarner Losh {
76619fcbaf1SConrad Meyer     return HUF_compress_internal(dst, dstSize, src, srcSize,
767*a0483764SConrad Meyer                                  maxSymbolValue, huffLog, HUF_fourStreams,
76819fcbaf1SConrad Meyer                                  workSpace, wkspSize,
76919fcbaf1SConrad Meyer                                  NULL, NULL, 0, 0 /*bmi2*/);
7700c16b537SWarner Losh }
7710c16b537SWarner Losh 
77219fcbaf1SConrad Meyer /* HUF_compress4X_repeat():
77319fcbaf1SConrad Meyer  * compress input using 4 streams.
77419fcbaf1SConrad Meyer  * re-use an existing huffman compression table */
7750c16b537SWarner Losh size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
7760c16b537SWarner Losh                       const void* src, size_t srcSize,
7770c16b537SWarner Losh                       unsigned maxSymbolValue, unsigned huffLog,
7780c16b537SWarner Losh                       void* workSpace, size_t wkspSize,
77919fcbaf1SConrad Meyer                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
7800c16b537SWarner Losh {
78119fcbaf1SConrad Meyer     return HUF_compress_internal(dst, dstSize, src, srcSize,
782*a0483764SConrad Meyer                                  maxSymbolValue, huffLog, HUF_fourStreams,
78319fcbaf1SConrad Meyer                                  workSpace, wkspSize,
78419fcbaf1SConrad Meyer                                  hufTable, repeat, preferRepeat, bmi2);
7850c16b537SWarner Losh }
7860c16b537SWarner Losh 
7870c16b537SWarner Losh size_t HUF_compress2 (void* dst, size_t dstSize,
7880c16b537SWarner Losh                 const void* src, size_t srcSize,
7890c16b537SWarner Losh                 unsigned maxSymbolValue, unsigned huffLog)
7900c16b537SWarner Losh {
79119fcbaf1SConrad Meyer     unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
7920c16b537SWarner Losh     return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
7930c16b537SWarner Losh }
7940c16b537SWarner Losh 
7950c16b537SWarner Losh size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
7960c16b537SWarner Losh {
79719fcbaf1SConrad Meyer     return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
7980c16b537SWarner Losh }
799