10c16b537SWarner Losh /* ****************************************************************** 20c16b537SWarner Losh Huffman encoder, part of New Generation Entropy library 30c16b537SWarner Losh Copyright (C) 2013-2016, Yann Collet. 40c16b537SWarner Losh 50c16b537SWarner Losh BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 60c16b537SWarner Losh 70c16b537SWarner Losh Redistribution and use in source and binary forms, with or without 80c16b537SWarner Losh modification, are permitted provided that the following conditions are 90c16b537SWarner Losh met: 100c16b537SWarner Losh 110c16b537SWarner Losh * Redistributions of source code must retain the above copyright 120c16b537SWarner Losh notice, this list of conditions and the following disclaimer. 130c16b537SWarner Losh * Redistributions in binary form must reproduce the above 140c16b537SWarner Losh copyright notice, this list of conditions and the following disclaimer 150c16b537SWarner Losh in the documentation and/or other materials provided with the 160c16b537SWarner Losh distribution. 170c16b537SWarner Losh 180c16b537SWarner Losh THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 190c16b537SWarner Losh "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 200c16b537SWarner Losh LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 210c16b537SWarner Losh A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 220c16b537SWarner Losh OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 230c16b537SWarner Losh SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 240c16b537SWarner Losh LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 250c16b537SWarner Losh DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 260c16b537SWarner Losh THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 270c16b537SWarner Losh (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 280c16b537SWarner Losh OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 290c16b537SWarner Losh 300c16b537SWarner Losh You can contact the author at : 310c16b537SWarner Losh - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy 320c16b537SWarner Losh - Public forum : https://groups.google.com/forum/#!forum/lz4c 330c16b537SWarner Losh ****************************************************************** */ 340c16b537SWarner Losh 350c16b537SWarner Losh /* ************************************************************** 360c16b537SWarner Losh * Compiler specifics 370c16b537SWarner Losh ****************************************************************/ 380c16b537SWarner Losh #ifdef _MSC_VER /* Visual Studio */ 390c16b537SWarner Losh # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ 400c16b537SWarner Losh #endif 410c16b537SWarner Losh 420c16b537SWarner Losh 430c16b537SWarner Losh /* ************************************************************** 440c16b537SWarner Losh * Includes 450c16b537SWarner Losh ****************************************************************/ 460c16b537SWarner Losh #include <string.h> /* memcpy, memset */ 470c16b537SWarner Losh #include <stdio.h> /* printf (debug) */ 4819fcbaf1SConrad Meyer #include "compiler.h" 490f743729SConrad Meyer #include "bitstream.h" 500f743729SConrad Meyer #include "hist.h" 510c16b537SWarner Losh #define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */ 520c16b537SWarner Losh #include "fse.h" /* header compression */ 530c16b537SWarner Losh #define HUF_STATIC_LINKING_ONLY 540c16b537SWarner Losh #include "huf.h" 550c16b537SWarner Losh #include "error_private.h" 560c16b537SWarner Losh 570c16b537SWarner Losh 580c16b537SWarner Losh /* ************************************************************** 590c16b537SWarner Losh * Error Management 600c16b537SWarner Losh ****************************************************************/ 610c16b537SWarner Losh #define HUF_isError ERR_isError 620f743729SConrad Meyer #define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ 630c16b537SWarner Losh #define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e 640c16b537SWarner Losh #define CHECK_F(f) { CHECK_V_F(_var_err__, f); } 650c16b537SWarner Losh 660c16b537SWarner Losh 670c16b537SWarner Losh /* ************************************************************** 680c16b537SWarner Losh * Utils 690c16b537SWarner Losh ****************************************************************/ 700c16b537SWarner Losh unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) 710c16b537SWarner Losh { 720c16b537SWarner Losh return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); 730c16b537SWarner Losh } 740c16b537SWarner Losh 750c16b537SWarner Losh 760c16b537SWarner Losh /* ******************************************************* 770c16b537SWarner Losh * HUF : Huffman block compression 780c16b537SWarner Losh *********************************************************/ 790c16b537SWarner Losh /* HUF_compressWeights() : 800c16b537SWarner Losh * Same as FSE_compress(), but dedicated to huff0's weights compression. 810c16b537SWarner Losh * The use case needs much less stack memory. 820c16b537SWarner Losh * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX. 830c16b537SWarner Losh */ 840c16b537SWarner Losh #define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6 850f743729SConrad Meyer static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize) 860c16b537SWarner Losh { 870c16b537SWarner Losh BYTE* const ostart = (BYTE*) dst; 880c16b537SWarner Losh BYTE* op = ostart; 890c16b537SWarner Losh BYTE* const oend = ostart + dstSize; 900c16b537SWarner Losh 91*a0483764SConrad Meyer unsigned maxSymbolValue = HUF_TABLELOG_MAX; 920c16b537SWarner Losh U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER; 930c16b537SWarner Losh 940c16b537SWarner Losh FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)]; 950c16b537SWarner Losh BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER]; 960c16b537SWarner Losh 97*a0483764SConrad Meyer unsigned count[HUF_TABLELOG_MAX+1]; 980c16b537SWarner Losh S16 norm[HUF_TABLELOG_MAX+1]; 990c16b537SWarner Losh 1000c16b537SWarner Losh /* init conditions */ 1010c16b537SWarner Losh if (wtSize <= 1) return 0; /* Not compressible */ 1020c16b537SWarner Losh 1030c16b537SWarner Losh /* Scan input and build symbol stats */ 1040f743729SConrad Meyer { unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize); /* never fails */ 1050c16b537SWarner Losh if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */ 1060c16b537SWarner Losh if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */ 1070c16b537SWarner Losh } 1080c16b537SWarner Losh 1090c16b537SWarner Losh tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue); 1100c16b537SWarner Losh CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) ); 1110c16b537SWarner Losh 1120c16b537SWarner Losh /* Write table description header */ 1130c16b537SWarner Losh { CHECK_V_F(hSize, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) ); 1140c16b537SWarner Losh op += hSize; 1150c16b537SWarner Losh } 1160c16b537SWarner Losh 1170c16b537SWarner Losh /* Compress */ 1180c16b537SWarner Losh CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) ); 1190c16b537SWarner Losh { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable) ); 1200c16b537SWarner Losh if (cSize == 0) return 0; /* not enough space for compressed data */ 1210c16b537SWarner Losh op += cSize; 1220c16b537SWarner Losh } 1230c16b537SWarner Losh 1240c16b537SWarner Losh return op-ostart; 1250c16b537SWarner Losh } 1260c16b537SWarner Losh 1270c16b537SWarner Losh 1280c16b537SWarner Losh struct HUF_CElt_s { 1290c16b537SWarner Losh U16 val; 1300c16b537SWarner Losh BYTE nbBits; 1310c16b537SWarner Losh }; /* typedef'd to HUF_CElt within "huf.h" */ 1320c16b537SWarner Losh 1330c16b537SWarner Losh /*! HUF_writeCTable() : 1340c16b537SWarner Losh `CTable` : Huffman tree to save, using huf representation. 1350c16b537SWarner Losh @return : size of saved CTable */ 1360c16b537SWarner Losh size_t HUF_writeCTable (void* dst, size_t maxDstSize, 137*a0483764SConrad Meyer const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog) 1380c16b537SWarner Losh { 1390c16b537SWarner Losh BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */ 1400c16b537SWarner Losh BYTE huffWeight[HUF_SYMBOLVALUE_MAX]; 1410c16b537SWarner Losh BYTE* op = (BYTE*)dst; 1420c16b537SWarner Losh U32 n; 1430c16b537SWarner Losh 1440c16b537SWarner Losh /* check conditions */ 1450c16b537SWarner Losh if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); 1460c16b537SWarner Losh 1470c16b537SWarner Losh /* convert to weight */ 1480c16b537SWarner Losh bitsToWeight[0] = 0; 1490c16b537SWarner Losh for (n=1; n<huffLog+1; n++) 1500c16b537SWarner Losh bitsToWeight[n] = (BYTE)(huffLog + 1 - n); 1510c16b537SWarner Losh for (n=0; n<maxSymbolValue; n++) 1520c16b537SWarner Losh huffWeight[n] = bitsToWeight[CTable[n].nbBits]; 1530c16b537SWarner Losh 1540c16b537SWarner Losh /* attempt weights compression by FSE */ 1550c16b537SWarner Losh { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) ); 1560c16b537SWarner Losh if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */ 1570c16b537SWarner Losh op[0] = (BYTE)hSize; 1580c16b537SWarner Losh return hSize+1; 1590c16b537SWarner Losh } } 1600c16b537SWarner Losh 1610c16b537SWarner Losh /* write raw values as 4-bits (max : 15) */ 1620c16b537SWarner Losh if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */ 1630c16b537SWarner Losh if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */ 1640c16b537SWarner Losh op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1)); 1650c16b537SWarner Losh huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */ 1660c16b537SWarner Losh for (n=0; n<maxSymbolValue; n+=2) 1670c16b537SWarner Losh op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]); 1680c16b537SWarner Losh return ((maxSymbolValue+1)/2) + 1; 1690c16b537SWarner Losh } 1700c16b537SWarner Losh 1710c16b537SWarner Losh 172*a0483764SConrad Meyer size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize) 1730c16b537SWarner Losh { 1740c16b537SWarner Losh BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */ 1750c16b537SWarner Losh U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */ 1760c16b537SWarner Losh U32 tableLog = 0; 1770c16b537SWarner Losh U32 nbSymbols = 0; 1780c16b537SWarner Losh 1790c16b537SWarner Losh /* get symbol weights */ 1800c16b537SWarner Losh CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize)); 1810c16b537SWarner Losh 1820c16b537SWarner Losh /* check result */ 1830c16b537SWarner Losh if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); 1840c16b537SWarner Losh if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall); 1850c16b537SWarner Losh 1860c16b537SWarner Losh /* Prepare base value per rank */ 1870c16b537SWarner Losh { U32 n, nextRankStart = 0; 1880c16b537SWarner Losh for (n=1; n<=tableLog; n++) { 1890c16b537SWarner Losh U32 current = nextRankStart; 1900c16b537SWarner Losh nextRankStart += (rankVal[n] << (n-1)); 1910c16b537SWarner Losh rankVal[n] = current; 1920c16b537SWarner Losh } } 1930c16b537SWarner Losh 1940c16b537SWarner Losh /* fill nbBits */ 1950c16b537SWarner Losh { U32 n; for (n=0; n<nbSymbols; n++) { 1960c16b537SWarner Losh const U32 w = huffWeight[n]; 1970c16b537SWarner Losh CTable[n].nbBits = (BYTE)(tableLog + 1 - w); 1980c16b537SWarner Losh } } 1990c16b537SWarner Losh 2000c16b537SWarner Losh /* fill val */ 2010c16b537SWarner Losh { U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0}; /* support w=0=>n=tableLog+1 */ 2020c16b537SWarner Losh U16 valPerRank[HUF_TABLELOG_MAX+2] = {0}; 2030c16b537SWarner Losh { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; } 2040c16b537SWarner Losh /* determine stating value per rank */ 2050c16b537SWarner Losh valPerRank[tableLog+1] = 0; /* for w==0 */ 2060c16b537SWarner Losh { U16 min = 0; 2070c16b537SWarner Losh U32 n; for (n=tableLog; n>0; n--) { /* start at n=tablelog <-> w=1 */ 2080c16b537SWarner Losh valPerRank[n] = min; /* get starting value within each rank */ 2090c16b537SWarner Losh min += nbPerRank[n]; 2100c16b537SWarner Losh min >>= 1; 2110c16b537SWarner Losh } } 2120c16b537SWarner Losh /* assign value within rank, symbol order */ 2130c16b537SWarner Losh { U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; } 2140c16b537SWarner Losh } 2150c16b537SWarner Losh 2160c16b537SWarner Losh *maxSymbolValuePtr = nbSymbols - 1; 2170c16b537SWarner Losh return readSize; 2180c16b537SWarner Losh } 2190c16b537SWarner Losh 2200f743729SConrad Meyer U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue) 2210f743729SConrad Meyer { 2220f743729SConrad Meyer const HUF_CElt* table = (const HUF_CElt*)symbolTable; 2230f743729SConrad Meyer assert(symbolValue <= HUF_SYMBOLVALUE_MAX); 2240f743729SConrad Meyer return table[symbolValue].nbBits; 2250f743729SConrad Meyer } 2260f743729SConrad Meyer 2270c16b537SWarner Losh 2280c16b537SWarner Losh typedef struct nodeElt_s { 2290c16b537SWarner Losh U32 count; 2300c16b537SWarner Losh U16 parent; 2310c16b537SWarner Losh BYTE byte; 2320c16b537SWarner Losh BYTE nbBits; 2330c16b537SWarner Losh } nodeElt; 2340c16b537SWarner Losh 2350c16b537SWarner Losh static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) 2360c16b537SWarner Losh { 2370c16b537SWarner Losh const U32 largestBits = huffNode[lastNonNull].nbBits; 2380c16b537SWarner Losh if (largestBits <= maxNbBits) return largestBits; /* early exit : no elt > maxNbBits */ 2390c16b537SWarner Losh 2400c16b537SWarner Losh /* there are several too large elements (at least >= 2) */ 2410c16b537SWarner Losh { int totalCost = 0; 2420c16b537SWarner Losh const U32 baseCost = 1 << (largestBits - maxNbBits); 2430c16b537SWarner Losh U32 n = lastNonNull; 2440c16b537SWarner Losh 2450c16b537SWarner Losh while (huffNode[n].nbBits > maxNbBits) { 2460c16b537SWarner Losh totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)); 2470c16b537SWarner Losh huffNode[n].nbBits = (BYTE)maxNbBits; 2480c16b537SWarner Losh n --; 2490c16b537SWarner Losh } /* n stops at huffNode[n].nbBits <= maxNbBits */ 2500c16b537SWarner Losh while (huffNode[n].nbBits == maxNbBits) n--; /* n end at index of smallest symbol using < maxNbBits */ 2510c16b537SWarner Losh 2520c16b537SWarner Losh /* renorm totalCost */ 2530c16b537SWarner Losh totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */ 2540c16b537SWarner Losh 2550c16b537SWarner Losh /* repay normalized cost */ 2560c16b537SWarner Losh { U32 const noSymbol = 0xF0F0F0F0; 2570c16b537SWarner Losh U32 rankLast[HUF_TABLELOG_MAX+2]; 2580c16b537SWarner Losh int pos; 2590c16b537SWarner Losh 2600c16b537SWarner Losh /* Get pos of last (smallest) symbol per rank */ 2610c16b537SWarner Losh memset(rankLast, 0xF0, sizeof(rankLast)); 2620c16b537SWarner Losh { U32 currentNbBits = maxNbBits; 2630c16b537SWarner Losh for (pos=n ; pos >= 0; pos--) { 2640c16b537SWarner Losh if (huffNode[pos].nbBits >= currentNbBits) continue; 2650c16b537SWarner Losh currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */ 2660c16b537SWarner Losh rankLast[maxNbBits-currentNbBits] = pos; 2670c16b537SWarner Losh } } 2680c16b537SWarner Losh 2690c16b537SWarner Losh while (totalCost > 0) { 2700c16b537SWarner Losh U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1; 2710c16b537SWarner Losh for ( ; nBitsToDecrease > 1; nBitsToDecrease--) { 2720c16b537SWarner Losh U32 highPos = rankLast[nBitsToDecrease]; 2730c16b537SWarner Losh U32 lowPos = rankLast[nBitsToDecrease-1]; 2740c16b537SWarner Losh if (highPos == noSymbol) continue; 2750c16b537SWarner Losh if (lowPos == noSymbol) break; 2760c16b537SWarner Losh { U32 const highTotal = huffNode[highPos].count; 2770c16b537SWarner Losh U32 const lowTotal = 2 * huffNode[lowPos].count; 2780c16b537SWarner Losh if (highTotal <= lowTotal) break; 2790c16b537SWarner Losh } } 2800c16b537SWarner Losh /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */ 2810c16b537SWarner Losh /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */ 2820c16b537SWarner Losh while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol)) 2830c16b537SWarner Losh nBitsToDecrease ++; 2840c16b537SWarner Losh totalCost -= 1 << (nBitsToDecrease-1); 2850c16b537SWarner Losh if (rankLast[nBitsToDecrease-1] == noSymbol) 2860c16b537SWarner Losh rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */ 2870c16b537SWarner Losh huffNode[rankLast[nBitsToDecrease]].nbBits ++; 2880c16b537SWarner Losh if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */ 2890c16b537SWarner Losh rankLast[nBitsToDecrease] = noSymbol; 2900c16b537SWarner Losh else { 2910c16b537SWarner Losh rankLast[nBitsToDecrease]--; 2920c16b537SWarner Losh if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease) 2930c16b537SWarner Losh rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */ 2940c16b537SWarner Losh } } /* while (totalCost > 0) */ 2950c16b537SWarner Losh 2960c16b537SWarner Losh while (totalCost < 0) { /* Sometimes, cost correction overshoot */ 2970c16b537SWarner Losh if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */ 2980c16b537SWarner Losh while (huffNode[n].nbBits == maxNbBits) n--; 2990c16b537SWarner Losh huffNode[n+1].nbBits--; 3000c16b537SWarner Losh rankLast[1] = n+1; 3010c16b537SWarner Losh totalCost++; 3020c16b537SWarner Losh continue; 3030c16b537SWarner Losh } 3040c16b537SWarner Losh huffNode[ rankLast[1] + 1 ].nbBits--; 3050c16b537SWarner Losh rankLast[1]++; 3060c16b537SWarner Losh totalCost ++; 3070c16b537SWarner Losh } } } /* there are several too large elements (at least >= 2) */ 3080c16b537SWarner Losh 3090c16b537SWarner Losh return maxNbBits; 3100c16b537SWarner Losh } 3110c16b537SWarner Losh 3120c16b537SWarner Losh 3130c16b537SWarner Losh typedef struct { 3140c16b537SWarner Losh U32 base; 3150c16b537SWarner Losh U32 current; 3160c16b537SWarner Losh } rankPos; 3170c16b537SWarner Losh 318*a0483764SConrad Meyer static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue) 3190c16b537SWarner Losh { 3200c16b537SWarner Losh rankPos rank[32]; 3210c16b537SWarner Losh U32 n; 3220c16b537SWarner Losh 3230c16b537SWarner Losh memset(rank, 0, sizeof(rank)); 3240c16b537SWarner Losh for (n=0; n<=maxSymbolValue; n++) { 3250c16b537SWarner Losh U32 r = BIT_highbit32(count[n] + 1); 3260c16b537SWarner Losh rank[r].base ++; 3270c16b537SWarner Losh } 3280c16b537SWarner Losh for (n=30; n>0; n--) rank[n-1].base += rank[n].base; 3290c16b537SWarner Losh for (n=0; n<32; n++) rank[n].current = rank[n].base; 3300c16b537SWarner Losh for (n=0; n<=maxSymbolValue; n++) { 3310c16b537SWarner Losh U32 const c = count[n]; 3320c16b537SWarner Losh U32 const r = BIT_highbit32(c+1) + 1; 3330c16b537SWarner Losh U32 pos = rank[r].current++; 33419fcbaf1SConrad Meyer while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) { 33519fcbaf1SConrad Meyer huffNode[pos] = huffNode[pos-1]; 33619fcbaf1SConrad Meyer pos--; 33719fcbaf1SConrad Meyer } 3380c16b537SWarner Losh huffNode[pos].count = c; 3390c16b537SWarner Losh huffNode[pos].byte = (BYTE)n; 3400c16b537SWarner Losh } 3410c16b537SWarner Losh } 3420c16b537SWarner Losh 3430c16b537SWarner Losh 3440c16b537SWarner Losh /** HUF_buildCTable_wksp() : 3450c16b537SWarner Losh * Same as HUF_buildCTable(), but using externally allocated scratch buffer. 34619fcbaf1SConrad Meyer * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of HUF_CTABLE_WORKSPACE_SIZE_U32 unsigned. 3470c16b537SWarner Losh */ 3480c16b537SWarner Losh #define STARTNODE (HUF_SYMBOLVALUE_MAX+1) 34919fcbaf1SConrad Meyer typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32]; 350*a0483764SConrad Meyer size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize) 3510c16b537SWarner Losh { 3520c16b537SWarner Losh nodeElt* const huffNode0 = (nodeElt*)workSpace; 3530c16b537SWarner Losh nodeElt* const huffNode = huffNode0+1; 3540c16b537SWarner Losh U32 n, nonNullRank; 3550c16b537SWarner Losh int lowS, lowN; 3560c16b537SWarner Losh U16 nodeNb = STARTNODE; 3570c16b537SWarner Losh U32 nodeRoot; 3580c16b537SWarner Losh 3590c16b537SWarner Losh /* safety checks */ 36019fcbaf1SConrad Meyer if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ 36119fcbaf1SConrad Meyer if (wkspSize < sizeof(huffNodeTable)) return ERROR(workSpace_tooSmall); 3620c16b537SWarner Losh if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT; 36319fcbaf1SConrad Meyer if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); 3640c16b537SWarner Losh memset(huffNode0, 0, sizeof(huffNodeTable)); 3650c16b537SWarner Losh 3660c16b537SWarner Losh /* sort, decreasing order */ 3670c16b537SWarner Losh HUF_sort(huffNode, count, maxSymbolValue); 3680c16b537SWarner Losh 3690c16b537SWarner Losh /* init for parents */ 3700c16b537SWarner Losh nonNullRank = maxSymbolValue; 3710c16b537SWarner Losh while(huffNode[nonNullRank].count == 0) nonNullRank--; 3720c16b537SWarner Losh lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb; 3730c16b537SWarner Losh huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count; 3740c16b537SWarner Losh huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb; 3750c16b537SWarner Losh nodeNb++; lowS-=2; 3760c16b537SWarner Losh for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30); 3770c16b537SWarner Losh huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */ 3780c16b537SWarner Losh 3790c16b537SWarner Losh /* create parents */ 3800c16b537SWarner Losh while (nodeNb <= nodeRoot) { 3810c16b537SWarner Losh U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; 3820c16b537SWarner Losh U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; 3830c16b537SWarner Losh huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count; 3840c16b537SWarner Losh huffNode[n1].parent = huffNode[n2].parent = nodeNb; 3850c16b537SWarner Losh nodeNb++; 3860c16b537SWarner Losh } 3870c16b537SWarner Losh 3880c16b537SWarner Losh /* distribute weights (unlimited tree height) */ 3890c16b537SWarner Losh huffNode[nodeRoot].nbBits = 0; 3900c16b537SWarner Losh for (n=nodeRoot-1; n>=STARTNODE; n--) 3910c16b537SWarner Losh huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; 3920c16b537SWarner Losh for (n=0; n<=nonNullRank; n++) 3930c16b537SWarner Losh huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; 3940c16b537SWarner Losh 3950c16b537SWarner Losh /* enforce maxTableLog */ 3960c16b537SWarner Losh maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits); 3970c16b537SWarner Losh 3980c16b537SWarner Losh /* fill result into tree (val, nbBits) */ 3990c16b537SWarner Losh { U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0}; 4000c16b537SWarner Losh U16 valPerRank[HUF_TABLELOG_MAX+1] = {0}; 4010c16b537SWarner Losh if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */ 4020c16b537SWarner Losh for (n=0; n<=nonNullRank; n++) 4030c16b537SWarner Losh nbPerRank[huffNode[n].nbBits]++; 4040c16b537SWarner Losh /* determine stating value per rank */ 4050c16b537SWarner Losh { U16 min = 0; 4060c16b537SWarner Losh for (n=maxNbBits; n>0; n--) { 4070c16b537SWarner Losh valPerRank[n] = min; /* get starting value within each rank */ 4080c16b537SWarner Losh min += nbPerRank[n]; 4090c16b537SWarner Losh min >>= 1; 4100c16b537SWarner Losh } } 4110c16b537SWarner Losh for (n=0; n<=maxSymbolValue; n++) 4120c16b537SWarner Losh tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */ 4130c16b537SWarner Losh for (n=0; n<=maxSymbolValue; n++) 4140c16b537SWarner Losh tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */ 4150c16b537SWarner Losh } 4160c16b537SWarner Losh 4170c16b537SWarner Losh return maxNbBits; 4180c16b537SWarner Losh } 4190c16b537SWarner Losh 4200c16b537SWarner Losh /** HUF_buildCTable() : 42119fcbaf1SConrad Meyer * @return : maxNbBits 4220c16b537SWarner Losh * Note : count is used before tree is written, so they can safely overlap 4230c16b537SWarner Losh */ 424*a0483764SConrad Meyer size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits) 4250c16b537SWarner Losh { 4260c16b537SWarner Losh huffNodeTable nodeTable; 4270c16b537SWarner Losh return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable)); 4280c16b537SWarner Losh } 4290c16b537SWarner Losh 4300c16b537SWarner Losh static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) 4310c16b537SWarner Losh { 4320c16b537SWarner Losh size_t nbBits = 0; 4330c16b537SWarner Losh int s; 4340c16b537SWarner Losh for (s = 0; s <= (int)maxSymbolValue; ++s) { 4350c16b537SWarner Losh nbBits += CTable[s].nbBits * count[s]; 4360c16b537SWarner Losh } 4370c16b537SWarner Losh return nbBits >> 3; 4380c16b537SWarner Losh } 4390c16b537SWarner Losh 4400c16b537SWarner Losh static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) { 4410c16b537SWarner Losh int bad = 0; 4420c16b537SWarner Losh int s; 4430c16b537SWarner Losh for (s = 0; s <= (int)maxSymbolValue; ++s) { 4440c16b537SWarner Losh bad |= (count[s] != 0) & (CTable[s].nbBits == 0); 4450c16b537SWarner Losh } 4460c16b537SWarner Losh return !bad; 4470c16b537SWarner Losh } 4480c16b537SWarner Losh 44919fcbaf1SConrad Meyer size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } 45019fcbaf1SConrad Meyer 45119fcbaf1SConrad Meyer FORCE_INLINE_TEMPLATE void 45219fcbaf1SConrad Meyer HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable) 4530c16b537SWarner Losh { 4540c16b537SWarner Losh BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits); 4550c16b537SWarner Losh } 4560c16b537SWarner Losh 4570c16b537SWarner Losh #define HUF_FLUSHBITS(s) BIT_flushBits(s) 4580c16b537SWarner Losh 4590c16b537SWarner Losh #define HUF_FLUSHBITS_1(stream) \ 4600c16b537SWarner Losh if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream) 4610c16b537SWarner Losh 4620c16b537SWarner Losh #define HUF_FLUSHBITS_2(stream) \ 4630c16b537SWarner Losh if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream) 4640c16b537SWarner Losh 46519fcbaf1SConrad Meyer FORCE_INLINE_TEMPLATE size_t 46619fcbaf1SConrad Meyer HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize, 46719fcbaf1SConrad Meyer const void* src, size_t srcSize, 46819fcbaf1SConrad Meyer const HUF_CElt* CTable) 4690c16b537SWarner Losh { 4700c16b537SWarner Losh const BYTE* ip = (const BYTE*) src; 4710c16b537SWarner Losh BYTE* const ostart = (BYTE*)dst; 4720c16b537SWarner Losh BYTE* const oend = ostart + dstSize; 4730c16b537SWarner Losh BYTE* op = ostart; 4740c16b537SWarner Losh size_t n; 4750c16b537SWarner Losh BIT_CStream_t bitC; 4760c16b537SWarner Losh 4770c16b537SWarner Losh /* init */ 4780c16b537SWarner Losh if (dstSize < 8) return 0; /* not enough space to compress */ 4790c16b537SWarner Losh { size_t const initErr = BIT_initCStream(&bitC, op, oend-op); 4800c16b537SWarner Losh if (HUF_isError(initErr)) return 0; } 4810c16b537SWarner Losh 4820c16b537SWarner Losh n = srcSize & ~3; /* join to mod 4 */ 4830c16b537SWarner Losh switch (srcSize & 3) 4840c16b537SWarner Losh { 4850c16b537SWarner Losh case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable); 4860c16b537SWarner Losh HUF_FLUSHBITS_2(&bitC); 4870c16b537SWarner Losh /* fall-through */ 4880c16b537SWarner Losh case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable); 4890c16b537SWarner Losh HUF_FLUSHBITS_1(&bitC); 4900c16b537SWarner Losh /* fall-through */ 4910c16b537SWarner Losh case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable); 4920c16b537SWarner Losh HUF_FLUSHBITS(&bitC); 4930c16b537SWarner Losh /* fall-through */ 4940c16b537SWarner Losh case 0 : /* fall-through */ 4950c16b537SWarner Losh default: break; 4960c16b537SWarner Losh } 4970c16b537SWarner Losh 4980c16b537SWarner Losh for (; n>0; n-=4) { /* note : n&3==0 at this stage */ 4990c16b537SWarner Losh HUF_encodeSymbol(&bitC, ip[n- 1], CTable); 5000c16b537SWarner Losh HUF_FLUSHBITS_1(&bitC); 5010c16b537SWarner Losh HUF_encodeSymbol(&bitC, ip[n- 2], CTable); 5020c16b537SWarner Losh HUF_FLUSHBITS_2(&bitC); 5030c16b537SWarner Losh HUF_encodeSymbol(&bitC, ip[n- 3], CTable); 5040c16b537SWarner Losh HUF_FLUSHBITS_1(&bitC); 5050c16b537SWarner Losh HUF_encodeSymbol(&bitC, ip[n- 4], CTable); 5060c16b537SWarner Losh HUF_FLUSHBITS(&bitC); 5070c16b537SWarner Losh } 5080c16b537SWarner Losh 5090c16b537SWarner Losh return BIT_closeCStream(&bitC); 5100c16b537SWarner Losh } 5110c16b537SWarner Losh 51219fcbaf1SConrad Meyer #if DYNAMIC_BMI2 5130c16b537SWarner Losh 51419fcbaf1SConrad Meyer static TARGET_ATTRIBUTE("bmi2") size_t 51519fcbaf1SConrad Meyer HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize, 51619fcbaf1SConrad Meyer const void* src, size_t srcSize, 51719fcbaf1SConrad Meyer const HUF_CElt* CTable) 51819fcbaf1SConrad Meyer { 51919fcbaf1SConrad Meyer return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); 52019fcbaf1SConrad Meyer } 52119fcbaf1SConrad Meyer 52219fcbaf1SConrad Meyer static size_t 52319fcbaf1SConrad Meyer HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize, 52419fcbaf1SConrad Meyer const void* src, size_t srcSize, 52519fcbaf1SConrad Meyer const HUF_CElt* CTable) 52619fcbaf1SConrad Meyer { 52719fcbaf1SConrad Meyer return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); 52819fcbaf1SConrad Meyer } 52919fcbaf1SConrad Meyer 53019fcbaf1SConrad Meyer static size_t 53119fcbaf1SConrad Meyer HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, 53219fcbaf1SConrad Meyer const void* src, size_t srcSize, 53319fcbaf1SConrad Meyer const HUF_CElt* CTable, const int bmi2) 53419fcbaf1SConrad Meyer { 53519fcbaf1SConrad Meyer if (bmi2) { 53619fcbaf1SConrad Meyer return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable); 53719fcbaf1SConrad Meyer } 53819fcbaf1SConrad Meyer return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable); 53919fcbaf1SConrad Meyer } 54019fcbaf1SConrad Meyer 54119fcbaf1SConrad Meyer #else 54219fcbaf1SConrad Meyer 54319fcbaf1SConrad Meyer static size_t 54419fcbaf1SConrad Meyer HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, 54519fcbaf1SConrad Meyer const void* src, size_t srcSize, 54619fcbaf1SConrad Meyer const HUF_CElt* CTable, const int bmi2) 54719fcbaf1SConrad Meyer { 54819fcbaf1SConrad Meyer (void)bmi2; 54919fcbaf1SConrad Meyer return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); 55019fcbaf1SConrad Meyer } 55119fcbaf1SConrad Meyer 55219fcbaf1SConrad Meyer #endif 55319fcbaf1SConrad Meyer 55419fcbaf1SConrad Meyer size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) 55519fcbaf1SConrad Meyer { 55619fcbaf1SConrad Meyer return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); 55719fcbaf1SConrad Meyer } 55819fcbaf1SConrad Meyer 55919fcbaf1SConrad Meyer 56019fcbaf1SConrad Meyer static size_t 56119fcbaf1SConrad Meyer HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, 56219fcbaf1SConrad Meyer const void* src, size_t srcSize, 56319fcbaf1SConrad Meyer const HUF_CElt* CTable, int bmi2) 5640c16b537SWarner Losh { 5650c16b537SWarner Losh size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */ 5660c16b537SWarner Losh const BYTE* ip = (const BYTE*) src; 5670c16b537SWarner Losh const BYTE* const iend = ip + srcSize; 5680c16b537SWarner Losh BYTE* const ostart = (BYTE*) dst; 5690c16b537SWarner Losh BYTE* const oend = ostart + dstSize; 5700c16b537SWarner Losh BYTE* op = ostart; 5710c16b537SWarner Losh 5720c16b537SWarner Losh if (dstSize < 6 + 1 + 1 + 1 + 8) return 0; /* minimum space to compress successfully */ 5730c16b537SWarner Losh if (srcSize < 12) return 0; /* no saving possible : too small input */ 5740c16b537SWarner Losh op += 6; /* jumpTable */ 5750c16b537SWarner Losh 57619fcbaf1SConrad Meyer { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); 5770c16b537SWarner Losh if (cSize==0) return 0; 57819fcbaf1SConrad Meyer assert(cSize <= 65535); 5790c16b537SWarner Losh MEM_writeLE16(ostart, (U16)cSize); 5800c16b537SWarner Losh op += cSize; 5810c16b537SWarner Losh } 5820c16b537SWarner Losh 5830c16b537SWarner Losh ip += segmentSize; 58419fcbaf1SConrad Meyer { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); 5850c16b537SWarner Losh if (cSize==0) return 0; 58619fcbaf1SConrad Meyer assert(cSize <= 65535); 5870c16b537SWarner Losh MEM_writeLE16(ostart+2, (U16)cSize); 5880c16b537SWarner Losh op += cSize; 5890c16b537SWarner Losh } 5900c16b537SWarner Losh 5910c16b537SWarner Losh ip += segmentSize; 59219fcbaf1SConrad Meyer { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); 5930c16b537SWarner Losh if (cSize==0) return 0; 59419fcbaf1SConrad Meyer assert(cSize <= 65535); 5950c16b537SWarner Losh MEM_writeLE16(ostart+4, (U16)cSize); 5960c16b537SWarner Losh op += cSize; 5970c16b537SWarner Losh } 5980c16b537SWarner Losh 5990c16b537SWarner Losh ip += segmentSize; 60019fcbaf1SConrad Meyer { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, iend-ip, CTable, bmi2) ); 6010c16b537SWarner Losh if (cSize==0) return 0; 6020c16b537SWarner Losh op += cSize; 6030c16b537SWarner Losh } 6040c16b537SWarner Losh 6050c16b537SWarner Losh return op-ostart; 6060c16b537SWarner Losh } 6070c16b537SWarner Losh 60819fcbaf1SConrad Meyer size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) 60919fcbaf1SConrad Meyer { 61019fcbaf1SConrad Meyer return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); 61119fcbaf1SConrad Meyer } 61219fcbaf1SConrad Meyer 613*a0483764SConrad Meyer typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e; 6140c16b537SWarner Losh 6150c16b537SWarner Losh static size_t HUF_compressCTable_internal( 6160c16b537SWarner Losh BYTE* const ostart, BYTE* op, BYTE* const oend, 6170c16b537SWarner Losh const void* src, size_t srcSize, 618*a0483764SConrad Meyer HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2) 6190c16b537SWarner Losh { 620*a0483764SConrad Meyer size_t const cSize = (nbStreams==HUF_singleStream) ? 62119fcbaf1SConrad Meyer HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) : 62219fcbaf1SConrad Meyer HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2); 6230c16b537SWarner Losh if (HUF_isError(cSize)) { return cSize; } 6240c16b537SWarner Losh if (cSize==0) { return 0; } /* uncompressible */ 6250c16b537SWarner Losh op += cSize; 6260c16b537SWarner Losh /* check compressibility */ 6270c16b537SWarner Losh if ((size_t)(op-ostart) >= srcSize-1) { return 0; } 6280c16b537SWarner Losh return op-ostart; 6290c16b537SWarner Losh } 6300c16b537SWarner Losh 63119fcbaf1SConrad Meyer typedef struct { 632*a0483764SConrad Meyer unsigned count[HUF_SYMBOLVALUE_MAX + 1]; 63319fcbaf1SConrad Meyer HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1]; 63419fcbaf1SConrad Meyer huffNodeTable nodeTable; 63519fcbaf1SConrad Meyer } HUF_compress_tables_t; 6360c16b537SWarner Losh 63719fcbaf1SConrad Meyer /* HUF_compress_internal() : 63819fcbaf1SConrad Meyer * `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ 639*a0483764SConrad Meyer static size_t 640*a0483764SConrad Meyer HUF_compress_internal (void* dst, size_t dstSize, 6410c16b537SWarner Losh const void* src, size_t srcSize, 6420c16b537SWarner Losh unsigned maxSymbolValue, unsigned huffLog, 643*a0483764SConrad Meyer HUF_nbStreams_e nbStreams, 6440c16b537SWarner Losh void* workSpace, size_t wkspSize, 64519fcbaf1SConrad Meyer HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat, 64619fcbaf1SConrad Meyer const int bmi2) 6470c16b537SWarner Losh { 64819fcbaf1SConrad Meyer HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace; 6490c16b537SWarner Losh BYTE* const ostart = (BYTE*)dst; 6500c16b537SWarner Losh BYTE* const oend = ostart + dstSize; 6510c16b537SWarner Losh BYTE* op = ostart; 6520c16b537SWarner Losh 6530c16b537SWarner Losh /* checks & inits */ 65419fcbaf1SConrad Meyer if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ 655*a0483764SConrad Meyer if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall); 65619fcbaf1SConrad Meyer if (!srcSize) return 0; /* Uncompressed */ 65719fcbaf1SConrad Meyer if (!dstSize) return 0; /* cannot fit anything within dst budget */ 6580c16b537SWarner Losh if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */ 6590c16b537SWarner Losh if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); 66019fcbaf1SConrad Meyer if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); 6610c16b537SWarner Losh if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX; 6620c16b537SWarner Losh if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; 6630c16b537SWarner Losh 66419fcbaf1SConrad Meyer /* Heuristic : If old table is valid, use it for small inputs */ 6650c16b537SWarner Losh if (preferRepeat && repeat && *repeat == HUF_repeat_valid) { 66619fcbaf1SConrad Meyer return HUF_compressCTable_internal(ostart, op, oend, 66719fcbaf1SConrad Meyer src, srcSize, 668*a0483764SConrad Meyer nbStreams, oldHufTable, bmi2); 6690c16b537SWarner Losh } 6700c16b537SWarner Losh 6710c16b537SWarner Losh /* Scan input and build symbol stats */ 672*a0483764SConrad Meyer { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace, wkspSize) ); 6730c16b537SWarner Losh if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */ 6740f743729SConrad Meyer if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */ 6750c16b537SWarner Losh } 6760c16b537SWarner Losh 6770c16b537SWarner Losh /* Check validity of previous table */ 67819fcbaf1SConrad Meyer if ( repeat 67919fcbaf1SConrad Meyer && *repeat == HUF_repeat_check 68019fcbaf1SConrad Meyer && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) { 6810c16b537SWarner Losh *repeat = HUF_repeat_none; 6820c16b537SWarner Losh } 6830c16b537SWarner Losh /* Heuristic : use existing table for small inputs */ 6840c16b537SWarner Losh if (preferRepeat && repeat && *repeat != HUF_repeat_none) { 68519fcbaf1SConrad Meyer return HUF_compressCTable_internal(ostart, op, oend, 68619fcbaf1SConrad Meyer src, srcSize, 687*a0483764SConrad Meyer nbStreams, oldHufTable, bmi2); 6880c16b537SWarner Losh } 6890c16b537SWarner Losh 6900c16b537SWarner Losh /* Build Huffman Tree */ 6910c16b537SWarner Losh huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); 692*a0483764SConrad Meyer { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count, 69319fcbaf1SConrad Meyer maxSymbolValue, huffLog, 694*a0483764SConrad Meyer table->nodeTable, sizeof(table->nodeTable)); 695*a0483764SConrad Meyer CHECK_F(maxBits); 6960c16b537SWarner Losh huffLog = (U32)maxBits; 69719fcbaf1SConrad Meyer /* Zero unused symbols in CTable, so we can check it for validity */ 69819fcbaf1SConrad Meyer memset(table->CTable + (maxSymbolValue + 1), 0, 69919fcbaf1SConrad Meyer sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt))); 7000c16b537SWarner Losh } 7010c16b537SWarner Losh 7020c16b537SWarner Losh /* Write table description header */ 70319fcbaf1SConrad Meyer { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) ); 70419fcbaf1SConrad Meyer /* Check if using previous huffman table is beneficial */ 7050c16b537SWarner Losh if (repeat && *repeat != HUF_repeat_none) { 70619fcbaf1SConrad Meyer size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue); 70719fcbaf1SConrad Meyer size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue); 7080c16b537SWarner Losh if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) { 70919fcbaf1SConrad Meyer return HUF_compressCTable_internal(ostart, op, oend, 71019fcbaf1SConrad Meyer src, srcSize, 711*a0483764SConrad Meyer nbStreams, oldHufTable, bmi2); 71219fcbaf1SConrad Meyer } } 71319fcbaf1SConrad Meyer 71419fcbaf1SConrad Meyer /* Use the new huffman table */ 7150c16b537SWarner Losh if (hSize + 12ul >= srcSize) { return 0; } 7160c16b537SWarner Losh op += hSize; 7170c16b537SWarner Losh if (repeat) { *repeat = HUF_repeat_none; } 71819fcbaf1SConrad Meyer if (oldHufTable) 71919fcbaf1SConrad Meyer memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */ 7200c16b537SWarner Losh } 72119fcbaf1SConrad Meyer return HUF_compressCTable_internal(ostart, op, oend, 72219fcbaf1SConrad Meyer src, srcSize, 723*a0483764SConrad Meyer nbStreams, table->CTable, bmi2); 7240c16b537SWarner Losh } 7250c16b537SWarner Losh 7260c16b537SWarner Losh 7270c16b537SWarner Losh size_t HUF_compress1X_wksp (void* dst, size_t dstSize, 7280c16b537SWarner Losh const void* src, size_t srcSize, 7290c16b537SWarner Losh unsigned maxSymbolValue, unsigned huffLog, 7300c16b537SWarner Losh void* workSpace, size_t wkspSize) 7310c16b537SWarner Losh { 73219fcbaf1SConrad Meyer return HUF_compress_internal(dst, dstSize, src, srcSize, 733*a0483764SConrad Meyer maxSymbolValue, huffLog, HUF_singleStream, 73419fcbaf1SConrad Meyer workSpace, wkspSize, 73519fcbaf1SConrad Meyer NULL, NULL, 0, 0 /*bmi2*/); 7360c16b537SWarner Losh } 7370c16b537SWarner Losh 7380c16b537SWarner Losh size_t HUF_compress1X_repeat (void* dst, size_t dstSize, 7390c16b537SWarner Losh const void* src, size_t srcSize, 7400c16b537SWarner Losh unsigned maxSymbolValue, unsigned huffLog, 7410c16b537SWarner Losh void* workSpace, size_t wkspSize, 74219fcbaf1SConrad Meyer HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) 7430c16b537SWarner Losh { 74419fcbaf1SConrad Meyer return HUF_compress_internal(dst, dstSize, src, srcSize, 745*a0483764SConrad Meyer maxSymbolValue, huffLog, HUF_singleStream, 74619fcbaf1SConrad Meyer workSpace, wkspSize, hufTable, 74719fcbaf1SConrad Meyer repeat, preferRepeat, bmi2); 7480c16b537SWarner Losh } 7490c16b537SWarner Losh 7500c16b537SWarner Losh size_t HUF_compress1X (void* dst, size_t dstSize, 7510c16b537SWarner Losh const void* src, size_t srcSize, 7520c16b537SWarner Losh unsigned maxSymbolValue, unsigned huffLog) 7530c16b537SWarner Losh { 75419fcbaf1SConrad Meyer unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; 7550c16b537SWarner Losh return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); 7560c16b537SWarner Losh } 7570c16b537SWarner Losh 75819fcbaf1SConrad Meyer /* HUF_compress4X_repeat(): 75919fcbaf1SConrad Meyer * compress input using 4 streams. 76019fcbaf1SConrad Meyer * provide workspace to generate compression tables */ 7610c16b537SWarner Losh size_t HUF_compress4X_wksp (void* dst, size_t dstSize, 7620c16b537SWarner Losh const void* src, size_t srcSize, 7630c16b537SWarner Losh unsigned maxSymbolValue, unsigned huffLog, 7640c16b537SWarner Losh void* workSpace, size_t wkspSize) 7650c16b537SWarner Losh { 76619fcbaf1SConrad Meyer return HUF_compress_internal(dst, dstSize, src, srcSize, 767*a0483764SConrad Meyer maxSymbolValue, huffLog, HUF_fourStreams, 76819fcbaf1SConrad Meyer workSpace, wkspSize, 76919fcbaf1SConrad Meyer NULL, NULL, 0, 0 /*bmi2*/); 7700c16b537SWarner Losh } 7710c16b537SWarner Losh 77219fcbaf1SConrad Meyer /* HUF_compress4X_repeat(): 77319fcbaf1SConrad Meyer * compress input using 4 streams. 77419fcbaf1SConrad Meyer * re-use an existing huffman compression table */ 7750c16b537SWarner Losh size_t HUF_compress4X_repeat (void* dst, size_t dstSize, 7760c16b537SWarner Losh const void* src, size_t srcSize, 7770c16b537SWarner Losh unsigned maxSymbolValue, unsigned huffLog, 7780c16b537SWarner Losh void* workSpace, size_t wkspSize, 77919fcbaf1SConrad Meyer HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) 7800c16b537SWarner Losh { 78119fcbaf1SConrad Meyer return HUF_compress_internal(dst, dstSize, src, srcSize, 782*a0483764SConrad Meyer maxSymbolValue, huffLog, HUF_fourStreams, 78319fcbaf1SConrad Meyer workSpace, wkspSize, 78419fcbaf1SConrad Meyer hufTable, repeat, preferRepeat, bmi2); 7850c16b537SWarner Losh } 7860c16b537SWarner Losh 7870c16b537SWarner Losh size_t HUF_compress2 (void* dst, size_t dstSize, 7880c16b537SWarner Losh const void* src, size_t srcSize, 7890c16b537SWarner Losh unsigned maxSymbolValue, unsigned huffLog) 7900c16b537SWarner Losh { 79119fcbaf1SConrad Meyer unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; 7920c16b537SWarner Losh return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); 7930c16b537SWarner Losh } 7940c16b537SWarner Losh 7950c16b537SWarner Losh size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize) 7960c16b537SWarner Losh { 79719fcbaf1SConrad Meyer return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT); 7980c16b537SWarner Losh } 799