10c16b537SWarner Losh /* ****************************************************************** 20c16b537SWarner Losh Huffman encoder, part of New Generation Entropy library 30c16b537SWarner Losh Copyright (C) 2013-2016, Yann Collet. 40c16b537SWarner Losh 50c16b537SWarner Losh BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 60c16b537SWarner Losh 70c16b537SWarner Losh Redistribution and use in source and binary forms, with or without 80c16b537SWarner Losh modification, are permitted provided that the following conditions are 90c16b537SWarner Losh met: 100c16b537SWarner Losh 110c16b537SWarner Losh * Redistributions of source code must retain the above copyright 120c16b537SWarner Losh notice, this list of conditions and the following disclaimer. 130c16b537SWarner Losh * Redistributions in binary form must reproduce the above 140c16b537SWarner Losh copyright notice, this list of conditions and the following disclaimer 150c16b537SWarner Losh in the documentation and/or other materials provided with the 160c16b537SWarner Losh distribution. 170c16b537SWarner Losh 180c16b537SWarner Losh THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 190c16b537SWarner Losh "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 200c16b537SWarner Losh LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 210c16b537SWarner Losh A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 220c16b537SWarner Losh OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 230c16b537SWarner Losh SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 240c16b537SWarner Losh LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 250c16b537SWarner Losh DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 260c16b537SWarner Losh THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 270c16b537SWarner Losh (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 280c16b537SWarner Losh OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 290c16b537SWarner Losh 300c16b537SWarner Losh You can contact the author at : 310c16b537SWarner Losh - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy 320c16b537SWarner Losh - Public forum : https://groups.google.com/forum/#!forum/lz4c 330c16b537SWarner Losh ****************************************************************** */ 340c16b537SWarner Losh 350c16b537SWarner Losh /* ************************************************************** 360c16b537SWarner Losh * Compiler specifics 370c16b537SWarner Losh ****************************************************************/ 380c16b537SWarner Losh #ifdef _MSC_VER /* Visual Studio */ 390c16b537SWarner Losh # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ 400c16b537SWarner Losh #endif 410c16b537SWarner Losh 420c16b537SWarner Losh 430c16b537SWarner Losh /* ************************************************************** 440c16b537SWarner Losh * Includes 450c16b537SWarner Losh ****************************************************************/ 460c16b537SWarner Losh #include <string.h> /* memcpy, memset */ 470c16b537SWarner Losh #include <stdio.h> /* printf (debug) */ 4819fcbaf1SConrad Meyer #include "compiler.h" 49*0f743729SConrad Meyer #include "bitstream.h" 50*0f743729SConrad Meyer #include "hist.h" 510c16b537SWarner Losh #define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */ 520c16b537SWarner Losh #include "fse.h" /* header compression */ 530c16b537SWarner Losh #define HUF_STATIC_LINKING_ONLY 540c16b537SWarner Losh #include "huf.h" 550c16b537SWarner Losh #include "error_private.h" 560c16b537SWarner Losh 570c16b537SWarner Losh 580c16b537SWarner Losh /* ************************************************************** 590c16b537SWarner Losh * Error Management 600c16b537SWarner Losh ****************************************************************/ 610c16b537SWarner Losh #define HUF_isError ERR_isError 62*0f743729SConrad Meyer #define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ 630c16b537SWarner Losh #define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e 640c16b537SWarner Losh #define CHECK_F(f) { CHECK_V_F(_var_err__, f); } 650c16b537SWarner Losh 660c16b537SWarner Losh 670c16b537SWarner Losh /* ************************************************************** 680c16b537SWarner Losh * Utils 690c16b537SWarner Losh ****************************************************************/ 700c16b537SWarner Losh unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) 710c16b537SWarner Losh { 720c16b537SWarner Losh return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); 730c16b537SWarner Losh } 740c16b537SWarner Losh 750c16b537SWarner Losh 760c16b537SWarner Losh /* ******************************************************* 770c16b537SWarner Losh * HUF : Huffman block compression 780c16b537SWarner Losh *********************************************************/ 790c16b537SWarner Losh /* HUF_compressWeights() : 800c16b537SWarner Losh * Same as FSE_compress(), but dedicated to huff0's weights compression. 810c16b537SWarner Losh * The use case needs much less stack memory. 820c16b537SWarner Losh * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX. 830c16b537SWarner Losh */ 840c16b537SWarner Losh #define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6 85*0f743729SConrad Meyer static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize) 860c16b537SWarner Losh { 870c16b537SWarner Losh BYTE* const ostart = (BYTE*) dst; 880c16b537SWarner Losh BYTE* op = ostart; 890c16b537SWarner Losh BYTE* const oend = ostart + dstSize; 900c16b537SWarner Losh 910c16b537SWarner Losh U32 maxSymbolValue = HUF_TABLELOG_MAX; 920c16b537SWarner Losh U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER; 930c16b537SWarner Losh 940c16b537SWarner Losh FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)]; 950c16b537SWarner Losh BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER]; 960c16b537SWarner Losh 970c16b537SWarner Losh U32 count[HUF_TABLELOG_MAX+1]; 980c16b537SWarner Losh S16 norm[HUF_TABLELOG_MAX+1]; 990c16b537SWarner Losh 1000c16b537SWarner Losh /* init conditions */ 1010c16b537SWarner Losh if (wtSize <= 1) return 0; /* Not compressible */ 1020c16b537SWarner Losh 1030c16b537SWarner Losh /* Scan input and build symbol stats */ 104*0f743729SConrad Meyer { unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize); /* never fails */ 1050c16b537SWarner Losh if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */ 1060c16b537SWarner Losh if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */ 1070c16b537SWarner Losh } 1080c16b537SWarner Losh 1090c16b537SWarner Losh tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue); 1100c16b537SWarner Losh CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) ); 1110c16b537SWarner Losh 1120c16b537SWarner Losh /* Write table description header */ 1130c16b537SWarner Losh { CHECK_V_F(hSize, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) ); 1140c16b537SWarner Losh op += hSize; 1150c16b537SWarner Losh } 1160c16b537SWarner Losh 1170c16b537SWarner Losh /* Compress */ 1180c16b537SWarner Losh CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) ); 1190c16b537SWarner Losh { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable) ); 1200c16b537SWarner Losh if (cSize == 0) return 0; /* not enough space for compressed data */ 1210c16b537SWarner Losh op += cSize; 1220c16b537SWarner Losh } 1230c16b537SWarner Losh 1240c16b537SWarner Losh return op-ostart; 1250c16b537SWarner Losh } 1260c16b537SWarner Losh 1270c16b537SWarner Losh 1280c16b537SWarner Losh struct HUF_CElt_s { 1290c16b537SWarner Losh U16 val; 1300c16b537SWarner Losh BYTE nbBits; 1310c16b537SWarner Losh }; /* typedef'd to HUF_CElt within "huf.h" */ 1320c16b537SWarner Losh 1330c16b537SWarner Losh /*! HUF_writeCTable() : 1340c16b537SWarner Losh `CTable` : Huffman tree to save, using huf representation. 1350c16b537SWarner Losh @return : size of saved CTable */ 1360c16b537SWarner Losh size_t HUF_writeCTable (void* dst, size_t maxDstSize, 1370c16b537SWarner Losh const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog) 1380c16b537SWarner Losh { 1390c16b537SWarner Losh BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */ 1400c16b537SWarner Losh BYTE huffWeight[HUF_SYMBOLVALUE_MAX]; 1410c16b537SWarner Losh BYTE* op = (BYTE*)dst; 1420c16b537SWarner Losh U32 n; 1430c16b537SWarner Losh 1440c16b537SWarner Losh /* check conditions */ 1450c16b537SWarner Losh if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); 1460c16b537SWarner Losh 1470c16b537SWarner Losh /* convert to weight */ 1480c16b537SWarner Losh bitsToWeight[0] = 0; 1490c16b537SWarner Losh for (n=1; n<huffLog+1; n++) 1500c16b537SWarner Losh bitsToWeight[n] = (BYTE)(huffLog + 1 - n); 1510c16b537SWarner Losh for (n=0; n<maxSymbolValue; n++) 1520c16b537SWarner Losh huffWeight[n] = bitsToWeight[CTable[n].nbBits]; 1530c16b537SWarner Losh 1540c16b537SWarner Losh /* attempt weights compression by FSE */ 1550c16b537SWarner Losh { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) ); 1560c16b537SWarner Losh if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */ 1570c16b537SWarner Losh op[0] = (BYTE)hSize; 1580c16b537SWarner Losh return hSize+1; 1590c16b537SWarner Losh } } 1600c16b537SWarner Losh 1610c16b537SWarner Losh /* write raw values as 4-bits (max : 15) */ 1620c16b537SWarner Losh if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */ 1630c16b537SWarner Losh if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */ 1640c16b537SWarner Losh op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1)); 1650c16b537SWarner Losh huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */ 1660c16b537SWarner Losh for (n=0; n<maxSymbolValue; n+=2) 1670c16b537SWarner Losh op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]); 1680c16b537SWarner Losh return ((maxSymbolValue+1)/2) + 1; 1690c16b537SWarner Losh } 1700c16b537SWarner Losh 1710c16b537SWarner Losh 1720c16b537SWarner Losh size_t HUF_readCTable (HUF_CElt* CTable, U32* maxSymbolValuePtr, const void* src, size_t srcSize) 1730c16b537SWarner Losh { 1740c16b537SWarner Losh BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */ 1750c16b537SWarner Losh U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */ 1760c16b537SWarner Losh U32 tableLog = 0; 1770c16b537SWarner Losh U32 nbSymbols = 0; 1780c16b537SWarner Losh 1790c16b537SWarner Losh /* get symbol weights */ 1800c16b537SWarner Losh CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize)); 1810c16b537SWarner Losh 1820c16b537SWarner Losh /* check result */ 1830c16b537SWarner Losh if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); 1840c16b537SWarner Losh if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall); 1850c16b537SWarner Losh 1860c16b537SWarner Losh /* Prepare base value per rank */ 1870c16b537SWarner Losh { U32 n, nextRankStart = 0; 1880c16b537SWarner Losh for (n=1; n<=tableLog; n++) { 1890c16b537SWarner Losh U32 current = nextRankStart; 1900c16b537SWarner Losh nextRankStart += (rankVal[n] << (n-1)); 1910c16b537SWarner Losh rankVal[n] = current; 1920c16b537SWarner Losh } } 1930c16b537SWarner Losh 1940c16b537SWarner Losh /* fill nbBits */ 1950c16b537SWarner Losh { U32 n; for (n=0; n<nbSymbols; n++) { 1960c16b537SWarner Losh const U32 w = huffWeight[n]; 1970c16b537SWarner Losh CTable[n].nbBits = (BYTE)(tableLog + 1 - w); 1980c16b537SWarner Losh } } 1990c16b537SWarner Losh 2000c16b537SWarner Losh /* fill val */ 2010c16b537SWarner Losh { U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0}; /* support w=0=>n=tableLog+1 */ 2020c16b537SWarner Losh U16 valPerRank[HUF_TABLELOG_MAX+2] = {0}; 2030c16b537SWarner Losh { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; } 2040c16b537SWarner Losh /* determine stating value per rank */ 2050c16b537SWarner Losh valPerRank[tableLog+1] = 0; /* for w==0 */ 2060c16b537SWarner Losh { U16 min = 0; 2070c16b537SWarner Losh U32 n; for (n=tableLog; n>0; n--) { /* start at n=tablelog <-> w=1 */ 2080c16b537SWarner Losh valPerRank[n] = min; /* get starting value within each rank */ 2090c16b537SWarner Losh min += nbPerRank[n]; 2100c16b537SWarner Losh min >>= 1; 2110c16b537SWarner Losh } } 2120c16b537SWarner Losh /* assign value within rank, symbol order */ 2130c16b537SWarner Losh { U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; } 2140c16b537SWarner Losh } 2150c16b537SWarner Losh 2160c16b537SWarner Losh *maxSymbolValuePtr = nbSymbols - 1; 2170c16b537SWarner Losh return readSize; 2180c16b537SWarner Losh } 2190c16b537SWarner Losh 220*0f743729SConrad Meyer U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue) 221*0f743729SConrad Meyer { 222*0f743729SConrad Meyer const HUF_CElt* table = (const HUF_CElt*)symbolTable; 223*0f743729SConrad Meyer assert(symbolValue <= HUF_SYMBOLVALUE_MAX); 224*0f743729SConrad Meyer return table[symbolValue].nbBits; 225*0f743729SConrad Meyer } 226*0f743729SConrad Meyer 2270c16b537SWarner Losh 2280c16b537SWarner Losh typedef struct nodeElt_s { 2290c16b537SWarner Losh U32 count; 2300c16b537SWarner Losh U16 parent; 2310c16b537SWarner Losh BYTE byte; 2320c16b537SWarner Losh BYTE nbBits; 2330c16b537SWarner Losh } nodeElt; 2340c16b537SWarner Losh 2350c16b537SWarner Losh static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) 2360c16b537SWarner Losh { 2370c16b537SWarner Losh const U32 largestBits = huffNode[lastNonNull].nbBits; 2380c16b537SWarner Losh if (largestBits <= maxNbBits) return largestBits; /* early exit : no elt > maxNbBits */ 2390c16b537SWarner Losh 2400c16b537SWarner Losh /* there are several too large elements (at least >= 2) */ 2410c16b537SWarner Losh { int totalCost = 0; 2420c16b537SWarner Losh const U32 baseCost = 1 << (largestBits - maxNbBits); 2430c16b537SWarner Losh U32 n = lastNonNull; 2440c16b537SWarner Losh 2450c16b537SWarner Losh while (huffNode[n].nbBits > maxNbBits) { 2460c16b537SWarner Losh totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)); 2470c16b537SWarner Losh huffNode[n].nbBits = (BYTE)maxNbBits; 2480c16b537SWarner Losh n --; 2490c16b537SWarner Losh } /* n stops at huffNode[n].nbBits <= maxNbBits */ 2500c16b537SWarner Losh while (huffNode[n].nbBits == maxNbBits) n--; /* n end at index of smallest symbol using < maxNbBits */ 2510c16b537SWarner Losh 2520c16b537SWarner Losh /* renorm totalCost */ 2530c16b537SWarner Losh totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */ 2540c16b537SWarner Losh 2550c16b537SWarner Losh /* repay normalized cost */ 2560c16b537SWarner Losh { U32 const noSymbol = 0xF0F0F0F0; 2570c16b537SWarner Losh U32 rankLast[HUF_TABLELOG_MAX+2]; 2580c16b537SWarner Losh int pos; 2590c16b537SWarner Losh 2600c16b537SWarner Losh /* Get pos of last (smallest) symbol per rank */ 2610c16b537SWarner Losh memset(rankLast, 0xF0, sizeof(rankLast)); 2620c16b537SWarner Losh { U32 currentNbBits = maxNbBits; 2630c16b537SWarner Losh for (pos=n ; pos >= 0; pos--) { 2640c16b537SWarner Losh if (huffNode[pos].nbBits >= currentNbBits) continue; 2650c16b537SWarner Losh currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */ 2660c16b537SWarner Losh rankLast[maxNbBits-currentNbBits] = pos; 2670c16b537SWarner Losh } } 2680c16b537SWarner Losh 2690c16b537SWarner Losh while (totalCost > 0) { 2700c16b537SWarner Losh U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1; 2710c16b537SWarner Losh for ( ; nBitsToDecrease > 1; nBitsToDecrease--) { 2720c16b537SWarner Losh U32 highPos = rankLast[nBitsToDecrease]; 2730c16b537SWarner Losh U32 lowPos = rankLast[nBitsToDecrease-1]; 2740c16b537SWarner Losh if (highPos == noSymbol) continue; 2750c16b537SWarner Losh if (lowPos == noSymbol) break; 2760c16b537SWarner Losh { U32 const highTotal = huffNode[highPos].count; 2770c16b537SWarner Losh U32 const lowTotal = 2 * huffNode[lowPos].count; 2780c16b537SWarner Losh if (highTotal <= lowTotal) break; 2790c16b537SWarner Losh } } 2800c16b537SWarner Losh /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */ 2810c16b537SWarner Losh /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */ 2820c16b537SWarner Losh while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol)) 2830c16b537SWarner Losh nBitsToDecrease ++; 2840c16b537SWarner Losh totalCost -= 1 << (nBitsToDecrease-1); 2850c16b537SWarner Losh if (rankLast[nBitsToDecrease-1] == noSymbol) 2860c16b537SWarner Losh rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */ 2870c16b537SWarner Losh huffNode[rankLast[nBitsToDecrease]].nbBits ++; 2880c16b537SWarner Losh if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */ 2890c16b537SWarner Losh rankLast[nBitsToDecrease] = noSymbol; 2900c16b537SWarner Losh else { 2910c16b537SWarner Losh rankLast[nBitsToDecrease]--; 2920c16b537SWarner Losh if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease) 2930c16b537SWarner Losh rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */ 2940c16b537SWarner Losh } } /* while (totalCost > 0) */ 2950c16b537SWarner Losh 2960c16b537SWarner Losh while (totalCost < 0) { /* Sometimes, cost correction overshoot */ 2970c16b537SWarner Losh if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */ 2980c16b537SWarner Losh while (huffNode[n].nbBits == maxNbBits) n--; 2990c16b537SWarner Losh huffNode[n+1].nbBits--; 3000c16b537SWarner Losh rankLast[1] = n+1; 3010c16b537SWarner Losh totalCost++; 3020c16b537SWarner Losh continue; 3030c16b537SWarner Losh } 3040c16b537SWarner Losh huffNode[ rankLast[1] + 1 ].nbBits--; 3050c16b537SWarner Losh rankLast[1]++; 3060c16b537SWarner Losh totalCost ++; 3070c16b537SWarner Losh } } } /* there are several too large elements (at least >= 2) */ 3080c16b537SWarner Losh 3090c16b537SWarner Losh return maxNbBits; 3100c16b537SWarner Losh } 3110c16b537SWarner Losh 3120c16b537SWarner Losh 3130c16b537SWarner Losh typedef struct { 3140c16b537SWarner Losh U32 base; 3150c16b537SWarner Losh U32 current; 3160c16b537SWarner Losh } rankPos; 3170c16b537SWarner Losh 3180c16b537SWarner Losh static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue) 3190c16b537SWarner Losh { 3200c16b537SWarner Losh rankPos rank[32]; 3210c16b537SWarner Losh U32 n; 3220c16b537SWarner Losh 3230c16b537SWarner Losh memset(rank, 0, sizeof(rank)); 3240c16b537SWarner Losh for (n=0; n<=maxSymbolValue; n++) { 3250c16b537SWarner Losh U32 r = BIT_highbit32(count[n] + 1); 3260c16b537SWarner Losh rank[r].base ++; 3270c16b537SWarner Losh } 3280c16b537SWarner Losh for (n=30; n>0; n--) rank[n-1].base += rank[n].base; 3290c16b537SWarner Losh for (n=0; n<32; n++) rank[n].current = rank[n].base; 3300c16b537SWarner Losh for (n=0; n<=maxSymbolValue; n++) { 3310c16b537SWarner Losh U32 const c = count[n]; 3320c16b537SWarner Losh U32 const r = BIT_highbit32(c+1) + 1; 3330c16b537SWarner Losh U32 pos = rank[r].current++; 33419fcbaf1SConrad Meyer while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) { 33519fcbaf1SConrad Meyer huffNode[pos] = huffNode[pos-1]; 33619fcbaf1SConrad Meyer pos--; 33719fcbaf1SConrad Meyer } 3380c16b537SWarner Losh huffNode[pos].count = c; 3390c16b537SWarner Losh huffNode[pos].byte = (BYTE)n; 3400c16b537SWarner Losh } 3410c16b537SWarner Losh } 3420c16b537SWarner Losh 3430c16b537SWarner Losh 3440c16b537SWarner Losh /** HUF_buildCTable_wksp() : 3450c16b537SWarner Losh * Same as HUF_buildCTable(), but using externally allocated scratch buffer. 34619fcbaf1SConrad Meyer * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of HUF_CTABLE_WORKSPACE_SIZE_U32 unsigned. 3470c16b537SWarner Losh */ 3480c16b537SWarner Losh #define STARTNODE (HUF_SYMBOLVALUE_MAX+1) 34919fcbaf1SConrad Meyer typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32]; 3500c16b537SWarner Losh size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize) 3510c16b537SWarner Losh { 3520c16b537SWarner Losh nodeElt* const huffNode0 = (nodeElt*)workSpace; 3530c16b537SWarner Losh nodeElt* const huffNode = huffNode0+1; 3540c16b537SWarner Losh U32 n, nonNullRank; 3550c16b537SWarner Losh int lowS, lowN; 3560c16b537SWarner Losh U16 nodeNb = STARTNODE; 3570c16b537SWarner Losh U32 nodeRoot; 3580c16b537SWarner Losh 3590c16b537SWarner Losh /* safety checks */ 36019fcbaf1SConrad Meyer if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ 36119fcbaf1SConrad Meyer if (wkspSize < sizeof(huffNodeTable)) return ERROR(workSpace_tooSmall); 3620c16b537SWarner Losh if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT; 36319fcbaf1SConrad Meyer if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); 3640c16b537SWarner Losh memset(huffNode0, 0, sizeof(huffNodeTable)); 3650c16b537SWarner Losh 3660c16b537SWarner Losh /* sort, decreasing order */ 3670c16b537SWarner Losh HUF_sort(huffNode, count, maxSymbolValue); 3680c16b537SWarner Losh 3690c16b537SWarner Losh /* init for parents */ 3700c16b537SWarner Losh nonNullRank = maxSymbolValue; 3710c16b537SWarner Losh while(huffNode[nonNullRank].count == 0) nonNullRank--; 3720c16b537SWarner Losh lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb; 3730c16b537SWarner Losh huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count; 3740c16b537SWarner Losh huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb; 3750c16b537SWarner Losh nodeNb++; lowS-=2; 3760c16b537SWarner Losh for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30); 3770c16b537SWarner Losh huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */ 3780c16b537SWarner Losh 3790c16b537SWarner Losh /* create parents */ 3800c16b537SWarner Losh while (nodeNb <= nodeRoot) { 3810c16b537SWarner Losh U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; 3820c16b537SWarner Losh U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; 3830c16b537SWarner Losh huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count; 3840c16b537SWarner Losh huffNode[n1].parent = huffNode[n2].parent = nodeNb; 3850c16b537SWarner Losh nodeNb++; 3860c16b537SWarner Losh } 3870c16b537SWarner Losh 3880c16b537SWarner Losh /* distribute weights (unlimited tree height) */ 3890c16b537SWarner Losh huffNode[nodeRoot].nbBits = 0; 3900c16b537SWarner Losh for (n=nodeRoot-1; n>=STARTNODE; n--) 3910c16b537SWarner Losh huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; 3920c16b537SWarner Losh for (n=0; n<=nonNullRank; n++) 3930c16b537SWarner Losh huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; 3940c16b537SWarner Losh 3950c16b537SWarner Losh /* enforce maxTableLog */ 3960c16b537SWarner Losh maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits); 3970c16b537SWarner Losh 3980c16b537SWarner Losh /* fill result into tree (val, nbBits) */ 3990c16b537SWarner Losh { U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0}; 4000c16b537SWarner Losh U16 valPerRank[HUF_TABLELOG_MAX+1] = {0}; 4010c16b537SWarner Losh if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */ 4020c16b537SWarner Losh for (n=0; n<=nonNullRank; n++) 4030c16b537SWarner Losh nbPerRank[huffNode[n].nbBits]++; 4040c16b537SWarner Losh /* determine stating value per rank */ 4050c16b537SWarner Losh { U16 min = 0; 4060c16b537SWarner Losh for (n=maxNbBits; n>0; n--) { 4070c16b537SWarner Losh valPerRank[n] = min; /* get starting value within each rank */ 4080c16b537SWarner Losh min += nbPerRank[n]; 4090c16b537SWarner Losh min >>= 1; 4100c16b537SWarner Losh } } 4110c16b537SWarner Losh for (n=0; n<=maxSymbolValue; n++) 4120c16b537SWarner Losh tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */ 4130c16b537SWarner Losh for (n=0; n<=maxSymbolValue; n++) 4140c16b537SWarner Losh tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */ 4150c16b537SWarner Losh } 4160c16b537SWarner Losh 4170c16b537SWarner Losh return maxNbBits; 4180c16b537SWarner Losh } 4190c16b537SWarner Losh 4200c16b537SWarner Losh /** HUF_buildCTable() : 42119fcbaf1SConrad Meyer * @return : maxNbBits 4220c16b537SWarner Losh * Note : count is used before tree is written, so they can safely overlap 4230c16b537SWarner Losh */ 4240c16b537SWarner Losh size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits) 4250c16b537SWarner Losh { 4260c16b537SWarner Losh huffNodeTable nodeTable; 4270c16b537SWarner Losh return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable)); 4280c16b537SWarner Losh } 4290c16b537SWarner Losh 4300c16b537SWarner Losh static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) 4310c16b537SWarner Losh { 4320c16b537SWarner Losh size_t nbBits = 0; 4330c16b537SWarner Losh int s; 4340c16b537SWarner Losh for (s = 0; s <= (int)maxSymbolValue; ++s) { 4350c16b537SWarner Losh nbBits += CTable[s].nbBits * count[s]; 4360c16b537SWarner Losh } 4370c16b537SWarner Losh return nbBits >> 3; 4380c16b537SWarner Losh } 4390c16b537SWarner Losh 4400c16b537SWarner Losh static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) { 4410c16b537SWarner Losh int bad = 0; 4420c16b537SWarner Losh int s; 4430c16b537SWarner Losh for (s = 0; s <= (int)maxSymbolValue; ++s) { 4440c16b537SWarner Losh bad |= (count[s] != 0) & (CTable[s].nbBits == 0); 4450c16b537SWarner Losh } 4460c16b537SWarner Losh return !bad; 4470c16b537SWarner Losh } 4480c16b537SWarner Losh 44919fcbaf1SConrad Meyer size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } 45019fcbaf1SConrad Meyer 45119fcbaf1SConrad Meyer FORCE_INLINE_TEMPLATE void 45219fcbaf1SConrad Meyer HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable) 4530c16b537SWarner Losh { 4540c16b537SWarner Losh BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits); 4550c16b537SWarner Losh } 4560c16b537SWarner Losh 4570c16b537SWarner Losh #define HUF_FLUSHBITS(s) BIT_flushBits(s) 4580c16b537SWarner Losh 4590c16b537SWarner Losh #define HUF_FLUSHBITS_1(stream) \ 4600c16b537SWarner Losh if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream) 4610c16b537SWarner Losh 4620c16b537SWarner Losh #define HUF_FLUSHBITS_2(stream) \ 4630c16b537SWarner Losh if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream) 4640c16b537SWarner Losh 46519fcbaf1SConrad Meyer FORCE_INLINE_TEMPLATE size_t 46619fcbaf1SConrad Meyer HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize, 46719fcbaf1SConrad Meyer const void* src, size_t srcSize, 46819fcbaf1SConrad Meyer const HUF_CElt* CTable) 4690c16b537SWarner Losh { 4700c16b537SWarner Losh const BYTE* ip = (const BYTE*) src; 4710c16b537SWarner Losh BYTE* const ostart = (BYTE*)dst; 4720c16b537SWarner Losh BYTE* const oend = ostart + dstSize; 4730c16b537SWarner Losh BYTE* op = ostart; 4740c16b537SWarner Losh size_t n; 4750c16b537SWarner Losh BIT_CStream_t bitC; 4760c16b537SWarner Losh 4770c16b537SWarner Losh /* init */ 4780c16b537SWarner Losh if (dstSize < 8) return 0; /* not enough space to compress */ 4790c16b537SWarner Losh { size_t const initErr = BIT_initCStream(&bitC, op, oend-op); 4800c16b537SWarner Losh if (HUF_isError(initErr)) return 0; } 4810c16b537SWarner Losh 4820c16b537SWarner Losh n = srcSize & ~3; /* join to mod 4 */ 4830c16b537SWarner Losh switch (srcSize & 3) 4840c16b537SWarner Losh { 4850c16b537SWarner Losh case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable); 4860c16b537SWarner Losh HUF_FLUSHBITS_2(&bitC); 4870c16b537SWarner Losh /* fall-through */ 4880c16b537SWarner Losh case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable); 4890c16b537SWarner Losh HUF_FLUSHBITS_1(&bitC); 4900c16b537SWarner Losh /* fall-through */ 4910c16b537SWarner Losh case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable); 4920c16b537SWarner Losh HUF_FLUSHBITS(&bitC); 4930c16b537SWarner Losh /* fall-through */ 4940c16b537SWarner Losh case 0 : /* fall-through */ 4950c16b537SWarner Losh default: break; 4960c16b537SWarner Losh } 4970c16b537SWarner Losh 4980c16b537SWarner Losh for (; n>0; n-=4) { /* note : n&3==0 at this stage */ 4990c16b537SWarner Losh HUF_encodeSymbol(&bitC, ip[n- 1], CTable); 5000c16b537SWarner Losh HUF_FLUSHBITS_1(&bitC); 5010c16b537SWarner Losh HUF_encodeSymbol(&bitC, ip[n- 2], CTable); 5020c16b537SWarner Losh HUF_FLUSHBITS_2(&bitC); 5030c16b537SWarner Losh HUF_encodeSymbol(&bitC, ip[n- 3], CTable); 5040c16b537SWarner Losh HUF_FLUSHBITS_1(&bitC); 5050c16b537SWarner Losh HUF_encodeSymbol(&bitC, ip[n- 4], CTable); 5060c16b537SWarner Losh HUF_FLUSHBITS(&bitC); 5070c16b537SWarner Losh } 5080c16b537SWarner Losh 5090c16b537SWarner Losh return BIT_closeCStream(&bitC); 5100c16b537SWarner Losh } 5110c16b537SWarner Losh 51219fcbaf1SConrad Meyer #if DYNAMIC_BMI2 5130c16b537SWarner Losh 51419fcbaf1SConrad Meyer static TARGET_ATTRIBUTE("bmi2") size_t 51519fcbaf1SConrad Meyer HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize, 51619fcbaf1SConrad Meyer const void* src, size_t srcSize, 51719fcbaf1SConrad Meyer const HUF_CElt* CTable) 51819fcbaf1SConrad Meyer { 51919fcbaf1SConrad Meyer return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); 52019fcbaf1SConrad Meyer } 52119fcbaf1SConrad Meyer 52219fcbaf1SConrad Meyer static size_t 52319fcbaf1SConrad Meyer HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize, 52419fcbaf1SConrad Meyer const void* src, size_t srcSize, 52519fcbaf1SConrad Meyer const HUF_CElt* CTable) 52619fcbaf1SConrad Meyer { 52719fcbaf1SConrad Meyer return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); 52819fcbaf1SConrad Meyer } 52919fcbaf1SConrad Meyer 53019fcbaf1SConrad Meyer static size_t 53119fcbaf1SConrad Meyer HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, 53219fcbaf1SConrad Meyer const void* src, size_t srcSize, 53319fcbaf1SConrad Meyer const HUF_CElt* CTable, const int bmi2) 53419fcbaf1SConrad Meyer { 53519fcbaf1SConrad Meyer if (bmi2) { 53619fcbaf1SConrad Meyer return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable); 53719fcbaf1SConrad Meyer } 53819fcbaf1SConrad Meyer return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable); 53919fcbaf1SConrad Meyer } 54019fcbaf1SConrad Meyer 54119fcbaf1SConrad Meyer #else 54219fcbaf1SConrad Meyer 54319fcbaf1SConrad Meyer static size_t 54419fcbaf1SConrad Meyer HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, 54519fcbaf1SConrad Meyer const void* src, size_t srcSize, 54619fcbaf1SConrad Meyer const HUF_CElt* CTable, const int bmi2) 54719fcbaf1SConrad Meyer { 54819fcbaf1SConrad Meyer (void)bmi2; 54919fcbaf1SConrad Meyer return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); 55019fcbaf1SConrad Meyer } 55119fcbaf1SConrad Meyer 55219fcbaf1SConrad Meyer #endif 55319fcbaf1SConrad Meyer 55419fcbaf1SConrad Meyer size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) 55519fcbaf1SConrad Meyer { 55619fcbaf1SConrad Meyer return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); 55719fcbaf1SConrad Meyer } 55819fcbaf1SConrad Meyer 55919fcbaf1SConrad Meyer 56019fcbaf1SConrad Meyer static size_t 56119fcbaf1SConrad Meyer HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, 56219fcbaf1SConrad Meyer const void* src, size_t srcSize, 56319fcbaf1SConrad Meyer const HUF_CElt* CTable, int bmi2) 5640c16b537SWarner Losh { 5650c16b537SWarner Losh size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */ 5660c16b537SWarner Losh const BYTE* ip = (const BYTE*) src; 5670c16b537SWarner Losh const BYTE* const iend = ip + srcSize; 5680c16b537SWarner Losh BYTE* const ostart = (BYTE*) dst; 5690c16b537SWarner Losh BYTE* const oend = ostart + dstSize; 5700c16b537SWarner Losh BYTE* op = ostart; 5710c16b537SWarner Losh 5720c16b537SWarner Losh if (dstSize < 6 + 1 + 1 + 1 + 8) return 0; /* minimum space to compress successfully */ 5730c16b537SWarner Losh if (srcSize < 12) return 0; /* no saving possible : too small input */ 5740c16b537SWarner Losh op += 6; /* jumpTable */ 5750c16b537SWarner Losh 57619fcbaf1SConrad Meyer { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); 5770c16b537SWarner Losh if (cSize==0) return 0; 57819fcbaf1SConrad Meyer assert(cSize <= 65535); 5790c16b537SWarner Losh MEM_writeLE16(ostart, (U16)cSize); 5800c16b537SWarner Losh op += cSize; 5810c16b537SWarner Losh } 5820c16b537SWarner Losh 5830c16b537SWarner Losh ip += segmentSize; 58419fcbaf1SConrad Meyer { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); 5850c16b537SWarner Losh if (cSize==0) return 0; 58619fcbaf1SConrad Meyer assert(cSize <= 65535); 5870c16b537SWarner Losh MEM_writeLE16(ostart+2, (U16)cSize); 5880c16b537SWarner Losh op += cSize; 5890c16b537SWarner Losh } 5900c16b537SWarner Losh 5910c16b537SWarner Losh ip += segmentSize; 59219fcbaf1SConrad Meyer { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); 5930c16b537SWarner Losh if (cSize==0) return 0; 59419fcbaf1SConrad Meyer assert(cSize <= 65535); 5950c16b537SWarner Losh MEM_writeLE16(ostart+4, (U16)cSize); 5960c16b537SWarner Losh op += cSize; 5970c16b537SWarner Losh } 5980c16b537SWarner Losh 5990c16b537SWarner Losh ip += segmentSize; 60019fcbaf1SConrad Meyer { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, iend-ip, CTable, bmi2) ); 6010c16b537SWarner Losh if (cSize==0) return 0; 6020c16b537SWarner Losh op += cSize; 6030c16b537SWarner Losh } 6040c16b537SWarner Losh 6050c16b537SWarner Losh return op-ostart; 6060c16b537SWarner Losh } 6070c16b537SWarner Losh 60819fcbaf1SConrad Meyer size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) 60919fcbaf1SConrad Meyer { 61019fcbaf1SConrad Meyer return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); 61119fcbaf1SConrad Meyer } 61219fcbaf1SConrad Meyer 6130c16b537SWarner Losh 6140c16b537SWarner Losh static size_t HUF_compressCTable_internal( 6150c16b537SWarner Losh BYTE* const ostart, BYTE* op, BYTE* const oend, 6160c16b537SWarner Losh const void* src, size_t srcSize, 61719fcbaf1SConrad Meyer unsigned singleStream, const HUF_CElt* CTable, const int bmi2) 6180c16b537SWarner Losh { 6190c16b537SWarner Losh size_t const cSize = singleStream ? 62019fcbaf1SConrad Meyer HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) : 62119fcbaf1SConrad Meyer HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2); 6220c16b537SWarner Losh if (HUF_isError(cSize)) { return cSize; } 6230c16b537SWarner Losh if (cSize==0) { return 0; } /* uncompressible */ 6240c16b537SWarner Losh op += cSize; 6250c16b537SWarner Losh /* check compressibility */ 6260c16b537SWarner Losh if ((size_t)(op-ostart) >= srcSize-1) { return 0; } 6270c16b537SWarner Losh return op-ostart; 6280c16b537SWarner Losh } 6290c16b537SWarner Losh 63019fcbaf1SConrad Meyer typedef struct { 63119fcbaf1SConrad Meyer U32 count[HUF_SYMBOLVALUE_MAX + 1]; 63219fcbaf1SConrad Meyer HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1]; 63319fcbaf1SConrad Meyer huffNodeTable nodeTable; 63419fcbaf1SConrad Meyer } HUF_compress_tables_t; 6350c16b537SWarner Losh 63619fcbaf1SConrad Meyer /* HUF_compress_internal() : 63719fcbaf1SConrad Meyer * `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ 6380c16b537SWarner Losh static size_t HUF_compress_internal ( 6390c16b537SWarner Losh void* dst, size_t dstSize, 6400c16b537SWarner Losh const void* src, size_t srcSize, 6410c16b537SWarner Losh unsigned maxSymbolValue, unsigned huffLog, 6420c16b537SWarner Losh unsigned singleStream, 6430c16b537SWarner Losh void* workSpace, size_t wkspSize, 64419fcbaf1SConrad Meyer HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat, 64519fcbaf1SConrad Meyer const int bmi2) 6460c16b537SWarner Losh { 64719fcbaf1SConrad Meyer HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace; 6480c16b537SWarner Losh BYTE* const ostart = (BYTE*)dst; 6490c16b537SWarner Losh BYTE* const oend = ostart + dstSize; 6500c16b537SWarner Losh BYTE* op = ostart; 6510c16b537SWarner Losh 6520c16b537SWarner Losh /* checks & inits */ 65319fcbaf1SConrad Meyer if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ 65419fcbaf1SConrad Meyer if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall); 65519fcbaf1SConrad Meyer if (!srcSize) return 0; /* Uncompressed */ 65619fcbaf1SConrad Meyer if (!dstSize) return 0; /* cannot fit anything within dst budget */ 6570c16b537SWarner Losh if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */ 6580c16b537SWarner Losh if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); 65919fcbaf1SConrad Meyer if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); 6600c16b537SWarner Losh if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX; 6610c16b537SWarner Losh if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; 6620c16b537SWarner Losh 66319fcbaf1SConrad Meyer /* Heuristic : If old table is valid, use it for small inputs */ 6640c16b537SWarner Losh if (preferRepeat && repeat && *repeat == HUF_repeat_valid) { 66519fcbaf1SConrad Meyer return HUF_compressCTable_internal(ostart, op, oend, 66619fcbaf1SConrad Meyer src, srcSize, 66719fcbaf1SConrad Meyer singleStream, oldHufTable, bmi2); 6680c16b537SWarner Losh } 6690c16b537SWarner Losh 6700c16b537SWarner Losh /* Scan input and build symbol stats */ 671*0f743729SConrad Meyer { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->count) ); 6720c16b537SWarner Losh if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */ 673*0f743729SConrad Meyer if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */ 6740c16b537SWarner Losh } 6750c16b537SWarner Losh 6760c16b537SWarner Losh /* Check validity of previous table */ 67719fcbaf1SConrad Meyer if ( repeat 67819fcbaf1SConrad Meyer && *repeat == HUF_repeat_check 67919fcbaf1SConrad Meyer && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) { 6800c16b537SWarner Losh *repeat = HUF_repeat_none; 6810c16b537SWarner Losh } 6820c16b537SWarner Losh /* Heuristic : use existing table for small inputs */ 6830c16b537SWarner Losh if (preferRepeat && repeat && *repeat != HUF_repeat_none) { 68419fcbaf1SConrad Meyer return HUF_compressCTable_internal(ostart, op, oend, 68519fcbaf1SConrad Meyer src, srcSize, 68619fcbaf1SConrad Meyer singleStream, oldHufTable, bmi2); 6870c16b537SWarner Losh } 6880c16b537SWarner Losh 6890c16b537SWarner Losh /* Build Huffman Tree */ 6900c16b537SWarner Losh huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); 69119fcbaf1SConrad Meyer { CHECK_V_F(maxBits, HUF_buildCTable_wksp(table->CTable, table->count, 69219fcbaf1SConrad Meyer maxSymbolValue, huffLog, 69319fcbaf1SConrad Meyer table->nodeTable, sizeof(table->nodeTable)) ); 6940c16b537SWarner Losh huffLog = (U32)maxBits; 69519fcbaf1SConrad Meyer /* Zero unused symbols in CTable, so we can check it for validity */ 69619fcbaf1SConrad Meyer memset(table->CTable + (maxSymbolValue + 1), 0, 69719fcbaf1SConrad Meyer sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt))); 6980c16b537SWarner Losh } 6990c16b537SWarner Losh 7000c16b537SWarner Losh /* Write table description header */ 70119fcbaf1SConrad Meyer { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) ); 70219fcbaf1SConrad Meyer /* Check if using previous huffman table is beneficial */ 7030c16b537SWarner Losh if (repeat && *repeat != HUF_repeat_none) { 70419fcbaf1SConrad Meyer size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue); 70519fcbaf1SConrad Meyer size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue); 7060c16b537SWarner Losh if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) { 70719fcbaf1SConrad Meyer return HUF_compressCTable_internal(ostart, op, oend, 70819fcbaf1SConrad Meyer src, srcSize, 70919fcbaf1SConrad Meyer singleStream, oldHufTable, bmi2); 71019fcbaf1SConrad Meyer } } 71119fcbaf1SConrad Meyer 71219fcbaf1SConrad Meyer /* Use the new huffman table */ 7130c16b537SWarner Losh if (hSize + 12ul >= srcSize) { return 0; } 7140c16b537SWarner Losh op += hSize; 7150c16b537SWarner Losh if (repeat) { *repeat = HUF_repeat_none; } 71619fcbaf1SConrad Meyer if (oldHufTable) 71719fcbaf1SConrad Meyer memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */ 7180c16b537SWarner Losh } 71919fcbaf1SConrad Meyer return HUF_compressCTable_internal(ostart, op, oend, 72019fcbaf1SConrad Meyer src, srcSize, 72119fcbaf1SConrad Meyer singleStream, table->CTable, bmi2); 7220c16b537SWarner Losh } 7230c16b537SWarner Losh 7240c16b537SWarner Losh 7250c16b537SWarner Losh size_t HUF_compress1X_wksp (void* dst, size_t dstSize, 7260c16b537SWarner Losh const void* src, size_t srcSize, 7270c16b537SWarner Losh unsigned maxSymbolValue, unsigned huffLog, 7280c16b537SWarner Losh void* workSpace, size_t wkspSize) 7290c16b537SWarner Losh { 73019fcbaf1SConrad Meyer return HUF_compress_internal(dst, dstSize, src, srcSize, 73119fcbaf1SConrad Meyer maxSymbolValue, huffLog, 1 /*single stream*/, 73219fcbaf1SConrad Meyer workSpace, wkspSize, 73319fcbaf1SConrad Meyer NULL, NULL, 0, 0 /*bmi2*/); 7340c16b537SWarner Losh } 7350c16b537SWarner Losh 7360c16b537SWarner Losh size_t HUF_compress1X_repeat (void* dst, size_t dstSize, 7370c16b537SWarner Losh const void* src, size_t srcSize, 7380c16b537SWarner Losh unsigned maxSymbolValue, unsigned huffLog, 7390c16b537SWarner Losh void* workSpace, size_t wkspSize, 74019fcbaf1SConrad Meyer HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) 7410c16b537SWarner Losh { 74219fcbaf1SConrad Meyer return HUF_compress_internal(dst, dstSize, src, srcSize, 74319fcbaf1SConrad Meyer maxSymbolValue, huffLog, 1 /*single stream*/, 74419fcbaf1SConrad Meyer workSpace, wkspSize, hufTable, 74519fcbaf1SConrad Meyer repeat, preferRepeat, bmi2); 7460c16b537SWarner Losh } 7470c16b537SWarner Losh 7480c16b537SWarner Losh size_t HUF_compress1X (void* dst, size_t dstSize, 7490c16b537SWarner Losh const void* src, size_t srcSize, 7500c16b537SWarner Losh unsigned maxSymbolValue, unsigned huffLog) 7510c16b537SWarner Losh { 75219fcbaf1SConrad Meyer unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; 7530c16b537SWarner Losh return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); 7540c16b537SWarner Losh } 7550c16b537SWarner Losh 75619fcbaf1SConrad Meyer /* HUF_compress4X_repeat(): 75719fcbaf1SConrad Meyer * compress input using 4 streams. 75819fcbaf1SConrad Meyer * provide workspace to generate compression tables */ 7590c16b537SWarner Losh size_t HUF_compress4X_wksp (void* dst, size_t dstSize, 7600c16b537SWarner Losh const void* src, size_t srcSize, 7610c16b537SWarner Losh unsigned maxSymbolValue, unsigned huffLog, 7620c16b537SWarner Losh void* workSpace, size_t wkspSize) 7630c16b537SWarner Losh { 76419fcbaf1SConrad Meyer return HUF_compress_internal(dst, dstSize, src, srcSize, 76519fcbaf1SConrad Meyer maxSymbolValue, huffLog, 0 /*4 streams*/, 76619fcbaf1SConrad Meyer workSpace, wkspSize, 76719fcbaf1SConrad Meyer NULL, NULL, 0, 0 /*bmi2*/); 7680c16b537SWarner Losh } 7690c16b537SWarner Losh 77019fcbaf1SConrad Meyer /* HUF_compress4X_repeat(): 77119fcbaf1SConrad Meyer * compress input using 4 streams. 77219fcbaf1SConrad Meyer * re-use an existing huffman compression table */ 7730c16b537SWarner Losh size_t HUF_compress4X_repeat (void* dst, size_t dstSize, 7740c16b537SWarner Losh const void* src, size_t srcSize, 7750c16b537SWarner Losh unsigned maxSymbolValue, unsigned huffLog, 7760c16b537SWarner Losh void* workSpace, size_t wkspSize, 77719fcbaf1SConrad Meyer HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) 7780c16b537SWarner Losh { 77919fcbaf1SConrad Meyer return HUF_compress_internal(dst, dstSize, src, srcSize, 78019fcbaf1SConrad Meyer maxSymbolValue, huffLog, 0 /* 4 streams */, 78119fcbaf1SConrad Meyer workSpace, wkspSize, 78219fcbaf1SConrad Meyer hufTable, repeat, preferRepeat, bmi2); 7830c16b537SWarner Losh } 7840c16b537SWarner Losh 7850c16b537SWarner Losh size_t HUF_compress2 (void* dst, size_t dstSize, 7860c16b537SWarner Losh const void* src, size_t srcSize, 7870c16b537SWarner Losh unsigned maxSymbolValue, unsigned huffLog) 7880c16b537SWarner Losh { 78919fcbaf1SConrad Meyer unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; 7900c16b537SWarner Losh return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); 7910c16b537SWarner Losh } 7920c16b537SWarner Losh 7930c16b537SWarner Losh size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize) 7940c16b537SWarner Losh { 79519fcbaf1SConrad Meyer return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT); 7960c16b537SWarner Losh } 797