10c16b537SWarner Losh /* 20c16b537SWarner Losh Common functions of New Generation Entropy library 30c16b537SWarner Losh Copyright (C) 2016, Yann Collet. 40c16b537SWarner Losh 50c16b537SWarner Losh BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 60c16b537SWarner Losh 70c16b537SWarner Losh Redistribution and use in source and binary forms, with or without 80c16b537SWarner Losh modification, are permitted provided that the following conditions are 90c16b537SWarner Losh met: 100c16b537SWarner Losh 110c16b537SWarner Losh * Redistributions of source code must retain the above copyright 120c16b537SWarner Losh notice, this list of conditions and the following disclaimer. 130c16b537SWarner Losh * Redistributions in binary form must reproduce the above 140c16b537SWarner Losh copyright notice, this list of conditions and the following disclaimer 150c16b537SWarner Losh in the documentation and/or other materials provided with the 160c16b537SWarner Losh distribution. 170c16b537SWarner Losh 180c16b537SWarner Losh THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 190c16b537SWarner Losh "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 200c16b537SWarner Losh LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 210c16b537SWarner Losh A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 220c16b537SWarner Losh OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 230c16b537SWarner Losh SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 240c16b537SWarner Losh LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 250c16b537SWarner Losh DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 260c16b537SWarner Losh THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 270c16b537SWarner Losh (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 280c16b537SWarner Losh OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 290c16b537SWarner Losh 300c16b537SWarner Losh You can contact the author at : 310c16b537SWarner Losh - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy 320c16b537SWarner Losh - Public forum : https://groups.google.com/forum/#!forum/lz4c 330c16b537SWarner Losh *************************************************************************** */ 340c16b537SWarner Losh 350c16b537SWarner Losh /* ************************************* 360c16b537SWarner Losh * Dependencies 370c16b537SWarner Losh ***************************************/ 380c16b537SWarner Losh #include "mem.h" 390c16b537SWarner Losh #include "error_private.h" /* ERR_*, ERROR */ 400c16b537SWarner Losh #define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */ 410c16b537SWarner Losh #include "fse.h" 420c16b537SWarner Losh #define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */ 430c16b537SWarner Losh #include "huf.h" 440c16b537SWarner Losh 450c16b537SWarner Losh 460c16b537SWarner Losh /*=== Version ===*/ 470c16b537SWarner Losh unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; } 480c16b537SWarner Losh 490c16b537SWarner Losh 500c16b537SWarner Losh /*=== Error Management ===*/ 510c16b537SWarner Losh unsigned FSE_isError(size_t code) { return ERR_isError(code); } 520c16b537SWarner Losh const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); } 530c16b537SWarner Losh 540c16b537SWarner Losh unsigned HUF_isError(size_t code) { return ERR_isError(code); } 550c16b537SWarner Losh const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); } 560c16b537SWarner Losh 570c16b537SWarner Losh 580c16b537SWarner Losh /*-************************************************************** 590c16b537SWarner Losh * FSE NCount encoding-decoding 600c16b537SWarner Losh ****************************************************************/ 610c16b537SWarner Losh size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, 620c16b537SWarner Losh const void* headerBuffer, size_t hbSize) 630c16b537SWarner Losh { 640c16b537SWarner Losh const BYTE* const istart = (const BYTE*) headerBuffer; 650c16b537SWarner Losh const BYTE* const iend = istart + hbSize; 660c16b537SWarner Losh const BYTE* ip = istart; 670c16b537SWarner Losh int nbBits; 680c16b537SWarner Losh int remaining; 690c16b537SWarner Losh int threshold; 700c16b537SWarner Losh U32 bitStream; 710c16b537SWarner Losh int bitCount; 720c16b537SWarner Losh unsigned charnum = 0; 730c16b537SWarner Losh int previous0 = 0; 740c16b537SWarner Losh 75*0f743729SConrad Meyer if (hbSize < 4) { 76*0f743729SConrad Meyer /* This function only works when hbSize >= 4 */ 77*0f743729SConrad Meyer char buffer[4]; 78*0f743729SConrad Meyer memset(buffer, 0, sizeof(buffer)); 79*0f743729SConrad Meyer memcpy(buffer, headerBuffer, hbSize); 80*0f743729SConrad Meyer { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr, 81*0f743729SConrad Meyer buffer, sizeof(buffer)); 82*0f743729SConrad Meyer if (FSE_isError(countSize)) return countSize; 83*0f743729SConrad Meyer if (countSize > hbSize) return ERROR(corruption_detected); 84*0f743729SConrad Meyer return countSize; 85*0f743729SConrad Meyer } } 86*0f743729SConrad Meyer assert(hbSize >= 4); 87*0f743729SConrad Meyer 88*0f743729SConrad Meyer /* init */ 89*0f743729SConrad Meyer memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */ 900c16b537SWarner Losh bitStream = MEM_readLE32(ip); 910c16b537SWarner Losh nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ 920c16b537SWarner Losh if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge); 930c16b537SWarner Losh bitStream >>= 4; 940c16b537SWarner Losh bitCount = 4; 950c16b537SWarner Losh *tableLogPtr = nbBits; 960c16b537SWarner Losh remaining = (1<<nbBits)+1; 970c16b537SWarner Losh threshold = 1<<nbBits; 980c16b537SWarner Losh nbBits++; 990c16b537SWarner Losh 1000c16b537SWarner Losh while ((remaining>1) & (charnum<=*maxSVPtr)) { 1010c16b537SWarner Losh if (previous0) { 1020c16b537SWarner Losh unsigned n0 = charnum; 1030c16b537SWarner Losh while ((bitStream & 0xFFFF) == 0xFFFF) { 1040c16b537SWarner Losh n0 += 24; 1050c16b537SWarner Losh if (ip < iend-5) { 1060c16b537SWarner Losh ip += 2; 1070c16b537SWarner Losh bitStream = MEM_readLE32(ip) >> bitCount; 1080c16b537SWarner Losh } else { 1090c16b537SWarner Losh bitStream >>= 16; 1100c16b537SWarner Losh bitCount += 16; 1110c16b537SWarner Losh } } 1120c16b537SWarner Losh while ((bitStream & 3) == 3) { 1130c16b537SWarner Losh n0 += 3; 1140c16b537SWarner Losh bitStream >>= 2; 1150c16b537SWarner Losh bitCount += 2; 1160c16b537SWarner Losh } 1170c16b537SWarner Losh n0 += bitStream & 3; 1180c16b537SWarner Losh bitCount += 2; 1190c16b537SWarner Losh if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall); 1200c16b537SWarner Losh while (charnum < n0) normalizedCounter[charnum++] = 0; 1210c16b537SWarner Losh if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { 122*0f743729SConrad Meyer assert((bitCount >> 3) <= 3); /* For first condition to work */ 1230c16b537SWarner Losh ip += bitCount>>3; 1240c16b537SWarner Losh bitCount &= 7; 1250c16b537SWarner Losh bitStream = MEM_readLE32(ip) >> bitCount; 1260c16b537SWarner Losh } else { 1270c16b537SWarner Losh bitStream >>= 2; 1280c16b537SWarner Losh } } 1290c16b537SWarner Losh { int const max = (2*threshold-1) - remaining; 1300c16b537SWarner Losh int count; 1310c16b537SWarner Losh 1320c16b537SWarner Losh if ((bitStream & (threshold-1)) < (U32)max) { 1330c16b537SWarner Losh count = bitStream & (threshold-1); 1340c16b537SWarner Losh bitCount += nbBits-1; 1350c16b537SWarner Losh } else { 1360c16b537SWarner Losh count = bitStream & (2*threshold-1); 1370c16b537SWarner Losh if (count >= threshold) count -= max; 1380c16b537SWarner Losh bitCount += nbBits; 1390c16b537SWarner Losh } 1400c16b537SWarner Losh 1410c16b537SWarner Losh count--; /* extra accuracy */ 1420c16b537SWarner Losh remaining -= count < 0 ? -count : count; /* -1 means +1 */ 1430c16b537SWarner Losh normalizedCounter[charnum++] = (short)count; 1440c16b537SWarner Losh previous0 = !count; 1450c16b537SWarner Losh while (remaining < threshold) { 1460c16b537SWarner Losh nbBits--; 1470c16b537SWarner Losh threshold >>= 1; 1480c16b537SWarner Losh } 1490c16b537SWarner Losh 1500c16b537SWarner Losh if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { 1510c16b537SWarner Losh ip += bitCount>>3; 1520c16b537SWarner Losh bitCount &= 7; 1530c16b537SWarner Losh } else { 1540c16b537SWarner Losh bitCount -= (int)(8 * (iend - 4 - ip)); 1550c16b537SWarner Losh ip = iend - 4; 1560c16b537SWarner Losh } 1570c16b537SWarner Losh bitStream = MEM_readLE32(ip) >> (bitCount & 31); 1580c16b537SWarner Losh } } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */ 1590c16b537SWarner Losh if (remaining != 1) return ERROR(corruption_detected); 1600c16b537SWarner Losh if (bitCount > 32) return ERROR(corruption_detected); 1610c16b537SWarner Losh *maxSVPtr = charnum-1; 1620c16b537SWarner Losh 1630c16b537SWarner Losh ip += (bitCount+7)>>3; 1640c16b537SWarner Losh return ip-istart; 1650c16b537SWarner Losh } 1660c16b537SWarner Losh 1670c16b537SWarner Losh 1680c16b537SWarner Losh /*! HUF_readStats() : 1690c16b537SWarner Losh Read compact Huffman tree, saved by HUF_writeCTable(). 1700c16b537SWarner Losh `huffWeight` is destination buffer. 1710c16b537SWarner Losh `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32. 1720c16b537SWarner Losh @return : size read from `src` , or an error Code . 1730c16b537SWarner Losh Note : Needed by HUF_readCTable() and HUF_readDTableX?() . 1740c16b537SWarner Losh */ 1750c16b537SWarner Losh size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, 1760c16b537SWarner Losh U32* nbSymbolsPtr, U32* tableLogPtr, 1770c16b537SWarner Losh const void* src, size_t srcSize) 1780c16b537SWarner Losh { 1790c16b537SWarner Losh U32 weightTotal; 1800c16b537SWarner Losh const BYTE* ip = (const BYTE*) src; 1810c16b537SWarner Losh size_t iSize; 1820c16b537SWarner Losh size_t oSize; 1830c16b537SWarner Losh 1840c16b537SWarner Losh if (!srcSize) return ERROR(srcSize_wrong); 1850c16b537SWarner Losh iSize = ip[0]; 1860c16b537SWarner Losh /* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */ 1870c16b537SWarner Losh 1880c16b537SWarner Losh if (iSize >= 128) { /* special header */ 1890c16b537SWarner Losh oSize = iSize - 127; 1900c16b537SWarner Losh iSize = ((oSize+1)/2); 1910c16b537SWarner Losh if (iSize+1 > srcSize) return ERROR(srcSize_wrong); 1920c16b537SWarner Losh if (oSize >= hwSize) return ERROR(corruption_detected); 1930c16b537SWarner Losh ip += 1; 1940c16b537SWarner Losh { U32 n; 1950c16b537SWarner Losh for (n=0; n<oSize; n+=2) { 1960c16b537SWarner Losh huffWeight[n] = ip[n/2] >> 4; 1970c16b537SWarner Losh huffWeight[n+1] = ip[n/2] & 15; 1980c16b537SWarner Losh } } } 1990c16b537SWarner Losh else { /* header compressed with FSE (normal case) */ 2000c16b537SWarner Losh FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */ 2010c16b537SWarner Losh if (iSize+1 > srcSize) return ERROR(srcSize_wrong); 2020c16b537SWarner Losh oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */ 2030c16b537SWarner Losh if (FSE_isError(oSize)) return oSize; 2040c16b537SWarner Losh } 2050c16b537SWarner Losh 2060c16b537SWarner Losh /* collect weight stats */ 2070c16b537SWarner Losh memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32)); 2080c16b537SWarner Losh weightTotal = 0; 2090c16b537SWarner Losh { U32 n; for (n=0; n<oSize; n++) { 2100c16b537SWarner Losh if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected); 2110c16b537SWarner Losh rankStats[huffWeight[n]]++; 2120c16b537SWarner Losh weightTotal += (1 << huffWeight[n]) >> 1; 2130c16b537SWarner Losh } } 2140c16b537SWarner Losh if (weightTotal == 0) return ERROR(corruption_detected); 2150c16b537SWarner Losh 2160c16b537SWarner Losh /* get last non-null symbol weight (implied, total must be 2^n) */ 2170c16b537SWarner Losh { U32 const tableLog = BIT_highbit32(weightTotal) + 1; 2180c16b537SWarner Losh if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected); 2190c16b537SWarner Losh *tableLogPtr = tableLog; 2200c16b537SWarner Losh /* determine last weight */ 2210c16b537SWarner Losh { U32 const total = 1 << tableLog; 2220c16b537SWarner Losh U32 const rest = total - weightTotal; 2230c16b537SWarner Losh U32 const verif = 1 << BIT_highbit32(rest); 2240c16b537SWarner Losh U32 const lastWeight = BIT_highbit32(rest) + 1; 2250c16b537SWarner Losh if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */ 2260c16b537SWarner Losh huffWeight[oSize] = (BYTE)lastWeight; 2270c16b537SWarner Losh rankStats[lastWeight]++; 2280c16b537SWarner Losh } } 2290c16b537SWarner Losh 2300c16b537SWarner Losh /* check tree construction validity */ 2310c16b537SWarner Losh if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */ 2320c16b537SWarner Losh 2330c16b537SWarner Losh /* results */ 2340c16b537SWarner Losh *nbSymbolsPtr = (U32)(oSize+1); 2350c16b537SWarner Losh return iSize+1; 2360c16b537SWarner Losh } 237