xref: /freebsd/sys/contrib/zstd/lib/common/entropy_common.c (revision 0f743729abbfc5c9d78a713f72241a4d4bd601ec)
10c16b537SWarner Losh /*
20c16b537SWarner Losh    Common functions of New Generation Entropy library
30c16b537SWarner Losh    Copyright (C) 2016, Yann Collet.
40c16b537SWarner Losh 
50c16b537SWarner Losh    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
60c16b537SWarner Losh 
70c16b537SWarner Losh    Redistribution and use in source and binary forms, with or without
80c16b537SWarner Losh    modification, are permitted provided that the following conditions are
90c16b537SWarner Losh    met:
100c16b537SWarner Losh 
110c16b537SWarner Losh        * Redistributions of source code must retain the above copyright
120c16b537SWarner Losh    notice, this list of conditions and the following disclaimer.
130c16b537SWarner Losh        * Redistributions in binary form must reproduce the above
140c16b537SWarner Losh    copyright notice, this list of conditions and the following disclaimer
150c16b537SWarner Losh    in the documentation and/or other materials provided with the
160c16b537SWarner Losh    distribution.
170c16b537SWarner Losh 
180c16b537SWarner Losh    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
190c16b537SWarner Losh    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
200c16b537SWarner Losh    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
210c16b537SWarner Losh    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
220c16b537SWarner Losh    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
230c16b537SWarner Losh    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
240c16b537SWarner Losh    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
250c16b537SWarner Losh    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
260c16b537SWarner Losh    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
270c16b537SWarner Losh    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
280c16b537SWarner Losh    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
290c16b537SWarner Losh 
300c16b537SWarner Losh     You can contact the author at :
310c16b537SWarner Losh     - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
320c16b537SWarner Losh     - Public forum : https://groups.google.com/forum/#!forum/lz4c
330c16b537SWarner Losh *************************************************************************** */
340c16b537SWarner Losh 
350c16b537SWarner Losh /* *************************************
360c16b537SWarner Losh *  Dependencies
370c16b537SWarner Losh ***************************************/
380c16b537SWarner Losh #include "mem.h"
390c16b537SWarner Losh #include "error_private.h"       /* ERR_*, ERROR */
400c16b537SWarner Losh #define FSE_STATIC_LINKING_ONLY  /* FSE_MIN_TABLELOG */
410c16b537SWarner Losh #include "fse.h"
420c16b537SWarner Losh #define HUF_STATIC_LINKING_ONLY  /* HUF_TABLELOG_ABSOLUTEMAX */
430c16b537SWarner Losh #include "huf.h"
440c16b537SWarner Losh 
450c16b537SWarner Losh 
460c16b537SWarner Losh /*===   Version   ===*/
470c16b537SWarner Losh unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; }
480c16b537SWarner Losh 
490c16b537SWarner Losh 
500c16b537SWarner Losh /*===   Error Management   ===*/
510c16b537SWarner Losh unsigned FSE_isError(size_t code) { return ERR_isError(code); }
520c16b537SWarner Losh const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); }
530c16b537SWarner Losh 
540c16b537SWarner Losh unsigned HUF_isError(size_t code) { return ERR_isError(code); }
550c16b537SWarner Losh const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
560c16b537SWarner Losh 
570c16b537SWarner Losh 
580c16b537SWarner Losh /*-**************************************************************
590c16b537SWarner Losh *  FSE NCount encoding-decoding
600c16b537SWarner Losh ****************************************************************/
610c16b537SWarner Losh size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
620c16b537SWarner Losh                  const void* headerBuffer, size_t hbSize)
630c16b537SWarner Losh {
640c16b537SWarner Losh     const BYTE* const istart = (const BYTE*) headerBuffer;
650c16b537SWarner Losh     const BYTE* const iend = istart + hbSize;
660c16b537SWarner Losh     const BYTE* ip = istart;
670c16b537SWarner Losh     int nbBits;
680c16b537SWarner Losh     int remaining;
690c16b537SWarner Losh     int threshold;
700c16b537SWarner Losh     U32 bitStream;
710c16b537SWarner Losh     int bitCount;
720c16b537SWarner Losh     unsigned charnum = 0;
730c16b537SWarner Losh     int previous0 = 0;
740c16b537SWarner Losh 
75*0f743729SConrad Meyer     if (hbSize < 4) {
76*0f743729SConrad Meyer         /* This function only works when hbSize >= 4 */
77*0f743729SConrad Meyer         char buffer[4];
78*0f743729SConrad Meyer         memset(buffer, 0, sizeof(buffer));
79*0f743729SConrad Meyer         memcpy(buffer, headerBuffer, hbSize);
80*0f743729SConrad Meyer         {   size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr,
81*0f743729SConrad Meyer                                                     buffer, sizeof(buffer));
82*0f743729SConrad Meyer             if (FSE_isError(countSize)) return countSize;
83*0f743729SConrad Meyer             if (countSize > hbSize) return ERROR(corruption_detected);
84*0f743729SConrad Meyer             return countSize;
85*0f743729SConrad Meyer     }   }
86*0f743729SConrad Meyer     assert(hbSize >= 4);
87*0f743729SConrad Meyer 
88*0f743729SConrad Meyer     /* init */
89*0f743729SConrad Meyer     memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0]));   /* all symbols not present in NCount have a frequency of 0 */
900c16b537SWarner Losh     bitStream = MEM_readLE32(ip);
910c16b537SWarner Losh     nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
920c16b537SWarner Losh     if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
930c16b537SWarner Losh     bitStream >>= 4;
940c16b537SWarner Losh     bitCount = 4;
950c16b537SWarner Losh     *tableLogPtr = nbBits;
960c16b537SWarner Losh     remaining = (1<<nbBits)+1;
970c16b537SWarner Losh     threshold = 1<<nbBits;
980c16b537SWarner Losh     nbBits++;
990c16b537SWarner Losh 
1000c16b537SWarner Losh     while ((remaining>1) & (charnum<=*maxSVPtr)) {
1010c16b537SWarner Losh         if (previous0) {
1020c16b537SWarner Losh             unsigned n0 = charnum;
1030c16b537SWarner Losh             while ((bitStream & 0xFFFF) == 0xFFFF) {
1040c16b537SWarner Losh                 n0 += 24;
1050c16b537SWarner Losh                 if (ip < iend-5) {
1060c16b537SWarner Losh                     ip += 2;
1070c16b537SWarner Losh                     bitStream = MEM_readLE32(ip) >> bitCount;
1080c16b537SWarner Losh                 } else {
1090c16b537SWarner Losh                     bitStream >>= 16;
1100c16b537SWarner Losh                     bitCount   += 16;
1110c16b537SWarner Losh             }   }
1120c16b537SWarner Losh             while ((bitStream & 3) == 3) {
1130c16b537SWarner Losh                 n0 += 3;
1140c16b537SWarner Losh                 bitStream >>= 2;
1150c16b537SWarner Losh                 bitCount += 2;
1160c16b537SWarner Losh             }
1170c16b537SWarner Losh             n0 += bitStream & 3;
1180c16b537SWarner Losh             bitCount += 2;
1190c16b537SWarner Losh             if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
1200c16b537SWarner Losh             while (charnum < n0) normalizedCounter[charnum++] = 0;
1210c16b537SWarner Losh             if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
122*0f743729SConrad Meyer                 assert((bitCount >> 3) <= 3); /* For first condition to work */
1230c16b537SWarner Losh                 ip += bitCount>>3;
1240c16b537SWarner Losh                 bitCount &= 7;
1250c16b537SWarner Losh                 bitStream = MEM_readLE32(ip) >> bitCount;
1260c16b537SWarner Losh             } else {
1270c16b537SWarner Losh                 bitStream >>= 2;
1280c16b537SWarner Losh         }   }
1290c16b537SWarner Losh         {   int const max = (2*threshold-1) - remaining;
1300c16b537SWarner Losh             int count;
1310c16b537SWarner Losh 
1320c16b537SWarner Losh             if ((bitStream & (threshold-1)) < (U32)max) {
1330c16b537SWarner Losh                 count = bitStream & (threshold-1);
1340c16b537SWarner Losh                 bitCount += nbBits-1;
1350c16b537SWarner Losh             } else {
1360c16b537SWarner Losh                 count = bitStream & (2*threshold-1);
1370c16b537SWarner Losh                 if (count >= threshold) count -= max;
1380c16b537SWarner Losh                 bitCount += nbBits;
1390c16b537SWarner Losh             }
1400c16b537SWarner Losh 
1410c16b537SWarner Losh             count--;   /* extra accuracy */
1420c16b537SWarner Losh             remaining -= count < 0 ? -count : count;   /* -1 means +1 */
1430c16b537SWarner Losh             normalizedCounter[charnum++] = (short)count;
1440c16b537SWarner Losh             previous0 = !count;
1450c16b537SWarner Losh             while (remaining < threshold) {
1460c16b537SWarner Losh                 nbBits--;
1470c16b537SWarner Losh                 threshold >>= 1;
1480c16b537SWarner Losh             }
1490c16b537SWarner Losh 
1500c16b537SWarner Losh             if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
1510c16b537SWarner Losh                 ip += bitCount>>3;
1520c16b537SWarner Losh                 bitCount &= 7;
1530c16b537SWarner Losh             } else {
1540c16b537SWarner Losh                 bitCount -= (int)(8 * (iend - 4 - ip));
1550c16b537SWarner Losh                 ip = iend - 4;
1560c16b537SWarner Losh             }
1570c16b537SWarner Losh             bitStream = MEM_readLE32(ip) >> (bitCount & 31);
1580c16b537SWarner Losh     }   }   /* while ((remaining>1) & (charnum<=*maxSVPtr)) */
1590c16b537SWarner Losh     if (remaining != 1) return ERROR(corruption_detected);
1600c16b537SWarner Losh     if (bitCount > 32) return ERROR(corruption_detected);
1610c16b537SWarner Losh     *maxSVPtr = charnum-1;
1620c16b537SWarner Losh 
1630c16b537SWarner Losh     ip += (bitCount+7)>>3;
1640c16b537SWarner Losh     return ip-istart;
1650c16b537SWarner Losh }
1660c16b537SWarner Losh 
1670c16b537SWarner Losh 
1680c16b537SWarner Losh /*! HUF_readStats() :
1690c16b537SWarner Losh     Read compact Huffman tree, saved by HUF_writeCTable().
1700c16b537SWarner Losh     `huffWeight` is destination buffer.
1710c16b537SWarner Losh     `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32.
1720c16b537SWarner Losh     @return : size read from `src` , or an error Code .
1730c16b537SWarner Losh     Note : Needed by HUF_readCTable() and HUF_readDTableX?() .
1740c16b537SWarner Losh */
1750c16b537SWarner Losh size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
1760c16b537SWarner Losh                      U32* nbSymbolsPtr, U32* tableLogPtr,
1770c16b537SWarner Losh                      const void* src, size_t srcSize)
1780c16b537SWarner Losh {
1790c16b537SWarner Losh     U32 weightTotal;
1800c16b537SWarner Losh     const BYTE* ip = (const BYTE*) src;
1810c16b537SWarner Losh     size_t iSize;
1820c16b537SWarner Losh     size_t oSize;
1830c16b537SWarner Losh 
1840c16b537SWarner Losh     if (!srcSize) return ERROR(srcSize_wrong);
1850c16b537SWarner Losh     iSize = ip[0];
1860c16b537SWarner Losh     /* memset(huffWeight, 0, hwSize);   *//* is not necessary, even though some analyzer complain ... */
1870c16b537SWarner Losh 
1880c16b537SWarner Losh     if (iSize >= 128) {  /* special header */
1890c16b537SWarner Losh         oSize = iSize - 127;
1900c16b537SWarner Losh         iSize = ((oSize+1)/2);
1910c16b537SWarner Losh         if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
1920c16b537SWarner Losh         if (oSize >= hwSize) return ERROR(corruption_detected);
1930c16b537SWarner Losh         ip += 1;
1940c16b537SWarner Losh         {   U32 n;
1950c16b537SWarner Losh             for (n=0; n<oSize; n+=2) {
1960c16b537SWarner Losh                 huffWeight[n]   = ip[n/2] >> 4;
1970c16b537SWarner Losh                 huffWeight[n+1] = ip[n/2] & 15;
1980c16b537SWarner Losh     }   }   }
1990c16b537SWarner Losh     else  {   /* header compressed with FSE (normal case) */
2000c16b537SWarner Losh         FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)];  /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */
2010c16b537SWarner Losh         if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
2020c16b537SWarner Losh         oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6);   /* max (hwSize-1) values decoded, as last one is implied */
2030c16b537SWarner Losh         if (FSE_isError(oSize)) return oSize;
2040c16b537SWarner Losh     }
2050c16b537SWarner Losh 
2060c16b537SWarner Losh     /* collect weight stats */
2070c16b537SWarner Losh     memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
2080c16b537SWarner Losh     weightTotal = 0;
2090c16b537SWarner Losh     {   U32 n; for (n=0; n<oSize; n++) {
2100c16b537SWarner Losh             if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
2110c16b537SWarner Losh             rankStats[huffWeight[n]]++;
2120c16b537SWarner Losh             weightTotal += (1 << huffWeight[n]) >> 1;
2130c16b537SWarner Losh     }   }
2140c16b537SWarner Losh     if (weightTotal == 0) return ERROR(corruption_detected);
2150c16b537SWarner Losh 
2160c16b537SWarner Losh     /* get last non-null symbol weight (implied, total must be 2^n) */
2170c16b537SWarner Losh     {   U32 const tableLog = BIT_highbit32(weightTotal) + 1;
2180c16b537SWarner Losh         if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
2190c16b537SWarner Losh         *tableLogPtr = tableLog;
2200c16b537SWarner Losh         /* determine last weight */
2210c16b537SWarner Losh         {   U32 const total = 1 << tableLog;
2220c16b537SWarner Losh             U32 const rest = total - weightTotal;
2230c16b537SWarner Losh             U32 const verif = 1 << BIT_highbit32(rest);
2240c16b537SWarner Losh             U32 const lastWeight = BIT_highbit32(rest) + 1;
2250c16b537SWarner Losh             if (verif != rest) return ERROR(corruption_detected);    /* last value must be a clean power of 2 */
2260c16b537SWarner Losh             huffWeight[oSize] = (BYTE)lastWeight;
2270c16b537SWarner Losh             rankStats[lastWeight]++;
2280c16b537SWarner Losh     }   }
2290c16b537SWarner Losh 
2300c16b537SWarner Losh     /* check tree construction validity */
2310c16b537SWarner Losh     if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected);   /* by construction : at least 2 elts of rank 1, must be even */
2320c16b537SWarner Losh 
2330c16b537SWarner Losh     /* results */
2340c16b537SWarner Losh     *nbSymbolsPtr = (U32)(oSize+1);
2350c16b537SWarner Losh     return iSize+1;
2360c16b537SWarner Losh }
237