xref: /freebsd/sys/contrib/zstd/lib/common/entropy_common.c (revision 4928135658a9d0eaee37003df6137ab363fcb0b4)
1 /*
2    Common functions of New Generation Entropy library
3    Copyright (C) 2016, Yann Collet.
4 
5    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6 
7    Redistribution and use in source and binary forms, with or without
8    modification, are permitted provided that the following conditions are
9    met:
10 
11        * Redistributions of source code must retain the above copyright
12    notice, this list of conditions and the following disclaimer.
13        * Redistributions in binary form must reproduce the above
14    copyright notice, this list of conditions and the following disclaimer
15    in the documentation and/or other materials provided with the
16    distribution.
17 
18    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30     You can contact the author at :
31     - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
32     - Public forum : https://groups.google.com/forum/#!forum/lz4c
33 *************************************************************************** */
34 
35 /* *************************************
36 *  Dependencies
37 ***************************************/
38 #include "mem.h"
39 #include "error_private.h"       /* ERR_*, ERROR */
40 #define FSE_STATIC_LINKING_ONLY  /* FSE_MIN_TABLELOG */
41 #include "fse.h"
42 #define HUF_STATIC_LINKING_ONLY  /* HUF_TABLELOG_ABSOLUTEMAX */
43 #include "huf.h"
44 
45 
46 /*===   Version   ===*/
47 unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; }
48 
49 
50 /*===   Error Management   ===*/
/* Tells whether `code` is an error code; forwards to ERR_isError(). */
unsigned FSE_isError(size_t code)
{
    unsigned const isErr = ERR_isError(code);
    return isErr;
}
/* Gives the string associated with `code`; forwards to ERR_getErrorName(). */
const char* FSE_getErrorName(size_t code)
{
    const char* const errName = ERR_getErrorName(code);
    return errName;
}
53 
/* Tells whether `code` is an error code; forwards to ERR_isError(). */
unsigned HUF_isError(size_t code)
{
    unsigned const isErr = ERR_isError(code);
    return isErr;
}
/* Gives the string associated with `code`; forwards to ERR_getErrorName(). */
const char* HUF_getErrorName(size_t code)
{
    const char* const errName = ERR_getErrorName(code);
    return errName;
}
56 
57 
58 /*-**************************************************************
59 *  FSE NCount encoding-decoding
60 ****************************************************************/
61 size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
62                  const void* headerBuffer, size_t hbSize)
63 {
64     const BYTE* const istart = (const BYTE*) headerBuffer;
65     const BYTE* const iend = istart + hbSize;
66     const BYTE* ip = istart;
67     int nbBits;
68     int remaining;
69     int threshold;
70     U32 bitStream;
71     int bitCount;
72     unsigned charnum = 0;
73     int previous0 = 0;
74 
75     if (hbSize < 4) return ERROR(srcSize_wrong);
76     bitStream = MEM_readLE32(ip);
77     nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
78     if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
79     bitStream >>= 4;
80     bitCount = 4;
81     *tableLogPtr = nbBits;
82     remaining = (1<<nbBits)+1;
83     threshold = 1<<nbBits;
84     nbBits++;
85 
86     while ((remaining>1) & (charnum<=*maxSVPtr)) {
87         if (previous0) {
88             unsigned n0 = charnum;
89             while ((bitStream & 0xFFFF) == 0xFFFF) {
90                 n0 += 24;
91                 if (ip < iend-5) {
92                     ip += 2;
93                     bitStream = MEM_readLE32(ip) >> bitCount;
94                 } else {
95                     bitStream >>= 16;
96                     bitCount   += 16;
97             }   }
98             while ((bitStream & 3) == 3) {
99                 n0 += 3;
100                 bitStream >>= 2;
101                 bitCount += 2;
102             }
103             n0 += bitStream & 3;
104             bitCount += 2;
105             if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
106             while (charnum < n0) normalizedCounter[charnum++] = 0;
107             if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
108                 ip += bitCount>>3;
109                 bitCount &= 7;
110                 bitStream = MEM_readLE32(ip) >> bitCount;
111             } else {
112                 bitStream >>= 2;
113         }   }
114         {   int const max = (2*threshold-1) - remaining;
115             int count;
116 
117             if ((bitStream & (threshold-1)) < (U32)max) {
118                 count = bitStream & (threshold-1);
119                 bitCount += nbBits-1;
120             } else {
121                 count = bitStream & (2*threshold-1);
122                 if (count >= threshold) count -= max;
123                 bitCount += nbBits;
124             }
125 
126             count--;   /* extra accuracy */
127             remaining -= count < 0 ? -count : count;   /* -1 means +1 */
128             normalizedCounter[charnum++] = (short)count;
129             previous0 = !count;
130             while (remaining < threshold) {
131                 nbBits--;
132                 threshold >>= 1;
133             }
134 
135             if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
136                 ip += bitCount>>3;
137                 bitCount &= 7;
138             } else {
139                 bitCount -= (int)(8 * (iend - 4 - ip));
140                 ip = iend - 4;
141             }
142             bitStream = MEM_readLE32(ip) >> (bitCount & 31);
143     }   }   /* while ((remaining>1) & (charnum<=*maxSVPtr)) */
144     if (remaining != 1) return ERROR(corruption_detected);
145     if (bitCount > 32) return ERROR(corruption_detected);
146     *maxSVPtr = charnum-1;
147 
148     ip += (bitCount+7)>>3;
149     return ip-istart;
150 }
151 
152 
153 /*! HUF_readStats() :
154     Read compact Huffman tree, saved by HUF_writeCTable().
155     `huffWeight` is destination buffer.
156     `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32.
157     @return : size read from `src` , or an error Code .
158     Note : Needed by HUF_readCTable() and HUF_readDTableX?() .
159 */
160 size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
161                      U32* nbSymbolsPtr, U32* tableLogPtr,
162                      const void* src, size_t srcSize)
163 {
164     U32 weightTotal;
165     const BYTE* ip = (const BYTE*) src;
166     size_t iSize;
167     size_t oSize;
168 
169     if (!srcSize) return ERROR(srcSize_wrong);
170     iSize = ip[0];
171     /* memset(huffWeight, 0, hwSize);   *//* is not necessary, even though some analyzer complain ... */
172 
173     if (iSize >= 128) {  /* special header */
174         oSize = iSize - 127;
175         iSize = ((oSize+1)/2);
176         if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
177         if (oSize >= hwSize) return ERROR(corruption_detected);
178         ip += 1;
179         {   U32 n;
180             for (n=0; n<oSize; n+=2) {
181                 huffWeight[n]   = ip[n/2] >> 4;
182                 huffWeight[n+1] = ip[n/2] & 15;
183     }   }   }
184     else  {   /* header compressed with FSE (normal case) */
185         FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)];  /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */
186         if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
187         oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6);   /* max (hwSize-1) values decoded, as last one is implied */
188         if (FSE_isError(oSize)) return oSize;
189     }
190 
191     /* collect weight stats */
192     memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
193     weightTotal = 0;
194     {   U32 n; for (n=0; n<oSize; n++) {
195             if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
196             rankStats[huffWeight[n]]++;
197             weightTotal += (1 << huffWeight[n]) >> 1;
198     }   }
199     if (weightTotal == 0) return ERROR(corruption_detected);
200 
201     /* get last non-null symbol weight (implied, total must be 2^n) */
202     {   U32 const tableLog = BIT_highbit32(weightTotal) + 1;
203         if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
204         *tableLogPtr = tableLog;
205         /* determine last weight */
206         {   U32 const total = 1 << tableLog;
207             U32 const rest = total - weightTotal;
208             U32 const verif = 1 << BIT_highbit32(rest);
209             U32 const lastWeight = BIT_highbit32(rest) + 1;
210             if (verif != rest) return ERROR(corruption_detected);    /* last value must be a clean power of 2 */
211             huffWeight[oSize] = (BYTE)lastWeight;
212             rankStats[lastWeight]++;
213     }   }
214 
215     /* check tree construction validity */
216     if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected);   /* by construction : at least 2 elts of rank 1, must be even */
217 
218     /* results */
219     *nbSymbolsPtr = (U32)(oSize+1);
220     return iSize+1;
221 }
222