xref: /freebsd/sys/contrib/zstd/lib/common/entropy_common.c (revision b9f654b163bce26de79705e77b872427c9f2afa1)
1 /*
2    Common functions of New Generation Entropy library
3    Copyright (C) 2016, Yann Collet.
4 
5    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6 
7    Redistribution and use in source and binary forms, with or without
8    modification, are permitted provided that the following conditions are
9    met:
10 
11        * Redistributions of source code must retain the above copyright
12    notice, this list of conditions and the following disclaimer.
13        * Redistributions in binary form must reproduce the above
14    copyright notice, this list of conditions and the following disclaimer
15    in the documentation and/or other materials provided with the
16    distribution.
17 
18    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30     You can contact the author at :
31     - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
32     - Public forum : https://groups.google.com/forum/#!forum/lz4c
33 *************************************************************************** */
34 
35 /* *************************************
36 *  Dependencies
37 ***************************************/
38 #include "mem.h"
39 #include "error_private.h"       /* ERR_*, ERROR */
40 #define FSE_STATIC_LINKING_ONLY  /* FSE_MIN_TABLELOG */
41 #include "fse.h"
42 #define HUF_STATIC_LINKING_ONLY  /* HUF_TABLELOG_ABSOLUTEMAX */
43 #include "huf.h"
44 
45 
46 /*===   Version   ===*/
47 unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; }
48 
49 
50 /*===   Error Management   ===*/
/*! FSE_isError() :
 *  Public FSE-prefixed entry point for the shared error test.
 *  @return : the result of ERR_isError(code), unchanged. */
unsigned FSE_isError(size_t code)
{
    unsigned const errorFlag = ERR_isError(code);
    return errorFlag;
}
/*! FSE_getErrorName() :
 *  Public FSE-prefixed entry point for the shared error-name lookup.
 *  @return : the string returned by ERR_getErrorName(code), unchanged. */
const char* FSE_getErrorName(size_t code)
{
    const char* const errorName = ERR_getErrorName(code);
    return errorName;
}
53 
/*! HUF_isError() :
 *  Public HUF-prefixed entry point for the shared error test.
 *  @return : the result of ERR_isError(code), unchanged. */
unsigned HUF_isError(size_t code)
{
    unsigned const errorFlag = ERR_isError(code);
    return errorFlag;
}
/*! HUF_getErrorName() :
 *  Public HUF-prefixed entry point for the shared error-name lookup.
 *  @return : the string returned by ERR_getErrorName(code), unchanged. */
const char* HUF_getErrorName(size_t code)
{
    const char* const errorName = ERR_getErrorName(code);
    return errorName;
}
56 
57 
58 /*-**************************************************************
59 *  FSE NCount encoding-decoding
60 ****************************************************************/
61 size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
62                  const void* headerBuffer, size_t hbSize)
63 {
64     const BYTE* const istart = (const BYTE*) headerBuffer;
65     const BYTE* const iend = istart + hbSize;
66     const BYTE* ip = istart;
67     int nbBits;
68     int remaining;
69     int threshold;
70     U32 bitStream;
71     int bitCount;
72     unsigned charnum = 0;
73     int previous0 = 0;
74 
75     if (hbSize < 4) {
76         /* This function only works when hbSize >= 4 */
77         char buffer[4];
78         memset(buffer, 0, sizeof(buffer));
79         memcpy(buffer, headerBuffer, hbSize);
80         {   size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr,
81                                                     buffer, sizeof(buffer));
82             if (FSE_isError(countSize)) return countSize;
83             if (countSize > hbSize) return ERROR(corruption_detected);
84             return countSize;
85     }   }
86     assert(hbSize >= 4);
87 
88     /* init */
89     memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0]));   /* all symbols not present in NCount have a frequency of 0 */
90     bitStream = MEM_readLE32(ip);
91     nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
92     if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
93     bitStream >>= 4;
94     bitCount = 4;
95     *tableLogPtr = nbBits;
96     remaining = (1<<nbBits)+1;
97     threshold = 1<<nbBits;
98     nbBits++;
99 
100     while ((remaining>1) & (charnum<=*maxSVPtr)) {
101         if (previous0) {
102             unsigned n0 = charnum;
103             while ((bitStream & 0xFFFF) == 0xFFFF) {
104                 n0 += 24;
105                 if (ip < iend-5) {
106                     ip += 2;
107                     bitStream = MEM_readLE32(ip) >> bitCount;
108                 } else {
109                     bitStream >>= 16;
110                     bitCount   += 16;
111             }   }
112             while ((bitStream & 3) == 3) {
113                 n0 += 3;
114                 bitStream >>= 2;
115                 bitCount += 2;
116             }
117             n0 += bitStream & 3;
118             bitCount += 2;
119             if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
120             while (charnum < n0) normalizedCounter[charnum++] = 0;
121             if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
122                 assert((bitCount >> 3) <= 3); /* For first condition to work */
123                 ip += bitCount>>3;
124                 bitCount &= 7;
125                 bitStream = MEM_readLE32(ip) >> bitCount;
126             } else {
127                 bitStream >>= 2;
128         }   }
129         {   int const max = (2*threshold-1) - remaining;
130             int count;
131 
132             if ((bitStream & (threshold-1)) < (U32)max) {
133                 count = bitStream & (threshold-1);
134                 bitCount += nbBits-1;
135             } else {
136                 count = bitStream & (2*threshold-1);
137                 if (count >= threshold) count -= max;
138                 bitCount += nbBits;
139             }
140 
141             count--;   /* extra accuracy */
142             remaining -= count < 0 ? -count : count;   /* -1 means +1 */
143             normalizedCounter[charnum++] = (short)count;
144             previous0 = !count;
145             while (remaining < threshold) {
146                 nbBits--;
147                 threshold >>= 1;
148             }
149 
150             if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
151                 ip += bitCount>>3;
152                 bitCount &= 7;
153             } else {
154                 bitCount -= (int)(8 * (iend - 4 - ip));
155                 ip = iend - 4;
156             }
157             bitStream = MEM_readLE32(ip) >> (bitCount & 31);
158     }   }   /* while ((remaining>1) & (charnum<=*maxSVPtr)) */
159     if (remaining != 1) return ERROR(corruption_detected);
160     if (bitCount > 32) return ERROR(corruption_detected);
161     *maxSVPtr = charnum-1;
162 
163     ip += (bitCount+7)>>3;
164     return ip-istart;
165 }
166 
167 
168 /*! HUF_readStats() :
169     Read compact Huffman tree, saved by HUF_writeCTable().
170     `huffWeight` is destination buffer.
171     `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32.
172     @return : size read from `src` , or an error Code .
173     Note : Needed by HUF_readCTable() and HUF_readDTableX?() .
174 */
175 size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
176                      U32* nbSymbolsPtr, U32* tableLogPtr,
177                      const void* src, size_t srcSize)
178 {
179     U32 weightTotal;
180     const BYTE* ip = (const BYTE*) src;
181     size_t iSize;
182     size_t oSize;
183 
184     if (!srcSize) return ERROR(srcSize_wrong);
185     iSize = ip[0];
186     /* memset(huffWeight, 0, hwSize);   *//* is not necessary, even though some analyzer complain ... */
187 
188     if (iSize >= 128) {  /* special header */
189         oSize = iSize - 127;
190         iSize = ((oSize+1)/2);
191         if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
192         if (oSize >= hwSize) return ERROR(corruption_detected);
193         ip += 1;
194         {   U32 n;
195             for (n=0; n<oSize; n+=2) {
196                 huffWeight[n]   = ip[n/2] >> 4;
197                 huffWeight[n+1] = ip[n/2] & 15;
198     }   }   }
199     else  {   /* header compressed with FSE (normal case) */
200         FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)];  /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */
201         if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
202         oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6);   /* max (hwSize-1) values decoded, as last one is implied */
203         if (FSE_isError(oSize)) return oSize;
204     }
205 
206     /* collect weight stats */
207     memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
208     weightTotal = 0;
209     {   U32 n; for (n=0; n<oSize; n++) {
210             if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
211             rankStats[huffWeight[n]]++;
212             weightTotal += (1 << huffWeight[n]) >> 1;
213     }   }
214     if (weightTotal == 0) return ERROR(corruption_detected);
215 
216     /* get last non-null symbol weight (implied, total must be 2^n) */
217     {   U32 const tableLog = BIT_highbit32(weightTotal) + 1;
218         if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
219         *tableLogPtr = tableLog;
220         /* determine last weight */
221         {   U32 const total = 1 << tableLog;
222             U32 const rest = total - weightTotal;
223             U32 const verif = 1 << BIT_highbit32(rest);
224             U32 const lastWeight = BIT_highbit32(rest) + 1;
225             if (verif != rest) return ERROR(corruption_detected);    /* last value must be a clean power of 2 */
226             huffWeight[oSize] = (BYTE)lastWeight;
227             rankStats[lastWeight]++;
228     }   }
229 
230     /* check tree construction validity */
231     if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected);   /* by construction : at least 2 elts of rank 1, must be even */
232 
233     /* results */
234     *nbSymbolsPtr = (U32)(oSize+1);
235     return iSize+1;
236 }
237