1 /* 2 Common functions of New Generation Entropy library 3 Copyright (C) 2016, Yann Collet. 4 5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 6 7 Redistribution and use in source and binary forms, with or without 8 modification, are permitted provided that the following conditions are 9 met: 10 11 * Redistributions of source code must retain the above copyright 12 notice, this list of conditions and the following disclaimer. 13 * Redistributions in binary form must reproduce the above 14 copyright notice, this list of conditions and the following disclaimer 15 in the documentation and/or other materials provided with the 16 distribution. 17 18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 30 You can contact the author at : 31 - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy 32 - Public forum : https://groups.google.com/forum/#!forum/lz4c 33 *************************************************************************** */ 34 35 /* ************************************* 36 * Dependencies 37 ***************************************/ 38 #include "mem.h" 39 #include "error_private.h" /* ERR_*, ERROR */ 40 #define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */ 41 #include "fse.h" 42 #define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */ 43 #include "huf.h" 44 45 46 /*=== Version ===*/ 47 unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; } 48 49 50 /*=== Error Management ===*/ 51 unsigned FSE_isError(size_t code) { return ERR_isError(code); } 52 const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); } 53 54 unsigned HUF_isError(size_t code) { return ERR_isError(code); } 55 const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); } 56 57 58 /*-************************************************************** 59 * FSE NCount encoding-decoding 60 ****************************************************************/ 61 size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, 62 const void* headerBuffer, size_t hbSize) 63 { 64 const BYTE* const istart = (const BYTE*) headerBuffer; 65 const BYTE* const iend = istart + hbSize; 66 const BYTE* ip = istart; 67 int nbBits; 68 int remaining; 69 int threshold; 70 U32 bitStream; 71 int bitCount; 72 unsigned charnum = 0; 73 int previous0 = 0; 74 75 if (hbSize < 4) return ERROR(srcSize_wrong); 76 bitStream = MEM_readLE32(ip); 77 nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ 78 if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge); 79 bitStream >>= 4; 80 bitCount = 4; 81 *tableLogPtr = nbBits; 82 remaining = (1<<nbBits)+1; 83 threshold = 1<<nbBits; 84 nbBits++; 85 86 while ((remaining>1) & (charnum<=*maxSVPtr)) { 87 if (previous0) { 88 unsigned n0 = charnum; 89 while ((bitStream & 0xFFFF) == 0xFFFF) { 90 n0 += 24; 91 if (ip < iend-5) { 92 ip += 2; 93 bitStream = MEM_readLE32(ip) >> bitCount; 94 } else { 95 bitStream >>= 16; 96 bitCount += 16; 97 } } 98 while ((bitStream & 3) == 3) { 99 n0 += 3; 100 bitStream >>= 2; 101 bitCount += 2; 102 } 103 n0 += bitStream & 3; 104 bitCount += 2; 105 if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall); 106 while (charnum < n0) normalizedCounter[charnum++] = 0; 107 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { 108 ip += bitCount>>3; 109 bitCount &= 7; 110 bitStream = MEM_readLE32(ip) >> bitCount; 111 } else { 112 bitStream >>= 2; 113 } } 114 { int const max = (2*threshold-1) - remaining; 115 int count; 116 117 if ((bitStream & (threshold-1)) < (U32)max) { 118 count = bitStream & (threshold-1); 119 bitCount += nbBits-1; 120 } else { 121 count = bitStream & (2*threshold-1); 122 if (count >= threshold) count -= max; 123 bitCount += nbBits; 124 } 125 126 count--; /* extra accuracy */ 127 remaining -= count < 0 ? -count : count; /* -1 means +1 */ 128 normalizedCounter[charnum++] = (short)count; 129 previous0 = !count; 130 while (remaining < threshold) { 131 nbBits--; 132 threshold >>= 1; 133 } 134 135 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { 136 ip += bitCount>>3; 137 bitCount &= 7; 138 } else { 139 bitCount -= (int)(8 * (iend - 4 - ip)); 140 ip = iend - 4; 141 } 142 bitStream = MEM_readLE32(ip) >> (bitCount & 31); 143 } } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */ 144 if (remaining != 1) return ERROR(corruption_detected); 145 if (bitCount > 32) return ERROR(corruption_detected); 146 *maxSVPtr = charnum-1; 147 148 ip += (bitCount+7)>>3; 149 return ip-istart; 150 } 151 152 153 /*! HUF_readStats() : 154 Read compact Huffman tree, saved by HUF_writeCTable(). 155 `huffWeight` is destination buffer. 156 `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32. 157 @return : size read from `src` , or an error Code . 158 Note : Needed by HUF_readCTable() and HUF_readDTableX?() . 159 */ 160 size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, 161 U32* nbSymbolsPtr, U32* tableLogPtr, 162 const void* src, size_t srcSize) 163 { 164 U32 weightTotal; 165 const BYTE* ip = (const BYTE*) src; 166 size_t iSize; 167 size_t oSize; 168 169 if (!srcSize) return ERROR(srcSize_wrong); 170 iSize = ip[0]; 171 /* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */ 172 173 if (iSize >= 128) { /* special header */ 174 oSize = iSize - 127; 175 iSize = ((oSize+1)/2); 176 if (iSize+1 > srcSize) return ERROR(srcSize_wrong); 177 if (oSize >= hwSize) return ERROR(corruption_detected); 178 ip += 1; 179 { U32 n; 180 for (n=0; n<oSize; n+=2) { 181 huffWeight[n] = ip[n/2] >> 4; 182 huffWeight[n+1] = ip[n/2] & 15; 183 } } } 184 else { /* header compressed with FSE (normal case) */ 185 FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */ 186 if (iSize+1 > srcSize) return ERROR(srcSize_wrong); 187 oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */ 188 if (FSE_isError(oSize)) return oSize; 189 } 190 191 /* collect weight stats */ 192 memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32)); 193 weightTotal = 0; 194 { U32 n; for (n=0; n<oSize; n++) { 195 if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected); 196 rankStats[huffWeight[n]]++; 197 weightTotal += (1 << huffWeight[n]) >> 1; 198 } } 199 if (weightTotal == 0) return ERROR(corruption_detected); 200 201 /* get last non-null symbol weight (implied, total must be 2^n) */ 202 { U32 const tableLog = BIT_highbit32(weightTotal) + 1; 203 if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected); 204 *tableLogPtr = tableLog; 205 /* determine last weight */ 206 { U32 const total = 1 << tableLog; 207 U32 const rest = total - weightTotal; 208 U32 const verif = 1 << BIT_highbit32(rest); 209 U32 const lastWeight = BIT_highbit32(rest) + 1; 210 if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */ 211 huffWeight[oSize] = (BYTE)lastWeight; 212 rankStats[lastWeight]++; 213 } } 214 215 /* check tree construction validity */ 216 if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */ 217 218 /* results */ 219 *nbSymbolsPtr = (U32)(oSize+1); 220 return iSize+1; 221 } 222