1 /* ****************************************************************** 2 * Common functions of New Generation Entropy library 3 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. 4 * 5 * You can contact the author at : 6 * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy 7 * - Public forum : https://groups.google.com/forum/#!forum/lz4c 8 * 9 * This source code is licensed under both the BSD-style license (found in the 10 * LICENSE file in the root directory of this source tree) and the GPLv2 (found 11 * in the COPYING file in the root directory of this source tree). 12 * You may select, at your option, one of the above-listed licenses. 13 ****************************************************************** */ 14 15 /* ************************************* 16 * Dependencies 17 ***************************************/ 18 #include "mem.h" 19 #include "error_private.h" /* ERR_*, ERROR */ 20 #define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */ 21 #include "fse.h" 22 #define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */ 23 #include "huf.h" 24 25 26 /*=== Version ===*/ 27 unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; } 28 29 30 /*=== Error Management ===*/ 31 unsigned FSE_isError(size_t code) { return ERR_isError(code); } 32 const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); } 33 34 unsigned HUF_isError(size_t code) { return ERR_isError(code); } 35 const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); } 36 37 38 /*-************************************************************** 39 * FSE NCount encoding-decoding 40 ****************************************************************/ 41 size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, 42 const void* headerBuffer, size_t hbSize) 43 { 44 const BYTE* const istart = (const BYTE*) headerBuffer; 45 const BYTE* const iend = istart + hbSize; 46 const BYTE* ip = istart; 47 int nbBits; 48 int remaining; 49 int threshold; 50 U32 bitStream; 51 int bitCount; 52 unsigned charnum = 0; 53 int previous0 = 0; 54 55 if (hbSize < 4) { 56 /* This function only works when hbSize >= 4 */ 57 char buffer[4]; 58 memset(buffer, 0, sizeof(buffer)); 59 memcpy(buffer, headerBuffer, hbSize); 60 { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr, 61 buffer, sizeof(buffer)); 62 if (FSE_isError(countSize)) return countSize; 63 if (countSize > hbSize) return ERROR(corruption_detected); 64 return countSize; 65 } } 66 assert(hbSize >= 4); 67 68 /* init */ 69 memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */ 70 bitStream = MEM_readLE32(ip); 71 nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ 72 if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge); 73 bitStream >>= 4; 74 bitCount = 4; 75 *tableLogPtr = nbBits; 76 remaining = (1<<nbBits)+1; 77 threshold = 1<<nbBits; 78 nbBits++; 79 80 while ((remaining>1) & (charnum<=*maxSVPtr)) { 81 if (previous0) { 82 unsigned n0 = charnum; 83 while ((bitStream & 0xFFFF) == 0xFFFF) { 84 n0 += 24; 85 if (ip < iend-5) { 86 ip += 2; 87 bitStream = MEM_readLE32(ip) >> bitCount; 88 } else { 89 bitStream >>= 16; 90 bitCount += 16; 91 } } 92 while ((bitStream & 3) == 3) { 93 n0 += 3; 94 bitStream >>= 2; 95 bitCount += 2; 96 } 97 n0 += bitStream & 3; 98 bitCount += 2; 99 if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall); 100 while (charnum < n0) normalizedCounter[charnum++] = 0; 101 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { 102 assert((bitCount >> 3) <= 3); /* For first condition to work */ 103 ip += bitCount>>3; 104 bitCount &= 7; 105 bitStream = MEM_readLE32(ip) >> bitCount; 106 } else { 107 bitStream >>= 2; 108 } } 109 { int const max = (2*threshold-1) - remaining; 110 int count; 111 112 if ((bitStream & (threshold-1)) < (U32)max) { 113 count = bitStream & (threshold-1); 114 bitCount += nbBits-1; 115 } else { 116 count = bitStream & (2*threshold-1); 117 if (count >= threshold) count -= max; 118 bitCount += nbBits; 119 } 120 121 count--; /* extra accuracy */ 122 remaining -= count < 0 ? -count : count; /* -1 means +1 */ 123 normalizedCounter[charnum++] = (short)count; 124 previous0 = !count; 125 while (remaining < threshold) { 126 nbBits--; 127 threshold >>= 1; 128 } 129 130 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { 131 ip += bitCount>>3; 132 bitCount &= 7; 133 } else { 134 bitCount -= (int)(8 * (iend - 4 - ip)); 135 ip = iend - 4; 136 } 137 bitStream = MEM_readLE32(ip) >> (bitCount & 31); 138 } } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */ 139 if (remaining != 1) return ERROR(corruption_detected); 140 if (bitCount > 32) return ERROR(corruption_detected); 141 *maxSVPtr = charnum-1; 142 143 ip += (bitCount+7)>>3; 144 return ip-istart; 145 } 146 147 148 /*! HUF_readStats() : 149 Read compact Huffman tree, saved by HUF_writeCTable(). 150 `huffWeight` is destination buffer. 151 `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32. 152 @return : size read from `src` , or an error Code . 153 Note : Needed by HUF_readCTable() and HUF_readDTableX?() . 154 */ 155 size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, 156 U32* nbSymbolsPtr, U32* tableLogPtr, 157 const void* src, size_t srcSize) 158 { 159 U32 weightTotal; 160 const BYTE* ip = (const BYTE*) src; 161 size_t iSize; 162 size_t oSize; 163 164 if (!srcSize) return ERROR(srcSize_wrong); 165 iSize = ip[0]; 166 /* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */ 167 168 if (iSize >= 128) { /* special header */ 169 oSize = iSize - 127; 170 iSize = ((oSize+1)/2); 171 if (iSize+1 > srcSize) return ERROR(srcSize_wrong); 172 if (oSize >= hwSize) return ERROR(corruption_detected); 173 ip += 1; 174 { U32 n; 175 for (n=0; n<oSize; n+=2) { 176 huffWeight[n] = ip[n/2] >> 4; 177 huffWeight[n+1] = ip[n/2] & 15; 178 } } } 179 else { /* header compressed with FSE (normal case) */ 180 FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */ 181 if (iSize+1 > srcSize) return ERROR(srcSize_wrong); 182 oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */ 183 if (FSE_isError(oSize)) return oSize; 184 } 185 186 /* collect weight stats */ 187 memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32)); 188 weightTotal = 0; 189 { U32 n; for (n=0; n<oSize; n++) { 190 if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected); 191 rankStats[huffWeight[n]]++; 192 weightTotal += (1 << huffWeight[n]) >> 1; 193 } } 194 if (weightTotal == 0) return ERROR(corruption_detected); 195 196 /* get last non-null symbol weight (implied, total must be 2^n) */ 197 { U32 const tableLog = BIT_highbit32(weightTotal) + 1; 198 if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected); 199 *tableLogPtr = tableLog; 200 /* determine last weight */ 201 { U32 const total = 1 << tableLog; 202 U32 const rest = total - weightTotal; 203 U32 const verif = 1 << BIT_highbit32(rest); 204 U32 const lastWeight = BIT_highbit32(rest) + 1; 205 if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */ 206 huffWeight[oSize] = (BYTE)lastWeight; 207 rankStats[lastWeight]++; 208 } } 209 210 /* check tree construction validity */ 211 if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */ 212 213 /* results */ 214 *nbSymbolsPtr = (U32)(oSize+1); 215 return iSize+1; 216 } 217