1*37f1f268SConrad Meyer /* ****************************************************************** 2*37f1f268SConrad Meyer * Common functions of New Generation Entropy library 3*37f1f268SConrad Meyer * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. 4*37f1f268SConrad Meyer * 5*37f1f268SConrad Meyer * You can contact the author at : 6*37f1f268SConrad Meyer * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy 7*37f1f268SConrad Meyer * - Public forum : https://groups.google.com/forum/#!forum/lz4c 8*37f1f268SConrad Meyer * 9*37f1f268SConrad Meyer * This source code is licensed under both the BSD-style license (found in the 10*37f1f268SConrad Meyer * LICENSE file in the root directory of this source tree) and the GPLv2 (found 11*37f1f268SConrad Meyer * in the COPYING file in the root directory of this source tree). 12*37f1f268SConrad Meyer * You may select, at your option, one of the above-listed licenses. 13*37f1f268SConrad Meyer ****************************************************************** */ 140c16b537SWarner Losh 150c16b537SWarner Losh /* ************************************* 160c16b537SWarner Losh * Dependencies 170c16b537SWarner Losh ***************************************/ 180c16b537SWarner Losh #include "mem.h" 190c16b537SWarner Losh #include "error_private.h" /* ERR_*, ERROR */ 200c16b537SWarner Losh #define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */ 210c16b537SWarner Losh #include "fse.h" 220c16b537SWarner Losh #define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */ 230c16b537SWarner Losh #include "huf.h" 240c16b537SWarner Losh 250c16b537SWarner Losh 260c16b537SWarner Losh /*=== Version ===*/ 270c16b537SWarner Losh unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; } 280c16b537SWarner Losh 290c16b537SWarner Losh 300c16b537SWarner Losh /*=== Error Management ===*/ 310c16b537SWarner Losh unsigned FSE_isError(size_t code) { return ERR_isError(code); } 320c16b537SWarner Losh const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); } 330c16b537SWarner Losh 340c16b537SWarner Losh unsigned HUF_isError(size_t code) { return ERR_isError(code); } 350c16b537SWarner Losh const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); } 360c16b537SWarner Losh 370c16b537SWarner Losh 380c16b537SWarner Losh /*-************************************************************** 390c16b537SWarner Losh * FSE NCount encoding-decoding 400c16b537SWarner Losh ****************************************************************/ 410c16b537SWarner Losh size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, 420c16b537SWarner Losh const void* headerBuffer, size_t hbSize) 430c16b537SWarner Losh { 440c16b537SWarner Losh const BYTE* const istart = (const BYTE*) headerBuffer; 450c16b537SWarner Losh const BYTE* const iend = istart + hbSize; 460c16b537SWarner Losh const BYTE* ip = istart; 470c16b537SWarner Losh int nbBits; 480c16b537SWarner Losh int remaining; 490c16b537SWarner Losh int threshold; 500c16b537SWarner Losh U32 bitStream; 510c16b537SWarner Losh int bitCount; 520c16b537SWarner Losh unsigned charnum = 0; 530c16b537SWarner Losh int previous0 = 0; 540c16b537SWarner Losh 550f743729SConrad Meyer if (hbSize < 4) { 560f743729SConrad Meyer /* This function only works when hbSize >= 4 */ 570f743729SConrad Meyer char buffer[4]; 580f743729SConrad Meyer memset(buffer, 0, sizeof(buffer)); 590f743729SConrad Meyer memcpy(buffer, headerBuffer, hbSize); 600f743729SConrad Meyer { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr, 610f743729SConrad Meyer buffer, sizeof(buffer)); 620f743729SConrad Meyer if (FSE_isError(countSize)) return countSize; 630f743729SConrad Meyer if (countSize > hbSize) return ERROR(corruption_detected); 640f743729SConrad Meyer return countSize; 650f743729SConrad Meyer } } 660f743729SConrad Meyer assert(hbSize >= 4); 670f743729SConrad Meyer 680f743729SConrad Meyer /* init */ 690f743729SConrad Meyer memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */ 700c16b537SWarner Losh bitStream = MEM_readLE32(ip); 710c16b537SWarner Losh nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ 720c16b537SWarner Losh if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge); 730c16b537SWarner Losh bitStream >>= 4; 740c16b537SWarner Losh bitCount = 4; 750c16b537SWarner Losh *tableLogPtr = nbBits; 760c16b537SWarner Losh remaining = (1<<nbBits)+1; 770c16b537SWarner Losh threshold = 1<<nbBits; 780c16b537SWarner Losh nbBits++; 790c16b537SWarner Losh 800c16b537SWarner Losh while ((remaining>1) & (charnum<=*maxSVPtr)) { 810c16b537SWarner Losh if (previous0) { 820c16b537SWarner Losh unsigned n0 = charnum; 830c16b537SWarner Losh while ((bitStream & 0xFFFF) == 0xFFFF) { 840c16b537SWarner Losh n0 += 24; 850c16b537SWarner Losh if (ip < iend-5) { 860c16b537SWarner Losh ip += 2; 870c16b537SWarner Losh bitStream = MEM_readLE32(ip) >> bitCount; 880c16b537SWarner Losh } else { 890c16b537SWarner Losh bitStream >>= 16; 900c16b537SWarner Losh bitCount += 16; 910c16b537SWarner Losh } } 920c16b537SWarner Losh while ((bitStream & 3) == 3) { 930c16b537SWarner Losh n0 += 3; 940c16b537SWarner Losh bitStream >>= 2; 950c16b537SWarner Losh bitCount += 2; 960c16b537SWarner Losh } 970c16b537SWarner Losh n0 += bitStream & 3; 980c16b537SWarner Losh bitCount += 2; 990c16b537SWarner Losh if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall); 1000c16b537SWarner Losh while (charnum < n0) normalizedCounter[charnum++] = 0; 1010c16b537SWarner Losh if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { 1020f743729SConrad Meyer assert((bitCount >> 3) <= 3); /* For first condition to work */ 1030c16b537SWarner Losh ip += bitCount>>3; 1040c16b537SWarner Losh bitCount &= 7; 1050c16b537SWarner Losh bitStream = MEM_readLE32(ip) >> bitCount; 1060c16b537SWarner Losh } else { 1070c16b537SWarner Losh bitStream >>= 2; 1080c16b537SWarner Losh } } 1090c16b537SWarner Losh { int const max = (2*threshold-1) - remaining; 1100c16b537SWarner Losh int count; 1110c16b537SWarner Losh 1120c16b537SWarner Losh if ((bitStream & (threshold-1)) < (U32)max) { 1130c16b537SWarner Losh count = bitStream & (threshold-1); 1140c16b537SWarner Losh bitCount += nbBits-1; 1150c16b537SWarner Losh } else { 1160c16b537SWarner Losh count = bitStream & (2*threshold-1); 1170c16b537SWarner Losh if (count >= threshold) count -= max; 1180c16b537SWarner Losh bitCount += nbBits; 1190c16b537SWarner Losh } 1200c16b537SWarner Losh 1210c16b537SWarner Losh count--; /* extra accuracy */ 1220c16b537SWarner Losh remaining -= count < 0 ? -count : count; /* -1 means +1 */ 1230c16b537SWarner Losh normalizedCounter[charnum++] = (short)count; 1240c16b537SWarner Losh previous0 = !count; 1250c16b537SWarner Losh while (remaining < threshold) { 1260c16b537SWarner Losh nbBits--; 1270c16b537SWarner Losh threshold >>= 1; 1280c16b537SWarner Losh } 1290c16b537SWarner Losh 1300c16b537SWarner Losh if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { 1310c16b537SWarner Losh ip += bitCount>>3; 1320c16b537SWarner Losh bitCount &= 7; 1330c16b537SWarner Losh } else { 1340c16b537SWarner Losh bitCount -= (int)(8 * (iend - 4 - ip)); 1350c16b537SWarner Losh ip = iend - 4; 1360c16b537SWarner Losh } 1370c16b537SWarner Losh bitStream = MEM_readLE32(ip) >> (bitCount & 31); 1380c16b537SWarner Losh } } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */ 1390c16b537SWarner Losh if (remaining != 1) return ERROR(corruption_detected); 1400c16b537SWarner Losh if (bitCount > 32) return ERROR(corruption_detected); 1410c16b537SWarner Losh *maxSVPtr = charnum-1; 1420c16b537SWarner Losh 1430c16b537SWarner Losh ip += (bitCount+7)>>3; 1440c16b537SWarner Losh return ip-istart; 1450c16b537SWarner Losh } 1460c16b537SWarner Losh 1470c16b537SWarner Losh 1480c16b537SWarner Losh /*! HUF_readStats() : 1490c16b537SWarner Losh Read compact Huffman tree, saved by HUF_writeCTable(). 1500c16b537SWarner Losh `huffWeight` is destination buffer. 1510c16b537SWarner Losh `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32. 1520c16b537SWarner Losh @return : size read from `src` , or an error Code . 1530c16b537SWarner Losh Note : Needed by HUF_readCTable() and HUF_readDTableX?() . 1540c16b537SWarner Losh */ 1550c16b537SWarner Losh size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, 1560c16b537SWarner Losh U32* nbSymbolsPtr, U32* tableLogPtr, 1570c16b537SWarner Losh const void* src, size_t srcSize) 1580c16b537SWarner Losh { 1590c16b537SWarner Losh U32 weightTotal; 1600c16b537SWarner Losh const BYTE* ip = (const BYTE*) src; 1610c16b537SWarner Losh size_t iSize; 1620c16b537SWarner Losh size_t oSize; 1630c16b537SWarner Losh 1640c16b537SWarner Losh if (!srcSize) return ERROR(srcSize_wrong); 1650c16b537SWarner Losh iSize = ip[0]; 1660c16b537SWarner Losh /* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */ 1670c16b537SWarner Losh 1680c16b537SWarner Losh if (iSize >= 128) { /* special header */ 1690c16b537SWarner Losh oSize = iSize - 127; 1700c16b537SWarner Losh iSize = ((oSize+1)/2); 1710c16b537SWarner Losh if (iSize+1 > srcSize) return ERROR(srcSize_wrong); 1720c16b537SWarner Losh if (oSize >= hwSize) return ERROR(corruption_detected); 1730c16b537SWarner Losh ip += 1; 1740c16b537SWarner Losh { U32 n; 1750c16b537SWarner Losh for (n=0; n<oSize; n+=2) { 1760c16b537SWarner Losh huffWeight[n] = ip[n/2] >> 4; 1770c16b537SWarner Losh huffWeight[n+1] = ip[n/2] & 15; 1780c16b537SWarner Losh } } } 1790c16b537SWarner Losh else { /* header compressed with FSE (normal case) */ 1800c16b537SWarner Losh FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */ 1810c16b537SWarner Losh if (iSize+1 > srcSize) return ERROR(srcSize_wrong); 1820c16b537SWarner Losh oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */ 1830c16b537SWarner Losh if (FSE_isError(oSize)) return oSize; 1840c16b537SWarner Losh } 1850c16b537SWarner Losh 1860c16b537SWarner Losh /* collect weight stats */ 1870c16b537SWarner Losh memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32)); 1880c16b537SWarner Losh weightTotal = 0; 1890c16b537SWarner Losh { U32 n; for (n=0; n<oSize; n++) { 1900c16b537SWarner Losh if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected); 1910c16b537SWarner Losh rankStats[huffWeight[n]]++; 1920c16b537SWarner Losh weightTotal += (1 << huffWeight[n]) >> 1; 1930c16b537SWarner Losh } } 1940c16b537SWarner Losh if (weightTotal == 0) return ERROR(corruption_detected); 1950c16b537SWarner Losh 1960c16b537SWarner Losh /* get last non-null symbol weight (implied, total must be 2^n) */ 1970c16b537SWarner Losh { U32 const tableLog = BIT_highbit32(weightTotal) + 1; 1980c16b537SWarner Losh if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected); 1990c16b537SWarner Losh *tableLogPtr = tableLog; 2000c16b537SWarner Losh /* determine last weight */ 2010c16b537SWarner Losh { U32 const total = 1 << tableLog; 2020c16b537SWarner Losh U32 const rest = total - weightTotal; 2030c16b537SWarner Losh U32 const verif = 1 << BIT_highbit32(rest); 2040c16b537SWarner Losh U32 const lastWeight = BIT_highbit32(rest) + 1; 2050c16b537SWarner Losh if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */ 2060c16b537SWarner Losh huffWeight[oSize] = (BYTE)lastWeight; 2070c16b537SWarner Losh rankStats[lastWeight]++; 2080c16b537SWarner Losh } } 2090c16b537SWarner Losh 2100c16b537SWarner Losh /* check tree construction validity */ 2110c16b537SWarner Losh if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */ 2120c16b537SWarner Losh 2130c16b537SWarner Losh /* results */ 2140c16b537SWarner Losh *nbSymbolsPtr = (U32)(oSize+1); 2150c16b537SWarner Losh return iSize+1; 2160c16b537SWarner Losh } 217