1*0c16b537SWarner Losh /* ****************************************************************** 2*0c16b537SWarner Losh FSE : Finite State Entropy encoder 3*0c16b537SWarner Losh Copyright (C) 2013-2015, Yann Collet. 4*0c16b537SWarner Losh 5*0c16b537SWarner Losh BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 6*0c16b537SWarner Losh 7*0c16b537SWarner Losh Redistribution and use in source and binary forms, with or without 8*0c16b537SWarner Losh modification, are permitted provided that the following conditions are 9*0c16b537SWarner Losh met: 10*0c16b537SWarner Losh 11*0c16b537SWarner Losh * Redistributions of source code must retain the above copyright 12*0c16b537SWarner Losh notice, this list of conditions and the following disclaimer. 13*0c16b537SWarner Losh * Redistributions in binary form must reproduce the above 14*0c16b537SWarner Losh copyright notice, this list of conditions and the following disclaimer 15*0c16b537SWarner Losh in the documentation and/or other materials provided with the 16*0c16b537SWarner Losh distribution. 17*0c16b537SWarner Losh 18*0c16b537SWarner Losh THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19*0c16b537SWarner Losh "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20*0c16b537SWarner Losh LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21*0c16b537SWarner Losh A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22*0c16b537SWarner Losh OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23*0c16b537SWarner Losh SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24*0c16b537SWarner Losh LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25*0c16b537SWarner Losh DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26*0c16b537SWarner Losh THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27*0c16b537SWarner Losh (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28*0c16b537SWarner Losh OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29*0c16b537SWarner Losh 30*0c16b537SWarner Losh You can contact the author at : 31*0c16b537SWarner Losh - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy 32*0c16b537SWarner Losh - Public forum : https://groups.google.com/forum/#!forum/lz4c 33*0c16b537SWarner Losh ****************************************************************** */ 34*0c16b537SWarner Losh 35*0c16b537SWarner Losh /* ************************************************************** 36*0c16b537SWarner Losh * Includes 37*0c16b537SWarner Losh ****************************************************************/ 38*0c16b537SWarner Losh #include <stdlib.h> /* malloc, free, qsort */ 39*0c16b537SWarner Losh #include <string.h> /* memcpy, memset */ 40*0c16b537SWarner Losh #include <stdio.h> /* printf (debug) */ 41*0c16b537SWarner Losh #include "bitstream.h" 42*0c16b537SWarner Losh #include "compiler.h" 43*0c16b537SWarner Losh #define FSE_STATIC_LINKING_ONLY 44*0c16b537SWarner Losh #include "fse.h" 45*0c16b537SWarner Losh #include "error_private.h" 46*0c16b537SWarner Losh 47*0c16b537SWarner Losh 48*0c16b537SWarner Losh /* ************************************************************** 49*0c16b537SWarner Losh * Error Management 50*0c16b537SWarner Losh ****************************************************************/ 51*0c16b537SWarner Losh #define FSE_isError ERR_isError 52*0c16b537SWarner Losh #define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ 53*0c16b537SWarner Losh 54*0c16b537SWarner Losh 55*0c16b537SWarner Losh /* ************************************************************** 56*0c16b537SWarner Losh * Templates 57*0c16b537SWarner Losh ****************************************************************/ 58*0c16b537SWarner Losh /* 59*0c16b537SWarner Losh designed to be included 60*0c16b537SWarner Losh for type-specific functions (template emulation in C) 61*0c16b537SWarner Losh Objective is to write these functions only once, for improved maintenance 62*0c16b537SWarner Losh */ 63*0c16b537SWarner Losh 64*0c16b537SWarner Losh /* safety checks */ 65*0c16b537SWarner Losh #ifndef FSE_FUNCTION_EXTENSION 66*0c16b537SWarner Losh # error "FSE_FUNCTION_EXTENSION must be defined" 67*0c16b537SWarner Losh #endif 68*0c16b537SWarner Losh #ifndef FSE_FUNCTION_TYPE 69*0c16b537SWarner Losh # error "FSE_FUNCTION_TYPE must be defined" 70*0c16b537SWarner Losh #endif 71*0c16b537SWarner Losh 72*0c16b537SWarner Losh /* Function names */ 73*0c16b537SWarner Losh #define FSE_CAT(X,Y) X##Y 74*0c16b537SWarner Losh #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) 75*0c16b537SWarner Losh #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) 76*0c16b537SWarner Losh 77*0c16b537SWarner Losh 78*0c16b537SWarner Losh /* Function templates */ 79*0c16b537SWarner Losh 80*0c16b537SWarner Losh /* FSE_buildCTable_wksp() : 81*0c16b537SWarner Losh * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). 82*0c16b537SWarner Losh * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)` 83*0c16b537SWarner Losh * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements 84*0c16b537SWarner Losh */ 85*0c16b537SWarner Losh size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) 86*0c16b537SWarner Losh { 87*0c16b537SWarner Losh U32 const tableSize = 1 << tableLog; 88*0c16b537SWarner Losh U32 const tableMask = tableSize - 1; 89*0c16b537SWarner Losh void* const ptr = ct; 90*0c16b537SWarner Losh U16* const tableU16 = ( (U16*) ptr) + 2; 91*0c16b537SWarner Losh void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ; 92*0c16b537SWarner Losh FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); 93*0c16b537SWarner Losh U32 const step = FSE_TABLESTEP(tableSize); 94*0c16b537SWarner Losh U32 cumul[FSE_MAX_SYMBOL_VALUE+2]; 95*0c16b537SWarner Losh 96*0c16b537SWarner Losh FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace; 97*0c16b537SWarner Losh U32 highThreshold = tableSize-1; 98*0c16b537SWarner Losh 99*0c16b537SWarner Losh /* CTable header */ 100*0c16b537SWarner Losh if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge); 101*0c16b537SWarner Losh tableU16[-2] = (U16) tableLog; 102*0c16b537SWarner Losh tableU16[-1] = (U16) maxSymbolValue; 103*0c16b537SWarner Losh 104*0c16b537SWarner Losh /* For explanations on how to distribute symbol values over the table : 105*0c16b537SWarner Losh * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ 106*0c16b537SWarner Losh 107*0c16b537SWarner Losh /* symbol start positions */ 108*0c16b537SWarner Losh { U32 u; 109*0c16b537SWarner Losh cumul[0] = 0; 110*0c16b537SWarner Losh for (u=1; u<=maxSymbolValue+1; u++) { 111*0c16b537SWarner Losh if (normalizedCounter[u-1]==-1) { /* Low proba symbol */ 112*0c16b537SWarner Losh cumul[u] = cumul[u-1] + 1; 113*0c16b537SWarner Losh tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1); 114*0c16b537SWarner Losh } else { 115*0c16b537SWarner Losh cumul[u] = cumul[u-1] + normalizedCounter[u-1]; 116*0c16b537SWarner Losh } } 117*0c16b537SWarner Losh cumul[maxSymbolValue+1] = tableSize+1; 118*0c16b537SWarner Losh } 119*0c16b537SWarner Losh 120*0c16b537SWarner Losh /* Spread symbols */ 121*0c16b537SWarner Losh { U32 position = 0; 122*0c16b537SWarner Losh U32 symbol; 123*0c16b537SWarner Losh for (symbol=0; symbol<=maxSymbolValue; symbol++) { 124*0c16b537SWarner Losh int nbOccurences; 125*0c16b537SWarner Losh for (nbOccurences=0; nbOccurences<normalizedCounter[symbol]; nbOccurences++) { 126*0c16b537SWarner Losh tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol; 127*0c16b537SWarner Losh position = (position + step) & tableMask; 128*0c16b537SWarner Losh while (position > highThreshold) position = (position + step) & tableMask; /* Low proba area */ 129*0c16b537SWarner Losh } } 130*0c16b537SWarner Losh 131*0c16b537SWarner Losh if (position!=0) return ERROR(GENERIC); /* Must have gone through all positions */ 132*0c16b537SWarner Losh } 133*0c16b537SWarner Losh 134*0c16b537SWarner Losh /* Build table */ 135*0c16b537SWarner Losh { U32 u; for (u=0; u<tableSize; u++) { 136*0c16b537SWarner Losh FSE_FUNCTION_TYPE s = tableSymbol[u]; /* note : static analyzer may not understand tableSymbol is properly initialized */ 137*0c16b537SWarner Losh tableU16[cumul[s]++] = (U16) (tableSize+u); /* TableU16 : sorted by symbol order; gives next state value */ 138*0c16b537SWarner Losh } } 139*0c16b537SWarner Losh 140*0c16b537SWarner Losh /* Build Symbol Transformation Table */ 141*0c16b537SWarner Losh { unsigned total = 0; 142*0c16b537SWarner Losh unsigned s; 143*0c16b537SWarner Losh for (s=0; s<=maxSymbolValue; s++) { 144*0c16b537SWarner Losh switch (normalizedCounter[s]) 145*0c16b537SWarner Losh { 146*0c16b537SWarner Losh case 0: break; 147*0c16b537SWarner Losh 148*0c16b537SWarner Losh case -1: 149*0c16b537SWarner Losh case 1: 150*0c16b537SWarner Losh symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog); 151*0c16b537SWarner Losh symbolTT[s].deltaFindState = total - 1; 152*0c16b537SWarner Losh total ++; 153*0c16b537SWarner Losh break; 154*0c16b537SWarner Losh default : 155*0c16b537SWarner Losh { 156*0c16b537SWarner Losh U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1); 157*0c16b537SWarner Losh U32 const minStatePlus = normalizedCounter[s] << maxBitsOut; 158*0c16b537SWarner Losh symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus; 159*0c16b537SWarner Losh symbolTT[s].deltaFindState = total - normalizedCounter[s]; 160*0c16b537SWarner Losh total += normalizedCounter[s]; 161*0c16b537SWarner Losh } } } } 162*0c16b537SWarner Losh 163*0c16b537SWarner Losh return 0; 164*0c16b537SWarner Losh } 165*0c16b537SWarner Losh 166*0c16b537SWarner Losh 167*0c16b537SWarner Losh size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) 168*0c16b537SWarner Losh { 169*0c16b537SWarner Losh FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */ 170*0c16b537SWarner Losh return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol)); 171*0c16b537SWarner Losh } 172*0c16b537SWarner Losh 173*0c16b537SWarner Losh 174*0c16b537SWarner Losh 175*0c16b537SWarner Losh #ifndef FSE_COMMONDEFS_ONLY 176*0c16b537SWarner Losh 177*0c16b537SWarner Losh /*-************************************************************** 178*0c16b537SWarner Losh * FSE NCount encoding-decoding 179*0c16b537SWarner Losh ****************************************************************/ 180*0c16b537SWarner Losh size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog) 181*0c16b537SWarner Losh { 182*0c16b537SWarner Losh size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3; 183*0c16b537SWarner Losh return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */ 184*0c16b537SWarner Losh } 185*0c16b537SWarner Losh 186*0c16b537SWarner Losh static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize, 187*0c16b537SWarner Losh const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, 188*0c16b537SWarner Losh unsigned writeIsSafe) 189*0c16b537SWarner Losh { 190*0c16b537SWarner Losh BYTE* const ostart = (BYTE*) header; 191*0c16b537SWarner Losh BYTE* out = ostart; 192*0c16b537SWarner Losh BYTE* const oend = ostart + headerBufferSize; 193*0c16b537SWarner Losh int nbBits; 194*0c16b537SWarner Losh const int tableSize = 1 << tableLog; 195*0c16b537SWarner Losh int remaining; 196*0c16b537SWarner Losh int threshold; 197*0c16b537SWarner Losh U32 bitStream; 198*0c16b537SWarner Losh int bitCount; 199*0c16b537SWarner Losh unsigned charnum = 0; 200*0c16b537SWarner Losh int previous0 = 0; 201*0c16b537SWarner Losh 202*0c16b537SWarner Losh bitStream = 0; 203*0c16b537SWarner Losh bitCount = 0; 204*0c16b537SWarner Losh /* Table Size */ 205*0c16b537SWarner Losh bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount; 206*0c16b537SWarner Losh bitCount += 4; 207*0c16b537SWarner Losh 208*0c16b537SWarner Losh /* Init */ 209*0c16b537SWarner Losh remaining = tableSize+1; /* +1 for extra accuracy */ 210*0c16b537SWarner Losh threshold = tableSize; 211*0c16b537SWarner Losh nbBits = tableLog+1; 212*0c16b537SWarner Losh 213*0c16b537SWarner Losh while (remaining>1) { /* stops at 1 */ 214*0c16b537SWarner Losh if (previous0) { 215*0c16b537SWarner Losh unsigned start = charnum; 216*0c16b537SWarner Losh while (!normalizedCounter[charnum]) charnum++; 217*0c16b537SWarner Losh while (charnum >= start+24) { 218*0c16b537SWarner Losh start+=24; 219*0c16b537SWarner Losh bitStream += 0xFFFFU << bitCount; 220*0c16b537SWarner Losh if ((!writeIsSafe) && (out > oend-2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ 221*0c16b537SWarner Losh out[0] = (BYTE) bitStream; 222*0c16b537SWarner Losh out[1] = (BYTE)(bitStream>>8); 223*0c16b537SWarner Losh out+=2; 224*0c16b537SWarner Losh bitStream>>=16; 225*0c16b537SWarner Losh } 226*0c16b537SWarner Losh while (charnum >= start+3) { 227*0c16b537SWarner Losh start+=3; 228*0c16b537SWarner Losh bitStream += 3 << bitCount; 229*0c16b537SWarner Losh bitCount += 2; 230*0c16b537SWarner Losh } 231*0c16b537SWarner Losh bitStream += (charnum-start) << bitCount; 232*0c16b537SWarner Losh bitCount += 2; 233*0c16b537SWarner Losh if (bitCount>16) { 234*0c16b537SWarner Losh if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ 235*0c16b537SWarner Losh out[0] = (BYTE)bitStream; 236*0c16b537SWarner Losh out[1] = (BYTE)(bitStream>>8); 237*0c16b537SWarner Losh out += 2; 238*0c16b537SWarner Losh bitStream >>= 16; 239*0c16b537SWarner Losh bitCount -= 16; 240*0c16b537SWarner Losh } } 241*0c16b537SWarner Losh { int count = normalizedCounter[charnum++]; 242*0c16b537SWarner Losh int const max = (2*threshold-1)-remaining; 243*0c16b537SWarner Losh remaining -= count < 0 ? -count : count; 244*0c16b537SWarner Losh count++; /* +1 for extra accuracy */ 245*0c16b537SWarner Losh if (count>=threshold) count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */ 246*0c16b537SWarner Losh bitStream += count << bitCount; 247*0c16b537SWarner Losh bitCount += nbBits; 248*0c16b537SWarner Losh bitCount -= (count<max); 249*0c16b537SWarner Losh previous0 = (count==1); 250*0c16b537SWarner Losh if (remaining<1) return ERROR(GENERIC); 251*0c16b537SWarner Losh while (remaining<threshold) nbBits--, threshold>>=1; 252*0c16b537SWarner Losh } 253*0c16b537SWarner Losh if (bitCount>16) { 254*0c16b537SWarner Losh if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ 255*0c16b537SWarner Losh out[0] = (BYTE)bitStream; 256*0c16b537SWarner Losh out[1] = (BYTE)(bitStream>>8); 257*0c16b537SWarner Losh out += 2; 258*0c16b537SWarner Losh bitStream >>= 16; 259*0c16b537SWarner Losh bitCount -= 16; 260*0c16b537SWarner Losh } } 261*0c16b537SWarner Losh 262*0c16b537SWarner Losh /* flush remaining bitStream */ 263*0c16b537SWarner Losh if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ 264*0c16b537SWarner Losh out[0] = (BYTE)bitStream; 265*0c16b537SWarner Losh out[1] = (BYTE)(bitStream>>8); 266*0c16b537SWarner Losh out+= (bitCount+7) /8; 267*0c16b537SWarner Losh 268*0c16b537SWarner Losh if (charnum > maxSymbolValue + 1) return ERROR(GENERIC); 269*0c16b537SWarner Losh 270*0c16b537SWarner Losh return (out-ostart); 271*0c16b537SWarner Losh } 272*0c16b537SWarner Losh 273*0c16b537SWarner Losh 274*0c16b537SWarner Losh size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) 275*0c16b537SWarner Losh { 276*0c16b537SWarner Losh if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */ 277*0c16b537SWarner Losh if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */ 278*0c16b537SWarner Losh 279*0c16b537SWarner Losh if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog)) 280*0c16b537SWarner Losh return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0); 281*0c16b537SWarner Losh 282*0c16b537SWarner Losh return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1); 283*0c16b537SWarner Losh } 284*0c16b537SWarner Losh 285*0c16b537SWarner Losh 286*0c16b537SWarner Losh 287*0c16b537SWarner Losh /*-************************************************************** 288*0c16b537SWarner Losh * Counting histogram 289*0c16b537SWarner Losh ****************************************************************/ 290*0c16b537SWarner Losh /*! FSE_count_simple 291*0c16b537SWarner Losh This function counts byte values within `src`, and store the histogram into table `count`. 292*0c16b537SWarner Losh It doesn't use any additional memory. 293*0c16b537SWarner Losh But this function is unsafe : it doesn't check that all values within `src` can fit into `count`. 294*0c16b537SWarner Losh For this reason, prefer using a table `count` with 256 elements. 295*0c16b537SWarner Losh @return : count of most numerous element 296*0c16b537SWarner Losh */ 297*0c16b537SWarner Losh size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, 298*0c16b537SWarner Losh const void* src, size_t srcSize) 299*0c16b537SWarner Losh { 300*0c16b537SWarner Losh const BYTE* ip = (const BYTE*)src; 301*0c16b537SWarner Losh const BYTE* const end = ip + srcSize; 302*0c16b537SWarner Losh unsigned maxSymbolValue = *maxSymbolValuePtr; 303*0c16b537SWarner Losh unsigned max=0; 304*0c16b537SWarner Losh 305*0c16b537SWarner Losh memset(count, 0, (maxSymbolValue+1)*sizeof(*count)); 306*0c16b537SWarner Losh if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } 307*0c16b537SWarner Losh 308*0c16b537SWarner Losh while (ip<end) count[*ip++]++; 309*0c16b537SWarner Losh 310*0c16b537SWarner Losh while (!count[maxSymbolValue]) maxSymbolValue--; 311*0c16b537SWarner Losh *maxSymbolValuePtr = maxSymbolValue; 312*0c16b537SWarner Losh 313*0c16b537SWarner Losh { U32 s; for (s=0; s<=maxSymbolValue; s++) if (count[s] > max) max = count[s]; } 314*0c16b537SWarner Losh 315*0c16b537SWarner Losh return (size_t)max; 316*0c16b537SWarner Losh } 317*0c16b537SWarner Losh 318*0c16b537SWarner Losh 319*0c16b537SWarner Losh /* FSE_count_parallel_wksp() : 320*0c16b537SWarner Losh * Same as FSE_count_parallel(), but using an externally provided scratch buffer. 321*0c16b537SWarner Losh * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`` */ 322*0c16b537SWarner Losh static size_t FSE_count_parallel_wksp( 323*0c16b537SWarner Losh unsigned* count, unsigned* maxSymbolValuePtr, 324*0c16b537SWarner Losh const void* source, size_t sourceSize, 325*0c16b537SWarner Losh unsigned checkMax, unsigned* const workSpace) 326*0c16b537SWarner Losh { 327*0c16b537SWarner Losh const BYTE* ip = (const BYTE*)source; 328*0c16b537SWarner Losh const BYTE* const iend = ip+sourceSize; 329*0c16b537SWarner Losh unsigned maxSymbolValue = *maxSymbolValuePtr; 330*0c16b537SWarner Losh unsigned max=0; 331*0c16b537SWarner Losh U32* const Counting1 = workSpace; 332*0c16b537SWarner Losh U32* const Counting2 = Counting1 + 256; 333*0c16b537SWarner Losh U32* const Counting3 = Counting2 + 256; 334*0c16b537SWarner Losh U32* const Counting4 = Counting3 + 256; 335*0c16b537SWarner Losh 336*0c16b537SWarner Losh memset(Counting1, 0, 4*256*sizeof(unsigned)); 337*0c16b537SWarner Losh 338*0c16b537SWarner Losh /* safety checks */ 339*0c16b537SWarner Losh if (!sourceSize) { 340*0c16b537SWarner Losh memset(count, 0, maxSymbolValue + 1); 341*0c16b537SWarner Losh *maxSymbolValuePtr = 0; 342*0c16b537SWarner Losh return 0; 343*0c16b537SWarner Losh } 344*0c16b537SWarner Losh if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */ 345*0c16b537SWarner Losh 346*0c16b537SWarner Losh /* by stripes of 16 bytes */ 347*0c16b537SWarner Losh { U32 cached = MEM_read32(ip); ip += 4; 348*0c16b537SWarner Losh while (ip < iend-15) { 349*0c16b537SWarner Losh U32 c = cached; cached = MEM_read32(ip); ip += 4; 350*0c16b537SWarner Losh Counting1[(BYTE) c ]++; 351*0c16b537SWarner Losh Counting2[(BYTE)(c>>8) ]++; 352*0c16b537SWarner Losh Counting3[(BYTE)(c>>16)]++; 353*0c16b537SWarner Losh Counting4[ c>>24 ]++; 354*0c16b537SWarner Losh c = cached; cached = MEM_read32(ip); ip += 4; 355*0c16b537SWarner Losh Counting1[(BYTE) c ]++; 356*0c16b537SWarner Losh Counting2[(BYTE)(c>>8) ]++; 357*0c16b537SWarner Losh Counting3[(BYTE)(c>>16)]++; 358*0c16b537SWarner Losh Counting4[ c>>24 ]++; 359*0c16b537SWarner Losh c = cached; cached = MEM_read32(ip); ip += 4; 360*0c16b537SWarner Losh Counting1[(BYTE) c ]++; 361*0c16b537SWarner Losh Counting2[(BYTE)(c>>8) ]++; 362*0c16b537SWarner Losh Counting3[(BYTE)(c>>16)]++; 363*0c16b537SWarner Losh Counting4[ c>>24 ]++; 364*0c16b537SWarner Losh c = cached; cached = MEM_read32(ip); ip += 4; 365*0c16b537SWarner Losh Counting1[(BYTE) c ]++; 366*0c16b537SWarner Losh Counting2[(BYTE)(c>>8) ]++; 367*0c16b537SWarner Losh Counting3[(BYTE)(c>>16)]++; 368*0c16b537SWarner Losh Counting4[ c>>24 ]++; 369*0c16b537SWarner Losh } 370*0c16b537SWarner Losh ip-=4; 371*0c16b537SWarner Losh } 372*0c16b537SWarner Losh 373*0c16b537SWarner Losh /* finish last symbols */ 374*0c16b537SWarner Losh while (ip<iend) Counting1[*ip++]++; 375*0c16b537SWarner Losh 376*0c16b537SWarner Losh if (checkMax) { /* verify stats will fit into destination table */ 377*0c16b537SWarner Losh U32 s; for (s=255; s>maxSymbolValue; s--) { 378*0c16b537SWarner Losh Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; 379*0c16b537SWarner Losh if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall); 380*0c16b537SWarner Losh } } 381*0c16b537SWarner Losh 382*0c16b537SWarner Losh { U32 s; for (s=0; s<=maxSymbolValue; s++) { 383*0c16b537SWarner Losh count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; 384*0c16b537SWarner Losh if (count[s] > max) max = count[s]; 385*0c16b537SWarner Losh } } 386*0c16b537SWarner Losh 387*0c16b537SWarner Losh while (!count[maxSymbolValue]) maxSymbolValue--; 388*0c16b537SWarner Losh *maxSymbolValuePtr = maxSymbolValue; 389*0c16b537SWarner Losh return (size_t)max; 390*0c16b537SWarner Losh } 391*0c16b537SWarner Losh 392*0c16b537SWarner Losh /* FSE_countFast_wksp() : 393*0c16b537SWarner Losh * Same as FSE_countFast(), but using an externally provided scratch buffer. 394*0c16b537SWarner Losh * `workSpace` size must be table of >= `1024` unsigned */ 395*0c16b537SWarner Losh size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, 396*0c16b537SWarner Losh const void* source, size_t sourceSize, unsigned* workSpace) 397*0c16b537SWarner Losh { 398*0c16b537SWarner Losh if (sourceSize < 1500) return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize); 399*0c16b537SWarner Losh return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace); 400*0c16b537SWarner Losh } 401*0c16b537SWarner Losh 402*0c16b537SWarner Losh /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */ 403*0c16b537SWarner Losh size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, 404*0c16b537SWarner Losh const void* source, size_t sourceSize) 405*0c16b537SWarner Losh { 406*0c16b537SWarner Losh unsigned tmpCounters[1024]; 407*0c16b537SWarner Losh return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters); 408*0c16b537SWarner Losh } 409*0c16b537SWarner Losh 410*0c16b537SWarner Losh /* FSE_count_wksp() : 411*0c16b537SWarner Losh * Same as FSE_count(), but using an externally provided scratch buffer. 412*0c16b537SWarner Losh * `workSpace` size must be table of >= `1024` unsigned */ 413*0c16b537SWarner Losh size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, 414*0c16b537SWarner Losh const void* source, size_t sourceSize, unsigned* workSpace) 415*0c16b537SWarner Losh { 416*0c16b537SWarner Losh if (*maxSymbolValuePtr < 255) 417*0c16b537SWarner Losh return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace); 418*0c16b537SWarner Losh *maxSymbolValuePtr = 255; 419*0c16b537SWarner Losh return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace); 420*0c16b537SWarner Losh } 421*0c16b537SWarner Losh 422*0c16b537SWarner Losh size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, 423*0c16b537SWarner Losh const void* src, size_t srcSize) 424*0c16b537SWarner Losh { 425*0c16b537SWarner Losh unsigned tmpCounters[1024]; 426*0c16b537SWarner Losh return FSE_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters); 427*0c16b537SWarner Losh } 428*0c16b537SWarner Losh 429*0c16b537SWarner Losh 430*0c16b537SWarner Losh 431*0c16b537SWarner Losh /*-************************************************************** 432*0c16b537SWarner Losh * FSE Compression Code 433*0c16b537SWarner Losh ****************************************************************/ 434*0c16b537SWarner Losh /*! FSE_sizeof_CTable() : 435*0c16b537SWarner Losh FSE_CTable is a variable size structure which contains : 436*0c16b537SWarner Losh `U16 tableLog;` 437*0c16b537SWarner Losh `U16 maxSymbolValue;` 438*0c16b537SWarner Losh `U16 nextStateNumber[1 << tableLog];` // This size is variable 439*0c16b537SWarner Losh `FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];` // This size is variable 440*0c16b537SWarner Losh Allocation is manual (C standard does not support variable-size structures). 441*0c16b537SWarner Losh */ 442*0c16b537SWarner Losh size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog) 443*0c16b537SWarner Losh { 444*0c16b537SWarner Losh if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); 445*0c16b537SWarner Losh return FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); 446*0c16b537SWarner Losh } 447*0c16b537SWarner Losh 448*0c16b537SWarner Losh FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog) 449*0c16b537SWarner Losh { 450*0c16b537SWarner Losh size_t size; 451*0c16b537SWarner Losh if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; 452*0c16b537SWarner Losh size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); 453*0c16b537SWarner Losh return (FSE_CTable*)malloc(size); 454*0c16b537SWarner Losh } 455*0c16b537SWarner Losh 456*0c16b537SWarner Losh void FSE_freeCTable (FSE_CTable* ct) { free(ct); } 457*0c16b537SWarner Losh 458*0c16b537SWarner Losh /* provides the minimum logSize to safely represent a distribution */ 459*0c16b537SWarner Losh static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) 460*0c16b537SWarner Losh { 461*0c16b537SWarner Losh U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1; 462*0c16b537SWarner Losh U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; 463*0c16b537SWarner Losh U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; 464*0c16b537SWarner Losh assert(srcSize > 1); /* Not supported, RLE should be used instead */ 465*0c16b537SWarner Losh return minBits; 466*0c16b537SWarner Losh } 467*0c16b537SWarner Losh 468*0c16b537SWarner Losh unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus) 469*0c16b537SWarner Losh { 470*0c16b537SWarner Losh U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus; 471*0c16b537SWarner Losh U32 tableLog = maxTableLog; 472*0c16b537SWarner Losh U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); 473*0c16b537SWarner Losh assert(srcSize > 1); /* Not supported, RLE should be used instead */ 474*0c16b537SWarner Losh if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; 475*0c16b537SWarner Losh if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */ 476*0c16b537SWarner Losh if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */ 477*0c16b537SWarner Losh if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG; 478*0c16b537SWarner Losh if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG; 479*0c16b537SWarner Losh return tableLog; 480*0c16b537SWarner Losh } 481*0c16b537SWarner Losh 482*0c16b537SWarner Losh unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) 483*0c16b537SWarner Losh { 484*0c16b537SWarner Losh return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2); 485*0c16b537SWarner Losh } 486*0c16b537SWarner Losh 487*0c16b537SWarner Losh 488*0c16b537SWarner Losh /* Secondary normalization method. 489*0c16b537SWarner Losh To be used when primary method fails. */ 490*0c16b537SWarner Losh 491*0c16b537SWarner Losh static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue) 492*0c16b537SWarner Losh { 493*0c16b537SWarner Losh short const NOT_YET_ASSIGNED = -2; 494*0c16b537SWarner Losh U32 s; 495*0c16b537SWarner Losh U32 distributed = 0; 496*0c16b537SWarner Losh U32 ToDistribute; 497*0c16b537SWarner Losh 498*0c16b537SWarner Losh /* Init */ 499*0c16b537SWarner Losh U32 const lowThreshold = (U32)(total >> tableLog); 500*0c16b537SWarner Losh U32 lowOne = (U32)((total * 3) >> (tableLog + 1)); 501*0c16b537SWarner Losh 502*0c16b537SWarner Losh for (s=0; s<=maxSymbolValue; s++) { 503*0c16b537SWarner Losh if (count[s] == 0) { 504*0c16b537SWarner Losh norm[s]=0; 505*0c16b537SWarner Losh continue; 506*0c16b537SWarner Losh } 507*0c16b537SWarner Losh if (count[s] <= lowThreshold) { 508*0c16b537SWarner Losh norm[s] = -1; 509*0c16b537SWarner Losh distributed++; 510*0c16b537SWarner Losh total -= count[s]; 511*0c16b537SWarner Losh continue; 512*0c16b537SWarner Losh } 513*0c16b537SWarner Losh if (count[s] <= lowOne) { 514*0c16b537SWarner Losh norm[s] = 1; 515*0c16b537SWarner Losh distributed++; 516*0c16b537SWarner Losh total -= count[s]; 517*0c16b537SWarner Losh continue; 518*0c16b537SWarner Losh } 519*0c16b537SWarner Losh 520*0c16b537SWarner Losh norm[s]=NOT_YET_ASSIGNED; 521*0c16b537SWarner Losh } 522*0c16b537SWarner Losh ToDistribute = (1 << tableLog) - distributed; 523*0c16b537SWarner Losh 524*0c16b537SWarner Losh if ((total / ToDistribute) > lowOne) { 525*0c16b537SWarner Losh /* risk of rounding to zero */ 526*0c16b537SWarner Losh lowOne = (U32)((total * 3) / (ToDistribute * 2)); 527*0c16b537SWarner Losh for (s=0; s<=maxSymbolValue; s++) { 528*0c16b537SWarner Losh if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) { 529*0c16b537SWarner Losh norm[s] = 1; 530*0c16b537SWarner Losh distributed++; 531*0c16b537SWarner Losh total -= count[s]; 532*0c16b537SWarner Losh continue; 533*0c16b537SWarner Losh } } 534*0c16b537SWarner Losh ToDistribute = (1 << tableLog) - distributed; 535*0c16b537SWarner Losh } 536*0c16b537SWarner Losh 537*0c16b537SWarner Losh if (distributed == maxSymbolValue+1) { 538*0c16b537SWarner Losh /* all values are pretty poor; 539*0c16b537SWarner Losh probably incompressible data (should have already been detected); 540*0c16b537SWarner Losh find max, then give all remaining points to max */ 541*0c16b537SWarner Losh U32 maxV = 0, maxC = 0; 542*0c16b537SWarner Losh for (s=0; s<=maxSymbolValue; s++) 543*0c16b537SWarner Losh if (count[s] > maxC) maxV=s, maxC=count[s]; 544*0c16b537SWarner Losh norm[maxV] += (short)ToDistribute; 545*0c16b537SWarner Losh return 0; 546*0c16b537SWarner Losh } 547*0c16b537SWarner Losh 548*0c16b537SWarner Losh if (total == 0) { 549*0c16b537SWarner Losh /* all of the symbols were low enough for the lowOne or lowThreshold */ 550*0c16b537SWarner Losh for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1)) 551*0c16b537SWarner Losh if (norm[s] > 0) ToDistribute--, norm[s]++; 552*0c16b537SWarner Losh return 0; 553*0c16b537SWarner Losh } 554*0c16b537SWarner Losh 555*0c16b537SWarner Losh { U64 const vStepLog = 62 - tableLog; 556*0c16b537SWarner Losh U64 const mid = (1ULL << (vStepLog-1)) - 1; 557*0c16b537SWarner Losh U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total; /* scale on remaining */ 558*0c16b537SWarner Losh U64 tmpTotal = mid; 559*0c16b537SWarner Losh for (s=0; s<=maxSymbolValue; s++) { 560*0c16b537SWarner Losh if (norm[s]==NOT_YET_ASSIGNED) { 561*0c16b537SWarner Losh U64 const end = tmpTotal + (count[s] * rStep); 562*0c16b537SWarner Losh U32 const sStart = (U32)(tmpTotal >> vStepLog); 563*0c16b537SWarner Losh U32 const sEnd = (U32)(end >> vStepLog); 564*0c16b537SWarner Losh U32 const weight = sEnd - sStart; 565*0c16b537SWarner Losh if (weight < 1) 566*0c16b537SWarner Losh return ERROR(GENERIC); 567*0c16b537SWarner Losh norm[s] = (short)weight; 568*0c16b537SWarner Losh tmpTotal = end; 569*0c16b537SWarner Losh } } } 570*0c16b537SWarner Losh 571*0c16b537SWarner Losh return 0; 572*0c16b537SWarner Losh } 573*0c16b537SWarner Losh 574*0c16b537SWarner Losh 575*0c16b537SWarner Losh size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, 576*0c16b537SWarner Losh const unsigned* count, size_t total, 577*0c16b537SWarner Losh unsigned maxSymbolValue) 578*0c16b537SWarner Losh { 579*0c16b537SWarner Losh /* Sanity checks */ 580*0c16b537SWarner Losh if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; 581*0c16b537SWarner Losh if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported size */ 582*0c16b537SWarner Losh if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */ 583*0c16b537SWarner Losh if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */ 584*0c16b537SWarner Losh 585*0c16b537SWarner Losh { static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 }; 586*0c16b537SWarner Losh U64 const scale = 62 - tableLog; 587*0c16b537SWarner Losh U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */ 588*0c16b537SWarner Losh U64 const vStep = 1ULL<<(scale-20); 589*0c16b537SWarner Losh int stillToDistribute = 1<<tableLog; 590*0c16b537SWarner Losh unsigned s; 591*0c16b537SWarner Losh unsigned largest=0; 592*0c16b537SWarner Losh short largestP=0; 593*0c16b537SWarner Losh U32 lowThreshold = (U32)(total >> tableLog); 594*0c16b537SWarner Losh 595*0c16b537SWarner Losh for (s=0; s<=maxSymbolValue; s++) { 596*0c16b537SWarner Losh if (count[s] == total) return 0; /* rle special case */ 597*0c16b537SWarner Losh if (count[s] == 0) { normalizedCounter[s]=0; continue; } 598*0c16b537SWarner Losh if (count[s] <= lowThreshold) { 599*0c16b537SWarner Losh normalizedCounter[s] = -1; 600*0c16b537SWarner Losh stillToDistribute--; 601*0c16b537SWarner Losh } else { 602*0c16b537SWarner Losh short proba = (short)((count[s]*step) >> scale); 603*0c16b537SWarner Losh if (proba<8) { 604*0c16b537SWarner Losh U64 restToBeat = vStep * rtbTable[proba]; 605*0c16b537SWarner Losh proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat; 606*0c16b537SWarner Losh } 607*0c16b537SWarner Losh if (proba > largestP) largestP=proba, largest=s; 608*0c16b537SWarner Losh normalizedCounter[s] = proba; 609*0c16b537SWarner Losh stillToDistribute -= proba; 610*0c16b537SWarner Losh } } 611*0c16b537SWarner Losh if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) { 612*0c16b537SWarner Losh /* corner case, need another normalization method */ 613*0c16b537SWarner Losh size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue); 614*0c16b537SWarner Losh if (FSE_isError(errorCode)) return errorCode; 615*0c16b537SWarner Losh } 616*0c16b537SWarner Losh else normalizedCounter[largest] += (short)stillToDistribute; 617*0c16b537SWarner Losh } 618*0c16b537SWarner Losh 619*0c16b537SWarner Losh #if 0 620*0c16b537SWarner Losh { /* Print Table (debug) */ 621*0c16b537SWarner Losh U32 s; 622*0c16b537SWarner Losh U32 nTotal = 0; 623*0c16b537SWarner Losh for (s=0; s<=maxSymbolValue; s++) 624*0c16b537SWarner Losh printf("%3i: %4i \n", s, normalizedCounter[s]); 625*0c16b537SWarner Losh for (s=0; s<=maxSymbolValue; s++) 626*0c16b537SWarner Losh nTotal += abs(normalizedCounter[s]); 627*0c16b537SWarner Losh if (nTotal != (1U<<tableLog)) 628*0c16b537SWarner Losh printf("Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog); 629*0c16b537SWarner Losh getchar(); 630*0c16b537SWarner Losh } 631*0c16b537SWarner Losh #endif 632*0c16b537SWarner Losh 633*0c16b537SWarner Losh return tableLog; 634*0c16b537SWarner Losh } 635*0c16b537SWarner Losh 636*0c16b537SWarner Losh 637*0c16b537SWarner Losh /* fake FSE_CTable, for raw (uncompressed) input */ 638*0c16b537SWarner Losh size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits) 639*0c16b537SWarner Losh { 640*0c16b537SWarner Losh const unsigned tableSize = 1 << nbBits; 641*0c16b537SWarner Losh const unsigned tableMask = tableSize - 1; 642*0c16b537SWarner Losh const unsigned maxSymbolValue = tableMask; 643*0c16b537SWarner Losh void* const ptr = ct; 644*0c16b537SWarner Losh U16* const tableU16 = ( (U16*) ptr) + 2; 645*0c16b537SWarner Losh void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1); /* assumption : tableLog >= 1 */ 646*0c16b537SWarner Losh FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); 647*0c16b537SWarner Losh unsigned s; 648*0c16b537SWarner Losh 649*0c16b537SWarner Losh /* Sanity checks */ 650*0c16b537SWarner Losh if (nbBits < 1) return ERROR(GENERIC); /* min size */ 651*0c16b537SWarner Losh 652*0c16b537SWarner Losh /* header */ 653*0c16b537SWarner Losh tableU16[-2] = (U16) nbBits; 654*0c16b537SWarner Losh tableU16[-1] = (U16) maxSymbolValue; 655*0c16b537SWarner Losh 656*0c16b537SWarner Losh /* Build table */ 657*0c16b537SWarner Losh for (s=0; s<tableSize; s++) 658*0c16b537SWarner Losh tableU16[s] = (U16)(tableSize + s); 659*0c16b537SWarner Losh 660*0c16b537SWarner Losh /* Build Symbol Transformation Table */ 661*0c16b537SWarner Losh { const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits); 662*0c16b537SWarner Losh for (s=0; s<=maxSymbolValue; s++) { 663*0c16b537SWarner Losh symbolTT[s].deltaNbBits = deltaNbBits; 664*0c16b537SWarner Losh symbolTT[s].deltaFindState = s-1; 665*0c16b537SWarner Losh } } 666*0c16b537SWarner Losh 667*0c16b537SWarner Losh return 0; 668*0c16b537SWarner Losh } 669*0c16b537SWarner Losh 670*0c16b537SWarner Losh /* fake FSE_CTable, for rle input (always same symbol) */ 671*0c16b537SWarner Losh size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue) 672*0c16b537SWarner Losh { 673*0c16b537SWarner Losh void* ptr = ct; 674*0c16b537SWarner Losh U16* tableU16 = ( (U16*) ptr) + 2; 675*0c16b537SWarner Losh void* FSCTptr = (U32*)ptr + 2; 676*0c16b537SWarner Losh FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) FSCTptr; 677*0c16b537SWarner Losh 678*0c16b537SWarner Losh /* header */ 679*0c16b537SWarner Losh tableU16[-2] = (U16) 0; 680*0c16b537SWarner Losh tableU16[-1] = (U16) symbolValue; 681*0c16b537SWarner Losh 682*0c16b537SWarner Losh /* Build table */ 683*0c16b537SWarner Losh tableU16[0] = 0; 684*0c16b537SWarner Losh tableU16[1] = 0; /* just in case */ 685*0c16b537SWarner Losh 686*0c16b537SWarner Losh /* Build Symbol Transformation Table */ 687*0c16b537SWarner Losh symbolTT[symbolValue].deltaNbBits = 0; 688*0c16b537SWarner Losh symbolTT[symbolValue].deltaFindState = 0; 689*0c16b537SWarner Losh 690*0c16b537SWarner Losh return 0; 691*0c16b537SWarner Losh } 692*0c16b537SWarner Losh 693*0c16b537SWarner Losh 694*0c16b537SWarner Losh static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize, 695*0c16b537SWarner Losh const void* src, size_t srcSize, 696*0c16b537SWarner Losh const FSE_CTable* ct, const unsigned fast) 697*0c16b537SWarner Losh { 698*0c16b537SWarner Losh const BYTE* const istart = (const BYTE*) src; 699*0c16b537SWarner Losh const BYTE* const iend = istart + srcSize; 700*0c16b537SWarner Losh const BYTE* ip=iend; 701*0c16b537SWarner Losh 702*0c16b537SWarner Losh BIT_CStream_t bitC; 703*0c16b537SWarner Losh FSE_CState_t CState1, CState2; 704*0c16b537SWarner Losh 705*0c16b537SWarner Losh /* init */ 706*0c16b537SWarner Losh if (srcSize <= 2) return 0; 707*0c16b537SWarner Losh { size_t const initError = BIT_initCStream(&bitC, dst, dstSize); 708*0c16b537SWarner Losh if (FSE_isError(initError)) return 0; /* not enough space available to write a bitstream */ } 709*0c16b537SWarner Losh 710*0c16b537SWarner Losh #define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s)) 711*0c16b537SWarner Losh 712*0c16b537SWarner Losh if (srcSize & 1) { 713*0c16b537SWarner Losh FSE_initCState2(&CState1, ct, *--ip); 714*0c16b537SWarner Losh FSE_initCState2(&CState2, ct, *--ip); 715*0c16b537SWarner Losh FSE_encodeSymbol(&bitC, &CState1, *--ip); 716*0c16b537SWarner Losh FSE_FLUSHBITS(&bitC); 717*0c16b537SWarner Losh } else { 718*0c16b537SWarner Losh FSE_initCState2(&CState2, ct, *--ip); 719*0c16b537SWarner Losh FSE_initCState2(&CState1, ct, *--ip); 720*0c16b537SWarner Losh } 721*0c16b537SWarner Losh 722*0c16b537SWarner Losh /* join to mod 4 */ 723*0c16b537SWarner Losh srcSize -= 2; 724*0c16b537SWarner Losh if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) { /* test bit 2 */ 725*0c16b537SWarner Losh FSE_encodeSymbol(&bitC, &CState2, *--ip); 726*0c16b537SWarner Losh FSE_encodeSymbol(&bitC, &CState1, *--ip); 727*0c16b537SWarner Losh FSE_FLUSHBITS(&bitC); 728*0c16b537SWarner Losh } 729*0c16b537SWarner Losh 730*0c16b537SWarner Losh /* 2 or 4 encoding per loop */ 731*0c16b537SWarner Losh while ( ip>istart ) { 732*0c16b537SWarner Losh 733*0c16b537SWarner Losh FSE_encodeSymbol(&bitC, &CState2, *--ip); 734*0c16b537SWarner Losh 735*0c16b537SWarner Losh if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */ 736*0c16b537SWarner Losh FSE_FLUSHBITS(&bitC); 737*0c16b537SWarner Losh 738*0c16b537SWarner Losh FSE_encodeSymbol(&bitC, &CState1, *--ip); 739*0c16b537SWarner Losh 740*0c16b537SWarner Losh if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) { /* this test must be static */ 741*0c16b537SWarner Losh FSE_encodeSymbol(&bitC, &CState2, *--ip); 742*0c16b537SWarner Losh FSE_encodeSymbol(&bitC, &CState1, *--ip); 743*0c16b537SWarner Losh } 744*0c16b537SWarner Losh 745*0c16b537SWarner Losh FSE_FLUSHBITS(&bitC); 746*0c16b537SWarner Losh } 747*0c16b537SWarner Losh 748*0c16b537SWarner Losh FSE_flushCState(&bitC, &CState2); 749*0c16b537SWarner Losh FSE_flushCState(&bitC, &CState1); 750*0c16b537SWarner Losh return BIT_closeCStream(&bitC); 751*0c16b537SWarner Losh } 752*0c16b537SWarner Losh 753*0c16b537SWarner Losh size_t FSE_compress_usingCTable (void* dst, size_t dstSize, 754*0c16b537SWarner Losh const void* src, size_t srcSize, 755*0c16b537SWarner Losh const FSE_CTable* ct) 756*0c16b537SWarner Losh { 757*0c16b537SWarner Losh unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize)); 758*0c16b537SWarner Losh 759*0c16b537SWarner Losh if (fast) 760*0c16b537SWarner Losh return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1); 761*0c16b537SWarner Losh else 762*0c16b537SWarner Losh return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0); 763*0c16b537SWarner Losh } 764*0c16b537SWarner Losh 765*0c16b537SWarner Losh 766*0c16b537SWarner Losh size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } 767*0c16b537SWarner Losh 768*0c16b537SWarner Losh #define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e 769*0c16b537SWarner Losh #define CHECK_F(f) { CHECK_V_F(_var_err__, f); } 770*0c16b537SWarner Losh 771*0c16b537SWarner Losh /* FSE_compress_wksp() : 772*0c16b537SWarner Losh * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). 773*0c16b537SWarner Losh * `wkspSize` size must be `(1<<tableLog)`. 774*0c16b537SWarner Losh */ 775*0c16b537SWarner Losh size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) 776*0c16b537SWarner Losh { 777*0c16b537SWarner Losh BYTE* const ostart = (BYTE*) dst; 778*0c16b537SWarner Losh BYTE* op = ostart; 779*0c16b537SWarner Losh BYTE* const oend = ostart + dstSize; 780*0c16b537SWarner Losh 781*0c16b537SWarner Losh U32 count[FSE_MAX_SYMBOL_VALUE+1]; 782*0c16b537SWarner Losh S16 norm[FSE_MAX_SYMBOL_VALUE+1]; 783*0c16b537SWarner Losh FSE_CTable* CTable = (FSE_CTable*)workSpace; 784*0c16b537SWarner Losh size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue); 785*0c16b537SWarner Losh void* scratchBuffer = (void*)(CTable + CTableSize); 786*0c16b537SWarner Losh size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable)); 787*0c16b537SWarner Losh 788*0c16b537SWarner Losh /* init conditions */ 789*0c16b537SWarner Losh if (wkspSize < FSE_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge); 790*0c16b537SWarner Losh if (srcSize <= 1) return 0; /* Not compressible */ 791*0c16b537SWarner Losh if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE; 792*0c16b537SWarner Losh if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG; 793*0c16b537SWarner Losh 794*0c16b537SWarner Losh /* Scan input and build symbol stats */ 795*0c16b537SWarner Losh { CHECK_V_F(maxCount, FSE_count_wksp(count, &maxSymbolValue, src, srcSize, (unsigned*)scratchBuffer) ); 796*0c16b537SWarner Losh if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */ 797*0c16b537SWarner Losh if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */ 798*0c16b537SWarner Losh if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */ 799*0c16b537SWarner Losh } 800*0c16b537SWarner Losh 801*0c16b537SWarner Losh tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue); 802*0c16b537SWarner Losh CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue) ); 803*0c16b537SWarner Losh 804*0c16b537SWarner Losh /* Write table description header */ 805*0c16b537SWarner Losh { CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) ); 806*0c16b537SWarner Losh op += nc_err; 807*0c16b537SWarner Losh } 808*0c16b537SWarner Losh 809*0c16b537SWarner Losh /* Compress */ 810*0c16b537SWarner Losh CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) ); 811*0c16b537SWarner Losh { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) ); 812*0c16b537SWarner Losh if (cSize == 0) return 0; /* not enough space for compressed data */ 813*0c16b537SWarner Losh op += cSize; 814*0c16b537SWarner Losh } 815*0c16b537SWarner Losh 816*0c16b537SWarner Losh /* check compressibility */ 817*0c16b537SWarner Losh if ( (size_t)(op-ostart) >= srcSize-1 ) return 0; 818*0c16b537SWarner Losh 819*0c16b537SWarner Losh return op-ostart; 820*0c16b537SWarner Losh } 821*0c16b537SWarner Losh 822*0c16b537SWarner Losh typedef struct { 823*0c16b537SWarner Losh FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; 824*0c16b537SWarner Losh BYTE scratchBuffer[1 << FSE_MAX_TABLELOG]; 825*0c16b537SWarner Losh } fseWkspMax_t; 826*0c16b537SWarner Losh 827*0c16b537SWarner Losh size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog) 828*0c16b537SWarner Losh { 829*0c16b537SWarner Losh fseWkspMax_t scratchBuffer; 830*0c16b537SWarner Losh FSE_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */ 831*0c16b537SWarner Losh if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); 832*0c16b537SWarner Losh return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer)); 833*0c16b537SWarner Losh } 834*0c16b537SWarner Losh 835*0c16b537SWarner Losh size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize) 836*0c16b537SWarner Losh { 837*0c16b537SWarner Losh return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG); 838*0c16b537SWarner Losh } 839*0c16b537SWarner Losh 840*0c16b537SWarner Losh 841*0c16b537SWarner Losh #endif /* FSE_COMMONDEFS_ONLY */ 842