xref: /freebsd/sys/contrib/zstd/lib/compress/fse_compress.c (revision 19fcbaf1424b464269f1a7621fab747bb75afc36)
10c16b537SWarner Losh /* ******************************************************************
20c16b537SWarner Losh    FSE : Finite State Entropy encoder
30c16b537SWarner Losh    Copyright (C) 2013-2015, Yann Collet.
40c16b537SWarner Losh 
50c16b537SWarner Losh    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
60c16b537SWarner Losh 
70c16b537SWarner Losh    Redistribution and use in source and binary forms, with or without
80c16b537SWarner Losh    modification, are permitted provided that the following conditions are
90c16b537SWarner Losh    met:
100c16b537SWarner Losh 
110c16b537SWarner Losh        * Redistributions of source code must retain the above copyright
120c16b537SWarner Losh    notice, this list of conditions and the following disclaimer.
130c16b537SWarner Losh        * Redistributions in binary form must reproduce the above
140c16b537SWarner Losh    copyright notice, this list of conditions and the following disclaimer
150c16b537SWarner Losh    in the documentation and/or other materials provided with the
160c16b537SWarner Losh    distribution.
170c16b537SWarner Losh 
180c16b537SWarner Losh    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
190c16b537SWarner Losh    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
200c16b537SWarner Losh    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
210c16b537SWarner Losh    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
220c16b537SWarner Losh    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
230c16b537SWarner Losh    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
240c16b537SWarner Losh    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
250c16b537SWarner Losh    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
260c16b537SWarner Losh    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
270c16b537SWarner Losh    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
280c16b537SWarner Losh    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
290c16b537SWarner Losh 
300c16b537SWarner Losh     You can contact the author at :
310c16b537SWarner Losh     - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
320c16b537SWarner Losh     - Public forum : https://groups.google.com/forum/#!forum/lz4c
330c16b537SWarner Losh ****************************************************************** */
340c16b537SWarner Losh 
350c16b537SWarner Losh /* **************************************************************
360c16b537SWarner Losh *  Includes
370c16b537SWarner Losh ****************************************************************/
380c16b537SWarner Losh #include <stdlib.h>     /* malloc, free, qsort */
390c16b537SWarner Losh #include <string.h>     /* memcpy, memset */
400c16b537SWarner Losh #include <stdio.h>      /* printf (debug) */
410c16b537SWarner Losh #include "bitstream.h"
420c16b537SWarner Losh #include "compiler.h"
430c16b537SWarner Losh #define FSE_STATIC_LINKING_ONLY
440c16b537SWarner Losh #include "fse.h"
450c16b537SWarner Losh #include "error_private.h"
460c16b537SWarner Losh 
470c16b537SWarner Losh 
480c16b537SWarner Losh /* **************************************************************
490c16b537SWarner Losh *  Error Management
500c16b537SWarner Losh ****************************************************************/
510c16b537SWarner Losh #define FSE_isError ERR_isError
520c16b537SWarner Losh #define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
530c16b537SWarner Losh 
540c16b537SWarner Losh 
550c16b537SWarner Losh /* **************************************************************
560c16b537SWarner Losh *  Templates
570c16b537SWarner Losh ****************************************************************/
580c16b537SWarner Losh /*
590c16b537SWarner Losh   designed to be included
600c16b537SWarner Losh   for type-specific functions (template emulation in C)
610c16b537SWarner Losh   Objective is to write these functions only once, for improved maintenance
620c16b537SWarner Losh */
630c16b537SWarner Losh 
640c16b537SWarner Losh /* safety checks */
650c16b537SWarner Losh #ifndef FSE_FUNCTION_EXTENSION
660c16b537SWarner Losh #  error "FSE_FUNCTION_EXTENSION must be defined"
670c16b537SWarner Losh #endif
680c16b537SWarner Losh #ifndef FSE_FUNCTION_TYPE
690c16b537SWarner Losh #  error "FSE_FUNCTION_TYPE must be defined"
700c16b537SWarner Losh #endif
710c16b537SWarner Losh 
720c16b537SWarner Losh /* Function names */
730c16b537SWarner Losh #define FSE_CAT(X,Y) X##Y
740c16b537SWarner Losh #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
750c16b537SWarner Losh #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
760c16b537SWarner Losh 
770c16b537SWarner Losh 
780c16b537SWarner Losh /* Function templates */
790c16b537SWarner Losh 
800c16b537SWarner Losh /* FSE_buildCTable_wksp() :
810c16b537SWarner Losh  * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
820c16b537SWarner Losh  * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
830c16b537SWarner Losh  * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
840c16b537SWarner Losh  */
850c16b537SWarner Losh size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
860c16b537SWarner Losh {
870c16b537SWarner Losh     U32 const tableSize = 1 << tableLog;
880c16b537SWarner Losh     U32 const tableMask = tableSize - 1;
890c16b537SWarner Losh     void* const ptr = ct;
900c16b537SWarner Losh     U16* const tableU16 = ( (U16*) ptr) + 2;
910c16b537SWarner Losh     void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
920c16b537SWarner Losh     FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
930c16b537SWarner Losh     U32 const step = FSE_TABLESTEP(tableSize);
940c16b537SWarner Losh     U32 cumul[FSE_MAX_SYMBOL_VALUE+2];
950c16b537SWarner Losh 
960c16b537SWarner Losh     FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace;
970c16b537SWarner Losh     U32 highThreshold = tableSize-1;
980c16b537SWarner Losh 
990c16b537SWarner Losh     /* CTable header */
1000c16b537SWarner Losh     if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge);
1010c16b537SWarner Losh     tableU16[-2] = (U16) tableLog;
1020c16b537SWarner Losh     tableU16[-1] = (U16) maxSymbolValue;
1030c16b537SWarner Losh 
1040c16b537SWarner Losh     /* For explanations on how to distribute symbol values over the table :
1050c16b537SWarner Losh     *  http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
1060c16b537SWarner Losh 
1070c16b537SWarner Losh     /* symbol start positions */
1080c16b537SWarner Losh     {   U32 u;
1090c16b537SWarner Losh         cumul[0] = 0;
1100c16b537SWarner Losh         for (u=1; u<=maxSymbolValue+1; u++) {
1110c16b537SWarner Losh             if (normalizedCounter[u-1]==-1) {  /* Low proba symbol */
1120c16b537SWarner Losh                 cumul[u] = cumul[u-1] + 1;
1130c16b537SWarner Losh                 tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
1140c16b537SWarner Losh             } else {
1150c16b537SWarner Losh                 cumul[u] = cumul[u-1] + normalizedCounter[u-1];
1160c16b537SWarner Losh         }   }
1170c16b537SWarner Losh         cumul[maxSymbolValue+1] = tableSize+1;
1180c16b537SWarner Losh     }
1190c16b537SWarner Losh 
1200c16b537SWarner Losh     /* Spread symbols */
1210c16b537SWarner Losh     {   U32 position = 0;
1220c16b537SWarner Losh         U32 symbol;
1230c16b537SWarner Losh         for (symbol=0; symbol<=maxSymbolValue; symbol++) {
1240c16b537SWarner Losh             int nbOccurences;
1250c16b537SWarner Losh             for (nbOccurences=0; nbOccurences<normalizedCounter[symbol]; nbOccurences++) {
1260c16b537SWarner Losh                 tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
1270c16b537SWarner Losh                 position = (position + step) & tableMask;
1280c16b537SWarner Losh                 while (position > highThreshold) position = (position + step) & tableMask;   /* Low proba area */
1290c16b537SWarner Losh         }   }
1300c16b537SWarner Losh 
1310c16b537SWarner Losh         if (position!=0) return ERROR(GENERIC);   /* Must have gone through all positions */
1320c16b537SWarner Losh     }
1330c16b537SWarner Losh 
1340c16b537SWarner Losh     /* Build table */
1350c16b537SWarner Losh     {   U32 u; for (u=0; u<tableSize; u++) {
1360c16b537SWarner Losh         FSE_FUNCTION_TYPE s = tableSymbol[u];   /* note : static analyzer may not understand tableSymbol is properly initialized */
1370c16b537SWarner Losh         tableU16[cumul[s]++] = (U16) (tableSize+u);   /* TableU16 : sorted by symbol order; gives next state value */
1380c16b537SWarner Losh     }   }
1390c16b537SWarner Losh 
1400c16b537SWarner Losh     /* Build Symbol Transformation Table */
1410c16b537SWarner Losh     {   unsigned total = 0;
1420c16b537SWarner Losh         unsigned s;
1430c16b537SWarner Losh         for (s=0; s<=maxSymbolValue; s++) {
1440c16b537SWarner Losh             switch (normalizedCounter[s])
1450c16b537SWarner Losh             {
1460c16b537SWarner Losh             case  0: break;
1470c16b537SWarner Losh 
1480c16b537SWarner Losh             case -1:
1490c16b537SWarner Losh             case  1:
1500c16b537SWarner Losh                 symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
1510c16b537SWarner Losh                 symbolTT[s].deltaFindState = total - 1;
1520c16b537SWarner Losh                 total ++;
1530c16b537SWarner Losh                 break;
1540c16b537SWarner Losh             default :
1550c16b537SWarner Losh                 {
1560c16b537SWarner Losh                     U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1);
1570c16b537SWarner Losh                     U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
1580c16b537SWarner Losh                     symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
1590c16b537SWarner Losh                     symbolTT[s].deltaFindState = total - normalizedCounter[s];
1600c16b537SWarner Losh                     total +=  normalizedCounter[s];
1610c16b537SWarner Losh     }   }   }   }
1620c16b537SWarner Losh 
1630c16b537SWarner Losh     return 0;
1640c16b537SWarner Losh }
1650c16b537SWarner Losh 
1660c16b537SWarner Losh 
1670c16b537SWarner Losh size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
1680c16b537SWarner Losh {
1690c16b537SWarner Losh     FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE];   /* memset() is not necessary, even if static analyzer complain about it */
1700c16b537SWarner Losh     return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol));
1710c16b537SWarner Losh }
1720c16b537SWarner Losh 
1730c16b537SWarner Losh 
1740c16b537SWarner Losh 
1750c16b537SWarner Losh #ifndef FSE_COMMONDEFS_ONLY
1760c16b537SWarner Losh 
1770c16b537SWarner Losh /*-**************************************************************
1780c16b537SWarner Losh *  FSE NCount encoding-decoding
1790c16b537SWarner Losh ****************************************************************/
1800c16b537SWarner Losh size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
1810c16b537SWarner Losh {
1820c16b537SWarner Losh     size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
1830c16b537SWarner Losh     return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND;  /* maxSymbolValue==0 ? use default */
1840c16b537SWarner Losh }
1850c16b537SWarner Losh 
1860c16b537SWarner Losh static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
1870c16b537SWarner Losh                                        const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
1880c16b537SWarner Losh                                        unsigned writeIsSafe)
1890c16b537SWarner Losh {
1900c16b537SWarner Losh     BYTE* const ostart = (BYTE*) header;
1910c16b537SWarner Losh     BYTE* out = ostart;
1920c16b537SWarner Losh     BYTE* const oend = ostart + headerBufferSize;
1930c16b537SWarner Losh     int nbBits;
1940c16b537SWarner Losh     const int tableSize = 1 << tableLog;
1950c16b537SWarner Losh     int remaining;
1960c16b537SWarner Losh     int threshold;
1970c16b537SWarner Losh     U32 bitStream;
1980c16b537SWarner Losh     int bitCount;
1990c16b537SWarner Losh     unsigned charnum = 0;
2000c16b537SWarner Losh     int previous0 = 0;
2010c16b537SWarner Losh 
2020c16b537SWarner Losh     bitStream = 0;
2030c16b537SWarner Losh     bitCount  = 0;
2040c16b537SWarner Losh     /* Table Size */
2050c16b537SWarner Losh     bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
2060c16b537SWarner Losh     bitCount  += 4;
2070c16b537SWarner Losh 
2080c16b537SWarner Losh     /* Init */
2090c16b537SWarner Losh     remaining = tableSize+1;   /* +1 for extra accuracy */
2100c16b537SWarner Losh     threshold = tableSize;
2110c16b537SWarner Losh     nbBits = tableLog+1;
2120c16b537SWarner Losh 
2130c16b537SWarner Losh     while (remaining>1) {  /* stops at 1 */
2140c16b537SWarner Losh         if (previous0) {
2150c16b537SWarner Losh             unsigned start = charnum;
2160c16b537SWarner Losh             while (!normalizedCounter[charnum]) charnum++;
2170c16b537SWarner Losh             while (charnum >= start+24) {
2180c16b537SWarner Losh                 start+=24;
2190c16b537SWarner Losh                 bitStream += 0xFFFFU << bitCount;
2200c16b537SWarner Losh                 if ((!writeIsSafe) && (out > oend-2)) return ERROR(dstSize_tooSmall);   /* Buffer overflow */
2210c16b537SWarner Losh                 out[0] = (BYTE) bitStream;
2220c16b537SWarner Losh                 out[1] = (BYTE)(bitStream>>8);
2230c16b537SWarner Losh                 out+=2;
2240c16b537SWarner Losh                 bitStream>>=16;
2250c16b537SWarner Losh             }
2260c16b537SWarner Losh             while (charnum >= start+3) {
2270c16b537SWarner Losh                 start+=3;
2280c16b537SWarner Losh                 bitStream += 3 << bitCount;
2290c16b537SWarner Losh                 bitCount += 2;
2300c16b537SWarner Losh             }
2310c16b537SWarner Losh             bitStream += (charnum-start) << bitCount;
2320c16b537SWarner Losh             bitCount += 2;
2330c16b537SWarner Losh             if (bitCount>16) {
2340c16b537SWarner Losh                 if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall);   /* Buffer overflow */
2350c16b537SWarner Losh                 out[0] = (BYTE)bitStream;
2360c16b537SWarner Losh                 out[1] = (BYTE)(bitStream>>8);
2370c16b537SWarner Losh                 out += 2;
2380c16b537SWarner Losh                 bitStream >>= 16;
2390c16b537SWarner Losh                 bitCount -= 16;
2400c16b537SWarner Losh         }   }
2410c16b537SWarner Losh         {   int count = normalizedCounter[charnum++];
2420c16b537SWarner Losh             int const max = (2*threshold-1)-remaining;
2430c16b537SWarner Losh             remaining -= count < 0 ? -count : count;
2440c16b537SWarner Losh             count++;   /* +1 for extra accuracy */
2450c16b537SWarner Losh             if (count>=threshold) count += max;   /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
2460c16b537SWarner Losh             bitStream += count << bitCount;
2470c16b537SWarner Losh             bitCount  += nbBits;
2480c16b537SWarner Losh             bitCount  -= (count<max);
2490c16b537SWarner Losh             previous0  = (count==1);
2500c16b537SWarner Losh             if (remaining<1) return ERROR(GENERIC);
251*19fcbaf1SConrad Meyer             while (remaining<threshold) { nbBits--; threshold>>=1; }
2520c16b537SWarner Losh         }
2530c16b537SWarner Losh         if (bitCount>16) {
2540c16b537SWarner Losh             if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall);   /* Buffer overflow */
2550c16b537SWarner Losh             out[0] = (BYTE)bitStream;
2560c16b537SWarner Losh             out[1] = (BYTE)(bitStream>>8);
2570c16b537SWarner Losh             out += 2;
2580c16b537SWarner Losh             bitStream >>= 16;
2590c16b537SWarner Losh             bitCount -= 16;
2600c16b537SWarner Losh     }   }
2610c16b537SWarner Losh 
2620c16b537SWarner Losh     /* flush remaining bitStream */
2630c16b537SWarner Losh     if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall);   /* Buffer overflow */
2640c16b537SWarner Losh     out[0] = (BYTE)bitStream;
2650c16b537SWarner Losh     out[1] = (BYTE)(bitStream>>8);
2660c16b537SWarner Losh     out+= (bitCount+7) /8;
2670c16b537SWarner Losh 
2680c16b537SWarner Losh     if (charnum > maxSymbolValue + 1) return ERROR(GENERIC);
2690c16b537SWarner Losh 
2700c16b537SWarner Losh     return (out-ostart);
2710c16b537SWarner Losh }
2720c16b537SWarner Losh 
2730c16b537SWarner Losh 
2740c16b537SWarner Losh size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
2750c16b537SWarner Losh {
2760c16b537SWarner Losh     if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);   /* Unsupported */
2770c16b537SWarner Losh     if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC);   /* Unsupported */
2780c16b537SWarner Losh 
2790c16b537SWarner Losh     if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
2800c16b537SWarner Losh         return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
2810c16b537SWarner Losh 
2820c16b537SWarner Losh     return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1);
2830c16b537SWarner Losh }
2840c16b537SWarner Losh 
2850c16b537SWarner Losh 
2860c16b537SWarner Losh 
2870c16b537SWarner Losh /*-**************************************************************
2880c16b537SWarner Losh *  Counting histogram
2890c16b537SWarner Losh ****************************************************************/
2900c16b537SWarner Losh /*! FSE_count_simple
2910c16b537SWarner Losh     This function counts byte values within `src`, and store the histogram into table `count`.
2920c16b537SWarner Losh     It doesn't use any additional memory.
2930c16b537SWarner Losh     But this function is unsafe : it doesn't check that all values within `src` can fit into `count`.
2940c16b537SWarner Losh     For this reason, prefer using a table `count` with 256 elements.
295*19fcbaf1SConrad Meyer     @return : count of most numerous element.
2960c16b537SWarner Losh */
2970c16b537SWarner Losh size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
2980c16b537SWarner Losh                         const void* src, size_t srcSize)
2990c16b537SWarner Losh {
3000c16b537SWarner Losh     const BYTE* ip = (const BYTE*)src;
3010c16b537SWarner Losh     const BYTE* const end = ip + srcSize;
3020c16b537SWarner Losh     unsigned maxSymbolValue = *maxSymbolValuePtr;
3030c16b537SWarner Losh     unsigned max=0;
3040c16b537SWarner Losh 
3050c16b537SWarner Losh     memset(count, 0, (maxSymbolValue+1)*sizeof(*count));
3060c16b537SWarner Losh     if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
3070c16b537SWarner Losh 
308*19fcbaf1SConrad Meyer     while (ip<end) {
309*19fcbaf1SConrad Meyer         assert(*ip <= maxSymbolValue);
310*19fcbaf1SConrad Meyer         count[*ip++]++;
311*19fcbaf1SConrad Meyer     }
3120c16b537SWarner Losh 
3130c16b537SWarner Losh     while (!count[maxSymbolValue]) maxSymbolValue--;
3140c16b537SWarner Losh     *maxSymbolValuePtr = maxSymbolValue;
3150c16b537SWarner Losh 
3160c16b537SWarner Losh     { U32 s; for (s=0; s<=maxSymbolValue; s++) if (count[s] > max) max = count[s]; }
3170c16b537SWarner Losh 
3180c16b537SWarner Losh     return (size_t)max;
3190c16b537SWarner Losh }
3200c16b537SWarner Losh 
3210c16b537SWarner Losh 
3220c16b537SWarner Losh /* FSE_count_parallel_wksp() :
3230c16b537SWarner Losh  * Same as FSE_count_parallel(), but using an externally provided scratch buffer.
324*19fcbaf1SConrad Meyer  * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`.
325*19fcbaf1SConrad Meyer  * @return : largest histogram frequency, or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */
3260c16b537SWarner Losh static size_t FSE_count_parallel_wksp(
3270c16b537SWarner Losh                                 unsigned* count, unsigned* maxSymbolValuePtr,
3280c16b537SWarner Losh                                 const void* source, size_t sourceSize,
3290c16b537SWarner Losh                                 unsigned checkMax, unsigned* const workSpace)
3300c16b537SWarner Losh {
3310c16b537SWarner Losh     const BYTE* ip = (const BYTE*)source;
3320c16b537SWarner Losh     const BYTE* const iend = ip+sourceSize;
3330c16b537SWarner Losh     unsigned maxSymbolValue = *maxSymbolValuePtr;
3340c16b537SWarner Losh     unsigned max=0;
3350c16b537SWarner Losh     U32* const Counting1 = workSpace;
3360c16b537SWarner Losh     U32* const Counting2 = Counting1 + 256;
3370c16b537SWarner Losh     U32* const Counting3 = Counting2 + 256;
3380c16b537SWarner Losh     U32* const Counting4 = Counting3 + 256;
3390c16b537SWarner Losh 
340*19fcbaf1SConrad Meyer     memset(workSpace, 0, 4*256*sizeof(unsigned));
3410c16b537SWarner Losh 
3420c16b537SWarner Losh     /* safety checks */
3430c16b537SWarner Losh     if (!sourceSize) {
3440c16b537SWarner Losh         memset(count, 0, maxSymbolValue + 1);
3450c16b537SWarner Losh         *maxSymbolValuePtr = 0;
3460c16b537SWarner Losh         return 0;
3470c16b537SWarner Losh     }
3480c16b537SWarner Losh     if (!maxSymbolValue) maxSymbolValue = 255;            /* 0 == default */
3490c16b537SWarner Losh 
3500c16b537SWarner Losh     /* by stripes of 16 bytes */
3510c16b537SWarner Losh     {   U32 cached = MEM_read32(ip); ip += 4;
3520c16b537SWarner Losh         while (ip < iend-15) {
3530c16b537SWarner Losh             U32 c = cached; cached = MEM_read32(ip); ip += 4;
3540c16b537SWarner Losh             Counting1[(BYTE) c     ]++;
3550c16b537SWarner Losh             Counting2[(BYTE)(c>>8) ]++;
3560c16b537SWarner Losh             Counting3[(BYTE)(c>>16)]++;
3570c16b537SWarner Losh             Counting4[       c>>24 ]++;
3580c16b537SWarner Losh             c = cached; cached = MEM_read32(ip); ip += 4;
3590c16b537SWarner Losh             Counting1[(BYTE) c     ]++;
3600c16b537SWarner Losh             Counting2[(BYTE)(c>>8) ]++;
3610c16b537SWarner Losh             Counting3[(BYTE)(c>>16)]++;
3620c16b537SWarner Losh             Counting4[       c>>24 ]++;
3630c16b537SWarner Losh             c = cached; cached = MEM_read32(ip); ip += 4;
3640c16b537SWarner Losh             Counting1[(BYTE) c     ]++;
3650c16b537SWarner Losh             Counting2[(BYTE)(c>>8) ]++;
3660c16b537SWarner Losh             Counting3[(BYTE)(c>>16)]++;
3670c16b537SWarner Losh             Counting4[       c>>24 ]++;
3680c16b537SWarner Losh             c = cached; cached = MEM_read32(ip); ip += 4;
3690c16b537SWarner Losh             Counting1[(BYTE) c     ]++;
3700c16b537SWarner Losh             Counting2[(BYTE)(c>>8) ]++;
3710c16b537SWarner Losh             Counting3[(BYTE)(c>>16)]++;
3720c16b537SWarner Losh             Counting4[       c>>24 ]++;
3730c16b537SWarner Losh         }
3740c16b537SWarner Losh         ip-=4;
3750c16b537SWarner Losh     }
3760c16b537SWarner Losh 
3770c16b537SWarner Losh     /* finish last symbols */
3780c16b537SWarner Losh     while (ip<iend) Counting1[*ip++]++;
3790c16b537SWarner Losh 
3800c16b537SWarner Losh     if (checkMax) {   /* verify stats will fit into destination table */
3810c16b537SWarner Losh         U32 s; for (s=255; s>maxSymbolValue; s--) {
3820c16b537SWarner Losh             Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
3830c16b537SWarner Losh             if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
3840c16b537SWarner Losh     }   }
3850c16b537SWarner Losh 
386*19fcbaf1SConrad Meyer     {   U32 s;
387*19fcbaf1SConrad Meyer         if (maxSymbolValue > 255) maxSymbolValue = 255;
388*19fcbaf1SConrad Meyer         for (s=0; s<=maxSymbolValue; s++) {
3890c16b537SWarner Losh             count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
3900c16b537SWarner Losh             if (count[s] > max) max = count[s];
3910c16b537SWarner Losh     }   }
3920c16b537SWarner Losh 
3930c16b537SWarner Losh     while (!count[maxSymbolValue]) maxSymbolValue--;
3940c16b537SWarner Losh     *maxSymbolValuePtr = maxSymbolValue;
3950c16b537SWarner Losh     return (size_t)max;
3960c16b537SWarner Losh }
3970c16b537SWarner Losh 
3980c16b537SWarner Losh /* FSE_countFast_wksp() :
3990c16b537SWarner Losh  * Same as FSE_countFast(), but using an externally provided scratch buffer.
4000c16b537SWarner Losh  * `workSpace` size must be table of >= `1024` unsigned */
4010c16b537SWarner Losh size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
402*19fcbaf1SConrad Meyer                           const void* source, size_t sourceSize,
403*19fcbaf1SConrad Meyer                           unsigned* workSpace)
4040c16b537SWarner Losh {
405*19fcbaf1SConrad Meyer     if (sourceSize < 1500) /* heuristic threshold */
406*19fcbaf1SConrad Meyer         return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
4070c16b537SWarner Losh     return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
4080c16b537SWarner Losh }
4090c16b537SWarner Losh 
4100c16b537SWarner Losh /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
4110c16b537SWarner Losh size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
4120c16b537SWarner Losh                      const void* source, size_t sourceSize)
4130c16b537SWarner Losh {
4140c16b537SWarner Losh     unsigned tmpCounters[1024];
4150c16b537SWarner Losh     return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters);
4160c16b537SWarner Losh }
4170c16b537SWarner Losh 
4180c16b537SWarner Losh /* FSE_count_wksp() :
4190c16b537SWarner Losh  * Same as FSE_count(), but using an externally provided scratch buffer.
4200c16b537SWarner Losh  * `workSpace` size must be table of >= `1024` unsigned */
4210c16b537SWarner Losh size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
4220c16b537SWarner Losh                  const void* source, size_t sourceSize, unsigned* workSpace)
4230c16b537SWarner Losh {
4240c16b537SWarner Losh     if (*maxSymbolValuePtr < 255)
4250c16b537SWarner Losh         return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
4260c16b537SWarner Losh     *maxSymbolValuePtr = 255;
4270c16b537SWarner Losh     return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
4280c16b537SWarner Losh }
4290c16b537SWarner Losh 
4300c16b537SWarner Losh size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr,
4310c16b537SWarner Losh                  const void* src, size_t srcSize)
4320c16b537SWarner Losh {
4330c16b537SWarner Losh     unsigned tmpCounters[1024];
4340c16b537SWarner Losh     return FSE_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters);
4350c16b537SWarner Losh }
4360c16b537SWarner Losh 
4370c16b537SWarner Losh 
4380c16b537SWarner Losh 
4390c16b537SWarner Losh /*-**************************************************************
4400c16b537SWarner Losh *  FSE Compression Code
4410c16b537SWarner Losh ****************************************************************/
4420c16b537SWarner Losh /*! FSE_sizeof_CTable() :
4430c16b537SWarner Losh     FSE_CTable is a variable size structure which contains :
4440c16b537SWarner Losh     `U16 tableLog;`
4450c16b537SWarner Losh     `U16 maxSymbolValue;`
4460c16b537SWarner Losh     `U16 nextStateNumber[1 << tableLog];`                         // This size is variable
4470c16b537SWarner Losh     `FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];`  // This size is variable
4480c16b537SWarner Losh Allocation is manual (C standard does not support variable-size structures).
4490c16b537SWarner Losh */
4500c16b537SWarner Losh size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog)
4510c16b537SWarner Losh {
4520c16b537SWarner Losh     if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
4530c16b537SWarner Losh     return FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
4540c16b537SWarner Losh }
4550c16b537SWarner Losh 
4560c16b537SWarner Losh FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
4570c16b537SWarner Losh {
4580c16b537SWarner Losh     size_t size;
4590c16b537SWarner Losh     if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
4600c16b537SWarner Losh     size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
4610c16b537SWarner Losh     return (FSE_CTable*)malloc(size);
4620c16b537SWarner Losh }
4630c16b537SWarner Losh 
4640c16b537SWarner Losh void FSE_freeCTable (FSE_CTable* ct) { free(ct); }
4650c16b537SWarner Losh 
4660c16b537SWarner Losh /* provides the minimum logSize to safely represent a distribution */
4670c16b537SWarner Losh static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
4680c16b537SWarner Losh {
4690c16b537SWarner Losh     U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1;
4700c16b537SWarner Losh     U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
4710c16b537SWarner Losh     U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
4720c16b537SWarner Losh     assert(srcSize > 1); /* Not supported, RLE should be used instead */
4730c16b537SWarner Losh     return minBits;
4740c16b537SWarner Losh }
4750c16b537SWarner Losh 
4760c16b537SWarner Losh unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
4770c16b537SWarner Losh {
4780c16b537SWarner Losh     U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
4790c16b537SWarner Losh     U32 tableLog = maxTableLog;
4800c16b537SWarner Losh     U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
4810c16b537SWarner Losh     assert(srcSize > 1); /* Not supported, RLE should be used instead */
4820c16b537SWarner Losh     if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
4830c16b537SWarner Losh     if (maxBitsSrc < tableLog) tableLog = maxBitsSrc;   /* Accuracy can be reduced */
4840c16b537SWarner Losh     if (minBits > tableLog) tableLog = minBits;   /* Need a minimum to safely represent all symbol values */
4850c16b537SWarner Losh     if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;
4860c16b537SWarner Losh     if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;
4870c16b537SWarner Losh     return tableLog;
4880c16b537SWarner Losh }
4890c16b537SWarner Losh 
4900c16b537SWarner Losh unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
4910c16b537SWarner Losh {
4920c16b537SWarner Losh     return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
4930c16b537SWarner Losh }
4940c16b537SWarner Losh 
4950c16b537SWarner Losh 
4960c16b537SWarner Losh /* Secondary normalization method.
4970c16b537SWarner Losh    To be used when primary method fails. */
4980c16b537SWarner Losh 
4990c16b537SWarner Losh static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue)
5000c16b537SWarner Losh {
5010c16b537SWarner Losh     short const NOT_YET_ASSIGNED = -2;
5020c16b537SWarner Losh     U32 s;
5030c16b537SWarner Losh     U32 distributed = 0;
5040c16b537SWarner Losh     U32 ToDistribute;
5050c16b537SWarner Losh 
5060c16b537SWarner Losh     /* Init */
5070c16b537SWarner Losh     U32 const lowThreshold = (U32)(total >> tableLog);
5080c16b537SWarner Losh     U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
5090c16b537SWarner Losh 
5100c16b537SWarner Losh     for (s=0; s<=maxSymbolValue; s++) {
5110c16b537SWarner Losh         if (count[s] == 0) {
5120c16b537SWarner Losh             norm[s]=0;
5130c16b537SWarner Losh             continue;
5140c16b537SWarner Losh         }
5150c16b537SWarner Losh         if (count[s] <= lowThreshold) {
5160c16b537SWarner Losh             norm[s] = -1;
5170c16b537SWarner Losh             distributed++;
5180c16b537SWarner Losh             total -= count[s];
5190c16b537SWarner Losh             continue;
5200c16b537SWarner Losh         }
5210c16b537SWarner Losh         if (count[s] <= lowOne) {
5220c16b537SWarner Losh             norm[s] = 1;
5230c16b537SWarner Losh             distributed++;
5240c16b537SWarner Losh             total -= count[s];
5250c16b537SWarner Losh             continue;
5260c16b537SWarner Losh         }
5270c16b537SWarner Losh 
5280c16b537SWarner Losh         norm[s]=NOT_YET_ASSIGNED;
5290c16b537SWarner Losh     }
5300c16b537SWarner Losh     ToDistribute = (1 << tableLog) - distributed;
5310c16b537SWarner Losh 
5320c16b537SWarner Losh     if ((total / ToDistribute) > lowOne) {
5330c16b537SWarner Losh         /* risk of rounding to zero */
5340c16b537SWarner Losh         lowOne = (U32)((total * 3) / (ToDistribute * 2));
5350c16b537SWarner Losh         for (s=0; s<=maxSymbolValue; s++) {
5360c16b537SWarner Losh             if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) {
5370c16b537SWarner Losh                 norm[s] = 1;
5380c16b537SWarner Losh                 distributed++;
5390c16b537SWarner Losh                 total -= count[s];
5400c16b537SWarner Losh                 continue;
5410c16b537SWarner Losh         }   }
5420c16b537SWarner Losh         ToDistribute = (1 << tableLog) - distributed;
5430c16b537SWarner Losh     }
5440c16b537SWarner Losh 
5450c16b537SWarner Losh     if (distributed == maxSymbolValue+1) {
5460c16b537SWarner Losh         /* all values are pretty poor;
5470c16b537SWarner Losh            probably incompressible data (should have already been detected);
5480c16b537SWarner Losh            find max, then give all remaining points to max */
5490c16b537SWarner Losh         U32 maxV = 0, maxC = 0;
5500c16b537SWarner Losh         for (s=0; s<=maxSymbolValue; s++)
551*19fcbaf1SConrad Meyer             if (count[s] > maxC) { maxV=s; maxC=count[s]; }
5520c16b537SWarner Losh         norm[maxV] += (short)ToDistribute;
5530c16b537SWarner Losh         return 0;
5540c16b537SWarner Losh     }
5550c16b537SWarner Losh 
5560c16b537SWarner Losh     if (total == 0) {
5570c16b537SWarner Losh         /* all of the symbols were low enough for the lowOne or lowThreshold */
5580c16b537SWarner Losh         for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1))
559*19fcbaf1SConrad Meyer             if (norm[s] > 0) { ToDistribute--; norm[s]++; }
5600c16b537SWarner Losh         return 0;
5610c16b537SWarner Losh     }
5620c16b537SWarner Losh 
5630c16b537SWarner Losh     {   U64 const vStepLog = 62 - tableLog;
5640c16b537SWarner Losh         U64 const mid = (1ULL << (vStepLog-1)) - 1;
5650c16b537SWarner Losh         U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total;   /* scale on remaining */
5660c16b537SWarner Losh         U64 tmpTotal = mid;
5670c16b537SWarner Losh         for (s=0; s<=maxSymbolValue; s++) {
5680c16b537SWarner Losh             if (norm[s]==NOT_YET_ASSIGNED) {
5690c16b537SWarner Losh                 U64 const end = tmpTotal + (count[s] * rStep);
5700c16b537SWarner Losh                 U32 const sStart = (U32)(tmpTotal >> vStepLog);
5710c16b537SWarner Losh                 U32 const sEnd = (U32)(end >> vStepLog);
5720c16b537SWarner Losh                 U32 const weight = sEnd - sStart;
5730c16b537SWarner Losh                 if (weight < 1)
5740c16b537SWarner Losh                     return ERROR(GENERIC);
5750c16b537SWarner Losh                 norm[s] = (short)weight;
5760c16b537SWarner Losh                 tmpTotal = end;
5770c16b537SWarner Losh     }   }   }
5780c16b537SWarner Losh 
5790c16b537SWarner Losh     return 0;
5800c16b537SWarner Losh }
5810c16b537SWarner Losh 
5820c16b537SWarner Losh 
5830c16b537SWarner Losh size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
5840c16b537SWarner Losh                            const unsigned* count, size_t total,
5850c16b537SWarner Losh                            unsigned maxSymbolValue)
5860c16b537SWarner Losh {
5870c16b537SWarner Losh     /* Sanity checks */
5880c16b537SWarner Losh     if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
5890c16b537SWarner Losh     if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC);   /* Unsupported size */
5900c16b537SWarner Losh     if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);   /* Unsupported size */
5910c16b537SWarner Losh     if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC);   /* Too small tableLog, compression potentially impossible */
5920c16b537SWarner Losh 
5930c16b537SWarner Losh     {   static U32 const rtbTable[] = {     0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
5940c16b537SWarner Losh         U64 const scale = 62 - tableLog;
5950c16b537SWarner Losh         U64 const step = ((U64)1<<62) / total;   /* <== here, one division ! */
5960c16b537SWarner Losh         U64 const vStep = 1ULL<<(scale-20);
5970c16b537SWarner Losh         int stillToDistribute = 1<<tableLog;
5980c16b537SWarner Losh         unsigned s;
5990c16b537SWarner Losh         unsigned largest=0;
6000c16b537SWarner Losh         short largestP=0;
6010c16b537SWarner Losh         U32 lowThreshold = (U32)(total >> tableLog);
6020c16b537SWarner Losh 
6030c16b537SWarner Losh         for (s=0; s<=maxSymbolValue; s++) {
6040c16b537SWarner Losh             if (count[s] == total) return 0;   /* rle special case */
6050c16b537SWarner Losh             if (count[s] == 0) { normalizedCounter[s]=0; continue; }
6060c16b537SWarner Losh             if (count[s] <= lowThreshold) {
6070c16b537SWarner Losh                 normalizedCounter[s] = -1;
6080c16b537SWarner Losh                 stillToDistribute--;
6090c16b537SWarner Losh             } else {
6100c16b537SWarner Losh                 short proba = (short)((count[s]*step) >> scale);
6110c16b537SWarner Losh                 if (proba<8) {
6120c16b537SWarner Losh                     U64 restToBeat = vStep * rtbTable[proba];
6130c16b537SWarner Losh                     proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;
6140c16b537SWarner Losh                 }
615*19fcbaf1SConrad Meyer                 if (proba > largestP) { largestP=proba; largest=s; }
6160c16b537SWarner Losh                 normalizedCounter[s] = proba;
6170c16b537SWarner Losh                 stillToDistribute -= proba;
6180c16b537SWarner Losh         }   }
6190c16b537SWarner Losh         if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
6200c16b537SWarner Losh             /* corner case, need another normalization method */
6210c16b537SWarner Losh             size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
6220c16b537SWarner Losh             if (FSE_isError(errorCode)) return errorCode;
6230c16b537SWarner Losh         }
6240c16b537SWarner Losh         else normalizedCounter[largest] += (short)stillToDistribute;
6250c16b537SWarner Losh     }
6260c16b537SWarner Losh 
6270c16b537SWarner Losh #if 0
6280c16b537SWarner Losh     {   /* Print Table (debug) */
6290c16b537SWarner Losh         U32 s;
6300c16b537SWarner Losh         U32 nTotal = 0;
6310c16b537SWarner Losh         for (s=0; s<=maxSymbolValue; s++)
6320c16b537SWarner Losh             printf("%3i: %4i \n", s, normalizedCounter[s]);
6330c16b537SWarner Losh         for (s=0; s<=maxSymbolValue; s++)
6340c16b537SWarner Losh             nTotal += abs(normalizedCounter[s]);
6350c16b537SWarner Losh         if (nTotal != (1U<<tableLog))
6360c16b537SWarner Losh             printf("Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
6370c16b537SWarner Losh         getchar();
6380c16b537SWarner Losh     }
6390c16b537SWarner Losh #endif
6400c16b537SWarner Losh 
6410c16b537SWarner Losh     return tableLog;
6420c16b537SWarner Losh }
6430c16b537SWarner Losh 
6440c16b537SWarner Losh 
6450c16b537SWarner Losh /* fake FSE_CTable, for raw (uncompressed) input */
6460c16b537SWarner Losh size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
6470c16b537SWarner Losh {
6480c16b537SWarner Losh     const unsigned tableSize = 1 << nbBits;
6490c16b537SWarner Losh     const unsigned tableMask = tableSize - 1;
6500c16b537SWarner Losh     const unsigned maxSymbolValue = tableMask;
6510c16b537SWarner Losh     void* const ptr = ct;
6520c16b537SWarner Losh     U16* const tableU16 = ( (U16*) ptr) + 2;
6530c16b537SWarner Losh     void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1);   /* assumption : tableLog >= 1 */
6540c16b537SWarner Losh     FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
6550c16b537SWarner Losh     unsigned s;
6560c16b537SWarner Losh 
6570c16b537SWarner Losh     /* Sanity checks */
6580c16b537SWarner Losh     if (nbBits < 1) return ERROR(GENERIC);             /* min size */
6590c16b537SWarner Losh 
6600c16b537SWarner Losh     /* header */
6610c16b537SWarner Losh     tableU16[-2] = (U16) nbBits;
6620c16b537SWarner Losh     tableU16[-1] = (U16) maxSymbolValue;
6630c16b537SWarner Losh 
6640c16b537SWarner Losh     /* Build table */
6650c16b537SWarner Losh     for (s=0; s<tableSize; s++)
6660c16b537SWarner Losh         tableU16[s] = (U16)(tableSize + s);
6670c16b537SWarner Losh 
6680c16b537SWarner Losh     /* Build Symbol Transformation Table */
6690c16b537SWarner Losh     {   const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
6700c16b537SWarner Losh         for (s=0; s<=maxSymbolValue; s++) {
6710c16b537SWarner Losh             symbolTT[s].deltaNbBits = deltaNbBits;
6720c16b537SWarner Losh             symbolTT[s].deltaFindState = s-1;
6730c16b537SWarner Losh     }   }
6740c16b537SWarner Losh 
6750c16b537SWarner Losh     return 0;
6760c16b537SWarner Losh }
6770c16b537SWarner Losh 
6780c16b537SWarner Losh /* fake FSE_CTable, for rle input (always same symbol) */
6790c16b537SWarner Losh size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
6800c16b537SWarner Losh {
6810c16b537SWarner Losh     void* ptr = ct;
6820c16b537SWarner Losh     U16* tableU16 = ( (U16*) ptr) + 2;
6830c16b537SWarner Losh     void* FSCTptr = (U32*)ptr + 2;
6840c16b537SWarner Losh     FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) FSCTptr;
6850c16b537SWarner Losh 
6860c16b537SWarner Losh     /* header */
6870c16b537SWarner Losh     tableU16[-2] = (U16) 0;
6880c16b537SWarner Losh     tableU16[-1] = (U16) symbolValue;
6890c16b537SWarner Losh 
6900c16b537SWarner Losh     /* Build table */
6910c16b537SWarner Losh     tableU16[0] = 0;
6920c16b537SWarner Losh     tableU16[1] = 0;   /* just in case */
6930c16b537SWarner Losh 
6940c16b537SWarner Losh     /* Build Symbol Transformation Table */
6950c16b537SWarner Losh     symbolTT[symbolValue].deltaNbBits = 0;
6960c16b537SWarner Losh     symbolTT[symbolValue].deltaFindState = 0;
6970c16b537SWarner Losh 
6980c16b537SWarner Losh     return 0;
6990c16b537SWarner Losh }
7000c16b537SWarner Losh 
7010c16b537SWarner Losh 
7020c16b537SWarner Losh static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
7030c16b537SWarner Losh                            const void* src, size_t srcSize,
7040c16b537SWarner Losh                            const FSE_CTable* ct, const unsigned fast)
7050c16b537SWarner Losh {
7060c16b537SWarner Losh     const BYTE* const istart = (const BYTE*) src;
7070c16b537SWarner Losh     const BYTE* const iend = istart + srcSize;
7080c16b537SWarner Losh     const BYTE* ip=iend;
7090c16b537SWarner Losh 
7100c16b537SWarner Losh     BIT_CStream_t bitC;
7110c16b537SWarner Losh     FSE_CState_t CState1, CState2;
7120c16b537SWarner Losh 
7130c16b537SWarner Losh     /* init */
7140c16b537SWarner Losh     if (srcSize <= 2) return 0;
7150c16b537SWarner Losh     { size_t const initError = BIT_initCStream(&bitC, dst, dstSize);
7160c16b537SWarner Losh       if (FSE_isError(initError)) return 0; /* not enough space available to write a bitstream */ }
7170c16b537SWarner Losh 
7180c16b537SWarner Losh #define FSE_FLUSHBITS(s)  (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
7190c16b537SWarner Losh 
7200c16b537SWarner Losh     if (srcSize & 1) {
7210c16b537SWarner Losh         FSE_initCState2(&CState1, ct, *--ip);
7220c16b537SWarner Losh         FSE_initCState2(&CState2, ct, *--ip);
7230c16b537SWarner Losh         FSE_encodeSymbol(&bitC, &CState1, *--ip);
7240c16b537SWarner Losh         FSE_FLUSHBITS(&bitC);
7250c16b537SWarner Losh     } else {
7260c16b537SWarner Losh         FSE_initCState2(&CState2, ct, *--ip);
7270c16b537SWarner Losh         FSE_initCState2(&CState1, ct, *--ip);
7280c16b537SWarner Losh     }
7290c16b537SWarner Losh 
7300c16b537SWarner Losh     /* join to mod 4 */
7310c16b537SWarner Losh     srcSize -= 2;
7320c16b537SWarner Losh     if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) {  /* test bit 2 */
7330c16b537SWarner Losh         FSE_encodeSymbol(&bitC, &CState2, *--ip);
7340c16b537SWarner Losh         FSE_encodeSymbol(&bitC, &CState1, *--ip);
7350c16b537SWarner Losh         FSE_FLUSHBITS(&bitC);
7360c16b537SWarner Losh     }
7370c16b537SWarner Losh 
7380c16b537SWarner Losh     /* 2 or 4 encoding per loop */
7390c16b537SWarner Losh     while ( ip>istart ) {
7400c16b537SWarner Losh 
7410c16b537SWarner Losh         FSE_encodeSymbol(&bitC, &CState2, *--ip);
7420c16b537SWarner Losh 
7430c16b537SWarner Losh         if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 )   /* this test must be static */
7440c16b537SWarner Losh             FSE_FLUSHBITS(&bitC);
7450c16b537SWarner Losh 
7460c16b537SWarner Losh         FSE_encodeSymbol(&bitC, &CState1, *--ip);
7470c16b537SWarner Losh 
7480c16b537SWarner Losh         if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) {  /* this test must be static */
7490c16b537SWarner Losh             FSE_encodeSymbol(&bitC, &CState2, *--ip);
7500c16b537SWarner Losh             FSE_encodeSymbol(&bitC, &CState1, *--ip);
7510c16b537SWarner Losh         }
7520c16b537SWarner Losh 
7530c16b537SWarner Losh         FSE_FLUSHBITS(&bitC);
7540c16b537SWarner Losh     }
7550c16b537SWarner Losh 
7560c16b537SWarner Losh     FSE_flushCState(&bitC, &CState2);
7570c16b537SWarner Losh     FSE_flushCState(&bitC, &CState1);
7580c16b537SWarner Losh     return BIT_closeCStream(&bitC);
7590c16b537SWarner Losh }
7600c16b537SWarner Losh 
7610c16b537SWarner Losh size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
7620c16b537SWarner Losh                            const void* src, size_t srcSize,
7630c16b537SWarner Losh                            const FSE_CTable* ct)
7640c16b537SWarner Losh {
7650c16b537SWarner Losh     unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
7660c16b537SWarner Losh 
7670c16b537SWarner Losh     if (fast)
7680c16b537SWarner Losh         return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
7690c16b537SWarner Losh     else
7700c16b537SWarner Losh         return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
7710c16b537SWarner Losh }
7720c16b537SWarner Losh 
7730c16b537SWarner Losh 
7740c16b537SWarner Losh size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
7750c16b537SWarner Losh 
7760c16b537SWarner Losh #define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
7770c16b537SWarner Losh #define CHECK_F(f)   { CHECK_V_F(_var_err__, f); }
7780c16b537SWarner Losh 
7790c16b537SWarner Losh /* FSE_compress_wksp() :
7800c16b537SWarner Losh  * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
7810c16b537SWarner Losh  * `wkspSize` size must be `(1<<tableLog)`.
7820c16b537SWarner Losh  */
7830c16b537SWarner Losh size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
7840c16b537SWarner Losh {
7850c16b537SWarner Losh     BYTE* const ostart = (BYTE*) dst;
7860c16b537SWarner Losh     BYTE* op = ostart;
7870c16b537SWarner Losh     BYTE* const oend = ostart + dstSize;
7880c16b537SWarner Losh 
7890c16b537SWarner Losh     U32   count[FSE_MAX_SYMBOL_VALUE+1];
7900c16b537SWarner Losh     S16   norm[FSE_MAX_SYMBOL_VALUE+1];
7910c16b537SWarner Losh     FSE_CTable* CTable = (FSE_CTable*)workSpace;
7920c16b537SWarner Losh     size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue);
7930c16b537SWarner Losh     void* scratchBuffer = (void*)(CTable + CTableSize);
7940c16b537SWarner Losh     size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable));
7950c16b537SWarner Losh 
7960c16b537SWarner Losh     /* init conditions */
7970c16b537SWarner Losh     if (wkspSize < FSE_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge);
7980c16b537SWarner Losh     if (srcSize <= 1) return 0;  /* Not compressible */
7990c16b537SWarner Losh     if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
8000c16b537SWarner Losh     if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
8010c16b537SWarner Losh 
8020c16b537SWarner Losh     /* Scan input and build symbol stats */
8030c16b537SWarner Losh     {   CHECK_V_F(maxCount, FSE_count_wksp(count, &maxSymbolValue, src, srcSize, (unsigned*)scratchBuffer) );
8040c16b537SWarner Losh         if (maxCount == srcSize) return 1;   /* only a single symbol in src : rle */
8050c16b537SWarner Losh         if (maxCount == 1) return 0;         /* each symbol present maximum once => not compressible */
8060c16b537SWarner Losh         if (maxCount < (srcSize >> 7)) return 0;   /* Heuristic : not compressible enough */
8070c16b537SWarner Losh     }
8080c16b537SWarner Losh 
8090c16b537SWarner Losh     tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
8100c16b537SWarner Losh     CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue) );
8110c16b537SWarner Losh 
8120c16b537SWarner Losh     /* Write table description header */
8130c16b537SWarner Losh     {   CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
8140c16b537SWarner Losh         op += nc_err;
8150c16b537SWarner Losh     }
8160c16b537SWarner Losh 
8170c16b537SWarner Losh     /* Compress */
8180c16b537SWarner Losh     CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) );
8190c16b537SWarner Losh     {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) );
8200c16b537SWarner Losh         if (cSize == 0) return 0;   /* not enough space for compressed data */
8210c16b537SWarner Losh         op += cSize;
8220c16b537SWarner Losh     }
8230c16b537SWarner Losh 
8240c16b537SWarner Losh     /* check compressibility */
8250c16b537SWarner Losh     if ( (size_t)(op-ostart) >= srcSize-1 ) return 0;
8260c16b537SWarner Losh 
8270c16b537SWarner Losh     return op-ostart;
8280c16b537SWarner Losh }
8290c16b537SWarner Losh 
8300c16b537SWarner Losh typedef struct {
8310c16b537SWarner Losh     FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
8320c16b537SWarner Losh     BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];
8330c16b537SWarner Losh } fseWkspMax_t;
8340c16b537SWarner Losh 
8350c16b537SWarner Losh size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
8360c16b537SWarner Losh {
8370c16b537SWarner Losh     fseWkspMax_t scratchBuffer;
8380c16b537SWarner Losh     FSE_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE));   /* compilation failures here means scratchBuffer is not large enough */
8390c16b537SWarner Losh     if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
8400c16b537SWarner Losh     return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
8410c16b537SWarner Losh }
8420c16b537SWarner Losh 
8430c16b537SWarner Losh size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
8440c16b537SWarner Losh {
8450c16b537SWarner Losh     return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
8460c16b537SWarner Losh }
8470c16b537SWarner Losh 
8480c16b537SWarner Losh 
8490c16b537SWarner Losh #endif   /* FSE_COMMONDEFS_ONLY */
850