10c16b537SWarner Losh /* ******************************************************************
237f1f268SConrad Meyer * FSE : Finite State Entropy decoder
3*5ff13fbcSAllan Jude * Copyright (c) Yann Collet, Facebook, Inc.
437f1f268SConrad Meyer *
537f1f268SConrad Meyer * You can contact the author at :
637f1f268SConrad Meyer * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
737f1f268SConrad Meyer * - Public forum : https://groups.google.com/forum/#!forum/lz4c
837f1f268SConrad Meyer *
937f1f268SConrad Meyer * This source code is licensed under both the BSD-style license (found in the
1037f1f268SConrad Meyer * LICENSE file in the root directory of this source tree) and the GPLv2 (found
1137f1f268SConrad Meyer * in the COPYING file in the root directory of this source tree).
1237f1f268SConrad Meyer * You may select, at your option, one of the above-listed licenses.
130c16b537SWarner Losh ****************************************************************** */
140c16b537SWarner Losh
150c16b537SWarner Losh
160c16b537SWarner Losh /* **************************************************************
170c16b537SWarner Losh * Includes
180c16b537SWarner Losh ****************************************************************/
19f7cd7fe5SConrad Meyer #include "debug.h" /* assert */
200c16b537SWarner Losh #include "bitstream.h"
210c16b537SWarner Losh #include "compiler.h"
220c16b537SWarner Losh #define FSE_STATIC_LINKING_ONLY
230c16b537SWarner Losh #include "fse.h"
240c16b537SWarner Losh #include "error_private.h"
25f7cd7fe5SConrad Meyer #define ZSTD_DEPS_NEED_MALLOC
26f7cd7fe5SConrad Meyer #include "zstd_deps.h"
270c16b537SWarner Losh
280c16b537SWarner Losh
290c16b537SWarner Losh /* **************************************************************
300c16b537SWarner Losh * Error Management
310c16b537SWarner Losh ****************************************************************/
320c16b537SWarner Losh #define FSE_isError ERR_isError
330f743729SConrad Meyer #define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
340c16b537SWarner Losh
350c16b537SWarner Losh
360c16b537SWarner Losh /* **************************************************************
370c16b537SWarner Losh * Templates
380c16b537SWarner Losh ****************************************************************/
390c16b537SWarner Losh /*
400c16b537SWarner Losh designed to be included
410c16b537SWarner Losh for type-specific functions (template emulation in C)
420c16b537SWarner Losh Objective is to write these functions only once, for improved maintenance
430c16b537SWarner Losh */
440c16b537SWarner Losh
450c16b537SWarner Losh /* safety checks */
460c16b537SWarner Losh #ifndef FSE_FUNCTION_EXTENSION
470c16b537SWarner Losh # error "FSE_FUNCTION_EXTENSION must be defined"
480c16b537SWarner Losh #endif
490c16b537SWarner Losh #ifndef FSE_FUNCTION_TYPE
500c16b537SWarner Losh # error "FSE_FUNCTION_TYPE must be defined"
510c16b537SWarner Losh #endif
520c16b537SWarner Losh
530c16b537SWarner Losh /* Function names */
540c16b537SWarner Losh #define FSE_CAT(X,Y) X##Y
550c16b537SWarner Losh #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
560c16b537SWarner Losh #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
570c16b537SWarner Losh
580c16b537SWarner Losh
590c16b537SWarner Losh /* Function templates */
FSE_createDTable(unsigned tableLog)600c16b537SWarner Losh FSE_DTable* FSE_createDTable (unsigned tableLog)
610c16b537SWarner Losh {
620c16b537SWarner Losh if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
63f7cd7fe5SConrad Meyer return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
640c16b537SWarner Losh }
650c16b537SWarner Losh
FSE_freeDTable(FSE_DTable * dt)660c16b537SWarner Losh void FSE_freeDTable (FSE_DTable* dt)
670c16b537SWarner Losh {
68f7cd7fe5SConrad Meyer ZSTD_free(dt);
690c16b537SWarner Losh }
700c16b537SWarner Losh
FSE_buildDTable_internal(FSE_DTable * dt,const short * normalizedCounter,unsigned maxSymbolValue,unsigned tableLog,void * workSpace,size_t wkspSize)71f7cd7fe5SConrad Meyer static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
720c16b537SWarner Losh {
730c16b537SWarner Losh void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */
740c16b537SWarner Losh FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
75f7cd7fe5SConrad Meyer U16* symbolNext = (U16*)workSpace;
76f7cd7fe5SConrad Meyer BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1);
770c16b537SWarner Losh
780c16b537SWarner Losh U32 const maxSV1 = maxSymbolValue + 1;
790c16b537SWarner Losh U32 const tableSize = 1 << tableLog;
800c16b537SWarner Losh U32 highThreshold = tableSize-1;
810c16b537SWarner Losh
820c16b537SWarner Losh /* Sanity Checks */
83f7cd7fe5SConrad Meyer if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge);
840c16b537SWarner Losh if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
850c16b537SWarner Losh if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
860c16b537SWarner Losh
870c16b537SWarner Losh /* Init, lay down lowprob symbols */
880c16b537SWarner Losh { FSE_DTableHeader DTableH;
890c16b537SWarner Losh DTableH.tableLog = (U16)tableLog;
900c16b537SWarner Losh DTableH.fastMode = 1;
910c16b537SWarner Losh { S16 const largeLimit= (S16)(1 << (tableLog-1));
920c16b537SWarner Losh U32 s;
930c16b537SWarner Losh for (s=0; s<maxSV1; s++) {
940c16b537SWarner Losh if (normalizedCounter[s]==-1) {
950c16b537SWarner Losh tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
960c16b537SWarner Losh symbolNext[s] = 1;
970c16b537SWarner Losh } else {
980c16b537SWarner Losh if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
990c16b537SWarner Losh symbolNext[s] = normalizedCounter[s];
1000c16b537SWarner Losh } } }
101f7cd7fe5SConrad Meyer ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
1020c16b537SWarner Losh }
1030c16b537SWarner Losh
1040c16b537SWarner Losh /* Spread symbols */
105f7cd7fe5SConrad Meyer if (highThreshold == tableSize - 1) {
106f7cd7fe5SConrad Meyer size_t const tableMask = tableSize-1;
107f7cd7fe5SConrad Meyer size_t const step = FSE_TABLESTEP(tableSize);
108f7cd7fe5SConrad Meyer /* First lay down the symbols in order.
109f7cd7fe5SConrad Meyer * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
110f7cd7fe5SConrad Meyer * misses since small blocks generally have small table logs, so nearly
111f7cd7fe5SConrad Meyer * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
112f7cd7fe5SConrad Meyer * our buffer to handle the over-write.
113f7cd7fe5SConrad Meyer */
114f7cd7fe5SConrad Meyer {
115f7cd7fe5SConrad Meyer U64 const add = 0x0101010101010101ull;
116f7cd7fe5SConrad Meyer size_t pos = 0;
117f7cd7fe5SConrad Meyer U64 sv = 0;
118f7cd7fe5SConrad Meyer U32 s;
119f7cd7fe5SConrad Meyer for (s=0; s<maxSV1; ++s, sv += add) {
120f7cd7fe5SConrad Meyer int i;
121f7cd7fe5SConrad Meyer int const n = normalizedCounter[s];
122f7cd7fe5SConrad Meyer MEM_write64(spread + pos, sv);
123f7cd7fe5SConrad Meyer for (i = 8; i < n; i += 8) {
124f7cd7fe5SConrad Meyer MEM_write64(spread + pos + i, sv);
125f7cd7fe5SConrad Meyer }
126f7cd7fe5SConrad Meyer pos += n;
127f7cd7fe5SConrad Meyer }
128f7cd7fe5SConrad Meyer }
129f7cd7fe5SConrad Meyer /* Now we spread those positions across the table.
130f7cd7fe5SConrad Meyer * The benefit of doing it in two stages is that we avoid the the
131f7cd7fe5SConrad Meyer * variable size inner loop, which caused lots of branch misses.
132f7cd7fe5SConrad Meyer * Now we can run through all the positions without any branch misses.
133f7cd7fe5SConrad Meyer * We unroll the loop twice, since that is what emperically worked best.
134f7cd7fe5SConrad Meyer */
135f7cd7fe5SConrad Meyer {
136f7cd7fe5SConrad Meyer size_t position = 0;
137f7cd7fe5SConrad Meyer size_t s;
138f7cd7fe5SConrad Meyer size_t const unroll = 2;
139f7cd7fe5SConrad Meyer assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
140f7cd7fe5SConrad Meyer for (s = 0; s < (size_t)tableSize; s += unroll) {
141f7cd7fe5SConrad Meyer size_t u;
142f7cd7fe5SConrad Meyer for (u = 0; u < unroll; ++u) {
143f7cd7fe5SConrad Meyer size_t const uPosition = (position + (u * step)) & tableMask;
144f7cd7fe5SConrad Meyer tableDecode[uPosition].symbol = spread[s + u];
145f7cd7fe5SConrad Meyer }
146f7cd7fe5SConrad Meyer position = (position + (unroll * step)) & tableMask;
147f7cd7fe5SConrad Meyer }
148f7cd7fe5SConrad Meyer assert(position == 0);
149f7cd7fe5SConrad Meyer }
150f7cd7fe5SConrad Meyer } else {
151f7cd7fe5SConrad Meyer U32 const tableMask = tableSize-1;
1520c16b537SWarner Losh U32 const step = FSE_TABLESTEP(tableSize);
1530c16b537SWarner Losh U32 s, position = 0;
1540c16b537SWarner Losh for (s=0; s<maxSV1; s++) {
1550c16b537SWarner Losh int i;
1560c16b537SWarner Losh for (i=0; i<normalizedCounter[s]; i++) {
1570c16b537SWarner Losh tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
1580c16b537SWarner Losh position = (position + step) & tableMask;
1590c16b537SWarner Losh while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
1600c16b537SWarner Losh } }
1610c16b537SWarner Losh if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
1620c16b537SWarner Losh }
1630c16b537SWarner Losh
1640c16b537SWarner Losh /* Build Decoding table */
1650c16b537SWarner Losh { U32 u;
1660c16b537SWarner Losh for (u=0; u<tableSize; u++) {
1670c16b537SWarner Losh FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
16819fcbaf1SConrad Meyer U32 const nextState = symbolNext[symbol]++;
16919fcbaf1SConrad Meyer tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
1700c16b537SWarner Losh tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
1710c16b537SWarner Losh } }
1720c16b537SWarner Losh
1730c16b537SWarner Losh return 0;
1740c16b537SWarner Losh }
1750c16b537SWarner Losh
FSE_buildDTable_wksp(FSE_DTable * dt,const short * normalizedCounter,unsigned maxSymbolValue,unsigned tableLog,void * workSpace,size_t wkspSize)176f7cd7fe5SConrad Meyer size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
177f7cd7fe5SConrad Meyer {
178f7cd7fe5SConrad Meyer return FSE_buildDTable_internal(dt, normalizedCounter, maxSymbolValue, tableLog, workSpace, wkspSize);
179f7cd7fe5SConrad Meyer }
180f7cd7fe5SConrad Meyer
1810c16b537SWarner Losh
1820c16b537SWarner Losh #ifndef FSE_COMMONDEFS_ONLY
1830c16b537SWarner Losh
1840c16b537SWarner Losh /*-*******************************************************
1850c16b537SWarner Losh * Decompression (Byte symbols)
1860c16b537SWarner Losh *********************************************************/
FSE_buildDTable_rle(FSE_DTable * dt,BYTE symbolValue)1870c16b537SWarner Losh size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
1880c16b537SWarner Losh {
1890c16b537SWarner Losh void* ptr = dt;
1900c16b537SWarner Losh FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
1910c16b537SWarner Losh void* dPtr = dt + 1;
1920c16b537SWarner Losh FSE_decode_t* const cell = (FSE_decode_t*)dPtr;
1930c16b537SWarner Losh
1940c16b537SWarner Losh DTableH->tableLog = 0;
1950c16b537SWarner Losh DTableH->fastMode = 0;
1960c16b537SWarner Losh
1970c16b537SWarner Losh cell->newState = 0;
1980c16b537SWarner Losh cell->symbol = symbolValue;
1990c16b537SWarner Losh cell->nbBits = 0;
2000c16b537SWarner Losh
2010c16b537SWarner Losh return 0;
2020c16b537SWarner Losh }
2030c16b537SWarner Losh
2040c16b537SWarner Losh
FSE_buildDTable_raw(FSE_DTable * dt,unsigned nbBits)2050c16b537SWarner Losh size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
2060c16b537SWarner Losh {
2070c16b537SWarner Losh void* ptr = dt;
2080c16b537SWarner Losh FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
2090c16b537SWarner Losh void* dPtr = dt + 1;
2100c16b537SWarner Losh FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr;
2110c16b537SWarner Losh const unsigned tableSize = 1 << nbBits;
2120c16b537SWarner Losh const unsigned tableMask = tableSize - 1;
2130c16b537SWarner Losh const unsigned maxSV1 = tableMask+1;
2140c16b537SWarner Losh unsigned s;
2150c16b537SWarner Losh
2160c16b537SWarner Losh /* Sanity checks */
2170c16b537SWarner Losh if (nbBits < 1) return ERROR(GENERIC); /* min size */
2180c16b537SWarner Losh
2190c16b537SWarner Losh /* Build Decoding Table */
2200c16b537SWarner Losh DTableH->tableLog = (U16)nbBits;
2210c16b537SWarner Losh DTableH->fastMode = 1;
2220c16b537SWarner Losh for (s=0; s<maxSV1; s++) {
2230c16b537SWarner Losh dinfo[s].newState = 0;
2240c16b537SWarner Losh dinfo[s].symbol = (BYTE)s;
2250c16b537SWarner Losh dinfo[s].nbBits = (BYTE)nbBits;
2260c16b537SWarner Losh }
2270c16b537SWarner Losh
2280c16b537SWarner Losh return 0;
2290c16b537SWarner Losh }
2300c16b537SWarner Losh
FSE_decompress_usingDTable_generic(void * dst,size_t maxDstSize,const void * cSrc,size_t cSrcSize,const FSE_DTable * dt,const unsigned fast)2310c16b537SWarner Losh FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
2320c16b537SWarner Losh void* dst, size_t maxDstSize,
2330c16b537SWarner Losh const void* cSrc, size_t cSrcSize,
2340c16b537SWarner Losh const FSE_DTable* dt, const unsigned fast)
2350c16b537SWarner Losh {
2360c16b537SWarner Losh BYTE* const ostart = (BYTE*) dst;
2370c16b537SWarner Losh BYTE* op = ostart;
2380c16b537SWarner Losh BYTE* const omax = op + maxDstSize;
2390c16b537SWarner Losh BYTE* const olimit = omax-3;
2400c16b537SWarner Losh
2410c16b537SWarner Losh BIT_DStream_t bitD;
2420c16b537SWarner Losh FSE_DState_t state1;
2430c16b537SWarner Losh FSE_DState_t state2;
2440c16b537SWarner Losh
2450c16b537SWarner Losh /* Init */
2460c16b537SWarner Losh CHECK_F(BIT_initDStream(&bitD, cSrc, cSrcSize));
2470c16b537SWarner Losh
2480c16b537SWarner Losh FSE_initDState(&state1, &bitD, dt);
2490c16b537SWarner Losh FSE_initDState(&state2, &bitD, dt);
2500c16b537SWarner Losh
2510c16b537SWarner Losh #define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
2520c16b537SWarner Losh
2530c16b537SWarner Losh /* 4 symbols per loop */
2540c16b537SWarner Losh for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) & (op<olimit) ; op+=4) {
2550c16b537SWarner Losh op[0] = FSE_GETSYMBOL(&state1);
2560c16b537SWarner Losh
2570c16b537SWarner Losh if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
2580c16b537SWarner Losh BIT_reloadDStream(&bitD);
2590c16b537SWarner Losh
2600c16b537SWarner Losh op[1] = FSE_GETSYMBOL(&state2);
2610c16b537SWarner Losh
2620c16b537SWarner Losh if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
2630c16b537SWarner Losh { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }
2640c16b537SWarner Losh
2650c16b537SWarner Losh op[2] = FSE_GETSYMBOL(&state1);
2660c16b537SWarner Losh
2670c16b537SWarner Losh if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
2680c16b537SWarner Losh BIT_reloadDStream(&bitD);
2690c16b537SWarner Losh
2700c16b537SWarner Losh op[3] = FSE_GETSYMBOL(&state2);
2710c16b537SWarner Losh }
2720c16b537SWarner Losh
2730c16b537SWarner Losh /* tail */
2740c16b537SWarner Losh /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
2750c16b537SWarner Losh while (1) {
2760c16b537SWarner Losh if (op>(omax-2)) return ERROR(dstSize_tooSmall);
2770c16b537SWarner Losh *op++ = FSE_GETSYMBOL(&state1);
2780c16b537SWarner Losh if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
2790c16b537SWarner Losh *op++ = FSE_GETSYMBOL(&state2);
2800c16b537SWarner Losh break;
2810c16b537SWarner Losh }
2820c16b537SWarner Losh
2830c16b537SWarner Losh if (op>(omax-2)) return ERROR(dstSize_tooSmall);
2840c16b537SWarner Losh *op++ = FSE_GETSYMBOL(&state2);
2850c16b537SWarner Losh if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
2860c16b537SWarner Losh *op++ = FSE_GETSYMBOL(&state1);
2870c16b537SWarner Losh break;
2880c16b537SWarner Losh } }
2890c16b537SWarner Losh
2900c16b537SWarner Losh return op-ostart;
2910c16b537SWarner Losh }
2920c16b537SWarner Losh
2930c16b537SWarner Losh
FSE_decompress_usingDTable(void * dst,size_t originalSize,const void * cSrc,size_t cSrcSize,const FSE_DTable * dt)2940c16b537SWarner Losh size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
2950c16b537SWarner Losh const void* cSrc, size_t cSrcSize,
2960c16b537SWarner Losh const FSE_DTable* dt)
2970c16b537SWarner Losh {
2980c16b537SWarner Losh const void* ptr = dt;
2990c16b537SWarner Losh const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
3000c16b537SWarner Losh const U32 fastMode = DTableH->fastMode;
3010c16b537SWarner Losh
3020c16b537SWarner Losh /* select fast mode (static) */
3030c16b537SWarner Losh if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
3040c16b537SWarner Losh return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
3050c16b537SWarner Losh }
3060c16b537SWarner Losh
3070c16b537SWarner Losh
FSE_decompress_wksp(void * dst,size_t dstCapacity,const void * cSrc,size_t cSrcSize,unsigned maxLog,void * workSpace,size_t wkspSize)308f7cd7fe5SConrad Meyer size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
309f7cd7fe5SConrad Meyer {
310f7cd7fe5SConrad Meyer return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0);
311f7cd7fe5SConrad Meyer }
312f7cd7fe5SConrad Meyer
313*5ff13fbcSAllan Jude typedef struct {
314*5ff13fbcSAllan Jude short ncount[FSE_MAX_SYMBOL_VALUE + 1];
315*5ff13fbcSAllan Jude FSE_DTable dtable[1]; /* Dynamically sized */
316*5ff13fbcSAllan Jude } FSE_DecompressWksp;
317*5ff13fbcSAllan Jude
318*5ff13fbcSAllan Jude
FSE_decompress_wksp_body(void * dst,size_t dstCapacity,const void * cSrc,size_t cSrcSize,unsigned maxLog,void * workSpace,size_t wkspSize,int bmi2)319f7cd7fe5SConrad Meyer FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
320f7cd7fe5SConrad Meyer void* dst, size_t dstCapacity,
321f7cd7fe5SConrad Meyer const void* cSrc, size_t cSrcSize,
322f7cd7fe5SConrad Meyer unsigned maxLog, void* workSpace, size_t wkspSize,
323f7cd7fe5SConrad Meyer int bmi2)
3240c16b537SWarner Losh {
3250c16b537SWarner Losh const BYTE* const istart = (const BYTE*)cSrc;
3260c16b537SWarner Losh const BYTE* ip = istart;
3270c16b537SWarner Losh unsigned tableLog;
3280c16b537SWarner Losh unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
329*5ff13fbcSAllan Jude FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace;
330*5ff13fbcSAllan Jude
331*5ff13fbcSAllan Jude DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
332*5ff13fbcSAllan Jude if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC);
3330c16b537SWarner Losh
3340c16b537SWarner Losh /* normal FSE decoding mode */
335*5ff13fbcSAllan Jude {
336*5ff13fbcSAllan Jude size_t const NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
3370c16b537SWarner Losh if (FSE_isError(NCountLength)) return NCountLength;
3380c16b537SWarner Losh if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
339f7cd7fe5SConrad Meyer assert(NCountLength <= cSrcSize);
3400c16b537SWarner Losh ip += NCountLength;
3410c16b537SWarner Losh cSrcSize -= NCountLength;
342*5ff13fbcSAllan Jude }
3430c16b537SWarner Losh
344f7cd7fe5SConrad Meyer if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
345*5ff13fbcSAllan Jude workSpace = wksp->dtable + FSE_DTABLE_SIZE_U32(tableLog);
346*5ff13fbcSAllan Jude wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
3470c16b537SWarner Losh
348*5ff13fbcSAllan Jude CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
349f7cd7fe5SConrad Meyer
350f7cd7fe5SConrad Meyer {
351*5ff13fbcSAllan Jude const void* ptr = wksp->dtable;
352f7cd7fe5SConrad Meyer const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
353f7cd7fe5SConrad Meyer const U32 fastMode = DTableH->fastMode;
354f7cd7fe5SConrad Meyer
355f7cd7fe5SConrad Meyer /* select fast mode (static) */
356*5ff13fbcSAllan Jude if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1);
357*5ff13fbcSAllan Jude return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0);
358f7cd7fe5SConrad Meyer }
359f7cd7fe5SConrad Meyer }
360f7cd7fe5SConrad Meyer
361f7cd7fe5SConrad Meyer /* Avoids the FORCE_INLINE of the _body() function. */
FSE_decompress_wksp_body_default(void * dst,size_t dstCapacity,const void * cSrc,size_t cSrcSize,unsigned maxLog,void * workSpace,size_t wkspSize)362f7cd7fe5SConrad Meyer static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
363f7cd7fe5SConrad Meyer {
364f7cd7fe5SConrad Meyer return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0);
365f7cd7fe5SConrad Meyer }
366f7cd7fe5SConrad Meyer
367f7cd7fe5SConrad Meyer #if DYNAMIC_BMI2
FSE_decompress_wksp_body_bmi2(void * dst,size_t dstCapacity,const void * cSrc,size_t cSrcSize,unsigned maxLog,void * workSpace,size_t wkspSize)368*5ff13fbcSAllan Jude BMI2_TARGET_ATTRIBUTE static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
369f7cd7fe5SConrad Meyer {
370f7cd7fe5SConrad Meyer return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
371f7cd7fe5SConrad Meyer }
372f7cd7fe5SConrad Meyer #endif
373f7cd7fe5SConrad Meyer
FSE_decompress_wksp_bmi2(void * dst,size_t dstCapacity,const void * cSrc,size_t cSrcSize,unsigned maxLog,void * workSpace,size_t wkspSize,int bmi2)374f7cd7fe5SConrad Meyer size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2)
375f7cd7fe5SConrad Meyer {
376f7cd7fe5SConrad Meyer #if DYNAMIC_BMI2
377f7cd7fe5SConrad Meyer if (bmi2) {
378f7cd7fe5SConrad Meyer return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
379f7cd7fe5SConrad Meyer }
380f7cd7fe5SConrad Meyer #endif
381f7cd7fe5SConrad Meyer (void)bmi2;
382f7cd7fe5SConrad Meyer return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
3830c16b537SWarner Losh }
3840c16b537SWarner Losh
3850c16b537SWarner Losh
3860c16b537SWarner Losh typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
3870c16b537SWarner Losh
388f7cd7fe5SConrad Meyer #ifndef ZSTD_NO_UNUSED_FUNCTIONS
FSE_buildDTable(FSE_DTable * dt,const short * normalizedCounter,unsigned maxSymbolValue,unsigned tableLog)389f7cd7fe5SConrad Meyer size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) {
390f7cd7fe5SConrad Meyer U32 wksp[FSE_BUILD_DTABLE_WKSP_SIZE_U32(FSE_TABLELOG_ABSOLUTE_MAX, FSE_MAX_SYMBOL_VALUE)];
391f7cd7fe5SConrad Meyer return FSE_buildDTable_wksp(dt, normalizedCounter, maxSymbolValue, tableLog, wksp, sizeof(wksp));
3920c16b537SWarner Losh }
3930c16b537SWarner Losh
FSE_decompress(void * dst,size_t dstCapacity,const void * cSrc,size_t cSrcSize)394f7cd7fe5SConrad Meyer size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize)
395f7cd7fe5SConrad Meyer {
396f7cd7fe5SConrad Meyer /* Static analyzer seems unable to understand this table will be properly initialized later */
397f7cd7fe5SConrad Meyer U32 wksp[FSE_DECOMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
398f7cd7fe5SConrad Meyer return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, FSE_MAX_TABLELOG, wksp, sizeof(wksp));
399f7cd7fe5SConrad Meyer }
400f7cd7fe5SConrad Meyer #endif
4010c16b537SWarner Losh
4020c16b537SWarner Losh
4030c16b537SWarner Losh #endif /* FSE_COMMONDEFS_ONLY */
404