xref: /freebsd/sys/contrib/zstd/lib/legacy/zstd_v01.c (revision 0c16b53773565120a8f80a31a0af2ef56ccd368e)
1*0c16b537SWarner Losh /*
2*0c16b537SWarner Losh  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3*0c16b537SWarner Losh  * All rights reserved.
4*0c16b537SWarner Losh  *
5*0c16b537SWarner Losh  * This source code is licensed under both the BSD-style license (found in the
6*0c16b537SWarner Losh  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7*0c16b537SWarner Losh  * in the COPYING file in the root directory of this source tree).
8*0c16b537SWarner Losh  * You may select, at your option, one of the above-listed licenses.
9*0c16b537SWarner Losh  */
10*0c16b537SWarner Losh 
11*0c16b537SWarner Losh 
12*0c16b537SWarner Losh /******************************************
13*0c16b537SWarner Losh *  Includes
14*0c16b537SWarner Losh ******************************************/
15*0c16b537SWarner Losh #include <stddef.h>    /* size_t, ptrdiff_t */
16*0c16b537SWarner Losh #include "zstd_v01.h"
17*0c16b537SWarner Losh #include "error_private.h"
18*0c16b537SWarner Losh 
19*0c16b537SWarner Losh 
20*0c16b537SWarner Losh /******************************************
21*0c16b537SWarner Losh *  Static allocation
22*0c16b537SWarner Losh ******************************************/
23*0c16b537SWarner Losh /* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */
24*0c16b537SWarner Losh #define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<maxTableLog))
25*0c16b537SWarner Losh 
26*0c16b537SWarner Losh /* You can statically allocate Huff0 DTable as a table of unsigned short using below macro */
27*0c16b537SWarner Losh #define HUF_DTABLE_SIZE_U16(maxTableLog)   (1 + (1<<maxTableLog))
28*0c16b537SWarner Losh #define HUF_CREATE_STATIC_DTABLE(DTable, maxTableLog) \
29*0c16b537SWarner Losh         unsigned short DTable[HUF_DTABLE_SIZE_U16(maxTableLog)] = { maxTableLog }
30*0c16b537SWarner Losh 
31*0c16b537SWarner Losh 
32*0c16b537SWarner Losh /******************************************
33*0c16b537SWarner Losh *  Error Management
34*0c16b537SWarner Losh ******************************************/
35*0c16b537SWarner Losh #define FSE_LIST_ERRORS(ITEM) \
36*0c16b537SWarner Losh         ITEM(FSE_OK_NoError) ITEM(FSE_ERROR_GENERIC) \
37*0c16b537SWarner Losh         ITEM(FSE_ERROR_tableLog_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooSmall) \
38*0c16b537SWarner Losh         ITEM(FSE_ERROR_dstSize_tooSmall) ITEM(FSE_ERROR_srcSize_wrong)\
39*0c16b537SWarner Losh         ITEM(FSE_ERROR_corruptionDetected) \
40*0c16b537SWarner Losh         ITEM(FSE_ERROR_maxCode)
41*0c16b537SWarner Losh 
42*0c16b537SWarner Losh #define FSE_GENERATE_ENUM(ENUM) ENUM,
43*0c16b537SWarner Losh typedef enum { FSE_LIST_ERRORS(FSE_GENERATE_ENUM) } FSE_errorCodes;  /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
44*0c16b537SWarner Losh 
45*0c16b537SWarner Losh 
46*0c16b537SWarner Losh /******************************************
47*0c16b537SWarner Losh *  FSE symbol compression API
48*0c16b537SWarner Losh ******************************************/
49*0c16b537SWarner Losh /*
50*0c16b537SWarner Losh    This API consists of small unitary functions, which highly benefit from being inlined.
51*0c16b537SWarner Losh    You will want to enable link-time-optimization to ensure these functions are properly inlined in your binary.
52*0c16b537SWarner Losh    Visual seems to do it automatically.
53*0c16b537SWarner Losh    For gcc or clang, you'll need to add -flto flag at compilation and linking stages.
54*0c16b537SWarner Losh    If none of these solutions is applicable, include "fse.c" directly.
55*0c16b537SWarner Losh */
56*0c16b537SWarner Losh 
57*0c16b537SWarner Losh typedef unsigned FSE_CTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
58*0c16b537SWarner Losh typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
59*0c16b537SWarner Losh 
60*0c16b537SWarner Losh typedef struct
61*0c16b537SWarner Losh {
62*0c16b537SWarner Losh     size_t bitContainer;
63*0c16b537SWarner Losh     int    bitPos;
64*0c16b537SWarner Losh     char*  startPtr;
65*0c16b537SWarner Losh     char*  ptr;
66*0c16b537SWarner Losh     char*  endPtr;
67*0c16b537SWarner Losh } FSE_CStream_t;
68*0c16b537SWarner Losh 
69*0c16b537SWarner Losh typedef struct
70*0c16b537SWarner Losh {
71*0c16b537SWarner Losh     ptrdiff_t   value;
72*0c16b537SWarner Losh     const void* stateTable;
73*0c16b537SWarner Losh     const void* symbolTT;
74*0c16b537SWarner Losh     unsigned    stateLog;
75*0c16b537SWarner Losh } FSE_CState_t;
76*0c16b537SWarner Losh 
77*0c16b537SWarner Losh typedef struct
78*0c16b537SWarner Losh {
79*0c16b537SWarner Losh     size_t   bitContainer;
80*0c16b537SWarner Losh     unsigned bitsConsumed;
81*0c16b537SWarner Losh     const char* ptr;
82*0c16b537SWarner Losh     const char* start;
83*0c16b537SWarner Losh } FSE_DStream_t;
84*0c16b537SWarner Losh 
85*0c16b537SWarner Losh typedef struct
86*0c16b537SWarner Losh {
87*0c16b537SWarner Losh     size_t      state;
88*0c16b537SWarner Losh     const void* table;   /* precise table may vary, depending on U16 */
89*0c16b537SWarner Losh } FSE_DState_t;
90*0c16b537SWarner Losh 
91*0c16b537SWarner Losh typedef enum { FSE_DStream_unfinished = 0,
92*0c16b537SWarner Losh                FSE_DStream_endOfBuffer = 1,
93*0c16b537SWarner Losh                FSE_DStream_completed = 2,
94*0c16b537SWarner Losh                FSE_DStream_tooFar = 3 } FSE_DStream_status;  /* result of FSE_reloadDStream() */
95*0c16b537SWarner Losh                /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... ?! */
96*0c16b537SWarner Losh 
97*0c16b537SWarner Losh 
98*0c16b537SWarner Losh /****************************************************************
99*0c16b537SWarner Losh *  Tuning parameters
100*0c16b537SWarner Losh ****************************************************************/
101*0c16b537SWarner Losh /* MEMORY_USAGE :
102*0c16b537SWarner Losh *  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
103*0c16b537SWarner Losh *  Increasing memory usage improves compression ratio
104*0c16b537SWarner Losh *  Reduced memory usage can improve speed, due to cache effect
105*0c16b537SWarner Losh *  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
106*0c16b537SWarner Losh #define FSE_MAX_MEMORY_USAGE 14
107*0c16b537SWarner Losh #define FSE_DEFAULT_MEMORY_USAGE 13
108*0c16b537SWarner Losh 
109*0c16b537SWarner Losh /* FSE_MAX_SYMBOL_VALUE :
110*0c16b537SWarner Losh *  Maximum symbol value authorized.
111*0c16b537SWarner Losh *  Required for proper stack allocation */
112*0c16b537SWarner Losh #define FSE_MAX_SYMBOL_VALUE 255
113*0c16b537SWarner Losh 
114*0c16b537SWarner Losh 
115*0c16b537SWarner Losh /****************************************************************
116*0c16b537SWarner Losh *  template functions type & suffix
117*0c16b537SWarner Losh ****************************************************************/
118*0c16b537SWarner Losh #define FSE_FUNCTION_TYPE BYTE
119*0c16b537SWarner Losh #define FSE_FUNCTION_EXTENSION
120*0c16b537SWarner Losh 
121*0c16b537SWarner Losh 
122*0c16b537SWarner Losh /****************************************************************
123*0c16b537SWarner Losh *  Byte symbol type
124*0c16b537SWarner Losh ****************************************************************/
125*0c16b537SWarner Losh typedef struct
126*0c16b537SWarner Losh {
127*0c16b537SWarner Losh     unsigned short newState;
128*0c16b537SWarner Losh     unsigned char  symbol;
129*0c16b537SWarner Losh     unsigned char  nbBits;
130*0c16b537SWarner Losh } FSE_decode_t;   /* size == U32 */
131*0c16b537SWarner Losh 
132*0c16b537SWarner Losh 
133*0c16b537SWarner Losh 
134*0c16b537SWarner Losh /****************************************************************
135*0c16b537SWarner Losh *  Compiler specifics
136*0c16b537SWarner Losh ****************************************************************/
137*0c16b537SWarner Losh #ifdef _MSC_VER    /* Visual Studio */
138*0c16b537SWarner Losh #  define FORCE_INLINE static __forceinline
139*0c16b537SWarner Losh #  include <intrin.h>                    /* For Visual 2005 */
140*0c16b537SWarner Losh #  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
141*0c16b537SWarner Losh #  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
142*0c16b537SWarner Losh #else
143*0c16b537SWarner Losh #  define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
144*0c16b537SWarner Losh #  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
145*0c16b537SWarner Losh #    ifdef __GNUC__
146*0c16b537SWarner Losh #      define FORCE_INLINE static inline __attribute__((always_inline))
147*0c16b537SWarner Losh #    else
148*0c16b537SWarner Losh #      define FORCE_INLINE static inline
149*0c16b537SWarner Losh #    endif
150*0c16b537SWarner Losh #  else
151*0c16b537SWarner Losh #    define FORCE_INLINE static
152*0c16b537SWarner Losh #  endif /* __STDC_VERSION__ */
153*0c16b537SWarner Losh #endif
154*0c16b537SWarner Losh 
155*0c16b537SWarner Losh 
156*0c16b537SWarner Losh /****************************************************************
157*0c16b537SWarner Losh *  Includes
158*0c16b537SWarner Losh ****************************************************************/
159*0c16b537SWarner Losh #include <stdlib.h>     /* malloc, free, qsort */
160*0c16b537SWarner Losh #include <string.h>     /* memcpy, memset */
161*0c16b537SWarner Losh #include <stdio.h>      /* printf (debug) */
162*0c16b537SWarner Losh 
163*0c16b537SWarner Losh 
164*0c16b537SWarner Losh #ifndef MEM_ACCESS_MODULE
165*0c16b537SWarner Losh #define MEM_ACCESS_MODULE
166*0c16b537SWarner Losh /****************************************************************
167*0c16b537SWarner Losh *  Basic Types
168*0c16b537SWarner Losh *****************************************************************/
169*0c16b537SWarner Losh #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
170*0c16b537SWarner Losh # include <stdint.h>
171*0c16b537SWarner Losh typedef  uint8_t BYTE;
172*0c16b537SWarner Losh typedef uint16_t U16;
173*0c16b537SWarner Losh typedef  int16_t S16;
174*0c16b537SWarner Losh typedef uint32_t U32;
175*0c16b537SWarner Losh typedef  int32_t S32;
176*0c16b537SWarner Losh typedef uint64_t U64;
177*0c16b537SWarner Losh typedef  int64_t S64;
178*0c16b537SWarner Losh #else
179*0c16b537SWarner Losh typedef unsigned char       BYTE;
180*0c16b537SWarner Losh typedef unsigned short      U16;
181*0c16b537SWarner Losh typedef   signed short      S16;
182*0c16b537SWarner Losh typedef unsigned int        U32;
183*0c16b537SWarner Losh typedef   signed int        S32;
184*0c16b537SWarner Losh typedef unsigned long long  U64;
185*0c16b537SWarner Losh typedef   signed long long  S64;
186*0c16b537SWarner Losh #endif
187*0c16b537SWarner Losh 
188*0c16b537SWarner Losh #endif   /* MEM_ACCESS_MODULE */
189*0c16b537SWarner Losh 
190*0c16b537SWarner Losh /****************************************************************
191*0c16b537SWarner Losh *  Memory I/O
192*0c16b537SWarner Losh *****************************************************************/
193*0c16b537SWarner Losh /* FSE_FORCE_MEMORY_ACCESS
194*0c16b537SWarner Losh  * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
195*0c16b537SWarner Losh  * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
196*0c16b537SWarner Losh  * The below switch allow to select different access method for improved performance.
197*0c16b537SWarner Losh  * Method 0 (default) : use `memcpy()`. Safe and portable.
198*0c16b537SWarner Losh  * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
199*0c16b537SWarner Losh  *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
200*0c16b537SWarner Losh  * Method 2 : direct access. This method is portable but violate C standard.
201*0c16b537SWarner Losh  *            It can generate buggy code on targets generating assembly depending on alignment.
202*0c16b537SWarner Losh  *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
203*0c16b537SWarner Losh  * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
204*0c16b537SWarner Losh  * Prefer these methods in priority order (0 > 1 > 2)
205*0c16b537SWarner Losh  */
206*0c16b537SWarner Losh #ifndef FSE_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
207*0c16b537SWarner Losh #  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
208*0c16b537SWarner Losh #    define FSE_FORCE_MEMORY_ACCESS 2
209*0c16b537SWarner Losh #  elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
210*0c16b537SWarner Losh   (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
211*0c16b537SWarner Losh #    define FSE_FORCE_MEMORY_ACCESS 1
212*0c16b537SWarner Losh #  endif
213*0c16b537SWarner Losh #endif
214*0c16b537SWarner Losh 
215*0c16b537SWarner Losh 
216*0c16b537SWarner Losh static unsigned FSE_32bits(void)
217*0c16b537SWarner Losh {
218*0c16b537SWarner Losh     return sizeof(void*)==4;
219*0c16b537SWarner Losh }
220*0c16b537SWarner Losh 
221*0c16b537SWarner Losh static unsigned FSE_isLittleEndian(void)
222*0c16b537SWarner Losh {
223*0c16b537SWarner Losh     const union { U32 i; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
224*0c16b537SWarner Losh     return one.c[0];
225*0c16b537SWarner Losh }
226*0c16b537SWarner Losh 
227*0c16b537SWarner Losh #if defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==2)
228*0c16b537SWarner Losh 
229*0c16b537SWarner Losh static U16 FSE_read16(const void* memPtr) { return *(const U16*) memPtr; }
230*0c16b537SWarner Losh static U32 FSE_read32(const void* memPtr) { return *(const U32*) memPtr; }
231*0c16b537SWarner Losh static U64 FSE_read64(const void* memPtr) { return *(const U64*) memPtr; }
232*0c16b537SWarner Losh 
233*0c16b537SWarner Losh #elif defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==1)
234*0c16b537SWarner Losh 
235*0c16b537SWarner Losh /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
236*0c16b537SWarner Losh /* currently only defined for gcc and icc */
237*0c16b537SWarner Losh typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign;
238*0c16b537SWarner Losh 
239*0c16b537SWarner Losh static U16 FSE_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
240*0c16b537SWarner Losh static U32 FSE_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
241*0c16b537SWarner Losh static U64 FSE_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
242*0c16b537SWarner Losh 
243*0c16b537SWarner Losh #else
244*0c16b537SWarner Losh 
245*0c16b537SWarner Losh static U16 FSE_read16(const void* memPtr)
246*0c16b537SWarner Losh {
247*0c16b537SWarner Losh     U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
248*0c16b537SWarner Losh }
249*0c16b537SWarner Losh 
250*0c16b537SWarner Losh static U32 FSE_read32(const void* memPtr)
251*0c16b537SWarner Losh {
252*0c16b537SWarner Losh     U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
253*0c16b537SWarner Losh }
254*0c16b537SWarner Losh 
255*0c16b537SWarner Losh static U64 FSE_read64(const void* memPtr)
256*0c16b537SWarner Losh {
257*0c16b537SWarner Losh     U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
258*0c16b537SWarner Losh }
259*0c16b537SWarner Losh 
260*0c16b537SWarner Losh #endif // FSE_FORCE_MEMORY_ACCESS
261*0c16b537SWarner Losh 
262*0c16b537SWarner Losh static U16 FSE_readLE16(const void* memPtr)
263*0c16b537SWarner Losh {
264*0c16b537SWarner Losh     if (FSE_isLittleEndian())
265*0c16b537SWarner Losh         return FSE_read16(memPtr);
266*0c16b537SWarner Losh     else
267*0c16b537SWarner Losh     {
268*0c16b537SWarner Losh         const BYTE* p = (const BYTE*)memPtr;
269*0c16b537SWarner Losh         return (U16)(p[0] + (p[1]<<8));
270*0c16b537SWarner Losh     }
271*0c16b537SWarner Losh }
272*0c16b537SWarner Losh 
273*0c16b537SWarner Losh static U32 FSE_readLE32(const void* memPtr)
274*0c16b537SWarner Losh {
275*0c16b537SWarner Losh     if (FSE_isLittleEndian())
276*0c16b537SWarner Losh         return FSE_read32(memPtr);
277*0c16b537SWarner Losh     else
278*0c16b537SWarner Losh     {
279*0c16b537SWarner Losh         const BYTE* p = (const BYTE*)memPtr;
280*0c16b537SWarner Losh         return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
281*0c16b537SWarner Losh     }
282*0c16b537SWarner Losh }
283*0c16b537SWarner Losh 
284*0c16b537SWarner Losh 
285*0c16b537SWarner Losh static U64 FSE_readLE64(const void* memPtr)
286*0c16b537SWarner Losh {
287*0c16b537SWarner Losh     if (FSE_isLittleEndian())
288*0c16b537SWarner Losh         return FSE_read64(memPtr);
289*0c16b537SWarner Losh     else
290*0c16b537SWarner Losh     {
291*0c16b537SWarner Losh         const BYTE* p = (const BYTE*)memPtr;
292*0c16b537SWarner Losh         return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
293*0c16b537SWarner Losh                      + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
294*0c16b537SWarner Losh     }
295*0c16b537SWarner Losh }
296*0c16b537SWarner Losh 
297*0c16b537SWarner Losh static size_t FSE_readLEST(const void* memPtr)
298*0c16b537SWarner Losh {
299*0c16b537SWarner Losh     if (FSE_32bits())
300*0c16b537SWarner Losh         return (size_t)FSE_readLE32(memPtr);
301*0c16b537SWarner Losh     else
302*0c16b537SWarner Losh         return (size_t)FSE_readLE64(memPtr);
303*0c16b537SWarner Losh }
304*0c16b537SWarner Losh 
305*0c16b537SWarner Losh 
306*0c16b537SWarner Losh 
307*0c16b537SWarner Losh /****************************************************************
308*0c16b537SWarner Losh *  Constants
309*0c16b537SWarner Losh *****************************************************************/
310*0c16b537SWarner Losh #define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)
311*0c16b537SWarner Losh #define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
312*0c16b537SWarner Losh #define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
313*0c16b537SWarner Losh #define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
314*0c16b537SWarner Losh #define FSE_MIN_TABLELOG 5
315*0c16b537SWarner Losh 
316*0c16b537SWarner Losh #define FSE_TABLELOG_ABSOLUTE_MAX 15
317*0c16b537SWarner Losh #if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
318*0c16b537SWarner Losh #error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
319*0c16b537SWarner Losh #endif
320*0c16b537SWarner Losh 
321*0c16b537SWarner Losh 
322*0c16b537SWarner Losh /****************************************************************
323*0c16b537SWarner Losh *  Error Management
324*0c16b537SWarner Losh ****************************************************************/
325*0c16b537SWarner Losh #define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
326*0c16b537SWarner Losh 
327*0c16b537SWarner Losh 
328*0c16b537SWarner Losh /****************************************************************
329*0c16b537SWarner Losh *  Complex types
330*0c16b537SWarner Losh ****************************************************************/
331*0c16b537SWarner Losh typedef struct
332*0c16b537SWarner Losh {
333*0c16b537SWarner Losh     int deltaFindState;
334*0c16b537SWarner Losh     U32 deltaNbBits;
335*0c16b537SWarner Losh } FSE_symbolCompressionTransform; /* total 8 bytes */
336*0c16b537SWarner Losh 
337*0c16b537SWarner Losh typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
338*0c16b537SWarner Losh 
339*0c16b537SWarner Losh /****************************************************************
340*0c16b537SWarner Losh *  Internal functions
341*0c16b537SWarner Losh ****************************************************************/
342*0c16b537SWarner Losh FORCE_INLINE unsigned FSE_highbit32 (register U32 val)
343*0c16b537SWarner Losh {
344*0c16b537SWarner Losh #   if defined(_MSC_VER)   /* Visual */
345*0c16b537SWarner Losh     unsigned long r;
346*0c16b537SWarner Losh     _BitScanReverse ( &r, val );
347*0c16b537SWarner Losh     return (unsigned) r;
348*0c16b537SWarner Losh #   elif defined(__GNUC__) && (GCC_VERSION >= 304)   /* GCC Intrinsic */
349*0c16b537SWarner Losh     return 31 - __builtin_clz (val);
350*0c16b537SWarner Losh #   else   /* Software version */
351*0c16b537SWarner Losh     static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
352*0c16b537SWarner Losh     U32 v = val;
353*0c16b537SWarner Losh     unsigned r;
354*0c16b537SWarner Losh     v |= v >> 1;
355*0c16b537SWarner Losh     v |= v >> 2;
356*0c16b537SWarner Losh     v |= v >> 4;
357*0c16b537SWarner Losh     v |= v >> 8;
358*0c16b537SWarner Losh     v |= v >> 16;
359*0c16b537SWarner Losh     r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
360*0c16b537SWarner Losh     return r;
361*0c16b537SWarner Losh #   endif
362*0c16b537SWarner Losh }
363*0c16b537SWarner Losh 
364*0c16b537SWarner Losh 
365*0c16b537SWarner Losh /****************************************************************
366*0c16b537SWarner Losh *  Templates
367*0c16b537SWarner Losh ****************************************************************/
368*0c16b537SWarner Losh /*
369*0c16b537SWarner Losh   designed to be included
370*0c16b537SWarner Losh   for type-specific functions (template emulation in C)
371*0c16b537SWarner Losh   Objective is to write these functions only once, for improved maintenance
372*0c16b537SWarner Losh */
373*0c16b537SWarner Losh 
374*0c16b537SWarner Losh /* safety checks */
375*0c16b537SWarner Losh #ifndef FSE_FUNCTION_EXTENSION
376*0c16b537SWarner Losh #  error "FSE_FUNCTION_EXTENSION must be defined"
377*0c16b537SWarner Losh #endif
378*0c16b537SWarner Losh #ifndef FSE_FUNCTION_TYPE
379*0c16b537SWarner Losh #  error "FSE_FUNCTION_TYPE must be defined"
380*0c16b537SWarner Losh #endif
381*0c16b537SWarner Losh 
382*0c16b537SWarner Losh /* Function names */
383*0c16b537SWarner Losh #define FSE_CAT(X,Y) X##Y
384*0c16b537SWarner Losh #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
385*0c16b537SWarner Losh #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
386*0c16b537SWarner Losh 
387*0c16b537SWarner Losh 
388*0c16b537SWarner Losh 
389*0c16b537SWarner Losh static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
390*0c16b537SWarner Losh 
391*0c16b537SWarner Losh #define FSE_DECODE_TYPE FSE_decode_t
392*0c16b537SWarner Losh 
393*0c16b537SWarner Losh 
394*0c16b537SWarner Losh typedef struct {
395*0c16b537SWarner Losh     U16 tableLog;
396*0c16b537SWarner Losh     U16 fastMode;
397*0c16b537SWarner Losh } FSE_DTableHeader;   /* sizeof U32 */
398*0c16b537SWarner Losh 
399*0c16b537SWarner Losh static size_t FSE_buildDTable
400*0c16b537SWarner Losh (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
401*0c16b537SWarner Losh {
402*0c16b537SWarner Losh     void* ptr = dt;
403*0c16b537SWarner Losh     FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
404*0c16b537SWarner Losh     FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)(ptr) + 1;   /* because dt is unsigned, 32-bits aligned on 32-bits */
405*0c16b537SWarner Losh     const U32 tableSize = 1 << tableLog;
406*0c16b537SWarner Losh     const U32 tableMask = tableSize-1;
407*0c16b537SWarner Losh     const U32 step = FSE_tableStep(tableSize);
408*0c16b537SWarner Losh     U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
409*0c16b537SWarner Losh     U32 position = 0;
410*0c16b537SWarner Losh     U32 highThreshold = tableSize-1;
411*0c16b537SWarner Losh     const S16 largeLimit= (S16)(1 << (tableLog-1));
412*0c16b537SWarner Losh     U32 noLarge = 1;
413*0c16b537SWarner Losh     U32 s;
414*0c16b537SWarner Losh 
415*0c16b537SWarner Losh     /* Sanity Checks */
416*0c16b537SWarner Losh     if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge;
417*0c16b537SWarner Losh     if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge;
418*0c16b537SWarner Losh 
419*0c16b537SWarner Losh     /* Init, lay down lowprob symbols */
420*0c16b537SWarner Losh     DTableH[0].tableLog = (U16)tableLog;
421*0c16b537SWarner Losh     for (s=0; s<=maxSymbolValue; s++)
422*0c16b537SWarner Losh     {
423*0c16b537SWarner Losh         if (normalizedCounter[s]==-1)
424*0c16b537SWarner Losh         {
425*0c16b537SWarner Losh             tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
426*0c16b537SWarner Losh             symbolNext[s] = 1;
427*0c16b537SWarner Losh         }
428*0c16b537SWarner Losh         else
429*0c16b537SWarner Losh         {
430*0c16b537SWarner Losh             if (normalizedCounter[s] >= largeLimit) noLarge=0;
431*0c16b537SWarner Losh             symbolNext[s] = normalizedCounter[s];
432*0c16b537SWarner Losh         }
433*0c16b537SWarner Losh     }
434*0c16b537SWarner Losh 
435*0c16b537SWarner Losh     /* Spread symbols */
436*0c16b537SWarner Losh     for (s=0; s<=maxSymbolValue; s++)
437*0c16b537SWarner Losh     {
438*0c16b537SWarner Losh         int i;
439*0c16b537SWarner Losh         for (i=0; i<normalizedCounter[s]; i++)
440*0c16b537SWarner Losh         {
441*0c16b537SWarner Losh             tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
442*0c16b537SWarner Losh             position = (position + step) & tableMask;
443*0c16b537SWarner Losh             while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
444*0c16b537SWarner Losh         }
445*0c16b537SWarner Losh     }
446*0c16b537SWarner Losh 
447*0c16b537SWarner Losh     if (position!=0) return (size_t)-FSE_ERROR_GENERIC;   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
448*0c16b537SWarner Losh 
449*0c16b537SWarner Losh     /* Build Decoding table */
450*0c16b537SWarner Losh     {
451*0c16b537SWarner Losh         U32 i;
452*0c16b537SWarner Losh         for (i=0; i<tableSize; i++)
453*0c16b537SWarner Losh         {
454*0c16b537SWarner Losh             FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
455*0c16b537SWarner Losh             U16 nextState = symbolNext[symbol]++;
456*0c16b537SWarner Losh             tableDecode[i].nbBits = (BYTE) (tableLog - FSE_highbit32 ((U32)nextState) );
457*0c16b537SWarner Losh             tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
458*0c16b537SWarner Losh         }
459*0c16b537SWarner Losh     }
460*0c16b537SWarner Losh 
461*0c16b537SWarner Losh     DTableH->fastMode = (U16)noLarge;
462*0c16b537SWarner Losh     return 0;
463*0c16b537SWarner Losh }
464*0c16b537SWarner Losh 
465*0c16b537SWarner Losh 
466*0c16b537SWarner Losh /******************************************
467*0c16b537SWarner Losh *  FSE byte symbol
468*0c16b537SWarner Losh ******************************************/
469*0c16b537SWarner Losh #ifndef FSE_COMMONDEFS_ONLY
470*0c16b537SWarner Losh 
471*0c16b537SWarner Losh static unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); }
472*0c16b537SWarner Losh 
473*0c16b537SWarner Losh static short FSE_abs(short a)
474*0c16b537SWarner Losh {
475*0c16b537SWarner Losh     return a<0? -a : a;
476*0c16b537SWarner Losh }
477*0c16b537SWarner Losh 
478*0c16b537SWarner Losh 
479*0c16b537SWarner Losh /****************************************************************
480*0c16b537SWarner Losh *  Header bitstream management
481*0c16b537SWarner Losh ****************************************************************/
482*0c16b537SWarner Losh static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
483*0c16b537SWarner Losh                  const void* headerBuffer, size_t hbSize)
484*0c16b537SWarner Losh {
485*0c16b537SWarner Losh     const BYTE* const istart = (const BYTE*) headerBuffer;
486*0c16b537SWarner Losh     const BYTE* const iend = istart + hbSize;
487*0c16b537SWarner Losh     const BYTE* ip = istart;
488*0c16b537SWarner Losh     int nbBits;
489*0c16b537SWarner Losh     int remaining;
490*0c16b537SWarner Losh     int threshold;
491*0c16b537SWarner Losh     U32 bitStream;
492*0c16b537SWarner Losh     int bitCount;
493*0c16b537SWarner Losh     unsigned charnum = 0;
494*0c16b537SWarner Losh     int previous0 = 0;
495*0c16b537SWarner Losh 
496*0c16b537SWarner Losh     if (hbSize < 4) return (size_t)-FSE_ERROR_srcSize_wrong;
497*0c16b537SWarner Losh     bitStream = FSE_readLE32(ip);
498*0c16b537SWarner Losh     nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
499*0c16b537SWarner Losh     if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return (size_t)-FSE_ERROR_tableLog_tooLarge;
500*0c16b537SWarner Losh     bitStream >>= 4;
501*0c16b537SWarner Losh     bitCount = 4;
502*0c16b537SWarner Losh     *tableLogPtr = nbBits;
503*0c16b537SWarner Losh     remaining = (1<<nbBits)+1;
504*0c16b537SWarner Losh     threshold = 1<<nbBits;
505*0c16b537SWarner Losh     nbBits++;
506*0c16b537SWarner Losh 
507*0c16b537SWarner Losh     while ((remaining>1) && (charnum<=*maxSVPtr))
508*0c16b537SWarner Losh     {
509*0c16b537SWarner Losh         if (previous0)
510*0c16b537SWarner Losh         {
511*0c16b537SWarner Losh             unsigned n0 = charnum;
512*0c16b537SWarner Losh             while ((bitStream & 0xFFFF) == 0xFFFF)
513*0c16b537SWarner Losh             {
514*0c16b537SWarner Losh                 n0+=24;
515*0c16b537SWarner Losh                 if (ip < iend-5)
516*0c16b537SWarner Losh                 {
517*0c16b537SWarner Losh                     ip+=2;
518*0c16b537SWarner Losh                     bitStream = FSE_readLE32(ip) >> bitCount;
519*0c16b537SWarner Losh                 }
520*0c16b537SWarner Losh                 else
521*0c16b537SWarner Losh                 {
522*0c16b537SWarner Losh                     bitStream >>= 16;
523*0c16b537SWarner Losh                     bitCount+=16;
524*0c16b537SWarner Losh                 }
525*0c16b537SWarner Losh             }
526*0c16b537SWarner Losh             while ((bitStream & 3) == 3)
527*0c16b537SWarner Losh             {
528*0c16b537SWarner Losh                 n0+=3;
529*0c16b537SWarner Losh                 bitStream>>=2;
530*0c16b537SWarner Losh                 bitCount+=2;
531*0c16b537SWarner Losh             }
532*0c16b537SWarner Losh             n0 += bitStream & 3;
533*0c16b537SWarner Losh             bitCount += 2;
534*0c16b537SWarner Losh             if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_maxSymbolValue_tooSmall;
535*0c16b537SWarner Losh             while (charnum < n0) normalizedCounter[charnum++] = 0;
536*0c16b537SWarner Losh             if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
537*0c16b537SWarner Losh             {
538*0c16b537SWarner Losh                 ip += bitCount>>3;
539*0c16b537SWarner Losh                 bitCount &= 7;
540*0c16b537SWarner Losh                 bitStream = FSE_readLE32(ip) >> bitCount;
541*0c16b537SWarner Losh             }
542*0c16b537SWarner Losh             else
543*0c16b537SWarner Losh                 bitStream >>= 2;
544*0c16b537SWarner Losh         }
545*0c16b537SWarner Losh         {
546*0c16b537SWarner Losh             const short max = (short)((2*threshold-1)-remaining);
547*0c16b537SWarner Losh             short count;
548*0c16b537SWarner Losh 
549*0c16b537SWarner Losh             if ((bitStream & (threshold-1)) < (U32)max)
550*0c16b537SWarner Losh             {
551*0c16b537SWarner Losh                 count = (short)(bitStream & (threshold-1));
552*0c16b537SWarner Losh                 bitCount   += nbBits-1;
553*0c16b537SWarner Losh             }
554*0c16b537SWarner Losh             else
555*0c16b537SWarner Losh             {
556*0c16b537SWarner Losh                 count = (short)(bitStream & (2*threshold-1));
557*0c16b537SWarner Losh                 if (count >= threshold) count -= max;
558*0c16b537SWarner Losh                 bitCount   += nbBits;
559*0c16b537SWarner Losh             }
560*0c16b537SWarner Losh 
561*0c16b537SWarner Losh             count--;   /* extra accuracy */
562*0c16b537SWarner Losh             remaining -= FSE_abs(count);
563*0c16b537SWarner Losh             normalizedCounter[charnum++] = count;
564*0c16b537SWarner Losh             previous0 = !count;
565*0c16b537SWarner Losh             while (remaining < threshold)
566*0c16b537SWarner Losh             {
567*0c16b537SWarner Losh                 nbBits--;
568*0c16b537SWarner Losh                 threshold >>= 1;
569*0c16b537SWarner Losh             }
570*0c16b537SWarner Losh 
571*0c16b537SWarner Losh             {
572*0c16b537SWarner Losh                 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
573*0c16b537SWarner Losh                 {
574*0c16b537SWarner Losh                     ip += bitCount>>3;
575*0c16b537SWarner Losh                     bitCount &= 7;
576*0c16b537SWarner Losh                 }
577*0c16b537SWarner Losh                 else
578*0c16b537SWarner Losh                 {
579*0c16b537SWarner Losh                     bitCount -= (int)(8 * (iend - 4 - ip));
580*0c16b537SWarner Losh                     ip = iend - 4;
581*0c16b537SWarner Losh                 }
582*0c16b537SWarner Losh                 bitStream = FSE_readLE32(ip) >> (bitCount & 31);
583*0c16b537SWarner Losh             }
584*0c16b537SWarner Losh         }
585*0c16b537SWarner Losh     }
586*0c16b537SWarner Losh     if (remaining != 1) return (size_t)-FSE_ERROR_GENERIC;
587*0c16b537SWarner Losh     *maxSVPtr = charnum-1;
588*0c16b537SWarner Losh 
589*0c16b537SWarner Losh     ip += (bitCount+7)>>3;
590*0c16b537SWarner Losh     if ((size_t)(ip-istart) > hbSize) return (size_t)-FSE_ERROR_srcSize_wrong;
591*0c16b537SWarner Losh     return ip-istart;
592*0c16b537SWarner Losh }
593*0c16b537SWarner Losh 
594*0c16b537SWarner Losh 
595*0c16b537SWarner Losh /*********************************************************
596*0c16b537SWarner Losh *  Decompression (Byte symbols)
597*0c16b537SWarner Losh *********************************************************/
598*0c16b537SWarner Losh static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
599*0c16b537SWarner Losh {
600*0c16b537SWarner Losh     void* ptr = dt;
601*0c16b537SWarner Losh     FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
602*0c16b537SWarner Losh     FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1;   /* because dt is unsigned */
603*0c16b537SWarner Losh 
604*0c16b537SWarner Losh     DTableH->tableLog = 0;
605*0c16b537SWarner Losh     DTableH->fastMode = 0;
606*0c16b537SWarner Losh 
607*0c16b537SWarner Losh     cell->newState = 0;
608*0c16b537SWarner Losh     cell->symbol = symbolValue;
609*0c16b537SWarner Losh     cell->nbBits = 0;
610*0c16b537SWarner Losh 
611*0c16b537SWarner Losh     return 0;
612*0c16b537SWarner Losh }
613*0c16b537SWarner Losh 
614*0c16b537SWarner Losh 
615*0c16b537SWarner Losh static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
616*0c16b537SWarner Losh {
617*0c16b537SWarner Losh     void* ptr = dt;
618*0c16b537SWarner Losh     FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
619*0c16b537SWarner Losh     FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1;   /* because dt is unsigned */
620*0c16b537SWarner Losh     const unsigned tableSize = 1 << nbBits;
621*0c16b537SWarner Losh     const unsigned tableMask = tableSize - 1;
622*0c16b537SWarner Losh     const unsigned maxSymbolValue = tableMask;
623*0c16b537SWarner Losh     unsigned s;
624*0c16b537SWarner Losh 
625*0c16b537SWarner Losh     /* Sanity checks */
626*0c16b537SWarner Losh     if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC;             /* min size */
627*0c16b537SWarner Losh 
628*0c16b537SWarner Losh     /* Build Decoding Table */
629*0c16b537SWarner Losh     DTableH->tableLog = (U16)nbBits;
630*0c16b537SWarner Losh     DTableH->fastMode = 1;
631*0c16b537SWarner Losh     for (s=0; s<=maxSymbolValue; s++)
632*0c16b537SWarner Losh     {
633*0c16b537SWarner Losh         dinfo[s].newState = 0;
634*0c16b537SWarner Losh         dinfo[s].symbol = (BYTE)s;
635*0c16b537SWarner Losh         dinfo[s].nbBits = (BYTE)nbBits;
636*0c16b537SWarner Losh     }
637*0c16b537SWarner Losh 
638*0c16b537SWarner Losh     return 0;
639*0c16b537SWarner Losh }
640*0c16b537SWarner Losh 
641*0c16b537SWarner Losh 
642*0c16b537SWarner Losh /* FSE_initDStream
643*0c16b537SWarner Losh  * Initialize a FSE_DStream_t.
644*0c16b537SWarner Losh  * srcBuffer must point at the beginning of an FSE block.
645*0c16b537SWarner Losh  * The function result is the size of the FSE_block (== srcSize).
646*0c16b537SWarner Losh  * If srcSize is too small, the function will return an errorCode;
647*0c16b537SWarner Losh  */
648*0c16b537SWarner Losh static size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
649*0c16b537SWarner Losh {
650*0c16b537SWarner Losh     if (srcSize < 1) return (size_t)-FSE_ERROR_srcSize_wrong;
651*0c16b537SWarner Losh 
652*0c16b537SWarner Losh     if (srcSize >=  sizeof(size_t))
653*0c16b537SWarner Losh     {
654*0c16b537SWarner Losh         U32 contain32;
655*0c16b537SWarner Losh         bitD->start = (const char*)srcBuffer;
656*0c16b537SWarner Losh         bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(size_t);
657*0c16b537SWarner Losh         bitD->bitContainer = FSE_readLEST(bitD->ptr);
658*0c16b537SWarner Losh         contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
659*0c16b537SWarner Losh         if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC;   /* stop bit not present */
660*0c16b537SWarner Losh         bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
661*0c16b537SWarner Losh     }
662*0c16b537SWarner Losh     else
663*0c16b537SWarner Losh     {
664*0c16b537SWarner Losh         U32 contain32;
665*0c16b537SWarner Losh         bitD->start = (const char*)srcBuffer;
666*0c16b537SWarner Losh         bitD->ptr   = bitD->start;
667*0c16b537SWarner Losh         bitD->bitContainer = *(const BYTE*)(bitD->start);
668*0c16b537SWarner Losh         switch(srcSize)
669*0c16b537SWarner Losh         {
670*0c16b537SWarner Losh             case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
671*0c16b537SWarner Losh             case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
672*0c16b537SWarner Losh             case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
673*0c16b537SWarner Losh             case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
674*0c16b537SWarner Losh             case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
675*0c16b537SWarner Losh             case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) <<  8;
676*0c16b537SWarner Losh             default:;
677*0c16b537SWarner Losh         }
678*0c16b537SWarner Losh         contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
679*0c16b537SWarner Losh         if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC;   /* stop bit not present */
680*0c16b537SWarner Losh         bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
681*0c16b537SWarner Losh         bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
682*0c16b537SWarner Losh     }
683*0c16b537SWarner Losh 
684*0c16b537SWarner Losh     return srcSize;
685*0c16b537SWarner Losh }
686*0c16b537SWarner Losh 
687*0c16b537SWarner Losh 
688*0c16b537SWarner Losh /*!FSE_lookBits
689*0c16b537SWarner Losh  * Provides next n bits from the bitContainer.
690*0c16b537SWarner Losh  * bitContainer is not modified (bits are still present for next read/look)
691*0c16b537SWarner Losh  * On 32-bits, maxNbBits==25
692*0c16b537SWarner Losh  * On 64-bits, maxNbBits==57
693*0c16b537SWarner Losh  * return : value extracted.
694*0c16b537SWarner Losh  */
695*0c16b537SWarner Losh static size_t FSE_lookBits(FSE_DStream_t* bitD, U32 nbBits)
696*0c16b537SWarner Losh {
697*0c16b537SWarner Losh     const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
698*0c16b537SWarner Losh     return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
699*0c16b537SWarner Losh }
700*0c16b537SWarner Losh 
701*0c16b537SWarner Losh static size_t FSE_lookBitsFast(FSE_DStream_t* bitD, U32 nbBits)   /* only if nbBits >= 1 !! */
702*0c16b537SWarner Losh {
703*0c16b537SWarner Losh     const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
704*0c16b537SWarner Losh     return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
705*0c16b537SWarner Losh }
706*0c16b537SWarner Losh 
707*0c16b537SWarner Losh static void FSE_skipBits(FSE_DStream_t* bitD, U32 nbBits)
708*0c16b537SWarner Losh {
709*0c16b537SWarner Losh     bitD->bitsConsumed += nbBits;
710*0c16b537SWarner Losh }
711*0c16b537SWarner Losh 
712*0c16b537SWarner Losh 
713*0c16b537SWarner Losh /*!FSE_readBits
714*0c16b537SWarner Losh  * Read next n bits from the bitContainer.
715*0c16b537SWarner Losh  * On 32-bits, don't read more than maxNbBits==25
716*0c16b537SWarner Losh  * On 64-bits, don't read more than maxNbBits==57
717*0c16b537SWarner Losh  * Use the fast variant *only* if n >= 1.
718*0c16b537SWarner Losh  * return : value extracted.
719*0c16b537SWarner Losh  */
720*0c16b537SWarner Losh static size_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits)
721*0c16b537SWarner Losh {
722*0c16b537SWarner Losh     size_t value = FSE_lookBits(bitD, nbBits);
723*0c16b537SWarner Losh     FSE_skipBits(bitD, nbBits);
724*0c16b537SWarner Losh     return value;
725*0c16b537SWarner Losh }
726*0c16b537SWarner Losh 
727*0c16b537SWarner Losh static size_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits)   /* only if nbBits >= 1 !! */
728*0c16b537SWarner Losh {
729*0c16b537SWarner Losh     size_t value = FSE_lookBitsFast(bitD, nbBits);
730*0c16b537SWarner Losh     FSE_skipBits(bitD, nbBits);
731*0c16b537SWarner Losh     return value;
732*0c16b537SWarner Losh }
733*0c16b537SWarner Losh 
734*0c16b537SWarner Losh static unsigned FSE_reloadDStream(FSE_DStream_t* bitD)
735*0c16b537SWarner Losh {
736*0c16b537SWarner Losh     if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* should never happen */
737*0c16b537SWarner Losh         return FSE_DStream_tooFar;
738*0c16b537SWarner Losh 
739*0c16b537SWarner Losh     if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
740*0c16b537SWarner Losh     {
741*0c16b537SWarner Losh         bitD->ptr -= bitD->bitsConsumed >> 3;
742*0c16b537SWarner Losh         bitD->bitsConsumed &= 7;
743*0c16b537SWarner Losh         bitD->bitContainer = FSE_readLEST(bitD->ptr);
744*0c16b537SWarner Losh         return FSE_DStream_unfinished;
745*0c16b537SWarner Losh     }
746*0c16b537SWarner Losh     if (bitD->ptr == bitD->start)
747*0c16b537SWarner Losh     {
748*0c16b537SWarner Losh         if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return FSE_DStream_endOfBuffer;
749*0c16b537SWarner Losh         return FSE_DStream_completed;
750*0c16b537SWarner Losh     }
751*0c16b537SWarner Losh     {
752*0c16b537SWarner Losh         U32 nbBytes = bitD->bitsConsumed >> 3;
753*0c16b537SWarner Losh         U32 result = FSE_DStream_unfinished;
754*0c16b537SWarner Losh         if (bitD->ptr - nbBytes < bitD->start)
755*0c16b537SWarner Losh         {
756*0c16b537SWarner Losh             nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
757*0c16b537SWarner Losh             result = FSE_DStream_endOfBuffer;
758*0c16b537SWarner Losh         }
759*0c16b537SWarner Losh         bitD->ptr -= nbBytes;
760*0c16b537SWarner Losh         bitD->bitsConsumed -= nbBytes*8;
761*0c16b537SWarner Losh         bitD->bitContainer = FSE_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD) */
762*0c16b537SWarner Losh         return result;
763*0c16b537SWarner Losh     }
764*0c16b537SWarner Losh }
765*0c16b537SWarner Losh 
766*0c16b537SWarner Losh 
767*0c16b537SWarner Losh static void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const FSE_DTable* dt)
768*0c16b537SWarner Losh {
769*0c16b537SWarner Losh     const void* ptr = dt;
770*0c16b537SWarner Losh     const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr;
771*0c16b537SWarner Losh     DStatePtr->state = FSE_readBits(bitD, DTableH->tableLog);
772*0c16b537SWarner Losh     FSE_reloadDStream(bitD);
773*0c16b537SWarner Losh     DStatePtr->table = dt + 1;
774*0c16b537SWarner Losh }
775*0c16b537SWarner Losh 
776*0c16b537SWarner Losh static BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
777*0c16b537SWarner Losh {
778*0c16b537SWarner Losh     const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
779*0c16b537SWarner Losh     const U32  nbBits = DInfo.nbBits;
780*0c16b537SWarner Losh     BYTE symbol = DInfo.symbol;
781*0c16b537SWarner Losh     size_t lowBits = FSE_readBits(bitD, nbBits);
782*0c16b537SWarner Losh 
783*0c16b537SWarner Losh     DStatePtr->state = DInfo.newState + lowBits;
784*0c16b537SWarner Losh     return symbol;
785*0c16b537SWarner Losh }
786*0c16b537SWarner Losh 
787*0c16b537SWarner Losh static BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
788*0c16b537SWarner Losh {
789*0c16b537SWarner Losh     const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
790*0c16b537SWarner Losh     const U32 nbBits = DInfo.nbBits;
791*0c16b537SWarner Losh     BYTE symbol = DInfo.symbol;
792*0c16b537SWarner Losh     size_t lowBits = FSE_readBitsFast(bitD, nbBits);
793*0c16b537SWarner Losh 
794*0c16b537SWarner Losh     DStatePtr->state = DInfo.newState + lowBits;
795*0c16b537SWarner Losh     return symbol;
796*0c16b537SWarner Losh }
797*0c16b537SWarner Losh 
798*0c16b537SWarner Losh /* FSE_endOfDStream
799*0c16b537SWarner Losh    Tells if bitD has reached end of bitStream or not */
800*0c16b537SWarner Losh 
801*0c16b537SWarner Losh static unsigned FSE_endOfDStream(const FSE_DStream_t* bitD)
802*0c16b537SWarner Losh {
803*0c16b537SWarner Losh     return ((bitD->ptr == bitD->start) && (bitD->bitsConsumed == sizeof(bitD->bitContainer)*8));
804*0c16b537SWarner Losh }
805*0c16b537SWarner Losh 
806*0c16b537SWarner Losh static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
807*0c16b537SWarner Losh {
808*0c16b537SWarner Losh     return DStatePtr->state == 0;
809*0c16b537SWarner Losh }
810*0c16b537SWarner Losh 
811*0c16b537SWarner Losh 
812*0c16b537SWarner Losh FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
813*0c16b537SWarner Losh           void* dst, size_t maxDstSize,
814*0c16b537SWarner Losh     const void* cSrc, size_t cSrcSize,
815*0c16b537SWarner Losh     const FSE_DTable* dt, const unsigned fast)
816*0c16b537SWarner Losh {
817*0c16b537SWarner Losh     BYTE* const ostart = (BYTE*) dst;
818*0c16b537SWarner Losh     BYTE* op = ostart;
819*0c16b537SWarner Losh     BYTE* const omax = op + maxDstSize;
820*0c16b537SWarner Losh     BYTE* const olimit = omax-3;
821*0c16b537SWarner Losh 
822*0c16b537SWarner Losh     FSE_DStream_t bitD;
823*0c16b537SWarner Losh     FSE_DState_t state1;
824*0c16b537SWarner Losh     FSE_DState_t state2;
825*0c16b537SWarner Losh     size_t errorCode;
826*0c16b537SWarner Losh 
827*0c16b537SWarner Losh     /* Init */
828*0c16b537SWarner Losh     errorCode = FSE_initDStream(&bitD, cSrc, cSrcSize);   /* replaced last arg by maxCompressed Size */
829*0c16b537SWarner Losh     if (FSE_isError(errorCode)) return errorCode;
830*0c16b537SWarner Losh 
831*0c16b537SWarner Losh     FSE_initDState(&state1, &bitD, dt);
832*0c16b537SWarner Losh     FSE_initDState(&state2, &bitD, dt);
833*0c16b537SWarner Losh 
834*0c16b537SWarner Losh #define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
835*0c16b537SWarner Losh 
836*0c16b537SWarner Losh     /* 4 symbols per loop */
837*0c16b537SWarner Losh     for ( ; (FSE_reloadDStream(&bitD)==FSE_DStream_unfinished) && (op<olimit) ; op+=4)
838*0c16b537SWarner Losh     {
839*0c16b537SWarner Losh         op[0] = FSE_GETSYMBOL(&state1);
840*0c16b537SWarner Losh 
841*0c16b537SWarner Losh         if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
842*0c16b537SWarner Losh             FSE_reloadDStream(&bitD);
843*0c16b537SWarner Losh 
844*0c16b537SWarner Losh         op[1] = FSE_GETSYMBOL(&state2);
845*0c16b537SWarner Losh 
846*0c16b537SWarner Losh         if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
847*0c16b537SWarner Losh             { if (FSE_reloadDStream(&bitD) > FSE_DStream_unfinished) { op+=2; break; } }
848*0c16b537SWarner Losh 
849*0c16b537SWarner Losh         op[2] = FSE_GETSYMBOL(&state1);
850*0c16b537SWarner Losh 
851*0c16b537SWarner Losh         if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
852*0c16b537SWarner Losh             FSE_reloadDStream(&bitD);
853*0c16b537SWarner Losh 
854*0c16b537SWarner Losh         op[3] = FSE_GETSYMBOL(&state2);
855*0c16b537SWarner Losh     }
856*0c16b537SWarner Losh 
857*0c16b537SWarner Losh     /* tail */
858*0c16b537SWarner Losh     /* note : FSE_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly FSE_DStream_completed */
859*0c16b537SWarner Losh     while (1)
860*0c16b537SWarner Losh     {
861*0c16b537SWarner Losh         if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
862*0c16b537SWarner Losh             break;
863*0c16b537SWarner Losh 
864*0c16b537SWarner Losh         *op++ = FSE_GETSYMBOL(&state1);
865*0c16b537SWarner Losh 
866*0c16b537SWarner Losh         if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
867*0c16b537SWarner Losh             break;
868*0c16b537SWarner Losh 
869*0c16b537SWarner Losh         *op++ = FSE_GETSYMBOL(&state2);
870*0c16b537SWarner Losh     }
871*0c16b537SWarner Losh 
872*0c16b537SWarner Losh     /* end ? */
873*0c16b537SWarner Losh     if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
874*0c16b537SWarner Losh         return op-ostart;
875*0c16b537SWarner Losh 
876*0c16b537SWarner Losh     if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */
877*0c16b537SWarner Losh 
878*0c16b537SWarner Losh     return (size_t)-FSE_ERROR_corruptionDetected;
879*0c16b537SWarner Losh }
880*0c16b537SWarner Losh 
881*0c16b537SWarner Losh 
882*0c16b537SWarner Losh static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
883*0c16b537SWarner Losh                             const void* cSrc, size_t cSrcSize,
884*0c16b537SWarner Losh                             const FSE_DTable* dt)
885*0c16b537SWarner Losh {
886*0c16b537SWarner Losh     FSE_DTableHeader DTableH;
887*0c16b537SWarner Losh     memcpy(&DTableH, dt, sizeof(DTableH));   /* memcpy() into local variable, to avoid strict aliasing warning */
888*0c16b537SWarner Losh 
889*0c16b537SWarner Losh     /* select fast mode (static) */
890*0c16b537SWarner Losh     if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
891*0c16b537SWarner Losh     return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
892*0c16b537SWarner Losh }
893*0c16b537SWarner Losh 
894*0c16b537SWarner Losh 
895*0c16b537SWarner Losh static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
896*0c16b537SWarner Losh {
897*0c16b537SWarner Losh     const BYTE* const istart = (const BYTE*)cSrc;
898*0c16b537SWarner Losh     const BYTE* ip = istart;
899*0c16b537SWarner Losh     short counting[FSE_MAX_SYMBOL_VALUE+1];
900*0c16b537SWarner Losh     DTable_max_t dt;   /* Static analyzer seems unable to understand this table will be properly initialized later */
901*0c16b537SWarner Losh     unsigned tableLog;
902*0c16b537SWarner Losh     unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
903*0c16b537SWarner Losh     size_t errorCode;
904*0c16b537SWarner Losh 
905*0c16b537SWarner Losh     if (cSrcSize<2) return (size_t)-FSE_ERROR_srcSize_wrong;   /* too small input size */
906*0c16b537SWarner Losh 
907*0c16b537SWarner Losh     /* normal FSE decoding mode */
908*0c16b537SWarner Losh     errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
909*0c16b537SWarner Losh     if (FSE_isError(errorCode)) return errorCode;
910*0c16b537SWarner Losh     if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;   /* too small input size */
911*0c16b537SWarner Losh     ip += errorCode;
912*0c16b537SWarner Losh     cSrcSize -= errorCode;
913*0c16b537SWarner Losh 
914*0c16b537SWarner Losh     errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
915*0c16b537SWarner Losh     if (FSE_isError(errorCode)) return errorCode;
916*0c16b537SWarner Losh 
917*0c16b537SWarner Losh     /* always return, even if it is an error code */
918*0c16b537SWarner Losh     return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
919*0c16b537SWarner Losh }
920*0c16b537SWarner Losh 
921*0c16b537SWarner Losh 
922*0c16b537SWarner Losh 
923*0c16b537SWarner Losh /* *******************************************************
924*0c16b537SWarner Losh *  Huff0 : Huffman block compression
925*0c16b537SWarner Losh *********************************************************/
926*0c16b537SWarner Losh #define HUF_MAX_SYMBOL_VALUE 255
927*0c16b537SWarner Losh #define HUF_DEFAULT_TABLELOG  12       /* used by default, when not specified */
928*0c16b537SWarner Losh #define HUF_MAX_TABLELOG  12           /* max possible tableLog; for allocation purpose; can be modified */
929*0c16b537SWarner Losh #define HUF_ABSOLUTEMAX_TABLELOG  16   /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
930*0c16b537SWarner Losh #if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
931*0c16b537SWarner Losh #  error "HUF_MAX_TABLELOG is too large !"
932*0c16b537SWarner Losh #endif
933*0c16b537SWarner Losh 
934*0c16b537SWarner Losh typedef struct HUF_CElt_s {
935*0c16b537SWarner Losh   U16  val;
936*0c16b537SWarner Losh   BYTE nbBits;
937*0c16b537SWarner Losh } HUF_CElt ;
938*0c16b537SWarner Losh 
939*0c16b537SWarner Losh typedef struct nodeElt_s {
940*0c16b537SWarner Losh     U32 count;
941*0c16b537SWarner Losh     U16 parent;
942*0c16b537SWarner Losh     BYTE byte;
943*0c16b537SWarner Losh     BYTE nbBits;
944*0c16b537SWarner Losh } nodeElt;
945*0c16b537SWarner Losh 
946*0c16b537SWarner Losh 
947*0c16b537SWarner Losh /* *******************************************************
948*0c16b537SWarner Losh *  Huff0 : Huffman block decompression
949*0c16b537SWarner Losh *********************************************************/
950*0c16b537SWarner Losh typedef struct {
951*0c16b537SWarner Losh     BYTE byte;
952*0c16b537SWarner Losh     BYTE nbBits;
953*0c16b537SWarner Losh } HUF_DElt;
954*0c16b537SWarner Losh 
955*0c16b537SWarner Losh static size_t HUF_readDTable (U16* DTable, const void* src, size_t srcSize)
956*0c16b537SWarner Losh {
957*0c16b537SWarner Losh     BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
958*0c16b537SWarner Losh     U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];  /* large enough for values from 0 to 16 */
959*0c16b537SWarner Losh     U32 weightTotal;
960*0c16b537SWarner Losh     U32 maxBits;
961*0c16b537SWarner Losh     const BYTE* ip = (const BYTE*) src;
962*0c16b537SWarner Losh     size_t iSize;
963*0c16b537SWarner Losh     size_t oSize;
964*0c16b537SWarner Losh     U32 n;
965*0c16b537SWarner Losh     U32 nextRankStart;
966*0c16b537SWarner Losh     void* ptr = DTable+1;
967*0c16b537SWarner Losh     HUF_DElt* const dt = (HUF_DElt*)ptr;
968*0c16b537SWarner Losh 
969*0c16b537SWarner Losh     if (!srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
970*0c16b537SWarner Losh     iSize = ip[0];
971*0c16b537SWarner Losh 
972*0c16b537SWarner Losh     FSE_STATIC_ASSERT(sizeof(HUF_DElt) == sizeof(U16));   /* if compilation fails here, assertion is false */
973*0c16b537SWarner Losh     //memset(huffWeight, 0, sizeof(huffWeight));   /* should not be necessary, but some analyzer complain ... */
974*0c16b537SWarner Losh     if (iSize >= 128)  /* special header */
975*0c16b537SWarner Losh     {
976*0c16b537SWarner Losh         if (iSize >= (242))   /* RLE */
977*0c16b537SWarner Losh         {
978*0c16b537SWarner Losh             static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
979*0c16b537SWarner Losh             oSize = l[iSize-242];
980*0c16b537SWarner Losh             memset(huffWeight, 1, sizeof(huffWeight));
981*0c16b537SWarner Losh             iSize = 0;
982*0c16b537SWarner Losh         }
983*0c16b537SWarner Losh         else   /* Incompressible */
984*0c16b537SWarner Losh         {
985*0c16b537SWarner Losh             oSize = iSize - 127;
986*0c16b537SWarner Losh             iSize = ((oSize+1)/2);
987*0c16b537SWarner Losh             if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
988*0c16b537SWarner Losh             ip += 1;
989*0c16b537SWarner Losh             for (n=0; n<oSize; n+=2)
990*0c16b537SWarner Losh             {
991*0c16b537SWarner Losh                 huffWeight[n]   = ip[n/2] >> 4;
992*0c16b537SWarner Losh                 huffWeight[n+1] = ip[n/2] & 15;
993*0c16b537SWarner Losh             }
994*0c16b537SWarner Losh         }
995*0c16b537SWarner Losh     }
996*0c16b537SWarner Losh     else  /* header compressed with FSE (normal case) */
997*0c16b537SWarner Losh     {
998*0c16b537SWarner Losh         if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
999*0c16b537SWarner Losh         oSize = FSE_decompress(huffWeight, HUF_MAX_SYMBOL_VALUE, ip+1, iSize);   /* max 255 values decoded, last one is implied */
1000*0c16b537SWarner Losh         if (FSE_isError(oSize)) return oSize;
1001*0c16b537SWarner Losh     }
1002*0c16b537SWarner Losh 
1003*0c16b537SWarner Losh     /* collect weight stats */
1004*0c16b537SWarner Losh     memset(rankVal, 0, sizeof(rankVal));
1005*0c16b537SWarner Losh     weightTotal = 0;
1006*0c16b537SWarner Losh     for (n=0; n<oSize; n++)
1007*0c16b537SWarner Losh     {
1008*0c16b537SWarner Losh         if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return (size_t)-FSE_ERROR_corruptionDetected;
1009*0c16b537SWarner Losh         rankVal[huffWeight[n]]++;
1010*0c16b537SWarner Losh         weightTotal += (1 << huffWeight[n]) >> 1;
1011*0c16b537SWarner Losh     }
1012*0c16b537SWarner Losh     if (weightTotal == 0) return (size_t)-FSE_ERROR_corruptionDetected;
1013*0c16b537SWarner Losh 
1014*0c16b537SWarner Losh     /* get last non-null symbol weight (implied, total must be 2^n) */
1015*0c16b537SWarner Losh     maxBits = FSE_highbit32(weightTotal) + 1;
1016*0c16b537SWarner Losh     if (maxBits > DTable[0]) return (size_t)-FSE_ERROR_tableLog_tooLarge;   /* DTable is too small */
1017*0c16b537SWarner Losh     DTable[0] = (U16)maxBits;
1018*0c16b537SWarner Losh     {
1019*0c16b537SWarner Losh         U32 total = 1 << maxBits;
1020*0c16b537SWarner Losh         U32 rest = total - weightTotal;
1021*0c16b537SWarner Losh         U32 verif = 1 << FSE_highbit32(rest);
1022*0c16b537SWarner Losh         U32 lastWeight = FSE_highbit32(rest) + 1;
1023*0c16b537SWarner Losh         if (verif != rest) return (size_t)-FSE_ERROR_corruptionDetected;    /* last value must be a clean power of 2 */
1024*0c16b537SWarner Losh         huffWeight[oSize] = (BYTE)lastWeight;
1025*0c16b537SWarner Losh         rankVal[lastWeight]++;
1026*0c16b537SWarner Losh     }
1027*0c16b537SWarner Losh 
1028*0c16b537SWarner Losh     /* check tree construction validity */
1029*0c16b537SWarner Losh     if ((rankVal[1] < 2) || (rankVal[1] & 1)) return (size_t)-FSE_ERROR_corruptionDetected;   /* by construction : at least 2 elts of rank 1, must be even */
1030*0c16b537SWarner Losh 
1031*0c16b537SWarner Losh     /* Prepare ranks */
1032*0c16b537SWarner Losh     nextRankStart = 0;
1033*0c16b537SWarner Losh     for (n=1; n<=maxBits; n++)
1034*0c16b537SWarner Losh     {
1035*0c16b537SWarner Losh         U32 current = nextRankStart;
1036*0c16b537SWarner Losh         nextRankStart += (rankVal[n] << (n-1));
1037*0c16b537SWarner Losh         rankVal[n] = current;
1038*0c16b537SWarner Losh     }
1039*0c16b537SWarner Losh 
1040*0c16b537SWarner Losh     /* fill DTable */
1041*0c16b537SWarner Losh     for (n=0; n<=oSize; n++)
1042*0c16b537SWarner Losh     {
1043*0c16b537SWarner Losh         const U32 w = huffWeight[n];
1044*0c16b537SWarner Losh         const U32 length = (1 << w) >> 1;
1045*0c16b537SWarner Losh         U32 i;
1046*0c16b537SWarner Losh         HUF_DElt D;
1047*0c16b537SWarner Losh         D.byte = (BYTE)n; D.nbBits = (BYTE)(maxBits + 1 - w);
1048*0c16b537SWarner Losh         for (i = rankVal[w]; i < rankVal[w] + length; i++)
1049*0c16b537SWarner Losh             dt[i] = D;
1050*0c16b537SWarner Losh         rankVal[w] += length;
1051*0c16b537SWarner Losh     }
1052*0c16b537SWarner Losh 
1053*0c16b537SWarner Losh     return iSize+1;
1054*0c16b537SWarner Losh }
1055*0c16b537SWarner Losh 
1056*0c16b537SWarner Losh 
1057*0c16b537SWarner Losh static BYTE HUF_decodeSymbol(FSE_DStream_t* Dstream, const HUF_DElt* dt, const U32 dtLog)
1058*0c16b537SWarner Losh {
1059*0c16b537SWarner Losh         const size_t val = FSE_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
1060*0c16b537SWarner Losh         const BYTE c = dt[val].byte;
1061*0c16b537SWarner Losh         FSE_skipBits(Dstream, dt[val].nbBits);
1062*0c16b537SWarner Losh         return c;
1063*0c16b537SWarner Losh }
1064*0c16b537SWarner Losh 
1065*0c16b537SWarner Losh static size_t HUF_decompress_usingDTable(   /* -3% slower when non static */
1066*0c16b537SWarner Losh           void* dst, size_t maxDstSize,
1067*0c16b537SWarner Losh     const void* cSrc, size_t cSrcSize,
1068*0c16b537SWarner Losh     const U16* DTable)
1069*0c16b537SWarner Losh {
1070*0c16b537SWarner Losh     BYTE* const ostart = (BYTE*) dst;
1071*0c16b537SWarner Losh     BYTE* op = ostart;
1072*0c16b537SWarner Losh     BYTE* const omax = op + maxDstSize;
1073*0c16b537SWarner Losh     BYTE* const olimit = omax-15;
1074*0c16b537SWarner Losh 
1075*0c16b537SWarner Losh     const void* ptr = DTable;
1076*0c16b537SWarner Losh     const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1;
1077*0c16b537SWarner Losh     const U32 dtLog = DTable[0];
1078*0c16b537SWarner Losh     size_t errorCode;
1079*0c16b537SWarner Losh     U32 reloadStatus;
1080*0c16b537SWarner Losh 
1081*0c16b537SWarner Losh     /* Init */
1082*0c16b537SWarner Losh 
1083*0c16b537SWarner Losh     const U16* jumpTable = (const U16*)cSrc;
1084*0c16b537SWarner Losh     const size_t length1 = FSE_readLE16(jumpTable);
1085*0c16b537SWarner Losh     const size_t length2 = FSE_readLE16(jumpTable+1);
1086*0c16b537SWarner Losh     const size_t length3 = FSE_readLE16(jumpTable+2);
1087*0c16b537SWarner Losh     const size_t length4 = cSrcSize - 6 - length1 - length2 - length3;   // check coherency !!
1088*0c16b537SWarner Losh     const char* const start1 = (const char*)(cSrc) + 6;
1089*0c16b537SWarner Losh     const char* const start2 = start1 + length1;
1090*0c16b537SWarner Losh     const char* const start3 = start2 + length2;
1091*0c16b537SWarner Losh     const char* const start4 = start3 + length3;
1092*0c16b537SWarner Losh     FSE_DStream_t bitD1, bitD2, bitD3, bitD4;
1093*0c16b537SWarner Losh 
1094*0c16b537SWarner Losh     if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
1095*0c16b537SWarner Losh 
1096*0c16b537SWarner Losh     errorCode = FSE_initDStream(&bitD1, start1, length1);
1097*0c16b537SWarner Losh     if (FSE_isError(errorCode)) return errorCode;
1098*0c16b537SWarner Losh     errorCode = FSE_initDStream(&bitD2, start2, length2);
1099*0c16b537SWarner Losh     if (FSE_isError(errorCode)) return errorCode;
1100*0c16b537SWarner Losh     errorCode = FSE_initDStream(&bitD3, start3, length3);
1101*0c16b537SWarner Losh     if (FSE_isError(errorCode)) return errorCode;
1102*0c16b537SWarner Losh     errorCode = FSE_initDStream(&bitD4, start4, length4);
1103*0c16b537SWarner Losh     if (FSE_isError(errorCode)) return errorCode;
1104*0c16b537SWarner Losh 
1105*0c16b537SWarner Losh     reloadStatus=FSE_reloadDStream(&bitD2);
1106*0c16b537SWarner Losh 
1107*0c16b537SWarner Losh     /* 16 symbols per loop */
1108*0c16b537SWarner Losh     for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit);  /* D2-3-4 are supposed to be synchronized and finish together */
1109*0c16b537SWarner Losh         op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
1110*0c16b537SWarner Losh     {
1111*0c16b537SWarner Losh #define HUF_DECODE_SYMBOL_0(n, Dstream) \
1112*0c16b537SWarner Losh         op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);
1113*0c16b537SWarner Losh 
1114*0c16b537SWarner Losh #define HUF_DECODE_SYMBOL_1(n, Dstream) \
1115*0c16b537SWarner Losh         op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
1116*0c16b537SWarner Losh         if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)
1117*0c16b537SWarner Losh 
1118*0c16b537SWarner Losh #define HUF_DECODE_SYMBOL_2(n, Dstream) \
1119*0c16b537SWarner Losh         op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
1120*0c16b537SWarner Losh         if (FSE_32bits()) FSE_reloadDStream(&Dstream)
1121*0c16b537SWarner Losh 
1122*0c16b537SWarner Losh         HUF_DECODE_SYMBOL_1( 0, bitD1);
1123*0c16b537SWarner Losh         HUF_DECODE_SYMBOL_1( 1, bitD2);
1124*0c16b537SWarner Losh         HUF_DECODE_SYMBOL_1( 2, bitD3);
1125*0c16b537SWarner Losh         HUF_DECODE_SYMBOL_1( 3, bitD4);
1126*0c16b537SWarner Losh         HUF_DECODE_SYMBOL_2( 4, bitD1);
1127*0c16b537SWarner Losh         HUF_DECODE_SYMBOL_2( 5, bitD2);
1128*0c16b537SWarner Losh         HUF_DECODE_SYMBOL_2( 6, bitD3);
1129*0c16b537SWarner Losh         HUF_DECODE_SYMBOL_2( 7, bitD4);
1130*0c16b537SWarner Losh         HUF_DECODE_SYMBOL_1( 8, bitD1);
1131*0c16b537SWarner Losh         HUF_DECODE_SYMBOL_1( 9, bitD2);
1132*0c16b537SWarner Losh         HUF_DECODE_SYMBOL_1(10, bitD3);
1133*0c16b537SWarner Losh         HUF_DECODE_SYMBOL_1(11, bitD4);
1134*0c16b537SWarner Losh         HUF_DECODE_SYMBOL_0(12, bitD1);
1135*0c16b537SWarner Losh         HUF_DECODE_SYMBOL_0(13, bitD2);
1136*0c16b537SWarner Losh         HUF_DECODE_SYMBOL_0(14, bitD3);
1137*0c16b537SWarner Losh         HUF_DECODE_SYMBOL_0(15, bitD4);
1138*0c16b537SWarner Losh     }
1139*0c16b537SWarner Losh 
1140*0c16b537SWarner Losh     if (reloadStatus!=FSE_DStream_completed)   /* not complete : some bitStream might be FSE_DStream_unfinished */
1141*0c16b537SWarner Losh         return (size_t)-FSE_ERROR_corruptionDetected;
1142*0c16b537SWarner Losh 
1143*0c16b537SWarner Losh     /* tail */
1144*0c16b537SWarner Losh     {
1145*0c16b537SWarner Losh         // bitTail = bitD1;   // *much* slower : -20% !??!
1146*0c16b537SWarner Losh         FSE_DStream_t bitTail;
1147*0c16b537SWarner Losh         bitTail.ptr = bitD1.ptr;
1148*0c16b537SWarner Losh         bitTail.bitsConsumed = bitD1.bitsConsumed;
1149*0c16b537SWarner Losh         bitTail.bitContainer = bitD1.bitContainer;   // required in case of FSE_DStream_endOfBuffer
1150*0c16b537SWarner Losh         bitTail.start = start1;
1151*0c16b537SWarner Losh         for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
1152*0c16b537SWarner Losh         {
1153*0c16b537SWarner Losh             HUF_DECODE_SYMBOL_0(0, bitTail);
1154*0c16b537SWarner Losh         }
1155*0c16b537SWarner Losh 
1156*0c16b537SWarner Losh         if (FSE_endOfDStream(&bitTail))
1157*0c16b537SWarner Losh             return op-ostart;
1158*0c16b537SWarner Losh     }
1159*0c16b537SWarner Losh 
1160*0c16b537SWarner Losh     if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */
1161*0c16b537SWarner Losh 
1162*0c16b537SWarner Losh     return (size_t)-FSE_ERROR_corruptionDetected;
1163*0c16b537SWarner Losh }
1164*0c16b537SWarner Losh 
1165*0c16b537SWarner Losh 
1166*0c16b537SWarner Losh static size_t HUF_decompress (void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
1167*0c16b537SWarner Losh {
1168*0c16b537SWarner Losh     HUF_CREATE_STATIC_DTABLE(DTable, HUF_MAX_TABLELOG);
1169*0c16b537SWarner Losh     const BYTE* ip = (const BYTE*) cSrc;
1170*0c16b537SWarner Losh     size_t errorCode;
1171*0c16b537SWarner Losh 
1172*0c16b537SWarner Losh     errorCode = HUF_readDTable (DTable, cSrc, cSrcSize);
1173*0c16b537SWarner Losh     if (FSE_isError(errorCode)) return errorCode;
1174*0c16b537SWarner Losh     if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
1175*0c16b537SWarner Losh     ip += errorCode;
1176*0c16b537SWarner Losh     cSrcSize -= errorCode;
1177*0c16b537SWarner Losh 
1178*0c16b537SWarner Losh     return HUF_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable);
1179*0c16b537SWarner Losh }
1180*0c16b537SWarner Losh 
1181*0c16b537SWarner Losh 
1182*0c16b537SWarner Losh #endif   /* FSE_COMMONDEFS_ONLY */
1183*0c16b537SWarner Losh 
1184*0c16b537SWarner Losh /*
1185*0c16b537SWarner Losh     zstd - standard compression library
1186*0c16b537SWarner Losh     Copyright (C) 2014-2015, Yann Collet.
1187*0c16b537SWarner Losh 
1188*0c16b537SWarner Losh     BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
1189*0c16b537SWarner Losh 
1190*0c16b537SWarner Losh     Redistribution and use in source and binary forms, with or without
1191*0c16b537SWarner Losh     modification, are permitted provided that the following conditions are
1192*0c16b537SWarner Losh     met:
1193*0c16b537SWarner Losh     * Redistributions of source code must retain the above copyright
1194*0c16b537SWarner Losh     notice, this list of conditions and the following disclaimer.
1195*0c16b537SWarner Losh     * Redistributions in binary form must reproduce the above
1196*0c16b537SWarner Losh     copyright notice, this list of conditions and the following disclaimer
1197*0c16b537SWarner Losh     in the documentation and/or other materials provided with the
1198*0c16b537SWarner Losh     distribution.
1199*0c16b537SWarner Losh     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1200*0c16b537SWarner Losh     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1201*0c16b537SWarner Losh     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1202*0c16b537SWarner Losh     A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1203*0c16b537SWarner Losh     OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1204*0c16b537SWarner Losh     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1205*0c16b537SWarner Losh     LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1206*0c16b537SWarner Losh     DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1207*0c16b537SWarner Losh     THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1208*0c16b537SWarner Losh     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1209*0c16b537SWarner Losh     OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1210*0c16b537SWarner Losh 
1211*0c16b537SWarner Losh     You can contact the author at :
1212*0c16b537SWarner Losh     - zstd source repository : https://github.com/Cyan4973/zstd
1213*0c16b537SWarner Losh     - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
1214*0c16b537SWarner Losh */
1215*0c16b537SWarner Losh 
1216*0c16b537SWarner Losh /****************************************************************
1217*0c16b537SWarner Losh *  Tuning parameters
1218*0c16b537SWarner Losh *****************************************************************/
1219*0c16b537SWarner Losh /* MEMORY_USAGE :
1220*0c16b537SWarner Losh *  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
1221*0c16b537SWarner Losh *  Increasing memory usage improves compression ratio
1222*0c16b537SWarner Losh *  Reduced memory usage can improve speed, due to cache effect */
1223*0c16b537SWarner Losh #define ZSTD_MEMORY_USAGE 17
1224*0c16b537SWarner Losh 
1225*0c16b537SWarner Losh 
1226*0c16b537SWarner Losh /**************************************
1227*0c16b537SWarner Losh    CPU Feature Detection
1228*0c16b537SWarner Losh **************************************/
1229*0c16b537SWarner Losh /*
1230*0c16b537SWarner Losh  * Automated efficient unaligned memory access detection
1231*0c16b537SWarner Losh  * Based on known hardware architectures
1232*0c16b537SWarner Losh  * This list will be updated thanks to feedbacks
1233*0c16b537SWarner Losh  */
1234*0c16b537SWarner Losh #if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \
1235*0c16b537SWarner Losh     || defined(__ARM_FEATURE_UNALIGNED) \
1236*0c16b537SWarner Losh     || defined(__i386__) || defined(__x86_64__) \
1237*0c16b537SWarner Losh     || defined(_M_IX86) || defined(_M_X64) \
1238*0c16b537SWarner Losh     || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__) \
1239*0c16b537SWarner Losh     || (defined(_M_ARM) && (_M_ARM >= 7))
1240*0c16b537SWarner Losh #  define ZSTD_UNALIGNED_ACCESS 1
1241*0c16b537SWarner Losh #else
1242*0c16b537SWarner Losh #  define ZSTD_UNALIGNED_ACCESS 0
1243*0c16b537SWarner Losh #endif
1244*0c16b537SWarner Losh 
1245*0c16b537SWarner Losh 
1246*0c16b537SWarner Losh /********************************************************
1247*0c16b537SWarner Losh *  Includes
1248*0c16b537SWarner Losh *********************************************************/
1249*0c16b537SWarner Losh #include <stdlib.h>      /* calloc */
1250*0c16b537SWarner Losh #include <string.h>      /* memcpy, memmove */
1251*0c16b537SWarner Losh #include <stdio.h>       /* debug : printf */
1252*0c16b537SWarner Losh 
1253*0c16b537SWarner Losh 
1254*0c16b537SWarner Losh /********************************************************
1255*0c16b537SWarner Losh *  Compiler specifics
1256*0c16b537SWarner Losh *********************************************************/
1257*0c16b537SWarner Losh #ifdef __AVX2__
1258*0c16b537SWarner Losh #  include <immintrin.h>   /* AVX2 intrinsics */
1259*0c16b537SWarner Losh #endif
1260*0c16b537SWarner Losh 
1261*0c16b537SWarner Losh #ifdef _MSC_VER    /* Visual Studio */
1262*0c16b537SWarner Losh #  include <intrin.h>                    /* For Visual 2005 */
1263*0c16b537SWarner Losh #  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
1264*0c16b537SWarner Losh #  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
1265*0c16b537SWarner Losh #endif
1266*0c16b537SWarner Losh 
1267*0c16b537SWarner Losh 
1268*0c16b537SWarner Losh #ifndef MEM_ACCESS_MODULE
1269*0c16b537SWarner Losh #define MEM_ACCESS_MODULE
1270*0c16b537SWarner Losh /********************************************************
1271*0c16b537SWarner Losh *  Basic Types
1272*0c16b537SWarner Losh *********************************************************/
1273*0c16b537SWarner Losh #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
1274*0c16b537SWarner Losh # include <stdint.h>
1275*0c16b537SWarner Losh typedef  uint8_t BYTE;
1276*0c16b537SWarner Losh typedef uint16_t U16;
1277*0c16b537SWarner Losh typedef  int16_t S16;
1278*0c16b537SWarner Losh typedef uint32_t U32;
1279*0c16b537SWarner Losh typedef  int32_t S32;
1280*0c16b537SWarner Losh typedef uint64_t U64;
1281*0c16b537SWarner Losh #else
1282*0c16b537SWarner Losh typedef unsigned char       BYTE;
1283*0c16b537SWarner Losh typedef unsigned short      U16;
1284*0c16b537SWarner Losh typedef   signed short      S16;
1285*0c16b537SWarner Losh typedef unsigned int        U32;
1286*0c16b537SWarner Losh typedef   signed int        S32;
1287*0c16b537SWarner Losh typedef unsigned long long  U64;
1288*0c16b537SWarner Losh #endif
1289*0c16b537SWarner Losh 
1290*0c16b537SWarner Losh #endif   /* MEM_ACCESS_MODULE */
1291*0c16b537SWarner Losh 
1292*0c16b537SWarner Losh 
1293*0c16b537SWarner Losh /********************************************************
1294*0c16b537SWarner Losh *  Constants
1295*0c16b537SWarner Losh *********************************************************/
1296*0c16b537SWarner Losh static const U32 ZSTD_magicNumber = 0xFD2FB51E;   /* 3rd version : seqNb header */
1297*0c16b537SWarner Losh 
1298*0c16b537SWarner Losh #define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
1299*0c16b537SWarner Losh #define HASH_TABLESIZE (1 << HASH_LOG)
1300*0c16b537SWarner Losh #define HASH_MASK (HASH_TABLESIZE - 1)
1301*0c16b537SWarner Losh 
1302*0c16b537SWarner Losh #define KNUTH 2654435761
1303*0c16b537SWarner Losh 
1304*0c16b537SWarner Losh #define BIT7 128
1305*0c16b537SWarner Losh #define BIT6  64
1306*0c16b537SWarner Losh #define BIT5  32
1307*0c16b537SWarner Losh #define BIT4  16
1308*0c16b537SWarner Losh 
1309*0c16b537SWarner Losh #define KB *(1 <<10)
1310*0c16b537SWarner Losh #define MB *(1 <<20)
1311*0c16b537SWarner Losh #define GB *(1U<<30)
1312*0c16b537SWarner Losh 
1313*0c16b537SWarner Losh #define BLOCKSIZE (128 KB)                 /* define, for static allocation */
1314*0c16b537SWarner Losh 
1315*0c16b537SWarner Losh #define WORKPLACESIZE (BLOCKSIZE*3)
1316*0c16b537SWarner Losh #define MINMATCH 4
1317*0c16b537SWarner Losh #define MLbits   7
1318*0c16b537SWarner Losh #define LLbits   6
1319*0c16b537SWarner Losh #define Offbits  5
1320*0c16b537SWarner Losh #define MaxML  ((1<<MLbits )-1)
1321*0c16b537SWarner Losh #define MaxLL  ((1<<LLbits )-1)
1322*0c16b537SWarner Losh #define MaxOff ((1<<Offbits)-1)
1323*0c16b537SWarner Losh #define LitFSELog  11
1324*0c16b537SWarner Losh #define MLFSELog   10
1325*0c16b537SWarner Losh #define LLFSELog   10
1326*0c16b537SWarner Losh #define OffFSELog   9
1327*0c16b537SWarner Losh #define MAX(a,b) ((a)<(b)?(b):(a))
1328*0c16b537SWarner Losh #define MaxSeq MAX(MaxLL, MaxML)
1329*0c16b537SWarner Losh 
1330*0c16b537SWarner Losh #define LITERAL_NOENTROPY 63
1331*0c16b537SWarner Losh #define COMMAND_NOENTROPY 7   /* to remove */
1332*0c16b537SWarner Losh 
1333*0c16b537SWarner Losh static const size_t ZSTD_blockHeaderSize = 3;
1334*0c16b537SWarner Losh static const size_t ZSTD_frameHeaderSize = 4;
1335*0c16b537SWarner Losh 
1336*0c16b537SWarner Losh 
1337*0c16b537SWarner Losh /********************************************************
1338*0c16b537SWarner Losh *  Memory operations
1339*0c16b537SWarner Losh *********************************************************/
1340*0c16b537SWarner Losh static unsigned ZSTD_32bits(void) { return sizeof(void*)==4; }
1341*0c16b537SWarner Losh 
1342*0c16b537SWarner Losh static unsigned ZSTD_isLittleEndian(void)
1343*0c16b537SWarner Losh {
1344*0c16b537SWarner Losh     const union { U32 i; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
1345*0c16b537SWarner Losh     return one.c[0];
1346*0c16b537SWarner Losh }
1347*0c16b537SWarner Losh 
1348*0c16b537SWarner Losh static U16    ZSTD_read16(const void* p) { U16 r; memcpy(&r, p, sizeof(r)); return r; }
1349*0c16b537SWarner Losh 
1350*0c16b537SWarner Losh static U32    ZSTD_read32(const void* p) { U32 r; memcpy(&r, p, sizeof(r)); return r; }
1351*0c16b537SWarner Losh 
1352*0c16b537SWarner Losh static void   ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
1353*0c16b537SWarner Losh 
1354*0c16b537SWarner Losh static void   ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
1355*0c16b537SWarner Losh 
1356*0c16b537SWarner Losh #define COPY8(d,s)    { ZSTD_copy8(d,s); d+=8; s+=8; }
1357*0c16b537SWarner Losh 
1358*0c16b537SWarner Losh static void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
1359*0c16b537SWarner Losh {
1360*0c16b537SWarner Losh     const BYTE* ip = (const BYTE*)src;
1361*0c16b537SWarner Losh     BYTE* op = (BYTE*)dst;
1362*0c16b537SWarner Losh     BYTE* const oend = op + length;
1363*0c16b537SWarner Losh     while (op < oend) COPY8(op, ip);
1364*0c16b537SWarner Losh }
1365*0c16b537SWarner Losh 
1366*0c16b537SWarner Losh static U16 ZSTD_readLE16(const void* memPtr)
1367*0c16b537SWarner Losh {
1368*0c16b537SWarner Losh     if (ZSTD_isLittleEndian()) return ZSTD_read16(memPtr);
1369*0c16b537SWarner Losh     else
1370*0c16b537SWarner Losh     {
1371*0c16b537SWarner Losh         const BYTE* p = (const BYTE*)memPtr;
1372*0c16b537SWarner Losh         return (U16)((U16)p[0] + ((U16)p[1]<<8));
1373*0c16b537SWarner Losh     }
1374*0c16b537SWarner Losh }
1375*0c16b537SWarner Losh 
1376*0c16b537SWarner Losh 
1377*0c16b537SWarner Losh static U32 ZSTD_readLE32(const void* memPtr)
1378*0c16b537SWarner Losh {
1379*0c16b537SWarner Losh     if (ZSTD_isLittleEndian())
1380*0c16b537SWarner Losh         return ZSTD_read32(memPtr);
1381*0c16b537SWarner Losh     else
1382*0c16b537SWarner Losh     {
1383*0c16b537SWarner Losh         const BYTE* p = (const BYTE*)memPtr;
1384*0c16b537SWarner Losh         return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
1385*0c16b537SWarner Losh     }
1386*0c16b537SWarner Losh }
1387*0c16b537SWarner Losh 
1388*0c16b537SWarner Losh static U32 ZSTD_readBE32(const void* memPtr)
1389*0c16b537SWarner Losh {
1390*0c16b537SWarner Losh     const BYTE* p = (const BYTE*)memPtr;
1391*0c16b537SWarner Losh     return (U32)(((U32)p[0]<<24) + ((U32)p[1]<<16) + ((U32)p[2]<<8) + ((U32)p[3]<<0));
1392*0c16b537SWarner Losh }
1393*0c16b537SWarner Losh 
1394*0c16b537SWarner Losh 
1395*0c16b537SWarner Losh /**************************************
1396*0c16b537SWarner Losh *  Local structures
1397*0c16b537SWarner Losh ***************************************/
1398*0c16b537SWarner Losh typedef struct ZSTD_Cctx_s ZSTD_Cctx;
1399*0c16b537SWarner Losh 
1400*0c16b537SWarner Losh typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
1401*0c16b537SWarner Losh 
1402*0c16b537SWarner Losh typedef struct
1403*0c16b537SWarner Losh {
1404*0c16b537SWarner Losh     blockType_t blockType;
1405*0c16b537SWarner Losh     U32 origSize;
1406*0c16b537SWarner Losh } blockProperties_t;
1407*0c16b537SWarner Losh 
1408*0c16b537SWarner Losh typedef struct {
1409*0c16b537SWarner Losh     void* buffer;
1410*0c16b537SWarner Losh     U32*  offsetStart;
1411*0c16b537SWarner Losh     U32*  offset;
1412*0c16b537SWarner Losh     BYTE* offCodeStart;
1413*0c16b537SWarner Losh     BYTE* offCode;
1414*0c16b537SWarner Losh     BYTE* litStart;
1415*0c16b537SWarner Losh     BYTE* lit;
1416*0c16b537SWarner Losh     BYTE* litLengthStart;
1417*0c16b537SWarner Losh     BYTE* litLength;
1418*0c16b537SWarner Losh     BYTE* matchLengthStart;
1419*0c16b537SWarner Losh     BYTE* matchLength;
1420*0c16b537SWarner Losh     BYTE* dumpsStart;
1421*0c16b537SWarner Losh     BYTE* dumps;
1422*0c16b537SWarner Losh } seqStore_t;
1423*0c16b537SWarner Losh 
1424*0c16b537SWarner Losh 
1425*0c16b537SWarner Losh typedef struct ZSTD_Cctx_s
1426*0c16b537SWarner Losh {
1427*0c16b537SWarner Losh     const BYTE* base;
1428*0c16b537SWarner Losh     U32 current;
1429*0c16b537SWarner Losh     U32 nextUpdate;
1430*0c16b537SWarner Losh     seqStore_t seqStore;
1431*0c16b537SWarner Losh #ifdef __AVX2__
1432*0c16b537SWarner Losh     __m256i hashTable[HASH_TABLESIZE>>3];
1433*0c16b537SWarner Losh #else
1434*0c16b537SWarner Losh     U32 hashTable[HASH_TABLESIZE];
1435*0c16b537SWarner Losh #endif
1436*0c16b537SWarner Losh     BYTE buffer[WORKPLACESIZE];
1437*0c16b537SWarner Losh } cctxi_t;
1438*0c16b537SWarner Losh 
1439*0c16b537SWarner Losh 
1440*0c16b537SWarner Losh 
1441*0c16b537SWarner Losh 
1442*0c16b537SWarner Losh /**************************************
1443*0c16b537SWarner Losh *  Error Management
1444*0c16b537SWarner Losh **************************************/
1445*0c16b537SWarner Losh /* published entry point */
1446*0c16b537SWarner Losh unsigned ZSTDv01_isError(size_t code) { return ERR_isError(code); }
1447*0c16b537SWarner Losh 
1448*0c16b537SWarner Losh 
1449*0c16b537SWarner Losh /**************************************
1450*0c16b537SWarner Losh *  Tool functions
1451*0c16b537SWarner Losh **************************************/
1452*0c16b537SWarner Losh #define ZSTD_VERSION_MAJOR    0    /* for breaking interface changes  */
1453*0c16b537SWarner Losh #define ZSTD_VERSION_MINOR    1    /* for new (non-breaking) interface capabilities */
1454*0c16b537SWarner Losh #define ZSTD_VERSION_RELEASE  3    /* for tweaks, bug-fixes, or development */
1455*0c16b537SWarner Losh #define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
1456*0c16b537SWarner Losh 
1457*0c16b537SWarner Losh /**************************************************************
1458*0c16b537SWarner Losh *   Decompression code
1459*0c16b537SWarner Losh **************************************************************/
1460*0c16b537SWarner Losh 
1461*0c16b537SWarner Losh size_t ZSTDv01_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
1462*0c16b537SWarner Losh {
1463*0c16b537SWarner Losh     const BYTE* const in = (const BYTE* const)src;
1464*0c16b537SWarner Losh     BYTE headerFlags;
1465*0c16b537SWarner Losh     U32 cSize;
1466*0c16b537SWarner Losh 
1467*0c16b537SWarner Losh     if (srcSize < 3) return ERROR(srcSize_wrong);
1468*0c16b537SWarner Losh 
1469*0c16b537SWarner Losh     headerFlags = *in;
1470*0c16b537SWarner Losh     cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
1471*0c16b537SWarner Losh 
1472*0c16b537SWarner Losh     bpPtr->blockType = (blockType_t)(headerFlags >> 6);
1473*0c16b537SWarner Losh     bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
1474*0c16b537SWarner Losh 
1475*0c16b537SWarner Losh     if (bpPtr->blockType == bt_end) return 0;
1476*0c16b537SWarner Losh     if (bpPtr->blockType == bt_rle) return 1;
1477*0c16b537SWarner Losh     return cSize;
1478*0c16b537SWarner Losh }
1479*0c16b537SWarner Losh 
1480*0c16b537SWarner Losh 
1481*0c16b537SWarner Losh static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1482*0c16b537SWarner Losh {
1483*0c16b537SWarner Losh     if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
1484*0c16b537SWarner Losh     memcpy(dst, src, srcSize);
1485*0c16b537SWarner Losh     return srcSize;
1486*0c16b537SWarner Losh }
1487*0c16b537SWarner Losh 
1488*0c16b537SWarner Losh 
1489*0c16b537SWarner Losh static size_t ZSTD_decompressLiterals(void* ctx,
1490*0c16b537SWarner Losh                                       void* dst, size_t maxDstSize,
1491*0c16b537SWarner Losh                                 const void* src, size_t srcSize)
1492*0c16b537SWarner Losh {
1493*0c16b537SWarner Losh     BYTE* op = (BYTE*)dst;
1494*0c16b537SWarner Losh     BYTE* const oend = op + maxDstSize;
1495*0c16b537SWarner Losh     const BYTE* ip = (const BYTE*)src;
1496*0c16b537SWarner Losh     size_t errorCode;
1497*0c16b537SWarner Losh     size_t litSize;
1498*0c16b537SWarner Losh 
1499*0c16b537SWarner Losh     /* check : minimum 2, for litSize, +1, for content */
1500*0c16b537SWarner Losh     if (srcSize <= 3) return ERROR(corruption_detected);
1501*0c16b537SWarner Losh 
1502*0c16b537SWarner Losh     litSize = ip[1] + (ip[0]<<8);
1503*0c16b537SWarner Losh     litSize += ((ip[-3] >> 3) & 7) << 16;   // mmmmh....
1504*0c16b537SWarner Losh     op = oend - litSize;
1505*0c16b537SWarner Losh 
1506*0c16b537SWarner Losh     (void)ctx;
1507*0c16b537SWarner Losh     if (litSize > maxDstSize) return ERROR(dstSize_tooSmall);
1508*0c16b537SWarner Losh     errorCode = HUF_decompress(op, litSize, ip+2, srcSize-2);
1509*0c16b537SWarner Losh     if (FSE_isError(errorCode)) return ERROR(GENERIC);
1510*0c16b537SWarner Losh     return litSize;
1511*0c16b537SWarner Losh }
1512*0c16b537SWarner Losh 
1513*0c16b537SWarner Losh 
1514*0c16b537SWarner Losh size_t ZSTDv01_decodeLiteralsBlock(void* ctx,
1515*0c16b537SWarner Losh                                 void* dst, size_t maxDstSize,
1516*0c16b537SWarner Losh                           const BYTE** litStart, size_t* litSize,
1517*0c16b537SWarner Losh                           const void* src, size_t srcSize)
1518*0c16b537SWarner Losh {
1519*0c16b537SWarner Losh     const BYTE* const istart = (const BYTE* const)src;
1520*0c16b537SWarner Losh     const BYTE* ip = istart;
1521*0c16b537SWarner Losh     BYTE* const ostart = (BYTE* const)dst;
1522*0c16b537SWarner Losh     BYTE* const oend = ostart + maxDstSize;
1523*0c16b537SWarner Losh     blockProperties_t litbp;
1524*0c16b537SWarner Losh 
1525*0c16b537SWarner Losh     size_t litcSize = ZSTDv01_getcBlockSize(src, srcSize, &litbp);
1526*0c16b537SWarner Losh     if (ZSTDv01_isError(litcSize)) return litcSize;
1527*0c16b537SWarner Losh     if (litcSize > srcSize - ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
1528*0c16b537SWarner Losh     ip += ZSTD_blockHeaderSize;
1529*0c16b537SWarner Losh 
1530*0c16b537SWarner Losh     switch(litbp.blockType)
1531*0c16b537SWarner Losh     {
1532*0c16b537SWarner Losh     case bt_raw:
1533*0c16b537SWarner Losh         *litStart = ip;
1534*0c16b537SWarner Losh         ip += litcSize;
1535*0c16b537SWarner Losh         *litSize = litcSize;
1536*0c16b537SWarner Losh         break;
1537*0c16b537SWarner Losh     case bt_rle:
1538*0c16b537SWarner Losh         {
1539*0c16b537SWarner Losh             size_t rleSize = litbp.origSize;
1540*0c16b537SWarner Losh             if (rleSize>maxDstSize) return ERROR(dstSize_tooSmall);
1541*0c16b537SWarner Losh             if (!srcSize) return ERROR(srcSize_wrong);
1542*0c16b537SWarner Losh             memset(oend - rleSize, *ip, rleSize);
1543*0c16b537SWarner Losh             *litStart = oend - rleSize;
1544*0c16b537SWarner Losh             *litSize = rleSize;
1545*0c16b537SWarner Losh             ip++;
1546*0c16b537SWarner Losh             break;
1547*0c16b537SWarner Losh         }
1548*0c16b537SWarner Losh     case bt_compressed:
1549*0c16b537SWarner Losh         {
1550*0c16b537SWarner Losh             size_t decodedLitSize = ZSTD_decompressLiterals(ctx, dst, maxDstSize, ip, litcSize);
1551*0c16b537SWarner Losh             if (ZSTDv01_isError(decodedLitSize)) return decodedLitSize;
1552*0c16b537SWarner Losh             *litStart = oend - decodedLitSize;
1553*0c16b537SWarner Losh             *litSize = decodedLitSize;
1554*0c16b537SWarner Losh             ip += litcSize;
1555*0c16b537SWarner Losh             break;
1556*0c16b537SWarner Losh         }
1557*0c16b537SWarner Losh     case bt_end:
1558*0c16b537SWarner Losh     default:
1559*0c16b537SWarner Losh         return ERROR(GENERIC);
1560*0c16b537SWarner Losh     }
1561*0c16b537SWarner Losh 
1562*0c16b537SWarner Losh     return ip-istart;
1563*0c16b537SWarner Losh }
1564*0c16b537SWarner Losh 
1565*0c16b537SWarner Losh 
1566*0c16b537SWarner Losh size_t ZSTDv01_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
1567*0c16b537SWarner Losh                          FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
1568*0c16b537SWarner Losh                          const void* src, size_t srcSize)
1569*0c16b537SWarner Losh {
1570*0c16b537SWarner Losh     const BYTE* const istart = (const BYTE* const)src;
1571*0c16b537SWarner Losh     const BYTE* ip = istart;
1572*0c16b537SWarner Losh     const BYTE* const iend = istart + srcSize;
1573*0c16b537SWarner Losh     U32 LLtype, Offtype, MLtype;
1574*0c16b537SWarner Losh     U32 LLlog, Offlog, MLlog;
1575*0c16b537SWarner Losh     size_t dumpsLength;
1576*0c16b537SWarner Losh 
1577*0c16b537SWarner Losh     /* check */
1578*0c16b537SWarner Losh     if (srcSize < 5) return ERROR(srcSize_wrong);
1579*0c16b537SWarner Losh 
1580*0c16b537SWarner Losh     /* SeqHead */
1581*0c16b537SWarner Losh     *nbSeq = ZSTD_readLE16(ip); ip+=2;
1582*0c16b537SWarner Losh     LLtype  = *ip >> 6;
1583*0c16b537SWarner Losh     Offtype = (*ip >> 4) & 3;
1584*0c16b537SWarner Losh     MLtype  = (*ip >> 2) & 3;
1585*0c16b537SWarner Losh     if (*ip & 2)
1586*0c16b537SWarner Losh     {
1587*0c16b537SWarner Losh         dumpsLength  = ip[2];
1588*0c16b537SWarner Losh         dumpsLength += ip[1] << 8;
1589*0c16b537SWarner Losh         ip += 3;
1590*0c16b537SWarner Losh     }
1591*0c16b537SWarner Losh     else
1592*0c16b537SWarner Losh     {
1593*0c16b537SWarner Losh         dumpsLength  = ip[1];
1594*0c16b537SWarner Losh         dumpsLength += (ip[0] & 1) << 8;
1595*0c16b537SWarner Losh         ip += 2;
1596*0c16b537SWarner Losh     }
1597*0c16b537SWarner Losh     *dumpsPtr = ip;
1598*0c16b537SWarner Losh     ip += dumpsLength;
1599*0c16b537SWarner Losh     *dumpsLengthPtr = dumpsLength;
1600*0c16b537SWarner Losh 
1601*0c16b537SWarner Losh     /* check */
1602*0c16b537SWarner Losh     if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
1603*0c16b537SWarner Losh 
1604*0c16b537SWarner Losh     /* sequences */
1605*0c16b537SWarner Losh     {
1606*0c16b537SWarner Losh         S16 norm[MaxML+1];    /* assumption : MaxML >= MaxLL and MaxOff */
1607*0c16b537SWarner Losh         size_t headerSize;
1608*0c16b537SWarner Losh 
1609*0c16b537SWarner Losh         /* Build DTables */
1610*0c16b537SWarner Losh         switch(LLtype)
1611*0c16b537SWarner Losh         {
1612*0c16b537SWarner Losh         case bt_rle :
1613*0c16b537SWarner Losh             LLlog = 0;
1614*0c16b537SWarner Losh             FSE_buildDTable_rle(DTableLL, *ip++); break;
1615*0c16b537SWarner Losh         case bt_raw :
1616*0c16b537SWarner Losh             LLlog = LLbits;
1617*0c16b537SWarner Losh             FSE_buildDTable_raw(DTableLL, LLbits); break;
1618*0c16b537SWarner Losh         default :
1619*0c16b537SWarner Losh             {   U32 max = MaxLL;
1620*0c16b537SWarner Losh                 headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
1621*0c16b537SWarner Losh                 if (FSE_isError(headerSize)) return ERROR(GENERIC);
1622*0c16b537SWarner Losh                 if (LLlog > LLFSELog) return ERROR(corruption_detected);
1623*0c16b537SWarner Losh                 ip += headerSize;
1624*0c16b537SWarner Losh                 FSE_buildDTable(DTableLL, norm, max, LLlog);
1625*0c16b537SWarner Losh         }   }
1626*0c16b537SWarner Losh 
1627*0c16b537SWarner Losh         switch(Offtype)
1628*0c16b537SWarner Losh         {
1629*0c16b537SWarner Losh         case bt_rle :
1630*0c16b537SWarner Losh             Offlog = 0;
1631*0c16b537SWarner Losh             if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
1632*0c16b537SWarner Losh             FSE_buildDTable_rle(DTableOffb, *ip++); break;
1633*0c16b537SWarner Losh         case bt_raw :
1634*0c16b537SWarner Losh             Offlog = Offbits;
1635*0c16b537SWarner Losh             FSE_buildDTable_raw(DTableOffb, Offbits); break;
1636*0c16b537SWarner Losh         default :
1637*0c16b537SWarner Losh             {   U32 max = MaxOff;
1638*0c16b537SWarner Losh                 headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
1639*0c16b537SWarner Losh                 if (FSE_isError(headerSize)) return ERROR(GENERIC);
1640*0c16b537SWarner Losh                 if (Offlog > OffFSELog) return ERROR(corruption_detected);
1641*0c16b537SWarner Losh                 ip += headerSize;
1642*0c16b537SWarner Losh                 FSE_buildDTable(DTableOffb, norm, max, Offlog);
1643*0c16b537SWarner Losh         }   }
1644*0c16b537SWarner Losh 
1645*0c16b537SWarner Losh         switch(MLtype)
1646*0c16b537SWarner Losh         {
1647*0c16b537SWarner Losh         case bt_rle :
1648*0c16b537SWarner Losh             MLlog = 0;
1649*0c16b537SWarner Losh             if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
1650*0c16b537SWarner Losh             FSE_buildDTable_rle(DTableML, *ip++); break;
1651*0c16b537SWarner Losh         case bt_raw :
1652*0c16b537SWarner Losh             MLlog = MLbits;
1653*0c16b537SWarner Losh             FSE_buildDTable_raw(DTableML, MLbits); break;
1654*0c16b537SWarner Losh         default :
1655*0c16b537SWarner Losh             {   U32 max = MaxML;
1656*0c16b537SWarner Losh                 headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
1657*0c16b537SWarner Losh                 if (FSE_isError(headerSize)) return ERROR(GENERIC);
1658*0c16b537SWarner Losh                 if (MLlog > MLFSELog) return ERROR(corruption_detected);
1659*0c16b537SWarner Losh                 ip += headerSize;
1660*0c16b537SWarner Losh                 FSE_buildDTable(DTableML, norm, max, MLlog);
1661*0c16b537SWarner Losh     }   }   }
1662*0c16b537SWarner Losh 
1663*0c16b537SWarner Losh     return ip-istart;
1664*0c16b537SWarner Losh }
1665*0c16b537SWarner Losh 
1666*0c16b537SWarner Losh 
1667*0c16b537SWarner Losh typedef struct {
1668*0c16b537SWarner Losh     size_t litLength;
1669*0c16b537SWarner Losh     size_t offset;
1670*0c16b537SWarner Losh     size_t matchLength;
1671*0c16b537SWarner Losh } seq_t;
1672*0c16b537SWarner Losh 
1673*0c16b537SWarner Losh typedef struct {
1674*0c16b537SWarner Losh     FSE_DStream_t DStream;
1675*0c16b537SWarner Losh     FSE_DState_t stateLL;
1676*0c16b537SWarner Losh     FSE_DState_t stateOffb;
1677*0c16b537SWarner Losh     FSE_DState_t stateML;
1678*0c16b537SWarner Losh     size_t prevOffset;
1679*0c16b537SWarner Losh     const BYTE* dumps;
1680*0c16b537SWarner Losh     const BYTE* dumpsEnd;
1681*0c16b537SWarner Losh } seqState_t;
1682*0c16b537SWarner Losh 
1683*0c16b537SWarner Losh 
1684*0c16b537SWarner Losh static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
1685*0c16b537SWarner Losh {
1686*0c16b537SWarner Losh     size_t litLength;
1687*0c16b537SWarner Losh     size_t prevOffset;
1688*0c16b537SWarner Losh     size_t offset;
1689*0c16b537SWarner Losh     size_t matchLength;
1690*0c16b537SWarner Losh     const BYTE* dumps = seqState->dumps;
1691*0c16b537SWarner Losh     const BYTE* const de = seqState->dumpsEnd;
1692*0c16b537SWarner Losh 
1693*0c16b537SWarner Losh     /* Literal length */
1694*0c16b537SWarner Losh     litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
1695*0c16b537SWarner Losh     prevOffset = litLength ? seq->offset : seqState->prevOffset;
1696*0c16b537SWarner Losh     seqState->prevOffset = seq->offset;
1697*0c16b537SWarner Losh     if (litLength == MaxLL)
1698*0c16b537SWarner Losh     {
1699*0c16b537SWarner Losh         U32 add = dumps<de ? *dumps++ : 0;
1700*0c16b537SWarner Losh         if (add < 255) litLength += add;
1701*0c16b537SWarner Losh         else
1702*0c16b537SWarner Losh         {
1703*0c16b537SWarner Losh             if (dumps<=(de-3))
1704*0c16b537SWarner Losh             {
1705*0c16b537SWarner Losh                 litLength = ZSTD_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
1706*0c16b537SWarner Losh                 dumps += 3;
1707*0c16b537SWarner Losh             }
1708*0c16b537SWarner Losh         }
1709*0c16b537SWarner Losh     }
1710*0c16b537SWarner Losh 
1711*0c16b537SWarner Losh     /* Offset */
1712*0c16b537SWarner Losh     {
1713*0c16b537SWarner Losh         U32 offsetCode, nbBits;
1714*0c16b537SWarner Losh         offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream));
1715*0c16b537SWarner Losh         if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
1716*0c16b537SWarner Losh         nbBits = offsetCode - 1;
1717*0c16b537SWarner Losh         if (offsetCode==0) nbBits = 0;   /* cmove */
1718*0c16b537SWarner Losh         offset = ((size_t)1 << (nbBits & ((sizeof(offset)*8)-1))) + FSE_readBits(&(seqState->DStream), nbBits);
1719*0c16b537SWarner Losh         if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
1720*0c16b537SWarner Losh         if (offsetCode==0) offset = prevOffset;
1721*0c16b537SWarner Losh     }
1722*0c16b537SWarner Losh 
1723*0c16b537SWarner Losh     /* MatchLength */
1724*0c16b537SWarner Losh     matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
1725*0c16b537SWarner Losh     if (matchLength == MaxML)
1726*0c16b537SWarner Losh     {
1727*0c16b537SWarner Losh         U32 add = dumps<de ? *dumps++ : 0;
1728*0c16b537SWarner Losh         if (add < 255) matchLength += add;
1729*0c16b537SWarner Losh         else
1730*0c16b537SWarner Losh         {
1731*0c16b537SWarner Losh             if (dumps<=(de-3))
1732*0c16b537SWarner Losh             {
1733*0c16b537SWarner Losh                 matchLength = ZSTD_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
1734*0c16b537SWarner Losh                 dumps += 3;
1735*0c16b537SWarner Losh             }
1736*0c16b537SWarner Losh         }
1737*0c16b537SWarner Losh     }
1738*0c16b537SWarner Losh     matchLength += MINMATCH;
1739*0c16b537SWarner Losh 
1740*0c16b537SWarner Losh     /* save result */
1741*0c16b537SWarner Losh     seq->litLength = litLength;
1742*0c16b537SWarner Losh     seq->offset = offset;
1743*0c16b537SWarner Losh     seq->matchLength = matchLength;
1744*0c16b537SWarner Losh     seqState->dumps = dumps;
1745*0c16b537SWarner Losh }
1746*0c16b537SWarner Losh 
1747*0c16b537SWarner Losh 
1748*0c16b537SWarner Losh static size_t ZSTD_execSequence(BYTE* op,
1749*0c16b537SWarner Losh                                 seq_t sequence,
1750*0c16b537SWarner Losh                                 const BYTE** litPtr, const BYTE* const litLimit,
1751*0c16b537SWarner Losh                                 BYTE* const base, BYTE* const oend)
1752*0c16b537SWarner Losh {
1753*0c16b537SWarner Losh     static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};   /* added */
1754*0c16b537SWarner Losh     static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11};   /* substracted */
1755*0c16b537SWarner Losh     const BYTE* const ostart = op;
1756*0c16b537SWarner Losh     const size_t litLength = sequence.litLength;
1757*0c16b537SWarner Losh     BYTE* const endMatch = op + litLength + sequence.matchLength;    /* risk : address space overflow (32-bits) */
1758*0c16b537SWarner Losh     const BYTE* const litEnd = *litPtr + litLength;
1759*0c16b537SWarner Losh 
1760*0c16b537SWarner Losh     /* check */
1761*0c16b537SWarner Losh     if (endMatch > oend) return ERROR(dstSize_tooSmall);   /* overwrite beyond dst buffer */
1762*0c16b537SWarner Losh     if (litEnd > litLimit) return ERROR(corruption_detected);
1763*0c16b537SWarner Losh     if (sequence.matchLength > (size_t)(*litPtr-op))  return ERROR(dstSize_tooSmall);    /* overwrite literal segment */
1764*0c16b537SWarner Losh 
1765*0c16b537SWarner Losh     /* copy Literals */
1766*0c16b537SWarner Losh     if (((size_t)(*litPtr - op) < 8) || ((size_t)(oend-litEnd) < 8) || (op+litLength > oend-8))
1767*0c16b537SWarner Losh         memmove(op, *litPtr, litLength);   /* overwrite risk */
1768*0c16b537SWarner Losh     else
1769*0c16b537SWarner Losh         ZSTD_wildcopy(op, *litPtr, litLength);
1770*0c16b537SWarner Losh     op += litLength;
1771*0c16b537SWarner Losh     *litPtr = litEnd;   /* update for next sequence */
1772*0c16b537SWarner Losh 
1773*0c16b537SWarner Losh     /* check : last match must be at a minimum distance of 8 from end of dest buffer */
1774*0c16b537SWarner Losh     if (oend-op < 8) return ERROR(dstSize_tooSmall);
1775*0c16b537SWarner Losh 
1776*0c16b537SWarner Losh     /* copy Match */
1777*0c16b537SWarner Losh     {
1778*0c16b537SWarner Losh         const U32 overlapRisk = (((size_t)(litEnd - endMatch)) < 12);
1779*0c16b537SWarner Losh         const BYTE* match = op - sequence.offset;            /* possible underflow at op - offset ? */
1780*0c16b537SWarner Losh         size_t qutt = 12;
1781*0c16b537SWarner Losh         U64 saved[2];
1782*0c16b537SWarner Losh 
1783*0c16b537SWarner Losh         /* check */
1784*0c16b537SWarner Losh         if (match < base) return ERROR(corruption_detected);
1785*0c16b537SWarner Losh         if (sequence.offset > (size_t)base) return ERROR(corruption_detected);
1786*0c16b537SWarner Losh 
1787*0c16b537SWarner Losh         /* save beginning of literal sequence, in case of write overlap */
1788*0c16b537SWarner Losh         if (overlapRisk)
1789*0c16b537SWarner Losh         {
1790*0c16b537SWarner Losh             if ((endMatch + qutt) > oend) qutt = oend-endMatch;
1791*0c16b537SWarner Losh             memcpy(saved, endMatch, qutt);
1792*0c16b537SWarner Losh         }
1793*0c16b537SWarner Losh 
1794*0c16b537SWarner Losh         if (sequence.offset < 8)
1795*0c16b537SWarner Losh         {
1796*0c16b537SWarner Losh             const int dec64 = dec64table[sequence.offset];
1797*0c16b537SWarner Losh             op[0] = match[0];
1798*0c16b537SWarner Losh             op[1] = match[1];
1799*0c16b537SWarner Losh             op[2] = match[2];
1800*0c16b537SWarner Losh             op[3] = match[3];
1801*0c16b537SWarner Losh             match += dec32table[sequence.offset];
1802*0c16b537SWarner Losh             ZSTD_copy4(op+4, match);
1803*0c16b537SWarner Losh             match -= dec64;
1804*0c16b537SWarner Losh         } else { ZSTD_copy8(op, match); }
1805*0c16b537SWarner Losh         op += 8; match += 8;
1806*0c16b537SWarner Losh 
1807*0c16b537SWarner Losh         if (endMatch > oend-(16-MINMATCH))
1808*0c16b537SWarner Losh         {
1809*0c16b537SWarner Losh             if (op < oend-8)
1810*0c16b537SWarner Losh             {
1811*0c16b537SWarner Losh                 ZSTD_wildcopy(op, match, (oend-8) - op);
1812*0c16b537SWarner Losh                 match += (oend-8) - op;
1813*0c16b537SWarner Losh                 op = oend-8;
1814*0c16b537SWarner Losh             }
1815*0c16b537SWarner Losh             while (op<endMatch) *op++ = *match++;
1816*0c16b537SWarner Losh         }
1817*0c16b537SWarner Losh         else
1818*0c16b537SWarner Losh             ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */
1819*0c16b537SWarner Losh 
1820*0c16b537SWarner Losh         /* restore, in case of overlap */
1821*0c16b537SWarner Losh         if (overlapRisk) memcpy(endMatch, saved, qutt);
1822*0c16b537SWarner Losh     }
1823*0c16b537SWarner Losh 
1824*0c16b537SWarner Losh     return endMatch-ostart;
1825*0c16b537SWarner Losh }
1826*0c16b537SWarner Losh 
1827*0c16b537SWarner Losh typedef struct ZSTDv01_Dctx_s
1828*0c16b537SWarner Losh {
1829*0c16b537SWarner Losh     U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
1830*0c16b537SWarner Losh     U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
1831*0c16b537SWarner Losh     U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
1832*0c16b537SWarner Losh     void* previousDstEnd;
1833*0c16b537SWarner Losh     void* base;
1834*0c16b537SWarner Losh     size_t expected;
1835*0c16b537SWarner Losh     blockType_t bType;
1836*0c16b537SWarner Losh     U32 phase;
1837*0c16b537SWarner Losh } dctx_t;
1838*0c16b537SWarner Losh 
1839*0c16b537SWarner Losh 
1840*0c16b537SWarner Losh static size_t ZSTD_decompressSequences(
1841*0c16b537SWarner Losh                                void* ctx,
1842*0c16b537SWarner Losh                                void* dst, size_t maxDstSize,
1843*0c16b537SWarner Losh                          const void* seqStart, size_t seqSize,
1844*0c16b537SWarner Losh                          const BYTE* litStart, size_t litSize)
1845*0c16b537SWarner Losh {
1846*0c16b537SWarner Losh     dctx_t* dctx = (dctx_t*)ctx;
1847*0c16b537SWarner Losh     const BYTE* ip = (const BYTE*)seqStart;
1848*0c16b537SWarner Losh     const BYTE* const iend = ip + seqSize;
1849*0c16b537SWarner Losh     BYTE* const ostart = (BYTE* const)dst;
1850*0c16b537SWarner Losh     BYTE* op = ostart;
1851*0c16b537SWarner Losh     BYTE* const oend = ostart + maxDstSize;
1852*0c16b537SWarner Losh     size_t errorCode, dumpsLength;
1853*0c16b537SWarner Losh     const BYTE* litPtr = litStart;
1854*0c16b537SWarner Losh     const BYTE* const litEnd = litStart + litSize;
1855*0c16b537SWarner Losh     int nbSeq;
1856*0c16b537SWarner Losh     const BYTE* dumps;
1857*0c16b537SWarner Losh     U32* DTableLL = dctx->LLTable;
1858*0c16b537SWarner Losh     U32* DTableML = dctx->MLTable;
1859*0c16b537SWarner Losh     U32* DTableOffb = dctx->OffTable;
1860*0c16b537SWarner Losh     BYTE* const base = (BYTE*) (dctx->base);
1861*0c16b537SWarner Losh 
1862*0c16b537SWarner Losh     /* Build Decoding Tables */
1863*0c16b537SWarner Losh     errorCode = ZSTDv01_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
1864*0c16b537SWarner Losh                                       DTableLL, DTableML, DTableOffb,
1865*0c16b537SWarner Losh                                       ip, iend-ip);
1866*0c16b537SWarner Losh     if (ZSTDv01_isError(errorCode)) return errorCode;
1867*0c16b537SWarner Losh     ip += errorCode;
1868*0c16b537SWarner Losh 
1869*0c16b537SWarner Losh     /* Regen sequences */
1870*0c16b537SWarner Losh     {
1871*0c16b537SWarner Losh         seq_t sequence;
1872*0c16b537SWarner Losh         seqState_t seqState;
1873*0c16b537SWarner Losh 
1874*0c16b537SWarner Losh         memset(&sequence, 0, sizeof(sequence));
1875*0c16b537SWarner Losh         seqState.dumps = dumps;
1876*0c16b537SWarner Losh         seqState.dumpsEnd = dumps + dumpsLength;
1877*0c16b537SWarner Losh         seqState.prevOffset = 1;
1878*0c16b537SWarner Losh         errorCode = FSE_initDStream(&(seqState.DStream), ip, iend-ip);
1879*0c16b537SWarner Losh         if (FSE_isError(errorCode)) return ERROR(corruption_detected);
1880*0c16b537SWarner Losh         FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
1881*0c16b537SWarner Losh         FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
1882*0c16b537SWarner Losh         FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
1883*0c16b537SWarner Losh 
1884*0c16b537SWarner Losh         for ( ; (FSE_reloadDStream(&(seqState.DStream)) <= FSE_DStream_completed) && (nbSeq>0) ; )
1885*0c16b537SWarner Losh         {
1886*0c16b537SWarner Losh             size_t oneSeqSize;
1887*0c16b537SWarner Losh             nbSeq--;
1888*0c16b537SWarner Losh             ZSTD_decodeSequence(&sequence, &seqState);
1889*0c16b537SWarner Losh             oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litEnd, base, oend);
1890*0c16b537SWarner Losh             if (ZSTDv01_isError(oneSeqSize)) return oneSeqSize;
1891*0c16b537SWarner Losh             op += oneSeqSize;
1892*0c16b537SWarner Losh         }
1893*0c16b537SWarner Losh 
1894*0c16b537SWarner Losh         /* check if reached exact end */
1895*0c16b537SWarner Losh         if ( !FSE_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected);   /* requested too much : data is corrupted */
1896*0c16b537SWarner Losh         if (nbSeq<0) return ERROR(corruption_detected);   /* requested too many sequences : data is corrupted */
1897*0c16b537SWarner Losh 
1898*0c16b537SWarner Losh         /* last literal segment */
1899*0c16b537SWarner Losh         {
1900*0c16b537SWarner Losh             size_t lastLLSize = litEnd - litPtr;
1901*0c16b537SWarner Losh             if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
1902*0c16b537SWarner Losh             if (op != litPtr) memmove(op, litPtr, lastLLSize);
1903*0c16b537SWarner Losh             op += lastLLSize;
1904*0c16b537SWarner Losh         }
1905*0c16b537SWarner Losh     }
1906*0c16b537SWarner Losh 
1907*0c16b537SWarner Losh     return op-ostart;
1908*0c16b537SWarner Losh }
1909*0c16b537SWarner Losh 
1910*0c16b537SWarner Losh 
1911*0c16b537SWarner Losh static size_t ZSTD_decompressBlock(
1912*0c16b537SWarner Losh                             void* ctx,
1913*0c16b537SWarner Losh                             void* dst, size_t maxDstSize,
1914*0c16b537SWarner Losh                       const void* src, size_t srcSize)
1915*0c16b537SWarner Losh {
1916*0c16b537SWarner Losh     /* blockType == blockCompressed, srcSize is trusted */
1917*0c16b537SWarner Losh     const BYTE* ip = (const BYTE*)src;
1918*0c16b537SWarner Losh     const BYTE* litPtr = NULL;
1919*0c16b537SWarner Losh     size_t litSize = 0;
1920*0c16b537SWarner Losh     size_t errorCode;
1921*0c16b537SWarner Losh 
1922*0c16b537SWarner Losh     /* Decode literals sub-block */
1923*0c16b537SWarner Losh     errorCode = ZSTDv01_decodeLiteralsBlock(ctx, dst, maxDstSize, &litPtr, &litSize, src, srcSize);
1924*0c16b537SWarner Losh     if (ZSTDv01_isError(errorCode)) return errorCode;
1925*0c16b537SWarner Losh     ip += errorCode;
1926*0c16b537SWarner Losh     srcSize -= errorCode;
1927*0c16b537SWarner Losh 
1928*0c16b537SWarner Losh     return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize, litPtr, litSize);
1929*0c16b537SWarner Losh }
1930*0c16b537SWarner Losh 
1931*0c16b537SWarner Losh 
1932*0c16b537SWarner Losh size_t ZSTDv01_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1933*0c16b537SWarner Losh {
1934*0c16b537SWarner Losh     const BYTE* ip = (const BYTE*)src;
1935*0c16b537SWarner Losh     const BYTE* iend = ip + srcSize;
1936*0c16b537SWarner Losh     BYTE* const ostart = (BYTE* const)dst;
1937*0c16b537SWarner Losh     BYTE* op = ostart;
1938*0c16b537SWarner Losh     BYTE* const oend = ostart + maxDstSize;
1939*0c16b537SWarner Losh     size_t remainingSize = srcSize;
1940*0c16b537SWarner Losh     U32 magicNumber;
1941*0c16b537SWarner Losh     size_t errorCode=0;
1942*0c16b537SWarner Losh     blockProperties_t blockProperties;
1943*0c16b537SWarner Losh 
1944*0c16b537SWarner Losh     /* Frame Header */
1945*0c16b537SWarner Losh     if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
1946*0c16b537SWarner Losh     magicNumber = ZSTD_readBE32(src);
1947*0c16b537SWarner Losh     if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
1948*0c16b537SWarner Losh     ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
1949*0c16b537SWarner Losh 
1950*0c16b537SWarner Losh     /* Loop on each block */
1951*0c16b537SWarner Losh     while (1)
1952*0c16b537SWarner Losh     {
1953*0c16b537SWarner Losh         size_t blockSize = ZSTDv01_getcBlockSize(ip, iend-ip, &blockProperties);
1954*0c16b537SWarner Losh         if (ZSTDv01_isError(blockSize)) return blockSize;
1955*0c16b537SWarner Losh 
1956*0c16b537SWarner Losh         ip += ZSTD_blockHeaderSize;
1957*0c16b537SWarner Losh         remainingSize -= ZSTD_blockHeaderSize;
1958*0c16b537SWarner Losh         if (blockSize > remainingSize) return ERROR(srcSize_wrong);
1959*0c16b537SWarner Losh 
1960*0c16b537SWarner Losh         switch(blockProperties.blockType)
1961*0c16b537SWarner Losh         {
1962*0c16b537SWarner Losh         case bt_compressed:
1963*0c16b537SWarner Losh             errorCode = ZSTD_decompressBlock(ctx, op, oend-op, ip, blockSize);
1964*0c16b537SWarner Losh             break;
1965*0c16b537SWarner Losh         case bt_raw :
1966*0c16b537SWarner Losh             errorCode = ZSTD_copyUncompressedBlock(op, oend-op, ip, blockSize);
1967*0c16b537SWarner Losh             break;
1968*0c16b537SWarner Losh         case bt_rle :
1969*0c16b537SWarner Losh             return ERROR(GENERIC);   /* not yet supported */
1970*0c16b537SWarner Losh             break;
1971*0c16b537SWarner Losh         case bt_end :
1972*0c16b537SWarner Losh             /* end of frame */
1973*0c16b537SWarner Losh             if (remainingSize) return ERROR(srcSize_wrong);
1974*0c16b537SWarner Losh             break;
1975*0c16b537SWarner Losh         default:
1976*0c16b537SWarner Losh             return ERROR(GENERIC);
1977*0c16b537SWarner Losh         }
1978*0c16b537SWarner Losh         if (blockSize == 0) break;   /* bt_end */
1979*0c16b537SWarner Losh 
1980*0c16b537SWarner Losh         if (ZSTDv01_isError(errorCode)) return errorCode;
1981*0c16b537SWarner Losh         op += errorCode;
1982*0c16b537SWarner Losh         ip += blockSize;
1983*0c16b537SWarner Losh         remainingSize -= blockSize;
1984*0c16b537SWarner Losh     }
1985*0c16b537SWarner Losh 
1986*0c16b537SWarner Losh     return op-ostart;
1987*0c16b537SWarner Losh }
1988*0c16b537SWarner Losh 
1989*0c16b537SWarner Losh size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1990*0c16b537SWarner Losh {
1991*0c16b537SWarner Losh     dctx_t ctx;
1992*0c16b537SWarner Losh     ctx.base = dst;
1993*0c16b537SWarner Losh     return ZSTDv01_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
1994*0c16b537SWarner Losh }
1995*0c16b537SWarner Losh 
1996*0c16b537SWarner Losh size_t ZSTDv01_findFrameCompressedSize(const void* src, size_t srcSize)
1997*0c16b537SWarner Losh {
1998*0c16b537SWarner Losh     const BYTE* ip = (const BYTE*)src;
1999*0c16b537SWarner Losh     size_t remainingSize = srcSize;
2000*0c16b537SWarner Losh     U32 magicNumber;
2001*0c16b537SWarner Losh     blockProperties_t blockProperties;
2002*0c16b537SWarner Losh 
2003*0c16b537SWarner Losh     /* Frame Header */
2004*0c16b537SWarner Losh     if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
2005*0c16b537SWarner Losh     magicNumber = ZSTD_readBE32(src);
2006*0c16b537SWarner Losh     if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
2007*0c16b537SWarner Losh     ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
2008*0c16b537SWarner Losh 
2009*0c16b537SWarner Losh     /* Loop on each block */
2010*0c16b537SWarner Losh     while (1)
2011*0c16b537SWarner Losh     {
2012*0c16b537SWarner Losh         size_t blockSize = ZSTDv01_getcBlockSize(ip, remainingSize, &blockProperties);
2013*0c16b537SWarner Losh         if (ZSTDv01_isError(blockSize)) return blockSize;
2014*0c16b537SWarner Losh 
2015*0c16b537SWarner Losh         ip += ZSTD_blockHeaderSize;
2016*0c16b537SWarner Losh         remainingSize -= ZSTD_blockHeaderSize;
2017*0c16b537SWarner Losh         if (blockSize > remainingSize) return ERROR(srcSize_wrong);
2018*0c16b537SWarner Losh 
2019*0c16b537SWarner Losh         if (blockSize == 0) break;   /* bt_end */
2020*0c16b537SWarner Losh 
2021*0c16b537SWarner Losh         ip += blockSize;
2022*0c16b537SWarner Losh         remainingSize -= blockSize;
2023*0c16b537SWarner Losh     }
2024*0c16b537SWarner Losh 
2025*0c16b537SWarner Losh     return ip - (const BYTE*)src;
2026*0c16b537SWarner Losh }
2027*0c16b537SWarner Losh 
2028*0c16b537SWarner Losh /*******************************
2029*0c16b537SWarner Losh *  Streaming Decompression API
2030*0c16b537SWarner Losh *******************************/
2031*0c16b537SWarner Losh 
2032*0c16b537SWarner Losh size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx)
2033*0c16b537SWarner Losh {
2034*0c16b537SWarner Losh     dctx->expected = ZSTD_frameHeaderSize;
2035*0c16b537SWarner Losh     dctx->phase = 0;
2036*0c16b537SWarner Losh     dctx->previousDstEnd = NULL;
2037*0c16b537SWarner Losh     dctx->base = NULL;
2038*0c16b537SWarner Losh     return 0;
2039*0c16b537SWarner Losh }
2040*0c16b537SWarner Losh 
2041*0c16b537SWarner Losh ZSTDv01_Dctx* ZSTDv01_createDCtx(void)
2042*0c16b537SWarner Losh {
2043*0c16b537SWarner Losh     ZSTDv01_Dctx* dctx = (ZSTDv01_Dctx*)malloc(sizeof(ZSTDv01_Dctx));
2044*0c16b537SWarner Losh     if (dctx==NULL) return NULL;
2045*0c16b537SWarner Losh     ZSTDv01_resetDCtx(dctx);
2046*0c16b537SWarner Losh     return dctx;
2047*0c16b537SWarner Losh }
2048*0c16b537SWarner Losh 
2049*0c16b537SWarner Losh size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx)
2050*0c16b537SWarner Losh {
2051*0c16b537SWarner Losh     free(dctx);
2052*0c16b537SWarner Losh     return 0;
2053*0c16b537SWarner Losh }
2054*0c16b537SWarner Losh 
2055*0c16b537SWarner Losh size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx)
2056*0c16b537SWarner Losh {
2057*0c16b537SWarner Losh     return ((dctx_t*)dctx)->expected;
2058*0c16b537SWarner Losh }
2059*0c16b537SWarner Losh 
2060*0c16b537SWarner Losh size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
2061*0c16b537SWarner Losh {
2062*0c16b537SWarner Losh     dctx_t* ctx = (dctx_t*)dctx;
2063*0c16b537SWarner Losh 
2064*0c16b537SWarner Losh     /* Sanity check */
2065*0c16b537SWarner Losh     if (srcSize != ctx->expected) return ERROR(srcSize_wrong);
2066*0c16b537SWarner Losh     if (dst != ctx->previousDstEnd)  /* not contiguous */
2067*0c16b537SWarner Losh         ctx->base = dst;
2068*0c16b537SWarner Losh 
2069*0c16b537SWarner Losh     /* Decompress : frame header */
2070*0c16b537SWarner Losh     if (ctx->phase == 0)
2071*0c16b537SWarner Losh     {
2072*0c16b537SWarner Losh         /* Check frame magic header */
2073*0c16b537SWarner Losh         U32 magicNumber = ZSTD_readBE32(src);
2074*0c16b537SWarner Losh         if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
2075*0c16b537SWarner Losh         ctx->phase = 1;
2076*0c16b537SWarner Losh         ctx->expected = ZSTD_blockHeaderSize;
2077*0c16b537SWarner Losh         return 0;
2078*0c16b537SWarner Losh     }
2079*0c16b537SWarner Losh 
2080*0c16b537SWarner Losh     /* Decompress : block header */
2081*0c16b537SWarner Losh     if (ctx->phase == 1)
2082*0c16b537SWarner Losh     {
2083*0c16b537SWarner Losh         blockProperties_t bp;
2084*0c16b537SWarner Losh         size_t blockSize = ZSTDv01_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
2085*0c16b537SWarner Losh         if (ZSTDv01_isError(blockSize)) return blockSize;
2086*0c16b537SWarner Losh         if (bp.blockType == bt_end)
2087*0c16b537SWarner Losh         {
2088*0c16b537SWarner Losh             ctx->expected = 0;
2089*0c16b537SWarner Losh             ctx->phase = 0;
2090*0c16b537SWarner Losh         }
2091*0c16b537SWarner Losh         else
2092*0c16b537SWarner Losh         {
2093*0c16b537SWarner Losh             ctx->expected = blockSize;
2094*0c16b537SWarner Losh             ctx->bType = bp.blockType;
2095*0c16b537SWarner Losh             ctx->phase = 2;
2096*0c16b537SWarner Losh         }
2097*0c16b537SWarner Losh 
2098*0c16b537SWarner Losh         return 0;
2099*0c16b537SWarner Losh     }
2100*0c16b537SWarner Losh 
2101*0c16b537SWarner Losh     /* Decompress : block content */
2102*0c16b537SWarner Losh     {
2103*0c16b537SWarner Losh         size_t rSize;
2104*0c16b537SWarner Losh         switch(ctx->bType)
2105*0c16b537SWarner Losh         {
2106*0c16b537SWarner Losh         case bt_compressed:
2107*0c16b537SWarner Losh             rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
2108*0c16b537SWarner Losh             break;
2109*0c16b537SWarner Losh         case bt_raw :
2110*0c16b537SWarner Losh             rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize);
2111*0c16b537SWarner Losh             break;
2112*0c16b537SWarner Losh         case bt_rle :
2113*0c16b537SWarner Losh             return ERROR(GENERIC);   /* not yet handled */
2114*0c16b537SWarner Losh             break;
2115*0c16b537SWarner Losh         case bt_end :   /* should never happen (filtered at phase 1) */
2116*0c16b537SWarner Losh             rSize = 0;
2117*0c16b537SWarner Losh             break;
2118*0c16b537SWarner Losh         default:
2119*0c16b537SWarner Losh             return ERROR(GENERIC);
2120*0c16b537SWarner Losh         }
2121*0c16b537SWarner Losh         ctx->phase = 1;
2122*0c16b537SWarner Losh         ctx->expected = ZSTD_blockHeaderSize;
2123*0c16b537SWarner Losh         ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);
2124*0c16b537SWarner Losh         return rSize;
2125*0c16b537SWarner Losh     }
2126*0c16b537SWarner Losh 
2127*0c16b537SWarner Losh }
2128