xref: /freebsd/sys/contrib/zstd/lib/legacy/zstd_v01.c (revision f1951fd745b894fe6586c298874af98544a5e272)
1 /*
2  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  * You may select, at your option, one of the above-listed licenses.
9  */
10 
11 
12 /******************************************
13 *  Includes
14 ******************************************/
15 #include <stddef.h>    /* size_t, ptrdiff_t */
16 #include "zstd_v01.h"
17 #include "error_private.h"
18 
19 
20 /******************************************
21 *  Static allocation
22 ******************************************/
23 /* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */
24 #define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<maxTableLog))
25 
26 /* You can statically allocate Huff0 DTable as a table of unsigned short using below macro */
27 #define HUF_DTABLE_SIZE_U16(maxTableLog)   (1 + (1<<maxTableLog))
28 #define HUF_CREATE_STATIC_DTABLE(DTable, maxTableLog) \
29         unsigned short DTable[HUF_DTABLE_SIZE_U16(maxTableLog)] = { maxTableLog }
30 
31 
32 /******************************************
33 *  Error Management
34 ******************************************/
35 #define FSE_LIST_ERRORS(ITEM) \
36         ITEM(FSE_OK_NoError) ITEM(FSE_ERROR_GENERIC) \
37         ITEM(FSE_ERROR_tableLog_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooSmall) \
38         ITEM(FSE_ERROR_dstSize_tooSmall) ITEM(FSE_ERROR_srcSize_wrong)\
39         ITEM(FSE_ERROR_corruptionDetected) \
40         ITEM(FSE_ERROR_maxCode)
41 
42 #define FSE_GENERATE_ENUM(ENUM) ENUM,
43 typedef enum { FSE_LIST_ERRORS(FSE_GENERATE_ENUM) } FSE_errorCodes;  /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
44 
45 
46 /******************************************
47 *  FSE symbol compression API
48 ******************************************/
49 /*
50    This API consists of small unitary functions, which highly benefit from being inlined.
51    You will want to enable link-time-optimization to ensure these functions are properly inlined in your binary.
52    Visual seems to do it automatically.
53    For gcc or clang, you'll need to add -flto flag at compilation and linking stages.
54    If none of these solutions is applicable, include "fse.c" directly.
55 */
56 
57 typedef unsigned FSE_CTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
58 typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
59 
60 typedef struct
61 {
62     size_t bitContainer;
63     int    bitPos;
64     char*  startPtr;
65     char*  ptr;
66     char*  endPtr;
67 } FSE_CStream_t;
68 
69 typedef struct
70 {
71     ptrdiff_t   value;
72     const void* stateTable;
73     const void* symbolTT;
74     unsigned    stateLog;
75 } FSE_CState_t;
76 
77 typedef struct
78 {
79     size_t   bitContainer;
80     unsigned bitsConsumed;
81     const char* ptr;
82     const char* start;
83 } FSE_DStream_t;
84 
85 typedef struct
86 {
87     size_t      state;
88     const void* table;   /* precise table may vary, depending on U16 */
89 } FSE_DState_t;
90 
91 typedef enum { FSE_DStream_unfinished = 0,
92                FSE_DStream_endOfBuffer = 1,
93                FSE_DStream_completed = 2,
94                FSE_DStream_tooFar = 3 } FSE_DStream_status;  /* result of FSE_reloadDStream() */
95                /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... ?! */
96 
97 
98 /****************************************************************
99 *  Tuning parameters
100 ****************************************************************/
101 /* MEMORY_USAGE :
102 *  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
103 *  Increasing memory usage improves compression ratio
104 *  Reduced memory usage can improve speed, due to cache effect
105 *  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
106 #define FSE_MAX_MEMORY_USAGE 14
107 #define FSE_DEFAULT_MEMORY_USAGE 13
108 
109 /* FSE_MAX_SYMBOL_VALUE :
110 *  Maximum symbol value authorized.
111 *  Required for proper stack allocation */
112 #define FSE_MAX_SYMBOL_VALUE 255
113 
114 
115 /****************************************************************
116 *  template functions type & suffix
117 ****************************************************************/
118 #define FSE_FUNCTION_TYPE BYTE
119 #define FSE_FUNCTION_EXTENSION
120 
121 
122 /****************************************************************
123 *  Byte symbol type
124 ****************************************************************/
125 typedef struct
126 {
127     unsigned short newState;
128     unsigned char  symbol;
129     unsigned char  nbBits;
130 } FSE_decode_t;   /* size == U32 */
131 
132 
133 
134 /****************************************************************
135 *  Compiler specifics
136 ****************************************************************/
137 #ifdef _MSC_VER    /* Visual Studio */
138 #  define FORCE_INLINE static __forceinline
139 #  include <intrin.h>                    /* For Visual 2005 */
140 #  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
141 #  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
142 #else
143 #  define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
144 #  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
145 #    ifdef __GNUC__
146 #      define FORCE_INLINE static inline __attribute__((always_inline))
147 #    else
148 #      define FORCE_INLINE static inline
149 #    endif
150 #  else
151 #    define FORCE_INLINE static
152 #  endif /* __STDC_VERSION__ */
153 #endif
154 
155 
156 /****************************************************************
157 *  Includes
158 ****************************************************************/
159 #include <stdlib.h>     /* malloc, free, qsort */
160 #include <string.h>     /* memcpy, memset */
161 #include <stdio.h>      /* printf (debug) */
162 
163 
164 #ifndef MEM_ACCESS_MODULE
165 #define MEM_ACCESS_MODULE
166 /****************************************************************
167 *  Basic Types
168 *****************************************************************/
169 #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
170 # include <stdint.h>
171 typedef  uint8_t BYTE;
172 typedef uint16_t U16;
173 typedef  int16_t S16;
174 typedef uint32_t U32;
175 typedef  int32_t S32;
176 typedef uint64_t U64;
177 typedef  int64_t S64;
178 #else
179 typedef unsigned char       BYTE;
180 typedef unsigned short      U16;
181 typedef   signed short      S16;
182 typedef unsigned int        U32;
183 typedef   signed int        S32;
184 typedef unsigned long long  U64;
185 typedef   signed long long  S64;
186 #endif
187 
188 #endif   /* MEM_ACCESS_MODULE */
189 
190 /****************************************************************
191 *  Memory I/O
192 *****************************************************************/
193 /* FSE_FORCE_MEMORY_ACCESS
194  * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
195  * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
196  * The below switch allow to select different access method for improved performance.
197  * Method 0 (default) : use `memcpy()`. Safe and portable.
198  * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
199  *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
200  * Method 2 : direct access. This method is portable but violate C standard.
201  *            It can generate buggy code on targets generating assembly depending on alignment.
202  *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
203  * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
204  * Prefer these methods in priority order (0 > 1 > 2)
205  */
206 #ifndef FSE_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
207 #  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
208 #    define FSE_FORCE_MEMORY_ACCESS 2
209 #  elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
210   (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
211 #    define FSE_FORCE_MEMORY_ACCESS 1
212 #  endif
213 #endif
214 
215 
216 static unsigned FSE_32bits(void)
217 {
218     return sizeof(void*)==4;
219 }
220 
221 static unsigned FSE_isLittleEndian(void)
222 {
223     const union { U32 i; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
224     return one.c[0];
225 }
226 
227 #if defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==2)
228 
229 static U16 FSE_read16(const void* memPtr) { return *(const U16*) memPtr; }
230 static U32 FSE_read32(const void* memPtr) { return *(const U32*) memPtr; }
231 static U64 FSE_read64(const void* memPtr) { return *(const U64*) memPtr; }
232 
233 #elif defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==1)
234 
235 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
236 /* currently only defined for gcc and icc */
237 typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign;
238 
239 static U16 FSE_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
240 static U32 FSE_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
241 static U64 FSE_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
242 
243 #else
244 
245 static U16 FSE_read16(const void* memPtr)
246 {
247     U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
248 }
249 
250 static U32 FSE_read32(const void* memPtr)
251 {
252     U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
253 }
254 
255 static U64 FSE_read64(const void* memPtr)
256 {
257     U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
258 }
259 
260 #endif // FSE_FORCE_MEMORY_ACCESS
261 
262 static U16 FSE_readLE16(const void* memPtr)
263 {
264     if (FSE_isLittleEndian())
265         return FSE_read16(memPtr);
266     else
267     {
268         const BYTE* p = (const BYTE*)memPtr;
269         return (U16)(p[0] + (p[1]<<8));
270     }
271 }
272 
273 static U32 FSE_readLE32(const void* memPtr)
274 {
275     if (FSE_isLittleEndian())
276         return FSE_read32(memPtr);
277     else
278     {
279         const BYTE* p = (const BYTE*)memPtr;
280         return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
281     }
282 }
283 
284 
285 static U64 FSE_readLE64(const void* memPtr)
286 {
287     if (FSE_isLittleEndian())
288         return FSE_read64(memPtr);
289     else
290     {
291         const BYTE* p = (const BYTE*)memPtr;
292         return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
293                      + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
294     }
295 }
296 
297 static size_t FSE_readLEST(const void* memPtr)
298 {
299     if (FSE_32bits())
300         return (size_t)FSE_readLE32(memPtr);
301     else
302         return (size_t)FSE_readLE64(memPtr);
303 }
304 
305 
306 
307 /****************************************************************
308 *  Constants
309 *****************************************************************/
310 #define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)
311 #define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
312 #define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
313 #define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
314 #define FSE_MIN_TABLELOG 5
315 
316 #define FSE_TABLELOG_ABSOLUTE_MAX 15
317 #if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
318 #error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
319 #endif
320 
321 
322 /****************************************************************
323 *  Error Management
324 ****************************************************************/
325 #define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
326 
327 
328 /****************************************************************
329 *  Complex types
330 ****************************************************************/
331 typedef struct
332 {
333     int deltaFindState;
334     U32 deltaNbBits;
335 } FSE_symbolCompressionTransform; /* total 8 bytes */
336 
337 typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
338 
339 /****************************************************************
340 *  Internal functions
341 ****************************************************************/
342 FORCE_INLINE unsigned FSE_highbit32 (U32 val)
343 {
344 #   if defined(_MSC_VER)   /* Visual */
345     unsigned long r;
346     _BitScanReverse ( &r, val );
347     return (unsigned) r;
348 #   elif defined(__GNUC__) && (GCC_VERSION >= 304)   /* GCC Intrinsic */
349     return 31 - __builtin_clz (val);
350 #   else   /* Software version */
351     static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
352     U32 v = val;
353     unsigned r;
354     v |= v >> 1;
355     v |= v >> 2;
356     v |= v >> 4;
357     v |= v >> 8;
358     v |= v >> 16;
359     r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
360     return r;
361 #   endif
362 }
363 
364 
365 /****************************************************************
366 *  Templates
367 ****************************************************************/
368 /*
369   designed to be included
370   for type-specific functions (template emulation in C)
371   Objective is to write these functions only once, for improved maintenance
372 */
373 
374 /* safety checks */
375 #ifndef FSE_FUNCTION_EXTENSION
376 #  error "FSE_FUNCTION_EXTENSION must be defined"
377 #endif
378 #ifndef FSE_FUNCTION_TYPE
379 #  error "FSE_FUNCTION_TYPE must be defined"
380 #endif
381 
382 /* Function names */
383 #define FSE_CAT(X,Y) X##Y
384 #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
385 #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
386 
387 
388 
389 static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
390 
391 #define FSE_DECODE_TYPE FSE_decode_t
392 
393 
394 typedef struct {
395     U16 tableLog;
396     U16 fastMode;
397 } FSE_DTableHeader;   /* sizeof U32 */
398 
399 static size_t FSE_buildDTable
400 (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
401 {
402     void* ptr = dt;
403     FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
404     FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)(ptr) + 1;   /* because dt is unsigned, 32-bits aligned on 32-bits */
405     const U32 tableSize = 1 << tableLog;
406     const U32 tableMask = tableSize-1;
407     const U32 step = FSE_tableStep(tableSize);
408     U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
409     U32 position = 0;
410     U32 highThreshold = tableSize-1;
411     const S16 largeLimit= (S16)(1 << (tableLog-1));
412     U32 noLarge = 1;
413     U32 s;
414 
415     /* Sanity Checks */
416     if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge;
417     if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge;
418 
419     /* Init, lay down lowprob symbols */
420     DTableH[0].tableLog = (U16)tableLog;
421     for (s=0; s<=maxSymbolValue; s++)
422     {
423         if (normalizedCounter[s]==-1)
424         {
425             tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
426             symbolNext[s] = 1;
427         }
428         else
429         {
430             if (normalizedCounter[s] >= largeLimit) noLarge=0;
431             symbolNext[s] = normalizedCounter[s];
432         }
433     }
434 
435     /* Spread symbols */
436     for (s=0; s<=maxSymbolValue; s++)
437     {
438         int i;
439         for (i=0; i<normalizedCounter[s]; i++)
440         {
441             tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
442             position = (position + step) & tableMask;
443             while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
444         }
445     }
446 
447     if (position!=0) return (size_t)-FSE_ERROR_GENERIC;   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
448 
449     /* Build Decoding table */
450     {
451         U32 i;
452         for (i=0; i<tableSize; i++)
453         {
454             FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
455             U16 nextState = symbolNext[symbol]++;
456             tableDecode[i].nbBits = (BYTE) (tableLog - FSE_highbit32 ((U32)nextState) );
457             tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
458         }
459     }
460 
461     DTableH->fastMode = (U16)noLarge;
462     return 0;
463 }
464 
465 
466 /******************************************
467 *  FSE byte symbol
468 ******************************************/
469 #ifndef FSE_COMMONDEFS_ONLY
470 
471 static unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); }
472 
473 static short FSE_abs(short a)
474 {
475     return a<0? -a : a;
476 }
477 
478 
479 /****************************************************************
480 *  Header bitstream management
481 ****************************************************************/
482 static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
483                  const void* headerBuffer, size_t hbSize)
484 {
485     const BYTE* const istart = (const BYTE*) headerBuffer;
486     const BYTE* const iend = istart + hbSize;
487     const BYTE* ip = istart;
488     int nbBits;
489     int remaining;
490     int threshold;
491     U32 bitStream;
492     int bitCount;
493     unsigned charnum = 0;
494     int previous0 = 0;
495 
496     if (hbSize < 4) return (size_t)-FSE_ERROR_srcSize_wrong;
497     bitStream = FSE_readLE32(ip);
498     nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
499     if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return (size_t)-FSE_ERROR_tableLog_tooLarge;
500     bitStream >>= 4;
501     bitCount = 4;
502     *tableLogPtr = nbBits;
503     remaining = (1<<nbBits)+1;
504     threshold = 1<<nbBits;
505     nbBits++;
506 
507     while ((remaining>1) && (charnum<=*maxSVPtr))
508     {
509         if (previous0)
510         {
511             unsigned n0 = charnum;
512             while ((bitStream & 0xFFFF) == 0xFFFF)
513             {
514                 n0+=24;
515                 if (ip < iend-5)
516                 {
517                     ip+=2;
518                     bitStream = FSE_readLE32(ip) >> bitCount;
519                 }
520                 else
521                 {
522                     bitStream >>= 16;
523                     bitCount+=16;
524                 }
525             }
526             while ((bitStream & 3) == 3)
527             {
528                 n0+=3;
529                 bitStream>>=2;
530                 bitCount+=2;
531             }
532             n0 += bitStream & 3;
533             bitCount += 2;
534             if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_maxSymbolValue_tooSmall;
535             while (charnum < n0) normalizedCounter[charnum++] = 0;
536             if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
537             {
538                 ip += bitCount>>3;
539                 bitCount &= 7;
540                 bitStream = FSE_readLE32(ip) >> bitCount;
541             }
542             else
543                 bitStream >>= 2;
544         }
545         {
546             const short max = (short)((2*threshold-1)-remaining);
547             short count;
548 
549             if ((bitStream & (threshold-1)) < (U32)max)
550             {
551                 count = (short)(bitStream & (threshold-1));
552                 bitCount   += nbBits-1;
553             }
554             else
555             {
556                 count = (short)(bitStream & (2*threshold-1));
557                 if (count >= threshold) count -= max;
558                 bitCount   += nbBits;
559             }
560 
561             count--;   /* extra accuracy */
562             remaining -= FSE_abs(count);
563             normalizedCounter[charnum++] = count;
564             previous0 = !count;
565             while (remaining < threshold)
566             {
567                 nbBits--;
568                 threshold >>= 1;
569             }
570 
571             {
572                 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
573                 {
574                     ip += bitCount>>3;
575                     bitCount &= 7;
576                 }
577                 else
578                 {
579                     bitCount -= (int)(8 * (iend - 4 - ip));
580                     ip = iend - 4;
581                 }
582                 bitStream = FSE_readLE32(ip) >> (bitCount & 31);
583             }
584         }
585     }
586     if (remaining != 1) return (size_t)-FSE_ERROR_GENERIC;
587     *maxSVPtr = charnum-1;
588 
589     ip += (bitCount+7)>>3;
590     if ((size_t)(ip-istart) > hbSize) return (size_t)-FSE_ERROR_srcSize_wrong;
591     return ip-istart;
592 }
593 
594 
595 /*********************************************************
596 *  Decompression (Byte symbols)
597 *********************************************************/
598 static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
599 {
600     void* ptr = dt;
601     FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
602     FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1;   /* because dt is unsigned */
603 
604     DTableH->tableLog = 0;
605     DTableH->fastMode = 0;
606 
607     cell->newState = 0;
608     cell->symbol = symbolValue;
609     cell->nbBits = 0;
610 
611     return 0;
612 }
613 
614 
615 static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
616 {
617     void* ptr = dt;
618     FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
619     FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1;   /* because dt is unsigned */
620     const unsigned tableSize = 1 << nbBits;
621     const unsigned tableMask = tableSize - 1;
622     const unsigned maxSymbolValue = tableMask;
623     unsigned s;
624 
625     /* Sanity checks */
626     if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC;             /* min size */
627 
628     /* Build Decoding Table */
629     DTableH->tableLog = (U16)nbBits;
630     DTableH->fastMode = 1;
631     for (s=0; s<=maxSymbolValue; s++)
632     {
633         dinfo[s].newState = 0;
634         dinfo[s].symbol = (BYTE)s;
635         dinfo[s].nbBits = (BYTE)nbBits;
636     }
637 
638     return 0;
639 }
640 
641 
642 /* FSE_initDStream
643  * Initialize a FSE_DStream_t.
644  * srcBuffer must point at the beginning of an FSE block.
645  * The function result is the size of the FSE_block (== srcSize).
646  * If srcSize is too small, the function will return an errorCode;
647  */
648 static size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
649 {
650     if (srcSize < 1) return (size_t)-FSE_ERROR_srcSize_wrong;
651 
652     if (srcSize >=  sizeof(size_t))
653     {
654         U32 contain32;
655         bitD->start = (const char*)srcBuffer;
656         bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(size_t);
657         bitD->bitContainer = FSE_readLEST(bitD->ptr);
658         contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
659         if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC;   /* stop bit not present */
660         bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
661     }
662     else
663     {
664         U32 contain32;
665         bitD->start = (const char*)srcBuffer;
666         bitD->ptr   = bitD->start;
667         bitD->bitContainer = *(const BYTE*)(bitD->start);
668         switch(srcSize)
669         {
670             case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
671             case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
672             case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
673             case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
674             case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
675             case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) <<  8;
676             default:;
677         }
678         contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
679         if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC;   /* stop bit not present */
680         bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
681         bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
682     }
683 
684     return srcSize;
685 }
686 
687 
688 /*!FSE_lookBits
689  * Provides next n bits from the bitContainer.
690  * bitContainer is not modified (bits are still present for next read/look)
691  * On 32-bits, maxNbBits==25
692  * On 64-bits, maxNbBits==57
693  * return : value extracted.
694  */
695 static size_t FSE_lookBits(FSE_DStream_t* bitD, U32 nbBits)
696 {
697     const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
698     return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
699 }
700 
701 static size_t FSE_lookBitsFast(FSE_DStream_t* bitD, U32 nbBits)   /* only if nbBits >= 1 !! */
702 {
703     const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
704     return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
705 }
706 
707 static void FSE_skipBits(FSE_DStream_t* bitD, U32 nbBits)
708 {
709     bitD->bitsConsumed += nbBits;
710 }
711 
712 
713 /*!FSE_readBits
714  * Read next n bits from the bitContainer.
715  * On 32-bits, don't read more than maxNbBits==25
716  * On 64-bits, don't read more than maxNbBits==57
717  * Use the fast variant *only* if n >= 1.
718  * return : value extracted.
719  */
720 static size_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits)
721 {
722     size_t value = FSE_lookBits(bitD, nbBits);
723     FSE_skipBits(bitD, nbBits);
724     return value;
725 }
726 
727 static size_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits)   /* only if nbBits >= 1 !! */
728 {
729     size_t value = FSE_lookBitsFast(bitD, nbBits);
730     FSE_skipBits(bitD, nbBits);
731     return value;
732 }
733 
734 static unsigned FSE_reloadDStream(FSE_DStream_t* bitD)
735 {
736     if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* should never happen */
737         return FSE_DStream_tooFar;
738 
739     if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
740     {
741         bitD->ptr -= bitD->bitsConsumed >> 3;
742         bitD->bitsConsumed &= 7;
743         bitD->bitContainer = FSE_readLEST(bitD->ptr);
744         return FSE_DStream_unfinished;
745     }
746     if (bitD->ptr == bitD->start)
747     {
748         if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return FSE_DStream_endOfBuffer;
749         return FSE_DStream_completed;
750     }
751     {
752         U32 nbBytes = bitD->bitsConsumed >> 3;
753         U32 result = FSE_DStream_unfinished;
754         if (bitD->ptr - nbBytes < bitD->start)
755         {
756             nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
757             result = FSE_DStream_endOfBuffer;
758         }
759         bitD->ptr -= nbBytes;
760         bitD->bitsConsumed -= nbBytes*8;
761         bitD->bitContainer = FSE_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD) */
762         return result;
763     }
764 }
765 
766 
767 static void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const FSE_DTable* dt)
768 {
769     const void* ptr = dt;
770     const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr;
771     DStatePtr->state = FSE_readBits(bitD, DTableH->tableLog);
772     FSE_reloadDStream(bitD);
773     DStatePtr->table = dt + 1;
774 }
775 
776 static BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
777 {
778     const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
779     const U32  nbBits = DInfo.nbBits;
780     BYTE symbol = DInfo.symbol;
781     size_t lowBits = FSE_readBits(bitD, nbBits);
782 
783     DStatePtr->state = DInfo.newState + lowBits;
784     return symbol;
785 }
786 
787 static BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
788 {
789     const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
790     const U32 nbBits = DInfo.nbBits;
791     BYTE symbol = DInfo.symbol;
792     size_t lowBits = FSE_readBitsFast(bitD, nbBits);
793 
794     DStatePtr->state = DInfo.newState + lowBits;
795     return symbol;
796 }
797 
798 /* FSE_endOfDStream
799    Tells if bitD has reached end of bitStream or not */
800 
801 static unsigned FSE_endOfDStream(const FSE_DStream_t* bitD)
802 {
803     return ((bitD->ptr == bitD->start) && (bitD->bitsConsumed == sizeof(bitD->bitContainer)*8));
804 }
805 
806 static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
807 {
808     return DStatePtr->state == 0;
809 }
810 
811 
812 FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
813           void* dst, size_t maxDstSize,
814     const void* cSrc, size_t cSrcSize,
815     const FSE_DTable* dt, const unsigned fast)
816 {
817     BYTE* const ostart = (BYTE*) dst;
818     BYTE* op = ostart;
819     BYTE* const omax = op + maxDstSize;
820     BYTE* const olimit = omax-3;
821 
822     FSE_DStream_t bitD;
823     FSE_DState_t state1;
824     FSE_DState_t state2;
825     size_t errorCode;
826 
827     /* Init */
828     errorCode = FSE_initDStream(&bitD, cSrc, cSrcSize);   /* replaced last arg by maxCompressed Size */
829     if (FSE_isError(errorCode)) return errorCode;
830 
831     FSE_initDState(&state1, &bitD, dt);
832     FSE_initDState(&state2, &bitD, dt);
833 
834 #define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
835 
836     /* 4 symbols per loop */
837     for ( ; (FSE_reloadDStream(&bitD)==FSE_DStream_unfinished) && (op<olimit) ; op+=4)
838     {
839         op[0] = FSE_GETSYMBOL(&state1);
840 
841         if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
842             FSE_reloadDStream(&bitD);
843 
844         op[1] = FSE_GETSYMBOL(&state2);
845 
846         if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
847             { if (FSE_reloadDStream(&bitD) > FSE_DStream_unfinished) { op+=2; break; } }
848 
849         op[2] = FSE_GETSYMBOL(&state1);
850 
851         if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
852             FSE_reloadDStream(&bitD);
853 
854         op[3] = FSE_GETSYMBOL(&state2);
855     }
856 
857     /* tail */
858     /* note : FSE_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly FSE_DStream_completed */
859     while (1)
860     {
861         if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
862             break;
863 
864         *op++ = FSE_GETSYMBOL(&state1);
865 
866         if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
867             break;
868 
869         *op++ = FSE_GETSYMBOL(&state2);
870     }
871 
872     /* end ? */
873     if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
874         return op-ostart;
875 
876     if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */
877 
878     return (size_t)-FSE_ERROR_corruptionDetected;
879 }
880 
881 
882 static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
883                             const void* cSrc, size_t cSrcSize,
884                             const FSE_DTable* dt)
885 {
886     FSE_DTableHeader DTableH;
887     memcpy(&DTableH, dt, sizeof(DTableH));   /* memcpy() into local variable, to avoid strict aliasing warning */
888 
889     /* select fast mode (static) */
890     if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
891     return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
892 }
893 
894 
895 static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
896 {
897     const BYTE* const istart = (const BYTE*)cSrc;
898     const BYTE* ip = istart;
899     short counting[FSE_MAX_SYMBOL_VALUE+1];
900     DTable_max_t dt;   /* Static analyzer seems unable to understand this table will be properly initialized later */
901     unsigned tableLog;
902     unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
903     size_t errorCode;
904 
905     if (cSrcSize<2) return (size_t)-FSE_ERROR_srcSize_wrong;   /* too small input size */
906 
907     /* normal FSE decoding mode */
908     errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
909     if (FSE_isError(errorCode)) return errorCode;
910     if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;   /* too small input size */
911     ip += errorCode;
912     cSrcSize -= errorCode;
913 
914     errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
915     if (FSE_isError(errorCode)) return errorCode;
916 
917     /* always return, even if it is an error code */
918     return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
919 }
920 
921 
922 
923 /* *******************************************************
924 *  Huff0 : Huffman block compression
925 *********************************************************/
926 #define HUF_MAX_SYMBOL_VALUE 255
927 #define HUF_DEFAULT_TABLELOG  12       /* used by default, when not specified */
928 #define HUF_MAX_TABLELOG  12           /* max possible tableLog; for allocation purpose; can be modified */
929 #define HUF_ABSOLUTEMAX_TABLELOG  16   /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
930 #if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
931 #  error "HUF_MAX_TABLELOG is too large !"
932 #endif
933 
934 typedef struct HUF_CElt_s {
935   U16  val;
936   BYTE nbBits;
937 } HUF_CElt ;
938 
939 typedef struct nodeElt_s {
940     U32 count;
941     U16 parent;
942     BYTE byte;
943     BYTE nbBits;
944 } nodeElt;
945 
946 
947 /* *******************************************************
948 *  Huff0 : Huffman block decompression
949 *********************************************************/
950 typedef struct {
951     BYTE byte;
952     BYTE nbBits;
953 } HUF_DElt;
954 
955 static size_t HUF_readDTable (U16* DTable, const void* src, size_t srcSize)
956 {
957     BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
958     U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];  /* large enough for values from 0 to 16 */
959     U32 weightTotal;
960     U32 maxBits;
961     const BYTE* ip = (const BYTE*) src;
962     size_t iSize;
963     size_t oSize;
964     U32 n;
965     U32 nextRankStart;
966     void* ptr = DTable+1;
967     HUF_DElt* const dt = (HUF_DElt*)ptr;
968 
969     if (!srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
970     iSize = ip[0];
971 
972     FSE_STATIC_ASSERT(sizeof(HUF_DElt) == sizeof(U16));   /* if compilation fails here, assertion is false */
973     //memset(huffWeight, 0, sizeof(huffWeight));   /* should not be necessary, but some analyzer complain ... */
974     if (iSize >= 128)  /* special header */
975     {
976         if (iSize >= (242))   /* RLE */
977         {
978             static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
979             oSize = l[iSize-242];
980             memset(huffWeight, 1, sizeof(huffWeight));
981             iSize = 0;
982         }
983         else   /* Incompressible */
984         {
985             oSize = iSize - 127;
986             iSize = ((oSize+1)/2);
987             if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
988             ip += 1;
989             for (n=0; n<oSize; n+=2)
990             {
991                 huffWeight[n]   = ip[n/2] >> 4;
992                 huffWeight[n+1] = ip[n/2] & 15;
993             }
994         }
995     }
996     else  /* header compressed with FSE (normal case) */
997     {
998         if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
999         oSize = FSE_decompress(huffWeight, HUF_MAX_SYMBOL_VALUE, ip+1, iSize);   /* max 255 values decoded, last one is implied */
1000         if (FSE_isError(oSize)) return oSize;
1001     }
1002 
1003     /* collect weight stats */
1004     memset(rankVal, 0, sizeof(rankVal));
1005     weightTotal = 0;
1006     for (n=0; n<oSize; n++)
1007     {
1008         if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return (size_t)-FSE_ERROR_corruptionDetected;
1009         rankVal[huffWeight[n]]++;
1010         weightTotal += (1 << huffWeight[n]) >> 1;
1011     }
1012     if (weightTotal == 0) return (size_t)-FSE_ERROR_corruptionDetected;
1013 
1014     /* get last non-null symbol weight (implied, total must be 2^n) */
1015     maxBits = FSE_highbit32(weightTotal) + 1;
1016     if (maxBits > DTable[0]) return (size_t)-FSE_ERROR_tableLog_tooLarge;   /* DTable is too small */
1017     DTable[0] = (U16)maxBits;
1018     {
1019         U32 total = 1 << maxBits;
1020         U32 rest = total - weightTotal;
1021         U32 verif = 1 << FSE_highbit32(rest);
1022         U32 lastWeight = FSE_highbit32(rest) + 1;
1023         if (verif != rest) return (size_t)-FSE_ERROR_corruptionDetected;    /* last value must be a clean power of 2 */
1024         huffWeight[oSize] = (BYTE)lastWeight;
1025         rankVal[lastWeight]++;
1026     }
1027 
1028     /* check tree construction validity */
1029     if ((rankVal[1] < 2) || (rankVal[1] & 1)) return (size_t)-FSE_ERROR_corruptionDetected;   /* by construction : at least 2 elts of rank 1, must be even */
1030 
1031     /* Prepare ranks */
1032     nextRankStart = 0;
1033     for (n=1; n<=maxBits; n++)
1034     {
1035         U32 current = nextRankStart;
1036         nextRankStart += (rankVal[n] << (n-1));
1037         rankVal[n] = current;
1038     }
1039 
1040     /* fill DTable */
1041     for (n=0; n<=oSize; n++)
1042     {
1043         const U32 w = huffWeight[n];
1044         const U32 length = (1 << w) >> 1;
1045         U32 i;
1046         HUF_DElt D;
1047         D.byte = (BYTE)n; D.nbBits = (BYTE)(maxBits + 1 - w);
1048         for (i = rankVal[w]; i < rankVal[w] + length; i++)
1049             dt[i] = D;
1050         rankVal[w] += length;
1051     }
1052 
1053     return iSize+1;
1054 }
1055 
1056 
1057 static BYTE HUF_decodeSymbol(FSE_DStream_t* Dstream, const HUF_DElt* dt, const U32 dtLog)
1058 {
1059         const size_t val = FSE_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
1060         const BYTE c = dt[val].byte;
1061         FSE_skipBits(Dstream, dt[val].nbBits);
1062         return c;
1063 }
1064 
1065 static size_t HUF_decompress_usingDTable(   /* -3% slower when non static */
1066           void* dst, size_t maxDstSize,
1067     const void* cSrc, size_t cSrcSize,
1068     const U16* DTable)
1069 {
1070     BYTE* const ostart = (BYTE*) dst;
1071     BYTE* op = ostart;
1072     BYTE* const omax = op + maxDstSize;
1073     BYTE* const olimit = omax-15;
1074 
1075     const void* ptr = DTable;
1076     const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1;
1077     const U32 dtLog = DTable[0];
1078     size_t errorCode;
1079     U32 reloadStatus;
1080 
1081     /* Init */
1082 
1083     const U16* jumpTable = (const U16*)cSrc;
1084     const size_t length1 = FSE_readLE16(jumpTable);
1085     const size_t length2 = FSE_readLE16(jumpTable+1);
1086     const size_t length3 = FSE_readLE16(jumpTable+2);
1087     const size_t length4 = cSrcSize - 6 - length1 - length2 - length3;   // check coherency !!
1088     const char* const start1 = (const char*)(cSrc) + 6;
1089     const char* const start2 = start1 + length1;
1090     const char* const start3 = start2 + length2;
1091     const char* const start4 = start3 + length3;
1092     FSE_DStream_t bitD1, bitD2, bitD3, bitD4;
1093 
1094     if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
1095 
1096     errorCode = FSE_initDStream(&bitD1, start1, length1);
1097     if (FSE_isError(errorCode)) return errorCode;
1098     errorCode = FSE_initDStream(&bitD2, start2, length2);
1099     if (FSE_isError(errorCode)) return errorCode;
1100     errorCode = FSE_initDStream(&bitD3, start3, length3);
1101     if (FSE_isError(errorCode)) return errorCode;
1102     errorCode = FSE_initDStream(&bitD4, start4, length4);
1103     if (FSE_isError(errorCode)) return errorCode;
1104 
1105     reloadStatus=FSE_reloadDStream(&bitD2);
1106 
1107     /* 16 symbols per loop */
1108     for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit);  /* D2-3-4 are supposed to be synchronized and finish together */
1109         op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
1110     {
1111 #define HUF_DECODE_SYMBOL_0(n, Dstream) \
1112         op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);
1113 
1114 #define HUF_DECODE_SYMBOL_1(n, Dstream) \
1115         op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
1116         if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)
1117 
1118 #define HUF_DECODE_SYMBOL_2(n, Dstream) \
1119         op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
1120         if (FSE_32bits()) FSE_reloadDStream(&Dstream)
1121 
1122         HUF_DECODE_SYMBOL_1( 0, bitD1);
1123         HUF_DECODE_SYMBOL_1( 1, bitD2);
1124         HUF_DECODE_SYMBOL_1( 2, bitD3);
1125         HUF_DECODE_SYMBOL_1( 3, bitD4);
1126         HUF_DECODE_SYMBOL_2( 4, bitD1);
1127         HUF_DECODE_SYMBOL_2( 5, bitD2);
1128         HUF_DECODE_SYMBOL_2( 6, bitD3);
1129         HUF_DECODE_SYMBOL_2( 7, bitD4);
1130         HUF_DECODE_SYMBOL_1( 8, bitD1);
1131         HUF_DECODE_SYMBOL_1( 9, bitD2);
1132         HUF_DECODE_SYMBOL_1(10, bitD3);
1133         HUF_DECODE_SYMBOL_1(11, bitD4);
1134         HUF_DECODE_SYMBOL_0(12, bitD1);
1135         HUF_DECODE_SYMBOL_0(13, bitD2);
1136         HUF_DECODE_SYMBOL_0(14, bitD3);
1137         HUF_DECODE_SYMBOL_0(15, bitD4);
1138     }
1139 
1140     if (reloadStatus!=FSE_DStream_completed)   /* not complete : some bitStream might be FSE_DStream_unfinished */
1141         return (size_t)-FSE_ERROR_corruptionDetected;
1142 
1143     /* tail */
1144     {
1145         // bitTail = bitD1;   // *much* slower : -20% !??!
1146         FSE_DStream_t bitTail;
1147         bitTail.ptr = bitD1.ptr;
1148         bitTail.bitsConsumed = bitD1.bitsConsumed;
1149         bitTail.bitContainer = bitD1.bitContainer;   // required in case of FSE_DStream_endOfBuffer
1150         bitTail.start = start1;
1151         for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
1152         {
1153             HUF_DECODE_SYMBOL_0(0, bitTail);
1154         }
1155 
1156         if (FSE_endOfDStream(&bitTail))
1157             return op-ostart;
1158     }
1159 
1160     if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */
1161 
1162     return (size_t)-FSE_ERROR_corruptionDetected;
1163 }
1164 
1165 
1166 static size_t HUF_decompress (void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
1167 {
1168     HUF_CREATE_STATIC_DTABLE(DTable, HUF_MAX_TABLELOG);
1169     const BYTE* ip = (const BYTE*) cSrc;
1170     size_t errorCode;
1171 
1172     errorCode = HUF_readDTable (DTable, cSrc, cSrcSize);
1173     if (FSE_isError(errorCode)) return errorCode;
1174     if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
1175     ip += errorCode;
1176     cSrcSize -= errorCode;
1177 
1178     return HUF_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable);
1179 }
1180 
1181 
1182 #endif   /* FSE_COMMONDEFS_ONLY */
1183 
1184 /*
1185     zstd - standard compression library
1186     Copyright (C) 2014-2015, Yann Collet.
1187 
1188     BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
1189 
1190     Redistribution and use in source and binary forms, with or without
1191     modification, are permitted provided that the following conditions are
1192     met:
1193     * Redistributions of source code must retain the above copyright
1194     notice, this list of conditions and the following disclaimer.
1195     * Redistributions in binary form must reproduce the above
1196     copyright notice, this list of conditions and the following disclaimer
1197     in the documentation and/or other materials provided with the
1198     distribution.
1199     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1200     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1201     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1202     A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1203     OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1204     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1205     LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1206     DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1207     THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1208     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1209     OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1210 
1211     You can contact the author at :
1212     - zstd source repository : https://github.com/Cyan4973/zstd
1213     - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
1214 */
1215 
1216 /****************************************************************
1217 *  Tuning parameters
1218 *****************************************************************/
1219 /* MEMORY_USAGE :
1220 *  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
1221 *  Increasing memory usage improves compression ratio
1222 *  Reduced memory usage can improve speed, due to cache effect */
1223 #define ZSTD_MEMORY_USAGE 17
1224 
1225 
1226 /**************************************
1227    CPU Feature Detection
1228 **************************************/
1229 /*
1230  * Automated efficient unaligned memory access detection
1231  * Based on known hardware architectures
1232  * This list will be updated thanks to feedbacks
1233  */
1234 #if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \
1235     || defined(__ARM_FEATURE_UNALIGNED) \
1236     || defined(__i386__) || defined(__x86_64__) \
1237     || defined(_M_IX86) || defined(_M_X64) \
1238     || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__) \
1239     || (defined(_M_ARM) && (_M_ARM >= 7))
1240 #  define ZSTD_UNALIGNED_ACCESS 1
1241 #else
1242 #  define ZSTD_UNALIGNED_ACCESS 0
1243 #endif
1244 
1245 
1246 /********************************************************
1247 *  Includes
1248 *********************************************************/
1249 #include <stdlib.h>      /* calloc */
1250 #include <string.h>      /* memcpy, memmove */
1251 #include <stdio.h>       /* debug : printf */
1252 
1253 
1254 /********************************************************
1255 *  Compiler specifics
1256 *********************************************************/
1257 #ifdef __AVX2__
1258 #  include <immintrin.h>   /* AVX2 intrinsics */
1259 #endif
1260 
1261 #ifdef _MSC_VER    /* Visual Studio */
1262 #  include <intrin.h>                    /* For Visual 2005 */
1263 #  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
1264 #  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
1265 #endif
1266 
1267 
1268 #ifndef MEM_ACCESS_MODULE
1269 #define MEM_ACCESS_MODULE
1270 /********************************************************
1271 *  Basic Types
1272 *********************************************************/
1273 #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
1274 # include <stdint.h>
1275 typedef  uint8_t BYTE;
1276 typedef uint16_t U16;
1277 typedef  int16_t S16;
1278 typedef uint32_t U32;
1279 typedef  int32_t S32;
1280 typedef uint64_t U64;
1281 #else
1282 typedef unsigned char       BYTE;
1283 typedef unsigned short      U16;
1284 typedef   signed short      S16;
1285 typedef unsigned int        U32;
1286 typedef   signed int        S32;
1287 typedef unsigned long long  U64;
1288 #endif
1289 
1290 #endif   /* MEM_ACCESS_MODULE */
1291 
1292 
1293 /********************************************************
1294 *  Constants
1295 *********************************************************/
1296 static const U32 ZSTD_magicNumber = 0xFD2FB51E;   /* 3rd version : seqNb header */
1297 
1298 #define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
1299 #define HASH_TABLESIZE (1 << HASH_LOG)
1300 #define HASH_MASK (HASH_TABLESIZE - 1)
1301 
1302 #define KNUTH 2654435761
1303 
1304 #define BIT7 128
1305 #define BIT6  64
1306 #define BIT5  32
1307 #define BIT4  16
1308 
1309 #define KB *(1 <<10)
1310 #define MB *(1 <<20)
1311 #define GB *(1U<<30)
1312 
1313 #define BLOCKSIZE (128 KB)                 /* define, for static allocation */
1314 
1315 #define WORKPLACESIZE (BLOCKSIZE*3)
1316 #define MINMATCH 4
1317 #define MLbits   7
1318 #define LLbits   6
1319 #define Offbits  5
1320 #define MaxML  ((1<<MLbits )-1)
1321 #define MaxLL  ((1<<LLbits )-1)
1322 #define MaxOff ((1<<Offbits)-1)
1323 #define LitFSELog  11
1324 #define MLFSELog   10
1325 #define LLFSELog   10
1326 #define OffFSELog   9
1327 #define MAX(a,b) ((a)<(b)?(b):(a))
1328 #define MaxSeq MAX(MaxLL, MaxML)
1329 
1330 #define LITERAL_NOENTROPY 63
1331 #define COMMAND_NOENTROPY 7   /* to remove */
1332 
1333 static const size_t ZSTD_blockHeaderSize = 3;
1334 static const size_t ZSTD_frameHeaderSize = 4;
1335 
1336 
1337 /********************************************************
1338 *  Memory operations
1339 *********************************************************/
1340 static unsigned ZSTD_32bits(void) { return sizeof(void*)==4; }
1341 
1342 static unsigned ZSTD_isLittleEndian(void)
1343 {
1344     const union { U32 i; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
1345     return one.c[0];
1346 }
1347 
1348 static U16    ZSTD_read16(const void* p) { U16 r; memcpy(&r, p, sizeof(r)); return r; }
1349 
1350 static U32    ZSTD_read32(const void* p) { U32 r; memcpy(&r, p, sizeof(r)); return r; }
1351 
1352 static void   ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
1353 
1354 static void   ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
1355 
1356 #define COPY8(d,s)    { ZSTD_copy8(d,s); d+=8; s+=8; }
1357 
1358 static void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
1359 {
1360     const BYTE* ip = (const BYTE*)src;
1361     BYTE* op = (BYTE*)dst;
1362     BYTE* const oend = op + length;
1363     while (op < oend) COPY8(op, ip);
1364 }
1365 
1366 static U16 ZSTD_readLE16(const void* memPtr)
1367 {
1368     if (ZSTD_isLittleEndian()) return ZSTD_read16(memPtr);
1369     else
1370     {
1371         const BYTE* p = (const BYTE*)memPtr;
1372         return (U16)((U16)p[0] + ((U16)p[1]<<8));
1373     }
1374 }
1375 
1376 
1377 static U32 ZSTD_readLE32(const void* memPtr)
1378 {
1379     if (ZSTD_isLittleEndian())
1380         return ZSTD_read32(memPtr);
1381     else
1382     {
1383         const BYTE* p = (const BYTE*)memPtr;
1384         return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
1385     }
1386 }
1387 
1388 static U32 ZSTD_readBE32(const void* memPtr)
1389 {
1390     const BYTE* p = (const BYTE*)memPtr;
1391     return (U32)(((U32)p[0]<<24) + ((U32)p[1]<<16) + ((U32)p[2]<<8) + ((U32)p[3]<<0));
1392 }
1393 
1394 
1395 /**************************************
1396 *  Local structures
1397 ***************************************/
1398 typedef struct ZSTD_Cctx_s ZSTD_Cctx;
1399 
1400 typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
1401 
1402 typedef struct
1403 {
1404     blockType_t blockType;
1405     U32 origSize;
1406 } blockProperties_t;
1407 
1408 typedef struct {
1409     void* buffer;
1410     U32*  offsetStart;
1411     U32*  offset;
1412     BYTE* offCodeStart;
1413     BYTE* offCode;
1414     BYTE* litStart;
1415     BYTE* lit;
1416     BYTE* litLengthStart;
1417     BYTE* litLength;
1418     BYTE* matchLengthStart;
1419     BYTE* matchLength;
1420     BYTE* dumpsStart;
1421     BYTE* dumps;
1422 } seqStore_t;
1423 
1424 
1425 typedef struct ZSTD_Cctx_s
1426 {
1427     const BYTE* base;
1428     U32 current;
1429     U32 nextUpdate;
1430     seqStore_t seqStore;
1431 #ifdef __AVX2__
1432     __m256i hashTable[HASH_TABLESIZE>>3];
1433 #else
1434     U32 hashTable[HASH_TABLESIZE];
1435 #endif
1436     BYTE buffer[WORKPLACESIZE];
1437 } cctxi_t;
1438 
1439 
1440 
1441 
1442 /**************************************
1443 *  Error Management
1444 **************************************/
1445 /* published entry point */
1446 unsigned ZSTDv01_isError(size_t code) { return ERR_isError(code); }
1447 
1448 
1449 /**************************************
1450 *  Tool functions
1451 **************************************/
1452 #define ZSTD_VERSION_MAJOR    0    /* for breaking interface changes  */
1453 #define ZSTD_VERSION_MINOR    1    /* for new (non-breaking) interface capabilities */
1454 #define ZSTD_VERSION_RELEASE  3    /* for tweaks, bug-fixes, or development */
1455 #define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
1456 
1457 /**************************************************************
1458 *   Decompression code
1459 **************************************************************/
1460 
1461 size_t ZSTDv01_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
1462 {
1463     const BYTE* const in = (const BYTE* const)src;
1464     BYTE headerFlags;
1465     U32 cSize;
1466 
1467     if (srcSize < 3) return ERROR(srcSize_wrong);
1468 
1469     headerFlags = *in;
1470     cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
1471 
1472     bpPtr->blockType = (blockType_t)(headerFlags >> 6);
1473     bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
1474 
1475     if (bpPtr->blockType == bt_end) return 0;
1476     if (bpPtr->blockType == bt_rle) return 1;
1477     return cSize;
1478 }
1479 
1480 
1481 static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1482 {
1483     if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
1484     memcpy(dst, src, srcSize);
1485     return srcSize;
1486 }
1487 
1488 
1489 static size_t ZSTD_decompressLiterals(void* ctx,
1490                                       void* dst, size_t maxDstSize,
1491                                 const void* src, size_t srcSize)
1492 {
1493     BYTE* op = (BYTE*)dst;
1494     BYTE* const oend = op + maxDstSize;
1495     const BYTE* ip = (const BYTE*)src;
1496     size_t errorCode;
1497     size_t litSize;
1498 
1499     /* check : minimum 2, for litSize, +1, for content */
1500     if (srcSize <= 3) return ERROR(corruption_detected);
1501 
1502     litSize = ip[1] + (ip[0]<<8);
1503     litSize += ((ip[-3] >> 3) & 7) << 16;   // mmmmh....
1504     op = oend - litSize;
1505 
1506     (void)ctx;
1507     if (litSize > maxDstSize) return ERROR(dstSize_tooSmall);
1508     errorCode = HUF_decompress(op, litSize, ip+2, srcSize-2);
1509     if (FSE_isError(errorCode)) return ERROR(GENERIC);
1510     return litSize;
1511 }
1512 
1513 
1514 size_t ZSTDv01_decodeLiteralsBlock(void* ctx,
1515                                 void* dst, size_t maxDstSize,
1516                           const BYTE** litStart, size_t* litSize,
1517                           const void* src, size_t srcSize)
1518 {
1519     const BYTE* const istart = (const BYTE* const)src;
1520     const BYTE* ip = istart;
1521     BYTE* const ostart = (BYTE* const)dst;
1522     BYTE* const oend = ostart + maxDstSize;
1523     blockProperties_t litbp;
1524 
1525     size_t litcSize = ZSTDv01_getcBlockSize(src, srcSize, &litbp);
1526     if (ZSTDv01_isError(litcSize)) return litcSize;
1527     if (litcSize > srcSize - ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
1528     ip += ZSTD_blockHeaderSize;
1529 
1530     switch(litbp.blockType)
1531     {
1532     case bt_raw:
1533         *litStart = ip;
1534         ip += litcSize;
1535         *litSize = litcSize;
1536         break;
1537     case bt_rle:
1538         {
1539             size_t rleSize = litbp.origSize;
1540             if (rleSize>maxDstSize) return ERROR(dstSize_tooSmall);
1541             if (!srcSize) return ERROR(srcSize_wrong);
1542             memset(oend - rleSize, *ip, rleSize);
1543             *litStart = oend - rleSize;
1544             *litSize = rleSize;
1545             ip++;
1546             break;
1547         }
1548     case bt_compressed:
1549         {
1550             size_t decodedLitSize = ZSTD_decompressLiterals(ctx, dst, maxDstSize, ip, litcSize);
1551             if (ZSTDv01_isError(decodedLitSize)) return decodedLitSize;
1552             *litStart = oend - decodedLitSize;
1553             *litSize = decodedLitSize;
1554             ip += litcSize;
1555             break;
1556         }
1557     case bt_end:
1558     default:
1559         return ERROR(GENERIC);
1560     }
1561 
1562     return ip-istart;
1563 }
1564 
1565 
1566 size_t ZSTDv01_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
1567                          FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
1568                          const void* src, size_t srcSize)
1569 {
1570     const BYTE* const istart = (const BYTE* const)src;
1571     const BYTE* ip = istart;
1572     const BYTE* const iend = istart + srcSize;
1573     U32 LLtype, Offtype, MLtype;
1574     U32 LLlog, Offlog, MLlog;
1575     size_t dumpsLength;
1576 
1577     /* check */
1578     if (srcSize < 5) return ERROR(srcSize_wrong);
1579 
1580     /* SeqHead */
1581     *nbSeq = ZSTD_readLE16(ip); ip+=2;
1582     LLtype  = *ip >> 6;
1583     Offtype = (*ip >> 4) & 3;
1584     MLtype  = (*ip >> 2) & 3;
1585     if (*ip & 2)
1586     {
1587         dumpsLength  = ip[2];
1588         dumpsLength += ip[1] << 8;
1589         ip += 3;
1590     }
1591     else
1592     {
1593         dumpsLength  = ip[1];
1594         dumpsLength += (ip[0] & 1) << 8;
1595         ip += 2;
1596     }
1597     *dumpsPtr = ip;
1598     ip += dumpsLength;
1599     *dumpsLengthPtr = dumpsLength;
1600 
1601     /* check */
1602     if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
1603 
1604     /* sequences */
1605     {
1606         S16 norm[MaxML+1];    /* assumption : MaxML >= MaxLL and MaxOff */
1607         size_t headerSize;
1608 
1609         /* Build DTables */
1610         switch(LLtype)
1611         {
1612         case bt_rle :
1613             LLlog = 0;
1614             FSE_buildDTable_rle(DTableLL, *ip++); break;
1615         case bt_raw :
1616             LLlog = LLbits;
1617             FSE_buildDTable_raw(DTableLL, LLbits); break;
1618         default :
1619             {   U32 max = MaxLL;
1620                 headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
1621                 if (FSE_isError(headerSize)) return ERROR(GENERIC);
1622                 if (LLlog > LLFSELog) return ERROR(corruption_detected);
1623                 ip += headerSize;
1624                 FSE_buildDTable(DTableLL, norm, max, LLlog);
1625         }   }
1626 
1627         switch(Offtype)
1628         {
1629         case bt_rle :
1630             Offlog = 0;
1631             if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
1632             FSE_buildDTable_rle(DTableOffb, *ip++); break;
1633         case bt_raw :
1634             Offlog = Offbits;
1635             FSE_buildDTable_raw(DTableOffb, Offbits); break;
1636         default :
1637             {   U32 max = MaxOff;
1638                 headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
1639                 if (FSE_isError(headerSize)) return ERROR(GENERIC);
1640                 if (Offlog > OffFSELog) return ERROR(corruption_detected);
1641                 ip += headerSize;
1642                 FSE_buildDTable(DTableOffb, norm, max, Offlog);
1643         }   }
1644 
1645         switch(MLtype)
1646         {
1647         case bt_rle :
1648             MLlog = 0;
1649             if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
1650             FSE_buildDTable_rle(DTableML, *ip++); break;
1651         case bt_raw :
1652             MLlog = MLbits;
1653             FSE_buildDTable_raw(DTableML, MLbits); break;
1654         default :
1655             {   U32 max = MaxML;
1656                 headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
1657                 if (FSE_isError(headerSize)) return ERROR(GENERIC);
1658                 if (MLlog > MLFSELog) return ERROR(corruption_detected);
1659                 ip += headerSize;
1660                 FSE_buildDTable(DTableML, norm, max, MLlog);
1661     }   }   }
1662 
1663     return ip-istart;
1664 }
1665 
1666 
1667 typedef struct {
1668     size_t litLength;
1669     size_t offset;
1670     size_t matchLength;
1671 } seq_t;
1672 
1673 typedef struct {
1674     FSE_DStream_t DStream;
1675     FSE_DState_t stateLL;
1676     FSE_DState_t stateOffb;
1677     FSE_DState_t stateML;
1678     size_t prevOffset;
1679     const BYTE* dumps;
1680     const BYTE* dumpsEnd;
1681 } seqState_t;
1682 
1683 
1684 static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
1685 {
1686     size_t litLength;
1687     size_t prevOffset;
1688     size_t offset;
1689     size_t matchLength;
1690     const BYTE* dumps = seqState->dumps;
1691     const BYTE* const de = seqState->dumpsEnd;
1692 
1693     /* Literal length */
1694     litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
1695     prevOffset = litLength ? seq->offset : seqState->prevOffset;
1696     seqState->prevOffset = seq->offset;
1697     if (litLength == MaxLL)
1698     {
1699         U32 add = dumps<de ? *dumps++ : 0;
1700         if (add < 255) litLength += add;
1701         else
1702         {
1703             if (dumps<=(de-3))
1704             {
1705                 litLength = ZSTD_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
1706                 dumps += 3;
1707             }
1708         }
1709     }
1710 
1711     /* Offset */
1712     {
1713         U32 offsetCode, nbBits;
1714         offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream));
1715         if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
1716         nbBits = offsetCode - 1;
1717         if (offsetCode==0) nbBits = 0;   /* cmove */
1718         offset = ((size_t)1 << (nbBits & ((sizeof(offset)*8)-1))) + FSE_readBits(&(seqState->DStream), nbBits);
1719         if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
1720         if (offsetCode==0) offset = prevOffset;
1721     }
1722 
1723     /* MatchLength */
1724     matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
1725     if (matchLength == MaxML)
1726     {
1727         U32 add = dumps<de ? *dumps++ : 0;
1728         if (add < 255) matchLength += add;
1729         else
1730         {
1731             if (dumps<=(de-3))
1732             {
1733                 matchLength = ZSTD_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
1734                 dumps += 3;
1735             }
1736         }
1737     }
1738     matchLength += MINMATCH;
1739 
1740     /* save result */
1741     seq->litLength = litLength;
1742     seq->offset = offset;
1743     seq->matchLength = matchLength;
1744     seqState->dumps = dumps;
1745 }
1746 
1747 
1748 static size_t ZSTD_execSequence(BYTE* op,
1749                                 seq_t sequence,
1750                                 const BYTE** litPtr, const BYTE* const litLimit,
1751                                 BYTE* const base, BYTE* const oend)
1752 {
1753     static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};   /* added */
1754     static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11};   /* substracted */
1755     const BYTE* const ostart = op;
1756     const size_t litLength = sequence.litLength;
1757     BYTE* const endMatch = op + litLength + sequence.matchLength;    /* risk : address space overflow (32-bits) */
1758     const BYTE* const litEnd = *litPtr + litLength;
1759 
1760     /* check */
1761     if (endMatch > oend) return ERROR(dstSize_tooSmall);   /* overwrite beyond dst buffer */
1762     if (litEnd > litLimit) return ERROR(corruption_detected);
1763     if (sequence.matchLength > (size_t)(*litPtr-op))  return ERROR(dstSize_tooSmall);    /* overwrite literal segment */
1764 
1765     /* copy Literals */
1766     if (((size_t)(*litPtr - op) < 8) || ((size_t)(oend-litEnd) < 8) || (op+litLength > oend-8))
1767         memmove(op, *litPtr, litLength);   /* overwrite risk */
1768     else
1769         ZSTD_wildcopy(op, *litPtr, litLength);
1770     op += litLength;
1771     *litPtr = litEnd;   /* update for next sequence */
1772 
1773     /* check : last match must be at a minimum distance of 8 from end of dest buffer */
1774     if (oend-op < 8) return ERROR(dstSize_tooSmall);
1775 
1776     /* copy Match */
1777     {
1778         const U32 overlapRisk = (((size_t)(litEnd - endMatch)) < 12);
1779         const BYTE* match = op - sequence.offset;            /* possible underflow at op - offset ? */
1780         size_t qutt = 12;
1781         U64 saved[2];
1782 
1783         /* check */
1784         if (match < base) return ERROR(corruption_detected);
1785         if (sequence.offset > (size_t)base) return ERROR(corruption_detected);
1786 
1787         /* save beginning of literal sequence, in case of write overlap */
1788         if (overlapRisk)
1789         {
1790             if ((endMatch + qutt) > oend) qutt = oend-endMatch;
1791             memcpy(saved, endMatch, qutt);
1792         }
1793 
1794         if (sequence.offset < 8)
1795         {
1796             const int dec64 = dec64table[sequence.offset];
1797             op[0] = match[0];
1798             op[1] = match[1];
1799             op[2] = match[2];
1800             op[3] = match[3];
1801             match += dec32table[sequence.offset];
1802             ZSTD_copy4(op+4, match);
1803             match -= dec64;
1804         } else { ZSTD_copy8(op, match); }
1805         op += 8; match += 8;
1806 
1807         if (endMatch > oend-(16-MINMATCH))
1808         {
1809             if (op < oend-8)
1810             {
1811                 ZSTD_wildcopy(op, match, (oend-8) - op);
1812                 match += (oend-8) - op;
1813                 op = oend-8;
1814             }
1815             while (op<endMatch) *op++ = *match++;
1816         }
1817         else
1818             ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */
1819 
1820         /* restore, in case of overlap */
1821         if (overlapRisk) memcpy(endMatch, saved, qutt);
1822     }
1823 
1824     return endMatch-ostart;
1825 }
1826 
1827 typedef struct ZSTDv01_Dctx_s
1828 {
1829     U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
1830     U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
1831     U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
1832     void* previousDstEnd;
1833     void* base;
1834     size_t expected;
1835     blockType_t bType;
1836     U32 phase;
1837 } dctx_t;
1838 
1839 
1840 static size_t ZSTD_decompressSequences(
1841                                void* ctx,
1842                                void* dst, size_t maxDstSize,
1843                          const void* seqStart, size_t seqSize,
1844                          const BYTE* litStart, size_t litSize)
1845 {
1846     dctx_t* dctx = (dctx_t*)ctx;
1847     const BYTE* ip = (const BYTE*)seqStart;
1848     const BYTE* const iend = ip + seqSize;
1849     BYTE* const ostart = (BYTE* const)dst;
1850     BYTE* op = ostart;
1851     BYTE* const oend = ostart + maxDstSize;
1852     size_t errorCode, dumpsLength;
1853     const BYTE* litPtr = litStart;
1854     const BYTE* const litEnd = litStart + litSize;
1855     int nbSeq;
1856     const BYTE* dumps;
1857     U32* DTableLL = dctx->LLTable;
1858     U32* DTableML = dctx->MLTable;
1859     U32* DTableOffb = dctx->OffTable;
1860     BYTE* const base = (BYTE*) (dctx->base);
1861 
1862     /* Build Decoding Tables */
1863     errorCode = ZSTDv01_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
1864                                       DTableLL, DTableML, DTableOffb,
1865                                       ip, iend-ip);
1866     if (ZSTDv01_isError(errorCode)) return errorCode;
1867     ip += errorCode;
1868 
1869     /* Regen sequences */
1870     {
1871         seq_t sequence;
1872         seqState_t seqState;
1873 
1874         memset(&sequence, 0, sizeof(sequence));
1875         seqState.dumps = dumps;
1876         seqState.dumpsEnd = dumps + dumpsLength;
1877         seqState.prevOffset = 1;
1878         errorCode = FSE_initDStream(&(seqState.DStream), ip, iend-ip);
1879         if (FSE_isError(errorCode)) return ERROR(corruption_detected);
1880         FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
1881         FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
1882         FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
1883 
1884         for ( ; (FSE_reloadDStream(&(seqState.DStream)) <= FSE_DStream_completed) && (nbSeq>0) ; )
1885         {
1886             size_t oneSeqSize;
1887             nbSeq--;
1888             ZSTD_decodeSequence(&sequence, &seqState);
1889             oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litEnd, base, oend);
1890             if (ZSTDv01_isError(oneSeqSize)) return oneSeqSize;
1891             op += oneSeqSize;
1892         }
1893 
1894         /* check if reached exact end */
1895         if ( !FSE_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected);   /* requested too much : data is corrupted */
1896         if (nbSeq<0) return ERROR(corruption_detected);   /* requested too many sequences : data is corrupted */
1897 
1898         /* last literal segment */
1899         {
1900             size_t lastLLSize = litEnd - litPtr;
1901             if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
1902             if (op != litPtr) memmove(op, litPtr, lastLLSize);
1903             op += lastLLSize;
1904         }
1905     }
1906 
1907     return op-ostart;
1908 }
1909 
1910 
1911 static size_t ZSTD_decompressBlock(
1912                             void* ctx,
1913                             void* dst, size_t maxDstSize,
1914                       const void* src, size_t srcSize)
1915 {
1916     /* blockType == blockCompressed, srcSize is trusted */
1917     const BYTE* ip = (const BYTE*)src;
1918     const BYTE* litPtr = NULL;
1919     size_t litSize = 0;
1920     size_t errorCode;
1921 
1922     /* Decode literals sub-block */
1923     errorCode = ZSTDv01_decodeLiteralsBlock(ctx, dst, maxDstSize, &litPtr, &litSize, src, srcSize);
1924     if (ZSTDv01_isError(errorCode)) return errorCode;
1925     ip += errorCode;
1926     srcSize -= errorCode;
1927 
1928     return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize, litPtr, litSize);
1929 }
1930 
1931 
1932 size_t ZSTDv01_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1933 {
1934     const BYTE* ip = (const BYTE*)src;
1935     const BYTE* iend = ip + srcSize;
1936     BYTE* const ostart = (BYTE* const)dst;
1937     BYTE* op = ostart;
1938     BYTE* const oend = ostart + maxDstSize;
1939     size_t remainingSize = srcSize;
1940     U32 magicNumber;
1941     size_t errorCode=0;
1942     blockProperties_t blockProperties;
1943 
1944     /* Frame Header */
1945     if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
1946     magicNumber = ZSTD_readBE32(src);
1947     if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
1948     ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
1949 
1950     /* Loop on each block */
1951     while (1)
1952     {
1953         size_t blockSize = ZSTDv01_getcBlockSize(ip, iend-ip, &blockProperties);
1954         if (ZSTDv01_isError(blockSize)) return blockSize;
1955 
1956         ip += ZSTD_blockHeaderSize;
1957         remainingSize -= ZSTD_blockHeaderSize;
1958         if (blockSize > remainingSize) return ERROR(srcSize_wrong);
1959 
1960         switch(blockProperties.blockType)
1961         {
1962         case bt_compressed:
1963             errorCode = ZSTD_decompressBlock(ctx, op, oend-op, ip, blockSize);
1964             break;
1965         case bt_raw :
1966             errorCode = ZSTD_copyUncompressedBlock(op, oend-op, ip, blockSize);
1967             break;
1968         case bt_rle :
1969             return ERROR(GENERIC);   /* not yet supported */
1970             break;
1971         case bt_end :
1972             /* end of frame */
1973             if (remainingSize) return ERROR(srcSize_wrong);
1974             break;
1975         default:
1976             return ERROR(GENERIC);
1977         }
1978         if (blockSize == 0) break;   /* bt_end */
1979 
1980         if (ZSTDv01_isError(errorCode)) return errorCode;
1981         op += errorCode;
1982         ip += blockSize;
1983         remainingSize -= blockSize;
1984     }
1985 
1986     return op-ostart;
1987 }
1988 
1989 size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1990 {
1991     dctx_t ctx;
1992     ctx.base = dst;
1993     return ZSTDv01_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
1994 }
1995 
1996 size_t ZSTDv01_findFrameCompressedSize(const void* src, size_t srcSize)
1997 {
1998     const BYTE* ip = (const BYTE*)src;
1999     size_t remainingSize = srcSize;
2000     U32 magicNumber;
2001     blockProperties_t blockProperties;
2002 
2003     /* Frame Header */
2004     if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
2005     magicNumber = ZSTD_readBE32(src);
2006     if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
2007     ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
2008 
2009     /* Loop on each block */
2010     while (1)
2011     {
2012         size_t blockSize = ZSTDv01_getcBlockSize(ip, remainingSize, &blockProperties);
2013         if (ZSTDv01_isError(blockSize)) return blockSize;
2014 
2015         ip += ZSTD_blockHeaderSize;
2016         remainingSize -= ZSTD_blockHeaderSize;
2017         if (blockSize > remainingSize) return ERROR(srcSize_wrong);
2018 
2019         if (blockSize == 0) break;   /* bt_end */
2020 
2021         ip += blockSize;
2022         remainingSize -= blockSize;
2023     }
2024 
2025     return ip - (const BYTE*)src;
2026 }
2027 
2028 /*******************************
2029 *  Streaming Decompression API
2030 *******************************/
2031 
2032 size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx)
2033 {
2034     dctx->expected = ZSTD_frameHeaderSize;
2035     dctx->phase = 0;
2036     dctx->previousDstEnd = NULL;
2037     dctx->base = NULL;
2038     return 0;
2039 }
2040 
2041 ZSTDv01_Dctx* ZSTDv01_createDCtx(void)
2042 {
2043     ZSTDv01_Dctx* dctx = (ZSTDv01_Dctx*)malloc(sizeof(ZSTDv01_Dctx));
2044     if (dctx==NULL) return NULL;
2045     ZSTDv01_resetDCtx(dctx);
2046     return dctx;
2047 }
2048 
2049 size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx)
2050 {
2051     free(dctx);
2052     return 0;
2053 }
2054 
2055 size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx)
2056 {
2057     return ((dctx_t*)dctx)->expected;
2058 }
2059 
2060 size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
2061 {
2062     dctx_t* ctx = (dctx_t*)dctx;
2063 
2064     /* Sanity check */
2065     if (srcSize != ctx->expected) return ERROR(srcSize_wrong);
2066     if (dst != ctx->previousDstEnd)  /* not contiguous */
2067         ctx->base = dst;
2068 
2069     /* Decompress : frame header */
2070     if (ctx->phase == 0)
2071     {
2072         /* Check frame magic header */
2073         U32 magicNumber = ZSTD_readBE32(src);
2074         if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
2075         ctx->phase = 1;
2076         ctx->expected = ZSTD_blockHeaderSize;
2077         return 0;
2078     }
2079 
2080     /* Decompress : block header */
2081     if (ctx->phase == 1)
2082     {
2083         blockProperties_t bp;
2084         size_t blockSize = ZSTDv01_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
2085         if (ZSTDv01_isError(blockSize)) return blockSize;
2086         if (bp.blockType == bt_end)
2087         {
2088             ctx->expected = 0;
2089             ctx->phase = 0;
2090         }
2091         else
2092         {
2093             ctx->expected = blockSize;
2094             ctx->bType = bp.blockType;
2095             ctx->phase = 2;
2096         }
2097 
2098         return 0;
2099     }
2100 
2101     /* Decompress : block content */
2102     {
2103         size_t rSize;
2104         switch(ctx->bType)
2105         {
2106         case bt_compressed:
2107             rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
2108             break;
2109         case bt_raw :
2110             rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize);
2111             break;
2112         case bt_rle :
2113             return ERROR(GENERIC);   /* not yet handled */
2114             break;
2115         case bt_end :   /* should never happen (filtered at phase 1) */
2116             rSize = 0;
2117             break;
2118         default:
2119             return ERROR(GENERIC);
2120         }
2121         ctx->phase = 1;
2122         ctx->expected = ZSTD_blockHeaderSize;
2123         ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);
2124         return rSize;
2125     }
2126 
2127 }
2128