1 /*
2  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  * You may select, at your option, one of the above-listed licenses.
9  */
10 
11 
12 /******************************************
13 *  Includes
14 ******************************************/
15 #include <stddef.h>    /* size_t, ptrdiff_t */
16 #include "zstd_v01.h"
17 #include "error_private.h"
18 
19 
20 /******************************************
21 *  Static allocation
22 ******************************************/
23 /* You can statically allocate an FSE CTable/DTable as a table of unsigned using the macro below */
24 #define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<maxTableLog))
25 
26 /* You can statically allocate a Huff0 DTable as a table of unsigned short using the macros below */
27 #define HUF_DTABLE_SIZE_U16(maxTableLog)   (1 + (1<<maxTableLog))
28 #define HUF_CREATE_STATIC_DTABLE(DTable, maxTableLog) \
29         unsigned short DTable[HUF_DTABLE_SIZE_U16(maxTableLog)] = { maxTableLog }
30 
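/* Illustrative usage sketch for the allocation macros above (names such as `exampleLog`,
 * `fseTable` and `hufTable` are hypothetical, chosen only for this example) :
 *
 *     #define exampleLog 12
 *     unsigned fseTable[FSE_DTABLE_SIZE_U32(exampleLog)];     // 1 + 2^12 cells of unsigned
 *     HUF_CREATE_STATIC_DTABLE(hufTable, exampleLog);         // U16 array, first cell = exampleLog
 */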
31 
32 /******************************************
33 *  Error Management
34 ******************************************/
35 #define FSE_LIST_ERRORS(ITEM) \
36         ITEM(FSE_OK_NoError) ITEM(FSE_ERROR_GENERIC) \
37         ITEM(FSE_ERROR_tableLog_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooSmall) \
38         ITEM(FSE_ERROR_dstSize_tooSmall) ITEM(FSE_ERROR_srcSize_wrong)\
39         ITEM(FSE_ERROR_corruptionDetected) \
40         ITEM(FSE_ERROR_maxCode)
41 
42 #define FSE_GENERATE_ENUM(ENUM) ENUM,
43 typedef enum { FSE_LIST_ERRORS(FSE_GENERATE_ENUM) } FSE_errorCodes;  /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
44 
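/* Illustrative sketch of the error convention above : functions in this file return error
 * codes as (size_t)-FSE_ERROR_xxx, i.e. values in the top range of size_t, and
 * FSE_isError() (defined further below) detects them. Buffer names here are hypothetical :
 *
 *     size_t const r = FSE_decompress(dst, dstCapacity, src, srcSize);
 *     if (FSE_isError(r)) {
 *         // error : (0 - r) recovers the FSE_errorCodes value
 *     }
 */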
45 
46 /******************************************
47 *  FSE symbol compression API
48 ******************************************/
49 /*
50    This API consists of small unitary functions, which benefit greatly from being inlined.
51    You will want to enable link-time optimization to ensure these functions are properly inlined into your binary.
52    Visual Studio seems to do it automatically.
53    For gcc or clang, you'll need to add the -flto flag at both the compilation and linking stages.
54    If none of these solutions is applicable, include "fse.c" directly.
55 */
56 
57 typedef unsigned FSE_CTable;   /* don't allocate this directly; it's just a way to be more restrictive than void* */
58 typedef unsigned FSE_DTable;   /* don't allocate this directly; it's just a way to be more restrictive than void* */
59 
60 typedef struct
61 {
62     size_t bitContainer;
63     int    bitPos;
64     char*  startPtr;
65     char*  ptr;
66     char*  endPtr;
67 } FSE_CStream_t;
68 
69 typedef struct
70 {
71     ptrdiff_t   value;
72     const void* stateTable;
73     const void* symbolTT;
74     unsigned    stateLog;
75 } FSE_CState_t;
76 
77 typedef struct
78 {
79     size_t   bitContainer;
80     unsigned bitsConsumed;
81     const char* ptr;
82     const char* start;
83 } FSE_DStream_t;
84 
85 typedef struct
86 {
87     size_t      state;
88     const void* table;   /* precise table may vary, depending on U16 */
89 } FSE_DState_t;
90 
91 typedef enum { FSE_DStream_unfinished = 0,
92                FSE_DStream_endOfBuffer = 1,
93                FSE_DStream_completed = 2,
94                FSE_DStream_tooFar = 3 } FSE_DStream_status;  /* result of FSE_reloadDStream() */
95                /* 1,2,4,8 would be better for bitmap combinations, but it slows down performance a bit */
96 
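/* Typical consumption pattern of the FSE_reloadDStream() status codes, as used by the
 * decoding loops further below (sketch only, for an initialized FSE_DStream_t bitD) :
 *
 *     while (FSE_reloadDStream(&bitD) == FSE_DStream_unfinished) {
 *         // decode more symbols; endOfBuffer / completed terminate the loop
 *     }
 */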
97 
98 /****************************************************************
99 *  Tuning parameters
100 ****************************************************************/
101 /* MEMORY_USAGE :
102 *  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB; 16 -> 64KB; 20 -> 1MB; etc.)
103 *  Increasing memory usage improves compression ratio.
104 *  Reducing memory usage can improve speed, due to cache effects.
105 *  Recommended max value is 14 (16KB), which fits nicely into an Intel x86 L1 cache. */
106 #define FSE_MAX_MEMORY_USAGE 14
107 #define FSE_DEFAULT_MEMORY_USAGE 13
108 
109 /* FSE_MAX_SYMBOL_VALUE :
110 *  Maximum symbol value authorized.
111 *  Required for proper stack allocation */
112 #define FSE_MAX_SYMBOL_VALUE 255
113 
114 
115 /****************************************************************
116 *  template functions type & suffix
117 ****************************************************************/
118 #define FSE_FUNCTION_TYPE BYTE
119 #define FSE_FUNCTION_EXTENSION
120 
121 
122 /****************************************************************
123 *  Byte symbol type
124 ****************************************************************/
125 typedef struct
126 {
127     unsigned short newState;
128     unsigned char  symbol;
129     unsigned char  nbBits;
130 } FSE_decode_t;   /* size == U32 */
131 
132 
133 
134 /****************************************************************
135 *  Compiler specifics
136 ****************************************************************/
137 #ifdef _MSC_VER    /* Visual Studio */
138 #  define FORCE_INLINE static __forceinline
139 #  include <intrin.h>                    /* For Visual 2005 */
140 #  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
141 #  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
142 #else
143 #  define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
144 #  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
145 #    ifdef __GNUC__
146 #      define FORCE_INLINE static inline __attribute__((always_inline))
147 #    else
148 #      define FORCE_INLINE static inline
149 #    endif
150 #  else
151 #    define FORCE_INLINE static
152 #  endif /* __STDC_VERSION__ */
153 #endif
154 
155 
156 /****************************************************************
157 *  Includes
158 ****************************************************************/
159 #include <stdlib.h>     /* malloc, free, qsort */
160 #include <string.h>     /* memcpy, memset */
161 #include <stdio.h>      /* printf (debug) */
162 
163 
164 #ifndef MEM_ACCESS_MODULE
165 #define MEM_ACCESS_MODULE
166 /****************************************************************
167 *  Basic Types
168 *****************************************************************/
169 #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
170 # include <stdint.h>
171 typedef  uint8_t BYTE;
172 typedef uint16_t U16;
173 typedef  int16_t S16;
174 typedef uint32_t U32;
175 typedef  int32_t S32;
176 typedef uint64_t U64;
177 typedef  int64_t S64;
178 #else
179 typedef unsigned char       BYTE;
180 typedef unsigned short      U16;
181 typedef   signed short      S16;
182 typedef unsigned int        U32;
183 typedef   signed int        S32;
184 typedef unsigned long long  U64;
185 typedef   signed long long  S64;
186 #endif
187 
188 #endif   /* MEM_ACCESS_MODULE */
189 
190 /****************************************************************
191 *  Memory I/O
192 *****************************************************************/
193 /* FSE_FORCE_MEMORY_ACCESS
194  * By default, access to unaligned memory is done through `memcpy()`, which is safe and portable.
195  * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
196  * The switch below allows selecting a different access method for improved performance.
197  * Method 0 (default) : use `memcpy()`. Safe and portable.
198  * Method 1 : `__packed` statement. It relies on a compiler extension (i.e., not portable).
199  *            This method is safe if your compiler supports it, and *generally* as fast as or faster than `memcpy`.
200  * Method 2 : direct access. This method does not rely on a compiler extension, but it violates the C standard.
201  *            It can generate buggy code on targets whose generated assembly depends on alignment.
202  *            But in some circumstances, it's the only known way to get the best performance (e.g. GCC + ARMv6).
203  * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
204  * Prefer these methods in priority order (0 > 1 > 2).
205  */
206 #ifndef FSE_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
207 #  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
208 #    define FSE_FORCE_MEMORY_ACCESS 2
209 #  elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
210   (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
211 #    define FSE_FORCE_MEMORY_ACCESS 1
212 #  endif
213 #endif
214 
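/* Illustrative note : FSE_FORCE_MEMORY_ACCESS may also be set from the build command
 * line, e.g. (hypothetical invocation) :
 *
 *     cc -DFSE_FORCE_MEMORY_ACCESS=1 -c zstd_v01.c
 *
 * which selects the __packed-union access path below instead of memcpy(). */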
215 
216 static unsigned FSE_32bits(void)
217 {
218     return sizeof(void*)==4;
219 }
220 
221 static unsigned FSE_isLittleEndian(void)
222 {
223     const union { U32 i; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
224     return one.c[0];
225 }
226 
227 #if defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==2)
228 
229 static U16 FSE_read16(const void* memPtr) { return *(const U16*) memPtr; }
230 static U32 FSE_read32(const void* memPtr) { return *(const U32*) memPtr; }
231 static U64 FSE_read64(const void* memPtr) { return *(const U64*) memPtr; }
232 
233 #elif defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==1)
234 
235 /* __packed declarations are safer, but compiler-specific, hence potentially problematic for some compilers */
236 /* currently only defined for gcc and icc */
237 typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign;
238 
239 static U16 FSE_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
240 static U32 FSE_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
241 static U64 FSE_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
242 
243 #else
244 
245 static U16 FSE_read16(const void* memPtr)
246 {
247     U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
248 }
249 
250 static U32 FSE_read32(const void* memPtr)
251 {
252     U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
253 }
254 
255 static U64 FSE_read64(const void* memPtr)
256 {
257     U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
258 }
259 
260 #endif /* FSE_FORCE_MEMORY_ACCESS */
261 
262 static U16 FSE_readLE16(const void* memPtr)
263 {
264     if (FSE_isLittleEndian())
265         return FSE_read16(memPtr);
266     else
267     {
268         const BYTE* p = (const BYTE*)memPtr;
269         return (U16)(p[0] + (p[1]<<8));
270     }
271 }
272 
273 static U32 FSE_readLE32(const void* memPtr)
274 {
275     if (FSE_isLittleEndian())
276         return FSE_read32(memPtr);
277     else
278     {
279         const BYTE* p = (const BYTE*)memPtr;
280         return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
281     }
282 }
283 
284 
285 static U64 FSE_readLE64(const void* memPtr)
286 {
287     if (FSE_isLittleEndian())
288         return FSE_read64(memPtr);
289     else
290     {
291         const BYTE* p = (const BYTE*)memPtr;
292         return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
293                      + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
294     }
295 }
296 
297 static size_t FSE_readLEST(const void* memPtr)
298 {
299     if (FSE_32bits())
300         return (size_t)FSE_readLE32(memPtr);
301     else
302         return (size_t)FSE_readLE64(memPtr);
303 }
304 
305 
306 
307 /****************************************************************
308 *  Constants
309 *****************************************************************/
310 #define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)
311 #define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
312 #define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
313 #define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
314 #define FSE_MIN_TABLELOG 5
315 
316 #define FSE_TABLELOG_ABSOLUTE_MAX 15
317 #if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
318 #error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
319 #endif
320 
321 
322 /****************************************************************
323 *  Error Management
324 ****************************************************************/
325 #define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
326 
327 
328 /****************************************************************
329 *  Complex types
330 ****************************************************************/
331 typedef struct
332 {
333     int deltaFindState;
334     U32 deltaNbBits;
335 } FSE_symbolCompressionTransform; /* total 8 bytes */
336 
337 typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
338 
339 /****************************************************************
340 *  Internal functions
341 ****************************************************************/
342 FORCE_INLINE unsigned FSE_highbit32 (U32 val)
343 {
344 #   if defined(_MSC_VER)   /* Visual */
345     unsigned long r;
346     _BitScanReverse ( &r, val );
347     return (unsigned) r;
348 #   elif defined(__GNUC__) && (GCC_VERSION >= 304)   /* GCC Intrinsic */
349     return 31 - __builtin_clz (val);
350 #   else   /* Software version */
351     static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
352     U32 v = val;
353     unsigned r;
354     v |= v >> 1;
355     v |= v >> 2;
356     v |= v >> 4;
357     v |= v >> 8;
358     v |= v >> 16;
359     r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
360     return r;
361 #   endif
362 }
363 
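/* Worked examples for FSE_highbit32() (position of the highest set bit) :
 *     FSE_highbit32(1)  == 0
 *     FSE_highbit32(12) == 3        (12 == 0b1100)
 *     FSE_highbit32(0x80000000) == 31
 * The result is undefined for val == 0. */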
364 
365 /****************************************************************
366 *  Templates
367 ****************************************************************/
368 /*
369   designed to be included
370   for type-specific functions (template emulation in C)
371   Objective is to write these functions only once, for improved maintenance
372 */
373 
374 /* safety checks */
375 #ifndef FSE_FUNCTION_EXTENSION
376 #  error "FSE_FUNCTION_EXTENSION must be defined"
377 #endif
378 #ifndef FSE_FUNCTION_TYPE
379 #  error "FSE_FUNCTION_TYPE must be defined"
380 #endif
381 
382 /* Function names */
383 #define FSE_CAT(X,Y) X##Y
384 #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
385 #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
386 
387 
388 
389 static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
390 
391 #define FSE_DECODE_TYPE FSE_decode_t
392 
393 
394 typedef struct {
395     U16 tableLog;
396     U16 fastMode;
397 } FSE_DTableHeader;   /* sizeof U32 */
398 
399 static size_t FSE_buildDTable
400 (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
401 {
402     void* ptr = dt;
403     FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
404     FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)(ptr) + 1;   /* skip the header : dt is a table of unsigned (32-bit), and FSE_DTableHeader fits in exactly one cell */
405     const U32 tableSize = 1 << tableLog;
406     const U32 tableMask = tableSize-1;
407     const U32 step = FSE_tableStep(tableSize);
408     U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
409     U32 position = 0;
410     U32 highThreshold = tableSize-1;
411     const S16 largeLimit= (S16)(1 << (tableLog-1));
412     U32 noLarge = 1;
413     U32 s;
414 
415     /* Sanity Checks */
416     if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge;
417     if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge;
418 
419     /* Init, lay down lowprob symbols */
420     DTableH[0].tableLog = (U16)tableLog;
421     for (s=0; s<=maxSymbolValue; s++)
422     {
423         if (normalizedCounter[s]==-1)
424         {
425             tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
426             symbolNext[s] = 1;
427         }
428         else
429         {
430             if (normalizedCounter[s] >= largeLimit) noLarge=0;
431             symbolNext[s] = normalizedCounter[s];
432         }
433     }
434 
435     /* Spread symbols */
436     for (s=0; s<=maxSymbolValue; s++)
437     {
438         int i;
439         for (i=0; i<normalizedCounter[s]; i++)
440         {
441             tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
442             position = (position + step) & tableMask;
443             while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
444         }
445     }
446 
447     if (position!=0) return (size_t)-FSE_ERROR_GENERIC;   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
448 
449     /* Build Decoding table */
450     {
451         U32 i;
452         for (i=0; i<tableSize; i++)
453         {
454             FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
455             U16 nextState = symbolNext[symbol]++;
456             tableDecode[i].nbBits = (BYTE) (tableLog - FSE_highbit32 ((U32)nextState) );
457             tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
458         }
459     }
460 
461     DTableH->fastMode = (U16)noLarge;
462     return 0;
463 }
464 
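/* Note on the symbol-spreading step above : step = (tableSize>>1) + (tableSize>>3) + 3
 * is odd for any tableSize >= 32 (FSE_MIN_TABLELOG == 5), hence coprime with the
 * power-of-two tableSize, so position = (position + step) & tableMask visits every cell
 * exactly once. Small worked example : tableLog == 5 gives tableSize == 32 and
 * step == 16 + 4 + 3 == 23. */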
465 
466 /******************************************
467 *  FSE byte symbol
468 ******************************************/
469 #ifndef FSE_COMMONDEFS_ONLY
470 
471 static unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); }
472 
473 static short FSE_abs(short a)
474 {
475     return a<0? -a : a;
476 }
477 
478 
479 /****************************************************************
480 *  Header bitstream management
481 ****************************************************************/
482 static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
483                  const void* headerBuffer, size_t hbSize)
484 {
485     const BYTE* const istart = (const BYTE*) headerBuffer;
486     const BYTE* const iend = istart + hbSize;
487     const BYTE* ip = istart;
488     int nbBits;
489     int remaining;
490     int threshold;
491     U32 bitStream;
492     int bitCount;
493     unsigned charnum = 0;
494     int previous0 = 0;
495 
496     if (hbSize < 4) return (size_t)-FSE_ERROR_srcSize_wrong;
497     bitStream = FSE_readLE32(ip);
498     nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
499     if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return (size_t)-FSE_ERROR_tableLog_tooLarge;
500     bitStream >>= 4;
501     bitCount = 4;
502     *tableLogPtr = nbBits;
503     remaining = (1<<nbBits)+1;
504     threshold = 1<<nbBits;
505     nbBits++;
506 
507     while ((remaining>1) && (charnum<=*maxSVPtr))
508     {
509         if (previous0)
510         {
511             unsigned n0 = charnum;
512             while ((bitStream & 0xFFFF) == 0xFFFF)
513             {
514                 n0+=24;
515                 if (ip < iend-5)
516                 {
517                     ip+=2;
518                     bitStream = FSE_readLE32(ip) >> bitCount;
519                 }
520                 else
521                 {
522                     bitStream >>= 16;
523                     bitCount+=16;
524                 }
525             }
526             while ((bitStream & 3) == 3)
527             {
528                 n0+=3;
529                 bitStream>>=2;
530                 bitCount+=2;
531             }
532             n0 += bitStream & 3;
533             bitCount += 2;
534             if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_maxSymbolValue_tooSmall;
535             while (charnum < n0) normalizedCounter[charnum++] = 0;
536             if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
537             {
538                 ip += bitCount>>3;
539                 bitCount &= 7;
540                 bitStream = FSE_readLE32(ip) >> bitCount;
541             }
542             else
543                 bitStream >>= 2;
544         }
545         {
546             const short max = (short)((2*threshold-1)-remaining);
547             short count;
548 
549             if ((bitStream & (threshold-1)) < (U32)max)
550             {
551                 count = (short)(bitStream & (threshold-1));
552                 bitCount   += nbBits-1;
553             }
554             else
555             {
556                 count = (short)(bitStream & (2*threshold-1));
557                 if (count >= threshold) count -= max;
558                 bitCount   += nbBits;
559             }
560 
561             count--;   /* extra accuracy */
562             remaining -= FSE_abs(count);
563             normalizedCounter[charnum++] = count;
564             previous0 = !count;
565             while (remaining < threshold)
566             {
567                 nbBits--;
568                 threshold >>= 1;
569             }
570 
571             {
572                 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
573                 {
574                     ip += bitCount>>3;
575                     bitCount &= 7;
576                 }
577                 else
578                 {
579                     bitCount -= (int)(8 * (iend - 4 - ip));
580                     ip = iend - 4;
581                 }
582                 bitStream = FSE_readLE32(ip) >> (bitCount & 31);
583             }
584         }
585     }
586     if (remaining != 1) return (size_t)-FSE_ERROR_GENERIC;
587     *maxSVPtr = charnum-1;
588 
589     ip += (bitCount+7)>>3;
590     if ((size_t)(ip-istart) > hbSize) return (size_t)-FSE_ERROR_srcSize_wrong;
591     return ip-istart;
592 }
593 
594 
595 /*********************************************************
596 *  Decompression (Byte symbols)
597 *********************************************************/
598 static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
599 {
600     void* ptr = dt;
601     FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
602     FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1;   /* because dt is unsigned */
603 
604     DTableH->tableLog = 0;
605     DTableH->fastMode = 0;
606 
607     cell->newState = 0;
608     cell->symbol = symbolValue;
609     cell->nbBits = 0;
610 
611     return 0;
612 }
613 
614 
615 static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
616 {
617     void* ptr = dt;
618     FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
619     FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1;   /* because dt is unsigned */
620     const unsigned tableSize = 1 << nbBits;
621     const unsigned tableMask = tableSize - 1;
622     const unsigned maxSymbolValue = tableMask;
623     unsigned s;
624 
625     /* Sanity checks */
626     if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC;             /* min size */
627 
628     /* Build Decoding Table */
629     DTableH->tableLog = (U16)nbBits;
630     DTableH->fastMode = 1;
631     for (s=0; s<=maxSymbolValue; s++)
632     {
633         dinfo[s].newState = 0;
634         dinfo[s].symbol = (BYTE)s;
635         dinfo[s].nbBits = (BYTE)nbBits;
636     }
637 
638     return 0;
639 }
640 
641 
642 /* FSE_initDStream
643  * Initialize an FSE_DStream_t.
644  * srcBuffer must point at the beginning of an FSE block.
645  * The function result is the size of the FSE block (== srcSize).
646  * If srcSize is too small, the function will return an error code.
647  */
648 static size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
649 {
650     if (srcSize < 1) return (size_t)-FSE_ERROR_srcSize_wrong;
651 
652     if (srcSize >=  sizeof(size_t))
653     {
654         U32 contain32;
655         bitD->start = (const char*)srcBuffer;
656         bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(size_t);
657         bitD->bitContainer = FSE_readLEST(bitD->ptr);
658         contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
659         if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC;   /* stop bit not present */
660         bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
661     }
662     else
663     {
664         U32 contain32;
665         bitD->start = (const char*)srcBuffer;
666         bitD->ptr   = bitD->start;
667         bitD->bitContainer = *(const BYTE*)(bitD->start);
668         switch(srcSize)
669         {
670             case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
671                     /* fallthrough */
672             case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
673                     /* fallthrough */
674             case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
675                     /* fallthrough */
676             case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
677                     /* fallthrough */
678             case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
679                     /* fallthrough */
680             case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) <<  8;
681                     /* fallthrough */
682             default:;
683         }
684         contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
685         if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC;   /* stop bit not present */
686         bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
687         bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
688     }
689 
690     return srcSize;
691 }
692 
693 
694 /*!FSE_lookBits
695  * Provides the next n bits from the bitContainer.
696  * bitContainer is not modified (the bits remain available for the next read/look).
697  * On 32-bit targets, maxNbBits==25.
698  * On 64-bit targets, maxNbBits==57.
699  * return : value extracted.
700  */
701 static size_t FSE_lookBits(FSE_DStream_t* bitD, U32 nbBits)
702 {
703     const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
704     return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
705 }
706 
707 static size_t FSE_lookBitsFast(FSE_DStream_t* bitD, U32 nbBits)   /* only if nbBits >= 1 !! */
708 {
709     const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
710     return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
711 }
712 
713 static void FSE_skipBits(FSE_DStream_t* bitD, U32 nbBits)
714 {
715     bitD->bitsConsumed += nbBits;
716 }
717 
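/* Illustrative example for the helpers above (64-bit container, bitsConsumed == 0) :
 * with bitD->bitContainer == 0xE000000000000000, FSE_lookBits(&bitD, 3) returns 0x7
 * (the three most-significant unconsumed bits), and FSE_skipBits(&bitD, 3) then marks
 * them as consumed; FSE_readBits() below simply combines the two. */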
718 
719 /*!FSE_readBits
720  * Reads the next n bits from the bitContainer and marks them as consumed.
721  * On 32-bit targets, don't read more than maxNbBits==25.
722  * On 64-bit targets, don't read more than maxNbBits==57.
723  * Use the fast variant *only* if n >= 1.
724  * return : value extracted.
725  */
726 static size_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits)
727 {
728     size_t value = FSE_lookBits(bitD, nbBits);
729     FSE_skipBits(bitD, nbBits);
730     return value;
731 }
732 
733 static size_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits)   /* only if nbBits >= 1 !! */
734 {
735     size_t value = FSE_lookBitsFast(bitD, nbBits);
736     FSE_skipBits(bitD, nbBits);
737     return value;
738 }
739 
740 static unsigned FSE_reloadDStream(FSE_DStream_t* bitD)
741 {
742     if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* should never happen */
743         return FSE_DStream_tooFar;
744 
745     if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
746     {
747         bitD->ptr -= bitD->bitsConsumed >> 3;
748         bitD->bitsConsumed &= 7;
749         bitD->bitContainer = FSE_readLEST(bitD->ptr);
750         return FSE_DStream_unfinished;
751     }
752     if (bitD->ptr == bitD->start)
753     {
754         if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return FSE_DStream_endOfBuffer;
755         return FSE_DStream_completed;
756     }
757     {
758         U32 nbBytes = bitD->bitsConsumed >> 3;
759         U32 result = FSE_DStream_unfinished;
760         if (bitD->ptr - nbBytes < bitD->start)
761         {
762             nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
763             result = FSE_DStream_endOfBuffer;
764         }
765         bitD->ptr -= nbBytes;
766         bitD->bitsConsumed -= nbBytes*8;
767         bitD->bitContainer = FSE_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD) */
768         return result;
769     }
770 }
771 
772 
773 static void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const FSE_DTable* dt)
774 {
775     const void* ptr = dt;
776     const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr;
777     DStatePtr->state = FSE_readBits(bitD, DTableH->tableLog);
778     FSE_reloadDStream(bitD);
779     DStatePtr->table = dt + 1;
780 }
781 
782 static BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
783 {
784     const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
785     const U32  nbBits = DInfo.nbBits;
786     BYTE symbol = DInfo.symbol;
787     size_t lowBits = FSE_readBits(bitD, nbBits);
788 
789     DStatePtr->state = DInfo.newState + lowBits;
790     return symbol;
791 }
792 
793 static BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
794 {
795     const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
796     const U32 nbBits = DInfo.nbBits;
797     BYTE symbol = DInfo.symbol;
798     size_t lowBits = FSE_readBitsFast(bitD, nbBits);
799 
800     DStatePtr->state = DInfo.newState + lowBits;
801     return symbol;
802 }
803 
804 /* FSE_endOfDStream
805    Tells if bitD has reached end of bitStream or not */
806 
807 static unsigned FSE_endOfDStream(const FSE_DStream_t* bitD)
808 {
809     return ((bitD->ptr == bitD->start) && (bitD->bitsConsumed == sizeof(bitD->bitContainer)*8));
810 }
811 
812 static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
813 {
814     return DStatePtr->state == 0;
815 }
816 
817 
818 FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
819           void* dst, size_t maxDstSize,
820     const void* cSrc, size_t cSrcSize,
821     const FSE_DTable* dt, const unsigned fast)
822 {
823     BYTE* const ostart = (BYTE*) dst;
824     BYTE* op = ostart;
825     BYTE* const omax = op + maxDstSize;
826     BYTE* const olimit = omax-3;
827 
828     FSE_DStream_t bitD;
829     FSE_DState_t state1;
830     FSE_DState_t state2;
831     size_t errorCode;
832 
833     /* Init */
834     errorCode = FSE_initDStream(&bitD, cSrc, cSrcSize);   /* cSrcSize is the maximum compressed size */
835     if (FSE_isError(errorCode)) return errorCode;
836 
837     FSE_initDState(&state1, &bitD, dt);
838     FSE_initDState(&state2, &bitD, dt);
839 
840 #define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
841 
842     /* 4 symbols per loop */
843     for ( ; (FSE_reloadDStream(&bitD)==FSE_DStream_unfinished) && (op<olimit) ; op+=4)
844     {
845         op[0] = FSE_GETSYMBOL(&state1);
846 
847         if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
848             FSE_reloadDStream(&bitD);
849 
850         op[1] = FSE_GETSYMBOL(&state2);
851 
852         if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
853             { if (FSE_reloadDStream(&bitD) > FSE_DStream_unfinished) { op+=2; break; } }
854 
855         op[2] = FSE_GETSYMBOL(&state1);
856 
857         if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
858             FSE_reloadDStream(&bitD);
859 
860         op[3] = FSE_GETSYMBOL(&state2);
861     }
862 
863     /* tail */
864     /* note : FSE_reloadDStream(&bitD) >= FSE_DStream_endOfBuffer; the tail loop ends exactly at FSE_DStream_completed */
865     while (1)
866     {
867         if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
868             break;
869 
870         *op++ = FSE_GETSYMBOL(&state1);
871 
872         if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
873             break;
874 
875         *op++ = FSE_GETSYMBOL(&state2);
876     }
877 
878     /* end ? */
879     if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
880         return op-ostart;
881 
882     if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */
883 
884     return (size_t)-FSE_ERROR_corruptionDetected;
885 }
886 
887 
888 static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
889                             const void* cSrc, size_t cSrcSize,
890                             const FSE_DTable* dt)
891 {
892     FSE_DTableHeader DTableH;
893     memcpy(&DTableH, dt, sizeof(DTableH));   /* memcpy() into local variable, to avoid strict aliasing warning */
894 
895     /* select fast mode (static) */
896     if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
897     return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
898 }
899 
900 
901 static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
902 {
903     const BYTE* const istart = (const BYTE*)cSrc;
904     const BYTE* ip = istart;
905     short counting[FSE_MAX_SYMBOL_VALUE+1];
906     DTable_max_t dt;   /* Static analyzer seems unable to understand this table will be properly initialized later */
907     unsigned tableLog;
908     unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
909     size_t errorCode;
910 
911     if (cSrcSize<2) return (size_t)-FSE_ERROR_srcSize_wrong;   /* too small input size */
912 
913     /* normal FSE decoding mode */
914     errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
915     if (FSE_isError(errorCode)) return errorCode;
916     if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;   /* too small input size */
917     ip += errorCode;
918     cSrcSize -= errorCode;
919 
920     errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
921     if (FSE_isError(errorCode)) return errorCode;
922 
923     /* always return, even if it is an error code */
924     return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
925 }
926 
927 
928 
929 /* *******************************************************
930 *  Huff0 : Huffman block compression
931 *********************************************************/
932 #define HUF_MAX_SYMBOL_VALUE 255
933 #define HUF_DEFAULT_TABLELOG  12       /* used by default, when not specified */
934 #define HUF_MAX_TABLELOG  12           /* max possible tableLog; for allocation purpose; can be modified */
935 #define HUF_ABSOLUTEMAX_TABLELOG  16   /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
936 #if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
937 #  error "HUF_MAX_TABLELOG is too large !"
938 #endif
939 
940 typedef struct HUF_CElt_s {
941   U16  val;
942   BYTE nbBits;
943 } HUF_CElt ;
944 
945 typedef struct nodeElt_s {
946     U32 count;
947     U16 parent;
948     BYTE byte;
949     BYTE nbBits;
950 } nodeElt;
951 
952 
953 /* *******************************************************
954 *  Huff0 : Huffman block decompression
955 *********************************************************/
956 typedef struct {
957     BYTE byte;
958     BYTE nbBits;
959 } HUF_DElt;
960 
961 static size_t HUF_readDTable (U16* DTable, const void* src, size_t srcSize)
962 {
963     BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
964     U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];  /* large enough for values from 0 to 16 */
965     U32 weightTotal;
966     U32 maxBits;
967     const BYTE* ip = (const BYTE*) src;
968     size_t iSize;
969     size_t oSize;
970     U32 n;
971     U32 nextRankStart;
972     void* ptr = DTable+1;
973     HUF_DElt* const dt = (HUF_DElt*)ptr;
974 
975     if (!srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
976     iSize = ip[0];
977 
978     FSE_STATIC_ASSERT(sizeof(HUF_DElt) == sizeof(U16));   /* if compilation fails here, assertion is false */
979     //memset(huffWeight, 0, sizeof(huffWeight));   /* should not be necessary, but some analyzers complain ... */
980     if (iSize >= 128)  /* special header */
981     {
982         if (iSize >= (242))   /* RLE */
983         {
984             static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
985             oSize = l[iSize-242];
986             memset(huffWeight, 1, sizeof(huffWeight));
987             iSize = 0;
988         }
989         else   /* Incompressible */
990         {
991             oSize = iSize - 127;
992             iSize = ((oSize+1)/2);
993             if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
994             ip += 1;
995             for (n=0; n<oSize; n+=2)
996             {
997                 huffWeight[n]   = ip[n/2] >> 4;
998                 huffWeight[n+1] = ip[n/2] & 15;
999             }
1000         }
1001     }
1002     else  /* header compressed with FSE (normal case) */
1003     {
1004         if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
1005         oSize = FSE_decompress(huffWeight, HUF_MAX_SYMBOL_VALUE, ip+1, iSize);   /* max 255 values decoded, last one is implied */
1006         if (FSE_isError(oSize)) return oSize;
1007     }
1008 
1009     /* collect weight stats */
1010     memset(rankVal, 0, sizeof(rankVal));
1011     weightTotal = 0;
1012     for (n=0; n<oSize; n++)
1013     {
1014         if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return (size_t)-FSE_ERROR_corruptionDetected;
1015         rankVal[huffWeight[n]]++;
1016         weightTotal += (1 << huffWeight[n]) >> 1;
1017     }
1018     if (weightTotal == 0) return (size_t)-FSE_ERROR_corruptionDetected;
1019 
1020     /* get last non-null symbol weight (implied, total must be 2^n) */
1021     maxBits = FSE_highbit32(weightTotal) + 1;
1022     if (maxBits > DTable[0]) return (size_t)-FSE_ERROR_tableLog_tooLarge;   /* DTable is too small */
1023     DTable[0] = (U16)maxBits;
1024     {
1025         U32 total = 1 << maxBits;
1026         U32 rest = total - weightTotal;
1027         U32 verif = 1 << FSE_highbit32(rest);
1028         U32 lastWeight = FSE_highbit32(rest) + 1;
1029         if (verif != rest) return (size_t)-FSE_ERROR_corruptionDetected;    /* last value must be a clean power of 2 */
1030         huffWeight[oSize] = (BYTE)lastWeight;
1031         rankVal[lastWeight]++;
1032     }
1033 
1034     /* check tree construction validity */
1035     if ((rankVal[1] < 2) || (rankVal[1] & 1)) return (size_t)-FSE_ERROR_corruptionDetected;   /* by construction : at least 2 elts of rank 1, must be even */
1036 
1037     /* Prepare ranks */
1038     nextRankStart = 0;
1039     for (n=1; n<=maxBits; n++)
1040     {
1041         U32 current = nextRankStart;
1042         nextRankStart += (rankVal[n] << (n-1));
1043         rankVal[n] = current;
1044     }
1045 
1046     /* fill DTable */
1047     for (n=0; n<=oSize; n++)
1048     {
1049         const U32 w = huffWeight[n];
1050         const U32 length = (1 << w) >> 1;
1051         U32 i;
1052         HUF_DElt D;
1053         D.byte = (BYTE)n; D.nbBits = (BYTE)(maxBits + 1 - w);
1054         for (i = rankVal[w]; i < rankVal[w] + length; i++)
1055             dt[i] = D;
1056         rankVal[w] += length;
1057     }
1058 
1059     return iSize+1;
1060 }
1061 
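/* Worked example for the weight -> table-cell relationship used above : with
 * maxBits == 3, a symbol of weight w == 2 occupies (1<<w)>>1 == 2 consecutive DTable
 * cells and is decoded with maxBits + 1 - w == 2 bits; the sum of (1<<w)>>1 over all
 * weights (including the implied last one) must equal 1<<maxBits == 8. */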
1062 
1063 static BYTE HUF_decodeSymbol(FSE_DStream_t* Dstream, const HUF_DElt* dt, const U32 dtLog)
1064 {
1065         const size_t val = FSE_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
1066         const BYTE c = dt[val].byte;
1067         FSE_skipBits(Dstream, dt[val].nbBits);
1068         return c;
1069 }
1070 
1071 static size_t HUF_decompress_usingDTable(   /* -3% slower when non static */
1072           void* dst, size_t maxDstSize,
1073     const void* cSrc, size_t cSrcSize,
1074     const U16* DTable)
1075 {
1076     BYTE* const ostart = (BYTE*) dst;
1077     BYTE* op = ostart;
1078     BYTE* const omax = op + maxDstSize;
1079     BYTE* const olimit = omax-15;
1080 
1081     const void* ptr = DTable;
1082     const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1;
1083     const U32 dtLog = DTable[0];
1084     size_t errorCode;
1085     U32 reloadStatus;
1086 
1087     /* Init */
1088 
1089     const U16* jumpTable = (const U16*)cSrc;
1090     const size_t length1 = FSE_readLE16(jumpTable);
1091     const size_t length2 = FSE_readLE16(jumpTable+1);
1092     const size_t length3 = FSE_readLE16(jumpTable+2);
1093     const size_t length4 = cSrcSize - 6 - length1 - length2 - length3;   /* length4 is implied; coherency is checked just below */
1094     const char* const start1 = (const char*)(cSrc) + 6;
1095     const char* const start2 = start1 + length1;
1096     const char* const start3 = start2 + length2;
1097     const char* const start4 = start3 + length3;
1098     FSE_DStream_t bitD1, bitD2, bitD3, bitD4;
1099 
1100     if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
1101 
1102     errorCode = FSE_initDStream(&bitD1, start1, length1);
1103     if (FSE_isError(errorCode)) return errorCode;
1104     errorCode = FSE_initDStream(&bitD2, start2, length2);
1105     if (FSE_isError(errorCode)) return errorCode;
1106     errorCode = FSE_initDStream(&bitD3, start3, length3);
1107     if (FSE_isError(errorCode)) return errorCode;
1108     errorCode = FSE_initDStream(&bitD4, start4, length4);
1109     if (FSE_isError(errorCode)) return errorCode;
1110 
1111     reloadStatus=FSE_reloadDStream(&bitD2);
1112 
1113     /* 16 symbols per loop */
1114     for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit);  /* D2-3-4 are supposed to be synchronized and finish together */
1115         op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
1116     {
1117 #define HUF_DECODE_SYMBOL_0(n, Dstream) \
1118         op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);
1119 
1120 #define HUF_DECODE_SYMBOL_1(n, Dstream) \
1121         op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
1122         if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)
1123 
1124 #define HUF_DECODE_SYMBOL_2(n, Dstream) \
1125         op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
1126         if (FSE_32bits()) FSE_reloadDStream(&Dstream)
1127 
1128         HUF_DECODE_SYMBOL_1( 0, bitD1);
1129         HUF_DECODE_SYMBOL_1( 1, bitD2);
1130         HUF_DECODE_SYMBOL_1( 2, bitD3);
1131         HUF_DECODE_SYMBOL_1( 3, bitD4);
1132         HUF_DECODE_SYMBOL_2( 4, bitD1);
1133         HUF_DECODE_SYMBOL_2( 5, bitD2);
1134         HUF_DECODE_SYMBOL_2( 6, bitD3);
1135         HUF_DECODE_SYMBOL_2( 7, bitD4);
1136         HUF_DECODE_SYMBOL_1( 8, bitD1);
1137         HUF_DECODE_SYMBOL_1( 9, bitD2);
1138         HUF_DECODE_SYMBOL_1(10, bitD3);
1139         HUF_DECODE_SYMBOL_1(11, bitD4);
1140         HUF_DECODE_SYMBOL_0(12, bitD1);
1141         HUF_DECODE_SYMBOL_0(13, bitD2);
1142         HUF_DECODE_SYMBOL_0(14, bitD3);
1143         HUF_DECODE_SYMBOL_0(15, bitD4);
1144     }
1145 
1146     if (reloadStatus!=FSE_DStream_completed)   /* not complete : some bitStream might be FSE_DStream_unfinished */
1147         return (size_t)-FSE_ERROR_corruptionDetected;
1148 
1149     /* tail */
1150     {
1151         // bitTail = bitD1;   // copying the whole struct measured *much* slower : -20%
1152         FSE_DStream_t bitTail;
1153         bitTail.ptr = bitD1.ptr;
1154         bitTail.bitsConsumed = bitD1.bitsConsumed;
1155         bitTail.bitContainer = bitD1.bitContainer;   // required in case of FSE_DStream_endOfBuffer
1156         bitTail.start = start1;
1157         for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
1158         {
1159             HUF_DECODE_SYMBOL_0(0, bitTail);
1160         }
1161 
1162         if (FSE_endOfDStream(&bitTail))
1163             return op-ostart;
1164     }
1165 
1166     if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */
1167 
1168     return (size_t)-FSE_ERROR_corruptionDetected;
1169 }
1170 
1171 
1172 static size_t HUF_decompress (void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
1173 {
1174     HUF_CREATE_STATIC_DTABLE(DTable, HUF_MAX_TABLELOG);
1175     const BYTE* ip = (const BYTE*) cSrc;
1176     size_t errorCode;
1177 
1178     errorCode = HUF_readDTable (DTable, cSrc, cSrcSize);
1179     if (FSE_isError(errorCode)) return errorCode;
1180     if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
1181     ip += errorCode;
1182     cSrcSize -= errorCode;
1183 
1184     return HUF_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable);
1185 }
1186 
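/* Layout of a Huff0-compressed literal block, as parsed by the two functions above
 * (sizes in bytes) :
 *
 *     [ DTable description : iSize+1 ][ jump table : 3 x LE16 ][ stream1 ][ stream2 ][ stream3 ][ stream4 ]
 *
 * Stream lengths 1-3 come from the 6-byte jump table; length4 is implied by cSrcSize. */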
1187 
1188 #endif   /* FSE_COMMONDEFS_ONLY */
1189 
1190 /*
1191     zstd - standard compression library
1192     Copyright (C) 2014-2015, Yann Collet.
1193 
1194     BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
1195 
1196     Redistribution and use in source and binary forms, with or without
1197     modification, are permitted provided that the following conditions are
1198     met:
1199     * Redistributions of source code must retain the above copyright
1200     notice, this list of conditions and the following disclaimer.
1201     * Redistributions in binary form must reproduce the above
1202     copyright notice, this list of conditions and the following disclaimer
1203     in the documentation and/or other materials provided with the
1204     distribution.
1205     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1206     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1207     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1208     A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1209     OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1210     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1211     LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1212     DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1213     THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1214     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1215     OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1216 
1217     You can contact the author at :
1218     - zstd source repository : https://github.com/Cyan4973/zstd
1219     - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
1220 */
1221 
1222 /****************************************************************
1223 *  Tuning parameters
1224 *****************************************************************/
1225 /* MEMORY_USAGE :
1226 *  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB; 16 -> 64KB; 20 -> 1MB; etc.)
1227 *  Increasing memory usage improves compression ratio.
1228 *  Reducing memory usage can improve speed, due to cache effects. */
1229 #define ZSTD_MEMORY_USAGE 17
1230 
1231 
1232 /**************************************
1233    CPU Feature Detection
1234 **************************************/
1235 /*
1236  * Automated detection of efficient unaligned memory access
1237  * Based on known hardware architectures
1238  * This list will be updated based on feedback
1239  */
1240 #if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \
1241     || defined(__ARM_FEATURE_UNALIGNED) \
1242     || defined(__i386__) || defined(__x86_64__) \
1243     || defined(_M_IX86) || defined(_M_X64) \
1244     || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__) \
1245     || (defined(_M_ARM) && (_M_ARM >= 7))
1246 #  define ZSTD_UNALIGNED_ACCESS 1
1247 #else
1248 #  define ZSTD_UNALIGNED_ACCESS 0
1249 #endif
1250 
1251 
1252 /********************************************************
1253 *  Includes
1254 *********************************************************/
1255 #include <stdlib.h>      /* calloc */
1256 #include <string.h>      /* memcpy, memmove */
1257 #include <stdio.h>       /* debug : printf */
1258 
1259 
1260 /********************************************************
1261 *  Compiler specifics
1262 *********************************************************/
1263 #ifdef __AVX2__
1264 #  include <immintrin.h>   /* AVX2 intrinsics */
1265 #endif
1266 
1267 #ifdef _MSC_VER    /* Visual Studio */
1268 #  include <intrin.h>                    /* For Visual 2005 */
1269 #  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
1270 #  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
1271 #endif
1272 
1273 
1274 #ifndef MEM_ACCESS_MODULE
1275 #define MEM_ACCESS_MODULE
1276 /********************************************************
1277 *  Basic Types
1278 *********************************************************/
1279 #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
1280 # include <stdint.h>
1281 typedef  uint8_t BYTE;
1282 typedef uint16_t U16;
1283 typedef  int16_t S16;
1284 typedef uint32_t U32;
1285 typedef  int32_t S32;
1286 typedef uint64_t U64;
1287 #else
1288 typedef unsigned char       BYTE;
1289 typedef unsigned short      U16;
1290 typedef   signed short      S16;
1291 typedef unsigned int        U32;
1292 typedef   signed int        S32;
1293 typedef unsigned long long  U64;
1294 #endif
1295 
1296 #endif   /* MEM_ACCESS_MODULE */
1297 
1298 
1299 /********************************************************
1300 *  Constants
1301 *********************************************************/
1302 static const U32 ZSTD_magicNumber = 0xFD2FB51E;   /* 3rd version : seqNb header */
1303 
1304 #define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
1305 #define HASH_TABLESIZE (1 << HASH_LOG)
1306 #define HASH_MASK (HASH_TABLESIZE - 1)
1307 
1308 #define KNUTH 2654435761
1309 
1310 #define BIT7 128
1311 #define BIT6  64
1312 #define BIT5  32
1313 #define BIT4  16
1314 
1315 #define KB *(1 <<10)
1316 #define MB *(1 <<20)
1317 #define GB *(1U<<30)
1318 
1319 #define BLOCKSIZE (128 KB)                 /* defined for static allocation */
1320 
1321 #define WORKPLACESIZE (BLOCKSIZE*3)
1322 #define MINMATCH 4
1323 #define MLbits   7
1324 #define LLbits   6
1325 #define Offbits  5
1326 #define MaxML  ((1<<MLbits )-1)
1327 #define MaxLL  ((1<<LLbits )-1)
1328 #define MaxOff ((1<<Offbits)-1)
1329 #define LitFSELog  11
1330 #define MLFSELog   10
1331 #define LLFSELog   10
1332 #define OffFSELog   9
1333 #define MAX(a,b) ((a)<(b)?(b):(a))
1334 #define MaxSeq MAX(MaxLL, MaxML)
1335 
1336 #define LITERAL_NOENTROPY 63
1337 #define COMMAND_NOENTROPY 7   /* to remove */
1338 
1339 #define ZSTD_CONTENTSIZE_ERROR   (0ULL - 2)
1340 
1341 static const size_t ZSTD_blockHeaderSize = 3;
1342 static const size_t ZSTD_frameHeaderSize = 4;
1343 
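/* Illustrative note : the KB/MB/GB helpers above turn a spaced literal into a
 * multiplication, e.g. (128 KB) expands to 128 *(1 <<10) == 131072, so BLOCKSIZE is
 * 128 KiB and WORKPLACESIZE is 384 KiB of static workspace. */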
1344 
1345 /********************************************************
1346 *  Memory operations
1347 *********************************************************/
1348 static unsigned ZSTD_32bits(void) { return sizeof(void*)==4; }
1349 
1350 static unsigned ZSTD_isLittleEndian(void)
1351 {
1352     const union { U32 i; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
1353     return one.c[0];
1354 }
1355 
1356 static U16    ZSTD_read16(const void* p) { U16 r; memcpy(&r, p, sizeof(r)); return r; }
1357 
1358 static U32    ZSTD_read32(const void* p) { U32 r; memcpy(&r, p, sizeof(r)); return r; }
1359 
1360 static void   ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
1361 
1362 static void   ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
1363 
1364 #define COPY8(d,s)    { ZSTD_copy8(d,s); d+=8; s+=8; }
1365 
1366 static void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
1367 {
1368     const BYTE* ip = (const BYTE*)src;
1369     BYTE* op = (BYTE*)dst;
1370     BYTE* const oend = op + length;
1371     while (op < oend) COPY8(op, ip);
1372 }
1373 
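/* Note : ZSTD_wildcopy() above copies in 8-byte chunks and may write up to 7 bytes
 * beyond dst+length; callers must guarantee that much slack in the destination buffer. */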
1374 static U16 ZSTD_readLE16(const void* memPtr)
1375 {
1376     if (ZSTD_isLittleEndian()) return ZSTD_read16(memPtr);
1377     else
1378     {
1379         const BYTE* p = (const BYTE*)memPtr;
1380         return (U16)((U16)p[0] + ((U16)p[1]<<8));
1381     }
1382 }
1383 
1384 
1385 static U32 ZSTD_readLE32(const void* memPtr)
1386 {
1387     if (ZSTD_isLittleEndian())
1388         return ZSTD_read32(memPtr);
1389     else
1390     {
1391         const BYTE* p = (const BYTE*)memPtr;
1392         return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
1393     }
1394 }
1395 
1396 static U32 ZSTD_readBE32(const void* memPtr)
1397 {
1398     const BYTE* p = (const BYTE*)memPtr;
1399     return (U32)(((U32)p[0]<<24) + ((U32)p[1]<<16) + ((U32)p[2]<<8) + ((U32)p[3]<<0));
1400 }
1401 
1402 
1403 /**************************************
1404 *  Local structures
1405 ***************************************/
1406 typedef struct ZSTD_Cctx_s ZSTD_Cctx;
1407 
1408 typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
1409 
1410 typedef struct
1411 {
1412     blockType_t blockType;
1413     U32 origSize;
1414 } blockProperties_t;
1415 
1416 typedef struct {
1417     void* buffer;
1418     U32*  offsetStart;
1419     U32*  offset;
1420     BYTE* offCodeStart;
1421     BYTE* offCode;
1422     BYTE* litStart;
1423     BYTE* lit;
1424     BYTE* litLengthStart;
1425     BYTE* litLength;
1426     BYTE* matchLengthStart;
1427     BYTE* matchLength;
1428     BYTE* dumpsStart;
1429     BYTE* dumps;
1430 } seqStore_t;
1431 
1432 
1433 typedef struct ZSTD_Cctx_s
1434 {
1435     const BYTE* base;
1436     U32 current;
1437     U32 nextUpdate;
1438     seqStore_t seqStore;
1439 #ifdef __AVX2__
1440     __m256i hashTable[HASH_TABLESIZE>>3];
1441 #else
1442     U32 hashTable[HASH_TABLESIZE];
1443 #endif
1444     BYTE buffer[WORKPLACESIZE];
1445 } cctxi_t;
1446 
1447 
1448 
1449 
1450 /**************************************
1451 *  Error Management
1452 **************************************/
1453 /* published entry point */
1454 unsigned ZSTDv01_isError(size_t code) { return ERR_isError(code); }
1455 
1456 
1457 /**************************************
1458 *  Tool functions
1459 **************************************/
1460 #define ZSTD_VERSION_MAJOR    0    /* for breaking interface changes  */
1461 #define ZSTD_VERSION_MINOR    1    /* for new (non-breaking) interface capabilities */
1462 #define ZSTD_VERSION_RELEASE  3    /* for tweaks, bug-fixes, or development */
1463 #define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
1464 
1465 /**************************************************************
1466 *   Decompression code
1467 **************************************************************/
1468 
1469 static size_t ZSTDv01_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
1470 {
1471     const BYTE* const in = (const BYTE* const)src;
1472     BYTE headerFlags;
1473     U32 cSize;
1474 
1475     if (srcSize < 3) return ERROR(srcSize_wrong);
1476 
1477     headerFlags = *in;
1478     cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
1479 
1480     bpPtr->blockType = (blockType_t)(headerFlags >> 6);
1481     bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
1482 
1483     if (bpPtr->blockType == bt_end) return 0;
1484     if (bpPtr->blockType == bt_rle) return 1;
1485     return cSize;
1486 }
1487 
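/* Worked example for the 3-byte block header parsed above : header bytes
 * { 0x01, 0x02, 0x03 } give blockType == (0x01 >> 6) == 0 == bt_compressed and
 * cSize == 0x03 + (0x02<<8) + ((0x01 & 7)<<16) == 0x010203 == 66051. */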
1488 
1489 static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1490 {
1491     if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
1492     memcpy(dst, src, srcSize);
1493     return srcSize;
1494 }
1495 
1496 
1497 static size_t ZSTD_decompressLiterals(void* ctx,
1498                                       void* dst, size_t maxDstSize,
1499                                 const void* src, size_t srcSize)
1500 {
1501     BYTE* op = (BYTE*)dst;
1502     BYTE* const oend = op + maxDstSize;
1503     const BYTE* ip = (const BYTE*)src;
1504     size_t errorCode;
1505     size_t litSize;
1506 
1507     /* check : at least 2 bytes for litSize, +1 byte for content */
1508     if (srcSize <= 3) return ERROR(corruption_detected);
1509 
1510     litSize = ip[1] + (ip[0]<<8);
1511     litSize += ((ip[-3] >> 3) & 7) << 16;   /* high bits of litSize come from the preceding literals sub-block header byte */
1512     op = oend - litSize;
1513 
1514     (void)ctx;
1515     if (litSize > maxDstSize) return ERROR(dstSize_tooSmall);
1516     errorCode = HUF_decompress(op, litSize, ip+2, srcSize-2);
1517     if (FSE_isError(errorCode)) return ERROR(GENERIC);
1518     return litSize;
1519 }
1520 
1521 
1522 static size_t ZSTDv01_decodeLiteralsBlock(void* ctx,
1523                                 void* dst, size_t maxDstSize,
1524                           const BYTE** litStart, size_t* litSize,
1525                           const void* src, size_t srcSize)
1526 {
1527     const BYTE* const istart = (const BYTE* const)src;
1528     const BYTE* ip = istart;
1529     BYTE* const ostart = (BYTE* const)dst;
1530     BYTE* const oend = ostart + maxDstSize;
1531     blockProperties_t litbp;
1532 
1533     size_t litcSize = ZSTDv01_getcBlockSize(src, srcSize, &litbp);
1534     if (ZSTDv01_isError(litcSize)) return litcSize;
1535     if (litcSize > srcSize - ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
1536     ip += ZSTD_blockHeaderSize;
1537 
1538     switch(litbp.blockType)
1539     {
1540     case bt_raw:
1541         *litStart = ip;
1542         ip += litcSize;
1543         *litSize = litcSize;
1544         break;
1545     case bt_rle:
1546         {
1547             size_t rleSize = litbp.origSize;
1548             if (rleSize>maxDstSize) return ERROR(dstSize_tooSmall);
1549             if (!srcSize) return ERROR(srcSize_wrong);
1550             memset(oend - rleSize, *ip, rleSize);
1551             *litStart = oend - rleSize;
1552             *litSize = rleSize;
1553             ip++;
1554             break;
1555         }
1556     case bt_compressed:
1557         {
1558             size_t decodedLitSize = ZSTD_decompressLiterals(ctx, dst, maxDstSize, ip, litcSize);
1559             if (ZSTDv01_isError(decodedLitSize)) return decodedLitSize;
1560             *litStart = oend - decodedLitSize;
1561             *litSize = decodedLitSize;
1562             ip += litcSize;
1563             break;
1564         }
1565     case bt_end:
1566     default:
1567         return ERROR(GENERIC);
1568     }
1569 
1570     return ip-istart;
1571 }
1572 
1573 
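/* Sequences section header, as parsed by ZSTDv01_decodeSeqHeaders() below
   (descriptive summary) :
     - 2 bytes   : number of sequences (little-endian)
     - 1 byte    : LLtype (bits 7-6), Offtype (bits 5-4), MLtype (bits 3-2),
                   plus bit 1 selecting the long form of the dumps length
     - 1-2 bytes : length of the shared `dumps` area
     - the dumps area itself, immediately followed by the per-type FSE table
       descriptions (a single byte for rle, nothing for raw). */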
1574 static size_t ZSTDv01_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
1575                          FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
1576                          const void* src, size_t srcSize)
1577 {
1578     const BYTE* const istart = (const BYTE* const)src;
1579     const BYTE* ip = istart;
1580     const BYTE* const iend = istart + srcSize;
1581     U32 LLtype, Offtype, MLtype;
1582     U32 LLlog, Offlog, MLlog;
1583     size_t dumpsLength;
1584 
1585     /* check */
1586     if (srcSize < 5) return ERROR(srcSize_wrong);
1587 
1588     /* SeqHead */
1589     *nbSeq = ZSTD_readLE16(ip); ip+=2;
1590     LLtype  = *ip >> 6;
1591     Offtype = (*ip >> 4) & 3;
1592     MLtype  = (*ip >> 2) & 3;
1593     if (*ip & 2)
1594     {
1595         dumpsLength  = ip[2];
1596         dumpsLength += ip[1] << 8;
1597         ip += 3;
1598     }
1599     else
1600     {
1601         dumpsLength  = ip[1];
1602         dumpsLength += (ip[0] & 1) << 8;
1603         ip += 2;
1604     }
1605     *dumpsPtr = ip;
1606     ip += dumpsLength;
1607     *dumpsLengthPtr = dumpsLength;
1608 
1609     /* check */
1610     if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
1611 
1612     /* sequences */
1613     {
1614         S16 norm[MaxML+1];    /* assumption : MaxML >= MaxLL and MaxOff */
1615         size_t headerSize;
1616 
1617         /* Build DTables */
1618         switch(LLtype)
1619         {
1620         case bt_rle :
1621             LLlog = 0;
1622             FSE_buildDTable_rle(DTableLL, *ip++); break;
1623         case bt_raw :
1624             LLlog = LLbits;
1625             FSE_buildDTable_raw(DTableLL, LLbits); break;
1626         default :
1627             {   U32 max = MaxLL;
1628                 headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
1629                 if (FSE_isError(headerSize)) return ERROR(GENERIC);
1630                 if (LLlog > LLFSELog) return ERROR(corruption_detected);
1631                 ip += headerSize;
1632                 FSE_buildDTable(DTableLL, norm, max, LLlog);
1633         }   }
1634 
1635         switch(Offtype)
1636         {
1637         case bt_rle :
1638             Offlog = 0;
1639             if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
1640             FSE_buildDTable_rle(DTableOffb, *ip++); break;
1641         case bt_raw :
1642             Offlog = Offbits;
1643             FSE_buildDTable_raw(DTableOffb, Offbits); break;
1644         default :
1645             {   U32 max = MaxOff;
1646                 headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
1647                 if (FSE_isError(headerSize)) return ERROR(GENERIC);
1648                 if (Offlog > OffFSELog) return ERROR(corruption_detected);
1649                 ip += headerSize;
1650                 FSE_buildDTable(DTableOffb, norm, max, Offlog);
1651         }   }
1652 
1653         switch(MLtype)
1654         {
1655         case bt_rle :
1656             MLlog = 0;
1657             if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
1658             FSE_buildDTable_rle(DTableML, *ip++); break;
1659         case bt_raw :
1660             MLlog = MLbits;
1661             FSE_buildDTable_raw(DTableML, MLbits); break;
1662         default :
1663             {   U32 max = MaxML;
1664                 headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
1665                 if (FSE_isError(headerSize)) return ERROR(GENERIC);
1666                 if (MLlog > MLFSELog) return ERROR(corruption_detected);
1667                 ip += headerSize;
1668                 FSE_buildDTable(DTableML, norm, max, MLlog);
1669     }   }   }
1670 
1671     return ip-istart;
1672 }
1673 
1674 
1675 typedef struct {
1676     size_t litLength;
1677     size_t offset;
1678     size_t matchLength;
1679 } seq_t;
1680 
1681 typedef struct {
1682     FSE_DStream_t DStream;
1683     FSE_DState_t stateLL;
1684     FSE_DState_t stateOffb;
1685     FSE_DState_t stateML;
1686     size_t prevOffset;
1687     const BYTE* dumps;
1688     const BYTE* dumpsEnd;
1689 } seqState_t;
1690 
1691 
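/* ZSTD_decodeSequence() :
   decodes one (litLength, offset, matchLength) triple from the FSE bitstream
   (descriptive note). A literal length equal to MaxLL, or a match length
   equal to MaxML, escapes into the shared `dumps` area : one extra byte is
   read, and if that byte is 255 the length is instead taken from the next
   3 bytes (little-endian). An offset code of 0 repeats the previous offset,
   and MINMATCH is added to the decoded match length. */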
1692 static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
1693 {
1694     size_t litLength;
1695     size_t prevOffset;
1696     size_t offset;
1697     size_t matchLength;
1698     const BYTE* dumps = seqState->dumps;
1699     const BYTE* const de = seqState->dumpsEnd;
1700 
1701     /* Literal length */
1702     litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
1703     prevOffset = litLength ? seq->offset : seqState->prevOffset;
1704     seqState->prevOffset = seq->offset;
1705     if (litLength == MaxLL)
1706     {
1707         U32 add = dumps<de ? *dumps++ : 0;
1708         if (add < 255) litLength += add;
1709         else
1710         {
1711             if (dumps<=(de-3))
1712             {
1713                 litLength = ZSTD_readLE32(dumps) & 0xFFFFFF;  /* no problem : dumps is always followed by seq tables > 1 byte */
1714                 dumps += 3;
1715             }
1716         }
1717     }
1718 
1719     /* Offset */
1720     {
1721         U32 offsetCode, nbBits;
1722         offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream));
1723         if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
1724         nbBits = offsetCode - 1;
1725         if (offsetCode==0) nbBits = 0;   /* cmove */
1726         offset = ((size_t)1 << (nbBits & ((sizeof(offset)*8)-1))) + FSE_readBits(&(seqState->DStream), nbBits);
1727         if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
1728         if (offsetCode==0) offset = prevOffset;
1729     }
1730 
1731     /* MatchLength */
1732     matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
1733     if (matchLength == MaxML)
1734     {
1735         U32 add = dumps<de ? *dumps++ : 0;
1736         if (add < 255) matchLength += add;
1737         else
1738         {
1739             if (dumps<=(de-3))
1740             {
1741                 matchLength = ZSTD_readLE32(dumps) & 0xFFFFFF;  /* no problem : dumps is always followed by seq tables > 1 byte */
1742                 dumps += 3;
1743             }
1744         }
1745     }
1746     matchLength += MINMATCH;
1747 
1748     /* save result */
1749     seq->litLength = litLength;
1750     seq->offset = offset;
1751     seq->matchLength = matchLength;
1752     seqState->dumps = dumps;
1753 }
1754 
1755 
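/* ZSTD_execSequence() :
   copies the sequence's literals, then its match, into `op` (descriptive
   note). Copies proceed 8 bytes at a time (ZSTD_wildcopy) and may write a
   few bytes past the requested length, hence the distance checks against
   `oend`. Offsets smaller than 8 go through dec32table/dec64table so the
   overlapping copy still reproduces the expected pattern, and up to 12 bytes
   beyond the match end are saved and restored when the copy could spill into
   the not-yet-consumed literal segment. */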
1756 static size_t ZSTD_execSequence(BYTE* op,
1757                                 seq_t sequence,
1758                                 const BYTE** litPtr, const BYTE* const litLimit,
1759                                 BYTE* const base, BYTE* const oend)
1760 {
1761     static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};   /* added */
1762     static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11};   /* subtracted */
1763     const BYTE* const ostart = op;
1764     const size_t litLength = sequence.litLength;
1765     BYTE* const endMatch = op + litLength + sequence.matchLength;    /* risk : address space overflow (32-bits) */
1766     const BYTE* const litEnd = *litPtr + litLength;
1767 
1768     /* check */
1769     if (endMatch > oend) return ERROR(dstSize_tooSmall);   /* overwrite beyond dst buffer */
1770     if (litEnd > litLimit) return ERROR(corruption_detected);
1771     if (sequence.matchLength > (size_t)(*litPtr-op))  return ERROR(dstSize_tooSmall);    /* overwrite literal segment */
1772 
1773     /* copy Literals */
1774     if (((size_t)(*litPtr - op) < 8) || ((size_t)(oend-litEnd) < 8) || (op+litLength > oend-8))
1775         memmove(op, *litPtr, litLength);   /* overwrite risk */
1776     else
1777         ZSTD_wildcopy(op, *litPtr, litLength);
1778     op += litLength;
1779     *litPtr = litEnd;   /* update for next sequence */
1780 
1781     /* check : last match must be at a minimum distance of 8 from end of dest buffer */
1782     if (oend-op < 8) return ERROR(dstSize_tooSmall);
1783 
1784     /* copy Match */
1785     {
1786         const U32 overlapRisk = (((size_t)(litEnd - endMatch)) < 12);
1787         const BYTE* match = op - sequence.offset;            /* possible underflow at op - offset ? */
1788         size_t qutt = 12;
1789         U64 saved[2];
1790 
1791         /* check */
1792         if (match < base) return ERROR(corruption_detected);
1793         if (sequence.offset > (size_t)base) return ERROR(corruption_detected);
1794 
1795         /* save beginning of literal sequence, in case of write overlap */
1796         if (overlapRisk)
1797         {
1798             if ((endMatch + qutt) > oend) qutt = oend-endMatch;
1799             memcpy(saved, endMatch, qutt);
1800         }
1801 
1802         if (sequence.offset < 8)
1803         {
1804             const int dec64 = dec64table[sequence.offset];
1805             op[0] = match[0];
1806             op[1] = match[1];
1807             op[2] = match[2];
1808             op[3] = match[3];
1809             match += dec32table[sequence.offset];
1810             ZSTD_copy4(op+4, match);
1811             match -= dec64;
1812         } else { ZSTD_copy8(op, match); }
1813         op += 8; match += 8;
1814 
1815         if (endMatch > oend-(16-MINMATCH))
1816         {
1817             if (op < oend-8)
1818             {
1819                 ZSTD_wildcopy(op, match, (oend-8) - op);
1820                 match += (oend-8) - op;
1821                 op = oend-8;
1822             }
1823             while (op<endMatch) *op++ = *match++;
1824         }
1825         else
1826             ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */
1827 
1828         /* restore, in case of overlap */
1829         if (overlapRisk) memcpy(endMatch, saved, qutt);
1830     }
1831 
1832     return endMatch-ostart;
1833 }
1834 
1835 typedef struct ZSTDv01_Dctx_s
1836 {
1837     U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
1838     U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
1839     U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
1840     void* previousDstEnd;
1841     void* base;
1842     size_t expected;
1843     blockType_t bType;
1844     U32 phase;
1845 } dctx_t;
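
/* dctx_t field usage (descriptive note, derived from the functions below) :
   - expected        : size, in bytes, of the next chunk that
                       ZSTDv01_decompressContinue() will accept
   - bType           : type announced by the last block header
   - phase           : 0 = waiting for the frame header, 1 = waiting for a
                       block header, 2 = waiting for the block content
   - base            : start of the contiguous output region, used to
                       validate match offsets
   - previousDstEnd  : end of the previously produced output, used to detect
                       a non-contiguous destination buffer */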
1846 
1847 
1848 static size_t ZSTD_decompressSequences(
1849                                void* ctx,
1850                                void* dst, size_t maxDstSize,
1851                          const void* seqStart, size_t seqSize,
1852                          const BYTE* litStart, size_t litSize)
1853 {
1854     dctx_t* dctx = (dctx_t*)ctx;
1855     const BYTE* ip = (const BYTE*)seqStart;
1856     const BYTE* const iend = ip + seqSize;
1857     BYTE* const ostart = (BYTE* const)dst;
1858     BYTE* op = ostart;
1859     BYTE* const oend = ostart + maxDstSize;
1860     size_t errorCode, dumpsLength;
1861     const BYTE* litPtr = litStart;
1862     const BYTE* const litEnd = litStart + litSize;
1863     int nbSeq;
1864     const BYTE* dumps;
1865     U32* DTableLL = dctx->LLTable;
1866     U32* DTableML = dctx->MLTable;
1867     U32* DTableOffb = dctx->OffTable;
1868     BYTE* const base = (BYTE*) (dctx->base);
1869 
1870     /* Build Decoding Tables */
1871     errorCode = ZSTDv01_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
1872                                       DTableLL, DTableML, DTableOffb,
1873                                       ip, iend-ip);
1874     if (ZSTDv01_isError(errorCode)) return errorCode;
1875     ip += errorCode;
1876 
1877     /* Regen sequences */
1878     {
1879         seq_t sequence;
1880         seqState_t seqState;
1881 
1882         memset(&sequence, 0, sizeof(sequence));
1883         seqState.dumps = dumps;
1884         seqState.dumpsEnd = dumps + dumpsLength;
1885         seqState.prevOffset = 1;
1886         errorCode = FSE_initDStream(&(seqState.DStream), ip, iend-ip);
1887         if (FSE_isError(errorCode)) return ERROR(corruption_detected);
1888         FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
1889         FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
1890         FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
1891 
1892         for ( ; (FSE_reloadDStream(&(seqState.DStream)) <= FSE_DStream_completed) && (nbSeq>0) ; )
1893         {
1894             size_t oneSeqSize;
1895             nbSeq--;
1896             ZSTD_decodeSequence(&sequence, &seqState);
1897             oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litEnd, base, oend);
1898             if (ZSTDv01_isError(oneSeqSize)) return oneSeqSize;
1899             op += oneSeqSize;
1900         }
1901 
1902         /* check if reached exact end */
1903         if ( !FSE_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected);   /* requested too much : data is corrupted */
1904         if (nbSeq<0) return ERROR(corruption_detected);   /* requested too many sequences : data is corrupted */
1905 
1906         /* last literal segment */
1907         {
1908             size_t lastLLSize = litEnd - litPtr;
1909             if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
1910             if (op != litPtr) memmove(op, litPtr, lastLLSize);
1911             op += lastLLSize;
1912         }
1913     }
1914 
1915     return op-ostart;
1916 }
1917 
1918 
1919 static size_t ZSTD_decompressBlock(
1920                             void* ctx,
1921                             void* dst, size_t maxDstSize,
1922                       const void* src, size_t srcSize)
1923 {
1924     /* blockType == bt_compressed, srcSize is trusted */
1925     const BYTE* ip = (const BYTE*)src;
1926     const BYTE* litPtr = NULL;
1927     size_t litSize = 0;
1928     size_t errorCode;
1929 
1930     /* Decode literals sub-block */
1931     errorCode = ZSTDv01_decodeLiteralsBlock(ctx, dst, maxDstSize, &litPtr, &litSize, src, srcSize);
1932     if (ZSTDv01_isError(errorCode)) return errorCode;
1933     ip += errorCode;
1934     srcSize -= errorCode;
1935 
1936     return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize, litPtr, litSize);
1937 }
1938 
1939 
1940 size_t ZSTDv01_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1941 {
1942     const BYTE* ip = (const BYTE*)src;
1943     const BYTE* iend = ip + srcSize;
1944     BYTE* const ostart = (BYTE* const)dst;
1945     BYTE* op = ostart;
1946     BYTE* const oend = ostart + maxDstSize;
1947     size_t remainingSize = srcSize;
1948     U32 magicNumber;
1949     size_t errorCode=0;
1950     blockProperties_t blockProperties;
1951 
1952     /* Frame Header */
1953     if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
1954     magicNumber = ZSTD_readBE32(src);
1955     if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
1956     ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
1957 
1958     /* Loop on each block */
1959     while (1)
1960     {
1961         size_t blockSize = ZSTDv01_getcBlockSize(ip, iend-ip, &blockProperties);
1962         if (ZSTDv01_isError(blockSize)) return blockSize;
1963 
1964         ip += ZSTD_blockHeaderSize;
1965         remainingSize -= ZSTD_blockHeaderSize;
1966         if (blockSize > remainingSize) return ERROR(srcSize_wrong);
1967 
1968         switch(blockProperties.blockType)
1969         {
1970         case bt_compressed:
1971             errorCode = ZSTD_decompressBlock(ctx, op, oend-op, ip, blockSize);
1972             break;
1973         case bt_raw :
1974             errorCode = ZSTD_copyUncompressedBlock(op, oend-op, ip, blockSize);
1975             break;
1976         case bt_rle :
1977             return ERROR(GENERIC);   /* not yet supported */
1978             break;
1979         case bt_end :
1980             /* end of frame */
1981             if (remainingSize) return ERROR(srcSize_wrong);
1982             break;
1983         default:
1984             return ERROR(GENERIC);
1985         }
1986         if (blockSize == 0) break;   /* bt_end */
1987 
1988         if (ZSTDv01_isError(errorCode)) return errorCode;
1989         op += errorCode;
1990         ip += blockSize;
1991         remainingSize -= blockSize;
1992     }
1993 
1994     return op-ostart;
1995 }
1996 
1997 size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1998 {
1999     dctx_t ctx;
2000     ctx.base = dst;
2001     return ZSTDv01_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
2002 }
2003 
2004 /* ZSTD_errorFrameSizeInfoLegacy() :
2005    assumes `cSize` and `dBound` are _not_ NULL */
2006 static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
2007 {
2008     *cSize = ret;
2009     *dBound = ZSTD_CONTENTSIZE_ERROR;
2010 }
2011 
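/* ZSTDv01_findFrameSizeInfoLegacy() :
   walks the block headers of the frame starting at `src`.
   On success, writes the compressed frame size into *cSize and an upper bound
   of the regenerated size (nbBlocks * BLOCKSIZE) into *dBound.
   On error, *cSize receives the error code and *dBound is set to ZSTD_CONTENTSIZE_ERROR. */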
2012 void ZSTDv01_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
2013 {
2014     const BYTE* ip = (const BYTE*)src;
2015     size_t remainingSize = srcSize;
2016     size_t nbBlocks = 0;
2017     U32 magicNumber;
2018     blockProperties_t blockProperties;
2019 
2020     /* Frame Header */
2021     if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) {
2022         ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
2023         return;
2024     }
2025     magicNumber = ZSTD_readBE32(src);
2026     if (magicNumber != ZSTD_magicNumber) {
2027         ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
2028         return;
2029     }
2030     ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
2031 
2032     /* Loop on each block */
2033     while (1)
2034     {
2035         size_t blockSize = ZSTDv01_getcBlockSize(ip, remainingSize, &blockProperties);
2036         if (ZSTDv01_isError(blockSize)) {
2037             ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, blockSize);
2038             return;
2039         }
2040 
2041         ip += ZSTD_blockHeaderSize;
2042         remainingSize -= ZSTD_blockHeaderSize;
2043         if (blockSize > remainingSize) {
2044             ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
2045             return;
2046         }
2047 
2048         if (blockSize == 0) break;   /* bt_end */
2049 
2050         ip += blockSize;
2051         remainingSize -= blockSize;
2052         nbBlocks++;
2053     }
2054 
2055     *cSize = ip - (const BYTE*)src;
2056     *dBound = nbBlocks * BLOCKSIZE;
2057 }
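
/* Usage sketch (illustrative only, not part of the library) :
   one-shot decompression of a whole v0.1 frame held in memory.
   The destination capacity is taken from ZSTDv01_findFrameSizeInfoLegacy();
   `srcBuffer` and `srcSize` are assumptions of the example, consume() is a
   placeholder for whatever uses the regenerated data, and error handling is
   reduced to an early return.

       size_t cSize;
       unsigned long long dBound;
       ZSTDv01_findFrameSizeInfoLegacy(srcBuffer, srcSize, &cSize, &dBound);
       if (ZSTDv01_isError(cSize) || dBound == ZSTD_CONTENTSIZE_ERROR) return;
       {   void* const dst = malloc((size_t)dBound);
           if (dst != NULL)
           {   size_t const dSize = ZSTDv01_decompress(dst, (size_t)dBound, srcBuffer, srcSize);
               if (!ZSTDv01_isError(dSize)) consume(dst, dSize);
           }
           free(dst);
       }
*/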
2058 
2059 /*******************************
2060 *  Streaming Decompression API
2061 *******************************/
2062 
2063 size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx)
2064 {
2065     dctx->expected = ZSTD_frameHeaderSize;
2066     dctx->phase = 0;
2067     dctx->previousDstEnd = NULL;
2068     dctx->base = NULL;
2069     return 0;
2070 }
2071 
2072 ZSTDv01_Dctx* ZSTDv01_createDCtx(void)
2073 {
2074     ZSTDv01_Dctx* dctx = (ZSTDv01_Dctx*)malloc(sizeof(ZSTDv01_Dctx));
2075     if (dctx==NULL) return NULL;
2076     ZSTDv01_resetDCtx(dctx);
2077     return dctx;
2078 }
2079 
2080 size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx)
2081 {
2082     free(dctx);
2083     return 0;
2084 }
2085 
2086 size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx)
2087 {
2088     return ((dctx_t*)dctx)->expected;
2089 }
2090 
2091 size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
2092 {
2093     dctx_t* ctx = (dctx_t*)dctx;
2094 
2095     /* Sanity check */
2096     if (srcSize != ctx->expected) return ERROR(srcSize_wrong);
2097     if (dst != ctx->previousDstEnd)  /* not contiguous */
2098         ctx->base = dst;
2099 
2100     /* Decompress : frame header */
2101     if (ctx->phase == 0)
2102     {
2103         /* Check frame magic header */
2104         U32 magicNumber = ZSTD_readBE32(src);
2105         if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
2106         ctx->phase = 1;
2107         ctx->expected = ZSTD_blockHeaderSize;
2108         return 0;
2109     }
2110 
2111     /* Decompress : block header */
2112     if (ctx->phase == 1)
2113     {
2114         blockProperties_t bp;
2115         size_t blockSize = ZSTDv01_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
2116         if (ZSTDv01_isError(blockSize)) return blockSize;
2117         if (bp.blockType == bt_end)
2118         {
2119             ctx->expected = 0;
2120             ctx->phase = 0;
2121         }
2122         else
2123         {
2124             ctx->expected = blockSize;
2125             ctx->bType = bp.blockType;
2126             ctx->phase = 2;
2127         }
2128 
2129         return 0;
2130     }
2131 
2132     /* Decompress : block content */
2133     {
2134         size_t rSize;
2135         switch(ctx->bType)
2136         {
2137         case bt_compressed:
2138             rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
2139             break;
2140         case bt_raw :
2141             rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize);
2142             break;
2143         case bt_rle :
2144             return ERROR(GENERIC);   /* not yet handled */
2145             break;
2146         case bt_end :   /* should never happen (filtered at phase 1) */
2147             rSize = 0;
2148             break;
2149         default:
2150             return ERROR(GENERIC);
2151         }
2152         ctx->phase = 1;
2153         ctx->expected = ZSTD_blockHeaderSize;
2154         ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);
2155         return rSize;
2156     }
2157 
2158 }
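
/* Streaming usage sketch (illustrative only, not part of the library).
   ZSTDv01_decompressContinue() must receive exactly
   ZSTDv01_nextSrcSizeToDecompress() bytes per call, and the regenerated data
   must be written contiguously; `outBuff`/`outEnd`, `inBuff` and
   readFromInput() are placeholders assumed by the example.

       ZSTDv01_Dctx* const dctx = ZSTDv01_createDCtx();
       BYTE* op = outBuff;
       size_t toRead = ZSTDv01_nextSrcSizeToDecompress(dctx);
       while (toRead != 0)
       {
           size_t produced;
           readFromInput(inBuff, toRead);
           produced = ZSTDv01_decompressContinue(dctx, op, (size_t)(outEnd - op), inBuff, toRead);
           if (ZSTDv01_isError(produced)) break;
           op += produced;
           toRead = ZSTDv01_nextSrcSizeToDecompress(dctx);
       }
       ZSTDv01_freeDCtx(dctx);
*/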
2159