1 /*
2 * Copyright (c) Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11
12 /******************************************
13 * Includes
14 ******************************************/
15 #include <stddef.h> /* size_t, ptrdiff_t */
16 #include "zstd_v01.h"
17 #include "../common/error_private.h"
18
19
20 /******************************************
21 * Static allocation
22 ******************************************/
/* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */
#define FSE_DTABLE_SIZE_U32(maxTableLog)   (1 + (1<<maxTableLog))

/* You can statically allocate Huff0 DTable as a table of unsigned short using below macro */
#define HUF_DTABLE_SIZE_U16(maxTableLog)   (1 + (1<<maxTableLog))
#define HUF_CREATE_STATIC_DTABLE(DTable, maxTableLog) \
        unsigned short DTable[HUF_DTABLE_SIZE_U16(maxTableLog)] = { maxTableLog }


/******************************************
*  Error Management
******************************************/
/* X-macro list of error codes; expanded once into the enum below.
 * Keeping the list in a macro allows generating matching name strings if ever needed. */
#define FSE_LIST_ERRORS(ITEM) \
        ITEM(FSE_OK_NoError) ITEM(FSE_ERROR_GENERIC) \
        ITEM(FSE_ERROR_tableLog_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooSmall) \
        ITEM(FSE_ERROR_dstSize_tooSmall) ITEM(FSE_ERROR_srcSize_wrong)\
        ITEM(FSE_ERROR_corruptionDetected) \
        ITEM(FSE_ERROR_maxCode)

#define FSE_GENERATE_ENUM(ENUM) ENUM,
typedef enum { FSE_LIST_ERRORS(FSE_GENERATE_ENUM) } FSE_errorCodes;  /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
44
45
46 /******************************************
47 * FSE symbol compression API
48 ******************************************/
49 /*
50 This API consists of small unitary functions, which highly benefit from being inlined.
51 You will want to enable link-time-optimization to ensure these functions are properly inlined in your binary.
52 Visual seems to do it automatically.
53 For gcc or clang, you'll need to add -flto flag at compilation and linking stages.
54 If none of these solutions is applicable, include "fse.c" directly.
55 */
56
typedef unsigned FSE_CTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */

/* Bit-writer state for FSE compression (declared for API completeness; decoder below does not use it). */
typedef struct
{
    size_t bitContainer;
    int    bitPos;
    char*  startPtr;
    char*  ptr;
    char*  endPtr;
} FSE_CStream_t;

/* Per-stream compression state (encoder side). */
typedef struct
{
    ptrdiff_t   value;
    const void* stateTable;
    const void* symbolTT;
    unsigned    stateLog;
} FSE_CState_t;

/* Bit-reader : consumes the stream backwards, from its last byte toward `start`. */
typedef struct
{
    size_t   bitContainer;   /* local cache of up to sizeof(size_t)*8 bits */
    unsigned bitsConsumed;   /* number of bits already consumed from bitContainer */
    const char* ptr;         /* current read position (moves toward start) */
    const char* start;       /* first byte of the source buffer */
} FSE_DStream_t;

/* Decoding state : current table index, plus the decode table it indexes into. */
typedef struct
{
    size_t      state;
    const void* table;   /* precise table may vary, depending on U16 */
} FSE_DState_t;

typedef enum { FSE_DStream_unfinished = 0,
               FSE_DStream_endOfBuffer = 1,
               FSE_DStream_completed = 2,
               FSE_DStream_tooFar = 3 } FSE_DStream_status;  /* result of FSE_reloadDStream() */
/* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... ?! */
96
97
98 /****************************************************************
99 * Tuning parameters
100 ****************************************************************/
/* MEMORY_USAGE :
*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
*  Increasing memory usage improves compression ratio
*  Reduced memory usage can improve speed, due to cache effect
*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
#define FSE_MAX_MEMORY_USAGE 14
#define FSE_DEFAULT_MEMORY_USAGE 13

/* FSE_MAX_SYMBOL_VALUE :
*  Maximum symbol value authorized.
*  Required for proper stack allocation */
#define FSE_MAX_SYMBOL_VALUE 255


/****************************************************************
*  template functions type & suffix
****************************************************************/
/* Symbol type used by the "template" functions included below (BYTE = byte symbols). */
#define FSE_FUNCTION_TYPE BYTE
#define FSE_FUNCTION_EXTENSION


/****************************************************************
*  Byte symbol type
****************************************************************/
/* One decode-table cell : next-state base, decoded symbol, and bits to read. */
typedef struct
{
    unsigned short newState;
    unsigned char  symbol;
    unsigned char  nbBits;
} FSE_decode_t;   /* size == U32 */
131
132
133
134 /****************************************************************
135 * Compiler specifics
136 ****************************************************************/
#ifdef _MSC_VER    /* Visual Studio */
#  define FORCE_INLINE static __forceinline
#  include <intrin.h>                    /* For Visual 2005 */
#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
#else
#  define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
#    ifdef __GNUC__
#      define FORCE_INLINE static inline __attribute__((always_inline))
#    else
#      define FORCE_INLINE static inline
#    endif
#  else
#    define FORCE_INLINE static
#  endif /* __STDC_VERSION__ */
#endif


/****************************************************************
*  Includes
****************************************************************/
#include <stdlib.h>    /* malloc, free, qsort */
#include <string.h>    /* memcpy, memset */
#include <stdio.h>     /* printf (debug) */


#ifndef MEM_ACCESS_MODULE
#define MEM_ACCESS_MODULE
/****************************************************************
*  Basic Types
*****************************************************************/
/* Fixed-width aliases : exact types from <stdint.h> on C99+, best-effort fallbacks otherwise. */
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
# include <stdint.h>
typedef  uint8_t BYTE;
typedef uint16_t U16;
typedef  int16_t S16;
typedef uint32_t U32;
typedef  int32_t S32;
typedef uint64_t U64;
typedef  int64_t S64;
#else
typedef unsigned char       BYTE;
typedef unsigned short      U16;
typedef   signed short      S16;
typedef unsigned int        U32;
typedef   signed int        S32;
typedef unsigned long long  U64;
typedef   signed long long  S64;
#endif

#endif /* MEM_ACCESS_MODULE */

/****************************************************************
*  Memory I/O
*****************************************************************/
/* FSE_FORCE_MEMORY_ACCESS
 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
 * The below switch allow to select different access method for improved performance.
 * Method 0 (default) : use `memcpy()`. Safe and portable.
 * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
 *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
 * Method 2 : direct access. This method is portable but violate C standard.
 *            It can generate buggy code on targets generating assembly depending on alignment.
 *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
 * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
 * Prefer these methods in priority order (0 > 1 > 2)
 */
#ifndef FSE_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
#  if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__)
#    define FSE_FORCE_MEMORY_ACCESS 1
#  endif
#endif
211
212
/* Tells whether the build target uses 32-bit pointers (constant-folded by compilers). */
static unsigned FSE_32bits(void)
{
    return sizeof(void*) == 4 ? 1 : 0;
}
217
FSE_isLittleEndian(void)218 static unsigned FSE_isLittleEndian(void)
219 {
220 const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
221 return one.c[0];
222 }
223
#if defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==2)

/* Method 2 : direct unaligned access. Violates strict aliasing / alignment rules,
 * kept for targets where it is the only fast option. */
static U16 FSE_read16(const void* memPtr) { return *(const U16*) memPtr; }
static U32 FSE_read32(const void* memPtr) { return *(const U32*) memPtr; }
static U64 FSE_read64(const void* memPtr) { return *(const U64*) memPtr; }

#elif defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==1)

/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
/* currently only defined for gcc and icc */
typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign;

static U16 FSE_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
static U32 FSE_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
static U64 FSE_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }

#else

/* Method 0 (default) : memcpy-based access. Safe and portable; compilers lower it to a single load. */
static U16 FSE_read16(const void* memPtr)
{
    U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
}

static U32 FSE_read32(const void* memPtr)
{
    U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
}

static U64 FSE_read64(const void* memPtr)
{
    U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
}

#endif /* FSE_FORCE_MEMORY_ACCESS */
258
FSE_readLE16(const void * memPtr)259 static U16 FSE_readLE16(const void* memPtr)
260 {
261 if (FSE_isLittleEndian())
262 return FSE_read16(memPtr);
263 else
264 {
265 const BYTE* p = (const BYTE*)memPtr;
266 return (U16)(p[0] + (p[1]<<8));
267 }
268 }
269
FSE_readLE32(const void * memPtr)270 static U32 FSE_readLE32(const void* memPtr)
271 {
272 if (FSE_isLittleEndian())
273 return FSE_read32(memPtr);
274 else
275 {
276 const BYTE* p = (const BYTE*)memPtr;
277 return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
278 }
279 }
280
281
FSE_readLE64(const void * memPtr)282 static U64 FSE_readLE64(const void* memPtr)
283 {
284 if (FSE_isLittleEndian())
285 return FSE_read64(memPtr);
286 else
287 {
288 const BYTE* p = (const BYTE*)memPtr;
289 return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
290 + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
291 }
292 }
293
/* Reads a little-endian value of native size_t width (32 or 64 bits). */
static size_t FSE_readLEST(const void* memPtr)
{
    return FSE_32bits() ? (size_t)FSE_readLE32(memPtr)
                        : (size_t)FSE_readLE64(memPtr);
}
301
302
303
304 /****************************************************************
305 * Constants
306 *****************************************************************/
/* Derived table-size limits : tableLog caps two below the memory-usage exponent,
 * because each decode cell occupies 4 bytes (2^2). */
#define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)
#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
#define FSE_MIN_TABLELOG 5

#define FSE_TABLELOG_ABSOLUTE_MAX 15
#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
#endif


/****************************************************************
*  Error Management
****************************************************************/
/* Compile-time assertion via divide-by-zero trick (pre-C11 _Static_assert). */
#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }    /* use only *after* variable declarations */


/****************************************************************
*  Complex types
****************************************************************/
/* Per-symbol transform used by the encoder's state machine. */
typedef struct
{
    int  deltaFindState;
    U32  deltaNbBits;
} FSE_symbolCompressionTransform;   /* total 8 bytes */

/* Worst-case decode table, sized for FSE_MAX_TABLELOG; used for stack allocation. */
typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
335
336 /****************************************************************
337 * Internal functions
338 ****************************************************************/
/* Position of the highest set bit of `val` (floor(log2(val))).
 * NOTE(review): result for val==0 differs per branch (MSVC path returns 0,
 * GCC __builtin_clz(0) is undefined) — callers must pass val != 0. */
FORCE_INLINE unsigned FSE_highbit32 (U32 val)
{
#   if defined(_MSC_VER)   /* Visual */
    unsigned long r;
    return _BitScanReverse(&r, val) ? (unsigned)r : 0;
#   elif defined(__GNUC__) && (GCC_VERSION >= 304)   /* GCC Intrinsic */
    return __builtin_clz (val) ^ 31;
#   else   /* Software version */
    /* De Bruijn multiply-and-lookup : fill all bits below the MSB, then hash. */
    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
    U32 v = val;
    unsigned r;
    v |= v >> 1;
    v |= v >> 2;
    v |= v >> 4;
    v |= v >> 8;
    v |= v >> 16;
    r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
    return r;
#   endif
}
359
360
361 /****************************************************************
362 * Templates
363 ****************************************************************/
364 /*
365 designed to be included
366 for type-specific functions (template emulation in C)
367 Objective is to write these functions only once, for improved maintenance
368 */
369
/* safety checks */
#ifndef FSE_FUNCTION_EXTENSION
#  error "FSE_FUNCTION_EXTENSION must be defined"
#endif
#ifndef FSE_FUNCTION_TYPE
#  error "FSE_FUNCTION_TYPE must be defined"
#endif

/* Function names : token-pasting helpers for the template-emulation scheme. */
#define FSE_CAT(X,Y) X##Y
#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)



/* Step used to spread symbols across the table; coprime with any power-of-2
 * table size, so repeated stepping visits every cell exactly once. */
static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }

#define FSE_DECODE_TYPE FSE_decode_t


/* Header stored in the first U32 of every FSE_DTable. */
typedef struct {
    U16 tableLog;   /* accuracy of the table (log2 of its size) */
    U16 fastMode;   /* 1 when no symbol has probability >= half the table (enables fast decode) */
} FSE_DTableHeader;   /* sizeof U32 */
394
/* Builds an FSE decoding table from a normalized count distribution.
 * dt               : destination table, FSE_DTABLE_SIZE_U32(tableLog) U32s (header + 2^tableLog cells)
 * normalizedCounter: per-symbol probabilities summing to 2^tableLog; -1 marks "low probability" symbols
 * Returns 0 on success, or a negated FSE_errorCodes value (test with FSE_isError). */
static size_t FSE_buildDTable
(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
{
    void* ptr = dt;
    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)(ptr) + 1;   /* because dt is unsigned, 32-bits aligned on 32-bits */
    const U32 tableSize = 1 << tableLog;
    const U32 tableMask = tableSize-1;
    const U32 step = FSE_tableStep(tableSize);
    U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];   /* next state counter per symbol, used in the final pass */
    U32 position = 0;
    U32 highThreshold = tableSize-1;          /* low-probability symbols are packed at the table's end */
    const S16 largeLimit= (S16)(1 << (tableLog-1));
    U32 noLarge = 1;                          /* stays 1 only if no symbol reaches half the table => fastMode */
    U32 s;

    /* Sanity Checks */
    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge;
    if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge;

    /* Init, lay down lowprob symbols */
    DTableH[0].tableLog = (U16)tableLog;
    for (s=0; s<=maxSymbolValue; s++)
    {
        if (normalizedCounter[s]==-1)
        {
            /* probability "less than 1" : give the symbol a single cell at the top */
            tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
            symbolNext[s] = 1;
        }
        else
        {
            if (normalizedCounter[s] >= largeLimit) noLarge=0;
            symbolNext[s] = normalizedCounter[s];
        }
    }

    /* Spread symbols : constant stride walk; stride is coprime with tableSize,
     * so every cell below highThreshold is visited exactly once. */
    for (s=0; s<=maxSymbolValue; s++)
    {
        int i;
        for (i=0; i<normalizedCounter[s]; i++)
        {
            tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
            position = (position + step) & tableMask;
            while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
        }
    }

    if (position!=0) return (size_t)-FSE_ERROR_GENERIC;   /* position must reach all cells once, otherwise normalizedCounter is incorrect */

    /* Build Decoding table : derive, for each cell, how many bits to read and the base of the next state */
    {
        U32 i;
        for (i=0; i<tableSize; i++)
        {
            FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
            U16 nextState = symbolNext[symbol]++;
            tableDecode[i].nbBits = (BYTE) (tableLog - FSE_highbit32 ((U32)nextState) );
            tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
        }
    }

    DTableH->fastMode = (U16)noLarge;
    return 0;
}
460
461
462 /******************************************
463 * FSE byte symbol
464 ******************************************/
465 #ifndef FSE_COMMONDEFS_ONLY
466
/* Errors are encoded as small negated enum values, i.e. the top of the size_t range. */
static unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); }
468
/* Absolute value of a short; counts read from headers may legitimately be -1. */
static short FSE_abs(short a)
{
    if (a < 0) return (short)(-a);
    return a;
}
473
474
475 /****************************************************************
476 * Header bitstream management
477 ****************************************************************/
/* Decodes a normalized-count header (the distribution prefix of an FSE block).
 * normalizedCounter : out, counts per symbol (may contain -1 for low-prob symbols)
 * maxSVPtr          : in/out, capacity on input, highest symbol actually present on output
 * tableLogPtr       : out, accuracy log read from the header
 * Returns the number of header bytes consumed, or a negated error code. */
static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
                 const void* headerBuffer, size_t hbSize)
{
    const BYTE* const istart = (const BYTE*) headerBuffer;
    const BYTE* const iend = istart + hbSize;
    const BYTE* ip = istart;
    int nbBits;
    int remaining;    /* probability mass still to distribute (+1 bias for "extra accuracy") */
    int threshold;    /* current power-of-2 ceiling for a count field */
    U32 bitStream;
    int bitCount;     /* bits of `bitStream` already consumed */
    unsigned charnum = 0;
    int previous0 = 0;   /* set after a zero count : next field is a run-length of zeros */

    if (hbSize < 4) return (size_t)-FSE_ERROR_srcSize_wrong;
    bitStream = FSE_readLE32(ip);
    nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
    if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return (size_t)-FSE_ERROR_tableLog_tooLarge;
    bitStream >>= 4;
    bitCount = 4;
    *tableLogPtr = nbBits;
    remaining = (1<<nbBits)+1;
    threshold = 1<<nbBits;
    nbBits++;

    while ((remaining>1) && (charnum<=*maxSVPtr))
    {
        if (previous0)
        {
            /* decode a run of zero counts : 0xFFFF chunks mean 24 zeros each,
             * then 2-bit groups of 3, then a final 2-bit remainder */
            unsigned n0 = charnum;
            while ((bitStream & 0xFFFF) == 0xFFFF)
            {
                n0+=24;
                if (ip < iend-5)
                {
                    ip+=2;
                    bitStream = FSE_readLE32(ip) >> bitCount;
                }
                else
                {
                    bitStream >>= 16;
                    bitCount+=16;
                }
            }
            while ((bitStream & 3) == 3)
            {
                n0+=3;
                bitStream>>=2;
                bitCount+=2;
            }
            n0 += bitStream & 3;
            bitCount += 2;
            if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_maxSymbolValue_tooSmall;
            while (charnum < n0) normalizedCounter[charnum++] = 0;
            /* refill bitStream when enough input remains; otherwise keep shifting in place */
            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
            {
                ip += bitCount>>3;
                bitCount &= 7;
                bitStream = FSE_readLE32(ip) >> bitCount;
            }
            else
                bitStream >>= 2;
        }
        {
            /* variable-size count field : small values use nbBits-1 bits, larger ones nbBits */
            const short max = (short)((2*threshold-1)-remaining);
            short count;

            if ((bitStream & (threshold-1)) < (U32)max)
            {
                count = (short)(bitStream & (threshold-1));
                bitCount += nbBits-1;
            }
            else
            {
                count = (short)(bitStream & (2*threshold-1));
                if (count >= threshold) count -= max;
                bitCount += nbBits;
            }

            count--;   /* extra accuracy */
            remaining -= FSE_abs(count);   /* count==-1 means "low probability", consumes 1 of the mass */
            normalizedCounter[charnum++] = count;
            previous0 = !count;
            while (remaining < threshold)
            {
                /* remaining mass shrank below threshold : smaller fields suffice from now on */
                nbBits--;
                threshold >>= 1;
            }

            {
                /* advance input pointer and refill, clamping at the last readable 4 bytes */
                if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
                {
                    ip += bitCount>>3;
                    bitCount &= 7;
                }
                else
                {
                    bitCount -= (int)(8 * (iend - 4 - ip));
                    ip = iend - 4;
                }
                bitStream = FSE_readLE32(ip) >> (bitCount & 31);
            }
        }
    }
    if (remaining != 1) return (size_t)-FSE_ERROR_GENERIC;
    *maxSVPtr = charnum-1;

    ip += (bitCount+7)>>3;
    if ((size_t)(ip-istart) > hbSize) return (size_t)-FSE_ERROR_srcSize_wrong;
    return ip-istart;
}
589
590
591 /*********************************************************
592 * Decompression (Byte symbols)
593 *********************************************************/
/* Builds a degenerate decoding table for RLE mode : a single cell that always
 * decodes `symbolValue` and consumes zero bits. Always succeeds (returns 0). */
static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
{
    void* const base = dt;
    FSE_DTableHeader* const header = (FSE_DTableHeader*)base;
    FSE_decode_t* const decodeCell = (FSE_decode_t*)(base) + 1;   /* skip header slot; dt is U32-aligned */

    header->tableLog = 0;
    header->fastMode = 0;

    decodeCell->newState = 0;
    decodeCell->symbol = symbolValue;
    decodeCell->nbBits = 0;

    return 0;
}
609
610
/* Builds an identity decoding table : every symbol 0..2^nbBits-1 is equiprobable
 * and decoded by reading nbBits raw bits. Returns 0, or -FSE_ERROR_GENERIC when nbBits < 1. */
static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
{
    void* const base = dt;
    FSE_DTableHeader* const header = (FSE_DTableHeader*)base;
    FSE_decode_t* const cells = (FSE_decode_t*)(base) + 1;   /* skip header slot; dt is U32-aligned */
    const unsigned tableSize = 1 << nbBits;
    const unsigned lastSymbol = tableSize - 1;
    unsigned sym;

    /* Sanity checks */
    if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC;   /* min size */

    /* Build Decoding Table : cell i decodes symbol i, always reads nbBits */
    header->tableLog = (U16)nbBits;
    header->fastMode = 1;
    for (sym = 0; sym <= lastSymbol; sym++)
    {
        cells[sym].newState = 0;
        cells[sym].symbol = (BYTE)sym;
        cells[sym].nbBits = (BYTE)nbBits;
    }

    return 0;
}
636
637
/* FSE_initDStream
 * Initialize a FSE_DStream_t.
 * srcBuffer must point at the beginning of an FSE block.
 * The function result is the size of the FSE_block (== srcSize).
 * If srcSize is too small, the function will return an errorCode;
 */
static size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
{
    if (srcSize < 1) return (size_t)-FSE_ERROR_srcSize_wrong;

    if (srcSize >= sizeof(size_t))
    {
        U32 contain32;
        bitD->start = (const char*)srcBuffer;
        bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(size_t);
        bitD->bitContainer = FSE_readLEST(bitD->ptr);
        /* last byte carries the "stop bit" : its highest set bit marks where the stream ends */
        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
        if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC;   /* stop bit not present */
        bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
    }
    else
    {
        /* source smaller than one register : assemble the container byte by byte */
        U32 contain32;
        bitD->start = (const char*)srcBuffer;
        bitD->ptr   = bitD->start;
        bitD->bitContainer = *(const BYTE*)(bitD->start);
        switch(srcSize)
        {
            case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
                    /* fallthrough */
            case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
                    /* fallthrough */
            case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
                    /* fallthrough */
            case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
                    /* fallthrough */
            case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
                    /* fallthrough */
            case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8;
                    /* fallthrough */
            default:;
        }
        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
        if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC;   /* stop bit not present */
        bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
        /* account for the bytes the short source could not fill */
        bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
    }

    return srcSize;
}
688
689
/*!FSE_lookBits
 * Provides next n bits from the bitContainer.
 * bitContainer is not modified (bits are still present for next read/look)
 * On 32-bits, maxNbBits==25
 * On 64-bits, maxNbBits==57
 * return : value extracted.
 */
static size_t FSE_lookBits(FSE_DStream_t* bitD, U32 nbBits)
{
    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
    /* the extra ">> 1" keeps the second shift < register width, so nbBits==0 is safe */
    return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
}
702
/* Same as FSE_lookBits but saves one shift; invalid when nbBits==0 (shift by full width). */
static size_t FSE_lookBitsFast(FSE_DStream_t* bitD, U32 nbBits)   /* only if nbBits >= 1 !! */
{
    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
    return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
}
708
/* Marks nbBits as consumed; the container itself is refreshed by FSE_reloadDStream(). */
static void FSE_skipBits(FSE_DStream_t* bitD, U32 nbBits)
{
    bitD->bitsConsumed += nbBits;
}
713
714
715 /*!FSE_readBits
716 * Read next n bits from the bitContainer.
717 * On 32-bits, don't read more than maxNbBits==25
718 * On 64-bits, don't read more than maxNbBits==57
719 * Use the fast variant *only* if n >= 1.
720 * return : value extracted.
721 */
FSE_readBits(FSE_DStream_t * bitD,U32 nbBits)722 static size_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits)
723 {
724 size_t value = FSE_lookBits(bitD, nbBits);
725 FSE_skipBits(bitD, nbBits);
726 return value;
727 }
728
/* Fast variant of FSE_readBits : valid only when nbBits >= 1. */
static size_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits)   /* only if nbBits >= 1 !! */
{
    size_t const extracted = FSE_lookBitsFast(bitD, nbBits);
    FSE_skipBits(bitD, nbBits);
    return extracted;
}
735
/* Refills the bit container from the source (which is read backwards).
 * Returns an FSE_DStream_status describing how much stream remains. */
static unsigned FSE_reloadDStream(FSE_DStream_t* bitD)
{
    if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))   /* should never happen */
        return FSE_DStream_tooFar;

    if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
    {
        /* common case : a full register of input remains behind ptr */
        bitD->ptr -= bitD->bitsConsumed >> 3;
        bitD->bitsConsumed &= 7;
        bitD->bitContainer = FSE_readLEST(bitD->ptr);
        return FSE_DStream_unfinished;
    }
    if (bitD->ptr == bitD->start)
    {
        /* already at the first byte : nothing left to refill from */
        if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return FSE_DStream_endOfBuffer;
        return FSE_DStream_completed;
    }
    {
        /* near the start : step back as far as possible without underflowing */
        U32 nbBytes = bitD->bitsConsumed >> 3;
        U32 result = FSE_DStream_unfinished;
        if (bitD->ptr - nbBytes < bitD->start)
        {
            nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
            result = FSE_DStream_endOfBuffer;
        }
        bitD->ptr -= nbBytes;
        bitD->bitsConsumed -= nbBytes*8;
        bitD->bitContainer = FSE_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD) */
        return result;
    }
}
767
768
/* Initializes a decoding state : reads the initial state value (tableLog bits)
 * from the stream, then points the state at the table's cells (past the header). */
static void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const FSE_DTable* dt)
{
    const void* ptr = dt;
    const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr;
    DStatePtr->state = FSE_readBits(bitD, DTableH->tableLog);
    FSE_reloadDStream(bitD);
    DStatePtr->table = dt + 1;   /* skip the one-U32 header */
}
777
FSE_decodeSymbol(FSE_DState_t * DStatePtr,FSE_DStream_t * bitD)778 static BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
779 {
780 const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
781 const U32 nbBits = DInfo.nbBits;
782 BYTE symbol = DInfo.symbol;
783 size_t lowBits = FSE_readBits(bitD, nbBits);
784
785 DStatePtr->state = DInfo.newState + lowBits;
786 return symbol;
787 }
788
FSE_decodeSymbolFast(FSE_DState_t * DStatePtr,FSE_DStream_t * bitD)789 static BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
790 {
791 const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
792 const U32 nbBits = DInfo.nbBits;
793 BYTE symbol = DInfo.symbol;
794 size_t lowBits = FSE_readBitsFast(bitD, nbBits);
795
796 DStatePtr->state = DInfo.newState + lowBits;
797 return symbol;
798 }
799
/* FSE_endOfDStream
   Tells if bitD has reached end of bitStream or not */

static unsigned FSE_endOfDStream(const FSE_DStream_t* bitD)
{
    /* fully consumed == pointer back at start and every container bit used */
    return ((bitD->ptr == bitD->start) && (bitD->bitsConsumed == sizeof(bitD->bitContainer)*8));
}
807
/* A correctly-terminated FSE stream leaves each decoding state at exactly 0. */
static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
{
    return DStatePtr->state == 0;
}
812
813
/* Core decompression loop, "templated" on `fast` (must be a compile-time constant
 * so each instantiation keeps a branch-free symbol fetch). Two interleaved states
 * decode alternately from the same backward bitstream.
 * Returns the number of bytes written to dst, or a negated error code. */
FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
          void* dst, size_t maxDstSize,
    const void* cSrc, size_t cSrcSize,
    const FSE_DTable* dt, const unsigned fast)
{
    BYTE* const ostart = (BYTE*) dst;
    BYTE* op = ostart;
    BYTE* const omax = op + maxDstSize;
    BYTE* const olimit = omax-3;   /* main loop writes 4 bytes per iteration */

    FSE_DStream_t bitD;
    FSE_DState_t state1;
    FSE_DState_t state2;
    size_t errorCode;

    /* Init */
    errorCode = FSE_initDStream(&bitD, cSrc, cSrcSize);   /* replaced last arg by maxCompressed Size */
    if (FSE_isError(errorCode)) return errorCode;

    FSE_initDState(&state1, &bitD, dt);
    FSE_initDState(&state2, &bitD, dt);

#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)

    /* 4 symbols per loop; the "static" tests below fold away at compile time,
     * inserting reloads only when the register can't hold enough bits */
    for ( ; (FSE_reloadDStream(&bitD)==FSE_DStream_unfinished) && (op<olimit) ; op+=4)
    {
        op[0] = FSE_GETSYMBOL(&state1);

        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
            FSE_reloadDStream(&bitD);

        op[1] = FSE_GETSYMBOL(&state2);

        if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
            { if (FSE_reloadDStream(&bitD) > FSE_DStream_unfinished) { op+=2; break; } }

        op[2] = FSE_GETSYMBOL(&state1);

        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
            FSE_reloadDStream(&bitD);

        op[3] = FSE_GETSYMBOL(&state2);
    }

    /* tail : one symbol at a time until the stream and both states are exhausted */
    /* note : FSE_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly FSE_DStream_completed */
    while (1)
    {
        if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
            break;

        *op++ = FSE_GETSYMBOL(&state1);

        if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
            break;

        *op++ = FSE_GETSYMBOL(&state2);
    }

    /* end ? */
    if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
        return op-ostart;

    if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */

    return (size_t)-FSE_ERROR_corruptionDetected;
}
882
883
FSE_decompress_usingDTable(void * dst,size_t originalSize,const void * cSrc,size_t cSrcSize,const FSE_DTable * dt)884 static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
885 const void* cSrc, size_t cSrcSize,
886 const FSE_DTable* dt)
887 {
888 FSE_DTableHeader DTableH;
889 memcpy(&DTableH, dt, sizeof(DTableH)); /* memcpy() into local variable, to avoid strict aliasing warning */
890
891 /* select fast mode (static) */
892 if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
893 return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
894 }
895
896
/* Full FSE decompression : parses the normalized-count header, builds the
 * decoding table on the stack, then decodes the remaining payload.
 * Returns the decompressed size, or a negated error code. */
static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
{
    const BYTE* const istart = (const BYTE*)cSrc;
    const BYTE* ip = istart;
    short counting[FSE_MAX_SYMBOL_VALUE+1];
    DTable_max_t dt;   /* Static analyzer seems unable to understand this table will be properly initialized later */
    unsigned tableLog;
    unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
    size_t errorCode;

    if (cSrcSize<2) return (size_t)-FSE_ERROR_srcSize_wrong;   /* too small input size */

    /* normal FSE decoding mode */
    errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
    if (FSE_isError(errorCode)) return errorCode;
    if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;   /* too small input size */
    ip += errorCode;       /* skip the consumed header bytes */
    cSrcSize -= errorCode;

    errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
    if (FSE_isError(errorCode)) return errorCode;

    /* always return, even if it is an error code */
    return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
}
922
923
924
925 /* *******************************************************
926 * Huff0 : Huffman block compression
927 *********************************************************/
/* Huff0 limits : symbols are byte values (0-255); tableLog is capped for allocation */
#define HUF_MAX_SYMBOL_VALUE 255
#define HUF_DEFAULT_TABLELOG 12 /* used by default, when not specified */
#define HUF_MAX_TABLELOG 12 /* max possible tableLog; for allocation purpose; can be modified */
#define HUF_ABSOLUTEMAX_TABLELOG 16 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
# error "HUF_MAX_TABLELOG is too large !"
#endif

/* compression-side table cell : one code per symbol (not used by the decoder below) */
typedef struct HUF_CElt_s {
    U16 val;      /* code value */
    BYTE nbBits;  /* code length, in bits */
} HUF_CElt ;

/* compression-side tree node, for Huffman tree construction */
typedef struct nodeElt_s {
    U32 count;    /* symbol frequency */
    U16 parent;   /* index of parent node */
    BYTE byte;    /* symbol value */
    BYTE nbBits;  /* resulting code length */
} nodeElt;


/* *******************************************************
*  Huff0 : Huffman block decompression
*********************************************************/
/* decoding table cell : symbol to emit + number of bits to consume (see HUF_decodeSymbol) */
typedef struct {
    BYTE byte;
    BYTE nbBits;
} HUF_DElt;
956
HUF_readDTable(U16 * DTable,const void * src,size_t srcSize)957 static size_t HUF_readDTable (U16* DTable, const void* src, size_t srcSize)
958 {
959 BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
960 U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */
961 U32 weightTotal;
962 U32 maxBits;
963 const BYTE* ip = (const BYTE*) src;
964 size_t iSize;
965 size_t oSize;
966 U32 n;
967 U32 nextRankStart;
968 void* ptr = DTable+1;
969 HUF_DElt* const dt = (HUF_DElt*)ptr;
970
971 if (!srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
972 iSize = ip[0];
973
974 FSE_STATIC_ASSERT(sizeof(HUF_DElt) == sizeof(U16)); /* if compilation fails here, assertion is false */
975 //memset(huffWeight, 0, sizeof(huffWeight)); /* should not be necessary, but some analyzer complain ... */
976 if (iSize >= 128) /* special header */
977 {
978 if (iSize >= (242)) /* RLE */
979 {
980 static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
981 oSize = l[iSize-242];
982 memset(huffWeight, 1, sizeof(huffWeight));
983 iSize = 0;
984 }
985 else /* Incompressible */
986 {
987 oSize = iSize - 127;
988 iSize = ((oSize+1)/2);
989 if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
990 ip += 1;
991 for (n=0; n<oSize; n+=2)
992 {
993 huffWeight[n] = ip[n/2] >> 4;
994 huffWeight[n+1] = ip[n/2] & 15;
995 }
996 }
997 }
998 else /* header compressed with FSE (normal case) */
999 {
1000 if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
1001 oSize = FSE_decompress(huffWeight, HUF_MAX_SYMBOL_VALUE, ip+1, iSize); /* max 255 values decoded, last one is implied */
1002 if (FSE_isError(oSize)) return oSize;
1003 }
1004
1005 /* collect weight stats */
1006 memset(rankVal, 0, sizeof(rankVal));
1007 weightTotal = 0;
1008 for (n=0; n<oSize; n++)
1009 {
1010 if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return (size_t)-FSE_ERROR_corruptionDetected;
1011 rankVal[huffWeight[n]]++;
1012 weightTotal += (1 << huffWeight[n]) >> 1;
1013 }
1014 if (weightTotal == 0) return (size_t)-FSE_ERROR_corruptionDetected;
1015
1016 /* get last non-null symbol weight (implied, total must be 2^n) */
1017 maxBits = FSE_highbit32(weightTotal) + 1;
1018 if (maxBits > DTable[0]) return (size_t)-FSE_ERROR_tableLog_tooLarge; /* DTable is too small */
1019 DTable[0] = (U16)maxBits;
1020 {
1021 U32 total = 1 << maxBits;
1022 U32 rest = total - weightTotal;
1023 U32 verif = 1 << FSE_highbit32(rest);
1024 U32 lastWeight = FSE_highbit32(rest) + 1;
1025 if (verif != rest) return (size_t)-FSE_ERROR_corruptionDetected; /* last value must be a clean power of 2 */
1026 huffWeight[oSize] = (BYTE)lastWeight;
1027 rankVal[lastWeight]++;
1028 }
1029
1030 /* check tree construction validity */
1031 if ((rankVal[1] < 2) || (rankVal[1] & 1)) return (size_t)-FSE_ERROR_corruptionDetected; /* by construction : at least 2 elts of rank 1, must be even */
1032
1033 /* Prepare ranks */
1034 nextRankStart = 0;
1035 for (n=1; n<=maxBits; n++)
1036 {
1037 U32 current = nextRankStart;
1038 nextRankStart += (rankVal[n] << (n-1));
1039 rankVal[n] = current;
1040 }
1041
1042 /* fill DTable */
1043 for (n=0; n<=oSize; n++)
1044 {
1045 const U32 w = huffWeight[n];
1046 const U32 length = (1 << w) >> 1;
1047 U32 i;
1048 HUF_DElt D;
1049 D.byte = (BYTE)n; D.nbBits = (BYTE)(maxBits + 1 - w);
1050 for (i = rankVal[w]; i < rankVal[w] + length; i++)
1051 dt[i] = D;
1052 rankVal[w] += length;
1053 }
1054
1055 return iSize+1;
1056 }
1057
1058
HUF_decodeSymbol(FSE_DStream_t * Dstream,const HUF_DElt * dt,const U32 dtLog)1059 static BYTE HUF_decodeSymbol(FSE_DStream_t* Dstream, const HUF_DElt* dt, const U32 dtLog)
1060 {
1061 const size_t val = FSE_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
1062 const BYTE c = dt[val].byte;
1063 FSE_skipBits(Dstream, dt[val].nbBits);
1064 return c;
1065 }
1066
HUF_decompress_usingDTable(void * dst,size_t maxDstSize,const void * cSrc,size_t cSrcSize,const U16 * DTable)1067 static size_t HUF_decompress_usingDTable( /* -3% slower when non static */
1068 void* dst, size_t maxDstSize,
1069 const void* cSrc, size_t cSrcSize,
1070 const U16* DTable)
1071 {
1072 if (cSrcSize < 6) return (size_t)-FSE_ERROR_srcSize_wrong;
1073 {
1074 BYTE* const ostart = (BYTE*) dst;
1075 BYTE* op = ostart;
1076 BYTE* const omax = op + maxDstSize;
1077 BYTE* const olimit = maxDstSize < 15 ? op : omax-15;
1078
1079 const void* ptr = DTable;
1080 const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1;
1081 const U32 dtLog = DTable[0];
1082 size_t errorCode;
1083 U32 reloadStatus;
1084
1085 /* Init */
1086
1087 const U16* jumpTable = (const U16*)cSrc;
1088 const size_t length1 = FSE_readLE16(jumpTable);
1089 const size_t length2 = FSE_readLE16(jumpTable+1);
1090 const size_t length3 = FSE_readLE16(jumpTable+2);
1091 const size_t length4 = cSrcSize - 6 - length1 - length2 - length3; /* check coherency !! */
1092 const char* const start1 = (const char*)(cSrc) + 6;
1093 const char* const start2 = start1 + length1;
1094 const char* const start3 = start2 + length2;
1095 const char* const start4 = start3 + length3;
1096 FSE_DStream_t bitD1, bitD2, bitD3, bitD4;
1097
1098 if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
1099
1100 errorCode = FSE_initDStream(&bitD1, start1, length1);
1101 if (FSE_isError(errorCode)) return errorCode;
1102 errorCode = FSE_initDStream(&bitD2, start2, length2);
1103 if (FSE_isError(errorCode)) return errorCode;
1104 errorCode = FSE_initDStream(&bitD3, start3, length3);
1105 if (FSE_isError(errorCode)) return errorCode;
1106 errorCode = FSE_initDStream(&bitD4, start4, length4);
1107 if (FSE_isError(errorCode)) return errorCode;
1108
1109 reloadStatus=FSE_reloadDStream(&bitD2);
1110
1111 /* 16 symbols per loop */
1112 for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit); /* D2-3-4 are supposed to be synchronized and finish together */
1113 op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
1114 {
1115 #define HUF_DECODE_SYMBOL_0(n, Dstream) \
1116 op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);
1117
1118 #define HUF_DECODE_SYMBOL_1(n, Dstream) \
1119 op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
1120 if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)
1121
1122 #define HUF_DECODE_SYMBOL_2(n, Dstream) \
1123 op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
1124 if (FSE_32bits()) FSE_reloadDStream(&Dstream)
1125
1126 HUF_DECODE_SYMBOL_1( 0, bitD1);
1127 HUF_DECODE_SYMBOL_1( 1, bitD2);
1128 HUF_DECODE_SYMBOL_1( 2, bitD3);
1129 HUF_DECODE_SYMBOL_1( 3, bitD4);
1130 HUF_DECODE_SYMBOL_2( 4, bitD1);
1131 HUF_DECODE_SYMBOL_2( 5, bitD2);
1132 HUF_DECODE_SYMBOL_2( 6, bitD3);
1133 HUF_DECODE_SYMBOL_2( 7, bitD4);
1134 HUF_DECODE_SYMBOL_1( 8, bitD1);
1135 HUF_DECODE_SYMBOL_1( 9, bitD2);
1136 HUF_DECODE_SYMBOL_1(10, bitD3);
1137 HUF_DECODE_SYMBOL_1(11, bitD4);
1138 HUF_DECODE_SYMBOL_0(12, bitD1);
1139 HUF_DECODE_SYMBOL_0(13, bitD2);
1140 HUF_DECODE_SYMBOL_0(14, bitD3);
1141 HUF_DECODE_SYMBOL_0(15, bitD4);
1142 }
1143
1144 if (reloadStatus!=FSE_DStream_completed) /* not complete : some bitStream might be FSE_DStream_unfinished */
1145 return (size_t)-FSE_ERROR_corruptionDetected;
1146
1147 /* tail */
1148 {
1149 /* bitTail = bitD1; */ /* *much* slower : -20% !??! */
1150 FSE_DStream_t bitTail;
1151 bitTail.ptr = bitD1.ptr;
1152 bitTail.bitsConsumed = bitD1.bitsConsumed;
1153 bitTail.bitContainer = bitD1.bitContainer; /* required in case of FSE_DStream_endOfBuffer */
1154 bitTail.start = start1;
1155 for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
1156 {
1157 HUF_DECODE_SYMBOL_0(0, bitTail);
1158 }
1159
1160 if (FSE_endOfDStream(&bitTail))
1161 return op-ostart;
1162 }
1163
1164 if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */
1165
1166 return (size_t)-FSE_ERROR_corruptionDetected;
1167 }
1168 }
1169
1170
HUF_decompress(void * dst,size_t maxDstSize,const void * cSrc,size_t cSrcSize)1171 static size_t HUF_decompress (void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
1172 {
1173 HUF_CREATE_STATIC_DTABLE(DTable, HUF_MAX_TABLELOG);
1174 const BYTE* ip = (const BYTE*) cSrc;
1175 size_t errorCode;
1176
1177 errorCode = HUF_readDTable (DTable, cSrc, cSrcSize);
1178 if (FSE_isError(errorCode)) return errorCode;
1179 if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
1180 ip += errorCode;
1181 cSrcSize -= errorCode;
1182
1183 return HUF_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable);
1184 }
1185
1186
1187 #endif /* FSE_COMMONDEFS_ONLY */
1188
1189 /*
1190 zstd - standard compression library
1191 Copyright (C) 2014-2015, Yann Collet.
1192
1193 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
1194
1195 Redistribution and use in source and binary forms, with or without
1196 modification, are permitted provided that the following conditions are
1197 met:
1198 * Redistributions of source code must retain the above copyright
1199 notice, this list of conditions and the following disclaimer.
1200 * Redistributions in binary form must reproduce the above
1201 copyright notice, this list of conditions and the following disclaimer
1202 in the documentation and/or other materials provided with the
1203 distribution.
1204 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1205 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1206 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1207 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1208 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1209 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1210 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1211 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1212 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1213 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1214 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1215
1216 You can contact the author at :
1217 - zstd source repository : https://github.com/Cyan4973/zstd
1218 - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
1219 */
1220
1221 /****************************************************************
1222 * Tuning parameters
1223 *****************************************************************/
1224 /* MEMORY_USAGE :
1225 * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
1226 * Increasing memory usage improves compression ratio
1227 * Reduced memory usage can improve speed, due to cache effect */
1228 #define ZSTD_MEMORY_USAGE 17
1229
1230
1231 /**************************************
1232 CPU Feature Detection
1233 **************************************/
1234 /*
1235 * Automated efficient unaligned memory access detection
1236 * Based on known hardware architectures
1237 * This list will be updated thanks to feedbacks
1238 */
1239 #if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \
1240 || defined(__ARM_FEATURE_UNALIGNED) \
1241 || defined(__i386__) || defined(__x86_64__) \
1242 || defined(_M_IX86) || defined(_M_X64) \
1243 || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__) \
1244 || (defined(_M_ARM) && (_M_ARM >= 7))
1245 # define ZSTD_UNALIGNED_ACCESS 1
1246 #else
1247 # define ZSTD_UNALIGNED_ACCESS 0
1248 #endif
1249
1250
1251 /********************************************************
1252 * Includes
1253 *********************************************************/
1254 #include <stdlib.h> /* calloc */
1255 #include <string.h> /* memcpy, memmove */
1256 #include <stdio.h> /* debug : printf */
1257
1258
1259 /********************************************************
1260 * Compiler specifics
1261 *********************************************************/
1262 #ifdef __AVX2__
1263 # include <immintrin.h> /* AVX2 intrinsics */
1264 #endif
1265
1266 #ifdef _MSC_VER /* Visual Studio */
1267 # include <intrin.h> /* For Visual 2005 */
1268 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
1269 # pragma warning(disable : 4324) /* disable: C4324: padded structure */
1270 #endif
1271
1272
1273 #ifndef MEM_ACCESS_MODULE
1274 #define MEM_ACCESS_MODULE
1275 /********************************************************
1276 * Basic Types
1277 *********************************************************/
1278 #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
1279 # if defined(_AIX)
1280 # include <inttypes.h>
1281 # else
1282 # include <stdint.h> /* intptr_t */
1283 # endif
1284 typedef uint8_t BYTE;
1285 typedef uint16_t U16;
1286 typedef int16_t S16;
1287 typedef uint32_t U32;
1288 typedef int32_t S32;
1289 typedef uint64_t U64;
1290 #else
1291 typedef unsigned char BYTE;
1292 typedef unsigned short U16;
1293 typedef signed short S16;
1294 typedef unsigned int U32;
1295 typedef signed int S32;
1296 typedef unsigned long long U64;
1297 #endif
1298
1299 #endif /* MEM_ACCESS_MODULE */
1300
1301
1302 /********************************************************
1303 * Constants
1304 *********************************************************/
static const U32 ZSTD_magicNumber = 0xFD2FB51E;   /* 3rd version : seqNb header */

/* compression-side hash table sizing, derived from the memory-usage knob */
#define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
#define HASH_TABLESIZE (1 << HASH_LOG)
#define HASH_MASK (HASH_TABLESIZE - 1)

#define KNUTH 2654435761   /* Knuth's multiplicative hash constant */

#define BIT7 128
#define BIT6 64
#define BIT5 32
#define BIT4 16

#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)

#define BLOCKSIZE (128 KB)   /* define, for static allocation */

#define WORKPLACESIZE (BLOCKSIZE*3)
#define MINMATCH 4
#define MLbits   7   /* matchLength code width : MaxML = 127 */
#define LLbits   6   /* litLength code width : MaxLL = 63 */
#define Offbits  5   /* offset code width : MaxOff = 31 */
#define MaxML  ((1<<MLbits )-1)
#define MaxLL  ((1<<LLbits )-1)
#define MaxOff ((1<<Offbits)-1)
/* max tableLog accepted for each FSE table (literals, matchLength, litLength, offset) */
#define LitFSELog  11
#define MLFSELog   10
#define LLFSELog   10
#define OffFSELog   9
#define MAX(a,b) ((a)<(b)?(b):(a))
#define MaxSeq MAX(MaxLL, MaxML)

#define LITERAL_NOENTROPY 63
#define COMMAND_NOENTROPY 7   /* to remove */

#define ZSTD_CONTENTSIZE_ERROR   (0ULL - 2)

static const size_t ZSTD_blockHeaderSize = 3;   /* 1 flag byte + 2 size bytes (see ZSTDv01_getcBlockSize) */
static const size_t ZSTD_frameHeaderSize = 4;
1346
1347
1348 /********************************************************
1349 * Memory operations
1350 *********************************************************/
ZSTD_32bits(void)1351 static unsigned ZSTD_32bits(void) { return sizeof(void*)==4; }
1352
ZSTD_isLittleEndian(void)1353 static unsigned ZSTD_isLittleEndian(void)
1354 {
1355 const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
1356 return one.c[0];
1357 }
1358
ZSTD_read16(const void * p)1359 static U16 ZSTD_read16(const void* p) { U16 r; memcpy(&r, p, sizeof(r)); return r; }
1360
ZSTD_copy4(void * dst,const void * src)1361 static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
1362
ZSTD_copy8(void * dst,const void * src)1363 static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
1364
1365 #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
1366
ZSTD_wildcopy(void * dst,const void * src,ptrdiff_t length)1367 static void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
1368 {
1369 const BYTE* ip = (const BYTE*)src;
1370 BYTE* op = (BYTE*)dst;
1371 BYTE* const oend = op + length;
1372 while (op < oend) COPY8(op, ip);
1373 }
1374
ZSTD_readLE16(const void * memPtr)1375 static U16 ZSTD_readLE16(const void* memPtr)
1376 {
1377 if (ZSTD_isLittleEndian()) return ZSTD_read16(memPtr);
1378 else
1379 {
1380 const BYTE* p = (const BYTE*)memPtr;
1381 return (U16)((U16)p[0] + ((U16)p[1]<<8));
1382 }
1383 }
1384
ZSTD_readLE24(const void * memPtr)1385 static U32 ZSTD_readLE24(const void* memPtr)
1386 {
1387 return ZSTD_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
1388 }
1389
ZSTD_readBE32(const void * memPtr)1390 static U32 ZSTD_readBE32(const void* memPtr)
1391 {
1392 const BYTE* p = (const BYTE*)memPtr;
1393 return (U32)(((U32)p[0]<<24) + ((U32)p[1]<<16) + ((U32)p[2]<<8) + ((U32)p[3]<<0));
1394 }
1395
1396
1397 /**************************************
1398 * Local structures
1399 ***************************************/
typedef struct ZSTD_Cctx_s ZSTD_Cctx;   /* opaque forward declaration; defined below as cctxi_t */

/* block types, as encoded in the top 2 bits of a block header (see ZSTDv01_getcBlockSize) */
typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;

typedef struct
{
    blockType_t blockType;
    U32 origSize;   /* regenerated size; only set for bt_rle blocks, 0 otherwise */
} blockProperties_t;

/* compression-side sequence storage : start/current cursor pairs for each stream */
typedef struct {
    void* buffer;
    U32*  offsetStart;
    U32*  offset;
    BYTE* offCodeStart;
    BYTE* offCode;
    BYTE* litStart;
    BYTE* lit;
    BYTE* litLengthStart;
    BYTE* litLength;
    BYTE* matchLengthStart;
    BYTE* matchLength;
    BYTE* dumpsStart;
    BYTE* dumps;
} seqStore_t;


/* compression context (unused by the decoder paths below) */
typedef struct ZSTD_Cctx_s
{
    const BYTE* base;
    U32 current;
    U32 nextUpdate;
    seqStore_t seqStore;
#ifdef __AVX2__
    __m256i hashTable[HASH_TABLESIZE>>3];
#else
    U32 hashTable[HASH_TABLESIZE];
#endif
    BYTE buffer[WORKPLACESIZE];
} cctxi_t;
1440
1441
1442
1443
1444 /**************************************
1445 * Error Management
1446 **************************************/
/* published entry point */
/*! ZSTDv01_isError() : tells if a `size_t` function result is an error code */
unsigned ZSTDv01_isError(size_t code) { return ERR_isError(code); }
1449
1450
1451 /**************************************
1452 * Tool functions
1453 **************************************/
1454 #define ZSTD_VERSION_MAJOR 0 /* for breaking interface changes */
1455 #define ZSTD_VERSION_MINOR 1 /* for new (non-breaking) interface capabilities */
1456 #define ZSTD_VERSION_RELEASE 3 /* for tweaks, bug-fixes, or development */
1457 #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
1458
1459 /**************************************************************
1460 * Decompression code
1461 **************************************************************/
1462
ZSTDv01_getcBlockSize(const void * src,size_t srcSize,blockProperties_t * bpPtr)1463 static size_t ZSTDv01_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
1464 {
1465 const BYTE* const in = (const BYTE* const)src;
1466 BYTE headerFlags;
1467 U32 cSize;
1468
1469 if (srcSize < 3) return ERROR(srcSize_wrong);
1470
1471 headerFlags = *in;
1472 cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
1473
1474 bpPtr->blockType = (blockType_t)(headerFlags >> 6);
1475 bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
1476
1477 if (bpPtr->blockType == bt_end) return 0;
1478 if (bpPtr->blockType == bt_rle) return 1;
1479 return cSize;
1480 }
1481
1482
ZSTD_copyUncompressedBlock(void * dst,size_t maxDstSize,const void * src,size_t srcSize)1483 static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1484 {
1485 if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
1486 if (srcSize > 0) {
1487 memcpy(dst, src, srcSize);
1488 }
1489 return srcSize;
1490 }
1491
1492
ZSTD_decompressLiterals(void * ctx,void * dst,size_t maxDstSize,const void * src,size_t srcSize)1493 static size_t ZSTD_decompressLiterals(void* ctx,
1494 void* dst, size_t maxDstSize,
1495 const void* src, size_t srcSize)
1496 {
1497 BYTE* op = (BYTE*)dst;
1498 BYTE* const oend = op + maxDstSize;
1499 const BYTE* ip = (const BYTE*)src;
1500 size_t errorCode;
1501 size_t litSize;
1502
1503 /* check : minimum 2, for litSize, +1, for content */
1504 if (srcSize <= 3) return ERROR(corruption_detected);
1505
1506 litSize = ip[1] + (ip[0]<<8);
1507 litSize += ((ip[-3] >> 3) & 7) << 16; /* mmmmh.... */
1508 op = oend - litSize;
1509
1510 (void)ctx;
1511 if (litSize > maxDstSize) return ERROR(dstSize_tooSmall);
1512 errorCode = HUF_decompress(op, litSize, ip+2, srcSize-2);
1513 if (FSE_isError(errorCode)) return ERROR(GENERIC);
1514 return litSize;
1515 }
1516
1517
ZSTDv01_decodeLiteralsBlock(void * ctx,void * dst,size_t maxDstSize,const BYTE ** litStart,size_t * litSize,const void * src,size_t srcSize)1518 static size_t ZSTDv01_decodeLiteralsBlock(void* ctx,
1519 void* dst, size_t maxDstSize,
1520 const BYTE** litStart, size_t* litSize,
1521 const void* src, size_t srcSize)
1522 {
1523 const BYTE* const istart = (const BYTE* const)src;
1524 const BYTE* ip = istart;
1525 BYTE* const ostart = (BYTE* const)dst;
1526 BYTE* const oend = ostart + maxDstSize;
1527 blockProperties_t litbp;
1528
1529 size_t litcSize = ZSTDv01_getcBlockSize(src, srcSize, &litbp);
1530 if (ZSTDv01_isError(litcSize)) return litcSize;
1531 if (litcSize > srcSize - ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
1532 ip += ZSTD_blockHeaderSize;
1533
1534 switch(litbp.blockType)
1535 {
1536 case bt_raw:
1537 *litStart = ip;
1538 ip += litcSize;
1539 *litSize = litcSize;
1540 break;
1541 case bt_rle:
1542 {
1543 size_t rleSize = litbp.origSize;
1544 if (rleSize>maxDstSize) return ERROR(dstSize_tooSmall);
1545 if (!srcSize) return ERROR(srcSize_wrong);
1546 if (rleSize > 0) {
1547 memset(oend - rleSize, *ip, rleSize);
1548 }
1549 *litStart = oend - rleSize;
1550 *litSize = rleSize;
1551 ip++;
1552 break;
1553 }
1554 case bt_compressed:
1555 {
1556 size_t decodedLitSize = ZSTD_decompressLiterals(ctx, dst, maxDstSize, ip, litcSize);
1557 if (ZSTDv01_isError(decodedLitSize)) return decodedLitSize;
1558 *litStart = oend - decodedLitSize;
1559 *litSize = decodedLitSize;
1560 ip += litcSize;
1561 break;
1562 }
1563 case bt_end:
1564 default:
1565 return ERROR(GENERIC);
1566 }
1567
1568 return ip-istart;
1569 }
1570
1571
ZSTDv01_decodeSeqHeaders(int * nbSeq,const BYTE ** dumpsPtr,size_t * dumpsLengthPtr,FSE_DTable * DTableLL,FSE_DTable * DTableML,FSE_DTable * DTableOffb,const void * src,size_t srcSize)1572 static size_t ZSTDv01_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
1573 FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
1574 const void* src, size_t srcSize)
1575 {
1576 const BYTE* const istart = (const BYTE* const)src;
1577 const BYTE* ip = istart;
1578 const BYTE* const iend = istart + srcSize;
1579 U32 LLtype, Offtype, MLtype;
1580 U32 LLlog, Offlog, MLlog;
1581 size_t dumpsLength;
1582
1583 /* check */
1584 if (srcSize < 5) return ERROR(srcSize_wrong);
1585
1586 /* SeqHead */
1587 *nbSeq = ZSTD_readLE16(ip); ip+=2;
1588 LLtype = *ip >> 6;
1589 Offtype = (*ip >> 4) & 3;
1590 MLtype = (*ip >> 2) & 3;
1591 if (*ip & 2)
1592 {
1593 dumpsLength = ip[2];
1594 dumpsLength += ip[1] << 8;
1595 ip += 3;
1596 }
1597 else
1598 {
1599 dumpsLength = ip[1];
1600 dumpsLength += (ip[0] & 1) << 8;
1601 ip += 2;
1602 }
1603 *dumpsPtr = ip;
1604 ip += dumpsLength;
1605 *dumpsLengthPtr = dumpsLength;
1606
1607 /* check */
1608 if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
1609
1610 /* sequences */
1611 {
1612 S16 norm[MaxML+1]; /* assumption : MaxML >= MaxLL and MaxOff */
1613 size_t headerSize;
1614
1615 /* Build DTables */
1616 switch(LLtype)
1617 {
1618 case bt_rle :
1619 LLlog = 0;
1620 FSE_buildDTable_rle(DTableLL, *ip++); break;
1621 case bt_raw :
1622 LLlog = LLbits;
1623 FSE_buildDTable_raw(DTableLL, LLbits); break;
1624 default :
1625 { U32 max = MaxLL;
1626 headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
1627 if (FSE_isError(headerSize)) return ERROR(GENERIC);
1628 if (LLlog > LLFSELog) return ERROR(corruption_detected);
1629 ip += headerSize;
1630 FSE_buildDTable(DTableLL, norm, max, LLlog);
1631 } }
1632
1633 switch(Offtype)
1634 {
1635 case bt_rle :
1636 Offlog = 0;
1637 if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
1638 FSE_buildDTable_rle(DTableOffb, *ip++); break;
1639 case bt_raw :
1640 Offlog = Offbits;
1641 FSE_buildDTable_raw(DTableOffb, Offbits); break;
1642 default :
1643 { U32 max = MaxOff;
1644 headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
1645 if (FSE_isError(headerSize)) return ERROR(GENERIC);
1646 if (Offlog > OffFSELog) return ERROR(corruption_detected);
1647 ip += headerSize;
1648 FSE_buildDTable(DTableOffb, norm, max, Offlog);
1649 } }
1650
1651 switch(MLtype)
1652 {
1653 case bt_rle :
1654 MLlog = 0;
1655 if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
1656 FSE_buildDTable_rle(DTableML, *ip++); break;
1657 case bt_raw :
1658 MLlog = MLbits;
1659 FSE_buildDTable_raw(DTableML, MLbits); break;
1660 default :
1661 { U32 max = MaxML;
1662 headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
1663 if (FSE_isError(headerSize)) return ERROR(GENERIC);
1664 if (MLlog > MLFSELog) return ERROR(corruption_detected);
1665 ip += headerSize;
1666 FSE_buildDTable(DTableML, norm, max, MLlog);
1667 } } }
1668
1669 return ip-istart;
1670 }
1671
1672
/* one decoded sequence : copy litLength literals, then a match of matchLength bytes at `offset` back */
typedef struct {
    size_t litLength;
    size_t offset;
    size_t matchLength;
} seq_t;

/* rolling state while decoding the sequences bitstream */
typedef struct {
    FSE_DStream_t DStream;    /* shared bitstream feeding the 3 FSE states */
    FSE_DState_t stateLL;     /* literal-length decoder state */
    FSE_DState_t stateOffb;   /* offset decoder state */
    FSE_DState_t stateML;     /* match-length decoder state */
    size_t prevOffset;        /* saved offset, used by repeat-offset codes */
    const BYTE* dumps;        /* cursor into the raw "dumps" area (escape-coded long lengths) */
    const BYTE* dumpsEnd;     /* end of the dumps area */
} seqState_t;
1688
1689
ZSTD_decodeSequence(seq_t * seq,seqState_t * seqState)1690 static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
1691 {
1692 size_t litLength;
1693 size_t prevOffset;
1694 size_t offset;
1695 size_t matchLength;
1696 const BYTE* dumps = seqState->dumps;
1697 const BYTE* const de = seqState->dumpsEnd;
1698
1699 /* Literal length */
1700 litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
1701 prevOffset = litLength ? seq->offset : seqState->prevOffset;
1702 seqState->prevOffset = seq->offset;
1703 if (litLength == MaxLL)
1704 {
1705 const U32 add = dumps<de ? *dumps++ : 0;
1706 if (add < 255) litLength += add;
1707 else
1708 {
1709 if (dumps<=(de-3))
1710 {
1711 litLength = ZSTD_readLE24(dumps);
1712 dumps += 3;
1713 }
1714 }
1715 }
1716
1717 /* Offset */
1718 {
1719 U32 offsetCode, nbBits;
1720 offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream));
1721 if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
1722 nbBits = offsetCode - 1;
1723 if (offsetCode==0) nbBits = 0; /* cmove */
1724 offset = ((size_t)1 << (nbBits & ((sizeof(offset)*8)-1))) + FSE_readBits(&(seqState->DStream), nbBits);
1725 if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
1726 if (offsetCode==0) offset = prevOffset;
1727 }
1728
1729 /* MatchLength */
1730 matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
1731 if (matchLength == MaxML)
1732 {
1733 const U32 add = dumps<de ? *dumps++ : 0;
1734 if (add < 255) matchLength += add;
1735 else
1736 {
1737 if (dumps<=(de-3))
1738 {
1739 matchLength = ZSTD_readLE24(dumps);
1740 dumps += 3;
1741 }
1742 }
1743 }
1744 matchLength += MINMATCH;
1745
1746 /* save result */
1747 seq->litLength = litLength;
1748 seq->offset = offset;
1749 seq->matchLength = matchLength;
1750 seqState->dumps = dumps;
1751 }
1752
1753
ZSTD_execSequence(BYTE * op,seq_t sequence,const BYTE ** litPtr,const BYTE * const litLimit,BYTE * const base,BYTE * const oend)1754 static size_t ZSTD_execSequence(BYTE* op,
1755 seq_t sequence,
1756 const BYTE** litPtr, const BYTE* const litLimit,
1757 BYTE* const base, BYTE* const oend)
1758 {
1759 static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */
1760 static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* subtracted */
1761 const BYTE* const ostart = op;
1762 const size_t litLength = sequence.litLength;
1763 BYTE* const endMatch = op + litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */
1764 const BYTE* const litEnd = *litPtr + litLength;
1765
1766 /* check */
1767 if (endMatch > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */
1768 if (litEnd > litLimit) return ERROR(corruption_detected);
1769 if (sequence.matchLength > (size_t)(*litPtr-op)) return ERROR(dstSize_tooSmall); /* overwrite literal segment */
1770
1771 /* copy Literals */
1772 if (((size_t)(*litPtr - op) < 8) || ((size_t)(oend-litEnd) < 8) || (op+litLength > oend-8))
1773 memmove(op, *litPtr, litLength); /* overwrite risk */
1774 else
1775 ZSTD_wildcopy(op, *litPtr, litLength);
1776 op += litLength;
1777 *litPtr = litEnd; /* update for next sequence */
1778
1779 /* check : last match must be at a minimum distance of 8 from end of dest buffer */
1780 if (oend-op < 8) return ERROR(dstSize_tooSmall);
1781
1782 /* copy Match */
1783 {
1784 const U32 overlapRisk = (((size_t)(litEnd - endMatch)) < 12);
1785 const BYTE* match = op - sequence.offset; /* possible underflow at op - offset ? */
1786 size_t qutt = 12;
1787 U64 saved[2];
1788
1789 /* check */
1790 if (match < base) return ERROR(corruption_detected);
1791 if (sequence.offset > (size_t)base) return ERROR(corruption_detected);
1792
1793 /* save beginning of literal sequence, in case of write overlap */
1794 if (overlapRisk)
1795 {
1796 if ((endMatch + qutt) > oend) qutt = oend-endMatch;
1797 memcpy(saved, endMatch, qutt);
1798 }
1799
1800 if (sequence.offset < 8)
1801 {
1802 const int dec64 = dec64table[sequence.offset];
1803 op[0] = match[0];
1804 op[1] = match[1];
1805 op[2] = match[2];
1806 op[3] = match[3];
1807 match += dec32table[sequence.offset];
1808 ZSTD_copy4(op+4, match);
1809 match -= dec64;
1810 } else { ZSTD_copy8(op, match); }
1811 op += 8; match += 8;
1812
1813 if (endMatch > oend-(16-MINMATCH))
1814 {
1815 if (op < oend-8)
1816 {
1817 ZSTD_wildcopy(op, match, (oend-8) - op);
1818 match += (oend-8) - op;
1819 op = oend-8;
1820 }
1821 while (op<endMatch) *op++ = *match++;
1822 }
1823 else
1824 ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
1825
1826 /* restore, in case of overlap */
1827 if (overlapRisk) memcpy(endMatch, saved, qutt);
1828 }
1829
1830 return endMatch-ostart;
1831 }
1832
/* decompression context : the three FSE tables for sequences + frame-level state */
typedef struct ZSTDv01_Dctx_s
{
    U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];     /* literal-length decoding table */
    U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];   /* offset decoding table */
    U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];     /* match-length decoding table */
    void* previousDstEnd;
    void* base;        /* start of output, used as lower bound for match offsets (see ZSTD_execSequence) */
    size_t expected;   /* presumably the input size expected by the next decoding step — confirm with streaming callers */
    blockType_t bType;
    U32 phase;
} dctx_t;
1844
1845
/* ZSTD_decompressSequences() :
 * Decodes all sequences of a compressed block and regenerates content into dst.
 * ctx : a dctx_t*, providing the FSE decoding tables and the segment base pointer.
 * seqStart/seqSize : the sequences sub-block (headers followed by the FSE bitstream).
 * litStart/litSize : literals previously decoded by ZSTDv01_decodeLiteralsBlock().
 * @return : nb of bytes regenerated into dst, or an error code (tested with ZSTDv01_isError()). */
static size_t ZSTD_decompressSequences(
                               void* ctx,
                               void* dst, size_t maxDstSize,
                         const void* seqStart, size_t seqSize,
                         const BYTE* litStart, size_t litSize)
{
    dctx_t* dctx = (dctx_t*)ctx;
    const BYTE* ip = (const BYTE*)seqStart;
    const BYTE* const iend = ip + seqSize;
    BYTE* const ostart = (BYTE* const)dst;
    BYTE* op = ostart;
    BYTE* const oend = ostart + maxDstSize;
    size_t errorCode, dumpsLength;
    const BYTE* litPtr = litStart;
    const BYTE* const litEnd = litStart + litSize;
    int nbSeq;
    const BYTE* dumps;
    U32* DTableLL = dctx->LLTable;
    U32* DTableML = dctx->MLTable;
    U32* DTableOffb = dctx->OffTable;
    BYTE* const base = (BYTE*) (dctx->base);

    /* Build Decoding Tables (returns nb of header bytes consumed) */
    errorCode = ZSTDv01_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
                                      DTableLL, DTableML, DTableOffb,
                                      ip, iend-ip);
    if (ZSTDv01_isError(errorCode)) return errorCode;
    ip += errorCode;   /* skip past the sequence headers ; rest is the FSE bitstream */

    /* Regen sequences */
    {
        seq_t sequence;
        seqState_t seqState;

        memset(&sequence, 0, sizeof(sequence));
        seqState.dumps = dumps;
        seqState.dumpsEnd = dumps + dumpsLength;
        seqState.prevOffset = 1;   /* initial repeat-offset */
        errorCode = FSE_initDStream(&(seqState.DStream), ip, iend-ip);
        if (FSE_isError(errorCode)) return ERROR(corruption_detected);
        /* note : the three states share one bitstream ; init order matters */
        FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
        FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
        FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);

        /* decode one sequence per iteration while bitstream data remains and sequences are left */
        for ( ; (FSE_reloadDStream(&(seqState.DStream)) <= FSE_DStream_completed) && (nbSeq>0) ; )
        {
            size_t oneSeqSize;
            nbSeq--;
            ZSTD_decodeSequence(&sequence, &seqState);
            oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litEnd, base, oend);
            if (ZSTDv01_isError(oneSeqSize)) return oneSeqSize;
            op += oneSeqSize;
        }

        /* check if reached exact end */
        if ( !FSE_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected);   /* requested too much : data is corrupted */
        if (nbSeq<0) return ERROR(corruption_detected);   /* requested too many sequences : data is corrupted */

        /* last literal segment : literals left over after the final sequence */
        {
            size_t lastLLSize = litEnd - litPtr;
            if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
            if (lastLLSize > 0) {
                if (op != litPtr) memmove(op, litPtr, lastLLSize);   /* memmove : regions may overlap */
                op += lastLLSize;
            }
        }
    }

    return op-ostart;
}
1917
1918
/* ZSTD_decompressBlock() :
 * Decodes one compressed block : first the literals sub-block, then the sequences.
 * blockType == blockCompressed and srcSize are assumed already validated by the caller.
 * @return : nb of bytes regenerated into dst, or an error code. */
static size_t ZSTD_decompressBlock(
                            void* ctx,
                            void* dst, size_t maxDstSize,
                      const void* src, size_t srcSize)
{
    const BYTE* litPtr = NULL;
    size_t litSize = 0;
    size_t litCSize;

    /* Decode literals sub-block ; on success, litCSize = nb of input bytes consumed */
    litCSize = ZSTDv01_decodeLiteralsBlock(ctx, dst, maxDstSize, &litPtr, &litSize, src, srcSize);
    if (ZSTDv01_isError(litCSize)) return litCSize;

    /* The remainder of the input holds the sequences sub-block */
    return ZSTD_decompressSequences(ctx, dst, maxDstSize,
                                    (const BYTE*)src + litCSize, srcSize - litCSize,
                                    litPtr, litSize);
}
1938
1939
/* ZSTDv01_decompressDCtx() :
 * One-shot decompression of a complete v0.1 frame from src into dst,
 * using the decoding tables stored in the provided context.
 * Fix : removed unreachable `break;` statements following `return` in the switch (dead code).
 * @return : nb of bytes regenerated into dst, or an error code (see ZSTDv01_isError()). */
size_t ZSTDv01_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
    const BYTE* ip = (const BYTE*)src;
    const BYTE* iend = ip + srcSize;
    BYTE* const ostart = (BYTE* const)dst;
    BYTE* op = ostart;
    BYTE* const oend = ostart + maxDstSize;
    size_t remainingSize = srcSize;
    U32 magicNumber;
    size_t errorCode=0;
    blockProperties_t blockProperties;

    /* Frame Header : input must hold at least magic number + one block header */
    if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
    magicNumber = ZSTD_readBE32(src);
    if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
    ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;

    /* Loop on each block */
    while (1)
    {
        size_t blockSize = ZSTDv01_getcBlockSize(ip, iend-ip, &blockProperties);
        if (ZSTDv01_isError(blockSize)) return blockSize;

        ip += ZSTD_blockHeaderSize;
        remainingSize -= ZSTD_blockHeaderSize;
        if (blockSize > remainingSize) return ERROR(srcSize_wrong);

        switch(blockProperties.blockType)
        {
        case bt_compressed:
            errorCode = ZSTD_decompressBlock(ctx, op, oend-op, ip, blockSize);
            break;
        case bt_raw :
            errorCode = ZSTD_copyUncompressedBlock(op, oend-op, ip, blockSize);
            break;
        case bt_rle :
            return ERROR(GENERIC);   /* not yet supported */
        case bt_end :
            /* end of frame : no content may follow */
            if (remainingSize) return ERROR(srcSize_wrong);
            break;
        default:
            return ERROR(GENERIC);
        }
        if (blockSize == 0) break;   /* bt_end carries blockSize==0 : last block of the frame */

        if (ZSTDv01_isError(errorCode)) return errorCode;
        op += errorCode;   /* errorCode = nb of bytes regenerated by this block */
        ip += blockSize;
        remainingSize -= blockSize;
    }

    return op-ostart;
}
1996
/* ZSTDv01_decompress() :
 * One-shot decompression using a stack-allocated context.
 * Only ctx.base is initialized here : the non-streaming path (ZSTDv01_decompressDCtx)
 * fills the decoding tables itself and never reads expected/phase/previousDstEnd. */
size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
    dctx_t ctx;
    ctx.base = dst;   /* output start = lower bound for match offsets */
    return ZSTDv01_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
}
2003
/* ZSTD_errorFrameSizeInfoLegacy() :
 * Reports a failure through the out-parameters of ZSTDv01_findFrameSizeInfoLegacy() :
 * *cSize receives the error code `ret`, *dBound is set to ZSTD_CONTENTSIZE_ERROR.
 * assumes `cSize` and `dBound` are _not_ NULL */
static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
{
    *cSize = ret;
    *dBound = ZSTD_CONTENTSIZE_ERROR;
}
2011
/* ZSTDv01_findFrameSizeInfoLegacy() :
 * Scans a v0.1 frame without decompressing it, reporting :
 * - *cSize  : compressed size of the frame (or an error code on malformed input),
 * - *dBound : an upper bound for the regenerated size (nbBlocks * BLOCKSIZE).
 * On error, *cSize holds the error code and *dBound == ZSTD_CONTENTSIZE_ERROR. */
void ZSTDv01_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
{
    const BYTE* ip = (const BYTE*)src;
    size_t remainingSize = srcSize;
    size_t nbBlocks = 0;
    U32 magicNumber;
    blockProperties_t blockProperties;

    /* Frame Header : input must hold at least magic number + one block header */
    if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) {
        ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
        return;
    }
    magicNumber = ZSTD_readBE32(src);
    if (magicNumber != ZSTD_magicNumber) {
        ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
        return;
    }
    ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;

    /* Loop on each block : only headers are parsed, block content is skipped */
    while (1)
    {
        size_t blockSize = ZSTDv01_getcBlockSize(ip, remainingSize, &blockProperties);
        if (ZSTDv01_isError(blockSize)) {
            ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, blockSize);
            return;
        }

        ip += ZSTD_blockHeaderSize;
        remainingSize -= ZSTD_blockHeaderSize;
        if (blockSize > remainingSize) {
            ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
            return;
        }

        if (blockSize == 0) break;   /* bt_end : last block of the frame */

        ip += blockSize;
        remainingSize -= blockSize;
        nbBlocks++;
    }

    *cSize = ip - (const BYTE*)src;
    *dBound = nbBlocks * BLOCKSIZE;   /* each block regenerates at most BLOCKSIZE bytes */
}
2058
2059 /*******************************
2060 * Streaming Decompression API
2061 *******************************/
2062
ZSTDv01_resetDCtx(ZSTDv01_Dctx * dctx)2063 size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx)
2064 {
2065 dctx->expected = ZSTD_frameHeaderSize;
2066 dctx->phase = 0;
2067 dctx->previousDstEnd = NULL;
2068 dctx->base = NULL;
2069 return 0;
2070 }
2071
ZSTDv01_createDCtx(void)2072 ZSTDv01_Dctx* ZSTDv01_createDCtx(void)
2073 {
2074 ZSTDv01_Dctx* dctx = (ZSTDv01_Dctx*)malloc(sizeof(ZSTDv01_Dctx));
2075 if (dctx==NULL) return NULL;
2076 ZSTDv01_resetDCtx(dctx);
2077 return dctx;
2078 }
2079
/* ZSTDv01_freeDCtx() :
 * Releases a context created by ZSTDv01_createDCtx(). NULL is accepted (free(NULL) is a no-op).
 * @return : 0 (never fails) */
size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx)
{
    free(dctx);
    return 0;
}
2085
/* ZSTDv01_nextSrcSizeToDecompress() :
 * @return : exact nb of input bytes the next ZSTDv01_decompressContinue() call expects */
size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx)
{
    return ((dctx_t*)dctx)->expected;
}
2090
/* ZSTDv01_decompressContinue() :
 * Streaming decompression, driven as a 3-phase state machine (ctx->phase) :
 *   phase 0 : expect the frame header ; phase 1 : expect a block header ;
 *   phase 2 : expect one block's content.
 * srcSize must equal the value last returned by ZSTDv01_nextSrcSizeToDecompress().
 * @return : nb of bytes regenerated into dst (0 while parsing headers), or an error code. */
size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
    dctx_t* ctx = (dctx_t*)dctx;

    /* Sanity check */
    if (srcSize != ctx->expected) return ERROR(srcSize_wrong);
    if (dst != ctx->previousDstEnd) /* not contiguous : restart the match-reference base */
        ctx->base = dst;

    /* Decompress : frame header */
    if (ctx->phase == 0)
    {
        /* Check frame magic header */
        U32 magicNumber = ZSTD_readBE32(src);
        if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
        ctx->phase = 1;
        ctx->expected = ZSTD_blockHeaderSize;   /* next : first block header */
        return 0;
    }

    /* Decompress : block header */
    if (ctx->phase == 1)
    {
        blockProperties_t bp;
        size_t blockSize = ZSTDv01_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
        if (ZSTDv01_isError(blockSize)) return blockSize;
        if (bp.blockType == bt_end)
        {
            /* frame finished : expect no further input, rewind to phase 0 */
            ctx->expected = 0;
            ctx->phase = 0;
        }
        else
        {
            /* next call must deliver the full block content */
            ctx->expected = blockSize;
            ctx->bType = bp.blockType;
            ctx->phase = 2;
        }

        return 0;
    }

    /* Decompress : block content */
    {
        size_t rSize;
        switch(ctx->bType)
        {
        case bt_compressed:
            rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
            break;
        case bt_raw :
            rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize);
            break;
        case bt_rle :
            return ERROR(GENERIC);   /* not yet handled */
            break;
        case bt_end :   /* should never happen (filtered at phase 1) */
            rSize = 0;
            break;
        default:
            return ERROR(GENERIC);
        }
        ctx->phase = 1;   /* back to expecting a block header */
        ctx->expected = ZSTD_blockHeaderSize;
        ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);   /* for the contiguity check above */
        return rSize;
    }

}
2159