10c16b537SWarner Losh /* 20c16b537SWarner Losh * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. 30c16b537SWarner Losh * All rights reserved. 40c16b537SWarner Losh * 50c16b537SWarner Losh * This source code is licensed under both the BSD-style license (found in the 60c16b537SWarner Losh * LICENSE file in the root directory of this source tree) and the GPLv2 (found 70c16b537SWarner Losh * in the COPYING file in the root directory of this source tree). 80c16b537SWarner Losh * You may select, at your option, one of the above-listed licenses. 90c16b537SWarner Losh */ 100c16b537SWarner Losh 110c16b537SWarner Losh 120c16b537SWarner Losh /****************************************** 130c16b537SWarner Losh * Includes 140c16b537SWarner Losh ******************************************/ 150c16b537SWarner Losh #include <stddef.h> /* size_t, ptrdiff_t */ 160c16b537SWarner Losh #include "zstd_v01.h" 170c16b537SWarner Losh #include "error_private.h" 180c16b537SWarner Losh 190c16b537SWarner Losh 200c16b537SWarner Losh /****************************************** 210c16b537SWarner Losh * Static allocation 220c16b537SWarner Losh ******************************************/ 230c16b537SWarner Losh /* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */ 240c16b537SWarner Losh #define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog)) 250c16b537SWarner Losh 260c16b537SWarner Losh /* You can statically allocate Huff0 DTable as a table of unsigned short using below macro */ 270c16b537SWarner Losh #define HUF_DTABLE_SIZE_U16(maxTableLog) (1 + (1<<maxTableLog)) 280c16b537SWarner Losh #define HUF_CREATE_STATIC_DTABLE(DTable, maxTableLog) \ 290c16b537SWarner Losh unsigned short DTable[HUF_DTABLE_SIZE_U16(maxTableLog)] = { maxTableLog } 300c16b537SWarner Losh 310c16b537SWarner Losh 320c16b537SWarner Losh /****************************************** 330c16b537SWarner Losh * Error Management 340c16b537SWarner Losh ******************************************/ 350c16b537SWarner Losh #define FSE_LIST_ERRORS(ITEM) \ 360c16b537SWarner Losh ITEM(FSE_OK_NoError) ITEM(FSE_ERROR_GENERIC) \ 370c16b537SWarner Losh ITEM(FSE_ERROR_tableLog_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooSmall) \ 380c16b537SWarner Losh ITEM(FSE_ERROR_dstSize_tooSmall) ITEM(FSE_ERROR_srcSize_wrong)\ 390c16b537SWarner Losh ITEM(FSE_ERROR_corruptionDetected) \ 400c16b537SWarner Losh ITEM(FSE_ERROR_maxCode) 410c16b537SWarner Losh 420c16b537SWarner Losh #define FSE_GENERATE_ENUM(ENUM) ENUM, 430c16b537SWarner Losh typedef enum { FSE_LIST_ERRORS(FSE_GENERATE_ENUM) } FSE_errorCodes; /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */ 440c16b537SWarner Losh 450c16b537SWarner Losh 460c16b537SWarner Losh /****************************************** 470c16b537SWarner Losh * FSE symbol compression API 480c16b537SWarner Losh ******************************************/ 490c16b537SWarner Losh /* 500c16b537SWarner Losh This API consists of small unitary functions, which highly benefit from being inlined. 510c16b537SWarner Losh You will want to enable link-time-optimization to ensure these functions are properly inlined in your binary. 520c16b537SWarner Losh Visual seems to do it automatically. 530c16b537SWarner Losh For gcc or clang, you'll need to add -flto flag at compilation and linking stages. 540c16b537SWarner Losh If none of these solutions is applicable, include "fse.c" directly. 550c16b537SWarner Losh */ 560c16b537SWarner Losh 570c16b537SWarner Losh typedef unsigned FSE_CTable; /* don't allocate that. It's just a way to be more restrictive than void* */ 580c16b537SWarner Losh typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ 590c16b537SWarner Losh 600c16b537SWarner Losh typedef struct 610c16b537SWarner Losh { 620c16b537SWarner Losh size_t bitContainer; 630c16b537SWarner Losh int bitPos; 640c16b537SWarner Losh char* startPtr; 650c16b537SWarner Losh char* ptr; 660c16b537SWarner Losh char* endPtr; 670c16b537SWarner Losh } FSE_CStream_t; 680c16b537SWarner Losh 690c16b537SWarner Losh typedef struct 700c16b537SWarner Losh { 710c16b537SWarner Losh ptrdiff_t value; 720c16b537SWarner Losh const void* stateTable; 730c16b537SWarner Losh const void* symbolTT; 740c16b537SWarner Losh unsigned stateLog; 750c16b537SWarner Losh } FSE_CState_t; 760c16b537SWarner Losh 770c16b537SWarner Losh typedef struct 780c16b537SWarner Losh { 790c16b537SWarner Losh size_t bitContainer; 800c16b537SWarner Losh unsigned bitsConsumed; 810c16b537SWarner Losh const char* ptr; 820c16b537SWarner Losh const char* start; 830c16b537SWarner Losh } FSE_DStream_t; 840c16b537SWarner Losh 850c16b537SWarner Losh typedef struct 860c16b537SWarner Losh { 870c16b537SWarner Losh size_t state; 880c16b537SWarner Losh const void* table; /* precise table may vary, depending on U16 */ 890c16b537SWarner Losh } FSE_DState_t; 900c16b537SWarner Losh 910c16b537SWarner Losh typedef enum { FSE_DStream_unfinished = 0, 920c16b537SWarner Losh FSE_DStream_endOfBuffer = 1, 930c16b537SWarner Losh FSE_DStream_completed = 2, 940c16b537SWarner Losh FSE_DStream_tooFar = 3 } FSE_DStream_status; /* result of FSE_reloadDStream() */ 950c16b537SWarner Losh /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... ?! */ 960c16b537SWarner Losh 970c16b537SWarner Losh 980c16b537SWarner Losh /**************************************************************** 990c16b537SWarner Losh * Tuning parameters 1000c16b537SWarner Losh ****************************************************************/ 1010c16b537SWarner Losh /* MEMORY_USAGE : 1020c16b537SWarner Losh * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) 1030c16b537SWarner Losh * Increasing memory usage improves compression ratio 1040c16b537SWarner Losh * Reduced memory usage can improve speed, due to cache effect 1050c16b537SWarner Losh * Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ 1060c16b537SWarner Losh #define FSE_MAX_MEMORY_USAGE 14 1070c16b537SWarner Losh #define FSE_DEFAULT_MEMORY_USAGE 13 1080c16b537SWarner Losh 1090c16b537SWarner Losh /* FSE_MAX_SYMBOL_VALUE : 1100c16b537SWarner Losh * Maximum symbol value authorized. 1110c16b537SWarner Losh * Required for proper stack allocation */ 1120c16b537SWarner Losh #define FSE_MAX_SYMBOL_VALUE 255 1130c16b537SWarner Losh 1140c16b537SWarner Losh 1150c16b537SWarner Losh /**************************************************************** 1160c16b537SWarner Losh * template functions type & suffix 1170c16b537SWarner Losh ****************************************************************/ 1180c16b537SWarner Losh #define FSE_FUNCTION_TYPE BYTE 1190c16b537SWarner Losh #define FSE_FUNCTION_EXTENSION 1200c16b537SWarner Losh 1210c16b537SWarner Losh 1220c16b537SWarner Losh /**************************************************************** 1230c16b537SWarner Losh * Byte symbol type 1240c16b537SWarner Losh ****************************************************************/ 1250c16b537SWarner Losh typedef struct 1260c16b537SWarner Losh { 1270c16b537SWarner Losh unsigned short newState; 1280c16b537SWarner Losh unsigned char symbol; 1290c16b537SWarner Losh unsigned char nbBits; 1300c16b537SWarner Losh } FSE_decode_t; /* size == U32 */ 1310c16b537SWarner Losh 1320c16b537SWarner Losh 1330c16b537SWarner Losh 1340c16b537SWarner Losh /**************************************************************** 1350c16b537SWarner Losh * Compiler specifics 1360c16b537SWarner Losh ****************************************************************/ 1370c16b537SWarner Losh #ifdef _MSC_VER /* Visual Studio */ 1380c16b537SWarner Losh # define FORCE_INLINE static __forceinline 1390c16b537SWarner Losh # include <intrin.h> /* For Visual 2005 */ 1400c16b537SWarner Losh # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ 1410c16b537SWarner Losh # pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ 1420c16b537SWarner Losh #else 1430c16b537SWarner Losh # define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) 1440c16b537SWarner Losh # if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ 1450c16b537SWarner Losh # ifdef __GNUC__ 1460c16b537SWarner Losh # define FORCE_INLINE static inline __attribute__((always_inline)) 1470c16b537SWarner Losh # else 1480c16b537SWarner Losh # define FORCE_INLINE static inline 1490c16b537SWarner Losh # endif 1500c16b537SWarner Losh # else 1510c16b537SWarner Losh # define FORCE_INLINE static 1520c16b537SWarner Losh # endif /* __STDC_VERSION__ */ 1530c16b537SWarner Losh #endif 1540c16b537SWarner Losh 1550c16b537SWarner Losh 1560c16b537SWarner Losh /**************************************************************** 1570c16b537SWarner Losh * Includes 1580c16b537SWarner Losh ****************************************************************/ 1590c16b537SWarner Losh #include <stdlib.h> /* malloc, free, qsort */ 1600c16b537SWarner Losh #include <string.h> /* memcpy, memset */ 1610c16b537SWarner Losh #include <stdio.h> /* printf (debug) */ 1620c16b537SWarner Losh 1630c16b537SWarner Losh 1640c16b537SWarner Losh #ifndef MEM_ACCESS_MODULE 1650c16b537SWarner Losh #define MEM_ACCESS_MODULE 1660c16b537SWarner Losh /**************************************************************** 1670c16b537SWarner Losh * Basic Types 1680c16b537SWarner Losh *****************************************************************/ 1690c16b537SWarner Losh #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ 1700c16b537SWarner Losh # include <stdint.h> 1710c16b537SWarner Losh typedef uint8_t BYTE; 1720c16b537SWarner Losh typedef uint16_t U16; 1730c16b537SWarner Losh typedef int16_t S16; 1740c16b537SWarner Losh typedef uint32_t U32; 1750c16b537SWarner Losh typedef int32_t S32; 1760c16b537SWarner Losh typedef uint64_t U64; 1770c16b537SWarner Losh typedef int64_t S64; 1780c16b537SWarner Losh #else 1790c16b537SWarner Losh typedef unsigned char BYTE; 1800c16b537SWarner Losh typedef unsigned short U16; 1810c16b537SWarner Losh typedef signed short S16; 1820c16b537SWarner Losh typedef unsigned int U32; 1830c16b537SWarner Losh typedef signed int S32; 1840c16b537SWarner Losh typedef unsigned long long U64; 1850c16b537SWarner Losh typedef signed long long S64; 1860c16b537SWarner Losh #endif 1870c16b537SWarner Losh 1880c16b537SWarner Losh #endif /* MEM_ACCESS_MODULE */ 1890c16b537SWarner Losh 1900c16b537SWarner Losh /**************************************************************** 1910c16b537SWarner Losh * Memory I/O 1920c16b537SWarner Losh *****************************************************************/ 1930c16b537SWarner Losh /* FSE_FORCE_MEMORY_ACCESS 1940c16b537SWarner Losh * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. 1950c16b537SWarner Losh * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. 1960c16b537SWarner Losh * The below switch allow to select different access method for improved performance. 1970c16b537SWarner Losh * Method 0 (default) : use `memcpy()`. Safe and portable. 1980c16b537SWarner Losh * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). 1990c16b537SWarner Losh * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. 2000c16b537SWarner Losh * Method 2 : direct access. This method is portable but violate C standard. 2010c16b537SWarner Losh * It can generate buggy code on targets generating assembly depending on alignment. 2020c16b537SWarner Losh * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) 2030c16b537SWarner Losh * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. 2040c16b537SWarner Losh * Prefer these methods in priority order (0 > 1 > 2) 2050c16b537SWarner Losh */ 2060c16b537SWarner Losh #ifndef FSE_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ 2070c16b537SWarner Losh # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) 2080c16b537SWarner Losh # define FSE_FORCE_MEMORY_ACCESS 2 2090c16b537SWarner Losh # elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \ 2100c16b537SWarner Losh (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) 2110c16b537SWarner Losh # define FSE_FORCE_MEMORY_ACCESS 1 2120c16b537SWarner Losh # endif 2130c16b537SWarner Losh #endif 2140c16b537SWarner Losh 2150c16b537SWarner Losh 2160c16b537SWarner Losh static unsigned FSE_32bits(void) 2170c16b537SWarner Losh { 2180c16b537SWarner Losh return sizeof(void*)==4; 2190c16b537SWarner Losh } 2200c16b537SWarner Losh 2210c16b537SWarner Losh static unsigned FSE_isLittleEndian(void) 2220c16b537SWarner Losh { 2230c16b537SWarner Losh const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ 2240c16b537SWarner Losh return one.c[0]; 2250c16b537SWarner Losh } 2260c16b537SWarner Losh 2270c16b537SWarner Losh #if defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==2) 2280c16b537SWarner Losh 2290c16b537SWarner Losh static U16 FSE_read16(const void* memPtr) { return *(const U16*) memPtr; } 2300c16b537SWarner Losh static U32 FSE_read32(const void* memPtr) { return *(const U32*) memPtr; } 2310c16b537SWarner Losh static U64 FSE_read64(const void* memPtr) { return *(const U64*) memPtr; } 2320c16b537SWarner Losh 2330c16b537SWarner Losh #elif defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==1) 2340c16b537SWarner Losh 2350c16b537SWarner Losh /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ 2360c16b537SWarner Losh /* currently only defined for gcc and icc */ 2370c16b537SWarner Losh typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign; 2380c16b537SWarner Losh 2390c16b537SWarner Losh static U16 FSE_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } 2400c16b537SWarner Losh static U32 FSE_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } 2410c16b537SWarner Losh static U64 FSE_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } 2420c16b537SWarner Losh 2430c16b537SWarner Losh #else 2440c16b537SWarner Losh 2450c16b537SWarner Losh static U16 FSE_read16(const void* memPtr) 2460c16b537SWarner Losh { 2470c16b537SWarner Losh U16 val; memcpy(&val, memPtr, sizeof(val)); return val; 2480c16b537SWarner Losh } 2490c16b537SWarner Losh 2500c16b537SWarner Losh static U32 FSE_read32(const void* memPtr) 2510c16b537SWarner Losh { 2520c16b537SWarner Losh U32 val; memcpy(&val, memPtr, sizeof(val)); return val; 2530c16b537SWarner Losh } 2540c16b537SWarner Losh 2550c16b537SWarner Losh static U64 FSE_read64(const void* memPtr) 2560c16b537SWarner Losh { 2570c16b537SWarner Losh U64 val; memcpy(&val, memPtr, sizeof(val)); return val; 2580c16b537SWarner Losh } 2590c16b537SWarner Losh 2600c16b537SWarner Losh #endif // FSE_FORCE_MEMORY_ACCESS 2610c16b537SWarner Losh 2620c16b537SWarner Losh static U16 FSE_readLE16(const void* memPtr) 2630c16b537SWarner Losh { 2640c16b537SWarner Losh if (FSE_isLittleEndian()) 2650c16b537SWarner Losh return FSE_read16(memPtr); 2660c16b537SWarner Losh else 2670c16b537SWarner Losh { 2680c16b537SWarner Losh const BYTE* p = (const BYTE*)memPtr; 2690c16b537SWarner Losh return (U16)(p[0] + (p[1]<<8)); 2700c16b537SWarner Losh } 2710c16b537SWarner Losh } 2720c16b537SWarner Losh 2730c16b537SWarner Losh static U32 FSE_readLE32(const void* memPtr) 2740c16b537SWarner Losh { 2750c16b537SWarner Losh if (FSE_isLittleEndian()) 2760c16b537SWarner Losh return FSE_read32(memPtr); 2770c16b537SWarner Losh else 2780c16b537SWarner Losh { 2790c16b537SWarner Losh const BYTE* p = (const BYTE*)memPtr; 2800c16b537SWarner Losh return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24)); 2810c16b537SWarner Losh } 2820c16b537SWarner Losh } 2830c16b537SWarner Losh 2840c16b537SWarner Losh 2850c16b537SWarner Losh static U64 FSE_readLE64(const void* memPtr) 2860c16b537SWarner Losh { 2870c16b537SWarner Losh if (FSE_isLittleEndian()) 2880c16b537SWarner Losh return FSE_read64(memPtr); 2890c16b537SWarner Losh else 2900c16b537SWarner Losh { 2910c16b537SWarner Losh const BYTE* p = (const BYTE*)memPtr; 2920c16b537SWarner Losh return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24) 2930c16b537SWarner Losh + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56)); 2940c16b537SWarner Losh } 2950c16b537SWarner Losh } 2960c16b537SWarner Losh 2970c16b537SWarner Losh static size_t FSE_readLEST(const void* memPtr) 2980c16b537SWarner Losh { 2990c16b537SWarner Losh if (FSE_32bits()) 3000c16b537SWarner Losh return (size_t)FSE_readLE32(memPtr); 3010c16b537SWarner Losh else 3020c16b537SWarner Losh return (size_t)FSE_readLE64(memPtr); 3030c16b537SWarner Losh } 3040c16b537SWarner Losh 3050c16b537SWarner Losh 3060c16b537SWarner Losh 3070c16b537SWarner Losh /**************************************************************** 3080c16b537SWarner Losh * Constants 3090c16b537SWarner Losh *****************************************************************/ 3100c16b537SWarner Losh #define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2) 3110c16b537SWarner Losh #define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG) 3120c16b537SWarner Losh #define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1) 3130c16b537SWarner Losh #define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2) 3140c16b537SWarner Losh #define FSE_MIN_TABLELOG 5 3150c16b537SWarner Losh 3160c16b537SWarner Losh #define FSE_TABLELOG_ABSOLUTE_MAX 15 3170c16b537SWarner Losh #if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX 3180c16b537SWarner Losh #error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported" 3190c16b537SWarner Losh #endif 3200c16b537SWarner Losh 3210c16b537SWarner Losh 3220c16b537SWarner Losh /**************************************************************** 3230c16b537SWarner Losh * Error Management 3240c16b537SWarner Losh ****************************************************************/ 3250c16b537SWarner Losh #define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ 3260c16b537SWarner Losh 3270c16b537SWarner Losh 3280c16b537SWarner Losh /**************************************************************** 3290c16b537SWarner Losh * Complex types 3300c16b537SWarner Losh ****************************************************************/ 3310c16b537SWarner Losh typedef struct 3320c16b537SWarner Losh { 3330c16b537SWarner Losh int deltaFindState; 3340c16b537SWarner Losh U32 deltaNbBits; 3350c16b537SWarner Losh } FSE_symbolCompressionTransform; /* total 8 bytes */ 3360c16b537SWarner Losh 3370c16b537SWarner Losh typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; 3380c16b537SWarner Losh 3390c16b537SWarner Losh /**************************************************************** 3400c16b537SWarner Losh * Internal functions 3410c16b537SWarner Losh ****************************************************************/ 342052d3c12SConrad Meyer FORCE_INLINE unsigned FSE_highbit32 (U32 val) 3430c16b537SWarner Losh { 3440c16b537SWarner Losh # if defined(_MSC_VER) /* Visual */ 3450c16b537SWarner Losh unsigned long r; 3460c16b537SWarner Losh _BitScanReverse ( &r, val ); 3470c16b537SWarner Losh return (unsigned) r; 3480c16b537SWarner Losh # elif defined(__GNUC__) && (GCC_VERSION >= 304) /* GCC Intrinsic */ 3490c16b537SWarner Losh return 31 - __builtin_clz (val); 3500c16b537SWarner Losh # else /* Software version */ 3510c16b537SWarner Losh static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; 3520c16b537SWarner Losh U32 v = val; 3530c16b537SWarner Losh unsigned r; 3540c16b537SWarner Losh v |= v >> 1; 3550c16b537SWarner Losh v |= v >> 2; 3560c16b537SWarner Losh v |= v >> 4; 3570c16b537SWarner Losh v |= v >> 8; 3580c16b537SWarner Losh v |= v >> 16; 3590c16b537SWarner Losh r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; 3600c16b537SWarner Losh return r; 3610c16b537SWarner Losh # endif 3620c16b537SWarner Losh } 3630c16b537SWarner Losh 3640c16b537SWarner Losh 3650c16b537SWarner Losh /**************************************************************** 3660c16b537SWarner Losh * Templates 3670c16b537SWarner Losh ****************************************************************/ 3680c16b537SWarner Losh /* 3690c16b537SWarner Losh designed to be included 3700c16b537SWarner Losh for type-specific functions (template emulation in C) 3710c16b537SWarner Losh Objective is to write these functions only once, for improved maintenance 3720c16b537SWarner Losh */ 3730c16b537SWarner Losh 3740c16b537SWarner Losh /* safety checks */ 3750c16b537SWarner Losh #ifndef FSE_FUNCTION_EXTENSION 3760c16b537SWarner Losh # error "FSE_FUNCTION_EXTENSION must be defined" 3770c16b537SWarner Losh #endif 3780c16b537SWarner Losh #ifndef FSE_FUNCTION_TYPE 3790c16b537SWarner Losh # error "FSE_FUNCTION_TYPE must be defined" 3800c16b537SWarner Losh #endif 3810c16b537SWarner Losh 3820c16b537SWarner Losh /* Function names */ 3830c16b537SWarner Losh #define FSE_CAT(X,Y) X##Y 3840c16b537SWarner Losh #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) 3850c16b537SWarner Losh #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) 3860c16b537SWarner Losh 3870c16b537SWarner Losh 3880c16b537SWarner Losh 3890c16b537SWarner Losh static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; } 3900c16b537SWarner Losh 3910c16b537SWarner Losh #define FSE_DECODE_TYPE FSE_decode_t 3920c16b537SWarner Losh 3930c16b537SWarner Losh 3940c16b537SWarner Losh typedef struct { 3950c16b537SWarner Losh U16 tableLog; 3960c16b537SWarner Losh U16 fastMode; 3970c16b537SWarner Losh } FSE_DTableHeader; /* sizeof U32 */ 3980c16b537SWarner Losh 3990c16b537SWarner Losh static size_t FSE_buildDTable 4000c16b537SWarner Losh (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) 4010c16b537SWarner Losh { 4020c16b537SWarner Losh void* ptr = dt; 4030c16b537SWarner Losh FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; 4040c16b537SWarner Losh FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)(ptr) + 1; /* because dt is unsigned, 32-bits aligned on 32-bits */ 4050c16b537SWarner Losh const U32 tableSize = 1 << tableLog; 4060c16b537SWarner Losh const U32 tableMask = tableSize-1; 4070c16b537SWarner Losh const U32 step = FSE_tableStep(tableSize); 4080c16b537SWarner Losh U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1]; 4090c16b537SWarner Losh U32 position = 0; 4100c16b537SWarner Losh U32 highThreshold = tableSize-1; 4110c16b537SWarner Losh const S16 largeLimit= (S16)(1 << (tableLog-1)); 4120c16b537SWarner Losh U32 noLarge = 1; 4130c16b537SWarner Losh U32 s; 4140c16b537SWarner Losh 4150c16b537SWarner Losh /* Sanity Checks */ 4160c16b537SWarner Losh if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge; 4170c16b537SWarner Losh if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge; 4180c16b537SWarner Losh 4190c16b537SWarner Losh /* Init, lay down lowprob symbols */ 4200c16b537SWarner Losh DTableH[0].tableLog = (U16)tableLog; 4210c16b537SWarner Losh for (s=0; s<=maxSymbolValue; s++) 4220c16b537SWarner Losh { 4230c16b537SWarner Losh if (normalizedCounter[s]==-1) 4240c16b537SWarner Losh { 4250c16b537SWarner Losh tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s; 4260c16b537SWarner Losh symbolNext[s] = 1; 4270c16b537SWarner Losh } 4280c16b537SWarner Losh else 4290c16b537SWarner Losh { 4300c16b537SWarner Losh if (normalizedCounter[s] >= largeLimit) noLarge=0; 4310c16b537SWarner Losh symbolNext[s] = normalizedCounter[s]; 4320c16b537SWarner Losh } 4330c16b537SWarner Losh } 4340c16b537SWarner Losh 4350c16b537SWarner Losh /* Spread symbols */ 4360c16b537SWarner Losh for (s=0; s<=maxSymbolValue; s++) 4370c16b537SWarner Losh { 4380c16b537SWarner Losh int i; 4390c16b537SWarner Losh for (i=0; i<normalizedCounter[s]; i++) 4400c16b537SWarner Losh { 4410c16b537SWarner Losh tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s; 4420c16b537SWarner Losh position = (position + step) & tableMask; 4430c16b537SWarner Losh while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */ 4440c16b537SWarner Losh } 4450c16b537SWarner Losh } 4460c16b537SWarner Losh 4470c16b537SWarner Losh if (position!=0) return (size_t)-FSE_ERROR_GENERIC; /* position must reach all cells once, otherwise normalizedCounter is incorrect */ 4480c16b537SWarner Losh 4490c16b537SWarner Losh /* Build Decoding table */ 4500c16b537SWarner Losh { 4510c16b537SWarner Losh U32 i; 4520c16b537SWarner Losh for (i=0; i<tableSize; i++) 4530c16b537SWarner Losh { 4540c16b537SWarner Losh FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol); 4550c16b537SWarner Losh U16 nextState = symbolNext[symbol]++; 4560c16b537SWarner Losh tableDecode[i].nbBits = (BYTE) (tableLog - FSE_highbit32 ((U32)nextState) ); 4570c16b537SWarner Losh tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize); 4580c16b537SWarner Losh } 4590c16b537SWarner Losh } 4600c16b537SWarner Losh 4610c16b537SWarner Losh DTableH->fastMode = (U16)noLarge; 4620c16b537SWarner Losh return 0; 4630c16b537SWarner Losh } 4640c16b537SWarner Losh 4650c16b537SWarner Losh 4660c16b537SWarner Losh /****************************************** 4670c16b537SWarner Losh * FSE byte symbol 4680c16b537SWarner Losh ******************************************/ 4690c16b537SWarner Losh #ifndef FSE_COMMONDEFS_ONLY 4700c16b537SWarner Losh 4710c16b537SWarner Losh static unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); } 4720c16b537SWarner Losh 4730c16b537SWarner Losh static short FSE_abs(short a) 4740c16b537SWarner Losh { 4750c16b537SWarner Losh return a<0? -a : a; 4760c16b537SWarner Losh } 4770c16b537SWarner Losh 4780c16b537SWarner Losh 4790c16b537SWarner Losh /**************************************************************** 4800c16b537SWarner Losh * Header bitstream management 4810c16b537SWarner Losh ****************************************************************/ 4820c16b537SWarner Losh static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, 4830c16b537SWarner Losh const void* headerBuffer, size_t hbSize) 4840c16b537SWarner Losh { 4850c16b537SWarner Losh const BYTE* const istart = (const BYTE*) headerBuffer; 4860c16b537SWarner Losh const BYTE* const iend = istart + hbSize; 4870c16b537SWarner Losh const BYTE* ip = istart; 4880c16b537SWarner Losh int nbBits; 4890c16b537SWarner Losh int remaining; 4900c16b537SWarner Losh int threshold; 4910c16b537SWarner Losh U32 bitStream; 4920c16b537SWarner Losh int bitCount; 4930c16b537SWarner Losh unsigned charnum = 0; 4940c16b537SWarner Losh int previous0 = 0; 4950c16b537SWarner Losh 4960c16b537SWarner Losh if (hbSize < 4) return (size_t)-FSE_ERROR_srcSize_wrong; 4970c16b537SWarner Losh bitStream = FSE_readLE32(ip); 4980c16b537SWarner Losh nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ 4990c16b537SWarner Losh if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return (size_t)-FSE_ERROR_tableLog_tooLarge; 5000c16b537SWarner Losh bitStream >>= 4; 5010c16b537SWarner Losh bitCount = 4; 5020c16b537SWarner Losh *tableLogPtr = nbBits; 5030c16b537SWarner Losh remaining = (1<<nbBits)+1; 5040c16b537SWarner Losh threshold = 1<<nbBits; 5050c16b537SWarner Losh nbBits++; 5060c16b537SWarner Losh 5070c16b537SWarner Losh while ((remaining>1) && (charnum<=*maxSVPtr)) 5080c16b537SWarner Losh { 5090c16b537SWarner Losh if (previous0) 5100c16b537SWarner Losh { 5110c16b537SWarner Losh unsigned n0 = charnum; 5120c16b537SWarner Losh while ((bitStream & 0xFFFF) == 0xFFFF) 5130c16b537SWarner Losh { 5140c16b537SWarner Losh n0+=24; 5150c16b537SWarner Losh if (ip < iend-5) 5160c16b537SWarner Losh { 5170c16b537SWarner Losh ip+=2; 5180c16b537SWarner Losh bitStream = FSE_readLE32(ip) >> bitCount; 5190c16b537SWarner Losh } 5200c16b537SWarner Losh else 5210c16b537SWarner Losh { 5220c16b537SWarner Losh bitStream >>= 16; 5230c16b537SWarner Losh bitCount+=16; 5240c16b537SWarner Losh } 5250c16b537SWarner Losh } 5260c16b537SWarner Losh while ((bitStream & 3) == 3) 5270c16b537SWarner Losh { 5280c16b537SWarner Losh n0+=3; 5290c16b537SWarner Losh bitStream>>=2; 5300c16b537SWarner Losh bitCount+=2; 5310c16b537SWarner Losh } 5320c16b537SWarner Losh n0 += bitStream & 3; 5330c16b537SWarner Losh bitCount += 2; 5340c16b537SWarner Losh if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_maxSymbolValue_tooSmall; 5350c16b537SWarner Losh while (charnum < n0) normalizedCounter[charnum++] = 0; 5360c16b537SWarner Losh if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) 5370c16b537SWarner Losh { 5380c16b537SWarner Losh ip += bitCount>>3; 5390c16b537SWarner Losh bitCount &= 7; 5400c16b537SWarner Losh bitStream = FSE_readLE32(ip) >> bitCount; 5410c16b537SWarner Losh } 5420c16b537SWarner Losh else 5430c16b537SWarner Losh bitStream >>= 2; 5440c16b537SWarner Losh } 5450c16b537SWarner Losh { 5460c16b537SWarner Losh const short max = (short)((2*threshold-1)-remaining); 5470c16b537SWarner Losh short count; 5480c16b537SWarner Losh 5490c16b537SWarner Losh if ((bitStream & (threshold-1)) < (U32)max) 5500c16b537SWarner Losh { 5510c16b537SWarner Losh count = (short)(bitStream & (threshold-1)); 5520c16b537SWarner Losh bitCount += nbBits-1; 5530c16b537SWarner Losh } 5540c16b537SWarner Losh else 5550c16b537SWarner Losh { 5560c16b537SWarner Losh count = (short)(bitStream & (2*threshold-1)); 5570c16b537SWarner Losh if (count >= threshold) count -= max; 5580c16b537SWarner Losh bitCount += nbBits; 5590c16b537SWarner Losh } 5600c16b537SWarner Losh 5610c16b537SWarner Losh count--; /* extra accuracy */ 5620c16b537SWarner Losh remaining -= FSE_abs(count); 5630c16b537SWarner Losh normalizedCounter[charnum++] = count; 5640c16b537SWarner Losh previous0 = !count; 5650c16b537SWarner Losh while (remaining < threshold) 5660c16b537SWarner Losh { 5670c16b537SWarner Losh nbBits--; 5680c16b537SWarner Losh threshold >>= 1; 5690c16b537SWarner Losh } 5700c16b537SWarner Losh 5710c16b537SWarner Losh { 5720c16b537SWarner Losh if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) 5730c16b537SWarner Losh { 5740c16b537SWarner Losh ip += bitCount>>3; 5750c16b537SWarner Losh bitCount &= 7; 5760c16b537SWarner Losh } 5770c16b537SWarner Losh else 5780c16b537SWarner Losh { 5790c16b537SWarner Losh bitCount -= (int)(8 * (iend - 4 - ip)); 5800c16b537SWarner Losh ip = iend - 4; 5810c16b537SWarner Losh } 5820c16b537SWarner Losh bitStream = FSE_readLE32(ip) >> (bitCount & 31); 5830c16b537SWarner Losh } 5840c16b537SWarner Losh } 5850c16b537SWarner Losh } 5860c16b537SWarner Losh if (remaining != 1) return (size_t)-FSE_ERROR_GENERIC; 5870c16b537SWarner Losh *maxSVPtr = charnum-1; 5880c16b537SWarner Losh 5890c16b537SWarner Losh ip += (bitCount+7)>>3; 5900c16b537SWarner Losh if ((size_t)(ip-istart) > hbSize) return (size_t)-FSE_ERROR_srcSize_wrong; 5910c16b537SWarner Losh return ip-istart; 5920c16b537SWarner Losh } 5930c16b537SWarner Losh 5940c16b537SWarner Losh 5950c16b537SWarner Losh /********************************************************* 5960c16b537SWarner Losh * Decompression (Byte symbols) 5970c16b537SWarner Losh *********************************************************/ 5980c16b537SWarner Losh static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue) 5990c16b537SWarner Losh { 6000c16b537SWarner Losh void* ptr = dt; 6010c16b537SWarner Losh FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; 6020c16b537SWarner Losh FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1; /* because dt is unsigned */ 6030c16b537SWarner Losh 6040c16b537SWarner Losh DTableH->tableLog = 0; 6050c16b537SWarner Losh DTableH->fastMode = 0; 6060c16b537SWarner Losh 6070c16b537SWarner Losh cell->newState = 0; 6080c16b537SWarner Losh cell->symbol = symbolValue; 6090c16b537SWarner Losh cell->nbBits = 0; 6100c16b537SWarner Losh 6110c16b537SWarner Losh return 0; 6120c16b537SWarner Losh } 6130c16b537SWarner Losh 6140c16b537SWarner Losh 6150c16b537SWarner Losh static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits) 6160c16b537SWarner Losh { 6170c16b537SWarner Losh void* ptr = dt; 6180c16b537SWarner Losh FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; 6190c16b537SWarner Losh FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1; /* because dt is unsigned */ 6200c16b537SWarner Losh const unsigned tableSize = 1 << nbBits; 6210c16b537SWarner Losh const unsigned tableMask = tableSize - 1; 6220c16b537SWarner Losh const unsigned maxSymbolValue = tableMask; 6230c16b537SWarner Losh unsigned s; 6240c16b537SWarner Losh 6250c16b537SWarner Losh /* Sanity checks */ 6260c16b537SWarner Losh if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC; /* min size */ 6270c16b537SWarner Losh 6280c16b537SWarner Losh /* Build Decoding Table */ 6290c16b537SWarner Losh DTableH->tableLog = (U16)nbBits; 6300c16b537SWarner Losh DTableH->fastMode = 1; 6310c16b537SWarner Losh for (s=0; s<=maxSymbolValue; s++) 6320c16b537SWarner Losh { 6330c16b537SWarner Losh dinfo[s].newState = 0; 6340c16b537SWarner Losh dinfo[s].symbol = (BYTE)s; 6350c16b537SWarner Losh dinfo[s].nbBits = (BYTE)nbBits; 6360c16b537SWarner Losh } 6370c16b537SWarner Losh 6380c16b537SWarner Losh return 0; 6390c16b537SWarner Losh } 6400c16b537SWarner Losh 6410c16b537SWarner Losh 6420c16b537SWarner Losh /* FSE_initDStream 6430c16b537SWarner Losh * Initialize a FSE_DStream_t. 6440c16b537SWarner Losh * srcBuffer must point at the beginning of an FSE block. 6450c16b537SWarner Losh * The function result is the size of the FSE_block (== srcSize). 6460c16b537SWarner Losh * If srcSize is too small, the function will return an errorCode; 6470c16b537SWarner Losh */ 6480c16b537SWarner Losh static size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize) 6490c16b537SWarner Losh { 6500c16b537SWarner Losh if (srcSize < 1) return (size_t)-FSE_ERROR_srcSize_wrong; 6510c16b537SWarner Losh 6520c16b537SWarner Losh if (srcSize >= sizeof(size_t)) 6530c16b537SWarner Losh { 6540c16b537SWarner Losh U32 contain32; 6550c16b537SWarner Losh bitD->start = (const char*)srcBuffer; 6560c16b537SWarner Losh bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t); 6570c16b537SWarner Losh bitD->bitContainer = FSE_readLEST(bitD->ptr); 6580c16b537SWarner Losh contain32 = ((const BYTE*)srcBuffer)[srcSize-1]; 6590c16b537SWarner Losh if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */ 6600c16b537SWarner Losh bitD->bitsConsumed = 8 - FSE_highbit32(contain32); 6610c16b537SWarner Losh } 6620c16b537SWarner Losh else 6630c16b537SWarner Losh { 6640c16b537SWarner Losh U32 contain32; 6650c16b537SWarner Losh bitD->start = (const char*)srcBuffer; 6660c16b537SWarner Losh bitD->ptr = bitD->start; 6670c16b537SWarner Losh bitD->bitContainer = *(const BYTE*)(bitD->start); 6680c16b537SWarner Losh switch(srcSize) 6690c16b537SWarner Losh { 6700c16b537SWarner Losh case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16); 6710f743729SConrad Meyer /* fallthrough */ 6720c16b537SWarner Losh case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24); 6730f743729SConrad Meyer /* fallthrough */ 6740c16b537SWarner Losh case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32); 6750f743729SConrad Meyer /* fallthrough */ 6760c16b537SWarner Losh case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24; 6770f743729SConrad Meyer /* fallthrough */ 6780c16b537SWarner Losh case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16; 6790f743729SConrad Meyer /* fallthrough */ 6800c16b537SWarner Losh case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8; 6810f743729SConrad Meyer /* fallthrough */ 6820c16b537SWarner Losh default:; 6830c16b537SWarner Losh } 6840c16b537SWarner Losh contain32 = ((const BYTE*)srcBuffer)[srcSize-1]; 6850c16b537SWarner Losh if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */ 6860c16b537SWarner Losh bitD->bitsConsumed = 8 - FSE_highbit32(contain32); 6870c16b537SWarner Losh bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8; 6880c16b537SWarner Losh } 6890c16b537SWarner Losh 6900c16b537SWarner Losh return srcSize; 6910c16b537SWarner Losh } 6920c16b537SWarner Losh 6930c16b537SWarner Losh 6940c16b537SWarner Losh /*!FSE_lookBits 6950c16b537SWarner Losh * Provides next n bits from the bitContainer. 6960c16b537SWarner Losh * bitContainer is not modified (bits are still present for next read/look) 6970c16b537SWarner Losh * On 32-bits, maxNbBits==25 6980c16b537SWarner Losh * On 64-bits, maxNbBits==57 6990c16b537SWarner Losh * return : value extracted. 7000c16b537SWarner Losh */ 7010c16b537SWarner Losh static size_t FSE_lookBits(FSE_DStream_t* bitD, U32 nbBits) 7020c16b537SWarner Losh { 7030c16b537SWarner Losh const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1; 7040c16b537SWarner Losh return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask); 7050c16b537SWarner Losh } 7060c16b537SWarner Losh 7070c16b537SWarner Losh static size_t FSE_lookBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! */ 7080c16b537SWarner Losh { 7090c16b537SWarner Losh const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1; 7100c16b537SWarner Losh return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask); 7110c16b537SWarner Losh } 7120c16b537SWarner Losh 7130c16b537SWarner Losh static void FSE_skipBits(FSE_DStream_t* bitD, U32 nbBits) 7140c16b537SWarner Losh { 7150c16b537SWarner Losh bitD->bitsConsumed += nbBits; 7160c16b537SWarner Losh } 7170c16b537SWarner Losh 7180c16b537SWarner Losh 7190c16b537SWarner Losh /*!FSE_readBits 7200c16b537SWarner Losh * Read next n bits from the bitContainer. 7210c16b537SWarner Losh * On 32-bits, don't read more than maxNbBits==25 7220c16b537SWarner Losh * On 64-bits, don't read more than maxNbBits==57 7230c16b537SWarner Losh * Use the fast variant *only* if n >= 1. 7240c16b537SWarner Losh * return : value extracted. 7250c16b537SWarner Losh */ 7260c16b537SWarner Losh static size_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits) 7270c16b537SWarner Losh { 7280c16b537SWarner Losh size_t value = FSE_lookBits(bitD, nbBits); 7290c16b537SWarner Losh FSE_skipBits(bitD, nbBits); 7300c16b537SWarner Losh return value; 7310c16b537SWarner Losh } 7320c16b537SWarner Losh 7330c16b537SWarner Losh static size_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! */ 7340c16b537SWarner Losh { 7350c16b537SWarner Losh size_t value = FSE_lookBitsFast(bitD, nbBits); 7360c16b537SWarner Losh FSE_skipBits(bitD, nbBits); 7370c16b537SWarner Losh return value; 7380c16b537SWarner Losh } 7390c16b537SWarner Losh 7400c16b537SWarner Losh static unsigned FSE_reloadDStream(FSE_DStream_t* bitD) 7410c16b537SWarner Losh { 7420c16b537SWarner Losh if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */ 7430c16b537SWarner Losh return FSE_DStream_tooFar; 7440c16b537SWarner Losh 7450c16b537SWarner Losh if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) 7460c16b537SWarner Losh { 7470c16b537SWarner Losh bitD->ptr -= bitD->bitsConsumed >> 3; 7480c16b537SWarner Losh bitD->bitsConsumed &= 7; 7490c16b537SWarner Losh bitD->bitContainer = FSE_readLEST(bitD->ptr); 7500c16b537SWarner Losh return FSE_DStream_unfinished; 7510c16b537SWarner Losh } 7520c16b537SWarner Losh if (bitD->ptr == bitD->start) 7530c16b537SWarner Losh { 7540c16b537SWarner Losh if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return FSE_DStream_endOfBuffer; 7550c16b537SWarner Losh return FSE_DStream_completed; 7560c16b537SWarner Losh } 7570c16b537SWarner Losh { 7580c16b537SWarner Losh U32 nbBytes = bitD->bitsConsumed >> 3; 7590c16b537SWarner Losh U32 result = FSE_DStream_unfinished; 7600c16b537SWarner Losh if (bitD->ptr - nbBytes < bitD->start) 7610c16b537SWarner Losh { 7620c16b537SWarner Losh nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ 7630c16b537SWarner Losh result = FSE_DStream_endOfBuffer; 7640c16b537SWarner Losh } 7650c16b537SWarner Losh bitD->ptr -= nbBytes; 7660c16b537SWarner Losh bitD->bitsConsumed -= nbBytes*8; 7670c16b537SWarner Losh bitD->bitContainer = FSE_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */ 7680c16b537SWarner Losh return result; 7690c16b537SWarner Losh } 7700c16b537SWarner Losh } 7710c16b537SWarner Losh 7720c16b537SWarner Losh 7730c16b537SWarner Losh static void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const FSE_DTable* dt) 7740c16b537SWarner Losh { 7750c16b537SWarner Losh const void* ptr = dt; 7760c16b537SWarner Losh const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr; 7770c16b537SWarner Losh DStatePtr->state = FSE_readBits(bitD, DTableH->tableLog); 7780c16b537SWarner Losh FSE_reloadDStream(bitD); 7790c16b537SWarner Losh DStatePtr->table = dt + 1; 7800c16b537SWarner Losh } 7810c16b537SWarner Losh 7820c16b537SWarner Losh static BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD) 7830c16b537SWarner Losh { 7840c16b537SWarner Losh const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; 7850c16b537SWarner Losh const U32 nbBits = DInfo.nbBits; 7860c16b537SWarner Losh BYTE symbol = DInfo.symbol; 7870c16b537SWarner Losh size_t lowBits = FSE_readBits(bitD, nbBits); 7880c16b537SWarner Losh 7890c16b537SWarner Losh DStatePtr->state = DInfo.newState + lowBits; 7900c16b537SWarner Losh return symbol; 7910c16b537SWarner Losh } 7920c16b537SWarner Losh 7930c16b537SWarner Losh static BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD) 7940c16b537SWarner Losh { 7950c16b537SWarner Losh const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; 7960c16b537SWarner Losh const U32 nbBits = DInfo.nbBits; 7970c16b537SWarner Losh BYTE symbol = DInfo.symbol; 7980c16b537SWarner Losh size_t lowBits = FSE_readBitsFast(bitD, nbBits); 7990c16b537SWarner Losh 8000c16b537SWarner Losh DStatePtr->state = DInfo.newState + lowBits; 8010c16b537SWarner Losh return symbol; 8020c16b537SWarner Losh } 8030c16b537SWarner Losh 8040c16b537SWarner Losh /* FSE_endOfDStream 8050c16b537SWarner Losh Tells if bitD has reached end of bitStream or not */ 8060c16b537SWarner Losh 8070c16b537SWarner Losh static unsigned FSE_endOfDStream(const FSE_DStream_t* bitD) 8080c16b537SWarner Losh { 8090c16b537SWarner Losh return ((bitD->ptr == bitD->start) && (bitD->bitsConsumed == sizeof(bitD->bitContainer)*8)); 8100c16b537SWarner Losh } 8110c16b537SWarner Losh 8120c16b537SWarner Losh static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) 8130c16b537SWarner Losh { 8140c16b537SWarner Losh return DStatePtr->state == 0; 8150c16b537SWarner Losh } 8160c16b537SWarner Losh 8170c16b537SWarner Losh 8180c16b537SWarner Losh FORCE_INLINE size_t FSE_decompress_usingDTable_generic( 8190c16b537SWarner Losh void* dst, size_t maxDstSize, 8200c16b537SWarner Losh const void* cSrc, size_t cSrcSize, 8210c16b537SWarner Losh const FSE_DTable* dt, const unsigned fast) 8220c16b537SWarner Losh { 8230c16b537SWarner Losh BYTE* const ostart = (BYTE*) dst; 8240c16b537SWarner Losh BYTE* op = ostart; 8250c16b537SWarner Losh BYTE* const omax = op + maxDstSize; 8260c16b537SWarner Losh BYTE* const olimit = omax-3; 8270c16b537SWarner Losh 8280c16b537SWarner Losh FSE_DStream_t bitD; 8290c16b537SWarner Losh FSE_DState_t state1; 8300c16b537SWarner Losh FSE_DState_t state2; 8310c16b537SWarner Losh size_t errorCode; 8320c16b537SWarner Losh 8330c16b537SWarner Losh /* Init */ 8340c16b537SWarner Losh errorCode = FSE_initDStream(&bitD, cSrc, cSrcSize); /* replaced last arg by maxCompressed Size */ 8350c16b537SWarner Losh if (FSE_isError(errorCode)) return errorCode; 8360c16b537SWarner Losh 8370c16b537SWarner Losh FSE_initDState(&state1, &bitD, dt); 8380c16b537SWarner Losh FSE_initDState(&state2, &bitD, dt); 8390c16b537SWarner Losh 8400c16b537SWarner Losh #define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD) 8410c16b537SWarner Losh 8420c16b537SWarner Losh /* 4 symbols per loop */ 8430c16b537SWarner Losh for ( ; (FSE_reloadDStream(&bitD)==FSE_DStream_unfinished) && (op<olimit) ; op+=4) 8440c16b537SWarner Losh { 8450c16b537SWarner Losh op[0] = FSE_GETSYMBOL(&state1); 8460c16b537SWarner Losh 8470c16b537SWarner Losh if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ 8480c16b537SWarner Losh FSE_reloadDStream(&bitD); 8490c16b537SWarner Losh 8500c16b537SWarner Losh op[1] = FSE_GETSYMBOL(&state2); 8510c16b537SWarner Losh 8520c16b537SWarner Losh if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ 8530c16b537SWarner Losh { if (FSE_reloadDStream(&bitD) > FSE_DStream_unfinished) { op+=2; break; } } 8540c16b537SWarner Losh 8550c16b537SWarner Losh op[2] = FSE_GETSYMBOL(&state1); 8560c16b537SWarner Losh 8570c16b537SWarner Losh if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ 8580c16b537SWarner Losh FSE_reloadDStream(&bitD); 8590c16b537SWarner Losh 8600c16b537SWarner Losh op[3] = FSE_GETSYMBOL(&state2); 8610c16b537SWarner Losh } 8620c16b537SWarner Losh 8630c16b537SWarner Losh /* tail */ 8640c16b537SWarner Losh /* note : FSE_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly FSE_DStream_completed */ 8650c16b537SWarner Losh while (1) 8660c16b537SWarner Losh { 8670c16b537SWarner Losh if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) ) 8680c16b537SWarner Losh break; 8690c16b537SWarner Losh 8700c16b537SWarner Losh *op++ = FSE_GETSYMBOL(&state1); 8710c16b537SWarner Losh 8720c16b537SWarner Losh if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) ) 8730c16b537SWarner Losh break; 8740c16b537SWarner Losh 8750c16b537SWarner Losh *op++ = FSE_GETSYMBOL(&state2); 8760c16b537SWarner Losh } 8770c16b537SWarner Losh 8780c16b537SWarner Losh /* end ? */ 8790c16b537SWarner Losh if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2)) 8800c16b537SWarner Losh return op-ostart; 8810c16b537SWarner Losh 8820c16b537SWarner Losh if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */ 8830c16b537SWarner Losh 8840c16b537SWarner Losh return (size_t)-FSE_ERROR_corruptionDetected; 8850c16b537SWarner Losh } 8860c16b537SWarner Losh 8870c16b537SWarner Losh 8880c16b537SWarner Losh static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize, 8890c16b537SWarner Losh const void* cSrc, size_t cSrcSize, 8900c16b537SWarner Losh const FSE_DTable* dt) 8910c16b537SWarner Losh { 8920c16b537SWarner Losh FSE_DTableHeader DTableH; 8930c16b537SWarner Losh memcpy(&DTableH, dt, sizeof(DTableH)); /* memcpy() into local variable, to avoid strict aliasing warning */ 8940c16b537SWarner Losh 8950c16b537SWarner Losh /* select fast mode (static) */ 8960c16b537SWarner Losh if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1); 8970c16b537SWarner Losh return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0); 8980c16b537SWarner Losh } 8990c16b537SWarner Losh 9000c16b537SWarner Losh 9010c16b537SWarner Losh static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize) 9020c16b537SWarner Losh { 9030c16b537SWarner Losh const BYTE* const istart = (const BYTE*)cSrc; 9040c16b537SWarner Losh const BYTE* ip = istart; 9050c16b537SWarner Losh short counting[FSE_MAX_SYMBOL_VALUE+1]; 9060c16b537SWarner Losh DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */ 9070c16b537SWarner Losh unsigned tableLog; 9080c16b537SWarner Losh unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; 9090c16b537SWarner Losh size_t errorCode; 9100c16b537SWarner Losh 9110c16b537SWarner Losh if (cSrcSize<2) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */ 9120c16b537SWarner Losh 9130c16b537SWarner Losh /* normal FSE decoding mode */ 9140c16b537SWarner Losh errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize); 9150c16b537SWarner Losh if (FSE_isError(errorCode)) return errorCode; 9160c16b537SWarner Losh if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */ 9170c16b537SWarner Losh ip += errorCode; 9180c16b537SWarner Losh cSrcSize -= errorCode; 9190c16b537SWarner Losh 9200c16b537SWarner Losh errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog); 9210c16b537SWarner Losh if (FSE_isError(errorCode)) return errorCode; 9220c16b537SWarner Losh 9230c16b537SWarner Losh /* always return, even if it is an error code */ 9240c16b537SWarner Losh return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt); 9250c16b537SWarner Losh } 9260c16b537SWarner Losh 9270c16b537SWarner Losh 9280c16b537SWarner Losh 9290c16b537SWarner Losh /* ******************************************************* 9300c16b537SWarner Losh * Huff0 : Huffman block compression 9310c16b537SWarner Losh *********************************************************/ 9320c16b537SWarner Losh #define HUF_MAX_SYMBOL_VALUE 255 9330c16b537SWarner Losh #define HUF_DEFAULT_TABLELOG 12 /* used by default, when not specified */ 9340c16b537SWarner Losh #define HUF_MAX_TABLELOG 12 /* max possible tableLog; for allocation purpose; can be modified */ 9350c16b537SWarner Losh #define HUF_ABSOLUTEMAX_TABLELOG 16 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ 9360c16b537SWarner Losh #if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG) 9370c16b537SWarner Losh # error "HUF_MAX_TABLELOG is too large !" 9380c16b537SWarner Losh #endif 9390c16b537SWarner Losh 9400c16b537SWarner Losh typedef struct HUF_CElt_s { 9410c16b537SWarner Losh U16 val; 9420c16b537SWarner Losh BYTE nbBits; 9430c16b537SWarner Losh } HUF_CElt ; 9440c16b537SWarner Losh 9450c16b537SWarner Losh typedef struct nodeElt_s { 9460c16b537SWarner Losh U32 count; 9470c16b537SWarner Losh U16 parent; 9480c16b537SWarner Losh BYTE byte; 9490c16b537SWarner Losh BYTE nbBits; 9500c16b537SWarner Losh } nodeElt; 9510c16b537SWarner Losh 9520c16b537SWarner Losh 9530c16b537SWarner Losh /* ******************************************************* 9540c16b537SWarner Losh * Huff0 : Huffman block decompression 9550c16b537SWarner Losh *********************************************************/ 9560c16b537SWarner Losh typedef struct { 9570c16b537SWarner Losh BYTE byte; 9580c16b537SWarner Losh BYTE nbBits; 9590c16b537SWarner Losh } HUF_DElt; 9600c16b537SWarner Losh 9610c16b537SWarner Losh static size_t HUF_readDTable (U16* DTable, const void* src, size_t srcSize) 9620c16b537SWarner Losh { 9630c16b537SWarner Losh BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1]; 9640c16b537SWarner Losh U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */ 9650c16b537SWarner Losh U32 weightTotal; 9660c16b537SWarner Losh U32 maxBits; 9670c16b537SWarner Losh const BYTE* ip = (const BYTE*) src; 9680c16b537SWarner Losh size_t iSize; 9690c16b537SWarner Losh size_t oSize; 9700c16b537SWarner Losh U32 n; 9710c16b537SWarner Losh U32 nextRankStart; 9720c16b537SWarner Losh void* ptr = DTable+1; 9730c16b537SWarner Losh HUF_DElt* const dt = (HUF_DElt*)ptr; 9740c16b537SWarner Losh 9750c16b537SWarner Losh if (!srcSize) return (size_t)-FSE_ERROR_srcSize_wrong; 9760c16b537SWarner Losh iSize = ip[0]; 9770c16b537SWarner Losh 9780c16b537SWarner Losh FSE_STATIC_ASSERT(sizeof(HUF_DElt) == sizeof(U16)); /* if compilation fails here, assertion is false */ 9790c16b537SWarner Losh //memset(huffWeight, 0, sizeof(huffWeight)); /* should not be necessary, but some analyzer complain ... */ 9800c16b537SWarner Losh if (iSize >= 128) /* special header */ 9810c16b537SWarner Losh { 9820c16b537SWarner Losh if (iSize >= (242)) /* RLE */ 9830c16b537SWarner Losh { 9840c16b537SWarner Losh static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 }; 9850c16b537SWarner Losh oSize = l[iSize-242]; 9860c16b537SWarner Losh memset(huffWeight, 1, sizeof(huffWeight)); 9870c16b537SWarner Losh iSize = 0; 9880c16b537SWarner Losh } 9890c16b537SWarner Losh else /* Incompressible */ 9900c16b537SWarner Losh { 9910c16b537SWarner Losh oSize = iSize - 127; 9920c16b537SWarner Losh iSize = ((oSize+1)/2); 9930c16b537SWarner Losh if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong; 9940c16b537SWarner Losh ip += 1; 9950c16b537SWarner Losh for (n=0; n<oSize; n+=2) 9960c16b537SWarner Losh { 9970c16b537SWarner Losh huffWeight[n] = ip[n/2] >> 4; 9980c16b537SWarner Losh huffWeight[n+1] = ip[n/2] & 15; 9990c16b537SWarner Losh } 10000c16b537SWarner Losh } 10010c16b537SWarner Losh } 10020c16b537SWarner Losh else /* header compressed with FSE (normal case) */ 10030c16b537SWarner Losh { 10040c16b537SWarner Losh if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong; 10050c16b537SWarner Losh oSize = FSE_decompress(huffWeight, HUF_MAX_SYMBOL_VALUE, ip+1, iSize); /* max 255 values decoded, last one is implied */ 10060c16b537SWarner Losh if (FSE_isError(oSize)) return oSize; 10070c16b537SWarner Losh } 10080c16b537SWarner Losh 10090c16b537SWarner Losh /* collect weight stats */ 10100c16b537SWarner Losh memset(rankVal, 0, sizeof(rankVal)); 10110c16b537SWarner Losh weightTotal = 0; 10120c16b537SWarner Losh for (n=0; n<oSize; n++) 10130c16b537SWarner Losh { 10140c16b537SWarner Losh if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return (size_t)-FSE_ERROR_corruptionDetected; 10150c16b537SWarner Losh rankVal[huffWeight[n]]++; 10160c16b537SWarner Losh weightTotal += (1 << huffWeight[n]) >> 1; 10170c16b537SWarner Losh } 10180c16b537SWarner Losh if (weightTotal == 0) return (size_t)-FSE_ERROR_corruptionDetected; 10190c16b537SWarner Losh 10200c16b537SWarner Losh /* get last non-null symbol weight (implied, total must be 2^n) */ 10210c16b537SWarner Losh maxBits = FSE_highbit32(weightTotal) + 1; 10220c16b537SWarner Losh if (maxBits > DTable[0]) return (size_t)-FSE_ERROR_tableLog_tooLarge; /* DTable is too small */ 10230c16b537SWarner Losh DTable[0] = (U16)maxBits; 10240c16b537SWarner Losh { 10250c16b537SWarner Losh U32 total = 1 << maxBits; 10260c16b537SWarner Losh U32 rest = total - weightTotal; 10270c16b537SWarner Losh U32 verif = 1 << FSE_highbit32(rest); 10280c16b537SWarner Losh U32 lastWeight = FSE_highbit32(rest) + 1; 10290c16b537SWarner Losh if (verif != rest) return (size_t)-FSE_ERROR_corruptionDetected; /* last value must be a clean power of 2 */ 10300c16b537SWarner Losh huffWeight[oSize] = (BYTE)lastWeight; 10310c16b537SWarner Losh rankVal[lastWeight]++; 10320c16b537SWarner Losh } 10330c16b537SWarner Losh 10340c16b537SWarner Losh /* check tree construction validity */ 10350c16b537SWarner Losh if ((rankVal[1] < 2) || (rankVal[1] & 1)) return (size_t)-FSE_ERROR_corruptionDetected; /* by construction : at least 2 elts of rank 1, must be even */ 10360c16b537SWarner Losh 10370c16b537SWarner Losh /* Prepare ranks */ 10380c16b537SWarner Losh nextRankStart = 0; 10390c16b537SWarner Losh for (n=1; n<=maxBits; n++) 10400c16b537SWarner Losh { 10410c16b537SWarner Losh U32 current = nextRankStart; 10420c16b537SWarner Losh nextRankStart += (rankVal[n] << (n-1)); 10430c16b537SWarner Losh rankVal[n] = current; 10440c16b537SWarner Losh } 10450c16b537SWarner Losh 10460c16b537SWarner Losh /* fill DTable */ 10470c16b537SWarner Losh for (n=0; n<=oSize; n++) 10480c16b537SWarner Losh { 10490c16b537SWarner Losh const U32 w = huffWeight[n]; 10500c16b537SWarner Losh const U32 length = (1 << w) >> 1; 10510c16b537SWarner Losh U32 i; 10520c16b537SWarner Losh HUF_DElt D; 10530c16b537SWarner Losh D.byte = (BYTE)n; D.nbBits = (BYTE)(maxBits + 1 - w); 10540c16b537SWarner Losh for (i = rankVal[w]; i < rankVal[w] + length; i++) 10550c16b537SWarner Losh dt[i] = D; 10560c16b537SWarner Losh rankVal[w] += length; 10570c16b537SWarner Losh } 10580c16b537SWarner Losh 10590c16b537SWarner Losh return iSize+1; 10600c16b537SWarner Losh } 10610c16b537SWarner Losh 10620c16b537SWarner Losh 10630c16b537SWarner Losh static BYTE HUF_decodeSymbol(FSE_DStream_t* Dstream, const HUF_DElt* dt, const U32 dtLog) 10640c16b537SWarner Losh { 10650c16b537SWarner Losh const size_t val = FSE_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ 10660c16b537SWarner Losh const BYTE c = dt[val].byte; 10670c16b537SWarner Losh FSE_skipBits(Dstream, dt[val].nbBits); 10680c16b537SWarner Losh return c; 10690c16b537SWarner Losh } 10700c16b537SWarner Losh 10710c16b537SWarner Losh static size_t HUF_decompress_usingDTable( /* -3% slower when non static */ 10720c16b537SWarner Losh void* dst, size_t maxDstSize, 10730c16b537SWarner Losh const void* cSrc, size_t cSrcSize, 10740c16b537SWarner Losh const U16* DTable) 10750c16b537SWarner Losh { 10760c16b537SWarner Losh BYTE* const ostart = (BYTE*) dst; 10770c16b537SWarner Losh BYTE* op = ostart; 10780c16b537SWarner Losh BYTE* const omax = op + maxDstSize; 10790c16b537SWarner Losh BYTE* const olimit = omax-15; 10800c16b537SWarner Losh 10810c16b537SWarner Losh const void* ptr = DTable; 10820c16b537SWarner Losh const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1; 10830c16b537SWarner Losh const U32 dtLog = DTable[0]; 10840c16b537SWarner Losh size_t errorCode; 10850c16b537SWarner Losh U32 reloadStatus; 10860c16b537SWarner Losh 10870c16b537SWarner Losh /* Init */ 10880c16b537SWarner Losh 10890c16b537SWarner Losh const U16* jumpTable = (const U16*)cSrc; 10900c16b537SWarner Losh const size_t length1 = FSE_readLE16(jumpTable); 10910c16b537SWarner Losh const size_t length2 = FSE_readLE16(jumpTable+1); 10920c16b537SWarner Losh const size_t length3 = FSE_readLE16(jumpTable+2); 10930c16b537SWarner Losh const size_t length4 = cSrcSize - 6 - length1 - length2 - length3; // check coherency !! 10940c16b537SWarner Losh const char* const start1 = (const char*)(cSrc) + 6; 10950c16b537SWarner Losh const char* const start2 = start1 + length1; 10960c16b537SWarner Losh const char* const start3 = start2 + length2; 10970c16b537SWarner Losh const char* const start4 = start3 + length3; 10980c16b537SWarner Losh FSE_DStream_t bitD1, bitD2, bitD3, bitD4; 10990c16b537SWarner Losh 11000c16b537SWarner Losh if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; 11010c16b537SWarner Losh 11020c16b537SWarner Losh errorCode = FSE_initDStream(&bitD1, start1, length1); 11030c16b537SWarner Losh if (FSE_isError(errorCode)) return errorCode; 11040c16b537SWarner Losh errorCode = FSE_initDStream(&bitD2, start2, length2); 11050c16b537SWarner Losh if (FSE_isError(errorCode)) return errorCode; 11060c16b537SWarner Losh errorCode = FSE_initDStream(&bitD3, start3, length3); 11070c16b537SWarner Losh if (FSE_isError(errorCode)) return errorCode; 11080c16b537SWarner Losh errorCode = FSE_initDStream(&bitD4, start4, length4); 11090c16b537SWarner Losh if (FSE_isError(errorCode)) return errorCode; 11100c16b537SWarner Losh 11110c16b537SWarner Losh reloadStatus=FSE_reloadDStream(&bitD2); 11120c16b537SWarner Losh 11130c16b537SWarner Losh /* 16 symbols per loop */ 11140c16b537SWarner Losh for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit); /* D2-3-4 are supposed to be synchronized and finish together */ 11150c16b537SWarner Losh op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1)) 11160c16b537SWarner Losh { 11170c16b537SWarner Losh #define HUF_DECODE_SYMBOL_0(n, Dstream) \ 11180c16b537SWarner Losh op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); 11190c16b537SWarner Losh 11200c16b537SWarner Losh #define HUF_DECODE_SYMBOL_1(n, Dstream) \ 11210c16b537SWarner Losh op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \ 11220c16b537SWarner Losh if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream) 11230c16b537SWarner Losh 11240c16b537SWarner Losh #define HUF_DECODE_SYMBOL_2(n, Dstream) \ 11250c16b537SWarner Losh op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \ 11260c16b537SWarner Losh if (FSE_32bits()) FSE_reloadDStream(&Dstream) 11270c16b537SWarner Losh 11280c16b537SWarner Losh HUF_DECODE_SYMBOL_1( 0, bitD1); 11290c16b537SWarner Losh HUF_DECODE_SYMBOL_1( 1, bitD2); 11300c16b537SWarner Losh HUF_DECODE_SYMBOL_1( 2, bitD3); 11310c16b537SWarner Losh HUF_DECODE_SYMBOL_1( 3, bitD4); 11320c16b537SWarner Losh HUF_DECODE_SYMBOL_2( 4, bitD1); 11330c16b537SWarner Losh HUF_DECODE_SYMBOL_2( 5, bitD2); 11340c16b537SWarner Losh HUF_DECODE_SYMBOL_2( 6, bitD3); 11350c16b537SWarner Losh HUF_DECODE_SYMBOL_2( 7, bitD4); 11360c16b537SWarner Losh HUF_DECODE_SYMBOL_1( 8, bitD1); 11370c16b537SWarner Losh HUF_DECODE_SYMBOL_1( 9, bitD2); 11380c16b537SWarner Losh HUF_DECODE_SYMBOL_1(10, bitD3); 11390c16b537SWarner Losh HUF_DECODE_SYMBOL_1(11, bitD4); 11400c16b537SWarner Losh HUF_DECODE_SYMBOL_0(12, bitD1); 11410c16b537SWarner Losh HUF_DECODE_SYMBOL_0(13, bitD2); 11420c16b537SWarner Losh HUF_DECODE_SYMBOL_0(14, bitD3); 11430c16b537SWarner Losh HUF_DECODE_SYMBOL_0(15, bitD4); 11440c16b537SWarner Losh } 11450c16b537SWarner Losh 11460c16b537SWarner Losh if (reloadStatus!=FSE_DStream_completed) /* not complete : some bitStream might be FSE_DStream_unfinished */ 11470c16b537SWarner Losh return (size_t)-FSE_ERROR_corruptionDetected; 11480c16b537SWarner Losh 11490c16b537SWarner Losh /* tail */ 11500c16b537SWarner Losh { 11510c16b537SWarner Losh // bitTail = bitD1; // *much* slower : -20% !??! 11520c16b537SWarner Losh FSE_DStream_t bitTail; 11530c16b537SWarner Losh bitTail.ptr = bitD1.ptr; 11540c16b537SWarner Losh bitTail.bitsConsumed = bitD1.bitsConsumed; 11550c16b537SWarner Losh bitTail.bitContainer = bitD1.bitContainer; // required in case of FSE_DStream_endOfBuffer 11560c16b537SWarner Losh bitTail.start = start1; 11570c16b537SWarner Losh for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++) 11580c16b537SWarner Losh { 11590c16b537SWarner Losh HUF_DECODE_SYMBOL_0(0, bitTail); 11600c16b537SWarner Losh } 11610c16b537SWarner Losh 11620c16b537SWarner Losh if (FSE_endOfDStream(&bitTail)) 11630c16b537SWarner Losh return op-ostart; 11640c16b537SWarner Losh } 11650c16b537SWarner Losh 11660c16b537SWarner Losh if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */ 11670c16b537SWarner Losh 11680c16b537SWarner Losh return (size_t)-FSE_ERROR_corruptionDetected; 11690c16b537SWarner Losh } 11700c16b537SWarner Losh 11710c16b537SWarner Losh 11720c16b537SWarner Losh static size_t HUF_decompress (void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize) 11730c16b537SWarner Losh { 11740c16b537SWarner Losh HUF_CREATE_STATIC_DTABLE(DTable, HUF_MAX_TABLELOG); 11750c16b537SWarner Losh const BYTE* ip = (const BYTE*) cSrc; 11760c16b537SWarner Losh size_t errorCode; 11770c16b537SWarner Losh 11780c16b537SWarner Losh errorCode = HUF_readDTable (DTable, cSrc, cSrcSize); 11790c16b537SWarner Losh if (FSE_isError(errorCode)) return errorCode; 11800c16b537SWarner Losh if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; 11810c16b537SWarner Losh ip += errorCode; 11820c16b537SWarner Losh cSrcSize -= errorCode; 11830c16b537SWarner Losh 11840c16b537SWarner Losh return HUF_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable); 11850c16b537SWarner Losh } 11860c16b537SWarner Losh 11870c16b537SWarner Losh 11880c16b537SWarner Losh #endif /* FSE_COMMONDEFS_ONLY */ 11890c16b537SWarner Losh 11900c16b537SWarner Losh /* 11910c16b537SWarner Losh zstd - standard compression library 11920c16b537SWarner Losh Copyright (C) 2014-2015, Yann Collet. 11930c16b537SWarner Losh 11940c16b537SWarner Losh BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 11950c16b537SWarner Losh 11960c16b537SWarner Losh Redistribution and use in source and binary forms, with or without 11970c16b537SWarner Losh modification, are permitted provided that the following conditions are 11980c16b537SWarner Losh met: 11990c16b537SWarner Losh * Redistributions of source code must retain the above copyright 12000c16b537SWarner Losh notice, this list of conditions and the following disclaimer. 12010c16b537SWarner Losh * Redistributions in binary form must reproduce the above 12020c16b537SWarner Losh copyright notice, this list of conditions and the following disclaimer 12030c16b537SWarner Losh in the documentation and/or other materials provided with the 12040c16b537SWarner Losh distribution. 12050c16b537SWarner Losh THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 12060c16b537SWarner Losh "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 12070c16b537SWarner Losh LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 12080c16b537SWarner Losh A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 12090c16b537SWarner Losh OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 12100c16b537SWarner Losh SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 12110c16b537SWarner Losh LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 12120c16b537SWarner Losh DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 12130c16b537SWarner Losh THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 12140c16b537SWarner Losh (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 12150c16b537SWarner Losh OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 12160c16b537SWarner Losh 12170c16b537SWarner Losh You can contact the author at : 12180c16b537SWarner Losh - zstd source repository : https://github.com/Cyan4973/zstd 12190c16b537SWarner Losh - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c 12200c16b537SWarner Losh */ 12210c16b537SWarner Losh 12220c16b537SWarner Losh /**************************************************************** 12230c16b537SWarner Losh * Tuning parameters 12240c16b537SWarner Losh *****************************************************************/ 12250c16b537SWarner Losh /* MEMORY_USAGE : 12260c16b537SWarner Losh * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) 12270c16b537SWarner Losh * Increasing memory usage improves compression ratio 12280c16b537SWarner Losh * Reduced memory usage can improve speed, due to cache effect */ 12290c16b537SWarner Losh #define ZSTD_MEMORY_USAGE 17 12300c16b537SWarner Losh 12310c16b537SWarner Losh 12320c16b537SWarner Losh /************************************** 12330c16b537SWarner Losh CPU Feature Detection 12340c16b537SWarner Losh **************************************/ 12350c16b537SWarner Losh /* 12360c16b537SWarner Losh * Automated efficient unaligned memory access detection 12370c16b537SWarner Losh * Based on known hardware architectures 12380c16b537SWarner Losh * This list will be updated thanks to feedbacks 12390c16b537SWarner Losh */ 12400c16b537SWarner Losh #if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \ 12410c16b537SWarner Losh || defined(__ARM_FEATURE_UNALIGNED) \ 12420c16b537SWarner Losh || defined(__i386__) || defined(__x86_64__) \ 12430c16b537SWarner Losh || defined(_M_IX86) || defined(_M_X64) \ 12440c16b537SWarner Losh || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__) \ 12450c16b537SWarner Losh || (defined(_M_ARM) && (_M_ARM >= 7)) 12460c16b537SWarner Losh # define ZSTD_UNALIGNED_ACCESS 1 12470c16b537SWarner Losh #else 12480c16b537SWarner Losh # define ZSTD_UNALIGNED_ACCESS 0 12490c16b537SWarner Losh #endif 12500c16b537SWarner Losh 12510c16b537SWarner Losh 12520c16b537SWarner Losh /******************************************************** 12530c16b537SWarner Losh * Includes 12540c16b537SWarner Losh *********************************************************/ 12550c16b537SWarner Losh #include <stdlib.h> /* calloc */ 12560c16b537SWarner Losh #include <string.h> /* memcpy, memmove */ 12570c16b537SWarner Losh #include <stdio.h> /* debug : printf */ 12580c16b537SWarner Losh 12590c16b537SWarner Losh 12600c16b537SWarner Losh /******************************************************** 12610c16b537SWarner Losh * Compiler specifics 12620c16b537SWarner Losh *********************************************************/ 12630c16b537SWarner Losh #ifdef __AVX2__ 12640c16b537SWarner Losh # include <immintrin.h> /* AVX2 intrinsics */ 12650c16b537SWarner Losh #endif 12660c16b537SWarner Losh 12670c16b537SWarner Losh #ifdef _MSC_VER /* Visual Studio */ 12680c16b537SWarner Losh # include <intrin.h> /* For Visual 2005 */ 12690c16b537SWarner Losh # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ 12700c16b537SWarner Losh # pragma warning(disable : 4324) /* disable: C4324: padded structure */ 12710c16b537SWarner Losh #endif 12720c16b537SWarner Losh 12730c16b537SWarner Losh 12740c16b537SWarner Losh #ifndef MEM_ACCESS_MODULE 12750c16b537SWarner Losh #define MEM_ACCESS_MODULE 12760c16b537SWarner Losh /******************************************************** 12770c16b537SWarner Losh * Basic Types 12780c16b537SWarner Losh *********************************************************/ 12790c16b537SWarner Losh #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ 12800c16b537SWarner Losh # include <stdint.h> 12810c16b537SWarner Losh typedef uint8_t BYTE; 12820c16b537SWarner Losh typedef uint16_t U16; 12830c16b537SWarner Losh typedef int16_t S16; 12840c16b537SWarner Losh typedef uint32_t U32; 12850c16b537SWarner Losh typedef int32_t S32; 12860c16b537SWarner Losh typedef uint64_t U64; 12870c16b537SWarner Losh #else 12880c16b537SWarner Losh typedef unsigned char BYTE; 12890c16b537SWarner Losh typedef unsigned short U16; 12900c16b537SWarner Losh typedef signed short S16; 12910c16b537SWarner Losh typedef unsigned int U32; 12920c16b537SWarner Losh typedef signed int S32; 12930c16b537SWarner Losh typedef unsigned long long U64; 12940c16b537SWarner Losh #endif 12950c16b537SWarner Losh 12960c16b537SWarner Losh #endif /* MEM_ACCESS_MODULE */ 12970c16b537SWarner Losh 12980c16b537SWarner Losh 12990c16b537SWarner Losh /******************************************************** 13000c16b537SWarner Losh * Constants 13010c16b537SWarner Losh *********************************************************/ 13020c16b537SWarner Losh static const U32 ZSTD_magicNumber = 0xFD2FB51E; /* 3rd version : seqNb header */ 13030c16b537SWarner Losh 13040c16b537SWarner Losh #define HASH_LOG (ZSTD_MEMORY_USAGE - 2) 13050c16b537SWarner Losh #define HASH_TABLESIZE (1 << HASH_LOG) 13060c16b537SWarner Losh #define HASH_MASK (HASH_TABLESIZE - 1) 13070c16b537SWarner Losh 13080c16b537SWarner Losh #define KNUTH 2654435761 13090c16b537SWarner Losh 13100c16b537SWarner Losh #define BIT7 128 13110c16b537SWarner Losh #define BIT6 64 13120c16b537SWarner Losh #define BIT5 32 13130c16b537SWarner Losh #define BIT4 16 13140c16b537SWarner Losh 13150c16b537SWarner Losh #define KB *(1 <<10) 13160c16b537SWarner Losh #define MB *(1 <<20) 13170c16b537SWarner Losh #define GB *(1U<<30) 13180c16b537SWarner Losh 13190c16b537SWarner Losh #define BLOCKSIZE (128 KB) /* define, for static allocation */ 13200c16b537SWarner Losh 13210c16b537SWarner Losh #define WORKPLACESIZE (BLOCKSIZE*3) 13220c16b537SWarner Losh #define MINMATCH 4 13230c16b537SWarner Losh #define MLbits 7 13240c16b537SWarner Losh #define LLbits 6 13250c16b537SWarner Losh #define Offbits 5 13260c16b537SWarner Losh #define MaxML ((1<<MLbits )-1) 13270c16b537SWarner Losh #define MaxLL ((1<<LLbits )-1) 13280c16b537SWarner Losh #define MaxOff ((1<<Offbits)-1) 13290c16b537SWarner Losh #define LitFSELog 11 13300c16b537SWarner Losh #define MLFSELog 10 13310c16b537SWarner Losh #define LLFSELog 10 13320c16b537SWarner Losh #define OffFSELog 9 13330c16b537SWarner Losh #define MAX(a,b) ((a)<(b)?(b):(a)) 13340c16b537SWarner Losh #define MaxSeq MAX(MaxLL, MaxML) 13350c16b537SWarner Losh 13360c16b537SWarner Losh #define LITERAL_NOENTROPY 63 13370c16b537SWarner Losh #define COMMAND_NOENTROPY 7 /* to remove */ 13380c16b537SWarner Losh 1339*2b9c00cbSConrad Meyer #define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) 1340*2b9c00cbSConrad Meyer 13410c16b537SWarner Losh static const size_t ZSTD_blockHeaderSize = 3; 13420c16b537SWarner Losh static const size_t ZSTD_frameHeaderSize = 4; 13430c16b537SWarner Losh 13440c16b537SWarner Losh 13450c16b537SWarner Losh /******************************************************** 13460c16b537SWarner Losh * Memory operations 13470c16b537SWarner Losh *********************************************************/ 13480c16b537SWarner Losh static unsigned ZSTD_32bits(void) { return sizeof(void*)==4; } 13490c16b537SWarner Losh 13500c16b537SWarner Losh static unsigned ZSTD_isLittleEndian(void) 13510c16b537SWarner Losh { 13520c16b537SWarner Losh const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ 13530c16b537SWarner Losh return one.c[0]; 13540c16b537SWarner Losh } 13550c16b537SWarner Losh 13560c16b537SWarner Losh static U16 ZSTD_read16(const void* p) { U16 r; memcpy(&r, p, sizeof(r)); return r; } 13570c16b537SWarner Losh 13580c16b537SWarner Losh static U32 ZSTD_read32(const void* p) { U32 r; memcpy(&r, p, sizeof(r)); return r; } 13590c16b537SWarner Losh 13600c16b537SWarner Losh static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); } 13610c16b537SWarner Losh 13620c16b537SWarner Losh static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); } 13630c16b537SWarner Losh 13640c16b537SWarner Losh #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; } 13650c16b537SWarner Losh 13660c16b537SWarner Losh static void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length) 13670c16b537SWarner Losh { 13680c16b537SWarner Losh const BYTE* ip = (const BYTE*)src; 13690c16b537SWarner Losh BYTE* op = (BYTE*)dst; 13700c16b537SWarner Losh BYTE* const oend = op + length; 13710c16b537SWarner Losh while (op < oend) COPY8(op, ip); 13720c16b537SWarner Losh } 13730c16b537SWarner Losh 13740c16b537SWarner Losh static U16 ZSTD_readLE16(const void* memPtr) 13750c16b537SWarner Losh { 13760c16b537SWarner Losh if (ZSTD_isLittleEndian()) return ZSTD_read16(memPtr); 13770c16b537SWarner Losh else 13780c16b537SWarner Losh { 13790c16b537SWarner Losh const BYTE* p = (const BYTE*)memPtr; 13800c16b537SWarner Losh return (U16)((U16)p[0] + ((U16)p[1]<<8)); 13810c16b537SWarner Losh } 13820c16b537SWarner Losh } 13830c16b537SWarner Losh 13840c16b537SWarner Losh 13850c16b537SWarner Losh static U32 ZSTD_readLE32(const void* memPtr) 13860c16b537SWarner Losh { 13870c16b537SWarner Losh if (ZSTD_isLittleEndian()) 13880c16b537SWarner Losh return ZSTD_read32(memPtr); 13890c16b537SWarner Losh else 13900c16b537SWarner Losh { 13910c16b537SWarner Losh const BYTE* p = (const BYTE*)memPtr; 13920c16b537SWarner Losh return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24)); 13930c16b537SWarner Losh } 13940c16b537SWarner Losh } 13950c16b537SWarner Losh 13960c16b537SWarner Losh static U32 ZSTD_readBE32(const void* memPtr) 13970c16b537SWarner Losh { 13980c16b537SWarner Losh const BYTE* p = (const BYTE*)memPtr; 13990c16b537SWarner Losh return (U32)(((U32)p[0]<<24) + ((U32)p[1]<<16) + ((U32)p[2]<<8) + ((U32)p[3]<<0)); 14000c16b537SWarner Losh } 14010c16b537SWarner Losh 14020c16b537SWarner Losh 14030c16b537SWarner Losh /************************************** 14040c16b537SWarner Losh * Local structures 14050c16b537SWarner Losh ***************************************/ 14060c16b537SWarner Losh typedef struct ZSTD_Cctx_s ZSTD_Cctx; 14070c16b537SWarner Losh 14080c16b537SWarner Losh typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t; 14090c16b537SWarner Losh 14100c16b537SWarner Losh typedef struct 14110c16b537SWarner Losh { 14120c16b537SWarner Losh blockType_t blockType; 14130c16b537SWarner Losh U32 origSize; 14140c16b537SWarner Losh } blockProperties_t; 14150c16b537SWarner Losh 14160c16b537SWarner Losh typedef struct { 14170c16b537SWarner Losh void* buffer; 14180c16b537SWarner Losh U32* offsetStart; 14190c16b537SWarner Losh U32* offset; 14200c16b537SWarner Losh BYTE* offCodeStart; 14210c16b537SWarner Losh BYTE* offCode; 14220c16b537SWarner Losh BYTE* litStart; 14230c16b537SWarner Losh BYTE* lit; 14240c16b537SWarner Losh BYTE* litLengthStart; 14250c16b537SWarner Losh BYTE* litLength; 14260c16b537SWarner Losh BYTE* matchLengthStart; 14270c16b537SWarner Losh BYTE* matchLength; 14280c16b537SWarner Losh BYTE* dumpsStart; 14290c16b537SWarner Losh BYTE* dumps; 14300c16b537SWarner Losh } seqStore_t; 14310c16b537SWarner Losh 14320c16b537SWarner Losh 14330c16b537SWarner Losh typedef struct ZSTD_Cctx_s 14340c16b537SWarner Losh { 14350c16b537SWarner Losh const BYTE* base; 14360c16b537SWarner Losh U32 current; 14370c16b537SWarner Losh U32 nextUpdate; 14380c16b537SWarner Losh seqStore_t seqStore; 14390c16b537SWarner Losh #ifdef __AVX2__ 14400c16b537SWarner Losh __m256i hashTable[HASH_TABLESIZE>>3]; 14410c16b537SWarner Losh #else 14420c16b537SWarner Losh U32 hashTable[HASH_TABLESIZE]; 14430c16b537SWarner Losh #endif 14440c16b537SWarner Losh BYTE buffer[WORKPLACESIZE]; 14450c16b537SWarner Losh } cctxi_t; 14460c16b537SWarner Losh 14470c16b537SWarner Losh 14480c16b537SWarner Losh 14490c16b537SWarner Losh 14500c16b537SWarner Losh /************************************** 14510c16b537SWarner Losh * Error Management 14520c16b537SWarner Losh **************************************/ 14530c16b537SWarner Losh /* published entry point */ 14540c16b537SWarner Losh unsigned ZSTDv01_isError(size_t code) { return ERR_isError(code); } 14550c16b537SWarner Losh 14560c16b537SWarner Losh 14570c16b537SWarner Losh /************************************** 14580c16b537SWarner Losh * Tool functions 14590c16b537SWarner Losh **************************************/ 14600c16b537SWarner Losh #define ZSTD_VERSION_MAJOR 0 /* for breaking interface changes */ 14610c16b537SWarner Losh #define ZSTD_VERSION_MINOR 1 /* for new (non-breaking) interface capabilities */ 14620c16b537SWarner Losh #define ZSTD_VERSION_RELEASE 3 /* for tweaks, bug-fixes, or development */ 14630c16b537SWarner Losh #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) 14640c16b537SWarner Losh 14650c16b537SWarner Losh /************************************************************** 14660c16b537SWarner Losh * Decompression code 14670c16b537SWarner Losh **************************************************************/ 14680c16b537SWarner Losh 14690f743729SConrad Meyer static size_t ZSTDv01_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr) 14700c16b537SWarner Losh { 14710c16b537SWarner Losh const BYTE* const in = (const BYTE* const)src; 14720c16b537SWarner Losh BYTE headerFlags; 14730c16b537SWarner Losh U32 cSize; 14740c16b537SWarner Losh 14750c16b537SWarner Losh if (srcSize < 3) return ERROR(srcSize_wrong); 14760c16b537SWarner Losh 14770c16b537SWarner Losh headerFlags = *in; 14780c16b537SWarner Losh cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16); 14790c16b537SWarner Losh 14800c16b537SWarner Losh bpPtr->blockType = (blockType_t)(headerFlags >> 6); 14810c16b537SWarner Losh bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0; 14820c16b537SWarner Losh 14830c16b537SWarner Losh if (bpPtr->blockType == bt_end) return 0; 14840c16b537SWarner Losh if (bpPtr->blockType == bt_rle) return 1; 14850c16b537SWarner Losh return cSize; 14860c16b537SWarner Losh } 14870c16b537SWarner Losh 14880c16b537SWarner Losh 14890c16b537SWarner Losh static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize) 14900c16b537SWarner Losh { 14910c16b537SWarner Losh if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall); 14920c16b537SWarner Losh memcpy(dst, src, srcSize); 14930c16b537SWarner Losh return srcSize; 14940c16b537SWarner Losh } 14950c16b537SWarner Losh 14960c16b537SWarner Losh 14970c16b537SWarner Losh static size_t ZSTD_decompressLiterals(void* ctx, 14980c16b537SWarner Losh void* dst, size_t maxDstSize, 14990c16b537SWarner Losh const void* src, size_t srcSize) 15000c16b537SWarner Losh { 15010c16b537SWarner Losh BYTE* op = (BYTE*)dst; 15020c16b537SWarner Losh BYTE* const oend = op + maxDstSize; 15030c16b537SWarner Losh const BYTE* ip = (const BYTE*)src; 15040c16b537SWarner Losh size_t errorCode; 15050c16b537SWarner Losh size_t litSize; 15060c16b537SWarner Losh 15070c16b537SWarner Losh /* check : minimum 2, for litSize, +1, for content */ 15080c16b537SWarner Losh if (srcSize <= 3) return ERROR(corruption_detected); 15090c16b537SWarner Losh 15100c16b537SWarner Losh litSize = ip[1] + (ip[0]<<8); 15110c16b537SWarner Losh litSize += ((ip[-3] >> 3) & 7) << 16; // mmmmh.... 15120c16b537SWarner Losh op = oend - litSize; 15130c16b537SWarner Losh 15140c16b537SWarner Losh (void)ctx; 15150c16b537SWarner Losh if (litSize > maxDstSize) return ERROR(dstSize_tooSmall); 15160c16b537SWarner Losh errorCode = HUF_decompress(op, litSize, ip+2, srcSize-2); 15170c16b537SWarner Losh if (FSE_isError(errorCode)) return ERROR(GENERIC); 15180c16b537SWarner Losh return litSize; 15190c16b537SWarner Losh } 15200c16b537SWarner Losh 15210c16b537SWarner Losh 15220f743729SConrad Meyer static size_t ZSTDv01_decodeLiteralsBlock(void* ctx, 15230c16b537SWarner Losh void* dst, size_t maxDstSize, 15240c16b537SWarner Losh const BYTE** litStart, size_t* litSize, 15250c16b537SWarner Losh const void* src, size_t srcSize) 15260c16b537SWarner Losh { 15270c16b537SWarner Losh const BYTE* const istart = (const BYTE* const)src; 15280c16b537SWarner Losh const BYTE* ip = istart; 15290c16b537SWarner Losh BYTE* const ostart = (BYTE* const)dst; 15300c16b537SWarner Losh BYTE* const oend = ostart + maxDstSize; 15310c16b537SWarner Losh blockProperties_t litbp; 15320c16b537SWarner Losh 15330c16b537SWarner Losh size_t litcSize = ZSTDv01_getcBlockSize(src, srcSize, &litbp); 15340c16b537SWarner Losh if (ZSTDv01_isError(litcSize)) return litcSize; 15350c16b537SWarner Losh if (litcSize > srcSize - ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); 15360c16b537SWarner Losh ip += ZSTD_blockHeaderSize; 15370c16b537SWarner Losh 15380c16b537SWarner Losh switch(litbp.blockType) 15390c16b537SWarner Losh { 15400c16b537SWarner Losh case bt_raw: 15410c16b537SWarner Losh *litStart = ip; 15420c16b537SWarner Losh ip += litcSize; 15430c16b537SWarner Losh *litSize = litcSize; 15440c16b537SWarner Losh break; 15450c16b537SWarner Losh case bt_rle: 15460c16b537SWarner Losh { 15470c16b537SWarner Losh size_t rleSize = litbp.origSize; 15480c16b537SWarner Losh if (rleSize>maxDstSize) return ERROR(dstSize_tooSmall); 15490c16b537SWarner Losh if (!srcSize) return ERROR(srcSize_wrong); 15500c16b537SWarner Losh memset(oend - rleSize, *ip, rleSize); 15510c16b537SWarner Losh *litStart = oend - rleSize; 15520c16b537SWarner Losh *litSize = rleSize; 15530c16b537SWarner Losh ip++; 15540c16b537SWarner Losh break; 15550c16b537SWarner Losh } 15560c16b537SWarner Losh case bt_compressed: 15570c16b537SWarner Losh { 15580c16b537SWarner Losh size_t decodedLitSize = ZSTD_decompressLiterals(ctx, dst, maxDstSize, ip, litcSize); 15590c16b537SWarner Losh if (ZSTDv01_isError(decodedLitSize)) return decodedLitSize; 15600c16b537SWarner Losh *litStart = oend - decodedLitSize; 15610c16b537SWarner Losh *litSize = decodedLitSize; 15620c16b537SWarner Losh ip += litcSize; 15630c16b537SWarner Losh break; 15640c16b537SWarner Losh } 15650c16b537SWarner Losh case bt_end: 15660c16b537SWarner Losh default: 15670c16b537SWarner Losh return ERROR(GENERIC); 15680c16b537SWarner Losh } 15690c16b537SWarner Losh 15700c16b537SWarner Losh return ip-istart; 15710c16b537SWarner Losh } 15720c16b537SWarner Losh 15730c16b537SWarner Losh 15740f743729SConrad Meyer static size_t ZSTDv01_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr, 15750c16b537SWarner Losh FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb, 15760c16b537SWarner Losh const void* src, size_t srcSize) 15770c16b537SWarner Losh { 15780c16b537SWarner Losh const BYTE* const istart = (const BYTE* const)src; 15790c16b537SWarner Losh const BYTE* ip = istart; 15800c16b537SWarner Losh const BYTE* const iend = istart + srcSize; 15810c16b537SWarner Losh U32 LLtype, Offtype, MLtype; 15820c16b537SWarner Losh U32 LLlog, Offlog, MLlog; 15830c16b537SWarner Losh size_t dumpsLength; 15840c16b537SWarner Losh 15850c16b537SWarner Losh /* check */ 15860c16b537SWarner Losh if (srcSize < 5) return ERROR(srcSize_wrong); 15870c16b537SWarner Losh 15880c16b537SWarner Losh /* SeqHead */ 15890c16b537SWarner Losh *nbSeq = ZSTD_readLE16(ip); ip+=2; 15900c16b537SWarner Losh LLtype = *ip >> 6; 15910c16b537SWarner Losh Offtype = (*ip >> 4) & 3; 15920c16b537SWarner Losh MLtype = (*ip >> 2) & 3; 15930c16b537SWarner Losh if (*ip & 2) 15940c16b537SWarner Losh { 15950c16b537SWarner Losh dumpsLength = ip[2]; 15960c16b537SWarner Losh dumpsLength += ip[1] << 8; 15970c16b537SWarner Losh ip += 3; 15980c16b537SWarner Losh } 15990c16b537SWarner Losh else 16000c16b537SWarner Losh { 16010c16b537SWarner Losh dumpsLength = ip[1]; 16020c16b537SWarner Losh dumpsLength += (ip[0] & 1) << 8; 16030c16b537SWarner Losh ip += 2; 16040c16b537SWarner Losh } 16050c16b537SWarner Losh *dumpsPtr = ip; 16060c16b537SWarner Losh ip += dumpsLength; 16070c16b537SWarner Losh *dumpsLengthPtr = dumpsLength; 16080c16b537SWarner Losh 16090c16b537SWarner Losh /* check */ 16100c16b537SWarner Losh if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */ 16110c16b537SWarner Losh 16120c16b537SWarner Losh /* sequences */ 16130c16b537SWarner Losh { 16140c16b537SWarner Losh S16 norm[MaxML+1]; /* assumption : MaxML >= MaxLL and MaxOff */ 16150c16b537SWarner Losh size_t headerSize; 16160c16b537SWarner Losh 16170c16b537SWarner Losh /* Build DTables */ 16180c16b537SWarner Losh switch(LLtype) 16190c16b537SWarner Losh { 16200c16b537SWarner Losh case bt_rle : 16210c16b537SWarner Losh LLlog = 0; 16220c16b537SWarner Losh FSE_buildDTable_rle(DTableLL, *ip++); break; 16230c16b537SWarner Losh case bt_raw : 16240c16b537SWarner Losh LLlog = LLbits; 16250c16b537SWarner Losh FSE_buildDTable_raw(DTableLL, LLbits); break; 16260c16b537SWarner Losh default : 16270c16b537SWarner Losh { U32 max = MaxLL; 16280c16b537SWarner Losh headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip); 16290c16b537SWarner Losh if (FSE_isError(headerSize)) return ERROR(GENERIC); 16300c16b537SWarner Losh if (LLlog > LLFSELog) return ERROR(corruption_detected); 16310c16b537SWarner Losh ip += headerSize; 16320c16b537SWarner Losh FSE_buildDTable(DTableLL, norm, max, LLlog); 16330c16b537SWarner Losh } } 16340c16b537SWarner Losh 16350c16b537SWarner Losh switch(Offtype) 16360c16b537SWarner Losh { 16370c16b537SWarner Losh case bt_rle : 16380c16b537SWarner Losh Offlog = 0; 16390c16b537SWarner Losh if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */ 16400c16b537SWarner Losh FSE_buildDTable_rle(DTableOffb, *ip++); break; 16410c16b537SWarner Losh case bt_raw : 16420c16b537SWarner Losh Offlog = Offbits; 16430c16b537SWarner Losh FSE_buildDTable_raw(DTableOffb, Offbits); break; 16440c16b537SWarner Losh default : 16450c16b537SWarner Losh { U32 max = MaxOff; 16460c16b537SWarner Losh headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip); 16470c16b537SWarner Losh if (FSE_isError(headerSize)) return ERROR(GENERIC); 16480c16b537SWarner Losh if (Offlog > OffFSELog) return ERROR(corruption_detected); 16490c16b537SWarner Losh ip += headerSize; 16500c16b537SWarner Losh FSE_buildDTable(DTableOffb, norm, max, Offlog); 16510c16b537SWarner Losh } } 16520c16b537SWarner Losh 16530c16b537SWarner Losh switch(MLtype) 16540c16b537SWarner Losh { 16550c16b537SWarner Losh case bt_rle : 16560c16b537SWarner Losh MLlog = 0; 16570c16b537SWarner Losh if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */ 16580c16b537SWarner Losh FSE_buildDTable_rle(DTableML, *ip++); break; 16590c16b537SWarner Losh case bt_raw : 16600c16b537SWarner Losh MLlog = MLbits; 16610c16b537SWarner Losh FSE_buildDTable_raw(DTableML, MLbits); break; 16620c16b537SWarner Losh default : 16630c16b537SWarner Losh { U32 max = MaxML; 16640c16b537SWarner Losh headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip); 16650c16b537SWarner Losh if (FSE_isError(headerSize)) return ERROR(GENERIC); 16660c16b537SWarner Losh if (MLlog > MLFSELog) return ERROR(corruption_detected); 16670c16b537SWarner Losh ip += headerSize; 16680c16b537SWarner Losh FSE_buildDTable(DTableML, norm, max, MLlog); 16690c16b537SWarner Losh } } } 16700c16b537SWarner Losh 16710c16b537SWarner Losh return ip-istart; 16720c16b537SWarner Losh } 16730c16b537SWarner Losh 16740c16b537SWarner Losh 16750c16b537SWarner Losh typedef struct { 16760c16b537SWarner Losh size_t litLength; 16770c16b537SWarner Losh size_t offset; 16780c16b537SWarner Losh size_t matchLength; 16790c16b537SWarner Losh } seq_t; 16800c16b537SWarner Losh 16810c16b537SWarner Losh typedef struct { 16820c16b537SWarner Losh FSE_DStream_t DStream; 16830c16b537SWarner Losh FSE_DState_t stateLL; 16840c16b537SWarner Losh FSE_DState_t stateOffb; 16850c16b537SWarner Losh FSE_DState_t stateML; 16860c16b537SWarner Losh size_t prevOffset; 16870c16b537SWarner Losh const BYTE* dumps; 16880c16b537SWarner Losh const BYTE* dumpsEnd; 16890c16b537SWarner Losh } seqState_t; 16900c16b537SWarner Losh 16910c16b537SWarner Losh 16920c16b537SWarner Losh static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) 16930c16b537SWarner Losh { 16940c16b537SWarner Losh size_t litLength; 16950c16b537SWarner Losh size_t prevOffset; 16960c16b537SWarner Losh size_t offset; 16970c16b537SWarner Losh size_t matchLength; 16980c16b537SWarner Losh const BYTE* dumps = seqState->dumps; 16990c16b537SWarner Losh const BYTE* const de = seqState->dumpsEnd; 17000c16b537SWarner Losh 17010c16b537SWarner Losh /* Literal length */ 17020c16b537SWarner Losh litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream)); 17030c16b537SWarner Losh prevOffset = litLength ? seq->offset : seqState->prevOffset; 17040c16b537SWarner Losh seqState->prevOffset = seq->offset; 17050c16b537SWarner Losh if (litLength == MaxLL) 17060c16b537SWarner Losh { 17070c16b537SWarner Losh U32 add = dumps<de ? *dumps++ : 0; 17080c16b537SWarner Losh if (add < 255) litLength += add; 17090c16b537SWarner Losh else 17100c16b537SWarner Losh { 17110c16b537SWarner Losh if (dumps<=(de-3)) 17120c16b537SWarner Losh { 17130c16b537SWarner Losh litLength = ZSTD_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */ 17140c16b537SWarner Losh dumps += 3; 17150c16b537SWarner Losh } 17160c16b537SWarner Losh } 17170c16b537SWarner Losh } 17180c16b537SWarner Losh 17190c16b537SWarner Losh /* Offset */ 17200c16b537SWarner Losh { 17210c16b537SWarner Losh U32 offsetCode, nbBits; 17220c16b537SWarner Losh offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); 17230c16b537SWarner Losh if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream)); 17240c16b537SWarner Losh nbBits = offsetCode - 1; 17250c16b537SWarner Losh if (offsetCode==0) nbBits = 0; /* cmove */ 17260c16b537SWarner Losh offset = ((size_t)1 << (nbBits & ((sizeof(offset)*8)-1))) + FSE_readBits(&(seqState->DStream), nbBits); 17270c16b537SWarner Losh if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream)); 17280c16b537SWarner Losh if (offsetCode==0) offset = prevOffset; 17290c16b537SWarner Losh } 17300c16b537SWarner Losh 17310c16b537SWarner Losh /* MatchLength */ 17320c16b537SWarner Losh matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); 17330c16b537SWarner Losh if (matchLength == MaxML) 17340c16b537SWarner Losh { 17350c16b537SWarner Losh U32 add = dumps<de ? *dumps++ : 0; 17360c16b537SWarner Losh if (add < 255) matchLength += add; 17370c16b537SWarner Losh else 17380c16b537SWarner Losh { 17390c16b537SWarner Losh if (dumps<=(de-3)) 17400c16b537SWarner Losh { 17410c16b537SWarner Losh matchLength = ZSTD_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */ 17420c16b537SWarner Losh dumps += 3; 17430c16b537SWarner Losh } 17440c16b537SWarner Losh } 17450c16b537SWarner Losh } 17460c16b537SWarner Losh matchLength += MINMATCH; 17470c16b537SWarner Losh 17480c16b537SWarner Losh /* save result */ 17490c16b537SWarner Losh seq->litLength = litLength; 17500c16b537SWarner Losh seq->offset = offset; 17510c16b537SWarner Losh seq->matchLength = matchLength; 17520c16b537SWarner Losh seqState->dumps = dumps; 17530c16b537SWarner Losh } 17540c16b537SWarner Losh 17550c16b537SWarner Losh 17560c16b537SWarner Losh static size_t ZSTD_execSequence(BYTE* op, 17570c16b537SWarner Losh seq_t sequence, 17580c16b537SWarner Losh const BYTE** litPtr, const BYTE* const litLimit, 17590c16b537SWarner Losh BYTE* const base, BYTE* const oend) 17600c16b537SWarner Losh { 17610c16b537SWarner Losh static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */ 1762*2b9c00cbSConrad Meyer static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* subtracted */ 17630c16b537SWarner Losh const BYTE* const ostart = op; 17640c16b537SWarner Losh const size_t litLength = sequence.litLength; 17650c16b537SWarner Losh BYTE* const endMatch = op + litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */ 17660c16b537SWarner Losh const BYTE* const litEnd = *litPtr + litLength; 17670c16b537SWarner Losh 17680c16b537SWarner Losh /* check */ 17690c16b537SWarner Losh if (endMatch > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */ 17700c16b537SWarner Losh if (litEnd > litLimit) return ERROR(corruption_detected); 17710c16b537SWarner Losh if (sequence.matchLength > (size_t)(*litPtr-op)) return ERROR(dstSize_tooSmall); /* overwrite literal segment */ 17720c16b537SWarner Losh 17730c16b537SWarner Losh /* copy Literals */ 17740c16b537SWarner Losh if (((size_t)(*litPtr - op) < 8) || ((size_t)(oend-litEnd) < 8) || (op+litLength > oend-8)) 17750c16b537SWarner Losh memmove(op, *litPtr, litLength); /* overwrite risk */ 17760c16b537SWarner Losh else 17770c16b537SWarner Losh ZSTD_wildcopy(op, *litPtr, litLength); 17780c16b537SWarner Losh op += litLength; 17790c16b537SWarner Losh *litPtr = litEnd; /* update for next sequence */ 17800c16b537SWarner Losh 17810c16b537SWarner Losh /* check : last match must be at a minimum distance of 8 from end of dest buffer */ 17820c16b537SWarner Losh if (oend-op < 8) return ERROR(dstSize_tooSmall); 17830c16b537SWarner Losh 17840c16b537SWarner Losh /* copy Match */ 17850c16b537SWarner Losh { 17860c16b537SWarner Losh const U32 overlapRisk = (((size_t)(litEnd - endMatch)) < 12); 17870c16b537SWarner Losh const BYTE* match = op - sequence.offset; /* possible underflow at op - offset ? */ 17880c16b537SWarner Losh size_t qutt = 12; 17890c16b537SWarner Losh U64 saved[2]; 17900c16b537SWarner Losh 17910c16b537SWarner Losh /* check */ 17920c16b537SWarner Losh if (match < base) return ERROR(corruption_detected); 17930c16b537SWarner Losh if (sequence.offset > (size_t)base) return ERROR(corruption_detected); 17940c16b537SWarner Losh 17950c16b537SWarner Losh /* save beginning of literal sequence, in case of write overlap */ 17960c16b537SWarner Losh if (overlapRisk) 17970c16b537SWarner Losh { 17980c16b537SWarner Losh if ((endMatch + qutt) > oend) qutt = oend-endMatch; 17990c16b537SWarner Losh memcpy(saved, endMatch, qutt); 18000c16b537SWarner Losh } 18010c16b537SWarner Losh 18020c16b537SWarner Losh if (sequence.offset < 8) 18030c16b537SWarner Losh { 18040c16b537SWarner Losh const int dec64 = dec64table[sequence.offset]; 18050c16b537SWarner Losh op[0] = match[0]; 18060c16b537SWarner Losh op[1] = match[1]; 18070c16b537SWarner Losh op[2] = match[2]; 18080c16b537SWarner Losh op[3] = match[3]; 18090c16b537SWarner Losh match += dec32table[sequence.offset]; 18100c16b537SWarner Losh ZSTD_copy4(op+4, match); 18110c16b537SWarner Losh match -= dec64; 18120c16b537SWarner Losh } else { ZSTD_copy8(op, match); } 18130c16b537SWarner Losh op += 8; match += 8; 18140c16b537SWarner Losh 18150c16b537SWarner Losh if (endMatch > oend-(16-MINMATCH)) 18160c16b537SWarner Losh { 18170c16b537SWarner Losh if (op < oend-8) 18180c16b537SWarner Losh { 18190c16b537SWarner Losh ZSTD_wildcopy(op, match, (oend-8) - op); 18200c16b537SWarner Losh match += (oend-8) - op; 18210c16b537SWarner Losh op = oend-8; 18220c16b537SWarner Losh } 18230c16b537SWarner Losh while (op<endMatch) *op++ = *match++; 18240c16b537SWarner Losh } 18250c16b537SWarner Losh else 18260c16b537SWarner Losh ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */ 18270c16b537SWarner Losh 18280c16b537SWarner Losh /* restore, in case of overlap */ 18290c16b537SWarner Losh if (overlapRisk) memcpy(endMatch, saved, qutt); 18300c16b537SWarner Losh } 18310c16b537SWarner Losh 18320c16b537SWarner Losh return endMatch-ostart; 18330c16b537SWarner Losh } 18340c16b537SWarner Losh 18350c16b537SWarner Losh typedef struct ZSTDv01_Dctx_s 18360c16b537SWarner Losh { 18370c16b537SWarner Losh U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)]; 18380c16b537SWarner Losh U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)]; 18390c16b537SWarner Losh U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)]; 18400c16b537SWarner Losh void* previousDstEnd; 18410c16b537SWarner Losh void* base; 18420c16b537SWarner Losh size_t expected; 18430c16b537SWarner Losh blockType_t bType; 18440c16b537SWarner Losh U32 phase; 18450c16b537SWarner Losh } dctx_t; 18460c16b537SWarner Losh 18470c16b537SWarner Losh 18480c16b537SWarner Losh static size_t ZSTD_decompressSequences( 18490c16b537SWarner Losh void* ctx, 18500c16b537SWarner Losh void* dst, size_t maxDstSize, 18510c16b537SWarner Losh const void* seqStart, size_t seqSize, 18520c16b537SWarner Losh const BYTE* litStart, size_t litSize) 18530c16b537SWarner Losh { 18540c16b537SWarner Losh dctx_t* dctx = (dctx_t*)ctx; 18550c16b537SWarner Losh const BYTE* ip = (const BYTE*)seqStart; 18560c16b537SWarner Losh const BYTE* const iend = ip + seqSize; 18570c16b537SWarner Losh BYTE* const ostart = (BYTE* const)dst; 18580c16b537SWarner Losh BYTE* op = ostart; 18590c16b537SWarner Losh BYTE* const oend = ostart + maxDstSize; 18600c16b537SWarner Losh size_t errorCode, dumpsLength; 18610c16b537SWarner Losh const BYTE* litPtr = litStart; 18620c16b537SWarner Losh const BYTE* const litEnd = litStart + litSize; 18630c16b537SWarner Losh int nbSeq; 18640c16b537SWarner Losh const BYTE* dumps; 18650c16b537SWarner Losh U32* DTableLL = dctx->LLTable; 18660c16b537SWarner Losh U32* DTableML = dctx->MLTable; 18670c16b537SWarner Losh U32* DTableOffb = dctx->OffTable; 18680c16b537SWarner Losh BYTE* const base = (BYTE*) (dctx->base); 18690c16b537SWarner Losh 18700c16b537SWarner Losh /* Build Decoding Tables */ 18710c16b537SWarner Losh errorCode = ZSTDv01_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength, 18720c16b537SWarner Losh DTableLL, DTableML, DTableOffb, 18730c16b537SWarner Losh ip, iend-ip); 18740c16b537SWarner Losh if (ZSTDv01_isError(errorCode)) return errorCode; 18750c16b537SWarner Losh ip += errorCode; 18760c16b537SWarner Losh 18770c16b537SWarner Losh /* Regen sequences */ 18780c16b537SWarner Losh { 18790c16b537SWarner Losh seq_t sequence; 18800c16b537SWarner Losh seqState_t seqState; 18810c16b537SWarner Losh 18820c16b537SWarner Losh memset(&sequence, 0, sizeof(sequence)); 18830c16b537SWarner Losh seqState.dumps = dumps; 18840c16b537SWarner Losh seqState.dumpsEnd = dumps + dumpsLength; 18850c16b537SWarner Losh seqState.prevOffset = 1; 18860c16b537SWarner Losh errorCode = FSE_initDStream(&(seqState.DStream), ip, iend-ip); 18870c16b537SWarner Losh if (FSE_isError(errorCode)) return ERROR(corruption_detected); 18880c16b537SWarner Losh FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL); 18890c16b537SWarner Losh FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb); 18900c16b537SWarner Losh FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML); 18910c16b537SWarner Losh 18920c16b537SWarner Losh for ( ; (FSE_reloadDStream(&(seqState.DStream)) <= FSE_DStream_completed) && (nbSeq>0) ; ) 18930c16b537SWarner Losh { 18940c16b537SWarner Losh size_t oneSeqSize; 18950c16b537SWarner Losh nbSeq--; 18960c16b537SWarner Losh ZSTD_decodeSequence(&sequence, &seqState); 18970c16b537SWarner Losh oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litEnd, base, oend); 18980c16b537SWarner Losh if (ZSTDv01_isError(oneSeqSize)) return oneSeqSize; 18990c16b537SWarner Losh op += oneSeqSize; 19000c16b537SWarner Losh } 19010c16b537SWarner Losh 19020c16b537SWarner Losh /* check if reached exact end */ 19030c16b537SWarner Losh if ( !FSE_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected); /* requested too much : data is corrupted */ 19040c16b537SWarner Losh if (nbSeq<0) return ERROR(corruption_detected); /* requested too many sequences : data is corrupted */ 19050c16b537SWarner Losh 19060c16b537SWarner Losh /* last literal segment */ 19070c16b537SWarner Losh { 19080c16b537SWarner Losh size_t lastLLSize = litEnd - litPtr; 19090c16b537SWarner Losh if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall); 19100c16b537SWarner Losh if (op != litPtr) memmove(op, litPtr, lastLLSize); 19110c16b537SWarner Losh op += lastLLSize; 19120c16b537SWarner Losh } 19130c16b537SWarner Losh } 19140c16b537SWarner Losh 19150c16b537SWarner Losh return op-ostart; 19160c16b537SWarner Losh } 19170c16b537SWarner Losh 19180c16b537SWarner Losh 19190c16b537SWarner Losh static size_t ZSTD_decompressBlock( 19200c16b537SWarner Losh void* ctx, 19210c16b537SWarner Losh void* dst, size_t maxDstSize, 19220c16b537SWarner Losh const void* src, size_t srcSize) 19230c16b537SWarner Losh { 19240c16b537SWarner Losh /* blockType == blockCompressed, srcSize is trusted */ 19250c16b537SWarner Losh const BYTE* ip = (const BYTE*)src; 19260c16b537SWarner Losh const BYTE* litPtr = NULL; 19270c16b537SWarner Losh size_t litSize = 0; 19280c16b537SWarner Losh size_t errorCode; 19290c16b537SWarner Losh 19300c16b537SWarner Losh /* Decode literals sub-block */ 19310c16b537SWarner Losh errorCode = ZSTDv01_decodeLiteralsBlock(ctx, dst, maxDstSize, &litPtr, &litSize, src, srcSize); 19320c16b537SWarner Losh if (ZSTDv01_isError(errorCode)) return errorCode; 19330c16b537SWarner Losh ip += errorCode; 19340c16b537SWarner Losh srcSize -= errorCode; 19350c16b537SWarner Losh 19360c16b537SWarner Losh return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize, litPtr, litSize); 19370c16b537SWarner Losh } 19380c16b537SWarner Losh 19390c16b537SWarner Losh 19400c16b537SWarner Losh size_t ZSTDv01_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) 19410c16b537SWarner Losh { 19420c16b537SWarner Losh const BYTE* ip = (const BYTE*)src; 19430c16b537SWarner Losh const BYTE* iend = ip + srcSize; 19440c16b537SWarner Losh BYTE* const ostart = (BYTE* const)dst; 19450c16b537SWarner Losh BYTE* op = ostart; 19460c16b537SWarner Losh BYTE* const oend = ostart + maxDstSize; 19470c16b537SWarner Losh size_t remainingSize = srcSize; 19480c16b537SWarner Losh U32 magicNumber; 19490c16b537SWarner Losh size_t errorCode=0; 19500c16b537SWarner Losh blockProperties_t blockProperties; 19510c16b537SWarner Losh 19520c16b537SWarner Losh /* Frame Header */ 19530c16b537SWarner Losh if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); 19540c16b537SWarner Losh magicNumber = ZSTD_readBE32(src); 19550c16b537SWarner Losh if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown); 19560c16b537SWarner Losh ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize; 19570c16b537SWarner Losh 19580c16b537SWarner Losh /* Loop on each block */ 19590c16b537SWarner Losh while (1) 19600c16b537SWarner Losh { 19610c16b537SWarner Losh size_t blockSize = ZSTDv01_getcBlockSize(ip, iend-ip, &blockProperties); 19620c16b537SWarner Losh if (ZSTDv01_isError(blockSize)) return blockSize; 19630c16b537SWarner Losh 19640c16b537SWarner Losh ip += ZSTD_blockHeaderSize; 19650c16b537SWarner Losh remainingSize -= ZSTD_blockHeaderSize; 19660c16b537SWarner Losh if (blockSize > remainingSize) return ERROR(srcSize_wrong); 19670c16b537SWarner Losh 19680c16b537SWarner Losh switch(blockProperties.blockType) 19690c16b537SWarner Losh { 19700c16b537SWarner Losh case bt_compressed: 19710c16b537SWarner Losh errorCode = ZSTD_decompressBlock(ctx, op, oend-op, ip, blockSize); 19720c16b537SWarner Losh break; 19730c16b537SWarner Losh case bt_raw : 19740c16b537SWarner Losh errorCode = ZSTD_copyUncompressedBlock(op, oend-op, ip, blockSize); 19750c16b537SWarner Losh break; 19760c16b537SWarner Losh case bt_rle : 19770c16b537SWarner Losh return ERROR(GENERIC); /* not yet supported */ 19780c16b537SWarner Losh break; 19790c16b537SWarner Losh case bt_end : 19800c16b537SWarner Losh /* end of frame */ 19810c16b537SWarner Losh if (remainingSize) return ERROR(srcSize_wrong); 19820c16b537SWarner Losh break; 19830c16b537SWarner Losh default: 19840c16b537SWarner Losh return ERROR(GENERIC); 19850c16b537SWarner Losh } 19860c16b537SWarner Losh if (blockSize == 0) break; /* bt_end */ 19870c16b537SWarner Losh 19880c16b537SWarner Losh if (ZSTDv01_isError(errorCode)) return errorCode; 19890c16b537SWarner Losh op += errorCode; 19900c16b537SWarner Losh ip += blockSize; 19910c16b537SWarner Losh remainingSize -= blockSize; 19920c16b537SWarner Losh } 19930c16b537SWarner Losh 19940c16b537SWarner Losh return op-ostart; 19950c16b537SWarner Losh } 19960c16b537SWarner Losh 19970c16b537SWarner Losh size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize) 19980c16b537SWarner Losh { 19990c16b537SWarner Losh dctx_t ctx; 20000c16b537SWarner Losh ctx.base = dst; 20010c16b537SWarner Losh return ZSTDv01_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize); 20020c16b537SWarner Losh } 20030c16b537SWarner Losh 2004*2b9c00cbSConrad Meyer /* ZSTD_errorFrameSizeInfoLegacy() : 2005*2b9c00cbSConrad Meyer assumes `cSize` and `dBound` are _not_ NULL */ 2006*2b9c00cbSConrad Meyer static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret) 2007*2b9c00cbSConrad Meyer { 2008*2b9c00cbSConrad Meyer *cSize = ret; 2009*2b9c00cbSConrad Meyer *dBound = ZSTD_CONTENTSIZE_ERROR; 2010*2b9c00cbSConrad Meyer } 2011*2b9c00cbSConrad Meyer 2012*2b9c00cbSConrad Meyer void ZSTDv01_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound) 20130c16b537SWarner Losh { 20140c16b537SWarner Losh const BYTE* ip = (const BYTE*)src; 20150c16b537SWarner Losh size_t remainingSize = srcSize; 2016*2b9c00cbSConrad Meyer size_t nbBlocks = 0; 20170c16b537SWarner Losh U32 magicNumber; 20180c16b537SWarner Losh blockProperties_t blockProperties; 20190c16b537SWarner Losh 20200c16b537SWarner Losh /* Frame Header */ 2021*2b9c00cbSConrad Meyer if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) { 2022*2b9c00cbSConrad Meyer ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong)); 2023*2b9c00cbSConrad Meyer return; 2024*2b9c00cbSConrad Meyer } 20250c16b537SWarner Losh magicNumber = ZSTD_readBE32(src); 2026*2b9c00cbSConrad Meyer if (magicNumber != ZSTD_magicNumber) { 2027*2b9c00cbSConrad Meyer ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown)); 2028*2b9c00cbSConrad Meyer return; 2029*2b9c00cbSConrad Meyer } 20300c16b537SWarner Losh ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize; 20310c16b537SWarner Losh 20320c16b537SWarner Losh /* Loop on each block */ 20330c16b537SWarner Losh while (1) 20340c16b537SWarner Losh { 20350c16b537SWarner Losh size_t blockSize = ZSTDv01_getcBlockSize(ip, remainingSize, &blockProperties); 2036*2b9c00cbSConrad Meyer if (ZSTDv01_isError(blockSize)) { 2037*2b9c00cbSConrad Meyer ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, blockSize); 2038*2b9c00cbSConrad Meyer return; 2039*2b9c00cbSConrad Meyer } 20400c16b537SWarner Losh 20410c16b537SWarner Losh ip += ZSTD_blockHeaderSize; 20420c16b537SWarner Losh remainingSize -= ZSTD_blockHeaderSize; 2043*2b9c00cbSConrad Meyer if (blockSize > remainingSize) { 2044*2b9c00cbSConrad Meyer ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong)); 2045*2b9c00cbSConrad Meyer return; 2046*2b9c00cbSConrad Meyer } 20470c16b537SWarner Losh 20480c16b537SWarner Losh if (blockSize == 0) break; /* bt_end */ 20490c16b537SWarner Losh 20500c16b537SWarner Losh ip += blockSize; 20510c16b537SWarner Losh remainingSize -= blockSize; 2052*2b9c00cbSConrad Meyer nbBlocks++; 20530c16b537SWarner Losh } 20540c16b537SWarner Losh 2055*2b9c00cbSConrad Meyer *cSize = ip - (const BYTE*)src; 2056*2b9c00cbSConrad Meyer *dBound = nbBlocks * BLOCKSIZE; 20570c16b537SWarner Losh } 20580c16b537SWarner Losh 20590c16b537SWarner Losh /******************************* 20600c16b537SWarner Losh * Streaming Decompression API 20610c16b537SWarner Losh *******************************/ 20620c16b537SWarner Losh 20630c16b537SWarner Losh size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx) 20640c16b537SWarner Losh { 20650c16b537SWarner Losh dctx->expected = ZSTD_frameHeaderSize; 20660c16b537SWarner Losh dctx->phase = 0; 20670c16b537SWarner Losh dctx->previousDstEnd = NULL; 20680c16b537SWarner Losh dctx->base = NULL; 20690c16b537SWarner Losh return 0; 20700c16b537SWarner Losh } 20710c16b537SWarner Losh 20720c16b537SWarner Losh ZSTDv01_Dctx* ZSTDv01_createDCtx(void) 20730c16b537SWarner Losh { 20740c16b537SWarner Losh ZSTDv01_Dctx* dctx = (ZSTDv01_Dctx*)malloc(sizeof(ZSTDv01_Dctx)); 20750c16b537SWarner Losh if (dctx==NULL) return NULL; 20760c16b537SWarner Losh ZSTDv01_resetDCtx(dctx); 20770c16b537SWarner Losh return dctx; 20780c16b537SWarner Losh } 20790c16b537SWarner Losh 20800c16b537SWarner Losh size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx) 20810c16b537SWarner Losh { 20820c16b537SWarner Losh free(dctx); 20830c16b537SWarner Losh return 0; 20840c16b537SWarner Losh } 20850c16b537SWarner Losh 20860c16b537SWarner Losh size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx) 20870c16b537SWarner Losh { 20880c16b537SWarner Losh return ((dctx_t*)dctx)->expected; 20890c16b537SWarner Losh } 20900c16b537SWarner Losh 20910c16b537SWarner Losh size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) 20920c16b537SWarner Losh { 20930c16b537SWarner Losh dctx_t* ctx = (dctx_t*)dctx; 20940c16b537SWarner Losh 20950c16b537SWarner Losh /* Sanity check */ 20960c16b537SWarner Losh if (srcSize != ctx->expected) return ERROR(srcSize_wrong); 20970c16b537SWarner Losh if (dst != ctx->previousDstEnd) /* not contiguous */ 20980c16b537SWarner Losh ctx->base = dst; 20990c16b537SWarner Losh 21000c16b537SWarner Losh /* Decompress : frame header */ 21010c16b537SWarner Losh if (ctx->phase == 0) 21020c16b537SWarner Losh { 21030c16b537SWarner Losh /* Check frame magic header */ 21040c16b537SWarner Losh U32 magicNumber = ZSTD_readBE32(src); 21050c16b537SWarner Losh if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown); 21060c16b537SWarner Losh ctx->phase = 1; 21070c16b537SWarner Losh ctx->expected = ZSTD_blockHeaderSize; 21080c16b537SWarner Losh return 0; 21090c16b537SWarner Losh } 21100c16b537SWarner Losh 21110c16b537SWarner Losh /* Decompress : block header */ 21120c16b537SWarner Losh if (ctx->phase == 1) 21130c16b537SWarner Losh { 21140c16b537SWarner Losh blockProperties_t bp; 21150c16b537SWarner Losh size_t blockSize = ZSTDv01_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); 21160c16b537SWarner Losh if (ZSTDv01_isError(blockSize)) return blockSize; 21170c16b537SWarner Losh if (bp.blockType == bt_end) 21180c16b537SWarner Losh { 21190c16b537SWarner Losh ctx->expected = 0; 21200c16b537SWarner Losh ctx->phase = 0; 21210c16b537SWarner Losh } 21220c16b537SWarner Losh else 21230c16b537SWarner Losh { 21240c16b537SWarner Losh ctx->expected = blockSize; 21250c16b537SWarner Losh ctx->bType = bp.blockType; 21260c16b537SWarner Losh ctx->phase = 2; 21270c16b537SWarner Losh } 21280c16b537SWarner Losh 21290c16b537SWarner Losh return 0; 21300c16b537SWarner Losh } 21310c16b537SWarner Losh 21320c16b537SWarner Losh /* Decompress : block content */ 21330c16b537SWarner Losh { 21340c16b537SWarner Losh size_t rSize; 21350c16b537SWarner Losh switch(ctx->bType) 21360c16b537SWarner Losh { 21370c16b537SWarner Losh case bt_compressed: 21380c16b537SWarner Losh rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize); 21390c16b537SWarner Losh break; 21400c16b537SWarner Losh case bt_raw : 21410c16b537SWarner Losh rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize); 21420c16b537SWarner Losh break; 21430c16b537SWarner Losh case bt_rle : 21440c16b537SWarner Losh return ERROR(GENERIC); /* not yet handled */ 21450c16b537SWarner Losh break; 21460c16b537SWarner Losh case bt_end : /* should never happen (filtered at phase 1) */ 21470c16b537SWarner Losh rSize = 0; 21480c16b537SWarner Losh break; 21490c16b537SWarner Losh default: 21500c16b537SWarner Losh return ERROR(GENERIC); 21510c16b537SWarner Losh } 21520c16b537SWarner Losh ctx->phase = 1; 21530c16b537SWarner Losh ctx->expected = ZSTD_blockHeaderSize; 21540c16b537SWarner Losh ctx->previousDstEnd = (void*)( ((char*)dst) + rSize); 21550c16b537SWarner Losh return rSize; 21560c16b537SWarner Losh } 21570c16b537SWarner Losh 21580c16b537SWarner Losh } 2159