/*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */


/******************************************
*  Includes
******************************************/
#include <stddef.h>    /* size_t, ptrdiff_t */
#include "zstd_v01.h"
#include "error_private.h"


/******************************************
*  Static allocation
******************************************/
/* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */
#define FSE_DTABLE_SIZE_U32(maxTableLog)   (1 + (1<<maxTableLog))

/* You can statically allocate Huff0 DTable as a table of unsigned short using below macro */
#define HUF_DTABLE_SIZE_U16(maxTableLog)   (1 + (1<<maxTableLog))
#define HUF_CREATE_STATIC_DTABLE(DTable, maxTableLog) \
        unsigned short DTable[HUF_DTABLE_SIZE_U16(maxTableLog)] = { maxTableLog }


/******************************************
*  Error Management
******************************************/
/* X-macro list of error codes; expanded below to generate the enum.
 * Functions report an error by returning (size_t)-code, hence "compare to -enum value". */
#define FSE_LIST_ERRORS(ITEM) \
        ITEM(FSE_OK_NoError) ITEM(FSE_ERROR_GENERIC) \
        ITEM(FSE_ERROR_tableLog_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooSmall) \
        ITEM(FSE_ERROR_dstSize_tooSmall) ITEM(FSE_ERROR_srcSize_wrong)\
        ITEM(FSE_ERROR_corruptionDetected) \
        ITEM(FSE_ERROR_maxCode)

#define FSE_GENERATE_ENUM(ENUM) ENUM,
typedef enum { FSE_LIST_ERRORS(FSE_GENERATE_ENUM) } FSE_errorCodes;  /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */


/******************************************
*  FSE symbol compression API
******************************************/
/*
   This API consists of small unitary functions, which highly benefit from being inlined.
   You will want to enable link-time-optimization to ensure these functions are properly inlined in your binary.
   Visual seems to do it automatically.
   For gcc or clang, you'll need to add -flto flag at compilation and linking stages.
   If none of these solutions is applicable, include "fse.c" directly.
*/

typedef unsigned FSE_CTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */

/* Forward (write) bit-stream, used when encoding. */
typedef struct
{
    size_t bitContainer;
    int    bitPos;
    char*  startPtr;
    char*  ptr;
    char*  endPtr;
} FSE_CStream_t;

/* Per-symbol-stream compression state. */
typedef struct
{
    ptrdiff_t   value;
    const void* stateTable;
    const void* symbolTT;
    unsigned    stateLog;
} FSE_CState_t;

/* Backward (read) bit-stream, consumed from the end of the buffer toward the start. */
typedef struct
{
    size_t   bitContainer;
    unsigned bitsConsumed;
    const char* ptr;
    const char* start;
} FSE_DStream_t;

/* Per-symbol-stream decompression state. */
typedef struct
{
    size_t      state;
    const void* table;   /* precise table may vary, depending on U16 */
} FSE_DState_t;

typedef enum { FSE_DStream_unfinished = 0,
               FSE_DStream_endOfBuffer = 1,
               FSE_DStream_completed = 2,
               FSE_DStream_tooFar = 3 } FSE_DStream_status;  /* result of FSE_reloadDStream() */
/* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... ?! */


/****************************************************************
*  Tuning parameters
****************************************************************/
/* MEMORY_USAGE :
*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
*  Increasing memory usage improves compression ratio
*  Reduced memory usage can improve speed, due to cache effect
*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
#define FSE_MAX_MEMORY_USAGE 14
#define FSE_DEFAULT_MEMORY_USAGE 13

/* FSE_MAX_SYMBOL_VALUE :
*  Maximum symbol value authorized.
*  Required for proper stack allocation */
#define FSE_MAX_SYMBOL_VALUE 255


/****************************************************************
*  template functions type & suffix
****************************************************************/
#define FSE_FUNCTION_TYPE BYTE
#define FSE_FUNCTION_EXTENSION


/****************************************************************
*  Byte symbol type
****************************************************************/
/* One decode-table cell : next-state base, decoded symbol, and bit count to refill. */
typedef struct
{
    unsigned short newState;
    unsigned char  symbol;
    unsigned char  nbBits;
} FSE_decode_t;   /* size == U32 */



/****************************************************************
*  Compiler specifics
****************************************************************/
#ifdef _MSC_VER    /* Visual Studio */
#  define FORCE_INLINE static __forceinline
#  include <intrin.h>                    /* For Visual 2005 */
#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
#else
#  define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
#    ifdef __GNUC__
#      define FORCE_INLINE static inline __attribute__((always_inline))
#    else
#      define FORCE_INLINE static inline
#    endif
#  else
#    define FORCE_INLINE static
#  endif /* __STDC_VERSION__ */
#endif


/****************************************************************
*  Includes
****************************************************************/
#include <stdlib.h>     /* malloc, free, qsort */
#include <string.h>     /* memcpy, memset */
#include <stdio.h>      /* printf (debug) */


#ifndef MEM_ACCESS_MODULE
#define MEM_ACCESS_MODULE
/****************************************************************
*  Basic Types
*****************************************************************/
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
# include <stdint.h>
typedef  uint8_t BYTE;
typedef uint16_t U16;
typedef  int16_t S16;
typedef uint32_t U32;
typedef  int32_t S32;
typedef uint64_t U64;
typedef  int64_t S64;
#else
typedef unsigned char       BYTE;
typedef unsigned short      U16;
typedef   signed short      S16;
typedef unsigned int        U32;
typedef   signed int        S32;
typedef unsigned long long  U64;
typedef   signed long long  S64;
#endif

#endif /* MEM_ACCESS_MODULE */

/****************************************************************
*  Memory I/O
*****************************************************************/
/* FSE_FORCE_MEMORY_ACCESS
 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
 * The below switch allow to select different access method for improved performance.
 * Method 0 (default) : use `memcpy()`. Safe and portable.
 * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
 *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
 * Method 2 : direct access. This method is portable but violate C standard.
 *            It can generate buggy code on targets generating assembly depending on alignment.
 *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
 * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
 * Prefer these methods in priority order (0 > 1 > 2)
 */
#ifndef FSE_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
#    define FSE_FORCE_MEMORY_ACCESS 2
#  elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
#    define FSE_FORCE_MEMORY_ACCESS 1
#  endif
#endif


/* non-zero when the build targets 32-bit pointers (bit-container is then 32 bits wide) */
static unsigned FSE_32bits(void)
{
    return sizeof(void*)==4;
}

/* runtime endianness probe; compilers constant-fold this */
static unsigned FSE_isLittleEndian(void)
{
    const union { U32 i; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental */
    return one.c[0];
}

#if defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==2)

/* Method 2 : direct unaligned access (violates strict C, see note above) */
static U16 FSE_read16(const void* memPtr) { return *(const U16*) memPtr; }
static U32 FSE_read32(const void* memPtr) { return *(const U32*) memPtr; }
static U64 FSE_read64(const void* memPtr) { return *(const U64*) memPtr; }

#elif defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==1)

/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
/* currently only defined for gcc and icc */
typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign;

static U16 FSE_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
static U32 FSE_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
static U64 FSE_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }

#else

/* Method 0 (default) : safe, portable unaligned reads via memcpy */
static U16 FSE_read16(const void* memPtr)
{
    U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
}

static U32 FSE_read32(const void* memPtr)
{
    U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
}

static U64 FSE_read64(const void* memPtr)
{
    U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
}

#endif /* FSE_FORCE_MEMORY_ACCESS */

/* read a 16-bit little-endian value, whatever the host endianness */
static U16 FSE_readLE16(const void* memPtr)
{
    if (FSE_isLittleEndian())
        return FSE_read16(memPtr);
    else
    {
        const BYTE* p = (const BYTE*)memPtr;
        return (U16)(p[0] + (p[1]<<8));
    }
}

/* read a 32-bit little-endian value, whatever the host endianness */
static U32 FSE_readLE32(const void* memPtr)
{
    if (FSE_isLittleEndian())
        return FSE_read32(memPtr);
    else
    {
        const BYTE* p = (const BYTE*)memPtr;
        return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
    }
}


/* read a 64-bit little-endian value, whatever the host endianness */
static U64 FSE_readLE64(const void* memPtr)
{
    if (FSE_isLittleEndian())
        return FSE_read64(memPtr);
    else
    {
        const BYTE* p = (const BYTE*)memPtr;
        return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
                     + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
    }
}

/* read a size_t-wide little-endian value (32 or 64 bits depending on build) */
static size_t FSE_readLEST(const void* memPtr)
{
    if (FSE_32bits())
        return (size_t)FSE_readLE32(memPtr);
    else
        return (size_t)FSE_readLE64(memPtr);
}



/****************************************************************
*  Constants
*****************************************************************/
#define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)
#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
#define FSE_MIN_TABLELOG 5

#define FSE_TABLELOG_ABSOLUTE_MAX 15
#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
#endif


/****************************************************************
*  Error Management
****************************************************************/
#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */


/****************************************************************
*  Complex types
****************************************************************/
typedef struct
{
    int  deltaFindState;
    U32  deltaNbBits;
} FSE_symbolCompressionTransform;   /* total 8 bytes */

typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];

/****************************************************************
*  Internal functions
****************************************************************/
/* Position of the highest set bit of `val`.
 * NOTE(review): both _BitScanReverse and __builtin_clz are undefined for val==0,
 * so callers must guarantee val != 0. */
FORCE_INLINE unsigned FSE_highbit32 (U32 val)
{
#   if defined(_MSC_VER)   /* Visual */
    unsigned long r;
    _BitScanReverse ( &r, val );
    return (unsigned) r;
#   elif defined(__GNUC__) && (GCC_VERSION >= 304)   /* GCC Intrinsic */
    return 31 - __builtin_clz (val);
#   else   /* Software version */
    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
    U32 v = val;
    unsigned r;
    v |= v >> 1;
    v |= v >> 2;
    v |= v >> 4;
    v |= v >> 8;
    v |= v >> 16;
    r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
    return r;
#   endif
}


/****************************************************************
*  Templates
****************************************************************/
/*
  designed to be included
  for type-specific functions (template emulation in C)
  Objective is to write these functions only once, for improved maintenance
*/
/* safety checks */
#ifndef FSE_FUNCTION_EXTENSION
#  error "FSE_FUNCTION_EXTENSION must be defined"
#endif
#ifndef FSE_FUNCTION_TYPE
#  error "FSE_FUNCTION_TYPE must be defined"
#endif

/* Function names */
#define FSE_CAT(X,Y) X##Y
#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)



/* step used to spread symbols across the table; coprime with any power-of-2 table size */
static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }

#define FSE_DECODE_TYPE FSE_decode_t


/* stored in the first U32 cell of an FSE_DTable */
typedef struct {
    U16 tableLog;
    U16 fastMode;
} FSE_DTableHeader;   /* sizeof U32 */

/* Build an FSE decoding table from a normalized counter.
 * dt layout : [header (1 U32)] [tableSize cells of FSE_decode_t].
 * Returns 0 on success, or (size_t)-errorCode.
 * fastMode is set when no symbol has probability >= 2^(tableLog-1),
 * which allows the "fast" decode path (no zero-bit reads). */
static size_t FSE_buildDTable
(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
{
    void* ptr = dt;
    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)(ptr) + 1;   /* because dt is unsigned, 32-bits aligned on 32-bits */
    const U32 tableSize = 1 << tableLog;
    const U32 tableMask = tableSize-1;
    const U32 step = FSE_tableStep(tableSize);
    U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
    U32 position = 0;
    U32 highThreshold = tableSize-1;
    const S16 largeLimit= (S16)(1 << (tableLog-1));
    U32 noLarge = 1;
    U32 s;

    /* Sanity Checks */
    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge;
    if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge;

    /* Init, lay down lowprob symbols */
    /* -1 counts mark "less than 1" probability symbols; they get one cell at the table top */
    DTableH[0].tableLog = (U16)tableLog;
    for (s=0; s<=maxSymbolValue; s++)
    {
        if (normalizedCounter[s]==-1)
        {
            tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
            symbolNext[s] = 1;
        }
        else
        {
            if (normalizedCounter[s] >= largeLimit) noLarge=0;
            symbolNext[s] = normalizedCounter[s];
        }
    }

    /* Spread symbols */
    for (s=0; s<=maxSymbolValue; s++)
    {
        int i;
        for (i=0; i<normalizedCounter[s]; i++)
        {
            tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
            position = (position + step) & tableMask;
            while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
        }
    }

    if (position!=0) return (size_t)-FSE_ERROR_GENERIC;   /* position must reach all cells once, otherwise normalizedCounter is incorrect */

    /* Build Decoding table */
    {
        U32 i;
        for (i=0; i<tableSize; i++)
        {
            FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
            U16 nextState = symbolNext[symbol]++;
            tableDecode[i].nbBits = (BYTE) (tableLog - FSE_highbit32 ((U32)nextState) );
            tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
        }
    }

    DTableH->fastMode = (U16)noLarge;
    return 0;
}


/******************************************
*  FSE byte symbol
******************************************/
#ifndef FSE_COMMONDEFS_ONLY

/* error codes are returned as (size_t)-code, i.e. values above -FSE_ERROR_maxCode */
static unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); }

static short FSE_abs(short a)
{
    return a<0? -a : a;
}


/****************************************************************
*  Header bitstream management
****************************************************************/
/* Decode a normalized-counter header (table description) from headerBuffer.
 * On success : fills normalizedCounter[0..charnum-1], updates *maxSVPtr and *tableLogPtr,
 * and returns the number of header bytes consumed.
 * On failure : returns (size_t)-errorCode. */
static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
                 const void* headerBuffer, size_t hbSize)
{
    const BYTE* const istart = (const BYTE*) headerBuffer;
    const BYTE* const iend = istart + hbSize;
    const BYTE* ip = istart;
    int nbBits;
    int remaining;
    int threshold;
    U32 bitStream;
    int bitCount;
    unsigned charnum = 0;
    int previous0 = 0;

    if (hbSize < 4) return (size_t)-FSE_ERROR_srcSize_wrong;
    bitStream = FSE_readLE32(ip);
    nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
    if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return (size_t)-FSE_ERROR_tableLog_tooLarge;
    bitStream >>= 4;
    bitCount = 4;
    *tableLogPtr = nbBits;
    remaining = (1<<nbBits)+1;
    threshold = 1<<nbBits;
    nbBits++;

    while ((remaining>1) && (charnum<=*maxSVPtr))
    {
        if (previous0)
        {
            /* a zero count is followed by a 2-bit repeat flag encoding runs of additional zeroes */
            unsigned n0 = charnum;
            while ((bitStream & 0xFFFF) == 0xFFFF)
            {
                n0+=24;
                if (ip < iend-5)
                {
                    ip+=2;
                    bitStream = FSE_readLE32(ip) >> bitCount;
                }
                else
                {
                    bitStream >>= 16;
                    bitCount+=16;
                }
            }
            while ((bitStream & 3) == 3)
            {
                n0+=3;
                bitStream>>=2;
                bitCount+=2;
            }
            n0 += bitStream & 3;
            bitCount += 2;
            if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_maxSymbolValue_tooSmall;
            while (charnum < n0) normalizedCounter[charnum++] = 0;
            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
            {
                ip += bitCount>>3;
                bitCount &= 7;
                bitStream = FSE_readLE32(ip) >> bitCount;
            }
            else
                bitStream >>= 2;
        }
        {
            /* counts are stored with variable precision : small values use nbBits-1 bits */
            const short max = (short)((2*threshold-1)-remaining);
            short count;

            if ((bitStream & (threshold-1)) < (U32)max)
            {
                count = (short)(bitStream & (threshold-1));
                bitCount += nbBits-1;
            }
            else
            {
                count = (short)(bitStream & (2*threshold-1));
                if (count >= threshold) count -= max;
                bitCount += nbBits;
            }

            count--;   /* extra accuracy */
            remaining -= FSE_abs(count);   /* count==-1 means "low probability" and still consumes 1 */
            normalizedCounter[charnum++] = count;
            previous0 = !count;
            while (remaining < threshold)
            {
                nbBits--;
                threshold >>= 1;
            }

            {
                if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
                {
                    ip += bitCount>>3;
                    bitCount &= 7;
                }
                else
                {
                    /* near the end : clamp ip to the last safe 4-byte read position */
                    bitCount -= (int)(8 * (iend - 4 - ip));
                    ip = iend - 4;
                }
                bitStream = FSE_readLE32(ip) >> (bitCount & 31);
            }
        }
    }
    if (remaining != 1) return (size_t)-FSE_ERROR_GENERIC;
    *maxSVPtr = charnum-1;

    ip += (bitCount+7)>>3;
    if ((size_t)(ip-istart) > hbSize) return (size_t)-FSE_ERROR_srcSize_wrong;
    return ip-istart;
}


/*********************************************************
*  Decompression (Byte symbols)
*********************************************************/
/* Build a 1-cell DTable that always outputs symbolValue (RLE block) */
static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
{
    void* ptr = dt;
    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
    FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1;   /* because dt is unsigned */

    DTableH->tableLog = 0;
    DTableH->fastMode = 0;

    cell->newState = 0;
    cell->symbol = symbolValue;
    cell->nbBits = 0;

    return 0;
}


/* Build a pass-through DTable : every symbol coded on exactly nbBits (uncompressed data) */
static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
{
    void* ptr = dt;
    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
    FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1;   /* because dt is unsigned */
    const unsigned tableSize = 1 << nbBits;
    const unsigned tableMask = tableSize - 1;
    const unsigned maxSymbolValue = tableMask;
    unsigned s;

    /* Sanity checks */
    if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC;   /* min size */

    /* Build Decoding Table */
    DTableH->tableLog = (U16)nbBits;
    DTableH->fastMode = 1;
    for (s=0; s<=maxSymbolValue; s++)
    {
        dinfo[s].newState = 0;
        dinfo[s].symbol = (BYTE)s;
        dinfo[s].nbBits = (BYTE)nbBits;
    }

    return 0;
}


/* FSE_initDStream
 * Initialize a FSE_DStream_t.
 * srcBuffer must point at the beginning of an FSE block.
 * The stream is read backward, starting from the last byte, whose highest set bit
 * is the "stop bit" marking the exact bit position where the stream ends.
 * The function result is the size of the FSE_block (== srcSize).
 * If srcSize is too small, the function will return an errorCode;
 */
static size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
{
    if (srcSize < 1) return (size_t)-FSE_ERROR_srcSize_wrong;

    if (srcSize >= sizeof(size_t))
    {
        U32 contain32;
        bitD->start = (const char*)srcBuffer;
        bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(size_t);
        bitD->bitContainer = FSE_readLEST(bitD->ptr);
        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
        if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC;   /* stop bit not present */
        bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
    }
    else
    {
        /* input smaller than one bit-container : assemble it byte by byte */
        U32 contain32;
        bitD->start = (const char*)srcBuffer;
        bitD->ptr   = bitD->start;
        bitD->bitContainer = *(const BYTE*)(bitD->start);
        switch(srcSize)
        {
            case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);   /* fallthrough */
            case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);   /* fallthrough */
            case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);   /* fallthrough */
            case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;   /* fallthrough */
            case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;   /* fallthrough */
            case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) <<  8;   /* fallthrough */
            default:;
        }
        contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
        if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC;   /* stop bit not present */
        bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
        bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
    }

    return srcSize;
}


/*!FSE_lookBits
 * Provides next n bits from the bitContainer.
 * bitContainer is not modified (bits are still present for next read/look)
 * On 32-bits, maxNbBits==25
 * On 64-bits, maxNbBits==57
 * return : value extracted.
 */
static size_t FSE_lookBits(FSE_DStream_t* bitD, U32 nbBits)
{
    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
    /* the extra >>1 makes nbBits==0 well-defined (avoids a shift by the full word width) */
    return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
}

static size_t FSE_lookBitsFast(FSE_DStream_t* bitD, U32 nbBits)   /* only if nbBits >= 1 !! */
{
    const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
    return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
}

/* Marks nbBits as consumed; the container is refilled later by FSE_reloadDStream() */
static void FSE_skipBits(FSE_DStream_t* bitD, U32 nbBits)
{
    bitD->bitsConsumed += nbBits;
}


/*!FSE_readBits
 * Read next n bits from the bitContainer.
 * On 32-bits, don't read more than maxNbBits==25
 * On 64-bits, don't read more than maxNbBits==57
 * Use the fast variant *only* if n >= 1.
 * return : value extracted.
 */
static size_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits)
{
    size_t value = FSE_lookBits(bitD, nbBits);
    FSE_skipBits(bitD, nbBits);
    return value;
}

static size_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits)   /* only if nbBits >= 1 !! */
{
    size_t value = FSE_lookBitsFast(bitD, nbBits);
    FSE_skipBits(bitD, nbBits);
    return value;
}

/* Refill the bit container by moving bitD->ptr backward.
 * Returns an FSE_DStream_status describing how much stream remains. */
static unsigned FSE_reloadDStream(FSE_DStream_t* bitD)
{
    if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))   /* should never happen */
        return FSE_DStream_tooFar;

    if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
    {
        /* whole-container reload available */
        bitD->ptr -= bitD->bitsConsumed >> 3;
        bitD->bitsConsumed &= 7;
        bitD->bitContainer = FSE_readLEST(bitD->ptr);
        return FSE_DStream_unfinished;
    }
    if (bitD->ptr == bitD->start)
    {
        if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return FSE_DStream_endOfBuffer;
        return FSE_DStream_completed;
    }
    {
        /* partial reload : cannot move past the buffer start */
        U32 nbBytes = bitD->bitsConsumed >> 3;
        U32 result = FSE_DStream_unfinished;
        if (bitD->ptr - nbBytes < bitD->start)
        {
            nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
            result = FSE_DStream_endOfBuffer;
        }
        bitD->ptr -= nbBytes;
        bitD->bitsConsumed -= nbBytes*8;
        bitD->bitContainer = FSE_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD) */
        return result;
    }
}


/* Load the initial state (tableLog bits) from the stream */
static void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const FSE_DTable* dt)
{
    const void* ptr = dt;
    const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr;
    DStatePtr->state = FSE_readBits(bitD, DTableH->tableLog);
    FSE_reloadDStream(bitD);
    DStatePtr->table = dt + 1;
}

/* Decode one symbol and advance the state (safe variant : handles nbBits==0) */
static BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
{
    const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
    const U32  nbBits = DInfo.nbBits;
    BYTE symbol = DInfo.symbol;
    size_t lowBits = FSE_readBits(bitD, nbBits);

    DStatePtr->state = DInfo.newState + lowBits;
    return symbol;
}

/* Fast variant : valid only when the table guarantees nbBits >= 1 (fastMode) */
static BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
{
    const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
    const U32 nbBits = DInfo.nbBits;
    BYTE symbol = DInfo.symbol;
    size_t lowBits = FSE_readBitsFast(bitD, nbBits);

    DStatePtr->state = DInfo.newState + lowBits;
    return symbol;
}

/* FSE_endOfDStream
   Tells if bitD has reached end of bitStream or not */

static unsigned FSE_endOfDStream(const FSE_DStream_t* bitD)
{
    return ((bitD->ptr == bitD->start) && (bitD->bitsConsumed == sizeof(bitD->bitContainer)*8));
}

static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
{
    return DStatePtr->state == 0;
}


/* Core decompression loop : two interleaved states, 4 symbols per iteration.
 * `fast` must be a compile-time constant so the branch folds away. */
FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
          void* dst, size_t maxDstSize,
    const void* cSrc, size_t cSrcSize,
    const FSE_DTable* dt, const unsigned fast)
{
    BYTE* const ostart = (BYTE*) dst;
    BYTE* op = ostart;
    BYTE* const omax = op + maxDstSize;
    BYTE* const olimit = omax-3;

    FSE_DStream_t bitD;
    FSE_DState_t state1;
    FSE_DState_t state2;
    size_t errorCode;

    /* Init */
    errorCode = FSE_initDStream(&bitD, cSrc, cSrcSize);   /* replaced last arg by maxCompressed Size */
    if (FSE_isError(errorCode)) return errorCode;

    FSE_initDState(&state1, &bitD, dt);
    FSE_initDState(&state2, &bitD, dt);

#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)

    /* 4 symbols per loop */
    for ( ; (FSE_reloadDStream(&bitD)==FSE_DStream_unfinished) && (op<olimit) ; op+=4)
    {
        op[0] = FSE_GETSYMBOL(&state1);

        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
            FSE_reloadDStream(&bitD);

        op[1] = FSE_GETSYMBOL(&state2);

        if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
            { if (FSE_reloadDStream(&bitD) > FSE_DStream_unfinished) { op+=2; break; } }

        op[2] = FSE_GETSYMBOL(&state1);

        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
            FSE_reloadDStream(&bitD);

        op[3] = FSE_GETSYMBOL(&state2);
    }

    /* tail */
    /* note : FSE_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly FSE_DStream_completed */
    while (1)
    {
        if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
            break;

        *op++ = FSE_GETSYMBOL(&state1);

        if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
            break;

        *op++ = FSE_GETSYMBOL(&state2);
    }

    /* end ? */
    if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
        return op-ostart;

    if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */

    return (size_t)-FSE_ERROR_corruptionDetected;
}


/* Dispatch to the fast or safe decode loop according to the table's fastMode flag */
static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
                                         const void* cSrc, size_t cSrcSize,
                                         const FSE_DTable* dt)
{
    FSE_DTableHeader DTableH;
    memcpy(&DTableH, dt, sizeof(DTableH));   /* memcpy() into local variable, to avoid strict aliasing warning */

    /* select fast mode (static) */
    if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
    return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
}


/* Full FSE decompression : read the table description, build the DTable, decode the payload.
 * Returns the number of bytes written to dst, or (size_t)-errorCode. */
static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
{
    const BYTE* const istart = (const BYTE*)cSrc;
    const BYTE* ip = istart;
    short counting[FSE_MAX_SYMBOL_VALUE+1];
    DTable_max_t dt;   /* Static analyzer seems unable to understand this table will be properly initialized later */
    unsigned tableLog;
    unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
    size_t errorCode;

    if (cSrcSize<2) return (size_t)-FSE_ERROR_srcSize_wrong;   /* too small input size */

    /* normal FSE decoding mode */
    errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
    if (FSE_isError(errorCode)) return errorCode;
    if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;   /* too small input size */
    ip += errorCode;
    cSrcSize -= errorCode;

    errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
    if (FSE_isError(errorCode)) return errorCode;

    /* always return, even if it is an error code */
    return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
}



/*
*******************************************
*  Huff0 : Huffman block compression
*********************************************/
#define HUF_MAX_SYMBOL_VALUE 255
#define HUF_DEFAULT_TABLELOG  12        /* used by default, when not specified */
#define HUF_MAX_TABLELOG  12            /* max possible tableLog; for allocation purpose; can be modified */
#define HUF_ABSOLUTEMAX_TABLELOG  16    /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
# error "HUF_MAX_TABLELOG is too large !"
#endif

/* compression-side cell : code value + code length, per symbol */
typedef struct HUF_CElt_s {
    U16  val;
    BYTE nbBits;
} HUF_CElt ;

/* node of the Huffman tree used during table construction */
typedef struct nodeElt_s {
    U32 count;
    U16 parent;
    BYTE byte;
    BYTE nbBits;
} nodeElt;


/* *******************************************************
*  Huff0 : Huffman block decompression
*********************************************************/
/* decompression-side cell : decoded byte + nb of bits consumed */
typedef struct {
    BYTE byte;
    BYTE nbBits;
} HUF_DElt;

/* Reads the Huffman table description at the start of a Huff0-compressed block
 * and fills DTable with (byte, nbBits) cells.
 * Header byte iSize selects the representation :
 *   >= 242 : RLE (all weights are 1, fixed sizes from a lookup table)
 *   >= 128 : raw 4-bit weights, 2 per byte
 *   < 128  : weights themselves compressed with FSE.
 * The weight of the last symbol is not stored; it is reconstructed so that the
 * weight total reaches a clean power of 2 (checked, else corruption).
 * @return : size of the table header read from src (to skip), or an error code. */
static size_t HUF_readDTable (U16* DTable, const void* src, size_t srcSize)
{
    BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];  /* large enough for values from 0 to 16 */
    U32 weightTotal;
    U32 maxBits;
    const BYTE* ip = (const BYTE*) src;
    size_t iSize;
    size_t oSize;
    U32 n;
    U32 nextRankStart;
    void* ptr = DTable+1;   /* cell array starts right after the 1-cell header */
    HUF_DElt* const dt = (HUF_DElt*)ptr;

    if (!srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
    iSize = ip[0];

    FSE_STATIC_ASSERT(sizeof(HUF_DElt) == sizeof(U16));    /* if compilation fails here, assertion is false */
    //memset(huffWeight, 0, sizeof(huffWeight));   /* should not be necessary, but some analyzer complain ... */
    if (iSize >= 128)  /* special header */
    {
        if (iSize >= (242))   /* RLE */
        {
            static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
            oSize = l[iSize-242];
            memset(huffWeight, 1, sizeof(huffWeight));
            iSize = 0;   /* no additional header bytes consumed */
        }
        else   /* Incompressible */
        {
            oSize = iSize - 127;
            iSize = ((oSize+1)/2);   /* 2 weights per byte */
            if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
            ip += 1;
            for (n=0; n<oSize; n+=2)
            {
                huffWeight[n]   = ip[n/2] >> 4;
                huffWeight[n+1] = ip[n/2] & 15;
            }
        }
    }
    else  /* header compressed with FSE (normal case) */
    {
        if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
        oSize = FSE_decompress(huffWeight, HUF_MAX_SYMBOL_VALUE, ip+1, iSize);   /* max 255 values decoded, last one is implied */
        if (FSE_isError(oSize)) return oSize;
    }

    /* collect weight stats */
    memset(rankVal, 0, sizeof(rankVal));
    weightTotal = 0;
    for (n=0; n<oSize; n++)
    {
        if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return (size_t)-FSE_ERROR_corruptionDetected;
        rankVal[huffWeight[n]]++;
        weightTotal += (1 << huffWeight[n]) >> 1;   /* weight w contributes 2^(w-1); weight 0 contributes nothing */
    }
    if (weightTotal == 0) return (size_t)-FSE_ERROR_corruptionDetected;

    /* get last non-null symbol weight (implied, total must be 2^n) */
    maxBits = FSE_highbit32(weightTotal) + 1;
    if (maxBits > DTable[0]) return (size_t)-FSE_ERROR_tableLog_tooLarge;   /* DTable is too small */
    DTable[0] = (U16)maxBits;
    {
        U32 total = 1 << maxBits;
        U32 rest = total - weightTotal;
        U32 verif = 1 << FSE_highbit32(rest);
        U32 lastWeight = FSE_highbit32(rest) + 1;
        if (verif != rest) return (size_t)-FSE_ERROR_corruptionDetected;    /* last value must be a clean power of 2 */
        huffWeight[oSize] = (BYTE)lastWeight;
        rankVal[lastWeight]++;
    }

    /* check tree construction validity */
    if ((rankVal[1] < 2) || (rankVal[1] & 1)) return (size_t)-FSE_ERROR_corruptionDetected;   /* by construction : at least 2 elts of rank 1, must be even */

    /* Prepare ranks : rankVal[n] becomes the start index of weight-n cells */
    nextRankStart = 0;
    for (n=1; n<=maxBits; n++)
    {
        U32 current = nextRankStart;
        nextRankStart += (rankVal[n] << (n-1));
        rankVal[n] = current;
    }

    /* fill DTable : each symbol occupies 2^(w-1) consecutive cells */
    for (n=0; n<=oSize; n++)
    {
        const U32 w = huffWeight[n];
        const U32 length = (1 << w) >> 1;
        U32 i;
        HUF_DElt D;
        D.byte = (BYTE)n; D.nbBits = (BYTE)(maxBits + 1 - w);
        for (i = rankVal[w]; i < rankVal[w] + length; i++)
            dt[i] = D;
        rankVal[w] += length;
    }

    return iSize+1;   /* header bytes consumed, including the size byte itself */
}


/* Decode one symbol : peek dtLog bits, look up the cell, consume its nbBits. */
static BYTE HUF_decodeSymbol(FSE_DStream_t* Dstream, const HUF_DElt* dt, const U32 dtLog)
{
    const size_t val = FSE_lookBitsFast(Dstream, dtLog);   /* note : dtLog >= 1 */
    const BYTE c = dt[val].byte;
    FSE_skipBits(Dstream, dt[val].nbBits);
    return c;
}

/* Decompress a Huff0 block split into 4 interleaved bitstreams.
 * Layout : 3 little-endian U16 lengths (jump table), then the 4 streams.
 * The hot loop emits 16 symbols per iteration (4 per stream); the remainder is
 * handled by the tail loop on stream 1 alone. */
static size_t HUF_decompress_usingDTable(   /* -3% slower when non static */
          void* dst, size_t maxDstSize,
    const void* cSrc, size_t cSrcSize,
    const U16* DTable)
{
    BYTE* const ostart = (BYTE*) dst;
    BYTE* op = ostart;
    BYTE* const omax = op + maxDstSize;
    BYTE* const olimit = omax-15;   /* NOTE(review): if maxDstSize < 15 this points before ostart — loop guard still skips, but the arithmetic is questionable; confirm callers guarantee a minimum size */

    const void* ptr = DTable;
    const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1;   /* skip the 1-cell header */
    const U32 dtLog = DTable[0];
    size_t errorCode;
    U32 reloadStatus;

    /* Init */

    const U16* jumpTable = (const U16*)cSrc;
    const size_t length1 = FSE_readLE16(jumpTable);
    const size_t length2 = FSE_readLE16(jumpTable+1);
    const size_t length3 = FSE_readLE16(jumpTable+2);
    const size_t length4 = cSrcSize - 6 - length1 - length2 - length3;   // check coherency !!
    const char* const start1 = (const char*)(cSrc) + 6;
    const char* const start2 = start1 + length1;
    const char* const start3 = start2 + length2;
    const char* const start4 = start3 + length3;
    FSE_DStream_t bitD1, bitD2, bitD3, bitD4;

    /* guarantees length4 > 0 and all stream starts are inside cSrc */
    if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;

    errorCode = FSE_initDStream(&bitD1, start1, length1);
    if (FSE_isError(errorCode)) return errorCode;
    errorCode = FSE_initDStream(&bitD2, start2, length2);
    if (FSE_isError(errorCode)) return errorCode;
    errorCode = FSE_initDStream(&bitD3, start3, length3);
    if (FSE_isError(errorCode)) return errorCode;
    errorCode = FSE_initDStream(&bitD4, start4, length4);
    if (FSE_isError(errorCode)) return errorCode;

    reloadStatus=FSE_reloadDStream(&bitD2);

    /* 16 symbols per loop */
    for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit);  /* D2-3-4 are supposed to be synchronized and finish together */
        op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
    {
/* _0 : decode only ; _1 : decode + reload on 32-bit when tables may exceed 12 bits ; _2 : decode + always reload on 32-bit */
#define HUF_DECODE_SYMBOL_0(n, Dstream) \
        op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);

#define HUF_DECODE_SYMBOL_1(n, Dstream) \
        op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
        if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)

#define HUF_DECODE_SYMBOL_2(n, Dstream) \
        op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
        if (FSE_32bits()) FSE_reloadDStream(&Dstream)

        HUF_DECODE_SYMBOL_1( 0, bitD1);
        HUF_DECODE_SYMBOL_1( 1, bitD2);
        HUF_DECODE_SYMBOL_1( 2, bitD3);
        HUF_DECODE_SYMBOL_1( 3, bitD4);
        HUF_DECODE_SYMBOL_2( 4, bitD1);
        HUF_DECODE_SYMBOL_2( 5, bitD2);
        HUF_DECODE_SYMBOL_2( 6, bitD3);
        HUF_DECODE_SYMBOL_2( 7, bitD4);
        HUF_DECODE_SYMBOL_1( 8, bitD1);
        HUF_DECODE_SYMBOL_1( 9, bitD2);
        HUF_DECODE_SYMBOL_1(10, bitD3);
        HUF_DECODE_SYMBOL_1(11, bitD4);
        HUF_DECODE_SYMBOL_0(12, bitD1);
        HUF_DECODE_SYMBOL_0(13, bitD2);
        HUF_DECODE_SYMBOL_0(14, bitD3);
        HUF_DECODE_SYMBOL_0(15, bitD4);
    }

    if (reloadStatus!=FSE_DStream_completed)   /* not complete : some bitStream might be FSE_DStream_unfinished */
        return (size_t)-FSE_ERROR_corruptionDetected;

    /* tail : finish with stream 1 only, one symbol at a time */
    {
        // bitTail = bitD1;   // *much* slower : -20% !??!
        FSE_DStream_t bitTail;
        bitTail.ptr = bitD1.ptr;
        bitTail.bitsConsumed = bitD1.bitsConsumed;
        bitTail.bitContainer = bitD1.bitContainer;   // required in case of FSE_DStream_endOfBuffer
        bitTail.start = start1;
        for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
        {
            HUF_DECODE_SYMBOL_0(0, bitTail);
        }

        if (FSE_endOfDStream(&bitTail))
            return op-ostart;
    }

    if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */

    return (size_t)-FSE_ERROR_corruptionDetected;
}


/* One-shot Huff0 decompression : read the table header, then decode the
 * remaining payload with the table just built.
 * @return : number of bytes written into dst, or an error code. */
static size_t HUF_decompress (void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
{
    HUF_CREATE_STATIC_DTABLE(DTable, HUF_MAX_TABLELOG);
    const BYTE* ip = (const BYTE*) cSrc;
    size_t errorCode;

    errorCode = HUF_readDTable (DTable, cSrc, cSrcSize);
    if (FSE_isError(errorCode)) return errorCode;
    if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
    ip += errorCode;
    cSrcSize -= errorCode;

    return HUF_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable);
}


#endif   /* FSE_COMMONDEFS_ONLY */

/*
    zstd - standard compression library
    Copyright (C) 2014-2015, Yann Collet.
1187 1188 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 1189 1190 Redistribution and use in source and binary forms, with or without 1191 modification, are permitted provided that the following conditions are 1192 met: 1193 * Redistributions of source code must retain the above copyright 1194 notice, this list of conditions and the following disclaimer. 1195 * Redistributions in binary form must reproduce the above 1196 copyright notice, this list of conditions and the following disclaimer 1197 in the documentation and/or other materials provided with the 1198 distribution. 1199 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 1200 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 1201 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 1202 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 1203 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 1204 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 1205 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 1206 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 1207 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 1208 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 1209 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 1210 1211 You can contact the author at : 1212 - zstd source repository : https://github.com/Cyan4973/zstd 1213 - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c 1214 */ 1215 1216 /**************************************************************** 1217 * Tuning parameters 1218 *****************************************************************/ 1219 /* MEMORY_USAGE : 1220 * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) 
*  Increasing memory usage improves compression ratio
*  Reduced memory usage can improve speed, due to cache effect */
#define ZSTD_MEMORY_USAGE 17


/**************************************
   CPU Feature Detection
**************************************/
/*
 * Automated efficient unaligned memory access detection
 * Based on known hardware architectures
 * This list will be updated thanks to feedbacks
 */
#if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \
    || defined(__ARM_FEATURE_UNALIGNED) \
    || defined(__i386__) || defined(__x86_64__) \
    || defined(_M_IX86) || defined(_M_X64) \
    || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__) \
    || (defined(_M_ARM) && (_M_ARM >= 7))
#  define ZSTD_UNALIGNED_ACCESS 1
#else
#  define ZSTD_UNALIGNED_ACCESS 0
#endif


/********************************************************
*  Includes
*********************************************************/
#include <stdlib.h>      /* calloc */
#include <string.h>      /* memcpy, memmove */
#include <stdio.h>       /* debug : printf */


/********************************************************
*  Compiler specifics
*********************************************************/
#ifdef __AVX2__
#  include <immintrin.h>   /* AVX2 intrinsics */
#endif

#ifdef _MSC_VER    /* Visual Studio */
#  include <intrin.h>                    /* For Visual 2005 */
#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
#endif


#ifndef MEM_ACCESS_MODULE
#define MEM_ACCESS_MODULE
/********************************************************
*  Basic Types
*********************************************************/
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
# include <stdint.h>
typedef  uint8_t BYTE;
typedef uint16_t U16;
typedef  int16_t S16;
typedef uint32_t U32;
typedef  int32_t S32;
typedef uint64_t U64;
#else
/* pre-C99 fallback : sizes assumed, not guaranteed, on exotic platforms */
typedef unsigned char       BYTE;
typedef unsigned short      U16;
typedef   signed short      S16;
typedef unsigned int        U32;
typedef   signed int        S32;
typedef unsigned long long  U64;
#endif

#endif /* MEM_ACCESS_MODULE */


/********************************************************
*  Constants
*********************************************************/
static const U32 ZSTD_magicNumber = 0xFD2FB51E;   /* 3rd version : seqNb header */

#define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
#define HASH_TABLESIZE (1 << HASH_LOG)
#define HASH_MASK (HASH_TABLESIZE - 1)

#define KNUTH 2654435761

#define BIT7 128
#define BIT6  64
#define BIT5  32
#define BIT4  16

/* unit suffixes : written as `128 KB` which expands to `128 *(1<<10)` */
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)

#define BLOCKSIZE (128 KB)   /* define, for static allocation */

#define WORKPLACESIZE (BLOCKSIZE*3)
#define MINMATCH 4
#define MLbits   7
#define LLbits   6
#define Offbits  5
#define MaxML  ((1<<MLbits )-1)
#define MaxLL  ((1<<LLbits )-1)
#define MaxOff ((1<<Offbits)-1)
#define LitFSELog  11
#define MLFSELog   10
#define LLFSELog   10
#define OffFSELog   9
#define MAX(a,b) ((a)<(b)?(b):(a))
#define MaxSeq MAX(MaxLL, MaxML)

#define LITERAL_NOENTROPY 63
#define COMMAND_NOENTROPY 7   /* to remove */

static const size_t ZSTD_blockHeaderSize = 3;
static const size_t ZSTD_frameHeaderSize = 4;


/********************************************************
*  Memory operations
*********************************************************/
static unsigned ZSTD_32bits(void) { return sizeof(void*)==4; }

/* runtime endianness probe (optimizers fold this to a constant) */
static unsigned ZSTD_isLittleEndian(void)
{
    const union { U32 i; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
    return one.c[0];
}

/* unaligned-safe reads via memcpy (no strict-aliasing violation) */
static U16    ZSTD_read16(const void* p) { U16 r; memcpy(&r, p, sizeof(r)); return r; }

static U32    ZSTD_read32(const void* p) { U32 r; memcpy(&r, p, sizeof(r)); return r; }

static void   ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }

static void   ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }

#define COPY8(d,s)    { ZSTD_copy8(d,s); d+=8; s+=8; }

/* copies `length` bytes in 8-byte steps.
 * NOTE(review): may write up to 7 bytes past dst+length — callers must
 * guarantee that slack (they do, via the `oend-8` checks in ZSTD_execSequence). */
static void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
{
    const BYTE* ip = (const BYTE*)src;
    BYTE* op = (BYTE*)dst;
    BYTE* const oend = op + length;
    while (op < oend) COPY8(op, ip);
}

static U16 ZSTD_readLE16(const void* memPtr)
{
    if (ZSTD_isLittleEndian()) return ZSTD_read16(memPtr);
    else
    {
        const BYTE* p = (const BYTE*)memPtr;
        return (U16)((U16)p[0] + ((U16)p[1]<<8));
    }
}

static U32 ZSTD_readLE32(const void* memPtr)
{
    if (ZSTD_isLittleEndian())
        return ZSTD_read32(memPtr);
    else
    {
        const BYTE* p = (const BYTE*)memPtr;
        return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
    }
}

/* big-endian read, used for the frame magic number */
static U32 ZSTD_readBE32(const void* memPtr)
{
    const BYTE* p = (const BYTE*)memPtr;
    return (U32)(((U32)p[0]<<24) + ((U32)p[1]<<16) + ((U32)p[2]<<8) + ((U32)p[3]<<0));
}


/**************************************
*  Local structures
***************************************/
typedef struct ZSTD_Cctx_s ZSTD_Cctx;

typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;

typedef struct
{
    blockType_t blockType;
    U32 origSize;   /* only meaningful for bt_rle blocks */
} blockProperties_t;

/* compression-side sequence accumulator (start/current pointer pairs) */
typedef struct {
    void* buffer;
    U32*  offsetStart;
    U32*  offset;
    BYTE* offCodeStart;
    BYTE* offCode;
    BYTE* litStart;
    BYTE* lit;
    BYTE* litLengthStart;
    BYTE* litLength;
    BYTE* matchLengthStart;
    BYTE* matchLength;
    BYTE* dumpsStart;
    BYTE* dumps;
} seqStore_t;


/* compression context (unused by the decode path below, kept for layout) */
typedef struct ZSTD_Cctx_s
{
    const BYTE* base;
    U32 current;
    U32 nextUpdate;
    seqStore_t seqStore;
#ifdef __AVX2__
    __m256i hashTable[HASH_TABLESIZE>>3];
#else
    U32 hashTable[HASH_TABLESIZE];
#endif
    BYTE buffer[WORKPLACESIZE];
} cctxi_t;




/**************************************
*  Error Management
**************************************/
/* published entry point */
unsigned ZSTDv01_isError(size_t code) { return ERR_isError(code); }


/**************************************
*  Tool functions
**************************************/
#define ZSTD_VERSION_MAJOR    0    /* for breaking interface changes */
#define ZSTD_VERSION_MINOR    1    /* for new (non-breaking) interface capabilities */
#define ZSTD_VERSION_RELEASE  3    /* for tweaks, bug-fixes, or development */
#define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)

/**************************************************************
*   Decompression code
**************************************************************/

/* Parses a 3-byte block header : 2-bit type + 22-bit compressed size.
 * For bt_rle, origSize receives the regenerated size and 1 (the payload byte) is returned.
 * @return : size of the block payload to read next, or an error code. */
size_t ZSTDv01_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
{
    const BYTE* const in = (const BYTE* const)src;
    BYTE headerFlags;
    U32 cSize;

    if (srcSize < 3) return ERROR(srcSize_wrong);

    headerFlags = *in;
    cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);

    bpPtr->blockType = (blockType_t)(headerFlags >> 6);
    bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;

    if (bpPtr->blockType == bt_end) return 0;
    if (bpPtr->blockType == bt_rle) return 1;
    return cSize;
}


/* Plain copy for bt_raw blocks. @return : srcSize, or dstSize_tooSmall. */
static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
    if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
    memcpy(dst, src, srcSize);
    return srcSize;
}


/* Huff0-decompresses the literals payload into the *end* of dst
 * (literals are flushed right-aligned so sequences can copy from them).
 * @return : regenerated literals size, or an error code. */
static size_t ZSTD_decompressLiterals(void* ctx,
                                      void* dst, size_t maxDstSize,
                                const void* src, size_t srcSize)
{
    BYTE* op = (BYTE*)dst;
    BYTE* const oend = op + maxDstSize;
    const BYTE* ip = (const BYTE*)src;
    size_t errorCode;
    size_t litSize;

    /* check : minimum 2, for litSize, +1, for content */
    if (srcSize <= 3) return ERROR(corruption_detected);

    litSize = ip[1] + (ip[0]<<8);
    litSize += ((ip[-3] >> 3) & 7) << 16;   // mmmmh....
    /* NOTE(review): ip[-3] reads the literals *block header* located just before
     * src — relies on the caller always passing ip = header+3; confirm no other
     * call path exists before touching this */
    op = oend - litSize;

    (void)ctx;
    if (litSize > maxDstSize) return ERROR(dstSize_tooSmall);
    errorCode = HUF_decompress(op, litSize, ip+2, srcSize-2);
    if (FSE_isError(errorCode)) return ERROR(GENERIC);
    return litSize;
}


/* Decodes the literals sub-block of a compressed block.
 * On success, *litStart/*litSize describe where the regenerated literals live
 * (inside dst for rle/compressed, inside src for raw).
 * @return : number of input bytes consumed, or an error code. */
size_t ZSTDv01_decodeLiteralsBlock(void* ctx,
                                void* dst, size_t maxDstSize,
                          const BYTE** litStart, size_t* litSize,
                          const void* src, size_t srcSize)
{
    const BYTE* const istart = (const BYTE* const)src;
    const BYTE* ip = istart;
    BYTE* const ostart = (BYTE* const)dst;
    BYTE* const oend = ostart + maxDstSize;
    blockProperties_t litbp;

    size_t litcSize = ZSTDv01_getcBlockSize(src, srcSize, &litbp);
    if (ZSTDv01_isError(litcSize)) return litcSize;
    if (litcSize > srcSize - ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
    ip += ZSTD_blockHeaderSize;

    switch(litbp.blockType)
    {
    case bt_raw:
        *litStart = ip;   /* literals stay in the source buffer */
        ip += litcSize;
        *litSize = litcSize;
        break;
    case bt_rle:
        {
            size_t rleSize = litbp.origSize;
            if (rleSize>maxDstSize) return ERROR(dstSize_tooSmall);
            if (!srcSize) return ERROR(srcSize_wrong);
            memset(oend - rleSize, *ip, rleSize);
            *litStart = oend - rleSize;
            *litSize = rleSize;
            ip++;
            break;
        }
    case bt_compressed:
        {
            size_t decodedLitSize = ZSTD_decompressLiterals(ctx, dst, maxDstSize, ip, litcSize);
            if (ZSTDv01_isError(decodedLitSize)) return decodedLitSize;
            *litStart = oend - decodedLitSize;
            *litSize = decodedLitSize;
            ip += litcSize;
            break;
        }
    case bt_end:
    default:
        return ERROR(GENERIC);
    }

    return ip-istart;
}


/* Parses the sequences section header : nbSeq, the "dumps" area (raw extra
 * lengths), then builds the 3 FSE decoding tables (literal length, offset,
 * match length), each either rle / raw / FSE-compressed.
 * @return : number of input bytes consumed, or an error code. */
size_t ZSTDv01_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
                         FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
                         const void* src, size_t srcSize)
{
    const BYTE* const istart = (const BYTE* const)src;
    const BYTE* ip = istart;
    const BYTE* const iend = istart + srcSize;
    U32 LLtype, Offtype, MLtype;
    U32 LLlog, Offlog, MLlog;
    size_t dumpsLength;

    /* check */
    if (srcSize < 5) return ERROR(srcSize_wrong);

    /* SeqHead */
    *nbSeq = ZSTD_readLE16(ip); ip+=2;
    LLtype  = *ip >> 6;
    Offtype = (*ip >> 4) & 3;
    MLtype  = (*ip >> 2) & 3;
    if (*ip & 2)   /* long dumps length : 2 extra bytes */
    {
        dumpsLength  = ip[2];
        dumpsLength += ip[1] << 8;
        ip += 3;
    }
    else           /* short dumps length : 1 extra byte + low bit of flags */
    {
        dumpsLength  = ip[1];
        dumpsLength += (ip[0] & 1) << 8;
        ip += 2;
    }
    *dumpsPtr = ip;
    ip += dumpsLength;
    *dumpsLengthPtr = dumpsLength;

    /* check */
    if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */

    /* sequences */
    {
        S16 norm[MaxML+1];    /* assumption : MaxML >= MaxLL and MaxOff */
        size_t headerSize;

        /* Build DTables */
        switch(LLtype)
        {
        case bt_rle :
            LLlog = 0;
            FSE_buildDTable_rle(DTableLL, *ip++); break;
        case bt_raw :
            LLlog = LLbits;
            FSE_buildDTable_raw(DTableLL, LLbits); break;
        default :
            {   U32 max = MaxLL;
                headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
                if (FSE_isError(headerSize)) return ERROR(GENERIC);
                if (LLlog > LLFSELog) return ERROR(corruption_detected);
                ip += headerSize;
                FSE_buildDTable(DTableLL, norm, max, LLlog);
        }   }

        switch(Offtype)
        {
        case bt_rle :
            Offlog = 0;
            if (ip > iend-2) return ERROR(srcSize_wrong);   /* min : "raw", hence no header, but at least xxLog bits */
            FSE_buildDTable_rle(DTableOffb, *ip++); break;
        case bt_raw :
            Offlog = Offbits;
            FSE_buildDTable_raw(DTableOffb, Offbits); break;
        default :
            {   U32 max = MaxOff;
                headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
                if (FSE_isError(headerSize)) return ERROR(GENERIC);
                if (Offlog > OffFSELog) return ERROR(corruption_detected);
                ip += headerSize;
                FSE_buildDTable(DTableOffb, norm, max, Offlog);
        }   }

        switch(MLtype)
        {
        case bt_rle :
            MLlog = 0;
            if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
            FSE_buildDTable_rle(DTableML, *ip++); break;
        case bt_raw :
            MLlog = MLbits;
            FSE_buildDTable_raw(DTableML, MLbits); break;
        default :
            {   U32 max = MaxML;
                headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
                if (FSE_isError(headerSize)) return ERROR(GENERIC);
                if (MLlog > MLFSELog) return ERROR(corruption_detected);
                ip += headerSize;
                FSE_buildDTable(DTableML, norm, max, MLlog);
    }   }   }

    return ip-istart;
}


/* one decoded sequence : copy litLength literals, then copy matchLength bytes
 * from `offset` back */
typedef struct {
    size_t litLength;
    size_t offset;
    size_t matchLength;
} seq_t;

/* rolling decoder state shared across sequences of one block */
typedef struct {
    FSE_DStream_t DStream;
    FSE_DState_t stateLL;
    FSE_DState_t stateOffb;
    FSE_DState_t stateML;
    size_t prevOffset;      /* for offsetCode==0 (repeat-offset) */
    const BYTE* dumps;      /* raw extra-length bytes, consumed on MaxLL/MaxML */
    const BYTE* dumpsEnd;
} seqState_t;


/* Decodes one sequence (litLength, offset, matchLength) from the FSE states.
 * Lengths equal to the max symbol value overflow into the "dumps" area :
 * one byte < 255 is additive; 255 escapes to a 3-byte absolute value. */
static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
{
    size_t litLength;
    size_t prevOffset;
    size_t offset;
    size_t matchLength;
    const BYTE* dumps = seqState->dumps;
    const BYTE* const de = seqState->dumpsEnd;

    /* Literal length */
    litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
    prevOffset = litLength ? seq->offset : seqState->prevOffset;
    seqState->prevOffset = seq->offset;
    if (litLength == MaxLL)
    {
        U32 add = dumps<de ? *dumps++ : 0;
        if (add < 255) litLength += add;
        else
        {
            if (dumps<=(de-3))
            {
                litLength = ZSTD_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
                dumps += 3;
            }
        }
    }

    /* Offset */
    {
        U32 offsetCode, nbBits;
        offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream));
        if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
        nbBits = offsetCode - 1;
        if (offsetCode==0) nbBits = 0;   /* cmove */
        /* mask keeps the shift < bit-width, avoiding UB when nbBits is large */
        offset = ((size_t)1 << (nbBits & ((sizeof(offset)*8)-1))) + FSE_readBits(&(seqState->DStream), nbBits);
        if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
        if (offsetCode==0) offset = prevOffset;   /* repeat previous offset */
    }

    /* MatchLength */
    matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
    if (matchLength == MaxML)
    {
        U32 add = dumps<de ?
*dumps++ : 0;
        if (add < 255) matchLength += add;
        else
        {
            if (dumps<=(de-3))
            {
                matchLength = ZSTD_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
                dumps += 3;
            }
        }
    }
    matchLength += MINMATCH;

    /* save result */
    seq->litLength = litLength;
    seq->offset = offset;
    seq->matchLength = matchLength;
    seqState->dumps = dumps;
}


/* Executes one sequence : copies its literals then its (possibly overlapping)
 * match into op. Uses 8-byte wildcopy in the fast path and the dec32/dec64
 * trick to start overlapping copies when offset < 8.
 * @return : number of bytes written (litLength + matchLength), or an error code. */
static size_t ZSTD_execSequence(BYTE* op,
                                seq_t sequence,
                                const BYTE** litPtr, const BYTE* const litLimit,
                                BYTE* const base, BYTE* const oend)
{
    static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};   /* added */
    static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11};   /* substracted */
    const BYTE* const ostart = op;
    const size_t litLength = sequence.litLength;
    BYTE* const endMatch = op + litLength + sequence.matchLength;    /* risk : address space overflow (32-bits) */
    const BYTE* const litEnd = *litPtr + litLength;

    /* check */
    if (endMatch > oend) return ERROR(dstSize_tooSmall);   /* overwrite beyond dst buffer */
    if (litEnd > litLimit) return ERROR(corruption_detected);
    if (sequence.matchLength > (size_t)(*litPtr-op)) return ERROR(dstSize_tooSmall);    /* overwrite literal segment */

    /* copy Literals */
    if (((size_t)(*litPtr - op) < 8) || ((size_t)(oend-litEnd) < 8) || (op+litLength > oend-8))
        memmove(op, *litPtr, litLength);   /* overwrite risk */
    else
        ZSTD_wildcopy(op, *litPtr, litLength);
    op += litLength;
    *litPtr = litEnd;   /* update for next sequence */

    /* check : last match must be at a minimum distance of 8 from end of dest buffer */
    if (oend-op < 8) return ERROR(dstSize_tooSmall);

    /* copy Match */
    {
        const U32 overlapRisk = (((size_t)(litEnd - endMatch)) < 12);
        const BYTE* match = op - sequence.offset;   /* possible underflow at op - offset ? */
        size_t qutt = 12;
        U64 saved[2];

        /* check
         * NOTE(review): the second test compares offset against the *address* of
         * base cast to size_t, not against (op - base); looks suspicious but is
         * the historical v0.1 behavior — do not change without bitstream tests */
        if (match < base) return ERROR(corruption_detected);
        if (sequence.offset > (size_t)base) return ERROR(corruption_detected);

        /* save beginning of literal sequence, in case of write overlap */
        if (overlapRisk)
        {
            if ((endMatch + qutt) > oend) qutt = oend-endMatch;
            memcpy(saved, endMatch, qutt);
        }

        if (sequence.offset < 8)
        {
            /* start the copy with a stride that makes the overlap self-replicating */
            const int dec64 = dec64table[sequence.offset];
            op[0] = match[0];
            op[1] = match[1];
            op[2] = match[2];
            op[3] = match[3];
            match += dec32table[sequence.offset];
            ZSTD_copy4(op+4, match);
            match -= dec64;
        } else { ZSTD_copy8(op, match); }
        op += 8; match += 8;

        if (endMatch > oend-(16-MINMATCH))   /* close to buffer end : finish carefully */
        {
            if (op < oend-8)
            {
                ZSTD_wildcopy(op, match, (oend-8) - op);
                match += (oend-8) - op;
                op = oend-8;
            }
            while (op<endMatch) *op++ = *match++;
        }
        else
            ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */

        /* restore, in case of overlap */
        if (overlapRisk) memcpy(endMatch, saved, qutt);
    }

    return endMatch-ostart;
}

/* decompression context : 3 FSE tables + streaming bookkeeping */
typedef struct ZSTDv01_Dctx_s
{
    U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
    U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
    U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
    void* previousDstEnd;
    void* base;          /* start of the contiguous decoded region (window base) */
    size_t expected;     /* bytes the streaming API wants next */
    blockType_t bType;
    U32 phase;           /* 0: frame header, 1: block header, 2: block content */
} dctx_t;


/* Decodes the sequences section and regenerates the block content into dst,
 * interleaving literal copies and match copies; ends with the trailing
 * literal segment. Literals were pre-decoded at litStart/litSize.
 * @return : number of bytes written into dst, or an error code. */
static size_t ZSTD_decompressSequences(
                               void* ctx,
                               void* dst, size_t maxDstSize,
                         const void* seqStart, size_t seqSize,
                         const BYTE* litStart, size_t litSize)
{
    dctx_t* dctx = (dctx_t*)ctx;
    const BYTE* ip = (const BYTE*)seqStart;
    const BYTE* const iend = ip + seqSize;
    BYTE* const ostart = (BYTE* const)dst;
    BYTE* op = ostart;
    BYTE* const oend = ostart + maxDstSize;
    size_t errorCode, dumpsLength;
    const BYTE* litPtr = litStart;
    const BYTE* const litEnd = litStart + litSize;
    int nbSeq;
    const BYTE* dumps;
    U32* DTableLL = dctx->LLTable;
    U32* DTableML = dctx->MLTable;
    U32* DTableOffb = dctx->OffTable;
    BYTE* const base = (BYTE*) (dctx->base);

    /* Build Decoding Tables */
    errorCode = ZSTDv01_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
                                      DTableLL, DTableML, DTableOffb,
                                      ip, iend-ip);
    if (ZSTDv01_isError(errorCode)) return errorCode;
    ip += errorCode;

    /* Regen sequences */
    {
        seq_t sequence;
        seqState_t seqState;

        memset(&sequence, 0, sizeof(sequence));
        seqState.dumps = dumps;
        seqState.dumpsEnd = dumps + dumpsLength;
        seqState.prevOffset = 1;
        errorCode = FSE_initDStream(&(seqState.DStream), ip, iend-ip);
        if (FSE_isError(errorCode)) return ERROR(corruption_detected);
        FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
        FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
        FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);

        for ( ; (FSE_reloadDStream(&(seqState.DStream)) <= FSE_DStream_completed) && (nbSeq>0) ; )
        {
            size_t oneSeqSize;
            nbSeq--;
            ZSTD_decodeSequence(&sequence, &seqState);
            oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litEnd, base, oend);
            if (ZSTDv01_isError(oneSeqSize)) return oneSeqSize;
            op += oneSeqSize;
        }

        /* check if reached exact end */
        if ( !FSE_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected);   /* requested too much : data is corrupted */
        if (nbSeq<0) return ERROR(corruption_detected);   /* requested too many sequences : data is corrupted */

        /* last literal segment */
        {
            size_t lastLLSize = litEnd - litPtr;
            if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
            if (op != litPtr) memmove(op, litPtr, lastLLSize);
            op += lastLLSize;
        }
    }

    return op-ostart;
}


/* Decompresses one bt_compressed block : literals sub-block, then sequences.
 * @return : number of bytes written into dst, or an error code. */
static size_t ZSTD_decompressBlock(
                            void* ctx,
                            void* dst, size_t maxDstSize,
                      const void* src, size_t srcSize)
{
    /* blockType == blockCompressed, srcSize is trusted */
    const BYTE* ip = (const BYTE*)src;
    const BYTE* litPtr = NULL;
    size_t litSize = 0;
    size_t errorCode;

    /* Decode literals sub-block */
    errorCode = ZSTDv01_decodeLiteralsBlock(ctx, dst, maxDstSize, &litPtr, &litSize, src, srcSize);
    if (ZSTDv01_isError(errorCode)) return errorCode;
    ip += errorCode;
    srcSize -= errorCode;

    return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize, litPtr, litSize);
}


/* Decompresses a whole v0.1 frame : magic number, then block loop until bt_end.
 * @return : total decompressed size, or an error code. */
size_t ZSTDv01_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
    const BYTE* ip = (const BYTE*)src;
    const BYTE* iend = ip + srcSize;
    BYTE* const ostart = (BYTE* const)dst;
    BYTE* op = ostart;
    BYTE* const oend = ostart + maxDstSize;
    size_t remainingSize = srcSize;
    U32 magicNumber;
    size_t errorCode=0;
    blockProperties_t blockProperties;

    /* Frame Header */
    if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
    magicNumber = ZSTD_readBE32(src);
    if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
    ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;

    /* Loop on each block */
    while (1)
    {
        size_t blockSize = ZSTDv01_getcBlockSize(ip, iend-ip, &blockProperties);
        if (ZSTDv01_isError(blockSize)) return blockSize;

        ip += ZSTD_blockHeaderSize;
        remainingSize -= ZSTD_blockHeaderSize;
        if (blockSize > remainingSize) return ERROR(srcSize_wrong);

        switch(blockProperties.blockType)
        {
        case bt_compressed:
            errorCode = ZSTD_decompressBlock(ctx, op, oend-op, ip, blockSize);
break; 1965 case bt_raw : 1966 errorCode = ZSTD_copyUncompressedBlock(op, oend-op, ip, blockSize); 1967 break; 1968 case bt_rle : 1969 return ERROR(GENERIC); /* not yet supported */ 1970 break; 1971 case bt_end : 1972 /* end of frame */ 1973 if (remainingSize) return ERROR(srcSize_wrong); 1974 break; 1975 default: 1976 return ERROR(GENERIC); 1977 } 1978 if (blockSize == 0) break; /* bt_end */ 1979 1980 if (ZSTDv01_isError(errorCode)) return errorCode; 1981 op += errorCode; 1982 ip += blockSize; 1983 remainingSize -= blockSize; 1984 } 1985 1986 return op-ostart; 1987 } 1988 1989 size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize) 1990 { 1991 dctx_t ctx; 1992 ctx.base = dst; 1993 return ZSTDv01_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize); 1994 } 1995 1996 size_t ZSTDv01_findFrameCompressedSize(const void* src, size_t srcSize) 1997 { 1998 const BYTE* ip = (const BYTE*)src; 1999 size_t remainingSize = srcSize; 2000 U32 magicNumber; 2001 blockProperties_t blockProperties; 2002 2003 /* Frame Header */ 2004 if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); 2005 magicNumber = ZSTD_readBE32(src); 2006 if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown); 2007 ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize; 2008 2009 /* Loop on each block */ 2010 while (1) 2011 { 2012 size_t blockSize = ZSTDv01_getcBlockSize(ip, remainingSize, &blockProperties); 2013 if (ZSTDv01_isError(blockSize)) return blockSize; 2014 2015 ip += ZSTD_blockHeaderSize; 2016 remainingSize -= ZSTD_blockHeaderSize; 2017 if (blockSize > remainingSize) return ERROR(srcSize_wrong); 2018 2019 if (blockSize == 0) break; /* bt_end */ 2020 2021 ip += blockSize; 2022 remainingSize -= blockSize; 2023 } 2024 2025 return ip - (const BYTE*)src; 2026 } 2027 2028 /******************************* 2029 * Streaming Decompression API 2030 *******************************/ 2031 2032 size_t 
ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx) 2033 { 2034 dctx->expected = ZSTD_frameHeaderSize; 2035 dctx->phase = 0; 2036 dctx->previousDstEnd = NULL; 2037 dctx->base = NULL; 2038 return 0; 2039 } 2040 2041 ZSTDv01_Dctx* ZSTDv01_createDCtx(void) 2042 { 2043 ZSTDv01_Dctx* dctx = (ZSTDv01_Dctx*)malloc(sizeof(ZSTDv01_Dctx)); 2044 if (dctx==NULL) return NULL; 2045 ZSTDv01_resetDCtx(dctx); 2046 return dctx; 2047 } 2048 2049 size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx) 2050 { 2051 free(dctx); 2052 return 0; 2053 } 2054 2055 size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx) 2056 { 2057 return ((dctx_t*)dctx)->expected; 2058 } 2059 2060 size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) 2061 { 2062 dctx_t* ctx = (dctx_t*)dctx; 2063 2064 /* Sanity check */ 2065 if (srcSize != ctx->expected) return ERROR(srcSize_wrong); 2066 if (dst != ctx->previousDstEnd) /* not contiguous */ 2067 ctx->base = dst; 2068 2069 /* Decompress : frame header */ 2070 if (ctx->phase == 0) 2071 { 2072 /* Check frame magic header */ 2073 U32 magicNumber = ZSTD_readBE32(src); 2074 if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown); 2075 ctx->phase = 1; 2076 ctx->expected = ZSTD_blockHeaderSize; 2077 return 0; 2078 } 2079 2080 /* Decompress : block header */ 2081 if (ctx->phase == 1) 2082 { 2083 blockProperties_t bp; 2084 size_t blockSize = ZSTDv01_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); 2085 if (ZSTDv01_isError(blockSize)) return blockSize; 2086 if (bp.blockType == bt_end) 2087 { 2088 ctx->expected = 0; 2089 ctx->phase = 0; 2090 } 2091 else 2092 { 2093 ctx->expected = blockSize; 2094 ctx->bType = bp.blockType; 2095 ctx->phase = 2; 2096 } 2097 2098 return 0; 2099 } 2100 2101 /* Decompress : block content */ 2102 { 2103 size_t rSize; 2104 switch(ctx->bType) 2105 { 2106 case bt_compressed: 2107 rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize); 2108 break; 2109 case bt_raw : 2110 rSize = 
ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize); 2111 break; 2112 case bt_rle : 2113 return ERROR(GENERIC); /* not yet handled */ 2114 break; 2115 case bt_end : /* should never happen (filtered at phase 1) */ 2116 rSize = 0; 2117 break; 2118 default: 2119 return ERROR(GENERIC); 2120 } 2121 ctx->phase = 1; 2122 ctx->expected = ZSTD_blockHeaderSize; 2123 ctx->previousDstEnd = (void*)( ((char*)dst) + rSize); 2124 return rSize; 2125 } 2126 2127 } 2128