1 /* 2 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. 3 * All rights reserved. 4 * 5 * This source code is licensed under both the BSD-style license (found in the 6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 * in the COPYING file in the root directory of this source tree). 8 * You may select, at your option, one of the above-listed licenses. 9 */ 10 11 12 /****************************************** 13 * Includes 14 ******************************************/ 15 #include <stddef.h> /* size_t, ptrdiff_t */ 16 #include "zstd_v01.h" 17 #include "../common/error_private.h" 18 19 20 /****************************************** 21 * Static allocation 22 ******************************************/ 23 /* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */ 24 #define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog)) 25 26 /* You can statically allocate Huff0 DTable as a table of unsigned short using below macro */ 27 #define HUF_DTABLE_SIZE_U16(maxTableLog) (1 + (1<<maxTableLog)) 28 #define HUF_CREATE_STATIC_DTABLE(DTable, maxTableLog) \ 29 unsigned short DTable[HUF_DTABLE_SIZE_U16(maxTableLog)] = { maxTableLog } 30 31 32 /****************************************** 33 * Error Management 34 ******************************************/ 35 #define FSE_LIST_ERRORS(ITEM) \ 36 ITEM(FSE_OK_NoError) ITEM(FSE_ERROR_GENERIC) \ 37 ITEM(FSE_ERROR_tableLog_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooSmall) \ 38 ITEM(FSE_ERROR_dstSize_tooSmall) ITEM(FSE_ERROR_srcSize_wrong)\ 39 ITEM(FSE_ERROR_corruptionDetected) \ 40 ITEM(FSE_ERROR_maxCode) 41 42 #define FSE_GENERATE_ENUM(ENUM) ENUM, 43 typedef enum { FSE_LIST_ERRORS(FSE_GENERATE_ENUM) } FSE_errorCodes; /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */ 44 45 46 /****************************************** 47 * FSE symbol compression API 48 ******************************************/ 49 /* 50 This API consists of small unitary functions, which highly benefit from being inlined. 51 You will want to enable link-time-optimization to ensure these functions are properly inlined in your binary. 52 Visual seems to do it automatically. 53 For gcc or clang, you'll need to add -flto flag at compilation and linking stages. 54 If none of these solutions is applicable, include "fse.c" directly. 55 */ 56 57 typedef unsigned FSE_CTable; /* don't allocate that. It's just a way to be more restrictive than void* */ 58 typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ 59 60 typedef struct 61 { 62 size_t bitContainer; 63 int bitPos; 64 char* startPtr; 65 char* ptr; 66 char* endPtr; 67 } FSE_CStream_t; 68 69 typedef struct 70 { 71 ptrdiff_t value; 72 const void* stateTable; 73 const void* symbolTT; 74 unsigned stateLog; 75 } FSE_CState_t; 76 77 typedef struct 78 { 79 size_t bitContainer; 80 unsigned bitsConsumed; 81 const char* ptr; 82 const char* start; 83 } FSE_DStream_t; 84 85 typedef struct 86 { 87 size_t state; 88 const void* table; /* precise table may vary, depending on U16 */ 89 } FSE_DState_t; 90 91 typedef enum { FSE_DStream_unfinished = 0, 92 FSE_DStream_endOfBuffer = 1, 93 FSE_DStream_completed = 2, 94 FSE_DStream_tooFar = 3 } FSE_DStream_status; /* result of FSE_reloadDStream() */ 95 /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... ?! */ 96 97 98 /**************************************************************** 99 * Tuning parameters 100 ****************************************************************/ 101 /* MEMORY_USAGE : 102 * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) 103 * Increasing memory usage improves compression ratio 104 * Reduced memory usage can improve speed, due to cache effect 105 * Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ 106 #define FSE_MAX_MEMORY_USAGE 14 107 #define FSE_DEFAULT_MEMORY_USAGE 13 108 109 /* FSE_MAX_SYMBOL_VALUE : 110 * Maximum symbol value authorized. 111 * Required for proper stack allocation */ 112 #define FSE_MAX_SYMBOL_VALUE 255 113 114 115 /**************************************************************** 116 * template functions type & suffix 117 ****************************************************************/ 118 #define FSE_FUNCTION_TYPE BYTE 119 #define FSE_FUNCTION_EXTENSION 120 121 122 /**************************************************************** 123 * Byte symbol type 124 ****************************************************************/ 125 typedef struct 126 { 127 unsigned short newState; 128 unsigned char symbol; 129 unsigned char nbBits; 130 } FSE_decode_t; /* size == U32 */ 131 132 133 134 /**************************************************************** 135 * Compiler specifics 136 ****************************************************************/ 137 #ifdef _MSC_VER /* Visual Studio */ 138 # define FORCE_INLINE static __forceinline 139 # include <intrin.h> /* For Visual 2005 */ 140 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ 141 # pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ 142 #else 143 # define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) 144 # if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ 145 # ifdef __GNUC__ 146 # define FORCE_INLINE static inline __attribute__((always_inline)) 147 # else 148 # define FORCE_INLINE static inline 149 # endif 150 # else 151 # define FORCE_INLINE static 152 # endif /* __STDC_VERSION__ */ 153 #endif 154 155 156 /**************************************************************** 157 * Includes 158 ****************************************************************/ 159 #include <stdlib.h> /* malloc, free, qsort */ 160 #include <string.h> /* memcpy, memset */ 161 #include <stdio.h> /* printf (debug) */ 162 163 164 #ifndef MEM_ACCESS_MODULE 165 #define MEM_ACCESS_MODULE 166 /**************************************************************** 167 * Basic Types 168 *****************************************************************/ 169 #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ 170 # include <stdint.h> 171 typedef uint8_t BYTE; 172 typedef uint16_t U16; 173 typedef int16_t S16; 174 typedef uint32_t U32; 175 typedef int32_t S32; 176 typedef uint64_t U64; 177 typedef int64_t S64; 178 #else 179 typedef unsigned char BYTE; 180 typedef unsigned short U16; 181 typedef signed short S16; 182 typedef unsigned int U32; 183 typedef signed int S32; 184 typedef unsigned long long U64; 185 typedef signed long long S64; 186 #endif 187 188 #endif /* MEM_ACCESS_MODULE */ 189 190 /**************************************************************** 191 * Memory I/O 192 *****************************************************************/ 193 /* FSE_FORCE_MEMORY_ACCESS 194 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. 195 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. 196 * The below switch allow to select different access method for improved performance. 197 * Method 0 (default) : use `memcpy()`. Safe and portable. 198 * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). 199 * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. 200 * Method 2 : direct access. This method is portable but violate C standard. 201 * It can generate buggy code on targets generating assembly depending on alignment. 202 * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) 203 * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. 204 * Prefer these methods in priority order (0 > 1 > 2) 205 */ 206 #ifndef FSE_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ 207 # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) 208 # define FSE_FORCE_MEMORY_ACCESS 2 209 # elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \ 210 (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) 211 # define FSE_FORCE_MEMORY_ACCESS 1 212 # endif 213 #endif 214 215 216 static unsigned FSE_32bits(void) 217 { 218 return sizeof(void*)==4; 219 } 220 221 static unsigned FSE_isLittleEndian(void) 222 { 223 const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ 224 return one.c[0]; 225 } 226 227 #if defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==2) 228 229 static U16 FSE_read16(const void* memPtr) { return *(const U16*) memPtr; } 230 static U32 FSE_read32(const void* memPtr) { return *(const U32*) memPtr; } 231 static U64 FSE_read64(const void* memPtr) { return *(const U64*) memPtr; } 232 233 #elif defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==1) 234 235 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ 236 /* currently only defined for gcc and icc */ 237 typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign; 238 239 static U16 FSE_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } 240 static U32 FSE_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } 241 static U64 FSE_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } 242 243 #else 244 245 static U16 FSE_read16(const void* memPtr) 246 { 247 U16 val; memcpy(&val, memPtr, sizeof(val)); return val; 248 } 249 250 static U32 FSE_read32(const void* memPtr) 251 { 252 U32 val; memcpy(&val, memPtr, sizeof(val)); return val; 253 } 254 255 static U64 FSE_read64(const void* memPtr) 256 { 257 U64 val; memcpy(&val, memPtr, sizeof(val)); return val; 258 } 259 260 #endif /* FSE_FORCE_MEMORY_ACCESS */ 261 262 static U16 FSE_readLE16(const void* memPtr) 263 { 264 if (FSE_isLittleEndian()) 265 return FSE_read16(memPtr); 266 else 267 { 268 const BYTE* p = (const BYTE*)memPtr; 269 return (U16)(p[0] + (p[1]<<8)); 270 } 271 } 272 273 static U32 FSE_readLE32(const void* memPtr) 274 { 275 if (FSE_isLittleEndian()) 276 return FSE_read32(memPtr); 277 else 278 { 279 const BYTE* p = (const BYTE*)memPtr; 280 return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24)); 281 } 282 } 283 284 285 static U64 FSE_readLE64(const void* memPtr) 286 { 287 if (FSE_isLittleEndian()) 288 return FSE_read64(memPtr); 289 else 290 { 291 const BYTE* p = (const BYTE*)memPtr; 292 return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24) 293 + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56)); 294 } 295 } 296 297 static size_t FSE_readLEST(const void* memPtr) 298 { 299 if (FSE_32bits()) 300 return (size_t)FSE_readLE32(memPtr); 301 else 302 return (size_t)FSE_readLE64(memPtr); 303 } 304 305 306 307 /**************************************************************** 308 * Constants 309 *****************************************************************/ 310 #define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2) 311 #define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG) 312 #define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1) 313 #define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2) 314 #define FSE_MIN_TABLELOG 5 315 316 #define FSE_TABLELOG_ABSOLUTE_MAX 15 317 #if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX 318 #error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported" 319 #endif 320 321 322 /**************************************************************** 323 * Error Management 324 ****************************************************************/ 325 #define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ 326 327 328 /**************************************************************** 329 * Complex types 330 ****************************************************************/ 331 typedef struct 332 { 333 int deltaFindState; 334 U32 deltaNbBits; 335 } FSE_symbolCompressionTransform; /* total 8 bytes */ 336 337 typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; 338 339 /**************************************************************** 340 * Internal functions 341 ****************************************************************/ 342 FORCE_INLINE unsigned FSE_highbit32 (U32 val) 343 { 344 # if defined(_MSC_VER) /* Visual */ 345 unsigned long r; 346 _BitScanReverse ( &r, val ); 347 return (unsigned) r; 348 # elif defined(__GNUC__) && (GCC_VERSION >= 304) /* GCC Intrinsic */ 349 return __builtin_clz (val) ^ 31; 350 # else /* Software version */ 351 static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; 352 U32 v = val; 353 unsigned r; 354 v |= v >> 1; 355 v |= v >> 2; 356 v |= v >> 4; 357 v |= v >> 8; 358 v |= v >> 16; 359 r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; 360 return r; 361 # endif 362 } 363 364 365 /**************************************************************** 366 * Templates 367 ****************************************************************/ 368 /* 369 designed to be included 370 for type-specific functions (template emulation in C) 371 Objective is to write these functions only once, for improved maintenance 372 */ 373 374 /* safety checks */ 375 #ifndef FSE_FUNCTION_EXTENSION 376 # error "FSE_FUNCTION_EXTENSION must be defined" 377 #endif 378 #ifndef FSE_FUNCTION_TYPE 379 # error "FSE_FUNCTION_TYPE must be defined" 380 #endif 381 382 /* Function names */ 383 #define FSE_CAT(X,Y) X##Y 384 #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) 385 #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) 386 387 388 389 static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; } 390 391 #define FSE_DECODE_TYPE FSE_decode_t 392 393 394 typedef struct { 395 U16 tableLog; 396 U16 fastMode; 397 } FSE_DTableHeader; /* sizeof U32 */ 398 399 static size_t FSE_buildDTable 400 (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) 401 { 402 void* ptr = dt; 403 FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; 404 FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)(ptr) + 1; /* because dt is unsigned, 32-bits aligned on 32-bits */ 405 const U32 tableSize = 1 << tableLog; 406 const U32 tableMask = tableSize-1; 407 const U32 step = FSE_tableStep(tableSize); 408 U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1]; 409 U32 position = 0; 410 U32 highThreshold = tableSize-1; 411 const S16 largeLimit= (S16)(1 << (tableLog-1)); 412 U32 noLarge = 1; 413 U32 s; 414 415 /* Sanity Checks */ 416 if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge; 417 if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge; 418 419 /* Init, lay down lowprob symbols */ 420 DTableH[0].tableLog = (U16)tableLog; 421 for (s=0; s<=maxSymbolValue; s++) 422 { 423 if (normalizedCounter[s]==-1) 424 { 425 tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s; 426 symbolNext[s] = 1; 427 } 428 else 429 { 430 if (normalizedCounter[s] >= largeLimit) noLarge=0; 431 symbolNext[s] = normalizedCounter[s]; 432 } 433 } 434 435 /* Spread symbols */ 436 for (s=0; s<=maxSymbolValue; s++) 437 { 438 int i; 439 for (i=0; i<normalizedCounter[s]; i++) 440 { 441 tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s; 442 position = (position + step) & tableMask; 443 while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */ 444 } 445 } 446 447 if (position!=0) return (size_t)-FSE_ERROR_GENERIC; /* position must reach all cells once, otherwise normalizedCounter is incorrect */ 448 449 /* Build Decoding table */ 450 { 451 U32 i; 452 for (i=0; i<tableSize; i++) 453 { 454 FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol); 455 U16 nextState = symbolNext[symbol]++; 456 tableDecode[i].nbBits = (BYTE) (tableLog - FSE_highbit32 ((U32)nextState) ); 457 tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize); 458 } 459 } 460 461 DTableH->fastMode = (U16)noLarge; 462 return 0; 463 } 464 465 466 /****************************************** 467 * FSE byte symbol 468 ******************************************/ 469 #ifndef FSE_COMMONDEFS_ONLY 470 471 static unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); } 472 473 static short FSE_abs(short a) 474 { 475 return a<0? -a : a; 476 } 477 478 479 /**************************************************************** 480 * Header bitstream management 481 ****************************************************************/ 482 static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, 483 const void* headerBuffer, size_t hbSize) 484 { 485 const BYTE* const istart = (const BYTE*) headerBuffer; 486 const BYTE* const iend = istart + hbSize; 487 const BYTE* ip = istart; 488 int nbBits; 489 int remaining; 490 int threshold; 491 U32 bitStream; 492 int bitCount; 493 unsigned charnum = 0; 494 int previous0 = 0; 495 496 if (hbSize < 4) return (size_t)-FSE_ERROR_srcSize_wrong; 497 bitStream = FSE_readLE32(ip); 498 nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ 499 if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return (size_t)-FSE_ERROR_tableLog_tooLarge; 500 bitStream >>= 4; 501 bitCount = 4; 502 *tableLogPtr = nbBits; 503 remaining = (1<<nbBits)+1; 504 threshold = 1<<nbBits; 505 nbBits++; 506 507 while ((remaining>1) && (charnum<=*maxSVPtr)) 508 { 509 if (previous0) 510 { 511 unsigned n0 = charnum; 512 while ((bitStream & 0xFFFF) == 0xFFFF) 513 { 514 n0+=24; 515 if (ip < iend-5) 516 { 517 ip+=2; 518 bitStream = FSE_readLE32(ip) >> bitCount; 519 } 520 else 521 { 522 bitStream >>= 16; 523 bitCount+=16; 524 } 525 } 526 while ((bitStream & 3) == 3) 527 { 528 n0+=3; 529 bitStream>>=2; 530 bitCount+=2; 531 } 532 n0 += bitStream & 3; 533 bitCount += 2; 534 if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_maxSymbolValue_tooSmall; 535 while (charnum < n0) normalizedCounter[charnum++] = 0; 536 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) 537 { 538 ip += bitCount>>3; 539 bitCount &= 7; 540 bitStream = FSE_readLE32(ip) >> bitCount; 541 } 542 else 543 bitStream >>= 2; 544 } 545 { 546 const short max = (short)((2*threshold-1)-remaining); 547 short count; 548 549 if ((bitStream & (threshold-1)) < (U32)max) 550 { 551 count = (short)(bitStream & (threshold-1)); 552 bitCount += nbBits-1; 553 } 554 else 555 { 556 count = (short)(bitStream & (2*threshold-1)); 557 if (count >= threshold) count -= max; 558 bitCount += nbBits; 559 } 560 561 count--; /* extra accuracy */ 562 remaining -= FSE_abs(count); 563 normalizedCounter[charnum++] = count; 564 previous0 = !count; 565 while (remaining < threshold) 566 { 567 nbBits--; 568 threshold >>= 1; 569 } 570 571 { 572 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) 573 { 574 ip += bitCount>>3; 575 bitCount &= 7; 576 } 577 else 578 { 579 bitCount -= (int)(8 * (iend - 4 - ip)); 580 ip = iend - 4; 581 } 582 bitStream = FSE_readLE32(ip) >> (bitCount & 31); 583 } 584 } 585 } 586 if (remaining != 1) return (size_t)-FSE_ERROR_GENERIC; 587 *maxSVPtr = charnum-1; 588 589 ip += (bitCount+7)>>3; 590 if ((size_t)(ip-istart) > hbSize) return (size_t)-FSE_ERROR_srcSize_wrong; 591 return ip-istart; 592 } 593 594 595 /********************************************************* 596 * Decompression (Byte symbols) 597 *********************************************************/ 598 static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue) 599 { 600 void* ptr = dt; 601 FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; 602 FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1; /* because dt is unsigned */ 603 604 DTableH->tableLog = 0; 605 DTableH->fastMode = 0; 606 607 cell->newState = 0; 608 cell->symbol = symbolValue; 609 cell->nbBits = 0; 610 611 return 0; 612 } 613 614 615 static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits) 616 { 617 void* ptr = dt; 618 FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; 619 FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1; /* because dt is unsigned */ 620 const unsigned tableSize = 1 << nbBits; 621 const unsigned tableMask = tableSize - 1; 622 const unsigned maxSymbolValue = tableMask; 623 unsigned s; 624 625 /* Sanity checks */ 626 if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC; /* min size */ 627 628 /* Build Decoding Table */ 629 DTableH->tableLog = (U16)nbBits; 630 DTableH->fastMode = 1; 631 for (s=0; s<=maxSymbolValue; s++) 632 { 633 dinfo[s].newState = 0; 634 dinfo[s].symbol = (BYTE)s; 635 dinfo[s].nbBits = (BYTE)nbBits; 636 } 637 638 return 0; 639 } 640 641 642 /* FSE_initDStream 643 * Initialize a FSE_DStream_t. 644 * srcBuffer must point at the beginning of an FSE block. 645 * The function result is the size of the FSE_block (== srcSize). 646 * If srcSize is too small, the function will return an errorCode; 647 */ 648 static size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize) 649 { 650 if (srcSize < 1) return (size_t)-FSE_ERROR_srcSize_wrong; 651 652 if (srcSize >= sizeof(size_t)) 653 { 654 U32 contain32; 655 bitD->start = (const char*)srcBuffer; 656 bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t); 657 bitD->bitContainer = FSE_readLEST(bitD->ptr); 658 contain32 = ((const BYTE*)srcBuffer)[srcSize-1]; 659 if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */ 660 bitD->bitsConsumed = 8 - FSE_highbit32(contain32); 661 } 662 else 663 { 664 U32 contain32; 665 bitD->start = (const char*)srcBuffer; 666 bitD->ptr = bitD->start; 667 bitD->bitContainer = *(const BYTE*)(bitD->start); 668 switch(srcSize) 669 { 670 case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16); 671 /* fallthrough */ 672 case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24); 673 /* fallthrough */ 674 case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32); 675 /* fallthrough */ 676 case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24; 677 /* fallthrough */ 678 case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16; 679 /* fallthrough */ 680 case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8; 681 /* fallthrough */ 682 default:; 683 } 684 contain32 = ((const BYTE*)srcBuffer)[srcSize-1]; 685 if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */ 686 bitD->bitsConsumed = 8 - FSE_highbit32(contain32); 687 bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8; 688 } 689 690 return srcSize; 691 } 692 693 694 /*!FSE_lookBits 695 * Provides next n bits from the bitContainer. 696 * bitContainer is not modified (bits are still present for next read/look) 697 * On 32-bits, maxNbBits==25 698 * On 64-bits, maxNbBits==57 699 * return : value extracted. 700 */ 701 static size_t FSE_lookBits(FSE_DStream_t* bitD, U32 nbBits) 702 { 703 const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1; 704 return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask); 705 } 706 707 static size_t FSE_lookBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! */ 708 { 709 const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1; 710 return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask); 711 } 712 713 static void FSE_skipBits(FSE_DStream_t* bitD, U32 nbBits) 714 { 715 bitD->bitsConsumed += nbBits; 716 } 717 718 719 /*!FSE_readBits 720 * Read next n bits from the bitContainer. 721 * On 32-bits, don't read more than maxNbBits==25 722 * On 64-bits, don't read more than maxNbBits==57 723 * Use the fast variant *only* if n >= 1. 724 * return : value extracted. 725 */ 726 static size_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits) 727 { 728 size_t value = FSE_lookBits(bitD, nbBits); 729 FSE_skipBits(bitD, nbBits); 730 return value; 731 } 732 733 static size_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! */ 734 { 735 size_t value = FSE_lookBitsFast(bitD, nbBits); 736 FSE_skipBits(bitD, nbBits); 737 return value; 738 } 739 740 static unsigned FSE_reloadDStream(FSE_DStream_t* bitD) 741 { 742 if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */ 743 return FSE_DStream_tooFar; 744 745 if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) 746 { 747 bitD->ptr -= bitD->bitsConsumed >> 3; 748 bitD->bitsConsumed &= 7; 749 bitD->bitContainer = FSE_readLEST(bitD->ptr); 750 return FSE_DStream_unfinished; 751 } 752 if (bitD->ptr == bitD->start) 753 { 754 if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return FSE_DStream_endOfBuffer; 755 return FSE_DStream_completed; 756 } 757 { 758 U32 nbBytes = bitD->bitsConsumed >> 3; 759 U32 result = FSE_DStream_unfinished; 760 if (bitD->ptr - nbBytes < bitD->start) 761 { 762 nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ 763 result = FSE_DStream_endOfBuffer; 764 } 765 bitD->ptr -= nbBytes; 766 bitD->bitsConsumed -= nbBytes*8; 767 bitD->bitContainer = FSE_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */ 768 return result; 769 } 770 } 771 772 773 static void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const FSE_DTable* dt) 774 { 775 const void* ptr = dt; 776 const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr; 777 DStatePtr->state = FSE_readBits(bitD, DTableH->tableLog); 778 FSE_reloadDStream(bitD); 779 DStatePtr->table = dt + 1; 780 } 781 782 static BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD) 783 { 784 const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; 785 const U32 nbBits = DInfo.nbBits; 786 BYTE symbol = DInfo.symbol; 787 size_t lowBits = FSE_readBits(bitD, nbBits); 788 789 DStatePtr->state = DInfo.newState + lowBits; 790 return symbol; 791 } 792 793 static BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD) 794 { 795 const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; 796 const U32 nbBits = DInfo.nbBits; 797 BYTE symbol = DInfo.symbol; 798 size_t lowBits = FSE_readBitsFast(bitD, nbBits); 799 800 DStatePtr->state = DInfo.newState + lowBits; 801 return symbol; 802 } 803 804 /* FSE_endOfDStream 805 Tells if bitD has reached end of bitStream or not */ 806 807 static unsigned FSE_endOfDStream(const FSE_DStream_t* bitD) 808 { 809 return ((bitD->ptr == bitD->start) && (bitD->bitsConsumed == sizeof(bitD->bitContainer)*8)); 810 } 811 812 static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) 813 { 814 return DStatePtr->state == 0; 815 } 816 817 818 FORCE_INLINE size_t FSE_decompress_usingDTable_generic( 819 void* dst, size_t maxDstSize, 820 const void* cSrc, size_t cSrcSize, 821 const FSE_DTable* dt, const unsigned fast) 822 { 823 BYTE* const ostart = (BYTE*) dst; 824 BYTE* op = ostart; 825 BYTE* const omax = op + maxDstSize; 826 BYTE* const olimit = omax-3; 827 828 FSE_DStream_t bitD; 829 FSE_DState_t state1; 830 FSE_DState_t state2; 831 size_t errorCode; 832 833 /* Init */ 834 errorCode = FSE_initDStream(&bitD, cSrc, cSrcSize); /* replaced last arg by maxCompressed Size */ 835 if (FSE_isError(errorCode)) return errorCode; 836 837 FSE_initDState(&state1, &bitD, dt); 838 FSE_initDState(&state2, &bitD, dt); 839 840 #define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD) 841 842 /* 4 symbols per loop */ 843 for ( ; (FSE_reloadDStream(&bitD)==FSE_DStream_unfinished) && (op<olimit) ; op+=4) 844 { 845 op[0] = FSE_GETSYMBOL(&state1); 846 847 if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ 848 FSE_reloadDStream(&bitD); 849 850 op[1] = FSE_GETSYMBOL(&state2); 851 852 if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ 853 { if (FSE_reloadDStream(&bitD) > FSE_DStream_unfinished) { op+=2; break; } } 854 855 op[2] = FSE_GETSYMBOL(&state1); 856 857 if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ 858 FSE_reloadDStream(&bitD); 859 860 op[3] = FSE_GETSYMBOL(&state2); 861 } 862 863 /* tail */ 864 /* note : FSE_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly FSE_DStream_completed */ 865 while (1) 866 { 867 if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) ) 868 break; 869 870 *op++ = FSE_GETSYMBOL(&state1); 871 872 if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) ) 873 break; 874 875 *op++ = FSE_GETSYMBOL(&state2); 876 } 877 878 /* end ? */ 879 if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2)) 880 return op-ostart; 881 882 if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */ 883 884 return (size_t)-FSE_ERROR_corruptionDetected; 885 } 886 887 888 static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize, 889 const void* cSrc, size_t cSrcSize, 890 const FSE_DTable* dt) 891 { 892 FSE_DTableHeader DTableH; 893 memcpy(&DTableH, dt, sizeof(DTableH)); /* memcpy() into local variable, to avoid strict aliasing warning */ 894 895 /* select fast mode (static) */ 896 if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1); 897 return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0); 898 } 899 900 901 static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize) 902 { 903 const BYTE* const istart = (const BYTE*)cSrc; 904 const BYTE* ip = istart; 905 short counting[FSE_MAX_SYMBOL_VALUE+1]; 906 DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */ 907 unsigned tableLog; 908 unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; 909 size_t errorCode; 910 911 if (cSrcSize<2) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */ 912 913 /* normal FSE decoding mode */ 914 errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize); 915 if (FSE_isError(errorCode)) return errorCode; 916 if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */ 917 ip += errorCode; 918 cSrcSize -= errorCode; 919 920 errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog); 921 if (FSE_isError(errorCode)) return errorCode; 922 923 /* always return, even if it is an error code */ 924 return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt); 925 } 926 927 928 929 /* ******************************************************* 930 * Huff0 : Huffman block compression 931 *********************************************************/ 932 #define HUF_MAX_SYMBOL_VALUE 255 933 #define HUF_DEFAULT_TABLELOG 12 /* used by default, when not specified */ 934 #define HUF_MAX_TABLELOG 12 /* max possible tableLog; for allocation purpose; can be modified */ 935 #define HUF_ABSOLUTEMAX_TABLELOG 16 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ 936 #if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG) 937 # error "HUF_MAX_TABLELOG is too large !" 938 #endif 939 940 typedef struct HUF_CElt_s { 941 U16 val; 942 BYTE nbBits; 943 } HUF_CElt ; 944 945 typedef struct nodeElt_s { 946 U32 count; 947 U16 parent; 948 BYTE byte; 949 BYTE nbBits; 950 } nodeElt; 951 952 953 /* ******************************************************* 954 * Huff0 : Huffman block decompression 955 *********************************************************/ 956 typedef struct { 957 BYTE byte; 958 BYTE nbBits; 959 } HUF_DElt; 960 961 static size_t HUF_readDTable (U16* DTable, const void* src, size_t srcSize) 962 { 963 BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1]; 964 U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */ 965 U32 weightTotal; 966 U32 maxBits; 967 const BYTE* ip = (const BYTE*) src; 968 size_t iSize; 969 size_t oSize; 970 U32 n; 971 U32 nextRankStart; 972 void* ptr = DTable+1; 973 HUF_DElt* const dt = (HUF_DElt*)ptr; 974 975 if (!srcSize) return (size_t)-FSE_ERROR_srcSize_wrong; 976 iSize = ip[0]; 977 978 FSE_STATIC_ASSERT(sizeof(HUF_DElt) == sizeof(U16)); /* if compilation fails here, assertion is false */ 979 //memset(huffWeight, 0, sizeof(huffWeight)); /* should not be necessary, but some analyzer complain ... */ 980 if (iSize >= 128) /* special header */ 981 { 982 if (iSize >= (242)) /* RLE */ 983 { 984 static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 }; 985 oSize = l[iSize-242]; 986 memset(huffWeight, 1, sizeof(huffWeight)); 987 iSize = 0; 988 } 989 else /* Incompressible */ 990 { 991 oSize = iSize - 127; 992 iSize = ((oSize+1)/2); 993 if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong; 994 ip += 1; 995 for (n=0; n<oSize; n+=2) 996 { 997 huffWeight[n] = ip[n/2] >> 4; 998 huffWeight[n+1] = ip[n/2] & 15; 999 } 1000 } 1001 } 1002 else /* header compressed with FSE (normal case) */ 1003 { 1004 if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong; 1005 oSize = FSE_decompress(huffWeight, HUF_MAX_SYMBOL_VALUE, ip+1, iSize); /* max 255 values decoded, last one is implied */ 1006 if (FSE_isError(oSize)) return oSize; 1007 } 1008 1009 /* collect weight stats */ 1010 memset(rankVal, 0, sizeof(rankVal)); 1011 weightTotal = 0; 1012 for (n=0; n<oSize; n++) 1013 { 1014 if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return (size_t)-FSE_ERROR_corruptionDetected; 1015 rankVal[huffWeight[n]]++; 1016 weightTotal += (1 << huffWeight[n]) >> 1; 1017 } 1018 if (weightTotal == 0) return (size_t)-FSE_ERROR_corruptionDetected; 1019 1020 /* get last non-null symbol weight (implied, total must be 2^n) */ 1021 maxBits = FSE_highbit32(weightTotal) + 1; 1022 if (maxBits > DTable[0]) return (size_t)-FSE_ERROR_tableLog_tooLarge; /* DTable is too small */ 1023 DTable[0] = (U16)maxBits; 1024 { 1025 U32 total = 1 << maxBits; 1026 U32 rest = total - weightTotal; 1027 U32 verif = 1 << FSE_highbit32(rest); 1028 U32 lastWeight = FSE_highbit32(rest) + 1; 1029 if (verif != rest) return (size_t)-FSE_ERROR_corruptionDetected; /* last value must be a clean power of 2 */ 1030 huffWeight[oSize] = (BYTE)lastWeight; 1031 rankVal[lastWeight]++; 1032 } 1033 1034 /* check tree construction validity */ 1035 if ((rankVal[1] < 2) || (rankVal[1] & 1)) return (size_t)-FSE_ERROR_corruptionDetected; /* by construction : at least 2 elts of rank 1, must be even */ 1036 1037 /* Prepare ranks */ 1038 nextRankStart = 0; 1039 for (n=1; n<=maxBits; n++) 1040 { 1041 U32 current = nextRankStart; 1042 nextRankStart += (rankVal[n] << (n-1)); 1043 rankVal[n] = current; 1044 } 1045 1046 /* fill DTable */ 1047 for (n=0; n<=oSize; n++) 1048 { 1049 const U32 w = huffWeight[n]; 1050 const U32 length = (1 << w) >> 1; 1051 U32 i; 1052 HUF_DElt D; 1053 D.byte = (BYTE)n; D.nbBits = (BYTE)(maxBits + 1 - w); 1054 for (i = rankVal[w]; i < rankVal[w] + length; i++) 1055 dt[i] = D; 1056 rankVal[w] += length; 1057 } 1058 1059 return iSize+1; 1060 } 1061 1062 1063 static BYTE HUF_decodeSymbol(FSE_DStream_t* Dstream, const HUF_DElt* dt, const U32 dtLog) 1064 { 1065 const size_t val = FSE_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ 1066 const BYTE c = dt[val].byte; 1067 FSE_skipBits(Dstream, dt[val].nbBits); 1068 return c; 1069 } 1070 1071 static size_t HUF_decompress_usingDTable( /* -3% slower when non static */ 1072 void* dst, size_t maxDstSize, 1073 const void* cSrc, size_t cSrcSize, 1074 const U16* DTable) 1075 { 1076 if (cSrcSize < 6) return (size_t)-FSE_ERROR_srcSize_wrong; 1077 { 1078 BYTE* const ostart = (BYTE*) dst; 1079 BYTE* op = ostart; 1080 BYTE* const omax = op + maxDstSize; 1081 BYTE* const olimit = maxDstSize < 15 ? op : omax-15; 1082 1083 const void* ptr = DTable; 1084 const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1; 1085 const U32 dtLog = DTable[0]; 1086 size_t errorCode; 1087 U32 reloadStatus; 1088 1089 /* Init */ 1090 1091 const U16* jumpTable = (const U16*)cSrc; 1092 const size_t length1 = FSE_readLE16(jumpTable); 1093 const size_t length2 = FSE_readLE16(jumpTable+1); 1094 const size_t length3 = FSE_readLE16(jumpTable+2); 1095 const size_t length4 = cSrcSize - 6 - length1 - length2 - length3; /* check coherency !! */ 1096 const char* const start1 = (const char*)(cSrc) + 6; 1097 const char* const start2 = start1 + length1; 1098 const char* const start3 = start2 + length2; 1099 const char* const start4 = start3 + length3; 1100 FSE_DStream_t bitD1, bitD2, bitD3, bitD4; 1101 1102 if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; 1103 1104 errorCode = FSE_initDStream(&bitD1, start1, length1); 1105 if (FSE_isError(errorCode)) return errorCode; 1106 errorCode = FSE_initDStream(&bitD2, start2, length2); 1107 if (FSE_isError(errorCode)) return errorCode; 1108 errorCode = FSE_initDStream(&bitD3, start3, length3); 1109 if (FSE_isError(errorCode)) return errorCode; 1110 errorCode = FSE_initDStream(&bitD4, start4, length4); 1111 if (FSE_isError(errorCode)) return errorCode; 1112 1113 reloadStatus=FSE_reloadDStream(&bitD2); 1114 1115 /* 16 symbols per loop */ 1116 for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit); /* D2-3-4 are supposed to be synchronized and finish together */ 1117 op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1)) 1118 { 1119 #define HUF_DECODE_SYMBOL_0(n, Dstream) \ 1120 op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); 1121 1122 #define HUF_DECODE_SYMBOL_1(n, Dstream) \ 1123 op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \ 1124 if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream) 1125 1126 #define HUF_DECODE_SYMBOL_2(n, Dstream) \ 1127 op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \ 1128 if (FSE_32bits()) FSE_reloadDStream(&Dstream) 1129 1130 HUF_DECODE_SYMBOL_1( 0, bitD1); 1131 HUF_DECODE_SYMBOL_1( 1, bitD2); 1132 HUF_DECODE_SYMBOL_1( 2, bitD3); 1133 HUF_DECODE_SYMBOL_1( 3, bitD4); 1134 HUF_DECODE_SYMBOL_2( 4, bitD1); 1135 HUF_DECODE_SYMBOL_2( 5, bitD2); 1136 HUF_DECODE_SYMBOL_2( 6, bitD3); 1137 HUF_DECODE_SYMBOL_2( 7, bitD4); 1138 HUF_DECODE_SYMBOL_1( 8, bitD1); 1139 HUF_DECODE_SYMBOL_1( 9, bitD2); 1140 HUF_DECODE_SYMBOL_1(10, bitD3); 1141 HUF_DECODE_SYMBOL_1(11, bitD4); 1142 HUF_DECODE_SYMBOL_0(12, bitD1); 1143 HUF_DECODE_SYMBOL_0(13, bitD2); 1144 HUF_DECODE_SYMBOL_0(14, bitD3); 1145 HUF_DECODE_SYMBOL_0(15, bitD4); 1146 } 1147 1148 if (reloadStatus!=FSE_DStream_completed) /* not complete : some bitStream might be FSE_DStream_unfinished */ 1149 return (size_t)-FSE_ERROR_corruptionDetected; 1150 1151 /* tail */ 1152 { 1153 /* bitTail = bitD1; */ /* *much* slower : -20% !??! */ 1154 FSE_DStream_t bitTail; 1155 bitTail.ptr = bitD1.ptr; 1156 bitTail.bitsConsumed = bitD1.bitsConsumed; 1157 bitTail.bitContainer = bitD1.bitContainer; /* required in case of FSE_DStream_endOfBuffer */ 1158 bitTail.start = start1; 1159 for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++) 1160 { 1161 HUF_DECODE_SYMBOL_0(0, bitTail); 1162 } 1163 1164 if (FSE_endOfDStream(&bitTail)) 1165 return op-ostart; 1166 } 1167 1168 if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */ 1169 1170 return (size_t)-FSE_ERROR_corruptionDetected; 1171 } 1172 } 1173 1174 1175 static size_t HUF_decompress (void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize) 1176 { 1177 HUF_CREATE_STATIC_DTABLE(DTable, HUF_MAX_TABLELOG); 1178 const BYTE* ip = (const BYTE*) cSrc; 1179 size_t errorCode; 1180 1181 errorCode = HUF_readDTable (DTable, cSrc, cSrcSize); 1182 if (FSE_isError(errorCode)) return errorCode; 1183 if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; 1184 ip += errorCode; 1185 cSrcSize -= errorCode; 1186 1187 return HUF_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable); 1188 } 1189 1190 1191 #endif /* FSE_COMMONDEFS_ONLY */ 1192 1193 /* 1194 zstd - standard compression library 1195 Copyright (C) 2014-2015, Yann Collet. 1196 1197 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 1198 1199 Redistribution and use in source and binary forms, with or without 1200 modification, are permitted provided that the following conditions are 1201 met: 1202 * Redistributions of source code must retain the above copyright 1203 notice, this list of conditions and the following disclaimer. 1204 * Redistributions in binary form must reproduce the above 1205 copyright notice, this list of conditions and the following disclaimer 1206 in the documentation and/or other materials provided with the 1207 distribution. 1208 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 1209 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 1210 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 1211 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 1212 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 1213 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 1214 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 1215 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 1216 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 1217 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 1218 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 1219 1220 You can contact the author at : 1221 - zstd source repository : https://github.com/Cyan4973/zstd 1222 - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c 1223 */ 1224 1225 /**************************************************************** 1226 * Tuning parameters 1227 *****************************************************************/ 1228 /* MEMORY_USAGE : 1229 * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) 1230 * Increasing memory usage improves compression ratio 1231 * Reduced memory usage can improve speed, due to cache effect */ 1232 #define ZSTD_MEMORY_USAGE 17 1233 1234 1235 /************************************** 1236 CPU Feature Detection 1237 **************************************/ 1238 /* 1239 * Automated efficient unaligned memory access detection 1240 * Based on known hardware architectures 1241 * This list will be updated thanks to feedbacks 1242 */ 1243 #if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \ 1244 || defined(__ARM_FEATURE_UNALIGNED) \ 1245 || defined(__i386__) || defined(__x86_64__) \ 1246 || defined(_M_IX86) || defined(_M_X64) \ 1247 || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__) \ 1248 || (defined(_M_ARM) && (_M_ARM >= 7)) 1249 # define ZSTD_UNALIGNED_ACCESS 1 1250 #else 1251 # define ZSTD_UNALIGNED_ACCESS 0 1252 #endif 1253 1254 1255 /******************************************************** 1256 * Includes 1257 *********************************************************/ 1258 #include <stdlib.h> /* calloc */ 1259 #include <string.h> /* memcpy, memmove */ 1260 #include <stdio.h> /* debug : printf */ 1261 1262 1263 /******************************************************** 1264 * Compiler specifics 1265 *********************************************************/ 1266 #ifdef __AVX2__ 1267 # include <immintrin.h> /* AVX2 intrinsics */ 1268 #endif 1269 1270 #ifdef _MSC_VER /* Visual Studio */ 1271 # include <intrin.h> /* For Visual 2005 */ 1272 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ 1273 # pragma warning(disable : 4324) /* disable: C4324: padded structure */ 1274 #endif 1275 1276 1277 #ifndef MEM_ACCESS_MODULE 1278 #define MEM_ACCESS_MODULE 1279 /******************************************************** 1280 * Basic Types 1281 *********************************************************/ 1282 #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ 1283 # if defined(_AIX) 1284 # include <inttypes.h> 1285 # else 1286 # include <stdint.h> /* intptr_t */ 1287 # endif 1288 typedef uint8_t BYTE; 1289 typedef uint16_t U16; 1290 typedef int16_t S16; 1291 typedef uint32_t U32; 1292 typedef int32_t S32; 1293 typedef uint64_t U64; 1294 #else 1295 typedef unsigned char BYTE; 1296 typedef unsigned short U16; 1297 typedef signed short S16; 1298 typedef unsigned int U32; 1299 typedef signed int S32; 1300 typedef unsigned long long U64; 1301 #endif 1302 1303 #endif /* MEM_ACCESS_MODULE */ 1304 1305 1306 /******************************************************** 1307 * Constants 1308 *********************************************************/ 1309 static const U32 ZSTD_magicNumber = 0xFD2FB51E; /* 3rd version : seqNb header */ 1310 1311 #define HASH_LOG (ZSTD_MEMORY_USAGE - 2) 1312 #define HASH_TABLESIZE (1 << HASH_LOG) 1313 #define HASH_MASK (HASH_TABLESIZE - 1) 1314 1315 #define KNUTH 2654435761 1316 1317 #define BIT7 128 1318 #define BIT6 64 1319 #define BIT5 32 1320 #define BIT4 16 1321 1322 #define KB *(1 <<10) 1323 #define MB *(1 <<20) 1324 #define GB *(1U<<30) 1325 1326 #define BLOCKSIZE (128 KB) /* define, for static allocation */ 1327 1328 #define WORKPLACESIZE (BLOCKSIZE*3) 1329 #define MINMATCH 4 1330 #define MLbits 7 1331 #define LLbits 6 1332 #define Offbits 5 1333 #define MaxML ((1<<MLbits )-1) 1334 #define MaxLL ((1<<LLbits )-1) 1335 #define MaxOff ((1<<Offbits)-1) 1336 #define LitFSELog 11 1337 #define MLFSELog 10 1338 #define LLFSELog 10 1339 #define OffFSELog 9 1340 #define MAX(a,b) ((a)<(b)?(b):(a)) 1341 #define MaxSeq MAX(MaxLL, MaxML) 1342 1343 #define LITERAL_NOENTROPY 63 1344 #define COMMAND_NOENTROPY 7 /* to remove */ 1345 1346 #define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) 1347 1348 static const size_t ZSTD_blockHeaderSize = 3; 1349 static const size_t ZSTD_frameHeaderSize = 4; 1350 1351 1352 /******************************************************** 1353 * Memory operations 1354 *********************************************************/ 1355 static unsigned ZSTD_32bits(void) { return sizeof(void*)==4; } 1356 1357 static unsigned ZSTD_isLittleEndian(void) 1358 { 1359 const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ 1360 return one.c[0]; 1361 } 1362 1363 static U16 ZSTD_read16(const void* p) { U16 r; memcpy(&r, p, sizeof(r)); return r; } 1364 1365 static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); } 1366 1367 static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); } 1368 1369 #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; } 1370 1371 static void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length) 1372 { 1373 const BYTE* ip = (const BYTE*)src; 1374 BYTE* op = (BYTE*)dst; 1375 BYTE* const oend = op + length; 1376 while (op < oend) COPY8(op, ip); 1377 } 1378 1379 static U16 ZSTD_readLE16(const void* memPtr) 1380 { 1381 if (ZSTD_isLittleEndian()) return ZSTD_read16(memPtr); 1382 else 1383 { 1384 const BYTE* p = (const BYTE*)memPtr; 1385 return (U16)((U16)p[0] + ((U16)p[1]<<8)); 1386 } 1387 } 1388 1389 static U32 ZSTD_readLE24(const void* memPtr) 1390 { 1391 return ZSTD_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16); 1392 } 1393 1394 static U32 ZSTD_readBE32(const void* memPtr) 1395 { 1396 const BYTE* p = (const BYTE*)memPtr; 1397 return (U32)(((U32)p[0]<<24) + ((U32)p[1]<<16) + ((U32)p[2]<<8) + ((U32)p[3]<<0)); 1398 } 1399 1400 1401 /************************************** 1402 * Local structures 1403 ***************************************/ 1404 typedef struct ZSTD_Cctx_s ZSTD_Cctx; 1405 1406 typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t; 1407 1408 typedef struct 1409 { 1410 blockType_t blockType; 1411 U32 origSize; 1412 } blockProperties_t; 1413 1414 typedef struct { 1415 void* buffer; 1416 U32* offsetStart; 1417 U32* offset; 1418 BYTE* offCodeStart; 1419 BYTE* offCode; 1420 BYTE* litStart; 1421 BYTE* lit; 1422 BYTE* litLengthStart; 1423 BYTE* litLength; 1424 BYTE* matchLengthStart; 1425 BYTE* matchLength; 1426 BYTE* dumpsStart; 1427 BYTE* dumps; 1428 } seqStore_t; 1429 1430 1431 typedef struct ZSTD_Cctx_s 1432 { 1433 const BYTE* base; 1434 U32 current; 1435 U32 nextUpdate; 1436 seqStore_t seqStore; 1437 #ifdef __AVX2__ 1438 __m256i hashTable[HASH_TABLESIZE>>3]; 1439 #else 1440 U32 hashTable[HASH_TABLESIZE]; 1441 #endif 1442 BYTE buffer[WORKPLACESIZE]; 1443 } cctxi_t; 1444 1445 1446 1447 1448 /************************************** 1449 * Error Management 1450 **************************************/ 1451 /* published entry point */ 1452 unsigned ZSTDv01_isError(size_t code) { return ERR_isError(code); } 1453 1454 1455 /************************************** 1456 * Tool functions 1457 **************************************/ 1458 #define ZSTD_VERSION_MAJOR 0 /* for breaking interface changes */ 1459 #define ZSTD_VERSION_MINOR 1 /* for new (non-breaking) interface capabilities */ 1460 #define ZSTD_VERSION_RELEASE 3 /* for tweaks, bug-fixes, or development */ 1461 #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) 1462 1463 /************************************************************** 1464 * Decompression code 1465 **************************************************************/ 1466 1467 static size_t ZSTDv01_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr) 1468 { 1469 const BYTE* const in = (const BYTE* const)src; 1470 BYTE headerFlags; 1471 U32 cSize; 1472 1473 if (srcSize < 3) return ERROR(srcSize_wrong); 1474 1475 headerFlags = *in; 1476 cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16); 1477 1478 bpPtr->blockType = (blockType_t)(headerFlags >> 6); 1479 bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0; 1480 1481 if (bpPtr->blockType == bt_end) return 0; 1482 if (bpPtr->blockType == bt_rle) return 1; 1483 return cSize; 1484 } 1485 1486 1487 static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize) 1488 { 1489 if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall); 1490 if (srcSize > 0) { 1491 memcpy(dst, src, srcSize); 1492 } 1493 return srcSize; 1494 } 1495 1496 1497 static size_t ZSTD_decompressLiterals(void* ctx, 1498 void* dst, size_t maxDstSize, 1499 const void* src, size_t srcSize) 1500 { 1501 BYTE* op = (BYTE*)dst; 1502 BYTE* const oend = op + maxDstSize; 1503 const BYTE* ip = (const BYTE*)src; 1504 size_t errorCode; 1505 size_t litSize; 1506 1507 /* check : minimum 2, for litSize, +1, for content */ 1508 if (srcSize <= 3) return ERROR(corruption_detected); 1509 1510 litSize = ip[1] + (ip[0]<<8); 1511 litSize += ((ip[-3] >> 3) & 7) << 16; /* mmmmh.... */ 1512 op = oend - litSize; 1513 1514 (void)ctx; 1515 if (litSize > maxDstSize) return ERROR(dstSize_tooSmall); 1516 errorCode = HUF_decompress(op, litSize, ip+2, srcSize-2); 1517 if (FSE_isError(errorCode)) return ERROR(GENERIC); 1518 return litSize; 1519 } 1520 1521 1522 static size_t ZSTDv01_decodeLiteralsBlock(void* ctx, 1523 void* dst, size_t maxDstSize, 1524 const BYTE** litStart, size_t* litSize, 1525 const void* src, size_t srcSize) 1526 { 1527 const BYTE* const istart = (const BYTE* const)src; 1528 const BYTE* ip = istart; 1529 BYTE* const ostart = (BYTE* const)dst; 1530 BYTE* const oend = ostart + maxDstSize; 1531 blockProperties_t litbp; 1532 1533 size_t litcSize = ZSTDv01_getcBlockSize(src, srcSize, &litbp); 1534 if (ZSTDv01_isError(litcSize)) return litcSize; 1535 if (litcSize > srcSize - ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); 1536 ip += ZSTD_blockHeaderSize; 1537 1538 switch(litbp.blockType) 1539 { 1540 case bt_raw: 1541 *litStart = ip; 1542 ip += litcSize; 1543 *litSize = litcSize; 1544 break; 1545 case bt_rle: 1546 { 1547 size_t rleSize = litbp.origSize; 1548 if (rleSize>maxDstSize) return ERROR(dstSize_tooSmall); 1549 if (!srcSize) return ERROR(srcSize_wrong); 1550 if (rleSize > 0) { 1551 memset(oend - rleSize, *ip, rleSize); 1552 } 1553 *litStart = oend - rleSize; 1554 *litSize = rleSize; 1555 ip++; 1556 break; 1557 } 1558 case bt_compressed: 1559 { 1560 size_t decodedLitSize = ZSTD_decompressLiterals(ctx, dst, maxDstSize, ip, litcSize); 1561 if (ZSTDv01_isError(decodedLitSize)) return decodedLitSize; 1562 *litStart = oend - decodedLitSize; 1563 *litSize = decodedLitSize; 1564 ip += litcSize; 1565 break; 1566 } 1567 case bt_end: 1568 default: 1569 return ERROR(GENERIC); 1570 } 1571 1572 return ip-istart; 1573 } 1574 1575 1576 static size_t ZSTDv01_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr, 1577 FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb, 1578 const void* src, size_t srcSize) 1579 { 1580 const BYTE* const istart = (const BYTE* const)src; 1581 const BYTE* ip = istart; 1582 const BYTE* const iend = istart + srcSize; 1583 U32 LLtype, Offtype, MLtype; 1584 U32 LLlog, Offlog, MLlog; 1585 size_t dumpsLength; 1586 1587 /* check */ 1588 if (srcSize < 5) return ERROR(srcSize_wrong); 1589 1590 /* SeqHead */ 1591 *nbSeq = ZSTD_readLE16(ip); ip+=2; 1592 LLtype = *ip >> 6; 1593 Offtype = (*ip >> 4) & 3; 1594 MLtype = (*ip >> 2) & 3; 1595 if (*ip & 2) 1596 { 1597 dumpsLength = ip[2]; 1598 dumpsLength += ip[1] << 8; 1599 ip += 3; 1600 } 1601 else 1602 { 1603 dumpsLength = ip[1]; 1604 dumpsLength += (ip[0] & 1) << 8; 1605 ip += 2; 1606 } 1607 *dumpsPtr = ip; 1608 ip += dumpsLength; 1609 *dumpsLengthPtr = dumpsLength; 1610 1611 /* check */ 1612 if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */ 1613 1614 /* sequences */ 1615 { 1616 S16 norm[MaxML+1]; /* assumption : MaxML >= MaxLL and MaxOff */ 1617 size_t headerSize; 1618 1619 /* Build DTables */ 1620 switch(LLtype) 1621 { 1622 case bt_rle : 1623 LLlog = 0; 1624 FSE_buildDTable_rle(DTableLL, *ip++); break; 1625 case bt_raw : 1626 LLlog = LLbits; 1627 FSE_buildDTable_raw(DTableLL, LLbits); break; 1628 default : 1629 { U32 max = MaxLL; 1630 headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip); 1631 if (FSE_isError(headerSize)) return ERROR(GENERIC); 1632 if (LLlog > LLFSELog) return ERROR(corruption_detected); 1633 ip += headerSize; 1634 FSE_buildDTable(DTableLL, norm, max, LLlog); 1635 } } 1636 1637 switch(Offtype) 1638 { 1639 case bt_rle : 1640 Offlog = 0; 1641 if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */ 1642 FSE_buildDTable_rle(DTableOffb, *ip++); break; 1643 case bt_raw : 1644 Offlog = Offbits; 1645 FSE_buildDTable_raw(DTableOffb, Offbits); break; 1646 default : 1647 { U32 max = MaxOff; 1648 headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip); 1649 if (FSE_isError(headerSize)) return ERROR(GENERIC); 1650 if (Offlog > OffFSELog) return ERROR(corruption_detected); 1651 ip += headerSize; 1652 FSE_buildDTable(DTableOffb, norm, max, Offlog); 1653 } } 1654 1655 switch(MLtype) 1656 { 1657 case bt_rle : 1658 MLlog = 0; 1659 if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */ 1660 FSE_buildDTable_rle(DTableML, *ip++); break; 1661 case bt_raw : 1662 MLlog = MLbits; 1663 FSE_buildDTable_raw(DTableML, MLbits); break; 1664 default : 1665 { U32 max = MaxML; 1666 headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip); 1667 if (FSE_isError(headerSize)) return ERROR(GENERIC); 1668 if (MLlog > MLFSELog) return ERROR(corruption_detected); 1669 ip += headerSize; 1670 FSE_buildDTable(DTableML, norm, max, MLlog); 1671 } } } 1672 1673 return ip-istart; 1674 } 1675 1676 1677 typedef struct { 1678 size_t litLength; 1679 size_t offset; 1680 size_t matchLength; 1681 } seq_t; 1682 1683 typedef struct { 1684 FSE_DStream_t DStream; 1685 FSE_DState_t stateLL; 1686 FSE_DState_t stateOffb; 1687 FSE_DState_t stateML; 1688 size_t prevOffset; 1689 const BYTE* dumps; 1690 const BYTE* dumpsEnd; 1691 } seqState_t; 1692 1693 1694 static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) 1695 { 1696 size_t litLength; 1697 size_t prevOffset; 1698 size_t offset; 1699 size_t matchLength; 1700 const BYTE* dumps = seqState->dumps; 1701 const BYTE* const de = seqState->dumpsEnd; 1702 1703 /* Literal length */ 1704 litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream)); 1705 prevOffset = litLength ? seq->offset : seqState->prevOffset; 1706 seqState->prevOffset = seq->offset; 1707 if (litLength == MaxLL) 1708 { 1709 const U32 add = dumps<de ? *dumps++ : 0; 1710 if (add < 255) litLength += add; 1711 else 1712 { 1713 if (dumps<=(de-3)) 1714 { 1715 litLength = ZSTD_readLE24(dumps); 1716 dumps += 3; 1717 } 1718 } 1719 } 1720 1721 /* Offset */ 1722 { 1723 U32 offsetCode, nbBits; 1724 offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); 1725 if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream)); 1726 nbBits = offsetCode - 1; 1727 if (offsetCode==0) nbBits = 0; /* cmove */ 1728 offset = ((size_t)1 << (nbBits & ((sizeof(offset)*8)-1))) + FSE_readBits(&(seqState->DStream), nbBits); 1729 if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream)); 1730 if (offsetCode==0) offset = prevOffset; 1731 } 1732 1733 /* MatchLength */ 1734 matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); 1735 if (matchLength == MaxML) 1736 { 1737 const U32 add = dumps<de ? *dumps++ : 0; 1738 if (add < 255) matchLength += add; 1739 else 1740 { 1741 if (dumps<=(de-3)) 1742 { 1743 matchLength = ZSTD_readLE24(dumps); 1744 dumps += 3; 1745 } 1746 } 1747 } 1748 matchLength += MINMATCH; 1749 1750 /* save result */ 1751 seq->litLength = litLength; 1752 seq->offset = offset; 1753 seq->matchLength = matchLength; 1754 seqState->dumps = dumps; 1755 } 1756 1757 1758 static size_t ZSTD_execSequence(BYTE* op, 1759 seq_t sequence, 1760 const BYTE** litPtr, const BYTE* const litLimit, 1761 BYTE* const base, BYTE* const oend) 1762 { 1763 static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */ 1764 static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* subtracted */ 1765 const BYTE* const ostart = op; 1766 const size_t litLength = sequence.litLength; 1767 BYTE* const endMatch = op + litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */ 1768 const BYTE* const litEnd = *litPtr + litLength; 1769 1770 /* check */ 1771 if (endMatch > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */ 1772 if (litEnd > litLimit) return ERROR(corruption_detected); 1773 if (sequence.matchLength > (size_t)(*litPtr-op)) return ERROR(dstSize_tooSmall); /* overwrite literal segment */ 1774 1775 /* copy Literals */ 1776 if (((size_t)(*litPtr - op) < 8) || ((size_t)(oend-litEnd) < 8) || (op+litLength > oend-8)) 1777 memmove(op, *litPtr, litLength); /* overwrite risk */ 1778 else 1779 ZSTD_wildcopy(op, *litPtr, litLength); 1780 op += litLength; 1781 *litPtr = litEnd; /* update for next sequence */ 1782 1783 /* check : last match must be at a minimum distance of 8 from end of dest buffer */ 1784 if (oend-op < 8) return ERROR(dstSize_tooSmall); 1785 1786 /* copy Match */ 1787 { 1788 const U32 overlapRisk = (((size_t)(litEnd - endMatch)) < 12); 1789 const BYTE* match = op - sequence.offset; /* possible underflow at op - offset ? */ 1790 size_t qutt = 12; 1791 U64 saved[2]; 1792 1793 /* check */ 1794 if (match < base) return ERROR(corruption_detected); 1795 if (sequence.offset > (size_t)base) return ERROR(corruption_detected); 1796 1797 /* save beginning of literal sequence, in case of write overlap */ 1798 if (overlapRisk) 1799 { 1800 if ((endMatch + qutt) > oend) qutt = oend-endMatch; 1801 memcpy(saved, endMatch, qutt); 1802 } 1803 1804 if (sequence.offset < 8) 1805 { 1806 const int dec64 = dec64table[sequence.offset]; 1807 op[0] = match[0]; 1808 op[1] = match[1]; 1809 op[2] = match[2]; 1810 op[3] = match[3]; 1811 match += dec32table[sequence.offset]; 1812 ZSTD_copy4(op+4, match); 1813 match -= dec64; 1814 } else { ZSTD_copy8(op, match); } 1815 op += 8; match += 8; 1816 1817 if (endMatch > oend-(16-MINMATCH)) 1818 { 1819 if (op < oend-8) 1820 { 1821 ZSTD_wildcopy(op, match, (oend-8) - op); 1822 match += (oend-8) - op; 1823 op = oend-8; 1824 } 1825 while (op<endMatch) *op++ = *match++; 1826 } 1827 else 1828 ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */ 1829 1830 /* restore, in case of overlap */ 1831 if (overlapRisk) memcpy(endMatch, saved, qutt); 1832 } 1833 1834 return endMatch-ostart; 1835 } 1836 1837 typedef struct ZSTDv01_Dctx_s 1838 { 1839 U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)]; 1840 U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)]; 1841 U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)]; 1842 void* previousDstEnd; 1843 void* base; 1844 size_t expected; 1845 blockType_t bType; 1846 U32 phase; 1847 } dctx_t; 1848 1849 1850 static size_t ZSTD_decompressSequences( 1851 void* ctx, 1852 void* dst, size_t maxDstSize, 1853 const void* seqStart, size_t seqSize, 1854 const BYTE* litStart, size_t litSize) 1855 { 1856 dctx_t* dctx = (dctx_t*)ctx; 1857 const BYTE* ip = (const BYTE*)seqStart; 1858 const BYTE* const iend = ip + seqSize; 1859 BYTE* const ostart = (BYTE* const)dst; 1860 BYTE* op = ostart; 1861 BYTE* const oend = ostart + maxDstSize; 1862 size_t errorCode, dumpsLength; 1863 const BYTE* litPtr = litStart; 1864 const BYTE* const litEnd = litStart + litSize; 1865 int nbSeq; 1866 const BYTE* dumps; 1867 U32* DTableLL = dctx->LLTable; 1868 U32* DTableML = dctx->MLTable; 1869 U32* DTableOffb = dctx->OffTable; 1870 BYTE* const base = (BYTE*) (dctx->base); 1871 1872 /* Build Decoding Tables */ 1873 errorCode = ZSTDv01_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength, 1874 DTableLL, DTableML, DTableOffb, 1875 ip, iend-ip); 1876 if (ZSTDv01_isError(errorCode)) return errorCode; 1877 ip += errorCode; 1878 1879 /* Regen sequences */ 1880 { 1881 seq_t sequence; 1882 seqState_t seqState; 1883 1884 memset(&sequence, 0, sizeof(sequence)); 1885 seqState.dumps = dumps; 1886 seqState.dumpsEnd = dumps + dumpsLength; 1887 seqState.prevOffset = 1; 1888 errorCode = FSE_initDStream(&(seqState.DStream), ip, iend-ip); 1889 if (FSE_isError(errorCode)) return ERROR(corruption_detected); 1890 FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL); 1891 FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb); 1892 FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML); 1893 1894 for ( ; (FSE_reloadDStream(&(seqState.DStream)) <= FSE_DStream_completed) && (nbSeq>0) ; ) 1895 { 1896 size_t oneSeqSize; 1897 nbSeq--; 1898 ZSTD_decodeSequence(&sequence, &seqState); 1899 oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litEnd, base, oend); 1900 if (ZSTDv01_isError(oneSeqSize)) return oneSeqSize; 1901 op += oneSeqSize; 1902 } 1903 1904 /* check if reached exact end */ 1905 if ( !FSE_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected); /* requested too much : data is corrupted */ 1906 if (nbSeq<0) return ERROR(corruption_detected); /* requested too many sequences : data is corrupted */ 1907 1908 /* last literal segment */ 1909 { 1910 size_t lastLLSize = litEnd - litPtr; 1911 if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall); 1912 if (lastLLSize > 0) { 1913 if (op != litPtr) memmove(op, litPtr, lastLLSize); 1914 op += lastLLSize; 1915 } 1916 } 1917 } 1918 1919 return op-ostart; 1920 } 1921 1922 1923 static size_t ZSTD_decompressBlock( 1924 void* ctx, 1925 void* dst, size_t maxDstSize, 1926 const void* src, size_t srcSize) 1927 { 1928 /* blockType == blockCompressed, srcSize is trusted */ 1929 const BYTE* ip = (const BYTE*)src; 1930 const BYTE* litPtr = NULL; 1931 size_t litSize = 0; 1932 size_t errorCode; 1933 1934 /* Decode literals sub-block */ 1935 errorCode = ZSTDv01_decodeLiteralsBlock(ctx, dst, maxDstSize, &litPtr, &litSize, src, srcSize); 1936 if (ZSTDv01_isError(errorCode)) return errorCode; 1937 ip += errorCode; 1938 srcSize -= errorCode; 1939 1940 return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize, litPtr, litSize); 1941 } 1942 1943 1944 size_t ZSTDv01_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) 1945 { 1946 const BYTE* ip = (const BYTE*)src; 1947 const BYTE* iend = ip + srcSize; 1948 BYTE* const ostart = (BYTE* const)dst; 1949 BYTE* op = ostart; 1950 BYTE* const oend = ostart + maxDstSize; 1951 size_t remainingSize = srcSize; 1952 U32 magicNumber; 1953 size_t errorCode=0; 1954 blockProperties_t blockProperties; 1955 1956 /* Frame Header */ 1957 if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); 1958 magicNumber = ZSTD_readBE32(src); 1959 if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown); 1960 ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize; 1961 1962 /* Loop on each block */ 1963 while (1) 1964 { 1965 size_t blockSize = ZSTDv01_getcBlockSize(ip, iend-ip, &blockProperties); 1966 if (ZSTDv01_isError(blockSize)) return blockSize; 1967 1968 ip += ZSTD_blockHeaderSize; 1969 remainingSize -= ZSTD_blockHeaderSize; 1970 if (blockSize > remainingSize) return ERROR(srcSize_wrong); 1971 1972 switch(blockProperties.blockType) 1973 { 1974 case bt_compressed: 1975 errorCode = ZSTD_decompressBlock(ctx, op, oend-op, ip, blockSize); 1976 break; 1977 case bt_raw : 1978 errorCode = ZSTD_copyUncompressedBlock(op, oend-op, ip, blockSize); 1979 break; 1980 case bt_rle : 1981 return ERROR(GENERIC); /* not yet supported */ 1982 break; 1983 case bt_end : 1984 /* end of frame */ 1985 if (remainingSize) return ERROR(srcSize_wrong); 1986 break; 1987 default: 1988 return ERROR(GENERIC); 1989 } 1990 if (blockSize == 0) break; /* bt_end */ 1991 1992 if (ZSTDv01_isError(errorCode)) return errorCode; 1993 op += errorCode; 1994 ip += blockSize; 1995 remainingSize -= blockSize; 1996 } 1997 1998 return op-ostart; 1999 } 2000 2001 size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize) 2002 { 2003 dctx_t ctx; 2004 ctx.base = dst; 2005 return ZSTDv01_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize); 2006 } 2007 2008 /* ZSTD_errorFrameSizeInfoLegacy() : 2009 assumes `cSize` and `dBound` are _not_ NULL */ 2010 static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret) 2011 { 2012 *cSize = ret; 2013 *dBound = ZSTD_CONTENTSIZE_ERROR; 2014 } 2015 2016 void ZSTDv01_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound) 2017 { 2018 const BYTE* ip = (const BYTE*)src; 2019 size_t remainingSize = srcSize; 2020 size_t nbBlocks = 0; 2021 U32 magicNumber; 2022 blockProperties_t blockProperties; 2023 2024 /* Frame Header */ 2025 if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) { 2026 ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong)); 2027 return; 2028 } 2029 magicNumber = ZSTD_readBE32(src); 2030 if (magicNumber != ZSTD_magicNumber) { 2031 ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown)); 2032 return; 2033 } 2034 ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize; 2035 2036 /* Loop on each block */ 2037 while (1) 2038 { 2039 size_t blockSize = ZSTDv01_getcBlockSize(ip, remainingSize, &blockProperties); 2040 if (ZSTDv01_isError(blockSize)) { 2041 ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, blockSize); 2042 return; 2043 } 2044 2045 ip += ZSTD_blockHeaderSize; 2046 remainingSize -= ZSTD_blockHeaderSize; 2047 if (blockSize > remainingSize) { 2048 ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong)); 2049 return; 2050 } 2051 2052 if (blockSize == 0) break; /* bt_end */ 2053 2054 ip += blockSize; 2055 remainingSize -= blockSize; 2056 nbBlocks++; 2057 } 2058 2059 *cSize = ip - (const BYTE*)src; 2060 *dBound = nbBlocks * BLOCKSIZE; 2061 } 2062 2063 /******************************* 2064 * Streaming Decompression API 2065 *******************************/ 2066 2067 size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx) 2068 { 2069 dctx->expected = ZSTD_frameHeaderSize; 2070 dctx->phase = 0; 2071 dctx->previousDstEnd = NULL; 2072 dctx->base = NULL; 2073 return 0; 2074 } 2075 2076 ZSTDv01_Dctx* ZSTDv01_createDCtx(void) 2077 { 2078 ZSTDv01_Dctx* dctx = (ZSTDv01_Dctx*)malloc(sizeof(ZSTDv01_Dctx)); 2079 if (dctx==NULL) return NULL; 2080 ZSTDv01_resetDCtx(dctx); 2081 return dctx; 2082 } 2083 2084 size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx) 2085 { 2086 free(dctx); 2087 return 0; 2088 } 2089 2090 size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx) 2091 { 2092 return ((dctx_t*)dctx)->expected; 2093 } 2094 2095 size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) 2096 { 2097 dctx_t* ctx = (dctx_t*)dctx; 2098 2099 /* Sanity check */ 2100 if (srcSize != ctx->expected) return ERROR(srcSize_wrong); 2101 if (dst != ctx->previousDstEnd) /* not contiguous */ 2102 ctx->base = dst; 2103 2104 /* Decompress : frame header */ 2105 if (ctx->phase == 0) 2106 { 2107 /* Check frame magic header */ 2108 U32 magicNumber = ZSTD_readBE32(src); 2109 if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown); 2110 ctx->phase = 1; 2111 ctx->expected = ZSTD_blockHeaderSize; 2112 return 0; 2113 } 2114 2115 /* Decompress : block header */ 2116 if (ctx->phase == 1) 2117 { 2118 blockProperties_t bp; 2119 size_t blockSize = ZSTDv01_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); 2120 if (ZSTDv01_isError(blockSize)) return blockSize; 2121 if (bp.blockType == bt_end) 2122 { 2123 ctx->expected = 0; 2124 ctx->phase = 0; 2125 } 2126 else 2127 { 2128 ctx->expected = blockSize; 2129 ctx->bType = bp.blockType; 2130 ctx->phase = 2; 2131 } 2132 2133 return 0; 2134 } 2135 2136 /* Decompress : block content */ 2137 { 2138 size_t rSize; 2139 switch(ctx->bType) 2140 { 2141 case bt_compressed: 2142 rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize); 2143 break; 2144 case bt_raw : 2145 rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize); 2146 break; 2147 case bt_rle : 2148 return ERROR(GENERIC); /* not yet handled */ 2149 break; 2150 case bt_end : /* should never happen (filtered at phase 1) */ 2151 rSize = 0; 2152 break; 2153 default: 2154 return ERROR(GENERIC); 2155 } 2156 ctx->phase = 1; 2157 ctx->expected = ZSTD_blockHeaderSize; 2158 ctx->previousDstEnd = (void*)( ((char*)dst) + rSize); 2159 return rSize; 2160 } 2161 2162 } 2163