// SPDX-License-Identifier: BSD-2-Clause
/*
   LZ4 - Fast LZ compression algorithm
   Copyright (C) 2011-present, Yann Collet.

   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:

       * Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above
   copyright notice, this list of conditions and the following disclaimer
   in the documentation and/or other materials provided with the
   distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

   You can contact the author at :
    - LZ4 homepage : http://www.lz4.org
    - LZ4 source repository : https://github.com/lz4/lz4
*/

/*
 * This file contains unmodified code from lz4 1.9.3's decompressor, plus
 * associated macros and constants.
 *
 * It also contains a couple of defines from the old lz4.c to make things
 * fit together smoothly.
 *
 */

#include <sys/zfs_context.h>

int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);

/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio
 *	 Lowering this value reduces memory usage. Reduced memory usage
 *	typically improves speed, due to cache effect (ex: L1 32KB for Intel,
 *	L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 *	(examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define	COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 *	algorithm skip faster data segments considered "incompressible".
 *	This may decrease compression ratio dramatically, but will be
 *	faster on incompressible data. Increasing this value will make
 *	the algorithm search more before declaring a segment "incompressible".
 *	This could improve compression a bit, but will be slower on
 *	incompressible data. The default value (6) is recommended.
 */
#define	NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 */
#if defined(_ZFS_BIG_ENDIAN)
#define	LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian format
 * are unsupported.
 */
#undef LZ4_BIG_ENDIAN
#endif

/*-************************************
 * CPU Feature Detection
 **************************************/
/* LZ4_FORCE_MEMORY_ACCESS
 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
 * The switch below allows selecting a different access method for improved performance.
 * Method 0 (default) : use `memcpy()`. Safe and portable.
 * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
 *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
 * Method 2 : direct access. This method is portable but violates the C standard.
 *            It can generate buggy code on targets whose assembly generation depends on alignment.
 *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
 * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
 * Prefer these methods in priority order (0 > 1 > 2)
 */
#ifndef LZ4_FORCE_MEMORY_ACCESS   /* can be defined externally */
#  if defined(__GNUC__) && \
    ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \
    || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
#    define LZ4_FORCE_MEMORY_ACCESS 2
#  elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__)
#    define LZ4_FORCE_MEMORY_ACCESS 1
#  endif
#endif

/*
 * LZ4_FORCE_SW_BITCOUNT
 * Define this parameter if your target system or compiler does not support hardware bit count
 */
/*
 * Illumos : we can't use GCC's __builtin_ctz family of builtins in the
 *	kernel
 * Linux : we can use GCC's __builtin_ctz family of builtins in the
 *	kernel
 */
#undef LZ4_FORCE_SW_BITCOUNT
#if defined(__sunos__)
#define	LZ4_FORCE_SW_BITCOUNT
#endif

/*
 * Compiler Options
 */
/* Disable restrict */
#define	restrict

/*
 * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it.
 * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16
 */
#ifdef GCC_VERSION
#undef GCC_VERSION
#endif

#define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#ifndef LZ4_FORCE_INLINE
#  ifdef _MSC_VER    /* Visual Studio */
#    define LZ4_FORCE_INLINE static __forceinline
#  else
#    if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
#      ifdef __GNUC__
#        define LZ4_FORCE_INLINE static inline __attribute__((always_inline))
#      else
#        define LZ4_FORCE_INLINE static inline
#      endif
#    else
#      define LZ4_FORCE_INLINE static
#    endif /* __STDC_VERSION__ */
#  endif  /* _MSC_VER */
#endif /* LZ4_FORCE_INLINE */

/* LZ4_FORCE_O2 and LZ4_FORCE_INLINE
 * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8,
 * together with a simple 8-byte copy loop as a fall-back path.
 * However, this optimization hurts the decompression speed by >30%,
 * because the execution does not go to the optimized loop
 * for typical compressible data, and all of the preamble checks
 * before going to the fall-back path become useless overhead.
 * This optimization happens only with the -O3 flag, and -O2 generates
 * a simple 8-byte copy loop.
 * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8
 * functions are annotated with __attribute__((optimize("O2"))),
 * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute
 * of LZ4_wildCopy8 does not affect the compression speed.
 */
#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__)
#  define LZ4_FORCE_O2	__attribute__((optimize("O2")))
#  undef LZ4_FORCE_INLINE
#  define LZ4_FORCE_INLINE	static __inline __attribute__((optimize("O2"),always_inline))
#else
#  define LZ4_FORCE_O2
#endif

#ifndef expect
#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
#  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
#else
#  define expect(expr,value)    (expr)
#endif
#endif

#ifndef likely
#define	likely(expr)	expect((expr) != 0, 1)
#endif

#ifndef unlikely
#define	unlikely(expr)	expect((expr) != 0, 0)
#endif

#ifndef _KERNEL
#include <stdlib.h>	/* malloc, calloc, free */
#include <string.h>	/* memset, memcpy */
#endif
#define	ALLOC(s)		malloc(s)
#define	ALLOC_AND_ZERO(s)	calloc(1,s)
#define	FREEMEM(p)		free(p)

#define	MEM_INIT(p,v,s)		memset((p),(v),(s))


/*-************************************
 * Common Constants
 **************************************/
#define	MINMATCH 4

#define	WILDCOPYLENGTH 8
#define	LASTLITERALS   5	/* see ../doc/lz4_Block_format.md#parsing-restrictions */
#define	MFLIMIT       12	/* see ../doc/lz4_Block_format.md#parsing-restrictions */
#define	MATCH_SAFEGUARD_DISTANCE  ((2*WILDCOPYLENGTH) - MINMATCH)	/* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
#define	FASTLOOP_SAFE_DISTANCE 64

#define	KB *(1 <<10)
#define	MB *(1 <<20)
#define	GB *(1U<<30)

#ifndef LZ4_DISTANCE_MAX	/* history window size; can be user-defined at compile time */
#  define LZ4_DISTANCE_MAX 65535	/* set to maximum value by default */
#endif

#define	LZ4_DISTANCE_ABSOLUTE_MAX 65535
#if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX)	/* max supported by LZ4 format */
#  error "LZ4_DISTANCE_MAX is too big : must be <= 65535"
#endif

#define	ML_BITS	4
#define	ML_MASK	((1U<<ML_BITS)-1)
#define	RUN_BITS (8-ML_BITS)
#define	RUN_MASK ((1U<<RUN_BITS)-1)

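/*
 * Illustrative note (not part of the upstream sources): each sequence in an
 * LZ4 block starts with a one-byte token whose high RUN_BITS hold the
 * literal length and whose low ML_BITS hold the match length minus
 * MINMATCH.  For example, token 0x4b encodes 4 literal bytes followed by a
 * match of 0xb + MINMATCH = 15 bytes.  A field equal to its mask (RUN_MASK
 * or ML_MASK) is extended by subsequent bytes, each adding 0..255, with the
 * last extension byte being < 255: the bytes 0xff 0xff 0x0c after a
 * RUN_MASK field encode a literal length of 15 + 255 + 255 + 12 = 537.
 */
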
#define	DEBUGLOG(l, ...)	{}	/* disabled */

#ifndef assert
#define	assert	ASSERT
#endif

/*-************************************
 * Types
 **************************************/
#ifndef _KERNEL
#include <limits.h>
#endif
#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
#ifndef _KERNEL
#include <stdint.h>
#endif
  typedef uint8_t BYTE;
  typedef uint16_t U16;
  typedef uint32_t U32;
  typedef int32_t S32;
  typedef uint64_t U64;
  typedef uintptr_t uptrval;
#else
# if UINT_MAX != 4294967295UL
#   error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4"
# endif
  typedef unsigned char BYTE;
  typedef unsigned short U16;
  typedef unsigned int U32;
  typedef signed int S32;
  typedef unsigned long long U64;
  typedef size_t uptrval;	/* generally true, except OpenVMS-64 */
#endif

#if defined(__x86_64__)
  typedef U64 reg_t;	/* 64-bits in x32 mode */
#else
  typedef size_t reg_t;	/* 32-bits in x32 mode */
#endif

typedef enum {
	notLimited = 0,
	limitedOutput = 1,
	fillOutput = 2
} limitedOutput_directive;


/*-************************************
 * Reading and writing into memory
 **************************************/

/**
 * LZ4 relies on memcpy with a constant size being inlined. In freestanding
 * environments, the compiler can't assume the implementation of memcpy() is
 * standard compliant, so it can't apply its specialized memcpy() inlining
 * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze
 * memcpy() as if it were standard compliant, so it can inline it in freestanding
 * environments. This is needed when decompressing the Linux Kernel, for example.
 */
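/*
 * With a compile-time-constant size, a call such as LZ4_memcpy(dst, src, 8)
 * is expected to compile down to a single (possibly unaligned) 8-byte load
 * and store rather than a call into memcpy(); the wild-copy helpers below
 * rely on this for speed.  (This note is an illustration added here, not
 * upstream lz4 text.)
 */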
#if defined(__GNUC__) && (__GNUC__ >= 4)
#define	LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size)
#else
#define	LZ4_memcpy(dst, src, size) memcpy(dst, src, size)
#endif

static unsigned LZ4_isLittleEndian(void)
{
	const union { U32 u; BYTE c[4]; } one = { 1 };	/* don't use static : performance detrimental */
	return one.c[0];
}


#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2)
/* lie to the compiler about data alignment; use with caution */

static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; }

static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }

#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1)

/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
/* currently only defined for gcc and icc */
typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) unalign;

static U16 LZ4_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }

static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }

#else  /* safe and portable access using memcpy() */

static U16 LZ4_read16(const void* memPtr)
{
	U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
}

static void LZ4_write32(void* memPtr, U32 value)
{
	LZ4_memcpy(memPtr, &value, sizeof(value));
}

#endif /* LZ4_FORCE_MEMORY_ACCESS */

static U16 LZ4_readLE16(const void* memPtr)
{
	if (LZ4_isLittleEndian()) {
		return LZ4_read16(memPtr);
	} else {
		const BYTE* p = (const BYTE*)memPtr;
		return (U16)((U16)p[0] + (p[1]<<8));
	}
}

/* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
LZ4_FORCE_INLINE
void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd)
{
	BYTE* d = (BYTE*)dstPtr;
	const BYTE* s = (const BYTE*)srcPtr;
	BYTE* const e = (BYTE*)dstEnd;

	do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d<e);
}

static const unsigned inc32table[8] = {0, 1, 2, 1, 0, 4, 4, 4};
static const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3};


#ifndef LZ4_FAST_DEC_LOOP
#  if defined __i386__ || defined _M_IX86 || defined __x86_64__ || defined _M_X64
#    define LZ4_FAST_DEC_LOOP 1
#  elif defined(__aarch64__) && !defined(__clang__)
     /* On aarch64, we disable this optimization for clang because on certain
      * mobile chipsets, performance is reduced with clang. For information
      * refer to https://github.com/lz4/lz4/pull/707 */
#    define LZ4_FAST_DEC_LOOP 1
#  else
#    define LZ4_FAST_DEC_LOOP 0
#  endif
#endif
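
/*
 * Illustration (added note, not upstream text): a match whose offset is
 * smaller than its length overlaps itself, so copying `length` bytes from
 * `offset` bytes back replicates the last `offset` bytes of output.  For
 * example, with "AB" already written, a match of offset 2 and length 6
 * appends "ABABAB".  The inc32table/dec64table pair above, together with
 * the helpers below, lets the decoder perform this replication for small
 * offsets with a few fixed-size copies instead of a byte-by-byte loop.
 */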

#if LZ4_FAST_DEC_LOOP

LZ4_FORCE_INLINE void
LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
{
	assert(srcPtr + offset == dstPtr);
	if (offset < 8) {
		LZ4_write32(dstPtr, 0);	/* silence an msan warning when offset==0 */
		dstPtr[0] = srcPtr[0];
		dstPtr[1] = srcPtr[1];
		dstPtr[2] = srcPtr[2];
		dstPtr[3] = srcPtr[3];
		srcPtr += inc32table[offset];
		LZ4_memcpy(dstPtr+4, srcPtr, 4);
		srcPtr -= dec64table[offset];
		dstPtr += 8;
	} else {
		LZ4_memcpy(dstPtr, srcPtr, 8);
		dstPtr += 8;
		srcPtr += 8;
	}

	LZ4_wildCopy8(dstPtr, srcPtr, dstEnd);
}

/* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd
 * this version copies two times 16 bytes (instead of one time 32 bytes)
 * because it must be compatible with offsets >= 16. */
LZ4_FORCE_INLINE void
LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
{
	BYTE* d = (BYTE*)dstPtr;
	const BYTE* s = (const BYTE*)srcPtr;
	BYTE* const e = (BYTE*)dstEnd;

	do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);
}

/* LZ4_memcpy_using_offset()  presumes :
 * - dstEnd >= dstPtr + MINMATCH
 * - there are at least 8 bytes available to write after dstEnd */
LZ4_FORCE_INLINE void
LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
{
	BYTE v[8];

	assert(dstEnd >= dstPtr + MINMATCH);

	switch(offset) {
	case 1:
		MEM_INIT(v, *srcPtr, 8);
		break;
	case 2:
		LZ4_memcpy(v, srcPtr, 2);
		LZ4_memcpy(&v[2], srcPtr, 2);
		LZ4_memcpy(&v[4], v, 4);
		break;
	case 4:
		LZ4_memcpy(v, srcPtr, 4);
		LZ4_memcpy(&v[4], srcPtr, 4);
		break;
	default:
		LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
		return;
	}

	LZ4_memcpy(dstPtr, v, 8);
	dstPtr += 8;
	while (dstPtr < dstEnd) {
		LZ4_memcpy(dstPtr, v, 8);
		dstPtr += 8;
	}
}
#endif


/*-************************************
 * Local Structures and types
 **************************************/
typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t;

/**
 * This enum distinguishes several different modes of accessing previous
 * content in the stream.
 *
 * - noDict        : There is no preceding content.
 * - withPrefix64k : Table entries up to ctx->dictSize before the current blob
 *                   being compressed are valid and refer to the preceding
 *                   content (of length ctx->dictSize), which is available
 *                   contiguously preceding in memory the content currently
 *                   being compressed.
 * - usingExtDict  : Like withPrefix64k, but the preceding content is somewhere
 *                   else in memory, starting at ctx->dictionary with length
 *                   ctx->dictSize.
 * - usingDictCtx  : Like usingExtDict, but everything concerning the preceding
 *                   content is in a separate context, pointed to by
 *                   ctx->dictCtx. ctx->dictionary, ctx->dictSize, and table
 *                   entries in the current context that refer to positions
 *                   preceding the beginning of the current compression are
 *                   ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx
 *                   ->dictSize describe the location and size of the preceding
 *                   content, and matches are found by looking in the ctx
 *                   ->dictCtx->hashTable.
 */
typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive;
typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
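
/*
 * Note (added for orientation, not upstream text): the only entry point in
 * this file, LZ4_uncompress_unknownOutputSize() at the bottom, always calls
 * LZ4_decompress_generic() with noDict and no prefix; the other dictionary
 * modes are only reachable through upstream entry points that are not
 * included here.
 */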

/*-*******************************
 *  Decompression functions
 ********************************/

typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive;

typedef enum { loop_error = -2, initial_error = -1, ok = 0 } variable_length_error;

LZ4_FORCE_INLINE unsigned
read_variable_length(const BYTE**ip, const BYTE* lencheck,
    int loop_check, int initial_check,
    variable_length_error* error)
{
	U32 length = 0;
	U32 s;
	if (initial_check && unlikely((*ip) >= lencheck)) {	/* overflow detection */
		*error = initial_error;
		return length;
	}
	do {
		s = **ip;
		(*ip)++;
		length += s;
		if (loop_check && unlikely((*ip) >= lencheck)) {	/* overflow detection */
			*error = loop_error;
			return length;
		}
	} while (s==255);

	return length;
}

#define	LZ4_STATIC_ASSERT(c)	ASSERT(c)


/*! LZ4_decompress_generic() :
 *  This generic decompression function covers all use cases.
 *  It shall be instantiated several times, using different sets of directives.
 *  Note that it is important for performance that this function really get inlined,
 *  in order to remove useless branches during compilation optimization.
 */
LZ4_FORCE_INLINE int
LZ4_decompress_generic(
    const char* const src,
    char* const dst,
    int srcSize,
    int outputSize,		/* If endOnInput==endOnInputSize, this value is `dstCapacity` */

    endCondition_directive endOnInput,	/* endOnOutputSize, endOnInputSize */
    earlyEnd_directive partialDecoding,	/* full, partial */
    dict_directive dict,		/* noDict, withPrefix64k, usingExtDict */
    const BYTE* const lowPrefix,	/* always <= dst, == dst when no prefix */
    const BYTE* const dictStart,	/* only if dict==usingExtDict */
    const size_t dictSize		/* note : = 0 if noDict */
    )
{
	if ((src == NULL) || (outputSize < 0)) { return -1; }

	{	const BYTE* ip = (const BYTE*) src;
		const BYTE* const iend = ip + srcSize;

		BYTE* op = (BYTE*) dst;
		BYTE* const oend = op + outputSize;
		BYTE* cpy;

		const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize;

		const int safeDecode = (endOnInput==endOnInputSize);
		const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));


		/* Set up the "end" pointers for the shortcut. */
		const BYTE* const shortiend = iend - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/;
		const BYTE* const shortoend = oend - (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/;

		const BYTE* match;
		size_t offset;
		unsigned token;
		size_t length;


		DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize);

		/* Special cases */
		assert(lowPrefix <= op);
		if ((endOnInput) && (unlikely(outputSize==0))) {
			/* Empty output buffer */
			if (partialDecoding) return 0;
			return ((srcSize==1) && (*ip==0)) ? 0 : -1;
		}
		if ((!endOnInput) && (unlikely(outputSize==0))) { return (*ip==0 ? 1 : -1); }
		if ((endOnInput) && unlikely(srcSize==0)) { return -1; }

		/* Currently the fast loop shows a regression on qualcomm arm chips. */
#if LZ4_FAST_DEC_LOOP
		if ((oend - op) < FASTLOOP_SAFE_DISTANCE) {
			DEBUGLOG(6, "skip fast decode loop");
			goto safe_decode;
		}

		/* Fast loop : decode sequences as long as output < iend-FASTLOOP_SAFE_DISTANCE */
		while (1) {
			/* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */
			assert(oend - op >= FASTLOOP_SAFE_DISTANCE);
			if (endOnInput) { assert(ip < iend); }
			token = *ip++;
			length = token >> ML_BITS;	/* literal length */

			assert(!endOnInput || ip <= iend);	/* ip < iend before the increment */

			/* decode literal length */
			if (length == RUN_MASK) {
				variable_length_error error = ok;
				length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
				if (error == initial_error) { goto _output_error; }
				if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; }	/* overflow detection */
				if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; }	/* overflow detection */

				/* copy literals */
				cpy = op+length;
				LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
				if (endOnInput) {	/* LZ4_decompress_safe() */
					if ((cpy>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; }
					LZ4_wildCopy32(op, ip, cpy);
				} else {	/* LZ4_decompress_fast() */
					if (cpy>oend-8) { goto safe_literal_copy; }
					LZ4_wildCopy8(op, ip, cpy);	/* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
								 * it doesn't know input length, and only relies on end-of-block properties */
				}
				ip += length; op = cpy;
			} else {
				cpy = op+length;
				if (endOnInput) {	/* LZ4_decompress_safe() */
					DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length);
					/* We don't need to check oend, since we check it once for each loop below */
					if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) { goto safe_literal_copy; }
					/* Literals can only be 14, but hope compilers optimize if we copy by a register size */
					LZ4_memcpy(op, ip, 16);
				} else {	/* LZ4_decompress_fast() */
					/* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
					 * it doesn't know input length, and relies on end-of-block properties */
					LZ4_memcpy(op, ip, 8);
					if (length > 8) { LZ4_memcpy(op+8, ip+8, 8); }
				}
				ip += length; op = cpy;
			}

			/* get offset */
			offset = LZ4_readLE16(ip); ip+=2;
			match = op - offset;
			assert(match <= op);

			/* get matchlength */
			length = token & ML_MASK;

			if (length == ML_MASK) {
				variable_length_error error = ok;
				if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; }	/* Error : offset outside buffers */
				length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
				if (error != ok) { goto _output_error; }
				if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; }	/* overflow detection */
				length += MINMATCH;
				if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
					goto safe_match_copy;
				}
			} else {
				length += MINMATCH;
				if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
					goto safe_match_copy;
				}

				/* Fastpath check: Avoids a branch in LZ4_wildCopy32 if true */
				if ((dict == withPrefix64k) || (match >= lowPrefix)) {
					if (offset >= 8) {
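						/*
						 * Added note: here the match length is at
						 * most (ML_MASK - 1) + MINMATCH = 18 bytes,
						 * and offset >= 8 keeps each copy's source
						 * at least 8 bytes behind its destination,
						 * so the fixed 8+8+2 byte copy below always
						 * covers the whole match without overlapping
						 * any individual LZ4_memcpy().
						 */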
						assert(match >= lowPrefix);
						assert(match <= op);
						assert(op + 18 <= oend);

						LZ4_memcpy(op, match, 8);
						LZ4_memcpy(op+8, match+8, 8);
						LZ4_memcpy(op+16, match+16, 2);
						op += length;
						continue;
			}	}	}

			if (checkOffset && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; }	/* Error : offset outside buffers */
			/* match starting within external dictionary */
			if ((dict==usingExtDict) && (match < lowPrefix)) {
				if (unlikely(op+length > oend-LASTLITERALS)) {
					if (partialDecoding) {
						DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd");
						length = MIN(length, (size_t)(oend-op));
					} else {
						goto _output_error;	/* end-of-block condition violated */
				}	}

				if (length <= (size_t)(lowPrefix-match)) {
					/* match fits entirely within external dictionary : just copy */
					memmove(op, dictEnd - (lowPrefix-match), length);
					op += length;
				} else {
					/* match stretches into both external dictionary and current block */
					size_t const copySize = (size_t)(lowPrefix - match);
					size_t const restSize = length - copySize;
					LZ4_memcpy(op, dictEnd - copySize, copySize);
					op += copySize;
					if (restSize > (size_t)(op - lowPrefix)) {	/* overlap copy */
						BYTE* const endOfMatch = op + restSize;
						const BYTE* copyFrom = lowPrefix;
						while (op < endOfMatch) { *op++ = *copyFrom++; }
					} else {
						LZ4_memcpy(op, lowPrefix, restSize);
						op += restSize;
				}	}
				continue;
			}

			/* copy match within block */
			cpy = op + length;

			assert((op <= oend) && (oend-op >= 32));
			if (unlikely(offset<16)) {
				LZ4_memcpy_using_offset(op, match, cpy, offset);
			} else {
				LZ4_wildCopy32(op, match, cpy);
			}

			op = cpy;	/* wildcopy correction */
		}
	safe_decode:
#endif

		/* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */
		while (1) {
			token = *ip++;
			length = token >> ML_BITS;	/* literal length */

			assert(!endOnInput || ip <= iend);	/* ip < iend before the increment */

			/* A two-stage shortcut for the most common case:
			 * 1) If the literal length is 0..14, and there is enough space,
			 * enter the shortcut and copy 16 bytes on behalf of the literals
			 * (in the fast mode, only 8 bytes can be safely copied this way).
			 * 2) Further if the match length is 4..18, copy 18 bytes in a similar
			 * manner; but we ensure that there's enough space in the output for
			 * those 18 bytes earlier, upon entering the shortcut (in other words,
			 * there is a combined check for both stages).
			 */
			if ( (endOnInput ? length != RUN_MASK : length <= 8)
			    /* strictly "less than" on input, to re-enter the loop with at least one byte */
			    && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) {
				/* Copy the literals */
				LZ4_memcpy(op, ip, endOnInput ? 16 : 8);
				op += length; ip += length;

				/* The second stage: prepare for match copying, decode full info.
				 * If it doesn't work out, the info won't be wasted. */
				length = token & ML_MASK;	/* match length */
				offset = LZ4_readLE16(ip); ip += 2;
				match = op - offset;
				assert(match <= op);	/* check overflow */

				/* Do not deal with overlapping matches. */
				if ( (length != ML_MASK)
				    && (offset >= 8)
				    && (dict==withPrefix64k || match >= lowPrefix) ) {
					/* Copy the match. */
					LZ4_memcpy(op + 0, match + 0, 8);
					LZ4_memcpy(op + 8, match + 8, 8);
					LZ4_memcpy(op +16, match +16, 2);
					op += length + MINMATCH;
					/* Both stages worked, load the next token. */
					continue;
				}

				/* The second stage didn't work out, but the info is ready.
				 * Propel it right to the point of match copying. */
				goto _copy_match;
			}

			/* decode literal length */
			if (length == RUN_MASK) {
				variable_length_error error = ok;
				length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
				if (error == initial_error) { goto _output_error; }
				if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; }	/* overflow detection */
				if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; }	/* overflow detection */
			}

			/* copy literals */
			cpy = op+length;
#if LZ4_FAST_DEC_LOOP
		safe_literal_copy:
#endif
			LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
			if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) )
			    || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
			{
				/* We've either hit the input parsing restriction or the output parsing restriction.
				 * In the normal scenario, decoding a full block, it must be the last sequence,
				 * otherwise it's an error (invalid input or dimensions).
				 * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow.
				 */
				if (partialDecoding) {
					/* Since we are partial decoding we may be in this block because of the output parsing
					 * restriction, which is not valid since the output buffer is allowed to be undersized.
					 */
					assert(endOnInput);
					DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end")
					DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length);
					DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op));
					DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip));
					/* Finishing in the middle of a literals segment,
					 * due to lack of input.
					 */
					if (ip+length > iend) {
						length = (size_t)(iend-ip);
						cpy = op + length;
					}
					/* Finishing in the middle of a literals segment,
					 * due to lack of output space.
					 */
					if (cpy > oend) {
						cpy = oend;
						assert(op<=oend);
						length = (size_t)(oend-op);
					}
				} else {
					/* We must be on the last sequence because of the parsing limitations so check
					 * that we exactly regenerate the original size (must be exact when !endOnInput).
					 */
					if ((!endOnInput) && (cpy != oend)) { goto _output_error; }
					/* We must be on the last sequence (or invalid) because of the parsing limitations
					 * so check that we exactly consume the input and don't overrun the output buffer.
					 */
					if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) {
						DEBUGLOG(6, "should have been last run of literals")
						DEBUGLOG(6, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend);
						DEBUGLOG(6, "or cpy(%p) > oend(%p)", cpy, oend);
						goto _output_error;
					}
				}
				memmove(op, ip, length);	/* supports overlapping memory regions; only matters for in-place decompression scenarios */
				ip += length;
				op += length;
				/* Necessarily EOF when !partialDecoding.
				 * When partialDecoding, it is EOF if we've either
				 * filled the output buffer or
				 * can't proceed with reading an offset for following match.
				 */
				if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) {
					break;
				}
			} else {
				LZ4_wildCopy8(op, ip, cpy);	/* may overwrite up to WILDCOPYLENGTH beyond cpy */
				ip += length; op = cpy;
			}

			/* get offset */
			offset = LZ4_readLE16(ip); ip+=2;
			match = op - offset;

			/* get matchlength */
			length = token & ML_MASK;

		_copy_match:
			if (length == ML_MASK) {
				variable_length_error error = ok;
				length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
				if (error != ok) goto _output_error;
				if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error;	/* overflow detection */
			}
			length += MINMATCH;

#if LZ4_FAST_DEC_LOOP
		safe_match_copy:
#endif
			if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error;	/* Error : offset outside buffers */
			/* match starting within external dictionary */
			if ((dict==usingExtDict) && (match < lowPrefix)) {
				if (unlikely(op+length > oend-LASTLITERALS)) {
					if (partialDecoding) length = MIN(length, (size_t)(oend-op));
					else goto _output_error;	/* doesn't respect parsing restriction */
				}

				if (length <= (size_t)(lowPrefix-match)) {
					/* match fits entirely within external dictionary : just copy */
					memmove(op, dictEnd - (lowPrefix-match), length);
					op += length;
				} else {
					/* match stretches into both external dictionary and current block */
					size_t const copySize = (size_t)(lowPrefix - match);
					size_t const restSize = length - copySize;
					LZ4_memcpy(op, dictEnd - copySize, copySize);
					op += copySize;
					if (restSize > (size_t)(op - lowPrefix)) {	/* overlap copy */
						BYTE* const endOfMatch = op + restSize;
						const BYTE* copyFrom = lowPrefix;
						while (op < endOfMatch) *op++ = *copyFrom++;
					} else {
						LZ4_memcpy(op, lowPrefix, restSize);
						op += restSize;
				}	}
				continue;
			}
			assert(match >= lowPrefix);

			/* copy match within block */
			cpy = op + length;

			/* partialDecoding : may end anywhere within the block */
			assert(op<=oend);
			if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
				size_t const mlen = MIN(length, (size_t)(oend-op));
				const BYTE* const matchEnd = match + mlen;
				BYTE* const copyEnd = op + mlen;
				if (matchEnd > op) {	/* overlap copy */
					while (op < copyEnd) { *op++ = *match++; }
				} else {
					LZ4_memcpy(op, match, mlen);
				}
				op = copyEnd;
				if (op == oend) { break; }
				continue;
			}

			if (unlikely(offset<8)) {
				LZ4_write32(op, 0);	/* silence msan warning when offset==0 */
				op[0] = match[0];
				op[1] = match[1];
				op[2] = match[2];
				op[3] = match[3];
				match += inc32table[offset];
				LZ4_memcpy(op+4, match, 4);
				match -= dec64table[offset];
			} else {
				LZ4_memcpy(op, match, 8);
				match += 8;
			}
			op += 8;

			if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
				BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
				if (cpy > oend-LASTLITERALS) { goto _output_error; }	/* Error : last LASTLITERALS bytes must be literals (uncompressed) */
				if (op < oCopyLimit) {
					LZ4_wildCopy8(op, match, oCopyLimit);
					match += oCopyLimit - op;
					op = oCopyLimit;
				}
				while (op < cpy) { *op++ = *match++; }
			} else {
				LZ4_memcpy(op, match, 8);
				if (length > 16) { LZ4_wildCopy8(op+8, match+8, cpy); }
			}
			op = cpy;	/* wildcopy correction */
		}

		/* end of decoding */
		if (endOnInput) {
			DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst));
			return (int) (((char*)op)-dst);	/* Nb of output bytes decoded */
		} else {
			return (int) (((const char*)ip)-src);	/* Nb of input bytes read */
		}

		/* Overflow error detected */
	_output_error:
		return (int) (-(((const char*)ip)-src))-1;
	}
}

/*
 * LZ4_uncompress_unknownOutputSize() :
 *	isize  : is the input size, therefore the compressed size
 *	maxOutputSize : is the size of the destination buffer (which must be
 *		already allocated)
 *	return : the number of bytes decoded in the destination buffer
 *		(necessarily <= maxOutputSize). If the source stream is
 *		malformed, the function will stop decoding and return a
 *		negative result, indicating the byte position of the faulty
 *		instruction. This function never writes beyond dest +
 *		maxOutputSize, and is therefore protected against malicious
 *		data packets.
 *	note   : Destination buffer must be already allocated.
 *		This version is slightly slower than real_LZ4_uncompress()
 *
 */

/*
 * Note: In upstream code, LZ4_uncompress_unknownOutputSize is now a legacy
 *	wrapper for LZ4_decompress_safe which is a wrapper for
 *	LZ4_decompress_generic; this wrapper flattens that, rather than
 *	rewriting the callers.
 */
int LZ4_uncompress_unknownOutputSize(const char* source, char* dest,
    int compressedSize, int maxDecompressedSize)
{
	return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize,
	    endOnInputSize, decode_full_block, noDict,
	    (BYTE*)dest, NULL, 0);
}
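
/*
 * Usage sketch (illustrative, not part of the lz4 sources): given src_len
 * bytes of raw LZ4 block data in src[] and a destination buffer dst[] of
 * dst_cap bytes, a caller is expected to do something like:
 *
 *	int n = LZ4_uncompress_unknownOutputSize(src, dst, src_len, dst_cap);
 *	if (n < 0)
 *		handle_corrupt_input();		// malformed or truncated stream
 *	else
 *		consume(dst, n);		// n = decompressed byte count
 *
 * The buffer names and the two handler functions here are hypothetical;
 * the real caller is the ZFS lz4 decompress wrapper outside this file.
 */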