/*
 * LZ4 - Fast LZ compression algorithm
 * Header File
 * Copyright (C) 2011-2013, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
 * - LZ4 source repository : http://code.google.com/p/lz4/
 */

#include <sys/zfs_context.h>

static int real_LZ4_compress(const char *source, char *dest, int isize,
    int osize);
static int real_LZ4_uncompress(const char *source, char *dest, int osize);
static int LZ4_compressBound(int isize);
static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);
static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);
static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);

/*
 * ZFS entry point: compress s_len bytes from s_start into d_start.
 *
 * The compressed size is stored big-endian in the first 4 bytes of the
 * destination, so the decompressor can recover the true payload length
 * even if the stored buffer was padded.
 *
 * Returns the total bytes written (payload + 4-byte size prefix), or
 * s_len to signal "did not compress" (the ZFS convention for failure).
 * The 'n' argument (compression level) is unused here.
 */
/*ARGSUSED*/
size_t
lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
{
	uint32_t bufsiz;
	char *dest = d_start;

	ASSERT(d_len >= sizeof (bufsiz));

	/* Compress into dest after the 4-byte size prefix. */
	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
	    d_len - sizeof (bufsiz));

	/* Signal an error if the compression routine returned zero. */
	if (bufsiz == 0)
		return (s_len);

	/*
	 * Encode the compressed buffer size at the start. We'll need this in
	 * decompression to counter the effects of padding which might be
	 * added to the compressed buffer and which, if unhandled, would
	 * confuse the hell out of our decompression function.
	 */
	*(uint32_t *)dest = BE_32(bufsiz);

	return (bufsiz + sizeof (bufsiz));
}

/*
 * ZFS entry point: decompress s_len bytes from s_start into d_start
 * (d_len bytes available).
 *
 * The first 4 bytes of the source are the big-endian compressed payload
 * size written by lz4_compress() above. Returns 0 on success, non-zero
 * on any failure (corrupt stream or inconsistent encoded size). The 'n'
 * argument is unused here.
 */
/*ARGSUSED*/
int
lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
{
	const char *src = s_start;
	uint32_t bufsiz = BE_IN32(src);

	/* invalid compressed buffer size encoded at start */
	if (bufsiz + sizeof (bufsiz) > s_len)
		return (1);

	/*
	 * Returns 0 on success (decompression function returned
	 * non-negative) and non-zero on failure (decompression function
	 * returned negative).
	 */
	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
	    d_start, bufsiz, d_len) < 0);
}

/*
 * LZ4 API Description:
 *
 * Simple Functions:
 * real_LZ4_compress() :
 *	isize  : is the input size. Max supported value is ~1.9GB
 *	return : the number of bytes written in buffer dest
 *		 or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
 *	note : destination buffer must be already allocated.
 *		destination buffer must be sized to handle worst cases
 *		situations (input data not compressible) worst case size
 *		evaluation is provided by function LZ4_compressBound().
 *
 * real_LZ4_uncompress() :
 *	osize  : is the output size, therefore the original size
 *	return : the number of bytes read in the source buffer.
 *		If the source stream is malformed, the function will stop
 *		decoding and return a negative result, indicating the byte
 *		position of the faulty instruction. This function never
 *		writes beyond dest + osize, and is therefore protected
 *		against malicious data packets.
 *	note : destination buffer must be already allocated
 *
 * Advanced Functions
 *
 * LZ4_compressBound() :
 *	Provides the maximum size that LZ4 may output in a "worst case"
 *	scenario (input data not compressible) primarily useful for memory
 *	allocation of output buffer.
 *
 *	isize  : is the input size. Max supported value is ~1.9GB
 *	return : maximum output size in a "worst case" scenario
 *	note : this function is limited by "int" range (2^31-1)
 *
 * LZ4_uncompress_unknownOutputSize() :
 *	isize  : is the input size, therefore the compressed size
 *	maxOutputSize : is the size of the destination buffer (which must be
 *		already allocated)
 *	return : the number of bytes decoded in the destination buffer
 *		(necessarily <= maxOutputSize).
 *		If the source stream is
 *		malformed, the function will stop decoding and return a
 *		negative result, indicating the byte position of the faulty
 *		instruction. This function never writes beyond dest +
 *		maxOutputSize, and is therefore protected against malicious
 *		data packets.
 *	note   : Destination buffer must be already allocated.
 *		This version is slightly slower than real_LZ4_uncompress()
 *
 * LZ4_compressCtx() :
 *	This function explicitly handles the CTX memory structure.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_zalloc). Passing NULL
 *	isn't valid.
 *
 * LZ4_compress64kCtx() :
 *	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 *	isize *Must* be <64KB, otherwise the output will be corrupted.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_zalloc). Passing NULL
 *	isn't valid.
 */

/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio
 *	 Lowering this value reduces memory usage. Reduced memory usage
 *	typically improves speed, due to cache effect (ex: L1 32KB for Intel,
 *	L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 *	(examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define	COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 *	algorithm skip faster data segments considered "incompressible".
 *	This may decrease compression ratio dramatically, but will be
 *	faster on incompressible data. Increasing this value will make
 *	the algorithm search more before declaring a segment "incompressible".
 *	This could improve compression a bit, but will be slower on
 *	incompressible data. The default value (6) is recommended.
 */
#define	NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
 * performance for big endian cpu, but the resulting compressed stream
 * will be incompatible with little-endian CPU. You can set this option
 * to 1 in situations where data will stay within closed environment.
 * This option is useless on Little_Endian CPU (such as x86).
 */
/* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */

/*
 * CPU Feature Detection
 */

/* 32 or 64 bits ? */
#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
	defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
	defined(__LP64__) || defined(_LP64))
#define	LZ4_ARCH64 1
#else
#define	LZ4_ARCH64 0
#endif

/*
 * Limits the amount of stack space that the algorithm may consume to hold
 * the compression lookup table. The value `9' here means we'll never use
 * more than 2k of stack (see above for a description of COMPRESSIONLEVEL).
 * If more memory is needed, it is allocated from the heap.
 */
#define	STACKLIMIT 9

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianess.
 */
#if (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || \
	defined(_BIG_ENDIAN) || defined(_ARCH_PPC) || defined(__PPC__) || \
	defined(__PPC) || defined(PPC) || defined(__powerpc__) || \
	defined(__powerpc) || defined(powerpc) || \
	((defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))))
#define	LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian format
 * are unsupported.
 */
#endif

/*
 * Unaligned memory access is automatically enabled for "common" CPU,
 * such as x86. For others CPU, the compiler will be more cautious, and
 * insert extra code to ensure aligned access is respected.
 * If you know
 * your target CPU supports unaligned memory access, you may want to
 * force this option manually to improve performance
 */
#if defined(__ARM_FEATURE_UNALIGNED)
#define	LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

/* sparc lacks fast bit-scan instructions; use the software fallback. */
#ifdef __sparc
#define	LZ4_FORCE_SW_BITCOUNT
#endif

/*
 * Compiler Options
 */
#if __STDC_VERSION__ >= 199901L	/* C99 */
/* "restrict" is a known keyword */
#else
/* Disable restrict */
#define	restrict
#endif

#define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#ifdef _MSC_VER
/* Visual Studio */
/* Visual is not C99, but supports some kind of inline */
#define	inline __forceinline
#if LZ4_ARCH64
/* For Visual 2005 */
#pragma intrinsic(_BitScanForward64)
#pragma intrinsic(_BitScanReverse64)
#else /* !LZ4_ARCH64 */
/* For Visual 2005 */
#pragma intrinsic(_BitScanForward)
#pragma intrinsic(_BitScanReverse)
#endif /* !LZ4_ARCH64 */
#endif /* _MSC_VER */

#ifdef _MSC_VER
#define	lz4_bswap16(x) _byteswap_ushort(x)
#else /* !_MSC_VER */
#define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
	(((x) & 0xffu) << 8)))
#endif /* !_MSC_VER */

#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
#define	expect(expr, value)    (__builtin_expect((expr), (value)))
#else
#define	expect(expr, value)    (expr)
#endif

/* Branch-prediction hints; plain pass-through when unsupported. */
#define	likely(expr)	expect((expr) != 0, 1)
#define	unlikely(expr)	expect((expr) != 0, 0)

/* Basic types */
#if defined(_MSC_VER)
/* Visual Studio does not support 'stdint' natively */
#define	BYTE	unsigned __int8
#define	U16	unsigned __int16
#define	U32	unsigned __int32
#define	S32	__int32
#define	U64	unsigned __int64
#else /* !defined(_MSC_VER) */
#define	BYTE	uint8_t
#define	U16	uint16_t
#define	U32	uint32_t
#define	S32	int32_t
#define	U64	uint64_t
#endif /* !defined(_MSC_VER) */

/*
 * When unaligned access is not known-safe, pack these wrapper structs so
 * the compiler emits byte-wise (alignment-safe) loads/stores through them.
 */
#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(1)
#endif

typedef struct _U16_S {
	U16 v;
} U16_S;
typedef struct _U32_S {
	U32 v;
} U32_S;
typedef struct _U64_S {
	U64 v;
} U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack()
#endif

/* Possibly-unaligned 64/32/16-bit accesses through the packed structs. */
#define	A64(x) (((U64_S *)(x))->v)
#define	A32(x) (((U32_S *)(x))->v)
#define	A16(x) (((U16_S *)(x))->v)

/*
 * Constants
 */
#define	MINMATCH 4

#define	HASH_LOG COMPRESSIONLEVEL
#define	HASHTABLESIZE (1 << HASH_LOG)
#define	HASH_MASK (HASHTABLESIZE - 1)

#define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
	NOTCOMPRESSIBLE_CONFIRMATION : 2)

/*
 * Defines if memory is allocated into the stack (local variable),
 * or into the heap (kmem_alloc()).
 */
#define	HEAPMODE (HASH_LOG > STACKLIMIT)
#define	COPYLENGTH 8
#define	LASTLITERALS 5
#define	MFLIMIT (COPYLENGTH + MINMATCH)
#define	MINLENGTH (MFLIMIT + 1)

#define	MAXD_LOG 16
#define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)

/* Token layout: high 4 bits = literal run length, low 4 bits = match len. */
#define	ML_BITS 4
#define	ML_MASK ((1U<<ML_BITS)-1)
#define	RUN_BITS (8-ML_BITS)
#define	RUN_MASK ((1U<<RUN_BITS)-1)


/*
 * Architecture-specific macros
 */
#if LZ4_ARCH64
#define	STEPSIZE 8
#define	UARCH U64
#define	AARCH A64
#define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
#define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
#define	HTYPE U32
#define	INITBASE(base)		const BYTE* const base = ip
#else /* !LZ4_ARCH64 */
#define	STEPSIZE 4
#define	UARCH U32
#define	AARCH A32
#define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
#define	LZ4_SECURECOPY		LZ4_WILDCOPY
#define	HTYPE const BYTE *
#define	INITBASE(base)		const int base = 0
#endif /* !LZ4_ARCH64 */

/* Offsets in the stream are always little-endian 16-bit values. */
#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
#else
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
#endif


/* Local structures */
struct refTables {
	HTYPE hashTable[HASHTABLESIZE];
};


/* Macros */
#define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
	HASH_LOG))
#define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
/* NOTE: wildcopy may overshoot the end pointer; callers reserve slack. */
#define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
#define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
	d = e; }


/* Private functions */
#if LZ4_ARCH64

/*
 * Given the XOR of two words that differ, return how many of their bytes
 * (from the stream's point of view) matched — used to extend a match.
 * Uses hardware bit-scan intrinsics where available, otherwise a software
 * fallback (De Bruijn multiply-and-lookup or shift cascade).
 */
static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanReverse64(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clzll(val) >> 3);
#else
	int r;
	if (!(val >> 32)) {
		r = 4;
	} else {
		r = 0;
		val >>= 32;
	}
	if (!(val >> 16)) {
		r += 2;
		val >>= 8;
	} else {
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanForward64(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctzll(val) >> 3);
#else
	/* De Bruijn multiply-and-lookup: byte index of lowest set bit. */
	static const int DeBruijnBytePos[64] =
	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
	};
	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
	    58];
#endif
#endif
}

#else

/* 32-bit counterpart of LZ4_NbCommonBytes() above. */
static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanReverse(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clz(val) >> 3);
#else
	int r;
	if (!(val >> 16)) {
		r = 2;
		val >>= 8;
	} else {
		r = 0;
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanForward(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctz(val) >> 3);
#else
	/* De Bruijn multiply-and-lookup: byte index of lowest set bit. */
	static const int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0,
		3, 2, 2, 1, 3, 2, 0, 1,
		3, 3, 1, 2, 2, 2, 2, 0,
		3, 1, 2, 0, 1, 0, 1, 1
	};
	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
	    27];
#endif
#endif
}

#endif

/* Public functions */

/*
 * Worst-case compressed size for an isize-byte input (incompressible data
 * still costs run-length escape bytes plus framing overhead).
 */
static int
LZ4_compressBound(int isize)
{
	return (isize + (isize / 255) + 16);
}

/* Compression functions */

/*
 * Core LZ4 compressor for inputs of any size (hash table holds 32-bit
 * offsets on 64-bit builds, pointers on 32-bit — see HTYPE).
 *
 * ctx    : caller-allocated struct refTables (used only when HEAPMODE);
 *          must be zeroed by the caller.
 * return : compressed size in bytes, or 0 if the output would not fit
 *          in osize bytes.
 */
/*ARGSUSED*/
static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	HTYPE *HashTable = (HTYPE *) (srt->hashTable);
#else
	HTYPE HashTable[HASHTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (BYTE *) source;
	INITBASE(base);
	const BYTE *anchor = ip;	/* start of pending literal run */
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;


	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
	ip++;
	forwardH = LZ4_HASH_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/*
		 * Find a match. The step grows as attempts fail, so
		 * incompressible regions are skipped progressively faster.
		 */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if unlikely(forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

		/* Catch up: extend the match backwards over literals. */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    unlikely(ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			/* Literal run too long for token: emit 255-escapes. */
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		/* Extend the match a word at a time. */
		while likely(ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position: immediate match chains the sequences. */
		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255) {
				*op++ = 255;
			}
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}



/* Note : this function is valid only if isize < LZ4_64KLIMIT
 */
#define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
#define	HASHLOG64K (HASH_LOG + 1)
#define	HASH64KTABLESIZE (1U << HASHLOG64K)
#define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
	HASHLOG64K))
#define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))

/*
 * Small-input (<64KB) compressor. Because every offset fits in 16 bits,
 * the hash table stores U16 offsets and can be twice as deep (HASHLOG64K)
 * in the same memory; the MAX_DISTANCE window check is also unnecessary.
 * Same contract as LZ4_compressCtx() otherwise.
 */
/*ARGSUSED*/
static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	U16 *HashTable = (U16 *) (srt->hashTable);
#else
	U16 HashTable[HASH64KTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (BYTE *) source;
	const BYTE *anchor = ip;	/* start of pending literal run */
	const BYTE *const base = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	ip++;
	forwardH = LZ4_HASH64K_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match (accelerating skip, as in LZ4_compressCtx) */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH64K_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while (A32(ref) != A32(ip));

		/* Catch up: extend the match backwards over literals. */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    (ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			/* Literal run too long for token: emit 255-escapes. */
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		/* Extend the match a word at a time. */
		while (ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position: immediate match chains the sequences. */
		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
		if (A32(ref) == A32(ip)) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH64K_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255)
				*op++ = 255;
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}

/*
 * Dispatch to the 64k or general compressor based on input size.
 * When HEAPMODE, the hash table is allocated with kmem_zalloc(KM_NOSLEEP);
 * allocation failure returns 0, which callers treat as "incompressible".
 */
static int
real_LZ4_compress(const char *source, char *dest, int isize, int osize)
{
#if HEAPMODE
	void *ctx = kmem_zalloc(sizeof (struct refTables), KM_NOSLEEP);
	int result;

	/*
	 * out of kernel memory, gently fall through - this will disable
	 * compression in zio_compress_data
	 */
	if (ctx == NULL)
		return (0);

	if (isize < LZ4_64KLIMIT)
		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
	else
		result = LZ4_compressCtx(ctx, source, dest, isize, osize);

	kmem_free(ctx, sizeof (struct refTables));
	return (result);
#else
	if (isize < (int)LZ4_64KLIMIT)
		return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
	return (LZ4_compressCtx(NULL, source, dest, isize, osize));
#endif
}

/* Decompression functions */

/*
 * Note: The decoding functions real_LZ4_uncompress() and
 * LZ4_uncompress_unknownOutputSize() are safe against "buffer overflow"
 * attack type. They will never write nor read outside of the provided
 * output buffers. LZ4_uncompress_unknownOutputSize() also insures that
 * it will never read outside of the input buffer. A corrupted input
 * will produce an error result, a negative int, indicating the position
 * of the error within input stream.
 */

/*
 * Decode an LZ4 stream whose DECOMPRESSED size (osize) is known exactly.
 * Does not take the compressed length: the stream itself delimits the
 * decode (the last sequence must land exactly on dest + osize).
 * Returns bytes consumed from source, or a negative byte position on a
 * malformed stream. Never writes beyond dest + osize.
 */
static int
real_LZ4_uncompress(const char *source, char *dest, int osize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *ref;

	BYTE *op = (BYTE *) dest;
	BYTE *const oend = op + osize;
	BYTE *cpy;

	unsigned token;

	size_t length;
	/* Corrections applied when a match overlaps its own output. */
	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif

	/* Main Loop */
	for (;;) {
		/* get runlength */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			size_t len;
			/* 255-escape continuation bytes extend the run. */
			for (; (len = *ip++) == 255; length += 255) {
			}
			length += len;
		}
		/* copy literals */
		cpy = op + length;
		/* CORNER-CASE: cpy might overflow. */
		if (cpy < op)
			goto _output_error;	/* cpy was overflowed, bail! */
		if unlikely(cpy > oend - COPYLENGTH) {
			if (cpy != oend)
				/* Error: we must necessarily stand at EOF */
				goto _output_error;
			(void) memcpy(op, ip, length);
			ip += length;
			break;	/* EOF */
		}
		LZ4_WILDCOPY(ip, op, cpy);
		ip -= (op - cpy);	/* wildcopy overshoot correction */
		op = cpy;

		/* get offset */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if unlikely(ref < (BYTE * const) dest)
			/*
			 * Error: offset creates reference outside destination
			 * buffer
			 */
			goto _output_error;

		/* get matchlength */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			for (; *ip == 255; length += 255) {
				ip++;
			}
			length += *ip++;
		}
		/*
		 * copy repeated sequence; offsets < STEPSIZE overlap the
		 * bytes being written, so copy byte-wise with the
		 * dec32/dec64 table corrections first.
		 */
		if unlikely(op - ref < STEPSIZE) {
#if LZ4_ARCH64
			size_t dec64 = dec64table[op-ref];
#else
			const int dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op-ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			if (cpy > oend)
				/*
				 * Error: request to write beyond destination
				 * buffer
				 */
				goto _output_error;
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since last
				 * 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)ip) - source);

	/* write overflow error detected */
	_output_error:
	return (int)(-(((char *)ip) - source));
}

/*
 * Decode an LZ4 stream whose COMPRESSED size (isize) is known; the output
 * may be anything up to maxOutputSize. This is the variant used by
 * lz4_decompress() since padding makes the exact output size unknown.
 * Also bounds every read against iend, so it never reads outside the
 * input buffer. Returns bytes decoded, or a negative byte position on a
 * malformed stream.
 */
static int
LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
    int maxOutputSize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *const iend = ip + isize;
	const BYTE *ref;

	BYTE *op = (BYTE *) dest;
	BYTE *const oend = op + maxOutputSize;
	BYTE *cpy;

	/* Corrections applied when a match overlaps its own output. */
	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif

	/* Main Loop */
	while (ip < iend) {
		unsigned token;
		size_t length;

		/* get runlength */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			int s = 255;
			/* 255-escapes, bounded by the end of the input. */
			while ((ip < iend) && (s == 255)) {
				s = *ip++;
				length += s;
			}
		}
		/* copy literals */
		cpy = op + length;
		/* CORNER-CASE: cpy might overflow. */
		if (cpy < op)
			goto _output_error;	/* cpy was overflowed, bail!
							 */
		if ((cpy > oend - COPYLENGTH) ||
		    (ip + length > iend - COPYLENGTH)) {
			if (cpy > oend)
				/* Error: writes beyond output buffer */
				goto _output_error;
			if (ip + length != iend)
				/*
				 * Error: LZ4 format requires to consume all
				 * input at this stage
				 */
				goto _output_error;
			(void) memcpy(op, ip, length);
			op += length;
			/* Necessarily EOF, due to parsing restrictions */
			break;
		}
		LZ4_WILDCOPY(ip, op, cpy);
		ip -= (op - cpy);	/* wildcopy overshoot correction */
		op = cpy;

		/* get offset */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if (ref < (BYTE * const) dest)
			/*
			 * Error: offset creates reference outside of
			 * destination buffer
			 */
			goto _output_error;

		/* get matchlength */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			/* 255-escapes, bounded by the end of the input. */
			while (ip < iend) {
				int s = *ip++;
				length += s;
				if (s == 255)
					continue;
				break;
			}
		}
		/*
		 * copy repeated sequence; offsets < STEPSIZE overlap the
		 * bytes being written, so copy byte-wise with the
		 * dec32/dec64 table corrections first.
		 */
		if unlikely(op - ref < STEPSIZE) {
#if LZ4_ARCH64
			size_t dec64 = dec64table[op-ref];
#else
			const int dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op-ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			if (cpy > oend)
				/*
				 * Error: request to write outside of
				 * destination buffer
				 */
				goto _output_error;
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since
				 * last 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)op) - dest);

	/* write overflow error detected */
	_output_error:
	return (int)(-(((char *)ip) - source));
}