/*
 * LZ4 - Fast LZ compression algorithm
 * Header File
 * Copyright (C) 2011-2013, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
 * - LZ4 source repository : http://code.google.com/p/lz4/
 */

#include <sys/zfs_context.h>

static int real_LZ4_compress(const char *source, char *dest, int isize,
    int osize);
static int real_LZ4_uncompress(const char *source, char *dest, int osize);
static int LZ4_compressBound(int isize);
static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);
static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);
static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);

/*ARGSUSED*/
size_t
lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
{
	uint32_t bufsiz;
	char *dest = d_start;

	ASSERT(d_len >= sizeof (bufsiz));

	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
	    d_len - sizeof (bufsiz));

	/* Signal an error if the compression routine returned zero. */
	if (bufsiz == 0)
		return (s_len);

	/*
	 * Encode the compressed buffer size at the start. We'll need this in
	 * decompression to counter the effects of padding which might be
	 * added to the compressed buffer and which, if unhandled, would
	 * confuse the hell out of our decompression function.
	 */
	*(uint32_t *)dest = BE_32(bufsiz);

	return (bufsiz + sizeof (bufsiz));
}

/*ARGSUSED*/
int
lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
{
	const char *src = s_start;
	uint32_t bufsiz = BE_IN32(src);

	/* invalid compressed buffer size encoded at start */
	if (bufsiz + sizeof (bufsiz) > s_len)
		return (1);

	/*
	 * Returns 0 on success (decompression function returned non-negative)
	 * and non-zero on failure (decompression function returned negative).
	 */
	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
	    d_start, bufsiz, d_len) < 0);
}
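/*
 * Added note (illustrative only, not part of the original file): the two ZFS
 * wrappers above frame the raw LZ4 stream with a 4-byte big-endian length so
 * that padding appended to the compressed buffer can be ignored on
 * decompression. The buffer produced by lz4_compress() is laid out as
 *
 *	[ BE_32(bufsiz) : 4 bytes ][ LZ4 stream : bufsiz bytes ][ padding ]
 *
 * and a hypothetical caller round trip (buffer names invented for
 * illustration) would look roughly like:
 *
 *	size_t c_len = lz4_compress(src, cbuf, s_len, d_len, 0);
 *	if (c_len == s_len) {
 *		// did not compress; caller keeps the original data
 *	} else if (lz4_decompress(cbuf, out, c_len, s_len, 0) != 0) {
 *		// corrupt or truncated stream
 *	}
 */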
/*
 * LZ4 API Description:
 *
 * Simple Functions:
 * real_LZ4_compress() :
 *	isize  : is the input size. Max supported value is ~1.9GB
 *	return : the number of bytes written in buffer dest
 *	    or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
 *	note : destination buffer must be already allocated.
 *	    destination buffer must be sized to handle worst-case
 *	    situations (input data not compressible). Worst-case size
 *	    evaluation is provided by function LZ4_compressBound().
 *
 * real_LZ4_uncompress() :
 *	osize  : is the output size, therefore the original size
 *	return : the number of bytes read in the source buffer.
 *	    If the source stream is malformed, the function will stop
 *	    decoding and return a negative result, indicating the byte
 *	    position of the faulty instruction. This function never
 *	    writes beyond dest + osize, and is therefore protected
 *	    against malicious data packets.
 *	note : destination buffer must be already allocated
 *
 * Advanced Functions
 *
 * LZ4_compressBound() :
 *	Provides the maximum size that LZ4 may output in a "worst case"
 *	scenario (input data not compressible), and is primarily useful
 *	for memory allocation of the output buffer.
 *
 *	isize  : is the input size. Max supported value is ~1.9GB
 *	return : maximum output size in a "worst case" scenario
 *	note : this function is limited by "int" range (2^31-1)
 *
 * LZ4_uncompress_unknownOutputSize() :
 *	isize  : is the input size, therefore the compressed size
 *	maxOutputSize : is the size of the destination buffer (which must be
 *	    already allocated)
 *	return : the number of bytes decoded in the destination buffer
 *	    (necessarily <= maxOutputSize). If the source stream is
 *	    malformed, the function will stop decoding and return a
 *	    negative result, indicating the byte position of the faulty
 *	    instruction. This function never writes beyond dest +
 *	    maxOutputSize, and is therefore protected against malicious
 *	    data packets.
 *	note : Destination buffer must be already allocated.
 *	    This version is slightly slower than real_LZ4_uncompress()
 *
 * LZ4_compressCtx() :
 *	This function explicitly handles the CTX memory structure.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_zalloc). Passing NULL
 *	isn't valid.
 *
 * LZ4_compress64kCtx() :
 *	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 *	isize *Must* be <64KB, otherwise the output will be corrupted.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_zalloc). Passing NULL
 *	isn't valid.
 */
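/*
 * A minimal usage sketch for the functions described above (illustrative
 * only; buffer and variable names are hypothetical, and kernel callers
 * normally go through lz4_compress()/lz4_decompress() instead):
 *
 *	int bound = LZ4_compressBound(isize);	// worst-case output size
 *	char *cbuf = kmem_alloc(bound, KM_SLEEP);
 *	int clen = real_LZ4_compress(src, cbuf, isize, bound);
 *	if (clen == 0) {
 *		// compression failed; keep the original data
 *	}
 *	int dlen = LZ4_uncompress_unknownOutputSize(cbuf, out, clen, isize);
 *	if (dlen < 0) {
 *		// malformed stream; -dlen is the offending input position
 *	}
 *	kmem_free(cbuf, bound);
 */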
/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio.
 * Lowering this value reduces memory usage. Reduced memory usage
 * typically improves speed, due to cache effect (ex: L1 32KB for Intel,
 * L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 * (examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define	COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value makes the algorithm
 * skip data segments considered "incompressible" sooner. This may decrease
 * the compression ratio dramatically, but will be faster on incompressible
 * data. Increasing this value makes the algorithm search longer before
 * declaring a segment "incompressible". This could improve compression a
 * bit, but will be slower on incompressible data. The default value (6)
 * is recommended.
 */
#define	NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
 * performance on big-endian CPUs, but the resulting compressed stream
 * will be incompatible with little-endian CPUs. You can set this option
 * to 1 in situations where data will stay within a closed environment.
 * This option is useless on little-endian CPUs (such as x86).
 */
/* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */

/*
 * CPU Feature Detection
 */

/* 32 or 64 bits ? */
#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
	defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
	defined(__LP64__) || defined(_LP64))
#define	LZ4_ARCH64 1
/*
 * Illumos: On amd64 we have 20k of stack and 24k on sun4u and sun4v, so we
 * can spend 16k on the algorithm
 */
#define	STACKLIMIT 12
#else
#define	LZ4_ARCH64 0
/*
 * Illumos: On i386 we only have 12k of stack, so in order to maintain the
 * same COMPRESSIONLEVEL we have to use heap allocation. Performance will
 * suck, but alas, it's ZFS on 32-bit we're talking about, so...
 */
#define	STACKLIMIT 11
#endif

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 */
#if (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || \
	defined(_BIG_ENDIAN) || defined(_ARCH_PPC) || defined(__PPC__) || \
	defined(__PPC) || defined(PPC) || defined(__powerpc__) || \
	defined(__powerpc) || defined(powerpc) || \
	((defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))))
#define	LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian formats
 * are unsupported.
 */
#endif
/*
 * Unaligned memory access is automatically enabled for "common" CPUs,
 * such as x86. For other CPUs, the compiler will be more cautious, and
 * insert extra code to ensure aligned access is respected. If you know
 * your target CPU supports unaligned memory access, you may want to
 * force this option manually to improve performance.
 */
#if defined(__ARM_FEATURE_UNALIGNED)
#define	LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

/*
 * Illumos: we can't use GCC's __builtin_ctz family of builtins in the
 * kernel
 */
#define	LZ4_FORCE_SW_BITCOUNT

/*
 * Compiler Options
 */
#if __STDC_VERSION__ >= 199901L	/* C99 */
/* "restrict" is a known keyword */
#else
/* Disable restrict */
#define	restrict
#endif

#define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#ifdef _MSC_VER
/* Visual Studio */
/* Visual is not C99, but supports some kind of inline */
#define	inline __forceinline
#if LZ4_ARCH64
/* For Visual 2005 */
#pragma intrinsic(_BitScanForward64)
#pragma intrinsic(_BitScanReverse64)
#else /* !LZ4_ARCH64 */
/* For Visual 2005 */
#pragma intrinsic(_BitScanForward)
#pragma intrinsic(_BitScanReverse)
#endif /* !LZ4_ARCH64 */
#endif /* _MSC_VER */

#ifdef _MSC_VER
#define	lz4_bswap16(x) _byteswap_ushort(x)
#else /* !_MSC_VER */
#define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
	(((x) & 0xffu) << 8)))
#endif /* !_MSC_VER */

#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
#define	expect(expr, value)	(__builtin_expect((expr), (value)))
#else
#define	expect(expr, value)	(expr)
#endif

#define	likely(expr)	expect((expr) != 0, 1)
#define	unlikely(expr)	expect((expr) != 0, 0)

/* Basic types */
#if defined(_MSC_VER)
/* Visual Studio does not support 'stdint' natively */
#define	BYTE	unsigned __int8
#define	U16	unsigned __int16
#define	U32	unsigned __int32
#define	S32	__int32
#define	U64	unsigned __int64
#else /* !defined(_MSC_VER) */
#define	BYTE	uint8_t
#define	U16	uint16_t
#define	U32	uint32_t
#define	S32	int32_t
#define	U64	uint64_t
#endif /* !defined(_MSC_VER) */

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(1)
#endif

typedef struct _U16_S {
	U16 v;
} U16_S;
typedef struct _U32_S {
	U32 v;
} U32_S;
typedef struct _U64_S {
	U64 v;
} U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack()
#endif

#define	A64(x) (((U64_S *)(x))->v)
#define	A32(x) (((U32_S *)(x))->v)
#define	A16(x) (((U16_S *)(x))->v)

/*
 * Constants
 */
#define	MINMATCH 4

#define	HASH_LOG COMPRESSIONLEVEL
#define	HASHTABLESIZE (1 << HASH_LOG)
#define	HASH_MASK (HASHTABLESIZE - 1)

#define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
	NOTCOMPRESSIBLE_CONFIRMATION : 2)

/*
 * Defines if memory is allocated into the stack (local variable),
 * or into the heap (kmem_alloc()).
 */
#define	HEAPMODE (HASH_LOG > STACKLIMIT)
#define	COPYLENGTH 8
#define	LASTLITERALS 5
#define	MFLIMIT (COPYLENGTH + MINMATCH)
#define	MINLENGTH (MFLIMIT + 1)

#define	MAXD_LOG 16
#define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define	ML_BITS 4
#define	ML_MASK ((1U<<ML_BITS)-1)
#define	RUN_BITS (8-ML_BITS)
#define	RUN_MASK ((1U<<RUN_BITS)-1)
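/*
 * Added note (restates the standard LZ4 sequence layout that the constants
 * above parameterize; not original to this file): each sequence begins with
 * a token byte whose high nibble is the literal-run length and whose low
 * nibble is the match length minus MINMATCH. A nibble equal to RUN_MASK or
 * ML_MASK (15) means additional length bytes follow, each adding 0-255.
 * For example, 3 literals followed by a 6-byte match at offset 64 encode as:
 *
 *	token 0x32 ((3 << ML_BITS) | (6 - MINMATCH)),
 *	3 literal bytes,
 *	offset 0x40 0x00 (16-bit little-endian),
 *	and no extra length bytes, since neither nibble reached 15.
 */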
/*
 * Architecture-specific macros
 */
#if LZ4_ARCH64
#define	STEPSIZE 8
#define	UARCH U64
#define	AARCH A64
#define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
#define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
#define	HTYPE U32
#define	INITBASE(base)		const BYTE* const base = ip
#else /* !LZ4_ARCH64 */
#define	STEPSIZE 4
#define	UARCH U32
#define	AARCH A32
#define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
#define	LZ4_SECURECOPY		LZ4_WILDCOPY
#define	HTYPE const BYTE *
#define	INITBASE(base)		const int base = 0
#endif /* !LZ4_ARCH64 */

#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
#else
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, v) { A16(p) = v; p += 2; }
#endif


/* Local structures */
struct refTables {
	HTYPE hashTable[HASHTABLESIZE];
};


/* Macros */
#define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
	HASH_LOG))
#define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
#define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
#define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
	d = e; }


/* Private functions */
#if LZ4_ARCH64

static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanReverse64(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clzll(val) >> 3);
#else
	int r;
	if (!(val >> 32)) {
		r = 4;
	} else {
		r = 0;
		val >>= 32;
	}
	if (!(val >> 16)) {
		r += 2;
		val >>= 8;
	} else {
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanForward64(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctzll(val) >> 3);
#else
	static const int DeBruijnBytePos[64] =
	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
	};
	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
	    58];
#endif
#endif
}

#else

static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanReverse(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clz(val) >> 3);
#else
	int r;
	if (!(val >> 16)) {
		r = 2;
		val >>= 8;
	} else {
		r = 0;
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanForward(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctz(val) >> 3);
#else
	static const int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0,
		3, 2, 2, 1, 3, 2, 0, 1,
		3, 3, 1, 2, 2, 2, 2, 0,
		3, 1, 2, 0, 1, 0, 1, 1
	};
	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
	    27];
#endif
#endif
}

#endif

/* Public functions */

static int
LZ4_compressBound(int isize)
{
	return (isize + (isize / 255) + 16);
}
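/*
 * A quick sanity check of the bound above (arithmetic only, added as an
 * explanatory note): an incompressible input costs at most one extra length
 * byte per 255 literals plus a small constant for the token and the trailing
 * literal run, so for a 128KB ZFS record
 * LZ4_compressBound(131072) = 131072 + 514 + 16 = 131602 bytes.
 */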
/* Compression functions */

/*ARGSUSED*/
static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	HTYPE *HashTable = (HTYPE *) (srt->hashTable);
#else
	HTYPE HashTable[HASHTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (BYTE *) source;
	INITBASE(base);
	const BYTE *anchor = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;


	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
	ip++;
	forwardH = LZ4_HASH_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if unlikely(forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    unlikely(ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while likely(ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH_VALUE(ip);
	}

_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255) {
				*op++ = 255;
			}
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}
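/*
 * Added explanatory note: the 64KB-specific variant below exploits the fact
 * that, when the input is smaller than LZ4_64KLIMIT, every position relative
 * to the start of the buffer fits in 16 bits. The hash table can therefore
 * store U16 offsets instead of HTYPE entries, which is why HASHLOG64K is
 * HASH_LOG + 1: twice the entries in the same memory footprint. Feeding it
 * an input of LZ4_64KLIMIT bytes or more would wrap those 16-bit offsets and
 * corrupt the output, as the API comment warns.
 */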
/* Note : this function is valid only if isize < LZ4_64KLIMIT */
#define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
#define	HASHLOG64K (HASH_LOG + 1)
#define	HASH64KTABLESIZE (1U << HASHLOG64K)
#define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
	HASHLOG64K))
#define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))

/*ARGSUSED*/
static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	U16 *HashTable = (U16 *) (srt->hashTable);
#else
	U16 HashTable[HASH64KTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (BYTE *) source;
	const BYTE *anchor = ip;
	const BYTE *const base = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	ip++;
	forwardH = LZ4_HASH64K_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH64K_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while (A32(ref) != A32(ip));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    (ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
		if (A32(ref) == A32(ip)) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH64K_VALUE(ip);
	}

_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255)
				*op++ = 255;
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}
static int
real_LZ4_compress(const char *source, char *dest, int isize, int osize)
{
#if HEAPMODE
	void *ctx = kmem_zalloc(sizeof (struct refTables), KM_NOSLEEP);
	int result;

	/*
	 * out of kernel memory, gently fall through - this will disable
	 * compression in zio_compress_data
	 */
	if (ctx == NULL)
		return (0);

	if (isize < LZ4_64KLIMIT)
		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
	else
		result = LZ4_compressCtx(ctx, source, dest, isize, osize);

	kmem_free(ctx, sizeof (struct refTables));
	return (result);
#else
	if (isize < (int)LZ4_64KLIMIT)
		return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
	return (LZ4_compressCtx(NULL, source, dest, isize, osize));
#endif
}
/* Decompression functions */

/*
 * Note: The decoding functions real_LZ4_uncompress() and
 * LZ4_uncompress_unknownOutputSize() are safe against "buffer overflow"
 * attacks. They will never write nor read outside of the provided
 * output buffers. LZ4_uncompress_unknownOutputSize() also ensures that
 * it will never read outside of the input buffer. A corrupted input
 * will produce an error result, a negative int, indicating the position
 * of the error within the input stream.
 */

static int
real_LZ4_uncompress(const char *source, char *dest, int osize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *ref;

	BYTE *op = (BYTE *) dest;
	BYTE *const oend = op + osize;
	BYTE *cpy;

	unsigned token;

	size_t length;
	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif

	/* Main Loop */
	for (;;) {
		/* get runlength */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			size_t len;
			for (; (len = *ip++) == 255; length += 255) {
			}
			length += len;
		}
		/* copy literals */
		cpy = op + length;
		if unlikely(cpy > oend - COPYLENGTH) {
			if (cpy != oend)
				/* Error: we must necessarily stand at EOF */
				goto _output_error;
			(void) memcpy(op, ip, length);
			ip += length;
			break;	/* EOF */
		}
		LZ4_WILDCOPY(ip, op, cpy);
		ip -= (op - cpy);
		op = cpy;

		/* get offset */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if unlikely(ref < (BYTE * const) dest)
			/*
			 * Error: offset creates reference outside the
			 * destination buffer
			 */
			goto _output_error;

		/* get matchlength */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			for (; *ip == 255; length += 255) {
				ip++;
			}
			length += *ip++;
		}
		/* copy repeated sequence */
		if unlikely(op - ref < STEPSIZE) {
#if LZ4_ARCH64
			size_t dec64 = dec64table[op-ref];
#else
			const int dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op-ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			if (cpy > oend)
				/*
				 * Error: request to write beyond destination
				 * buffer
				 */
				goto _output_error;
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since last
				 * 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)ip) - source);

	/* write overflow error detected */
_output_error:
	return (int)(-(((char *)ip) - source));
}
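/*
 * Added note on the dec32table/dec64table logic above (explanatory only):
 * when the match offset (op - ref) is smaller than STEPSIZE, the source and
 * destination of the match copy overlap, so a plain word copy would read
 * bytes that have not been written yet. The four byte-by-byte stores handle
 * the first word safely (for an offset of 1 they simply replicate a single
 * byte four times), and the table entries then rewind ref so that the
 * following word-sized copies only ever read bytes already present in the
 * output.
 */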
static int
LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
    int maxOutputSize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *const iend = ip + isize;
	const BYTE *ref;

	BYTE *op = (BYTE *) dest;
	BYTE *const oend = op + maxOutputSize;
	BYTE *cpy;

	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif

	/* Main Loop */
	while (ip < iend) {
		unsigned token;
		size_t length;

		/* get runlength */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			int s = 255;
			while ((ip < iend) && (s == 255)) {
				s = *ip++;
				length += s;
			}
		}
		/* copy literals */
		cpy = op + length;
		if ((cpy > oend - COPYLENGTH) ||
		    (ip + length > iend - COPYLENGTH)) {
			if (cpy > oend)
				/* Error: writes beyond output buffer */
				goto _output_error;
			if (ip + length != iend)
				/*
				 * Error: LZ4 format requires consuming all
				 * input at this stage
				 */
				goto _output_error;
			(void) memcpy(op, ip, length);
			op += length;
			/* Necessarily EOF, due to parsing restrictions */
			break;
		}
		LZ4_WILDCOPY(ip, op, cpy);
		ip -= (op - cpy);
		op = cpy;

		/* get offset */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if (ref < (BYTE * const) dest)
			/*
			 * Error: offset creates reference outside of
			 * destination buffer
			 */
			goto _output_error;

		/* get matchlength */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			while (ip < iend) {
				int s = *ip++;
				length += s;
				if (s == 255)
					continue;
				break;
			}
		}
		/* copy repeated sequence */
		if unlikely(op - ref < STEPSIZE) {
#if LZ4_ARCH64
			size_t dec64 = dec64table[op-ref];
#else
			const int dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op-ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			if (cpy > oend)
				/*
				 * Error: request to write outside of
				 * destination buffer
				 */
				goto _output_error;
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since
				 * last 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)op) - dest);

	/* write overflow error detected */
_output_error:
	return (int)(-(((char *)ip) - source));
}