/*
 * LZ4 - Fast LZ compression algorithm
 * Header File
 * Copyright (C) 2011-2013, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
 * - LZ4 source repository : http://code.google.com/p/lz4/
 */
/*
 * Copyright (c) 2016 by Delphix. All rights reserved.
 */

#if defined(_KERNEL) || defined(_FAKE_KERNEL)
#include <sys/zfs_context.h>
#elif defined(_STANDALONE)
#include <sys/cdefs.h>
#include <stand.h>
#include <sys/types.h>
#include <sys/endian.h>
#include <assert.h>

#define	ASSERT	assert
#else
#include <string.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <netinet/in.h>
#include <assert.h>

#define	ASSERT	assert
#endif
#include <lz4.h>

static int real_LZ4_compress(const char *source, char *dest, int isize,
    int osize);
static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);
static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);
static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);

size_t
lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len,
    int n __unused)
{
	uint32_t bufsiz;
	char *dest = d_start;

	ASSERT(d_len >= sizeof (bufsiz));

	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
	    d_len - sizeof (bufsiz));

	/* Signal an error if the compression routine returned zero. */
	if (bufsiz == 0)
		return (s_len);

	/*
	 * Encode the compressed buffer size at the start. We'll need this in
	 * decompression to counter the effects of padding which might be
	 * added to the compressed buffer and which, if unhandled, would
	 * confuse the hell out of our decompression function.
	 */
#if defined(_KERNEL) || defined(_FAKE_KERNEL)
	*(uint32_t *)(void *)dest = BE_32(bufsiz);
#else
	*(uint32_t *)(void *)dest = htonl(bufsiz);
#endif

	return (bufsiz + sizeof (bufsiz));
}

int
lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len,
    int n __unused)
{
	const char *src = s_start;
#if defined(_KERNEL) || defined(_FAKE_KERNEL)
	uint32_t bufsiz = BE_IN32(s_start);
#else
	uint32_t bufsiz = htonl(*(uint32_t *)s_start);
#endif

	/* invalid compressed buffer size encoded at start */
	if (bufsiz + sizeof (bufsiz) > s_len)
		return (1);

	/*
	 * Returns 0 on success (decompression function returned non-negative)
	 * and non-zero on failure (decompression function returned negative).
	 */
	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
	    d_start, bufsiz, d_len) < 0);
}

/*
 * LZ4 API Description:
 *
 * Simple Functions:
 * real_LZ4_compress() :
 *	isize  : is the input size. Max supported value is ~1.9GB
 *	return : the number of bytes written in buffer dest
 *	    or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
 *	note : destination buffer must be already allocated.
 *	    destination buffer must be sized to handle worst case
 *	    situations (input data not compressible).
 *
 * Advanced Functions
 *
 * LZ4_compressBound() :
 *	Provides the maximum size that LZ4 may output in a "worst case"
 *	scenario (input data not compressible) primarily useful for memory
 *	allocation of output buffer.
 *
 *	isize  : is the input size. Max supported value is ~1.9GB
 *	return : maximum output size in a "worst case" scenario
 *	note : this function is limited by "int" range (2^31-1)
 *
 * LZ4_uncompress_unknownOutputSize() :
 *	isize  : is the input size, therefore the compressed size
 *	maxOutputSize : is the size of the destination buffer (which must be
 *	    already allocated)
 *	return : the number of bytes decoded in the destination buffer
 *	    (necessarily <= maxOutputSize). If the source stream is
 *	    malformed, the function will stop decoding and return a
 *	    negative result, indicating the byte position of the faulty
 *	    instruction. This function never writes beyond dest +
 *	    maxOutputSize, and is therefore protected against malicious
 *	    data packets.
 *	note : Destination buffer must be already allocated.
 *
 * LZ4_compressCtx() :
 *	This function explicitly handles the CTX memory structure.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_zalloc). Passing NULL
 *	isn't valid.
 *
 * LZ4_compress64kCtx() :
 *	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 *	isize *Must* be <64KB, otherwise the output will be corrupted.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_zalloc). Passing NULL
 *	isn't valid.
 */
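
/*
 * Usage sketch (illustrative only; example_roundtrip() and its buffers are
 * hypothetical, only the two wrapper calls above are real). lz4_compress()
 * needs room in the destination for the 4-byte length header plus the
 * compressed payload, and returns s_len to signal that the data could not
 * be compressed into d_len bytes; lz4_decompress() returns 0 on success and
 * non-zero on failure.
 *
 *	static int
 *	example_roundtrip(void *src, size_t s_len, void *scratch,
 *	    size_t scratch_len, void *out)
 *	{
 *		size_t c_len;
 *
 *		c_len = lz4_compress(src, scratch, s_len, scratch_len, 0);
 *		if (c_len == s_len)
 *			return (-1);
 *		return (lz4_decompress(scratch, out, c_len, s_len, 0));
 *	}
 *
 * A -1 above would mean "incompressible"; a ZFS-style caller would then
 * store the source block unmodified instead of the compressed form.
 */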

/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio.
 * Lowering this value reduces memory usage. Reduced memory usage
 * typically improves speed, due to cache effect (ex: L1 32KB for Intel,
 * L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 * (examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define	COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 * algorithm skip data segments considered "incompressible" faster.
 * This may decrease compression ratio dramatically, but will be
 * faster on incompressible data. Increasing this value will make
 * the algorithm search more before declaring a segment "incompressible".
 * This could improve compression a bit, but will be slower on
 * incompressible data. The default value (6) is recommended.
 */
#define	NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
 * performance for big-endian CPUs, but the resulting compressed stream
 * will be incompatible with little-endian CPUs. You can set this option
 * to 1 in situations where data will stay within a closed environment.
 * This option is useless on little-endian CPUs (such as x86).
 */
/* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */

/*
 * CPU Feature Detection
 */

/* 32 or 64 bits ? */
#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
    defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
    defined(__LP64__) || defined(_LP64))
#define	LZ4_ARCH64 1
#else
#define	LZ4_ARCH64 0
#endif

/*
 * Limits the amount of stack space that the algorithm may consume to hold
 * the compression lookup table. The value `9' here means we'll never use
 * more than 2k of stack (see above for a description of COMPRESSIONLEVEL).
 * If more memory is needed, it is allocated from the heap.
 */
#define	STACKLIMIT 9

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 */
#if defined(BYTE_ORDER)
#if BYTE_ORDER == BIG_ENDIAN	/* This is sys/endian.h API */
#define	LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian formats
 * are unsupported.
 */
#endif
#else /* !defined(BYTE_ORDER) */
#if (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || \
    defined(_BIG_ENDIAN) || defined(_ARCH_PPC) || defined(__PPC__) || \
    defined(__PPC) || defined(PPC) || defined(__powerpc__) || \
    defined(__powerpc) || defined(powerpc) || \
    ((defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))))
#define	LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian formats
 * are unsupported.
 */
#endif
#endif /* defined(BYTE_ORDER) */

/*
 * Unaligned memory access is automatically enabled for "common" CPUs,
 * such as x86. For other CPUs, the compiler will be more cautious, and
 * insert extra code to ensure aligned access is respected. If you know
 * your target CPU supports unaligned memory access, you may want to
 * force this option manually to improve performance.
 */
#if defined(__ARM_FEATURE_UNALIGNED)
#define	LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

#ifdef __sparc
#define	LZ4_FORCE_SW_BITCOUNT
#endif

/*
 * Compiler Options
 */
#if __STDC_VERSION__ >= 199901L	/* C99 */
/* "restrict" is a known keyword */
#else
/* Disable restrict */
#define	restrict
#endif

#define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#ifdef _MSC_VER
/* Visual Studio */
/* Visual is not C99, but supports some kind of inline */
#define	inline __forceinline
#if LZ4_ARCH64
/* For Visual 2005 */
#pragma intrinsic(_BitScanForward64)
#pragma intrinsic(_BitScanReverse64)
#else /* !LZ4_ARCH64 */
/* For Visual 2005 */
#pragma intrinsic(_BitScanForward)
#pragma intrinsic(_BitScanReverse)
#endif /* !LZ4_ARCH64 */
#endif /* _MSC_VER */

#ifdef _MSC_VER
#define	lz4_bswap16(x) _byteswap_ushort(x)
#else /* !_MSC_VER */
#define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
	(((x) & 0xffu) << 8)))
#endif /* !_MSC_VER */

#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
#define	expect(expr, value)	(__builtin_expect((expr), (value)))
#else
#define	expect(expr, value)	(expr)
#endif

#ifndef likely
#define	likely(expr)	expect((expr) != 0, 1)
#endif

#ifndef unlikely
#define	unlikely(expr)	expect((expr) != 0, 0)
#endif

/* Basic types */
#if defined(_MSC_VER)
/* Visual Studio does not support 'stdint' natively */
#define	BYTE unsigned __int8
#define	U16 unsigned __int16
#define	U32 unsigned __int32
#define	S32 __int32
#define	U64 unsigned __int64
#else /* !defined(_MSC_VER) */
#define	BYTE uint8_t
#define	U16 uint16_t
#define	U32 uint32_t
#define	S32 int32_t
#define	U64 uint64_t
#endif /* !defined(_MSC_VER) */

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(1)
#endif

typedef struct _U16_S {
	U16 v;
} U16_S;
typedef struct _U32_S {
	U32 v;
} U32_S;
typedef struct _U64_S {
	U64 v;
} U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack()
#endif

#define	A64(x)	(((U64_S *)(__DECONST(void *, x)))->v)
#define	A32(x)	(((U32_S *)(__DECONST(void *, x)))->v)
#define	A16(x)	(((U16_S *)(__DECONST(void *, x)))->v)

/*
 * Constants
 */
#define	MINMATCH 4

#define	HASH_LOG COMPRESSIONLEVEL
#define	HASHTABLESIZE (1 << HASH_LOG)
#define	HASH_MASK (HASHTABLESIZE - 1)

#define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
	NOTCOMPRESSIBLE_CONFIRMATION : 2)

/*
 * Defines whether memory is allocated on the stack (local variable),
 * or on the heap (kmem_alloc()).
 */
#define	HEAPMODE (HASH_LOG > STACKLIMIT)
#define	COPYLENGTH 8
#define	LASTLITERALS 5
#define	MFLIMIT (COPYLENGTH + MINMATCH)
#define	MINLENGTH (MFLIMIT + 1)

#define	MAXD_LOG 16
#define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define	ML_BITS 4
#define	ML_MASK ((1U<<ML_BITS)-1)
#define	RUN_BITS (8-ML_BITS)
#define	RUN_MASK ((1U<<RUN_BITS)-1)
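
/*
 * A compressed block is a series of "sequences": a one-byte token whose
 * high nibble is the literal run length (RUN_MASK, 15, means "read further
 * extension bytes, each added to the length, until a byte below 255") and
 * whose low nibble is the match length minus MINMATCH (extended past
 * ML_MASK the same way), followed by the literals themselves, a 2-byte
 * little-endian match offset, and any match-length extension bytes.
 *
 * Worked example (illustrative): 20 literals followed by a 19-byte match
 * at offset 7 encode as token 0xFF (both nibbles saturated), literal-length
 * extension byte 5 (20 - 15), the 20 literal bytes, offset bytes 0x07 0x00,
 * and match-length extension byte 0 (19 - 4 - 15).
 */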

/*
 * Architecture-specific macros
 */
#if LZ4_ARCH64
#define	STEPSIZE 8
#define	UARCH U64
#define	AARCH A64
#define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
#define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
#define	HTYPE U32
#define	INITBASE(base)		const BYTE* const base = ip
#else /* !LZ4_ARCH64 */
#define	STEPSIZE 4
#define	UARCH U32
#define	AARCH A32
#define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
#define	LZ4_SECURECOPY		LZ4_WILDCOPY
#define	HTYPE const BYTE *
#define	INITBASE(base)		const int base = 0
#endif /* !LZ4_ARCH64 */

#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
#else
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, v) { A16(p) = v; p += 2; }
#endif


/* Local structures */
struct refTables {
	HTYPE hashTable[HASHTABLESIZE];
};


/* Macros */
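/*
 * LZ4_HASH_FUNCTION() is a multiplicative (Knuth-style) hash: the first
 * four input bytes are multiplied by 2654435761 (a constant close to
 * 2^32 divided by the golden ratio) and the top HASH_LOG bits of the
 * product are kept as the table index.
 */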
#define	LZ4_HASH_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH * 8) - \
	HASH_LOG))
#define	LZ4_HASH_VALUE(p)	LZ4_HASH_FUNCTION(A32(p))
#define	LZ4_WILDCOPY(s, d, e)	do { LZ4_COPYPACKET(s, d) } while (d < e);
#define	LZ4_BLINDCOPY(s, d, l)	{ BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
	d = e; }


/* Private functions */
#if LZ4_ARCH64

static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanReverse64(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clzll(val) >> 3);
#else
	int r;
	if (!(val >> 32)) {
		r = 4;
	} else {
		r = 0;
		val >>= 32;
	}
	if (!(val >> 16)) {
		r += 2;
		val >>= 8;
	} else {
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanForward64(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctzll(val) >> 3);
#else
	static const int DeBruijnBytePos[64] =
	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
	};
	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
	    58];
#endif
#endif
}

#else

static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanReverse(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clz(val) >> 3);
#else
	int r;
	if (!(val >> 16)) {
		r = 2;
		val >>= 8;
	} else {
		r = 0;
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanForward(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctz(val) >> 3);
#else
	static const int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0,
		3, 2, 2, 1, 3, 2, 0, 1,
		3, 3, 1, 2, 2, 2, 2, 0,
		3, 1, 2, 0, 1, 0, 1, 1
	};
	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
	    27];
#endif
#endif
}

#endif

/* Compression functions */

/*ARGSUSED*/
static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	HTYPE *HashTable = (HTYPE *) (srt->hashTable);
#else
	HTYPE HashTable[HASHTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (const BYTE *) source;
	INITBASE(base);
	const BYTE *anchor = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;


	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
	ip++;
	forwardH = LZ4_HASH_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if unlikely(forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

		/* Catch up */
		while ((ip > anchor) && (ref > (const BYTE *) source) &&
		    unlikely(ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

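		/*
		 * The loop below determines how far the match extends: it
		 * XORs word-sized chunks of the current position against
		 * the reference, and the first non-zero difference is
		 * resolved to a byte count by LZ4_NbCommonBytes() (a
		 * bit-scan of the lowest or highest differing byte,
		 * depending on endianness).
		 */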
		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while likely(ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255) {
				*op++ = 255;
			}
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}



/* Note : this function is valid only if isize < LZ4_64KLIMIT */
#define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
#define	HASHLOG64K (HASH_LOG + 1)
#define	HASH64KTABLESIZE (1U << HASHLOG64K)
#define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
	HASHLOG64K))
#define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))
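
/*
 * The 64KB variant below stores 16-bit offsets from `base' in its hash
 * table instead of full positions: since the input is known to be smaller
 * than 64KB, every candidate match is automatically within MAX_DISTANCE,
 * so the (ref < ip - MAX_DISTANCE) test can be dropped, and the halved
 * entry size lets the table use one extra hash bit (HASHLOG64K) in the
 * same amount of memory.
 */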
/*ARGSUSED*/
static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	U16 *HashTable = (U16 *) (srt->hashTable);
#else
	U16 HashTable[HASH64KTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (const BYTE *) source;
	const BYTE *anchor = ip;
	const BYTE *const base = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	ip++;
	forwardH = LZ4_HASH64K_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH64K_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while (A32(ref) != A32(ip));

		/* Catch up */
		while ((ip > anchor) && (ref > (const BYTE *) source) &&
		    (ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
		if (A32(ref) == A32(ip)) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH64K_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255)
				*op++ = 255;
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}

static int
real_LZ4_compress(const char *source, char *dest, int isize, int osize)
{
#if HEAPMODE
#if defined(_KERNEL) || defined(_FAKE_KERNEL)
	void *ctx = kmem_zalloc(sizeof (struct refTables), KM_NOSLEEP);
#else
	void *ctx = malloc(sizeof (struct refTables));
#endif
	int result;

	/*
	 * out of kernel memory, gently fall through - this will disable
	 * compression in zio_compress_data
	 */
	if (ctx == NULL)
		return (0);

	if (isize < LZ4_64KLIMIT)
		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
	else
		result = LZ4_compressCtx(ctx, source, dest, isize, osize);

#if defined(_KERNEL) || defined(_FAKE_KERNEL)
	kmem_free(ctx, sizeof (struct refTables));
#else
	free(ctx);
#endif
	return (result);
#else
	if (isize < (int)LZ4_64KLIMIT)
		return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
	return (LZ4_compressCtx(NULL, source, dest, isize, osize));
#endif
}

/* Decompression functions */

/*
 * Note: The decoding function LZ4_uncompress_unknownOutputSize() is safe
 *	against "buffer overflow" attacks. It will never write or read
 *	outside of the provided output buffer, and it also ensures that it
 *	will never read outside of the input buffer. A corrupted input will
 *	produce an error result, a negative int, indicating the position of
 *	the error within the input stream.
 */

static int
LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
    int maxOutputSize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *const iend = ip + isize;
	const BYTE *ref;

	BYTE *op = (BYTE *) dest;
	BYTE *const oend = op + maxOutputSize;
	BYTE *cpy;

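	/*
	 * The tables below serve the "copy repeated sequence" path further
	 * down: when the match offset is smaller than a copy step, the
	 * first four bytes are copied one at a time and `ref' is then
	 * re-aligned using the table entries so that the following
	 * word-sized copies keep replaying the short repeating pattern
	 * instead of reading bytes that have not been written yet.
	 */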
	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif

	/* Main Loop */
	while (ip < iend) {
		unsigned token;
		size_t length;

		/* get runlength */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			int s = 255;
			while ((ip < iend) && (s == 255)) {
				s = *ip++;
				length += s;
			}
		}
		/* copy literals */
		cpy = op + length;
		/* CORNER-CASE: cpy might overflow. */
		if (cpy < op)
			goto _output_error;	/* cpy was overflowed, bail! */
		if ((cpy > oend - COPYLENGTH) ||
		    (ip + length > iend - COPYLENGTH)) {
			if (cpy > oend)
				/* Error: writes beyond output buffer */
				goto _output_error;
			if (ip + length != iend)
				/*
				 * Error: LZ4 format requires consuming all
				 * input at this stage
				 */
				goto _output_error;
			(void) memcpy(op, ip, length);
			op += length;
			/* Necessarily EOF, due to parsing restrictions */
			break;
		}
		LZ4_WILDCOPY(ip, op, cpy);
		ip -= (op - cpy);
		op = cpy;

		/* get offset */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if (ref < (BYTE * const) dest)
			/*
			 * Error: offset creates reference outside of
			 * destination buffer
			 */
			goto _output_error;

		/* get matchlength */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			while (ip < iend) {
				int s = *ip++;
				length += s;
				if (s == 255)
					continue;
				break;
			}
		}
		/* copy repeated sequence */
		if unlikely(op - ref < STEPSIZE) {
#if LZ4_ARCH64
			size_t dec64 = dec64table[op-ref];
#else
			const int dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op-ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			if (cpy > oend)
				/*
				 * Error: request to write outside of
				 * destination buffer
				 */
				goto _output_error;
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since
				 * last 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)op) - dest);

	/* write overflow error detected */
	_output_error:
	return (int)(-(((const char *)ip) - source));
}