/*
 * LZ4 - Fast LZ compression algorithm
 * Header File
 * Copyright (C) 2011-2013, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
 * - LZ4 source repository : http://code.google.com/p/lz4/
 */

#include <sys/zfs_context.h>
#include <sys/zio_compress.h>

static int real_LZ4_compress(const char *source, char *dest, int isize,
    int osize);
static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);
static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);
static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);

static kmem_cache_t *lz4_cache;

/*ARGSUSED*/
size_t
lz4_compress_zfs(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int n)
{
	uint32_t bufsiz;
	char *dest = d_start;

	ASSERT(d_len >= sizeof (bufsiz));

	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
	    d_len - sizeof (bufsiz));

	/* Signal an error if the compression routine returned zero. */
	if (bufsiz == 0)
		return (s_len);

	/*
	 * The exact compressed size is needed by the decompression routine,
	 * so it is stored at the start of the buffer. Note that this may be
	 * less than the compressed block size, which is rounded up to a
	 * multiple of 1<<ashift.
	 */
	*(uint32_t *)dest = BE_32(bufsiz);

	return (bufsiz + sizeof (bufsiz));
}

/*ARGSUSED*/
int
lz4_decompress_zfs(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int n)
{
	const char *src = s_start;
	uint32_t bufsiz = BE_IN32(src);

	/* invalid compressed buffer size encoded at start */
	if (bufsiz + sizeof (bufsiz) > s_len)
		return (1);

	/*
	 * Returns 0 on success (decompression function returned non-negative)
	 * and non-zero on failure (decompression function returned negative).
	 */
	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
	    d_start, bufsiz, d_len) < 0);
}
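
/*
 * Layout note (added for illustration, not part of the original source):
 * the two ZFS entry points above frame the raw LZ4 stream with a 4-byte
 * big-endian length word, so a buffer produced by lz4_compress_zfs() is:
 *
 *	byte 0..3	BE_32(bufsiz), the exact compressed payload size
 *	byte 4..	bufsiz bytes of raw LZ4 data
 *	remainder	padding up to the 1<<ashift block size
 *
 * A minimal sketch of recovering the payload size from such a block, using
 * the same BE_IN32() byte-order macro as lz4_decompress_zfs() (hypothetical
 * helper, not part of this file):
 *
 *	static uint32_t
 *	lz4_zfs_payload_size(const char *blk)
 *	{
 *		return (BE_IN32(blk));
 *	}
 */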

/*
 * LZ4 API Description:
 *
 * Simple Functions:
 * real_LZ4_compress() :
 *	isize  : is the input size. Max supported value is ~1.9GB
 *	return : the number of bytes written into buffer dest
 *		or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
 *	note : destination buffer must be already allocated.
 *		It must be sized to handle worst-case situations (input data
 *		not compressible); worst-case size evaluation is provided by
 *		LZ4_compressBound().
 *
 * real_LZ4_uncompress() :
 *	osize  : is the output size, therefore the original size
 *	return : the number of bytes read from the source buffer.
 *		If the source stream is malformed, the function will stop
 *		decoding and return a negative result, indicating the byte
 *		position of the faulty instruction. This function never
 *		writes beyond dest + osize, and is therefore protected
 *		against malicious data packets.
 *	note : destination buffer must be already allocated
 *	note : real_LZ4_uncompress() is not used in ZFS so its code
 *		is not present here.
 *
 * Advanced Functions
 *
 * LZ4_compressBound() :
 *	Provides the maximum size that LZ4 may output in a "worst case"
 *	scenario (input data not compressible); primarily useful for memory
 *	allocation of the output buffer.
 *
 *	isize  : is the input size. Max supported value is ~1.9GB
 *	return : maximum output size in a "worst case" scenario
 *	note : this function is limited by "int" range (2^31-1)
 *
 * LZ4_uncompress_unknownOutputSize() :
 *	isize  : is the input size, therefore the compressed size
 *	maxOutputSize : is the size of the destination buffer (which must be
 *		already allocated)
 *	return : the number of bytes decoded in the destination buffer
 *		(necessarily <= maxOutputSize). If the source stream is
 *		malformed, the function will stop decoding and return a
 *		negative result, indicating the byte position of the faulty
 *		instruction. This function never writes beyond dest +
 *		maxOutputSize, and is therefore protected against malicious
 *		data packets.
 *	note : Destination buffer must be already allocated.
 *		This version is slightly slower than real_LZ4_uncompress()
 *
 * LZ4_compressCtx() :
 *	This function explicitly handles the CTX memory structure.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_cache_alloc). Passing
 *	NULL isn't valid.
 *
 * LZ4_compress64kCtx() :
 *	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 *	isize *Must* be <64KB, otherwise the output will be corrupted.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_cache_alloc). Passing
 *	NULL isn't valid.
 */
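
/*
 * Sketch (added for clarity, not part of this port): LZ4_compressBound()
 * itself is not included in this file. In upstream LZ4 the worst-case bound
 * is computed as roughly isize + isize/255 + 16, e.g.:
 *
 *	#define	LZ4_COMPRESSBOUND(isize)  ((isize) + ((isize) / 255) + 16)
 *
 * i.e. incompressible data can expand by about 0.4% plus a small constant
 * for the trailing literal run.
 */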

/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves the compression ratio.
 * Lowering this value reduces memory usage. Reduced memory usage
 * typically improves speed, due to cache effects (e.g. 32KB L1 for Intel,
 * 64KB L1 for AMD). Memory usage formula : N->2^(N+2) Bytes
 * (examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define	COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 * algorithm skip data segments considered "incompressible" more quickly.
 * This may decrease the compression ratio dramatically, but will be
 * faster on incompressible data. Increasing this value will make
 * the algorithm search more before declaring a segment "incompressible".
 * This could improve compression a bit, but will be slower on
 * incompressible data. The default value (6) is recommended.
 */
#define	NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a performance
 * boost on big-endian CPUs, but the resulting compressed stream will be
 * incompatible with little-endian CPUs. You can set this option to 1 in
 * situations where data will stay within a closed environment. This
 * option is useless on little-endian CPUs (such as x86).
 */
/* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */
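
/*
 * Worked example (added for clarity, not in the original source): with
 * COMPRESSIONLEVEL = 12 the hash table declared below in struct refTables
 * has HASHTABLESIZE = 1 << 12 = 4096 entries. Each entry (HTYPE) is 4 bytes
 * on typical 32-bit builds (a pointer) and on 64-bit builds (a U32), so the
 * table occupies 4096 * 4 = 16 KB, matching the N -> 2^(N+2) bytes formula
 * above (2^(12+2) = 16 KB).
 */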

/*
 * CPU Feature Detection
 */

/* 32 or 64 bits ? */
#if defined(_LP64)
#define	LZ4_ARCH64 1
#else
#define	LZ4_ARCH64 0
#endif

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 */
#if defined(_ZFS_BIG_ENDIAN)
#define	LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian formats
 * are unsupported.
 */
#undef LZ4_BIG_ENDIAN
#endif

/*
 * Unaligned memory access is automatically enabled for "common" CPUs,
 * such as x86. For other CPUs, the compiler will be more cautious and
 * insert extra code to ensure aligned access is respected. If you know
 * your target CPU supports unaligned memory access, you may want to
 * force this option manually to improve performance.
 */
#if defined(__ARM_FEATURE_UNALIGNED)
#define	LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

/*
 * Illumos : we can't use GCC's __builtin_ctz family of builtins in the
 *	kernel
 * Linux : we can use GCC's __builtin_ctz family of builtins in the
 *	kernel
 */
#undef	LZ4_FORCE_SW_BITCOUNT
#if defined(__sparc)
#define	LZ4_FORCE_SW_BITCOUNT
#endif

/*
 * Compiler Options
 */
/* Disable restrict */
#define	restrict

/*
 * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it.
 * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16
 */
#ifdef GCC_VERSION
#undef GCC_VERSION
#endif

#define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
#define	expect(expr, value)	(__builtin_expect((expr), (value)))
#else
#define	expect(expr, value)	(expr)
#endif

#ifndef likely
#define	likely(expr)	expect((expr) != 0, 1)
#endif

#ifndef unlikely
#define	unlikely(expr)	expect((expr) != 0, 0)
#endif

#define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
	(((x) & 0xffu) << 8)))

/* Basic types */
#define	BYTE	uint8_t
#define	U16	uint16_t
#define	U32	uint32_t
#define	S32	int32_t
#define	U64	uint64_t

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(1)
#endif

typedef struct _U16_S {
	U16 v;
} U16_S;
typedef struct _U32_S {
	U32 v;
} U32_S;
typedef struct _U64_S {
	U64 v;
} U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack()
#endif

#define	A64(x)	(((U64_S *)(x))->v)
#define	A32(x)	(((U32_S *)(x))->v)
#define	A16(x)	(((U16_S *)(x))->v)

/*
 * Constants
 */
#define	MINMATCH 4

#define	HASH_LOG COMPRESSIONLEVEL
#define	HASHTABLESIZE (1 << HASH_LOG)
#define	HASH_MASK (HASHTABLESIZE - 1)

#define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
	NOTCOMPRESSIBLE_CONFIRMATION : 2)

#define	COPYLENGTH 8
#define	LASTLITERALS 5
#define	MFLIMIT (COPYLENGTH + MINMATCH)
#define	MINLENGTH (MFLIMIT + 1)

#define	MAXD_LOG 16
#define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define	ML_BITS 4
#define	ML_MASK ((1U<<ML_BITS)-1)
#define	RUN_BITS (8-ML_BITS)
#define	RUN_MASK ((1U<<RUN_BITS)-1)
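
/*
 * Worked example of the sequence encoding driven by the constants above
 * (added for clarity, not in the original source). Every LZ4 sequence is:
 *
 *	token (1 byte):	high nibble = literal run length (RUN_MASK = 15
 *			means "more length bytes follow, 255 per byte"),
 *			low nibble  = match length - MINMATCH (ML_MASK = 15
 *			is the same kind of escape)
 *	literals	(literal run length bytes)
 *	offset (2 bytes, little-endian): backward distance of the match,
 *			at most MAX_DISTANCE (65535)
 *
 * For instance, 5 literal bytes followed by a 9-byte match at distance 24
 * would be emitted as token 0x55 ((5 << ML_BITS) | (9 - MINMATCH)), the 5
 * literal bytes, then offset bytes 0x18 0x00.
 */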


/*
 * Architecture-specific macros
 */
#if LZ4_ARCH64
#define	STEPSIZE 8
#define	UARCH U64
#define	AARCH A64
#define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
#define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
#define	HTYPE U32
#define	INITBASE(base)		const BYTE* const base = ip
#else /* !LZ4_ARCH64 */
#define	STEPSIZE 4
#define	UARCH U32
#define	AARCH A32
#define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
#define	LZ4_SECURECOPY		LZ4_WILDCOPY
#define	HTYPE const BYTE *
#define	INITBASE(base)		const int base = 0
#endif /* !LZ4_ARCH64 */

#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
#else
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, v)	{ A16(p) = v; p += 2; }
#endif


/* Local structures */
struct refTables {
	HTYPE hashTable[HASHTABLESIZE];
};


/* Macros */
#define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
	HASH_LOG))
#define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
#define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
#define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
	d = e; }


/* Private functions */
#if LZ4_ARCH64

static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clzll(val) >> 3);
#else
	int r;
	if (!(val >> 32)) {
		r = 4;
	} else {
		r = 0;
		val >>= 32;
	}
	if (!(val >> 16)) {
		r += 2;
		val >>= 8;
	} else {
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctzll(val) >> 3);
#else
	static const int DeBruijnBytePos[64] =
	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
	};
	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
	    58];
#endif
#endif
}

#else

static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clz(val) >> 3);
#else
	int r;
	if (!(val >> 16)) {
		r = 2;
		val >>= 8;
	} else {
		r = 0;
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctz(val) >> 3);
#else
	static const int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0,
		3, 2, 2, 1, 3, 2, 0, 1,
		3, 3, 1, 2, 2, 2, 2, 0,
		3, 1, 2, 0, 1, 0, 1, 1
	};
	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
	    27];
#endif
#endif
}

#endif
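
/*
 * Illustrative note (added, not in the original source): the match loops
 * below extend a match STEPSIZE bytes at a time by XOR-ing the two words
 * and handing the first non-zero difference to LZ4_NbCommonBytes(). On a
 * little-endian machine the number of equal leading bytes is the number of
 * trailing zero bits of the difference divided by 8 (hence __builtin_ctz);
 * on a big-endian machine it is the number of leading zero bits divided by
 * 8 (hence __builtin_clz). For example, with ref = "ABCDxxxx" and
 * ip = "ABCDyyyy", the difference has its lowest set bit in byte 4, so
 * LZ4_NbCommonBytes(diff) returns 4.
 */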

/* Compression functions */

/*ARGSUSED*/
static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
	struct refTables *srt = (struct refTables *)ctx;
	HTYPE *HashTable = (HTYPE *) (srt->hashTable);

	const BYTE *ip = (BYTE *) source;
	INITBASE(base);
	const BYTE *anchor = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;


	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
	ip++;
	forwardH = LZ4_HASH_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (unlikely(forwardIp > mflimit)) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    unlikely(ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend))
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (likely(ip < matchlimit - (STEPSIZE - 1))) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255) {
				*op++ = 255;
			}
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}
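
/*
 * Note on the match-search acceleration above (added for clarity, not in
 * the original source): findMatchAttempts starts at (1 << SKIPSTRENGTH) + 3
 * and the forward step is findMatchAttempts++ >> SKIPSTRENGTH, so with the
 * default SKIPSTRENGTH of 6 the first ~64 failed probes advance 1 byte each
 * (67 >> 6 == 1), the next 64 advance 2 bytes, then 3, and so on. On
 * incompressible data the scan therefore skips ahead faster and faster,
 * which is the behaviour NOTCOMPRESSIBLE_CONFIRMATION tunes.
 */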


/* Note : this function is valid only if isize < LZ4_64KLIMIT */
#define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
#define	HASHLOG64K (HASH_LOG + 1)
#define	HASH64KTABLESIZE (1U << HASHLOG64K)
#define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
	HASHLOG64K))
#define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))

/*ARGSUSED*/
static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
	struct refTables *srt = (struct refTables *)ctx;
	U16 *HashTable = (U16 *) (srt->hashTable);

	const BYTE *ip = (BYTE *) source;
	const BYTE *anchor = ip;
	const BYTE *const base = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	ip++;
	forwardH = LZ4_HASH64K_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH64K_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while (A32(ref) != A32(ip));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    (ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend))
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
		if (A32(ref) == A32(ip)) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH64K_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255)
				*op++ = 255;
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}
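
/*
 * Worked note on the 64K variant above (added for clarity, not in the
 * original source): because isize < LZ4_64KLIMIT, every offset ip - base
 * fits in 16 bits, so the hash table can store U16 entries instead of
 * HTYPE. Halving the entry size allows one extra bit of hash
 * (HASHLOG64K = HASH_LOG + 1) in the same memory footprint:
 * 2^13 entries * 2 bytes = 16 KB, the same as 2^12 * 4 bytes.
 */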

static int
real_LZ4_compress(const char *source, char *dest, int isize, int osize)
{
	void *ctx;
	int result;

	ASSERT(lz4_cache != NULL);
	ctx = kmem_cache_alloc(lz4_cache, KM_SLEEP);

	/*
	 * out of kernel memory, gently fall through - this will disable
	 * compression in zio_compress_data
	 */
	if (ctx == NULL)
		return (0);

	memset(ctx, 0, sizeof (struct refTables));

	if (isize < LZ4_64KLIMIT)
		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
	else
		result = LZ4_compressCtx(ctx, source, dest, isize, osize);

	kmem_cache_free(lz4_cache, ctx);
	return (result);
}

/* Decompression functions */

/*
 * Note: The decoding functions real_LZ4_uncompress() and
 *	LZ4_uncompress_unknownOutputSize() are safe against "buffer overflow"
 *	attacks. They will never write nor read outside of the provided
 *	output buffers. LZ4_uncompress_unknownOutputSize() also ensures that
 *	it will never read outside of the input buffer. A corrupted input
 *	will produce an error result, a negative int, indicating the position
 *	of the error within the input stream.
 *
 * Note[2]: real_LZ4_uncompress(), referred to above, is not used in ZFS so
 *	its code is not present here.
 */

static const int dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
static const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
#endif
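
/*
 * Illustrative note (added, not in the original source): dec32table and
 * dec64table handle matches whose offset is smaller than STEPSIZE, where
 * the source and destination of the copy overlap. The decoder below first
 * copies 4 bytes one by one, then re-aims ref so that subsequent word
 * copies keep replicating the short pattern. The extreme case is offset 1,
 * which is effectively run-length encoding: a single byte repeated
 * length + MINMATCH times.
 */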

static int
LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
    int maxOutputSize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *const iend = ip + isize;
	const BYTE *ref;

	BYTE *op = (BYTE *) dest;
	BYTE *const oend = op + maxOutputSize;
	BYTE *cpy;

	/* Main Loop */
	while (ip < iend) {
		unsigned token;
		size_t length;

		/* get runlength */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			int s = 255;
			while ((ip < iend) && (s == 255)) {
				s = *ip++;
				if (unlikely(length > (size_t)(length + s)))
					goto _output_error;
				length += s;
			}
		}
		/* copy literals */
		cpy = op + length;
		/* CORNER-CASE: cpy might overflow. */
		if (cpy < op)
			goto _output_error;	/* cpy was overflowed, bail! */
		if ((cpy > oend - COPYLENGTH) ||
		    (ip + length > iend - COPYLENGTH)) {
			if (cpy > oend)
				/* Error: writes beyond output buffer */
				goto _output_error;
			if (ip + length != iend)
				/*
				 * Error: LZ4 format requires to consume all
				 * input at this stage
				 */
				goto _output_error;
			(void) memcpy(op, ip, length);
			op += length;
			/* Necessarily EOF, due to parsing restrictions */
			break;
		}
		LZ4_WILDCOPY(ip, op, cpy);
		ip -= (op - cpy);
		op = cpy;

		/* get offset */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if (ref < (BYTE * const) dest)
			/*
			 * Error: offset creates reference outside of
			 * destination buffer
			 */
			goto _output_error;

		/* get matchlength */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			while (ip < iend) {
				int s = *ip++;
				if (unlikely(length > (size_t)(length + s)))
					goto _output_error;
				length += s;
				if (s == 255)
					continue;
				break;
			}
		}
		/* copy repeated sequence */
		if (unlikely(op - ref < STEPSIZE)) {
#if LZ4_ARCH64
			int dec64 = dec64table[op - ref];
#else
			const int dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op - ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			if (cpy > oend)
				/*
				 * Error: request to write outside of
				 * destination buffer
				 */
				goto _output_error;
#if LZ4_ARCH64
			if ((ref + COPYLENGTH) > oend)
#else
			if ((ref + COPYLENGTH) > oend ||
			    (op + COPYLENGTH) > oend)
#endif
				goto _output_error;
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since
				 * last 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)op) - dest);

	/* write overflow error detected */
	_output_error:
	return (-1);
}

void
lz4_init(void)
{
	lz4_cache = kmem_cache_create("lz4_cache",
	    sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL, 0);
}

void
lz4_fini(void)
{
	if (lz4_cache) {
		kmem_cache_destroy(lz4_cache);
		lz4_cache = NULL;
	}
}
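
/*
 * Usage sketch (added for illustration, not in the original source): a
 * caller such as the zio compression layer is expected to call lz4_init()
 * once before any compression and lz4_fini() on teardown, and to use the
 * two ZFS entry points in between. src, cbuf, dbuf and SIZE below are
 * hypothetical names; in ZFS proper, d_len is smaller than s_len so that
 * only a worthwhile compression fits:
 *
 *	lz4_init();
 *	size_t clen = lz4_compress_zfs(src, cbuf, SIZE, SIZE, 0);
 *	if (clen != SIZE)	// s_len is returned when data did not fit
 *		(void) lz4_decompress_zfs(cbuf, dbuf, clen, SIZE, 0);
 *	lz4_fini();
 */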