1 /* 2 * LZ4 - Fast LZ compression algorithm 3 * Header File 4 * Copyright (C) 2011-2013, Yann Collet. 5 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are 9 * met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above 14 * copyright notice, this list of conditions and the following disclaimer 15 * in the documentation and/or other materials provided with the 16 * distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 * 30 * You can contact the author at : 31 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html 32 * - LZ4 source repository : http://code.google.com/p/lz4/ 33 */ 34 35 /* 36 * N.B. - This file seems to be based on LZ4 r85, dated Dec 10, 2012 37 */ 38 39 #include <sys/zfs_context.h> 40 #include <sys/zio_compress.h> 41 42 static int real_LZ4_compress(const char *source, char *dest, int isize, 43 int osize); 44 static int LZ4_compressCtx(void *ctx, const char *source, char *dest, 45 int isize, int osize); 46 static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest, 47 int isize, int osize); 48 49 /* See lz4.c */ 50 int LZ4_uncompress_unknownOutputSize(const char *source, char *dest, 51 int isize, int maxOutputSize); 52 53 static void *lz4_alloc(int flags); 54 static void lz4_free(void *ctx); 55 56 size_t 57 lz4_compress_zfs(void *s_start, void *d_start, size_t s_len, 58 size_t d_len, int n) 59 { 60 (void) n; 61 uint32_t bufsiz; 62 char *dest = d_start; 63 64 ASSERT(d_len >= sizeof (bufsiz)); 65 66 bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len, 67 d_len - sizeof (bufsiz)); 68 69 /* Signal an error if the compression routine returned zero. */ 70 if (bufsiz == 0) 71 return (s_len); 72 73 /* 74 * The exact compressed size is needed by the decompression routine, 75 * so it is stored at the start of the buffer. Note that this may be 76 * less than the compressed block size, which is rounded up to a 77 * multiple of 1<<ashift. 78 */ 79 *(uint32_t *)dest = BE_32(bufsiz); 80 81 return (bufsiz + sizeof (bufsiz)); 82 } 83 84 int 85 lz4_decompress_zfs(void *s_start, void *d_start, size_t s_len, 86 size_t d_len, int n) 87 { 88 (void) n; 89 const char *src = s_start; 90 uint32_t bufsiz = BE_IN32(src); 91 92 /* invalid compressed buffer size encoded at start */ 93 if (bufsiz + sizeof (bufsiz) > s_len) 94 return (1); 95 96 /* 97 * Returns 0 on success (decompression function returned non-negative) 98 * and non-zero on failure (decompression function returned negative). 99 */ 100 return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)], 101 d_start, bufsiz, d_len) < 0); 102 } 103 104 /* 105 * LZ4 API Description: 106 * 107 * Simple Functions: 108 * real_LZ4_compress() : 109 * isize : is the input size. Max supported value is ~1.9GB 110 * return : the number of bytes written in buffer dest 111 * or 0 if the compression fails (if LZ4_COMPRESSMIN is set). 112 * note : destination buffer must be already allocated. 113 * destination buffer must be sized to handle worst cases 114 * situations (input data not compressible) worst case size 115 * evaluation is provided by function LZ4_compressBound(). 116 * 117 * real_LZ4_uncompress() : 118 * osize : is the output size, therefore the original size 119 * return : the number of bytes read in the source buffer. 120 * If the source stream is malformed, the function will stop 121 * decoding and return a negative result, indicating the byte 122 * position of the faulty instruction. This function never 123 * writes beyond dest + osize, and is therefore protected 124 * against malicious data packets. 125 * note : destination buffer must be already allocated 126 * note : real_LZ4_uncompress() is not used in ZFS so its code 127 * is not present here. 128 * 129 * Advanced Functions 130 * 131 * LZ4_compressBound() : 132 * Provides the maximum size that LZ4 may output in a "worst case" 133 * scenario (input data not compressible) primarily useful for memory 134 * allocation of output buffer. 135 * 136 * isize : is the input size. Max supported value is ~1.9GB 137 * return : maximum output size in a "worst case" scenario 138 * note : this function is limited by "int" range (2^31-1) 139 * 140 * LZ4_uncompress_unknownOutputSize() : 141 * isize : is the input size, therefore the compressed size 142 * maxOutputSize : is the size of the destination buffer (which must be 143 * already allocated) 144 * return : the number of bytes decoded in the destination buffer 145 * (necessarily <= maxOutputSize). If the source stream is 146 * malformed, the function will stop decoding and return a 147 * negative result, indicating the byte position of the faulty 148 * instruction. This function never writes beyond dest + 149 * maxOutputSize, and is therefore protected against malicious 150 * data packets. 151 * note : Destination buffer must be already allocated. 152 * This version is slightly slower than real_LZ4_uncompress() 153 * 154 * LZ4_compressCtx() : 155 * This function explicitly handles the CTX memory structure. 156 * 157 * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated 158 * by the caller (either on the stack or using kmem_cache_alloc). Passing 159 * NULL isn't valid. 160 * 161 * LZ4_compress64kCtx() : 162 * Same as LZ4_compressCtx(), but specific to small inputs (<64KB). 163 * isize *Must* be <64KB, otherwise the output will be corrupted. 164 * 165 * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated 166 * by the caller (either on the stack or using kmem_cache_alloc). Passing 167 * NULL isn't valid. 168 */ 169 170 /* 171 * Tuning parameters 172 */ 173 174 /* 175 * COMPRESSIONLEVEL: Increasing this value improves compression ratio 176 * Lowering this value reduces memory usage. Reduced memory usage 177 * typically improves speed, due to cache effect (ex: L1 32KB for Intel, 178 * L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes 179 * (examples : 12 -> 16KB ; 17 -> 512KB) 180 */ 181 #define COMPRESSIONLEVEL 12 182 183 /* 184 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the 185 * algorithm skip faster data segments considered "incompressible". 186 * This may decrease compression ratio dramatically, but will be 187 * faster on incompressible data. Increasing this value will make 188 * the algorithm search more before declaring a segment "incompressible". 189 * This could improve compression a bit, but will be slower on 190 * incompressible data. The default value (6) is recommended. 191 */ 192 #define NOTCOMPRESSIBLE_CONFIRMATION 6 193 194 /* 195 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to 196 * performance for big endian cpu, but the resulting compressed stream 197 * will be incompatible with little-endian CPU. You can set this option 198 * to 1 in situations where data will stay within closed environment. 199 * This option is useless on Little_Endian CPU (such as x86). 200 */ 201 /* #define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */ 202 203 /* 204 * CPU Feature Detection 205 */ 206 207 /* 32 or 64 bits ? */ 208 #if defined(_LP64) 209 #define LZ4_ARCH64 1 210 #else 211 #define LZ4_ARCH64 0 212 #endif 213 214 /* 215 * Little Endian or Big Endian? 216 * Note: overwrite the below #define if you know your architecture endianness. 217 */ 218 #if defined(_ZFS_BIG_ENDIAN) 219 #define LZ4_BIG_ENDIAN 1 220 #else 221 /* 222 * Little Endian assumed. PDP Endian and other very rare endian format 223 * are unsupported. 224 */ 225 #undef LZ4_BIG_ENDIAN 226 #endif 227 228 /* 229 * Unaligned memory access is automatically enabled for "common" CPU, 230 * such as x86. For others CPU, the compiler will be more cautious, and 231 * insert extra code to ensure aligned access is respected. If you know 232 * your target CPU supports unaligned memory access, you may want to 233 * force this option manually to improve performance 234 */ 235 #if defined(__ARM_FEATURE_UNALIGNED) 236 #define LZ4_FORCE_UNALIGNED_ACCESS 1 237 #endif 238 239 /* 240 * Illumos : we can't use GCC's __builtin_ctz family of builtins in the 241 * kernel 242 * Linux : we can use GCC's __builtin_ctz family of builtins in the 243 * kernel 244 */ 245 #undef LZ4_FORCE_SW_BITCOUNT 246 #if defined(__sparc) 247 #define LZ4_FORCE_SW_BITCOUNT 248 #endif 249 250 /* 251 * Compiler Options 252 */ 253 /* Disable restrict */ 254 #define restrict 255 256 /* 257 * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it. 258 * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16 259 */ 260 #ifdef GCC_VERSION 261 #undef GCC_VERSION 262 #endif 263 264 #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) 265 266 #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) 267 #define expect(expr, value) (__builtin_expect((expr), (value))) 268 #else 269 #define expect(expr, value) (expr) 270 #endif 271 272 #ifndef likely 273 #define likely(expr) expect((expr) != 0, 1) 274 #endif 275 276 #ifndef unlikely 277 #define unlikely(expr) expect((expr) != 0, 0) 278 #endif 279 280 #define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \ 281 (((x) & 0xffu) << 8))) 282 283 /* Basic types */ 284 #define BYTE uint8_t 285 #define U16 uint16_t 286 #define U32 uint32_t 287 #define S32 int32_t 288 #define U64 uint64_t 289 290 #ifndef LZ4_FORCE_UNALIGNED_ACCESS 291 #pragma pack(1) 292 #endif 293 294 typedef struct _U16_S { 295 U16 v; 296 } U16_S; 297 typedef struct _U32_S { 298 U32 v; 299 } U32_S; 300 typedef struct _U64_S { 301 U64 v; 302 } U64_S; 303 304 #ifndef LZ4_FORCE_UNALIGNED_ACCESS 305 #pragma pack() 306 #endif 307 308 #define A64(x) (((U64_S *)(x))->v) 309 #define A32(x) (((U32_S *)(x))->v) 310 #define A16(x) (((U16_S *)(x))->v) 311 312 /* 313 * Constants 314 */ 315 #define MINMATCH 4 316 317 #define HASH_LOG COMPRESSIONLEVEL 318 #define HASHTABLESIZE (1 << HASH_LOG) 319 #define HASH_MASK (HASHTABLESIZE - 1) 320 321 #define SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \ 322 NOTCOMPRESSIBLE_CONFIRMATION : 2) 323 324 #define COPYLENGTH 8 325 #define LASTLITERALS 5 326 #define MFLIMIT (COPYLENGTH + MINMATCH) 327 #define MINLENGTH (MFLIMIT + 1) 328 329 #define MAXD_LOG 16 330 #define MAX_DISTANCE ((1 << MAXD_LOG) - 1) 331 332 #define ML_BITS 4 333 #define ML_MASK ((1U<<ML_BITS)-1) 334 #define RUN_BITS (8-ML_BITS) 335 #define RUN_MASK ((1U<<RUN_BITS)-1) 336 337 338 /* 339 * Architecture-specific macros 340 */ 341 #if LZ4_ARCH64 342 #define STEPSIZE 8 343 #define UARCH U64 344 #define AARCH A64 345 #define LZ4_COPYSTEP(s, d) A64(d) = A64(s); d += 8; s += 8; 346 #define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d) 347 #define LZ4_SECURECOPY(s, d, e) if (d < e) LZ4_WILDCOPY(s, d, e) 348 #define HTYPE U32 349 #define INITBASE(base) const BYTE* const base = ip 350 #else /* !LZ4_ARCH64 */ 351 #define STEPSIZE 4 352 #define UARCH U32 353 #define AARCH A32 354 #define LZ4_COPYSTEP(s, d) A32(d) = A32(s); d += 4; s += 4; 355 #define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d); 356 #define LZ4_SECURECOPY LZ4_WILDCOPY 357 #define HTYPE const BYTE * 358 #define INITBASE(base) const int base = 0 359 #endif /* !LZ4_ARCH64 */ 360 361 #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE)) 362 #define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ 363 { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; } 364 #define LZ4_WRITE_LITTLEENDIAN_16(p, i) \ 365 { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; } 366 #else 367 #define LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); } 368 #define LZ4_WRITE_LITTLEENDIAN_16(p, v) { A16(p) = v; p += 2; } 369 #endif 370 371 372 /* Local structures */ 373 struct refTables { 374 HTYPE hashTable[HASHTABLESIZE]; 375 }; 376 377 378 /* Macros */ 379 #define LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \ 380 HASH_LOG)) 381 #define LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p)) 382 #define LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e); 383 #define LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \ 384 d = e; } 385 386 387 /* Private functions */ 388 #if LZ4_ARCH64 389 390 static inline int 391 LZ4_NbCommonBytes(register U64 val) 392 { 393 #if defined(LZ4_BIG_ENDIAN) 394 #if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \ 395 !defined(LZ4_FORCE_SW_BITCOUNT) 396 return (__builtin_clzll(val) >> 3); 397 #else 398 int r; 399 if (!(val >> 32)) { 400 r = 4; 401 } else { 402 r = 0; 403 val >>= 32; 404 } 405 if (!(val >> 16)) { 406 r += 2; 407 val >>= 8; 408 } else { 409 val >>= 24; 410 } 411 r += (!val); 412 return (r); 413 #endif 414 #else 415 #if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \ 416 !defined(LZ4_FORCE_SW_BITCOUNT) 417 return (__builtin_ctzll(val) >> 3); 418 #else 419 static const int DeBruijnBytePos[64] = 420 { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 421 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 422 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 423 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 424 }; 425 return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >> 426 58]; 427 #endif 428 #endif 429 } 430 431 #else 432 433 static inline int 434 LZ4_NbCommonBytes(register U32 val) 435 { 436 #if defined(LZ4_BIG_ENDIAN) 437 #if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \ 438 !defined(LZ4_FORCE_SW_BITCOUNT) 439 return (__builtin_clz(val) >> 3); 440 #else 441 int r; 442 if (!(val >> 16)) { 443 r = 2; 444 val >>= 8; 445 } else { 446 r = 0; 447 val >>= 24; 448 } 449 r += (!val); 450 return (r); 451 #endif 452 #else 453 #if defined(__GNUC__) && (GCC_VERSION >= 304) && \ 454 !defined(LZ4_FORCE_SW_BITCOUNT) 455 return (__builtin_ctz(val) >> 3); 456 #else 457 static const int DeBruijnBytePos[32] = { 458 0, 0, 3, 0, 3, 1, 3, 0, 459 3, 2, 2, 1, 3, 2, 0, 1, 460 3, 3, 1, 2, 2, 2, 2, 0, 461 3, 1, 2, 0, 1, 0, 1, 1 462 }; 463 return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >> 464 27]; 465 #endif 466 #endif 467 } 468 469 #endif 470 471 /* Compression functions */ 472 473 static int 474 LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize, 475 int osize) 476 { 477 struct refTables *srt = (struct refTables *)ctx; 478 HTYPE *HashTable = (HTYPE *) (srt->hashTable); 479 480 const BYTE *ip = (BYTE *) source; 481 INITBASE(base); 482 const BYTE *anchor = ip; 483 const BYTE *const iend = ip + isize; 484 const BYTE *const oend = (BYTE *) dest + osize; 485 const BYTE *const mflimit = iend - MFLIMIT; 486 #define matchlimit (iend - LASTLITERALS) 487 488 BYTE *op = (BYTE *) dest; 489 490 int len, length; 491 const int skipStrength = SKIPSTRENGTH; 492 U32 forwardH; 493 494 495 /* Init */ 496 if (isize < MINLENGTH) 497 goto _last_literals; 498 499 /* First Byte */ 500 HashTable[LZ4_HASH_VALUE(ip)] = ip - base; 501 ip++; 502 forwardH = LZ4_HASH_VALUE(ip); 503 504 /* Main Loop */ 505 for (;;) { 506 int findMatchAttempts = (1U << skipStrength) + 3; 507 const BYTE *forwardIp = ip; 508 const BYTE *ref; 509 BYTE *token; 510 511 /* Find a match */ 512 do { 513 U32 h = forwardH; 514 int step = findMatchAttempts++ >> skipStrength; 515 ip = forwardIp; 516 forwardIp = ip + step; 517 518 if (unlikely(forwardIp > mflimit)) { 519 goto _last_literals; 520 } 521 522 forwardH = LZ4_HASH_VALUE(forwardIp); 523 ref = base + HashTable[h]; 524 HashTable[h] = ip - base; 525 526 } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip))); 527 528 /* Catch up */ 529 while ((ip > anchor) && (ref > (BYTE *) source) && 530 unlikely(ip[-1] == ref[-1])) { 531 ip--; 532 ref--; 533 } 534 535 /* Encode Literal length */ 536 length = ip - anchor; 537 token = op++; 538 539 /* Check output limit */ 540 if (unlikely(op + length + (2 + 1 + LASTLITERALS) + 541 (length >> 8) > oend)) 542 return (0); 543 544 if (length >= (int)RUN_MASK) { 545 *token = (RUN_MASK << ML_BITS); 546 len = length - RUN_MASK; 547 for (; len > 254; len -= 255) 548 *op++ = 255; 549 *op++ = (BYTE)len; 550 } else 551 *token = (length << ML_BITS); 552 553 /* Copy Literals */ 554 LZ4_BLINDCOPY(anchor, op, length); 555 556 _next_match: 557 /* Encode Offset */ 558 LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref); 559 560 /* Start Counting */ 561 ip += MINMATCH; 562 ref += MINMATCH; /* MinMatch verified */ 563 anchor = ip; 564 while (likely(ip < matchlimit - (STEPSIZE - 1))) { 565 UARCH diff = AARCH(ref) ^ AARCH(ip); 566 if (!diff) { 567 ip += STEPSIZE; 568 ref += STEPSIZE; 569 continue; 570 } 571 ip += LZ4_NbCommonBytes(diff); 572 goto _endCount; 573 } 574 #if LZ4_ARCH64 575 if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) { 576 ip += 4; 577 ref += 4; 578 } 579 #endif 580 if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) { 581 ip += 2; 582 ref += 2; 583 } 584 if ((ip < matchlimit) && (*ref == *ip)) 585 ip++; 586 _endCount: 587 588 /* Encode MatchLength */ 589 len = (ip - anchor); 590 /* Check output limit */ 591 if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)) 592 return (0); 593 if (len >= (int)ML_MASK) { 594 *token += ML_MASK; 595 len -= ML_MASK; 596 for (; len > 509; len -= 510) { 597 *op++ = 255; 598 *op++ = 255; 599 } 600 if (len > 254) { 601 len -= 255; 602 *op++ = 255; 603 } 604 *op++ = (BYTE)len; 605 } else 606 *token += len; 607 608 /* Test end of chunk */ 609 if (ip > mflimit) { 610 anchor = ip; 611 break; 612 } 613 /* Fill table */ 614 HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base; 615 616 /* Test next position */ 617 ref = base + HashTable[LZ4_HASH_VALUE(ip)]; 618 HashTable[LZ4_HASH_VALUE(ip)] = ip - base; 619 if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) { 620 token = op++; 621 *token = 0; 622 goto _next_match; 623 } 624 /* Prepare next loop */ 625 anchor = ip++; 626 forwardH = LZ4_HASH_VALUE(ip); 627 } 628 629 _last_literals: 630 /* Encode Last Literals */ 631 { 632 int lastRun = iend - anchor; 633 if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) > 634 oend) 635 return (0); 636 if (lastRun >= (int)RUN_MASK) { 637 *op++ = (RUN_MASK << ML_BITS); 638 lastRun -= RUN_MASK; 639 for (; lastRun > 254; lastRun -= 255) { 640 *op++ = 255; 641 } 642 *op++ = (BYTE)lastRun; 643 } else 644 *op++ = (lastRun << ML_BITS); 645 (void) memcpy(op, anchor, iend - anchor); 646 op += iend - anchor; 647 } 648 649 /* End */ 650 return (int)(((char *)op) - dest); 651 } 652 653 654 655 /* Note : this function is valid only if isize < LZ4_64KLIMIT */ 656 #define LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1)) 657 #define HASHLOG64K (HASH_LOG + 1) 658 #define HASH64KTABLESIZE (1U << HASHLOG64K) 659 #define LZ4_HASH64K_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8) - \ 660 HASHLOG64K)) 661 #define LZ4_HASH64K_VALUE(p) LZ4_HASH64K_FUNCTION(A32(p)) 662 663 static int 664 LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize, 665 int osize) 666 { 667 struct refTables *srt = (struct refTables *)ctx; 668 U16 *HashTable = (U16 *) (srt->hashTable); 669 670 const BYTE *ip = (BYTE *) source; 671 const BYTE *anchor = ip; 672 const BYTE *const base = ip; 673 const BYTE *const iend = ip + isize; 674 const BYTE *const oend = (BYTE *) dest + osize; 675 const BYTE *const mflimit = iend - MFLIMIT; 676 #define matchlimit (iend - LASTLITERALS) 677 678 BYTE *op = (BYTE *) dest; 679 680 int len, length; 681 const int skipStrength = SKIPSTRENGTH; 682 U32 forwardH; 683 684 /* Init */ 685 if (isize < MINLENGTH) 686 goto _last_literals; 687 688 /* First Byte */ 689 ip++; 690 forwardH = LZ4_HASH64K_VALUE(ip); 691 692 /* Main Loop */ 693 for (;;) { 694 int findMatchAttempts = (1U << skipStrength) + 3; 695 const BYTE *forwardIp = ip; 696 const BYTE *ref; 697 BYTE *token; 698 699 /* Find a match */ 700 do { 701 U32 h = forwardH; 702 int step = findMatchAttempts++ >> skipStrength; 703 ip = forwardIp; 704 forwardIp = ip + step; 705 706 if (forwardIp > mflimit) { 707 goto _last_literals; 708 } 709 710 forwardH = LZ4_HASH64K_VALUE(forwardIp); 711 ref = base + HashTable[h]; 712 HashTable[h] = ip - base; 713 714 } while (A32(ref) != A32(ip)); 715 716 /* Catch up */ 717 while ((ip > anchor) && (ref > (BYTE *) source) && 718 (ip[-1] == ref[-1])) { 719 ip--; 720 ref--; 721 } 722 723 /* Encode Literal length */ 724 length = ip - anchor; 725 token = op++; 726 727 /* Check output limit */ 728 if (unlikely(op + length + (2 + 1 + LASTLITERALS) + 729 (length >> 8) > oend)) 730 return (0); 731 732 if (length >= (int)RUN_MASK) { 733 *token = (RUN_MASK << ML_BITS); 734 len = length - RUN_MASK; 735 for (; len > 254; len -= 255) 736 *op++ = 255; 737 *op++ = (BYTE)len; 738 } else 739 *token = (length << ML_BITS); 740 741 /* Copy Literals */ 742 LZ4_BLINDCOPY(anchor, op, length); 743 744 _next_match: 745 /* Encode Offset */ 746 LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref); 747 748 /* Start Counting */ 749 ip += MINMATCH; 750 ref += MINMATCH; /* MinMatch verified */ 751 anchor = ip; 752 while (ip < matchlimit - (STEPSIZE - 1)) { 753 UARCH diff = AARCH(ref) ^ AARCH(ip); 754 if (!diff) { 755 ip += STEPSIZE; 756 ref += STEPSIZE; 757 continue; 758 } 759 ip += LZ4_NbCommonBytes(diff); 760 goto _endCount; 761 } 762 #if LZ4_ARCH64 763 if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) { 764 ip += 4; 765 ref += 4; 766 } 767 #endif 768 if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) { 769 ip += 2; 770 ref += 2; 771 } 772 if ((ip < matchlimit) && (*ref == *ip)) 773 ip++; 774 _endCount: 775 776 /* Encode MatchLength */ 777 len = (ip - anchor); 778 /* Check output limit */ 779 if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)) 780 return (0); 781 if (len >= (int)ML_MASK) { 782 *token += ML_MASK; 783 len -= ML_MASK; 784 for (; len > 509; len -= 510) { 785 *op++ = 255; 786 *op++ = 255; 787 } 788 if (len > 254) { 789 len -= 255; 790 *op++ = 255; 791 } 792 *op++ = (BYTE)len; 793 } else 794 *token += len; 795 796 /* Test end of chunk */ 797 if (ip > mflimit) { 798 anchor = ip; 799 break; 800 } 801 /* Fill table */ 802 HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base; 803 804 /* Test next position */ 805 ref = base + HashTable[LZ4_HASH64K_VALUE(ip)]; 806 HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base; 807 if (A32(ref) == A32(ip)) { 808 token = op++; 809 *token = 0; 810 goto _next_match; 811 } 812 /* Prepare next loop */ 813 anchor = ip++; 814 forwardH = LZ4_HASH64K_VALUE(ip); 815 } 816 817 _last_literals: 818 /* Encode Last Literals */ 819 { 820 int lastRun = iend - anchor; 821 if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) > 822 oend) 823 return (0); 824 if (lastRun >= (int)RUN_MASK) { 825 *op++ = (RUN_MASK << ML_BITS); 826 lastRun -= RUN_MASK; 827 for (; lastRun > 254; lastRun -= 255) 828 *op++ = 255; 829 *op++ = (BYTE)lastRun; 830 } else 831 *op++ = (lastRun << ML_BITS); 832 (void) memcpy(op, anchor, iend - anchor); 833 op += iend - anchor; 834 } 835 836 /* End */ 837 return (int)(((char *)op) - dest); 838 } 839 840 static int 841 real_LZ4_compress(const char *source, char *dest, int isize, int osize) 842 { 843 void *ctx; 844 int result; 845 846 ctx = lz4_alloc(KM_SLEEP); 847 848 /* 849 * out of kernel memory, gently fall through - this will disable 850 * compression in zio_compress_data 851 */ 852 if (ctx == NULL) 853 return (0); 854 855 memset(ctx, 0, sizeof (struct refTables)); 856 857 if (isize < LZ4_64KLIMIT) 858 result = LZ4_compress64kCtx(ctx, source, dest, isize, osize); 859 else 860 result = LZ4_compressCtx(ctx, source, dest, isize, osize); 861 862 lz4_free(ctx); 863 return (result); 864 } 865 866 #ifdef __FreeBSD__ 867 /* 868 * FreeBSD has 4, 8 and 16 KB malloc zones which can be used here. 869 * Should struct refTables get resized this may need to be revisited, hence 870 * compiler-time asserts. 871 */ 872 _Static_assert(sizeof(struct refTables) <= 16384, 873 "refTables too big for malloc"); 874 _Static_assert((sizeof(struct refTables) % 4096) == 0, 875 "refTables not a multiple of page size"); 876 #else 877 #define ZFS_LZ4_USE_CACHE 878 #endif 879 880 #ifdef ZFS_LZ4_USE_CACHE 881 static kmem_cache_t *lz4_cache; 882 #endif 883 884 #ifdef ZFS_LZ4_USE_CACHE 885 void 886 lz4_init(void) 887 { 888 lz4_cache = kmem_cache_create("lz4_cache", 889 sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL, 0); 890 } 891 892 void 893 lz4_fini(void) 894 { 895 if (lz4_cache) { 896 kmem_cache_destroy(lz4_cache); 897 lz4_cache = NULL; 898 } 899 } 900 901 static void * 902 lz4_alloc(int flags) 903 { 904 ASSERT(lz4_cache != NULL); 905 return (kmem_cache_alloc(lz4_cache, flags)); 906 } 907 908 static void 909 lz4_free(void *ctx) 910 { 911 kmem_cache_free(lz4_cache, ctx); 912 } 913 #else 914 void 915 lz4_init(void) 916 { 917 } 918 919 void 920 lz4_fini(void) 921 { 922 } 923 924 static void * 925 lz4_alloc(int flags) 926 { 927 return (kmem_alloc(sizeof (struct refTables), flags)); 928 } 929 930 static void 931 lz4_free(void *ctx) 932 { 933 kmem_free(ctx, sizeof (struct refTables)); 934 } 935 #endif 936