// SPDX-License-Identifier: BSD-2-Clause
/*
 * LZ4 - Fast LZ compression algorithm
 * Header File
 * Copyright (C) 2011-2013, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
 * - LZ4 source repository : http://code.google.com/p/lz4/
 */

/*
 * N.B. - This file seems to be based on LZ4 r85, dated Dec 10, 2012
 */

#include <sys/zfs_context.h>
#include <sys/zio_compress.h>

static int real_LZ4_compress(const char *source, char *dest, int isize,
    int osize);
static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);
static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);

/* See lz4.c */
int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);

static void *lz4_alloc(int flags);
static void lz4_free(void *ctx);

static size_t
zfs_lz4_compress_buf(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int n)
{
	(void) n;
	uint32_t bufsiz;
	char *dest = d_start;

	ASSERT(d_len >= sizeof (bufsiz));

	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
	    d_len - sizeof (bufsiz));

	/* Signal an error if the compression routine returned zero. */
	if (bufsiz == 0)
		return (s_len);

	/*
	 * The exact compressed size is needed by the decompression routine,
	 * so it is stored at the start of the buffer. Note that this may be
	 * less than the compressed block size, which is rounded up to a
	 * multiple of 1<<ashift.
	 */
	*(uint32_t *)dest = BE_32(bufsiz);

	return (bufsiz + sizeof (bufsiz));
}

static int
zfs_lz4_decompress_buf(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int n)
{
	(void) n;
	const char *src = s_start;
	uint32_t bufsiz = BE_IN32(src);

	/* invalid compressed buffer size encoded at start */
	if (bufsiz + sizeof (bufsiz) > s_len)
		return (1);

	/*
	 * Returns 0 on success (decompression function returned non-negative)
	 * and non-zero on failure (decompression function returned negative).
	 */
	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
	    d_start, bufsiz, d_len) < 0);
}

ZFS_COMPRESS_WRAP_DECL(zfs_lz4_compress)
ZFS_DECOMPRESS_WRAP_DECL(zfs_lz4_decompress)
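/*
 * Buffer layout produced by zfs_lz4_compress_buf() and consumed by
 * zfs_lz4_decompress_buf() above (a descriptive summary of the code, not a
 * format change):
 *
 *	bytes 0..3	exact compressed payload size, stored big-endian
 *			(BE_32(bufsiz))
 *	bytes 4..	LZ4 stream of exactly bufsiz bytes
 *
 * The allocated block itself may be larger, since it is rounded up to a
 * multiple of 1<<ashift, which is why the exact payload size must be
 * recorded for the decompression path.
 */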
/*
 * LZ4 API Description:
 *
 * Simple Functions:
 * real_LZ4_compress() :
 *	isize  : is the input size. Max supported value is ~1.9GB
 *	return : the number of bytes written in buffer dest
 *	    or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
 *	note : destination buffer must be already allocated.
 *	    destination buffer must be sized to handle worst-case
 *	    situations (input data not compressible); worst-case size
 *	    evaluation is provided by function LZ4_compressBound().
 *
 * real_LZ4_uncompress() :
 *	osize  : is the output size, therefore the original size
 *	return : the number of bytes read in the source buffer.
 *	    If the source stream is malformed, the function will stop
 *	    decoding and return a negative result, indicating the byte
 *	    position of the faulty instruction. This function never
 *	    writes beyond dest + osize, and is therefore protected
 *	    against malicious data packets.
 *	note : destination buffer must be already allocated
 *	note : real_LZ4_uncompress() is not used in ZFS so its code
 *	    is not present here.
 *
 * Advanced Functions
 *
 * LZ4_compressBound() :
 *	Provides the maximum size that LZ4 may output in a "worst case"
 *	scenario (input data not compressible); primarily useful for memory
 *	allocation of the output buffer.
 *
 *	isize  : is the input size. Max supported value is ~1.9GB
 *	return : maximum output size in a "worst case" scenario
 *	note : this function is limited by "int" range (2^31-1)
 *
 * LZ4_uncompress_unknownOutputSize() :
 *	isize  : is the input size, therefore the compressed size
 *	maxOutputSize : is the size of the destination buffer (which must be
 *	    already allocated)
 *	return : the number of bytes decoded in the destination buffer
 *	    (necessarily <= maxOutputSize). If the source stream is
 *	    malformed, the function will stop decoding and return a
 *	    negative result, indicating the byte position of the faulty
 *	    instruction. This function never writes beyond dest +
 *	    maxOutputSize, and is therefore protected against malicious
 *	    data packets.
 *	note : Destination buffer must be already allocated.
 *	    This version is slightly slower than real_LZ4_uncompress()
 *
 * LZ4_compressCtx() :
 *	This function explicitly handles the CTX memory structure.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_cache_alloc). Passing
 *	NULL isn't valid.
 *
 * LZ4_compress64kCtx() :
 *	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 *	isize *Must* be <64KB, otherwise the output will be corrupted.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_cache_alloc). Passing
 *	NULL isn't valid.
 */
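/*
 * Illustrative round trip through the entry points used above, assuming a
 * hypothetical caller with buffers src, dst and out (a sketch only, not
 * code compiled in this file):
 *
 *	int clen = real_LZ4_compress(src, dst, src_len, dst_len);
 *	if (clen == 0) {
 *		// dst_len was too small; store the block uncompressed
 *	}
 *	int dlen = LZ4_uncompress_unknownOutputSize(dst, out, clen, out_len);
 *	if (dlen < 0) {
 *		// malformed stream; the magnitude points at the faulty byte
 *	}
 *
 * zfs_lz4_compress_buf() maps the clen == 0 case to returning s_len, which
 * signals the caller that the block could not be compressed.
 */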
/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio.
 * Lowering this value reduces memory usage. Reduced memory usage
 * typically improves speed, due to cache effect (ex: L1 32KB for Intel,
 * L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 * (examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 * algorithm skip over data segments considered "incompressible" faster.
 * This may decrease compression ratio dramatically, but will be
 * faster on incompressible data. Increasing this value will make
 * the algorithm search more before declaring a segment "incompressible".
 * This could improve compression a bit, but will be slower on
 * incompressible data. The default value (6) is recommended.
 */
#define NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
 * performance for big-endian CPUs, but the resulting compressed stream
 * will be incompatible with little-endian CPUs. You can set this option
 * to 1 in situations where data will stay within a closed environment.
 * This option is useless on little-endian CPUs (such as x86).
 */
/* #define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */

/*
 * CPU Feature Detection
 */

/* 32 or 64 bits ? */
#if defined(_LP64)
#define LZ4_ARCH64 1
#else
#define LZ4_ARCH64 0
#endif

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 */
#if defined(_ZFS_BIG_ENDIAN)
#define LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian formats
 * are unsupported.
 */
#undef LZ4_BIG_ENDIAN
#endif

/*
 * Unaligned memory access is automatically enabled for "common" CPUs,
 * such as x86. For other CPUs, the compiler will be more cautious, and
 * insert extra code to ensure aligned access is respected. If you know
 * your target CPU supports unaligned memory access, you may want to
 * force this option manually to improve performance.
 */
#if defined(__ARM_FEATURE_UNALIGNED)
#define LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

/*
 * Illumos : we can't use GCC's __builtin_ctz family of builtins in the
 * kernel
 * Linux : we can use GCC's __builtin_ctz family of builtins in the
 * kernel
 */
#undef LZ4_FORCE_SW_BITCOUNT
#if defined(__sparc)
#define LZ4_FORCE_SW_BITCOUNT
#endif

/*
 * Compiler Options
 */
/* Disable restrict */
#define restrict

/*
 * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it.
 * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16
 */
#ifdef GCC_VERSION
#undef GCC_VERSION
#endif

#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
#define expect(expr, value)	(__builtin_expect((expr), (value)))
#else
#define expect(expr, value)	(expr)
#endif

#ifndef likely
#define likely(expr)	expect((expr) != 0, 1)
#endif

#ifndef unlikely
#define unlikely(expr)	expect((expr) != 0, 0)
#endif

#define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
	(((x) & 0xffu) << 8)))

/* Basic types */
#define BYTE	uint8_t
#define U16	uint16_t
#define U32	uint32_t
#define S32	int32_t
#define U64	uint64_t

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(1)
#endif

typedef struct _U16_S {
	U16 v;
} U16_S;
typedef struct _U32_S {
	U32 v;
} U32_S;
typedef struct _U64_S {
	U64 v;
} U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack()
#endif

#define A64(x) (((U64_S *)(x))->v)
#define A32(x) (((U32_S *)(x))->v)
#define A16(x) (((U16_S *)(x))->v)

/*
 * Constants
 */
#define MINMATCH 4

#define HASH_LOG COMPRESSIONLEVEL
#define HASHTABLESIZE (1 << HASH_LOG)
#define HASH_MASK (HASHTABLESIZE - 1)

#define SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
	NOTCOMPRESSIBLE_CONFIRMATION : 2)

#define COPYLENGTH 8
#define LASTLITERALS 5
#define MFLIMIT (COPYLENGTH + MINMATCH)
#define MINLENGTH (MFLIMIT + 1)

#define MAXD_LOG 16
#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define ML_BITS 4
#define ML_MASK ((1U<<ML_BITS)-1)
#define RUN_BITS (8-ML_BITS)
#define RUN_MASK ((1U<<RUN_BITS)-1)
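/*
 * Sequence/token layout used by the encoders below: each sequence begins
 * with a one-byte token whose upper RUN_BITS hold the literal run length
 * and whose lower ML_BITS hold the match length minus MINMATCH.  A field
 * equal to its mask (RUN_MASK or ML_MASK) signals that the length
 * continues in following bytes of value 255, terminated by a byte < 255.
 */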
/*
 * Architecture-specific macros
 */
#if LZ4_ARCH64
#define STEPSIZE 8
#define UARCH U64
#define AARCH A64
#define LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
#define LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
#define LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
#define HTYPE U32
#define INITBASE(base)		const BYTE* const base = ip
#else /* !LZ4_ARCH64 */
#define STEPSIZE 4
#define UARCH U32
#define AARCH A32
#define LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
#define LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
#define LZ4_SECURECOPY		LZ4_WILDCOPY
#define HTYPE const BYTE *
#define INITBASE(base)		const int base = 0
#endif /* !LZ4_ARCH64 */

#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \
	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define LZ4_WRITE_LITTLEENDIAN_16(p, i) \
	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
#else
#define LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
#define LZ4_WRITE_LITTLEENDIAN_16(p, v) { A16(p) = v; p += 2; }
#endif


/* Local structures */
struct refTables {
	HTYPE hashTable[HASHTABLESIZE];
};


/* Macros */
#define LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
	HASH_LOG))
#define LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
#define LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
#define LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
	d = e; }
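/*
 * LZ4_HASH_FUNCTION() is a multiplicative hash: the 32-bit value read at p
 * is multiplied by a large odd constant and the top HASH_LOG bits of the
 * product select the hashTable slot.  LZ4_WILDCOPY() copies in whole
 * COPYLENGTH-byte packets and may write up to COPYLENGTH - 1 bytes past
 * the requested end, which is why the encoders stop the main loop at
 * mflimit and reserve LASTLITERALS bytes in their output-limit checks.
 */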
/* Private functions */
#if LZ4_ARCH64

static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clzll(val) >> 3);
#else
	int r;
	if (!(val >> 32)) {
		r = 4;
	} else {
		r = 0;
		val >>= 32;
	}
	if (!(val >> 16)) {
		r += 2;
		val >>= 8;
	} else {
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctzll(val) >> 3);
#else
	static const int DeBruijnBytePos[64] =
	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
	};
	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
	    58];
#endif
#endif
}

#else

static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clz(val) >> 3);
#else
	int r;
	if (!(val >> 16)) {
		r = 2;
		val >>= 8;
	} else {
		r = 0;
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctz(val) >> 3);
#else
	static const int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0,
		3, 2, 2, 1, 3, 2, 0, 1,
		3, 3, 1, 2, 2, 2, 2, 0,
		3, 1, 2, 0, 1, 0, 1, 1
	};
	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
	    27];
#endif
#endif
}

#endif
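/*
 * LZ4_NbCommonBytes() is given the XOR of two words that are known to
 * differ and returns how many of their bytes (in memory order) still
 * match, i.e. trailing zero bytes on little-endian and leading zero bytes
 * on big-endian.  It uses the compiler's ctz/clz builtins when allowed and
 * otherwise falls back to a de Bruijn lookup table or explicit shifts.
 * The match loops below use it to extend matches one word at a time.
 */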
/* Compression functions */

static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
	struct refTables *srt = (struct refTables *)ctx;
	HTYPE *HashTable = (HTYPE *) (srt->hashTable);

	const BYTE *ip = (BYTE *) source;
	INITBASE(base);
	const BYTE *anchor = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;


	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
	ip++;
	forwardH = LZ4_HASH_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (unlikely(forwardIp > mflimit)) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    unlikely(ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend))
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (likely(ip < matchlimit - (STEPSIZE - 1))) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255) {
				*op++ = 255;
			}
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}


/* Note : this function is valid only if isize < LZ4_64KLIMIT */
#define LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
#define HASHLOG64K (HASH_LOG + 1)
#define HASH64KTABLESIZE (1U << HASHLOG64K)
#define LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
	HASHLOG64K))
#define LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))
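/*
 * The 64K variant stores hash entries as 16-bit offsets from the start of
 * the source (base == source), which is sufficient because every position
 * in a sub-64K input fits in a U16.  That packs twice as many entries
 * (HASHLOG64K == HASH_LOG + 1) into the same refTables allocation and lets
 * the match-finding loop drop the MAX_DISTANCE window check.
 */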
static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
	struct refTables *srt = (struct refTables *)ctx;
	U16 *HashTable = (U16 *) (srt->hashTable);

	const BYTE *ip = (BYTE *) source;
	const BYTE *anchor = ip;
	const BYTE *const base = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	ip++;
	forwardH = LZ4_HASH64K_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH64K_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while (A32(ref) != A32(ip));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    (ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend))
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
		if (A32(ref) == A32(ip)) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH64K_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255)
				*op++ = 255;
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}

static int
real_LZ4_compress(const char *source, char *dest, int isize, int osize)
{
	void *ctx;
	int result;

	ctx = lz4_alloc(KM_SLEEP);

	/*
	 * out of kernel memory, gently fall through - this will disable
	 * compression in zio_compress_data
	 */
	if (ctx == NULL)
		return (0);

	memset(ctx, 0, sizeof (struct refTables));

	if (isize < LZ4_64KLIMIT)
		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
	else
		result = LZ4_compressCtx(ctx, source, dest, isize, osize);

	lz4_free(ctx);
	return (result);
}
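/*
 * Context size note: with HASH_LOG = COMPRESSIONLEVEL = 12 the hash table
 * has 4096 entries of 4 bytes each (HTYPE is U32 on 64-bit builds and a
 * 32-bit pointer otherwise), so sizeof (struct refTables) is 16KB.  This
 * matches the 2^(N+2) formula documented with COMPRESSIONLEVEL and the
 * FreeBSD malloc-zone asserts below.
 */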
#ifdef __FreeBSD__
/*
 * FreeBSD has 4, 8 and 16 KB malloc zones which can be used here.
 * Should struct refTables get resized this may need to be revisited, hence
 * the compile-time asserts.
 */
_Static_assert(sizeof(struct refTables) <= 16384,
    "refTables too big for malloc");
_Static_assert((sizeof(struct refTables) % 4096) == 0,
    "refTables not a multiple of page size");
#else
#define ZFS_LZ4_USE_CACHE
#endif

#ifdef ZFS_LZ4_USE_CACHE
static kmem_cache_t *lz4_cache;
#endif

#ifdef ZFS_LZ4_USE_CACHE
void
lz4_init(void)
{
	lz4_cache = kmem_cache_create("lz4_cache",
	    sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL,
	    KMC_RECLAIMABLE);
}

void
lz4_fini(void)
{
	if (lz4_cache) {
		kmem_cache_destroy(lz4_cache);
		lz4_cache = NULL;
	}
}

static void *
lz4_alloc(int flags)
{
	ASSERT(lz4_cache != NULL);
	return (kmem_cache_alloc(lz4_cache, flags));
}

static void
lz4_free(void *ctx)
{
	kmem_cache_free(lz4_cache, ctx);
}
#else
void
lz4_init(void)
{
}

void
lz4_fini(void)
{
}

static void *
lz4_alloc(int flags)
{
	return (kmem_alloc(sizeof (struct refTables), flags));
}

static void
lz4_free(void *ctx)
{
	kmem_free(ctx, sizeof (struct refTables));
}
#endif