/*
 * LZ4 - Fast LZ compression algorithm
 * Header File
 * Copyright (C) 2011-2013, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
 * - LZ4 source repository : http://code.google.com/p/lz4/
 */

#include <sys/zfs_context.h>
#include <sys/zio_compress.h>

static int real_LZ4_compress(const char *source, char *dest, int isize,
    int osize);
static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);
static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);
static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);

static void *lz4_alloc(int flags);
static void lz4_free(void *ctx);

/*ARGSUSED*/
size_t
lz4_compress_zfs(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int n)
{
	uint32_t bufsiz;
	char *dest = d_start;

	ASSERT(d_len >= sizeof (bufsiz));

	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
	    d_len - sizeof (bufsiz));

	/* Signal an error if the compression routine returned zero. */
	if (bufsiz == 0)
		return (s_len);

	/*
	 * The exact compressed size is needed by the decompression routine,
	 * so it is stored at the start of the buffer. Note that this may be
	 * less than the compressed block size, which is rounded up to a
	 * multiple of 1<<ashift.
	 */
	*(uint32_t *)dest = BE_32(bufsiz);

	return (bufsiz + sizeof (bufsiz));
}

/*ARGSUSED*/
int
lz4_decompress_zfs(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int n)
{
	const char *src = s_start;
	uint32_t bufsiz = BE_IN32(src);

	/* invalid compressed buffer size encoded at start */
	if (bufsiz + sizeof (bufsiz) > s_len)
		return (1);

	/*
	 * Returns 0 on success (decompression function returned non-negative)
	 * and non-zero on failure (decompression function returned negative).
	 */
	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
	    d_start, bufsiz, d_len) < 0);
}
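
/*
 * Illustrative sketch only (buffer names and sizes here are hypothetical,
 * not part of ZFS): lz4_compress_zfs() writes a 4-byte big-endian length
 * header followed by the raw LZ4 stream, and lz4_decompress_zfs() reads that
 * header back. In ZFS these entry points are driven by zio_compress_data()
 * and zio_decompress_data(); the disabled block below only shows the calling
 * convention.
 */
#if 0
static void
lz4_roundtrip_example(void)
{
	char src[4096] = { 0 };		/* hypothetical input buffer */
	char packed[4096];		/* header + compressed stream */
	char unpacked[4096];
	size_t c_len;

	/* Returns s_len when the data does not fit in d_len (store raw). */
	c_len = lz4_compress_zfs(src, packed, sizeof (src),
	    sizeof (packed), 0);
	if (c_len == sizeof (src))
		return;

	/* Returns 0 on success, non-zero on malformed or truncated input. */
	if (lz4_decompress_zfs(packed, unpacked, c_len,
	    sizeof (unpacked), 0) != 0)
		return;
}
#endif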

/*
 * LZ4 API Description:
 *
 * Simple Functions:
 * real_LZ4_compress() :
 *	isize  : is the input size. Max supported value is ~1.9GB
 *	return : the number of bytes written in buffer dest
 *	    or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
 *	note : destination buffer must be already allocated.
 *	    destination buffer must be sized to handle worst case
 *	    situations (input data not compressible). Worst case size
 *	    evaluation is provided by function LZ4_compressBound().
 *
 * real_LZ4_uncompress() :
 *	osize  : is the output size, therefore the original size
 *	return : the number of bytes read in the source buffer.
 *	    If the source stream is malformed, the function will stop
 *	    decoding and return a negative result, indicating the byte
 *	    position of the faulty instruction. This function never
 *	    writes beyond dest + osize, and is therefore protected
 *	    against malicious data packets.
 *	note : destination buffer must be already allocated
 *	note : real_LZ4_uncompress() is not used in ZFS so its code
 *	    is not present here.
 *
 * Advanced Functions
 *
 * LZ4_compressBound() :
 *	Provides the maximum size that LZ4 may output in a "worst case"
 *	scenario (input data not compressible), primarily useful for memory
 *	allocation of the output buffer.
 *
 *	isize  : is the input size. Max supported value is ~1.9GB
 *	return : maximum output size in a "worst case" scenario
 *	note : this function is limited by "int" range (2^31-1)
 *
 * LZ4_uncompress_unknownOutputSize() :
 *	isize  : is the input size, therefore the compressed size
 *	maxOutputSize : is the size of the destination buffer (which must be
 *	    already allocated)
 *	return : the number of bytes decoded in the destination buffer
 *	    (necessarily <= maxOutputSize). If the source stream is
 *	    malformed, the function will stop decoding and return a
 *	    negative result, indicating the byte position of the faulty
 *	    instruction. This function never writes beyond dest +
 *	    maxOutputSize, and is therefore protected against malicious
 *	    data packets.
 *	note : Destination buffer must be already allocated.
 *	    This version is slightly slower than real_LZ4_uncompress()
 *
 * LZ4_compressCtx() :
 *	This function explicitly handles the CTX memory structure.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_cache_alloc). Passing
 *	NULL isn't valid.
 *
 * LZ4_compress64kCtx() :
 *	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 *	isize *Must* be <64KB, otherwise the output will be corrupted.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_cache_alloc). Passing
 *	NULL isn't valid.
 */

/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio.
 * Lowering this value reduces memory usage. Reduced memory usage
 * typically improves speed, due to cache effect (ex: L1 32KB for Intel,
 * L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 * (examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define	COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 * algorithm skip faster data segments considered "incompressible".
 * This may decrease compression ratio dramatically, but will be
 * faster on incompressible data. Increasing this value will make
 * the algorithm search more before declaring a segment "incompressible".
 * This could improve compression a bit, but will be slower on
 * incompressible data. The default value (6) is recommended.
 */
#define	NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
 * performance for big-endian CPUs, but the resulting compressed stream
 * will be incompatible with little-endian CPUs. You can set this option
 * to 1 in situations where data will stay within a closed environment.
 * This option is useless on little-endian CPUs (such as x86).
 */
/* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */

/*
 * CPU Feature Detection
 */

/* 32 or 64 bits ? */
#if defined(_LP64)
#define	LZ4_ARCH64 1
#else
#define	LZ4_ARCH64 0
#endif

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 */
#if defined(_ZFS_BIG_ENDIAN)
#define	LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian formats
 * are unsupported.
 */
#undef LZ4_BIG_ENDIAN
#endif

/*
 * Unaligned memory access is automatically enabled for "common" CPUs,
 * such as x86. For other CPUs, the compiler will be more cautious, and
 * insert extra code to ensure aligned access is respected. If you know
 * your target CPU supports unaligned memory access, you may want to
 * force this option manually to improve performance.
 */
#if defined(__ARM_FEATURE_UNALIGNED)
#define	LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

/*
 * Illumos : we can't use GCC's __builtin_ctz family of builtins in the
 * kernel
 * Linux : we can use GCC's __builtin_ctz family of builtins in the
 * kernel
 */
#undef	LZ4_FORCE_SW_BITCOUNT
#if defined(__sparc)
#define	LZ4_FORCE_SW_BITCOUNT
#endif

/*
 * Compiler Options
 */
/* Disable restrict */
#define	restrict

/*
 * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it.
 * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16
 */
#ifdef GCC_VERSION
#undef GCC_VERSION
#endif

#define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
#define	expect(expr, value)	(__builtin_expect((expr), (value)))
#else
#define	expect(expr, value)	(expr)
#endif

#ifndef likely
#define	likely(expr)	expect((expr) != 0, 1)
#endif

#ifndef unlikely
#define	unlikely(expr)	expect((expr) != 0, 0)
#endif

#define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
	(((x) & 0xffu) << 8)))

/* Basic types */
#define	BYTE	uint8_t
#define	U16	uint16_t
#define	U32	uint32_t
#define	S32	int32_t
#define	U64	uint64_t

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(1)
#endif

typedef struct _U16_S {
	U16 v;
} U16_S;
typedef struct _U32_S {
	U32 v;
} U32_S;
typedef struct _U64_S {
	U64 v;
} U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack()
#endif

#define	A64(x) (((U64_S *)(x))->v)
#define	A32(x) (((U32_S *)(x))->v)
#define	A16(x) (((U16_S *)(x))->v)

/*
 * Constants
 */
#define	MINMATCH 4

#define	HASH_LOG COMPRESSIONLEVEL
#define	HASHTABLESIZE (1 << HASH_LOG)
#define	HASH_MASK (HASHTABLESIZE - 1)

#define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
	NOTCOMPRESSIBLE_CONFIRMATION : 2)

#define	COPYLENGTH 8
#define	LASTLITERALS 5
#define	MFLIMIT (COPYLENGTH + MINMATCH)
#define	MINLENGTH (MFLIMIT + 1)

#define	MAXD_LOG 16
#define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define	ML_BITS 4
#define	ML_MASK ((1U<<ML_BITS)-1)
#define	RUN_BITS (8-ML_BITS)
#define	RUN_MASK ((1U<<RUN_BITS)-1)


/*
 * Architecture-specific macros
 */
#if LZ4_ARCH64
#define	STEPSIZE 8
#define	UARCH U64
#define	AARCH A64
#define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
#define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
#define	HTYPE U32
#define	INITBASE(base)		const BYTE* const base = ip
#else /* !LZ4_ARCH64 */
#define	STEPSIZE 4
#define	UARCH U32
#define	AARCH A32
#define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
#define	LZ4_SECURECOPY		LZ4_WILDCOPY
#define	HTYPE const BYTE *
#define	INITBASE(base)		const int base = 0
#endif /* !LZ4_ARCH64 */

#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
#else
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, v)	{ A16(p) = v; p += 2; }
#endif


/* Local structures */
struct refTables {
	HTYPE hashTable[HASHTABLESIZE];
};


/* Macros */
#define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
	HASH_LOG))
#define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
#define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
#define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
	d = e; }
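
/*
 * LZ4_HASH_FUNCTION() is a multiplicative hash: the first four input bytes
 * are multiplied by 2654435761 (a prime close to 2^32 divided by the golden
 * ratio) and the top HASH_LOG bits of the product index the hash table.
 * LZ4_WILDCOPY() copies in word-sized chunks and may therefore write a few
 * bytes past 'e'; the encoders below account for this by reserving slack
 * (LASTLITERALS, COPYLENGTH) in their output-limit checks.
 */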


/* Private functions */
#if LZ4_ARCH64

static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clzll(val) >> 3);
#else
	int r;
	if (!(val >> 32)) {
		r = 4;
	} else {
		r = 0;
		val >>= 32;
	}
	if (!(val >> 16)) {
		r += 2;
		val >>= 8;
	} else {
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctzll(val) >> 3);
#else
	static const int DeBruijnBytePos[64] =
	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
	};
	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
	    58];
#endif
#endif
}

#else

static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clz(val) >> 3);
#else
	int r;
	if (!(val >> 16)) {
		r = 2;
		val >>= 8;
	} else {
		r = 0;
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctz(val) >> 3);
#else
	static const int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0,
		3, 2, 2, 1, 3, 2, 0, 1,
		3, 3, 1, 2, 2, 2, 2, 0,
		3, 1, 2, 0, 1, 0, 1, 1
	};
	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
	    27];
#endif
#endif
}

#endif

/* Compression functions */
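
/*
 * Each encoded sequence produced below starts with a token byte: the high
 * nibble is the literal-run length and the low nibble is the match length
 * minus MINMATCH, with the value 15 in either nibble meaning "add further
 * bytes of 255 until a byte < 255 follows". The token is followed by the
 * literal bytes, a 2-byte little-endian match offset, and any extra
 * match-length bytes. For example, 3 literals followed by an 18-byte match
 * at distance 100 encode as token 0x3E, the 3 literal bytes, then offset
 * bytes 0x64 0x00.
 */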

/*ARGSUSED*/
static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
	struct refTables *srt = (struct refTables *)ctx;
	HTYPE *HashTable = (HTYPE *) (srt->hashTable);

	const BYTE *ip = (BYTE *) source;
	INITBASE(base);
	const BYTE *anchor = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;


	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
	ip++;
	forwardH = LZ4_HASH_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (unlikely(forwardIp > mflimit)) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    unlikely(ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend))
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (likely(ip < matchlimit - (STEPSIZE - 1))) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255) {
				*op++ = 255;
			}
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}

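
/*
 * The 64 KB variant below stores 16-bit offsets from 'base' in the hash
 * table instead of the HTYPE entries used by LZ4_compressCtx(). Every hashed
 * position fits in a U16 only while the input honors the LZ4_64KLIMIT
 * restriction; the narrower entries also allow one extra bit of hash
 * (HASHLOG64K) within the same struct refTables storage.
 */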
/* Note : this function is valid only if isize < LZ4_64KLIMIT */
#define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
#define	HASHLOG64K (HASH_LOG + 1)
#define	HASH64KTABLESIZE (1U << HASHLOG64K)
#define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
	HASHLOG64K))
#define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))

/*ARGSUSED*/
static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
	struct refTables *srt = (struct refTables *)ctx;
	U16 *HashTable = (U16 *) (srt->hashTable);

	const BYTE *ip = (BYTE *) source;
	const BYTE *anchor = ip;
	const BYTE *const base = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	ip++;
	forwardH = LZ4_HASH64K_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH64K_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while (A32(ref) != A32(ip));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    (ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend))
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
		if (A32(ref) == A32(ip)) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH64K_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255)
				*op++ = 255;
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}

static int
real_LZ4_compress(const char *source, char *dest, int isize, int osize)
{
	void *ctx;
	int result;

	ctx = lz4_alloc(KM_SLEEP);

	/*
	 * out of kernel memory, gently fall through - this will disable
	 * compression in zio_compress_data
	 */
	if (ctx == NULL)
		return (0);

	memset(ctx, 0, sizeof (struct refTables));

	if (isize < LZ4_64KLIMIT)
		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
	else
		result = LZ4_compressCtx(ctx, source, dest, isize, osize);

	lz4_free(ctx);
	return (result);
}

/* Decompression functions */

/*
 * Note: The decoding functions real_LZ4_uncompress() and
 * LZ4_uncompress_unknownOutputSize() are safe against "buffer overflow"
 * attacks. They will never write nor read outside of the provided
 * output buffers. LZ4_uncompress_unknownOutputSize() also ensures that
 * it will never read outside of the input buffer. A corrupted input
 * will produce an error result, a negative int, indicating the position
 * of the error within the input stream.
 *
 * Note[2]: real_LZ4_uncompress(), referred to above, is not used in ZFS so
 * its code is not present here.
 */

static const int dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
static const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
#endif

static int
LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
    int maxOutputSize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *const iend = ip + isize;
	const BYTE *ref;

	BYTE *op = (BYTE *) dest;
	BYTE *const oend = op + maxOutputSize;
	BYTE *cpy;

	/* Main Loop */
	while (ip < iend) {
		unsigned token;
		size_t length;

		/* get runlength */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			int s = 255;
			while ((ip < iend) && (s == 255)) {
				s = *ip++;
				if (unlikely(length > (size_t)(length + s)))
					goto _output_error;
				length += s;
			}
		}
		/* copy literals */
		cpy = op + length;
		/* CORNER-CASE: cpy might overflow. */
		if (cpy < op)
			goto _output_error;	/* cpy was overflowed, bail! */
		if ((cpy > oend - COPYLENGTH) ||
		    (ip + length > iend - COPYLENGTH)) {
			if (cpy > oend)
				/* Error: writes beyond output buffer */
				goto _output_error;
			if (ip + length != iend)
				/*
				 * Error: LZ4 format requires to consume all
				 * input at this stage
				 */
				goto _output_error;
			(void) memcpy(op, ip, length);
			op += length;
			/* Necessarily EOF, due to parsing restrictions */
			break;
		}
		LZ4_WILDCOPY(ip, op, cpy);
		ip -= (op - cpy);
		op = cpy;

		/* get offset */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if (ref < (BYTE * const) dest)
			/*
			 * Error: offset creates reference outside of
			 * destination buffer
			 */
			goto _output_error;

		/* get matchlength */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			while (ip < iend) {
				int s = *ip++;
				if (unlikely(length > (size_t)(length + s)))
					goto _output_error;
				length += s;
				if (s == 255)
					continue;
				break;
			}
		}
		/* copy repeated sequence */
		if (unlikely(op - ref < STEPSIZE)) {
#if LZ4_ARCH64
			int dec64 = dec64table[op - ref];
#else
			const int dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op - ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			if (cpy > oend)
				/*
				 * Error: request to write outside of
				 * destination buffer
				 */
				goto _output_error;
#if LZ4_ARCH64
			if ((ref + COPYLENGTH) > oend)
#else
			if ((ref + COPYLENGTH) > oend ||
			    (op + COPYLENGTH) > oend)
#endif
				goto _output_error;
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since
				 * last 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)op) - dest);

	/* write overflow error detected */
	_output_error:
	return (-1);
}

#ifdef __FreeBSD__
/*
 * FreeBSD has 4, 8 and 16 KB malloc zones which can be used here.
 * Should struct refTables get resized this may need to be revisited, hence
 * the compile-time asserts.
 */
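/*
 * With the default COMPRESSIONLEVEL of 12, hashTable holds 2^12 entries of
 * 4 bytes each (HTYPE is U32 on 64-bit, a 32-bit pointer otherwise), so
 * sizeof (struct refTables) works out to 16 KB and both asserts below hold.
 */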
_Static_assert(sizeof(struct refTables) <= 16384,
    "refTables too big for malloc");
_Static_assert((sizeof(struct refTables) % 4096) == 0,
    "refTables not a multiple of page size");
#else
#define	ZFS_LZ4_USE_CACHE
#endif

#ifdef ZFS_LZ4_USE_CACHE
static kmem_cache_t *lz4_cache;

void
lz4_init(void)
{
	lz4_cache = kmem_cache_create("lz4_cache",
	    sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL, 0);
}

void
lz4_fini(void)
{
	if (lz4_cache) {
		kmem_cache_destroy(lz4_cache);
		lz4_cache = NULL;
	}
}

static void *
lz4_alloc(int flags)
{
	ASSERT(lz4_cache != NULL);
	return (kmem_cache_alloc(lz4_cache, flags));
}

static void
lz4_free(void *ctx)
{
	kmem_cache_free(lz4_cache, ctx);
}
#else
void
lz4_init(void)
{
}

void
lz4_fini(void)
{
}

static void *
lz4_alloc(int flags)
{
	return (kmem_alloc(sizeof (struct refTables), flags));
}

static void
lz4_free(void *ctx)
{
	kmem_free(ctx, sizeof (struct refTables));
}
#endif