/*
 * LZ4 - Fast LZ compression algorithm
 * Header File
 * Copyright (C) 2011-2013, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
 * - LZ4 source repository : http://code.google.com/p/lz4/
 */

/*
 * N.B. - This file seems to be based on LZ4 r85, dated Dec 10, 2012
 */

#include <sys/zfs_context.h>
#include <sys/zio_compress.h>

static int real_LZ4_compress(const char *source, char *dest, int isize,
    int osize);
static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);
static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);

/* See lz4.c */
int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);

static void *lz4_alloc(int flags);
static void lz4_free(void *ctx);

static size_t
zfs_lz4_compress_buf(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int n)
{
	(void) n;
	uint32_t bufsiz;
	char *dest = d_start;

	ASSERT(d_len >= sizeof (bufsiz));

	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
	    d_len - sizeof (bufsiz));

	/* Signal an error if the compression routine returned zero. */
	if (bufsiz == 0)
		return (s_len);

	/*
	 * The exact compressed size is needed by the decompression routine,
	 * so it is stored at the start of the buffer. Note that this may be
	 * less than the compressed block size, which is rounded up to a
	 * multiple of 1<<ashift.
	 */
	*(uint32_t *)dest = BE_32(bufsiz);

	return (bufsiz + sizeof (bufsiz));
}

static int
zfs_lz4_decompress_buf(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int n)
{
	(void) n;
	const char *src = s_start;
	uint32_t bufsiz = BE_IN32(src);

	/* invalid compressed buffer size encoded at start */
	if (bufsiz + sizeof (bufsiz) > s_len)
		return (1);

	/*
	 * Returns 0 on success (decompression function returned non-negative)
	 * and non-zero on failure (decompression function returned negative).
	 */
	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
	    d_start, bufsiz, d_len) < 0);
}
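
/*
 * Illustrative note (an addition, not from the original source): the
 * physical buffer produced by zfs_lz4_compress_buf() and consumed by
 * zfs_lz4_decompress_buf() above is laid out as follows:
 *
 *	offset 0                 4                        4 + bufsiz
 *	+------------------------+------------------------+---------+
 *	| uint32_t bufsiz (BE)   | LZ4-compressed payload | padding |
 *	+------------------------+------------------------+---------+
 *
 * The trailing padding exists only because the allocated block is rounded
 * up to a multiple of 1 << ashift; bufsiz records the exact payload length
 * so the decompressor can ignore the padding.
 */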

ZFS_COMPRESS_WRAP_DECL(zfs_lz4_compress)
ZFS_DECOMPRESS_WRAP_DECL(zfs_lz4_decompress)

/*
 * LZ4 API Description:
 *
 * Simple Functions:
 * real_LZ4_compress() :
 *	isize  : is the input size. Max supported value is ~1.9GB
 *	return : the number of bytes written in buffer dest
 *	    or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
 *	note : destination buffer must be already allocated.
 *	    destination buffer must be sized to handle worst-case
 *	    situations (input data not compressible); worst-case size
 *	    evaluation is provided by function LZ4_compressBound().
 *
 * real_LZ4_uncompress() :
 *	osize  : is the output size, therefore the original size
 *	return : the number of bytes read in the source buffer.
 *	    If the source stream is malformed, the function will stop
 *	    decoding and return a negative result, indicating the byte
 *	    position of the faulty instruction. This function never
 *	    writes beyond dest + osize, and is therefore protected
 *	    against malicious data packets.
 *	note : destination buffer must be already allocated
 *	note : real_LZ4_uncompress() is not used in ZFS so its code
 *	    is not present here.
 *
 * Advanced Functions
 *
 * LZ4_compressBound() :
 *	Provides the maximum size that LZ4 may output in a "worst case"
 *	scenario (input data not compressible); primarily useful for memory
 *	allocation of the output buffer.
 *
 *	isize  : is the input size. Max supported value is ~1.9GB
 *	return : maximum output size in a "worst case" scenario
 *	note : this function is limited by "int" range (2^31-1)
 *
 * LZ4_uncompress_unknownOutputSize() :
 *	isize  : is the input size, therefore the compressed size
 *	maxOutputSize : is the size of the destination buffer (which must be
 *	    already allocated)
 *	return : the number of bytes decoded in the destination buffer
 *	    (necessarily <= maxOutputSize). If the source stream is
 *	    malformed, the function will stop decoding and return a
 *	    negative result, indicating the byte position of the faulty
 *	    instruction. This function never writes beyond dest +
 *	    maxOutputSize, and is therefore protected against malicious
 *	    data packets.
 *	note : Destination buffer must be already allocated.
 *	    This version is slightly slower than real_LZ4_uncompress()
 *
 * LZ4_compressCtx() :
 *	This function explicitly handles the CTX memory structure.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_cache_alloc). Passing
 *	NULL isn't valid.
 *
 * LZ4_compress64kCtx() :
 *	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 *	isize *Must* be <64KB, otherwise the output will be corrupted.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_cache_alloc). Passing
 *	NULL isn't valid.
 */
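
/*
 * Illustrative usage sketch (an addition, not part of the original source;
 * the function name and buffers below are hypothetical). A caller inside
 * this file could drive the API described above roughly as follows, sizing
 * the destination conservatively and treating a zero return from the
 * compressor as "data did not compress":
 *
 *	static int
 *	example_roundtrip(const char *src, char *tmp, char *out,
 *	    int len, int tmplen)
 *	{
 *		int csize, dsize;
 *
 *		csize = real_LZ4_compress(src, tmp, len, tmplen);
 *		if (csize == 0)
 *			return (-1);
 *
 *		dsize = LZ4_uncompress_unknownOutputSize(tmp, out,
 *		    csize, len);
 *		return (dsize == len ? 0 : -1);
 *	}
 */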

/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio.
 * Lowering this value reduces memory usage. Reduced memory usage
 * typically improves speed, due to cache effect (ex: L1 32KB for Intel,
 * L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 * (examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 * algorithm skip faster over data segments considered "incompressible".
 * This may decrease compression ratio dramatically, but will be
 * faster on incompressible data. Increasing this value will make
 * the algorithm search more before declaring a segment "incompressible".
 * This could improve compression a bit, but will be slower on
 * incompressible data. The default value (6) is recommended.
 */
#define NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
 * performance on big-endian CPUs, but the resulting compressed stream
 * will be incompatible with little-endian CPUs. You can set this option
 * to 1 in situations where data will stay within a closed environment.
 * This option is useless on little-endian CPUs (such as x86).
 */
/* #define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */

/*
 * CPU Feature Detection
 */

/* 32 or 64 bits ? */
#if defined(_LP64)
#define LZ4_ARCH64 1
#else
#define LZ4_ARCH64 0
#endif

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 */
#if defined(_ZFS_BIG_ENDIAN)
#define LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian formats
 * are unsupported.
 */
#undef LZ4_BIG_ENDIAN
#endif

/*
 * Unaligned memory access is automatically enabled for "common" CPUs,
 * such as x86. For other CPUs, the compiler will be more cautious and
 * insert extra code to ensure aligned access is respected. If you know
 * your target CPU supports unaligned memory access, you may want to
 * force this option manually to improve performance.
 */
#if defined(__ARM_FEATURE_UNALIGNED)
#define LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

/*
 * Illumos : we can't use GCC's __builtin_ctz family of builtins in the
 *	kernel
 * Linux : we can use GCC's __builtin_ctz family of builtins in the
 *	kernel
 */
#undef LZ4_FORCE_SW_BITCOUNT
#if defined(__sparc)
#define LZ4_FORCE_SW_BITCOUNT
#endif

/*
 * Compiler Options
 */
/* Disable restrict */
#define restrict

/*
 * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it.
 * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16
 */
#ifdef GCC_VERSION
#undef GCC_VERSION
#endif

#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
#define expect(expr, value) (__builtin_expect((expr), (value)))
#else
#define expect(expr, value) (expr)
#endif

#ifndef likely
#define likely(expr) expect((expr) != 0, 1)
#endif

#ifndef unlikely
#define unlikely(expr) expect((expr) != 0, 0)
#endif

#define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
	(((x) & 0xffu) << 8)))

/* Basic types */
#define BYTE uint8_t
#define U16 uint16_t
#define U32 uint32_t
#define S32 int32_t
#define U64 uint64_t

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(1)
#endif

typedef struct _U16_S {
	U16 v;
} U16_S;
typedef struct _U32_S {
	U32 v;
} U32_S;
typedef struct _U64_S {
	U64 v;
} U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack()
#endif

#define A64(x) (((U64_S *)(x))->v)
#define A32(x) (((U32_S *)(x))->v)
#define A16(x) (((U16_S *)(x))->v)

/*
 * Constants
 */
#define MINMATCH 4

#define HASH_LOG COMPRESSIONLEVEL
#define HASHTABLESIZE (1 << HASH_LOG)
#define HASH_MASK (HASHTABLESIZE - 1)

#define SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
	NOTCOMPRESSIBLE_CONFIRMATION : 2)

#define COPYLENGTH 8
#define LASTLITERALS 5
#define MFLIMIT (COPYLENGTH + MINMATCH)
#define MINLENGTH (MFLIMIT + 1)

#define MAXD_LOG 16
#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define ML_BITS 4
#define ML_MASK ((1U<<ML_BITS)-1)
#define RUN_BITS (8-ML_BITS)
#define RUN_MASK ((1U<<RUN_BITS)-1)
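
/*
 * Illustrative note (an addition, not from the original source): each
 * encoded sequence starts with a one-byte token whose high ML_BITS nibble
 * holds the literal-run length and whose low nibble holds the match length;
 * the value RUN_MASK/ML_MASK (15) is an escape meaning "extended length
 * bytes follow". For example, a literal run of 300 bytes is emitted by the
 * RUN_MASK branches in the compressors below as:
 *
 *	token literal nibble = RUN_MASK			-> byte 0xF0 (so far)
 *	remaining length = 300 - RUN_MASK = 285		-> extra bytes 255, 30
 *
 * i.e. 0xF0 0xFF 0x1E, followed by the 300 literal bytes themselves (the
 * match-length nibble of the token is filled in later).
 */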

/*
 * Architecture-specific macros
 */
#if LZ4_ARCH64
#define STEPSIZE 8
#define UARCH U64
#define AARCH A64
#define LZ4_COPYSTEP(s, d) A64(d) = A64(s); d += 8; s += 8;
#define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d)
#define LZ4_SECURECOPY(s, d, e) if (d < e) LZ4_WILDCOPY(s, d, e)
#define HTYPE U32
#define INITBASE(base) const BYTE* const base = ip
#else /* !LZ4_ARCH64 */
#define STEPSIZE 4
#define UARCH U32
#define AARCH A32
#define LZ4_COPYSTEP(s, d) A32(d) = A32(s); d += 4; s += 4;
#define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
#define LZ4_SECURECOPY LZ4_WILDCOPY
#define HTYPE const BYTE *
#define INITBASE(base) const int base = 0
#endif /* !LZ4_ARCH64 */

#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \
	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define LZ4_WRITE_LITTLEENDIAN_16(p, i) \
	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
#else
#define LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
#define LZ4_WRITE_LITTLEENDIAN_16(p, v) { A16(p) = v; p += 2; }
#endif


/* Local structures */
struct refTables {
	HTYPE hashTable[HASHTABLESIZE];
};


/* Macros */
#define LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
	HASH_LOG))
#define LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
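/*
 * Illustrative note (an addition, not from the original source):
 * LZ4_HASH_FUNCTION is a multiplicative (Fibonacci-style) hash; 2654435761
 * is close to 2^32 divided by the golden ratio, and the right shift keeps
 * only the top HASH_LOG bits of the 32-bit product. With the default
 * COMPRESSIONLEVEL of 12 the shift is (MINMATCH * 8) - HASH_LOG = 20, so
 * four input bytes map to one of 4096 hash-table slots.
 */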
#define LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
#define LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
	d = e; }


/* Private functions */
#if LZ4_ARCH64

static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clzll(val) >> 3);
#else
	int r;
	if (!(val >> 32)) {
		r = 4;
	} else {
		r = 0;
		val >>= 32;
	}
	if (!(val >> 16)) {
		r += 2;
		val >>= 8;
	} else {
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctzll(val) >> 3);
#else
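	/*
	 * Descriptive note (an addition, not from the original source):
	 * this software fallback isolates the lowest set bit with
	 * (val & -val), multiplies by a De Bruijn-style constant so the
	 * top 6 bits of the product uniquely identify that bit, and uses
	 * the lookup table to map the bit position to the count of
	 * trailing zero bytes, i.e. the number of leading bytes the two
	 * inputs have in common on a little-endian machine.
	 */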
	static const int DeBruijnBytePos[64] =
	{ 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
	};
	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
	    58];
#endif
#endif
}

#else

static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clz(val) >> 3);
#else
	int r;
	if (!(val >> 16)) {
		r = 2;
		val >>= 8;
	} else {
		r = 0;
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctz(val) >> 3);
#else
	static const int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0,
		3, 2, 2, 1, 3, 2, 0, 1,
		3, 3, 1, 2, 2, 2, 2, 0,
		3, 1, 2, 0, 1, 0, 1, 1
	};
	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
	    27];
#endif
#endif
}

#endif

/* Compression functions */

static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
	struct refTables *srt = (struct refTables *)ctx;
	HTYPE *HashTable = (HTYPE *) (srt->hashTable);

	const BYTE *ip = (BYTE *) source;
	INITBASE(base);
	const BYTE *anchor = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;


	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
	ip++;
	forwardH = LZ4_HASH_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (unlikely(forwardIp > mflimit)) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    unlikely(ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend))
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (likely(ip < matchlimit - (STEPSIZE - 1))) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255) {
				*op++ = 255;
			}
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}



/* Note : this function is valid only if isize < LZ4_64KLIMIT */
#define LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
#define HASHLOG64K (HASH_LOG + 1)
#define HASH64KTABLESIZE (1U << HASHLOG64K)
#define LZ4_HASH64K_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8) - \
	HASHLOG64K))
#define LZ4_HASH64K_VALUE(p) LZ4_HASH64K_FUNCTION(A32(p))
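
/*
 * Illustrative note (an addition, not from the original source): the 64KB
 * variant below reuses the same struct refTables buffer but treats it as
 * an array of U16 entries, since offsets from 'base' fit in 16 bits
 * whenever isize < LZ4_64KLIMIT. Each entry is half the size of an HTYPE,
 * so the same memory holds twice as many slots, hence
 * HASHLOG64K = HASH_LOG + 1.
 */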

static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
	struct refTables *srt = (struct refTables *)ctx;
	U16 *HashTable = (U16 *) (srt->hashTable);

	const BYTE *ip = (BYTE *) source;
	const BYTE *anchor = ip;
	const BYTE *const base = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	ip++;
	forwardH = LZ4_HASH64K_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH64K_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while (A32(ref) != A32(ip));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    (ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend))
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
		if (A32(ref) == A32(ip)) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH64K_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255)
				*op++ = 255;
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}

static int
real_LZ4_compress(const char *source, char *dest, int isize, int osize)
{
	void *ctx;
	int result;

	ctx = lz4_alloc(KM_SLEEP);

	/*
	 * If we are out of kernel memory, gently fall through - this will
	 * disable compression in zio_compress_data.
	 */
	if (ctx == NULL)
		return (0);

	memset(ctx, 0, sizeof (struct refTables));

	if (isize < LZ4_64KLIMIT)
		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
	else
		result = LZ4_compressCtx(ctx, source, dest, isize, osize);

	lz4_free(ctx);
	return (result);
}

#ifdef __FreeBSD__
/*
 * FreeBSD has 4, 8 and 16 KB malloc zones which can be used here.
 * Should struct refTables get resized this may need to be revisited, hence
 * the compile-time asserts.
 */
_Static_assert(sizeof(struct refTables) <= 16384,
    "refTables too big for malloc");
_Static_assert((sizeof(struct refTables) % 4096) == 0,
    "refTables not a multiple of page size");
#else
#define ZFS_LZ4_USE_CACHE
#endif

#ifdef ZFS_LZ4_USE_CACHE
static kmem_cache_t *lz4_cache;
#endif

#ifdef ZFS_LZ4_USE_CACHE
void
lz4_init(void)
{
	lz4_cache = kmem_cache_create("lz4_cache",
	    sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL,
	    KMC_RECLAIMABLE);
}

void
lz4_fini(void)
{
	if (lz4_cache) {
		kmem_cache_destroy(lz4_cache);
		lz4_cache = NULL;
	}
}

static void *
lz4_alloc(int flags)
{
	ASSERT(lz4_cache != NULL);
	return (kmem_cache_alloc(lz4_cache, flags));
}

static void
lz4_free(void *ctx)
{
	kmem_cache_free(lz4_cache, ctx);
}
#else
void
lz4_init(void)
{
}

void
lz4_fini(void)
{
}

static void *
lz4_alloc(int flags)
{
	return (kmem_alloc(sizeof (struct refTables), flags));
}

static void
lz4_free(void *ctx)
{
	kmem_free(ctx, sizeof (struct refTables));
}
#endif