// SPDX-License-Identifier: BSD-2-Clause
/*
 * LZ4 - Fast LZ compression algorithm
 * Header File
 * Copyright (C) 2011-2013, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
 * - LZ4 source repository : http://code.google.com/p/lz4/
 */

/*
 * N.B. - This file seems to be based on LZ4 r85, dated Dec 10, 2012
 */

#include <sys/zfs_context.h>
#include <sys/zio_compress.h>

static int real_LZ4_compress(const char *source, char *dest, int isize,
    int osize);
static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);
static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);

/* See lz4.c */
int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);

static void *lz4_alloc(int flags);
static void lz4_free(void *ctx);

static size_t
zfs_lz4_compress_buf(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int n)
{
	(void) n;
	uint32_t bufsiz;
	char *dest = d_start;

	ASSERT(d_len >= sizeof (bufsiz));

	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
	    d_len - sizeof (bufsiz));

	/* Signal an error if the compression routine returned zero. */
	if (bufsiz == 0)
		return (s_len);

	/*
	 * The exact compressed size is needed by the decompression routine,
	 * so it is stored at the start of the buffer. Note that this may be
	 * less than the compressed block size, which is rounded up to a
	 * multiple of 1<<ashift.
	 */
	*(uint32_t *)dest = BE_32(bufsiz);

	return (bufsiz + sizeof (bufsiz));
}
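
/*
 * Note added for clarity (not in the upstream source): returning s_len from
 * zfs_lz4_compress_buf() is the wrapper's "no usable compression" signal;
 * the caller sees no space savings in the destination buffer and is
 * expected to store the block uncompressed.
 */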

static int
zfs_lz4_decompress_buf(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int n)
{
	(void) n;
	const char *src = s_start;
	uint32_t bufsiz = BE_IN32(src);

	/* invalid compressed buffer size encoded at start */
	if (bufsiz + sizeof (bufsiz) > s_len)
		return (1);

	/*
	 * Returns 0 on success (decompression function returned non-negative)
	 * and non-zero on failure (decompression function returned negative).
	 */
	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
	    d_start, bufsiz, d_len) < 0);
}
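
/*
 * Illustrative note (added, not in the upstream source): the buffer produced
 * by zfs_lz4_compress_buf() is a 4-byte big-endian length header followed by
 * the raw LZ4 stream:
 *
 *	+---------------+-------------------------------------+
 *	| BE_32(bufsiz) | bufsiz bytes of LZ4-compressed data |
 *	+---------------+-------------------------------------+
 *
 * zfs_lz4_decompress_buf() reads the header back with BE_IN32() and hands
 * only the payload to LZ4_uncompress_unknownOutputSize().
 */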

ZFS_COMPRESS_WRAP_DECL(zfs_lz4_compress)
ZFS_DECOMPRESS_WRAP_DECL(zfs_lz4_decompress)

/*
 * LZ4 API Description:
 *
 * Simple Functions:
 * real_LZ4_compress() :
 *	isize : is the input size. Max supported value is ~1.9GB
 *	return : the number of bytes written in buffer dest
 *	    or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
 *	note : destination buffer must be already allocated.
 *	    destination buffer must be sized to handle worst-case
 *	    situations (input data not compressible); worst-case size
 *	    evaluation is provided by function LZ4_compressBound().
 *
 * real_LZ4_uncompress() :
 *	osize : is the output size, therefore the original size
 *	return : the number of bytes read in the source buffer.
 *	    If the source stream is malformed, the function will stop
 *	    decoding and return a negative result, indicating the byte
 *	    position of the faulty instruction. This function never
 *	    writes beyond dest + osize, and is therefore protected
 *	    against malicious data packets.
 *	note : destination buffer must be already allocated
 *	note : real_LZ4_uncompress() is not used in ZFS so its code
 *	    is not present here.
 *
 * Advanced Functions
 *
 * LZ4_compressBound() :
 *	Provides the maximum size that LZ4 may output in a "worst case"
 *	scenario (input data not compressible), primarily useful for memory
 *	allocation of the output buffer.
 *
 *	isize : is the input size. Max supported value is ~1.9GB
 *	return : maximum output size in a "worst case" scenario
 *	note : this function is limited by "int" range (2^31-1)
 *
 * LZ4_uncompress_unknownOutputSize() :
 *	isize : is the input size, therefore the compressed size
 *	maxOutputSize : is the size of the destination buffer (which must be
 *	    already allocated)
 *	return : the number of bytes decoded in the destination buffer
 *	    (necessarily <= maxOutputSize). If the source stream is
 *	    malformed, the function will stop decoding and return a
 *	    negative result, indicating the byte position of the faulty
 *	    instruction. This function never writes beyond dest +
 *	    maxOutputSize, and is therefore protected against malicious
 *	    data packets.
 *	note : Destination buffer must be already allocated.
 *	    This version is slightly slower than real_LZ4_uncompress()
 *
 * LZ4_compressCtx() :
 *	This function explicitly handles the CTX memory structure.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_cache_alloc). Passing
 *	NULL isn't valid.
 *
 * LZ4_compress64kCtx() :
 *	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 *	isize *Must* be <64KB, otherwise the output will be corrupted.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_cache_alloc). Passing
 *	NULL isn't valid.
 */
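
/*
 * Example (an illustrative sketch added to this copy, not part of the
 * upstream API text): a caller with a zeroed context and a destination
 * buffer sized for the worst case might drive the routines above roughly
 * as follows.  The names "src", "src_len", "dst", "dst_len", "out" and
 * "out_len" are hypothetical.
 *
 *	struct refTables ctx;
 *	memset(&ctx, 0, sizeof (ctx));
 *
 *	// returns the compressed length, or 0 if dst was too small
 *	int clen = LZ4_compressCtx(&ctx, src, dst, src_len, dst_len);
 *
 *	// returns the decoded length, or a negative value on a malformed
 *	// stream; never writes past out + out_len
 *	int dlen = LZ4_uncompress_unknownOutputSize(dst, out, clen, out_len);
 */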

/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio.
 * Lowering this value reduces memory usage. Reduced memory usage
 * typically improves speed, due to cache effects (ex: L1 32KB for Intel,
 * L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 * (examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define	COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 * algorithm skip data segments considered "incompressible" sooner.
 * This may decrease compression ratio dramatically, but will be
 * faster on incompressible data. Increasing this value will make
 * the algorithm search more before declaring a segment "incompressible".
 * This could improve compression a bit, but will be slower on
 * incompressible data. The default value (6) is recommended.
 */
#define	NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
 * performance for big-endian CPUs, but the resulting compressed stream
 * will be incompatible with little-endian CPUs. You can set this option
 * to 1 in situations where data will stay within a closed environment.
 * This option is useless on little-endian CPUs (such as x86).
 */
/* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */

/*
 * CPU Feature Detection
 */

/* 32 or 64 bits ? */
#if defined(_LP64)
#define	LZ4_ARCH64 1
#else
#define	LZ4_ARCH64 0
#endif

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 */
#if defined(_ZFS_BIG_ENDIAN)
#define	LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian formats
 * are unsupported.
 */
#undef LZ4_BIG_ENDIAN
#endif

/*
 * Unaligned memory access is automatically enabled for "common" CPUs,
 * such as x86. For other CPUs, the compiler will be more cautious, and
 * insert extra code to ensure aligned access is respected. If you know
 * your target CPU supports unaligned memory access, you may want to
 * force this option manually to improve performance.
 */
#if defined(__ARM_FEATURE_UNALIGNED)
#define	LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

/*
 * Illumos : we can't use GCC's __builtin_ctz family of builtins in the
 *	kernel
 * Linux : we can use GCC's __builtin_ctz family of builtins in the
 *	kernel
 */
#undef	LZ4_FORCE_SW_BITCOUNT
#if defined(__sparc)
#define	LZ4_FORCE_SW_BITCOUNT
#endif

/*
 * Compiler Options
 */
/* Disable restrict */
#define	restrict

/*
 * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it.
 * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16
 */
#ifdef GCC_VERSION
#undef GCC_VERSION
#endif

#define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
#define	expect(expr, value)	(__builtin_expect((expr), (value)))
#else
#define	expect(expr, value)	(expr)
#endif

#ifndef likely
#define	likely(expr)	expect((expr) != 0, 1)
#endif

#ifndef unlikely
#define	unlikely(expr)	expect((expr) != 0, 0)
#endif

#define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
	(((x) & 0xffu) << 8)))

/* Basic types */
#define	BYTE	uint8_t
#define	U16	uint16_t
#define	U32	uint32_t
#define	S32	int32_t
#define	U64	uint64_t

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(1)
#endif

typedef struct _U16_S {
	U16 v;
} U16_S;
typedef struct _U32_S {
	U32 v;
} U32_S;
typedef struct _U64_S {
	U64 v;
} U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack()
#endif

#define	A64(x)	(((U64_S *)(x))->v)
#define	A32(x)	(((U32_S *)(x))->v)
#define	A16(x)	(((U16_S *)(x))->v)
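
/*
 * Descriptive note (added for this copy, not in the upstream source):
 * A16/A32/A64 access a 2-, 4- or 8-byte value through a one-member struct.
 * When LZ4_FORCE_UNALIGNED_ACCESS is not defined, the structs above are
 * declared under #pragma pack(1), so their alignment is 1 and the compiler
 * must generate code that is safe for unaligned addresses.
 */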

/*
 * Constants
 */
#define	MINMATCH 4

#define	HASH_LOG COMPRESSIONLEVEL
#define	HASHTABLESIZE (1 << HASH_LOG)
#define	HASH_MASK (HASHTABLESIZE - 1)

#define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
	NOTCOMPRESSIBLE_CONFIRMATION : 2)

#define	COPYLENGTH 8
#define	LASTLITERALS 5
#define	MFLIMIT (COPYLENGTH + MINMATCH)
#define	MINLENGTH (MFLIMIT + 1)

#define	MAXD_LOG 16
#define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define	ML_BITS 4
#define	ML_MASK ((1U<<ML_BITS)-1)
#define	RUN_BITS (8-ML_BITS)
#define	RUN_MASK ((1U<<RUN_BITS)-1)
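
/*
 * Descriptive note (added for this copy, not in the upstream source): each
 * LZ4 sequence begins with a one-byte token.  The upper ML_BITS (4) bits
 * hold the literal run length and the lower 4 bits hold the match length
 * (minus MINMATCH); a field equal to RUN_MASK or ML_MASK (15) means the
 * length continues in extra bytes of 255, terminated by a byte < 255.
 */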


/*
 * Architecture-specific macros
 */
#if LZ4_ARCH64
#define	STEPSIZE 8
#define	UARCH U64
#define	AARCH A64
#define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
#define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
#define	HTYPE U32
#define	INITBASE(base)		const BYTE* const base = ip
#else /* !LZ4_ARCH64 */
#define	STEPSIZE 4
#define	UARCH U32
#define	AARCH A32
#define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
#define	LZ4_SECURECOPY		LZ4_WILDCOPY
#define	HTYPE const BYTE *
#define	INITBASE(base)		const int base = 0
#endif /* !LZ4_ARCH64 */

#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
#else
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, v)	{ A16(p) = v; p += 2; }
#endif


/* Local structures */
struct refTables {
	HTYPE hashTable[HASHTABLESIZE];
};


/* Macros */
#define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
	HASH_LOG))
#define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
#define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
#define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
	d = e; }
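
/*
 * Descriptive note (added for this copy, not in the upstream source):
 * LZ4_HASH_FUNCTION() is multiplicative (Fibonacci-style) hashing: the four
 * bytes at p are multiplied by 2654435761 (a prime close to 2^32 divided by
 * the golden ratio) and the top HASH_LOG bits of the 32-bit product select
 * the hash-table slot.
 */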


/* Private functions */
#if LZ4_ARCH64

static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clzll(val) >> 3);
#else
	int r;
	if (!(val >> 32)) {
		r = 4;
	} else {
		r = 0;
		val >>= 32;
	}
	if (!(val >> 16)) {
		r += 2;
		val >>= 8;
	} else {
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctzll(val) >> 3);
#else
	static const int DeBruijnBytePos[64] =
	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
	};
	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
	    58];
#endif
#endif
}
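
/*
 * Illustrative note (added for this copy, not in the upstream source): the
 * argument is the XOR of two STEPSIZE-byte words, so the number of zero
 * bytes at its little-endian (or, on big-endian CPUs, big-endian) end is
 * the number of bytes that still match.  For example, on a little-endian
 * machine a diff of 0x0000000000ff0000 has two low-order zero bytes, so 2
 * is returned and the match is extended by two bytes.
 */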

#else

static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clz(val) >> 3);
#else
	int r;
	if (!(val >> 16)) {
		r = 2;
		val >>= 8;
	} else {
		r = 0;
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctz(val) >> 3);
#else
	static const int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0,
		3, 2, 2, 1, 3, 2, 0, 1,
		3, 3, 1, 2, 2, 2, 2, 0,
		3, 1, 2, 0, 1, 0, 1, 1
	};
	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
	    27];
#endif
#endif
}

#endif

/* Compression functions */

static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
	struct refTables *srt = (struct refTables *)ctx;
	HTYPE *HashTable = (HTYPE *) (srt->hashTable);

	const BYTE *ip = (BYTE *) source;
	INITBASE(base);
	const BYTE *anchor = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;


	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
	ip++;
	forwardH = LZ4_HASH_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (unlikely(forwardIp > mflimit)) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    unlikely(ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend))
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (likely(ip < matchlimit - (STEPSIZE - 1))) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255) {
				*op++ = 255;
			}
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}


/* Note : this function is valid only if isize < LZ4_64KLIMIT */
#define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
#define	HASHLOG64K (HASH_LOG + 1)
#define	HASH64KTABLESIZE (1U << HASHLOG64K)
#define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
	HASHLOG64K))
#define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))
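
/*
 * Descriptive note (added for this copy, not in the upstream source):
 * because the input is known to be smaller than 64 KB, LZ4_compress64kCtx()
 * can store 16-bit offsets from the start of the buffer in its hash table
 * (U16 entries) instead of pointer-sized values, so a one-bit-wider hash
 * (HASHLOG64K) still fits in the same struct refTables allocation.
 */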

static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
	struct refTables *srt = (struct refTables *)ctx;
	U16 *HashTable = (U16 *) (srt->hashTable);

	const BYTE *ip = (BYTE *) source;
	const BYTE *anchor = ip;
	const BYTE *const base = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	ip++;
	forwardH = LZ4_HASH64K_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH64K_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while (A32(ref) != A32(ip));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    (ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend))
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
		if (A32(ref) == A32(ip)) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH64K_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255)
				*op++ = 255;
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}

static int
real_LZ4_compress(const char *source, char *dest, int isize, int osize)
{
	void *ctx;
	int result;

	ctx = lz4_alloc(KM_SLEEP);

	/*
	 * out of kernel memory, gently fall through - this will disable
	 * compression in zio_compress_data
	 */
	if (ctx == NULL)
		return (0);

	memset(ctx, 0, sizeof (struct refTables));

	if (isize < LZ4_64KLIMIT)
		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
	else
		result = LZ4_compressCtx(ctx, source, dest, isize, osize);

	lz4_free(ctx);
	return (result);
}

#ifdef __FreeBSD__
/*
 * FreeBSD has 4, 8 and 16 KB malloc zones which can be used here.
 * Should struct refTables get resized this may need to be revisited,
 * hence the compile-time asserts.
 */
_Static_assert(sizeof(struct refTables) <= 16384,
    "refTables too big for malloc");
_Static_assert((sizeof(struct refTables) % 4096) == 0,
    "refTables not a multiple of page size");
#else
#define	ZFS_LZ4_USE_CACHE
#endif

#ifdef ZFS_LZ4_USE_CACHE
static kmem_cache_t *lz4_cache;
#endif

#ifdef ZFS_LZ4_USE_CACHE
void
lz4_init(void)
{
	lz4_cache = kmem_cache_create("lz4_cache",
	    sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL,
	    KMC_RECLAIMABLE);
}

void
lz4_fini(void)
{
	if (lz4_cache) {
		kmem_cache_destroy(lz4_cache);
		lz4_cache = NULL;
	}
}

static void *
lz4_alloc(int flags)
{
	ASSERT(lz4_cache != NULL);
	return (kmem_cache_alloc(lz4_cache, flags));
}

static void
lz4_free(void *ctx)
{
	kmem_cache_free(lz4_cache, ctx);
}
#else
void
lz4_init(void)
{
}

void
lz4_fini(void)
{
}

static void *
lz4_alloc(int flags)
{
	return (kmem_alloc(sizeof (struct refTables), flags));
}

static void
lz4_free(void *ctx)
{
	kmem_free(ctx, sizeof (struct refTables));
}
#endif