/*
 * LZ4 - Fast LZ compression algorithm
 * Header File
 * Copyright (C) 2011-2013, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
 * - LZ4 source repository : http://code.google.com/p/lz4/
 */

/*
 * N.B. - This file seems to be based on LZ4 r85, dated Dec 10, 2012
 */

#include <sys/zfs_context.h>
#include <sys/zio_compress.h>

static int real_LZ4_compress(const char *source, char *dest, int isize,
    int osize);
static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);
static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);

/* See lz4.c */
int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);

static void *lz4_alloc(int flags);
static void lz4_free(void *ctx);

static size_t
zfs_lz4_compress_buf(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int n)
{
	(void) n;
	uint32_t bufsiz;
	char *dest = d_start;

	ASSERT(d_len >= sizeof (bufsiz));

	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
	    d_len - sizeof (bufsiz));

	/*
	 * real_LZ4_compress() returns zero when the output did not fit in
	 * d_len; signal this to the caller by returning s_len.
	 */
	if (bufsiz == 0)
		return (s_len);

	/*
	 * The exact compressed size is needed by the decompression routine,
	 * so it is stored at the start of the buffer. Note that this may be
	 * less than the compressed block size, which is rounded up to a
	 * multiple of 1<<ashift.
	 */
	*(uint32_t *)dest = BE_32(bufsiz);

	return (bufsiz + sizeof (bufsiz));
}
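
/*
 * Illustration (comment only, not compiled): given the behavior above, the
 * destination buffer produced by zfs_lz4_compress_buf() is laid out as:
 *
 *	d_start
 *	+---------------------+------------------------------+
 *	| BE_32(bufsiz), 4 B  | LZ4 stream, bufsiz bytes     |
 *	+---------------------+------------------------------+
 *
 * and the function returns bufsiz + 4. A return value of s_len means
 * "not compressible into d_len"; no header is written in that case.
 */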

static int
zfs_lz4_decompress_buf(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int n)
{
	(void) n;
	const char *src = s_start;
	uint32_t bufsiz = BE_IN32(src);

	/* invalid compressed buffer size encoded at start */
	if (bufsiz + sizeof (bufsiz) > s_len)
		return (1);

	/*
	 * Returns 0 on success (decompression function returned non-negative)
	 * and non-zero on failure (decompression function returned negative).
	 */
	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
	    d_start, bufsiz, d_len) < 0);
}
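
/*
 * Worked example of the header check above (comment only): with s_len = 512
 * and a (possibly corrupt) header claiming bufsiz = 1000, 1000 + 4 > 512,
 * so the buffer is rejected before any decoding is attempted. A legitimate
 * payload can never extend past s_len.
 */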

ZFS_COMPRESS_WRAP_DECL(zfs_lz4_compress)
ZFS_DECOMPRESS_WRAP_DECL(zfs_lz4_decompress)

/*
 * LZ4 API Description:
 *
 * Simple Functions:
 * real_LZ4_compress() :
 * 	isize  : is the input size. Max supported value is ~1.9GB
 * 	return : the number of bytes written in buffer dest
 *		 or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
 * 	note : destination buffer must be already allocated.
 * 		destination buffer must be sized to handle worst-case
 * 		situations (input data not compressible); worst-case size
 * 		evaluation is provided by function LZ4_compressBound().
 *
 * real_LZ4_uncompress() :
 * 	osize  : is the output size, therefore the original size
 * 	return : the number of bytes read in the source buffer.
 * 		If the source stream is malformed, the function will stop
 * 		decoding and return a negative result, indicating the byte
 * 		position of the faulty instruction. This function never
 * 		writes beyond dest + osize, and is therefore protected
 * 		against malicious data packets.
 * 	note : destination buffer must be already allocated
 *	note : real_LZ4_uncompress() is not used in ZFS so its code
 *	       is not present here.
 *
 * Advanced Functions
 *
 * LZ4_compressBound() :
 * 	Provides the maximum size that LZ4 may output in a "worst case"
 * 	scenario (input data not compressible), primarily useful for memory
 * 	allocation of the output buffer.
 *
 * 	isize  : is the input size. Max supported value is ~1.9GB
 * 	return : maximum output size in a "worst case" scenario
 * 	note : this function is limited by "int" range (2^31-1)
 *
 * LZ4_uncompress_unknownOutputSize() :
 * 	isize  : is the input size, therefore the compressed size
 * 	maxOutputSize : is the size of the destination buffer (which must be
 * 		already allocated)
 * 	return : the number of bytes decoded in the destination buffer
 * 		(necessarily <= maxOutputSize). If the source stream is
 * 		malformed, the function will stop decoding and return a
 * 		negative result, indicating the byte position of the faulty
 * 		instruction. This function never writes beyond dest +
 * 		maxOutputSize, and is therefore protected against malicious
 * 		data packets.
 * 	note   : Destination buffer must be already allocated.
 *		This version is slightly slower than real_LZ4_uncompress()
 *
 * LZ4_compressCtx() :
 * 	This function explicitly handles the CTX memory structure.
 *
 * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 * 	by the caller (either on the stack or using kmem_cache_alloc). Passing
 * 	NULL isn't valid.
 *
 * LZ4_compress64kCtx() :
 * 	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 * 	isize *Must* be <64KB, otherwise the output will be corrupted.
 *
 * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 * 	by the caller (either on the stack or using kmem_cache_alloc). Passing
 * 	NULL isn't valid.
 */
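
/*
 * Illustrative usage sketch (comment only; src, dst, srclen and dstlen are
 * hypothetical). Per the ILLUMOS contract above, the caller owns the CTX,
 * e.g. following the same pattern real_LZ4_compress() uses below:
 *
 *	struct refTables *ctx = lz4_alloc(KM_SLEEP);
 *	memset(ctx, 0, sizeof (*ctx));
 *	int n = LZ4_compressCtx(ctx, src, dst, srclen, dstlen);
 *	// n > 0 : bytes written to dst; n == 0 : did not fit in dstlen
 *	lz4_free(ctx);
 */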

/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio.
 *	Lowering this value reduces memory usage. Reduced memory usage
 *	typically improves speed, due to cache effects (ex: L1 32KB for Intel,
 *	L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 *	(examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define	COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 *	algorithm skip faster over data segments considered "incompressible".
 *	This may decrease compression ratio dramatically, but will be
 *	faster on incompressible data. Increasing this value will make
 *	the algorithm search more before declaring a segment "incompressible".
 *	This could improve compression a bit, but will be slower on
 *	incompressible data. The default value (6) is recommended.
 */
#define	NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a performance
 * boost on big-endian CPUs, but the resulting compressed stream will be
 * incompatible with little-endian CPUs. You can set this option to 1 in
 * situations where data will stay within a closed environment. This
 * option is useless on little-endian CPUs (such as x86).
 */
/* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */

/*
 * CPU Feature Detection
 */

/* 32 or 64 bits ? */
#if defined(_LP64)
#define	LZ4_ARCH64 1
#else
#define	LZ4_ARCH64 0
#endif

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 */
#if defined(_ZFS_BIG_ENDIAN)
#define	LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian formats
 * are unsupported.
 */
#undef LZ4_BIG_ENDIAN
#endif

/*
 * Unaligned memory access is automatically enabled for "common" CPUs,
 * such as x86. For other CPUs, the compiler will be more cautious, and
 * insert extra code to ensure aligned access is respected. If you know
 * your target CPU supports unaligned memory access, you may want to
 * force this option manually to improve performance.
 */
#if defined(__ARM_FEATURE_UNALIGNED)
#define	LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

/*
 * Illumos : we can't use GCC's __builtin_ctz family of builtins in the
 * kernel
 * Linux : we can use GCC's __builtin_ctz family of builtins in the
 * kernel
 */
#undef	LZ4_FORCE_SW_BITCOUNT
#if defined(__sparc)
#define	LZ4_FORCE_SW_BITCOUNT
#endif

/*
 * Compiler Options
 */
/* Disable restrict */
#define	restrict

/*
 * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it.
 * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16
 */
#ifdef GCC_VERSION
#undef GCC_VERSION
#endif

#define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
#define	expect(expr, value)    (__builtin_expect((expr), (value)))
#else
#define	expect(expr, value)    (expr)
#endif

#ifndef likely
#define	likely(expr)	expect((expr) != 0, 1)
#endif

#ifndef unlikely
#define	unlikely(expr)	expect((expr) != 0, 0)
#endif

#define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
	(((x) & 0xffu) << 8)))

/* Basic types */
#define	BYTE	uint8_t
#define	U16	uint16_t
#define	U32	uint32_t
#define	S32	int32_t
#define	U64	uint64_t

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(1)
#endif

typedef struct _U16_S {
	U16 v;
} U16_S;
typedef struct _U32_S {
	U32 v;
} U32_S;
typedef struct _U64_S {
	U64 v;
} U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack()
#endif

#define	A64(x) (((U64_S *)(x))->v)
#define	A32(x) (((U32_S *)(x))->v)
#define	A16(x) (((U16_S *)(x))->v)
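
/*
 * Note (comment only): the A16/A32/A64 macros express possibly-unaligned
 * loads and stores through the packed wrapper structs above, e.g.:
 *
 *	U32 first4 = A32(p);	// read 4 bytes from p at any alignment
 *	A32(q) = first4;	// write them back at any alignment
 *
 * When LZ4_FORCE_UNALIGNED_ACCESS is set, pack(1) is omitted and plain
 * word accesses are emitted, which such CPUs tolerate directly.
 */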

/*
 * Constants
 */
#define	MINMATCH 4

#define	HASH_LOG COMPRESSIONLEVEL
#define	HASHTABLESIZE (1 << HASH_LOG)
#define	HASH_MASK (HASHTABLESIZE - 1)

#define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
	NOTCOMPRESSIBLE_CONFIRMATION : 2)

#define	COPYLENGTH 8
#define	LASTLITERALS 5
#define	MFLIMIT (COPYLENGTH + MINMATCH)
#define	MINLENGTH (MFLIMIT + 1)

#define	MAXD_LOG 16
#define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define	ML_BITS 4
#define	ML_MASK ((1U<<ML_BITS)-1)
#define	RUN_BITS (8-ML_BITS)
#define	RUN_MASK ((1U<<RUN_BITS)-1)
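
/*
 * Token layout sketch (comment only): each encoded sequence begins with a
 * token byte whose high RUN_BITS hold the literal-run length and whose low
 * ML_BITS hold the match length minus MINMATCH:
 *
 *	token = (literal_len << ML_BITS) | (match_len - MINMATCH);
 *
 * e.g. token 0x32 means 3 literals followed by a 6-byte match. A field
 * value of RUN_MASK or ML_MASK (15) flags that extra length bytes follow.
 */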


/*
 * Architecture-specific macros
 */
#if LZ4_ARCH64
#define	STEPSIZE 8
#define	UARCH U64
#define	AARCH A64
#define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
#define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
#define	HTYPE U32
#define	INITBASE(base)		const BYTE* const base = ip
#else /* !LZ4_ARCH64 */
#define	STEPSIZE 4
#define	UARCH U32
#define	AARCH A32
#define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
#define	LZ4_SECURECOPY		LZ4_WILDCOPY
#define	HTYPE const BYTE *
#define	INITBASE(base)		const int base = 0
#endif /* !LZ4_ARCH64 */

#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
#else
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
#endif


/* Local structures */
struct refTables {
	HTYPE hashTable[HASHTABLESIZE];
};


/* Macros */
#define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
	HASH_LOG))
#define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
#define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
#define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
	d = e; }
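
/*
 * Hashing example (comment only): LZ4_HASH_FUNCTION() is multiplicative
 * (Fibonacci) hashing; 2654435761 is close to 2^32 / golden ratio, and the
 * shift keeps only the top HASH_LOG bits. With HASH_LOG = 12:
 *
 *	h = (A32(p) * 2654435761U) >> 20;	// 0 <= h < 4096
 *
 * so every 4-byte prefix maps to one of HASHTABLESIZE buckets.
 */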


/* Private functions */
#if LZ4_ARCH64

static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clzll(val) >> 3);
#else
	int r;
	if (!(val >> 32)) {
		r = 4;
	} else {
		r = 0;
		val >>= 32;
	}
	if (!(val >> 16)) {
		r += 2;
		val >>= 8;
	} else {
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctzll(val) >> 3);
#else
	static const int DeBruijnBytePos[64] =
	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
	};
	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
	    58];
#endif
#endif
}

#else

static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clz(val) >> 3);
#else
	int r;
	if (!(val >> 16)) {
		r = 2;
		val >>= 8;
	} else {
		r = 0;
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctz(val) >> 3);
#else
	static const int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0,
		3, 2, 2, 1, 3, 2, 0, 1,
		3, 3, 1, 2, 2, 2, 2, 0,
		3, 1, 2, 0, 1, 0, 1, 1
	};
	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
	    27];
#endif
#endif
}

#endif
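
/*
 * Note (comment only): LZ4_NbCommonBytes() counts how many low-order
 * (little-endian) or high-order (big-endian) bytes of the XOR of two words
 * are zero, i.e. how many bytes two strings still have in common. The
 * software fallback isolates the lowest set bit and uses a De Bruijn
 * multiply so the top bits index a byte-position table, e.g. (32-bit):
 *
 *	U32 lsb = val & -(S32)val;			// lowest set bit
 *	int nb = DeBruijnBytePos[(lsb * 0x077CB531U) >> 27];
 */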

/* Compression functions */

static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
	struct refTables *srt = (struct refTables *)ctx;
	HTYPE *HashTable = (HTYPE *) (srt->hashTable);

	const BYTE *ip = (BYTE *) source;
	INITBASE(base);
	const BYTE *anchor = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;


	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
	ip++;
	forwardH = LZ4_HASH_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
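			/*
			 * Note (comment only): with SKIPSTRENGTH = 6 the
			 * step stays 1 for roughly the first 64 probes,
			 * then grows by one every 64 further probes, so
			 * regions that never match (incompressible data)
			 * are skipped over increasingly fast.
			 */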
			ip = forwardIp;
			forwardIp = ip + step;

			if (unlikely(forwardIp > mflimit)) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    unlikely(ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend))
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (likely(ip < matchlimit - (STEPSIZE - 1))) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255) {
				*op++ = 255;
			}
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}



/* Note : this function is valid only if isize < LZ4_64KLIMIT */
#define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
#define	HASHLOG64K (HASH_LOG + 1)
#define	HASH64KTABLESIZE (1U << HASHLOG64K)
#define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
	HASHLOG64K))
#define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))
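
/*
 * Why a 64K variant (comment only): when isize < LZ4_64KLIMIT every input
 * offset fits in 16 bits, so the hash table can hold U16 offsets from base
 * instead of full HTYPE entries. Halving the entry size lets HASHLOG64K use
 * one more bit (twice the buckets) in the same refTables footprint, and the
 * MAX_DISTANCE window check in the match loop becomes unnecessary.
 */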

static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
	struct refTables *srt = (struct refTables *)ctx;
	U16 *HashTable = (U16 *) (srt->hashTable);

	const BYTE *ip = (BYTE *) source;
	const BYTE *anchor = ip;
	const BYTE *const base = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	ip++;
	forwardH = LZ4_HASH64K_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH64K_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while (A32(ref) != A32(ip));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    (ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend))
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
		if (A32(ref) == A32(ip)) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH64K_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255)
				*op++ = 255;
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}

static int
real_LZ4_compress(const char *source, char *dest, int isize, int osize)
{
	void *ctx;
	int result;

	ctx = lz4_alloc(KM_SLEEP);

	/*
	 * Out of kernel memory: fail gracefully. Returning 0 disables
	 * compression for this block in zio_compress_data().
	 */
	if (ctx == NULL)
		return (0);

	memset(ctx, 0, sizeof (struct refTables));

	if (isize < LZ4_64KLIMIT)
		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
	else
		result = LZ4_compressCtx(ctx, source, dest, isize, osize);

	lz4_free(ctx);
	return (result);
}
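
/*
 * Illustrative call flow (comment only): zfs_lz4_compress_buf() above
 * reserves 4 bytes for its size header, then calls:
 *
 *	bufsiz = real_LZ4_compress(src, dst + 4, s_len, d_len - 4);
 *	// bufsiz == 0 : incompressible into d_len, or no memory for ctx
 *
 * Inputs smaller than LZ4_64KLIMIT take the cheaper 64K path.
 */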

#ifdef __FreeBSD__
/*
 * FreeBSD has 4, 8 and 16 KB malloc zones which can be used here.
 * Should struct refTables be resized, this may need to be revisited,
 * hence the compile-time asserts.
 */
_Static_assert(sizeof(struct refTables) <= 16384,
    "refTables too big for malloc");
_Static_assert((sizeof(struct refTables) % 4096) == 0,
    "refTables not a multiple of page size");
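
/*
 * Size arithmetic (comment only): refTables is HASHTABLESIZE (1 << 12 =
 * 4096) entries of HTYPE, which is 4 bytes on both 64-bit (U32) and 32-bit
 * (const BYTE *) builds, i.e. 16384 bytes -- exactly the largest FreeBSD
 * malloc zone permitted by the asserts above.
 */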
#else
#define ZFS_LZ4_USE_CACHE
#endif

#ifdef ZFS_LZ4_USE_CACHE
static kmem_cache_t *lz4_cache;
#endif

#ifdef ZFS_LZ4_USE_CACHE
void
lz4_init(void)
{
	lz4_cache = kmem_cache_create("lz4_cache",
	    sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL,
	    KMC_RECLAIMABLE);
}

void
lz4_fini(void)
{
	if (lz4_cache) {
		kmem_cache_destroy(lz4_cache);
		lz4_cache = NULL;
	}
}

static void *
lz4_alloc(int flags)
{
	ASSERT(lz4_cache != NULL);
	return (kmem_cache_alloc(lz4_cache, flags));
}

static void
lz4_free(void *ctx)
{
	kmem_cache_free(lz4_cache, ctx);
}
#else
void
lz4_init(void)
{
}

void
lz4_fini(void)
{
}

static void *
lz4_alloc(int flags)
{
	return (kmem_alloc(sizeof (struct refTables), flags));
}

static void
lz4_free(void *ctx)
{
	kmem_free(ctx, sizeof (struct refTables));
}
#endif