xref: /freebsd/sys/contrib/openzfs/module/zfs/lz4_zfs.c (revision 61145dc2b94f12f6a47344fb9aac702321880e43)
// SPDX-License-Identifier: BSD-2-Clause
/*
 * LZ4 - Fast LZ compression algorithm
 * Header File
 * Copyright (C) 2011-2013, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
 * - LZ4 source repository : http://code.google.com/p/lz4/
 */

/*
 * N.B. - This file seems to be based on LZ4 r85, dated Dec 10, 2012
 */

#include <sys/zfs_context.h>
#include <sys/zio_compress.h>

static int real_LZ4_compress(const char *source, char *dest, int isize,
    int osize);
static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);
static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);

/* See lz4.c */
int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);

static void *lz4_alloc(int flags);
static void lz4_free(void *ctx);

static size_t
zfs_lz4_compress_buf(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int n)
{
	(void) n;
	uint32_t bufsiz;
	char *dest = d_start;

	ASSERT(d_len >= sizeof (bufsiz));

	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
	    d_len - sizeof (bufsiz));

	/* Signal an error if the compression routine returned zero. */
	if (bufsiz == 0)
		return (s_len);

	/*
	 * The exact compressed size is needed by the decompression routine,
	 * so it is stored at the start of the buffer. Note that this may be
	 * less than the compressed block size, which is rounded up to a
	 * multiple of 1<<ashift.
	 */
	*(uint32_t *)dest = BE_32(bufsiz);

	return (bufsiz + sizeof (bufsiz));
}

static int
zfs_lz4_decompress_buf(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int n)
{
	(void) n;
	const char *src = s_start;
	uint32_t bufsiz = BE_IN32(src);

	/* invalid compressed buffer size encoded at start */
	if (bufsiz + sizeof (bufsiz) > s_len)
		return (1);

	/*
	 * Returns 0 on success (decompression function returned non-negative)
	 * and non-zero on failure (decompression function returned negative).
	 */
	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
	    d_start, bufsiz, d_len) < 0);
}

ZFS_COMPRESS_WRAP_DECL(zfs_lz4_compress)
ZFS_DECOMPRESS_WRAP_DECL(zfs_lz4_decompress)
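
/*
 * For reference, the layout produced by zfs_lz4_compress_buf() and consumed
 * by zfs_lz4_decompress_buf() is a 4-byte big-endian length header followed
 * by the raw LZ4 stream:
 *
 *	+---------------------+------------------------------+
 *	| bufsiz (BE uint32)  | bufsiz bytes of LZ4 data     |
 *	+---------------------+------------------------------+
 *
 * The header is why d_len must be at least sizeof (uint32_t), and why the
 * physical block (padded to a multiple of 1<<ashift) may be larger than
 * bufsiz + 4.
 */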

/*
 * LZ4 API Description:
 *
 * Simple Functions:
 * real_LZ4_compress() :
 * 	isize  : is the input size. Max supported value is ~1.9GB
 * 	return : the number of bytes written in buffer dest
 *		 or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
 * 	note : the destination buffer must already be allocated and sized
 * 		to handle the worst case (input data not compressible);
 * 		the worst-case output size is given by LZ4_compressBound().
 *
 * real_LZ4_uncompress() :
 * 	osize  : is the output size, therefore the original size
 * 	return : the number of bytes read in the source buffer.
 * 		If the source stream is malformed, the function will stop
 * 		decoding and return a negative result, indicating the byte
 * 		position of the faulty instruction. This function never
 * 		writes beyond dest + osize, and is therefore protected
 * 		against malicious data packets.
 * 	note : destination buffer must be already allocated
 *	note : real_LZ4_uncompress() is not used in ZFS so its code
 *	       is not present here.
 *
 * Advanced Functions
 *
 * LZ4_compressBound() :
 * 	Provides the maximum size that LZ4 may output in a "worst case"
 * 	scenario (input data not compressible) primarily useful for memory
 * 	allocation of output buffer.
 *
 * 	isize  : is the input size. Max supported value is ~1.9GB
 * 	return : maximum output size in a "worst case" scenario
 * 	note : this function is limited by "int" range (2^31-1)
 *
 * LZ4_uncompress_unknownOutputSize() :
 * 	isize  : is the input size, therefore the compressed size
 * 	maxOutputSize : is the size of the destination buffer (which must be
 * 		already allocated)
 * 	return : the number of bytes decoded in the destination buffer
 * 		(necessarily <= maxOutputSize). If the source stream is
 * 		malformed, the function will stop decoding and return a
 * 		negative result, indicating the byte position of the faulty
 * 		instruction. This function never writes beyond dest +
 * 		maxOutputSize, and is therefore protected against malicious
 * 		data packets.
 * 	note   : Destination buffer must be already allocated.
 *		This version is slightly slower than real_LZ4_uncompress()
 *
 * LZ4_compressCtx() :
 * 	This function explicitly handles the CTX memory structure.
 *
 * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 * 	by the caller (either on the stack or using kmem_cache_alloc). Passing
 * 	NULL isn't valid.
 *
 * LZ4_compress64kCtx() :
 * 	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 * 	isize *Must* be <64KB, otherwise the output will be corrupted.
 *
 * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 * 	by the caller (either on the stack or using kmem_cache_alloc). Passing
 * 	NULL isn't valid.
 */
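
/*
 * Illustrative only (not compiled): a minimal sketch of how the two raw
 * entry points described above fit together. Buffer names and sizes are
 * hypothetical; the zfs_lz4_*_buf() wrappers above are the real in-tree
 * callers.
 */
#if 0
static void
lz4_api_example(void)
{
	char src[1024], comp[1024 + 64], out[1024];
	int clen, dlen;

	/* Compress; 0 means compression failed (e.g. output too small). */
	clen = real_LZ4_compress(src, comp, sizeof (src), sizeof (comp));
	if (clen == 0)
		return;

	/* Decompress; a negative return indicates a malformed stream. */
	dlen = LZ4_uncompress_unknownOutputSize(comp, out, clen,
	    sizeof (out));
	ASSERT3S(dlen, ==, sizeof (src));
}
#endif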

/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves the compression ratio.
 *	Lowering this value reduces memory usage. Reduced memory usage
 *	typically improves speed, due to cache effects (ex: L1 32KB for Intel,
 *	L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 *	(examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define	COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value makes the algorithm
 *	skip over data segments it considers "incompressible" more quickly.
 *	This may decrease the compression ratio dramatically, but will be
 *	faster on incompressible data. Increasing this value makes the
 *	algorithm search longer before declaring a segment "incompressible".
 *	This can improve compression a bit, but will be slower on
 *	incompressible data. The default value (6) is recommended.
 */
#define	NOTCOMPRESSIBLE_CONFIRMATION 6
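
/*
 * Illustration (derived from the main loops below): SKIPSTRENGTH is
 * NOTCOMPRESSIBLE_CONFIRMATION (6), and the match search advances by
 * (findMatchAttempts++ >> skipStrength) bytes per probe. The step therefore
 * stays at 1 for roughly the first 2^6 = 64 failed probes, becomes 2 for the
 * next 64, 3 for the 64 after that, and so on, so runs of incompressible
 * data are scanned progressively faster.
 */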

/*
 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This provides a performance boost on
 * big-endian CPUs, but the resulting compressed stream will be incompatible
 * with little-endian CPUs. Set this option to 1 only when the data will stay
 * within a closed environment. The option has no effect on little-endian
 * CPUs (such as x86).
 */
/* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */

/*
 * CPU Feature Detection
 */

/* 32 or 64 bits ? */
#if defined(_LP64)
#define	LZ4_ARCH64 1
#else
#define	LZ4_ARCH64 0
#endif

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 */
#if defined(_ZFS_BIG_ENDIAN)
#define	LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian formats
 * are unsupported.
 */
#undef LZ4_BIG_ENDIAN
#endif

/*
 * Unaligned memory access is automatically enabled for "common" CPUs,
 * such as x86. For other CPUs the compiler is more cautious and inserts
 * extra code to ensure aligned access is respected. If you know your
 * target CPU supports unaligned memory access, you may want to force
 * this option manually to improve performance.
 */
#if defined(__ARM_FEATURE_UNALIGNED)
#define	LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

/*
 * Illumos : we can't use GCC's __builtin_ctz family of builtins in the
 * kernel
 * Linux : we can use GCC's __builtin_ctz family of builtins in the
 * kernel
 */
#undef	LZ4_FORCE_SW_BITCOUNT
#if defined(__sparc)
#define	LZ4_FORCE_SW_BITCOUNT
#endif

/*
 * Compiler Options
 */
/* Disable restrict */
#define	restrict

/*
 * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it.
 * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16
 */
#ifdef GCC_VERSION
#undef GCC_VERSION
#endif

#define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
#define	expect(expr, value)    (__builtin_expect((expr), (value)))
#else
#define	expect(expr, value)    (expr)
#endif

#ifndef likely
#define	likely(expr)	expect((expr) != 0, 1)
#endif

#ifndef unlikely
#define	unlikely(expr)	expect((expr) != 0, 0)
#endif

#define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
	(((x) & 0xffu) << 8)))

/* Basic types */
#define	BYTE	uint8_t
#define	U16	uint16_t
#define	U32	uint32_t
#define	S32	int32_t
#define	U64	uint64_t

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(1)
#endif

typedef struct _U16_S {
	U16 v;
} U16_S;
typedef struct _U32_S {
	U32 v;
} U32_S;
typedef struct _U64_S {
	U64 v;
} U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack()
#endif

#define	A64(x) (((U64_S *)(x))->v)
#define	A32(x) (((U32_S *)(x))->v)
#define	A16(x) (((U16_S *)(x))->v)

/*
 * Constants
 */
#define	MINMATCH 4

#define	HASH_LOG COMPRESSIONLEVEL
#define	HASHTABLESIZE (1 << HASH_LOG)
#define	HASH_MASK (HASHTABLESIZE - 1)

#define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
	NOTCOMPRESSIBLE_CONFIRMATION : 2)

#define	COPYLENGTH 8
#define	LASTLITERALS 5
#define	MFLIMIT (COPYLENGTH + MINMATCH)
#define	MINLENGTH (MFLIMIT + 1)

#define	MAXD_LOG 16
#define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define	ML_BITS 4
#define	ML_MASK ((1U<<ML_BITS)-1)
#define	RUN_BITS (8-ML_BITS)
#define	RUN_MASK ((1U<<RUN_BITS)-1)
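
/*
 * Descriptive note: each encoded sequence starts with a one-byte token.
 * The upper RUN_BITS (4) bits hold the literal-run length and the lower
 * ML_BITS (4) bits hold the match length (the extra bytes beyond MINMATCH);
 * a field value of RUN_MASK or ML_MASK (15) means additional length bytes
 * of 255 follow, terminated by a byte smaller than 255.
 */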


/*
 * Architecture-specific macros
 */
#if LZ4_ARCH64
#define	STEPSIZE 8
#define	UARCH U64
#define	AARCH A64
#define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
#define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
#define	HTYPE U32
#define	INITBASE(base)		const BYTE* const base = ip
#else /* !LZ4_ARCH64 */
#define	STEPSIZE 4
#define	UARCH U32
#define	AARCH A32
#define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
#define	LZ4_SECURECOPY		LZ4_WILDCOPY
#define	HTYPE const BYTE *
#define	INITBASE(base)		const int base = 0
#endif /* !LZ4_ARCH64 */

#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
#else
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
#endif


/* Local structures */
struct refTables {
	HTYPE hashTable[HASHTABLESIZE];
};


/* Macros */
#define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
	HASH_LOG))
#define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
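/*
 * Descriptive note: the hash multiplies the first MINMATCH (4) bytes at a
 * position by 2654435761 (a Knuth-style multiplicative hashing constant)
 * and keeps the top HASH_LOG bits, giving an index into hashTable[] above.
 */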
#define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
#define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
	d = e; }


/* Private functions */
#if LZ4_ARCH64

static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clzll(val) >> 3);
#else
	int r;
	if (!(val >> 32)) {
		r = 4;
	} else {
		r = 0;
		val >>= 32;
	}
	if (!(val >> 16)) {
		r += 2;
		val >>= 8;
	} else {
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctzll(val) >> 3);
#else
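	/*
	 * Software fallback (no __builtin_ctzll): (val & -val) isolates the
	 * lowest set bit; multiplying by a De Bruijn-style constant and
	 * taking the top 6 bits of the product indexes a table that maps
	 * that bit to its byte position, i.e. the number of matching
	 * low-order bytes.
	 */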
	static const int DeBruijnBytePos[64] =
	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
	};
	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
	    58];
#endif
#endif
}

#else

static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clz(val) >> 3);
#else
	int r;
	if (!(val >> 16)) {
		r = 2;
		val >>= 8;
	} else {
		r = 0;
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctz(val) >> 3);
#else
	static const int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0,
		3, 2, 2, 1, 3, 2, 0, 1,
		3, 3, 1, 2, 2, 2, 2, 0,
		3, 1, 2, 0, 1, 0, 1, 1
	};
	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
	    27];
#endif
#endif
}

#endif

/* Compression functions */

static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
	struct refTables *srt = (struct refTables *)ctx;
	HTYPE *HashTable = (HTYPE *) (srt->hashTable);

	const BYTE *ip = (BYTE *) source;
	INITBASE(base);
	const BYTE *anchor = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;


	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
	ip++;
	forwardH = LZ4_HASH_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (unlikely(forwardIp > mflimit)) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    unlikely(ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend))
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (likely(ip < matchlimit - (STEPSIZE - 1))) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255) {
				*op++ = 255;
			}
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}



/* Note : this function is valid only if isize < LZ4_64KLIMIT */
#define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
#define	HASHLOG64K (HASH_LOG + 1)
#define	HASH64KTABLESIZE (1U << HASHLOG64K)
#define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
	HASHLOG64K))
#define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))
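
/*
 * Descriptive note: for inputs below LZ4_64KLIMIT every position fits in
 * 16 bits, so LZ4_compress64kCtx() reuses refTables as an array of U16
 * offsets instead of HTYPE entries. The halved entry size leaves room for
 * one extra hash bit (HASHLOG64K = HASH_LOG + 1) in the same memory
 * footprint.
 */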

static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
	struct refTables *srt = (struct refTables *)ctx;
	U16 *HashTable = (U16 *) (srt->hashTable);

	const BYTE *ip = (BYTE *) source;
	const BYTE *anchor = ip;
	const BYTE *const base = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	ip++;
	forwardH = LZ4_HASH64K_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH64K_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while (A32(ref) != A32(ip));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    (ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend))
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
		if (A32(ref) == A32(ip)) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH64K_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255)
				*op++ = 255;
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}

static int
real_LZ4_compress(const char *source, char *dest, int isize, int osize)
{
	void *ctx;
	int result;

	ctx = lz4_alloc(KM_SLEEP);

	/*
	 * out of kernel memory, gently fall through - this will disable
	 * compression in zio_compress_data
	 */
	if (ctx == NULL)
		return (0);

	memset(ctx, 0, sizeof (struct refTables));

	if (isize < LZ4_64KLIMIT)
		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
	else
		result = LZ4_compressCtx(ctx, source, dest, isize, osize);

	lz4_free(ctx);
	return (result);
}

#ifdef __FreeBSD__
/*
 * FreeBSD has 4, 8 and 16 KB malloc zones which can be used here.
 * Should struct refTables get resized, this may need to be revisited;
 * hence the compile-time asserts.
 */
_Static_assert(sizeof(struct refTables) <= 16384,
    "refTables too big for malloc");
_Static_assert((sizeof(struct refTables) % 4096) == 0,
    "refTables not a multiple of page size");
#else
#define ZFS_LZ4_USE_CACHE
#endif

#ifdef ZFS_LZ4_USE_CACHE
static kmem_cache_t *lz4_cache;
#endif

#ifdef ZFS_LZ4_USE_CACHE
void
lz4_init(void)
{
	lz4_cache = kmem_cache_create("lz4_cache",
	    sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL,
	    KMC_RECLAIMABLE);
}

void
lz4_fini(void)
{
	if (lz4_cache) {
		kmem_cache_destroy(lz4_cache);
		lz4_cache = NULL;
	}
}

static void *
lz4_alloc(int flags)
{
	ASSERT(lz4_cache != NULL);
	return (kmem_cache_alloc(lz4_cache, flags));
}

static void
lz4_free(void *ctx)
{
	kmem_cache_free(lz4_cache, ctx);
}
#else
void
lz4_init(void)
{
}

void
lz4_fini(void)
{
}

static void *
lz4_alloc(int flags)
{
	return (kmem_alloc(sizeof (struct refTables), flags));
}

static void
lz4_free(void *ctx)
{
	kmem_free(ctx, sizeof (struct refTables));
}
#endif