xref: /freebsd/sys/contrib/openzfs/module/zfs/lz4.c (revision aebc96831f152a9da5f2c919103d571e80d3fade)
1 /*
2  * LZ4 - Fast LZ compression algorithm
3  * Header File
4  * Copyright (C) 2011-2013, Yann Collet.
5  * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are
9  * met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  * notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above
14  * copyright notice, this list of conditions and the following disclaimer
15  * in the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * You can contact the author at :
31  * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
32  * - LZ4 source repository : http://code.google.com/p/lz4/
33  */
34 
35 #include <sys/zfs_context.h>
36 #include <sys/zio_compress.h>
37 
38 static int real_LZ4_compress(const char *source, char *dest, int isize,
39     int osize);
40 static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
41     int isize, int maxOutputSize);
42 static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
43     int isize, int osize);
44 static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
45     int isize, int osize);
46 
47 static void *lz4_alloc(int flags);
48 static void lz4_free(void *ctx);
49 
50 /*ARGSUSED*/
51 size_t
52 lz4_compress_zfs(void *s_start, void *d_start, size_t s_len,
53     size_t d_len, int n)
54 {
55 	uint32_t bufsiz;
56 	char *dest = d_start;
57 
58 	ASSERT(d_len >= sizeof (bufsiz));
59 
60 	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
61 	    d_len - sizeof (bufsiz));
62 
63 	/* Signal an error if the compression routine returned zero. */
64 	if (bufsiz == 0)
65 		return (s_len);
66 
67 	/*
68 	 * The exact compressed size is needed by the decompression routine,
69 	 * so it is stored at the start of the buffer. Note that this may be
70 	 * less than the compressed block size, which is rounded up to a
71 	 * multiple of 1<<ashift.
72 	 */
73 	*(uint32_t *)dest = BE_32(bufsiz);
74 
75 	return (bufsiz + sizeof (bufsiz));
76 }
77 
78 /*ARGSUSED*/
79 int
80 lz4_decompress_zfs(void *s_start, void *d_start, size_t s_len,
81     size_t d_len, int n)
82 {
83 	const char *src = s_start;
84 	uint32_t bufsiz = BE_IN32(src);
85 
86 	/* invalid compressed buffer size encoded at start */
87 	if (bufsiz + sizeof (bufsiz) > s_len)
88 		return (1);
89 
90 	/*
91 	 * Returns 0 on success (decompression function returned non-negative)
92 	 * and non-zero on failure (decompression function returned negative).
93 	 */
94 	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
95 	    d_start, bufsiz, d_len) < 0);
96 }
97 
98 /*
99  * LZ4 API Description:
100  *
101  * Simple Functions:
102  * real_LZ4_compress() :
103  * 	isize  : is the input size. Max supported value is ~1.9GB
104  * 	return : the number of bytes written in buffer dest
105  *		 or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
106  * 	note : destination buffer must be already allocated.
107  * 		destination buffer must be sized to handle worst cases
108  * 		situations (input data not compressible) worst case size
109  * 		evaluation is provided by function LZ4_compressBound().
110  *
111  * real_LZ4_uncompress() :
112  * 	osize  : is the output size, therefore the original size
113  * 	return : the number of bytes read in the source buffer.
114  * 		If the source stream is malformed, the function will stop
115  * 		decoding and return a negative result, indicating the byte
116  * 		position of the faulty instruction. This function never
117  * 		writes beyond dest + osize, and is therefore protected
118  * 		against malicious data packets.
119  * 	note : destination buffer must be already allocated
120  *	note : real_LZ4_uncompress() is not used in ZFS so its code
121  *	       is not present here.
122  *
123  * Advanced Functions
124  *
125  * LZ4_compressBound() :
126  * 	Provides the maximum size that LZ4 may output in a "worst case"
127  * 	scenario (input data not compressible) primarily useful for memory
128  * 	allocation of output buffer.
129  *
130  * 	isize  : is the input size. Max supported value is ~1.9GB
131  * 	return : maximum output size in a "worst case" scenario
132  * 	note : this function is limited by "int" range (2^31-1)
133  *
134  * LZ4_uncompress_unknownOutputSize() :
135  * 	isize  : is the input size, therefore the compressed size
136  * 	maxOutputSize : is the size of the destination buffer (which must be
137  * 		already allocated)
138  * 	return : the number of bytes decoded in the destination buffer
139  * 		(necessarily <= maxOutputSize). If the source stream is
140  * 		malformed, the function will stop decoding and return a
141  * 		negative result, indicating the byte position of the faulty
142  * 		instruction. This function never writes beyond dest +
143  * 		maxOutputSize, and is therefore protected against malicious
144  * 		data packets.
145  * 	note   : Destination buffer must be already allocated.
146  *		This version is slightly slower than real_LZ4_uncompress()
147  *
148  * LZ4_compressCtx() :
149  * 	This function explicitly handles the CTX memory structure.
150  *
151  * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
152  * 	by the caller (either on the stack or using kmem_cache_alloc). Passing
153  * 	NULL isn't valid.
154  *
155  * LZ4_compress64kCtx() :
156  * 	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
157  * 	isize *Must* be <64KB, otherwise the output will be corrupted.
158  *
159  * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
160  * 	by the caller (either on the stack or using kmem_cache_alloc). Passing
161  * 	NULL isn't valid.
162  */
163 
164 /*
165  * Tuning parameters
166  */
167 
168 /*
169  * COMPRESSIONLEVEL: Increasing this value improves compression ratio
170  *	 Lowering this value reduces memory usage. Reduced memory usage
171  *	typically improves speed, due to cache effect (ex: L1 32KB for Intel,
172  *	L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
173  *	(examples : 12 -> 16KB ; 17 -> 512KB)
174  */
175 #define	COMPRESSIONLEVEL 12
176 
177 /*
178  * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
179  *	algorithm skip faster data segments considered "incompressible".
180  *	This may decrease compression ratio dramatically, but will be
181  *	faster on incompressible data. Increasing this value will make
182  *	the algorithm search more before declaring a segment "incompressible".
183  *	This could improve compression a bit, but will be slower on
184  *	incompressible data. The default value (6) is recommended.
185  */
186 #define	NOTCOMPRESSIBLE_CONFIRMATION 6
187 
188 /*
189  * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
190  * performance for big endian cpu, but the resulting compressed stream
191  * will be incompatible with little-endian CPU. You can set this option
192  * to 1 in situations where data will stay within closed environment.
193  * This option is useless on Little_Endian CPU (such as x86).
194  */
195 /* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */
196 
197 /*
198  * CPU Feature Detection
199  */
200 
201 /* 32 or 64 bits ? */
202 #if defined(_LP64)
203 #define	LZ4_ARCH64 1
204 #else
205 #define	LZ4_ARCH64 0
206 #endif
207 
208 /*
209  * Little Endian or Big Endian?
210  * Note: overwrite the below #define if you know your architecture endianness.
211  */
212 #if defined(_ZFS_BIG_ENDIAN)
213 #define	LZ4_BIG_ENDIAN 1
214 #else
215 /*
216  * Little Endian assumed. PDP Endian and other very rare endian format
217  * are unsupported.
218  */
219 #undef LZ4_BIG_ENDIAN
220 #endif
221 
222 /*
223  * Unaligned memory access is automatically enabled for "common" CPU,
224  * such as x86. For others CPU, the compiler will be more cautious, and
225  * insert extra code to ensure aligned access is respected. If you know
226  * your target CPU supports unaligned memory access, you may want to
227  * force this option manually to improve performance
228  */
229 #if defined(__ARM_FEATURE_UNALIGNED)
230 #define	LZ4_FORCE_UNALIGNED_ACCESS 1
231 #endif
232 
233 /*
234  * Illumos : we can't use GCC's __builtin_ctz family of builtins in the
235  * kernel
236  * Linux : we can use GCC's __builtin_ctz family of builtins in the
237  * kernel
238  */
239 #undef	LZ4_FORCE_SW_BITCOUNT
240 #if defined(__sparc)
241 #define	LZ4_FORCE_SW_BITCOUNT
242 #endif
243 
244 /*
245  * Compiler Options
246  */
247 /* Disable restrict */
248 #define	restrict
249 
250 /*
251  * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it.
252  * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16
253  */
254 #ifdef GCC_VERSION
255 #undef GCC_VERSION
256 #endif
257 
258 #define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
259 
260 #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
261 #define	expect(expr, value)    (__builtin_expect((expr), (value)))
262 #else
263 #define	expect(expr, value)    (expr)
264 #endif
265 
266 #ifndef likely
267 #define	likely(expr)	expect((expr) != 0, 1)
268 #endif
269 
270 #ifndef unlikely
271 #define	unlikely(expr)	expect((expr) != 0, 0)
272 #endif
273 
274 #define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
275 	(((x) & 0xffu) << 8)))
276 
277 /* Basic types */
278 #define	BYTE	uint8_t
279 #define	U16	uint16_t
280 #define	U32	uint32_t
281 #define	S32	int32_t
282 #define	U64	uint64_t
283 
284 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
285 #pragma pack(1)
286 #endif
287 
288 typedef struct _U16_S {
289 	U16 v;
290 } U16_S;
291 typedef struct _U32_S {
292 	U32 v;
293 } U32_S;
294 typedef struct _U64_S {
295 	U64 v;
296 } U64_S;
297 
298 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
299 #pragma pack()
300 #endif
301 
302 #define	A64(x) (((U64_S *)(x))->v)
303 #define	A32(x) (((U32_S *)(x))->v)
304 #define	A16(x) (((U16_S *)(x))->v)
305 
306 /*
307  * Constants
308  */
309 #define	MINMATCH 4
310 
311 #define	HASH_LOG COMPRESSIONLEVEL
312 #define	HASHTABLESIZE (1 << HASH_LOG)
313 #define	HASH_MASK (HASHTABLESIZE - 1)
314 
315 #define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
316 	NOTCOMPRESSIBLE_CONFIRMATION : 2)
317 
318 #define	COPYLENGTH 8
319 #define	LASTLITERALS 5
320 #define	MFLIMIT (COPYLENGTH + MINMATCH)
321 #define	MINLENGTH (MFLIMIT + 1)
322 
323 #define	MAXD_LOG 16
324 #define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)
325 
326 #define	ML_BITS 4
327 #define	ML_MASK ((1U<<ML_BITS)-1)
328 #define	RUN_BITS (8-ML_BITS)
329 #define	RUN_MASK ((1U<<RUN_BITS)-1)
330 
331 
332 /*
333  * Architecture-specific macros
334  */
335 #if LZ4_ARCH64
336 #define	STEPSIZE 8
337 #define	UARCH U64
338 #define	AARCH A64
339 #define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
340 #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
341 #define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
342 #define	HTYPE U32
343 #define	INITBASE(base)		const BYTE* const base = ip
344 #else /* !LZ4_ARCH64 */
345 #define	STEPSIZE 4
346 #define	UARCH U32
347 #define	AARCH A32
348 #define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
349 #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
350 #define	LZ4_SECURECOPY		LZ4_WILDCOPY
351 #define	HTYPE const BYTE *
352 #define	INITBASE(base)		const int base = 0
353 #endif /* !LZ4_ARCH64 */
354 
355 #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
356 #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
357 	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
358 #define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
359 	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
360 #else
361 #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
362 #define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
363 #endif
364 
365 
366 /* Local structures */
367 struct refTables {
368 	HTYPE hashTable[HASHTABLESIZE];
369 };
370 
371 
372 /* Macros */
373 #define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
374 	HASH_LOG))
375 #define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
376 #define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
377 #define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
378 	d = e; }
379 
380 
381 /* Private functions */
382 #if LZ4_ARCH64
383 
384 static inline int
385 LZ4_NbCommonBytes(register U64 val)
386 {
387 #if defined(LZ4_BIG_ENDIAN)
388 #if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
389 	!defined(LZ4_FORCE_SW_BITCOUNT)
390 	return (__builtin_clzll(val) >> 3);
391 #else
392 	int r;
393 	if (!(val >> 32)) {
394 		r = 4;
395 	} else {
396 		r = 0;
397 		val >>= 32;
398 	}
399 	if (!(val >> 16)) {
400 		r += 2;
401 		val >>= 8;
402 	} else {
403 		val >>= 24;
404 	}
405 	r += (!val);
406 	return (r);
407 #endif
408 #else
409 #if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
410 	!defined(LZ4_FORCE_SW_BITCOUNT)
411 	return (__builtin_ctzll(val) >> 3);
412 #else
413 	static const int DeBruijnBytePos[64] =
414 	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
415 		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
416 		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
417 		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
418 	};
419 	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
420 	    58];
421 #endif
422 #endif
423 }
424 
425 #else
426 
427 static inline int
428 LZ4_NbCommonBytes(register U32 val)
429 {
430 #if defined(LZ4_BIG_ENDIAN)
431 #if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
432 	!defined(LZ4_FORCE_SW_BITCOUNT)
433 	return (__builtin_clz(val) >> 3);
434 #else
435 	int r;
436 	if (!(val >> 16)) {
437 		r = 2;
438 		val >>= 8;
439 	} else {
440 		r = 0;
441 		val >>= 24;
442 	}
443 	r += (!val);
444 	return (r);
445 #endif
446 #else
447 #if defined(__GNUC__) && (GCC_VERSION >= 304) && \
448 	!defined(LZ4_FORCE_SW_BITCOUNT)
449 	return (__builtin_ctz(val) >> 3);
450 #else
451 	static const int DeBruijnBytePos[32] = {
452 		0, 0, 3, 0, 3, 1, 3, 0,
453 		3, 2, 2, 1, 3, 2, 0, 1,
454 		3, 3, 1, 2, 2, 2, 2, 0,
455 		3, 1, 2, 0, 1, 0, 1, 1
456 	};
457 	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
458 	    27];
459 #endif
460 #endif
461 }
462 
463 #endif
464 
465 /* Compression functions */
466 
467 /*ARGSUSED*/
468 static int
469 LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
470     int osize)
471 {
472 	struct refTables *srt = (struct refTables *)ctx;
473 	HTYPE *HashTable = (HTYPE *) (srt->hashTable);
474 
475 	const BYTE *ip = (BYTE *) source;
476 	INITBASE(base);
477 	const BYTE *anchor = ip;
478 	const BYTE *const iend = ip + isize;
479 	const BYTE *const oend = (BYTE *) dest + osize;
480 	const BYTE *const mflimit = iend - MFLIMIT;
481 #define	matchlimit (iend - LASTLITERALS)
482 
483 	BYTE *op = (BYTE *) dest;
484 
485 	int len, length;
486 	const int skipStrength = SKIPSTRENGTH;
487 	U32 forwardH;
488 
489 
490 	/* Init */
491 	if (isize < MINLENGTH)
492 		goto _last_literals;
493 
494 	/* First Byte */
495 	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
496 	ip++;
497 	forwardH = LZ4_HASH_VALUE(ip);
498 
499 	/* Main Loop */
500 	for (;;) {
501 		int findMatchAttempts = (1U << skipStrength) + 3;
502 		const BYTE *forwardIp = ip;
503 		const BYTE *ref;
504 		BYTE *token;
505 
506 		/* Find a match */
507 		do {
508 			U32 h = forwardH;
509 			int step = findMatchAttempts++ >> skipStrength;
510 			ip = forwardIp;
511 			forwardIp = ip + step;
512 
513 			if (unlikely(forwardIp > mflimit)) {
514 				goto _last_literals;
515 			}
516 
517 			forwardH = LZ4_HASH_VALUE(forwardIp);
518 			ref = base + HashTable[h];
519 			HashTable[h] = ip - base;
520 
521 		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
522 
523 		/* Catch up */
524 		while ((ip > anchor) && (ref > (BYTE *) source) &&
525 		    unlikely(ip[-1] == ref[-1])) {
526 			ip--;
527 			ref--;
528 		}
529 
530 		/* Encode Literal length */
531 		length = ip - anchor;
532 		token = op++;
533 
534 		/* Check output limit */
535 		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
536 		    (length >> 8) > oend))
537 			return (0);
538 
539 		if (length >= (int)RUN_MASK) {
540 			*token = (RUN_MASK << ML_BITS);
541 			len = length - RUN_MASK;
542 			for (; len > 254; len -= 255)
543 				*op++ = 255;
544 			*op++ = (BYTE)len;
545 		} else
546 			*token = (length << ML_BITS);
547 
548 		/* Copy Literals */
549 		LZ4_BLINDCOPY(anchor, op, length);
550 
551 		_next_match:
552 		/* Encode Offset */
553 		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
554 
555 		/* Start Counting */
556 		ip += MINMATCH;
557 		ref += MINMATCH;	/* MinMatch verified */
558 		anchor = ip;
559 		while (likely(ip < matchlimit - (STEPSIZE - 1))) {
560 			UARCH diff = AARCH(ref) ^ AARCH(ip);
561 			if (!diff) {
562 				ip += STEPSIZE;
563 				ref += STEPSIZE;
564 				continue;
565 			}
566 			ip += LZ4_NbCommonBytes(diff);
567 			goto _endCount;
568 		}
569 #if LZ4_ARCH64
570 		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
571 			ip += 4;
572 			ref += 4;
573 		}
574 #endif
575 		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
576 			ip += 2;
577 			ref += 2;
578 		}
579 		if ((ip < matchlimit) && (*ref == *ip))
580 			ip++;
581 		_endCount:
582 
583 		/* Encode MatchLength */
584 		len = (ip - anchor);
585 		/* Check output limit */
586 		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
587 			return (0);
588 		if (len >= (int)ML_MASK) {
589 			*token += ML_MASK;
590 			len -= ML_MASK;
591 			for (; len > 509; len -= 510) {
592 				*op++ = 255;
593 				*op++ = 255;
594 			}
595 			if (len > 254) {
596 				len -= 255;
597 				*op++ = 255;
598 			}
599 			*op++ = (BYTE)len;
600 		} else
601 			*token += len;
602 
603 		/* Test end of chunk */
604 		if (ip > mflimit) {
605 			anchor = ip;
606 			break;
607 		}
608 		/* Fill table */
609 		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;
610 
611 		/* Test next position */
612 		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
613 		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
614 		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
615 			token = op++;
616 			*token = 0;
617 			goto _next_match;
618 		}
619 		/* Prepare next loop */
620 		anchor = ip++;
621 		forwardH = LZ4_HASH_VALUE(ip);
622 	}
623 
624 	_last_literals:
625 	/* Encode Last Literals */
626 	{
627 		int lastRun = iend - anchor;
628 		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
629 		    oend)
630 			return (0);
631 		if (lastRun >= (int)RUN_MASK) {
632 			*op++ = (RUN_MASK << ML_BITS);
633 			lastRun -= RUN_MASK;
634 			for (; lastRun > 254; lastRun -= 255) {
635 				*op++ = 255;
636 			}
637 			*op++ = (BYTE)lastRun;
638 		} else
639 			*op++ = (lastRun << ML_BITS);
640 		(void) memcpy(op, anchor, iend - anchor);
641 		op += iend - anchor;
642 	}
643 
644 	/* End */
645 	return (int)(((char *)op) - dest);
646 }
647 
648 
649 
650 /* Note : this function is valid only if isize < LZ4_64KLIMIT */
651 #define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
652 #define	HASHLOG64K (HASH_LOG + 1)
653 #define	HASH64KTABLESIZE (1U << HASHLOG64K)
654 #define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
655 	HASHLOG64K))
656 #define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))
657 
658 /*ARGSUSED*/
659 static int
660 LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
661     int osize)
662 {
663 	struct refTables *srt = (struct refTables *)ctx;
664 	U16 *HashTable = (U16 *) (srt->hashTable);
665 
666 	const BYTE *ip = (BYTE *) source;
667 	const BYTE *anchor = ip;
668 	const BYTE *const base = ip;
669 	const BYTE *const iend = ip + isize;
670 	const BYTE *const oend = (BYTE *) dest + osize;
671 	const BYTE *const mflimit = iend - MFLIMIT;
672 #define	matchlimit (iend - LASTLITERALS)
673 
674 	BYTE *op = (BYTE *) dest;
675 
676 	int len, length;
677 	const int skipStrength = SKIPSTRENGTH;
678 	U32 forwardH;
679 
680 	/* Init */
681 	if (isize < MINLENGTH)
682 		goto _last_literals;
683 
684 	/* First Byte */
685 	ip++;
686 	forwardH = LZ4_HASH64K_VALUE(ip);
687 
688 	/* Main Loop */
689 	for (;;) {
690 		int findMatchAttempts = (1U << skipStrength) + 3;
691 		const BYTE *forwardIp = ip;
692 		const BYTE *ref;
693 		BYTE *token;
694 
695 		/* Find a match */
696 		do {
697 			U32 h = forwardH;
698 			int step = findMatchAttempts++ >> skipStrength;
699 			ip = forwardIp;
700 			forwardIp = ip + step;
701 
702 			if (forwardIp > mflimit) {
703 				goto _last_literals;
704 			}
705 
706 			forwardH = LZ4_HASH64K_VALUE(forwardIp);
707 			ref = base + HashTable[h];
708 			HashTable[h] = ip - base;
709 
710 		} while (A32(ref) != A32(ip));
711 
712 		/* Catch up */
713 		while ((ip > anchor) && (ref > (BYTE *) source) &&
714 		    (ip[-1] == ref[-1])) {
715 			ip--;
716 			ref--;
717 		}
718 
719 		/* Encode Literal length */
720 		length = ip - anchor;
721 		token = op++;
722 
723 		/* Check output limit */
724 		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
725 		    (length >> 8) > oend))
726 			return (0);
727 
728 		if (length >= (int)RUN_MASK) {
729 			*token = (RUN_MASK << ML_BITS);
730 			len = length - RUN_MASK;
731 			for (; len > 254; len -= 255)
732 				*op++ = 255;
733 			*op++ = (BYTE)len;
734 		} else
735 			*token = (length << ML_BITS);
736 
737 		/* Copy Literals */
738 		LZ4_BLINDCOPY(anchor, op, length);
739 
740 		_next_match:
741 		/* Encode Offset */
742 		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
743 
744 		/* Start Counting */
745 		ip += MINMATCH;
746 		ref += MINMATCH;	/* MinMatch verified */
747 		anchor = ip;
748 		while (ip < matchlimit - (STEPSIZE - 1)) {
749 			UARCH diff = AARCH(ref) ^ AARCH(ip);
750 			if (!diff) {
751 				ip += STEPSIZE;
752 				ref += STEPSIZE;
753 				continue;
754 			}
755 			ip += LZ4_NbCommonBytes(diff);
756 			goto _endCount;
757 		}
758 #if LZ4_ARCH64
759 		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
760 			ip += 4;
761 			ref += 4;
762 		}
763 #endif
764 		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
765 			ip += 2;
766 			ref += 2;
767 		}
768 		if ((ip < matchlimit) && (*ref == *ip))
769 			ip++;
770 		_endCount:
771 
772 		/* Encode MatchLength */
773 		len = (ip - anchor);
774 		/* Check output limit */
775 		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
776 			return (0);
777 		if (len >= (int)ML_MASK) {
778 			*token += ML_MASK;
779 			len -= ML_MASK;
780 			for (; len > 509; len -= 510) {
781 				*op++ = 255;
782 				*op++ = 255;
783 			}
784 			if (len > 254) {
785 				len -= 255;
786 				*op++ = 255;
787 			}
788 			*op++ = (BYTE)len;
789 		} else
790 			*token += len;
791 
792 		/* Test end of chunk */
793 		if (ip > mflimit) {
794 			anchor = ip;
795 			break;
796 		}
797 		/* Fill table */
798 		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;
799 
800 		/* Test next position */
801 		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
802 		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
803 		if (A32(ref) == A32(ip)) {
804 			token = op++;
805 			*token = 0;
806 			goto _next_match;
807 		}
808 		/* Prepare next loop */
809 		anchor = ip++;
810 		forwardH = LZ4_HASH64K_VALUE(ip);
811 	}
812 
813 	_last_literals:
814 	/* Encode Last Literals */
815 	{
816 		int lastRun = iend - anchor;
817 		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
818 		    oend)
819 			return (0);
820 		if (lastRun >= (int)RUN_MASK) {
821 			*op++ = (RUN_MASK << ML_BITS);
822 			lastRun -= RUN_MASK;
823 			for (; lastRun > 254; lastRun -= 255)
824 				*op++ = 255;
825 			*op++ = (BYTE)lastRun;
826 		} else
827 			*op++ = (lastRun << ML_BITS);
828 		(void) memcpy(op, anchor, iend - anchor);
829 		op += iend - anchor;
830 	}
831 
832 	/* End */
833 	return (int)(((char *)op) - dest);
834 }
835 
836 static int
837 real_LZ4_compress(const char *source, char *dest, int isize, int osize)
838 {
839 	void *ctx;
840 	int result;
841 
842 	ctx = lz4_alloc(KM_SLEEP);
843 
844 	/*
845 	 * out of kernel memory, gently fall through - this will disable
846 	 * compression in zio_compress_data
847 	 */
848 	if (ctx == NULL)
849 		return (0);
850 
851 	memset(ctx, 0, sizeof (struct refTables));
852 
853 	if (isize < LZ4_64KLIMIT)
854 		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
855 	else
856 		result = LZ4_compressCtx(ctx, source, dest, isize, osize);
857 
858 	lz4_free(ctx);
859 	return (result);
860 }
861 
862 /* Decompression functions */
863 
864 /*
865  * Note: The decoding functions real_LZ4_uncompress() and
866  *	LZ4_uncompress_unknownOutputSize() are safe against "buffer overflow"
867  *	attack type. They will never write nor read outside of the provided
868  *	output buffers. LZ4_uncompress_unknownOutputSize() also insures that
869  *	it will never read outside of the input buffer. A corrupted input
870  *	will produce an error result, a negative int, indicating the position
871  *	of the error within input stream.
872  *
873  * Note[2]: real_LZ4_uncompress(), referred to above, is not used in ZFS so
874  *	its code is not present here.
875  */
876 
877 static const int dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
878 #if LZ4_ARCH64
879 static const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
880 #endif
881 
882 static int
883 LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
884     int maxOutputSize)
885 {
886 	/* Local Variables */
887 	const BYTE *restrict ip = (const BYTE *) source;
888 	const BYTE *const iend = ip + isize;
889 	const BYTE *ref;
890 
891 	BYTE *op = (BYTE *) dest;
892 	BYTE *const oend = op + maxOutputSize;
893 	BYTE *cpy;
894 
895 	/* Main Loop */
896 	while (ip < iend) {
897 		unsigned token;
898 		size_t length;
899 
900 		/* get runlength */
901 		token = *ip++;
902 		if ((length = (token >> ML_BITS)) == RUN_MASK) {
903 			int s = 255;
904 			while ((ip < iend) && (s == 255)) {
905 				s = *ip++;
906 				if (unlikely(length > (size_t)(length + s)))
907 					goto _output_error;
908 				length += s;
909 			}
910 		}
911 		/* copy literals */
912 		cpy = op + length;
913 		/* CORNER-CASE: cpy might overflow. */
914 		if (cpy < op)
915 			goto _output_error;	/* cpy was overflowed, bail! */
916 		if ((cpy > oend - COPYLENGTH) ||
917 		    (ip + length > iend - COPYLENGTH)) {
918 			if (cpy > oend)
919 				/* Error: writes beyond output buffer */
920 				goto _output_error;
921 			if (ip + length != iend)
922 				/*
923 				 * Error: LZ4 format requires to consume all
924 				 * input at this stage
925 				 */
926 				goto _output_error;
927 			(void) memcpy(op, ip, length);
928 			op += length;
929 			/* Necessarily EOF, due to parsing restrictions */
930 			break;
931 		}
932 		LZ4_WILDCOPY(ip, op, cpy);
933 		ip -= (op - cpy);
934 		op = cpy;
935 
936 		/* get offset */
937 		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
938 		ip += 2;
939 		if (ref < (BYTE * const) dest)
940 			/*
941 			 * Error: offset creates reference outside of
942 			 * destination buffer
943 			 */
944 			goto _output_error;
945 
946 		/* get matchlength */
947 		if ((length = (token & ML_MASK)) == ML_MASK) {
948 			while (ip < iend) {
949 				int s = *ip++;
950 				if (unlikely(length > (size_t)(length + s)))
951 					goto _output_error;
952 				length += s;
953 				if (s == 255)
954 					continue;
955 				break;
956 			}
957 		}
958 		/* copy repeated sequence */
959 		if (unlikely(op - ref < STEPSIZE)) {
960 #if LZ4_ARCH64
961 			int dec64 = dec64table[op - ref];
962 #else
963 			const int dec64 = 0;
964 #endif
965 			op[0] = ref[0];
966 			op[1] = ref[1];
967 			op[2] = ref[2];
968 			op[3] = ref[3];
969 			op += 4;
970 			ref += 4;
971 			ref -= dec32table[op - ref];
972 			A32(op) = A32(ref);
973 			op += STEPSIZE - 4;
974 			ref -= dec64;
975 		} else {
976 			LZ4_COPYSTEP(ref, op);
977 		}
978 		cpy = op + length - (STEPSIZE - 4);
979 		if (cpy > oend - COPYLENGTH) {
980 			if (cpy > oend)
981 				/*
982 				 * Error: request to write outside of
983 				 * destination buffer
984 				 */
985 				goto _output_error;
986 #if LZ4_ARCH64
987 			if ((ref + COPYLENGTH) > oend)
988 #else
989 			if ((ref + COPYLENGTH) > oend ||
990 			    (op + COPYLENGTH) > oend)
991 #endif
992 				goto _output_error;
993 			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
994 			while (op < cpy)
995 				*op++ = *ref++;
996 			op = cpy;
997 			if (op == oend)
998 				/*
999 				 * Check EOF (should never happen, since
1000 				 * last 5 bytes are supposed to be literals)
1001 				 */
1002 				goto _output_error;
1003 			continue;
1004 		}
1005 		LZ4_SECURECOPY(ref, op, cpy);
1006 		op = cpy;	/* correction */
1007 	}
1008 
1009 	/* end of decoding */
1010 	return (int)(((char *)op) - dest);
1011 
1012 	/* write overflow error detected */
1013 	_output_error:
1014 	return (-1);
1015 }
1016 
1017 #ifdef __FreeBSD__
1018 /*
1019  * FreeBSD has 4, 8 and 16 KB malloc zones which can be used here.
1020  * Should struct refTables get resized this may need to be revisited, hence
1021  * compiler-time asserts.
1022  */
1023 _Static_assert(sizeof(struct refTables) <= 16384,
1024     "refTables too big for malloc");
1025 _Static_assert((sizeof(struct refTables) % 4096) == 0,
1026     "refTables not a multiple of page size");
1027 #else
1028 #define ZFS_LZ4_USE_CACHE
1029 #endif
1030 
1031 #ifdef ZFS_LZ4_USE_CACHE
1032 static kmem_cache_t *lz4_cache;
1033 
1034 void
1035 lz4_init(void)
1036 {
1037 	lz4_cache = kmem_cache_create("lz4_cache",
1038 	    sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL, 0);
1039 }
1040 
1041 void
1042 lz4_fini(void)
1043 {
1044 	if (lz4_cache) {
1045 		kmem_cache_destroy(lz4_cache);
1046 		lz4_cache = NULL;
1047 	}
1048 }
1049 
1050 static void *
1051 lz4_alloc(int flags)
1052 {
1053 	ASSERT(lz4_cache != NULL);
1054 	return (kmem_cache_alloc(lz4_cache, flags));
1055 }
1056 
1057 static void
1058 lz4_free(void *ctx)
1059 {
1060 	kmem_cache_free(lz4_cache, ctx);
1061 }
1062 #else
1063 void
1064 lz4_init(void)
1065 {
1066 }
1067 
1068 void
1069 lz4_fini(void)
1070 {
1071 }
1072 
1073 static void *
1074 lz4_alloc(int flags)
1075 {
1076 	return (kmem_alloc(sizeof (struct refTables), flags));
1077 }
1078 
1079 static void
1080 lz4_free(void *ctx)
1081 {
1082 	kmem_free(ctx, sizeof (struct refTables));
1083 }
1084 #endif
1085