xref: /freebsd/sys/contrib/openzfs/module/zfs/lz4.c (revision 6132212808e8dccedc9e5d85fea4390c2f38059a)
/*
 * LZ4 - Fast LZ compression algorithm
 * Header File
 * Copyright (C) 2011-2013, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
 * - LZ4 source repository : http://code.google.com/p/lz4/
 */

#include <sys/zfs_context.h>
#include <sys/zio_compress.h>

static int real_LZ4_compress(const char *source, char *dest, int isize,
    int osize);
static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);
static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);
static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);

static kmem_cache_t *lz4_cache;

/*ARGSUSED*/
size_t
lz4_compress_zfs(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int n)
{
	uint32_t bufsiz;
	char *dest = d_start;

	ASSERT(d_len >= sizeof (bufsiz));

	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
	    d_len - sizeof (bufsiz));

	/* Signal an error if the compression routine returned zero. */
	if (bufsiz == 0)
		return (s_len);

	/*
	 * The exact compressed size is needed by the decompression routine,
	 * so it is stored at the start of the buffer. Note that this may be
	 * less than the compressed block size, which is rounded up to a
	 * multiple of 1<<ashift.
	 */
	*(uint32_t *)dest = BE_32(bufsiz);

	return (bufsiz + sizeof (bufsiz));
}

/*ARGSUSED*/
int
lz4_decompress_zfs(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int n)
{
	const char *src = s_start;
	uint32_t bufsiz = BE_IN32(src);

	/* invalid compressed buffer size encoded at start */
	if (bufsiz + sizeof (bufsiz) > s_len)
		return (1);

	/*
	 * Returns 0 on success (decompression function returned non-negative)
	 * and non-zero on failure (decompression function returned negative).
	 */
	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
	    d_start, bufsiz, d_len) < 0);
}
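
/*
 * The compressed block produced above is therefore a 4-byte big-endian
 * length header followed by the raw LZ4 stream, which is why d_len must
 * exceed sizeof (uint32_t):
 *
 *	+------------------------+------------------------------------+
 *	| bufsiz (32-bit, BE)    | LZ4-compressed data (bufsiz bytes) |
 *	+------------------------+------------------------------------+
 *
 * lz4_decompress_zfs() reads bufsiz back, rejects values inconsistent
 * with s_len, and hands exactly bufsiz bytes to the decoder.
 */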

/*
 * LZ4 API Description:
 *
 * Simple Functions:
 * real_LZ4_compress() :
 * 	isize  : is the input size. Max supported value is ~1.9GB
 * 	return : the number of bytes written in buffer dest
 *		 or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
 * 	note : destination buffer must be already allocated.
 * 		It must be sized to handle worst-case situations (input
 * 		data not compressible); worst-case size evaluation is
 * 		provided by function LZ4_compressBound().
 *
 * real_LZ4_uncompress() :
 * 	osize  : is the output size, therefore the original size
 * 	return : the number of bytes read in the source buffer.
 * 		If the source stream is malformed, the function will stop
 * 		decoding and return a negative result, indicating the byte
 * 		position of the faulty instruction. This function never
 * 		writes beyond dest + osize, and is therefore protected
 * 		against malicious data packets.
 * 	note : destination buffer must be already allocated
 *	note : real_LZ4_uncompress() is not used in ZFS so its code
 *	       is not present here.
 *
 * Advanced Functions
 *
 * LZ4_compressBound() :
 * 	Provides the maximum size that LZ4 may output in a "worst case"
 * 	scenario (input data not compressible). Primarily useful for memory
 * 	allocation of the output buffer.
 *
 * 	isize  : is the input size. Max supported value is ~1.9GB
 * 	return : maximum output size in a "worst case" scenario
 * 	note : this function is limited by "int" range (2^31-1)
 *
 * LZ4_uncompress_unknownOutputSize() :
 * 	isize  : is the input size, therefore the compressed size
 * 	maxOutputSize : is the size of the destination buffer (which must be
 * 		already allocated)
 * 	return : the number of bytes decoded in the destination buffer
 * 		(necessarily <= maxOutputSize). If the source stream is
 * 		malformed, the function will stop decoding and return a
 * 		negative result, indicating the byte position of the faulty
 * 		instruction. This function never writes beyond dest +
 * 		maxOutputSize, and is therefore protected against malicious
 * 		data packets.
 * 	note   : Destination buffer must be already allocated.
 *		This version is slightly slower than real_LZ4_uncompress().
 *
 * LZ4_compressCtx() :
 * 	This function explicitly handles the CTX memory structure.
 *
 * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 * 	by the caller (either on the stack or using kmem_cache_alloc). Passing
 * 	NULL isn't valid.
 *
 * LZ4_compress64kCtx() :
 * 	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 * 	isize *Must* be <64KB, otherwise the output will be corrupted.
 *
 * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 * 	by the caller (either on the stack or using kmem_cache_alloc). Passing
 * 	NULL isn't valid.
 */
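
/*
 * Illustrative only (not part of the ZFS code): a minimal sketch of how
 * the two entry points above fit together. The buffer names and sizes
 * are hypothetical, and the trailing argument (the unused level "n") is
 * simply 0.
 *
 *	size_t psize = lz4_compress_zfs(src, dst, lsize, lsize, 0);
 *	if (psize == lsize)
 *		keep the block uncompressed (compression failed or
 *		did not shrink the data);
 *	else if (lz4_decompress_zfs(dst, out, psize, lsize, 0) != 0)
 *		the compressed stream was malformed;
 *
 * On success, psize includes the 4-byte size header that
 * lz4_compress_zfs() prepends to the LZ4 stream.
 */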

/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio.
 *	Lowering this value reduces memory usage. Reduced memory usage
 *	typically improves speed, due to cache effects (ex: L1 32KB for Intel,
 *	L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 *	(examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define	COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 *	algorithm skip data segments considered "incompressible" faster.
 *	This may decrease compression ratio dramatically, but will be
 *	faster on incompressible data. Increasing this value will make
 *	the algorithm search more before declaring a segment "incompressible".
 *	This could improve compression a bit, but will be slower on
 *	incompressible data. The default value (6) is recommended.
 */
#define	NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a performance
 * boost for big-endian CPUs, but the resulting compressed stream will be
 * incompatible with little-endian CPUs. You can set this option to 1 in
 * situations where data will stay within a closed environment. This
 * option is useless on little-endian CPUs (such as x86).
 */
/* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */

/*
 * CPU Feature Detection
 */

/* 32 or 64 bits ? */
#if defined(_LP64)
#define	LZ4_ARCH64 1
#else
#define	LZ4_ARCH64 0
#endif

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 */
#if defined(_ZFS_BIG_ENDIAN)
#define	LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian formats
 * are unsupported.
 */
#undef LZ4_BIG_ENDIAN
#endif

/*
 * Unaligned memory access is automatically enabled for "common" CPUs,
 * such as x86. For other CPUs, the compiler will be more cautious and
 * insert extra code to ensure aligned access is respected. If you know
 * your target CPU supports unaligned memory access, you may want to
 * force this option manually to improve performance.
 */
#if defined(__ARM_FEATURE_UNALIGNED)
#define	LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

/*
 * Illumos : we can't use GCC's __builtin_ctz family of builtins in the
 * kernel
 * Linux : we can use GCC's __builtin_ctz family of builtins in the
 * kernel
 */
#undef	LZ4_FORCE_SW_BITCOUNT
#if defined(__sparc)
#define	LZ4_FORCE_SW_BITCOUNT
#endif

/*
 * Compiler Options
 */
/* Disable restrict */
#define	restrict

/*
 * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it.
 * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16
 */
#ifdef GCC_VERSION
#undef GCC_VERSION
#endif

#define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
#define	expect(expr, value)    (__builtin_expect((expr), (value)))
#else
#define	expect(expr, value)    (expr)
#endif

#ifndef likely
#define	likely(expr)	expect((expr) != 0, 1)
#endif

#ifndef unlikely
#define	unlikely(expr)	expect((expr) != 0, 0)
#endif

#define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
	(((x) & 0xffu) << 8)))

/* Basic types */
#define	BYTE	uint8_t
#define	U16	uint16_t
#define	U32	uint32_t
#define	S32	int32_t
#define	U64	uint64_t

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(1)
#endif

typedef struct _U16_S {
	U16 v;
} U16_S;
typedef struct _U32_S {
	U32 v;
} U32_S;
typedef struct _U64_S {
	U64 v;
} U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack()
#endif

#define	A64(x) (((U64_S *)(x))->v)
#define	A32(x) (((U32_S *)(x))->v)
#define	A16(x) (((U16_S *)(x))->v)
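
/*
 * The A16/A32/A64 accessors read or write a potentially unaligned word
 * through the wrapper structs above. Without LZ4_FORCE_UNALIGNED_ACCESS
 * the structs are packed to alignment 1, so the compiler emits access
 * sequences that are safe on strict-alignment CPUs.
 */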

/*
 * Constants
 */
#define	MINMATCH 4

#define	HASH_LOG COMPRESSIONLEVEL
#define	HASHTABLESIZE (1 << HASH_LOG)
#define	HASH_MASK (HASHTABLESIZE - 1)

#define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
	NOTCOMPRESSIBLE_CONFIRMATION : 2)
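
/*
 * SKIPSTRENGTH controls the match-search acceleration in the main loops
 * below: the forward step is computed as (findMatchAttempts++ >>
 * skipStrength), so the step grows by one byte for every 2^SKIPSTRENGTH
 * consecutive failed match attempts, letting the compressor rush through
 * incompressible regions.
 */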

#define	COPYLENGTH 8
#define	LASTLITERALS 5
#define	MFLIMIT (COPYLENGTH + MINMATCH)
#define	MINLENGTH (MFLIMIT + 1)

#define	MAXD_LOG 16
#define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define	ML_BITS 4
#define	ML_MASK ((1U<<ML_BITS)-1)
#define	RUN_BITS (8-ML_BITS)
#define	RUN_MASK ((1U<<RUN_BITS)-1)
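
/*
 * Each encoded sequence begins with a token byte: the upper RUN_BITS hold
 * the literal run length and the lower ML_BITS hold the match length
 * (stored minus the implicit MINMATCH). A field equal to its mask
 * (RUN_MASK or ML_MASK) is extended with additional length bytes of 255
 * until a byte below 255 terminates it.
 */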


/*
 * Architecture-specific macros
 */
#if LZ4_ARCH64
#define	STEPSIZE 8
#define	UARCH U64
#define	AARCH A64
#define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
#define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
#define	HTYPE U32
#define	INITBASE(base)		const BYTE* const base = ip
#else /* !LZ4_ARCH64 */
#define	STEPSIZE 4
#define	UARCH U32
#define	AARCH A32
#define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
#define	LZ4_SECURECOPY		LZ4_WILDCOPY
#define	HTYPE const BYTE *
#define	INITBASE(base)		const int base = 0
#endif /* !LZ4_ARCH64 */

#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
#else
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
#endif


/* Local structures */
struct refTables {
	HTYPE hashTable[HASHTABLESIZE];
};


/* Macros */
#define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
	HASH_LOG))
#define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
#define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
#define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
	d = e; }
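
/*
 * LZ4_HASH_FUNCTION is a Knuth-style multiplicative hash: 2654435761 is
 * a prime close to 2^32 divided by the golden ratio, and the shift keeps
 * the top HASH_LOG bits of the product.
 *
 * LZ4_WILDCOPY copies COPYLENGTH (8) bytes per iteration and may write
 * up to COPYLENGTH - 1 bytes past the end pointer e; the bounds checks
 * in the functions below reserve slack (LASTLITERALS, COPYLENGTH) so
 * this overrun never escapes the destination buffer.
 */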


/* Private functions */
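/*
 * LZ4_NbCommonBytes(diff) returns the number of leading bytes (in memory
 * order) that two words have in common, given diff = AARCH(ref) ^
 * AARCH(ip): ctz(diff)/8 on little-endian, clz(diff)/8 on big-endian.
 * Where compiler builtins are unavailable, the little-endian fallback
 * uses a de Bruijn multiplication: (val & -val) isolates the lowest set
 * bit, and multiplying by a de Bruijn constant turns its position into
 * an index into DeBruijnBytePos[].
 */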
#if LZ4_ARCH64

static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clzll(val) >> 3);
#else
	int r;
	if (!(val >> 32)) {
		r = 4;
	} else {
		r = 0;
		val >>= 32;
	}
	if (!(val >> 16)) {
		r += 2;
		val >>= 8;
	} else {
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctzll(val) >> 3);
#else
	static const int DeBruijnBytePos[64] =
	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
	};
	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
	    58];
#endif
#endif
}

#else

static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clz(val) >> 3);
#else
	int r;
	if (!(val >> 16)) {
		r = 2;
		val >>= 8;
	} else {
		r = 0;
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctz(val) >> 3);
#else
	static const int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0,
		3, 2, 2, 1, 3, 2, 0, 1,
		3, 3, 1, 2, 2, 2, 2, 0,
		3, 1, 2, 0, 1, 0, 1, 1
	};
	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
	    27];
#endif
#endif
}

#endif

/* Compression functions */

/*ARGSUSED*/
static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
	struct refTables *srt = (struct refTables *)ctx;
	HTYPE *HashTable = (HTYPE *) (srt->hashTable);

	const BYTE *ip = (BYTE *) source;
	INITBASE(base);
	const BYTE *anchor = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;


	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
	ip++;
	forwardH = LZ4_HASH_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (unlikely(forwardIp > mflimit)) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    unlikely(ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend))
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (likely(ip < matchlimit - (STEPSIZE - 1))) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255) {
				*op++ = 255;
			}
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}


/* Note : this function is valid only if isize < LZ4_64KLIMIT */
#define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
#define	HASHLOG64K (HASH_LOG + 1)
#define	HASH64KTABLESIZE (1U << HASHLOG64K)
#define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
	HASHLOG64K))
#define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))
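
/*
 * For inputs shorter than 64KB every offset from the start of the block
 * fits in 16 bits, so LZ4_compress64kCtx() can store U16 offsets in the
 * hash table instead of HTYPE entries. Since each entry is half the
 * size, the table doubles its bucket count (HASHLOG64K = HASH_LOG + 1)
 * within the same refTables allocation.
 */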

/*ARGSUSED*/
static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
	struct refTables *srt = (struct refTables *)ctx;
	U16 *HashTable = (U16 *) (srt->hashTable);

	const BYTE *ip = (BYTE *) source;
	const BYTE *anchor = ip;
	const BYTE *const base = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	ip++;
	forwardH = LZ4_HASH64K_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH64K_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while (A32(ref) != A32(ip));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    (ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend))
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
		if (A32(ref) == A32(ip)) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH64K_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255)
				*op++ = 255;
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}

static int
real_LZ4_compress(const char *source, char *dest, int isize, int osize)
{
	void *ctx;
	int result;

	ASSERT(lz4_cache != NULL);
	ctx = kmem_cache_alloc(lz4_cache, KM_SLEEP);

	/*
	 * If we are out of kernel memory, gently fall through - this will
	 * disable compression in zio_compress_data().
	 */
	if (ctx == NULL)
		return (0);

	memset(ctx, 0, sizeof (struct refTables));

	if (isize < LZ4_64KLIMIT)
		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
	else
		result = LZ4_compressCtx(ctx, source, dest, isize, osize);

	kmem_cache_free(lz4_cache, ctx);
	return (result);
}

/* Decompression functions */

/*
 * Note: The decoding functions real_LZ4_uncompress() and
 *	LZ4_uncompress_unknownOutputSize() are safe against "buffer overflow"
 *	attacks. They will never write nor read outside of the provided
 *	output buffers. LZ4_uncompress_unknownOutputSize() also ensures that
 *	it will never read outside of the input buffer. A corrupted input
 *	will produce an error result, a negative int, indicating the position
 *	of the error within the input stream.
 *
 * Note[2]: real_LZ4_uncompress(), referred to above, is not used in ZFS so
 *	its code is not present here.
 */

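/*
 * dec32table/dec64table implement copying of overlapping matches whose
 * offset is smaller than STEPSIZE: the first four bytes are copied one
 * at a time, then the table entries adjust ref so that the following
 * word-sized copies only read bytes that have already been written.
 */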
static const int dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
static const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
#endif

static int
LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
    int maxOutputSize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *const iend = ip + isize;
	const BYTE *ref;

	BYTE *op = (BYTE *) dest;
	BYTE *const oend = op + maxOutputSize;
	BYTE *cpy;

	/* Main Loop */
	while (ip < iend) {
		unsigned token;
		size_t length;

		/* get runlength */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			int s = 255;
			while ((ip < iend) && (s == 255)) {
				s = *ip++;
				if (unlikely(length > (size_t)(length + s)))
					goto _output_error;
				length += s;
			}
		}
		/* copy literals */
		cpy = op + length;
		/* CORNER-CASE: cpy might overflow. */
		if (cpy < op)
			goto _output_error;	/* cpy was overflowed, bail! */
		if ((cpy > oend - COPYLENGTH) ||
		    (ip + length > iend - COPYLENGTH)) {
			if (cpy > oend)
				/* Error: writes beyond output buffer */
				goto _output_error;
			if (ip + length != iend)
				/*
				 * Error: LZ4 format requires to consume all
				 * input at this stage
				 */
				goto _output_error;
			(void) memcpy(op, ip, length);
			op += length;
			/* Necessarily EOF, due to parsing restrictions */
			break;
		}
		LZ4_WILDCOPY(ip, op, cpy);
		ip -= (op - cpy);
		op = cpy;

		/* get offset */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if (ref < (BYTE * const) dest)
			/*
			 * Error: offset creates reference outside of
			 * destination buffer
			 */
			goto _output_error;

		/* get matchlength */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			while (ip < iend) {
				int s = *ip++;
				if (unlikely(length > (size_t)(length + s)))
					goto _output_error;
				length += s;
				if (s == 255)
					continue;
				break;
			}
		}
		/* copy repeated sequence */
		if (unlikely(op - ref < STEPSIZE)) {
#if LZ4_ARCH64
			int dec64 = dec64table[op - ref];
#else
			const int dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op - ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			if (cpy > oend)
				/*
				 * Error: request to write outside of
				 * destination buffer
				 */
				goto _output_error;
#if LZ4_ARCH64
			if ((ref + COPYLENGTH) > oend)
#else
			if ((ref + COPYLENGTH) > oend ||
			    (op + COPYLENGTH) > oend)
#endif
				goto _output_error;
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since
				 * last 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)op) - dest);

	/* write overflow error detected */
	_output_error:
	return (-1);
}

void
lz4_init(void)
{
	lz4_cache = kmem_cache_create("lz4_cache",
	    sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL, 0);
}

void
lz4_fini(void)
{
	if (lz4_cache) {
		kmem_cache_destroy(lz4_cache);
		lz4_cache = NULL;
	}
}
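
/*
 * Lifecycle note (illustrative): lz4_init() must run before the first
 * real_LZ4_compress() call so that the refTables kmem cache exists, and
 * lz4_fini() tears it down:
 *
 *	lz4_init();
 *	... lz4_compress_zfs() / lz4_decompress_zfs() calls ...
 *	lz4_fini();
 */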