xref: /titanic_50/usr/src/uts/common/fs/zfs/lz4.c (revision 8cc2da61b2209ea79a015eead3f390ecf885fcfb)
1 /*
2  * LZ4 - Fast LZ compression algorithm
3  * Header File
4  * Copyright (C) 2011-2013, Yann Collet.
5  * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are
9  * met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  * notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above
14  * copyright notice, this list of conditions and the following disclaimer
15  * in the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * You can contact the author at :
31  * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
32  * - LZ4 source repository : http://code.google.com/p/lz4/
33  */
34 
35 #include <sys/zfs_context.h>
36 
37 static int real_LZ4_compress(const char *source, char *dest, int isize,
38     int osize);
39 static int real_LZ4_uncompress(const char *source, char *dest, int osize);
40 static int LZ4_compressBound(int isize);
41 static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
42     int isize, int maxOutputSize);
43 static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
44     int isize, int osize);
45 static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
46     int isize, int osize);
47 
48 /*ARGSUSED*/
49 size_t
50 lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
51 {
52 	uint32_t bufsiz;
53 	char *dest = d_start;
54 
55 	ASSERT(d_len >= sizeof (bufsiz));
56 
57 	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
58 	    d_len - sizeof (bufsiz));
59 
60 	/* Signal an error if the compression routine returned zero. */
61 	if (bufsiz == 0)
62 		return (s_len);
63 
64 	/*
65 	 * Encode the compresed buffer size at the start. We'll need this in
66 	 * decompression to counter the effects of padding which might be
67 	 * added to the compressed buffer and which, if unhandled, would
68 	 * confuse the hell out of our decompression function.
69 	 */
70 	*(uint32_t *)dest = BE_32(bufsiz);
71 
72 	return (bufsiz + sizeof (bufsiz));
73 }
74 
75 /*ARGSUSED*/
76 int
77 lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
78 {
79 	const char *src = s_start;
80 	uint32_t bufsiz = BE_IN32(src);
81 
82 	/* invalid compressed buffer size encoded at start */
83 	if (bufsiz + sizeof (bufsiz) > s_len)
84 		return (1);
85 
86 	/*
87 	 * Returns 0 on success (decompression function returned non-negative)
88 	 * and non-zero on failure (decompression function returned negative.
89 	 */
90 	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
91 	    d_start, bufsiz, d_len) < 0);
92 }
93 
94 /*
95  * LZ4 API Description:
96  *
97  * Simple Functions:
98  * real_LZ4_compress() :
99  * 	isize  : is the input size. Max supported value is ~1.9GB
100  * 	return : the number of bytes written in buffer dest
101  *		 or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
102  * 	note : destination buffer must be already allocated.
103  * 		destination buffer must be sized to handle worst cases
104  * 		situations (input data not compressible) worst case size
105  * 		evaluation is provided by function LZ4_compressBound().
106  *
107  * real_LZ4_uncompress() :
108  * 	osize  : is the output size, therefore the original size
109  * 	return : the number of bytes read in the source buffer.
110  * 		If the source stream is malformed, the function will stop
111  * 		decoding and return a negative result, indicating the byte
112  * 		position of the faulty instruction. This function never
113  * 		writes beyond dest + osize, and is therefore protected
114  * 		against malicious data packets.
115  * 	note : destination buffer must be already allocated
116  *
117  * Advanced Functions
118  *
119  * LZ4_compressBound() :
120  * 	Provides the maximum size that LZ4 may output in a "worst case"
121  * 	scenario (input data not compressible) primarily useful for memory
122  * 	allocation of output buffer.
123  *
124  * 	isize  : is the input size. Max supported value is ~1.9GB
125  * 	return : maximum output size in a "worst case" scenario
126  * 	note : this function is limited by "int" range (2^31-1)
127  *
128  * LZ4_uncompress_unknownOutputSize() :
129  * 	isize  : is the input size, therefore the compressed size
130  * 	maxOutputSize : is the size of the destination buffer (which must be
131  * 		already allocated)
132  * 	return : the number of bytes decoded in the destination buffer
133  * 		(necessarily <= maxOutputSize). If the source stream is
134  * 		malformed, the function will stop decoding and return a
135  * 		negative result, indicating the byte position of the faulty
136  * 		instruction. This function never writes beyond dest +
137  * 		maxOutputSize, and is therefore protected against malicious
138  * 		data packets.
139  * 	note   : Destination buffer must be already allocated.
140  *		This version is slightly slower than real_LZ4_uncompress()
141  *
142  * LZ4_compressCtx() :
143  * 	This function explicitly handles the CTX memory structure.
144  *
145  * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
146  * 	by the caller (either on the stack or using kmem_zalloc). Passing NULL
147  * 	isn't valid.
148  *
149  * LZ4_compress64kCtx() :
150  * 	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
151  * 	isize *Must* be <64KB, otherwise the output will be corrupted.
152  *
153  * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
154  * 	by the caller (either on the stack or using kmem_zalloc). Passing NULL
155  * 	isn't valid.
156  */
157 
158 /*
159  * Tuning parameters
160  */
161 
162 /*
163  * COMPRESSIONLEVEL: Increasing this value improves compression ratio
164  *	 Lowering this value reduces memory usage. Reduced memory usage
165  *	typically improves speed, due to cache effect (ex: L1 32KB for Intel,
166  *	L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
167  *	(examples : 12 -> 16KB ; 17 -> 512KB)
168  */
169 #define	COMPRESSIONLEVEL 12
170 
171 /*
172  * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
173  *	algorithm skip faster data segments considered "incompressible".
174  *	This may decrease compression ratio dramatically, but will be
175  *	faster on incompressible data. Increasing this value will make
176  *	the algorithm search more before declaring a segment "incompressible".
177  *	This could improve compression a bit, but will be slower on
178  *	incompressible data. The default value (6) is recommended.
179  */
180 #define	NOTCOMPRESSIBLE_CONFIRMATION 6
181 
182 /*
183  * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
184  * performance for big endian cpu, but the resulting compressed stream
185  * will be incompatible with little-endian CPU. You can set this option
186  * to 1 in situations where data will stay within closed environment.
187  * This option is useless on Little_Endian CPU (such as x86).
188  */
189 /* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */
190 
191 /*
192  * CPU Feature Detection
193  */
194 
195 /* 32 or 64 bits ? */
196 #if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
197     defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
198     defined(__LP64__) || defined(_LP64))
199 #define	LZ4_ARCH64 1
200 /*
201  * Illumos: On amd64 we have 20k of stack and 24k on sun4u and sun4v, so we
202  * can spend 16k on the algorithm
203  */
204 #define	STACKLIMIT 12
205 #else
206 #define	LZ4_ARCH64 0
207 /*
208  * Illumos: On i386 we only have 12k of stack, so in order to maintain the
209  * same COMPRESSIONLEVEL we have to use heap allocation. Performance will
210  * suck, but alas, it's ZFS on 32-bit we're talking about, so...
211  */
212 #define	STACKLIMIT 11
213 #endif
214 
215 /*
216  * Little Endian or Big Endian?
217  * Note: overwrite the below #define if you know your architecture endianess.
218  */
219 #if (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || \
220 	defined(_BIG_ENDIAN) || defined(_ARCH_PPC) || defined(__PPC__) || \
221 	defined(__PPC) || defined(PPC) || defined(__powerpc__) || \
222 	defined(__powerpc) || defined(powerpc) || \
223 	((defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))))
224 #define	LZ4_BIG_ENDIAN 1
225 #else
226 /*
227  * Little Endian assumed. PDP Endian and other very rare endian format
228  * are unsupported.
229  */
230 #endif
231 
232 /*
233  * Unaligned memory access is automatically enabled for "common" CPU,
234  * such as x86. For others CPU, the compiler will be more cautious, and
235  * insert extra code to ensure aligned access is respected. If you know
236  * your target CPU supports unaligned memory access, you may want to
237  * force this option manually to improve performance
238  */
239 #if defined(__ARM_FEATURE_UNALIGNED)
240 #define	LZ4_FORCE_UNALIGNED_ACCESS 1
241 #endif
242 
243 /*
244  * Illumos: we can't use GCC's __builtin_ctz family of builtins in the
245  * kernel
246  */
247 #define	LZ4_FORCE_SW_BITCOUNT
248 
249 /*
250  * Compiler Options
251  */
252 #if __STDC_VERSION__ >= 199901L	/* C99 */
253 /* "restrict" is a known keyword */
254 #else
255 /* Disable restrict */
256 #define	restrict
257 #endif
258 
259 #define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
260 
261 #ifdef _MSC_VER
262 /* Visual Studio */
263 /* Visual is not C99, but supports some kind of inline */
264 #define	inline __forceinline
265 #if LZ4_ARCH64
266 /* For Visual 2005 */
267 #pragma intrinsic(_BitScanForward64)
268 #pragma intrinsic(_BitScanReverse64)
269 #else /* !LZ4_ARCH64 */
270 /* For Visual 2005 */
271 #pragma intrinsic(_BitScanForward)
272 #pragma intrinsic(_BitScanReverse)
273 #endif /* !LZ4_ARCH64 */
274 #endif /* _MSC_VER */
275 
276 #ifdef _MSC_VER
277 #define	lz4_bswap16(x) _byteswap_ushort(x)
278 #else /* !_MSC_VER */
279 #define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
280 	(((x) & 0xffu) << 8)))
281 #endif /* !_MSC_VER */
282 
283 #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
284 #define	expect(expr, value)    (__builtin_expect((expr), (value)))
285 #else
286 #define	expect(expr, value)    (expr)
287 #endif
288 
289 #define	likely(expr)	expect((expr) != 0, 1)
290 #define	unlikely(expr)	expect((expr) != 0, 0)
291 
292 /* Basic types */
293 #if defined(_MSC_VER)
294 /* Visual Studio does not support 'stdint' natively */
295 #define	BYTE	unsigned __int8
296 #define	U16	unsigned __int16
297 #define	U32	unsigned __int32
298 #define	S32	__int32
299 #define	U64	unsigned __int64
300 #else /* !defined(_MSC_VER) */
301 #define	BYTE	uint8_t
302 #define	U16	uint16_t
303 #define	U32	uint32_t
304 #define	S32	int32_t
305 #define	U64	uint64_t
306 #endif /* !defined(_MSC_VER) */
307 
308 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
309 #pragma pack(1)
310 #endif
311 
312 typedef struct _U16_S {
313 	U16 v;
314 } U16_S;
315 typedef struct _U32_S {
316 	U32 v;
317 } U32_S;
318 typedef struct _U64_S {
319 	U64 v;
320 } U64_S;
321 
322 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
323 #pragma pack()
324 #endif
325 
326 #define	A64(x) (((U64_S *)(x))->v)
327 #define	A32(x) (((U32_S *)(x))->v)
328 #define	A16(x) (((U16_S *)(x))->v)
329 
330 /*
331  * Constants
332  */
333 #define	MINMATCH 4
334 
335 #define	HASH_LOG COMPRESSIONLEVEL
336 #define	HASHTABLESIZE (1 << HASH_LOG)
337 #define	HASH_MASK (HASHTABLESIZE - 1)
338 
339 #define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
340 	NOTCOMPRESSIBLE_CONFIRMATION : 2)
341 
342 /*
343  * Defines if memory is allocated into the stack (local variable),
344  * or into the heap (kmem_alloc()).
345  */
346 #define	HEAPMODE (HASH_LOG > STACKLIMIT)
347 #define	COPYLENGTH 8
348 #define	LASTLITERALS 5
349 #define	MFLIMIT (COPYLENGTH + MINMATCH)
350 #define	MINLENGTH (MFLIMIT + 1)
351 
352 #define	MAXD_LOG 16
353 #define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)
354 
355 #define	ML_BITS 4
356 #define	ML_MASK ((1U<<ML_BITS)-1)
357 #define	RUN_BITS (8-ML_BITS)
358 #define	RUN_MASK ((1U<<RUN_BITS)-1)
359 
360 
361 /*
362  * Architecture-specific macros
363  */
364 #if LZ4_ARCH64
365 #define	STEPSIZE 8
366 #define	UARCH U64
367 #define	AARCH A64
368 #define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
369 #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
370 #define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
371 #define	HTYPE U32
372 #define	INITBASE(base)		const BYTE* const base = ip
373 #else /* !LZ4_ARCH64 */
374 #define	STEPSIZE 4
375 #define	UARCH U32
376 #define	AARCH A32
377 #define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
378 #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
379 #define	LZ4_SECURECOPY		LZ4_WILDCOPY
380 #define	HTYPE const BYTE *
381 #define	INITBASE(base)		const int base = 0
382 #endif /* !LZ4_ARCH64 */
383 
384 #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
385 #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
386 	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
387 #define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
388 	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
389 #else
390 #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
391 #define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
392 #endif
393 
394 
395 /* Local structures */
396 struct refTables {
397 	HTYPE hashTable[HASHTABLESIZE];
398 };
399 
400 
401 /* Macros */
402 #define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
403 	HASH_LOG))
404 #define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
405 #define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
406 #define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
407 	d = e; }
408 
409 
410 /* Private functions */
411 #if LZ4_ARCH64
412 
413 static inline int
414 LZ4_NbCommonBytes(register U64 val)
415 {
416 #if defined(LZ4_BIG_ENDIAN)
417 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
418 	unsigned long r = 0;
419 	_BitScanReverse64(&r, val);
420 	return (int)(r >> 3);
421 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
422 	!defined(LZ4_FORCE_SW_BITCOUNT)
423 	return (__builtin_clzll(val) >> 3);
424 #else
425 	int r;
426 	if (!(val >> 32)) {
427 		r = 4;
428 	} else {
429 		r = 0;
430 		val >>= 32;
431 	}
432 	if (!(val >> 16)) {
433 		r += 2;
434 		val >>= 8;
435 	} else {
436 		val >>= 24;
437 	}
438 	r += (!val);
439 	return (r);
440 #endif
441 #else
442 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
443 	unsigned long r = 0;
444 	_BitScanForward64(&r, val);
445 	return (int)(r >> 3);
446 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
447 	!defined(LZ4_FORCE_SW_BITCOUNT)
448 	return (__builtin_ctzll(val) >> 3);
449 #else
450 	static const int DeBruijnBytePos[64] =
451 	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
452 		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
453 		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
454 		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
455 	};
456 	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
457 	    58];
458 #endif
459 #endif
460 }
461 
462 #else
463 
464 static inline int
465 LZ4_NbCommonBytes(register U32 val)
466 {
467 #if defined(LZ4_BIG_ENDIAN)
468 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
469 	unsigned long r = 0;
470 	_BitScanReverse(&r, val);
471 	return (int)(r >> 3);
472 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
473 	!defined(LZ4_FORCE_SW_BITCOUNT)
474 	return (__builtin_clz(val) >> 3);
475 #else
476 	int r;
477 	if (!(val >> 16)) {
478 		r = 2;
479 		val >>= 8;
480 	} else {
481 		r = 0;
482 		val >>= 24;
483 	}
484 	r += (!val);
485 	return (r);
486 #endif
487 #else
488 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
489 	unsigned long r = 0;
490 	_BitScanForward(&r, val);
491 	return (int)(r >> 3);
492 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
493 	!defined(LZ4_FORCE_SW_BITCOUNT)
494 	return (__builtin_ctz(val) >> 3);
495 #else
496 	static const int DeBruijnBytePos[32] = {
497 		0, 0, 3, 0, 3, 1, 3, 0,
498 		3, 2, 2, 1, 3, 2, 0, 1,
499 		3, 3, 1, 2, 2, 2, 2, 0,
500 		3, 1, 2, 0, 1, 0, 1, 1
501 	};
502 	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
503 	    27];
504 #endif
505 #endif
506 }
507 
508 #endif
509 
510 /* Public functions */
511 
512 static int
513 LZ4_compressBound(int isize)
514 {
515 	return (isize + (isize / 255) + 16);
516 }
517 
518 /* Compression functions */
519 
520 /*ARGSUSED*/
521 static int
522 LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
523     int osize)
524 {
525 #if HEAPMODE
526 	struct refTables *srt = (struct refTables *)ctx;
527 	HTYPE *HashTable = (HTYPE *) (srt->hashTable);
528 #else
529 	HTYPE HashTable[HASHTABLESIZE] = { 0 };
530 #endif
531 
532 	const BYTE *ip = (BYTE *) source;
533 	INITBASE(base);
534 	const BYTE *anchor = ip;
535 	const BYTE *const iend = ip + isize;
536 	const BYTE *const oend = (BYTE *) dest + osize;
537 	const BYTE *const mflimit = iend - MFLIMIT;
538 #define	matchlimit (iend - LASTLITERALS)
539 
540 	BYTE *op = (BYTE *) dest;
541 
542 	int len, length;
543 	const int skipStrength = SKIPSTRENGTH;
544 	U32 forwardH;
545 
546 
547 	/* Init */
548 	if (isize < MINLENGTH)
549 		goto _last_literals;
550 
551 	/* First Byte */
552 	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
553 	ip++;
554 	forwardH = LZ4_HASH_VALUE(ip);
555 
556 	/* Main Loop */
557 	for (;;) {
558 		int findMatchAttempts = (1U << skipStrength) + 3;
559 		const BYTE *forwardIp = ip;
560 		const BYTE *ref;
561 		BYTE *token;
562 
563 		/* Find a match */
564 		do {
565 			U32 h = forwardH;
566 			int step = findMatchAttempts++ >> skipStrength;
567 			ip = forwardIp;
568 			forwardIp = ip + step;
569 
570 			if unlikely(forwardIp > mflimit) {
571 				goto _last_literals;
572 			}
573 
574 			forwardH = LZ4_HASH_VALUE(forwardIp);
575 			ref = base + HashTable[h];
576 			HashTable[h] = ip - base;
577 
578 		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
579 
580 		/* Catch up */
581 		while ((ip > anchor) && (ref > (BYTE *) source) &&
582 		    unlikely(ip[-1] == ref[-1])) {
583 			ip--;
584 			ref--;
585 		}
586 
587 		/* Encode Literal length */
588 		length = ip - anchor;
589 		token = op++;
590 
591 		/* Check output limit */
592 		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
593 		    (length >> 8) > oend)
594 			return (0);
595 
596 		if (length >= (int)RUN_MASK) {
597 			*token = (RUN_MASK << ML_BITS);
598 			len = length - RUN_MASK;
599 			for (; len > 254; len -= 255)
600 				*op++ = 255;
601 			*op++ = (BYTE)len;
602 		} else
603 			*token = (length << ML_BITS);
604 
605 		/* Copy Literals */
606 		LZ4_BLINDCOPY(anchor, op, length);
607 
608 		_next_match:
609 		/* Encode Offset */
610 		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
611 
612 		/* Start Counting */
613 		ip += MINMATCH;
614 		ref += MINMATCH;	/* MinMatch verified */
615 		anchor = ip;
616 		while likely(ip < matchlimit - (STEPSIZE - 1)) {
617 			UARCH diff = AARCH(ref) ^ AARCH(ip);
618 			if (!diff) {
619 				ip += STEPSIZE;
620 				ref += STEPSIZE;
621 				continue;
622 			}
623 			ip += LZ4_NbCommonBytes(diff);
624 			goto _endCount;
625 		}
626 #if LZ4_ARCH64
627 		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
628 			ip += 4;
629 			ref += 4;
630 		}
631 #endif
632 		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
633 			ip += 2;
634 			ref += 2;
635 		}
636 		if ((ip < matchlimit) && (*ref == *ip))
637 			ip++;
638 		_endCount:
639 
640 		/* Encode MatchLength */
641 		len = (ip - anchor);
642 		/* Check output limit */
643 		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
644 			return (0);
645 		if (len >= (int)ML_MASK) {
646 			*token += ML_MASK;
647 			len -= ML_MASK;
648 			for (; len > 509; len -= 510) {
649 				*op++ = 255;
650 				*op++ = 255;
651 			}
652 			if (len > 254) {
653 				len -= 255;
654 				*op++ = 255;
655 			}
656 			*op++ = (BYTE)len;
657 		} else
658 			*token += len;
659 
660 		/* Test end of chunk */
661 		if (ip > mflimit) {
662 			anchor = ip;
663 			break;
664 		}
665 		/* Fill table */
666 		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;
667 
668 		/* Test next position */
669 		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
670 		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
671 		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
672 			token = op++;
673 			*token = 0;
674 			goto _next_match;
675 		}
676 		/* Prepare next loop */
677 		anchor = ip++;
678 		forwardH = LZ4_HASH_VALUE(ip);
679 	}
680 
681 	_last_literals:
682 	/* Encode Last Literals */
683 	{
684 		int lastRun = iend - anchor;
685 		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
686 		    oend)
687 			return (0);
688 		if (lastRun >= (int)RUN_MASK) {
689 			*op++ = (RUN_MASK << ML_BITS);
690 			lastRun -= RUN_MASK;
691 			for (; lastRun > 254; lastRun -= 255) {
692 				*op++ = 255;
693 			}
694 			*op++ = (BYTE)lastRun;
695 		} else
696 			*op++ = (lastRun << ML_BITS);
697 		(void) memcpy(op, anchor, iend - anchor);
698 		op += iend - anchor;
699 	}
700 
701 	/* End */
702 	return (int)(((char *)op) - dest);
703 }
704 
705 
706 
707 /* Note : this function is valid only if isize < LZ4_64KLIMIT */
708 #define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
709 #define	HASHLOG64K (HASH_LOG + 1)
710 #define	HASH64KTABLESIZE (1U << HASHLOG64K)
711 #define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
712 	HASHLOG64K))
713 #define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))
714 
715 /*ARGSUSED*/
716 static int
717 LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
718     int osize)
719 {
720 #if HEAPMODE
721 	struct refTables *srt = (struct refTables *)ctx;
722 	U16 *HashTable = (U16 *) (srt->hashTable);
723 #else
724 	U16 HashTable[HASH64KTABLESIZE] = { 0 };
725 #endif
726 
727 	const BYTE *ip = (BYTE *) source;
728 	const BYTE *anchor = ip;
729 	const BYTE *const base = ip;
730 	const BYTE *const iend = ip + isize;
731 	const BYTE *const oend = (BYTE *) dest + osize;
732 	const BYTE *const mflimit = iend - MFLIMIT;
733 #define	matchlimit (iend - LASTLITERALS)
734 
735 	BYTE *op = (BYTE *) dest;
736 
737 	int len, length;
738 	const int skipStrength = SKIPSTRENGTH;
739 	U32 forwardH;
740 
741 	/* Init */
742 	if (isize < MINLENGTH)
743 		goto _last_literals;
744 
745 	/* First Byte */
746 	ip++;
747 	forwardH = LZ4_HASH64K_VALUE(ip);
748 
749 	/* Main Loop */
750 	for (;;) {
751 		int findMatchAttempts = (1U << skipStrength) + 3;
752 		const BYTE *forwardIp = ip;
753 		const BYTE *ref;
754 		BYTE *token;
755 
756 		/* Find a match */
757 		do {
758 			U32 h = forwardH;
759 			int step = findMatchAttempts++ >> skipStrength;
760 			ip = forwardIp;
761 			forwardIp = ip + step;
762 
763 			if (forwardIp > mflimit) {
764 				goto _last_literals;
765 			}
766 
767 			forwardH = LZ4_HASH64K_VALUE(forwardIp);
768 			ref = base + HashTable[h];
769 			HashTable[h] = ip - base;
770 
771 		} while (A32(ref) != A32(ip));
772 
773 		/* Catch up */
774 		while ((ip > anchor) && (ref > (BYTE *) source) &&
775 		    (ip[-1] == ref[-1])) {
776 			ip--;
777 			ref--;
778 		}
779 
780 		/* Encode Literal length */
781 		length = ip - anchor;
782 		token = op++;
783 
784 		/* Check output limit */
785 		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
786 		    (length >> 8) > oend)
787 			return (0);
788 
789 		if (length >= (int)RUN_MASK) {
790 			*token = (RUN_MASK << ML_BITS);
791 			len = length - RUN_MASK;
792 			for (; len > 254; len -= 255)
793 				*op++ = 255;
794 			*op++ = (BYTE)len;
795 		} else
796 			*token = (length << ML_BITS);
797 
798 		/* Copy Literals */
799 		LZ4_BLINDCOPY(anchor, op, length);
800 
801 		_next_match:
802 		/* Encode Offset */
803 		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
804 
805 		/* Start Counting */
806 		ip += MINMATCH;
807 		ref += MINMATCH;	/* MinMatch verified */
808 		anchor = ip;
809 		while (ip < matchlimit - (STEPSIZE - 1)) {
810 			UARCH diff = AARCH(ref) ^ AARCH(ip);
811 			if (!diff) {
812 				ip += STEPSIZE;
813 				ref += STEPSIZE;
814 				continue;
815 			}
816 			ip += LZ4_NbCommonBytes(diff);
817 			goto _endCount;
818 		}
819 #if LZ4_ARCH64
820 		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
821 			ip += 4;
822 			ref += 4;
823 		}
824 #endif
825 		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
826 			ip += 2;
827 			ref += 2;
828 		}
829 		if ((ip < matchlimit) && (*ref == *ip))
830 			ip++;
831 		_endCount:
832 
833 		/* Encode MatchLength */
834 		len = (ip - anchor);
835 		/* Check output limit */
836 		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
837 			return (0);
838 		if (len >= (int)ML_MASK) {
839 			*token += ML_MASK;
840 			len -= ML_MASK;
841 			for (; len > 509; len -= 510) {
842 				*op++ = 255;
843 				*op++ = 255;
844 			}
845 			if (len > 254) {
846 				len -= 255;
847 				*op++ = 255;
848 			}
849 			*op++ = (BYTE)len;
850 		} else
851 			*token += len;
852 
853 		/* Test end of chunk */
854 		if (ip > mflimit) {
855 			anchor = ip;
856 			break;
857 		}
858 		/* Fill table */
859 		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;
860 
861 		/* Test next position */
862 		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
863 		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
864 		if (A32(ref) == A32(ip)) {
865 			token = op++;
866 			*token = 0;
867 			goto _next_match;
868 		}
869 		/* Prepare next loop */
870 		anchor = ip++;
871 		forwardH = LZ4_HASH64K_VALUE(ip);
872 	}
873 
874 	_last_literals:
875 	/* Encode Last Literals */
876 	{
877 		int lastRun = iend - anchor;
878 		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
879 		    oend)
880 			return (0);
881 		if (lastRun >= (int)RUN_MASK) {
882 			*op++ = (RUN_MASK << ML_BITS);
883 			lastRun -= RUN_MASK;
884 			for (; lastRun > 254; lastRun -= 255)
885 				*op++ = 255;
886 			*op++ = (BYTE)lastRun;
887 		} else
888 			*op++ = (lastRun << ML_BITS);
889 		(void) memcpy(op, anchor, iend - anchor);
890 		op += iend - anchor;
891 	}
892 
893 	/* End */
894 	return (int)(((char *)op) - dest);
895 }
896 
897 static int
898 real_LZ4_compress(const char *source, char *dest, int isize, int osize)
899 {
900 #if HEAPMODE
901 	void *ctx = kmem_zalloc(sizeof (struct refTables), KM_NOSLEEP);
902 	int result;
903 
904 	/*
905 	 * out of kernel memory, gently fall through - this will disable
906 	 * compression in zio_compress_data
907 	 */
908 	if (ctx == NULL)
909 		return (0);
910 
911 	if (isize < LZ4_64KLIMIT)
912 		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
913 	else
914 		result = LZ4_compressCtx(ctx, source, dest, isize, osize);
915 
916 	kmem_free(ctx, sizeof (struct refTables));
917 	return (result);
918 #else
919 	if (isize < (int)LZ4_64KLIMIT)
920 		return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
921 	return (LZ4_compressCtx(NULL, source, dest, isize, osize));
922 #endif
923 }
924 
925 /* Decompression functions */
926 
927 /*
928  * Note: The decoding functions real_LZ4_uncompress() and
929  *	LZ4_uncompress_unknownOutputSize() are safe against "buffer overflow"
930  *	attack type. They will never write nor read outside of the provided
931  *	output buffers. LZ4_uncompress_unknownOutputSize() also insures that
932  *	it will never read outside of the input buffer. A corrupted input
933  *	will produce an error result, a negative int, indicating the position
934  *	of the error within input stream.
935  */
936 
937 static int
938 real_LZ4_uncompress(const char *source, char *dest, int osize)
939 {
940 	/* Local Variables */
941 	const BYTE *restrict ip = (const BYTE *) source;
942 	const BYTE *ref;
943 
944 	BYTE *op = (BYTE *) dest;
945 	BYTE *const oend = op + osize;
946 	BYTE *cpy;
947 
948 	unsigned token;
949 
950 	size_t length;
951 	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
952 #if LZ4_ARCH64
953 	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
954 #endif
955 
956 	/* Main Loop */
957 	for (;;) {
958 		/* get runlength */
959 		token = *ip++;
960 		if ((length = (token >> ML_BITS)) == RUN_MASK) {
961 			size_t len;
962 			for (; (len = *ip++) == 255; length += 255) {
963 			}
964 			length += len;
965 		}
966 		/* copy literals */
967 		cpy = op + length;
968 		if unlikely(cpy > oend - COPYLENGTH) {
969 			if (cpy != oend)
970 				/* Error: we must necessarily stand at EOF */
971 				goto _output_error;
972 			(void) memcpy(op, ip, length);
973 			ip += length;
974 			break;	/* EOF */
975 			}
976 		LZ4_WILDCOPY(ip, op, cpy);
977 		ip -= (op - cpy);
978 		op = cpy;
979 
980 		/* get offset */
981 		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
982 		ip += 2;
983 		if unlikely(ref < (BYTE * const) dest)
984 			/*
985 			 * Error: offset create reference outside destination
986 			 * buffer
987 			 */
988 			goto _output_error;
989 
990 		/* get matchlength */
991 		if ((length = (token & ML_MASK)) == ML_MASK) {
992 			for (; *ip == 255; length += 255) {
993 				ip++;
994 			}
995 			length += *ip++;
996 		}
997 		/* copy repeated sequence */
998 		if unlikely(op - ref < STEPSIZE) {
999 #if LZ4_ARCH64
1000 			size_t dec64 = dec64table[op-ref];
1001 #else
1002 			const int dec64 = 0;
1003 #endif
1004 			op[0] = ref[0];
1005 			op[1] = ref[1];
1006 			op[2] = ref[2];
1007 			op[3] = ref[3];
1008 			op += 4;
1009 			ref += 4;
1010 			ref -= dec32table[op-ref];
1011 			A32(op) = A32(ref);
1012 			op += STEPSIZE - 4;
1013 			ref -= dec64;
1014 		} else {
1015 			LZ4_COPYSTEP(ref, op);
1016 		}
1017 		cpy = op + length - (STEPSIZE - 4);
1018 		if (cpy > oend - COPYLENGTH) {
1019 			if (cpy > oend)
1020 				/*
1021 				 * Error: request to write beyond destination
1022 				 * buffer
1023 				 */
1024 				goto _output_error;
1025 			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
1026 			while (op < cpy)
1027 				*op++ = *ref++;
1028 			op = cpy;
1029 			if (op == oend)
1030 				/*
1031 				 * Check EOF (should never happen, since last
1032 				 * 5 bytes are supposed to be literals)
1033 				 */
1034 				goto _output_error;
1035 			continue;
1036 		}
1037 		LZ4_SECURECOPY(ref, op, cpy);
1038 		op = cpy;	/* correction */
1039 	}
1040 
1041 	/* end of decoding */
1042 	return (int)(((char *)ip) - source);
1043 
1044 	/* write overflow error detected */
1045 	_output_error:
1046 	return (int)(-(((char *)ip) - source));
1047 }
1048 
1049 static int
1050 LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
1051     int maxOutputSize)
1052 {
1053 	/* Local Variables */
1054 	const BYTE *restrict ip = (const BYTE *) source;
1055 	const BYTE *const iend = ip + isize;
1056 	const BYTE *ref;
1057 
1058 	BYTE *op = (BYTE *) dest;
1059 	BYTE *const oend = op + maxOutputSize;
1060 	BYTE *cpy;
1061 
1062 	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
1063 #if LZ4_ARCH64
1064 	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
1065 #endif
1066 
1067 	/* Main Loop */
1068 	while (ip < iend) {
1069 		unsigned token;
1070 		size_t length;
1071 
1072 		/* get runlength */
1073 		token = *ip++;
1074 		if ((length = (token >> ML_BITS)) == RUN_MASK) {
1075 			int s = 255;
1076 			while ((ip < iend) && (s == 255)) {
1077 				s = *ip++;
1078 				length += s;
1079 			}
1080 		}
1081 		/* copy literals */
1082 		cpy = op + length;
1083 		if ((cpy > oend - COPYLENGTH) ||
1084 		    (ip + length > iend - COPYLENGTH)) {
1085 			if (cpy > oend)
1086 				/* Error: writes beyond output buffer */
1087 				goto _output_error;
1088 			if (ip + length != iend)
1089 				/*
1090 				 * Error: LZ4 format requires to consume all
1091 				 * input at this stage
1092 				 */
1093 				goto _output_error;
1094 			(void) memcpy(op, ip, length);
1095 			op += length;
1096 			/* Necessarily EOF, due to parsing restrictions */
1097 			break;
1098 		}
1099 		LZ4_WILDCOPY(ip, op, cpy);
1100 		ip -= (op - cpy);
1101 		op = cpy;
1102 
1103 		/* get offset */
1104 		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
1105 		ip += 2;
1106 		if (ref < (BYTE * const) dest)
1107 			/*
1108 			 * Error: offset creates reference outside of
1109 			 * destination buffer
1110 			 */
1111 			goto _output_error;
1112 
1113 		/* get matchlength */
1114 		if ((length = (token & ML_MASK)) == ML_MASK) {
1115 			while (ip < iend) {
1116 				int s = *ip++;
1117 				length += s;
1118 				if (s == 255)
1119 					continue;
1120 				break;
1121 			}
1122 		}
1123 		/* copy repeated sequence */
1124 		if unlikely(op - ref < STEPSIZE) {
1125 #if LZ4_ARCH64
1126 			size_t dec64 = dec64table[op-ref];
1127 #else
1128 			const int dec64 = 0;
1129 #endif
1130 			op[0] = ref[0];
1131 			op[1] = ref[1];
1132 			op[2] = ref[2];
1133 			op[3] = ref[3];
1134 			op += 4;
1135 			ref += 4;
1136 			ref -= dec32table[op-ref];
1137 			A32(op) = A32(ref);
1138 			op += STEPSIZE - 4;
1139 			ref -= dec64;
1140 		} else {
1141 			LZ4_COPYSTEP(ref, op);
1142 		}
1143 		cpy = op + length - (STEPSIZE - 4);
1144 		if (cpy > oend - COPYLENGTH) {
1145 			if (cpy > oend)
1146 				/*
1147 				 * Error: request to write outside of
1148 				 * destination buffer
1149 				 */
1150 				goto _output_error;
1151 			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
1152 			while (op < cpy)
1153 				*op++ = *ref++;
1154 			op = cpy;
1155 			if (op == oend)
1156 				/*
1157 				 * Check EOF (should never happen, since
1158 				 * last 5 bytes are supposed to be literals)
1159 				 */
1160 				goto _output_error;
1161 			continue;
1162 		}
1163 		LZ4_SECURECOPY(ref, op, cpy);
1164 		op = cpy;	/* correction */
1165 	}
1166 
1167 	/* end of decoding */
1168 	return (int)(((char *)op) - dest);
1169 
1170 	/* write overflow error detected */
1171 	_output_error:
1172 	return (int)(-(((char *)ip) - source));
1173 }
1174