xref: /titanic_44/usr/src/uts/common/fs/zfs/lz4.c (revision 6935f61b0d202f1b87f0234824e4a6ab88c492ac)
1 /*
2  * LZ4 - Fast LZ compression algorithm
3  * Header File
4  * Copyright (C) 2011-2013, Yann Collet.
5  * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are
9  * met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  * notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above
14  * copyright notice, this list of conditions and the following disclaimer
15  * in the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * You can contact the author at :
31  * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
32  * - LZ4 source repository : http://code.google.com/p/lz4/
33  */
34 
35 #include <sys/zfs_context.h>
36 
37 static int real_LZ4_compress(const char *source, char *dest, int isize,
38     int osize);
39 static int real_LZ4_uncompress(const char *source, char *dest, int osize);
40 static int LZ4_compressBound(int isize);
41 static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
42     int isize, int maxOutputSize);
43 static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
44     int isize, int osize);
45 static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
46     int isize, int osize);
47 
48 /*ARGSUSED*/
49 size_t
50 lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
51 {
52 	uint32_t bufsiz;
53 	char *dest = d_start;
54 
55 	ASSERT(d_len >= sizeof (bufsiz));
56 
57 	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
58 	    d_len - sizeof (bufsiz));
59 
60 	/* Signal an error if the compression routine returned zero. */
61 	if (bufsiz == 0)
62 		return (s_len);
63 
64 	/*
65 	 * Encode the compresed buffer size at the start. We'll need this in
66 	 * decompression to counter the effects of padding which might be
67 	 * added to the compressed buffer and which, if unhandled, would
68 	 * confuse the hell out of our decompression function.
69 	 */
70 	*(uint32_t *)dest = BE_32(bufsiz);
71 
72 	return (bufsiz + sizeof (bufsiz));
73 }
74 
75 /*ARGSUSED*/
76 int
77 lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
78 {
79 	const char *src = s_start;
80 	uint32_t bufsiz = BE_IN32(src);
81 
82 	/* invalid compressed buffer size encoded at start */
83 	if (bufsiz + sizeof (bufsiz) > s_len)
84 		return (1);
85 
86 	/*
87 	 * Returns 0 on success (decompression function returned non-negative)
88 	 * and non-zero on failure (decompression function returned negative.
89 	 */
90 	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
91 	    d_start, bufsiz, d_len) < 0);
92 }
93 
94 /*
95  * LZ4 API Description:
96  *
97  * Simple Functions:
98  * real_LZ4_compress() :
99  * 	isize  : is the input size. Max supported value is ~1.9GB
100  * 	return : the number of bytes written in buffer dest
101  *		 or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
102  * 	note : destination buffer must be already allocated.
103  * 		destination buffer must be sized to handle worst cases
104  * 		situations (input data not compressible) worst case size
105  * 		evaluation is provided by function LZ4_compressBound().
106  *
107  * real_LZ4_uncompress() :
108  * 	osize  : is the output size, therefore the original size
109  * 	return : the number of bytes read in the source buffer.
110  * 		If the source stream is malformed, the function will stop
111  * 		decoding and return a negative result, indicating the byte
112  * 		position of the faulty instruction. This function never
113  * 		writes beyond dest + osize, and is therefore protected
114  * 		against malicious data packets.
115  * 	note : destination buffer must be already allocated
116  *
117  * Advanced Functions
118  *
119  * LZ4_compressBound() :
120  * 	Provides the maximum size that LZ4 may output in a "worst case"
121  * 	scenario (input data not compressible) primarily useful for memory
122  * 	allocation of output buffer.
123  *
124  * 	isize  : is the input size. Max supported value is ~1.9GB
125  * 	return : maximum output size in a "worst case" scenario
126  * 	note : this function is limited by "int" range (2^31-1)
127  *
128  * LZ4_uncompress_unknownOutputSize() :
129  * 	isize  : is the input size, therefore the compressed size
130  * 	maxOutputSize : is the size of the destination buffer (which must be
131  * 		already allocated)
132  * 	return : the number of bytes decoded in the destination buffer
133  * 		(necessarily <= maxOutputSize). If the source stream is
134  * 		malformed, the function will stop decoding and return a
135  * 		negative result, indicating the byte position of the faulty
136  * 		instruction. This function never writes beyond dest +
137  * 		maxOutputSize, and is therefore protected against malicious
138  * 		data packets.
139  * 	note   : Destination buffer must be already allocated.
140  *		This version is slightly slower than real_LZ4_uncompress()
141  *
142  * LZ4_compressCtx() :
143  * 	This function explicitly handles the CTX memory structure.
144  *
145  * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
146  * 	by the caller (either on the stack or using kmem_zalloc). Passing NULL
147  * 	isn't valid.
148  *
149  * LZ4_compress64kCtx() :
150  * 	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
151  * 	isize *Must* be <64KB, otherwise the output will be corrupted.
152  *
153  * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
154  * 	by the caller (either on the stack or using kmem_zalloc). Passing NULL
155  * 	isn't valid.
156  */
157 
158 /*
159  * Tuning parameters
160  */
161 
162 /*
163  * COMPRESSIONLEVEL: Increasing this value improves compression ratio
164  *	 Lowering this value reduces memory usage. Reduced memory usage
165  *	typically improves speed, due to cache effect (ex: L1 32KB for Intel,
166  *	L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
167  *	(examples : 12 -> 16KB ; 17 -> 512KB)
168  */
169 #define	COMPRESSIONLEVEL 12
170 
171 /*
172  * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
173  *	algorithm skip faster data segments considered "incompressible".
174  *	This may decrease compression ratio dramatically, but will be
175  *	faster on incompressible data. Increasing this value will make
176  *	the algorithm search more before declaring a segment "incompressible".
177  *	This could improve compression a bit, but will be slower on
178  *	incompressible data. The default value (6) is recommended.
179  */
180 #define	NOTCOMPRESSIBLE_CONFIRMATION 6
181 
182 /*
183  * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
184  * performance for big endian cpu, but the resulting compressed stream
185  * will be incompatible with little-endian CPU. You can set this option
186  * to 1 in situations where data will stay within closed environment.
187  * This option is useless on Little_Endian CPU (such as x86).
188  */
189 /* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */
190 
191 /*
192  * CPU Feature Detection
193  */
194 
195 /* 32 or 64 bits ? */
196 #if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
197     defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
198     defined(__LP64__) || defined(_LP64))
199 #define	LZ4_ARCH64 1
200 #else
201 #define	LZ4_ARCH64 0
202 #endif
203 
204 /*
205  * Limits the amount of stack space that the algorithm may consume to hold
206  * the compression lookup table. The value `9' here means we'll never use
207  * more than 2k of stack (see above for a description of COMPRESSIONLEVEL).
208  * If more memory is needed, it is allocated from the heap.
209  */
210 #define	STACKLIMIT 9
211 
212 /*
213  * Little Endian or Big Endian?
214  * Note: overwrite the below #define if you know your architecture endianess.
215  */
216 #if (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || \
217 	defined(_BIG_ENDIAN) || defined(_ARCH_PPC) || defined(__PPC__) || \
218 	defined(__PPC) || defined(PPC) || defined(__powerpc__) || \
219 	defined(__powerpc) || defined(powerpc) || \
220 	((defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))))
221 #define	LZ4_BIG_ENDIAN 1
222 #else
223 /*
224  * Little Endian assumed. PDP Endian and other very rare endian format
225  * are unsupported.
226  */
227 #endif
228 
229 /*
230  * Unaligned memory access is automatically enabled for "common" CPU,
231  * such as x86. For others CPU, the compiler will be more cautious, and
232  * insert extra code to ensure aligned access is respected. If you know
233  * your target CPU supports unaligned memory access, you may want to
234  * force this option manually to improve performance
235  */
236 #if defined(__ARM_FEATURE_UNALIGNED)
237 #define	LZ4_FORCE_UNALIGNED_ACCESS 1
238 #endif
239 
240 #ifdef __sparc
241 #define	LZ4_FORCE_SW_BITCOUNT
242 #endif
243 
244 /*
245  * Compiler Options
246  */
247 #if __STDC_VERSION__ >= 199901L	/* C99 */
248 /* "restrict" is a known keyword */
249 #else
250 /* Disable restrict */
251 #define	restrict
252 #endif
253 
254 #define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
255 
256 #ifdef _MSC_VER
257 /* Visual Studio */
258 /* Visual is not C99, but supports some kind of inline */
259 #define	inline __forceinline
260 #if LZ4_ARCH64
261 /* For Visual 2005 */
262 #pragma intrinsic(_BitScanForward64)
263 #pragma intrinsic(_BitScanReverse64)
264 #else /* !LZ4_ARCH64 */
265 /* For Visual 2005 */
266 #pragma intrinsic(_BitScanForward)
267 #pragma intrinsic(_BitScanReverse)
268 #endif /* !LZ4_ARCH64 */
269 #endif /* _MSC_VER */
270 
271 #ifdef _MSC_VER
272 #define	lz4_bswap16(x) _byteswap_ushort(x)
273 #else /* !_MSC_VER */
274 #define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
275 	(((x) & 0xffu) << 8)))
276 #endif /* !_MSC_VER */
277 
278 #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
279 #define	expect(expr, value)    (__builtin_expect((expr), (value)))
280 #else
281 #define	expect(expr, value)    (expr)
282 #endif
283 
284 #define	likely(expr)	expect((expr) != 0, 1)
285 #define	unlikely(expr)	expect((expr) != 0, 0)
286 
287 /* Basic types */
288 #if defined(_MSC_VER)
289 /* Visual Studio does not support 'stdint' natively */
290 #define	BYTE	unsigned __int8
291 #define	U16	unsigned __int16
292 #define	U32	unsigned __int32
293 #define	S32	__int32
294 #define	U64	unsigned __int64
295 #else /* !defined(_MSC_VER) */
296 #define	BYTE	uint8_t
297 #define	U16	uint16_t
298 #define	U32	uint32_t
299 #define	S32	int32_t
300 #define	U64	uint64_t
301 #endif /* !defined(_MSC_VER) */
302 
303 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
304 #pragma pack(1)
305 #endif
306 
307 typedef struct _U16_S {
308 	U16 v;
309 } U16_S;
310 typedef struct _U32_S {
311 	U32 v;
312 } U32_S;
313 typedef struct _U64_S {
314 	U64 v;
315 } U64_S;
316 
317 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
318 #pragma pack()
319 #endif
320 
321 #define	A64(x) (((U64_S *)(x))->v)
322 #define	A32(x) (((U32_S *)(x))->v)
323 #define	A16(x) (((U16_S *)(x))->v)
324 
325 /*
326  * Constants
327  */
328 #define	MINMATCH 4
329 
330 #define	HASH_LOG COMPRESSIONLEVEL
331 #define	HASHTABLESIZE (1 << HASH_LOG)
332 #define	HASH_MASK (HASHTABLESIZE - 1)
333 
334 #define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
335 	NOTCOMPRESSIBLE_CONFIRMATION : 2)
336 
337 /*
338  * Defines if memory is allocated into the stack (local variable),
339  * or into the heap (kmem_alloc()).
340  */
341 #define	HEAPMODE (HASH_LOG > STACKLIMIT)
342 #define	COPYLENGTH 8
343 #define	LASTLITERALS 5
344 #define	MFLIMIT (COPYLENGTH + MINMATCH)
345 #define	MINLENGTH (MFLIMIT + 1)
346 
347 #define	MAXD_LOG 16
348 #define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)
349 
350 #define	ML_BITS 4
351 #define	ML_MASK ((1U<<ML_BITS)-1)
352 #define	RUN_BITS (8-ML_BITS)
353 #define	RUN_MASK ((1U<<RUN_BITS)-1)
354 
355 
356 /*
357  * Architecture-specific macros
358  */
359 #if LZ4_ARCH64
360 #define	STEPSIZE 8
361 #define	UARCH U64
362 #define	AARCH A64
363 #define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
364 #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
365 #define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
366 #define	HTYPE U32
367 #define	INITBASE(base)		const BYTE* const base = ip
368 #else /* !LZ4_ARCH64 */
369 #define	STEPSIZE 4
370 #define	UARCH U32
371 #define	AARCH A32
372 #define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
373 #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
374 #define	LZ4_SECURECOPY		LZ4_WILDCOPY
375 #define	HTYPE const BYTE *
376 #define	INITBASE(base)		const int base = 0
377 #endif /* !LZ4_ARCH64 */
378 
379 #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
380 #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
381 	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
382 #define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
383 	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
384 #else
385 #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
386 #define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
387 #endif
388 
389 
390 /* Local structures */
391 struct refTables {
392 	HTYPE hashTable[HASHTABLESIZE];
393 };
394 
395 
396 /* Macros */
397 #define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
398 	HASH_LOG))
399 #define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
400 #define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
401 #define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
402 	d = e; }
403 
404 
405 /* Private functions */
406 #if LZ4_ARCH64
407 
408 static inline int
409 LZ4_NbCommonBytes(register U64 val)
410 {
411 #if defined(LZ4_BIG_ENDIAN)
412 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
413 	unsigned long r = 0;
414 	_BitScanReverse64(&r, val);
415 	return (int)(r >> 3);
416 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
417 	!defined(LZ4_FORCE_SW_BITCOUNT)
418 	return (__builtin_clzll(val) >> 3);
419 #else
420 	int r;
421 	if (!(val >> 32)) {
422 		r = 4;
423 	} else {
424 		r = 0;
425 		val >>= 32;
426 	}
427 	if (!(val >> 16)) {
428 		r += 2;
429 		val >>= 8;
430 	} else {
431 		val >>= 24;
432 	}
433 	r += (!val);
434 	return (r);
435 #endif
436 #else
437 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
438 	unsigned long r = 0;
439 	_BitScanForward64(&r, val);
440 	return (int)(r >> 3);
441 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
442 	!defined(LZ4_FORCE_SW_BITCOUNT)
443 	return (__builtin_ctzll(val) >> 3);
444 #else
445 	static const int DeBruijnBytePos[64] =
446 	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
447 		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
448 		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
449 		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
450 	};
451 	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
452 	    58];
453 #endif
454 #endif
455 }
456 
457 #else
458 
459 static inline int
460 LZ4_NbCommonBytes(register U32 val)
461 {
462 #if defined(LZ4_BIG_ENDIAN)
463 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
464 	unsigned long r = 0;
465 	_BitScanReverse(&r, val);
466 	return (int)(r >> 3);
467 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
468 	!defined(LZ4_FORCE_SW_BITCOUNT)
469 	return (__builtin_clz(val) >> 3);
470 #else
471 	int r;
472 	if (!(val >> 16)) {
473 		r = 2;
474 		val >>= 8;
475 	} else {
476 		r = 0;
477 		val >>= 24;
478 	}
479 	r += (!val);
480 	return (r);
481 #endif
482 #else
483 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
484 	unsigned long r = 0;
485 	_BitScanForward(&r, val);
486 	return (int)(r >> 3);
487 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
488 	!defined(LZ4_FORCE_SW_BITCOUNT)
489 	return (__builtin_ctz(val) >> 3);
490 #else
491 	static const int DeBruijnBytePos[32] = {
492 		0, 0, 3, 0, 3, 1, 3, 0,
493 		3, 2, 2, 1, 3, 2, 0, 1,
494 		3, 3, 1, 2, 2, 2, 2, 0,
495 		3, 1, 2, 0, 1, 0, 1, 1
496 	};
497 	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
498 	    27];
499 #endif
500 #endif
501 }
502 
503 #endif
504 
505 /* Public functions */
506 
507 static int
508 LZ4_compressBound(int isize)
509 {
510 	return (isize + (isize / 255) + 16);
511 }
512 
513 /* Compression functions */
514 
515 /*ARGSUSED*/
516 static int
517 LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
518     int osize)
519 {
520 #if HEAPMODE
521 	struct refTables *srt = (struct refTables *)ctx;
522 	HTYPE *HashTable = (HTYPE *) (srt->hashTable);
523 #else
524 	HTYPE HashTable[HASHTABLESIZE] = { 0 };
525 #endif
526 
527 	const BYTE *ip = (BYTE *) source;
528 	INITBASE(base);
529 	const BYTE *anchor = ip;
530 	const BYTE *const iend = ip + isize;
531 	const BYTE *const oend = (BYTE *) dest + osize;
532 	const BYTE *const mflimit = iend - MFLIMIT;
533 #define	matchlimit (iend - LASTLITERALS)
534 
535 	BYTE *op = (BYTE *) dest;
536 
537 	int len, length;
538 	const int skipStrength = SKIPSTRENGTH;
539 	U32 forwardH;
540 
541 
542 	/* Init */
543 	if (isize < MINLENGTH)
544 		goto _last_literals;
545 
546 	/* First Byte */
547 	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
548 	ip++;
549 	forwardH = LZ4_HASH_VALUE(ip);
550 
551 	/* Main Loop */
552 	for (;;) {
553 		int findMatchAttempts = (1U << skipStrength) + 3;
554 		const BYTE *forwardIp = ip;
555 		const BYTE *ref;
556 		BYTE *token;
557 
558 		/* Find a match */
559 		do {
560 			U32 h = forwardH;
561 			int step = findMatchAttempts++ >> skipStrength;
562 			ip = forwardIp;
563 			forwardIp = ip + step;
564 
565 			if unlikely(forwardIp > mflimit) {
566 				goto _last_literals;
567 			}
568 
569 			forwardH = LZ4_HASH_VALUE(forwardIp);
570 			ref = base + HashTable[h];
571 			HashTable[h] = ip - base;
572 
573 		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
574 
575 		/* Catch up */
576 		while ((ip > anchor) && (ref > (BYTE *) source) &&
577 		    unlikely(ip[-1] == ref[-1])) {
578 			ip--;
579 			ref--;
580 		}
581 
582 		/* Encode Literal length */
583 		length = ip - anchor;
584 		token = op++;
585 
586 		/* Check output limit */
587 		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
588 		    (length >> 8) > oend)
589 			return (0);
590 
591 		if (length >= (int)RUN_MASK) {
592 			*token = (RUN_MASK << ML_BITS);
593 			len = length - RUN_MASK;
594 			for (; len > 254; len -= 255)
595 				*op++ = 255;
596 			*op++ = (BYTE)len;
597 		} else
598 			*token = (length << ML_BITS);
599 
600 		/* Copy Literals */
601 		LZ4_BLINDCOPY(anchor, op, length);
602 
603 		_next_match:
604 		/* Encode Offset */
605 		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
606 
607 		/* Start Counting */
608 		ip += MINMATCH;
609 		ref += MINMATCH;	/* MinMatch verified */
610 		anchor = ip;
611 		while likely(ip < matchlimit - (STEPSIZE - 1)) {
612 			UARCH diff = AARCH(ref) ^ AARCH(ip);
613 			if (!diff) {
614 				ip += STEPSIZE;
615 				ref += STEPSIZE;
616 				continue;
617 			}
618 			ip += LZ4_NbCommonBytes(diff);
619 			goto _endCount;
620 		}
621 #if LZ4_ARCH64
622 		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
623 			ip += 4;
624 			ref += 4;
625 		}
626 #endif
627 		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
628 			ip += 2;
629 			ref += 2;
630 		}
631 		if ((ip < matchlimit) && (*ref == *ip))
632 			ip++;
633 		_endCount:
634 
635 		/* Encode MatchLength */
636 		len = (ip - anchor);
637 		/* Check output limit */
638 		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
639 			return (0);
640 		if (len >= (int)ML_MASK) {
641 			*token += ML_MASK;
642 			len -= ML_MASK;
643 			for (; len > 509; len -= 510) {
644 				*op++ = 255;
645 				*op++ = 255;
646 			}
647 			if (len > 254) {
648 				len -= 255;
649 				*op++ = 255;
650 			}
651 			*op++ = (BYTE)len;
652 		} else
653 			*token += len;
654 
655 		/* Test end of chunk */
656 		if (ip > mflimit) {
657 			anchor = ip;
658 			break;
659 		}
660 		/* Fill table */
661 		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;
662 
663 		/* Test next position */
664 		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
665 		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
666 		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
667 			token = op++;
668 			*token = 0;
669 			goto _next_match;
670 		}
671 		/* Prepare next loop */
672 		anchor = ip++;
673 		forwardH = LZ4_HASH_VALUE(ip);
674 	}
675 
676 	_last_literals:
677 	/* Encode Last Literals */
678 	{
679 		int lastRun = iend - anchor;
680 		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
681 		    oend)
682 			return (0);
683 		if (lastRun >= (int)RUN_MASK) {
684 			*op++ = (RUN_MASK << ML_BITS);
685 			lastRun -= RUN_MASK;
686 			for (; lastRun > 254; lastRun -= 255) {
687 				*op++ = 255;
688 			}
689 			*op++ = (BYTE)lastRun;
690 		} else
691 			*op++ = (lastRun << ML_BITS);
692 		(void) memcpy(op, anchor, iend - anchor);
693 		op += iend - anchor;
694 	}
695 
696 	/* End */
697 	return (int)(((char *)op) - dest);
698 }
699 
700 
701 
702 /* Note : this function is valid only if isize < LZ4_64KLIMIT */
703 #define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
704 #define	HASHLOG64K (HASH_LOG + 1)
705 #define	HASH64KTABLESIZE (1U << HASHLOG64K)
706 #define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
707 	HASHLOG64K))
708 #define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))
709 
710 /*ARGSUSED*/
711 static int
712 LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
713     int osize)
714 {
715 #if HEAPMODE
716 	struct refTables *srt = (struct refTables *)ctx;
717 	U16 *HashTable = (U16 *) (srt->hashTable);
718 #else
719 	U16 HashTable[HASH64KTABLESIZE] = { 0 };
720 #endif
721 
722 	const BYTE *ip = (BYTE *) source;
723 	const BYTE *anchor = ip;
724 	const BYTE *const base = ip;
725 	const BYTE *const iend = ip + isize;
726 	const BYTE *const oend = (BYTE *) dest + osize;
727 	const BYTE *const mflimit = iend - MFLIMIT;
728 #define	matchlimit (iend - LASTLITERALS)
729 
730 	BYTE *op = (BYTE *) dest;
731 
732 	int len, length;
733 	const int skipStrength = SKIPSTRENGTH;
734 	U32 forwardH;
735 
736 	/* Init */
737 	if (isize < MINLENGTH)
738 		goto _last_literals;
739 
740 	/* First Byte */
741 	ip++;
742 	forwardH = LZ4_HASH64K_VALUE(ip);
743 
744 	/* Main Loop */
745 	for (;;) {
746 		int findMatchAttempts = (1U << skipStrength) + 3;
747 		const BYTE *forwardIp = ip;
748 		const BYTE *ref;
749 		BYTE *token;
750 
751 		/* Find a match */
752 		do {
753 			U32 h = forwardH;
754 			int step = findMatchAttempts++ >> skipStrength;
755 			ip = forwardIp;
756 			forwardIp = ip + step;
757 
758 			if (forwardIp > mflimit) {
759 				goto _last_literals;
760 			}
761 
762 			forwardH = LZ4_HASH64K_VALUE(forwardIp);
763 			ref = base + HashTable[h];
764 			HashTable[h] = ip - base;
765 
766 		} while (A32(ref) != A32(ip));
767 
768 		/* Catch up */
769 		while ((ip > anchor) && (ref > (BYTE *) source) &&
770 		    (ip[-1] == ref[-1])) {
771 			ip--;
772 			ref--;
773 		}
774 
775 		/* Encode Literal length */
776 		length = ip - anchor;
777 		token = op++;
778 
779 		/* Check output limit */
780 		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
781 		    (length >> 8) > oend)
782 			return (0);
783 
784 		if (length >= (int)RUN_MASK) {
785 			*token = (RUN_MASK << ML_BITS);
786 			len = length - RUN_MASK;
787 			for (; len > 254; len -= 255)
788 				*op++ = 255;
789 			*op++ = (BYTE)len;
790 		} else
791 			*token = (length << ML_BITS);
792 
793 		/* Copy Literals */
794 		LZ4_BLINDCOPY(anchor, op, length);
795 
796 		_next_match:
797 		/* Encode Offset */
798 		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
799 
800 		/* Start Counting */
801 		ip += MINMATCH;
802 		ref += MINMATCH;	/* MinMatch verified */
803 		anchor = ip;
804 		while (ip < matchlimit - (STEPSIZE - 1)) {
805 			UARCH diff = AARCH(ref) ^ AARCH(ip);
806 			if (!diff) {
807 				ip += STEPSIZE;
808 				ref += STEPSIZE;
809 				continue;
810 			}
811 			ip += LZ4_NbCommonBytes(diff);
812 			goto _endCount;
813 		}
814 #if LZ4_ARCH64
815 		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
816 			ip += 4;
817 			ref += 4;
818 		}
819 #endif
820 		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
821 			ip += 2;
822 			ref += 2;
823 		}
824 		if ((ip < matchlimit) && (*ref == *ip))
825 			ip++;
826 		_endCount:
827 
828 		/* Encode MatchLength */
829 		len = (ip - anchor);
830 		/* Check output limit */
831 		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
832 			return (0);
833 		if (len >= (int)ML_MASK) {
834 			*token += ML_MASK;
835 			len -= ML_MASK;
836 			for (; len > 509; len -= 510) {
837 				*op++ = 255;
838 				*op++ = 255;
839 			}
840 			if (len > 254) {
841 				len -= 255;
842 				*op++ = 255;
843 			}
844 			*op++ = (BYTE)len;
845 		} else
846 			*token += len;
847 
848 		/* Test end of chunk */
849 		if (ip > mflimit) {
850 			anchor = ip;
851 			break;
852 		}
853 		/* Fill table */
854 		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;
855 
856 		/* Test next position */
857 		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
858 		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
859 		if (A32(ref) == A32(ip)) {
860 			token = op++;
861 			*token = 0;
862 			goto _next_match;
863 		}
864 		/* Prepare next loop */
865 		anchor = ip++;
866 		forwardH = LZ4_HASH64K_VALUE(ip);
867 	}
868 
869 	_last_literals:
870 	/* Encode Last Literals */
871 	{
872 		int lastRun = iend - anchor;
873 		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
874 		    oend)
875 			return (0);
876 		if (lastRun >= (int)RUN_MASK) {
877 			*op++ = (RUN_MASK << ML_BITS);
878 			lastRun -= RUN_MASK;
879 			for (; lastRun > 254; lastRun -= 255)
880 				*op++ = 255;
881 			*op++ = (BYTE)lastRun;
882 		} else
883 			*op++ = (lastRun << ML_BITS);
884 		(void) memcpy(op, anchor, iend - anchor);
885 		op += iend - anchor;
886 	}
887 
888 	/* End */
889 	return (int)(((char *)op) - dest);
890 }
891 
892 static int
893 real_LZ4_compress(const char *source, char *dest, int isize, int osize)
894 {
895 #if HEAPMODE
896 	void *ctx = kmem_zalloc(sizeof (struct refTables), KM_NOSLEEP);
897 	int result;
898 
899 	/*
900 	 * out of kernel memory, gently fall through - this will disable
901 	 * compression in zio_compress_data
902 	 */
903 	if (ctx == NULL)
904 		return (0);
905 
906 	if (isize < LZ4_64KLIMIT)
907 		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
908 	else
909 		result = LZ4_compressCtx(ctx, source, dest, isize, osize);
910 
911 	kmem_free(ctx, sizeof (struct refTables));
912 	return (result);
913 #else
914 	if (isize < (int)LZ4_64KLIMIT)
915 		return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
916 	return (LZ4_compressCtx(NULL, source, dest, isize, osize));
917 #endif
918 }
919 
920 /* Decompression functions */
921 
922 /*
923  * Note: The decoding functions real_LZ4_uncompress() and
924  *	LZ4_uncompress_unknownOutputSize() are safe against "buffer overflow"
925  *	attack type. They will never write nor read outside of the provided
926  *	output buffers. LZ4_uncompress_unknownOutputSize() also insures that
927  *	it will never read outside of the input buffer. A corrupted input
928  *	will produce an error result, a negative int, indicating the position
929  *	of the error within input stream.
930  */
931 
932 static int
933 real_LZ4_uncompress(const char *source, char *dest, int osize)
934 {
935 	/* Local Variables */
936 	const BYTE *restrict ip = (const BYTE *) source;
937 	const BYTE *ref;
938 
939 	BYTE *op = (BYTE *) dest;
940 	BYTE *const oend = op + osize;
941 	BYTE *cpy;
942 
943 	unsigned token;
944 
945 	size_t length;
946 	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
947 #if LZ4_ARCH64
948 	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
949 #endif
950 
951 	/* Main Loop */
952 	for (;;) {
953 		/* get runlength */
954 		token = *ip++;
955 		if ((length = (token >> ML_BITS)) == RUN_MASK) {
956 			size_t len;
957 			for (; (len = *ip++) == 255; length += 255) {
958 			}
959 			length += len;
960 		}
961 		/* copy literals */
962 		cpy = op + length;
963 		/* CORNER-CASE: cpy might overflow. */
964 		if (cpy < op)
965 			goto _output_error;	/* cpy was overflowed, bail! */
966 		if unlikely(cpy > oend - COPYLENGTH) {
967 			if (cpy != oend)
968 				/* Error: we must necessarily stand at EOF */
969 				goto _output_error;
970 			(void) memcpy(op, ip, length);
971 			ip += length;
972 			break;	/* EOF */
973 			}
974 		LZ4_WILDCOPY(ip, op, cpy);
975 		ip -= (op - cpy);
976 		op = cpy;
977 
978 		/* get offset */
979 		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
980 		ip += 2;
981 		if unlikely(ref < (BYTE * const) dest)
982 			/*
983 			 * Error: offset create reference outside destination
984 			 * buffer
985 			 */
986 			goto _output_error;
987 
988 		/* get matchlength */
989 		if ((length = (token & ML_MASK)) == ML_MASK) {
990 			for (; *ip == 255; length += 255) {
991 				ip++;
992 			}
993 			length += *ip++;
994 		}
995 		/* copy repeated sequence */
996 		if unlikely(op - ref < STEPSIZE) {
997 #if LZ4_ARCH64
998 			size_t dec64 = dec64table[op-ref];
999 #else
1000 			const int dec64 = 0;
1001 #endif
1002 			op[0] = ref[0];
1003 			op[1] = ref[1];
1004 			op[2] = ref[2];
1005 			op[3] = ref[3];
1006 			op += 4;
1007 			ref += 4;
1008 			ref -= dec32table[op-ref];
1009 			A32(op) = A32(ref);
1010 			op += STEPSIZE - 4;
1011 			ref -= dec64;
1012 		} else {
1013 			LZ4_COPYSTEP(ref, op);
1014 		}
1015 		cpy = op + length - (STEPSIZE - 4);
1016 		if (cpy > oend - COPYLENGTH) {
1017 			if (cpy > oend)
1018 				/*
1019 				 * Error: request to write beyond destination
1020 				 * buffer
1021 				 */
1022 				goto _output_error;
1023 			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
1024 			while (op < cpy)
1025 				*op++ = *ref++;
1026 			op = cpy;
1027 			if (op == oend)
1028 				/*
1029 				 * Check EOF (should never happen, since last
1030 				 * 5 bytes are supposed to be literals)
1031 				 */
1032 				goto _output_error;
1033 			continue;
1034 		}
1035 		LZ4_SECURECOPY(ref, op, cpy);
1036 		op = cpy;	/* correction */
1037 	}
1038 
1039 	/* end of decoding */
1040 	return (int)(((char *)ip) - source);
1041 
1042 	/* write overflow error detected */
1043 	_output_error:
1044 	return (int)(-(((char *)ip) - source));
1045 }
1046 
1047 static int
1048 LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
1049     int maxOutputSize)
1050 {
1051 	/* Local Variables */
1052 	const BYTE *restrict ip = (const BYTE *) source;
1053 	const BYTE *const iend = ip + isize;
1054 	const BYTE *ref;
1055 
1056 	BYTE *op = (BYTE *) dest;
1057 	BYTE *const oend = op + maxOutputSize;
1058 	BYTE *cpy;
1059 
1060 	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
1061 #if LZ4_ARCH64
1062 	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
1063 #endif
1064 
1065 	/* Main Loop */
1066 	while (ip < iend) {
1067 		unsigned token;
1068 		size_t length;
1069 
1070 		/* get runlength */
1071 		token = *ip++;
1072 		if ((length = (token >> ML_BITS)) == RUN_MASK) {
1073 			int s = 255;
1074 			while ((ip < iend) && (s == 255)) {
1075 				s = *ip++;
1076 				length += s;
1077 			}
1078 		}
1079 		/* copy literals */
1080 		cpy = op + length;
1081 		/* CORNER-CASE: cpy might overflow. */
1082 		if (cpy < op)
1083 			goto _output_error;	/* cpy was overflowed, bail! */
1084 		if ((cpy > oend - COPYLENGTH) ||
1085 		    (ip + length > iend - COPYLENGTH)) {
1086 			if (cpy > oend)
1087 				/* Error: writes beyond output buffer */
1088 				goto _output_error;
1089 			if (ip + length != iend)
1090 				/*
1091 				 * Error: LZ4 format requires to consume all
1092 				 * input at this stage
1093 				 */
1094 				goto _output_error;
1095 			(void) memcpy(op, ip, length);
1096 			op += length;
1097 			/* Necessarily EOF, due to parsing restrictions */
1098 			break;
1099 		}
1100 		LZ4_WILDCOPY(ip, op, cpy);
1101 		ip -= (op - cpy);
1102 		op = cpy;
1103 
1104 		/* get offset */
1105 		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
1106 		ip += 2;
1107 		if (ref < (BYTE * const) dest)
1108 			/*
1109 			 * Error: offset creates reference outside of
1110 			 * destination buffer
1111 			 */
1112 			goto _output_error;
1113 
1114 		/* get matchlength */
1115 		if ((length = (token & ML_MASK)) == ML_MASK) {
1116 			while (ip < iend) {
1117 				int s = *ip++;
1118 				length += s;
1119 				if (s == 255)
1120 					continue;
1121 				break;
1122 			}
1123 		}
1124 		/* copy repeated sequence */
1125 		if unlikely(op - ref < STEPSIZE) {
1126 #if LZ4_ARCH64
1127 			size_t dec64 = dec64table[op-ref];
1128 #else
1129 			const int dec64 = 0;
1130 #endif
1131 			op[0] = ref[0];
1132 			op[1] = ref[1];
1133 			op[2] = ref[2];
1134 			op[3] = ref[3];
1135 			op += 4;
1136 			ref += 4;
1137 			ref -= dec32table[op-ref];
1138 			A32(op) = A32(ref);
1139 			op += STEPSIZE - 4;
1140 			ref -= dec64;
1141 		} else {
1142 			LZ4_COPYSTEP(ref, op);
1143 		}
1144 		cpy = op + length - (STEPSIZE - 4);
1145 		if (cpy > oend - COPYLENGTH) {
1146 			if (cpy > oend)
1147 				/*
1148 				 * Error: request to write outside of
1149 				 * destination buffer
1150 				 */
1151 				goto _output_error;
1152 			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
1153 			while (op < cpy)
1154 				*op++ = *ref++;
1155 			op = cpy;
1156 			if (op == oend)
1157 				/*
1158 				 * Check EOF (should never happen, since
1159 				 * last 5 bytes are supposed to be literals)
1160 				 */
1161 				goto _output_error;
1162 			continue;
1163 		}
1164 		LZ4_SECURECOPY(ref, op, cpy);
1165 		op = cpy;	/* correction */
1166 	}
1167 
1168 	/* end of decoding */
1169 	return (int)(((char *)op) - dest);
1170 
1171 	/* write overflow error detected */
1172 	_output_error:
1173 	return (int)(-(((char *)ip) - source));
1174 }
1175