xref: /titanic_50/usr/src/uts/common/fs/zfs/lz4.c (revision 197c9523b8946cf70fab2bc4ee633b18fc5bde68)
1 /*
2  * LZ4 - Fast LZ compression algorithm
3  * Header File
4  * Copyright (C) 2011-2013, Yann Collet.
5  * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are
9  * met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  * notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above
14  * copyright notice, this list of conditions and the following disclaimer
15  * in the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * You can contact the author at :
31  * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
32  * - LZ4 source repository : http://code.google.com/p/lz4/
33  */
34 
35 #include <sys/zfs_context.h>
36 
37 static int real_LZ4_compress(const char *source, char *dest, int isize,
38     int osize);
39 static int real_LZ4_uncompress(const char *source, char *dest, int osize);
40 static int LZ4_compressBound(int isize);
41 static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
42     int isize, int maxOutputSize);
43 static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
44     int isize, int osize);
45 static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
46     int isize, int osize);
47 
48 /*ARGSUSED*/
49 size_t
50 lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
51 {
52 	uint32_t bufsiz;
53 	char *dest = d_start;
54 
55 	ASSERT(d_len >= sizeof (bufsiz));
56 
57 	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
58 	    d_len - sizeof (bufsiz));
59 
60 	/* Signal an error if the compression routine returned zero. */
61 	if (bufsiz == 0)
62 		return (s_len);
63 
64 	/*
65 	 * Encode the compresed buffer size at the start. We'll need this in
66 	 * decompression to counter the effects of padding which might be
67 	 * added to the compressed buffer and which, if unhandled, would
68 	 * confuse the hell out of our decompression function.
69 	 */
70 	*(uint32_t *)dest = BE_32(bufsiz);
71 
72 	return (bufsiz + sizeof (bufsiz));
73 }
74 
75 /*ARGSUSED*/
76 int
77 lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
78 {
79 	const char *src = s_start;
80 	uint32_t bufsiz = BE_IN32(src);
81 
82 	/* invalid compressed buffer size encoded at start */
83 	if (bufsiz + sizeof (bufsiz) > s_len)
84 		return (1);
85 
86 	/*
87 	 * Returns 0 on success (decompression function returned non-negative)
88 	 * and non-zero on failure (decompression function returned negative.
89 	 */
90 	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
91 	    d_start, bufsiz, d_len) < 0);
92 }
93 
94 /*
95  * LZ4 API Description:
96  *
97  * Simple Functions:
98  * real_LZ4_compress() :
99  * 	isize  : is the input size. Max supported value is ~1.9GB
100  * 	return : the number of bytes written in buffer dest
101  *		 or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
102  * 	note : destination buffer must be already allocated.
103  * 		destination buffer must be sized to handle worst cases
104  * 		situations (input data not compressible) worst case size
105  * 		evaluation is provided by function LZ4_compressBound().
106  *
107  * real_LZ4_uncompress() :
108  * 	osize  : is the output size, therefore the original size
109  * 	return : the number of bytes read in the source buffer.
110  * 		If the source stream is malformed, the function will stop
111  * 		decoding and return a negative result, indicating the byte
112  * 		position of the faulty instruction. This function never
113  * 		writes beyond dest + osize, and is therefore protected
114  * 		against malicious data packets.
115  * 	note : destination buffer must be already allocated
116  *
117  * Advanced Functions
118  *
119  * LZ4_compressBound() :
120  * 	Provides the maximum size that LZ4 may output in a "worst case"
121  * 	scenario (input data not compressible) primarily useful for memory
122  * 	allocation of output buffer.
123  *
124  * 	isize  : is the input size. Max supported value is ~1.9GB
125  * 	return : maximum output size in a "worst case" scenario
126  * 	note : this function is limited by "int" range (2^31-1)
127  *
128  * LZ4_uncompress_unknownOutputSize() :
129  * 	isize  : is the input size, therefore the compressed size
130  * 	maxOutputSize : is the size of the destination buffer (which must be
131  * 		already allocated)
132  * 	return : the number of bytes decoded in the destination buffer
133  * 		(necessarily <= maxOutputSize). If the source stream is
134  * 		malformed, the function will stop decoding and return a
135  * 		negative result, indicating the byte position of the faulty
136  * 		instruction. This function never writes beyond dest +
137  * 		maxOutputSize, and is therefore protected against malicious
138  * 		data packets.
139  * 	note   : Destination buffer must be already allocated.
140  *		This version is slightly slower than real_LZ4_uncompress()
141  *
142  * LZ4_compressCtx() :
143  * 	This function explicitly handles the CTX memory structure.
144  *
145  * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
146  * 	by the caller (either on the stack or using kmem_zalloc). Passing NULL
147  * 	isn't valid.
148  *
149  * LZ4_compress64kCtx() :
150  * 	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
151  * 	isize *Must* be <64KB, otherwise the output will be corrupted.
152  *
153  * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
154  * 	by the caller (either on the stack or using kmem_zalloc). Passing NULL
155  * 	isn't valid.
156  */
157 
158 /*
159  * Tuning parameters
160  */
161 
162 /*
163  * COMPRESSIONLEVEL: Increasing this value improves compression ratio
164  *	 Lowering this value reduces memory usage. Reduced memory usage
165  *	typically improves speed, due to cache effect (ex: L1 32KB for Intel,
166  *	L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
167  *	(examples : 12 -> 16KB ; 17 -> 512KB)
168  */
169 #define	COMPRESSIONLEVEL 12
170 
171 /*
172  * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
173  *	algorithm skip faster data segments considered "incompressible".
174  *	This may decrease compression ratio dramatically, but will be
175  *	faster on incompressible data. Increasing this value will make
176  *	the algorithm search more before declaring a segment "incompressible".
177  *	This could improve compression a bit, but will be slower on
178  *	incompressible data. The default value (6) is recommended.
179  */
180 #define	NOTCOMPRESSIBLE_CONFIRMATION 6
181 
182 /*
183  * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
184  * performance for big endian cpu, but the resulting compressed stream
185  * will be incompatible with little-endian CPU. You can set this option
186  * to 1 in situations where data will stay within closed environment.
187  * This option is useless on Little_Endian CPU (such as x86).
188  */
189 /* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */
190 
191 /*
192  * CPU Feature Detection
193  */
194 
195 /* 32 or 64 bits ? */
196 #if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
197     defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
198     defined(__LP64__) || defined(_LP64))
199 #define	LZ4_ARCH64 1
200 #else
201 #define	LZ4_ARCH64 0
202 #endif
203 
204 /*
205  * Limits the amount of stack space that the algorithm may consume to hold
206  * the compression lookup table. The value `9' here means we'll never use
207  * more than 2k of stack (see above for a description of COMPRESSIONLEVEL).
208  * If more memory is needed, it is allocated from the heap.
209  */
210 #define	STACKLIMIT 9
211 
212 /*
213  * Little Endian or Big Endian?
214  * Note: overwrite the below #define if you know your architecture endianess.
215  */
216 #if (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || \
217 	defined(_BIG_ENDIAN) || defined(_ARCH_PPC) || defined(__PPC__) || \
218 	defined(__PPC) || defined(PPC) || defined(__powerpc__) || \
219 	defined(__powerpc) || defined(powerpc) || \
220 	((defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))))
221 #define	LZ4_BIG_ENDIAN 1
222 #else
223 /*
224  * Little Endian assumed. PDP Endian and other very rare endian format
225  * are unsupported.
226  */
227 #endif
228 
229 /*
230  * Unaligned memory access is automatically enabled for "common" CPU,
231  * such as x86. For others CPU, the compiler will be more cautious, and
232  * insert extra code to ensure aligned access is respected. If you know
233  * your target CPU supports unaligned memory access, you may want to
234  * force this option manually to improve performance
235  */
236 #if defined(__ARM_FEATURE_UNALIGNED)
237 #define	LZ4_FORCE_UNALIGNED_ACCESS 1
238 #endif
239 
240 /* #define	LZ4_FORCE_SW_BITCOUNT */
241 
242 /*
243  * Compiler Options
244  */
245 #if __STDC_VERSION__ >= 199901L	/* C99 */
246 /* "restrict" is a known keyword */
247 #else
248 /* Disable restrict */
249 #define	restrict
250 #endif
251 
252 #define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
253 
254 #ifdef _MSC_VER
255 /* Visual Studio */
256 /* Visual is not C99, but supports some kind of inline */
257 #define	inline __forceinline
258 #if LZ4_ARCH64
259 /* For Visual 2005 */
260 #pragma intrinsic(_BitScanForward64)
261 #pragma intrinsic(_BitScanReverse64)
262 #else /* !LZ4_ARCH64 */
263 /* For Visual 2005 */
264 #pragma intrinsic(_BitScanForward)
265 #pragma intrinsic(_BitScanReverse)
266 #endif /* !LZ4_ARCH64 */
267 #endif /* _MSC_VER */
268 
269 #ifdef _MSC_VER
270 #define	lz4_bswap16(x) _byteswap_ushort(x)
271 #else /* !_MSC_VER */
272 #define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
273 	(((x) & 0xffu) << 8)))
274 #endif /* !_MSC_VER */
275 
276 #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
277 #define	expect(expr, value)    (__builtin_expect((expr), (value)))
278 #else
279 #define	expect(expr, value)    (expr)
280 #endif
281 
282 #define	likely(expr)	expect((expr) != 0, 1)
283 #define	unlikely(expr)	expect((expr) != 0, 0)
284 
285 /* Basic types */
286 #if defined(_MSC_VER)
287 /* Visual Studio does not support 'stdint' natively */
288 #define	BYTE	unsigned __int8
289 #define	U16	unsigned __int16
290 #define	U32	unsigned __int32
291 #define	S32	__int32
292 #define	U64	unsigned __int64
293 #else /* !defined(_MSC_VER) */
294 #define	BYTE	uint8_t
295 #define	U16	uint16_t
296 #define	U32	uint32_t
297 #define	S32	int32_t
298 #define	U64	uint64_t
299 #endif /* !defined(_MSC_VER) */
300 
301 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
302 #pragma pack(1)
303 #endif
304 
305 typedef struct _U16_S {
306 	U16 v;
307 } U16_S;
308 typedef struct _U32_S {
309 	U32 v;
310 } U32_S;
311 typedef struct _U64_S {
312 	U64 v;
313 } U64_S;
314 
315 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
316 #pragma pack()
317 #endif
318 
319 #define	A64(x) (((U64_S *)(x))->v)
320 #define	A32(x) (((U32_S *)(x))->v)
321 #define	A16(x) (((U16_S *)(x))->v)
322 
323 /*
324  * Constants
325  */
326 #define	MINMATCH 4
327 
328 #define	HASH_LOG COMPRESSIONLEVEL
329 #define	HASHTABLESIZE (1 << HASH_LOG)
330 #define	HASH_MASK (HASHTABLESIZE - 1)
331 
332 #define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
333 	NOTCOMPRESSIBLE_CONFIRMATION : 2)
334 
335 /*
336  * Defines if memory is allocated into the stack (local variable),
337  * or into the heap (kmem_alloc()).
338  */
339 #define	HEAPMODE (HASH_LOG > STACKLIMIT)
340 #define	COPYLENGTH 8
341 #define	LASTLITERALS 5
342 #define	MFLIMIT (COPYLENGTH + MINMATCH)
343 #define	MINLENGTH (MFLIMIT + 1)
344 
345 #define	MAXD_LOG 16
346 #define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)
347 
348 #define	ML_BITS 4
349 #define	ML_MASK ((1U<<ML_BITS)-1)
350 #define	RUN_BITS (8-ML_BITS)
351 #define	RUN_MASK ((1U<<RUN_BITS)-1)
352 
353 
354 /*
355  * Architecture-specific macros
356  */
357 #if LZ4_ARCH64
358 #define	STEPSIZE 8
359 #define	UARCH U64
360 #define	AARCH A64
361 #define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
362 #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
363 #define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
364 #define	HTYPE U32
365 #define	INITBASE(base)		const BYTE* const base = ip
366 #else /* !LZ4_ARCH64 */
367 #define	STEPSIZE 4
368 #define	UARCH U32
369 #define	AARCH A32
370 #define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
371 #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
372 #define	LZ4_SECURECOPY		LZ4_WILDCOPY
373 #define	HTYPE const BYTE *
374 #define	INITBASE(base)		const int base = 0
375 #endif /* !LZ4_ARCH64 */
376 
377 #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
378 #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
379 	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
380 #define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
381 	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
382 #else
383 #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
384 #define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
385 #endif
386 
387 
388 /* Local structures */
389 struct refTables {
390 	HTYPE hashTable[HASHTABLESIZE];
391 };
392 
393 
394 /* Macros */
395 #define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
396 	HASH_LOG))
397 #define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
398 #define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
399 #define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
400 	d = e; }
401 
402 
403 /* Private functions */
404 #if LZ4_ARCH64
405 
406 static inline int
407 LZ4_NbCommonBytes(register U64 val)
408 {
409 #if defined(LZ4_BIG_ENDIAN)
410 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
411 	unsigned long r = 0;
412 	_BitScanReverse64(&r, val);
413 	return (int)(r >> 3);
414 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
415 	!defined(LZ4_FORCE_SW_BITCOUNT)
416 	return (__builtin_clzll(val) >> 3);
417 #else
418 	int r;
419 	if (!(val >> 32)) {
420 		r = 4;
421 	} else {
422 		r = 0;
423 		val >>= 32;
424 	}
425 	if (!(val >> 16)) {
426 		r += 2;
427 		val >>= 8;
428 	} else {
429 		val >>= 24;
430 	}
431 	r += (!val);
432 	return (r);
433 #endif
434 #else
435 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
436 	unsigned long r = 0;
437 	_BitScanForward64(&r, val);
438 	return (int)(r >> 3);
439 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
440 	!defined(LZ4_FORCE_SW_BITCOUNT)
441 	return (__builtin_ctzll(val) >> 3);
442 #else
443 	static const int DeBruijnBytePos[64] =
444 	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
445 		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
446 		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
447 		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
448 	};
449 	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
450 	    58];
451 #endif
452 #endif
453 }
454 
455 #else
456 
457 static inline int
458 LZ4_NbCommonBytes(register U32 val)
459 {
460 #if defined(LZ4_BIG_ENDIAN)
461 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
462 	unsigned long r = 0;
463 	_BitScanReverse(&r, val);
464 	return (int)(r >> 3);
465 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
466 	!defined(LZ4_FORCE_SW_BITCOUNT)
467 	return (__builtin_clz(val) >> 3);
468 #else
469 	int r;
470 	if (!(val >> 16)) {
471 		r = 2;
472 		val >>= 8;
473 	} else {
474 		r = 0;
475 		val >>= 24;
476 	}
477 	r += (!val);
478 	return (r);
479 #endif
480 #else
481 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
482 	unsigned long r = 0;
483 	_BitScanForward(&r, val);
484 	return (int)(r >> 3);
485 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
486 	!defined(LZ4_FORCE_SW_BITCOUNT)
487 	return (__builtin_ctz(val) >> 3);
488 #else
489 	static const int DeBruijnBytePos[32] = {
490 		0, 0, 3, 0, 3, 1, 3, 0,
491 		3, 2, 2, 1, 3, 2, 0, 1,
492 		3, 3, 1, 2, 2, 2, 2, 0,
493 		3, 1, 2, 0, 1, 0, 1, 1
494 	};
495 	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
496 	    27];
497 #endif
498 #endif
499 }
500 
501 #endif
502 
503 /* Public functions */
504 
505 static int
506 LZ4_compressBound(int isize)
507 {
508 	return (isize + (isize / 255) + 16);
509 }
510 
511 /* Compression functions */
512 
513 /*ARGSUSED*/
514 static int
515 LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
516     int osize)
517 {
518 #if HEAPMODE
519 	struct refTables *srt = (struct refTables *)ctx;
520 	HTYPE *HashTable = (HTYPE *) (srt->hashTable);
521 #else
522 	HTYPE HashTable[HASHTABLESIZE] = { 0 };
523 #endif
524 
525 	const BYTE *ip = (BYTE *) source;
526 	INITBASE(base);
527 	const BYTE *anchor = ip;
528 	const BYTE *const iend = ip + isize;
529 	const BYTE *const oend = (BYTE *) dest + osize;
530 	const BYTE *const mflimit = iend - MFLIMIT;
531 #define	matchlimit (iend - LASTLITERALS)
532 
533 	BYTE *op = (BYTE *) dest;
534 
535 	int len, length;
536 	const int skipStrength = SKIPSTRENGTH;
537 	U32 forwardH;
538 
539 
540 	/* Init */
541 	if (isize < MINLENGTH)
542 		goto _last_literals;
543 
544 	/* First Byte */
545 	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
546 	ip++;
547 	forwardH = LZ4_HASH_VALUE(ip);
548 
549 	/* Main Loop */
550 	for (;;) {
551 		int findMatchAttempts = (1U << skipStrength) + 3;
552 		const BYTE *forwardIp = ip;
553 		const BYTE *ref;
554 		BYTE *token;
555 
556 		/* Find a match */
557 		do {
558 			U32 h = forwardH;
559 			int step = findMatchAttempts++ >> skipStrength;
560 			ip = forwardIp;
561 			forwardIp = ip + step;
562 
563 			if unlikely(forwardIp > mflimit) {
564 				goto _last_literals;
565 			}
566 
567 			forwardH = LZ4_HASH_VALUE(forwardIp);
568 			ref = base + HashTable[h];
569 			HashTable[h] = ip - base;
570 
571 		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
572 
573 		/* Catch up */
574 		while ((ip > anchor) && (ref > (BYTE *) source) &&
575 		    unlikely(ip[-1] == ref[-1])) {
576 			ip--;
577 			ref--;
578 		}
579 
580 		/* Encode Literal length */
581 		length = ip - anchor;
582 		token = op++;
583 
584 		/* Check output limit */
585 		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
586 		    (length >> 8) > oend)
587 			return (0);
588 
589 		if (length >= (int)RUN_MASK) {
590 			*token = (RUN_MASK << ML_BITS);
591 			len = length - RUN_MASK;
592 			for (; len > 254; len -= 255)
593 				*op++ = 255;
594 			*op++ = (BYTE)len;
595 		} else
596 			*token = (length << ML_BITS);
597 
598 		/* Copy Literals */
599 		LZ4_BLINDCOPY(anchor, op, length);
600 
601 		_next_match:
602 		/* Encode Offset */
603 		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
604 
605 		/* Start Counting */
606 		ip += MINMATCH;
607 		ref += MINMATCH;	/* MinMatch verified */
608 		anchor = ip;
609 		while likely(ip < matchlimit - (STEPSIZE - 1)) {
610 			UARCH diff = AARCH(ref) ^ AARCH(ip);
611 			if (!diff) {
612 				ip += STEPSIZE;
613 				ref += STEPSIZE;
614 				continue;
615 			}
616 			ip += LZ4_NbCommonBytes(diff);
617 			goto _endCount;
618 		}
619 #if LZ4_ARCH64
620 		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
621 			ip += 4;
622 			ref += 4;
623 		}
624 #endif
625 		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
626 			ip += 2;
627 			ref += 2;
628 		}
629 		if ((ip < matchlimit) && (*ref == *ip))
630 			ip++;
631 		_endCount:
632 
633 		/* Encode MatchLength */
634 		len = (ip - anchor);
635 		/* Check output limit */
636 		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
637 			return (0);
638 		if (len >= (int)ML_MASK) {
639 			*token += ML_MASK;
640 			len -= ML_MASK;
641 			for (; len > 509; len -= 510) {
642 				*op++ = 255;
643 				*op++ = 255;
644 			}
645 			if (len > 254) {
646 				len -= 255;
647 				*op++ = 255;
648 			}
649 			*op++ = (BYTE)len;
650 		} else
651 			*token += len;
652 
653 		/* Test end of chunk */
654 		if (ip > mflimit) {
655 			anchor = ip;
656 			break;
657 		}
658 		/* Fill table */
659 		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;
660 
661 		/* Test next position */
662 		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
663 		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
664 		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
665 			token = op++;
666 			*token = 0;
667 			goto _next_match;
668 		}
669 		/* Prepare next loop */
670 		anchor = ip++;
671 		forwardH = LZ4_HASH_VALUE(ip);
672 	}
673 
674 	_last_literals:
675 	/* Encode Last Literals */
676 	{
677 		int lastRun = iend - anchor;
678 		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
679 		    oend)
680 			return (0);
681 		if (lastRun >= (int)RUN_MASK) {
682 			*op++ = (RUN_MASK << ML_BITS);
683 			lastRun -= RUN_MASK;
684 			for (; lastRun > 254; lastRun -= 255) {
685 				*op++ = 255;
686 			}
687 			*op++ = (BYTE)lastRun;
688 		} else
689 			*op++ = (lastRun << ML_BITS);
690 		(void) memcpy(op, anchor, iend - anchor);
691 		op += iend - anchor;
692 	}
693 
694 	/* End */
695 	return (int)(((char *)op) - dest);
696 }
697 
698 
699 
700 /* Note : this function is valid only if isize < LZ4_64KLIMIT */
701 #define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
702 #define	HASHLOG64K (HASH_LOG + 1)
703 #define	HASH64KTABLESIZE (1U << HASHLOG64K)
704 #define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
705 	HASHLOG64K))
706 #define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))
707 
708 /*ARGSUSED*/
709 static int
710 LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
711     int osize)
712 {
713 #if HEAPMODE
714 	struct refTables *srt = (struct refTables *)ctx;
715 	U16 *HashTable = (U16 *) (srt->hashTable);
716 #else
717 	U16 HashTable[HASH64KTABLESIZE] = { 0 };
718 #endif
719 
720 	const BYTE *ip = (BYTE *) source;
721 	const BYTE *anchor = ip;
722 	const BYTE *const base = ip;
723 	const BYTE *const iend = ip + isize;
724 	const BYTE *const oend = (BYTE *) dest + osize;
725 	const BYTE *const mflimit = iend - MFLIMIT;
726 #define	matchlimit (iend - LASTLITERALS)
727 
728 	BYTE *op = (BYTE *) dest;
729 
730 	int len, length;
731 	const int skipStrength = SKIPSTRENGTH;
732 	U32 forwardH;
733 
734 	/* Init */
735 	if (isize < MINLENGTH)
736 		goto _last_literals;
737 
738 	/* First Byte */
739 	ip++;
740 	forwardH = LZ4_HASH64K_VALUE(ip);
741 
742 	/* Main Loop */
743 	for (;;) {
744 		int findMatchAttempts = (1U << skipStrength) + 3;
745 		const BYTE *forwardIp = ip;
746 		const BYTE *ref;
747 		BYTE *token;
748 
749 		/* Find a match */
750 		do {
751 			U32 h = forwardH;
752 			int step = findMatchAttempts++ >> skipStrength;
753 			ip = forwardIp;
754 			forwardIp = ip + step;
755 
756 			if (forwardIp > mflimit) {
757 				goto _last_literals;
758 			}
759 
760 			forwardH = LZ4_HASH64K_VALUE(forwardIp);
761 			ref = base + HashTable[h];
762 			HashTable[h] = ip - base;
763 
764 		} while (A32(ref) != A32(ip));
765 
766 		/* Catch up */
767 		while ((ip > anchor) && (ref > (BYTE *) source) &&
768 		    (ip[-1] == ref[-1])) {
769 			ip--;
770 			ref--;
771 		}
772 
773 		/* Encode Literal length */
774 		length = ip - anchor;
775 		token = op++;
776 
777 		/* Check output limit */
778 		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
779 		    (length >> 8) > oend)
780 			return (0);
781 
782 		if (length >= (int)RUN_MASK) {
783 			*token = (RUN_MASK << ML_BITS);
784 			len = length - RUN_MASK;
785 			for (; len > 254; len -= 255)
786 				*op++ = 255;
787 			*op++ = (BYTE)len;
788 		} else
789 			*token = (length << ML_BITS);
790 
791 		/* Copy Literals */
792 		LZ4_BLINDCOPY(anchor, op, length);
793 
794 		_next_match:
795 		/* Encode Offset */
796 		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
797 
798 		/* Start Counting */
799 		ip += MINMATCH;
800 		ref += MINMATCH;	/* MinMatch verified */
801 		anchor = ip;
802 		while (ip < matchlimit - (STEPSIZE - 1)) {
803 			UARCH diff = AARCH(ref) ^ AARCH(ip);
804 			if (!diff) {
805 				ip += STEPSIZE;
806 				ref += STEPSIZE;
807 				continue;
808 			}
809 			ip += LZ4_NbCommonBytes(diff);
810 			goto _endCount;
811 		}
812 #if LZ4_ARCH64
813 		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
814 			ip += 4;
815 			ref += 4;
816 		}
817 #endif
818 		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
819 			ip += 2;
820 			ref += 2;
821 		}
822 		if ((ip < matchlimit) && (*ref == *ip))
823 			ip++;
824 		_endCount:
825 
826 		/* Encode MatchLength */
827 		len = (ip - anchor);
828 		/* Check output limit */
829 		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
830 			return (0);
831 		if (len >= (int)ML_MASK) {
832 			*token += ML_MASK;
833 			len -= ML_MASK;
834 			for (; len > 509; len -= 510) {
835 				*op++ = 255;
836 				*op++ = 255;
837 			}
838 			if (len > 254) {
839 				len -= 255;
840 				*op++ = 255;
841 			}
842 			*op++ = (BYTE)len;
843 		} else
844 			*token += len;
845 
846 		/* Test end of chunk */
847 		if (ip > mflimit) {
848 			anchor = ip;
849 			break;
850 		}
851 		/* Fill table */
852 		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;
853 
854 		/* Test next position */
855 		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
856 		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
857 		if (A32(ref) == A32(ip)) {
858 			token = op++;
859 			*token = 0;
860 			goto _next_match;
861 		}
862 		/* Prepare next loop */
863 		anchor = ip++;
864 		forwardH = LZ4_HASH64K_VALUE(ip);
865 	}
866 
867 	_last_literals:
868 	/* Encode Last Literals */
869 	{
870 		int lastRun = iend - anchor;
871 		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
872 		    oend)
873 			return (0);
874 		if (lastRun >= (int)RUN_MASK) {
875 			*op++ = (RUN_MASK << ML_BITS);
876 			lastRun -= RUN_MASK;
877 			for (; lastRun > 254; lastRun -= 255)
878 				*op++ = 255;
879 			*op++ = (BYTE)lastRun;
880 		} else
881 			*op++ = (lastRun << ML_BITS);
882 		(void) memcpy(op, anchor, iend - anchor);
883 		op += iend - anchor;
884 	}
885 
886 	/* End */
887 	return (int)(((char *)op) - dest);
888 }
889 
890 static int
891 real_LZ4_compress(const char *source, char *dest, int isize, int osize)
892 {
893 #if HEAPMODE
894 	void *ctx = kmem_zalloc(sizeof (struct refTables), KM_NOSLEEP);
895 	int result;
896 
897 	/*
898 	 * out of kernel memory, gently fall through - this will disable
899 	 * compression in zio_compress_data
900 	 */
901 	if (ctx == NULL)
902 		return (0);
903 
904 	if (isize < LZ4_64KLIMIT)
905 		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
906 	else
907 		result = LZ4_compressCtx(ctx, source, dest, isize, osize);
908 
909 	kmem_free(ctx, sizeof (struct refTables));
910 	return (result);
911 #else
912 	if (isize < (int)LZ4_64KLIMIT)
913 		return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
914 	return (LZ4_compressCtx(NULL, source, dest, isize, osize));
915 #endif
916 }
917 
918 /* Decompression functions */
919 
920 /*
921  * Note: The decoding functions real_LZ4_uncompress() and
922  *	LZ4_uncompress_unknownOutputSize() are safe against "buffer overflow"
923  *	attack type. They will never write nor read outside of the provided
924  *	output buffers. LZ4_uncompress_unknownOutputSize() also insures that
925  *	it will never read outside of the input buffer. A corrupted input
926  *	will produce an error result, a negative int, indicating the position
927  *	of the error within input stream.
928  */
929 
930 static int
931 real_LZ4_uncompress(const char *source, char *dest, int osize)
932 {
933 	/* Local Variables */
934 	const BYTE *restrict ip = (const BYTE *) source;
935 	const BYTE *ref;
936 
937 	BYTE *op = (BYTE *) dest;
938 	BYTE *const oend = op + osize;
939 	BYTE *cpy;
940 
941 	unsigned token;
942 
943 	size_t length;
944 	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
945 #if LZ4_ARCH64
946 	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
947 #endif
948 
949 	/* Main Loop */
950 	for (;;) {
951 		/* get runlength */
952 		token = *ip++;
953 		if ((length = (token >> ML_BITS)) == RUN_MASK) {
954 			size_t len;
955 			for (; (len = *ip++) == 255; length += 255) {
956 			}
957 			length += len;
958 		}
959 		/* copy literals */
960 		cpy = op + length;
961 		if unlikely(cpy > oend - COPYLENGTH) {
962 			if (cpy != oend)
963 				/* Error: we must necessarily stand at EOF */
964 				goto _output_error;
965 			(void) memcpy(op, ip, length);
966 			ip += length;
967 			break;	/* EOF */
968 			}
969 		LZ4_WILDCOPY(ip, op, cpy);
970 		ip -= (op - cpy);
971 		op = cpy;
972 
973 		/* get offset */
974 		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
975 		ip += 2;
976 		if unlikely(ref < (BYTE * const) dest)
977 			/*
978 			 * Error: offset create reference outside destination
979 			 * buffer
980 			 */
981 			goto _output_error;
982 
983 		/* get matchlength */
984 		if ((length = (token & ML_MASK)) == ML_MASK) {
985 			for (; *ip == 255; length += 255) {
986 				ip++;
987 			}
988 			length += *ip++;
989 		}
990 		/* copy repeated sequence */
991 		if unlikely(op - ref < STEPSIZE) {
992 #if LZ4_ARCH64
993 			size_t dec64 = dec64table[op-ref];
994 #else
995 			const int dec64 = 0;
996 #endif
997 			op[0] = ref[0];
998 			op[1] = ref[1];
999 			op[2] = ref[2];
1000 			op[3] = ref[3];
1001 			op += 4;
1002 			ref += 4;
1003 			ref -= dec32table[op-ref];
1004 			A32(op) = A32(ref);
1005 			op += STEPSIZE - 4;
1006 			ref -= dec64;
1007 		} else {
1008 			LZ4_COPYSTEP(ref, op);
1009 		}
1010 		cpy = op + length - (STEPSIZE - 4);
1011 		if (cpy > oend - COPYLENGTH) {
1012 			if (cpy > oend)
1013 				/*
1014 				 * Error: request to write beyond destination
1015 				 * buffer
1016 				 */
1017 				goto _output_error;
1018 			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
1019 			while (op < cpy)
1020 				*op++ = *ref++;
1021 			op = cpy;
1022 			if (op == oend)
1023 				/*
1024 				 * Check EOF (should never happen, since last
1025 				 * 5 bytes are supposed to be literals)
1026 				 */
1027 				goto _output_error;
1028 			continue;
1029 		}
1030 		LZ4_SECURECOPY(ref, op, cpy);
1031 		op = cpy;	/* correction */
1032 	}
1033 
1034 	/* end of decoding */
1035 	return (int)(((char *)ip) - source);
1036 
1037 	/* write overflow error detected */
1038 	_output_error:
1039 	return (int)(-(((char *)ip) - source));
1040 }
1041 
1042 static int
1043 LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
1044     int maxOutputSize)
1045 {
1046 	/* Local Variables */
1047 	const BYTE *restrict ip = (const BYTE *) source;
1048 	const BYTE *const iend = ip + isize;
1049 	const BYTE *ref;
1050 
1051 	BYTE *op = (BYTE *) dest;
1052 	BYTE *const oend = op + maxOutputSize;
1053 	BYTE *cpy;
1054 
1055 	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
1056 #if LZ4_ARCH64
1057 	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
1058 #endif
1059 
1060 	/* Main Loop */
1061 	while (ip < iend) {
1062 		unsigned token;
1063 		size_t length;
1064 
1065 		/* get runlength */
1066 		token = *ip++;
1067 		if ((length = (token >> ML_BITS)) == RUN_MASK) {
1068 			int s = 255;
1069 			while ((ip < iend) && (s == 255)) {
1070 				s = *ip++;
1071 				length += s;
1072 			}
1073 		}
1074 		/* copy literals */
1075 		cpy = op + length;
1076 		if ((cpy > oend - COPYLENGTH) ||
1077 		    (ip + length > iend - COPYLENGTH)) {
1078 			if (cpy > oend)
1079 				/* Error: writes beyond output buffer */
1080 				goto _output_error;
1081 			if (ip + length != iend)
1082 				/*
1083 				 * Error: LZ4 format requires to consume all
1084 				 * input at this stage
1085 				 */
1086 				goto _output_error;
1087 			(void) memcpy(op, ip, length);
1088 			op += length;
1089 			/* Necessarily EOF, due to parsing restrictions */
1090 			break;
1091 		}
1092 		LZ4_WILDCOPY(ip, op, cpy);
1093 		ip -= (op - cpy);
1094 		op = cpy;
1095 
1096 		/* get offset */
1097 		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
1098 		ip += 2;
1099 		if (ref < (BYTE * const) dest)
1100 			/*
1101 			 * Error: offset creates reference outside of
1102 			 * destination buffer
1103 			 */
1104 			goto _output_error;
1105 
1106 		/* get matchlength */
1107 		if ((length = (token & ML_MASK)) == ML_MASK) {
1108 			while (ip < iend) {
1109 				int s = *ip++;
1110 				length += s;
1111 				if (s == 255)
1112 					continue;
1113 				break;
1114 			}
1115 		}
1116 		/* copy repeated sequence */
1117 		if unlikely(op - ref < STEPSIZE) {
1118 #if LZ4_ARCH64
1119 			size_t dec64 = dec64table[op-ref];
1120 #else
1121 			const int dec64 = 0;
1122 #endif
1123 			op[0] = ref[0];
1124 			op[1] = ref[1];
1125 			op[2] = ref[2];
1126 			op[3] = ref[3];
1127 			op += 4;
1128 			ref += 4;
1129 			ref -= dec32table[op-ref];
1130 			A32(op) = A32(ref);
1131 			op += STEPSIZE - 4;
1132 			ref -= dec64;
1133 		} else {
1134 			LZ4_COPYSTEP(ref, op);
1135 		}
1136 		cpy = op + length - (STEPSIZE - 4);
1137 		if (cpy > oend - COPYLENGTH) {
1138 			if (cpy > oend)
1139 				/*
1140 				 * Error: request to write outside of
1141 				 * destination buffer
1142 				 */
1143 				goto _output_error;
1144 			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
1145 			while (op < cpy)
1146 				*op++ = *ref++;
1147 			op = cpy;
1148 			if (op == oend)
1149 				/*
1150 				 * Check EOF (should never happen, since
1151 				 * last 5 bytes are supposed to be literals)
1152 				 */
1153 				goto _output_error;
1154 			continue;
1155 		}
1156 		LZ4_SECURECOPY(ref, op, cpy);
1157 		op = cpy;	/* correction */
1158 	}
1159 
1160 	/* end of decoding */
1161 	return (int)(((char *)op) - dest);
1162 
1163 	/* write overflow error detected */
1164 	_output_error:
1165 	return (int)(-(((char *)ip) - source));
1166 }
1167