xref: /illumos-gate/usr/src/common/lz4/lz4.c (revision f3a525d949e26238340eb8c1f6fdcfff91fc7e65)
1 /*
2  * LZ4 - Fast LZ compression algorithm
3  * Header File
4  * Copyright (C) 2011-2013, Yann Collet.
5  * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are
9  * met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  * notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above
14  * copyright notice, this list of conditions and the following disclaimer
15  * in the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * You can contact the author at :
31  * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
32  * - LZ4 source repository : http://code.google.com/p/lz4/
33  */
34 /*
35  * Copyright (c) 2016 by Delphix. All rights reserved.
36  */
37 
38 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
39 #include <sys/zfs_context.h>
40 #elif defined(_STANDALONE)
41 #include <sys/cdefs.h>
42 #include <stand.h>
43 #include <sys/types.h>
44 #include <sys/endian.h>
45 #include <assert.h>
46 
47 #define	ASSERT	assert
48 #else
49 #include <string.h>
50 #include <stdlib.h>
51 #include <sys/types.h>
52 #include <sys/sysmacros.h>
53 #include <netinet/in.h>
54 #include <assert.h>
55 
56 #define	ASSERT	assert
57 #endif
58 #include <lz4.h>
59 
60 static int real_LZ4_compress(const char *source, char *dest, int isize,
61     int osize);
62 static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
63     int isize, int maxOutputSize);
64 static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
65     int isize, int osize);
66 static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
67     int isize, int osize);
68 
69 size_t
70 lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len,
71     int n __unused)
72 {
73 	uint32_t bufsiz;
74 	char *dest = d_start;
75 
76 	ASSERT(d_len >= sizeof (bufsiz));
77 
78 	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
79 	    d_len - sizeof (bufsiz));
80 
81 	/* A zero return means compression failed; signal this by returning s_len. */
82 	if (bufsiz == 0)
83 		return (s_len);
84 
85 	/*
86 	 * Encode the compressed buffer size at the start. We'll need this in
87 	 * decompression to counter the effects of padding which might be
88 	 * added to the compressed buffer and which, if unhandled, would
89 	 * confuse the hell out of our decompression function.
90 	 */
91 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
92 	*(uint32_t *)(void *)dest = BE_32(bufsiz);
93 #else
94 	*(uint32_t *)(void *)dest = htonl(bufsiz);
95 #endif
96 
97 	return (bufsiz + sizeof (bufsiz));
98 }
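
/*
 * A sketch of the buffer layout produced above (illustrative only; the
 * struct and field names are hypothetical, the real code works with raw
 * offsets): lz4_compress() emits a 4-byte big-endian length followed by
 * the compressed stream, and lz4_decompress() below reads it back.
 */
#if 0
typedef struct lz4_frame {
	uint32_t	lzf_bufsiz;	/* big-endian compressed length */
	uint8_t		lzf_data[1];	/* lzf_bufsiz bytes of LZ4 stream */
} lz4_frame_t;
#endif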
99 
100 int
101 lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len,
102     int n __unused)
103 {
104 	const char *src = s_start;
105 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
106 	uint32_t bufsiz = BE_IN32(s_start);
107 #else
108 	uint32_t bufsiz = ntohl(*(uint32_t *)s_start);
109 #endif
110 
111 	/* invalid compressed buffer size encoded at start */
112 	if (bufsiz + sizeof (bufsiz) > s_len)
113 		return (1);
114 
115 	/*
116 	 * Returns 0 on success (decompression function returned non-negative)
117 	 * and non-zero on failure (decompression function returned negative).
118 	 */
119 	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
120 	    d_start, bufsiz, d_len) < 0);
121 }
122 
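
/*
 * Example (illustrative sketch, not part of the ZFS call path): how a
 * caller might round-trip a buffer through the two wrappers above. The
 * helper name is hypothetical and a userland build is assumed (malloc,
 * free and memcmp from the includes above); len of at least four bytes
 * is also assumed. An lz4_compress() return equal to s_len means the
 * data did not compress and the destination contents must not be used.
 */
#if 0
static int
lz4_roundtrip_example(char *src, size_t len)
{
	char *cbuf = malloc(len);	/* holds header + compressed data */
	char *dbuf = malloc(len);	/* holds the decompressed copy */
	size_t clen;
	int ret = -1;

	if (cbuf == NULL || dbuf == NULL)
		goto out;

	/* Returns header + payload size on success, s_len on failure. */
	clen = lz4_compress(src, cbuf, len, len, 0);
	if (clen >= len)
		goto out;		/* incompressible: store src as-is */

	/* Returns 0 on success, non-zero if the stream is corrupt. */
	if (lz4_decompress(cbuf, dbuf, clen, len, 0) != 0)
		goto out;

	ret = (memcmp(src, dbuf, len) == 0) ? 0 : -1;
out:
	free(cbuf);
	free(dbuf);
	return (ret);
}
#endif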
123 /*
124  * LZ4 API Description:
125  *
126  * Simple Functions:
127  * real_LZ4_compress() :
128  *	isize  : is the input size. Max supported value is ~1.9GB
129  *	return : the number of bytes written in buffer dest
130  *		 or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
131  *	note : destination buffer must be already allocated.
132  *		destination buffer must be sized to handle worst-case
133  *		situations (input data not compressible).
134  *
135  * Advanced Functions
136  *
137  * LZ4_compressBound() :
138  *	Provides the maximum size that LZ4 may output in a "worst case"
139  *	scenario (input data not compressible), primarily useful for memory
140  *	allocation of the output buffer.
141  *
142  *	isize  : is the input size. Max supported value is ~1.9GB
143  *	return : maximum output size in a "worst case" scenario
144  *	note : this function is limited by "int" range (2^31-1)
145  *
146  * LZ4_uncompress_unknownOutputSize() :
147  *	isize  : is the input size, therefore the compressed size
148  *	maxOutputSize : is the size of the destination buffer (which must be
149  *		already allocated)
150  *	return : the number of bytes decoded in the destination buffer
151  *		(necessarily <= maxOutputSize). If the source stream is
152  *		malformed, the function will stop decoding and return a
153  *		negative result, indicating the byte position of the faulty
154  *		instruction. This function never writes beyond dest +
155  *		maxOutputSize, and is therefore protected against malicious
156  *		data packets.
157  *	note   : Destination buffer must be already allocated.
158  *
159  * LZ4_compressCtx() :
160  *	This function explicitly handles the CTX memory structure.
161  *
162  *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
163  *	by the caller (either on the stack or using kmem_zalloc). Passing NULL
164  *	isn't valid.
165  *
166  * LZ4_compress64kCtx() :
167  *	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
168  *	isize *Must* be <64KB, otherwise the output will be corrupted.
169  *
170  *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
171  *	by the caller (either on the stack or using kmem_zalloc). Passing NULL
172  *	isn't valid.
173  */
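
/*
 * LZ4_compressBound() is described above but not implemented in this file.
 * A minimal sketch of the usual bound is shown below (an assumption based
 * on the reference implementation's isize + isize/255 + 16 worst case);
 * sizing the destination this way keeps real_LZ4_compress() from ever
 * returning 0 for "output buffer too small".
 */
#if 0
static int
example_LZ4_compressBound(int isize)
{
	/* Incompressible input plus literal-run length bytes and framing. */
	return (isize + (isize / 255) + 16);
}
#endif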
174 
175 /*
176  * Tuning parameters
177  */
178 
179 /*
180  * COMPRESSIONLEVEL: Increasing this value improves compression ratio.
181  *	Lowering this value reduces memory usage. Reduced memory usage
182  *	typically improves speed, due to cache effects (ex: L1 32KB for Intel,
183  *	L1 64KB for AMD). Memory usage formula: N -> 2^(N+2) bytes
184  *	(examples: 12 -> 16KB; 17 -> 512KB).
185  */
186 #define	COMPRESSIONLEVEL 12
187 
188 /*
189  * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
190  *	algorithm skip data segments considered "incompressible" faster.
191  *	This may decrease compression ratio dramatically, but will be
192  *	faster on incompressible data. Increasing this value will make
193  *	the algorithm search more before declaring a segment "incompressible".
194  *	This could improve compression a bit, but will be slower on
195  *	incompressible data. The default value (6) is recommended.
196  */
197 #define	NOTCOMPRESSIBLE_CONFIRMATION 6
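
/*
 * A minimal sketch (hypothetical helper, not used by the algorithm) of how
 * this value feeds the match search: each failed attempt increments a
 * counter, and that counter shifted right by SKIPSTRENGTH becomes the next
 * forward step, mirroring the "findMatchAttempts++ >> skipStrength"
 * expression in the compression loops below.
 */
#if 0
static int
example_misses_before_skipping(void)
{
	int findMatchAttempts = (1 << NOTCOMPRESSIBLE_CONFIRMATION) + 3;
	int misses = 0;

	/* Count how many misses occur before the step grows beyond 1 byte. */
	while ((findMatchAttempts >> NOTCOMPRESSIBLE_CONFIRMATION) == 1) {
		findMatchAttempts++;
		misses++;
	}
	return (misses);	/* 61 with the default value of 6 */
}
#endif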
198 
199 /*
200  * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
201  * performance for big-endian CPUs, but the resulting compressed stream
202  * will be incompatible with little-endian CPUs. You can set this option
203  * to 1 in situations where data will stay within a closed environment.
204  * This option is useless on little-endian CPUs (such as x86).
205  */
206 /* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */
207 
208 /*
209  * CPU Feature Detection
210  */
211 
212 /* 32 or 64 bits ? */
213 #if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
214     defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
215     defined(__LP64__) || defined(_LP64))
216 #define	LZ4_ARCH64 1
217 #else
218 #define	LZ4_ARCH64 0
219 #endif
220 
221 /*
222  * Limits the amount of stack space that the algorithm may consume to hold
223  * the compression lookup table. The value `9' here means we'll never use
224  * more than 2k of stack (see above for a description of COMPRESSIONLEVEL).
225  * If more memory is needed, it is allocated from the heap.
226  */
227 #define	STACKLIMIT 9
228 
229 /*
230  * Little Endian or Big Endian?
231  * Note: overwrite the below #define if you know your architecture endianness.
232  */
233 #if defined(BYTE_ORDER)
234 #if BYTE_ORDER == BIG_ENDIAN	/* This is sys/endian.h API */
235 #define	LZ4_BIG_ENDIAN 1
236 #else
237 /*
238  * Little Endian assumed. PDP Endian and other very rare endian formats
239  * are unsupported.
240  */
241 #endif
242 #else /* !defined(BYTE_ORDER) */
243 #if (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || \
244 	defined(_BIG_ENDIAN) || defined(_ARCH_PPC) || defined(__PPC__) || \
245 	defined(__PPC) || defined(PPC) || defined(__powerpc__) || \
246 	defined(__powerpc) || defined(powerpc) || \
247 	((defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))))
248 #define	LZ4_BIG_ENDIAN 1
249 #else
250 /*
251  * Little Endian assumed. PDP Endian and other very rare endian formats
252  * are unsupported.
253  */
254 #endif
255 #endif /* defined(BYTE_ORDER) */
256 
257 /*
258  * Unaligned memory access is automatically enabled for "common" CPUs,
259  * such as x86. For other CPUs, the compiler will be more cautious and
260  * insert extra code to ensure aligned access is respected. If you know
261  * your target CPU supports unaligned memory access, you may want to
262  * force this option manually to improve performance.
263  */
264 #if defined(__ARM_FEATURE_UNALIGNED)
265 #define	LZ4_FORCE_UNALIGNED_ACCESS 1
266 #endif
267 
268 #ifdef __sparc
269 #define	LZ4_FORCE_SW_BITCOUNT
270 #endif
271 
272 /*
273  * Compiler Options
274  */
275 #if __STDC_VERSION__ >= 199901L	/* C99 */
276 /* "restrict" is a known keyword */
277 #else
278 /* Disable restrict */
279 #define	restrict
280 #endif
281 
282 #define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
283 
284 #ifdef _MSC_VER
285 /* Visual Studio */
286 /* Visual is not C99, but supports some kind of inline */
287 #define	inline __forceinline
288 #if LZ4_ARCH64
289 /* For Visual 2005 */
290 #pragma intrinsic(_BitScanForward64)
291 #pragma intrinsic(_BitScanReverse64)
292 #else /* !LZ4_ARCH64 */
293 /* For Visual 2005 */
294 #pragma intrinsic(_BitScanForward)
295 #pragma intrinsic(_BitScanReverse)
296 #endif /* !LZ4_ARCH64 */
297 #endif /* _MSC_VER */
298 
299 #ifdef _MSC_VER
300 #define	lz4_bswap16(x) _byteswap_ushort(x)
301 #else /* !_MSC_VER */
302 #define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
303 	(((x) & 0xffu) << 8)))
304 #endif /* !_MSC_VER */
305 
306 #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
307 #define	expect(expr, value)    (__builtin_expect((expr), (value)))
308 #else
309 #define	expect(expr, value)    (expr)
310 #endif
311 
312 #ifndef likely
313 #define	likely(expr)	expect((expr) != 0, 1)
314 #endif
315 
316 #ifndef unlikely
317 #define	unlikely(expr)	expect((expr) != 0, 0)
318 #endif
319 
320 /* Basic types */
321 #if defined(_MSC_VER)
322 /* Visual Studio does not support 'stdint' natively */
323 #define	BYTE	unsigned __int8
324 #define	U16	unsigned __int16
325 #define	U32	unsigned __int32
326 #define	S32	__int32
327 #define	U64	unsigned __int64
328 #else /* !defined(_MSC_VER) */
329 #define	BYTE	uint8_t
330 #define	U16	uint16_t
331 #define	U32	uint32_t
332 #define	S32	int32_t
333 #define	U64	uint64_t
334 #endif /* !defined(_MSC_VER) */
335 
336 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
337 #pragma pack(1)
338 #endif
339 
340 typedef struct _U16_S {
341 	U16 v;
342 } U16_S;
343 typedef struct _U32_S {
344 	U32 v;
345 } U32_S;
346 typedef struct _U64_S {
347 	U64 v;
348 } U64_S;
349 
350 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
351 #pragma pack()
352 #endif
353 
354 #define	A64(x) (((U64_S *)(__DECONST(void *, x)))->v)
355 #define	A32(x) (((U32_S *)(__DECONST(void *, x)))->v)
356 #define	A16(x) (((U16_S *)(__DECONST(void *, x)))->v)
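
/*
 * Example (illustrative only, hypothetical helper): the A16/A32/A64 macros
 * above go through the packed single-member structs, which tells the
 * compiler the pointer may be unaligned; on strict-alignment targets it can
 * emit a safe access sequence, elsewhere a plain word load.
 */
#if 0
static U32
example_load32(const BYTE *p)	/* p may point anywhere, aligned or not */
{
	return (A32(p));
}
#endif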
357 
358 /*
359  * Constants
360  */
361 #define	MINMATCH 4
362 
363 #define	HASH_LOG COMPRESSIONLEVEL
364 #define	HASHTABLESIZE (1 << HASH_LOG)
365 #define	HASH_MASK (HASHTABLESIZE - 1)
366 
367 #define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
368 	NOTCOMPRESSIBLE_CONFIRMATION : 2)
369 
370 /*
371  * Defines whether memory is allocated on the stack (local variable)
372  * or on the heap (kmem_zalloc()).
373  */
374 #define	HEAPMODE (HASH_LOG > STACKLIMIT)
375 #define	COPYLENGTH 8
376 #define	LASTLITERALS 5
377 #define	MFLIMIT (COPYLENGTH + MINMATCH)
378 #define	MINLENGTH (MFLIMIT + 1)
379 
380 #define	MAXD_LOG 16
381 #define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)
382 
383 #define	ML_BITS 4
384 #define	ML_MASK ((1U<<ML_BITS)-1)
385 #define	RUN_BITS (8-ML_BITS)
386 #define	RUN_MASK ((1U<<RUN_BITS)-1)
387 
388 
389 /*
390  * Architecture-specific macros
391  */
392 #if LZ4_ARCH64
393 #define	STEPSIZE 8
394 #define	UARCH U64
395 #define	AARCH A64
396 #define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
397 #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
398 #define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
399 #define	HTYPE U32
400 #define	INITBASE(base)		const BYTE* const base = ip
401 #else /* !LZ4_ARCH64 */
402 #define	STEPSIZE 4
403 #define	UARCH U32
404 #define	AARCH A32
405 #define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
406 #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
407 #define	LZ4_SECURECOPY		LZ4_WILDCOPY
408 #define	HTYPE const BYTE *
409 #define	INITBASE(base)		const int base = 0
410 #endif /* !LZ4_ARCH64 */
411 
412 #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
413 #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
414 	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
415 #define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
416 	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
417 #else
418 #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
419 #define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
420 #endif
421 
422 
423 /* Local structures */
424 struct refTables {
425 	HTYPE hashTable[HASHTABLESIZE];
426 };
427 
428 
429 /* Macros */
430 #define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
431 	HASH_LOG))
432 #define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
433 #define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
434 #define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
435 	d = e; }
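
/*
 * The hash macros above, written out as a plain function for illustration
 * (hypothetical helper, not used by the code): the 4 bytes at p are
 * multiplied by 2654435761 (Knuth's multiplicative-hash prime, close to
 * 2^32 divided by the golden ratio) and the top HASH_LOG bits become the
 * index into hashTable[HASHTABLESIZE].
 */
#if 0
static U32
example_hash_position(const BYTE *p)
{
	return ((A32(p) * 2654435761U) >> ((MINMATCH * 8) - HASH_LOG));
}
#endif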
436 
437 
438 /* Private functions */
439 #if LZ4_ARCH64
440 
441 static inline int
442 LZ4_NbCommonBytes(register U64 val)
443 {
444 #if defined(LZ4_BIG_ENDIAN)
445 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
446 	unsigned long r = 0;
447 	_BitScanReverse64(&r, val);
448 	return (int)(r >> 3);
449 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
450 	!defined(LZ4_FORCE_SW_BITCOUNT)
451 	return (__builtin_clzll(val) >> 3);
452 #else
453 	int r;
454 	if (!(val >> 32)) {
455 		r = 4;
456 	} else {
457 		r = 0;
458 		val >>= 32;
459 	}
460 	if (!(val >> 16)) {
461 		r += 2;
462 		val >>= 8;
463 	} else {
464 		val >>= 24;
465 	}
466 	r += (!val);
467 	return (r);
468 #endif
469 #else
470 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
471 	unsigned long r = 0;
472 	_BitScanForward64(&r, val);
473 	return (int)(r >> 3);
474 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
475 	!defined(LZ4_FORCE_SW_BITCOUNT)
476 	return (__builtin_ctzll(val) >> 3);
477 #else
478 	static const int DeBruijnBytePos[64] =
479 	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
480 		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
481 		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
482 		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
483 	};
484 	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
485 	    58];
486 #endif
487 #endif
488 }
489 
490 #else
491 
492 static inline int
493 LZ4_NbCommonBytes(register U32 val)
494 {
495 #if defined(LZ4_BIG_ENDIAN)
496 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
497 	unsigned long r = 0;
498 	_BitScanReverse(&r, val);
499 	return (int)(r >> 3);
500 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
501 	!defined(LZ4_FORCE_SW_BITCOUNT)
502 	return (__builtin_clz(val) >> 3);
503 #else
504 	int r;
505 	if (!(val >> 16)) {
506 		r = 2;
507 		val >>= 8;
508 	} else {
509 		r = 0;
510 		val >>= 24;
511 	}
512 	r += (!val);
513 	return (r);
514 #endif
515 #else
516 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
517 	unsigned long r = 0;
518 	_BitScanForward(&r, val);
519 	return (int)(r >> 3);
520 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
521 	!defined(LZ4_FORCE_SW_BITCOUNT)
522 	return (__builtin_ctz(val) >> 3);
523 #else
524 	static const int DeBruijnBytePos[32] = {
525 		0, 0, 3, 0, 3, 1, 3, 0,
526 		3, 2, 2, 1, 3, 2, 0, 1,
527 		3, 3, 1, 2, 2, 2, 2, 0,
528 		3, 1, 2, 0, 1, 0, 1, 1
529 	};
530 	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
531 	    27];
532 #endif
533 #endif
534 }
535 
536 #endif
537 
538 /* Compression functions */
539 
540 /*ARGSUSED*/
541 static int
542 LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
543     int osize)
544 {
545 #if HEAPMODE
546 	struct refTables *srt = (struct refTables *)ctx;
547 	HTYPE *HashTable = (HTYPE *) (srt->hashTable);
548 #else
549 	HTYPE HashTable[HASHTABLESIZE] = { 0 };
550 #endif
551 
552 	const BYTE *ip = (const BYTE *) source;
553 	INITBASE(base);
554 	const BYTE *anchor = ip;
555 	const BYTE *const iend = ip + isize;
556 	const BYTE *const oend = (BYTE *) dest + osize;
557 	const BYTE *const mflimit = iend - MFLIMIT;
558 #define	matchlimit (iend - LASTLITERALS)
559 
560 	BYTE *op = (BYTE *) dest;
561 
562 	int len, length;
563 	const int skipStrength = SKIPSTRENGTH;
564 	U32 forwardH;
565 
566 
567 	/* Init */
568 	if (isize < MINLENGTH)
569 		goto _last_literals;
570 
571 	/* First Byte */
572 	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
573 	ip++;
574 	forwardH = LZ4_HASH_VALUE(ip);
575 
576 	/* Main Loop */
577 	for (;;) {
578 		int findMatchAttempts = (1U << skipStrength) + 3;
579 		const BYTE *forwardIp = ip;
580 		const BYTE *ref;
581 		BYTE *token;
582 
583 		/* Find a match */
584 		do {
585 			U32 h = forwardH;
586 			int step = findMatchAttempts++ >> skipStrength;
587 			ip = forwardIp;
588 			forwardIp = ip + step;
589 
590 			if unlikely(forwardIp > mflimit) {
591 				goto _last_literals;
592 			}
593 
594 			forwardH = LZ4_HASH_VALUE(forwardIp);
595 			ref = base + HashTable[h];
596 			HashTable[h] = ip - base;
597 
598 		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
599 
600 		/* Catch up */
601 		while ((ip > anchor) && (ref > (const BYTE *) source) &&
602 		    unlikely(ip[-1] == ref[-1])) {
603 			ip--;
604 			ref--;
605 		}
606 
607 		/* Encode Literal length */
608 		length = ip - anchor;
609 		token = op++;
610 
611 		/* Check output limit */
612 		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
613 		    (length >> 8) > oend)
614 			return (0);
615 
616 		if (length >= (int)RUN_MASK) {
617 			*token = (RUN_MASK << ML_BITS);
618 			len = length - RUN_MASK;
619 			for (; len > 254; len -= 255)
620 				*op++ = 255;
621 			*op++ = (BYTE)len;
622 		} else
623 			*token = (length << ML_BITS);
624 
625 		/* Copy Literals */
626 		LZ4_BLINDCOPY(anchor, op, length);
627 
628 		_next_match:
629 		/* Encode Offset */
630 		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
631 
632 		/* Start Counting */
633 		ip += MINMATCH;
634 		ref += MINMATCH;	/* MinMatch verified */
635 		anchor = ip;
636 		while likely(ip < matchlimit - (STEPSIZE - 1)) {
637 			UARCH diff = AARCH(ref) ^ AARCH(ip);
638 			if (!diff) {
639 				ip += STEPSIZE;
640 				ref += STEPSIZE;
641 				continue;
642 			}
643 			ip += LZ4_NbCommonBytes(diff);
644 			goto _endCount;
645 		}
646 #if LZ4_ARCH64
647 		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
648 			ip += 4;
649 			ref += 4;
650 		}
651 #endif
652 		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
653 			ip += 2;
654 			ref += 2;
655 		}
656 		if ((ip < matchlimit) && (*ref == *ip))
657 			ip++;
658 		_endCount:
659 
660 		/* Encode MatchLength */
661 		len = (ip - anchor);
662 		/* Check output limit */
663 		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
664 			return (0);
665 		if (len >= (int)ML_MASK) {
666 			*token += ML_MASK;
667 			len -= ML_MASK;
668 			for (; len > 509; len -= 510) {
669 				*op++ = 255;
670 				*op++ = 255;
671 			}
672 			if (len > 254) {
673 				len -= 255;
674 				*op++ = 255;
675 			}
676 			*op++ = (BYTE)len;
677 		} else
678 			*token += len;
679 
680 		/* Test end of chunk */
681 		if (ip > mflimit) {
682 			anchor = ip;
683 			break;
684 		}
685 		/* Fill table */
686 		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;
687 
688 		/* Test next position */
689 		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
690 		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
691 		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
692 			token = op++;
693 			*token = 0;
694 			goto _next_match;
695 		}
696 		/* Prepare next loop */
697 		anchor = ip++;
698 		forwardH = LZ4_HASH_VALUE(ip);
699 	}
700 
701 	_last_literals:
702 	/* Encode Last Literals */
703 	{
704 		int lastRun = iend - anchor;
705 		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
706 		    oend)
707 			return (0);
708 		if (lastRun >= (int)RUN_MASK) {
709 			*op++ = (RUN_MASK << ML_BITS);
710 			lastRun -= RUN_MASK;
711 			for (; lastRun > 254; lastRun -= 255) {
712 				*op++ = 255;
713 			}
714 			*op++ = (BYTE)lastRun;
715 		} else
716 			*op++ = (lastRun << ML_BITS);
717 		(void) memcpy(op, anchor, iend - anchor);
718 		op += iend - anchor;
719 	}
720 
721 	/* End */
722 	return (int)(((char *)op) - dest);
723 }
724 
725 
726 
727 /* Note : this function is valid only if isize < LZ4_64KLIMIT */
728 #define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
729 #define	HASHLOG64K (HASH_LOG + 1)
730 #define	HASH64KTABLESIZE (1U << HASHLOG64K)
731 #define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
732 	HASHLOG64K))
733 #define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))
734 
735 /*ARGSUSED*/
736 static int
737 LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
738     int osize)
739 {
740 #if HEAPMODE
741 	struct refTables *srt = (struct refTables *)ctx;
742 	U16 *HashTable = (U16 *) (srt->hashTable);
743 #else
744 	U16 HashTable[HASH64KTABLESIZE] = { 0 };
745 #endif
746 
747 	const BYTE *ip = (const BYTE *) source;
748 	const BYTE *anchor = ip;
749 	const BYTE *const base = ip;
750 	const BYTE *const iend = ip + isize;
751 	const BYTE *const oend = (BYTE *) dest + osize;
752 	const BYTE *const mflimit = iend - MFLIMIT;
753 #define	matchlimit (iend - LASTLITERALS)
754 
755 	BYTE *op = (BYTE *) dest;
756 
757 	int len, length;
758 	const int skipStrength = SKIPSTRENGTH;
759 	U32 forwardH;
760 
761 	/* Init */
762 	if (isize < MINLENGTH)
763 		goto _last_literals;
764 
765 	/* First Byte */
766 	ip++;
767 	forwardH = LZ4_HASH64K_VALUE(ip);
768 
769 	/* Main Loop */
770 	for (;;) {
771 		int findMatchAttempts = (1U << skipStrength) + 3;
772 		const BYTE *forwardIp = ip;
773 		const BYTE *ref;
774 		BYTE *token;
775 
776 		/* Find a match */
777 		do {
778 			U32 h = forwardH;
779 			int step = findMatchAttempts++ >> skipStrength;
780 			ip = forwardIp;
781 			forwardIp = ip + step;
782 
783 			if (forwardIp > mflimit) {
784 				goto _last_literals;
785 			}
786 
787 			forwardH = LZ4_HASH64K_VALUE(forwardIp);
788 			ref = base + HashTable[h];
789 			HashTable[h] = ip - base;
790 
791 		} while (A32(ref) != A32(ip));
792 
793 		/* Catch up */
794 		while ((ip > anchor) && (ref > (const BYTE *) source) &&
795 		    (ip[-1] == ref[-1])) {
796 			ip--;
797 			ref--;
798 		}
799 
800 		/* Encode Literal length */
801 		length = ip - anchor;
802 		token = op++;
803 
804 		/* Check output limit */
805 		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
806 		    (length >> 8) > oend)
807 			return (0);
808 
809 		if (length >= (int)RUN_MASK) {
810 			*token = (RUN_MASK << ML_BITS);
811 			len = length - RUN_MASK;
812 			for (; len > 254; len -= 255)
813 				*op++ = 255;
814 			*op++ = (BYTE)len;
815 		} else
816 			*token = (length << ML_BITS);
817 
818 		/* Copy Literals */
819 		LZ4_BLINDCOPY(anchor, op, length);
820 
821 		_next_match:
822 		/* Encode Offset */
823 		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
824 
825 		/* Start Counting */
826 		ip += MINMATCH;
827 		ref += MINMATCH;	/* MinMatch verified */
828 		anchor = ip;
829 		while (ip < matchlimit - (STEPSIZE - 1)) {
830 			UARCH diff = AARCH(ref) ^ AARCH(ip);
831 			if (!diff) {
832 				ip += STEPSIZE;
833 				ref += STEPSIZE;
834 				continue;
835 			}
836 			ip += LZ4_NbCommonBytes(diff);
837 			goto _endCount;
838 		}
839 #if LZ4_ARCH64
840 		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
841 			ip += 4;
842 			ref += 4;
843 		}
844 #endif
845 		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
846 			ip += 2;
847 			ref += 2;
848 		}
849 		if ((ip < matchlimit) && (*ref == *ip))
850 			ip++;
851 		_endCount:
852 
853 		/* Encode MatchLength */
854 		len = (ip - anchor);
855 		/* Check output limit */
856 		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
857 			return (0);
858 		if (len >= (int)ML_MASK) {
859 			*token += ML_MASK;
860 			len -= ML_MASK;
861 			for (; len > 509; len -= 510) {
862 				*op++ = 255;
863 				*op++ = 255;
864 			}
865 			if (len > 254) {
866 				len -= 255;
867 				*op++ = 255;
868 			}
869 			*op++ = (BYTE)len;
870 		} else
871 			*token += len;
872 
873 		/* Test end of chunk */
874 		if (ip > mflimit) {
875 			anchor = ip;
876 			break;
877 		}
878 		/* Fill table */
879 		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;
880 
881 		/* Test next position */
882 		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
883 		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
884 		if (A32(ref) == A32(ip)) {
885 			token = op++;
886 			*token = 0;
887 			goto _next_match;
888 		}
889 		/* Prepare next loop */
890 		anchor = ip++;
891 		forwardH = LZ4_HASH64K_VALUE(ip);
892 	}
893 
894 	_last_literals:
895 	/* Encode Last Literals */
896 	{
897 		int lastRun = iend - anchor;
898 		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
899 		    oend)
900 			return (0);
901 		if (lastRun >= (int)RUN_MASK) {
902 			*op++ = (RUN_MASK << ML_BITS);
903 			lastRun -= RUN_MASK;
904 			for (; lastRun > 254; lastRun -= 255)
905 				*op++ = 255;
906 			*op++ = (BYTE)lastRun;
907 		} else
908 			*op++ = (lastRun << ML_BITS);
909 		(void) memcpy(op, anchor, iend - anchor);
910 		op += iend - anchor;
911 	}
912 
913 	/* End */
914 	return (int)(((char *)op) - dest);
915 }
916 
917 static int
918 real_LZ4_compress(const char *source, char *dest, int isize, int osize)
919 {
920 #if HEAPMODE
921 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
922 	void *ctx = kmem_zalloc(sizeof (struct refTables), KM_NOSLEEP);
923 #else
924 	void *ctx = malloc(sizeof (struct refTables));
925 #endif
926 	int result;
927 
928 	/*
929 	 * Out of kernel memory; gently fall through - this will disable
930 	 * compression in zio_compress_data().
931 	 */
932 	if (ctx == NULL)
933 		return (0);
934 
935 	if (isize < LZ4_64KLIMIT)
936 		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
937 	else
938 		result = LZ4_compressCtx(ctx, source, dest, isize, osize);
939 
940 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
941 	kmem_free(ctx, sizeof (struct refTables));
942 #else
943 	free(ctx);
944 #endif
945 	return (result);
946 #else
947 	if (isize < (int)LZ4_64KLIMIT)
948 		return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
949 	return (LZ4_compressCtx(NULL, source, dest, isize, osize));
950 #endif
951 }
952 
953 /* Decompression functions */
954 
955 /*
956  * Note: The decoding function LZ4_uncompress_unknownOutputSize() is safe
957  *	against "buffer overflow" attacks. It will never write
958  *	outside of the provided output buffer.
959  *	LZ4_uncompress_unknownOutputSize() also ensures that
960  *	it will never read outside of the input buffer. A corrupted input
961  *	will produce an error result: a negative int indicating the position
962  *	of the error within the input stream.
963  */
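
/*
 * Example (illustrative sketch, hypothetical helper): because a negative
 * return marks the position of the error in the input, a caller can reject
 * corrupt frames without ever risking a write past dest + maxOutputSize.
 */
#if 0
static int
example_safe_decode(const char *cbuf, int clen, char *dbuf, int dlen)
{
	int n = LZ4_uncompress_unknownOutputSize(cbuf, dbuf, clen, dlen);

	if (n < 0)
		return (-1);	/* corrupt/truncated input; dbuf is partial */
	return (n);		/* bytes produced, necessarily <= dlen */
}
#endif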
964 
965 static int
966 LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
967     int maxOutputSize)
968 {
969 	/* Local Variables */
970 	const BYTE *restrict ip = (const BYTE *) source;
971 	const BYTE *const iend = ip + isize;
972 	const BYTE *ref;
973 
974 	BYTE *op = (BYTE *) dest;
975 	BYTE *const oend = op + maxOutputSize;
976 	BYTE *cpy;
977 
978 	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
979 #if LZ4_ARCH64
980 	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
981 #endif
982 
983 	/* Main Loop */
984 	while (ip < iend) {
985 		unsigned token;
986 		size_t length;
987 
988 		/* get runlength */
989 		token = *ip++;
990 		if ((length = (token >> ML_BITS)) == RUN_MASK) {
991 			int s = 255;
992 			while ((ip < iend) && (s == 255)) {
993 				s = *ip++;
994 				length += s;
995 			}
996 		}
997 		/* copy literals */
998 		cpy = op + length;
999 		/* CORNER-CASE: cpy might overflow. */
1000 		if (cpy < op)
1001 			goto _output_error;	/* cpy overflowed, bail! */
1002 		if ((cpy > oend - COPYLENGTH) ||
1003 		    (ip + length > iend - COPYLENGTH)) {
1004 			if (cpy > oend)
1005 				/* Error: writes beyond output buffer */
1006 				goto _output_error;
1007 			if (ip + length != iend)
1008 				/*
1009 				 * Error: LZ4 format requires consuming all
1010 				 * input at this stage
1011 				 */
1012 				goto _output_error;
1013 			(void) memcpy(op, ip, length);
1014 			op += length;
1015 			/* Necessarily EOF, due to parsing restrictions */
1016 			break;
1017 		}
1018 		LZ4_WILDCOPY(ip, op, cpy);
1019 		ip -= (op - cpy);
1020 		op = cpy;
1021 
1022 		/* get offset */
1023 		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
1024 		ip += 2;
1025 		if (ref < (BYTE * const) dest)
1026 			/*
1027 			 * Error: offset creates reference outside of
1028 			 * destination buffer
1029 			 */
1030 			goto _output_error;
1031 
1032 		/* get matchlength */
1033 		if ((length = (token & ML_MASK)) == ML_MASK) {
1034 			while (ip < iend) {
1035 				int s = *ip++;
1036 				length += s;
1037 				if (s == 255)
1038 					continue;
1039 				break;
1040 			}
1041 		}
1042 		/* copy repeated sequence */
1043 		if unlikely(op - ref < STEPSIZE) {
1044 #if LZ4_ARCH64
1045 			size_t dec64 = dec64table[op-ref];
1046 #else
1047 			const int dec64 = 0;
1048 #endif
1049 			op[0] = ref[0];
1050 			op[1] = ref[1];
1051 			op[2] = ref[2];
1052 			op[3] = ref[3];
1053 			op += 4;
1054 			ref += 4;
1055 			ref -= dec32table[op-ref];
1056 			A32(op) = A32(ref);
1057 			op += STEPSIZE - 4;
1058 			ref -= dec64;
1059 		} else {
1060 			LZ4_COPYSTEP(ref, op);
1061 		}
1062 		cpy = op + length - (STEPSIZE - 4);
1063 		if (cpy > oend - COPYLENGTH) {
1064 			if (cpy > oend)
1065 				/*
1066 				 * Error: request to write outside of
1067 				 * destination buffer
1068 				 */
1069 				goto _output_error;
1070 			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
1071 			while (op < cpy)
1072 				*op++ = *ref++;
1073 			op = cpy;
1074 			if (op == oend)
1075 				/*
1076 				 * Check EOF (should never happen, since
1077 				 * last 5 bytes are supposed to be literals)
1078 				 */
1079 				goto _output_error;
1080 			continue;
1081 		}
1082 		LZ4_SECURECOPY(ref, op, cpy);
1083 		op = cpy;	/* correction */
1084 	}
1085 
1086 	/* end of decoding */
1087 	return (int)(((char *)op) - dest);
1088 
1089 	/* write overflow error detected */
1090 	_output_error:
1091 	return (int)(-(((const char *)ip) - source));
1092 }
1093