xref: /illumos-gate/usr/src/common/lz4/lz4.c (revision f73e1ebf60792a8bdb2d559097c3131b68c09318)
1 /*
2  * LZ4 - Fast LZ compression algorithm
3  * Header File
4  * Copyright (C) 2011-2013, Yann Collet.
5  * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are
9  * met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  * notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above
14  * copyright notice, this list of conditions and the following disclaimer
15  * in the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * You can contact the author at :
31  * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
32  * - LZ4 source repository : http://code.google.com/p/lz4/
33  */
34 /*
35  * Copyright (c) 2016 by Delphix. All rights reserved.
36  * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
37  */
38 
39 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
40 #include <sys/zfs_context.h>
41 #elif defined(_STANDALONE)
42 #include <sys/cdefs.h>
43 #include <stand.h>
44 #include <sys/types.h>
45 #include <sys/endian.h>
46 #include <assert.h>
47 
48 #define	ASSERT	assert
49 #else
50 #include <string.h>
51 #include <stdlib.h>
52 #include <sys/types.h>
53 #include <sys/sysmacros.h>
54 #include <netinet/in.h>
55 #include <assert.h>
56 
57 #define	ASSERT	assert
58 #endif
59 #include <lz4.h>
60 
61 static int real_LZ4_compress(const char *source, char *dest, int isize,
62     int osize);
63 static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
64     int isize, int maxOutputSize);
65 static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
66     int isize, int osize);
67 static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
68     int isize, int osize);
69 
70 size_t
71 lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len,
72     int n __unused)
73 {
74 	uint32_t bufsiz;
75 	char *dest = d_start;
76 
77 	ASSERT(d_len >= sizeof (bufsiz));
78 
79 	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
80 	    d_len - sizeof (bufsiz));
81 
82 	/* Signal an error if the compression routine returned zero. */
83 	if (bufsiz == 0)
84 		return (s_len);
85 
86 	/*
87 	 * Encode the compresed buffer size at the start. We'll need this in
88 	 * decompression to counter the effects of padding which might be
89 	 * added to the compressed buffer and which, if unhandled, would
90 	 * confuse the hell out of our decompression function.
91 	 */
92 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
93 	*(uint32_t *)(void *)dest = BE_32(bufsiz);
94 #else
95 	*(uint32_t *)(void *)dest = htonl(bufsiz);
96 #endif
97 
98 	return (bufsiz + sizeof (bufsiz));
99 }
100 
101 int
102 lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len,
103     int n __unused)
104 {
105 	const char *src = s_start;
106 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
107 	uint32_t bufsiz = BE_IN32(s_start);
108 #else
109 	uint32_t bufsiz = htonl(*(uint32_t *)s_start);
110 #endif
111 
112 	/* invalid compressed buffer size encoded at start */
113 	if (bufsiz + sizeof (bufsiz) > s_len)
114 		return (1);
115 
116 	/*
117 	 * Returns 0 on success (decompression function returned non-negative)
118 	 * and non-zero on failure (decompression function returned negative).
119 	 */
120 	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
121 	    d_start, bufsiz, d_len) < 0);
122 }
123 
124 /*
125  * LZ4 API Description:
126  *
127  * Simple Functions:
128  * real_LZ4_compress() :
129  *	isize  : is the input size. Max supported value is ~1.9GB
130  *	return : the number of bytes written in buffer dest
131  *		 or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
132  *	note : destination buffer must be already allocated.
133  *		destination buffer must be sized to handle worst cases
134  *		situations (input data not compressible).
135  *
136  * Advanced Functions
137  *
138  * LZ4_compressBound() :
139  *	Provides the maximum size that LZ4 may output in a "worst case"
140  *	scenario (input data not compressible) primarily useful for memory
141  *	allocation of output buffer.
142  *
143  *	isize  : is the input size. Max supported value is ~1.9GB
144  *	return : maximum output size in a "worst case" scenario
145  *	note : this function is limited by "int" range (2^31-1)
146  *
147  * LZ4_uncompress_unknownOutputSize() :
148  *	isize  : is the input size, therefore the compressed size
149  *	maxOutputSize : is the size of the destination buffer (which must be
150  *		already allocated)
151  *	return : the number of bytes decoded in the destination buffer
152  *		(necessarily <= maxOutputSize). If the source stream is
153  *		malformed, the function will stop decoding and return a
154  *		negative result, indicating the byte position of the faulty
155  *		instruction. This function never writes beyond dest +
156  *		maxOutputSize, and is therefore protected against malicious
157  *		data packets.
158  *	note   : Destination buffer must be already allocated.
159  *
160  * LZ4_compressCtx() :
161  *	This function explicitly handles the CTX memory structure.
162  *
163  *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
164  *	by the caller (either on the stack or using kmem_zalloc). Passing NULL
165  *	isn't valid.
166  *
167  * LZ4_compress64kCtx() :
168  *	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
169  *	isize *Must* be <64KB, otherwise the output will be corrupted.
170  *
171  *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
172  *	by the caller (either on the stack or using kmem_zalloc). Passing NULL
173  *	isn't valid.
174  */
175 
176 /*
177  * Tuning parameters
178  */
179 
180 /*
181  * COMPRESSIONLEVEL: Increasing this value improves compression ratio
182  *	 Lowering this value reduces memory usage. Reduced memory usage
183  *	typically improves speed, due to cache effect (ex: L1 32KB for Intel,
184  *	L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
185  *	(examples : 12 -> 16KB ; 17 -> 512KB)
186  */
187 #define	COMPRESSIONLEVEL 12
188 
189 /*
190  * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
191  *	algorithm skip faster data segments considered "incompressible".
192  *	This may decrease compression ratio dramatically, but will be
193  *	faster on incompressible data. Increasing this value will make
194  *	the algorithm search more before declaring a segment "incompressible".
195  *	This could improve compression a bit, but will be slower on
196  *	incompressible data. The default value (6) is recommended.
197  */
198 #define	NOTCOMPRESSIBLE_CONFIRMATION 6
199 
200 /*
201  * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
202  * performance for big endian cpu, but the resulting compressed stream
203  * will be incompatible with little-endian CPU. You can set this option
204  * to 1 in situations where data will stay within closed environment.
205  * This option is useless on Little_Endian CPU (such as x86).
206  */
207 /* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */
208 
209 /*
210  * CPU Feature Detection
211  */
212 
213 /* 32 or 64 bits ? */
214 #if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
215     defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
216     defined(__LP64__) || defined(_LP64))
217 #define	LZ4_ARCH64 1
218 #else
219 #define	LZ4_ARCH64 0
220 #endif
221 
222 /*
223  * Limits the amount of stack space that the algorithm may consume to hold
224  * the compression lookup table. The value `9' here means we'll never use
225  * more than 2k of stack (see above for a description of COMPRESSIONLEVEL).
226  * If more memory is needed, it is allocated from the heap.
227  */
228 #define	STACKLIMIT 9
229 
230 /*
231  * Little Endian or Big Endian?
232  * Note: overwrite the below #define if you know your architecture endianess.
233  */
234 #if defined(BYTE_ORDER)
235 #if BYTE_ORDER == BIG_ENDIAN	/* This is sys/endian.h API */
236 #define	LZ4_BIG_ENDIAN 1
237 #else
238 /*
239  * Little Endian assumed. PDP Endian and other very rare endian format
240  * are unsupported.
241  */
242 #endif
243 #else /* !defined(BYTE_ORDER) */
244 #if (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || \
245 	defined(_BIG_ENDIAN) || defined(_ARCH_PPC) || defined(__PPC__) || \
246 	defined(__PPC) || defined(PPC) || defined(__powerpc__) || \
247 	defined(__powerpc) || defined(powerpc) || \
248 	((defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))))
249 #define	LZ4_BIG_ENDIAN 1
250 #else
251 /*
252  * Little Endian assumed. PDP Endian and other very rare endian format
253  * are unsupported.
254  */
255 #endif
256 #endif /* defined(BYTE_ORDER) */
257 
258 /*
259  * Unaligned memory access is automatically enabled for "common" CPU,
260  * such as x86. For others CPU, the compiler will be more cautious, and
261  * insert extra code to ensure aligned access is respected. If you know
262  * your target CPU supports unaligned memory access, you may want to
263  * force this option manually to improve performance
264  */
265 #if defined(__ARM_FEATURE_UNALIGNED)
266 #define	LZ4_FORCE_UNALIGNED_ACCESS 1
267 #endif
268 
269 #ifdef __sparc
270 #define	LZ4_FORCE_SW_BITCOUNT
271 #endif
272 
273 /*
274  * Compiler Options
275  */
276 #if __STDC_VERSION__ >= 199901L	/* C99 */
277 /* "restrict" is a known keyword */
278 #else
279 /* Disable restrict */
280 #define	restrict
281 #endif
282 
283 #define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
284 
285 #ifdef _MSC_VER
286 /* Visual Studio */
287 /* Visual is not C99, but supports some kind of inline */
288 #define	inline __forceinline
289 #if LZ4_ARCH64
290 /* For Visual 2005 */
291 #pragma intrinsic(_BitScanForward64)
292 #pragma intrinsic(_BitScanReverse64)
293 #else /* !LZ4_ARCH64 */
294 /* For Visual 2005 */
295 #pragma intrinsic(_BitScanForward)
296 #pragma intrinsic(_BitScanReverse)
297 #endif /* !LZ4_ARCH64 */
298 #endif /* _MSC_VER */
299 
300 #ifdef _MSC_VER
301 #define	lz4_bswap16(x) _byteswap_ushort(x)
302 #else /* !_MSC_VER */
303 #define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
304 	(((x) & 0xffu) << 8)))
305 #endif /* !_MSC_VER */
306 
307 #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
308 #define	expect(expr, value)    (__builtin_expect((expr), (value)))
309 #else
310 #define	expect(expr, value)    (expr)
311 #endif
312 
313 #ifndef likely
314 #define	likely(expr)	expect((expr) != 0, 1)
315 #endif
316 
317 #ifndef unlikely
318 #define	unlikely(expr)	expect((expr) != 0, 0)
319 #endif
320 
321 /* Basic types */
322 #if defined(_MSC_VER)
323 /* Visual Studio does not support 'stdint' natively */
324 #define	BYTE	unsigned __int8
325 #define	U16	unsigned __int16
326 #define	U32	unsigned __int32
327 #define	S32	__int32
328 #define	U64	unsigned __int64
329 #else /* !defined(_MSC_VER) */
330 #define	BYTE	uint8_t
331 #define	U16	uint16_t
332 #define	U32	uint32_t
333 #define	S32	int32_t
334 #define	U64	uint64_t
335 #endif /* !defined(_MSC_VER) */
336 
337 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
338 #pragma pack(1)
339 #endif
340 
341 typedef struct _U16_S {
342 	U16 v;
343 } U16_S;
344 typedef struct _U32_S {
345 	U32 v;
346 } U32_S;
347 typedef struct _U64_S {
348 	U64 v;
349 } U64_S;
350 
351 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
352 #pragma pack()
353 #endif
354 
355 #define	A64(x) (((U64_S *)(__DECONST(void *, x)))->v)
356 #define	A32(x) (((U32_S *)(__DECONST(void *, x)))->v)
357 #define	A16(x) (((U16_S *)(__DECONST(void *, x)))->v)
358 
359 /*
360  * Constants
361  */
362 #define	MINMATCH 4
363 
364 #define	HASH_LOG COMPRESSIONLEVEL
365 #define	HASHTABLESIZE (1 << HASH_LOG)
366 #define	HASH_MASK (HASHTABLESIZE - 1)
367 
368 #define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
369 	NOTCOMPRESSIBLE_CONFIRMATION : 2)
370 
371 /*
372  * Defines if memory is allocated into the stack (local variable),
373  * or into the heap (kmem_alloc()).
374  */
375 #define	HEAPMODE (HASH_LOG > STACKLIMIT)
376 #define	COPYLENGTH 8
377 #define	LASTLITERALS 5
378 #define	MFLIMIT (COPYLENGTH + MINMATCH)
379 #define	MINLENGTH (MFLIMIT + 1)
380 
381 #define	MAXD_LOG 16
382 #define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)
383 
384 #define	ML_BITS 4
385 #define	ML_MASK ((1U<<ML_BITS)-1)
386 #define	RUN_BITS (8-ML_BITS)
387 #define	RUN_MASK ((1U<<RUN_BITS)-1)
388 
389 
390 /*
391  * Architecture-specific macros
392  */
393 #if LZ4_ARCH64
394 #define	STEPSIZE 8
395 #define	UARCH U64
396 #define	AARCH A64
397 #define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
398 #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
399 #define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
400 #define	HTYPE U32
401 #define	INITBASE(base)		const BYTE* const base = ip
402 #else /* !LZ4_ARCH64 */
403 #define	STEPSIZE 4
404 #define	UARCH U32
405 #define	AARCH A32
406 #define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
407 #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
408 #define	LZ4_SECURECOPY		LZ4_WILDCOPY
409 #define	HTYPE const BYTE *
410 #define	INITBASE(base)		const int base = 0
411 #endif /* !LZ4_ARCH64 */
412 
413 #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
414 #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
415 	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
416 #define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
417 	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
418 #else
419 #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
420 #define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
421 #endif
422 
423 
424 /* Local structures */
425 struct refTables {
426 	HTYPE hashTable[HASHTABLESIZE];
427 };
428 
429 
430 /* Macros */
431 #define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
432 	HASH_LOG))
433 #define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
434 #define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
435 #define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
436 	d = e; }
437 
438 
439 /* Private functions */
440 #if LZ4_ARCH64
441 
442 static inline int
443 LZ4_NbCommonBytes(register U64 val)
444 {
445 #if defined(LZ4_BIG_ENDIAN)
446 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
447 	unsigned long r = 0;
448 	_BitScanReverse64(&r, val);
449 	return (int)(r >> 3);
450 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
451 	!defined(LZ4_FORCE_SW_BITCOUNT)
452 	return (__builtin_clzll(val) >> 3);
453 #else
454 	int r;
455 	if (!(val >> 32)) {
456 		r = 4;
457 	} else {
458 		r = 0;
459 		val >>= 32;
460 	}
461 	if (!(val >> 16)) {
462 		r += 2;
463 		val >>= 8;
464 	} else {
465 		val >>= 24;
466 	}
467 	r += (!val);
468 	return (r);
469 #endif
470 #else
471 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
472 	unsigned long r = 0;
473 	_BitScanForward64(&r, val);
474 	return (int)(r >> 3);
475 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
476 	!defined(LZ4_FORCE_SW_BITCOUNT)
477 	return (__builtin_ctzll(val) >> 3);
478 #else
479 	static const int DeBruijnBytePos[64] =
480 	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
481 		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
482 		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
483 		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
484 	};
485 	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
486 	    58];
487 #endif
488 #endif
489 }
490 
491 #else
492 
493 static inline int
494 LZ4_NbCommonBytes(register U32 val)
495 {
496 #if defined(LZ4_BIG_ENDIAN)
497 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
498 	unsigned long r = 0;
499 	_BitScanReverse(&r, val);
500 	return (int)(r >> 3);
501 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
502 	!defined(LZ4_FORCE_SW_BITCOUNT)
503 	return (__builtin_clz(val) >> 3);
504 #else
505 	int r;
506 	if (!(val >> 16)) {
507 		r = 2;
508 		val >>= 8;
509 	} else {
510 		r = 0;
511 		val >>= 24;
512 	}
513 	r += (!val);
514 	return (r);
515 #endif
516 #else
517 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
518 	unsigned long r = 0;
519 	_BitScanForward(&r, val);
520 	return (int)(r >> 3);
521 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
522 	!defined(LZ4_FORCE_SW_BITCOUNT)
523 	return (__builtin_ctz(val) >> 3);
524 #else
525 	static const int DeBruijnBytePos[32] = {
526 		0, 0, 3, 0, 3, 1, 3, 0,
527 		3, 2, 2, 1, 3, 2, 0, 1,
528 		3, 3, 1, 2, 2, 2, 2, 0,
529 		3, 1, 2, 0, 1, 0, 1, 1
530 	};
531 	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
532 	    27];
533 #endif
534 #endif
535 }
536 
537 #endif
538 
539 /* Compression functions */
540 
541 /*ARGSUSED*/
542 static int
543 LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
544     int osize)
545 {
546 #if HEAPMODE
547 	struct refTables *srt = (struct refTables *)ctx;
548 	HTYPE *HashTable = (HTYPE *) (srt->hashTable);
549 #else
550 	HTYPE HashTable[HASHTABLESIZE] = { 0 };
551 #endif
552 
553 	const BYTE *ip = (const BYTE *) source;
554 	INITBASE(base);
555 	const BYTE *anchor = ip;
556 	const BYTE *const iend = ip + isize;
557 	const BYTE *const oend = (BYTE *) dest + osize;
558 	const BYTE *const mflimit = iend - MFLIMIT;
559 #define	matchlimit (iend - LASTLITERALS)
560 
561 	BYTE *op = (BYTE *) dest;
562 
563 	int len, length;
564 	const int skipStrength = SKIPSTRENGTH;
565 	U32 forwardH;
566 
567 
568 	/* Init */
569 	if (isize < MINLENGTH)
570 		goto _last_literals;
571 
572 	/* First Byte */
573 	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
574 	ip++;
575 	forwardH = LZ4_HASH_VALUE(ip);
576 
577 	/* Main Loop */
578 	for (;;) {
579 		int findMatchAttempts = (1U << skipStrength) + 3;
580 		const BYTE *forwardIp = ip;
581 		const BYTE *ref;
582 		BYTE *token;
583 
584 		/* Find a match */
585 		do {
586 			U32 h = forwardH;
587 			int step = findMatchAttempts++ >> skipStrength;
588 			ip = forwardIp;
589 			forwardIp = ip + step;
590 
591 			if unlikely(forwardIp > mflimit) {
592 				goto _last_literals;
593 			}
594 
595 			forwardH = LZ4_HASH_VALUE(forwardIp);
596 			ref = base + HashTable[h];
597 			HashTable[h] = ip - base;
598 
599 		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
600 
601 		/* Catch up */
602 		while ((ip > anchor) && (ref > (const BYTE *) source) &&
603 		    unlikely(ip[-1] == ref[-1])) {
604 			ip--;
605 			ref--;
606 		}
607 
608 		/* Encode Literal length */
609 		length = ip - anchor;
610 		token = op++;
611 
612 		/* Check output limit */
613 		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
614 		    (length >> 8) > oend)
615 			return (0);
616 
617 		if (length >= (int)RUN_MASK) {
618 			*token = (RUN_MASK << ML_BITS);
619 			len = length - RUN_MASK;
620 			for (; len > 254; len -= 255)
621 				*op++ = 255;
622 			*op++ = (BYTE)len;
623 		} else
624 			*token = (length << ML_BITS);
625 
626 		/* Copy Literals */
627 		LZ4_BLINDCOPY(anchor, op, length);
628 
629 		_next_match:
630 		/* Encode Offset */
631 		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
632 
633 		/* Start Counting */
634 		ip += MINMATCH;
635 		ref += MINMATCH;	/* MinMatch verified */
636 		anchor = ip;
637 		while likely(ip < matchlimit - (STEPSIZE - 1)) {
638 			UARCH diff = AARCH(ref) ^ AARCH(ip);
639 			if (!diff) {
640 				ip += STEPSIZE;
641 				ref += STEPSIZE;
642 				continue;
643 			}
644 			ip += LZ4_NbCommonBytes(diff);
645 			goto _endCount;
646 		}
647 #if LZ4_ARCH64
648 		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
649 			ip += 4;
650 			ref += 4;
651 		}
652 #endif
653 		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
654 			ip += 2;
655 			ref += 2;
656 		}
657 		if ((ip < matchlimit) && (*ref == *ip))
658 			ip++;
659 		_endCount:
660 
661 		/* Encode MatchLength */
662 		len = (ip - anchor);
663 		/* Check output limit */
664 		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
665 			return (0);
666 		if (len >= (int)ML_MASK) {
667 			*token += ML_MASK;
668 			len -= ML_MASK;
669 			for (; len > 509; len -= 510) {
670 				*op++ = 255;
671 				*op++ = 255;
672 			}
673 			if (len > 254) {
674 				len -= 255;
675 				*op++ = 255;
676 			}
677 			*op++ = (BYTE)len;
678 		} else
679 			*token += len;
680 
681 		/* Test end of chunk */
682 		if (ip > mflimit) {
683 			anchor = ip;
684 			break;
685 		}
686 		/* Fill table */
687 		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;
688 
689 		/* Test next position */
690 		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
691 		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
692 		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
693 			token = op++;
694 			*token = 0;
695 			goto _next_match;
696 		}
697 		/* Prepare next loop */
698 		anchor = ip++;
699 		forwardH = LZ4_HASH_VALUE(ip);
700 	}
701 
702 	_last_literals:
703 	/* Encode Last Literals */
704 	{
705 		int lastRun = iend - anchor;
706 		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
707 		    oend)
708 			return (0);
709 		if (lastRun >= (int)RUN_MASK) {
710 			*op++ = (RUN_MASK << ML_BITS);
711 			lastRun -= RUN_MASK;
712 			for (; lastRun > 254; lastRun -= 255) {
713 				*op++ = 255;
714 			}
715 			*op++ = (BYTE)lastRun;
716 		} else
717 			*op++ = (lastRun << ML_BITS);
718 		(void) memcpy(op, anchor, iend - anchor);
719 		op += iend - anchor;
720 	}
721 
722 	/* End */
723 	return (int)(((char *)op) - dest);
724 }
725 
726 
727 
728 /* Note : this function is valid only if isize < LZ4_64KLIMIT */
729 #define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
730 #define	HASHLOG64K (HASH_LOG + 1)
731 #define	HASH64KTABLESIZE (1U << HASHLOG64K)
732 #define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
733 	HASHLOG64K))
734 #define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))
735 
736 /*ARGSUSED*/
737 static int
738 LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
739     int osize)
740 {
741 #if HEAPMODE
742 	struct refTables *srt = (struct refTables *)ctx;
743 	U16 *HashTable = (U16 *) (srt->hashTable);
744 #else
745 	U16 HashTable[HASH64KTABLESIZE] = { 0 };
746 #endif
747 
748 	const BYTE *ip = (const BYTE *) source;
749 	const BYTE *anchor = ip;
750 	const BYTE *const base = ip;
751 	const BYTE *const iend = ip + isize;
752 	const BYTE *const oend = (BYTE *) dest + osize;
753 	const BYTE *const mflimit = iend - MFLIMIT;
754 #define	matchlimit (iend - LASTLITERALS)
755 
756 	BYTE *op = (BYTE *) dest;
757 
758 	int len, length;
759 	const int skipStrength = SKIPSTRENGTH;
760 	U32 forwardH;
761 
762 	/* Init */
763 	if (isize < MINLENGTH)
764 		goto _last_literals;
765 
766 	/* First Byte */
767 	ip++;
768 	forwardH = LZ4_HASH64K_VALUE(ip);
769 
770 	/* Main Loop */
771 	for (;;) {
772 		int findMatchAttempts = (1U << skipStrength) + 3;
773 		const BYTE *forwardIp = ip;
774 		const BYTE *ref;
775 		BYTE *token;
776 
777 		/* Find a match */
778 		do {
779 			U32 h = forwardH;
780 			int step = findMatchAttempts++ >> skipStrength;
781 			ip = forwardIp;
782 			forwardIp = ip + step;
783 
784 			if (forwardIp > mflimit) {
785 				goto _last_literals;
786 			}
787 
788 			forwardH = LZ4_HASH64K_VALUE(forwardIp);
789 			ref = base + HashTable[h];
790 			HashTable[h] = ip - base;
791 
792 		} while (A32(ref) != A32(ip));
793 
794 		/* Catch up */
795 		while ((ip > anchor) && (ref > (const BYTE *) source) &&
796 		    (ip[-1] == ref[-1])) {
797 			ip--;
798 			ref--;
799 		}
800 
801 		/* Encode Literal length */
802 		length = ip - anchor;
803 		token = op++;
804 
805 		/* Check output limit */
806 		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
807 		    (length >> 8) > oend)
808 			return (0);
809 
810 		if (length >= (int)RUN_MASK) {
811 			*token = (RUN_MASK << ML_BITS);
812 			len = length - RUN_MASK;
813 			for (; len > 254; len -= 255)
814 				*op++ = 255;
815 			*op++ = (BYTE)len;
816 		} else
817 			*token = (length << ML_BITS);
818 
819 		/* Copy Literals */
820 		LZ4_BLINDCOPY(anchor, op, length);
821 
822 		_next_match:
823 		/* Encode Offset */
824 		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
825 
826 		/* Start Counting */
827 		ip += MINMATCH;
828 		ref += MINMATCH;	/* MinMatch verified */
829 		anchor = ip;
830 		while (ip < matchlimit - (STEPSIZE - 1)) {
831 			UARCH diff = AARCH(ref) ^ AARCH(ip);
832 			if (!diff) {
833 				ip += STEPSIZE;
834 				ref += STEPSIZE;
835 				continue;
836 			}
837 			ip += LZ4_NbCommonBytes(diff);
838 			goto _endCount;
839 		}
840 #if LZ4_ARCH64
841 		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
842 			ip += 4;
843 			ref += 4;
844 		}
845 #endif
846 		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
847 			ip += 2;
848 			ref += 2;
849 		}
850 		if ((ip < matchlimit) && (*ref == *ip))
851 			ip++;
852 		_endCount:
853 
854 		/* Encode MatchLength */
855 		len = (ip - anchor);
856 		/* Check output limit */
857 		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
858 			return (0);
859 		if (len >= (int)ML_MASK) {
860 			*token += ML_MASK;
861 			len -= ML_MASK;
862 			for (; len > 509; len -= 510) {
863 				*op++ = 255;
864 				*op++ = 255;
865 			}
866 			if (len > 254) {
867 				len -= 255;
868 				*op++ = 255;
869 			}
870 			*op++ = (BYTE)len;
871 		} else
872 			*token += len;
873 
874 		/* Test end of chunk */
875 		if (ip > mflimit) {
876 			anchor = ip;
877 			break;
878 		}
879 		/* Fill table */
880 		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;
881 
882 		/* Test next position */
883 		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
884 		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
885 		if (A32(ref) == A32(ip)) {
886 			token = op++;
887 			*token = 0;
888 			goto _next_match;
889 		}
890 		/* Prepare next loop */
891 		anchor = ip++;
892 		forwardH = LZ4_HASH64K_VALUE(ip);
893 	}
894 
895 	_last_literals:
896 	/* Encode Last Literals */
897 	{
898 		int lastRun = iend - anchor;
899 		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
900 		    oend)
901 			return (0);
902 		if (lastRun >= (int)RUN_MASK) {
903 			*op++ = (RUN_MASK << ML_BITS);
904 			lastRun -= RUN_MASK;
905 			for (; lastRun > 254; lastRun -= 255)
906 				*op++ = 255;
907 			*op++ = (BYTE)lastRun;
908 		} else
909 			*op++ = (lastRun << ML_BITS);
910 		(void) memcpy(op, anchor, iend - anchor);
911 		op += iend - anchor;
912 	}
913 
914 	/* End */
915 	return (int)(((char *)op) - dest);
916 }
917 
918 static int
919 real_LZ4_compress(const char *source, char *dest, int isize, int osize)
920 {
921 #if HEAPMODE
922 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
923 	void *ctx = kmem_zalloc(sizeof (struct refTables), KM_NOSLEEP);
924 #else
925 	void *ctx = calloc(1, sizeof (struct refTables));
926 #endif
927 	int result;
928 
929 	/*
930 	 * out of kernel memory, gently fall through - this will disable
931 	 * compression in zio_compress_data
932 	 */
933 	if (ctx == NULL)
934 		return (0);
935 
936 	if (isize < LZ4_64KLIMIT)
937 		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
938 	else
939 		result = LZ4_compressCtx(ctx, source, dest, isize, osize);
940 
941 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
942 	kmem_free(ctx, sizeof (struct refTables));
943 #else
944 	free(ctx);
945 #endif
946 	return (result);
947 #else
948 	if (isize < (int)LZ4_64KLIMIT)
949 		return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
950 	return (LZ4_compressCtx(NULL, source, dest, isize, osize));
951 #endif
952 }
953 
954 /* Decompression functions */
955 
956 /*
957  * Note: The decoding function LZ4_uncompress_unknownOutputSize() is safe
958  *	against "buffer overflow" attack type. It will never write nor
959  *	read outside of the provided output buffers.
960  *	LZ4_uncompress_unknownOutputSize() also insures that
961  *	it will never read outside of the input buffer. A corrupted input
962  *	will produce an error result, a negative int, indicating the position
963  *	of the error within input stream.
964  */
965 
966 static int
967 LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
968     int maxOutputSize)
969 {
970 	/* Local Variables */
971 	const BYTE *restrict ip = (const BYTE *) source;
972 	const BYTE *const iend = ip + isize;
973 	const BYTE *ref;
974 
975 	BYTE *op = (BYTE *) dest;
976 	BYTE *const oend = op + maxOutputSize;
977 	BYTE *cpy;
978 
979 	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
980 #if LZ4_ARCH64
981 	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
982 #endif
983 
984 	/* Main Loop */
985 	while (ip < iend) {
986 		unsigned token;
987 		size_t length;
988 
989 		/* get runlength */
990 		token = *ip++;
991 		if ((length = (token >> ML_BITS)) == RUN_MASK) {
992 			int s = 255;
993 			while ((ip < iend) && (s == 255)) {
994 				s = *ip++;
995 				length += s;
996 			}
997 		}
998 		/* copy literals */
999 		cpy = op + length;
1000 		/* CORNER-CASE: cpy might overflow. */
1001 		if (cpy < op)
1002 			goto _output_error;	/* cpy was overflowed, bail! */
1003 		if ((cpy > oend - COPYLENGTH) ||
1004 		    (ip + length > iend - COPYLENGTH)) {
1005 			if (cpy > oend)
1006 				/* Error: writes beyond output buffer */
1007 				goto _output_error;
1008 			if (ip + length != iend)
1009 				/*
1010 				 * Error: LZ4 format requires to consume all
1011 				 * input at this stage
1012 				 */
1013 				goto _output_error;
1014 			(void) memcpy(op, ip, length);
1015 			op += length;
1016 			/* Necessarily EOF, due to parsing restrictions */
1017 			break;
1018 		}
1019 		LZ4_WILDCOPY(ip, op, cpy);
1020 		ip -= (op - cpy);
1021 		op = cpy;
1022 
1023 		/* get offset */
1024 		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
1025 		ip += 2;
1026 		if (ref < (BYTE * const) dest)
1027 			/*
1028 			 * Error: offset creates reference outside of
1029 			 * destination buffer
1030 			 */
1031 			goto _output_error;
1032 
1033 		/* get matchlength */
1034 		if ((length = (token & ML_MASK)) == ML_MASK) {
1035 			while (ip < iend) {
1036 				int s = *ip++;
1037 				length += s;
1038 				if (s == 255)
1039 					continue;
1040 				break;
1041 			}
1042 		}
1043 		/* copy repeated sequence */
1044 		if unlikely(op - ref < STEPSIZE) {
1045 #if LZ4_ARCH64
1046 			size_t dec64 = dec64table[op-ref];
1047 #else
1048 			const int dec64 = 0;
1049 #endif
1050 			op[0] = ref[0];
1051 			op[1] = ref[1];
1052 			op[2] = ref[2];
1053 			op[3] = ref[3];
1054 			op += 4;
1055 			ref += 4;
1056 			ref -= dec32table[op-ref];
1057 			A32(op) = A32(ref);
1058 			op += STEPSIZE - 4;
1059 			ref -= dec64;
1060 		} else {
1061 			LZ4_COPYSTEP(ref, op);
1062 		}
1063 		cpy = op + length - (STEPSIZE - 4);
1064 		if (cpy > oend - COPYLENGTH) {
1065 			if (cpy > oend)
1066 				/*
1067 				 * Error: request to write outside of
1068 				 * destination buffer
1069 				 */
1070 				goto _output_error;
1071 			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
1072 			while (op < cpy)
1073 				*op++ = *ref++;
1074 			op = cpy;
1075 			if (op == oend)
1076 				/*
1077 				 * Check EOF (should never happen, since
1078 				 * last 5 bytes are supposed to be literals)
1079 				 */
1080 				goto _output_error;
1081 			continue;
1082 		}
1083 		LZ4_SECURECOPY(ref, op, cpy);
1084 		op = cpy;	/* correction */
1085 	}
1086 
1087 	/* end of decoding */
1088 	return (int)(((char *)op) - dest);
1089 
1090 	/* write overflow error detected */
1091 	_output_error:
1092 	return (int)(-(((const char *)ip) - source));
1093 }
1094