xref: /freebsd/sys/cddl/contrib/opensolaris/common/lz4/lz4.c (revision f157ca4696f5922275d5d451736005b9332eb136)
1 /*
2  * LZ4 - Fast LZ compression algorithm
3  * Header File
4  * Copyright (C) 2011-2013, Yann Collet.
5  * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are
9  * met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  * notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above
14  * copyright notice, this list of conditions and the following disclaimer
15  * in the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * You can contact the author at :
31  * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
32  * - LZ4 source repository : http://code.google.com/p/lz4/
33  */
34 /*
35  * Copyright (c) 2016 by Delphix. All rights reserved.
36  */
37 
38 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
39 #include <sys/zfs_context.h>
40 #elif defined(_STANDALONE)
41 #include <sys/cdefs.h>
42 #include <stand.h>
43 #include <sys/types.h>
44 #include <sys/endian.h>
45 #include <assert.h>
46 
47 #define	ASSERT	assert
48 #else
49 #include <string.h>
50 #include <stdlib.h>
51 #include <sys/types.h>
52 #include <netinet/in.h>
53 #include <assert.h>
54 
55 #define	ASSERT	assert
56 #endif
57 #include <lz4.h>
58 
59 static int real_LZ4_compress(const char *source, char *dest, int isize,
60     int osize);
61 static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
62     int isize, int maxOutputSize);
63 static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
64     int isize, int osize);
65 static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
66     int isize, int osize);
67 
68 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
69 static kmem_cache_t *lz4_ctx_cache;
70 #endif
71 
72 size_t
73 lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len,
74     int n __unused)
75 {
76 	uint32_t bufsiz;
77 	char *dest = d_start;
78 
79 	ASSERT(d_len >= sizeof (bufsiz));
80 
81 	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
82 	    d_len - sizeof (bufsiz));
83 
84 	/* Signal an error if the compression routine returned zero. */
85 	if (bufsiz == 0)
86 		return (s_len);
87 
88 	/*
89 	 * Encode the compressed buffer size at the start. We'll need this in
90 	 * decompression to counter the effects of padding which might be
91 	 * added to the compressed buffer and which, if unhandled, would
92 	 * confuse the hell out of our decompression function.
93 	 */
94 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
95 	*(uint32_t *)(void *)dest = BE_32(bufsiz);
96 #else
97 	*(uint32_t *)(void *)dest = htonl(bufsiz);
98 #endif
99 
100 	return (bufsiz + sizeof (bufsiz));
101 }
102 
103 int
104 lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len,
105     int n __unused)
106 {
107 	const char *src = s_start;
108 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
109 	uint32_t bufsiz = BE_IN32(s_start);
110 #else
111 	uint32_t bufsiz = htonl(*(uint32_t *)s_start);
112 #endif
113 
114 	/* invalid compressed buffer size encoded at start */
115 	if (bufsiz + sizeof (bufsiz) > s_len)
116 		return (1);
117 
118 	/*
119 	 * Returns 0 on success (decompression function returned non-negative)
120 	 * and non-zero on failure (decompression function returned negative).
121 	 */
122 	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
123 	    d_start, bufsiz, d_len) < 0);
124 }
125 
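/*
 * Illustrative sketch (not part of this file's API surface): one way a
 * hypothetical caller might drive the two wrappers above.  Buffer names and
 * sizes are assumptions; the only layout guarantee is the 4-byte big-endian
 * length word that lz4_compress() prepends and lz4_decompress() reads back:
 *
 *	d_start: [ BE32 compressed size ][ raw LZ4 stream ... optional padding ]
 *
 *	char cbuf[CBUF_SIZE], dbuf[DBUF_SIZE];
 *	size_t clen = lz4_compress(src, cbuf, srclen, sizeof (cbuf), 0);
 *	if (clen == srclen)
 *		... lz4_compress() signaled failure; keep src uncompressed ...
 *	else if (lz4_decompress(cbuf, dbuf, clen, srclen, 0) == 0)
 *		... dbuf now holds the original srclen bytes ...
 */
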
126 /*
127  * LZ4 API Description:
128  *
129  * Simple Functions:
130  * real_LZ4_compress() :
131  * 	isize  : is the input size. Max supported value is ~1.9GB
132  * 	return : the number of bytes written in buffer dest
133  *		 or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
134  * 	note : destination buffer must be already allocated.
135  * 		destination buffer must be sized to handle worst-case
136  * 		situations (input data not compressible).
137  *
138  * Advanced Functions
139  *
140  * LZ4_uncompress_unknownOutputSize() :
141  * 	isize  : is the input size, therefore the compressed size
142  * 	maxOutputSize : is the size of the destination buffer (which must be
143  * 		already allocated)
144  * 	return : the number of bytes decoded in the destination buffer
145  * 		(necessarily <= maxOutputSize). If the source stream is
146  * 		malformed, the function will stop decoding and return a
147  * 		negative result, indicating the byte position of the faulty
148  * 		instruction. This function never writes beyond dest +
149  * 		maxOutputSize, and is therefore protected against malicious
150  * 		data packets.
151  * 	note   : Destination buffer must be already allocated.
152  *
153  * LZ4_compressCtx() :
154  * 	This function explicitly handles the CTX memory structure.
155  *
156  * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
157  * 	by the caller (either on the stack or using kmem_zalloc). Passing NULL
158  * 	isn't valid.
159  *
160  * LZ4_compress64kCtx() :
161  * 	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
162  * 	isize *Must* be <64KB, otherwise the output will be corrupted.
163  *
164  * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
165  * 	by the caller (either on the stack or using kmem_zalloc). Passing NULL
166  * 	isn't valid.
167  */
168 
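/*
 * Illustrative sketch of the CTX convention described above (hypothetical
 * kernel-side caller; real_LZ4_compress() below does essentially this with
 * a kmem cache): the context is simply a zeroed struct refTables.
 *
 *	struct refTables *ctx = kmem_zalloc(sizeof (struct refTables), KM_SLEEP);
 *	int clen = LZ4_compressCtx(ctx, src, dst, isize, osize);
 *	kmem_free(ctx, sizeof (struct refTables));
 */
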
169 /*
170  * Tuning parameters
171  */
172 
173 /*
174  * COMPRESSIONLEVEL: Increasing this value improves the compression ratio;
175  *	lowering it reduces memory usage. Reduced memory usage
176  *	typically improves speed, due to cache effects (ex: L1 32KB for Intel,
177  *	L1 64KB for AMD). Memory usage formula: N -> 2^(N+2) bytes
178  *	(examples: 12 -> 16KB; 17 -> 512KB)
179  */
180 #define	COMPRESSIONLEVEL 12
181 
182 /*
183  * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value makes the
184  *	algorithm give up sooner on data segments it considers "incompressible".
185  *	This may decrease the compression ratio dramatically, but will be
186  *	faster on incompressible data. Increasing this value makes
187  *	the algorithm search longer before declaring a segment "incompressible".
188  *	This could improve compression a bit, but will be slower on
189  *	incompressible data. The default value (6) is recommended.
190  */
191 #define	NOTCOMPRESSIBLE_CONFIRMATION 6
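
/*
 * Worked example of the resulting skip heuristic (derived from the default
 * above; see the match finders below): with SKIPSTRENGTH == 6 the search
 * starts at findMatchAttempts == (1 << 6) + 3 == 67 and steps forward by
 * findMatchAttempts >> 6 bytes per miss, so the step stays at 1 for the
 * first ~61 misses, becomes 2 for the next 64, then 3, and so on.  That is
 * what lets the scan accelerate through incompressible regions.
 */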
192 
193 /*
194  * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a performance
195  * boost on big-endian CPUs, but the resulting compressed stream will be
196  * incompatible with little-endian CPUs. You can set this option to 1 in
197  * situations where the data will stay within a closed environment.
198  * This option is useless on little-endian CPUs (such as x86).
199  */
200 /* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */
201 
202 /*
203  * CPU Feature Detection
204  */
205 
206 /* 32 or 64 bits ? */
207 #if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
208     defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
209     defined(__LP64__) || defined(_LP64))
210 #define	LZ4_ARCH64 1
211 #else
212 #define	LZ4_ARCH64 0
213 #endif
214 
215 /*
216  * Limits the amount of stack space that the algorithm may consume to hold
217  * the compression lookup table. A value of `9', for example, means we never
218  * use more than 2k of stack (see above for a description of COMPRESSIONLEVEL).
219  * If more memory is needed, it is allocated from the heap.
220  */
221 /* FreeBSD: Use heap for all platforms for now */
222 #define	STACKLIMIT 0
223 
224 /*
225  * Little Endian or Big Endian?
226  * Note: override the #define below if you know your architecture's endianness.
227  */
228 #if BYTE_ORDER == BIG_ENDIAN
229 #define	LZ4_BIG_ENDIAN 1
230 #else
231 /*
232  * Little Endian assumed. PDP Endian and other very rare endian formats
233  * are unsupported.
234  */
235 #endif
236 
237 /*
238  * Unaligned memory access is automatically enabled for "common" CPUs,
239  * such as x86. For other CPUs, the compiler will be more cautious and
240  * insert extra code to ensure aligned access is respected. If you know
241  * your target CPU supports unaligned memory access, you may want to
242  * force this option manually to improve performance.
243  */
244 #if defined(__ARM_FEATURE_UNALIGNED)
245 #define	LZ4_FORCE_UNALIGNED_ACCESS 1
246 #endif
247 
248 /*
249  * FreeBSD: we can't use GCC's __builtin_ctz on sparc64 because
250  * gcc currently relies on libcompiler_rt.
251  *
252  * TODO: revisit this when situation changes.
253  */
254 #if defined(__sparc64__)
255 #define	LZ4_FORCE_SW_BITCOUNT
256 #endif
257 
258 /*
259  * Compiler Options
260  */
261 #if __STDC_VERSION__ >= 199901L	/* C99 */
262 /* "restrict" is a known keyword */
263 #else
264 /* Disable restrict */
265 #define	restrict
266 #endif
267 
268 #define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
269 	(((x) & 0xffu) << 8)))
270 
271 #define	expect(expr, value)    (__builtin_expect((expr), (value)))
272 
273 #if defined(likely)
274 #undef likely
275 #endif
276 #if defined(unlikely)
277 #undef unlikely
278 #endif
279 
280 #ifndef likely
281 #define	likely(expr)	expect((expr) != 0, 1)
282 #endif
283 
284 #ifndef unlikely
285 #define	unlikely(expr)	expect((expr) != 0, 0)
286 #endif
287 
288 /* Basic types */
289 #define	BYTE	uint8_t
290 #define	U16	uint16_t
291 #define	U32	uint32_t
292 #define	S32	int32_t
293 #define	U64	uint64_t
294 
295 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
296 #pragma pack(1)
297 #endif
298 
299 typedef struct _U16_S {
300 	U16 v;
301 } U16_S;
302 typedef struct _U32_S {
303 	U32 v;
304 } U32_S;
305 typedef struct _U64_S {
306 	U64 v;
307 } U64_S;
308 
309 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
310 #pragma pack()
311 #endif
312 
313 #define	A64(x) (((U64_S *)(__DECONST(void *, x)))->v)
314 #define	A32(x) (((U32_S *)(__DECONST(void *, x)))->v)
315 #define	A16(x) (((U16_S *)(__DECONST(void *, x)))->v)
316 
317 /*
318  * Constants
319  */
320 #define	MINMATCH 4
321 
322 #define	HASH_LOG COMPRESSIONLEVEL
323 #define	HASHTABLESIZE (1 << HASH_LOG)
324 #define	HASH_MASK (HASHTABLESIZE - 1)
325 
326 #define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
327 	NOTCOMPRESSIBLE_CONFIRMATION : 2)
328 
329 /*
330  * Defines whether memory is allocated on the stack (local variable)
331  * or on the heap (kmem_alloc()).
332  */
333 #define	HEAPMODE (HASH_LOG > STACKLIMIT)
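/*
 * Worked check of the FreeBSD defaults above: HASH_LOG == 12 and
 * STACKLIMIT == 0, so HEAPMODE evaluates to 1 and both compressors take
 * their hash table from a heap-allocated struct refTables (4096 HTYPE
 * entries, i.e. 16KB with 4-byte entries) rather than from the stack.
 */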
334 #define	COPYLENGTH 8
335 #define	LASTLITERALS 5
336 #define	MFLIMIT (COPYLENGTH + MINMATCH)
337 #define	MINLENGTH (MFLIMIT + 1)
338 
339 #define	MAXD_LOG 16
340 #define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)
341 
342 #define	ML_BITS 4
343 #define	ML_MASK ((1U<<ML_BITS)-1)
344 #define	RUN_BITS (8-ML_BITS)
345 #define	RUN_MASK ((1U<<RUN_BITS)-1)
346 
347 
348 /*
349  * Architecture-specific macros
350  */
351 #if LZ4_ARCH64
352 #define	STEPSIZE 8
353 #define	UARCH U64
354 #define	AARCH A64
355 #define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
356 #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
357 #define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
358 #define	HTYPE U32
359 #define	INITBASE(base)		const BYTE* const base = ip
360 #else /* !LZ4_ARCH64 */
361 #define	STEPSIZE 4
362 #define	UARCH U32
363 #define	AARCH A32
364 #define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
365 #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
366 #define	LZ4_SECURECOPY		LZ4_WILDCOPY
367 #define	HTYPE const BYTE *
368 #define	INITBASE(base)		const int base = 0
369 #endif /* !LZ4_ARCH64 */
370 
371 #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
372 #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
373 	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
374 #define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
375 	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
376 #else
377 #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
378 #define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
379 #endif
380 
381 
382 /* Local structures */
383 struct refTables {
384 	HTYPE hashTable[HASHTABLESIZE];
385 };
386 
387 
388 /* Macros */
389 #define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
390 	HASH_LOG))
391 #define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
392 #define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
393 #define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
394 	d = e; }
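
/*
 * Worked example with the default COMPRESSIONLEVEL of 12: HASH_LOG is 12,
 * so LZ4_HASH_FUNCTION() reduces to ((i) * 2654435761U) >> 20, keeping the
 * top 12 bits of the 32-bit product as an index into the 4096-entry table
 * of previously seen 4-byte prefixes.
 */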
395 
396 
397 /* Private functions */
398 #if LZ4_ARCH64
399 
400 static inline int
401 LZ4_NbCommonBytes(register U64 val)
402 {
403 #if defined(LZ4_BIG_ENDIAN)
404 #if !defined(LZ4_FORCE_SW_BITCOUNT)
405 	return (__builtin_clzll(val) >> 3);
406 #else
407 	int r;
408 	if (!(val >> 32)) {
409 		r = 4;
410 	} else {
411 		r = 0;
412 		val >>= 32;
413 	}
414 	if (!(val >> 16)) {
415 		r += 2;
416 		val >>= 8;
417 	} else {
418 		val >>= 24;
419 	}
420 	r += (!val);
421 	return (r);
422 #endif
423 #else
424 #if !defined(LZ4_FORCE_SW_BITCOUNT)
425 	return (__builtin_ctzll(val) >> 3);
426 #else
427 	static const int DeBruijnBytePos[64] =
428 	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
429 		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
430 		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
431 		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
432 	};
433 	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
434 	    58];
435 #endif
436 #endif
437 }
438 
439 #else
440 
441 static inline int
442 LZ4_NbCommonBytes(register U32 val)
443 {
444 #if defined(LZ4_BIG_ENDIAN)
445 #if !defined(LZ4_FORCE_SW_BITCOUNT)
446 	return (__builtin_clz(val) >> 3);
447 #else
448 	int r;
449 	if (!(val >> 16)) {
450 		r = 2;
451 		val >>= 8;
452 	} else {
453 		r = 0;
454 		val >>= 24;
455 	}
456 	r += (!val);
457 	return (r);
458 #endif
459 #else
460 #if !defined(LZ4_FORCE_SW_BITCOUNT)
461 	return (__builtin_ctz(val) >> 3);
462 #else
463 	static const int DeBruijnBytePos[32] = {
464 		0, 0, 3, 0, 3, 1, 3, 0,
465 		3, 2, 2, 1, 3, 2, 0, 1,
466 		3, 3, 1, 2, 2, 2, 2, 0,
467 		3, 1, 2, 0, 1, 0, 1, 1
468 	};
469 	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
470 	    27];
471 #endif
472 #endif
473 }
474 
475 #endif
476 
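/*
 * Worked example for the little-endian case (input values assumed): if the
 * words at `ref' and `ip' first differ in their third byte, then
 * diff = AARCH(ref) ^ AARCH(ip) has its lowest set bit in bits 16..23, so
 * __builtin_ctzll(diff) >> 3 == 2, i.e. the first two bytes match.
 */
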
477 /* Compression functions */
478 
479 /*ARGSUSED*/
480 static int
481 LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
482     int osize)
483 {
484 #if HEAPMODE
485 	struct refTables *srt = (struct refTables *)ctx;
486 	HTYPE *HashTable = (HTYPE *) (srt->hashTable);
487 #else
488 	HTYPE HashTable[HASHTABLESIZE] = { 0 };
489 #endif
490 
491 	const BYTE *ip = (const BYTE *) source;
492 	INITBASE(base);
493 	const BYTE *anchor = ip;
494 	const BYTE *const iend = ip + isize;
495 	const BYTE *const oend = (BYTE *) dest + osize;
496 	const BYTE *const mflimit = iend - MFLIMIT;
497 #define	matchlimit (iend - LASTLITERALS)
498 
499 	BYTE *op = (BYTE *) dest;
500 
501 	int len, length;
502 	const int skipStrength = SKIPSTRENGTH;
503 	U32 forwardH;
504 
505 
506 	/* Init */
507 	if (isize < MINLENGTH)
508 		goto _last_literals;
509 
510 	/* First Byte */
511 	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
512 	ip++;
513 	forwardH = LZ4_HASH_VALUE(ip);
514 
515 	/* Main Loop */
516 	for (;;) {
517 		int findMatchAttempts = (1U << skipStrength) + 3;
518 		const BYTE *forwardIp = ip;
519 		const BYTE *ref;
520 		BYTE *token;
521 
522 		/* Find a match */
523 		do {
524 			U32 h = forwardH;
525 			int step = findMatchAttempts++ >> skipStrength;
526 			ip = forwardIp;
527 			forwardIp = ip + step;
528 
529 			if unlikely(forwardIp > mflimit) {
530 				goto _last_literals;
531 			}
532 
533 			forwardH = LZ4_HASH_VALUE(forwardIp);
534 			ref = base + HashTable[h];
535 			HashTable[h] = ip - base;
536 
537 		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
538 
539 		/* Catch up */
540 		while ((ip > anchor) && (ref > (const BYTE *) source) &&
541 		    unlikely(ip[-1] == ref[-1])) {
542 			ip--;
543 			ref--;
544 		}
545 
546 		/* Encode Literal length */
547 		length = ip - anchor;
548 		token = op++;
549 
550 		/* Check output limit */
551 		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
552 		    (length >> 8) > oend)
553 			return (0);
554 
555 		if (length >= (int)RUN_MASK) {
556 			*token = (RUN_MASK << ML_BITS);
557 			len = length - RUN_MASK;
558 			for (; len > 254; len -= 255)
559 				*op++ = 255;
560 			*op++ = (BYTE)len;
561 		} else
562 			*token = (length << ML_BITS);
563 
564 		/* Copy Literals */
565 		LZ4_BLINDCOPY(anchor, op, length);
566 
567 		_next_match:
568 		/* Encode Offset */
569 		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
570 
571 		/* Start Counting */
572 		ip += MINMATCH;
573 		ref += MINMATCH;	/* MinMatch verified */
574 		anchor = ip;
575 		while likely(ip < matchlimit - (STEPSIZE - 1)) {
576 			UARCH diff = AARCH(ref) ^ AARCH(ip);
577 			if (!diff) {
578 				ip += STEPSIZE;
579 				ref += STEPSIZE;
580 				continue;
581 			}
582 			ip += LZ4_NbCommonBytes(diff);
583 			goto _endCount;
584 		}
585 #if LZ4_ARCH64
586 		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
587 			ip += 4;
588 			ref += 4;
589 		}
590 #endif
591 		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
592 			ip += 2;
593 			ref += 2;
594 		}
595 		if ((ip < matchlimit) && (*ref == *ip))
596 			ip++;
597 		_endCount:
598 
599 		/* Encode MatchLength */
600 		len = (ip - anchor);
601 		/* Check output limit */
602 		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
603 			return (0);
604 		if (len >= (int)ML_MASK) {
605 			*token += ML_MASK;
606 			len -= ML_MASK;
607 			for (; len > 509; len -= 510) {
608 				*op++ = 255;
609 				*op++ = 255;
610 			}
611 			if (len > 254) {
612 				len -= 255;
613 				*op++ = 255;
614 			}
615 			*op++ = (BYTE)len;
616 		} else
617 			*token += len;
618 
619 		/* Test end of chunk */
620 		if (ip > mflimit) {
621 			anchor = ip;
622 			break;
623 		}
624 		/* Fill table */
625 		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;
626 
627 		/* Test next position */
628 		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
629 		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
630 		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
631 			token = op++;
632 			*token = 0;
633 			goto _next_match;
634 		}
635 		/* Prepare next loop */
636 		anchor = ip++;
637 		forwardH = LZ4_HASH_VALUE(ip);
638 	}
639 
640 	_last_literals:
641 	/* Encode Last Literals */
642 	{
643 		int lastRun = iend - anchor;
644 		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
645 		    oend)
646 			return (0);
647 		if (lastRun >= (int)RUN_MASK) {
648 			*op++ = (RUN_MASK << ML_BITS);
649 			lastRun -= RUN_MASK;
650 			for (; lastRun > 254; lastRun -= 255) {
651 				*op++ = 255;
652 			}
653 			*op++ = (BYTE)lastRun;
654 		} else
655 			*op++ = (lastRun << ML_BITS);
656 		(void) memcpy(op, anchor, iend - anchor);
657 		op += iend - anchor;
658 	}
659 
660 	/* End */
661 	return (int)(((char *)op) - dest);
662 }
663 
664 
665 
666 /* Note : this function is valid only if isize < LZ4_64KLIMIT */
667 #define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
668 #define	HASHLOG64K (HASH_LOG + 1)
669 #define	HASH64KTABLESIZE (1U << HASHLOG64K)
670 #define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
671 	HASHLOG64K))
672 #define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))
673 
674 /*ARGSUSED*/
675 static int
676 LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
677     int osize)
678 {
679 #if HEAPMODE
680 	struct refTables *srt = (struct refTables *)ctx;
681 	U16 *HashTable = (U16 *) (srt->hashTable);
682 #else
683 	U16 HashTable[HASH64KTABLESIZE] = { 0 };
684 #endif
685 
686 	const BYTE *ip = (const BYTE *) source;
687 	const BYTE *anchor = ip;
688 	const BYTE *const base = ip;
689 	const BYTE *const iend = ip + isize;
690 	const BYTE *const oend = (BYTE *) dest + osize;
691 	const BYTE *const mflimit = iend - MFLIMIT;
692 #define	matchlimit (iend - LASTLITERALS)
693 
694 	BYTE *op = (BYTE *) dest;
695 
696 	int len, length;
697 	const int skipStrength = SKIPSTRENGTH;
698 	U32 forwardH;
699 
700 	/* Init */
701 	if (isize < MINLENGTH)
702 		goto _last_literals;
703 
704 	/* First Byte */
705 	ip++;
706 	forwardH = LZ4_HASH64K_VALUE(ip);
707 
708 	/* Main Loop */
709 	for (;;) {
710 		int findMatchAttempts = (1U << skipStrength) + 3;
711 		const BYTE *forwardIp = ip;
712 		const BYTE *ref;
713 		BYTE *token;
714 
715 		/* Find a match */
716 		do {
717 			U32 h = forwardH;
718 			int step = findMatchAttempts++ >> skipStrength;
719 			ip = forwardIp;
720 			forwardIp = ip + step;
721 
722 			if (forwardIp > mflimit) {
723 				goto _last_literals;
724 			}
725 
726 			forwardH = LZ4_HASH64K_VALUE(forwardIp);
727 			ref = base + HashTable[h];
728 			HashTable[h] = ip - base;
729 
730 		} while (A32(ref) != A32(ip));
731 
732 		/* Catch up */
733 		while ((ip > anchor) && (ref > (const BYTE *) source) &&
734 		    (ip[-1] == ref[-1])) {
735 			ip--;
736 			ref--;
737 		}
738 
739 		/* Encode Literal length */
740 		length = ip - anchor;
741 		token = op++;
742 
743 		/* Check output limit */
744 		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
745 		    (length >> 8) > oend)
746 			return (0);
747 
748 		if (length >= (int)RUN_MASK) {
749 			*token = (RUN_MASK << ML_BITS);
750 			len = length - RUN_MASK;
751 			for (; len > 254; len -= 255)
752 				*op++ = 255;
753 			*op++ = (BYTE)len;
754 		} else
755 			*token = (length << ML_BITS);
756 
757 		/* Copy Literals */
758 		LZ4_BLINDCOPY(anchor, op, length);
759 
760 		_next_match:
761 		/* Encode Offset */
762 		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
763 
764 		/* Start Counting */
765 		ip += MINMATCH;
766 		ref += MINMATCH;	/* MinMatch verified */
767 		anchor = ip;
768 		while (ip < matchlimit - (STEPSIZE - 1)) {
769 			UARCH diff = AARCH(ref) ^ AARCH(ip);
770 			if (!diff) {
771 				ip += STEPSIZE;
772 				ref += STEPSIZE;
773 				continue;
774 			}
775 			ip += LZ4_NbCommonBytes(diff);
776 			goto _endCount;
777 		}
778 #if LZ4_ARCH64
779 		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
780 			ip += 4;
781 			ref += 4;
782 		}
783 #endif
784 		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
785 			ip += 2;
786 			ref += 2;
787 		}
788 		if ((ip < matchlimit) && (*ref == *ip))
789 			ip++;
790 		_endCount:
791 
792 		/* Encode MatchLength */
793 		len = (ip - anchor);
794 		/* Check output limit */
795 		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
796 			return (0);
797 		if (len >= (int)ML_MASK) {
798 			*token += ML_MASK;
799 			len -= ML_MASK;
800 			for (; len > 509; len -= 510) {
801 				*op++ = 255;
802 				*op++ = 255;
803 			}
804 			if (len > 254) {
805 				len -= 255;
806 				*op++ = 255;
807 			}
808 			*op++ = (BYTE)len;
809 		} else
810 			*token += len;
811 
812 		/* Test end of chunk */
813 		if (ip > mflimit) {
814 			anchor = ip;
815 			break;
816 		}
817 		/* Fill table */
818 		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;
819 
820 		/* Test next position */
821 		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
822 		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
823 		if (A32(ref) == A32(ip)) {
824 			token = op++;
825 			*token = 0;
826 			goto _next_match;
827 		}
828 		/* Prepare next loop */
829 		anchor = ip++;
830 		forwardH = LZ4_HASH64K_VALUE(ip);
831 	}
832 
833 	_last_literals:
834 	/* Encode Last Literals */
835 	{
836 		int lastRun = iend - anchor;
837 		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
838 		    oend)
839 			return (0);
840 		if (lastRun >= (int)RUN_MASK) {
841 			*op++ = (RUN_MASK << ML_BITS);
842 			lastRun -= RUN_MASK;
843 			for (; lastRun > 254; lastRun -= 255)
844 				*op++ = 255;
845 			*op++ = (BYTE)lastRun;
846 		} else
847 			*op++ = (lastRun << ML_BITS);
848 		(void) memcpy(op, anchor, iend - anchor);
849 		op += iend - anchor;
850 	}
851 
852 	/* End */
853 	return (int)(((char *)op) - dest);
854 }
855 
856 static int
857 real_LZ4_compress(const char *source, char *dest, int isize, int osize)
858 {
859 #if HEAPMODE
860 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
861 	void *ctx = kmem_cache_alloc(lz4_ctx_cache, KM_NOSLEEP);
862 #else
863 	void *ctx = malloc(sizeof(struct refTables));
864 #endif
865 	int result;
866 
867 	/*
868 	 * out of kernel memory, gently fall through - this will disable
869 	 * compression in zio_compress_data
870 	 */
871 	if (ctx == NULL)
872 		return (0);
873 
874 	bzero(ctx, sizeof(struct refTables));
875 	if (isize < LZ4_64KLIMIT)
876 		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
877 	else
878 		result = LZ4_compressCtx(ctx, source, dest, isize, osize);
879 
880 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
881 	kmem_cache_free(lz4_ctx_cache, ctx);
882 #else
883 	free(ctx);
884 #endif
885 	return (result);
886 #else
887 	if (isize < (int)LZ4_64KLIMIT)
888 		return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
889 	return (LZ4_compressCtx(NULL, source, dest, isize, osize));
890 #endif
891 }
892 
893 /* Decompression functions */
894 
895 /*
896  * Note: The decoding function LZ4_uncompress_unknownOutputSize() is safe
897  *	against "buffer overflow" attacks. It will never write nor
898  *	read outside of the provided output buffers.
899  *	LZ4_uncompress_unknownOutputSize() also ensures that it will never
900  *	read outside of the input buffer.  A corrupted input will produce
901  *	an error result, a negative int, indicating the position of the
902  *	error within the input stream.
903  */
904 
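/*
 * Format sketch of one LZ4 sequence as parsed by the loop below (this is a
 * restatement of the constants above, not a new format definition):
 *
 *	token     1 byte: literal length (high 4 bits) | match length (low 4 bits)
 *	[extra literal-length bytes: a run of 255s ending in a byte <= 254,
 *	 present only when the 4-bit field equals RUN_MASK]
 *	literals  literal-length bytes, copied verbatim
 *	offset    2 bytes, little-endian, distance back into the output
 *	[extra match-length bytes, same continuation scheme, present when the
 *	 4-bit field equals ML_MASK]
 *
 * The decoded match copies (match length + MINMATCH) bytes; the final
 * sequence of a block carries literals only (the LASTLITERALS tail).
 */
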
905 static int
906 LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
907     int maxOutputSize)
908 {
909 	/* Local Variables */
910 	const BYTE *restrict ip = (const BYTE *) source;
911 	const BYTE *const iend = ip + isize;
912 	const BYTE *ref;
913 
914 	BYTE *op = (BYTE *) dest;
915 	BYTE *const oend = op + maxOutputSize;
916 	BYTE *cpy;
917 
918 	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
919 #if LZ4_ARCH64
920 	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
921 #endif
922 
923 	/* Main Loop */
924 	while (ip < iend) {
925 		unsigned token;
926 		size_t length;
927 
928 		/* get runlength */
929 		token = *ip++;
930 		if ((length = (token >> ML_BITS)) == RUN_MASK) {
931 			int s = 255;
932 			while ((ip < iend) && (s == 255)) {
933 				s = *ip++;
934 				length += s;
935 			}
936 		}
937 		/* copy literals */
938 		cpy = op + length;
939 		/* CORNER-CASE: cpy might overflow. */
940 		if (cpy < op)
941 			goto _output_error;	/* cpy was overflowed, bail! */
942 		if ((cpy > oend - COPYLENGTH) ||
943 		    (ip + length > iend - COPYLENGTH)) {
944 			if (cpy > oend)
945 				/* Error: writes beyond output buffer */
946 				goto _output_error;
947 			if (ip + length != iend)
948 				/*
949 				 * Error: LZ4 format requires to consume all
950 				 * input at this stage
951 				 */
952 				goto _output_error;
953 			(void) memcpy(op, ip, length);
954 			op += length;
955 			/* Necessarily EOF, due to parsing restrictions */
956 			break;
957 		}
958 		LZ4_WILDCOPY(ip, op, cpy);
959 		ip -= (op - cpy);
960 		op = cpy;
961 
962 		/* get offset */
963 		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
964 		ip += 2;
965 		if (ref < (BYTE * const) dest)
966 			/*
967 			 * Error: offset creates reference outside of
968 			 * destination buffer
969 			 */
970 			goto _output_error;
971 
972 		/* get matchlength */
973 		if ((length = (token & ML_MASK)) == ML_MASK) {
974 			while (ip < iend) {
975 				int s = *ip++;
976 				length += s;
977 				if (s == 255)
978 					continue;
979 				break;
980 			}
981 		}
982 		/* copy repeated sequence */
983 		if unlikely(op - ref < STEPSIZE) {
984 #if LZ4_ARCH64
985 			size_t dec64 = dec64table[op-ref];
986 #else
987 			const int dec64 = 0;
988 #endif
989 			op[0] = ref[0];
990 			op[1] = ref[1];
991 			op[2] = ref[2];
992 			op[3] = ref[3];
993 			op += 4;
994 			ref += 4;
995 			ref -= dec32table[op-ref];
996 			A32(op) = A32(ref);
997 			op += STEPSIZE - 4;
998 			ref -= dec64;
999 		} else {
1000 			LZ4_COPYSTEP(ref, op);
1001 		}
1002 		cpy = op + length - (STEPSIZE - 4);
1003 		if (cpy > oend - COPYLENGTH) {
1004 			if (cpy > oend)
1005 				/*
1006 				 * Error: request to write outside of
1007 				 * destination buffer
1008 				 */
1009 				goto _output_error;
1010 			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
1011 			while (op < cpy)
1012 				*op++ = *ref++;
1013 			op = cpy;
1014 			if (op == oend)
1015 				/*
1016 				 * Check EOF (should never happen, since
1017 				 * last 5 bytes are supposed to be literals)
1018 				 */
1019 				goto _output_error;
1020 			continue;
1021 		}
1022 		LZ4_SECURECOPY(ref, op, cpy);
1023 		op = cpy;	/* correction */
1024 	}
1025 
1026 	/* end of decoding */
1027 	return (int)(((char *)op) - dest);
1028 
1029 	/* write overflow error detected */
1030 	_output_error:
1031 	return (int)(-(((const char *)ip) - source));
1032 }
1033 
1034 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
1035 extern void
1036 lz4_init(void)
1037 {
1038 
1039 #if HEAPMODE
1040 	lz4_ctx_cache = kmem_cache_create("lz4_ctx", sizeof(struct refTables),
1041 	    0, NULL, NULL, NULL, NULL, NULL, 0);
1042 #endif
1043 }
1044 
1045 extern void
1046 lz4_fini(void)
1047 {
1048 
1049 #if HEAPMODE
1050 	kmem_cache_destroy(lz4_ctx_cache);
1051 #endif
1052 }
1053 #endif /* _KERNEL || _FAKE_KERNEL */
1054