/*
 * LZ4 - Fast LZ compression algorithm
 * Header File
 * Copyright (C) 2011-2013, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
 * - LZ4 source repository : http://code.google.com/p/lz4/
 */
/*
 * Copyright (c) 2016 by Delphix. All rights reserved.
 */

#if defined(_KERNEL) || defined(_FAKE_KERNEL)
#include <sys/zfs_context.h>
#elif defined(_STANDALONE)
#include <sys/cdefs.h>
#include <stand.h>
#include <sys/types.h>
#include <sys/endian.h>
#include <assert.h>

#undef ASSERT
#define	ASSERT	assert
#else
#include <string.h>
#include <stdlib.h>
#include <sys/types.h>
#include <netinet/in.h>
#include <assert.h>

#undef ASSERT
#define	ASSERT	assert
#endif
#include "lz4.h"

static int real_LZ4_compress(const char *source, char *dest, int isize,
    int osize);
static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);
static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);
static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);

#if defined(_KERNEL) || defined(_FAKE_KERNEL)
static kmem_cache_t *lz4_ctx_cache;
#endif

size_t
lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len,
    int n __unused)
{
	uint32_t bufsiz;
	char *dest = d_start;

	ASSERT(d_len >= sizeof (bufsiz));

	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
	    d_len - sizeof (bufsiz));

	/* Signal an error if the compression routine returned zero. */
	if (bufsiz == 0)
		return (s_len);

	/*
	 * Encode the compressed buffer size at the start. We'll need this in
	 * decompression to counter the effects of padding which might be
	 * added to the compressed buffer and which, if unhandled, would
	 * confuse the hell out of our decompression function.
	 */
#if defined(_KERNEL) || defined(_FAKE_KERNEL)
	*(uint32_t *)(void *)dest = BE_32(bufsiz);
#else
	*(uint32_t *)(void *)dest = htonl(bufsiz);
#endif

	return (bufsiz + sizeof (bufsiz));
}

int
lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len,
    int n __unused)
{
	const char *src = s_start;
#if defined(_KERNEL) || defined(_FAKE_KERNEL)
	uint32_t bufsiz = BE_IN32(s_start);
#else
	uint32_t bufsiz = ntohl(*(uint32_t *)s_start);
#endif

	/* invalid compressed buffer size encoded at start */
	if (bufsiz + sizeof (bufsiz) > s_len)
		return (1);

	/*
	 * Returns 0 on success (decompression function returned non-negative)
	 * and non-zero on failure (decompression function returned negative).
	 */
	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
	    d_start, bufsiz, d_len) < 0);
}
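
/*
 * Illustrative usage sketch (an editorial addition, not part of the upstream
 * sources): the wrappers above frame the LZ4 payload with a 4-byte
 * big-endian length, so the destination passed to lz4_compress() must leave
 * room for that header, and a return value of s_len signals that the data
 * could not be compressed into d_len bytes.  Buffer names and sizes below
 * are hypothetical.
 *
 *	char cbuf[4096], obuf[4096];
 *	size_t clen;
 *
 *	clen = lz4_compress(src, cbuf, 4096, sizeof (cbuf), 0);
 *	if (clen >= 4096) {
 *		// incompressible; keep the original block instead
 *	} else if (lz4_decompress(cbuf, obuf, clen, sizeof (obuf), 0) != 0) {
 *		// corrupt or truncated compressed stream
 *	}
 */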

/*
 * LZ4 API Description:
 *
 * Simple Functions:
 * real_LZ4_compress() :
 * 	isize  : is the input size. Max supported value is ~1.9GB
 * 	return : the number of bytes written in buffer dest
 *		 or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
 * 	note : destination buffer must be already allocated.
 * 		destination buffer must be sized to handle worst-case
 * 		situations (input data not compressible).
 *
 * Advanced Functions
 *
 * LZ4_uncompress_unknownOutputSize() :
 * 	isize  : is the input size, therefore the compressed size
 * 	maxOutputSize : is the size of the destination buffer (which must be
 * 		already allocated)
 * 	return : the number of bytes decoded in the destination buffer
 * 		(necessarily <= maxOutputSize). If the source stream is
 * 		malformed, the function will stop decoding and return a
 * 		negative result, indicating the byte position of the faulty
 * 		instruction. This function never writes beyond dest +
 * 		maxOutputSize, and is therefore protected against malicious
 * 		data packets.
 * 	note   : Destination buffer must be already allocated.
 *
 * LZ4_compressCtx() :
 * 	This function explicitly handles the CTX memory structure.
 *
 * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 * 	by the caller (either on the stack or using kmem_zalloc). Passing NULL
 * 	isn't valid.
 *
 * LZ4_compress64kCtx() :
 * 	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 * 	isize *Must* be <64KB, otherwise the output will be corrupted.
 *
 * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 * 	by the caller (either on the stack or using kmem_zalloc). Passing NULL
 * 	isn't valid.
 */
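
/*
 * Editorial sketch of the internal entry points documented above (not part
 * of the upstream sources; buffer names and sizes are hypothetical, and the
 * context handling mirrors what real_LZ4_compress() does below):
 *
 *	struct refTables ctx;
 *
 *	bzero(&ctx, sizeof (ctx));
 *	int clen = LZ4_compress64kCtx(&ctx, in, out, in_len, out_len);
 *	// in_len must be < 64KB for the 64k variant; clen == 0 means the
 *	// compressed form did not fit into out_len bytes.
 *
 *	int dlen = LZ4_uncompress_unknownOutputSize(out, back, clen, back_len);
 *	// dlen < 0 flags a malformed stream; otherwise dlen is the number
 *	// of bytes produced in back.
 */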

/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves the compression ratio;
 *	lowering this value reduces memory usage. Reduced memory usage
 *	typically improves speed, due to cache effects (ex: L1 32KB for Intel,
 *	L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 *	(examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define	COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 *	algorithm skip data segments considered "incompressible" more
 *	quickly. This may decrease the compression ratio dramatically, but
 *	will be faster on incompressible data. Increasing this value will
 *	make the algorithm search more before declaring a segment
 *	"incompressible". This could improve compression a bit, but will be
 *	slower on incompressible data. The default value (6) is recommended.
 */
#define	NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a performance
 * boost on big-endian CPUs, but the resulting compressed stream will be
 * incompatible with little-endian CPUs. You can set this option to 1 in
 * situations where the data will stay within a closed environment.
 * This option is useless on little-endian CPUs (such as x86).
 */
/* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */

/*
 * CPU Feature Detection
 */

/* 32 or 64 bits ? */
#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
    defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
    defined(__LP64__) || defined(_LP64))
#define	LZ4_ARCH64 1
#else
#define	LZ4_ARCH64 0
#endif

/*
 * Limits the amount of stack space that the algorithm may consume to hold
 * the compression lookup table. A value of `9', for example, means we would
 * never use more than 2k of stack (see above for a description of
 * COMPRESSIONLEVEL). If more memory is needed, it is allocated from the heap.
 */
/* FreeBSD: Use heap for all platforms for now */
#define	STACKLIMIT 0

/*
 * Little Endian or Big Endian?
 * Note: override the #define below if you know your architecture's
 * endianness.
 */
#if BYTE_ORDER == BIG_ENDIAN
#define	LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian formats
 * are unsupported.
 */
#endif

/*
 * Unaligned memory access is automatically enabled for "common" CPUs,
 * such as x86. For other CPUs, the compiler will be more cautious and
 * insert extra code to ensure aligned access is respected. If you know
 * your target CPU supports unaligned memory access, you may want to
 * force this option manually to improve performance.
 */
#if defined(__ARM_FEATURE_UNALIGNED)
#define	LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

/*
 * Compiler Options
 */
#if __STDC_VERSION__ >= 199901L	/* C99 */
/* "restrict" is a known keyword */
#else
/* Disable restrict */
#define	restrict
#endif

#define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
	(((x) & 0xffu) << 8)))

#define	expect(expr, value)    (__builtin_expect((expr), (value)))

#if defined(likely)
#undef likely
#endif
#if defined(unlikely)
#undef unlikely
#endif

#ifndef likely
#define	likely(expr)	expect((expr) != 0, 1)
#endif

#ifndef unlikely
#define	unlikely(expr)	expect((expr) != 0, 0)
#endif

/* Basic types */
#define	BYTE	uint8_t
#define	U16	uint16_t
#define	U32	uint32_t
#define	S32	int32_t
#define	U64	uint64_t

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(1)
#endif

typedef struct _U16_S {
	U16 v;
} U16_S;
typedef struct _U32_S {
	U32 v;
} U32_S;
typedef struct _U64_S {
	U64 v;
} U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack()
#endif

#define	A64(x) (((U64_S *)(__DECONST(void *, x)))->v)
#define	A32(x) (((U32_S *)(__DECONST(void *, x)))->v)
#define	A16(x) (((U16_S *)(__DECONST(void *, x)))->v)

/*
 * Constants
 */
#define	MINMATCH 4

#define	HASH_LOG COMPRESSIONLEVEL
#define	HASHTABLESIZE (1 << HASH_LOG)
#define	HASH_MASK (HASHTABLESIZE - 1)

#define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
	NOTCOMPRESSIBLE_CONFIRMATION : 2)

/*
 * Defines if memory is allocated into the stack (local variable),
 * or into the heap (kmem_alloc()).
 */
#define	HEAPMODE (HASH_LOG > STACKLIMIT)
#define	COPYLENGTH 8
#define	LASTLITERALS 5
#define	MFLIMIT (COPYLENGTH + MINMATCH)
#define	MINLENGTH (MFLIMIT + 1)

#define	MAXD_LOG 16
#define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define	ML_BITS 4
#define	ML_MASK ((1U<<ML_BITS)-1)
#define	RUN_BITS (8-ML_BITS)
#define	RUN_MASK ((1U<<RUN_BITS)-1)

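/*
 * Editorial note on the four constants above (not part of the upstream
 * sources): every compressed sequence starts with a one-byte token whose
 * upper RUN_BITS hold the literal-run length and whose lower ML_BITS hold
 * the match length (the decoder implicitly adds MINMATCH back).  A field
 * value of RUN_MASK/ML_MASK (15) means the length continues in the
 * following bytes, each adding up to 255; e.g. a literal run of 300 is
 * encoded as 15 in the token, then 255, then 30.
 */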

/*
 * Architecture-specific macros
 */
#if LZ4_ARCH64
#define	STEPSIZE 8
#define	UARCH U64
#define	AARCH A64
#define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
#define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
#define	HTYPE U32
#define	INITBASE(base)		const BYTE* const base = ip
#else /* !LZ4_ARCH64 */
#define	STEPSIZE 4
#define	UARCH U32
#define	AARCH A32
#define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
#define	LZ4_SECURECOPY		LZ4_WILDCOPY
#define	HTYPE const BYTE *
#define	INITBASE(base)		const int base = 0
#endif /* !LZ4_ARCH64 */

#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
#else
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
#endif


/* Local structures */
struct refTables {
	HTYPE hashTable[HASHTABLESIZE];
};


/* Macros */
#define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
	HASH_LOG))
#define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
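/*
 * Editorial note (not part of the upstream sources): 2654435761 is a prime
 * close to 2^32 divided by the golden ratio, the classic multiplicative
 * (Fibonacci) hashing constant.  LZ4_HASH_VALUE() multiplies the four bytes
 * at p by it and keeps the top HASH_LOG bits, spreading nearby 4-byte
 * sequences fairly uniformly across the hash table.
 */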
#define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
#define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
	d = e; }


/* Private functions */
#if LZ4_ARCH64

static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if !defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clzll(val) >> 3);
#else
	int r;
	if (!(val >> 32)) {
		r = 4;
	} else {
		r = 0;
		val >>= 32;
	}
	if (!(val >> 16)) {
		r += 2;
		val >>= 8;
	} else {
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if !defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctzll(val) >> 3);
#else
	static const int DeBruijnBytePos[64] =
	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
	};
	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
	    58];
#endif
#endif
}

#else

static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if !defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clz(val) >> 3);
#else
	int r;
	if (!(val >> 16)) {
		r = 2;
		val >>= 8;
	} else {
		r = 0;
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if !defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctz(val) >> 3);
#else
	static const int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0,
		3, 2, 2, 1, 3, 2, 0, 1,
		3, 3, 1, 2, 2, 2, 2, 0,
		3, 1, 2, 0, 1, 0, 1, 1
	};
	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
	    27];
#endif
#endif
}

#endif
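
/*
 * Editorial note (not part of the upstream sources): LZ4_NbCommonBytes()
 * takes the XOR of two words loaded from the input and reports how many of
 * their first-in-memory bytes are equal.  On little-endian those are the
 * low-order bytes, so if two 8-byte windows differ only in their third byte
 * the XOR has its lowest set bit in bits 16-23 and __builtin_ctzll() >> 3
 * yields 2; the big-endian path uses __builtin_clzll() analogously.
 */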

/* Compression functions */

/*ARGSUSED*/
static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	HTYPE *HashTable = (HTYPE *) (srt->hashTable);
#else
	HTYPE HashTable[HASHTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (const BYTE *) source;
	INITBASE(base);
	const BYTE *anchor = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;


	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
	ip++;
	forwardH = LZ4_HASH_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if unlikely(forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

		/* Catch up */
		while ((ip > anchor) && (ref > (const BYTE *) source) &&
		    unlikely(ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while likely(ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255) {
				*op++ = 255;
			}
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}



/* Note : this function is valid only if isize < LZ4_64KLIMIT */
#define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
#define	HASHLOG64K (HASH_LOG + 1)
#define	HASH64KTABLESIZE (1U << HASHLOG64K)
#define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
	HASHLOG64K))
#define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))
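/*
 * Editorial note (not part of the upstream sources): because
 * LZ4_compress64kCtx() only accepts inputs smaller than 64KB, the offset
 * (ip - base) always fits in 16 bits, so the hash table can store U16
 * entries instead of U32/pointer-sized ones.  The same refTables allocation
 * therefore holds twice as many entries, which is why HASHLOG64K is
 * HASH_LOG + 1.
 */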

/*ARGSUSED*/
static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	U16 *HashTable = (U16 *) (srt->hashTable);
#else
	U16 HashTable[HASH64KTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (const BYTE *) source;
	const BYTE *anchor = ip;
	const BYTE *const base = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	ip++;
	forwardH = LZ4_HASH64K_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH64K_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while (A32(ref) != A32(ip));

		/* Catch up */
		while ((ip > anchor) && (ref > (const BYTE *) source) &&
		    (ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
		if (A32(ref) == A32(ip)) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH64K_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255)
				*op++ = 255;
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}

static int
real_LZ4_compress(const char *source, char *dest, int isize, int osize)
{
#if HEAPMODE
#if defined(_KERNEL) || defined(_FAKE_KERNEL)
	void *ctx = kmem_cache_alloc(lz4_ctx_cache, KM_NOSLEEP);
#else
	void *ctx = calloc(1, sizeof(struct refTables));
#endif
	int result;

	/*
	 * out of kernel memory, gently fall through - this will disable
	 * compression in zio_compress_data
	 */
	if (ctx == NULL)
		return (0);

	bzero(ctx, sizeof(struct refTables));
	if (isize < LZ4_64KLIMIT)
		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
	else
		result = LZ4_compressCtx(ctx, source, dest, isize, osize);

#if defined(_KERNEL) || defined(_FAKE_KERNEL)
	kmem_cache_free(lz4_ctx_cache, ctx);
#else
	free(ctx);
#endif
	return (result);
#else
	if (isize < (int)LZ4_64KLIMIT)
		return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
	return (LZ4_compressCtx(NULL, source, dest, isize, osize));
#endif
}

/* Decompression functions */

/*
 * Note: The decoding function LZ4_uncompress_unknownOutputSize() is safe
 *	against "buffer overflow" attacks. It will never write nor
 *	read outside of the provided output buffer.
 *	LZ4_uncompress_unknownOutputSize() also ensures that it will never
 *	read outside of the input buffer.  A corrupted input will produce
 *	an error result, a negative int, indicating the position of the
 *	error within the input stream.
 */

static int
LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
    int maxOutputSize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *const iend = ip + isize;
	const BYTE *ref;

	BYTE *op = (BYTE *) dest;
	BYTE *const oend = op + maxOutputSize;
	BYTE *cpy;

	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif
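	/*
	 * Editorial note (not part of the upstream sources):
	 * dec32table/dec64table handle matches whose offset (op - ref) is
	 * smaller than the word size used for copying.  Such a match
	 * overlaps the bytes being produced, so the first four bytes are
	 * copied one at a time and the tables then re-aim ref so that the
	 * following word-sized copies keep replicating the short repeating
	 * pattern correctly.
	 */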

	/* Main Loop */
	while (ip < iend) {
		unsigned token;
		size_t length;

		/* get runlength */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			int s = 255;
			while ((ip < iend) && (s == 255)) {
				s = *ip++;
				length += s;
			}
		}
		/* copy literals */
		cpy = op + length;
		/* CORNER-CASE: cpy might overflow. */
		if (cpy < op)
			goto _output_error;	/* cpy was overflowed, bail! */
		if ((cpy > oend - COPYLENGTH) ||
		    (ip + length > iend - COPYLENGTH)) {
			if (cpy > oend)
				/* Error: writes beyond output buffer */
				goto _output_error;
			if (ip + length != iend)
				/*
				 * Error: LZ4 format requires to consume all
				 * input at this stage
				 */
				goto _output_error;
			(void) memcpy(op, ip, length);
			op += length;
			/* Necessarily EOF, due to parsing restrictions */
			break;
		}
		LZ4_WILDCOPY(ip, op, cpy);
		ip -= (op - cpy);
		op = cpy;

		/* get offset */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if (ref < (BYTE * const) dest)
			/*
			 * Error: offset creates reference outside of
			 * destination buffer
			 */
			goto _output_error;

		/* get matchlength */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			while (ip < iend) {
				int s = *ip++;
				length += s;
				if (s == 255)
					continue;
				break;
			}
		}
		/* copy repeated sequence */
		if unlikely(op - ref < STEPSIZE) {
#if LZ4_ARCH64
			size_t dec64 = dec64table[op-ref];
#else
			const int dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op-ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			if (cpy > oend)
				/*
				 * Error: request to write outside of
				 * destination buffer
				 */
				goto _output_error;
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since
				 * last 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)op) - dest);

	/* write overflow error detected */
	_output_error:
	return (int)(-(((const char *)ip) - source));
}

#if defined(_KERNEL) || defined(_FAKE_KERNEL)
void
lz4_init(void)
{

#if HEAPMODE
	lz4_ctx_cache = kmem_cache_create("lz4_ctx", sizeof(struct refTables),
	    0, NULL, NULL, NULL, NULL, NULL, 0);
#endif
}

void
lz4_fini(void)
{

#if HEAPMODE
	kmem_cache_destroy(lz4_ctx_cache);
#endif
}
#endif /* _KERNEL || _FAKE_KERNEL */