xref: /freebsd/sys/cddl/contrib/opensolaris/common/lz4/lz4.c (revision f5b7695d2d5abd735064870ad43f4b9c723940c1)
1 /*
2  * LZ4 - Fast LZ compression algorithm
3  * Header File
4  * Copyright (C) 2011-2013, Yann Collet.
5  * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are
9  * met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  * notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above
14  * copyright notice, this list of conditions and the following disclaimer
15  * in the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * You can contact the author at :
31  * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
32  * - LZ4 source repository : http://code.google.com/p/lz4/
33  */
34 /*
35  * Copyright (c) 2016 by Delphix. All rights reserved.
36  */
37 
38 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
39 #include <sys/zfs_context.h>
40 #elif defined(_STANDALONE)
41 #include <sys/cdefs.h>
42 #include <stand.h>
43 #include <sys/types.h>
44 #include <sys/endian.h>
45 #include <assert.h>
46 
47 #define	ASSERT	assert
48 #else
49 #include <string.h>
50 #include <stdlib.h>
51 #include <sys/types.h>
52 #include <netinet/in.h>
53 #include <assert.h>
54 
55 #define	ASSERT	assert
56 #endif
57 #include <lz4.h>
58 
59 static int real_LZ4_compress(const char *source, char *dest, int isize,
60     int osize);
61 static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
62     int isize, int maxOutputSize);
63 static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
64     int isize, int osize);
65 static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
66     int isize, int osize);
67 
68 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
69 static kmem_cache_t *lz4_ctx_cache;
70 #endif
71 
72 size_t
73 lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len,
74     int n __unused)
75 {
76 	uint32_t bufsiz;
77 	char *dest = d_start;
78 
79 	ASSERT(d_len >= sizeof (bufsiz));
80 
81 	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
82 	    d_len - sizeof (bufsiz));
83 
84 	/* Signal an error if the compression routine returned zero. */
85 	if (bufsiz == 0)
86 		return (s_len);
87 
88 	/*
89 	 * Encode the compresed buffer size at the start. We'll need this in
90 	 * decompression to counter the effects of padding which might be
91 	 * added to the compressed buffer and which, if unhandled, would
92 	 * confuse the hell out of our decompression function.
93 	 */
94 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
95 	*(uint32_t *)(void *)dest = BE_32(bufsiz);
96 #else
97 	*(uint32_t *)(void *)dest = htonl(bufsiz);
98 #endif
99 
100 	return (bufsiz + sizeof (bufsiz));
101 }
102 
103 int
104 lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len,
105     int n __unused)
106 {
107 	const char *src = s_start;
108 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
109 	uint32_t bufsiz = BE_IN32(s_start);
110 #else
111 	uint32_t bufsiz = htonl(*(uint32_t *)s_start);
112 #endif
113 
114 	/* invalid compressed buffer size encoded at start */
115 	if (bufsiz + sizeof (bufsiz) > s_len)
116 		return (1);
117 
118 	/*
119 	 * Returns 0 on success (decompression function returned non-negative)
120 	 * and non-zero on failure (decompression function returned negative).
121 	 */
122 	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
123 	    d_start, bufsiz, d_len) < 0);
124 }
125 
126 /*
127  * LZ4 API Description:
128  *
129  * Simple Functions:
130  * real_LZ4_compress() :
131  * 	isize  : is the input size. Max supported value is ~1.9GB
132  * 	return : the number of bytes written in buffer dest
133  *		 or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
134  * 	note : destination buffer must be already allocated.
135  * 		destination buffer must be sized to handle worst cases
136  * 		situations (input data not compressible).
137  *
138  * Advanced Functions
139  *
140  * LZ4_uncompress_unknownOutputSize() :
141  * 	isize  : is the input size, therefore the compressed size
142  * 	maxOutputSize : is the size of the destination buffer (which must be
143  * 		already allocated)
144  * 	return : the number of bytes decoded in the destination buffer
145  * 		(necessarily <= maxOutputSize). If the source stream is
146  * 		malformed, the function will stop decoding and return a
147  * 		negative result, indicating the byte position of the faulty
148  * 		instruction. This function never writes beyond dest +
149  * 		maxOutputSize, and is therefore protected against malicious
150  * 		data packets.
151  * 	note   : Destination buffer must be already allocated.
152  *
153  * LZ4_compressCtx() :
154  * 	This function explicitly handles the CTX memory structure.
155  *
156  * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
157  * 	by the caller (either on the stack or using kmem_zalloc). Passing NULL
158  * 	isn't valid.
159  *
160  * LZ4_compress64kCtx() :
161  * 	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
162  * 	isize *Must* be <64KB, otherwise the output will be corrupted.
163  *
164  * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
165  * 	by the caller (either on the stack or using kmem_zalloc). Passing NULL
166  * 	isn't valid.
167  */
168 
169 /*
170  * Tuning parameters
171  */
172 
173 /*
174  * COMPRESSIONLEVEL: Increasing this value improves compression ratio
175  *	 Lowering this value reduces memory usage. Reduced memory usage
176  *	typically improves speed, due to cache effect (ex: L1 32KB for Intel,
177  *	L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
178  *	(examples : 12 -> 16KB ; 17 -> 512KB)
179  */
180 #define	COMPRESSIONLEVEL 12
181 
182 /*
183  * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
184  *	algorithm skip faster data segments considered "incompressible".
185  *	This may decrease compression ratio dramatically, but will be
186  *	faster on incompressible data. Increasing this value will make
187  *	the algorithm search more before declaring a segment "incompressible".
188  *	This could improve compression a bit, but will be slower on
189  *	incompressible data. The default value (6) is recommended.
190  */
191 #define	NOTCOMPRESSIBLE_CONFIRMATION 6
192 
193 /*
194  * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
195  * performance for big endian cpu, but the resulting compressed stream
196  * will be incompatible with little-endian CPU. You can set this option
197  * to 1 in situations where data will stay within closed environment.
198  * This option is useless on Little_Endian CPU (such as x86).
199  */
200 /* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */
201 
202 /*
203  * CPU Feature Detection
204  */
205 
206 /* 32 or 64 bits ? */
207 #if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
208     defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
209     defined(__LP64__) || defined(_LP64))
210 #define	LZ4_ARCH64 1
211 #else
212 #define	LZ4_ARCH64 0
213 #endif
214 
215 /*
216  * Limits the amount of stack space that the algorithm may consume to hold
217  * the compression lookup table. The value `9' here means we'll never use
218  * more than 2k of stack (see above for a description of COMPRESSIONLEVEL).
219  * If more memory is needed, it is allocated from the heap.
220  */
221 /* FreeBSD: Use heap for all platforms for now */
222 #define	STACKLIMIT 0
223 
224 /*
225  * Little Endian or Big Endian?
226  * Note: overwrite the below #define if you know your architecture endianess.
227  */
228 #if BYTE_ORDER == BIG_ENDIAN
229 #define	LZ4_BIG_ENDIAN 1
230 #else
231 /*
232  * Little Endian assumed. PDP Endian and other very rare endian format
233  * are unsupported.
234  */
235 #endif
236 
237 /*
238  * Unaligned memory access is automatically enabled for "common" CPU,
239  * such as x86. For others CPU, the compiler will be more cautious, and
240  * insert extra code to ensure aligned access is respected. If you know
241  * your target CPU supports unaligned memory access, you may want to
242  * force this option manually to improve performance
243  */
244 #if defined(__ARM_FEATURE_UNALIGNED)
245 #define	LZ4_FORCE_UNALIGNED_ACCESS 1
246 #endif
247 
248 /*
249  * Compiler Options
250  */
251 #if __STDC_VERSION__ >= 199901L	/* C99 */
252 /* "restrict" is a known keyword */
253 #else
254 /* Disable restrict */
255 #define	restrict
256 #endif
257 
258 #define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
259 	(((x) & 0xffu) << 8)))
260 
261 #define	expect(expr, value)    (__builtin_expect((expr), (value)))
262 
263 #if defined(likely)
264 #undef likely
265 #endif
266 #if defined(unlikely)
267 #undef unlikely
268 #endif
269 
270 #ifndef likely
271 #define	likely(expr)	expect((expr) != 0, 1)
272 #endif
273 
274 #ifndef unlikely
275 #define	unlikely(expr)	expect((expr) != 0, 0)
276 #endif
277 
278 /* Basic types */
279 #define	BYTE	uint8_t
280 #define	U16	uint16_t
281 #define	U32	uint32_t
282 #define	S32	int32_t
283 #define	U64	uint64_t
284 
285 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
286 #pragma pack(1)
287 #endif
288 
289 typedef struct _U16_S {
290 	U16 v;
291 } U16_S;
292 typedef struct _U32_S {
293 	U32 v;
294 } U32_S;
295 typedef struct _U64_S {
296 	U64 v;
297 } U64_S;
298 
299 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
300 #pragma pack()
301 #endif
302 
303 #define	A64(x) (((U64_S *)(__DECONST(void *, x)))->v)
304 #define	A32(x) (((U32_S *)(__DECONST(void *, x)))->v)
305 #define	A16(x) (((U16_S *)(__DECONST(void *, x)))->v)
306 
307 /*
308  * Constants
309  */
310 #define	MINMATCH 4
311 
312 #define	HASH_LOG COMPRESSIONLEVEL
313 #define	HASHTABLESIZE (1 << HASH_LOG)
314 #define	HASH_MASK (HASHTABLESIZE - 1)
315 
316 #define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
317 	NOTCOMPRESSIBLE_CONFIRMATION : 2)
318 
319 /*
320  * Defines if memory is allocated into the stack (local variable),
321  * or into the heap (kmem_alloc()).
322  */
323 #define	HEAPMODE (HASH_LOG > STACKLIMIT)
324 #define	COPYLENGTH 8
325 #define	LASTLITERALS 5
326 #define	MFLIMIT (COPYLENGTH + MINMATCH)
327 #define	MINLENGTH (MFLIMIT + 1)
328 
329 #define	MAXD_LOG 16
330 #define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)
331 
332 #define	ML_BITS 4
333 #define	ML_MASK ((1U<<ML_BITS)-1)
334 #define	RUN_BITS (8-ML_BITS)
335 #define	RUN_MASK ((1U<<RUN_BITS)-1)
336 
337 
338 /*
339  * Architecture-specific macros
340  */
341 #if LZ4_ARCH64
342 #define	STEPSIZE 8
343 #define	UARCH U64
344 #define	AARCH A64
345 #define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
346 #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
347 #define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
348 #define	HTYPE U32
349 #define	INITBASE(base)		const BYTE* const base = ip
350 #else /* !LZ4_ARCH64 */
351 #define	STEPSIZE 4
352 #define	UARCH U32
353 #define	AARCH A32
354 #define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
355 #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
356 #define	LZ4_SECURECOPY		LZ4_WILDCOPY
357 #define	HTYPE const BYTE *
358 #define	INITBASE(base)		const int base = 0
359 #endif /* !LZ4_ARCH64 */
360 
361 #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
362 #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
363 	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
364 #define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
365 	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
366 #else
367 #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
368 #define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
369 #endif
370 
371 
372 /* Local structures */
373 struct refTables {
374 	HTYPE hashTable[HASHTABLESIZE];
375 };
376 
377 
378 /* Macros */
379 #define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
380 	HASH_LOG))
381 #define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
382 #define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
383 #define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
384 	d = e; }
385 
386 
387 /* Private functions */
388 #if LZ4_ARCH64
389 
390 static inline int
391 LZ4_NbCommonBytes(register U64 val)
392 {
393 #if defined(LZ4_BIG_ENDIAN)
394 #if !defined(LZ4_FORCE_SW_BITCOUNT)
395 	return (__builtin_clzll(val) >> 3);
396 #else
397 	int r;
398 	if (!(val >> 32)) {
399 		r = 4;
400 	} else {
401 		r = 0;
402 		val >>= 32;
403 	}
404 	if (!(val >> 16)) {
405 		r += 2;
406 		val >>= 8;
407 	} else {
408 		val >>= 24;
409 	}
410 	r += (!val);
411 	return (r);
412 #endif
413 #else
414 #if !defined(LZ4_FORCE_SW_BITCOUNT)
415 	return (__builtin_ctzll(val) >> 3);
416 #else
417 	static const int DeBruijnBytePos[64] =
418 	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
419 		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
420 		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
421 		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
422 	};
423 	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
424 	    58];
425 #endif
426 #endif
427 }
428 
429 #else
430 
431 static inline int
432 LZ4_NbCommonBytes(register U32 val)
433 {
434 #if defined(LZ4_BIG_ENDIAN)
435 #if !defined(LZ4_FORCE_SW_BITCOUNT)
436 	return (__builtin_clz(val) >> 3);
437 #else
438 	int r;
439 	if (!(val >> 16)) {
440 		r = 2;
441 		val >>= 8;
442 	} else {
443 		r = 0;
444 		val >>= 24;
445 	}
446 	r += (!val);
447 	return (r);
448 #endif
449 #else
450 #if !defined(LZ4_FORCE_SW_BITCOUNT)
451 	return (__builtin_ctz(val) >> 3);
452 #else
453 	static const int DeBruijnBytePos[32] = {
454 		0, 0, 3, 0, 3, 1, 3, 0,
455 		3, 2, 2, 1, 3, 2, 0, 1,
456 		3, 3, 1, 2, 2, 2, 2, 0,
457 		3, 1, 2, 0, 1, 0, 1, 1
458 	};
459 	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
460 	    27];
461 #endif
462 #endif
463 }
464 
465 #endif
466 
467 /* Compression functions */
468 
469 /*ARGSUSED*/
470 static int
471 LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
472     int osize)
473 {
474 #if HEAPMODE
475 	struct refTables *srt = (struct refTables *)ctx;
476 	HTYPE *HashTable = (HTYPE *) (srt->hashTable);
477 #else
478 	HTYPE HashTable[HASHTABLESIZE] = { 0 };
479 #endif
480 
481 	const BYTE *ip = (const BYTE *) source;
482 	INITBASE(base);
483 	const BYTE *anchor = ip;
484 	const BYTE *const iend = ip + isize;
485 	const BYTE *const oend = (BYTE *) dest + osize;
486 	const BYTE *const mflimit = iend - MFLIMIT;
487 #define	matchlimit (iend - LASTLITERALS)
488 
489 	BYTE *op = (BYTE *) dest;
490 
491 	int len, length;
492 	const int skipStrength = SKIPSTRENGTH;
493 	U32 forwardH;
494 
495 
496 	/* Init */
497 	if (isize < MINLENGTH)
498 		goto _last_literals;
499 
500 	/* First Byte */
501 	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
502 	ip++;
503 	forwardH = LZ4_HASH_VALUE(ip);
504 
505 	/* Main Loop */
506 	for (;;) {
507 		int findMatchAttempts = (1U << skipStrength) + 3;
508 		const BYTE *forwardIp = ip;
509 		const BYTE *ref;
510 		BYTE *token;
511 
512 		/* Find a match */
513 		do {
514 			U32 h = forwardH;
515 			int step = findMatchAttempts++ >> skipStrength;
516 			ip = forwardIp;
517 			forwardIp = ip + step;
518 
519 			if unlikely(forwardIp > mflimit) {
520 				goto _last_literals;
521 			}
522 
523 			forwardH = LZ4_HASH_VALUE(forwardIp);
524 			ref = base + HashTable[h];
525 			HashTable[h] = ip - base;
526 
527 		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
528 
529 		/* Catch up */
530 		while ((ip > anchor) && (ref > (const BYTE *) source) &&
531 		    unlikely(ip[-1] == ref[-1])) {
532 			ip--;
533 			ref--;
534 		}
535 
536 		/* Encode Literal length */
537 		length = ip - anchor;
538 		token = op++;
539 
540 		/* Check output limit */
541 		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
542 		    (length >> 8) > oend)
543 			return (0);
544 
545 		if (length >= (int)RUN_MASK) {
546 			*token = (RUN_MASK << ML_BITS);
547 			len = length - RUN_MASK;
548 			for (; len > 254; len -= 255)
549 				*op++ = 255;
550 			*op++ = (BYTE)len;
551 		} else
552 			*token = (length << ML_BITS);
553 
554 		/* Copy Literals */
555 		LZ4_BLINDCOPY(anchor, op, length);
556 
557 		_next_match:
558 		/* Encode Offset */
559 		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
560 
561 		/* Start Counting */
562 		ip += MINMATCH;
563 		ref += MINMATCH;	/* MinMatch verified */
564 		anchor = ip;
565 		while likely(ip < matchlimit - (STEPSIZE - 1)) {
566 			UARCH diff = AARCH(ref) ^ AARCH(ip);
567 			if (!diff) {
568 				ip += STEPSIZE;
569 				ref += STEPSIZE;
570 				continue;
571 			}
572 			ip += LZ4_NbCommonBytes(diff);
573 			goto _endCount;
574 		}
575 #if LZ4_ARCH64
576 		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
577 			ip += 4;
578 			ref += 4;
579 		}
580 #endif
581 		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
582 			ip += 2;
583 			ref += 2;
584 		}
585 		if ((ip < matchlimit) && (*ref == *ip))
586 			ip++;
587 		_endCount:
588 
589 		/* Encode MatchLength */
590 		len = (ip - anchor);
591 		/* Check output limit */
592 		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
593 			return (0);
594 		if (len >= (int)ML_MASK) {
595 			*token += ML_MASK;
596 			len -= ML_MASK;
597 			for (; len > 509; len -= 510) {
598 				*op++ = 255;
599 				*op++ = 255;
600 			}
601 			if (len > 254) {
602 				len -= 255;
603 				*op++ = 255;
604 			}
605 			*op++ = (BYTE)len;
606 		} else
607 			*token += len;
608 
609 		/* Test end of chunk */
610 		if (ip > mflimit) {
611 			anchor = ip;
612 			break;
613 		}
614 		/* Fill table */
615 		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;
616 
617 		/* Test next position */
618 		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
619 		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
620 		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
621 			token = op++;
622 			*token = 0;
623 			goto _next_match;
624 		}
625 		/* Prepare next loop */
626 		anchor = ip++;
627 		forwardH = LZ4_HASH_VALUE(ip);
628 	}
629 
630 	_last_literals:
631 	/* Encode Last Literals */
632 	{
633 		int lastRun = iend - anchor;
634 		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
635 		    oend)
636 			return (0);
637 		if (lastRun >= (int)RUN_MASK) {
638 			*op++ = (RUN_MASK << ML_BITS);
639 			lastRun -= RUN_MASK;
640 			for (; lastRun > 254; lastRun -= 255) {
641 				*op++ = 255;
642 			}
643 			*op++ = (BYTE)lastRun;
644 		} else
645 			*op++ = (lastRun << ML_BITS);
646 		(void) memcpy(op, anchor, iend - anchor);
647 		op += iend - anchor;
648 	}
649 
650 	/* End */
651 	return (int)(((char *)op) - dest);
652 }
653 
654 
655 
656 /* Note : this function is valid only if isize < LZ4_64KLIMIT */
657 #define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
658 #define	HASHLOG64K (HASH_LOG + 1)
659 #define	HASH64KTABLESIZE (1U << HASHLOG64K)
660 #define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
661 	HASHLOG64K))
662 #define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))
663 
664 /*ARGSUSED*/
665 static int
666 LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
667     int osize)
668 {
669 #if HEAPMODE
670 	struct refTables *srt = (struct refTables *)ctx;
671 	U16 *HashTable = (U16 *) (srt->hashTable);
672 #else
673 	U16 HashTable[HASH64KTABLESIZE] = { 0 };
674 #endif
675 
676 	const BYTE *ip = (const BYTE *) source;
677 	const BYTE *anchor = ip;
678 	const BYTE *const base = ip;
679 	const BYTE *const iend = ip + isize;
680 	const BYTE *const oend = (BYTE *) dest + osize;
681 	const BYTE *const mflimit = iend - MFLIMIT;
682 #define	matchlimit (iend - LASTLITERALS)
683 
684 	BYTE *op = (BYTE *) dest;
685 
686 	int len, length;
687 	const int skipStrength = SKIPSTRENGTH;
688 	U32 forwardH;
689 
690 	/* Init */
691 	if (isize < MINLENGTH)
692 		goto _last_literals;
693 
694 	/* First Byte */
695 	ip++;
696 	forwardH = LZ4_HASH64K_VALUE(ip);
697 
698 	/* Main Loop */
699 	for (;;) {
700 		int findMatchAttempts = (1U << skipStrength) + 3;
701 		const BYTE *forwardIp = ip;
702 		const BYTE *ref;
703 		BYTE *token;
704 
705 		/* Find a match */
706 		do {
707 			U32 h = forwardH;
708 			int step = findMatchAttempts++ >> skipStrength;
709 			ip = forwardIp;
710 			forwardIp = ip + step;
711 
712 			if (forwardIp > mflimit) {
713 				goto _last_literals;
714 			}
715 
716 			forwardH = LZ4_HASH64K_VALUE(forwardIp);
717 			ref = base + HashTable[h];
718 			HashTable[h] = ip - base;
719 
720 		} while (A32(ref) != A32(ip));
721 
722 		/* Catch up */
723 		while ((ip > anchor) && (ref > (const BYTE *) source) &&
724 		    (ip[-1] == ref[-1])) {
725 			ip--;
726 			ref--;
727 		}
728 
729 		/* Encode Literal length */
730 		length = ip - anchor;
731 		token = op++;
732 
733 		/* Check output limit */
734 		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
735 		    (length >> 8) > oend)
736 			return (0);
737 
738 		if (length >= (int)RUN_MASK) {
739 			*token = (RUN_MASK << ML_BITS);
740 			len = length - RUN_MASK;
741 			for (; len > 254; len -= 255)
742 				*op++ = 255;
743 			*op++ = (BYTE)len;
744 		} else
745 			*token = (length << ML_BITS);
746 
747 		/* Copy Literals */
748 		LZ4_BLINDCOPY(anchor, op, length);
749 
750 		_next_match:
751 		/* Encode Offset */
752 		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
753 
754 		/* Start Counting */
755 		ip += MINMATCH;
756 		ref += MINMATCH;	/* MinMatch verified */
757 		anchor = ip;
758 		while (ip < matchlimit - (STEPSIZE - 1)) {
759 			UARCH diff = AARCH(ref) ^ AARCH(ip);
760 			if (!diff) {
761 				ip += STEPSIZE;
762 				ref += STEPSIZE;
763 				continue;
764 			}
765 			ip += LZ4_NbCommonBytes(diff);
766 			goto _endCount;
767 		}
768 #if LZ4_ARCH64
769 		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
770 			ip += 4;
771 			ref += 4;
772 		}
773 #endif
774 		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
775 			ip += 2;
776 			ref += 2;
777 		}
778 		if ((ip < matchlimit) && (*ref == *ip))
779 			ip++;
780 		_endCount:
781 
782 		/* Encode MatchLength */
783 		len = (ip - anchor);
784 		/* Check output limit */
785 		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
786 			return (0);
787 		if (len >= (int)ML_MASK) {
788 			*token += ML_MASK;
789 			len -= ML_MASK;
790 			for (; len > 509; len -= 510) {
791 				*op++ = 255;
792 				*op++ = 255;
793 			}
794 			if (len > 254) {
795 				len -= 255;
796 				*op++ = 255;
797 			}
798 			*op++ = (BYTE)len;
799 		} else
800 			*token += len;
801 
802 		/* Test end of chunk */
803 		if (ip > mflimit) {
804 			anchor = ip;
805 			break;
806 		}
807 		/* Fill table */
808 		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;
809 
810 		/* Test next position */
811 		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
812 		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
813 		if (A32(ref) == A32(ip)) {
814 			token = op++;
815 			*token = 0;
816 			goto _next_match;
817 		}
818 		/* Prepare next loop */
819 		anchor = ip++;
820 		forwardH = LZ4_HASH64K_VALUE(ip);
821 	}
822 
823 	_last_literals:
824 	/* Encode Last Literals */
825 	{
826 		int lastRun = iend - anchor;
827 		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
828 		    oend)
829 			return (0);
830 		if (lastRun >= (int)RUN_MASK) {
831 			*op++ = (RUN_MASK << ML_BITS);
832 			lastRun -= RUN_MASK;
833 			for (; lastRun > 254; lastRun -= 255)
834 				*op++ = 255;
835 			*op++ = (BYTE)lastRun;
836 		} else
837 			*op++ = (lastRun << ML_BITS);
838 		(void) memcpy(op, anchor, iend - anchor);
839 		op += iend - anchor;
840 	}
841 
842 	/* End */
843 	return (int)(((char *)op) - dest);
844 }
845 
846 static int
847 real_LZ4_compress(const char *source, char *dest, int isize, int osize)
848 {
849 #if HEAPMODE
850 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
851 	void *ctx = kmem_cache_alloc(lz4_ctx_cache, KM_NOSLEEP);
852 #else
853 	void *ctx = malloc(sizeof(struct refTables));
854 #endif
855 	int result;
856 
857 	/*
858 	 * out of kernel memory, gently fall through - this will disable
859 	 * compression in zio_compress_data
860 	 */
861 	if (ctx == NULL)
862 		return (0);
863 
864 	bzero(ctx, sizeof(struct refTables));
865 	if (isize < LZ4_64KLIMIT)
866 		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
867 	else
868 		result = LZ4_compressCtx(ctx, source, dest, isize, osize);
869 
870 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
871 	kmem_cache_free(lz4_ctx_cache, ctx);
872 #else
873 	free(ctx);
874 #endif
875 	return (result);
876 #else
877 	if (isize < (int)LZ4_64KLIMIT)
878 		return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
879 	return (LZ4_compressCtx(NULL, source, dest, isize, osize));
880 #endif
881 }
882 
883 /* Decompression functions */
884 
885 /*
886  * Note: The decoding function LZ4_uncompress_unknownOutputSize() is safe
887  *	against "buffer overflow" attack type. It will never write nor
888  *	read outside of the provided output buffers.
889  *	LZ4_uncompress_unknownOutputSize() also insures that it will never
890  *	read outside of the input buffer.  A corrupted input will produce
891  *	an error result, a negative int, indicating the position of the
892  *	error within input stream.
893  */
894 
895 static int
896 LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
897     int maxOutputSize)
898 {
899 	/* Local Variables */
900 	const BYTE *restrict ip = (const BYTE *) source;
901 	const BYTE *const iend = ip + isize;
902 	const BYTE *ref;
903 
904 	BYTE *op = (BYTE *) dest;
905 	BYTE *const oend = op + maxOutputSize;
906 	BYTE *cpy;
907 
908 	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
909 #if LZ4_ARCH64
910 	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
911 #endif
912 
913 	/* Main Loop */
914 	while (ip < iend) {
915 		unsigned token;
916 		size_t length;
917 
918 		/* get runlength */
919 		token = *ip++;
920 		if ((length = (token >> ML_BITS)) == RUN_MASK) {
921 			int s = 255;
922 			while ((ip < iend) && (s == 255)) {
923 				s = *ip++;
924 				length += s;
925 			}
926 		}
927 		/* copy literals */
928 		cpy = op + length;
929 		/* CORNER-CASE: cpy might overflow. */
930 		if (cpy < op)
931 			goto _output_error;	/* cpy was overflowed, bail! */
932 		if ((cpy > oend - COPYLENGTH) ||
933 		    (ip + length > iend - COPYLENGTH)) {
934 			if (cpy > oend)
935 				/* Error: writes beyond output buffer */
936 				goto _output_error;
937 			if (ip + length != iend)
938 				/*
939 				 * Error: LZ4 format requires to consume all
940 				 * input at this stage
941 				 */
942 				goto _output_error;
943 			(void) memcpy(op, ip, length);
944 			op += length;
945 			/* Necessarily EOF, due to parsing restrictions */
946 			break;
947 		}
948 		LZ4_WILDCOPY(ip, op, cpy);
949 		ip -= (op - cpy);
950 		op = cpy;
951 
952 		/* get offset */
953 		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
954 		ip += 2;
955 		if (ref < (BYTE * const) dest)
956 			/*
957 			 * Error: offset creates reference outside of
958 			 * destination buffer
959 			 */
960 			goto _output_error;
961 
962 		/* get matchlength */
963 		if ((length = (token & ML_MASK)) == ML_MASK) {
964 			while (ip < iend) {
965 				int s = *ip++;
966 				length += s;
967 				if (s == 255)
968 					continue;
969 				break;
970 			}
971 		}
972 		/* copy repeated sequence */
973 		if unlikely(op - ref < STEPSIZE) {
974 #if LZ4_ARCH64
975 			size_t dec64 = dec64table[op-ref];
976 #else
977 			const int dec64 = 0;
978 #endif
979 			op[0] = ref[0];
980 			op[1] = ref[1];
981 			op[2] = ref[2];
982 			op[3] = ref[3];
983 			op += 4;
984 			ref += 4;
985 			ref -= dec32table[op-ref];
986 			A32(op) = A32(ref);
987 			op += STEPSIZE - 4;
988 			ref -= dec64;
989 		} else {
990 			LZ4_COPYSTEP(ref, op);
991 		}
992 		cpy = op + length - (STEPSIZE - 4);
993 		if (cpy > oend - COPYLENGTH) {
994 			if (cpy > oend)
995 				/*
996 				 * Error: request to write outside of
997 				 * destination buffer
998 				 */
999 				goto _output_error;
1000 			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
1001 			while (op < cpy)
1002 				*op++ = *ref++;
1003 			op = cpy;
1004 			if (op == oend)
1005 				/*
1006 				 * Check EOF (should never happen, since
1007 				 * last 5 bytes are supposed to be literals)
1008 				 */
1009 				goto _output_error;
1010 			continue;
1011 		}
1012 		LZ4_SECURECOPY(ref, op, cpy);
1013 		op = cpy;	/* correction */
1014 	}
1015 
1016 	/* end of decoding */
1017 	return (int)(((char *)op) - dest);
1018 
1019 	/* write overflow error detected */
1020 	_output_error:
1021 	return (int)(-(((const char *)ip) - source));
1022 }
1023 
1024 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
1025 extern void
1026 lz4_init(void)
1027 {
1028 
1029 #if HEAPMODE
1030 	lz4_ctx_cache = kmem_cache_create("lz4_ctx", sizeof(struct refTables),
1031 	    0, NULL, NULL, NULL, NULL, NULL, 0);
1032 #endif
1033 }
1034 
1035 extern void
1036 lz4_fini(void)
1037 {
1038 
1039 #if HEAPMODE
1040 	kmem_cache_destroy(lz4_ctx_cache);
1041 #endif
1042 }
1043 #endif /* _KERNEL || _FAKE_KERNEL */
1044