xref: /titanic_53/usr/src/common/ficl/softcore/lz4.c (revision a1bf3f785ae05c419b339c3a2061f2b18c024f61)
1 /*
2  * LZ4 - Fast LZ compression algorithm
3  * Header File
4  * Copyright (C) 2011-2013, Yann Collet.
5  * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are
9  * met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  * notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above
14  * copyright notice, this list of conditions and the following disclaimer
15  * in the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * You can contact the author at :
31  * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
32  * - LZ4 source repository : http://code.google.com/p/lz4/
33  */
34 
35 #include <sys/types.h>
36 #include <sys/byteorder.h>
37 #include <assert.h>
38 #include <string.h>
39 #include <umem.h>
40 
41 size_t lz4_compress(void *, void *, size_t, size_t, int);
42 int lz4_decompress(void *, void *, size_t, size_t, int);
43 static int real_LZ4_compress(const char *source, char *dest, int isize,
44     int osize);
45 static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
46     int isize, int maxOutputSize);
47 static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
48     int isize, int osize);
49 static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
50     int isize, int osize);
51 
52 /*ARGSUSED*/
53 size_t
54 lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
55 {
56 	uint32_t bufsiz;
57 	char *dest = d_start;
58 
59 	assert(d_len >= sizeof (bufsiz));
60 
61 	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
62 	    d_len - sizeof (bufsiz));
63 
64 	/* Signal an error if the compression routine returned zero. */
65 	if (bufsiz == 0)
66 		return (s_len);
67 
68 	/*
69 	 * Encode the compressed buffer size at the start. We'll need this in
70 	 * decompression to counter the effects of padding which might be
71 	 * added to the compressed buffer and which, if unhandled, would
72 	 * confuse the hell out of our decompression function.
73 	 */
74 	*(uint32_t *)dest = BE_32(bufsiz);
75 
76 	return (bufsiz + sizeof (bufsiz));
77 }
78 
79 /*ARGSUSED*/
80 int
81 lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
82 {
83 	const char *src = s_start;
84 	uint32_t bufsiz = BE_IN32(src);
85 
86 	/* invalid compressed buffer size encoded at start */
87 	if (bufsiz + sizeof (bufsiz) > s_len)
88 		return (1);
89 
90 	/*
91 	 * Returns 0 on success (decompression function returned non-negative)
92 	 * and non-zero on failure (decompression function returned negative).
93 	 */
94 	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
95 	    d_start, bufsiz, d_len) < 0);
96 }
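/*
 * Illustrative round-trip sketch for the two wrappers above (not part of
 * this file; buffer names and sizes are assumptions, and the trailing
 * "level" argument is unused by both wrappers). The compressor prepends a
 * 4-byte big-endian length word, so the destination needs 4 spare bytes,
 * and a return value equal to s_len means "could not usefully compress":
 *
 *	char src[4096], comp[4096 + 4], out[4096];
 *	size_t clen;
 *
 *	clen = lz4_compress(src, comp, sizeof (src), sizeof (comp), 0);
 *	if (clen >= sizeof (src)) {
 *		(incompressible: store src unmodified)
 *	} else if (lz4_decompress(comp, out, clen, sizeof (out), 0) != 0) {
 *		(corrupt or truncated compressed stream)
 *	}
 */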
97 
98 /*
99  * LZ4 API Description:
100  *
101  * Simple Functions:
102  * real_LZ4_compress() :
103  *	isize  : is the input size. Max supported value is ~1.9GB
104  *	return : the number of bytes written in buffer dest
105  *		or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
106  *	note : destination buffer must be already allocated.
107  *		destination buffer must be sized to handle worst cases
108  *		destination buffer must be sized to handle worst-case
109  *
110  * Advanced Functions
111  *
112  * LZ4_uncompress_unknownOutputSize() :
113  *	isize  : is the input size, therefore the compressed size
114  *	maxOutputSize : is the size of the destination buffer (which must be
115  *		already allocated)
116  *	return : the number of bytes decoded in the destination buffer
117  *		(necessarily <= maxOutputSize). If the source stream is
118  *		malformed, the function will stop decoding and return a
119  *		negative result, indicating the byte position of the faulty
120  *		instruction. This function never writes beyond dest +
121  *		maxOutputSize, and is therefore protected against malicious
122  *		data packets.
123  *	note   : Destination buffer must be already allocated.
124  *
125  * LZ4_compressCtx() :
126  *	This function explicitly handles the CTX memory structure.
127  *
128  *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
129  * by the caller (either on the stack or using umem_zalloc). Passing NULL
130  *	isn't valid.
131  *
132  * LZ4_compress64kCtx() :
133  *	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
134  *	isize *Must* be <64KB, otherwise the output will be corrupted.
135  *
136  *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
137  * by the caller (either on the stack or using umem_zalloc). Passing NULL
138  *	isn't valid.
139  */
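/*
 * Sizing sketch for the "worst case" note above (illustrative; this file
 * defines no bound macro, so the expression mirrors the upstream
 * LZ4_COMPRESSBOUND() formula and is an assumption here):
 *
 *	int isize = ...;                         input size
 *	int osize = isize + (isize / 255) + 16;  worst-case output size
 *	char *dest = umem_alloc(osize, UMEM_DEFAULT);
 *	int clen = real_LZ4_compress(source, dest, isize, osize);
 *
 * A return value of 0 signals that the data did not fit into osize bytes
 * (or that compression was not worthwhile).
 */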
140 
141 /*
142  * Tuning parameters
143  */
144 
145 /*
146  * COMPRESSIONLEVEL: Increasing this value improves compression ratio.
147  *	Lowering this value reduces memory usage. Reduced memory usage
148  *	typically improves speed, due to cache effects (ex: L1 32KB for Intel,
149  *	L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
150  *	(examples : 12 -> 16KB ; 17 -> 512KB)
151  */
152 #define	COMPRESSIONLEVEL 12
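/*
 * Where the 2^(N+2) figure above comes from: the hash table defined below
 * has (1 << COMPRESSIONLEVEL) entries of 4 bytes each (a U32 on 64-bit
 * builds, a 32-bit pointer otherwise), so:
 *
 *	(1 << 12) entries * 4 bytes = 16 KB for the default value of 12
 */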
153 
154 /*
155  * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
156  *	algorithm skip over data segments it considers "incompressible" faster.
157  *	This may decrease compression ratio dramatically, but will be
158  *	faster on incompressible data. Increasing this value will make
159  *	the algorithm search more before declaring a segment "incompressible".
160  *	This could improve compression a bit, but will be slower on
161  *	incompressible data. The default value (6) is recommended.
162  */
163 #define	NOTCOMPRESSIBLE_CONFIRMATION 6
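/*
 * How this value acts in the match-finding loops below (sketch; the
 * variable names are those used in LZ4_compressCtx()): every failed match
 * attempt advances the scan pointer by
 *
 *	step = findMatchAttempts++ >> skipStrength;
 *
 * With the default of 6, findMatchAttempts starts at (1 << 6) + 3 = 67,
 * so the first ~61 misses advance one byte at a time, the next 64 misses
 * advance two bytes at a time, and so on. A smaller value makes the
 * stride grow sooner on data that is not producing matches.
 */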
164 
165 /*
166  * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
167  * performance for big-endian CPUs, but the resulting compressed stream
168  * will be incompatible with little-endian CPUs. You can set this option
169  * to 1 in situations where data will stay within a closed environment.
170  * This option is useless on little-endian CPUs (such as x86).
171  */
172 /* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */
173 
174 /*
175  * CPU Feature Detection
176  */
177 
178 /* 32 or 64 bits ? */
179 #if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
180     defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
181     defined(__LP64__) || defined(_LP64))
182 #define	LZ4_ARCH64 1
183 #else
184 #define	LZ4_ARCH64 0
185 #endif
186 
187 /*
188  * Limits the amount of stack space that the algorithm may consume to hold
189  * the compression lookup table. The value `9' here means we'll never use
190  * more than 2k of stack (see above for a description of COMPRESSIONLEVEL).
191  * If more memory is needed, it is allocated from the heap.
192  */
193 #define	STACKLIMIT 9
194 
195 /*
196  * Little Endian or Big Endian?
197  * Note: override the #define below if you know your architecture's endianness.
198  */
199 #if (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || \
200 	defined(_BIG_ENDIAN) || defined(_ARCH_PPC) || defined(__PPC__) || \
201 	defined(__PPC) || defined(PPC) || defined(__powerpc__) || \
202 	defined(__powerpc) || defined(powerpc) || \
203 	((defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))))
204 #define	LZ4_BIG_ENDIAN 1
205 #else
206 /*
207  * Little Endian assumed. PDP Endian and other very rare endian format
208  * Little Endian assumed. PDP Endian and other very rare endian formats
209  */
210 #endif
211 
212 /*
213  * Unaligned memory access is automatically enabled for "common" CPUs,
214  * such as x86. For other CPUs, the compiler will be more cautious, and
215  * insert extra code to ensure aligned access is respected. If you know
216  * your target CPU supports unaligned memory access, you may want to
217  * force this option manually to improve performance.
218  */
219 #if defined(__ARM_FEATURE_UNALIGNED)
220 #define	LZ4_FORCE_UNALIGNED_ACCESS 1
221 #endif
222 
223 #ifdef __sparc
224 #define	LZ4_FORCE_SW_BITCOUNT
225 #endif
226 
227 /*
228  * Compiler Options
229  */
230 #if __STDC_VERSION__ >= 199901L	/* C99 */
231 /* "restrict" is a known keyword */
232 #else
233 /* Disable restrict */
234 #define	restrict
235 #endif
236 
237 #define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
238 
239 #ifdef _MSC_VER
240 /* Visual Studio */
241 /* Visual is not C99, but supports some kind of inline */
242 #define	inline __forceinline
243 #if LZ4_ARCH64
244 /* For Visual 2005 */
245 #pragma intrinsic(_BitScanForward64)
246 #pragma intrinsic(_BitScanReverse64)
247 #else /* !LZ4_ARCH64 */
248 /* For Visual 2005 */
249 #pragma intrinsic(_BitScanForward)
250 #pragma intrinsic(_BitScanReverse)
251 #endif /* !LZ4_ARCH64 */
252 #endif /* _MSC_VER */
253 
254 #ifdef _MSC_VER
255 #define	lz4_bswap16(x) _byteswap_ushort(x)
256 #else /* !_MSC_VER */
257 #define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
258 	(((x) & 0xffu) << 8)))
259 #endif /* !_MSC_VER */
260 
261 #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
262 #define	expect(expr, value)    (__builtin_expect((expr), (value)))
263 #else
264 #define	expect(expr, value)    (expr)
265 #endif
266 
267 #define	likely(expr)	expect((expr) != 0, 1)
268 #define	unlikely(expr)	expect((expr) != 0, 0)
269 
270 /* Basic types */
271 #if defined(_MSC_VER)
272 /* Visual Studio does not support 'stdint' natively */
273 #define	BYTE	unsigned __int8
274 #define	U16	unsigned __int16
275 #define	U32	unsigned __int32
276 #define	S32	__int32
277 #define	U64	unsigned __int64
278 #else /* !defined(_MSC_VER) */
279 #define	BYTE	uint8_t
280 #define	U16	uint16_t
281 #define	U32	uint32_t
282 #define	S32	int32_t
283 #define	U64	uint64_t
284 #endif /* !defined(_MSC_VER) */
285 
286 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
287 #pragma pack(1)
288 #endif
289 
290 typedef struct _U16_S {
291 	U16 v;
292 } U16_S;
293 typedef struct _U32_S {
294 	U32 v;
295 } U32_S;
296 typedef struct _U64_S {
297 	U64 v;
298 } U64_S;
299 
300 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
301 #pragma pack()
302 #endif
303 
304 #define	A64(x) (((U64_S *)(x))->v)
305 #define	A32(x) (((U32_S *)(x))->v)
306 #define	A16(x) (((U16_S *)(x))->v)
307 
308 /*
309  * Constants
310  */
311 #define	MINMATCH 4
312 
313 #define	HASH_LOG COMPRESSIONLEVEL
314 #define	HASHTABLESIZE (1 << HASH_LOG)
315 #define	HASH_MASK (HASHTABLESIZE - 1)
316 
317 #define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
318 	NOTCOMPRESSIBLE_CONFIRMATION : 2)
319 
320 /*
321  * Defines whether memory is allocated on the stack (local variable)
322  * or on the heap (umem_zalloc()).
323  */
324 #define	HEAPMODE (HASH_LOG > STACKLIMIT)
325 #define	COPYLENGTH 8
326 #define	LASTLITERALS 5
327 #define	MFLIMIT (COPYLENGTH + MINMATCH)
328 #define	MINLENGTH (MFLIMIT + 1)
329 
330 #define	MAXD_LOG 16
331 #define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)
332 
333 #define	ML_BITS 4
334 #define	ML_MASK ((1U<<ML_BITS)-1)
335 #define	RUN_BITS (8-ML_BITS)
336 #define	RUN_MASK ((1U<<RUN_BITS)-1)
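/*
 * Layout of the sequence token that these masks describe (standard LZ4
 * format; the concrete byte below is only an illustration):
 *
 *	token = 0x52 = 0101 0010 (binary)
 *	  high RUN_BITS (0101 = 5) -> 5 literal bytes follow the token
 *	  low  ML_BITS  (0010 = 2) -> match length = 2 + MINMATCH = 6
 *
 * A field equal to RUN_MASK or ML_MASK (15) means the length continues in
 * extra bytes of 255 followed by a final remainder byte.
 */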
337 
338 
339 /*
340  * Architecture-specific macros
341  */
342 #if LZ4_ARCH64
343 #define	STEPSIZE 8
344 #define	UARCH U64
345 #define	AARCH A64
346 #define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
347 #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
348 #define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
349 #define	HTYPE U32
350 #define	INITBASE(base)		const BYTE* const base = ip
351 #else /* !LZ4_ARCH64 */
352 #define	STEPSIZE 4
353 #define	UARCH U32
354 #define	AARCH A32
355 #define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
356 #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
357 #define	LZ4_SECURECOPY		LZ4_WILDCOPY
358 #define	HTYPE const BYTE *
359 #define	INITBASE(base)		const int base = 0
360 #endif /* !LZ4_ARCH64 */
361 
362 #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
363 #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
364 	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
365 #define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
366 	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
367 #else
368 #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
369 #define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
370 #endif
371 
372 
373 /* Local structures */
374 struct refTables {
375 	HTYPE hashTable[HASHTABLESIZE];
376 };
377 
378 
379 /* Macros */
380 #define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
381 	HASH_LOG))
382 #define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
383 #define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
384 #define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
385 	d = e; }
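/*
 * What LZ4_HASH_FUNCTION() computes: a multiplicative (Knuth-style) hash
 * of the next four input bytes, keeping the top HASH_LOG bits as the
 * table index. A sketch with an illustrative value:
 *
 *	A32(p) = 0x64636261                       the bytes "abcd", little-endian
 *	index  = (0x64636261 * 2654435761U) >> (32 - HASH_LOG)
 *
 * i.e. an index in [0, HASHTABLESIZE - 1] (0..4095 with the default
 * COMPRESSIONLEVEL of 12).
 */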
386 
387 
388 /* Private functions */
389 #if LZ4_ARCH64
390 
391 static int
392 LZ4_NbCommonBytes(register U64 val)
393 {
394 #if defined(LZ4_BIG_ENDIAN)
395 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
396 	unsigned long r = 0;
397 	_BitScanReverse64(&r, val);
398 	return (int)(r >> 3);
399 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
400 	!defined(LZ4_FORCE_SW_BITCOUNT)
401 	return (__builtin_clzll(val) >> 3);
402 #else
403 	int r;
404 	if (!(val >> 32)) {
405 		r = 4;
406 	} else {
407 		r = 0;
408 		val >>= 32;
409 	}
410 	if (!(val >> 16)) {
411 		r += 2;
412 		val >>= 8;
413 	} else {
414 		val >>= 24;
415 	}
416 	r += (!val);
417 	return (r);
418 #endif
419 #else
420 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
421 	unsigned long r = 0;
422 	_BitScanForward64(&r, val);
423 	return (int)(r >> 3);
424 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
425 	!defined(LZ4_FORCE_SW_BITCOUNT)
426 	return (__builtin_ctzll(val) >> 3);
427 #else
428 	static const int DeBruijnBytePos[64] =
429 	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
430 		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
431 		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
432 		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
433 	};
434 	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
435 	    58];
436 #endif
437 #endif
438 }
439 
440 #else
441 
442 static int
443 LZ4_NbCommonBytes(register U32 val)
444 {
445 #if defined(LZ4_BIG_ENDIAN)
446 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
447 	unsigned long r = 0;
448 	_BitScanReverse(&r, val);
449 	return (int)(r >> 3);
450 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
451 	!defined(LZ4_FORCE_SW_BITCOUNT)
452 	return (__builtin_clz(val) >> 3);
453 #else
454 	int r;
455 	if (!(val >> 16)) {
456 		r = 2;
457 		val >>= 8;
458 	} else {
459 		r = 0;
460 		val >>= 24;
461 	}
462 	r += (!val);
463 	return (r);
464 #endif
465 #else
466 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
467 	unsigned long r = 0;
468 	_BitScanForward(&r, val);
469 	return (int)(r >> 3);
470 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
471 	!defined(LZ4_FORCE_SW_BITCOUNT)
472 	return (__builtin_ctz(val) >> 3);
473 #else
474 	static const int DeBruijnBytePos[32] = {
475 		0, 0, 3, 0, 3, 1, 3, 0,
476 		3, 2, 2, 1, 3, 2, 0, 1,
477 		3, 3, 1, 2, 2, 2, 2, 0,
478 		3, 1, 2, 0, 1, 0, 1, 1
479 	};
480 	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
481 	    27];
482 #endif
483 #endif
484 }
485 
486 #endif
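/*
 * Usage sketch for LZ4_NbCommonBytes() (values illustrative): the match
 * loops below feed it the XOR of two machine words,
 *
 *	UARCH diff = AARCH(ref) ^ AARCH(ip);
 *
 * On a little-endian 64-bit build, if ref and ip agree in their first
 * three bytes and differ in the fourth, the low 24 bits of diff are zero,
 * so __builtin_ctzll(diff) falls in 24..31 and LZ4_NbCommonBytes() returns
 * 3, the number of matching leading bytes. The big-endian variants count
 * leading zeros instead, and the De Bruijn tables give the same result
 * where no bit-scan intrinsic is available.
 */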
487 
488 /* Public functions */
489 
490 /* Compression functions */
491 
492 /*ARGSUSED*/
493 static int
494 LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
495     int osize)
496 {
497 #if HEAPMODE
498 	struct refTables *srt = (struct refTables *)ctx;
499 	HTYPE *HashTable = (HTYPE *) (srt->hashTable);
500 #else
501 	HTYPE HashTable[HASHTABLESIZE] = { 0 };
502 #endif
503 
504 	const BYTE *ip = (BYTE *) source;
505 	INITBASE(base);
506 	const BYTE *anchor = ip;
507 	const BYTE *const iend = ip + isize;
508 	const BYTE *const oend = (BYTE *) dest + osize;
509 	const BYTE *const mflimit = iend - MFLIMIT;
510 #define	matchlimit (iend - LASTLITERALS)
511 
512 	BYTE *op = (BYTE *) dest;
513 
514 	int len, length;
515 	const int skipStrength = SKIPSTRENGTH;
516 	U32 forwardH;
517 
518 
519 	/* Init */
520 	if (isize < MINLENGTH)
521 		goto _last_literals;
522 
523 	/* First Byte */
524 	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
525 	ip++;
526 	forwardH = LZ4_HASH_VALUE(ip);
527 
528 	/* Main Loop */
529 	for (;;) {
530 		int findMatchAttempts = (1U << skipStrength) + 3;
531 		const BYTE *forwardIp = ip;
532 		const BYTE *ref;
533 		BYTE *token;
534 
535 		/* Find a match */
536 		do {
537 			U32 h = forwardH;
538 			int step = findMatchAttempts++ >> skipStrength;
539 			ip = forwardIp;
540 			forwardIp = ip + step;
541 
542 			if unlikely(forwardIp > mflimit) {
543 				goto _last_literals;
544 			}
545 
546 			forwardH = LZ4_HASH_VALUE(forwardIp);
547 			ref = base + HashTable[h];
548 			HashTable[h] = ip - base;
549 
550 		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
551 
552 		/* Catch up */
553 		while ((ip > anchor) && (ref > (BYTE *) source) &&
554 		    unlikely(ip[-1] == ref[-1])) {
555 			ip--;
556 			ref--;
557 		}
558 
559 		/* Encode Literal length */
560 		length = ip - anchor;
561 		token = op++;
562 
563 		/* Check output limit */
564 		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
565 		    (length >> 8) > oend)
566 			return (0);
567 
568 		if (length >= (int)RUN_MASK) {
569 			*token = (RUN_MASK << ML_BITS);
570 			len = length - RUN_MASK;
571 			for (; len > 254; len -= 255)
572 				*op++ = 255;
573 			*op++ = (BYTE)len;
574 		} else
575 			*token = (length << ML_BITS);
576 
577 		/* Copy Literals */
578 		LZ4_BLINDCOPY(anchor, op, length);
579 
580 		_next_match:
581 		/* Encode Offset */
582 		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
583 
584 		/* Start Counting */
585 		ip += MINMATCH;
586 		ref += MINMATCH;	/* MinMatch verified */
587 		anchor = ip;
588 		while likely(ip < matchlimit - (STEPSIZE - 1)) {
589 			UARCH diff = AARCH(ref) ^ AARCH(ip);
590 			if (!diff) {
591 				ip += STEPSIZE;
592 				ref += STEPSIZE;
593 				continue;
594 			}
595 			ip += LZ4_NbCommonBytes(diff);
596 			goto _endCount;
597 		}
598 #if LZ4_ARCH64
599 		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
600 			ip += 4;
601 			ref += 4;
602 		}
603 #endif
604 		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
605 			ip += 2;
606 			ref += 2;
607 		}
608 		if ((ip < matchlimit) && (*ref == *ip))
609 			ip++;
610 		_endCount:
611 
612 		/* Encode MatchLength */
613 		len = (ip - anchor);
614 		/* Check output limit */
615 		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
616 			return (0);
617 		if (len >= (int)ML_MASK) {
618 			*token += ML_MASK;
619 			len -= ML_MASK;
620 			for (; len > 509; len -= 510) {
621 				*op++ = 255;
622 				*op++ = 255;
623 			}
624 			if (len > 254) {
625 				len -= 255;
626 				*op++ = 255;
627 			}
628 			*op++ = (BYTE)len;
629 		} else
630 			*token += len;
631 
632 		/* Test end of chunk */
633 		if (ip > mflimit) {
634 			anchor = ip;
635 			break;
636 		}
637 		/* Fill table */
638 		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;
639 
640 		/* Test next position */
641 		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
642 		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
643 		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
644 			token = op++;
645 			*token = 0;
646 			goto _next_match;
647 		}
648 		/* Prepare next loop */
649 		anchor = ip++;
650 		forwardH = LZ4_HASH_VALUE(ip);
651 	}
652 
653 	_last_literals:
654 	/* Encode Last Literals */
655 	{
656 		int lastRun = iend - anchor;
657 		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
658 		    oend)
659 			return (0);
660 		if (lastRun >= (int)RUN_MASK) {
661 			*op++ = (RUN_MASK << ML_BITS);
662 			lastRun -= RUN_MASK;
663 			for (; lastRun > 254; lastRun -= 255) {
664 				*op++ = 255;
665 			}
666 			*op++ = (BYTE)lastRun;
667 		} else
668 			*op++ = (lastRun << ML_BITS);
669 		(void) memcpy(op, anchor, iend - anchor);
670 		op += iend - anchor;
671 	}
672 
673 	/* End */
674 	return (int)(((char *)op) - dest);
675 }
676 
677 
678 
679 /* Note : this function is valid only if isize < LZ4_64KLIMIT */
680 #define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
681 #define	HASHLOG64K (HASH_LOG + 1)
682 #define	HASH64KTABLESIZE (1U << HASHLOG64K)
683 #define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
684 	HASHLOG64K))
685 #define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))
686 
687 /*ARGSUSED*/
688 static int
689 LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
690     int osize)
691 {
692 #if HEAPMODE
693 	struct refTables *srt = (struct refTables *)ctx;
694 	U16 *HashTable = (U16 *) (srt->hashTable);
695 #else
696 	U16 HashTable[HASH64KTABLESIZE] = { 0 };
697 #endif
698 
699 	const BYTE *ip = (BYTE *) source;
700 	const BYTE *anchor = ip;
701 	const BYTE *const base = ip;
702 	const BYTE *const iend = ip + isize;
703 	const BYTE *const oend = (BYTE *) dest + osize;
704 	const BYTE *const mflimit = iend - MFLIMIT;
705 #define	matchlimit (iend - LASTLITERALS)
706 
707 	BYTE *op = (BYTE *) dest;
708 
709 	int len, length;
710 	const int skipStrength = SKIPSTRENGTH;
711 	U32 forwardH;
712 
713 	/* Init */
714 	if (isize < MINLENGTH)
715 		goto _last_literals;
716 
717 	/* First Byte */
718 	ip++;
719 	forwardH = LZ4_HASH64K_VALUE(ip);
720 
721 	/* Main Loop */
722 	for (;;) {
723 		int findMatchAttempts = (1U << skipStrength) + 3;
724 		const BYTE *forwardIp = ip;
725 		const BYTE *ref;
726 		BYTE *token;
727 
728 		/* Find a match */
729 		do {
730 			U32 h = forwardH;
731 			int step = findMatchAttempts++ >> skipStrength;
732 			ip = forwardIp;
733 			forwardIp = ip + step;
734 
735 			if (forwardIp > mflimit) {
736 				goto _last_literals;
737 			}
738 
739 			forwardH = LZ4_HASH64K_VALUE(forwardIp);
740 			ref = base + HashTable[h];
741 			HashTable[h] = ip - base;
742 
743 		} while (A32(ref) != A32(ip));
744 
745 		/* Catch up */
746 		while ((ip > anchor) && (ref > (BYTE *) source) &&
747 		    (ip[-1] == ref[-1])) {
748 			ip--;
749 			ref--;
750 		}
751 
752 		/* Encode Literal length */
753 		length = ip - anchor;
754 		token = op++;
755 
756 		/* Check output limit */
757 		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
758 		    (length >> 8) > oend)
759 			return (0);
760 
761 		if (length >= (int)RUN_MASK) {
762 			*token = (RUN_MASK << ML_BITS);
763 			len = length - RUN_MASK;
764 			for (; len > 254; len -= 255)
765 				*op++ = 255;
766 			*op++ = (BYTE)len;
767 		} else
768 			*token = (length << ML_BITS);
769 
770 		/* Copy Literals */
771 		LZ4_BLINDCOPY(anchor, op, length);
772 
773 		_next_match:
774 		/* Encode Offset */
775 		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
776 
777 		/* Start Counting */
778 		ip += MINMATCH;
779 		ref += MINMATCH;	/* MinMatch verified */
780 		anchor = ip;
781 		while (ip < matchlimit - (STEPSIZE - 1)) {
782 			UARCH diff = AARCH(ref) ^ AARCH(ip);
783 			if (!diff) {
784 				ip += STEPSIZE;
785 				ref += STEPSIZE;
786 				continue;
787 			}
788 			ip += LZ4_NbCommonBytes(diff);
789 			goto _endCount;
790 		}
791 #if LZ4_ARCH64
792 		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
793 			ip += 4;
794 			ref += 4;
795 		}
796 #endif
797 		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
798 			ip += 2;
799 			ref += 2;
800 		}
801 		if ((ip < matchlimit) && (*ref == *ip))
802 			ip++;
803 		_endCount:
804 
805 		/* Encode MatchLength */
806 		len = (ip - anchor);
807 		/* Check output limit */
808 		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
809 			return (0);
810 		if (len >= (int)ML_MASK) {
811 			*token += ML_MASK;
812 			len -= ML_MASK;
813 			for (; len > 509; len -= 510) {
814 				*op++ = 255;
815 				*op++ = 255;
816 			}
817 			if (len > 254) {
818 				len -= 255;
819 				*op++ = 255;
820 			}
821 			*op++ = (BYTE)len;
822 		} else
823 			*token += len;
824 
825 		/* Test end of chunk */
826 		if (ip > mflimit) {
827 			anchor = ip;
828 			break;
829 		}
830 		/* Fill table */
831 		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;
832 
833 		/* Test next position */
834 		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
835 		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
836 		if (A32(ref) == A32(ip)) {
837 			token = op++;
838 			*token = 0;
839 			goto _next_match;
840 		}
841 		/* Prepare next loop */
842 		anchor = ip++;
843 		forwardH = LZ4_HASH64K_VALUE(ip);
844 	}
845 
846 	_last_literals:
847 	/* Encode Last Literals */
848 	{
849 		int lastRun = iend - anchor;
850 		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
851 		    oend)
852 			return (0);
853 		if (lastRun >= (int)RUN_MASK) {
854 			*op++ = (RUN_MASK << ML_BITS);
855 			lastRun -= RUN_MASK;
856 			for (; lastRun > 254; lastRun -= 255)
857 				*op++ = 255;
858 			*op++ = (BYTE)lastRun;
859 		} else
860 			*op++ = (lastRun << ML_BITS);
861 		(void) memcpy(op, anchor, iend - anchor);
862 		op += iend - anchor;
863 	}
864 
865 	/* End */
866 	return (int)(((char *)op) - dest);
867 }
868 
869 static int
870 real_LZ4_compress(const char *source, char *dest, int isize, int osize)
871 {
872 #if HEAPMODE
873 	void *ctx = umem_zalloc(sizeof (struct refTables), UMEM_DEFAULT);
874 	int result;
875 
876 	/*
877 	 * out of memory, gently fall through - returning 0 makes the caller
878 	 * treat this buffer as incompressible
879 	 */
880 	if (ctx == NULL)
881 		return (0);
882 
883 	if (isize < LZ4_64KLIMIT)
884 		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
885 	else
886 		result = LZ4_compressCtx(ctx, source, dest, isize, osize);
887 
888 	umem_free(ctx, sizeof (struct refTables));
889 	return (result);
890 #else
891 	if (isize < (int)LZ4_64KLIMIT)
892 		return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
893 	return (LZ4_compressCtx(NULL, source, dest, isize, osize));
894 #endif
895 }
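/*
 * Per the ILLUMOS CHANGES note above, the *Ctx() routines never allocate
 * their own state. A caller-managed sketch (illustrative; in this build
 * HASH_LOG (12) exceeds STACKLIMIT (9), so HEAPMODE is 1 and the ctx
 * argument is actually used):
 *
 *	struct refTables *tables;
 *
 *	tables = umem_zalloc(sizeof (struct refTables), UMEM_DEFAULT);
 *	if (tables != NULL) {
 *		clen = LZ4_compressCtx(tables, source, dest, isize, osize);
 *		umem_free(tables, sizeof (struct refTables));
 *	}
 */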
896 
897 /* Decompression functions */
898 
899 /*
900  * Note: The decoding function LZ4_uncompress_unknownOutputSize() is safe
901  *	against "buffer overflow" attacks.
902  *	LZ4_uncompress_unknownOutputSize() ensures that it will never read
903  *	outside of the input buffer. A corrupted input will produce an error
904  *	result, a negative int, indicating the position of the error within
905  *	the input stream.
906  */
907 
908 static int
909 LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
910     int maxOutputSize)
911 {
912 	/* Local Variables */
913 	const BYTE *restrict ip = (const BYTE *) source;
914 	const BYTE *const iend = ip + isize;
915 	const BYTE *ref;
916 
917 	BYTE *op = (BYTE *) dest;
918 	BYTE *const oend = op + maxOutputSize;
919 	BYTE *cpy;
920 
921 	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
922 #if LZ4_ARCH64
923 	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
924 #endif
925 
926 	/* Main Loop */
927 	while (ip < iend) {
928 		unsigned token;
929 		size_t length;
930 
931 		/* get runlength */
932 		token = *ip++;
933 		if ((length = (token >> ML_BITS)) == RUN_MASK) {
934 			int s = 255;
935 			while ((ip < iend) && (s == 255)) {
936 				s = *ip++;
937 				length += s;
938 			}
939 		}
940 		/* copy literals */
941 		cpy = op + length;
942 		/* CORNER-CASE: cpy might overflow. */
943 		if (cpy < op)
944 			goto _output_error;	/* cpy was overflowed, bail! */
945 		if ((cpy > oend - COPYLENGTH) ||
946 		    (ip + length > iend - COPYLENGTH)) {
947 			if (cpy > oend)
948 				/* Error: writes beyond output buffer */
949 				goto _output_error;
950 			if (ip + length != iend)
951 				/*
952 				 * Error: LZ4 format requires to consume all
953 				 * input at this stage
954 				 */
955 				goto _output_error;
956 			(void) memcpy(op, ip, length);
957 			op += length;
958 			/* Necessarily EOF, due to parsing restrictions */
959 			break;
960 		}
961 		LZ4_WILDCOPY(ip, op, cpy);
962 		ip -= (op - cpy);
963 		op = cpy;
964 
965 		/* get offset */
966 		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
967 		ip += 2;
968 		if (ref < (BYTE * const) dest)
969 			/*
970 			 * Error: offset creates reference outside of
971 			 * destination buffer
972 			 */
973 			goto _output_error;
974 
975 		/* get matchlength */
976 		if ((length = (token & ML_MASK)) == ML_MASK) {
977 			while (ip < iend) {
978 				int s = *ip++;
979 				length += s;
980 				if (s == 255)
981 					continue;
982 				break;
983 			}
984 		}
985 		/* copy repeated sequence */
986 		if unlikely(op - ref < STEPSIZE) {
987 #if LZ4_ARCH64
988 			size_t dec64 = dec64table[op-ref];
989 #else
990 			const int dec64 = 0;
991 #endif
992 			op[0] = ref[0];
993 			op[1] = ref[1];
994 			op[2] = ref[2];
995 			op[3] = ref[3];
996 			op += 4;
997 			ref += 4;
998 			ref -= dec32table[op-ref];
999 			A32(op) = A32(ref);
1000 			op += STEPSIZE - 4;
1001 			ref -= dec64;
1002 		} else {
1003 			LZ4_COPYSTEP(ref, op);
1004 		}
1005 		cpy = op + length - (STEPSIZE - 4);
1006 		if (cpy > oend - COPYLENGTH) {
1007 			if (cpy > oend)
1008 				/*
1009 				 * Error: request to write outside of
1010 				 * destination buffer
1011 				 */
1012 				goto _output_error;
1013 			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
1014 			while (op < cpy)
1015 				*op++ = *ref++;
1016 			op = cpy;
1017 			if (op == oend)
1018 				/*
1019 				 * Check EOF (should never happen, since
1020 				 * last 5 bytes are supposed to be literals)
1021 				 */
1022 				goto _output_error;
1023 			continue;
1024 		}
1025 		LZ4_SECURECOPY(ref, op, cpy);
1026 		op = cpy;	/* correction */
1027 	}
1028 
1029 	/* end of decoding */
1030 	return (int)(((char *)op) - dest);
1031 
1032 	/* write overflow error detected */
1033 	_output_error:
1034 	return (int)(-(((char *)ip) - source));
1035 }
1036
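/*
 * Error-handling sketch for the decoder above (illustrative; cbuf, clen,
 * obuf and olen are assumed to be caller-provided):
 *
 *	int n = LZ4_uncompress_unknownOutputSize(cbuf, obuf, clen, olen);
 *	if (n < 0) {
 *		(-n is the offset into cbuf where decoding stopped)
 *	} else {
 *		(n bytes of decompressed data were written to obuf)
 *	}
 */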