xref: /freebsd/sys/contrib/openzfs/module/zfs/lz4_zfs.c (revision ce4dcb97ca433b2a2f03fbae957dae0ff16f6f51)
1e92ffd9bSMartin Matuska /*
2e92ffd9bSMartin Matuska  * LZ4 - Fast LZ compression algorithm
3e92ffd9bSMartin Matuska  * Header File
4e92ffd9bSMartin Matuska  * Copyright (C) 2011-2013, Yann Collet.
5e92ffd9bSMartin Matuska  * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6e92ffd9bSMartin Matuska  *
7e92ffd9bSMartin Matuska  * Redistribution and use in source and binary forms, with or without
8e92ffd9bSMartin Matuska  * modification, are permitted provided that the following conditions are
9e92ffd9bSMartin Matuska  * met:
10e92ffd9bSMartin Matuska  *
11e92ffd9bSMartin Matuska  *     * Redistributions of source code must retain the above copyright
12e92ffd9bSMartin Matuska  * notice, this list of conditions and the following disclaimer.
13e92ffd9bSMartin Matuska  *     * Redistributions in binary form must reproduce the above
14e92ffd9bSMartin Matuska  * copyright notice, this list of conditions and the following disclaimer
15e92ffd9bSMartin Matuska  * in the documentation and/or other materials provided with the
16e92ffd9bSMartin Matuska  * distribution.
17e92ffd9bSMartin Matuska  *
18e92ffd9bSMartin Matuska  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19e92ffd9bSMartin Matuska  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20e92ffd9bSMartin Matuska  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21e92ffd9bSMartin Matuska  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22e92ffd9bSMartin Matuska  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23e92ffd9bSMartin Matuska  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24e92ffd9bSMartin Matuska  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25e92ffd9bSMartin Matuska  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26e92ffd9bSMartin Matuska  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27e92ffd9bSMartin Matuska  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28e92ffd9bSMartin Matuska  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29e92ffd9bSMartin Matuska  *
30e92ffd9bSMartin Matuska  * You can contact the author at :
31e92ffd9bSMartin Matuska  * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
32e92ffd9bSMartin Matuska  * - LZ4 source repository : http://code.google.com/p/lz4/
33e92ffd9bSMartin Matuska  */
34e92ffd9bSMartin Matuska 
35e92ffd9bSMartin Matuska /*
36e92ffd9bSMartin Matuska  * N.B. - This file seems to be based on LZ4 r85, dated Dec 10, 2012
37e92ffd9bSMartin Matuska  */
38e92ffd9bSMartin Matuska 
39e92ffd9bSMartin Matuska #include <sys/zfs_context.h>
40e92ffd9bSMartin Matuska #include <sys/zio_compress.h>
41e92ffd9bSMartin Matuska 
42e92ffd9bSMartin Matuska static int real_LZ4_compress(const char *source, char *dest, int isize,
43e92ffd9bSMartin Matuska     int osize);
44e92ffd9bSMartin Matuska static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
45e92ffd9bSMartin Matuska     int isize, int osize);
46e92ffd9bSMartin Matuska static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
47e92ffd9bSMartin Matuska     int isize, int osize);
48e92ffd9bSMartin Matuska 
49e92ffd9bSMartin Matuska /* See lz4.c */
50e92ffd9bSMartin Matuska int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
51e92ffd9bSMartin Matuska     int isize, int maxOutputSize);
52e92ffd9bSMartin Matuska 
53e92ffd9bSMartin Matuska static void *lz4_alloc(int flags);
54e92ffd9bSMartin Matuska static void lz4_free(void *ctx);
55e92ffd9bSMartin Matuska 
56e92ffd9bSMartin Matuska size_t
57e92ffd9bSMartin Matuska lz4_compress_zfs(void *s_start, void *d_start, size_t s_len,
58e92ffd9bSMartin Matuska     size_t d_len, int n)
59e92ffd9bSMartin Matuska {
60e92ffd9bSMartin Matuska 	(void) n;
61e92ffd9bSMartin Matuska 	uint32_t bufsiz;
62e92ffd9bSMartin Matuska 	char *dest = d_start;
63e92ffd9bSMartin Matuska 
64e92ffd9bSMartin Matuska 	ASSERT(d_len >= sizeof (bufsiz));
65e92ffd9bSMartin Matuska 
66e92ffd9bSMartin Matuska 	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
67e92ffd9bSMartin Matuska 	    d_len - sizeof (bufsiz));
68e92ffd9bSMartin Matuska 
69e92ffd9bSMartin Matuska 	/* Signal an error if the compression routine returned zero. */
70e92ffd9bSMartin Matuska 	if (bufsiz == 0)
71e92ffd9bSMartin Matuska 		return (s_len);
72e92ffd9bSMartin Matuska 
73e92ffd9bSMartin Matuska 	/*
74e92ffd9bSMartin Matuska 	 * The exact compressed size is needed by the decompression routine,
75e92ffd9bSMartin Matuska 	 * so it is stored at the start of the buffer. Note that this may be
76e92ffd9bSMartin Matuska 	 * less than the compressed block size, which is rounded up to a
77e92ffd9bSMartin Matuska 	 * multiple of 1<<ashift.
78e92ffd9bSMartin Matuska 	 */
79e92ffd9bSMartin Matuska 	*(uint32_t *)dest = BE_32(bufsiz);
80e92ffd9bSMartin Matuska 
81e92ffd9bSMartin Matuska 	return (bufsiz + sizeof (bufsiz));
82e92ffd9bSMartin Matuska }
83e92ffd9bSMartin Matuska 
84e92ffd9bSMartin Matuska int
85e92ffd9bSMartin Matuska lz4_decompress_zfs(void *s_start, void *d_start, size_t s_len,
86e92ffd9bSMartin Matuska     size_t d_len, int n)
87e92ffd9bSMartin Matuska {
88e92ffd9bSMartin Matuska 	(void) n;
89e92ffd9bSMartin Matuska 	const char *src = s_start;
90e92ffd9bSMartin Matuska 	uint32_t bufsiz = BE_IN32(src);
91e92ffd9bSMartin Matuska 
92e92ffd9bSMartin Matuska 	/* invalid compressed buffer size encoded at start */
93e92ffd9bSMartin Matuska 	if (bufsiz + sizeof (bufsiz) > s_len)
94e92ffd9bSMartin Matuska 		return (1);
95e92ffd9bSMartin Matuska 
96e92ffd9bSMartin Matuska 	/*
97e92ffd9bSMartin Matuska 	 * Returns 0 on success (decompression function returned non-negative)
98e92ffd9bSMartin Matuska 	 * and non-zero on failure (decompression function returned negative).
99e92ffd9bSMartin Matuska 	 */
100e92ffd9bSMartin Matuska 	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
101e92ffd9bSMartin Matuska 	    d_start, bufsiz, d_len) < 0);
102e92ffd9bSMartin Matuska }
103e92ffd9bSMartin Matuska 
104e92ffd9bSMartin Matuska /*
105e92ffd9bSMartin Matuska  * LZ4 API Description:
106e92ffd9bSMartin Matuska  *
107e92ffd9bSMartin Matuska  * Simple Functions:
108e92ffd9bSMartin Matuska  * real_LZ4_compress() :
109e92ffd9bSMartin Matuska  * 	isize  : is the input size. Max supported value is ~1.9GB
110e92ffd9bSMartin Matuska  * 	return : the number of bytes written in buffer dest
111e92ffd9bSMartin Matuska  *		 or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
112e92ffd9bSMartin Matuska  * 	note : destination buffer must be already allocated.
113e92ffd9bSMartin Matuska  * 		destination buffer must be sized to handle worst cases
114e92ffd9bSMartin Matuska  * 		situations (input data not compressible) worst case size
115e92ffd9bSMartin Matuska  * 		evaluation is provided by function LZ4_compressBound().
116e92ffd9bSMartin Matuska  *
117e92ffd9bSMartin Matuska  * real_LZ4_uncompress() :
118e92ffd9bSMartin Matuska  * 	osize  : is the output size, therefore the original size
119e92ffd9bSMartin Matuska  * 	return : the number of bytes read in the source buffer.
120e92ffd9bSMartin Matuska  * 		If the source stream is malformed, the function will stop
121e92ffd9bSMartin Matuska  * 		decoding and return a negative result, indicating the byte
122e92ffd9bSMartin Matuska  * 		position of the faulty instruction. This function never
123e92ffd9bSMartin Matuska  * 		writes beyond dest + osize, and is therefore protected
124e92ffd9bSMartin Matuska  * 		against malicious data packets.
125e92ffd9bSMartin Matuska  * 	note : destination buffer must be already allocated
126e92ffd9bSMartin Matuska  *	note : real_LZ4_uncompress() is not used in ZFS so its code
127e92ffd9bSMartin Matuska  *	       is not present here.
128e92ffd9bSMartin Matuska  *
129e92ffd9bSMartin Matuska  * Advanced Functions
130e92ffd9bSMartin Matuska  *
131e92ffd9bSMartin Matuska  * LZ4_compressBound() :
132e92ffd9bSMartin Matuska  * 	Provides the maximum size that LZ4 may output in a "worst case"
133e92ffd9bSMartin Matuska  * 	scenario (input data not compressible) primarily useful for memory
134e92ffd9bSMartin Matuska  * 	allocation of output buffer.
135e92ffd9bSMartin Matuska  *
136e92ffd9bSMartin Matuska  * 	isize  : is the input size. Max supported value is ~1.9GB
137e92ffd9bSMartin Matuska  * 	return : maximum output size in a "worst case" scenario
138e92ffd9bSMartin Matuska  * 	note : this function is limited by "int" range (2^31-1)
139e92ffd9bSMartin Matuska  *
140e92ffd9bSMartin Matuska  * LZ4_uncompress_unknownOutputSize() :
141e92ffd9bSMartin Matuska  * 	isize  : is the input size, therefore the compressed size
142e92ffd9bSMartin Matuska  * 	maxOutputSize : is the size of the destination buffer (which must be
143e92ffd9bSMartin Matuska  * 		already allocated)
144e92ffd9bSMartin Matuska  * 	return : the number of bytes decoded in the destination buffer
145e92ffd9bSMartin Matuska  * 		(necessarily <= maxOutputSize). If the source stream is
146e92ffd9bSMartin Matuska  * 		malformed, the function will stop decoding and return a
147e92ffd9bSMartin Matuska  * 		negative result, indicating the byte position of the faulty
148e92ffd9bSMartin Matuska  * 		instruction. This function never writes beyond dest +
149e92ffd9bSMartin Matuska  * 		maxOutputSize, and is therefore protected against malicious
150e92ffd9bSMartin Matuska  * 		data packets.
151e92ffd9bSMartin Matuska  * 	note   : Destination buffer must be already allocated.
152e92ffd9bSMartin Matuska  *		This version is slightly slower than real_LZ4_uncompress()
153e92ffd9bSMartin Matuska  *
154e92ffd9bSMartin Matuska  * LZ4_compressCtx() :
155e92ffd9bSMartin Matuska  * 	This function explicitly handles the CTX memory structure.
156e92ffd9bSMartin Matuska  *
157e92ffd9bSMartin Matuska  * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
158e92ffd9bSMartin Matuska  * 	by the caller (either on the stack or using kmem_cache_alloc). Passing
159e92ffd9bSMartin Matuska  * 	NULL isn't valid.
160e92ffd9bSMartin Matuska  *
161e92ffd9bSMartin Matuska  * LZ4_compress64kCtx() :
162e92ffd9bSMartin Matuska  * 	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
163e92ffd9bSMartin Matuska  * 	isize *Must* be <64KB, otherwise the output will be corrupted.
164e92ffd9bSMartin Matuska  *
165e92ffd9bSMartin Matuska  * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
166e92ffd9bSMartin Matuska  * 	by the caller (either on the stack or using kmem_cache_alloc). Passing
167e92ffd9bSMartin Matuska  * 	NULL isn't valid.
168e92ffd9bSMartin Matuska  */
169e92ffd9bSMartin Matuska 
170e92ffd9bSMartin Matuska /*
171e92ffd9bSMartin Matuska  * Tuning parameters
172e92ffd9bSMartin Matuska  */
173e92ffd9bSMartin Matuska 
174e92ffd9bSMartin Matuska /*
175e92ffd9bSMartin Matuska  * COMPRESSIONLEVEL: Increasing this value improves compression ratio
176e92ffd9bSMartin Matuska  *	 Lowering this value reduces memory usage. Reduced memory usage
177e92ffd9bSMartin Matuska  *	typically improves speed, due to cache effect (ex: L1 32KB for Intel,
178e92ffd9bSMartin Matuska  *	L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
179e92ffd9bSMartin Matuska  *	(examples : 12 -> 16KB ; 17 -> 512KB)
180e92ffd9bSMartin Matuska  */
181e92ffd9bSMartin Matuska #define	COMPRESSIONLEVEL 12
182e92ffd9bSMartin Matuska 
183e92ffd9bSMartin Matuska /*
184e92ffd9bSMartin Matuska  * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
185e92ffd9bSMartin Matuska  *	algorithm skip faster data segments considered "incompressible".
186e92ffd9bSMartin Matuska  *	This may decrease compression ratio dramatically, but will be
187e92ffd9bSMartin Matuska  *	faster on incompressible data. Increasing this value will make
188e92ffd9bSMartin Matuska  *	the algorithm search more before declaring a segment "incompressible".
189e92ffd9bSMartin Matuska  *	This could improve compression a bit, but will be slower on
190e92ffd9bSMartin Matuska  *	incompressible data. The default value (6) is recommended.
191e92ffd9bSMartin Matuska  */
192e92ffd9bSMartin Matuska #define	NOTCOMPRESSIBLE_CONFIRMATION 6
193e92ffd9bSMartin Matuska 
194e92ffd9bSMartin Matuska /*
195e92ffd9bSMartin Matuska  * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
196e92ffd9bSMartin Matuska  * performance for big endian cpu, but the resulting compressed stream
197e92ffd9bSMartin Matuska  * will be incompatible with little-endian CPU. You can set this option
198e92ffd9bSMartin Matuska  * to 1 in situations where data will stay within closed environment.
199e92ffd9bSMartin Matuska  * This option is useless on Little_Endian CPU (such as x86).
200e92ffd9bSMartin Matuska  */
201e92ffd9bSMartin Matuska /* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */
202e92ffd9bSMartin Matuska 
203e92ffd9bSMartin Matuska /*
204e92ffd9bSMartin Matuska  * CPU Feature Detection
205e92ffd9bSMartin Matuska  */
206e92ffd9bSMartin Matuska 
207e92ffd9bSMartin Matuska /* 32 or 64 bits ? */
208e92ffd9bSMartin Matuska #if defined(_LP64)
209e92ffd9bSMartin Matuska #define	LZ4_ARCH64 1
210e92ffd9bSMartin Matuska #else
211e92ffd9bSMartin Matuska #define	LZ4_ARCH64 0
212e92ffd9bSMartin Matuska #endif
213e92ffd9bSMartin Matuska 
214e92ffd9bSMartin Matuska /*
215e92ffd9bSMartin Matuska  * Little Endian or Big Endian?
216e92ffd9bSMartin Matuska  * Note: overwrite the below #define if you know your architecture endianness.
217e92ffd9bSMartin Matuska  */
218e92ffd9bSMartin Matuska #if defined(_ZFS_BIG_ENDIAN)
219e92ffd9bSMartin Matuska #define	LZ4_BIG_ENDIAN 1
220e92ffd9bSMartin Matuska #else
221e92ffd9bSMartin Matuska /*
222e92ffd9bSMartin Matuska  * Little Endian assumed. PDP Endian and other very rare endian format
223e92ffd9bSMartin Matuska  * are unsupported.
224e92ffd9bSMartin Matuska  */
225e92ffd9bSMartin Matuska #undef LZ4_BIG_ENDIAN
226e92ffd9bSMartin Matuska #endif
227e92ffd9bSMartin Matuska 
228e92ffd9bSMartin Matuska /*
229e92ffd9bSMartin Matuska  * Unaligned memory access is automatically enabled for "common" CPU,
230e92ffd9bSMartin Matuska  * such as x86. For others CPU, the compiler will be more cautious, and
231e92ffd9bSMartin Matuska  * insert extra code to ensure aligned access is respected. If you know
232e92ffd9bSMartin Matuska  * your target CPU supports unaligned memory access, you may want to
233e92ffd9bSMartin Matuska  * force this option manually to improve performance
234e92ffd9bSMartin Matuska  */
235e92ffd9bSMartin Matuska #if defined(__ARM_FEATURE_UNALIGNED)
236e92ffd9bSMartin Matuska #define	LZ4_FORCE_UNALIGNED_ACCESS 1
237e92ffd9bSMartin Matuska #endif
238e92ffd9bSMartin Matuska 
239e92ffd9bSMartin Matuska /*
240e92ffd9bSMartin Matuska  * Illumos : we can't use GCC's __builtin_ctz family of builtins in the
241e92ffd9bSMartin Matuska  * kernel
242e92ffd9bSMartin Matuska  * Linux : we can use GCC's __builtin_ctz family of builtins in the
243e92ffd9bSMartin Matuska  * kernel
244e92ffd9bSMartin Matuska  */
245e92ffd9bSMartin Matuska #undef	LZ4_FORCE_SW_BITCOUNT
246e92ffd9bSMartin Matuska #if defined(__sparc)
247e92ffd9bSMartin Matuska #define	LZ4_FORCE_SW_BITCOUNT
248e92ffd9bSMartin Matuska #endif
249e92ffd9bSMartin Matuska 
250e92ffd9bSMartin Matuska /*
251e92ffd9bSMartin Matuska  * Compiler Options
252e92ffd9bSMartin Matuska  */
253e92ffd9bSMartin Matuska /* Disable restrict */
254e92ffd9bSMartin Matuska #define	restrict
255e92ffd9bSMartin Matuska 
256e92ffd9bSMartin Matuska /*
257e92ffd9bSMartin Matuska  * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it.
258e92ffd9bSMartin Matuska  * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16
259e92ffd9bSMartin Matuska  */
260e92ffd9bSMartin Matuska #ifdef GCC_VERSION
261e92ffd9bSMartin Matuska #undef GCC_VERSION
262e92ffd9bSMartin Matuska #endif
263e92ffd9bSMartin Matuska 
264e92ffd9bSMartin Matuska #define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
265e92ffd9bSMartin Matuska 
266e92ffd9bSMartin Matuska #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
267e92ffd9bSMartin Matuska #define	expect(expr, value)    (__builtin_expect((expr), (value)))
268e92ffd9bSMartin Matuska #else
269e92ffd9bSMartin Matuska #define	expect(expr, value)    (expr)
270e92ffd9bSMartin Matuska #endif
271e92ffd9bSMartin Matuska 
272e92ffd9bSMartin Matuska #ifndef likely
273e92ffd9bSMartin Matuska #define	likely(expr)	expect((expr) != 0, 1)
274e92ffd9bSMartin Matuska #endif
275e92ffd9bSMartin Matuska 
276e92ffd9bSMartin Matuska #ifndef unlikely
277e92ffd9bSMartin Matuska #define	unlikely(expr)	expect((expr) != 0, 0)
278e92ffd9bSMartin Matuska #endif
279e92ffd9bSMartin Matuska 
280e92ffd9bSMartin Matuska #define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
281e92ffd9bSMartin Matuska 	(((x) & 0xffu) << 8)))
282e92ffd9bSMartin Matuska 
283e92ffd9bSMartin Matuska /* Basic types */
284e92ffd9bSMartin Matuska #define	BYTE	uint8_t
285e92ffd9bSMartin Matuska #define	U16	uint16_t
286e92ffd9bSMartin Matuska #define	U32	uint32_t
287e92ffd9bSMartin Matuska #define	S32	int32_t
288e92ffd9bSMartin Matuska #define	U64	uint64_t
289e92ffd9bSMartin Matuska 
290e92ffd9bSMartin Matuska #ifndef LZ4_FORCE_UNALIGNED_ACCESS
291e92ffd9bSMartin Matuska #pragma pack(1)
292e92ffd9bSMartin Matuska #endif
293e92ffd9bSMartin Matuska 
294e92ffd9bSMartin Matuska typedef struct _U16_S {
295e92ffd9bSMartin Matuska 	U16 v;
296e92ffd9bSMartin Matuska } U16_S;
297e92ffd9bSMartin Matuska typedef struct _U32_S {
298e92ffd9bSMartin Matuska 	U32 v;
299e92ffd9bSMartin Matuska } U32_S;
300e92ffd9bSMartin Matuska typedef struct _U64_S {
301e92ffd9bSMartin Matuska 	U64 v;
302e92ffd9bSMartin Matuska } U64_S;
303e92ffd9bSMartin Matuska 
304e92ffd9bSMartin Matuska #ifndef LZ4_FORCE_UNALIGNED_ACCESS
305e92ffd9bSMartin Matuska #pragma pack()
306e92ffd9bSMartin Matuska #endif
307e92ffd9bSMartin Matuska 
308e92ffd9bSMartin Matuska #define	A64(x) (((U64_S *)(x))->v)
309e92ffd9bSMartin Matuska #define	A32(x) (((U32_S *)(x))->v)
310e92ffd9bSMartin Matuska #define	A16(x) (((U16_S *)(x))->v)
311e92ffd9bSMartin Matuska 
312e92ffd9bSMartin Matuska /*
313e92ffd9bSMartin Matuska  * Constants
314e92ffd9bSMartin Matuska  */
315e92ffd9bSMartin Matuska #define	MINMATCH 4
316e92ffd9bSMartin Matuska 
317e92ffd9bSMartin Matuska #define	HASH_LOG COMPRESSIONLEVEL
318e92ffd9bSMartin Matuska #define	HASHTABLESIZE (1 << HASH_LOG)
319e92ffd9bSMartin Matuska #define	HASH_MASK (HASHTABLESIZE - 1)
320e92ffd9bSMartin Matuska 
321e92ffd9bSMartin Matuska #define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
322e92ffd9bSMartin Matuska 	NOTCOMPRESSIBLE_CONFIRMATION : 2)
323e92ffd9bSMartin Matuska 
324e92ffd9bSMartin Matuska #define	COPYLENGTH 8
325e92ffd9bSMartin Matuska #define	LASTLITERALS 5
326e92ffd9bSMartin Matuska #define	MFLIMIT (COPYLENGTH + MINMATCH)
327e92ffd9bSMartin Matuska #define	MINLENGTH (MFLIMIT + 1)
328e92ffd9bSMartin Matuska 
329e92ffd9bSMartin Matuska #define	MAXD_LOG 16
330e92ffd9bSMartin Matuska #define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)
331e92ffd9bSMartin Matuska 
332e92ffd9bSMartin Matuska #define	ML_BITS 4
333e92ffd9bSMartin Matuska #define	ML_MASK ((1U<<ML_BITS)-1)
334e92ffd9bSMartin Matuska #define	RUN_BITS (8-ML_BITS)
335e92ffd9bSMartin Matuska #define	RUN_MASK ((1U<<RUN_BITS)-1)
336e92ffd9bSMartin Matuska 
337e92ffd9bSMartin Matuska 
338e92ffd9bSMartin Matuska /*
339e92ffd9bSMartin Matuska  * Architecture-specific macros
340e92ffd9bSMartin Matuska  */
341e92ffd9bSMartin Matuska #if LZ4_ARCH64
342e92ffd9bSMartin Matuska #define	STEPSIZE 8
343e92ffd9bSMartin Matuska #define	UARCH U64
344e92ffd9bSMartin Matuska #define	AARCH A64
345e92ffd9bSMartin Matuska #define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
346e92ffd9bSMartin Matuska #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
347e92ffd9bSMartin Matuska #define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
348e92ffd9bSMartin Matuska #define	HTYPE U32
349e92ffd9bSMartin Matuska #define	INITBASE(base)		const BYTE* const base = ip
350e92ffd9bSMartin Matuska #else /* !LZ4_ARCH64 */
351e92ffd9bSMartin Matuska #define	STEPSIZE 4
352e92ffd9bSMartin Matuska #define	UARCH U32
353e92ffd9bSMartin Matuska #define	AARCH A32
354e92ffd9bSMartin Matuska #define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
355e92ffd9bSMartin Matuska #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
356e92ffd9bSMartin Matuska #define	LZ4_SECURECOPY		LZ4_WILDCOPY
357e92ffd9bSMartin Matuska #define	HTYPE const BYTE *
358e92ffd9bSMartin Matuska #define	INITBASE(base)		const int base = 0
359e92ffd9bSMartin Matuska #endif /* !LZ4_ARCH64 */
360e92ffd9bSMartin Matuska 
361e92ffd9bSMartin Matuska #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
362e92ffd9bSMartin Matuska #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
363e92ffd9bSMartin Matuska 	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
364e92ffd9bSMartin Matuska #define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
365e92ffd9bSMartin Matuska 	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
366e92ffd9bSMartin Matuska #else
367e92ffd9bSMartin Matuska #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
368e92ffd9bSMartin Matuska #define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
369e92ffd9bSMartin Matuska #endif
370e92ffd9bSMartin Matuska 
371e92ffd9bSMartin Matuska 
372e92ffd9bSMartin Matuska /* Local structures */
373e92ffd9bSMartin Matuska struct refTables {
374e92ffd9bSMartin Matuska 	HTYPE hashTable[HASHTABLESIZE];
375e92ffd9bSMartin Matuska };
376e92ffd9bSMartin Matuska 
377e92ffd9bSMartin Matuska 
378e92ffd9bSMartin Matuska /* Macros */
379e92ffd9bSMartin Matuska #define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
380e92ffd9bSMartin Matuska 	HASH_LOG))
381e92ffd9bSMartin Matuska #define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
382e92ffd9bSMartin Matuska #define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
383e92ffd9bSMartin Matuska #define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
384e92ffd9bSMartin Matuska 	d = e; }
385e92ffd9bSMartin Matuska 
386e92ffd9bSMartin Matuska 
387e92ffd9bSMartin Matuska /* Private functions */
388e92ffd9bSMartin Matuska #if LZ4_ARCH64
389e92ffd9bSMartin Matuska 
390e92ffd9bSMartin Matuska static inline int
391e92ffd9bSMartin Matuska LZ4_NbCommonBytes(register U64 val)
392e92ffd9bSMartin Matuska {
393e92ffd9bSMartin Matuska #if defined(LZ4_BIG_ENDIAN)
394e92ffd9bSMartin Matuska #if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
395e92ffd9bSMartin Matuska 	!defined(LZ4_FORCE_SW_BITCOUNT)
396e92ffd9bSMartin Matuska 	return (__builtin_clzll(val) >> 3);
397e92ffd9bSMartin Matuska #else
398e92ffd9bSMartin Matuska 	int r;
399e92ffd9bSMartin Matuska 	if (!(val >> 32)) {
400e92ffd9bSMartin Matuska 		r = 4;
401e92ffd9bSMartin Matuska 	} else {
402e92ffd9bSMartin Matuska 		r = 0;
403e92ffd9bSMartin Matuska 		val >>= 32;
404e92ffd9bSMartin Matuska 	}
405e92ffd9bSMartin Matuska 	if (!(val >> 16)) {
406e92ffd9bSMartin Matuska 		r += 2;
407e92ffd9bSMartin Matuska 		val >>= 8;
408e92ffd9bSMartin Matuska 	} else {
409e92ffd9bSMartin Matuska 		val >>= 24;
410e92ffd9bSMartin Matuska 	}
411e92ffd9bSMartin Matuska 	r += (!val);
412e92ffd9bSMartin Matuska 	return (r);
413e92ffd9bSMartin Matuska #endif
414e92ffd9bSMartin Matuska #else
415e92ffd9bSMartin Matuska #if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
416e92ffd9bSMartin Matuska 	!defined(LZ4_FORCE_SW_BITCOUNT)
417e92ffd9bSMartin Matuska 	return (__builtin_ctzll(val) >> 3);
418e92ffd9bSMartin Matuska #else
419e92ffd9bSMartin Matuska 	static const int DeBruijnBytePos[64] =
420e92ffd9bSMartin Matuska 	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
421e92ffd9bSMartin Matuska 		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
422e92ffd9bSMartin Matuska 		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
423e92ffd9bSMartin Matuska 		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
424e92ffd9bSMartin Matuska 	};
425e92ffd9bSMartin Matuska 	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
426e92ffd9bSMartin Matuska 	    58];
427e92ffd9bSMartin Matuska #endif
428e92ffd9bSMartin Matuska #endif
429e92ffd9bSMartin Matuska }
430e92ffd9bSMartin Matuska 
431e92ffd9bSMartin Matuska #else
432e92ffd9bSMartin Matuska 
433e92ffd9bSMartin Matuska static inline int
434e92ffd9bSMartin Matuska LZ4_NbCommonBytes(register U32 val)
435e92ffd9bSMartin Matuska {
436e92ffd9bSMartin Matuska #if defined(LZ4_BIG_ENDIAN)
437e92ffd9bSMartin Matuska #if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
438e92ffd9bSMartin Matuska 	!defined(LZ4_FORCE_SW_BITCOUNT)
439e92ffd9bSMartin Matuska 	return (__builtin_clz(val) >> 3);
440e92ffd9bSMartin Matuska #else
441e92ffd9bSMartin Matuska 	int r;
442e92ffd9bSMartin Matuska 	if (!(val >> 16)) {
443e92ffd9bSMartin Matuska 		r = 2;
444e92ffd9bSMartin Matuska 		val >>= 8;
445e92ffd9bSMartin Matuska 	} else {
446e92ffd9bSMartin Matuska 		r = 0;
447e92ffd9bSMartin Matuska 		val >>= 24;
448e92ffd9bSMartin Matuska 	}
449e92ffd9bSMartin Matuska 	r += (!val);
450e92ffd9bSMartin Matuska 	return (r);
451e92ffd9bSMartin Matuska #endif
452e92ffd9bSMartin Matuska #else
453e92ffd9bSMartin Matuska #if defined(__GNUC__) && (GCC_VERSION >= 304) && \
454e92ffd9bSMartin Matuska 	!defined(LZ4_FORCE_SW_BITCOUNT)
455e92ffd9bSMartin Matuska 	return (__builtin_ctz(val) >> 3);
456e92ffd9bSMartin Matuska #else
457e92ffd9bSMartin Matuska 	static const int DeBruijnBytePos[32] = {
458e92ffd9bSMartin Matuska 		0, 0, 3, 0, 3, 1, 3, 0,
459e92ffd9bSMartin Matuska 		3, 2, 2, 1, 3, 2, 0, 1,
460e92ffd9bSMartin Matuska 		3, 3, 1, 2, 2, 2, 2, 0,
461e92ffd9bSMartin Matuska 		3, 1, 2, 0, 1, 0, 1, 1
462e92ffd9bSMartin Matuska 	};
463e92ffd9bSMartin Matuska 	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
464e92ffd9bSMartin Matuska 	    27];
465e92ffd9bSMartin Matuska #endif
466e92ffd9bSMartin Matuska #endif
467e92ffd9bSMartin Matuska }
468e92ffd9bSMartin Matuska 
469e92ffd9bSMartin Matuska #endif
470e92ffd9bSMartin Matuska 
471e92ffd9bSMartin Matuska /* Compression functions */
472e92ffd9bSMartin Matuska 
473e92ffd9bSMartin Matuska static int
474e92ffd9bSMartin Matuska LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
475e92ffd9bSMartin Matuska     int osize)
476e92ffd9bSMartin Matuska {
477e92ffd9bSMartin Matuska 	struct refTables *srt = (struct refTables *)ctx;
478e92ffd9bSMartin Matuska 	HTYPE *HashTable = (HTYPE *) (srt->hashTable);
479e92ffd9bSMartin Matuska 
480e92ffd9bSMartin Matuska 	const BYTE *ip = (BYTE *) source;
481e92ffd9bSMartin Matuska 	INITBASE(base);
482e92ffd9bSMartin Matuska 	const BYTE *anchor = ip;
483e92ffd9bSMartin Matuska 	const BYTE *const iend = ip + isize;
484e92ffd9bSMartin Matuska 	const BYTE *const oend = (BYTE *) dest + osize;
485e92ffd9bSMartin Matuska 	const BYTE *const mflimit = iend - MFLIMIT;
486e92ffd9bSMartin Matuska #define	matchlimit (iend - LASTLITERALS)
487e92ffd9bSMartin Matuska 
488e92ffd9bSMartin Matuska 	BYTE *op = (BYTE *) dest;
489e92ffd9bSMartin Matuska 
490e92ffd9bSMartin Matuska 	int len, length;
491e92ffd9bSMartin Matuska 	const int skipStrength = SKIPSTRENGTH;
492e92ffd9bSMartin Matuska 	U32 forwardH;
493e92ffd9bSMartin Matuska 
494e92ffd9bSMartin Matuska 
495e92ffd9bSMartin Matuska 	/* Init */
496e92ffd9bSMartin Matuska 	if (isize < MINLENGTH)
497e92ffd9bSMartin Matuska 		goto _last_literals;
498e92ffd9bSMartin Matuska 
499e92ffd9bSMartin Matuska 	/* First Byte */
500e92ffd9bSMartin Matuska 	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
501e92ffd9bSMartin Matuska 	ip++;
502e92ffd9bSMartin Matuska 	forwardH = LZ4_HASH_VALUE(ip);
503e92ffd9bSMartin Matuska 
504e92ffd9bSMartin Matuska 	/* Main Loop */
505e92ffd9bSMartin Matuska 	for (;;) {
506e92ffd9bSMartin Matuska 		int findMatchAttempts = (1U << skipStrength) + 3;
507e92ffd9bSMartin Matuska 		const BYTE *forwardIp = ip;
508e92ffd9bSMartin Matuska 		const BYTE *ref;
509e92ffd9bSMartin Matuska 		BYTE *token;
510e92ffd9bSMartin Matuska 
511e92ffd9bSMartin Matuska 		/* Find a match */
512e92ffd9bSMartin Matuska 		do {
513e92ffd9bSMartin Matuska 			U32 h = forwardH;
514e92ffd9bSMartin Matuska 			int step = findMatchAttempts++ >> skipStrength;
515e92ffd9bSMartin Matuska 			ip = forwardIp;
516e92ffd9bSMartin Matuska 			forwardIp = ip + step;
517e92ffd9bSMartin Matuska 
518e92ffd9bSMartin Matuska 			if (unlikely(forwardIp > mflimit)) {
519e92ffd9bSMartin Matuska 				goto _last_literals;
520e92ffd9bSMartin Matuska 			}
521e92ffd9bSMartin Matuska 
522e92ffd9bSMartin Matuska 			forwardH = LZ4_HASH_VALUE(forwardIp);
523e92ffd9bSMartin Matuska 			ref = base + HashTable[h];
524e92ffd9bSMartin Matuska 			HashTable[h] = ip - base;
525e92ffd9bSMartin Matuska 
526e92ffd9bSMartin Matuska 		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
527e92ffd9bSMartin Matuska 
528e92ffd9bSMartin Matuska 		/* Catch up */
529e92ffd9bSMartin Matuska 		while ((ip > anchor) && (ref > (BYTE *) source) &&
530e92ffd9bSMartin Matuska 		    unlikely(ip[-1] == ref[-1])) {
531e92ffd9bSMartin Matuska 			ip--;
532e92ffd9bSMartin Matuska 			ref--;
533e92ffd9bSMartin Matuska 		}
534e92ffd9bSMartin Matuska 
535e92ffd9bSMartin Matuska 		/* Encode Literal length */
536e92ffd9bSMartin Matuska 		length = ip - anchor;
537e92ffd9bSMartin Matuska 		token = op++;
538e92ffd9bSMartin Matuska 
539e92ffd9bSMartin Matuska 		/* Check output limit */
540e92ffd9bSMartin Matuska 		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
541e92ffd9bSMartin Matuska 		    (length >> 8) > oend))
542e92ffd9bSMartin Matuska 			return (0);
543e92ffd9bSMartin Matuska 
544e92ffd9bSMartin Matuska 		if (length >= (int)RUN_MASK) {
545e92ffd9bSMartin Matuska 			*token = (RUN_MASK << ML_BITS);
546e92ffd9bSMartin Matuska 			len = length - RUN_MASK;
547e92ffd9bSMartin Matuska 			for (; len > 254; len -= 255)
548e92ffd9bSMartin Matuska 				*op++ = 255;
549e92ffd9bSMartin Matuska 			*op++ = (BYTE)len;
550e92ffd9bSMartin Matuska 		} else
551e92ffd9bSMartin Matuska 			*token = (length << ML_BITS);
552e92ffd9bSMartin Matuska 
553e92ffd9bSMartin Matuska 		/* Copy Literals */
554e92ffd9bSMartin Matuska 		LZ4_BLINDCOPY(anchor, op, length);
555e92ffd9bSMartin Matuska 
556e92ffd9bSMartin Matuska 		_next_match:
557e92ffd9bSMartin Matuska 		/* Encode Offset */
558e92ffd9bSMartin Matuska 		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
559e92ffd9bSMartin Matuska 
560e92ffd9bSMartin Matuska 		/* Start Counting */
561e92ffd9bSMartin Matuska 		ip += MINMATCH;
562e92ffd9bSMartin Matuska 		ref += MINMATCH;	/* MinMatch verified */
563e92ffd9bSMartin Matuska 		anchor = ip;
564e92ffd9bSMartin Matuska 		while (likely(ip < matchlimit - (STEPSIZE - 1))) {
565e92ffd9bSMartin Matuska 			UARCH diff = AARCH(ref) ^ AARCH(ip);
566e92ffd9bSMartin Matuska 			if (!diff) {
567e92ffd9bSMartin Matuska 				ip += STEPSIZE;
568e92ffd9bSMartin Matuska 				ref += STEPSIZE;
569e92ffd9bSMartin Matuska 				continue;
570e92ffd9bSMartin Matuska 			}
571e92ffd9bSMartin Matuska 			ip += LZ4_NbCommonBytes(diff);
572e92ffd9bSMartin Matuska 			goto _endCount;
573e92ffd9bSMartin Matuska 		}
574e92ffd9bSMartin Matuska #if LZ4_ARCH64
575e92ffd9bSMartin Matuska 		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
576e92ffd9bSMartin Matuska 			ip += 4;
577e92ffd9bSMartin Matuska 			ref += 4;
578e92ffd9bSMartin Matuska 		}
579e92ffd9bSMartin Matuska #endif
580e92ffd9bSMartin Matuska 		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
581e92ffd9bSMartin Matuska 			ip += 2;
582e92ffd9bSMartin Matuska 			ref += 2;
583e92ffd9bSMartin Matuska 		}
584e92ffd9bSMartin Matuska 		if ((ip < matchlimit) && (*ref == *ip))
585e92ffd9bSMartin Matuska 			ip++;
586e92ffd9bSMartin Matuska 		_endCount:
587e92ffd9bSMartin Matuska 
588e92ffd9bSMartin Matuska 		/* Encode MatchLength */
589e92ffd9bSMartin Matuska 		len = (ip - anchor);
590e92ffd9bSMartin Matuska 		/* Check output limit */
591e92ffd9bSMartin Matuska 		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
592e92ffd9bSMartin Matuska 			return (0);
593e92ffd9bSMartin Matuska 		if (len >= (int)ML_MASK) {
594e92ffd9bSMartin Matuska 			*token += ML_MASK;
595e92ffd9bSMartin Matuska 			len -= ML_MASK;
596e92ffd9bSMartin Matuska 			for (; len > 509; len -= 510) {
597e92ffd9bSMartin Matuska 				*op++ = 255;
598e92ffd9bSMartin Matuska 				*op++ = 255;
599e92ffd9bSMartin Matuska 			}
600e92ffd9bSMartin Matuska 			if (len > 254) {
601e92ffd9bSMartin Matuska 				len -= 255;
602e92ffd9bSMartin Matuska 				*op++ = 255;
603e92ffd9bSMartin Matuska 			}
604e92ffd9bSMartin Matuska 			*op++ = (BYTE)len;
605e92ffd9bSMartin Matuska 		} else
606e92ffd9bSMartin Matuska 			*token += len;
607e92ffd9bSMartin Matuska 
608e92ffd9bSMartin Matuska 		/* Test end of chunk */
609e92ffd9bSMartin Matuska 		if (ip > mflimit) {
610e92ffd9bSMartin Matuska 			anchor = ip;
611e92ffd9bSMartin Matuska 			break;
612e92ffd9bSMartin Matuska 		}
613e92ffd9bSMartin Matuska 		/* Fill table */
614e92ffd9bSMartin Matuska 		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;
615e92ffd9bSMartin Matuska 
616e92ffd9bSMartin Matuska 		/* Test next position */
617e92ffd9bSMartin Matuska 		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
618e92ffd9bSMartin Matuska 		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
619e92ffd9bSMartin Matuska 		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
620e92ffd9bSMartin Matuska 			token = op++;
621e92ffd9bSMartin Matuska 			*token = 0;
622e92ffd9bSMartin Matuska 			goto _next_match;
623e92ffd9bSMartin Matuska 		}
624e92ffd9bSMartin Matuska 		/* Prepare next loop */
625e92ffd9bSMartin Matuska 		anchor = ip++;
626e92ffd9bSMartin Matuska 		forwardH = LZ4_HASH_VALUE(ip);
627e92ffd9bSMartin Matuska 	}
628e92ffd9bSMartin Matuska 
629e92ffd9bSMartin Matuska 	_last_literals:
630e92ffd9bSMartin Matuska 	/* Encode Last Literals */
631e92ffd9bSMartin Matuska 	{
632e92ffd9bSMartin Matuska 		int lastRun = iend - anchor;
633e92ffd9bSMartin Matuska 		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
634e92ffd9bSMartin Matuska 		    oend)
635e92ffd9bSMartin Matuska 			return (0);
636e92ffd9bSMartin Matuska 		if (lastRun >= (int)RUN_MASK) {
637e92ffd9bSMartin Matuska 			*op++ = (RUN_MASK << ML_BITS);
638e92ffd9bSMartin Matuska 			lastRun -= RUN_MASK;
639e92ffd9bSMartin Matuska 			for (; lastRun > 254; lastRun -= 255) {
640e92ffd9bSMartin Matuska 				*op++ = 255;
641e92ffd9bSMartin Matuska 			}
642e92ffd9bSMartin Matuska 			*op++ = (BYTE)lastRun;
643e92ffd9bSMartin Matuska 		} else
644e92ffd9bSMartin Matuska 			*op++ = (lastRun << ML_BITS);
645e92ffd9bSMartin Matuska 		(void) memcpy(op, anchor, iend - anchor);
646e92ffd9bSMartin Matuska 		op += iend - anchor;
647e92ffd9bSMartin Matuska 	}
648e92ffd9bSMartin Matuska 
649e92ffd9bSMartin Matuska 	/* End */
650e92ffd9bSMartin Matuska 	return (int)(((char *)op) - dest);
651e92ffd9bSMartin Matuska }
652e92ffd9bSMartin Matuska 
653e92ffd9bSMartin Matuska 
654e92ffd9bSMartin Matuska 
655e92ffd9bSMartin Matuska /* Note : this function is valid only if isize < LZ4_64KLIMIT */
656e92ffd9bSMartin Matuska #define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
657e92ffd9bSMartin Matuska #define	HASHLOG64K (HASH_LOG + 1)
658e92ffd9bSMartin Matuska #define	HASH64KTABLESIZE (1U << HASHLOG64K)
659e92ffd9bSMartin Matuska #define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
660e92ffd9bSMartin Matuska 	HASHLOG64K))
661e92ffd9bSMartin Matuska #define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))
662e92ffd9bSMartin Matuska 
663e92ffd9bSMartin Matuska static int
664e92ffd9bSMartin Matuska LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
665e92ffd9bSMartin Matuska     int osize)
666e92ffd9bSMartin Matuska {
667e92ffd9bSMartin Matuska 	struct refTables *srt = (struct refTables *)ctx;
668e92ffd9bSMartin Matuska 	U16 *HashTable = (U16 *) (srt->hashTable);
669e92ffd9bSMartin Matuska 
670e92ffd9bSMartin Matuska 	const BYTE *ip = (BYTE *) source;
671e92ffd9bSMartin Matuska 	const BYTE *anchor = ip;
672e92ffd9bSMartin Matuska 	const BYTE *const base = ip;
673e92ffd9bSMartin Matuska 	const BYTE *const iend = ip + isize;
674e92ffd9bSMartin Matuska 	const BYTE *const oend = (BYTE *) dest + osize;
675e92ffd9bSMartin Matuska 	const BYTE *const mflimit = iend - MFLIMIT;
676e92ffd9bSMartin Matuska #define	matchlimit (iend - LASTLITERALS)
677e92ffd9bSMartin Matuska 
678e92ffd9bSMartin Matuska 	BYTE *op = (BYTE *) dest;
679e92ffd9bSMartin Matuska 
680e92ffd9bSMartin Matuska 	int len, length;
681e92ffd9bSMartin Matuska 	const int skipStrength = SKIPSTRENGTH;
682e92ffd9bSMartin Matuska 	U32 forwardH;
683e92ffd9bSMartin Matuska 
684e92ffd9bSMartin Matuska 	/* Init */
685e92ffd9bSMartin Matuska 	if (isize < MINLENGTH)
686e92ffd9bSMartin Matuska 		goto _last_literals;
687e92ffd9bSMartin Matuska 
688e92ffd9bSMartin Matuska 	/* First Byte */
689e92ffd9bSMartin Matuska 	ip++;
690e92ffd9bSMartin Matuska 	forwardH = LZ4_HASH64K_VALUE(ip);
691e92ffd9bSMartin Matuska 
692e92ffd9bSMartin Matuska 	/* Main Loop */
693e92ffd9bSMartin Matuska 	for (;;) {
694e92ffd9bSMartin Matuska 		int findMatchAttempts = (1U << skipStrength) + 3;
695e92ffd9bSMartin Matuska 		const BYTE *forwardIp = ip;
696e92ffd9bSMartin Matuska 		const BYTE *ref;
697e92ffd9bSMartin Matuska 		BYTE *token;
698e92ffd9bSMartin Matuska 
699e92ffd9bSMartin Matuska 		/* Find a match */
700e92ffd9bSMartin Matuska 		do {
701e92ffd9bSMartin Matuska 			U32 h = forwardH;
702e92ffd9bSMartin Matuska 			int step = findMatchAttempts++ >> skipStrength;
703e92ffd9bSMartin Matuska 			ip = forwardIp;
704e92ffd9bSMartin Matuska 			forwardIp = ip + step;
705e92ffd9bSMartin Matuska 
706e92ffd9bSMartin Matuska 			if (forwardIp > mflimit) {
707e92ffd9bSMartin Matuska 				goto _last_literals;
708e92ffd9bSMartin Matuska 			}
709e92ffd9bSMartin Matuska 
710e92ffd9bSMartin Matuska 			forwardH = LZ4_HASH64K_VALUE(forwardIp);
711e92ffd9bSMartin Matuska 			ref = base + HashTable[h];
712e92ffd9bSMartin Matuska 			HashTable[h] = ip - base;
713e92ffd9bSMartin Matuska 
714e92ffd9bSMartin Matuska 		} while (A32(ref) != A32(ip));
715e92ffd9bSMartin Matuska 
716e92ffd9bSMartin Matuska 		/* Catch up */
717e92ffd9bSMartin Matuska 		while ((ip > anchor) && (ref > (BYTE *) source) &&
718e92ffd9bSMartin Matuska 		    (ip[-1] == ref[-1])) {
719e92ffd9bSMartin Matuska 			ip--;
720e92ffd9bSMartin Matuska 			ref--;
721e92ffd9bSMartin Matuska 		}
722e92ffd9bSMartin Matuska 
723e92ffd9bSMartin Matuska 		/* Encode Literal length */
724e92ffd9bSMartin Matuska 		length = ip - anchor;
725e92ffd9bSMartin Matuska 		token = op++;
726e92ffd9bSMartin Matuska 
727e92ffd9bSMartin Matuska 		/* Check output limit */
728e92ffd9bSMartin Matuska 		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
729e92ffd9bSMartin Matuska 		    (length >> 8) > oend))
730e92ffd9bSMartin Matuska 			return (0);
731e92ffd9bSMartin Matuska 
732e92ffd9bSMartin Matuska 		if (length >= (int)RUN_MASK) {
733e92ffd9bSMartin Matuska 			*token = (RUN_MASK << ML_BITS);
734e92ffd9bSMartin Matuska 			len = length - RUN_MASK;
735e92ffd9bSMartin Matuska 			for (; len > 254; len -= 255)
736e92ffd9bSMartin Matuska 				*op++ = 255;
737e92ffd9bSMartin Matuska 			*op++ = (BYTE)len;
738e92ffd9bSMartin Matuska 		} else
739e92ffd9bSMartin Matuska 			*token = (length << ML_BITS);
740e92ffd9bSMartin Matuska 
741e92ffd9bSMartin Matuska 		/* Copy Literals */
742e92ffd9bSMartin Matuska 		LZ4_BLINDCOPY(anchor, op, length);
743e92ffd9bSMartin Matuska 
744e92ffd9bSMartin Matuska 		_next_match:
745e92ffd9bSMartin Matuska 		/* Encode Offset */
746e92ffd9bSMartin Matuska 		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
747e92ffd9bSMartin Matuska 
748e92ffd9bSMartin Matuska 		/* Start Counting */
749e92ffd9bSMartin Matuska 		ip += MINMATCH;
750e92ffd9bSMartin Matuska 		ref += MINMATCH;	/* MinMatch verified */
751e92ffd9bSMartin Matuska 		anchor = ip;
752e92ffd9bSMartin Matuska 		while (ip < matchlimit - (STEPSIZE - 1)) {
753e92ffd9bSMartin Matuska 			UARCH diff = AARCH(ref) ^ AARCH(ip);
754e92ffd9bSMartin Matuska 			if (!diff) {
755e92ffd9bSMartin Matuska 				ip += STEPSIZE;
756e92ffd9bSMartin Matuska 				ref += STEPSIZE;
757e92ffd9bSMartin Matuska 				continue;
758e92ffd9bSMartin Matuska 			}
759e92ffd9bSMartin Matuska 			ip += LZ4_NbCommonBytes(diff);
760e92ffd9bSMartin Matuska 			goto _endCount;
761e92ffd9bSMartin Matuska 		}
762e92ffd9bSMartin Matuska #if LZ4_ARCH64
763e92ffd9bSMartin Matuska 		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
764e92ffd9bSMartin Matuska 			ip += 4;
765e92ffd9bSMartin Matuska 			ref += 4;
766e92ffd9bSMartin Matuska 		}
767e92ffd9bSMartin Matuska #endif
768e92ffd9bSMartin Matuska 		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
769e92ffd9bSMartin Matuska 			ip += 2;
770e92ffd9bSMartin Matuska 			ref += 2;
771e92ffd9bSMartin Matuska 		}
772e92ffd9bSMartin Matuska 		if ((ip < matchlimit) && (*ref == *ip))
773e92ffd9bSMartin Matuska 			ip++;
774e92ffd9bSMartin Matuska 		_endCount:
775e92ffd9bSMartin Matuska 
776e92ffd9bSMartin Matuska 		/* Encode MatchLength */
777e92ffd9bSMartin Matuska 		len = (ip - anchor);
778e92ffd9bSMartin Matuska 		/* Check output limit */
779e92ffd9bSMartin Matuska 		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
780e92ffd9bSMartin Matuska 			return (0);
781e92ffd9bSMartin Matuska 		if (len >= (int)ML_MASK) {
782e92ffd9bSMartin Matuska 			*token += ML_MASK;
783e92ffd9bSMartin Matuska 			len -= ML_MASK;
784e92ffd9bSMartin Matuska 			for (; len > 509; len -= 510) {
785e92ffd9bSMartin Matuska 				*op++ = 255;
786e92ffd9bSMartin Matuska 				*op++ = 255;
787e92ffd9bSMartin Matuska 			}
788e92ffd9bSMartin Matuska 			if (len > 254) {
789e92ffd9bSMartin Matuska 				len -= 255;
790e92ffd9bSMartin Matuska 				*op++ = 255;
791e92ffd9bSMartin Matuska 			}
792e92ffd9bSMartin Matuska 			*op++ = (BYTE)len;
793e92ffd9bSMartin Matuska 		} else
794e92ffd9bSMartin Matuska 			*token += len;
795e92ffd9bSMartin Matuska 
796e92ffd9bSMartin Matuska 		/* Test end of chunk */
797e92ffd9bSMartin Matuska 		if (ip > mflimit) {
798e92ffd9bSMartin Matuska 			anchor = ip;
799e92ffd9bSMartin Matuska 			break;
800e92ffd9bSMartin Matuska 		}
801e92ffd9bSMartin Matuska 		/* Fill table */
802e92ffd9bSMartin Matuska 		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;
803e92ffd9bSMartin Matuska 
804e92ffd9bSMartin Matuska 		/* Test next position */
805e92ffd9bSMartin Matuska 		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
806e92ffd9bSMartin Matuska 		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
807e92ffd9bSMartin Matuska 		if (A32(ref) == A32(ip)) {
808e92ffd9bSMartin Matuska 			token = op++;
809e92ffd9bSMartin Matuska 			*token = 0;
810e92ffd9bSMartin Matuska 			goto _next_match;
811e92ffd9bSMartin Matuska 		}
812e92ffd9bSMartin Matuska 		/* Prepare next loop */
813e92ffd9bSMartin Matuska 		anchor = ip++;
814e92ffd9bSMartin Matuska 		forwardH = LZ4_HASH64K_VALUE(ip);
815e92ffd9bSMartin Matuska 	}
816e92ffd9bSMartin Matuska 
817e92ffd9bSMartin Matuska 	_last_literals:
818e92ffd9bSMartin Matuska 	/* Encode Last Literals */
819e92ffd9bSMartin Matuska 	{
820e92ffd9bSMartin Matuska 		int lastRun = iend - anchor;
821e92ffd9bSMartin Matuska 		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
822e92ffd9bSMartin Matuska 		    oend)
823e92ffd9bSMartin Matuska 			return (0);
824e92ffd9bSMartin Matuska 		if (lastRun >= (int)RUN_MASK) {
825e92ffd9bSMartin Matuska 			*op++ = (RUN_MASK << ML_BITS);
826e92ffd9bSMartin Matuska 			lastRun -= RUN_MASK;
827e92ffd9bSMartin Matuska 			for (; lastRun > 254; lastRun -= 255)
828e92ffd9bSMartin Matuska 				*op++ = 255;
829e92ffd9bSMartin Matuska 			*op++ = (BYTE)lastRun;
830e92ffd9bSMartin Matuska 		} else
831e92ffd9bSMartin Matuska 			*op++ = (lastRun << ML_BITS);
832e92ffd9bSMartin Matuska 		(void) memcpy(op, anchor, iend - anchor);
833e92ffd9bSMartin Matuska 		op += iend - anchor;
834e92ffd9bSMartin Matuska 	}
835e92ffd9bSMartin Matuska 
836e92ffd9bSMartin Matuska 	/* End */
837e92ffd9bSMartin Matuska 	return (int)(((char *)op) - dest);
838e92ffd9bSMartin Matuska }
839e92ffd9bSMartin Matuska 
840e92ffd9bSMartin Matuska static int
841e92ffd9bSMartin Matuska real_LZ4_compress(const char *source, char *dest, int isize, int osize)
842e92ffd9bSMartin Matuska {
843e92ffd9bSMartin Matuska 	void *ctx;
844e92ffd9bSMartin Matuska 	int result;
845e92ffd9bSMartin Matuska 
846e92ffd9bSMartin Matuska 	ctx = lz4_alloc(KM_SLEEP);
847e92ffd9bSMartin Matuska 
848e92ffd9bSMartin Matuska 	/*
849e92ffd9bSMartin Matuska 	 * out of kernel memory, gently fall through - this will disable
850e92ffd9bSMartin Matuska 	 * compression in zio_compress_data
851e92ffd9bSMartin Matuska 	 */
852e92ffd9bSMartin Matuska 	if (ctx == NULL)
853e92ffd9bSMartin Matuska 		return (0);
854e92ffd9bSMartin Matuska 
855e92ffd9bSMartin Matuska 	memset(ctx, 0, sizeof (struct refTables));
856e92ffd9bSMartin Matuska 
857e92ffd9bSMartin Matuska 	if (isize < LZ4_64KLIMIT)
858e92ffd9bSMartin Matuska 		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
859e92ffd9bSMartin Matuska 	else
860e92ffd9bSMartin Matuska 		result = LZ4_compressCtx(ctx, source, dest, isize, osize);
861e92ffd9bSMartin Matuska 
862e92ffd9bSMartin Matuska 	lz4_free(ctx);
863e92ffd9bSMartin Matuska 	return (result);
864e92ffd9bSMartin Matuska }
865e92ffd9bSMartin Matuska 
866e92ffd9bSMartin Matuska #ifdef __FreeBSD__
867e92ffd9bSMartin Matuska /*
868e92ffd9bSMartin Matuska  * FreeBSD has 4, 8 and 16 KB malloc zones which can be used here.
869e92ffd9bSMartin Matuska  * Should struct refTables get resized this may need to be revisited, hence
870e92ffd9bSMartin Matuska  * compiler-time asserts.
871e92ffd9bSMartin Matuska  */
872e92ffd9bSMartin Matuska _Static_assert(sizeof(struct refTables) <= 16384,
873e92ffd9bSMartin Matuska     "refTables too big for malloc");
874e92ffd9bSMartin Matuska _Static_assert((sizeof(struct refTables) % 4096) == 0,
875e92ffd9bSMartin Matuska     "refTables not a multiple of page size");
876e92ffd9bSMartin Matuska #else
877e92ffd9bSMartin Matuska #define ZFS_LZ4_USE_CACHE
878e92ffd9bSMartin Matuska #endif
879e92ffd9bSMartin Matuska 
880e92ffd9bSMartin Matuska #ifdef ZFS_LZ4_USE_CACHE
881e92ffd9bSMartin Matuska static kmem_cache_t *lz4_cache;
882e92ffd9bSMartin Matuska #endif
883e92ffd9bSMartin Matuska 
884e92ffd9bSMartin Matuska #ifdef ZFS_LZ4_USE_CACHE
885e92ffd9bSMartin Matuska void
886e92ffd9bSMartin Matuska lz4_init(void)
887e92ffd9bSMartin Matuska {
888e92ffd9bSMartin Matuska 	lz4_cache = kmem_cache_create("lz4_cache",
889*ce4dcb97SMartin Matuska 	    sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL,
890*ce4dcb97SMartin Matuska 	    KMC_RECLAIMABLE);
891e92ffd9bSMartin Matuska }
892e92ffd9bSMartin Matuska 
893e92ffd9bSMartin Matuska void
894e92ffd9bSMartin Matuska lz4_fini(void)
895e92ffd9bSMartin Matuska {
896e92ffd9bSMartin Matuska 	if (lz4_cache) {
897e92ffd9bSMartin Matuska 		kmem_cache_destroy(lz4_cache);
898e92ffd9bSMartin Matuska 		lz4_cache = NULL;
899e92ffd9bSMartin Matuska 	}
900e92ffd9bSMartin Matuska }
901e92ffd9bSMartin Matuska 
902e92ffd9bSMartin Matuska static void *
903e92ffd9bSMartin Matuska lz4_alloc(int flags)
904e92ffd9bSMartin Matuska {
905e92ffd9bSMartin Matuska 	ASSERT(lz4_cache != NULL);
906e92ffd9bSMartin Matuska 	return (kmem_cache_alloc(lz4_cache, flags));
907e92ffd9bSMartin Matuska }
908e92ffd9bSMartin Matuska 
909e92ffd9bSMartin Matuska static void
910e92ffd9bSMartin Matuska lz4_free(void *ctx)
911e92ffd9bSMartin Matuska {
912e92ffd9bSMartin Matuska 	kmem_cache_free(lz4_cache, ctx);
913e92ffd9bSMartin Matuska }
914e92ffd9bSMartin Matuska #else
915e92ffd9bSMartin Matuska void
916e92ffd9bSMartin Matuska lz4_init(void)
917e92ffd9bSMartin Matuska {
918e92ffd9bSMartin Matuska }
919e92ffd9bSMartin Matuska 
920e92ffd9bSMartin Matuska void
921e92ffd9bSMartin Matuska lz4_fini(void)
922e92ffd9bSMartin Matuska {
923e92ffd9bSMartin Matuska }
924e92ffd9bSMartin Matuska 
925e92ffd9bSMartin Matuska static void *
926e92ffd9bSMartin Matuska lz4_alloc(int flags)
927e92ffd9bSMartin Matuska {
928e92ffd9bSMartin Matuska 	return (kmem_alloc(sizeof (struct refTables), flags));
929e92ffd9bSMartin Matuska }
930e92ffd9bSMartin Matuska 
931e92ffd9bSMartin Matuska static void
932e92ffd9bSMartin Matuska lz4_free(void *ctx)
933e92ffd9bSMartin Matuska {
934e92ffd9bSMartin Matuska 	kmem_free(ctx, sizeof (struct refTables));
935e92ffd9bSMartin Matuska }
936e92ffd9bSMartin Matuska #endif
937