1*61145dc2SMartin Matuska // SPDX-License-Identifier: BSD-2-Clause
2e92ffd9bSMartin Matuska /*
3e92ffd9bSMartin Matuska * LZ4 - Fast LZ compression algorithm
4e92ffd9bSMartin Matuska * Header File
5e92ffd9bSMartin Matuska * Copyright (C) 2011-2013, Yann Collet.
6e92ffd9bSMartin Matuska * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7e92ffd9bSMartin Matuska *
8e92ffd9bSMartin Matuska * Redistribution and use in source and binary forms, with or without
9e92ffd9bSMartin Matuska * modification, are permitted provided that the following conditions are
10e92ffd9bSMartin Matuska * met:
11e92ffd9bSMartin Matuska *
12e92ffd9bSMartin Matuska * * Redistributions of source code must retain the above copyright
13e92ffd9bSMartin Matuska * notice, this list of conditions and the following disclaimer.
14e92ffd9bSMartin Matuska * * Redistributions in binary form must reproduce the above
15e92ffd9bSMartin Matuska * copyright notice, this list of conditions and the following disclaimer
16e92ffd9bSMartin Matuska * in the documentation and/or other materials provided with the
17e92ffd9bSMartin Matuska * distribution.
18e92ffd9bSMartin Matuska *
19e92ffd9bSMartin Matuska * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20e92ffd9bSMartin Matuska * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21e92ffd9bSMartin Matuska * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22e92ffd9bSMartin Matuska * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23e92ffd9bSMartin Matuska * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24e92ffd9bSMartin Matuska * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25e92ffd9bSMartin Matuska * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26e92ffd9bSMartin Matuska * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27e92ffd9bSMartin Matuska * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28e92ffd9bSMartin Matuska * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29e92ffd9bSMartin Matuska * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30e92ffd9bSMartin Matuska *
31e92ffd9bSMartin Matuska * You can contact the author at :
32e92ffd9bSMartin Matuska * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
33e92ffd9bSMartin Matuska * - LZ4 source repository : http://code.google.com/p/lz4/
34e92ffd9bSMartin Matuska */
35e92ffd9bSMartin Matuska
36e92ffd9bSMartin Matuska /*
37e92ffd9bSMartin Matuska * N.B. - This file seems to be based on LZ4 r85, dated Dec 10, 2012
38e92ffd9bSMartin Matuska */
39e92ffd9bSMartin Matuska
40e92ffd9bSMartin Matuska #include <sys/zfs_context.h>
41e92ffd9bSMartin Matuska #include <sys/zio_compress.h>
42e92ffd9bSMartin Matuska
43e92ffd9bSMartin Matuska static int real_LZ4_compress(const char *source, char *dest, int isize,
44e92ffd9bSMartin Matuska int osize);
45e92ffd9bSMartin Matuska static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
46e92ffd9bSMartin Matuska int isize, int osize);
47e92ffd9bSMartin Matuska static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
48e92ffd9bSMartin Matuska int isize, int osize);
49e92ffd9bSMartin Matuska
50e92ffd9bSMartin Matuska /* See lz4.c */
51e92ffd9bSMartin Matuska int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
52e92ffd9bSMartin Matuska int isize, int maxOutputSize);
53e92ffd9bSMartin Matuska
54e92ffd9bSMartin Matuska static void *lz4_alloc(int flags);
55e92ffd9bSMartin Matuska static void lz4_free(void *ctx);
56e92ffd9bSMartin Matuska
57e2df9bb4SMartin Matuska static size_t
zfs_lz4_compress_buf(void * s_start,void * d_start,size_t s_len,size_t d_len,int n)58e2df9bb4SMartin Matuska zfs_lz4_compress_buf(void *s_start, void *d_start, size_t s_len,
59e92ffd9bSMartin Matuska size_t d_len, int n)
60e92ffd9bSMartin Matuska {
61e92ffd9bSMartin Matuska (void) n;
62e92ffd9bSMartin Matuska uint32_t bufsiz;
63e92ffd9bSMartin Matuska char *dest = d_start;
64e92ffd9bSMartin Matuska
65e92ffd9bSMartin Matuska ASSERT(d_len >= sizeof (bufsiz));
66e92ffd9bSMartin Matuska
67e92ffd9bSMartin Matuska bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
68e92ffd9bSMartin Matuska d_len - sizeof (bufsiz));
69e92ffd9bSMartin Matuska
70e92ffd9bSMartin Matuska /* Signal an error if the compression routine returned zero. */
71e92ffd9bSMartin Matuska if (bufsiz == 0)
72e92ffd9bSMartin Matuska return (s_len);
73e92ffd9bSMartin Matuska
74e92ffd9bSMartin Matuska /*
75e92ffd9bSMartin Matuska * The exact compressed size is needed by the decompression routine,
76e92ffd9bSMartin Matuska * so it is stored at the start of the buffer. Note that this may be
77e92ffd9bSMartin Matuska * less than the compressed block size, which is rounded up to a
78e92ffd9bSMartin Matuska * multiple of 1<<ashift.
79e92ffd9bSMartin Matuska */
80e92ffd9bSMartin Matuska *(uint32_t *)dest = BE_32(bufsiz);
81e92ffd9bSMartin Matuska
82e92ffd9bSMartin Matuska return (bufsiz + sizeof (bufsiz));
83e92ffd9bSMartin Matuska }
84e92ffd9bSMartin Matuska
85e2df9bb4SMartin Matuska static int
zfs_lz4_decompress_buf(void * s_start,void * d_start,size_t s_len,size_t d_len,int n)86e2df9bb4SMartin Matuska zfs_lz4_decompress_buf(void *s_start, void *d_start, size_t s_len,
87e92ffd9bSMartin Matuska size_t d_len, int n)
88e92ffd9bSMartin Matuska {
89e92ffd9bSMartin Matuska (void) n;
90e92ffd9bSMartin Matuska const char *src = s_start;
91e92ffd9bSMartin Matuska uint32_t bufsiz = BE_IN32(src);
92e92ffd9bSMartin Matuska
93e92ffd9bSMartin Matuska /* invalid compressed buffer size encoded at start */
94e92ffd9bSMartin Matuska if (bufsiz + sizeof (bufsiz) > s_len)
95e92ffd9bSMartin Matuska return (1);
96e92ffd9bSMartin Matuska
97e92ffd9bSMartin Matuska /*
98e92ffd9bSMartin Matuska * Returns 0 on success (decompression function returned non-negative)
99e92ffd9bSMartin Matuska * and non-zero on failure (decompression function returned negative).
100e92ffd9bSMartin Matuska */
101e92ffd9bSMartin Matuska return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
102e92ffd9bSMartin Matuska d_start, bufsiz, d_len) < 0);
103e92ffd9bSMartin Matuska }
104e92ffd9bSMartin Matuska
105e2df9bb4SMartin Matuska ZFS_COMPRESS_WRAP_DECL(zfs_lz4_compress)
106e2df9bb4SMartin Matuska ZFS_DECOMPRESS_WRAP_DECL(zfs_lz4_decompress)
107e2df9bb4SMartin Matuska
108e92ffd9bSMartin Matuska /*
109e92ffd9bSMartin Matuska * LZ4 API Description:
110e92ffd9bSMartin Matuska *
111e92ffd9bSMartin Matuska * Simple Functions:
112e92ffd9bSMartin Matuska * real_LZ4_compress() :
113e92ffd9bSMartin Matuska * isize : is the input size. Max supported value is ~1.9GB
114e92ffd9bSMartin Matuska * return : the number of bytes written in buffer dest
115e92ffd9bSMartin Matuska * or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
116e92ffd9bSMartin Matuska * note : destination buffer must be already allocated.
117e92ffd9bSMartin Matuska * destination buffer must be sized to handle worst cases
118e92ffd9bSMartin Matuska * situations (input data not compressible) worst case size
119e92ffd9bSMartin Matuska * evaluation is provided by function LZ4_compressBound().
120e92ffd9bSMartin Matuska *
121e92ffd9bSMartin Matuska * real_LZ4_uncompress() :
122e92ffd9bSMartin Matuska * osize : is the output size, therefore the original size
123e92ffd9bSMartin Matuska * return : the number of bytes read in the source buffer.
124e92ffd9bSMartin Matuska * If the source stream is malformed, the function will stop
125e92ffd9bSMartin Matuska * decoding and return a negative result, indicating the byte
126e92ffd9bSMartin Matuska * position of the faulty instruction. This function never
127e92ffd9bSMartin Matuska * writes beyond dest + osize, and is therefore protected
128e92ffd9bSMartin Matuska * against malicious data packets.
129e92ffd9bSMartin Matuska * note : destination buffer must be already allocated
130e92ffd9bSMartin Matuska * note : real_LZ4_uncompress() is not used in ZFS so its code
131e92ffd9bSMartin Matuska * is not present here.
132e92ffd9bSMartin Matuska *
133e92ffd9bSMartin Matuska * Advanced Functions
134e92ffd9bSMartin Matuska *
135e92ffd9bSMartin Matuska * LZ4_compressBound() :
136e92ffd9bSMartin Matuska * Provides the maximum size that LZ4 may output in a "worst case"
137e92ffd9bSMartin Matuska * scenario (input data not compressible) primarily useful for memory
138e92ffd9bSMartin Matuska * allocation of output buffer.
139e92ffd9bSMartin Matuska *
140e92ffd9bSMartin Matuska * isize : is the input size. Max supported value is ~1.9GB
141e92ffd9bSMartin Matuska * return : maximum output size in a "worst case" scenario
142e92ffd9bSMartin Matuska * note : this function is limited by "int" range (2^31-1)
143e92ffd9bSMartin Matuska *
144e92ffd9bSMartin Matuska * LZ4_uncompress_unknownOutputSize() :
145e92ffd9bSMartin Matuska * isize : is the input size, therefore the compressed size
146e92ffd9bSMartin Matuska * maxOutputSize : is the size of the destination buffer (which must be
147e92ffd9bSMartin Matuska * already allocated)
148e92ffd9bSMartin Matuska * return : the number of bytes decoded in the destination buffer
149e92ffd9bSMartin Matuska * (necessarily <= maxOutputSize). If the source stream is
150e92ffd9bSMartin Matuska * malformed, the function will stop decoding and return a
151e92ffd9bSMartin Matuska * negative result, indicating the byte position of the faulty
152e92ffd9bSMartin Matuska * instruction. This function never writes beyond dest +
153e92ffd9bSMartin Matuska * maxOutputSize, and is therefore protected against malicious
154e92ffd9bSMartin Matuska * data packets.
155e92ffd9bSMartin Matuska * note : Destination buffer must be already allocated.
156e92ffd9bSMartin Matuska * This version is slightly slower than real_LZ4_uncompress()
157e92ffd9bSMartin Matuska *
158e92ffd9bSMartin Matuska * LZ4_compressCtx() :
159e92ffd9bSMartin Matuska * This function explicitly handles the CTX memory structure.
160e92ffd9bSMartin Matuska *
161e92ffd9bSMartin Matuska * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
162e92ffd9bSMartin Matuska * by the caller (either on the stack or using kmem_cache_alloc). Passing
163e92ffd9bSMartin Matuska * NULL isn't valid.
164e92ffd9bSMartin Matuska *
165e92ffd9bSMartin Matuska * LZ4_compress64kCtx() :
166e92ffd9bSMartin Matuska * Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
167e92ffd9bSMartin Matuska * isize *Must* be <64KB, otherwise the output will be corrupted.
168e92ffd9bSMartin Matuska *
169e92ffd9bSMartin Matuska * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
170e92ffd9bSMartin Matuska * by the caller (either on the stack or using kmem_cache_alloc). Passing
171e92ffd9bSMartin Matuska * NULL isn't valid.
172e92ffd9bSMartin Matuska */
173e92ffd9bSMartin Matuska
174e92ffd9bSMartin Matuska /*
175e92ffd9bSMartin Matuska * Tuning parameters
176e92ffd9bSMartin Matuska */
177e92ffd9bSMartin Matuska
178e92ffd9bSMartin Matuska /*
179e92ffd9bSMartin Matuska * COMPRESSIONLEVEL: Increasing this value improves compression ratio
180e92ffd9bSMartin Matuska * Lowering this value reduces memory usage. Reduced memory usage
181e92ffd9bSMartin Matuska * typically improves speed, due to cache effect (ex: L1 32KB for Intel,
182e92ffd9bSMartin Matuska * L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
183e92ffd9bSMartin Matuska * (examples : 12 -> 16KB ; 17 -> 512KB)
184e92ffd9bSMartin Matuska */
185e92ffd9bSMartin Matuska #define COMPRESSIONLEVEL 12
186e92ffd9bSMartin Matuska
187e92ffd9bSMartin Matuska /*
188e92ffd9bSMartin Matuska * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
189e92ffd9bSMartin Matuska * algorithm skip faster data segments considered "incompressible".
190e92ffd9bSMartin Matuska * This may decrease compression ratio dramatically, but will be
191e92ffd9bSMartin Matuska * faster on incompressible data. Increasing this value will make
192e92ffd9bSMartin Matuska * the algorithm search more before declaring a segment "incompressible".
193e92ffd9bSMartin Matuska * This could improve compression a bit, but will be slower on
194e92ffd9bSMartin Matuska * incompressible data. The default value (6) is recommended.
195e92ffd9bSMartin Matuska */
196e92ffd9bSMartin Matuska #define NOTCOMPRESSIBLE_CONFIRMATION 6
197e92ffd9bSMartin Matuska
198e92ffd9bSMartin Matuska /*
199e92ffd9bSMartin Matuska * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
200e92ffd9bSMartin Matuska * performance for big endian cpu, but the resulting compressed stream
201e92ffd9bSMartin Matuska * will be incompatible with little-endian CPU. You can set this option
202e92ffd9bSMartin Matuska * to 1 in situations where data will stay within closed environment.
203e92ffd9bSMartin Matuska * This option is useless on Little_Endian CPU (such as x86).
204e92ffd9bSMartin Matuska */
205e92ffd9bSMartin Matuska /* #define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */
206e92ffd9bSMartin Matuska
207e92ffd9bSMartin Matuska /*
208e92ffd9bSMartin Matuska * CPU Feature Detection
209e92ffd9bSMartin Matuska */
210e92ffd9bSMartin Matuska
211e92ffd9bSMartin Matuska /* 32 or 64 bits ? */
212e92ffd9bSMartin Matuska #if defined(_LP64)
213e92ffd9bSMartin Matuska #define LZ4_ARCH64 1
214e92ffd9bSMartin Matuska #else
215e92ffd9bSMartin Matuska #define LZ4_ARCH64 0
216e92ffd9bSMartin Matuska #endif
217e92ffd9bSMartin Matuska
218e92ffd9bSMartin Matuska /*
219e92ffd9bSMartin Matuska * Little Endian or Big Endian?
220e92ffd9bSMartin Matuska * Note: overwrite the below #define if you know your architecture endianness.
221e92ffd9bSMartin Matuska */
222e92ffd9bSMartin Matuska #if defined(_ZFS_BIG_ENDIAN)
223e92ffd9bSMartin Matuska #define LZ4_BIG_ENDIAN 1
224e92ffd9bSMartin Matuska #else
225e92ffd9bSMartin Matuska /*
226e92ffd9bSMartin Matuska * Little Endian assumed. PDP Endian and other very rare endian format
227e92ffd9bSMartin Matuska * are unsupported.
228e92ffd9bSMartin Matuska */
229e92ffd9bSMartin Matuska #undef LZ4_BIG_ENDIAN
230e92ffd9bSMartin Matuska #endif
231e92ffd9bSMartin Matuska
232e92ffd9bSMartin Matuska /*
233e92ffd9bSMartin Matuska * Unaligned memory access is automatically enabled for "common" CPU,
234e92ffd9bSMartin Matuska * such as x86. For others CPU, the compiler will be more cautious, and
235e92ffd9bSMartin Matuska * insert extra code to ensure aligned access is respected. If you know
236e92ffd9bSMartin Matuska * your target CPU supports unaligned memory access, you may want to
237e92ffd9bSMartin Matuska * force this option manually to improve performance
238e92ffd9bSMartin Matuska */
239e92ffd9bSMartin Matuska #if defined(__ARM_FEATURE_UNALIGNED)
240e92ffd9bSMartin Matuska #define LZ4_FORCE_UNALIGNED_ACCESS 1
241e92ffd9bSMartin Matuska #endif
242e92ffd9bSMartin Matuska
243e92ffd9bSMartin Matuska /*
244e92ffd9bSMartin Matuska * Illumos : we can't use GCC's __builtin_ctz family of builtins in the
245e92ffd9bSMartin Matuska * kernel
246e92ffd9bSMartin Matuska * Linux : we can use GCC's __builtin_ctz family of builtins in the
247e92ffd9bSMartin Matuska * kernel
248e92ffd9bSMartin Matuska */
249e92ffd9bSMartin Matuska #undef LZ4_FORCE_SW_BITCOUNT
250e92ffd9bSMartin Matuska #if defined(__sparc)
251e92ffd9bSMartin Matuska #define LZ4_FORCE_SW_BITCOUNT
252e92ffd9bSMartin Matuska #endif
253e92ffd9bSMartin Matuska
254e92ffd9bSMartin Matuska /*
255e92ffd9bSMartin Matuska * Compiler Options
256e92ffd9bSMartin Matuska */
257e92ffd9bSMartin Matuska /* Disable restrict */
258e92ffd9bSMartin Matuska #define restrict
259e92ffd9bSMartin Matuska
260e92ffd9bSMartin Matuska /*
261e92ffd9bSMartin Matuska * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it.
262e92ffd9bSMartin Matuska * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16
263e92ffd9bSMartin Matuska */
264e92ffd9bSMartin Matuska #ifdef GCC_VERSION
265e92ffd9bSMartin Matuska #undef GCC_VERSION
266e92ffd9bSMartin Matuska #endif
267e92ffd9bSMartin Matuska
268e92ffd9bSMartin Matuska #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
269e92ffd9bSMartin Matuska
270e92ffd9bSMartin Matuska #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
271e92ffd9bSMartin Matuska #define expect(expr, value) (__builtin_expect((expr), (value)))
272e92ffd9bSMartin Matuska #else
273e92ffd9bSMartin Matuska #define expect(expr, value) (expr)
274e92ffd9bSMartin Matuska #endif
275e92ffd9bSMartin Matuska
276e92ffd9bSMartin Matuska #ifndef likely
277e92ffd9bSMartin Matuska #define likely(expr) expect((expr) != 0, 1)
278e92ffd9bSMartin Matuska #endif
279e92ffd9bSMartin Matuska
280e92ffd9bSMartin Matuska #ifndef unlikely
281e92ffd9bSMartin Matuska #define unlikely(expr) expect((expr) != 0, 0)
282e92ffd9bSMartin Matuska #endif
283e92ffd9bSMartin Matuska
284e92ffd9bSMartin Matuska #define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
285e92ffd9bSMartin Matuska (((x) & 0xffu) << 8)))
286e92ffd9bSMartin Matuska
287e92ffd9bSMartin Matuska /* Basic types */
288e92ffd9bSMartin Matuska #define BYTE uint8_t
289e92ffd9bSMartin Matuska #define U16 uint16_t
290e92ffd9bSMartin Matuska #define U32 uint32_t
291e92ffd9bSMartin Matuska #define S32 int32_t
292e92ffd9bSMartin Matuska #define U64 uint64_t
293e92ffd9bSMartin Matuska
294e92ffd9bSMartin Matuska #ifndef LZ4_FORCE_UNALIGNED_ACCESS
295e92ffd9bSMartin Matuska #pragma pack(1)
296e92ffd9bSMartin Matuska #endif
297e92ffd9bSMartin Matuska
298e92ffd9bSMartin Matuska typedef struct _U16_S {
299e92ffd9bSMartin Matuska U16 v;
300e92ffd9bSMartin Matuska } U16_S;
301e92ffd9bSMartin Matuska typedef struct _U32_S {
302e92ffd9bSMartin Matuska U32 v;
303e92ffd9bSMartin Matuska } U32_S;
304e92ffd9bSMartin Matuska typedef struct _U64_S {
305e92ffd9bSMartin Matuska U64 v;
306e92ffd9bSMartin Matuska } U64_S;
307e92ffd9bSMartin Matuska
308e92ffd9bSMartin Matuska #ifndef LZ4_FORCE_UNALIGNED_ACCESS
309e92ffd9bSMartin Matuska #pragma pack()
310e92ffd9bSMartin Matuska #endif
311e92ffd9bSMartin Matuska
312e92ffd9bSMartin Matuska #define A64(x) (((U64_S *)(x))->v)
313e92ffd9bSMartin Matuska #define A32(x) (((U32_S *)(x))->v)
314e92ffd9bSMartin Matuska #define A16(x) (((U16_S *)(x))->v)
315e92ffd9bSMartin Matuska
316e92ffd9bSMartin Matuska /*
317e92ffd9bSMartin Matuska * Constants
318e92ffd9bSMartin Matuska */
319e92ffd9bSMartin Matuska #define MINMATCH 4
320e92ffd9bSMartin Matuska
321e92ffd9bSMartin Matuska #define HASH_LOG COMPRESSIONLEVEL
322e92ffd9bSMartin Matuska #define HASHTABLESIZE (1 << HASH_LOG)
323e92ffd9bSMartin Matuska #define HASH_MASK (HASHTABLESIZE - 1)
324e92ffd9bSMartin Matuska
325e92ffd9bSMartin Matuska #define SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
326e92ffd9bSMartin Matuska NOTCOMPRESSIBLE_CONFIRMATION : 2)
327e92ffd9bSMartin Matuska
328e92ffd9bSMartin Matuska #define COPYLENGTH 8
329e92ffd9bSMartin Matuska #define LASTLITERALS 5
330e92ffd9bSMartin Matuska #define MFLIMIT (COPYLENGTH + MINMATCH)
331e92ffd9bSMartin Matuska #define MINLENGTH (MFLIMIT + 1)
332e92ffd9bSMartin Matuska
333e92ffd9bSMartin Matuska #define MAXD_LOG 16
334e92ffd9bSMartin Matuska #define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
335e92ffd9bSMartin Matuska
336e92ffd9bSMartin Matuska #define ML_BITS 4
337e92ffd9bSMartin Matuska #define ML_MASK ((1U<<ML_BITS)-1)
338e92ffd9bSMartin Matuska #define RUN_BITS (8-ML_BITS)
339e92ffd9bSMartin Matuska #define RUN_MASK ((1U<<RUN_BITS)-1)
340e92ffd9bSMartin Matuska
341e92ffd9bSMartin Matuska
342e92ffd9bSMartin Matuska /*
343e92ffd9bSMartin Matuska * Architecture-specific macros
344e92ffd9bSMartin Matuska */
345e92ffd9bSMartin Matuska #if LZ4_ARCH64
346e92ffd9bSMartin Matuska #define STEPSIZE 8
347e92ffd9bSMartin Matuska #define UARCH U64
348e92ffd9bSMartin Matuska #define AARCH A64
349e92ffd9bSMartin Matuska #define LZ4_COPYSTEP(s, d) A64(d) = A64(s); d += 8; s += 8;
350e92ffd9bSMartin Matuska #define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d)
351e92ffd9bSMartin Matuska #define LZ4_SECURECOPY(s, d, e) if (d < e) LZ4_WILDCOPY(s, d, e)
352e92ffd9bSMartin Matuska #define HTYPE U32
353e92ffd9bSMartin Matuska #define INITBASE(base) const BYTE* const base = ip
354e92ffd9bSMartin Matuska #else /* !LZ4_ARCH64 */
355e92ffd9bSMartin Matuska #define STEPSIZE 4
356e92ffd9bSMartin Matuska #define UARCH U32
357e92ffd9bSMartin Matuska #define AARCH A32
358e92ffd9bSMartin Matuska #define LZ4_COPYSTEP(s, d) A32(d) = A32(s); d += 4; s += 4;
359e92ffd9bSMartin Matuska #define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
360e92ffd9bSMartin Matuska #define LZ4_SECURECOPY LZ4_WILDCOPY
361e92ffd9bSMartin Matuska #define HTYPE const BYTE *
362e92ffd9bSMartin Matuska #define INITBASE(base) const int base = 0
363e92ffd9bSMartin Matuska #endif /* !LZ4_ARCH64 */
364e92ffd9bSMartin Matuska
365e92ffd9bSMartin Matuska #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
366e92ffd9bSMartin Matuska #define LZ4_READ_LITTLEENDIAN_16(d, s, p) \
367e92ffd9bSMartin Matuska { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
368e92ffd9bSMartin Matuska #define LZ4_WRITE_LITTLEENDIAN_16(p, i) \
369e92ffd9bSMartin Matuska { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
370e92ffd9bSMartin Matuska #else
371e92ffd9bSMartin Matuska #define LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
372e92ffd9bSMartin Matuska #define LZ4_WRITE_LITTLEENDIAN_16(p, v) { A16(p) = v; p += 2; }
373e92ffd9bSMartin Matuska #endif
374e92ffd9bSMartin Matuska
375e92ffd9bSMartin Matuska
376e92ffd9bSMartin Matuska /* Local structures */
377e92ffd9bSMartin Matuska struct refTables {
378e92ffd9bSMartin Matuska HTYPE hashTable[HASHTABLESIZE];
379e92ffd9bSMartin Matuska };
380e92ffd9bSMartin Matuska
381e92ffd9bSMartin Matuska
382e92ffd9bSMartin Matuska /* Macros */
383e92ffd9bSMartin Matuska #define LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
384e92ffd9bSMartin Matuska HASH_LOG))
385e92ffd9bSMartin Matuska #define LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
386e92ffd9bSMartin Matuska #define LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
387e92ffd9bSMartin Matuska #define LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
388e92ffd9bSMartin Matuska d = e; }
389e92ffd9bSMartin Matuska
390e92ffd9bSMartin Matuska
391e92ffd9bSMartin Matuska /* Private functions */
392e92ffd9bSMartin Matuska #if LZ4_ARCH64
393e92ffd9bSMartin Matuska
394e92ffd9bSMartin Matuska static inline int
LZ4_NbCommonBytes(register U64 val)395e92ffd9bSMartin Matuska LZ4_NbCommonBytes(register U64 val)
396e92ffd9bSMartin Matuska {
397e92ffd9bSMartin Matuska #if defined(LZ4_BIG_ENDIAN)
398e92ffd9bSMartin Matuska #if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
399e92ffd9bSMartin Matuska !defined(LZ4_FORCE_SW_BITCOUNT)
400e92ffd9bSMartin Matuska return (__builtin_clzll(val) >> 3);
401e92ffd9bSMartin Matuska #else
402e92ffd9bSMartin Matuska int r;
403e92ffd9bSMartin Matuska if (!(val >> 32)) {
404e92ffd9bSMartin Matuska r = 4;
405e92ffd9bSMartin Matuska } else {
406e92ffd9bSMartin Matuska r = 0;
407e92ffd9bSMartin Matuska val >>= 32;
408e92ffd9bSMartin Matuska }
409e92ffd9bSMartin Matuska if (!(val >> 16)) {
410e92ffd9bSMartin Matuska r += 2;
411e92ffd9bSMartin Matuska val >>= 8;
412e92ffd9bSMartin Matuska } else {
413e92ffd9bSMartin Matuska val >>= 24;
414e92ffd9bSMartin Matuska }
415e92ffd9bSMartin Matuska r += (!val);
416e92ffd9bSMartin Matuska return (r);
417e92ffd9bSMartin Matuska #endif
418e92ffd9bSMartin Matuska #else
419e92ffd9bSMartin Matuska #if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
420e92ffd9bSMartin Matuska !defined(LZ4_FORCE_SW_BITCOUNT)
421e92ffd9bSMartin Matuska return (__builtin_ctzll(val) >> 3);
422e92ffd9bSMartin Matuska #else
423e92ffd9bSMartin Matuska static const int DeBruijnBytePos[64] =
424e92ffd9bSMartin Matuska { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
425e92ffd9bSMartin Matuska 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
426e92ffd9bSMartin Matuska 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
427e92ffd9bSMartin Matuska 4, 5, 7, 2, 6, 5, 7, 6, 7, 7
428e92ffd9bSMartin Matuska };
429e92ffd9bSMartin Matuska return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
430e92ffd9bSMartin Matuska 58];
431e92ffd9bSMartin Matuska #endif
432e92ffd9bSMartin Matuska #endif
433e92ffd9bSMartin Matuska }
434e92ffd9bSMartin Matuska
435e92ffd9bSMartin Matuska #else
436e92ffd9bSMartin Matuska
437e92ffd9bSMartin Matuska static inline int
LZ4_NbCommonBytes(register U32 val)438e92ffd9bSMartin Matuska LZ4_NbCommonBytes(register U32 val)
439e92ffd9bSMartin Matuska {
440e92ffd9bSMartin Matuska #if defined(LZ4_BIG_ENDIAN)
441e92ffd9bSMartin Matuska #if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
442e92ffd9bSMartin Matuska !defined(LZ4_FORCE_SW_BITCOUNT)
443e92ffd9bSMartin Matuska return (__builtin_clz(val) >> 3);
444e92ffd9bSMartin Matuska #else
445e92ffd9bSMartin Matuska int r;
446e92ffd9bSMartin Matuska if (!(val >> 16)) {
447e92ffd9bSMartin Matuska r = 2;
448e92ffd9bSMartin Matuska val >>= 8;
449e92ffd9bSMartin Matuska } else {
450e92ffd9bSMartin Matuska r = 0;
451e92ffd9bSMartin Matuska val >>= 24;
452e92ffd9bSMartin Matuska }
453e92ffd9bSMartin Matuska r += (!val);
454e92ffd9bSMartin Matuska return (r);
455e92ffd9bSMartin Matuska #endif
456e92ffd9bSMartin Matuska #else
457e92ffd9bSMartin Matuska #if defined(__GNUC__) && (GCC_VERSION >= 304) && \
458e92ffd9bSMartin Matuska !defined(LZ4_FORCE_SW_BITCOUNT)
459e92ffd9bSMartin Matuska return (__builtin_ctz(val) >> 3);
460e92ffd9bSMartin Matuska #else
461e92ffd9bSMartin Matuska static const int DeBruijnBytePos[32] = {
462e92ffd9bSMartin Matuska 0, 0, 3, 0, 3, 1, 3, 0,
463e92ffd9bSMartin Matuska 3, 2, 2, 1, 3, 2, 0, 1,
464e92ffd9bSMartin Matuska 3, 3, 1, 2, 2, 2, 2, 0,
465e92ffd9bSMartin Matuska 3, 1, 2, 0, 1, 0, 1, 1
466e92ffd9bSMartin Matuska };
467e92ffd9bSMartin Matuska return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
468e92ffd9bSMartin Matuska 27];
469e92ffd9bSMartin Matuska #endif
470e92ffd9bSMartin Matuska #endif
471e92ffd9bSMartin Matuska }
472e92ffd9bSMartin Matuska
473e92ffd9bSMartin Matuska #endif
474e92ffd9bSMartin Matuska
475e92ffd9bSMartin Matuska /* Compression functions */
476e92ffd9bSMartin Matuska
477e92ffd9bSMartin Matuska static int
LZ4_compressCtx(void * ctx,const char * source,char * dest,int isize,int osize)478e92ffd9bSMartin Matuska LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
479e92ffd9bSMartin Matuska int osize)
480e92ffd9bSMartin Matuska {
481e92ffd9bSMartin Matuska struct refTables *srt = (struct refTables *)ctx;
482e92ffd9bSMartin Matuska HTYPE *HashTable = (HTYPE *) (srt->hashTable);
483e92ffd9bSMartin Matuska
484e92ffd9bSMartin Matuska const BYTE *ip = (BYTE *) source;
485e92ffd9bSMartin Matuska INITBASE(base);
486e92ffd9bSMartin Matuska const BYTE *anchor = ip;
487e92ffd9bSMartin Matuska const BYTE *const iend = ip + isize;
488e92ffd9bSMartin Matuska const BYTE *const oend = (BYTE *) dest + osize;
489e92ffd9bSMartin Matuska const BYTE *const mflimit = iend - MFLIMIT;
490e92ffd9bSMartin Matuska #define matchlimit (iend - LASTLITERALS)
491e92ffd9bSMartin Matuska
492e92ffd9bSMartin Matuska BYTE *op = (BYTE *) dest;
493e92ffd9bSMartin Matuska
494e92ffd9bSMartin Matuska int len, length;
495e92ffd9bSMartin Matuska const int skipStrength = SKIPSTRENGTH;
496e92ffd9bSMartin Matuska U32 forwardH;
497e92ffd9bSMartin Matuska
498e92ffd9bSMartin Matuska
499e92ffd9bSMartin Matuska /* Init */
500e92ffd9bSMartin Matuska if (isize < MINLENGTH)
501e92ffd9bSMartin Matuska goto _last_literals;
502e92ffd9bSMartin Matuska
503e92ffd9bSMartin Matuska /* First Byte */
504e92ffd9bSMartin Matuska HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
505e92ffd9bSMartin Matuska ip++;
506e92ffd9bSMartin Matuska forwardH = LZ4_HASH_VALUE(ip);
507e92ffd9bSMartin Matuska
508e92ffd9bSMartin Matuska /* Main Loop */
509e92ffd9bSMartin Matuska for (;;) {
510e92ffd9bSMartin Matuska int findMatchAttempts = (1U << skipStrength) + 3;
511e92ffd9bSMartin Matuska const BYTE *forwardIp = ip;
512e92ffd9bSMartin Matuska const BYTE *ref;
513e92ffd9bSMartin Matuska BYTE *token;
514e92ffd9bSMartin Matuska
515e92ffd9bSMartin Matuska /* Find a match */
516e92ffd9bSMartin Matuska do {
517e92ffd9bSMartin Matuska U32 h = forwardH;
518e92ffd9bSMartin Matuska int step = findMatchAttempts++ >> skipStrength;
519e92ffd9bSMartin Matuska ip = forwardIp;
520e92ffd9bSMartin Matuska forwardIp = ip + step;
521e92ffd9bSMartin Matuska
522e92ffd9bSMartin Matuska if (unlikely(forwardIp > mflimit)) {
523e92ffd9bSMartin Matuska goto _last_literals;
524e92ffd9bSMartin Matuska }
525e92ffd9bSMartin Matuska
526e92ffd9bSMartin Matuska forwardH = LZ4_HASH_VALUE(forwardIp);
527e92ffd9bSMartin Matuska ref = base + HashTable[h];
528e92ffd9bSMartin Matuska HashTable[h] = ip - base;
529e92ffd9bSMartin Matuska
530e92ffd9bSMartin Matuska } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
531e92ffd9bSMartin Matuska
532e92ffd9bSMartin Matuska /* Catch up */
533e92ffd9bSMartin Matuska while ((ip > anchor) && (ref > (BYTE *) source) &&
534e92ffd9bSMartin Matuska unlikely(ip[-1] == ref[-1])) {
535e92ffd9bSMartin Matuska ip--;
536e92ffd9bSMartin Matuska ref--;
537e92ffd9bSMartin Matuska }
538e92ffd9bSMartin Matuska
539e92ffd9bSMartin Matuska /* Encode Literal length */
540e92ffd9bSMartin Matuska length = ip - anchor;
541e92ffd9bSMartin Matuska token = op++;
542e92ffd9bSMartin Matuska
543e92ffd9bSMartin Matuska /* Check output limit */
544e92ffd9bSMartin Matuska if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
545e92ffd9bSMartin Matuska (length >> 8) > oend))
546e92ffd9bSMartin Matuska return (0);
547e92ffd9bSMartin Matuska
548e92ffd9bSMartin Matuska if (length >= (int)RUN_MASK) {
549e92ffd9bSMartin Matuska *token = (RUN_MASK << ML_BITS);
550e92ffd9bSMartin Matuska len = length - RUN_MASK;
551e92ffd9bSMartin Matuska for (; len > 254; len -= 255)
552e92ffd9bSMartin Matuska *op++ = 255;
553e92ffd9bSMartin Matuska *op++ = (BYTE)len;
554e92ffd9bSMartin Matuska } else
555e92ffd9bSMartin Matuska *token = (length << ML_BITS);
556e92ffd9bSMartin Matuska
557e92ffd9bSMartin Matuska /* Copy Literals */
558e92ffd9bSMartin Matuska LZ4_BLINDCOPY(anchor, op, length);
559e92ffd9bSMartin Matuska
560e92ffd9bSMartin Matuska _next_match:
561e92ffd9bSMartin Matuska /* Encode Offset */
562e92ffd9bSMartin Matuska LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
563e92ffd9bSMartin Matuska
564e92ffd9bSMartin Matuska /* Start Counting */
565e92ffd9bSMartin Matuska ip += MINMATCH;
566e92ffd9bSMartin Matuska ref += MINMATCH; /* MinMatch verified */
567e92ffd9bSMartin Matuska anchor = ip;
568e92ffd9bSMartin Matuska while (likely(ip < matchlimit - (STEPSIZE - 1))) {
569e92ffd9bSMartin Matuska UARCH diff = AARCH(ref) ^ AARCH(ip);
570e92ffd9bSMartin Matuska if (!diff) {
571e92ffd9bSMartin Matuska ip += STEPSIZE;
572e92ffd9bSMartin Matuska ref += STEPSIZE;
573e92ffd9bSMartin Matuska continue;
574e92ffd9bSMartin Matuska }
575e92ffd9bSMartin Matuska ip += LZ4_NbCommonBytes(diff);
576e92ffd9bSMartin Matuska goto _endCount;
577e92ffd9bSMartin Matuska }
578e92ffd9bSMartin Matuska #if LZ4_ARCH64
579e92ffd9bSMartin Matuska if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
580e92ffd9bSMartin Matuska ip += 4;
581e92ffd9bSMartin Matuska ref += 4;
582e92ffd9bSMartin Matuska }
583e92ffd9bSMartin Matuska #endif
584e92ffd9bSMartin Matuska if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
585e92ffd9bSMartin Matuska ip += 2;
586e92ffd9bSMartin Matuska ref += 2;
587e92ffd9bSMartin Matuska }
588e92ffd9bSMartin Matuska if ((ip < matchlimit) && (*ref == *ip))
589e92ffd9bSMartin Matuska ip++;
590e92ffd9bSMartin Matuska _endCount:
591e92ffd9bSMartin Matuska
592e92ffd9bSMartin Matuska /* Encode MatchLength */
593e92ffd9bSMartin Matuska len = (ip - anchor);
594e92ffd9bSMartin Matuska /* Check output limit */
595e92ffd9bSMartin Matuska if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
596e92ffd9bSMartin Matuska return (0);
597e92ffd9bSMartin Matuska if (len >= (int)ML_MASK) {
598e92ffd9bSMartin Matuska *token += ML_MASK;
599e92ffd9bSMartin Matuska len -= ML_MASK;
600e92ffd9bSMartin Matuska for (; len > 509; len -= 510) {
601e92ffd9bSMartin Matuska *op++ = 255;
602e92ffd9bSMartin Matuska *op++ = 255;
603e92ffd9bSMartin Matuska }
604e92ffd9bSMartin Matuska if (len > 254) {
605e92ffd9bSMartin Matuska len -= 255;
606e92ffd9bSMartin Matuska *op++ = 255;
607e92ffd9bSMartin Matuska }
608e92ffd9bSMartin Matuska *op++ = (BYTE)len;
609e92ffd9bSMartin Matuska } else
610e92ffd9bSMartin Matuska *token += len;
611e92ffd9bSMartin Matuska
612e92ffd9bSMartin Matuska /* Test end of chunk */
613e92ffd9bSMartin Matuska if (ip > mflimit) {
614e92ffd9bSMartin Matuska anchor = ip;
615e92ffd9bSMartin Matuska break;
616e92ffd9bSMartin Matuska }
617e92ffd9bSMartin Matuska /* Fill table */
618e92ffd9bSMartin Matuska HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;
619e92ffd9bSMartin Matuska
620e92ffd9bSMartin Matuska /* Test next position */
621e92ffd9bSMartin Matuska ref = base + HashTable[LZ4_HASH_VALUE(ip)];
622e92ffd9bSMartin Matuska HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
623e92ffd9bSMartin Matuska if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
624e92ffd9bSMartin Matuska token = op++;
625e92ffd9bSMartin Matuska *token = 0;
626e92ffd9bSMartin Matuska goto _next_match;
627e92ffd9bSMartin Matuska }
628e92ffd9bSMartin Matuska /* Prepare next loop */
629e92ffd9bSMartin Matuska anchor = ip++;
630e92ffd9bSMartin Matuska forwardH = LZ4_HASH_VALUE(ip);
631e92ffd9bSMartin Matuska }
632e92ffd9bSMartin Matuska
633e92ffd9bSMartin Matuska _last_literals:
634e92ffd9bSMartin Matuska /* Encode Last Literals */
635e92ffd9bSMartin Matuska {
636e92ffd9bSMartin Matuska int lastRun = iend - anchor;
637e92ffd9bSMartin Matuska if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
638e92ffd9bSMartin Matuska oend)
639e92ffd9bSMartin Matuska return (0);
640e92ffd9bSMartin Matuska if (lastRun >= (int)RUN_MASK) {
641e92ffd9bSMartin Matuska *op++ = (RUN_MASK << ML_BITS);
642e92ffd9bSMartin Matuska lastRun -= RUN_MASK;
643e92ffd9bSMartin Matuska for (; lastRun > 254; lastRun -= 255) {
644e92ffd9bSMartin Matuska *op++ = 255;
645e92ffd9bSMartin Matuska }
646e92ffd9bSMartin Matuska *op++ = (BYTE)lastRun;
647e92ffd9bSMartin Matuska } else
648e92ffd9bSMartin Matuska *op++ = (lastRun << ML_BITS);
649e92ffd9bSMartin Matuska (void) memcpy(op, anchor, iend - anchor);
650e92ffd9bSMartin Matuska op += iend - anchor;
651e92ffd9bSMartin Matuska }
652e92ffd9bSMartin Matuska
653e92ffd9bSMartin Matuska /* End */
654e92ffd9bSMartin Matuska return (int)(((char *)op) - dest);
655e92ffd9bSMartin Matuska }
656e92ffd9bSMartin Matuska
657e92ffd9bSMartin Matuska
658e92ffd9bSMartin Matuska
659e92ffd9bSMartin Matuska /* Note : this function is valid only if isize < LZ4_64KLIMIT */
660e92ffd9bSMartin Matuska #define LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
661e92ffd9bSMartin Matuska #define HASHLOG64K (HASH_LOG + 1)
662e92ffd9bSMartin Matuska #define HASH64KTABLESIZE (1U << HASHLOG64K)
663e92ffd9bSMartin Matuska #define LZ4_HASH64K_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8) - \
664e92ffd9bSMartin Matuska HASHLOG64K))
665e92ffd9bSMartin Matuska #define LZ4_HASH64K_VALUE(p) LZ4_HASH64K_FUNCTION(A32(p))
666e92ffd9bSMartin Matuska
667e92ffd9bSMartin Matuska static int
LZ4_compress64kCtx(void * ctx,const char * source,char * dest,int isize,int osize)668e92ffd9bSMartin Matuska LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
669e92ffd9bSMartin Matuska int osize)
670e92ffd9bSMartin Matuska {
671e92ffd9bSMartin Matuska struct refTables *srt = (struct refTables *)ctx;
672e92ffd9bSMartin Matuska U16 *HashTable = (U16 *) (srt->hashTable);
673e92ffd9bSMartin Matuska
674e92ffd9bSMartin Matuska const BYTE *ip = (BYTE *) source;
675e92ffd9bSMartin Matuska const BYTE *anchor = ip;
676e92ffd9bSMartin Matuska const BYTE *const base = ip;
677e92ffd9bSMartin Matuska const BYTE *const iend = ip + isize;
678e92ffd9bSMartin Matuska const BYTE *const oend = (BYTE *) dest + osize;
679e92ffd9bSMartin Matuska const BYTE *const mflimit = iend - MFLIMIT;
680e92ffd9bSMartin Matuska #define matchlimit (iend - LASTLITERALS)
681e92ffd9bSMartin Matuska
682e92ffd9bSMartin Matuska BYTE *op = (BYTE *) dest;
683e92ffd9bSMartin Matuska
684e92ffd9bSMartin Matuska int len, length;
685e92ffd9bSMartin Matuska const int skipStrength = SKIPSTRENGTH;
686e92ffd9bSMartin Matuska U32 forwardH;
687e92ffd9bSMartin Matuska
688e92ffd9bSMartin Matuska /* Init */
689e92ffd9bSMartin Matuska if (isize < MINLENGTH)
690e92ffd9bSMartin Matuska goto _last_literals;
691e92ffd9bSMartin Matuska
692e92ffd9bSMartin Matuska /* First Byte */
693e92ffd9bSMartin Matuska ip++;
694e92ffd9bSMartin Matuska forwardH = LZ4_HASH64K_VALUE(ip);
695e92ffd9bSMartin Matuska
696e92ffd9bSMartin Matuska /* Main Loop */
697e92ffd9bSMartin Matuska for (;;) {
698e92ffd9bSMartin Matuska int findMatchAttempts = (1U << skipStrength) + 3;
699e92ffd9bSMartin Matuska const BYTE *forwardIp = ip;
700e92ffd9bSMartin Matuska const BYTE *ref;
701e92ffd9bSMartin Matuska BYTE *token;
702e92ffd9bSMartin Matuska
703e92ffd9bSMartin Matuska /* Find a match */
704e92ffd9bSMartin Matuska do {
705e92ffd9bSMartin Matuska U32 h = forwardH;
706e92ffd9bSMartin Matuska int step = findMatchAttempts++ >> skipStrength;
707e92ffd9bSMartin Matuska ip = forwardIp;
708e92ffd9bSMartin Matuska forwardIp = ip + step;
709e92ffd9bSMartin Matuska
710e92ffd9bSMartin Matuska if (forwardIp > mflimit) {
711e92ffd9bSMartin Matuska goto _last_literals;
712e92ffd9bSMartin Matuska }
713e92ffd9bSMartin Matuska
714e92ffd9bSMartin Matuska forwardH = LZ4_HASH64K_VALUE(forwardIp);
715e92ffd9bSMartin Matuska ref = base + HashTable[h];
716e92ffd9bSMartin Matuska HashTable[h] = ip - base;
717e92ffd9bSMartin Matuska
718e92ffd9bSMartin Matuska } while (A32(ref) != A32(ip));
719e92ffd9bSMartin Matuska
720e92ffd9bSMartin Matuska /* Catch up */
721e92ffd9bSMartin Matuska while ((ip > anchor) && (ref > (BYTE *) source) &&
722e92ffd9bSMartin Matuska (ip[-1] == ref[-1])) {
723e92ffd9bSMartin Matuska ip--;
724e92ffd9bSMartin Matuska ref--;
725e92ffd9bSMartin Matuska }
726e92ffd9bSMartin Matuska
727e92ffd9bSMartin Matuska /* Encode Literal length */
728e92ffd9bSMartin Matuska length = ip - anchor;
729e92ffd9bSMartin Matuska token = op++;
730e92ffd9bSMartin Matuska
731e92ffd9bSMartin Matuska /* Check output limit */
732e92ffd9bSMartin Matuska if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
733e92ffd9bSMartin Matuska (length >> 8) > oend))
734e92ffd9bSMartin Matuska return (0);
735e92ffd9bSMartin Matuska
736e92ffd9bSMartin Matuska if (length >= (int)RUN_MASK) {
737e92ffd9bSMartin Matuska *token = (RUN_MASK << ML_BITS);
738e92ffd9bSMartin Matuska len = length - RUN_MASK;
739e92ffd9bSMartin Matuska for (; len > 254; len -= 255)
740e92ffd9bSMartin Matuska *op++ = 255;
741e92ffd9bSMartin Matuska *op++ = (BYTE)len;
742e92ffd9bSMartin Matuska } else
743e92ffd9bSMartin Matuska *token = (length << ML_BITS);
744e92ffd9bSMartin Matuska
745e92ffd9bSMartin Matuska /* Copy Literals */
746e92ffd9bSMartin Matuska LZ4_BLINDCOPY(anchor, op, length);
747e92ffd9bSMartin Matuska
748e92ffd9bSMartin Matuska _next_match:
749e92ffd9bSMartin Matuska /* Encode Offset */
750e92ffd9bSMartin Matuska LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
751e92ffd9bSMartin Matuska
752e92ffd9bSMartin Matuska /* Start Counting */
753e92ffd9bSMartin Matuska ip += MINMATCH;
754e92ffd9bSMartin Matuska ref += MINMATCH; /* MinMatch verified */
755e92ffd9bSMartin Matuska anchor = ip;
756e92ffd9bSMartin Matuska while (ip < matchlimit - (STEPSIZE - 1)) {
757e92ffd9bSMartin Matuska UARCH diff = AARCH(ref) ^ AARCH(ip);
758e92ffd9bSMartin Matuska if (!diff) {
759e92ffd9bSMartin Matuska ip += STEPSIZE;
760e92ffd9bSMartin Matuska ref += STEPSIZE;
761e92ffd9bSMartin Matuska continue;
762e92ffd9bSMartin Matuska }
763e92ffd9bSMartin Matuska ip += LZ4_NbCommonBytes(diff);
764e92ffd9bSMartin Matuska goto _endCount;
765e92ffd9bSMartin Matuska }
766e92ffd9bSMartin Matuska #if LZ4_ARCH64
767e92ffd9bSMartin Matuska if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
768e92ffd9bSMartin Matuska ip += 4;
769e92ffd9bSMartin Matuska ref += 4;
770e92ffd9bSMartin Matuska }
771e92ffd9bSMartin Matuska #endif
772e92ffd9bSMartin Matuska if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
773e92ffd9bSMartin Matuska ip += 2;
774e92ffd9bSMartin Matuska ref += 2;
775e92ffd9bSMartin Matuska }
776e92ffd9bSMartin Matuska if ((ip < matchlimit) && (*ref == *ip))
777e92ffd9bSMartin Matuska ip++;
778e92ffd9bSMartin Matuska _endCount:
779e92ffd9bSMartin Matuska
780e92ffd9bSMartin Matuska /* Encode MatchLength */
781e92ffd9bSMartin Matuska len = (ip - anchor);
782e92ffd9bSMartin Matuska /* Check output limit */
783e92ffd9bSMartin Matuska if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
784e92ffd9bSMartin Matuska return (0);
785e92ffd9bSMartin Matuska if (len >= (int)ML_MASK) {
786e92ffd9bSMartin Matuska *token += ML_MASK;
787e92ffd9bSMartin Matuska len -= ML_MASK;
788e92ffd9bSMartin Matuska for (; len > 509; len -= 510) {
789e92ffd9bSMartin Matuska *op++ = 255;
790e92ffd9bSMartin Matuska *op++ = 255;
791e92ffd9bSMartin Matuska }
792e92ffd9bSMartin Matuska if (len > 254) {
793e92ffd9bSMartin Matuska len -= 255;
794e92ffd9bSMartin Matuska *op++ = 255;
795e92ffd9bSMartin Matuska }
796e92ffd9bSMartin Matuska *op++ = (BYTE)len;
797e92ffd9bSMartin Matuska } else
798e92ffd9bSMartin Matuska *token += len;
799e92ffd9bSMartin Matuska
800e92ffd9bSMartin Matuska /* Test end of chunk */
801e92ffd9bSMartin Matuska if (ip > mflimit) {
802e92ffd9bSMartin Matuska anchor = ip;
803e92ffd9bSMartin Matuska break;
804e92ffd9bSMartin Matuska }
805e92ffd9bSMartin Matuska /* Fill table */
806e92ffd9bSMartin Matuska HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;
807e92ffd9bSMartin Matuska
808e92ffd9bSMartin Matuska /* Test next position */
809e92ffd9bSMartin Matuska ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
810e92ffd9bSMartin Matuska HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
811e92ffd9bSMartin Matuska if (A32(ref) == A32(ip)) {
812e92ffd9bSMartin Matuska token = op++;
813e92ffd9bSMartin Matuska *token = 0;
814e92ffd9bSMartin Matuska goto _next_match;
815e92ffd9bSMartin Matuska }
816e92ffd9bSMartin Matuska /* Prepare next loop */
817e92ffd9bSMartin Matuska anchor = ip++;
818e92ffd9bSMartin Matuska forwardH = LZ4_HASH64K_VALUE(ip);
819e92ffd9bSMartin Matuska }
820e92ffd9bSMartin Matuska
821e92ffd9bSMartin Matuska _last_literals:
822e92ffd9bSMartin Matuska /* Encode Last Literals */
823e92ffd9bSMartin Matuska {
824e92ffd9bSMartin Matuska int lastRun = iend - anchor;
825e92ffd9bSMartin Matuska if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
826e92ffd9bSMartin Matuska oend)
827e92ffd9bSMartin Matuska return (0);
828e92ffd9bSMartin Matuska if (lastRun >= (int)RUN_MASK) {
829e92ffd9bSMartin Matuska *op++ = (RUN_MASK << ML_BITS);
830e92ffd9bSMartin Matuska lastRun -= RUN_MASK;
831e92ffd9bSMartin Matuska for (; lastRun > 254; lastRun -= 255)
832e92ffd9bSMartin Matuska *op++ = 255;
833e92ffd9bSMartin Matuska *op++ = (BYTE)lastRun;
834e92ffd9bSMartin Matuska } else
835e92ffd9bSMartin Matuska *op++ = (lastRun << ML_BITS);
836e92ffd9bSMartin Matuska (void) memcpy(op, anchor, iend - anchor);
837e92ffd9bSMartin Matuska op += iend - anchor;
838e92ffd9bSMartin Matuska }
839e92ffd9bSMartin Matuska
840e92ffd9bSMartin Matuska /* End */
841e92ffd9bSMartin Matuska return (int)(((char *)op) - dest);
842e92ffd9bSMartin Matuska }
843e92ffd9bSMartin Matuska
844e92ffd9bSMartin Matuska static int
real_LZ4_compress(const char * source,char * dest,int isize,int osize)845e92ffd9bSMartin Matuska real_LZ4_compress(const char *source, char *dest, int isize, int osize)
846e92ffd9bSMartin Matuska {
847e92ffd9bSMartin Matuska void *ctx;
848e92ffd9bSMartin Matuska int result;
849e92ffd9bSMartin Matuska
850e92ffd9bSMartin Matuska ctx = lz4_alloc(KM_SLEEP);
851e92ffd9bSMartin Matuska
852e92ffd9bSMartin Matuska /*
853e92ffd9bSMartin Matuska * out of kernel memory, gently fall through - this will disable
854e92ffd9bSMartin Matuska * compression in zio_compress_data
855e92ffd9bSMartin Matuska */
856e92ffd9bSMartin Matuska if (ctx == NULL)
857e92ffd9bSMartin Matuska return (0);
858e92ffd9bSMartin Matuska
859e92ffd9bSMartin Matuska memset(ctx, 0, sizeof (struct refTables));
860e92ffd9bSMartin Matuska
861e92ffd9bSMartin Matuska if (isize < LZ4_64KLIMIT)
862e92ffd9bSMartin Matuska result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
863e92ffd9bSMartin Matuska else
864e92ffd9bSMartin Matuska result = LZ4_compressCtx(ctx, source, dest, isize, osize);
865e92ffd9bSMartin Matuska
866e92ffd9bSMartin Matuska lz4_free(ctx);
867e92ffd9bSMartin Matuska return (result);
868e92ffd9bSMartin Matuska }
869e92ffd9bSMartin Matuska
870e92ffd9bSMartin Matuska #ifdef __FreeBSD__
871e92ffd9bSMartin Matuska /*
872e92ffd9bSMartin Matuska * FreeBSD has 4, 8 and 16 KB malloc zones which can be used here.
873e92ffd9bSMartin Matuska * Should struct refTables get resized this may need to be revisited, hence
874e92ffd9bSMartin Matuska * compiler-time asserts.
875e92ffd9bSMartin Matuska */
876e92ffd9bSMartin Matuska _Static_assert(sizeof(struct refTables) <= 16384,
877e92ffd9bSMartin Matuska "refTables too big for malloc");
878e92ffd9bSMartin Matuska _Static_assert((sizeof(struct refTables) % 4096) == 0,
879e92ffd9bSMartin Matuska "refTables not a multiple of page size");
880e92ffd9bSMartin Matuska #else
881e92ffd9bSMartin Matuska #define ZFS_LZ4_USE_CACHE
882e92ffd9bSMartin Matuska #endif
883e92ffd9bSMartin Matuska
884e92ffd9bSMartin Matuska #ifdef ZFS_LZ4_USE_CACHE
885e92ffd9bSMartin Matuska static kmem_cache_t *lz4_cache;
886e92ffd9bSMartin Matuska #endif
887e92ffd9bSMartin Matuska
888e92ffd9bSMartin Matuska #ifdef ZFS_LZ4_USE_CACHE
889e92ffd9bSMartin Matuska void
lz4_init(void)890e92ffd9bSMartin Matuska lz4_init(void)
891e92ffd9bSMartin Matuska {
892e92ffd9bSMartin Matuska lz4_cache = kmem_cache_create("lz4_cache",
893ce4dcb97SMartin Matuska sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL,
894ce4dcb97SMartin Matuska KMC_RECLAIMABLE);
895e92ffd9bSMartin Matuska }
896e92ffd9bSMartin Matuska
897e92ffd9bSMartin Matuska void
lz4_fini(void)898e92ffd9bSMartin Matuska lz4_fini(void)
899e92ffd9bSMartin Matuska {
900e92ffd9bSMartin Matuska if (lz4_cache) {
901e92ffd9bSMartin Matuska kmem_cache_destroy(lz4_cache);
902e92ffd9bSMartin Matuska lz4_cache = NULL;
903e92ffd9bSMartin Matuska }
904e92ffd9bSMartin Matuska }
905e92ffd9bSMartin Matuska
906e92ffd9bSMartin Matuska static void *
lz4_alloc(int flags)907e92ffd9bSMartin Matuska lz4_alloc(int flags)
908e92ffd9bSMartin Matuska {
909e92ffd9bSMartin Matuska ASSERT(lz4_cache != NULL);
910e92ffd9bSMartin Matuska return (kmem_cache_alloc(lz4_cache, flags));
911e92ffd9bSMartin Matuska }
912e92ffd9bSMartin Matuska
913e92ffd9bSMartin Matuska static void
lz4_free(void * ctx)914e92ffd9bSMartin Matuska lz4_free(void *ctx)
915e92ffd9bSMartin Matuska {
916e92ffd9bSMartin Matuska kmem_cache_free(lz4_cache, ctx);
917e92ffd9bSMartin Matuska }
918e92ffd9bSMartin Matuska #else
919e92ffd9bSMartin Matuska void
lz4_init(void)920e92ffd9bSMartin Matuska lz4_init(void)
921e92ffd9bSMartin Matuska {
922e92ffd9bSMartin Matuska }
923e92ffd9bSMartin Matuska
924e92ffd9bSMartin Matuska void
lz4_fini(void)925e92ffd9bSMartin Matuska lz4_fini(void)
926e92ffd9bSMartin Matuska {
927e92ffd9bSMartin Matuska }
928e92ffd9bSMartin Matuska
929e92ffd9bSMartin Matuska static void *
lz4_alloc(int flags)930e92ffd9bSMartin Matuska lz4_alloc(int flags)
931e92ffd9bSMartin Matuska {
932e92ffd9bSMartin Matuska return (kmem_alloc(sizeof (struct refTables), flags));
933e92ffd9bSMartin Matuska }
934e92ffd9bSMartin Matuska
935e92ffd9bSMartin Matuska static void
lz4_free(void * ctx)936e92ffd9bSMartin Matuska lz4_free(void *ctx)
937e92ffd9bSMartin Matuska {
938e92ffd9bSMartin Matuska kmem_free(ctx, sizeof (struct refTables));
939e92ffd9bSMartin Matuska }
940e92ffd9bSMartin Matuska #endif
941