/*
 * LZ4 - Fast LZ compression algorithm
 * Header File
 * Copyright (C) 2011-2013, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
 * - LZ4 source repository : http://code.google.com/p/lz4/
 */
- This file seems to be based on LZ4 r85, dated Dec 10, 2012 37*e92ffd9bSMartin Matuska */ 38*e92ffd9bSMartin Matuska 39*e92ffd9bSMartin Matuska #include <sys/zfs_context.h> 40*e92ffd9bSMartin Matuska #include <sys/zio_compress.h> 41*e92ffd9bSMartin Matuska 42*e92ffd9bSMartin Matuska static int real_LZ4_compress(const char *source, char *dest, int isize, 43*e92ffd9bSMartin Matuska int osize); 44*e92ffd9bSMartin Matuska static int LZ4_compressCtx(void *ctx, const char *source, char *dest, 45*e92ffd9bSMartin Matuska int isize, int osize); 46*e92ffd9bSMartin Matuska static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest, 47*e92ffd9bSMartin Matuska int isize, int osize); 48*e92ffd9bSMartin Matuska 49*e92ffd9bSMartin Matuska /* See lz4.c */ 50*e92ffd9bSMartin Matuska int LZ4_uncompress_unknownOutputSize(const char *source, char *dest, 51*e92ffd9bSMartin Matuska int isize, int maxOutputSize); 52*e92ffd9bSMartin Matuska 53*e92ffd9bSMartin Matuska static void *lz4_alloc(int flags); 54*e92ffd9bSMartin Matuska static void lz4_free(void *ctx); 55*e92ffd9bSMartin Matuska 56*e92ffd9bSMartin Matuska size_t 57*e92ffd9bSMartin Matuska lz4_compress_zfs(void *s_start, void *d_start, size_t s_len, 58*e92ffd9bSMartin Matuska size_t d_len, int n) 59*e92ffd9bSMartin Matuska { 60*e92ffd9bSMartin Matuska (void) n; 61*e92ffd9bSMartin Matuska uint32_t bufsiz; 62*e92ffd9bSMartin Matuska char *dest = d_start; 63*e92ffd9bSMartin Matuska 64*e92ffd9bSMartin Matuska ASSERT(d_len >= sizeof (bufsiz)); 65*e92ffd9bSMartin Matuska 66*e92ffd9bSMartin Matuska bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len, 67*e92ffd9bSMartin Matuska d_len - sizeof (bufsiz)); 68*e92ffd9bSMartin Matuska 69*e92ffd9bSMartin Matuska /* Signal an error if the compression routine returned zero. 
*/ 70*e92ffd9bSMartin Matuska if (bufsiz == 0) 71*e92ffd9bSMartin Matuska return (s_len); 72*e92ffd9bSMartin Matuska 73*e92ffd9bSMartin Matuska /* 74*e92ffd9bSMartin Matuska * The exact compressed size is needed by the decompression routine, 75*e92ffd9bSMartin Matuska * so it is stored at the start of the buffer. Note that this may be 76*e92ffd9bSMartin Matuska * less than the compressed block size, which is rounded up to a 77*e92ffd9bSMartin Matuska * multiple of 1<<ashift. 78*e92ffd9bSMartin Matuska */ 79*e92ffd9bSMartin Matuska *(uint32_t *)dest = BE_32(bufsiz); 80*e92ffd9bSMartin Matuska 81*e92ffd9bSMartin Matuska return (bufsiz + sizeof (bufsiz)); 82*e92ffd9bSMartin Matuska } 83*e92ffd9bSMartin Matuska 84*e92ffd9bSMartin Matuska int 85*e92ffd9bSMartin Matuska lz4_decompress_zfs(void *s_start, void *d_start, size_t s_len, 86*e92ffd9bSMartin Matuska size_t d_len, int n) 87*e92ffd9bSMartin Matuska { 88*e92ffd9bSMartin Matuska (void) n; 89*e92ffd9bSMartin Matuska const char *src = s_start; 90*e92ffd9bSMartin Matuska uint32_t bufsiz = BE_IN32(src); 91*e92ffd9bSMartin Matuska 92*e92ffd9bSMartin Matuska /* invalid compressed buffer size encoded at start */ 93*e92ffd9bSMartin Matuska if (bufsiz + sizeof (bufsiz) > s_len) 94*e92ffd9bSMartin Matuska return (1); 95*e92ffd9bSMartin Matuska 96*e92ffd9bSMartin Matuska /* 97*e92ffd9bSMartin Matuska * Returns 0 on success (decompression function returned non-negative) 98*e92ffd9bSMartin Matuska * and non-zero on failure (decompression function returned negative). 
99*e92ffd9bSMartin Matuska */ 100*e92ffd9bSMartin Matuska return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)], 101*e92ffd9bSMartin Matuska d_start, bufsiz, d_len) < 0); 102*e92ffd9bSMartin Matuska } 103*e92ffd9bSMartin Matuska 104*e92ffd9bSMartin Matuska /* 105*e92ffd9bSMartin Matuska * LZ4 API Description: 106*e92ffd9bSMartin Matuska * 107*e92ffd9bSMartin Matuska * Simple Functions: 108*e92ffd9bSMartin Matuska * real_LZ4_compress() : 109*e92ffd9bSMartin Matuska * isize : is the input size. Max supported value is ~1.9GB 110*e92ffd9bSMartin Matuska * return : the number of bytes written in buffer dest 111*e92ffd9bSMartin Matuska * or 0 if the compression fails (if LZ4_COMPRESSMIN is set). 112*e92ffd9bSMartin Matuska * note : destination buffer must be already allocated. 113*e92ffd9bSMartin Matuska * destination buffer must be sized to handle worst cases 114*e92ffd9bSMartin Matuska * situations (input data not compressible) worst case size 115*e92ffd9bSMartin Matuska * evaluation is provided by function LZ4_compressBound(). 116*e92ffd9bSMartin Matuska * 117*e92ffd9bSMartin Matuska * real_LZ4_uncompress() : 118*e92ffd9bSMartin Matuska * osize : is the output size, therefore the original size 119*e92ffd9bSMartin Matuska * return : the number of bytes read in the source buffer. 120*e92ffd9bSMartin Matuska * If the source stream is malformed, the function will stop 121*e92ffd9bSMartin Matuska * decoding and return a negative result, indicating the byte 122*e92ffd9bSMartin Matuska * position of the faulty instruction. This function never 123*e92ffd9bSMartin Matuska * writes beyond dest + osize, and is therefore protected 124*e92ffd9bSMartin Matuska * against malicious data packets. 125*e92ffd9bSMartin Matuska * note : destination buffer must be already allocated 126*e92ffd9bSMartin Matuska * note : real_LZ4_uncompress() is not used in ZFS so its code 127*e92ffd9bSMartin Matuska * is not present here. 
/*
 * LZ4 API Description (continued):
 *
 * Advanced Functions
 *
 * LZ4_compressBound() :
 *	Provides the maximum size that LZ4 may output in a "worst case"
 *	scenario (input data not compressible) primarily useful for memory
 *	allocation of output buffer.
 *
 *	isize  : is the input size. Max supported value is ~1.9GB
 *	return : maximum output size in a "worst case" scenario
 *	note : this function is limited by "int" range (2^31-1)
 *
 * LZ4_uncompress_unknownOutputSize() :
 *	isize  : is the input size, therefore the compressed size
 *	maxOutputSize : is the size of the destination buffer (which must be
 *	    already allocated)
 *	return : the number of bytes decoded in the destination buffer
 *	    (necessarily <= maxOutputSize). If the source stream is
 *	    malformed, the function will stop decoding and return a
 *	    negative result, indicating the byte position of the faulty
 *	    instruction. This function never writes beyond dest +
 *	    maxOutputSize, and is therefore protected against malicious
 *	    data packets.
 *	note : Destination buffer must be already allocated.
 *	    This version is slightly slower than real_LZ4_uncompress()
 *
 * LZ4_compressCtx() :
 *	This function explicitly handles the CTX memory structure.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_cache_alloc). Passing
 *	NULL isn't valid.
 *
 * LZ4_compress64kCtx() :
 *	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 *	isize *Must* be <64KB, otherwise the output will be corrupted.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_cache_alloc). Passing
 *	NULL isn't valid.
 */

/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio.
 * Lowering this value reduces memory usage. Reduced memory usage
 * typically improves speed, due to cache effect (ex: L1 32KB for Intel,
 * L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 * (examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define	COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 * algorithm skip faster data segments considered "incompressible".
 * This may decrease compression ratio dramatically, but will be
 * faster on incompressible data. Increasing this value will make
 * the algorithm search more before declaring a segment "incompressible".
 * This could improve compression a bit, but will be slower on
 * incompressible data. The default value (6) is recommended.
 */
#define	NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
 * performance for big endian cpu, but the resulting compressed stream
 * will be incompatible with little-endian CPU. You can set this option
 * to 1 in situations where data will stay within closed environment.
 * This option is useless on Little_Endian CPU (such as x86).
 */
/* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */

/*
 * CPU Feature Detection
 */

/* 32 or 64 bits ? */
#if defined(_LP64)
#define	LZ4_ARCH64 1
#else
#define	LZ4_ARCH64 0
#endif

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 */
#if defined(_ZFS_BIG_ENDIAN)
#define	LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian format
 * are unsupported.
 */
#undef LZ4_BIG_ENDIAN
#endif

/*
 * Unaligned memory access is automatically enabled for "common" CPU,
 * such as x86. For others CPU, the compiler will be more cautious, and
 * insert extra code to ensure aligned access is respected. If you know
 * your target CPU supports unaligned memory access, you may want to
 * force this option manually to improve performance
 */
#if defined(__ARM_FEATURE_UNALIGNED)
#define	LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

/*
 * Illumos : we can't use GCC's __builtin_ctz family of builtins in the
 * kernel
 * Linux : we can use GCC's __builtin_ctz family of builtins in the
 * kernel
 */
#undef	LZ4_FORCE_SW_BITCOUNT
#if defined(__sparc)
#define	LZ4_FORCE_SW_BITCOUNT
#endif

/*
 * Compiler Options
 */
/* Disable restrict */
#define	restrict

/*
 * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it.
 */
258*e92ffd9bSMartin Matuska * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16 259*e92ffd9bSMartin Matuska */ 260*e92ffd9bSMartin Matuska #ifdef GCC_VERSION 261*e92ffd9bSMartin Matuska #undef GCC_VERSION 262*e92ffd9bSMartin Matuska #endif 263*e92ffd9bSMartin Matuska 264*e92ffd9bSMartin Matuska #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) 265*e92ffd9bSMartin Matuska 266*e92ffd9bSMartin Matuska #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) 267*e92ffd9bSMartin Matuska #define expect(expr, value) (__builtin_expect((expr), (value))) 268*e92ffd9bSMartin Matuska #else 269*e92ffd9bSMartin Matuska #define expect(expr, value) (expr) 270*e92ffd9bSMartin Matuska #endif 271*e92ffd9bSMartin Matuska 272*e92ffd9bSMartin Matuska #ifndef likely 273*e92ffd9bSMartin Matuska #define likely(expr) expect((expr) != 0, 1) 274*e92ffd9bSMartin Matuska #endif 275*e92ffd9bSMartin Matuska 276*e92ffd9bSMartin Matuska #ifndef unlikely 277*e92ffd9bSMartin Matuska #define unlikely(expr) expect((expr) != 0, 0) 278*e92ffd9bSMartin Matuska #endif 279*e92ffd9bSMartin Matuska 280*e92ffd9bSMartin Matuska #define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \ 281*e92ffd9bSMartin Matuska (((x) & 0xffu) << 8))) 282*e92ffd9bSMartin Matuska 283*e92ffd9bSMartin Matuska /* Basic types */ 284*e92ffd9bSMartin Matuska #define BYTE uint8_t 285*e92ffd9bSMartin Matuska #define U16 uint16_t 286*e92ffd9bSMartin Matuska #define U32 uint32_t 287*e92ffd9bSMartin Matuska #define S32 int32_t 288*e92ffd9bSMartin Matuska #define U64 uint64_t 289*e92ffd9bSMartin Matuska 290*e92ffd9bSMartin Matuska #ifndef LZ4_FORCE_UNALIGNED_ACCESS 291*e92ffd9bSMartin Matuska #pragma pack(1) 292*e92ffd9bSMartin Matuska #endif 293*e92ffd9bSMartin Matuska 294*e92ffd9bSMartin Matuska typedef struct _U16_S { 295*e92ffd9bSMartin Matuska U16 v; 296*e92ffd9bSMartin Matuska } U16_S; 297*e92ffd9bSMartin Matuska typedef struct _U32_S { 298*e92ffd9bSMartin Matuska U32 v; 299*e92ffd9bSMartin 
Matuska } U32_S; 300*e92ffd9bSMartin Matuska typedef struct _U64_S { 301*e92ffd9bSMartin Matuska U64 v; 302*e92ffd9bSMartin Matuska } U64_S; 303*e92ffd9bSMartin Matuska 304*e92ffd9bSMartin Matuska #ifndef LZ4_FORCE_UNALIGNED_ACCESS 305*e92ffd9bSMartin Matuska #pragma pack() 306*e92ffd9bSMartin Matuska #endif 307*e92ffd9bSMartin Matuska 308*e92ffd9bSMartin Matuska #define A64(x) (((U64_S *)(x))->v) 309*e92ffd9bSMartin Matuska #define A32(x) (((U32_S *)(x))->v) 310*e92ffd9bSMartin Matuska #define A16(x) (((U16_S *)(x))->v) 311*e92ffd9bSMartin Matuska 312*e92ffd9bSMartin Matuska /* 313*e92ffd9bSMartin Matuska * Constants 314*e92ffd9bSMartin Matuska */ 315*e92ffd9bSMartin Matuska #define MINMATCH 4 316*e92ffd9bSMartin Matuska 317*e92ffd9bSMartin Matuska #define HASH_LOG COMPRESSIONLEVEL 318*e92ffd9bSMartin Matuska #define HASHTABLESIZE (1 << HASH_LOG) 319*e92ffd9bSMartin Matuska #define HASH_MASK (HASHTABLESIZE - 1) 320*e92ffd9bSMartin Matuska 321*e92ffd9bSMartin Matuska #define SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? 
\ 322*e92ffd9bSMartin Matuska NOTCOMPRESSIBLE_CONFIRMATION : 2) 323*e92ffd9bSMartin Matuska 324*e92ffd9bSMartin Matuska #define COPYLENGTH 8 325*e92ffd9bSMartin Matuska #define LASTLITERALS 5 326*e92ffd9bSMartin Matuska #define MFLIMIT (COPYLENGTH + MINMATCH) 327*e92ffd9bSMartin Matuska #define MINLENGTH (MFLIMIT + 1) 328*e92ffd9bSMartin Matuska 329*e92ffd9bSMartin Matuska #define MAXD_LOG 16 330*e92ffd9bSMartin Matuska #define MAX_DISTANCE ((1 << MAXD_LOG) - 1) 331*e92ffd9bSMartin Matuska 332*e92ffd9bSMartin Matuska #define ML_BITS 4 333*e92ffd9bSMartin Matuska #define ML_MASK ((1U<<ML_BITS)-1) 334*e92ffd9bSMartin Matuska #define RUN_BITS (8-ML_BITS) 335*e92ffd9bSMartin Matuska #define RUN_MASK ((1U<<RUN_BITS)-1) 336*e92ffd9bSMartin Matuska 337*e92ffd9bSMartin Matuska 338*e92ffd9bSMartin Matuska /* 339*e92ffd9bSMartin Matuska * Architecture-specific macros 340*e92ffd9bSMartin Matuska */ 341*e92ffd9bSMartin Matuska #if LZ4_ARCH64 342*e92ffd9bSMartin Matuska #define STEPSIZE 8 343*e92ffd9bSMartin Matuska #define UARCH U64 344*e92ffd9bSMartin Matuska #define AARCH A64 345*e92ffd9bSMartin Matuska #define LZ4_COPYSTEP(s, d) A64(d) = A64(s); d += 8; s += 8; 346*e92ffd9bSMartin Matuska #define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d) 347*e92ffd9bSMartin Matuska #define LZ4_SECURECOPY(s, d, e) if (d < e) LZ4_WILDCOPY(s, d, e) 348*e92ffd9bSMartin Matuska #define HTYPE U32 349*e92ffd9bSMartin Matuska #define INITBASE(base) const BYTE* const base = ip 350*e92ffd9bSMartin Matuska #else /* !LZ4_ARCH64 */ 351*e92ffd9bSMartin Matuska #define STEPSIZE 4 352*e92ffd9bSMartin Matuska #define UARCH U32 353*e92ffd9bSMartin Matuska #define AARCH A32 354*e92ffd9bSMartin Matuska #define LZ4_COPYSTEP(s, d) A32(d) = A32(s); d += 4; s += 4; 355*e92ffd9bSMartin Matuska #define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d); 356*e92ffd9bSMartin Matuska #define LZ4_SECURECOPY LZ4_WILDCOPY 357*e92ffd9bSMartin Matuska #define HTYPE const BYTE * 358*e92ffd9bSMartin Matuska #define 
INITBASE(base) const int base = 0 359*e92ffd9bSMartin Matuska #endif /* !LZ4_ARCH64 */ 360*e92ffd9bSMartin Matuska 361*e92ffd9bSMartin Matuska #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE)) 362*e92ffd9bSMartin Matuska #define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ 363*e92ffd9bSMartin Matuska { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; } 364*e92ffd9bSMartin Matuska #define LZ4_WRITE_LITTLEENDIAN_16(p, i) \ 365*e92ffd9bSMartin Matuska { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; } 366*e92ffd9bSMartin Matuska #else 367*e92ffd9bSMartin Matuska #define LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); } 368*e92ffd9bSMartin Matuska #define LZ4_WRITE_LITTLEENDIAN_16(p, v) { A16(p) = v; p += 2; } 369*e92ffd9bSMartin Matuska #endif 370*e92ffd9bSMartin Matuska 371*e92ffd9bSMartin Matuska 372*e92ffd9bSMartin Matuska /* Local structures */ 373*e92ffd9bSMartin Matuska struct refTables { 374*e92ffd9bSMartin Matuska HTYPE hashTable[HASHTABLESIZE]; 375*e92ffd9bSMartin Matuska }; 376*e92ffd9bSMartin Matuska 377*e92ffd9bSMartin Matuska 378*e92ffd9bSMartin Matuska /* Macros */ 379*e92ffd9bSMartin Matuska #define LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \ 380*e92ffd9bSMartin Matuska HASH_LOG)) 381*e92ffd9bSMartin Matuska #define LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p)) 382*e92ffd9bSMartin Matuska #define LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e); 383*e92ffd9bSMartin Matuska #define LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \ 384*e92ffd9bSMartin Matuska d = e; } 385*e92ffd9bSMartin Matuska 386*e92ffd9bSMartin Matuska 387*e92ffd9bSMartin Matuska /* Private functions */ 388*e92ffd9bSMartin Matuska #if LZ4_ARCH64 389*e92ffd9bSMartin Matuska 390*e92ffd9bSMartin Matuska static inline int 391*e92ffd9bSMartin Matuska LZ4_NbCommonBytes(register U64 val) 392*e92ffd9bSMartin Matuska { 393*e92ffd9bSMartin Matuska #if defined(LZ4_BIG_ENDIAN) 394*e92ffd9bSMartin Matuska #if 
((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \ 395*e92ffd9bSMartin Matuska !defined(LZ4_FORCE_SW_BITCOUNT) 396*e92ffd9bSMartin Matuska return (__builtin_clzll(val) >> 3); 397*e92ffd9bSMartin Matuska #else 398*e92ffd9bSMartin Matuska int r; 399*e92ffd9bSMartin Matuska if (!(val >> 32)) { 400*e92ffd9bSMartin Matuska r = 4; 401*e92ffd9bSMartin Matuska } else { 402*e92ffd9bSMartin Matuska r = 0; 403*e92ffd9bSMartin Matuska val >>= 32; 404*e92ffd9bSMartin Matuska } 405*e92ffd9bSMartin Matuska if (!(val >> 16)) { 406*e92ffd9bSMartin Matuska r += 2; 407*e92ffd9bSMartin Matuska val >>= 8; 408*e92ffd9bSMartin Matuska } else { 409*e92ffd9bSMartin Matuska val >>= 24; 410*e92ffd9bSMartin Matuska } 411*e92ffd9bSMartin Matuska r += (!val); 412*e92ffd9bSMartin Matuska return (r); 413*e92ffd9bSMartin Matuska #endif 414*e92ffd9bSMartin Matuska #else 415*e92ffd9bSMartin Matuska #if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \ 416*e92ffd9bSMartin Matuska !defined(LZ4_FORCE_SW_BITCOUNT) 417*e92ffd9bSMartin Matuska return (__builtin_ctzll(val) >> 3); 418*e92ffd9bSMartin Matuska #else 419*e92ffd9bSMartin Matuska static const int DeBruijnBytePos[64] = 420*e92ffd9bSMartin Matuska { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 421*e92ffd9bSMartin Matuska 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 422*e92ffd9bSMartin Matuska 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 423*e92ffd9bSMartin Matuska 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 424*e92ffd9bSMartin Matuska }; 425*e92ffd9bSMartin Matuska return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >> 426*e92ffd9bSMartin Matuska 58]; 427*e92ffd9bSMartin Matuska #endif 428*e92ffd9bSMartin Matuska #endif 429*e92ffd9bSMartin Matuska } 430*e92ffd9bSMartin Matuska 431*e92ffd9bSMartin Matuska #else 432*e92ffd9bSMartin Matuska 433*e92ffd9bSMartin Matuska static inline int 434*e92ffd9bSMartin Matuska LZ4_NbCommonBytes(register U32 val) 435*e92ffd9bSMartin Matuska { 
436*e92ffd9bSMartin Matuska #if defined(LZ4_BIG_ENDIAN) 437*e92ffd9bSMartin Matuska #if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \ 438*e92ffd9bSMartin Matuska !defined(LZ4_FORCE_SW_BITCOUNT) 439*e92ffd9bSMartin Matuska return (__builtin_clz(val) >> 3); 440*e92ffd9bSMartin Matuska #else 441*e92ffd9bSMartin Matuska int r; 442*e92ffd9bSMartin Matuska if (!(val >> 16)) { 443*e92ffd9bSMartin Matuska r = 2; 444*e92ffd9bSMartin Matuska val >>= 8; 445*e92ffd9bSMartin Matuska } else { 446*e92ffd9bSMartin Matuska r = 0; 447*e92ffd9bSMartin Matuska val >>= 24; 448*e92ffd9bSMartin Matuska } 449*e92ffd9bSMartin Matuska r += (!val); 450*e92ffd9bSMartin Matuska return (r); 451*e92ffd9bSMartin Matuska #endif 452*e92ffd9bSMartin Matuska #else 453*e92ffd9bSMartin Matuska #if defined(__GNUC__) && (GCC_VERSION >= 304) && \ 454*e92ffd9bSMartin Matuska !defined(LZ4_FORCE_SW_BITCOUNT) 455*e92ffd9bSMartin Matuska return (__builtin_ctz(val) >> 3); 456*e92ffd9bSMartin Matuska #else 457*e92ffd9bSMartin Matuska static const int DeBruijnBytePos[32] = { 458*e92ffd9bSMartin Matuska 0, 0, 3, 0, 3, 1, 3, 0, 459*e92ffd9bSMartin Matuska 3, 2, 2, 1, 3, 2, 0, 1, 460*e92ffd9bSMartin Matuska 3, 3, 1, 2, 2, 2, 2, 0, 461*e92ffd9bSMartin Matuska 3, 1, 2, 0, 1, 0, 1, 1 462*e92ffd9bSMartin Matuska }; 463*e92ffd9bSMartin Matuska return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >> 464*e92ffd9bSMartin Matuska 27]; 465*e92ffd9bSMartin Matuska #endif 466*e92ffd9bSMartin Matuska #endif 467*e92ffd9bSMartin Matuska } 468*e92ffd9bSMartin Matuska 469*e92ffd9bSMartin Matuska #endif 470*e92ffd9bSMartin Matuska 471*e92ffd9bSMartin Matuska /* Compression functions */ 472*e92ffd9bSMartin Matuska 473*e92ffd9bSMartin Matuska static int 474*e92ffd9bSMartin Matuska LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize, 475*e92ffd9bSMartin Matuska int osize) 476*e92ffd9bSMartin Matuska { 477*e92ffd9bSMartin Matuska struct refTables *srt = (struct refTables *)ctx; 
478*e92ffd9bSMartin Matuska HTYPE *HashTable = (HTYPE *) (srt->hashTable); 479*e92ffd9bSMartin Matuska 480*e92ffd9bSMartin Matuska const BYTE *ip = (BYTE *) source; 481*e92ffd9bSMartin Matuska INITBASE(base); 482*e92ffd9bSMartin Matuska const BYTE *anchor = ip; 483*e92ffd9bSMartin Matuska const BYTE *const iend = ip + isize; 484*e92ffd9bSMartin Matuska const BYTE *const oend = (BYTE *) dest + osize; 485*e92ffd9bSMartin Matuska const BYTE *const mflimit = iend - MFLIMIT; 486*e92ffd9bSMartin Matuska #define matchlimit (iend - LASTLITERALS) 487*e92ffd9bSMartin Matuska 488*e92ffd9bSMartin Matuska BYTE *op = (BYTE *) dest; 489*e92ffd9bSMartin Matuska 490*e92ffd9bSMartin Matuska int len, length; 491*e92ffd9bSMartin Matuska const int skipStrength = SKIPSTRENGTH; 492*e92ffd9bSMartin Matuska U32 forwardH; 493*e92ffd9bSMartin Matuska 494*e92ffd9bSMartin Matuska 495*e92ffd9bSMartin Matuska /* Init */ 496*e92ffd9bSMartin Matuska if (isize < MINLENGTH) 497*e92ffd9bSMartin Matuska goto _last_literals; 498*e92ffd9bSMartin Matuska 499*e92ffd9bSMartin Matuska /* First Byte */ 500*e92ffd9bSMartin Matuska HashTable[LZ4_HASH_VALUE(ip)] = ip - base; 501*e92ffd9bSMartin Matuska ip++; 502*e92ffd9bSMartin Matuska forwardH = LZ4_HASH_VALUE(ip); 503*e92ffd9bSMartin Matuska 504*e92ffd9bSMartin Matuska /* Main Loop */ 505*e92ffd9bSMartin Matuska for (;;) { 506*e92ffd9bSMartin Matuska int findMatchAttempts = (1U << skipStrength) + 3; 507*e92ffd9bSMartin Matuska const BYTE *forwardIp = ip; 508*e92ffd9bSMartin Matuska const BYTE *ref; 509*e92ffd9bSMartin Matuska BYTE *token; 510*e92ffd9bSMartin Matuska 511*e92ffd9bSMartin Matuska /* Find a match */ 512*e92ffd9bSMartin Matuska do { 513*e92ffd9bSMartin Matuska U32 h = forwardH; 514*e92ffd9bSMartin Matuska int step = findMatchAttempts++ >> skipStrength; 515*e92ffd9bSMartin Matuska ip = forwardIp; 516*e92ffd9bSMartin Matuska forwardIp = ip + step; 517*e92ffd9bSMartin Matuska 518*e92ffd9bSMartin Matuska if (unlikely(forwardIp > mflimit)) { 
519*e92ffd9bSMartin Matuska goto _last_literals; 520*e92ffd9bSMartin Matuska } 521*e92ffd9bSMartin Matuska 522*e92ffd9bSMartin Matuska forwardH = LZ4_HASH_VALUE(forwardIp); 523*e92ffd9bSMartin Matuska ref = base + HashTable[h]; 524*e92ffd9bSMartin Matuska HashTable[h] = ip - base; 525*e92ffd9bSMartin Matuska 526*e92ffd9bSMartin Matuska } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip))); 527*e92ffd9bSMartin Matuska 528*e92ffd9bSMartin Matuska /* Catch up */ 529*e92ffd9bSMartin Matuska while ((ip > anchor) && (ref > (BYTE *) source) && 530*e92ffd9bSMartin Matuska unlikely(ip[-1] == ref[-1])) { 531*e92ffd9bSMartin Matuska ip--; 532*e92ffd9bSMartin Matuska ref--; 533*e92ffd9bSMartin Matuska } 534*e92ffd9bSMartin Matuska 535*e92ffd9bSMartin Matuska /* Encode Literal length */ 536*e92ffd9bSMartin Matuska length = ip - anchor; 537*e92ffd9bSMartin Matuska token = op++; 538*e92ffd9bSMartin Matuska 539*e92ffd9bSMartin Matuska /* Check output limit */ 540*e92ffd9bSMartin Matuska if (unlikely(op + length + (2 + 1 + LASTLITERALS) + 541*e92ffd9bSMartin Matuska (length >> 8) > oend)) 542*e92ffd9bSMartin Matuska return (0); 543*e92ffd9bSMartin Matuska 544*e92ffd9bSMartin Matuska if (length >= (int)RUN_MASK) { 545*e92ffd9bSMartin Matuska *token = (RUN_MASK << ML_BITS); 546*e92ffd9bSMartin Matuska len = length - RUN_MASK; 547*e92ffd9bSMartin Matuska for (; len > 254; len -= 255) 548*e92ffd9bSMartin Matuska *op++ = 255; 549*e92ffd9bSMartin Matuska *op++ = (BYTE)len; 550*e92ffd9bSMartin Matuska } else 551*e92ffd9bSMartin Matuska *token = (length << ML_BITS); 552*e92ffd9bSMartin Matuska 553*e92ffd9bSMartin Matuska /* Copy Literals */ 554*e92ffd9bSMartin Matuska LZ4_BLINDCOPY(anchor, op, length); 555*e92ffd9bSMartin Matuska 556*e92ffd9bSMartin Matuska _next_match: 557*e92ffd9bSMartin Matuska /* Encode Offset */ 558*e92ffd9bSMartin Matuska LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref); 559*e92ffd9bSMartin Matuska 560*e92ffd9bSMartin Matuska /* Start Counting */ 561*e92ffd9bSMartin 
Matuska ip += MINMATCH; 562*e92ffd9bSMartin Matuska ref += MINMATCH; /* MinMatch verified */ 563*e92ffd9bSMartin Matuska anchor = ip; 564*e92ffd9bSMartin Matuska while (likely(ip < matchlimit - (STEPSIZE - 1))) { 565*e92ffd9bSMartin Matuska UARCH diff = AARCH(ref) ^ AARCH(ip); 566*e92ffd9bSMartin Matuska if (!diff) { 567*e92ffd9bSMartin Matuska ip += STEPSIZE; 568*e92ffd9bSMartin Matuska ref += STEPSIZE; 569*e92ffd9bSMartin Matuska continue; 570*e92ffd9bSMartin Matuska } 571*e92ffd9bSMartin Matuska ip += LZ4_NbCommonBytes(diff); 572*e92ffd9bSMartin Matuska goto _endCount; 573*e92ffd9bSMartin Matuska } 574*e92ffd9bSMartin Matuska #if LZ4_ARCH64 575*e92ffd9bSMartin Matuska if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) { 576*e92ffd9bSMartin Matuska ip += 4; 577*e92ffd9bSMartin Matuska ref += 4; 578*e92ffd9bSMartin Matuska } 579*e92ffd9bSMartin Matuska #endif 580*e92ffd9bSMartin Matuska if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) { 581*e92ffd9bSMartin Matuska ip += 2; 582*e92ffd9bSMartin Matuska ref += 2; 583*e92ffd9bSMartin Matuska } 584*e92ffd9bSMartin Matuska if ((ip < matchlimit) && (*ref == *ip)) 585*e92ffd9bSMartin Matuska ip++; 586*e92ffd9bSMartin Matuska _endCount: 587*e92ffd9bSMartin Matuska 588*e92ffd9bSMartin Matuska /* Encode MatchLength */ 589*e92ffd9bSMartin Matuska len = (ip - anchor); 590*e92ffd9bSMartin Matuska /* Check output limit */ 591*e92ffd9bSMartin Matuska if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)) 592*e92ffd9bSMartin Matuska return (0); 593*e92ffd9bSMartin Matuska if (len >= (int)ML_MASK) { 594*e92ffd9bSMartin Matuska *token += ML_MASK; 595*e92ffd9bSMartin Matuska len -= ML_MASK; 596*e92ffd9bSMartin Matuska for (; len > 509; len -= 510) { 597*e92ffd9bSMartin Matuska *op++ = 255; 598*e92ffd9bSMartin Matuska *op++ = 255; 599*e92ffd9bSMartin Matuska } 600*e92ffd9bSMartin Matuska if (len > 254) { 601*e92ffd9bSMartin Matuska len -= 255; 602*e92ffd9bSMartin Matuska *op++ = 255; 603*e92ffd9bSMartin Matuska } 
604*e92ffd9bSMartin Matuska *op++ = (BYTE)len; 605*e92ffd9bSMartin Matuska } else 606*e92ffd9bSMartin Matuska *token += len; 607*e92ffd9bSMartin Matuska 608*e92ffd9bSMartin Matuska /* Test end of chunk */ 609*e92ffd9bSMartin Matuska if (ip > mflimit) { 610*e92ffd9bSMartin Matuska anchor = ip; 611*e92ffd9bSMartin Matuska break; 612*e92ffd9bSMartin Matuska } 613*e92ffd9bSMartin Matuska /* Fill table */ 614*e92ffd9bSMartin Matuska HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base; 615*e92ffd9bSMartin Matuska 616*e92ffd9bSMartin Matuska /* Test next position */ 617*e92ffd9bSMartin Matuska ref = base + HashTable[LZ4_HASH_VALUE(ip)]; 618*e92ffd9bSMartin Matuska HashTable[LZ4_HASH_VALUE(ip)] = ip - base; 619*e92ffd9bSMartin Matuska if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) { 620*e92ffd9bSMartin Matuska token = op++; 621*e92ffd9bSMartin Matuska *token = 0; 622*e92ffd9bSMartin Matuska goto _next_match; 623*e92ffd9bSMartin Matuska } 624*e92ffd9bSMartin Matuska /* Prepare next loop */ 625*e92ffd9bSMartin Matuska anchor = ip++; 626*e92ffd9bSMartin Matuska forwardH = LZ4_HASH_VALUE(ip); 627*e92ffd9bSMartin Matuska } 628*e92ffd9bSMartin Matuska 629*e92ffd9bSMartin Matuska _last_literals: 630*e92ffd9bSMartin Matuska /* Encode Last Literals */ 631*e92ffd9bSMartin Matuska { 632*e92ffd9bSMartin Matuska int lastRun = iend - anchor; 633*e92ffd9bSMartin Matuska if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) > 634*e92ffd9bSMartin Matuska oend) 635*e92ffd9bSMartin Matuska return (0); 636*e92ffd9bSMartin Matuska if (lastRun >= (int)RUN_MASK) { 637*e92ffd9bSMartin Matuska *op++ = (RUN_MASK << ML_BITS); 638*e92ffd9bSMartin Matuska lastRun -= RUN_MASK; 639*e92ffd9bSMartin Matuska for (; lastRun > 254; lastRun -= 255) { 640*e92ffd9bSMartin Matuska *op++ = 255; 641*e92ffd9bSMartin Matuska } 642*e92ffd9bSMartin Matuska *op++ = (BYTE)lastRun; 643*e92ffd9bSMartin Matuska } else 644*e92ffd9bSMartin Matuska *op++ = (lastRun << ML_BITS); 645*e92ffd9bSMartin Matuska 
(void) memcpy(op, anchor, iend - anchor); 646*e92ffd9bSMartin Matuska op += iend - anchor; 647*e92ffd9bSMartin Matuska } 648*e92ffd9bSMartin Matuska 649*e92ffd9bSMartin Matuska /* End */ 650*e92ffd9bSMartin Matuska return (int)(((char *)op) - dest); 651*e92ffd9bSMartin Matuska } 652*e92ffd9bSMartin Matuska 653*e92ffd9bSMartin Matuska 654*e92ffd9bSMartin Matuska 655*e92ffd9bSMartin Matuska /* Note : this function is valid only if isize < LZ4_64KLIMIT */ 656*e92ffd9bSMartin Matuska #define LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1)) 657*e92ffd9bSMartin Matuska #define HASHLOG64K (HASH_LOG + 1) 658*e92ffd9bSMartin Matuska #define HASH64KTABLESIZE (1U << HASHLOG64K) 659*e92ffd9bSMartin Matuska #define LZ4_HASH64K_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8) - \ 660*e92ffd9bSMartin Matuska HASHLOG64K)) 661*e92ffd9bSMartin Matuska #define LZ4_HASH64K_VALUE(p) LZ4_HASH64K_FUNCTION(A32(p)) 662*e92ffd9bSMartin Matuska 663*e92ffd9bSMartin Matuska static int 664*e92ffd9bSMartin Matuska LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize, 665*e92ffd9bSMartin Matuska int osize) 666*e92ffd9bSMartin Matuska { 667*e92ffd9bSMartin Matuska struct refTables *srt = (struct refTables *)ctx; 668*e92ffd9bSMartin Matuska U16 *HashTable = (U16 *) (srt->hashTable); 669*e92ffd9bSMartin Matuska 670*e92ffd9bSMartin Matuska const BYTE *ip = (BYTE *) source; 671*e92ffd9bSMartin Matuska const BYTE *anchor = ip; 672*e92ffd9bSMartin Matuska const BYTE *const base = ip; 673*e92ffd9bSMartin Matuska const BYTE *const iend = ip + isize; 674*e92ffd9bSMartin Matuska const BYTE *const oend = (BYTE *) dest + osize; 675*e92ffd9bSMartin Matuska const BYTE *const mflimit = iend - MFLIMIT; 676*e92ffd9bSMartin Matuska #define matchlimit (iend - LASTLITERALS) 677*e92ffd9bSMartin Matuska 678*e92ffd9bSMartin Matuska BYTE *op = (BYTE *) dest; 679*e92ffd9bSMartin Matuska 680*e92ffd9bSMartin Matuska int len, length; 681*e92ffd9bSMartin Matuska const int skipStrength = SKIPSTRENGTH; 
682*e92ffd9bSMartin Matuska U32 forwardH; 683*e92ffd9bSMartin Matuska 684*e92ffd9bSMartin Matuska /* Init */ 685*e92ffd9bSMartin Matuska if (isize < MINLENGTH) 686*e92ffd9bSMartin Matuska goto _last_literals; 687*e92ffd9bSMartin Matuska 688*e92ffd9bSMartin Matuska /* First Byte */ 689*e92ffd9bSMartin Matuska ip++; 690*e92ffd9bSMartin Matuska forwardH = LZ4_HASH64K_VALUE(ip); 691*e92ffd9bSMartin Matuska 692*e92ffd9bSMartin Matuska /* Main Loop */ 693*e92ffd9bSMartin Matuska for (;;) { 694*e92ffd9bSMartin Matuska int findMatchAttempts = (1U << skipStrength) + 3; 695*e92ffd9bSMartin Matuska const BYTE *forwardIp = ip; 696*e92ffd9bSMartin Matuska const BYTE *ref; 697*e92ffd9bSMartin Matuska BYTE *token; 698*e92ffd9bSMartin Matuska 699*e92ffd9bSMartin Matuska /* Find a match */ 700*e92ffd9bSMartin Matuska do { 701*e92ffd9bSMartin Matuska U32 h = forwardH; 702*e92ffd9bSMartin Matuska int step = findMatchAttempts++ >> skipStrength; 703*e92ffd9bSMartin Matuska ip = forwardIp; 704*e92ffd9bSMartin Matuska forwardIp = ip + step; 705*e92ffd9bSMartin Matuska 706*e92ffd9bSMartin Matuska if (forwardIp > mflimit) { 707*e92ffd9bSMartin Matuska goto _last_literals; 708*e92ffd9bSMartin Matuska } 709*e92ffd9bSMartin Matuska 710*e92ffd9bSMartin Matuska forwardH = LZ4_HASH64K_VALUE(forwardIp); 711*e92ffd9bSMartin Matuska ref = base + HashTable[h]; 712*e92ffd9bSMartin Matuska HashTable[h] = ip - base; 713*e92ffd9bSMartin Matuska 714*e92ffd9bSMartin Matuska } while (A32(ref) != A32(ip)); 715*e92ffd9bSMartin Matuska 716*e92ffd9bSMartin Matuska /* Catch up */ 717*e92ffd9bSMartin Matuska while ((ip > anchor) && (ref > (BYTE *) source) && 718*e92ffd9bSMartin Matuska (ip[-1] == ref[-1])) { 719*e92ffd9bSMartin Matuska ip--; 720*e92ffd9bSMartin Matuska ref--; 721*e92ffd9bSMartin Matuska } 722*e92ffd9bSMartin Matuska 723*e92ffd9bSMartin Matuska /* Encode Literal length */ 724*e92ffd9bSMartin Matuska length = ip - anchor; 725*e92ffd9bSMartin Matuska token = op++; 726*e92ffd9bSMartin Matuska 
727*e92ffd9bSMartin Matuska /* Check output limit */ 728*e92ffd9bSMartin Matuska if (unlikely(op + length + (2 + 1 + LASTLITERALS) + 729*e92ffd9bSMartin Matuska (length >> 8) > oend)) 730*e92ffd9bSMartin Matuska return (0); 731*e92ffd9bSMartin Matuska 732*e92ffd9bSMartin Matuska if (length >= (int)RUN_MASK) { 733*e92ffd9bSMartin Matuska *token = (RUN_MASK << ML_BITS); 734*e92ffd9bSMartin Matuska len = length - RUN_MASK; 735*e92ffd9bSMartin Matuska for (; len > 254; len -= 255) 736*e92ffd9bSMartin Matuska *op++ = 255; 737*e92ffd9bSMartin Matuska *op++ = (BYTE)len; 738*e92ffd9bSMartin Matuska } else 739*e92ffd9bSMartin Matuska *token = (length << ML_BITS); 740*e92ffd9bSMartin Matuska 741*e92ffd9bSMartin Matuska /* Copy Literals */ 742*e92ffd9bSMartin Matuska LZ4_BLINDCOPY(anchor, op, length); 743*e92ffd9bSMartin Matuska 744*e92ffd9bSMartin Matuska _next_match: 745*e92ffd9bSMartin Matuska /* Encode Offset */ 746*e92ffd9bSMartin Matuska LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref); 747*e92ffd9bSMartin Matuska 748*e92ffd9bSMartin Matuska /* Start Counting */ 749*e92ffd9bSMartin Matuska ip += MINMATCH; 750*e92ffd9bSMartin Matuska ref += MINMATCH; /* MinMatch verified */ 751*e92ffd9bSMartin Matuska anchor = ip; 752*e92ffd9bSMartin Matuska while (ip < matchlimit - (STEPSIZE - 1)) { 753*e92ffd9bSMartin Matuska UARCH diff = AARCH(ref) ^ AARCH(ip); 754*e92ffd9bSMartin Matuska if (!diff) { 755*e92ffd9bSMartin Matuska ip += STEPSIZE; 756*e92ffd9bSMartin Matuska ref += STEPSIZE; 757*e92ffd9bSMartin Matuska continue; 758*e92ffd9bSMartin Matuska } 759*e92ffd9bSMartin Matuska ip += LZ4_NbCommonBytes(diff); 760*e92ffd9bSMartin Matuska goto _endCount; 761*e92ffd9bSMartin Matuska } 762*e92ffd9bSMartin Matuska #if LZ4_ARCH64 763*e92ffd9bSMartin Matuska if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) { 764*e92ffd9bSMartin Matuska ip += 4; 765*e92ffd9bSMartin Matuska ref += 4; 766*e92ffd9bSMartin Matuska } 767*e92ffd9bSMartin Matuska #endif 768*e92ffd9bSMartin Matuska if ((ip < 
(matchlimit - 1)) && (A16(ref) == A16(ip))) { 769*e92ffd9bSMartin Matuska ip += 2; 770*e92ffd9bSMartin Matuska ref += 2; 771*e92ffd9bSMartin Matuska } 772*e92ffd9bSMartin Matuska if ((ip < matchlimit) && (*ref == *ip)) 773*e92ffd9bSMartin Matuska ip++; 774*e92ffd9bSMartin Matuska _endCount: 775*e92ffd9bSMartin Matuska 776*e92ffd9bSMartin Matuska /* Encode MatchLength */ 777*e92ffd9bSMartin Matuska len = (ip - anchor); 778*e92ffd9bSMartin Matuska /* Check output limit */ 779*e92ffd9bSMartin Matuska if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)) 780*e92ffd9bSMartin Matuska return (0); 781*e92ffd9bSMartin Matuska if (len >= (int)ML_MASK) { 782*e92ffd9bSMartin Matuska *token += ML_MASK; 783*e92ffd9bSMartin Matuska len -= ML_MASK; 784*e92ffd9bSMartin Matuska for (; len > 509; len -= 510) { 785*e92ffd9bSMartin Matuska *op++ = 255; 786*e92ffd9bSMartin Matuska *op++ = 255; 787*e92ffd9bSMartin Matuska } 788*e92ffd9bSMartin Matuska if (len > 254) { 789*e92ffd9bSMartin Matuska len -= 255; 790*e92ffd9bSMartin Matuska *op++ = 255; 791*e92ffd9bSMartin Matuska } 792*e92ffd9bSMartin Matuska *op++ = (BYTE)len; 793*e92ffd9bSMartin Matuska } else 794*e92ffd9bSMartin Matuska *token += len; 795*e92ffd9bSMartin Matuska 796*e92ffd9bSMartin Matuska /* Test end of chunk */ 797*e92ffd9bSMartin Matuska if (ip > mflimit) { 798*e92ffd9bSMartin Matuska anchor = ip; 799*e92ffd9bSMartin Matuska break; 800*e92ffd9bSMartin Matuska } 801*e92ffd9bSMartin Matuska /* Fill table */ 802*e92ffd9bSMartin Matuska HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base; 803*e92ffd9bSMartin Matuska 804*e92ffd9bSMartin Matuska /* Test next position */ 805*e92ffd9bSMartin Matuska ref = base + HashTable[LZ4_HASH64K_VALUE(ip)]; 806*e92ffd9bSMartin Matuska HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base; 807*e92ffd9bSMartin Matuska if (A32(ref) == A32(ip)) { 808*e92ffd9bSMartin Matuska token = op++; 809*e92ffd9bSMartin Matuska *token = 0; 810*e92ffd9bSMartin Matuska goto _next_match; 811*e92ffd9bSMartin 
Matuska } 812*e92ffd9bSMartin Matuska /* Prepare next loop */ 813*e92ffd9bSMartin Matuska anchor = ip++; 814*e92ffd9bSMartin Matuska forwardH = LZ4_HASH64K_VALUE(ip); 815*e92ffd9bSMartin Matuska } 816*e92ffd9bSMartin Matuska 817*e92ffd9bSMartin Matuska _last_literals: 818*e92ffd9bSMartin Matuska /* Encode Last Literals */ 819*e92ffd9bSMartin Matuska { 820*e92ffd9bSMartin Matuska int lastRun = iend - anchor; 821*e92ffd9bSMartin Matuska if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) > 822*e92ffd9bSMartin Matuska oend) 823*e92ffd9bSMartin Matuska return (0); 824*e92ffd9bSMartin Matuska if (lastRun >= (int)RUN_MASK) { 825*e92ffd9bSMartin Matuska *op++ = (RUN_MASK << ML_BITS); 826*e92ffd9bSMartin Matuska lastRun -= RUN_MASK; 827*e92ffd9bSMartin Matuska for (; lastRun > 254; lastRun -= 255) 828*e92ffd9bSMartin Matuska *op++ = 255; 829*e92ffd9bSMartin Matuska *op++ = (BYTE)lastRun; 830*e92ffd9bSMartin Matuska } else 831*e92ffd9bSMartin Matuska *op++ = (lastRun << ML_BITS); 832*e92ffd9bSMartin Matuska (void) memcpy(op, anchor, iend - anchor); 833*e92ffd9bSMartin Matuska op += iend - anchor; 834*e92ffd9bSMartin Matuska } 835*e92ffd9bSMartin Matuska 836*e92ffd9bSMartin Matuska /* End */ 837*e92ffd9bSMartin Matuska return (int)(((char *)op) - dest); 838*e92ffd9bSMartin Matuska } 839*e92ffd9bSMartin Matuska 840*e92ffd9bSMartin Matuska static int 841*e92ffd9bSMartin Matuska real_LZ4_compress(const char *source, char *dest, int isize, int osize) 842*e92ffd9bSMartin Matuska { 843*e92ffd9bSMartin Matuska void *ctx; 844*e92ffd9bSMartin Matuska int result; 845*e92ffd9bSMartin Matuska 846*e92ffd9bSMartin Matuska ctx = lz4_alloc(KM_SLEEP); 847*e92ffd9bSMartin Matuska 848*e92ffd9bSMartin Matuska /* 849*e92ffd9bSMartin Matuska * out of kernel memory, gently fall through - this will disable 850*e92ffd9bSMartin Matuska * compression in zio_compress_data 851*e92ffd9bSMartin Matuska */ 852*e92ffd9bSMartin Matuska if (ctx == NULL) 853*e92ffd9bSMartin Matuska return (0); 
854*e92ffd9bSMartin Matuska 855*e92ffd9bSMartin Matuska memset(ctx, 0, sizeof (struct refTables)); 856*e92ffd9bSMartin Matuska 857*e92ffd9bSMartin Matuska if (isize < LZ4_64KLIMIT) 858*e92ffd9bSMartin Matuska result = LZ4_compress64kCtx(ctx, source, dest, isize, osize); 859*e92ffd9bSMartin Matuska else 860*e92ffd9bSMartin Matuska result = LZ4_compressCtx(ctx, source, dest, isize, osize); 861*e92ffd9bSMartin Matuska 862*e92ffd9bSMartin Matuska lz4_free(ctx); 863*e92ffd9bSMartin Matuska return (result); 864*e92ffd9bSMartin Matuska } 865*e92ffd9bSMartin Matuska 866*e92ffd9bSMartin Matuska #ifdef __FreeBSD__ 867*e92ffd9bSMartin Matuska /* 868*e92ffd9bSMartin Matuska * FreeBSD has 4, 8 and 16 KB malloc zones which can be used here. 869*e92ffd9bSMartin Matuska * Should struct refTables get resized this may need to be revisited, hence 870*e92ffd9bSMartin Matuska * compiler-time asserts. 871*e92ffd9bSMartin Matuska */ 872*e92ffd9bSMartin Matuska _Static_assert(sizeof(struct refTables) <= 16384, 873*e92ffd9bSMartin Matuska "refTables too big for malloc"); 874*e92ffd9bSMartin Matuska _Static_assert((sizeof(struct refTables) % 4096) == 0, 875*e92ffd9bSMartin Matuska "refTables not a multiple of page size"); 876*e92ffd9bSMartin Matuska #else 877*e92ffd9bSMartin Matuska #define ZFS_LZ4_USE_CACHE 878*e92ffd9bSMartin Matuska #endif 879*e92ffd9bSMartin Matuska 880*e92ffd9bSMartin Matuska #ifdef ZFS_LZ4_USE_CACHE 881*e92ffd9bSMartin Matuska static kmem_cache_t *lz4_cache; 882*e92ffd9bSMartin Matuska #endif 883*e92ffd9bSMartin Matuska 884*e92ffd9bSMartin Matuska #ifdef ZFS_LZ4_USE_CACHE 885*e92ffd9bSMartin Matuska void 886*e92ffd9bSMartin Matuska lz4_init(void) 887*e92ffd9bSMartin Matuska { 888*e92ffd9bSMartin Matuska lz4_cache = kmem_cache_create("lz4_cache", 889*e92ffd9bSMartin Matuska sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL, 0); 890*e92ffd9bSMartin Matuska } 891*e92ffd9bSMartin Matuska 892*e92ffd9bSMartin Matuska void 893*e92ffd9bSMartin Matuska lz4_fini(void) 
894*e92ffd9bSMartin Matuska { 895*e92ffd9bSMartin Matuska if (lz4_cache) { 896*e92ffd9bSMartin Matuska kmem_cache_destroy(lz4_cache); 897*e92ffd9bSMartin Matuska lz4_cache = NULL; 898*e92ffd9bSMartin Matuska } 899*e92ffd9bSMartin Matuska } 900*e92ffd9bSMartin Matuska 901*e92ffd9bSMartin Matuska static void * 902*e92ffd9bSMartin Matuska lz4_alloc(int flags) 903*e92ffd9bSMartin Matuska { 904*e92ffd9bSMartin Matuska ASSERT(lz4_cache != NULL); 905*e92ffd9bSMartin Matuska return (kmem_cache_alloc(lz4_cache, flags)); 906*e92ffd9bSMartin Matuska } 907*e92ffd9bSMartin Matuska 908*e92ffd9bSMartin Matuska static void 909*e92ffd9bSMartin Matuska lz4_free(void *ctx) 910*e92ffd9bSMartin Matuska { 911*e92ffd9bSMartin Matuska kmem_cache_free(lz4_cache, ctx); 912*e92ffd9bSMartin Matuska } 913*e92ffd9bSMartin Matuska #else 914*e92ffd9bSMartin Matuska void 915*e92ffd9bSMartin Matuska lz4_init(void) 916*e92ffd9bSMartin Matuska { 917*e92ffd9bSMartin Matuska } 918*e92ffd9bSMartin Matuska 919*e92ffd9bSMartin Matuska void 920*e92ffd9bSMartin Matuska lz4_fini(void) 921*e92ffd9bSMartin Matuska { 922*e92ffd9bSMartin Matuska } 923*e92ffd9bSMartin Matuska 924*e92ffd9bSMartin Matuska static void * 925*e92ffd9bSMartin Matuska lz4_alloc(int flags) 926*e92ffd9bSMartin Matuska { 927*e92ffd9bSMartin Matuska return (kmem_alloc(sizeof (struct refTables), flags)); 928*e92ffd9bSMartin Matuska } 929*e92ffd9bSMartin Matuska 930*e92ffd9bSMartin Matuska static void 931*e92ffd9bSMartin Matuska lz4_free(void *ctx) 932*e92ffd9bSMartin Matuska { 933*e92ffd9bSMartin Matuska kmem_free(ctx, sizeof (struct refTables)); 934*e92ffd9bSMartin Matuska } 935*e92ffd9bSMartin Matuska #endif 936