1*eda14cbcSMatt Macy /* 2*eda14cbcSMatt Macy * LZ4 - Fast LZ compression algorithm 3*eda14cbcSMatt Macy * Header File 4*eda14cbcSMatt Macy * Copyright (C) 2011-2013, Yann Collet. 5*eda14cbcSMatt Macy * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 6*eda14cbcSMatt Macy * 7*eda14cbcSMatt Macy * Redistribution and use in source and binary forms, with or without 8*eda14cbcSMatt Macy * modification, are permitted provided that the following conditions are 9*eda14cbcSMatt Macy * met: 10*eda14cbcSMatt Macy * 11*eda14cbcSMatt Macy * * Redistributions of source code must retain the above copyright 12*eda14cbcSMatt Macy * notice, this list of conditions and the following disclaimer. 13*eda14cbcSMatt Macy * * Redistributions in binary form must reproduce the above 14*eda14cbcSMatt Macy * copyright notice, this list of conditions and the following disclaimer 15*eda14cbcSMatt Macy * in the documentation and/or other materials provided with the 16*eda14cbcSMatt Macy * distribution. 17*eda14cbcSMatt Macy * 18*eda14cbcSMatt Macy * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19*eda14cbcSMatt Macy * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20*eda14cbcSMatt Macy * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21*eda14cbcSMatt Macy * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22*eda14cbcSMatt Macy * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23*eda14cbcSMatt Macy * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24*eda14cbcSMatt Macy * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25*eda14cbcSMatt Macy * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26*eda14cbcSMatt Macy * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27*eda14cbcSMatt Macy * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28*eda14cbcSMatt Macy * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29*eda14cbcSMatt Macy * 30*eda14cbcSMatt Macy * You can contact the author at : 31*eda14cbcSMatt Macy * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html 32*eda14cbcSMatt Macy * - LZ4 source repository : http://code.google.com/p/lz4/ 33*eda14cbcSMatt Macy */ 34*eda14cbcSMatt Macy 35*eda14cbcSMatt Macy #include <sys/zfs_context.h> 36*eda14cbcSMatt Macy #include <sys/zio_compress.h> 37*eda14cbcSMatt Macy 38*eda14cbcSMatt Macy static int real_LZ4_compress(const char *source, char *dest, int isize, 39*eda14cbcSMatt Macy int osize); 40*eda14cbcSMatt Macy static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest, 41*eda14cbcSMatt Macy int isize, int maxOutputSize); 42*eda14cbcSMatt Macy static int LZ4_compressCtx(void *ctx, const char *source, char *dest, 43*eda14cbcSMatt Macy int isize, int osize); 44*eda14cbcSMatt Macy static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest, 45*eda14cbcSMatt Macy int isize, int osize); 46*eda14cbcSMatt Macy 47*eda14cbcSMatt Macy static kmem_cache_t *lz4_cache; 48*eda14cbcSMatt Macy 49*eda14cbcSMatt Macy /*ARGSUSED*/ 50*eda14cbcSMatt Macy size_t 51*eda14cbcSMatt Macy lz4_compress_zfs(void *s_start, void *d_start, size_t s_len, 52*eda14cbcSMatt Macy size_t d_len, int n) 53*eda14cbcSMatt Macy { 54*eda14cbcSMatt Macy uint32_t bufsiz; 55*eda14cbcSMatt Macy char *dest = d_start; 56*eda14cbcSMatt Macy 57*eda14cbcSMatt Macy ASSERT(d_len >= sizeof (bufsiz)); 58*eda14cbcSMatt Macy 59*eda14cbcSMatt Macy bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len, 60*eda14cbcSMatt Macy d_len - sizeof (bufsiz)); 61*eda14cbcSMatt Macy 62*eda14cbcSMatt Macy /* Signal an error if the compression routine returned zero. */ 63*eda14cbcSMatt Macy if (bufsiz == 0) 64*eda14cbcSMatt Macy return (s_len); 65*eda14cbcSMatt Macy 66*eda14cbcSMatt Macy /* 67*eda14cbcSMatt Macy * The exact compressed size is needed by the decompression routine, 68*eda14cbcSMatt Macy * so it is stored at the start of the buffer. Note that this may be 69*eda14cbcSMatt Macy * less than the compressed block size, which is rounded up to a 70*eda14cbcSMatt Macy * multiple of 1<<ashift. 71*eda14cbcSMatt Macy */ 72*eda14cbcSMatt Macy *(uint32_t *)dest = BE_32(bufsiz); 73*eda14cbcSMatt Macy 74*eda14cbcSMatt Macy return (bufsiz + sizeof (bufsiz)); 75*eda14cbcSMatt Macy } 76*eda14cbcSMatt Macy 77*eda14cbcSMatt Macy /*ARGSUSED*/ 78*eda14cbcSMatt Macy int 79*eda14cbcSMatt Macy lz4_decompress_zfs(void *s_start, void *d_start, size_t s_len, 80*eda14cbcSMatt Macy size_t d_len, int n) 81*eda14cbcSMatt Macy { 82*eda14cbcSMatt Macy const char *src = s_start; 83*eda14cbcSMatt Macy uint32_t bufsiz = BE_IN32(src); 84*eda14cbcSMatt Macy 85*eda14cbcSMatt Macy /* invalid compressed buffer size encoded at start */ 86*eda14cbcSMatt Macy if (bufsiz + sizeof (bufsiz) > s_len) 87*eda14cbcSMatt Macy return (1); 88*eda14cbcSMatt Macy 89*eda14cbcSMatt Macy /* 90*eda14cbcSMatt Macy * Returns 0 on success (decompression function returned non-negative) 91*eda14cbcSMatt Macy * and non-zero on failure (decompression function returned negative). 92*eda14cbcSMatt Macy */ 93*eda14cbcSMatt Macy return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)], 94*eda14cbcSMatt Macy d_start, bufsiz, d_len) < 0); 95*eda14cbcSMatt Macy } 96*eda14cbcSMatt Macy 97*eda14cbcSMatt Macy /* 98*eda14cbcSMatt Macy * LZ4 API Description: 99*eda14cbcSMatt Macy * 100*eda14cbcSMatt Macy * Simple Functions: 101*eda14cbcSMatt Macy * real_LZ4_compress() : 102*eda14cbcSMatt Macy * isize : is the input size. Max supported value is ~1.9GB 103*eda14cbcSMatt Macy * return : the number of bytes written in buffer dest 104*eda14cbcSMatt Macy * or 0 if the compression fails (if LZ4_COMPRESSMIN is set). 105*eda14cbcSMatt Macy * note : destination buffer must be already allocated. 106*eda14cbcSMatt Macy * destination buffer must be sized to handle worst cases 107*eda14cbcSMatt Macy * situations (input data not compressible) worst case size 108*eda14cbcSMatt Macy * evaluation is provided by function LZ4_compressBound(). 109*eda14cbcSMatt Macy * 110*eda14cbcSMatt Macy * real_LZ4_uncompress() : 111*eda14cbcSMatt Macy * osize : is the output size, therefore the original size 112*eda14cbcSMatt Macy * return : the number of bytes read in the source buffer. 113*eda14cbcSMatt Macy * If the source stream is malformed, the function will stop 114*eda14cbcSMatt Macy * decoding and return a negative result, indicating the byte 115*eda14cbcSMatt Macy * position of the faulty instruction. This function never 116*eda14cbcSMatt Macy * writes beyond dest + osize, and is therefore protected 117*eda14cbcSMatt Macy * against malicious data packets. 118*eda14cbcSMatt Macy * note : destination buffer must be already allocated 119*eda14cbcSMatt Macy * note : real_LZ4_uncompress() is not used in ZFS so its code 120*eda14cbcSMatt Macy * is not present here. 121*eda14cbcSMatt Macy * 122*eda14cbcSMatt Macy * Advanced Functions 123*eda14cbcSMatt Macy * 124*eda14cbcSMatt Macy * LZ4_compressBound() : 125*eda14cbcSMatt Macy * Provides the maximum size that LZ4 may output in a "worst case" 126*eda14cbcSMatt Macy * scenario (input data not compressible) primarily useful for memory 127*eda14cbcSMatt Macy * allocation of output buffer. 128*eda14cbcSMatt Macy * 129*eda14cbcSMatt Macy * isize : is the input size. Max supported value is ~1.9GB 130*eda14cbcSMatt Macy * return : maximum output size in a "worst case" scenario 131*eda14cbcSMatt Macy * note : this function is limited by "int" range (2^31-1) 132*eda14cbcSMatt Macy * 133*eda14cbcSMatt Macy * LZ4_uncompress_unknownOutputSize() : 134*eda14cbcSMatt Macy * isize : is the input size, therefore the compressed size 135*eda14cbcSMatt Macy * maxOutputSize : is the size of the destination buffer (which must be 136*eda14cbcSMatt Macy * already allocated) 137*eda14cbcSMatt Macy * return : the number of bytes decoded in the destination buffer 138*eda14cbcSMatt Macy * (necessarily <= maxOutputSize). If the source stream is 139*eda14cbcSMatt Macy * malformed, the function will stop decoding and return a 140*eda14cbcSMatt Macy * negative result, indicating the byte position of the faulty 141*eda14cbcSMatt Macy * instruction. This function never writes beyond dest + 142*eda14cbcSMatt Macy * maxOutputSize, and is therefore protected against malicious 143*eda14cbcSMatt Macy * data packets. 144*eda14cbcSMatt Macy * note : Destination buffer must be already allocated. 145*eda14cbcSMatt Macy * This version is slightly slower than real_LZ4_uncompress() 146*eda14cbcSMatt Macy * 147*eda14cbcSMatt Macy * LZ4_compressCtx() : 148*eda14cbcSMatt Macy * This function explicitly handles the CTX memory structure. 149*eda14cbcSMatt Macy * 150*eda14cbcSMatt Macy * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated 151*eda14cbcSMatt Macy * by the caller (either on the stack or using kmem_cache_alloc). Passing 152*eda14cbcSMatt Macy * NULL isn't valid. 153*eda14cbcSMatt Macy * 154*eda14cbcSMatt Macy * LZ4_compress64kCtx() : 155*eda14cbcSMatt Macy * Same as LZ4_compressCtx(), but specific to small inputs (<64KB). 156*eda14cbcSMatt Macy * isize *Must* be <64KB, otherwise the output will be corrupted. 157*eda14cbcSMatt Macy * 158*eda14cbcSMatt Macy * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated 159*eda14cbcSMatt Macy * by the caller (either on the stack or using kmem_cache_alloc). Passing 160*eda14cbcSMatt Macy * NULL isn't valid. 161*eda14cbcSMatt Macy */ 162*eda14cbcSMatt Macy 163*eda14cbcSMatt Macy /* 164*eda14cbcSMatt Macy * Tuning parameters 165*eda14cbcSMatt Macy */ 166*eda14cbcSMatt Macy 167*eda14cbcSMatt Macy /* 168*eda14cbcSMatt Macy * COMPRESSIONLEVEL: Increasing this value improves compression ratio 169*eda14cbcSMatt Macy * Lowering this value reduces memory usage. Reduced memory usage 170*eda14cbcSMatt Macy * typically improves speed, due to cache effect (ex: L1 32KB for Intel, 171*eda14cbcSMatt Macy * L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes 172*eda14cbcSMatt Macy * (examples : 12 -> 16KB ; 17 -> 512KB) 173*eda14cbcSMatt Macy */ 174*eda14cbcSMatt Macy #define COMPRESSIONLEVEL 12 175*eda14cbcSMatt Macy 176*eda14cbcSMatt Macy /* 177*eda14cbcSMatt Macy * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the 178*eda14cbcSMatt Macy * algorithm skip faster data segments considered "incompressible". 179*eda14cbcSMatt Macy * This may decrease compression ratio dramatically, but will be 180*eda14cbcSMatt Macy * faster on incompressible data. Increasing this value will make 181*eda14cbcSMatt Macy * the algorithm search more before declaring a segment "incompressible". 182*eda14cbcSMatt Macy * This could improve compression a bit, but will be slower on 183*eda14cbcSMatt Macy * incompressible data. The default value (6) is recommended. 184*eda14cbcSMatt Macy */ 185*eda14cbcSMatt Macy #define NOTCOMPRESSIBLE_CONFIRMATION 6 186*eda14cbcSMatt Macy 187*eda14cbcSMatt Macy /* 188*eda14cbcSMatt Macy * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to 189*eda14cbcSMatt Macy * performance for big endian cpu, but the resulting compressed stream 190*eda14cbcSMatt Macy * will be incompatible with little-endian CPU. You can set this option 191*eda14cbcSMatt Macy * to 1 in situations where data will stay within closed environment. 192*eda14cbcSMatt Macy * This option is useless on Little_Endian CPU (such as x86). 193*eda14cbcSMatt Macy */ 194*eda14cbcSMatt Macy /* #define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */ 195*eda14cbcSMatt Macy 196*eda14cbcSMatt Macy /* 197*eda14cbcSMatt Macy * CPU Feature Detection 198*eda14cbcSMatt Macy */ 199*eda14cbcSMatt Macy 200*eda14cbcSMatt Macy /* 32 or 64 bits ? */ 201*eda14cbcSMatt Macy #if defined(_LP64) 202*eda14cbcSMatt Macy #define LZ4_ARCH64 1 203*eda14cbcSMatt Macy #else 204*eda14cbcSMatt Macy #define LZ4_ARCH64 0 205*eda14cbcSMatt Macy #endif 206*eda14cbcSMatt Macy 207*eda14cbcSMatt Macy /* 208*eda14cbcSMatt Macy * Little Endian or Big Endian? 209*eda14cbcSMatt Macy * Note: overwrite the below #define if you know your architecture endianness. 210*eda14cbcSMatt Macy */ 211*eda14cbcSMatt Macy #if defined(_ZFS_BIG_ENDIAN) 212*eda14cbcSMatt Macy #define LZ4_BIG_ENDIAN 1 213*eda14cbcSMatt Macy #else 214*eda14cbcSMatt Macy /* 215*eda14cbcSMatt Macy * Little Endian assumed. PDP Endian and other very rare endian format 216*eda14cbcSMatt Macy * are unsupported. 217*eda14cbcSMatt Macy */ 218*eda14cbcSMatt Macy #undef LZ4_BIG_ENDIAN 219*eda14cbcSMatt Macy #endif 220*eda14cbcSMatt Macy 221*eda14cbcSMatt Macy /* 222*eda14cbcSMatt Macy * Unaligned memory access is automatically enabled for "common" CPU, 223*eda14cbcSMatt Macy * such as x86. For others CPU, the compiler will be more cautious, and 224*eda14cbcSMatt Macy * insert extra code to ensure aligned access is respected. If you know 225*eda14cbcSMatt Macy * your target CPU supports unaligned memory access, you may want to 226*eda14cbcSMatt Macy * force this option manually to improve performance 227*eda14cbcSMatt Macy */ 228*eda14cbcSMatt Macy #if defined(__ARM_FEATURE_UNALIGNED) 229*eda14cbcSMatt Macy #define LZ4_FORCE_UNALIGNED_ACCESS 1 230*eda14cbcSMatt Macy #endif 231*eda14cbcSMatt Macy 232*eda14cbcSMatt Macy /* 233*eda14cbcSMatt Macy * Illumos : we can't use GCC's __builtin_ctz family of builtins in the 234*eda14cbcSMatt Macy * kernel 235*eda14cbcSMatt Macy * Linux : we can use GCC's __builtin_ctz family of builtins in the 236*eda14cbcSMatt Macy * kernel 237*eda14cbcSMatt Macy */ 238*eda14cbcSMatt Macy #undef LZ4_FORCE_SW_BITCOUNT 239*eda14cbcSMatt Macy #if defined(__sparc) 240*eda14cbcSMatt Macy #define LZ4_FORCE_SW_BITCOUNT 241*eda14cbcSMatt Macy #endif 242*eda14cbcSMatt Macy 243*eda14cbcSMatt Macy /* 244*eda14cbcSMatt Macy * Compiler Options 245*eda14cbcSMatt Macy */ 246*eda14cbcSMatt Macy /* Disable restrict */ 247*eda14cbcSMatt Macy #define restrict 248*eda14cbcSMatt Macy 249*eda14cbcSMatt Macy /* 250*eda14cbcSMatt Macy * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it. 251*eda14cbcSMatt Macy * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16 252*eda14cbcSMatt Macy */ 253*eda14cbcSMatt Macy #ifdef GCC_VERSION 254*eda14cbcSMatt Macy #undef GCC_VERSION 255*eda14cbcSMatt Macy #endif 256*eda14cbcSMatt Macy 257*eda14cbcSMatt Macy #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) 258*eda14cbcSMatt Macy 259*eda14cbcSMatt Macy #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) 260*eda14cbcSMatt Macy #define expect(expr, value) (__builtin_expect((expr), (value))) 261*eda14cbcSMatt Macy #else 262*eda14cbcSMatt Macy #define expect(expr, value) (expr) 263*eda14cbcSMatt Macy #endif 264*eda14cbcSMatt Macy 265*eda14cbcSMatt Macy #ifndef likely 266*eda14cbcSMatt Macy #define likely(expr) expect((expr) != 0, 1) 267*eda14cbcSMatt Macy #endif 268*eda14cbcSMatt Macy 269*eda14cbcSMatt Macy #ifndef unlikely 270*eda14cbcSMatt Macy #define unlikely(expr) expect((expr) != 0, 0) 271*eda14cbcSMatt Macy #endif 272*eda14cbcSMatt Macy 273*eda14cbcSMatt Macy #define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \ 274*eda14cbcSMatt Macy (((x) & 0xffu) << 8))) 275*eda14cbcSMatt Macy 276*eda14cbcSMatt Macy /* Basic types */ 277*eda14cbcSMatt Macy #define BYTE uint8_t 278*eda14cbcSMatt Macy #define U16 uint16_t 279*eda14cbcSMatt Macy #define U32 uint32_t 280*eda14cbcSMatt Macy #define S32 int32_t 281*eda14cbcSMatt Macy #define U64 uint64_t 282*eda14cbcSMatt Macy 283*eda14cbcSMatt Macy #ifndef LZ4_FORCE_UNALIGNED_ACCESS 284*eda14cbcSMatt Macy #pragma pack(1) 285*eda14cbcSMatt Macy #endif 286*eda14cbcSMatt Macy 287*eda14cbcSMatt Macy typedef struct _U16_S { 288*eda14cbcSMatt Macy U16 v; 289*eda14cbcSMatt Macy } U16_S; 290*eda14cbcSMatt Macy typedef struct _U32_S { 291*eda14cbcSMatt Macy U32 v; 292*eda14cbcSMatt Macy } U32_S; 293*eda14cbcSMatt Macy typedef struct _U64_S { 294*eda14cbcSMatt Macy U64 v; 295*eda14cbcSMatt Macy } U64_S; 296*eda14cbcSMatt Macy 297*eda14cbcSMatt Macy #ifndef LZ4_FORCE_UNALIGNED_ACCESS 298*eda14cbcSMatt Macy #pragma pack() 299*eda14cbcSMatt Macy #endif 300*eda14cbcSMatt Macy 301*eda14cbcSMatt Macy #define A64(x) (((U64_S *)(x))->v) 302*eda14cbcSMatt Macy #define A32(x) (((U32_S *)(x))->v) 303*eda14cbcSMatt Macy #define A16(x) (((U16_S *)(x))->v) 304*eda14cbcSMatt Macy 305*eda14cbcSMatt Macy /* 306*eda14cbcSMatt Macy * Constants 307*eda14cbcSMatt Macy */ 308*eda14cbcSMatt Macy #define MINMATCH 4 309*eda14cbcSMatt Macy 310*eda14cbcSMatt Macy #define HASH_LOG COMPRESSIONLEVEL 311*eda14cbcSMatt Macy #define HASHTABLESIZE (1 << HASH_LOG) 312*eda14cbcSMatt Macy #define HASH_MASK (HASHTABLESIZE - 1) 313*eda14cbcSMatt Macy 314*eda14cbcSMatt Macy #define SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \ 315*eda14cbcSMatt Macy NOTCOMPRESSIBLE_CONFIRMATION : 2) 316*eda14cbcSMatt Macy 317*eda14cbcSMatt Macy #define COPYLENGTH 8 318*eda14cbcSMatt Macy #define LASTLITERALS 5 319*eda14cbcSMatt Macy #define MFLIMIT (COPYLENGTH + MINMATCH) 320*eda14cbcSMatt Macy #define MINLENGTH (MFLIMIT + 1) 321*eda14cbcSMatt Macy 322*eda14cbcSMatt Macy #define MAXD_LOG 16 323*eda14cbcSMatt Macy #define MAX_DISTANCE ((1 << MAXD_LOG) - 1) 324*eda14cbcSMatt Macy 325*eda14cbcSMatt Macy #define ML_BITS 4 326*eda14cbcSMatt Macy #define ML_MASK ((1U<<ML_BITS)-1) 327*eda14cbcSMatt Macy #define RUN_BITS (8-ML_BITS) 328*eda14cbcSMatt Macy #define RUN_MASK ((1U<<RUN_BITS)-1) 329*eda14cbcSMatt Macy 330*eda14cbcSMatt Macy 331*eda14cbcSMatt Macy /* 332*eda14cbcSMatt Macy * Architecture-specific macros 333*eda14cbcSMatt Macy */ 334*eda14cbcSMatt Macy #if LZ4_ARCH64 335*eda14cbcSMatt Macy #define STEPSIZE 8 336*eda14cbcSMatt Macy #define UARCH U64 337*eda14cbcSMatt Macy #define AARCH A64 338*eda14cbcSMatt Macy #define LZ4_COPYSTEP(s, d) A64(d) = A64(s); d += 8; s += 8; 339*eda14cbcSMatt Macy #define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d) 340*eda14cbcSMatt Macy #define LZ4_SECURECOPY(s, d, e) if (d < e) LZ4_WILDCOPY(s, d, e) 341*eda14cbcSMatt Macy #define HTYPE U32 342*eda14cbcSMatt Macy #define INITBASE(base) const BYTE* const base = ip 343*eda14cbcSMatt Macy #else /* !LZ4_ARCH64 */ 344*eda14cbcSMatt Macy #define STEPSIZE 4 345*eda14cbcSMatt Macy #define UARCH U32 346*eda14cbcSMatt Macy #define AARCH A32 347*eda14cbcSMatt Macy #define LZ4_COPYSTEP(s, d) A32(d) = A32(s); d += 4; s += 4; 348*eda14cbcSMatt Macy #define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d); 349*eda14cbcSMatt Macy #define LZ4_SECURECOPY LZ4_WILDCOPY 350*eda14cbcSMatt Macy #define HTYPE const BYTE * 351*eda14cbcSMatt Macy #define INITBASE(base) const int base = 0 352*eda14cbcSMatt Macy #endif /* !LZ4_ARCH64 */ 353*eda14cbcSMatt Macy 354*eda14cbcSMatt Macy #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE)) 355*eda14cbcSMatt Macy #define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ 356*eda14cbcSMatt Macy { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; } 357*eda14cbcSMatt Macy #define LZ4_WRITE_LITTLEENDIAN_16(p, i) \ 358*eda14cbcSMatt Macy { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; } 359*eda14cbcSMatt Macy #else 360*eda14cbcSMatt Macy #define LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); } 361*eda14cbcSMatt Macy #define LZ4_WRITE_LITTLEENDIAN_16(p, v) { A16(p) = v; p += 2; } 362*eda14cbcSMatt Macy #endif 363*eda14cbcSMatt Macy 364*eda14cbcSMatt Macy 365*eda14cbcSMatt Macy /* Local structures */ 366*eda14cbcSMatt Macy struct refTables { 367*eda14cbcSMatt Macy HTYPE hashTable[HASHTABLESIZE]; 368*eda14cbcSMatt Macy }; 369*eda14cbcSMatt Macy 370*eda14cbcSMatt Macy 371*eda14cbcSMatt Macy /* Macros */ 372*eda14cbcSMatt Macy #define LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \ 373*eda14cbcSMatt Macy HASH_LOG)) 374*eda14cbcSMatt Macy #define LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p)) 375*eda14cbcSMatt Macy #define LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e); 376*eda14cbcSMatt Macy #define LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \ 377*eda14cbcSMatt Macy d = e; } 378*eda14cbcSMatt Macy 379*eda14cbcSMatt Macy 380*eda14cbcSMatt Macy /* Private functions */ 381*eda14cbcSMatt Macy #if LZ4_ARCH64 382*eda14cbcSMatt Macy 383*eda14cbcSMatt Macy static inline int 384*eda14cbcSMatt Macy LZ4_NbCommonBytes(register U64 val) 385*eda14cbcSMatt Macy { 386*eda14cbcSMatt Macy #if defined(LZ4_BIG_ENDIAN) 387*eda14cbcSMatt Macy #if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \ 388*eda14cbcSMatt Macy !defined(LZ4_FORCE_SW_BITCOUNT) 389*eda14cbcSMatt Macy return (__builtin_clzll(val) >> 3); 390*eda14cbcSMatt Macy #else 391*eda14cbcSMatt Macy int r; 392*eda14cbcSMatt Macy if (!(val >> 32)) { 393*eda14cbcSMatt Macy r = 4; 394*eda14cbcSMatt Macy } else { 395*eda14cbcSMatt Macy r = 0; 396*eda14cbcSMatt Macy val >>= 32; 397*eda14cbcSMatt Macy } 398*eda14cbcSMatt Macy if (!(val >> 16)) { 399*eda14cbcSMatt Macy r += 2; 400*eda14cbcSMatt Macy val >>= 8; 401*eda14cbcSMatt Macy } else { 402*eda14cbcSMatt Macy val >>= 24; 403*eda14cbcSMatt Macy } 404*eda14cbcSMatt Macy r += (!val); 405*eda14cbcSMatt Macy return (r); 406*eda14cbcSMatt Macy #endif 407*eda14cbcSMatt Macy #else 408*eda14cbcSMatt Macy #if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \ 409*eda14cbcSMatt Macy !defined(LZ4_FORCE_SW_BITCOUNT) 410*eda14cbcSMatt Macy return (__builtin_ctzll(val) >> 3); 411*eda14cbcSMatt Macy #else 412*eda14cbcSMatt Macy static const int DeBruijnBytePos[64] = 413*eda14cbcSMatt Macy { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 414*eda14cbcSMatt Macy 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 415*eda14cbcSMatt Macy 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 416*eda14cbcSMatt Macy 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 417*eda14cbcSMatt Macy }; 418*eda14cbcSMatt Macy return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >> 419*eda14cbcSMatt Macy 58]; 420*eda14cbcSMatt Macy #endif 421*eda14cbcSMatt Macy #endif 422*eda14cbcSMatt Macy } 423*eda14cbcSMatt Macy 424*eda14cbcSMatt Macy #else 425*eda14cbcSMatt Macy 426*eda14cbcSMatt Macy static inline int 427*eda14cbcSMatt Macy LZ4_NbCommonBytes(register U32 val) 428*eda14cbcSMatt Macy { 429*eda14cbcSMatt Macy #if defined(LZ4_BIG_ENDIAN) 430*eda14cbcSMatt Macy #if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \ 431*eda14cbcSMatt Macy !defined(LZ4_FORCE_SW_BITCOUNT) 432*eda14cbcSMatt Macy return (__builtin_clz(val) >> 3); 433*eda14cbcSMatt Macy #else 434*eda14cbcSMatt Macy int r; 435*eda14cbcSMatt Macy if (!(val >> 16)) { 436*eda14cbcSMatt Macy r = 2; 437*eda14cbcSMatt Macy val >>= 8; 438*eda14cbcSMatt Macy } else { 439*eda14cbcSMatt Macy r = 0; 440*eda14cbcSMatt Macy val >>= 24; 441*eda14cbcSMatt Macy } 442*eda14cbcSMatt Macy r += (!val); 443*eda14cbcSMatt Macy return (r); 444*eda14cbcSMatt Macy #endif 445*eda14cbcSMatt Macy #else 446*eda14cbcSMatt Macy #if defined(__GNUC__) && (GCC_VERSION >= 304) && \ 447*eda14cbcSMatt Macy !defined(LZ4_FORCE_SW_BITCOUNT) 448*eda14cbcSMatt Macy return (__builtin_ctz(val) >> 3); 449*eda14cbcSMatt Macy #else 450*eda14cbcSMatt Macy static const int DeBruijnBytePos[32] = { 451*eda14cbcSMatt Macy 0, 0, 3, 0, 3, 1, 3, 0, 452*eda14cbcSMatt Macy 3, 2, 2, 1, 3, 2, 0, 1, 453*eda14cbcSMatt Macy 3, 3, 1, 2, 2, 2, 2, 0, 454*eda14cbcSMatt Macy 3, 1, 2, 0, 1, 0, 1, 1 455*eda14cbcSMatt Macy }; 456*eda14cbcSMatt Macy return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >> 457*eda14cbcSMatt Macy 27]; 458*eda14cbcSMatt Macy #endif 459*eda14cbcSMatt Macy #endif 460*eda14cbcSMatt Macy } 461*eda14cbcSMatt Macy 462*eda14cbcSMatt Macy #endif 463*eda14cbcSMatt Macy 464*eda14cbcSMatt Macy /* Compression functions */ 465*eda14cbcSMatt Macy 466*eda14cbcSMatt Macy /*ARGSUSED*/ 467*eda14cbcSMatt Macy static int 468*eda14cbcSMatt Macy LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize, 469*eda14cbcSMatt Macy int osize) 470*eda14cbcSMatt Macy { 471*eda14cbcSMatt Macy struct refTables *srt = (struct refTables *)ctx; 472*eda14cbcSMatt Macy HTYPE *HashTable = (HTYPE *) (srt->hashTable); 473*eda14cbcSMatt Macy 474*eda14cbcSMatt Macy const BYTE *ip = (BYTE *) source; 475*eda14cbcSMatt Macy INITBASE(base); 476*eda14cbcSMatt Macy const BYTE *anchor = ip; 477*eda14cbcSMatt Macy const BYTE *const iend = ip + isize; 478*eda14cbcSMatt Macy const BYTE *const oend = (BYTE *) dest + osize; 479*eda14cbcSMatt Macy const BYTE *const mflimit = iend - MFLIMIT; 480*eda14cbcSMatt Macy #define matchlimit (iend - LASTLITERALS) 481*eda14cbcSMatt Macy 482*eda14cbcSMatt Macy BYTE *op = (BYTE *) dest; 483*eda14cbcSMatt Macy 484*eda14cbcSMatt Macy int len, length; 485*eda14cbcSMatt Macy const int skipStrength = SKIPSTRENGTH; 486*eda14cbcSMatt Macy U32 forwardH; 487*eda14cbcSMatt Macy 488*eda14cbcSMatt Macy 489*eda14cbcSMatt Macy /* Init */ 490*eda14cbcSMatt Macy if (isize < MINLENGTH) 491*eda14cbcSMatt Macy goto _last_literals; 492*eda14cbcSMatt Macy 493*eda14cbcSMatt Macy /* First Byte */ 494*eda14cbcSMatt Macy HashTable[LZ4_HASH_VALUE(ip)] = ip - base; 495*eda14cbcSMatt Macy ip++; 496*eda14cbcSMatt Macy forwardH = LZ4_HASH_VALUE(ip); 497*eda14cbcSMatt Macy 498*eda14cbcSMatt Macy /* Main Loop */ 499*eda14cbcSMatt Macy for (;;) { 500*eda14cbcSMatt Macy int findMatchAttempts = (1U << skipStrength) + 3; 501*eda14cbcSMatt Macy const BYTE *forwardIp = ip; 502*eda14cbcSMatt Macy const BYTE *ref; 503*eda14cbcSMatt Macy BYTE *token; 504*eda14cbcSMatt Macy 505*eda14cbcSMatt Macy /* Find a match */ 506*eda14cbcSMatt Macy do { 507*eda14cbcSMatt Macy U32 h = forwardH; 508*eda14cbcSMatt Macy int step = findMatchAttempts++ >> skipStrength; 509*eda14cbcSMatt Macy ip = forwardIp; 510*eda14cbcSMatt Macy forwardIp = ip + step; 511*eda14cbcSMatt Macy 512*eda14cbcSMatt Macy if (unlikely(forwardIp > mflimit)) { 513*eda14cbcSMatt Macy goto _last_literals; 514*eda14cbcSMatt Macy } 515*eda14cbcSMatt Macy 516*eda14cbcSMatt Macy forwardH = LZ4_HASH_VALUE(forwardIp); 517*eda14cbcSMatt Macy ref = base + HashTable[h]; 518*eda14cbcSMatt Macy HashTable[h] = ip - base; 519*eda14cbcSMatt Macy 520*eda14cbcSMatt Macy } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip))); 521*eda14cbcSMatt Macy 522*eda14cbcSMatt Macy /* Catch up */ 523*eda14cbcSMatt Macy while ((ip > anchor) && (ref > (BYTE *) source) && 524*eda14cbcSMatt Macy unlikely(ip[-1] == ref[-1])) { 525*eda14cbcSMatt Macy ip--; 526*eda14cbcSMatt Macy ref--; 527*eda14cbcSMatt Macy } 528*eda14cbcSMatt Macy 529*eda14cbcSMatt Macy /* Encode Literal length */ 530*eda14cbcSMatt Macy length = ip - anchor; 531*eda14cbcSMatt Macy token = op++; 532*eda14cbcSMatt Macy 533*eda14cbcSMatt Macy /* Check output limit */ 534*eda14cbcSMatt Macy if (unlikely(op + length + (2 + 1 + LASTLITERALS) + 535*eda14cbcSMatt Macy (length >> 8) > oend)) 536*eda14cbcSMatt Macy return (0); 537*eda14cbcSMatt Macy 538*eda14cbcSMatt Macy if (length >= (int)RUN_MASK) { 539*eda14cbcSMatt Macy *token = (RUN_MASK << ML_BITS); 540*eda14cbcSMatt Macy len = length - RUN_MASK; 541*eda14cbcSMatt Macy for (; len > 254; len -= 255) 542*eda14cbcSMatt Macy *op++ = 255; 543*eda14cbcSMatt Macy *op++ = (BYTE)len; 544*eda14cbcSMatt Macy } else 545*eda14cbcSMatt Macy *token = (length << ML_BITS); 546*eda14cbcSMatt Macy 547*eda14cbcSMatt Macy /* Copy Literals */ 548*eda14cbcSMatt Macy LZ4_BLINDCOPY(anchor, op, length); 549*eda14cbcSMatt Macy 550*eda14cbcSMatt Macy _next_match: 551*eda14cbcSMatt Macy /* Encode Offset */ 552*eda14cbcSMatt Macy LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref); 553*eda14cbcSMatt Macy 554*eda14cbcSMatt Macy /* Start Counting */ 555*eda14cbcSMatt Macy ip += MINMATCH; 556*eda14cbcSMatt Macy ref += MINMATCH; /* MinMatch verified */ 557*eda14cbcSMatt Macy anchor = ip; 558*eda14cbcSMatt Macy while (likely(ip < matchlimit - (STEPSIZE - 1))) { 559*eda14cbcSMatt Macy UARCH diff = AARCH(ref) ^ AARCH(ip); 560*eda14cbcSMatt Macy if (!diff) { 561*eda14cbcSMatt Macy ip += STEPSIZE; 562*eda14cbcSMatt Macy ref += STEPSIZE; 563*eda14cbcSMatt Macy continue; 564*eda14cbcSMatt Macy } 565*eda14cbcSMatt Macy ip += LZ4_NbCommonBytes(diff); 566*eda14cbcSMatt Macy goto _endCount; 567*eda14cbcSMatt Macy } 568*eda14cbcSMatt Macy #if LZ4_ARCH64 569*eda14cbcSMatt Macy if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) { 570*eda14cbcSMatt Macy ip += 4; 571*eda14cbcSMatt Macy ref += 4; 572*eda14cbcSMatt Macy } 573*eda14cbcSMatt Macy #endif 574*eda14cbcSMatt Macy if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) { 575*eda14cbcSMatt Macy ip += 2; 576*eda14cbcSMatt Macy ref += 2; 577*eda14cbcSMatt Macy } 578*eda14cbcSMatt Macy if ((ip < matchlimit) && (*ref == *ip)) 579*eda14cbcSMatt Macy ip++; 580*eda14cbcSMatt Macy _endCount: 581*eda14cbcSMatt Macy 582*eda14cbcSMatt Macy /* Encode MatchLength */ 583*eda14cbcSMatt Macy len = (ip - anchor); 584*eda14cbcSMatt Macy /* Check output limit */ 585*eda14cbcSMatt Macy if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)) 586*eda14cbcSMatt Macy return (0); 587*eda14cbcSMatt Macy if (len >= (int)ML_MASK) { 588*eda14cbcSMatt Macy *token += ML_MASK; 589*eda14cbcSMatt Macy len -= ML_MASK; 590*eda14cbcSMatt Macy for (; len > 509; len -= 510) { 591*eda14cbcSMatt Macy *op++ = 255; 592*eda14cbcSMatt Macy *op++ = 255; 593*eda14cbcSMatt Macy } 594*eda14cbcSMatt Macy if (len > 254) { 595*eda14cbcSMatt Macy len -= 255; 596*eda14cbcSMatt Macy *op++ = 255; 597*eda14cbcSMatt Macy } 598*eda14cbcSMatt Macy *op++ = (BYTE)len; 599*eda14cbcSMatt Macy } else 600*eda14cbcSMatt Macy *token += len; 601*eda14cbcSMatt Macy 602*eda14cbcSMatt Macy /* Test end of chunk */ 603*eda14cbcSMatt Macy if (ip > mflimit) { 604*eda14cbcSMatt Macy anchor = ip; 605*eda14cbcSMatt Macy break; 606*eda14cbcSMatt Macy } 607*eda14cbcSMatt Macy /* Fill table */ 608*eda14cbcSMatt Macy HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base; 609*eda14cbcSMatt Macy 610*eda14cbcSMatt Macy /* Test next position */ 611*eda14cbcSMatt Macy ref = base + HashTable[LZ4_HASH_VALUE(ip)]; 612*eda14cbcSMatt Macy HashTable[LZ4_HASH_VALUE(ip)] = ip - base; 613*eda14cbcSMatt Macy if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) { 614*eda14cbcSMatt Macy token = op++; 615*eda14cbcSMatt Macy *token = 0; 616*eda14cbcSMatt Macy goto _next_match; 617*eda14cbcSMatt Macy } 618*eda14cbcSMatt Macy /* Prepare next loop */ 619*eda14cbcSMatt Macy anchor = ip++; 620*eda14cbcSMatt Macy forwardH = LZ4_HASH_VALUE(ip); 621*eda14cbcSMatt Macy } 622*eda14cbcSMatt Macy 623*eda14cbcSMatt Macy _last_literals: 624*eda14cbcSMatt Macy /* Encode Last Literals */ 625*eda14cbcSMatt Macy { 626*eda14cbcSMatt Macy int lastRun = iend - anchor; 627*eda14cbcSMatt Macy if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) > 628*eda14cbcSMatt Macy oend) 629*eda14cbcSMatt Macy return (0); 630*eda14cbcSMatt Macy if (lastRun >= (int)RUN_MASK) { 631*eda14cbcSMatt Macy *op++ = (RUN_MASK << ML_BITS); 632*eda14cbcSMatt Macy lastRun -= RUN_MASK; 633*eda14cbcSMatt Macy for (; lastRun > 254; lastRun -= 255) { 634*eda14cbcSMatt Macy *op++ = 255; 635*eda14cbcSMatt Macy } 636*eda14cbcSMatt Macy *op++ = (BYTE)lastRun; 637*eda14cbcSMatt Macy } else 638*eda14cbcSMatt Macy *op++ = (lastRun << ML_BITS); 639*eda14cbcSMatt Macy (void) memcpy(op, anchor, iend - anchor); 640*eda14cbcSMatt Macy op += iend - anchor; 641*eda14cbcSMatt Macy } 642*eda14cbcSMatt Macy 643*eda14cbcSMatt Macy /* End */ 644*eda14cbcSMatt Macy return (int)(((char *)op) - dest); 645*eda14cbcSMatt Macy } 646*eda14cbcSMatt Macy 647*eda14cbcSMatt Macy 648*eda14cbcSMatt Macy 649*eda14cbcSMatt Macy /* Note : this function is valid only if isize < LZ4_64KLIMIT */ 650*eda14cbcSMatt Macy #define LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1)) 651*eda14cbcSMatt Macy #define HASHLOG64K (HASH_LOG + 1) 652*eda14cbcSMatt Macy #define HASH64KTABLESIZE (1U << HASHLOG64K) 653*eda14cbcSMatt Macy #define LZ4_HASH64K_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8) - \ 654*eda14cbcSMatt Macy HASHLOG64K)) 655*eda14cbcSMatt Macy #define LZ4_HASH64K_VALUE(p) LZ4_HASH64K_FUNCTION(A32(p)) 656*eda14cbcSMatt Macy 657*eda14cbcSMatt Macy /*ARGSUSED*/ 658*eda14cbcSMatt Macy static int 659*eda14cbcSMatt Macy LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize, 660*eda14cbcSMatt Macy int osize) 661*eda14cbcSMatt Macy { 662*eda14cbcSMatt Macy struct refTables *srt = (struct refTables *)ctx; 663*eda14cbcSMatt Macy U16 *HashTable = (U16 *) (srt->hashTable); 664*eda14cbcSMatt Macy 665*eda14cbcSMatt Macy const BYTE *ip = (BYTE *) source; 666*eda14cbcSMatt Macy const BYTE *anchor = ip; 667*eda14cbcSMatt Macy const BYTE *const base = ip; 668*eda14cbcSMatt Macy const BYTE *const iend = ip + isize; 669*eda14cbcSMatt Macy const BYTE *const oend = (BYTE *) dest + osize; 670*eda14cbcSMatt Macy const BYTE *const mflimit = iend - MFLIMIT; 671*eda14cbcSMatt Macy #define matchlimit (iend - LASTLITERALS) 672*eda14cbcSMatt Macy 673*eda14cbcSMatt Macy BYTE *op = (BYTE *) dest; 674*eda14cbcSMatt Macy 675*eda14cbcSMatt Macy int len, length; 676*eda14cbcSMatt Macy const int skipStrength = SKIPSTRENGTH; 677*eda14cbcSMatt Macy U32 forwardH; 678*eda14cbcSMatt Macy 679*eda14cbcSMatt Macy /* Init */ 680*eda14cbcSMatt Macy if (isize < MINLENGTH) 681*eda14cbcSMatt Macy goto _last_literals; 682*eda14cbcSMatt Macy 683*eda14cbcSMatt Macy /* First Byte */ 684*eda14cbcSMatt Macy ip++; 685*eda14cbcSMatt Macy forwardH = LZ4_HASH64K_VALUE(ip); 686*eda14cbcSMatt Macy 687*eda14cbcSMatt Macy /* Main Loop */ 688*eda14cbcSMatt Macy for (;;) { 689*eda14cbcSMatt Macy int findMatchAttempts = (1U << skipStrength) + 3; 690*eda14cbcSMatt Macy const BYTE *forwardIp = ip; 691*eda14cbcSMatt Macy const BYTE *ref; 692*eda14cbcSMatt Macy BYTE *token; 693*eda14cbcSMatt Macy 694*eda14cbcSMatt Macy /* Find a match */ 695*eda14cbcSMatt Macy do { 696*eda14cbcSMatt Macy U32 h = forwardH; 697*eda14cbcSMatt Macy int step = findMatchAttempts++ >> skipStrength; 698*eda14cbcSMatt Macy ip = forwardIp; 699*eda14cbcSMatt Macy forwardIp = ip + step; 700*eda14cbcSMatt Macy 701*eda14cbcSMatt Macy if (forwardIp > mflimit) { 702*eda14cbcSMatt Macy goto _last_literals; 703*eda14cbcSMatt Macy } 704*eda14cbcSMatt Macy 705*eda14cbcSMatt Macy forwardH = LZ4_HASH64K_VALUE(forwardIp); 706*eda14cbcSMatt Macy ref = base + HashTable[h]; 707*eda14cbcSMatt Macy HashTable[h] = ip - base; 708*eda14cbcSMatt Macy 709*eda14cbcSMatt Macy } while (A32(ref) != A32(ip)); 710*eda14cbcSMatt Macy 711*eda14cbcSMatt Macy /* Catch up */ 712*eda14cbcSMatt Macy while ((ip > anchor) && (ref > (BYTE *) source) && 713*eda14cbcSMatt Macy (ip[-1] == ref[-1])) { 714*eda14cbcSMatt Macy ip--; 715*eda14cbcSMatt Macy ref--; 716*eda14cbcSMatt Macy } 717*eda14cbcSMatt Macy 718*eda14cbcSMatt Macy /* Encode Literal length */ 719*eda14cbcSMatt Macy length = ip - anchor; 720*eda14cbcSMatt Macy token = op++; 721*eda14cbcSMatt Macy 722*eda14cbcSMatt Macy /* Check output limit */ 723*eda14cbcSMatt Macy if (unlikely(op + length + (2 + 1 + LASTLITERALS) + 724*eda14cbcSMatt Macy (length >> 8) > oend)) 725*eda14cbcSMatt Macy return (0); 726*eda14cbcSMatt Macy 727*eda14cbcSMatt Macy if (length >= (int)RUN_MASK) { 728*eda14cbcSMatt Macy *token = (RUN_MASK << ML_BITS); 729*eda14cbcSMatt Macy len = length - RUN_MASK; 730*eda14cbcSMatt Macy for (; len > 254; len -= 255) 731*eda14cbcSMatt Macy *op++ = 255; 732*eda14cbcSMatt Macy *op++ = (BYTE)len; 733*eda14cbcSMatt Macy } else 734*eda14cbcSMatt Macy *token = (length << ML_BITS); 735*eda14cbcSMatt Macy 736*eda14cbcSMatt Macy /* Copy Literals */ 737*eda14cbcSMatt Macy LZ4_BLINDCOPY(anchor, op, length); 738*eda14cbcSMatt Macy 739*eda14cbcSMatt Macy _next_match: 740*eda14cbcSMatt Macy /* Encode Offset */ 741*eda14cbcSMatt Macy LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref); 742*eda14cbcSMatt Macy 743*eda14cbcSMatt Macy /* Start Counting */ 744*eda14cbcSMatt Macy ip += MINMATCH; 745*eda14cbcSMatt Macy ref += MINMATCH; /* MinMatch verified */ 746*eda14cbcSMatt Macy anchor = ip; 747*eda14cbcSMatt Macy while (ip < matchlimit - (STEPSIZE - 1)) { 748*eda14cbcSMatt Macy UARCH diff = AARCH(ref) ^ AARCH(ip); 749*eda14cbcSMatt Macy if (!diff) { 750*eda14cbcSMatt Macy ip += STEPSIZE; 751*eda14cbcSMatt Macy ref += STEPSIZE; 752*eda14cbcSMatt Macy continue; 753*eda14cbcSMatt Macy } 754*eda14cbcSMatt Macy ip += LZ4_NbCommonBytes(diff); 755*eda14cbcSMatt Macy goto _endCount; 756*eda14cbcSMatt Macy } 757*eda14cbcSMatt Macy #if LZ4_ARCH64 758*eda14cbcSMatt Macy if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) { 759*eda14cbcSMatt Macy ip += 4; 760*eda14cbcSMatt Macy ref += 4; 761*eda14cbcSMatt Macy } 762*eda14cbcSMatt Macy #endif 763*eda14cbcSMatt Macy if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) { 764*eda14cbcSMatt Macy ip += 2; 765*eda14cbcSMatt Macy ref += 2; 766*eda14cbcSMatt Macy } 767*eda14cbcSMatt Macy if ((ip < matchlimit) && (*ref == *ip)) 768*eda14cbcSMatt Macy ip++; 769*eda14cbcSMatt Macy _endCount: 770*eda14cbcSMatt Macy 771*eda14cbcSMatt Macy /* Encode MatchLength */ 772*eda14cbcSMatt Macy len = (ip - anchor); 773*eda14cbcSMatt Macy /* Check output limit */ 774*eda14cbcSMatt Macy if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)) 775*eda14cbcSMatt Macy return (0); 776*eda14cbcSMatt Macy if (len >= (int)ML_MASK) { 777*eda14cbcSMatt Macy *token += ML_MASK; 778*eda14cbcSMatt Macy len -= ML_MASK; 779*eda14cbcSMatt Macy for (; len > 509; len -= 510) { 780*eda14cbcSMatt Macy *op++ = 255; 781*eda14cbcSMatt Macy *op++ = 255; 782*eda14cbcSMatt Macy } 783*eda14cbcSMatt Macy if (len > 254) { 784*eda14cbcSMatt Macy len -= 255; 785*eda14cbcSMatt Macy *op++ = 255; 786*eda14cbcSMatt Macy } 787*eda14cbcSMatt Macy *op++ = (BYTE)len; 788*eda14cbcSMatt Macy } else 789*eda14cbcSMatt Macy *token += len; 790*eda14cbcSMatt Macy 791*eda14cbcSMatt Macy /* Test end of chunk */ 792*eda14cbcSMatt Macy if (ip > mflimit) { 793*eda14cbcSMatt Macy anchor = ip; 794*eda14cbcSMatt Macy break; 795*eda14cbcSMatt Macy } 796*eda14cbcSMatt Macy /* Fill table */ 797*eda14cbcSMatt Macy HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base; 798*eda14cbcSMatt Macy 799*eda14cbcSMatt Macy /* Test next position */ 800*eda14cbcSMatt Macy ref = base + HashTable[LZ4_HASH64K_VALUE(ip)]; 801*eda14cbcSMatt Macy HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base; 802*eda14cbcSMatt Macy if (A32(ref) == A32(ip)) { 803*eda14cbcSMatt Macy token = op++; 804*eda14cbcSMatt Macy *token = 0; 805*eda14cbcSMatt Macy goto _next_match; 806*eda14cbcSMatt Macy } 807*eda14cbcSMatt Macy /* Prepare next loop */ 808*eda14cbcSMatt Macy anchor = ip++; 809*eda14cbcSMatt Macy forwardH = LZ4_HASH64K_VALUE(ip); 810*eda14cbcSMatt Macy } 811*eda14cbcSMatt Macy 812*eda14cbcSMatt Macy _last_literals: 813*eda14cbcSMatt Macy /* Encode Last Literals */ 814*eda14cbcSMatt Macy { 815*eda14cbcSMatt Macy int lastRun = iend - anchor; 816*eda14cbcSMatt Macy if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) > 817*eda14cbcSMatt Macy oend) 818*eda14cbcSMatt Macy return (0); 819*eda14cbcSMatt Macy if (lastRun >= (int)RUN_MASK) { 820*eda14cbcSMatt Macy *op++ = (RUN_MASK << ML_BITS); 821*eda14cbcSMatt Macy lastRun -= RUN_MASK; 822*eda14cbcSMatt Macy for (; lastRun > 254; lastRun -= 255) 823*eda14cbcSMatt Macy *op++ = 255; 824*eda14cbcSMatt Macy *op++ = (BYTE)lastRun; 825*eda14cbcSMatt Macy } else 826*eda14cbcSMatt Macy *op++ = (lastRun << ML_BITS); 827*eda14cbcSMatt Macy (void) memcpy(op, anchor, iend - anchor); 828*eda14cbcSMatt Macy op += iend - anchor; 829*eda14cbcSMatt Macy } 830*eda14cbcSMatt Macy 831*eda14cbcSMatt Macy /* End */ 832*eda14cbcSMatt Macy return (int)(((char *)op) - dest); 833*eda14cbcSMatt Macy } 834*eda14cbcSMatt Macy 835*eda14cbcSMatt Macy static int 836*eda14cbcSMatt Macy real_LZ4_compress(const char *source, char *dest, int isize, int osize) 837*eda14cbcSMatt Macy { 838*eda14cbcSMatt Macy void *ctx; 839*eda14cbcSMatt Macy int result; 840*eda14cbcSMatt Macy 841*eda14cbcSMatt Macy ASSERT(lz4_cache != NULL); 842*eda14cbcSMatt Macy ctx = kmem_cache_alloc(lz4_cache, KM_SLEEP); 843*eda14cbcSMatt Macy 844*eda14cbcSMatt Macy /* 845*eda14cbcSMatt Macy * out of kernel memory, gently fall through - this will disable 846*eda14cbcSMatt Macy * compression in zio_compress_data 847*eda14cbcSMatt Macy */ 848*eda14cbcSMatt Macy if (ctx == NULL) 849*eda14cbcSMatt Macy return (0); 850*eda14cbcSMatt Macy 851*eda14cbcSMatt Macy memset(ctx, 0, sizeof (struct refTables)); 852*eda14cbcSMatt Macy 853*eda14cbcSMatt Macy if (isize < LZ4_64KLIMIT) 854*eda14cbcSMatt Macy result = LZ4_compress64kCtx(ctx, source, dest, isize, osize); 855*eda14cbcSMatt Macy else 856*eda14cbcSMatt Macy result = LZ4_compressCtx(ctx, source, dest, isize, osize); 857*eda14cbcSMatt Macy 858*eda14cbcSMatt Macy kmem_cache_free(lz4_cache, ctx); 859*eda14cbcSMatt Macy return (result); 860*eda14cbcSMatt Macy } 861*eda14cbcSMatt Macy 862*eda14cbcSMatt Macy /* Decompression functions */ 863*eda14cbcSMatt Macy 864*eda14cbcSMatt Macy /* 865*eda14cbcSMatt Macy * Note: The decoding functions real_LZ4_uncompress() and 866*eda14cbcSMatt Macy * LZ4_uncompress_unknownOutputSize() are safe against "buffer overflow" 867*eda14cbcSMatt Macy * attack type. They will never write nor read outside of the provided 868*eda14cbcSMatt Macy * output buffers. LZ4_uncompress_unknownOutputSize() also insures that 869*eda14cbcSMatt Macy * it will never read outside of the input buffer. A corrupted input 870*eda14cbcSMatt Macy * will produce an error result, a negative int, indicating the position 871*eda14cbcSMatt Macy * of the error within input stream. 872*eda14cbcSMatt Macy * 873*eda14cbcSMatt Macy * Note[2]: real_LZ4_uncompress(), referred to above, is not used in ZFS so 874*eda14cbcSMatt Macy * its code is not present here. 875*eda14cbcSMatt Macy */ 876*eda14cbcSMatt Macy 877*eda14cbcSMatt Macy static const int dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; 878*eda14cbcSMatt Macy #if LZ4_ARCH64 879*eda14cbcSMatt Macy static const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3}; 880*eda14cbcSMatt Macy #endif 881*eda14cbcSMatt Macy 882*eda14cbcSMatt Macy static int 883*eda14cbcSMatt Macy LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize, 884*eda14cbcSMatt Macy int maxOutputSize) 885*eda14cbcSMatt Macy { 886*eda14cbcSMatt Macy /* Local Variables */ 887*eda14cbcSMatt Macy const BYTE *restrict ip = (const BYTE *) source; 888*eda14cbcSMatt Macy const BYTE *const iend = ip + isize; 889*eda14cbcSMatt Macy const BYTE *ref; 890*eda14cbcSMatt Macy 891*eda14cbcSMatt Macy BYTE *op = (BYTE *) dest; 892*eda14cbcSMatt Macy BYTE *const oend = op + maxOutputSize; 893*eda14cbcSMatt Macy BYTE *cpy; 894*eda14cbcSMatt Macy 895*eda14cbcSMatt Macy /* Main Loop */ 896*eda14cbcSMatt Macy while (ip < iend) { 897*eda14cbcSMatt Macy unsigned token; 898*eda14cbcSMatt Macy size_t length; 899*eda14cbcSMatt Macy 900*eda14cbcSMatt Macy /* get runlength */ 901*eda14cbcSMatt Macy token = *ip++; 902*eda14cbcSMatt Macy if ((length = (token >> ML_BITS)) == RUN_MASK) { 903*eda14cbcSMatt Macy int s = 255; 904*eda14cbcSMatt Macy while ((ip < iend) && (s == 255)) { 905*eda14cbcSMatt Macy s = *ip++; 906*eda14cbcSMatt Macy if (unlikely(length > (size_t)(length + s))) 907*eda14cbcSMatt Macy goto _output_error; 908*eda14cbcSMatt Macy length += s; 909*eda14cbcSMatt Macy } 910*eda14cbcSMatt Macy } 911*eda14cbcSMatt Macy /* copy literals */ 912*eda14cbcSMatt Macy cpy = op + length; 913*eda14cbcSMatt Macy /* CORNER-CASE: cpy might overflow. */ 914*eda14cbcSMatt Macy if (cpy < op) 915*eda14cbcSMatt Macy goto _output_error; /* cpy was overflowed, bail! */ 916*eda14cbcSMatt Macy if ((cpy > oend - COPYLENGTH) || 917*eda14cbcSMatt Macy (ip + length > iend - COPYLENGTH)) { 918*eda14cbcSMatt Macy if (cpy > oend) 919*eda14cbcSMatt Macy /* Error: writes beyond output buffer */ 920*eda14cbcSMatt Macy goto _output_error; 921*eda14cbcSMatt Macy if (ip + length != iend) 922*eda14cbcSMatt Macy /* 923*eda14cbcSMatt Macy * Error: LZ4 format requires to consume all 924*eda14cbcSMatt Macy * input at this stage 925*eda14cbcSMatt Macy */ 926*eda14cbcSMatt Macy goto _output_error; 927*eda14cbcSMatt Macy (void) memcpy(op, ip, length); 928*eda14cbcSMatt Macy op += length; 929*eda14cbcSMatt Macy /* Necessarily EOF, due to parsing restrictions */ 930*eda14cbcSMatt Macy break; 931*eda14cbcSMatt Macy } 932*eda14cbcSMatt Macy LZ4_WILDCOPY(ip, op, cpy); 933*eda14cbcSMatt Macy ip -= (op - cpy); 934*eda14cbcSMatt Macy op = cpy; 935*eda14cbcSMatt Macy 936*eda14cbcSMatt Macy /* get offset */ 937*eda14cbcSMatt Macy LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip); 938*eda14cbcSMatt Macy ip += 2; 939*eda14cbcSMatt Macy if (ref < (BYTE * const) dest) 940*eda14cbcSMatt Macy /* 941*eda14cbcSMatt Macy * Error: offset creates reference outside of 942*eda14cbcSMatt Macy * destination buffer 943*eda14cbcSMatt Macy */ 944*eda14cbcSMatt Macy goto _output_error; 945*eda14cbcSMatt Macy 946*eda14cbcSMatt Macy /* get matchlength */ 947*eda14cbcSMatt Macy if ((length = (token & ML_MASK)) == ML_MASK) { 948*eda14cbcSMatt Macy while (ip < iend) { 949*eda14cbcSMatt Macy int s = *ip++; 950*eda14cbcSMatt Macy if (unlikely(length > (size_t)(length + s))) 951*eda14cbcSMatt Macy goto _output_error; 952*eda14cbcSMatt Macy length += s; 953*eda14cbcSMatt Macy if (s == 255) 954*eda14cbcSMatt Macy continue; 955*eda14cbcSMatt Macy break; 956*eda14cbcSMatt Macy } 957*eda14cbcSMatt Macy } 958*eda14cbcSMatt Macy /* copy repeated sequence */ 959*eda14cbcSMatt Macy if (unlikely(op - ref < STEPSIZE)) { 960*eda14cbcSMatt Macy #if LZ4_ARCH64 961*eda14cbcSMatt Macy int dec64 = dec64table[op - ref]; 962*eda14cbcSMatt Macy #else 963*eda14cbcSMatt Macy const int dec64 = 0; 964*eda14cbcSMatt Macy #endif 965*eda14cbcSMatt Macy op[0] = ref[0]; 966*eda14cbcSMatt Macy op[1] = ref[1]; 967*eda14cbcSMatt Macy op[2] = ref[2]; 968*eda14cbcSMatt Macy op[3] = ref[3]; 969*eda14cbcSMatt Macy op += 4; 970*eda14cbcSMatt Macy ref += 4; 971*eda14cbcSMatt Macy ref -= dec32table[op - ref]; 972*eda14cbcSMatt Macy A32(op) = A32(ref); 973*eda14cbcSMatt Macy op += STEPSIZE - 4; 974*eda14cbcSMatt Macy ref -= dec64; 975*eda14cbcSMatt Macy } else { 976*eda14cbcSMatt Macy LZ4_COPYSTEP(ref, op); 977*eda14cbcSMatt Macy } 978*eda14cbcSMatt Macy cpy = op + length - (STEPSIZE - 4); 979*eda14cbcSMatt Macy if (cpy > oend - COPYLENGTH) { 980*eda14cbcSMatt Macy if (cpy > oend) 981*eda14cbcSMatt Macy /* 982*eda14cbcSMatt Macy * Error: request to write outside of 983*eda14cbcSMatt Macy * destination buffer 984*eda14cbcSMatt Macy */ 985*eda14cbcSMatt Macy goto _output_error; 986*eda14cbcSMatt Macy #if LZ4_ARCH64 987*eda14cbcSMatt Macy if ((ref + COPYLENGTH) > oend) 988*eda14cbcSMatt Macy #else 989*eda14cbcSMatt Macy if ((ref + COPYLENGTH) > oend || 990*eda14cbcSMatt Macy (op + COPYLENGTH) > oend) 991*eda14cbcSMatt Macy #endif 992*eda14cbcSMatt Macy goto _output_error; 993*eda14cbcSMatt Macy LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH)); 994*eda14cbcSMatt Macy while (op < cpy) 995*eda14cbcSMatt Macy *op++ = *ref++; 996*eda14cbcSMatt Macy op = cpy; 997*eda14cbcSMatt Macy if (op == oend) 998*eda14cbcSMatt Macy /* 999*eda14cbcSMatt Macy * Check EOF (should never happen, since 1000*eda14cbcSMatt Macy * last 5 bytes are supposed to be literals) 1001*eda14cbcSMatt Macy */ 1002*eda14cbcSMatt Macy goto _output_error; 1003*eda14cbcSMatt Macy continue; 1004*eda14cbcSMatt Macy } 1005*eda14cbcSMatt Macy LZ4_SECURECOPY(ref, op, cpy); 1006*eda14cbcSMatt Macy op = cpy; /* correction */ 1007*eda14cbcSMatt Macy } 1008*eda14cbcSMatt Macy 1009*eda14cbcSMatt Macy /* end of decoding */ 1010*eda14cbcSMatt Macy return (int)(((char *)op) - dest); 1011*eda14cbcSMatt Macy 1012*eda14cbcSMatt Macy /* write overflow error detected */ 1013*eda14cbcSMatt Macy _output_error: 1014*eda14cbcSMatt Macy return (-1); 1015*eda14cbcSMatt Macy } 1016*eda14cbcSMatt Macy 1017*eda14cbcSMatt Macy void 1018*eda14cbcSMatt Macy lz4_init(void) 1019*eda14cbcSMatt Macy { 1020*eda14cbcSMatt Macy lz4_cache = kmem_cache_create("lz4_cache", 1021*eda14cbcSMatt Macy sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL, 0); 1022*eda14cbcSMatt Macy } 1023*eda14cbcSMatt Macy 1024*eda14cbcSMatt Macy void 1025*eda14cbcSMatt Macy lz4_fini(void) 1026*eda14cbcSMatt Macy { 1027*eda14cbcSMatt Macy if (lz4_cache) { 1028*eda14cbcSMatt Macy kmem_cache_destroy(lz4_cache); 1029*eda14cbcSMatt Macy lz4_cache = NULL; 1030*eda14cbcSMatt Macy } 1031*eda14cbcSMatt Macy } 1032