/*
 * LZ4 - Fast LZ compression algorithm
 * Header File
 * Copyright (C) 2011-2013, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
 * - LZ4 source repository : http://code.google.com/p/lz4/
 */

#include <sys/zfs_context.h>

static int real_LZ4_compress(const char *source, char *dest, int isize,
    int osize);
static int real_LZ4_uncompress(const char *source, char *dest, int osize);
static int LZ4_compressBound(int isize);
static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);
static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);
static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);

/*ARGSUSED*/
size_t
lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
{
	uint32_t bufsiz;
	char *dest = d_start;

	ASSERT(d_len >= sizeof (bufsiz));

	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
	    d_len - sizeof (bufsiz));

	/* Signal an error if the compression routine returned zero. */
	if (bufsiz == 0)
		return (s_len);

	/*
	 * Encode the compressed buffer size at the start. We'll need this in
	 * decompression to counter the effects of padding which might be
	 * added to the compressed buffer and which, if unhandled, would
	 * confuse the hell out of our decompression function.
	 */
	*(uint32_t *)dest = BE_32(bufsiz);

	return (bufsiz + sizeof (bufsiz));
}

/*ARGSUSED*/
int
lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
{
	const char *src = s_start;
	uint32_t bufsiz = BE_IN32(src);

	/* invalid compressed buffer size encoded at start */
	if (bufsiz + sizeof (bufsiz) > s_len)
		return (1);

	/*
	 * Returns 0 on success (decompression function returned non-negative)
	 * and non-zero on failure (decompression function returned negative).
	 */
	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
	    d_start, bufsiz, d_len) < 0);
}
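
/*
 * Illustrative round trip through the two wrappers above. This is only a
 * sketch, assuming a caller shaped like zio_compress_data(); the buffer
 * names (src, packed, out) are hypothetical and the snippet is not part of
 * this file:
 *
 *	size_t c_len = lz4_compress(src, packed, s_len, d_len, 0);
 *	if (c_len >= s_len) {
 *		// did not fit in d_len bytes; keep the block uncompressed
 *	} else {
 *		// "packed" starts with a 4-byte big-endian payload length,
 *		// followed by the LZ4 stream itself; pass the original
 *		// (uncompressed) size back in as d_len when decompressing
 *		if (lz4_decompress(packed, out, c_len, s_len, 0) != 0)
 *			// corrupt or truncated compressed block
 *	}
 */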

/*
 * LZ4 API Description:
 *
 * Simple Functions:
 * real_LZ4_compress() :
 *	isize : is the input size. Max supported value is ~1.9GB
 *	return : the number of bytes written in buffer dest
 *	    or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
 *	note : destination buffer must be already allocated.
 *	    It must be sized to handle worst-case situations (input data
 *	    not compressible); worst-case size evaluation is provided by
 *	    function LZ4_compressBound().
 *
 * real_LZ4_uncompress() :
 *	osize : is the output size, therefore the original size
 *	return : the number of bytes read in the source buffer.
 *	    If the source stream is malformed, the function will stop
 *	    decoding and return a negative result, indicating the byte
 *	    position of the faulty instruction. This function never
 *	    writes beyond dest + osize, and is therefore protected
 *	    against malicious data packets.
 *	note : destination buffer must be already allocated
 *
 * Advanced Functions
 *
 * LZ4_compressBound() :
 *	Provides the maximum size that LZ4 may output in a "worst case"
 *	scenario (input data not compressible). Primarily useful for memory
 *	allocation of the output buffer.
 *
 *	isize : is the input size. Max supported value is ~1.9GB
 *	return : maximum output size in a "worst case" scenario
 *	note : this function is limited by "int" range (2^31-1)
 *
 * LZ4_uncompress_unknownOutputSize() :
 *	isize : is the input size, therefore the compressed size
 *	maxOutputSize : is the size of the destination buffer (which must be
 *	    already allocated)
 *	return : the number of bytes decoded in the destination buffer
 *	    (necessarily <= maxOutputSize). If the source stream is
 *	    malformed, the function will stop decoding and return a
 *	    negative result, indicating the byte position of the faulty
 *	    instruction. This function never writes beyond dest +
 *	    maxOutputSize, and is therefore protected against malicious
 *	    data packets.
 *	note : Destination buffer must be already allocated.
 *	    This version is slightly slower than real_LZ4_uncompress()
 *
 * LZ4_compressCtx() :
 *	This function explicitly handles the CTX memory structure.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_zalloc). Passing NULL
 *	isn't valid.
 *
 * LZ4_compress64kCtx() :
 *	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 *	isize *Must* be <64KB, otherwise the output will be corrupted.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_zalloc). Passing NULL
 *	isn't valid.
 */
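
/*
 * A minimal usage sketch of the calls documented above (illustration only;
 * the names src, out and out_size are hypothetical and the snippet is not
 * part of the build):
 *
 *	char *dst = kmem_alloc(LZ4_compressBound(isize), KM_SLEEP);
 *	int c_len = real_LZ4_compress(src, dst, isize,
 *	    LZ4_compressBound(isize));
 *	...
 *	int d_len = LZ4_uncompress_unknownOutputSize(dst, out, c_len,
 *	    out_size);
 *	if (d_len < 0)
 *		// malformed input; -d_len is the offending input offset
 *	kmem_free(dst, LZ4_compressBound(isize));
 */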

/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio.
 * Lowering this value reduces memory usage. Reduced memory usage
 * typically improves speed, due to cache effect (ex: L1 32KB for Intel,
 * L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 * (examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 * algorithm skip faster over data segments considered "incompressible".
 * This may decrease compression ratio dramatically, but will be
 * faster on incompressible data. Increasing this value will make
 * the algorithm search more before declaring a segment "incompressible".
 * This could improve compression a bit, but will be slower on
 * incompressible data. The default value (6) is recommended.
 */
#define NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a performance
 * boost on big-endian CPUs, but the resulting compressed stream
 * will be incompatible with little-endian CPUs. You can set this option
 * to 1 in situations where data will stay within a closed environment.
 * This option is useless on little-endian CPUs (such as x86).
 */
/* #define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */

/*
 * CPU Feature Detection
 */

/* 32 or 64 bits? */
#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
	defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
	defined(__LP64__) || defined(_LP64))
#define LZ4_ARCH64 1
#else
#define LZ4_ARCH64 0
#endif

/*
 * Limits the amount of stack space that the algorithm may consume to hold
 * the compression lookup table. The value `9' here means we'll never use
 * more than 2k of stack (see above for a description of COMPRESSIONLEVEL).
 * If more memory is needed, it is allocated from the heap.
 */
#define STACKLIMIT 9

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 */
#if (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || \
	defined(_BIG_ENDIAN) || defined(_ARCH_PPC) || defined(__PPC__) || \
	defined(__PPC) || defined(PPC) || defined(__powerpc__) || \
	defined(__powerpc) || defined(powerpc) || \
	((defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))))
#define LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP-endian and other very rare endian formats
 * are unsupported.
 */
#endif

/*
 * Unaligned memory access is automatically enabled for "common" CPUs,
 * such as x86. For other CPUs, the compiler will be more cautious and
 * insert extra code to ensure aligned access is respected. If you know
 * your target CPU supports unaligned memory access, you may want to
 * force this option manually to improve performance.
 */
#if defined(__ARM_FEATURE_UNALIGNED)
#define LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

#ifdef __sparc
#define LZ4_FORCE_SW_BITCOUNT
#endif

/*
 * Compiler Options
 */
#if __STDC_VERSION__ >= 199901L	/* C99 */
/* "restrict" is a known keyword */
#else
/* Disable restrict */
#define restrict
#endif

#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#ifdef _MSC_VER
/* Visual Studio */
/* Visual is not C99, but supports some kind of inline */
#define inline __forceinline
#if LZ4_ARCH64
/* For Visual 2005 */
#pragma intrinsic(_BitScanForward64)
#pragma intrinsic(_BitScanReverse64)
#else /* !LZ4_ARCH64 */
/* For Visual 2005 */
#pragma intrinsic(_BitScanForward)
#pragma intrinsic(_BitScanReverse)
#endif /* !LZ4_ARCH64 */
#endif /* _MSC_VER */

#ifdef _MSC_VER
#define lz4_bswap16(x) _byteswap_ushort(x)
#else /* !_MSC_VER */
#define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
	(((x) & 0xffu) << 8)))
#endif /* !_MSC_VER */

#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
#define expect(expr, value) (__builtin_expect((expr), (value)))
#else
#define expect(expr, value) (expr)
#endif

#define likely(expr) expect((expr) != 0, 1)
#define unlikely(expr) expect((expr) != 0, 0)
/* Basic types */
#if defined(_MSC_VER)
/* Visual Studio does not support 'stdint' natively */
#define BYTE unsigned __int8
#define U16 unsigned __int16
#define U32 unsigned __int32
#define S32 __int32
#define U64 unsigned __int64
#else /* !defined(_MSC_VER) */
#define BYTE uint8_t
#define U16 uint16_t
#define U32 uint32_t
#define S32 int32_t
#define U64 uint64_t
#endif /* !defined(_MSC_VER) */

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(1)
#endif

typedef struct _U16_S {
	U16 v;
} U16_S;
typedef struct _U32_S {
	U32 v;
} U32_S;
typedef struct _U64_S {
	U64 v;
} U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack()
#endif

#define A64(x) (((U64_S *)(x))->v)
#define A32(x) (((U32_S *)(x))->v)
#define A16(x) (((U16_S *)(x))->v)

/*
 * Constants
 */
#define MINMATCH 4

#define HASH_LOG COMPRESSIONLEVEL
#define HASHTABLESIZE (1 << HASH_LOG)
#define HASH_MASK (HASHTABLESIZE - 1)

#define SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
	NOTCOMPRESSIBLE_CONFIRMATION : 2)

/*
 * Defines if memory is allocated into the stack (local variable),
 * or into the heap (kmem_alloc()).
 */
#define HEAPMODE (HASH_LOG > STACKLIMIT)
#define COPYLENGTH 8
#define LASTLITERALS 5
#define MFLIMIT (COPYLENGTH + MINMATCH)
#define MINLENGTH (MFLIMIT + 1)

#define MAXD_LOG 16
#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define ML_BITS 4
#define ML_MASK ((1U<<ML_BITS)-1)
#define RUN_BITS (8-ML_BITS)
#define RUN_MASK ((1U<<RUN_BITS)-1)
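
/*
 * Each encoded sequence begins with a one-byte token: the upper RUN_BITS
 * (4) bits hold the literal run length and the lower ML_BITS (4) bits hold
 * the match length minus MINMATCH. A field equal to RUN_MASK/ML_MASK (15)
 * means the length continues in extra bytes, each adding up to 255, until
 * a byte smaller than 255 terminates it.
 */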


/*
 * Architecture-specific macros
 */
#if LZ4_ARCH64
#define STEPSIZE 8
#define UARCH U64
#define AARCH A64
#define LZ4_COPYSTEP(s, d) A64(d) = A64(s); d += 8; s += 8;
#define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d)
#define LZ4_SECURECOPY(s, d, e) if (d < e) LZ4_WILDCOPY(s, d, e)
#define HTYPE U32
#define INITBASE(base) const BYTE* const base = ip
#else /* !LZ4_ARCH64 */
#define STEPSIZE 4
#define UARCH U32
#define AARCH A32
#define LZ4_COPYSTEP(s, d) A32(d) = A32(s); d += 4; s += 4;
#define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
#define LZ4_SECURECOPY LZ4_WILDCOPY
#define HTYPE const BYTE *
#define INITBASE(base) const int base = 0
#endif /* !LZ4_ARCH64 */

#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \
	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define LZ4_WRITE_LITTLEENDIAN_16(p, i) \
	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
#else
#define LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
#define LZ4_WRITE_LITTLEENDIAN_16(p, v) { A16(p) = v; p += 2; }
#endif


/* Local structures */
struct refTables {
	HTYPE hashTable[HASHTABLESIZE];
};


/* Macros */
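/*
 * LZ4_HASH_FUNCTION() is Knuth-style multiplicative hashing: the MINMATCH
 * (4) bytes at position p, read as one 32-bit word, are multiplied by
 * 2654435761 (a prime near 2^32 divided by the golden ratio) and the top
 * HASH_LOG bits of the product select the hash table slot.
 */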
#define LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
	HASH_LOG))
#define LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
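/*
 * LZ4_WILDCOPY() copies in packets of COPYLENGTH (8) bytes and may write up
 * to COPYLENGTH - 1 bytes past the requested end; the MFLIMIT/LASTLITERALS
 * margins and the output-limit checks in the callers keep that overrun
 * inside the buffers. LZ4_BLINDCOPY() does the same, then advances the
 * destination pointer by exactly the requested length.
 */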
#define LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
#define LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
	d = e; }


/* Private functions */
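/*
 * LZ4_NbCommonBytes() takes the XOR of two machine words known to differ
 * and returns how many of their bytes match in memory order: a trailing
 * zero count (divided by 8) on little-endian CPUs, a leading zero count
 * (divided by 8) on big-endian CPUs. Hardware bit-scan intrinsics or
 * compiler builtins are used where available, with a branchy or De Bruijn
 * lookup fallback in portable code.
 */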
#if LZ4_ARCH64

static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanReverse64(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clzll(val) >> 3);
#else
	int r;
	if (!(val >> 32)) {
		r = 4;
	} else {
		r = 0;
		val >>= 32;
	}
	if (!(val >> 16)) {
		r += 2;
		val >>= 8;
	} else {
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanForward64(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctzll(val) >> 3);
#else
	static const int DeBruijnBytePos[64] =
	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
	};
	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
	    58];
#endif
#endif
}

#else

static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanReverse(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clz(val) >> 3);
#else
	int r;
	if (!(val >> 16)) {
		r = 2;
		val >>= 8;
	} else {
		r = 0;
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanForward(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctz(val) >> 3);
#else
	static const int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0,
		3, 2, 2, 1, 3, 2, 0, 1,
		3, 3, 1, 2, 2, 2, 2, 0,
		3, 1, 2, 0, 1, 0, 1, 1
	};
	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
	    27];
#endif
#endif
}

#endif

/* Public functions */

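/*
 * Worst-case output size: roughly one extra length byte per 255 input
 * bytes of literals, plus a small fixed allowance (assumed here to cover
 * the token, length fields and final literal run), hence
 * isize + isize/255 + 16 below.
 */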
static int
LZ4_compressBound(int isize)
{
	return (isize + (isize / 255) + 16);
}

/* Compression functions */

/*ARGSUSED*/
static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	HTYPE *HashTable = (HTYPE *) (srt->hashTable);
#else
	HTYPE HashTable[HASHTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (BYTE *) source;
	INITBASE(base);
	const BYTE *anchor = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;


	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
	ip++;
	forwardH = LZ4_HASH_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if unlikely(forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    unlikely(ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while likely(ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255) {
				*op++ = 255;
			}
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}



/* Note: this function is valid only if isize < LZ4_64KLIMIT */
#define LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
#define HASHLOG64K (HASH_LOG + 1)
#define HASH64KTABLESIZE (1U << HASHLOG64K)
#define LZ4_HASH64K_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8) - \
	HASHLOG64K))
#define LZ4_HASH64K_VALUE(p) LZ4_HASH64K_FUNCTION(A32(p))

/*ARGSUSED*/
static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	U16 *HashTable = (U16 *) (srt->hashTable);
#else
	U16 HashTable[HASH64KTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (BYTE *) source;
	const BYTE *anchor = ip;
	const BYTE *const base = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	ip++;
	forwardH = LZ4_HASH64K_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH64K_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while (A32(ref) != A32(ip));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    (ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
		if (A32(ref) == A32(ip)) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH64K_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255)
				*op++ = 255;
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}

static int
real_LZ4_compress(const char *source, char *dest, int isize, int osize)
{
#if HEAPMODE
	void *ctx = kmem_zalloc(sizeof (struct refTables), KM_NOSLEEP);
	int result;

	/*
	 * out of kernel memory, gently fall through - this will disable
	 * compression in zio_compress_data
	 */
	if (ctx == NULL)
		return (0);

	if (isize < LZ4_64KLIMIT)
		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
	else
		result = LZ4_compressCtx(ctx, source, dest, isize, osize);

	kmem_free(ctx, sizeof (struct refTables));
	return (result);
#else
	if (isize < (int)LZ4_64KLIMIT)
		return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
	return (LZ4_compressCtx(NULL, source, dest, isize, osize));
#endif
}

/* Decompression functions */

/*
 * Note: The decoding functions real_LZ4_uncompress() and
 * LZ4_uncompress_unknownOutputSize() are safe against "buffer overflow"
 * attacks. They will never write nor read outside of the provided
 * output buffers. LZ4_uncompress_unknownOutputSize() also ensures that
 * it will never read outside of the input buffer. A corrupted input
 * will produce an error result, a negative int, indicating the position
 * of the error within the input stream.
 */

static int
real_LZ4_uncompress(const char *source, char *dest, int osize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *ref;

	BYTE *op = (BYTE *) dest;
	BYTE *const oend = op + osize;
	BYTE *cpy;

	unsigned token;

	size_t length;
	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif

	/* Main Loop */
	for (;;) {
		/* get runlength */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			size_t len;
			for (; (len = *ip++) == 255; length += 255) {
			}
			length += len;
		}
		/* copy literals */
		cpy = op + length;
		/* CORNER-CASE: cpy might overflow. */
		if (cpy < op)
			goto _output_error;	/* cpy was overflowed, bail! */
		if unlikely(cpy > oend - COPYLENGTH) {
			if (cpy != oend)
				/* Error: we must necessarily stand at EOF */
				goto _output_error;
			(void) memcpy(op, ip, length);
			ip += length;
			break;	/* EOF */
		}
		LZ4_WILDCOPY(ip, op, cpy);
		ip -= (op - cpy);
		op = cpy;

		/* get offset */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if unlikely(ref < (BYTE * const) dest)
			/*
			 * Error: offset creates reference outside
			 * destination buffer
			 */
			goto _output_error;

		/* get matchlength */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			for (; *ip == 255; length += 255) {
				ip++;
			}
			length += *ip++;
		}
		/* copy repeated sequence */
		if unlikely(op - ref < STEPSIZE) {
#if LZ4_ARCH64
			size_t dec64 = dec64table[op-ref];
#else
			const int dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op-ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			if (cpy > oend)
				/*
				 * Error: request to write beyond destination
				 * buffer
				 */
				goto _output_error;
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since last
				 * 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)ip) - source);

	/* write overflow error detected */
	_output_error:
	return (int)(-(((char *)ip) - source));
}

static int
LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
    int maxOutputSize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *const iend = ip + isize;
	const BYTE *ref;

	BYTE *op = (BYTE *) dest;
	BYTE *const oend = op + maxOutputSize;
	BYTE *cpy;

	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif

	/* Main Loop */
	while (ip < iend) {
		unsigned token;
		size_t length;

		/* get runlength */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			int s = 255;
			while ((ip < iend) && (s == 255)) {
				s = *ip++;
				length += s;
			}
		}
		/* copy literals */
		cpy = op + length;
		/* CORNER-CASE: cpy might overflow. */
		if (cpy < op)
			goto _output_error;	/* cpy was overflowed, bail! */
		if ((cpy > oend - COPYLENGTH) ||
		    (ip + length > iend - COPYLENGTH)) {
			if (cpy > oend)
				/* Error: writes beyond output buffer */
				goto _output_error;
			if (ip + length != iend)
				/*
				 * Error: LZ4 format requires to consume all
				 * input at this stage
				 */
				goto _output_error;
			(void) memcpy(op, ip, length);
			op += length;
			/* Necessarily EOF, due to parsing restrictions */
			break;
		}
		LZ4_WILDCOPY(ip, op, cpy);
		ip -= (op - cpy);
		op = cpy;

		/* get offset */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if (ref < (BYTE * const) dest)
			/*
			 * Error: offset creates reference outside of
			 * destination buffer
			 */
			goto _output_error;

		/* get matchlength */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			while (ip < iend) {
				int s = *ip++;
				length += s;
				if (s == 255)
					continue;
				break;
			}
		}
		/* copy repeated sequence */
		if unlikely(op - ref < STEPSIZE) {
#if LZ4_ARCH64
			size_t dec64 = dec64table[op-ref];
#else
			const int dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op-ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			if (cpy > oend)
				/*
				 * Error: request to write outside of
				 * destination buffer
				 */
				goto _output_error;
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since
				 * last 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)op) - dest);

	/* write overflow error detected */
	_output_error:
	return (int)(-(((char *)ip) - source));
}