1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 FastLZ - lightning-fast lossless compression library
14
15 Copyright (C) 2007 Ariya Hidayat (ariya@kde.org)
16 Copyright (C) 2006 Ariya Hidayat (ariya@kde.org)
17 Copyright (C) 2005 Ariya Hidayat (ariya@kde.org)
18
19 Permission is hereby granted, free of charge, to any person obtaining a copy
20 of this software and associated documentation files (the "Software"), to deal
21 in the Software without restriction, including without limitation the rights
22 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
23 copies of the Software, and to permit persons to whom the Software is
24 furnished to do so, subject to the following conditions:
25
26 The above copyright notice and this permission notice shall be included in
27 all copies or substantial portions of the Software.
28
29 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
31 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
32 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
33 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
34 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
35 THE SOFTWARE.
36 */
37 #include "osdep.h"
38 #include "fastlz.h"
39
40 #if !defined(FASTLZ__COMPRESSOR) && !defined(FASTLZ_DECOMPRESSOR)
41
42 /*
43 * Always check for bound when decompressing.
44 * Generally it is best to leave it defined.
45 */
46 #define FASTLZ_SAFE
47
48 #if defined(WIN32) || defined(__NT__) || defined(_WIN32) || defined(__WIN32__)
49 #if defined(_MSC_VER) || defined(__GNUC__)
50 /* #include <windows.h> */
51 #pragma warning(disable : 4242)
52 #pragma warning(disable : 4244)
53 #endif
54 #endif
55
56 /*
57 * Give hints to the compiler for branch prediction optimization.
58 */
59 #if defined(__GNUC__) && (__GNUC__ > 2)
60 #define FASTLZ_EXPECT_CONDITIONAL(c) (__builtin_expect((c), 1))
61 #define FASTLZ_UNEXPECT_CONDITIONAL(c) (__builtin_expect((c), 0))
62 #else
63 #define FASTLZ_EXPECT_CONDITIONAL(c) (c)
64 #define FASTLZ_UNEXPECT_CONDITIONAL(c) (c)
65 #endif
66
67 /*
68 * Use inlined functions for supported systems.
69 */
70 #if defined(__GNUC__) || defined(__DMC__) || defined(__POCC__) ||\
71 defined(__WATCOMC__) || defined(__SUNPRO_C)
72 #define FASTLZ_INLINE inline
73 #elif defined(__BORLANDC__) || defined(_MSC_VER) || defined(__LCC__)
74 #define FASTLZ_INLINE __inline
75 #else
76 #define FASTLZ_INLINE
77 #endif
78
79 /*
80 * Prevent accessing more than 8-bit at once, except on x86 architectures.
81 */
82 #if !defined(FASTLZ_STRICT_ALIGN)
83 #define FASTLZ_STRICT_ALIGN
84 #if defined(__i386__) || defined(__386) /* GNU C, Sun Studio */
85 #undef FASTLZ_STRICT_ALIGN
86 #elif defined(__i486__) || defined(__i586__) || defined(__i686__) /* GNU C */
87 #undef FASTLZ_STRICT_ALIGN
88 #elif defined(_M_IX86) /* Intel, MSVC */
89 #undef FASTLZ_STRICT_ALIGN
90 #elif defined(__386)
91 #undef FASTLZ_STRICT_ALIGN
92 #elif defined(_X86_) /* MinGW */
93 #undef FASTLZ_STRICT_ALIGN
94 #elif defined(__I86__) /* Digital Mars */
95 #undef FASTLZ_STRICT_ALIGN
96 #endif
97 #endif
98
99 /*
100 * FIXME: use preprocessor magic to set this on different platforms!
101 */
102
103 #define MAX_COPY 32
104 #define MAX_LEN 264 /* 256 + 8 */
105 #define MAX_DISTANCE 8192
106
107 #if !defined(FASTLZ_STRICT_ALIGN)
108 #define FASTLZ_READU16(p) (*((const unsigned short *)(p)))
109 #else
110 #define FASTLZ_READU16(p) ((p)[0] | (p)[1]<<8)
111 #endif
112
113 #define HASH_LOG 13
114 #define HASH_SIZE (1 << HASH_LOG)
115 #define HASH_MASK (HASH_SIZE - 1)
116 #define HASH_FUNCTION(v, p) {\
117 v = FASTLZ_READU16(p);\
118 v ^= FASTLZ_READU16(p + 1)^\
119 (v>>(16 - HASH_LOG));\
120 v &= HASH_MASK;\
121 }
122
123 #undef FASTLZ_LEVEL
124 #define FASTLZ_LEVEL 1
125
126 #undef FASTLZ_COMPRESSOR
127 #undef FASTLZ_DECOMPRESSOR
128 #define FASTLZ_COMPRESSOR fastlz1_compress
129 #define FASTLZ_DECOMPRESSOR fastlz1_decompress
130 static int FASTLZ_COMPRESSOR(const void *input, int length,
131 void *output);
132 static int FASTLZ_DECOMPRESSOR(const void *input, int length,
133 void *output, int maxout);
134 #include "fastlz.c"
135
136 #undef FASTLZ_LEVEL
137 #define FASTLZ_LEVEL 2
138
139 #undef MAX_DISTANCE
140 #define MAX_DISTANCE 8191
141 #define MAX_FARDISTANCE (65535 + MAX_DISTANCE - 1)
142
143 #undef FASTLZ_COMPRESSOR
144 #undef FASTLZ_DECOMPRESSOR
145 #define FASTLZ_COMPRESSOR fastlz2_compress
146 #define FASTLZ_DECOMPRESSOR fastlz2_decompress
147 static int FASTLZ_COMPRESSOR(const void *input, int length,
148 void *output);
149 static int FASTLZ_DECOMPRESSOR(const void *input, int length,
150 void *output, int maxout);
151 #include "fastlz.c"
152
153 int
fastlz_compress(const void * input,int length,void * output)154 fastlz_compress(const void *input, int length, void *output)
155 {
156 /* for short block, choose fastlz1 */
157 if (length < 65536)
158 return fastlz1_compress(input, length, output);
159
160 /* else... */
161 return fastlz2_compress(input, length, output);
162 }
163
164 int
fastlz_decompress(const void * input,int length,void * output,int maxout)165 fastlz_decompress(const void *input, int length, void *output, int maxout)
166 {
167 /* magic identifier for compression level */
168 int level = ((*(const unsigned char *)input) >> 5) + 1;
169
170 if (level == 1)
171 return fastlz1_decompress(input, length, output, maxout);
172 if (level == 2)
173 return fastlz2_decompress(input, length, output, maxout);
174
175 /* unknown level, trigger error */
176 return 0;
177 }
178
179 int
fastlz_compress_level(int level,const void * input,int length,void * output)180 fastlz_compress_level(int level, const void *input, int length,
181 void *output)
182 {
183 if (level == 1)
184 return fastlz1_compress(input, length, output);
185 if (level == 2)
186 return fastlz2_compress(input, length, output);
187
188 return 0;
189 }
190
191 #else /* !defined(FASTLZ_COMPRESSOR) && !defined(FASTLZ_DECOMPRESSOR) */
192
193
194 static int
FASTLZ_COMPRESSOR(const void * input,int length,void * output)195 FASTLZ_COMPRESSOR(const void *input, int length,
196 void *output)
197 {
198 const unsigned char *ip = (const unsigned char *) input;
199 const unsigned char *ip_bound = ip + length - 2;
200 const unsigned char *ip_limit = ip + length - 12;
201 unsigned char *op = (unsigned char *) output;
202 static const unsigned char *g_htab[HASH_SIZE];
203
204 const unsigned char **htab = g_htab;
205 const unsigned char **hslot;
206 unsigned int hval;
207
208 unsigned int copy;
209
210 /* sanity check */
211 if (FASTLZ_UNEXPECT_CONDITIONAL(length < 4)) {
212 if (length) {
213 /* create literal copy only */
214 *op++ = length - 1;
215 ip_bound++;
216 while (ip <= ip_bound)
217 *op++ = *ip++;
218 return length + 1;
219 } else
220 return 0;
221 }
222
223 /* initializes hash table */
224 for (hslot = htab; hslot < htab + HASH_SIZE; hslot++)
225 *hslot = ip;
226
227 /* we start with literal copy */
228 copy = 2;
229 *op++ = MAX_COPY - 1;
230 *op++ = *ip++;
231 *op++ = *ip++;
232
233 /* main loop */
234 while (FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit)) {
235 const unsigned char *ref;
236 unsigned int distance;
237
238 /* minimum match length */
239 unsigned int len = 3;
240
241 /* comparison starting-point */
242 const unsigned char *anchor = ip;
243
244 /* check for a run */
245 #if FASTLZ_LEVEL == 2
246 if (ip[0] == ip[-1] &&
247 FASTLZ_READU16(ip - 1) == FASTLZ_READU16(ip + 1)) {
248 distance = 1;
249 ip += 3;
250 ref = anchor - 1 + 3;
251 goto match;
252 }
253 #endif
254
255 /* find potential match */
256 HASH_FUNCTION(hval, ip);
257 hslot = htab + hval;
258 ref = htab[hval];
259
260 /* calculate distance to the match */
261 distance = anchor - ref;
262
263 /* update hash table */
264 *hslot = anchor;
265
266 if (!ref)
267 goto literal;
268 /* is this a match? check the first 3 bytes */
269 if (distance == 0 ||
270 #if FASTLZ_LEVEL == 1
271 (distance >= MAX_DISTANCE) ||
272 #else
273 (distance >= MAX_FARDISTANCE) ||
274 #endif
275 *ref++ != *ip++ || *ref++ != *ip++ ||
276 *ref++ != *ip++)
277 goto literal;
278
279 #if FASTLZ_LEVEL == 2
280 /* far, needs at least 5-byte match */
281 if (distance >= MAX_DISTANCE) {
282 if (*ip++ != *ref++ || *ip++ != *ref++)
283 goto literal;
284 len += 2;
285 }
286
287 match:
288 #endif
289
290 /* last matched byte */
291 ip = anchor + len;
292
293 /* distance is biased */
294 distance--;
295
296 if (!distance) {
297 /* zero distance means a run */
298 unsigned char x = ip[-1];
299 while (ip < ip_bound)
300 if (*ref++ != x)
301 break;
302 else
303 ip++;
304 } else
305 for (;;) {
306 /* safe because the outer check
307 * against ip limit */
308 if (*ref++ != *ip++)
309 break;
310 if (*ref++ != *ip++)
311 break;
312 if (*ref++ != *ip++)
313 break;
314 if (*ref++ != *ip++)
315 break;
316 if (*ref++ != *ip++)
317 break;
318 if (*ref++ != *ip++)
319 break;
320 if (*ref++ != *ip++)
321 break;
322 if (*ref++ != *ip++)
323 break;
324 while (ip < ip_bound)
325 if (*ref++ != *ip++)
326 break;
327 break;
328 }
329
330 /* if we have copied something, adjust the copy count */
331 if (copy)
332 /* copy is biased, '0' means 1 byte copy */
333 *(op - copy - 1) = copy - 1;
334 else
335 /* back, to overwrite the copy count */
336 op--;
337
338 /* reset literal counter */
339 copy = 0;
340
341 /* length is biased, '1' means a match of 3 bytes */
342 ip -= 3;
343 len = ip - anchor;
344
345 /* encode the match */
346 #if FASTLZ_LEVEL == 2
347 if (distance < MAX_DISTANCE) {
348 if (len < 7) {
349 *op++ = (len << 5) + (distance >> 8);
350 *op++ = (distance & 255);
351 } else {
352 *op++ = (7 << 5) + (distance >> 8);
353 for (len -= 7; len >= 255; len -= 255)
354 *op++ = 255;
355 *op++ = len;
356 *op++ = (distance & 255);
357 }
358 } else {
359 /* far away, but not yet in the another galaxy... */
360 if (len < 7) {
361 distance -= MAX_DISTANCE;
362 *op++ = (len << 5) + 31;
363 *op++ = 255;
364 *op++ = distance >> 8;
365 *op++ = distance & 255;
366 } else {
367 distance -= MAX_DISTANCE;
368 *op++ = (7 << 5) + 31;
369 for (len -= 7; len >= 255; len -= 255)
370 *op++ = 255;
371 *op++ = len;
372 *op++ = 255;
373 *op++ = distance >> 8;
374 *op++ = distance & 255;
375 }
376 }
377 #else
378
379 if (FASTLZ_UNEXPECT_CONDITIONAL(len > MAX_LEN - 2))
380 while (len > MAX_LEN - 2) {
381 *op++ = (7 << 5) + (distance >> 8);
382 *op++ = MAX_LEN - 2 - 7 - 2;
383 *op++ = (distance & 255);
384 len -= MAX_LEN - 2;
385 }
386
387 if (len < 7) {
388 *op++ = (len << 5) + (distance >> 8);
389 *op++ = (distance & 255);
390 } else {
391 *op++ = (7 << 5) + (distance >> 8);
392 *op++ = len - 7;
393 *op++ = (distance & 255);
394 }
395 #endif
396
397 /* update the hash at match boundary */
398 HASH_FUNCTION(hval, ip);
399 htab[hval] = ip++;
400 HASH_FUNCTION(hval, ip);
401 htab[hval] = ip++;
402
403 /* assuming literal copy */
404 *op++ = MAX_COPY - 1;
405
406 continue;
407
408 literal:
409 *op++ = *anchor++;
410 ip = anchor;
411 copy++;
412 if (FASTLZ_UNEXPECT_CONDITIONAL(copy == MAX_COPY)) {
413 copy = 0;
414 *op++ = MAX_COPY - 1;
415 }
416 }
417
418 /* left-over as literal copy */
419 ip_bound++;
420 while (ip <= ip_bound) {
421 *op++ = *ip++;
422 copy++;
423 if (copy == MAX_COPY) {
424 copy = 0;
425 *op++ = MAX_COPY - 1;
426 }
427 }
428
429 /* if we have copied something, adjust the copy length */
430 if (copy)
431 *(op - copy - 1) = copy - 1;
432 else
433 op--;
434
435 #if FASTLZ_LEVEL == 2
436 /* marker for fastlz2 */
437 *(unsigned char *)output |= (1 << 5);
438 #endif
439
440 return op - (unsigned char *)output;
441 }
442
443 static int
FASTLZ_DECOMPRESSOR(const void * input,int length,void * output,int maxout)444 FASTLZ_DECOMPRESSOR(const void *input, int length,
445 void *output, int maxout)
446 {
447 const unsigned char *ip = (const unsigned char *) input;
448 const unsigned char *ip_limit = ip + length;
449 unsigned char *op = (unsigned char *) output;
450 unsigned char *op_limit = op + maxout;
451 unsigned int ctrl = (*ip++) & 31;
452 int loop = 1;
453
454 do {
455 const unsigned char *ref = op;
456 #ifndef __CHECKER__
457 unsigned int len = ctrl >> 5;
458 #endif
459 unsigned int ofs = (ctrl & 31) << 8;
460
461 if (ctrl >= 32) {
462 #if FASTLZ_LEVEL == 2
463 unsigned char code;
464 #endif
465 len--;
466 ref -= ofs;
467 #ifndef __CHECKER__
468 if (len == 7 - 1)
469 #if FASTLZ_LEVEL == 1
470 len += *ip++;
471 ref -= *ip++;
472 #else
473 do {
474 code = *ip++;
475 len += code;
476 } while (code == 255);
477 code = *ip++;
478 ref -= code;
479
480 /* match from 16-bit distance */
481 if (FASTLZ_UNEXPECT_CONDITIONAL(code == 255))
482 if (FASTLZ_EXPECT_CONDITIONAL(ofs ==
483 (31 << 8))) {
484 ofs = (*ip++) << 8;
485 ofs += *ip++;
486 ref = op - ofs - MAX_DISTANCE;
487 }
488 #endif
489
490 #ifdef FASTLZ_SAFE
491 if (FASTLZ_UNEXPECT_CONDITIONAL(op + len + 3 >
492 op_limit))
493 return 0;
494
495 if (FASTLZ_UNEXPECT_CONDITIONAL(ref - 1 <
496 (unsigned char *)output)
497 )
498 return 0;
499 #endif
500
501 if (FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit))
502 ctrl = *ip++;
503 else
504 loop = 0;
505
506 if (ref == op) {
507 /* optimize copy for a run */
508 unsigned char b = ref[-1];
509 *op++ = b;
510 *op++ = b;
511 *op++ = b;
512 for (; len; --len)
513 *op++ = b;
514 } else {
515 #if !defined(FASTLZ_STRICT_ALIGN)
516 const unsigned short *p;
517 unsigned short *q;
518 #endif
519 /* copy from reference */
520 ref--;
521 *op++ = *ref++;
522 *op++ = *ref++;
523 *op++ = *ref++;
524
525 #if !defined(FASTLZ_STRICT_ALIGN)
526 /* copy a byte, so that now it's word aligned */
527 if (len & 1) {
528 *op++ = *ref++;
529 len--;
530 }
531
532 /* copy 16-bit at once */
533 q = (unsigned short *) op;
534 op += len;
535 p = (const unsigned short *) ref;
536 for (len >>= 1; len > 4; len -= 4) {
537 *q++ = *p++;
538 *q++ = *p++;
539 *q++ = *p++;
540 *q++ = *p++;
541 }
542 for (; len; --len)
543 *q++ = *p++;
544 #else
545 for (; len; --len)
546 *op++ = *ref++;
547 #endif
548 }
549 #endif /* __CHECKER__ */
550 } else {
551 ctrl++;
552 #ifdef FASTLZ_SAFE
553 if (FASTLZ_UNEXPECT_CONDITIONAL(op + ctrl > op_limit))
554 return 0;
555 if (FASTLZ_UNEXPECT_CONDITIONAL(ip + ctrl > ip_limit))
556 return 0;
557 #endif
558
559 *op++ = *ip++;
560 for (--ctrl; ctrl; ctrl--)
561 *op++ = *ip++;
562
563 loop = FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit);
564 if (loop)
565 ctrl = *ip++;
566 }
567 } while (FASTLZ_EXPECT_CONDITIONAL(loop));
568
569 return op - (unsigned char *)output;
570 }
571
572 #endif /* !defined(FASTLZ_COMPRESSOR) && !defined(FASTLZ_DECOMPRESSOR) */
573