xref: /freebsd/sys/dev/cxgbe/cudbg/fastlz.c (revision 76afb20c58adb296f09857aed214b91464242264)
1 /*
2    FastLZ - lightning-fast lossless compression library
3 
4    Copyright (C) 2007 Ariya Hidayat (ariya@kde.org)
5    Copyright (C) 2006 Ariya Hidayat (ariya@kde.org)
6    Copyright (C) 2005 Ariya Hidayat (ariya@kde.org)
7 
8    Permission is hereby granted, free of charge, to any person obtaining a copy
9    of this software and associated documentation files (the "Software"), to deal
10    in the Software without restriction, including without limitation the rights
11    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12    copies of the Software, and to permit persons to whom the Software is
13    furnished to do so, subject to the following conditions:
14 
15    The above copyright notice and this permission notice shall be included in
16    all copies or substantial portions of the Software.
17 
18    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24    THE SOFTWARE.
25    */
26 #include <sys/cdefs.h>
27 __FBSDID("$FreeBSD$");
28 
29 #include "osdep.h"
30 #include "fastlz.h"
31 
32 #if !defined(FASTLZ__COMPRESSOR) && !defined(FASTLZ_DECOMPRESSOR)
33 
34 /*
35  * Always check for bound when decompressing.
36  * Generally it is best to leave it defined.
37  */
38 #define FASTLZ_SAFE
39 
40 #if defined(WIN32) || defined(__NT__) || defined(_WIN32) || defined(__WIN32__)
41 #if defined(_MSC_VER) || defined(__GNUC__)
42 /* #include <windows.h> */
43 #pragma warning(disable : 4242)
44 #pragma warning(disable : 4244)
45 #endif
46 #endif
47 
48 /*
49  * Give hints to the compiler for branch prediction optimization.
50  */
51 #if defined(__GNUC__) && (__GNUC__ > 2)
52 #define FASTLZ_EXPECT_CONDITIONAL(c)	(__builtin_expect((c), 1))
53 #define FASTLZ_UNEXPECT_CONDITIONAL(c)	(__builtin_expect((c), 0))
54 #else
55 #define FASTLZ_EXPECT_CONDITIONAL(c)	(c)
56 #define FASTLZ_UNEXPECT_CONDITIONAL(c)	(c)
57 #endif
58 
59 /*
60  * Use inlined functions for supported systems.
61  */
62 #if defined(__GNUC__) || defined(__DMC__) || defined(__POCC__) ||\
63 	defined(__WATCOMC__) || defined(__SUNPRO_C)
64 #define FASTLZ_INLINE inline
65 #elif defined(__BORLANDC__) || defined(_MSC_VER) || defined(__LCC__)
66 #define FASTLZ_INLINE __inline
67 #else
68 #define FASTLZ_INLINE
69 #endif
70 
71 /*
72  * Prevent accessing more than 8-bit at once, except on x86 architectures.
73  */
74 #if !defined(FASTLZ_STRICT_ALIGN)
75 #define FASTLZ_STRICT_ALIGN
76 #if defined(__i386__) || defined(__386)  /* GNU C, Sun Studio */
77 #undef FASTLZ_STRICT_ALIGN
78 #elif defined(__i486__) || defined(__i586__) || defined(__i686__) /* GNU C */
79 #undef FASTLZ_STRICT_ALIGN
80 #elif defined(_M_IX86) /* Intel, MSVC */
81 #undef FASTLZ_STRICT_ALIGN
82 #elif defined(__386)
83 #undef FASTLZ_STRICT_ALIGN
84 #elif defined(_X86_) /* MinGW */
85 #undef FASTLZ_STRICT_ALIGN
86 #elif defined(__I86__) /* Digital Mars */
87 #undef FASTLZ_STRICT_ALIGN
88 #endif
89 #endif
90 
91 /*
92  * FIXME: use preprocessor magic to set this on different platforms!
93  */
94 
95 #define MAX_COPY       32
96 #define MAX_LEN       264  /* 256 + 8 */
97 #define MAX_DISTANCE 8192
98 
99 #if !defined(FASTLZ_STRICT_ALIGN)
100 #define FASTLZ_READU16(p) (*((const unsigned short *)(p)))
101 #else
102 #define FASTLZ_READU16(p) ((p)[0] | (p)[1]<<8)
103 #endif
104 
105 #define HASH_LOG  13
106 #define HASH_SIZE (1 << HASH_LOG)
107 #define HASH_MASK  (HASH_SIZE - 1)
108 #define HASH_FUNCTION(v, p) {\
109 				v = FASTLZ_READU16(p);\
110 				v ^= FASTLZ_READU16(p + 1)^\
111 				     (v>>(16 - HASH_LOG));\
112 				v &= HASH_MASK;\
113 			    }
114 
115 #undef FASTLZ_LEVEL
116 #define FASTLZ_LEVEL 1
117 
118 #undef FASTLZ_COMPRESSOR
119 #undef FASTLZ_DECOMPRESSOR
120 #define FASTLZ_COMPRESSOR fastlz1_compress
121 #define FASTLZ_DECOMPRESSOR fastlz1_decompress
122 static FASTLZ_INLINE int FASTLZ_COMPRESSOR(const void *input, int length,
123 					   void *output);
124 static FASTLZ_INLINE int FASTLZ_DECOMPRESSOR(const void *input, int length,
125 					     void *output, int maxout);
126 #include "fastlz.c"
127 
128 #undef FASTLZ_LEVEL
129 #define FASTLZ_LEVEL 2
130 
131 #undef MAX_DISTANCE
132 #define MAX_DISTANCE 8191
133 #define MAX_FARDISTANCE (65535 + MAX_DISTANCE - 1)
134 
135 #undef FASTLZ_COMPRESSOR
136 #undef FASTLZ_DECOMPRESSOR
137 #define FASTLZ_COMPRESSOR fastlz2_compress
138 #define FASTLZ_DECOMPRESSOR fastlz2_decompress
139 static FASTLZ_INLINE int FASTLZ_COMPRESSOR(const void *input, int length,
140 					   void *output);
141 static FASTLZ_INLINE int FASTLZ_DECOMPRESSOR(const void *input, int length,
142 					     void *output, int maxout);
143 #include "fastlz.c"
144 
145 int fastlz_compress(const void *input, int length, void *output)
146 {
147 	/* for short block, choose fastlz1 */
148 	if (length < 65536)
149 		return fastlz1_compress(input, length, output);
150 
151 	/* else... */
152 	return fastlz2_compress(input, length, output);
153 }
154 
155 int fastlz_decompress(const void *input, int length, void *output, int maxout)
156 {
157 	/* magic identifier for compression level */
158 	int level = ((*(const unsigned char *)input) >> 5) + 1;
159 
160 	if (level == 1)
161 		return fastlz1_decompress(input, length, output, maxout);
162 	if (level == 2)
163 		return fastlz2_decompress(input, length, output, maxout);
164 
165 	/* unknown level, trigger error */
166 	return 0;
167 }
168 
169 int fastlz_compress_level(int level, const void *input, int length,
170 			  void *output)
171 {
172 	if (level == 1)
173 		return fastlz1_compress(input, length, output);
174 	if (level == 2)
175 		return fastlz2_compress(input, length, output);
176 
177 	return 0;
178 }
179 
180 #else /* !defined(FASTLZ_COMPRESSOR) && !defined(FASTLZ_DECOMPRESSOR) */
181 
182 
183 static FASTLZ_INLINE int FASTLZ_COMPRESSOR(const void *input, int length,
184 					   void *output)
185 {
186 	const unsigned char *ip = (const unsigned char *) input;
187 	const unsigned char *ip_bound = ip + length - 2;
188 	const unsigned char *ip_limit = ip + length - 12;
189 	unsigned char *op = (unsigned char *) output;
190 	static const unsigned char *g_htab[HASH_SIZE];
191 
192 	const unsigned char **htab = g_htab;
193 	const unsigned char **hslot;
194 	unsigned int hval;
195 
196 	unsigned int copy;
197 
198 	/* sanity check */
199 	if (FASTLZ_UNEXPECT_CONDITIONAL(length < 4)) {
200 		if (length) {
201 			/* create literal copy only */
202 			*op++ = length - 1;
203 			ip_bound++;
204 			while (ip <= ip_bound)
205 				*op++ = *ip++;
206 			return length + 1;
207 		} else
208 			return 0;
209 	}
210 
211 	/* initializes hash table */
212 	for (hslot = htab; hslot < htab + HASH_SIZE; hslot++)
213 		*hslot = ip;
214 
215 	/* we start with literal copy */
216 	copy = 2;
217 	*op++ = MAX_COPY - 1;
218 	*op++ = *ip++;
219 	*op++ = *ip++;
220 
221 	/* main loop */
222 	while (FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit)) {
223 		const unsigned char *ref;
224 		unsigned int distance;
225 
226 		/* minimum match length */
227 		unsigned int len = 3;
228 
229 		/* comparison starting-point */
230 		const unsigned char *anchor = ip;
231 
232 		/* check for a run */
233 #if FASTLZ_LEVEL == 2
234 		if (ip[0] == ip[-1] &&
235 		    FASTLZ_READU16(ip - 1) == FASTLZ_READU16(ip + 1)) {
236 			distance = 1;
237 			ip += 3;
238 			ref = anchor - 1 + 3;
239 			goto match;
240 		}
241 #endif
242 
243 		/* find potential match */
244 		HASH_FUNCTION(hval, ip);
245 		hslot = htab + hval;
246 		ref = htab[hval];
247 
248 		/* calculate distance to the match */
249 		distance = anchor - ref;
250 
251 		/* update hash table */
252 		*hslot = anchor;
253 
254 		if (!ref)
255 			goto literal;
256 		/* is this a match? check the first 3 bytes */
257 		if (distance == 0 ||
258 #if FASTLZ_LEVEL == 1
259 				(distance >= MAX_DISTANCE) ||
260 #else
261 				(distance >= MAX_FARDISTANCE) ||
262 #endif
263 				*ref++ != *ip++ || *ref++ != *ip++ ||
264 				*ref++ != *ip++)
265 			goto literal;
266 
267 #if FASTLZ_LEVEL == 2
268 		/* far, needs at least 5-byte match */
269 		if (distance >= MAX_DISTANCE) {
270 			if (*ip++ != *ref++ || *ip++ != *ref++)
271 				goto literal;
272 			len += 2;
273 		}
274 
275 match:
276 #endif
277 
278 		/* last matched byte */
279 		ip = anchor + len;
280 
281 		/* distance is biased */
282 		distance--;
283 
284 		if (!distance) {
285 			/* zero distance means a run */
286 			unsigned char x = ip[-1];
287 			while (ip < ip_bound)
288 				if (*ref++ != x)
289 					break;
290 				else
291 					ip++;
292 		} else
293 			for (;;) {
294 				/* safe because the outer check
295 				 * against ip limit */
296 				if (*ref++ != *ip++)
297 					break;
298 				if (*ref++ != *ip++)
299 					break;
300 				if (*ref++ != *ip++)
301 					break;
302 				if (*ref++ != *ip++)
303 					break;
304 				if (*ref++ != *ip++)
305 					break;
306 				if (*ref++ != *ip++)
307 					break;
308 				if (*ref++ != *ip++)
309 					break;
310 				if (*ref++ != *ip++)
311 					break;
312 				while (ip < ip_bound)
313 					if (*ref++ != *ip++)
314 						break;
315 				break;
316 			}
317 
318 		/* if we have copied something, adjust the copy count */
319 		if (copy)
320 			/* copy is biased, '0' means 1 byte copy */
321 			*(op - copy - 1) = copy - 1;
322 		else
323 			/* back, to overwrite the copy count */
324 			op--;
325 
326 		/* reset literal counter */
327 		copy = 0;
328 
329 		/* length is biased, '1' means a match of 3 bytes */
330 		ip -= 3;
331 		len = ip - anchor;
332 
333 		/* encode the match */
334 #if FASTLZ_LEVEL == 2
335 		if (distance < MAX_DISTANCE) {
336 			if (len < 7) {
337 				*op++ = (len << 5) + (distance >> 8);
338 				*op++ = (distance & 255);
339 			} else {
340 				*op++ = (7 << 5) + (distance >> 8);
341 				for (len -= 7; len >= 255; len -= 255)
342 					*op++ = 255;
343 				*op++ = len;
344 				*op++ = (distance & 255);
345 			}
346 		} else {
347 			/* far away, but not yet in the another galaxy... */
348 			if (len < 7) {
349 				distance -= MAX_DISTANCE;
350 				*op++ = (len << 5) + 31;
351 				*op++ = 255;
352 				*op++ = distance >> 8;
353 				*op++ = distance & 255;
354 			} else {
355 				distance -= MAX_DISTANCE;
356 				*op++ = (7 << 5) + 31;
357 				for (len -= 7; len >= 255; len -= 255)
358 					*op++ = 255;
359 				*op++ = len;
360 				*op++ = 255;
361 				*op++ = distance >> 8;
362 				*op++ = distance & 255;
363 			}
364 		}
365 #else
366 
367 		if (FASTLZ_UNEXPECT_CONDITIONAL(len > MAX_LEN - 2))
368 			while (len > MAX_LEN - 2) {
369 				*op++ = (7 << 5) + (distance >> 8);
370 				*op++ = MAX_LEN - 2 - 7 - 2;
371 				*op++ = (distance & 255);
372 				len -= MAX_LEN - 2;
373 			}
374 
375 		if (len < 7) {
376 			*op++ = (len << 5) + (distance >> 8);
377 			*op++ = (distance & 255);
378 		} else {
379 			*op++ = (7 << 5) + (distance >> 8);
380 			*op++ = len - 7;
381 			*op++ = (distance & 255);
382 		}
383 #endif
384 
385 		/* update the hash at match boundary */
386 		HASH_FUNCTION(hval, ip);
387 		htab[hval] = ip++;
388 		HASH_FUNCTION(hval, ip);
389 		htab[hval] = ip++;
390 
391 		/* assuming literal copy */
392 		*op++ = MAX_COPY - 1;
393 
394 		continue;
395 
396 literal:
397 		*op++ = *anchor++;
398 		ip = anchor;
399 		copy++;
400 		if (FASTLZ_UNEXPECT_CONDITIONAL(copy == MAX_COPY)) {
401 			copy = 0;
402 			*op++ = MAX_COPY - 1;
403 		}
404 	}
405 
406 	/* left-over as literal copy */
407 	ip_bound++;
408 	while (ip <= ip_bound) {
409 		*op++ = *ip++;
410 		copy++;
411 		if (copy == MAX_COPY) {
412 			copy = 0;
413 			*op++ = MAX_COPY - 1;
414 		}
415 	}
416 
417 	/* if we have copied something, adjust the copy length */
418 	if (copy)
419 		*(op - copy - 1) = copy - 1;
420 	else
421 		op--;
422 
423 #if FASTLZ_LEVEL == 2
424 	/* marker for fastlz2 */
425 	*(unsigned char *)output |= (1 << 5);
426 #endif
427 
428 	return op - (unsigned char *)output;
429 }
430 
431 static FASTLZ_INLINE int FASTLZ_DECOMPRESSOR(const void *input, int length,
432 					     void *output, int maxout)
433 {
434 	const unsigned char *ip = (const unsigned char *) input;
435 	const unsigned char *ip_limit  = ip + length;
436 	unsigned char *op = (unsigned char *) output;
437 	unsigned char *op_limit = op + maxout;
438 	unsigned int ctrl = (*ip++) & 31;
439 	int loop = 1;
440 
441 	do {
442 		const unsigned char *ref = op;
443 		unsigned int len = ctrl >> 5;
444 		unsigned int ofs = (ctrl & 31) << 8;
445 
446 		if (ctrl >= 32) {
447 #if FASTLZ_LEVEL == 2
448 			unsigned char code;
449 #endif
450 			len--;
451 			ref -= ofs;
452 			if (len == 7 - 1)
453 #if FASTLZ_LEVEL == 1
454 				len += *ip++;
455 			ref -= *ip++;
456 #else
457 			do {
458 				code = *ip++;
459 				len += code;
460 			} while (code == 255);
461 			code = *ip++;
462 			ref -= code;
463 
464 			/* match from 16-bit distance */
465 			if (FASTLZ_UNEXPECT_CONDITIONAL(code == 255))
466 				if (FASTLZ_EXPECT_CONDITIONAL(ofs ==
467 							      (31 << 8))) {
468 					ofs = (*ip++) << 8;
469 					ofs += *ip++;
470 					ref = op - ofs - MAX_DISTANCE;
471 				}
472 #endif
473 
474 #ifdef FASTLZ_SAFE
475 			if (FASTLZ_UNEXPECT_CONDITIONAL(op + len + 3 >
476 							op_limit))
477 				return 0;
478 
479 			if (FASTLZ_UNEXPECT_CONDITIONAL(ref - 1 <
480 							(unsigned char *)output)
481 						       )
482 				return 0;
483 #endif
484 
485 			if (FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit))
486 				ctrl = *ip++;
487 			else
488 				loop = 0;
489 
490 			if (ref == op) {
491 				/* optimize copy for a run */
492 				unsigned char b = ref[-1];
493 				*op++ = b;
494 				*op++ = b;
495 				*op++ = b;
496 				for (; len; --len)
497 					*op++ = b;
498 			} else {
499 #if !defined(FASTLZ_STRICT_ALIGN)
500 				const unsigned short *p;
501 				unsigned short *q;
502 #endif
503 				/* copy from reference */
504 				ref--;
505 				*op++ = *ref++;
506 				*op++ = *ref++;
507 				*op++ = *ref++;
508 
509 #if !defined(FASTLZ_STRICT_ALIGN)
510 				/* copy a byte, so that now it's word aligned */
511 				if (len & 1) {
512 					*op++ = *ref++;
513 					len--;
514 				}
515 
516 				/* copy 16-bit at once */
517 				q = (unsigned short *) op;
518 				op += len;
519 				p = (const unsigned short *) ref;
520 				for (len >>= 1; len > 4; len -= 4) {
521 					*q++ = *p++;
522 					*q++ = *p++;
523 					*q++ = *p++;
524 					*q++ = *p++;
525 				}
526 				for (; len; --len)
527 					*q++ = *p++;
528 #else
529 				for (; len; --len)
530 					*op++ = *ref++;
531 #endif
532 			}
533 		} else {
534 			ctrl++;
535 #ifdef FASTLZ_SAFE
536 			if (FASTLZ_UNEXPECT_CONDITIONAL(op + ctrl > op_limit))
537 				return 0;
538 			if (FASTLZ_UNEXPECT_CONDITIONAL(ip + ctrl > ip_limit))
539 				return 0;
540 #endif
541 
542 			*op++ = *ip++;
543 			for (--ctrl; ctrl; ctrl--)
544 				*op++ = *ip++;
545 
546 			loop = FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit);
547 			if (loop)
548 				ctrl = *ip++;
549 		}
550 	} while (FASTLZ_EXPECT_CONDITIONAL(loop));
551 
552 	return op - (unsigned char *)output;
553 }
554 
555 #endif /* !defined(FASTLZ_COMPRESSOR) && !defined(FASTLZ_DECOMPRESSOR) */
556