xref: /illumos-gate/usr/src/uts/common/io/cxgbe/t4nex/fastlz.c (revision b93865c3d90e9b0d73e338c9abb3293c35c571a8)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source. A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13    FastLZ - lightning-fast lossless compression library
14 
15    Copyright (C) 2007 Ariya Hidayat (ariya@kde.org)
16    Copyright (C) 2006 Ariya Hidayat (ariya@kde.org)
17    Copyright (C) 2005 Ariya Hidayat (ariya@kde.org)
18 
19    Permission is hereby granted, free of charge, to any person obtaining a copy
20    of this software and associated documentation files (the "Software"), to deal
21    in the Software without restriction, including without limitation the rights
22    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
23    copies of the Software, and to permit persons to whom the Software is
24    furnished to do so, subject to the following conditions:
25 
26    The above copyright notice and this permission notice shall be included in
27    all copies or substantial portions of the Software.
28 
29    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
31    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
32    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
33    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
34    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
35    THE SOFTWARE.
36    */
37 #include "osdep.h"
38 #include "fastlz.h"
39 
40 #if !defined(FASTLZ__COMPRESSOR) && !defined(FASTLZ_DECOMPRESSOR)
41 
42 /*
43  * Always check for bound when decompressing.
44  * Generally it is best to leave it defined.
45  */
46 #define FASTLZ_SAFE
47 
48 #if defined(WIN32) || defined(__NT__) || defined(_WIN32) || defined(__WIN32__)
49 #if defined(_MSC_VER) || defined(__GNUC__)
50 /* #include <windows.h> */
51 #pragma warning(disable : 4242)
52 #pragma warning(disable : 4244)
53 #endif
54 #endif
55 
56 /*
57  * Give hints to the compiler for branch prediction optimization.
58  */
59 #if defined(__GNUC__) && (__GNUC__ > 2)
60 #define FASTLZ_EXPECT_CONDITIONAL(c)	(__builtin_expect((c), 1))
61 #define FASTLZ_UNEXPECT_CONDITIONAL(c)	(__builtin_expect((c), 0))
62 #else
63 #define FASTLZ_EXPECT_CONDITIONAL(c)	(c)
64 #define FASTLZ_UNEXPECT_CONDITIONAL(c)	(c)
65 #endif
66 
67 /*
68  * Use inlined functions for supported systems.
69  */
70 #if defined(__GNUC__) || defined(__DMC__) || defined(__POCC__) ||\
71 	defined(__WATCOMC__) || defined(__SUNPRO_C)
72 #define FASTLZ_INLINE inline
73 #elif defined(__BORLANDC__) || defined(_MSC_VER) || defined(__LCC__)
74 #define FASTLZ_INLINE __inline
75 #else
76 #define FASTLZ_INLINE
77 #endif
78 
79 /*
80  * Prevent accessing more than 8-bit at once, except on x86 architectures.
81  */
82 #if !defined(FASTLZ_STRICT_ALIGN)
83 #define FASTLZ_STRICT_ALIGN
84 #if defined(__i386__) || defined(__386)  /* GNU C, Sun Studio */
85 #undef FASTLZ_STRICT_ALIGN
86 #elif defined(__i486__) || defined(__i586__) || defined(__i686__) /* GNU C */
87 #undef FASTLZ_STRICT_ALIGN
88 #elif defined(_M_IX86) /* Intel, MSVC */
89 #undef FASTLZ_STRICT_ALIGN
90 #elif defined(__386)
91 #undef FASTLZ_STRICT_ALIGN
92 #elif defined(_X86_) /* MinGW */
93 #undef FASTLZ_STRICT_ALIGN
94 #elif defined(__I86__) /* Digital Mars */
95 #undef FASTLZ_STRICT_ALIGN
96 #endif
97 #endif
98 
99 /*
100  * FIXME: use preprocessor magic to set this on different platforms!
101  */
102 
103 #define MAX_COPY       32
104 #define MAX_LEN       264  /* 256 + 8 */
105 #define MAX_DISTANCE 8192
106 
107 #if !defined(FASTLZ_STRICT_ALIGN)
108 #define FASTLZ_READU16(p) (*((const unsigned short *)(p)))
109 #else
110 #define FASTLZ_READU16(p) ((p)[0] | (p)[1]<<8)
111 #endif
112 
113 #define HASH_LOG  13
114 #define HASH_SIZE (1 << HASH_LOG)
115 #define HASH_MASK  (HASH_SIZE - 1)
116 #define HASH_FUNCTION(v, p) {\
117 				v = FASTLZ_READU16(p);\
118 				v ^= FASTLZ_READU16(p + 1)^\
119 				     (v>>(16 - HASH_LOG));\
120 				v &= HASH_MASK;\
121 			    }
122 
123 #undef FASTLZ_LEVEL
124 #define FASTLZ_LEVEL 1
125 
126 #undef FASTLZ_COMPRESSOR
127 #undef FASTLZ_DECOMPRESSOR
128 #define FASTLZ_COMPRESSOR fastlz1_compress
129 #define FASTLZ_DECOMPRESSOR fastlz1_decompress
130 static int FASTLZ_COMPRESSOR(const void *input, int length,
131 					   void *output);
132 static int FASTLZ_DECOMPRESSOR(const void *input, int length,
133 					     void *output, int maxout);
134 #include "fastlz.c"
135 
136 #undef FASTLZ_LEVEL
137 #define FASTLZ_LEVEL 2
138 
139 #undef MAX_DISTANCE
140 #define MAX_DISTANCE 8191
141 #define MAX_FARDISTANCE (65535 + MAX_DISTANCE - 1)
142 
143 #undef FASTLZ_COMPRESSOR
144 #undef FASTLZ_DECOMPRESSOR
145 #define FASTLZ_COMPRESSOR fastlz2_compress
146 #define FASTLZ_DECOMPRESSOR fastlz2_decompress
147 static int FASTLZ_COMPRESSOR(const void *input, int length,
148 					   void *output);
149 static int FASTLZ_DECOMPRESSOR(const void *input, int length,
150 					     void *output, int maxout);
151 #include "fastlz.c"
152 
153 int
154 fastlz_compress(const void *input, int length, void *output)
155 {
156 	/* for short block, choose fastlz1 */
157 	if (length < 65536)
158 		return fastlz1_compress(input, length, output);
159 
160 	/* else... */
161 	return fastlz2_compress(input, length, output);
162 }
163 
164 int
165 fastlz_decompress(const void *input, int length, void *output, int maxout)
166 {
167 	/* magic identifier for compression level */
168 	int level = ((*(const unsigned char *)input) >> 5) + 1;
169 
170 	if (level == 1)
171 		return fastlz1_decompress(input, length, output, maxout);
172 	if (level == 2)
173 		return fastlz2_decompress(input, length, output, maxout);
174 
175 	/* unknown level, trigger error */
176 	return 0;
177 }
178 
179 int
180 fastlz_compress_level(int level, const void *input, int length,
181 		      void *output)
182 {
183 	if (level == 1)
184 		return fastlz1_compress(input, length, output);
185 	if (level == 2)
186 		return fastlz2_compress(input, length, output);
187 
188 	return 0;
189 }
190 
191 #else /* !defined(FASTLZ_COMPRESSOR) && !defined(FASTLZ_DECOMPRESSOR) */
192 
193 
194 static int
195 FASTLZ_COMPRESSOR(const void *input, int length,
196 		  void *output)
197 {
198 	const unsigned char *ip = (const unsigned char *) input;
199 	const unsigned char *ip_bound = ip + length - 2;
200 	const unsigned char *ip_limit = ip + length - 12;
201 	unsigned char *op = (unsigned char *) output;
202 	static const unsigned char *g_htab[HASH_SIZE];
203 
204 	const unsigned char **htab = g_htab;
205 	const unsigned char **hslot;
206 	unsigned int hval;
207 
208 	unsigned int copy;
209 
210 	/* sanity check */
211 	if (FASTLZ_UNEXPECT_CONDITIONAL(length < 4)) {
212 		if (length) {
213 			/* create literal copy only */
214 			*op++ = length - 1;
215 			ip_bound++;
216 			while (ip <= ip_bound)
217 				*op++ = *ip++;
218 			return length + 1;
219 		} else
220 			return 0;
221 	}
222 
223 	/* initializes hash table */
224 	for (hslot = htab; hslot < htab + HASH_SIZE; hslot++)
225 		*hslot = ip;
226 
227 	/* we start with literal copy */
228 	copy = 2;
229 	*op++ = MAX_COPY - 1;
230 	*op++ = *ip++;
231 	*op++ = *ip++;
232 
233 	/* main loop */
234 	while (FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit)) {
235 		const unsigned char *ref;
236 		unsigned int distance;
237 
238 		/* minimum match length */
239 		unsigned int len = 3;
240 
241 		/* comparison starting-point */
242 		const unsigned char *anchor = ip;
243 
244 		/* check for a run */
245 #if FASTLZ_LEVEL == 2
246 		if (ip[0] == ip[-1] &&
247 		    FASTLZ_READU16(ip - 1) == FASTLZ_READU16(ip + 1)) {
248 			distance = 1;
249 			ip += 3;
250 			ref = anchor - 1 + 3;
251 			goto match;
252 		}
253 #endif
254 
255 		/* find potential match */
256 		HASH_FUNCTION(hval, ip);
257 		hslot = htab + hval;
258 		ref = htab[hval];
259 
260 		/* calculate distance to the match */
261 		distance = anchor - ref;
262 
263 		/* update hash table */
264 		*hslot = anchor;
265 
266 		if (!ref)
267 			goto literal;
268 		/* is this a match? check the first 3 bytes */
269 		if (distance == 0 ||
270 #if FASTLZ_LEVEL == 1
271 				(distance >= MAX_DISTANCE) ||
272 #else
273 				(distance >= MAX_FARDISTANCE) ||
274 #endif
275 				*ref++ != *ip++ || *ref++ != *ip++ ||
276 				*ref++ != *ip++)
277 			goto literal;
278 
279 #if FASTLZ_LEVEL == 2
280 		/* far, needs at least 5-byte match */
281 		if (distance >= MAX_DISTANCE) {
282 			if (*ip++ != *ref++ || *ip++ != *ref++)
283 				goto literal;
284 			len += 2;
285 		}
286 
287 match:
288 #endif
289 
290 		/* last matched byte */
291 		ip = anchor + len;
292 
293 		/* distance is biased */
294 		distance--;
295 
296 		if (!distance) {
297 			/* zero distance means a run */
298 			unsigned char x = ip[-1];
299 			while (ip < ip_bound)
300 				if (*ref++ != x)
301 					break;
302 				else
303 					ip++;
304 		} else
305 			for (;;) {
306 				/* safe because the outer check
307 				 * against ip limit */
308 				if (*ref++ != *ip++)
309 					break;
310 				if (*ref++ != *ip++)
311 					break;
312 				if (*ref++ != *ip++)
313 					break;
314 				if (*ref++ != *ip++)
315 					break;
316 				if (*ref++ != *ip++)
317 					break;
318 				if (*ref++ != *ip++)
319 					break;
320 				if (*ref++ != *ip++)
321 					break;
322 				if (*ref++ != *ip++)
323 					break;
324 				while (ip < ip_bound)
325 					if (*ref++ != *ip++)
326 						break;
327 				break;
328 			}
329 
330 		/* if we have copied something, adjust the copy count */
331 		if (copy)
332 			/* copy is biased, '0' means 1 byte copy */
333 			*(op - copy - 1) = copy - 1;
334 		else
335 			/* back, to overwrite the copy count */
336 			op--;
337 
338 		/* reset literal counter */
339 		copy = 0;
340 
341 		/* length is biased, '1' means a match of 3 bytes */
342 		ip -= 3;
343 		len = ip - anchor;
344 
345 		/* encode the match */
346 #if FASTLZ_LEVEL == 2
347 		if (distance < MAX_DISTANCE) {
348 			if (len < 7) {
349 				*op++ = (len << 5) + (distance >> 8);
350 				*op++ = (distance & 255);
351 			} else {
352 				*op++ = (7 << 5) + (distance >> 8);
353 				for (len -= 7; len >= 255; len -= 255)
354 					*op++ = 255;
355 				*op++ = len;
356 				*op++ = (distance & 255);
357 			}
358 		} else {
359 			/* far away, but not yet in the another galaxy... */
360 			if (len < 7) {
361 				distance -= MAX_DISTANCE;
362 				*op++ = (len << 5) + 31;
363 				*op++ = 255;
364 				*op++ = distance >> 8;
365 				*op++ = distance & 255;
366 			} else {
367 				distance -= MAX_DISTANCE;
368 				*op++ = (7 << 5) + 31;
369 				for (len -= 7; len >= 255; len -= 255)
370 					*op++ = 255;
371 				*op++ = len;
372 				*op++ = 255;
373 				*op++ = distance >> 8;
374 				*op++ = distance & 255;
375 			}
376 		}
377 #else
378 
379 		if (FASTLZ_UNEXPECT_CONDITIONAL(len > MAX_LEN - 2))
380 			while (len > MAX_LEN - 2) {
381 				*op++ = (7 << 5) + (distance >> 8);
382 				*op++ = MAX_LEN - 2 - 7 - 2;
383 				*op++ = (distance & 255);
384 				len -= MAX_LEN - 2;
385 			}
386 
387 		if (len < 7) {
388 			*op++ = (len << 5) + (distance >> 8);
389 			*op++ = (distance & 255);
390 		} else {
391 			*op++ = (7 << 5) + (distance >> 8);
392 			*op++ = len - 7;
393 			*op++ = (distance & 255);
394 		}
395 #endif
396 
397 		/* update the hash at match boundary */
398 		HASH_FUNCTION(hval, ip);
399 		htab[hval] = ip++;
400 		HASH_FUNCTION(hval, ip);
401 		htab[hval] = ip++;
402 
403 		/* assuming literal copy */
404 		*op++ = MAX_COPY - 1;
405 
406 		continue;
407 
408 literal:
409 		*op++ = *anchor++;
410 		ip = anchor;
411 		copy++;
412 		if (FASTLZ_UNEXPECT_CONDITIONAL(copy == MAX_COPY)) {
413 			copy = 0;
414 			*op++ = MAX_COPY - 1;
415 		}
416 	}
417 
418 	/* left-over as literal copy */
419 	ip_bound++;
420 	while (ip <= ip_bound) {
421 		*op++ = *ip++;
422 		copy++;
423 		if (copy == MAX_COPY) {
424 			copy = 0;
425 			*op++ = MAX_COPY - 1;
426 		}
427 	}
428 
429 	/* if we have copied something, adjust the copy length */
430 	if (copy)
431 		*(op - copy - 1) = copy - 1;
432 	else
433 		op--;
434 
435 #if FASTLZ_LEVEL == 2
436 	/* marker for fastlz2 */
437 	*(unsigned char *)output |= (1 << 5);
438 #endif
439 
440 	return op - (unsigned char *)output;
441 }
442 
443 static int
444 FASTLZ_DECOMPRESSOR(const void *input, int length,
445 		    void *output, int maxout)
446 {
447 	const unsigned char *ip = (const unsigned char *) input;
448 	const unsigned char *ip_limit  = ip + length;
449 	unsigned char *op = (unsigned char *) output;
450 	unsigned char *op_limit = op + maxout;
451 	unsigned int ctrl = (*ip++) & 31;
452 	int loop = 1;
453 
454 	do {
455 		const unsigned char *ref = op;
456 #ifndef __CHECKER__
457 		unsigned int len = ctrl >> 5;
458 #endif
459 		unsigned int ofs = (ctrl & 31) << 8;
460 
461 		if (ctrl >= 32) {
462 #if FASTLZ_LEVEL == 2
463 			unsigned char code;
464 #endif
465 			len--;
466 			ref -= ofs;
467 #ifndef __CHECKER__
468 			if (len == 7 - 1)
469 #if FASTLZ_LEVEL == 1
470 				len += *ip++;
471 			ref -= *ip++;
472 #else
473 			do {
474 				code = *ip++;
475 				len += code;
476 			} while (code == 255);
477 			code = *ip++;
478 			ref -= code;
479 
480 			/* match from 16-bit distance */
481 			if (FASTLZ_UNEXPECT_CONDITIONAL(code == 255))
482 				if (FASTLZ_EXPECT_CONDITIONAL(ofs ==
483 							      (31 << 8))) {
484 					ofs = (*ip++) << 8;
485 					ofs += *ip++;
486 					ref = op - ofs - MAX_DISTANCE;
487 				}
488 #endif
489 
490 #ifdef FASTLZ_SAFE
491 			if (FASTLZ_UNEXPECT_CONDITIONAL(op + len + 3 >
492 							op_limit))
493 				return 0;
494 
495 			if (FASTLZ_UNEXPECT_CONDITIONAL(ref - 1 <
496 							(unsigned char *)output)
497 						       )
498 				return 0;
499 #endif
500 
501 			if (FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit))
502 				ctrl = *ip++;
503 			else
504 				loop = 0;
505 
506 			if (ref == op) {
507 				/* optimize copy for a run */
508 				unsigned char b = ref[-1];
509 				*op++ = b;
510 				*op++ = b;
511 				*op++ = b;
512 				for (; len; --len)
513 					*op++ = b;
514 			} else {
515 #if !defined(FASTLZ_STRICT_ALIGN)
516 				const unsigned short *p;
517 				unsigned short *q;
518 #endif
519 				/* copy from reference */
520 				ref--;
521 				*op++ = *ref++;
522 				*op++ = *ref++;
523 				*op++ = *ref++;
524 
525 #if !defined(FASTLZ_STRICT_ALIGN)
526 				/* copy a byte, so that now it's word aligned */
527 				if (len & 1) {
528 					*op++ = *ref++;
529 					len--;
530 				}
531 
532 				/* copy 16-bit at once */
533 				q = (unsigned short *) op;
534 				op += len;
535 				p = (const unsigned short *) ref;
536 				for (len >>= 1; len > 4; len -= 4) {
537 					*q++ = *p++;
538 					*q++ = *p++;
539 					*q++ = *p++;
540 					*q++ = *p++;
541 				}
542 				for (; len; --len)
543 					*q++ = *p++;
544 #else
545 				for (; len; --len)
546 					*op++ = *ref++;
547 #endif
548 			}
549 #endif /* __CHECKER__ */
550 		} else {
551 			ctrl++;
552 #ifdef FASTLZ_SAFE
553 			if (FASTLZ_UNEXPECT_CONDITIONAL(op + ctrl > op_limit))
554 				return 0;
555 			if (FASTLZ_UNEXPECT_CONDITIONAL(ip + ctrl > ip_limit))
556 				return 0;
557 #endif
558 
559 			*op++ = *ip++;
560 			for (--ctrl; ctrl; ctrl--)
561 				*op++ = *ip++;
562 
563 			loop = FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit);
564 			if (loop)
565 				ctrl = *ip++;
566 		}
567 	} while (FASTLZ_EXPECT_CONDITIONAL(loop));
568 
569 	return op - (unsigned char *)output;
570 }
571 
572 #endif /* !defined(FASTLZ_COMPRESSOR) && !defined(FASTLZ_DECOMPRESSOR) */
573