1 /* 2 FastLZ - lightning-fast lossless compression library 3 4 Copyright (C) 2007 Ariya Hidayat (ariya@kde.org) 5 Copyright (C) 2006 Ariya Hidayat (ariya@kde.org) 6 Copyright (C) 2005 Ariya Hidayat (ariya@kde.org) 7 8 Permission is hereby granted, free of charge, to any person obtaining a copy 9 of this software and associated documentation files (the "Software"), to deal 10 in the Software without restriction, including without limitation the rights 11 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 copies of the Software, and to permit persons to whom the Software is 13 furnished to do so, subject to the following conditions: 14 15 The above copyright notice and this permission notice shall be included in 16 all copies or substantial portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 THE SOFTWARE. 25 */ 26 #include <sys/cdefs.h> 27 __FBSDID("$FreeBSD$"); 28 29 #include "osdep.h" 30 #include "fastlz.h" 31 32 #if !defined(FASTLZ__COMPRESSOR) && !defined(FASTLZ_DECOMPRESSOR) 33 34 /* 35 * Always check for bound when decompressing. 36 * Generally it is best to leave it defined. 37 */ 38 #define FASTLZ_SAFE 39 40 #if defined(WIN32) || defined(__NT__) || defined(_WIN32) || defined(__WIN32__) 41 #if defined(_MSC_VER) || defined(__GNUC__) 42 /* #include <windows.h> */ 43 #pragma warning(disable : 4242) 44 #pragma warning(disable : 4244) 45 #endif 46 #endif 47 48 /* 49 * Give hints to the compiler for branch prediction optimization. 50 */ 51 #if defined(__GNUC__) && (__GNUC__ > 2) 52 #define FASTLZ_EXPECT_CONDITIONAL(c) (__builtin_expect((c), 1)) 53 #define FASTLZ_UNEXPECT_CONDITIONAL(c) (__builtin_expect((c), 0)) 54 #else 55 #define FASTLZ_EXPECT_CONDITIONAL(c) (c) 56 #define FASTLZ_UNEXPECT_CONDITIONAL(c) (c) 57 #endif 58 59 /* 60 * Use inlined functions for supported systems. 61 */ 62 #if defined(__GNUC__) || defined(__DMC__) || defined(__POCC__) ||\ 63 defined(__WATCOMC__) || defined(__SUNPRO_C) 64 #define FASTLZ_INLINE inline 65 #elif defined(__BORLANDC__) || defined(_MSC_VER) || defined(__LCC__) 66 #define FASTLZ_INLINE __inline 67 #else 68 #define FASTLZ_INLINE 69 #endif 70 71 /* 72 * Prevent accessing more than 8-bit at once, except on x86 architectures. 73 */ 74 #if !defined(FASTLZ_STRICT_ALIGN) 75 #define FASTLZ_STRICT_ALIGN 76 #if defined(__i386__) || defined(__386) /* GNU C, Sun Studio */ 77 #undef FASTLZ_STRICT_ALIGN 78 #elif defined(__i486__) || defined(__i586__) || defined(__i686__) /* GNU C */ 79 #undef FASTLZ_STRICT_ALIGN 80 #elif defined(_M_IX86) /* Intel, MSVC */ 81 #undef FASTLZ_STRICT_ALIGN 82 #elif defined(__386) 83 #undef FASTLZ_STRICT_ALIGN 84 #elif defined(_X86_) /* MinGW */ 85 #undef FASTLZ_STRICT_ALIGN 86 #elif defined(__I86__) /* Digital Mars */ 87 #undef FASTLZ_STRICT_ALIGN 88 #endif 89 #endif 90 91 /* 92 * FIXME: use preprocessor magic to set this on different platforms! 93 */ 94 95 #define MAX_COPY 32 96 #define MAX_LEN 264 /* 256 + 8 */ 97 #define MAX_DISTANCE 8192 98 99 #if !defined(FASTLZ_STRICT_ALIGN) 100 #define FASTLZ_READU16(p) (*((const unsigned short *)(p))) 101 #else 102 #define FASTLZ_READU16(p) ((p)[0] | (p)[1]<<8) 103 #endif 104 105 #define HASH_LOG 13 106 #define HASH_SIZE (1 << HASH_LOG) 107 #define HASH_MASK (HASH_SIZE - 1) 108 #define HASH_FUNCTION(v, p) {\ 109 v = FASTLZ_READU16(p);\ 110 v ^= FASTLZ_READU16(p + 1)^\ 111 (v>>(16 - HASH_LOG));\ 112 v &= HASH_MASK;\ 113 } 114 115 #undef FASTLZ_LEVEL 116 #define FASTLZ_LEVEL 1 117 118 #undef FASTLZ_COMPRESSOR 119 #undef FASTLZ_DECOMPRESSOR 120 #define FASTLZ_COMPRESSOR fastlz1_compress 121 #define FASTLZ_DECOMPRESSOR fastlz1_decompress 122 static FASTLZ_INLINE int FASTLZ_COMPRESSOR(const void *input, int length, 123 void *output); 124 static FASTLZ_INLINE int FASTLZ_DECOMPRESSOR(const void *input, int length, 125 void *output, int maxout); 126 #include "fastlz.c" 127 128 #undef FASTLZ_LEVEL 129 #define FASTLZ_LEVEL 2 130 131 #undef MAX_DISTANCE 132 #define MAX_DISTANCE 8191 133 #define MAX_FARDISTANCE (65535 + MAX_DISTANCE - 1) 134 135 #undef FASTLZ_COMPRESSOR 136 #undef FASTLZ_DECOMPRESSOR 137 #define FASTLZ_COMPRESSOR fastlz2_compress 138 #define FASTLZ_DECOMPRESSOR fastlz2_decompress 139 static FASTLZ_INLINE int FASTLZ_COMPRESSOR(const void *input, int length, 140 void *output); 141 static FASTLZ_INLINE int FASTLZ_DECOMPRESSOR(const void *input, int length, 142 void *output, int maxout); 143 #include "fastlz.c" 144 145 int fastlz_compress(const void *input, int length, void *output) 146 { 147 /* for short block, choose fastlz1 */ 148 if (length < 65536) 149 return fastlz1_compress(input, length, output); 150 151 /* else... */ 152 return fastlz2_compress(input, length, output); 153 } 154 155 int fastlz_decompress(const void *input, int length, void *output, int maxout) 156 { 157 /* magic identifier for compression level */ 158 int level = ((*(const unsigned char *)input) >> 5) + 1; 159 160 if (level == 1) 161 return fastlz1_decompress(input, length, output, maxout); 162 if (level == 2) 163 return fastlz2_decompress(input, length, output, maxout); 164 165 /* unknown level, trigger error */ 166 return 0; 167 } 168 169 int fastlz_compress_level(int level, const void *input, int length, 170 void *output) 171 { 172 if (level == 1) 173 return fastlz1_compress(input, length, output); 174 if (level == 2) 175 return fastlz2_compress(input, length, output); 176 177 return 0; 178 } 179 180 #else /* !defined(FASTLZ_COMPRESSOR) && !defined(FASTLZ_DECOMPRESSOR) */ 181 182 183 static FASTLZ_INLINE int FASTLZ_COMPRESSOR(const void *input, int length, 184 void *output) 185 { 186 const unsigned char *ip = (const unsigned char *) input; 187 const unsigned char *ip_bound = ip + length - 2; 188 const unsigned char *ip_limit = ip + length - 12; 189 unsigned char *op = (unsigned char *) output; 190 static const unsigned char *g_htab[HASH_SIZE]; 191 192 const unsigned char **htab = g_htab; 193 const unsigned char **hslot; 194 unsigned int hval; 195 196 unsigned int copy; 197 198 /* sanity check */ 199 if (FASTLZ_UNEXPECT_CONDITIONAL(length < 4)) { 200 if (length) { 201 /* create literal copy only */ 202 *op++ = length - 1; 203 ip_bound++; 204 while (ip <= ip_bound) 205 *op++ = *ip++; 206 return length + 1; 207 } else 208 return 0; 209 } 210 211 /* initializes hash table */ 212 for (hslot = htab; hslot < htab + HASH_SIZE; hslot++) 213 *hslot = ip; 214 215 /* we start with literal copy */ 216 copy = 2; 217 *op++ = MAX_COPY - 1; 218 *op++ = *ip++; 219 *op++ = *ip++; 220 221 /* main loop */ 222 while (FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit)) { 223 const unsigned char *ref; 224 unsigned int distance; 225 226 /* minimum match length */ 227 unsigned int len = 3; 228 229 /* comparison starting-point */ 230 const unsigned char *anchor = ip; 231 232 /* check for a run */ 233 #if FASTLZ_LEVEL == 2 234 if (ip[0] == ip[-1] && 235 FASTLZ_READU16(ip - 1) == FASTLZ_READU16(ip + 1)) { 236 distance = 1; 237 ip += 3; 238 ref = anchor - 1 + 3; 239 goto match; 240 } 241 #endif 242 243 /* find potential match */ 244 HASH_FUNCTION(hval, ip); 245 hslot = htab + hval; 246 ref = htab[hval]; 247 248 /* calculate distance to the match */ 249 distance = anchor - ref; 250 251 /* update hash table */ 252 *hslot = anchor; 253 254 if (!ref) 255 goto literal; 256 /* is this a match? check the first 3 bytes */ 257 if (distance == 0 || 258 #if FASTLZ_LEVEL == 1 259 (distance >= MAX_DISTANCE) || 260 #else 261 (distance >= MAX_FARDISTANCE) || 262 #endif 263 *ref++ != *ip++ || *ref++ != *ip++ || 264 *ref++ != *ip++) 265 goto literal; 266 267 #if FASTLZ_LEVEL == 2 268 /* far, needs at least 5-byte match */ 269 if (distance >= MAX_DISTANCE) { 270 if (*ip++ != *ref++ || *ip++ != *ref++) 271 goto literal; 272 len += 2; 273 } 274 275 match: 276 #endif 277 278 /* last matched byte */ 279 ip = anchor + len; 280 281 /* distance is biased */ 282 distance--; 283 284 if (!distance) { 285 /* zero distance means a run */ 286 unsigned char x = ip[-1]; 287 while (ip < ip_bound) 288 if (*ref++ != x) 289 break; 290 else 291 ip++; 292 } else 293 for (;;) { 294 /* safe because the outer check 295 * against ip limit */ 296 if (*ref++ != *ip++) 297 break; 298 if (*ref++ != *ip++) 299 break; 300 if (*ref++ != *ip++) 301 break; 302 if (*ref++ != *ip++) 303 break; 304 if (*ref++ != *ip++) 305 break; 306 if (*ref++ != *ip++) 307 break; 308 if (*ref++ != *ip++) 309 break; 310 if (*ref++ != *ip++) 311 break; 312 while (ip < ip_bound) 313 if (*ref++ != *ip++) 314 break; 315 break; 316 } 317 318 /* if we have copied something, adjust the copy count */ 319 if (copy) 320 /* copy is biased, '0' means 1 byte copy */ 321 *(op - copy - 1) = copy - 1; 322 else 323 /* back, to overwrite the copy count */ 324 op--; 325 326 /* reset literal counter */ 327 copy = 0; 328 329 /* length is biased, '1' means a match of 3 bytes */ 330 ip -= 3; 331 len = ip - anchor; 332 333 /* encode the match */ 334 #if FASTLZ_LEVEL == 2 335 if (distance < MAX_DISTANCE) { 336 if (len < 7) { 337 *op++ = (len << 5) + (distance >> 8); 338 *op++ = (distance & 255); 339 } else { 340 *op++ = (7 << 5) + (distance >> 8); 341 for (len -= 7; len >= 255; len -= 255) 342 *op++ = 255; 343 *op++ = len; 344 *op++ = (distance & 255); 345 } 346 } else { 347 /* far away, but not yet in the another galaxy... */ 348 if (len < 7) { 349 distance -= MAX_DISTANCE; 350 *op++ = (len << 5) + 31; 351 *op++ = 255; 352 *op++ = distance >> 8; 353 *op++ = distance & 255; 354 } else { 355 distance -= MAX_DISTANCE; 356 *op++ = (7 << 5) + 31; 357 for (len -= 7; len >= 255; len -= 255) 358 *op++ = 255; 359 *op++ = len; 360 *op++ = 255; 361 *op++ = distance >> 8; 362 *op++ = distance & 255; 363 } 364 } 365 #else 366 367 if (FASTLZ_UNEXPECT_CONDITIONAL(len > MAX_LEN - 2)) 368 while (len > MAX_LEN - 2) { 369 *op++ = (7 << 5) + (distance >> 8); 370 *op++ = MAX_LEN - 2 - 7 - 2; 371 *op++ = (distance & 255); 372 len -= MAX_LEN - 2; 373 } 374 375 if (len < 7) { 376 *op++ = (len << 5) + (distance >> 8); 377 *op++ = (distance & 255); 378 } else { 379 *op++ = (7 << 5) + (distance >> 8); 380 *op++ = len - 7; 381 *op++ = (distance & 255); 382 } 383 #endif 384 385 /* update the hash at match boundary */ 386 HASH_FUNCTION(hval, ip); 387 htab[hval] = ip++; 388 HASH_FUNCTION(hval, ip); 389 htab[hval] = ip++; 390 391 /* assuming literal copy */ 392 *op++ = MAX_COPY - 1; 393 394 continue; 395 396 literal: 397 *op++ = *anchor++; 398 ip = anchor; 399 copy++; 400 if (FASTLZ_UNEXPECT_CONDITIONAL(copy == MAX_COPY)) { 401 copy = 0; 402 *op++ = MAX_COPY - 1; 403 } 404 } 405 406 /* left-over as literal copy */ 407 ip_bound++; 408 while (ip <= ip_bound) { 409 *op++ = *ip++; 410 copy++; 411 if (copy == MAX_COPY) { 412 copy = 0; 413 *op++ = MAX_COPY - 1; 414 } 415 } 416 417 /* if we have copied something, adjust the copy length */ 418 if (copy) 419 *(op - copy - 1) = copy - 1; 420 else 421 op--; 422 423 #if FASTLZ_LEVEL == 2 424 /* marker for fastlz2 */ 425 *(unsigned char *)output |= (1 << 5); 426 #endif 427 428 return op - (unsigned char *)output; 429 } 430 431 static FASTLZ_INLINE int FASTLZ_DECOMPRESSOR(const void *input, int length, 432 void *output, int maxout) 433 { 434 const unsigned char *ip = (const unsigned char *) input; 435 const unsigned char *ip_limit = ip + length; 436 unsigned char *op = (unsigned char *) output; 437 unsigned char *op_limit = op + maxout; 438 unsigned int ctrl = (*ip++) & 31; 439 int loop = 1; 440 441 do { 442 const unsigned char *ref = op; 443 unsigned int len = ctrl >> 5; 444 unsigned int ofs = (ctrl & 31) << 8; 445 446 if (ctrl >= 32) { 447 #if FASTLZ_LEVEL == 2 448 unsigned char code; 449 #endif 450 len--; 451 ref -= ofs; 452 if (len == 7 - 1) 453 #if FASTLZ_LEVEL == 1 454 len += *ip++; 455 ref -= *ip++; 456 #else 457 do { 458 code = *ip++; 459 len += code; 460 } while (code == 255); 461 code = *ip++; 462 ref -= code; 463 464 /* match from 16-bit distance */ 465 if (FASTLZ_UNEXPECT_CONDITIONAL(code == 255)) 466 if (FASTLZ_EXPECT_CONDITIONAL(ofs == 467 (31 << 8))) { 468 ofs = (*ip++) << 8; 469 ofs += *ip++; 470 ref = op - ofs - MAX_DISTANCE; 471 } 472 #endif 473 474 #ifdef FASTLZ_SAFE 475 if (FASTLZ_UNEXPECT_CONDITIONAL(op + len + 3 > 476 op_limit)) 477 return 0; 478 479 if (FASTLZ_UNEXPECT_CONDITIONAL(ref - 1 < 480 (unsigned char *)output) 481 ) 482 return 0; 483 #endif 484 485 if (FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit)) 486 ctrl = *ip++; 487 else 488 loop = 0; 489 490 if (ref == op) { 491 /* optimize copy for a run */ 492 unsigned char b = ref[-1]; 493 *op++ = b; 494 *op++ = b; 495 *op++ = b; 496 for (; len; --len) 497 *op++ = b; 498 } else { 499 #if !defined(FASTLZ_STRICT_ALIGN) 500 const unsigned short *p; 501 unsigned short *q; 502 #endif 503 /* copy from reference */ 504 ref--; 505 *op++ = *ref++; 506 *op++ = *ref++; 507 *op++ = *ref++; 508 509 #if !defined(FASTLZ_STRICT_ALIGN) 510 /* copy a byte, so that now it's word aligned */ 511 if (len & 1) { 512 *op++ = *ref++; 513 len--; 514 } 515 516 /* copy 16-bit at once */ 517 q = (unsigned short *) op; 518 op += len; 519 p = (const unsigned short *) ref; 520 for (len >>= 1; len > 4; len -= 4) { 521 *q++ = *p++; 522 *q++ = *p++; 523 *q++ = *p++; 524 *q++ = *p++; 525 } 526 for (; len; --len) 527 *q++ = *p++; 528 #else 529 for (; len; --len) 530 *op++ = *ref++; 531 #endif 532 } 533 } else { 534 ctrl++; 535 #ifdef FASTLZ_SAFE 536 if (FASTLZ_UNEXPECT_CONDITIONAL(op + ctrl > op_limit)) 537 return 0; 538 if (FASTLZ_UNEXPECT_CONDITIONAL(ip + ctrl > ip_limit)) 539 return 0; 540 #endif 541 542 *op++ = *ip++; 543 for (--ctrl; ctrl; ctrl--) 544 *op++ = *ip++; 545 546 loop = FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit); 547 if (loop) 548 ctrl = *ip++; 549 } 550 } while (FASTLZ_EXPECT_CONDITIONAL(loop)); 551 552 return op - (unsigned char *)output; 553 } 554 555 #endif /* !defined(FASTLZ_COMPRESSOR) && !defined(FASTLZ_DECOMPRESSOR) */ 556