1 2 /*-------------------------------------------------------------*/ 3 /*--- Decompression machinery ---*/ 4 /*--- decompress.c ---*/ 5 /*-------------------------------------------------------------*/ 6 7 /* ------------------------------------------------------------------ 8 This file is part of bzip2/libbzip2, a program and library for 9 lossless, block-sorting data compression. 10 11 bzip2/libbzip2 version 1.0.5 of 10 December 2007 12 Copyright (C) 1996-2007 Julian Seward <jseward@bzip.org> 13 14 Please read the WARNING, DISCLAIMER and PATENTS sections in the 15 README file. 16 17 This program is released under the terms of the license contained 18 in the file LICENSE. 19 ------------------------------------------------------------------ */ 20 21 22 #include "bzlib_private.h" 23 24 25 /*---------------------------------------------------*/ 26 static 27 void makeMaps_d ( DState* s ) 28 { 29 Int32 i; 30 s->nInUse = 0; 31 for (i = 0; i < 256; i++) 32 if (s->inUse[i]) { 33 s->seqToUnseq[s->nInUse] = i; 34 s->nInUse++; 35 } 36 } 37 38 39 /*---------------------------------------------------*/ 40 #define RETURN(rrr) \ 41 { retVal = rrr; goto save_state_and_return; } 42 43 #define GET_BITS(lll,vvv,nnn) \ 44 case lll: s->state = lll; \ 45 while (True) { \ 46 if (s->bsLive >= nnn) { \ 47 UInt32 v; \ 48 v = (s->bsBuff >> \ 49 (s->bsLive-nnn)) & ((1 << nnn)-1); \ 50 s->bsLive -= nnn; \ 51 vvv = v; \ 52 break; \ 53 } \ 54 if (s->strm->avail_in == 0) RETURN(BZ_OK); \ 55 s->bsBuff \ 56 = (s->bsBuff << 8) | \ 57 ((UInt32) \ 58 (*((UChar*)(s->strm->next_in)))); \ 59 s->bsLive += 8; \ 60 s->strm->next_in++; \ 61 s->strm->avail_in--; \ 62 s->strm->total_in_lo32++; \ 63 if (s->strm->total_in_lo32 == 0) \ 64 s->strm->total_in_hi32++; \ 65 } 66 67 #define GET_UCHAR(lll,uuu) \ 68 GET_BITS(lll,uuu,8) 69 70 #define GET_BIT(lll,uuu) \ 71 GET_BITS(lll,uuu,1) 72 73 /*---------------------------------------------------*/ 74 #define GET_MTF_VAL(label1,label2,lval) \ 75 { \ 76 if (groupPos == 0) { \ 77 groupNo++; \ 78 if (groupNo >= nSelectors) \ 79 RETURN(BZ_DATA_ERROR); \ 80 groupPos = BZ_G_SIZE; \ 81 gSel = s->selector[groupNo]; \ 82 gMinlen = s->minLens[gSel]; \ 83 gLimit = &(s->limit[gSel][0]); \ 84 gPerm = &(s->perm[gSel][0]); \ 85 gBase = &(s->base[gSel][0]); \ 86 } \ 87 groupPos--; \ 88 zn = gMinlen; \ 89 GET_BITS(label1, zvec, zn); \ 90 while (1) { \ 91 if (zn > 20 /* the longest code */) \ 92 RETURN(BZ_DATA_ERROR); \ 93 if (zvec <= gLimit[zn]) break; \ 94 zn++; \ 95 GET_BIT(label2, zj); \ 96 zvec = (zvec << 1) | zj; \ 97 }; \ 98 if (zvec - gBase[zn] < 0 \ 99 || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) \ 100 RETURN(BZ_DATA_ERROR); \ 101 lval = gPerm[zvec - gBase[zn]]; \ 102 } 103 104 105 /*---------------------------------------------------*/ 106 Int32 BZ2_decompress ( DState* s ) 107 { 108 UChar uc; 109 Int32 retVal; 110 Int32 minLen, maxLen; 111 bz_stream* strm = s->strm; 112 113 /* stuff that needs to be saved/restored */ 114 Int32 i; 115 Int32 j; 116 Int32 t; 117 Int32 alphaSize; 118 Int32 nGroups; 119 Int32 nSelectors; 120 Int32 EOB; 121 Int32 groupNo; 122 Int32 groupPos; 123 Int32 nextSym; 124 Int32 nblockMAX; 125 Int32 nblock; 126 Int32 es; 127 Int32 N; 128 Int32 curr; 129 Int32 zt; 130 Int32 zn; 131 Int32 zvec; 132 Int32 zj; 133 Int32 gSel; 134 Int32 gMinlen; 135 Int32* gLimit; 136 Int32* gBase; 137 Int32* gPerm; 138 139 if (s->state == BZ_X_MAGIC_1) { 140 /*initialise the save area*/ 141 s->save_i = 0; 142 s->save_j = 0; 143 s->save_t = 0; 144 s->save_alphaSize = 0; 145 s->save_nGroups = 0; 146 s->save_nSelectors = 0; 147 s->save_EOB = 0; 148 s->save_groupNo = 0; 149 s->save_groupPos = 0; 150 s->save_nextSym = 0; 151 s->save_nblockMAX = 0; 152 s->save_nblock = 0; 153 s->save_es = 0; 154 s->save_N = 0; 155 s->save_curr = 0; 156 s->save_zt = 0; 157 s->save_zn = 0; 158 s->save_zvec = 0; 159 s->save_zj = 0; 160 s->save_gSel = 0; 161 s->save_gMinlen = 0; 162 s->save_gLimit = NULL; 163 s->save_gBase = NULL; 164 s->save_gPerm = NULL; 165 } 166 167 /*restore from the save area*/ 168 i = s->save_i; 169 j = s->save_j; 170 t = s->save_t; 171 alphaSize = s->save_alphaSize; 172 nGroups = s->save_nGroups; 173 nSelectors = s->save_nSelectors; 174 EOB = s->save_EOB; 175 groupNo = s->save_groupNo; 176 groupPos = s->save_groupPos; 177 nextSym = s->save_nextSym; 178 nblockMAX = s->save_nblockMAX; 179 nblock = s->save_nblock; 180 es = s->save_es; 181 N = s->save_N; 182 curr = s->save_curr; 183 zt = s->save_zt; 184 zn = s->save_zn; 185 zvec = s->save_zvec; 186 zj = s->save_zj; 187 gSel = s->save_gSel; 188 gMinlen = s->save_gMinlen; 189 gLimit = s->save_gLimit; 190 gBase = s->save_gBase; 191 gPerm = s->save_gPerm; 192 193 retVal = BZ_OK; 194 195 switch (s->state) { 196 197 GET_UCHAR(BZ_X_MAGIC_1, uc); 198 if (uc != BZ_HDR_B) RETURN(BZ_DATA_ERROR_MAGIC); 199 200 GET_UCHAR(BZ_X_MAGIC_2, uc); 201 if (uc != BZ_HDR_Z) RETURN(BZ_DATA_ERROR_MAGIC); 202 203 GET_UCHAR(BZ_X_MAGIC_3, uc) 204 if (uc != BZ_HDR_h) RETURN(BZ_DATA_ERROR_MAGIC); 205 206 GET_BITS(BZ_X_MAGIC_4, s->blockSize100k, 8) 207 if (s->blockSize100k < (BZ_HDR_0 + 1) || 208 s->blockSize100k > (BZ_HDR_0 + 9)) RETURN(BZ_DATA_ERROR_MAGIC); 209 s->blockSize100k -= BZ_HDR_0; 210 211 if (s->smallDecompress) { 212 s->ll16 = BZALLOC( s->blockSize100k * 100000 * sizeof(UInt16) ); 213 s->ll4 = BZALLOC( 214 ((1 + s->blockSize100k * 100000) >> 1) * sizeof(UChar) 215 ); 216 if (s->ll16 == NULL || s->ll4 == NULL) RETURN(BZ_MEM_ERROR); 217 } else { 218 s->tt = BZALLOC( s->blockSize100k * 100000 * sizeof(Int32) ); 219 if (s->tt == NULL) RETURN(BZ_MEM_ERROR); 220 } 221 222 GET_UCHAR(BZ_X_BLKHDR_1, uc); 223 224 if (uc == 0x17) goto endhdr_2; 225 if (uc != 0x31) RETURN(BZ_DATA_ERROR); 226 GET_UCHAR(BZ_X_BLKHDR_2, uc); 227 if (uc != 0x41) RETURN(BZ_DATA_ERROR); 228 GET_UCHAR(BZ_X_BLKHDR_3, uc); 229 if (uc != 0x59) RETURN(BZ_DATA_ERROR); 230 GET_UCHAR(BZ_X_BLKHDR_4, uc); 231 if (uc != 0x26) RETURN(BZ_DATA_ERROR); 232 GET_UCHAR(BZ_X_BLKHDR_5, uc); 233 if (uc != 0x53) RETURN(BZ_DATA_ERROR); 234 GET_UCHAR(BZ_X_BLKHDR_6, uc); 235 if (uc != 0x59) RETURN(BZ_DATA_ERROR); 236 237 s->currBlockNo++; 238 if (s->verbosity >= 2) 239 VPrintf1 ( "\n [%d: huff+mtf ", s->currBlockNo ); 240 241 s->storedBlockCRC = 0; 242 GET_UCHAR(BZ_X_BCRC_1, uc); 243 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc); 244 GET_UCHAR(BZ_X_BCRC_2, uc); 245 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc); 246 GET_UCHAR(BZ_X_BCRC_3, uc); 247 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc); 248 GET_UCHAR(BZ_X_BCRC_4, uc); 249 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc); 250 251 GET_BITS(BZ_X_RANDBIT, s->blockRandomised, 1); 252 253 s->origPtr = 0; 254 GET_UCHAR(BZ_X_ORIGPTR_1, uc); 255 s->origPtr = (s->origPtr << 8) | ((Int32)uc); 256 GET_UCHAR(BZ_X_ORIGPTR_2, uc); 257 s->origPtr = (s->origPtr << 8) | ((Int32)uc); 258 GET_UCHAR(BZ_X_ORIGPTR_3, uc); 259 s->origPtr = (s->origPtr << 8) | ((Int32)uc); 260 261 if (s->origPtr < 0) 262 RETURN(BZ_DATA_ERROR); 263 if (s->origPtr > 10 + 100000*s->blockSize100k) 264 RETURN(BZ_DATA_ERROR); 265 266 /*--- Receive the mapping table ---*/ 267 for (i = 0; i < 16; i++) { 268 GET_BIT(BZ_X_MAPPING_1, uc); 269 if (uc == 1) 270 s->inUse16[i] = True; else 271 s->inUse16[i] = False; 272 } 273 274 for (i = 0; i < 256; i++) s->inUse[i] = False; 275 276 for (i = 0; i < 16; i++) 277 if (s->inUse16[i]) 278 for (j = 0; j < 16; j++) { 279 GET_BIT(BZ_X_MAPPING_2, uc); 280 if (uc == 1) s->inUse[i * 16 + j] = True; 281 } 282 makeMaps_d ( s ); 283 if (s->nInUse == 0) RETURN(BZ_DATA_ERROR); 284 alphaSize = s->nInUse+2; 285 286 /*--- Now the selectors ---*/ 287 GET_BITS(BZ_X_SELECTOR_1, nGroups, 3); 288 if (nGroups < 2 || nGroups > 6) RETURN(BZ_DATA_ERROR); 289 GET_BITS(BZ_X_SELECTOR_2, nSelectors, 15); 290 if (nSelectors < 1) RETURN(BZ_DATA_ERROR); 291 for (i = 0; i < nSelectors; i++) { 292 j = 0; 293 while (True) { 294 GET_BIT(BZ_X_SELECTOR_3, uc); 295 if (uc == 0) break; 296 j++; 297 if (j >= nGroups) RETURN(BZ_DATA_ERROR); 298 } 299 s->selectorMtf[i] = j; 300 } 301 302 /*--- Undo the MTF values for the selectors. ---*/ 303 { 304 UChar pos[BZ_N_GROUPS], tmp, v; 305 for (v = 0; v < nGroups; v++) pos[v] = v; 306 307 for (i = 0; i < nSelectors; i++) { 308 v = s->selectorMtf[i]; 309 tmp = pos[v]; 310 while (v > 0) { pos[v] = pos[v-1]; v--; } 311 pos[0] = tmp; 312 s->selector[i] = tmp; 313 } 314 } 315 316 /*--- Now the coding tables ---*/ 317 for (t = 0; t < nGroups; t++) { 318 GET_BITS(BZ_X_CODING_1, curr, 5); 319 for (i = 0; i < alphaSize; i++) { 320 while (True) { 321 if (curr < 1 || curr > 20) RETURN(BZ_DATA_ERROR); 322 GET_BIT(BZ_X_CODING_2, uc); 323 if (uc == 0) break; 324 GET_BIT(BZ_X_CODING_3, uc); 325 if (uc == 0) curr++; else curr--; 326 } 327 s->len[t][i] = curr; 328 } 329 } 330 331 /*--- Create the Huffman decoding tables ---*/ 332 for (t = 0; t < nGroups; t++) { 333 minLen = 32; 334 maxLen = 0; 335 for (i = 0; i < alphaSize; i++) { 336 if (s->len[t][i] > maxLen) maxLen = s->len[t][i]; 337 if (s->len[t][i] < minLen) minLen = s->len[t][i]; 338 } 339 BZ2_hbCreateDecodeTables ( 340 &(s->limit[t][0]), 341 &(s->base[t][0]), 342 &(s->perm[t][0]), 343 &(s->len[t][0]), 344 minLen, maxLen, alphaSize 345 ); 346 s->minLens[t] = minLen; 347 } 348 349 /*--- Now the MTF values ---*/ 350 351 EOB = s->nInUse+1; 352 nblockMAX = 100000 * s->blockSize100k; 353 groupNo = -1; 354 groupPos = 0; 355 356 for (i = 0; i <= 255; i++) s->unzftab[i] = 0; 357 358 /*-- MTF init --*/ 359 { 360 Int32 ii, jj, kk; 361 kk = MTFA_SIZE-1; 362 for (ii = 256 / MTFL_SIZE - 1; ii >= 0; ii--) { 363 for (jj = MTFL_SIZE-1; jj >= 0; jj--) { 364 s->mtfa[kk] = (UChar)(ii * MTFL_SIZE + jj); 365 kk--; 366 } 367 s->mtfbase[ii] = kk + 1; 368 } 369 } 370 /*-- end MTF init --*/ 371 372 nblock = 0; 373 GET_MTF_VAL(BZ_X_MTF_1, BZ_X_MTF_2, nextSym); 374 375 while (True) { 376 377 if (nextSym == EOB) break; 378 379 if (nextSym == BZ_RUNA || nextSym == BZ_RUNB) { 380 381 es = -1; 382 N = 1; 383 do { 384 if (nextSym == BZ_RUNA) es = es + (0+1) * N; else 385 if (nextSym == BZ_RUNB) es = es + (1+1) * N; 386 N = N * 2; 387 GET_MTF_VAL(BZ_X_MTF_3, BZ_X_MTF_4, nextSym); 388 } 389 while (nextSym == BZ_RUNA || nextSym == BZ_RUNB); 390 391 es++; 392 uc = s->seqToUnseq[ s->mtfa[s->mtfbase[0]] ]; 393 s->unzftab[uc] += es; 394 395 if (s->smallDecompress) 396 while (es > 0) { 397 if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR); 398 s->ll16[nblock] = (UInt16)uc; 399 nblock++; 400 es--; 401 } 402 else 403 while (es > 0) { 404 if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR); 405 s->tt[nblock] = (UInt32)uc; 406 nblock++; 407 es--; 408 }; 409 410 continue; 411 412 } else { 413 414 if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR); 415 416 /*-- uc = MTF ( nextSym-1 ) --*/ 417 { 418 Int32 ii, jj, kk, pp, lno, off; 419 UInt32 nn; 420 nn = (UInt32)(nextSym - 1); 421 422 if (nn < MTFL_SIZE) { 423 /* avoid general-case expense */ 424 pp = s->mtfbase[0]; 425 uc = s->mtfa[pp+nn]; 426 while (nn > 3) { 427 Int32 z = pp+nn; 428 s->mtfa[(z) ] = s->mtfa[(z)-1]; 429 s->mtfa[(z)-1] = s->mtfa[(z)-2]; 430 s->mtfa[(z)-2] = s->mtfa[(z)-3]; 431 s->mtfa[(z)-3] = s->mtfa[(z)-4]; 432 nn -= 4; 433 } 434 while (nn > 0) { 435 s->mtfa[(pp+nn)] = s->mtfa[(pp+nn)-1]; nn--; 436 }; 437 s->mtfa[pp] = uc; 438 } else { 439 /* general case */ 440 lno = nn / MTFL_SIZE; 441 off = nn % MTFL_SIZE; 442 pp = s->mtfbase[lno] + off; 443 uc = s->mtfa[pp]; 444 while (pp > s->mtfbase[lno]) { 445 s->mtfa[pp] = s->mtfa[pp-1]; pp--; 446 }; 447 s->mtfbase[lno]++; 448 while (lno > 0) { 449 s->mtfbase[lno]--; 450 s->mtfa[s->mtfbase[lno]] 451 = s->mtfa[s->mtfbase[lno-1] + MTFL_SIZE - 1]; 452 lno--; 453 } 454 s->mtfbase[0]--; 455 s->mtfa[s->mtfbase[0]] = uc; 456 if (s->mtfbase[0] == 0) { 457 kk = MTFA_SIZE-1; 458 for (ii = 256 / MTFL_SIZE-1; ii >= 0; ii--) { 459 for (jj = MTFL_SIZE-1; jj >= 0; jj--) { 460 s->mtfa[kk] = s->mtfa[s->mtfbase[ii] + jj]; 461 kk--; 462 } 463 s->mtfbase[ii] = kk + 1; 464 } 465 } 466 } 467 } 468 /*-- end uc = MTF ( nextSym-1 ) --*/ 469 470 s->unzftab[s->seqToUnseq[uc]]++; 471 if (s->smallDecompress) 472 s->ll16[nblock] = (UInt16)(s->seqToUnseq[uc]); else 473 s->tt[nblock] = (UInt32)(s->seqToUnseq[uc]); 474 nblock++; 475 476 GET_MTF_VAL(BZ_X_MTF_5, BZ_X_MTF_6, nextSym); 477 continue; 478 } 479 } 480 481 /* Now we know what nblock is, we can do a better sanity 482 check on s->origPtr. 483 */ 484 if (s->origPtr < 0 || s->origPtr >= nblock) 485 RETURN(BZ_DATA_ERROR); 486 487 /*-- Set up cftab to facilitate generation of T^(-1) --*/ 488 s->cftab[0] = 0; 489 for (i = 1; i <= 256; i++) s->cftab[i] = s->unzftab[i-1]; 490 for (i = 1; i <= 256; i++) s->cftab[i] += s->cftab[i-1]; 491 for (i = 0; i <= 256; i++) { 492 if (s->cftab[i] < 0 || s->cftab[i] > nblock) { 493 /* s->cftab[i] can legitimately be == nblock */ 494 RETURN(BZ_DATA_ERROR) 495 } 496 } 497 498 s->state_out_len = 0; 499 s->state_out_ch = 0; 500 BZ_INITIALISE_CRC ( s->calculatedBlockCRC ); 501 s->state = BZ_X_OUTPUT; 502 if (s->verbosity >= 2) VPrintf0 ( "rt+rld" ); 503 504 if (s->smallDecompress) { 505 506 /*-- Make a copy of cftab, used in generation of T --*/ 507 for (i = 0; i <= 256; i++) s->cftabCopy[i] = s->cftab[i]; 508 509 /*-- compute the T vector --*/ 510 for (i = 0; i < nblock; i++) { 511 uc = (UChar)(s->ll16[i]); 512 SET_LL(i, s->cftabCopy[uc]); 513 s->cftabCopy[uc]++; 514 } 515 516 /*-- Compute T^(-1) by pointer reversal on T --*/ 517 i = s->origPtr; 518 j = GET_LL(i); 519 do { 520 Int32 tmp = GET_LL(j); 521 SET_LL(j, i); 522 i = j; 523 j = tmp; 524 } 525 while (i != s->origPtr); 526 527 s->tPos = s->origPtr; 528 s->nblock_used = 0; 529 if (s->blockRandomised) { 530 BZ_RAND_INIT_MASK; 531 BZ_GET_SMALL(s->k0); s->nblock_used++; 532 BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK; 533 } else { 534 BZ_GET_SMALL(s->k0); s->nblock_used++; 535 } 536 537 } else { 538 539 /*-- compute the T^(-1) vector --*/ 540 for (i = 0; i < nblock; i++) { 541 uc = (UChar)(s->tt[i] & 0xff); 542 s->tt[s->cftab[uc]] |= (i << 8); 543 s->cftab[uc]++; 544 } 545 546 s->tPos = s->tt[s->origPtr] >> 8; 547 s->nblock_used = 0; 548 if (s->blockRandomised) { 549 BZ_RAND_INIT_MASK; 550 BZ_GET_FAST(s->k0); s->nblock_used++; 551 BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK; 552 } else { 553 BZ_GET_FAST(s->k0); s->nblock_used++; 554 } 555 556 } 557 558 RETURN(BZ_OK) 559 560 561 562 endhdr_2: 563 564 GET_UCHAR(BZ_X_ENDHDR_2, uc); 565 if (uc != 0x72) RETURN(BZ_DATA_ERROR); 566 GET_UCHAR(BZ_X_ENDHDR_3, uc); 567 if (uc != 0x45) RETURN(BZ_DATA_ERROR); 568 GET_UCHAR(BZ_X_ENDHDR_4, uc); 569 if (uc != 0x38) RETURN(BZ_DATA_ERROR); 570 GET_UCHAR(BZ_X_ENDHDR_5, uc); 571 if (uc != 0x50) RETURN(BZ_DATA_ERROR); 572 GET_UCHAR(BZ_X_ENDHDR_6, uc); 573 if (uc != 0x90) RETURN(BZ_DATA_ERROR); 574 575 s->storedCombinedCRC = 0; 576 GET_UCHAR(BZ_X_CCRC_1, uc); 577 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc); 578 GET_UCHAR(BZ_X_CCRC_2, uc); 579 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc); 580 GET_UCHAR(BZ_X_CCRC_3, uc); 581 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc); 582 GET_UCHAR(BZ_X_CCRC_4, uc); 583 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc); 584 585 s->state = BZ_X_IDLE; 586 RETURN(BZ_STREAM_END) 587 588 default: AssertH ( False, 4001 ); 589 } 590 591 AssertH ( False, 4002 ); 592 593 save_state_and_return: 594 595 s->save_i = i; 596 s->save_j = j; 597 s->save_t = t; 598 s->save_alphaSize = alphaSize; 599 s->save_nGroups = nGroups; 600 s->save_nSelectors = nSelectors; 601 s->save_EOB = EOB; 602 s->save_groupNo = groupNo; 603 s->save_groupPos = groupPos; 604 s->save_nextSym = nextSym; 605 s->save_nblockMAX = nblockMAX; 606 s->save_nblock = nblock; 607 s->save_es = es; 608 s->save_N = N; 609 s->save_curr = curr; 610 s->save_zt = zt; 611 s->save_zn = zn; 612 s->save_zvec = zvec; 613 s->save_zj = zj; 614 s->save_gSel = gSel; 615 s->save_gMinlen = gMinlen; 616 s->save_gLimit = gLimit; 617 s->save_gBase = gBase; 618 s->save_gPerm = gPerm; 619 620 return retVal; 621 } 622 623 624 /*-------------------------------------------------------------*/ 625 /*--- end decompress.c ---*/ 626 /*-------------------------------------------------------------*/ 627