/*-----------------------------------------------------------*/
/*--- Block recoverer program for bzip2                   ---*/
/*---                                      bzip2recover.c ---*/
/*-----------------------------------------------------------*/

/*--
  This program is bzip2recover, a program to attempt data
  salvage from damaged files created by the accompanying
  bzip2-1.0 program.

  Copyright (C) 1996-2002 Julian R Seward.  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:

  1. Redistributions of source code must retain the above copyright
     notice, this list of conditions and the following disclaimer.

  2. The origin of this software must not be misrepresented; you must
     not claim that you wrote the original software.  If you use this
     software in a product, an acknowledgment in the product
     documentation would be appreciated but is not required.

  3. Altered source versions must be plainly marked as such, and must
     not be misrepresented as being the original software.

  4. The name of the author may not be used to endorse or promote
     products derived from this software without specific prior written
     permission.

  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
  OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

  Julian Seward, Cambridge, UK.
  jseward@acm.org
  bzip2/libbzip2 version 1.0 of 21 March 2000
--*/

/*--
  This program is a complete hack and should be rewritten
  properly.  It isn't very complicated.
--*/

#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>


/* This program records bit locations in the file to be recovered.
   That means that if 64-bit ints are not supported, we will not
   be able to recover .bz2 files over 512MB (2^32 bits) long.
   On GNU supported platforms, we take advantage of the 64-bit
   int support to circumvent this problem.  Ditto MSVC.

   This change occurred in version 1.0.2; all prior versions have
   the 512MB limitation.

   MaybeUInt64_FMT is the printf conversion matching MaybeUInt64.
   For the GNU case it is "%llu": the `L' length modifier is only
   defined for floating-point conversions, so "%Lu" is not portable
   across C libraries.
*/
#ifdef __GNUC__
   typedef  unsigned long long int  MaybeUInt64;
#  define MaybeUInt64_FMT "%llu"
#else
#ifdef _MSC_VER
   typedef  unsigned __int64  MaybeUInt64;
#  define MaybeUInt64_FMT "%I64u"
#else
   typedef  unsigned int   MaybeUInt64;
#  define MaybeUInt64_FMT "%u"
#endif
#endif

typedef  unsigned int   UInt32;
typedef  int            Int32;
typedef  unsigned char  UChar;
typedef  char           Char;
typedef  unsigned char  Bool;
#define True    ((Bool)1)
#define False   ((Bool)0)


/* Size, in bytes, of each of the file/program name buffers below. */
#define BZ_MAX_FILENAME 2000

Char inFileName[BZ_MAX_FILENAME];    /* file being recovered            */
Char outFileName[BZ_MAX_FILENAME];   /* block file currently written    */
Char progName[BZ_MAX_FILENAME];      /* prefix for diagnostics          */

MaybeUInt64 bytesOut = 0;            /* bytes emitted by the bit writer */
MaybeUInt64 bytesIn  = 0;            /* not updated in this part of the file */


/*---------------------------------------------------*/
/*--- Header bytes                                ---*/
/*---------------------------------------------------*/

#define BZ_HDR_B 0x42                         /* 'B' */
#define BZ_HDR_Z 0x5a                         /* 'Z' */
#define BZ_HDR_h 0x68                         /* 'h' */
#define BZ_HDR_0 0x30                         /* '0' */


/*---------------------------------------------------*/
/*--- I/O errors                                  ---*/
/*---------------------------------------------------*/

/*---------------------------------------------*/
/*--
   Report a failure while reading inFileName, print the
   reason recorded in errno via perror(), and exit with
   status 1.  Does not return.
--*/
void readError ( void )
{
   fprintf ( stderr,
             "%s: I/O error reading `%s', possible reason follows.\n",
             progName, inFileName );
   perror ( progName );
   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
             progName );
   exit ( 1 );
}


/*---------------------------------------------*/
/*--
   Report a failure while writing outFileName, print the
   reason recorded in errno via perror(), and exit with
   status 1.  Does not return.
--*/
void writeError ( void )
{
   /* The failing operation is a write to the current output
      file, so name that file and say "writing". */
   fprintf ( stderr,
             "%s: I/O error writing `%s', possible reason follows.\n",
             progName, outFileName );
   perror ( progName );
   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
             progName );
   exit ( 1 );
}


/*---------------------------------------------*/
/*--
   Report that an allocation of n bytes failed and exit
   with status 1.  Does not return.
--*/
void mallocFail ( Int32 n )
{
   fprintf ( stderr,
             "%s: malloc failed on request for %d bytes.\n",
             progName, n );
   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
             progName );
   exit ( 1 );
}


/*---------------------------------------------*/
/*--
   Report that inFileName holds more blocks than the
   compiled-in limit (passed as max_handled_blocks) and
   exit with status 1.  Does not return.
--*/
void tooManyBlocks ( Int32 max_handled_blocks )
{
   fprintf ( stderr,
             "%s: `%s' appears to contain more than %d blocks\n",
             progName, inFileName, max_handled_blocks );
   fprintf ( stderr,
             "%s: and cannot be handled. To fix, increase\n",
             progName );
   fprintf ( stderr,
             "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n",
             progName );
   exit ( 1 );
}



/*---------------------------------------------------*/
/*--- Bit stream I/O                              ---*/
/*---------------------------------------------------*/

/*--
   A bit-at-a-time view of a stdio stream.
      handle   : the underlying stream
      buffer   : bits collected so far (write) or the most
                 recently fetched byte (read)
      buffLive : number of bits in buffer still to be
                 flushed (write) or handed out (read)
      mode     : 'r' for a read stream, 'w' for a write stream
--*/
typedef
   struct {
      FILE*  handle;
      Int32  buffer;
      Int32  buffLive;
      Char   mode;
   }
   BitStream;


/*---------------------------------------------*/
/*--
   Allocate a BitStream with an empty bit buffer over
   `stream', tagged with `mode'.  Calls mallocFail (which
   exits) if the allocation fails.
--*/
static BitStream* bsOpenStream ( FILE* stream, Char mode )
{
   BitStream *bs = malloc ( sizeof *bs );
   if (bs == NULL) mallocFail ( (Int32) sizeof *bs );
   bs->handle   = stream;
   bs->buffer   = 0;
   bs->buffLive = 0;
   bs->mode     = mode;
   return bs;
}


/*---------------------------------------------*/
/*--
   Create a bit stream for reading from `stream'.
   The result is heap-allocated.
--*/
BitStream* bsOpenReadStream ( FILE* stream )
{
   return bsOpenStream ( stream, 'r' );
}


/*---------------------------------------------*/
/*--
   Create a bit stream for writing to `stream'.
   The result is heap-allocated.
--*/
BitStream* bsOpenWriteStream ( FILE* stream )
{
   return bsOpenStream ( stream, 'w' );
}


/*---------------------------------------------*/
/*--
   Append the low bit of `bit' to the write stream.  Bits
   are packed most-significant first.  A full byte is only
   written out when the NEXT bit arrives, so up to 8 bits
   can be pending in bs->buffer.  A failed putc() ends the
   program via writeError().
--*/
void bsPutBit ( BitStream* bs, Int32 bit )
{
   if (bs->buffLive == 8) {
      /* Buffer is full: emit it, then start a new byte
         with the incoming bit. */
      Int32 retVal = putc ( (UChar) bs->buffer, bs->handle );
      if (retVal == EOF) writeError();
      bytesOut++;
      bs->buffLive = 1;
      bs->buffer = bit & 0x1;
   } else {
      bs->buffer = ( (bs->buffer << 1) | (bit & 0x1) );
      bs->buffLive++;
   };
}

227 /*---------------------------------------------*/ 228 /*-- 229 Returns 0 or 1, or 2 to indicate EOF. 230 --*/ 231 Int32 bsGetBit ( BitStream* bs ) 232 { 233 if (bs->buffLive > 0) { 234 bs->buffLive --; 235 return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 ); 236 } else { 237 Int32 retVal = getc ( bs->handle ); 238 if ( retVal == EOF ) { 239 if (errno != 0) readError(); 240 return 2; 241 } 242 bs->buffLive = 7; 243 bs->buffer = retVal; 244 return ( ((bs->buffer) >> 7) & 0x1 ); 245 } 246 } 247 248 249 /*---------------------------------------------*/ 250 void bsClose ( BitStream* bs ) 251 { 252 Int32 retVal; 253 254 if ( bs->mode == 'w' ) { 255 while ( bs->buffLive < 8 ) { 256 bs->buffLive++; 257 bs->buffer <<= 1; 258 }; 259 retVal = putc ( (UChar) (bs->buffer), bs->handle ); 260 if (retVal == EOF) writeError(); 261 bytesOut++; 262 retVal = fflush ( bs->handle ); 263 if (retVal == EOF) writeError(); 264 } 265 retVal = fclose ( bs->handle ); 266 if (retVal == EOF) { 267 if (bs->mode == 'w') writeError(); else readError(); 268 } 269 free ( bs ); 270 } 271 272 273 /*---------------------------------------------*/ 274 void bsPutUChar ( BitStream* bs, UChar c ) 275 { 276 Int32 i; 277 for (i = 7; i >= 0; i--) 278 bsPutBit ( bs, (((UInt32) c) >> i) & 0x1 ); 279 } 280 281 282 /*---------------------------------------------*/ 283 void bsPutUInt32 ( BitStream* bs, UInt32 c ) 284 { 285 Int32 i; 286 287 for (i = 31; i >= 0; i--) 288 bsPutBit ( bs, (c >> i) & 0x1 ); 289 } 290 291 292 /*---------------------------------------------*/ 293 Bool endsInBz2 ( Char* name ) 294 { 295 Int32 n = strlen ( name ); 296 if (n <= 4) return False; 297 return 298 (name[n-4] == '.' && 299 name[n-3] == 'b' && 300 name[n-2] == 'z' && 301 name[n-1] == '2'); 302 } 303 304 305 /*---------------------------------------------------*/ 306 /*--- ---*/ 307 /*---------------------------------------------------*/ 308 309 /* This logic isn't really right when it comes to Cygwin. 
*/ 310 #ifdef _WIN32 311 # define BZ_SPLIT_SYM '\\' /* path splitter on Windows platform */ 312 #else 313 # define BZ_SPLIT_SYM '/' /* path splitter on Unix platform */ 314 #endif 315 316 #define BLOCK_HEADER_HI 0x00003141UL 317 #define BLOCK_HEADER_LO 0x59265359UL 318 319 #define BLOCK_ENDMARK_HI 0x00001772UL 320 #define BLOCK_ENDMARK_LO 0x45385090UL 321 322 /* Increase if necessary. However, a .bz2 file with > 50000 blocks 323 would have an uncompressed size of at least 40GB, so the chances 324 are low you'll need to up this. 325 */ 326 #define BZ_MAX_HANDLED_BLOCKS 50000 327 328 MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS]; 329 MaybeUInt64 bEnd [BZ_MAX_HANDLED_BLOCKS]; 330 MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS]; 331 MaybeUInt64 rbEnd [BZ_MAX_HANDLED_BLOCKS]; 332 333 Int32 main ( Int32 argc, Char** argv ) 334 { 335 FILE* inFile; 336 FILE* outFile; 337 BitStream* bsIn, *bsWr; 338 Int32 b, wrBlock, currBlock, rbCtr; 339 MaybeUInt64 bitsRead; 340 341 UInt32 buffHi, buffLo, blockCRC; 342 Char* p; 343 344 strcpy ( progName, argv[0] ); 345 inFileName[0] = outFileName[0] = 0; 346 347 fprintf ( stderr, 348 "bzip2recover 1.0.2: extracts blocks from damaged .bz2 files.\n" ); 349 350 if (argc != 2) { 351 fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n", 352 progName, progName ); 353 switch (sizeof(MaybeUInt64)) { 354 case 8: 355 fprintf(stderr, 356 "\trestrictions on size of recovered file: None\n"); 357 break; 358 case 4: 359 fprintf(stderr, 360 "\trestrictions on size of recovered file: 512 MB\n"); 361 fprintf(stderr, 362 "\tto circumvent, recompile with MaybeUInt64 as an\n" 363 "\tunsigned 64-bit int.\n"); 364 break; 365 default: 366 fprintf(stderr, 367 "\tsizeof(MaybeUInt64) is not 4 or 8 -- " 368 "configuration error.\n"); 369 break; 370 } 371 exit(1); 372 } 373 374 if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) { 375 fprintf ( stderr, 376 "%s: supplied filename is suspiciously (>= %d chars) long. 
Bye!\n", 377 progName, strlen(argv[1]) ); 378 exit(1); 379 } 380 381 strcpy ( inFileName, argv[1] ); 382 383 inFile = fopen ( inFileName, "rb" ); 384 if (inFile == NULL) { 385 fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName ); 386 exit(1); 387 } 388 389 bsIn = bsOpenReadStream ( inFile ); 390 fprintf ( stderr, "%s: searching for block boundaries ...\n", progName ); 391 392 bitsRead = 0; 393 buffHi = buffLo = 0; 394 currBlock = 0; 395 bStart[currBlock] = 0; 396 397 rbCtr = 0; 398 399 while (True) { 400 b = bsGetBit ( bsIn ); 401 bitsRead++; 402 if (b == 2) { 403 if (bitsRead >= bStart[currBlock] && 404 (bitsRead - bStart[currBlock]) >= 40) { 405 bEnd[currBlock] = bitsRead-1; 406 if (currBlock > 0) 407 fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT 408 " to " MaybeUInt64_FMT " (incomplete)\n", 409 currBlock, bStart[currBlock], bEnd[currBlock] ); 410 } else 411 currBlock--; 412 break; 413 } 414 buffHi = (buffHi << 1) | (buffLo >> 31); 415 buffLo = (buffLo << 1) | (b & 1); 416 if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI 417 && buffLo == BLOCK_HEADER_LO) 418 || 419 ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI 420 && buffLo == BLOCK_ENDMARK_LO) 421 ) { 422 if (bitsRead > 49) { 423 bEnd[currBlock] = bitsRead-49; 424 } else { 425 bEnd[currBlock] = 0; 426 } 427 if (currBlock > 0 && 428 (bEnd[currBlock] - bStart[currBlock]) >= 130) { 429 fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT 430 " to " MaybeUInt64_FMT "\n", 431 rbCtr+1, bStart[currBlock], bEnd[currBlock] ); 432 rbStart[rbCtr] = bStart[currBlock]; 433 rbEnd[rbCtr] = bEnd[currBlock]; 434 rbCtr++; 435 } 436 if (currBlock >= BZ_MAX_HANDLED_BLOCKS) 437 tooManyBlocks(BZ_MAX_HANDLED_BLOCKS); 438 currBlock++; 439 440 bStart[currBlock] = bitsRead; 441 } 442 } 443 444 bsClose ( bsIn ); 445 446 /*-- identified blocks run from 1 to rbCtr inclusive. 
--*/ 447 448 if (rbCtr < 1) { 449 fprintf ( stderr, 450 "%s: sorry, I couldn't find any block boundaries.\n", 451 progName ); 452 exit(1); 453 }; 454 455 fprintf ( stderr, "%s: splitting into blocks\n", progName ); 456 457 inFile = fopen ( inFileName, "rb" ); 458 if (inFile == NULL) { 459 fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName ); 460 exit(1); 461 } 462 bsIn = bsOpenReadStream ( inFile ); 463 464 /*-- placate gcc's dataflow analyser --*/ 465 blockCRC = 0; bsWr = 0; 466 467 bitsRead = 0; 468 outFile = NULL; 469 wrBlock = 0; 470 while (True) { 471 b = bsGetBit(bsIn); 472 if (b == 2) break; 473 buffHi = (buffHi << 1) | (buffLo >> 31); 474 buffLo = (buffLo << 1) | (b & 1); 475 if (bitsRead == 47+rbStart[wrBlock]) 476 blockCRC = (buffHi << 16) | (buffLo >> 16); 477 478 if (outFile != NULL && bitsRead >= rbStart[wrBlock] 479 && bitsRead <= rbEnd[wrBlock]) { 480 bsPutBit ( bsWr, b ); 481 } 482 483 bitsRead++; 484 485 if (bitsRead == rbEnd[wrBlock]+1) { 486 if (outFile != NULL) { 487 bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 ); 488 bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 ); 489 bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 ); 490 bsPutUInt32 ( bsWr, blockCRC ); 491 bsClose ( bsWr ); 492 } 493 if (wrBlock >= rbCtr) break; 494 wrBlock++; 495 } else 496 if (bitsRead == rbStart[wrBlock]) { 497 /* Create the output file name, correctly handling leading paths. 498 (31.10.2001 by Sergey E. Kusikov) */ 499 Char* split; 500 Int32 ofs, k; 501 for (k = 0; k < BZ_MAX_FILENAME; k++) 502 outFileName[k] = 0; 503 strcpy (outFileName, inFileName); 504 split = strrchr (outFileName, BZ_SPLIT_SYM); 505 if (split == NULL) { 506 split = outFileName; 507 } else { 508 ++split; 509 } 510 /* Now split points to the start of the basename. 
*/ 511 ofs = split - outFileName; 512 sprintf (split, "rec%5d", wrBlock+1); 513 for (p = split; *p != 0; p++) if (*p == ' ') *p = '0'; 514 strcat (outFileName, inFileName + ofs); 515 516 if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" ); 517 518 fprintf ( stderr, " writing block %d to `%s' ...\n", 519 wrBlock+1, outFileName ); 520 521 outFile = fopen ( outFileName, "wb" ); 522 if (outFile == NULL) { 523 fprintf ( stderr, "%s: can't write `%s'\n", 524 progName, outFileName ); 525 exit(1); 526 } 527 bsWr = bsOpenWriteStream ( outFile ); 528 bsPutUChar ( bsWr, BZ_HDR_B ); 529 bsPutUChar ( bsWr, BZ_HDR_Z ); 530 bsPutUChar ( bsWr, BZ_HDR_h ); 531 bsPutUChar ( bsWr, BZ_HDR_0 + 9 ); 532 bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 ); 533 bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 ); 534 bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 ); 535 } 536 } 537 538 fprintf ( stderr, "%s: finished\n", progName ); 539 return 0; 540 } 541 542 543 544 /*-----------------------------------------------------------*/ 545 /*--- end bzip2recover.c ---*/ 546 /*-----------------------------------------------------------*/ 547