/*-----------------------------------------------------------*/
/*--- Block recoverer program for bzip2                   ---*/
/*---                                      bzip2recover.c ---*/
/*-----------------------------------------------------------*/

/* ------------------------------------------------------------------
   This file is part of bzip2/libbzip2, a program and library for
   lossless, block-sorting data compression.

   bzip2/libbzip2 version 1.0.5 of 10 December 2007
   Copyright (C) 1996-2007 Julian Seward <jseward@bzip.org>

   Please read the WARNING, DISCLAIMER and PATENTS sections in the
   README file.

   This program is released under the terms of the license contained
   in the file LICENSE.
   ------------------------------------------------------------------ */

/* This program is a complete hack and should be rewritten properly.
   It isn't very complicated. */

#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>


/* This program records bit locations in the file to be recovered.
   That means that if 64-bit ints are not supported, we will not
   be able to recover .bz2 files over 512MB (2^32 bits) long.
   On GNU supported platforms, we take advantage of the 64-bit
   int support to circumvent this problem.  Ditto MSVC.

   This change occurred in version 1.0.2; all prior versions have
   the 512MB limitation.
*/
#ifdef __GNUC__
   typedef  unsigned long long int  MaybeUInt64;
   /* "%llu" is the standard conversion for unsigned long long;
      the 'L' length modifier is only defined for long double. */
#  define MaybeUInt64_FMT "%llu"
#else
#ifdef _MSC_VER
   typedef  unsigned __int64  MaybeUInt64;
#  define MaybeUInt64_FMT "%I64u"
#else
   typedef  unsigned int   MaybeUInt64;
#  define MaybeUInt64_FMT "%u"
#endif
#endif

typedef  unsigned int   UInt32;
typedef  int            Int32;
typedef  unsigned char  UChar;
typedef  char           Char;
typedef  unsigned char  Bool;
#define True    ((Bool)1)
#define False   ((Bool)0)


#define BZ_MAX_FILENAME 2000

Char inFileName[BZ_MAX_FILENAME];
Char outFileName[BZ_MAX_FILENAME];
Char progName[BZ_MAX_FILENAME];

MaybeUInt64 bytesOut = 0;
MaybeUInt64 bytesIn  = 0;


/*---------------------------------------------------*/
/*--- Header bytes                                ---*/
/*---------------------------------------------------*/

#define BZ_HDR_B 0x42                         /* 'B' */
#define BZ_HDR_Z 0x5a                         /* 'Z' */
#define BZ_HDR_h 0x68                         /* 'h' */
#define BZ_HDR_0 0x30                         /* '0' */


/*---------------------------------------------------*/
/*--- I/O errors                                  ---*/
/*---------------------------------------------------*/

/*---------------------------------------------*/
/* Report a failure reading the input file (named by inFileName),
   print the errno-derived reason via perror, and exit with status 1. */
static void readError ( void )
{
   fprintf ( stderr,
             "%s: I/O error reading `%s', possible reason follows.\n",
            progName, inFileName );
   perror ( progName );
   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
             progName );
   exit ( 1 );
}


/*---------------------------------------------*/
/* Report a failure writing the current output file (named by
   outFileName), print the errno-derived reason via perror, and exit
   with status 1. */
static void writeError ( void )
{
   fprintf ( stderr,
             "%s: I/O error writing `%s', possible reason follows.\n",
            progName, outFileName );
   perror ( progName );
   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
             progName );
   exit ( 1 );
}


/*---------------------------------------------*/
/* Report that an allocation of n bytes failed and exit with status 1. */
static void mallocFail ( Int32 n )
{
   fprintf ( stderr,
             "%s: malloc failed on request for %d bytes.\n",
            progName, n );
   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
             progName );
   exit ( 1 );
}


/*---------------------------------------------*/
/* Report that the input holds more blocks than the fixed-size block
   tables can record, and exit with status 1. */
static void tooManyBlocks ( Int32 max_handled_blocks )
{
   fprintf ( stderr,
             "%s: `%s' appears to contain more than %d blocks\n",
            progName, inFileName, max_handled_blocks );
   fprintf ( stderr,
             "%s: and cannot be handled.  To fix, increase\n",
             progName );
   fprintf ( stderr,
             "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n",
             progName );
   exit ( 1 );
}



/*---------------------------------------------------*/
/*--- Bit stream I/O                              ---*/
/*---------------------------------------------------*/

/* A FILE wrapper that is read or written one bit at a time,
   most significant bit of each byte first. */
typedef
   struct {
      FILE*  handle;     /* underlying stdio stream                   */
      Int32  buffer;     /* byte currently being assembled / consumed */
      Int32  buffLive;   /* number of valid bits held in buffer       */
      Char   mode;       /* 'r' for a read stream, 'w' for write      */
   }
   BitStream;


/*---------------------------------------------*/
/* Allocate a BitStream that reads from stream.  Exits via mallocFail
   if the allocation fails.  Release with bsClose. */
static BitStream* bsOpenReadStream ( FILE* stream )
{
   BitStream *bs = malloc ( sizeof *bs );
   if (bs == NULL) mallocFail ( (Int32)sizeof(BitStream) );
   bs->handle = stream;
   bs->buffer = 0;
   bs->buffLive = 0;
   bs->mode = 'r';
   return bs;
}


/*---------------------------------------------*/
/* Allocate a BitStream that writes to stream.  Exits via mallocFail
   if the allocation fails.  Release with bsClose. */
static BitStream* bsOpenWriteStream ( FILE* stream )
{
   BitStream *bs = malloc ( sizeof *bs );
   if (bs == NULL) mallocFail ( (Int32)sizeof(BitStream) );
   bs->handle = stream;
   bs->buffer = 0;
   bs->buffLive = 0;
   bs->mode = 'w';
   return bs;
}


/*---------------------------------------------*/
/* Append the low bit of `bit' to the write stream.  A full byte is
   only flushed when the ninth bit arrives, so bsClose is responsible
   for emitting the final (possibly partial) byte. */
static void bsPutBit ( BitStream* bs, Int32 bit )
{
   if (bs->buffLive == 8) {
      Int32 retVal = putc ( (UChar) bs->buffer, bs->handle );
      if (retVal == EOF) writeError();
      bytesOut++;
      bs->buffLive = 1;
      bs->buffer = bit & 0x1;
   } else {
      bs->buffer = ( (bs->buffer << 1) | (bit & 0x1) );
      bs->buffLive++;
   };
}


/*---------------------------------------------*/
/*--
   Returns 0 or 1, or 2 to indicate EOF.
   A read error (as opposed to end of file) is fatal: readError
   is called and the program exits.
--*/
static Int32 bsGetBit ( BitStream* bs )
{
   if (bs->buffLive > 0) {
      bs->buffLive --;
      return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 );
   } else {
      Int32 retVal = getc ( bs->handle );
      if ( retVal == EOF ) {
         /* Ask the stream itself whether this is an error; errno may
            hold a stale value left behind by an unrelated call. */
         if (ferror ( bs->handle )) readError();
         return 2;
      }
      bs->buffLive = 7;
      bs->buffer = retVal;
      return ( ((bs->buffer) >> 7) & 0x1 );
   }
}


/*---------------------------------------------*/
/* Close the underlying FILE and free the BitStream.  For a write
   stream the pending byte is first padded with zero bits on the
   right, written and flushed.  Any I/O failure is fatal. */
static void bsClose ( BitStream* bs )
{
   Int32 retVal;

   if ( bs->mode == 'w' ) {
      while ( bs->buffLive < 8 ) {
         bs->buffLive++;
         bs->buffer <<= 1;
      };
      retVal = putc ( (UChar) (bs->buffer), bs->handle );
      if (retVal == EOF) writeError();
      bytesOut++;
      retVal = fflush ( bs->handle );
      if (retVal == EOF) writeError();
   }
   retVal = fclose ( bs->handle );
   if (retVal == EOF) {
      if (bs->mode == 'w') writeError(); else readError();
   }
   free ( bs );
}


/*---------------------------------------------*/
/* Write the 8 bits of c, most significant bit first. */
static void bsPutUChar ( BitStream* bs, UChar c )
{
   Int32 i;
   for (i = 7; i >= 0; i--)
      bsPutBit ( bs, (((UInt32) c) >> i) & 0x1 );
}


/*---------------------------------------------*/
/* Write the 32 bits of c, most significant bit first. */
static void bsPutUInt32 ( BitStream* bs, UInt32 c )
{
   Int32 i;

   for (i = 31; i >= 0; i--)
      bsPutBit ( bs, (c >> i) & 0x1 );
}


/*---------------------------------------------*/
/* True iff name is longer than four characters and ends in ".bz2". */
static Bool endsInBz2 ( Char* name )
{
   Int32 n = (Int32)strlen ( name );
   if (n <= 4) return False;
   return
      (name[n-4] == '.' &&
       name[n-3] == 'b' &&
       name[n-2] == 'z' &&
       name[n-1] == '2');
}


/*---------------------------------------------------*/
/*---                                             ---*/
/*---------------------------------------------------*/

/* This logic isn't really right when it comes to Cygwin. */
#ifdef _WIN32
#  define  BZ_SPLIT_SYM  '\\'  /* path splitter on Windows platform */
#else
#  define  BZ_SPLIT_SYM  '/'   /* path splitter on Unix platform */
#endif

/* 48-bit magic numbers, split into a 16-bit high and 32-bit low part. */
#define BLOCK_HEADER_HI  0x00003141UL
#define BLOCK_HEADER_LO  0x59265359UL

#define BLOCK_ENDMARK_HI 0x00001772UL
#define BLOCK_ENDMARK_LO 0x45385090UL

/* Increase if necessary.  However, a .bz2 file with > 50000 blocks
   would have an uncompressed size of at least 40GB, so the chances
   are low you'll need to up this.
*/
#define BZ_MAX_HANDLED_BLOCKS 50000

/* Bit offsets of every candidate block (bStart/bEnd) and of the
   blocks accepted for recovery (rbStart/rbEnd). */
MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS];
MaybeUInt64 bEnd   [BZ_MAX_HANDLED_BLOCKS];
MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS];
MaybeUInt64 rbEnd  [BZ_MAX_HANDLED_BLOCKS];


/*---------------------------------------------*/
/* Build outFileName for recovered block number blockNo (1-based):
   the basename of inFileName is prefixed with "recNNNNN" (zero
   padded), any leading directory is kept, and ".bz2" is appended
   if it is not already the suffix.

   main() rejects input names of BZ_MAX_FILENAME-20 characters or
   more, which leaves room for the 8-character prefix (blockNo never
   exceeds BZ_MAX_HANDLED_BLOCKS) and the 4-character suffix.
   (Leading-path handling: 31.10.2001 by Sergey E. Kusikov) */
static void makeOutFileName ( Int32 blockNo )
{
   Char*  split;
   Char*  p;
   Int32  ofs;

   memset ( outFileName, 0, sizeof(outFileName) );
   strcpy ( outFileName, inFileName );
   split = strrchr ( outFileName, BZ_SPLIT_SYM );
   if (split == NULL) {
      split = outFileName;
   } else {
      ++split;
   }
   /* Now split points to the start of the basename. */
   ofs = (Int32)(split - outFileName);
   snprintf ( split, sizeof(outFileName) - (size_t)ofs,
              "rec%5d", blockNo );
   /* "%5d" pads with spaces; turn the padding into zeroes. */
   for (p = split; *p != 0; p++) if (*p == ' ') *p = '0';
   strcat ( outFileName, inFileName + ofs );

   if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" );
}


/*---------------------------------------------*/
/* Usage: bzip2recover damaged_file_name

   Pass 1 scans the input bit by bit for the 48-bit block header and
   end-of-stream magic numbers and records the bit range of every
   plausible block.  Pass 2 re-reads the input and copies each
   recorded block into its own file, wrapped in a fresh stream header
   and trailer, so that every block can be decompressed on its own.

   Returns 0 on success; exits with status 1 on any error or if no
   block boundaries are found. */
Int32 main ( Int32 argc, Char** argv )
{
   FILE*       inFile;
   FILE*       outFile;
   BitStream*  bsIn, *bsWr;
   Int32       b, wrBlock, currBlock, rbCtr;
   MaybeUInt64 bitsRead;

   UInt32      buffHi, buffLo, blockCRC;

   /* Bounded copy: argv[0] is caller-controlled and may be arbitrarily
      long, or absent altogether. */
   snprintf ( progName, sizeof(progName), "%s",
              (argc > 0 && argv[0] != NULL) ? argv[0] : "bzip2recover" );
   inFileName[0] = outFileName[0] = 0;

   fprintf ( stderr,
             "bzip2recover 1.0.5: extracts blocks from damaged .bz2 files.\n" );

   if (argc != 2) {
      fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
                        progName, progName );
      switch (sizeof(MaybeUInt64)) {
         case 8:
            fprintf(stderr,
                    "\trestrictions on size of recovered file: None\n");
            break;
         case 4:
            fprintf(stderr,
                    "\trestrictions on size of recovered file: 512 MB\n");
            fprintf(stderr,
                    "\tto circumvent, recompile with MaybeUInt64 as an\n"
                    "\tunsigned 64-bit int.\n");
            break;
         default:
            fprintf(stderr,
                    "\tsizeof(MaybeUInt64) is not 4 or 8 -- "
                    "configuration error.\n");
            break;
      }
      exit(1);
   }

   /* The 20-character margin leaves room for the "recNNNNN" prefix
      and ".bz2" suffix added when building output file names. */
   if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) {
      fprintf ( stderr,
                "%s: supplied filename is suspiciously (>= %d chars) long.  Bye!\n",
                progName, (int)strlen(argv[1]) );
      exit(1);
   }

   strcpy ( inFileName, argv[1] );

   inFile = fopen ( inFileName, "rb" );
   if (inFile == NULL) {
      fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName );
      exit(1);
   }

   bsIn = bsOpenReadStream ( inFile );
   fprintf ( stderr, "%s: searching for block boundaries ...\n", progName );

   /*-- Pass 1: locate block boundaries. --*/

   bitsRead = 0;
   buffHi = buffLo = 0;
   currBlock = 0;
   bStart[currBlock] = 0;

   rbCtr = 0;

   while (True) {
      b = bsGetBit ( bsIn );
      bitsRead++;
      if (b == 2) {
         if (bitsRead >= bStart[currBlock] &&
            (bitsRead - bStart[currBlock]) >= 40) {
            bEnd[currBlock] = bitsRead-1;
            if (currBlock > 0)
               fprintf ( stderr, "   block %d runs from " MaybeUInt64_FMT
                                 " to " MaybeUInt64_FMT " (incomplete)\n",
                         currBlock,  bStart[currBlock], bEnd[currBlock] );
         } else
            currBlock--;
         break;
      }
      /* buffHi:buffLo is a 64-bit shift register of the latest bits. */
      buffHi = (buffHi << 1) | (buffLo >> 31);
      buffLo = (buffLo << 1) | (b & 1);
      if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI
             && buffLo == BLOCK_HEADER_LO)
           ||
           ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI
             && buffLo == BLOCK_ENDMARK_LO)
         ) {
         if (bitsRead > 49) {
            bEnd[currBlock] = bitsRead-49;
         } else {
            bEnd[currBlock] = 0;
         }
         /* The offsets are unsigned, so the subtraction would wrap to
            a huge value if the end lay before the start; reject such
            bogus blocks explicitly. */
         if (currBlock > 0 &&
             bEnd[currBlock] >= bStart[currBlock] &&
             (bEnd[currBlock] - bStart[currBlock]) >= 130) {
            fprintf ( stderr, "   block %d runs from " MaybeUInt64_FMT
                              " to " MaybeUInt64_FMT "\n",
                      rbCtr+1,  bStart[currBlock], bEnd[currBlock] );
            rbStart[rbCtr] = bStart[currBlock];
            rbEnd[rbCtr] = bEnd[currBlock];
            rbCtr++;
         }
         /* currBlock+1 is about to be used as an index, so it must
            stay below BZ_MAX_HANDLED_BLOCKS. */
         if (currBlock >= BZ_MAX_HANDLED_BLOCKS - 1)
            tooManyBlocks(BZ_MAX_HANDLED_BLOCKS);
         currBlock++;

         bStart[currBlock] = bitsRead;
      }
   }

   bsClose ( bsIn );

   /*-- identified blocks run from 1 to rbCtr inclusive. --*/

   if (rbCtr < 1) {
      fprintf ( stderr,
                "%s: sorry, I couldn't find any block boundaries.\n",
                progName );
      exit(1);
   };

   fprintf ( stderr, "%s: splitting into blocks\n", progName );

   /*-- Pass 2: copy each recorded block into its own file. --*/

   inFile = fopen ( inFileName, "rb" );
   if (inFile == NULL) {
      fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName );
      exit(1);
   }
   bsIn = bsOpenReadStream ( inFile );

   /*-- placate gcc's dataflow analyser --*/
   blockCRC = 0; bsWr = NULL;

   bitsRead = 0;
   outFile = NULL;
   wrBlock = 0;
   while (True) {
      b = bsGetBit(bsIn);
      if (b == 2) break;
      buffHi = (buffHi << 1) | (buffLo >> 31);
      buffLo = (buffLo << 1) | (b & 1);
      /* The 32 bits following the block header are the block CRC;
         it is reused as the CRC of the one-block output stream. */
      if (bitsRead == 47+rbStart[wrBlock])
         blockCRC = (buffHi << 16) | (buffLo >> 16);

      if (outFile != NULL && bitsRead >= rbStart[wrBlock]
                          && bitsRead <= rbEnd[wrBlock]) {
         bsPutBit ( bsWr, b );
      }

      bitsRead++;

      if (bitsRead == rbEnd[wrBlock]+1) {
         if (outFile != NULL) {
            bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 );
            bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 );
            bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 );
            bsPutUInt32 ( bsWr, blockCRC );
            bsClose ( bsWr );
            /* bsClose freed bsWr and closed the FILE; forget both so
               that they cannot be written to or closed a second time. */
            outFile = NULL;
            bsWr = NULL;
         }
         if (wrBlock >= rbCtr) break;
         wrBlock++;
      } else
      if (bitsRead == rbStart[wrBlock]) {
         makeOutFileName ( wrBlock+1 );

         fprintf ( stderr, "   writing block %d to `%s' ...\n",
                           wrBlock+1, outFileName );

         outFile = fopen ( outFileName, "wb" );
         if (outFile == NULL) {
            fprintf ( stderr, "%s: can't write `%s'\n",
                      progName, outFileName );
            exit(1);
         }
         bsWr = bsOpenWriteStream ( outFile );
         bsPutUChar ( bsWr, BZ_HDR_B );
         bsPutUChar ( bsWr, BZ_HDR_Z );
         bsPutUChar ( bsWr, BZ_HDR_h );
         bsPutUChar ( bsWr, BZ_HDR_0 + 9 );
         bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 );
         bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 );
         bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 );
      }
   }

   fprintf ( stderr, "%s: finished\n", progName );
   return 0;
}



/*-----------------------------------------------------------*/
/*--- end                                  bzip2recover.c ---*/
/*-----------------------------------------------------------*/