1 /*-----------------------------------------------------------*/ 2 /*--- Block recoverer program for bzip2 ---*/ 3 /*--- bzip2recover.c ---*/ 4 /*-----------------------------------------------------------*/ 5 6 /* ------------------------------------------------------------------ 7 This file is part of bzip2/libbzip2, a program and library for 8 lossless, block-sorting data compression. 9 10 bzip2/libbzip2 version 1.0.8 of 13 July 2019 11 Copyright (C) 1996-2019 Julian Seward <jseward@acm.org> 12 13 Please read the WARNING, DISCLAIMER and PATENTS sections in the 14 README file. 15 16 This program is released under the terms of the license contained 17 in the file LICENSE. 18 ------------------------------------------------------------------ */ 19 20 /* This program is a complete hack and should be rewritten properly. 21 It isn't very complicated. */ 22 23 #include <stdio.h> 24 #include <errno.h> 25 #include <stdlib.h> 26 #include <string.h> 27 28 29 /* This program records bit locations in the file to be recovered. 30 That means that if 64-bit ints are not supported, we will not 31 be able to recover .bz2 files over 512MB (2^32 bits) long. 32 On GNU supported platforms, we take advantage of the 64-bit 33 int support to circumvent this problem. Ditto MSVC. 34 35 This change occurred in version 1.0.2; all prior versions have 36 the 512MB limitation. 37 */ 38 #ifdef __GNUC__ 39 typedef unsigned long long int MaybeUInt64; 40 # define MaybeUInt64_FMT "%llu" 41 #else 42 #ifdef _MSC_VER 43 typedef unsigned __int64 MaybeUInt64; 44 # define MaybeUInt64_FMT "%I64u" 45 #else 46 typedef unsigned int MaybeUInt64; 47 # define MaybeUInt64_FMT "%u" 48 #endif 49 #endif 50 51 typedef unsigned int UInt32; 52 typedef int Int32; 53 typedef unsigned char UChar; 54 typedef char Char; 55 typedef unsigned char Bool; 56 #define True ((Bool)1) 57 #define False ((Bool)0) 58 59 60 #define BZ_MAX_FILENAME 2000 61 62 Char inFileName[BZ_MAX_FILENAME]; 63 Char outFileName[BZ_MAX_FILENAME]; 64 Char progName[BZ_MAX_FILENAME]; 65 66 MaybeUInt64 bytesOut = 0; 67 MaybeUInt64 bytesIn = 0; 68 69 70 /*---------------------------------------------------*/ 71 /*--- Header bytes ---*/ 72 /*---------------------------------------------------*/ 73 74 #define BZ_HDR_B 0x42 /* 'B' */ 75 #define BZ_HDR_Z 0x5a /* 'Z' */ 76 #define BZ_HDR_h 0x68 /* 'h' */ 77 #define BZ_HDR_0 0x30 /* '0' */ 78 79 80 /*---------------------------------------------------*/ 81 /*--- I/O errors ---*/ 82 /*---------------------------------------------------*/ 83 84 /*---------------------------------------------*/ 85 static void readError ( void ) 86 { 87 fprintf ( stderr, 88 "%s: I/O error reading `%s', possible reason follows.\n", 89 progName, inFileName ); 90 perror ( progName ); 91 fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n", 92 progName ); 93 exit ( 1 ); 94 } 95 96 97 /*---------------------------------------------*/ 98 static void writeError ( void ) 99 { 100 fprintf ( stderr, 101 "%s: I/O error reading `%s', possible reason follows.\n", 102 progName, inFileName ); 103 perror ( progName ); 104 fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n", 105 progName ); 106 exit ( 1 ); 107 } 108 109 110 /*---------------------------------------------*/ 111 static void mallocFail ( Int32 n ) 112 { 113 fprintf ( stderr, 114 "%s: malloc failed on request for %d bytes.\n", 115 progName, n ); 116 fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n", 117 progName ); 118 exit ( 1 ); 119 } 120 121 122 /*---------------------------------------------*/ 123 static void tooManyBlocks ( Int32 max_handled_blocks ) 124 { 125 fprintf ( stderr, 126 "%s: `%s' appears to contain more than %d blocks\n", 127 progName, inFileName, max_handled_blocks ); 128 fprintf ( stderr, 129 "%s: and cannot be handled. To fix, increase\n", 130 progName ); 131 fprintf ( stderr, 132 "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n", 133 progName ); 134 exit ( 1 ); 135 } 136 137 138 139 /*---------------------------------------------------*/ 140 /*--- Bit stream I/O ---*/ 141 /*---------------------------------------------------*/ 142 143 typedef 144 struct { 145 FILE* handle; 146 Int32 buffer; 147 Int32 buffLive; 148 Char mode; 149 } 150 BitStream; 151 152 153 /*---------------------------------------------*/ 154 static BitStream* bsOpenReadStream ( FILE* stream ) 155 { 156 BitStream *bs = malloc ( sizeof(BitStream) ); 157 if (bs == NULL) mallocFail ( sizeof(BitStream) ); 158 bs->handle = stream; 159 bs->buffer = 0; 160 bs->buffLive = 0; 161 bs->mode = 'r'; 162 return bs; 163 } 164 165 166 /*---------------------------------------------*/ 167 static BitStream* bsOpenWriteStream ( FILE* stream ) 168 { 169 BitStream *bs = malloc ( sizeof(BitStream) ); 170 if (bs == NULL) mallocFail ( sizeof(BitStream) ); 171 bs->handle = stream; 172 bs->buffer = 0; 173 bs->buffLive = 0; 174 bs->mode = 'w'; 175 return bs; 176 } 177 178 179 /*---------------------------------------------*/ 180 static void bsPutBit ( BitStream* bs, Int32 bit ) 181 { 182 if (bs->buffLive == 8) { 183 Int32 retVal = putc ( (UChar) bs->buffer, bs->handle ); 184 if (retVal == EOF) writeError(); 185 bytesOut++; 186 bs->buffLive = 1; 187 bs->buffer = bit & 0x1; 188 } else { 189 bs->buffer = ( (bs->buffer << 1) | (bit & 0x1) ); 190 bs->buffLive++; 191 }; 192 } 193 194 195 /*---------------------------------------------*/ 196 /*-- 197 Returns 0 or 1, or 2 to indicate EOF. 198 --*/ 199 static Int32 bsGetBit ( BitStream* bs ) 200 { 201 if (bs->buffLive > 0) { 202 bs->buffLive --; 203 return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 ); 204 } else { 205 Int32 retVal = getc ( bs->handle ); 206 if ( retVal == EOF ) { 207 if (errno != 0) readError(); 208 return 2; 209 } 210 bs->buffLive = 7; 211 bs->buffer = retVal; 212 return ( ((bs->buffer) >> 7) & 0x1 ); 213 } 214 } 215 216 217 /*---------------------------------------------*/ 218 static void bsClose ( BitStream* bs ) 219 { 220 Int32 retVal; 221 222 if ( bs->mode == 'w' ) { 223 while ( bs->buffLive < 8 ) { 224 bs->buffLive++; 225 bs->buffer <<= 1; 226 }; 227 retVal = putc ( (UChar) (bs->buffer), bs->handle ); 228 if (retVal == EOF) writeError(); 229 bytesOut++; 230 retVal = fflush ( bs->handle ); 231 if (retVal == EOF) writeError(); 232 } 233 retVal = fclose ( bs->handle ); 234 if (retVal == EOF) { 235 if (bs->mode == 'w') writeError(); else readError(); 236 } 237 free ( bs ); 238 } 239 240 241 /*---------------------------------------------*/ 242 static void bsPutUChar ( BitStream* bs, UChar c ) 243 { 244 Int32 i; 245 for (i = 7; i >= 0; i--) 246 bsPutBit ( bs, (((UInt32) c) >> i) & 0x1 ); 247 } 248 249 250 /*---------------------------------------------*/ 251 static void bsPutUInt32 ( BitStream* bs, UInt32 c ) 252 { 253 Int32 i; 254 255 for (i = 31; i >= 0; i--) 256 bsPutBit ( bs, (c >> i) & 0x1 ); 257 } 258 259 260 /*---------------------------------------------*/ 261 static Bool endsInBz2 ( Char* name ) 262 { 263 Int32 n = strlen ( name ); 264 if (n <= 4) return False; 265 return 266 (name[n-4] == '.' && 267 name[n-3] == 'b' && 268 name[n-2] == 'z' && 269 name[n-1] == '2'); 270 } 271 272 273 /*---------------------------------------------------*/ 274 /*--- ---*/ 275 /*---------------------------------------------------*/ 276 277 /* This logic isn't really right when it comes to Cygwin. */ 278 #ifdef _WIN32 279 # define BZ_SPLIT_SYM '\\' /* path splitter on Windows platform */ 280 #else 281 # define BZ_SPLIT_SYM '/' /* path splitter on Unix platform */ 282 #endif 283 284 #define BLOCK_HEADER_HI 0x00003141UL 285 #define BLOCK_HEADER_LO 0x59265359UL 286 287 #define BLOCK_ENDMARK_HI 0x00001772UL 288 #define BLOCK_ENDMARK_LO 0x45385090UL 289 290 /* Increase if necessary. However, a .bz2 file with > 50000 blocks 291 would have an uncompressed size of at least 40GB, so the chances 292 are low you'll need to up this. 293 */ 294 #define BZ_MAX_HANDLED_BLOCKS 50000 295 296 MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS]; 297 MaybeUInt64 bEnd [BZ_MAX_HANDLED_BLOCKS]; 298 MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS]; 299 MaybeUInt64 rbEnd [BZ_MAX_HANDLED_BLOCKS]; 300 301 Int32 main ( Int32 argc, Char** argv ) 302 { 303 FILE* inFile; 304 FILE* outFile; 305 BitStream* bsIn, *bsWr; 306 Int32 b, wrBlock, currBlock, rbCtr; 307 MaybeUInt64 bitsRead; 308 309 UInt32 buffHi, buffLo, blockCRC; 310 Char* p; 311 312 strncpy ( progName, argv[0], BZ_MAX_FILENAME-1); 313 progName[BZ_MAX_FILENAME-1]='\0'; 314 inFileName[0] = outFileName[0] = 0; 315 316 fprintf ( stderr, 317 "bzip2recover 1.0.8: extracts blocks from damaged .bz2 files.\n" ); 318 319 if (argc != 2) { 320 fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n", 321 progName, progName ); 322 switch (sizeof(MaybeUInt64)) { 323 case 8: 324 fprintf(stderr, 325 "\trestrictions on size of recovered file: None\n"); 326 break; 327 case 4: 328 fprintf(stderr, 329 "\trestrictions on size of recovered file: 512 MB\n"); 330 fprintf(stderr, 331 "\tto circumvent, recompile with MaybeUInt64 as an\n" 332 "\tunsigned 64-bit int.\n"); 333 break; 334 default: 335 fprintf(stderr, 336 "\tsizeof(MaybeUInt64) is not 4 or 8 -- " 337 "configuration error.\n"); 338 break; 339 } 340 exit(1); 341 } 342 343 if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) { 344 fprintf ( stderr, 345 "%s: supplied filename is suspiciously (>= %d chars) long. Bye!\n", 346 progName, (int)strlen(argv[1]) ); 347 exit(1); 348 } 349 350 strcpy ( inFileName, argv[1] ); 351 352 inFile = fopen ( inFileName, "rb" ); 353 if (inFile == NULL) { 354 fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName ); 355 exit(1); 356 } 357 358 bsIn = bsOpenReadStream ( inFile ); 359 fprintf ( stderr, "%s: searching for block boundaries ...\n", progName ); 360 361 bitsRead = 0; 362 buffHi = buffLo = 0; 363 currBlock = 0; 364 bStart[currBlock] = 0; 365 366 rbCtr = 0; 367 368 while (True) { 369 b = bsGetBit ( bsIn ); 370 bitsRead++; 371 if (b == 2) { 372 if (bitsRead >= bStart[currBlock] && 373 (bitsRead - bStart[currBlock]) >= 40) { 374 bEnd[currBlock] = bitsRead-1; 375 if (currBlock > 0) 376 fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT 377 " to " MaybeUInt64_FMT " (incomplete)\n", 378 currBlock, bStart[currBlock], bEnd[currBlock] ); 379 } else 380 currBlock--; 381 break; 382 } 383 buffHi = (buffHi << 1) | (buffLo >> 31); 384 buffLo = (buffLo << 1) | (b & 1); 385 if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI 386 && buffLo == BLOCK_HEADER_LO) 387 || 388 ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI 389 && buffLo == BLOCK_ENDMARK_LO) 390 ) { 391 if (bitsRead > 49) { 392 bEnd[currBlock] = bitsRead-49; 393 } else { 394 bEnd[currBlock] = 0; 395 } 396 if (currBlock > 0 && 397 (bEnd[currBlock] - bStart[currBlock]) >= 130) { 398 fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT 399 " to " MaybeUInt64_FMT "\n", 400 rbCtr+1, bStart[currBlock], bEnd[currBlock] ); 401 rbStart[rbCtr] = bStart[currBlock]; 402 rbEnd[rbCtr] = bEnd[currBlock]; 403 rbCtr++; 404 } 405 if (currBlock >= BZ_MAX_HANDLED_BLOCKS) 406 tooManyBlocks(BZ_MAX_HANDLED_BLOCKS); 407 currBlock++; 408 409 bStart[currBlock] = bitsRead; 410 } 411 } 412 413 bsClose ( bsIn ); 414 415 /*-- identified blocks run from 1 to rbCtr inclusive. --*/ 416 417 if (rbCtr < 1) { 418 fprintf ( stderr, 419 "%s: sorry, I couldn't find any block boundaries.\n", 420 progName ); 421 exit(1); 422 }; 423 424 fprintf ( stderr, "%s: splitting into blocks\n", progName ); 425 426 inFile = fopen ( inFileName, "rb" ); 427 if (inFile == NULL) { 428 fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName ); 429 exit(1); 430 } 431 bsIn = bsOpenReadStream ( inFile ); 432 433 /*-- placate gcc's dataflow analyser --*/ 434 blockCRC = 0; bsWr = 0; 435 436 bitsRead = 0; 437 outFile = NULL; 438 wrBlock = 0; 439 while (True) { 440 b = bsGetBit(bsIn); 441 if (b == 2) break; 442 buffHi = (buffHi << 1) | (buffLo >> 31); 443 buffLo = (buffLo << 1) | (b & 1); 444 if (bitsRead == 47+rbStart[wrBlock]) 445 blockCRC = (buffHi << 16) | (buffLo >> 16); 446 447 if (outFile != NULL && bitsRead >= rbStart[wrBlock] 448 && bitsRead <= rbEnd[wrBlock]) { 449 bsPutBit ( bsWr, b ); 450 } 451 452 bitsRead++; 453 454 if (bitsRead == rbEnd[wrBlock]+1) { 455 if (outFile != NULL) { 456 bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 ); 457 bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 ); 458 bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 ); 459 bsPutUInt32 ( bsWr, blockCRC ); 460 bsClose ( bsWr ); 461 outFile = NULL; 462 } 463 if (wrBlock >= rbCtr) break; 464 wrBlock++; 465 } else 466 if (bitsRead == rbStart[wrBlock]) { 467 /* Create the output file name, correctly handling leading paths. 468 (31.10.2001 by Sergey E. Kusikov) */ 469 Char* split; 470 Int32 ofs, k; 471 for (k = 0; k < BZ_MAX_FILENAME; k++) 472 outFileName[k] = 0; 473 strcpy (outFileName, inFileName); 474 split = strrchr (outFileName, BZ_SPLIT_SYM); 475 if (split == NULL) { 476 split = outFileName; 477 } else { 478 ++split; 479 } 480 /* Now split points to the start of the basename. */ 481 ofs = split - outFileName; 482 sprintf (split, "rec%5d", wrBlock+1); 483 for (p = split; *p != 0; p++) if (*p == ' ') *p = '0'; 484 strcat (outFileName, inFileName + ofs); 485 486 if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" ); 487 488 fprintf ( stderr, " writing block %d to `%s' ...\n", 489 wrBlock+1, outFileName ); 490 491 outFile = fopen ( outFileName, "wb" ); 492 if (outFile == NULL) { 493 fprintf ( stderr, "%s: can't write `%s'\n", 494 progName, outFileName ); 495 exit(1); 496 } 497 bsWr = bsOpenWriteStream ( outFile ); 498 bsPutUChar ( bsWr, BZ_HDR_B ); 499 bsPutUChar ( bsWr, BZ_HDR_Z ); 500 bsPutUChar ( bsWr, BZ_HDR_h ); 501 bsPutUChar ( bsWr, BZ_HDR_0 + 9 ); 502 bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 ); 503 bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 ); 504 bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 ); 505 } 506 } 507 508 fprintf ( stderr, "%s: finished\n", progName ); 509 return 0; 510 } 511 512 513 514 /*-----------------------------------------------------------*/ 515 /*--- end bzip2recover.c ---*/ 516 /*-----------------------------------------------------------*/ 517