1 /*-----------------------------------------------------------*/
2 /*--- Block recoverer program for bzip2 ---*/
3 /*--- bzip2recover.c ---*/
4 /*-----------------------------------------------------------*/
5
6 /* ------------------------------------------------------------------
7 This file is part of bzip2/libbzip2, a program and library for
8 lossless, block-sorting data compression.
9
10 bzip2/libbzip2 version 1.0.8 of 13 July 2019
11 Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
12
13 Please read the WARNING, DISCLAIMER and PATENTS sections in the
14 README file.
15
16 This program is released under the terms of the license contained
17 in the file LICENSE.
18 ------------------------------------------------------------------ */
19
20 /* This program is a complete hack and should be rewritten properly.
21 It isn't very complicated. */
22
23 #include <stdio.h>
24 #include <errno.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28
29 /* This program records bit locations in the file to be recovered.
30 That means that if 64-bit ints are not supported, we will not
31 be able to recover .bz2 files over 512MB (2^32 bits) long.
32 On GNU supported platforms, we take advantage of the 64-bit
33 int support to circumvent this problem. Ditto MSVC.
34
35 This change occurred in version 1.0.2; all prior versions have
36 the 512MB limitation.
37 */
/* MaybeUInt64 is the integer type used to hold bit positions, and
   MaybeUInt64_FMT is the printf conversion that prints it.  A 64-bit
   type is selected for GNU-compatible compilers and for MSVC; any
   other compiler falls back to plain unsigned int. */
#if defined(__GNUC__)
   typedef  unsigned long long int  MaybeUInt64;
#  define MaybeUInt64_FMT "%llu"
#elif defined(_MSC_VER)
   typedef  unsigned __int64  MaybeUInt64;
#  define MaybeUInt64_FMT "%I64u"
#else
   typedef  unsigned int  MaybeUInt64;
#  define MaybeUInt64_FMT "%u"
#endif

/* Short names for the basic types used throughout this file. */
typedef  unsigned int   UInt32;
typedef  int            Int32;
typedef  unsigned char  UChar;
typedef  char           Char;
typedef  unsigned char  Bool;

/* Boolean constants, already converted to Bool. */
#define True   ((Bool)1)
#define False  ((Bool)0)
58
59
60 #define BZ_MAX_FILENAME 2000
61
62 Char inFileName[BZ_MAX_FILENAME];
63 Char outFileName[BZ_MAX_FILENAME];
64 Char progName[BZ_MAX_FILENAME];
65
66 MaybeUInt64 bytesOut = 0;
67 MaybeUInt64 bytesIn = 0;
68
69
70 /*---------------------------------------------------*/
71 /*--- Header bytes ---*/
72 /*---------------------------------------------------*/
73
/* Byte values written at the front of every recovered file, followed
   by a level digit formed as BZ_HDR_0 + n. */
enum {
   BZ_HDR_B = 0x42,   /* 'B' */
   BZ_HDR_Z = 0x5a,   /* 'Z' */
   BZ_HDR_h = 0x68,   /* 'h' */
   BZ_HDR_0 = 0x30    /* '0' */
};
78
79
80 /*---------------------------------------------------*/
81 /*--- I/O errors ---*/
82 /*---------------------------------------------------*/
83
84 /*---------------------------------------------*/
readError(void)85 static void readError ( void )
86 {
87 fprintf ( stderr,
88 "%s: I/O error reading `%s', possible reason follows.\n",
89 progName, inFileName );
90 perror ( progName );
91 fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
92 progName );
93 exit ( 1 );
94 }
95
96
97 /*---------------------------------------------*/
writeError(void)98 static void writeError ( void )
99 {
100 fprintf ( stderr,
101 "%s: I/O error reading `%s', possible reason follows.\n",
102 progName, inFileName );
103 perror ( progName );
104 fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
105 progName );
106 exit ( 1 );
107 }
108
109
110 /*---------------------------------------------*/
/* Report that an allocation of n bytes failed, warn that the output
   may be incomplete, and exit with status 1.  Never returns. */
static void mallocFail ( Int32 n )
{
   fprintf ( stderr,
             "%s: malloc failed on request for %d bytes.\n"
             "%s: warning: output file(s) may be incomplete.\n",
             progName, n,
             progName );
   exit ( 1 );
}
120
121
122 /*---------------------------------------------*/
/* Report that the input holds more blocks than the position tables can
   record (max_handled_blocks is the limit quoted in the message), tell
   the user how to raise the limit, and exit with status 1.  Never
   returns. */
static void tooManyBlocks ( Int32 max_handled_blocks )
{
   fprintf ( stderr,
             "%s: `%s' appears to contain more than %d blocks\n"
             "%s: and cannot be handled. To fix, increase\n"
             "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n",
             progName, inFileName, max_handled_blocks,
             progName,
             progName );
   exit ( 1 );
}
136
137
138
139 /*---------------------------------------------------*/
140 /*--- Bit stream I/O ---*/
141 /*---------------------------------------------------*/
142
143 typedef
144 struct {
145 FILE* handle;
146 Int32 buffer;
147 Int32 buffLive;
148 Char mode;
149 }
150 BitStream;
151
152
153 /*---------------------------------------------*/
bsOpenReadStream(FILE * stream)154 static BitStream* bsOpenReadStream ( FILE* stream )
155 {
156 BitStream *bs = malloc ( sizeof(BitStream) );
157 if (bs == NULL) mallocFail ( sizeof(BitStream) );
158 bs->handle = stream;
159 bs->buffer = 0;
160 bs->buffLive = 0;
161 bs->mode = 'r';
162 return bs;
163 }
164
165
166 /*---------------------------------------------*/
bsOpenWriteStream(FILE * stream)167 static BitStream* bsOpenWriteStream ( FILE* stream )
168 {
169 BitStream *bs = malloc ( sizeof(BitStream) );
170 if (bs == NULL) mallocFail ( sizeof(BitStream) );
171 bs->handle = stream;
172 bs->buffer = 0;
173 bs->buffLive = 0;
174 bs->mode = 'w';
175 return bs;
176 }
177
178
179 /*---------------------------------------------*/
/* Append the lowest bit of `bit` to the write stream `bs`.

   Bits are collected most-significant first.  A full byte is not
   written when its eighth bit arrives but on the following call, so
   there is always a pending byte for bsClose() to flush.  A failed
   write goes to writeError(), which terminates the program. */
static void bsPutBit ( BitStream* bs, Int32 bit )
{
   Int32 lowBit = bit & 0x1;

   if (bs->buffLive != 8) {
      /* Still room in the current byte: shift the new bit in. */
      bs->buffer = ( (bs->buffer << 1) | lowBit );
      bs->buffLive++;
      return;
   }

   /* The current byte is complete: write it, then start a new byte
      that holds only the incoming bit. */
   if (putc ( (UChar) bs->buffer, bs->handle ) == EOF) writeError();
   bytesOut++;
   bs->buffLive = 1;
   bs->buffer   = lowBit;
}
193
194
195 /*---------------------------------------------*/
196 /*--
197 Returns 0 or 1, or 2 to indicate EOF.
198 --*/
bsGetBit(BitStream * bs)199 static Int32 bsGetBit ( BitStream* bs )
200 {
201 if (bs->buffLive > 0) {
202 bs->buffLive --;
203 return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 );
204 } else {
205 Int32 retVal = getc ( bs->handle );
206 if ( retVal == EOF ) {
207 if (errno != 0) readError();
208 return 2;
209 }
210 bs->buffLive = 7;
211 bs->buffer = retVal;
212 return ( ((bs->buffer) >> 7) & 0x1 );
213 }
214 }
215
216
217 /*---------------------------------------------*/
/* Finish with the bit stream `bs`: close its stdio stream and free the
   BitStream object.

   For a writer, the pending bits are first left-justified into a whole
   byte (padded with zero bits), written out, and the stream flushed.
   Any failure is passed to writeError() for a writer or readError()
   for a reader; both terminate the program. */
static void bsClose ( BitStream* bs )
{
   const int isWriter = (bs->mode == 'w');

   if (isWriter) {
      /* Pad the partial byte on the right with zero bits. */
      for ( ; bs->buffLive < 8; bs->buffLive++) {
         bs->buffer <<= 1;
      }
      if (putc ( (UChar) (bs->buffer), bs->handle ) == EOF) writeError();
      bytesOut++;
      if (fflush ( bs->handle ) == EOF) writeError();
   }

   if (fclose ( bs->handle ) == EOF) {
      if (isWriter) {
         writeError();
      } else {
         readError();
      }
   }
   free ( bs );
}
239
240
241 /*---------------------------------------------*/
/* Write the eight bits of `c` to the write stream `bs`, most
   significant bit first. */
static void bsPutUChar ( BitStream* bs, UChar c )
{
   Int32 shift = 8;

   while (shift-- > 0) {
      bsPutBit ( bs, (((UInt32) c) >> shift) & 0x1 );
   }
}
248
249
250 /*---------------------------------------------*/
/* Write the low 32 bits of `c` to the write stream `bs`, most
   significant bit first. */
static void bsPutUInt32 ( BitStream* bs, UInt32 c )
{
   Int32 shift = 32;

   while (shift-- > 0) {
      bsPutBit ( bs, (c >> shift) & 0x1 );
   }
}
258
259
260 /*---------------------------------------------*/
endsInBz2(Char * name)261 static Bool endsInBz2 ( Char* name )
262 {
263 Int32 n = strlen ( name );
264 if (n <= 4) return False;
265 return
266 (name[n-4] == '.' &&
267 name[n-3] == 'b' &&
268 name[n-2] == 'z' &&
269 name[n-1] == '2');
270 }
271
272
273 /*---------------------------------------------------*/
274 /*--- ---*/
275 /*---------------------------------------------------*/
276
277 /* This logic isn't really right when it comes to Cygwin. */
/* Character that separates directory components in a path; used to
   locate the basename of the input file. */
#if defined(_WIN32)
#  define BZ_SPLIT_SYM '\\'   /* Windows */
#else
#  define BZ_SPLIT_SYM '/'    /* Unix */
#endif
283
284 #define BLOCK_HEADER_HI 0x00003141UL
285 #define BLOCK_HEADER_LO 0x59265359UL
286
287 #define BLOCK_ENDMARK_HI 0x00001772UL
288 #define BLOCK_ENDMARK_LO 0x45385090UL
289
290 /* Increase if necessary. However, a .bz2 file with > 50000 blocks
291 would have an uncompressed size of at least 40GB, so the chances
292 are low you'll need to up this.
293 */
294 #define BZ_MAX_HANDLED_BLOCKS 50000
295
296 MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS];
297 MaybeUInt64 bEnd [BZ_MAX_HANDLED_BLOCKS];
298 MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS];
299 MaybeUInt64 rbEnd [BZ_MAX_HANDLED_BLOCKS];
300
/* Program entry point: `bzip2recover damaged_file_name`.

   Pass 1 reads the input bit by bit and records the bit positions of
   every span that lies between two 48-bit block-header / end-of-stream
   markers.  Pass 2 re-reads the input and copies each accepted span
   into its own file, named after the input with "recNNNNN" inserted in
   front of the basename (and ".bz2" appended if missing).  Each output
   gets a fresh stream header and block header in front, and an
   end-of-stream marker plus CRC behind.

   Returns 0 on success.  Usage errors, unopenable files, a missing
   block boundary, too many blocks and I/O failures all terminate the
   process with exit status 1. */
Int32 main ( Int32 argc, Char** argv )
{
   FILE*       inFile;
   FILE*       outFile;
   BitStream*  bsIn, *bsWr;
   Int32       b, wrBlock, currBlock, rbCtr;
   MaybeUInt64 bitsRead;

   UInt32      buffHi, buffLo, blockCRC;
   Char*       p;

   /* Remember the invocation name; it prefixes every diagnostic. */
   strncpy ( progName, argv[0], BZ_MAX_FILENAME-1);
   progName[BZ_MAX_FILENAME-1]='\0';
   inFileName[0] = outFileName[0] = 0;

   fprintf ( stderr,
             "bzip2recover 1.0.8: extracts blocks from damaged .bz2 files.\n" );

   if (argc != 2) {
      fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
                        progName, progName );
      switch (sizeof(MaybeUInt64)) {
         case 8:
            fprintf(stderr,
                    "\trestrictions on size of recovered file: None\n");
            break;
         case 4:
            fprintf(stderr,
                    "\trestrictions on size of recovered file: 512 MB\n");
            fprintf(stderr,
                    "\tto circumvent, recompile with MaybeUInt64 as an\n"
                    "\tunsigned 64-bit int.\n");
            break;
         default:
            fprintf(stderr,
                    "\tsizeof(MaybeUInt64) is not 4 or 8 -- "
                    "configuration error.\n");
            break;
      }
      exit(1);
   }

   /* The output name is the input name plus a "recNNNNN" prefix and
      possibly a ".bz2" suffix, so leave headroom in the name buffers. */
   if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) {
      fprintf ( stderr,
                "%s: supplied filename is suspiciously (>= %d chars) long. Bye!\n",
                progName, (int)strlen(argv[1]) );
      exit(1);
   }

   strcpy ( inFileName, argv[1] );

   inFile = fopen ( inFileName, "rb" );
   if (inFile == NULL) {
      fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName );
      exit(1);
   }

   /*-- Pass 1: locate the block boundaries. --*/

   bsIn = bsOpenReadStream ( inFile );
   fprintf ( stderr, "%s: searching for block boundaries ...\n", progName );

   bitsRead = 0;
   buffHi = buffLo = 0;
   currBlock = 0;
   bStart[currBlock] = 0;

   rbCtr = 0;

   while (True) {
      b = bsGetBit ( bsIn );
      bitsRead++;
      if (b == 2) {
         /* End of input.  Whatever follows the last marker is at best
            an incomplete block: it is reported, but never added to the
            recovery tables. */
         if (bitsRead >= bStart[currBlock] &&
             (bitsRead - bStart[currBlock]) >= 40) {
            bEnd[currBlock] = bitsRead-1;
            if (currBlock > 0)
               fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT
                                 " to " MaybeUInt64_FMT " (incomplete)\n",
                         currBlock, bStart[currBlock], bEnd[currBlock] );
         } else
            currBlock--;
         break;
      }

      /* buffHi:buffLo is a shift register holding the most recent bits;
         its low 48 bits are compared against the two markers. */
      buffHi = (buffHi << 1) | (buffLo >> 31);
      buffLo = (buffLo << 1) | (b & 1);
      if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI
             && buffLo == BLOCK_HEADER_LO)
           ||
           ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI
             && buffLo == BLOCK_ENDMARK_LO)
         ) {
         /* bitsRead already counts the marker's final bit, so the span
            before the 48-bit marker ends at bit bitsRead-49. */
         if (bitsRead > 49) {
            bEnd[currBlock] = bitsRead-49;
         } else {
            bEnd[currBlock] = 0;
         }
         /* Span 0 is whatever precedes the first marker and is never
            recovered.  The explicit bEnd >= bStart test keeps the
            unsigned subtraction from wrapping when two markers are
            directly adjacent (bEnd == bStart-1), which would otherwise
            pass the length test and record a bogus block. */
         if (currBlock > 0 &&
             bEnd[currBlock] >= bStart[currBlock] &&
             (bEnd[currBlock] - bStart[currBlock]) >= 130) {
            fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT
                              " to " MaybeUInt64_FMT "\n",
                      rbCtr+1, bStart[currBlock], bEnd[currBlock] );
            rbStart[rbCtr] = bStart[currBlock];
            rbEnd[rbCtr] = bEnd[currBlock];
            rbCtr++;
         }
         /* currBlock is about to be incremented and used as an index,
            so it must stay below BZ_MAX_HANDLED_BLOCKS afterwards. */
         if (currBlock >= BZ_MAX_HANDLED_BLOCKS-1)
            tooManyBlocks(BZ_MAX_HANDLED_BLOCKS);
         currBlock++;

         bStart[currBlock] = bitsRead;
      }
   }

   bsClose ( bsIn );

   /*-- identified blocks are numbered 1 to rbCtr in messages and file
        names; they occupy rbStart/rbEnd[0 .. rbCtr-1]. --*/

   if (rbCtr < 1) {
      fprintf ( stderr,
                "%s: sorry, I couldn't find any block boundaries.\n",
                progName );
      exit(1);
   };

   /*-- Pass 2: copy each identified block into its own file. --*/

   fprintf ( stderr, "%s: splitting into blocks\n", progName );

   inFile = fopen ( inFileName, "rb" );
   if (inFile == NULL) {
      fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName );
      exit(1);
   }
   bsIn = bsOpenReadStream ( inFile );

   /*-- placate gcc's dataflow analyser --*/
   blockCRC = 0; bsWr = 0;

   bitsRead = 0;
   outFile = NULL;
   wrBlock = 0;
   while (True) {
      b = bsGetBit(bsIn);
      if (b == 2) break;
      buffHi = (buffHi << 1) | (buffLo >> 31);
      buffLo = (buffLo << 1) | (b & 1);
      /* 48 bits into the block, the upper 32 bits of the 48-bit window
         are the block's first 32 bits.  They are saved as blockCRC and
         re-emitted after the end-of-stream marker. */
      if (bitsRead == 47+rbStart[wrBlock])
         blockCRC = (buffHi << 16) | (buffLo >> 16);

      if (outFile != NULL && bitsRead >= rbStart[wrBlock]
                          && bitsRead <= rbEnd[wrBlock]) {
         bsPutBit ( bsWr, b );
      }

      bitsRead++;

      if (bitsRead == rbEnd[wrBlock]+1) {
         /* Last bit of the current block has been copied: terminate
            the output stream and close the file. */
         if (outFile != NULL) {
            bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 );
            bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 );
            bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 );
            bsPutUInt32 ( bsWr, blockCRC );
            bsClose ( bsWr );
            outFile = NULL;
         }
         /* Stop once the last recorded block (index rbCtr-1) is done;
            there is nothing left to extract, and the table entries
            from rbCtr onward were never filled in. */
         if (wrBlock+1 >= rbCtr) break;
         wrBlock++;
      } else
      if (bitsRead == rbStart[wrBlock]) {
         /* Create the output file name, correctly handling leading paths.
            (31.10.2001 by Sergey E. Kusikov) */
         Char* split;
         Int32 ofs, k;
         for (k = 0; k < BZ_MAX_FILENAME; k++)
            outFileName[k] = 0;
         strcpy (outFileName, inFileName);
         split = strrchr (outFileName, BZ_SPLIT_SYM);
         if (split == NULL) {
            split = outFileName;
         } else {
            ++split;
         }
         /* Now split points to the start of the basename. */
         ofs = split - outFileName;
         /* "%5d" pads with spaces; turn those into leading zeros. */
         sprintf (split, "rec%5d", wrBlock+1);
         for (p = split; *p != 0; p++) if (*p == ' ') *p = '0';
         strcat (outFileName, inFileName + ofs);

         if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" );

         fprintf ( stderr, " writing block %d to `%s' ...\n",
                           wrBlock+1, outFileName );

         outFile = fopen ( outFileName, "wb" );
         if (outFile == NULL) {
            fprintf ( stderr, "%s: can't write `%s'\n",
                      progName, outFileName );
            exit(1);
         }
         /* Every recovered file starts with a "BZh9" stream header
            followed by the 48-bit block-header marker. */
         bsWr = bsOpenWriteStream ( outFile );
         bsPutUChar ( bsWr, BZ_HDR_B );
         bsPutUChar ( bsWr, BZ_HDR_Z );
         bsPutUChar ( bsWr, BZ_HDR_h );
         bsPutUChar ( bsWr, BZ_HDR_0 + 9 );
         bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 );
         bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 );
         bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 );
      }
   }

   fprintf ( stderr, "%s: finished\n", progName );
   return 0;
}
511
512
513
514 /*-----------------------------------------------------------*/
515 /*--- end bzip2recover.c ---*/
516 /*-----------------------------------------------------------*/
517