xref: /freebsd/contrib/bzip2/bzip2recover.c (revision 81966bce06dac45f42bda62b14dba0756ef28505)
1 /*-----------------------------------------------------------*/
2 /*--- Block recoverer program for bzip2                   ---*/
3 /*---                                      bzip2recover.c ---*/
4 /*-----------------------------------------------------------*/
5 
6 /* ------------------------------------------------------------------
7    This file is part of bzip2/libbzip2, a program and library for
8    lossless, block-sorting data compression.
9 
10    bzip2/libbzip2 version 1.0.6 of 6 September 2010
11    Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
12 
13    Please read the WARNING, DISCLAIMER and PATENTS sections in the
14    README file.
15 
16    This program is released under the terms of the license contained
17    in the file LICENSE.
18    ------------------------------------------------------------------ */
19 
20 /* This program is a complete hack and should be rewritten properly.
21 	 It isn't very complicated. */
22 
23 #include <stdio.h>
24 #include <errno.h>
25 #include <stdlib.h>
26 #include <string.h>
27 
28 
29 /* This program records bit locations in the file to be recovered.
30    That means that if 64-bit ints are not supported, we will not
31    be able to recover .bz2 files over 512MB (2^32 bits) long.
32    On GNU supported platforms, we take advantage of the 64-bit
33    int support to circumvent this problem.  Ditto MSVC.
34 
35    This change occurred in version 1.0.2; all prior versions have
36    the 512MB limitation.
37 */
/* Pick the widest unsigned integer the compiler is known to offer for
   counting bit positions, together with the matching printf format. */
#if defined(__GNUC__)
   typedef  unsigned long long int  MaybeUInt64;
#  define MaybeUInt64_FMT "%llu"
#elif defined(_MSC_VER)
   typedef  unsigned __int64  MaybeUInt64;
#  define MaybeUInt64_FMT "%I64u"
#else
   /* No known 64-bit type: fall back to plain unsigned int. */
   typedef  unsigned int   MaybeUInt64;
#  define MaybeUInt64_FMT "%u"
#endif
50 
/* Short aliases for the basic types used throughout this file. */
typedef unsigned int  UInt32;
typedef int           Int32;
typedef unsigned char UChar;
typedef char          Char;

/* Boolean type and its two values. */
typedef unsigned char Bool;
#define True  ((Bool)1)
#define False ((Bool)0)
58 
59 
60 #define BZ_MAX_FILENAME 2000
61 
62 Char inFileName[BZ_MAX_FILENAME];
63 Char outFileName[BZ_MAX_FILENAME];
64 Char progName[BZ_MAX_FILENAME];
65 
66 MaybeUInt64 bytesOut = 0;
67 MaybeUInt64 bytesIn  = 0;
68 
69 
70 /*---------------------------------------------------*/
71 /*--- Header bytes                                ---*/
72 /*---------------------------------------------------*/
73 
/* Bytes of the stream header; BZ_HDR_0 is the base that the block-size
   digit is added to. */
#define BZ_HDR_B  0x42   /* 'B' */
#define BZ_HDR_Z  0x5A   /* 'Z' */
#define BZ_HDR_h  0x68   /* 'h' */
#define BZ_HDR_0  0x30   /* '0' */
78 
79 
80 /*---------------------------------------------------*/
81 /*--- I/O errors                                  ---*/
82 /*---------------------------------------------------*/
83 
84 /*---------------------------------------------*/
85 static void readError ( void )
86 {
87    fprintf ( stderr,
88              "%s: I/O error reading `%s', possible reason follows.\n",
89             progName, inFileName );
90    perror ( progName );
91    fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
92              progName );
93    exit ( 1 );
94 }
95 
96 
97 /*---------------------------------------------*/
98 static void writeError ( void )
99 {
100    fprintf ( stderr,
101              "%s: I/O error reading `%s', possible reason follows.\n",
102             progName, inFileName );
103    perror ( progName );
104    fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
105              progName );
106    exit ( 1 );
107 }
108 
109 
110 /*---------------------------------------------*/
111 static void mallocFail ( Int32 n )
112 {
113    fprintf ( stderr,
114              "%s: malloc failed on request for %d bytes.\n",
115             progName, n );
116    fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
117              progName );
118    exit ( 1 );
119 }
120 
121 
122 /*---------------------------------------------*/
123 static void tooManyBlocks ( Int32 max_handled_blocks )
124 {
125    fprintf ( stderr,
126              "%s: `%s' appears to contain more than %d blocks\n",
127             progName, inFileName, max_handled_blocks );
128    fprintf ( stderr,
129              "%s: and cannot be handled.  To fix, increase\n",
130              progName );
131    fprintf ( stderr,
132              "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n",
133              progName );
134    exit ( 1 );
135 }
136 
137 
138 
139 /*---------------------------------------------------*/
140 /*--- Bit stream I/O                              ---*/
141 /*---------------------------------------------------*/
142 
143 typedef
144    struct {
145       FILE*  handle;
146       Int32  buffer;
147       Int32  buffLive;
148       Char   mode;
149    }
150    BitStream;
151 
152 
153 /*---------------------------------------------*/
154 static BitStream* bsOpenReadStream ( FILE* stream )
155 {
156    BitStream *bs = malloc ( sizeof(BitStream) );
157    if (bs == NULL) mallocFail ( sizeof(BitStream) );
158    bs->handle = stream;
159    bs->buffer = 0;
160    bs->buffLive = 0;
161    bs->mode = 'r';
162    return bs;
163 }
164 
165 
166 /*---------------------------------------------*/
167 static BitStream* bsOpenWriteStream ( FILE* stream )
168 {
169    BitStream *bs = malloc ( sizeof(BitStream) );
170    if (bs == NULL) mallocFail ( sizeof(BitStream) );
171    bs->handle = stream;
172    bs->buffer = 0;
173    bs->buffLive = 0;
174    bs->mode = 'w';
175    return bs;
176 }
177 
178 
179 /*---------------------------------------------*/
180 static void bsPutBit ( BitStream* bs, Int32 bit )
181 {
182    if (bs->buffLive == 8) {
183       Int32 retVal = putc ( (UChar) bs->buffer, bs->handle );
184       if (retVal == EOF) writeError();
185       bytesOut++;
186       bs->buffLive = 1;
187       bs->buffer = bit & 0x1;
188    } else {
189       bs->buffer = ( (bs->buffer << 1) | (bit & 0x1) );
190       bs->buffLive++;
191    };
192 }
193 
194 
195 /*---------------------------------------------*/
196 /*--
197    Returns 0 or 1, or 2 to indicate EOF.
198 --*/
199 static Int32 bsGetBit ( BitStream* bs )
200 {
201    if (bs->buffLive > 0) {
202       bs->buffLive --;
203       return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 );
204    } else {
205       Int32 retVal = getc ( bs->handle );
206       if ( retVal == EOF ) {
207          if (errno != 0) readError();
208          return 2;
209       }
210       bs->buffLive = 7;
211       bs->buffer = retVal;
212       return ( ((bs->buffer) >> 7) & 0x1 );
213    }
214 }
215 
216 
217 /*---------------------------------------------*/
218 static void bsClose ( BitStream* bs )
219 {
220    Int32 retVal;
221 
222    if ( bs->mode == 'w' ) {
223       while ( bs->buffLive < 8 ) {
224          bs->buffLive++;
225          bs->buffer <<= 1;
226       };
227       retVal = putc ( (UChar) (bs->buffer), bs->handle );
228       if (retVal == EOF) writeError();
229       bytesOut++;
230       retVal = fflush ( bs->handle );
231       if (retVal == EOF) writeError();
232    }
233    retVal = fclose ( bs->handle );
234    if (retVal == EOF) {
235       if (bs->mode == 'w') writeError(); else readError();
236    }
237    free ( bs );
238 }
239 
240 
241 /*---------------------------------------------*/
242 static void bsPutUChar ( BitStream* bs, UChar c )
243 {
244    Int32 i;
245    for (i = 7; i >= 0; i--)
246       bsPutBit ( bs, (((UInt32) c) >> i) & 0x1 );
247 }
248 
249 
250 /*---------------------------------------------*/
251 static void bsPutUInt32 ( BitStream* bs, UInt32 c )
252 {
253    Int32 i;
254 
255    for (i = 31; i >= 0; i--)
256       bsPutBit ( bs, (c >> i) & 0x1 );
257 }
258 
259 
260 /*---------------------------------------------*/
261 static Bool endsInBz2 ( Char* name )
262 {
263    Int32 n = strlen ( name );
264    if (n <= 4) return False;
265    return
266       (name[n-4] == '.' &&
267        name[n-3] == 'b' &&
268        name[n-2] == 'z' &&
269        name[n-1] == '2');
270 }
271 
272 
273 /*---------------------------------------------------*/
274 /*---                                             ---*/
275 /*---------------------------------------------------*/
276 
/* Directory separator used to find the basename of the input path.
   This logic isn't really right when it comes to Cygwin. */
#if defined(_WIN32)
#  define  BZ_SPLIT_SYM  '\\'  /* path splitter on Windows platform */
#else
#  define  BZ_SPLIT_SYM  '/'   /* path splitter on Unix platform */
#endif

/* 48-bit magic number that starts every block, split into its upper
   16 bits and lower 32 bits. */
#define BLOCK_HEADER_HI  0x3141UL
#define BLOCK_HEADER_LO  0x59265359UL

/* 48-bit magic number that marks the end of the stream, split the
   same way. */
#define BLOCK_ENDMARK_HI 0x1772UL
#define BLOCK_ENDMARK_LO 0x45385090UL
289 
290 /* Increase if necessary.  However, a .bz2 file with > 50000 blocks
291    would have an uncompressed size of at least 40GB, so the chances
292    are low you'll need to up this.
293 */
294 #define BZ_MAX_HANDLED_BLOCKS 50000
295 
296 MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS];
297 MaybeUInt64 bEnd   [BZ_MAX_HANDLED_BLOCKS];
298 MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS];
299 MaybeUInt64 rbEnd  [BZ_MAX_HANDLED_BLOCKS];
300 
301 Int32 main ( Int32 argc, Char** argv )
302 {
303    FILE*       inFile;
304    FILE*       outFile;
305    BitStream*  bsIn, *bsWr;
306    Int32       b, wrBlock, currBlock, rbCtr;
307    MaybeUInt64 bitsRead;
308 
309    UInt32      buffHi, buffLo, blockCRC;
310    Char*       p;
311 
312    strcpy ( progName, argv[0] );
313    inFileName[0] = outFileName[0] = 0;
314 
315    fprintf ( stderr,
316              "bzip2recover 1.0.6: extracts blocks from damaged .bz2 files.\n" );
317 
318    if (argc != 2) {
319       fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
320                         progName, progName );
321       switch (sizeof(MaybeUInt64)) {
322          case 8:
323             fprintf(stderr,
324                     "\trestrictions on size of recovered file: None\n");
325             break;
326          case 4:
327             fprintf(stderr,
328                     "\trestrictions on size of recovered file: 512 MB\n");
329             fprintf(stderr,
330                     "\tto circumvent, recompile with MaybeUInt64 as an\n"
331                     "\tunsigned 64-bit int.\n");
332             break;
333          default:
334             fprintf(stderr,
335                     "\tsizeof(MaybeUInt64) is not 4 or 8 -- "
336                     "configuration error.\n");
337             break;
338       }
339       exit(1);
340    }
341 
342    if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) {
343       fprintf ( stderr,
344                 "%s: supplied filename is suspiciously (>= %d chars) long.  Bye!\n",
345                 progName, (int)strlen(argv[1]) );
346       exit(1);
347    }
348 
349    strcpy ( inFileName, argv[1] );
350 
351    inFile = fopen ( inFileName, "rb" );
352    if (inFile == NULL) {
353       fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName );
354       exit(1);
355    }
356 
357    bsIn = bsOpenReadStream ( inFile );
358    fprintf ( stderr, "%s: searching for block boundaries ...\n", progName );
359 
360    bitsRead = 0;
361    buffHi = buffLo = 0;
362    currBlock = 0;
363    bStart[currBlock] = 0;
364 
365    rbCtr = 0;
366 
367    while (True) {
368       b = bsGetBit ( bsIn );
369       bitsRead++;
370       if (b == 2) {
371          if (bitsRead >= bStart[currBlock] &&
372             (bitsRead - bStart[currBlock]) >= 40) {
373             bEnd[currBlock] = bitsRead-1;
374             if (currBlock > 0)
375                fprintf ( stderr, "   block %d runs from " MaybeUInt64_FMT
376                                  " to " MaybeUInt64_FMT " (incomplete)\n",
377                          currBlock,  bStart[currBlock], bEnd[currBlock] );
378          } else
379             currBlock--;
380          break;
381       }
382       buffHi = (buffHi << 1) | (buffLo >> 31);
383       buffLo = (buffLo << 1) | (b & 1);
384       if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI
385              && buffLo == BLOCK_HEADER_LO)
386            ||
387            ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI
388              && buffLo == BLOCK_ENDMARK_LO)
389          ) {
390          if (bitsRead > 49) {
391             bEnd[currBlock] = bitsRead-49;
392          } else {
393             bEnd[currBlock] = 0;
394          }
395          if (currBlock > 0 &&
396 	     (bEnd[currBlock] - bStart[currBlock]) >= 130) {
397             fprintf ( stderr, "   block %d runs from " MaybeUInt64_FMT
398                               " to " MaybeUInt64_FMT "\n",
399                       rbCtr+1,  bStart[currBlock], bEnd[currBlock] );
400             rbStart[rbCtr] = bStart[currBlock];
401             rbEnd[rbCtr] = bEnd[currBlock];
402             rbCtr++;
403          }
404          if (currBlock >= BZ_MAX_HANDLED_BLOCKS)
405             tooManyBlocks(BZ_MAX_HANDLED_BLOCKS);
406          currBlock++;
407 
408          bStart[currBlock] = bitsRead;
409       }
410    }
411 
412    bsClose ( bsIn );
413 
414    /*-- identified blocks run from 1 to rbCtr inclusive. --*/
415 
416    if (rbCtr < 1) {
417       fprintf ( stderr,
418                 "%s: sorry, I couldn't find any block boundaries.\n",
419                 progName );
420       exit(1);
421    };
422 
423    fprintf ( stderr, "%s: splitting into blocks\n", progName );
424 
425    inFile = fopen ( inFileName, "rb" );
426    if (inFile == NULL) {
427       fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName );
428       exit(1);
429    }
430    bsIn = bsOpenReadStream ( inFile );
431 
432    /*-- placate gcc's dataflow analyser --*/
433    blockCRC = 0; bsWr = 0;
434 
435    bitsRead = 0;
436    outFile = NULL;
437    wrBlock = 0;
438    while (True) {
439       b = bsGetBit(bsIn);
440       if (b == 2) break;
441       buffHi = (buffHi << 1) | (buffLo >> 31);
442       buffLo = (buffLo << 1) | (b & 1);
443       if (bitsRead == 47+rbStart[wrBlock])
444          blockCRC = (buffHi << 16) | (buffLo >> 16);
445 
446       if (outFile != NULL && bitsRead >= rbStart[wrBlock]
447                           && bitsRead <= rbEnd[wrBlock]) {
448          bsPutBit ( bsWr, b );
449       }
450 
451       bitsRead++;
452 
453       if (bitsRead == rbEnd[wrBlock]+1) {
454          if (outFile != NULL) {
455             bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 );
456             bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 );
457             bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 );
458             bsPutUInt32 ( bsWr, blockCRC );
459             bsClose ( bsWr );
460          }
461          if (wrBlock >= rbCtr) break;
462          wrBlock++;
463       } else
464       if (bitsRead == rbStart[wrBlock]) {
465          /* Create the output file name, correctly handling leading paths.
466             (31.10.2001 by Sergey E. Kusikov) */
467          Char* split;
468          Int32 ofs, k;
469          for (k = 0; k < BZ_MAX_FILENAME; k++)
470             outFileName[k] = 0;
471          strcpy (outFileName, inFileName);
472          split = strrchr (outFileName, BZ_SPLIT_SYM);
473          if (split == NULL) {
474             split = outFileName;
475          } else {
476             ++split;
477 	 }
478 	 /* Now split points to the start of the basename. */
479          ofs  = split - outFileName;
480          sprintf (split, "rec%5d", wrBlock+1);
481          for (p = split; *p != 0; p++) if (*p == ' ') *p = '0';
482          strcat (outFileName, inFileName + ofs);
483 
484          if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" );
485 
486          fprintf ( stderr, "   writing block %d to `%s' ...\n",
487                            wrBlock+1, outFileName );
488 
489          outFile = fopen ( outFileName, "wb" );
490          if (outFile == NULL) {
491             fprintf ( stderr, "%s: can't write `%s'\n",
492                       progName, outFileName );
493             exit(1);
494          }
495          bsWr = bsOpenWriteStream ( outFile );
496          bsPutUChar ( bsWr, BZ_HDR_B );
497          bsPutUChar ( bsWr, BZ_HDR_Z );
498          bsPutUChar ( bsWr, BZ_HDR_h );
499          bsPutUChar ( bsWr, BZ_HDR_0 + 9 );
500          bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 );
501          bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 );
502          bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 );
503       }
504    }
505 
506    fprintf ( stderr, "%s: finished\n", progName );
507    return 0;
508 }
509 
510 
511 
512 /*-----------------------------------------------------------*/
513 /*--- end                                  bzip2recover.c ---*/
514 /*-----------------------------------------------------------*/
515