xref: /freebsd/contrib/bzip2/bzip2recover.c (revision 3ff369fed2a08f32dda232c10470b949bef9489f)
1 
2 /*-----------------------------------------------------------*/
3 /*--- Block recoverer program for bzip2                   ---*/
4 /*---                                      bzip2recover.c ---*/
5 /*-----------------------------------------------------------*/
6 
7 /*--
8   This program is bzip2recover, a program to attempt data
9   salvage from damaged files created by the accompanying
10   bzip2-1.0 program.
11 
12   Copyright (C) 1996-2002 Julian R Seward.  All rights reserved.
13 
14   Redistribution and use in source and binary forms, with or without
15   modification, are permitted provided that the following conditions
16   are met:
17 
18   1. Redistributions of source code must retain the above copyright
19      notice, this list of conditions and the following disclaimer.
20 
21   2. The origin of this software must not be misrepresented; you must
22      not claim that you wrote the original software.  If you use this
23      software in a product, an acknowledgment in the product
24      documentation would be appreciated but is not required.
25 
26   3. Altered source versions must be plainly marked as such, and must
27      not be misrepresented as being the original software.
28 
29   4. The name of the author may not be used to endorse or promote
30      products derived from this software without specific prior written
31      permission.
32 
33   THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
34   OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
35   WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36   ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
37   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
38   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
39   GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
40   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
41   WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
42   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
43   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
44 
45   Julian Seward, Cambridge, UK.
46   jseward@acm.org
47   bzip2/libbzip2 version 1.0 of 21 March 2000
48 --*/
49 
50 /*--
51   This program is a complete hack and should be rewritten
52   properly.  It isn't very complicated.
53 --*/
54 
55 #include <stdio.h>
56 #include <errno.h>
57 #include <stdlib.h>
58 #include <string.h>
59 
60 
/* This program records bit locations in the file to be recovered.
   That means that if 64-bit ints are not supported, we will not
   be able to recover .bz2 files over 512MB (2^32 bits) long.
   On GNU supported platforms, we take advantage of the 64-bit
   int support to circumvent this problem.  Ditto MSVC.

   This change occurred in version 1.0.2; all prior versions have
   the 512MB limitation.

   MaybeUInt64_FMT is the printf conversion matching MaybeUInt64.
   For unsigned long long the standard length modifier is "ll";
   the "L" modifier is only defined for long double conversions.
*/
#ifdef __GNUC__
   typedef  unsigned long long int  MaybeUInt64;
#  define MaybeUInt64_FMT "%llu"
#else
#ifdef _MSC_VER
   typedef  unsigned __int64  MaybeUInt64;
#  define MaybeUInt64_FMT "%I64u"
#else
   /* Fallback: only 32 bits available, so the 512MB limit applies. */
   typedef  unsigned int   MaybeUInt64;
#  define MaybeUInt64_FMT "%u"
#endif
#endif
82 
/* Basic scalar aliases used throughout this file. */
typedef  unsigned int   UInt32;
typedef  int            Int32;
typedef  unsigned char  UChar;
typedef  char           Char;

/* Boolean type and its two values. */
typedef  unsigned char  Bool;
#define False   ((Bool)0)
#define True    ((Bool)1)
90 
91 
92 #define BZ_MAX_FILENAME 2000
93 
94 Char inFileName[BZ_MAX_FILENAME];
95 Char outFileName[BZ_MAX_FILENAME];
96 Char progName[BZ_MAX_FILENAME];
97 
98 MaybeUInt64 bytesOut = 0;
99 MaybeUInt64 bytesIn  = 0;
100 
101 
102 /*---------------------------------------------------*/
103 /*--- Header bytes                                ---*/
104 /*---------------------------------------------------*/
105 
/* Byte values written at the start of every recovered file. */
#define BZ_HDR_B 0x42   /* 'B' */
#define BZ_HDR_Z 0x5a   /* 'Z' */
#define BZ_HDR_h 0x68   /* 'h' */
#define BZ_HDR_0 0x30   /* '0' */
110 
111 
112 /*---------------------------------------------------*/
113 /*--- I/O errors                                  ---*/
114 /*---------------------------------------------------*/
115 
116 /*---------------------------------------------*/
117 void readError ( void )
118 {
119    fprintf ( stderr,
120              "%s: I/O error reading `%s', possible reason follows.\n",
121             progName, inFileName );
122    perror ( progName );
123    fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
124              progName );
125    exit ( 1 );
126 }
127 
128 
129 /*---------------------------------------------*/
130 void writeError ( void )
131 {
132    fprintf ( stderr,
133              "%s: I/O error reading `%s', possible reason follows.\n",
134             progName, inFileName );
135    perror ( progName );
136    fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
137              progName );
138    exit ( 1 );
139 }
140 
141 
142 /*---------------------------------------------*/
143 void mallocFail ( Int32 n )
144 {
145    fprintf ( stderr,
146              "%s: malloc failed on request for %d bytes.\n",
147             progName, n );
148    fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
149              progName );
150    exit ( 1 );
151 }
152 
153 
154 /*---------------------------------------------*/
155 void tooManyBlocks ( Int32 max_handled_blocks )
156 {
157    fprintf ( stderr,
158              "%s: `%s' appears to contain more than %d blocks\n",
159             progName, inFileName, max_handled_blocks );
160    fprintf ( stderr,
161              "%s: and cannot be handled.  To fix, increase\n",
162              progName );
163    fprintf ( stderr,
164              "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n",
165              progName );
166    exit ( 1 );
167 }
168 
169 
170 
171 /*---------------------------------------------------*/
172 /*--- Bit stream I/O                              ---*/
173 /*---------------------------------------------------*/
174 
175 typedef
176    struct {
177       FILE*  handle;
178       Int32  buffer;
179       Int32  buffLive;
180       Char   mode;
181    }
182    BitStream;
183 
184 
185 /*---------------------------------------------*/
186 BitStream* bsOpenReadStream ( FILE* stream )
187 {
188    BitStream *bs = malloc ( sizeof(BitStream) );
189    if (bs == NULL) mallocFail ( sizeof(BitStream) );
190    bs->handle = stream;
191    bs->buffer = 0;
192    bs->buffLive = 0;
193    bs->mode = 'r';
194    return bs;
195 }
196 
197 
198 /*---------------------------------------------*/
199 BitStream* bsOpenWriteStream ( FILE* stream )
200 {
201    BitStream *bs = malloc ( sizeof(BitStream) );
202    if (bs == NULL) mallocFail ( sizeof(BitStream) );
203    bs->handle = stream;
204    bs->buffer = 0;
205    bs->buffLive = 0;
206    bs->mode = 'w';
207    return bs;
208 }
209 
210 
211 /*---------------------------------------------*/
212 void bsPutBit ( BitStream* bs, Int32 bit )
213 {
214    if (bs->buffLive == 8) {
215       Int32 retVal = putc ( (UChar) bs->buffer, bs->handle );
216       if (retVal == EOF) writeError();
217       bytesOut++;
218       bs->buffLive = 1;
219       bs->buffer = bit & 0x1;
220    } else {
221       bs->buffer = ( (bs->buffer << 1) | (bit & 0x1) );
222       bs->buffLive++;
223    };
224 }
225 
226 
227 /*---------------------------------------------*/
228 /*--
229    Returns 0 or 1, or 2 to indicate EOF.
230 --*/
231 Int32 bsGetBit ( BitStream* bs )
232 {
233    if (bs->buffLive > 0) {
234       bs->buffLive --;
235       return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 );
236    } else {
237       Int32 retVal = getc ( bs->handle );
238       if ( retVal == EOF ) {
239          if (errno != 0) readError();
240          return 2;
241       }
242       bs->buffLive = 7;
243       bs->buffer = retVal;
244       return ( ((bs->buffer) >> 7) & 0x1 );
245    }
246 }
247 
248 
249 /*---------------------------------------------*/
250 void bsClose ( BitStream* bs )
251 {
252    Int32 retVal;
253 
254    if ( bs->mode == 'w' ) {
255       while ( bs->buffLive < 8 ) {
256          bs->buffLive++;
257          bs->buffer <<= 1;
258       };
259       retVal = putc ( (UChar) (bs->buffer), bs->handle );
260       if (retVal == EOF) writeError();
261       bytesOut++;
262       retVal = fflush ( bs->handle );
263       if (retVal == EOF) writeError();
264    }
265    retVal = fclose ( bs->handle );
266    if (retVal == EOF) {
267       if (bs->mode == 'w') writeError(); else readError();
268    }
269    free ( bs );
270 }
271 
272 
273 /*---------------------------------------------*/
274 void bsPutUChar ( BitStream* bs, UChar c )
275 {
276    Int32 i;
277    for (i = 7; i >= 0; i--)
278       bsPutBit ( bs, (((UInt32) c) >> i) & 0x1 );
279 }
280 
281 
282 /*---------------------------------------------*/
283 void bsPutUInt32 ( BitStream* bs, UInt32 c )
284 {
285    Int32 i;
286 
287    for (i = 31; i >= 0; i--)
288       bsPutBit ( bs, (c >> i) & 0x1 );
289 }
290 
291 
292 /*---------------------------------------------*/
293 Bool endsInBz2 ( Char* name )
294 {
295    Int32 n = strlen ( name );
296    if (n <= 4) return False;
297    return
298       (name[n-4] == '.' &&
299        name[n-3] == 'b' &&
300        name[n-2] == 'z' &&
301        name[n-1] == '2');
302 }
303 
304 
305 /*---------------------------------------------------*/
306 /*---                                             ---*/
307 /*---------------------------------------------------*/
308 
/* Directory separator used to find the basename of the input
   path.  This logic isn't really right when it comes to Cygwin. */
#ifdef _WIN32
#  define  BZ_SPLIT_SYM  '\\'  /* path splitter on Windows platform */
#else
#  define  BZ_SPLIT_SYM  '/'   /* path splitter on Unix platform */
#endif

/* 48-bit marker searched for as the start of a block, split into
   its upper 16 bits (HI) and lower 32 bits (LO). */
#define BLOCK_HEADER_HI  0x3141UL
#define BLOCK_HEADER_LO  0x59265359UL

/* 48-bit end-of-stream marker, split the same way. */
#define BLOCK_ENDMARK_HI 0x1772UL
#define BLOCK_ENDMARK_LO 0x45385090UL

/* Size of the block position tables.  Increase if necessary.
   However, per the original author's estimate, a .bz2 file with
   > 50000 blocks would have an uncompressed size of at least 40GB,
   so the chances are low you'll need to up this.
*/
#define BZ_MAX_HANDLED_BLOCKS 50000
327 
328 MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS];
329 MaybeUInt64 bEnd   [BZ_MAX_HANDLED_BLOCKS];
330 MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS];
331 MaybeUInt64 rbEnd  [BZ_MAX_HANDLED_BLOCKS];
332 
333 Int32 main ( Int32 argc, Char** argv )
334 {
335    FILE*       inFile;
336    FILE*       outFile;
337    BitStream*  bsIn, *bsWr;
338    Int32       b, wrBlock, currBlock, rbCtr;
339    MaybeUInt64 bitsRead;
340 
341    UInt32      buffHi, buffLo, blockCRC;
342    Char*       p;
343 
344    strcpy ( progName, argv[0] );
345    inFileName[0] = outFileName[0] = 0;
346 
347    fprintf ( stderr,
348              "bzip2recover 1.0.2: extracts blocks from damaged .bz2 files.\n" );
349 
350    if (argc != 2) {
351       fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
352                         progName, progName );
353       switch (sizeof(MaybeUInt64)) {
354          case 8:
355             fprintf(stderr,
356                     "\trestrictions on size of recovered file: None\n");
357             break;
358          case 4:
359             fprintf(stderr,
360                     "\trestrictions on size of recovered file: 512 MB\n");
361             fprintf(stderr,
362                     "\tto circumvent, recompile with MaybeUInt64 as an\n"
363                     "\tunsigned 64-bit int.\n");
364             break;
365          default:
366             fprintf(stderr,
367                     "\tsizeof(MaybeUInt64) is not 4 or 8 -- "
368                     "configuration error.\n");
369             break;
370       }
371       exit(1);
372    }
373 
374    if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) {
375       fprintf ( stderr,
376                 "%s: supplied filename is suspiciously (>= %d chars) long.  Bye!\n",
377                 progName, strlen(argv[1]) );
378       exit(1);
379    }
380 
381    strcpy ( inFileName, argv[1] );
382 
383    inFile = fopen ( inFileName, "rb" );
384    if (inFile == NULL) {
385       fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName );
386       exit(1);
387    }
388 
389    bsIn = bsOpenReadStream ( inFile );
390    fprintf ( stderr, "%s: searching for block boundaries ...\n", progName );
391 
392    bitsRead = 0;
393    buffHi = buffLo = 0;
394    currBlock = 0;
395    bStart[currBlock] = 0;
396 
397    rbCtr = 0;
398 
399    while (True) {
400       b = bsGetBit ( bsIn );
401       bitsRead++;
402       if (b == 2) {
403          if (bitsRead >= bStart[currBlock] &&
404             (bitsRead - bStart[currBlock]) >= 40) {
405             bEnd[currBlock] = bitsRead-1;
406             if (currBlock > 0)
407                fprintf ( stderr, "   block %d runs from " MaybeUInt64_FMT
408                                  " to " MaybeUInt64_FMT " (incomplete)\n",
409                          currBlock,  bStart[currBlock], bEnd[currBlock] );
410          } else
411             currBlock--;
412          break;
413       }
414       buffHi = (buffHi << 1) | (buffLo >> 31);
415       buffLo = (buffLo << 1) | (b & 1);
416       if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI
417              && buffLo == BLOCK_HEADER_LO)
418            ||
419            ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI
420              && buffLo == BLOCK_ENDMARK_LO)
421          ) {
422          if (bitsRead > 49) {
423             bEnd[currBlock] = bitsRead-49;
424          } else {
425             bEnd[currBlock] = 0;
426          }
427          if (currBlock > 0 &&
428 	     (bEnd[currBlock] - bStart[currBlock]) >= 130) {
429             fprintf ( stderr, "   block %d runs from " MaybeUInt64_FMT
430                               " to " MaybeUInt64_FMT "\n",
431                       rbCtr+1,  bStart[currBlock], bEnd[currBlock] );
432             rbStart[rbCtr] = bStart[currBlock];
433             rbEnd[rbCtr] = bEnd[currBlock];
434             rbCtr++;
435          }
436          if (currBlock >= BZ_MAX_HANDLED_BLOCKS)
437             tooManyBlocks(BZ_MAX_HANDLED_BLOCKS);
438          currBlock++;
439 
440          bStart[currBlock] = bitsRead;
441       }
442    }
443 
444    bsClose ( bsIn );
445 
446    /*-- identified blocks run from 1 to rbCtr inclusive. --*/
447 
448    if (rbCtr < 1) {
449       fprintf ( stderr,
450                 "%s: sorry, I couldn't find any block boundaries.\n",
451                 progName );
452       exit(1);
453    };
454 
455    fprintf ( stderr, "%s: splitting into blocks\n", progName );
456 
457    inFile = fopen ( inFileName, "rb" );
458    if (inFile == NULL) {
459       fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName );
460       exit(1);
461    }
462    bsIn = bsOpenReadStream ( inFile );
463 
464    /*-- placate gcc's dataflow analyser --*/
465    blockCRC = 0; bsWr = 0;
466 
467    bitsRead = 0;
468    outFile = NULL;
469    wrBlock = 0;
470    while (True) {
471       b = bsGetBit(bsIn);
472       if (b == 2) break;
473       buffHi = (buffHi << 1) | (buffLo >> 31);
474       buffLo = (buffLo << 1) | (b & 1);
475       if (bitsRead == 47+rbStart[wrBlock])
476          blockCRC = (buffHi << 16) | (buffLo >> 16);
477 
478       if (outFile != NULL && bitsRead >= rbStart[wrBlock]
479                           && bitsRead <= rbEnd[wrBlock]) {
480          bsPutBit ( bsWr, b );
481       }
482 
483       bitsRead++;
484 
485       if (bitsRead == rbEnd[wrBlock]+1) {
486          if (outFile != NULL) {
487             bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 );
488             bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 );
489             bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 );
490             bsPutUInt32 ( bsWr, blockCRC );
491             bsClose ( bsWr );
492          }
493          if (wrBlock >= rbCtr) break;
494          wrBlock++;
495       } else
496       if (bitsRead == rbStart[wrBlock]) {
497          /* Create the output file name, correctly handling leading paths.
498             (31.10.2001 by Sergey E. Kusikov) */
499          Char* split;
500          Int32 ofs, k;
501          for (k = 0; k < BZ_MAX_FILENAME; k++)
502             outFileName[k] = 0;
503          strcpy (outFileName, inFileName);
504          split = strrchr (outFileName, BZ_SPLIT_SYM);
505          if (split == NULL) {
506             split = outFileName;
507          } else {
508             ++split;
509 	 }
510 	 /* Now split points to the start of the basename. */
511          ofs  = split - outFileName;
512          sprintf (split, "rec%5d", wrBlock+1);
513          for (p = split; *p != 0; p++) if (*p == ' ') *p = '0';
514          strcat (outFileName, inFileName + ofs);
515 
516          if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" );
517 
518          fprintf ( stderr, "   writing block %d to `%s' ...\n",
519                            wrBlock+1, outFileName );
520 
521          outFile = fopen ( outFileName, "wb" );
522          if (outFile == NULL) {
523             fprintf ( stderr, "%s: can't write `%s'\n",
524                       progName, outFileName );
525             exit(1);
526          }
527          bsWr = bsOpenWriteStream ( outFile );
528          bsPutUChar ( bsWr, BZ_HDR_B );
529          bsPutUChar ( bsWr, BZ_HDR_Z );
530          bsPutUChar ( bsWr, BZ_HDR_h );
531          bsPutUChar ( bsWr, BZ_HDR_0 + 9 );
532          bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 );
533          bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 );
534          bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 );
535       }
536    }
537 
538    fprintf ( stderr, "%s: finished\n", progName );
539    return 0;
540 }
541 
542 
543 
544 /*-----------------------------------------------------------*/
545 /*--- end                                  bzip2recover.c ---*/
546 /*-----------------------------------------------------------*/
547