xref: /freebsd/contrib/bzip2/bzip2recover.c (revision 6f63e88c0166ed3e5f2805a9e667c7d24d304cf1)
1 /*-----------------------------------------------------------*/
2 /*--- Block recoverer program for bzip2                   ---*/
3 /*---                                      bzip2recover.c ---*/
4 /*-----------------------------------------------------------*/
5 
6 /* ------------------------------------------------------------------
7    This file is part of bzip2/libbzip2, a program and library for
8    lossless, block-sorting data compression.
9 
10    bzip2/libbzip2 version 1.0.8 of 13 July 2019
11    Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
12 
13    Please read the WARNING, DISCLAIMER and PATENTS sections in the
14    README file.
15 
16    This program is released under the terms of the license contained
17    in the file LICENSE.
18    ------------------------------------------------------------------ */
19 
20 /* This program is a complete hack and should be rewritten properly.
21 	 It isn't very complicated. */
22 
23 #include <stdio.h>
24 #include <errno.h>
25 #include <stdlib.h>
26 #include <string.h>
27 
28 
29 /* This program records bit locations in the file to be recovered.
30    That means that if 64-bit ints are not supported, we will not
31    be able to recover .bz2 files over 512MB (2^32 bits) long.
32    On GNU supported platforms, we take advantage of the 64-bit
33    int support to circumvent this problem.  Ditto MSVC.
34 
35    This change occurred in version 1.0.2; all prior versions have
36    the 512MB limitation.
37 */
/* Select the integer type used for bit positions, together with the
   printf conversion that prints it.  GNU-compatible compilers and MSVC
   get a 64-bit type; anything else falls back to unsigned int. */
#if defined(__GNUC__)
   typedef  unsigned long long int  MaybeUInt64;
#  define MaybeUInt64_FMT "%llu"
#elif defined(_MSC_VER)
   typedef  unsigned __int64  MaybeUInt64;
#  define MaybeUInt64_FMT "%I64u"
#else
   typedef  unsigned int   MaybeUInt64;
#  define MaybeUInt64_FMT "%u"
#endif
50 
/* Short names for the basic character and integer types used in this
   file.  (The "32" in the names assumes a 32-bit int.) */
typedef  char           Char;
typedef  unsigned char  UChar;
typedef  int            Int32;
typedef  unsigned int   UInt32;

/* Boolean type and its two values. */
typedef  unsigned char  Bool;
#define False   ((Bool)0)
#define True    ((Bool)1)
58 
59 
60 #define BZ_MAX_FILENAME 2000
61 
62 Char inFileName[BZ_MAX_FILENAME];
63 Char outFileName[BZ_MAX_FILENAME];
64 Char progName[BZ_MAX_FILENAME];
65 
66 MaybeUInt64 bytesOut = 0;
67 MaybeUInt64 bytesIn  = 0;
68 
69 
70 /*---------------------------------------------------*/
71 /*--- Header bytes                                ---*/
72 /*---------------------------------------------------*/
73 
/* Bytes of the stream header written at the start of every recovered
   file: 'B', 'Z', 'h', then BZ_HDR_0 plus a digit. */
#define BZ_HDR_B  0x42   /* 'B' */
#define BZ_HDR_Z  0x5a   /* 'Z' */
#define BZ_HDR_h  0x68   /* 'h' */
#define BZ_HDR_0  0x30   /* '0' */
78 
79 
80 /*---------------------------------------------------*/
81 /*--- I/O errors                                  ---*/
82 /*---------------------------------------------------*/
83 
84 /*---------------------------------------------*/
85 static void readError ( void )
86 {
87    fprintf ( stderr,
88              "%s: I/O error reading `%s', possible reason follows.\n",
89             progName, inFileName );
90    perror ( progName );
91    fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
92              progName );
93    exit ( 1 );
94 }
95 
96 
97 /*---------------------------------------------*/
98 static void writeError ( void )
99 {
100    fprintf ( stderr,
101              "%s: I/O error reading `%s', possible reason follows.\n",
102             progName, inFileName );
103    perror ( progName );
104    fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
105              progName );
106    exit ( 1 );
107 }
108 
109 
110 /*---------------------------------------------*/
111 static void mallocFail ( Int32 n )
112 {
113    fprintf ( stderr,
114              "%s: malloc failed on request for %d bytes.\n",
115             progName, n );
116    fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
117              progName );
118    exit ( 1 );
119 }
120 
121 
122 /*---------------------------------------------*/
123 static void tooManyBlocks ( Int32 max_handled_blocks )
124 {
125    fprintf ( stderr,
126              "%s: `%s' appears to contain more than %d blocks\n",
127             progName, inFileName, max_handled_blocks );
128    fprintf ( stderr,
129              "%s: and cannot be handled.  To fix, increase\n",
130              progName );
131    fprintf ( stderr,
132              "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n",
133              progName );
134    exit ( 1 );
135 }
136 
137 
138 
139 /*---------------------------------------------------*/
140 /*--- Bit stream I/O                              ---*/
141 /*---------------------------------------------------*/
142 
143 typedef
144    struct {
145       FILE*  handle;
146       Int32  buffer;
147       Int32  buffLive;
148       Char   mode;
149    }
150    BitStream;
151 
152 
153 /*---------------------------------------------*/
154 static BitStream* bsOpenReadStream ( FILE* stream )
155 {
156    BitStream *bs = malloc ( sizeof(BitStream) );
157    if (bs == NULL) mallocFail ( sizeof(BitStream) );
158    bs->handle = stream;
159    bs->buffer = 0;
160    bs->buffLive = 0;
161    bs->mode = 'r';
162    return bs;
163 }
164 
165 
166 /*---------------------------------------------*/
167 static BitStream* bsOpenWriteStream ( FILE* stream )
168 {
169    BitStream *bs = malloc ( sizeof(BitStream) );
170    if (bs == NULL) mallocFail ( sizeof(BitStream) );
171    bs->handle = stream;
172    bs->buffer = 0;
173    bs->buffLive = 0;
174    bs->mode = 'w';
175    return bs;
176 }
177 
178 
179 /*---------------------------------------------*/
180 static void bsPutBit ( BitStream* bs, Int32 bit )
181 {
182    if (bs->buffLive == 8) {
183       Int32 retVal = putc ( (UChar) bs->buffer, bs->handle );
184       if (retVal == EOF) writeError();
185       bytesOut++;
186       bs->buffLive = 1;
187       bs->buffer = bit & 0x1;
188    } else {
189       bs->buffer = ( (bs->buffer << 1) | (bit & 0x1) );
190       bs->buffLive++;
191    };
192 }
193 
194 
195 /*---------------------------------------------*/
196 /*--
197    Returns 0 or 1, or 2 to indicate EOF.
198 --*/
199 static Int32 bsGetBit ( BitStream* bs )
200 {
201    if (bs->buffLive > 0) {
202       bs->buffLive --;
203       return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 );
204    } else {
205       Int32 retVal = getc ( bs->handle );
206       if ( retVal == EOF ) {
207          if (errno != 0) readError();
208          return 2;
209       }
210       bs->buffLive = 7;
211       bs->buffer = retVal;
212       return ( ((bs->buffer) >> 7) & 0x1 );
213    }
214 }
215 
216 
217 /*---------------------------------------------*/
218 static void bsClose ( BitStream* bs )
219 {
220    Int32 retVal;
221 
222    if ( bs->mode == 'w' ) {
223       while ( bs->buffLive < 8 ) {
224          bs->buffLive++;
225          bs->buffer <<= 1;
226       };
227       retVal = putc ( (UChar) (bs->buffer), bs->handle );
228       if (retVal == EOF) writeError();
229       bytesOut++;
230       retVal = fflush ( bs->handle );
231       if (retVal == EOF) writeError();
232    }
233    retVal = fclose ( bs->handle );
234    if (retVal == EOF) {
235       if (bs->mode == 'w') writeError(); else readError();
236    }
237    free ( bs );
238 }
239 
240 
241 /*---------------------------------------------*/
242 static void bsPutUChar ( BitStream* bs, UChar c )
243 {
244    Int32 i;
245    for (i = 7; i >= 0; i--)
246       bsPutBit ( bs, (((UInt32) c) >> i) & 0x1 );
247 }
248 
249 
250 /*---------------------------------------------*/
251 static void bsPutUInt32 ( BitStream* bs, UInt32 c )
252 {
253    Int32 i;
254 
255    for (i = 31; i >= 0; i--)
256       bsPutBit ( bs, (c >> i) & 0x1 );
257 }
258 
259 
260 /*---------------------------------------------*/
261 static Bool endsInBz2 ( Char* name )
262 {
263    Int32 n = strlen ( name );
264    if (n <= 4) return False;
265    return
266       (name[n-4] == '.' &&
267        name[n-3] == 'b' &&
268        name[n-2] == 'z' &&
269        name[n-1] == '2');
270 }
271 
272 
273 /*---------------------------------------------------*/
274 /*---                                             ---*/
275 /*---------------------------------------------------*/
276 
277 /* This logic isn't really right when it comes to Cygwin. */
/* Character that separates directory components in a path. */
#if defined(_WIN32)
#  define  BZ_SPLIT_SYM  '\\'  /* path splitter on Windows platform */
#else
#  define  BZ_SPLIT_SYM  '/'   /* path splitter on Unix platform */
#endif

/* 48-bit marker searched for as the start of a block, split into its
   upper 16 bits (HI) and lower 32 bits (LO). */
#define BLOCK_HEADER_HI  0x00003141UL
#define BLOCK_HEADER_LO  0x59265359UL

/* 48-bit marker searched for as the end of the stream, split the same
   way. */
#define BLOCK_ENDMARK_HI 0x00001772UL
#define BLOCK_ENDMARK_LO 0x45385090UL
289 
290 /* Increase if necessary.  However, a .bz2 file with > 50000 blocks
291    would have an uncompressed size of at least 40GB, so the chances
292    are low you'll need to up this.
293 */
294 #define BZ_MAX_HANDLED_BLOCKS 50000
295 
296 MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS];
297 MaybeUInt64 bEnd   [BZ_MAX_HANDLED_BLOCKS];
298 MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS];
299 MaybeUInt64 rbEnd  [BZ_MAX_HANDLED_BLOCKS];
300 
301 Int32 main ( Int32 argc, Char** argv )
302 {
303    FILE*       inFile;
304    FILE*       outFile;
305    BitStream*  bsIn, *bsWr;
306    Int32       b, wrBlock, currBlock, rbCtr;
307    MaybeUInt64 bitsRead;
308 
309    UInt32      buffHi, buffLo, blockCRC;
310    Char*       p;
311 
312    strncpy ( progName, argv[0], BZ_MAX_FILENAME-1);
313    progName[BZ_MAX_FILENAME-1]='\0';
314    inFileName[0] = outFileName[0] = 0;
315 
316    fprintf ( stderr,
317              "bzip2recover 1.0.8: extracts blocks from damaged .bz2 files.\n" );
318 
319    if (argc != 2) {
320       fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
321                         progName, progName );
322       switch (sizeof(MaybeUInt64)) {
323          case 8:
324             fprintf(stderr,
325                     "\trestrictions on size of recovered file: None\n");
326             break;
327          case 4:
328             fprintf(stderr,
329                     "\trestrictions on size of recovered file: 512 MB\n");
330             fprintf(stderr,
331                     "\tto circumvent, recompile with MaybeUInt64 as an\n"
332                     "\tunsigned 64-bit int.\n");
333             break;
334          default:
335             fprintf(stderr,
336                     "\tsizeof(MaybeUInt64) is not 4 or 8 -- "
337                     "configuration error.\n");
338             break;
339       }
340       exit(1);
341    }
342 
343    if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) {
344       fprintf ( stderr,
345                 "%s: supplied filename is suspiciously (>= %d chars) long.  Bye!\n",
346                 progName, (int)strlen(argv[1]) );
347       exit(1);
348    }
349 
350    strcpy ( inFileName, argv[1] );
351 
352    inFile = fopen ( inFileName, "rb" );
353    if (inFile == NULL) {
354       fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName );
355       exit(1);
356    }
357 
358    bsIn = bsOpenReadStream ( inFile );
359    fprintf ( stderr, "%s: searching for block boundaries ...\n", progName );
360 
361    bitsRead = 0;
362    buffHi = buffLo = 0;
363    currBlock = 0;
364    bStart[currBlock] = 0;
365 
366    rbCtr = 0;
367 
368    while (True) {
369       b = bsGetBit ( bsIn );
370       bitsRead++;
371       if (b == 2) {
372          if (bitsRead >= bStart[currBlock] &&
373             (bitsRead - bStart[currBlock]) >= 40) {
374             bEnd[currBlock] = bitsRead-1;
375             if (currBlock > 0)
376                fprintf ( stderr, "   block %d runs from " MaybeUInt64_FMT
377                                  " to " MaybeUInt64_FMT " (incomplete)\n",
378                          currBlock,  bStart[currBlock], bEnd[currBlock] );
379          } else
380             currBlock--;
381          break;
382       }
383       buffHi = (buffHi << 1) | (buffLo >> 31);
384       buffLo = (buffLo << 1) | (b & 1);
385       if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI
386              && buffLo == BLOCK_HEADER_LO)
387            ||
388            ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI
389              && buffLo == BLOCK_ENDMARK_LO)
390          ) {
391          if (bitsRead > 49) {
392             bEnd[currBlock] = bitsRead-49;
393          } else {
394             bEnd[currBlock] = 0;
395          }
396          if (currBlock > 0 &&
397 	     (bEnd[currBlock] - bStart[currBlock]) >= 130) {
398             fprintf ( stderr, "   block %d runs from " MaybeUInt64_FMT
399                               " to " MaybeUInt64_FMT "\n",
400                       rbCtr+1,  bStart[currBlock], bEnd[currBlock] );
401             rbStart[rbCtr] = bStart[currBlock];
402             rbEnd[rbCtr] = bEnd[currBlock];
403             rbCtr++;
404          }
405          if (currBlock >= BZ_MAX_HANDLED_BLOCKS)
406             tooManyBlocks(BZ_MAX_HANDLED_BLOCKS);
407          currBlock++;
408 
409          bStart[currBlock] = bitsRead;
410       }
411    }
412 
413    bsClose ( bsIn );
414 
415    /*-- identified blocks run from 1 to rbCtr inclusive. --*/
416 
417    if (rbCtr < 1) {
418       fprintf ( stderr,
419                 "%s: sorry, I couldn't find any block boundaries.\n",
420                 progName );
421       exit(1);
422    };
423 
424    fprintf ( stderr, "%s: splitting into blocks\n", progName );
425 
426    inFile = fopen ( inFileName, "rb" );
427    if (inFile == NULL) {
428       fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName );
429       exit(1);
430    }
431    bsIn = bsOpenReadStream ( inFile );
432 
433    /*-- placate gcc's dataflow analyser --*/
434    blockCRC = 0; bsWr = 0;
435 
436    bitsRead = 0;
437    outFile = NULL;
438    wrBlock = 0;
439    while (True) {
440       b = bsGetBit(bsIn);
441       if (b == 2) break;
442       buffHi = (buffHi << 1) | (buffLo >> 31);
443       buffLo = (buffLo << 1) | (b & 1);
444       if (bitsRead == 47+rbStart[wrBlock])
445          blockCRC = (buffHi << 16) | (buffLo >> 16);
446 
447       if (outFile != NULL && bitsRead >= rbStart[wrBlock]
448                           && bitsRead <= rbEnd[wrBlock]) {
449          bsPutBit ( bsWr, b );
450       }
451 
452       bitsRead++;
453 
454       if (bitsRead == rbEnd[wrBlock]+1) {
455          if (outFile != NULL) {
456             bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 );
457             bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 );
458             bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 );
459             bsPutUInt32 ( bsWr, blockCRC );
460             bsClose ( bsWr );
461             outFile = NULL;
462          }
463          if (wrBlock >= rbCtr) break;
464          wrBlock++;
465       } else
466       if (bitsRead == rbStart[wrBlock]) {
467          /* Create the output file name, correctly handling leading paths.
468             (31.10.2001 by Sergey E. Kusikov) */
469          Char* split;
470          Int32 ofs, k;
471          for (k = 0; k < BZ_MAX_FILENAME; k++)
472             outFileName[k] = 0;
473          strcpy (outFileName, inFileName);
474          split = strrchr (outFileName, BZ_SPLIT_SYM);
475          if (split == NULL) {
476             split = outFileName;
477          } else {
478             ++split;
479 	 }
480 	 /* Now split points to the start of the basename. */
481          ofs  = split - outFileName;
482          sprintf (split, "rec%5d", wrBlock+1);
483          for (p = split; *p != 0; p++) if (*p == ' ') *p = '0';
484          strcat (outFileName, inFileName + ofs);
485 
486          if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" );
487 
488          fprintf ( stderr, "   writing block %d to `%s' ...\n",
489                            wrBlock+1, outFileName );
490 
491          outFile = fopen ( outFileName, "wb" );
492          if (outFile == NULL) {
493             fprintf ( stderr, "%s: can't write `%s'\n",
494                       progName, outFileName );
495             exit(1);
496          }
497          bsWr = bsOpenWriteStream ( outFile );
498          bsPutUChar ( bsWr, BZ_HDR_B );
499          bsPutUChar ( bsWr, BZ_HDR_Z );
500          bsPutUChar ( bsWr, BZ_HDR_h );
501          bsPutUChar ( bsWr, BZ_HDR_0 + 9 );
502          bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 );
503          bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 );
504          bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 );
505       }
506    }
507 
508    fprintf ( stderr, "%s: finished\n", progName );
509    return 0;
510 }
511 
512 
513 
514 /*-----------------------------------------------------------*/
515 /*--- end                                  bzip2recover.c ---*/
516 /*-----------------------------------------------------------*/
517