/*-----------------------------------------------------------*/
/*--- Block recoverer program for bzip2                   ---*/
/*---                                      bzip2recover.c ---*/
/*-----------------------------------------------------------*/
|
|
|
6 |
|
|
|
7 |
/*--
  This program is bzip2recover, a program to attempt data
  salvage from damaged files created by the accompanying
  bzip2-1.0 program.

  Copyright (C) 1996-2000 Julian R Seward. All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:

  1. Redistributions of source code must retain the above copyright
     notice, this list of conditions and the following disclaimer.

  2. The origin of this software must not be misrepresented; you must
     not claim that you wrote the original software. If you use this
     software in a product, an acknowledgment in the product
     documentation would be appreciated but is not required.

  3. Altered source versions must be plainly marked as such, and must
     not be misrepresented as being the original software.

  4. The name of the author may not be used to endorse or promote
     products derived from this software without specific prior written
     permission.

  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
  OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

  Julian Seward, Cambridge, UK.
  jseward@acm.org
  bzip2/libbzip2 version 1.0 of 21 March 2000
--*/
|
|
|
49 |
|
|
|
50 |
/*--
  This program is a complete hack and should be rewritten
  properly. It isn't very complicated.
--*/
|
|
|
54 |
|
|
|
55 |
#include <stdio.h>
|
|
|
56 |
#include <errno.h>
|
|
|
57 |
#include <stdlib.h>
|
|
|
58 |
#include <string.h>
|
|
|
59 |
|
|
|
60 |
/*-- Scalar type aliases used throughout this file. --*/
typedef unsigned int   UInt32;
typedef int            Int32;
typedef unsigned char  UChar;
typedef char           Char;

/*-- Boolean type together with its two values. --*/
typedef unsigned char  Bool;
#define True   ((Bool)1)
#define False  ((Bool)0)
|
|
|
67 |
|
|
|
68 |
|
|
|
69 |
Char inFileName[2000];
|
|
|
70 |
Char outFileName[2000];
|
|
|
71 |
Char progName[2000];
|
|
|
72 |
|
|
|
73 |
UInt32 bytesOut = 0;
|
|
|
74 |
UInt32 bytesIn = 0;
|
|
|
75 |
|
|
|
76 |
|
|
|
77 |
/*---------------------------------------------------*/
|
|
|
78 |
/*--- I/O errors ---*/
|
|
|
79 |
/*---------------------------------------------------*/
|
|
|
80 |
|
|
|
81 |
/*---------------------------------------------*/
|
|
|
82 |
void readError ( void )
|
|
|
83 |
{
|
|
|
84 |
fprintf ( stderr,
|
|
|
85 |
"%s: I/O error reading `%s', possible reason follows.\n",
|
|
|
86 |
progName, inFileName );
|
|
|
87 |
perror ( progName );
|
|
|
88 |
fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
|
|
|
89 |
progName );
|
|
|
90 |
exit ( 1 );
|
|
|
91 |
}
|
|
|
92 |
|
|
|
93 |
|
|
|
94 |
/*---------------------------------------------*/
|
|
|
95 |
void writeError ( void )
|
|
|
96 |
{
|
|
|
97 |
fprintf ( stderr,
|
|
|
98 |
"%s: I/O error reading `%s', possible reason follows.\n",
|
|
|
99 |
progName, inFileName );
|
|
|
100 |
perror ( progName );
|
|
|
101 |
fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
|
|
|
102 |
progName );
|
|
|
103 |
exit ( 1 );
|
|
|
104 |
}
|
|
|
105 |
|
|
|
106 |
|
|
|
107 |
/*---------------------------------------------*/
|
|
|
108 |
/*-- Report that an allocation of n bytes failed and terminate.
     Prints the request size and an incomplete-output warning to
     stderr, then exits with status 1.  Does not return. --*/
void mallocFail ( Int32 n )
{
   fprintf ( stderr, "%s: malloc failed on request for %d bytes.\n", progName, n );
   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n", progName );
   exit ( 1 );
}
|
|
|
117 |
|
|
|
118 |
|
|
|
119 |
/*---------------------------------------------------*/
|
|
|
120 |
/*--- Bit stream I/O ---*/
|
|
|
121 |
/*---------------------------------------------------*/
|
|
|
122 |
|
|
|
123 |
typedef
|
|
|
124 |
struct {
|
|
|
125 |
FILE* handle;
|
|
|
126 |
Int32 buffer;
|
|
|
127 |
Int32 buffLive;
|
|
|
128 |
Char mode;
|
|
|
129 |
}
|
|
|
130 |
BitStream;
|
|
|
131 |
|
|
|
132 |
|
|
|
133 |
/*---------------------------------------------*/
|
|
|
134 |
BitStream* bsOpenReadStream ( FILE* stream )
|
|
|
135 |
{
|
|
|
136 |
BitStream *bs = malloc ( sizeof(BitStream) );
|
|
|
137 |
if (bs == NULL) mallocFail ( sizeof(BitStream) );
|
|
|
138 |
bs->handle = stream;
|
|
|
139 |
bs->buffer = 0;
|
|
|
140 |
bs->buffLive = 0;
|
|
|
141 |
bs->mode = 'r';
|
|
|
142 |
return bs;
|
|
|
143 |
}
|
|
|
144 |
|
|
|
145 |
|
|
|
146 |
/*---------------------------------------------*/
|
|
|
147 |
BitStream* bsOpenWriteStream ( FILE* stream )
|
|
|
148 |
{
|
|
|
149 |
BitStream *bs = malloc ( sizeof(BitStream) );
|
|
|
150 |
if (bs == NULL) mallocFail ( sizeof(BitStream) );
|
|
|
151 |
bs->handle = stream;
|
|
|
152 |
bs->buffer = 0;
|
|
|
153 |
bs->buffLive = 0;
|
|
|
154 |
bs->mode = 'w';
|
|
|
155 |
return bs;
|
|
|
156 |
}
|
|
|
157 |
|
|
|
158 |
|
|
|
159 |
/*---------------------------------------------*/
|
|
|
160 |
/*-- Append one bit (the least significant bit of `bit') to the
     write stream.  Bits are collected most-significant-first; once
     eight bits are pending, the next call writes that byte out
     before starting a new one.  Calls writeError (which exits) if
     the byte cannot be written. --*/
void bsPutBit ( BitStream* bs, Int32 bit )
{
   Int32 lowBit = bit & 0x1;

   /*-- Room left in the current byte: just shift the bit in. --*/
   if (bs->buffLive != 8) {
      bs->buffer = (bs->buffer << 1) | lowBit;
      bs->buffLive++;
      return;
   }

   /*-- The byte is full: emit it, then start the next byte with
        the new bit. --*/
   if (putc ( (UChar) bs->buffer, bs->handle ) == EOF) writeError();
   bytesOut++;
   bs->buffer   = lowBit;
   bs->buffLive = 1;
}
|
|
|
173 |
|
|
|
174 |
|
|
|
175 |
/*---------------------------------------------*/
|
|
|
176 |
/*--
|
|
|
177 |
Returns 0 or 1, or 2 to indicate EOF.
|
|
|
178 |
--*/
|
|
|
179 |
/*-- Fetch the next bit from a read stream, most significant bit
     of each byte first.
     Returns 0 or 1 for a data bit, or 2 once the end of the file
     has been reached.  A genuine read failure is reported through
     readError, which terminates the program. --*/
Int32 bsGetBit ( BitStream* bs )
{
   Int32 retVal;

   /*-- Bits of the current byte are still pending: hand out the
        next one. --*/
   if (bs->buffLive > 0) {
      bs->buffLive--;
      return ( (bs->buffer >> bs->buffLive) & 0x1 );
   }

   retVal = getc ( bs->handle );
   if (retVal == EOF) {
      /*-- Ask the stream itself whether this EOF is an error.
           errno is not a reliable test here: getc does not clear
           it, so a stale non-zero value left by an earlier library
           call would turn an ordinary end of file into a spurious
           I/O error. --*/
      if (ferror ( bs->handle )) readError();
      return 2;
   }

   /*-- A fresh byte: return its top bit and keep the other seven. --*/
   bs->buffer   = retVal;
   bs->buffLive = 7;
   return ( (retVal >> 7) & 0x1 );
}
|
|
|
195 |
|
|
|
196 |
|
|
|
197 |
/*---------------------------------------------*/
|
|
|
198 |
/*-- Finish with a bit stream: close its file and free the
     BitStream object.  For a writer, the pending byte is first
     padded on the right with zero bits, written out and flushed.
     Any failure is reported through writeError (writers) or
     readError (readers), both of which terminate the program. --*/
void bsClose ( BitStream* bs )
{
   Bool writing = (Bool)(bs->mode == 'w');

   if (writing) {
      /*-- left-align the pending bits within the final byte --*/
      for ( ; bs->buffLive < 8; bs->buffLive++)
         bs->buffer <<= 1;

      if (putc ( (UChar) (bs->buffer), bs->handle ) == EOF) writeError();
      bytesOut++;
      if (fflush ( bs->handle ) == EOF) writeError();
   }

   if (fclose ( bs->handle ) == EOF) {
      if (writing) writeError(); else readError();
   }
   free ( bs );
}
|
|
|
219 |
|
|
|
220 |
|
|
|
221 |
/*---------------------------------------------*/
|
|
|
222 |
/*-- Write the eight bits of c to the stream, most significant
     bit first. --*/
void bsPutUChar ( BitStream* bs, UChar c )
{
   UInt32 value = (UInt32) c;
   Int32  shift = 8;

   while (shift > 0) {
      shift--;
      bsPutBit ( bs, (value >> shift) & 0x1 );
   }
}
|
|
|
228 |
|
|
|
229 |
|
|
|
230 |
/*---------------------------------------------*/
|
|
|
231 |
/*-- Write bits 31 down to 0 of c to the stream, most significant
     bit first. --*/
void bsPutUInt32 ( BitStream* bs, UInt32 c )
{
   Int32 shift = 32;

   while (shift > 0) {
      shift--;
      bsPutBit ( bs, (c >> shift) & 0x1 );
   }
}
|
|
|
238 |
|
|
|
239 |
|
|
|
240 |
/*---------------------------------------------*/
|
|
|
241 |
/*-- Tell whether name ends with the four characters ".bz2".
     Names of four characters or fewer (including ".bz2" on its
     own) yield False. --*/
Bool endsInBz2 ( Char* name )
{
   Int32 len = strlen ( name );

   if (len <= 4) return False;
   return (Bool)(strcmp ( name + len - 4, ".bz2" ) == 0);
}
|
|
|
251 |
|
|
|
252 |
|
|
|
253 |
/*---------------------------------------------------*/
|
|
|
254 |
/*--- ---*/
|
|
|
255 |
/*---------------------------------------------------*/
|
|
|
256 |
|
|
|
257 |
#define BLOCK_HEADER_HI 0x00003141UL
|
|
|
258 |
#define BLOCK_HEADER_LO 0x59265359UL
|
|
|
259 |
|
|
|
260 |
#define BLOCK_ENDMARK_HI 0x00001772UL
|
|
|
261 |
#define BLOCK_ENDMARK_LO 0x45385090UL
|
|
|
262 |
|
|
|
263 |
|
|
|
264 |
UInt32 bStart[20000];
|
|
|
265 |
UInt32 bEnd[20000];
|
|
|
266 |
UInt32 rbStart[20000];
|
|
|
267 |
UInt32 rbEnd[20000];
|
|
|
268 |
|
|
|
269 |
/*-- Program entry point.  Expects exactly one argument, the name
     of a damaged .bz2 file.

     Pass 1 reads the file bit by bit, looking for the 48-bit block
     header and end-of-stream markers, and records the bit range of
     every block of at least 130 bits lying between two markers.

     Pass 2 re-reads the file and copies each recorded range into
     its own file, wrapped in a fresh "BZh9" stream header, block
     header, end-of-stream marker and the block's stored CRC.  The
     output files are named rec<NNNN><basename>, placed next to the
     input file, with ".bz2" appended if not already present.

     Returns 0 on success.  All failures (bad usage, over-long file
     name, unopenable files, too many blocks, no blocks found, I/O
     errors) print a message to stderr and exit with status 1. --*/
Int32 main ( Int32 argc, Char** argv )
{
   /*-- capacity of bStart; bEnd, rbStart and rbEnd are declared
        with the same size --*/
   const Int32 maxBlocks = (Int32)(sizeof bStart / sizeof bStart[0]);

   FILE*       inFile;
   FILE*       outFile;
   BitStream*  bsIn;
   BitStream*  bsWr;
   Int32       currBlock, b, wrBlock;
   UInt32      bitsRead;
   Int32       rbCtr;
   UInt32      buffHi, buffLo, blockCRC;
   Char*       p;
   Char*       baseName;
   size_t      dirLen;

   /*-- argv[0] is allowed to be absent; fall back to a fixed name --*/
   strncpy ( progName,
             (argc > 0 && argv[0] != NULL) ? argv[0] : "bzip2recover",
             sizeof progName );
   progName[sizeof progName-1] = 0;
   inFileName[0] = outFileName[0] = 0;

   fprintf ( stderr, "bzip2recover 1.0: extracts blocks from damaged .bz2 files.\n" );

   if (argc != 2) {
      fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
                progName, progName );
      exit(1);
   }

   /*-- Refuse names that would overflow inFileName, or outFileName
        once "rec" + block number + ".bz2" have been added to it. --*/
   if (strlen ( argv[1] ) >= sizeof inFileName - 20) {
      fprintf ( stderr,
                "%s: supplied filename is suspiciously (>= %d chars) long.  Bye!\n",
                progName, (Int32)(sizeof inFileName - 20) );
      exit(1);
   }
   strcpy ( inFileName, argv[1] );

   inFile = fopen ( inFileName, "rb" );
   if (inFile == NULL) {
      fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName );
      exit(1);
   }

   /*-- Pass 1: locate block boundaries. --*/
   bsIn = bsOpenReadStream ( inFile );
   fprintf ( stderr, "%s: searching for block boundaries ...\n", progName );

   bitsRead = 0;
   buffHi = buffLo = 0;
   currBlock = 0;
   bStart[currBlock] = 0;

   rbCtr = 0;

   while (True) {
      b = bsGetBit ( bsIn );
      bitsRead++;
      if (b == 2) {
         /*-- End of file: note the trailing, unterminated block if
              it is at least 40 bits long; it is reported but not
              added to the list of blocks to recover. --*/
         if (bitsRead >= bStart[currBlock] &&
             (bitsRead - bStart[currBlock]) >= 40) {
            bEnd[currBlock] = bitsRead-1;
            if (currBlock > 0)
               fprintf ( stderr, " block %d runs from %u to %u (incomplete)\n",
                         currBlock, bStart[currBlock], bEnd[currBlock] );
         } else
            currBlock--;
         break;
      }

      /*-- buffHi:buffLo is a 64-bit shift register holding the most
           recently read bits; the low 48 are compared against the
           two markers. --*/
      buffHi = (buffHi << 1) | (buffLo >> 31);
      buffLo = (buffLo << 1) | (b & 1);
      if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI
             && buffLo == BLOCK_HEADER_LO)
           ||
           ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI
             && buffLo == BLOCK_ENDMARK_LO)
         ) {
         /*-- The current block ended just before this marker. --*/
         if (bitsRead > 49)
            bEnd[currBlock] = bitsRead-49; else
            bEnd[currBlock] = 0;
         if (currBlock > 0 &&
             (bEnd[currBlock] - bStart[currBlock]) >= 130) {
            fprintf ( stderr, " block %d runs from %u to %u\n",
                      rbCtr+1, bStart[currBlock], bEnd[currBlock] );
            rbStart[rbCtr] = bStart[currBlock];
            rbEnd[rbCtr] = bEnd[currBlock];
            rbCtr++;
         }
         currBlock++;

         /*-- rbCtr never exceeds currBlock, so this one check keeps
              every index into the four position arrays in range. --*/
         if (currBlock >= maxBlocks) {
            fprintf ( stderr,
                      "%s: `%s' appears to contain more than %d blocks\n",
                      progName, inFileName, maxBlocks );
            fprintf ( stderr, "%s: and cannot be handled.\n", progName );
            exit(1);
         }

         bStart[currBlock] = bitsRead;
      }
   }

   bsClose ( bsIn );

   /*-- identified blocks run from 1 to rbCtr inclusive. --*/

   if (rbCtr < 1) {
      fprintf ( stderr,
                "%s: sorry, I couldn't find any block boundaries.\n",
                progName );
      exit(1);
   }

   /*-- Pass 2: copy each identified block to its own file. --*/
   fprintf ( stderr, "%s: splitting into blocks\n", progName );

   inFile = fopen ( inFileName, "rb" );
   if (inFile == NULL) {
      fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName );
      exit(1);
   }
   bsIn = bsOpenReadStream ( inFile );

   /*-- Output files go next to the input file, so split its name
        into a directory prefix (possibly empty) and a base name. --*/
   baseName = strrchr ( inFileName, '/' );
   baseName = (baseName == NULL) ? inFileName : baseName + 1;
   dirLen   = (size_t)(baseName - inFileName);

   /*-- placate gcc's dataflow analyser --*/
   blockCRC = 0; bsWr = NULL;

   bitsRead = 0;
   outFile = NULL;
   wrBlock = 0;
   while (True) {
      b = bsGetBit(bsIn);
      if (b == 2) break;
      buffHi = (buffHi << 1) | (buffLo >> 31);
      buffLo = (buffLo << 1) | (b & 1);

      /*-- 48 bits into the block, bits 47..16 of the shift register
           hold the block's first 32 bits, which are written back
           out below as the stream CRC. --*/
      if (bitsRead == 47+rbStart[wrBlock])
         blockCRC = (buffHi << 16) | (buffLo >> 16);

      if (outFile != NULL && bitsRead >= rbStart[wrBlock]
                          && bitsRead <= rbEnd[wrBlock]) {
         bsPutBit ( bsWr, b );
      }

      bitsRead++;

      if (bitsRead == rbEnd[wrBlock]+1) {
         /*-- Block complete: append the end-of-stream marker and
              the CRC, then close this output file. --*/
         if (outFile != NULL) {
            bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 );
            bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 );
            bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 );
            bsPutUInt32 ( bsWr, blockCRC );
            bsClose ( bsWr );
            bsWr    = NULL;
            outFile = NULL;
         }
         /*-- Stop after the last identified block (index rbCtr-1)
              instead of stepping on to the unused entry rbCtr. --*/
         if (wrBlock + 1 >= rbCtr) break;
         wrBlock++;
      } else
      if (bitsRead == rbStart[wrBlock]) {
         /*-- Block about to start: build
              <dir>rec<NNNN><basename>[.bz2] and open it.  The
              length check on argv[1] guarantees this fits. --*/
         memcpy ( outFileName, inFileName, dirLen );
         sprintf ( outFileName + dirLen, "rec%4d", wrBlock+1 );
         for (p = outFileName + dirLen; *p != 0; p++)
            if (*p == ' ') *p = '0';
         strcat ( outFileName, baseName );
         if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" );

         fprintf ( stderr, " writing block %d to `%s' ...\n",
                   wrBlock+1, outFileName );

         outFile = fopen ( outFileName, "wb" );
         if (outFile == NULL) {
            fprintf ( stderr, "%s: can't write `%s'\n",
                      progName, outFileName );
            exit(1);
         }
         bsWr = bsOpenWriteStream ( outFile );

         /*-- stream header "BZh9" followed by the block header --*/
         bsPutUChar ( bsWr, 'B' ); bsPutUChar ( bsWr, 'Z' );
         bsPutUChar ( bsWr, 'h' ); bsPutUChar ( bsWr, '9' );
         bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 );
         bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 );
         bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 );
      }
   }

   bsClose ( bsIn );

   fprintf ( stderr, "%s: finished\n", progName );
   return 0;
}
|
|
|
431 |
|
|
|
432 |
|
|
|
433 |
|
|
|
434 |
/*-----------------------------------------------------------*/
|
|
|
435 |
/*--- end bzip2recover.c ---*/
|
|
|
436 |
/*-----------------------------------------------------------*/
|