xref: /freebsd/stand/libsa/bzipfs.c (revision f126890ac5386406dadf7c4cfa9566cbb56537c5)
1 /*
2  * Copyright (c) 1998 Michael Smith.
3  * Copyright (c) 2000 Maxim Sobolev
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #ifndef REGRESSION
29 #include "stand.h"
30 #else
31 #include <stdlib.h>
32 #include <sys/errno.h>
33 #include <sys/fcntl.h>
34 #include <sys/types.h>
35 #include <sys/unistd.h>
36 
/*
 * Cut-down struct open_file for the REGRESSION build: only the two members
 * this file touches are declared.
 */
struct open_file {
    int		f_flags;	/* open mode flags; see F_* below */
    void	*f_fsdata;	/* file system specific private data */
};
#define F_READ          0x0001  /* file opened for reading */
#define EOFFSET (ELAST+8)       /* relative seek not supported */

/*
 * Return the smaller of two unsigned values.  Regression-build helper; the
 * normal build presumably gets min() through stand.h.
 */
static inline u_int
min(u_int a, u_int b)
{
    return((b < a) ? b : a);
}

/* Regression build: both arguments are discarded and the process aborts. */
#define panic(x, y) abort()
45 #endif
46 
47 #include <sys/stat.h>
48 #include <string.h>
49 #include <bzlib.h>
50 
51 #define BZ_BUFSIZE 2048	/* XXX larger? */
52 
53 struct bz_file
54 {
55     int			bzf_rawfd;
56     bz_stream		bzf_bzstream;
57     char		bzf_buf[BZ_BUFSIZE];
58     int			bzf_endseen;
59 };
60 
61 static int	bzf_fill(struct bz_file *z);
62 static int	bzf_open(const char *path, struct open_file *f);
63 static int	bzf_close(struct open_file *f);
64 static int	bzf_read(struct open_file *f, void *buf, size_t size, size_t *resid);
65 static off_t	bzf_seek(struct open_file *f, off_t offset, int where);
66 static int	bzf_stat(struct open_file *f, struct stat *sb);
67 
68 #ifndef REGRESSION
69 struct fs_ops bzipfs_fsops = {
70 	.fs_name = "bzip",
71 	.fo_open = bzf_open,
72 	.fo_close = bzf_close,
73 	.fo_read = bzf_read,
74 	.fo_write = null_write,
75 	.fo_seek = bzf_seek,
76 	.fo_stat = bzf_stat,
77 	.fo_readdir = null_readdir,
78 };
79 #endif
80 
81 static int
82 bzf_fill(struct bz_file *bzf)
83 {
84     int		result;
85     int		req;
86 
87     req = BZ_BUFSIZE - bzf->bzf_bzstream.avail_in;
88     result = 0;
89 
90     /* If we need more */
91     if (req > 0) {
92 	/* move old data to bottom of buffer */
93 	if (req < BZ_BUFSIZE)
94 	    bcopy(bzf->bzf_buf + req, bzf->bzf_buf, BZ_BUFSIZE - req);
95 
96 	/* read to fill buffer and update availibility data */
97 	result = read(bzf->bzf_rawfd, bzf->bzf_buf + bzf->bzf_bzstream.avail_in, req);
98 	bzf->bzf_bzstream.next_in = bzf->bzf_buf;
99 	if (result >= 0)
100 	    bzf->bzf_bzstream.avail_in += result;
101     }
102     return(result);
103 }
104 
105 /*
106  * Adapted from get_byte/check_header in libz
107  *
108  * Returns 0 if the header is OK, nonzero if not.
109  */
110 static int
111 get_byte(struct bz_file *bzf)
112 {
113     if ((bzf->bzf_bzstream.avail_in == 0) && (bzf_fill(bzf) == -1))
114 	return(-1);
115     bzf->bzf_bzstream.avail_in--;
116     return(*(bzf->bzf_bzstream.next_in)++);
117 }
118 
119 static int bz_magic[3] = {'B', 'Z', 'h'}; /* bzip2 magic header */
120 
121 static int
122 check_header(struct bz_file *bzf)
123 {
124     unsigned int len;
125     int		 c;
126 
127     /* Check the bzip2 magic header */
128     for (len = 0; len < 3; len++) {
129 	c = get_byte(bzf);
130 	if (c != bz_magic[len]) {
131 	    return(1);
132 	}
133     }
134     /* Check that the block size is valid */
135     c = get_byte(bzf);
136     if (c < '1' || c > '9')
137 	return(1);
138 
139     /* Put back bytes that we've took from the input stream */
140     bzf->bzf_bzstream.next_in -= 4;
141     bzf->bzf_bzstream.avail_in += 4;
142 
143     return(0);
144 }
145 
146 static int
147 bzf_open(const char *fname, struct open_file *f)
148 {
149     static char		*bzfname;
150     int			rawfd;
151     struct bz_file	*bzf;
152     char		*cp;
153     int			error;
154     struct stat		sb;
155 
156     /* Have to be in "just read it" mode */
157     if (f->f_flags != F_READ)
158 	return(EPERM);
159 
160     /* If the name already ends in .gz or .bz2, ignore it */
161     if ((cp = strrchr(fname, '.')) && (!strcmp(cp, ".gz")
162 	    || !strcmp(cp, ".bz2") || !strcmp(cp, ".split")))
163 	return(ENOENT);
164 
165     /* Construct new name */
166     bzfname = malloc(strlen(fname) + 5);
167     if (bzfname == NULL)
168 	return(ENOMEM);
169     sprintf(bzfname, "%s.bz2", fname);
170 
171     /* Try to open the compressed datafile */
172     rawfd = open(bzfname, O_RDONLY);
173     free(bzfname);
174     if (rawfd == -1)
175 	return(ENOENT);
176 
177     if (fstat(rawfd, &sb) < 0) {
178 	printf("bzf_open: stat failed\n");
179 	close(rawfd);
180 	return(ENOENT);
181     }
182     if (!S_ISREG(sb.st_mode)) {
183 	printf("bzf_open: not a file\n");
184 	close(rawfd);
185 	return(EISDIR);			/* best guess */
186     }
187 
188     /* Allocate a bz_file structure, populate it */
189     bzf = malloc(sizeof(struct bz_file));
190     if (bzf == NULL)
191 	return(ENOMEM);
192     bzero(bzf, sizeof(struct bz_file));
193     bzf->bzf_rawfd = rawfd;
194 
195     /* Verify that the file is bzipped */
196     if (check_header(bzf)) {
197 	close(bzf->bzf_rawfd);
198 	free(bzf);
199 	return(EFTYPE);
200     }
201 
202     /* Initialise the inflation engine */
203     if ((error = BZ2_bzDecompressInit(&(bzf->bzf_bzstream), 0, 1)) != BZ_OK) {
204 	printf("bzf_open: BZ2_bzDecompressInit returned %d\n", error);
205 	close(bzf->bzf_rawfd);
206 	free(bzf);
207 	return(EIO);
208     }
209 
210     /* Looks OK, we'll take it */
211     f->f_fsdata = bzf;
212     return(0);
213 }
214 
215 static int
216 bzf_close(struct open_file *f)
217 {
218     struct bz_file	*bzf = (struct bz_file *)f->f_fsdata;
219 
220     BZ2_bzDecompressEnd(&(bzf->bzf_bzstream));
221     close(bzf->bzf_rawfd);
222     free(bzf);
223     return(0);
224 }
225 
226 static int
227 bzf_read(struct open_file *f, void *buf, size_t size, size_t *resid)
228 {
229     struct bz_file	*bzf = (struct bz_file *)f->f_fsdata;
230     int			error;
231 
232     bzf->bzf_bzstream.next_out = buf;			/* where and how much */
233     bzf->bzf_bzstream.avail_out = size;
234 
235     while (bzf->bzf_bzstream.avail_out && bzf->bzf_endseen == 0) {
236 	if ((bzf->bzf_bzstream.avail_in == 0) && (bzf_fill(bzf) == -1)) {
237 	    printf("bzf_read: fill error\n");
238 	    return(EIO);
239 	}
240 	if (bzf->bzf_bzstream.avail_in == 0) {		/* oops, unexpected EOF */
241 	    printf("bzf_read: unexpected EOF\n");
242 	    if (bzf->bzf_bzstream.avail_out == size)
243 		return(EIO);
244 	    break;
245 	}
246 
247 	error = BZ2_bzDecompress(&bzf->bzf_bzstream);	/* decompression pass */
248 	if (error == BZ_STREAM_END) {			/* EOF, all done */
249 	    bzf->bzf_endseen = 1;
250 	    break;
251 	}
252 	if (error != BZ_OK) {				/* argh, decompression error */
253 	    printf("bzf_read: BZ2_bzDecompress returned %d\n", error);
254 	    return(EIO);
255 	}
256     }
257     if (resid != NULL)
258 	*resid = bzf->bzf_bzstream.avail_out;
259     return(0);
260 }
261 
262 static int
263 bzf_rewind(struct open_file *f)
264 {
265     struct bz_file	*bzf = (struct bz_file *)f->f_fsdata;
266     struct bz_file	*bzf_tmp;
267 
268     /*
269      * Since bzip2 does not have an equivalent inflateReset function a crude
270      * one needs to be provided.  The functions all called in such a way that
271      * at any time an error occurs a roll back can be done (effectively making
272      * this rewind 'atomic', either the reset occurs successfully or not at all,
273      * with no 'undefined' state happening).
274      */
275 
276     /* Allocate a bz_file structure, populate it */
277     bzf_tmp = malloc(sizeof(struct bz_file));
278     if (bzf_tmp == NULL)
279 	return(-1);
280     bzero(bzf_tmp, sizeof(struct bz_file));
281     bzf_tmp->bzf_rawfd = bzf->bzf_rawfd;
282 
283     /* Initialise the inflation engine */
284     if (BZ2_bzDecompressInit(&(bzf_tmp->bzf_bzstream), 0, 1) != BZ_OK) {
285 	free(bzf_tmp);
286 	return(-1);
287     }
288 
289     /* Seek back to the beginning of the file */
290     if (lseek(bzf->bzf_rawfd, 0, SEEK_SET) == -1) {
291 	BZ2_bzDecompressEnd(&(bzf_tmp->bzf_bzstream));
292 	free(bzf_tmp);
293 	return(-1);
294     }
295 
296     /* Free old bz_file data */
297     BZ2_bzDecompressEnd(&(bzf->bzf_bzstream));
298     free(bzf);
299 
300     /* Use the new bz_file data */
301     f->f_fsdata = bzf_tmp;
302 
303     return(0);
304 }
305 
306 static off_t
307 bzf_seek(struct open_file *f, off_t offset, int where)
308 {
309     struct bz_file	*bzf = (struct bz_file *)f->f_fsdata;
310     off_t		target;
311     char		discard[16];
312 
313     switch (where) {
314     case SEEK_SET:
315 	target = offset;
316 	break;
317     case SEEK_CUR:
318 	target = offset + bzf->bzf_bzstream.total_out_lo32;
319 	break;
320     default:
321 	errno = EINVAL;
322 	return(-1);
323     }
324 
325     /* Can we get there from here? */
326     if (target < bzf->bzf_bzstream.total_out_lo32 && bzf_rewind(f) != 0) {
327 	errno = EOFFSET;
328 	return -1;
329     }
330 
331     /* if bzf_rewind was called then bzf has changed */
332     bzf = (struct bz_file *)f->f_fsdata;
333 
334     /* skip forwards if required */
335     while (target > bzf->bzf_bzstream.total_out_lo32) {
336 	errno = bzf_read(f, discard, min(sizeof(discard),
337 	    target - bzf->bzf_bzstream.total_out_lo32), NULL);
338 	if (errno)
339 	    return(-1);
340 	/* Break out of loop if end of file has been reached. */
341 	if (bzf->bzf_endseen)
342 	    break;
343     }
344     /* This is where we are (be honest if we overshot) */
345     return(bzf->bzf_bzstream.total_out_lo32);
346 }
347 
348 static int
349 bzf_stat(struct open_file *f, struct stat *sb)
350 {
351     struct bz_file	*bzf = (struct bz_file *)f->f_fsdata;
352     int			result;
353 
354     /* stat as normal, but indicate that size is unknown */
355     if ((result = fstat(bzf->bzf_rawfd, sb)) == 0)
356 	sb->st_size = -1;
357     return(result);
358 }
359 
/*
 * Non-static error hook that panics with the given bzip2 error code.
 * NOTE(review): presumably this is the callback libbzip2 requires when it
 * is built without stdio (BZ_NO_STDIO) - confirm against the bzlib docs.
 */
void
bz_internal_error(int errorcode)
{

    panic("bzipfs: critical error %d in bzip2 library occurred", errorcode);
}
365 
366 #ifdef REGRESSION
367 /* Small test case, open and decompress test.bz2 */
368 int main(void)
369 {
370     struct open_file f;
371     char buf[1024];
372     size_t resid;
373     int err;
374 
375     memset(&f, '\0', sizeof(f));
376     f.f_flags = F_READ;
377     err = bzf_open("test", &f);
378     if (err != 0)
379 	exit(1);
380     do {
381 	err = bzf_read(&f, buf, sizeof(buf), &resid);
382     } while (err == 0 && resid != sizeof(buf));
383 
384     if (err != 0)
385 	exit(2);
386     exit(0);
387 }
388 #endif
389