xref: /freebsd/stand/libsa/bzipfs.c (revision 0e867a49115687398fd486b3af67fbb41f48b8a8)
1 /*
2  * Copyright (c) 1998 Michael Smith.
3  * Copyright (c) 2000 Maxim Sobolev
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 #ifndef REGRESSION
30 #include "stand.h"
31 #else
32 #include <stdlib.h>
33 #include <sys/errno.h>
34 #include <sys/fcntl.h>
35 #include <sys/types.h>
36 #include <sys/unistd.h>
37 
/*
 * Minimal stand-in for libsa's open_file, carrying only the two members
 * this file touches.  Used when building the REGRESSION test.
 */
struct open_file {
    /* open-mode flags; this file only compares against F_READ */
    int                 f_flags;
    /* per-file private state; bzf_open() stores its struct bz_file here */
    void                *f_fsdata;
};
42 #define F_READ          0x0001  /* file opened for reading */
43 #define EOFFSET (ELAST+8)       /* relative seek not supported */
/* Smaller of two unsigned values (REGRESSION stand-in for the libsa helper). */
static inline u_int min(u_int a, u_int b)
{
    if (a < b)
	return(a);
    return(b);
}
45 #define panic(x, y) abort()
46 #endif
47 
48 #include <sys/stat.h>
49 #include <string.h>
50 #include <bzlib.h>
51 
52 #define BZ_BUFSIZE 2048	/* XXX larger? */
53 
54 struct bz_file
55 {
56     int			bzf_rawfd;
57     bz_stream		bzf_bzstream;
58     char		bzf_buf[BZ_BUFSIZE];
59     int			bzf_endseen;
60 };
61 
/* Input buffering helper. */
static int	bzf_fill(struct bz_file *z);

/* File operations (installed in bzipfs_fsops in non-REGRESSION builds). */
static int	bzf_open(const char *path, struct open_file *f);
static int	bzf_close(struct open_file *f);
static int	bzf_read(struct open_file *f, void *buf, size_t size,
		    size_t *resid);
static off_t	bzf_seek(struct open_file *f, off_t offset, int where);
static int	bzf_stat(struct open_file *f, struct stat *sb);
68 
69 #ifndef REGRESSION
70 struct fs_ops bzipfs_fsops = {
71 	.fs_name = "bzip",
72 	.fo_open = bzf_open,
73 	.fo_close = bzf_close,
74 	.fo_read = bzf_read,
75 	.fo_write = null_write,
76 	.fo_seek = bzf_seek,
77 	.fo_stat = bzf_stat,
78 	.fo_readdir = null_readdir,
79 };
80 #endif
81 
82 static int
83 bzf_fill(struct bz_file *bzf)
84 {
85     int		result;
86     int		req;
87 
88     req = BZ_BUFSIZE - bzf->bzf_bzstream.avail_in;
89     result = 0;
90 
91     /* If we need more */
92     if (req > 0) {
93 	/* move old data to bottom of buffer */
94 	if (req < BZ_BUFSIZE)
95 	    bcopy(bzf->bzf_buf + req, bzf->bzf_buf, BZ_BUFSIZE - req);
96 
97 	/* read to fill buffer and update availibility data */
98 	result = read(bzf->bzf_rawfd, bzf->bzf_buf + bzf->bzf_bzstream.avail_in, req);
99 	bzf->bzf_bzstream.next_in = bzf->bzf_buf;
100 	if (result >= 0)
101 	    bzf->bzf_bzstream.avail_in += result;
102     }
103     return(result);
104 }
105 
106 /*
107  * Adapted from get_byte/check_header in libz
108  *
109  * Returns 0 if the header is OK, nonzero if not.
110  */
111 static int
112 get_byte(struct bz_file *bzf)
113 {
114     if ((bzf->bzf_bzstream.avail_in == 0) && (bzf_fill(bzf) == -1))
115 	return(-1);
116     bzf->bzf_bzstream.avail_in--;
117     return(*(bzf->bzf_bzstream.next_in)++);
118 }
119 
120 static int bz_magic[3] = {'B', 'Z', 'h'}; /* bzip2 magic header */
121 
122 static int
123 check_header(struct bz_file *bzf)
124 {
125     unsigned int len;
126     int		 c;
127 
128     /* Check the bzip2 magic header */
129     for (len = 0; len < 3; len++) {
130 	c = get_byte(bzf);
131 	if (c != bz_magic[len]) {
132 	    return(1);
133 	}
134     }
135     /* Check that the block size is valid */
136     c = get_byte(bzf);
137     if (c < '1' || c > '9')
138 	return(1);
139 
140     /* Put back bytes that we've took from the input stream */
141     bzf->bzf_bzstream.next_in -= 4;
142     bzf->bzf_bzstream.avail_in += 4;
143 
144     return(0);
145 }
146 
147 static int
148 bzf_open(const char *fname, struct open_file *f)
149 {
150     static char		*bzfname;
151     int			rawfd;
152     struct bz_file	*bzf;
153     char		*cp;
154     int			error;
155     struct stat		sb;
156 
157     /* Have to be in "just read it" mode */
158     if (f->f_flags != F_READ)
159 	return(EPERM);
160 
161     /* If the name already ends in .gz or .bz2, ignore it */
162     if ((cp = strrchr(fname, '.')) && (!strcmp(cp, ".gz")
163 	    || !strcmp(cp, ".bz2") || !strcmp(cp, ".split")))
164 	return(ENOENT);
165 
166     /* Construct new name */
167     bzfname = malloc(strlen(fname) + 5);
168     if (bzfname == NULL)
169 	return(ENOMEM);
170     sprintf(bzfname, "%s.bz2", fname);
171 
172     /* Try to open the compressed datafile */
173     rawfd = open(bzfname, O_RDONLY);
174     free(bzfname);
175     if (rawfd == -1)
176 	return(ENOENT);
177 
178     if (fstat(rawfd, &sb) < 0) {
179 	printf("bzf_open: stat failed\n");
180 	close(rawfd);
181 	return(ENOENT);
182     }
183     if (!S_ISREG(sb.st_mode)) {
184 	printf("bzf_open: not a file\n");
185 	close(rawfd);
186 	return(EISDIR);			/* best guess */
187     }
188 
189     /* Allocate a bz_file structure, populate it */
190     bzf = malloc(sizeof(struct bz_file));
191     if (bzf == NULL)
192 	return(ENOMEM);
193     bzero(bzf, sizeof(struct bz_file));
194     bzf->bzf_rawfd = rawfd;
195 
196     /* Verify that the file is bzipped */
197     if (check_header(bzf)) {
198 	close(bzf->bzf_rawfd);
199 	free(bzf);
200 	return(EFTYPE);
201     }
202 
203     /* Initialise the inflation engine */
204     if ((error = BZ2_bzDecompressInit(&(bzf->bzf_bzstream), 0, 1)) != BZ_OK) {
205 	printf("bzf_open: BZ2_bzDecompressInit returned %d\n", error);
206 	close(bzf->bzf_rawfd);
207 	free(bzf);
208 	return(EIO);
209     }
210 
211     /* Looks OK, we'll take it */
212     f->f_fsdata = bzf;
213     return(0);
214 }
215 
216 static int
217 bzf_close(struct open_file *f)
218 {
219     struct bz_file	*bzf = (struct bz_file *)f->f_fsdata;
220 
221     BZ2_bzDecompressEnd(&(bzf->bzf_bzstream));
222     close(bzf->bzf_rawfd);
223     free(bzf);
224     return(0);
225 }
226 
227 static int
228 bzf_read(struct open_file *f, void *buf, size_t size, size_t *resid)
229 {
230     struct bz_file	*bzf = (struct bz_file *)f->f_fsdata;
231     int			error;
232 
233     bzf->bzf_bzstream.next_out = buf;			/* where and how much */
234     bzf->bzf_bzstream.avail_out = size;
235 
236     while (bzf->bzf_bzstream.avail_out && bzf->bzf_endseen == 0) {
237 	if ((bzf->bzf_bzstream.avail_in == 0) && (bzf_fill(bzf) == -1)) {
238 	    printf("bzf_read: fill error\n");
239 	    return(EIO);
240 	}
241 	if (bzf->bzf_bzstream.avail_in == 0) {		/* oops, unexpected EOF */
242 	    printf("bzf_read: unexpected EOF\n");
243 	    if (bzf->bzf_bzstream.avail_out == size)
244 		return(EIO);
245 	    break;
246 	}
247 
248 	error = BZ2_bzDecompress(&bzf->bzf_bzstream);	/* decompression pass */
249 	if (error == BZ_STREAM_END) {			/* EOF, all done */
250 	    bzf->bzf_endseen = 1;
251 	    break;
252 	}
253 	if (error != BZ_OK) {				/* argh, decompression error */
254 	    printf("bzf_read: BZ2_bzDecompress returned %d\n", error);
255 	    return(EIO);
256 	}
257     }
258     if (resid != NULL)
259 	*resid = bzf->bzf_bzstream.avail_out;
260     return(0);
261 }
262 
263 static int
264 bzf_rewind(struct open_file *f)
265 {
266     struct bz_file	*bzf = (struct bz_file *)f->f_fsdata;
267     struct bz_file	*bzf_tmp;
268 
269     /*
270      * Since bzip2 does not have an equivalent inflateReset function a crude
271      * one needs to be provided.  The functions all called in such a way that
272      * at any time an error occurs a roll back can be done (effectively making
273      * this rewind 'atomic', either the reset occurs successfully or not at all,
274      * with no 'undefined' state happening).
275      */
276 
277     /* Allocate a bz_file structure, populate it */
278     bzf_tmp = malloc(sizeof(struct bz_file));
279     if (bzf_tmp == NULL)
280 	return(-1);
281     bzero(bzf_tmp, sizeof(struct bz_file));
282     bzf_tmp->bzf_rawfd = bzf->bzf_rawfd;
283 
284     /* Initialise the inflation engine */
285     if (BZ2_bzDecompressInit(&(bzf_tmp->bzf_bzstream), 0, 1) != BZ_OK) {
286 	free(bzf_tmp);
287 	return(-1);
288     }
289 
290     /* Seek back to the beginning of the file */
291     if (lseek(bzf->bzf_rawfd, 0, SEEK_SET) == -1) {
292 	BZ2_bzDecompressEnd(&(bzf_tmp->bzf_bzstream));
293 	free(bzf_tmp);
294 	return(-1);
295     }
296 
297     /* Free old bz_file data */
298     BZ2_bzDecompressEnd(&(bzf->bzf_bzstream));
299     free(bzf);
300 
301     /* Use the new bz_file data */
302     f->f_fsdata = bzf_tmp;
303 
304     return(0);
305 }
306 
307 static off_t
308 bzf_seek(struct open_file *f, off_t offset, int where)
309 {
310     struct bz_file	*bzf = (struct bz_file *)f->f_fsdata;
311     off_t		target;
312     char		discard[16];
313 
314     switch (where) {
315     case SEEK_SET:
316 	target = offset;
317 	break;
318     case SEEK_CUR:
319 	target = offset + bzf->bzf_bzstream.total_out_lo32;
320 	break;
321     default:
322 	errno = EINVAL;
323 	return(-1);
324     }
325 
326     /* Can we get there from here? */
327     if (target < bzf->bzf_bzstream.total_out_lo32 && bzf_rewind(f) != 0) {
328 	errno = EOFFSET;
329 	return -1;
330     }
331 
332     /* if bzf_rewind was called then bzf has changed */
333     bzf = (struct bz_file *)f->f_fsdata;
334 
335     /* skip forwards if required */
336     while (target > bzf->bzf_bzstream.total_out_lo32) {
337 	errno = bzf_read(f, discard, min(sizeof(discard),
338 	    target - bzf->bzf_bzstream.total_out_lo32), NULL);
339 	if (errno)
340 	    return(-1);
341 	/* Break out of loop if end of file has been reached. */
342 	if (bzf->bzf_endseen)
343 	    break;
344     }
345     /* This is where we are (be honest if we overshot) */
346     return(bzf->bzf_bzstream.total_out_lo32);
347 }
348 
349 static int
350 bzf_stat(struct open_file *f, struct stat *sb)
351 {
352     struct bz_file	*bzf = (struct bz_file *)f->f_fsdata;
353     int			result;
354 
355     /* stat as normal, but indicate that size is unknown */
356     if ((result = fstat(bzf->bzf_rawfd, sb)) == 0)
357 	sb->st_size = -1;
358     return(result);
359 }
360 
/*
 * Fatal-error hook for the bzip2 library: report the library's error code
 * and stop via panic().  In REGRESSION builds panic() is a macro that
 * simply calls abort().
 *
 * NOTE(review): presumably this is the hook libbz2 requires from its host
 * when built with BZ_NO_STDIO -- confirm against the bzip2 manual.
 */
void
bz_internal_error(int errorcode)
{

    panic("bzipfs: critical error %d in bzip2 library occurred", errorcode);
}
366 
367 #ifdef REGRESSION
368 /* Small test case, open and decompress test.bz2 */
369 int main(void)
370 {
371     struct open_file f;
372     char buf[1024];
373     size_t resid;
374     int err;
375 
376     memset(&f, '\0', sizeof(f));
377     f.f_flags = F_READ;
378     err = bzf_open("test", &f);
379     if (err != 0)
380 	exit(1);
381     do {
382 	err = bzf_read(&f, buf, sizeof(buf), &resid);
383     } while (err == 0 && resid != sizeof(buf));
384 
385     if (err != 0)
386 	exit(2);
387     exit(0);
388 }
389 #endif
390