1 /*
2 * Copyright (c) 1998 Michael Smith.
3 * Copyright (c) 2000 Maxim Sobolev
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #ifndef REGRESSION
29 #include "stand.h"
30 #else
31 #include <stdlib.h>
32 #include <sys/errno.h>
33 #include <sys/fcntl.h>
34 #include <sys/types.h>
35 #include <sys/unistd.h>
36
/*
 * Cut-down stand-in for the loader's struct open_file, used only when this
 * file is built with REGRESSION defined.  It carries just the two members
 * the code in this file touches.
 */
struct open_file {
	int	 f_flags;	/* see F_* below */
	void	*f_fsdata;	/* file system specific data */
};
41 #define F_READ 0x0001 /* file opened for reading */
42 #define EOFFSET (ELAST+8) /* relative seek not supported */
/* Return the smaller of two unsigned ints (REGRESSION build helper). */
static inline u_int
min(u_int a, u_int b)
{
	if (b < a)
		return (b);
	return (a);
}
44 #define panic(x, y) abort()
45 #endif
46
47 #include <sys/stat.h>
48 #include <string.h>
49 #include <bzlib.h>
50
51 #define BZ_BUFSIZE 2048 /* XXX larger? */
52
53 struct bz_file
54 {
55 int bzf_rawfd;
56 bz_stream bzf_bzstream;
57 char bzf_buf[BZ_BUFSIZE];
58 int bzf_endseen;
59 };
60
61 static int bzf_fill(struct bz_file *z);
62 static int bzf_open(const char *path, struct open_file *f);
63 static int bzf_close(struct open_file *f);
64 static int bzf_read(struct open_file *f, void *buf, size_t size, size_t *resid);
65 static off_t bzf_seek(struct open_file *f, off_t offset, int where);
66 static int bzf_stat(struct open_file *f, struct stat *sb);
67
68 #ifndef REGRESSION
69 struct fs_ops bzipfs_fsops = {
70 .fs_name = "bzip",
71 .fs_flags = 0,
72 .fo_open = bzf_open,
73 .fo_close = bzf_close,
74 .fo_read = bzf_read,
75 .fo_write = null_write,
76 .fo_seek = bzf_seek,
77 .fo_stat = bzf_stat,
78 .fo_readdir = null_readdir,
79 };
80 #endif
81
82 static int
bzf_fill(struct bz_file * bzf)83 bzf_fill(struct bz_file *bzf)
84 {
85 int result;
86 int req;
87
88 req = BZ_BUFSIZE - bzf->bzf_bzstream.avail_in;
89 result = 0;
90
91 /* If we need more */
92 if (req > 0) {
93 /* move old data to bottom of buffer */
94 if (req < BZ_BUFSIZE)
95 bcopy(bzf->bzf_buf + req, bzf->bzf_buf, BZ_BUFSIZE - req);
96
97 /* read to fill buffer and update availibility data */
98 result = read(bzf->bzf_rawfd, bzf->bzf_buf + bzf->bzf_bzstream.avail_in, req);
99 bzf->bzf_bzstream.next_in = bzf->bzf_buf;
100 if (result >= 0)
101 bzf->bzf_bzstream.avail_in += result;
102 }
103 return(result);
104 }
105
106 /*
107 * Adapted from get_byte/check_header in libz
108 *
109 * Returns 0 if the header is OK, nonzero if not.
110 */
111 static int
get_byte(struct bz_file * bzf)112 get_byte(struct bz_file *bzf)
113 {
114 if ((bzf->bzf_bzstream.avail_in == 0) && (bzf_fill(bzf) == -1))
115 return(-1);
116 bzf->bzf_bzstream.avail_in--;
117 return(*(bzf->bzf_bzstream.next_in)++);
118 }
119
120 static int bz_magic[3] = {'B', 'Z', 'h'}; /* bzip2 magic header */
121
122 static int
check_header(struct bz_file * bzf)123 check_header(struct bz_file *bzf)
124 {
125 unsigned int len;
126 int c;
127
128 /* Check the bzip2 magic header */
129 for (len = 0; len < 3; len++) {
130 c = get_byte(bzf);
131 if (c != bz_magic[len]) {
132 return(1);
133 }
134 }
135 /* Check that the block size is valid */
136 c = get_byte(bzf);
137 if (c < '1' || c > '9')
138 return(1);
139
140 /* Put back bytes that we've took from the input stream */
141 bzf->bzf_bzstream.next_in -= 4;
142 bzf->bzf_bzstream.avail_in += 4;
143
144 return(0);
145 }
146
147 static int
bzf_open(const char * fname,struct open_file * f)148 bzf_open(const char *fname, struct open_file *f)
149 {
150 static char *bzfname;
151 int rawfd;
152 struct bz_file *bzf;
153 char *cp;
154 int error;
155 struct stat sb;
156
157 /* Have to be in "just read it" mode */
158 if (f->f_flags != F_READ)
159 return(EPERM);
160
161 /* If the name already ends in .gz or .bz2, ignore it */
162 if ((cp = strrchr(fname, '.')) && (!strcmp(cp, ".gz")
163 || !strcmp(cp, ".bz2") || !strcmp(cp, ".split")))
164 return(ENOENT);
165
166 /* Construct new name */
167 bzfname = malloc(strlen(fname) + 5);
168 if (bzfname == NULL)
169 return(ENOMEM);
170 sprintf(bzfname, "%s.bz2", fname);
171
172 /* Try to open the compressed datafile */
173 rawfd = open(bzfname, O_RDONLY);
174 free(bzfname);
175 if (rawfd == -1)
176 return(ENOENT);
177
178 if (fstat(rawfd, &sb) < 0) {
179 printf("bzf_open: stat failed\n");
180 close(rawfd);
181 return(ENOENT);
182 }
183 if (!S_ISREG(sb.st_mode)) {
184 printf("bzf_open: not a file\n");
185 close(rawfd);
186 return(EISDIR); /* best guess */
187 }
188
189 /* Allocate a bz_file structure, populate it */
190 bzf = malloc(sizeof(struct bz_file));
191 if (bzf == NULL)
192 return(ENOMEM);
193 bzero(bzf, sizeof(struct bz_file));
194 bzf->bzf_rawfd = rawfd;
195
196 /* Verify that the file is bzipped */
197 if (check_header(bzf)) {
198 close(bzf->bzf_rawfd);
199 free(bzf);
200 return(EFTYPE);
201 }
202
203 /* Initialise the inflation engine */
204 if ((error = BZ2_bzDecompressInit(&(bzf->bzf_bzstream), 0, 1)) != BZ_OK) {
205 printf("bzf_open: BZ2_bzDecompressInit returned %d\n", error);
206 close(bzf->bzf_rawfd);
207 free(bzf);
208 return(EIO);
209 }
210
211 /* Looks OK, we'll take it */
212 f->f_fsdata = bzf;
213 return(0);
214 }
215
216 static int
bzf_close(struct open_file * f)217 bzf_close(struct open_file *f)
218 {
219 struct bz_file *bzf = (struct bz_file *)f->f_fsdata;
220
221 BZ2_bzDecompressEnd(&(bzf->bzf_bzstream));
222 close(bzf->bzf_rawfd);
223 free(bzf);
224 return(0);
225 }
226
227 static int
bzf_read(struct open_file * f,void * buf,size_t size,size_t * resid)228 bzf_read(struct open_file *f, void *buf, size_t size, size_t *resid)
229 {
230 struct bz_file *bzf = (struct bz_file *)f->f_fsdata;
231 int error;
232
233 bzf->bzf_bzstream.next_out = buf; /* where and how much */
234 bzf->bzf_bzstream.avail_out = size;
235
236 while (bzf->bzf_bzstream.avail_out && bzf->bzf_endseen == 0) {
237 if ((bzf->bzf_bzstream.avail_in == 0) && (bzf_fill(bzf) == -1)) {
238 printf("bzf_read: fill error\n");
239 return(EIO);
240 }
241 if (bzf->bzf_bzstream.avail_in == 0) { /* oops, unexpected EOF */
242 printf("bzf_read: unexpected EOF\n");
243 if (bzf->bzf_bzstream.avail_out == size)
244 return(EIO);
245 break;
246 }
247
248 error = BZ2_bzDecompress(&bzf->bzf_bzstream); /* decompression pass */
249 if (error == BZ_STREAM_END) { /* EOF, all done */
250 bzf->bzf_endseen = 1;
251 break;
252 }
253 if (error != BZ_OK) { /* argh, decompression error */
254 printf("bzf_read: BZ2_bzDecompress returned %d\n", error);
255 return(EIO);
256 }
257 }
258 if (resid != NULL)
259 *resid = bzf->bzf_bzstream.avail_out;
260 return(0);
261 }
262
263 static int
bzf_rewind(struct open_file * f)264 bzf_rewind(struct open_file *f)
265 {
266 struct bz_file *bzf = (struct bz_file *)f->f_fsdata;
267 struct bz_file *bzf_tmp;
268
269 /*
270 * Since bzip2 does not have an equivalent inflateReset function a crude
271 * one needs to be provided. The functions all called in such a way that
272 * at any time an error occurs a roll back can be done (effectively making
273 * this rewind 'atomic', either the reset occurs successfully or not at all,
274 * with no 'undefined' state happening).
275 */
276
277 /* Allocate a bz_file structure, populate it */
278 bzf_tmp = malloc(sizeof(struct bz_file));
279 if (bzf_tmp == NULL)
280 return(-1);
281 bzero(bzf_tmp, sizeof(struct bz_file));
282 bzf_tmp->bzf_rawfd = bzf->bzf_rawfd;
283
284 /* Initialise the inflation engine */
285 if (BZ2_bzDecompressInit(&(bzf_tmp->bzf_bzstream), 0, 1) != BZ_OK) {
286 free(bzf_tmp);
287 return(-1);
288 }
289
290 /* Seek back to the beginning of the file */
291 if (lseek(bzf->bzf_rawfd, 0, SEEK_SET) == -1) {
292 BZ2_bzDecompressEnd(&(bzf_tmp->bzf_bzstream));
293 free(bzf_tmp);
294 return(-1);
295 }
296
297 /* Free old bz_file data */
298 BZ2_bzDecompressEnd(&(bzf->bzf_bzstream));
299 free(bzf);
300
301 /* Use the new bz_file data */
302 f->f_fsdata = bzf_tmp;
303
304 return(0);
305 }
306
307 static off_t
bzf_seek(struct open_file * f,off_t offset,int where)308 bzf_seek(struct open_file *f, off_t offset, int where)
309 {
310 struct bz_file *bzf = (struct bz_file *)f->f_fsdata;
311 off_t target;
312 char discard[16];
313
314 switch (where) {
315 case SEEK_SET:
316 target = offset;
317 break;
318 case SEEK_CUR:
319 target = offset + bzf->bzf_bzstream.total_out_lo32;
320 break;
321 default:
322 errno = EINVAL;
323 return(-1);
324 }
325
326 /* Can we get there from here? */
327 if (target < bzf->bzf_bzstream.total_out_lo32 && bzf_rewind(f) != 0) {
328 errno = EOFFSET;
329 return -1;
330 }
331
332 /* if bzf_rewind was called then bzf has changed */
333 bzf = (struct bz_file *)f->f_fsdata;
334
335 /* skip forwards if required */
336 while (target > bzf->bzf_bzstream.total_out_lo32) {
337 errno = bzf_read(f, discard, min(sizeof(discard),
338 target - bzf->bzf_bzstream.total_out_lo32), NULL);
339 if (errno)
340 return(-1);
341 /* Break out of loop if end of file has been reached. */
342 if (bzf->bzf_endseen)
343 break;
344 }
345 /* This is where we are (be honest if we overshot) */
346 return(bzf->bzf_bzstream.total_out_lo32);
347 }
348
349 static int
bzf_stat(struct open_file * f,struct stat * sb)350 bzf_stat(struct open_file *f, struct stat *sb)
351 {
352 struct bz_file *bzf = (struct bz_file *)f->f_fsdata;
353 int result;
354
355 /* stat as normal, but indicate that size is unknown */
356 if ((result = fstat(bzf->bzf_rawfd, sb)) == 0)
357 sb->st_size = -1;
358 return(result);
359 }
360
/*
 * Fatal-error hook with external linkage; it is not called from within
 * this file.  Presumably this is the callback libbz2 invokes on an internal
 * consistency failure when built without stdio -- confirm against the
 * bzlib documentation.  It reports the code through panic().
 */
void
bz_internal_error(int errorcode)
{

	panic("bzipfs: critical error %d in bzip2 library occurred", errorcode);
}
366
367 #ifdef REGRESSION
368 /* Small test case, open and decompress test.bz2 */
main(void)369 int main(void)
370 {
371 struct open_file f;
372 char buf[1024];
373 size_t resid;
374 int err;
375
376 memset(&f, '\0', sizeof(f));
377 f.f_flags = F_READ;
378 err = bzf_open("test", &f);
379 if (err != 0)
380 exit(1);
381 do {
382 err = bzf_read(&f, buf, sizeof(buf), &resid);
383 } while (err == 0 && resid != sizeof(buf));
384
385 if (err != 0)
386 exit(2);
387 exit(0);
388 }
389 #endif
390