1 /* 2 * Copyright (c) 1998 Michael Smith. 3 * Copyright (c) 2000 Maxim Sobolev 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #ifndef REGRESSION 32 #include "stand.h" 33 #else 34 #include <stdlib.h> 35 #include <sys/errno.h> 36 #include <sys/fcntl.h> 37 #include <sys/types.h> 38 #include <sys/unistd.h> 39 40 struct open_file { 41 int f_flags; /* see F_* below */ 42 void *f_fsdata; /* file system specific data */ 43 }; 44 #define F_READ 0x0001 /* file opened for reading */ 45 #define EOFFSET (ELAST+8) /* relative seek not supported */ 46 static inline u_int min(u_int a, u_int b) { return(a < b ? a : b); } 47 #define panic(x, y) abort() 48 #endif 49 50 #include <sys/stat.h> 51 #include <string.h> 52 #include <bzlib.h> 53 54 #define BZ_BUFSIZE 2048 /* XXX larger? */ 55 56 struct bz_file 57 { 58 int bzf_rawfd; 59 bz_stream bzf_bzstream; 60 char bzf_buf[BZ_BUFSIZE]; 61 int bzf_endseen; 62 }; 63 64 static int bzf_fill(struct bz_file *z); 65 static int bzf_open(const char *path, struct open_file *f); 66 static int bzf_close(struct open_file *f); 67 static int bzf_read(struct open_file *f, void *buf, size_t size, size_t *resid); 68 static off_t bzf_seek(struct open_file *f, off_t offset, int where); 69 static int bzf_stat(struct open_file *f, struct stat *sb); 70 71 #ifndef REGRESSION 72 struct fs_ops bzipfs_fsops = { 73 "bzip", 74 bzf_open, 75 bzf_close, 76 bzf_read, 77 null_write, 78 bzf_seek, 79 bzf_stat, 80 null_readdir 81 }; 82 #endif 83 84 static int 85 bzf_fill(struct bz_file *bzf) 86 { 87 int result; 88 int req; 89 90 req = BZ_BUFSIZE - bzf->bzf_bzstream.avail_in; 91 result = 0; 92 93 /* If we need more */ 94 if (req > 0) { 95 /* move old data to bottom of buffer */ 96 if (req < BZ_BUFSIZE) 97 bcopy(bzf->bzf_buf + req, bzf->bzf_buf, BZ_BUFSIZE - req); 98 99 /* read to fill buffer and update availibility data */ 100 result = read(bzf->bzf_rawfd, bzf->bzf_buf + bzf->bzf_bzstream.avail_in, req); 101 bzf->bzf_bzstream.next_in = bzf->bzf_buf; 102 if (result >= 0) 103 bzf->bzf_bzstream.avail_in += result; 104 } 105 return(result); 106 } 107 108 /* 109 * Adapted from get_byte/check_header in libz 110 * 111 * Returns 0 if the header is OK, nonzero if not. 112 */ 113 static int 114 get_byte(struct bz_file *bzf) 115 { 116 if ((bzf->bzf_bzstream.avail_in == 0) && (bzf_fill(bzf) == -1)) 117 return(-1); 118 bzf->bzf_bzstream.avail_in--; 119 return(*(bzf->bzf_bzstream.next_in)++); 120 } 121 122 static int bz_magic[3] = {'B', 'Z', 'h'}; /* bzip2 magic header */ 123 124 static int 125 check_header(struct bz_file *bzf) 126 { 127 unsigned int len; 128 int c; 129 130 /* Check the bzip2 magic header */ 131 for (len = 0; len < 3; len++) { 132 c = get_byte(bzf); 133 if (c != bz_magic[len]) { 134 return(1); 135 } 136 } 137 /* Check that the block size is valid */ 138 c = get_byte(bzf); 139 if (c < '1' || c > '9') 140 return(1); 141 142 /* Put back bytes that we've took from the input stream */ 143 bzf->bzf_bzstream.next_in -= 4; 144 bzf->bzf_bzstream.avail_in += 4; 145 146 return(0); 147 } 148 149 static int 150 bzf_open(const char *fname, struct open_file *f) 151 { 152 static char *bzfname; 153 int rawfd; 154 struct bz_file *bzf; 155 char *cp; 156 int error; 157 struct stat sb; 158 159 /* Have to be in "just read it" mode */ 160 if (f->f_flags != F_READ) 161 return(EPERM); 162 163 /* If the name already ends in .gz or .bz2, ignore it */ 164 if ((cp = strrchr(fname, '.')) && (!strcmp(cp, ".gz") 165 || !strcmp(cp, ".bz2") || !strcmp(cp, ".split"))) 166 return(ENOENT); 167 168 /* Construct new name */ 169 bzfname = malloc(strlen(fname) + 5); 170 if (bzfname == NULL) 171 return(ENOMEM); 172 sprintf(bzfname, "%s.bz2", fname); 173 174 /* Try to open the compressed datafile */ 175 rawfd = open(bzfname, O_RDONLY); 176 free(bzfname); 177 if (rawfd == -1) 178 return(ENOENT); 179 180 if (fstat(rawfd, &sb) < 0) { 181 printf("bzf_open: stat failed\n"); 182 close(rawfd); 183 return(ENOENT); 184 } 185 if (!S_ISREG(sb.st_mode)) { 186 printf("bzf_open: not a file\n"); 187 close(rawfd); 188 return(EISDIR); /* best guess */ 189 } 190 191 /* Allocate a bz_file structure, populate it */ 192 bzf = malloc(sizeof(struct bz_file)); 193 if (bzf == NULL) 194 return(ENOMEM); 195 bzero(bzf, sizeof(struct bz_file)); 196 bzf->bzf_rawfd = rawfd; 197 198 /* Verify that the file is bzipped */ 199 if (check_header(bzf)) { 200 close(bzf->bzf_rawfd); 201 free(bzf); 202 return(EFTYPE); 203 } 204 205 /* Initialise the inflation engine */ 206 if ((error = BZ2_bzDecompressInit(&(bzf->bzf_bzstream), 0, 1)) != BZ_OK) { 207 printf("bzf_open: BZ2_bzDecompressInit returned %d\n", error); 208 close(bzf->bzf_rawfd); 209 free(bzf); 210 return(EIO); 211 } 212 213 /* Looks OK, we'll take it */ 214 f->f_fsdata = bzf; 215 return(0); 216 } 217 218 static int 219 bzf_close(struct open_file *f) 220 { 221 struct bz_file *bzf = (struct bz_file *)f->f_fsdata; 222 223 BZ2_bzDecompressEnd(&(bzf->bzf_bzstream)); 224 close(bzf->bzf_rawfd); 225 free(bzf); 226 return(0); 227 } 228 229 static int 230 bzf_read(struct open_file *f, void *buf, size_t size, size_t *resid) 231 { 232 struct bz_file *bzf = (struct bz_file *)f->f_fsdata; 233 int error; 234 235 bzf->bzf_bzstream.next_out = buf; /* where and how much */ 236 bzf->bzf_bzstream.avail_out = size; 237 238 while (bzf->bzf_bzstream.avail_out && bzf->bzf_endseen == 0) { 239 if ((bzf->bzf_bzstream.avail_in == 0) && (bzf_fill(bzf) == -1)) { 240 printf("bzf_read: fill error\n"); 241 return(EIO); 242 } 243 if (bzf->bzf_bzstream.avail_in == 0) { /* oops, unexpected EOF */ 244 printf("bzf_read: unexpected EOF\n"); 245 if (bzf->bzf_bzstream.avail_out == size) 246 return(EIO); 247 break; 248 } 249 250 error = BZ2_bzDecompress(&bzf->bzf_bzstream); /* decompression pass */ 251 if (error == BZ_STREAM_END) { /* EOF, all done */ 252 bzf->bzf_endseen = 1; 253 break; 254 } 255 if (error != BZ_OK) { /* argh, decompression error */ 256 printf("bzf_read: BZ2_bzDecompress returned %d\n", error); 257 return(EIO); 258 } 259 } 260 if (resid != NULL) 261 *resid = bzf->bzf_bzstream.avail_out; 262 return(0); 263 } 264 265 static int 266 bzf_rewind(struct open_file *f) 267 { 268 struct bz_file *bzf = (struct bz_file *)f->f_fsdata; 269 struct bz_file *bzf_tmp; 270 271 /* 272 * Since bzip2 does not have an equivalent inflateReset function a crude 273 * one needs to be provided. The functions all called in such a way that 274 * at any time an error occurs a roll back can be done (effectively making 275 * this rewind 'atomic', either the reset occurs successfully or not at all, 276 * with no 'undefined' state happening). 277 */ 278 279 /* Allocate a bz_file structure, populate it */ 280 bzf_tmp = malloc(sizeof(struct bz_file)); 281 if (bzf_tmp == NULL) 282 return(-1); 283 bzero(bzf_tmp, sizeof(struct bz_file)); 284 bzf_tmp->bzf_rawfd = bzf->bzf_rawfd; 285 286 /* Initialise the inflation engine */ 287 if (BZ2_bzDecompressInit(&(bzf_tmp->bzf_bzstream), 0, 1) != BZ_OK) { 288 free(bzf_tmp); 289 return(-1); 290 } 291 292 /* Seek back to the beginning of the file */ 293 if (lseek(bzf->bzf_rawfd, 0, SEEK_SET) == -1) { 294 BZ2_bzDecompressEnd(&(bzf_tmp->bzf_bzstream)); 295 free(bzf_tmp); 296 return(-1); 297 } 298 299 /* Free old bz_file data */ 300 BZ2_bzDecompressEnd(&(bzf->bzf_bzstream)); 301 free(bzf); 302 303 /* Use the new bz_file data */ 304 f->f_fsdata = bzf_tmp; 305 306 return(0); 307 } 308 309 static off_t 310 bzf_seek(struct open_file *f, off_t offset, int where) 311 { 312 struct bz_file *bzf = (struct bz_file *)f->f_fsdata; 313 off_t target; 314 char discard[16]; 315 316 switch (where) { 317 case SEEK_SET: 318 target = offset; 319 break; 320 case SEEK_CUR: 321 target = offset + bzf->bzf_bzstream.total_out_lo32; 322 break; 323 default: 324 errno = EINVAL; 325 return(-1); 326 } 327 328 /* Can we get there from here? */ 329 if (target < bzf->bzf_bzstream.total_out_lo32 && bzf_rewind(f) != 0) { 330 errno = EOFFSET; 331 return -1; 332 } 333 334 /* if bzf_rewind was called then bzf has changed */ 335 bzf = (struct bz_file *)f->f_fsdata; 336 337 /* skip forwards if required */ 338 while (target > bzf->bzf_bzstream.total_out_lo32) { 339 errno = bzf_read(f, discard, min(sizeof(discard), 340 target - bzf->bzf_bzstream.total_out_lo32), NULL); 341 if (errno) 342 return(-1); 343 /* Break out of loop if end of file has been reached. */ 344 if (bzf->bzf_endseen) 345 break; 346 } 347 /* This is where we are (be honest if we overshot) */ 348 return(bzf->bzf_bzstream.total_out_lo32); 349 } 350 351 static int 352 bzf_stat(struct open_file *f, struct stat *sb) 353 { 354 struct bz_file *bzf = (struct bz_file *)f->f_fsdata; 355 int result; 356 357 /* stat as normal, but indicate that size is unknown */ 358 if ((result = fstat(bzf->bzf_rawfd, sb)) == 0) 359 sb->st_size = -1; 360 return(result); 361 } 362 363 void 364 bz_internal_error(int errorcode) 365 { 366 panic("bzipfs: critical error %d in bzip2 library occured", errorcode); 367 } 368 369 #ifdef REGRESSION 370 /* Small test case, open and decompress test.bz2 */ 371 int main() 372 { 373 struct open_file f; 374 char buf[1024]; 375 size_t resid; 376 int err; 377 378 memset(&f, '\0', sizeof(f)); 379 f.f_flags = F_READ; 380 err = bzf_open("test", &f); 381 if (err != 0) 382 exit(1); 383 do { 384 err = bzf_read(&f, buf, sizeof(buf), &resid); 385 } while (err == 0 && resid != sizeof(buf)); 386 387 if (err != 0) 388 exit(2); 389 exit(0); 390 } 391 #endif 392