1ca987d46SWarner Losh /* 2ca987d46SWarner Losh * Copyright (c) 1998 Michael Smith. 3ca987d46SWarner Losh * Copyright (c) 2000 Maxim Sobolev 4ca987d46SWarner Losh * All rights reserved. 5ca987d46SWarner Losh * 6ca987d46SWarner Losh * Redistribution and use in source and binary forms, with or without 7ca987d46SWarner Losh * modification, are permitted provided that the following conditions 8ca987d46SWarner Losh * are met: 9ca987d46SWarner Losh * 1. Redistributions of source code must retain the above copyright 10ca987d46SWarner Losh * notice, this list of conditions and the following disclaimer. 11ca987d46SWarner Losh * 2. Redistributions in binary form must reproduce the above copyright 12ca987d46SWarner Losh * notice, this list of conditions and the following disclaimer in the 13ca987d46SWarner Losh * documentation and/or other materials provided with the distribution. 14ca987d46SWarner Losh * 15ca987d46SWarner Losh * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16ca987d46SWarner Losh * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17ca987d46SWarner Losh * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18ca987d46SWarner Losh * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19ca987d46SWarner Losh * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20ca987d46SWarner Losh * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21ca987d46SWarner Losh * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22ca987d46SWarner Losh * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23ca987d46SWarner Losh * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24ca987d46SWarner Losh * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25ca987d46SWarner Losh * SUCH DAMAGE. 26ca987d46SWarner Losh */ 27ca987d46SWarner Losh 28ca987d46SWarner Losh #include <sys/cdefs.h> 29ca987d46SWarner Losh __FBSDID("$FreeBSD$"); 30ca987d46SWarner Losh 31ca987d46SWarner Losh #ifndef REGRESSION 32ca987d46SWarner Losh #include "stand.h" 33ca987d46SWarner Losh #else 34ca987d46SWarner Losh #include <stdlib.h> 35ca987d46SWarner Losh #include <sys/errno.h> 36ca987d46SWarner Losh #include <sys/fcntl.h> 37ca987d46SWarner Losh #include <sys/types.h> 38ca987d46SWarner Losh #include <sys/unistd.h> 39ca987d46SWarner Losh 40ca987d46SWarner Losh struct open_file { 41ca987d46SWarner Losh int f_flags; /* see F_* below */ 42ca987d46SWarner Losh void *f_fsdata; /* file system specific data */ 43ca987d46SWarner Losh }; 44ca987d46SWarner Losh #define F_READ 0x0001 /* file opened for reading */ 45ca987d46SWarner Losh #define EOFFSET (ELAST+8) /* relative seek not supported */ 46ca987d46SWarner Losh static inline u_int min(u_int a, u_int b) { return(a < b ? a : b); } 47ca987d46SWarner Losh #define panic(x, y) abort() 48ca987d46SWarner Losh #endif 49ca987d46SWarner Losh 50ca987d46SWarner Losh #include <sys/stat.h> 51ca987d46SWarner Losh #include <string.h> 52ca987d46SWarner Losh #include <bzlib.h> 53ca987d46SWarner Losh 54ca987d46SWarner Losh #define BZ_BUFSIZE 2048 /* XXX larger? */ 55ca987d46SWarner Losh 56ca987d46SWarner Losh struct bz_file 57ca987d46SWarner Losh { 58ca987d46SWarner Losh int bzf_rawfd; 59ca987d46SWarner Losh bz_stream bzf_bzstream; 60ca987d46SWarner Losh char bzf_buf[BZ_BUFSIZE]; 61ca987d46SWarner Losh int bzf_endseen; 62ca987d46SWarner Losh }; 63ca987d46SWarner Losh 64ca987d46SWarner Losh static int bzf_fill(struct bz_file *z); 65ca987d46SWarner Losh static int bzf_open(const char *path, struct open_file *f); 66ca987d46SWarner Losh static int bzf_close(struct open_file *f); 67ca987d46SWarner Losh static int bzf_read(struct open_file *f, void *buf, size_t size, size_t *resid); 68ca987d46SWarner Losh static off_t bzf_seek(struct open_file *f, off_t offset, int where); 69ca987d46SWarner Losh static int bzf_stat(struct open_file *f, struct stat *sb); 70ca987d46SWarner Losh 71ca987d46SWarner Losh #ifndef REGRESSION 72ca987d46SWarner Losh struct fs_ops bzipfs_fsops = { 73ca987d46SWarner Losh "bzip", 74ca987d46SWarner Losh bzf_open, 75ca987d46SWarner Losh bzf_close, 76ca987d46SWarner Losh bzf_read, 77ca987d46SWarner Losh null_write, 78ca987d46SWarner Losh bzf_seek, 79ca987d46SWarner Losh bzf_stat, 80ca987d46SWarner Losh null_readdir 81ca987d46SWarner Losh }; 82ca987d46SWarner Losh #endif 83ca987d46SWarner Losh 84ca987d46SWarner Losh static int 85ca987d46SWarner Losh bzf_fill(struct bz_file *bzf) 86ca987d46SWarner Losh { 87ca987d46SWarner Losh int result; 88ca987d46SWarner Losh int req; 89ca987d46SWarner Losh 90ca987d46SWarner Losh req = BZ_BUFSIZE - bzf->bzf_bzstream.avail_in; 91ca987d46SWarner Losh result = 0; 92ca987d46SWarner Losh 93ca987d46SWarner Losh /* If we need more */ 94ca987d46SWarner Losh if (req > 0) { 95ca987d46SWarner Losh /* move old data to bottom of buffer */ 96ca987d46SWarner Losh if (req < BZ_BUFSIZE) 97ca987d46SWarner Losh bcopy(bzf->bzf_buf + req, bzf->bzf_buf, BZ_BUFSIZE - req); 98ca987d46SWarner Losh 99ca987d46SWarner Losh /* read to fill buffer and update availibility data */ 100ca987d46SWarner Losh result = read(bzf->bzf_rawfd, bzf->bzf_buf + bzf->bzf_bzstream.avail_in, req); 101ca987d46SWarner Losh bzf->bzf_bzstream.next_in = bzf->bzf_buf; 102ca987d46SWarner Losh if (result >= 0) 103ca987d46SWarner Losh bzf->bzf_bzstream.avail_in += result; 104ca987d46SWarner Losh } 105ca987d46SWarner Losh return(result); 106ca987d46SWarner Losh } 107ca987d46SWarner Losh 108ca987d46SWarner Losh /* 109ca987d46SWarner Losh * Adapted from get_byte/check_header in libz 110ca987d46SWarner Losh * 111ca987d46SWarner Losh * Returns 0 if the header is OK, nonzero if not. 112ca987d46SWarner Losh */ 113ca987d46SWarner Losh static int 114ca987d46SWarner Losh get_byte(struct bz_file *bzf) 115ca987d46SWarner Losh { 116ca987d46SWarner Losh if ((bzf->bzf_bzstream.avail_in == 0) && (bzf_fill(bzf) == -1)) 117ca987d46SWarner Losh return(-1); 118ca987d46SWarner Losh bzf->bzf_bzstream.avail_in--; 119ca987d46SWarner Losh return(*(bzf->bzf_bzstream.next_in)++); 120ca987d46SWarner Losh } 121ca987d46SWarner Losh 122ca987d46SWarner Losh static int bz_magic[3] = {'B', 'Z', 'h'}; /* bzip2 magic header */ 123ca987d46SWarner Losh 124ca987d46SWarner Losh static int 125ca987d46SWarner Losh check_header(struct bz_file *bzf) 126ca987d46SWarner Losh { 127ca987d46SWarner Losh unsigned int len; 128ca987d46SWarner Losh int c; 129ca987d46SWarner Losh 130ca987d46SWarner Losh /* Check the bzip2 magic header */ 131ca987d46SWarner Losh for (len = 0; len < 3; len++) { 132ca987d46SWarner Losh c = get_byte(bzf); 133ca987d46SWarner Losh if (c != bz_magic[len]) { 134ca987d46SWarner Losh return(1); 135ca987d46SWarner Losh } 136ca987d46SWarner Losh } 137ca987d46SWarner Losh /* Check that the block size is valid */ 138ca987d46SWarner Losh c = get_byte(bzf); 139ca987d46SWarner Losh if (c < '1' || c > '9') 140ca987d46SWarner Losh return(1); 141ca987d46SWarner Losh 142ca987d46SWarner Losh /* Put back bytes that we've took from the input stream */ 143ca987d46SWarner Losh bzf->bzf_bzstream.next_in -= 4; 144ca987d46SWarner Losh bzf->bzf_bzstream.avail_in += 4; 145ca987d46SWarner Losh 146ca987d46SWarner Losh return(0); 147ca987d46SWarner Losh } 148ca987d46SWarner Losh 149ca987d46SWarner Losh static int 150ca987d46SWarner Losh bzf_open(const char *fname, struct open_file *f) 151ca987d46SWarner Losh { 152ca987d46SWarner Losh static char *bzfname; 153ca987d46SWarner Losh int rawfd; 154ca987d46SWarner Losh struct bz_file *bzf; 155ca987d46SWarner Losh char *cp; 156ca987d46SWarner Losh int error; 157ca987d46SWarner Losh struct stat sb; 158ca987d46SWarner Losh 159ca987d46SWarner Losh /* Have to be in "just read it" mode */ 160ca987d46SWarner Losh if (f->f_flags != F_READ) 161ca987d46SWarner Losh return(EPERM); 162ca987d46SWarner Losh 163ca987d46SWarner Losh /* If the name already ends in .gz or .bz2, ignore it */ 164ca987d46SWarner Losh if ((cp = strrchr(fname, '.')) && (!strcmp(cp, ".gz") 165ca987d46SWarner Losh || !strcmp(cp, ".bz2") || !strcmp(cp, ".split"))) 166ca987d46SWarner Losh return(ENOENT); 167ca987d46SWarner Losh 168ca987d46SWarner Losh /* Construct new name */ 169ca987d46SWarner Losh bzfname = malloc(strlen(fname) + 5); 170ca987d46SWarner Losh if (bzfname == NULL) 171ca987d46SWarner Losh return(ENOMEM); 172ca987d46SWarner Losh sprintf(bzfname, "%s.bz2", fname); 173ca987d46SWarner Losh 174ca987d46SWarner Losh /* Try to open the compressed datafile */ 175ca987d46SWarner Losh rawfd = open(bzfname, O_RDONLY); 176ca987d46SWarner Losh free(bzfname); 177ca987d46SWarner Losh if (rawfd == -1) 178ca987d46SWarner Losh return(ENOENT); 179ca987d46SWarner Losh 180ca987d46SWarner Losh if (fstat(rawfd, &sb) < 0) { 181ca987d46SWarner Losh printf("bzf_open: stat failed\n"); 182ca987d46SWarner Losh close(rawfd); 183ca987d46SWarner Losh return(ENOENT); 184ca987d46SWarner Losh } 185ca987d46SWarner Losh if (!S_ISREG(sb.st_mode)) { 186ca987d46SWarner Losh printf("bzf_open: not a file\n"); 187ca987d46SWarner Losh close(rawfd); 188ca987d46SWarner Losh return(EISDIR); /* best guess */ 189ca987d46SWarner Losh } 190ca987d46SWarner Losh 191ca987d46SWarner Losh /* Allocate a bz_file structure, populate it */ 192ca987d46SWarner Losh bzf = malloc(sizeof(struct bz_file)); 193ca987d46SWarner Losh if (bzf == NULL) 194ca987d46SWarner Losh return(ENOMEM); 195ca987d46SWarner Losh bzero(bzf, sizeof(struct bz_file)); 196ca987d46SWarner Losh bzf->bzf_rawfd = rawfd; 197ca987d46SWarner Losh 198ca987d46SWarner Losh /* Verify that the file is bzipped */ 199ca987d46SWarner Losh if (check_header(bzf)) { 200ca987d46SWarner Losh close(bzf->bzf_rawfd); 201ca987d46SWarner Losh free(bzf); 202ca987d46SWarner Losh return(EFTYPE); 203ca987d46SWarner Losh } 204ca987d46SWarner Losh 205ca987d46SWarner Losh /* Initialise the inflation engine */ 206ca987d46SWarner Losh if ((error = BZ2_bzDecompressInit(&(bzf->bzf_bzstream), 0, 1)) != BZ_OK) { 207ca987d46SWarner Losh printf("bzf_open: BZ2_bzDecompressInit returned %d\n", error); 208ca987d46SWarner Losh close(bzf->bzf_rawfd); 209ca987d46SWarner Losh free(bzf); 210ca987d46SWarner Losh return(EIO); 211ca987d46SWarner Losh } 212ca987d46SWarner Losh 213ca987d46SWarner Losh /* Looks OK, we'll take it */ 214ca987d46SWarner Losh f->f_fsdata = bzf; 215ca987d46SWarner Losh return(0); 216ca987d46SWarner Losh } 217ca987d46SWarner Losh 218ca987d46SWarner Losh static int 219ca987d46SWarner Losh bzf_close(struct open_file *f) 220ca987d46SWarner Losh { 221ca987d46SWarner Losh struct bz_file *bzf = (struct bz_file *)f->f_fsdata; 222ca987d46SWarner Losh 223ca987d46SWarner Losh BZ2_bzDecompressEnd(&(bzf->bzf_bzstream)); 224ca987d46SWarner Losh close(bzf->bzf_rawfd); 225ca987d46SWarner Losh free(bzf); 226ca987d46SWarner Losh return(0); 227ca987d46SWarner Losh } 228ca987d46SWarner Losh 229ca987d46SWarner Losh static int 230ca987d46SWarner Losh bzf_read(struct open_file *f, void *buf, size_t size, size_t *resid) 231ca987d46SWarner Losh { 232ca987d46SWarner Losh struct bz_file *bzf = (struct bz_file *)f->f_fsdata; 233ca987d46SWarner Losh int error; 234ca987d46SWarner Losh 235ca987d46SWarner Losh bzf->bzf_bzstream.next_out = buf; /* where and how much */ 236ca987d46SWarner Losh bzf->bzf_bzstream.avail_out = size; 237ca987d46SWarner Losh 238ca987d46SWarner Losh while (bzf->bzf_bzstream.avail_out && bzf->bzf_endseen == 0) { 239ca987d46SWarner Losh if ((bzf->bzf_bzstream.avail_in == 0) && (bzf_fill(bzf) == -1)) { 240ca987d46SWarner Losh printf("bzf_read: fill error\n"); 241ca987d46SWarner Losh return(EIO); 242ca987d46SWarner Losh } 243ca987d46SWarner Losh if (bzf->bzf_bzstream.avail_in == 0) { /* oops, unexpected EOF */ 244ca987d46SWarner Losh printf("bzf_read: unexpected EOF\n"); 245ca987d46SWarner Losh if (bzf->bzf_bzstream.avail_out == size) 246ca987d46SWarner Losh return(EIO); 247ca987d46SWarner Losh break; 248ca987d46SWarner Losh } 249ca987d46SWarner Losh 250ca987d46SWarner Losh error = BZ2_bzDecompress(&bzf->bzf_bzstream); /* decompression pass */ 251ca987d46SWarner Losh if (error == BZ_STREAM_END) { /* EOF, all done */ 252ca987d46SWarner Losh bzf->bzf_endseen = 1; 253ca987d46SWarner Losh break; 254ca987d46SWarner Losh } 255ca987d46SWarner Losh if (error != BZ_OK) { /* argh, decompression error */ 256ca987d46SWarner Losh printf("bzf_read: BZ2_bzDecompress returned %d\n", error); 257ca987d46SWarner Losh return(EIO); 258ca987d46SWarner Losh } 259ca987d46SWarner Losh } 260ca987d46SWarner Losh if (resid != NULL) 261ca987d46SWarner Losh *resid = bzf->bzf_bzstream.avail_out; 262ca987d46SWarner Losh return(0); 263ca987d46SWarner Losh } 264ca987d46SWarner Losh 265ca987d46SWarner Losh static int 266ca987d46SWarner Losh bzf_rewind(struct open_file *f) 267ca987d46SWarner Losh { 268ca987d46SWarner Losh struct bz_file *bzf = (struct bz_file *)f->f_fsdata; 269ca987d46SWarner Losh struct bz_file *bzf_tmp; 270ca987d46SWarner Losh 271ca987d46SWarner Losh /* 272ca987d46SWarner Losh * Since bzip2 does not have an equivalent inflateReset function a crude 273ca987d46SWarner Losh * one needs to be provided. The functions all called in such a way that 274ca987d46SWarner Losh * at any time an error occurs a roll back can be done (effectively making 275ca987d46SWarner Losh * this rewind 'atomic', either the reset occurs successfully or not at all, 276ca987d46SWarner Losh * with no 'undefined' state happening). 277ca987d46SWarner Losh */ 278ca987d46SWarner Losh 279ca987d46SWarner Losh /* Allocate a bz_file structure, populate it */ 280ca987d46SWarner Losh bzf_tmp = malloc(sizeof(struct bz_file)); 281ca987d46SWarner Losh if (bzf_tmp == NULL) 282ca987d46SWarner Losh return(-1); 283ca987d46SWarner Losh bzero(bzf_tmp, sizeof(struct bz_file)); 284ca987d46SWarner Losh bzf_tmp->bzf_rawfd = bzf->bzf_rawfd; 285ca987d46SWarner Losh 286ca987d46SWarner Losh /* Initialise the inflation engine */ 287ca987d46SWarner Losh if (BZ2_bzDecompressInit(&(bzf_tmp->bzf_bzstream), 0, 1) != BZ_OK) { 288ca987d46SWarner Losh free(bzf_tmp); 289ca987d46SWarner Losh return(-1); 290ca987d46SWarner Losh } 291ca987d46SWarner Losh 292ca987d46SWarner Losh /* Seek back to the beginning of the file */ 293ca987d46SWarner Losh if (lseek(bzf->bzf_rawfd, 0, SEEK_SET) == -1) { 294ca987d46SWarner Losh BZ2_bzDecompressEnd(&(bzf_tmp->bzf_bzstream)); 295ca987d46SWarner Losh free(bzf_tmp); 296ca987d46SWarner Losh return(-1); 297ca987d46SWarner Losh } 298ca987d46SWarner Losh 299ca987d46SWarner Losh /* Free old bz_file data */ 300ca987d46SWarner Losh BZ2_bzDecompressEnd(&(bzf->bzf_bzstream)); 301ca987d46SWarner Losh free(bzf); 302ca987d46SWarner Losh 303ca987d46SWarner Losh /* Use the new bz_file data */ 304ca987d46SWarner Losh f->f_fsdata = bzf_tmp; 305ca987d46SWarner Losh 306ca987d46SWarner Losh return(0); 307ca987d46SWarner Losh } 308ca987d46SWarner Losh 309ca987d46SWarner Losh static off_t 310ca987d46SWarner Losh bzf_seek(struct open_file *f, off_t offset, int where) 311ca987d46SWarner Losh { 312ca987d46SWarner Losh struct bz_file *bzf = (struct bz_file *)f->f_fsdata; 313ca987d46SWarner Losh off_t target; 314ca987d46SWarner Losh char discard[16]; 315ca987d46SWarner Losh 316ca987d46SWarner Losh switch (where) { 317ca987d46SWarner Losh case SEEK_SET: 318ca987d46SWarner Losh target = offset; 319ca987d46SWarner Losh break; 320ca987d46SWarner Losh case SEEK_CUR: 321ca987d46SWarner Losh target = offset + bzf->bzf_bzstream.total_out_lo32; 322ca987d46SWarner Losh break; 323ca987d46SWarner Losh default: 324ca987d46SWarner Losh errno = EINVAL; 325ca987d46SWarner Losh return(-1); 326ca987d46SWarner Losh } 327ca987d46SWarner Losh 328ca987d46SWarner Losh /* Can we get there from here? */ 329ca987d46SWarner Losh if (target < bzf->bzf_bzstream.total_out_lo32 && bzf_rewind(f) != 0) { 330ca987d46SWarner Losh errno = EOFFSET; 331ca987d46SWarner Losh return -1; 332ca987d46SWarner Losh } 333ca987d46SWarner Losh 334ca987d46SWarner Losh /* if bzf_rewind was called then bzf has changed */ 335ca987d46SWarner Losh bzf = (struct bz_file *)f->f_fsdata; 336ca987d46SWarner Losh 337ca987d46SWarner Losh /* skip forwards if required */ 338ca987d46SWarner Losh while (target > bzf->bzf_bzstream.total_out_lo32) { 339ca987d46SWarner Losh errno = bzf_read(f, discard, min(sizeof(discard), 340ca987d46SWarner Losh target - bzf->bzf_bzstream.total_out_lo32), NULL); 341ca987d46SWarner Losh if (errno) 342ca987d46SWarner Losh return(-1); 3433df4c387SDavid Bright /* Break out of loop if end of file has been reached. */ 3443df4c387SDavid Bright if (bzf->bzf_endseen) 3453df4c387SDavid Bright break; 346ca987d46SWarner Losh } 347ca987d46SWarner Losh /* This is where we are (be honest if we overshot) */ 348ca987d46SWarner Losh return(bzf->bzf_bzstream.total_out_lo32); 349ca987d46SWarner Losh } 350ca987d46SWarner Losh 351ca987d46SWarner Losh static int 352ca987d46SWarner Losh bzf_stat(struct open_file *f, struct stat *sb) 353ca987d46SWarner Losh { 354ca987d46SWarner Losh struct bz_file *bzf = (struct bz_file *)f->f_fsdata; 355ca987d46SWarner Losh int result; 356ca987d46SWarner Losh 357ca987d46SWarner Losh /* stat as normal, but indicate that size is unknown */ 358ca987d46SWarner Losh if ((result = fstat(bzf->bzf_rawfd, sb)) == 0) 359ca987d46SWarner Losh sb->st_size = -1; 360ca987d46SWarner Losh return(result); 361ca987d46SWarner Losh } 362ca987d46SWarner Losh 363ca987d46SWarner Losh void 364ca987d46SWarner Losh bz_internal_error(int errorcode) 365ca987d46SWarner Losh { 366*746cc38eSGordon Bergling panic("bzipfs: critical error %d in bzip2 library occurred", errorcode); 367ca987d46SWarner Losh } 368ca987d46SWarner Losh 369ca987d46SWarner Losh #ifdef REGRESSION 370ca987d46SWarner Losh /* Small test case, open and decompress test.bz2 */ 371ca987d46SWarner Losh int main() 372ca987d46SWarner Losh { 373ca987d46SWarner Losh struct open_file f; 374ca987d46SWarner Losh char buf[1024]; 375ca987d46SWarner Losh size_t resid; 376ca987d46SWarner Losh int err; 377ca987d46SWarner Losh 378ca987d46SWarner Losh memset(&f, '\0', sizeof(f)); 379ca987d46SWarner Losh f.f_flags = F_READ; 380ca987d46SWarner Losh err = bzf_open("test", &f); 381ca987d46SWarner Losh if (err != 0) 382ca987d46SWarner Losh exit(1); 383ca987d46SWarner Losh do { 384ca987d46SWarner Losh err = bzf_read(&f, buf, sizeof(buf), &resid); 385ca987d46SWarner Losh } while (err == 0 && resid != sizeof(buf)); 386ca987d46SWarner Losh 387ca987d46SWarner Losh if (err != 0) 388ca987d46SWarner Losh exit(2); 389ca987d46SWarner Losh exit(0); 390ca987d46SWarner Losh } 391ca987d46SWarner Losh #endif 392