1986fd29aSsetje 2986fd29aSsetje /* 3986fd29aSsetje * CDDL HEADER START 4986fd29aSsetje * 5986fd29aSsetje * The contents of this file are subject to the terms of the 6986fd29aSsetje * Common Development and Distribution License (the "License"). 7986fd29aSsetje * You may not use this file except in compliance with the License. 8986fd29aSsetje * 9986fd29aSsetje * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10986fd29aSsetje * or http://www.opensolaris.org/os/licensing. 11986fd29aSsetje * See the License for the specific language governing permissions 12986fd29aSsetje * and limitations under the License. 13986fd29aSsetje * 14986fd29aSsetje * When distributing Covered Code, include this CDDL HEADER in each 15986fd29aSsetje * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16986fd29aSsetje * If applicable, add the following below this CDDL HEADER, with the 17986fd29aSsetje * fields enclosed by brackets "[]" replaced with your own identifying 18986fd29aSsetje * information: Portions Copyright [yyyy] [name of copyright owner] 19986fd29aSsetje * 20986fd29aSsetje * CDDL HEADER END 21986fd29aSsetje */ 22986fd29aSsetje /* 230fbb751dSJohn Levon * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. 24986fd29aSsetje */ 25986fd29aSsetje 26986fd29aSsetje /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 27986fd29aSsetje /* All Rights Reserved */ 28986fd29aSsetje 29986fd29aSsetje /* 30986fd29aSsetje * University Copyright- Copyright (c) 1982, 1986, 1988 31986fd29aSsetje * The Regents of the University of California 32986fd29aSsetje * All Rights Reserved 33986fd29aSsetje * 34986fd29aSsetje * University Acknowledgment- Portions of this document are derived from 35986fd29aSsetje * software developed by the University of California, Berkeley, and its 36986fd29aSsetje * contributors. 
37986fd29aSsetje */ 38986fd29aSsetje 39986fd29aSsetje #include <sys/types.h> 40986fd29aSsetje #include <sys/thread.h> 41986fd29aSsetje #include <sys/t_lock.h> 42986fd29aSsetje #include <sys/param.h> 43986fd29aSsetje #include <sys/systm.h> 44986fd29aSsetje #include <sys/bitmap.h> 45986fd29aSsetje #include <sys/buf.h> 46986fd29aSsetje #include <sys/cmn_err.h> 47986fd29aSsetje #include <sys/conf.h> 48986fd29aSsetje #include <sys/ddi.h> 49986fd29aSsetje #include <sys/debug.h> 50986fd29aSsetje #include <sys/errno.h> 51986fd29aSsetje #include <sys/time.h> 52986fd29aSsetje #include <sys/fcntl.h> 53986fd29aSsetje #include <sys/flock.h> 54986fd29aSsetje #include <sys/file.h> 55986fd29aSsetje #include <sys/kmem.h> 56986fd29aSsetje #include <sys/mman.h> 57986fd29aSsetje #include <sys/vmsystm.h> 58986fd29aSsetje #include <sys/open.h> 59986fd29aSsetje #include <sys/swap.h> 60986fd29aSsetje #include <sys/sysmacros.h> 61986fd29aSsetje #include <sys/uio.h> 62986fd29aSsetje #include <sys/vfs.h> 63986fd29aSsetje #include <sys/vfs_opreg.h> 64986fd29aSsetje #include <sys/vnode.h> 65986fd29aSsetje #include <sys/stat.h> 66986fd29aSsetje #include <sys/poll.h> 67986fd29aSsetje #include <sys/zmod.h> 68986fd29aSsetje #include <sys/fs/decomp.h> 69986fd29aSsetje 70986fd29aSsetje #include <vm/hat.h> 71986fd29aSsetje #include <vm/as.h> 72986fd29aSsetje #include <vm/page.h> 73986fd29aSsetje #include <vm/pvn.h> 74986fd29aSsetje #include <vm/seg_vn.h> 75986fd29aSsetje #include <vm/seg_kmem.h> 76986fd29aSsetje #include <vm/seg_map.h> 77986fd29aSsetje 78986fd29aSsetje #include <fs/fs_subr.h> 79986fd29aSsetje 80986fd29aSsetje /* 81986fd29aSsetje * dcfs - A filesystem for automatic decompressing of fiocompressed files 82986fd29aSsetje * 83986fd29aSsetje * This filesystem is a layered filesystem that sits on top of a normal 84986fd29aSsetje * persistent filesystem and provides automatic decompression of files 85986fd29aSsetje * that have been previously compressed and stored on the host file system. 
 * This is a pseudo filesystem in that it does not persist data, rather it
 * intercepts file lookup requests on the host filesystem and provides
 * transparent decompression of those files. Currently the only supported
 * host filesystem is ufs.
 *
 * A file is compressed via a userland utility (currently cmd/boot/fiocompress)
 * and marked by fiocompress as a compressed file via a flag in the on-disk
 * inode (set via a ufs ioctl() - see _FIO_COMPRESSED in ufs_ioctl(),
 * ufs_vnops.c). ufs_lookup checks for this flag and, if set, passes control
 * to decompvp, a function defined in this (dcfs) filesystem. decompvp
 * uncompresses the file and returns a dcfs vnode to the VFS layer.
 *
 * dcfs is layered on top of ufs and passes requests involving persistence
 * to the underlying ufs filesystem. The compressed files currently cannot be
 * written to.
 */


/*
 * Define data structures within this file.
106986fd29aSsetje */ 107986fd29aSsetje #define DCSHFT 5 108986fd29aSsetje #define DCTABLESIZE 16 109986fd29aSsetje 110986fd29aSsetje #if ((DCTABLESIZE & (DCTABLESIZE - 1)) == 0) 111986fd29aSsetje #define DCHASH(vp) (((uintptr_t)(vp) >> DCSHFT) & (DCTABLESIZE - 1)) 112986fd29aSsetje #else 113986fd29aSsetje #define DCHASH(vp) (((uintptr_t)(vp) >> DCSHFT) % DTABLESIZEC) 114986fd29aSsetje #endif 115986fd29aSsetje 116986fd29aSsetje #define DCLRUSIZE 16 117986fd29aSsetje 118986fd29aSsetje #define DCCACHESIZE 4 119986fd29aSsetje 120986fd29aSsetje #define rounddown(x, y) ((x) & ~((y) - 1)) 121986fd29aSsetje 122986fd29aSsetje struct dcnode *dctable[DCTABLESIZE]; 123986fd29aSsetje 124986fd29aSsetje struct dcnode *dclru; 125986fd29aSsetje static int dclru_len; 126986fd29aSsetje 127986fd29aSsetje kmutex_t dctable_lock; 128986fd29aSsetje 129986fd29aSsetje dev_t dcdev; 130986fd29aSsetje struct vfs dc_vfs; 131986fd29aSsetje 132986fd29aSsetje struct kmem_cache *dcnode_cache; 133986fd29aSsetje struct kmem_cache *dcbuf_cache[DCCACHESIZE]; 134986fd29aSsetje 135986fd29aSsetje kmutex_t dccache_lock; 136986fd29aSsetje 137986fd29aSsetje static int dcinit(int, char *); 138986fd29aSsetje 139986fd29aSsetje static struct dcnode *dcnode_alloc(void); 140986fd29aSsetje static void dcnode_free(struct dcnode *); 141986fd29aSsetje static void dcnode_recycle(struct dcnode *); 142986fd29aSsetje 143986fd29aSsetje static void dcinsert(struct dcnode *); 144986fd29aSsetje static void dcdelete(struct dcnode *); 145986fd29aSsetje static struct dcnode *dcfind(struct vnode *); 146986fd29aSsetje static void dclru_add(struct dcnode *); 147986fd29aSsetje static void dclru_sub(struct dcnode *); 148986fd29aSsetje 149986fd29aSsetje 150986fd29aSsetje /* 151986fd29aSsetje * This is the loadable module wrapper. 
152986fd29aSsetje */ 153986fd29aSsetje #include <sys/modctl.h> 154986fd29aSsetje 155986fd29aSsetje struct vfsops *dc_vfsops; 156986fd29aSsetje 157986fd29aSsetje static vfsdef_t vfw = { 158986fd29aSsetje VFSDEF_VERSION, 159986fd29aSsetje "dcfs", 160986fd29aSsetje dcinit, 1610fbb751dSJohn Levon VSW_ZMOUNT, 162986fd29aSsetje NULL 163986fd29aSsetje }; 164986fd29aSsetje 165986fd29aSsetje /* 166986fd29aSsetje * Module linkage information for the kernel. 167986fd29aSsetje */ 168986fd29aSsetje extern struct mod_ops mod_fsops; 169986fd29aSsetje 170986fd29aSsetje static struct modlfs modlfs = { 171986fd29aSsetje &mod_fsops, "compressed filesystem", &vfw 172986fd29aSsetje }; 173986fd29aSsetje 174986fd29aSsetje static struct modlinkage modlinkage = { 175986fd29aSsetje MODREV_1, (void *)&modlfs, NULL 176986fd29aSsetje }; 177986fd29aSsetje 178986fd29aSsetje int 179986fd29aSsetje _init() 180986fd29aSsetje { 181986fd29aSsetje return (mod_install(&modlinkage)); 182986fd29aSsetje } 183986fd29aSsetje 184986fd29aSsetje int 185986fd29aSsetje _info(struct modinfo *modinfop) 186986fd29aSsetje { 187986fd29aSsetje return (mod_info(&modlinkage, modinfop)); 188986fd29aSsetje } 189986fd29aSsetje 190986fd29aSsetje 191986fd29aSsetje static int dc_open(struct vnode **, int, struct cred *, caller_context_t *); 192986fd29aSsetje static int dc_close(struct vnode *, int, int, offset_t, 193986fd29aSsetje struct cred *, caller_context_t *); 194986fd29aSsetje static int dc_read(struct vnode *, struct uio *, int, struct cred *, 195986fd29aSsetje struct caller_context *); 196986fd29aSsetje static int dc_getattr(struct vnode *, struct vattr *, int, 197986fd29aSsetje struct cred *, caller_context_t *); 198986fd29aSsetje static int dc_setattr(struct vnode *, struct vattr *, int, struct cred *, 199986fd29aSsetje struct caller_context *); 200986fd29aSsetje static int dc_access(struct vnode *, int, int, 201986fd29aSsetje struct cred *, caller_context_t *); 202986fd29aSsetje static int dc_fsync(struct vnode *, 
int, struct cred *, caller_context_t *); 203986fd29aSsetje static void dc_inactive(struct vnode *, struct cred *, caller_context_t *); 204986fd29aSsetje static int dc_fid(struct vnode *, struct fid *, caller_context_t *); 205986fd29aSsetje static int dc_seek(struct vnode *, offset_t, offset_t *, caller_context_t *); 206986fd29aSsetje static int dc_frlock(struct vnode *, int, struct flock64 *, int, offset_t, 207986fd29aSsetje struct flk_callback *, struct cred *, caller_context_t *); 208e38c922eSAndrew Balfour static int dc_realvp(struct vnode *, struct vnode **, caller_context_t *); 209986fd29aSsetje static int dc_getpage(struct vnode *, offset_t, size_t, uint_t *, 210986fd29aSsetje struct page **, size_t, struct seg *, caddr_t, enum seg_rw, 211986fd29aSsetje struct cred *, caller_context_t *); 212986fd29aSsetje static int dc_putpage(struct vnode *, offset_t, size_t, int, 213986fd29aSsetje struct cred *, caller_context_t *); 214986fd29aSsetje static int dc_map(struct vnode *, offset_t, struct as *, caddr_t *, size_t, 215986fd29aSsetje uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *); 216986fd29aSsetje static int dc_addmap(struct vnode *, offset_t, struct as *, caddr_t, size_t, 217986fd29aSsetje uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *); 218986fd29aSsetje static int dc_delmap(struct vnode *, offset_t, struct as *, caddr_t, size_t, 219986fd29aSsetje uint_t, uint_t, uint_t, struct cred *, caller_context_t *); 220986fd29aSsetje 221986fd29aSsetje struct vnodeops *dc_vnodeops; 222986fd29aSsetje 223986fd29aSsetje const fs_operation_def_t dc_vnodeops_template[] = { 224986fd29aSsetje VOPNAME_OPEN, { .vop_open = dc_open }, 225986fd29aSsetje VOPNAME_CLOSE, { .vop_close = dc_close }, 226986fd29aSsetje VOPNAME_READ, { .vop_read = dc_read }, 227986fd29aSsetje VOPNAME_GETATTR, { .vop_getattr = dc_getattr }, 228986fd29aSsetje VOPNAME_SETATTR, { .vop_setattr = dc_setattr }, 229986fd29aSsetje VOPNAME_ACCESS, { .vop_access = dc_access }, 
230986fd29aSsetje VOPNAME_FSYNC, { .vop_fsync = dc_fsync }, 231986fd29aSsetje VOPNAME_INACTIVE, { .vop_inactive = dc_inactive }, 232986fd29aSsetje VOPNAME_FID, { .vop_fid = dc_fid }, 233986fd29aSsetje VOPNAME_SEEK, { .vop_seek = dc_seek }, 234986fd29aSsetje VOPNAME_FRLOCK, { .vop_frlock = dc_frlock }, 235e38c922eSAndrew Balfour VOPNAME_REALVP, { .vop_realvp = dc_realvp }, 236986fd29aSsetje VOPNAME_GETPAGE, { .vop_getpage = dc_getpage }, 237986fd29aSsetje VOPNAME_PUTPAGE, { .vop_putpage = dc_putpage }, 238986fd29aSsetje VOPNAME_MAP, { .vop_map = dc_map }, 239986fd29aSsetje VOPNAME_ADDMAP, { .vop_addmap = dc_addmap }, 240986fd29aSsetje VOPNAME_DELMAP, { .vop_delmap = dc_delmap }, 241986fd29aSsetje NULL, NULL 242986fd29aSsetje }; 243986fd29aSsetje 244986fd29aSsetje /*ARGSUSED*/ 245986fd29aSsetje static int 246986fd29aSsetje dc_open(struct vnode **vpp, int flag, struct cred *cr, caller_context_t *ctp) 247986fd29aSsetje { 248986fd29aSsetje return (0); 249986fd29aSsetje } 250986fd29aSsetje 251986fd29aSsetje /*ARGSUSED*/ 252986fd29aSsetje static int 253986fd29aSsetje dc_close(struct vnode *vp, int flag, int count, offset_t off, 254986fd29aSsetje struct cred *cr, caller_context_t *ctp) 255986fd29aSsetje { 256986fd29aSsetje (void) cleanlocks(vp, ttoproc(curthread)->p_pid, 0); 257986fd29aSsetje cleanshares(vp, ttoproc(curthread)->p_pid); 258986fd29aSsetje return (0); 259986fd29aSsetje } 260986fd29aSsetje 261986fd29aSsetje /*ARGSUSED*/ 262986fd29aSsetje static int 263986fd29aSsetje dc_read(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cr, 264986fd29aSsetje struct caller_context *ct) 265986fd29aSsetje { 266986fd29aSsetje struct dcnode *dp = VTODC(vp); 267986fd29aSsetje size_t rdsize = MAX(MAXBSIZE, dp->dc_hdr->ch_blksize); 268986fd29aSsetje size_t fsize = dp->dc_hdr->ch_fsize; 269986fd29aSsetje int error; 270986fd29aSsetje 271986fd29aSsetje /* 272986fd29aSsetje * Loop through file with segmap, decompression will occur 273986fd29aSsetje * in dc_getapage 
274986fd29aSsetje */ 275986fd29aSsetje do { 276986fd29aSsetje caddr_t base; 277986fd29aSsetje size_t n; 278986fd29aSsetje offset_t mapon; 279986fd29aSsetje 280986fd29aSsetje /* 281986fd29aSsetje * read to end of block or file 282986fd29aSsetje */ 283986fd29aSsetje mapon = uiop->uio_loffset & (rdsize - 1); 284986fd29aSsetje n = MIN(rdsize - mapon, uiop->uio_resid); 285986fd29aSsetje n = MIN(n, fsize - uiop->uio_loffset); 286986fd29aSsetje if (n == 0) 287986fd29aSsetje return (0); /* at EOF */ 288986fd29aSsetje 289986fd29aSsetje base = segmap_getmapflt(segkmap, vp, uiop->uio_loffset, n, 1, 290986fd29aSsetje S_READ); 291986fd29aSsetje error = uiomove(base + mapon, n, UIO_READ, uiop); 292986fd29aSsetje if (!error) { 293986fd29aSsetje uint_t flags; 294986fd29aSsetje 295986fd29aSsetje if (n + mapon == rdsize || uiop->uio_loffset == fsize) 296986fd29aSsetje flags = SM_DONTNEED; 297986fd29aSsetje else 298986fd29aSsetje flags = 0; 299986fd29aSsetje error = segmap_release(segkmap, base, flags); 300986fd29aSsetje } else 301986fd29aSsetje (void) segmap_release(segkmap, base, 0); 302986fd29aSsetje } while (!error && uiop->uio_resid); 303986fd29aSsetje 304986fd29aSsetje return (error); 305986fd29aSsetje } 306986fd29aSsetje 307986fd29aSsetje static int 308986fd29aSsetje dc_getattr(struct vnode *vp, struct vattr *vap, int flags, 309986fd29aSsetje cred_t *cred, caller_context_t *ctp) 310986fd29aSsetje { 311986fd29aSsetje struct dcnode *dp = VTODC(vp); 312986fd29aSsetje struct vnode *subvp = dp->dc_subvp; 313986fd29aSsetje int error; 314986fd29aSsetje 315986fd29aSsetje error = VOP_GETATTR(subvp, vap, flags, cred, ctp); 316986fd29aSsetje 317986fd29aSsetje /* substitute uncompressed size */ 318986fd29aSsetje vap->va_size = dp->dc_hdr->ch_fsize; 319986fd29aSsetje return (error); 320986fd29aSsetje } 321986fd29aSsetje 322986fd29aSsetje static int 323986fd29aSsetje dc_setattr(struct vnode *vp, struct vattr *vap, int flags, cred_t *cred, 324986fd29aSsetje caller_context_t *ctp) 
325986fd29aSsetje { 326986fd29aSsetje struct dcnode *dp = VTODC(vp); 327986fd29aSsetje struct vnode *subvp = dp->dc_subvp; 328986fd29aSsetje 329986fd29aSsetje return (VOP_SETATTR(subvp, vap, flags, cred, ctp)); 330986fd29aSsetje } 331986fd29aSsetje 332986fd29aSsetje static int 333986fd29aSsetje dc_access(struct vnode *vp, int mode, int flags, 334986fd29aSsetje cred_t *cred, caller_context_t *ctp) 335986fd29aSsetje { 336986fd29aSsetje struct dcnode *dp = VTODC(vp); 337986fd29aSsetje struct vnode *subvp = dp->dc_subvp; 338986fd29aSsetje 339986fd29aSsetje return (VOP_ACCESS(subvp, mode, flags, cred, ctp)); 340986fd29aSsetje } 341986fd29aSsetje 342986fd29aSsetje /*ARGSUSED*/ 343986fd29aSsetje static int 344986fd29aSsetje dc_fsync(vnode_t *vp, int syncflag, cred_t *cred, caller_context_t *ctp) 345986fd29aSsetje { 346986fd29aSsetje return (0); 347986fd29aSsetje } 348986fd29aSsetje 349986fd29aSsetje /*ARGSUSED*/ 350986fd29aSsetje static void 351986fd29aSsetje dc_inactive(struct vnode *vp, cred_t *cr, caller_context_t *ctp) 352986fd29aSsetje { 353986fd29aSsetje struct dcnode *dp = VTODC(vp); 354986fd29aSsetje 355986fd29aSsetje mutex_enter(&dctable_lock); 356986fd29aSsetje mutex_enter(&vp->v_lock); 357986fd29aSsetje ASSERT(vp->v_count >= 1); 358986fd29aSsetje if (--vp->v_count != 0) { 359986fd29aSsetje /* 360986fd29aSsetje * Somebody accessed the dcnode before we got a chance to 361986fd29aSsetje * remove it. They will remove it when they do a vn_rele. 
362986fd29aSsetje */ 363986fd29aSsetje mutex_exit(&vp->v_lock); 364986fd29aSsetje mutex_exit(&dctable_lock); 365986fd29aSsetje return; 366986fd29aSsetje } 367986fd29aSsetje mutex_exit(&vp->v_lock); 368986fd29aSsetje 369986fd29aSsetje dcnode_free(dp); 370986fd29aSsetje 371986fd29aSsetje mutex_exit(&dctable_lock); 372986fd29aSsetje } 373986fd29aSsetje 374986fd29aSsetje static int 375986fd29aSsetje dc_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ctp) 376986fd29aSsetje { 377986fd29aSsetje struct dcnode *dp = VTODC(vp); 378986fd29aSsetje struct vnode *subvp = dp->dc_subvp; 379986fd29aSsetje 380986fd29aSsetje return (VOP_FID(subvp, fidp, ctp)); 381986fd29aSsetje } 382986fd29aSsetje 383986fd29aSsetje static int 384986fd29aSsetje dc_seek(struct vnode *vp, offset_t oof, offset_t *noffp, caller_context_t *ctp) 385986fd29aSsetje { 386986fd29aSsetje struct dcnode *dp = VTODC(vp); 387986fd29aSsetje struct vnode *subvp = dp->dc_subvp; 388986fd29aSsetje 389986fd29aSsetje return (VOP_SEEK(subvp, oof, noffp, ctp)); 390986fd29aSsetje } 391986fd29aSsetje 392986fd29aSsetje static int 393986fd29aSsetje dc_frlock(struct vnode *vp, int cmd, struct flock64 *bfp, int flag, 394986fd29aSsetje offset_t offset, struct flk_callback *flk_cbp, 395986fd29aSsetje cred_t *cr, caller_context_t *ctp) 396986fd29aSsetje { 397986fd29aSsetje struct dcnode *dp = VTODC(vp); 39853eed1cbSJohn.Zolnowsky@Sun.COM int error; 39953eed1cbSJohn.Zolnowsky@Sun.COM struct vattr vattr; 400986fd29aSsetje 401986fd29aSsetje /* 402986fd29aSsetje * If file is being mapped, disallow frlock. 
403986fd29aSsetje */ 40453eed1cbSJohn.Zolnowsky@Sun.COM vattr.va_mask = AT_MODE; 40553eed1cbSJohn.Zolnowsky@Sun.COM if (error = VOP_GETATTR(dp->dc_subvp, &vattr, 0, cr, ctp)) 40653eed1cbSJohn.Zolnowsky@Sun.COM return (error); 40753eed1cbSJohn.Zolnowsky@Sun.COM if (dp->dc_mapcnt > 0 && MANDLOCK(vp, vattr.va_mode)) 408986fd29aSsetje return (EAGAIN); 409986fd29aSsetje 410986fd29aSsetje return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ctp)); 411986fd29aSsetje } 412986fd29aSsetje 413986fd29aSsetje /*ARGSUSED*/ 414986fd29aSsetje static int 415986fd29aSsetje dc_getblock_miss(struct vnode *vp, offset_t off, size_t len, struct page **ppp, 416986fd29aSsetje struct seg *seg, caddr_t addr, enum seg_rw rw, struct cred *cr) 417986fd29aSsetje { 418986fd29aSsetje struct dcnode *dp = VTODC(vp); 419986fd29aSsetje struct comphdr *hdr = dp->dc_hdr; 420986fd29aSsetje struct page *pp; 421986fd29aSsetje struct buf *bp; 422986fd29aSsetje caddr_t saddr; 423986fd29aSsetje off_t cblkno; 424986fd29aSsetje size_t rdoff, rdsize, dsize; 425986fd29aSsetje long xlen; 426986fd29aSsetje int error, zerr; 427986fd29aSsetje 428986fd29aSsetje ASSERT(len == hdr->ch_blksize); 429986fd29aSsetje /* 430986fd29aSsetje * Get destination pages and make them addressable 431986fd29aSsetje */ 432986fd29aSsetje pp = page_create_va(vp, off, len, PG_WAIT, seg, addr); 433986fd29aSsetje bp = pageio_setup(pp, len, vp, B_READ); 434986fd29aSsetje bp_mapin(bp); 435986fd29aSsetje 436986fd29aSsetje /* 437986fd29aSsetje * read compressed data from subordinate vnode 438986fd29aSsetje */ 439986fd29aSsetje saddr = kmem_cache_alloc(dp->dc_bufcache, KM_SLEEP); 440986fd29aSsetje cblkno = off / len; 441986fd29aSsetje rdoff = hdr->ch_blkmap[cblkno]; 442986fd29aSsetje rdsize = hdr->ch_blkmap[cblkno + 1] - rdoff; 443986fd29aSsetje error = vn_rdwr(UIO_READ, dp->dc_subvp, saddr, rdsize, rdoff, 444986fd29aSsetje UIO_SYSSPACE, 0, 0, cr, NULL); 445986fd29aSsetje if (error) 446986fd29aSsetje goto cleanup; 447986fd29aSsetje 
448986fd29aSsetje /* 449986fd29aSsetje * Uncompress 450986fd29aSsetje */ 451986fd29aSsetje dsize = len; 452986fd29aSsetje zerr = z_uncompress(bp->b_un.b_addr, &dsize, saddr, dp->dc_zmax); 453986fd29aSsetje if (zerr != Z_OK) { 454986fd29aSsetje error = EIO; 455986fd29aSsetje goto cleanup; 456986fd29aSsetje } 457986fd29aSsetje 458986fd29aSsetje /* 459986fd29aSsetje * Handle EOF 460986fd29aSsetje */ 461986fd29aSsetje xlen = hdr->ch_fsize - off; 462986fd29aSsetje if (xlen < len) { 463986fd29aSsetje bzero(bp->b_un.b_addr + xlen, len - xlen); 464986fd29aSsetje if (dsize != xlen) 465986fd29aSsetje error = EIO; 466986fd29aSsetje } else if (dsize != len) 467986fd29aSsetje error = EIO; 468986fd29aSsetje 469986fd29aSsetje /* 470986fd29aSsetje * Clean up 471986fd29aSsetje */ 472986fd29aSsetje cleanup: 473986fd29aSsetje kmem_cache_free(dp->dc_bufcache, saddr); 474986fd29aSsetje pageio_done(bp); 475986fd29aSsetje *ppp = pp; 476986fd29aSsetje return (error); 477986fd29aSsetje } 478986fd29aSsetje 479986fd29aSsetje static int 480986fd29aSsetje dc_getblock(struct vnode *vp, offset_t off, size_t len, struct page **ppp, 481986fd29aSsetje struct seg *seg, caddr_t addr, enum seg_rw rw, struct cred *cr) 482986fd29aSsetje { 483986fd29aSsetje struct page *pp, *plist = NULL; 484986fd29aSsetje offset_t pgoff; 485986fd29aSsetje int rdblk; 486986fd29aSsetje 487986fd29aSsetje /* 488986fd29aSsetje * pvn_read_kluster() doesn't quite do what we want, since it 489986fd29aSsetje * thinks sub block reads are ok. Here we always decompress 490986fd29aSsetje * a full block. 
491986fd29aSsetje */ 492986fd29aSsetje 493986fd29aSsetje /* 494986fd29aSsetje * Check page cache 495986fd29aSsetje */ 496986fd29aSsetje rdblk = 0; 497986fd29aSsetje for (pgoff = off; pgoff < off + len; pgoff += PAGESIZE) { 498986fd29aSsetje pp = page_lookup(vp, pgoff, SE_EXCL); 499986fd29aSsetje if (pp == NULL) { 500986fd29aSsetje rdblk = 1; 501986fd29aSsetje break; 502986fd29aSsetje } 503986fd29aSsetje page_io_lock(pp); 504986fd29aSsetje page_add(&plist, pp); 505986fd29aSsetje plist = plist->p_next; 506986fd29aSsetje } 507986fd29aSsetje if (!rdblk) { 508986fd29aSsetje *ppp = plist; 509986fd29aSsetje return (0); /* all pages in cache */ 510986fd29aSsetje } 511986fd29aSsetje 512986fd29aSsetje /* 513986fd29aSsetje * Undo any locks so getblock_miss has an open field 514986fd29aSsetje */ 515986fd29aSsetje if (plist != NULL) 516986fd29aSsetje pvn_io_done(plist); 517986fd29aSsetje 518986fd29aSsetje return (dc_getblock_miss(vp, off, len, ppp, seg, addr, rw, cr)); 519986fd29aSsetje } 520986fd29aSsetje 521e38c922eSAndrew Balfour static int 522e38c922eSAndrew Balfour dc_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct) 523e38c922eSAndrew Balfour { 524e38c922eSAndrew Balfour struct vnode *rvp; 525e38c922eSAndrew Balfour 526e38c922eSAndrew Balfour vp = VTODC(vp)->dc_subvp; 527e38c922eSAndrew Balfour if (VOP_REALVP(vp, &rvp, ct) == 0) 528e38c922eSAndrew Balfour vp = rvp; 529e38c922eSAndrew Balfour *vpp = vp; 530e38c922eSAndrew Balfour return (0); 531e38c922eSAndrew Balfour } 532e38c922eSAndrew Balfour 533986fd29aSsetje /*ARGSUSED10*/ 534986fd29aSsetje static int 535986fd29aSsetje dc_getpage(struct vnode *vp, offset_t off, size_t len, uint_t *protp, 536986fd29aSsetje struct page *pl[], size_t plsz, struct seg *seg, caddr_t addr, 537986fd29aSsetje enum seg_rw rw, struct cred *cr, caller_context_t *ctp) 538986fd29aSsetje { 539986fd29aSsetje struct dcnode *dp = VTODC(vp); 540986fd29aSsetje struct comphdr *hdr = dp->dc_hdr; 541986fd29aSsetje struct page *pp, *plist = NULL; 
542986fd29aSsetje caddr_t vp_baddr; 543986fd29aSsetje offset_t vp_boff, vp_bend; 544986fd29aSsetje size_t bsize = hdr->ch_blksize; 545986fd29aSsetje int nblks, error; 546986fd29aSsetje 547986fd29aSsetje /* does not support write */ 548986fd29aSsetje if (rw == S_WRITE) { 549986fd29aSsetje panic("write attempt on compressed file"); 550986fd29aSsetje /*NOTREACHED*/ 551986fd29aSsetje } 552986fd29aSsetje 553986fd29aSsetje if (protp) 554986fd29aSsetje *protp = PROT_ALL; 555986fd29aSsetje /* 556986fd29aSsetje * We don't support asynchronous operation at the moment, so 557986fd29aSsetje * just pretend we did it. If the pages are ever actually 558986fd29aSsetje * needed, they'll get brought in then. 559986fd29aSsetje */ 560986fd29aSsetje if (pl == NULL) 561986fd29aSsetje return (0); 562986fd29aSsetje 563986fd29aSsetje /* 564986fd29aSsetje * Calc block start and end offsets 565986fd29aSsetje */ 566986fd29aSsetje vp_boff = rounddown(off, bsize); 567986fd29aSsetje vp_bend = roundup(off + len, bsize); 568986fd29aSsetje vp_baddr = (caddr_t)rounddown((uintptr_t)addr, bsize); 569986fd29aSsetje 570986fd29aSsetje nblks = (vp_bend - vp_boff) / bsize; 571986fd29aSsetje while (nblks--) { 572986fd29aSsetje error = dc_getblock(vp, vp_boff, bsize, &pp, seg, vp_baddr, 573986fd29aSsetje rw, cr); 574986fd29aSsetje page_list_concat(&plist, &pp); 575986fd29aSsetje vp_boff += bsize; 576986fd29aSsetje vp_baddr += bsize; 577986fd29aSsetje } 578986fd29aSsetje if (!error) 579986fd29aSsetje pvn_plist_init(plist, pl, plsz, off, len, rw); 580986fd29aSsetje else 581986fd29aSsetje pvn_read_done(plist, B_ERROR); 582986fd29aSsetje return (error); 583986fd29aSsetje } 584986fd29aSsetje 585986fd29aSsetje /* 586986fd29aSsetje * This function should never be called. We need to have it to pass 587986fd29aSsetje * it as an argument to other functions. 
588986fd29aSsetje */ 589986fd29aSsetje /*ARGSUSED*/ 590986fd29aSsetje static int 591986fd29aSsetje dc_putapage(struct vnode *vp, struct page *pp, u_offset_t *offp, size_t *lenp, 592986fd29aSsetje int flags, struct cred *cr) 593986fd29aSsetje { 594986fd29aSsetje /* should never happen */ 595986fd29aSsetje cmn_err(CE_PANIC, "dcfs: dc_putapage: dirty page"); 596986fd29aSsetje /*NOTREACHED*/ 597986fd29aSsetje return (0); 598986fd29aSsetje } 599986fd29aSsetje 600986fd29aSsetje 601986fd29aSsetje /* 602986fd29aSsetje * The only flags we support are B_INVAL, B_FREE and B_DONTNEED. 603986fd29aSsetje * B_INVAL is set by: 604986fd29aSsetje * 605986fd29aSsetje * 1) the MC_SYNC command of memcntl(2) to support the MS_INVALIDATE flag. 606986fd29aSsetje * 2) the MC_ADVISE command of memcntl(2) with the MADV_DONTNEED advice 607986fd29aSsetje * which translates to an MC_SYNC with the MS_INVALIDATE flag. 608986fd29aSsetje * 609986fd29aSsetje * The B_FREE (as well as the B_DONTNEED) flag is set when the 610986fd29aSsetje * MADV_SEQUENTIAL advice has been used. VOP_PUTPAGE is invoked 611986fd29aSsetje * from SEGVN to release pages behind a pagefault. 
612986fd29aSsetje */ 613986fd29aSsetje /*ARGSUSED5*/ 614986fd29aSsetje static int 615986fd29aSsetje dc_putpage(struct vnode *vp, offset_t off, size_t len, int flags, 616986fd29aSsetje struct cred *cr, caller_context_t *ctp) 617986fd29aSsetje { 618986fd29aSsetje int error = 0; 619986fd29aSsetje 620986fd29aSsetje if (vp->v_count == 0) { 621986fd29aSsetje panic("dcfs_putpage: bad v_count"); 622986fd29aSsetje /*NOTREACHED*/ 623986fd29aSsetje } 624986fd29aSsetje 625986fd29aSsetje if (vp->v_flag & VNOMAP) 626986fd29aSsetje return (ENOSYS); 627986fd29aSsetje 628986fd29aSsetje if (!vn_has_cached_data(vp)) /* no pages mapped */ 629986fd29aSsetje return (0); 630986fd29aSsetje 631986fd29aSsetje if (len == 0) /* from 'off' to EOF */ 632986fd29aSsetje error = pvn_vplist_dirty(vp, off, dc_putapage, flags, cr); 633986fd29aSsetje else { 634986fd29aSsetje offset_t io_off; 635986fd29aSsetje se_t se = (flags & (B_INVAL | B_FREE)) ? SE_EXCL : SE_SHARED; 636986fd29aSsetje 637986fd29aSsetje for (io_off = off; io_off < off + len; io_off += PAGESIZE) { 638986fd29aSsetje page_t *pp; 639986fd29aSsetje 640986fd29aSsetje /* 641986fd29aSsetje * We insist on getting the page only if we are 642986fd29aSsetje * about to invalidate, free or write it and 643986fd29aSsetje * the B_ASYNC flag is not set. 644986fd29aSsetje */ 645986fd29aSsetje if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) 646986fd29aSsetje pp = page_lookup(vp, io_off, se); 647986fd29aSsetje else 648986fd29aSsetje pp = page_lookup_nowait(vp, io_off, se); 649986fd29aSsetje 650986fd29aSsetje if (pp == NULL) 651986fd29aSsetje continue; 652986fd29aSsetje /* 653986fd29aSsetje * Normally pvn_getdirty() should return 0, which 654986fd29aSsetje * impies that it has done the job for us. 655986fd29aSsetje * The shouldn't-happen scenario is when it returns 1. 656986fd29aSsetje * This means that the page has been modified and 657986fd29aSsetje * needs to be put back. 
658986fd29aSsetje * Since we can't write to a dcfs compressed file, 659986fd29aSsetje * we fake a failed I/O and force pvn_write_done() 660986fd29aSsetje * to destroy the page. 661986fd29aSsetje */ 662986fd29aSsetje if (pvn_getdirty(pp, flags) == 1) { 663986fd29aSsetje cmn_err(CE_NOTE, "dc_putpage: dirty page"); 664986fd29aSsetje pvn_write_done(pp, flags | 665986fd29aSsetje B_ERROR | B_WRITE | B_INVAL | B_FORCE); 666986fd29aSsetje } 667986fd29aSsetje } 668986fd29aSsetje } 669986fd29aSsetje return (error); 670986fd29aSsetje } 671986fd29aSsetje 672986fd29aSsetje static int 673986fd29aSsetje dc_map(struct vnode *vp, offset_t off, struct as *as, caddr_t *addrp, 674986fd29aSsetje size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, 675986fd29aSsetje struct cred *cred, caller_context_t *ctp) 676986fd29aSsetje { 677986fd29aSsetje struct vattr vattr; 678986fd29aSsetje struct segvn_crargs vn_a; 679986fd29aSsetje int error; 680986fd29aSsetje 681986fd29aSsetje if (vp->v_flag & VNOMAP) 682986fd29aSsetje return (ENOSYS); 683986fd29aSsetje 684986fd29aSsetje if (off < (offset_t)0 || (offset_t)(off + len) < (offset_t)0) 685986fd29aSsetje return (ENXIO); 686986fd29aSsetje 687986fd29aSsetje /* 688986fd29aSsetje * If file is being locked, disallow mapping. 
689986fd29aSsetje */ 690986fd29aSsetje if (error = VOP_GETATTR(VTODC(vp)->dc_subvp, &vattr, 0, cred, ctp)) 691986fd29aSsetje return (error); 692986fd29aSsetje if (vn_has_mandatory_locks(vp, vattr.va_mode)) 693986fd29aSsetje return (EAGAIN); 694986fd29aSsetje 695986fd29aSsetje as_rangelock(as); 696986fd29aSsetje 697986fd29aSsetje if ((flags & MAP_FIXED) == 0) { 698986fd29aSsetje map_addr(addrp, len, off, 1, flags); 699986fd29aSsetje if (*addrp == NULL) { 700986fd29aSsetje as_rangeunlock(as); 701986fd29aSsetje return (ENOMEM); 702986fd29aSsetje } 703986fd29aSsetje } else { 704986fd29aSsetje /* 705986fd29aSsetje * User specified address - blow away any previous mappings 706986fd29aSsetje */ 707986fd29aSsetje (void) as_unmap(as, *addrp, len); 708986fd29aSsetje } 709986fd29aSsetje 710986fd29aSsetje vn_a.vp = vp; 711986fd29aSsetje vn_a.offset = off; 712986fd29aSsetje vn_a.type = flags & MAP_TYPE; 713986fd29aSsetje vn_a.prot = prot; 714986fd29aSsetje vn_a.maxprot = maxprot; 715986fd29aSsetje vn_a.flags = flags & ~MAP_TYPE; 716986fd29aSsetje vn_a.cred = cred; 717986fd29aSsetje vn_a.amp = NULL; 718986fd29aSsetje vn_a.szc = 0; 719986fd29aSsetje vn_a.lgrp_mem_policy_flags = 0; 720986fd29aSsetje 721986fd29aSsetje error = as_map(as, *addrp, len, segvn_create, &vn_a); 722986fd29aSsetje as_rangeunlock(as); 723986fd29aSsetje return (error); 724986fd29aSsetje } 725986fd29aSsetje 726986fd29aSsetje /*ARGSUSED*/ 727986fd29aSsetje static int 728986fd29aSsetje dc_addmap(struct vnode *vp, offset_t off, struct as *as, caddr_t addr, 729986fd29aSsetje size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, 730986fd29aSsetje struct cred *cr, caller_context_t *ctp) 731986fd29aSsetje { 732986fd29aSsetje struct dcnode *dp; 733986fd29aSsetje 734986fd29aSsetje if (vp->v_flag & VNOMAP) 735986fd29aSsetje return (ENOSYS); 736986fd29aSsetje 737986fd29aSsetje dp = VTODC(vp); 738986fd29aSsetje mutex_enter(&dp->dc_lock); 739986fd29aSsetje dp->dc_mapcnt += btopr(len); 740986fd29aSsetje 
mutex_exit(&dp->dc_lock); 741986fd29aSsetje return (0); 742986fd29aSsetje } 743986fd29aSsetje 744986fd29aSsetje /*ARGSUSED*/ 745986fd29aSsetje static int 746986fd29aSsetje dc_delmap(struct vnode *vp, offset_t off, struct as *as, caddr_t addr, 747986fd29aSsetje size_t len, uint_t prot, uint_t maxprot, uint_t flags, 748986fd29aSsetje struct cred *cr, caller_context_t *ctp) 749986fd29aSsetje { 750986fd29aSsetje struct dcnode *dp; 751986fd29aSsetje 752986fd29aSsetje if (vp->v_flag & VNOMAP) 753986fd29aSsetje return (ENOSYS); 754986fd29aSsetje 755986fd29aSsetje dp = VTODC(vp); 756986fd29aSsetje mutex_enter(&dp->dc_lock); 757986fd29aSsetje dp->dc_mapcnt -= btopr(len); 758986fd29aSsetje ASSERT(dp->dc_mapcnt >= 0); 759986fd29aSsetje mutex_exit(&dp->dc_lock); 760986fd29aSsetje return (0); 761986fd29aSsetje } 762986fd29aSsetje 763986fd29aSsetje /* 764986fd29aSsetje * Constructor/destructor routines for dcnodes 765986fd29aSsetje */ 766986fd29aSsetje /*ARGSUSED1*/ 767986fd29aSsetje static int 768986fd29aSsetje dcnode_constructor(void *buf, void *cdrarg, int kmflags) 769986fd29aSsetje { 770986fd29aSsetje struct dcnode *dp = buf; 771986fd29aSsetje struct vnode *vp; 772986fd29aSsetje 7738bd3a292Stomee vp = dp->dc_vp = vn_alloc(kmflags); 7748bd3a292Stomee if (vp == NULL) { 7758bd3a292Stomee return (-1); 7768bd3a292Stomee } 7778bd3a292Stomee vp->v_data = dp; 778986fd29aSsetje vp->v_type = VREG; 779986fd29aSsetje vp->v_flag = VNOSWAP; 780986fd29aSsetje vp->v_vfsp = &dc_vfs; 781986fd29aSsetje vn_setops(vp, dc_vnodeops); 782986fd29aSsetje vn_exists(vp); 783986fd29aSsetje 784986fd29aSsetje mutex_init(&dp->dc_lock, NULL, MUTEX_DEFAULT, NULL); 785986fd29aSsetje dp->dc_mapcnt = 0; 786986fd29aSsetje dp->dc_lrunext = dp->dc_lruprev = NULL; 7878bd3a292Stomee dp->dc_hdr = NULL; 7888bd3a292Stomee dp->dc_subvp = NULL; 789986fd29aSsetje return (0); 790986fd29aSsetje } 791986fd29aSsetje 792986fd29aSsetje /*ARGSUSED*/ 793986fd29aSsetje static void 794986fd29aSsetje dcnode_destructor(void *buf, 
void *cdrarg) 795986fd29aSsetje { 796986fd29aSsetje struct dcnode *dp = buf; 797986fd29aSsetje struct vnode *vp = DCTOV(dp); 798986fd29aSsetje 799986fd29aSsetje mutex_destroy(&dp->dc_lock); 800986fd29aSsetje 801986fd29aSsetje VERIFY(dp->dc_hdr == NULL); 802986fd29aSsetje VERIFY(dp->dc_subvp == NULL); 803986fd29aSsetje vn_invalid(vp); 804986fd29aSsetje vn_free(vp); 805986fd29aSsetje } 806986fd29aSsetje 807986fd29aSsetje static struct dcnode * 808986fd29aSsetje dcnode_alloc(void) 809986fd29aSsetje { 810986fd29aSsetje struct dcnode *dp; 811986fd29aSsetje 812986fd29aSsetje /* 813986fd29aSsetje * If the free list is above DCLRUSIZE 814986fd29aSsetje * re-use one from it 815986fd29aSsetje */ 816986fd29aSsetje mutex_enter(&dctable_lock); 817986fd29aSsetje if (dclru_len < DCLRUSIZE) { 818986fd29aSsetje mutex_exit(&dctable_lock); 819986fd29aSsetje dp = kmem_cache_alloc(dcnode_cache, KM_SLEEP); 820986fd29aSsetje } else { 821986fd29aSsetje ASSERT(dclru != NULL); 822986fd29aSsetje dp = dclru; 823986fd29aSsetje dclru_sub(dp); 824986fd29aSsetje dcdelete(dp); 825986fd29aSsetje mutex_exit(&dctable_lock); 826986fd29aSsetje dcnode_recycle(dp); 827986fd29aSsetje } 828986fd29aSsetje return (dp); 829986fd29aSsetje } 830986fd29aSsetje 831986fd29aSsetje static void 832986fd29aSsetje dcnode_free(struct dcnode *dp) 833986fd29aSsetje { 834986fd29aSsetje struct vnode *vp = DCTOV(dp); 835986fd29aSsetje 836986fd29aSsetje ASSERT(MUTEX_HELD(&dctable_lock)); 837986fd29aSsetje 838986fd29aSsetje /* 839986fd29aSsetje * If no cached pages, no need to put it on lru 840986fd29aSsetje */ 841986fd29aSsetje if (!vn_has_cached_data(vp)) { 842986fd29aSsetje dcdelete(dp); 843986fd29aSsetje dcnode_recycle(dp); 844986fd29aSsetje kmem_cache_free(dcnode_cache, dp); 845986fd29aSsetje return; 846986fd29aSsetje } 847986fd29aSsetje 848986fd29aSsetje /* 849986fd29aSsetje * Add to lru, if it's over the limit, free from head 850986fd29aSsetje */ 851986fd29aSsetje dclru_add(dp); 852986fd29aSsetje if (dclru_len > 
DCLRUSIZE) { 853986fd29aSsetje dp = dclru; 854986fd29aSsetje dclru_sub(dp); 855986fd29aSsetje dcdelete(dp); 856986fd29aSsetje dcnode_recycle(dp); 857986fd29aSsetje kmem_cache_free(dcnode_cache, dp); 858986fd29aSsetje } 859986fd29aSsetje } 860986fd29aSsetje 861986fd29aSsetje static void 862986fd29aSsetje dcnode_recycle(struct dcnode *dp) 863986fd29aSsetje { 864986fd29aSsetje struct vnode *vp; 865986fd29aSsetje 866986fd29aSsetje vp = DCTOV(dp); 867986fd29aSsetje 868986fd29aSsetje VN_RELE(dp->dc_subvp); 869986fd29aSsetje dp->dc_subvp = NULL; 870986fd29aSsetje (void) pvn_vplist_dirty(vp, 0, dc_putapage, B_INVAL, NULL); 871986fd29aSsetje kmem_free(dp->dc_hdr, dp->dc_hdrsize); 872986fd29aSsetje dp->dc_hdr = NULL; 873986fd29aSsetje dp->dc_hdrsize = dp->dc_zmax = 0; 874986fd29aSsetje dp->dc_bufcache = NULL; 875986fd29aSsetje dp->dc_mapcnt = 0; 876986fd29aSsetje vn_reinit(vp); 877986fd29aSsetje vp->v_type = VREG; 878986fd29aSsetje vp->v_flag = VNOSWAP; 879986fd29aSsetje vp->v_vfsp = &dc_vfs; 880986fd29aSsetje } 881986fd29aSsetje 882986fd29aSsetje static int 883986fd29aSsetje dcinit(int fstype, char *name) 884986fd29aSsetje { 885986fd29aSsetje static const fs_operation_def_t dc_vfsops_template[] = { 886986fd29aSsetje NULL, NULL 887986fd29aSsetje }; 888986fd29aSsetje int error; 889986fd29aSsetje major_t dev; 890986fd29aSsetje 891986fd29aSsetje error = vfs_setfsops(fstype, dc_vfsops_template, &dc_vfsops); 892986fd29aSsetje if (error) { 893986fd29aSsetje cmn_err(CE_WARN, "dcinit: bad vfs ops template"); 894986fd29aSsetje return (error); 895986fd29aSsetje } 896986fd29aSsetje VFS_INIT(&dc_vfs, dc_vfsops, NULL); 897986fd29aSsetje dc_vfs.vfs_flag = VFS_RDONLY; 898986fd29aSsetje dc_vfs.vfs_fstype = fstype; 899986fd29aSsetje if ((dev = getudev()) == (major_t)-1) 900986fd29aSsetje dev = 0; 901986fd29aSsetje dcdev = makedevice(dev, 0); 902986fd29aSsetje dc_vfs.vfs_dev = dcdev; 903986fd29aSsetje 904986fd29aSsetje error = vn_make_ops(name, dc_vnodeops_template, &dc_vnodeops); 
905986fd29aSsetje if (error != 0) { 906986fd29aSsetje (void) vfs_freevfsops_by_type(fstype); 907986fd29aSsetje cmn_err(CE_WARN, "dcinit: bad vnode ops template"); 908986fd29aSsetje return (error); 909986fd29aSsetje } 910986fd29aSsetje 911986fd29aSsetje mutex_init(&dctable_lock, NULL, MUTEX_DEFAULT, NULL); 912986fd29aSsetje mutex_init(&dccache_lock, NULL, MUTEX_DEFAULT, NULL); 913986fd29aSsetje dcnode_cache = kmem_cache_create("dcnode_cache", sizeof (struct dcnode), 914986fd29aSsetje 0, dcnode_constructor, dcnode_destructor, NULL, NULL, NULL, 0); 915986fd29aSsetje 916986fd29aSsetje return (0); 917986fd29aSsetje } 918986fd29aSsetje 919986fd29aSsetje /* 920986fd29aSsetje * Return shadow vnode with the given vp as its subordinate 921986fd29aSsetje */ 922986fd29aSsetje struct vnode * 923986fd29aSsetje decompvp(struct vnode *vp, cred_t *cred, caller_context_t *ctp) 924986fd29aSsetje { 925986fd29aSsetje struct dcnode *dp, *ndp; 926986fd29aSsetje struct comphdr thdr, *hdr; 927986fd29aSsetje struct kmem_cache **cpp; 928986fd29aSsetje struct vattr vattr; 929986fd29aSsetje size_t hdrsize, bsize; 930986fd29aSsetje int error; 931986fd29aSsetje 932986fd29aSsetje /* 933986fd29aSsetje * See if we have an existing shadow 934986fd29aSsetje * If none, we have to manufacture one 935986fd29aSsetje */ 936986fd29aSsetje mutex_enter(&dctable_lock); 937986fd29aSsetje dp = dcfind(vp); 938986fd29aSsetje mutex_exit(&dctable_lock); 939986fd29aSsetje if (dp != NULL) 940986fd29aSsetje return (DCTOV(dp)); 941986fd29aSsetje 942986fd29aSsetje /* 943986fd29aSsetje * Make sure it's a valid compressed file 944986fd29aSsetje */ 945986fd29aSsetje hdr = &thdr; 946986fd29aSsetje error = vn_rdwr(UIO_READ, vp, (caddr_t)hdr, sizeof (struct comphdr), 0, 947986fd29aSsetje UIO_SYSSPACE, 0, 0, cred, NULL); 9481d7f3fadSKrishnendu Sadhukhan - Sun Microsystems if (error || hdr->ch_magic != CH_MAGIC_ZLIB || 949986fd29aSsetje hdr->ch_version != CH_VERSION || hdr->ch_algorithm != CH_ALG_ZLIB || 950986fd29aSsetje 
hdr->ch_fsize == 0 || hdr->ch_blksize < PAGESIZE || 951*de710d24SJosef 'Jeff' Sipek hdr->ch_blksize > ptob(DCCACHESIZE) || !ISP2(hdr->ch_blksize)) 952986fd29aSsetje return (NULL); 953986fd29aSsetje 954986fd29aSsetje /* get underlying file size */ 955986fd29aSsetje if (VOP_GETATTR(vp, &vattr, 0, cred, ctp) != 0) 956986fd29aSsetje return (NULL); 957986fd29aSsetje 958986fd29aSsetje /* 959986fd29aSsetje * Re-read entire header 960986fd29aSsetje */ 961986fd29aSsetje hdrsize = hdr->ch_blkmap[0] + sizeof (uint64_t); 962986fd29aSsetje hdr = kmem_alloc(hdrsize, KM_SLEEP); 963986fd29aSsetje error = vn_rdwr(UIO_READ, vp, (caddr_t)hdr, hdrsize, 0, UIO_SYSSPACE, 964986fd29aSsetje 0, 0, cred, NULL); 965986fd29aSsetje if (error) { 966986fd29aSsetje kmem_free(hdr, hdrsize); 967986fd29aSsetje return (NULL); 968986fd29aSsetje } 969986fd29aSsetje 970986fd29aSsetje /* 971986fd29aSsetje * add extra blkmap entry to make dc_getblock()'s 972986fd29aSsetje * life easier 973986fd29aSsetje */ 974986fd29aSsetje bsize = hdr->ch_blksize; 975986fd29aSsetje hdr->ch_blkmap[((hdr->ch_fsize-1) / bsize) + 1] = vattr.va_size; 976986fd29aSsetje 977986fd29aSsetje ndp = dcnode_alloc(); 978986fd29aSsetje ndp->dc_subvp = vp; 979986fd29aSsetje VN_HOLD(vp); 980986fd29aSsetje ndp->dc_hdr = hdr; 981986fd29aSsetje ndp->dc_hdrsize = hdrsize; 982986fd29aSsetje 983986fd29aSsetje /* 984986fd29aSsetje * Allocate kmem cache if none there already 985986fd29aSsetje */ 986986fd29aSsetje ndp->dc_zmax = ZMAXBUF(bsize); 987986fd29aSsetje cpp = &dcbuf_cache[btop(bsize)]; 988986fd29aSsetje mutex_enter(&dccache_lock); 989986fd29aSsetje if (*cpp == NULL) 990986fd29aSsetje *cpp = kmem_cache_create("dcbuf_cache", ndp->dc_zmax, 0, NULL, 991986fd29aSsetje NULL, NULL, NULL, NULL, 0); 992986fd29aSsetje mutex_exit(&dccache_lock); 993986fd29aSsetje ndp->dc_bufcache = *cpp; 994986fd29aSsetje 995986fd29aSsetje /* 996986fd29aSsetje * Recheck table in case someone else created shadow 997986fd29aSsetje * while we were blocked above. 
998986fd29aSsetje */ 999986fd29aSsetje mutex_enter(&dctable_lock); 1000986fd29aSsetje dp = dcfind(vp); 1001986fd29aSsetje if (dp != NULL) { 1002986fd29aSsetje mutex_exit(&dctable_lock); 1003986fd29aSsetje dcnode_recycle(ndp); 1004986fd29aSsetje kmem_cache_free(dcnode_cache, ndp); 1005986fd29aSsetje return (DCTOV(dp)); 1006986fd29aSsetje } 1007986fd29aSsetje dcinsert(ndp); 1008986fd29aSsetje mutex_exit(&dctable_lock); 1009986fd29aSsetje 1010986fd29aSsetje return (DCTOV(ndp)); 1011986fd29aSsetje } 1012986fd29aSsetje 1013986fd29aSsetje 1014986fd29aSsetje /* 1015986fd29aSsetje * dcnode lookup table 1016986fd29aSsetje * These routines maintain a table of dcnodes hashed by their 1017986fd29aSsetje * subordinate vnode so that they can be found if they already 1018986fd29aSsetje * exist in the vnode cache 1019986fd29aSsetje */ 1020986fd29aSsetje 1021986fd29aSsetje /* 1022986fd29aSsetje * Put a dcnode in the table. 1023986fd29aSsetje */ 1024986fd29aSsetje static void 1025986fd29aSsetje dcinsert(struct dcnode *newdp) 1026986fd29aSsetje { 1027986fd29aSsetje int idx = DCHASH(newdp->dc_subvp); 1028986fd29aSsetje 1029986fd29aSsetje ASSERT(MUTEX_HELD(&dctable_lock)); 1030986fd29aSsetje newdp->dc_hash = dctable[idx]; 1031986fd29aSsetje dctable[idx] = newdp; 1032986fd29aSsetje } 1033986fd29aSsetje 1034986fd29aSsetje /* 1035986fd29aSsetje * Remove a dcnode from the hash table. 
1036986fd29aSsetje */ 1037986fd29aSsetje void 1038986fd29aSsetje dcdelete(struct dcnode *deldp) 1039986fd29aSsetje { 1040986fd29aSsetje int idx = DCHASH(deldp->dc_subvp); 1041986fd29aSsetje struct dcnode *dp, *prevdp; 1042986fd29aSsetje 1043986fd29aSsetje ASSERT(MUTEX_HELD(&dctable_lock)); 1044986fd29aSsetje dp = dctable[idx]; 1045986fd29aSsetje if (dp == deldp) 1046986fd29aSsetje dctable[idx] = dp->dc_hash; 1047986fd29aSsetje else { 1048986fd29aSsetje for (prevdp = dp, dp = dp->dc_hash; dp != NULL; 1049986fd29aSsetje prevdp = dp, dp = dp->dc_hash) { 1050986fd29aSsetje if (dp == deldp) { 1051986fd29aSsetje prevdp->dc_hash = dp->dc_hash; 1052986fd29aSsetje break; 1053986fd29aSsetje } 1054986fd29aSsetje } 1055986fd29aSsetje } 1056986fd29aSsetje ASSERT(dp != NULL); 1057986fd29aSsetje } 1058986fd29aSsetje 1059986fd29aSsetje /* 1060986fd29aSsetje * Find a shadow vnode in the dctable hash list. 1061986fd29aSsetje */ 1062986fd29aSsetje static struct dcnode * 1063986fd29aSsetje dcfind(struct vnode *vp) 1064986fd29aSsetje { 1065986fd29aSsetje struct dcnode *dp; 1066986fd29aSsetje 1067986fd29aSsetje ASSERT(MUTEX_HELD(&dctable_lock)); 1068986fd29aSsetje for (dp = dctable[DCHASH(vp)]; dp != NULL; dp = dp->dc_hash) 1069986fd29aSsetje if (dp->dc_subvp == vp) { 1070986fd29aSsetje VN_HOLD(DCTOV(dp)); 1071986fd29aSsetje if (dp->dc_lrunext) 1072986fd29aSsetje dclru_sub(dp); 1073986fd29aSsetje return (dp); 1074986fd29aSsetje } 1075986fd29aSsetje return (NULL); 1076986fd29aSsetje } 1077986fd29aSsetje 1078986fd29aSsetje #ifdef DEBUG 1079986fd29aSsetje static int 1080986fd29aSsetje dclru_count(void) 1081986fd29aSsetje { 1082986fd29aSsetje struct dcnode *dp; 1083986fd29aSsetje int i = 0; 1084986fd29aSsetje 1085986fd29aSsetje if (dclru == NULL) 1086986fd29aSsetje return (0); 1087986fd29aSsetje for (dp = dclru; dp->dc_lrunext != dclru; dp = dp->dc_lrunext) 1088986fd29aSsetje i++; 1089986fd29aSsetje return (i + 1); 1090986fd29aSsetje } 1091986fd29aSsetje #endif 1092986fd29aSsetje 
1093986fd29aSsetje static void 1094986fd29aSsetje dclru_add(struct dcnode *dp) 1095986fd29aSsetje { 1096986fd29aSsetje /* 1097986fd29aSsetje * Add to dclru as double-link chain 1098986fd29aSsetje */ 1099986fd29aSsetje ASSERT(MUTEX_HELD(&dctable_lock)); 1100986fd29aSsetje if (dclru == NULL) { 1101986fd29aSsetje dclru = dp; 1102986fd29aSsetje dp->dc_lruprev = dp->dc_lrunext = dp; 1103986fd29aSsetje } else { 1104986fd29aSsetje struct dcnode *last = dclru->dc_lruprev; 1105986fd29aSsetje 1106986fd29aSsetje dclru->dc_lruprev = dp; 1107986fd29aSsetje last->dc_lrunext = dp; 1108986fd29aSsetje dp->dc_lruprev = last; 1109986fd29aSsetje dp->dc_lrunext = dclru; 1110986fd29aSsetje } 1111986fd29aSsetje dclru_len++; 1112986fd29aSsetje ASSERT(dclru_len == dclru_count()); 1113986fd29aSsetje } 1114986fd29aSsetje 1115986fd29aSsetje static void 1116986fd29aSsetje dclru_sub(struct dcnode *dp) 1117986fd29aSsetje { 1118986fd29aSsetje ASSERT(MUTEX_HELD(&dctable_lock)); 1119986fd29aSsetje dp->dc_lrunext->dc_lruprev = dp->dc_lruprev; 1120986fd29aSsetje dp->dc_lruprev->dc_lrunext = dp->dc_lrunext; 1121986fd29aSsetje if (dp == dclru) 1122986fd29aSsetje dclru = dp->dc_lrunext == dp ? NULL : dp->dc_lrunext; 1123986fd29aSsetje dp->dc_lrunext = dp->dc_lruprev = NULL; 1124986fd29aSsetje dclru_len--; 1125986fd29aSsetje ASSERT(dclru_len == dclru_count()); 1126986fd29aSsetje } 1127