1df8bae1dSRodney W. Grimes /* 2df8bae1dSRodney W. Grimes * Copyright (c) 1990 University of Utah. 326f9a767SRodney W. Grimes * Copyright (c) 1991 The Regents of the University of California. 426f9a767SRodney W. Grimes * All rights reserved. 526f9a767SRodney W. Grimes * Copyright (c) 1993,1994 John S. Dyson 6df8bae1dSRodney W. Grimes * 7df8bae1dSRodney W. Grimes * This code is derived from software contributed to Berkeley by 8df8bae1dSRodney W. Grimes * the Systems Programming Group of the University of Utah Computer 9df8bae1dSRodney W. Grimes * Science Department. 10df8bae1dSRodney W. Grimes * 11df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 12df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 13df8bae1dSRodney W. Grimes * are met: 14df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 15df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 16df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 17df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 18df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 19df8bae1dSRodney W. Grimes * 3. All advertising materials mentioning features or use of this software 20df8bae1dSRodney W. Grimes * must display the following acknowledgement: 21df8bae1dSRodney W. Grimes * This product includes software developed by the University of 22df8bae1dSRodney W. Grimes * California, Berkeley and its contributors. 23df8bae1dSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 24df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 25df8bae1dSRodney W. Grimes * without specific prior written permission. 26df8bae1dSRodney W. Grimes * 27df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37df8bae1dSRodney W. Grimes * SUCH DAMAGE. 38df8bae1dSRodney W. Grimes * 3926f9a767SRodney W. Grimes * from: @(#)vnode_pager.c 7.5 (Berkeley) 4/20/91 40bbc0ec52SDavid Greenman * $Id: vnode_pager.c,v 1.2 1994/05/25 09:21:11 rgrimes Exp $ 41df8bae1dSRodney W. Grimes */ 42df8bae1dSRodney W. Grimes 43df8bae1dSRodney W. Grimes /* 44df8bae1dSRodney W. Grimes * Page to/from files (vnodes). 45df8bae1dSRodney W. Grimes * 46df8bae1dSRodney W. Grimes * TODO: 47df8bae1dSRodney W. Grimes * pageouts 48df8bae1dSRodney W. Grimes * fix credential use (uses current process credentials now) 49df8bae1dSRodney W. Grimes */ 50df8bae1dSRodney W. Grimes 5126f9a767SRodney W. Grimes /* 5226f9a767SRodney W. Grimes * MODIFICATIONS: 5326f9a767SRodney W. Grimes * John S. Dyson 08 Dec 93 5426f9a767SRodney W. Grimes * 5526f9a767SRodney W. Grimes * This file in conjunction with some vm_fault mods, eliminate the performance 5626f9a767SRodney W. Grimes * advantage for using the buffer cache and minimize memory copies. 5726f9a767SRodney W. Grimes * 5826f9a767SRodney W. Grimes * 1) Supports multiple - block reads 5926f9a767SRodney W. Grimes * 2) Bypasses buffer cache for reads 6026f9a767SRodney W. Grimes * 6126f9a767SRodney W. Grimes * TODO: 6226f9a767SRodney W. Grimes * 6326f9a767SRodney W. Grimes * 1) Totally bypass buffer cache for reads 6426f9a767SRodney W. Grimes * (Currently will still sometimes use buffer cache for reads) 6526f9a767SRodney W. Grimes * 2) Bypass buffer cache for writes 6626f9a767SRodney W. Grimes * (Code does not support it, but mods are simple) 6726f9a767SRodney W. Grimes */ 6826f9a767SRodney W. Grimes 69df8bae1dSRodney W. Grimes #include <sys/param.h> 70df8bae1dSRodney W. Grimes #include <sys/systm.h> 71df8bae1dSRodney W. Grimes #include <sys/proc.h> 72df8bae1dSRodney W. Grimes #include <sys/malloc.h> 73df8bae1dSRodney W. Grimes #include <sys/vnode.h> 74df8bae1dSRodney W. Grimes #include <sys/uio.h> 75df8bae1dSRodney W. Grimes #include <sys/mount.h> 76df8bae1dSRodney W. Grimes 77df8bae1dSRodney W. Grimes #include <vm/vm.h> 78df8bae1dSRodney W. Grimes #include <vm/vm_page.h> 79df8bae1dSRodney W. Grimes #include <vm/vnode_pager.h> 80df8bae1dSRodney W. Grimes 8126f9a767SRodney W. Grimes #include <sys/buf.h> 8226f9a767SRodney W. Grimes #include <miscfs/specfs/specdev.h> 83df8bae1dSRodney W. Grimes 8426f9a767SRodney W. Grimes int vnode_pager_putmulti(); 85df8bae1dSRodney W. Grimes 8626f9a767SRodney W. Grimes void vnode_pager_init(); 8726f9a767SRodney W. Grimes vm_pager_t vnode_pager_alloc(caddr_t, vm_offset_t, vm_prot_t, vm_offset_t); 8826f9a767SRodney W. Grimes void vnode_pager_dealloc(); 8926f9a767SRodney W. Grimes int vnode_pager_getpage(); 9026f9a767SRodney W. Grimes int vnode_pager_getmulti(); 9126f9a767SRodney W. Grimes int vnode_pager_putpage(); 9226f9a767SRodney W. Grimes boolean_t vnode_pager_haspage(); 93df8bae1dSRodney W. Grimes 94df8bae1dSRodney W. Grimes struct pagerops vnodepagerops = { 95df8bae1dSRodney W. Grimes vnode_pager_init, 96df8bae1dSRodney W. Grimes vnode_pager_alloc, 97df8bae1dSRodney W. Grimes vnode_pager_dealloc, 98df8bae1dSRodney W. Grimes vnode_pager_getpage, 9926f9a767SRodney W. Grimes vnode_pager_getmulti, 100df8bae1dSRodney W. Grimes vnode_pager_putpage, 10126f9a767SRodney W. Grimes vnode_pager_putmulti, 10226f9a767SRodney W. Grimes vnode_pager_haspage 103df8bae1dSRodney W. Grimes }; 104df8bae1dSRodney W. Grimes 10526f9a767SRodney W. Grimes static int vnode_pager_input(vn_pager_t vnp, vm_page_t * m, int count, int reqpage); 10626f9a767SRodney W. Grimes static int vnode_pager_output(vn_pager_t vnp, vm_page_t * m, int count, int *rtvals); 10726f9a767SRodney W. Grimes struct buf * getpbuf(); 10826f9a767SRodney W. Grimes void relpbuf(struct buf * bp); 10926f9a767SRodney W. Grimes 11026f9a767SRodney W. Grimes extern vm_map_t pager_map; 11126f9a767SRodney W. Grimes 11226f9a767SRodney W. Grimes struct pagerlst vnode_pager_list; /* list of managed vnodes */ 11326f9a767SRodney W. Grimes 11426f9a767SRodney W. Grimes #define MAXBP (PAGE_SIZE/DEV_BSIZE); 11526f9a767SRodney W. Grimes 11626f9a767SRodney W. Grimes void 117df8bae1dSRodney W. Grimes vnode_pager_init() 118df8bae1dSRodney W. Grimes { 119df8bae1dSRodney W. Grimes TAILQ_INIT(&vnode_pager_list); 120df8bae1dSRodney W. Grimes } 121df8bae1dSRodney W. Grimes 122df8bae1dSRodney W. Grimes /* 123df8bae1dSRodney W. Grimes * Allocate (or lookup) pager for a vnode. 124df8bae1dSRodney W. Grimes * Handle is a vnode pointer. 125df8bae1dSRodney W. Grimes */ 12626f9a767SRodney W. Grimes vm_pager_t 12726f9a767SRodney W. Grimes vnode_pager_alloc(handle, size, prot, offset) 128df8bae1dSRodney W. Grimes caddr_t handle; 129df8bae1dSRodney W. Grimes vm_size_t size; 130df8bae1dSRodney W. Grimes vm_prot_t prot; 13126f9a767SRodney W. Grimes vm_offset_t offset; 132df8bae1dSRodney W. Grimes { 133df8bae1dSRodney W. Grimes register vm_pager_t pager; 134df8bae1dSRodney W. Grimes register vn_pager_t vnp; 135df8bae1dSRodney W. Grimes vm_object_t object; 136df8bae1dSRodney W. Grimes struct vattr vattr; 137df8bae1dSRodney W. Grimes struct vnode *vp; 138df8bae1dSRodney W. Grimes struct proc *p = curproc; /* XXX */ 139df8bae1dSRodney W. Grimes 140df8bae1dSRodney W. Grimes /* 141df8bae1dSRodney W. Grimes * Pageout to vnode, no can do yet. 142df8bae1dSRodney W. Grimes */ 143df8bae1dSRodney W. Grimes if (handle == NULL) 144df8bae1dSRodney W. Grimes return (NULL); 145df8bae1dSRodney W. Grimes 146df8bae1dSRodney W. Grimes /* 147bbc0ec52SDavid Greenman * Vnodes keep a pointer to any associated pager so no need to lookup 148bbc0ec52SDavid Greenman * with vm_pager_lookup. 149df8bae1dSRodney W. Grimes */ 150df8bae1dSRodney W. Grimes vp = (struct vnode *) handle; 151df8bae1dSRodney W. Grimes pager = (vm_pager_t) vp->v_vmdata; 152df8bae1dSRodney W. Grimes if (pager == NULL) { 153bbc0ec52SDavid Greenman 154df8bae1dSRodney W. Grimes /* 155df8bae1dSRodney W. Grimes * Allocate pager structures 156df8bae1dSRodney W. Grimes */ 157df8bae1dSRodney W. Grimes pager = (vm_pager_t) malloc(sizeof *pager, M_VMPAGER, M_WAITOK); 158df8bae1dSRodney W. Grimes if (pager == NULL) 159df8bae1dSRodney W. Grimes return (NULL); 160df8bae1dSRodney W. Grimes vnp = (vn_pager_t) malloc(sizeof *vnp, M_VMPGDATA, M_WAITOK); 161df8bae1dSRodney W. Grimes if (vnp == NULL) { 162df8bae1dSRodney W. Grimes free((caddr_t) pager, M_VMPAGER); 163df8bae1dSRodney W. Grimes return (NULL); 164df8bae1dSRodney W. Grimes } 165bbc0ec52SDavid Greenman 166df8bae1dSRodney W. Grimes /* 167df8bae1dSRodney W. Grimes * And an object of the appropriate size 168df8bae1dSRodney W. Grimes */ 169df8bae1dSRodney W. Grimes if (VOP_GETATTR(vp, &vattr, p->p_ucred, p) == 0) { 170df8bae1dSRodney W. Grimes object = vm_object_allocate(round_page(vattr.va_size)); 171df8bae1dSRodney W. Grimes vm_object_enter(object, pager); 172df8bae1dSRodney W. Grimes vm_object_setpager(object, pager, 0, TRUE); 173df8bae1dSRodney W. Grimes } else { 174df8bae1dSRodney W. Grimes free((caddr_t) vnp, M_VMPGDATA); 175df8bae1dSRodney W. Grimes free((caddr_t) pager, M_VMPAGER); 176df8bae1dSRodney W. Grimes return (NULL); 177df8bae1dSRodney W. Grimes } 178bbc0ec52SDavid Greenman 179df8bae1dSRodney W. Grimes /* 180df8bae1dSRodney W. Grimes * Hold a reference to the vnode and initialize pager data. 181df8bae1dSRodney W. Grimes */ 182df8bae1dSRodney W. Grimes VREF(vp); 183df8bae1dSRodney W. Grimes vnp->vnp_flags = 0; 184df8bae1dSRodney W. Grimes vnp->vnp_vp = vp; 185df8bae1dSRodney W. Grimes vnp->vnp_size = vattr.va_size; 18626f9a767SRodney W. Grimes 187df8bae1dSRodney W. Grimes TAILQ_INSERT_TAIL(&vnode_pager_list, pager, pg_list); 188df8bae1dSRodney W. Grimes pager->pg_handle = handle; 189df8bae1dSRodney W. Grimes pager->pg_type = PG_VNODE; 190df8bae1dSRodney W. Grimes pager->pg_ops = &vnodepagerops; 19126f9a767SRodney W. Grimes pager->pg_data = (caddr_t) vnp; 192df8bae1dSRodney W. Grimes vp->v_vmdata = (caddr_t) pager; 193df8bae1dSRodney W. Grimes } else { 194bbc0ec52SDavid Greenman 195df8bae1dSRodney W. Grimes /* 196bbc0ec52SDavid Greenman * vm_object_lookup() will remove the object from the cache if 197bbc0ec52SDavid Greenman * found and also gain a reference to the object. 198df8bae1dSRodney W. Grimes */ 199df8bae1dSRodney W. Grimes object = vm_object_lookup(pager); 200df8bae1dSRodney W. Grimes } 201df8bae1dSRodney W. Grimes return (pager); 202df8bae1dSRodney W. Grimes } 203df8bae1dSRodney W. Grimes 20426f9a767SRodney W. Grimes void 205df8bae1dSRodney W. Grimes vnode_pager_dealloc(pager) 206df8bae1dSRodney W. Grimes vm_pager_t pager; 207df8bae1dSRodney W. Grimes { 208df8bae1dSRodney W. Grimes register vn_pager_t vnp = (vn_pager_t) pager->pg_data; 209df8bae1dSRodney W. Grimes register struct vnode *vp; 210df8bae1dSRodney W. Grimes struct proc *p = curproc; /* XXX */ 211df8bae1dSRodney W. Grimes 212df8bae1dSRodney W. Grimes if (vp = vnp->vnp_vp) { 213df8bae1dSRodney W. Grimes vp->v_vmdata = NULL; 214df8bae1dSRodney W. Grimes vp->v_flag &= ~VTEXT; 21526f9a767SRodney W. Grimes #if 0 216df8bae1dSRodney W. Grimes /* can hang if done at reboot on NFS FS */ 217df8bae1dSRodney W. Grimes (void) VOP_FSYNC(vp, p->p_ucred, p); 218df8bae1dSRodney W. Grimes #endif 219df8bae1dSRodney W. Grimes vrele(vp); 220df8bae1dSRodney W. Grimes } 221df8bae1dSRodney W. Grimes TAILQ_REMOVE(&vnode_pager_list, pager, pg_list); 222df8bae1dSRodney W. Grimes free((caddr_t) vnp, M_VMPGDATA); 223df8bae1dSRodney W. Grimes free((caddr_t) pager, M_VMPAGER); 224df8bae1dSRodney W. Grimes } 225df8bae1dSRodney W. Grimes 22626f9a767SRodney W. Grimes int 22726f9a767SRodney W. Grimes vnode_pager_getmulti(pager, m, count, reqpage, sync) 228df8bae1dSRodney W. Grimes vm_pager_t pager; 22926f9a767SRodney W. Grimes vm_page_t *m; 23026f9a767SRodney W. Grimes int count; 23126f9a767SRodney W. Grimes int reqpage; 232df8bae1dSRodney W. Grimes boolean_t sync; 233df8bae1dSRodney W. Grimes { 234df8bae1dSRodney W. Grimes 23526f9a767SRodney W. Grimes return vnode_pager_input((vn_pager_t) pager->pg_data, m, count, reqpage); 236df8bae1dSRodney W. Grimes } 237df8bae1dSRodney W. Grimes 23826f9a767SRodney W. Grimes int 23926f9a767SRodney W. Grimes vnode_pager_getpage(pager, m, sync) 240df8bae1dSRodney W. Grimes vm_pager_t pager; 24126f9a767SRodney W. Grimes vm_page_t m; 24226f9a767SRodney W. Grimes boolean_t sync; 24326f9a767SRodney W. Grimes { 24426f9a767SRodney W. Grimes 24526f9a767SRodney W. Grimes int err; 24626f9a767SRodney W. Grimes vm_page_t marray[1]; 247bbc0ec52SDavid Greenman 24826f9a767SRodney W. Grimes if (pager == NULL) 24926f9a767SRodney W. Grimes return FALSE; 25026f9a767SRodney W. Grimes marray[0] = m; 25126f9a767SRodney W. Grimes 25226f9a767SRodney W. Grimes return vnode_pager_input((vn_pager_t) pager->pg_data, marray, 1, 0); 25326f9a767SRodney W. Grimes } 25426f9a767SRodney W. Grimes 25526f9a767SRodney W. Grimes boolean_t 25626f9a767SRodney W. Grimes vnode_pager_putpage(pager, m, sync) 25726f9a767SRodney W. Grimes vm_pager_t pager; 25826f9a767SRodney W. Grimes vm_page_t m; 259df8bae1dSRodney W. Grimes boolean_t sync; 260df8bae1dSRodney W. Grimes { 261df8bae1dSRodney W. Grimes int err; 26226f9a767SRodney W. Grimes vm_page_t marray[1]; 26326f9a767SRodney W. Grimes int rtvals[1]; 264df8bae1dSRodney W. Grimes 265df8bae1dSRodney W. Grimes if (pager == NULL) 26626f9a767SRodney W. Grimes return FALSE; 26726f9a767SRodney W. Grimes marray[0] = m; 26826f9a767SRodney W. Grimes vnode_pager_output((vn_pager_t) pager->pg_data, marray, 1, rtvals); 26926f9a767SRodney W. Grimes return rtvals[0]; 270df8bae1dSRodney W. Grimes } 271df8bae1dSRodney W. Grimes 27226f9a767SRodney W. Grimes int 27326f9a767SRodney W. Grimes vnode_pager_putmulti(pager, m, c, sync, rtvals) 27426f9a767SRodney W. Grimes vm_pager_t pager; 27526f9a767SRodney W. Grimes vm_page_t *m; 27626f9a767SRodney W. Grimes int c; 27726f9a767SRodney W. Grimes boolean_t sync; 27826f9a767SRodney W. Grimes int *rtvals; 27926f9a767SRodney W. Grimes { 28026f9a767SRodney W. Grimes return vnode_pager_output((vn_pager_t) pager->pg_data, m, c, rtvals); 28126f9a767SRodney W. Grimes } 28226f9a767SRodney W. Grimes 28326f9a767SRodney W. Grimes 28426f9a767SRodney W. Grimes boolean_t 285df8bae1dSRodney W. Grimes vnode_pager_haspage(pager, offset) 286df8bae1dSRodney W. Grimes vm_pager_t pager; 287df8bae1dSRodney W. Grimes vm_offset_t offset; 288df8bae1dSRodney W. Grimes { 289df8bae1dSRodney W. Grimes register vn_pager_t vnp = (vn_pager_t) pager->pg_data; 290df8bae1dSRodney W. Grimes daddr_t bn; 291df8bae1dSRodney W. Grimes int err; 292df8bae1dSRodney W. Grimes 293df8bae1dSRodney W. Grimes /* 294df8bae1dSRodney W. Grimes * Offset beyond end of file, do not have the page 295df8bae1dSRodney W. Grimes */ 296df8bae1dSRodney W. Grimes if (offset >= vnp->vnp_size) { 297df8bae1dSRodney W. Grimes return (FALSE); 298df8bae1dSRodney W. Grimes } 299df8bae1dSRodney W. Grimes 300df8bae1dSRodney W. Grimes /* 301bbc0ec52SDavid Greenman * Read the index to find the disk block to read from. If there is no 302bbc0ec52SDavid Greenman * block, report that we don't have this data. 303df8bae1dSRodney W. Grimes * 304df8bae1dSRodney W. Grimes * Assumes that the vnode has whole page or nothing. 305df8bae1dSRodney W. Grimes */ 306df8bae1dSRodney W. Grimes err = VOP_BMAP(vnp->vnp_vp, 307df8bae1dSRodney W. Grimes offset / vnp->vnp_vp->v_mount->mnt_stat.f_iosize, 30826f9a767SRodney W. Grimes (struct vnode **) 0, &bn, 0); 309df8bae1dSRodney W. Grimes if (err) { 310df8bae1dSRodney W. Grimes return (TRUE); 311df8bae1dSRodney W. Grimes } 312df8bae1dSRodney W. Grimes return ((long) bn < 0 ? FALSE : TRUE); 313df8bae1dSRodney W. Grimes } 314df8bae1dSRodney W. Grimes 315df8bae1dSRodney W. Grimes /* 316df8bae1dSRodney W. Grimes * Lets the VM system know about a change in size for a file. 317df8bae1dSRodney W. Grimes * If this vnode is mapped into some address space (i.e. we have a pager 318df8bae1dSRodney W. Grimes * for it) we adjust our own internal size and flush any cached pages in 319df8bae1dSRodney W. Grimes * the associated object that are affected by the size change. 320df8bae1dSRodney W. Grimes * 321df8bae1dSRodney W. Grimes * Note: this routine may be invoked as a result of a pager put 322df8bae1dSRodney W. Grimes * operation (possibly at object termination time), so we must be careful. 323df8bae1dSRodney W. Grimes */ 324df8bae1dSRodney W. Grimes void 325df8bae1dSRodney W. Grimes vnode_pager_setsize(vp, nsize) 326df8bae1dSRodney W. Grimes struct vnode *vp; 327df8bae1dSRodney W. Grimes u_long nsize; 328df8bae1dSRodney W. Grimes { 329df8bae1dSRodney W. Grimes register vn_pager_t vnp; 330df8bae1dSRodney W. Grimes register vm_object_t object; 331df8bae1dSRodney W. Grimes vm_pager_t pager; 332df8bae1dSRodney W. Grimes 333df8bae1dSRodney W. Grimes /* 334df8bae1dSRodney W. Grimes * Not a mapped vnode 335df8bae1dSRodney W. Grimes */ 336df8bae1dSRodney W. Grimes if (vp == NULL || vp->v_type != VREG || vp->v_vmdata == NULL) 337df8bae1dSRodney W. Grimes return; 338bbc0ec52SDavid Greenman 339df8bae1dSRodney W. Grimes /* 340df8bae1dSRodney W. Grimes * Hasn't changed size 341df8bae1dSRodney W. Grimes */ 342df8bae1dSRodney W. Grimes pager = (vm_pager_t) vp->v_vmdata; 343df8bae1dSRodney W. Grimes vnp = (vn_pager_t) pager->pg_data; 344df8bae1dSRodney W. Grimes if (nsize == vnp->vnp_size) 345df8bae1dSRodney W. Grimes return; 346bbc0ec52SDavid Greenman 347df8bae1dSRodney W. Grimes /* 348bbc0ec52SDavid Greenman * No object. This can happen during object termination since 349bbc0ec52SDavid Greenman * vm_object_page_clean is called after the object has been removed 350bbc0ec52SDavid Greenman * from the hash table, and clean may cause vnode write operations 351bbc0ec52SDavid Greenman * which can wind up back here. 352df8bae1dSRodney W. Grimes */ 353df8bae1dSRodney W. Grimes object = vm_object_lookup(pager); 354df8bae1dSRodney W. Grimes if (object == NULL) 355df8bae1dSRodney W. Grimes return; 356df8bae1dSRodney W. Grimes 357df8bae1dSRodney W. Grimes /* 358bbc0ec52SDavid Greenman * File has shrunk. Toss any cached pages beyond the new EOF. 359df8bae1dSRodney W. Grimes */ 360bbc0ec52SDavid Greenman if (nsize < vnp->vnp_size) { 361df8bae1dSRodney W. Grimes vm_object_lock(object); 362df8bae1dSRodney W. Grimes vm_object_page_remove(object, 363bbc0ec52SDavid Greenman round_page((vm_offset_t) nsize), vnp->vnp_size); 364bbc0ec52SDavid Greenman vm_object_unlock(object); 365bbc0ec52SDavid Greenman 366bbc0ec52SDavid Greenman /* 367bbc0ec52SDavid Greenman * this gets rid of garbage at the end of a page that is now 368bbc0ec52SDavid Greenman * only partially backed by the vnode... 369bbc0ec52SDavid Greenman */ 370bbc0ec52SDavid Greenman if (nsize & PAGE_MASK) { 371bbc0ec52SDavid Greenman vm_offset_t kva; 372bbc0ec52SDavid Greenman vm_page_t m; 373bbc0ec52SDavid Greenman 374bbc0ec52SDavid Greenman m = vm_page_lookup(object, trunc_page((vm_offset_t) nsize)); 375bbc0ec52SDavid Greenman if (m) { 376bbc0ec52SDavid Greenman kva = vm_pager_map_page(m); 377bbc0ec52SDavid Greenman bzero((caddr_t) kva + (nsize & PAGE_MASK), 378bbc0ec52SDavid Greenman round_page(nsize) - nsize); 379bbc0ec52SDavid Greenman vm_pager_unmap_page(kva); 380bbc0ec52SDavid Greenman } 381bbc0ec52SDavid Greenman } 382bbc0ec52SDavid Greenman } else { 383bbc0ec52SDavid Greenman 384bbc0ec52SDavid Greenman /* 385bbc0ec52SDavid Greenman * this allows the filesystem and VM cache to stay in sync if 386bbc0ec52SDavid Greenman * the VM page hasn't been modified... After the page is 387bbc0ec52SDavid Greenman * removed -- it will be faulted back in from the filesystem 388bbc0ec52SDavid Greenman * cache. 389bbc0ec52SDavid Greenman */ 390bbc0ec52SDavid Greenman if (vnp->vnp_size & PAGE_MASK) { 391bbc0ec52SDavid Greenman vm_page_t m; 392bbc0ec52SDavid Greenman 393bbc0ec52SDavid Greenman m = vm_page_lookup(object, trunc_page(vnp->vnp_size)); 394bbc0ec52SDavid Greenman if (m && (m->flags & PG_CLEAN)) { 395bbc0ec52SDavid Greenman vm_object_lock(object); 396bbc0ec52SDavid Greenman vm_object_page_remove(object, 397bbc0ec52SDavid Greenman vnp->vnp_size, vnp->vnp_size); 398df8bae1dSRodney W. Grimes vm_object_unlock(object); 399df8bae1dSRodney W. Grimes } 400bbc0ec52SDavid Greenman } 401bbc0ec52SDavid Greenman } 402df8bae1dSRodney W. Grimes vnp->vnp_size = (vm_offset_t) nsize; 403bbc0ec52SDavid Greenman object->size = round_page(nsize); 404bbc0ec52SDavid Greenman 405df8bae1dSRodney W. Grimes vm_object_deallocate(object); 406df8bae1dSRodney W. Grimes } 407df8bae1dSRodney W. Grimes 408df8bae1dSRodney W. Grimes void 409df8bae1dSRodney W. Grimes vnode_pager_umount(mp) 410df8bae1dSRodney W. Grimes register struct mount *mp; 411df8bae1dSRodney W. Grimes { 412df8bae1dSRodney W. Grimes register vm_pager_t pager, npager; 413df8bae1dSRodney W. Grimes struct vnode *vp; 414df8bae1dSRodney W. Grimes 41526f9a767SRodney W. Grimes pager = vnode_pager_list.tqh_first; 41626f9a767SRodney W. Grimes while (pager) { 417bbc0ec52SDavid Greenman 418df8bae1dSRodney W. Grimes /* 419bbc0ec52SDavid Greenman * Save the next pointer now since uncaching may terminate the 420bbc0ec52SDavid Greenman * object and render pager invalid 421df8bae1dSRodney W. Grimes */ 422df8bae1dSRodney W. Grimes vp = ((vn_pager_t) pager->pg_data)->vnp_vp; 42326f9a767SRodney W. Grimes npager = pager->pg_list.tqe_next; 42426f9a767SRodney W. Grimes if (mp == (struct mount *) 0 || vp->v_mount == mp) 425df8bae1dSRodney W. Grimes (void) vnode_pager_uncache(vp); 42626f9a767SRodney W. Grimes pager = npager; 427df8bae1dSRodney W. Grimes } 428df8bae1dSRodney W. Grimes } 429df8bae1dSRodney W. Grimes 430df8bae1dSRodney W. Grimes /* 431df8bae1dSRodney W. Grimes * Remove vnode associated object from the object cache. 432df8bae1dSRodney W. Grimes * 43326f9a767SRodney W. Grimes * Note: this routine may be invoked as a result of a pager put 43426f9a767SRodney W. Grimes * operation (possibly at object termination time), so we must be careful. 43526f9a767SRodney W. Grimes */ 43626f9a767SRodney W. Grimes boolean_t 43726f9a767SRodney W. Grimes vnode_pager_uncache(vp) 43826f9a767SRodney W. Grimes register struct vnode *vp; 43926f9a767SRodney W. Grimes { 44026f9a767SRodney W. Grimes register vm_object_t object; 44126f9a767SRodney W. Grimes boolean_t uncached, locked; 44226f9a767SRodney W. Grimes vm_pager_t pager; 44326f9a767SRodney W. Grimes 44426f9a767SRodney W. Grimes /* 44526f9a767SRodney W. Grimes * Not a mapped vnode 44626f9a767SRodney W. Grimes */ 44726f9a767SRodney W. Grimes pager = (vm_pager_t) vp->v_vmdata; 44826f9a767SRodney W. Grimes if (pager == NULL) 44926f9a767SRodney W. Grimes return (TRUE); 450bbc0ec52SDavid Greenman 45126f9a767SRodney W. Grimes /* 452bbc0ec52SDavid Greenman * Unlock the vnode if it is currently locked. We do this since 453bbc0ec52SDavid Greenman * uncaching the object may result in its destruction which may 454bbc0ec52SDavid Greenman * initiate paging activity which may necessitate locking the vnode. 45526f9a767SRodney W. Grimes */ 45626f9a767SRodney W. Grimes locked = VOP_ISLOCKED(vp); 45726f9a767SRodney W. Grimes if (locked) 45826f9a767SRodney W. Grimes VOP_UNLOCK(vp); 459bbc0ec52SDavid Greenman 46026f9a767SRodney W. Grimes /* 461bbc0ec52SDavid Greenman * Must use vm_object_lookup() as it actually removes the object from 462bbc0ec52SDavid Greenman * the cache list. 46326f9a767SRodney W. Grimes */ 46426f9a767SRodney W. Grimes object = vm_object_lookup(pager); 46526f9a767SRodney W. Grimes if (object) { 46626f9a767SRodney W. Grimes uncached = (object->ref_count <= 1); 46726f9a767SRodney W. Grimes pager_cache(object, FALSE); 46826f9a767SRodney W. Grimes } else 46926f9a767SRodney W. Grimes uncached = TRUE; 47026f9a767SRodney W. Grimes if (locked) 47126f9a767SRodney W. Grimes VOP_LOCK(vp); 47226f9a767SRodney W. Grimes return (uncached); 47326f9a767SRodney W. Grimes } 474df8bae1dSRodney W. Grimes 47526f9a767SRodney W. Grimes 47626f9a767SRodney W. Grimes void 47726f9a767SRodney W. Grimes vnode_pager_freepage(m) 47826f9a767SRodney W. Grimes vm_page_t m; 479df8bae1dSRodney W. Grimes { 48026f9a767SRodney W. Grimes PAGE_WAKEUP(m); 48126f9a767SRodney W. Grimes vm_page_free(m); 48226f9a767SRodney W. Grimes } 48326f9a767SRodney W. Grimes 48426f9a767SRodney W. Grimes /* 48526f9a767SRodney W. Grimes * calculate the linear (byte) disk address of specified virtual 48626f9a767SRodney W. Grimes * file address 48726f9a767SRodney W. Grimes */ 48826f9a767SRodney W. Grimes vm_offset_t 48926f9a767SRodney W. Grimes vnode_pager_addr(vp, address) 49026f9a767SRodney W. Grimes struct vnode *vp; 49126f9a767SRodney W. Grimes vm_offset_t address; 49226f9a767SRodney W. Grimes { 49326f9a767SRodney W. Grimes int rtaddress; 49426f9a767SRodney W. Grimes int bsize; 49526f9a767SRodney W. Grimes vm_offset_t block; 49626f9a767SRodney W. Grimes struct vnode *rtvp; 49726f9a767SRodney W. Grimes int err; 49826f9a767SRodney W. Grimes int vblock, voffset; 49926f9a767SRodney W. Grimes 50026f9a767SRodney W. Grimes bsize = vp->v_mount->mnt_stat.f_iosize; 50126f9a767SRodney W. Grimes vblock = address / bsize; 50226f9a767SRodney W. Grimes voffset = address % bsize; 50326f9a767SRodney W. Grimes 50426f9a767SRodney W. Grimes err = VOP_BMAP(vp, vblock, &rtvp, &block, 0); 50526f9a767SRodney W. Grimes 50626f9a767SRodney W. Grimes if (err) 50726f9a767SRodney W. Grimes rtaddress = -1; 50826f9a767SRodney W. Grimes else 50926f9a767SRodney W. Grimes rtaddress = block * DEV_BSIZE + voffset; 51026f9a767SRodney W. Grimes 51126f9a767SRodney W. Grimes return rtaddress; 51226f9a767SRodney W. Grimes } 51326f9a767SRodney W. Grimes 51426f9a767SRodney W. Grimes /* 51526f9a767SRodney W. Grimes * interrupt routine for I/O completion 51626f9a767SRodney W. Grimes */ 51726f9a767SRodney W. Grimes void 51826f9a767SRodney W. Grimes vnode_pager_iodone(bp) 51926f9a767SRodney W. Grimes struct buf *bp; 52026f9a767SRodney W. Grimes { 52126f9a767SRodney W. Grimes bp->b_flags |= B_DONE; 52226f9a767SRodney W. Grimes wakeup((caddr_t) bp); 52326f9a767SRodney W. Grimes } 52426f9a767SRodney W. Grimes 52526f9a767SRodney W. Grimes /* 52626f9a767SRodney W. Grimes * small block file system vnode pager input 52726f9a767SRodney W. Grimes */ 52826f9a767SRodney W. Grimes int 52926f9a767SRodney W. Grimes vnode_pager_input_smlfs(vnp, m) 53026f9a767SRodney W. Grimes vn_pager_t vnp; 53126f9a767SRodney W. Grimes vm_page_t m; 53226f9a767SRodney W. Grimes { 53326f9a767SRodney W. Grimes int i; 53426f9a767SRodney W. Grimes int s; 53526f9a767SRodney W. Grimes vm_offset_t paging_offset; 53626f9a767SRodney W. Grimes struct vnode *dp, *vp; 53726f9a767SRodney W. Grimes struct buf *bp; 53826f9a767SRodney W. Grimes vm_offset_t mapsize; 53926f9a767SRodney W. Grimes vm_offset_t foff; 54026f9a767SRodney W. Grimes vm_offset_t kva; 54126f9a767SRodney W. Grimes int fileaddr; 54226f9a767SRodney W. Grimes int block; 54326f9a767SRodney W. Grimes vm_offset_t bsize; 54426f9a767SRodney W. Grimes int error = 0; 54526f9a767SRodney W. Grimes 54626f9a767SRodney W. Grimes paging_offset = m->object->paging_offset; 54726f9a767SRodney W. Grimes vp = vnp->vnp_vp; 54826f9a767SRodney W. Grimes bsize = vp->v_mount->mnt_stat.f_iosize; 54926f9a767SRodney W. Grimes foff = m->offset + paging_offset; 55026f9a767SRodney W. Grimes 55126f9a767SRodney W. Grimes VOP_BMAP(vp, foff, &dp, 0, 0); 55226f9a767SRodney W. Grimes 55326f9a767SRodney W. Grimes kva = vm_pager_map_page(m); 55426f9a767SRodney W. Grimes 55526f9a767SRodney W. Grimes for (i = 0; i < PAGE_SIZE / bsize; i++) { 556bbc0ec52SDavid Greenman 55726f9a767SRodney W. Grimes /* 55826f9a767SRodney W. Grimes * calculate logical block and offset 55926f9a767SRodney W. Grimes */ 56026f9a767SRodney W. Grimes block = foff / bsize + i; 56126f9a767SRodney W. Grimes s = splbio(); 56226f9a767SRodney W. Grimes while (bp = incore(vp, block)) { 56326f9a767SRodney W. Grimes int amount; 56426f9a767SRodney W. Grimes 56526f9a767SRodney W. Grimes /* 56626f9a767SRodney W. Grimes * wait until the buffer is avail or gone 56726f9a767SRodney W. Grimes */ 56826f9a767SRodney W. Grimes if (bp->b_flags & B_BUSY) { 56926f9a767SRodney W. Grimes bp->b_flags |= B_WANTED; 57026f9a767SRodney W. Grimes tsleep((caddr_t) bp, PVM, "vnwblk", 0); 57126f9a767SRodney W. Grimes continue; 57226f9a767SRodney W. Grimes } 57326f9a767SRodney W. Grimes amount = bsize; 57426f9a767SRodney W. Grimes if ((foff + bsize) > vnp->vnp_size) 57526f9a767SRodney W. Grimes amount = vnp->vnp_size - foff; 57626f9a767SRodney W. Grimes 57726f9a767SRodney W. Grimes /* 57826f9a767SRodney W. Grimes * make sure that this page is in the buffer 57926f9a767SRodney W. Grimes */ 58026f9a767SRodney W. Grimes if ((amount > 0) && amount <= bp->b_bcount) { 58126f9a767SRodney W. Grimes bp->b_flags |= B_BUSY; 58226f9a767SRodney W. Grimes splx(s); 58326f9a767SRodney W. Grimes 58426f9a767SRodney W. Grimes /* 58526f9a767SRodney W. Grimes * copy the data from the buffer 58626f9a767SRodney W. Grimes */ 58726f9a767SRodney W. Grimes bcopy(bp->b_un.b_addr, (caddr_t) kva + i * bsize, amount); 58826f9a767SRodney W. Grimes if (amount < bsize) { 58926f9a767SRodney W. Grimes bzero((caddr_t) kva + amount, bsize - amount); 59026f9a767SRodney W. Grimes } 59126f9a767SRodney W. Grimes bp->b_flags &= ~B_BUSY; 59226f9a767SRodney W. Grimes wakeup((caddr_t) bp); 59326f9a767SRodney W. Grimes goto nextblock; 59426f9a767SRodney W. Grimes } 59526f9a767SRodney W. Grimes break; 59626f9a767SRodney W. Grimes } 59726f9a767SRodney W. Grimes splx(s); 59826f9a767SRodney W. Grimes fileaddr = vnode_pager_addr(vp, foff + i * bsize); 59926f9a767SRodney W. Grimes if (fileaddr != -1) { 60026f9a767SRodney W. Grimes bp = getpbuf(); 60126f9a767SRodney W. Grimes VHOLD(vp); 60226f9a767SRodney W. Grimes 60326f9a767SRodney W. Grimes /* build a minimal buffer header */ 60426f9a767SRodney W. Grimes bp->b_flags = B_BUSY | B_READ | B_CALL; 60526f9a767SRodney W. Grimes bp->b_iodone = vnode_pager_iodone; 60626f9a767SRodney W. Grimes bp->b_proc = curproc; 60726f9a767SRodney W. Grimes bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 60826f9a767SRodney W. Grimes if (bp->b_rcred != NOCRED) 60926f9a767SRodney W. Grimes crhold(bp->b_rcred); 61026f9a767SRodney W. Grimes if (bp->b_wcred != NOCRED) 61126f9a767SRodney W. Grimes crhold(bp->b_wcred); 61226f9a767SRodney W. Grimes bp->b_un.b_addr = (caddr_t) kva + i * bsize; 61326f9a767SRodney W. Grimes bp->b_blkno = fileaddr / DEV_BSIZE; 61426f9a767SRodney W. Grimes bgetvp(dp, bp); 61526f9a767SRodney W. Grimes bp->b_bcount = bsize; 61626f9a767SRodney W. Grimes bp->b_bufsize = bsize; 61726f9a767SRodney W. Grimes 61826f9a767SRodney W. Grimes /* do the input */ 61926f9a767SRodney W. Grimes VOP_STRATEGY(bp); 62026f9a767SRodney W. Grimes 62126f9a767SRodney W. Grimes /* we definitely need to be at splbio here */ 62226f9a767SRodney W. Grimes 62326f9a767SRodney W. Grimes s = splbio(); 62426f9a767SRodney W. Grimes while ((bp->b_flags & B_DONE) == 0) { 62526f9a767SRodney W. Grimes tsleep((caddr_t) bp, PVM, "vnsrd", 0); 62626f9a767SRodney W. Grimes } 62726f9a767SRodney W. Grimes splx(s); 62826f9a767SRodney W. Grimes if ((bp->b_flags & B_ERROR) != 0) 62926f9a767SRodney W. Grimes error = EIO; 63026f9a767SRodney W. Grimes 63126f9a767SRodney W. Grimes /* 63226f9a767SRodney W. Grimes * free the buffer header back to the swap buffer pool 63326f9a767SRodney W. Grimes */ 63426f9a767SRodney W. Grimes relpbuf(bp); 63526f9a767SRodney W. Grimes HOLDRELE(vp); 63626f9a767SRodney W. Grimes if (error) 63726f9a767SRodney W. Grimes break; 63826f9a767SRodney W. Grimes } else { 63926f9a767SRodney W. Grimes bzero((caddr_t) kva + i * bsize, bsize); 64026f9a767SRodney W. Grimes } 64126f9a767SRodney W. Grimes nextblock: 64226f9a767SRodney W. Grimes } 64326f9a767SRodney W. Grimes vm_pager_unmap_page(kva); 64426f9a767SRodney W. Grimes if (error) { 64526f9a767SRodney W. Grimes return VM_PAGER_FAIL; 64626f9a767SRodney W. Grimes } 64726f9a767SRodney W. Grimes pmap_clear_modify(VM_PAGE_TO_PHYS(m)); 64826f9a767SRodney W. Grimes m->flags |= PG_CLEAN; 64926f9a767SRodney W. Grimes m->flags &= ~PG_LAUNDRY; 65026f9a767SRodney W. Grimes return VM_PAGER_OK; 65126f9a767SRodney W. Grimes 65226f9a767SRodney W. Grimes } 65326f9a767SRodney W. Grimes 65426f9a767SRodney W. Grimes 65526f9a767SRodney W. Grimes /* 65626f9a767SRodney W. Grimes * old style vnode pager output routine 65726f9a767SRodney W. Grimes */ 65826f9a767SRodney W. Grimes int 65926f9a767SRodney W. Grimes vnode_pager_input_old(vnp, m) 66026f9a767SRodney W. Grimes vn_pager_t vnp; 66126f9a767SRodney W. Grimes vm_page_t m; 66226f9a767SRodney W. Grimes { 66326f9a767SRodney W. Grimes int i; 664df8bae1dSRodney W. Grimes struct uio auio; 665df8bae1dSRodney W. Grimes struct iovec aiov; 66626f9a767SRodney W. Grimes int error; 66726f9a767SRodney W. Grimes int size; 66826f9a767SRodney W. Grimes vm_offset_t foff; 66926f9a767SRodney W. Grimes vm_offset_t kva; 670df8bae1dSRodney W. Grimes 67126f9a767SRodney W. Grimes error = 0; 672df8bae1dSRodney W. Grimes foff = m->offset + m->object->paging_offset; 673bbc0ec52SDavid Greenman 674df8bae1dSRodney W. Grimes /* 67526f9a767SRodney W. Grimes * Return failure if beyond current EOF 67626f9a767SRodney W. Grimes */ 67726f9a767SRodney W. Grimes if (foff >= vnp->vnp_size) { 67826f9a767SRodney W. Grimes return VM_PAGER_BAD; 67926f9a767SRodney W. Grimes } else { 68026f9a767SRodney W. Grimes size = PAGE_SIZE; 68126f9a767SRodney W. Grimes if (foff + size > vnp->vnp_size) 68226f9a767SRodney W. Grimes size = vnp->vnp_size - foff; 68326f9a767SRodney W. Grimes /* 684df8bae1dSRodney W. Grimes * Allocate a kernel virtual address and initialize so that 685df8bae1dSRodney W. Grimes * we can use VOP_READ/WRITE routines. 686df8bae1dSRodney W. Grimes */ 68726f9a767SRodney W. Grimes kva = vm_pager_map_page(m); 688df8bae1dSRodney W. Grimes aiov.iov_base = (caddr_t) kva; 689df8bae1dSRodney W. Grimes aiov.iov_len = size; 690df8bae1dSRodney W. Grimes auio.uio_iov = &aiov; 691df8bae1dSRodney W. Grimes auio.uio_iovcnt = 1; 692df8bae1dSRodney W. Grimes auio.uio_offset = foff; 693df8bae1dSRodney W. Grimes auio.uio_segflg = UIO_SYSSPACE; 69426f9a767SRodney W. Grimes auio.uio_rw = UIO_READ; 695df8bae1dSRodney W. Grimes auio.uio_resid = size; 696df8bae1dSRodney W. Grimes auio.uio_procp = (struct proc *) 0; 69726f9a767SRodney W. Grimes 69826f9a767SRodney W. Grimes error = VOP_READ(vnp->vnp_vp, &auio, 0, curproc->p_ucred); 699df8bae1dSRodney W. Grimes if (!error) { 700df8bae1dSRodney W. Grimes register int count = size - auio.uio_resid; 701df8bae1dSRodney W. Grimes 702df8bae1dSRodney W. Grimes if (count == 0) 703df8bae1dSRodney W. Grimes error = EINVAL; 70426f9a767SRodney W. Grimes else if (count != PAGE_SIZE) 70526f9a767SRodney W. Grimes bzero((caddr_t) kva + count, PAGE_SIZE - count); 706df8bae1dSRodney W. Grimes } 70726f9a767SRodney W. Grimes vm_pager_unmap_page(kva); 708df8bae1dSRodney W. Grimes } 70926f9a767SRodney W. Grimes pmap_clear_modify(VM_PAGE_TO_PHYS(m)); 71026f9a767SRodney W. Grimes m->flags |= PG_CLEAN; 71126f9a767SRodney W. Grimes m->flags &= ~PG_LAUNDRY; 71226f9a767SRodney W. Grimes return error ? VM_PAGER_FAIL : VM_PAGER_OK; 71326f9a767SRodney W. Grimes } 71426f9a767SRodney W. Grimes 71526f9a767SRodney W. Grimes /* 71626f9a767SRodney W. Grimes * generic vnode pager input routine 71726f9a767SRodney W. Grimes */ 71826f9a767SRodney W. Grimes int 71926f9a767SRodney W. Grimes vnode_pager_input(vnp, m, count, reqpage) 72026f9a767SRodney W. Grimes register vn_pager_t vnp; 72126f9a767SRodney W. Grimes vm_page_t *m; 72226f9a767SRodney W. Grimes int count, reqpage; 72326f9a767SRodney W. Grimes { 72426f9a767SRodney W. Grimes int i, j; 72526f9a767SRodney W. Grimes vm_offset_t kva, foff; 72626f9a767SRodney W. Grimes int size; 72726f9a767SRodney W. Grimes struct proc *p = curproc; /* XXX */ 72826f9a767SRodney W. Grimes vm_object_t object; 72926f9a767SRodney W. Grimes vm_offset_t paging_offset; 73026f9a767SRodney W. Grimes struct vnode *dp, *vp; 73126f9a767SRodney W. Grimes vm_offset_t mapsize; 73226f9a767SRodney W. Grimes int bsize; 73326f9a767SRodney W. Grimes 73426f9a767SRodney W. Grimes int first, last; 73526f9a767SRodney W. Grimes int reqaddr, firstaddr; 73626f9a767SRodney W. Grimes int block, offset; 73726f9a767SRodney W. Grimes 73826f9a767SRodney W. Grimes int nbp; 73926f9a767SRodney W. Grimes struct buf *bp; 74026f9a767SRodney W. Grimes int s; 74126f9a767SRodney W. Grimes int failflag; 74226f9a767SRodney W. Grimes 74326f9a767SRodney W. Grimes int errtype = 0; /* 0 is file type otherwise vm type */ 74426f9a767SRodney W. Grimes int error = 0; 74526f9a767SRodney W. Grimes 746bbc0ec52SDavid Greenman object = m[reqpage]->object; /* all vm_page_t items are in same 747bbc0ec52SDavid Greenman * object */ 74826f9a767SRodney W. Grimes paging_offset = object->paging_offset; 74926f9a767SRodney W. Grimes 75026f9a767SRodney W. Grimes vp = vnp->vnp_vp; 75126f9a767SRodney W. Grimes bsize = vp->v_mount->mnt_stat.f_iosize; 75226f9a767SRodney W. Grimes 75326f9a767SRodney W. Grimes /* get the UNDERLYING device for the file with VOP_BMAP() */ 754bbc0ec52SDavid Greenman 75526f9a767SRodney W. Grimes /* 756bbc0ec52SDavid Greenman * originally, we did not check for an error return value -- assuming 757bbc0ec52SDavid Greenman * an fs always has a bmap entry point -- that assumption is wrong!!! 75826f9a767SRodney W. Grimes */ 75926f9a767SRodney W. Grimes kva = 0; 76026f9a767SRodney W. Grimes mapsize = 0; 76126f9a767SRodney W. Grimes foff = m[reqpage]->offset + paging_offset; 76226f9a767SRodney W. Grimes if (!VOP_BMAP(vp, foff, &dp, 0, 0)) { 763bbc0ec52SDavid Greenman 76426f9a767SRodney W. Grimes /* 76526f9a767SRodney W. Grimes * we do not block for a kva, notice we default to a kva 76626f9a767SRodney W. Grimes * conservative behavior 76726f9a767SRodney W. Grimes */ 76826f9a767SRodney W. Grimes kva = kmem_alloc_pageable(pager_map, (mapsize = count * PAGE_SIZE)); 76926f9a767SRodney W. Grimes if (!kva) { 77026f9a767SRodney W. Grimes for (i = 0; i < count; i++) { 77126f9a767SRodney W. Grimes if (i != reqpage) { 77226f9a767SRodney W. Grimes vnode_pager_freepage(m[i]); 77326f9a767SRodney W. Grimes } 77426f9a767SRodney W. Grimes } 77526f9a767SRodney W. Grimes m[0] = m[reqpage]; 77626f9a767SRodney W. Grimes kva = kmem_alloc_wait(pager_map, mapsize = PAGE_SIZE); 77726f9a767SRodney W. Grimes reqpage = 0; 77826f9a767SRodney W. Grimes count = 1; 77926f9a767SRodney W. Grimes } 78026f9a767SRodney W. Grimes } 78126f9a767SRodney W. Grimes 78226f9a767SRodney W. Grimes /* 78326f9a767SRodney W. Grimes * if we can't get a kva or we can't bmap, use old VOP code 78426f9a767SRodney W. Grimes */ 78526f9a767SRodney W. Grimes if (!kva) { 78626f9a767SRodney W. Grimes for (i = 0; i < count; i++) { 78726f9a767SRodney W. Grimes if (i != reqpage) { 78826f9a767SRodney W. Grimes vnode_pager_freepage(m[i]); 78926f9a767SRodney W. Grimes } 79026f9a767SRodney W. Grimes } 79126f9a767SRodney W. Grimes return vnode_pager_input_old(vnp, m[reqpage]); 792bbc0ec52SDavid Greenman 79326f9a767SRodney W. Grimes /* 79426f9a767SRodney W. Grimes * if the blocksize is smaller than a page size, then use 79526f9a767SRodney W. Grimes * special small filesystem code. NFS sometimes has a small 79626f9a767SRodney W. Grimes * blocksize, but it can handle large reads itself. 79726f9a767SRodney W. Grimes */ 79826f9a767SRodney W. Grimes } else if ((PAGE_SIZE / bsize) > 1 && 79926f9a767SRodney W. Grimes (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) { 80026f9a767SRodney W. Grimes 80126f9a767SRodney W. Grimes kmem_free_wakeup(pager_map, kva, mapsize); 80226f9a767SRodney W. Grimes 80326f9a767SRodney W. Grimes for (i = 0; i < count; i++) { 80426f9a767SRodney W. Grimes if (i != reqpage) { 80526f9a767SRodney W. Grimes vnode_pager_freepage(m[i]); 80626f9a767SRodney W. Grimes } 80726f9a767SRodney W. Grimes } 80826f9a767SRodney W. Grimes return vnode_pager_input_smlfs(vnp, m[reqpage]); 80926f9a767SRodney W. Grimes } 81026f9a767SRodney W. Grimes /* 81126f9a767SRodney W. Grimes * here on direct device I/O 81226f9a767SRodney W. Grimes */ 81326f9a767SRodney W. Grimes 81426f9a767SRodney W. Grimes 81526f9a767SRodney W. Grimes /* 81626f9a767SRodney W. Grimes * This pathetic hack gets data from the buffer cache, if it's there. 817bbc0ec52SDavid Greenman * I believe that this is not really necessary, and the ends can be 818bbc0ec52SDavid Greenman * gotten by defaulting to the normal vfs read behavior, but this 81926f9a767SRodney W. Grimes * might be more efficient, because the will NOT invoke read-aheads 820bbc0ec52SDavid Greenman * and one of the purposes of this code is to bypass the buffer cache 821bbc0ec52SDavid Greenman * and keep from flushing it by reading in a program. 82226f9a767SRodney W. Grimes */ 823bbc0ec52SDavid Greenman 82426f9a767SRodney W. Grimes /* 82526f9a767SRodney W. Grimes * calculate logical block and offset 82626f9a767SRodney W. Grimes */ 82726f9a767SRodney W. Grimes block = foff / bsize; 82826f9a767SRodney W. Grimes offset = foff % bsize; 82926f9a767SRodney W. Grimes s = splbio(); 83026f9a767SRodney W. Grimes 83126f9a767SRodney W. Grimes /* 83226f9a767SRodney W. Grimes * if we have a buffer in core, then try to use it 83326f9a767SRodney W. Grimes */ 83426f9a767SRodney W. Grimes while (bp = incore(vp, block)) { 83526f9a767SRodney W. Grimes int amount; 83626f9a767SRodney W. Grimes 83726f9a767SRodney W. Grimes /* 83826f9a767SRodney W. Grimes * wait until the buffer is avail or gone 83926f9a767SRodney W. Grimes */ 84026f9a767SRodney W. Grimes if (bp->b_flags & B_BUSY) { 84126f9a767SRodney W. Grimes bp->b_flags |= B_WANTED; 84226f9a767SRodney W. Grimes tsleep((caddr_t) bp, PVM, "vnwblk", 0); 84326f9a767SRodney W. Grimes continue; 84426f9a767SRodney W. Grimes } 84526f9a767SRodney W. Grimes amount = PAGE_SIZE; 84626f9a767SRodney W. Grimes if ((foff + amount) > vnp->vnp_size) 84726f9a767SRodney W. Grimes amount = vnp->vnp_size - foff; 84826f9a767SRodney W. Grimes 84926f9a767SRodney W. Grimes /* 85026f9a767SRodney W. Grimes * make sure that this page is in the buffer 85126f9a767SRodney W. Grimes */ 85226f9a767SRodney W. Grimes if ((amount > 0) && (offset + amount) <= bp->b_bcount) { 85326f9a767SRodney W. Grimes bp->b_flags |= B_BUSY; 85426f9a767SRodney W. Grimes splx(s); 85526f9a767SRodney W. Grimes 85626f9a767SRodney W. Grimes /* 85726f9a767SRodney W. Grimes * map the requested page 85826f9a767SRodney W. Grimes */ 85926f9a767SRodney W. Grimes pmap_kenter(kva, VM_PAGE_TO_PHYS(m[reqpage])); 86026f9a767SRodney W. Grimes pmap_update(); 86126f9a767SRodney W. Grimes 86226f9a767SRodney W. Grimes /* 86326f9a767SRodney W. Grimes * copy the data from the buffer 86426f9a767SRodney W. Grimes */ 86526f9a767SRodney W. Grimes bcopy(bp->b_un.b_addr + offset, (caddr_t) kva, amount); 86626f9a767SRodney W. Grimes if (amount < PAGE_SIZE) { 86726f9a767SRodney W. Grimes bzero((caddr_t) kva + amount, PAGE_SIZE - amount); 86826f9a767SRodney W. Grimes } 869bbc0ec52SDavid Greenman 87026f9a767SRodney W. Grimes /* 87126f9a767SRodney W. Grimes * unmap the page and free the kva 87226f9a767SRodney W. Grimes */ 87326f9a767SRodney W. Grimes pmap_remove(vm_map_pmap(pager_map), kva, kva + PAGE_SIZE); 87426f9a767SRodney W. Grimes kmem_free_wakeup(pager_map, kva, mapsize); 875bbc0ec52SDavid Greenman 87626f9a767SRodney W. Grimes /* 87726f9a767SRodney W. Grimes * release the buffer back to the block subsystem 87826f9a767SRodney W. Grimes */ 87926f9a767SRodney W. Grimes bp->b_flags &= ~B_BUSY; 88026f9a767SRodney W. Grimes wakeup((caddr_t) bp); 881bbc0ec52SDavid Greenman 88226f9a767SRodney W. Grimes /* 88326f9a767SRodney W. Grimes * we did not have to do any work to get the requested 88426f9a767SRodney W. Grimes * page, the read behind/ahead does not justify a read 88526f9a767SRodney W. Grimes */ 88626f9a767SRodney W. Grimes for (i = 0; i < count; i++) { 88726f9a767SRodney W. Grimes if (i != reqpage) { 88826f9a767SRodney W. Grimes vnode_pager_freepage(m[i]); 88926f9a767SRodney W. Grimes } 89026f9a767SRodney W. Grimes } 89126f9a767SRodney W. Grimes count = 1; 89226f9a767SRodney W. Grimes reqpage = 0; 89326f9a767SRodney W. Grimes m[0] = m[reqpage]; 89426f9a767SRodney W. Grimes 89526f9a767SRodney W. Grimes /* 89626f9a767SRodney W. Grimes * sorry for the goto 89726f9a767SRodney W. Grimes */ 89826f9a767SRodney W. Grimes goto finishup; 89926f9a767SRodney W. Grimes } 900bbc0ec52SDavid Greenman 90126f9a767SRodney W. Grimes /* 90226f9a767SRodney W. Grimes * buffer is nowhere to be found, read from the disk 90326f9a767SRodney W. Grimes */ 90426f9a767SRodney W. Grimes break; 90526f9a767SRodney W. Grimes } 90626f9a767SRodney W. Grimes splx(s); 90726f9a767SRodney W. Grimes 90826f9a767SRodney W. Grimes reqaddr = vnode_pager_addr(vp, foff); 90926f9a767SRodney W. Grimes s = splbio(); 910bbc0ec52SDavid Greenman 91126f9a767SRodney W. Grimes /* 912bbc0ec52SDavid Greenman * Make sure that our I/O request is contiguous. Scan backward and 913bbc0ec52SDavid Greenman * stop for the first discontiguous entry or stop for a page being in 914bbc0ec52SDavid Greenman * buffer cache. 91526f9a767SRodney W. Grimes */ 91626f9a767SRodney W. Grimes failflag = 0; 91726f9a767SRodney W. Grimes first = reqpage; 91826f9a767SRodney W. Grimes for (i = reqpage - 1; i >= 0; --i) { 91926f9a767SRodney W. Grimes if (failflag || 92026f9a767SRodney W. Grimes incore(vp, (foff + (i - reqpage) * PAGE_SIZE) / bsize) || 92126f9a767SRodney W. Grimes (vnode_pager_addr(vp, m[i]->offset + paging_offset)) 92226f9a767SRodney W. Grimes != reqaddr + (i - reqpage) * PAGE_SIZE) { 92326f9a767SRodney W. Grimes vnode_pager_freepage(m[i]); 92426f9a767SRodney W. Grimes failflag = 1; 92526f9a767SRodney W. Grimes } else { 92626f9a767SRodney W. Grimes first = i; 92726f9a767SRodney W. Grimes } 92826f9a767SRodney W. Grimes } 92926f9a767SRodney W. Grimes 93026f9a767SRodney W. Grimes /* 931bbc0ec52SDavid Greenman * Scan forward and stop for the first non-contiguous entry or stop 932bbc0ec52SDavid Greenman * for a page being in buffer cache. 93326f9a767SRodney W. Grimes */ 93426f9a767SRodney W. Grimes failflag = 0; 93526f9a767SRodney W. Grimes last = reqpage + 1; 93626f9a767SRodney W. Grimes for (i = reqpage + 1; i < count; i++) { 93726f9a767SRodney W. Grimes if (failflag || 93826f9a767SRodney W. Grimes incore(vp, (foff + (i - reqpage) * PAGE_SIZE) / bsize) || 93926f9a767SRodney W. Grimes (vnode_pager_addr(vp, m[i]->offset + paging_offset)) 94026f9a767SRodney W. Grimes != reqaddr + (i - reqpage) * PAGE_SIZE) { 94126f9a767SRodney W. Grimes vnode_pager_freepage(m[i]); 94226f9a767SRodney W. Grimes failflag = 1; 94326f9a767SRodney W. Grimes } else { 94426f9a767SRodney W. Grimes last = i + 1; 94526f9a767SRodney W. Grimes } 94626f9a767SRodney W. Grimes } 94726f9a767SRodney W. Grimes splx(s); 94826f9a767SRodney W. Grimes 94926f9a767SRodney W. Grimes /* 950bbc0ec52SDavid Greenman * the first and last page have been calculated now, move input pages 951bbc0ec52SDavid Greenman * to be zero based... 95226f9a767SRodney W. Grimes */ 95326f9a767SRodney W. Grimes count = last; 95426f9a767SRodney W. Grimes if (first != 0) { 95526f9a767SRodney W. Grimes for (i = first; i < count; i++) { 95626f9a767SRodney W. Grimes m[i - first] = m[i]; 95726f9a767SRodney W. Grimes } 95826f9a767SRodney W. Grimes count -= first; 95926f9a767SRodney W. Grimes reqpage -= first; 96026f9a767SRodney W. Grimes } 96126f9a767SRodney W. Grimes 96226f9a767SRodney W. Grimes /* 96326f9a767SRodney W. Grimes * calculate the file virtual address for the transfer 96426f9a767SRodney W. Grimes */ 96526f9a767SRodney W. Grimes foff = m[0]->offset + paging_offset; 966bbc0ec52SDavid Greenman 96726f9a767SRodney W. Grimes /* 96826f9a767SRodney W. Grimes * and get the disk physical address (in bytes) 96926f9a767SRodney W. Grimes */ 97026f9a767SRodney W. Grimes firstaddr = vnode_pager_addr(vp, foff); 97126f9a767SRodney W. Grimes 97226f9a767SRodney W. Grimes /* 97326f9a767SRodney W. Grimes * calculate the size of the transfer 97426f9a767SRodney W. Grimes */ 97526f9a767SRodney W. Grimes size = count * PAGE_SIZE; 97626f9a767SRodney W. Grimes if ((foff + size) > vnp->vnp_size) 97726f9a767SRodney W. Grimes size = vnp->vnp_size - foff; 97826f9a767SRodney W. Grimes 97926f9a767SRodney W. Grimes /* 98026f9a767SRodney W. Grimes * round up physical size for real devices 98126f9a767SRodney W. Grimes */ 98226f9a767SRodney W. Grimes if (dp->v_type == VBLK || dp->v_type == VCHR) 98326f9a767SRodney W. Grimes size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); 98426f9a767SRodney W. Grimes 98526f9a767SRodney W. Grimes /* 98626f9a767SRodney W. Grimes * and map the pages to be read into the kva 98726f9a767SRodney W. Grimes */ 98826f9a767SRodney W. Grimes for (i = 0; i < count; i++) 98926f9a767SRodney W. Grimes pmap_kenter(kva + PAGE_SIZE * i, VM_PAGE_TO_PHYS(m[i])); 99026f9a767SRodney W. Grimes 99126f9a767SRodney W. Grimes pmap_update(); 99226f9a767SRodney W. Grimes bp = getpbuf(); 99326f9a767SRodney W. Grimes VHOLD(vp); 99426f9a767SRodney W. Grimes 99526f9a767SRodney W. Grimes /* build a minimal buffer header */ 99626f9a767SRodney W. Grimes bp->b_flags = B_BUSY | B_READ | B_CALL; 99726f9a767SRodney W. Grimes bp->b_iodone = vnode_pager_iodone; 99826f9a767SRodney W. Grimes /* B_PHYS is not set, but it is nice to fill this in */ 99926f9a767SRodney W. Grimes bp->b_proc = curproc; 100026f9a767SRodney W. Grimes bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 100126f9a767SRodney W. Grimes if (bp->b_rcred != NOCRED) 100226f9a767SRodney W. Grimes crhold(bp->b_rcred); 100326f9a767SRodney W. Grimes if (bp->b_wcred != NOCRED) 100426f9a767SRodney W. Grimes crhold(bp->b_wcred); 100526f9a767SRodney W. Grimes bp->b_un.b_addr = (caddr_t) kva; 100626f9a767SRodney W. Grimes bp->b_blkno = firstaddr / DEV_BSIZE; 100726f9a767SRodney W. Grimes bgetvp(dp, bp); 100826f9a767SRodney W. Grimes bp->b_bcount = size; 100926f9a767SRodney W. Grimes bp->b_bufsize = size; 101026f9a767SRodney W. Grimes 101126f9a767SRodney W. Grimes /* do the input */ 101226f9a767SRodney W. Grimes VOP_STRATEGY(bp); 101326f9a767SRodney W. Grimes 101426f9a767SRodney W. Grimes s = splbio(); 101526f9a767SRodney W. Grimes /* we definitely need to be at splbio here */ 101626f9a767SRodney W. Grimes 101726f9a767SRodney W. Grimes while ((bp->b_flags & B_DONE) == 0) { 101826f9a767SRodney W. Grimes tsleep((caddr_t) bp, PVM, "vnread", 0); 101926f9a767SRodney W. Grimes } 102026f9a767SRodney W. Grimes splx(s); 102126f9a767SRodney W. Grimes if ((bp->b_flags & B_ERROR) != 0) 102226f9a767SRodney W. Grimes error = EIO; 102326f9a767SRodney W. Grimes 102426f9a767SRodney W. Grimes if (!error) { 102526f9a767SRodney W. Grimes if (size != count * PAGE_SIZE) 102626f9a767SRodney W. Grimes bzero((caddr_t) kva + size, PAGE_SIZE * count - size); 102726f9a767SRodney W. Grimes } 102826f9a767SRodney W. Grimes pmap_remove(vm_map_pmap(pager_map), kva, kva + PAGE_SIZE * count); 102926f9a767SRodney W. Grimes kmem_free_wakeup(pager_map, kva, mapsize); 103026f9a767SRodney W. Grimes 103126f9a767SRodney W. Grimes /* 103226f9a767SRodney W. Grimes * free the buffer header back to the swap buffer pool 103326f9a767SRodney W. Grimes */ 103426f9a767SRodney W. Grimes relpbuf(bp); 103526f9a767SRodney W. Grimes HOLDRELE(vp); 103626f9a767SRodney W. Grimes 103726f9a767SRodney W. Grimes finishup: 103826f9a767SRodney W. Grimes for (i = 0; i < count; i++) { 103926f9a767SRodney W. Grimes pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); 104026f9a767SRodney W. Grimes m[i]->flags |= PG_CLEAN; 104126f9a767SRodney W. Grimes m[i]->flags &= ~PG_LAUNDRY; 104226f9a767SRodney W. Grimes if (i != reqpage) { 1043bbc0ec52SDavid Greenman 104426f9a767SRodney W. Grimes /* 1045bbc0ec52SDavid Greenman * whether or not to leave the page activated is up in 1046bbc0ec52SDavid Greenman * the air, but we should put the page on a page queue 1047bbc0ec52SDavid Greenman * somewhere. (it already is in the object). Result: 1048bbc0ec52SDavid Greenman * It appears that emperical results show that 1049bbc0ec52SDavid Greenman * deactivating pages is best. 105026f9a767SRodney W. Grimes */ 1051bbc0ec52SDavid Greenman 105226f9a767SRodney W. Grimes /* 1053bbc0ec52SDavid Greenman * just in case someone was asking for this page we 1054bbc0ec52SDavid Greenman * now tell them that it is ok to use 105526f9a767SRodney W. Grimes */ 105626f9a767SRodney W. Grimes if (!error) { 105726f9a767SRodney W. Grimes vm_page_deactivate(m[i]); 105826f9a767SRodney W. Grimes PAGE_WAKEUP(m[i]); 105926f9a767SRodney W. Grimes m[i]->flags &= ~PG_FAKE; 106026f9a767SRodney W. Grimes } else { 106126f9a767SRodney W. Grimes vnode_pager_freepage(m[i]); 106226f9a767SRodney W. Grimes } 106326f9a767SRodney W. Grimes } 106426f9a767SRodney W. Grimes } 106526f9a767SRodney W. Grimes if (error) { 106626f9a767SRodney W. Grimes printf("vnode pager read error: %d\n", error); 106726f9a767SRodney W. Grimes } 106826f9a767SRodney W. Grimes if (errtype) 106926f9a767SRodney W. Grimes return error; 107026f9a767SRodney W. Grimes return (error ? VM_PAGER_FAIL : VM_PAGER_OK); 107126f9a767SRodney W. Grimes } 107226f9a767SRodney W. Grimes 107326f9a767SRodney W. Grimes /* 107426f9a767SRodney W. Grimes * old-style vnode pager output routine 107526f9a767SRodney W. Grimes */ 107626f9a767SRodney W. Grimes int 107726f9a767SRodney W. Grimes vnode_pager_output_old(vnp, m) 107826f9a767SRodney W. Grimes register vn_pager_t vnp; 107926f9a767SRodney W. Grimes vm_page_t m; 108026f9a767SRodney W. Grimes { 108126f9a767SRodney W. Grimes vm_offset_t foff; 108226f9a767SRodney W. Grimes vm_offset_t kva; 108326f9a767SRodney W. Grimes vm_offset_t size; 108426f9a767SRodney W. Grimes struct iovec aiov; 108526f9a767SRodney W. Grimes struct uio auio; 108626f9a767SRodney W. Grimes struct vnode *vp; 108726f9a767SRodney W. Grimes int error; 108826f9a767SRodney W. Grimes 108926f9a767SRodney W. Grimes vp = vnp->vnp_vp; 109026f9a767SRodney W. Grimes foff = m->offset + m->object->paging_offset; 1091bbc0ec52SDavid Greenman 109226f9a767SRodney W. Grimes /* 109326f9a767SRodney W. Grimes * Return failure if beyond current EOF 109426f9a767SRodney W. Grimes */ 109526f9a767SRodney W. Grimes if (foff >= vnp->vnp_size) { 109626f9a767SRodney W. Grimes return VM_PAGER_BAD; 109726f9a767SRodney W. Grimes } else { 109826f9a767SRodney W. Grimes size = PAGE_SIZE; 109926f9a767SRodney W. Grimes if (foff + size > vnp->vnp_size) 110026f9a767SRodney W. Grimes size = vnp->vnp_size - foff; 110126f9a767SRodney W. Grimes /* 110226f9a767SRodney W. Grimes * Allocate a kernel virtual address and initialize so that 110326f9a767SRodney W. Grimes * we can use VOP_WRITE routines. 110426f9a767SRodney W. Grimes */ 110526f9a767SRodney W. Grimes kva = vm_pager_map_page(m); 110626f9a767SRodney W. Grimes aiov.iov_base = (caddr_t) kva; 110726f9a767SRodney W. Grimes aiov.iov_len = size; 110826f9a767SRodney W. Grimes auio.uio_iov = &aiov; 110926f9a767SRodney W. Grimes auio.uio_iovcnt = 1; 111026f9a767SRodney W. Grimes auio.uio_offset = foff; 111126f9a767SRodney W. Grimes auio.uio_segflg = UIO_SYSSPACE; 111226f9a767SRodney W. Grimes auio.uio_rw = UIO_WRITE; 111326f9a767SRodney W. Grimes auio.uio_resid = size; 111426f9a767SRodney W. Grimes auio.uio_procp = (struct proc *) 0; 111526f9a767SRodney W. Grimes 111626f9a767SRodney W. Grimes error = VOP_WRITE(vp, &auio, 0, curproc->p_ucred); 111726f9a767SRodney W. Grimes 111826f9a767SRodney W. Grimes if (!error) { 111926f9a767SRodney W. Grimes if ((size - auio.uio_resid) == 0) { 112026f9a767SRodney W. Grimes error = EINVAL; 112126f9a767SRodney W. Grimes } 112226f9a767SRodney W. Grimes } 112326f9a767SRodney W. Grimes vm_pager_unmap_page(kva); 112426f9a767SRodney W. Grimes return error ? VM_PAGER_FAIL : VM_PAGER_OK; 112526f9a767SRodney W. Grimes } 112626f9a767SRodney W. Grimes } 112726f9a767SRodney W. Grimes 112826f9a767SRodney W. Grimes /* 112926f9a767SRodney W. Grimes * vnode pager output on a small-block file system 113026f9a767SRodney W. Grimes */ 113126f9a767SRodney W. Grimes int 113226f9a767SRodney W. Grimes vnode_pager_output_smlfs(vnp, m) 113326f9a767SRodney W. Grimes vn_pager_t vnp; 113426f9a767SRodney W. Grimes vm_page_t m; 113526f9a767SRodney W. Grimes { 113626f9a767SRodney W. Grimes int i; 113726f9a767SRodney W. Grimes int s; 113826f9a767SRodney W. Grimes vm_offset_t paging_offset; 113926f9a767SRodney W. Grimes struct vnode *dp, *vp; 114026f9a767SRodney W. Grimes struct buf *bp; 114126f9a767SRodney W. Grimes vm_offset_t mapsize; 114226f9a767SRodney W. Grimes vm_offset_t foff; 114326f9a767SRodney W. Grimes vm_offset_t kva; 114426f9a767SRodney W. Grimes int fileaddr; 114526f9a767SRodney W. Grimes int block; 114626f9a767SRodney W. Grimes vm_offset_t bsize; 114726f9a767SRodney W. Grimes int error = 0; 114826f9a767SRodney W. Grimes 114926f9a767SRodney W. Grimes paging_offset = m->object->paging_offset; 115026f9a767SRodney W. Grimes vp = vnp->vnp_vp; 115126f9a767SRodney W. Grimes bsize = vp->v_mount->mnt_stat.f_iosize; 115226f9a767SRodney W. Grimes foff = m->offset + paging_offset; 115326f9a767SRodney W. Grimes 115426f9a767SRodney W. Grimes VOP_BMAP(vp, foff, &dp, 0, 0); 115526f9a767SRodney W. Grimes kva = vm_pager_map_page(m); 115626f9a767SRodney W. Grimes for (i = 0; !error && i < (PAGE_SIZE / bsize); i++) { 1157bbc0ec52SDavid Greenman 115826f9a767SRodney W. Grimes /* 115926f9a767SRodney W. Grimes * calculate logical block and offset 116026f9a767SRodney W. Grimes */ 116126f9a767SRodney W. Grimes fileaddr = vnode_pager_addr(vp, foff + i * bsize); 116226f9a767SRodney W. Grimes if (fileaddr != -1) { 116326f9a767SRodney W. Grimes s = splbio(); 116426f9a767SRodney W. Grimes if (bp = incore(vp, (foff / bsize) + i)) { 116526f9a767SRodney W. Grimes bp = getblk(vp, (foff / bsize) + i, bp->b_bufsize, 0, 0); 116626f9a767SRodney W. Grimes bp->b_flags |= B_INVAL; 116726f9a767SRodney W. Grimes brelse(bp); 116826f9a767SRodney W. Grimes } 116926f9a767SRodney W. Grimes splx(s); 117026f9a767SRodney W. Grimes 117126f9a767SRodney W. Grimes bp = getpbuf(); 117226f9a767SRodney W. Grimes VHOLD(vp); 117326f9a767SRodney W. Grimes 117426f9a767SRodney W. Grimes /* build a minimal buffer header */ 117526f9a767SRodney W. Grimes bp->b_flags = B_BUSY | B_CALL | B_WRITE; 117626f9a767SRodney W. Grimes bp->b_iodone = vnode_pager_iodone; 117726f9a767SRodney W. Grimes bp->b_proc = curproc; 117826f9a767SRodney W. Grimes bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 117926f9a767SRodney W. Grimes if (bp->b_rcred != NOCRED) 118026f9a767SRodney W. Grimes crhold(bp->b_rcred); 118126f9a767SRodney W. Grimes if (bp->b_wcred != NOCRED) 118226f9a767SRodney W. Grimes crhold(bp->b_wcred); 118326f9a767SRodney W. Grimes bp->b_un.b_addr = (caddr_t) kva + i * bsize; 118426f9a767SRodney W. Grimes bp->b_blkno = fileaddr / DEV_BSIZE; 118526f9a767SRodney W. Grimes bgetvp(dp, bp); 118626f9a767SRodney W. Grimes ++dp->v_numoutput; 118726f9a767SRodney W. Grimes /* for NFS */ 118826f9a767SRodney W. Grimes bp->b_dirtyoff = 0; 118926f9a767SRodney W. Grimes bp->b_dirtyend = bsize; 119026f9a767SRodney W. Grimes bp->b_bcount = bsize; 119126f9a767SRodney W. Grimes bp->b_bufsize = bsize; 119226f9a767SRodney W. Grimes 119326f9a767SRodney W. Grimes /* do the input */ 119426f9a767SRodney W. Grimes VOP_STRATEGY(bp); 119526f9a767SRodney W. Grimes 119626f9a767SRodney W. Grimes /* we definitely need to be at splbio here */ 119726f9a767SRodney W. Grimes 119826f9a767SRodney W. Grimes s = splbio(); 119926f9a767SRodney W. Grimes while ((bp->b_flags & B_DONE) == 0) { 120026f9a767SRodney W. Grimes tsleep((caddr_t) bp, PVM, "vnswrt", 0); 120126f9a767SRodney W. Grimes } 120226f9a767SRodney W. Grimes splx(s); 120326f9a767SRodney W. Grimes if ((bp->b_flags & B_ERROR) != 0) 120426f9a767SRodney W. Grimes error = EIO; 120526f9a767SRodney W. Grimes 120626f9a767SRodney W. Grimes /* 120726f9a767SRodney W. Grimes * free the buffer header back to the swap buffer pool 120826f9a767SRodney W. Grimes */ 120926f9a767SRodney W. Grimes relpbuf(bp); 121026f9a767SRodney W. Grimes HOLDRELE(vp); 121126f9a767SRodney W. Grimes } 121226f9a767SRodney W. Grimes } 121326f9a767SRodney W. Grimes vm_pager_unmap_page(kva); 121426f9a767SRodney W. Grimes if (error) 121526f9a767SRodney W. Grimes return VM_PAGER_FAIL; 121626f9a767SRodney W. Grimes else 121726f9a767SRodney W. Grimes return VM_PAGER_OK; 121826f9a767SRodney W. Grimes } 121926f9a767SRodney W. Grimes 122026f9a767SRodney W. Grimes /* 122126f9a767SRodney W. Grimes * generic vnode pager output routine 122226f9a767SRodney W. Grimes */ 122326f9a767SRodney W. Grimes int 122426f9a767SRodney W. Grimes vnode_pager_output(vnp, m, count, rtvals) 122526f9a767SRodney W. Grimes vn_pager_t vnp; 122626f9a767SRodney W. Grimes vm_page_t *m; 122726f9a767SRodney W. Grimes int count; 122826f9a767SRodney W. Grimes int *rtvals; 122926f9a767SRodney W. Grimes { 123026f9a767SRodney W. Grimes int i, j; 123126f9a767SRodney W. Grimes vm_offset_t kva, foff; 123226f9a767SRodney W. Grimes int size; 123326f9a767SRodney W. Grimes struct proc *p = curproc; /* XXX */ 123426f9a767SRodney W. Grimes vm_object_t object; 123526f9a767SRodney W. Grimes vm_offset_t paging_offset; 123626f9a767SRodney W. Grimes struct vnode *dp, *vp; 123726f9a767SRodney W. Grimes struct buf *bp; 123826f9a767SRodney W. Grimes vm_offset_t mapsize; 123926f9a767SRodney W. Grimes vm_offset_t reqaddr; 124026f9a767SRodney W. Grimes int bsize; 124126f9a767SRodney W. Grimes int s; 124226f9a767SRodney W. Grimes 124326f9a767SRodney W. Grimes int error = 0; 124426f9a767SRodney W. Grimes 124526f9a767SRodney W. Grimes retryoutput: 124626f9a767SRodney W. Grimes object = m[0]->object; /* all vm_page_t items are in same object */ 124726f9a767SRodney W. Grimes paging_offset = object->paging_offset; 124826f9a767SRodney W. Grimes 124926f9a767SRodney W. Grimes vp = vnp->vnp_vp; 125026f9a767SRodney W. Grimes bsize = vp->v_mount->mnt_stat.f_iosize; 125126f9a767SRodney W. Grimes 125226f9a767SRodney W. Grimes for (i = 0; i < count; i++) 125326f9a767SRodney W. Grimes rtvals[i] = VM_PAGER_AGAIN; 125426f9a767SRodney W. Grimes 125526f9a767SRodney W. Grimes /* 1256bbc0ec52SDavid Greenman * if the filesystem does not have a bmap, then use the old code 125726f9a767SRodney W. Grimes */ 125826f9a767SRodney W. Grimes if (VOP_BMAP(vp, m[0]->offset + paging_offset, &dp, 0, 0)) { 125926f9a767SRodney W. Grimes 126026f9a767SRodney W. Grimes rtvals[0] = vnode_pager_output_old(vnp, m[0]); 126126f9a767SRodney W. Grimes 126226f9a767SRodney W. Grimes pmap_clear_modify(VM_PAGE_TO_PHYS(m[0])); 126326f9a767SRodney W. Grimes m[0]->flags |= PG_CLEAN; 126426f9a767SRodney W. Grimes m[0]->flags &= ~PG_LAUNDRY; 126526f9a767SRodney W. Grimes return rtvals[0]; 126626f9a767SRodney W. Grimes } 126726f9a767SRodney W. Grimes 126826f9a767SRodney W. Grimes /* 1269bbc0ec52SDavid Greenman * if the filesystem has a small blocksize, then use the small block 1270bbc0ec52SDavid Greenman * filesystem output code 127126f9a767SRodney W. Grimes */ 127226f9a767SRodney W. Grimes if ((bsize < PAGE_SIZE) && 127326f9a767SRodney W. Grimes (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) { 127426f9a767SRodney W. Grimes 127526f9a767SRodney W. Grimes for (i = 0; i < count; i++) { 127626f9a767SRodney W. Grimes rtvals[i] = vnode_pager_output_smlfs(vnp, m[i]); 127726f9a767SRodney W. Grimes if (rtvals[i] == VM_PAGER_OK) { 127826f9a767SRodney W. Grimes pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); 127926f9a767SRodney W. Grimes m[i]->flags |= PG_CLEAN; 128026f9a767SRodney W. Grimes m[i]->flags &= ~PG_LAUNDRY; 128126f9a767SRodney W. Grimes } 128226f9a767SRodney W. Grimes } 128326f9a767SRodney W. Grimes return rtvals[0]; 128426f9a767SRodney W. Grimes } 128526f9a767SRodney W. Grimes 128626f9a767SRodney W. Grimes /* 128726f9a767SRodney W. Grimes * get some kva for the output 128826f9a767SRodney W. Grimes */ 128926f9a767SRodney W. Grimes kva = kmem_alloc_pageable(pager_map, (mapsize = count * PAGE_SIZE)); 129026f9a767SRodney W. Grimes if (!kva) { 129126f9a767SRodney W. Grimes kva = kmem_alloc_pageable(pager_map, (mapsize = PAGE_SIZE)); 129226f9a767SRodney W. Grimes count = 1; 129326f9a767SRodney W. Grimes if (!kva) 129426f9a767SRodney W. Grimes return rtvals[0]; 129526f9a767SRodney W. Grimes } 129626f9a767SRodney W. Grimes for (i = 0; i < count; i++) { 129726f9a767SRodney W. Grimes foff = m[i]->offset + paging_offset; 129826f9a767SRodney W. Grimes if (foff >= vnp->vnp_size) { 129926f9a767SRodney W. Grimes for (j = i; j < count; j++) 130026f9a767SRodney W. Grimes rtvals[j] = VM_PAGER_BAD; 130126f9a767SRodney W. Grimes count = i; 130226f9a767SRodney W. Grimes break; 130326f9a767SRodney W. Grimes } 130426f9a767SRodney W. Grimes } 130526f9a767SRodney W. Grimes if (count == 0) { 130626f9a767SRodney W. Grimes return rtvals[0]; 130726f9a767SRodney W. Grimes } 130826f9a767SRodney W. Grimes foff = m[0]->offset + paging_offset; 130926f9a767SRodney W. Grimes reqaddr = vnode_pager_addr(vp, foff); 1310bbc0ec52SDavid Greenman 131126f9a767SRodney W. Grimes /* 1312bbc0ec52SDavid Greenman * Scan forward and stop for the first non-contiguous entry or stop 1313bbc0ec52SDavid Greenman * for a page being in buffer cache. 131426f9a767SRodney W. Grimes */ 131526f9a767SRodney W. Grimes for (i = 1; i < count; i++) { 131626f9a767SRodney W. Grimes if (vnode_pager_addr(vp, m[i]->offset + paging_offset) 131726f9a767SRodney W. Grimes != reqaddr + i * PAGE_SIZE) { 131826f9a767SRodney W. Grimes count = i; 131926f9a767SRodney W. Grimes break; 132026f9a767SRodney W. Grimes } 132126f9a767SRodney W. Grimes } 132226f9a767SRodney W. Grimes 132326f9a767SRodney W. Grimes /* 132426f9a767SRodney W. Grimes * calculate the size of the transfer 132526f9a767SRodney W. Grimes */ 132626f9a767SRodney W. Grimes size = count * PAGE_SIZE; 132726f9a767SRodney W. Grimes if ((foff + size) > vnp->vnp_size) 132826f9a767SRodney W. Grimes size = vnp->vnp_size - foff; 132926f9a767SRodney W. Grimes 133026f9a767SRodney W. Grimes /* 133126f9a767SRodney W. Grimes * round up physical size for real devices 133226f9a767SRodney W. Grimes */ 133326f9a767SRodney W. Grimes if (dp->v_type == VBLK || dp->v_type == VCHR) 133426f9a767SRodney W. Grimes size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); 133526f9a767SRodney W. Grimes 133626f9a767SRodney W. Grimes /* 133726f9a767SRodney W. Grimes * and map the pages to be read into the kva 133826f9a767SRodney W. Grimes */ 133926f9a767SRodney W. Grimes for (i = 0; i < count; i++) 134026f9a767SRodney W. Grimes pmap_kenter(kva + PAGE_SIZE * i, VM_PAGE_TO_PHYS(m[i])); 134126f9a767SRodney W. Grimes pmap_update(); 134226f9a767SRodney W. Grimes /* 134326f9a767SRodney W. Grimes printf("vnode: writing foff: %d, devoff: %d, size: %d\n", 134426f9a767SRodney W. Grimes foff, reqaddr, size); 134526f9a767SRodney W. Grimes */ 1346bbc0ec52SDavid Greenman 134726f9a767SRodney W. Grimes /* 134826f9a767SRodney W. Grimes * next invalidate the incore vfs_bio data 134926f9a767SRodney W. Grimes */ 135026f9a767SRodney W. Grimes for (i = 0; i < count; i++) { 135126f9a767SRodney W. Grimes int filblock = (foff + i * PAGE_SIZE) / bsize; 135226f9a767SRodney W. Grimes struct buf *fbp; 135326f9a767SRodney W. Grimes 135426f9a767SRodney W. Grimes s = splbio(); 135526f9a767SRodney W. Grimes if (fbp = incore(vp, filblock)) { 135626f9a767SRodney W. Grimes fbp = getblk(vp, filblock, fbp->b_bufsize, 0, 0); 1357bbc0ec52SDavid Greenman if (fbp->b_flags & B_DELWRI) { 1358bbc0ec52SDavid Greenman if (fbp->b_bufsize <= PAGE_SIZE) 1359bbc0ec52SDavid Greenman fbp->b_flags &= ~B_DELWRI; 1360bbc0ec52SDavid Greenman else { 1361bbc0ec52SDavid Greenman bwrite(fbp); 1362bbc0ec52SDavid Greenman fbp = getblk(vp, filblock, 1363bbc0ec52SDavid Greenman fbp->b_bufsize, 0, 0); 1364bbc0ec52SDavid Greenman } 1365bbc0ec52SDavid Greenman } 136626f9a767SRodney W. Grimes fbp->b_flags |= B_INVAL; 136726f9a767SRodney W. Grimes brelse(fbp); 136826f9a767SRodney W. Grimes } 136926f9a767SRodney W. Grimes splx(s); 137026f9a767SRodney W. Grimes } 137126f9a767SRodney W. Grimes 137226f9a767SRodney W. Grimes 137326f9a767SRodney W. Grimes bp = getpbuf(); 137426f9a767SRodney W. Grimes VHOLD(vp); 137526f9a767SRodney W. Grimes /* build a minimal buffer header */ 137626f9a767SRodney W. Grimes bp->b_flags = B_BUSY | B_WRITE | B_CALL; 137726f9a767SRodney W. Grimes bp->b_iodone = vnode_pager_iodone; 137826f9a767SRodney W. Grimes /* B_PHYS is not set, but it is nice to fill this in */ 137926f9a767SRodney W. Grimes bp->b_proc = curproc; 138026f9a767SRodney W. Grimes bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 138126f9a767SRodney W. Grimes 138226f9a767SRodney W. Grimes if (bp->b_rcred != NOCRED) 138326f9a767SRodney W. Grimes crhold(bp->b_rcred); 138426f9a767SRodney W. Grimes if (bp->b_wcred != NOCRED) 138526f9a767SRodney W. Grimes crhold(bp->b_wcred); 138626f9a767SRodney W. Grimes bp->b_un.b_addr = (caddr_t) kva; 138726f9a767SRodney W. Grimes bp->b_blkno = reqaddr / DEV_BSIZE; 138826f9a767SRodney W. Grimes bgetvp(dp, bp); 138926f9a767SRodney W. Grimes ++dp->v_numoutput; 139026f9a767SRodney W. Grimes 139126f9a767SRodney W. Grimes /* for NFS */ 139226f9a767SRodney W. Grimes bp->b_dirtyoff = 0; 139326f9a767SRodney W. Grimes bp->b_dirtyend = size; 139426f9a767SRodney W. Grimes 139526f9a767SRodney W. Grimes bp->b_bcount = size; 139626f9a767SRodney W. Grimes bp->b_bufsize = size; 139726f9a767SRodney W. Grimes 139826f9a767SRodney W. Grimes /* do the output */ 139926f9a767SRodney W. Grimes VOP_STRATEGY(bp); 140026f9a767SRodney W. Grimes 140126f9a767SRodney W. Grimes s = splbio(); 140226f9a767SRodney W. Grimes 140326f9a767SRodney W. Grimes /* we definitely need to be at splbio here */ 140426f9a767SRodney W. Grimes 140526f9a767SRodney W. Grimes while ((bp->b_flags & B_DONE) == 0) { 140626f9a767SRodney W. Grimes tsleep((caddr_t) bp, PVM, "vnwrite", 0); 140726f9a767SRodney W. Grimes } 140826f9a767SRodney W. Grimes splx(s); 140926f9a767SRodney W. Grimes 141026f9a767SRodney W. Grimes if ((bp->b_flags & B_ERROR) != 0) 141126f9a767SRodney W. Grimes error = EIO; 141226f9a767SRodney W. Grimes 141326f9a767SRodney W. Grimes pmap_remove(vm_map_pmap(pager_map), kva, kva + PAGE_SIZE * count); 141426f9a767SRodney W. Grimes kmem_free_wakeup(pager_map, kva, mapsize); 141526f9a767SRodney W. Grimes 141626f9a767SRodney W. Grimes /* 141726f9a767SRodney W. Grimes * free the buffer header back to the swap buffer pool 141826f9a767SRodney W. Grimes */ 141926f9a767SRodney W. Grimes relpbuf(bp); 142026f9a767SRodney W. Grimes HOLDRELE(vp); 142126f9a767SRodney W. Grimes 142226f9a767SRodney W. Grimes if (!error) { 142326f9a767SRodney W. Grimes for (i = 0; i < count; i++) { 142426f9a767SRodney W. Grimes pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); 142526f9a767SRodney W. Grimes m[i]->flags |= PG_CLEAN; 142626f9a767SRodney W. Grimes m[i]->flags &= ~PG_LAUNDRY; 142726f9a767SRodney W. Grimes rtvals[i] = VM_PAGER_OK; 142826f9a767SRodney W. Grimes } 142926f9a767SRodney W. Grimes } else if (count != 1) { 143026f9a767SRodney W. Grimes error = 0; 143126f9a767SRodney W. Grimes count = 1; 143226f9a767SRodney W. Grimes goto retryoutput; 143326f9a767SRodney W. Grimes } 143426f9a767SRodney W. Grimes if (error) { 143526f9a767SRodney W. Grimes printf("vnode pager write error: %d\n", error); 143626f9a767SRodney W. Grimes } 143726f9a767SRodney W. Grimes return (error ? VM_PAGER_FAIL : VM_PAGER_OK); 143826f9a767SRodney W. Grimes } 1439