1*a5652762Spraks /* 2*a5652762Spraks * CDDL HEADER START 3*a5652762Spraks * 4*a5652762Spraks * The contents of this file are subject to the terms of the 5*a5652762Spraks * Common Development and Distribution License (the "License"). 6*a5652762Spraks * You may not use this file except in compliance with the License. 7*a5652762Spraks * 8*a5652762Spraks * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*a5652762Spraks * or http://www.opensolaris.org/os/licensing. 10*a5652762Spraks * See the License for the specific language governing permissions 11*a5652762Spraks * and limitations under the License. 12*a5652762Spraks * 13*a5652762Spraks * When distributing Covered Code, include this CDDL HEADER in each 14*a5652762Spraks * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*a5652762Spraks * If applicable, add the following below this CDDL HEADER, with the 16*a5652762Spraks * fields enclosed by brackets "[]" replaced with your own identifying 17*a5652762Spraks * information: Portions Copyright [yyyy] [name of copyright owner] 18*a5652762Spraks * 19*a5652762Spraks * CDDL HEADER END 20*a5652762Spraks */ 21*a5652762Spraks /* 22*a5652762Spraks * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23*a5652762Spraks * Use is subject to license terms. 24*a5652762Spraks */ 25*a5652762Spraks 26*a5652762Spraks #ifndef _VM_VPM_H 27*a5652762Spraks #define _VM_VPM_H 28*a5652762Spraks 29*a5652762Spraks #pragma ident "%Z%%M% %I% %E% SMI" 30*a5652762Spraks 31*a5652762Spraks #ifdef __cplusplus 32*a5652762Spraks extern "C" { 33*a5652762Spraks #endif 34*a5652762Spraks 35*a5652762Spraks /* 36*a5652762Spraks * The vnode page mappings(VPM) interfaces. 37*a5652762Spraks * "Commitment level - Consolidation private". They are subject 38*a5652762Spraks * to change without notice. Use them at your own risk. 39*a5652762Spraks * 40*a5652762Spraks * At this stage these interfaces are provided only to utilize the 41*a5652762Spraks * segkpm mappings and are enabled for solaris x64. Therefore these 42*a5652762Spraks * interfaces have to be used under the 'vpm_enable' check as an 43*a5652762Spraks * alternative to segmap interfaces where applicable. 44*a5652762Spraks * 45*a5652762Spraks * The VPM interfaces provide temporary mappings to file pages. They 46*a5652762Spraks * return the mappings in a scatter gather list(SGL). 47*a5652762Spraks * The SGL elements are the structure 'vmap_t'. 48*a5652762Spraks * 49*a5652762Spraks * typedef struct vmap { 50*a5652762Spraks * caddr_t vs_addr; / public / 51*a5652762Spraks * size_t vs_len; / public - Currently not used / 52*a5652762Spraks * void *vs_data; / opaque - private data / 53*a5652762Spraks * } vmap_t; 54*a5652762Spraks * 55*a5652762Spraks * An array of this structure has to be passed to the interface routines 56*a5652762Spraks * along with the size(# of elements) of the SGL array. Depending on the 57*a5652762Spraks * requested length and mapped chunk sizes(PAGESIZE here), the number of 58*a5652762Spraks * valid mappings returned can be less then actual size of the SGL array. 59*a5652762Spraks * Always, an element in the SGL will have 'vs_addr' set to NULL which 60*a5652762Spraks * marks the end of the valid entires in the SGL. 61*a5652762Spraks * 62*a5652762Spraks * The vmap_t structure members are populated with the mapped address 63*a5652762Spraks * in 'vs_addr' and length of the mapping in 'vs_len'. Currently the 64*a5652762Spraks * mapping length is fixed at PAGESIZE. The 'vs_data' member is private 65*a5652762Spraks * and the caller should not access or modify it. 66*a5652762Spraks * 67*a5652762Spraks * Using a scatter gather list to return the mappings and length makes it 68*a5652762Spraks * possible to provide mappings of variable length. Currently mapping length 69*a5652762Spraks * of only 'PAGESIZE' per vmap_t is possible. Also, similar to the segmap 70*a5652762Spraks * interfaces, on each request, the max length of 'MAXBSIZE' is supported 71*a5652762Spraks * for now. The MAXBSIZE mappings will be returned in 1 or 2 vmap_t elements 72*a5652762Spraks * of the SGL depending on the PAGESIZE. The scatter gather list array size 73*a5652762Spraks * needs to be a minimum of MINVMAPS elements to accommodate MAXBSIZE. 74*a5652762Spraks * The MAXBSIZE restriction exists because the filesystems are not capable 75*a5652762Spraks * of handling more(disk block allocations at a time) for now. 76*a5652762Spraks * 77*a5652762Spraks * 78*a5652762Spraks * Interfaces: 79*a5652762Spraks * 80*a5652762Spraks * int vpm_map_pages( struct vnode *vp, u_offset_t off, size_t len, 81*a5652762Spraks * int fetchpage, vmap_t *vml, int vmlsz, 82*a5652762Spraks * int *newpagecreated, enum seg_rw rw); 83*a5652762Spraks * 84*a5652762Spraks * This function returns mappings to vnode pages. 85*a5652762Spraks * 86*a5652762Spraks * It takes a vnode, offset and length and returns mappings to the pages 87*a5652762Spraks * covering the range [off, off +len) in the vmap_t SGL array 'vml'. 88*a5652762Spraks * Currently these interfaces are subject to restrictions similar to the segmap 89*a5652762Spraks * interfaces. The length passed in should satisfy the following criteria. 90*a5652762Spraks * '(off + len) <= ((off & PAGEMASK) + MAXBSIZE)' 91*a5652762Spraks * The mapped address returned, in 'vs_addr', are for the page boundary. 92*a5652762Spraks * 93*a5652762Spraks * The 'vmlsz' is the size(# elements) of the 'vml' array. 94*a5652762Spraks * 95*a5652762Spraks * When the 'fetchpage' flag is set, the vnode(file) pages will be fetched 96*a5652762Spraks * (calls VOP_GETPAGE) from the backing store(disk) if not found in the 97*a5652762Spraks * system page cache. If 'fetchpage == 0', the vnode(file) pages for the 98*a5652762Spraks * given offset will be just created if they are not already present in the 99*a5652762Spraks * system page cache. The 'newpagecreated' flag is set on return if new pages 100*a5652762Spraks * are created when 'fetchpage == 0'(requested to just create new pages). 101*a5652762Spraks * 102*a5652762Spraks * The 'seg_rw rw' indicates the intended operation on these mappings 103*a5652762Spraks * (S_WRITE or S_READ). 104*a5652762Spraks * 105*a5652762Spraks * Currently these interfaces only return segkpm mappings. Therefore the 106*a5652762Spraks * vnode pages that are being accessed will be locked(at least SHARED locked) 107*a5652762Spraks * for the duration these mappings are in use. After use, the unmap 108*a5652762Spraks * function, vpm_unmap_pages(), has to be called and the same SGL array 109*a5652762Spraks * needs to be passed to the unmap function. 110*a5652762Spraks * 111*a5652762Spraks * 112*a5652762Spraks * void vpm_unmap_pages(vpmap_t *vml, enum seg_rw rw);. 113*a5652762Spraks * 114*a5652762Spraks * This function unmaps the pages that where mapped by vpm_map_pages. 115*a5652762Spraks * The SGL array 'vml' has to be the one that was passed to vpm_map_pages(). 116*a5652762Spraks * 117*a5652762Spraks * 118*a5652762Spraks * ex: 119*a5652762Spraks * To copy file data of vnode(file) 'vp' at offset 'off' to a kernel buffer 120*a5652762Spraks * 'buf' the following code snippet shows how to use the above two interfaces. 121*a5652762Spraks * Here the the copy length is till the MAXBSIZE boundary. This code can be 122*a5652762Spraks * executed repeatedly, in a loop to copy more then MAXBSIZE length of data. 123*a5652762Spraks * 124*a5652762Spraks * vmap_t vml[MINVMAPS]; 125*a5652762Spraks * int err, i, newpage, len; 126*a5652762Spraks * int pon; 127*a5652762Spraks * 128*a5652762Spraks * pon = (off & PAGEOFFSET); 129*a5652762Spraks * len = MAXBSIZE - pon; 130*a5652762Spraks * 131*a5652762Spraks * if (vpm_enable) { 132*a5652762Spraks * err = vpm_map_pages(vp, off, len, 0, vml, MINVMAPS, 133*a5652762Spraks * &newpage, S_WRITE); 134*a5652762Spraks * 135*a5652762Spraks * if (err) 136*a5652762Spraks * return; 137*a5652762Spraks * 138*a5652762Spraks * for (i=0; vml[i].vs_addr != NULL); i++) { 139*a5652762Spraks * bcopy (buf, vml[i].vs_addr + pon, 140*a5652762Spraks * PAGESIZE - pon); 141*a5652762Spraks * buf += (PAGESIZE - pon); 142*a5652762Spraks * pon = 0; 143*a5652762Spraks * } 144*a5652762Spraks * 145*a5652762Spraks * if (newpage) { 146*a5652762Spraks * pon = (off & PAGEOFFSET); 147*a5652762Spraks * bzero(vml[i-1].vs_addr + pon, PAGESIZE - pon); 148*a5652762Spraks * } 149*a5652762Spraks * 150*a5652762Spraks * vpm_unmap_pages(vml, S_WRITE); 151*a5652762Spraks * } 152*a5652762Spraks * 153*a5652762Spraks * 154*a5652762Spraks * 155*a5652762Spraks * 156*a5652762Spraks * int vpm_data_copy(struct vnode *vp, u_offset_t off, size_t len, 157*a5652762Spraks * struct uio *uio, int fetchpage, int *newpagecreated, 158*a5652762Spraks * int zerostart, enum seg_rw rw); 159*a5652762Spraks * 160*a5652762Spraks * This function can be called if the need is to just transfer data to/from 161*a5652762Spraks * the vnode pages. It takes a 'uio' structure and calls 'uiomove()' to 162*a5652762Spraks * do the data transfer. It can be used in the context of read and write 163*a5652762Spraks * system calls to transfer data between a user buffer, which is specified 164*a5652762Spraks * in the uio structure, and the vnode pages. If the data needs to be 165*a5652762Spraks * transferred between a kernel buffer and the pages, like in the above 166*a5652762Spraks * example, a uio structure can be set up accordingly and passed. The 'rw' 167*a5652762Spraks * parameter will determine the direction of the data transfer. 168*a5652762Spraks * 169*a5652762Spraks * The 'fetchpage' and 'newpagecreated' are same as explained before. 170*a5652762Spraks * The 'zerostart' flag when set will zero fill start of the page till the 171*a5652762Spraks * offset 'off' in the first page. i.e from 'off & PAGEMASK' to 'off'. 172*a5652762Spraks * Here too the MAXBSIZE restriction mentioned above applies to the length 173*a5652762Spraks * requested. 174*a5652762Spraks * 175*a5652762Spraks * 176*a5652762Spraks * int vpm_sync_pages(struct vnode *vp, u_offset_t off, 177*a5652762Spraks * size_t len, uint_t flags) 178*a5652762Spraks * 179*a5652762Spraks * This function can be called to flush or sync the vnode(file) pages that 180*a5652762Spraks * have been accessed. It will call VOP_PUTPAGE(). 181*a5652762Spraks * 182*a5652762Spraks * For the given vnode, off and len the pages covering the range 183*a5652762Spraks * [off, off + len) are flushed. Currently it uses the same flags that 184*a5652762Spraks * are used with segmap_release() interface. Refer vm/seg_map.h. 185*a5652762Spraks * (SM_DONTNEED, SM_ASYNC, SM_FREE, SM_INVAL, SM_DESTROY) 186*a5652762Spraks * 187*a5652762Spraks */ 188*a5652762Spraks 189*a5652762Spraks 190*a5652762Spraks /* 191*a5652762Spraks * vpm cache related definitions. 192*a5652762Spraks */ 193*a5652762Spraks #define VPMAP_MINCACHE (64 * 1024 * 1024) 194*a5652762Spraks 195*a5652762Spraks /* 196*a5652762Spraks * vpm caching mode 197*a5652762Spraks */ 198*a5652762Spraks #define VPMCACHE_LRU 0 199*a5652762Spraks #define VPMCACHE_RANDOM 1 200*a5652762Spraks /* 201*a5652762Spraks * Data structures to manage the cache of pages referenced by 202*a5652762Spraks * the vpm interfaces. There is one vpmap struct per page in the cache. 203*a5652762Spraks */ 204*a5652762Spraks struct vpmap { 205*a5652762Spraks kmutex_t vpm_mtx; /* protects non list fields */ 206*a5652762Spraks struct vnode *vpm_vp; /* pointer to vnode of cached page */ 207*a5652762Spraks struct vpmap *vpm_next; /* free list pointers */ 208*a5652762Spraks struct vpmap *vpm_prev; 209*a5652762Spraks u_offset_t vpm_off; /* offset of the page */ 210*a5652762Spraks page_t *vpm_pp; /* page pointer */ 211*a5652762Spraks ushort_t vpm_refcnt; /* Number active references */ 212*a5652762Spraks ushort_t vpm_ndxflg; /* indicates which queue */ 213*a5652762Spraks ushort_t vpm_free_ndx; /* freelist it belongs to */ 214*a5652762Spraks }; 215*a5652762Spraks 216*a5652762Spraks /* 217*a5652762Spraks * Multiple vpmap free lists are maintaned so that allocations 218*a5652762Spraks * scale with cpu count. To further reduce contentions between 219*a5652762Spraks * allocation and deallocations, each list is made up of two queues. 220*a5652762Spraks */ 221*a5652762Spraks #define VPM_FREEQ_PAD 64 222*a5652762Spraks union vpm_freeq { 223*a5652762Spraks struct { 224*a5652762Spraks struct vpmap *vpmsq_free; 225*a5652762Spraks kmutex_t vpmsq_mtx; 226*a5652762Spraks } vpmfq; 227*a5652762Spraks char vpmq_pad[VPM_FREEQ_PAD]; 228*a5652762Spraks }; 229*a5652762Spraks 230*a5652762Spraks #define vpmq_free vpmfq.vpmsq_free 231*a5652762Spraks #define vpmq_mtx vpmfq.vpmsq_mtx 232*a5652762Spraks 233*a5652762Spraks struct vpmfree { 234*a5652762Spraks union vpm_freeq vpm_freeq[2]; /* alloc and release queue */ 235*a5652762Spraks union vpm_freeq *vpm_allocq; /* current alloc queue */ 236*a5652762Spraks union vpm_freeq *vpm_releq; /* current release queue */ 237*a5652762Spraks kcondvar_t vpm_free_cv; 238*a5652762Spraks ushort_t vpm_want; 239*a5652762Spraks }; 240*a5652762Spraks 241*a5652762Spraks #define VPMALLOCQ 0 242*a5652762Spraks #define VPMRELEQ 1 243*a5652762Spraks 244*a5652762Spraks /* 245*a5652762Spraks * VPM Interface definitions. 246*a5652762Spraks */ 247*a5652762Spraks 248*a5652762Spraks /* 249*a5652762Spraks * This structure is the scatter gather list element. The page 250*a5652762Spraks * mappings will be returned in this structure. A pointer to an 251*a5652762Spraks * array of this structure is passed to the interface routines. 252*a5652762Spraks */ 253*a5652762Spraks typedef struct vmap { 254*a5652762Spraks caddr_t vs_addr; /* mapped address */ 255*a5652762Spraks size_t vs_len; /* length, currently fixed at PAGESIZE */ 256*a5652762Spraks void *vs_data; /* opaque - private data */ 257*a5652762Spraks } vmap_t; 258*a5652762Spraks 259*a5652762Spraks /* 260*a5652762Spraks * The minimum and maximum number of array elements in the scatter 261*a5652762Spraks * gather list. 262*a5652762Spraks */ 263*a5652762Spraks #define MINVMAPS 3 /* ((MAXBSIZE/4096 + 1) min # mappings */ 264*a5652762Spraks #define MAXVMAPS 10 /* Max # the scatter gather list */ 265*a5652762Spraks 266*a5652762Spraks #ifdef _KERNEL 267*a5652762Spraks 268*a5652762Spraks extern int vpm_enable; 269*a5652762Spraks /* 270*a5652762Spraks * vpm page mapping operations. 271*a5652762Spraks */ 272*a5652762Spraks extern void vpm_init(void); 273*a5652762Spraks extern int vpm_map_pages(struct vnode *, u_offset_t, size_t, int, 274*a5652762Spraks vmap_t *, int, int *, enum seg_rw); 275*a5652762Spraks 276*a5652762Spraks extern void vpm_unmap_pages(vmap_t *, enum seg_rw); 277*a5652762Spraks extern int vpm_sync_pages(struct vnode *, u_offset_t, size_t, uint_t); 278*a5652762Spraks extern int vpm_data_copy(struct vnode *, u_offset_t, size_t, 279*a5652762Spraks struct uio *, int, int *, int, enum seg_rw rw); 280*a5652762Spraks #endif /* _KERNEL */ 281*a5652762Spraks 282*a5652762Spraks #ifdef __cplusplus 283*a5652762Spraks } 284*a5652762Spraks #endif 285*a5652762Spraks 286*a5652762Spraks #endif /* _VM_VPM_H */ 287