1a5652762Spraks /* 2a5652762Spraks * CDDL HEADER START 3a5652762Spraks * 4a5652762Spraks * The contents of this file are subject to the terms of the 5a5652762Spraks * Common Development and Distribution License (the "License"). 6a5652762Spraks * You may not use this file except in compliance with the License. 7a5652762Spraks * 8a5652762Spraks * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9a5652762Spraks * or http://www.opensolaris.org/os/licensing. 10a5652762Spraks * See the License for the specific language governing permissions 11a5652762Spraks * and limitations under the License. 12a5652762Spraks * 13a5652762Spraks * When distributing Covered Code, include this CDDL HEADER in each 14a5652762Spraks * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15a5652762Spraks * If applicable, add the following below this CDDL HEADER, with the 16a5652762Spraks * fields enclosed by brackets "[]" replaced with your own identifying 17a5652762Spraks * information: Portions Copyright [yyyy] [name of copyright owner] 18a5652762Spraks * 19a5652762Spraks * CDDL HEADER END 20a5652762Spraks */ 21a5652762Spraks /* 22*183971baSPrakash Sangappa * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23a5652762Spraks * Use is subject to license terms. 24a5652762Spraks */ 25a5652762Spraks 26a5652762Spraks #ifndef _VM_VPM_H 27a5652762Spraks #define _VM_VPM_H 28a5652762Spraks 29a5652762Spraks 30a5652762Spraks #ifdef __cplusplus 31a5652762Spraks extern "C" { 32a5652762Spraks #endif 33a5652762Spraks 34a5652762Spraks /* 35a5652762Spraks * The vnode page mappings(VPM) interfaces. 36a5652762Spraks * "Commitment level - Consolidation private". They are subject 37a5652762Spraks * to change without notice. Use them at your own risk. 38a5652762Spraks * 39a5652762Spraks * At this stage these interfaces are provided only to utilize the 40*183971baSPrakash Sangappa * segkpm mappings. 
 * Therefore these interfaces have to be used under
 * the 'vpm_enable' check as an alternative to segmap interfaces where
 * applicable.
 *
 * The VPM interfaces provide temporary mappings to file pages. They
 * return the mappings in a scatter gather list(SGL).
 * The SGL elements are the structure 'vmap_t'.
 *
 *	typedef struct vmap {
 *		caddr_t	vs_addr;	/ public - mapped address /
 *		size_t	vs_len;		/ public - length of mapping /
 *		void	*vs_data;	/ opaque - private data /
 *	} vmap_t;
 *
 * An array of this structure has to be passed to the interface routines
 * along with the size(# of elements) of the SGL array. Depending on the
 * requested length and mapped chunk sizes(PAGESIZE here), the number of
 * valid mappings returned can be less than the actual size of the SGL array.
 * Always, an element in the SGL will have 'vs_addr' set to NULL which
 * marks the end of the valid entries in the SGL.
 *
 * The vmap_t structure members are populated with the mapped address
 * in 'vs_addr' and length of the mapping in 'vs_len'. Currently the
 * mapping length is fixed at PAGESIZE. The 'vs_data' member is private
 * and the caller should not access or modify it.
 *
 * Using a scatter gather list to return the mappings and length makes it
 * possible to provide mappings of variable length. Mapping length up to
 * VPMMAXLEN is supported. The scatter gather list array size needs to
 * be a minimum of MINVMAPS elements.
 *
 * Interfaces:
 *
 * int vpm_map_pages(struct vnode *vp, u_offset_t off, size_t len,
 *			int fetchpage, vmap_t *vml, int vmlsz,
 *			int *newpagecreated, enum seg_rw rw);
 *
 * This function returns mappings to vnode pages.
 *
 * It takes a vnode, offset and length and returns mappings to the pages
 * covering the range [off, off + len) in the vmap_t SGL array 'vml'.
 * The length passed in should satisfy the following criteria
 * '(off + len) <= ((off & PAGEMASK) + VPMMAXLEN)'
 * The mapped address returned, in 'vs_addr', of the first vml[] entry
 * is at the beginning of the page containing 'off'.
 *
 * The 'vmlsz' is the size(# elements) of the 'vml' array.
 *
 * When the 'fetchpage' flag is set, the vnode(file) pages will be fetched
 * (calls VOP_GETPAGE) from the backing store(disk) if not found in the
 * system page cache. If 'fetchpage == 0', the vnode(file) pages for the
 * given offset will be just created if they are not already present in the
 * system page cache. The 'newpagecreated' flag is set on return if new pages
 * are created when 'fetchpage == 0'(requested to just create new pages).
 *
 * The 'seg_rw rw' indicates the intended operation on these mappings
 * (S_WRITE or S_READ).
 *
 * Currently these interfaces only return segkpm mappings. The vnode pages
 * that are being accessed will be locked(at least SHARED locked) for the
 * duration these mappings are in use.
 * After use, the unmap function,
 * vpm_unmap_pages(), has to be called and the same SGL array
 * needs to be passed to the unmap function.
 *
 *
 * void vpm_unmap_pages(vmap_t *vml, enum seg_rw rw);
 *
 * This function unmaps the pages that were mapped by vpm_map_pages().
 * The SGL array 'vml' has to be the one that was passed to vpm_map_pages().
 *
 *
 * ex:
 * To copy data from a kernel buffer 'buf' into the pages of vnode(file) 'vp'
 * at offset 'off', the following code snippet shows how to use the above two
 * interfaces. Here the copy length is till the MAXBSIZE boundary. This code
 * can be executed repeatedly, in a loop, to copy more than MAXBSIZE length
 * of data.
 *
 *	vmap_t	vml[MINVMAPS];
 *	int err, i, newpage, len;
 *	int pon;
 *
 *	pon = (off & PAGEOFFSET);
 *	len = MAXBSIZE - pon;
 *
 *	if (vpm_enable) {
 *		err = vpm_map_pages(vp, off, len, 0, vml, MINVMAPS,
 *				&newpage, S_WRITE);
 *
 *		if (err)
 *			return;
 *
 *		for (i = 0; vml[i].vs_addr != NULL; i++) {
 *			bcopy(buf, vml[i].vs_addr + pon,
 *				PAGESIZE - pon);
 *			buf += (PAGESIZE - pon);
 *			pon = 0;
 *		}
 *
 *		if (newpage) {
 *			pon = (off & PAGEOFFSET);
 *			bzero(vml[i-1].vs_addr + pon, PAGESIZE - pon);
 *		}
 *
 *		vpm_unmap_pages(vml, S_WRITE);
 *	}
 *
 *
 *
 *
 * int vpm_data_copy(struct vnode *vp, u_offset_t off, size_t len,
 *			struct uio *uio, int fetchpage, int *newpagecreated,
 *			int zerostart, enum seg_rw rw);
 *
 * This function can be called if the need is to just transfer data to/from
 * the vnode pages. It takes a 'uio' structure and calls 'uiomove()' to
 * do the data transfer. It can be used in the context of read and write
 * system calls to transfer data between a user buffer, which is specified
 * in the uio structure, and the vnode pages. If the data needs to be
 * transferred between a kernel buffer and the pages, like in the above
 * example, a uio structure can be set up accordingly and passed. The 'rw'
 * parameter will determine the direction of the data transfer.
 *
 * The 'fetchpage' and 'newpagecreated' are the same as explained before.
 * The 'zerostart' flag, when set, will zero-fill the start of the first
 * page up to the offset 'off', i.e. from 'off & PAGEMASK' to 'off'.
 *
 *
 * int vpm_sync_pages(struct vnode *vp, u_offset_t off,
 *			size_t len, uint_t flags);
 *
 * This function can be called to flush or sync the vnode(file) pages that
 * have been accessed. It will call VOP_PUTPAGE().
 *
 * For the given vnode, off and len the pages covering the range
 * [off, off + len) are flushed. Currently it uses the same flags that
 * are used with the segmap_release() interface. Refer to vm/seg_map.h.
176a5652762Spraks * (SM_DONTNEED, SM_ASYNC, SM_FREE, SM_INVAL, SM_DESTROY) 177a5652762Spraks * 178a5652762Spraks */ 179a5652762Spraks 180a5652762Spraks 181a5652762Spraks /* 182a5652762Spraks * vpm cache related definitions. 183a5652762Spraks */ 184a5652762Spraks #define VPMAP_MINCACHE (64 * 1024 * 1024) 185*183971baSPrakash Sangappa #define VPMAP_MAXCACHE (256L * 1024L * 1024L * 1024L) /* 256G */ 186*183971baSPrakash Sangappa 187a5652762Spraks 188a5652762Spraks /* 189a5652762Spraks * vpm caching mode 190a5652762Spraks */ 191a5652762Spraks #define VPMCACHE_LRU 0 192a5652762Spraks #define VPMCACHE_RANDOM 1 193a5652762Spraks /* 194a5652762Spraks * Data structures to manage the cache of pages referenced by 195a5652762Spraks * the vpm interfaces. There is one vpmap struct per page in the cache. 196a5652762Spraks */ 197a5652762Spraks struct vpmap { 198a5652762Spraks kmutex_t vpm_mtx; /* protects non list fields */ 199a5652762Spraks struct vnode *vpm_vp; /* pointer to vnode of cached page */ 200a5652762Spraks struct vpmap *vpm_next; /* free list pointers */ 201a5652762Spraks struct vpmap *vpm_prev; 202a5652762Spraks u_offset_t vpm_off; /* offset of the page */ 203a5652762Spraks page_t *vpm_pp; /* page pointer */ 204a5652762Spraks ushort_t vpm_refcnt; /* Number active references */ 205a5652762Spraks ushort_t vpm_ndxflg; /* indicates which queue */ 206a5652762Spraks ushort_t vpm_free_ndx; /* freelist it belongs to */ 207a5652762Spraks }; 208a5652762Spraks 209a5652762Spraks /* 210a5652762Spraks * Multiple vpmap free lists are maintaned so that allocations 211a5652762Spraks * scale with cpu count. To further reduce contentions between 212a5652762Spraks * allocation and deallocations, each list is made up of two queues. 
213a5652762Spraks */ 214a5652762Spraks #define VPM_FREEQ_PAD 64 215a5652762Spraks union vpm_freeq { 216a5652762Spraks struct { 217a5652762Spraks struct vpmap *vpmsq_free; 218a5652762Spraks kmutex_t vpmsq_mtx; 219a5652762Spraks } vpmfq; 220a5652762Spraks char vpmq_pad[VPM_FREEQ_PAD]; 221a5652762Spraks }; 222a5652762Spraks 223a5652762Spraks #define vpmq_free vpmfq.vpmsq_free 224a5652762Spraks #define vpmq_mtx vpmfq.vpmsq_mtx 225a5652762Spraks 226a5652762Spraks struct vpmfree { 227a5652762Spraks union vpm_freeq vpm_freeq[2]; /* alloc and release queue */ 228a5652762Spraks union vpm_freeq *vpm_allocq; /* current alloc queue */ 229a5652762Spraks union vpm_freeq *vpm_releq; /* current release queue */ 230a5652762Spraks kcondvar_t vpm_free_cv; 231a5652762Spraks ushort_t vpm_want; 232a5652762Spraks }; 233a5652762Spraks 234a5652762Spraks #define VPMALLOCQ 0 235a5652762Spraks #define VPMRELEQ 1 236a5652762Spraks 237a5652762Spraks /* 238a5652762Spraks * VPM Interface definitions. 239a5652762Spraks */ 240a5652762Spraks 241a5652762Spraks /* 242a5652762Spraks * This structure is the scatter gather list element. The page 243a5652762Spraks * mappings will be returned in this structure. A pointer to an 244a5652762Spraks * array of this structure is passed to the interface routines. 245a5652762Spraks */ 246a5652762Spraks typedef struct vmap { 247a5652762Spraks caddr_t vs_addr; /* mapped address */ 248a5652762Spraks size_t vs_len; /* length, currently fixed at PAGESIZE */ 249a5652762Spraks void *vs_data; /* opaque - private data */ 250a5652762Spraks } vmap_t; 251a5652762Spraks 252*183971baSPrakash Sangappa #define VPM_FETCHPAGE 0x01 /* fault in pages */ 253*183971baSPrakash Sangappa 254*183971baSPrakash Sangappa /* 255*183971baSPrakash Sangappa * Max request length - Needs to be a multiple of 256*183971baSPrakash Sangappa * 8192 (PAGESIZE on sparc) so it works properly on both 257*183971baSPrakash Sangappa * x86 & sparc systems. Max set to 128k. 
258*183971baSPrakash Sangappa */ 259*183971baSPrakash Sangappa #define VPMMAXLEN (128*1024) 260*183971baSPrakash Sangappa 261a5652762Spraks /* 262a5652762Spraks * The minimum and maximum number of array elements in the scatter 263a5652762Spraks * gather list. 264a5652762Spraks */ 265a5652762Spraks #define MINVMAPS 3 /* ((MAXBSIZE/4096 + 1) min # mappings */ 266*183971baSPrakash Sangappa #if defined(__sparc) 267*183971baSPrakash Sangappa #define VPMMAXPGS (VPMMAXLEN/8192) /* Max # pages at a time */ 268*183971baSPrakash Sangappa #else 269*183971baSPrakash Sangappa #define VPMMAXPGS (VPMMAXLEN/4096) 270*183971baSPrakash Sangappa #endif 271*183971baSPrakash Sangappa #define MAXVMAPS (VPMMAXPGS + 1) /* Max # elements in the */ 272*183971baSPrakash Sangappa /* scatter gather list */ 273*183971baSPrakash Sangappa /* +1 element to mark the */ 274*183971baSPrakash Sangappa /* end of the list of valid */ 275*183971baSPrakash Sangappa /* mappings */ 276a5652762Spraks 277a5652762Spraks #ifdef _KERNEL 278a5652762Spraks 279a5652762Spraks extern int vpm_enable; 280a5652762Spraks /* 281a5652762Spraks * vpm page mapping operations. 282a5652762Spraks */ 283a5652762Spraks extern void vpm_init(void); 284a5652762Spraks extern int vpm_map_pages(struct vnode *, u_offset_t, size_t, int, 285a5652762Spraks vmap_t *, int, int *, enum seg_rw); 286a5652762Spraks 287a5652762Spraks extern void vpm_unmap_pages(vmap_t *, enum seg_rw); 288a5652762Spraks extern int vpm_sync_pages(struct vnode *, u_offset_t, size_t, uint_t); 289a5652762Spraks extern int vpm_data_copy(struct vnode *, u_offset_t, size_t, 290a5652762Spraks struct uio *, int, int *, int, enum seg_rw rw); 291a5652762Spraks #endif /* _KERNEL */ 292a5652762Spraks 293a5652762Spraks #ifdef __cplusplus 294a5652762Spraks } 295a5652762Spraks #endif 296a5652762Spraks 297a5652762Spraks #endif /* _VM_VPM_H */ 298