xref: /titanic_50/usr/src/uts/common/vm/vpm.h (revision c77a61a72b5ecdc507d6cf104142edd371a16c84)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #ifndef	_VM_VPM_H
27 #define	_VM_VPM_H
28 
29 #pragma ident	"%Z%%M%	%I%	%E% SMI"
30 
31 #ifdef	__cplusplus
32 extern "C" {
33 #endif
34 
35 /*
36  * The vnode page mappings(VPM) interfaces.
37  * "Commitment level - Consolidation private". They are subject
38  * to change without notice. Use them at your own risk.
39  *
40  * At this stage these interfaces are provided only to utilize the
41  * segkpm mappings and are enabled for solaris x64. Therefore these
42  * interfaces have to be used under the 'vpm_enable' check as an
43  * alternative to segmap interfaces where applicable.
44  *
45  * The VPM interfaces provide temporary mappings to file pages. They
46  * return the mappings in a scatter gather list(SGL).
47  * The SGL elements are the structure 'vmap_t'.
48  *
49  *	typedef struct vmap {
50  *		caddr_t	vs_addr;        / public /
51  *		size_t	vs_len;         / public - Currently not used /
52  *		void	*vs_data;	/ opaque - private data /
53  *	} vmap_t;
54  *
55  * An array of this structure has to be passed to the interface routines
56  * along with the size(# of elements) of the SGL array. Depending on the
57  * requested length and mapped chunk sizes(PAGESIZE here), the number of
58  * valid mappings returned can be less than the actual size of the SGL array.
59  * An element in the SGL will always have 'vs_addr' set to NULL, which
60  * marks the end of the valid entries in the SGL.
61  *
62  * The vmap_t structure members are populated with the mapped address
63  * in 'vs_addr' and length of the mapping in 'vs_len'. Currently the
64  * mapping length is fixed at PAGESIZE. The 'vs_data' member is private
65  * and the caller should not access or modify it.
66  *
67  * Using a scatter gather list to return the mappings and length makes it
68  * possible to provide mappings of variable length. Currently mapping length
69  * of only 'PAGESIZE' per vmap_t is possible. Also, similar to the segmap
70  * interfaces, on each request, the max length of 'MAXBSIZE' is supported
71  * for now. The MAXBSIZE mappings will be returned in 1 or 2 vmap_t elements
72  * of the SGL depending on the PAGESIZE. The scatter gather list array size
73  * needs to be a minimum of MINVMAPS elements to accommodate MAXBSIZE.
74  * The MAXBSIZE restriction exists because the filesystems are not capable
75  * of handling more(disk block allocations at a time) for now.
76  *
77  *
78  * Interfaces:
79  *
80  * int vpm_map_pages( struct vnode *vp, u_offset_t off, size_t len,
81  *			int fetchpage, vmap_t *vml, int vmlsz,
82  *			int *newpagecreated, enum seg_rw rw);
83  *
84  * This function returns mappings to vnode pages.
85  *
86  * It takes a vnode, offset and length and returns mappings to the  pages
87  * covering the range [off, off +len) in the vmap_t SGL array 'vml'.
88  * Currently these interfaces are subject to restrictions similar to the segmap
89  * interfaces. The length passed in should satisfy the following criteria.
90  * '(off + len)  <= ((off & PAGEMASK) + MAXBSIZE)'
91  * The mapped address returned, in 'vs_addr', are for the page boundary.
92  *
93  * The 'vmlsz' is the size(# elements) of the 'vml' array.
94  *
95  * When the 'fetchpage' flag is set, the vnode(file) pages will be fetched
96  * (calls VOP_GETPAGE) from the backing store(disk) if not found in the
97  * system page cache. If 'fetchpage == 0', the vnode(file) pages for the
98  * given offset will be just created if they are not already present in the
99  * system page cache. The 'newpagecreated' flag is set on return if new pages
100  * are created when 'fetchpage == 0'(requested to just create new pages).
101  *
102  * The 'seg_rw rw' indicates the intended operation on these mappings
103  * (S_WRITE or S_READ).
104  *
105  * Currently these interfaces only return segkpm mappings. Therefore the
106  * vnode pages that are being accessed will be locked(at least SHARED locked)
107  * for the duration these mappings are in use. After use, the  unmap
108  * function, vpm_unmap_pages(), has to be called and the same SGL array
109  * needs to be passed to the unmap function.
110  *
111  *
112  * void vpm_unmap_pages(vmap_t *vml, enum seg_rw rw);
113  *
114  * This function unmaps the pages that were mapped by vpm_map_pages.
115  * The SGL array 'vml' has to be the one that was passed to vpm_map_pages().
116  *
117  *
118  * ex:
119  * To copy data from a kernel buffer 'buf' to the file pages of vnode(file)
120  * 'vp' at offset 'off', the following code snippet shows how to use the
121  * above two interfaces. Here the copy length is till the MAXBSIZE boundary.
122  * This code can be run repeatedly, in a loop, to copy more than MAXBSIZE.
123  *
124  *	vmap_t  vml[MINVMAPS];
125  *	int err, i, newpage, len;
126  *	int pon;
127  *
128  *	pon = (off & PAGEOFFSET);
129  *	len = MAXBSIZE - pon;
130  *
131  *	if (vpm_enable) {
132  *             err = vpm_map_pages(vp, off, len, 0, vml, MINVMAPS,
133  *				 &newpage, S_WRITE);
134  *
135  *		if (err)
136  *			return;
137  *
138  *		for (i = 0; vml[i].vs_addr != NULL; i++) {
139  *			bcopy (buf, vml[i].vs_addr + pon,
140  *				 PAGESIZE - pon);
141  *			buf += (PAGESIZE - pon);
142  *			pon = 0;
143  *		}
144  *
145  *		if (newpage) {
146  *			pon = (off & PAGEOFFSET);
147  *			bzero(vml[i-1].vs_addr + pon, PAGESIZE - pon);
148  *		}
149  *
150  *		vpm_unmap_pages(vml, S_WRITE);
151  *	}
152  *
153  *
154  *
155  *
156  * int vpm_data_copy(struct vnode *vp, u_offset_t off, size_t len,
157  *		struct uio *uio, int fetchpage, int *newpagecreated,
158  *		int zerostart, enum seg_rw rw);
159  *
160  * This function can be called if the need is to just transfer data to/from
161  * the vnode pages. It takes a 'uio' structure and  calls 'uiomove()' to
162  * do the data transfer. It can be used in the context of read and write
163  * system calls to transfer data between a user buffer, which is specified
164  * in the uio structure, and the vnode pages. If the data needs to be
165  * transferred between a kernel buffer and the pages, like in the above
166  * example, a uio structure can be set up accordingly and passed. The 'rw'
167  * parameter will determine the direction of the data transfer.
168  *
169  * The 'fetchpage' and 'newpagecreated' are same as explained before.
170  * The 'zerostart' flag when set will zero fill start of the page till the
171  * offset 'off' in the first page, i.e. from 'off & PAGEMASK' to 'off'.
172  * Here too the MAXBSIZE restriction mentioned above applies to the length
173  * requested.
174  *
175  *
176  * int vpm_sync_pages(struct vnode *vp, u_offset_t off,
177  *					 size_t len, uint_t flags)
178  *
179  * This function can be called to flush or sync the vnode(file) pages that
180  * have been accessed. It will call VOP_PUTPAGE().
181  *
182  * For the given vnode, off and len the pages covering the range
183  * [off, off + len) are flushed. Currently it uses the same flags that
184  * are used with the segmap_release() interface. Refer to vm/seg_map.h.
185  * (SM_DONTNEED, SM_ASYNC, SM_FREE, SM_INVAL, SM_DESTROY)
186  *
187  */
188 
189 
190 /*
191  * vpm cache related definitions (VPMAP_MINCACHE is 64M, i.e. 67108864).
192  */
193 #define	VPMAP_MINCACHE		(64 * 1024 * 1024)
194 
195 /*
196  * vpm caching mode selectors (presumably LRU vs. random reuse - confirm).
197  */
198 #define	VPMCACHE_LRU		0
199 #define	VPMCACHE_RANDOM		1
200 /*
201  * Per-page bookkeeping for the cache of pages referenced by the vpm
202  * interfaces. There is one vpmap struct per page in the cache.
203  */
204 struct vpmap {
205 	kmutex_t	vpm_mtx;	/* protects the non-list fields */
206 	struct vnode	*vpm_vp;	/* vnode of the cached page */
207 	struct vpmap	*vpm_next;	/* free list pointer (next) */
208 	struct vpmap	*vpm_prev;	/* free list pointer (previous) */
209 	u_offset_t	vpm_off;	/* offset of the page */
210 	page_t		*vpm_pp;	/* page pointer */
211 	ushort_t	vpm_refcnt;	/* number of active references */
212 	ushort_t	vpm_ndxflg;	/* which queue (presumably VPMALLOCQ/VPMRELEQ - confirm) */
213 	ushort_t	vpm_free_ndx;	/* freelist it belongs to */
214 };
215 
216 /*
217  * Multiple vpmap free lists are maintained so that allocations
218  * scale with cpu count. To further reduce contention between
219  * allocations and deallocations, each list is made up of two queues.
220  */
221 #define	VPM_FREEQ_PAD	64	/* number of chars in vpmq_pad[] */
222 union vpm_freeq {
223 	struct {
224 		struct vpmap	*vpmsq_free;	/* presumably this queue's list head - confirm */
225 		kmutex_t	vpmsq_mtx;	/* presumably guards vpmsq_free - confirm */
226 	} vpmfq;
227 	char vpmq_pad[VPM_FREEQ_PAD];	/* pads the union to at least VPM_FREEQ_PAD bytes */
228 };
229 
230 #define	vpmq_free	vpmfq.vpmsq_free	/* shorthand for the nested vpmfq member */
231 #define	vpmq_mtx	vpmfq.vpmsq_mtx		/* shorthand for the nested vpmfq member */
232 
233 struct vpmfree {			/* one vpmap free list: a pair of queues */
234 	union vpm_freeq vpm_freeq[2];	/* alloc and release queue */
235 	union vpm_freeq *vpm_allocq;	/* current alloc queue */
236 	union vpm_freeq *vpm_releq;	/* current release queue */
237 	kcondvar_t	vpm_free_cv;	/* presumably waited on for a free vpmap - confirm */
238 	ushort_t	vpm_want;	/* presumably flags/counts waiters on vpm_free_cv - confirm */
239 };
240 
241 #define	VPMALLOCQ	0	/* presumably selects the alloc queue - confirm against users */
242 #define	VPMRELEQ	1	/* presumably selects the release queue - confirm against users */
243 
244 /*
245  * VPM Interface definitions.
246  */
247 
248 /*
249  * Scatter gather list (SGL) element in which page mappings are returned.
250  * Callers pass a pointer to an array of these to the interface routines;
251  * the element whose 'vs_addr' is NULL marks the end of the valid entries.
252  */
253 typedef struct vmap {
254 	caddr_t	vs_addr;	/* mapped address; NULL terminates the SGL */
255 	size_t	vs_len;		/* length, currently fixed at PAGESIZE */
256 	void	*vs_data;	/* opaque - private data, callers must not touch */
257 } vmap_t;
258 
259 /*
260  * The minimum and maximum number of array elements in the scatter
261  * gather list.
262  */
263 #define	MINVMAPS   3		/* (MAXBSIZE/4096 + 1), min # of mappings */
264 #define	MAXVMAPS   10		/* max # of elements in the scatter gather list */
265 
266 #ifdef _KERNEL
267 
268 extern int	vpm_enable;	/* use vpm in place of segmap only when set */
269 /*
270  * vpm page mapping operations (arguments: see the top-of-file comment).
271  */
272 extern void	vpm_init(void);
273 extern int	vpm_map_pages(struct vnode *, u_offset_t, size_t, int,
274 		vmap_t *, int, int  *, enum seg_rw);
275 
276 extern void	vpm_unmap_pages(vmap_t *, enum seg_rw);	/* undoes vpm_map_pages() */
277 extern int	vpm_sync_pages(struct vnode *, u_offset_t, size_t, uint_t);
278 extern int	vpm_data_copy(struct vnode *, u_offset_t, size_t,
279 		struct uio *, int, int *, int, enum seg_rw rw);
280 #endif	/* _KERNEL */
281 
282 #ifdef	__cplusplus
283 }
284 #endif
285 
286 #endif	/* _VM_VPM_H */
287