xref: /titanic_53/usr/src/uts/common/vm/vpm.h (revision a5652762e5f7bf683d19f18542e5e39df63bad79)
1*a5652762Spraks /*
2*a5652762Spraks  * CDDL HEADER START
3*a5652762Spraks  *
4*a5652762Spraks  * The contents of this file are subject to the terms of the
5*a5652762Spraks  * Common Development and Distribution License (the "License").
6*a5652762Spraks  * You may not use this file except in compliance with the License.
7*a5652762Spraks  *
8*a5652762Spraks  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*a5652762Spraks  * or http://www.opensolaris.org/os/licensing.
10*a5652762Spraks  * See the License for the specific language governing permissions
11*a5652762Spraks  * and limitations under the License.
12*a5652762Spraks  *
13*a5652762Spraks  * When distributing Covered Code, include this CDDL HEADER in each
14*a5652762Spraks  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*a5652762Spraks  * If applicable, add the following below this CDDL HEADER, with the
16*a5652762Spraks  * fields enclosed by brackets "[]" replaced with your own identifying
17*a5652762Spraks  * information: Portions Copyright [yyyy] [name of copyright owner]
18*a5652762Spraks  *
19*a5652762Spraks  * CDDL HEADER END
20*a5652762Spraks  */
21*a5652762Spraks /*
22*a5652762Spraks  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23*a5652762Spraks  * Use is subject to license terms.
24*a5652762Spraks  */
25*a5652762Spraks 
26*a5652762Spraks #ifndef	_VM_VPM_H
27*a5652762Spraks #define	_VM_VPM_H
28*a5652762Spraks 
29*a5652762Spraks #pragma ident	"%Z%%M%	%I%	%E% SMI"
30*a5652762Spraks 
31*a5652762Spraks #ifdef	__cplusplus
32*a5652762Spraks extern "C" {
33*a5652762Spraks #endif
34*a5652762Spraks 
35*a5652762Spraks /*
36*a5652762Spraks  * The vnode page mappings(VPM) interfaces.
37*a5652762Spraks  * "Commitment level - Consolidation private". They are subject
38*a5652762Spraks  * to change without notice. Use them at your own risk.
39*a5652762Spraks  *
40*a5652762Spraks  * At this stage these interfaces are provided only to utilize the
41*a5652762Spraks  * segkpm mappings and are enabled for solaris x64. Therefore these
42*a5652762Spraks  * interfaces have to be used under the 'vpm_enable' check as an
43*a5652762Spraks  * alternative to segmap interfaces where applicable.
44*a5652762Spraks  *
45*a5652762Spraks  * The VPM interfaces provide temporary mappings to file pages. They
46*a5652762Spraks  * return the mappings in a scatter gather list(SGL).
47*a5652762Spraks  * The SGL elements are the structure 'vmap_t'.
48*a5652762Spraks  *
49*a5652762Spraks  *	typedef struct vmap {
50*a5652762Spraks  *		caddr_t	vs_addr;        / public /
51*a5652762Spraks  *		size_t	vs_len;         / public - Currently not used /
52*a5652762Spraks  *		void	*vs_data;	/ opaque - private data /
53*a5652762Spraks  *	} vmap_t;
54*a5652762Spraks  *
55*a5652762Spraks  * An array of this structure has to be passed to the interface routines
56*a5652762Spraks  * along with the size(# of elements) of the SGL array. Depending on the
57*a5652762Spraks  * requested length and mapped chunk sizes(PAGESIZE here), the number of
 * valid mappings returned can be less than the actual size of the SGL array.
 * There is always an element in the SGL with 'vs_addr' set to NULL, which
 * marks the end of the valid entries in the SGL.
61*a5652762Spraks  *
62*a5652762Spraks  * The vmap_t structure members are populated with the mapped address
63*a5652762Spraks  * in 'vs_addr' and length of the mapping in 'vs_len'. Currently the
64*a5652762Spraks  * mapping length is fixed at PAGESIZE. The 'vs_data' member is private
65*a5652762Spraks  * and the caller should not access or modify it.
66*a5652762Spraks  *
67*a5652762Spraks  * Using a scatter gather list to return the mappings and length makes it
68*a5652762Spraks  * possible to provide mappings of variable length. Currently mapping length
69*a5652762Spraks  * of only 'PAGESIZE' per vmap_t is possible. Also, similar to the segmap
70*a5652762Spraks  * interfaces, on each request, the max length of 'MAXBSIZE' is supported
71*a5652762Spraks  * for now. The MAXBSIZE mappings will be returned in 1 or 2 vmap_t elements
72*a5652762Spraks  * of the SGL depending on the PAGESIZE. The scatter gather list array size
73*a5652762Spraks  * needs to be a minimum of MINVMAPS elements to accommodate MAXBSIZE.
74*a5652762Spraks  * The MAXBSIZE restriction exists because the filesystems are not capable
75*a5652762Spraks  * of handling more(disk block allocations at a time) for now.
76*a5652762Spraks  *
77*a5652762Spraks  *
78*a5652762Spraks  * Interfaces:
79*a5652762Spraks  *
80*a5652762Spraks  * int vpm_map_pages( struct vnode *vp, u_offset_t off, size_t len,
81*a5652762Spraks  *			int fetchpage, vmap_t *vml, int vmlsz,
82*a5652762Spraks  *			int *newpagecreated, enum seg_rw rw);
83*a5652762Spraks  *
84*a5652762Spraks  * This function returns mappings to vnode pages.
85*a5652762Spraks  *
 * It takes a vnode, offset and length and returns mappings to the pages
 * covering the range [off, off + len) in the vmap_t SGL array 'vml'.
 * Currently these interfaces are subject to restrictions similar to the segmap
 * interfaces. The length passed in should satisfy the following criteria:
 * '(off + len) <= ((off & PAGEMASK) + MAXBSIZE)'
 * The mapped addresses returned in 'vs_addr' are at page boundaries.
92*a5652762Spraks  *
93*a5652762Spraks  * The 'vmlsz' is the size(# elements) of the 'vml' array.
94*a5652762Spraks  *
95*a5652762Spraks  * When the 'fetchpage' flag is set, the vnode(file) pages will be fetched
96*a5652762Spraks  * (calls VOP_GETPAGE) from the backing store(disk) if not found in the
97*a5652762Spraks  * system page cache. If 'fetchpage == 0', the vnode(file) pages for the
98*a5652762Spraks  * given offset will be just created if they are not already present in the
99*a5652762Spraks  * system page cache. The 'newpagecreated' flag is set on return if new pages
100*a5652762Spraks  * are created when 'fetchpage == 0'(requested to just create new pages).
101*a5652762Spraks  *
102*a5652762Spraks  * The 'seg_rw rw' indicates the intended operation on these mappings
103*a5652762Spraks  * (S_WRITE or S_READ).
104*a5652762Spraks  *
105*a5652762Spraks  * Currently these interfaces only return segkpm mappings. Therefore the
106*a5652762Spraks  * vnode pages that are being accessed will be locked(at least SHARED locked)
107*a5652762Spraks  * for the duration these mappings are in use. After use, the  unmap
108*a5652762Spraks  * function, vpm_unmap_pages(), has to be called and the same SGL array
109*a5652762Spraks  * needs to be passed to the unmap function.
110*a5652762Spraks  *
111*a5652762Spraks  *
 * void vpm_unmap_pages(vmap_t *vml, enum seg_rw rw);
 *
 * This function unmaps the pages that were mapped by vpm_map_pages().
 * The SGL array 'vml' has to be the one that was passed to vpm_map_pages().
116*a5652762Spraks  *
117*a5652762Spraks  *
118*a5652762Spraks  * ex:
 * To copy data from a kernel buffer 'buf' into the file pages of vnode(file)
 * 'vp' at offset 'off', the following code snippet shows how to use the above
 * two interfaces (it maps with S_WRITE and bcopy()s from 'buf' to the pages).
 * Here the copy length is up to the MAXBSIZE boundary. This code can be
 * executed repeatedly, in a loop, to copy more than MAXBSIZE length of data.
123*a5652762Spraks  *
124*a5652762Spraks  *	vmap_t  vml[MINVMAPS];
125*a5652762Spraks  *	int err, i, newpage, len;
126*a5652762Spraks  *	int pon;
127*a5652762Spraks  *
128*a5652762Spraks  *	pon = (off & PAGEOFFSET);
129*a5652762Spraks  *	len = MAXBSIZE - pon;
130*a5652762Spraks  *
131*a5652762Spraks  *	if (vpm_enable) {
132*a5652762Spraks  *             err = vpm_map_pages(vp, off, len, 0, vml, MINVMAPS,
133*a5652762Spraks  *				 &newpage, S_WRITE);
134*a5652762Spraks  *
135*a5652762Spraks  *		if (err)
136*a5652762Spraks  *			return;
137*a5652762Spraks  *
 *		for (i = 0; vml[i].vs_addr != NULL; i++) {
139*a5652762Spraks  *			bcopy (buf, vml[i].vs_addr + pon,
140*a5652762Spraks  *				 PAGESIZE - pon);
141*a5652762Spraks  *			buf += (PAGESIZE - pon);
142*a5652762Spraks  *			pon = 0;
143*a5652762Spraks  *		}
144*a5652762Spraks  *
145*a5652762Spraks  *		if (newpage) {
146*a5652762Spraks  *			pon = (off & PAGEOFFSET);
147*a5652762Spraks  *			bzero(vml[i-1].vs_addr + pon, PAGESIZE - pon);
148*a5652762Spraks  *		}
149*a5652762Spraks  *
150*a5652762Spraks  *		vpm_unmap_pages(vml, S_WRITE);
151*a5652762Spraks  *	}
152*a5652762Spraks  *
153*a5652762Spraks  *
154*a5652762Spraks  *
155*a5652762Spraks  *
156*a5652762Spraks  * int vpm_data_copy(struct vnode *vp, u_offset_t off, size_t len,
157*a5652762Spraks  *		struct uio *uio, int fetchpage, int *newpagecreated,
158*a5652762Spraks  *		int zerostart, enum seg_rw rw);
159*a5652762Spraks  *
160*a5652762Spraks  * This function can be called if the need is to just transfer data to/from
161*a5652762Spraks  * the vnode pages. It takes a 'uio' structure and  calls 'uiomove()' to
162*a5652762Spraks  * do the data transfer. It can be used in the context of read and write
163*a5652762Spraks  * system calls to transfer data between a user buffer, which is specified
164*a5652762Spraks  * in the uio structure, and the vnode pages. If the data needs to be
165*a5652762Spraks  * transferred between a kernel buffer and the pages, like in the above
166*a5652762Spraks  * example, a uio structure can be set up accordingly and passed. The 'rw'
167*a5652762Spraks  * parameter will determine the direction of the data transfer.
168*a5652762Spraks  *
 * The 'fetchpage' and 'newpagecreated' are the same as explained before.
 * The 'zerostart' flag, when set, will zero fill the start of the first
 * page up to the offset 'off', i.e. from 'off & PAGEMASK' to 'off'.
 * Here too the MAXBSIZE restriction mentioned above applies to the length
 * requested.
174*a5652762Spraks  *
175*a5652762Spraks  *
176*a5652762Spraks  * int vpm_sync_pages(struct vnode *vp, u_offset_t off,
177*a5652762Spraks  *					 size_t len, uint_t flags)
178*a5652762Spraks  *
179*a5652762Spraks  * This function can be called to flush or sync the vnode(file) pages that
180*a5652762Spraks  * have been accessed. It will call VOP_PUTPAGE().
181*a5652762Spraks  *
 * For the given vnode, off and len the pages covering the range
 * [off, off + len) are flushed. Currently it uses the same flags that
 * are used with the segmap_release() interface. Refer to vm/seg_map.h.
 * (SM_DONTNEED, SM_ASYNC, SM_FREE, SM_INVAL, SM_DESTROY)
186*a5652762Spraks  *
187*a5652762Spraks  */
188*a5652762Spraks 
189*a5652762Spraks 
/*
 * vpm cache related definitions.
 *
 * VPMAP_MINCACHE evaluates to 64 * 1024 * 1024.
 * NOTE(review): by its name this is a minimum cache size, presumably in
 * bytes (64MB) -- confirm against the vpm implementation.
 */
#define	VPMAP_MINCACHE		(64 * 1024 * 1024)

/*
 * vpm caching mode selectors.
 * NOTE(review): the names suggest LRU versus random reuse of cache
 * entries; the policy itself is implemented outside this header.
 */
#define	VPMCACHE_LRU		0
#define	VPMCACHE_RANDOM		1
200*a5652762Spraks /*
201*a5652762Spraks  * Data structures to manage the cache of pages referenced by
202*a5652762Spraks  * the vpm interfaces. There is one vpmap struct per page in the cache.
203*a5652762Spraks  */
204*a5652762Spraks struct vpmap {
205*a5652762Spraks 	kmutex_t	vpm_mtx;	/* protects non list fields */
206*a5652762Spraks 	struct vnode	*vpm_vp;	/* pointer to vnode of cached page */
207*a5652762Spraks 	struct vpmap	*vpm_next;	/* free list pointers */
208*a5652762Spraks 	struct vpmap	*vpm_prev;
209*a5652762Spraks 	u_offset_t	vpm_off;	/* offset of the page */
210*a5652762Spraks 	page_t		*vpm_pp;	/* page pointer */
211*a5652762Spraks 	ushort_t	vpm_refcnt;	/* Number active references */
212*a5652762Spraks 	ushort_t	vpm_ndxflg;	/* indicates which queue */
213*a5652762Spraks 	ushort_t	vpm_free_ndx;	/* freelist it belongs to */
214*a5652762Spraks };
215*a5652762Spraks 
216*a5652762Spraks /*
217*a5652762Spraks  * Multiple vpmap free lists are maintaned so that allocations
218*a5652762Spraks  * scale with cpu count. To further reduce contentions between
219*a5652762Spraks  * allocation and deallocations, each list is made up of two queues.
220*a5652762Spraks  */
221*a5652762Spraks #define	VPM_FREEQ_PAD	64
222*a5652762Spraks union vpm_freeq {
223*a5652762Spraks 	struct {
224*a5652762Spraks 		struct vpmap	*vpmsq_free;
225*a5652762Spraks 		kmutex_t	vpmsq_mtx;
226*a5652762Spraks 	} vpmfq;
227*a5652762Spraks 	char vpmq_pad[VPM_FREEQ_PAD];
228*a5652762Spraks };
229*a5652762Spraks 
230*a5652762Spraks #define	vpmq_free	vpmfq.vpmsq_free
231*a5652762Spraks #define	vpmq_mtx	vpmfq.vpmsq_mtx
232*a5652762Spraks 
233*a5652762Spraks struct vpmfree {
234*a5652762Spraks 	union vpm_freeq vpm_freeq[2];	/* alloc and release queue */
235*a5652762Spraks 	union vpm_freeq *vpm_allocq;	/* current alloc queue */
236*a5652762Spraks 	union vpm_freeq *vpm_releq;	/* current release queue */
237*a5652762Spraks 	kcondvar_t	vpm_free_cv;
238*a5652762Spraks 	ushort_t	vpm_want;
239*a5652762Spraks };
240*a5652762Spraks 
/*
 * NOTE(review): presumably the indices of the allocation and release
 * queues within vpmfree.vpm_freeq[] -- confirm against the users.
 */
#define	VPMALLOCQ	0
#define	VPMRELEQ	1
243*a5652762Spraks 
244*a5652762Spraks /*
245*a5652762Spraks  * VPM Interface definitions.
246*a5652762Spraks  */
247*a5652762Spraks 
248*a5652762Spraks /*
249*a5652762Spraks  * This structure is the scatter gather list element. The page
250*a5652762Spraks  * mappings will be returned in this structure. A pointer to an
251*a5652762Spraks  * array of this structure is passed to the interface routines.
252*a5652762Spraks  */
253*a5652762Spraks typedef struct vmap {
254*a5652762Spraks 	caddr_t	vs_addr;	/* mapped address */
255*a5652762Spraks 	size_t	vs_len;		/* length, currently fixed at PAGESIZE */
256*a5652762Spraks 	void	*vs_data;	/* opaque - private data */
257*a5652762Spraks } vmap_t;
258*a5652762Spraks 
/*
 * Bounds on the number of array elements in the scatter gather list.
 * An SGL passed to the interfaces needs at least MINVMAPS elements to
 * accommodate a MAXBSIZE request.
 */
#define	MINVMAPS   3		/* (MAXBSIZE/4096 + 1), min # mappings */
#define	MAXVMAPS   10		/* max # of elements in the SGL */
265*a5652762Spraks 
#ifdef _KERNEL

/*
 * The vpm interfaces are to be used under a 'vpm_enable' check, as an
 * alternative to the segmap interfaces where applicable.
 */
extern int	vpm_enable;

/*
 * vpm page mapping operations. Parameters are left unnamed in the
 * prototypes; the comment on each routine gives their order.
 */

/* Initialization entry point; takes no arguments and returns nothing. */
extern void	vpm_init(void);

/*
 * vpm_map_pages(vp, off, len, fetchpage, vml, vmlsz, newpagecreated, rw)
 *
 * Returns mappings to the pages of 'vp' covering [off, off + len) in the
 * SGL array 'vml', which has 'vmlsz' elements. 'rw' is the intended
 * operation on the mappings (S_WRITE or S_READ).
 */
extern int	vpm_map_pages(struct vnode *, u_offset_t, size_t, int,
		vmap_t *, int, int *, enum seg_rw);

/*
 * vpm_unmap_pages(vml, rw)
 *
 * Unmaps the pages mapped by vpm_map_pages(); 'vml' has to be the SGL
 * array that was passed to vpm_map_pages().
 */
extern void	vpm_unmap_pages(vmap_t *, enum seg_rw);

/*
 * vpm_sync_pages(vp, off, len, flags)
 *
 * Flushes the pages of 'vp' covering [off, off + len); 'flags' are the
 * ones used with segmap_release().
 */
extern int	vpm_sync_pages(struct vnode *, u_offset_t, size_t, uint_t);

/*
 * vpm_data_copy(vp, off, len, uio, fetchpage, newpagecreated, zerostart, rw)
 *
 * Transfers data between the buffer described by 'uio' and the pages of
 * 'vp'; 'rw' determines the direction of the transfer.
 */
extern int	vpm_data_copy(struct vnode *, u_offset_t, size_t,
		struct uio *, int, int *, int, enum seg_rw);
#endif	/* _KERNEL */
281*a5652762Spraks 
282*a5652762Spraks #ifdef	__cplusplus
283*a5652762Spraks }
284*a5652762Spraks #endif
285*a5652762Spraks 
286*a5652762Spraks #endif	/* _VM_VPM_H */
287