xref: /illumos-gate/usr/src/uts/common/vm/seg_map.h (revision 67d74cc3e7c9d9461311136a0b2069813a3fd927)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved	*/
28 
29 /*
30  * University Copyright- Copyright (c) 1982, 1986, 1988
31  * The Regents of the University of California
32  * All Rights Reserved
33  *
34  * University Acknowledgment- Portions of this document are derived from
35  * software developed by the University of California, Berkeley, and its
36  * contributors.
37  */
38 
39 #ifndef	_VM_SEG_MAP_H
40 #define	_VM_SEG_MAP_H
41 
42 #ifdef	__cplusplus
43 extern "C" {
44 #endif
45 
46 /*
47  * When segmap is created it is possible to program its behavior,
48  *	using the create args [needed for performance reasons].
49  * Segmap creates n lists of pages.
50  *	For VAC machines, there will be at least one free list
51  *	per color. If more than one free list per color is needed,
52  *	set nfreelist as needed.
53  *
54  *	For PAC machines, it will be treated as VAC with only one
55  *	color- every page is of the same color. Again, set nfreelist
56  *	to get more than one free list.
57  */
58 struct segmap_crargs {
59 	uint_t	prot;
60 	uint_t	shmsize;	/* shm_alignment for VAC, 0 for PAC. */
61 	uint_t	nfreelist;	/* number of freelist per color, >= 1 */
62 };
63 
64 #include <vm/kpm.h>
65 #include <vm/vpm.h>
66 
67 /*
68  * Each smap struct represents a MAXBSIZE sized mapping to the
69  * <sm_vp, sm_off> given in the structure.  The location of the
70  * structure in the array gives the virtual address of the
71  * mapping. Structure rearranged for 64bit sm_off.
72  */
73 struct	smap {
74 	kmutex_t	sm_mtx;		/* protect non-list fields */
75 	struct	vnode	*sm_vp;		/* vnode pointer (if mapped) */
76 	struct	smap	*sm_hash;	/* hash pointer */
77 	struct	smap	*sm_next;	/* next pointer */
78 	struct	smap	*sm_prev;	/* previous pointer */
79 	u_offset_t	sm_off;		/* file offset for mapping */
80 	ushort_t	sm_bitmap;	/* bit map for locked translations */
81 	ushort_t	sm_refcnt;	/* reference count for uses */
82 	ushort_t	sm_flags;	/* smap flags */
83 	ushort_t	sm_free_ndx;	/* freelist */
84 #ifdef	SEGKPM_SUPPORT
85 	struct kpme	sm_kpme;	/* segkpm */
86 #endif
87 };
88 
/*
 * GET_KPME(smp) yields a pointer to the kpme embedded in an smap.
 * Without SEGKPM_SUPPORT struct smap has no sm_kpme member, so the
 * macro yields a null kpme pointer and does not evaluate its argument.
 * With SEGKPM_SUPPORT the sm_kpme_* names are shorthands for the
 * members of the embedded sm_kpme.
 */
#ifndef	SEGKPM_SUPPORT
#define	GET_KPME(smp)	((struct kpme *)NULL)
#else	/* SEGKPM_SUPPORT */
#define	GET_KPME(smp)	(&(smp)->sm_kpme)
#define	sm_kpme_next	sm_kpme.kpe_next
#define	sm_kpme_prev	sm_kpme.kpe_prev
#define	sm_kpme_page	sm_kpme.kpe_page
#endif	/* SEGKPM_SUPPORT */
97 
/*
 * Bits stored in the sm_flags member of struct smap.
 */
#define	SM_KPM_NEWPAGE		0x0001	/* page created in segmap_getmapflt */
#define	SM_NOTKPM_RELEASED	0x0002	/* released smap not in segkpm mode */
#define	SM_QNDX_ZERO		0x0004	/* on the index 0 freelist */
#define	SM_READ_DATA		0x0010	/* page created for read */
#define	SM_WRITE_DATA		0x0020	/* page created for write */
104 
105 /*
106  * Multiple smap free lists are maintained so that allocations
107  * will scale with cpu count. Each free list is made up of 2 queues
108  * so that allocations and deallocations can proceed concurrently.
109  * Each queue structure is padded to 64 bytes to avoid false sharing.
110  */
111 #define	SM_FREEQ_PAD (64 - sizeof (struct smap *) - sizeof (kmutex_t))
112 struct 	sm_freeq {
113 	struct smap	*smq_free;	/* points into freelist */
114 	kmutex_t	smq_mtx;	/* protects smq_free */
115 	char		smq_pad[SM_FREEQ_PAD];
116 };
117 
118 struct	smfree {
119 	struct sm_freeq	sm_freeq[2];	/* alloc and release queues */
120 	struct sm_freeq	*sm_allocq;	/* current allocq */
121 	struct sm_freeq	*sm_releq;	/* current releq */
122 	kcondvar_t	sm_free_cv;
123 	ushort_t	sm_want;	/* someone wants a slot of this color */
124 };
125 
126 /*
127  * Cached smaps are kept on hash chains to enable fast reclaim lookups.
128  */
129 struct  smaphash {
130 	kmutex_t	sh_mtx;		/* protects this hash chain */
131 	struct  smap	*sh_hash_list;  /* start of hash chain */
132 };
133 
134 /*
135  * (Semi) private data maintained by the segmap driver per SEGMENT mapping
136  * All fields in segmap_data are read-only after the segment is created.
137  *
138  */
139 
140 struct	segmap_data {
141 	struct	smap	*smd_sm;	/* array of smap structures */
142 	long		smd_npages;	/* size of smap array */
143 	struct smfree	*smd_free;	/* ptr to freelist header array */
144 	struct smaphash *smd_hash;	/* ptr to hash header array */
145 	int		smd_nfree;	/* number of free lists */
146 	uchar_t		smd_prot;	/* protections for all smap's */
147 };
148 
149 /*
150  * Statistics for segmap operations.
151  *
152  * No explicit locking to protect these stats.
153  */
154 struct segmapcnt {
155 	kstat_named_t	smp_fault;	/* number of segmap_faults */
156 	kstat_named_t	smp_faulta;	/* number of segmap_faultas */
157 	kstat_named_t	smp_getmap;	/* number of segmap_getmaps */
158 	kstat_named_t	smp_get_use;	/* getmaps that reuse existing map */
159 	kstat_named_t	smp_get_reclaim; /* getmaps that do a reclaim */
160 	kstat_named_t	smp_get_reuse;	/* getmaps that reuse a slot */
161 	kstat_named_t	smp_get_unused;	/* getmaps that reuse existing map */
162 	kstat_named_t	smp_get_nofree;	/* getmaps with no free slots */
163 	kstat_named_t	smp_rel_async;	/* releases that are async */
164 	kstat_named_t	smp_rel_write;	/* releases that write */
165 	kstat_named_t	smp_rel_free;	/* releases that free */
166 	kstat_named_t	smp_rel_abort;	/* releases that abort */
167 	kstat_named_t	smp_rel_dontneed; /* releases with dontneed set */
168 	kstat_named_t	smp_release;	/* releases with no other action */
169 	kstat_named_t	smp_pagecreate;	/* pagecreates */
170 	kstat_named_t   smp_free_notfree; /* pages not freed in */
171 					/* segmap_pagefree */
172 	kstat_named_t   smp_free_dirty; /* dirty pages freeed */
173 					/* in segmap_pagefree */
174 	kstat_named_t   smp_free;	/* clean pages freeed in */
175 					/* segmap_pagefree */
176 	kstat_named_t	smp_stolen;	/* segmap_getmapflt() stole */
177 					/* from get_free_smp() */
178 	kstat_named_t	smp_get_nomtx;	/* free smaps but no mutex */
179 };
180 
181 /*
182  * These are flags used on release.  Some of these might get handled
183  * by segment operations needed for msync (when we figure them out).
184  * SM_ASYNC modifies SM_WRITE.  SM_DONTNEED modifies SM_FREE.  SM_FREE
185  * and SM_INVAL as well as SM_FREE and SM_DESTROY are mutually exclusive.
186  * SM_DESTROY behaves like SM_INVAL but also forces the pages to be
187  * destroyed -- this prevents them from being written to the backing
188  * store.
189  */
/* write back the pages upon release */
#define	SM_WRITE	0x01
/* modifier for SM_WRITE: do the write asynchronously */
#define	SM_ASYNC	0x02
/* put pages back on free list; excludes SM_INVAL and SM_DESTROY */
#define	SM_FREE		0x04
/* invalidate page (no caching) */
#define	SM_INVAL	0x08
/* modifier for SM_FREE: less likely to be needed soon */
#define	SM_DONTNEED	0x10
/* invalidate page, don't write back */
#define	SM_DESTROY	0x20
196 
197 /*
198  * These are the forcefault flags used on getmapflt.
199  *
199  * The original semantic was extended to allow using the segkpm mapping
201  * scheme w/o a major segmap interface change for MAXBSIZE == PAGESIZE
202  * (which is required to enable segkpm for MAXBSIZE > PAGESIZE).
202  * Most segmap consumers needn't be changed at all or only need to
204  * be changed slightly to take advantage of segkpm. Because the segkpm
205  * virtual address is based on the physical address of a page, a page is
206  * required to determine the virtual address (return value). Pages mapped
207  * with segkpm are always at least read locked and are hence protected
208  * from pageout or fsflush from segmap_getmap until segmap_release. This
209  * implies, that the segkpm mappings are locked within this period too.
210  * No trap driven segmap_fault's are possible in segkpm mode.
211  *
212  * The following combinations of "forcefault" and "rw" allow segkpm mode.
213  * (1) SM_FAULT, S_READ
214  * (2) SM_FAULT, S_WRITE
215  * (3) SM_PAGECREATE, S_WRITE
216  * (4) SM_LOCKPROTO, {S_READ, S_WRITE, S_OTHER}
217  *
218  * The regular additional operations (come in pairs in most of the cases):
219  * . segmap_pagecreate/segmap_pageunlock
220  * . segmap_fault(F_SOFTLOCK)/segmap_fault(F_SOFTUNLOCK)
221  *
222  * are mostly a no-op in segkpm mode with the following exceptions:
223  * . The "newpage" return value of segmap_pagecreate is still supported
224  *   for zeroout operations needed on newly created pages.
225  *
225  * . segmap_fault() must follow when an error could be expected in
227  *   the VOP_GETPAGE. In segkpm mode this error is recognized in
228  *   segmap_getmapflt and returned from the following segmap_fault()
229  *   call. The "hole" optimization (read only after first VOP_GETPAGE
230  *   mapping in segmap_getmapflt followed by a trap driven protection
231  *   fault and a second VOP_GETPAGE via segmap_fault) cannot be used.
232  *
233  * . segmap_fault(F_SOFTUNLOCK) must follow when segmap_getmapflt was
234  *   called w/ (SM_LOCKPROTO, S_OTHER). S_WRITE has to be applied, when
235  *   the page should be marked "dirty". Otherwise the page is not
236  *   written to the backing store later (as mentioned above, no page
237  *   or protection faults are possible in segkpm mode). Caller cannot
238  *   use only S_OTHER and rely on a protection fault to force the page
239  *   to become dirty.
240  *
241  * . The segmap_pagecreate parameter softlock is ignored, pages and
242  *   mappings are locked anyway.
243  *
244  * SM_LOCKPROTO is used in the fbio layer and some special segmap consumers.
245  */
/* create page in segkpm mode, no I/O */
#define	SM_PAGECREATE	0x00
/* fault in page if necessary */
#define	SM_FAULT	0x01
/* lock/unlock protocol used */
#define	SM_LOCKPROTO	0x02
249 
/* log2(MAXBSIZE) */
#define	MAXBSHIFT	13

/*
 * MAXBOFFSET is MAXBSIZE - 1 and MAXBMASK is its bitwise complement.
 * MAXBSIZE itself is not defined in this header.
 */
#define	MAXBOFFSET	(MAXBSIZE - 1)
#define	MAXBMASK	(~MAXBOFFSET)
254 
255 /*
256  * SMAP_HASHAVELEN is the average length desired for this chain, from
257  * which the size of the smd_hash table is derived at segment create time.
258  * SMAP_HASHVPSHIFT is defined so that 1 << SMAP_HASHVPSHIFT is the
259  * approximate size of a vnode struct.
260  */
#define	SMAP_HASHAVELEN		4	/* desired average hash chain length */
#define	SMAP_HASHVPSHIFT	6	/* 1 << this ~= size of a vnode */
263 
264 
#ifdef _KERNEL

/* The kernel generic mapping segment. */
extern struct seg *segkmap;

/*
 * Public seg_map segment operations.
 */

/* segment creation */
extern int segmap_create(struct seg *, void *);

/* obtaining and releasing mappings */
extern caddr_t segmap_getmap(struct seg *, struct vnode *, u_offset_t);
extern caddr_t segmap_getmapflt(struct seg *, struct vnode *, u_offset_t,
    size_t, int, enum seg_rw);
extern int segmap_release(struct seg *, caddr_t, uint_t);

/* page create/unlock and fault handling */
extern int segmap_pagecreate(struct seg *, caddr_t, size_t, int);
extern void segmap_pageunlock(struct seg *, caddr_t, size_t, enum seg_rw);
extern faultcode_t segmap_fault(struct hat *, struct seg *, caddr_t, size_t,
    enum fault_type, enum seg_rw);

/* per-vnode flush and invalidate */
extern void segmap_flush(struct seg *, struct vnode *);
extern void segmap_inval(struct seg *, struct vnode *, u_offset_t);

#endif	/* _KERNEL */
287 
288 #ifdef	__cplusplus
289 }
290 #endif
291 
292 #endif	/* _VM_SEG_MAP_H */
293