xref: /illumos-gate/usr/src/uts/common/vm/seg.h (revision 50f7888b60b9fee4c775b56966f02e23da2deef5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  * Copyright (c) 2015, Joyent, Inc.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 /*
31  * University Copyright- Copyright (c) 1982, 1986, 1988
32  * The Regents of the University of California
33  * All Rights Reserved
34  *
35  * University Acknowledgment- Portions of this document are derived from
36  * software developed by the University of California, Berkeley, and its
37  * contributors.
38  */
39 
40 #ifndef	_VM_SEG_H
41 #define	_VM_SEG_H
42 
43 #include <sys/vnode.h>
44 #include <sys/avl.h>
45 #include <vm/seg_enum.h>
46 #include <vm/faultcode.h>
47 #include <vm/hat.h>
48 
49 #ifdef	__cplusplus
50 extern "C" {
51 #endif
52 
53 /*
54  * VM - Segments.
55  */
56 
/* Declared as an incomplete type: this header only uses pointers to it. */
struct anon_map;
58 
59 /*
60  * kstat statistics for segment advise
61  */
62 typedef struct {
63 	kstat_named_t MADV_FREE_hit;
64 	kstat_named_t MADV_FREE_miss;
65 } segadvstat_t;
66 
67 /*
68  * memory object ids
69  */
70 typedef struct memid { u_longlong_t val[2]; } memid_t;
71 
/*
 * An address space contains a set of segments, managed by drivers.
 * Drivers support mapped devices, sharing, copy-on-write, etc.
 *
 * The seg structure contains a lock to prevent races, the base virtual
 * address and size of the segment, a back pointer to the containing
 * address space, pointers to maintain an AVL tree of segments in the
 * same address space, and procedure and data hooks for the driver.
 * The AVL tree of segments for the address space is sorted by
 * ascending base addresses and overlapping segments are not allowed.
 *
 * After a segment is created, faults may occur on pages of the segment.
 * When a fault occurs, the fault handling code must get the desired
 * object and set up the hardware translation to the object.  For some
 * objects, the fault handling code also implements copy-on-write.
 *
 * When the hat wants to unload a translation, it can call the unload
 * routine which is responsible for processing reference and modify bits.
 *
 * Each segment is protected by its containing address space lock.  To
 * access any field in the segment structure, the "as" must be locked.
 * If a segment field is to be modified, the address space lock must be
 * write locked.
 */
96 
/*
 * Link pair (p_lnext / p_lprev) used to chain pcache entries.  struct seg
 * embeds one of these as s_phead, the head of the segment's pcache list.
 */
typedef struct pcache_link pcache_link_t;

struct pcache_link {
	pcache_link_t	*p_lnext;
	pcache_link_t	*p_lprev;
};
101 
102 typedef struct seg {
103 	caddr_t	s_base;			/* base virtual address */
104 	size_t	s_size;			/* size in bytes */
105 	uint_t	s_szc;			/* max page size code */
106 	uint_t	s_flags;		/* flags for segment, see below */
107 	struct	as *s_as;		/* containing address space */
108 	avl_node_t s_tree;		/* AVL tree links to segs in this as */
109 	struct	seg_ops *s_ops;		/* ops vector: see below */
110 	void *s_data;			/* private data for instance */
111 	kmutex_t s_pmtx;		/* protects seg's pcache list */
112 	pcache_link_t s_phead;		/* head of seg's pcache list */
113 } seg_t;
114 
115 #define	S_PURGE		(0x01)		/* seg should be purged in as_gap() */
116 
117 struct	seg_ops {
118 	int	(*dup)(struct seg *, struct seg *);
119 	int	(*unmap)(struct seg *, caddr_t, size_t);
120 	void	(*free)(struct seg *);
121 	faultcode_t (*fault)(struct hat *, struct seg *, caddr_t, size_t,
122 	    enum fault_type, enum seg_rw);
123 	faultcode_t (*faulta)(struct seg *, caddr_t);
124 	int	(*setprot)(struct seg *, caddr_t, size_t, uint_t);
125 	int	(*checkprot)(struct seg *, caddr_t, size_t, uint_t);
126 	int	(*kluster)(struct seg *, caddr_t, ssize_t);
127 	size_t	(*swapout)(struct seg *);
128 	int	(*sync)(struct seg *, caddr_t, size_t, int, uint_t);
129 	size_t	(*incore)(struct seg *, caddr_t, size_t, char *);
130 	int	(*lockop)(struct seg *, caddr_t, size_t, int, int, ulong_t *,
131 			size_t);
132 	int	(*getprot)(struct seg *, caddr_t, size_t, uint_t *);
133 	u_offset_t	(*getoffset)(struct seg *, caddr_t);
134 	int	(*gettype)(struct seg *, caddr_t);
135 	int	(*getvp)(struct seg *, caddr_t, struct vnode **);
136 	int	(*advise)(struct seg *, caddr_t, size_t, uint_t);
137 	void	(*dump)(struct seg *);
138 	int	(*pagelock)(struct seg *, caddr_t, size_t, struct page ***,
139 			enum lock_type, enum seg_rw);
140 	int	(*setpagesize)(struct seg *, caddr_t, size_t, uint_t);
141 	int	(*getmemid)(struct seg *, caddr_t, memid_t *);
142 	struct lgrp_mem_policy_info	*(*getpolicy)(struct seg *, caddr_t);
143 	int	(*capable)(struct seg *, segcapability_t);
144 	int	(*inherit)(struct seg *, caddr_t, size_t, uint_t);
145 };
146 
147 #ifdef _KERNEL
148 
149 /*
150  * Generic segment operations
151  */
152 extern	void	seg_init(void);
153 extern	struct	seg *seg_alloc(struct as *as, caddr_t base, size_t size);
154 extern	int	seg_attach(struct as *as, caddr_t base, size_t size,
155 			struct seg *seg);
156 extern	void	seg_unmap(struct seg *seg);
157 extern	void	seg_free(struct seg *seg);
158 
159 /*
160  * functions for pagelock cache support
161  */
162 typedef	int (*seg_preclaim_cbfunc_t)(void *, caddr_t, size_t,
163     struct page **, enum seg_rw, int);
164 
165 extern	struct	page **seg_plookup(struct seg *seg, struct anon_map *amp,
166     caddr_t addr, size_t len, enum seg_rw rw, uint_t flags);
167 extern	void	seg_pinactive(struct seg *seg, struct anon_map *amp,
168     caddr_t addr, size_t len, struct page **pp, enum seg_rw rw,
169     uint_t flags, seg_preclaim_cbfunc_t callback);
170 
171 extern	void	seg_ppurge(struct seg *seg, struct anon_map *amp,
172     uint_t flags);
173 extern	void	seg_ppurge_wiredpp(struct page **pp);
174 
175 extern	int	seg_pinsert_check(struct seg *seg, struct anon_map *amp,
176     caddr_t addr, size_t len, uint_t flags);
177 extern	int	seg_pinsert(struct seg *seg, struct anon_map *amp,
178     caddr_t addr, size_t len, size_t wlen, struct page **pp, enum seg_rw rw,
179     uint_t flags, seg_preclaim_cbfunc_t callback);
180 
181 extern	void	seg_pasync_thread(void);
182 extern	void	seg_preap(void);
183 extern	int	seg_p_disable(void);
184 extern	void	seg_p_enable(void);
185 
186 extern	segadvstat_t	segadvstat;
187 
/*
 * Flags for pagelock cache support.
 *
 * The pcache routines take their flags argument as a uint_t.  When
 * SEGP_PSHIFT is set, the upper 16 bits of that argument are reserved
 * for the alignment page shift.
 */
#define	SEGP_FORCE_WIRED	0x01	/* skip check against seg_pwindow */
#define	SEGP_AMP		0x02	/* anon map's pcache entry */
#define	SEGP_PSHIFT		0x04	/* addr pgsz shift for hash function */
197 
/*
 * Result codes returned by seg_pinsert() and seg_pinsert_check().
 */
#define	SEGP_SUCCESS	0	/* seg_pinsert() succeeded */
#define	SEGP_FAIL	1	/* seg_pinsert() failed */
203 
/*
 * Page status bits for segop_incore.  Each value is a distinct single
 * bit, so several may be combined in one status byte.
 */
#define	SEG_PAGE_INCORE		0x01	/* VA has a page backing it */
#define	SEG_PAGE_LOCKED		0x02	/* VA has a page that is locked */
#define	SEG_PAGE_HASCOW		0x04	/* VA has a page with a copy-on-write */
#define	SEG_PAGE_SOFTLOCK	0x08	/* VA has a page with softlock held */
#define	SEG_PAGE_VNODEBACKED	0x10	/* Segment is backed by a vnode */
#define	SEG_PAGE_ANON		0x20	/* VA has an anonymous page */
#define	SEG_PAGE_VNODE		0x40	/* VA has a vnode page backing it */
212 
/*
 * Dispatch wrappers for the segment ops vector.  Each SEGOP_X() calls
 * member x of (s)->s_ops and hands the segment s back to it together
 * with the remaining arguments.
 *
 * The segment argument appears twice in every expansion, so it must be
 * an expression without side effects.
 */
#define	SEGOP_DUP(s, n) \
	((s)->s_ops->dup((s), (n)))
#define	SEGOP_UNMAP(s, a, l) \
	((s)->s_ops->unmap((s), (a), (l)))
#define	SEGOP_FREE(s) \
	((s)->s_ops->free((s)))
#define	SEGOP_FAULT(h, s, a, l, t, rw) \
	((s)->s_ops->fault((h), (s), (a), (l), (t), (rw)))
#define	SEGOP_FAULTA(s, a) \
	((s)->s_ops->faulta((s), (a)))
#define	SEGOP_SETPROT(s, a, l, p) \
	((s)->s_ops->setprot((s), (a), (l), (p)))
#define	SEGOP_CHECKPROT(s, a, l, p) \
	((s)->s_ops->checkprot((s), (a), (l), (p)))
#define	SEGOP_KLUSTER(s, a, d) \
	((s)->s_ops->kluster((s), (a), (d)))
#define	SEGOP_SWAPOUT(s) \
	((s)->s_ops->swapout((s)))
#define	SEGOP_SYNC(s, a, l, atr, f) \
	((s)->s_ops->sync((s), (a), (l), (atr), (f)))
#define	SEGOP_INCORE(s, a, l, v) \
	((s)->s_ops->incore((s), (a), (l), (v)))
#define	SEGOP_LOCKOP(s, a, l, atr, op, b, p) \
	((s)->s_ops->lockop((s), (a), (l), (atr), (op), (b), (p)))
#define	SEGOP_GETPROT(s, a, l, p) \
	((s)->s_ops->getprot((s), (a), (l), (p)))
#define	SEGOP_GETOFFSET(s, a) \
	((s)->s_ops->getoffset((s), (a)))
#define	SEGOP_GETTYPE(s, a) \
	((s)->s_ops->gettype((s), (a)))
#define	SEGOP_GETVP(s, a, vpp) \
	((s)->s_ops->getvp((s), (a), (vpp)))
#define	SEGOP_ADVISE(s, a, l, b) \
	((s)->s_ops->advise((s), (a), (l), (b)))
#define	SEGOP_DUMP(s) \
	((s)->s_ops->dump((s)))
#define	SEGOP_PAGELOCK(s, a, l, p, t, rw) \
	((s)->s_ops->pagelock((s), (a), (l), (p), (t), (rw)))
#define	SEGOP_SETPAGESIZE(s, a, l, szc) \
	((s)->s_ops->setpagesize((s), (a), (l), (szc)))
#define	SEGOP_GETMEMID(s, a, mp) \
	((s)->s_ops->getmemid((s), (a), (mp)))
#define	SEGOP_GETPOLICY(s, a) \
	((s)->s_ops->getpolicy((s), (a)))
#define	SEGOP_CAPABLE(s, c) \
	((s)->s_ops->capable((s), (c)))
#define	SEGOP_INHERIT(s, a, l, b) \
	((s)->s_ops->inherit((s), (a), (l), (b)))
242 
/*
 * seg_page(seg, addr): byte offset of addr from the segment base,
 * shifted right by PAGESHIFT.
 */
#define	seg_page(seg, addr)	\
	((uintptr_t)((addr) - (seg)->s_base) >> PAGESHIFT)

/*
 * seg_pages(seg): segment size plus PAGEOFFSET, shifted right by
 * PAGESHIFT.
 */
#define	seg_pages(seg)	\
	((uintptr_t)((seg)->s_size + PAGEOFFSET) >> PAGESHIFT)
248 
/*
 * Error codes internal to the seg layer.  The negative values are
 * parenthesized so that each macro expands to a single primary
 * expression wherever it is used.
 */
#define	IE_NOMEM	(-1)	/* internal to seg layer */
#define	IE_RETRY	(-2)	/* internal to seg layer */
#define	IE_REATTACH	(-3)	/* internal to seg layer */
252 
253 /* Values for SEGOP_INHERIT */
254 #define	SEGP_INH_ZERO	0x01
255 
256 int seg_inherit_notsup(struct seg *, caddr_t, size_t, uint_t);
257 
/*
 * Delay/retry factors for seg_p_mem_config_pre_del
 */
#define	SEGP_PREDEL_DELAY_FACTOR	4

/*
 * The pagelock cache cannot be purged during a DR delete memory
 * operation.  As a workaround, the stall threshold is set to twice the
 * maximum seen during testing.  The workaround is to be removed once a
 * suitable fix is found.
 */
#define	SEGP_STALL_SECONDS	25
#define	SEGP_STALL_THRESHOLD	\
	(SEGP_PREDEL_DELAY_FACTOR * SEGP_STALL_SECONDS)
270 
#ifdef VMDEBUG

/*
 * Function forms of seg_page() and seg_pages().
 *
 * This header also defines function-like macros with the same names.  A
 * function-like macro is expanded only when its name is immediately
 * followed by "(", so each declarator name is wrapped in parentheses
 * here.  Without them the preprocessor rewrites these prototypes into
 * invalid declarations whenever VMDEBUG is defined.
 */
uint_t	(seg_page)(struct seg *, caddr_t);
uint_t	(seg_pages)(struct seg *);

#endif	/* VMDEBUG */
277 
278 boolean_t	seg_can_change_zones(struct seg *);
279 size_t		seg_swresv(struct seg *);
280 
281 #endif	/* _KERNEL */
282 
283 #ifdef	__cplusplus
284 }
285 #endif
286 
287 #endif	/* _VM_SEG_H */
288