xref: /titanic_51/usr/src/uts/common/vm/anon.h (revision ac4d633f367252125bb35e97c5725d2aa68c1291)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /*	 All Rights Reserved   */
28 
29 /*
30  * University Copyright- Copyright (c) 1982, 1986, 1988
31  * The Regents of the University of California
32  * All Rights Reserved
33  *
34  * University Acknowledgment- Portions of this document are derived from
35  * software developed by the University of California, Berkeley, and its
36  * contributors.
37  */
38 
39 #ifndef	_VM_ANON_H
40 #define	_VM_ANON_H
41 
42 #pragma ident	"%Z%%M%	%I%	%E% SMI"
43 
44 #include <sys/cred.h>
45 #include <vm/seg.h>
46 #include <vm/vpage.h>
47 
48 #ifdef	__cplusplus
49 extern "C" {
50 #endif
51 
52 /*
53  * VM - Anonymous pages.
54  */
55 
/* Offset type used to name anonymous pages and their backing store. */
typedef	unsigned long anoff_t;

/*
 * Anon slot.
 *
 * Every anonymous page, whether it is in memory or in swap, is described
 * by one struct anon.  The slot is a level of indirection between the
 * anonymous page and its backing store:
 *
 *	(an_vp, an_off)		names the vnode of the anonymous page for
 *				this slot.
 *
 *	(an_pvp, an_poff)	names the location of the physical backing
 *				store for the page.  A null name means there
 *				is no associated physical store.  The
 *				location can change while the slot is in use.
 *
 *	an_hash			hash list of anon slots.  The list is hashed
 *				by (an_vp, an_off) and provides a way of
 *				going from the name of an anonymous page to
 *				its anon slot.
 *
 *	an_refcnt		number of separate copies that will need to
 *				be created in case of copy-on-write.  A
 *				count > 0 protects the existence of the slot;
 *				anon_alloc() initializes the count to 1.
 *
 * A client that obtains a slot and lets multiple threads share it is
 * responsible for ensuring that no thread references the slot while
 * another thread is decrementing the last count and destroying it.  The
 * seg_vn segment type, for example, protects against this with higher
 * level locks.
 */
struct anon {
	struct vnode	*an_vp;		/* vnode of the anon page */
	struct vnode	*an_pvp;	/* vnode of physical backing store */
	anoff_t		an_off;		/* offset of the anon page */
	anoff_t		an_poff;	/* offset in the backing store vnode */
	struct anon	*an_hash;	/* hash list of anon slots */
	int		an_refcnt;	/* # of sharers of this slot */
};
95 
96 #ifdef _KERNEL
97 /*
98  * The swapinfo_lock protects:
99  *		swapinfo list
100  *		individual swapinfo structures
101  *
102  * The anoninfo_lock protects:
103  *		anoninfo counters
104  *
105  * The anonhash_lock protects:
106  *		anon hash lists
107  *		anon slot fields
108  *
109  * Fields in the anon slot which are read-only for the life of the slot
110  * (an_vp, an_off) do not require the anonhash_lock be held to access them.
111  * If you access a field without the anonhash_lock held you must be holding
112  * the slot with an_refcnt to make sure it isn't destroyed.
113  * To write (an_pvp, an_poff) in a given slot you must also hold the
114  * p_iolock of the anonymous page for slot.
115  */
116 extern kmutex_t anoninfo_lock;
117 extern kmutex_t swapinfo_lock;
118 extern kmutex_t anonhash_lock[];
119 extern pad_mutex_t anon_array_lock[];
120 extern kcondvar_t anon_array_cv[];
121 
/*
 * Global hash table providing the mapping (vp, off) -> ap.
 *
 * ANON_HASH() reduces a (vp, off) pair to an index below ANON_HASH_SIZE;
 * the masking assumes the table size is a power of two.  AH_LOCK() folds
 * that index further into the range [0, AH_LOCK_SIZE).
 */
extern size_t		anon_hash_size;
extern struct anon	**anon_hash;

#define	ANON_HASH_SIZE	anon_hash_size
#define	ANON_HASHAVELEN	4
#define	ANON_HASH(vp, off)	\
	((((uintptr_t)(vp) >> 7) ^ ((off) >> PAGESHIFT)) & \
	    (ANON_HASH_SIZE - 1))

#define	AH_LOCK_SIZE	64
#define	AH_LOCK(vp, off)	\
	(ANON_HASH((vp), (off)) & (AH_LOCK_SIZE - 1))
134 
135 #endif	/* _KERNEL */
136 
137 /*
138  * Declaration for the Global counters to accurately
139  * track the kernel foot print in memory.
140  */
141 extern  pgcnt_t segvn_pages_locked;
142 extern  pgcnt_t pages_locked;
143 extern  pgcnt_t pages_claimed;
144 extern  pgcnt_t pages_useclaim;
145 extern  pgcnt_t obp_pages;
146 
147 /*
148  * Anonymous backing store accounting structure for swapctl.
149  *
150  * ani_max = maximum amount of swap space
151  *	(including potentially available physical memory)
152  * ani_free = amount of unallocated anonymous memory
153  *	(some of which might be reserved and including
154  *	potentially available physical memory)
155  * ani_resv = amount of claimed (reserved) anonymous memory
156  *
157  * The swap data can be acquired more efficiently through the
158  * kstats interface.
159  * Total slots currently available for reservation =
160  *	MAX(ani_max - ani_resv, 0) + (availrmem - swapfs_minfree)
161  */
162 struct anoninfo {
163 	pgcnt_t	ani_max;
164 	pgcnt_t	ani_free;
165 	pgcnt_t	ani_resv;
166 };
167 
#ifdef _SYSCALL32
/* struct anoninfo with every counter declared as size32_t. */
struct anoninfo32 {
	size32_t	ani_max;
	size32_t	ani_free;
	size32_t	ani_resv;
};
#endif /* _SYSCALL32 */
175 
176 /*
177  * Define the NCPU pool of the ani_free counters. Update the counter
178  * of the cpu on which the thread is running and in every clock intr
179  * sync anoninfo.ani_free with the current total of all the NCPU entries.
180  */
181 
182 typedef	struct	ani_free {
183 	kmutex_t	ani_lock;
184 	pgcnt_t		ani_count;
185 	uchar_t		pad[64 - sizeof (kmutex_t) - sizeof (pgcnt_t)];
186 			/* XXX 64 = cacheline size */
187 } ani_free_t;
188 
189 #define	ANI_MAX_POOL	128
190 extern	ani_free_t	ani_free_pool[];
191 
/*
 * Add `inc' to the ani_count of the ani_free_pool[] entry selected by
 * (CPU->cpu_id & (ANI_MAX_POOL - 1)), holding that entry's ani_lock
 * across the update.
 *
 * The expansion is a brace-enclosed block, so it must be used as a
 * statement.  The block's own locals carry an ani_add_ prefix so that an
 * `inc' expression mentioning a caller variable named `index' or `anifp'
 * is evaluated against the caller's variable rather than being captured
 * by the macro; `inc' itself is parenthesized for the same reason.
 */
#define	ANI_ADD(inc)	{ \
	ani_free_t	*ani_add_fp_; \
	int		ani_add_idx_; \
	ani_add_idx_ = (CPU->cpu_id & (ANI_MAX_POOL - 1)); \
	ani_add_fp_ = &ani_free_pool[ani_add_idx_]; \
	mutex_enter(&ani_add_fp_->ani_lock); \
	ani_add_fp_->ani_count += (inc); \
	mutex_exit(&ani_add_fp_->ani_lock); \
}
201 
202 /*
203  * Anon array pointers are allocated in chunks. Each chunk
204  * has PAGESIZE/sizeof(u_long *) of anon pointers.
205  * There are two levels of arrays for anon array pointers larger
206  * than a chunk. The first level points to anon array chunks.
207  * The second level consists of chunks of anon pointers.
208  *
209  * If anon array is smaller than a chunk then the whole anon array
210  * is created (memory is allocated for whole anon array).
211  * If anon array is larger than a chunk only first level array is
212  * allocated. Then other arrays (chunks) are allocated only when
213  * they are initialized with anon pointers.
214  */
215 struct anon_hdr {
216 	kmutex_t serial_lock;	/* serialize array chunk allocation */
217 	pgcnt_t	size;		/* number of pointers to (anon) pages */
218 	void	**array_chunk;	/* pointers to anon pointers or chunks of */
219 				/* anon pointers */
220 	int	flags;		/* ANON_ALLOC_FORCE force preallocation of */
221 				/* whole anon array	*/
222 };
223 
/*
 * ANON_PTRSHIFT is log2 of the size of an anon pointer; ANON_PTRMASK
 * clears the low ANON_PTRSHIFT bits of a value.  The mask is parenthesized
 * so that it expands as a single operand in any expression.
 */
#ifdef	_LP64
#define	ANON_PTRSHIFT	3
#define	ANON_PTRMASK	(~7)
#else
#define	ANON_PTRSHIFT	2
#define	ANON_PTRMASK	(~3)
#endif

/*
 * Number of anon pointers in one chunk, log2 of that number, and the mask
 * that extracts an index within a chunk.
 */
#define	ANON_CHUNK_SIZE		(PAGESIZE >> ANON_PTRSHIFT)
#define	ANON_CHUNK_SHIFT	(PAGESHIFT - ANON_PTRSHIFT)
#define	ANON_CHUNK_OFF		(ANON_CHUNK_SIZE - 1)
235 
/*
 * Anon flags.  ANON_SLEEP is the absence of ANON_NOSLEEP; the remaining
 * flags are distinct bits.
 */
#define	ANON_SLEEP		0x00	/* ok to block */
#define	ANON_NOSLEEP		0x01	/* non-blocking call */
#define	ANON_ALLOC_FORCE	0x02	/* force single level anon array */
#define	ANON_GROWDOWN		0x04	/* anon array should grow downward */
243 
244 /*
245  * The anon_map structure is used by various clients of the anon layer to
246  * manage anonymous memory.   When anonymous memory is shared,
247  * then the different clients sharing it will point to the
248  * same anon_map structure.  Also, if a segment is unmapped
249  * in the middle where an anon_map structure exists, the
250  * newly created segment will also share the anon_map structure,
251  * although the two segments will use different ranges of the
252  * anon array.  When mappings are private (or shared with
253  * a reference count of 1), an unmap operation will free up
254  * a range of anon slots in the array given by the anon_map
255  * structure.  Because of fragmentation due to this unmapping,
256  * we have to store the size of the anon array in the anon_map
257  * structure so that we can free everything when the reference
258  * count goes to zero.
259  *
260  * A new rangelock scheme is introduced to make the anon layer scale.
261  * A reader/writer lock per anon_map and an array of system-wide hash
262  * locks, anon_array_lock[], are introduced to replace serial_lock and
263  * the anonmap lock.  The writer lock is held when we want to single-thread
264  * references to the anon array pointers or to the anon_map's
265  * members, whereas the reader lock and anon_array_lock are
266  * held to allow multiple threads to reference different parts of
267  * the anon array.  A global set of condition variables, anon_array_cv,
268  * is used with anon_array_lock[] to make the hold time of the locks
269  * short.
270  *
271  * szc is used to calculate the index of hash locks and cv's.  We
272  * could've just used seg->s_szc if not for the possible sharing of
273  * anon_map between SYSV shared memory and ISM, so now we introduce
274  * szc in the anon_map structure.  For MAP_SHARED, the amp->szc is either
275  * 0 (base page size) or page_num_pagesizes() - 1, while MAP_PRIVATE
276  * the amp->szc could be anything in [0, page_num_pagesizes() - 1].
277  */
278 struct anon_map {
279 	krwlock_t a_rwlock;	/* protect anon_map and anon array */
280 	size_t	size;		/* size in bytes mapped by the anon array */
281 	struct	anon_hdr *ahp; 	/* anon array header pointer, containing */
282 				/* anon pointer array(s) */
283 	size_t	swresv;		/* swap space reserved for this anon_map */
284 	ulong_t	refcnt;		/* reference count on this structure */
285 	ushort_t a_szc;		/* max szc among shared processes */
286 	void	*locality;	/* lgroup locality info */
287 };
288 
289 #ifdef _KERNEL
290 
/*
 * Busy bit kept in the word that `slot' points to.  Each macro evaluates
 * `slot' exactly once.
 */
#define	ANON_BUSY		0x1
#define	ANON_ISBUSY(slot)	(*(slot) & ANON_BUSY)	/* test the bit */
#define	ANON_SETBUSY(slot)	(*(slot) |= ANON_BUSY)	/* set the bit */
#define	ANON_CLRBUSY(slot)	(*(slot) &= ~ANON_BUSY)	/* clear the bit */
295 
#define	ANON_LOCKSIZE		128
#define	ANON_ARRAY_SHIFT	7	/* log2(ANON_LOCKSIZE) */
#define	ANON_MAP_SHIFT		6	/* log2(sizeof (struct anon_map)) */

/* Thin wrappers around rw_enter()/rw_exit(). */
#define	ANON_LOCK_ENTER(lock, type)	rw_enter((lock), (type))
#define	ANON_LOCK_EXIT(lock)		rw_exit((lock))

/*
 * Reduce an (amp, idx) pair to an index below ANON_LOCKSIZE: idx is summed
 * with itself shifted right by one, two and three times ANON_ARRAY_SHIFT,
 * the sum is XORed with the amp address shifted right by ANON_MAP_SHIFT,
 * and the result is masked to ANON_LOCKSIZE - 1.
 */
#define	ANON_ARRAY_HASH(amp, idx)	\
	((((idx) + \
	    ((idx) >> ANON_ARRAY_SHIFT) + \
	    ((idx) >> (2 * ANON_ARRAY_SHIFT)) + \
	    ((idx) >> (3 * ANON_ARRAY_SHIFT))) ^ \
	    ((uintptr_t)(amp) >> ANON_MAP_SHIFT)) & (ANON_LOCKSIZE - 1))
308 
309 typedef struct anon_sync_obj {
310 	kmutex_t	*sync_mutex;
311 	kcondvar_t	*sync_cv;
312 	ulong_t		*sync_data;
313 } anon_sync_obj_t;
314 
315 /*
316  * Anonymous backing store accounting structure for kernel.
317  * ani_max = total reservable slots on physical (disk-backed) swap
318  * ani_phys_resv = total phys slots reserved for use by clients
319  * ani_mem_resv = total mem slots reserved for use by clients
320  * ani_free = # unallocated physical slots + # of reserved unallocated
321  * memory slots
322  */
323 
/*
 * Initial total swap slots available for reservation: ani_max plus the
 * amount by which availrmem exceeds swapfs_minfree (clamped at zero).
 */
#define	TOTAL_AVAILABLE_SWAP	\
	(k_anoninfo.ani_max +	\
	    MAX((spgcnt_t)(availrmem - swapfs_minfree), 0))

/*
 * Swap slots currently available for reservation: as above, less the
 * physical slots that are already reserved.
 */
#define	CURRENT_TOTAL_AVAILABLE_SWAP	\
	((k_anoninfo.ani_max - k_anoninfo.ani_phys_resv) +	\
	    MAX((spgcnt_t)(availrmem - swapfs_minfree), 0))
336 
337 struct k_anoninfo {
338 	pgcnt_t	ani_max;	/* total reservable slots on phys */
339 					/* (disk) swap */
340 	pgcnt_t	ani_free;	/* # of unallocated phys and mem slots */
341 	pgcnt_t	ani_phys_resv;	/* # of reserved phys (disk) slots */
342 	pgcnt_t	ani_mem_resv;	/* # of reserved mem slots */
343 	pgcnt_t	ani_locked_swap; /* # of swap slots locked in reserved */
344 				/* mem swap */
345 };
346 
347 extern	struct k_anoninfo k_anoninfo;
348 
349 extern void	anon_init(void);
350 extern struct	anon *anon_alloc(struct vnode *, anoff_t);
351 extern void	anon_dup(struct anon_hdr *, ulong_t,
352 		    struct anon_hdr *, ulong_t, size_t);
353 extern void	anon_dup_fill_holes(struct anon_hdr *, ulong_t,
354 		    struct anon_hdr *, ulong_t, size_t, uint_t, int);
355 extern int	anon_fill_cow_holes(struct seg *, caddr_t, struct anon_hdr *,
356 		    ulong_t, struct vnode *, u_offset_t, size_t, uint_t,
357 		    uint_t, struct vpage [], struct cred *);
358 extern void	anon_free(struct anon_hdr *, ulong_t, size_t);
359 extern void	anon_free_pages(struct anon_hdr *, ulong_t, size_t, uint_t);
360 extern void	anon_disclaim(struct anon_map *, ulong_t, size_t, int);
361 extern int	anon_getpage(struct anon **, uint_t *, struct page **,
362 		    size_t, struct seg *, caddr_t, enum seg_rw, struct cred *);
363 extern int	swap_getconpage(struct vnode *, u_offset_t, size_t,
364 		    uint_t *, page_t *[], size_t, page_t *, uint_t *,
365 		    spgcnt_t *, struct seg *, caddr_t,
366 		    enum seg_rw, struct cred *);
367 extern int	anon_map_getpages(struct anon_map *, ulong_t,
368 		    uint_t, struct seg *, caddr_t, uint_t,
369 		    uint_t *, page_t *[], uint_t *,
370 		    struct vpage [], enum seg_rw, int, int, struct cred *);
371 extern int	anon_map_privatepages(struct anon_map *, ulong_t,
372 		    uint_t, struct seg *, caddr_t, uint_t,
373 		    page_t *[], struct vpage [], int, struct cred *);
374 extern struct	page *anon_private(struct anon **, struct seg *,
375 		    caddr_t, uint_t, struct page *,
376 		    int, struct cred *);
377 extern struct	page *anon_zero(struct seg *, caddr_t,
378 		    struct anon **, struct cred *);
379 extern int	anon_map_createpages(struct anon_map *, ulong_t,
380 		    size_t, struct page **,
381 		    struct seg *, caddr_t,
382 		    enum seg_rw, struct cred *);
383 extern int	anon_map_demotepages(struct anon_map *, ulong_t,
384 		    struct seg *, caddr_t, uint_t,
385 		    struct vpage [], struct cred *);
386 extern void	anon_shmap_free_pages(struct anon_map *, ulong_t, size_t);
387 extern int	anon_resvmem(size_t, uint_t);
388 extern void	anon_unresv(size_t);
389 extern struct	anon_map *anonmap_alloc(size_t, size_t);
390 extern void	anonmap_free(struct anon_map *);
391 extern void	anon_decref(struct anon *);
392 extern int	non_anon(struct anon_hdr *, ulong_t, u_offset_t *, size_t *);
393 extern pgcnt_t	anon_pages(struct anon_hdr *, ulong_t, pgcnt_t);
394 extern int	anon_swap_adjust(pgcnt_t);
395 extern void	anon_swap_restore(pgcnt_t);
396 extern struct	anon_hdr *anon_create(pgcnt_t, int);
397 extern void	anon_release(struct anon_hdr *, pgcnt_t);
398 extern struct	anon *anon_get_ptr(struct anon_hdr *, ulong_t);
399 extern ulong_t	*anon_get_slot(struct anon_hdr *, ulong_t);
400 extern struct	anon *anon_get_next_ptr(struct anon_hdr *, ulong_t *);
401 extern int	anon_set_ptr(struct anon_hdr *, ulong_t, struct anon *, int);
402 extern int 	anon_copy_ptr(struct anon_hdr *, ulong_t,
403 		    struct anon_hdr *, ulong_t, pgcnt_t, int);
404 extern pgcnt_t	anon_grow(struct anon_hdr *, ulong_t *, pgcnt_t, pgcnt_t, int);
405 extern void	anon_array_enter(struct anon_map *, ulong_t,
406 			anon_sync_obj_t *);
407 extern int	anon_array_try_enter(struct anon_map *, ulong_t,
408 			anon_sync_obj_t *);
409 extern void	anon_array_exit(anon_sync_obj_t *);
410 
/*
 * Both macros call anon_resvmem() and return 1 on success, 0 on failure.
 *
 *	anon_resv	checks whether there is enough swap space to
 *			fulfill the request and, if so, reserves the
 *			appropriate anonymous memory resources.
 *	anon_checkspace	only checks whether there is space to fulfill the
 *			request, without taking any resources.
 */
#define	anon_resv(sz)		anon_resvmem((sz), 1)
#define	anon_checkspace(sz)	anon_resvmem((sz), 0)
419 
/* Flags to anon_private */
#define	STEAL_PAGE		0x01	/* page can be stolen */
#define	LOCK_PAGE		0x02	/* page must be ``logically'' locked */

/* Flags to anon_disclaim */
#define	ANON_PGLOOKUP_BLK	0x01	/* block on locked pages */
430 
431 /*
432  * SEGKP ANON pages that are locked are assumed to be LWP stack pages
433  * and thus count towards the user pages locked count.
434  * This value is protected by the same lock as availrmem.
435  */
436 extern pgcnt_t anon_segkp_pages_locked;
437 
438 extern int anon_debug;
439 
#ifdef ANON_DEBUG

/* Bits that may be set in anon_debug to enable a class of messages. */
#define	A_ANON	0x01
#define	A_RESV	0x02
#define	A_MRESV	0x04

/*
 * vararg-like debugging macro.  `printf_args' is a complete, parenthesized
 * printf argument list, e.g. ANON_PRINT(A_ANON, ("slot %p\n", ap));
 * the message is printed only when anon_debug has a bit of `f' set.
 *
 * The expansion is wrapped in do { } while (0) so that it behaves as one
 * statement: used as the body of an if that has an else, the else stays
 * bound to the caller's if rather than to the if inside the macro.
 */
#define	ANON_PRINT(f, printf_args) \
	do { \
		if (anon_debug & (f)) \
			printf printf_args; \
	} while (0)

#else	/* ANON_DEBUG */

/* Debugging compiled out: the macro and its arguments expand to nothing. */
#define	ANON_PRINT(f, printf_args)

#endif	/* ANON_DEBUG */
456 
457 #endif	/* _KERNEL */
458 
459 #ifdef	__cplusplus
460 }
461 #endif
462 
463 #endif	/* _VM_ANON_H */
464