/* xref: /illumos-gate/usr/src/uts/common/sys/kmem_impl.h (revision 99dda20867d903eec23291ba1ecb18a82d70096b) */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

27 #ifndef _SYS_KMEM_IMPL_H
28 #define	_SYS_KMEM_IMPL_H
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #include <sys/kmem.h>
33 #include <sys/vmem.h>
34 #include <sys/thread.h>
35 #include <sys/t_lock.h>
36 #include <sys/time.h>
37 #include <sys/kstat.h>
38 #include <sys/cpuvar.h>
39 #include <sys/systm.h>
40 #include <vm/page.h>
41 
42 #ifdef	__cplusplus
43 extern "C" {
44 #endif
45 
/*
 * kernel memory allocator: implementation-private data structures
 */

/*
 * Individual kmem flag bits.  Each value is a distinct single bit.
 */
#define	KMF_AUDIT	0x00000001	/* transaction auditing */
#define	KMF_DEADBEEF	0x00000002	/* deadbeef checking */
#define	KMF_REDZONE	0x00000004	/* redzone checking */
#define	KMF_CONTENTS	0x00000008	/* freed-buffer content logging */
#define	KMF_STICKY	0x00000010	/* if set, override /etc/system */
#define	KMF_NOMAGAZINE	0x00000020	/* disable per-cpu magazines */
#define	KMF_FIREWALL	0x00000040	/* put all bufs before unmapped pages */
#define	KMF_LITE	0x00000100	/* lightweight debugging */

#define	KMF_HASH	0x00000200	/* cache has hash table */
#define	KMF_RANDOMIZE	0x00000400	/* randomize other kmem_flags */

/*
 * Composite masks.  Each one is the previous mask plus further bits, so
 * KMF_BUFTAG is a subset of KMF_TOUCH, which is a subset of KMF_RANDOM,
 * which is a subset of KMF_DEBUG.
 */
#define	KMF_BUFTAG	(KMF_DEADBEEF | KMF_REDZONE)
#define	KMF_TOUCH	(KMF_BUFTAG | KMF_LITE | KMF_CONTENTS)
#define	KMF_RANDOM	(KMF_TOUCH | KMF_AUDIT | KMF_NOMAGAZINE)
#define	KMF_DEBUG	(KMF_RANDOM | KMF_FIREWALL)

/* Number of pc_t slots in kmem_bufctl_audit_t's bc_stack[]. */
#define	KMEM_STACK_DEPTH	15

/*
 * 64-bit fill patterns and the single redzone byte.  How and when each
 * pattern is written or checked is decided by the allocator proper, not
 * by this header.
 */
#define	KMEM_FREE_PATTERN		0xdeadbeefdeadbeefULL
#define	KMEM_UNINITIALIZED_PATTERN	0xbaddcafebaddcafeULL
#define	KMEM_REDZONE_PATTERN		0xfeedfacefeedfaceULL
#define	KMEM_REDZONE_BYTE		0xbb

/*
 * Redzone size encodings for kmem_alloc() / kmem_free().  We encode the
 * allocation size, rather than storing it directly, so that kmem_free()
 * can distinguish frees of the wrong size from redzone violations.
 *
 * A size of zero is never valid.
 *
 * KMEM_SIZE_ENCODE(x)	maps a size x to 251 * x + 1.
 * KMEM_SIZE_DECODE(x)	recovers the size; the integer division discards
 *			the "+ 1" added by the encoder.
 * KMEM_SIZE_VALID(x)	is true only for values of the form 251 * n + 1,
 *			excluding 1 itself (the encoding of size zero).
 */
#define	KMEM_SIZE_ENCODE(x)	(251 * (x) + 1)
#define	KMEM_SIZE_DECODE(x)	((x) / 251)
#define	KMEM_SIZE_VALID(x)	((x) % 251 == 1 && (x) != 1)

/*
 * The bufctl (buffer control) structure keeps some minimal information
 * about each buffer: its address, its slab, and its current linkage,
 * which is either on the slab's freelist (if the buffer is free), or
 * on the cache's buf-to-bufctl hash table (if the buffer is allocated).
 * In the case of non-hashed, or "raw", caches (the common case), only
 * the freelist linkage is necessary: the buffer address is at a fixed
 * offset from the bufctl address, and the slab is at the end of the page.
 *
 * NOTE: bc_next must be the first field; raw buffers have linkage only.
 */
typedef struct kmem_bufctl {
	struct kmem_bufctl	*bc_next;	/* next bufctl struct */
	void			*bc_addr;	/* address of buffer */
	struct kmem_slab	*bc_slab;	/* controlling slab */
} kmem_bufctl_t;

102 /*
103  * The KMF_AUDIT version of the bufctl structure.  The beginning of this
104  * structure must be identical to the normal bufctl structure so that
105  * pointers are interchangeable.
106  */
107 typedef struct kmem_bufctl_audit {
108 	struct kmem_bufctl	*bc_next;	/* next bufctl struct */
109 	void			*bc_addr;	/* address of buffer */
110 	struct kmem_slab	*bc_slab;	/* controlling slab */
111 	kmem_cache_t		*bc_cache;	/* controlling cache */
112 	hrtime_t		bc_timestamp;	/* transaction time */
113 	kthread_t		*bc_thread;	/* thread doing transaction */
114 	struct kmem_bufctl	*bc_lastlog;	/* last log entry */
115 	void			*bc_contents;	/* contents at last free */
116 	int			bc_depth;	/* stack depth */
117 	pc_t			bc_stack[KMEM_STACK_DEPTH];	/* pc stack */
118 } kmem_bufctl_audit_t;
119 
120 /*
121  * A kmem_buftag structure is appended to each buffer whenever any of the
122  * KMF_BUFTAG flags (KMF_DEADBEEF, KMF_REDZONE, KMF_VERIFY) are set.
123  */
124 typedef struct kmem_buftag {
125 	uint64_t		bt_redzone;	/* 64-bit redzone pattern */
126 	kmem_bufctl_t		*bt_bufctl;	/* bufctl */
127 	intptr_t		bt_bxstat;	/* bufctl ^ (alloc/free) */
128 } kmem_buftag_t;
129 
130 /*
131  * A variant of the kmem_buftag structure used for KMF_LITE caches.
132  * Previous callers are stored in reverse chronological order. (i.e. most
133  * recent first)
134  */
135 typedef struct kmem_buftag_lite {
136 	kmem_buftag_t		bt_buftag;	/* a normal buftag */
137 	pc_t			bt_history[1];	/* zero or more callers */
138 } kmem_buftag_lite_t;
139 
/*
 * Size in bytes of a kmem_buftag_lite_t that carries f bt_history[]
 * entries: the offset of element f, i.e. one past the last entry used.
 */
#define	KMEM_BUFTAG_LITE_SIZE(f)	\
	(offsetof(kmem_buftag_lite_t, bt_history[f]))

/*
 * Byte-offset conversions between a buffer and its metadata, using the
 * distances stored in the cache:
 *
 * KMEM_BUFTAG(cp, buf)	buftag at buf + cp->cache_buftag
 * KMEM_BUFCTL(cp, buf)	bufctl at buf + cp->cache_bufctl
 * KMEM_BUF(cp, bcp)	buffer at bcp - cp->cache_bufctl (inverse of
 *			KMEM_BUFCTL)
 */
#define	KMEM_BUFTAG(cp, buf)		\
	((kmem_buftag_t *)((char *)(buf) + (cp)->cache_buftag))

#define	KMEM_BUFCTL(cp, buf)		\
	((kmem_bufctl_t *)((char *)(buf) + (cp)->cache_bufctl))

#define	KMEM_BUF(cp, bcp)		\
	((void *)((char *)(bcp) - (cp)->cache_bufctl))

/*
 * Address of the kmem_slab_t associated with buf: the last
 * kmem_slab_t-sized slot ending at P2END(buf, cp->cache_slabsize).
 */
#define	KMEM_SLAB(cp, buf)		\
	((kmem_slab_t *)P2END((uintptr_t)(buf), (cp)->cache_slabsize) - 1)

/*
 * Per-CPU cache for the CPU we are running on: cp advanced by
 * CPU->cpu_cache_offset bytes.
 *
 * Both the argument and the whole expansion are parenthesized so that
 * KMEM_CPU_CACHE(cp)->member and expression arguments parse as intended.
 */
#define	KMEM_CPU_CACHE(cp)		\
	((kmem_cpu_cache_t *)((char *)(cp) + CPU->cpu_cache_offset))

/*
 * True when the kmem_slab_t found at the end of the PAGESIZE-aligned
 * region containing magazine mp names, as its slab_cache, the cache that
 * supplies magazines for cp (cp->cache_magtype->mt_cache).
 */
#define	KMEM_MAGAZINE_VALID(cp, mp)	\
	(((kmem_slab_t *)P2END((uintptr_t)(mp), PAGESIZE) - 1)->slab_cache == \
	    (cp)->cache_magtype->mt_cache)

/*
 * True when buf lies within [slab_base, slab_base + cache_slabsize).
 * The subtraction is done in size_t, so an address below slab_base wraps
 * to a huge value and fails the comparison; one test covers both bounds.
 */
#define	KMEM_SLAB_MEMBER(sp, buf)	\
	((size_t)(buf) - (size_t)(sp)->slab_base < \
	    (sp)->slab_cache->cache_slabsize)

/* Distinct 32-bit tag values for the allocated and the free state. */
#define	KMEM_BUFTAG_ALLOC	0xa110c8edUL
#define	KMEM_BUFTAG_FREE	0xf4eef4eeUL

/*
 * Per-slab bookkeeping: the owning cache, the base of the slab's memory,
 * doubly-linked freelist linkage, the first free bufctl, and counts of
 * outstanding allocations and of chunks in the slab.
 */
typedef struct kmem_slab {
	struct kmem_cache	*slab_cache;	/* controlling cache */
	void			*slab_base;	/* base of allocated memory */
	struct kmem_slab	*slab_next;	/* next slab on freelist */
	struct kmem_slab	*slab_prev;	/* prev slab on freelist */
	struct kmem_bufctl	*slab_head;	/* first free buffer */
	long			slab_refcnt;	/* outstanding allocations */
	long			slab_chunks;	/* chunks (bufs) in this slab */
} kmem_slab_t;

#define	KMEM_HASH_INITIAL	64

/*
 * Address of the hash-table slot for buf: the buffer address is shifted
 * right by cache_hash_shift, masked with cache_hash_mask, and used as an
 * index into cache_hash_table.
 */
#define	KMEM_HASH(cp, buf)	\
	((cp)->cache_hash_table +	\
	(((uintptr_t)(buf) >> (cp)->cache_hash_shift) & (cp)->cache_hash_mask))

/*
 * A magazine: a link pointer followed by an array of rounds.  mag_round[]
 * is declared with one element but holds "one or more" rounds; the real
 * count comes from the magazine type (see mt_magsize).
 */
typedef struct kmem_magazine {
	void	*mag_next;
	void	*mag_round[1];		/* one or more rounds */
} kmem_magazine_t;

190 /*
191  * The magazine types for fast per-cpu allocation
192  */
193 typedef struct kmem_magtype {
194 	int		mt_magsize;	/* magazine size (number of rounds) */
195 	int		mt_align;	/* magazine alignment */
196 	size_t		mt_minbuf;	/* all smaller buffers qualify */
197 	size_t		mt_maxbuf;	/* no larger buffers qualify */
198 	kmem_cache_t	*mt_cache;	/* magazine cache */
199 } kmem_magtype_t;
200 
/*
 * KMEM_CPU_CACHE_SIZE	size of one kmem_cpu_cache_t.
 * KMEM_CPU_PAD		bytes of padding needed to reach that size after
 *			one kmutex_t, two uint64_t, two pointers and four
 *			ints (the non-pad members of kmem_cpu_cache_t).
 * KMEM_CACHE_SIZE(n)	size of a kmem_cache_t whose trailing cache_cpu[]
 *			array has n elements: the offset of cache_cpu[n].
 */
#define	KMEM_CPU_CACHE_SIZE	64	/* must be power of 2 */
#define	KMEM_CPU_PAD		(KMEM_CPU_CACHE_SIZE - sizeof (kmutex_t) - \
	2 * sizeof (uint64_t) - 2 * sizeof (void *) - 4 * sizeof (int))
#define	KMEM_CACHE_SIZE(ncpus)	\
	((size_t)(&((kmem_cache_t *)0)->cache_cpu[ncpus]))

207 typedef struct kmem_cpu_cache {
208 	kmutex_t	cc_lock;	/* protects this cpu's local cache */
209 	uint64_t	cc_alloc;	/* allocations from this cpu */
210 	uint64_t	cc_free;	/* frees to this cpu */
211 	kmem_magazine_t	*cc_loaded;	/* the currently loaded magazine */
212 	kmem_magazine_t	*cc_ploaded;	/* the previously loaded magazine */
213 	int		cc_rounds;	/* number of objects in loaded mag */
214 	int		cc_prounds;	/* number of objects in previous mag */
215 	int		cc_magsize;	/* number of rounds in a full mag */
216 	int		cc_flags;	/* CPU-local copy of cache_flags */
217 	char		cc_pad[KMEM_CPU_PAD]; /* for nice alignment */
218 } kmem_cpu_cache_t;
219 
220 /*
221  * The magazine lists used in the depot.
222  */
223 typedef struct kmem_maglist {
224 	kmem_magazine_t	*ml_list;	/* magazine list */
225 	long		ml_total;	/* number of magazines */
226 	long		ml_min;		/* min since last update */
227 	long		ml_reaplimit;	/* max reapable magazines */
228 	uint64_t	ml_alloc;	/* allocations from this list */
229 } kmem_maglist_t;
230 
/* Cache name length; cache_name[] is declared with one extra byte. */
#define	KMEM_CACHE_NAMELEN	31

233 struct kmem_cache {
234 	/*
235 	 * Statistics
236 	 */
237 	uint64_t	cache_slab_create;	/* slab creates */
238 	uint64_t	cache_slab_destroy;	/* slab destroys */
239 	uint64_t	cache_slab_alloc;	/* slab layer allocations */
240 	uint64_t	cache_slab_free;	/* slab layer frees */
241 	uint64_t	cache_alloc_fail;	/* total failed allocations */
242 	uint64_t	cache_buftotal;		/* total buffers */
243 	uint64_t	cache_bufmax;		/* max buffers ever */
244 	uint64_t	cache_bufslab;		/* buffers free in slab layer */
245 	uint64_t	cache_rescale;		/* # of hash table rescales */
246 	uint64_t	cache_lookup_depth;	/* hash lookup depth */
247 	uint64_t	cache_depot_contention;	/* mutex contention count */
248 	uint64_t	cache_depot_contention_prev; /* previous snapshot */
249 
250 	/*
251 	 * Cache properties
252 	 */
253 	char		cache_name[KMEM_CACHE_NAMELEN + 1];
254 	size_t		cache_bufsize;		/* object size */
255 	size_t		cache_align;		/* object alignment */
256 	int		(*cache_constructor)(void *, void *, int);
257 	void		(*cache_destructor)(void *, void *);
258 	void		(*cache_reclaim)(void *);
259 	void		*cache_private;		/* opaque arg to callbacks */
260 	vmem_t		*cache_arena;		/* vmem source for slabs */
261 	int		cache_cflags;		/* cache creation flags */
262 	int		cache_flags;		/* various cache state info */
263 	uint32_t	cache_mtbf;		/* induced alloc failure rate */
264 	uint32_t	cache_pad1;		/* to align cache_lock */
265 	kstat_t		*cache_kstat;		/* exported statistics */
266 	kmem_cache_t	*cache_next;		/* forward cache linkage */
267 	kmem_cache_t	*cache_prev;		/* backward cache linkage */
268 
269 	/*
270 	 * Slab layer
271 	 */
272 	kmutex_t	cache_lock;		/* protects slab layer */
273 	size_t		cache_chunksize;	/* buf + alignment [+ debug] */
274 	size_t		cache_slabsize;		/* size of a slab */
275 	size_t		cache_bufctl;		/* buf-to-bufctl distance */
276 	size_t		cache_buftag;		/* buf-to-buftag distance */
277 	size_t		cache_verify;		/* bytes to verify */
278 	size_t		cache_contents;		/* bytes of saved content */
279 	size_t		cache_color;		/* next slab color */
280 	size_t		cache_mincolor;		/* maximum slab color */
281 	size_t		cache_maxcolor;		/* maximum slab color */
282 	size_t		cache_hash_shift;	/* get to interesting bits */
283 	size_t		cache_hash_mask;	/* hash table mask */
284 	kmem_slab_t	*cache_freelist;	/* slab free list */
285 	kmem_slab_t	cache_nullslab;		/* end of freelist marker */
286 	kmem_cache_t	*cache_bufctl_cache;	/* source of bufctls */
287 	kmem_bufctl_t	**cache_hash_table;	/* hash table base */
288 	void		*cache_pad2;		/* to align depot_lock */
289 
290 	/*
291 	 * Depot layer
292 	 */
293 	kmutex_t	cache_depot_lock;	/* protects depot */
294 	kmem_magtype_t	*cache_magtype;		/* magazine type */
295 	void		*cache_pad3;		/* to align cache_cpu */
296 	kmem_maglist_t	cache_full;		/* full magazines */
297 	kmem_maglist_t	cache_empty;		/* empty magazines */
298 
299 	/*
300 	 * Per-CPU layer
301 	 */
302 	kmem_cpu_cache_t cache_cpu[1];		/* max_ncpus actual elements */
303 };
304 
305 typedef struct kmem_cpu_log_header {
306 	kmutex_t	clh_lock;
307 	char		*clh_current;
308 	size_t		clh_avail;
309 	int		clh_chunk;
310 	int		clh_hits;
311 	char		clh_pad[64 - sizeof (kmutex_t) - sizeof (char *) -
312 				sizeof (size_t) - 2 * sizeof (int)];
313 } kmem_cpu_log_header_t;
314 
315 typedef struct kmem_log_header {
316 	kmutex_t	lh_lock;
317 	char		*lh_base;
318 	int		*lh_free;
319 	size_t		lh_chunksize;
320 	int		lh_nchunks;
321 	int		lh_head;
322 	int		lh_tail;
323 	int		lh_hits;
324 	kmem_cpu_log_header_t lh_cpu[1];	/* ncpus actually allocated */
325 } kmem_log_header_t;
326 
/* KMEM_ALIGN and KMEM_ALIGN_SHIFT must agree: 1 << shift == align. */
#define	KMEM_ALIGN		8	/* min guaranteed alignment */
#define	KMEM_ALIGN_SHIFT	3	/* log2(KMEM_ALIGN) */
#define	KMEM_VOID_FRACTION	8	/* never waste more than 1/8 of slab */

331 #ifdef	__cplusplus
332 }
333 #endif
334 
335 #endif	/* _SYS_KMEM_IMPL_H */
336