xref: /freebsd/sys/kern/kern_malloc.c (revision c17d43407fe04133a94055b0dbc7ea8965654a9f)
/*
 * Copyright (c) 1987, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_malloc.c	8.3 (Berkeley) 1/4/94
 * $FreeBSD$
 */

#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/vmmeter.h>
#include <sys/proc.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/uma.h>
#include <vm/uma_int.h>

#if defined(INVARIANTS) && defined(__i386__)
#include <machine/cpu.h>
#endif

/*
 * When realloc() is called, if the new size is sufficiently smaller than
 * the old size, realloc() will allocate a new, smaller block to avoid
 * wasting memory. 'Sufficiently smaller' is defined as: newsize <=
 * oldsize / 2^n, where REALLOC_FRACTION defines the value of 'n'.
 */
#ifndef REALLOC_FRACTION
#define	REALLOC_FRACTION	1	/* new block if <= half the size */
#endif
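
/*
 * Example (illustrative, not part of the original source): with the
 * default REALLOC_FRACTION of 1, shrinking a 1024-byte allocation to
 * 600 bytes reuses the existing block, while shrinking it to 512 bytes
 * or less (newsize <= 1024 / 2^1) allocates a new, smaller block.
 */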

MALLOC_DEFINE(M_CACHE, "cache", "Various Dynamically allocated caches");
MALLOC_DEFINE(M_DEVBUF, "devbuf", "device driver memory");
MALLOC_DEFINE(M_TEMP, "temp", "misc temporary data buffers");

MALLOC_DEFINE(M_IP6OPT, "ip6opt", "IPv6 options");
MALLOC_DEFINE(M_IP6NDP, "ip6ndp", "IPv6 Neighbor Discovery");

static void kmeminit(void *);
SYSINIT(kmem, SI_SUB_KMEM, SI_ORDER_FIRST, kmeminit, NULL)

static MALLOC_DEFINE(M_FREE, "free", "should be on free list");

static struct malloc_type *kmemstatistics;
static char *kmembase;
static char *kmemlimit;

#define KMEM_ZSHIFT	4
#define KMEM_ZBASE	16
#define KMEM_ZMASK	(KMEM_ZBASE - 1)

#define KMEM_ZMAX	65536
#define KMEM_ZSIZE	(KMEM_ZMAX >> KMEM_ZSHIFT)
static uma_zone_t kmemzones[KMEM_ZSIZE + 1];


/* These won't be powers of two for long */
struct {
	int size;
	char *name;
} kmemsizes[] = {
	{16, "16"},
	{32, "32"},
	{64, "64"},
	{128, "128"},
	{256, "256"},
	{512, "512"},
	{1024, "1024"},
	{2048, "2048"},
	{4096, "4096"},
	{8192, "8192"},
	{16384, "16384"},
	{32768, "32768"},
	{65536, "65536"},
	{0, NULL},
};

static struct mtx malloc_mtx;

u_int vm_kmem_size;

/*
 *	malloc:
 *
 *	Allocate a block of memory.
 *
 *	If M_NOWAIT is set, this routine will not block; it returns NULL if
 *	the allocation fails.
 */
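/*
 * Sizing note (illustrative, derived from the code below): requests of
 * at most KMEM_ZMAX bytes are rounded up to a multiple of KMEM_ZBASE and
 * served from a power-of-two UMA zone selected via kmemzones[].  A
 * 100-byte request, for example, rounds up to 112, and
 * kmemzones[112 >> KMEM_ZSHIFT] selects the 128-byte zone.  Larger
 * requests go to uma_large_malloc() instead.
 */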
void *
malloc(size, type, flags)
	unsigned long size;
	struct malloc_type *type;
	int flags;
{
	int s;
	long indx;
	caddr_t va;
	uma_zone_t zone;
	register struct malloc_type *ksp = type;

#if defined(INVARIANTS)
	if (flags == M_WAITOK)
		KASSERT(curthread->td_intr_nesting_level == 0,
		   ("malloc(M_WAITOK) in interrupt context"));
#endif
	s = splmem();
	/* mtx_lock(&malloc_mtx); XXX */
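	/*
	 * Descriptive note: if this malloc type has reached its ks_limit,
	 * fail immediately for M_NOWAIT callers; otherwise sleep until
	 * free() brings the usage back under the limit and wakes us up.
	 */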
	while (ksp->ks_memuse >= ksp->ks_limit) {
		if (flags & M_NOWAIT) {
			splx(s);
			/* mtx_unlock(&malloc_mtx); XXX */
			return ((void *) NULL);
		}
		if (ksp->ks_limblocks < 65535)
			ksp->ks_limblocks++;
		msleep((caddr_t)ksp, /* &malloc_mtx */ NULL, PSWP+2, type->ks_shortdesc,
		    0);
	}
	/* mtx_unlock(&malloc_mtx); XXX */

	if (size <= KMEM_ZMAX) {
		indx = size;
		if (indx & KMEM_ZMASK)
			indx = (indx & ~KMEM_ZMASK) + KMEM_ZBASE;
		zone = kmemzones[indx >> KMEM_ZSHIFT];
		indx = zone->uz_size;
		va = uma_zalloc(zone, flags);
		if (va == NULL) {
			/* mtx_lock(&malloc_mtx); XXX */
			goto out;
		}
		ksp->ks_size |= indx;
	} else {
		/* XXX This is not the next power of two so this will break ks_size */
		indx = roundup(size, PAGE_SIZE);
		zone = NULL;
		va = uma_large_malloc(size, flags);
		if (va == NULL) {
			/* mtx_lock(&malloc_mtx); XXX */
			goto out;
		}
	}
	/* mtx_lock(&malloc_mtx); XXX */
	ksp->ks_memuse += indx;
	ksp->ks_inuse++;
out:
	ksp->ks_calls++;
	if (ksp->ks_memuse > ksp->ks_maxused)
		ksp->ks_maxused = ksp->ks_memuse;
	splx(s);
	/* mtx_unlock(&malloc_mtx); XXX */
	/* XXX: Do idle pre-zeroing.  */
	if (va != NULL && (flags & M_ZERO))
		bzero(va, size);
	return ((void *) va);
}

/*
 *	free:
 *
 *	Free a block of memory allocated by malloc.
 *
 *	This routine may not block.
 */
void
free(addr, type)
	void *addr;
	struct malloc_type *type;
{
	uma_slab_t slab;
	void *mem;
	u_long size;
	int s;
	register struct malloc_type *ksp = type;

	/* free(NULL, ...) does nothing */
	if (addr == NULL)
		return;

	size = 0;
	s = splmem();

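	/*
	 * Descriptive note: round the address down to its slab boundary
	 * (UMA_SLAB_MASK) and look up the owning slab in mallochash.  Small
	 * allocations are handed back to their UMA zone; large ones go
	 * through uma_large_free().
	 */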
	mem = (void *)((u_long)addr & (~UMA_SLAB_MASK));
	slab = hash_sfind(mallochash, mem);

	if (slab == NULL)
		panic("free: address %p(%p) has not been allocated.\n", addr, mem);

	if (!(slab->us_flags & UMA_SLAB_MALLOC)) {
		size = slab->us_zone->uz_size;
		uma_zfree_arg(slab->us_zone, addr, slab);
	} else {
		size = slab->us_size;
		uma_large_free(slab);
	}
	/* mtx_lock(&malloc_mtx); XXX */

	ksp->ks_memuse -= size;
	if (ksp->ks_memuse + size >= ksp->ks_limit &&
	    ksp->ks_memuse < ksp->ks_limit)
		wakeup((caddr_t)ksp);
	ksp->ks_inuse--;
	splx(s);
	/* mtx_unlock(&malloc_mtx); XXX */
}

/*
 *	realloc: change the size of a memory block
 */
void *
realloc(addr, size, type, flags)
	void *addr;
	unsigned long size;
	struct malloc_type *type;
	int flags;
{
	uma_slab_t slab;
	unsigned long alloc;
	void *newaddr;

	/* realloc(NULL, ...) is equivalent to malloc(...) */
	if (addr == NULL)
		return (malloc(size, type, flags));

	slab = hash_sfind(mallochash,
	    (void *)((u_long)addr & ~(UMA_SLAB_MASK)));

	/* Sanity check */
	KASSERT(slab != NULL,
	    ("realloc: address %p out of range", (void *)addr));

	/* Get the size of the original block */
	if (slab->us_zone)
		alloc = slab->us_zone->uz_size;
	else
		alloc = slab->us_size;

	/* Reuse the original block if appropriate */
	if (size <= alloc
	    && (size > (alloc >> REALLOC_FRACTION) || alloc == MINALLOCSIZE))
		return (addr);

	/* Allocate a new, bigger (or smaller) block */
	if ((newaddr = malloc(size, type, flags)) == NULL)
		return (NULL);

	/* Copy over original contents */
	bcopy(addr, newaddr, min(size, alloc));
	free(addr, type);
	return (newaddr);
}

/*
 *	reallocf: same as realloc() but frees memory on failure.
 */
void *
reallocf(addr, size, type, flags)
	void *addr;
	unsigned long size;
	struct malloc_type *type;
	int flags;
{
	void *mem;

	if ((mem = realloc(addr, size, type, flags)) == NULL)
		free(addr, type);
	return (mem);
}

/*
 * Initialize the kernel memory allocator
 */
/* ARGSUSED */
static void
kmeminit(dummy)
	void *dummy;
{
	register long indx;
	u_long npg;
	u_long mem_size;
	void *hashmem;
	u_long hashsize;
	int highbit;
	int bits;
	int i;

	mtx_init(&malloc_mtx, "malloc", MTX_DEF);

	/*
	 * Try to auto-tune the kernel memory size, so that it is
	 * more appropriate for a wider range of machine sizes.
	 * On an X86, a VM_KMEM_SIZE_SCALE value of 4 is good, while
	 * a VM_KMEM_SIZE of 12MB is a fair compromise.  The
	 * VM_KMEM_SIZE_MAX is dependent on the maximum KVA space
	 * available, and on an X86 with a total KVA space of 256MB,
	 * try to keep VM_KMEM_SIZE_MAX at 80MB or below.
	 *
	 * Note that the kmem_map is also used by the zone allocator,
	 * so make sure that there is enough space.
	 */
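	/*
	 * Worked example (illustrative): with VM_KMEM_SIZE_SCALE defined as
	 * 4 on a machine with 256MB of physical memory, the scaling below
	 * would raise vm_kmem_size from VM_KMEM_SIZE to 64MB, subject to the
	 * VM_KMEM_SIZE_MAX clamp and the "kern.vm.kmem.size" tunable.
	 */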
	vm_kmem_size = VM_KMEM_SIZE;
	mem_size = cnt.v_page_count * PAGE_SIZE;

#if defined(VM_KMEM_SIZE_SCALE)
	if ((mem_size / VM_KMEM_SIZE_SCALE) > vm_kmem_size)
		vm_kmem_size = mem_size / VM_KMEM_SIZE_SCALE;
#endif

#if defined(VM_KMEM_SIZE_MAX)
	if (vm_kmem_size >= VM_KMEM_SIZE_MAX)
		vm_kmem_size = VM_KMEM_SIZE_MAX;
#endif

	/* Allow final override from the kernel environment */
	TUNABLE_INT_FETCH("kern.vm.kmem.size", &vm_kmem_size);

	/*
	 * Limit kmem virtual size to twice the physical memory.
	 * This allows for kmem map sparseness, but limits the size
	 * to something sane. Be careful not to overflow the 32-bit
	 * ints while doing the check.
	 */
	if ((vm_kmem_size / 2) > (cnt.v_page_count * PAGE_SIZE))
		vm_kmem_size = 2 * cnt.v_page_count * PAGE_SIZE;

	/*
	 * In mbuf_init(), we set up submaps for mbufs and clusters, in which
	 * case we rounddown() (nmbufs * MSIZE) and (nmbclusters * MCLBYTES),
	 * respectively. Mathematically, this means that what we do here may
	 * amount to slightly more address space than we need for the submaps,
	 * but it never hurts to have an extra page in kmem_map.
	 */
	npg = (nmbufs * MSIZE + nmbclusters * MCLBYTES + nmbcnt *
	    sizeof(u_int) + vm_kmem_size) / PAGE_SIZE;

	kmem_map = kmem_suballoc(kernel_map, (vm_offset_t *)&kmembase,
		(vm_offset_t *)&kmemlimit, (vm_size_t)(npg * PAGE_SIZE));
	kmem_map->system_map = 1;

	hashsize = npg * sizeof(void *);

	highbit = 0;
	bits = 0;
	/* The hash size must be a power of two; round down if necessary */
	for (i = 0; i < 8 * sizeof(hashsize); i++)
		if (hashsize & (1UL << i)) {
			highbit = i;
			bits++;
		}
	if (bits > 1)
		hashsize = 1 << (highbit);

	hashmem = (void *)kmem_alloc(kernel_map, (vm_size_t)hashsize);
	uma_startup2(hashmem, hashsize / sizeof(void *));

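	/*
	 * Descriptive note: each pass of the loop below creates one
	 * power-of-two zone and points every KMEM_ZBASE-sized slot of
	 * kmemzones[] up to that size at it, so each slot ends up referring
	 * to the smallest zone that can satisfy it (e.g. slots 5-8, i.e.
	 * requests of 65 to 128 bytes, all map to the "128" zone).
	 */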
	for (i = 0, indx = 0; kmemsizes[indx].size != 0; indx++) {
		uma_zone_t zone;
		int size = kmemsizes[indx].size;
		char *name = kmemsizes[indx].name;

		zone = uma_zcreate(name, size, NULL, NULL, NULL, NULL,
		    UMA_ALIGN_PTR, UMA_ZONE_MALLOC);
		for (; i <= size; i += KMEM_ZBASE)
			kmemzones[i >> KMEM_ZSHIFT] = zone;
	}
}

void
malloc_init(data)
	void *data;
{
	struct malloc_type *type = (struct malloc_type *)data;

	if (type->ks_magic != M_MAGIC)
		panic("malloc type lacks magic");

	if (type->ks_limit != 0)
		return;

	if (cnt.v_page_count == 0)
		panic("malloc_init not allowed before vm init");

	/*
	 * The default limit for each malloc type is 1/2 of the
	 * malloc portion of the kmem map size.
	 */
	type->ks_limit = vm_kmem_size / 2;
	type->ks_next = kmemstatistics;
	kmemstatistics = type;
}

void
malloc_uninit(data)
	void *data;
{
	struct malloc_type *type = (struct malloc_type *)data;
	struct malloc_type *t;

	if (type->ks_magic != M_MAGIC)
		panic("malloc type lacks magic");

	if (cnt.v_page_count == 0)
		panic("malloc_uninit not allowed before vm init");

	if (type->ks_limit == 0)
		panic("malloc_uninit on uninitialized type");

	if (type == kmemstatistics)
		kmemstatistics = type->ks_next;
	else {
		for (t = kmemstatistics; t->ks_next != NULL; t = t->ks_next) {
			if (t->ks_next == type) {
				t->ks_next = type->ks_next;
				break;
			}
		}
	}
	type->ks_next = NULL;
	type->ks_limit = 0;
}
463