xref: /freebsd/sys/kern/kern_malloc.c (revision 2d50560abcbeb1907b640e5658d2ef0fd050397c)
1df8bae1dSRodney W. Grimes /*
2df8bae1dSRodney W. Grimes  * Copyright (c) 1987, 1991, 1993
3df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
4df8bae1dSRodney W. Grimes  *
5df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
6df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
7df8bae1dSRodney W. Grimes  * are met:
8df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
9df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
10df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
11df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
12df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
13df8bae1dSRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
14df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
15df8bae1dSRodney W. Grimes  *    without specific prior written permission.
16df8bae1dSRodney W. Grimes  *
17df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
28df8bae1dSRodney W. Grimes  *
29df8bae1dSRodney W. Grimes  *	@(#)kern_malloc.c	8.3 (Berkeley) 1/4/94
30df8bae1dSRodney W. Grimes  */
31df8bae1dSRodney W. Grimes 
32677b542eSDavid E. O'Brien #include <sys/cdefs.h>
33677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$");
34677b542eSDavid E. O'Brien 
358a58a9f6SJohn Dyson #include "opt_vm.h"
368a58a9f6SJohn Dyson 
37df8bae1dSRodney W. Grimes #include <sys/param.h>
3826f9a767SRodney W. Grimes #include <sys/systm.h>
392d50560aSMarcel Moolenaar #include <sys/kdb.h>
40df8bae1dSRodney W. Grimes #include <sys/kernel.h>
41fb919e4dSMark Murray #include <sys/lock.h>
42df8bae1dSRodney W. Grimes #include <sys/malloc.h>
4354e7152cSDavid Greenman #include <sys/mbuf.h>
44eec258d2SJohn Baldwin #include <sys/mutex.h>
45efeaf95aSDavid Greenman #include <sys/vmmeter.h>
46a448b62aSJake Burkholder #include <sys/proc.h>
476f267175SJeff Roberson #include <sys/sysctl.h>
481fb14a47SPoul-Henning Kamp #include <sys/time.h>
499a02e8c6SJason Evans 
50df8bae1dSRodney W. Grimes #include <vm/vm.h>
5199571dc3SJeff Roberson #include <vm/pmap.h>
52efeaf95aSDavid Greenman #include <vm/vm_param.h>
53df8bae1dSRodney W. Grimes #include <vm/vm_kern.h>
54efeaf95aSDavid Greenman #include <vm/vm_extern.h>
553075778bSJohn Dyson #include <vm/vm_map.h>
5699571dc3SJeff Roberson #include <vm/vm_page.h>
578355f576SJeff Roberson #include <vm/uma.h>
588355f576SJeff Roberson #include <vm/uma_int.h>
598efc4effSJeff Roberson #include <vm/uma_dbg.h>
60df8bae1dSRodney W. Grimes 
61984982d6SPoul-Henning Kamp #if defined(INVARIANTS) && defined(__i386__)
62984982d6SPoul-Henning Kamp #include <machine/cpu.h>
63984982d6SPoul-Henning Kamp #endif
64984982d6SPoul-Henning Kamp 
6544a8ff31SArchie Cobbs /*
6644a8ff31SArchie Cobbs  * When realloc() is called, if the new size is sufficiently smaller than
6744a8ff31SArchie Cobbs  * the old size, realloc() will allocate a new, smaller block to avoid
6844a8ff31SArchie Cobbs  * wasting memory. 'Sufficiently smaller' is defined as: newsize <=
6944a8ff31SArchie Cobbs  * oldsize / 2^n, where REALLOC_FRACTION defines the value of 'n'.
7044a8ff31SArchie Cobbs  */
7144a8ff31SArchie Cobbs #ifndef REALLOC_FRACTION
7244a8ff31SArchie Cobbs #define	REALLOC_FRACTION	1	/* new block if <= half the size */
7344a8ff31SArchie Cobbs #endif
7444a8ff31SArchie Cobbs 
753b6fb885SPoul-Henning Kamp MALLOC_DEFINE(M_CACHE, "cache", "Various Dynamically allocated caches");
769ef246c6SBruce Evans MALLOC_DEFINE(M_DEVBUF, "devbuf", "device driver memory");
779ef246c6SBruce Evans MALLOC_DEFINE(M_TEMP, "temp", "misc temporary data buffers");
789ef246c6SBruce Evans 
7982cd038dSYoshinobu Inoue MALLOC_DEFINE(M_IP6OPT, "ip6opt", "IPv6 options");
8082cd038dSYoshinobu Inoue MALLOC_DEFINE(M_IP6NDP, "ip6ndp", "IPv6 Neighbor Discovery");
8182cd038dSYoshinobu Inoue 
824d77a549SAlfred Perlstein static void kmeminit(void *);
832b14f991SJulian Elischer SYSINIT(kmem, SI_SUB_KMEM, SI_ORDER_FIRST, kmeminit, NULL)
842b14f991SJulian Elischer 
85a1c995b6SPoul-Henning Kamp static MALLOC_DEFINE(M_FREE, "free", "should be on free list");
86a1c995b6SPoul-Henning Kamp 
87db669378SPeter Wemm static struct malloc_type *kmemstatistics;
88254c6cb3SPoul-Henning Kamp static char *kmembase;
89043a2f3bSBruce Evans static char *kmemlimit;
901f6889a1SMatthew Dillon 
918355f576SJeff Roberson #define KMEM_ZSHIFT	4
928355f576SJeff Roberson #define KMEM_ZBASE	16
938355f576SJeff Roberson #define KMEM_ZMASK	(KMEM_ZBASE - 1)
948355f576SJeff Roberson 
959fb535deSJeff Roberson #define KMEM_ZMAX	PAGE_SIZE
968355f576SJeff Roberson #define KMEM_ZSIZE	(KMEM_ZMAX >> KMEM_ZSHIFT)
976f267175SJeff Roberson static u_int8_t kmemsize[KMEM_ZSIZE + 1];
986f267175SJeff Roberson 
998355f576SJeff Roberson /* These won't be powers of two for long */
1008355f576SJeff Roberson struct {
1016f267175SJeff Roberson 	int kz_size;
1026f267175SJeff Roberson 	char *kz_name;
1036f267175SJeff Roberson 	uma_zone_t kz_zone;
1046f267175SJeff Roberson } kmemzones[] = {
1056f267175SJeff Roberson 	{16, "16", NULL},
1066f267175SJeff Roberson 	{32, "32", NULL},
1076f267175SJeff Roberson 	{64, "64", NULL},
1086f267175SJeff Roberson 	{128, "128", NULL},
1096f267175SJeff Roberson 	{256, "256", NULL},
1106f267175SJeff Roberson 	{512, "512", NULL},
1116f267175SJeff Roberson 	{1024, "1024", NULL},
1126f267175SJeff Roberson 	{2048, "2048", NULL},
1136f267175SJeff Roberson 	{4096, "4096", NULL},
1149fb535deSJeff Roberson #if PAGE_SIZE > 4096
1156f267175SJeff Roberson 	{8192, "8192", NULL},
1169fb535deSJeff Roberson #if PAGE_SIZE > 8192
11743a7c4e9SRobert Watson 	{16384, "16384", NULL},
1189fb535deSJeff Roberson #if PAGE_SIZE > 16384
119bd796eb2SRobert Watson 	{32768, "32768", NULL},
1209fb535deSJeff Roberson #if PAGE_SIZE > 32768
121bd796eb2SRobert Watson 	{65536, "65536", NULL},
1229fb535deSJeff Roberson #if PAGE_SIZE > 65536
1239fb535deSJeff Roberson #error	"Unsupported PAGE_SIZE"
1249fb535deSJeff Roberson #endif	/* 65536 */
1259fb535deSJeff Roberson #endif	/* 32768 */
1269fb535deSJeff Roberson #endif	/* 16384 */
1279fb535deSJeff Roberson #endif	/* 8192 */
1289fb535deSJeff Roberson #endif	/* 4096 */
1298355f576SJeff Roberson 	{0, NULL},
1308355f576SJeff Roberson };
1318355f576SJeff Roberson 
1326f267175SJeff Roberson u_int vm_kmem_size;
13384344f9fSDag-Erling Smørgrav SYSCTL_UINT(_vm, OID_AUTO, kmem_size, CTLFLAG_RD, &vm_kmem_size, 0,
13484344f9fSDag-Erling Smørgrav     "Size of kernel memory");
1355a34a9f0SJeff Roberson 
1365a34a9f0SJeff Roberson /*
13799571dc3SJeff Roberson  * The malloc_mtx protects the kmemstatistics linked list.
1385a34a9f0SJeff Roberson  */
1395a34a9f0SJeff Roberson 
1405a34a9f0SJeff Roberson struct mtx malloc_mtx;
14169ef67f9SJason Evans 
1425e914b96SJeff Roberson #ifdef MALLOC_PROFILE
1435e914b96SJeff Roberson uint64_t krequests[KMEM_ZSIZE + 1];
1446f267175SJeff Roberson 
1455e914b96SJeff Roberson static int sysctl_kern_mprof(SYSCTL_HANDLER_ARGS);
1465e914b96SJeff Roberson #endif
1475e914b96SJeff Roberson 
1485e914b96SJeff Roberson static int sysctl_kern_malloc(SYSCTL_HANDLER_ARGS);
149df8bae1dSRodney W. Grimes 
1501fb14a47SPoul-Henning Kamp /* time_uptime of last malloc(9) failure */
1511fb14a47SPoul-Henning Kamp static time_t t_malloc_fail;
1521fb14a47SPoul-Henning Kamp 
153eae870cdSRobert Watson #ifdef MALLOC_MAKE_FAILURES
154eae870cdSRobert Watson /*
155eae870cdSRobert Watson  * Causes malloc failures every (n) mallocs with M_NOWAIT.  If set to 0,
156eae870cdSRobert Watson  * doesn't cause failures.
157eae870cdSRobert Watson  */
158eae870cdSRobert Watson SYSCTL_NODE(_debug, OID_AUTO, malloc, CTLFLAG_RD, 0,
159eae870cdSRobert Watson     "Kernel malloc debugging options");
160eae870cdSRobert Watson 
161eae870cdSRobert Watson static int malloc_failure_rate;
162eae870cdSRobert Watson static int malloc_nowait_count;
163eae870cdSRobert Watson static int malloc_failure_count;
164eae870cdSRobert Watson SYSCTL_INT(_debug_malloc, OID_AUTO, failure_rate, CTLFLAG_RW,
165eae870cdSRobert Watson     &malloc_failure_rate, 0, "Every (n) mallocs with M_NOWAIT will fail");
166f2538508SRobert Watson TUNABLE_INT("debug.malloc.failure_rate", &malloc_failure_rate);
167eae870cdSRobert Watson SYSCTL_INT(_debug_malloc, OID_AUTO, failure_count, CTLFLAG_RD,
168eae870cdSRobert Watson     &malloc_failure_count, 0, "Number of imposed M_NOWAIT malloc failures");
169eae870cdSRobert Watson #endif
170eae870cdSRobert Watson 
1711fb14a47SPoul-Henning Kamp int
1721fb14a47SPoul-Henning Kamp malloc_last_fail(void)
1731fb14a47SPoul-Henning Kamp {
1741fb14a47SPoul-Henning Kamp 
1751fb14a47SPoul-Henning Kamp 	return (time_uptime - t_malloc_fail);
1761fb14a47SPoul-Henning Kamp }
1771fb14a47SPoul-Henning Kamp 
178df8bae1dSRodney W. Grimes /*
1791c7c3c6aSMatthew Dillon  *	malloc:
1801c7c3c6aSMatthew Dillon  *
1811c7c3c6aSMatthew Dillon  *	Allocate a block of memory.
1821c7c3c6aSMatthew Dillon  *
1831c7c3c6aSMatthew Dillon  *	If M_NOWAIT is set, this routine will not block and return NULL if
1841c7c3c6aSMatthew Dillon  *	the allocation fails.
185df8bae1dSRodney W. Grimes  */
186df8bae1dSRodney W. Grimes void *
187df8bae1dSRodney W. Grimes malloc(size, type, flags)
188df8bae1dSRodney W. Grimes 	unsigned long size;
18960a513e9SPoul-Henning Kamp 	struct malloc_type *type;
190254c6cb3SPoul-Henning Kamp 	int flags;
191df8bae1dSRodney W. Grimes {
1926f267175SJeff Roberson 	int indx;
1938355f576SJeff Roberson 	caddr_t va;
1948355f576SJeff Roberson 	uma_zone_t zone;
195099a0e58SBosko Milekic 	uma_keg_t keg;
1964db4f5c8SPoul-Henning Kamp #ifdef DIAGNOSTIC
1974db4f5c8SPoul-Henning Kamp 	unsigned long osize = size;
1984db4f5c8SPoul-Henning Kamp #endif
19960a513e9SPoul-Henning Kamp 	register struct malloc_type *ksp = type;
200df8bae1dSRodney W. Grimes 
201194a0abfSPoul-Henning Kamp #ifdef INVARIANTS
202d3c11994SPoul-Henning Kamp 	/*
203d3c11994SPoul-Henning Kamp 	 * To make sure that WAITOK or NOWAIT is set, but not more than
204d3c11994SPoul-Henning Kamp 	 * one, and check against the API botches that are common.
205d3c11994SPoul-Henning Kamp 	 */
206d3c11994SPoul-Henning Kamp 	indx = flags & (M_WAITOK | M_NOWAIT | M_DONTWAIT | M_TRYWAIT);
207d3c11994SPoul-Henning Kamp 	if (indx != M_NOWAIT && indx != M_WAITOK) {
208d3c11994SPoul-Henning Kamp 		static	struct timeval lasterr;
209d3c11994SPoul-Henning Kamp 		static	int curerr, once;
210d3c11994SPoul-Henning Kamp 		if (once == 0 && ppsratecheck(&lasterr, &curerr, 1)) {
211d3c11994SPoul-Henning Kamp 			printf("Bad malloc flags: %x\n", indx);
2122d50560aSMarcel Moolenaar 			kdb_backtrace();
213d3c11994SPoul-Henning Kamp 			flags |= M_WAITOK;
214d3c11994SPoul-Henning Kamp 			once++;
215d3c11994SPoul-Henning Kamp 		}
216d3c11994SPoul-Henning Kamp 	}
217194a0abfSPoul-Henning Kamp #endif
218708da94eSPoul-Henning Kamp #if 0
219708da94eSPoul-Henning Kamp 	if (size == 0)
2202d50560aSMarcel Moolenaar 		kdb_enter("zero size malloc");
221708da94eSPoul-Henning Kamp #endif
222eae870cdSRobert Watson #ifdef MALLOC_MAKE_FAILURES
223eae870cdSRobert Watson 	if ((flags & M_NOWAIT) && (malloc_failure_rate != 0)) {
224eae870cdSRobert Watson 		atomic_add_int(&malloc_nowait_count, 1);
225eae870cdSRobert Watson 		if ((malloc_nowait_count % malloc_failure_rate) == 0) {
226eae870cdSRobert Watson 			atomic_add_int(&malloc_failure_count, 1);
2273f6ee876SPoul-Henning Kamp 			t_malloc_fail = time_uptime;
228eae870cdSRobert Watson 			return (NULL);
229eae870cdSRobert Watson 		}
230eae870cdSRobert Watson 	}
231eae870cdSRobert Watson #endif
232d3c11994SPoul-Henning Kamp 	if (flags & M_WAITOK)
233b40ce416SJulian Elischer 		KASSERT(curthread->td_intr_nesting_level == 0,
234a163d034SWarner Losh 		   ("malloc(M_WAITOK) in interrupt context"));
2358355f576SJeff Roberson 	if (size <= KMEM_ZMAX) {
2366f267175SJeff Roberson 		if (size & KMEM_ZMASK)
2376f267175SJeff Roberson 			size = (size & ~KMEM_ZMASK) + KMEM_ZBASE;
2386f267175SJeff Roberson 		indx = kmemsize[size >> KMEM_ZSHIFT];
2396f267175SJeff Roberson 		zone = kmemzones[indx].kz_zone;
240099a0e58SBosko Milekic 		keg = zone->uz_keg;
2416f267175SJeff Roberson #ifdef MALLOC_PROFILE
2426f267175SJeff Roberson 		krequests[size >> KMEM_ZSHIFT]++;
2436f267175SJeff Roberson #endif
2448355f576SJeff Roberson 		va = uma_zalloc(zone, flags);
2455a34a9f0SJeff Roberson 		mtx_lock(&ksp->ks_mtx);
2466f267175SJeff Roberson 		if (va == NULL)
247df8bae1dSRodney W. Grimes 			goto out;
2486f267175SJeff Roberson 
2496f267175SJeff Roberson 		ksp->ks_size |= 1 << indx;
250099a0e58SBosko Milekic 		size = keg->uk_size;
2518355f576SJeff Roberson 	} else {
2526f267175SJeff Roberson 		size = roundup(size, PAGE_SIZE);
2538355f576SJeff Roberson 		zone = NULL;
254099a0e58SBosko Milekic 		keg = NULL;
2558355f576SJeff Roberson 		va = uma_large_malloc(size, flags);
2565a34a9f0SJeff Roberson 		mtx_lock(&ksp->ks_mtx);
2576f267175SJeff Roberson 		if (va == NULL)
2588355f576SJeff Roberson 			goto out;
259df8bae1dSRodney W. Grimes 	}
2606f267175SJeff Roberson 	ksp->ks_memuse += size;
261df8bae1dSRodney W. Grimes 	ksp->ks_inuse++;
2628355f576SJeff Roberson out:
263df8bae1dSRodney W. Grimes 	ksp->ks_calls++;
264df8bae1dSRodney W. Grimes 	if (ksp->ks_memuse > ksp->ks_maxused)
265df8bae1dSRodney W. Grimes 		ksp->ks_maxused = ksp->ks_memuse;
2666f267175SJeff Roberson 
2675a34a9f0SJeff Roberson 	mtx_unlock(&ksp->ks_mtx);
2681282e9acSPoul-Henning Kamp 	if (flags & M_WAITOK)
269a163d034SWarner Losh 		KASSERT(va != NULL, ("malloc(M_WAITOK) returned NULL"));
2701282e9acSPoul-Henning Kamp 	else if (va == NULL)
2711fb14a47SPoul-Henning Kamp 		t_malloc_fail = time_uptime;
2724db4f5c8SPoul-Henning Kamp #ifdef DIAGNOSTIC
2731282e9acSPoul-Henning Kamp 	if (va != NULL && !(flags & M_ZERO)) {
2744db4f5c8SPoul-Henning Kamp 		memset(va, 0x70, osize);
2754db4f5c8SPoul-Henning Kamp 	}
2764db4f5c8SPoul-Henning Kamp #endif
277df8bae1dSRodney W. Grimes 	return ((void *) va);
278df8bae1dSRodney W. Grimes }
279df8bae1dSRodney W. Grimes 
280df8bae1dSRodney W. Grimes /*
2811c7c3c6aSMatthew Dillon  *	free:
2821c7c3c6aSMatthew Dillon  *
283df8bae1dSRodney W. Grimes  *	Free a block of memory allocated by malloc.
2841c7c3c6aSMatthew Dillon  *
2851c7c3c6aSMatthew Dillon  *	This routine may not block.
286df8bae1dSRodney W. Grimes  */
287df8bae1dSRodney W. Grimes void
28868f2d20bSPoul-Henning Kamp free(addr, type)
28968f2d20bSPoul-Henning Kamp 	void *addr;
29060a513e9SPoul-Henning Kamp 	struct malloc_type *type;
291df8bae1dSRodney W. Grimes {
29260a513e9SPoul-Henning Kamp 	register struct malloc_type *ksp = type;
29399571dc3SJeff Roberson 	uma_slab_t slab;
29499571dc3SJeff Roberson 	u_long size;
295254c6cb3SPoul-Henning Kamp 
29644a8ff31SArchie Cobbs 	/* free(NULL, ...) does nothing */
29744a8ff31SArchie Cobbs 	if (addr == NULL)
29844a8ff31SArchie Cobbs 		return;
29944a8ff31SArchie Cobbs 
3008cb72d61SPoul-Henning Kamp 	KASSERT(ksp->ks_memuse > 0,
3018cb72d61SPoul-Henning Kamp 		("malloc(9)/free(9) confusion.\n%s",
3028cb72d61SPoul-Henning Kamp 		 "Probably freeing with wrong type, but maybe not here."));
3038355f576SJeff Roberson 	size = 0;
30469ef67f9SJason Evans 
30599571dc3SJeff Roberson 	slab = vtoslab((vm_offset_t)addr & (~UMA_SLAB_MASK));
3068355f576SJeff Roberson 
3078355f576SJeff Roberson 	if (slab == NULL)
3086f267175SJeff Roberson 		panic("free: address %p(%p) has not been allocated.\n",
30999571dc3SJeff Roberson 		    addr, (void *)((u_long)addr & (~UMA_SLAB_MASK)));
31099571dc3SJeff Roberson 
3118355f576SJeff Roberson 
3128355f576SJeff Roberson 	if (!(slab->us_flags & UMA_SLAB_MALLOC)) {
3138f70816cSJeff Roberson #ifdef INVARIANTS
3148f70816cSJeff Roberson 		struct malloc_type **mtp = addr;
3158f70816cSJeff Roberson #endif
316099a0e58SBosko Milekic 		size = slab->us_keg->uk_size;
3178f70816cSJeff Roberson #ifdef INVARIANTS
3188f70816cSJeff Roberson 		/*
3198f70816cSJeff Roberson 		 * Cache a pointer to the malloc_type that most recently freed
3208f70816cSJeff Roberson 		 * this memory here.  This way we know who is most likely to
3218f70816cSJeff Roberson 		 * have stepped on it later.
3228f70816cSJeff Roberson 		 *
3238f70816cSJeff Roberson 		 * This code assumes that size is a multiple of 8 bytes for
3248f70816cSJeff Roberson 		 * 64 bit machines
3258f70816cSJeff Roberson 		 */
3268f70816cSJeff Roberson 		mtp = (struct malloc_type **)
3278f70816cSJeff Roberson 		    ((unsigned long)mtp & ~UMA_ALIGN_PTR);
3288f70816cSJeff Roberson 		mtp += (size - sizeof(struct malloc_type *)) /
3298f70816cSJeff Roberson 		    sizeof(struct malloc_type *);
3308f70816cSJeff Roberson 		*mtp = type;
3318f70816cSJeff Roberson #endif
332099a0e58SBosko Milekic 		uma_zfree_arg(LIST_FIRST(&slab->us_keg->uk_zones), addr, slab);
33314bf02f8SJohn Dyson 	} else {
3348355f576SJeff Roberson 		size = slab->us_size;
3358355f576SJeff Roberson 		uma_large_free(slab);
33614bf02f8SJohn Dyson 	}
3375a34a9f0SJeff Roberson 	mtx_lock(&ksp->ks_mtx);
3388cb72d61SPoul-Henning Kamp 	KASSERT(size <= ksp->ks_memuse,
3398cb72d61SPoul-Henning Kamp 		("malloc(9)/free(9) confusion.\n%s",
3408cb72d61SPoul-Henning Kamp 		 "Probably freeing with wrong type, but maybe not here."));
3418355f576SJeff Roberson 	ksp->ks_memuse -= size;
3428355f576SJeff Roberson 	ksp->ks_inuse--;
3435a34a9f0SJeff Roberson 	mtx_unlock(&ksp->ks_mtx);
344df8bae1dSRodney W. Grimes }
345df8bae1dSRodney W. Grimes 
346df8bae1dSRodney W. Grimes /*
34744a8ff31SArchie Cobbs  *	realloc: change the size of a memory block
34844a8ff31SArchie Cobbs  */
34944a8ff31SArchie Cobbs void *
35044a8ff31SArchie Cobbs realloc(addr, size, type, flags)
35144a8ff31SArchie Cobbs 	void *addr;
35244a8ff31SArchie Cobbs 	unsigned long size;
35344a8ff31SArchie Cobbs 	struct malloc_type *type;
35444a8ff31SArchie Cobbs 	int flags;
35544a8ff31SArchie Cobbs {
3568355f576SJeff Roberson 	uma_slab_t slab;
35744a8ff31SArchie Cobbs 	unsigned long alloc;
35844a8ff31SArchie Cobbs 	void *newaddr;
35944a8ff31SArchie Cobbs 
36044a8ff31SArchie Cobbs 	/* realloc(NULL, ...) is equivalent to malloc(...) */
36144a8ff31SArchie Cobbs 	if (addr == NULL)
36244a8ff31SArchie Cobbs 		return (malloc(size, type, flags));
36344a8ff31SArchie Cobbs 
36499571dc3SJeff Roberson 	slab = vtoslab((vm_offset_t)addr & ~(UMA_SLAB_MASK));
3658355f576SJeff Roberson 
36644a8ff31SArchie Cobbs 	/* Sanity check */
3678355f576SJeff Roberson 	KASSERT(slab != NULL,
36844a8ff31SArchie Cobbs 	    ("realloc: address %p out of range", (void *)addr));
36944a8ff31SArchie Cobbs 
37044a8ff31SArchie Cobbs 	/* Get the size of the original block */
371099a0e58SBosko Milekic 	if (slab->us_keg)
372099a0e58SBosko Milekic 		alloc = slab->us_keg->uk_size;
3738355f576SJeff Roberson 	else
3748355f576SJeff Roberson 		alloc = slab->us_size;
37544a8ff31SArchie Cobbs 
37644a8ff31SArchie Cobbs 	/* Reuse the original block if appropriate */
37744a8ff31SArchie Cobbs 	if (size <= alloc
37844a8ff31SArchie Cobbs 	    && (size > (alloc >> REALLOC_FRACTION) || alloc == MINALLOCSIZE))
37944a8ff31SArchie Cobbs 		return (addr);
38044a8ff31SArchie Cobbs 
38144a8ff31SArchie Cobbs 	/* Allocate a new, bigger (or smaller) block */
38244a8ff31SArchie Cobbs 	if ((newaddr = malloc(size, type, flags)) == NULL)
38344a8ff31SArchie Cobbs 		return (NULL);
38444a8ff31SArchie Cobbs 
38544a8ff31SArchie Cobbs 	/* Copy over original contents */
38644a8ff31SArchie Cobbs 	bcopy(addr, newaddr, min(size, alloc));
38744a8ff31SArchie Cobbs 	free(addr, type);
38844a8ff31SArchie Cobbs 	return (newaddr);
38944a8ff31SArchie Cobbs }
39044a8ff31SArchie Cobbs 
39144a8ff31SArchie Cobbs /*
39244a8ff31SArchie Cobbs  *	reallocf: same as realloc() but free memory on failure.
39344a8ff31SArchie Cobbs  */
39444a8ff31SArchie Cobbs void *
39544a8ff31SArchie Cobbs reallocf(addr, size, type, flags)
39644a8ff31SArchie Cobbs 	void *addr;
39744a8ff31SArchie Cobbs 	unsigned long size;
39844a8ff31SArchie Cobbs 	struct malloc_type *type;
39944a8ff31SArchie Cobbs 	int flags;
40044a8ff31SArchie Cobbs {
40144a8ff31SArchie Cobbs 	void *mem;
40244a8ff31SArchie Cobbs 
40368f2d20bSPoul-Henning Kamp 	if ((mem = realloc(addr, size, type, flags)) == NULL)
40444a8ff31SArchie Cobbs 		free(addr, type);
40544a8ff31SArchie Cobbs 	return (mem);
40644a8ff31SArchie Cobbs }
40744a8ff31SArchie Cobbs 
40844a8ff31SArchie Cobbs /*
409df8bae1dSRodney W. Grimes  * Initialize the kernel memory allocator
410df8bae1dSRodney W. Grimes  */
4112b14f991SJulian Elischer /* ARGSUSED*/
4122b14f991SJulian Elischer static void
413d841aaa7SBruce Evans kmeminit(dummy)
414d841aaa7SBruce Evans 	void *dummy;
415df8bae1dSRodney W. Grimes {
4166f267175SJeff Roberson 	u_int8_t indx;
41727b8623fSDavid Greenman 	u_long mem_size;
4188355f576SJeff Roberson 	int i;
4198a58a9f6SJohn Dyson 
4206008862bSJohn Baldwin 	mtx_init(&malloc_mtx, "malloc", NULL, MTX_DEF);
42169ef67f9SJason Evans 
4228a58a9f6SJohn Dyson 	/*
4238a58a9f6SJohn Dyson 	 * Try to auto-tune the kernel memory size, so that it is
4248a58a9f6SJohn Dyson 	 * more applicable for a wider range of machine sizes.
4258a58a9f6SJohn Dyson 	 * On an X86, a VM_KMEM_SIZE_SCALE value of 4 is good, while
4268a58a9f6SJohn Dyson 	 * a VM_KMEM_SIZE of 12MB is a fair compromise.  The
4278a58a9f6SJohn Dyson 	 * VM_KMEM_SIZE_MAX is dependent on the maximum KVA space
4288a58a9f6SJohn Dyson 	 * available, and on an X86 with a total KVA space of 256MB,
4298a58a9f6SJohn Dyson 	 * try to keep VM_KMEM_SIZE_MAX at 80MB or below.
4308a58a9f6SJohn Dyson 	 *
4318a58a9f6SJohn Dyson 	 * Note that the kmem_map is also used by the zone allocator,
4328a58a9f6SJohn Dyson 	 * so make sure that there is enough space.
4338a58a9f6SJohn Dyson 	 */
434099a0e58SBosko Milekic 	vm_kmem_size = VM_KMEM_SIZE + nmbclusters * PAGE_SIZE;
4351795d0cdSPaul Saab 	mem_size = cnt.v_page_count;
4368a58a9f6SJohn Dyson 
4378a58a9f6SJohn Dyson #if defined(VM_KMEM_SIZE_SCALE)
4381795d0cdSPaul Saab 	if ((mem_size / VM_KMEM_SIZE_SCALE) > (vm_kmem_size / PAGE_SIZE))
4391795d0cdSPaul Saab 		vm_kmem_size = (mem_size / VM_KMEM_SIZE_SCALE) * PAGE_SIZE;
4408a58a9f6SJohn Dyson #endif
4418a58a9f6SJohn Dyson 
4428a58a9f6SJohn Dyson #if defined(VM_KMEM_SIZE_MAX)
44381930014SPeter Wemm 	if (vm_kmem_size >= VM_KMEM_SIZE_MAX)
44481930014SPeter Wemm 		vm_kmem_size = VM_KMEM_SIZE_MAX;
4458a58a9f6SJohn Dyson #endif
4468a58a9f6SJohn Dyson 
4478de6e8e1SMike Smith 	/* Allow final override from the kernel environment */
44884344f9fSDag-Erling Smørgrav #ifndef BURN_BRIDGES
44984344f9fSDag-Erling Smørgrav 	if (TUNABLE_INT_FETCH("kern.vm.kmem.size", &vm_kmem_size) != 0)
45084344f9fSDag-Erling Smørgrav 		printf("kern.vm.kmem.size is now called vm.kmem_size!\n");
45184344f9fSDag-Erling Smørgrav #endif
45284344f9fSDag-Erling Smørgrav 	TUNABLE_INT_FETCH("vm.kmem_size", &vm_kmem_size);
4538de6e8e1SMike Smith 
45427b8623fSDavid Greenman 	/*
45527b8623fSDavid Greenman 	 * Limit kmem virtual size to twice the physical memory.
45627b8623fSDavid Greenman 	 * This allows for kmem map sparseness, but limits the size
45727b8623fSDavid Greenman 	 * to something sane. Be careful to not overflow the 32bit
45827b8623fSDavid Greenman 	 * ints while doing the check.
45927b8623fSDavid Greenman 	 */
4601795d0cdSPaul Saab 	if (((vm_kmem_size / 2) / PAGE_SIZE) > cnt.v_page_count)
46127b8623fSDavid Greenman 		vm_kmem_size = 2 * cnt.v_page_count * PAGE_SIZE;
4628a58a9f6SJohn Dyson 
46308442f8aSBosko Milekic 	/*
464347194c1SMike Silbersack 	 * Tune settings based on the kernel map's size at this time.
465347194c1SMike Silbersack 	 */
466347194c1SMike Silbersack 	init_param3(vm_kmem_size / PAGE_SIZE);
467347194c1SMike Silbersack 
468df8bae1dSRodney W. Grimes 	kmem_map = kmem_suballoc(kernel_map, (vm_offset_t *)&kmembase,
469099a0e58SBosko Milekic 		(vm_offset_t *)&kmemlimit, vm_kmem_size);
4703075778bSJohn Dyson 	kmem_map->system_map = 1;
4718355f576SJeff Roberson 
47299571dc3SJeff Roberson 	uma_startup2();
4738355f576SJeff Roberson 
4746f267175SJeff Roberson 	for (i = 0, indx = 0; kmemzones[indx].kz_size != 0; indx++) {
4756f267175SJeff Roberson 		int size = kmemzones[indx].kz_size;
4766f267175SJeff Roberson 		char *name = kmemzones[indx].kz_name;
4778355f576SJeff Roberson 
4788efc4effSJeff Roberson 		kmemzones[indx].kz_zone = uma_zcreate(name, size,
4798efc4effSJeff Roberson #ifdef INVARIANTS
4808f70816cSJeff Roberson 		    mtrash_ctor, mtrash_dtor, mtrash_init, mtrash_fini,
4818efc4effSJeff Roberson #else
4828efc4effSJeff Roberson 		    NULL, NULL, NULL, NULL,
4838efc4effSJeff Roberson #endif
4848efc4effSJeff Roberson 		    UMA_ALIGN_PTR, UMA_ZONE_MALLOC);
4856f267175SJeff Roberson 
4868355f576SJeff Roberson 		for (;i <= size; i+= KMEM_ZBASE)
4876f267175SJeff Roberson 			kmemsize[i >> KMEM_ZSHIFT] = indx;
4888355f576SJeff Roberson 
489df8bae1dSRodney W. Grimes 	}
490254c6cb3SPoul-Henning Kamp }
491254c6cb3SPoul-Henning Kamp 
492db669378SPeter Wemm void
493db669378SPeter Wemm malloc_init(data)
494db669378SPeter Wemm 	void *data;
495254c6cb3SPoul-Henning Kamp {
496db669378SPeter Wemm 	struct malloc_type *type = (struct malloc_type *)data;
497254c6cb3SPoul-Henning Kamp 
4986f267175SJeff Roberson 	mtx_lock(&malloc_mtx);
499d1bbc7ecSPoul-Henning Kamp 	if (type->ks_magic != M_MAGIC)
500d1bbc7ecSPoul-Henning Kamp 		panic("malloc type lacks magic");
501d1bbc7ecSPoul-Henning Kamp 
502d4060a87SJohn Dyson 	if (cnt.v_page_count == 0)
503d4060a87SJohn Dyson 		panic("malloc_init not allowed before vm init");
504d4060a87SJohn Dyson 
5056f267175SJeff Roberson 	if (type->ks_next != NULL)
5066f267175SJeff Roberson 		return;
5076f267175SJeff Roberson 
508254c6cb3SPoul-Henning Kamp 	type->ks_next = kmemstatistics;
509254c6cb3SPoul-Henning Kamp 	kmemstatistics = type;
5105a34a9f0SJeff Roberson 	mtx_init(&type->ks_mtx, type->ks_shortdesc, "Malloc Stats", MTX_DEF);
5116f267175SJeff Roberson 	mtx_unlock(&malloc_mtx);
512df8bae1dSRodney W. Grimes }
513db669378SPeter Wemm 
514db669378SPeter Wemm void
515db669378SPeter Wemm malloc_uninit(data)
516db669378SPeter Wemm 	void *data;
517db669378SPeter Wemm {
518db669378SPeter Wemm 	struct malloc_type *type = (struct malloc_type *)data;
519db669378SPeter Wemm 	struct malloc_type *t;
520db669378SPeter Wemm 
5216f267175SJeff Roberson 	mtx_lock(&malloc_mtx);
5225a34a9f0SJeff Roberson 	mtx_lock(&type->ks_mtx);
523db669378SPeter Wemm 	if (type->ks_magic != M_MAGIC)
524db669378SPeter Wemm 		panic("malloc type lacks magic");
525db669378SPeter Wemm 
526db669378SPeter Wemm 	if (cnt.v_page_count == 0)
527db669378SPeter Wemm 		panic("malloc_uninit not allowed before vm init");
528db669378SPeter Wemm 
529db669378SPeter Wemm 	if (type == kmemstatistics)
530db669378SPeter Wemm 		kmemstatistics = type->ks_next;
531db669378SPeter Wemm 	else {
532db669378SPeter Wemm 		for (t = kmemstatistics; t->ks_next != NULL; t = t->ks_next) {
533db669378SPeter Wemm 			if (t->ks_next == type) {
534db669378SPeter Wemm 				t->ks_next = type->ks_next;
535db669378SPeter Wemm 				break;
536db669378SPeter Wemm 			}
537db669378SPeter Wemm 		}
538db669378SPeter Wemm 	}
539ce45b512SBruce Evans 	type->ks_next = NULL;
5405a34a9f0SJeff Roberson 	mtx_destroy(&type->ks_mtx);
5416f267175SJeff Roberson 	mtx_unlock(&malloc_mtx);
542db669378SPeter Wemm }
5436f267175SJeff Roberson 
5446f267175SJeff Roberson static int
5456f267175SJeff Roberson sysctl_kern_malloc(SYSCTL_HANDLER_ARGS)
5466f267175SJeff Roberson {
5476f267175SJeff Roberson 	struct malloc_type *type;
5486f267175SJeff Roberson 	int linesize = 128;
5496f267175SJeff Roberson 	int curline;
5506f267175SJeff Roberson 	int bufsize;
5516f267175SJeff Roberson 	int first;
5526f267175SJeff Roberson 	int error;
5536f267175SJeff Roberson 	char *buf;
5546f267175SJeff Roberson 	char *p;
5556f267175SJeff Roberson 	int cnt;
5566f267175SJeff Roberson 	int len;
5576f267175SJeff Roberson 	int i;
5586f267175SJeff Roberson 
5596f267175SJeff Roberson 	cnt = 0;
5606f267175SJeff Roberson 
5616f267175SJeff Roberson 	mtx_lock(&malloc_mtx);
5626f267175SJeff Roberson 	for (type = kmemstatistics; type != NULL; type = type->ks_next)
5636f267175SJeff Roberson 		cnt++;
5646f267175SJeff Roberson 
5655a34a9f0SJeff Roberson 	mtx_unlock(&malloc_mtx);
5666f267175SJeff Roberson 	bufsize = linesize * (cnt + 1);
567a163d034SWarner Losh 	p = buf = (char *)malloc(bufsize, M_TEMP, M_WAITOK|M_ZERO);
5685a34a9f0SJeff Roberson 	mtx_lock(&malloc_mtx);
5696f267175SJeff Roberson 
5706f267175SJeff Roberson 	len = snprintf(p, linesize,
5716f267175SJeff Roberson 	    "\n        Type  InUse MemUse HighUse Requests  Size(s)\n");
5726f267175SJeff Roberson 	p += len;
5736f267175SJeff Roberson 
5746f267175SJeff Roberson 	for (type = kmemstatistics; cnt != 0 && type != NULL;
5756f267175SJeff Roberson 	    type = type->ks_next, cnt--) {
5766f267175SJeff Roberson 		if (type->ks_calls == 0)
5776f267175SJeff Roberson 			continue;
5786f267175SJeff Roberson 
5796f267175SJeff Roberson 		curline = linesize - 2;	/* Leave room for the \n */
580289f207cSJeff Roberson 		len = snprintf(p, curline, "%13s%6lu%6luK%7luK%9llu",
5816f267175SJeff Roberson 			type->ks_shortdesc,
5826f267175SJeff Roberson 			type->ks_inuse,
5836f267175SJeff Roberson 			(type->ks_memuse + 1023) / 1024,
5846f267175SJeff Roberson 			(type->ks_maxused + 1023) / 1024,
5856f267175SJeff Roberson 			(long long unsigned)type->ks_calls);
5866f267175SJeff Roberson 		curline -= len;
5876f267175SJeff Roberson 		p += len;
5886f267175SJeff Roberson 
5896f267175SJeff Roberson 		first = 1;
590280759e7SRobert Drehmel 		for (i = 0; i < sizeof(kmemzones) / sizeof(kmemzones[0]) - 1;
591280759e7SRobert Drehmel 		    i++) {
5926f267175SJeff Roberson 			if (type->ks_size & (1 << i)) {
5936f267175SJeff Roberson 				if (first)
5946f267175SJeff Roberson 					len = snprintf(p, curline, "  ");
5956f267175SJeff Roberson 				else
5966f267175SJeff Roberson 					len = snprintf(p, curline, ",");
5976f267175SJeff Roberson 				curline -= len;
5986f267175SJeff Roberson 				p += len;
5996f267175SJeff Roberson 
6006f267175SJeff Roberson 				len = snprintf(p, curline,
6016f267175SJeff Roberson 				    "%s", kmemzones[i].kz_name);
6026f267175SJeff Roberson 				curline -= len;
6036f267175SJeff Roberson 				p += len;
6046f267175SJeff Roberson 
6056f267175SJeff Roberson 				first = 0;
6066f267175SJeff Roberson 			}
607280759e7SRobert Drehmel 		}
6086f267175SJeff Roberson 
6096f267175SJeff Roberson 		len = snprintf(p, 2, "\n");
6106f267175SJeff Roberson 		p += len;
6116f267175SJeff Roberson 	}
6126f267175SJeff Roberson 
6136f267175SJeff Roberson 	mtx_unlock(&malloc_mtx);
6146f267175SJeff Roberson 	error = SYSCTL_OUT(req, buf, p - buf);
6156f267175SJeff Roberson 
6166f267175SJeff Roberson 	free(buf, M_TEMP);
6176f267175SJeff Roberson 	return (error);
6186f267175SJeff Roberson }
6196f267175SJeff Roberson 
6206f267175SJeff Roberson SYSCTL_OID(_kern, OID_AUTO, malloc, CTLTYPE_STRING|CTLFLAG_RD,
6216f267175SJeff Roberson     NULL, 0, sysctl_kern_malloc, "A", "Malloc Stats");
6225e914b96SJeff Roberson 
6235e914b96SJeff Roberson #ifdef MALLOC_PROFILE
6245e914b96SJeff Roberson 
6255e914b96SJeff Roberson static int
6265e914b96SJeff Roberson sysctl_kern_mprof(SYSCTL_HANDLER_ARGS)
6275e914b96SJeff Roberson {
6285e914b96SJeff Roberson 	int linesize = 64;
6295e914b96SJeff Roberson 	uint64_t count;
6305e914b96SJeff Roberson 	uint64_t waste;
6315e914b96SJeff Roberson 	uint64_t mem;
6325e914b96SJeff Roberson 	int bufsize;
6335e914b96SJeff Roberson 	int error;
6345e914b96SJeff Roberson 	char *buf;
6355e914b96SJeff Roberson 	int rsize;
6365e914b96SJeff Roberson 	int size;
6375e914b96SJeff Roberson 	char *p;
6385e914b96SJeff Roberson 	int len;
6395e914b96SJeff Roberson 	int i;
6405e914b96SJeff Roberson 
6415e914b96SJeff Roberson 	bufsize = linesize * (KMEM_ZSIZE + 1);
6425e914b96SJeff Roberson 	bufsize += 128; 	/* For the stats line */
6435e914b96SJeff Roberson 	bufsize += 128; 	/* For the banner line */
6445e914b96SJeff Roberson 	waste = 0;
6455e914b96SJeff Roberson 	mem = 0;
6465e914b96SJeff Roberson 
647a163d034SWarner Losh 	p = buf = (char *)malloc(bufsize, M_TEMP, M_WAITOK|M_ZERO);
6485e914b96SJeff Roberson 	len = snprintf(p, bufsize,
6495e914b96SJeff Roberson 	    "\n  Size                    Requests  Real Size\n");
6505e914b96SJeff Roberson 	bufsize -= len;
6515e914b96SJeff Roberson 	p += len;
6525e914b96SJeff Roberson 
6535e914b96SJeff Roberson 	for (i = 0; i < KMEM_ZSIZE; i++) {
6545e914b96SJeff Roberson 		size = i << KMEM_ZSHIFT;
6555e914b96SJeff Roberson 		rsize = kmemzones[kmemsize[i]].kz_size;
6565e914b96SJeff Roberson 		count = (long long unsigned)krequests[i];
6575e914b96SJeff Roberson 
6585e914b96SJeff Roberson 		len = snprintf(p, bufsize, "%6d%28llu%11d\n",
6595e914b96SJeff Roberson 		    size, (unsigned long long)count, rsize);
6605e914b96SJeff Roberson 		bufsize -= len;
6615e914b96SJeff Roberson 		p += len;
6625e914b96SJeff Roberson 
6635e914b96SJeff Roberson 		if ((rsize * count) > (size * count))
6645e914b96SJeff Roberson 			waste += (rsize * count) - (size * count);
6655e914b96SJeff Roberson 		mem += (rsize * count);
6665e914b96SJeff Roberson 	}
6675e914b96SJeff Roberson 
6685e914b96SJeff Roberson 	len = snprintf(p, bufsize,
6695e914b96SJeff Roberson 	    "\nTotal memory used:\t%30llu\nTotal Memory wasted:\t%30llu\n",
6705e914b96SJeff Roberson 	    (unsigned long long)mem, (unsigned long long)waste);
6715e914b96SJeff Roberson 	p += len;
6725e914b96SJeff Roberson 
6735e914b96SJeff Roberson 	error = SYSCTL_OUT(req, buf, p - buf);
6745e914b96SJeff Roberson 
6755e914b96SJeff Roberson 	free(buf, M_TEMP);
6765e914b96SJeff Roberson 	return (error);
6775e914b96SJeff Roberson }
6785e914b96SJeff Roberson 
6795e914b96SJeff Roberson SYSCTL_OID(_kern, OID_AUTO, mprof, CTLTYPE_STRING|CTLFLAG_RD,
6805e914b96SJeff Roberson     NULL, 0, sysctl_kern_mprof, "A", "Malloc Profiling");
6815e914b96SJeff Roberson #endif /* MALLOC_PROFILE */
682