xref: /freebsd/sys/vm/uma_core.c (revision 251386b4b261f2fd90e4eb905d9f18288a2c80ff)
160727d8bSWarner Losh /*-
2e20a199fSJeff Roberson  * Copyright (c) 2002-2005, 2009 Jeffrey Roberson <jeff@FreeBSD.org>
308ecce74SRobert Watson  * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
4ae4e9636SRobert Watson  * Copyright (c) 2004-2006 Robert N. M. Watson
508ecce74SRobert Watson  * All rights reserved.
68355f576SJeff Roberson  *
78355f576SJeff Roberson  * Redistribution and use in source and binary forms, with or without
88355f576SJeff Roberson  * modification, are permitted provided that the following conditions
98355f576SJeff Roberson  * are met:
108355f576SJeff Roberson  * 1. Redistributions of source code must retain the above copyright
118355f576SJeff Roberson  *    notice unmodified, this list of conditions, and the following
128355f576SJeff Roberson  *    disclaimer.
138355f576SJeff Roberson  * 2. Redistributions in binary form must reproduce the above copyright
148355f576SJeff Roberson  *    notice, this list of conditions and the following disclaimer in the
158355f576SJeff Roberson  *    documentation and/or other materials provided with the distribution.
168355f576SJeff Roberson  *
178355f576SJeff Roberson  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
188355f576SJeff Roberson  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
198355f576SJeff Roberson  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
208355f576SJeff Roberson  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
218355f576SJeff Roberson  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
228355f576SJeff Roberson  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
238355f576SJeff Roberson  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
248355f576SJeff Roberson  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
258355f576SJeff Roberson  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
268355f576SJeff Roberson  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
278355f576SJeff Roberson  */
288355f576SJeff Roberson 
298355f576SJeff Roberson /*
308355f576SJeff Roberson  * uma_core.c  Implementation of the Universal Memory allocator
318355f576SJeff Roberson  *
328355f576SJeff Roberson  * This allocator is intended to replace the multitude of similar object caches
338355f576SJeff Roberson  * in the standard FreeBSD kernel.  The intent is to be flexible as well as
 * efficient.  A primary design goal is to return unused memory to the rest of
358355f576SJeff Roberson  * the system.  This will make the system as a whole more flexible due to the
368355f576SJeff Roberson  * ability to move memory to subsystems which most need it instead of leaving
378355f576SJeff Roberson  * pools of reserved memory unused.
388355f576SJeff Roberson  *
398355f576SJeff Roberson  * The basic ideas stem from similar slab/zone based allocators whose algorithms
408355f576SJeff Roberson  * are well known.
418355f576SJeff Roberson  *
428355f576SJeff Roberson  */
438355f576SJeff Roberson 
448355f576SJeff Roberson /*
458355f576SJeff Roberson  * TODO:
468355f576SJeff Roberson  *	- Improve memory usage for large allocations
478355f576SJeff Roberson  *	- Investigate cache size adjustments
488355f576SJeff Roberson  */
498355f576SJeff Roberson 
50874651b1SDavid E. O'Brien #include <sys/cdefs.h>
51874651b1SDavid E. O'Brien __FBSDID("$FreeBSD$");
52874651b1SDavid E. O'Brien 
538355f576SJeff Roberson /* I should really use ktr.. */
548355f576SJeff Roberson /*
558355f576SJeff Roberson #define UMA_DEBUG 1
568355f576SJeff Roberson #define UMA_DEBUG_ALLOC 1
578355f576SJeff Roberson #define UMA_DEBUG_ALLOC_1 1
588355f576SJeff Roberson */
598355f576SJeff Roberson 
6048c5777eSRobert Watson #include "opt_ddb.h"
618355f576SJeff Roberson #include "opt_param.h"
628d689e04SGleb Smirnoff #include "opt_vm.h"
6348c5777eSRobert Watson 
648355f576SJeff Roberson #include <sys/param.h>
658355f576SJeff Roberson #include <sys/systm.h>
668355f576SJeff Roberson #include <sys/kernel.h>
678355f576SJeff Roberson #include <sys/types.h>
688355f576SJeff Roberson #include <sys/queue.h>
698355f576SJeff Roberson #include <sys/malloc.h>
703659f747SRobert Watson #include <sys/ktr.h>
718355f576SJeff Roberson #include <sys/lock.h>
728355f576SJeff Roberson #include <sys/sysctl.h>
738355f576SJeff Roberson #include <sys/mutex.h>
744c1cc01cSJohn Baldwin #include <sys/proc.h>
757a52a97eSRobert Watson #include <sys/sbuf.h>
768355f576SJeff Roberson #include <sys/smp.h>
7786bbae32SJeff Roberson #include <sys/vmmeter.h>
7886bbae32SJeff Roberson 
798355f576SJeff Roberson #include <vm/vm.h>
808355f576SJeff Roberson #include <vm/vm_object.h>
818355f576SJeff Roberson #include <vm/vm_page.h>
828355f576SJeff Roberson #include <vm/vm_param.h>
838355f576SJeff Roberson #include <vm/vm_map.h>
848355f576SJeff Roberson #include <vm/vm_kern.h>
858355f576SJeff Roberson #include <vm/vm_extern.h>
868355f576SJeff Roberson #include <vm/uma.h>
878355f576SJeff Roberson #include <vm/uma_int.h>
88639c9550SJeff Roberson #include <vm/uma_dbg.h>
898355f576SJeff Roberson 
9048c5777eSRobert Watson #include <ddb/ddb.h>
9148c5777eSRobert Watson 
928d689e04SGleb Smirnoff #ifdef DEBUG_MEMGUARD
938d689e04SGleb Smirnoff #include <vm/memguard.h>
948d689e04SGleb Smirnoff #endif
958d689e04SGleb Smirnoff 
968355f576SJeff Roberson /*
97099a0e58SBosko Milekic  * This is the zone and keg from which all zones are spawned.  The idea is that
98099a0e58SBosko Milekic  * even the zone & keg heads are allocated from the allocator, so we use the
99099a0e58SBosko Milekic  * bss section to bootstrap us.
1008355f576SJeff Roberson  */
101099a0e58SBosko Milekic static struct uma_keg masterkeg;
102099a0e58SBosko Milekic static struct uma_zone masterzone_k;
103099a0e58SBosko Milekic static struct uma_zone masterzone_z;
104099a0e58SBosko Milekic static uma_zone_t kegs = &masterzone_k;
105099a0e58SBosko Milekic static uma_zone_t zones = &masterzone_z;
1068355f576SJeff Roberson 
1078355f576SJeff Roberson /* This is the zone from which all of uma_slab_t's are allocated. */
1088355f576SJeff Roberson static uma_zone_t slabzone;
109099a0e58SBosko Milekic static uma_zone_t slabrefzone;	/* With refcounters (for UMA_ZONE_REFCNT) */
1108355f576SJeff Roberson 
1118355f576SJeff Roberson /*
1128355f576SJeff Roberson  * The initial hash tables come out of this zone so they can be allocated
1138355f576SJeff Roberson  * prior to malloc coming up.
1148355f576SJeff Roberson  */
1158355f576SJeff Roberson static uma_zone_t hashzone;
1168355f576SJeff Roberson 
1171e319f6dSRobert Watson /* The boot-time adjusted value for cache line alignment. */
118e4cd31ddSJeff Roberson int uma_align_cache = 64 - 1;
1191e319f6dSRobert Watson 
120961647dfSJeff Roberson static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
121961647dfSJeff Roberson 
1228355f576SJeff Roberson /*
12386bbae32SJeff Roberson  * Are we allowed to allocate buckets?
12486bbae32SJeff Roberson  */
12586bbae32SJeff Roberson static int bucketdisable = 1;
12686bbae32SJeff Roberson 
127099a0e58SBosko Milekic /* Linked list of all kegs in the system */
12813e403fdSAntoine Brodin static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs);
1298355f576SJeff Roberson 
130099a0e58SBosko Milekic /* This mutex protects the keg list */
1318355f576SJeff Roberson static struct mtx uma_mtx;
1328355f576SJeff Roberson 
1338355f576SJeff Roberson /* Linked list of boot time pages */
1348355f576SJeff Roberson static LIST_HEAD(,uma_slab) uma_boot_pages =
13513e403fdSAntoine Brodin     LIST_HEAD_INITIALIZER(uma_boot_pages);
1368355f576SJeff Roberson 
137f353d338SAlan Cox /* This mutex protects the boot time pages list */
138f353d338SAlan Cox static struct mtx uma_boot_pages_mtx;
1398355f576SJeff Roberson 
1408355f576SJeff Roberson /* Is the VM done starting up? */
1418355f576SJeff Roberson static int booted = 0;
142342f1793SAlan Cox #define	UMA_STARTUP	1
143342f1793SAlan Cox #define	UMA_STARTUP2	2
1448355f576SJeff Roberson 
145244f4554SBosko Milekic /* Maximum number of allowed items-per-slab if the slab header is OFFPAGE */
146244f4554SBosko Milekic static u_int uma_max_ipers;
147244f4554SBosko Milekic static u_int uma_max_ipers_ref;
148244f4554SBosko Milekic 
1499643769aSJeff Roberson /*
1509643769aSJeff Roberson  * This is the handle used to schedule events that need to happen
1519643769aSJeff Roberson  * outside of the allocation fast path.
1529643769aSJeff Roberson  */
1538355f576SJeff Roberson static struct callout uma_callout;
1549643769aSJeff Roberson #define	UMA_TIMEOUT	20		/* Seconds for callout interval. */
1558355f576SJeff Roberson 
1568355f576SJeff Roberson /*
1578355f576SJeff Roberson  * This structure is passed as the zone ctor arg so that I don't have to create
1588355f576SJeff Roberson  * a special allocation function just for zones.
1598355f576SJeff Roberson  */
1608355f576SJeff Roberson struct uma_zctor_args {
1618355f576SJeff Roberson 	char *name;
162c3bdc05fSAndrew R. Reiter 	size_t size;
1638355f576SJeff Roberson 	uma_ctor ctor;
1648355f576SJeff Roberson 	uma_dtor dtor;
1658355f576SJeff Roberson 	uma_init uminit;
1668355f576SJeff Roberson 	uma_fini fini;
167099a0e58SBosko Milekic 	uma_keg_t keg;
168099a0e58SBosko Milekic 	int align;
1692018f30cSMike Silbersack 	u_int32_t flags;
170099a0e58SBosko Milekic };
171099a0e58SBosko Milekic 
172099a0e58SBosko Milekic struct uma_kctor_args {
173099a0e58SBosko Milekic 	uma_zone_t zone;
174099a0e58SBosko Milekic 	size_t size;
175099a0e58SBosko Milekic 	uma_init uminit;
176099a0e58SBosko Milekic 	uma_fini fini;
1778355f576SJeff Roberson 	int align;
1782018f30cSMike Silbersack 	u_int32_t flags;
1798355f576SJeff Roberson };
1808355f576SJeff Roberson 
181cae33c14SJeff Roberson struct uma_bucket_zone {
182cae33c14SJeff Roberson 	uma_zone_t	ubz_zone;
183cae33c14SJeff Roberson 	char		*ubz_name;
184cae33c14SJeff Roberson 	int		ubz_entries;
185cae33c14SJeff Roberson };
186cae33c14SJeff Roberson 
187cae33c14SJeff Roberson #define	BUCKET_MAX	128
188cae33c14SJeff Roberson 
189cae33c14SJeff Roberson struct uma_bucket_zone bucket_zones[] = {
190cae33c14SJeff Roberson 	{ NULL, "16 Bucket", 16 },
191cae33c14SJeff Roberson 	{ NULL, "32 Bucket", 32 },
192cae33c14SJeff Roberson 	{ NULL, "64 Bucket", 64 },
193cae33c14SJeff Roberson 	{ NULL, "128 Bucket", 128 },
194cae33c14SJeff Roberson 	{ NULL, NULL, 0}
195cae33c14SJeff Roberson };
196cae33c14SJeff Roberson 
197cae33c14SJeff Roberson #define	BUCKET_SHIFT	4
198cae33c14SJeff Roberson #define	BUCKET_ZONES	((BUCKET_MAX >> BUCKET_SHIFT) + 1)
199cae33c14SJeff Roberson 
200f9d27e75SRobert Watson /*
201f9d27e75SRobert Watson  * bucket_size[] maps requested bucket sizes to zones that allocate a bucket
202f9d27e75SRobert Watson  * of approximately the right size.
203f9d27e75SRobert Watson  */
204f9d27e75SRobert Watson static uint8_t bucket_size[BUCKET_ZONES];
205cae33c14SJeff Roberson 
2062019094aSRobert Watson /*
2072019094aSRobert Watson  * Flags and enumerations to be passed to internal functions.
2082019094aSRobert Watson  */
209b23f72e9SBrian Feldman enum zfreeskip { SKIP_NONE, SKIP_DTOR, SKIP_FINI };
210b23f72e9SBrian Feldman 
2112019094aSRobert Watson #define	ZFREE_STATFAIL	0x00000001	/* Update zone failure statistic. */
212f4ff923bSRobert Watson #define	ZFREE_STATFREE	0x00000002	/* Update zone free statistic. */
2132019094aSRobert Watson 
2148355f576SJeff Roberson /* Prototypes.. */
2158355f576SJeff Roberson 
2168355f576SJeff Roberson static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
2178355f576SJeff Roberson static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
218009b6fcbSJeff Roberson static void *startup_alloc(uma_zone_t, int, u_int8_t *, int);
2198355f576SJeff Roberson static void page_free(void *, int, u_int8_t);
220e20a199fSJeff Roberson static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int);
2219643769aSJeff Roberson static void cache_drain(uma_zone_t);
2228355f576SJeff Roberson static void bucket_drain(uma_zone_t, uma_bucket_t);
223aaa8bb16SJeff Roberson static void bucket_cache_drain(uma_zone_t zone);
224b23f72e9SBrian Feldman static int keg_ctor(void *, int, void *, int);
225099a0e58SBosko Milekic static void keg_dtor(void *, int, void *);
226b23f72e9SBrian Feldman static int zone_ctor(void *, int, void *, int);
2279c2cd7e5SJeff Roberson static void zone_dtor(void *, int, void *);
228b23f72e9SBrian Feldman static int zero_init(void *, int, int);
229e20a199fSJeff Roberson static void keg_small_init(uma_keg_t keg);
230e20a199fSJeff Roberson static void keg_large_init(uma_keg_t keg);
2318355f576SJeff Roberson static void zone_foreach(void (*zfunc)(uma_zone_t));
2328355f576SJeff Roberson static void zone_timeout(uma_zone_t zone);
2330aef6126SJeff Roberson static int hash_alloc(struct uma_hash *);
2340aef6126SJeff Roberson static int hash_expand(struct uma_hash *, struct uma_hash *);
2350aef6126SJeff Roberson static void hash_free(struct uma_hash *hash);
2368355f576SJeff Roberson static void uma_timeout(void *);
2378355f576SJeff Roberson static void uma_startup3(void);
238e20a199fSJeff Roberson static void *zone_alloc_item(uma_zone_t, void *, int);
239e20a199fSJeff Roberson static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip,
2402019094aSRobert Watson     int);
24186bbae32SJeff Roberson static void bucket_enable(void);
242cae33c14SJeff Roberson static void bucket_init(void);
243cae33c14SJeff Roberson static uma_bucket_t bucket_alloc(int, int);
244cae33c14SJeff Roberson static void bucket_free(uma_bucket_t);
245cae33c14SJeff Roberson static void bucket_zone_drain(void);
246e20a199fSJeff Roberson static int zone_alloc_bucket(uma_zone_t zone, int flags);
247e20a199fSJeff Roberson static uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t last, int flags);
248e20a199fSJeff Roberson static uma_slab_t zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int flags);
249e20a199fSJeff Roberson static void *slab_alloc_item(uma_zone_t zone, uma_slab_t slab);
250e20a199fSJeff Roberson static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
2512018f30cSMike Silbersack     uma_fini fini, int align, u_int32_t flags);
252e20a199fSJeff Roberson static inline void zone_relock(uma_zone_t zone, uma_keg_t keg);
253e20a199fSJeff Roberson static inline void keg_relock(uma_keg_t keg, uma_zone_t zone);
254bbee39c6SJeff Roberson 
2558355f576SJeff Roberson void uma_print_zone(uma_zone_t);
2568355f576SJeff Roberson void uma_print_stats(void);
2577a52a97eSRobert Watson static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
2587a52a97eSRobert Watson static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
2598355f576SJeff Roberson 
2608355f576SJeff Roberson SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
2618355f576SJeff Roberson 
2627a52a97eSRobert Watson SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
2637a52a97eSRobert Watson     0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
2647a52a97eSRobert Watson 
2657a52a97eSRobert Watson SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
2667a52a97eSRobert Watson     0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");
2677a52a97eSRobert Watson 
26886bbae32SJeff Roberson /*
26986bbae32SJeff Roberson  * This routine checks to see whether or not it's safe to enable buckets.
27086bbae32SJeff Roberson  */
27186bbae32SJeff Roberson 
static void
bucket_enable(void)
{
	/*
	 * Buckets are disabled whenever vm_page_count_min() reports a
	 * free-page shortage; this is re-evaluated periodically from
	 * uma_timeout(), so buckets come back once pages are available.
	 */
	bucketdisable = vm_page_count_min();
}
27786bbae32SJeff Roberson 
278dc2c7965SRobert Watson /*
279dc2c7965SRobert Watson  * Initialize bucket_zones, the array of zones of buckets of various sizes.
280dc2c7965SRobert Watson  *
281dc2c7965SRobert Watson  * For each zone, calculate the memory required for each bucket, consisting
282dc2c7965SRobert Watson  * of the header and an array of pointers.  Initialize bucket_size[] to point
283dc2c7965SRobert Watson  * the range of appropriate bucket sizes at the zone.
284dc2c7965SRobert Watson  */
285cae33c14SJeff Roberson static void
286cae33c14SJeff Roberson bucket_init(void)
287cae33c14SJeff Roberson {
288cae33c14SJeff Roberson 	struct uma_bucket_zone *ubz;
289cae33c14SJeff Roberson 	int i;
290cae33c14SJeff Roberson 	int j;
291cae33c14SJeff Roberson 
292cae33c14SJeff Roberson 	for (i = 0, j = 0; bucket_zones[j].ubz_entries != 0; j++) {
293cae33c14SJeff Roberson 		int size;
294cae33c14SJeff Roberson 
295cae33c14SJeff Roberson 		ubz = &bucket_zones[j];
296cae33c14SJeff Roberson 		size = roundup(sizeof(struct uma_bucket), sizeof(void *));
297cae33c14SJeff Roberson 		size += sizeof(void *) * ubz->ubz_entries;
298cae33c14SJeff Roberson 		ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
299e20a199fSJeff Roberson 		    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
300e20a199fSJeff Roberson 		    UMA_ZFLAG_INTERNAL | UMA_ZFLAG_BUCKET);
301cae33c14SJeff Roberson 		for (; i <= ubz->ubz_entries; i += (1 << BUCKET_SHIFT))
302cae33c14SJeff Roberson 			bucket_size[i >> BUCKET_SHIFT] = j;
303cae33c14SJeff Roberson 	}
304cae33c14SJeff Roberson }
305cae33c14SJeff Roberson 
306dc2c7965SRobert Watson /*
307dc2c7965SRobert Watson  * Given a desired number of entries for a bucket, return the zone from which
308dc2c7965SRobert Watson  * to allocate the bucket.
309dc2c7965SRobert Watson  */
310dc2c7965SRobert Watson static struct uma_bucket_zone *
311dc2c7965SRobert Watson bucket_zone_lookup(int entries)
312dc2c7965SRobert Watson {
313dc2c7965SRobert Watson 	int idx;
314dc2c7965SRobert Watson 
315dc2c7965SRobert Watson 	idx = howmany(entries, 1 << BUCKET_SHIFT);
316dc2c7965SRobert Watson 	return (&bucket_zones[bucket_size[idx]]);
317dc2c7965SRobert Watson }
318dc2c7965SRobert Watson 
319cae33c14SJeff Roberson static uma_bucket_t
320cae33c14SJeff Roberson bucket_alloc(int entries, int bflags)
321cae33c14SJeff Roberson {
322cae33c14SJeff Roberson 	struct uma_bucket_zone *ubz;
323cae33c14SJeff Roberson 	uma_bucket_t bucket;
324cae33c14SJeff Roberson 
325cae33c14SJeff Roberson 	/*
326cae33c14SJeff Roberson 	 * This is to stop us from allocating per cpu buckets while we're
3273803b26bSDag-Erling Smørgrav 	 * running out of vm.boot_pages.  Otherwise, we would exhaust the
328cae33c14SJeff Roberson 	 * boot pages.  This also prevents us from allocating buckets in
329cae33c14SJeff Roberson 	 * low memory situations.
330cae33c14SJeff Roberson 	 */
331cae33c14SJeff Roberson 	if (bucketdisable)
332cae33c14SJeff Roberson 		return (NULL);
333dc2c7965SRobert Watson 
334dc2c7965SRobert Watson 	ubz = bucket_zone_lookup(entries);
335e20a199fSJeff Roberson 	bucket = zone_alloc_item(ubz->ubz_zone, NULL, bflags);
336cae33c14SJeff Roberson 	if (bucket) {
337cae33c14SJeff Roberson #ifdef INVARIANTS
338cae33c14SJeff Roberson 		bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
339cae33c14SJeff Roberson #endif
340cae33c14SJeff Roberson 		bucket->ub_cnt = 0;
341cae33c14SJeff Roberson 		bucket->ub_entries = ubz->ubz_entries;
342cae33c14SJeff Roberson 	}
343cae33c14SJeff Roberson 
344cae33c14SJeff Roberson 	return (bucket);
345cae33c14SJeff Roberson }
346cae33c14SJeff Roberson 
347cae33c14SJeff Roberson static void
348cae33c14SJeff Roberson bucket_free(uma_bucket_t bucket)
349cae33c14SJeff Roberson {
350cae33c14SJeff Roberson 	struct uma_bucket_zone *ubz;
351cae33c14SJeff Roberson 
352dc2c7965SRobert Watson 	ubz = bucket_zone_lookup(bucket->ub_entries);
353e20a199fSJeff Roberson 	zone_free_item(ubz->ubz_zone, bucket, NULL, SKIP_NONE,
354f4ff923bSRobert Watson 	    ZFREE_STATFREE);
355cae33c14SJeff Roberson }
356cae33c14SJeff Roberson 
357cae33c14SJeff Roberson static void
358cae33c14SJeff Roberson bucket_zone_drain(void)
359cae33c14SJeff Roberson {
360cae33c14SJeff Roberson 	struct uma_bucket_zone *ubz;
361cae33c14SJeff Roberson 
362cae33c14SJeff Roberson 	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
363cae33c14SJeff Roberson 		zone_drain(ubz->ubz_zone);
364cae33c14SJeff Roberson }
365cae33c14SJeff Roberson 
366e20a199fSJeff Roberson static inline uma_keg_t
367e20a199fSJeff Roberson zone_first_keg(uma_zone_t zone)
368e20a199fSJeff Roberson {
369e20a199fSJeff Roberson 
370e20a199fSJeff Roberson 	return (LIST_FIRST(&zone->uz_kegs)->kl_keg);
371e20a199fSJeff Roberson }
372e20a199fSJeff Roberson 
373e20a199fSJeff Roberson static void
374e20a199fSJeff Roberson zone_foreach_keg(uma_zone_t zone, void (*kegfn)(uma_keg_t))
375e20a199fSJeff Roberson {
376e20a199fSJeff Roberson 	uma_klink_t klink;
377e20a199fSJeff Roberson 
378e20a199fSJeff Roberson 	LIST_FOREACH(klink, &zone->uz_kegs, kl_link)
379e20a199fSJeff Roberson 		kegfn(klink->kl_keg);
380e20a199fSJeff Roberson }
3818355f576SJeff Roberson 
3828355f576SJeff Roberson /*
3838355f576SJeff Roberson  * Routine called by timeout which is used to fire off some time interval
3849643769aSJeff Roberson  * based calculations.  (stats, hash size, etc.)
3858355f576SJeff Roberson  *
3868355f576SJeff Roberson  * Arguments:
3878355f576SJeff Roberson  *	arg   Unused
3888355f576SJeff Roberson  *
3898355f576SJeff Roberson  * Returns:
3908355f576SJeff Roberson  *	Nothing
3918355f576SJeff Roberson  */
static void
uma_timeout(void *unused)
{
	/* Re-evaluate whether buckets may be allocated (page shortage). */
	bucket_enable();
	/* Run per-zone periodic work (hash expansion via zone_timeout). */
	zone_foreach(zone_timeout);

	/* Reschedule this event */
	callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
}
4018355f576SJeff Roberson 
4028355f576SJeff Roberson /*
4039643769aSJeff Roberson  * Routine to perform timeout driven calculations.  This expands the
4049643769aSJeff Roberson  * hashes and does per cpu statistics aggregation.
4058355f576SJeff Roberson  *
406e20a199fSJeff Roberson  *  Returns nothing.
4078355f576SJeff Roberson  */
static void
keg_timeout(uma_keg_t keg)
{

	KEG_LOCK(keg);
	/*
	 * Expand the keg hash table.
	 *
	 * This is done if the number of slabs is larger than the hash size.
	 * What I'm trying to do here is completely reduce collisions.  This
	 * may be a little aggressive.  Should I allow for two collisions max?
	 */
	if (keg->uk_flags & UMA_ZONE_HASH &&
	    keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
		struct uma_hash newhash;
		struct uma_hash oldhash;
		int ret;

		/*
		 * This is so involved because allocating and freeing
		 * while the keg lock is held will lead to deadlock.
		 * I have to do everything in stages and check for
		 * races.
		 */
		/* Snapshot the current hash so hash_alloc() sees its size. */
		newhash = keg->uk_hash;
		KEG_UNLOCK(keg);
		ret = hash_alloc(&newhash);
		KEG_LOCK(keg);
		if (ret) {
			if (hash_expand(&keg->uk_hash, &newhash)) {
				/* Rehash succeeded; retire the old table. */
				oldhash = keg->uk_hash;
				keg->uk_hash = newhash;
			} else
				/*
				 * hash_expand() declined (e.g. the table
				 * grew while unlocked); free our new one.
				 */
				oldhash = newhash;

			/* Drop the lock again: hash_free() may allocate/free. */
			KEG_UNLOCK(keg);
			hash_free(&oldhash);
			KEG_LOCK(keg);
		}
	}
	KEG_UNLOCK(keg);
}
450e20a199fSJeff Roberson 
static void
zone_timeout(uma_zone_t zone)
{

	/* Run periodic upkeep (hash expansion) on each keg of this zone. */
	zone_foreach_keg(zone, &keg_timeout);
}
4578355f576SJeff Roberson 
4588355f576SJeff Roberson /*
4595300d9ddSJeff Roberson  * Allocate and zero fill the next sized hash table from the appropriate
4605300d9ddSJeff Roberson  * backing store.
4615300d9ddSJeff Roberson  *
4625300d9ddSJeff Roberson  * Arguments:
4630aef6126SJeff Roberson  *	hash  A new hash structure with the old hash size in uh_hashsize
4645300d9ddSJeff Roberson  *
4655300d9ddSJeff Roberson  * Returns:
4660aef6126SJeff Roberson  *	1 on sucess and 0 on failure.
4675300d9ddSJeff Roberson  */
46837c84183SPoul-Henning Kamp static int
4690aef6126SJeff Roberson hash_alloc(struct uma_hash *hash)
4705300d9ddSJeff Roberson {
4710aef6126SJeff Roberson 	int oldsize;
4725300d9ddSJeff Roberson 	int alloc;
4735300d9ddSJeff Roberson 
4740aef6126SJeff Roberson 	oldsize = hash->uh_hashsize;
4750aef6126SJeff Roberson 
4765300d9ddSJeff Roberson 	/* We're just going to go to a power of two greater */
4770aef6126SJeff Roberson 	if (oldsize)  {
4780aef6126SJeff Roberson 		hash->uh_hashsize = oldsize * 2;
4790aef6126SJeff Roberson 		alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
4800aef6126SJeff Roberson 		hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
481961647dfSJeff Roberson 		    M_UMAHASH, M_NOWAIT);
4825300d9ddSJeff Roberson 	} else {
4830aef6126SJeff Roberson 		alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
484e20a199fSJeff Roberson 		hash->uh_slab_hash = zone_alloc_item(hashzone, NULL,
485a163d034SWarner Losh 		    M_WAITOK);
4860aef6126SJeff Roberson 		hash->uh_hashsize = UMA_HASH_SIZE_INIT;
4875300d9ddSJeff Roberson 	}
4880aef6126SJeff Roberson 	if (hash->uh_slab_hash) {
4890aef6126SJeff Roberson 		bzero(hash->uh_slab_hash, alloc);
4900aef6126SJeff Roberson 		hash->uh_hashmask = hash->uh_hashsize - 1;
4910aef6126SJeff Roberson 		return (1);
4920aef6126SJeff Roberson 	}
4935300d9ddSJeff Roberson 
4940aef6126SJeff Roberson 	return (0);
4955300d9ddSJeff Roberson }
4965300d9ddSJeff Roberson 
4975300d9ddSJeff Roberson /*
49864f051e9SJeff Roberson  * Expands the hash table for HASH zones.  This is done from zone_timeout
49964f051e9SJeff Roberson  * to reduce collisions.  This must not be done in the regular allocation
50064f051e9SJeff Roberson  * path, otherwise, we can recurse on the vm while allocating pages.
5018355f576SJeff Roberson  *
5028355f576SJeff Roberson  * Arguments:
5030aef6126SJeff Roberson  *	oldhash  The hash you want to expand
5040aef6126SJeff Roberson  *	newhash  The hash structure for the new table
5058355f576SJeff Roberson  *
5068355f576SJeff Roberson  * Returns:
5078355f576SJeff Roberson  *	Nothing
5088355f576SJeff Roberson  *
5098355f576SJeff Roberson  * Discussion:
5108355f576SJeff Roberson  */
5110aef6126SJeff Roberson static int
5120aef6126SJeff Roberson hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
5138355f576SJeff Roberson {
5148355f576SJeff Roberson 	uma_slab_t slab;
5158355f576SJeff Roberson 	int hval;
5168355f576SJeff Roberson 	int i;
5178355f576SJeff Roberson 
5180aef6126SJeff Roberson 	if (!newhash->uh_slab_hash)
5190aef6126SJeff Roberson 		return (0);
5208355f576SJeff Roberson 
5210aef6126SJeff Roberson 	if (oldhash->uh_hashsize >= newhash->uh_hashsize)
5220aef6126SJeff Roberson 		return (0);
5238355f576SJeff Roberson 
5248355f576SJeff Roberson 	/*
5258355f576SJeff Roberson 	 * I need to investigate hash algorithms for resizing without a
5268355f576SJeff Roberson 	 * full rehash.
5278355f576SJeff Roberson 	 */
5288355f576SJeff Roberson 
5290aef6126SJeff Roberson 	for (i = 0; i < oldhash->uh_hashsize; i++)
5300aef6126SJeff Roberson 		while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
5310aef6126SJeff Roberson 			slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
5320aef6126SJeff Roberson 			SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
5330aef6126SJeff Roberson 			hval = UMA_HASH(newhash, slab->us_data);
5340aef6126SJeff Roberson 			SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
5350aef6126SJeff Roberson 			    slab, us_hlink);
5368355f576SJeff Roberson 		}
5378355f576SJeff Roberson 
5380aef6126SJeff Roberson 	return (1);
5399c2cd7e5SJeff Roberson }
5409c2cd7e5SJeff Roberson 
5415300d9ddSJeff Roberson /*
5425300d9ddSJeff Roberson  * Free the hash bucket to the appropriate backing store.
5435300d9ddSJeff Roberson  *
 * Arguments:
 *	hash  The hash structure whose uh_slab_hash array is being freed
5475300d9ddSJeff Roberson  *
5485300d9ddSJeff Roberson  * Returns:
5495300d9ddSJeff Roberson  *	Nothing
5505300d9ddSJeff Roberson  */
5519c2cd7e5SJeff Roberson static void
5520aef6126SJeff Roberson hash_free(struct uma_hash *hash)
5539c2cd7e5SJeff Roberson {
5540aef6126SJeff Roberson 	if (hash->uh_slab_hash == NULL)
5550aef6126SJeff Roberson 		return;
5560aef6126SJeff Roberson 	if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
557e20a199fSJeff Roberson 		zone_free_item(hashzone,
558f4ff923bSRobert Watson 		    hash->uh_slab_hash, NULL, SKIP_NONE, ZFREE_STATFREE);
5598355f576SJeff Roberson 	else
560961647dfSJeff Roberson 		free(hash->uh_slab_hash, M_UMAHASH);
5618355f576SJeff Roberson }
5628355f576SJeff Roberson 
5638355f576SJeff Roberson /*
5648355f576SJeff Roberson  * Frees all outstanding items in a bucket
5658355f576SJeff Roberson  *
5668355f576SJeff Roberson  * Arguments:
5678355f576SJeff Roberson  *	zone   The zone to free to, must be unlocked.
5688355f576SJeff Roberson  *	bucket The free/alloc bucket with items, cpu queue must be locked.
5698355f576SJeff Roberson  *
5708355f576SJeff Roberson  * Returns:
5718355f576SJeff Roberson  *	Nothing
5728355f576SJeff Roberson  */
5738355f576SJeff Roberson 
5748355f576SJeff Roberson static void
5758355f576SJeff Roberson bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
5768355f576SJeff Roberson {
5778355f576SJeff Roberson 	void *item;
5788355f576SJeff Roberson 
5798355f576SJeff Roberson 	if (bucket == NULL)
5808355f576SJeff Roberson 		return;
5818355f576SJeff Roberson 
582cae33c14SJeff Roberson 	while (bucket->ub_cnt > 0)  {
583cae33c14SJeff Roberson 		bucket->ub_cnt--;
584cae33c14SJeff Roberson 		item = bucket->ub_bucket[bucket->ub_cnt];
5858355f576SJeff Roberson #ifdef INVARIANTS
586cae33c14SJeff Roberson 		bucket->ub_bucket[bucket->ub_cnt] = NULL;
5878355f576SJeff Roberson 		KASSERT(item != NULL,
5888355f576SJeff Roberson 		    ("bucket_drain: botched ptr, item is NULL"));
5898355f576SJeff Roberson #endif
590e20a199fSJeff Roberson 		zone_free_item(zone, item, NULL, SKIP_DTOR, 0);
5918355f576SJeff Roberson 	}
5928355f576SJeff Roberson }
5938355f576SJeff Roberson 
5948355f576SJeff Roberson /*
5958355f576SJeff Roberson  * Drains the per cpu caches for a zone.
5968355f576SJeff Roberson  *
 * NOTE: This may only be called while the zone is being torn down, and not
5985d1ae027SRobert Watson  * during normal operation.  This is necessary in order that we do not have
5995d1ae027SRobert Watson  * to migrate CPUs to drain the per-CPU caches.
6005d1ae027SRobert Watson  *
6018355f576SJeff Roberson  * Arguments:
6028355f576SJeff Roberson  *	zone     The zone to drain, must be unlocked.
6038355f576SJeff Roberson  *
6048355f576SJeff Roberson  * Returns:
6058355f576SJeff Roberson  *	Nothing
6068355f576SJeff Roberson  */
static void
cache_drain(uma_zone_t zone)
{
	uma_cache_t cache;
	int cpu;

	/*
	 * XXX: It is safe to not lock the per-CPU caches, because we're
	 * tearing down the zone anyway.  I.e., there will be no further use
	 * of the caches at this point.
	 *
	 * XXX: It would be good to be able to assert that the zone is being
	 * torn down to prevent improper use of cache_drain().
	 *
	 * XXX: We lock the zone before passing into bucket_cache_drain() as
	 * it is used elsewhere.  Should the tear-down path be made special
	 * there in some form?
	 */
	CPU_FOREACH(cpu) {
		cache = &zone->uz_cpu[cpu];
		/* Free the cached items, then the (now empty) buckets. */
		bucket_drain(zone, cache->uc_allocbucket);
		bucket_drain(zone, cache->uc_freebucket);
		if (cache->uc_allocbucket != NULL)
			bucket_free(cache->uc_allocbucket);
		if (cache->uc_freebucket != NULL)
			bucket_free(cache->uc_freebucket);
		cache->uc_allocbucket = cache->uc_freebucket = NULL;
	}
	/* bucket_cache_drain() expects the zone lock to be held. */
	ZONE_LOCK(zone);
	bucket_cache_drain(zone);
	ZONE_UNLOCK(zone);
}
639aaa8bb16SJeff Roberson 
640aaa8bb16SJeff Roberson /*
641aaa8bb16SJeff Roberson  * Drain the cached buckets from a zone.  Expects a locked zone on entry.
642aaa8bb16SJeff Roberson  */
643aaa8bb16SJeff Roberson static void
644aaa8bb16SJeff Roberson bucket_cache_drain(uma_zone_t zone)
645aaa8bb16SJeff Roberson {
646aaa8bb16SJeff Roberson 	uma_bucket_t bucket;
6478355f576SJeff Roberson 
6488355f576SJeff Roberson 	/*
6498355f576SJeff Roberson 	 * Drain the bucket queues and free the buckets, we just keep two per
6508355f576SJeff Roberson 	 * cpu (alloc/free).
6518355f576SJeff Roberson 	 */
6528355f576SJeff Roberson 	while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
6538355f576SJeff Roberson 		LIST_REMOVE(bucket, ub_link);
6548355f576SJeff Roberson 		ZONE_UNLOCK(zone);
6558355f576SJeff Roberson 		bucket_drain(zone, bucket);
656cae33c14SJeff Roberson 		bucket_free(bucket);
6578355f576SJeff Roberson 		ZONE_LOCK(zone);
6588355f576SJeff Roberson 	}
6598355f576SJeff Roberson 
6608355f576SJeff Roberson 	/* Now we do the free queue.. */
6618355f576SJeff Roberson 	while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
6628355f576SJeff Roberson 		LIST_REMOVE(bucket, ub_link);
663cae33c14SJeff Roberson 		bucket_free(bucket);
6648355f576SJeff Roberson 	}
6658355f576SJeff Roberson }
6668355f576SJeff Roberson 
6678355f576SJeff Roberson /*
668e20a199fSJeff Roberson  * Frees pages from a keg back to the system.  This is done on demand from
6698355f576SJeff Roberson  * the pageout daemon.
6708355f576SJeff Roberson  *
671e20a199fSJeff Roberson  * Returns nothing.
6728355f576SJeff Roberson  */
static void
keg_drain(uma_keg_t keg)
{
	struct slabhead freeslabs = { 0 };
	uma_slab_t slab;
	uma_slab_t n;
	u_int8_t flags;
	u_int8_t *mem;
	int i;

	/*
	 * We don't want to take pages from statically allocated kegs at this
	 * time
	 */
	if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
		return;

#ifdef UMA_DEBUG
	printf("%s free items: %u\n", keg->uk_name, keg->uk_free);
#endif
	/*
	 * Phase 1: under the keg lock, unlink fully-free slabs onto a
	 * private list.  The pages themselves are released in phase 2
	 * below, after the lock has been dropped.
	 */
	KEG_LOCK(keg);
	if (keg->uk_free == 0)
		goto finished;

	slab = LIST_FIRST(&keg->uk_free_slab);
	while (slab) {
		n = LIST_NEXT(slab, us_link);

		/* We have no where to free these to */
		if (slab->us_flags & UMA_SLAB_BOOT) {
			slab = n;
			continue;
		}

		LIST_REMOVE(slab, us_link);
		keg->uk_pages -= keg->uk_ppera;
		keg->uk_free -= keg->uk_ipers;

		if (keg->uk_flags & UMA_ZONE_HASH)
			UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);

		SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);

		slab = n;
	}
finished:
	KEG_UNLOCK(keg);

	/*
	 * Phase 2: with the keg unlocked, fini each item, clean up the
	 * slab's VM bookkeeping, free off-page headers, and finally hand
	 * the pages back via the keg's free function.
	 */
	while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
		SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
		if (keg->uk_fini)
			for (i = 0; i < keg->uk_ipers; i++)
				keg->uk_fini(
				    slab->us_data + (keg->uk_rsize * i),
				    keg->uk_size);
		/* Save these; an off-page header may be freed just below. */
		flags = slab->us_flags;
		mem = slab->us_data;

		if (keg->uk_flags & UMA_ZONE_VTOSLAB) {
			vm_object_t obj;

			if (flags & UMA_SLAB_KMEM)
				obj = kmem_object;
			else if (flags & UMA_SLAB_KERNEL)
				obj = kernel_object;
			else
				obj = NULL;
			/* Drop the page-to-slab back pointers. */
			for (i = 0; i < keg->uk_ppera; i++)
				vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
				    obj);
		}
		if (keg->uk_flags & UMA_ZONE_OFFPAGE)
			zone_free_item(keg->uk_slabzone, slab, NULL,
			    SKIP_NONE, ZFREE_STATFREE);
#ifdef UMA_DEBUG
		printf("%s: Returning %d bytes.\n",
		    keg->uk_name, UMA_SLAB_SIZE * keg->uk_ppera);
#endif
		keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags);
	}
}
7548355f576SJeff Roberson 
static void
zone_drain_wait(uma_zone_t zone, int waitok)
{

	/*
	 * Set draining to interlock with zone_dtor() so we can release our
	 * locks as we go.  Only dtor() should do a WAITOK call since it
	 * is the only call that knows the structure will still be available
	 * when it wakes up.
	 */
	ZONE_LOCK(zone);
	while (zone->uz_flags & UMA_ZFLAG_DRAINING) {
		/* M_NOWAIT callers give up rather than wait their turn. */
		if (waitok == M_NOWAIT)
			goto out;
		/* Caller holds uma_mtx; drop it so others may proceed. */
		mtx_unlock(&uma_mtx);
		msleep(zone, zone->uz_lock, PVM, "zonedrain", 1);
		mtx_lock(&uma_mtx);
	}
	zone->uz_flags |= UMA_ZFLAG_DRAINING;
	bucket_cache_drain(zone);
	ZONE_UNLOCK(zone);
	/*
	 * The DRAINING flag protects us from being freed while
	 * we're running.  Normally the uma_mtx would protect us but we
	 * must be able to release and acquire the right lock for each keg.
	 */
	zone_foreach_keg(zone, &keg_drain);
	ZONE_LOCK(zone);
	zone->uz_flags &= ~UMA_ZFLAG_DRAINING;
	/* Wake any waiter parked in the loop above. */
	wakeup(zone);
out:
	ZONE_UNLOCK(zone);
}
788e20a199fSJeff Roberson 
/*
 * Reclaim a zone's cached buckets and free slabs without sleeping;
 * bails out immediately if a drain is already in progress.
 */
void
zone_drain(uma_zone_t zone)
{

	zone_drain_wait(zone, M_NOWAIT);
}
795e20a199fSJeff Roberson 
796e20a199fSJeff Roberson /*
797e20a199fSJeff Roberson  * Allocate a new slab for a keg.  This does not insert the slab onto a list.
7988355f576SJeff Roberson  *
7998355f576SJeff Roberson  * Arguments:
8008355f576SJeff Roberson  *	wait  Shall we wait?
8018355f576SJeff Roberson  *
8028355f576SJeff Roberson  * Returns:
8038355f576SJeff Roberson  *	The slab that was allocated or NULL if there is no memory and the
8048355f576SJeff Roberson  *	caller specified M_NOWAIT.
8058355f576SJeff Roberson  */
static uma_slab_t
keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
{
	uma_slabrefcnt_t slabref;
	uma_alloc allocf;
	uma_slab_t slab;
	u_int8_t *mem;
	u_int8_t flags;
	int i;

	mtx_assert(&keg->uk_lock, MA_OWNED);
	slab = NULL;

#ifdef UMA_DEBUG
	printf("slab_zalloc:  Allocating a new slab for %s\n", keg->uk_name);
#endif
	/* Snapshot the allocator; the keg lock is dropped across allocation. */
	allocf = keg->uk_allocf;
	KEG_UNLOCK(keg);

	/* Off-page kegs keep the slab header in a separate slab zone. */
	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
		slab = zone_alloc_item(keg->uk_slabzone, NULL, wait);
		if (slab == NULL) {
			KEG_LOCK(keg);
			return NULL;
		}
	}

	/*
	 * This reproduces the old vm_zone behavior of zero filling pages the
	 * first time they are added to a zone.
	 *
	 * Malloced items are zeroed in uma_zalloc.
	 */

	if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
		wait |= M_ZERO;
	else
		wait &= ~M_ZERO;

	if (keg->uk_flags & UMA_ZONE_NODUMP)
		wait |= M_NODUMP;

	/* zone is passed for legacy reasons. */
	mem = allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE, &flags, wait);
	if (mem == NULL) {
		/* Back out the off-page slab header, if we took one. */
		if (keg->uk_flags & UMA_ZONE_OFFPAGE)
			zone_free_item(keg->uk_slabzone, slab, NULL,
			    SKIP_NONE, ZFREE_STATFREE);
		KEG_LOCK(keg);
		return (NULL);
	}

	/* Point the slab into the allocated memory */
	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
		slab = (uma_slab_t )(mem + keg->uk_pgoff);

	/* Record a page-to-slab back pointer for each backing page. */
	if (keg->uk_flags & UMA_ZONE_VTOSLAB)
		for (i = 0; i < keg->uk_ppera; i++)
			vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);

	slab->us_keg = keg;
	slab->us_data = mem;
	slab->us_freecount = keg->uk_ipers;
	slab->us_firstfree = 0;
	slab->us_flags = flags;

	/* Build the embedded free list; entry i links to item i+1. */
	if (keg->uk_flags & UMA_ZONE_REFCNT) {
		slabref = (uma_slabrefcnt_t)slab;
		for (i = 0; i < keg->uk_ipers; i++) {
			slabref->us_freelist[i].us_refcnt = 0;
			slabref->us_freelist[i].us_item = i+1;
		}
	} else {
		for (i = 0; i < keg->uk_ipers; i++)
			slab->us_freelist[i].us_item = i+1;
	}

	if (keg->uk_init != NULL) {
		for (i = 0; i < keg->uk_ipers; i++)
			if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
			    keg->uk_size, wait) != 0)
				break;
		if (i != keg->uk_ipers) {
			/* Init failed part way: fini only the items that
			 * were successfully initialized, then unwind all
			 * of the allocations made above. */
			if (keg->uk_fini != NULL) {
				for (i--; i > -1; i--)
					keg->uk_fini(slab->us_data +
					    (keg->uk_rsize * i),
					    keg->uk_size);
			}
			if (keg->uk_flags & UMA_ZONE_VTOSLAB) {
				vm_object_t obj;

				if (flags & UMA_SLAB_KMEM)
					obj = kmem_object;
				else if (flags & UMA_SLAB_KERNEL)
					obj = kernel_object;
				else
					obj = NULL;
				for (i = 0; i < keg->uk_ppera; i++)
					vsetobj((vm_offset_t)mem +
					    (i * PAGE_SIZE), obj);
			}
			if (keg->uk_flags & UMA_ZONE_OFFPAGE)
				zone_free_item(keg->uk_slabzone, slab,
				    NULL, SKIP_NONE, ZFREE_STATFREE);
			keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera,
			    flags);
			KEG_LOCK(keg);
			return (NULL);
		}
	}
	KEG_LOCK(keg);

	if (keg->uk_flags & UMA_ZONE_HASH)
		UMA_HASH_INSERT(&keg->uk_hash, slab, mem);

	keg->uk_pages += keg->uk_ppera;
	keg->uk_free += keg->uk_ipers;

	return (slab);
}
9278355f576SJeff Roberson 
9288355f576SJeff Roberson /*
929009b6fcbSJeff Roberson  * This function is intended to be used early on in place of page_alloc() so
930009b6fcbSJeff Roberson  * that we may use the boot time page cache to satisfy allocations before
931009b6fcbSJeff Roberson  * the VM is ready.
932009b6fcbSJeff Roberson  */
static void *
startup_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
{
	uma_keg_t keg;
	uma_slab_t tmps;
	int pages, check_pages;

	keg = zone_first_keg(zone);
	pages = howmany(bytes, PAGE_SIZE);
	/* The look-ahead walk below starts at the first entry, so it only
	 * needs to advance pages - 1 times to prove pages entries exist. */
	check_pages = pages - 1;
	KASSERT(pages > 0, ("startup_alloc can't reserve 0 pages\n"));

	/*
	 * Check our small startup cache to see if it has pages remaining.
	 */
	mtx_lock(&uma_boot_pages_mtx);

	/* First check if we have enough room. */
	tmps = LIST_FIRST(&uma_boot_pages);
	while (tmps != NULL && check_pages-- > 0)
		tmps = LIST_NEXT(tmps, us_link);
	if (tmps != NULL) {
		/*
		 * It's ok to lose tmps references.  The last one will
		 * have tmps->us_data pointing to the start address of
		 * "pages" contiguous pages of memory.
		 */
		while (pages-- > 0) {
			tmps = LIST_FIRST(&uma_boot_pages);
			LIST_REMOVE(tmps, us_link);
		}
		mtx_unlock(&uma_boot_pages_mtx);
		*pflag = tmps->us_flags;
		return (tmps->us_data);
	}
	mtx_unlock(&uma_boot_pages_mtx);
	/* Running dry before the VM is up is fatal; the cache is sized
	 * by the vm.boot_pages tunable. */
	if (booted < UMA_STARTUP2)
		panic("UMA: Increase vm.boot_pages");
	/*
	 * Now that we've booted reset these users to their real allocator.
	 */
#ifdef UMA_MD_SMALL_ALLOC
	keg->uk_allocf = (keg->uk_ppera > 1) ? page_alloc : uma_small_alloc;
#else
	keg->uk_allocf = page_alloc;
#endif
	return keg->uk_allocf(zone, bytes, pflag, wait);
}
981009b6fcbSJeff Roberson 
982009b6fcbSJeff Roberson /*
9838355f576SJeff Roberson  * Allocates a number of pages from the system
9848355f576SJeff Roberson  *
9858355f576SJeff Roberson  * Arguments:
9868355f576SJeff Roberson  *	bytes  The number of bytes requested
9878355f576SJeff Roberson  *	wait  Shall we wait?
9888355f576SJeff Roberson  *
9898355f576SJeff Roberson  * Returns:
9908355f576SJeff Roberson  *	A pointer to the alloced memory or possibly
9918355f576SJeff Roberson  *	NULL if M_NOWAIT is set.
9928355f576SJeff Roberson  */
9938355f576SJeff Roberson static void *
9948355f576SJeff Roberson page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
9958355f576SJeff Roberson {
9968355f576SJeff Roberson 	void *p;	/* Returned page */
9978355f576SJeff Roberson 
9988355f576SJeff Roberson 	*pflag = UMA_SLAB_KMEM;
9998355f576SJeff Roberson 	p = (void *) kmem_malloc(kmem_map, bytes, wait);
10008355f576SJeff Roberson 
10018355f576SJeff Roberson 	return (p);
10028355f576SJeff Roberson }
10038355f576SJeff Roberson 
10048355f576SJeff Roberson /*
10058355f576SJeff Roberson  * Allocates a number of pages from within an object
10068355f576SJeff Roberson  *
10078355f576SJeff Roberson  * Arguments:
10088355f576SJeff Roberson  *	bytes  The number of bytes requested
10098355f576SJeff Roberson  *	wait   Shall we wait?
10108355f576SJeff Roberson  *
10118355f576SJeff Roberson  * Returns:
10128355f576SJeff Roberson  *	A pointer to the alloced memory or possibly
10138355f576SJeff Roberson  *	NULL if M_NOWAIT is set.
10148355f576SJeff Roberson  */
static void *
obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
{
	vm_object_t object;
	vm_offset_t retkva, zkva;
	vm_page_t p;
	int pages, startpages;
	uma_keg_t keg;

	keg = zone_first_keg(zone);
	object = keg->uk_obj;
	retkva = 0;

	/*
	 * This looks a little weird since we're getting one page at a time.
	 */
	VM_OBJECT_LOCK(object);
	/* NOTE(review): continues after the last page on the object's memq —
	 * presumably the highest-indexed resident page; verify. */
	p = TAILQ_LAST(&object->memq, pglist);
	pages = p != NULL ? p->pindex + 1 : 0;
	startpages = pages;
	zkva = keg->uk_kva + pages * PAGE_SIZE;
	for (; bytes > 0; bytes -= PAGE_SIZE) {
		p = vm_page_alloc(object, pages,
		    VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED);
		if (p == NULL) {
			/* Roll back: unmap then unwire/free every page we
			 * managed to allocate this call. */
			if (pages != startpages)
				pmap_qremove(retkva, pages - startpages);
			while (pages != startpages) {
				pages--;
				p = TAILQ_LAST(&object->memq, pglist);
				vm_page_unwire(p, 0);
				vm_page_free(p);
			}
			retkva = 0;
			goto done;
		}
		pmap_qenter(zkva, &p, 1);
		if (retkva == 0)
			retkva = zkva;	/* Remember the start of the range. */
		zkva += PAGE_SIZE;
		pages += 1;
	}
done:
	VM_OBJECT_UNLOCK(object);
	*flags = UMA_SLAB_PRIV;

	return ((void *)retkva);
}
10638355f576SJeff Roberson 
10648355f576SJeff Roberson /*
10658355f576SJeff Roberson  * Frees a number of pages to the system
10668355f576SJeff Roberson  *
10678355f576SJeff Roberson  * Arguments:
10688355f576SJeff Roberson  *	mem   A pointer to the memory to be freed
10698355f576SJeff Roberson  *	size  The size of the memory being freed
10708355f576SJeff Roberson  *	flags The original p->us_flags field
10718355f576SJeff Roberson  *
10728355f576SJeff Roberson  * Returns:
10738355f576SJeff Roberson  *	Nothing
10748355f576SJeff Roberson  */
static void
page_free(void *mem, int size, u_int8_t flags)
{
	vm_map_t map;

	/* The slab flags recorded at allocation tell us which map the
	 * pages came from; anything else indicates a bookkeeping bug. */
	if (flags & UMA_SLAB_KMEM)
		map = kmem_map;
	else if (flags & UMA_SLAB_KERNEL)
		map = kernel_map;
	else
		panic("UMA: page_free used with invalid flags %d", flags);

	kmem_free(map, (vm_offset_t)mem, size);
}
10898355f576SJeff Roberson 
10908355f576SJeff Roberson /*
10918355f576SJeff Roberson  * Zero fill initializer
10928355f576SJeff Roberson  *
10938355f576SJeff Roberson  * Arguments/Returns follow uma_init specifications
10948355f576SJeff Roberson  */
static int
zero_init(void *mem, int size, int flags)
{

	/* Item initializer that simply clears the memory; cannot fail. */
	bzero(mem, size);
	return (0);
}
11018355f576SJeff Roberson 
11028355f576SJeff Roberson /*
1103e20a199fSJeff Roberson  * Finish creating a small uma keg.  This calculates ipers, and the keg size.
11048355f576SJeff Roberson  *
11058355f576SJeff Roberson  * Arguments
1106e20a199fSJeff Roberson  *	keg  The zone we should initialize
11078355f576SJeff Roberson  *
11088355f576SJeff Roberson  * Returns
11098355f576SJeff Roberson  *	Nothing
11108355f576SJeff Roberson  */
static void
keg_small_init(uma_keg_t keg)
{
	u_int rsize;
	u_int memused;
	u_int wastedspace;
	u_int shsize;

	KASSERT(keg != NULL, ("Keg is null in keg_small_init"));
	rsize = keg->uk_size;

	/* Enforce the minimum item size and round up to the keg's
	 * alignment. */
	if (rsize < UMA_SMALLEST_UNIT)
		rsize = UMA_SMALLEST_UNIT;
	if (rsize & keg->uk_align)
		rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);

	keg->uk_rsize = rsize;
	keg->uk_ppera = 1;

	/* Each item also carries its free-list linkage (plus a reference
	 * count for REFCNT kegs), and the in-page slab header differs. */
	if (keg->uk_flags & UMA_ZONE_REFCNT) {
		rsize += UMA_FRITMREF_SZ;	/* linkage & refcnt */
		shsize = sizeof(struct uma_slab_refcnt);
	} else {
		rsize += UMA_FRITM_SZ;	/* Account for linkage */
		shsize = sizeof(struct uma_slab);
	}

	/* Items per slab with the header kept inside the slab page. */
	keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize;
	KASSERT(keg->uk_ipers != 0, ("keg_small_init: ipers is 0"));
	memused = keg->uk_ipers * rsize + shsize;
	wastedspace = UMA_SLAB_SIZE - memused;

	/*
	 * We can't do OFFPAGE if we're internal or if we've been
	 * asked to not go to the VM for buckets.  If we do this we
	 * may end up going to the VM (kmem_map) for slabs which we
	 * do not want to do if we're UMA_ZFLAG_CACHEONLY as a
	 * result of UMA_ZONE_VM, which clearly forbids it.
	 */
	if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
	    (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
		return;

	/* Move the slab header off-page when doing so recovers enough
	 * space to fit at least one more item. */
	if ((wastedspace >= UMA_MAX_WASTE) &&
	    (keg->uk_ipers < (UMA_SLAB_SIZE / keg->uk_rsize))) {
		keg->uk_ipers = UMA_SLAB_SIZE / keg->uk_rsize;
		KASSERT(keg->uk_ipers <= 255,
		    ("keg_small_init: keg->uk_ipers too high!"));
#ifdef UMA_DEBUG
		printf("UMA decided we need offpage slab headers for "
		    "keg: %s, calculated wastedspace = %d, "
		    "maximum wasted space allowed = %d, "
		    "calculated ipers = %d, "
		    "new wasted space = %d\n", keg->uk_name, wastedspace,
		    UMA_MAX_WASTE, keg->uk_ipers,
		    UMA_SLAB_SIZE - keg->uk_ipers * keg->uk_rsize);
#endif
		keg->uk_flags |= UMA_ZONE_OFFPAGE;
		if ((keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
			keg->uk_flags |= UMA_ZONE_HASH;
	}
}
11738355f576SJeff Roberson 
11748355f576SJeff Roberson /*
1175e20a199fSJeff Roberson  * Finish creating a large (> UMA_SLAB_SIZE) uma kegs.  Just give in and do
11768355f576SJeff Roberson  * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
11778355f576SJeff Roberson  * more complicated.
11788355f576SJeff Roberson  *
11798355f576SJeff Roberson  * Arguments
1180e20a199fSJeff Roberson  *	keg  The keg we should initialize
11818355f576SJeff Roberson  *
11828355f576SJeff Roberson  * Returns
11838355f576SJeff Roberson  *	Nothing
11848355f576SJeff Roberson  */
11858355f576SJeff Roberson static void
1186e20a199fSJeff Roberson keg_large_init(uma_keg_t keg)
11878355f576SJeff Roberson {
11888355f576SJeff Roberson 	int pages;
11898355f576SJeff Roberson 
1190e20a199fSJeff Roberson 	KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
1191099a0e58SBosko Milekic 	KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
1192e20a199fSJeff Roberson 	    ("keg_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY keg"));
119320e8e865SBosko Milekic 
1194099a0e58SBosko Milekic 	pages = keg->uk_size / UMA_SLAB_SIZE;
11958355f576SJeff Roberson 
11968355f576SJeff Roberson 	/* Account for remainder */
1197099a0e58SBosko Milekic 	if ((pages * UMA_SLAB_SIZE) < keg->uk_size)
11988355f576SJeff Roberson 		pages++;
11998355f576SJeff Roberson 
1200099a0e58SBosko Milekic 	keg->uk_ppera = pages;
1201099a0e58SBosko Milekic 	keg->uk_ipers = 1;
1202e9a069d8SJohn Baldwin 	keg->uk_rsize = keg->uk_size;
1203e9a069d8SJohn Baldwin 
1204e9a069d8SJohn Baldwin 	/* We can't do OFFPAGE if we're internal, bail out here. */
1205e9a069d8SJohn Baldwin 	if (keg->uk_flags & UMA_ZFLAG_INTERNAL)
1206e9a069d8SJohn Baldwin 		return;
12078355f576SJeff Roberson 
1208099a0e58SBosko Milekic 	keg->uk_flags |= UMA_ZONE_OFFPAGE;
1209e20a199fSJeff Roberson 	if ((keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
1210099a0e58SBosko Milekic 		keg->uk_flags |= UMA_ZONE_HASH;
12118355f576SJeff Roberson }
12128355f576SJeff Roberson 
1213e20a199fSJeff Roberson static void
1214e20a199fSJeff Roberson keg_cachespread_init(uma_keg_t keg)
1215e20a199fSJeff Roberson {
1216e20a199fSJeff Roberson 	int alignsize;
1217e20a199fSJeff Roberson 	int trailer;
1218e20a199fSJeff Roberson 	int pages;
1219e20a199fSJeff Roberson 	int rsize;
1220e20a199fSJeff Roberson 
1221e20a199fSJeff Roberson 	alignsize = keg->uk_align + 1;
1222e20a199fSJeff Roberson 	rsize = keg->uk_size;
1223e20a199fSJeff Roberson 	/*
1224e20a199fSJeff Roberson 	 * We want one item to start on every align boundary in a page.  To
1225e20a199fSJeff Roberson 	 * do this we will span pages.  We will also extend the item by the
1226e20a199fSJeff Roberson 	 * size of align if it is an even multiple of align.  Otherwise, it
1227e20a199fSJeff Roberson 	 * would fall on the same boundary every time.
1228e20a199fSJeff Roberson 	 */
1229e20a199fSJeff Roberson 	if (rsize & keg->uk_align)
1230e20a199fSJeff Roberson 		rsize = (rsize & ~keg->uk_align) + alignsize;
1231e20a199fSJeff Roberson 	if ((rsize & alignsize) == 0)
1232e20a199fSJeff Roberson 		rsize += alignsize;
1233e20a199fSJeff Roberson 	trailer = rsize - keg->uk_size;
1234e20a199fSJeff Roberson 	pages = (rsize * (PAGE_SIZE / alignsize)) / PAGE_SIZE;
1235e20a199fSJeff Roberson 	pages = MIN(pages, (128 * 1024) / PAGE_SIZE);
1236e20a199fSJeff Roberson 	keg->uk_rsize = rsize;
1237e20a199fSJeff Roberson 	keg->uk_ppera = pages;
1238e20a199fSJeff Roberson 	keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize;
1239e20a199fSJeff Roberson 	keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
1240e20a199fSJeff Roberson 	KASSERT(keg->uk_ipers <= uma_max_ipers,
1241e20a199fSJeff Roberson 	    ("keg_small_init: keg->uk_ipers too high(%d) increase max_ipers",
1242e20a199fSJeff Roberson 	    keg->uk_ipers));
1243e20a199fSJeff Roberson }
1244e20a199fSJeff Roberson 
/*
 * Keg header ctor.  This initializes all fields, locks, etc.  And inserts
 * the keg onto the global keg list.
 *
 * A keg owns the slab layout (item size, items per slab, pages per
 * allocation) and backing-page allocator for one or more zones.
 *
 * Arguments/Returns follow uma_ctor specifications
 *	udata  Actually uma_kctor_args
 */
static int
keg_ctor(void *mem, int size, void *udata, int flags)
{
	struct uma_kctor_args *arg = udata;
	uma_keg_t keg = mem;
	uma_zone_t zone;

	/* Start from a clean slate; defaults are the page allocator. */
	bzero(keg, size);
	keg->uk_size = arg->size;
	keg->uk_init = arg->uminit;
	keg->uk_fini = arg->fini;
	keg->uk_align = arg->align;
	keg->uk_free = 0;
	keg->uk_pages = 0;
	keg->uk_flags = arg->flags;
	keg->uk_allocf = page_alloc;
	keg->uk_freef = page_free;
	keg->uk_recurse = 0;
	keg->uk_slabzone = NULL;

	/*
	 * The master zone is passed to us at keg-creation time.
	 */
	zone = arg->zone;
	keg->uk_name = zone->uz_name;

	if (arg->flags & UMA_ZONE_VM)
		keg->uk_flags |= UMA_ZFLAG_CACHEONLY;

	if (arg->flags & UMA_ZONE_ZINIT)
		keg->uk_init = zero_init;

	/*
	 * REFCNT and MALLOC kegs get VTOSLAB so the slab header can be
	 * located from an item's virtual address.
	 */
	if (arg->flags & UMA_ZONE_REFCNT || arg->flags & UMA_ZONE_MALLOC)
		keg->uk_flags |= UMA_ZONE_VTOSLAB;

	/*
	 * The +UMA_FRITM_SZ added to uk_size is to account for the
	 * linkage that is added to the size in keg_small_init().  If
	 * we don't account for this here then we may end up in
	 * keg_small_init() with a calculated 'ipers' of 0.
	 *
	 * REFCNT kegs use the larger uma_slab_refcnt header and freelist
	 * item size, hence the parallel branches below.
	 */
	if (keg->uk_flags & UMA_ZONE_REFCNT) {
		if (keg->uk_flags & UMA_ZONE_CACHESPREAD)
			keg_cachespread_init(keg);
		else if ((keg->uk_size+UMA_FRITMREF_SZ) >
		    (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)))
			keg_large_init(keg);
		else
			keg_small_init(keg);
	} else {
		if (keg->uk_flags & UMA_ZONE_CACHESPREAD)
			keg_cachespread_init(keg);
		else if ((keg->uk_size+UMA_FRITM_SZ) >
		    (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
			keg_large_init(keg);
		else
			keg_small_init(keg);
	}

	/* Offpage slab headers come from a dedicated slab-header zone. */
	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
		if (keg->uk_flags & UMA_ZONE_REFCNT)
			keg->uk_slabzone = slabrefzone;
		else
			keg->uk_slabzone = slabzone;
	}

	/*
	 * If we haven't booted yet we need allocations to go through the
	 * startup cache until the vm is ready.
	 */
	if (keg->uk_ppera == 1) {
#ifdef UMA_MD_SMALL_ALLOC
		keg->uk_allocf = uma_small_alloc;
		keg->uk_freef = uma_small_free;

		/* With MD small alloc, only pre-VM kegs need the cache. */
		if (booted < UMA_STARTUP)
			keg->uk_allocf = startup_alloc;
#else
		if (booted < UMA_STARTUP2)
			keg->uk_allocf = startup_alloc;
#endif
	} else if (booted < UMA_STARTUP2 &&
	    (keg->uk_flags & UMA_ZFLAG_INTERNAL))
		keg->uk_allocf = startup_alloc;

	/*
	 * Initialize keg's lock (shared among zones).
	 */
	if (arg->flags & UMA_ZONE_MTXCLASS)
		KEG_LOCK_INIT(keg, 1);
	else
		KEG_LOCK_INIT(keg, 0);

	/*
	 * If we're putting the slab header in the actual page we need to
	 * figure out where in each page it goes.  This calculates a right
	 * justified offset into the memory on an ALIGN_PTR boundary.
	 */
	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
		u_int totsize;

		/* Size of the slab struct and free list */
		if (keg->uk_flags & UMA_ZONE_REFCNT)
			totsize = sizeof(struct uma_slab_refcnt) +
			    keg->uk_ipers * UMA_FRITMREF_SZ;
		else
			totsize = sizeof(struct uma_slab) +
			    keg->uk_ipers * UMA_FRITM_SZ;

		/* Round the header size up to a pointer boundary. */
		if (totsize & UMA_ALIGN_PTR)
			totsize = (totsize & ~UMA_ALIGN_PTR) +
			    (UMA_ALIGN_PTR + 1);
		keg->uk_pgoff = (UMA_SLAB_SIZE * keg->uk_ppera) - totsize;

		/* Recompute the end offset to verify the header fits. */
		if (keg->uk_flags & UMA_ZONE_REFCNT)
			totsize = keg->uk_pgoff + sizeof(struct uma_slab_refcnt)
			    + keg->uk_ipers * UMA_FRITMREF_SZ;
		else
			totsize = keg->uk_pgoff + sizeof(struct uma_slab)
			    + keg->uk_ipers * UMA_FRITM_SZ;

		/*
		 * The only way the following is possible is if with our
		 * UMA_ALIGN_PTR adjustments we are now bigger than
		 * UMA_SLAB_SIZE.  I haven't checked whether this is
		 * mathematically possible for all cases, so we make
		 * sure here anyway.
		 */
		if (totsize > UMA_SLAB_SIZE * keg->uk_ppera) {
			printf("zone %s ipers %d rsize %d size %d\n",
			    zone->uz_name, keg->uk_ipers, keg->uk_rsize,
			    keg->uk_size);
			panic("UMA slab won't fit.");
		}
	}

	if (keg->uk_flags & UMA_ZONE_HASH)
		hash_alloc(&keg->uk_hash);

#ifdef UMA_DEBUG
	printf("UMA: %s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
	    zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags,
	    keg->uk_ipers, keg->uk_ppera,
	    (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free);
#endif

	/* Attach the master zone and publish the keg globally. */
	LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);

	mtx_lock(&uma_mtx);
	LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
	mtx_unlock(&uma_mtx);
	return (0);
}
1405099a0e58SBosko Milekic 
/*
 * Zone header ctor.  This initializes all fields, locks, etc.
 *
 * Three construction paths are taken below: secondary zones attach to
 * an existing keg, ordinary zones create a fresh keg via uma_kcreate(),
 * and the bootstrap path (uma_startup()) constructs the keg in place.
 *
 * Arguments/Returns follow uma_ctor specifications
 *	udata  Actually uma_zctor_args
 */
static int
zone_ctor(void *mem, int size, void *udata, int flags)
{
	struct uma_zctor_args *arg = udata;
	uma_zone_t zone = mem;
	uma_zone_t z;
	uma_keg_t keg;

	bzero(zone, size);
	zone->uz_name = arg->name;
	zone->uz_ctor = arg->ctor;
	zone->uz_dtor = arg->dtor;
	zone->uz_slab = zone_fetch_slab;
	zone->uz_init = NULL;
	zone->uz_fini = NULL;
	zone->uz_allocs = 0;
	zone->uz_frees = 0;
	zone->uz_fails = 0;
	zone->uz_sleeps = 0;
	zone->uz_fills = zone->uz_count = 0;
	zone->uz_flags = 0;
	keg = arg->keg;

	if (arg->flags & UMA_ZONE_SECONDARY) {
		KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
		zone->uz_init = arg->uminit;
		zone->uz_fini = arg->fini;
		/* Secondary zones share the keg's lock. */
		zone->uz_lock = &keg->uk_lock;
		zone->uz_flags |= UMA_ZONE_SECONDARY;
		mtx_lock(&uma_mtx);
		ZONE_LOCK(zone);
		/* Append to the tail of the keg's zone list. */
		LIST_FOREACH(z, &keg->uk_zones, uz_link) {
			if (LIST_NEXT(z, uz_link) == NULL) {
				LIST_INSERT_AFTER(z, zone, uz_link);
				break;
			}
		}
		ZONE_UNLOCK(zone);
		mtx_unlock(&uma_mtx);
	} else if (keg == NULL) {
		/* Ordinary zone: allocate and construct a fresh keg. */
		if ((keg = uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
		    arg->align, arg->flags)) == NULL)
			return (ENOMEM);
	} else {
		struct uma_kctor_args karg;
		int error;

		/* We should only be here from uma_startup() */
		karg.size = arg->size;
		karg.uminit = arg->uminit;
		karg.fini = arg->fini;
		karg.align = arg->align;
		karg.flags = arg->flags;
		karg.zone = zone;
		error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
		    flags);
		if (error)
			return (error);
	}
	/*
	 * Link in the first keg.
	 */
	zone->uz_klink.kl_keg = keg;
	LIST_INSERT_HEAD(&zone->uz_kegs, &zone->uz_klink, kl_link);
	zone->uz_lock = &keg->uk_lock;
	zone->uz_size = keg->uk_size;
	/* Propagate the inheritable keg flags onto the zone. */
	zone->uz_flags |= (keg->uk_flags &
	    (UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT));

	/*
	 * Some internal zones don't have room allocated for the per cpu
	 * caches.  If we're internal, bail out here.
	 */
	if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
		KASSERT((zone->uz_flags & UMA_ZONE_SECONDARY) == 0,
		    ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
		return (0);
	}

	/* Size per-CPU bucket fill counts; never exceed BUCKET_MAX. */
	if (keg->uk_flags & UMA_ZONE_MAXBUCKET)
		zone->uz_count = BUCKET_MAX;
	else if (keg->uk_ipers <= BUCKET_MAX)
		zone->uz_count = keg->uk_ipers;
	else
		zone->uz_count = BUCKET_MAX;
	return (0);
}
14998355f576SJeff Roberson 
15008355f576SJeff Roberson /*
1501099a0e58SBosko Milekic  * Keg header dtor.  This frees all data, destroys locks, frees the hash
1502099a0e58SBosko Milekic  * table and removes the keg from the global list.
15039c2cd7e5SJeff Roberson  *
15049c2cd7e5SJeff Roberson  * Arguments/Returns follow uma_dtor specifications
15059c2cd7e5SJeff Roberson  *	udata  unused
15069c2cd7e5SJeff Roberson  */
1507099a0e58SBosko Milekic static void
1508099a0e58SBosko Milekic keg_dtor(void *arg, int size, void *udata)
1509099a0e58SBosko Milekic {
1510099a0e58SBosko Milekic 	uma_keg_t keg;
15119c2cd7e5SJeff Roberson 
1512099a0e58SBosko Milekic 	keg = (uma_keg_t)arg;
1513e20a199fSJeff Roberson 	KEG_LOCK(keg);
1514099a0e58SBosko Milekic 	if (keg->uk_free != 0) {
1515099a0e58SBosko Milekic 		printf("Freed UMA keg was not empty (%d items). "
1516099a0e58SBosko Milekic 		    " Lost %d pages of memory.\n",
1517099a0e58SBosko Milekic 		    keg->uk_free, keg->uk_pages);
1518099a0e58SBosko Milekic 	}
1519e20a199fSJeff Roberson 	KEG_UNLOCK(keg);
1520099a0e58SBosko Milekic 
1521099a0e58SBosko Milekic 	hash_free(&keg->uk_hash);
1522099a0e58SBosko Milekic 
1523e20a199fSJeff Roberson 	KEG_LOCK_FINI(keg);
1524099a0e58SBosko Milekic }
1525099a0e58SBosko Milekic 
/*
 * Zone header dtor.
 *
 * Drains the zone, unlinks it and its kegs, and destroys the keg when
 * this zone is the keg's primary (non-secondary) zone.
 *
 * Arguments/Returns follow uma_dtor specifications
 *	udata  unused
 */
static void
zone_dtor(void *arg, int size, void *udata)
{
	uma_klink_t klink;
	uma_zone_t zone;
	uma_keg_t keg;

	zone = (uma_zone_t)arg;
	keg = zone_first_keg(zone);

	/* Internal zones have no per-CPU caches to drain. */
	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
		cache_drain(zone);

	mtx_lock(&uma_mtx);
	LIST_REMOVE(zone, uz_link);
	mtx_unlock(&uma_mtx);
	/*
	 * XXX there are some races here where
	 * the zone can be drained but zone lock
	 * released and then refilled before we
	 * remove it... we dont care for now
	 */
	zone_drain_wait(zone, M_WAITOK);
	/*
	 * Unlink all of our kegs.
	 */
	while ((klink = LIST_FIRST(&zone->uz_kegs)) != NULL) {
		klink->kl_keg = NULL;
		LIST_REMOVE(klink, kl_link);
		/* uz_klink is embedded in the zone; only free the rest. */
		if (klink == &zone->uz_klink)
			continue;
		free(klink, M_TEMP);
	}
	/*
	 * We only destroy kegs from non secondary zones.
	 */
	if ((zone->uz_flags & UMA_ZONE_SECONDARY) == 0)  {
		mtx_lock(&uma_mtx);
		LIST_REMOVE(keg, uk_link);
		mtx_unlock(&uma_mtx);
		zone_free_item(kegs, keg, NULL, SKIP_NONE,
		    ZFREE_STATFREE);
	}
}
1576099a0e58SBosko Milekic 
15779c2cd7e5SJeff Roberson /*
15788355f576SJeff Roberson  * Traverses every zone in the system and calls a callback
15798355f576SJeff Roberson  *
15808355f576SJeff Roberson  * Arguments:
15818355f576SJeff Roberson  *	zfunc  A pointer to a function which accepts a zone
15828355f576SJeff Roberson  *		as an argument.
15838355f576SJeff Roberson  *
15848355f576SJeff Roberson  * Returns:
15858355f576SJeff Roberson  *	Nothing
15868355f576SJeff Roberson  */
15878355f576SJeff Roberson static void
15888355f576SJeff Roberson zone_foreach(void (*zfunc)(uma_zone_t))
15898355f576SJeff Roberson {
1590099a0e58SBosko Milekic 	uma_keg_t keg;
15918355f576SJeff Roberson 	uma_zone_t zone;
15928355f576SJeff Roberson 
15938355f576SJeff Roberson 	mtx_lock(&uma_mtx);
1594099a0e58SBosko Milekic 	LIST_FOREACH(keg, &uma_kegs, uk_link) {
1595099a0e58SBosko Milekic 		LIST_FOREACH(zone, &keg->uk_zones, uz_link)
15968355f576SJeff Roberson 			zfunc(zone);
1597099a0e58SBosko Milekic 	}
15988355f576SJeff Roberson 	mtx_unlock(&uma_mtx);
15998355f576SJeff Roberson }
16008355f576SJeff Roberson 
/* Public functions */
/* See uma.h */
/*
 * Bootstraps UMA itself: computes the offpage ipers limits, hand-builds
 * the "UMA Kegs" and "UMA Zones" zones (before the allocator can serve
 * allocations), seeds the boot-page free list, and creates the internal
 * slab-header and hash zones.
 *
 * Arguments:
 *	bootmem     Pre-reserved pages to satisfy allocations until the
 *		    VM is up.
 *	boot_pages  Number of UMA_SLAB_SIZE pages in bootmem.
 */
void
uma_startup(void *bootmem, int boot_pages)
{
	struct uma_zctor_args args;
	uma_slab_t slab;
	u_int slabsize;
	u_int objsize, totsize, wsize;
	int i;

#ifdef UMA_DEBUG
	printf("Creating uma keg headers zone and keg.\n");
#endif
	mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);

	/*
	 * Figure out the maximum number of items-per-slab we'll have if
	 * we're using the OFFPAGE slab header to track free items, given
	 * all possible object sizes and the maximum desired wastage
	 * (UMA_MAX_WASTE).
	 *
	 * We iterate until we find an object size for
	 * which the calculated wastage in keg_small_init() will be
	 * enough to warrant OFFPAGE.  Since wastedspace versus objsize
	 * is an overall increasing see-saw function, we find the smallest
	 * objsize such that the wastage is always acceptable for objects
	 * with that objsize or smaller.  Since a smaller objsize always
	 * generates a larger possible uma_max_ipers, we use this computed
	 * objsize to calculate the largest ipers possible.  Since the
	 * ipers calculated for OFFPAGE slab headers is always larger than
	 * the ipers initially calculated in keg_small_init(), we use
	 * the former's equation (UMA_SLAB_SIZE / keg->uk_rsize) to
	 * obtain the maximum ipers possible for offpage slab headers.
	 *
	 * It should be noted that ipers versus objsize is an inversely
	 * proportional function which drops off rather quickly so as
	 * long as our UMA_MAX_WASTE is such that the objsize we calculate
	 * falls into the portion of the inverse relation AFTER the steep
	 * falloff, then uma_max_ipers shouldn't be too high (~10 on i386).
	 *
	 * Note that we have 8-bits (1 byte) to use as a freelist index
	 * inside the actual slab header itself and this is enough to
	 * accommodate us.  In the worst case, a UMA_SMALLEST_UNIT sized
	 * object with offpage slab header would have ipers =
	 * UMA_SLAB_SIZE / UMA_SMALLEST_UNIT (currently = 256), which is
	 * 1 greater than what our byte-integer freelist index can
	 * accommodate, but we know that this situation never occurs as
	 * for UMA_SMALLEST_UNIT-sized objects, we will never calculate
	 * that we need to go to offpage slab headers.  Or, if we do,
	 * then we trap that condition below and panic in the INVARIANTS case.
	 */
	wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) - UMA_MAX_WASTE;
	totsize = wsize;
	objsize = UMA_SMALLEST_UNIT;
	while (totsize >= wsize) {
		totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) /
		    (objsize + UMA_FRITM_SZ);
		totsize *= (UMA_FRITM_SZ + objsize);
		objsize++;
	}
	if (objsize > UMA_SMALLEST_UNIT)
		objsize--;
	uma_max_ipers = MAX(UMA_SLAB_SIZE / objsize, 64);

	/* Repeat the computation for refcnt slab headers. */
	wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - UMA_MAX_WASTE;
	totsize = wsize;
	objsize = UMA_SMALLEST_UNIT;
	while (totsize >= wsize) {
		totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)) /
		    (objsize + UMA_FRITMREF_SZ);
		totsize *= (UMA_FRITMREF_SZ + objsize);
		objsize++;
	}
	if (objsize > UMA_SMALLEST_UNIT)
		objsize--;
	uma_max_ipers_ref = MAX(UMA_SLAB_SIZE / objsize, 64);

	/* The freelist index is one byte, so 255 items is the ceiling. */
	KASSERT((uma_max_ipers_ref <= 255) && (uma_max_ipers <= 255),
	    ("uma_startup: calculated uma_max_ipers values too large!"));

#ifdef UMA_DEBUG
	printf("Calculated uma_max_ipers (for OFFPAGE) is %d\n", uma_max_ipers);
	printf("Calculated uma_max_ipers_slab (for OFFPAGE) is %d\n",
	    uma_max_ipers_ref);
#endif

	/* "manually" create the initial zone */
	args.name = "UMA Kegs";
	args.size = sizeof(struct uma_keg);
	args.ctor = keg_ctor;
	args.dtor = keg_dtor;
	args.uminit = zero_init;
	args.fini = NULL;
	args.keg = &masterkeg;
	args.align = 32 - 1;
	args.flags = UMA_ZFLAG_INTERNAL;
	/* The initial zone has no Per cpu queues so it's smaller */
	zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);

#ifdef UMA_DEBUG
	printf("Filling boot free list.\n");
#endif
	/* Carve bootmem into boot slabs for startup_alloc() to hand out. */
	for (i = 0; i < boot_pages; i++) {
		slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
		slab->us_data = (u_int8_t *)slab;
		slab->us_flags = UMA_SLAB_BOOT;
		LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
	}
	mtx_init(&uma_boot_pages_mtx, "UMA boot pages", NULL, MTX_DEF);

#ifdef UMA_DEBUG
	printf("Creating uma zone headers zone and keg.\n");
#endif
	/* Zone headers carry one uma_cache per CPU after the struct. */
	args.name = "UMA Zones";
	args.size = sizeof(struct uma_zone) +
	    (sizeof(struct uma_cache) * (mp_maxid + 1));
	args.ctor = zone_ctor;
	args.dtor = zone_dtor;
	args.uminit = zero_init;
	args.fini = NULL;
	args.keg = NULL;
	args.align = 32 - 1;
	args.flags = UMA_ZFLAG_INTERNAL;
	/* The initial zone has no Per cpu queues so it's smaller */
	zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);

#ifdef UMA_DEBUG
	printf("Initializing pcpu cache locks.\n");
#endif
#ifdef UMA_DEBUG
	printf("Creating slab and hash zones.\n");
#endif

	/*
	 * This is the max number of free list items we'll have with
	 * offpage slabs.
	 */
	slabsize = uma_max_ipers * UMA_FRITM_SZ;
	slabsize += sizeof(struct uma_slab);

	/* Now make a zone for slab headers */
	slabzone = uma_zcreate("UMA Slabs",
				slabsize,
				NULL, NULL, NULL, NULL,
				UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);

	/*
	 * We also create a zone for the bigger slabs with reference
	 * counts in them, to accommodate UMA_ZONE_REFCNT zones.
	 */
	slabsize = uma_max_ipers_ref * UMA_FRITMREF_SZ;
	slabsize += sizeof(struct uma_slab_refcnt);
	slabrefzone = uma_zcreate("UMA RCntSlabs",
				  slabsize,
				  NULL, NULL, NULL, NULL,
				  UMA_ALIGN_PTR,
				  UMA_ZFLAG_INTERNAL);

	hashzone = uma_zcreate("UMA Hash",
	    sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);

	bucket_init();

	booted = UMA_STARTUP;

#ifdef UMA_DEBUG
	printf("UMA startup complete.\n");
#endif
}
17738355f576SJeff Roberson 
/* see uma.h */
/*
 * Second-stage startup: marks UMA as past the boot-page phase (kegs
 * created after this point use their real allocators; see keg_ctor())
 * and re-enables per-CPU buckets.
 */
void
uma_startup2(void)
{
	booted = UMA_STARTUP2;
	bucket_enable();
#ifdef UMA_DEBUG
	printf("UMA startup2 complete.\n");
#endif
}
17848355f576SJeff Roberson 
/*
 * Initialize our callout handle
 *
 * Third-stage startup: arms the periodic uma_timeout() callout, which
 * fires every UMA_TIMEOUT seconds.
 */

static void
uma_startup3(void)
{
#ifdef UMA_DEBUG
	printf("Starting callout.\n");
#endif
	callout_init(&uma_callout, CALLOUT_MPSAFE);
	callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
#ifdef UMA_DEBUG
	printf("UMA startup3 complete.\n");
#endif
}
18028355f576SJeff Roberson 
1803e20a199fSJeff Roberson static uma_keg_t
1804099a0e58SBosko Milekic uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
18052018f30cSMike Silbersack 		int align, u_int32_t flags)
1806099a0e58SBosko Milekic {
1807099a0e58SBosko Milekic 	struct uma_kctor_args args;
1808099a0e58SBosko Milekic 
1809099a0e58SBosko Milekic 	args.size = size;
1810099a0e58SBosko Milekic 	args.uminit = uminit;
1811099a0e58SBosko Milekic 	args.fini = fini;
18121e319f6dSRobert Watson 	args.align = (align == UMA_ALIGN_CACHE) ? uma_align_cache : align;
1813099a0e58SBosko Milekic 	args.flags = flags;
1814099a0e58SBosko Milekic 	args.zone = zone;
1815e20a199fSJeff Roberson 	return (zone_alloc_item(kegs, &args, M_WAITOK));
1816099a0e58SBosko Milekic }
1817099a0e58SBosko Milekic 
18188355f576SJeff Roberson /* See uma.h */
18191e319f6dSRobert Watson void
18201e319f6dSRobert Watson uma_set_align(int align)
18211e319f6dSRobert Watson {
18221e319f6dSRobert Watson 
18231e319f6dSRobert Watson 	if (align != UMA_ALIGN_CACHE)
18241e319f6dSRobert Watson 		uma_align_cache = align;
18251e319f6dSRobert Watson }
18261e319f6dSRobert Watson 
18271e319f6dSRobert Watson /* See uma.h */
18288355f576SJeff Roberson uma_zone_t
1829c3bdc05fSAndrew R. Reiter uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
18302018f30cSMike Silbersack 		uma_init uminit, uma_fini fini, int align, u_int32_t flags)
18318355f576SJeff Roberson 
18328355f576SJeff Roberson {
18338355f576SJeff Roberson 	struct uma_zctor_args args;
18348355f576SJeff Roberson 
18358355f576SJeff Roberson 	/* This stuff is essential for the zone ctor */
18368355f576SJeff Roberson 	args.name = name;
18378355f576SJeff Roberson 	args.size = size;
18388355f576SJeff Roberson 	args.ctor = ctor;
18398355f576SJeff Roberson 	args.dtor = dtor;
18408355f576SJeff Roberson 	args.uminit = uminit;
18418355f576SJeff Roberson 	args.fini = fini;
18428355f576SJeff Roberson 	args.align = align;
18438355f576SJeff Roberson 	args.flags = flags;
1844099a0e58SBosko Milekic 	args.keg = NULL;
1845099a0e58SBosko Milekic 
1846e20a199fSJeff Roberson 	return (zone_alloc_item(zones, &args, M_WAITOK));
1847099a0e58SBosko Milekic }
1848099a0e58SBosko Milekic 
1849099a0e58SBosko Milekic /* See uma.h */
1850099a0e58SBosko Milekic uma_zone_t
1851099a0e58SBosko Milekic uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
1852099a0e58SBosko Milekic 		    uma_init zinit, uma_fini zfini, uma_zone_t master)
1853099a0e58SBosko Milekic {
1854099a0e58SBosko Milekic 	struct uma_zctor_args args;
1855e20a199fSJeff Roberson 	uma_keg_t keg;
1856099a0e58SBosko Milekic 
1857e20a199fSJeff Roberson 	keg = zone_first_keg(master);
1858099a0e58SBosko Milekic 	args.name = name;
1859e20a199fSJeff Roberson 	args.size = keg->uk_size;
1860099a0e58SBosko Milekic 	args.ctor = ctor;
1861099a0e58SBosko Milekic 	args.dtor = dtor;
1862099a0e58SBosko Milekic 	args.uminit = zinit;
1863099a0e58SBosko Milekic 	args.fini = zfini;
1864e20a199fSJeff Roberson 	args.align = keg->uk_align;
1865e20a199fSJeff Roberson 	args.flags = keg->uk_flags | UMA_ZONE_SECONDARY;
1866e20a199fSJeff Roberson 	args.keg = keg;
18678355f576SJeff Roberson 
1868e20a199fSJeff Roberson 	/* XXX Attaches only one keg of potentially many. */
1869e20a199fSJeff Roberson 	return (zone_alloc_item(zones, &args, M_WAITOK));
18708355f576SJeff Roberson }
18718355f576SJeff Roberson 
1872e20a199fSJeff Roberson static void
1873e20a199fSJeff Roberson zone_lock_pair(uma_zone_t a, uma_zone_t b)
1874e20a199fSJeff Roberson {
1875e20a199fSJeff Roberson 	if (a < b) {
1876e20a199fSJeff Roberson 		ZONE_LOCK(a);
1877e20a199fSJeff Roberson 		mtx_lock_flags(b->uz_lock, MTX_DUPOK);
1878e20a199fSJeff Roberson 	} else {
1879e20a199fSJeff Roberson 		ZONE_LOCK(b);
1880e20a199fSJeff Roberson 		mtx_lock_flags(a->uz_lock, MTX_DUPOK);
1881e20a199fSJeff Roberson 	}
1882e20a199fSJeff Roberson }
1883e20a199fSJeff Roberson 
1884e20a199fSJeff Roberson static void
1885e20a199fSJeff Roberson zone_unlock_pair(uma_zone_t a, uma_zone_t b)
1886e20a199fSJeff Roberson {
1887e20a199fSJeff Roberson 
1888e20a199fSJeff Roberson 	ZONE_UNLOCK(a);
1889e20a199fSJeff Roberson 	ZONE_UNLOCK(b);
1890e20a199fSJeff Roberson }
1891e20a199fSJeff Roberson 
1892e20a199fSJeff Roberson int
1893e20a199fSJeff Roberson uma_zsecond_add(uma_zone_t zone, uma_zone_t master)
1894e20a199fSJeff Roberson {
1895e20a199fSJeff Roberson 	uma_klink_t klink;
1896e20a199fSJeff Roberson 	uma_klink_t kl;
1897e20a199fSJeff Roberson 	int error;
1898e20a199fSJeff Roberson 
1899e20a199fSJeff Roberson 	error = 0;
1900e20a199fSJeff Roberson 	klink = malloc(sizeof(*klink), M_TEMP, M_WAITOK | M_ZERO);
1901e20a199fSJeff Roberson 
1902e20a199fSJeff Roberson 	zone_lock_pair(zone, master);
1903e20a199fSJeff Roberson 	/*
1904e20a199fSJeff Roberson 	 * zone must use vtoslab() to resolve objects and must already be
1905e20a199fSJeff Roberson 	 * a secondary.
1906e20a199fSJeff Roberson 	 */
1907e20a199fSJeff Roberson 	if ((zone->uz_flags & (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY))
1908e20a199fSJeff Roberson 	    != (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY)) {
1909e20a199fSJeff Roberson 		error = EINVAL;
1910e20a199fSJeff Roberson 		goto out;
1911e20a199fSJeff Roberson 	}
1912e20a199fSJeff Roberson 	/*
1913e20a199fSJeff Roberson 	 * The new master must also use vtoslab().
1914e20a199fSJeff Roberson 	 */
1915e20a199fSJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_VTOSLAB) != UMA_ZONE_VTOSLAB) {
1916e20a199fSJeff Roberson 		error = EINVAL;
1917e20a199fSJeff Roberson 		goto out;
1918e20a199fSJeff Roberson 	}
1919e20a199fSJeff Roberson 	/*
1920e20a199fSJeff Roberson 	 * Both must either be refcnt, or not be refcnt.
1921e20a199fSJeff Roberson 	 */
1922e20a199fSJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_REFCNT) !=
1923e20a199fSJeff Roberson 	    (master->uz_flags & UMA_ZONE_REFCNT)) {
1924e20a199fSJeff Roberson 		error = EINVAL;
1925e20a199fSJeff Roberson 		goto out;
1926e20a199fSJeff Roberson 	}
1927e20a199fSJeff Roberson 	/*
1928e20a199fSJeff Roberson 	 * The underlying object must be the same size.  rsize
1929e20a199fSJeff Roberson 	 * may be different.
1930e20a199fSJeff Roberson 	 */
1931e20a199fSJeff Roberson 	if (master->uz_size != zone->uz_size) {
1932e20a199fSJeff Roberson 		error = E2BIG;
1933e20a199fSJeff Roberson 		goto out;
1934e20a199fSJeff Roberson 	}
1935e20a199fSJeff Roberson 	/*
1936e20a199fSJeff Roberson 	 * Put it at the end of the list.
1937e20a199fSJeff Roberson 	 */
1938e20a199fSJeff Roberson 	klink->kl_keg = zone_first_keg(master);
1939e20a199fSJeff Roberson 	LIST_FOREACH(kl, &zone->uz_kegs, kl_link) {
1940e20a199fSJeff Roberson 		if (LIST_NEXT(kl, kl_link) == NULL) {
1941e20a199fSJeff Roberson 			LIST_INSERT_AFTER(kl, klink, kl_link);
1942e20a199fSJeff Roberson 			break;
1943e20a199fSJeff Roberson 		}
1944e20a199fSJeff Roberson 	}
1945e20a199fSJeff Roberson 	klink = NULL;
1946e20a199fSJeff Roberson 	zone->uz_flags |= UMA_ZFLAG_MULTI;
1947e20a199fSJeff Roberson 	zone->uz_slab = zone_fetch_slab_multi;
1948e20a199fSJeff Roberson 
1949e20a199fSJeff Roberson out:
1950e20a199fSJeff Roberson 	zone_unlock_pair(zone, master);
1951e20a199fSJeff Roberson 	if (klink != NULL)
1952e20a199fSJeff Roberson 		free(klink, M_TEMP);
1953e20a199fSJeff Roberson 
1954e20a199fSJeff Roberson 	return (error);
1955e20a199fSJeff Roberson }
1956e20a199fSJeff Roberson 
1957e20a199fSJeff Roberson 
19588355f576SJeff Roberson /* See uma.h */
19599c2cd7e5SJeff Roberson void
19609c2cd7e5SJeff Roberson uma_zdestroy(uma_zone_t zone)
19619c2cd7e5SJeff Roberson {
1962f4ff923bSRobert Watson 
1963e20a199fSJeff Roberson 	zone_free_item(zones, zone, NULL, SKIP_NONE, ZFREE_STATFREE);
19649c2cd7e5SJeff Roberson }
19659c2cd7e5SJeff Roberson 
19669c2cd7e5SJeff Roberson /* See uma.h */
19678355f576SJeff Roberson void *
19682cc35ff9SJeff Roberson uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
19698355f576SJeff Roberson {
19708355f576SJeff Roberson 	void *item;
19718355f576SJeff Roberson 	uma_cache_t cache;
19728355f576SJeff Roberson 	uma_bucket_t bucket;
19738355f576SJeff Roberson 	int cpu;
19748355f576SJeff Roberson 
19758355f576SJeff Roberson 	/* This is the fast path allocation */
19768355f576SJeff Roberson #ifdef UMA_DEBUG_ALLOC_1
19778355f576SJeff Roberson 	printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
19788355f576SJeff Roberson #endif
19793659f747SRobert Watson 	CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread,
19803659f747SRobert Watson 	    zone->uz_name, flags);
1981a553d4b8SJeff Roberson 
1982635fd505SRobert Watson 	if (flags & M_WAITOK) {
1983b23f72e9SBrian Feldman 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
1984635fd505SRobert Watson 		    "uma_zalloc_arg: zone \"%s\"", zone->uz_name);
19854c1cc01cSJohn Baldwin 	}
19868d689e04SGleb Smirnoff #ifdef DEBUG_MEMGUARD
19878d689e04SGleb Smirnoff 	if (memguard_cmp_zone(zone)) {
19888d689e04SGleb Smirnoff 		item = memguard_alloc(zone->uz_size, flags);
19898d689e04SGleb Smirnoff 		if (item != NULL) {
19908d689e04SGleb Smirnoff 			/*
19918d689e04SGleb Smirnoff 			 * Avoid conflict with the use-after-free
19928d689e04SGleb Smirnoff 			 * protecting infrastructure from INVARIANTS.
19938d689e04SGleb Smirnoff 			 */
19948d689e04SGleb Smirnoff 			if (zone->uz_init != NULL &&
19958d689e04SGleb Smirnoff 			    zone->uz_init != mtrash_init &&
19968d689e04SGleb Smirnoff 			    zone->uz_init(item, zone->uz_size, flags) != 0)
19978d689e04SGleb Smirnoff 				return (NULL);
19988d689e04SGleb Smirnoff 			if (zone->uz_ctor != NULL &&
19998d689e04SGleb Smirnoff 			    zone->uz_ctor != mtrash_ctor &&
20008d689e04SGleb Smirnoff 			    zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
20018d689e04SGleb Smirnoff 			    	zone->uz_fini(item, zone->uz_size);
20028d689e04SGleb Smirnoff 				return (NULL);
20038d689e04SGleb Smirnoff 			}
20048d689e04SGleb Smirnoff 			return (item);
20058d689e04SGleb Smirnoff 		}
20068d689e04SGleb Smirnoff 		/* This is unfortunate but should not be fatal. */
20078d689e04SGleb Smirnoff 	}
20088d689e04SGleb Smirnoff #endif
20095d1ae027SRobert Watson 	/*
20105d1ae027SRobert Watson 	 * If possible, allocate from the per-CPU cache.  There are two
20115d1ae027SRobert Watson 	 * requirements for safe access to the per-CPU cache: (1) the thread
20125d1ae027SRobert Watson 	 * accessing the cache must not be preempted or yield during access,
20135d1ae027SRobert Watson 	 * and (2) the thread must not migrate CPUs without switching which
20145d1ae027SRobert Watson 	 * cache it accesses.  We rely on a critical section to prevent
20155d1ae027SRobert Watson 	 * preemption and migration.  We release the critical section in
20165d1ae027SRobert Watson 	 * order to acquire the zone mutex if we are unable to allocate from
20175d1ae027SRobert Watson 	 * the current cache; when we re-acquire the critical section, we
20185d1ae027SRobert Watson 	 * must detect and handle migration if it has occurred.
20195d1ae027SRobert Watson 	 */
2020a553d4b8SJeff Roberson zalloc_restart:
20215d1ae027SRobert Watson 	critical_enter();
20225d1ae027SRobert Watson 	cpu = curcpu;
20238355f576SJeff Roberson 	cache = &zone->uz_cpu[cpu];
20248355f576SJeff Roberson 
20258355f576SJeff Roberson zalloc_start:
20268355f576SJeff Roberson 	bucket = cache->uc_allocbucket;
20278355f576SJeff Roberson 
20288355f576SJeff Roberson 	if (bucket) {
2029cae33c14SJeff Roberson 		if (bucket->ub_cnt > 0) {
2030cae33c14SJeff Roberson 			bucket->ub_cnt--;
2031cae33c14SJeff Roberson 			item = bucket->ub_bucket[bucket->ub_cnt];
20328355f576SJeff Roberson #ifdef INVARIANTS
2033cae33c14SJeff Roberson 			bucket->ub_bucket[bucket->ub_cnt] = NULL;
20348355f576SJeff Roberson #endif
20358355f576SJeff Roberson 			KASSERT(item != NULL,
20368355f576SJeff Roberson 			    ("uma_zalloc: Bucket pointer mangled."));
20378355f576SJeff Roberson 			cache->uc_allocs++;
20385d1ae027SRobert Watson 			critical_exit();
2039639c9550SJeff Roberson #ifdef INVARIANTS
204081f71edaSMatt Jacob 			ZONE_LOCK(zone);
2041639c9550SJeff Roberson 			uma_dbg_alloc(zone, NULL, item);
204281f71edaSMatt Jacob 			ZONE_UNLOCK(zone);
2043639c9550SJeff Roberson #endif
2044b23f72e9SBrian Feldman 			if (zone->uz_ctor != NULL) {
2045e20a199fSJeff Roberson 				if (zone->uz_ctor(item, zone->uz_size,
2046b23f72e9SBrian Feldman 				    udata, flags) != 0) {
2047e20a199fSJeff Roberson 					zone_free_item(zone, item, udata,
2048f4ff923bSRobert Watson 					    SKIP_DTOR, ZFREE_STATFAIL |
2049f4ff923bSRobert Watson 					    ZFREE_STATFREE);
2050b23f72e9SBrian Feldman 					return (NULL);
2051b23f72e9SBrian Feldman 				}
2052b23f72e9SBrian Feldman 			}
20532cc35ff9SJeff Roberson 			if (flags & M_ZERO)
2054e20a199fSJeff Roberson 				bzero(item, zone->uz_size);
20558355f576SJeff Roberson 			return (item);
20568355f576SJeff Roberson 		} else if (cache->uc_freebucket) {
20578355f576SJeff Roberson 			/*
20588355f576SJeff Roberson 			 * We have run out of items in our allocbucket.
20598355f576SJeff Roberson 			 * See if we can switch with our free bucket.
20608355f576SJeff Roberson 			 */
2061cae33c14SJeff Roberson 			if (cache->uc_freebucket->ub_cnt > 0) {
20628355f576SJeff Roberson #ifdef UMA_DEBUG_ALLOC
206364f051e9SJeff Roberson 				printf("uma_zalloc: Swapping empty with"
206464f051e9SJeff Roberson 				    " alloc.\n");
20658355f576SJeff Roberson #endif
2066b983089aSJeff Roberson 				bucket = cache->uc_freebucket;
20678355f576SJeff Roberson 				cache->uc_freebucket = cache->uc_allocbucket;
2068b983089aSJeff Roberson 				cache->uc_allocbucket = bucket;
20698355f576SJeff Roberson 
20708355f576SJeff Roberson 				goto zalloc_start;
20718355f576SJeff Roberson 			}
20728355f576SJeff Roberson 		}
20738355f576SJeff Roberson 	}
20745d1ae027SRobert Watson 	/*
20755d1ae027SRobert Watson 	 * Attempt to retrieve the item from the per-CPU cache has failed, so
20765d1ae027SRobert Watson 	 * we must go back to the zone.  This requires the zone lock, so we
20775d1ae027SRobert Watson 	 * must drop the critical section, then re-acquire it when we go back
20785d1ae027SRobert Watson 	 * to the cache.  Since the critical section is released, we may be
20795d1ae027SRobert Watson 	 * preempted or migrate.  As such, make sure not to maintain any
20805d1ae027SRobert Watson 	 * thread-local state specific to the cache from prior to releasing
20815d1ae027SRobert Watson 	 * the critical section.
20825d1ae027SRobert Watson 	 */
20835d1ae027SRobert Watson 	critical_exit();
2084a553d4b8SJeff Roberson 	ZONE_LOCK(zone);
20855d1ae027SRobert Watson 	critical_enter();
20865d1ae027SRobert Watson 	cpu = curcpu;
20875d1ae027SRobert Watson 	cache = &zone->uz_cpu[cpu];
20885d1ae027SRobert Watson 	bucket = cache->uc_allocbucket;
20895d1ae027SRobert Watson 	if (bucket != NULL) {
20905d1ae027SRobert Watson 		if (bucket->ub_cnt > 0) {
20915d1ae027SRobert Watson 			ZONE_UNLOCK(zone);
20925d1ae027SRobert Watson 			goto zalloc_start;
20935d1ae027SRobert Watson 		}
20945d1ae027SRobert Watson 		bucket = cache->uc_freebucket;
20955d1ae027SRobert Watson 		if (bucket != NULL && bucket->ub_cnt > 0) {
20965d1ae027SRobert Watson 			ZONE_UNLOCK(zone);
20975d1ae027SRobert Watson 			goto zalloc_start;
20985d1ae027SRobert Watson 		}
20995d1ae027SRobert Watson 	}
21005d1ae027SRobert Watson 
2101a553d4b8SJeff Roberson 	/* Since we have locked the zone we may as well send back our stats */
2102a553d4b8SJeff Roberson 	zone->uz_allocs += cache->uc_allocs;
2103a553d4b8SJeff Roberson 	cache->uc_allocs = 0;
2104773df9abSRobert Watson 	zone->uz_frees += cache->uc_frees;
2105773df9abSRobert Watson 	cache->uc_frees = 0;
21068355f576SJeff Roberson 
2107a553d4b8SJeff Roberson 	/* Our old one is now a free bucket */
2108a553d4b8SJeff Roberson 	if (cache->uc_allocbucket) {
2109cae33c14SJeff Roberson 		KASSERT(cache->uc_allocbucket->ub_cnt == 0,
2110a553d4b8SJeff Roberson 		    ("uma_zalloc_arg: Freeing a non free bucket."));
2111a553d4b8SJeff Roberson 		LIST_INSERT_HEAD(&zone->uz_free_bucket,
2112a553d4b8SJeff Roberson 		    cache->uc_allocbucket, ub_link);
2113a553d4b8SJeff Roberson 		cache->uc_allocbucket = NULL;
2114a553d4b8SJeff Roberson 	}
21158355f576SJeff Roberson 
2116a553d4b8SJeff Roberson 	/* Check the free list for a new alloc bucket */
2117a553d4b8SJeff Roberson 	if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
2118cae33c14SJeff Roberson 		KASSERT(bucket->ub_cnt != 0,
2119a553d4b8SJeff Roberson 		    ("uma_zalloc_arg: Returning an empty bucket."));
21208355f576SJeff Roberson 
2121a553d4b8SJeff Roberson 		LIST_REMOVE(bucket, ub_link);
2122a553d4b8SJeff Roberson 		cache->uc_allocbucket = bucket;
2123a553d4b8SJeff Roberson 		ZONE_UNLOCK(zone);
21248355f576SJeff Roberson 		goto zalloc_start;
2125a553d4b8SJeff Roberson 	}
21265d1ae027SRobert Watson 	/* We are no longer associated with this CPU. */
21275d1ae027SRobert Watson 	critical_exit();
2128bbee39c6SJeff Roberson 
2129a553d4b8SJeff Roberson 	/* Bump up our uz_count so we get here less */
2130cae33c14SJeff Roberson 	if (zone->uz_count < BUCKET_MAX)
2131a553d4b8SJeff Roberson 		zone->uz_count++;
2132099a0e58SBosko Milekic 
21338355f576SJeff Roberson 	/*
2134a553d4b8SJeff Roberson 	 * Now lets just fill a bucket and put it on the free list.  If that
2135a553d4b8SJeff Roberson 	 * works we'll restart the allocation from the begining.
2136bbee39c6SJeff Roberson 	 */
2137e20a199fSJeff Roberson 	if (zone_alloc_bucket(zone, flags)) {
2138bbee39c6SJeff Roberson 		ZONE_UNLOCK(zone);
2139bbee39c6SJeff Roberson 		goto zalloc_restart;
2140bbee39c6SJeff Roberson 	}
2141bbee39c6SJeff Roberson 	ZONE_UNLOCK(zone);
2142bbee39c6SJeff Roberson 	/*
2143bbee39c6SJeff Roberson 	 * We may not be able to get a bucket so return an actual item.
2144bbee39c6SJeff Roberson 	 */
2145bbee39c6SJeff Roberson #ifdef UMA_DEBUG
2146bbee39c6SJeff Roberson 	printf("uma_zalloc_arg: Bucketzone returned NULL\n");
2147bbee39c6SJeff Roberson #endif
2148bbee39c6SJeff Roberson 
2149e20a199fSJeff Roberson 	item = zone_alloc_item(zone, udata, flags);
2150e20a199fSJeff Roberson 	return (item);
2151bbee39c6SJeff Roberson }
2152bbee39c6SJeff Roberson 
2153bbee39c6SJeff Roberson static uma_slab_t
2154e20a199fSJeff Roberson keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags)
2155bbee39c6SJeff Roberson {
2156bbee39c6SJeff Roberson 	uma_slab_t slab;
2157099a0e58SBosko Milekic 
2158e20a199fSJeff Roberson 	mtx_assert(&keg->uk_lock, MA_OWNED);
2159bbee39c6SJeff Roberson 	slab = NULL;
2160bbee39c6SJeff Roberson 
2161bbee39c6SJeff Roberson 	for (;;) {
2162bbee39c6SJeff Roberson 		/*
2163bbee39c6SJeff Roberson 		 * Find a slab with some space.  Prefer slabs that are partially
2164bbee39c6SJeff Roberson 		 * used over those that are totally full.  This helps to reduce
2165bbee39c6SJeff Roberson 		 * fragmentation.
2166bbee39c6SJeff Roberson 		 */
2167099a0e58SBosko Milekic 		if (keg->uk_free != 0) {
2168099a0e58SBosko Milekic 			if (!LIST_EMPTY(&keg->uk_part_slab)) {
2169099a0e58SBosko Milekic 				slab = LIST_FIRST(&keg->uk_part_slab);
2170bbee39c6SJeff Roberson 			} else {
2171099a0e58SBosko Milekic 				slab = LIST_FIRST(&keg->uk_free_slab);
2172bbee39c6SJeff Roberson 				LIST_REMOVE(slab, us_link);
2173099a0e58SBosko Milekic 				LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
2174bbee39c6SJeff Roberson 				    us_link);
2175bbee39c6SJeff Roberson 			}
2176e20a199fSJeff Roberson 			MPASS(slab->us_keg == keg);
2177bbee39c6SJeff Roberson 			return (slab);
2178bbee39c6SJeff Roberson 		}
2179bbee39c6SJeff Roberson 
2180bbee39c6SJeff Roberson 		/*
2181bbee39c6SJeff Roberson 		 * M_NOVM means don't ask at all!
2182bbee39c6SJeff Roberson 		 */
2183bbee39c6SJeff Roberson 		if (flags & M_NOVM)
2184bbee39c6SJeff Roberson 			break;
2185bbee39c6SJeff Roberson 
2186e20a199fSJeff Roberson 		if (keg->uk_maxpages && keg->uk_pages >= keg->uk_maxpages) {
2187099a0e58SBosko Milekic 			keg->uk_flags |= UMA_ZFLAG_FULL;
2188e20a199fSJeff Roberson 			/*
2189e20a199fSJeff Roberson 			 * If this is not a multi-zone, set the FULL bit.
2190e20a199fSJeff Roberson 			 * Otherwise slab_multi() takes care of it.
2191e20a199fSJeff Roberson 			 */
2192e20a199fSJeff Roberson 			if ((zone->uz_flags & UMA_ZFLAG_MULTI) == 0)
2193e20a199fSJeff Roberson 				zone->uz_flags |= UMA_ZFLAG_FULL;
2194ebc85edfSJeff Roberson 			if (flags & M_NOWAIT)
2195bbee39c6SJeff Roberson 				break;
2196e20a199fSJeff Roberson 			msleep(keg, &keg->uk_lock, PVM, "keglimit", 0);
2197bbee39c6SJeff Roberson 			continue;
2198bbee39c6SJeff Roberson 		}
2199099a0e58SBosko Milekic 		keg->uk_recurse++;
2200e20a199fSJeff Roberson 		slab = keg_alloc_slab(keg, zone, flags);
2201099a0e58SBosko Milekic 		keg->uk_recurse--;
2202bbee39c6SJeff Roberson 		/*
2203bbee39c6SJeff Roberson 		 * If we got a slab here it's safe to mark it partially used
2204bbee39c6SJeff Roberson 		 * and return.  We assume that the caller is going to remove
2205bbee39c6SJeff Roberson 		 * at least one item.
2206bbee39c6SJeff Roberson 		 */
2207bbee39c6SJeff Roberson 		if (slab) {
2208e20a199fSJeff Roberson 			MPASS(slab->us_keg == keg);
2209099a0e58SBosko Milekic 			LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2210bbee39c6SJeff Roberson 			return (slab);
2211bbee39c6SJeff Roberson 		}
2212bbee39c6SJeff Roberson 		/*
2213bbee39c6SJeff Roberson 		 * We might not have been able to get a slab but another cpu
2214bbee39c6SJeff Roberson 		 * could have while we were unlocked.  Check again before we
2215bbee39c6SJeff Roberson 		 * fail.
2216bbee39c6SJeff Roberson 		 */
2217bbee39c6SJeff Roberson 		flags |= M_NOVM;
2218bbee39c6SJeff Roberson 	}
2219bbee39c6SJeff Roberson 	return (slab);
2220bbee39c6SJeff Roberson }
2221bbee39c6SJeff Roberson 
2222e20a199fSJeff Roberson static inline void
2223e20a199fSJeff Roberson zone_relock(uma_zone_t zone, uma_keg_t keg)
2224e20a199fSJeff Roberson {
2225e20a199fSJeff Roberson 	if (zone->uz_lock != &keg->uk_lock) {
2226e20a199fSJeff Roberson 		KEG_UNLOCK(keg);
2227e20a199fSJeff Roberson 		ZONE_LOCK(zone);
2228e20a199fSJeff Roberson 	}
2229e20a199fSJeff Roberson }
2230e20a199fSJeff Roberson 
2231e20a199fSJeff Roberson static inline void
2232e20a199fSJeff Roberson keg_relock(uma_keg_t keg, uma_zone_t zone)
2233e20a199fSJeff Roberson {
2234e20a199fSJeff Roberson 	if (zone->uz_lock != &keg->uk_lock) {
2235e20a199fSJeff Roberson 		ZONE_UNLOCK(zone);
2236e20a199fSJeff Roberson 		KEG_LOCK(keg);
2237e20a199fSJeff Roberson 	}
2238e20a199fSJeff Roberson }
2239e20a199fSJeff Roberson 
2240e20a199fSJeff Roberson static uma_slab_t
2241e20a199fSJeff Roberson zone_fetch_slab(uma_zone_t zone, uma_keg_t keg, int flags)
2242e20a199fSJeff Roberson {
2243e20a199fSJeff Roberson 	uma_slab_t slab;
2244e20a199fSJeff Roberson 
2245e20a199fSJeff Roberson 	if (keg == NULL)
2246e20a199fSJeff Roberson 		keg = zone_first_keg(zone);
2247e20a199fSJeff Roberson 	/*
2248e20a199fSJeff Roberson 	 * This is to prevent us from recursively trying to allocate
2249e20a199fSJeff Roberson 	 * buckets.  The problem is that if an allocation forces us to
2250e20a199fSJeff Roberson 	 * grab a new bucket we will call page_alloc, which will go off
2251e20a199fSJeff Roberson 	 * and cause the vm to allocate vm_map_entries.  If we need new
2252e20a199fSJeff Roberson 	 * buckets there too we will recurse in kmem_alloc and bad
2253e20a199fSJeff Roberson 	 * things happen.  So instead we return a NULL bucket, and make
2254e20a199fSJeff Roberson 	 * the code that allocates buckets smart enough to deal with it
2255e20a199fSJeff Roberson 	 */
2256e20a199fSJeff Roberson 	if (keg->uk_flags & UMA_ZFLAG_BUCKET && keg->uk_recurse != 0)
2257e20a199fSJeff Roberson 		return (NULL);
2258e20a199fSJeff Roberson 
2259e20a199fSJeff Roberson 	for (;;) {
2260e20a199fSJeff Roberson 		slab = keg_fetch_slab(keg, zone, flags);
2261e20a199fSJeff Roberson 		if (slab)
2262e20a199fSJeff Roberson 			return (slab);
2263e20a199fSJeff Roberson 		if (flags & (M_NOWAIT | M_NOVM))
2264e20a199fSJeff Roberson 			break;
2265e20a199fSJeff Roberson 	}
2266e20a199fSJeff Roberson 	return (NULL);
2267e20a199fSJeff Roberson }
2268e20a199fSJeff Roberson 
2269e20a199fSJeff Roberson /*
2270e20a199fSJeff Roberson  * uma_zone_fetch_slab_multi:  Fetches a slab from one available keg.  Returns
2271e20a199fSJeff Roberson  * with the keg locked.  Caller must call zone_relock() afterwards if the
2272e20a199fSJeff Roberson  * zone lock is required.  On NULL the zone lock is held.
2273e20a199fSJeff Roberson  *
2274e20a199fSJeff Roberson  * The last pointer is used to seed the search.  It is not required.
2275e20a199fSJeff Roberson  */
2276e20a199fSJeff Roberson static uma_slab_t
2277e20a199fSJeff Roberson zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int rflags)
2278e20a199fSJeff Roberson {
2279e20a199fSJeff Roberson 	uma_klink_t klink;
2280e20a199fSJeff Roberson 	uma_slab_t slab;
2281e20a199fSJeff Roberson 	uma_keg_t keg;
2282e20a199fSJeff Roberson 	int flags;
2283e20a199fSJeff Roberson 	int empty;
2284e20a199fSJeff Roberson 	int full;
2285e20a199fSJeff Roberson 
2286e20a199fSJeff Roberson 	/*
2287e20a199fSJeff Roberson 	 * Don't wait on the first pass.  This will skip limit tests
2288e20a199fSJeff Roberson 	 * as well.  We don't want to block if we can find a provider
2289e20a199fSJeff Roberson 	 * without blocking.
2290e20a199fSJeff Roberson 	 */
2291e20a199fSJeff Roberson 	flags = (rflags & ~M_WAITOK) | M_NOWAIT;
2292e20a199fSJeff Roberson 	/*
2293e20a199fSJeff Roberson 	 * Use the last slab allocated as a hint for where to start
2294e20a199fSJeff Roberson 	 * the search.
2295e20a199fSJeff Roberson 	 */
2296e20a199fSJeff Roberson 	if (last) {
2297e20a199fSJeff Roberson 		slab = keg_fetch_slab(last, zone, flags);
2298e20a199fSJeff Roberson 		if (slab)
2299e20a199fSJeff Roberson 			return (slab);
2300e20a199fSJeff Roberson 		zone_relock(zone, last);
2301e20a199fSJeff Roberson 		last = NULL;
2302e20a199fSJeff Roberson 	}
2303e20a199fSJeff Roberson 	/*
2304e20a199fSJeff Roberson 	 * Loop until we have a slab incase of transient failures
2305e20a199fSJeff Roberson 	 * while M_WAITOK is specified.  I'm not sure this is 100%
2306e20a199fSJeff Roberson 	 * required but we've done it for so long now.
2307e20a199fSJeff Roberson 	 */
2308e20a199fSJeff Roberson 	for (;;) {
2309e20a199fSJeff Roberson 		empty = 0;
2310e20a199fSJeff Roberson 		full = 0;
2311e20a199fSJeff Roberson 		/*
2312e20a199fSJeff Roberson 		 * Search the available kegs for slabs.  Be careful to hold the
2313e20a199fSJeff Roberson 		 * correct lock while calling into the keg layer.
2314e20a199fSJeff Roberson 		 */
2315e20a199fSJeff Roberson 		LIST_FOREACH(klink, &zone->uz_kegs, kl_link) {
2316e20a199fSJeff Roberson 			keg = klink->kl_keg;
2317e20a199fSJeff Roberson 			keg_relock(keg, zone);
2318e20a199fSJeff Roberson 			if ((keg->uk_flags & UMA_ZFLAG_FULL) == 0) {
2319e20a199fSJeff Roberson 				slab = keg_fetch_slab(keg, zone, flags);
2320e20a199fSJeff Roberson 				if (slab)
2321e20a199fSJeff Roberson 					return (slab);
2322e20a199fSJeff Roberson 			}
2323e20a199fSJeff Roberson 			if (keg->uk_flags & UMA_ZFLAG_FULL)
2324e20a199fSJeff Roberson 				full++;
2325e20a199fSJeff Roberson 			else
2326e20a199fSJeff Roberson 				empty++;
2327e20a199fSJeff Roberson 			zone_relock(zone, keg);
2328e20a199fSJeff Roberson 		}
2329e20a199fSJeff Roberson 		if (rflags & (M_NOWAIT | M_NOVM))
2330e20a199fSJeff Roberson 			break;
2331e20a199fSJeff Roberson 		flags = rflags;
2332e20a199fSJeff Roberson 		/*
2333e20a199fSJeff Roberson 		 * All kegs are full.  XXX We can't atomically check all kegs
2334e20a199fSJeff Roberson 		 * and sleep so just sleep for a short period and retry.
2335e20a199fSJeff Roberson 		 */
2336e20a199fSJeff Roberson 		if (full && !empty) {
2337e20a199fSJeff Roberson 			zone->uz_flags |= UMA_ZFLAG_FULL;
2338bf965959SSean Bruno 			zone->uz_sleeps++;
2339e20a199fSJeff Roberson 			msleep(zone, zone->uz_lock, PVM, "zonelimit", hz/100);
2340e20a199fSJeff Roberson 			zone->uz_flags &= ~UMA_ZFLAG_FULL;
2341e20a199fSJeff Roberson 			continue;
2342e20a199fSJeff Roberson 		}
2343e20a199fSJeff Roberson 	}
2344e20a199fSJeff Roberson 	return (NULL);
2345e20a199fSJeff Roberson }
2346e20a199fSJeff Roberson 
2347d56368d7SBosko Milekic static void *
2348e20a199fSJeff Roberson slab_alloc_item(uma_zone_t zone, uma_slab_t slab)
2349bbee39c6SJeff Roberson {
2350099a0e58SBosko Milekic 	uma_keg_t keg;
2351ab14a3f7SBrian Feldman 	uma_slabrefcnt_t slabref;
2352bbee39c6SJeff Roberson 	void *item;
2353bbee39c6SJeff Roberson 	u_int8_t freei;
2354bbee39c6SJeff Roberson 
2355e20a199fSJeff Roberson 	keg = slab->us_keg;
2356e20a199fSJeff Roberson 	mtx_assert(&keg->uk_lock, MA_OWNED);
2357099a0e58SBosko Milekic 
2358bbee39c6SJeff Roberson 	freei = slab->us_firstfree;
2359ab14a3f7SBrian Feldman 	if (keg->uk_flags & UMA_ZONE_REFCNT) {
2360ab14a3f7SBrian Feldman 		slabref = (uma_slabrefcnt_t)slab;
2361ab14a3f7SBrian Feldman 		slab->us_firstfree = slabref->us_freelist[freei].us_item;
2362ab14a3f7SBrian Feldman 	} else {
2363099a0e58SBosko Milekic 		slab->us_firstfree = slab->us_freelist[freei].us_item;
2364ab14a3f7SBrian Feldman 	}
2365099a0e58SBosko Milekic 	item = slab->us_data + (keg->uk_rsize * freei);
2366bbee39c6SJeff Roberson 
2367bbee39c6SJeff Roberson 	slab->us_freecount--;
2368099a0e58SBosko Milekic 	keg->uk_free--;
2369bbee39c6SJeff Roberson #ifdef INVARIANTS
2370bbee39c6SJeff Roberson 	uma_dbg_alloc(zone, slab, item);
2371bbee39c6SJeff Roberson #endif
2372bbee39c6SJeff Roberson 	/* Move this slab to the full list */
2373bbee39c6SJeff Roberson 	if (slab->us_freecount == 0) {
2374bbee39c6SJeff Roberson 		LIST_REMOVE(slab, us_link);
2375099a0e58SBosko Milekic 		LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link);
2376bbee39c6SJeff Roberson 	}
2377bbee39c6SJeff Roberson 
2378bbee39c6SJeff Roberson 	return (item);
2379bbee39c6SJeff Roberson }
2380bbee39c6SJeff Roberson 
2381bbee39c6SJeff Roberson static int
2382e20a199fSJeff Roberson zone_alloc_bucket(uma_zone_t zone, int flags)
2383bbee39c6SJeff Roberson {
2384bbee39c6SJeff Roberson 	uma_bucket_t bucket;
2385bbee39c6SJeff Roberson 	uma_slab_t slab;
2386e20a199fSJeff Roberson 	uma_keg_t keg;
2387099a0e58SBosko Milekic 	int16_t saved;
2388b23f72e9SBrian Feldman 	int max, origflags = flags;
2389bbee39c6SJeff Roberson 
2390bbee39c6SJeff Roberson 	/*
2391a553d4b8SJeff Roberson 	 * Try this zone's free list first so we don't allocate extra buckets.
23928355f576SJeff Roberson 	 */
2393bbee39c6SJeff Roberson 	if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
2394cae33c14SJeff Roberson 		KASSERT(bucket->ub_cnt == 0,
2395e20a199fSJeff Roberson 		    ("zone_alloc_bucket: Bucket on free list is not empty."));
2396a553d4b8SJeff Roberson 		LIST_REMOVE(bucket, ub_link);
2397bbee39c6SJeff Roberson 	} else {
239818aa2de5SJeff Roberson 		int bflags;
239918aa2de5SJeff Roberson 
2400cae33c14SJeff Roberson 		bflags = (flags & ~M_ZERO);
2401e20a199fSJeff Roberson 		if (zone->uz_flags & UMA_ZFLAG_CACHEONLY)
240218aa2de5SJeff Roberson 			bflags |= M_NOVM;
240318aa2de5SJeff Roberson 
2404bbee39c6SJeff Roberson 		ZONE_UNLOCK(zone);
2405cae33c14SJeff Roberson 		bucket = bucket_alloc(zone->uz_count, bflags);
2406bbee39c6SJeff Roberson 		ZONE_LOCK(zone);
2407bbee39c6SJeff Roberson 	}
2408bbee39c6SJeff Roberson 
2409e20a199fSJeff Roberson 	if (bucket == NULL) {
2410bbee39c6SJeff Roberson 		return (0);
2411e20a199fSJeff Roberson 	}
2412bbee39c6SJeff Roberson 
2413bbee39c6SJeff Roberson #ifdef SMP
2414a553d4b8SJeff Roberson 	/*
2415bbee39c6SJeff Roberson 	 * This code is here to limit the number of simultaneous bucket fills
2416bbee39c6SJeff Roberson 	 * for any given zone to the number of per cpu caches in this zone. This
2417bbee39c6SJeff Roberson 	 * is done so that we don't allocate more memory than we really need.
2418a553d4b8SJeff Roberson 	 */
2419bbee39c6SJeff Roberson 	if (zone->uz_fills >= mp_ncpus)
2420bbee39c6SJeff Roberson 		goto done;
2421a553d4b8SJeff Roberson 
2422bbee39c6SJeff Roberson #endif
2423bbee39c6SJeff Roberson 	zone->uz_fills++;
2424bbee39c6SJeff Roberson 
242544eca34aSJeff Roberson 	max = MIN(bucket->ub_entries, zone->uz_count);
2426bbee39c6SJeff Roberson 	/* Try to keep the buckets totally full */
2427099a0e58SBosko Milekic 	saved = bucket->ub_cnt;
2428e20a199fSJeff Roberson 	slab = NULL;
2429e20a199fSJeff Roberson 	keg = NULL;
243044eca34aSJeff Roberson 	while (bucket->ub_cnt < max &&
2431e20a199fSJeff Roberson 	    (slab = zone->uz_slab(zone, keg, flags)) != NULL) {
2432e20a199fSJeff Roberson 		keg = slab->us_keg;
243344eca34aSJeff Roberson 		while (slab->us_freecount && bucket->ub_cnt < max) {
2434cae33c14SJeff Roberson 			bucket->ub_bucket[bucket->ub_cnt++] =
2435e20a199fSJeff Roberson 			    slab_alloc_item(zone, slab);
2436bbee39c6SJeff Roberson 		}
2437099a0e58SBosko Milekic 
2438bbee39c6SJeff Roberson 		/* Don't block on the next fill */
2439bbee39c6SJeff Roberson 		flags |= M_NOWAIT;
24408355f576SJeff Roberson 	}
2441e20a199fSJeff Roberson 	if (slab)
2442e20a199fSJeff Roberson 		zone_relock(zone, keg);
24438355f576SJeff Roberson 
2444099a0e58SBosko Milekic 	/*
2445099a0e58SBosko Milekic 	 * We unlock here because we need to call the zone's init.
2446099a0e58SBosko Milekic 	 * It should be safe to unlock because the slab dealt with
2447099a0e58SBosko Milekic 	 * above is already on the appropriate list within the keg
2448099a0e58SBosko Milekic 	 * and the bucket we filled is not yet on any list, so we
2449099a0e58SBosko Milekic 	 * own it.
2450099a0e58SBosko Milekic 	 */
2451099a0e58SBosko Milekic 	if (zone->uz_init != NULL) {
2452099a0e58SBosko Milekic 		int i;
2453bbee39c6SJeff Roberson 
2454099a0e58SBosko Milekic 		ZONE_UNLOCK(zone);
2455099a0e58SBosko Milekic 		for (i = saved; i < bucket->ub_cnt; i++)
2456e20a199fSJeff Roberson 			if (zone->uz_init(bucket->ub_bucket[i], zone->uz_size,
2457e20a199fSJeff Roberson 			    origflags) != 0)
2458b23f72e9SBrian Feldman 				break;
2459b23f72e9SBrian Feldman 		/*
2460b23f72e9SBrian Feldman 		 * If we couldn't initialize the whole bucket, put the
2461b23f72e9SBrian Feldman 		 * rest back onto the freelist.
2462b23f72e9SBrian Feldman 		 */
2463b23f72e9SBrian Feldman 		if (i != bucket->ub_cnt) {
2464b23f72e9SBrian Feldman 			int j;
2465b23f72e9SBrian Feldman 
2466a5a262c6SBosko Milekic 			for (j = i; j < bucket->ub_cnt; j++) {
2467e20a199fSJeff Roberson 				zone_free_item(zone, bucket->ub_bucket[j],
24682019094aSRobert Watson 				    NULL, SKIP_FINI, 0);
2469a5a262c6SBosko Milekic #ifdef INVARIANTS
2470a5a262c6SBosko Milekic 				bucket->ub_bucket[j] = NULL;
2471a5a262c6SBosko Milekic #endif
2472a5a262c6SBosko Milekic 			}
2473b23f72e9SBrian Feldman 			bucket->ub_cnt = i;
2474b23f72e9SBrian Feldman 		}
2475099a0e58SBosko Milekic 		ZONE_LOCK(zone);
2476099a0e58SBosko Milekic 	}
2477099a0e58SBosko Milekic 
2478099a0e58SBosko Milekic 	zone->uz_fills--;
2479cae33c14SJeff Roberson 	if (bucket->ub_cnt != 0) {
2480bbee39c6SJeff Roberson 		LIST_INSERT_HEAD(&zone->uz_full_bucket,
2481bbee39c6SJeff Roberson 		    bucket, ub_link);
2482bbee39c6SJeff Roberson 		return (1);
2483bbee39c6SJeff Roberson 	}
2484bbee39c6SJeff Roberson #ifdef SMP
2485bbee39c6SJeff Roberson done:
2486bbee39c6SJeff Roberson #endif
2487cae33c14SJeff Roberson 	bucket_free(bucket);
2488bbee39c6SJeff Roberson 
2489bbee39c6SJeff Roberson 	return (0);
2490bbee39c6SJeff Roberson }
24918355f576SJeff Roberson /*
2492bbee39c6SJeff Roberson  * Allocates an item for an internal zone
24938355f576SJeff Roberson  *
24948355f576SJeff Roberson  * Arguments
24958355f576SJeff Roberson  *	zone   The zone to alloc for.
24968355f576SJeff Roberson  *	udata  The data to be passed to the constructor.
2497a163d034SWarner Losh  *	flags  M_WAITOK, M_NOWAIT, M_ZERO.
24988355f576SJeff Roberson  *
24998355f576SJeff Roberson  * Returns
25008355f576SJeff Roberson  *	NULL if there is no memory and M_NOWAIT is set
2501bbee39c6SJeff Roberson  *	An item if successful
25028355f576SJeff Roberson  */
25038355f576SJeff Roberson 
static void *
zone_alloc_item(uma_zone_t zone, void *udata, int flags)
{
	uma_slab_t slab;
	void *item;

	item = NULL;

#ifdef UMA_DEBUG_ALLOC
	printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
#endif
	ZONE_LOCK(zone);

	/*
	 * Find a slab with a free item.  On success the keg lock is held
	 * (the zone_relock() below trades it back); on failure the zone
	 * lock is still held.
	 */
	slab = zone->uz_slab(zone, NULL, flags);
	if (slab == NULL) {
		zone->uz_fails++;
		ZONE_UNLOCK(zone);
		return (NULL);
	}

	item = slab_alloc_item(zone, slab);

	/* Trade the keg lock back for the zone lock to update stats. */
	zone_relock(zone, slab->us_keg);
	zone->uz_allocs++;
	ZONE_UNLOCK(zone);

	/*
	 * We have to call both the zone's init (not the keg's init)
	 * and the zone's ctor.  This is because the item is going from
	 * a keg slab directly to the user, and the user is expecting it
	 * to be both zone-init'd as well as zone-ctor'd.
	 */
	if (zone->uz_init != NULL) {
		if (zone->uz_init(item, zone->uz_size, flags) != 0) {
			/* Init failed: free without running the fini. */
			zone_free_item(zone, item, udata, SKIP_FINI,
			    ZFREE_STATFAIL | ZFREE_STATFREE);
			return (NULL);
		}
	}
	if (zone->uz_ctor != NULL) {
		if (zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
			/* Ctor failed: skip the dtor, but run the fini. */
			zone_free_item(zone, item, udata, SKIP_DTOR,
			    ZFREE_STATFAIL | ZFREE_STATFREE);
			return (NULL);
		}
	}
	if (flags & M_ZERO)
		bzero(item, zone->uz_size);

	return (item);
}
25558355f576SJeff Roberson 
/* See uma.h */
/*
 * Free an item back to its zone: run the dtor, then try the lockless
 * per-CPU cache, falling back to the zone's bucket lists, and finally
 * to a direct free into the keg via zone_free_item().
 */
void
uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
{
	uma_cache_t cache;
	uma_bucket_t bucket;
	int bflags;
	int cpu;

#ifdef UMA_DEBUG_ALLOC_1
	printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
#endif
	CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
	    zone->uz_name);

        /* uma_zfree(..., NULL) does nothing, to match free(9). */
        if (item == NULL)
                return;
#ifdef DEBUG_MEMGUARD
	if (is_memguard_addr(item)) {
		/* Memguard-tracked items bypass the cache entirely. */
		if (zone->uz_dtor != NULL && zone->uz_dtor != mtrash_dtor)
			zone->uz_dtor(item, zone->uz_size, udata);
		if (zone->uz_fini != NULL && zone->uz_fini != mtrash_fini)
			zone->uz_fini(item, zone->uz_size);
		memguard_free(item);
		return;
	}
#endif
	/* The dtor runs once here, regardless of which path frees the item. */
	if (zone->uz_dtor)
		zone->uz_dtor(item, zone->uz_size, udata);

#ifdef INVARIANTS
	ZONE_LOCK(zone);
	if (zone->uz_flags & UMA_ZONE_MALLOC)
		uma_dbg_free(zone, udata, item);
	else
		uma_dbg_free(zone, NULL, item);
	ZONE_UNLOCK(zone);
#endif
	/*
	 * The race here is acceptable.  If we miss it we'll just have to wait
	 * a little longer for the limits to be reset.
	 */
	if (zone->uz_flags & UMA_ZFLAG_FULL)
		goto zfree_internal;

	/*
	 * If possible, free to the per-CPU cache.  There are two
	 * requirements for safe access to the per-CPU cache: (1) the thread
	 * accessing the cache must not be preempted or yield during access,
	 * and (2) the thread must not migrate CPUs without switching which
	 * cache it accesses.  We rely on a critical section to prevent
	 * preemption and migration.  We release the critical section in
	 * order to acquire the zone mutex if we are unable to free to the
	 * current cache; when we re-acquire the critical section, we must
	 * detect and handle migration if it has occurred.
	 */
zfree_restart:
	critical_enter();
	cpu = curcpu;
	cache = &zone->uz_cpu[cpu];

zfree_start:
	bucket = cache->uc_freebucket;

	if (bucket) {
		/*
		 * Do we have room in our bucket? It is OK for this uz count
		 * check to be slightly out of sync.
		 */

		if (bucket->ub_cnt < bucket->ub_entries) {
			KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
			    ("uma_zfree: Freeing to non free bucket index."));
			bucket->ub_bucket[bucket->ub_cnt] = item;
			bucket->ub_cnt++;
			cache->uc_frees++;
			critical_exit();
			return;
		} else if (cache->uc_allocbucket) {
#ifdef UMA_DEBUG_ALLOC
			printf("uma_zfree: Swapping buckets.\n");
#endif
			/*
			 * We have run out of space in our freebucket.
			 * See if we can switch with our alloc bucket.
			 */
			if (cache->uc_allocbucket->ub_cnt <
			    cache->uc_freebucket->ub_cnt) {
				bucket = cache->uc_freebucket;
				cache->uc_freebucket = cache->uc_allocbucket;
				cache->uc_allocbucket = bucket;
				goto zfree_start;
			}
		}
	}
	/*
	 * We can get here for two reasons:
	 *
	 * 1) The buckets are NULL
	 * 2) The alloc and free buckets are both somewhat full.
	 *
	 * We must go back the zone, which requires acquiring the zone lock,
	 * which in turn means we must release and re-acquire the critical
	 * section.  Since the critical section is released, we may be
	 * preempted or migrate.  As such, make sure not to maintain any
	 * thread-local state specific to the cache from prior to releasing
	 * the critical section.
	 */
	critical_exit();
	ZONE_LOCK(zone);
	critical_enter();
	/* We may have migrated: re-derive the cache from the current CPU. */
	cpu = curcpu;
	cache = &zone->uz_cpu[cpu];
	if (cache->uc_freebucket != NULL) {
		if (cache->uc_freebucket->ub_cnt <
		    cache->uc_freebucket->ub_entries) {
			ZONE_UNLOCK(zone);
			goto zfree_start;
		}
		if (cache->uc_allocbucket != NULL &&
		    (cache->uc_allocbucket->ub_cnt <
		    cache->uc_freebucket->ub_cnt)) {
			ZONE_UNLOCK(zone);
			goto zfree_start;
		}
	}

	/* Since we have locked the zone we may as well send back our stats */
	zone->uz_allocs += cache->uc_allocs;
	cache->uc_allocs = 0;
	zone->uz_frees += cache->uc_frees;
	cache->uc_frees = 0;

	bucket = cache->uc_freebucket;
	cache->uc_freebucket = NULL;

	/* Can we throw this on the zone full list? */
	if (bucket != NULL) {
#ifdef UMA_DEBUG_ALLOC
		printf("uma_zfree: Putting old bucket on the free list.\n");
#endif
		/* ub_cnt is pointing to the last free item */
		KASSERT(bucket->ub_cnt != 0,
		    ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
		LIST_INSERT_HEAD(&zone->uz_full_bucket,
		    bucket, ub_link);
	}
	/* Try to refill the cache with an empty bucket the zone has cached. */
	if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
		LIST_REMOVE(bucket, ub_link);
		ZONE_UNLOCK(zone);
		cache->uc_freebucket = bucket;
		goto zfree_start;
	}
	/* We are no longer associated with this CPU. */
	critical_exit();

	/* And the zone.. */
	ZONE_UNLOCK(zone);

#ifdef UMA_DEBUG_ALLOC
	printf("uma_zfree: Allocating new free bucket.\n");
#endif
	bflags = M_NOWAIT;

	if (zone->uz_flags & UMA_ZFLAG_CACHEONLY)
		bflags |= M_NOVM;
	bucket = bucket_alloc(zone->uz_count, bflags);
	if (bucket) {
		/* Donate the new bucket to the zone and retry from the top. */
		ZONE_LOCK(zone);
		LIST_INSERT_HEAD(&zone->uz_free_bucket,
		    bucket, ub_link);
		ZONE_UNLOCK(zone);
		goto zfree_restart;
	}

	/*
	 * If nothing else caught this, we'll just do an internal free.
	 */
zfree_internal:
	zone_free_item(zone, item, udata, SKIP_DTOR, ZFREE_STATFREE);

	return;
}
27408355f576SJeff Roberson 
/*
 * Frees an item to an INTERNAL zone, returning it to the keg's slab.
 *
 * Arguments:
 *	zone   The zone to free to
 *	item   The item we're freeing
 *	udata  User supplied data for the dtor
 *	skip   Skip dtors and finis
 *	flags  ZFREE_STATFAIL and/or ZFREE_STATFREE statistics flags
 */
static void
zone_free_item(uma_zone_t zone, void *item, void *udata,
    enum zfreeskip skip, int flags)
{
	uma_slab_t slab;
	uma_slabrefcnt_t slabref;
	uma_keg_t keg;
	u_int8_t *mem;
	u_int8_t freei;
	int clearfull;

	/* Run the dtor/fini unless the caller already has. */
	if (skip < SKIP_DTOR && zone->uz_dtor)
		zone->uz_dtor(item, zone->uz_size, udata);

	if (skip < SKIP_FINI && zone->uz_fini)
		zone->uz_fini(item, zone->uz_size);

	ZONE_LOCK(zone);

	/* Account this free in the zone statistics as requested. */
	if (flags & ZFREE_STATFAIL)
		zone->uz_fails++;
	if (flags & ZFREE_STATFREE)
		zone->uz_frees++;

	/* Locate the slab this item came from. */
	if (!(zone->uz_flags & UMA_ZONE_VTOSLAB)) {
		mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
		keg = zone_first_keg(zone); /* Must only be one. */
		if (zone->uz_flags & UMA_ZONE_HASH) {
			slab = hash_sfind(&keg->uk_hash, mem);
		} else {
			mem += keg->uk_pgoff;
			slab = (uma_slab_t)mem;
		}
	} else {
		/* This prevents redundant lookups via free(). */
		if ((zone->uz_flags & UMA_ZONE_MALLOC) && udata != NULL)
			slab = (uma_slab_t)udata;
		else
			slab = vtoslab((vm_offset_t)item);
		keg = slab->us_keg;
		keg_relock(keg, zone);
	}
	MPASS(keg == slab->us_keg);

	/* Do we need to remove from any lists? */
	if (slab->us_freecount+1 == keg->uk_ipers) {
		/* This free makes the slab entirely free. */
		LIST_REMOVE(slab, us_link);
		LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
	} else if (slab->us_freecount == 0) {
		/* The slab was full; it is now partially used. */
		LIST_REMOVE(slab, us_link);
		LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
	}

	/* Slab management stuff */
	freei = ((unsigned long)item - (unsigned long)slab->us_data)
		/ keg->uk_rsize;

#ifdef INVARIANTS
	if (!skip)
		uma_dbg_free(zone, slab, item);
#endif

	/* Push the item's index onto the slab's free list. */
	if (keg->uk_flags & UMA_ZONE_REFCNT) {
		slabref = (uma_slabrefcnt_t)slab;
		slabref->us_freelist[freei].us_item = slab->us_firstfree;
	} else {
		slab->us_freelist[freei].us_item = slab->us_firstfree;
	}
	slab->us_firstfree = freei;
	slab->us_freecount++;

	/* Zone statistics */
	keg->uk_free++;

	/*
	 * If the keg had hit its page limit and now has headroom, clear
	 * the FULL flags and wake any threads sleeping on the keg/zone.
	 */
	clearfull = 0;
	if (keg->uk_flags & UMA_ZFLAG_FULL) {
		if (keg->uk_pages < keg->uk_maxpages) {
			keg->uk_flags &= ~UMA_ZFLAG_FULL;
			clearfull = 1;
		}

		/*
		 * We can handle one more allocation. Since we're clearing ZFLAG_FULL,
		 * wake up all procs blocked on pages. This should be uncommon, so
		 * keeping this simple for now (rather than adding count of blocked
		 * threads etc).
		 */
		wakeup(keg);
	}
	if (clearfull) {
		/* Trade the keg lock for the zone lock to clear the zone flag. */
		zone_relock(zone, keg);
		zone->uz_flags &= ~UMA_ZFLAG_FULL;
		wakeup(zone);
		ZONE_UNLOCK(zone);
	} else
		KEG_UNLOCK(keg);
}
28478355f576SJeff Roberson 
28488355f576SJeff Roberson /* See uma.h */
28491c6cae97SLawrence Stewart int
2850736ee590SJeff Roberson uma_zone_set_max(uma_zone_t zone, int nitems)
2851736ee590SJeff Roberson {
2852099a0e58SBosko Milekic 	uma_keg_t keg;
2853099a0e58SBosko Milekic 
2854736ee590SJeff Roberson 	ZONE_LOCK(zone);
2855e20a199fSJeff Roberson 	keg = zone_first_keg(zone);
2856e20a199fSJeff Roberson 	keg->uk_maxpages = (nitems / keg->uk_ipers) * keg->uk_ppera;
2857099a0e58SBosko Milekic 	if (keg->uk_maxpages * keg->uk_ipers < nitems)
2858e20a199fSJeff Roberson 		keg->uk_maxpages += keg->uk_ppera;
28591c6cae97SLawrence Stewart 	nitems = keg->uk_maxpages * keg->uk_ipers;
2860736ee590SJeff Roberson 	ZONE_UNLOCK(zone);
28611c6cae97SLawrence Stewart 
28621c6cae97SLawrence Stewart 	return (nitems);
2863736ee590SJeff Roberson }
2864736ee590SJeff Roberson 
2865736ee590SJeff Roberson /* See uma.h */
2866e49471b0SAndre Oppermann int
2867e49471b0SAndre Oppermann uma_zone_get_max(uma_zone_t zone)
2868e49471b0SAndre Oppermann {
2869e49471b0SAndre Oppermann 	int nitems;
2870e49471b0SAndre Oppermann 	uma_keg_t keg;
2871e49471b0SAndre Oppermann 
2872e49471b0SAndre Oppermann 	ZONE_LOCK(zone);
2873e49471b0SAndre Oppermann 	keg = zone_first_keg(zone);
2874e49471b0SAndre Oppermann 	nitems = keg->uk_maxpages * keg->uk_ipers;
2875e49471b0SAndre Oppermann 	ZONE_UNLOCK(zone);
2876e49471b0SAndre Oppermann 
2877e49471b0SAndre Oppermann 	return (nitems);
2878e49471b0SAndre Oppermann }
2879e49471b0SAndre Oppermann 
2880e49471b0SAndre Oppermann /* See uma.h */
2881c4ae7908SLawrence Stewart int
2882c4ae7908SLawrence Stewart uma_zone_get_cur(uma_zone_t zone)
2883c4ae7908SLawrence Stewart {
2884c4ae7908SLawrence Stewart 	int64_t nitems;
2885c4ae7908SLawrence Stewart 	u_int i;
2886c4ae7908SLawrence Stewart 
2887c4ae7908SLawrence Stewart 	ZONE_LOCK(zone);
2888c4ae7908SLawrence Stewart 	nitems = zone->uz_allocs - zone->uz_frees;
2889c4ae7908SLawrence Stewart 	CPU_FOREACH(i) {
2890c4ae7908SLawrence Stewart 		/*
2891c4ae7908SLawrence Stewart 		 * See the comment in sysctl_vm_zone_stats() regarding the
2892c4ae7908SLawrence Stewart 		 * safety of accessing the per-cpu caches. With the zone lock
2893c4ae7908SLawrence Stewart 		 * held, it is safe, but can potentially result in stale data.
2894c4ae7908SLawrence Stewart 		 */
2895c4ae7908SLawrence Stewart 		nitems += zone->uz_cpu[i].uc_allocs -
2896c4ae7908SLawrence Stewart 		    zone->uz_cpu[i].uc_frees;
2897c4ae7908SLawrence Stewart 	}
2898c4ae7908SLawrence Stewart 	ZONE_UNLOCK(zone);
2899c4ae7908SLawrence Stewart 
2900c4ae7908SLawrence Stewart 	return (nitems < 0 ? 0 : nitems);
2901c4ae7908SLawrence Stewart }
2902c4ae7908SLawrence Stewart 
2903c4ae7908SLawrence Stewart /* See uma.h */
2904736ee590SJeff Roberson void
2905099a0e58SBosko Milekic uma_zone_set_init(uma_zone_t zone, uma_init uminit)
2906099a0e58SBosko Milekic {
2907e20a199fSJeff Roberson 	uma_keg_t keg;
2908e20a199fSJeff Roberson 
2909099a0e58SBosko Milekic 	ZONE_LOCK(zone);
2910e20a199fSJeff Roberson 	keg = zone_first_keg(zone);
2911e20a199fSJeff Roberson 	KASSERT(keg->uk_pages == 0,
2912099a0e58SBosko Milekic 	    ("uma_zone_set_init on non-empty keg"));
2913e20a199fSJeff Roberson 	keg->uk_init = uminit;
2914099a0e58SBosko Milekic 	ZONE_UNLOCK(zone);
2915099a0e58SBosko Milekic }
2916099a0e58SBosko Milekic 
2917099a0e58SBosko Milekic /* See uma.h */
2918099a0e58SBosko Milekic void
2919099a0e58SBosko Milekic uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
2920099a0e58SBosko Milekic {
2921e20a199fSJeff Roberson 	uma_keg_t keg;
2922e20a199fSJeff Roberson 
2923099a0e58SBosko Milekic 	ZONE_LOCK(zone);
2924e20a199fSJeff Roberson 	keg = zone_first_keg(zone);
2925e20a199fSJeff Roberson 	KASSERT(keg->uk_pages == 0,
2926099a0e58SBosko Milekic 	    ("uma_zone_set_fini on non-empty keg"));
2927e20a199fSJeff Roberson 	keg->uk_fini = fini;
2928099a0e58SBosko Milekic 	ZONE_UNLOCK(zone);
2929099a0e58SBosko Milekic }
2930099a0e58SBosko Milekic 
/* See uma.h */
/* Install the zone-level init hook; only legal before any pages exist. */
void
uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
{
	ZONE_LOCK(zone);
	KASSERT(zone_first_keg(zone)->uk_pages == 0,
	    ("uma_zone_set_zinit on non-empty keg"));
	zone->uz_init = zinit;
	ZONE_UNLOCK(zone);
}
2941099a0e58SBosko Milekic 
/* See uma.h */
/* Install the zone-level fini hook; only legal before any pages exist. */
void
uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
{
	ZONE_LOCK(zone);
	KASSERT(zone_first_keg(zone)->uk_pages == 0,
	    ("uma_zone_set_zfini on non-empty keg"));
	zone->uz_fini = zfini;
	ZONE_UNLOCK(zone);
}
2952099a0e58SBosko Milekic 
2953099a0e58SBosko Milekic /* See uma.h */
2954b23f72e9SBrian Feldman /* XXX uk_freef is not actually used with the zone locked */
2955099a0e58SBosko Milekic void
29568355f576SJeff Roberson uma_zone_set_freef(uma_zone_t zone, uma_free freef)
29578355f576SJeff Roberson {
2958e20a199fSJeff Roberson 
29598355f576SJeff Roberson 	ZONE_LOCK(zone);
2960e20a199fSJeff Roberson 	zone_first_keg(zone)->uk_freef = freef;
29618355f576SJeff Roberson 	ZONE_UNLOCK(zone);
29628355f576SJeff Roberson }
29638355f576SJeff Roberson 
29648355f576SJeff Roberson /* See uma.h */
2965b23f72e9SBrian Feldman /* XXX uk_allocf is not actually used with the zone locked */
29668355f576SJeff Roberson void
29678355f576SJeff Roberson uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
29688355f576SJeff Roberson {
2969e20a199fSJeff Roberson 	uma_keg_t keg;
2970e20a199fSJeff Roberson 
29718355f576SJeff Roberson 	ZONE_LOCK(zone);
2972e20a199fSJeff Roberson 	keg = zone_first_keg(zone);
2973e20a199fSJeff Roberson 	keg->uk_flags |= UMA_ZFLAG_PRIVALLOC;
2974e20a199fSJeff Roberson 	keg->uk_allocf = allocf;
29758355f576SJeff Roberson 	ZONE_UNLOCK(zone);
29768355f576SJeff Roberson }
29778355f576SJeff Roberson 
/* See uma.h */
/*
 * Back the zone with a dedicated VM object and pre-reserved KVA so it
 * can hold up to 'count' items.  Returns 1 on success, 0 if the KVA
 * reservation fails.
 */
int
uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
{
	uma_keg_t keg;
	vm_offset_t kva;
	int pages;

	keg = zone_first_keg(zone);
	/* Round the page count up so at least 'count' items fit. */
	pages = count / keg->uk_ipers;

	if (pages * keg->uk_ipers < count)
		pages++;

	/* Reserve KVA for the whole zone up front. */
	kva = kmem_alloc_nofault(kernel_map, pages * UMA_SLAB_SIZE);

	if (kva == 0)
		return (0);
	if (obj == NULL)
		obj = vm_object_allocate(OBJT_PHYS, pages);
	else {
		/* Caller-supplied object: initialize it in place. */
		VM_OBJECT_LOCK_INIT(obj, "uma object");
		_vm_object_allocate(OBJT_PHYS, pages, obj);
	}
	ZONE_LOCK(zone);
	keg->uk_kva = kva;
	keg->uk_obj = obj;
	keg->uk_maxpages = pages;
	/* All slabs now come from the object via obj_alloc(). */
	keg->uk_allocf = obj_alloc;
	keg->uk_flags |= UMA_ZONE_NOFREE | UMA_ZFLAG_PRIVALLOC;
	ZONE_UNLOCK(zone);
	return (1);
}
30118355f576SJeff Roberson 
30128355f576SJeff Roberson /* See uma.h */
30138355f576SJeff Roberson void
30148355f576SJeff Roberson uma_prealloc(uma_zone_t zone, int items)
30158355f576SJeff Roberson {
30168355f576SJeff Roberson 	int slabs;
30178355f576SJeff Roberson 	uma_slab_t slab;
3018099a0e58SBosko Milekic 	uma_keg_t keg;
30198355f576SJeff Roberson 
3020e20a199fSJeff Roberson 	keg = zone_first_keg(zone);
30218355f576SJeff Roberson 	ZONE_LOCK(zone);
3022099a0e58SBosko Milekic 	slabs = items / keg->uk_ipers;
3023099a0e58SBosko Milekic 	if (slabs * keg->uk_ipers < items)
30248355f576SJeff Roberson 		slabs++;
30258355f576SJeff Roberson 	while (slabs > 0) {
3026e20a199fSJeff Roberson 		slab = keg_alloc_slab(keg, zone, M_WAITOK);
3027e20a199fSJeff Roberson 		if (slab == NULL)
3028e20a199fSJeff Roberson 			break;
3029e20a199fSJeff Roberson 		MPASS(slab->us_keg == keg);
3030099a0e58SBosko Milekic 		LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
30318355f576SJeff Roberson 		slabs--;
30328355f576SJeff Roberson 	}
30338355f576SJeff Roberson 	ZONE_UNLOCK(zone);
30348355f576SJeff Roberson }
30358355f576SJeff Roberson 
30368355f576SJeff Roberson /* See uma.h */
3037099a0e58SBosko Milekic u_int32_t *
3038099a0e58SBosko Milekic uma_find_refcnt(uma_zone_t zone, void *item)
3039099a0e58SBosko Milekic {
3040ab14a3f7SBrian Feldman 	uma_slabrefcnt_t slabref;
3041099a0e58SBosko Milekic 	uma_keg_t keg;
3042099a0e58SBosko Milekic 	u_int32_t *refcnt;
3043099a0e58SBosko Milekic 	int idx;
3044099a0e58SBosko Milekic 
3045ab14a3f7SBrian Feldman 	slabref = (uma_slabrefcnt_t)vtoslab((vm_offset_t)item &
3046ab14a3f7SBrian Feldman 	    (~UMA_SLAB_MASK));
3047e20a199fSJeff Roberson 	keg = slabref->us_keg;
3048ab14a3f7SBrian Feldman 	KASSERT(slabref != NULL && slabref->us_keg->uk_flags & UMA_ZONE_REFCNT,
3049099a0e58SBosko Milekic 	    ("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT"));
3050ab14a3f7SBrian Feldman 	idx = ((unsigned long)item - (unsigned long)slabref->us_data)
3051099a0e58SBosko Milekic 	    / keg->uk_rsize;
3052ab14a3f7SBrian Feldman 	refcnt = &slabref->us_freelist[idx].us_refcnt;
3053099a0e58SBosko Milekic 	return refcnt;
3054099a0e58SBosko Milekic }
3055099a0e58SBosko Milekic 
/* See uma.h */
/* Release cached memory back to the VM system in response to pressure. */
void
uma_reclaim(void)
{
#ifdef UMA_DEBUG
	printf("UMA: vm asked us to release pages!\n");
#endif
	bucket_enable();
	zone_foreach(zone_drain);
	/*
	 * The slab zones are visited early in the walk above, before the
	 * zones that free slabs into them have been drained.  Drain them
	 * again so pages emptied by the other zones can be released.  The
	 * same applies to the bucket zones.
	 */
	zone_drain(slabzone);
	zone_drain(slabrefzone);
	bucket_zone_drain();
}
30748355f576SJeff Roberson 
3075663b416fSJohn Baldwin /* See uma.h */
3076663b416fSJohn Baldwin int
3077663b416fSJohn Baldwin uma_zone_exhausted(uma_zone_t zone)
3078663b416fSJohn Baldwin {
3079663b416fSJohn Baldwin 	int full;
3080663b416fSJohn Baldwin 
3081663b416fSJohn Baldwin 	ZONE_LOCK(zone);
3082e20a199fSJeff Roberson 	full = (zone->uz_flags & UMA_ZFLAG_FULL);
3083663b416fSJohn Baldwin 	ZONE_UNLOCK(zone);
3084663b416fSJohn Baldwin 	return (full);
3085663b416fSJohn Baldwin }
3086663b416fSJohn Baldwin 
/* Unlocked variant of uma_zone_exhausted(); result may be stale. */
int
uma_zone_exhausted_nolock(uma_zone_t zone)
{
	return (zone->uz_flags & UMA_ZFLAG_FULL);
}
30926c125b8dSMohan Srinivasan 
/*
 * Allocate "size" bytes straight from the page allocator, tracked by an
 * internally allocated slab header so uma_large_free() can find the pages
 * again later.  Returns NULL if either allocation fails.
 */
void *
uma_large_malloc(int size, int wait)
{
	void *mem;
	uma_slab_t slab;
	u_int8_t flags;

	/* Get the slab header first; without it we cannot track the pages. */
	slab = zone_alloc_item(slabzone, NULL, wait);
	if (slab == NULL)
		return (NULL);
	mem = page_alloc(NULL, size, &flags, wait);
	if (mem) {
		/* Record the slab in the VM layer so free can look it up. */
		vsetslab((vm_offset_t)mem, slab);
		slab->us_data = mem;
		slab->us_flags = flags | UMA_SLAB_MALLOC;
		slab->us_size = size;
	} else {
		/* Page allocation failed: return the header, count the failure. */
		zone_free_item(slabzone, slab, NULL, SKIP_NONE,
		    ZFREE_STATFAIL | ZFREE_STATFREE);
	}

	return (mem);
}
31168355f576SJeff Roberson 
/*
 * Release an allocation made by uma_large_malloc(): point the pages back
 * at kmem_object, free them, then free the tracking slab header.
 */
void
uma_large_free(uma_slab_t slab)
{
	vsetobj((vm_offset_t)slab->us_data, kmem_object);
	page_free(slab->us_data, slab->us_size, slab->us_flags);
	zone_free_item(slabzone, slab, NULL, SKIP_NONE, ZFREE_STATFREE);
}
31248355f576SJeff Roberson 
/* Dump diagnostic state for every zone to the console. */
void
uma_print_stats(void)
{
	zone_foreach(uma_print_zone);
}
31308355f576SJeff Roberson 
/* Print one slab's bookkeeping fields (debugging aid). */
static void
slab_print(uma_slab_t slab)
{
	printf("slab: keg %p, data %p, freecount %d, firstfree %d\n",
		slab->us_keg, slab->us_data, slab->us_freecount,
		slab->us_firstfree);
}
3138504d5de3SJeff Roberson 
3139504d5de3SJeff Roberson static void
3140504d5de3SJeff Roberson cache_print(uma_cache_t cache)
3141504d5de3SJeff Roberson {
3142504d5de3SJeff Roberson 	printf("alloc: %p(%d), free: %p(%d)\n",
3143504d5de3SJeff Roberson 		cache->uc_allocbucket,
3144504d5de3SJeff Roberson 		cache->uc_allocbucket?cache->uc_allocbucket->ub_cnt:0,
3145504d5de3SJeff Roberson 		cache->uc_freebucket,
3146504d5de3SJeff Roberson 		cache->uc_freebucket?cache->uc_freebucket->ub_cnt:0);
3147504d5de3SJeff Roberson }
3148504d5de3SJeff Roberson 
/* Print a keg's summary counters and walk all three of its slab lists. */
static void
uma_print_keg(uma_keg_t keg)
{
	uma_slab_t slab;

	/* "out" is items handed out: total capacity minus free items. */
	printf("keg: %s(%p) size %d(%d) flags %d ipers %d ppera %d "
	    "out %d free %d limit %d\n",
	    keg->uk_name, keg, keg->uk_size, keg->uk_rsize, keg->uk_flags,
	    keg->uk_ipers, keg->uk_ppera,
	    (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free,
	    (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers);
	printf("Part slabs:\n");
	LIST_FOREACH(slab, &keg->uk_part_slab, us_link)
		slab_print(slab);
	printf("Free slabs:\n");
	LIST_FOREACH(slab, &keg->uk_free_slab, us_link)
		slab_print(slab);
	printf("Full slabs:\n");
	LIST_FOREACH(slab, &keg->uk_full_slab, us_link)
		slab_print(slab);
}
3170e20a199fSJeff Roberson 
/*
 * Print a zone's state: its own counters, every keg linked to it, and the
 * per-CPU cache of each present CPU.  Debugging aid; takes no locks.
 */
void
uma_print_zone(uma_zone_t zone)
{
	uma_cache_t cache;
	uma_klink_t kl;
	int i;

	printf("zone: %s(%p) size %d flags %d\n",
	    zone->uz_name, zone, zone->uz_size, zone->uz_flags);
	LIST_FOREACH(kl, &zone->uz_kegs, kl_link)
		uma_print_keg(kl->kl_keg);
	CPU_FOREACH(i) {
		cache = &zone->uz_cpu[i];
		printf("CPU %d Cache:\n", i);
		cache_print(cache);
	}
}
31888355f576SJeff Roberson 
3189a0d4b0aeSRobert Watson #ifdef DDB
31908355f576SJeff Roberson /*
 * Generate statistics across both the zone and its per-CPU caches.  Return
31927a52a97eSRobert Watson  * desired statistics if the pointer is non-NULL for that statistic.
31937a52a97eSRobert Watson  *
31947a52a97eSRobert Watson  * Note: does not update the zone statistics, as it can't safely clear the
31957a52a97eSRobert Watson  * per-CPU cache statistic.
31967a52a97eSRobert Watson  *
31977a52a97eSRobert Watson  * XXXRW: Following the uc_allocbucket and uc_freebucket pointers here isn't
31987a52a97eSRobert Watson  * safe from off-CPU; we should modify the caches to track this information
31997a52a97eSRobert Watson  * directly so that we don't have to.
32007a52a97eSRobert Watson  */
32017a52a97eSRobert Watson static void
32027a52a97eSRobert Watson uma_zone_sumstat(uma_zone_t z, int *cachefreep, u_int64_t *allocsp,
3203bf965959SSean Bruno     u_int64_t *freesp, u_int64_t *sleepsp)
32047a52a97eSRobert Watson {
32057a52a97eSRobert Watson 	uma_cache_t cache;
3206bf965959SSean Bruno 	u_int64_t allocs, frees, sleeps;
32077a52a97eSRobert Watson 	int cachefree, cpu;
32087a52a97eSRobert Watson 
3209bf965959SSean Bruno 	allocs = frees = sleeps = 0;
32107a52a97eSRobert Watson 	cachefree = 0;
32113aa6d94eSJohn Baldwin 	CPU_FOREACH(cpu) {
32127a52a97eSRobert Watson 		cache = &z->uz_cpu[cpu];
32137a52a97eSRobert Watson 		if (cache->uc_allocbucket != NULL)
32147a52a97eSRobert Watson 			cachefree += cache->uc_allocbucket->ub_cnt;
32157a52a97eSRobert Watson 		if (cache->uc_freebucket != NULL)
32167a52a97eSRobert Watson 			cachefree += cache->uc_freebucket->ub_cnt;
32177a52a97eSRobert Watson 		allocs += cache->uc_allocs;
32187a52a97eSRobert Watson 		frees += cache->uc_frees;
32197a52a97eSRobert Watson 	}
32207a52a97eSRobert Watson 	allocs += z->uz_allocs;
32217a52a97eSRobert Watson 	frees += z->uz_frees;
3222bf965959SSean Bruno 	sleeps += z->uz_sleeps;
32237a52a97eSRobert Watson 	if (cachefreep != NULL)
32247a52a97eSRobert Watson 		*cachefreep = cachefree;
32257a52a97eSRobert Watson 	if (allocsp != NULL)
32267a52a97eSRobert Watson 		*allocsp = allocs;
32277a52a97eSRobert Watson 	if (freesp != NULL)
32287a52a97eSRobert Watson 		*freesp = frees;
3229bf965959SSean Bruno 	if (sleepsp != NULL)
3230bf965959SSean Bruno 		*sleepsp = sleeps;
32317a52a97eSRobert Watson }
3232a0d4b0aeSRobert Watson #endif /* DDB */
32337a52a97eSRobert Watson 
32347a52a97eSRobert Watson static int
32357a52a97eSRobert Watson sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
32367a52a97eSRobert Watson {
32377a52a97eSRobert Watson 	uma_keg_t kz;
32387a52a97eSRobert Watson 	uma_zone_t z;
32397a52a97eSRobert Watson 	int count;
32407a52a97eSRobert Watson 
32417a52a97eSRobert Watson 	count = 0;
32427a52a97eSRobert Watson 	mtx_lock(&uma_mtx);
32437a52a97eSRobert Watson 	LIST_FOREACH(kz, &uma_kegs, uk_link) {
32447a52a97eSRobert Watson 		LIST_FOREACH(z, &kz->uk_zones, uz_link)
32457a52a97eSRobert Watson 			count++;
32467a52a97eSRobert Watson 	}
32477a52a97eSRobert Watson 	mtx_unlock(&uma_mtx);
32487a52a97eSRobert Watson 	return (sysctl_handle_int(oidp, &count, 0, req));
32497a52a97eSRobert Watson }
32507a52a97eSRobert Watson 
/*
 * Sysctl handler: stream UMA statistics to userland as a stream header
 * followed, per zone, by a type header and one per-CPU record for every
 * possible CPU slot (mp_maxid + 1 of them, zeroed when absent/internal).
 */
static int
sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
{
	struct uma_stream_header ush;
	struct uma_type_header uth;
	struct uma_percpu_stat ups;
	uma_bucket_t bucket;
	struct sbuf sbuf;
	uma_cache_t cache;
	uma_klink_t kl;
	uma_keg_t kz;
	uma_zone_t z;
	uma_keg_t k;
	int count, error, i;

	/* Wire the user buffer so sbuf drains cannot sleep with locks held. */
	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);

	count = 0;
	mtx_lock(&uma_mtx);
	LIST_FOREACH(kz, &uma_kegs, uk_link) {
		LIST_FOREACH(z, &kz->uk_zones, uz_link)
			count++;
	}

	/*
	 * Insert stream header.
	 */
	bzero(&ush, sizeof(ush));
	ush.ush_version = UMA_STREAM_VERSION;
	ush.ush_maxcpus = (mp_maxid + 1);
	ush.ush_count = count;
	(void)sbuf_bcat(&sbuf, &ush, sizeof(ush));

	LIST_FOREACH(kz, &uma_kegs, uk_link) {
		LIST_FOREACH(z, &kz->uk_zones, uz_link) {
			bzero(&uth, sizeof(uth));
			ZONE_LOCK(z);
			strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
			uth.uth_align = kz->uk_align;
			uth.uth_size = kz->uk_size;
			uth.uth_rsize = kz->uk_rsize;
			/* Sum page/limit figures over every keg of the zone. */
			LIST_FOREACH(kl, &z->uz_kegs, kl_link) {
				k = kl->kl_keg;
				uth.uth_maxpages += k->uk_maxpages;
				uth.uth_pages += k->uk_pages;
				uth.uth_keg_free += k->uk_free;
				uth.uth_limit = (k->uk_maxpages / k->uk_ppera)
				    * k->uk_ipers;
			}

			/*
			 * A zone is secondary if it is not the first entry
			 * on the keg's zone list.
			 */
			if ((z->uz_flags & UMA_ZONE_SECONDARY) &&
			    (LIST_FIRST(&kz->uk_zones) != z))
				uth.uth_zone_flags = UTH_ZONE_SECONDARY;

			LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link)
				uth.uth_zone_free += bucket->ub_cnt;
			uth.uth_allocs = z->uz_allocs;
			uth.uth_frees = z->uz_frees;
			uth.uth_fails = z->uz_fails;
			uth.uth_sleeps = z->uz_sleeps;
			(void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
			/*
			 * While it is not normally safe to access the cache
			 * bucket pointers while not on the CPU that owns the
			 * cache, we only allow the pointers to be exchanged
			 * without the zone lock held, not invalidated, so
			 * accept the possible race associated with bucket
			 * exchange during monitoring.
			 */
			for (i = 0; i < (mp_maxid + 1); i++) {
				bzero(&ups, sizeof(ups));
				/* Emit a zeroed record for skipped slots to
				   keep the stream layout fixed. */
				if (kz->uk_flags & UMA_ZFLAG_INTERNAL)
					goto skip;
				if (CPU_ABSENT(i))
					goto skip;
				cache = &z->uz_cpu[i];
				if (cache->uc_allocbucket != NULL)
					ups.ups_cache_free +=
					    cache->uc_allocbucket->ub_cnt;
				if (cache->uc_freebucket != NULL)
					ups.ups_cache_free +=
					    cache->uc_freebucket->ub_cnt;
				ups.ups_allocs = cache->uc_allocs;
				ups.ups_frees = cache->uc_frees;
skip:
				(void)sbuf_bcat(&sbuf, &ups, sizeof(ups));
			}
			ZONE_UNLOCK(z);
		}
	}
	mtx_unlock(&uma_mtx);
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
335348c5777eSRobert Watson 
335448c5777eSRobert Watson #ifdef DDB
/* DDB command: print a one-line usage summary for every zone. */
DB_SHOW_COMMAND(uma, db_show_uma)
{
	u_int64_t allocs, frees, sleeps;
	uma_bucket_t bucket;
	uma_keg_t kz;
	uma_zone_t z;
	int cachefree;

	db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free",
	    "Requests", "Sleeps");
	LIST_FOREACH(kz, &uma_kegs, uk_link) {
		LIST_FOREACH(z, &kz->uk_zones, uz_link) {
			if (kz->uk_flags & UMA_ZFLAG_INTERNAL) {
				/*
				 * Internal zones: read the zone counters
				 * directly rather than summing CPU caches.
				 */
				allocs = z->uz_allocs;
				frees = z->uz_frees;
				sleeps = z->uz_sleeps;
				cachefree = 0;
			} else
				uma_zone_sumstat(z, &cachefree, &allocs,
				    &frees, &sleeps);
			/*
			 * Count keg free items only for the primary zone so
			 * they are not attributed to every secondary zone.
			 */
			if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
			    (LIST_FIRST(&kz->uk_zones) != z)))
				cachefree += kz->uk_free;
			LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link)
				cachefree += bucket->ub_cnt;
			db_printf("%18s %8ju %8jd %8d %12ju %8ju\n", z->uz_name,
			    (uintmax_t)kz->uk_size,
			    (intmax_t)(allocs - frees), cachefree,
			    (uintmax_t)allocs, sleeps);
		}
	}
}
338748c5777eSRobert Watson #endif
3388