xref: /freebsd/sys/vm/uma_core.c (revision 3639ac42e5f1e80708c957366d0db6f23fa584ab)
160727d8bSWarner Losh /*-
2fe267a55SPedro F. Giffuni  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3fe267a55SPedro F. Giffuni  *
4584061b4SJeff Roberson  * Copyright (c) 2002-2019 Jeffrey Roberson <jeff@FreeBSD.org>
508ecce74SRobert Watson  * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
6ae4e9636SRobert Watson  * Copyright (c) 2004-2006 Robert N. M. Watson
708ecce74SRobert Watson  * All rights reserved.
88355f576SJeff Roberson  *
98355f576SJeff Roberson  * Redistribution and use in source and binary forms, with or without
108355f576SJeff Roberson  * modification, are permitted provided that the following conditions
118355f576SJeff Roberson  * are met:
128355f576SJeff Roberson  * 1. Redistributions of source code must retain the above copyright
138355f576SJeff Roberson  *    notice unmodified, this list of conditions, and the following
148355f576SJeff Roberson  *    disclaimer.
158355f576SJeff Roberson  * 2. Redistributions in binary form must reproduce the above copyright
168355f576SJeff Roberson  *    notice, this list of conditions and the following disclaimer in the
178355f576SJeff Roberson  *    documentation and/or other materials provided with the distribution.
188355f576SJeff Roberson  *
198355f576SJeff Roberson  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
208355f576SJeff Roberson  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
218355f576SJeff Roberson  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
228355f576SJeff Roberson  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
238355f576SJeff Roberson  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
248355f576SJeff Roberson  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
258355f576SJeff Roberson  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
268355f576SJeff Roberson  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
278355f576SJeff Roberson  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
288355f576SJeff Roberson  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
298355f576SJeff Roberson  */
308355f576SJeff Roberson 
318355f576SJeff Roberson /*
328355f576SJeff Roberson  * uma_core.c  Implementation of the Universal Memory Allocator
338355f576SJeff Roberson  *
348355f576SJeff Roberson  * This allocator is intended to replace the multitude of similar object caches
358355f576SJeff Roberson  * in the standard FreeBSD kernel.  The intent is to be flexible as well as
36763df3ecSPedro F. Giffuni  * efficient.  A primary design goal is to return unused memory to the rest of
378355f576SJeff Roberson  * the system.  This will make the system as a whole more flexible due to the
388355f576SJeff Roberson  * ability to move memory to subsystems which most need it instead of leaving
398355f576SJeff Roberson  * pools of reserved memory unused.
408355f576SJeff Roberson  *
418355f576SJeff Roberson  * The basic ideas stem from similar slab/zone-based allocators whose algorithms
428355f576SJeff Roberson  * are well known.
438355f576SJeff Roberson  *
448355f576SJeff Roberson  */
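/*
 * Illustrative consumer-side sketch (not part of this file; the "foo"
 * names are hypothetical): a subsystem typically creates a zone once at
 * initialization time and then allocates and frees fixed-size items
 * from it:
 *
 *	foo_zone = uma_zcreate("foo", sizeof(struct foo),
 *	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 *	p = uma_zalloc(foo_zone, M_WAITOK);
 *	...
 *	uma_zfree(foo_zone, p);
 */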
458355f576SJeff Roberson 
468355f576SJeff Roberson /*
478355f576SJeff Roberson  * TODO:
488355f576SJeff Roberson  *	- Improve memory usage for large allocations
498355f576SJeff Roberson  *	- Investigate cache size adjustments
508355f576SJeff Roberson  */
518355f576SJeff Roberson 
52874651b1SDavid E. O'Brien #include <sys/cdefs.h>
53874651b1SDavid E. O'Brien __FBSDID("$FreeBSD$");
54874651b1SDavid E. O'Brien 
5548c5777eSRobert Watson #include "opt_ddb.h"
568355f576SJeff Roberson #include "opt_param.h"
578d689e04SGleb Smirnoff #include "opt_vm.h"
5848c5777eSRobert Watson 
598355f576SJeff Roberson #include <sys/param.h>
608355f576SJeff Roberson #include <sys/systm.h>
61ef72505eSJeff Roberson #include <sys/bitset.h>
62194a979eSMark Johnston #include <sys/domainset.h>
639b43bc27SAndriy Gapon #include <sys/eventhandler.h>
648355f576SJeff Roberson #include <sys/kernel.h>
658355f576SJeff Roberson #include <sys/types.h>
66ad5b0f5bSJeff Roberson #include <sys/limits.h>
678355f576SJeff Roberson #include <sys/queue.h>
688355f576SJeff Roberson #include <sys/malloc.h>
693659f747SRobert Watson #include <sys/ktr.h>
708355f576SJeff Roberson #include <sys/lock.h>
718355f576SJeff Roberson #include <sys/sysctl.h>
728355f576SJeff Roberson #include <sys/mutex.h>
734c1cc01cSJohn Baldwin #include <sys/proc.h>
7410cb2424SMark Murray #include <sys/random.h>
7589f6b863SAttilio Rao #include <sys/rwlock.h>
767a52a97eSRobert Watson #include <sys/sbuf.h>
77a2de44abSAlexander Motin #include <sys/sched.h>
788355f576SJeff Roberson #include <sys/smp.h>
79e60b2fcbSGleb Smirnoff #include <sys/taskqueue.h>
8086bbae32SJeff Roberson #include <sys/vmmeter.h>
8186bbae32SJeff Roberson 
828355f576SJeff Roberson #include <vm/vm.h>
83194a979eSMark Johnston #include <vm/vm_domainset.h>
848355f576SJeff Roberson #include <vm/vm_object.h>
858355f576SJeff Roberson #include <vm/vm_page.h>
86a4915c21SAttilio Rao #include <vm/vm_pageout.h>
878355f576SJeff Roberson #include <vm/vm_param.h>
88ab3185d1SJeff Roberson #include <vm/vm_phys.h>
8930c5525bSAndrew Gallatin #include <vm/vm_pagequeue.h>
908355f576SJeff Roberson #include <vm/vm_map.h>
918355f576SJeff Roberson #include <vm/vm_kern.h>
928355f576SJeff Roberson #include <vm/vm_extern.h>
938355f576SJeff Roberson #include <vm/uma.h>
948355f576SJeff Roberson #include <vm/uma_int.h>
95639c9550SJeff Roberson #include <vm/uma_dbg.h>
968355f576SJeff Roberson 
9748c5777eSRobert Watson #include <ddb/ddb.h>
9848c5777eSRobert Watson 
998d689e04SGleb Smirnoff #ifdef DEBUG_MEMGUARD
1008d689e04SGleb Smirnoff #include <vm/memguard.h>
1018d689e04SGleb Smirnoff #endif
1028d689e04SGleb Smirnoff 
1038355f576SJeff Roberson /*
104ab3185d1SJeff Roberson  * These are the zone and keg from which all zones are spawned.
1058355f576SJeff Roberson  */
106ab3185d1SJeff Roberson static uma_zone_t kegs;
107ab3185d1SJeff Roberson static uma_zone_t zones;
1088355f576SJeff Roberson 
109ab3185d1SJeff Roberson /* This is the zone from which all offpage uma_slab_ts are allocated. */
1108355f576SJeff Roberson static uma_zone_t slabzone;
1118355f576SJeff Roberson 
1128355f576SJeff Roberson /*
1138355f576SJeff Roberson  * The initial hash tables come out of this zone so they can be allocated
1148355f576SJeff Roberson  * prior to malloc coming up.
1158355f576SJeff Roberson  */
1168355f576SJeff Roberson static uma_zone_t hashzone;
1178355f576SJeff Roberson 
1181e319f6dSRobert Watson /* The boot-time adjusted value for cache line alignment. */
119e4cd31ddSJeff Roberson int uma_align_cache = 64 - 1;
1201e319f6dSRobert Watson 
121961647dfSJeff Roberson static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
12220a4e154SJeff Roberson static MALLOC_DEFINE(M_UMA, "UMA", "UMA Misc");
123961647dfSJeff Roberson 
1248355f576SJeff Roberson /*
12586bbae32SJeff Roberson  * Are we allowed to allocate buckets?
12686bbae32SJeff Roberson  */
12786bbae32SJeff Roberson static int bucketdisable = 1;
12886bbae32SJeff Roberson 
129099a0e58SBosko Milekic /* Linked list of all kegs in the system */
13013e403fdSAntoine Brodin static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs);
1318355f576SJeff Roberson 
13203175483SAlexander Motin /* Linked list of all cache-only zones in the system */
13303175483SAlexander Motin static LIST_HEAD(,uma_zone) uma_cachezones =
13403175483SAlexander Motin     LIST_HEAD_INITIALIZER(uma_cachezones);
13503175483SAlexander Motin 
136111fbcd5SBryan Venteicher /* This RW lock protects the keg list */
137fe933c1dSMateusz Guzik static struct rwlock_padalign __exclusive_cache_line uma_rwlock;
1388355f576SJeff Roberson 
139ac0a6fd0SGleb Smirnoff /*
140ac0a6fd0SGleb Smirnoff  * Pointer to, and count of, the pool of pages that is preallocated
141f7d35785SGleb Smirnoff  * at startup to bootstrap UMA.
142ac0a6fd0SGleb Smirnoff  */
143ac0a6fd0SGleb Smirnoff static char *bootmem;
144ac0a6fd0SGleb Smirnoff static int boot_pages;
1458355f576SJeff Roberson 
14608cfa56eSMark Johnston static struct sx uma_reclaim_lock;
14795c4bf75SKonstantin Belousov 
148fbd95859SMark Johnston /*
149fbd95859SMark Johnston  * kmem soft limit, initialized by uma_set_limit().  Ensure that early
150fbd95859SMark Johnston  * allocations don't trigger a wakeup of the reclaim thread.
151fbd95859SMark Johnston  */
1526d6a03d7SJeff Roberson unsigned long uma_kmem_limit = LONG_MAX;
153fbd95859SMark Johnston SYSCTL_ULONG(_vm, OID_AUTO, uma_kmem_limit, CTLFLAG_RD, &uma_kmem_limit, 0,
154fbd95859SMark Johnston     "UMA kernel memory soft limit");
1556d6a03d7SJeff Roberson unsigned long uma_kmem_total;
156fbd95859SMark Johnston SYSCTL_ULONG(_vm, OID_AUTO, uma_kmem_total, CTLFLAG_RD, &uma_kmem_total, 0,
157fbd95859SMark Johnston     "UMA kernel memory usage");
1582e47807cSJeff Roberson 
1598355f576SJeff Roberson /* Is the VM done starting up? */
160f4bef67cSGleb Smirnoff static enum { BOOT_COLD = 0, BOOT_STRAPPED, BOOT_PAGEALLOC, BOOT_BUCKETS,
161f4bef67cSGleb Smirnoff     BOOT_RUNNING } booted = BOOT_COLD;
1628355f576SJeff Roberson 
163ef72505eSJeff Roberson /*
1649643769aSJeff Roberson  * This is the handle used to schedule events that need to happen
1659643769aSJeff Roberson  * outside of the allocation fast path.
1669643769aSJeff Roberson  */
1678355f576SJeff Roberson static struct callout uma_callout;
1689643769aSJeff Roberson #define	UMA_TIMEOUT	20		/* Seconds for callout interval. */
1698355f576SJeff Roberson 
1708355f576SJeff Roberson /*
1718355f576SJeff Roberson  * This structure is passed as the zone ctor arg so that I don't have to create
1728355f576SJeff Roberson  * a special allocation function just for zones.
1738355f576SJeff Roberson  */
1748355f576SJeff Roberson struct uma_zctor_args {
175bb196eb4SMatthew D Fleming 	const char *name;
176c3bdc05fSAndrew R. Reiter 	size_t size;
1778355f576SJeff Roberson 	uma_ctor ctor;
1788355f576SJeff Roberson 	uma_dtor dtor;
1798355f576SJeff Roberson 	uma_init uminit;
1808355f576SJeff Roberson 	uma_fini fini;
1810095a784SJeff Roberson 	uma_import import;
1820095a784SJeff Roberson 	uma_release release;
1830095a784SJeff Roberson 	void *arg;
184099a0e58SBosko Milekic 	uma_keg_t keg;
185099a0e58SBosko Milekic 	int align;
18685dcf349SGleb Smirnoff 	uint32_t flags;
187099a0e58SBosko Milekic };
188099a0e58SBosko Milekic 
189099a0e58SBosko Milekic struct uma_kctor_args {
190099a0e58SBosko Milekic 	uma_zone_t zone;
191099a0e58SBosko Milekic 	size_t size;
192099a0e58SBosko Milekic 	uma_init uminit;
193099a0e58SBosko Milekic 	uma_fini fini;
1948355f576SJeff Roberson 	int align;
19585dcf349SGleb Smirnoff 	uint32_t flags;
1968355f576SJeff Roberson };
1978355f576SJeff Roberson 
198cae33c14SJeff Roberson struct uma_bucket_zone {
199cae33c14SJeff Roberson 	uma_zone_t	ubz_zone;
200cae33c14SJeff Roberson 	char		*ubz_name;
201fc03d22bSJeff Roberson 	int		ubz_entries;	/* Number of items it can hold. */
202fc03d22bSJeff Roberson 	int		ubz_maxsize;	/* Maximum allocation size per-item. */
203cae33c14SJeff Roberson };
204cae33c14SJeff Roberson 
205f9d27e75SRobert Watson /*
206fc03d22bSJeff Roberson  * Compute the actual number of bucket entries so that buckets pack into
207fc03d22bSJeff Roberson  * power-of-two allocation sizes for more efficient space utilization.
208f9d27e75SRobert Watson  */
209fc03d22bSJeff Roberson #define	BUCKET_SIZE(n)						\
210fc03d22bSJeff Roberson     (((sizeof(void *) * (n)) - sizeof(struct uma_bucket)) / sizeof(void *))
211fc03d22bSJeff Roberson 
2121aa6c758SAlexander Motin #define	BUCKET_MAX	BUCKET_SIZE(256)
213eda1b016SJeff Roberson #define	BUCKET_MIN	BUCKET_SIZE(4)
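/*
 * A worked example of BUCKET_SIZE() (a sketch; the header size is an
 * assumption that depends on the architecture): on LP64, with 8-byte
 * pointers and a struct uma_bucket header padded to 24 bytes,
 * BUCKET_SIZE(32) = (8 * 32 - 24) / 8 = 29 entries, so an item of the
 * "32 Bucket" zone occupies exactly 32 pointer-sized slots (256 bytes)
 * including the header.
 */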
214fc03d22bSJeff Roberson 
215fc03d22bSJeff Roberson struct uma_bucket_zone bucket_zones[] = {
2166fd34d6fSJeff Roberson 	{ NULL, "4 Bucket", BUCKET_SIZE(4), 4096 },
217f3932e90SAlexander Motin 	{ NULL, "6 Bucket", BUCKET_SIZE(6), 3072 },
2186fd34d6fSJeff Roberson 	{ NULL, "8 Bucket", BUCKET_SIZE(8), 2048 },
219f3932e90SAlexander Motin 	{ NULL, "12 Bucket", BUCKET_SIZE(12), 1536 },
2206fd34d6fSJeff Roberson 	{ NULL, "16 Bucket", BUCKET_SIZE(16), 1024 },
221fc03d22bSJeff Roberson 	{ NULL, "32 Bucket", BUCKET_SIZE(32), 512 },
222fc03d22bSJeff Roberson 	{ NULL, "64 Bucket", BUCKET_SIZE(64), 256 },
223fc03d22bSJeff Roberson 	{ NULL, "128 Bucket", BUCKET_SIZE(128), 128 },
2241aa6c758SAlexander Motin 	{ NULL, "256 Bucket", BUCKET_SIZE(256), 64 },
225fc03d22bSJeff Roberson 	{ NULL, NULL, 0}
226fc03d22bSJeff Roberson };
227cae33c14SJeff Roberson 
2282019094aSRobert Watson /*
2292019094aSRobert Watson  * Flags and enumerations to be passed to internal functions.
2302019094aSRobert Watson  */
231bb15d1c7SGleb Smirnoff enum zfreeskip {
232bb15d1c7SGleb Smirnoff 	SKIP_NONE =	0,
233bb15d1c7SGleb Smirnoff 	SKIP_CNT =	0x00000001,
234bb15d1c7SGleb Smirnoff 	SKIP_DTOR =	0x00010000,
235bb15d1c7SGleb Smirnoff 	SKIP_FINI =	0x00020000,
236bb15d1c7SGleb Smirnoff };
237b23f72e9SBrian Feldman 
2388355f576SJeff Roberson /* Prototypes. */
2398355f576SJeff Roberson 
240f4bef67cSGleb Smirnoff int	uma_startup_count(int);
241f4bef67cSGleb Smirnoff void	uma_startup(void *, int);
242f4bef67cSGleb Smirnoff void	uma_startup1(void);
243f4bef67cSGleb Smirnoff void	uma_startup2(void);
244f4bef67cSGleb Smirnoff 
245ab3185d1SJeff Roberson static void *noobj_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
246ab3185d1SJeff Roberson static void *page_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
247ab3059a8SMatt Macy static void *pcpu_page_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
248ab3185d1SJeff Roberson static void *startup_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
249f2c2231eSRyan Stone static void page_free(void *, vm_size_t, uint8_t);
250ab3059a8SMatt Macy static void pcpu_page_free(void *, vm_size_t, uint8_t);
25186220393SMark Johnston static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int, int, int);
2529643769aSJeff Roberson static void cache_drain(uma_zone_t);
2538355f576SJeff Roberson static void bucket_drain(uma_zone_t, uma_bucket_t);
25408cfa56eSMark Johnston static void bucket_cache_reclaim(uma_zone_t zone, bool);
255b23f72e9SBrian Feldman static int keg_ctor(void *, int, void *, int);
256099a0e58SBosko Milekic static void keg_dtor(void *, int, void *);
257b23f72e9SBrian Feldman static int zone_ctor(void *, int, void *, int);
2589c2cd7e5SJeff Roberson static void zone_dtor(void *, int, void *);
259b23f72e9SBrian Feldman static int zero_init(void *, int, int);
260e20a199fSJeff Roberson static void keg_small_init(uma_keg_t keg);
261e20a199fSJeff Roberson static void keg_large_init(uma_keg_t keg);
26220a4e154SJeff Roberson static void zone_foreach(void (*zfunc)(uma_zone_t, void *), void *);
26320a4e154SJeff Roberson static void zone_timeout(uma_zone_t zone, void *);
2643b2f2cb8SAlexander Motin static int hash_alloc(struct uma_hash *, u_int);
2650aef6126SJeff Roberson static int hash_expand(struct uma_hash *, struct uma_hash *);
2660aef6126SJeff Roberson static void hash_free(struct uma_hash *hash);
2678355f576SJeff Roberson static void uma_timeout(void *);
2688355f576SJeff Roberson static void uma_startup3(void);
269ab3185d1SJeff Roberson static void *zone_alloc_item(uma_zone_t, void *, int, int);
270bb15d1c7SGleb Smirnoff static void *zone_alloc_item_locked(uma_zone_t, void *, int, int);
2710095a784SJeff Roberson static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip);
27286bbae32SJeff Roberson static void bucket_enable(void);
273cae33c14SJeff Roberson static void bucket_init(void);
2746fd34d6fSJeff Roberson static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int);
2756fd34d6fSJeff Roberson static void bucket_free(uma_zone_t zone, uma_bucket_t, void *);
276cae33c14SJeff Roberson static void bucket_zone_drain(void);
277beb8beefSJeff Roberson static uma_bucket_t zone_alloc_bucket(uma_zone_t, void *, int, int);
2780095a784SJeff Roberson static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab);
279bb15d1c7SGleb Smirnoff static void slab_free_item(uma_zone_t zone, uma_slab_t slab, void *item);
280e20a199fSJeff Roberson static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
28185dcf349SGleb Smirnoff     uma_fini fini, int align, uint32_t flags);
282b75c4efcSAndrew Turner static int zone_import(void *, void **, int, int, int);
283b75c4efcSAndrew Turner static void zone_release(void *, void **, int);
284ab3185d1SJeff Roberson static void uma_zero_item(void *, uma_zone_t);
285beb8beefSJeff Roberson static bool cache_alloc(uma_zone_t, uma_cache_t, void *, int);
2860a81b439SJeff Roberson static bool cache_free(uma_zone_t, uma_cache_t, void *, void *, int);
287bbee39c6SJeff Roberson 
2887a52a97eSRobert Watson static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
2897a52a97eSRobert Watson static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
29020a4e154SJeff Roberson static int sysctl_handle_uma_zone_allocs(SYSCTL_HANDLER_ARGS);
29120a4e154SJeff Roberson static int sysctl_handle_uma_zone_frees(SYSCTL_HANDLER_ARGS);
2926d204a6aSRyan Libby static int sysctl_handle_uma_zone_flags(SYSCTL_HANDLER_ARGS);
293f7af5015SRyan Libby static int sysctl_handle_uma_slab_efficiency(SYSCTL_HANDLER_ARGS);
2948355f576SJeff Roberson 
2959542ea7bSGleb Smirnoff #ifdef INVARIANTS
296815db204SRyan Libby static inline struct noslabbits *slab_dbg_bits(uma_slab_t slab, uma_keg_t keg);
297815db204SRyan Libby 
298c5deaf04SGleb Smirnoff static bool uma_dbg_kskip(uma_keg_t keg, void *mem);
299c5deaf04SGleb Smirnoff static bool uma_dbg_zskip(uma_zone_t zone, void *mem);
3009542ea7bSGleb Smirnoff static void uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item);
3019542ea7bSGleb Smirnoff static void uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item);
302c5deaf04SGleb Smirnoff 
303c5deaf04SGleb Smirnoff static SYSCTL_NODE(_vm, OID_AUTO, debug, CTLFLAG_RD, 0,
304c5deaf04SGleb Smirnoff     "Memory allocation debugging");
305c5deaf04SGleb Smirnoff 
306c5deaf04SGleb Smirnoff static u_int dbg_divisor = 1;
307c5deaf04SGleb Smirnoff SYSCTL_UINT(_vm_debug, OID_AUTO, divisor,
308c5deaf04SGleb Smirnoff     CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &dbg_divisor, 0,
309c5deaf04SGleb Smirnoff     "Debug & thrash every n'th item in the memory allocator");
310c5deaf04SGleb Smirnoff 
311c5deaf04SGleb Smirnoff static counter_u64_t uma_dbg_cnt = EARLY_COUNTER;
312c5deaf04SGleb Smirnoff static counter_u64_t uma_skip_cnt = EARLY_COUNTER;
313c5deaf04SGleb Smirnoff SYSCTL_COUNTER_U64(_vm_debug, OID_AUTO, trashed, CTLFLAG_RD,
314c5deaf04SGleb Smirnoff     &uma_dbg_cnt, "memory items debugged");
315c5deaf04SGleb Smirnoff SYSCTL_COUNTER_U64(_vm_debug, OID_AUTO, skipped, CTLFLAG_RD,
316c5deaf04SGleb Smirnoff     &uma_skip_cnt, "memory items skipped, not debugged");
3179542ea7bSGleb Smirnoff #endif
3189542ea7bSGleb Smirnoff 
3198355f576SJeff Roberson SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
3208355f576SJeff Roberson 
32135ec24f3SRyan Libby SYSCTL_NODE(_vm, OID_AUTO, uma, CTLFLAG_RW, 0, "Universal Memory Allocator");
32235ec24f3SRyan Libby 
3237a52a97eSRobert Watson SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
3247a52a97eSRobert Watson     0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
3257a52a97eSRobert Watson 
3267a52a97eSRobert Watson SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
3277a52a97eSRobert Watson     0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");
3287a52a97eSRobert Watson 
3292f891cd5SPawel Jakub Dawidek static int zone_warnings = 1;
330af3b2549SHans Petter Selasky SYSCTL_INT(_vm, OID_AUTO, zone_warnings, CTLFLAG_RWTUN, &zone_warnings, 0,
3312f891cd5SPawel Jakub Dawidek     "Warn when a UMA zone becomes full");
3322f891cd5SPawel Jakub Dawidek 
33386bbae32SJeff Roberson /*
33486bbae32SJeff Roberson  * This routine checks to see whether or not it's safe to enable buckets.
33586bbae32SJeff Roberson  */
33686bbae32SJeff Roberson static void
33786bbae32SJeff Roberson bucket_enable(void)
33886bbae32SJeff Roberson {
3393182660aSRyan Libby 
3403182660aSRyan Libby 	KASSERT(booted >= BOOT_BUCKETS, ("Bucket enable before init"));
341251386b4SMaksim Yevmenkin 	bucketdisable = vm_page_count_min();
34286bbae32SJeff Roberson }
34386bbae32SJeff Roberson 
344dc2c7965SRobert Watson /*
345dc2c7965SRobert Watson  * Initialize bucket_zones, the array of zones of buckets of various sizes.
346dc2c7965SRobert Watson  *
347dc2c7965SRobert Watson  * For each zone, calculate the memory required for each bucket, consisting
348fc03d22bSJeff Roberson  * of the header and an array of pointers.
349dc2c7965SRobert Watson  */
350cae33c14SJeff Roberson static void
351cae33c14SJeff Roberson bucket_init(void)
352cae33c14SJeff Roberson {
353cae33c14SJeff Roberson 	struct uma_bucket_zone *ubz;
354cae33c14SJeff Roberson 	int size;
355cae33c14SJeff Roberson 
356d74e6a1dSAlan Cox 	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++) {
357cae33c14SJeff Roberson 		size = roundup(sizeof(struct uma_bucket), sizeof(void *));
358cae33c14SJeff Roberson 		size += sizeof(void *) * ubz->ubz_entries;
359cae33c14SJeff Roberson 		ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
360e20a199fSJeff Roberson 		    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
361ab3185d1SJeff Roberson 		    UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET | UMA_ZONE_NUMA);
362cae33c14SJeff Roberson 	}
363cae33c14SJeff Roberson }
364cae33c14SJeff Roberson 
365dc2c7965SRobert Watson /*
366dc2c7965SRobert Watson  * Given a desired number of entries for a bucket, return the zone from which
367dc2c7965SRobert Watson  * to allocate the bucket.
368dc2c7965SRobert Watson  */
369dc2c7965SRobert Watson static struct uma_bucket_zone *
370dc2c7965SRobert Watson bucket_zone_lookup(int entries)
371dc2c7965SRobert Watson {
372fc03d22bSJeff Roberson 	struct uma_bucket_zone *ubz;
373dc2c7965SRobert Watson 
374fc03d22bSJeff Roberson 	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
375fc03d22bSJeff Roberson 		if (ubz->ubz_entries >= entries)
376fc03d22bSJeff Roberson 			return (ubz);
377fc03d22bSJeff Roberson 	ubz--;
378fc03d22bSJeff Roberson 	return (ubz);
379fc03d22bSJeff Roberson }
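/*
 * For example (assuming the LP64 layout sketched above, where the
 * "8 Bucket" zone holds BUCKET_SIZE(8) = 5 entries): a request for a
 * bucket of at least 5 entries returns the "8 Bucket" zone, the first
 * table entry whose ubz_entries is >= 5; a request beyond BUCKET_MAX
 * runs off the end of the table and the trailing ubz-- returns the
 * largest bucket zone.
 */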
380fc03d22bSJeff Roberson 
381003cf08bSMark Johnston static struct uma_bucket_zone *
382003cf08bSMark Johnston bucket_zone_max(uma_zone_t zone, int nitems)
383003cf08bSMark Johnston {
384003cf08bSMark Johnston 	struct uma_bucket_zone *ubz;
385003cf08bSMark Johnston 	int bpcpu;
386003cf08bSMark Johnston 
387003cf08bSMark Johnston 	bpcpu = 2;
388003cf08bSMark Johnston #ifdef UMA_XDOMAIN
389003cf08bSMark Johnston 	if ((zone->uz_flags & UMA_ZONE_NUMA) != 0)
390003cf08bSMark Johnston 		/* Count the cross-domain bucket. */
391003cf08bSMark Johnston 		bpcpu++;
392003cf08bSMark Johnston #endif
393003cf08bSMark Johnston 
394003cf08bSMark Johnston 	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
395003cf08bSMark Johnston 		if (ubz->ubz_entries * bpcpu * mp_ncpus > nitems)
396003cf08bSMark Johnston 			break;
397003cf08bSMark Johnston 	if (ubz == &bucket_zones[0])
398003cf08bSMark Johnston 		ubz = NULL;
399003cf08bSMark Johnston 	else
400003cf08bSMark Johnston 		ubz--;
401003cf08bSMark Johnston 	return (ubz);
402003cf08bSMark Johnston }
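/*
 * An illustrative reading of the loop above: with mp_ncpus = 8 and no
 * cross-domain bucket (bpcpu = 2), a zone limit of nitems = 1000 stops
 * the walk at the first bucket zone where ubz_entries * 2 * 8 > 1000,
 * i.e. more than 62 entries, and returns the previous (smaller) zone.
 * This keeps the per-CPU caches alone from pinning more items than the
 * zone's limit allows.
 */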
403003cf08bSMark Johnston 
404fc03d22bSJeff Roberson static int
405fc03d22bSJeff Roberson bucket_select(int size)
406fc03d22bSJeff Roberson {
407fc03d22bSJeff Roberson 	struct uma_bucket_zone *ubz;
408fc03d22bSJeff Roberson 
409fc03d22bSJeff Roberson 	ubz = &bucket_zones[0];
410fc03d22bSJeff Roberson 	if (size > ubz->ubz_maxsize)
411fc03d22bSJeff Roberson 		return MAX((ubz->ubz_maxsize * ubz->ubz_entries) / size, 1);
412fc03d22bSJeff Roberson 
413fc03d22bSJeff Roberson 	for (; ubz->ubz_entries != 0; ubz++)
414fc03d22bSJeff Roberson 		if (ubz->ubz_maxsize < size)
415fc03d22bSJeff Roberson 			break;
416fc03d22bSJeff Roberson 	ubz--;
417fc03d22bSJeff Roberson 	return (ubz->ubz_entries);
418dc2c7965SRobert Watson }
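/*
 * For example (again assuming the LP64 sizes sketched above):
 * bucket_select(2048) walks until "12 Bucket" (ubz_maxsize 1536 < 2048),
 * steps back to "8 Bucket" (ubz_maxsize 2048), and returns its
 * BUCKET_SIZE(8) = 5 entries.  For size > 4096 the first branch scales
 * the entry count of the 4096-byte bucket zone down proportionally,
 * never below one entry.
 */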
419dc2c7965SRobert Watson 
420cae33c14SJeff Roberson static uma_bucket_t
4216fd34d6fSJeff Roberson bucket_alloc(uma_zone_t zone, void *udata, int flags)
422cae33c14SJeff Roberson {
423cae33c14SJeff Roberson 	struct uma_bucket_zone *ubz;
424cae33c14SJeff Roberson 	uma_bucket_t bucket;
425cae33c14SJeff Roberson 
426cae33c14SJeff Roberson 	/*
427cae33c14SJeff Roberson 	 * This is to stop us from allocating per-CPU buckets while we're
4283803b26bSDag-Erling Smørgrav 	 * running out of vm.boot_pages.  Otherwise, we would exhaust the
429cae33c14SJeff Roberson 	 * boot pages.  This also prevents us from allocating buckets in
430cae33c14SJeff Roberson 	 * low memory situations.
431cae33c14SJeff Roberson 	 */
432cae33c14SJeff Roberson 	if (bucketdisable)
433cae33c14SJeff Roberson 		return (NULL);
4346fd34d6fSJeff Roberson 	/*
4356fd34d6fSJeff Roberson 	 * To limit bucket recursion we store the original zone flags
4366fd34d6fSJeff Roberson 	 * in a cookie passed via zalloc_arg/zfree_arg.  This allows the
4376fd34d6fSJeff Roberson 	 * NOVM flag to persist even through deep recursions.  We also
4386fd34d6fSJeff Roberson 	 * store ZFLAG_BUCKET once we have recursed attempting to allocate
4396fd34d6fSJeff Roberson 	 * a bucket for a bucket zone so we do not allow infinite bucket
4406fd34d6fSJeff Roberson 	 * recursion.  This cookie will even persist to frees of unused
4416fd34d6fSJeff Roberson 	 * buckets via the allocation path or bucket allocations in the
4426fd34d6fSJeff Roberson 	 * free path.
4436fd34d6fSJeff Roberson 	 */
4446fd34d6fSJeff Roberson 	if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
4456fd34d6fSJeff Roberson 		udata = (void *)(uintptr_t)zone->uz_flags;
446e8a720feSAlexander Motin 	else {
447e8a720feSAlexander Motin 		if ((uintptr_t)udata & UMA_ZFLAG_BUCKET)
448e8a720feSAlexander Motin 			return (NULL);
4496fd34d6fSJeff Roberson 		udata = (void *)((uintptr_t)udata | UMA_ZFLAG_BUCKET);
450e8a720feSAlexander Motin 	}
4516fd34d6fSJeff Roberson 	if ((uintptr_t)udata & UMA_ZFLAG_CACHEONLY)
452af526374SJeff Roberson 		flags |= M_NOVM;
45320a4e154SJeff Roberson 	ubz = bucket_zone_lookup(zone->uz_bucket_size);
45420d3ab87SAlexander Motin 	if (ubz->ubz_zone == zone && (ubz + 1)->ubz_entries != 0)
45520d3ab87SAlexander Motin 		ubz++;
4566fd34d6fSJeff Roberson 	bucket = uma_zalloc_arg(ubz->ubz_zone, udata, flags);
457cae33c14SJeff Roberson 	if (bucket) {
458cae33c14SJeff Roberson #ifdef INVARIANTS
459cae33c14SJeff Roberson 		bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
460cae33c14SJeff Roberson #endif
461cae33c14SJeff Roberson 		bucket->ub_cnt = 0;
462cae33c14SJeff Roberson 		bucket->ub_entries = ubz->ubz_entries;
463cae33c14SJeff Roberson 	}
464cae33c14SJeff Roberson 
465cae33c14SJeff Roberson 	return (bucket);
466cae33c14SJeff Roberson }
467cae33c14SJeff Roberson 
468cae33c14SJeff Roberson static void
4696fd34d6fSJeff Roberson bucket_free(uma_zone_t zone, uma_bucket_t bucket, void *udata)
470cae33c14SJeff Roberson {
471cae33c14SJeff Roberson 	struct uma_bucket_zone *ubz;
472cae33c14SJeff Roberson 
473fc03d22bSJeff Roberson 	KASSERT(bucket->ub_cnt == 0,
474fc03d22bSJeff Roberson 	    ("bucket_free: Freeing a non-free bucket."));
4756fd34d6fSJeff Roberson 	if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
4766fd34d6fSJeff Roberson 		udata = (void *)(uintptr_t)zone->uz_flags;
477dc2c7965SRobert Watson 	ubz = bucket_zone_lookup(bucket->ub_entries);
4786fd34d6fSJeff Roberson 	uma_zfree_arg(ubz->ubz_zone, bucket, udata);
479cae33c14SJeff Roberson }
480cae33c14SJeff Roberson 
481cae33c14SJeff Roberson static void
482cae33c14SJeff Roberson bucket_zone_drain(void)
483cae33c14SJeff Roberson {
484cae33c14SJeff Roberson 	struct uma_bucket_zone *ubz;
485cae33c14SJeff Roberson 
486cae33c14SJeff Roberson 	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
48708cfa56eSMark Johnston 		uma_zone_reclaim(ubz->ubz_zone, UMA_RECLAIM_DRAIN);
488cae33c14SJeff Roberson }
489cae33c14SJeff Roberson 
49008cfa56eSMark Johnston /*
49108cfa56eSMark Johnston  * Attempt to satisfy an allocation by retrieving a full bucket from one of the
49208cfa56eSMark Johnston  * zone's caches.
49308cfa56eSMark Johnston  */
4940f9b7bf3SMark Johnston static uma_bucket_t
49508cfa56eSMark Johnston zone_fetch_bucket(uma_zone_t zone, uma_zone_domain_t zdom)
4960f9b7bf3SMark Johnston {
4970f9b7bf3SMark Johnston 	uma_bucket_t bucket;
4980f9b7bf3SMark Johnston 
4990f9b7bf3SMark Johnston 	ZONE_LOCK_ASSERT(zone);
5000f9b7bf3SMark Johnston 
50108cfa56eSMark Johnston 	if ((bucket = TAILQ_FIRST(&zdom->uzd_buckets)) != NULL) {
5020f9b7bf3SMark Johnston 		MPASS(zdom->uzd_nitems >= bucket->ub_cnt);
50308cfa56eSMark Johnston 		TAILQ_REMOVE(&zdom->uzd_buckets, bucket, ub_link);
5040f9b7bf3SMark Johnston 		zdom->uzd_nitems -= bucket->ub_cnt;
50508cfa56eSMark Johnston 		if (zdom->uzd_imin > zdom->uzd_nitems)
5060f9b7bf3SMark Johnston 			zdom->uzd_imin = zdom->uzd_nitems;
507bb15d1c7SGleb Smirnoff 		zone->uz_bkt_count -= bucket->ub_cnt;
5080f9b7bf3SMark Johnston 	}
5090f9b7bf3SMark Johnston 	return (bucket);
5100f9b7bf3SMark Johnston }
5110f9b7bf3SMark Johnston 
51208cfa56eSMark Johnston /*
51308cfa56eSMark Johnston  * Insert a full bucket into the specified cache.  The "ws" parameter indicates
51408cfa56eSMark Johnston  * whether the bucket's contents should be counted as part of the zone's working
51508cfa56eSMark Johnston  * set.
51608cfa56eSMark Johnston  */
5170f9b7bf3SMark Johnston static void
5180f9b7bf3SMark Johnston zone_put_bucket(uma_zone_t zone, uma_zone_domain_t zdom, uma_bucket_t bucket,
5190f9b7bf3SMark Johnston     const bool ws)
5200f9b7bf3SMark Johnston {
5210f9b7bf3SMark Johnston 
5220f9b7bf3SMark Johnston 	ZONE_LOCK_ASSERT(zone);
52308034d10SKonstantin Belousov 	KASSERT(!ws || zone->uz_bkt_count < zone->uz_bkt_max,
52408034d10SKonstantin Belousov 	    ("%s: zone %p overflow", __func__, zone));
5250f9b7bf3SMark Johnston 
52608cfa56eSMark Johnston 	if (ws)
52708cfa56eSMark Johnston 		TAILQ_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link);
52808cfa56eSMark Johnston 	else
52908cfa56eSMark Johnston 		TAILQ_INSERT_TAIL(&zdom->uzd_buckets, bucket, ub_link);
5300f9b7bf3SMark Johnston 	zdom->uzd_nitems += bucket->ub_cnt;
5310f9b7bf3SMark Johnston 	if (ws && zdom->uzd_imax < zdom->uzd_nitems)
5320f9b7bf3SMark Johnston 		zdom->uzd_imax = zdom->uzd_nitems;
533bb15d1c7SGleb Smirnoff 	zone->uz_bkt_count += bucket->ub_cnt;
5340f9b7bf3SMark Johnston }
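/*
 * A note on the queue discipline implied by "ws" (derived from the code
 * in this file): working-set buckets are inserted at the head, where
 * zone_fetch_bucket() consumes via TAILQ_FIRST(), so recently filled
 * buckets are reused first while still cache-warm; non-working-set
 * buckets go to the tail, which is where bucket_cache_reclaim() trims
 * from via TAILQ_LAST(), so they are the first to be freed back.
 */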
5350f9b7bf3SMark Johnston 
5362f891cd5SPawel Jakub Dawidek static void
5372f891cd5SPawel Jakub Dawidek zone_log_warning(uma_zone_t zone)
5382f891cd5SPawel Jakub Dawidek {
5392f891cd5SPawel Jakub Dawidek 	static const struct timeval warninterval = { 300, 0 };
5402f891cd5SPawel Jakub Dawidek 
5412f891cd5SPawel Jakub Dawidek 	if (!zone_warnings || zone->uz_warning == NULL)
5422f891cd5SPawel Jakub Dawidek 		return;
5432f891cd5SPawel Jakub Dawidek 
5442f891cd5SPawel Jakub Dawidek 	if (ratecheck(&zone->uz_ratecheck, &warninterval))
5452f891cd5SPawel Jakub Dawidek 		printf("[zone: %s] %s\n", zone->uz_name, zone->uz_warning);
5462f891cd5SPawel Jakub Dawidek }
5472f891cd5SPawel Jakub Dawidek 
54854503a13SJonathan T. Looney static inline void
54954503a13SJonathan T. Looney zone_maxaction(uma_zone_t zone)
55054503a13SJonathan T. Looney {
551e60b2fcbSGleb Smirnoff 
552e60b2fcbSGleb Smirnoff 	if (zone->uz_maxaction.ta_func != NULL)
553e60b2fcbSGleb Smirnoff 		taskqueue_enqueue(taskqueue_thread, &zone->uz_maxaction);
55454503a13SJonathan T. Looney }
55554503a13SJonathan T. Looney 
5568355f576SJeff Roberson /*
5578355f576SJeff Roberson  * Routine called by a timeout that fires off time-interval-based
5589643769aSJeff Roberson  * calculations (stats, hash size, etc.).
5598355f576SJeff Roberson  *
5608355f576SJeff Roberson  * Arguments:
5618355f576SJeff Roberson  *	arg   Unused
5628355f576SJeff Roberson  *
5638355f576SJeff Roberson  * Returns:
5648355f576SJeff Roberson  *	Nothing
5658355f576SJeff Roberson  */
5668355f576SJeff Roberson static void
5678355f576SJeff Roberson uma_timeout(void *unused)
5688355f576SJeff Roberson {
56986bbae32SJeff Roberson 	bucket_enable();
57020a4e154SJeff Roberson 	zone_foreach(zone_timeout, NULL);
5718355f576SJeff Roberson 
5728355f576SJeff Roberson 	/* Reschedule this event */
5739643769aSJeff Roberson 	callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
5748355f576SJeff Roberson }
5758355f576SJeff Roberson 
5768355f576SJeff Roberson /*
5770f9b7bf3SMark Johnston  * Update the working set size estimate for the zone's bucket cache.
5780f9b7bf3SMark Johnston  * The constants chosen here are somewhat arbitrary.  With an update period of
5790f9b7bf3SMark Johnston  * 20s (UMA_TIMEOUT), this estimate is dominated by zone activity over the
5800f9b7bf3SMark Johnston  * last 100s.
5810f9b7bf3SMark Johnston  */
5820f9b7bf3SMark Johnston static void
5830f9b7bf3SMark Johnston zone_domain_update_wss(uma_zone_domain_t zdom)
5840f9b7bf3SMark Johnston {
5850f9b7bf3SMark Johnston 	long wss;
5860f9b7bf3SMark Johnston 
5870f9b7bf3SMark Johnston 	MPASS(zdom->uzd_imax >= zdom->uzd_imin);
5880f9b7bf3SMark Johnston 	wss = zdom->uzd_imax - zdom->uzd_imin;
5890f9b7bf3SMark Johnston 	zdom->uzd_imax = zdom->uzd_imin = zdom->uzd_nitems;
59008cfa56eSMark Johnston 	zdom->uzd_wss = (4 * wss + zdom->uzd_wss) / 5;
5910f9b7bf3SMark Johnston }
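/*
 * Numerically, the update above is an exponentially weighted moving
 * average, wss_new = (4 * (imax - imin) + wss_old) / 5, weighting the
 * most recent interval at 4/5.  Older contributions decay by a factor
 * of five per update, so with one update every UMA_TIMEOUT (20s) a
 * burst of activity has essentially vanished from the estimate after
 * five intervals, i.e. the ~100s horizon mentioned above.
 */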
5920f9b7bf3SMark Johnston 
5930f9b7bf3SMark Johnston /*
5949643769aSJeff Roberson  * Routine to perform timeout-driven calculations.  This expands the
5959643769aSJeff Roberson  * hashes and updates the per-domain working set size estimates.
5968355f576SJeff Roberson  *
597e20a199fSJeff Roberson  *  Returns nothing.
5988355f576SJeff Roberson  */
5998355f576SJeff Roberson static void
60020a4e154SJeff Roberson zone_timeout(uma_zone_t zone, void *unused)
6018355f576SJeff Roberson {
60208034d10SKonstantin Belousov 	uma_keg_t keg;
6033b2f2cb8SAlexander Motin 	u_int slabs;
6048355f576SJeff Roberson 
60571353f7aSJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_HASH) == 0)
60608034d10SKonstantin Belousov 		goto update_wss;
60708034d10SKonstantin Belousov 
60808034d10SKonstantin Belousov 	keg = zone->uz_keg;
609e20a199fSJeff Roberson 	KEG_LOCK(keg);
6108355f576SJeff Roberson 	/*
611e20a199fSJeff Roberson 	 * Expand the keg hash table.
6128355f576SJeff Roberson 	 *
6138355f576SJeff Roberson 	 * This is done if the number of slabs is larger than the hash size.
6148355f576SJeff Roberson 	 * What I'm trying to do here is eliminate collisions entirely.  This
6158355f576SJeff Roberson 	 * may be a little aggressive.  Should I allow for two collisions max?
6168355f576SJeff Roberson 	 */
617099a0e58SBosko Milekic 	if (keg->uk_flags & UMA_ZONE_HASH &&
6183b2f2cb8SAlexander Motin 	    (slabs = keg->uk_pages / keg->uk_ppera) >
6193b2f2cb8SAlexander Motin 	     keg->uk_hash.uh_hashsize) {
6200aef6126SJeff Roberson 		struct uma_hash newhash;
6210aef6126SJeff Roberson 		struct uma_hash oldhash;
6220aef6126SJeff Roberson 		int ret;
6235300d9ddSJeff Roberson 
6240aef6126SJeff Roberson 		/*
6250aef6126SJeff Roberson 		 * This is so involved because allocating and freeing
626e20a199fSJeff Roberson 		 * while the keg lock is held will lead to deadlock.
6270aef6126SJeff Roberson 		 * I have to do everything in stages and check for
6280aef6126SJeff Roberson 		 * races.
6290aef6126SJeff Roberson 		 */
630e20a199fSJeff Roberson 		KEG_UNLOCK(keg);
6313b2f2cb8SAlexander Motin 		ret = hash_alloc(&newhash, 1 << fls(slabs));
632e20a199fSJeff Roberson 		KEG_LOCK(keg);
6330aef6126SJeff Roberson 		if (ret) {
634099a0e58SBosko Milekic 			if (hash_expand(&keg->uk_hash, &newhash)) {
635099a0e58SBosko Milekic 				oldhash = keg->uk_hash;
636099a0e58SBosko Milekic 				keg->uk_hash = newhash;
6370aef6126SJeff Roberson 			} else
6380aef6126SJeff Roberson 				oldhash = newhash;
6390aef6126SJeff Roberson 
640e20a199fSJeff Roberson 			KEG_UNLOCK(keg);
6410aef6126SJeff Roberson 			hash_free(&oldhash);
642a1dff920SDavide Italiano 			return;
6430aef6126SJeff Roberson 		}
6445300d9ddSJeff Roberson 	}
64508cfa56eSMark Johnston 	KEG_UNLOCK(keg);
646e20a199fSJeff Roberson 
64708034d10SKonstantin Belousov update_wss:
64808cfa56eSMark Johnston 	ZONE_LOCK(zone);
649bb15d1c7SGleb Smirnoff 	for (int i = 0; i < vm_ndomains; i++)
6500f9b7bf3SMark Johnston 		zone_domain_update_wss(&zone->uz_domain[i]);
65108cfa56eSMark Johnston 	ZONE_UNLOCK(zone);
6528355f576SJeff Roberson }
6538355f576SJeff Roberson 
6548355f576SJeff Roberson /*
6555300d9ddSJeff Roberson  * Allocate and zero-fill the next-sized hash table from the appropriate
6565300d9ddSJeff Roberson  * backing store.
6575300d9ddSJeff Roberson  *
6585300d9ddSJeff Roberson  * Arguments:
6590aef6126SJeff Roberson  *	hash  A new hash structure with the old hash size in uh_hashsize
6605300d9ddSJeff Roberson  *
6615300d9ddSJeff Roberson  * Returns:
662763df3ecSPedro F. Giffuni  *	1 on success and 0 on failure.
6635300d9ddSJeff Roberson  */
66437c84183SPoul-Henning Kamp static int
6653b2f2cb8SAlexander Motin hash_alloc(struct uma_hash *hash, u_int size)
6665300d9ddSJeff Roberson {
66759568a0eSAlexander Motin 	size_t alloc;
6685300d9ddSJeff Roberson 
6693b2f2cb8SAlexander Motin 	KASSERT(powerof2(size), ("hash size must be power of 2"));
6703b2f2cb8SAlexander Motin 	if (size > UMA_HASH_SIZE_INIT)  {
6713b2f2cb8SAlexander Motin 		hash->uh_hashsize = size;
6720aef6126SJeff Roberson 		alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
6731e0701e1SJeff Roberson 		hash->uh_slab_hash = malloc(alloc, M_UMAHASH, M_NOWAIT);
6745300d9ddSJeff Roberson 	} else {
6750aef6126SJeff Roberson 		alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
676e20a199fSJeff Roberson 		hash->uh_slab_hash = zone_alloc_item(hashzone, NULL,
677ab3185d1SJeff Roberson 		    UMA_ANYDOMAIN, M_WAITOK);
6780aef6126SJeff Roberson 		hash->uh_hashsize = UMA_HASH_SIZE_INIT;
6795300d9ddSJeff Roberson 	}
6800aef6126SJeff Roberson 	if (hash->uh_slab_hash) {
6810aef6126SJeff Roberson 		bzero(hash->uh_slab_hash, alloc);
6820aef6126SJeff Roberson 		hash->uh_hashmask = hash->uh_hashsize - 1;
6830aef6126SJeff Roberson 		return (1);
6840aef6126SJeff Roberson 	}
6855300d9ddSJeff Roberson 
6860aef6126SJeff Roberson 	return (0);
6875300d9ddSJeff Roberson }
6885300d9ddSJeff Roberson 
6895300d9ddSJeff Roberson /*
69064f051e9SJeff Roberson  * Expands the hash table for HASH zones.  This is done from zone_timeout
69164f051e9SJeff Roberson  * to reduce collisions.  This must not be done in the regular allocation
69264f051e9SJeff Roberson  * path; otherwise, we can recurse on the VM while allocating pages.
6938355f576SJeff Roberson  *
6948355f576SJeff Roberson  * Arguments:
6950aef6126SJeff Roberson  *	oldhash  The hash you want to expand
6960aef6126SJeff Roberson  *	newhash  The hash structure for the new table
6978355f576SJeff Roberson  *
6988355f576SJeff Roberson  * Returns:
6998355f576SJeff Roberson  *	Nothing
7008355f576SJeff Roberson  *
7018355f576SJeff Roberson  * Discussion:
7028355f576SJeff Roberson  */
7030aef6126SJeff Roberson static int
7040aef6126SJeff Roberson hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
7058355f576SJeff Roberson {
7061e0701e1SJeff Roberson 	uma_hash_slab_t slab;
7076929b7d1SPedro F. Giffuni 	u_int hval;
7086929b7d1SPedro F. Giffuni 	u_int idx;
7098355f576SJeff Roberson 
7100aef6126SJeff Roberson 	if (!newhash->uh_slab_hash)
7110aef6126SJeff Roberson 		return (0);
7128355f576SJeff Roberson 
7130aef6126SJeff Roberson 	if (oldhash->uh_hashsize >= newhash->uh_hashsize)
7140aef6126SJeff Roberson 		return (0);
7158355f576SJeff Roberson 
7168355f576SJeff Roberson 	/*
7178355f576SJeff Roberson 	 * I need to investigate hash algorithms for resizing without a
7188355f576SJeff Roberson 	 * full rehash.
7198355f576SJeff Roberson 	 */
7208355f576SJeff Roberson 
7216929b7d1SPedro F. Giffuni 	for (idx = 0; idx < oldhash->uh_hashsize; idx++)
7221e0701e1SJeff Roberson 		while (!LIST_EMPTY(&oldhash->uh_slab_hash[idx])) {
7231e0701e1SJeff Roberson 			slab = LIST_FIRST(&oldhash->uh_slab_hash[idx]);
7241e0701e1SJeff Roberson 			LIST_REMOVE(slab, uhs_hlink);
7251e0701e1SJeff Roberson 			hval = UMA_HASH(newhash, slab->uhs_data);
7261e0701e1SJeff Roberson 			LIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
7271e0701e1SJeff Roberson 			    slab, uhs_hlink);
7288355f576SJeff Roberson 		}
7298355f576SJeff Roberson 
7300aef6126SJeff Roberson 	return (1);
7319c2cd7e5SJeff Roberson }
7329c2cd7e5SJeff Roberson 
7335300d9ddSJeff Roberson /*
7345300d9ddSJeff Roberson  * Free the hash bucket to the appropriate backing store.
7355300d9ddSJeff Roberson  *
7365300d9ddSJeff Roberson  * Arguments:
7375300d9ddSJeff Roberson  *	hash  The hash structure whose slab hash storage we're freeing
7395300d9ddSJeff Roberson  *
7405300d9ddSJeff Roberson  * Returns:
7415300d9ddSJeff Roberson  *	Nothing
7425300d9ddSJeff Roberson  */
7439c2cd7e5SJeff Roberson static void
7440aef6126SJeff Roberson hash_free(struct uma_hash *hash)
7459c2cd7e5SJeff Roberson {
7460aef6126SJeff Roberson 	if (hash->uh_slab_hash == NULL)
7470aef6126SJeff Roberson 		return;
7480aef6126SJeff Roberson 	if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
7490095a784SJeff Roberson 		zone_free_item(hashzone, hash->uh_slab_hash, NULL, SKIP_NONE);
7508355f576SJeff Roberson 	else
751961647dfSJeff Roberson 		free(hash->uh_slab_hash, M_UMAHASH);
7528355f576SJeff Roberson }
7538355f576SJeff Roberson 
7548355f576SJeff Roberson /*
7558355f576SJeff Roberson  * Frees all outstanding items in a bucket
7568355f576SJeff Roberson  *
7578355f576SJeff Roberson  * Arguments:
7588355f576SJeff Roberson  *	zone   The zone to free to, must be unlocked.
7598355f576SJeff Roberson  *	bucket The free/alloc bucket with items, cpu queue must be locked.
7608355f576SJeff Roberson  *
7618355f576SJeff Roberson  * Returns:
7628355f576SJeff Roberson  *	Nothing
7638355f576SJeff Roberson  */
7648355f576SJeff Roberson 
7658355f576SJeff Roberson static void
7668355f576SJeff Roberson bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
7678355f576SJeff Roberson {
7680095a784SJeff Roberson 	int i;
7698355f576SJeff Roberson 
7708355f576SJeff Roberson 	if (bucket == NULL)
7718355f576SJeff Roberson 		return;
7728355f576SJeff Roberson 
7730095a784SJeff Roberson 	if (zone->uz_fini)
7740095a784SJeff Roberson 		for (i = 0; i < bucket->ub_cnt; i++)
7750095a784SJeff Roberson 			zone->uz_fini(bucket->ub_bucket[i], zone->uz_size);
7760095a784SJeff Roberson 	zone->uz_release(zone->uz_arg, bucket->ub_bucket, bucket->ub_cnt);
777bb45b411SGleb Smirnoff 	if (zone->uz_max_items > 0) {
778bb15d1c7SGleb Smirnoff 		ZONE_LOCK(zone);
779bb15d1c7SGleb Smirnoff 		zone->uz_items -= bucket->ub_cnt;
780bb15d1c7SGleb Smirnoff 		if (zone->uz_sleepers && zone->uz_items < zone->uz_max_items)
781bb15d1c7SGleb Smirnoff 			wakeup_one(zone);
782bb15d1c7SGleb Smirnoff 		ZONE_UNLOCK(zone);
783bb45b411SGleb Smirnoff 	}
7840095a784SJeff Roberson 	bucket->ub_cnt = 0;
7858355f576SJeff Roberson }
7868355f576SJeff Roberson 
7878355f576SJeff Roberson /*
7888355f576SJeff Roberson  * Drains the per cpu caches for a zone.
7898355f576SJeff Roberson  *
7905d1ae027SRobert Watson  * NOTE: This may only be called while the zone is being torn down, and not
7915d1ae027SRobert Watson  * during normal operation.  This is necessary in order that we do not have
7925d1ae027SRobert Watson  * to migrate CPUs to drain the per-CPU caches.
7935d1ae027SRobert Watson  *
7948355f576SJeff Roberson  * Arguments:
7958355f576SJeff Roberson  *	zone     The zone to drain, must be unlocked.
7968355f576SJeff Roberson  *
7978355f576SJeff Roberson  * Returns:
7988355f576SJeff Roberson  *	Nothing
7998355f576SJeff Roberson  */
8008355f576SJeff Roberson static void
8019643769aSJeff Roberson cache_drain(uma_zone_t zone)
8028355f576SJeff Roberson {
8038355f576SJeff Roberson 	uma_cache_t cache;
8048355f576SJeff Roberson 	int cpu;
8058355f576SJeff Roberson 
8068355f576SJeff Roberson 	/*
8075d1ae027SRobert Watson 	 * XXX: It is safe to not lock the per-CPU caches, because we're
8085d1ae027SRobert Watson 	 * tearing down the zone anyway.  I.e., there will be no further use
8095d1ae027SRobert Watson 	 * of the caches at this point.
8105d1ae027SRobert Watson 	 *
8115d1ae027SRobert Watson 	 * XXX: It would be good to be able to assert that the zone is being
8125d1ae027SRobert Watson 	 * torn down to prevent improper use of cache_drain().
8135d1ae027SRobert Watson 	 *
81408cfa56eSMark Johnston 	 * XXX: We lock the zone before passing into bucket_cache_reclaim() as
8155d1ae027SRobert Watson 	 * it is used elsewhere.  Should the tear-down path be made special
8165d1ae027SRobert Watson 	 * there in some form?
8178355f576SJeff Roberson 	 */
8183aa6d94eSJohn Baldwin 	CPU_FOREACH(cpu) {
8198355f576SJeff Roberson 		cache = &zone->uz_cpu[cpu];
8208355f576SJeff Roberson 		bucket_drain(zone, cache->uc_allocbucket);
821174ab450SBosko Milekic 		if (cache->uc_allocbucket != NULL)
8226fd34d6fSJeff Roberson 			bucket_free(zone, cache->uc_allocbucket, NULL);
823c1685086SJeff Roberson 		cache->uc_allocbucket = NULL;
824c1685086SJeff Roberson 		bucket_drain(zone, cache->uc_freebucket);
825174ab450SBosko Milekic 		if (cache->uc_freebucket != NULL)
8266fd34d6fSJeff Roberson 			bucket_free(zone, cache->uc_freebucket, NULL);
827c1685086SJeff Roberson 		cache->uc_freebucket = NULL;
828c1685086SJeff Roberson 		bucket_drain(zone, cache->uc_crossbucket);
829c1685086SJeff Roberson 		if (cache->uc_crossbucket != NULL)
830c1685086SJeff Roberson 			bucket_free(zone, cache->uc_crossbucket, NULL);
831c1685086SJeff Roberson 		cache->uc_crossbucket = NULL;
832d56368d7SBosko Milekic 	}
833aaa8bb16SJeff Roberson 	ZONE_LOCK(zone);
83408cfa56eSMark Johnston 	bucket_cache_reclaim(zone, true);
835aaa8bb16SJeff Roberson 	ZONE_UNLOCK(zone);
836aaa8bb16SJeff Roberson }
837aaa8bb16SJeff Roberson 
838a2de44abSAlexander Motin static void
83920a4e154SJeff Roberson cache_shrink(uma_zone_t zone, void *unused)
840a2de44abSAlexander Motin {
841a2de44abSAlexander Motin 
842a2de44abSAlexander Motin 	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
843a2de44abSAlexander Motin 		return;
844a2de44abSAlexander Motin 
845a2de44abSAlexander Motin 	ZONE_LOCK(zone);
84620a4e154SJeff Roberson 	zone->uz_bucket_size =
84720a4e154SJeff Roberson 	    (zone->uz_bucket_size_min + zone->uz_bucket_size) / 2;
848a2de44abSAlexander Motin 	ZONE_UNLOCK(zone);
849a2de44abSAlexander Motin }
850a2de44abSAlexander Motin 
851a2de44abSAlexander Motin static void
85220a4e154SJeff Roberson cache_drain_safe_cpu(uma_zone_t zone, void *unused)
853a2de44abSAlexander Motin {
854a2de44abSAlexander Motin 	uma_cache_t cache;
855c1685086SJeff Roberson 	uma_bucket_t b1, b2, b3;
856ab3185d1SJeff Roberson 	int domain;
857a2de44abSAlexander Motin 
858a2de44abSAlexander Motin 	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
859a2de44abSAlexander Motin 		return;
860a2de44abSAlexander Motin 
861c1685086SJeff Roberson 	b1 = b2 = b3 = NULL;
862a2de44abSAlexander Motin 	ZONE_LOCK(zone);
863a2de44abSAlexander Motin 	critical_enter();
864ab3185d1SJeff Roberson 	if (zone->uz_flags & UMA_ZONE_NUMA)
865ab3185d1SJeff Roberson 		domain = PCPU_GET(domain);
866ab3185d1SJeff Roberson 	else
867ab3185d1SJeff Roberson 		domain = 0;
868a2de44abSAlexander Motin 	cache = &zone->uz_cpu[curcpu];
869a2de44abSAlexander Motin 	if (cache->uc_allocbucket) {
8708a8d9d14SAlexander Motin 		if (cache->uc_allocbucket->ub_cnt != 0)
8710f9b7bf3SMark Johnston 			zone_put_bucket(zone, &zone->uz_domain[domain],
8720f9b7bf3SMark Johnston 			    cache->uc_allocbucket, false);
8738a8d9d14SAlexander Motin 		else
8748a8d9d14SAlexander Motin 			b1 = cache->uc_allocbucket;
875a2de44abSAlexander Motin 		cache->uc_allocbucket = NULL;
876a2de44abSAlexander Motin 	}
877a2de44abSAlexander Motin 	if (cache->uc_freebucket) {
8788a8d9d14SAlexander Motin 		if (cache->uc_freebucket->ub_cnt != 0)
8790f9b7bf3SMark Johnston 			zone_put_bucket(zone, &zone->uz_domain[domain],
8800f9b7bf3SMark Johnston 			    cache->uc_freebucket, false);
8818a8d9d14SAlexander Motin 		else
8828a8d9d14SAlexander Motin 			b2 = cache->uc_freebucket;
883a2de44abSAlexander Motin 		cache->uc_freebucket = NULL;
884a2de44abSAlexander Motin 	}
885c1685086SJeff Roberson 	b3 = cache->uc_crossbucket;
886c1685086SJeff Roberson 	cache->uc_crossbucket = NULL;
887a2de44abSAlexander Motin 	critical_exit();
888a2de44abSAlexander Motin 	ZONE_UNLOCK(zone);
8898a8d9d14SAlexander Motin 	if (b1)
8908a8d9d14SAlexander Motin 		bucket_free(zone, b1, NULL);
8918a8d9d14SAlexander Motin 	if (b2)
8928a8d9d14SAlexander Motin 		bucket_free(zone, b2, NULL);
893c1685086SJeff Roberson 	if (b3) {
894c1685086SJeff Roberson 		bucket_drain(zone, b3);
895c1685086SJeff Roberson 		bucket_free(zone, b3, NULL);
896c1685086SJeff Roberson 	}
897a2de44abSAlexander Motin }
898a2de44abSAlexander Motin 
899a2de44abSAlexander Motin /*
900a2de44abSAlexander Motin  * Safely drain the per-CPU caches of a zone (or all zones) into the zone bucket caches.
901a2de44abSAlexander Motin  * This is an expensive call because it needs to bind to all CPUs
902a2de44abSAlexander Motin  * one by one and enter a critical section on each of them in order
903a2de44abSAlexander Motin  * to safely access their cache buckets.
904a2de44abSAlexander Motin  * The zone lock must not be held when calling this function.
905a2de44abSAlexander Motin  */
906a2de44abSAlexander Motin static void
90708cfa56eSMark Johnston pcpu_cache_drain_safe(uma_zone_t zone)
908a2de44abSAlexander Motin {
909a2de44abSAlexander Motin 	int cpu;
910a2de44abSAlexander Motin 
911a2de44abSAlexander Motin 	/*
912a2de44abSAlexander Motin 	 * Polite bucket size shrinking was not enough; shrink aggressively.
913a2de44abSAlexander Motin 	 */
914a2de44abSAlexander Motin 	if (zone)
91520a4e154SJeff Roberson 		cache_shrink(zone, NULL);
916a2de44abSAlexander Motin 	else
91720a4e154SJeff Roberson 		zone_foreach(cache_shrink, NULL);
918a2de44abSAlexander Motin 
919a2de44abSAlexander Motin 	CPU_FOREACH(cpu) {
920a2de44abSAlexander Motin 		thread_lock(curthread);
921a2de44abSAlexander Motin 		sched_bind(curthread, cpu);
922a2de44abSAlexander Motin 		thread_unlock(curthread);
923a2de44abSAlexander Motin 
924a2de44abSAlexander Motin 		if (zone)
92520a4e154SJeff Roberson 			cache_drain_safe_cpu(zone, NULL);
926a2de44abSAlexander Motin 		else
92720a4e154SJeff Roberson 			zone_foreach(cache_drain_safe_cpu, NULL);
928a2de44abSAlexander Motin 	}
929a2de44abSAlexander Motin 	thread_lock(curthread);
930a2de44abSAlexander Motin 	sched_unbind(curthread);
931a2de44abSAlexander Motin 	thread_unlock(curthread);
932a2de44abSAlexander Motin }
933a2de44abSAlexander Motin 
934aaa8bb16SJeff Roberson /*
93508cfa56eSMark Johnston  * Reclaim cached buckets from a zone.  All buckets are reclaimed if the caller
93608cfa56eSMark Johnston  * requested a drain, otherwise the per-domain caches are trimmed to their
93708cfa56eSMark Johnston  * estimated working set size.
938aaa8bb16SJeff Roberson  */
939aaa8bb16SJeff Roberson static void
94008cfa56eSMark Johnston bucket_cache_reclaim(uma_zone_t zone, bool drain)
941aaa8bb16SJeff Roberson {
942ab3185d1SJeff Roberson 	uma_zone_domain_t zdom;
943aaa8bb16SJeff Roberson 	uma_bucket_t bucket;
94408cfa56eSMark Johnston 	long target, tofree;
945ab3185d1SJeff Roberson 	int i;
9468355f576SJeff Roberson 
947ab3185d1SJeff Roberson 	for (i = 0; i < vm_ndomains; i++) {
948ab3185d1SJeff Roberson 		zdom = &zone->uz_domain[i];
94908cfa56eSMark Johnston 
95008cfa56eSMark Johnston 		/*
95108cfa56eSMark Johnston 		 * If we were asked to drain the zone, we are done only once
95208cfa56eSMark Johnston 		 * this bucket cache is empty.  Otherwise, we reclaim items in
95308cfa56eSMark Johnston 		 * excess of the zone's estimated working set size.  If the
95408cfa56eSMark Johnston 		 * difference nitems - imin is larger than the WSS estimate,
95508cfa56eSMark Johnston 		 * then the estimate will grow at the end of this interval and
95608cfa56eSMark Johnston 		 * we ignore the historical average.
95708cfa56eSMark Johnston 		 */
95808cfa56eSMark Johnston 		target = drain ? 0 : lmax(zdom->uzd_wss, zdom->uzd_nitems -
95908cfa56eSMark Johnston 		    zdom->uzd_imin);
96008cfa56eSMark Johnston 		while (zdom->uzd_nitems > target) {
96108cfa56eSMark Johnston 			bucket = TAILQ_LAST(&zdom->uzd_buckets, uma_bucketlist);
96208cfa56eSMark Johnston 			if (bucket == NULL)
96308cfa56eSMark Johnston 				break;
96408cfa56eSMark Johnston 			tofree = bucket->ub_cnt;
96508cfa56eSMark Johnston 			TAILQ_REMOVE(&zdom->uzd_buckets, bucket, ub_link);
96608cfa56eSMark Johnston 			zdom->uzd_nitems -= tofree;
96708cfa56eSMark Johnston 
96808cfa56eSMark Johnston 			/*
96908cfa56eSMark Johnston 			 * Shift the bounds of the current WSS interval to avoid
97008cfa56eSMark Johnston 			 * perturbing the estimate.
97108cfa56eSMark Johnston 			 */
97208cfa56eSMark Johnston 			zdom->uzd_imax -= lmin(zdom->uzd_imax, tofree);
97308cfa56eSMark Johnston 			zdom->uzd_imin -= lmin(zdom->uzd_imin, tofree);
97408cfa56eSMark Johnston 
9758355f576SJeff Roberson 			ZONE_UNLOCK(zone);
9768355f576SJeff Roberson 			bucket_drain(zone, bucket);
9776fd34d6fSJeff Roberson 			bucket_free(zone, bucket, NULL);
9788355f576SJeff Roberson 			ZONE_LOCK(zone);
9798355f576SJeff Roberson 		}
980ab3185d1SJeff Roberson 	}
981ace66b56SAlexander Motin 
982ace66b56SAlexander Motin 	/*
98308cfa56eSMark Johnston 	 * Shrink the zone bucket size to ensure that the per-CPU caches
98408cfa56eSMark Johnston 	 * don't grow too large.
985ace66b56SAlexander Motin 	 */
98620a4e154SJeff Roberson 	if (zone->uz_bucket_size > zone->uz_bucket_size_min)
98720a4e154SJeff Roberson 		zone->uz_bucket_size--;
9888355f576SJeff Roberson }
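/*
 * A worked example of the trim target above (illustrative numbers): if
 * a domain caches nitems = 1000 items, the interval floor is imin = 400
 * and uzd_wss = 500, then nitems - imin = 600 exceeds the WSS estimate,
 * so target = 600 and roughly 400 items are freed (in whole buckets).
 * Had the zone been idle (imin == nitems), nitems - imin would be 0 and
 * the historical estimate of 500 would be the target instead.
 */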
989fc03d22bSJeff Roberson 
990fc03d22bSJeff Roberson static void
991fc03d22bSJeff Roberson keg_free_slab(uma_keg_t keg, uma_slab_t slab, int start)
992fc03d22bSJeff Roberson {
993fc03d22bSJeff Roberson 	uint8_t *mem;
994fc03d22bSJeff Roberson 	int i;
995fc03d22bSJeff Roberson 	uint8_t flags;
996fc03d22bSJeff Roberson 
9971431a748SGleb Smirnoff 	CTR4(KTR_UMA, "keg_free_slab keg %s(%p) slab %p, returning %d bytes",
9981431a748SGleb Smirnoff 	    keg->uk_name, keg, slab, PAGE_SIZE * keg->uk_ppera);
9991431a748SGleb Smirnoff 
10001e0701e1SJeff Roberson 	mem = slab_data(slab, keg);
1001fc03d22bSJeff Roberson 	flags = slab->us_flags;
1002fc03d22bSJeff Roberson 	i = start;
1003fc03d22bSJeff Roberson 	if (keg->uk_fini != NULL) {
1004fc03d22bSJeff Roberson 		for (i--; i > -1; i--)
1005c5deaf04SGleb Smirnoff #ifdef INVARIANTS
1006c5deaf04SGleb Smirnoff 		/*
1007c5deaf04SGleb Smirnoff 		 * trash_fini implies that dtor was trash_dtor. trash_fini
1008c5deaf04SGleb Smirnoff 		 * would check that memory hasn't been modified since free,
1009c5deaf04SGleb Smirnoff 		 * which executed trash_dtor.
1010c5deaf04SGleb Smirnoff 		 * That's why we need to run uma_dbg_kskip() check here,
1011c5deaf04SGleb Smirnoff 		 * albeit we don't make skip check for other init/fini
1012c5deaf04SGleb Smirnoff 		 * invocations.
1013c5deaf04SGleb Smirnoff 		 */
10141e0701e1SJeff Roberson 		if (!uma_dbg_kskip(keg, slab_item(slab, keg, i)) ||
1015c5deaf04SGleb Smirnoff 		    keg->uk_fini != trash_fini)
1016c5deaf04SGleb Smirnoff #endif
10171e0701e1SJeff Roberson 			keg->uk_fini(slab_item(slab, keg, i), keg->uk_size);
1018fc03d22bSJeff Roberson 	}
1019fc03d22bSJeff Roberson 	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
1020fc03d22bSJeff Roberson 		zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
1021fc03d22bSJeff Roberson 	keg->uk_freef(mem, PAGE_SIZE * keg->uk_ppera, flags);
10222e47807cSJeff Roberson 	uma_total_dec(PAGE_SIZE * keg->uk_ppera);
10238355f576SJeff Roberson }
10248355f576SJeff Roberson 
10258355f576SJeff Roberson /*
1026e20a199fSJeff Roberson  * Frees pages from a keg back to the system.  This is done on demand from
10278355f576SJeff Roberson  * the pageout daemon.
10288355f576SJeff Roberson  *
1029e20a199fSJeff Roberson  * Returns nothing.
10308355f576SJeff Roberson  */
1031e20a199fSJeff Roberson static void
1032e20a199fSJeff Roberson keg_drain(uma_keg_t keg)
10338355f576SJeff Roberson {
10341e183df2SStefan Farfeleder 	struct slabhead freeslabs = { 0 };
1035ab3185d1SJeff Roberson 	uma_domain_t dom;
1036829be516SMark Johnston 	uma_slab_t slab, tmp;
1037ab3185d1SJeff Roberson 	int i;
10388355f576SJeff Roberson 
10398355f576SJeff Roberson 	/*
1040e20a199fSJeff Roberson 	 * We don't want to take pages from statically allocated kegs at this
10418355f576SJeff Roberson 	 * time.
10428355f576SJeff Roberson 	 */
1043099a0e58SBosko Milekic 	if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
10448355f576SJeff Roberson 		return;
10458355f576SJeff Roberson 
10461431a748SGleb Smirnoff 	CTR3(KTR_UMA, "keg_drain %s(%p) free items: %u",
10471431a748SGleb Smirnoff 	    keg->uk_name, keg, keg->uk_free);
1048e20a199fSJeff Roberson 	KEG_LOCK(keg);
1049099a0e58SBosko Milekic 	if (keg->uk_free == 0)
10508355f576SJeff Roberson 		goto finished;
10518355f576SJeff Roberson 
1052ab3185d1SJeff Roberson 	for (i = 0; i < vm_ndomains; i++) {
1053ab3185d1SJeff Roberson 		dom = &keg->uk_domain[i];
1054ab3185d1SJeff Roberson 		LIST_FOREACH_SAFE(slab, &dom->ud_free_slab, us_link, tmp) {
1055829be516SMark Johnston 			/* We have nowhere to free these to. */
1056829be516SMark Johnston 			if (slab->us_flags & UMA_SLAB_BOOT)
10578355f576SJeff Roberson 				continue;
10588355f576SJeff Roberson 
10598355f576SJeff Roberson 			LIST_REMOVE(slab, us_link);
1060099a0e58SBosko Milekic 			keg->uk_pages -= keg->uk_ppera;
1061099a0e58SBosko Milekic 			keg->uk_free -= keg->uk_ipers;
1062713deb36SJeff Roberson 
1063099a0e58SBosko Milekic 			if (keg->uk_flags & UMA_ZONE_HASH)
10641e0701e1SJeff Roberson 				UMA_HASH_REMOVE(&keg->uk_hash, slab);
1065713deb36SJeff Roberson 
10661e0701e1SJeff Roberson 			LIST_INSERT_HEAD(&freeslabs, slab, us_link);
1067713deb36SJeff Roberson 		}
1068ab3185d1SJeff Roberson 	}
1069ab3185d1SJeff Roberson 
1070713deb36SJeff Roberson finished:
1071e20a199fSJeff Roberson 	KEG_UNLOCK(keg);
1072713deb36SJeff Roberson 
10731e0701e1SJeff Roberson 	while ((slab = LIST_FIRST(&freeslabs)) != NULL) {
10741e0701e1SJeff Roberson 		LIST_REMOVE(slab, us_link);
10751645995bSKirk McKusick 		keg_free_slab(keg, slab, keg->uk_ipers);
10768355f576SJeff Roberson 	}
10778355f576SJeff Roberson }
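
/*
 * A note on the shape of keg_drain() above: free slabs are unlinked onto
 * a private list while the keg lock is held, and keg_free_slab() runs
 * only after the lock is dropped, since it may call item finalizers and
 * the page-free routine.  The same two-phase pattern, reduced to a
 * minimal sketch with hypothetical "cache" and "obj" names:
 *
 *	LIST_HEAD(, obj) tofree = LIST_HEAD_INITIALIZER(tofree);
 *	struct obj *o;
 *
 *	mtx_lock(&cache_lock);
 *	while ((o = LIST_FIRST(&cache)) != NULL) {
 *		LIST_REMOVE(o, o_link);
 *		LIST_INSERT_HEAD(&tofree, o, o_link);
 *	}
 *	mtx_unlock(&cache_lock);
 *	while ((o = LIST_FIRST(&tofree)) != NULL) {
 *		LIST_REMOVE(o, o_link);
 *		obj_destroy(o);
 *	}
 */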
10788355f576SJeff Roberson 
1079e20a199fSJeff Roberson static void
108008cfa56eSMark Johnston zone_reclaim(uma_zone_t zone, int waitok, bool drain)
1081e20a199fSJeff Roberson {
1082e20a199fSJeff Roberson 
10838355f576SJeff Roberson 	/*
1084e20a199fSJeff Roberson 	 * Set UMA_ZFLAG_RECLAIMING to interlock with zone_dtor() so we can
1085e20a199fSJeff Roberson 	 * release our locks as we go.  Only dtor() should do a WAITOK call
1086e20a199fSJeff Roberson 	 * since it is the only call that knows the structure will still be
1087e20a199fSJeff Roberson 	 * available when it wakes up.
1088e20a199fSJeff Roberson 	 */
1089e20a199fSJeff Roberson 	ZONE_LOCK(zone);
109008cfa56eSMark Johnston 	while (zone->uz_flags & UMA_ZFLAG_RECLAIMING) {
1091e20a199fSJeff Roberson 		if (waitok == M_NOWAIT)
1092e20a199fSJeff Roberson 			goto out;
1093af526374SJeff Roberson 		msleep(zone, zone->uz_lockptr, PVM, "zonedrain", 1);
1094e20a199fSJeff Roberson 	}
109508cfa56eSMark Johnston 	zone->uz_flags |= UMA_ZFLAG_RECLAIMING;
109608cfa56eSMark Johnston 	bucket_cache_reclaim(zone, drain);
1097e20a199fSJeff Roberson 	ZONE_UNLOCK(zone);
109808cfa56eSMark Johnston 
1099e20a199fSJeff Roberson 	/*
1100e20a199fSJeff Roberson 	 * The RECLAIMING flag protects us from being freed while
1101111fbcd5SBryan Venteicher 	 * we're running.  Normally the uma_rwlock would protect us but we
1102e20a199fSJeff Roberson 	 * must be able to release and acquire the right lock for each keg.
1103e20a199fSJeff Roberson 	 */
110408034d10SKonstantin Belousov 	if ((zone->uz_flags & UMA_ZFLAG_CACHE) == 0)
1105bb15d1c7SGleb Smirnoff 		keg_drain(zone->uz_keg);
1106e20a199fSJeff Roberson 	ZONE_LOCK(zone);
110708cfa56eSMark Johnston 	zone->uz_flags &= ~UMA_ZFLAG_RECLAIMING;
1108e20a199fSJeff Roberson 	wakeup(zone);
1109e20a199fSJeff Roberson out:
1110e20a199fSJeff Roberson 	ZONE_UNLOCK(zone);
1111e20a199fSJeff Roberson }
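
/*
 * The UMA_ZFLAG_RECLAIMING handshake above is the stock flag plus
 * msleep()/wakeup() idiom for serializing a slow operation without
 * holding a lock across all of it.  Reduced to its essentials, with a
 * hypothetical F_BUSY flag and lock:
 *
 *	mtx_lock(&obj_lock);
 *	while (obj_flags & F_BUSY)
 *		msleep(&obj, &obj_lock, PVM, "objbusy", hz);
 *	obj_flags |= F_BUSY;
 *	mtx_unlock(&obj_lock);
 *	slow_work();
 *	mtx_lock(&obj_lock);
 *	obj_flags &= ~F_BUSY;
 *	wakeup(&obj);
 *	mtx_unlock(&obj_lock);
 *
 * zone_reclaim() differs in that it keeps the zone locked across the
 * bucket reclaim, drops the lock only around keg_drain(), and returns
 * early rather than sleeping when the caller passed M_NOWAIT.
 */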
1112e20a199fSJeff Roberson 
111308cfa56eSMark Johnston static void
111420a4e154SJeff Roberson zone_drain(uma_zone_t zone, void *unused)
1115e20a199fSJeff Roberson {
1116e20a199fSJeff Roberson 
111708cfa56eSMark Johnston 	zone_reclaim(zone, M_NOWAIT, true);
111808cfa56eSMark Johnston }
111908cfa56eSMark Johnston 
112008cfa56eSMark Johnston static void
112120a4e154SJeff Roberson zone_trim(uma_zone_t zone, void *unused)
112208cfa56eSMark Johnston {
112308cfa56eSMark Johnston 
112408cfa56eSMark Johnston 	zone_reclaim(zone, M_NOWAIT, false);
1125e20a199fSJeff Roberson }
1126e20a199fSJeff Roberson 
1127e20a199fSJeff Roberson /*
1128e20a199fSJeff Roberson  * Allocate a new slab for a keg.  This does not insert the slab onto a list.
1129194a979eSMark Johnston  * If the allocation was successful, the keg lock will be held upon return;
1130194a979eSMark Johnston  * otherwise the keg will be left unlocked.
11318355f576SJeff Roberson  *
11328355f576SJeff Roberson  * Arguments:
113386220393SMark Johnston  *	flags   Wait flags for the item initialization routine
113486220393SMark Johnston  *	aflags  Wait flags for the slab allocation
11358355f576SJeff Roberson  *
11368355f576SJeff Roberson  * Returns:
11378355f576SJeff Roberson  *	The slab that was allocated or NULL if there is no memory and the
11388355f576SJeff Roberson  *	caller specified M_NOWAIT.
11398355f576SJeff Roberson  */
11408355f576SJeff Roberson static uma_slab_t
114186220393SMark Johnston keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int domain, int flags,
114286220393SMark Johnston     int aflags)
11438355f576SJeff Roberson {
1144e20a199fSJeff Roberson 	uma_alloc allocf;
1145099a0e58SBosko Milekic 	uma_slab_t slab;
11462e47807cSJeff Roberson 	unsigned long size;
114785dcf349SGleb Smirnoff 	uint8_t *mem;
114886220393SMark Johnston 	uint8_t sflags;
11498355f576SJeff Roberson 	int i;
11508355f576SJeff Roberson 
1151ab3185d1SJeff Roberson 	KASSERT(domain >= 0 && domain < vm_ndomains,
1152ab3185d1SJeff Roberson 	    ("keg_alloc_slab: domain %d out of range", domain));
1153bb15d1c7SGleb Smirnoff 	KEG_LOCK_ASSERT(keg);
1154bb15d1c7SGleb Smirnoff 	MPASS(zone->uz_lockptr == &keg->uk_lock);
1155a553d4b8SJeff Roberson 
1156e20a199fSJeff Roberson 	allocf = keg->uk_allocf;
1157e20a199fSJeff Roberson 	KEG_UNLOCK(keg);
1158a553d4b8SJeff Roberson 
1159194a979eSMark Johnston 	slab = NULL;
1160194a979eSMark Johnston 	mem = NULL;
1161099a0e58SBosko Milekic 	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
116286220393SMark Johnston 		slab = zone_alloc_item(keg->uk_slabzone, NULL, domain, aflags);
1163fc03d22bSJeff Roberson 		if (slab == NULL)
1164fc03d22bSJeff Roberson 			goto out;
1165a553d4b8SJeff Roberson 	}
1166a553d4b8SJeff Roberson 
11673370c5bfSJeff Roberson 	/*
11683370c5bfSJeff Roberson 	 * This reproduces the old vm_zone behavior of zero filling pages the
11693370c5bfSJeff Roberson 	 * first time they are added to a zone.
11703370c5bfSJeff Roberson 	 *
11713370c5bfSJeff Roberson 	 * Malloced items are zeroed in uma_zalloc.
11723370c5bfSJeff Roberson 	 */
11733370c5bfSJeff Roberson 
1174099a0e58SBosko Milekic 	if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
117586220393SMark Johnston 		aflags |= M_ZERO;
11763370c5bfSJeff Roberson 	else
117786220393SMark Johnston 		aflags &= ~M_ZERO;
11783370c5bfSJeff Roberson 
1179263811f7SKip Macy 	if (keg->uk_flags & UMA_ZONE_NODUMP)
118086220393SMark Johnston 		aflags |= M_NODUMP;
1181263811f7SKip Macy 
1182e20a199fSJeff Roberson 	/* zone is passed for legacy reasons. */
1183194a979eSMark Johnston 	size = keg->uk_ppera * PAGE_SIZE;
118486220393SMark Johnston 	mem = allocf(zone, size, domain, &sflags, aflags);
1185a553d4b8SJeff Roberson 	if (mem == NULL) {
1186b23f72e9SBrian Feldman 		if (keg->uk_flags & UMA_ZONE_OFFPAGE)
11870095a784SJeff Roberson 			zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
1188fc03d22bSJeff Roberson 		slab = NULL;
1189fc03d22bSJeff Roberson 		goto out;
1190a553d4b8SJeff Roberson 	}
11912e47807cSJeff Roberson 	uma_total_inc(size);
11928355f576SJeff Roberson 
11935c0e403bSJeff Roberson 	/* Point the slab into the allocated memory */
1194099a0e58SBosko Milekic 	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
1195099a0e58SBosko Milekic 		slab = (uma_slab_t )(mem + keg->uk_pgoff);
11961e0701e1SJeff Roberson 	else
11971e0701e1SJeff Roberson 		((uma_hash_slab_t)slab)->uhs_data = mem;
11985c0e403bSJeff Roberson 
1199e20a199fSJeff Roberson 	if (keg->uk_flags & UMA_ZONE_VTOSLAB)
1200099a0e58SBosko Milekic 		for (i = 0; i < keg->uk_ppera; i++)
1201584061b4SJeff Roberson 			vsetzoneslab((vm_offset_t)mem + (i * PAGE_SIZE),
1202584061b4SJeff Roberson 			    zone, slab);
12038355f576SJeff Roberson 
1204099a0e58SBosko Milekic 	slab->us_freecount = keg->uk_ipers;
120586220393SMark Johnston 	slab->us_flags = sflags;
1206ab3185d1SJeff Roberson 	slab->us_domain = domain;
12079b78b1f4SJeff Roberson 	BIT_FILL(keg->uk_ipers, &slab->us_free);
1208ef72505eSJeff Roberson #ifdef INVARIANTS
1209815db204SRyan Libby 	BIT_ZERO(keg->uk_ipers, slab_dbg_bits(slab, keg));
1210ef72505eSJeff Roberson #endif
1211099a0e58SBosko Milekic 
1212b23f72e9SBrian Feldman 	if (keg->uk_init != NULL) {
1213099a0e58SBosko Milekic 		for (i = 0; i < keg->uk_ipers; i++)
12141e0701e1SJeff Roberson 			if (keg->uk_init(slab_item(slab, keg, i),
121586220393SMark Johnston 			    keg->uk_size, flags) != 0)
1216b23f72e9SBrian Feldman 				break;
1217b23f72e9SBrian Feldman 		if (i != keg->uk_ipers) {
1218fc03d22bSJeff Roberson 			keg_free_slab(keg, slab, i);
1219fc03d22bSJeff Roberson 			slab = NULL;
1220fc03d22bSJeff Roberson 			goto out;
1221b23f72e9SBrian Feldman 		}
1222b23f72e9SBrian Feldman 	}
1223e20a199fSJeff Roberson 	KEG_LOCK(keg);
12245c0e403bSJeff Roberson 
12251431a748SGleb Smirnoff 	CTR3(KTR_UMA, "keg_alloc_slab: allocated slab %p for %s(%p)",
12261431a748SGleb Smirnoff 	    slab, keg->uk_name, keg);
12271431a748SGleb Smirnoff 
1228099a0e58SBosko Milekic 	if (keg->uk_flags & UMA_ZONE_HASH)
1229099a0e58SBosko Milekic 		UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
12308355f576SJeff Roberson 
1231099a0e58SBosko Milekic 	keg->uk_pages += keg->uk_ppera;
1232099a0e58SBosko Milekic 	keg->uk_free += keg->uk_ipers;
12338355f576SJeff Roberson 
1234194a979eSMark Johnston out:
12358355f576SJeff Roberson 	return (slab);
12368355f576SJeff Roberson }
12378355f576SJeff Roberson 
12388355f576SJeff Roberson /*
1239009b6fcbSJeff Roberson  * This function is intended to be used early on in place of page_alloc() so
1240009b6fcbSJeff Roberson  * that we may use the boot time page cache to satisfy allocations before
1241009b6fcbSJeff Roberson  * the VM is ready.
1242009b6fcbSJeff Roberson  */
1243009b6fcbSJeff Roberson static void *
1244ab3185d1SJeff Roberson startup_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
1245ab3185d1SJeff Roberson     int wait)
1246009b6fcbSJeff Roberson {
1247099a0e58SBosko Milekic 	uma_keg_t keg;
1248ac0a6fd0SGleb Smirnoff 	void *mem;
1249ac0a6fd0SGleb Smirnoff 	int pages;
1250099a0e58SBosko Milekic 
1251bb15d1c7SGleb Smirnoff 	keg = zone->uz_keg;
1252009b6fcbSJeff Roberson 	/*
1253f7d35785SGleb Smirnoff 	 * If we are in BOOT_BUCKETS or higher, then switch to the real
1254f7d35785SGleb Smirnoff 	 * allocator.  Zones with page sized slabs switch at BOOT_PAGEALLOC.
1255009b6fcbSJeff Roberson 	 */
1256f7d35785SGleb Smirnoff 	switch (booted) {
1257f7d35785SGleb Smirnoff 		case BOOT_COLD:
1258f7d35785SGleb Smirnoff 		case BOOT_STRAPPED:
1259f7d35785SGleb Smirnoff 			break;
1260f7d35785SGleb Smirnoff 		case BOOT_PAGEALLOC:
1261f7d35785SGleb Smirnoff 			if (keg->uk_ppera > 1)
1262f7d35785SGleb Smirnoff 				break;
1263f7d35785SGleb Smirnoff 		case BOOT_BUCKETS:
1264f7d35785SGleb Smirnoff 		case BOOT_RUNNING:
1265009b6fcbSJeff Roberson #ifdef UMA_MD_SMALL_ALLOC
1266f7d35785SGleb Smirnoff 			keg->uk_allocf = (keg->uk_ppera > 1) ?
1267f7d35785SGleb Smirnoff 			    page_alloc : uma_small_alloc;
1268009b6fcbSJeff Roberson #else
1269099a0e58SBosko Milekic 			keg->uk_allocf = page_alloc;
1270009b6fcbSJeff Roberson #endif
1271ab3185d1SJeff Roberson 			return keg->uk_allocf(zone, bytes, domain, pflag, wait);
1272009b6fcbSJeff Roberson 	}
1273009b6fcbSJeff Roberson 
1274009b6fcbSJeff Roberson 	/*
1275f7d35785SGleb Smirnoff 	 * Check our small startup cache to see if it has pages remaining.
1276f7d35785SGleb Smirnoff 	 */
1277f7d35785SGleb Smirnoff 	pages = howmany(bytes, PAGE_SIZE);
1278f7d35785SGleb Smirnoff 	KASSERT(pages > 0, ("%s can't reserve 0 pages", __func__));
1279f7d35785SGleb Smirnoff 	if (pages > boot_pages)
1280f7d35785SGleb Smirnoff 		panic("UMA zone \"%s\": Increase vm.boot_pages", zone->uz_name);
1281f7d35785SGleb Smirnoff #ifdef DIAGNOSTIC
1282f7d35785SGleb Smirnoff 	printf("%s from \"%s\", %d boot pages left\n", __func__, zone->uz_name,
1283f7d35785SGleb Smirnoff 	    boot_pages);
1284f7d35785SGleb Smirnoff #endif
1285f7d35785SGleb Smirnoff 	mem = bootmem;
1286f7d35785SGleb Smirnoff 	boot_pages -= pages;
1287f7d35785SGleb Smirnoff 	bootmem += pages * PAGE_SIZE;
1288f7d35785SGleb Smirnoff 	*pflag = UMA_SLAB_BOOT;
1289f7d35785SGleb Smirnoff 
1290f7d35785SGleb Smirnoff 	return (mem);
1291f7d35785SGleb Smirnoff }
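
/*
 * The startup cache consumed above is a plain bump allocator over the
 * static bootmem region.  As an illustrative example, a two-page
 * request arriving with boot_pages == 64 returns the current bootmem
 * pointer, advances bootmem by 2 * PAGE_SIZE and leaves boot_pages at
 * 62.  Nothing is ever handed back; pages allocated here are tagged
 * UMA_SLAB_BOOT precisely so that keg_drain() will skip them later.
 */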
1292f7d35785SGleb Smirnoff 
1293f7d35785SGleb Smirnoff /*
12948355f576SJeff Roberson  * Allocates a number of pages from the system
12958355f576SJeff Roberson  *
12968355f576SJeff Roberson  * Arguments:
12978355f576SJeff Roberson  *	bytes  The number of bytes requested
12988355f576SJeff Roberson  *	wait  Shall we wait?
12998355f576SJeff Roberson  *
13008355f576SJeff Roberson  * Returns:
13018355f576SJeff Roberson  *	A pointer to the allocated memory or possibly
13028355f576SJeff Roberson  *	NULL if M_NOWAIT is set.
13038355f576SJeff Roberson  */
13048355f576SJeff Roberson static void *
1305ab3185d1SJeff Roberson page_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
1306ab3185d1SJeff Roberson     int wait)
13078355f576SJeff Roberson {
13088355f576SJeff Roberson 	void *p;	/* Returned page */
13098355f576SJeff Roberson 
13102e47807cSJeff Roberson 	*pflag = UMA_SLAB_KERNEL;
13119978bd99SMark Johnston 	p = (void *)kmem_malloc_domainset(DOMAINSET_FIXED(domain), bytes, wait);
13128355f576SJeff Roberson 
13138355f576SJeff Roberson 	return (p);
13148355f576SJeff Roberson }
13158355f576SJeff Roberson 
1316ab3059a8SMatt Macy static void *
1317ab3059a8SMatt Macy pcpu_page_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
1318ab3059a8SMatt Macy     int wait)
1319ab3059a8SMatt Macy {
1320ab3059a8SMatt Macy 	struct pglist alloctail;
1321ab3059a8SMatt Macy 	vm_offset_t addr, zkva;
1322ab3059a8SMatt Macy 	int cpu, flags;
1323ab3059a8SMatt Macy 	vm_page_t p, p_next;
1324ab3059a8SMatt Macy #ifdef NUMA
1325ab3059a8SMatt Macy 	struct pcpu *pc;
1326ab3059a8SMatt Macy #endif
1327ab3059a8SMatt Macy 
1328ab3059a8SMatt Macy 	MPASS(bytes == (mp_maxid + 1) * PAGE_SIZE);
1329ab3059a8SMatt Macy 
1330013072f0SMark Johnston 	TAILQ_INIT(&alloctail);
1331ab3059a8SMatt Macy 	flags = VM_ALLOC_SYSTEM | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ |
1332013072f0SMark Johnston 	    malloc2vm_flags(wait);
1333013072f0SMark Johnston 	*pflag = UMA_SLAB_KERNEL;
1334ab3059a8SMatt Macy 	for (cpu = 0; cpu <= mp_maxid; cpu++) {
1335ab3059a8SMatt Macy 		if (CPU_ABSENT(cpu)) {
1336ab3059a8SMatt Macy 			p = vm_page_alloc(NULL, 0, flags);
1337ab3059a8SMatt Macy 		} else {
1338ab3059a8SMatt Macy #ifndef NUMA
1339ab3059a8SMatt Macy 			p = vm_page_alloc(NULL, 0, flags);
1340ab3059a8SMatt Macy #else
1341ab3059a8SMatt Macy 			pc = pcpu_find(cpu);
1342ab3059a8SMatt Macy 			p = vm_page_alloc_domain(NULL, 0, pc->pc_domain, flags);
1343ab3059a8SMatt Macy 			if (__predict_false(p == NULL))
1344ab3059a8SMatt Macy 				p = vm_page_alloc(NULL, 0, flags);
1345ab3059a8SMatt Macy #endif
1346ab3059a8SMatt Macy 		}
1347ab3059a8SMatt Macy 		if (__predict_false(p == NULL))
1348ab3059a8SMatt Macy 			goto fail;
1349ab3059a8SMatt Macy 		TAILQ_INSERT_TAIL(&alloctail, p, listq);
1350ab3059a8SMatt Macy 	}
1351ab3059a8SMatt Macy 	if ((addr = kva_alloc(bytes)) == 0)
1352ab3059a8SMatt Macy 		goto fail;
1353ab3059a8SMatt Macy 	zkva = addr;
1354ab3059a8SMatt Macy 	TAILQ_FOREACH(p, &alloctail, listq) {
1355ab3059a8SMatt Macy 		pmap_qenter(zkva, &p, 1);
1356ab3059a8SMatt Macy 		zkva += PAGE_SIZE;
1357ab3059a8SMatt Macy 	}
1358ab3059a8SMatt Macy 	return ((void*)addr);
1359ab3059a8SMatt Macy fail:
1360ab3059a8SMatt Macy 	TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) {
136188ea538aSMark Johnston 		vm_page_unwire_noq(p);
1362ab3059a8SMatt Macy 		vm_page_free(p);
1363ab3059a8SMatt Macy 	}
1364ab3059a8SMatt Macy 	return (NULL);
1365ab3059a8SMatt Macy }
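
/*
 * The result of pcpu_page_alloc() is one page of contiguous KVA per
 * possible CPU, each backed where possible by memory from that CPU's
 * NUMA domain, so CPU n's slice of a pcpu item lives at
 * addr + n * UMA_PCPU_ALLOC_SIZE.  A sketch of a consumer, assuming
 * the stock pcpu_zone_64 zone of per-CPU 64-bit counters:
 *
 *	uint64_t *base, *mine;
 *
 *	base = uma_zalloc_pcpu(pcpu_zone_64, M_WAITOK | M_ZERO);
 *	critical_enter();
 *	mine = zpcpu_get(base);
 *	(*mine)++;
 *	critical_exit();
 *	uma_zfree_pcpu(pcpu_zone_64, base);
 */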
1366ab3059a8SMatt Macy 
13678355f576SJeff Roberson /*
13688355f576SJeff Roberson  * Allocates a number of pages not belonging to a VM object
13698355f576SJeff Roberson  *
13708355f576SJeff Roberson  * Arguments:
13718355f576SJeff Roberson  *	bytes  The number of bytes requested
13728355f576SJeff Roberson  *	wait   Shall we wait?
13738355f576SJeff Roberson  *
13748355f576SJeff Roberson  * Returns:
13758355f576SJeff Roberson  *	A pointer to the allocated memory or possibly
13768355f576SJeff Roberson  *	NULL if M_NOWAIT is set.
13778355f576SJeff Roberson  */
13788355f576SJeff Roberson static void *
1379ab3185d1SJeff Roberson noobj_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *flags,
1380ab3185d1SJeff Roberson     int wait)
13818355f576SJeff Roberson {
1382a4915c21SAttilio Rao 	TAILQ_HEAD(, vm_page) alloctail;
1383a4915c21SAttilio Rao 	u_long npages;
1384b245ac95SAlan Cox 	vm_offset_t retkva, zkva;
1385a4915c21SAttilio Rao 	vm_page_t p, p_next;
1386e20a199fSJeff Roberson 	uma_keg_t keg;
13878355f576SJeff Roberson 
1388a4915c21SAttilio Rao 	TAILQ_INIT(&alloctail);
1389bb15d1c7SGleb Smirnoff 	keg = zone->uz_keg;
1390a4915c21SAttilio Rao 
1391a4915c21SAttilio Rao 	npages = howmany(bytes, PAGE_SIZE);
1392a4915c21SAttilio Rao 	while (npages > 0) {
1393ab3185d1SJeff Roberson 		p = vm_page_alloc_domain(NULL, 0, domain, VM_ALLOC_INTERRUPT |
13948d6fbbb8SJeff Roberson 		    VM_ALLOC_WIRED | VM_ALLOC_NOOBJ |
1395772c8b67SKonstantin Belousov 		    ((wait & M_WAITOK) != 0 ? VM_ALLOC_WAITOK :
1396772c8b67SKonstantin Belousov 		    VM_ALLOC_NOWAIT));
1397a4915c21SAttilio Rao 		if (p != NULL) {
1398a4915c21SAttilio Rao 			/*
1399a4915c21SAttilio Rao 			 * Since the page does not belong to an object, its
1400a4915c21SAttilio Rao 			 * listq is unused.
1401a4915c21SAttilio Rao 			 */
1402a4915c21SAttilio Rao 			TAILQ_INSERT_TAIL(&alloctail, p, listq);
1403a4915c21SAttilio Rao 			npages--;
1404a4915c21SAttilio Rao 			continue;
1405a4915c21SAttilio Rao 		}
14068355f576SJeff Roberson 		/*
1407a4915c21SAttilio Rao 		 * Page allocation failed; free intermediate pages and
1408a4915c21SAttilio Rao 		 * exit.
14098355f576SJeff Roberson 		 */
1410a4915c21SAttilio Rao 		TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) {
141188ea538aSMark Johnston 			vm_page_unwire_noq(p);
1412b245ac95SAlan Cox 			vm_page_free(p);
1413b245ac95SAlan Cox 		}
1414a4915c21SAttilio Rao 		return (NULL);
1415b245ac95SAlan Cox 	}
14168355f576SJeff Roberson 	*flags = UMA_SLAB_PRIV;
1417a4915c21SAttilio Rao 	zkva = keg->uk_kva +
1418a4915c21SAttilio Rao 	    atomic_fetchadd_long(&keg->uk_offset, round_page(bytes));
1419a4915c21SAttilio Rao 	retkva = zkva;
1420a4915c21SAttilio Rao 	TAILQ_FOREACH(p, &alloctail, listq) {
1421a4915c21SAttilio Rao 		pmap_qenter(zkva, &p, 1);
1422a4915c21SAttilio Rao 		zkva += PAGE_SIZE;
1423a4915c21SAttilio Rao 	}
14248355f576SJeff Roberson 
14258355f576SJeff Roberson 	return ((void *)retkva);
14268355f576SJeff Roberson }
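
/*
 * Note the lock-free KVA carving above: the keg's uk_kva range is
 * reserved up front (by uma_zone_reserve_kva()), and concurrent
 * callers claim disjoint chunks with a single atomic_fetchadd_long()
 * on uk_offset.  For instance, two racing two-page allocations observe
 * offsets 0 and 2 * PAGE_SIZE and map their pages into non-overlapping
 * KVA with pmap_qenter().
 */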
14278355f576SJeff Roberson 
14288355f576SJeff Roberson /*
14298355f576SJeff Roberson  * Frees a number of pages to the system
14308355f576SJeff Roberson  *
14318355f576SJeff Roberson  * Arguments:
14328355f576SJeff Roberson  *	mem   A pointer to the memory to be freed
14338355f576SJeff Roberson  *	size  The size of the memory being freed
14348355f576SJeff Roberson  *	flags The original p->us_flags field
14358355f576SJeff Roberson  *
14368355f576SJeff Roberson  * Returns:
14378355f576SJeff Roberson  *	Nothing
14388355f576SJeff Roberson  */
14398355f576SJeff Roberson static void
1440f2c2231eSRyan Stone page_free(void *mem, vm_size_t size, uint8_t flags)
14418355f576SJeff Roberson {
14423370c5bfSJeff Roberson 
144349bfa624SAlan Cox 	if ((flags & UMA_SLAB_KERNEL) == 0)
1444b5345ef1SJustin Hibbits 		panic("UMA: page_free used with invalid flags %x", flags);
14458355f576SJeff Roberson 
144649bfa624SAlan Cox 	kmem_free((vm_offset_t)mem, size);
14478355f576SJeff Roberson }
14488355f576SJeff Roberson 
14498355f576SJeff Roberson /*
1450ab3059a8SMatt Macy  * Frees pcpu zone allocations
1451ab3059a8SMatt Macy  *
1452ab3059a8SMatt Macy  * Arguments:
1453ab3059a8SMatt Macy  *	mem   A pointer to the memory to be freed
1454ab3059a8SMatt Macy  *	size  The size of the memory being freed
1455ab3059a8SMatt Macy  *	flags The original p->us_flags field
1456ab3059a8SMatt Macy  *
1457ab3059a8SMatt Macy  * Returns:
1458ab3059a8SMatt Macy  *	Nothing
1459ab3059a8SMatt Macy  */
1460ab3059a8SMatt Macy static void
1461ab3059a8SMatt Macy pcpu_page_free(void *mem, vm_size_t size, uint8_t flags)
1462ab3059a8SMatt Macy {
1463ab3059a8SMatt Macy 	vm_offset_t sva, curva;
1464ab3059a8SMatt Macy 	vm_paddr_t paddr;
1465ab3059a8SMatt Macy 	vm_page_t m;
1466ab3059a8SMatt Macy 
1467ab3059a8SMatt Macy 	MPASS(size == (mp_maxid+1)*PAGE_SIZE);
1468ab3059a8SMatt Macy 	sva = (vm_offset_t)mem;
1469ab3059a8SMatt Macy 	for (curva = sva; curva < sva + size; curva += PAGE_SIZE) {
1470ab3059a8SMatt Macy 		paddr = pmap_kextract(curva);
1471ab3059a8SMatt Macy 		m = PHYS_TO_VM_PAGE(paddr);
147288ea538aSMark Johnston 		vm_page_unwire_noq(m);
1473ab3059a8SMatt Macy 		vm_page_free(m);
1474ab3059a8SMatt Macy 	}
1475ab3059a8SMatt Macy 	pmap_qremove(sva, size >> PAGE_SHIFT);
1476ab3059a8SMatt Macy 	kva_free(sva, size);
1477ab3059a8SMatt Macy }
1478ab3059a8SMatt Macy 
1480ab3059a8SMatt Macy /*
14818355f576SJeff Roberson  * Zero fill initializer
14828355f576SJeff Roberson  *
14838355f576SJeff Roberson  * Arguments/Returns follow uma_init specifications
14848355f576SJeff Roberson  */
1485b23f72e9SBrian Feldman static int
1486b23f72e9SBrian Feldman zero_init(void *mem, int size, int flags)
14878355f576SJeff Roberson {
14888355f576SJeff Roberson 	bzero(mem, size);
1489b23f72e9SBrian Feldman 	return (0);
14908355f576SJeff Roberson }
14918355f576SJeff Roberson 
1492815db204SRyan Libby #ifdef INVARIANTS
1493815db204SRyan Libby struct noslabbits *
1494815db204SRyan Libby slab_dbg_bits(uma_slab_t slab, uma_keg_t keg)
1495815db204SRyan Libby {
1496815db204SRyan Libby 
1497815db204SRyan Libby 	return ((void *)((char *)&slab->us_free + BITSET_SIZE(keg->uk_ipers)));
1498815db204SRyan Libby }
1499815db204SRyan Libby #endif
1500815db204SRyan Libby 
15018355f576SJeff Roberson /*
15029b78b1f4SJeff Roberson  * Actual size of embedded struct slab (!OFFPAGE).
15039b78b1f4SJeff Roberson  */
15049b78b1f4SJeff Roberson size_t
15059b78b1f4SJeff Roberson slab_sizeof(int nitems)
15069b78b1f4SJeff Roberson {
15079b78b1f4SJeff Roberson 	size_t s;
15089b78b1f4SJeff Roberson 
1509815db204SRyan Libby 	s = sizeof(struct uma_slab) + BITSET_SIZE(nitems) * SLAB_BITSETS;
15109b78b1f4SJeff Roberson 	return (roundup(s, UMA_ALIGN_PTR + 1));
15119b78b1f4SJeff Roberson }
15129b78b1f4SJeff Roberson 
15139b78b1f4SJeff Roberson /*
15149b78b1f4SJeff Roberson  * Size of memory for embedded slabs (!OFFPAGE).
15159b78b1f4SJeff Roberson  */
15169b78b1f4SJeff Roberson size_t
15179b78b1f4SJeff Roberson slab_space(int nitems)
15189b78b1f4SJeff Roberson {
15199b78b1f4SJeff Roberson 	return (UMA_SLAB_SIZE - slab_sizeof(nitems));
15209b78b1f4SJeff Roberson }
15219b78b1f4SJeff Roberson 
15229b78b1f4SJeff Roberson /*
15239b78b1f4SJeff Roberson  * Compute the number of items that will fit in an embedded (!OFFPAGE) slab
15249b78b1f4SJeff Roberson  * with a given size and alignment.
15259b78b1f4SJeff Roberson  */
15269b78b1f4SJeff Roberson int
15279b78b1f4SJeff Roberson slab_ipers(size_t size, int align)
15289b78b1f4SJeff Roberson {
15299b78b1f4SJeff Roberson 	int rsize;
15309b78b1f4SJeff Roberson 	int nitems;
15319b78b1f4SJeff Roberson 
15329b78b1f4SJeff Roberson 	/*
15339b78b1f4SJeff Roberson 	 * Compute the ideal number of items that will fit in a page and
15349b78b1f4SJeff Roberson 	 * then compute the actual number based on a bitset nitems wide.
15359b78b1f4SJeff Roberson 	 */
15369b78b1f4SJeff Roberson 	rsize = roundup(size, align + 1);
15379b78b1f4SJeff Roberson 	nitems = UMA_SLAB_SIZE / rsize;
15389b78b1f4SJeff Roberson 	return (slab_space(nitems) / rsize);
15399b78b1f4SJeff Roberson }
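
/*
 * A worked example of the three helpers above, with illustrative
 * numbers (a 4 KB UMA_SLAB_SIZE, 128-byte items, pointer alignment):
 * the ideal count is 4096 / 128 = 32 items, so slab_sizeof(32) is
 * sizeof(struct uma_slab) plus SLAB_BITSETS bitsets each 32 bits wide,
 * rounded up to pointer alignment.  slab_space(32) is then a little
 * under 4 KB, and slab_ipers() returns slab_space(32) / 128, which can
 * come out one below the ideal 32 once the header is paid for.  Exact
 * values depend on the platform's structure sizes.
 */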
15409b78b1f4SJeff Roberson 
15419b78b1f4SJeff Roberson /*
1542e20a199fSJeff Roberson  * Finish creating a small uma keg.  This calculates ipers and the keg size.
15438355f576SJeff Roberson  *
15448355f576SJeff Roberson  * Arguments
1545e20a199fSJeff Roberson  *	keg  The zone we should initialize
15468355f576SJeff Roberson  *
15478355f576SJeff Roberson  * Returns
15488355f576SJeff Roberson  *	Nothing
15498355f576SJeff Roberson  */
15508355f576SJeff Roberson static void
1551e20a199fSJeff Roberson keg_small_init(uma_keg_t keg)
15528355f576SJeff Roberson {
1553244f4554SBosko Milekic 	u_int rsize;
1554244f4554SBosko Milekic 	u_int memused;
1555244f4554SBosko Milekic 	u_int wastedspace;
1556244f4554SBosko Milekic 	u_int shsize;
1557a55ebb7cSAndriy Gapon 	u_int slabsize;
15588355f576SJeff Roberson 
1559ad97af7eSGleb Smirnoff 	if (keg->uk_flags & UMA_ZONE_PCPU) {
156096c85efbSNathan Whitehorn 		u_int ncpus = (mp_maxid + 1) ? (mp_maxid + 1) : MAXCPU;
1561e28a647dSGleb Smirnoff 
1562ab3059a8SMatt Macy 		slabsize = UMA_PCPU_ALLOC_SIZE;
1563ab3059a8SMatt Macy 		keg->uk_ppera = ncpus;
1564ad97af7eSGleb Smirnoff 	} else {
1565a55ebb7cSAndriy Gapon 		slabsize = UMA_SLAB_SIZE;
1566ad97af7eSGleb Smirnoff 		keg->uk_ppera = 1;
1567ad97af7eSGleb Smirnoff 	}
1568ad97af7eSGleb Smirnoff 
1569ef72505eSJeff Roberson 	/*
1570ef72505eSJeff Roberson 	 * Calculate the size of each allocation (rsize) according to
1571ef72505eSJeff Roberson 	 * alignment.  If the requested size is smaller than we have
1572ef72505eSJeff Roberson 	 * allocation bits for we round it up.
1573ef72505eSJeff Roberson 	 */
1574099a0e58SBosko Milekic 	rsize = keg->uk_size;
15759b78b1f4SJeff Roberson 	if (rsize < slabsize / SLAB_MAX_SETSIZE)
15769b78b1f4SJeff Roberson 		rsize = slabsize / SLAB_MAX_SETSIZE;
1577099a0e58SBosko Milekic 	if (rsize & keg->uk_align)
15789b78b1f4SJeff Roberson 		rsize = roundup(rsize, keg->uk_align + 1);
1579099a0e58SBosko Milekic 	keg->uk_rsize = rsize;
1580ad97af7eSGleb Smirnoff 
1581ad97af7eSGleb Smirnoff 	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 ||
1582ab3059a8SMatt Macy 	    keg->uk_rsize < UMA_PCPU_ALLOC_SIZE,
1583ad97af7eSGleb Smirnoff 	    ("%s: size %u too large", __func__, keg->uk_rsize));
15848355f576SJeff Roberson 
15859b78b1f4SJeff Roberson 	/*
15869b78b1f4SJeff Roberson 	 * Use a pessimistic bit count for shsize.  It may be possible to
15879b78b1f4SJeff Roberson 	 * squeeze one more item in for very particular sizes if we were
15889b78b1f4SJeff Roberson 	 * to loop and reduce the bitsize if there is waste.
15899b78b1f4SJeff Roberson 	 */
1590ef72505eSJeff Roberson 	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
15912864dbbfSGleb Smirnoff 		shsize = 0;
1592ef72505eSJeff Roberson 	else
15939b78b1f4SJeff Roberson 		shsize = slab_sizeof(slabsize / rsize);
15948355f576SJeff Roberson 
15951ca6ed45SGleb Smirnoff 	if (rsize <= slabsize - shsize)
1596a55ebb7cSAndriy Gapon 		keg->uk_ipers = (slabsize - shsize) / rsize;
15971ca6ed45SGleb Smirnoff 	else {
15981ca6ed45SGleb Smirnoff 		/* Handle the special case of 1 item per slab, where the
15991ca6ed45SGleb Smirnoff 		 * alignment requirement can be relaxed. */
16001ca6ed45SGleb Smirnoff 		KASSERT(keg->uk_size <= slabsize - shsize,
16011ca6ed45SGleb Smirnoff 		    ("%s: size %u greater than slab", __func__, keg->uk_size));
16021ca6ed45SGleb Smirnoff 		keg->uk_ipers = 1;
16031ca6ed45SGleb Smirnoff 	}
16049b78b1f4SJeff Roberson 	KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_MAX_SETSIZE,
1605ad97af7eSGleb Smirnoff 	    ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
1606ad97af7eSGleb Smirnoff 
1607244f4554SBosko Milekic 	memused = keg->uk_ipers * rsize + shsize;
1608a55ebb7cSAndriy Gapon 	wastedspace = slabsize - memused;
1609244f4554SBosko Milekic 
161020e8e865SBosko Milekic 	/*
1611244f4554SBosko Milekic 	 * We can't do OFFPAGE if we're internal or if we've been
161220e8e865SBosko Milekic 	 * asked not to go to the VM for buckets.  Doing so could
16136fd34d6fSJeff Roberson 	 * send us to the VM for slab headers as well, which we must
16146fd34d6fSJeff Roberson 	 * not do when we're UMA_ZFLAG_CACHEONLY as a result of
16156fd34d6fSJeff Roberson 	 * UMA_ZONE_VM, which clearly forbids it.
161620e8e865SBosko Milekic 	 */
1617099a0e58SBosko Milekic 	if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
1618099a0e58SBosko Milekic 	    (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
16198355f576SJeff Roberson 		return;
1620244f4554SBosko Milekic 
1621ef72505eSJeff Roberson 	/*
1622ef72505eSJeff Roberson 	 * See if using an OFFPAGE slab will limit our waste.  Only do
1623ef72505eSJeff Roberson 	 * this if it permits more items per-slab.
1624ef72505eSJeff Roberson 	 *
1625ef72505eSJeff Roberson 	 * XXX We could try growing slabsize to limit max waste as well.
1626ef72505eSJeff Roberson 	 * Historically this was not done because the VM could not
1627ef72505eSJeff Roberson 	 * efficiently handle contiguous allocations.
1628ef72505eSJeff Roberson 	 */
1629a55ebb7cSAndriy Gapon 	if ((wastedspace >= slabsize / UMA_MAX_WASTE) &&
1630a55ebb7cSAndriy Gapon 	    (keg->uk_ipers < (slabsize / keg->uk_rsize))) {
1631a55ebb7cSAndriy Gapon 		keg->uk_ipers = slabsize / keg->uk_rsize;
16329b78b1f4SJeff Roberson 		KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_MAX_SETSIZE,
1633ad97af7eSGleb Smirnoff 		    ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
16341431a748SGleb Smirnoff 		CTR6(KTR_UMA, "UMA decided we need offpage slab headers for "
16351431a748SGleb Smirnoff 		    "keg: %s(%p), calculated wastedspace = %d, "
1636244f4554SBosko Milekic 		    "maximum wasted space allowed = %d, "
1637244f4554SBosko Milekic 		    "calculated ipers = %d, "
16381431a748SGleb Smirnoff 		    "new wasted space = %d\n", keg->uk_name, keg, wastedspace,
1639a55ebb7cSAndriy Gapon 		    slabsize / UMA_MAX_WASTE, keg->uk_ipers,
1640a55ebb7cSAndriy Gapon 		    slabsize - keg->uk_ipers * keg->uk_rsize);
164171353f7aSJeff Roberson 		/*
164271353f7aSJeff Roberson 		 * If we had access to memory to embed a slab header we
164371353f7aSJeff Roberson 		 * also have a page structure to use vtoslab() instead of
164471353f7aSJeff Roberson 		 * hash to find slabs.  If the zone was explicitly created
164571353f7aSJeff Roberson 		 * OFFPAGE we can't necessarily touch the memory.
164671353f7aSJeff Roberson 		 */
164771353f7aSJeff Roberson 		if ((keg->uk_flags & UMA_ZONE_OFFPAGE) == 0)
164871353f7aSJeff Roberson 			keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
16498355f576SJeff Roberson 	}
1650ad97af7eSGleb Smirnoff 
1651ad97af7eSGleb Smirnoff 	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
1652ad97af7eSGleb Smirnoff 	    (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
1653ad97af7eSGleb Smirnoff 		keg->uk_flags |= UMA_ZONE_HASH;
16548355f576SJeff Roberson }
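
/*
 * To make the waste accounting above concrete, take illustrative
 * numbers: a 4 KB slab, a 744-byte item and an 88-byte embedded header
 * give ipers = (4096 - 88) / 744 = 5, memused = 5 * 744 + 88 = 3808
 * and wastedspace = 288.  With UMA_MAX_WASTE == 10 the threshold is
 * 4096 / 10 = 409 bytes, so this keg keeps its embedded header; a size
 * whose waste crossed the threshold while OFFPAGE admitted more items
 * per slab would be switched to OFFPAGE (and VTOSLAB) instead.
 */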
16558355f576SJeff Roberson 
16568355f576SJeff Roberson /*
1657e20a199fSJeff Roberson  * Finish creating a large (> UMA_SLAB_SIZE) uma keg.  Just give in and do
16588355f576SJeff Roberson  * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
16598355f576SJeff Roberson  * more complicated.
16608355f576SJeff Roberson  *
16618355f576SJeff Roberson  * Arguments
1662e20a199fSJeff Roberson  *	keg  The keg we should initialize
16638355f576SJeff Roberson  *
16648355f576SJeff Roberson  * Returns
16658355f576SJeff Roberson  *	Nothing
16668355f576SJeff Roberson  */
16678355f576SJeff Roberson static void
1668e20a199fSJeff Roberson keg_large_init(uma_keg_t keg)
16698355f576SJeff Roberson {
16708355f576SJeff Roberson 
1671e20a199fSJeff Roberson 	KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
1672ad97af7eSGleb Smirnoff 	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
1673ad97af7eSGleb Smirnoff 	    ("%s: Cannot large-init a UMA_ZONE_PCPU keg", __func__));
167420e8e865SBosko Milekic 
1675ad97af7eSGleb Smirnoff 	keg->uk_ppera = howmany(keg->uk_size, PAGE_SIZE);
1676099a0e58SBosko Milekic 	keg->uk_ipers = 1;
1677e9a069d8SJohn Baldwin 	keg->uk_rsize = keg->uk_size;
1678e9a069d8SJohn Baldwin 
1679cec48e00SAlexander Motin 	/* Check whether we have enough space to not do OFFPAGE. */
16803d5e3df7SGleb Smirnoff 	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) == 0 &&
16819b78b1f4SJeff Roberson 	    PAGE_SIZE * keg->uk_ppera - keg->uk_rsize <
16829b78b1f4SJeff Roberson 	    slab_sizeof(SLAB_MIN_SETSIZE)) {
16832934eb8aSMark Johnston 		/*
16842934eb8aSMark Johnston 		 * We can't do OFFPAGE if we're internal, in which case
16852934eb8aSMark Johnston 		 * we need an extra page per allocation to contain the
16862934eb8aSMark Johnston 		 * slab header.
16872934eb8aSMark Johnston 		 */
16882934eb8aSMark Johnston 		if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) == 0)
168971353f7aSJeff Roberson 			keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
16902934eb8aSMark Johnston 		else
16912934eb8aSMark Johnston 			keg->uk_ppera++;
16922934eb8aSMark Johnston 	}
1693cec48e00SAlexander Motin 
1694cec48e00SAlexander Motin 	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
1695cec48e00SAlexander Motin 	    (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
1696099a0e58SBosko Milekic 		keg->uk_flags |= UMA_ZONE_HASH;
16978355f576SJeff Roberson }
16988355f576SJeff Roberson 
1699e20a199fSJeff Roberson static void
1700e20a199fSJeff Roberson keg_cachespread_init(uma_keg_t keg)
1701e20a199fSJeff Roberson {
1702e20a199fSJeff Roberson 	int alignsize;
1703e20a199fSJeff Roberson 	int trailer;
1704e20a199fSJeff Roberson 	int pages;
1705e20a199fSJeff Roberson 	int rsize;
1706e20a199fSJeff Roberson 
1707ad97af7eSGleb Smirnoff 	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
1708ad97af7eSGleb Smirnoff 	    ("%s: Cannot cachespread-init a UMA_ZONE_PCPU keg", __func__));
1709ad97af7eSGleb Smirnoff 
1710e20a199fSJeff Roberson 	alignsize = keg->uk_align + 1;
1711e20a199fSJeff Roberson 	rsize = keg->uk_size;
1712e20a199fSJeff Roberson 	/*
1713e20a199fSJeff Roberson 	 * We want one item to start on every align boundary in a page.  To
1714e20a199fSJeff Roberson 	 * do this we will span pages.  We will also extend the item by the
1715e20a199fSJeff Roberson 	 * size of align if it is an even multiple of align.  Otherwise, it
1716e20a199fSJeff Roberson 	 * would fall on the same boundary every time.
1717e20a199fSJeff Roberson 	 */
1718e20a199fSJeff Roberson 	if (rsize & keg->uk_align)
1719e20a199fSJeff Roberson 		rsize = (rsize & ~keg->uk_align) + alignsize;
1720e20a199fSJeff Roberson 	if ((rsize & alignsize) == 0)
1721e20a199fSJeff Roberson 		rsize += alignsize;
1722e20a199fSJeff Roberson 	trailer = rsize - keg->uk_size;
1723e20a199fSJeff Roberson 	pages = (rsize * (PAGE_SIZE / alignsize)) / PAGE_SIZE;
1724e20a199fSJeff Roberson 	pages = MIN(pages, (128 * 1024) / PAGE_SIZE);
1725e20a199fSJeff Roberson 	keg->uk_rsize = rsize;
1726e20a199fSJeff Roberson 	keg->uk_ppera = pages;
1727e20a199fSJeff Roberson 	keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize;
1728e20a199fSJeff Roberson 	keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
17299b78b1f4SJeff Roberson 	KASSERT(keg->uk_ipers <= SLAB_MAX_SETSIZE,
173042321809SGleb Smirnoff 	    ("%s: keg->uk_ipers too high(%d) increase max_ipers", __func__,
1731e20a199fSJeff Roberson 	    keg->uk_ipers));
1732e20a199fSJeff Roberson }
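
/*
 * Cachespread arithmetic, with illustrative numbers: 4 KB pages and a
 * 256-byte item aligned to 64 bytes (uk_align == 63) give alignsize =
 * 64.  256 is an even multiple of 64, so rsize becomes 256 + 64 = 320;
 * then pages = (320 * (4096 / 64)) / 4096 = 5 and ipers =
 * (5 * 4096 + 64) / 320 = 64.  Items step five 64-byte boundaries
 * apart, and since 5 is coprime to the 64 boundaries in a page, the
 * slab's items collectively start on every 64-byte offset, spreading
 * cache pressure across the set.
 */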
1733e20a199fSJeff Roberson 
17348355f576SJeff Roberson /*
1735099a0e58SBosko Milekic  * Keg header ctor.  This initializes all fields, locks, etc., and inserts
1736099a0e58SBosko Milekic  * the keg onto the global keg list.
17378355f576SJeff Roberson  *
17388355f576SJeff Roberson  * Arguments/Returns follow uma_ctor specifications
1739099a0e58SBosko Milekic  *	udata  Actually uma_kctor_args
1740099a0e58SBosko Milekic  */
1741b23f72e9SBrian Feldman static int
1742b23f72e9SBrian Feldman keg_ctor(void *mem, int size, void *udata, int flags)
1743099a0e58SBosko Milekic {
1744099a0e58SBosko Milekic 	struct uma_kctor_args *arg = udata;
1745099a0e58SBosko Milekic 	uma_keg_t keg = mem;
1746099a0e58SBosko Milekic 	uma_zone_t zone;
1747099a0e58SBosko Milekic 
1748099a0e58SBosko Milekic 	bzero(keg, size);
1749099a0e58SBosko Milekic 	keg->uk_size = arg->size;
1750099a0e58SBosko Milekic 	keg->uk_init = arg->uminit;
1751099a0e58SBosko Milekic 	keg->uk_fini = arg->fini;
1752099a0e58SBosko Milekic 	keg->uk_align = arg->align;
1753099a0e58SBosko Milekic 	keg->uk_free = 0;
17546fd34d6fSJeff Roberson 	keg->uk_reserve = 0;
1755099a0e58SBosko Milekic 	keg->uk_pages = 0;
1756099a0e58SBosko Milekic 	keg->uk_flags = arg->flags;
1757099a0e58SBosko Milekic 	keg->uk_slabzone = NULL;
1758099a0e58SBosko Milekic 
1759099a0e58SBosko Milekic 	/*
1760194a979eSMark Johnston 	 * We use a global round-robin policy by default.  Zones with
1761194a979eSMark Johnston 	 * UMA_ZONE_NUMA set will use first-touch instead, in which case the
1762194a979eSMark Johnston 	 * iterator is never run.
1763194a979eSMark Johnston 	 */
1764194a979eSMark Johnston 	keg->uk_dr.dr_policy = DOMAINSET_RR();
1765194a979eSMark Johnston 	keg->uk_dr.dr_iter = 0;
1766194a979eSMark Johnston 
1767194a979eSMark Johnston 	/*
1768099a0e58SBosko Milekic 	 * The master zone is passed to us at keg-creation time.
1769099a0e58SBosko Milekic 	 */
1770099a0e58SBosko Milekic 	zone = arg->zone;
1771e20a199fSJeff Roberson 	keg->uk_name = zone->uz_name;
1772099a0e58SBosko Milekic 
1773099a0e58SBosko Milekic 	if (arg->flags & UMA_ZONE_VM)
1774099a0e58SBosko Milekic 		keg->uk_flags |= UMA_ZFLAG_CACHEONLY;
1775099a0e58SBosko Milekic 
1776099a0e58SBosko Milekic 	if (arg->flags & UMA_ZONE_ZINIT)
1777099a0e58SBosko Milekic 		keg->uk_init = zero_init;
1778099a0e58SBosko Milekic 
1779cfcae3f8SGleb Smirnoff 	if (arg->flags & UMA_ZONE_MALLOC)
1780e20a199fSJeff Roberson 		keg->uk_flags |= UMA_ZONE_VTOSLAB;
1781e20a199fSJeff Roberson 
1782ad97af7eSGleb Smirnoff 	if (arg->flags & UMA_ZONE_PCPU)
1783ad97af7eSGleb Smirnoff #ifdef SMP
1784ad97af7eSGleb Smirnoff 		keg->uk_flags |= UMA_ZONE_OFFPAGE;
1785ad97af7eSGleb Smirnoff #else
1786ad97af7eSGleb Smirnoff 		keg->uk_flags &= ~UMA_ZONE_PCPU;
1787ad97af7eSGleb Smirnoff #endif
1788ad97af7eSGleb Smirnoff 
1789ef72505eSJeff Roberson 	if (keg->uk_flags & UMA_ZONE_CACHESPREAD) {
1790e20a199fSJeff Roberson 		keg_cachespread_init(keg);
1791244f4554SBosko Milekic 	} else {
17929b78b1f4SJeff Roberson 		if (keg->uk_size > slab_space(SLAB_MIN_SETSIZE))
1793e20a199fSJeff Roberson 			keg_large_init(keg);
1794244f4554SBosko Milekic 		else
1795e20a199fSJeff Roberson 			keg_small_init(keg);
1796244f4554SBosko Milekic 	}
1797099a0e58SBosko Milekic 
1798cfcae3f8SGleb Smirnoff 	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
1799099a0e58SBosko Milekic 		keg->uk_slabzone = slabzone;
1800099a0e58SBosko Milekic 
1801099a0e58SBosko Milekic 	/*
1802099a0e58SBosko Milekic 	 * If we haven't booted yet we need allocations to go through the
1803099a0e58SBosko Milekic 	 * startup cache until the vm is ready.
1804099a0e58SBosko Milekic 	 */
1805f4bef67cSGleb Smirnoff 	if (booted < BOOT_PAGEALLOC)
18068cd02d00SAlan Cox 		keg->uk_allocf = startup_alloc;
180777e19437SGleb Smirnoff #ifdef UMA_MD_SMALL_ALLOC
180877e19437SGleb Smirnoff 	else if (keg->uk_ppera == 1)
180977e19437SGleb Smirnoff 		keg->uk_allocf = uma_small_alloc;
18108cd02d00SAlan Cox #endif
1811ab3059a8SMatt Macy 	else if (keg->uk_flags & UMA_ZONE_PCPU)
1812ab3059a8SMatt Macy 		keg->uk_allocf = pcpu_page_alloc;
181377e19437SGleb Smirnoff 	else
181477e19437SGleb Smirnoff 		keg->uk_allocf = page_alloc;
181577e19437SGleb Smirnoff #ifdef UMA_MD_SMALL_ALLOC
181677e19437SGleb Smirnoff 	if (keg->uk_ppera == 1)
181777e19437SGleb Smirnoff 		keg->uk_freef = uma_small_free;
181877e19437SGleb Smirnoff 	else
181977e19437SGleb Smirnoff #endif
1820ab3059a8SMatt Macy 	if (keg->uk_flags & UMA_ZONE_PCPU)
1821ab3059a8SMatt Macy 		keg->uk_freef = pcpu_page_free;
1822ab3059a8SMatt Macy 	else
182377e19437SGleb Smirnoff 		keg->uk_freef = page_free;
1824099a0e58SBosko Milekic 
1825099a0e58SBosko Milekic 	/*
1826af526374SJeff Roberson 	 * Initialize keg's lock
1827099a0e58SBosko Milekic 	 */
1828af526374SJeff Roberson 	KEG_LOCK_INIT(keg, (arg->flags & UMA_ZONE_MTXCLASS));
1829099a0e58SBosko Milekic 
1830099a0e58SBosko Milekic 	/*
1831099a0e58SBosko Milekic 	 * If we're putting the slab header in the actual page we need to
18329b78b1f4SJeff Roberson 	 * figure out where in each page it goes.  See slab_sizeof
18339b78b1f4SJeff Roberson 	 * definition.
1834099a0e58SBosko Milekic 	 */
1835099a0e58SBosko Milekic 	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
18369b78b1f4SJeff Roberson 		size_t shsize;
18379b78b1f4SJeff Roberson 
18389b78b1f4SJeff Roberson 		shsize = slab_sizeof(keg->uk_ipers);
18399b78b1f4SJeff Roberson 		keg->uk_pgoff = (PAGE_SIZE * keg->uk_ppera) - shsize;
1840244f4554SBosko Milekic 		/*
1841244f4554SBosko Milekic 		 * The only way the following is possible is if our
1842244f4554SBosko Milekic 		 * UMA_ALIGN_PTR adjustments have made us bigger than
1843244f4554SBosko Milekic 		 * UMA_SLAB_SIZE.  I haven't checked whether this is
1844244f4554SBosko Milekic 		 * mathematically possible for all cases, so we make
1845244f4554SBosko Milekic 		 * sure here anyway.
1846244f4554SBosko Milekic 		 */
18479b78b1f4SJeff Roberson 		KASSERT(keg->uk_pgoff + shsize <= PAGE_SIZE * keg->uk_ppera,
18483d5e3df7SGleb Smirnoff 		    ("zone %s ipers %d rsize %d size %d slab won't fit",
18493d5e3df7SGleb Smirnoff 		    zone->uz_name, keg->uk_ipers, keg->uk_rsize, keg->uk_size));
1850099a0e58SBosko Milekic 	}
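
	/*
	 * As a concrete instance of the pgoff computation, assume (all
	 * values illustrative) PAGE_SIZE == 4096, uk_ppera == 1 and a
	 * slab_sizeof(uk_ipers) of 96 bytes: uk_pgoff = 4096 - 96 =
	 * 4000, i.e. the slab header occupies the last 96 bytes of the
	 * page and the items fill the space in front of it.
	 */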
1851099a0e58SBosko Milekic 
1852099a0e58SBosko Milekic 	if (keg->uk_flags & UMA_ZONE_HASH)
18533b2f2cb8SAlexander Motin 		hash_alloc(&keg->uk_hash, 0);
1854099a0e58SBosko Milekic 
18551431a748SGleb Smirnoff 	CTR5(KTR_UMA, "keg_ctor %p zone %s(%p) out %d free %d\n",
18561431a748SGleb Smirnoff 	    keg, zone->uz_name, zone,
185757223e99SAndriy Gapon 	    (keg->uk_pages / keg->uk_ppera) * keg->uk_ipers - keg->uk_free,
185857223e99SAndriy Gapon 	    keg->uk_free);
1859099a0e58SBosko Milekic 
1860099a0e58SBosko Milekic 	LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
1861099a0e58SBosko Milekic 
1862111fbcd5SBryan Venteicher 	rw_wlock(&uma_rwlock);
1863099a0e58SBosko Milekic 	LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
1864111fbcd5SBryan Venteicher 	rw_wunlock(&uma_rwlock);
1865b23f72e9SBrian Feldman 	return (0);
1866099a0e58SBosko Milekic }
1867099a0e58SBosko Milekic 
18682efcc8cbSGleb Smirnoff static void
186920a4e154SJeff Roberson zone_alloc_counters(uma_zone_t zone, void *unused)
18702efcc8cbSGleb Smirnoff {
18712efcc8cbSGleb Smirnoff 
18722efcc8cbSGleb Smirnoff 	zone->uz_allocs = counter_u64_alloc(M_WAITOK);
18732efcc8cbSGleb Smirnoff 	zone->uz_frees = counter_u64_alloc(M_WAITOK);
18742efcc8cbSGleb Smirnoff 	zone->uz_fails = counter_u64_alloc(M_WAITOK);
18752efcc8cbSGleb Smirnoff }
18762efcc8cbSGleb Smirnoff 
187720a4e154SJeff Roberson static void
187820a4e154SJeff Roberson zone_alloc_sysctl(uma_zone_t zone, void *unused)
187920a4e154SJeff Roberson {
188020a4e154SJeff Roberson 	uma_zone_domain_t zdom;
188120a4e154SJeff Roberson 	uma_keg_t keg;
188220a4e154SJeff Roberson 	struct sysctl_oid *oid, *domainoid;
18833b490537SJeff Roberson 	int domains, i, cnt;
188420a4e154SJeff Roberson 	static const char *nokeg = "cache zone";
188520a4e154SJeff Roberson 	char *c;
188620a4e154SJeff Roberson 
188720a4e154SJeff Roberson 	/*
188820a4e154SJeff Roberson 	 * Make a sysctl-safe copy of the zone name by removing
188920a4e154SJeff Roberson 	 * any special characters and handling duplicates by appending
189020a4e154SJeff Roberson 	 * an index.
189120a4e154SJeff Roberson 	 */
189220a4e154SJeff Roberson 	if (zone->uz_namecnt != 0) {
18933b490537SJeff Roberson 		/* Count the number of decimal digits and '_' separator. */
18943b490537SJeff Roberson 		for (i = 1, cnt = zone->uz_namecnt; cnt != 0; i++)
18953b490537SJeff Roberson 			cnt /= 10;
18963b490537SJeff Roberson 		zone->uz_ctlname = malloc(strlen(zone->uz_name) + i + 1,
18973b490537SJeff Roberson 		    M_UMA, M_WAITOK);
189820a4e154SJeff Roberson 		sprintf(zone->uz_ctlname, "%s_%d", zone->uz_name,
189920a4e154SJeff Roberson 		    zone->uz_namecnt);
190020a4e154SJeff Roberson 	} else
190120a4e154SJeff Roberson 		zone->uz_ctlname = strdup(zone->uz_name, M_UMA);
190220a4e154SJeff Roberson 	for (c = zone->uz_ctlname; *c != '\0'; c++)
190320a4e154SJeff Roberson 		if (strchr("./\\ -", *c) != NULL)
190420a4e154SJeff Roberson 			*c = '_';
190520a4e154SJeff Roberson 
190620a4e154SJeff Roberson 	/*
190720a4e154SJeff Roberson 	 * Basic parameters at the root.
190820a4e154SJeff Roberson 	 */
190920a4e154SJeff Roberson 	zone->uz_oid = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_vm_uma),
191020a4e154SJeff Roberson 	    OID_AUTO, zone->uz_ctlname, CTLFLAG_RD, NULL, "");
191120a4e154SJeff Roberson 	oid = zone->uz_oid;
191220a4e154SJeff Roberson 	SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
191320a4e154SJeff Roberson 	    "size", CTLFLAG_RD, &zone->uz_size, 0, "Allocation size");
19146d204a6aSRyan Libby 	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
19156d204a6aSRyan Libby 	    "flags", CTLFLAG_RD | CTLTYPE_STRING | CTLFLAG_MPSAFE,
19166d204a6aSRyan Libby 	    zone, 0, sysctl_handle_uma_zone_flags, "A",
191720a4e154SJeff Roberson 	    "Allocator configuration flags");
191820a4e154SJeff Roberson 	SYSCTL_ADD_U16(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
191920a4e154SJeff Roberson 	    "bucket_size", CTLFLAG_RD, &zone->uz_bucket_size, 0,
192020a4e154SJeff Roberson 	    "Desired per-cpu cache size");
192120a4e154SJeff Roberson 	SYSCTL_ADD_U16(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
192220a4e154SJeff Roberson 	    "bucket_size_max", CTLFLAG_RD, &zone->uz_bucket_size_max, 0,
192320a4e154SJeff Roberson 	    "Maximum allowed per-cpu cache size");
192420a4e154SJeff Roberson 
192520a4e154SJeff Roberson 	/*
192620a4e154SJeff Roberson 	 * keg if present.
192720a4e154SJeff Roberson 	 */
192820a4e154SJeff Roberson 	oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid), OID_AUTO,
192920a4e154SJeff Roberson 	    "keg", CTLFLAG_RD, NULL, "");
193020a4e154SJeff Roberson 	keg = zone->uz_keg;
19313b490537SJeff Roberson 	if ((zone->uz_flags & UMA_ZFLAG_CACHE) == 0) {
193220a4e154SJeff Roberson 		SYSCTL_ADD_CONST_STRING(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
193320a4e154SJeff Roberson 		    "name", CTLFLAG_RD, keg->uk_name, "Keg name");
193420a4e154SJeff Roberson 		SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
193520a4e154SJeff Roberson 		    "rsize", CTLFLAG_RD, &keg->uk_rsize, 0,
193620a4e154SJeff Roberson 		    "Real object size with alignment");
193720a4e154SJeff Roberson 		SYSCTL_ADD_U16(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
193820a4e154SJeff Roberson 		    "ppera", CTLFLAG_RD, &keg->uk_ppera, 0,
193920a4e154SJeff Roberson 		    "pages per-slab allocation");
194020a4e154SJeff Roberson 		SYSCTL_ADD_U16(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
194120a4e154SJeff Roberson 		    "ipers", CTLFLAG_RD, &keg->uk_ipers, 0,
194220a4e154SJeff Roberson 		    "items available per-slab");
194320a4e154SJeff Roberson 		SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
194420a4e154SJeff Roberson 		    "align", CTLFLAG_RD, &keg->uk_align, 0,
194520a4e154SJeff Roberson 		    "item alignment mask");
194620a4e154SJeff Roberson 		SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
194720a4e154SJeff Roberson 		    "pages", CTLFLAG_RD, &keg->uk_pages, 0,
194820a4e154SJeff Roberson 		    "Total pages currently allocated from VM");
194920a4e154SJeff Roberson 		SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
195020a4e154SJeff Roberson 		    "free", CTLFLAG_RD, &keg->uk_free, 0,
195120a4e154SJeff Roberson 		    "items free in the slab layer");
1952f7af5015SRyan Libby 		SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
1953f7af5015SRyan Libby 		    "efficiency", CTLFLAG_RD | CTLTYPE_INT | CTLFLAG_MPSAFE,
1954f7af5015SRyan Libby 		    keg, 0, sysctl_handle_uma_slab_efficiency, "I",
1955f7af5015SRyan Libby 		    "Slab utilization (100 - internal fragmentation %)");
195620a4e154SJeff Roberson 	} else
195720a4e154SJeff Roberson 		SYSCTL_ADD_CONST_STRING(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
195820a4e154SJeff Roberson 		    "name", CTLFLAG_RD, nokeg, "Keg name");
195920a4e154SJeff Roberson 
196020a4e154SJeff Roberson 	/*
196120a4e154SJeff Roberson 	 * Information about zone limits.
196220a4e154SJeff Roberson 	 */
196320a4e154SJeff Roberson 	oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid), OID_AUTO,
196420a4e154SJeff Roberson 	    "limit", CTLFLAG_RD, NULL, "");
196520a4e154SJeff Roberson 	SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
196620a4e154SJeff Roberson 	    "items", CTLFLAG_RD, &zone->uz_items, 0,
196720a4e154SJeff Roberson 	    "current number of cached items");
196820a4e154SJeff Roberson 	SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
196920a4e154SJeff Roberson 	    "max_items", CTLFLAG_RD, &zone->uz_max_items, 0,
197020a4e154SJeff Roberson 	    "Maximum number of cached items");
197120a4e154SJeff Roberson 	SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
197220a4e154SJeff Roberson 	    "sleepers", CTLFLAG_RD, &zone->uz_sleepers, 0,
197320a4e154SJeff Roberson 	    "Number of threads sleeping at limit");
197420a4e154SJeff Roberson 	SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
197520a4e154SJeff Roberson 	    "sleeps", CTLFLAG_RD, &zone->uz_sleeps, 0,
197620a4e154SJeff Roberson 	    "Total zone limit sleeps");
197720a4e154SJeff Roberson 
197820a4e154SJeff Roberson 	/*
197920a4e154SJeff Roberson 	 * Per-domain information.
198020a4e154SJeff Roberson 	 */
198120a4e154SJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_NUMA) != 0)
198220a4e154SJeff Roberson 		domains = vm_ndomains;
198320a4e154SJeff Roberson 	else
198420a4e154SJeff Roberson 		domains = 1;
198520a4e154SJeff Roberson 	domainoid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid),
198620a4e154SJeff Roberson 	    OID_AUTO, "domain", CTLFLAG_RD, NULL, "");
198720a4e154SJeff Roberson 	for (i = 0; i < domains; i++) {
198820a4e154SJeff Roberson 		zdom = &zone->uz_domain[i];
198920a4e154SJeff Roberson 		oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(domainoid),
199020a4e154SJeff Roberson 		    OID_AUTO, VM_DOMAIN(i)->vmd_name, CTLFLAG_RD, NULL, "");
199120a4e154SJeff Roberson 		SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
199220a4e154SJeff Roberson 		    "nitems", CTLFLAG_RD, &zdom->uzd_nitems,
199320a4e154SJeff Roberson 		    "number of items in this domain");
199420a4e154SJeff Roberson 		SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
199520a4e154SJeff Roberson 		    "imax", CTLFLAG_RD, &zdom->uzd_imax,
199620a4e154SJeff Roberson 		    "maximum item count in this period");
199720a4e154SJeff Roberson 		SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
199820a4e154SJeff Roberson 		    "imin", CTLFLAG_RD, &zdom->uzd_imin,
199920a4e154SJeff Roberson 		    "minimum item count in this period");
200020a4e154SJeff Roberson 		SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
200120a4e154SJeff Roberson 		    "wss", CTLFLAG_RD, &zdom->uzd_wss,
200220a4e154SJeff Roberson 		    "Working set size");
200320a4e154SJeff Roberson 	}
200420a4e154SJeff Roberson 
200520a4e154SJeff Roberson 	/*
200620a4e154SJeff Roberson 	 * General statistics.
200720a4e154SJeff Roberson 	 */
200820a4e154SJeff Roberson 	oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid), OID_AUTO,
200920a4e154SJeff Roberson 	    "stats", CTLFLAG_RD, NULL, "");
201020a4e154SJeff Roberson 	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
201120a4e154SJeff Roberson 	    "current", CTLFLAG_RD | CTLTYPE_INT | CTLFLAG_MPSAFE,
201220a4e154SJeff Roberson 	    zone, 1, sysctl_handle_uma_zone_cur, "I",
201320a4e154SJeff Roberson 	    "Current number of allocated items");
201420a4e154SJeff Roberson 	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
201520a4e154SJeff Roberson 	    "allocs", CTLFLAG_RD | CTLTYPE_U64 | CTLFLAG_MPSAFE,
201620a4e154SJeff Roberson 	    zone, 0, sysctl_handle_uma_zone_allocs, "QU",
201720a4e154SJeff Roberson 	    "Total allocation calls");
201820a4e154SJeff Roberson 	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
201920a4e154SJeff Roberson 	    "frees", CTLFLAG_RD | CTLTYPE_U64 | CTLFLAG_MPSAFE,
202020a4e154SJeff Roberson 	    zone, 0, sysctl_handle_uma_zone_frees, "QU",
202120a4e154SJeff Roberson 	    "Total free calls");
202220a4e154SJeff Roberson 	SYSCTL_ADD_COUNTER_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
202320a4e154SJeff Roberson 	    "fails", CTLFLAG_RD, &zone->uz_fails,
202420a4e154SJeff Roberson 	    "Number of allocation failures");
202520a4e154SJeff Roberson 	SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
202620a4e154SJeff Roberson 	    "xdomain", CTLFLAG_RD, &zone->uz_xdomain, 0,
202720a4e154SJeff Roberson 	    "Free calls from the wrong domain");
202820a4e154SJeff Roberson }
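
/*
 * The net effect of zone_alloc_sysctl() is a per-zone subtree under
 * vm.uma.  For example, a zone named "mbuf" exports nodes such as
 * vm.uma.mbuf.size, vm.uma.mbuf.keg.ipers and vm.uma.mbuf.stats.allocs,
 * and a second zone created with the same name becomes vm.uma.mbuf_1
 * through uz_namecnt.  From userland these read as, e.g.:
 *
 *	$ sysctl vm.uma.mbuf.keg.ipers
 *
 * (Zone names here are illustrative; the subtree root follows the
 * vm_uma sysctl node referenced above.)
 */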
202920a4e154SJeff Roberson 
203020a4e154SJeff Roberson struct uma_zone_count {
203120a4e154SJeff Roberson 	const char	*name;
203220a4e154SJeff Roberson 	int		count;
203320a4e154SJeff Roberson };
203420a4e154SJeff Roberson 
203520a4e154SJeff Roberson static void
203620a4e154SJeff Roberson zone_count(uma_zone_t zone, void *arg)
203720a4e154SJeff Roberson {
203820a4e154SJeff Roberson 	struct uma_zone_count *cnt;
203920a4e154SJeff Roberson 
204020a4e154SJeff Roberson 	cnt = arg;
20413b490537SJeff Roberson 	/*
20423b490537SJeff Roberson 	 * Some zones are rapidly created with identical names and
20433b490537SJeff Roberson 	 * destroyed out of order.  This can lead to gaps in the count.
20443b490537SJeff Roberson 	 * Use one greater than the maximum observed for this name.
20453b490537SJeff Roberson 	 */
204620a4e154SJeff Roberson 	if (strcmp(zone->uz_name, cnt->name) == 0)
20473b490537SJeff Roberson 		cnt->count = MAX(cnt->count,
20483b490537SJeff Roberson 		    zone->uz_namecnt + 1);
204920a4e154SJeff Roberson }
205020a4e154SJeff Roberson 
2051099a0e58SBosko Milekic /*
2052099a0e58SBosko Milekic  * Zone header ctor.  This initializes all fields, locks, etc.
2053099a0e58SBosko Milekic  *
2054099a0e58SBosko Milekic  * Arguments/Returns follow uma_ctor specifications
2055099a0e58SBosko Milekic  *	udata  Actually uma_zctor_args
20568355f576SJeff Roberson  */
2057b23f72e9SBrian Feldman static int
2058b23f72e9SBrian Feldman zone_ctor(void *mem, int size, void *udata, int flags)
20598355f576SJeff Roberson {
206020a4e154SJeff Roberson 	struct uma_zone_count cnt;
20618355f576SJeff Roberson 	struct uma_zctor_args *arg = udata;
20628355f576SJeff Roberson 	uma_zone_t zone = mem;
2063099a0e58SBosko Milekic 	uma_zone_t z;
2064099a0e58SBosko Milekic 	uma_keg_t keg;
206508cfa56eSMark Johnston 	int i;
20668355f576SJeff Roberson 
20678355f576SJeff Roberson 	bzero(zone, size);
20688355f576SJeff Roberson 	zone->uz_name = arg->name;
20698355f576SJeff Roberson 	zone->uz_ctor = arg->ctor;
20708355f576SJeff Roberson 	zone->uz_dtor = arg->dtor;
2071099a0e58SBosko Milekic 	zone->uz_init = NULL;
2072099a0e58SBosko Milekic 	zone->uz_fini = NULL;
2073bf965959SSean Bruno 	zone->uz_sleeps = 0;
2074c1685086SJeff Roberson 	zone->uz_xdomain = 0;
207520a4e154SJeff Roberson 	zone->uz_bucket_size = 0;
207620a4e154SJeff Roberson 	zone->uz_bucket_size_min = 0;
207720a4e154SJeff Roberson 	zone->uz_bucket_size_max = BUCKET_MAX;
2078e20a199fSJeff Roberson 	zone->uz_flags = 0;
20792f891cd5SPawel Jakub Dawidek 	zone->uz_warning = NULL;
2080ab3185d1SJeff Roberson 	/* The domain structures follow the cpu structures. */
2081ab3185d1SJeff Roberson 	zone->uz_domain = (struct uma_zone_domain *)&zone->uz_cpu[mp_ncpus];
2082bb15d1c7SGleb Smirnoff 	zone->uz_bkt_max = ULONG_MAX;
20832f891cd5SPawel Jakub Dawidek 	timevalclear(&zone->uz_ratecheck);
2084af526374SJeff Roberson 
208520a4e154SJeff Roberson 	/* Count the number of duplicate names. */
208620a4e154SJeff Roberson 	cnt.name = arg->name;
208720a4e154SJeff Roberson 	cnt.count = 0;
208820a4e154SJeff Roberson 	zone_foreach(zone_count, &cnt);
208920a4e154SJeff Roberson 	zone->uz_namecnt = cnt.count;
20902efcc8cbSGleb Smirnoff 
209108cfa56eSMark Johnston 	for (i = 0; i < vm_ndomains; i++)
209208cfa56eSMark Johnston 		TAILQ_INIT(&zone->uz_domain[i].uzd_buckets);
209308cfa56eSMark Johnston 
2094ca293436SRyan Libby #ifdef INVARIANTS
2095ca293436SRyan Libby 	if (arg->uminit == trash_init && arg->fini == trash_fini)
2096ca293436SRyan Libby 		zone->uz_flags |= UMA_ZFLAG_TRASH;
2097ca293436SRyan Libby #endif
2098ca293436SRyan Libby 
20990095a784SJeff Roberson 	/*
21000095a784SJeff Roberson 	 * This is a pure cache zone, no kegs.
21010095a784SJeff Roberson 	 */
21020095a784SJeff Roberson 	if (arg->import) {
21036fd34d6fSJeff Roberson 		if (arg->flags & UMA_ZONE_VM)
21046fd34d6fSJeff Roberson 			arg->flags |= UMA_ZFLAG_CACHEONLY;
21056fd34d6fSJeff Roberson 		zone->uz_flags = arg->flags;
2106af526374SJeff Roberson 		zone->uz_size = arg->size;
21070095a784SJeff Roberson 		zone->uz_import = arg->import;
21080095a784SJeff Roberson 		zone->uz_release = arg->release;
21090095a784SJeff Roberson 		zone->uz_arg = arg->arg;
2110af526374SJeff Roberson 		zone->uz_lockptr = &zone->uz_lock;
2111bb15d1c7SGleb Smirnoff 		ZONE_LOCK_INIT(zone, (arg->flags & UMA_ZONE_MTXCLASS));
2112111fbcd5SBryan Venteicher 		rw_wlock(&uma_rwlock);
211303175483SAlexander Motin 		LIST_INSERT_HEAD(&uma_cachezones, zone, uz_link);
2114111fbcd5SBryan Venteicher 		rw_wunlock(&uma_rwlock);
2115af526374SJeff Roberson 		goto out;
21160095a784SJeff Roberson 	}
21170095a784SJeff Roberson 
21180095a784SJeff Roberson 	/*
21190095a784SJeff Roberson 	 * Use the regular zone/keg/slab allocator.
21200095a784SJeff Roberson 	 */
2121b75c4efcSAndrew Turner 	zone->uz_import = zone_import;
2122b75c4efcSAndrew Turner 	zone->uz_release = zone_release;
21230095a784SJeff Roberson 	zone->uz_arg = zone;
2124bb15d1c7SGleb Smirnoff 	keg = arg->keg;
21250095a784SJeff Roberson 
2126099a0e58SBosko Milekic 	if (arg->flags & UMA_ZONE_SECONDARY) {
212720a4e154SJeff Roberson 		KASSERT((arg->flags & UMA_ZFLAG_INTERNAL) == 0,
212820a4e154SJeff Roberson 		    ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
2129099a0e58SBosko Milekic 		KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
21308355f576SJeff Roberson 		zone->uz_init = arg->uminit;
2131e221e841SJeff Roberson 		zone->uz_fini = arg->fini;
2132af526374SJeff Roberson 		zone->uz_lockptr = &keg->uk_lock;
2133e20a199fSJeff Roberson 		zone->uz_flags |= UMA_ZONE_SECONDARY;
2134111fbcd5SBryan Venteicher 		rw_wlock(&uma_rwlock);
2135099a0e58SBosko Milekic 		ZONE_LOCK(zone);
2136099a0e58SBosko Milekic 		LIST_FOREACH(z, &keg->uk_zones, uz_link) {
2137099a0e58SBosko Milekic 			if (LIST_NEXT(z, uz_link) == NULL) {
2138099a0e58SBosko Milekic 				LIST_INSERT_AFTER(z, zone, uz_link);
2139099a0e58SBosko Milekic 				break;
2140099a0e58SBosko Milekic 			}
2141099a0e58SBosko Milekic 		}
2142099a0e58SBosko Milekic 		ZONE_UNLOCK(zone);
2143111fbcd5SBryan Venteicher 		rw_wunlock(&uma_rwlock);
2144e20a199fSJeff Roberson 	} else if (keg == NULL) {
2145e20a199fSJeff Roberson 		if ((keg = uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
2146e20a199fSJeff Roberson 		    arg->align, arg->flags)) == NULL)
2147b23f72e9SBrian Feldman 			return (ENOMEM);
2148099a0e58SBosko Milekic 	} else {
2149099a0e58SBosko Milekic 		struct uma_kctor_args karg;
2150b23f72e9SBrian Feldman 		int error;
2151099a0e58SBosko Milekic 
2152099a0e58SBosko Milekic 		/* We should only be here from uma_startup() */
2153099a0e58SBosko Milekic 		karg.size = arg->size;
2154099a0e58SBosko Milekic 		karg.uminit = arg->uminit;
2155099a0e58SBosko Milekic 		karg.fini = arg->fini;
2156099a0e58SBosko Milekic 		karg.align = arg->align;
2157099a0e58SBosko Milekic 		karg.flags = arg->flags;
2158099a0e58SBosko Milekic 		karg.zone = zone;
2159b23f72e9SBrian Feldman 		error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
2160b23f72e9SBrian Feldman 		    flags);
2161b23f72e9SBrian Feldman 		if (error)
2162b23f72e9SBrian Feldman 			return (error);
2163099a0e58SBosko Milekic 	}
21640095a784SJeff Roberson 
216520a4e154SJeff Roberson 	/* Inherit properties from the keg. */
2166bb15d1c7SGleb Smirnoff 	zone->uz_keg = keg;
2167e20a199fSJeff Roberson 	zone->uz_size = keg->uk_size;
2168e20a199fSJeff Roberson 	zone->uz_flags |= (keg->uk_flags &
2169e20a199fSJeff Roberson 	    (UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT));
21708355f576SJeff Roberson 
217120a4e154SJeff Roberson out:
217220a4e154SJeff Roberson 	if (__predict_true(booted == BOOT_RUNNING)) {
217320a4e154SJeff Roberson 		zone_alloc_counters(zone, NULL);
217420a4e154SJeff Roberson 		zone_alloc_sysctl(zone, NULL);
217520a4e154SJeff Roberson 	} else {
217620a4e154SJeff Roberson 		zone->uz_allocs = EARLY_COUNTER;
217720a4e154SJeff Roberson 		zone->uz_frees = EARLY_COUNTER;
217820a4e154SJeff Roberson 		zone->uz_fails = EARLY_COUNTER;
2179099a0e58SBosko Milekic 	}
21808355f576SJeff Roberson 
21817e28037aSMark Johnston 	KASSERT((arg->flags & (UMA_ZONE_MAXBUCKET | UMA_ZONE_NOBUCKET)) !=
21827e28037aSMark Johnston 	    (UMA_ZONE_MAXBUCKET | UMA_ZONE_NOBUCKET),
21837e28037aSMark Johnston 	    ("Invalid zone flag combination"));
218420a4e154SJeff Roberson 	if (arg->flags & UMA_ZFLAG_INTERNAL)
218520a4e154SJeff Roberson 		zone->uz_bucket_size_max = zone->uz_bucket_size = 0;
218620a4e154SJeff Roberson 	if ((arg->flags & UMA_ZONE_MAXBUCKET) != 0)
218720a4e154SJeff Roberson 		zone->uz_bucket_size = BUCKET_MAX;
218820a4e154SJeff Roberson 	else if ((arg->flags & UMA_ZONE_MINBUCKET) != 0)
218920a4e154SJeff Roberson 		zone->uz_bucket_size_max = zone->uz_bucket_size = BUCKET_MIN;
219020a4e154SJeff Roberson 	else if ((arg->flags & UMA_ZONE_NOBUCKET) != 0)
219120a4e154SJeff Roberson 		zone->uz_bucket_size = 0;
21927e28037aSMark Johnston 	else
219320a4e154SJeff Roberson 		zone->uz_bucket_size = bucket_select(zone->uz_size);
219420a4e154SJeff Roberson 	zone->uz_bucket_size_min = zone->uz_bucket_size;
2195fc03d22bSJeff Roberson 
2196b23f72e9SBrian Feldman 	return (0);
21978355f576SJeff Roberson }
21988355f576SJeff Roberson 
21998355f576SJeff Roberson /*
2200099a0e58SBosko Milekic  * Keg header dtor.  This frees all data, destroys locks and frees the
2201099a0e58SBosko Milekic  * hash table; removal from the global keg list is done by zone_dtor().
22029c2cd7e5SJeff Roberson  *
22039c2cd7e5SJeff Roberson  * Arguments/Returns follow uma_dtor specifications
22049c2cd7e5SJeff Roberson  *	udata  unused
22059c2cd7e5SJeff Roberson  */
2206099a0e58SBosko Milekic static void
2207099a0e58SBosko Milekic keg_dtor(void *arg, int size, void *udata)
2208099a0e58SBosko Milekic {
2209099a0e58SBosko Milekic 	uma_keg_t keg;
22109c2cd7e5SJeff Roberson 
2211099a0e58SBosko Milekic 	keg = (uma_keg_t)arg;
2212e20a199fSJeff Roberson 	KEG_LOCK(keg);
2213099a0e58SBosko Milekic 	if (keg->uk_free != 0) {
2214a3845534SCraig Rodrigues 		printf("Freed UMA keg (%s) was not empty (%d items). "
2215099a0e58SBosko Milekic 		    "Lost %d pages of memory.\n",
2216a3845534SCraig Rodrigues 		    keg->uk_name ? keg->uk_name : "",
2217099a0e58SBosko Milekic 		    keg->uk_free, keg->uk_pages);
2218099a0e58SBosko Milekic 	}
2219e20a199fSJeff Roberson 	KEG_UNLOCK(keg);
2220099a0e58SBosko Milekic 
2221099a0e58SBosko Milekic 	hash_free(&keg->uk_hash);
2222099a0e58SBosko Milekic 
2223e20a199fSJeff Roberson 	KEG_LOCK_FINI(keg);
2224099a0e58SBosko Milekic }
2225099a0e58SBosko Milekic 
2226099a0e58SBosko Milekic /*
2227099a0e58SBosko Milekic  * Zone header dtor.
2228099a0e58SBosko Milekic  *
2229099a0e58SBosko Milekic  * Arguments/Returns follow uma_dtor specifications
2230099a0e58SBosko Milekic  *	udata  unused
2231099a0e58SBosko Milekic  */
22329c2cd7e5SJeff Roberson static void
22339c2cd7e5SJeff Roberson zone_dtor(void *arg, int size, void *udata)
22349c2cd7e5SJeff Roberson {
22359c2cd7e5SJeff Roberson 	uma_zone_t zone;
2236099a0e58SBosko Milekic 	uma_keg_t keg;
22379c2cd7e5SJeff Roberson 
22389c2cd7e5SJeff Roberson 	zone = (uma_zone_t)arg;
22399643769aSJeff Roberson 
224020a4e154SJeff Roberson 	sysctl_remove_oid(zone->uz_oid, 1, 1);
224120a4e154SJeff Roberson 
2242e20a199fSJeff Roberson 	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
22439643769aSJeff Roberson 		cache_drain(zone);
2244099a0e58SBosko Milekic 
2245111fbcd5SBryan Venteicher 	rw_wlock(&uma_rwlock);
2246099a0e58SBosko Milekic 	LIST_REMOVE(zone, uz_link);
2247111fbcd5SBryan Venteicher 	rw_wunlock(&uma_rwlock);
2248099a0e58SBosko Milekic 	/*
2249099a0e58SBosko Milekic 	 * XXX there are races here where the zone can be drained, the
2250099a0e58SBosko Milekic 	 * zone lock released, and the zone refilled before we remove
2251099a0e58SBosko Milekic 	 * it... we don't care for now.
2253099a0e58SBosko Milekic 	 */
225408cfa56eSMark Johnston 	zone_reclaim(zone, M_WAITOK, true);
2255e20a199fSJeff Roberson 	/*
2256323ad386STycho Nightingale 	 * We only destroy kegs from non-secondary/non-cache zones.
2257e20a199fSJeff Roberson 	 */
2258323ad386STycho Nightingale 	if ((zone->uz_flags & (UMA_ZONE_SECONDARY | UMA_ZFLAG_CACHE)) == 0) {
2259323ad386STycho Nightingale 		keg = zone->uz_keg;
2260111fbcd5SBryan Venteicher 		rw_wlock(&uma_rwlock);
2261099a0e58SBosko Milekic 		LIST_REMOVE(keg, uk_link);
2262111fbcd5SBryan Venteicher 		rw_wunlock(&uma_rwlock);
22630095a784SJeff Roberson 		zone_free_item(kegs, keg, NULL, SKIP_NONE);
22649c2cd7e5SJeff Roberson 	}
22652efcc8cbSGleb Smirnoff 	counter_u64_free(zone->uz_allocs);
22662efcc8cbSGleb Smirnoff 	counter_u64_free(zone->uz_frees);
22672efcc8cbSGleb Smirnoff 	counter_u64_free(zone->uz_fails);
226820a4e154SJeff Roberson 	free(zone->uz_ctlname, M_UMA);
2269bb15d1c7SGleb Smirnoff 	if (zone->uz_lockptr == &zone->uz_lock)
2270af526374SJeff Roberson 		ZONE_LOCK_FINI(zone);
2271099a0e58SBosko Milekic }
2272099a0e58SBosko Milekic 
22739c2cd7e5SJeff Roberson /*
22748355f576SJeff Roberson  * Traverses every zone in the system and calls a callback
22758355f576SJeff Roberson  *
22768355f576SJeff Roberson  * Arguments:
22778355f576SJeff Roberson  *	zfunc  A pointer to a function which accepts a zone
22788355f576SJeff Roberson  *		and the opaque 'arg' as arguments.
22798355f576SJeff Roberson  *
22808355f576SJeff Roberson  * Returns:
22818355f576SJeff Roberson  *	Nothing
22828355f576SJeff Roberson  */
22838355f576SJeff Roberson static void
228420a4e154SJeff Roberson zone_foreach(void (*zfunc)(uma_zone_t, void *arg), void *arg)
22858355f576SJeff Roberson {
2286099a0e58SBosko Milekic 	uma_keg_t keg;
22878355f576SJeff Roberson 	uma_zone_t zone;
22888355f576SJeff Roberson 
22892efcc8cbSGleb Smirnoff 	/*
22902efcc8cbSGleb Smirnoff 	 * Before BOOT_RUNNING we are guaranteed to be single
22912efcc8cbSGleb Smirnoff 	 * threaded, so locking isn't needed. Startup functions
22922efcc8cbSGleb Smirnoff 	 * are allowed to use M_WAITOK.
22932efcc8cbSGleb Smirnoff 	 */
22942efcc8cbSGleb Smirnoff 	if (__predict_true(booted == BOOT_RUNNING))
2295111fbcd5SBryan Venteicher 		rw_rlock(&uma_rwlock);
2296099a0e58SBosko Milekic 	LIST_FOREACH(keg, &uma_kegs, uk_link) {
2297099a0e58SBosko Milekic 		LIST_FOREACH(zone, &keg->uk_zones, uz_link)
229820a4e154SJeff Roberson 			zfunc(zone, arg);
2299099a0e58SBosko Milekic 	}
230008034d10SKonstantin Belousov 	LIST_FOREACH(zone, &uma_cachezones, uz_link)
230120a4e154SJeff Roberson 		zfunc(zone, arg);
23022efcc8cbSGleb Smirnoff 	if (__predict_true(booted == BOOT_RUNNING))
2303111fbcd5SBryan Venteicher 		rw_runlock(&uma_rwlock);
23048355f576SJeff Roberson }
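
/*
 * Example callback (an illustrative sketch, not part of UMA; "zone_tally"
 * is a hypothetical name), in the same style as zone_count() above:
 *
 *	static void
 *	zone_tally(uma_zone_t zone, void *arg)
 *	{
 *		(*(int *)arg)++;
 *	}
 *
 *	int n = 0;
 *	zone_foreach(zone_tally, &n);
 */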
23058355f576SJeff Roberson 
2306f4bef67cSGleb Smirnoff /*
2307f4bef67cSGleb Smirnoff  * Count how many pages we need to bootstrap.  The VM supplies the
2308f4bef67cSGleb Smirnoff  * number of its early zones in the argument, and we add up our zones,
2309325c4cedSMark Johnston  * which consist of the UMA Slabs, UMA Hash and 9 Bucket zones.  The
2310f4bef67cSGleb Smirnoff  * zone of zones and zone of kegs are accounted separately.
2311f4bef67cSGleb Smirnoff  */
2312325c4cedSMark Johnston #define	UMA_BOOT_ZONES	11
23135073a083SGleb Smirnoff /* Zone of zones and zone of kegs have arbitrary alignment. */
23145073a083SGleb Smirnoff #define	UMA_BOOT_ALIGN	32
2315f4bef67cSGleb Smirnoff static int zsize, ksize;
2316f4bef67cSGleb Smirnoff int
2317f7d35785SGleb Smirnoff uma_startup_count(int vm_zones)
2318f4bef67cSGleb Smirnoff {
2319f7d35785SGleb Smirnoff 	int zones, pages;
23209b78b1f4SJeff Roberson 	size_t space, size;
2321f4bef67cSGleb Smirnoff 
2322f4bef67cSGleb Smirnoff 	ksize = sizeof(struct uma_keg) +
2323f4bef67cSGleb Smirnoff 	    (sizeof(struct uma_domain) * vm_ndomains);
2324f4bef67cSGleb Smirnoff 	zsize = sizeof(struct uma_zone) +
2325f4bef67cSGleb Smirnoff 	    (sizeof(struct uma_cache) * (mp_maxid + 1)) +
2326f4bef67cSGleb Smirnoff 	    (sizeof(struct uma_zone_domain) * vm_ndomains);
2327f4bef67cSGleb Smirnoff 
23285073a083SGleb Smirnoff 	/*
23295073a083SGleb Smirnoff 	 * Memory for the zone of kegs and its keg,
23305073a083SGleb Smirnoff 	 * and for zone of zones.
23315073a083SGleb Smirnoff 	 */
2332f4bef67cSGleb Smirnoff 	pages = howmany(roundup(zsize, CACHE_LINE_SIZE) * 2 +
2333f4bef67cSGleb Smirnoff 	    roundup(ksize, CACHE_LINE_SIZE), PAGE_SIZE);
2334f4bef67cSGleb Smirnoff 
2335f7d35785SGleb Smirnoff #ifdef	UMA_MD_SMALL_ALLOC
2336f7d35785SGleb Smirnoff 	zones = UMA_BOOT_ZONES;
2337f7d35785SGleb Smirnoff #else
2338f7d35785SGleb Smirnoff 	zones = UMA_BOOT_ZONES + vm_zones;
2339f7d35785SGleb Smirnoff 	vm_zones = 0;
2340f7d35785SGleb Smirnoff #endif
23419b78b1f4SJeff Roberson 	size = slab_sizeof(SLAB_MAX_SETSIZE);
23429b78b1f4SJeff Roberson 	space = slab_space(SLAB_MAX_SETSIZE);
2343f4bef67cSGleb Smirnoff 
23445073a083SGleb Smirnoff 	/* Memory for the rest of startup zones, UMA and VM, ... */
23459b78b1f4SJeff Roberson 	if (zsize > space) {
23460b2e3aeaSGleb Smirnoff 		/* See keg_large_init(). */
23470b2e3aeaSGleb Smirnoff 		u_int ppera;
23480b2e3aeaSGleb Smirnoff 
23490b2e3aeaSGleb Smirnoff 		ppera = howmany(roundup2(zsize, UMA_BOOT_ALIGN), PAGE_SIZE);
23509b78b1f4SJeff Roberson 		if (PAGE_SIZE * ppera - roundup2(zsize, UMA_BOOT_ALIGN) < size)
23510b2e3aeaSGleb Smirnoff 			ppera++;
23520b2e3aeaSGleb Smirnoff 		pages += (zones + vm_zones) * ppera;
23539b78b1f4SJeff Roberson 	} else if (roundup2(zsize, UMA_BOOT_ALIGN) > space)
23540b2e3aeaSGleb Smirnoff 		/* See keg_small_init() special case for uk_ppera = 1. */
235596a10340SGleb Smirnoff 		pages += zones;
2356f4bef67cSGleb Smirnoff 	else
23575073a083SGleb Smirnoff 		pages += howmany(zones,
23589b78b1f4SJeff Roberson 		    space / roundup2(zsize, UMA_BOOT_ALIGN));
2359f4bef67cSGleb Smirnoff 
23605073a083SGleb Smirnoff 	/* ... and their kegs. Note that zone of zones allocates a keg! */
23615073a083SGleb Smirnoff 	pages += howmany(zones + 1,
23629b78b1f4SJeff Roberson 	    space / roundup2(ksize, UMA_BOOT_ALIGN));
2363f4bef67cSGleb Smirnoff 
2364f4bef67cSGleb Smirnoff 	return (pages);
2365f4bef67cSGleb Smirnoff }
2366f4bef67cSGleb Smirnoff 
23678355f576SJeff Roberson void
2368ac0a6fd0SGleb Smirnoff uma_startup(void *mem, int npages)
23698355f576SJeff Roberson {
23708355f576SJeff Roberson 	struct uma_zctor_args args;
2371ab3185d1SJeff Roberson 	uma_keg_t masterkeg;
2372ab3185d1SJeff Roberson 	uintptr_t m;
2373f4bef67cSGleb Smirnoff 
2374f4bef67cSGleb Smirnoff #ifdef DIAGNOSTIC
2375f4bef67cSGleb Smirnoff 	printf("Entering %s with %d boot pages configured\n", __func__, npages);
2376f4bef67cSGleb Smirnoff #endif
23778355f576SJeff Roberson 
2378111fbcd5SBryan Venteicher 	rw_init(&uma_rwlock, "UMA lock");
2379099a0e58SBosko Milekic 
2380ab3185d1SJeff Roberson 	/* Use bootpages memory for the zone of zones and zone of kegs. */
2381ab3185d1SJeff Roberson 	m = (uintptr_t)mem;
2382ab3185d1SJeff Roberson 	zones = (uma_zone_t)m;
2383ab3185d1SJeff Roberson 	m += roundup(zsize, CACHE_LINE_SIZE);
2384ab3185d1SJeff Roberson 	kegs = (uma_zone_t)m;
2385ab3185d1SJeff Roberson 	m += roundup(zsize, CACHE_LINE_SIZE);
2386ab3185d1SJeff Roberson 	masterkeg = (uma_keg_t)m;
2387ab3185d1SJeff Roberson 	m += roundup(ksize, CACHE_LINE_SIZE);
2388ab3185d1SJeff Roberson 	m = roundup(m, PAGE_SIZE);
2389ab3185d1SJeff Roberson 	npages -= (m - (uintptr_t)mem) / PAGE_SIZE;
2390ab3185d1SJeff Roberson 	mem = (void *)m;
2391ab3185d1SJeff Roberson 
2392099a0e58SBosko Milekic 	/* "manually" create the initial zone */
23930095a784SJeff Roberson 	memset(&args, 0, sizeof(args));
2394099a0e58SBosko Milekic 	args.name = "UMA Kegs";
2395ab3185d1SJeff Roberson 	args.size = ksize;
2396099a0e58SBosko Milekic 	args.ctor = keg_ctor;
2397099a0e58SBosko Milekic 	args.dtor = keg_dtor;
23988355f576SJeff Roberson 	args.uminit = zero_init;
23998355f576SJeff Roberson 	args.fini = NULL;
2400ab3185d1SJeff Roberson 	args.keg = masterkeg;
24015073a083SGleb Smirnoff 	args.align = UMA_BOOT_ALIGN - 1;
2402b60f5b79SJeff Roberson 	args.flags = UMA_ZFLAG_INTERNAL;
2403ab3185d1SJeff Roberson 	zone_ctor(kegs, zsize, &args, M_WAITOK);
24048355f576SJeff Roberson 
2405ac0a6fd0SGleb Smirnoff 	bootmem = mem;
2406ac0a6fd0SGleb Smirnoff 	boot_pages = npages;
24078355f576SJeff Roberson 
2408099a0e58SBosko Milekic 	args.name = "UMA Zones";
2409f4bef67cSGleb Smirnoff 	args.size = zsize;
2410099a0e58SBosko Milekic 	args.ctor = zone_ctor;
2411099a0e58SBosko Milekic 	args.dtor = zone_dtor;
2412099a0e58SBosko Milekic 	args.uminit = zero_init;
2413099a0e58SBosko Milekic 	args.fini = NULL;
2414099a0e58SBosko Milekic 	args.keg = NULL;
24155073a083SGleb Smirnoff 	args.align = UMA_BOOT_ALIGN - 1;
2416099a0e58SBosko Milekic 	args.flags = UMA_ZFLAG_INTERNAL;
2417ab3185d1SJeff Roberson 	zone_ctor(zones, zsize, &args, M_WAITOK);
2418099a0e58SBosko Milekic 
24198355f576SJeff Roberson 	/* Now make a zone for slab headers */
24201e0701e1SJeff Roberson 	slabzone = uma_zcreate("UMA Slabs", sizeof(struct uma_hash_slab),
24211e0701e1SJeff Roberson 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
24228355f576SJeff Roberson 
24238355f576SJeff Roberson 	hashzone = uma_zcreate("UMA Hash",
24248355f576SJeff Roberson 	    sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
24251e0701e1SJeff Roberson 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
24268355f576SJeff Roberson 
2427f4bef67cSGleb Smirnoff 	booted = BOOT_STRAPPED;
24288355f576SJeff Roberson }
24298355f576SJeff Roberson 
2430f4bef67cSGleb Smirnoff void
2431f4bef67cSGleb Smirnoff uma_startup1(void)
2432f4bef67cSGleb Smirnoff {
2433f4bef67cSGleb Smirnoff 
2434f4bef67cSGleb Smirnoff #ifdef DIAGNOSTIC
2435f4bef67cSGleb Smirnoff 	printf("Entering %s with %d boot pages left\n", __func__, boot_pages);
2436f4bef67cSGleb Smirnoff #endif
2437f4bef67cSGleb Smirnoff 	booted = BOOT_PAGEALLOC;
2438f4bef67cSGleb Smirnoff }
2439f4bef67cSGleb Smirnoff 
24408355f576SJeff Roberson void
244199571dc3SJeff Roberson uma_startup2(void)
24428355f576SJeff Roberson {
2443f4bef67cSGleb Smirnoff 
2444f7d35785SGleb Smirnoff #ifdef DIAGNOSTIC
2445f7d35785SGleb Smirnoff 	printf("Entering %s with %d boot pages left\n", __func__, boot_pages);
2446f7d35785SGleb Smirnoff #endif
244708cfa56eSMark Johnston 	sx_init(&uma_reclaim_lock, "umareclaim");
24483182660aSRyan Libby 	bucket_init();
24493182660aSRyan Libby 	booted = BOOT_BUCKETS;
2450f4bef67cSGleb Smirnoff 	bucket_enable();
24518355f576SJeff Roberson }
24528355f576SJeff Roberson 
24538355f576SJeff Roberson /*
24548355f576SJeff Roberson  * Initialize our callout handle and finish UMA startup.
24558355f576SJeff Roberson  */
24578355f576SJeff Roberson static void
24588355f576SJeff Roberson uma_startup3(void)
24598355f576SJeff Roberson {
24601431a748SGleb Smirnoff 
2461c5deaf04SGleb Smirnoff #ifdef INVARIANTS
2462c5deaf04SGleb Smirnoff 	TUNABLE_INT_FETCH("vm.debug.divisor", &dbg_divisor);
2463c5deaf04SGleb Smirnoff 	uma_dbg_cnt = counter_u64_alloc(M_WAITOK);
2464c5deaf04SGleb Smirnoff 	uma_skip_cnt = counter_u64_alloc(M_WAITOK);
2465c5deaf04SGleb Smirnoff #endif
246620a4e154SJeff Roberson 	zone_foreach(zone_alloc_counters, NULL);
246720a4e154SJeff Roberson 	zone_foreach(zone_alloc_sysctl, NULL);
2468fd90e2edSJung-uk Kim 	callout_init(&uma_callout, 1);
24699643769aSJeff Roberson 	callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
2470c5deaf04SGleb Smirnoff 	booted = BOOT_RUNNING;
24718355f576SJeff Roberson }
24728355f576SJeff Roberson 
2473e20a199fSJeff Roberson static uma_keg_t
2474099a0e58SBosko Milekic uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
247585dcf349SGleb Smirnoff 		int align, uint32_t flags)
2476099a0e58SBosko Milekic {
2477099a0e58SBosko Milekic 	struct uma_kctor_args args;
2478099a0e58SBosko Milekic 
2479099a0e58SBosko Milekic 	args.size = size;
2480099a0e58SBosko Milekic 	args.uminit = uminit;
2481099a0e58SBosko Milekic 	args.fini = fini;
24821e319f6dSRobert Watson 	args.align = (align == UMA_ALIGN_CACHE) ? uma_align_cache : align;
2483099a0e58SBosko Milekic 	args.flags = flags;
2484099a0e58SBosko Milekic 	args.zone = zone;
2485ab3185d1SJeff Roberson 	return (zone_alloc_item(kegs, &args, UMA_ANYDOMAIN, M_WAITOK));
2486099a0e58SBosko Milekic }
2487099a0e58SBosko Milekic 
2488f4bef67cSGleb Smirnoff /* Public functions */
24898355f576SJeff Roberson /* See uma.h */
24901e319f6dSRobert Watson void
24911e319f6dSRobert Watson uma_set_align(int align)
24921e319f6dSRobert Watson {
24931e319f6dSRobert Watson 
24941e319f6dSRobert Watson 	if (align != UMA_ALIGN_CACHE)
24951e319f6dSRobert Watson 		uma_align_cache = align;
24961e319f6dSRobert Watson }
24971e319f6dSRobert Watson 
24981e319f6dSRobert Watson /* See uma.h */
24998355f576SJeff Roberson uma_zone_t
2500bb196eb4SMatthew D Fleming uma_zcreate(const char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
250185dcf349SGleb Smirnoff 		uma_init uminit, uma_fini fini, int align, uint32_t flags)
25028355f576SJeff Roberson 
25038355f576SJeff Roberson {
25048355f576SJeff Roberson 	struct uma_zctor_args args;
250595c4bf75SKonstantin Belousov 	uma_zone_t res;
250695c4bf75SKonstantin Belousov 	bool locked;
25078355f576SJeff Roberson 
2508a5a35578SJohn Baldwin 	KASSERT(powerof2(align + 1), ("invalid zone alignment %d for \"%s\"",
2509a5a35578SJohn Baldwin 	    align, name));
2510a5a35578SJohn Baldwin 
2511c1685086SJeff Roberson 	/* Sets all zones to a first-touch domain policy. */
2512c1685086SJeff Roberson #ifdef UMA_FIRSTTOUCH
2513c1685086SJeff Roberson 	flags |= UMA_ZONE_NUMA;
2514c1685086SJeff Roberson #endif
2515c1685086SJeff Roberson 
25168355f576SJeff Roberson 	/* This stuff is essential for the zone ctor */
25170095a784SJeff Roberson 	memset(&args, 0, sizeof(args));
25188355f576SJeff Roberson 	args.name = name;
25198355f576SJeff Roberson 	args.size = size;
25208355f576SJeff Roberson 	args.ctor = ctor;
25218355f576SJeff Roberson 	args.dtor = dtor;
25228355f576SJeff Roberson 	args.uminit = uminit;
25238355f576SJeff Roberson 	args.fini = fini;
2524afc6dc36SJohn-Mark Gurney #ifdef  INVARIANTS
2525afc6dc36SJohn-Mark Gurney 	/*
2526ca293436SRyan Libby 	 * Inject procedures which check for memory use after free if we are
2527ca293436SRyan Libby 	 * allowed to scramble the memory while it is not allocated.  This
2528ca293436SRyan Libby 	 * requires that: UMA is actually able to access the memory, no init
2529ca293436SRyan Libby 	 * or fini procedures, no dependency on the initial value of the
2530ca293436SRyan Libby 	 * memory, and no (legitimate) use of the memory after free.  Note,
2531ca293436SRyan Libby 	 * the ctor and dtor do not need to be empty.
2532ca293436SRyan Libby 	 *
2533ca293436SRyan Libby 	 * XXX UMA_ZONE_OFFPAGE.
2534afc6dc36SJohn-Mark Gurney 	 */
253519c591bfSMateusz Guzik 	if ((!(flags & (UMA_ZONE_ZINIT | UMA_ZONE_NOFREE))) &&
2536ca293436SRyan Libby 	    uminit == NULL && fini == NULL) {
2537afc6dc36SJohn-Mark Gurney 		args.uminit = trash_init;
2538afc6dc36SJohn-Mark Gurney 		args.fini = trash_fini;
2539afc6dc36SJohn-Mark Gurney 	}
2540afc6dc36SJohn-Mark Gurney #endif
25418355f576SJeff Roberson 	args.align = align;
25428355f576SJeff Roberson 	args.flags = flags;
2543099a0e58SBosko Milekic 	args.keg = NULL;
2544099a0e58SBosko Milekic 
2545f4bef67cSGleb Smirnoff 	if (booted < BOOT_BUCKETS) {
254695c4bf75SKonstantin Belousov 		locked = false;
254795c4bf75SKonstantin Belousov 	} else {
254808cfa56eSMark Johnston 		sx_slock(&uma_reclaim_lock);
254995c4bf75SKonstantin Belousov 		locked = true;
255095c4bf75SKonstantin Belousov 	}
2551ab3185d1SJeff Roberson 	res = zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK);
255295c4bf75SKonstantin Belousov 	if (locked)
255308cfa56eSMark Johnston 		sx_sunlock(&uma_reclaim_lock);
255495c4bf75SKonstantin Belousov 	return (res);
2555099a0e58SBosko Milekic }
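
/*
 * Example (an illustrative sketch, not part of UMA): a typical consumer
 * creates a zone for a fixed-size structure once at initialization time;
 * "struct foo" and "foo_zone" are hypothetical names.
 *
 *	static uma_zone_t foo_zone;
 *
 *	foo_zone = uma_zcreate("foo", sizeof(struct foo),
 *	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 *	fp = uma_zalloc(foo_zone, M_WAITOK | M_ZERO);
 *	uma_zfree(foo_zone, fp);
 */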
2556099a0e58SBosko Milekic 
2557099a0e58SBosko Milekic /* See uma.h */
2558099a0e58SBosko Milekic uma_zone_t
2559099a0e58SBosko Milekic uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
2560099a0e58SBosko Milekic 		    uma_init zinit, uma_fini zfini, uma_zone_t master)
2561099a0e58SBosko Milekic {
2562099a0e58SBosko Milekic 	struct uma_zctor_args args;
2563e20a199fSJeff Roberson 	uma_keg_t keg;
256495c4bf75SKonstantin Belousov 	uma_zone_t res;
256595c4bf75SKonstantin Belousov 	bool locked;
2566099a0e58SBosko Milekic 
2567bb15d1c7SGleb Smirnoff 	keg = master->uz_keg;
25680095a784SJeff Roberson 	memset(&args, 0, sizeof(args));
2569099a0e58SBosko Milekic 	args.name = name;
2570e20a199fSJeff Roberson 	args.size = keg->uk_size;
2571099a0e58SBosko Milekic 	args.ctor = ctor;
2572099a0e58SBosko Milekic 	args.dtor = dtor;
2573099a0e58SBosko Milekic 	args.uminit = zinit;
2574099a0e58SBosko Milekic 	args.fini = zfini;
2575e20a199fSJeff Roberson 	args.align = keg->uk_align;
2576e20a199fSJeff Roberson 	args.flags = keg->uk_flags | UMA_ZONE_SECONDARY;
2577e20a199fSJeff Roberson 	args.keg = keg;
25788355f576SJeff Roberson 
2579f4bef67cSGleb Smirnoff 	if (booted < BOOT_BUCKETS) {
258095c4bf75SKonstantin Belousov 		locked = false;
258195c4bf75SKonstantin Belousov 	} else {
258208cfa56eSMark Johnston 		sx_slock(&uma_reclaim_lock);
258395c4bf75SKonstantin Belousov 		locked = true;
258495c4bf75SKonstantin Belousov 	}
2585e20a199fSJeff Roberson 	/* XXX Attaches only one keg of potentially many. */
2586ab3185d1SJeff Roberson 	res = zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK);
258795c4bf75SKonstantin Belousov 	if (locked)
258808cfa56eSMark Johnston 		sx_sunlock(&uma_reclaim_lock);
258995c4bf75SKonstantin Belousov 	return (res);
25908355f576SJeff Roberson }
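
/*
 * Example (an illustrative sketch, not part of UMA): a secondary zone
 * shares the master zone's keg but layers its own ctor/dtor over the
 * same backing storage; all names below are hypothetical.
 *
 *	packet_zone = uma_zsecond_create("packet", packet_ctor,
 *	    packet_dtor, NULL, NULL, mbuf_zone);
 */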
25918355f576SJeff Roberson 
25920095a784SJeff Roberson /* See uma.h */
25930095a784SJeff Roberson uma_zone_t
2594af526374SJeff Roberson uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor,
2595af526374SJeff Roberson 		    uma_init zinit, uma_fini zfini, uma_import zimport,
2596af526374SJeff Roberson 		    uma_release zrelease, void *arg, int flags)
25970095a784SJeff Roberson {
25980095a784SJeff Roberson 	struct uma_zctor_args args;
25990095a784SJeff Roberson 
26000095a784SJeff Roberson 	memset(&args, 0, sizeof(args));
26010095a784SJeff Roberson 	args.name = name;
2602af526374SJeff Roberson 	args.size = size;
26030095a784SJeff Roberson 	args.ctor = ctor;
26040095a784SJeff Roberson 	args.dtor = dtor;
26050095a784SJeff Roberson 	args.uminit = zinit;
26060095a784SJeff Roberson 	args.fini = zfini;
26070095a784SJeff Roberson 	args.import = zimport;
26080095a784SJeff Roberson 	args.release = zrelease;
26090095a784SJeff Roberson 	args.arg = arg;
26100095a784SJeff Roberson 	args.align = 0;
2611bb15d1c7SGleb Smirnoff 	args.flags = flags | UMA_ZFLAG_CACHE;
26120095a784SJeff Roberson 
2613ab3185d1SJeff Roberson 	return (zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK));
26140095a784SJeff Roberson }
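
/*
 * Example (an illustrative sketch, not part of UMA): a pure cache zone
 * supplies its own import/release methods in place of a keg.  The
 * hypothetical methods below must match the signatures used for
 * uz_import/uz_release elsewhere in this file.
 *
 *	static int
 *	foo_import(void *arg, void **store, int cnt, int domain, int flags);
 *	static void
 *	foo_release(void *arg, void **store, int cnt);
 *
 *	foo_cache = uma_zcache_create("foo cache", sizeof(struct foo),
 *	    NULL, NULL, NULL, NULL, foo_import, foo_release, NULL, 0);
 */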
26150095a784SJeff Roberson 
26168355f576SJeff Roberson /* See uma.h */
26179c2cd7e5SJeff Roberson void
26189c2cd7e5SJeff Roberson uma_zdestroy(uma_zone_t zone)
26199c2cd7e5SJeff Roberson {
2620f4ff923bSRobert Watson 
262108cfa56eSMark Johnston 	sx_slock(&uma_reclaim_lock);
26220095a784SJeff Roberson 	zone_free_item(zones, zone, NULL, SKIP_NONE);
262308cfa56eSMark Johnston 	sx_sunlock(&uma_reclaim_lock);
26249c2cd7e5SJeff Roberson }
26259c2cd7e5SJeff Roberson 
26268d6fbbb8SJeff Roberson void
26278d6fbbb8SJeff Roberson uma_zwait(uma_zone_t zone)
26288d6fbbb8SJeff Roberson {
26298d6fbbb8SJeff Roberson 	void *item;
26308d6fbbb8SJeff Roberson 
26318d6fbbb8SJeff Roberson 	item = uma_zalloc_arg(zone, NULL, M_WAITOK);
26328d6fbbb8SJeff Roberson 	uma_zfree(zone, item);
26338d6fbbb8SJeff Roberson }
26348d6fbbb8SJeff Roberson 
26354e180881SMateusz Guzik void *
26364e180881SMateusz Guzik uma_zalloc_pcpu_arg(uma_zone_t zone, void *udata, int flags)
26374e180881SMateusz Guzik {
26384e180881SMateusz Guzik 	void *item;
2639b4799947SRuslan Bukin #ifdef SMP
26404e180881SMateusz Guzik 	int i;
26414e180881SMateusz Guzik 
26424e180881SMateusz Guzik 	MPASS(zone->uz_flags & UMA_ZONE_PCPU);
2643b4799947SRuslan Bukin #endif
26444e180881SMateusz Guzik 	item = uma_zalloc_arg(zone, udata, flags & ~M_ZERO);
26454e180881SMateusz Guzik 	if (item != NULL && (flags & M_ZERO)) {
2646b4799947SRuslan Bukin #ifdef SMP
2647013072f0SMark Johnston 		for (i = 0; i <= mp_maxid; i++)
26484e180881SMateusz Guzik 			bzero(zpcpu_get_cpu(item, i), zone->uz_size);
2649b4799947SRuslan Bukin #else
2650b4799947SRuslan Bukin 		bzero(item, zone->uz_size);
2651b4799947SRuslan Bukin #endif
26524e180881SMateusz Guzik 	}
26534e180881SMateusz Guzik 	return (item);
26544e180881SMateusz Guzik }
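
/*
 * Example (an illustrative sketch, not part of UMA; "pcpu_zone" is a
 * hypothetical zone created with UMA_ZONE_PCPU): M_ZERO zeroes every
 * CPU's copy of the item, and each copy is then addressed through
 * zpcpu_get_cpu() as above.
 *
 *	p = uma_zalloc_pcpu_arg(pcpu_zone, NULL, M_WAITOK | M_ZERO);
 *	c = zpcpu_get_cpu(p, curcpu);
 */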
26554e180881SMateusz Guzik 
26564e180881SMateusz Guzik /*
26574e180881SMateusz Guzik  * A stub for now, while the regular and pcpu free paths are identical.
26584e180881SMateusz Guzik  */
26594e180881SMateusz Guzik void
26604e180881SMateusz Guzik uma_zfree_pcpu_arg(uma_zone_t zone, void *item, void *udata)
26614e180881SMateusz Guzik {
26624e180881SMateusz Guzik 
2663c5b7751fSIan Lepore #ifdef SMP
26644e180881SMateusz Guzik 	MPASS(zone->uz_flags & UMA_ZONE_PCPU);
2665c5b7751fSIan Lepore #endif
26664e180881SMateusz Guzik 	uma_zfree_arg(zone, item, udata);
26674e180881SMateusz Guzik }
26684e180881SMateusz Guzik 
2669beb8beefSJeff Roberson static inline void *
2670beb8beefSJeff Roberson bucket_pop(uma_zone_t zone, uma_cache_t cache, uma_bucket_t bucket)
2671beb8beefSJeff Roberson {
2672beb8beefSJeff Roberson 	void *item;
2673beb8beefSJeff Roberson 
2674beb8beefSJeff Roberson 	bucket->ub_cnt--;
2675beb8beefSJeff Roberson 	item = bucket->ub_bucket[bucket->ub_cnt];
2676beb8beefSJeff Roberson #ifdef INVARIANTS
2677beb8beefSJeff Roberson 	bucket->ub_bucket[bucket->ub_cnt] = NULL;
2678beb8beefSJeff Roberson 	KASSERT(item != NULL, ("uma_zalloc: Bucket pointer mangled."));
2679beb8beefSJeff Roberson #endif
2680beb8beefSJeff Roberson 	cache->uc_allocs++;
2681beb8beefSJeff Roberson 
2682beb8beefSJeff Roberson 	return (item);
2683beb8beefSJeff Roberson }
2684beb8beefSJeff Roberson 
26850a81b439SJeff Roberson static inline void
26860a81b439SJeff Roberson bucket_push(uma_zone_t zone, uma_cache_t cache, uma_bucket_t bucket,
26870a81b439SJeff Roberson     void *item)
26880a81b439SJeff Roberson {
26890a81b439SJeff Roberson 	KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
26900a81b439SJeff Roberson 	    ("uma_zfree: Freeing to a non-free bucket index."));
26910a81b439SJeff Roberson 	bucket->ub_bucket[bucket->ub_cnt] = item;
26920a81b439SJeff Roberson 	bucket->ub_cnt++;
26930a81b439SJeff Roberson 	cache->uc_frees++;
26940a81b439SJeff Roberson }
26950a81b439SJeff Roberson 
2696beb8beefSJeff Roberson static void *
2697beb8beefSJeff Roberson item_ctor(uma_zone_t zone, void *udata, int flags, void *item)
2698beb8beefSJeff Roberson {
2699beb8beefSJeff Roberson #ifdef INVARIANTS
2700ca293436SRyan Libby 	bool skipdbg;
2701beb8beefSJeff Roberson 
2702beb8beefSJeff Roberson 	skipdbg = uma_dbg_zskip(zone, item);
2703ca293436SRyan Libby 	if (!skipdbg && (zone->uz_flags & UMA_ZFLAG_TRASH) != 0 &&
2704ca293436SRyan Libby 	    zone->uz_ctor != trash_ctor)
2705ca293436SRyan Libby 		trash_ctor(item, zone->uz_size, udata, flags);
2706beb8beefSJeff Roberson #endif
2707ca293436SRyan Libby 	if (__predict_false(zone->uz_ctor != NULL) &&
2708beb8beefSJeff Roberson 	    zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
2709beb8beefSJeff Roberson 		counter_u64_add(zone->uz_fails, 1);
2710beb8beefSJeff Roberson 		zone_free_item(zone, item, udata, SKIP_DTOR | SKIP_CNT);
2711beb8beefSJeff Roberson 		return (NULL);
2712beb8beefSJeff Roberson 	}
2713beb8beefSJeff Roberson #ifdef INVARIANTS
2714beb8beefSJeff Roberson 	if (!skipdbg)
2715beb8beefSJeff Roberson 		uma_dbg_alloc(zone, NULL, item);
2716beb8beefSJeff Roberson #endif
2717beb8beefSJeff Roberson 	if (flags & M_ZERO)
2718beb8beefSJeff Roberson 		uma_zero_item(item, zone);
2719beb8beefSJeff Roberson 
2720beb8beefSJeff Roberson 	return (item);
2721beb8beefSJeff Roberson }
2722beb8beefSJeff Roberson 
2723ca293436SRyan Libby static inline void
2724ca293436SRyan Libby item_dtor(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip)
2725ca293436SRyan Libby {
2726ca293436SRyan Libby #ifdef INVARIANTS
2727ca293436SRyan Libby 	bool skipdbg;
2728ca293436SRyan Libby 
2729ca293436SRyan Libby 	skipdbg = uma_dbg_zskip(zone, item);
2730ca293436SRyan Libby 	if (skip == SKIP_NONE && !skipdbg) {
2731ca293436SRyan Libby 		if ((zone->uz_flags & UMA_ZONE_MALLOC) != 0)
2732ca293436SRyan Libby 			uma_dbg_free(zone, udata, item);
2733ca293436SRyan Libby 		else
2734ca293436SRyan Libby 			uma_dbg_free(zone, NULL, item);
2735ca293436SRyan Libby 	}
2736ca293436SRyan Libby #endif
2737ca293436SRyan Libby 	if (skip < SKIP_DTOR) {
2738ca293436SRyan Libby 		if (zone->uz_dtor != NULL)
2739ca293436SRyan Libby 			zone->uz_dtor(item, zone->uz_size, udata);
2740ca293436SRyan Libby #ifdef INVARIANTS
2741ca293436SRyan Libby 		if (!skipdbg && (zone->uz_flags & UMA_ZFLAG_TRASH) != 0 &&
2742ca293436SRyan Libby 		    zone->uz_dtor != trash_dtor)
2743ca293436SRyan Libby 			trash_dtor(item, zone->uz_size, udata);
2744ca293436SRyan Libby #endif
2745ca293436SRyan Libby 	}
2746ca293436SRyan Libby }
2747ca293436SRyan Libby 
27489c2cd7e5SJeff Roberson /* See uma.h */
27498355f576SJeff Roberson void *
27502cc35ff9SJeff Roberson uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
27518355f576SJeff Roberson {
27528355f576SJeff Roberson 	uma_bucket_t bucket;
2753ab3185d1SJeff Roberson 	uma_cache_t cache;
2754ab3185d1SJeff Roberson 	void *item;
2755beb8beefSJeff Roberson 	int cpu, domain;
27568355f576SJeff Roberson 
2757e866d8f0SMark Murray 	/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
275819fa89e9SMark Murray 	random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA);
275910cb2424SMark Murray 
27608355f576SJeff Roberson 	/* This is the fast path allocation */
27611431a748SGleb Smirnoff 	CTR4(KTR_UMA, "uma_zalloc_arg thread %x zone %s(%p) flags %d",
27621431a748SGleb Smirnoff 	    curthread, zone->uz_name, zone, flags);
2763a553d4b8SJeff Roberson 
2764635fd505SRobert Watson 	if (flags & M_WAITOK) {
2765b23f72e9SBrian Feldman 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
2766635fd505SRobert Watson 		    "uma_zalloc_arg: zone \"%s\"", zone->uz_name);
27674c1cc01cSJohn Baldwin 	}
27680766f278SJonathan T. Looney 	KASSERT((flags & M_EXEC) == 0, ("uma_zalloc_arg: called with M_EXEC"));
2769d9e2e68dSMark Johnston 	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
27701067a2baSJonathan T. Looney 	    ("uma_zalloc_arg: called with spinlock or critical section held"));
2771ea99223eSMateusz Guzik 	if (zone->uz_flags & UMA_ZONE_PCPU)
2772b8af2820SMateusz Guzik 		KASSERT((flags & M_ZERO) == 0, ("allocating from a pcpu zone "
2773b8af2820SMateusz Guzik 		    "with M_ZERO passed"));
27741067a2baSJonathan T. Looney 
27758d689e04SGleb Smirnoff #ifdef DEBUG_MEMGUARD
27768d689e04SGleb Smirnoff 	if (memguard_cmp_zone(zone)) {
27778d689e04SGleb Smirnoff 		item = memguard_alloc(zone->uz_size, flags);
27788d689e04SGleb Smirnoff 		if (item != NULL) {
27798d689e04SGleb Smirnoff 			if (zone->uz_init != NULL &&
27808d689e04SGleb Smirnoff 			    zone->uz_init(item, zone->uz_size, flags) != 0)
27818d689e04SGleb Smirnoff 				return (NULL);
27828d689e04SGleb Smirnoff 			if (zone->uz_ctor != NULL &&
2783fc03d22bSJeff Roberson 			    zone->uz_ctor(item, zone->uz_size, udata,
2784fc03d22bSJeff Roberson 			    flags) != 0) {
2785ca293436SRyan Libby 				counter_u64_add(zone->uz_fails, 1);
27868d689e04SGleb Smirnoff 			    	zone->uz_fini(item, zone->uz_size);
27878d689e04SGleb Smirnoff 				return (NULL);
27888d689e04SGleb Smirnoff 			}
27898d689e04SGleb Smirnoff 			return (item);
27908d689e04SGleb Smirnoff 		}
27918d689e04SGleb Smirnoff 		/* This is unfortunate but should not be fatal. */
27928d689e04SGleb Smirnoff 	}
27938d689e04SGleb Smirnoff #endif
27945d1ae027SRobert Watson 	/*
27955d1ae027SRobert Watson 	 * If possible, allocate from the per-CPU cache.  There are two
27965d1ae027SRobert Watson 	 * requirements for safe access to the per-CPU cache: (1) the thread
27975d1ae027SRobert Watson 	 * accessing the cache must not be preempted or yield during access,
27985d1ae027SRobert Watson 	 * and (2) the thread must not migrate CPUs without switching which
27995d1ae027SRobert Watson 	 * cache it accesses.  We rely on a critical section to prevent
28005d1ae027SRobert Watson 	 * preemption and migration.  We release the critical section in
28015d1ae027SRobert Watson 	 * order to acquire the zone mutex if we are unable to allocate from
28025d1ae027SRobert Watson 	 * the current cache; when we re-acquire the critical section, we
28035d1ae027SRobert Watson 	 * must detect and handle migration if it has occurred.
28045d1ae027SRobert Watson 	 */
28055d1ae027SRobert Watson 	critical_enter();
2806beb8beefSJeff Roberson 	do {
28075d1ae027SRobert Watson 		cpu = curcpu;
28088355f576SJeff Roberson 		cache = &zone->uz_cpu[cpu];
28098355f576SJeff Roberson 		bucket = cache->uc_allocbucket;
2810beb8beefSJeff Roberson 		if (__predict_true(bucket != NULL && bucket->ub_cnt != 0)) {
2811beb8beefSJeff Roberson 			item = bucket_pop(zone, cache, bucket);
28125d1ae027SRobert Watson 			critical_exit();
2813beb8beefSJeff Roberson 			return (item_ctor(zone, udata, flags, item));
2814b23f72e9SBrian Feldman 		}
2815beb8beefSJeff Roberson 	} while (cache_alloc(zone, cache, udata, flags));
2816beb8beefSJeff Roberson 	critical_exit();
2817beb8beefSJeff Roberson 
2818beb8beefSJeff Roberson 	/*
2819beb8beefSJeff Roberson 	 * We could not get a bucket, so try to return a single item.
2820beb8beefSJeff Roberson 	 */
2821beb8beefSJeff Roberson 	if (zone->uz_flags & UMA_ZONE_NUMA)
2822beb8beefSJeff Roberson 		domain = PCPU_GET(domain);
2823beb8beefSJeff Roberson 	else
2824beb8beefSJeff Roberson 		domain = UMA_ANYDOMAIN;
2825beb8beefSJeff Roberson 	return (zone_alloc_item_locked(zone, udata, domain, flags));
2826fc03d22bSJeff Roberson }
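
/*
 * Example (an illustrative sketch, not part of UMA; "foo_zone" is a
 * hypothetical zone): callers that cannot sleep pass M_NOWAIT and must
 * be prepared for a NULL return.
 *
 *	fp = uma_zalloc_arg(foo_zone, NULL, M_NOWAIT);
 *	if (fp == NULL)
 *		return (ENOMEM);
 */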
2827fc03d22bSJeff Roberson 
28288355f576SJeff Roberson /*
2829beb8beefSJeff Roberson  * Replenish an alloc bucket and possibly restore an old one.  Called in
2830beb8beefSJeff Roberson  * a critical section.  Returns in a critical section.
2831beb8beefSJeff Roberson  *
2832beb8beefSJeff Roberson  * A false return value indicates failure, with the zone lock held.  A
2833beb8beefSJeff Roberson  * true return value indicates success and the caller should retry.
2834beb8beefSJeff Roberson  */
2835beb8beefSJeff Roberson static __noinline bool
2836beb8beefSJeff Roberson cache_alloc(uma_zone_t zone, uma_cache_t cache, void *udata, int flags)
2837beb8beefSJeff Roberson {
2838beb8beefSJeff Roberson 	uma_zone_domain_t zdom;
2839beb8beefSJeff Roberson 	uma_bucket_t bucket;
2840beb8beefSJeff Roberson 	int cpu, domain;
2841beb8beefSJeff Roberson 	bool lockfail;
2842beb8beefSJeff Roberson 
2843beb8beefSJeff Roberson 	CRITICAL_ASSERT(curthread);
2844beb8beefSJeff Roberson 
2845beb8beefSJeff Roberson 	/*
2846beb8beefSJeff Roberson 	 * If we have run out of items in our alloc bucket see
2847beb8beefSJeff Roberson 	 * if we can switch with the free bucket.
28488355f576SJeff Roberson 	 */
2849b983089aSJeff Roberson 	bucket = cache->uc_freebucket;
2850beb8beefSJeff Roberson 	if (bucket != NULL && bucket->ub_cnt != 0) {
28518355f576SJeff Roberson 		cache->uc_freebucket = cache->uc_allocbucket;
2852b983089aSJeff Roberson 		cache->uc_allocbucket = bucket;
2853beb8beefSJeff Roberson 		return (true);
28548355f576SJeff Roberson 	}
2855fc03d22bSJeff Roberson 
2856fc03d22bSJeff Roberson 	/*
2857fc03d22bSJeff Roberson 	 * Discard any empty allocation bucket while we hold no locks.
2858fc03d22bSJeff Roberson 	 */
2859fc03d22bSJeff Roberson 	bucket = cache->uc_allocbucket;
2860fc03d22bSJeff Roberson 	cache->uc_allocbucket = NULL;
2861fc03d22bSJeff Roberson 	critical_exit();
2862fc03d22bSJeff Roberson 	if (bucket != NULL)
28636fd34d6fSJeff Roberson 		bucket_free(zone, bucket, udata);
2864fc03d22bSJeff Roberson 
28655d1ae027SRobert Watson 	/*
28665d1ae027SRobert Watson 	 * The attempt to retrieve the item from the per-CPU cache has failed,
28675d1ae027SRobert Watson 	 * so we must go back to the zone.  This requires the zone lock, so we
28685d1ae027SRobert Watson 	 * must drop the critical section, then re-acquire it when we go back
28695d1ae027SRobert Watson 	 * to the cache.  Since the critical section is released, we may be
28705d1ae027SRobert Watson 	 * preempted or migrate.  As such, make sure not to maintain any
28715d1ae027SRobert Watson 	 * thread-local state specific to the cache from prior to releasing
28725d1ae027SRobert Watson 	 * the critical section.
28735d1ae027SRobert Watson 	 */
2874fc03d22bSJeff Roberson 	lockfail = false;
2875fc03d22bSJeff Roberson 	if (ZONE_TRYLOCK(zone) == 0) {
2876fc03d22bSJeff Roberson 		/* Record contention to size the buckets. */
2877a553d4b8SJeff Roberson 		ZONE_LOCK(zone);
2878fc03d22bSJeff Roberson 		lockfail = true;
2879fc03d22bSJeff Roberson 	}
2880beb8beefSJeff Roberson 
28815d1ae027SRobert Watson 	critical_enter();
2882beb8beefSJeff Roberson 	/* Short-circuit for zones without buckets and low memory. */
288320a4e154SJeff Roberson 	if (zone->uz_bucket_size == 0 || bucketdisable)
2884beb8beefSJeff Roberson 		return (false);
2885beb8beefSJeff Roberson 
28865d1ae027SRobert Watson 	cpu = curcpu;
28875d1ae027SRobert Watson 	cache = &zone->uz_cpu[cpu];
28885d1ae027SRobert Watson 
2889fc03d22bSJeff Roberson 	/* See if we lost the race to fill the cache. */
2890fc03d22bSJeff Roberson 	if (cache->uc_allocbucket != NULL) {
2891fc03d22bSJeff Roberson 		ZONE_UNLOCK(zone);
2892beb8beefSJeff Roberson 		return (true);
2893a553d4b8SJeff Roberson 	}
28948355f576SJeff Roberson 
2895fc03d22bSJeff Roberson 	/*
2896fc03d22bSJeff Roberson 	 * Check the zone's cache of buckets.
2897fc03d22bSJeff Roberson 	 */
2898c1685086SJeff Roberson 	if (zone->uz_flags & UMA_ZONE_NUMA) {
2899c1685086SJeff Roberson 		domain = PCPU_GET(domain);
2900ab3185d1SJeff Roberson 		zdom = &zone->uz_domain[domain];
2901c1685086SJeff Roberson 	} else {
2902c1685086SJeff Roberson 		domain = UMA_ANYDOMAIN;
2903c1685086SJeff Roberson 		zdom = &zone->uz_domain[0];
2904c1685086SJeff Roberson 	}
2905c1685086SJeff Roberson 
290608cfa56eSMark Johnston 	if ((bucket = zone_fetch_bucket(zone, zdom)) != NULL) {
2907beb8beefSJeff Roberson 		ZONE_UNLOCK(zone);
2908cae33c14SJeff Roberson 		KASSERT(bucket->ub_cnt != 0,
2909a553d4b8SJeff Roberson 		    ("uma_zalloc_arg: Returning an empty bucket."));
2910a553d4b8SJeff Roberson 		cache->uc_allocbucket = bucket;
2911beb8beefSJeff Roberson 		return (true);
2912a553d4b8SJeff Roberson 	}
29135d1ae027SRobert Watson 	/* We are no longer associated with this CPU. */
29145d1ae027SRobert Watson 	critical_exit();
2915bbee39c6SJeff Roberson 
2916fc03d22bSJeff Roberson 	/*
2917fc03d22bSJeff Roberson 	 * We bump the zone's bucket size when the cache size is
2918fc03d22bSJeff Roberson 	 * insufficient to handle the working set.
2919fc03d22bSJeff Roberson 	 */
292020a4e154SJeff Roberson 	if (lockfail && zone->uz_bucket_size < zone->uz_bucket_size_max)
292120a4e154SJeff Roberson 		zone->uz_bucket_size++;
2922bb15d1c7SGleb Smirnoff 
29238355f576SJeff Roberson 	/*
2924beb8beefSJeff Roberson 	 * Fill a bucket and attempt to use it as the alloc bucket.
2925bbee39c6SJeff Roberson 	 */
2926beb8beefSJeff Roberson 	bucket = zone_alloc_bucket(zone, udata, domain, flags);
29271431a748SGleb Smirnoff 	CTR3(KTR_UMA, "uma_zalloc: zone %s(%p) bucket zone returned %p",
29281431a748SGleb Smirnoff 	    zone->uz_name, zone, bucket);
2929fc03d22bSJeff Roberson 	critical_enter();
2930beb8beefSJeff Roberson 	if (bucket == NULL)
2931beb8beefSJeff Roberson 		return (false);
29320f9b7bf3SMark Johnston 
2933fc03d22bSJeff Roberson 	/*
2934fc03d22bSJeff Roberson 	 * See if we lost the race or were migrated.  Cache the
2935fc03d22bSJeff Roberson 	 * initialized bucket to make this less likely or claim
2936fc03d22bSJeff Roberson 	 * the memory directly.
2937fc03d22bSJeff Roberson 	 */
2938beb8beefSJeff Roberson 	cpu = curcpu;
2939beb8beefSJeff Roberson 	cache = &zone->uz_cpu[cpu];
294081c0d72cSGleb Smirnoff 	if (cache->uc_allocbucket == NULL &&
294181c0d72cSGleb Smirnoff 	    ((zone->uz_flags & UMA_ZONE_NUMA) == 0 ||
294281c0d72cSGleb Smirnoff 	    domain == PCPU_GET(domain))) {
2943ab3185d1SJeff Roberson 		cache->uc_allocbucket = bucket;
29440f9b7bf3SMark Johnston 		zdom->uzd_imax += bucket->ub_cnt;
2945bb15d1c7SGleb Smirnoff 	} else if (zone->uz_bkt_count >= zone->uz_bkt_max) {
294681c0d72cSGleb Smirnoff 		critical_exit();
294781c0d72cSGleb Smirnoff 		ZONE_UNLOCK(zone);
294881c0d72cSGleb Smirnoff 		bucket_drain(zone, bucket);
294981c0d72cSGleb Smirnoff 		bucket_free(zone, bucket, udata);
2950beb8beefSJeff Roberson 		critical_enter();
2951beb8beefSJeff Roberson 		return (true);
295281c0d72cSGleb Smirnoff 	} else
29530f9b7bf3SMark Johnston 		zone_put_bucket(zone, zdom, bucket, false);
2954bbee39c6SJeff Roberson 	ZONE_UNLOCK(zone);
2955beb8beefSJeff Roberson 	return (true);
2956bbee39c6SJeff Roberson }
2957bbee39c6SJeff Roberson 
2958ab3185d1SJeff Roberson void *
2959ab3185d1SJeff Roberson uma_zalloc_domain(uma_zone_t zone, void *udata, int domain, int flags)
2960bbee39c6SJeff Roberson {
2961ab3185d1SJeff Roberson 
2962ab3185d1SJeff Roberson 	/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
296319fa89e9SMark Murray 	random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA);
2964ab3185d1SJeff Roberson 
2965ab3185d1SJeff Roberson 	/* This is the fast path allocation */
2966ab3185d1SJeff Roberson 	CTR5(KTR_UMA,
2967ab3185d1SJeff Roberson 	    "uma_zalloc_domain thread %x zone %s(%p) domain %d flags %d",
2968ab3185d1SJeff Roberson 	    curthread, zone->uz_name, zone, domain, flags);
2969ab3185d1SJeff Roberson 
2970ab3185d1SJeff Roberson 	if (flags & M_WAITOK) {
2971ab3185d1SJeff Roberson 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
2972ab3185d1SJeff Roberson 		    "uma_zalloc_domain: zone \"%s\"", zone->uz_name);
2973ab3185d1SJeff Roberson 	}
2974ab3185d1SJeff Roberson 	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
2975ab3185d1SJeff Roberson 	    ("uma_zalloc_domain: called with spinlock or critical section held"));
2976ab3185d1SJeff Roberson 
2977ab3185d1SJeff Roberson 	return (zone_alloc_item(zone, udata, domain, flags));
2978ab3185d1SJeff Roberson }
2979ab3185d1SJeff Roberson 
2980ab3185d1SJeff Roberson /*
2981ab3185d1SJeff Roberson  * Find a slab with some space.  Prefer slabs that are partially used over
2982ab3185d1SJeff Roberson  * those that are totally free.  This helps to reduce fragmentation.
2983ab3185d1SJeff Roberson  *
2984ab3185d1SJeff Roberson  * If 'rr' is 1, search all domains starting from 'domain'.  Otherwise check
2985ab3185d1SJeff Roberson  * only 'domain'.
2986ab3185d1SJeff Roberson  */
2987ab3185d1SJeff Roberson static uma_slab_t
2988194a979eSMark Johnston keg_first_slab(uma_keg_t keg, int domain, bool rr)
2989ab3185d1SJeff Roberson {
2990ab3185d1SJeff Roberson 	uma_domain_t dom;
2991bbee39c6SJeff Roberson 	uma_slab_t slab;
2992ab3185d1SJeff Roberson 	int start;
2993ab3185d1SJeff Roberson 
2994ab3185d1SJeff Roberson 	KASSERT(domain >= 0 && domain < vm_ndomains,
2995ab3185d1SJeff Roberson 	    ("keg_first_slab: domain %d out of range", domain));
2996bb15d1c7SGleb Smirnoff 	KEG_LOCK_ASSERT(keg);
2997ab3185d1SJeff Roberson 
2998ab3185d1SJeff Roberson 	slab = NULL;
2999ab3185d1SJeff Roberson 	start = domain;
3000ab3185d1SJeff Roberson 	do {
3001ab3185d1SJeff Roberson 		dom = &keg->uk_domain[domain];
3002ab3185d1SJeff Roberson 		if (!LIST_EMPTY(&dom->ud_part_slab))
3003ab3185d1SJeff Roberson 			return (LIST_FIRST(&dom->ud_part_slab));
3004ab3185d1SJeff Roberson 		if (!LIST_EMPTY(&dom->ud_free_slab)) {
3005ab3185d1SJeff Roberson 			slab = LIST_FIRST(&dom->ud_free_slab);
3006ab3185d1SJeff Roberson 			LIST_REMOVE(slab, us_link);
3007ab3185d1SJeff Roberson 			LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link);
3008ab3185d1SJeff Roberson 			return (slab);
3009ab3185d1SJeff Roberson 		}
3010ab3185d1SJeff Roberson 		if (rr)
3011ab3185d1SJeff Roberson 			domain = (domain + 1) % vm_ndomains;
3012ab3185d1SJeff Roberson 	} while (domain != start);
3013ab3185d1SJeff Roberson 
3014ab3185d1SJeff Roberson 	return (NULL);
3015ab3185d1SJeff Roberson }
3016ab3185d1SJeff Roberson 
3017ab3185d1SJeff Roberson static uma_slab_t
3018194a979eSMark Johnston keg_fetch_free_slab(uma_keg_t keg, int domain, bool rr, int flags)
3019ab3185d1SJeff Roberson {
3020194a979eSMark Johnston 	uint32_t reserve;
3021099a0e58SBosko Milekic 
3022bb15d1c7SGleb Smirnoff 	KEG_LOCK_ASSERT(keg);
3023194a979eSMark Johnston 
3024194a979eSMark Johnston 	reserve = (flags & M_USE_RESERVE) != 0 ? 0 : keg->uk_reserve;
3025194a979eSMark Johnston 	if (keg->uk_free <= reserve)
3026194a979eSMark Johnston 		return (NULL);
3027194a979eSMark Johnston 	return (keg_first_slab(keg, domain, rr));
3028194a979eSMark Johnston }
3029194a979eSMark Johnston 
3030194a979eSMark Johnston static uma_slab_t
3031194a979eSMark Johnston keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int rdomain, const int flags)
3032194a979eSMark Johnston {
3033194a979eSMark Johnston 	struct vm_domainset_iter di;
3034194a979eSMark Johnston 	uma_domain_t dom;
3035194a979eSMark Johnston 	uma_slab_t slab;
3036194a979eSMark Johnston 	int aflags, domain;
3037194a979eSMark Johnston 	bool rr;
3038194a979eSMark Johnston 
3039194a979eSMark Johnston restart:
3040bb15d1c7SGleb Smirnoff 	KEG_LOCK_ASSERT(keg);
3041bbee39c6SJeff Roberson 
3042bbee39c6SJeff Roberson 	/*
3043194a979eSMark Johnston 	 * Use the keg's policy if upper layers haven't already specified a
3044194a979eSMark Johnston 	 * domain (as happens with first-touch zones).
3045194a979eSMark Johnston 	 *
3046194a979eSMark Johnston 	 * To avoid races we run the iterator with the keg lock held, but that
3047194a979eSMark Johnston 	 * means that we cannot allow the vm_domainset layer to sleep.  Thus,
3048194a979eSMark Johnston 	 * clear M_WAITOK and handle low memory conditions locally.
3049bbee39c6SJeff Roberson 	 */
3050ab3185d1SJeff Roberson 	rr = rdomain == UMA_ANYDOMAIN;
3051ab3185d1SJeff Roberson 	if (rr) {
3052194a979eSMark Johnston 		aflags = (flags & ~M_WAITOK) | M_NOWAIT;
3053194a979eSMark Johnston 		vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain,
3054194a979eSMark Johnston 		    &aflags);
3055194a979eSMark Johnston 	} else {
3056194a979eSMark Johnston 		aflags = flags;
3057194a979eSMark Johnston 		domain = rdomain;
3058194a979eSMark Johnston 	}
3059ab3185d1SJeff Roberson 
3060194a979eSMark Johnston 	for (;;) {
3061194a979eSMark Johnston 		slab = keg_fetch_free_slab(keg, domain, rr, flags);
3062584061b4SJeff Roberson 		if (slab != NULL)
3063bbee39c6SJeff Roberson 			return (slab);
3064bbee39c6SJeff Roberson 
3065bbee39c6SJeff Roberson 		/*
3066bbee39c6SJeff Roberson 		 * M_NOVM means don't ask at all!
3067bbee39c6SJeff Roberson 		 */
3068bbee39c6SJeff Roberson 		if (flags & M_NOVM)
3069bbee39c6SJeff Roberson 			break;
3070bbee39c6SJeff Roberson 
3071bb15d1c7SGleb Smirnoff 		KASSERT(zone->uz_max_items == 0 ||
3072bb15d1c7SGleb Smirnoff 		    zone->uz_items <= zone->uz_max_items,
3073bb15d1c7SGleb Smirnoff 		    ("%s: zone %p overflow", __func__, zone));
3074bb15d1c7SGleb Smirnoff 
307586220393SMark Johnston 		slab = keg_alloc_slab(keg, zone, domain, flags, aflags);
3076bbee39c6SJeff Roberson 		/*
3077bbee39c6SJeff Roberson 		 * If we got a slab here it's safe to mark it partially used
3078bbee39c6SJeff Roberson 		 * and return.  We assume that the caller is going to remove
3079bbee39c6SJeff Roberson 		 * at least one item.
3080bbee39c6SJeff Roberson 		 */
3081bbee39c6SJeff Roberson 		if (slab) {
3082ab3185d1SJeff Roberson 			dom = &keg->uk_domain[slab->us_domain];
3083ab3185d1SJeff Roberson 			LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link);
3084bbee39c6SJeff Roberson 			return (slab);
3085bbee39c6SJeff Roberson 		}
3086194a979eSMark Johnston 		KEG_LOCK(keg);
3087*3639ac42SJeff Roberson 		if (!rr && (flags & M_WAITOK) == 0)
3088*3639ac42SJeff Roberson 			break;
3089194a979eSMark Johnston 		if (rr && vm_domainset_iter_policy(&di, &domain) != 0) {
3090194a979eSMark Johnston 			if ((flags & M_WAITOK) != 0) {
3091194a979eSMark Johnston 				KEG_UNLOCK(keg);
3092194a979eSMark Johnston 				vm_wait_doms(&keg->uk_dr.dr_policy->ds_mask);
3093194a979eSMark Johnston 				KEG_LOCK(keg);
3094194a979eSMark Johnston 				goto restart;
309530c5525bSAndrew Gallatin 			}
3096194a979eSMark Johnston 			break;
3097194a979eSMark Johnston 		}
3098ab3185d1SJeff Roberson 	}
3099ab3185d1SJeff Roberson 
3100bbee39c6SJeff Roberson 	/*
3101bbee39c6SJeff Roberson 	 * We might not have been able to get a slab but another cpu
3102bbee39c6SJeff Roberson 	 * could have while we were unlocked.  Check again before we
3103bbee39c6SJeff Roberson 	 * fail.
3104bbee39c6SJeff Roberson 	 */
3105194a979eSMark Johnston 	if ((slab = keg_fetch_free_slab(keg, domain, rr, flags)) != NULL)
3106bbee39c6SJeff Roberson 		return (slab);
3108ab3185d1SJeff Roberson 	return (NULL);
3109ab3185d1SJeff Roberson }
3110bbee39c6SJeff Roberson 
3111d56368d7SBosko Milekic static void *
31120095a784SJeff Roberson slab_alloc_item(uma_keg_t keg, uma_slab_t slab)
3113bbee39c6SJeff Roberson {
3114ab3185d1SJeff Roberson 	uma_domain_t dom;
3115bbee39c6SJeff Roberson 	void *item;
311685dcf349SGleb Smirnoff 	uint8_t freei;
3117bbee39c6SJeff Roberson 
3118bb15d1c7SGleb Smirnoff 	KEG_LOCK_ASSERT(keg);
3119099a0e58SBosko Milekic 
31209b78b1f4SJeff Roberson 	freei = BIT_FFS(keg->uk_ipers, &slab->us_free) - 1;
31219b78b1f4SJeff Roberson 	BIT_CLR(keg->uk_ipers, freei, &slab->us_free);
31221e0701e1SJeff Roberson 	item = slab_item(slab, keg, freei);
3123bbee39c6SJeff Roberson 	slab->us_freecount--;
3124099a0e58SBosko Milekic 	keg->uk_free--;
3125ef72505eSJeff Roberson 
3126bbee39c6SJeff Roberson 	/* Move this slab to the full list */
3127bbee39c6SJeff Roberson 	if (slab->us_freecount == 0) {
3128bbee39c6SJeff Roberson 		LIST_REMOVE(slab, us_link);
3129ab3185d1SJeff Roberson 		dom = &keg->uk_domain[slab->us_domain];
3130ab3185d1SJeff Roberson 		LIST_INSERT_HEAD(&dom->ud_full_slab, slab, us_link);
3131bbee39c6SJeff Roberson 	}
3132bbee39c6SJeff Roberson 
3133bbee39c6SJeff Roberson 	return (item);
3134bbee39c6SJeff Roberson }
3135bbee39c6SJeff Roberson 
3136bbee39c6SJeff Roberson static int
3137b75c4efcSAndrew Turner zone_import(void *arg, void **bucket, int max, int domain, int flags)
31380095a784SJeff Roberson {
3139b75c4efcSAndrew Turner 	uma_zone_t zone;
31400095a784SJeff Roberson 	uma_slab_t slab;
31410095a784SJeff Roberson 	uma_keg_t keg;
3142a03af342SSean Bruno #ifdef NUMA
3143ab3185d1SJeff Roberson 	int stripe;
3144a03af342SSean Bruno #endif
31450095a784SJeff Roberson 	int i;
31460095a784SJeff Roberson 
3147b75c4efcSAndrew Turner 	zone = arg;
31480095a784SJeff Roberson 	slab = NULL;
3149584061b4SJeff Roberson 	keg = zone->uz_keg;
3150584061b4SJeff Roberson 	KEG_LOCK(keg);
3151af526374SJeff Roberson 	/* Try to keep the buckets totally full */
31520095a784SJeff Roberson 	for (i = 0; i < max; ) {
3153584061b4SJeff Roberson 		if ((slab = keg_fetch_slab(keg, zone, domain, flags)) == NULL)
31540095a784SJeff Roberson 			break;
3155a03af342SSean Bruno #ifdef NUMA
3156ab3185d1SJeff Roberson 		stripe = howmany(max, vm_ndomains);
3157a03af342SSean Bruno #endif
31586fd34d6fSJeff Roberson 		while (slab->us_freecount && i < max) {
31590095a784SJeff Roberson 			bucket[i++] = slab_alloc_item(keg, slab);
31606fd34d6fSJeff Roberson 			if (keg->uk_free <= keg->uk_reserve)
31616fd34d6fSJeff Roberson 				break;
3162b6715dabSJeff Roberson #ifdef NUMA
3163ab3185d1SJeff Roberson 			/*
3164ab3185d1SJeff Roberson 			 * If the zone is striped we pick a new slab for every
3165ab3185d1SJeff Roberson 			 * N allocations.  Eliminating this conditional will
3166ab3185d1SJeff Roberson 			 * instead pick a new domain for each bucket rather
3167ab3185d1SJeff Roberson 			 * than stripe within each bucket.  The current option
3168ab3185d1SJeff Roberson 			 * produces more fragmentation and requires more cpu
3169ab3185d1SJeff Roberson 			 * time but yields better distribution.
3170ab3185d1SJeff Roberson 			 */
3171ab3185d1SJeff Roberson 			if ((zone->uz_flags & UMA_ZONE_NUMA) == 0 &&
3172ab3185d1SJeff Roberson 			    vm_ndomains > 1 && --stripe == 0)
3173ab3185d1SJeff Roberson 				break;
3174ab3185d1SJeff Roberson #endif
31756fd34d6fSJeff Roberson 		}
3176ab3185d1SJeff Roberson 		/* Don't block if we allocated any items successfully. */
31770095a784SJeff Roberson 		flags &= ~M_WAITOK;
31780095a784SJeff Roberson 		flags |= M_NOWAIT;
31790095a784SJeff Roberson 	}
31800095a784SJeff Roberson 	KEG_UNLOCK(keg);
31810095a784SJeff Roberson 
31820095a784SJeff Roberson 	return (i);
31830095a784SJeff Roberson }
31840095a784SJeff Roberson 
3185fc03d22bSJeff Roberson static uma_bucket_t
3186beb8beefSJeff Roberson zone_alloc_bucket(uma_zone_t zone, void *udata, int domain, int flags)
3187bbee39c6SJeff Roberson {
3188bbee39c6SJeff Roberson 	uma_bucket_t bucket;
3189beb8beefSJeff Roberson 	int maxbucket, cnt;
3190bbee39c6SJeff Roberson 
319130c5525bSAndrew Gallatin 	CTR1(KTR_UMA, "zone_alloc_bucket: domain %d", domain);
319230c5525bSAndrew Gallatin 
3193c1685086SJeff Roberson 	/* Avoid allocs targeting empty domains. */
3194c1685086SJeff Roberson 	if (domain != UMA_ANYDOMAIN && VM_DOMAIN_EMPTY(domain))
3195c1685086SJeff Roberson 		domain = UMA_ANYDOMAIN;
3196c1685086SJeff Roberson 
3197beb8beefSJeff Roberson 	if (zone->uz_max_items > 0) {
3198beb8beefSJeff Roberson 		if (zone->uz_items >= zone->uz_max_items)
3199beb8beefSJeff Roberson 			return (NULL);
320020a4e154SJeff Roberson 		maxbucket = MIN(zone->uz_bucket_size,
3201beb8beefSJeff Roberson 		    zone->uz_max_items - zone->uz_items);
3202beb8beefSJeff Roberson 		zone->uz_items += maxbucket;
3203beb8beefSJeff Roberson 	} else
320420a4e154SJeff Roberson 		maxbucket = zone->uz_bucket_size;
3205beb8beefSJeff Roberson 	ZONE_UNLOCK(zone);
3206beb8beefSJeff Roberson 
32076fd34d6fSJeff Roberson 	/* Don't wait for buckets, preserve caller's NOVM setting. */
32086fd34d6fSJeff Roberson 	bucket = bucket_alloc(zone, udata, M_NOWAIT | (flags & M_NOVM));
3209beb8beefSJeff Roberson 	if (bucket == NULL) {
3210beb8beefSJeff Roberson 		cnt = 0;
3211beb8beefSJeff Roberson 		goto out;
3212beb8beefSJeff Roberson 	}
32130095a784SJeff Roberson 
32140095a784SJeff Roberson 	bucket->ub_cnt = zone->uz_import(zone->uz_arg, bucket->ub_bucket,
3215beb8beefSJeff Roberson 	    MIN(maxbucket, bucket->ub_entries), domain, flags);
32160095a784SJeff Roberson 
32170095a784SJeff Roberson 	/*
32180095a784SJeff Roberson 	 * Initialize the memory if necessary.
32190095a784SJeff Roberson 	 */
32200095a784SJeff Roberson 	if (bucket->ub_cnt != 0 && zone->uz_init != NULL) {
3221099a0e58SBosko Milekic 		int i;
3222bbee39c6SJeff Roberson 
32230095a784SJeff Roberson 		for (i = 0; i < bucket->ub_cnt; i++)
3224e20a199fSJeff Roberson 			if (zone->uz_init(bucket->ub_bucket[i], zone->uz_size,
32250095a784SJeff Roberson 			    flags) != 0)
3226b23f72e9SBrian Feldman 				break;
3227b23f72e9SBrian Feldman 		/*
3228b23f72e9SBrian Feldman 		 * If we couldn't initialize the whole bucket, put the
3229b23f72e9SBrian Feldman 		 * rest back onto the freelist.
3230b23f72e9SBrian Feldman 		 */
3231b23f72e9SBrian Feldman 		if (i != bucket->ub_cnt) {
3232af526374SJeff Roberson 			zone->uz_release(zone->uz_arg, &bucket->ub_bucket[i],
32330095a784SJeff Roberson 			    bucket->ub_cnt - i);
3234a5a262c6SBosko Milekic #ifdef INVARIANTS
32350095a784SJeff Roberson 			bzero(&bucket->ub_bucket[i],
32360095a784SJeff Roberson 			    sizeof(void *) * (bucket->ub_cnt - i));
3237a5a262c6SBosko Milekic #endif
3238b23f72e9SBrian Feldman 			bucket->ub_cnt = i;
3239b23f72e9SBrian Feldman 		}
3240099a0e58SBosko Milekic 	}
3241099a0e58SBosko Milekic 
3242beb8beefSJeff Roberson 	cnt = bucket->ub_cnt;
3243f7104ccdSAlexander Motin 	if (bucket->ub_cnt == 0) {
32446fd34d6fSJeff Roberson 		bucket_free(zone, bucket, udata);
32452efcc8cbSGleb Smirnoff 		counter_u64_add(zone->uz_fails, 1);
3246beb8beefSJeff Roberson 		bucket = NULL;
3247beb8beefSJeff Roberson 	}
3248beb8beefSJeff Roberson out:
3249beb8beefSJeff Roberson 	ZONE_LOCK(zone);
3250beb8beefSJeff Roberson 	if (zone->uz_max_items > 0 && cnt < maxbucket) {
3251beb8beefSJeff Roberson 		MPASS(zone->uz_items >= maxbucket - cnt);
3252beb8beefSJeff Roberson 		zone->uz_items -= maxbucket - cnt;
3253beb8beefSJeff Roberson 		if (zone->uz_sleepers > 0 &&
3254beb8beefSJeff Roberson 		    (cnt == 0 ? zone->uz_items + 1 : zone->uz_items) <
3255beb8beefSJeff Roberson 		    zone->uz_max_items)
3256beb8beefSJeff Roberson 			wakeup_one(zone);
3257bbee39c6SJeff Roberson 	}
3258fc03d22bSJeff Roberson 
3259fc03d22bSJeff Roberson 	return (bucket);
3260fc03d22bSJeff Roberson }
3261fc03d22bSJeff Roberson 
32628355f576SJeff Roberson /*
32630095a784SJeff Roberson  * Allocates a single item from a zone.
32648355f576SJeff Roberson  *
32658355f576SJeff Roberson  * Arguments
32668355f576SJeff Roberson  *	zone   The zone to alloc for.
32678355f576SJeff Roberson  *	udata  The data to be passed to the constructor.
3268ab3185d1SJeff Roberson  *	domain The domain to allocate from or UMA_ANYDOMAIN.
3269a163d034SWarner Losh  *	flags  M_WAITOK, M_NOWAIT, M_ZERO.
32708355f576SJeff Roberson  *
32718355f576SJeff Roberson  * Returns
32728355f576SJeff Roberson  *	NULL if there is no memory and M_NOWAIT is set
3273bbee39c6SJeff Roberson  *	An item if successful
32748355f576SJeff Roberson  */
32758355f576SJeff Roberson 
32768355f576SJeff Roberson static void *
3277ab3185d1SJeff Roberson zone_alloc_item(uma_zone_t zone, void *udata, int domain, int flags)
32788355f576SJeff Roberson {
3279bb15d1c7SGleb Smirnoff 
3280bb15d1c7SGleb Smirnoff 	ZONE_LOCK(zone);
3281bb15d1c7SGleb Smirnoff 	return (zone_alloc_item_locked(zone, udata, domain, flags));
3282bb15d1c7SGleb Smirnoff }
3283bb15d1c7SGleb Smirnoff 
3284bb15d1c7SGleb Smirnoff /*
3285bb15d1c7SGleb Smirnoff  * Returns with zone unlocked.
3286bb15d1c7SGleb Smirnoff  */
3287bb15d1c7SGleb Smirnoff static void *
3288bb15d1c7SGleb Smirnoff zone_alloc_item_locked(uma_zone_t zone, void *udata, int domain, int flags)
3289bb15d1c7SGleb Smirnoff {
32908355f576SJeff Roberson 	void *item;
32918355f576SJeff Roberson 
3292bb15d1c7SGleb Smirnoff 	ZONE_LOCK_ASSERT(zone);
3293bb15d1c7SGleb Smirnoff 
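	/*
	 * Enforce the item limit: sleepers block on the zone and are
	 * woken one at a time as frees bring uz_items back below
	 * uz_max_items.
	 */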
3294bb45b411SGleb Smirnoff 	if (zone->uz_max_items > 0) {
3295bb45b411SGleb Smirnoff 		if (zone->uz_items >= zone->uz_max_items) {
3296bb15d1c7SGleb Smirnoff 			zone_log_warning(zone);
3297bb15d1c7SGleb Smirnoff 			zone_maxaction(zone);
3298bb15d1c7SGleb Smirnoff 			if (flags & M_NOWAIT) {
3299bb15d1c7SGleb Smirnoff 				ZONE_UNLOCK(zone);
3300bb15d1c7SGleb Smirnoff 				return (NULL);
3301bb15d1c7SGleb Smirnoff 			}
3302bb15d1c7SGleb Smirnoff 			zone->uz_sleeps++;
3303bb15d1c7SGleb Smirnoff 			zone->uz_sleepers++;
3304bb15d1c7SGleb Smirnoff 			while (zone->uz_items >= zone->uz_max_items)
3305e7e4bcd8SGleb Smirnoff 				mtx_sleep(zone, zone->uz_lockptr, PVM,
3306e7e4bcd8SGleb Smirnoff 				    "zonelimit", 0);
3307bb15d1c7SGleb Smirnoff 			zone->uz_sleepers--;
3308bb15d1c7SGleb Smirnoff 			if (zone->uz_sleepers > 0 &&
3309bb15d1c7SGleb Smirnoff 			    zone->uz_items + 1 < zone->uz_max_items)
3310bb15d1c7SGleb Smirnoff 				wakeup_one(zone);
3311bb15d1c7SGleb Smirnoff 		}
3312bb15d1c7SGleb Smirnoff 		zone->uz_items++;
3313bb45b411SGleb Smirnoff 	}
3314bb15d1c7SGleb Smirnoff 	ZONE_UNLOCK(zone);
33158355f576SJeff Roberson 
3316c1685086SJeff Roberson 	/* Avoid allocs targeting empty domains. */
3317c1685086SJeff Roberson 	if (domain != UMA_ANYDOMAIN && VM_DOMAIN_EMPTY(domain))
331830c5525bSAndrew Gallatin 		domain = UMA_ANYDOMAIN;
3319c1685086SJeff Roberson 
3320ab3185d1SJeff Roberson 	if (zone->uz_import(zone->uz_arg, &item, 1, domain, flags) != 1)
3321beb8beefSJeff Roberson 		goto fail_cnt;
33228355f576SJeff Roberson 
3323099a0e58SBosko Milekic 	/*
3324099a0e58SBosko Milekic 	 * We have to call both the zone's init (not the keg's init)
3325099a0e58SBosko Milekic 	 * and the zone's ctor.  This is because the item is going from
3326099a0e58SBosko Milekic 	 * a keg slab directly to the user, and the user is expecting it
3327099a0e58SBosko Milekic 	 * to be both zone-init'd as well as zone-ctor'd.
3328099a0e58SBosko Milekic 	 */
3329b23f72e9SBrian Feldman 	if (zone->uz_init != NULL) {
3330e20a199fSJeff Roberson 		if (zone->uz_init(item, zone->uz_size, flags) != 0) {
3331bb15d1c7SGleb Smirnoff 			zone_free_item(zone, item, udata, SKIP_FINI | SKIP_CNT);
3332beb8beefSJeff Roberson 			goto fail_cnt;
3333beb8beefSJeff Roberson 		}
3334beb8beefSJeff Roberson 	}
3335beb8beefSJeff Roberson 	item = item_ctor(zone, udata, flags, item);
3336beb8beefSJeff Roberson 	if (item == NULL)
33370095a784SJeff Roberson 		goto fail;
33388355f576SJeff Roberson 
33392efcc8cbSGleb Smirnoff 	counter_u64_add(zone->uz_allocs, 1);
33401431a748SGleb Smirnoff 	CTR3(KTR_UMA, "zone_alloc_item item %p from %s(%p)", item,
33411431a748SGleb Smirnoff 	    zone->uz_name, zone);
33421431a748SGleb Smirnoff 
33438355f576SJeff Roberson 	return (item);
33440095a784SJeff Roberson 
3345beb8beefSJeff Roberson fail_cnt:
3346beb8beefSJeff Roberson 	counter_u64_add(zone->uz_fails, 1);
33470095a784SJeff Roberson fail:
3348bb45b411SGleb Smirnoff 	if (zone->uz_max_items > 0) {
3349bb15d1c7SGleb Smirnoff 		ZONE_LOCK(zone);
3350beb8beefSJeff Roberson 		/* XXX Decrement without wakeup */
3351bb15d1c7SGleb Smirnoff 		zone->uz_items--;
3352bb15d1c7SGleb Smirnoff 		ZONE_UNLOCK(zone);
3353bb45b411SGleb Smirnoff 	}
33541431a748SGleb Smirnoff 	CTR2(KTR_UMA, "zone_alloc_item failed from %s(%p)",
33551431a748SGleb Smirnoff 	    zone->uz_name, zone);
33560095a784SJeff Roberson 	return (NULL);
33578355f576SJeff Roberson }
33588355f576SJeff Roberson 
33598355f576SJeff Roberson /* See uma.h */
33608355f576SJeff Roberson void
33618355f576SJeff Roberson uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
33628355f576SJeff Roberson {
33638355f576SJeff Roberson 	uma_cache_t cache;
33648355f576SJeff Roberson 	uma_bucket_t bucket;
33650a81b439SJeff Roberson 	int cpu, domain, itemdomain;
33668355f576SJeff Roberson 
3367e866d8f0SMark Murray 	/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
336819fa89e9SMark Murray 	random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA);
336910cb2424SMark Murray 
33703659f747SRobert Watson 	CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
33713659f747SRobert Watson 	    zone->uz_name);
33723659f747SRobert Watson 
3373d9e2e68dSMark Johnston 	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
33741067a2baSJonathan T. Looney 	    ("uma_zfree_arg: called with spinlock or critical section held"));
33751067a2baSJonathan T. Looney 
337720ed0cb0SMatthew D Fleming 	/* uma_zfree(..., NULL) does nothing, to match free(9). */
337820ed0cb0SMatthew D Fleming 	if (item == NULL)
337920ed0cb0SMatthew D Fleming 		return;
33798d689e04SGleb Smirnoff #ifdef DEBUG_MEMGUARD
33808d689e04SGleb Smirnoff 	if (is_memguard_addr(item)) {
3381bc9d08e1SMark Johnston 		if (zone->uz_dtor != NULL)
33828d689e04SGleb Smirnoff 			zone->uz_dtor(item, zone->uz_size, udata);
3383bc9d08e1SMark Johnston 		if (zone->uz_fini != NULL)
33848d689e04SGleb Smirnoff 			zone->uz_fini(item, zone->uz_size);
33858d689e04SGleb Smirnoff 		memguard_free(item);
33868d689e04SGleb Smirnoff 		return;
33878d689e04SGleb Smirnoff 	}
33888d689e04SGleb Smirnoff #endif
3389ca293436SRyan Libby 	item_dtor(zone, item, udata, SKIP_NONE);
3390ef72505eSJeff Roberson 
3391af7f9b97SJeff Roberson 	/*
3392af7f9b97SJeff Roberson 	 * The race here is acceptable.  If we miss it we'll just have to wait
3393af7f9b97SJeff Roberson 	 * a little longer for the limits to be reset.
3394af7f9b97SJeff Roberson 	 */
3395bb15d1c7SGleb Smirnoff 	if (zone->uz_sleepers > 0)
3396fc03d22bSJeff Roberson 		goto zfree_item;
3397af7f9b97SJeff Roberson 
33985d1ae027SRobert Watson 	/*
33995d1ae027SRobert Watson 	 * If possible, free to the per-CPU cache.  There are two
34005d1ae027SRobert Watson 	 * requirements for safe access to the per-CPU cache: (1) the thread
34015d1ae027SRobert Watson 	 * accessing the cache must not be preempted or yield during access,
34025d1ae027SRobert Watson 	 * and (2) the thread must not migrate CPUs without switching which
34035d1ae027SRobert Watson 	 * cache it accesses.  We rely on a critical section to prevent
34045d1ae027SRobert Watson 	 * preemption and migration.  We release the critical section in
34055d1ae027SRobert Watson 	 * order to acquire the zone mutex if we are unable to free to the
34065d1ae027SRobert Watson 	 * current cache; when we re-acquire the critical section, we must
34075d1ae027SRobert Watson 	 * detect and handle migration if it has occurred.
34085d1ae027SRobert Watson 	 */
34090a81b439SJeff Roberson 	domain = itemdomain = 0;
34105d1ae027SRobert Watson 	critical_enter();
34110a81b439SJeff Roberson 	do {
34125d1ae027SRobert Watson 		cpu = curcpu;
34138355f576SJeff Roberson 		cache = &zone->uz_cpu[cpu];
34140a81b439SJeff Roberson 		bucket = cache->uc_allocbucket;
3415c1685086SJeff Roberson #ifdef UMA_XDOMAIN
34160a81b439SJeff Roberson 		if ((zone->uz_flags & UMA_ZONE_NUMA) != 0) {
34170a81b439SJeff Roberson 			itemdomain = _vm_phys_domain(
34170a81b439SJeff Roberson 			    pmap_kextract((vm_offset_t)item));
34180a81b439SJeff Roberson 			domain = PCPU_GET(domain);
34190a81b439SJeff Roberson 		}
34200a81b439SJeff Roberson 		if ((zone->uz_flags & UMA_ZONE_NUMA) != 0 &&
34210a81b439SJeff Roberson 		    domain != itemdomain) {
34220a81b439SJeff Roberson 			bucket = cache->uc_crossbucket;
34230a81b439SJeff Roberson 		} else
3424c1685086SJeff Roberson #endif
3426a553d4b8SJeff Roberson 		/*
3427fc03d22bSJeff Roberson 		 * Try to free into the allocbucket first to give LIFO ordering
3428fc03d22bSJeff Roberson 		 * for cache-hot datastructures.  Spill over into the freebucket
3429fc03d22bSJeff Roberson 		 * if necessary.  Alloc will swap them if one runs dry.
3430a553d4b8SJeff Roberson 		 */
3431fc03d22bSJeff Roberson 		if (bucket == NULL || bucket->ub_cnt >= bucket->ub_entries)
3432fc03d22bSJeff Roberson 			bucket = cache->uc_freebucket;
34330a81b439SJeff Roberson 		if (__predict_true(bucket != NULL &&
34340a81b439SJeff Roberson 		    bucket->ub_cnt < bucket->ub_entries)) {
34350a81b439SJeff Roberson 			bucket_push(zone, cache, bucket, item);
34365d1ae027SRobert Watson 			critical_exit();
34378355f576SJeff Roberson 			return;
3438fc03d22bSJeff Roberson 		}
34390a81b439SJeff Roberson 	} while (cache_free(zone, cache, udata, item, itemdomain));
34400a81b439SJeff Roberson 	critical_exit();
3441fc03d22bSJeff Roberson 
34428355f576SJeff Roberson 	/*
34430a81b439SJeff Roberson 	 * If nothing else caught this, we'll just do an internal free.
34448355f576SJeff Roberson 	 */
34450a81b439SJeff Roberson zfree_item:
34460a81b439SJeff Roberson 	zone_free_item(zone, item, udata, SKIP_DTOR);
34470a81b439SJeff Roberson }
3448fc03d22bSJeff Roberson 
34490a81b439SJeff Roberson static void
34500a81b439SJeff Roberson zone_free_bucket(uma_zone_t zone, uma_bucket_t bucket, void *udata,
34510a81b439SJeff Roberson     int domain, int itemdomain)
34520a81b439SJeff Roberson {
34530a81b439SJeff Roberson 	uma_zone_domain_t zdom;
34540a81b439SJeff Roberson 
34550a81b439SJeff Roberson #ifdef UMA_XDOMAIN
34560a81b439SJeff Roberson 	/*
34570a81b439SJeff Roberson 	 * Buckets coming from the wrong domain will be entirely for the
34580a81b439SJeff Roberson 	 * only other domain on two-domain systems.  In this case we can
34590a81b439SJeff Roberson 	 * simply cache them.  Otherwise we need to sort them back to
34600a81b439SJeff Roberson 	 * correct domains by freeing the contents to the slab layer.
34610a81b439SJeff Roberson 	 */
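	 *
	 * Illustrative example: with vm_ndomains == 2, a cross bucket
	 * filled on domain 0 holds only domain-1 items, so it may be
	 * cached whole in domain 1's bucket list below.  With three or
	 * more domains the contents may be mixed, hence the drain.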
34620a81b439SJeff Roberson 	if (domain != itemdomain && vm_ndomains > 2) {
34630a81b439SJeff Roberson 		CTR3(KTR_UMA,
34640a81b439SJeff Roberson 		    "uma_zfree: zone %s(%p) draining cross bucket %p",
34650a81b439SJeff Roberson 		    zone->uz_name, zone, bucket);
34660a81b439SJeff Roberson 		bucket_drain(zone, bucket);
34670a81b439SJeff Roberson 		bucket_free(zone, bucket, udata);
34680a81b439SJeff Roberson 		return;
34690a81b439SJeff Roberson 	}
34700a81b439SJeff Roberson #endif
34710a81b439SJeff Roberson 	/*
34720a81b439SJeff Roberson 	 * Attempt to save the bucket in the zone's domain bucket cache.
34730a81b439SJeff Roberson 	 *
34740a81b439SJeff Roberson 	 * We bump uz_bucket_size when lock contention suggests the cache
34750a81b439SJeff Roberson 	 * size is insufficient to handle the working set.
34760a81b439SJeff Roberson 	 */
34774d104ba0SAlexander Motin 	if (ZONE_TRYLOCK(zone) == 0) {
34784d104ba0SAlexander Motin 		/* Record contention to size the buckets. */
34798355f576SJeff Roberson 		ZONE_LOCK(zone);
348020a4e154SJeff Roberson 		if (zone->uz_bucket_size < zone->uz_bucket_size_max)
348120a4e154SJeff Roberson 			zone->uz_bucket_size++;
34824d104ba0SAlexander Motin 	}
34838355f576SJeff Roberson 
34840a81b439SJeff Roberson 	CTR3(KTR_UMA,
34850a81b439SJeff Roberson 	    "uma_zfree: zone %s(%p) putting bucket %p on free list",
34860a81b439SJeff Roberson 	    zone->uz_name, zone, bucket);
34870a81b439SJeff Roberson 	/* Only full buckets may be placed on the zone's bucket lists. */
34880a81b439SJeff Roberson 	KASSERT(bucket->ub_cnt == bucket->ub_entries,
34890a81b439SJeff Roberson 	    ("uma_zfree: attempting to insert a partial bucket onto the "
34890a81b439SJeff Roberson 	    "full list"));
34900a81b439SJeff Roberson 	if (zone->uz_bkt_count >= zone->uz_bkt_max) {
3491c1685086SJeff Roberson 		ZONE_UNLOCK(zone);
3492c1685086SJeff Roberson 		bucket_drain(zone, bucket);
3493c1685086SJeff Roberson 		bucket_free(zone, bucket, udata);
3494c1685086SJeff Roberson 	} else {
3495c1685086SJeff Roberson 		zdom = &zone->uz_domain[itemdomain];
3496c1685086SJeff Roberson 		zone_put_bucket(zone, zdom, bucket, true);
3497c1685086SJeff Roberson 		ZONE_UNLOCK(zone);
3498c1685086SJeff Roberson 	}
34998355f576SJeff Roberson }
3500fc03d22bSJeff Roberson 
35014d104ba0SAlexander Motin /*
35020a81b439SJeff Roberson  * Populate a free or cross bucket for the current cpu cache.  Free any
35030a81b439SJeff Roberson  * existing full bucket either to the zone cache or back to the slab layer.
35040a81b439SJeff Roberson  *
35050a81b439SJeff Roberson  * Enters and returns in a critical section.  false return indicates that
35060a81b439SJeff Roberson  * we cannot satisfy this free in the cache layer.  true indicates that
35070a81b439SJeff Roberson  * the caller should retry.
35084d104ba0SAlexander Motin  */
35090a81b439SJeff Roberson static __noinline bool
35100a81b439SJeff Roberson cache_free(uma_zone_t zone, uma_cache_t cache, void *udata, void *item,
35110a81b439SJeff Roberson     int itemdomain)
35120a81b439SJeff Roberson {
35130a81b439SJeff Roberson 	uma_bucket_t bucket;
35140a81b439SJeff Roberson 	int cpu, domain;
35150a81b439SJeff Roberson 
35160a81b439SJeff Roberson 	CRITICAL_ASSERT(curthread);
35170a81b439SJeff Roberson 
351820a4e154SJeff Roberson 	if (zone->uz_bucket_size == 0 || bucketdisable)
35190a81b439SJeff Roberson 		return (false);
35200a81b439SJeff Roberson 
35210a81b439SJeff Roberson 	cpu = curcpu;
35220a81b439SJeff Roberson 	cache = &zone->uz_cpu[cpu];
35230a81b439SJeff Roberson 
35240a81b439SJeff Roberson 	/*
35250a81b439SJeff Roberson 	 * NUMA domains need to free to the correct zdom.  When XDOMAIN
35260a81b439SJeff Roberson 	 * is enabled this is the zdom of the item and the bucket may be
35270a81b439SJeff Roberson 	 * the cross bucket if they do not match.
35280a81b439SJeff Roberson 	 */
35290a81b439SJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_NUMA) != 0)
35300a81b439SJeff Roberson #ifdef UMA_XDOMAIN
35310a81b439SJeff Roberson 		domain = PCPU_GET(domain);
35320a81b439SJeff Roberson #else
35330a81b439SJeff Roberson 		itemdomain = domain = PCPU_GET(domain);
35340a81b439SJeff Roberson #endif
35350a81b439SJeff Roberson 	else
35360a81b439SJeff Roberson 		itemdomain = domain = 0;
35370a81b439SJeff Roberson #ifdef UMA_XDOMAIN
35380a81b439SJeff Roberson 	if (domain != itemdomain) {
35390a81b439SJeff Roberson 		bucket = cache->uc_crossbucket;
35400a81b439SJeff Roberson 		cache->uc_crossbucket = NULL;
35410a81b439SJeff Roberson 		if (bucket != NULL)
35420a81b439SJeff Roberson 			atomic_add_64(&zone->uz_xdomain, bucket->ub_cnt);
35430a81b439SJeff Roberson 	} else
35440a81b439SJeff Roberson #endif
35450a81b439SJeff Roberson 	{
35460a81b439SJeff Roberson 		bucket = cache->uc_freebucket;
35470a81b439SJeff Roberson 		cache->uc_freebucket = NULL;
35480a81b439SJeff Roberson 	}
35490a81b439SJeff Roberson 
35510a81b439SJeff Roberson 	/* We are no longer associated with this CPU. */
35520a81b439SJeff Roberson 	critical_exit();
35530a81b439SJeff Roberson 
35540a81b439SJeff Roberson 	if (bucket != NULL)
35550a81b439SJeff Roberson 		zone_free_bucket(zone, bucket, udata, domain, itemdomain);
3556a553d4b8SJeff Roberson 
35576fd34d6fSJeff Roberson 	bucket = bucket_alloc(zone, udata, M_NOWAIT);
35581431a748SGleb Smirnoff 	CTR3(KTR_UMA, "uma_zfree: zone %s(%p) allocated bucket %p",
35591431a748SGleb Smirnoff 	    zone->uz_name, zone, bucket);
3560fc03d22bSJeff Roberson 	critical_enter();
35610a81b439SJeff Roberson 	if (bucket == NULL)
35620a81b439SJeff Roberson 		return (false);
3563fc03d22bSJeff Roberson 	cpu = curcpu;
3564fc03d22bSJeff Roberson 	cache = &zone->uz_cpu[cpu];
35650a81b439SJeff Roberson #ifdef UMA_XDOMAIN
3566fc03d22bSJeff Roberson 	/*
35670a81b439SJeff Roberson 	 * Check to see if we should be populating the cross bucket.  If it
35680a81b439SJeff Roberson 	 * is already populated we will fall through and attempt to populate
35690a81b439SJeff Roberson 	 * the free bucket.
3570fc03d22bSJeff Roberson 	 */
35710a81b439SJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_NUMA) != 0) {
35720a81b439SJeff Roberson 		domain = PCPU_GET(domain);
35730a81b439SJeff Roberson 		if (domain != itemdomain && cache->uc_crossbucket == NULL) {
35740a81b439SJeff Roberson 			cache->uc_crossbucket = bucket;
35750a81b439SJeff Roberson 			return (true);
35760a81b439SJeff Roberson 		}
35770a81b439SJeff Roberson 	}
35780a81b439SJeff Roberson #endif
35790a81b439SJeff Roberson 	/*
35800a81b439SJeff Roberson 	 * We may have lost the race to fill the bucket or switched CPUs.
35810a81b439SJeff Roberson 	 */
35820a81b439SJeff Roberson 	if (cache->uc_freebucket != NULL) {
3583fc03d22bSJeff Roberson 		critical_exit();
35846fd34d6fSJeff Roberson 		bucket_free(zone, bucket, udata);
35850a81b439SJeff Roberson 		critical_enter();
35860a81b439SJeff Roberson 	} else
35870a81b439SJeff Roberson 		cache->uc_freebucket = bucket;
35888355f576SJeff Roberson 
35890a81b439SJeff Roberson 	return (true);
35908355f576SJeff Roberson }
35918355f576SJeff Roberson 
3592ab3185d1SJeff Roberson void
3593ab3185d1SJeff Roberson uma_zfree_domain(uma_zone_t zone, void *item, void *udata)
3594ab3185d1SJeff Roberson {
3595ab3185d1SJeff Roberson 
3596ab3185d1SJeff Roberson 	/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
359719fa89e9SMark Murray 	random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA);
3598ab3185d1SJeff Roberson 
3599ab3185d1SJeff Roberson 	CTR2(KTR_UMA, "uma_zfree_domain thread %x zone %s", curthread,
3600ab3185d1SJeff Roberson 	    zone->uz_name);
3601ab3185d1SJeff Roberson 
3602ab3185d1SJeff Roberson 	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
3603ab3185d1SJeff Roberson 	    ("uma_zfree_domain: called with spinlock or critical section held"));
3604ab3185d1SJeff Roberson 
3605ab3185d1SJeff Roberson 	/* uma_zfree(..., NULL) does nothing, to match free(9). */
3606ab3185d1SJeff Roberson 	if (item == NULL)
3607ab3185d1SJeff Roberson 		return;
3608ab3185d1SJeff Roberson 	zone_free_item(zone, item, udata, SKIP_NONE);
3609ab3185d1SJeff Roberson }
3610ab3185d1SJeff Roberson 
36118355f576SJeff Roberson static void
3612bb15d1c7SGleb Smirnoff slab_free_item(uma_zone_t zone, uma_slab_t slab, void *item)
36138355f576SJeff Roberson {
3614bb15d1c7SGleb Smirnoff 	uma_keg_t keg;
3615ab3185d1SJeff Roberson 	uma_domain_t dom;
361685dcf349SGleb Smirnoff 	uint8_t freei;
3617099a0e58SBosko Milekic 
3618bb15d1c7SGleb Smirnoff 	keg = zone->uz_keg;
3619bb15d1c7SGleb Smirnoff 	MPASS(zone->uz_lockptr == &keg->uk_lock);
3620bb15d1c7SGleb Smirnoff 	KEG_LOCK_ASSERT(keg);
36218355f576SJeff Roberson 
3622ab3185d1SJeff Roberson 	dom = &keg->uk_domain[slab->us_domain];
3623ab3185d1SJeff Roberson 
36248355f576SJeff Roberson 	/* Do we need to remove from any lists? */
3625099a0e58SBosko Milekic 	if (slab->us_freecount + 1 == keg->uk_ipers) {
36268355f576SJeff Roberson 		LIST_REMOVE(slab, us_link);
3627ab3185d1SJeff Roberson 		LIST_INSERT_HEAD(&dom->ud_free_slab, slab, us_link);
36288355f576SJeff Roberson 	} else if (slab->us_freecount == 0) {
36298355f576SJeff Roberson 		LIST_REMOVE(slab, us_link);
3630ab3185d1SJeff Roberson 		LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link);
36318355f576SJeff Roberson 	}
36328355f576SJeff Roberson 
3633ef72505eSJeff Roberson 	/* Slab management. */
36341e0701e1SJeff Roberson 	freei = slab_item_index(slab, keg, item);
36359b78b1f4SJeff Roberson 	BIT_SET(keg->uk_ipers, freei, &slab->us_free);
36368355f576SJeff Roberson 	slab->us_freecount++;
36378355f576SJeff Roberson 
3638ef72505eSJeff Roberson 	/* Keg statistics. */
3639099a0e58SBosko Milekic 	keg->uk_free++;
36400095a784SJeff Roberson }
36410095a784SJeff Roberson 
36420095a784SJeff Roberson static void
3643b75c4efcSAndrew Turner zone_release(void *arg, void **bucket, int cnt)
36440095a784SJeff Roberson {
3645b75c4efcSAndrew Turner 	uma_zone_t zone;
36460095a784SJeff Roberson 	void *item;
36470095a784SJeff Roberson 	uma_slab_t slab;
36480095a784SJeff Roberson 	uma_keg_t keg;
36490095a784SJeff Roberson 	uint8_t *mem;
36500095a784SJeff Roberson 	int i;
36518355f576SJeff Roberson 
3652b75c4efcSAndrew Turner 	zone = arg;
3653bb15d1c7SGleb Smirnoff 	keg = zone->uz_keg;
3654af526374SJeff Roberson 	KEG_LOCK(keg);
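	/*
	 * Locate each item's slab header: for slabs embedded in their
	 * pages it sits at a fixed offset (uk_pgoff) from the start of
	 * the allocation; otherwise it is found through the keg hash or
	 * the vm_page (VTOSLAB).
	 */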
36550095a784SJeff Roberson 	for (i = 0; i < cnt; i++) {
36560095a784SJeff Roberson 		item = bucket[i];
36570095a784SJeff Roberson 		if (!(zone->uz_flags & UMA_ZONE_VTOSLAB)) {
36580095a784SJeff Roberson 			mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
36590095a784SJeff Roberson 			if (zone->uz_flags & UMA_ZONE_HASH) {
36600095a784SJeff Roberson 				slab = hash_sfind(&keg->uk_hash, mem);
36610095a784SJeff Roberson 			} else {
36620095a784SJeff Roberson 				mem += keg->uk_pgoff;
36630095a784SJeff Roberson 				slab = (uma_slab_t)mem;
36640095a784SJeff Roberson 			}
3665584061b4SJeff Roberson 		} else
36660095a784SJeff Roberson 			slab = vtoslab((vm_offset_t)item);
3667bb15d1c7SGleb Smirnoff 		slab_free_item(zone, slab, item);
36680095a784SJeff Roberson 	}
3669af526374SJeff Roberson 	KEG_UNLOCK(keg);
36708355f576SJeff Roberson }
36718355f576SJeff Roberson 
36720095a784SJeff Roberson /*
36730095a784SJeff Roberson  * Frees a single item to any zone.
36740095a784SJeff Roberson  *
36750095a784SJeff Roberson  * Arguments:
36760095a784SJeff Roberson  *	zone   The zone to free to
36770095a784SJeff Roberson  *	item   The item we're freeing
36780095a784SJeff Roberson  *	udata  User supplied data for the dtor
36790095a784SJeff Roberson  *	skip   Skip dtors and finis
36800095a784SJeff Roberson  */
36810095a784SJeff Roberson static void
36820095a784SJeff Roberson zone_free_item(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip)
36830095a784SJeff Roberson {
3684c5deaf04SGleb Smirnoff 
3685ca293436SRyan Libby 	item_dtor(zone, item, udata, skip);
36860095a784SJeff Roberson 
36870095a784SJeff Roberson 	if (skip < SKIP_FINI && zone->uz_fini)
36880095a784SJeff Roberson 		zone->uz_fini(item, zone->uz_size);
36890095a784SJeff Roberson 
36900095a784SJeff Roberson 	zone->uz_release(zone->uz_arg, &item, 1);
3691bb15d1c7SGleb Smirnoff 
3692bb15d1c7SGleb Smirnoff 	if (skip & SKIP_CNT)
3693bb15d1c7SGleb Smirnoff 		return;
3694bb15d1c7SGleb Smirnoff 
36952efcc8cbSGleb Smirnoff 	counter_u64_add(zone->uz_frees, 1);
36962efcc8cbSGleb Smirnoff 
3697bb45b411SGleb Smirnoff 	if (zone->uz_max_items > 0) {
3698bb15d1c7SGleb Smirnoff 		ZONE_LOCK(zone);
3699bb15d1c7SGleb Smirnoff 		zone->uz_items--;
3700bb45b411SGleb Smirnoff 		if (zone->uz_sleepers > 0 &&
3701bb45b411SGleb Smirnoff 		    zone->uz_items < zone->uz_max_items)
3702bb15d1c7SGleb Smirnoff 			wakeup_one(zone);
3703bb15d1c7SGleb Smirnoff 		ZONE_UNLOCK(zone);
37040095a784SJeff Roberson 	}
3705bb45b411SGleb Smirnoff }
37060095a784SJeff Roberson 
37078355f576SJeff Roberson /* See uma.h */
37081c6cae97SLawrence Stewart int
3709736ee590SJeff Roberson uma_zone_set_max(uma_zone_t zone, int nitems)
3710736ee590SJeff Roberson {
3711bb15d1c7SGleb Smirnoff 	struct uma_bucket_zone *ubz;
3712003cf08bSMark Johnston 	int count;
3713bb15d1c7SGleb Smirnoff 
3714bb15d1c7SGleb Smirnoff 	ZONE_LOCK(zone);
3715003cf08bSMark Johnston 	ubz = bucket_zone_max(zone, nitems);
3716003cf08bSMark Johnston 	count = ubz != NULL ? ubz->ubz_entries : 0;
371720a4e154SJeff Roberson 	zone->uz_bucket_size_max = zone->uz_bucket_size = count;
371820a4e154SJeff Roberson 	if (zone->uz_bucket_size_min > zone->uz_bucket_size_max)
371920a4e154SJeff Roberson 		zone->uz_bucket_size_min = zone->uz_bucket_size_max;
3720bb15d1c7SGleb Smirnoff 	zone->uz_max_items = nitems;
3721bb15d1c7SGleb Smirnoff 	ZONE_UNLOCK(zone);
3722bb15d1c7SGleb Smirnoff 
3723bb15d1c7SGleb Smirnoff 	return (nitems);
3724bb15d1c7SGleb Smirnoff }
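
/*
 * Illustrative example (hypothetical zone): cap a zone at 1024 items.
 * The per-CPU bucket sizes are clamped as a side effect so that per-CPU
 * caching does not defeat the limit:
 *
 *	uma_zone_set_max(foo_zone, 1024);
 */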
3725bb15d1c7SGleb Smirnoff 
3726bb15d1c7SGleb Smirnoff /* See uma.h */
3727003cf08bSMark Johnston void
3728bb15d1c7SGleb Smirnoff uma_zone_set_maxcache(uma_zone_t zone, int nitems)
3729bb15d1c7SGleb Smirnoff {
3730003cf08bSMark Johnston 	struct uma_bucket_zone *ubz;
3731003cf08bSMark Johnston 	int bpcpu;
3732bb15d1c7SGleb Smirnoff 
3733bb15d1c7SGleb Smirnoff 	ZONE_LOCK(zone);
3734003cf08bSMark Johnston 	ubz = bucket_zone_max(zone, nitems);
3735003cf08bSMark Johnston 	if (ubz != NULL) {
3736003cf08bSMark Johnston 		bpcpu = 2;
3737003cf08bSMark Johnston #ifdef UMA_XDOMAIN
3738003cf08bSMark Johnston 		if ((zone->uz_flags & UMA_ZONE_NUMA) != 0)
3739003cf08bSMark Johnston 			/* Count the cross-domain bucket. */
3740003cf08bSMark Johnston 			bpcpu++;
3741003cf08bSMark Johnston #endif
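		/*
		 * e.g. (illustrative): 64-entry buckets, bpcpu = 2 and
		 * mp_ncpus = 8 reduce nitems by 64 * 2 * 8 = 1024 to cover
		 * items that may sit in per-CPU buckets.
		 */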
3742003cf08bSMark Johnston 		nitems -= ubz->ubz_entries * bpcpu * mp_ncpus;
374320a4e154SJeff Roberson 		zone->uz_bucket_size_max = ubz->ubz_entries;
3744003cf08bSMark Johnston 	} else {
374520a4e154SJeff Roberson 		zone->uz_bucket_size_max = zone->uz_bucket_size = 0;
3746003cf08bSMark Johnston 	}
374720a4e154SJeff Roberson 	if (zone->uz_bucket_size_min > zone->uz_bucket_size_max)
374820a4e154SJeff Roberson 		zone->uz_bucket_size_min = zone->uz_bucket_size_max;
3749bb15d1c7SGleb Smirnoff 	zone->uz_bkt_max = nitems;
3750bb15d1c7SGleb Smirnoff 	ZONE_UNLOCK(zone);
3751736ee590SJeff Roberson }
3752736ee590SJeff Roberson 
3753736ee590SJeff Roberson /* See uma.h */
3754e49471b0SAndre Oppermann int
3755e49471b0SAndre Oppermann uma_zone_get_max(uma_zone_t zone)
3756e49471b0SAndre Oppermann {
3757e49471b0SAndre Oppermann 	int nitems;
3758e49471b0SAndre Oppermann 
3759bb15d1c7SGleb Smirnoff 	ZONE_LOCK(zone);
3760bb15d1c7SGleb Smirnoff 	nitems = zone->uz_max_items;
3761bb15d1c7SGleb Smirnoff 	ZONE_UNLOCK(zone);
3762e49471b0SAndre Oppermann 
3763e49471b0SAndre Oppermann 	return (nitems);
3764e49471b0SAndre Oppermann }
3765e49471b0SAndre Oppermann 
3766e49471b0SAndre Oppermann /* See uma.h */
37672f891cd5SPawel Jakub Dawidek void
37682f891cd5SPawel Jakub Dawidek uma_zone_set_warning(uma_zone_t zone, const char *warning)
37692f891cd5SPawel Jakub Dawidek {
37702f891cd5SPawel Jakub Dawidek 
37712f891cd5SPawel Jakub Dawidek 	ZONE_LOCK(zone);
37722f891cd5SPawel Jakub Dawidek 	zone->uz_warning = warning;
37732f891cd5SPawel Jakub Dawidek 	ZONE_UNLOCK(zone);
37742f891cd5SPawel Jakub Dawidek }
37752f891cd5SPawel Jakub Dawidek 
37762f891cd5SPawel Jakub Dawidek /* See uma.h */
377754503a13SJonathan T. Looney void
377854503a13SJonathan T. Looney uma_zone_set_maxaction(uma_zone_t zone, uma_maxaction_t maxaction)
377954503a13SJonathan T. Looney {
378054503a13SJonathan T. Looney 
378154503a13SJonathan T. Looney 	ZONE_LOCK(zone);
3782e60b2fcbSGleb Smirnoff 	TASK_INIT(&zone->uz_maxaction, 0, (task_fn_t *)maxaction, zone);
378354503a13SJonathan T. Looney 	ZONE_UNLOCK(zone);
378454503a13SJonathan T. Looney }
378554503a13SJonathan T. Looney 
378654503a13SJonathan T. Looney /* See uma.h */
3787c4ae7908SLawrence Stewart int
3788c4ae7908SLawrence Stewart uma_zone_get_cur(uma_zone_t zone)
3789c4ae7908SLawrence Stewart {
3790c4ae7908SLawrence Stewart 	int64_t nitems;
3791c4ae7908SLawrence Stewart 	u_int i;
3792c4ae7908SLawrence Stewart 
3793c4ae7908SLawrence Stewart 	ZONE_LOCK(zone);
37942efcc8cbSGleb Smirnoff 	nitems = counter_u64_fetch(zone->uz_allocs) -
37952efcc8cbSGleb Smirnoff 	    counter_u64_fetch(zone->uz_frees);
379620a4e154SJeff Roberson 	if ((zone->uz_flags & UMA_ZFLAG_INTERNAL) == 0) {
3797c4ae7908SLawrence Stewart 		CPU_FOREACH(i) {
3798c4ae7908SLawrence Stewart 			/*
379920a4e154SJeff Roberson 			 * See the comment in uma_vm_zone_stats() regarding
380020a4e154SJeff Roberson 			 * the safety of accessing the per-cpu caches. With
380120a4e154SJeff Roberson 			 * the zone lock held, it is safe, but can potentially
380220a4e154SJeff Roberson 			 * result in stale data.
3803c4ae7908SLawrence Stewart 			 */
3804c4ae7908SLawrence Stewart 			nitems += zone->uz_cpu[i].uc_allocs -
3805c4ae7908SLawrence Stewart 			    zone->uz_cpu[i].uc_frees;
3806c4ae7908SLawrence Stewart 		}
380720a4e154SJeff Roberson 	}
3808c4ae7908SLawrence Stewart 	ZONE_UNLOCK(zone);
3809c4ae7908SLawrence Stewart 
3810c4ae7908SLawrence Stewart 	return (nitems < 0 ? 0 : nitems);
3811c4ae7908SLawrence Stewart }
3812c4ae7908SLawrence Stewart 
381320a4e154SJeff Roberson static uint64_t
381420a4e154SJeff Roberson uma_zone_get_allocs(uma_zone_t zone)
381520a4e154SJeff Roberson {
381620a4e154SJeff Roberson 	uint64_t nitems;
381720a4e154SJeff Roberson 	u_int i;
381820a4e154SJeff Roberson 
381920a4e154SJeff Roberson 	ZONE_LOCK(zone);
382020a4e154SJeff Roberson 	nitems = counter_u64_fetch(zone->uz_allocs);
382120a4e154SJeff Roberson 	if ((zone->uz_flags & UMA_ZFLAG_INTERNAL) == 0) {
382220a4e154SJeff Roberson 		CPU_FOREACH(i) {
382320a4e154SJeff Roberson 			/*
382420a4e154SJeff Roberson 			 * See the comment in uma_vm_zone_stats() regarding
382520a4e154SJeff Roberson 			 * the safety of accessing the per-cpu caches. With
382620a4e154SJeff Roberson 			 * the zone lock held, it is safe, but can potentially
382720a4e154SJeff Roberson 			 * result in stale data.
382820a4e154SJeff Roberson 			 */
382920a4e154SJeff Roberson 			nitems += zone->uz_cpu[i].uc_allocs;
383020a4e154SJeff Roberson 		}
383120a4e154SJeff Roberson 	}
383220a4e154SJeff Roberson 	ZONE_UNLOCK(zone);
383320a4e154SJeff Roberson 
383420a4e154SJeff Roberson 	return (nitems);
383520a4e154SJeff Roberson }
383620a4e154SJeff Roberson 
383720a4e154SJeff Roberson static uint64_t
383820a4e154SJeff Roberson uma_zone_get_frees(uma_zone_t zone)
383920a4e154SJeff Roberson {
384020a4e154SJeff Roberson 	uint64_t nitems;
384120a4e154SJeff Roberson 	u_int i;
384220a4e154SJeff Roberson 
384320a4e154SJeff Roberson 	ZONE_LOCK(zone);
384420a4e154SJeff Roberson 	nitems = counter_u64_fetch(zone->uz_frees);
384520a4e154SJeff Roberson 	if ((zone->uz_flags & UMA_ZFLAG_INTERNAL) == 0) {
384620a4e154SJeff Roberson 		CPU_FOREACH(i) {
384720a4e154SJeff Roberson 			/*
384820a4e154SJeff Roberson 			 * See the comment in uma_vm_zone_stats() regarding
384920a4e154SJeff Roberson 			 * the safety of accessing the per-cpu caches. With
385020a4e154SJeff Roberson 			 * the zone lock held, it is safe, but can potentially
385120a4e154SJeff Roberson 			 * result in stale data.
385220a4e154SJeff Roberson 			 */
385320a4e154SJeff Roberson 			nitems += zone->uz_cpu[i].uc_frees;
385420a4e154SJeff Roberson 		}
385520a4e154SJeff Roberson 	}
385620a4e154SJeff Roberson 	ZONE_UNLOCK(zone);
385720a4e154SJeff Roberson 
385820a4e154SJeff Roberson 	return (nitems);
385920a4e154SJeff Roberson }
386020a4e154SJeff Roberson 
3861c4ae7908SLawrence Stewart /* See uma.h */
3862736ee590SJeff Roberson void
3863099a0e58SBosko Milekic uma_zone_set_init(uma_zone_t zone, uma_init uminit)
3864099a0e58SBosko Milekic {
3865e20a199fSJeff Roberson 	uma_keg_t keg;
3866e20a199fSJeff Roberson 
3867bb15d1c7SGleb Smirnoff 	KEG_GET(zone, keg);
3868af526374SJeff Roberson 	KEG_LOCK(keg);
3869e20a199fSJeff Roberson 	KASSERT(keg->uk_pages == 0,
3870099a0e58SBosko Milekic 	    ("uma_zone_set_init on non-empty keg"));
3871e20a199fSJeff Roberson 	keg->uk_init = uminit;
3872af526374SJeff Roberson 	KEG_UNLOCK(keg);
3873099a0e58SBosko Milekic }
3874099a0e58SBosko Milekic 
3875099a0e58SBosko Milekic /* See uma.h */
3876099a0e58SBosko Milekic void
3877099a0e58SBosko Milekic uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
3878099a0e58SBosko Milekic {
3879e20a199fSJeff Roberson 	uma_keg_t keg;
3880e20a199fSJeff Roberson 
3881bb15d1c7SGleb Smirnoff 	KEG_GET(zone, keg);
3882af526374SJeff Roberson 	KEG_LOCK(keg);
3883e20a199fSJeff Roberson 	KASSERT(keg->uk_pages == 0,
3884099a0e58SBosko Milekic 	    ("uma_zone_set_fini on non-empty keg"));
3885e20a199fSJeff Roberson 	keg->uk_fini = fini;
3886af526374SJeff Roberson 	KEG_UNLOCK(keg);
3887099a0e58SBosko Milekic }
3888099a0e58SBosko Milekic 
3889099a0e58SBosko Milekic /* See uma.h */
3890099a0e58SBosko Milekic void
3891099a0e58SBosko Milekic uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
3892099a0e58SBosko Milekic {
3893af526374SJeff Roberson 
3894099a0e58SBosko Milekic 	ZONE_LOCK(zone);
3895bb15d1c7SGleb Smirnoff 	KASSERT(zone->uz_keg->uk_pages == 0,
3896099a0e58SBosko Milekic 	    ("uma_zone_set_zinit on non-empty keg"));
3897099a0e58SBosko Milekic 	zone->uz_init = zinit;
3898099a0e58SBosko Milekic 	ZONE_UNLOCK(zone);
3899099a0e58SBosko Milekic }
3900099a0e58SBosko Milekic 
3901099a0e58SBosko Milekic /* See uma.h */
3902099a0e58SBosko Milekic void
3903099a0e58SBosko Milekic uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
3904099a0e58SBosko Milekic {
3905af526374SJeff Roberson 
3906099a0e58SBosko Milekic 	ZONE_LOCK(zone);
3907bb15d1c7SGleb Smirnoff 	KASSERT(zone->uz_keg->uk_pages == 0,
3908099a0e58SBosko Milekic 	    ("uma_zone_set_zfini on non-empty keg"));
3909099a0e58SBosko Milekic 	zone->uz_fini = zfini;
3910099a0e58SBosko Milekic 	ZONE_UNLOCK(zone);
3911099a0e58SBosko Milekic }
3912099a0e58SBosko Milekic 
3913099a0e58SBosko Milekic /* See uma.h */
3914b23f72e9SBrian Feldman /* XXX uk_freef is not actually used with the zone locked */
3915099a0e58SBosko Milekic void
39168355f576SJeff Roberson uma_zone_set_freef(uma_zone_t zone, uma_free freef)
39178355f576SJeff Roberson {
39180095a784SJeff Roberson 	uma_keg_t keg;
3919e20a199fSJeff Roberson 
3920bb15d1c7SGleb Smirnoff 	KEG_GET(zone, keg);
39211d2c0c46SDmitry Chagin 	KASSERT(keg != NULL, ("uma_zone_set_freef: Invalid zone type"));
3922af526374SJeff Roberson 	KEG_LOCK(keg);
39230095a784SJeff Roberson 	keg->uk_freef = freef;
3924af526374SJeff Roberson 	KEG_UNLOCK(keg);
39258355f576SJeff Roberson }
39268355f576SJeff Roberson 
39278355f576SJeff Roberson /* See uma.h */
3928b23f72e9SBrian Feldman /* XXX uk_allocf is not actually used with the zone locked */
39298355f576SJeff Roberson void
39308355f576SJeff Roberson uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
39318355f576SJeff Roberson {
3932e20a199fSJeff Roberson 	uma_keg_t keg;
3933e20a199fSJeff Roberson 
3934bb15d1c7SGleb Smirnoff 	KEG_GET(zone, keg);
3935af526374SJeff Roberson 	KEG_LOCK(keg);
3936e20a199fSJeff Roberson 	keg->uk_allocf = allocf;
3937af526374SJeff Roberson 	KEG_UNLOCK(keg);
39388355f576SJeff Roberson }
39398355f576SJeff Roberson 
39408355f576SJeff Roberson /* See uma.h */
39416fd34d6fSJeff Roberson void
39426fd34d6fSJeff Roberson uma_zone_reserve(uma_zone_t zone, int items)
39436fd34d6fSJeff Roberson {
39446fd34d6fSJeff Roberson 	uma_keg_t keg;
39456fd34d6fSJeff Roberson 
3946bb15d1c7SGleb Smirnoff 	KEG_GET(zone, keg);
39476fd34d6fSJeff Roberson 	KEG_LOCK(keg);
39486fd34d6fSJeff Roberson 	keg->uk_reserve = items;
39496fd34d6fSJeff Roberson 	KEG_UNLOCK(keg);
39506fd34d6fSJeff Roberson }
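
/*
 * Illustrative example (hypothetical consumer): keep a floor of eight
 * items available to allocations that pass M_USE_RESERVE:
 *
 *	uma_zone_reserve(foo_zone, 8);
 *	...
 *	item = uma_zalloc(foo_zone, M_NOWAIT | M_USE_RESERVE);
 */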
39516fd34d6fSJeff Roberson 
39526fd34d6fSJeff Roberson /* See uma.h */
39538355f576SJeff Roberson int
3954a4915c21SAttilio Rao uma_zone_reserve_kva(uma_zone_t zone, int count)
39558355f576SJeff Roberson {
3956099a0e58SBosko Milekic 	uma_keg_t keg;
39578355f576SJeff Roberson 	vm_offset_t kva;
39589ba30bcbSZbigniew Bodek 	u_int pages;
39598355f576SJeff Roberson 
3960bb15d1c7SGleb Smirnoff 	KEG_GET(zone, keg);
39618355f576SJeff Roberson 
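	/*
	 * e.g. (illustrative): count = 1000, uk_ipers = 50, uk_ppera = 1
	 * yields pages = 20.
	 */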
3962bb15d1c7SGleb Smirnoff 	pages = count / keg->uk_ipers;
3963099a0e58SBosko Milekic 	if (pages * keg->uk_ipers < count)
39648355f576SJeff Roberson 		pages++;
396557223e99SAndriy Gapon 	pages *= keg->uk_ppera;
3966a553d4b8SJeff Roberson 
3967a4915c21SAttilio Rao #ifdef UMA_MD_SMALL_ALLOC
3968a4915c21SAttilio Rao 	if (keg->uk_ppera > 1) {
3969a4915c21SAttilio Rao #else
3970a4915c21SAttilio Rao 	if (1) {
3971a4915c21SAttilio Rao #endif
397257223e99SAndriy Gapon 		kva = kva_alloc((vm_size_t)pages * PAGE_SIZE);
3973d1f42ac2SAlan Cox 		if (kva == 0)
39748355f576SJeff Roberson 			return (0);
3975a4915c21SAttilio Rao 	} else
3976a4915c21SAttilio Rao 		kva = 0;
3977bb15d1c7SGleb Smirnoff 
3978bb15d1c7SGleb Smirnoff 	ZONE_LOCK(zone);
3979bb15d1c7SGleb Smirnoff 	MPASS(keg->uk_kva == 0);
3980099a0e58SBosko Milekic 	keg->uk_kva = kva;
3981a4915c21SAttilio Rao 	keg->uk_offset = 0;
3982bb15d1c7SGleb Smirnoff 	zone->uz_max_items = pages * keg->uk_ipers;
3983a4915c21SAttilio Rao #ifdef UMA_MD_SMALL_ALLOC
3984a4915c21SAttilio Rao 	keg->uk_allocf = (keg->uk_ppera > 1) ? noobj_alloc : uma_small_alloc;
3985a4915c21SAttilio Rao #else
3986a4915c21SAttilio Rao 	keg->uk_allocf = noobj_alloc;
3987a4915c21SAttilio Rao #endif
39886fd34d6fSJeff Roberson 	keg->uk_flags |= UMA_ZONE_NOFREE;
3989bb15d1c7SGleb Smirnoff 	ZONE_UNLOCK(zone);
3990af526374SJeff Roberson 
39918355f576SJeff Roberson 	return (1);
39928355f576SJeff Roberson }
39938355f576SJeff Roberson 
39948355f576SJeff Roberson /* See uma.h */
39958355f576SJeff Roberson void
39968355f576SJeff Roberson uma_prealloc(uma_zone_t zone, int items)
39978355f576SJeff Roberson {
3998920239efSMark Johnston 	struct vm_domainset_iter di;
3999ab3185d1SJeff Roberson 	uma_domain_t dom;
40008355f576SJeff Roberson 	uma_slab_t slab;
4001099a0e58SBosko Milekic 	uma_keg_t keg;
400286220393SMark Johnston 	int aflags, domain, slabs;
40038355f576SJeff Roberson 
4004bb15d1c7SGleb Smirnoff 	KEG_GET(zone, keg);
4005af526374SJeff Roberson 	KEG_LOCK(keg);
4006099a0e58SBosko Milekic 	slabs = items / keg->uk_ipers;
4007099a0e58SBosko Milekic 	if (slabs * keg->uk_ipers < items)
40088355f576SJeff Roberson 		slabs++;
4009194a979eSMark Johnston 	while (slabs-- > 0) {
401086220393SMark Johnston 		aflags = M_NOWAIT;
401186220393SMark Johnston 		vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain,
401286220393SMark Johnston 		    &aflags);
401386220393SMark Johnston 		for (;;) {
401486220393SMark Johnston 			slab = keg_alloc_slab(keg, zone, domain, M_WAITOK,
401586220393SMark Johnston 			    aflags);
401686220393SMark Johnston 			if (slab != NULL) {
4017ab3185d1SJeff Roberson 				dom = &keg->uk_domain[slab->us_domain];
401886220393SMark Johnston 				LIST_INSERT_HEAD(&dom->ud_free_slab, slab,
401986220393SMark Johnston 				    us_link);
4020920239efSMark Johnston 				break;
40218355f576SJeff Roberson 			}
402286220393SMark Johnston 			KEG_LOCK(keg);
402386220393SMark Johnston 			if (vm_domainset_iter_policy(&di, &domain) != 0) {
402486220393SMark Johnston 				KEG_UNLOCK(keg);
402586220393SMark Johnston 				vm_wait_doms(&keg->uk_dr.dr_policy->ds_mask);
402686220393SMark Johnston 				KEG_LOCK(keg);
402786220393SMark Johnston 			}
402886220393SMark Johnston 		}
402986220393SMark Johnston 	}
4030af526374SJeff Roberson 	KEG_UNLOCK(keg);
40318355f576SJeff Roberson }
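
/*
 * Illustrative example (hypothetical consumer): warm a zone during boot
 * so that early M_NOWAIT allocations find free slabs already in place:
 *
 *	uma_prealloc(foo_zone, 256);
 */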
40328355f576SJeff Roberson 
40338355f576SJeff Roberson /* See uma.h */
403408cfa56eSMark Johnston void
403508cfa56eSMark Johnston uma_reclaim(int req)
40368355f576SJeff Roberson {
403744ec2b63SKonstantin Belousov 
40381431a748SGleb Smirnoff 	CTR0(KTR_UMA, "UMA: vm asked us to release pages!");
403908cfa56eSMark Johnston 	sx_xlock(&uma_reclaim_lock);
404086bbae32SJeff Roberson 	bucket_enable();
404108cfa56eSMark Johnston 
404208cfa56eSMark Johnston 	switch (req) {
404308cfa56eSMark Johnston 	case UMA_RECLAIM_TRIM:
404420a4e154SJeff Roberson 		zone_foreach(zone_trim, NULL);
404508cfa56eSMark Johnston 		break;
404608cfa56eSMark Johnston 	case UMA_RECLAIM_DRAIN:
404708cfa56eSMark Johnston 	case UMA_RECLAIM_DRAIN_CPU:
404820a4e154SJeff Roberson 		zone_foreach(zone_drain, NULL);
404908cfa56eSMark Johnston 		if (req == UMA_RECLAIM_DRAIN_CPU) {
405008cfa56eSMark Johnston 			pcpu_cache_drain_safe(NULL);
405120a4e154SJeff Roberson 			zone_foreach(zone_drain, NULL);
4052a2de44abSAlexander Motin 		}
405308cfa56eSMark Johnston 		break;
405408cfa56eSMark Johnston 	default:
405508cfa56eSMark Johnston 		panic("unhandled reclamation request %d", req);
405608cfa56eSMark Johnston 	}
40570f9b7bf3SMark Johnston 
40588355f576SJeff Roberson 	/*
40598355f576SJeff Roberson 	 * Some slabs may have been freed to the slab zone, but that zone is
40608355f576SJeff Roberson 	 * visited early in the pass above, so visit it again here to free
40618355f576SJeff Roberson 	 * pages that became empty once the other zones were drained.  We
40618355f576SJeff Roberson 	 * have to do the same for buckets.
40628355f576SJeff Roberson 	 */
406320a4e154SJeff Roberson 	zone_drain(slabzone, NULL);
4064cae33c14SJeff Roberson 	bucket_zone_drain();
406508cfa56eSMark Johnston 	sx_xunlock(&uma_reclaim_lock);
40668355f576SJeff Roberson }
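
/*
 * Illustrative example: a caller under memory pressure can drain all zone
 * caches with uma_reclaim(UMA_RECLAIM_DRAIN_CPU), including the per-CPU
 * buckets, or merely trim caches back to their working-set estimates with
 * uma_reclaim(UMA_RECLAIM_TRIM).
 */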
40678355f576SJeff Roberson 
40682e47807cSJeff Roberson static volatile int uma_reclaim_needed;
406944ec2b63SKonstantin Belousov 
407044ec2b63SKonstantin Belousov void
407144ec2b63SKonstantin Belousov uma_reclaim_wakeup(void)
407244ec2b63SKonstantin Belousov {
407344ec2b63SKonstantin Belousov 
40742e47807cSJeff Roberson 	if (atomic_fetchadd_int(&uma_reclaim_needed, 1) == 0)
40752e47807cSJeff Roberson 		wakeup(uma_reclaim);
407644ec2b63SKonstantin Belousov }
407744ec2b63SKonstantin Belousov 
407844ec2b63SKonstantin Belousov void
407944ec2b63SKonstantin Belousov uma_reclaim_worker(void *arg __unused)
408044ec2b63SKonstantin Belousov {
408144ec2b63SKonstantin Belousov 
408244ec2b63SKonstantin Belousov 	for (;;) {
408308cfa56eSMark Johnston 		sx_xlock(&uma_reclaim_lock);
4084200f8117SKonstantin Belousov 		while (atomic_load_int(&uma_reclaim_needed) == 0)
408508cfa56eSMark Johnston 			sx_sleep(uma_reclaim, &uma_reclaim_lock, PVM, "umarcl",
40862e47807cSJeff Roberson 			    hz);
408708cfa56eSMark Johnston 		sx_xunlock(&uma_reclaim_lock);
40889b43bc27SAndriy Gapon 		EVENTHANDLER_INVOKE(vm_lowmem, VM_LOW_KMEM);
408908cfa56eSMark Johnston 		uma_reclaim(UMA_RECLAIM_DRAIN_CPU);
4090200f8117SKonstantin Belousov 		atomic_store_int(&uma_reclaim_needed, 0);
40912e47807cSJeff Roberson 		/* Don't fire more than once per second. */
40922e47807cSJeff Roberson 		pause("umarclslp", hz);
409344ec2b63SKonstantin Belousov 	}
409444ec2b63SKonstantin Belousov }
409544ec2b63SKonstantin Belousov 
4096663b416fSJohn Baldwin /* See uma.h */
409708cfa56eSMark Johnston void
409808cfa56eSMark Johnston uma_zone_reclaim(uma_zone_t zone, int req)
409908cfa56eSMark Johnston {
410008cfa56eSMark Johnston 
410108cfa56eSMark Johnston 	switch (req) {
410208cfa56eSMark Johnston 	case UMA_RECLAIM_TRIM:
410320a4e154SJeff Roberson 		zone_trim(zone, NULL);
410408cfa56eSMark Johnston 		break;
410508cfa56eSMark Johnston 	case UMA_RECLAIM_DRAIN:
410620a4e154SJeff Roberson 		zone_drain(zone, NULL);
410708cfa56eSMark Johnston 		break;
410808cfa56eSMark Johnston 	case UMA_RECLAIM_DRAIN_CPU:
410908cfa56eSMark Johnston 		pcpu_cache_drain_safe(zone);
411020a4e154SJeff Roberson 		zone_drain(zone, NULL);
411108cfa56eSMark Johnston 		break;
411208cfa56eSMark Johnston 	default:
411308cfa56eSMark Johnston 		panic("unhandled reclamation request %d", req);
411408cfa56eSMark Johnston 	}
411508cfa56eSMark Johnston }
411608cfa56eSMark Johnston 
411708cfa56eSMark Johnston /* See uma.h */
4118663b416fSJohn Baldwin int
4119663b416fSJohn Baldwin uma_zone_exhausted(uma_zone_t zone)
4120663b416fSJohn Baldwin {
4121663b416fSJohn Baldwin 	int full;
4122663b416fSJohn Baldwin 
4123663b416fSJohn Baldwin 	ZONE_LOCK(zone);
4124bb15d1c7SGleb Smirnoff 	full = zone->uz_sleepers > 0;
4125663b416fSJohn Baldwin 	ZONE_UNLOCK(zone);
4126663b416fSJohn Baldwin 	return (full);
4127663b416fSJohn Baldwin }
4128663b416fSJohn Baldwin 
41296c125b8dSMohan Srinivasan int
41306c125b8dSMohan Srinivasan uma_zone_exhausted_nolock(uma_zone_t zone)
41316c125b8dSMohan Srinivasan {
4132bb15d1c7SGleb Smirnoff 	return (zone->uz_sleepers > 0);
41336c125b8dSMohan Srinivasan }
41346c125b8dSMohan Srinivasan 
413548343a2fSGleb Smirnoff static void
413648343a2fSGleb Smirnoff uma_zero_item(void *item, uma_zone_t zone)
413748343a2fSGleb Smirnoff {
413848343a2fSGleb Smirnoff 
413948343a2fSGleb Smirnoff 	bzero(item, zone->uz_size);
414048343a2fSGleb Smirnoff }
414148343a2fSGleb Smirnoff 
41422e47807cSJeff Roberson unsigned long
41432e47807cSJeff Roberson uma_limit(void)
41442e47807cSJeff Roberson {
41452e47807cSJeff Roberson 
41462e47807cSJeff Roberson 	return (uma_kmem_limit);
41472e47807cSJeff Roberson }
41482e47807cSJeff Roberson 
41492e47807cSJeff Roberson void
41502e47807cSJeff Roberson uma_set_limit(unsigned long limit)
41512e47807cSJeff Roberson {
41522e47807cSJeff Roberson 
41532e47807cSJeff Roberson 	uma_kmem_limit = limit;
41542e47807cSJeff Roberson }
41552e47807cSJeff Roberson 
41562e47807cSJeff Roberson unsigned long
41572e47807cSJeff Roberson uma_size(void)
41582e47807cSJeff Roberson {
41592e47807cSJeff Roberson 
4160058f0f74SMark Johnston 	return (atomic_load_long(&uma_kmem_total));
4161ad5b0f5bSJeff Roberson }
4162ad5b0f5bSJeff Roberson 
4163ad5b0f5bSJeff Roberson long
4164ad5b0f5bSJeff Roberson uma_avail(void)
4165ad5b0f5bSJeff Roberson {
4166ad5b0f5bSJeff Roberson 
4167058f0f74SMark Johnston 	return (uma_kmem_limit - uma_size());
41682e47807cSJeff Roberson }
41692e47807cSJeff Roberson 
4170a0d4b0aeSRobert Watson #ifdef DDB
41718355f576SJeff Roberson /*
41727a52a97eSRobert Watson  * Generate statistics across both the zone and its per-CPU caches.  Return
41737a52a97eSRobert Watson  * each desired statistic through the corresponding non-NULL pointer.
41747a52a97eSRobert Watson  *
41757a52a97eSRobert Watson  * Note: does not update the zone statistics, as it can't safely clear the
41767a52a97eSRobert Watson  * per-CPU cache statistic.
41777a52a97eSRobert Watson  *
41787a52a97eSRobert Watson  * XXXRW: Following the uc_allocbucket and uc_freebucket pointers here isn't
41797a52a97eSRobert Watson  * safe from off-CPU; we should modify the caches to track this information
41807a52a97eSRobert Watson  * directly so that we don't have to.
41817a52a97eSRobert Watson  */
41827a52a97eSRobert Watson static void
41830f9b7bf3SMark Johnston uma_zone_sumstat(uma_zone_t z, long *cachefreep, uint64_t *allocsp,
4184c1685086SJeff Roberson     uint64_t *freesp, uint64_t *sleepsp, uint64_t *xdomainp)
41857a52a97eSRobert Watson {
41867a52a97eSRobert Watson 	uma_cache_t cache;
4187c1685086SJeff Roberson 	uint64_t allocs, frees, sleeps, xdomain;
41887a52a97eSRobert Watson 	int cachefree, cpu;
41897a52a97eSRobert Watson 
4190c1685086SJeff Roberson 	allocs = frees = sleeps = xdomain = 0;
41917a52a97eSRobert Watson 	cachefree = 0;
41923aa6d94eSJohn Baldwin 	CPU_FOREACH(cpu) {
41937a52a97eSRobert Watson 		cache = &z->uz_cpu[cpu];
41947a52a97eSRobert Watson 		if (cache->uc_allocbucket != NULL)
41957a52a97eSRobert Watson 			cachefree += cache->uc_allocbucket->ub_cnt;
41967a52a97eSRobert Watson 		if (cache->uc_freebucket != NULL)
41977a52a97eSRobert Watson 			cachefree += cache->uc_freebucket->ub_cnt;
4198c1685086SJeff Roberson 		if (cache->uc_crossbucket != NULL) {
4199c1685086SJeff Roberson 			xdomain += cache->uc_crossbucket->ub_cnt;
4200c1685086SJeff Roberson 			cachefree += cache->uc_crossbucket->ub_cnt;
4201c1685086SJeff Roberson 		}
42027a52a97eSRobert Watson 		allocs += cache->uc_allocs;
42037a52a97eSRobert Watson 		frees += cache->uc_frees;
42047a52a97eSRobert Watson 	}
42052efcc8cbSGleb Smirnoff 	allocs += counter_u64_fetch(z->uz_allocs);
42062efcc8cbSGleb Smirnoff 	frees += counter_u64_fetch(z->uz_frees);
4207bf965959SSean Bruno 	sleeps += z->uz_sleeps;
4208c1685086SJeff Roberson 	xdomain += z->uz_xdomain;
42097a52a97eSRobert Watson 	if (cachefreep != NULL)
42107a52a97eSRobert Watson 		*cachefreep = cachefree;
42117a52a97eSRobert Watson 	if (allocsp != NULL)
42127a52a97eSRobert Watson 		*allocsp = allocs;
42137a52a97eSRobert Watson 	if (freesp != NULL)
42147a52a97eSRobert Watson 		*freesp = frees;
4215bf965959SSean Bruno 	if (sleepsp != NULL)
4216bf965959SSean Bruno 		*sleepsp = sleeps;
4217c1685086SJeff Roberson 	if (xdomainp != NULL)
4218c1685086SJeff Roberson 		*xdomainp = xdomain;
42197a52a97eSRobert Watson }
4220a0d4b0aeSRobert Watson #endif /* DDB */
42217a52a97eSRobert Watson 
42227a52a97eSRobert Watson static int
42237a52a97eSRobert Watson sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
42247a52a97eSRobert Watson {
42257a52a97eSRobert Watson 	uma_keg_t kz;
42267a52a97eSRobert Watson 	uma_zone_t z;
42277a52a97eSRobert Watson 	int count;
42287a52a97eSRobert Watson 
42297a52a97eSRobert Watson 	count = 0;
4230111fbcd5SBryan Venteicher 	rw_rlock(&uma_rwlock);
42317a52a97eSRobert Watson 	LIST_FOREACH(kz, &uma_kegs, uk_link) {
42327a52a97eSRobert Watson 		LIST_FOREACH(z, &kz->uk_zones, uz_link)
42337a52a97eSRobert Watson 			count++;
42347a52a97eSRobert Watson 	}
4235b47acb0aSGleb Smirnoff 	LIST_FOREACH(z, &uma_cachezones, uz_link)
4236b47acb0aSGleb Smirnoff 		count++;
4237b47acb0aSGleb Smirnoff 
4238111fbcd5SBryan Venteicher 	rw_runlock(&uma_rwlock);
42397a52a97eSRobert Watson 	return (sysctl_handle_int(oidp, &count, 0, req));
42407a52a97eSRobert Watson }
42417a52a97eSRobert Watson 
4242b47acb0aSGleb Smirnoff static void
4243b47acb0aSGleb Smirnoff uma_vm_zone_stats(struct uma_type_header *uth, uma_zone_t z, struct sbuf *sbuf,
4244b47acb0aSGleb Smirnoff     struct uma_percpu_stat *ups, bool internal)
4245b47acb0aSGleb Smirnoff {
4246b47acb0aSGleb Smirnoff 	uma_zone_domain_t zdom;
42471de9724eSMark Johnston 	uma_bucket_t bucket;
4248b47acb0aSGleb Smirnoff 	uma_cache_t cache;
4249b47acb0aSGleb Smirnoff 	int i;
4250b47acb0aSGleb Smirnoff 
4252b47acb0aSGleb Smirnoff 	for (i = 0; i < vm_ndomains; i++) {
4253b47acb0aSGleb Smirnoff 		zdom = &z->uz_domain[i];
4254b47acb0aSGleb Smirnoff 		uth->uth_zone_free += zdom->uzd_nitems;
4255b47acb0aSGleb Smirnoff 	}
4256b47acb0aSGleb Smirnoff 	uth->uth_allocs = counter_u64_fetch(z->uz_allocs);
4257b47acb0aSGleb Smirnoff 	uth->uth_frees = counter_u64_fetch(z->uz_frees);
4258b47acb0aSGleb Smirnoff 	uth->uth_fails = counter_u64_fetch(z->uz_fails);
4259b47acb0aSGleb Smirnoff 	uth->uth_sleeps = z->uz_sleeps;
4260c1685086SJeff Roberson 	uth->uth_xdomain = z->uz_xdomain;
42611de9724eSMark Johnston 
4262b47acb0aSGleb Smirnoff 	/*
42631de9724eSMark Johnston 	 * While it is not normally safe to access the cache bucket pointers
42641de9724eSMark Johnston 	 * while not on the CPU that owns the cache, we only allow the pointers
42651de9724eSMark Johnston 	 * to be exchanged without the zone lock held, not invalidated, so
42661de9724eSMark Johnston 	 * accept the possible race associated with bucket exchange during
42671de9724eSMark Johnston 	 * monitoring.  Use atomic_load_ptr() to ensure that the bucket pointers
42681de9724eSMark Johnston 	 * are loaded only once.
4269b47acb0aSGleb Smirnoff 	 */
4270b47acb0aSGleb Smirnoff 	for (i = 0; i < mp_maxid + 1; i++) {
4271b47acb0aSGleb Smirnoff 		bzero(&ups[i], sizeof(*ups));
4272b47acb0aSGleb Smirnoff 		if (internal || CPU_ABSENT(i))
4273b47acb0aSGleb Smirnoff 			continue;
4274b47acb0aSGleb Smirnoff 		cache = &z->uz_cpu[i];
42751de9724eSMark Johnston 		bucket = (uma_bucket_t)atomic_load_ptr(&cache->uc_allocbucket);
42761de9724eSMark Johnston 		if (bucket != NULL)
42771de9724eSMark Johnston 			ups[i].ups_cache_free += bucket->ub_cnt;
42781de9724eSMark Johnston 		bucket = (uma_bucket_t)atomic_load_ptr(&cache->uc_freebucket);
42791de9724eSMark Johnston 		if (bucket != NULL)
42801de9724eSMark Johnston 			ups[i].ups_cache_free += bucket->ub_cnt;
42811de9724eSMark Johnston 		bucket = (uma_bucket_t)atomic_load_ptr(&cache->uc_crossbucket);
42821de9724eSMark Johnston 		if (bucket != NULL)
42831de9724eSMark Johnston 			ups[i].ups_cache_free += bucket->ub_cnt;
4284b47acb0aSGleb Smirnoff 		ups[i].ups_allocs = cache->uc_allocs;
4285b47acb0aSGleb Smirnoff 		ups[i].ups_frees = cache->uc_frees;
4286b47acb0aSGleb Smirnoff 	}
4287b47acb0aSGleb Smirnoff }
4288b47acb0aSGleb Smirnoff 
42897a52a97eSRobert Watson static int
42907a52a97eSRobert Watson sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
42917a52a97eSRobert Watson {
42927a52a97eSRobert Watson 	struct uma_stream_header ush;
42937a52a97eSRobert Watson 	struct uma_type_header uth;
429463b5d112SKonstantin Belousov 	struct uma_percpu_stat *ups;
42957a52a97eSRobert Watson 	struct sbuf sbuf;
42967a52a97eSRobert Watson 	uma_keg_t kz;
42977a52a97eSRobert Watson 	uma_zone_t z;
42984e657159SMatthew D Fleming 	int count, error, i;
42997a52a97eSRobert Watson 
430000f0e671SMatthew D Fleming 	error = sysctl_wire_old_buffer(req, 0);
430100f0e671SMatthew D Fleming 	if (error != 0)
430200f0e671SMatthew D Fleming 		return (error);
43034e657159SMatthew D Fleming 	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
43041eafc078SIan Lepore 	sbuf_clear_flags(&sbuf, SBUF_INCLUDENUL);
430563b5d112SKonstantin Belousov 	ups = malloc((mp_maxid + 1) * sizeof(*ups), M_TEMP, M_WAITOK);
43064e657159SMatthew D Fleming 
4307404a593eSMatthew D Fleming 	count = 0;
4308111fbcd5SBryan Venteicher 	rw_rlock(&uma_rwlock);
43097a52a97eSRobert Watson 	LIST_FOREACH(kz, &uma_kegs, uk_link) {
43107a52a97eSRobert Watson 		LIST_FOREACH(z, &kz->uk_zones, uz_link)
43117a52a97eSRobert Watson 			count++;
43127a52a97eSRobert Watson 	}
43137a52a97eSRobert Watson 
4314b47acb0aSGleb Smirnoff 	LIST_FOREACH(z, &uma_cachezones, uz_link)
4315b47acb0aSGleb Smirnoff 		count++;
4316b47acb0aSGleb Smirnoff 
43177a52a97eSRobert Watson 	/*
43187a52a97eSRobert Watson 	 * Insert stream header.
43197a52a97eSRobert Watson 	 */
43207a52a97eSRobert Watson 	bzero(&ush, sizeof(ush));
43217a52a97eSRobert Watson 	ush.ush_version = UMA_STREAM_VERSION;
4322ab3a57c0SRobert Watson 	ush.ush_maxcpus = (mp_maxid + 1);
43237a52a97eSRobert Watson 	ush.ush_count = count;
43244e657159SMatthew D Fleming 	(void)sbuf_bcat(&sbuf, &ush, sizeof(ush));
43257a52a97eSRobert Watson 
43267a52a97eSRobert Watson 	LIST_FOREACH(kz, &uma_kegs, uk_link) {
43277a52a97eSRobert Watson 		LIST_FOREACH(z, &kz->uk_zones, uz_link) {
43287a52a97eSRobert Watson 			bzero(&uth, sizeof(uth));
43297a52a97eSRobert Watson 			ZONE_LOCK(z);
4330cbbb4a00SRobert Watson 			strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
43317a52a97eSRobert Watson 			uth.uth_align = kz->uk_align;
43327a52a97eSRobert Watson 			uth.uth_size = kz->uk_size;
43337a52a97eSRobert Watson 			uth.uth_rsize = kz->uk_rsize;
4334bb45b411SGleb Smirnoff 			if (z->uz_max_items > 0)
4335bb45b411SGleb Smirnoff 				uth.uth_pages = (z->uz_items / kz->uk_ipers) *
4336bb15d1c7SGleb Smirnoff 					kz->uk_ppera;
4337bb45b411SGleb Smirnoff 			else
4338bb45b411SGleb Smirnoff 				uth.uth_pages = kz->uk_pages;
4339f8c86a5fSGleb Smirnoff 			uth.uth_maxpages = (z->uz_max_items / kz->uk_ipers) *
4340bb15d1c7SGleb Smirnoff 			    kz->uk_ppera;
4341bb15d1c7SGleb Smirnoff 			uth.uth_limit = z->uz_max_items;
4342f8c86a5fSGleb Smirnoff 			uth.uth_keg_free = z->uz_keg->uk_free;
4343cbbb4a00SRobert Watson 
4344cbbb4a00SRobert Watson 			/*
4345cbbb4a00SRobert Watson 			 * A zone is secondary if it is not the first entry
4346cbbb4a00SRobert Watson 			 * on the keg's zone list.
4347cbbb4a00SRobert Watson 			 */
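			/*
			 * Secondary zones come from uma_zsecond_create();
			 * the mbuf packet zone layered on the mbuf keg is
			 * one example of this arrangement.
			 */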
4348e20a199fSJeff Roberson 			if ((z->uz_flags & UMA_ZONE_SECONDARY) &&
4349cbbb4a00SRobert Watson 			    (LIST_FIRST(&kz->uk_zones) != z))
4350cbbb4a00SRobert Watson 				uth.uth_zone_flags = UTH_ZONE_SECONDARY;
4351b47acb0aSGleb Smirnoff 			uma_vm_zone_stats(&uth, z, &sbuf, ups,
4352b47acb0aSGleb Smirnoff 			    kz->uk_flags & UMA_ZFLAG_INTERNAL);
43532450bbb8SRobert Watson 			ZONE_UNLOCK(z);
435463b5d112SKonstantin Belousov 			(void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
435563b5d112SKonstantin Belousov 			for (i = 0; i < mp_maxid + 1; i++)
435663b5d112SKonstantin Belousov 				(void)sbuf_bcat(&sbuf, &ups[i], sizeof(ups[i]));
43577a52a97eSRobert Watson 		}
43587a52a97eSRobert Watson 	}
4359b47acb0aSGleb Smirnoff 	LIST_FOREACH(z, &uma_cachezones, uz_link) {
4360b47acb0aSGleb Smirnoff 		bzero(&uth, sizeof(uth));
4361b47acb0aSGleb Smirnoff 		ZONE_LOCK(z);
4362b47acb0aSGleb Smirnoff 		strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
4363b47acb0aSGleb Smirnoff 		uth.uth_size = z->uz_size;
4364b47acb0aSGleb Smirnoff 		uma_vm_zone_stats(&uth, z, &sbuf, ups, false);
4365b47acb0aSGleb Smirnoff 		ZONE_UNLOCK(z);
4366b47acb0aSGleb Smirnoff 		(void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
4367b47acb0aSGleb Smirnoff 		for (i = 0; i < mp_maxid + 1; i++)
4368b47acb0aSGleb Smirnoff 			(void)sbuf_bcat(&sbuf, &ups[i], sizeof(ups[i]));
4369b47acb0aSGleb Smirnoff 	}
4370b47acb0aSGleb Smirnoff 
4371111fbcd5SBryan Venteicher 	rw_runlock(&uma_rwlock);
43724e657159SMatthew D Fleming 	error = sbuf_finish(&sbuf);
43734e657159SMatthew D Fleming 	sbuf_delete(&sbuf);
437463b5d112SKonstantin Belousov 	free(ups, M_TEMP);
43757a52a97eSRobert Watson 	return (error);
43767a52a97eSRobert Watson }
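
/*
 * Userland monitoring tools (libmemstat, for one) consume this stream
 * via sysctl.  A minimal sketch of a reader, assuming the structure
 * definitions from <vm/uma.h> are in scope:
 *
 *	size_t len;
 *	char *buf, *p;
 *	struct uma_stream_header *ush;
 *
 *	sysctlbyname("vm.zone_stats", NULL, &len, NULL, 0);
 *	buf = malloc(len);
 *	sysctlbyname("vm.zone_stats", buf, &len, NULL, 0);
 *	ush = (struct uma_stream_header *)buf;
 *	p = buf + sizeof(*ush);
 *
 * p then walks ush->ush_count records, each a struct uma_type_header
 * followed by ush->ush_maxcpus struct uma_percpu_stat entries, exactly
 * the order emitted above.
 */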
437748c5777eSRobert Watson 
43780a5a3ccbSGleb Smirnoff int
43790a5a3ccbSGleb Smirnoff sysctl_handle_uma_zone_max(SYSCTL_HANDLER_ARGS)
43800a5a3ccbSGleb Smirnoff {
43810a5a3ccbSGleb Smirnoff 	uma_zone_t zone = *(uma_zone_t *)arg1;
438216be9f54SGleb Smirnoff 	int error, max;
43830a5a3ccbSGleb Smirnoff 
438416be9f54SGleb Smirnoff 	max = uma_zone_get_max(zone);
43850a5a3ccbSGleb Smirnoff 	error = sysctl_handle_int(oidp, &max, 0, req);
43860a5a3ccbSGleb Smirnoff 	if (error || !req->newptr)
43870a5a3ccbSGleb Smirnoff 		return (error);
43880a5a3ccbSGleb Smirnoff 
43890a5a3ccbSGleb Smirnoff 	uma_zone_set_max(zone, max);
43900a5a3ccbSGleb Smirnoff 
43910a5a3ccbSGleb Smirnoff 	return (0);
43920a5a3ccbSGleb Smirnoff }
43930a5a3ccbSGleb Smirnoff 
43940a5a3ccbSGleb Smirnoff int
43950a5a3ccbSGleb Smirnoff sysctl_handle_uma_zone_cur(SYSCTL_HANDLER_ARGS)
43960a5a3ccbSGleb Smirnoff {
439720a4e154SJeff Roberson 	uma_zone_t zone;
43980a5a3ccbSGleb Smirnoff 	int cur;
43990a5a3ccbSGleb Smirnoff 
440020a4e154SJeff Roberson 	/*
440120a4e154SJeff Roberson 	 * Some callers want to add sysctls for global zones that
440220a4e154SJeff Roberson 	 * may not yet exist, so they pass a pointer to a pointer.
440320a4e154SJeff Roberson 	 */
440420a4e154SJeff Roberson 	if (arg2 == 0)
440520a4e154SJeff Roberson 		zone = *(uma_zone_t *)arg1;
440620a4e154SJeff Roberson 	else
440720a4e154SJeff Roberson 		zone = arg1;
44080a5a3ccbSGleb Smirnoff 	cur = uma_zone_get_cur(zone);
44090a5a3ccbSGleb Smirnoff 	return (sysctl_handle_int(oidp, &cur, 0, req));
44100a5a3ccbSGleb Smirnoff }
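
/*
 * A sketch of how such a handler might be attached (the zone and OID
 * names here are illustrative): passing arg2 == 0 with a pointer to
 * the zone pointer defers the dereference until the sysctl runs, so
 * the zone need not exist at registration time:
 *
 *	static uma_zone_t foo_zone;
 *	SYSCTL_PROC(_vm, OID_AUTO, foo_count,
 *	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
 *	    &foo_zone, 0, sysctl_handle_uma_zone_cur, "I",
 *	    "Current number of foo allocations");
 */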
44110a5a3ccbSGleb Smirnoff 
441220a4e154SJeff Roberson static int
441320a4e154SJeff Roberson sysctl_handle_uma_zone_allocs(SYSCTL_HANDLER_ARGS)
441420a4e154SJeff Roberson {
441520a4e154SJeff Roberson 	uma_zone_t zone = arg1;
441620a4e154SJeff Roberson 	uint64_t cur;
441720a4e154SJeff Roberson 
441820a4e154SJeff Roberson 	cur = uma_zone_get_allocs(zone);
441920a4e154SJeff Roberson 	return (sysctl_handle_64(oidp, &cur, 0, req));
442020a4e154SJeff Roberson }
442120a4e154SJeff Roberson 
442220a4e154SJeff Roberson static int
442320a4e154SJeff Roberson sysctl_handle_uma_zone_frees(SYSCTL_HANDLER_ARGS)
442420a4e154SJeff Roberson {
442520a4e154SJeff Roberson 	uma_zone_t zone = arg1;
442620a4e154SJeff Roberson 	uint64_t cur;
442720a4e154SJeff Roberson 
442820a4e154SJeff Roberson 	cur = uma_zone_get_frees(zone);
442920a4e154SJeff Roberson 	return (sysctl_handle_64(oidp, &cur, 0, req));
443020a4e154SJeff Roberson }
443120a4e154SJeff Roberson 
44326d204a6aSRyan Libby static int
44336d204a6aSRyan Libby sysctl_handle_uma_zone_flags(SYSCTL_HANDLER_ARGS)
44346d204a6aSRyan Libby {
44356d204a6aSRyan Libby 	struct sbuf sbuf;
44366d204a6aSRyan Libby 	uma_zone_t zone = arg1;
44376d204a6aSRyan Libby 	int error;
44386d204a6aSRyan Libby 
44396d204a6aSRyan Libby 	sbuf_new_for_sysctl(&sbuf, NULL, 0, req);
44406d204a6aSRyan Libby 	if (zone->uz_flags != 0)
44416d204a6aSRyan Libby 		sbuf_printf(&sbuf, "0x%b", zone->uz_flags, PRINT_UMA_ZFLAGS);
44426d204a6aSRyan Libby 	else
44436d204a6aSRyan Libby 		sbuf_printf(&sbuf, "0");
44446d204a6aSRyan Libby 	error = sbuf_finish(&sbuf);
44456d204a6aSRyan Libby 	sbuf_delete(&sbuf);
44466d204a6aSRyan Libby 
44476d204a6aSRyan Libby 	return (error);
44486d204a6aSRyan Libby }
44496d204a6aSRyan Libby 
4450f7af5015SRyan Libby static int
4451f7af5015SRyan Libby sysctl_handle_uma_slab_efficiency(SYSCTL_HANDLER_ARGS)
4452f7af5015SRyan Libby {
4453f7af5015SRyan Libby 	uma_keg_t keg = arg1;
4454f7af5015SRyan Libby 	int avail, effpct, total;
4455f7af5015SRyan Libby 
4456f7af5015SRyan Libby 	total = keg->uk_ppera * PAGE_SIZE;
4457f7af5015SRyan Libby 	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) != 0)
4458f7af5015SRyan Libby 		total += slab_sizeof(SLAB_MAX_SETSIZE);
4459f7af5015SRyan Libby 	/*
4460f7af5015SRyan Libby 	 * We consider the client's requested size and alignment here, not the
4461f7af5015SRyan Libby 	 * computed real size (uk_rsize), because the real size is also
4462f7af5015SRyan Libby 	 * adjusted for internal implementation reasons (max bitset size).
4463f7af5015SRyan Libby 	 */
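	/*
	 * A worked instance, with illustrative values: uk_size = 192,
	 * uk_align = 15, uk_ipers = 21, uk_ppera = 1, and no OFFPAGE or
	 * PCPU flags.  Then total = 4096, avail = 21 * roundup2(192, 16)
	 * = 4032, and effpct = 100 * 4032 / 4096 = 98.
	 */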
4464f7af5015SRyan Libby 	avail = keg->uk_ipers * roundup2(keg->uk_size, keg->uk_align + 1);
4465f7af5015SRyan Libby 	if ((keg->uk_flags & UMA_ZONE_PCPU) != 0)
4466f7af5015SRyan Libby 		avail *= mp_maxid + 1;
4467f7af5015SRyan Libby 	effpct = 100 * avail / total;
4468f7af5015SRyan Libby 	return (sysctl_handle_int(oidp, &effpct, 0, req));
4469f7af5015SRyan Libby }
4470f7af5015SRyan Libby 
44719542ea7bSGleb Smirnoff #ifdef INVARIANTS
44729542ea7bSGleb Smirnoff static uma_slab_t
44739542ea7bSGleb Smirnoff uma_dbg_getslab(uma_zone_t zone, void *item)
44749542ea7bSGleb Smirnoff {
44759542ea7bSGleb Smirnoff 	uma_slab_t slab;
44769542ea7bSGleb Smirnoff 	uma_keg_t keg;
44779542ea7bSGleb Smirnoff 	uint8_t *mem;
44789542ea7bSGleb Smirnoff 
44799542ea7bSGleb Smirnoff 	mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
44809542ea7bSGleb Smirnoff 	if (zone->uz_flags & UMA_ZONE_VTOSLAB) {
44819542ea7bSGleb Smirnoff 		slab = vtoslab((vm_offset_t)mem);
44829542ea7bSGleb Smirnoff 	} else {
44839542ea7bSGleb Smirnoff 		/*
44849542ea7bSGleb Smirnoff 		 * It is safe to return the slab here even though the
44859542ea7bSGleb Smirnoff 		 * zone is unlocked because the item's allocation state
44869542ea7bSGleb Smirnoff 		 * essentially holds a reference.
44879542ea7bSGleb Smirnoff 		 */
4488bb15d1c7SGleb Smirnoff 		if (zone->uz_lockptr == &zone->uz_lock)
4489bb15d1c7SGleb Smirnoff 			return (NULL);
44909542ea7bSGleb Smirnoff 		ZONE_LOCK(zone);
4491bb15d1c7SGleb Smirnoff 		keg = zone->uz_keg;
44929542ea7bSGleb Smirnoff 		if (keg->uk_flags & UMA_ZONE_HASH)
44939542ea7bSGleb Smirnoff 			slab = hash_sfind(&keg->uk_hash, mem);
44949542ea7bSGleb Smirnoff 		else
44959542ea7bSGleb Smirnoff 			slab = (uma_slab_t)(mem + keg->uk_pgoff);
44969542ea7bSGleb Smirnoff 		ZONE_UNLOCK(zone);
44979542ea7bSGleb Smirnoff 	}
44989542ea7bSGleb Smirnoff 
44999542ea7bSGleb Smirnoff 	return (slab);
45009542ea7bSGleb Smirnoff }
45019542ea7bSGleb Smirnoff 
4502c5deaf04SGleb Smirnoff static bool
4503c5deaf04SGleb Smirnoff uma_dbg_zskip(uma_zone_t zone, void *mem)
4504c5deaf04SGleb Smirnoff {
4505c5deaf04SGleb Smirnoff 
4506bb15d1c7SGleb Smirnoff 	if (zone->uz_lockptr == &zone->uz_lock)
4507c5deaf04SGleb Smirnoff 		return (true);
4508c5deaf04SGleb Smirnoff 
4509bb15d1c7SGleb Smirnoff 	return (uma_dbg_kskip(zone->uz_keg, mem));
4510c5deaf04SGleb Smirnoff }
4511c5deaf04SGleb Smirnoff 
4512c5deaf04SGleb Smirnoff static bool
4513c5deaf04SGleb Smirnoff uma_dbg_kskip(uma_keg_t keg, void *mem)
4514c5deaf04SGleb Smirnoff {
4515c5deaf04SGleb Smirnoff 	uintptr_t idx;
4516c5deaf04SGleb Smirnoff 
4517c5deaf04SGleb Smirnoff 	if (dbg_divisor == 0)
4518c5deaf04SGleb Smirnoff 		return (true);
4519c5deaf04SGleb Smirnoff 
4520c5deaf04SGleb Smirnoff 	if (dbg_divisor == 1)
4521c5deaf04SGleb Smirnoff 		return (false);
4522c5deaf04SGleb Smirnoff 
4523c5deaf04SGleb Smirnoff 	idx = (uintptr_t)mem >> PAGE_SHIFT;
4524c5deaf04SGleb Smirnoff 	if (keg->uk_ipers > 1) {
4525c5deaf04SGleb Smirnoff 		idx *= keg->uk_ipers;
4526c5deaf04SGleb Smirnoff 		idx += ((uintptr_t)mem & PAGE_MASK) / keg->uk_rsize;
4527c5deaf04SGleb Smirnoff 	}
4528c5deaf04SGleb Smirnoff 
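	/*
	 * idx is a stable per-item index: the page number scaled by items
	 * per page, plus the slot within the page.  E.g., with
	 * dbg_divisor = 3 and uk_ipers = 4, page 10 slot 2 gives
	 * idx = 10 * 4 + 2 = 42; 42 is divisible by 3, so that item is
	 * verified while its neighbors are skipped.
	 */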
4529c5deaf04SGleb Smirnoff 	if ((idx / dbg_divisor) * dbg_divisor != idx) {
4530c5deaf04SGleb Smirnoff 		counter_u64_add(uma_skip_cnt, 1);
4531c5deaf04SGleb Smirnoff 		return (true);
4532c5deaf04SGleb Smirnoff 	}
4533c5deaf04SGleb Smirnoff 	counter_u64_add(uma_dbg_cnt, 1);
4534c5deaf04SGleb Smirnoff 
4535c5deaf04SGleb Smirnoff 	return (false);
4536c5deaf04SGleb Smirnoff }
4537c5deaf04SGleb Smirnoff 
45389542ea7bSGleb Smirnoff /*
45399542ea7bSGleb Smirnoff  * Set up the slab's freei data such that uma_dbg_free can function.
45419542ea7bSGleb Smirnoff  */
45429542ea7bSGleb Smirnoff static void
45439542ea7bSGleb Smirnoff uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item)
45449542ea7bSGleb Smirnoff {
45459542ea7bSGleb Smirnoff 	uma_keg_t keg;
45469542ea7bSGleb Smirnoff 	int freei;
45479542ea7bSGleb Smirnoff 
45489542ea7bSGleb Smirnoff 	if (slab == NULL) {
45499542ea7bSGleb Smirnoff 		slab = uma_dbg_getslab(zone, item);
45509542ea7bSGleb Smirnoff 		if (slab == NULL)
45519542ea7bSGleb Smirnoff 			panic("uma: item %p did not belong to zone %s\n",
45529542ea7bSGleb Smirnoff 			    item, zone->uz_name);
45539542ea7bSGleb Smirnoff 	}
4554584061b4SJeff Roberson 	keg = zone->uz_keg;
45551e0701e1SJeff Roberson 	freei = slab_item_index(slab, keg, item);
45569542ea7bSGleb Smirnoff 
4557815db204SRyan Libby 	if (BIT_ISSET(keg->uk_ipers, freei, slab_dbg_bits(slab, keg)))
45589542ea7bSGleb Smirnoff 		panic("Duplicate alloc of %p from zone %p(%s) slab %p(%d)\n",
45599542ea7bSGleb Smirnoff 		    item, zone, zone->uz_name, slab, freei);
4560815db204SRyan Libby 	BIT_SET_ATOMIC(keg->uk_ipers, freei, slab_dbg_bits(slab, keg));
45619542ea7bSGleb Smirnoff }
45629542ea7bSGleb Smirnoff 
45639542ea7bSGleb Smirnoff /*
45649542ea7bSGleb Smirnoff  * Verifies freed addresses.  Checks for alignment, valid slab membership
45659542ea7bSGleb Smirnoff  * and duplicate frees.
45679542ea7bSGleb Smirnoff  */
45689542ea7bSGleb Smirnoff static void
45699542ea7bSGleb Smirnoff uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item)
45709542ea7bSGleb Smirnoff {
45719542ea7bSGleb Smirnoff 	uma_keg_t keg;
45729542ea7bSGleb Smirnoff 	int freei;
45739542ea7bSGleb Smirnoff 
45749542ea7bSGleb Smirnoff 	if (slab == NULL) {
45759542ea7bSGleb Smirnoff 		slab = uma_dbg_getslab(zone, item);
45769542ea7bSGleb Smirnoff 		if (slab == NULL)
45779542ea7bSGleb Smirnoff 			panic("uma: Freed item %p did not belong to zone %s\n",
45789542ea7bSGleb Smirnoff 			    item, zone->uz_name);
45799542ea7bSGleb Smirnoff 	}
4580584061b4SJeff Roberson 	keg = zone->uz_keg;
45811e0701e1SJeff Roberson 	freei = slab_item_index(slab, keg, item);
45829542ea7bSGleb Smirnoff 
45839542ea7bSGleb Smirnoff 	if (freei >= keg->uk_ipers)
45849542ea7bSGleb Smirnoff 		panic("Invalid free of %p from zone %p(%s) slab %p(%d)\n",
45859542ea7bSGleb Smirnoff 		    item, zone, zone->uz_name, slab, freei);
45869542ea7bSGleb Smirnoff 
45871e0701e1SJeff Roberson 	if (slab_item(slab, keg, freei) != item)
45889542ea7bSGleb Smirnoff 		panic("Unaligned free of %p from zone %p(%s) slab %p(%d)\n",
45899542ea7bSGleb Smirnoff 		    item, zone, zone->uz_name, slab, freei);
45909542ea7bSGleb Smirnoff 
4591815db204SRyan Libby 	if (!BIT_ISSET(keg->uk_ipers, freei, slab_dbg_bits(slab, keg)))
45929542ea7bSGleb Smirnoff 		panic("Duplicate free of %p from zone %p(%s) slab %p(%d)\n",
45939542ea7bSGleb Smirnoff 		    item, zone, zone->uz_name, slab, freei);
45949542ea7bSGleb Smirnoff 
4595815db204SRyan Libby 	BIT_CLR_ATOMIC(keg->uk_ipers, freei, slab_dbg_bits(slab, keg));
45969542ea7bSGleb Smirnoff }
45979542ea7bSGleb Smirnoff #endif /* INVARIANTS */
45989542ea7bSGleb Smirnoff 
459948c5777eSRobert Watson #ifdef DDB
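/*
 * Helper for the "show uma" DDB command: sums one zone's statistics,
 * filling in the caller's allocation, in-use, sleep, cached-free and
 * cross-domain counters, and returns the zone's total memory footprint
 * in bytes.
 */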
460046d70077SConrad Meyer static int64_t
460146d70077SConrad Meyer get_uma_stats(uma_keg_t kz, uma_zone_t z, uint64_t *allocs, uint64_t *used,
46020223790fSConrad Meyer     uint64_t *sleeps, long *cachefree, uint64_t *xdomain)
460348c5777eSRobert Watson {
460446d70077SConrad Meyer 	uint64_t frees;
46050f9b7bf3SMark Johnston 	int i;
460648c5777eSRobert Watson 
460748c5777eSRobert Watson 	if (kz->uk_flags & UMA_ZFLAG_INTERNAL) {
460846d70077SConrad Meyer 		*allocs = counter_u64_fetch(z->uz_allocs);
46092efcc8cbSGleb Smirnoff 		frees = counter_u64_fetch(z->uz_frees);
461046d70077SConrad Meyer 		*sleeps = z->uz_sleeps;
461146d70077SConrad Meyer 		*cachefree = 0;
461246d70077SConrad Meyer 		*xdomain = 0;
461348c5777eSRobert Watson 	} else
461446d70077SConrad Meyer 		uma_zone_sumstat(z, cachefree, allocs, &frees, sleeps,
461546d70077SConrad Meyer 		    xdomain);
4616e20a199fSJeff Roberson 	if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
461748c5777eSRobert Watson 	    (LIST_FIRST(&kz->uk_zones) != z)))
461846d70077SConrad Meyer 		*cachefree += kz->uk_free;
46190f9b7bf3SMark Johnston 	for (i = 0; i < vm_ndomains; i++)
462046d70077SConrad Meyer 		*cachefree += z->uz_domain[i].uzd_nitems;
462146d70077SConrad Meyer 	*used = *allocs - frees;
462246d70077SConrad Meyer 	return (((int64_t)*used + *cachefree) * kz->uk_size);
462346d70077SConrad Meyer }
46240f9b7bf3SMark Johnston 
462546d70077SConrad Meyer DB_SHOW_COMMAND(uma, db_show_uma)
462646d70077SConrad Meyer {
462746d70077SConrad Meyer 	const char *fmt_hdr, *fmt_entry;
462846d70077SConrad Meyer 	uma_keg_t kz;
462946d70077SConrad Meyer 	uma_zone_t z;
463046d70077SConrad Meyer 	uint64_t allocs, used, sleeps, xdomain;
463146d70077SConrad Meyer 	long cachefree;
463246d70077SConrad Meyer 	/* variables for sorting */
463346d70077SConrad Meyer 	uma_keg_t cur_keg;
463446d70077SConrad Meyer 	uma_zone_t cur_zone, last_zone;
463546d70077SConrad Meyer 	int64_t cur_size, last_size, size;
463646d70077SConrad Meyer 	int ties;
463746d70077SConrad Meyer 
463846d70077SConrad Meyer 	/* /i option produces machine-parseable CSV output */
463946d70077SConrad Meyer 	if (modif[0] == 'i') {
464046d70077SConrad Meyer 		fmt_hdr = "%s,%s,%s,%s,%s,%s,%s,%s,%s\n";
464146d70077SConrad Meyer 		fmt_entry = "\"%s\",%ju,%jd,%ld,%ju,%ju,%u,%jd,%ju\n";
464246d70077SConrad Meyer 	} else {
464346d70077SConrad Meyer 		fmt_hdr = "%18s %6s %7s %7s %11s %7s %7s %10s %8s\n";
464446d70077SConrad Meyer 		fmt_entry = "%18s %6ju %7jd %7ld %11ju %7ju %7u %10jd %8ju\n";
464546d70077SConrad Meyer 	}
464646d70077SConrad Meyer 
464746d70077SConrad Meyer 	db_printf(fmt_hdr, "Zone", "Size", "Used", "Free", "Requests",
464846d70077SConrad Meyer 	    "Sleeps", "Bucket", "Total Mem", "XFree");
464946d70077SConrad Meyer 
465046d70077SConrad Meyer 	/* Sort the zones with largest size first. */
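	/*
	 * DDB cannot allocate memory for a conventional sort, so this is
	 * a repeated selection: each pass rescans every zone for the
	 * largest size strictly smaller than the one just printed, using
	 * the last printed zone to break ties in encounter order.  This
	 * costs O(n^2) scans but needs only bounded space.
	 */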
465146d70077SConrad Meyer 	last_zone = NULL;
465246d70077SConrad Meyer 	last_size = INT64_MAX;
465346d70077SConrad Meyer 	for (;;) {
465446d70077SConrad Meyer 		cur_zone = NULL;
465546d70077SConrad Meyer 		cur_size = -1;
465646d70077SConrad Meyer 		ties = 0;
465746d70077SConrad Meyer 		LIST_FOREACH(kz, &uma_kegs, uk_link) {
465846d70077SConrad Meyer 			LIST_FOREACH(z, &kz->uk_zones, uz_link) {
465946d70077SConrad Meyer 				/*
466046d70077SConrad Meyer 				 * In the case of size ties, print out zones
466146d70077SConrad Meyer 				 * in the order they are encountered.  That is,
466246d70077SConrad Meyer 				 * when we encounter the most recently output
466346d70077SConrad Meyer 				 * zone, we have already printed all preceding
466446d70077SConrad Meyer 				 * ties, and we must print all following ties.
466546d70077SConrad Meyer 				 */
466646d70077SConrad Meyer 				if (z == last_zone) {
466746d70077SConrad Meyer 					ties = 1;
466846d70077SConrad Meyer 					continue;
466946d70077SConrad Meyer 				}
467046d70077SConrad Meyer 				size = get_uma_stats(kz, z, &allocs, &used,
467146d70077SConrad Meyer 				    &sleeps, &cachefree, &xdomain);
467246d70077SConrad Meyer 				if (size > cur_size && size < last_size + ties)
467346d70077SConrad Meyer 				{
467446d70077SConrad Meyer 					cur_size = size;
467546d70077SConrad Meyer 					cur_zone = z;
467646d70077SConrad Meyer 					cur_keg = kz;
467746d70077SConrad Meyer 				}
467846d70077SConrad Meyer 			}
467946d70077SConrad Meyer 		}
468046d70077SConrad Meyer 		if (cur_zone == NULL)
468146d70077SConrad Meyer 			break;
468246d70077SConrad Meyer 
468346d70077SConrad Meyer 		size = get_uma_stats(cur_keg, cur_zone, &allocs, &used,
468446d70077SConrad Meyer 		    &sleeps, &cachefree, &xdomain);
468546d70077SConrad Meyer 		db_printf(fmt_entry, cur_zone->uz_name,
468646d70077SConrad Meyer 		    (uintmax_t)cur_keg->uk_size, (intmax_t)used, cachefree,
468746d70077SConrad Meyer 		    (uintmax_t)allocs, (uintmax_t)sleeps,
468820a4e154SJeff Roberson 		    (unsigned)cur_zone->uz_bucket_size, (intmax_t)size,
468920a4e154SJeff Roberson 		    xdomain);
469046d70077SConrad Meyer 
4691687c94aaSJohn Baldwin 		if (db_pager_quit)
4692687c94aaSJohn Baldwin 			return;
469346d70077SConrad Meyer 		last_zone = cur_zone;
469446d70077SConrad Meyer 		last_size = cur_size;
469548c5777eSRobert Watson 	}
469648c5777eSRobert Watson }
469703175483SAlexander Motin 
469803175483SAlexander Motin DB_SHOW_COMMAND(umacache, db_show_umacache)
469903175483SAlexander Motin {
470003175483SAlexander Motin 	uma_zone_t z;
4701ab3185d1SJeff Roberson 	uint64_t allocs, frees;
47020f9b7bf3SMark Johnston 	long cachefree;
47030f9b7bf3SMark Johnston 	int i;
470403175483SAlexander Motin 
470503175483SAlexander Motin 	db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free",
470603175483SAlexander Motin 	    "Requests", "Bucket");
470703175483SAlexander Motin 	LIST_FOREACH(z, &uma_cachezones, uz_link) {
4708c1685086SJeff Roberson 		uma_zone_sumstat(z, &cachefree, &allocs, &frees, NULL, NULL);
47090f9b7bf3SMark Johnston 		for (i = 0; i < vm_ndomains; i++)
47100f9b7bf3SMark Johnston 			cachefree += z->uz_domain[i].uzd_nitems;
47110f9b7bf3SMark Johnston 		db_printf("%18s %8ju %8jd %8ld %12ju %8u\n",
471203175483SAlexander Motin 		    z->uz_name, (uintmax_t)z->uz_size,
471303175483SAlexander Motin 		    (intmax_t)(allocs - frees), cachefree,
471420a4e154SJeff Roberson 		    (uintmax_t)allocs, z->uz_bucket_size);
471503175483SAlexander Motin 		if (db_pager_quit)
471603175483SAlexander Motin 			return;
471703175483SAlexander Motin 	}
471803175483SAlexander Motin }
47199542ea7bSGleb Smirnoff #endif	/* DDB */
4720