xref: /freebsd/sys/vm/uma_core.c (revision aabe13f1450bb4caba66ec2a7a41c0dfefff511d)
160727d8bSWarner Losh /*-
2fe267a55SPedro F. Giffuni  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3fe267a55SPedro F. Giffuni  *
4584061b4SJeff Roberson  * Copyright (c) 2002-2019 Jeffrey Roberson <jeff@FreeBSD.org>
508ecce74SRobert Watson  * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
6ae4e9636SRobert Watson  * Copyright (c) 2004-2006 Robert N. M. Watson
708ecce74SRobert Watson  * All rights reserved.
88355f576SJeff Roberson  *
98355f576SJeff Roberson  * Redistribution and use in source and binary forms, with or without
108355f576SJeff Roberson  * modification, are permitted provided that the following conditions
118355f576SJeff Roberson  * are met:
128355f576SJeff Roberson  * 1. Redistributions of source code must retain the above copyright
138355f576SJeff Roberson  *    notice unmodified, this list of conditions, and the following
148355f576SJeff Roberson  *    disclaimer.
158355f576SJeff Roberson  * 2. Redistributions in binary form must reproduce the above copyright
168355f576SJeff Roberson  *    notice, this list of conditions and the following disclaimer in the
178355f576SJeff Roberson  *    documentation and/or other materials provided with the distribution.
188355f576SJeff Roberson  *
198355f576SJeff Roberson  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
208355f576SJeff Roberson  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
218355f576SJeff Roberson  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
228355f576SJeff Roberson  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
238355f576SJeff Roberson  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
248355f576SJeff Roberson  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
258355f576SJeff Roberson  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
268355f576SJeff Roberson  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
278355f576SJeff Roberson  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
288355f576SJeff Roberson  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
298355f576SJeff Roberson  */
308355f576SJeff Roberson 
318355f576SJeff Roberson /*
328355f576SJeff Roberson  * uma_core.c  Implementation of the Universal Memory allocator
338355f576SJeff Roberson  *
348355f576SJeff Roberson  * This allocator is intended to replace the multitude of similar object caches
358355f576SJeff Roberson  * in the standard FreeBSD kernel.  The intent is to be flexible as well as
36763df3ecSPedro F. Giffuni  * efficient.  A primary design goal is to return unused memory to the rest of
378355f576SJeff Roberson  * the system.  This will make the system as a whole more flexible due to the
388355f576SJeff Roberson  * ability to move memory to subsystems which most need it instead of leaving
398355f576SJeff Roberson  * pools of reserved memory unused.
408355f576SJeff Roberson  *
418355f576SJeff Roberson  * The basic ideas stem from similar slab/zone based allocators whose algorithms
428355f576SJeff Roberson  * are well known.
438355f576SJeff Roberson  *
448355f576SJeff Roberson  */
458355f576SJeff Roberson 
468355f576SJeff Roberson /*
478355f576SJeff Roberson  * TODO:
488355f576SJeff Roberson  *	- Improve memory usage for large allocations
498355f576SJeff Roberson  *	- Investigate cache size adjustments
508355f576SJeff Roberson  */
518355f576SJeff Roberson 
52874651b1SDavid E. O'Brien #include <sys/cdefs.h>
53874651b1SDavid E. O'Brien __FBSDID("$FreeBSD$");
54874651b1SDavid E. O'Brien 
5548c5777eSRobert Watson #include "opt_ddb.h"
568355f576SJeff Roberson #include "opt_param.h"
578d689e04SGleb Smirnoff #include "opt_vm.h"
5848c5777eSRobert Watson 
598355f576SJeff Roberson #include <sys/param.h>
608355f576SJeff Roberson #include <sys/systm.h>
6109c8cb71SMark Johnston #include <sys/asan.h>
62ef72505eSJeff Roberson #include <sys/bitset.h>
63194a979eSMark Johnston #include <sys/domainset.h>
649b43bc27SAndriy Gapon #include <sys/eventhandler.h>
658355f576SJeff Roberson #include <sys/kernel.h>
668355f576SJeff Roberson #include <sys/types.h>
67ad5b0f5bSJeff Roberson #include <sys/limits.h>
688355f576SJeff Roberson #include <sys/queue.h>
698355f576SJeff Roberson #include <sys/malloc.h>
703659f747SRobert Watson #include <sys/ktr.h>
718355f576SJeff Roberson #include <sys/lock.h>
728355f576SJeff Roberson #include <sys/sysctl.h>
738355f576SJeff Roberson #include <sys/mutex.h>
744c1cc01cSJohn Baldwin #include <sys/proc.h>
7510cb2424SMark Murray #include <sys/random.h>
7689f6b863SAttilio Rao #include <sys/rwlock.h>
777a52a97eSRobert Watson #include <sys/sbuf.h>
78a2de44abSAlexander Motin #include <sys/sched.h>
794bd61e19SJeff Roberson #include <sys/sleepqueue.h>
808355f576SJeff Roberson #include <sys/smp.h>
81d4665eaaSJeff Roberson #include <sys/smr.h>
82e60b2fcbSGleb Smirnoff #include <sys/taskqueue.h>
8386bbae32SJeff Roberson #include <sys/vmmeter.h>
8486bbae32SJeff Roberson 
858355f576SJeff Roberson #include <vm/vm.h>
866f3b523cSKonstantin Belousov #include <vm/vm_param.h>
87194a979eSMark Johnston #include <vm/vm_domainset.h>
888355f576SJeff Roberson #include <vm/vm_object.h>
898355f576SJeff Roberson #include <vm/vm_page.h>
90a4915c21SAttilio Rao #include <vm/vm_pageout.h>
91ab3185d1SJeff Roberson #include <vm/vm_phys.h>
9230c5525bSAndrew Gallatin #include <vm/vm_pagequeue.h>
938355f576SJeff Roberson #include <vm/vm_map.h>
948355f576SJeff Roberson #include <vm/vm_kern.h>
958355f576SJeff Roberson #include <vm/vm_extern.h>
966f3b523cSKonstantin Belousov #include <vm/vm_dumpset.h>
978355f576SJeff Roberson #include <vm/uma.h>
988355f576SJeff Roberson #include <vm/uma_int.h>
99639c9550SJeff Roberson #include <vm/uma_dbg.h>
1008355f576SJeff Roberson 
10148c5777eSRobert Watson #include <ddb/ddb.h>
10248c5777eSRobert Watson 
1038d689e04SGleb Smirnoff #ifdef DEBUG_MEMGUARD
1048d689e04SGleb Smirnoff #include <vm/memguard.h>
1058d689e04SGleb Smirnoff #endif
1068d689e04SGleb Smirnoff 
107a81c400eSJeff Roberson #include <machine/md_var.h>
108a81c400eSJeff Roberson 
109d4665eaaSJeff Roberson #ifdef INVARIANTS
110d4665eaaSJeff Roberson #define	UMA_ALWAYS_CTORDTOR	1
111d4665eaaSJeff Roberson #else
112d4665eaaSJeff Roberson #define	UMA_ALWAYS_CTORDTOR	0
113d4665eaaSJeff Roberson #endif
114d4665eaaSJeff Roberson 
1158355f576SJeff Roberson /*
116ab3185d1SJeff Roberson  * These are the zone and keg from which all zones are spawned.
1178355f576SJeff Roberson  */
118ab3185d1SJeff Roberson static uma_zone_t kegs;
119ab3185d1SJeff Roberson static uma_zone_t zones;
1208355f576SJeff Roberson 
1219b8db4d0SRyan Libby /*
12254007ce8SMark Johnston  * On INVARIANTS builds, the slab contains a second bitset of the same size,
12354007ce8SMark Johnston  * "dbg_bits", which is laid out immediately after us_free.
12454007ce8SMark Johnston  */
12554007ce8SMark Johnston #ifdef INVARIANTS
12654007ce8SMark Johnston #define	SLAB_BITSETS	2
12754007ce8SMark Johnston #else
12854007ce8SMark Johnston #define	SLAB_BITSETS	1
12954007ce8SMark Johnston #endif
13054007ce8SMark Johnston 
13154007ce8SMark Johnston /*
1329b8db4d0SRyan Libby  * These are the two zones from which all offpage uma_slab_ts are allocated.
1339b8db4d0SRyan Libby  *
1349b8db4d0SRyan Libby  * One zone is for slab headers that can represent a larger number of items,
1359b8db4d0SRyan Libby  * making the slabs themselves more efficient, and the other zone is for
1369b8db4d0SRyan Libby  * headers that are smaller and represent fewer items, making the headers more
1379b8db4d0SRyan Libby  * efficient.
1389b8db4d0SRyan Libby  */
1399b8db4d0SRyan Libby #define	SLABZONE_SIZE(setsize)					\
1409b8db4d0SRyan Libby     (sizeof(struct uma_hash_slab) + BITSET_SIZE(setsize) * SLAB_BITSETS)
1419b8db4d0SRyan Libby #define	SLABZONE0_SETSIZE	(PAGE_SIZE / 16)
1429b8db4d0SRyan Libby #define	SLABZONE1_SETSIZE	SLAB_MAX_SETSIZE
1439b8db4d0SRyan Libby #define	SLABZONE0_SIZE	SLABZONE_SIZE(SLABZONE0_SETSIZE)
1449b8db4d0SRyan Libby #define	SLABZONE1_SIZE	SLABZONE_SIZE(SLABZONE1_SETSIZE)
1459b8db4d0SRyan Libby static uma_zone_t slabzones[2];
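/*
 * A rough sizing sketch, assuming 4 KB pages and 64-bit longs:
 * SLABZONE0_SETSIZE is then 256 items, BITSET_SIZE(256) is 32 bytes, and
 * SLABZONE0_SIZE works out to sizeof(struct uma_hash_slab) plus 32 bytes
 * per bitset (one bitset normally, two with INVARIANTS).
 */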
1468355f576SJeff Roberson 
1478355f576SJeff Roberson /*
1488355f576SJeff Roberson  * The initial hash tables come out of this zone so they can be allocated
1498355f576SJeff Roberson  * prior to malloc coming up.
1508355f576SJeff Roberson  */
1518355f576SJeff Roberson static uma_zone_t hashzone;
1528355f576SJeff Roberson 
1531e319f6dSRobert Watson /* The boot-time adjusted value for cache line alignment. */
154e4cd31ddSJeff Roberson int uma_align_cache = 64 - 1;
1551e319f6dSRobert Watson 
156961647dfSJeff Roberson static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
15720a4e154SJeff Roberson static MALLOC_DEFINE(M_UMA, "UMA", "UMA Misc");
158961647dfSJeff Roberson 
1598355f576SJeff Roberson /*
16086bbae32SJeff Roberson  * Are we allowed to allocate buckets?
16186bbae32SJeff Roberson  */
16286bbae32SJeff Roberson static int bucketdisable = 1;
16386bbae32SJeff Roberson 
164099a0e58SBosko Milekic /* Linked list of all kegs in the system */
16513e403fdSAntoine Brodin static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs);
1668355f576SJeff Roberson 
16703175483SAlexander Motin /* Linked list of all cache-only zones in the system */
16803175483SAlexander Motin static LIST_HEAD(,uma_zone) uma_cachezones =
16903175483SAlexander Motin     LIST_HEAD_INITIALIZER(uma_cachezones);
17003175483SAlexander Motin 
171*aabe13f1SMark Johnston /*
172*aabe13f1SMark Johnston  * Lock for global lists: uma_kegs, uma_cachezones, and the per-keg list of
173*aabe13f1SMark Johnston  * zones.
174*aabe13f1SMark Johnston  */
175fe933c1dSMateusz Guzik static struct rwlock_padalign __exclusive_cache_line uma_rwlock;
1768355f576SJeff Roberson 
177*aabe13f1SMark Johnston static struct sx uma_reclaim_lock;
178*aabe13f1SMark Johnston 
179ac0a6fd0SGleb Smirnoff /*
180a81c400eSJeff Roberson  * First available virtual address for boot time allocations.
181ac0a6fd0SGleb Smirnoff  */
182a81c400eSJeff Roberson static vm_offset_t bootstart;
183a81c400eSJeff Roberson static vm_offset_t bootmem;
1848355f576SJeff Roberson 
185fbd95859SMark Johnston /*
186fbd95859SMark Johnston  * kmem soft limit, initialized by uma_set_limit().  Ensure that early
187fbd95859SMark Johnston  * allocations don't trigger a wakeup of the reclaim thread.
188fbd95859SMark Johnston  */
1896d6a03d7SJeff Roberson unsigned long uma_kmem_limit = LONG_MAX;
190fbd95859SMark Johnston SYSCTL_ULONG(_vm, OID_AUTO, uma_kmem_limit, CTLFLAG_RD, &uma_kmem_limit, 0,
191fbd95859SMark Johnston     "UMA kernel memory soft limit");
1926d6a03d7SJeff Roberson unsigned long uma_kmem_total;
193fbd95859SMark Johnston SYSCTL_ULONG(_vm, OID_AUTO, uma_kmem_total, CTLFLAG_RD, &uma_kmem_total, 0,
194fbd95859SMark Johnston     "UMA kernel memory usage");
1952e47807cSJeff Roberson 
1968355f576SJeff Roberson /* Is the VM done starting up? */
197860bb7a0SMark Johnston static enum {
198860bb7a0SMark Johnston 	BOOT_COLD,
199a81c400eSJeff Roberson 	BOOT_KVA,
200dc2b3205SMark Johnston 	BOOT_PCPU,
201860bb7a0SMark Johnston 	BOOT_RUNNING,
202860bb7a0SMark Johnston 	BOOT_SHUTDOWN,
203860bb7a0SMark Johnston } booted = BOOT_COLD;
2048355f576SJeff Roberson 
205ef72505eSJeff Roberson /*
2069643769aSJeff Roberson  * This is the handle used to schedule events that need to happen
2079643769aSJeff Roberson  * outside of the allocation fast path.
2089643769aSJeff Roberson  */
2098355f576SJeff Roberson static struct callout uma_callout;
2109643769aSJeff Roberson #define	UMA_TIMEOUT	20		/* Seconds for callout interval. */
2118355f576SJeff Roberson 
2128355f576SJeff Roberson /*
2138355f576SJeff Roberson  * This structure is passed as the zone ctor arg so that I don't have to create
2148355f576SJeff Roberson  * a special allocation function just for zones.
2158355f576SJeff Roberson  */
2168355f576SJeff Roberson struct uma_zctor_args {
217bb196eb4SMatthew D Fleming 	const char *name;
218c3bdc05fSAndrew R. Reiter 	size_t size;
2198355f576SJeff Roberson 	uma_ctor ctor;
2208355f576SJeff Roberson 	uma_dtor dtor;
2218355f576SJeff Roberson 	uma_init uminit;
2228355f576SJeff Roberson 	uma_fini fini;
2230095a784SJeff Roberson 	uma_import import;
2240095a784SJeff Roberson 	uma_release release;
2250095a784SJeff Roberson 	void *arg;
226099a0e58SBosko Milekic 	uma_keg_t keg;
227099a0e58SBosko Milekic 	int align;
22885dcf349SGleb Smirnoff 	uint32_t flags;
229099a0e58SBosko Milekic };
230099a0e58SBosko Milekic 
231099a0e58SBosko Milekic struct uma_kctor_args {
232099a0e58SBosko Milekic 	uma_zone_t zone;
233099a0e58SBosko Milekic 	size_t size;
234099a0e58SBosko Milekic 	uma_init uminit;
235099a0e58SBosko Milekic 	uma_fini fini;
2368355f576SJeff Roberson 	int align;
23785dcf349SGleb Smirnoff 	uint32_t flags;
2388355f576SJeff Roberson };
2398355f576SJeff Roberson 
240cae33c14SJeff Roberson struct uma_bucket_zone {
241cae33c14SJeff Roberson 	uma_zone_t	ubz_zone;
242eaa17d42SRyan Libby 	const char	*ubz_name;
243fc03d22bSJeff Roberson 	int		ubz_entries;	/* Number of items it can hold. */
244fc03d22bSJeff Roberson 	int		ubz_maxsize;	/* Maximum allocation size per-item. */
245cae33c14SJeff Roberson };
246cae33c14SJeff Roberson 
247f9d27e75SRobert Watson /*
248fc03d22bSJeff Roberson  * Compute the actual number of bucket entries that packs a bucket into a
249fc03d22bSJeff Roberson  * power-of-two allocation size for more efficient space utilization.
250f9d27e75SRobert Watson  */
251fc03d22bSJeff Roberson #define	BUCKET_SIZE(n)						\
252fc03d22bSJeff Roberson     (((sizeof(void *) * (n)) - sizeof(struct uma_bucket)) / sizeof(void *))
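/*
 * For example, assuming 8-byte pointers and a 16-byte struct uma_bucket
 * header (typical on LP64), BUCKET_SIZE(32) evaluates to (256 - 16) / 8 = 30,
 * so a "32 Bucket" holds 30 items and its allocation fills exactly 256 bytes.
 */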
253fc03d22bSJeff Roberson 
2541aa6c758SAlexander Motin #define	BUCKET_MAX	BUCKET_SIZE(256)
255fc03d22bSJeff Roberson 
256fc03d22bSJeff Roberson struct uma_bucket_zone bucket_zones[] = {
257e84130a0SJeff Roberson 	/* Literal bucket sizes. */
258e84130a0SJeff Roberson 	{ NULL, "2 Bucket", 2, 4096 },
259e84130a0SJeff Roberson 	{ NULL, "4 Bucket", 4, 3072 },
260e84130a0SJeff Roberson 	{ NULL, "8 Bucket", 8, 2048 },
261e84130a0SJeff Roberson 	{ NULL, "16 Bucket", 16, 1024 },
262e84130a0SJeff Roberson 	/* Rounded down power of 2 sizes for efficiency. */
263fc03d22bSJeff Roberson 	{ NULL, "32 Bucket", BUCKET_SIZE(32), 512 },
264fc03d22bSJeff Roberson 	{ NULL, "64 Bucket", BUCKET_SIZE(64), 256 },
265fc03d22bSJeff Roberson 	{ NULL, "128 Bucket", BUCKET_SIZE(128), 128 },
2661aa6c758SAlexander Motin 	{ NULL, "256 Bucket", BUCKET_SIZE(256), 64 },
267fc03d22bSJeff Roberson 	{ NULL, NULL, 0}
268fc03d22bSJeff Roberson };
269cae33c14SJeff Roberson 
2702019094aSRobert Watson /*
2712019094aSRobert Watson  * Flags and enumerations to be passed to internal functions.
2722019094aSRobert Watson  */
273bb15d1c7SGleb Smirnoff enum zfreeskip {
274bb15d1c7SGleb Smirnoff 	SKIP_NONE =	0,
275bb15d1c7SGleb Smirnoff 	SKIP_CNT =	0x00000001,
276bb15d1c7SGleb Smirnoff 	SKIP_DTOR =	0x00010000,
277bb15d1c7SGleb Smirnoff 	SKIP_FINI =	0x00020000,
278bb15d1c7SGleb Smirnoff };
279b23f72e9SBrian Feldman 
2808355f576SJeff Roberson /* Prototypes.. */
2818355f576SJeff Roberson 
282a81c400eSJeff Roberson void	uma_startup1(vm_offset_t);
283f4bef67cSGleb Smirnoff void	uma_startup2(void);
284f4bef67cSGleb Smirnoff 
285ab3185d1SJeff Roberson static void *noobj_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
286ab3185d1SJeff Roberson static void *page_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
287ab3059a8SMatt Macy static void *pcpu_page_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
288ab3185d1SJeff Roberson static void *startup_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
289ec0d8280SRyan Libby static void *contig_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
290f2c2231eSRyan Stone static void page_free(void *, vm_size_t, uint8_t);
291ab3059a8SMatt Macy static void pcpu_page_free(void *, vm_size_t, uint8_t);
29286220393SMark Johnston static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int, int, int);
2939643769aSJeff Roberson static void cache_drain(uma_zone_t);
2948355f576SJeff Roberson static void bucket_drain(uma_zone_t, uma_bucket_t);
295*aabe13f1SMark Johnston static void bucket_cache_reclaim(uma_zone_t zone, bool, int);
296b23f72e9SBrian Feldman static int keg_ctor(void *, int, void *, int);
297099a0e58SBosko Milekic static void keg_dtor(void *, int, void *);
298b23f72e9SBrian Feldman static int zone_ctor(void *, int, void *, int);
2999c2cd7e5SJeff Roberson static void zone_dtor(void *, int, void *);
300d4665eaaSJeff Roberson static inline void item_dtor(uma_zone_t zone, void *item, int size,
301d4665eaaSJeff Roberson     void *udata, enum zfreeskip skip);
302b23f72e9SBrian Feldman static int zero_init(void *, int, int);
303c6fd3e23SJeff Roberson static void zone_free_bucket(uma_zone_t zone, uma_bucket_t bucket, void *udata,
304c6fd3e23SJeff Roberson     int itemdomain, bool ws);
30520a4e154SJeff Roberson static void zone_foreach(void (*zfunc)(uma_zone_t, void *), void *);
306a81c400eSJeff Roberson static void zone_foreach_unlocked(void (*zfunc)(uma_zone_t, void *), void *);
30720a4e154SJeff Roberson static void zone_timeout(uma_zone_t zone, void *);
3083b2f2cb8SAlexander Motin static int hash_alloc(struct uma_hash *, u_int);
3090aef6126SJeff Roberson static int hash_expand(struct uma_hash *, struct uma_hash *);
3100aef6126SJeff Roberson static void hash_free(struct uma_hash *hash);
3118355f576SJeff Roberson static void uma_timeout(void *);
312860bb7a0SMark Johnston static void uma_shutdown(void);
313ab3185d1SJeff Roberson static void *zone_alloc_item(uma_zone_t, void *, int, int);
3140095a784SJeff Roberson static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip);
3154bd61e19SJeff Roberson static int zone_alloc_limit(uma_zone_t zone, int count, int flags);
3164bd61e19SJeff Roberson static void zone_free_limit(uma_zone_t zone, int count);
31786bbae32SJeff Roberson static void bucket_enable(void);
318cae33c14SJeff Roberson static void bucket_init(void);
3196fd34d6fSJeff Roberson static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int);
3206fd34d6fSJeff Roberson static void bucket_free(uma_zone_t zone, uma_bucket_t, void *);
321*aabe13f1SMark Johnston static void bucket_zone_drain(int domain);
322beb8beefSJeff Roberson static uma_bucket_t zone_alloc_bucket(uma_zone_t, void *, int, int);
3230095a784SJeff Roberson static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab);
324bb15d1c7SGleb Smirnoff static void slab_free_item(uma_zone_t zone, uma_slab_t slab, void *item);
32509c8cb71SMark Johnston static size_t slab_sizeof(int nitems);
326e20a199fSJeff Roberson static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
32785dcf349SGleb Smirnoff     uma_fini fini, int align, uint32_t flags);
328b75c4efcSAndrew Turner static int zone_import(void *, void **, int, int, int);
329b75c4efcSAndrew Turner static void zone_release(void *, void **, int);
330beb8beefSJeff Roberson static bool cache_alloc(uma_zone_t, uma_cache_t, void *, int);
3310a81b439SJeff Roberson static bool cache_free(uma_zone_t, uma_cache_t, void *, void *, int);
332bbee39c6SJeff Roberson 
3337a52a97eSRobert Watson static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
3347a52a97eSRobert Watson static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
33520a4e154SJeff Roberson static int sysctl_handle_uma_zone_allocs(SYSCTL_HANDLER_ARGS);
33620a4e154SJeff Roberson static int sysctl_handle_uma_zone_frees(SYSCTL_HANDLER_ARGS);
3376d204a6aSRyan Libby static int sysctl_handle_uma_zone_flags(SYSCTL_HANDLER_ARGS);
338f7af5015SRyan Libby static int sysctl_handle_uma_slab_efficiency(SYSCTL_HANDLER_ARGS);
3394bd61e19SJeff Roberson static int sysctl_handle_uma_zone_items(SYSCTL_HANDLER_ARGS);
3408355f576SJeff Roberson 
34131c251a0SJeff Roberson static uint64_t uma_zone_get_allocs(uma_zone_t zone);
34231c251a0SJeff Roberson 
3437029da5cSPawel Biernacki static SYSCTL_NODE(_vm, OID_AUTO, debug, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
34433e5a1eaSRyan Libby     "Memory allocation debugging");
34533e5a1eaSRyan Libby 
3469542ea7bSGleb Smirnoff #ifdef INVARIANTS
34731c251a0SJeff Roberson static uint64_t uma_keg_get_allocs(uma_keg_t zone);
348815db204SRyan Libby static inline struct noslabbits *slab_dbg_bits(uma_slab_t slab, uma_keg_t keg);
349815db204SRyan Libby 
350c5deaf04SGleb Smirnoff static bool uma_dbg_kskip(uma_keg_t keg, void *mem);
351c5deaf04SGleb Smirnoff static bool uma_dbg_zskip(uma_zone_t zone, void *mem);
3529542ea7bSGleb Smirnoff static void uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item);
3539542ea7bSGleb Smirnoff static void uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item);
354c5deaf04SGleb Smirnoff 
355c5deaf04SGleb Smirnoff static u_int dbg_divisor = 1;
356c5deaf04SGleb Smirnoff SYSCTL_UINT(_vm_debug, OID_AUTO, divisor,
357c5deaf04SGleb Smirnoff     CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &dbg_divisor, 0,
358c5deaf04SGleb Smirnoff     "Debug & thrash every nth item in the memory allocator");
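/*
 * A note on intent (see uma_dbg_kskip()): roughly one out of every
 * dbg_divisor allocated items is trashed and verified; the counters that
 * follow record how many items were checked and how many were skipped.
 */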
359c5deaf04SGleb Smirnoff 
360c5deaf04SGleb Smirnoff static counter_u64_t uma_dbg_cnt = EARLY_COUNTER;
361c5deaf04SGleb Smirnoff static counter_u64_t uma_skip_cnt = EARLY_COUNTER;
362c5deaf04SGleb Smirnoff SYSCTL_COUNTER_U64(_vm_debug, OID_AUTO, trashed, CTLFLAG_RD,
363c5deaf04SGleb Smirnoff     &uma_dbg_cnt, "memory items debugged");
364c5deaf04SGleb Smirnoff SYSCTL_COUNTER_U64(_vm_debug, OID_AUTO, skipped, CTLFLAG_RD,
365c5deaf04SGleb Smirnoff     &uma_skip_cnt, "memory items skipped, not debugged");
3669542ea7bSGleb Smirnoff #endif
3679542ea7bSGleb Smirnoff 
3687029da5cSPawel Biernacki SYSCTL_NODE(_vm, OID_AUTO, uma, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
3697029da5cSPawel Biernacki     "Universal Memory Allocator");
37035ec24f3SRyan Libby 
371a314aba8SMateusz Guzik SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLFLAG_MPSAFE|CTLTYPE_INT,
3727a52a97eSRobert Watson     0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
3737a52a97eSRobert Watson 
374a314aba8SMateusz Guzik SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLFLAG_MPSAFE|CTLTYPE_STRUCT,
3757a52a97eSRobert Watson     0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");
3767a52a97eSRobert Watson 
3772f891cd5SPawel Jakub Dawidek static int zone_warnings = 1;
378af3b2549SHans Petter Selasky SYSCTL_INT(_vm, OID_AUTO, zone_warnings, CTLFLAG_RWTUN, &zone_warnings, 0,
3792f891cd5SPawel Jakub Dawidek     "Warn when UMA zones become full");
3802f891cd5SPawel Jakub Dawidek 
38133e5a1eaSRyan Libby static int multipage_slabs = 1;
38233e5a1eaSRyan Libby TUNABLE_INT("vm.debug.uma_multipage_slabs", &multipage_slabs);
38333e5a1eaSRyan Libby SYSCTL_INT(_vm_debug, OID_AUTO, uma_multipage_slabs,
38433e5a1eaSRyan Libby     CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &multipage_slabs, 0,
38533e5a1eaSRyan Libby     "UMA may choose larger slab sizes for better efficiency");
38633e5a1eaSRyan Libby 
38786bbae32SJeff Roberson /*
3889b8db4d0SRyan Libby  * Select the slab zone for an offpage slab with the given maximum item count.
3899b8db4d0SRyan Libby  */
3909b8db4d0SRyan Libby static inline uma_zone_t
3919b8db4d0SRyan Libby slabzone(int ipers)
3929b8db4d0SRyan Libby {
3939b8db4d0SRyan Libby 
3949b8db4d0SRyan Libby 	return (slabzones[ipers > SLABZONE0_SETSIZE]);
3959b8db4d0SRyan Libby }
3969b8db4d0SRyan Libby 
3979b8db4d0SRyan Libby /*
39886bbae32SJeff Roberson  * This routine checks to see whether or not it's safe to enable buckets.
39986bbae32SJeff Roberson  */
40086bbae32SJeff Roberson static void
40186bbae32SJeff Roberson bucket_enable(void)
40286bbae32SJeff Roberson {
4033182660aSRyan Libby 
404a81c400eSJeff Roberson 	KASSERT(booted >= BOOT_KVA, ("Bucket enable before init"));
405251386b4SMaksim Yevmenkin 	bucketdisable = vm_page_count_min();
40686bbae32SJeff Roberson }
40786bbae32SJeff Roberson 
408dc2c7965SRobert Watson /*
409dc2c7965SRobert Watson  * Initialize bucket_zones, the array of zones of buckets of various sizes.
410dc2c7965SRobert Watson  *
411dc2c7965SRobert Watson  * For each zone, calculate the memory required for each bucket, consisting
412fc03d22bSJeff Roberson  * of the header and an array of pointers.
413dc2c7965SRobert Watson  */
414cae33c14SJeff Roberson static void
415cae33c14SJeff Roberson bucket_init(void)
416cae33c14SJeff Roberson {
417cae33c14SJeff Roberson 	struct uma_bucket_zone *ubz;
418cae33c14SJeff Roberson 	int size;
419cae33c14SJeff Roberson 
420d74e6a1dSAlan Cox 	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++) {
421cae33c14SJeff Roberson 		size = roundup(sizeof(struct uma_bucket), sizeof(void *));
422cae33c14SJeff Roberson 		size += sizeof(void *) * ubz->ubz_entries;
423cae33c14SJeff Roberson 		ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
424e20a199fSJeff Roberson 		    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
425dfe13344SJeff Roberson 		    UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET |
426dfe13344SJeff Roberson 		    UMA_ZONE_FIRSTTOUCH);
427cae33c14SJeff Roberson 	}
428cae33c14SJeff Roberson }
429cae33c14SJeff Roberson 
430dc2c7965SRobert Watson /*
431dc2c7965SRobert Watson  * Given a desired number of entries for a bucket, return the zone from which
432dc2c7965SRobert Watson  * to allocate the bucket.
433dc2c7965SRobert Watson  */
434dc2c7965SRobert Watson static struct uma_bucket_zone *
435dc2c7965SRobert Watson bucket_zone_lookup(int entries)
436dc2c7965SRobert Watson {
437fc03d22bSJeff Roberson 	struct uma_bucket_zone *ubz;
438dc2c7965SRobert Watson 
439fc03d22bSJeff Roberson 	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
440fc03d22bSJeff Roberson 		if (ubz->ubz_entries >= entries)
441fc03d22bSJeff Roberson 			return (ubz);
442fc03d22bSJeff Roberson 	ubz--;
443fc03d22bSJeff Roberson 	return (ubz);
444fc03d22bSJeff Roberson }
445fc03d22bSJeff Roberson 
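/*
 * Choose the number of bucket entries for an item of the given size: pick
 * the largest bucket whose per-item size limit still covers the item, or,
 * for items larger than any limit, scale the entry count down so the total
 * bytes cached per bucket stay roughly constant.
 */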
446fc03d22bSJeff Roberson static int
447fc03d22bSJeff Roberson bucket_select(int size)
448fc03d22bSJeff Roberson {
449fc03d22bSJeff Roberson 	struct uma_bucket_zone *ubz;
450fc03d22bSJeff Roberson 
451fc03d22bSJeff Roberson 	ubz = &bucket_zones[0];
452fc03d22bSJeff Roberson 	if (size > ubz->ubz_maxsize)
453fc03d22bSJeff Roberson 		return MAX((ubz->ubz_maxsize * ubz->ubz_entries) / size, 1);
454fc03d22bSJeff Roberson 
455fc03d22bSJeff Roberson 	for (; ubz->ubz_entries != 0; ubz++)
456fc03d22bSJeff Roberson 		if (ubz->ubz_maxsize < size)
457fc03d22bSJeff Roberson 			break;
458fc03d22bSJeff Roberson 	ubz--;
459fc03d22bSJeff Roberson 	return (ubz->ubz_entries);
460dc2c7965SRobert Watson }
461dc2c7965SRobert Watson 
462cae33c14SJeff Roberson static uma_bucket_t
4636fd34d6fSJeff Roberson bucket_alloc(uma_zone_t zone, void *udata, int flags)
464cae33c14SJeff Roberson {
465cae33c14SJeff Roberson 	struct uma_bucket_zone *ubz;
466cae33c14SJeff Roberson 	uma_bucket_t bucket;
467cae33c14SJeff Roberson 
468cae33c14SJeff Roberson 	/*
469d4665eaaSJeff Roberson 	 * Don't allocate buckets early in boot.
470cae33c14SJeff Roberson 	 */
471d4665eaaSJeff Roberson 	if (__predict_false(booted < BOOT_KVA))
472cae33c14SJeff Roberson 		return (NULL);
473a81c400eSJeff Roberson 
4746fd34d6fSJeff Roberson 	/*
4756fd34d6fSJeff Roberson 	 * To limit bucket recursion we store the original zone flags
4766fd34d6fSJeff Roberson 	 * in a cookie passed via zalloc_arg/zfree_arg.  This allows the
4776fd34d6fSJeff Roberson 	 * NOVM flag to persist even through deep recursions.  We also
4786fd34d6fSJeff Roberson 	 * store ZFLAG_BUCKET once we have recursed attempting to allocate
4796fd34d6fSJeff Roberson 	 * a bucket for a bucket zone so we do not allow infinite bucket
4806fd34d6fSJeff Roberson 	 * recursion.  This cookie will even persist to frees of unused
4816fd34d6fSJeff Roberson 	 * buckets via the allocation path or bucket allocations in the
4826fd34d6fSJeff Roberson 	 * free path.
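	 *
	 * Concretely, the first bucket-for-bucket allocation is permitted but
	 * tags the cookie with UMA_ZFLAG_BUCKET; a second level of recursion
	 * sees the tag below and returns NULL instead of recursing further.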
4836fd34d6fSJeff Roberson 	 */
4846fd34d6fSJeff Roberson 	if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
4856fd34d6fSJeff Roberson 		udata = (void *)(uintptr_t)zone->uz_flags;
486e8a720feSAlexander Motin 	else {
487e8a720feSAlexander Motin 		if ((uintptr_t)udata & UMA_ZFLAG_BUCKET)
488e8a720feSAlexander Motin 			return (NULL);
4896fd34d6fSJeff Roberson 		udata = (void *)((uintptr_t)udata | UMA_ZFLAG_BUCKET);
490e8a720feSAlexander Motin 	}
491bae55c4aSRyan Libby 	if (((uintptr_t)udata & UMA_ZONE_VM) != 0)
492af526374SJeff Roberson 		flags |= M_NOVM;
493f8b6c515SMark Johnston 	ubz = bucket_zone_lookup(atomic_load_16(&zone->uz_bucket_size));
49420d3ab87SAlexander Motin 	if (ubz->ubz_zone == zone && (ubz + 1)->ubz_entries != 0)
49520d3ab87SAlexander Motin 		ubz++;
4966fd34d6fSJeff Roberson 	bucket = uma_zalloc_arg(ubz->ubz_zone, udata, flags);
497cae33c14SJeff Roberson 	if (bucket) {
498cae33c14SJeff Roberson #ifdef INVARIANTS
499cae33c14SJeff Roberson 		bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
500cae33c14SJeff Roberson #endif
501cae33c14SJeff Roberson 		bucket->ub_cnt = 0;
502f8b6c515SMark Johnston 		bucket->ub_entries = min(ubz->ubz_entries,
503f8b6c515SMark Johnston 		    zone->uz_bucket_size_max);
504d4665eaaSJeff Roberson 		bucket->ub_seq = SMR_SEQ_INVALID;
505d4665eaaSJeff Roberson 		CTR3(KTR_UMA, "bucket_alloc: zone %s(%p) allocated bucket %p",
506d4665eaaSJeff Roberson 		    zone->uz_name, zone, bucket);
507cae33c14SJeff Roberson 	}
508cae33c14SJeff Roberson 
509cae33c14SJeff Roberson 	return (bucket);
510cae33c14SJeff Roberson }
511cae33c14SJeff Roberson 
512cae33c14SJeff Roberson static void
5136fd34d6fSJeff Roberson bucket_free(uma_zone_t zone, uma_bucket_t bucket, void *udata)
514cae33c14SJeff Roberson {
515cae33c14SJeff Roberson 	struct uma_bucket_zone *ubz;
516cae33c14SJeff Roberson 
517c6fd3e23SJeff Roberson 	if (bucket->ub_cnt != 0)
518c6fd3e23SJeff Roberson 		bucket_drain(zone, bucket);
519c6fd3e23SJeff Roberson 
520fc03d22bSJeff Roberson 	KASSERT(bucket->ub_cnt == 0,
521fc03d22bSJeff Roberson 	    ("bucket_free: Freeing a non free bucket."));
522d4665eaaSJeff Roberson 	KASSERT(bucket->ub_seq == SMR_SEQ_INVALID,
523d4665eaaSJeff Roberson 	    ("bucket_free: Freeing an SMR bucket."));
5246fd34d6fSJeff Roberson 	if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
5256fd34d6fSJeff Roberson 		udata = (void *)(uintptr_t)zone->uz_flags;
526dc2c7965SRobert Watson 	ubz = bucket_zone_lookup(bucket->ub_entries);
5276fd34d6fSJeff Roberson 	uma_zfree_arg(ubz->ubz_zone, bucket, udata);
528cae33c14SJeff Roberson }
529cae33c14SJeff Roberson 
530cae33c14SJeff Roberson static void
531*aabe13f1SMark Johnston bucket_zone_drain(int domain)
532cae33c14SJeff Roberson {
533cae33c14SJeff Roberson 	struct uma_bucket_zone *ubz;
534cae33c14SJeff Roberson 
535cae33c14SJeff Roberson 	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
536*aabe13f1SMark Johnston 		uma_zone_reclaim_domain(ubz->ubz_zone, UMA_RECLAIM_DRAIN,
537*aabe13f1SMark Johnston 		    domain);
538cae33c14SJeff Roberson }
539cae33c14SJeff Roberson 
54009c8cb71SMark Johnston #ifdef KASAN
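/*
 * These helpers mark item and slab memory valid or invalid (KASAN_UMA_FREED)
 * in the KASAN shadow as it moves between UMA and its consumers.  For
 * per-CPU zones, each CPU's replica of the item is marked individually.
 */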
54109c8cb71SMark Johnston static void
54209c8cb71SMark Johnston kasan_mark_item_valid(uma_zone_t zone, void *item)
54309c8cb71SMark Johnston {
54409c8cb71SMark Johnston 	void *pcpu_item;
54509c8cb71SMark Johnston 	size_t sz, rsz;
54609c8cb71SMark Johnston 	int i;
54709c8cb71SMark Johnston 
54809c8cb71SMark Johnston 	if ((zone->uz_flags & UMA_ZONE_NOKASAN) != 0)
54909c8cb71SMark Johnston 		return;
55009c8cb71SMark Johnston 
55109c8cb71SMark Johnston 	sz = zone->uz_size;
55209c8cb71SMark Johnston 	rsz = roundup2(sz, KASAN_SHADOW_SCALE);
55309c8cb71SMark Johnston 	if ((zone->uz_flags & UMA_ZONE_PCPU) == 0) {
55409c8cb71SMark Johnston 		kasan_mark(item, sz, rsz, 0);
55509c8cb71SMark Johnston 	} else {
55609c8cb71SMark Johnston 		pcpu_item = zpcpu_base_to_offset(item);
55709c8cb71SMark Johnston 		for (i = 0; i <= mp_maxid; i++)
55809c8cb71SMark Johnston 			kasan_mark(zpcpu_get_cpu(pcpu_item, i), sz, rsz, 0);
55909c8cb71SMark Johnston 	}
56009c8cb71SMark Johnston }
56109c8cb71SMark Johnston 
56209c8cb71SMark Johnston static void
56309c8cb71SMark Johnston kasan_mark_item_invalid(uma_zone_t zone, void *item)
56409c8cb71SMark Johnston {
56509c8cb71SMark Johnston 	void *pcpu_item;
56609c8cb71SMark Johnston 	size_t sz;
56709c8cb71SMark Johnston 	int i;
56809c8cb71SMark Johnston 
56909c8cb71SMark Johnston 	if ((zone->uz_flags & UMA_ZONE_NOKASAN) != 0)
57009c8cb71SMark Johnston 		return;
57109c8cb71SMark Johnston 
57209c8cb71SMark Johnston 	sz = roundup2(zone->uz_size, KASAN_SHADOW_SCALE);
57309c8cb71SMark Johnston 	if ((zone->uz_flags & UMA_ZONE_PCPU) == 0) {
57409c8cb71SMark Johnston 		kasan_mark(item, 0, sz, KASAN_UMA_FREED);
57509c8cb71SMark Johnston 	} else {
57609c8cb71SMark Johnston 		pcpu_item = zpcpu_base_to_offset(item);
57709c8cb71SMark Johnston 		for (i = 0; i <= mp_maxid; i++)
57809c8cb71SMark Johnston 			kasan_mark(zpcpu_get_cpu(pcpu_item, i), 0, sz, 0);
57909c8cb71SMark Johnston 	}
58009c8cb71SMark Johnston }
58109c8cb71SMark Johnston 
58209c8cb71SMark Johnston static void
58309c8cb71SMark Johnston kasan_mark_slab_valid(uma_keg_t keg, void *mem)
58409c8cb71SMark Johnston {
58509c8cb71SMark Johnston 	size_t sz;
58609c8cb71SMark Johnston 
58709c8cb71SMark Johnston 	if ((keg->uk_flags & UMA_ZONE_NOKASAN) == 0) {
58809c8cb71SMark Johnston 		sz = keg->uk_ppera * PAGE_SIZE;
58909c8cb71SMark Johnston 		kasan_mark(mem, sz, sz, 0);
59009c8cb71SMark Johnston 	}
59109c8cb71SMark Johnston }
59209c8cb71SMark Johnston 
59309c8cb71SMark Johnston static void
59409c8cb71SMark Johnston kasan_mark_slab_invalid(uma_keg_t keg, void *mem)
59509c8cb71SMark Johnston {
59609c8cb71SMark Johnston 	size_t sz;
59709c8cb71SMark Johnston 
59809c8cb71SMark Johnston 	if ((keg->uk_flags & UMA_ZONE_NOKASAN) == 0) {
59909c8cb71SMark Johnston 		if ((keg->uk_flags & UMA_ZFLAG_OFFPAGE) != 0)
60009c8cb71SMark Johnston 			sz = keg->uk_ppera * PAGE_SIZE;
60109c8cb71SMark Johnston 		else
60209c8cb71SMark Johnston 			sz = keg->uk_pgoff;
60309c8cb71SMark Johnston 		kasan_mark(mem, 0, sz, KASAN_UMA_FREED);
60409c8cb71SMark Johnston 	}
60509c8cb71SMark Johnston }
60609c8cb71SMark Johnston #else /* !KASAN */
60709c8cb71SMark Johnston static void
60809c8cb71SMark Johnston kasan_mark_item_valid(uma_zone_t zone __unused, void *item __unused)
60909c8cb71SMark Johnston {
61009c8cb71SMark Johnston }
61109c8cb71SMark Johnston 
61209c8cb71SMark Johnston static void
61309c8cb71SMark Johnston kasan_mark_item_invalid(uma_zone_t zone __unused, void *item __unused)
61409c8cb71SMark Johnston {
61509c8cb71SMark Johnston }
61609c8cb71SMark Johnston 
61709c8cb71SMark Johnston static void
61809c8cb71SMark Johnston kasan_mark_slab_valid(uma_keg_t keg __unused, void *mem __unused)
61909c8cb71SMark Johnston {
62009c8cb71SMark Johnston }
62109c8cb71SMark Johnston 
62209c8cb71SMark Johnston static void
62309c8cb71SMark Johnston kasan_mark_slab_invalid(uma_keg_t keg __unused, void *mem __unused)
62409c8cb71SMark Johnston {
62509c8cb71SMark Johnston }
62609c8cb71SMark Johnston #endif /* KASAN */
62709c8cb71SMark Johnston 
62808cfa56eSMark Johnston /*
629c6fd3e23SJeff Roberson  * Acquire the domain lock and record contention.
630c6fd3e23SJeff Roberson  */
631c6fd3e23SJeff Roberson static uma_zone_domain_t
632c6fd3e23SJeff Roberson zone_domain_lock(uma_zone_t zone, int domain)
633c6fd3e23SJeff Roberson {
634c6fd3e23SJeff Roberson 	uma_zone_domain_t zdom;
635c6fd3e23SJeff Roberson 	bool lockfail;
636c6fd3e23SJeff Roberson 
637c6fd3e23SJeff Roberson 	zdom = ZDOM_GET(zone, domain);
638c6fd3e23SJeff Roberson 	lockfail = false;
639c6fd3e23SJeff Roberson 	if (ZDOM_OWNED(zdom))
640c6fd3e23SJeff Roberson 		lockfail = true;
641c6fd3e23SJeff Roberson 	ZDOM_LOCK(zdom);
642c6fd3e23SJeff Roberson 	/* This is unsynchronized.  The counter does not need to be precise. */
643c6fd3e23SJeff Roberson 	if (lockfail && zone->uz_bucket_size < zone->uz_bucket_size_max)
644c6fd3e23SJeff Roberson 		zone->uz_bucket_size++;
645c6fd3e23SJeff Roberson 	return (zdom);
646c6fd3e23SJeff Roberson }
647c6fd3e23SJeff Roberson 
648c6fd3e23SJeff Roberson /*
649fe835cbfSJeff Roberson  * Search for the domain with the least cached items and return it if it
650fe835cbfSJeff Roberson  * is out of balance with the preferred domain.
651c6fd3e23SJeff Roberson  */
652c6fd3e23SJeff Roberson static __noinline int
653c6fd3e23SJeff Roberson zone_domain_lowest(uma_zone_t zone, int pref)
654c6fd3e23SJeff Roberson {
655fe835cbfSJeff Roberson 	long least, nitems, prefitems;
656c6fd3e23SJeff Roberson 	int domain;
657c6fd3e23SJeff Roberson 	int i;
658c6fd3e23SJeff Roberson 
659fe835cbfSJeff Roberson 	prefitems = least = LONG_MAX;
660c6fd3e23SJeff Roberson 	domain = 0;
661c6fd3e23SJeff Roberson 	for (i = 0; i < vm_ndomains; i++) {
662c6fd3e23SJeff Roberson 		nitems = ZDOM_GET(zone, i)->uzd_nitems;
663c6fd3e23SJeff Roberson 		if (nitems < least) {
664c6fd3e23SJeff Roberson 			domain = i;
665c6fd3e23SJeff Roberson 			least = nitems;
666c6fd3e23SJeff Roberson 		}
667fe835cbfSJeff Roberson 		if (domain == pref)
668fe835cbfSJeff Roberson 			prefitems = nitems;
669fe835cbfSJeff Roberson 	}
670fe835cbfSJeff Roberson 	if (prefitems < least * 2)
671fe835cbfSJeff Roberson 		return (pref);
672c6fd3e23SJeff Roberson 
673c6fd3e23SJeff Roberson 	return (domain);
674c6fd3e23SJeff Roberson }
675c6fd3e23SJeff Roberson 
676c6fd3e23SJeff Roberson /*
677c6fd3e23SJeff Roberson  * Search for the domain with the most cached items and return it or the
678c6fd3e23SJeff Roberson  * preferred domain if it has enough to proceed.
679c6fd3e23SJeff Roberson  */
680c6fd3e23SJeff Roberson static __noinline int
681c6fd3e23SJeff Roberson zone_domain_highest(uma_zone_t zone, int pref)
682c6fd3e23SJeff Roberson {
683c6fd3e23SJeff Roberson 	long most, nitems;
684c6fd3e23SJeff Roberson 	int domain;
685c6fd3e23SJeff Roberson 	int i;
686c6fd3e23SJeff Roberson 
687c6fd3e23SJeff Roberson 	if (ZDOM_GET(zone, pref)->uzd_nitems > BUCKET_MAX)
688c6fd3e23SJeff Roberson 		return (pref);
689c6fd3e23SJeff Roberson 
690c6fd3e23SJeff Roberson 	most = 0;
691c6fd3e23SJeff Roberson 	domain = 0;
692c6fd3e23SJeff Roberson 	for (i = 0; i < vm_ndomains; i++) {
693c6fd3e23SJeff Roberson 		nitems = ZDOM_GET(zone, i)->uzd_nitems;
694c6fd3e23SJeff Roberson 		if (nitems > most) {
695c6fd3e23SJeff Roberson 			domain = i;
696c6fd3e23SJeff Roberson 			most = nitems;
697c6fd3e23SJeff Roberson 		}
698c6fd3e23SJeff Roberson 	}
699c6fd3e23SJeff Roberson 
700c6fd3e23SJeff Roberson 	return (domain);
701c6fd3e23SJeff Roberson }
702c6fd3e23SJeff Roberson 
703c6fd3e23SJeff Roberson /*
704c6fd3e23SJeff Roberson  * Safely subtract cnt from imax.
705c6fd3e23SJeff Roberson  */
706c6fd3e23SJeff Roberson static void
707c6fd3e23SJeff Roberson zone_domain_imax_sub(uma_zone_domain_t zdom, int cnt)
708c6fd3e23SJeff Roberson {
709c6fd3e23SJeff Roberson 	long new;
710c6fd3e23SJeff Roberson 	long old;
711c6fd3e23SJeff Roberson 
712c6fd3e23SJeff Roberson 	old = zdom->uzd_imax;
713c6fd3e23SJeff Roberson 	do {
714c6fd3e23SJeff Roberson 		if (old <= cnt)
715c6fd3e23SJeff Roberson 			new = 0;
716c6fd3e23SJeff Roberson 		else
717c6fd3e23SJeff Roberson 			new = old - cnt;
718c6fd3e23SJeff Roberson 	} while (atomic_fcmpset_long(&zdom->uzd_imax, &old, new) == 0);
719c6fd3e23SJeff Roberson }
720c6fd3e23SJeff Roberson 
721c6fd3e23SJeff Roberson /*
722c6fd3e23SJeff Roberson  * Set the maximum imax value.
723c6fd3e23SJeff Roberson  */
724c6fd3e23SJeff Roberson static void
725c6fd3e23SJeff Roberson zone_domain_imax_set(uma_zone_domain_t zdom, int nitems)
726c6fd3e23SJeff Roberson {
727c6fd3e23SJeff Roberson 	long old;
728c6fd3e23SJeff Roberson 
729c6fd3e23SJeff Roberson 	old = zdom->uzd_imax;
730c6fd3e23SJeff Roberson 	do {
731c6fd3e23SJeff Roberson 		if (old >= nitems)
732c6fd3e23SJeff Roberson 			break;
733c6fd3e23SJeff Roberson 	} while (atomic_fcmpset_long(&zdom->uzd_imax, &old, nitems) == 0);
734c6fd3e23SJeff Roberson }
735c6fd3e23SJeff Roberson 
736c6fd3e23SJeff Roberson /*
73708cfa56eSMark Johnston  * Attempt to satisfy an allocation by retrieving a full bucket from one of the
738d4665eaaSJeff Roberson  * zone's caches.  If a bucket is found the zone is not locked on return.
73908cfa56eSMark Johnston  */
7400f9b7bf3SMark Johnston static uma_bucket_t
741c6fd3e23SJeff Roberson zone_fetch_bucket(uma_zone_t zone, uma_zone_domain_t zdom, bool reclaim)
7420f9b7bf3SMark Johnston {
7430f9b7bf3SMark Johnston 	uma_bucket_t bucket;
744d4665eaaSJeff Roberson 	int i;
745d4665eaaSJeff Roberson 	bool dtor = false;
7460f9b7bf3SMark Johnston 
747c6fd3e23SJeff Roberson 	ZDOM_LOCK_ASSERT(zdom);
7480f9b7bf3SMark Johnston 
749dc3915c8SJeff Roberson 	if ((bucket = STAILQ_FIRST(&zdom->uzd_buckets)) == NULL)
750d4665eaaSJeff Roberson 		return (NULL);
751d4665eaaSJeff Roberson 
752543117beSJeff Roberson 	/* SMR Buckets can not be re-used until readers expire. */
753d4665eaaSJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_SMR) != 0 &&
754d4665eaaSJeff Roberson 	    bucket->ub_seq != SMR_SEQ_INVALID) {
755d4665eaaSJeff Roberson 		if (!smr_poll(zone->uz_smr, bucket->ub_seq, false))
756d4665eaaSJeff Roberson 			return (NULL);
757d4665eaaSJeff Roberson 		bucket->ub_seq = SMR_SEQ_INVALID;
758543117beSJeff Roberson 		dtor = (zone->uz_dtor != NULL) || UMA_ALWAYS_CTORDTOR;
759c6fd3e23SJeff Roberson 		if (STAILQ_NEXT(bucket, ub_link) != NULL)
760c6fd3e23SJeff Roberson 			zdom->uzd_seq = STAILQ_NEXT(bucket, ub_link)->ub_seq;
761d4665eaaSJeff Roberson 	}
762dc3915c8SJeff Roberson 	STAILQ_REMOVE_HEAD(&zdom->uzd_buckets, ub_link);
76306d8bdcbSMark Johnston 
76406d8bdcbSMark Johnston 	KASSERT(zdom->uzd_nitems >= bucket->ub_cnt,
76506d8bdcbSMark Johnston 	    ("%s: item count underflow (%ld, %d)",
76606d8bdcbSMark Johnston 	    __func__, zdom->uzd_nitems, bucket->ub_cnt));
76706d8bdcbSMark Johnston 	KASSERT(bucket->ub_cnt > 0,
76806d8bdcbSMark Johnston 	    ("%s: empty bucket in bucket cache", __func__));
7690f9b7bf3SMark Johnston 	zdom->uzd_nitems -= bucket->ub_cnt;
770c6fd3e23SJeff Roberson 
771c6fd3e23SJeff Roberson 	/*
772c6fd3e23SJeff Roberson 	 * Shift the bounds of the current WSS interval to avoid
773c6fd3e23SJeff Roberson 	 * perturbing the estimate.
774c6fd3e23SJeff Roberson 	 */
775c6fd3e23SJeff Roberson 	if (reclaim) {
776c6fd3e23SJeff Roberson 		zdom->uzd_imin -= lmin(zdom->uzd_imin, bucket->ub_cnt);
777c6fd3e23SJeff Roberson 		zone_domain_imax_sub(zdom, bucket->ub_cnt);
778c6fd3e23SJeff Roberson 	} else if (zdom->uzd_imin > zdom->uzd_nitems)
7790f9b7bf3SMark Johnston 		zdom->uzd_imin = zdom->uzd_nitems;
780c6fd3e23SJeff Roberson 
781c6fd3e23SJeff Roberson 	ZDOM_UNLOCK(zdom);
782d4665eaaSJeff Roberson 	if (dtor)
783d4665eaaSJeff Roberson 		for (i = 0; i < bucket->ub_cnt; i++)
784d4665eaaSJeff Roberson 			item_dtor(zone, bucket->ub_bucket[i], zone->uz_size,
785d4665eaaSJeff Roberson 			    NULL, SKIP_NONE);
786d4665eaaSJeff Roberson 
7870f9b7bf3SMark Johnston 	return (bucket);
7880f9b7bf3SMark Johnston }
7890f9b7bf3SMark Johnston 
79008cfa56eSMark Johnston /*
79108cfa56eSMark Johnston  * Insert a full bucket into the specified cache.  The "ws" parameter indicates
79208cfa56eSMark Johnston  * whether the bucket's contents should be counted as part of the zone's working
793c6fd3e23SJeff Roberson  * set.  The bucket may be freed if it exceeds the bucket limit.
79408cfa56eSMark Johnston  */
7950f9b7bf3SMark Johnston static void
796c6fd3e23SJeff Roberson zone_put_bucket(uma_zone_t zone, int domain, uma_bucket_t bucket, void *udata,
7970f9b7bf3SMark Johnston     const bool ws)
7980f9b7bf3SMark Johnston {
799c6fd3e23SJeff Roberson 	uma_zone_domain_t zdom;
8000f9b7bf3SMark Johnston 
801c6fd3e23SJeff Roberson 	/* We don't cache empty buckets.  This can happen after a reclaim. */
802c6fd3e23SJeff Roberson 	if (bucket->ub_cnt == 0)
803c6fd3e23SJeff Roberson 		goto out;
804c6fd3e23SJeff Roberson 	zdom = zone_domain_lock(zone, domain);
805c6fd3e23SJeff Roberson 
806c6fd3e23SJeff Roberson 	/*
807c6fd3e23SJeff Roberson 	 * Conditionally set the maximum number of items.
808c6fd3e23SJeff Roberson 	 */
8090f9b7bf3SMark Johnston 	zdom->uzd_nitems += bucket->ub_cnt;
810c6fd3e23SJeff Roberson 	if (__predict_true(zdom->uzd_nitems < zone->uz_bucket_max)) {
811c6fd3e23SJeff Roberson 		if (ws)
812c6fd3e23SJeff Roberson 			zone_domain_imax_set(zdom, zdom->uzd_nitems);
813c6fd3e23SJeff Roberson 		if (STAILQ_EMPTY(&zdom->uzd_buckets))
814c6fd3e23SJeff Roberson 			zdom->uzd_seq = bucket->ub_seq;
8155afdf5c1SMark Johnston 
8165afdf5c1SMark Johnston 		/*
8175afdf5c1SMark Johnston 		 * Try to promote reuse of recently used items.  For items
8185afdf5c1SMark Johnston 		 * protected by SMR, try to defer reuse to minimize polling.
8195afdf5c1SMark Johnston 		 */
8205afdf5c1SMark Johnston 		if (bucket->ub_seq == SMR_SEQ_INVALID)
8215afdf5c1SMark Johnston 			STAILQ_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link);
8225afdf5c1SMark Johnston 		else
823c6fd3e23SJeff Roberson 			STAILQ_INSERT_TAIL(&zdom->uzd_buckets, bucket, ub_link);
824c6fd3e23SJeff Roberson 		ZDOM_UNLOCK(zdom);
825c6fd3e23SJeff Roberson 		return;
826c6fd3e23SJeff Roberson 	}
827c6fd3e23SJeff Roberson 	zdom->uzd_nitems -= bucket->ub_cnt;
828c6fd3e23SJeff Roberson 	ZDOM_UNLOCK(zdom);
829c6fd3e23SJeff Roberson out:
830c6fd3e23SJeff Roberson 	bucket_free(zone, bucket, udata);
8310f9b7bf3SMark Johnston }
8320f9b7bf3SMark Johnston 
833376b1ba3SJeff Roberson /* Pops an item out of a per-cpu cache bucket. */
834376b1ba3SJeff Roberson static inline void *
835376b1ba3SJeff Roberson cache_bucket_pop(uma_cache_t cache, uma_cache_bucket_t bucket)
836376b1ba3SJeff Roberson {
837376b1ba3SJeff Roberson 	void *item;
838376b1ba3SJeff Roberson 
839376b1ba3SJeff Roberson 	CRITICAL_ASSERT(curthread);
840376b1ba3SJeff Roberson 
841376b1ba3SJeff Roberson 	bucket->ucb_cnt--;
842376b1ba3SJeff Roberson 	item = bucket->ucb_bucket->ub_bucket[bucket->ucb_cnt];
843376b1ba3SJeff Roberson #ifdef INVARIANTS
844376b1ba3SJeff Roberson 	bucket->ucb_bucket->ub_bucket[bucket->ucb_cnt] = NULL;
845376b1ba3SJeff Roberson 	KASSERT(item != NULL, ("uma_zalloc: Bucket pointer mangled."));
846376b1ba3SJeff Roberson #endif
847376b1ba3SJeff Roberson 	cache->uc_allocs++;
848376b1ba3SJeff Roberson 
849376b1ba3SJeff Roberson 	return (item);
850376b1ba3SJeff Roberson }
851376b1ba3SJeff Roberson 
852376b1ba3SJeff Roberson /* Pushes an item into a per-cpu cache bucket. */
853376b1ba3SJeff Roberson static inline void
854376b1ba3SJeff Roberson cache_bucket_push(uma_cache_t cache, uma_cache_bucket_t bucket, void *item)
855376b1ba3SJeff Roberson {
856376b1ba3SJeff Roberson 
857376b1ba3SJeff Roberson 	CRITICAL_ASSERT(curthread);
858376b1ba3SJeff Roberson 	KASSERT(bucket->ucb_bucket->ub_bucket[bucket->ucb_cnt] == NULL,
859376b1ba3SJeff Roberson 	    ("uma_zfree: Freeing to non free bucket index."));
860376b1ba3SJeff Roberson 
861376b1ba3SJeff Roberson 	bucket->ucb_bucket->ub_bucket[bucket->ucb_cnt] = item;
862376b1ba3SJeff Roberson 	bucket->ucb_cnt++;
863376b1ba3SJeff Roberson 	cache->uc_frees++;
864376b1ba3SJeff Roberson }
865376b1ba3SJeff Roberson 
866376b1ba3SJeff Roberson /*
867376b1ba3SJeff Roberson  * Unload a UMA bucket from a per-cpu cache.
868376b1ba3SJeff Roberson  */
869376b1ba3SJeff Roberson static inline uma_bucket_t
870376b1ba3SJeff Roberson cache_bucket_unload(uma_cache_bucket_t bucket)
871376b1ba3SJeff Roberson {
872376b1ba3SJeff Roberson 	uma_bucket_t b;
873376b1ba3SJeff Roberson 
874376b1ba3SJeff Roberson 	b = bucket->ucb_bucket;
875376b1ba3SJeff Roberson 	if (b != NULL) {
876376b1ba3SJeff Roberson 		MPASS(b->ub_entries == bucket->ucb_entries);
877376b1ba3SJeff Roberson 		b->ub_cnt = bucket->ucb_cnt;
878376b1ba3SJeff Roberson 		bucket->ucb_bucket = NULL;
879376b1ba3SJeff Roberson 		bucket->ucb_entries = bucket->ucb_cnt = 0;
880376b1ba3SJeff Roberson 	}
881376b1ba3SJeff Roberson 
882376b1ba3SJeff Roberson 	return (b);
883376b1ba3SJeff Roberson }
884376b1ba3SJeff Roberson 
885376b1ba3SJeff Roberson static inline uma_bucket_t
886376b1ba3SJeff Roberson cache_bucket_unload_alloc(uma_cache_t cache)
887376b1ba3SJeff Roberson {
888376b1ba3SJeff Roberson 
889376b1ba3SJeff Roberson 	return (cache_bucket_unload(&cache->uc_allocbucket));
890376b1ba3SJeff Roberson }
891376b1ba3SJeff Roberson 
892376b1ba3SJeff Roberson static inline uma_bucket_t
893376b1ba3SJeff Roberson cache_bucket_unload_free(uma_cache_t cache)
894376b1ba3SJeff Roberson {
895376b1ba3SJeff Roberson 
896376b1ba3SJeff Roberson 	return (cache_bucket_unload(&cache->uc_freebucket));
897376b1ba3SJeff Roberson }
898376b1ba3SJeff Roberson 
899376b1ba3SJeff Roberson static inline uma_bucket_t
900376b1ba3SJeff Roberson cache_bucket_unload_cross(uma_cache_t cache)
901376b1ba3SJeff Roberson {
902376b1ba3SJeff Roberson 
903376b1ba3SJeff Roberson 	return (cache_bucket_unload(&cache->uc_crossbucket));
904376b1ba3SJeff Roberson }
905376b1ba3SJeff Roberson 
906376b1ba3SJeff Roberson /*
907376b1ba3SJeff Roberson  * Load a bucket into a per-cpu cache bucket.
908376b1ba3SJeff Roberson  */
909376b1ba3SJeff Roberson static inline void
910376b1ba3SJeff Roberson cache_bucket_load(uma_cache_bucket_t bucket, uma_bucket_t b)
911376b1ba3SJeff Roberson {
912376b1ba3SJeff Roberson 
913376b1ba3SJeff Roberson 	CRITICAL_ASSERT(curthread);
914376b1ba3SJeff Roberson 	MPASS(bucket->ucb_bucket == NULL);
915543117beSJeff Roberson 	MPASS(b->ub_seq == SMR_SEQ_INVALID);
916376b1ba3SJeff Roberson 
917376b1ba3SJeff Roberson 	bucket->ucb_bucket = b;
918376b1ba3SJeff Roberson 	bucket->ucb_cnt = b->ub_cnt;
919376b1ba3SJeff Roberson 	bucket->ucb_entries = b->ub_entries;
920376b1ba3SJeff Roberson }
921376b1ba3SJeff Roberson 
922376b1ba3SJeff Roberson static inline void
923376b1ba3SJeff Roberson cache_bucket_load_alloc(uma_cache_t cache, uma_bucket_t b)
924376b1ba3SJeff Roberson {
925376b1ba3SJeff Roberson 
926376b1ba3SJeff Roberson 	cache_bucket_load(&cache->uc_allocbucket, b);
927376b1ba3SJeff Roberson }
928376b1ba3SJeff Roberson 
929376b1ba3SJeff Roberson static inline void
930376b1ba3SJeff Roberson cache_bucket_load_free(uma_cache_t cache, uma_bucket_t b)
931376b1ba3SJeff Roberson {
932376b1ba3SJeff Roberson 
933376b1ba3SJeff Roberson 	cache_bucket_load(&cache->uc_freebucket, b);
934376b1ba3SJeff Roberson }
935376b1ba3SJeff Roberson 
936dfe13344SJeff Roberson #ifdef NUMA
937376b1ba3SJeff Roberson static inline void
938376b1ba3SJeff Roberson cache_bucket_load_cross(uma_cache_t cache, uma_bucket_t b)
939376b1ba3SJeff Roberson {
940376b1ba3SJeff Roberson 
941376b1ba3SJeff Roberson 	cache_bucket_load(&cache->uc_crossbucket, b);
942376b1ba3SJeff Roberson }
943376b1ba3SJeff Roberson #endif
944376b1ba3SJeff Roberson 
945376b1ba3SJeff Roberson /*
946376b1ba3SJeff Roberson  * Copy and preserve ucb_spare.
947376b1ba3SJeff Roberson  */
948376b1ba3SJeff Roberson static inline void
949376b1ba3SJeff Roberson cache_bucket_copy(uma_cache_bucket_t b1, uma_cache_bucket_t b2)
950376b1ba3SJeff Roberson {
951376b1ba3SJeff Roberson 
952376b1ba3SJeff Roberson 	b1->ucb_bucket = b2->ucb_bucket;
953376b1ba3SJeff Roberson 	b1->ucb_entries = b2->ucb_entries;
954376b1ba3SJeff Roberson 	b1->ucb_cnt = b2->ucb_cnt;
955376b1ba3SJeff Roberson }
956376b1ba3SJeff Roberson 
957376b1ba3SJeff Roberson /*
958376b1ba3SJeff Roberson  * Swap two cache buckets.
959376b1ba3SJeff Roberson  */
960376b1ba3SJeff Roberson static inline void
961376b1ba3SJeff Roberson cache_bucket_swap(uma_cache_bucket_t b1, uma_cache_bucket_t b2)
962376b1ba3SJeff Roberson {
963376b1ba3SJeff Roberson 	struct uma_cache_bucket b3;
964376b1ba3SJeff Roberson 
965376b1ba3SJeff Roberson 	CRITICAL_ASSERT(curthread);
966376b1ba3SJeff Roberson 
967376b1ba3SJeff Roberson 	cache_bucket_copy(&b3, b1);
968376b1ba3SJeff Roberson 	cache_bucket_copy(b1, b2);
969376b1ba3SJeff Roberson 	cache_bucket_copy(b2, &b3);
970376b1ba3SJeff Roberson }
971376b1ba3SJeff Roberson 
972c6fd3e23SJeff Roberson /*
973c6fd3e23SJeff Roberson  * Attempt to fetch a bucket from a zone on behalf of the current cpu cache.
974c6fd3e23SJeff Roberson  */
975c6fd3e23SJeff Roberson static uma_bucket_t
976c6fd3e23SJeff Roberson cache_fetch_bucket(uma_zone_t zone, uma_cache_t cache, int domain)
977c6fd3e23SJeff Roberson {
978c6fd3e23SJeff Roberson 	uma_zone_domain_t zdom;
979c6fd3e23SJeff Roberson 	uma_bucket_t bucket;
980c6fd3e23SJeff Roberson 
981c6fd3e23SJeff Roberson 	/*
982c6fd3e23SJeff Roberson 	 * Avoid the lock if possible.
983c6fd3e23SJeff Roberson 	 */
984c6fd3e23SJeff Roberson 	zdom = ZDOM_GET(zone, domain);
985c6fd3e23SJeff Roberson 	if (zdom->uzd_nitems == 0)
986c6fd3e23SJeff Roberson 		return (NULL);
987c6fd3e23SJeff Roberson 
988c6fd3e23SJeff Roberson 	if ((cache_uz_flags(cache) & UMA_ZONE_SMR) != 0 &&
989c6fd3e23SJeff Roberson 	    !smr_poll(zone->uz_smr, zdom->uzd_seq, false))
990c6fd3e23SJeff Roberson 		return (NULL);
991c6fd3e23SJeff Roberson 
992c6fd3e23SJeff Roberson 	/*
993c6fd3e23SJeff Roberson 	 * Check the zone's cache of buckets.
994c6fd3e23SJeff Roberson 	 */
995c6fd3e23SJeff Roberson 	zdom = zone_domain_lock(zone, domain);
99606d8bdcbSMark Johnston 	if ((bucket = zone_fetch_bucket(zone, zdom, false)) != NULL)
997c6fd3e23SJeff Roberson 		return (bucket);
998c6fd3e23SJeff Roberson 	ZDOM_UNLOCK(zdom);
999c6fd3e23SJeff Roberson 
1000c6fd3e23SJeff Roberson 	return (NULL);
1001c6fd3e23SJeff Roberson }
1002c6fd3e23SJeff Roberson 
10032f891cd5SPawel Jakub Dawidek static void
10042f891cd5SPawel Jakub Dawidek zone_log_warning(uma_zone_t zone)
10052f891cd5SPawel Jakub Dawidek {
10062f891cd5SPawel Jakub Dawidek 	static const struct timeval warninterval = { 300, 0 };
10072f891cd5SPawel Jakub Dawidek 
10082f891cd5SPawel Jakub Dawidek 	if (!zone_warnings || zone->uz_warning == NULL)
10092f891cd5SPawel Jakub Dawidek 		return;
10102f891cd5SPawel Jakub Dawidek 
10112f891cd5SPawel Jakub Dawidek 	if (ratecheck(&zone->uz_ratecheck, &warninterval))
10122f891cd5SPawel Jakub Dawidek 		printf("[zone: %s] %s\n", zone->uz_name, zone->uz_warning);
10132f891cd5SPawel Jakub Dawidek }
10142f891cd5SPawel Jakub Dawidek 
101554503a13SJonathan T. Looney static inline void
101654503a13SJonathan T. Looney zone_maxaction(uma_zone_t zone)
101754503a13SJonathan T. Looney {
1018e60b2fcbSGleb Smirnoff 
1019e60b2fcbSGleb Smirnoff 	if (zone->uz_maxaction.ta_func != NULL)
1020e60b2fcbSGleb Smirnoff 		taskqueue_enqueue(taskqueue_thread, &zone->uz_maxaction);
102154503a13SJonathan T. Looney }
102254503a13SJonathan T. Looney 
10238355f576SJeff Roberson /*
10248355f576SJeff Roberson  * Routine called by timeout which is used to fire off some time interval
10259643769aSJeff Roberson  * based calculations.  (stats, hash size, etc.)
10268355f576SJeff Roberson  *
10278355f576SJeff Roberson  * Arguments:
10288355f576SJeff Roberson  *	arg   Unused
10298355f576SJeff Roberson  *
10308355f576SJeff Roberson  * Returns:
10318355f576SJeff Roberson  *	Nothing
10328355f576SJeff Roberson  */
10338355f576SJeff Roberson static void
10348355f576SJeff Roberson uma_timeout(void *unused)
10358355f576SJeff Roberson {
103686bbae32SJeff Roberson 	bucket_enable();
103720a4e154SJeff Roberson 	zone_foreach(zone_timeout, NULL);
10388355f576SJeff Roberson 
10398355f576SJeff Roberson 	/* Reschedule this event */
10409643769aSJeff Roberson 	callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
10418355f576SJeff Roberson }
10428355f576SJeff Roberson 
10438355f576SJeff Roberson /*
10440f9b7bf3SMark Johnston  * Update the working set size estimate for the zone's bucket cache.
10450f9b7bf3SMark Johnston  * The constants chosen here are somewhat arbitrary.  With an update period of
10460f9b7bf3SMark Johnston  * 20s (UMA_TIMEOUT), this estimate is dominated by zone activity over the
10470f9b7bf3SMark Johnston  * last 100s.
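 *
 * The update applied each interval is
 *
 *	uzd_wss = (4 * (uzd_imax - uzd_imin) + uzd_wss) / 5
 *
 * an exponentially weighted moving average in which the previous estimate
 * retains one fifth of its weight every 20 seconds.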
10480f9b7bf3SMark Johnston  */
10490f9b7bf3SMark Johnston static void
10500f9b7bf3SMark Johnston zone_domain_update_wss(uma_zone_domain_t zdom)
10510f9b7bf3SMark Johnston {
10520f9b7bf3SMark Johnston 	long wss;
10530f9b7bf3SMark Johnston 
1054c6fd3e23SJeff Roberson 	ZDOM_LOCK(zdom);
10550f9b7bf3SMark Johnston 	MPASS(zdom->uzd_imax >= zdom->uzd_imin);
10560f9b7bf3SMark Johnston 	wss = zdom->uzd_imax - zdom->uzd_imin;
10570f9b7bf3SMark Johnston 	zdom->uzd_imax = zdom->uzd_imin = zdom->uzd_nitems;
105808cfa56eSMark Johnston 	zdom->uzd_wss = (4 * wss + zdom->uzd_wss) / 5;
1059c6fd3e23SJeff Roberson 	ZDOM_UNLOCK(zdom);
10600f9b7bf3SMark Johnston }
10610f9b7bf3SMark Johnston 
10620f9b7bf3SMark Johnston /*
10639643769aSJeff Roberson  * Routine to perform timeout driven calculations.  This expands the
10649643769aSJeff Roberson  * hashes and updates the per-domain working-set size estimates.
10658355f576SJeff Roberson  *
1066e20a199fSJeff Roberson  *  Returns nothing.
10678355f576SJeff Roberson  */
10688355f576SJeff Roberson static void
106920a4e154SJeff Roberson zone_timeout(uma_zone_t zone, void *unused)
10708355f576SJeff Roberson {
107108034d10SKonstantin Belousov 	uma_keg_t keg;
10728b987a77SJeff Roberson 	u_int slabs, pages;
10738355f576SJeff Roberson 
107454c5ae80SRyan Libby 	if ((zone->uz_flags & UMA_ZFLAG_HASH) == 0)
107508034d10SKonstantin Belousov 		goto update_wss;
107608034d10SKonstantin Belousov 
107708034d10SKonstantin Belousov 	keg = zone->uz_keg;
10788b987a77SJeff Roberson 
10798b987a77SJeff Roberson 	/*
10808b987a77SJeff Roberson 	 * Hash zones are non-numa by definition so the first domain
10818b987a77SJeff Roberson 	 * is the only one present.
10828b987a77SJeff Roberson 	 */
10838b987a77SJeff Roberson 	KEG_LOCK(keg, 0);
10848b987a77SJeff Roberson 	pages = keg->uk_domain[0].ud_pages;
10858b987a77SJeff Roberson 
10868355f576SJeff Roberson 	/*
1087e20a199fSJeff Roberson 	 * Expand the keg hash table.
10888355f576SJeff Roberson 	 *
10898355f576SJeff Roberson 	 * This is done if the number of slabs is larger than the hash size.
10908355f576SJeff Roberson 	 * What I'm trying to do here is completely eliminate collisions.  This
10918355f576SJeff Roberson 	 * may be a little aggressive.  Should I allow for two collisions max?
10928355f576SJeff Roberson 	 */
10938b987a77SJeff Roberson 	if ((slabs = pages / keg->uk_ppera) > keg->uk_hash.uh_hashsize) {
10940aef6126SJeff Roberson 		struct uma_hash newhash;
10950aef6126SJeff Roberson 		struct uma_hash oldhash;
10960aef6126SJeff Roberson 		int ret;
10975300d9ddSJeff Roberson 
10980aef6126SJeff Roberson 		/*
10990aef6126SJeff Roberson 		 * This is so involved because allocating and freeing
1100e20a199fSJeff Roberson 		 * while the keg lock is held will lead to deadlock.
11010aef6126SJeff Roberson 		 * I have to do everything in stages and check for
11020aef6126SJeff Roberson 		 * races.
11030aef6126SJeff Roberson 		 */
11048b987a77SJeff Roberson 		KEG_UNLOCK(keg, 0);
11053b2f2cb8SAlexander Motin 		ret = hash_alloc(&newhash, 1 << fls(slabs));
11068b987a77SJeff Roberson 		KEG_LOCK(keg, 0);
11070aef6126SJeff Roberson 		if (ret) {
1108099a0e58SBosko Milekic 			if (hash_expand(&keg->uk_hash, &newhash)) {
1109099a0e58SBosko Milekic 				oldhash = keg->uk_hash;
1110099a0e58SBosko Milekic 				keg->uk_hash = newhash;
11110aef6126SJeff Roberson 			} else
11120aef6126SJeff Roberson 				oldhash = newhash;
11130aef6126SJeff Roberson 
11148b987a77SJeff Roberson 			KEG_UNLOCK(keg, 0);
11150aef6126SJeff Roberson 			hash_free(&oldhash);
11168b987a77SJeff Roberson 			goto update_wss;
11170aef6126SJeff Roberson 		}
11185300d9ddSJeff Roberson 	}
11198b987a77SJeff Roberson 	KEG_UNLOCK(keg, 0);
1120e20a199fSJeff Roberson 
112108034d10SKonstantin Belousov update_wss:
1122bb15d1c7SGleb Smirnoff 	for (int i = 0; i < vm_ndomains; i++)
1123c6fd3e23SJeff Roberson 		zone_domain_update_wss(ZDOM_GET(zone, i));
11248355f576SJeff Roberson }
11258355f576SJeff Roberson 
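/*
 * Worked example of the expansion check in zone_timeout() (illustrative,
 * assuming uk_ppera == 1): with 512 pages in the keg's first domain there
 * are 512 slabs, so a 256-bucket table is grown to 1 << fls(512) == 1024
 * buckets, i.e. the next power of two strictly greater than the slab
 * count.
 */
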
11268355f576SJeff Roberson /*
11275300d9ddSJeff Roberson  * Allocate and zero fill the next sized hash table from the appropriate
11285300d9ddSJeff Roberson  * backing store.
11295300d9ddSJeff Roberson  *
11305300d9ddSJeff Roberson  * Arguments:
11310aef6126SJeff Roberson  *	hash  A new hash structure to fill in for 'size' buckets (a power of 2)
11325300d9ddSJeff Roberson  *
11335300d9ddSJeff Roberson  * Returns:
1134763df3ecSPedro F. Giffuni  *	1 on success and 0 on failure.
11355300d9ddSJeff Roberson  */
113637c84183SPoul-Henning Kamp static int
11373b2f2cb8SAlexander Motin hash_alloc(struct uma_hash *hash, u_int size)
11385300d9ddSJeff Roberson {
113959568a0eSAlexander Motin 	size_t alloc;
11405300d9ddSJeff Roberson 
11413b2f2cb8SAlexander Motin 	KASSERT(powerof2(size), ("hash size must be power of 2"));
11423b2f2cb8SAlexander Motin 	if (size > UMA_HASH_SIZE_INIT)  {
11433b2f2cb8SAlexander Motin 		hash->uh_hashsize = size;
11440aef6126SJeff Roberson 		alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
11451e0701e1SJeff Roberson 		hash->uh_slab_hash = malloc(alloc, M_UMAHASH, M_NOWAIT);
11465300d9ddSJeff Roberson 	} else {
11470aef6126SJeff Roberson 		alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
1148e20a199fSJeff Roberson 		hash->uh_slab_hash = zone_alloc_item(hashzone, NULL,
1149ab3185d1SJeff Roberson 		    UMA_ANYDOMAIN, M_WAITOK);
11500aef6126SJeff Roberson 		hash->uh_hashsize = UMA_HASH_SIZE_INIT;
11515300d9ddSJeff Roberson 	}
11520aef6126SJeff Roberson 	if (hash->uh_slab_hash) {
11530aef6126SJeff Roberson 		bzero(hash->uh_slab_hash, alloc);
11540aef6126SJeff Roberson 		hash->uh_hashmask = hash->uh_hashsize - 1;
11550aef6126SJeff Roberson 		return (1);
11560aef6126SJeff Roberson 	}
11575300d9ddSJeff Roberson 
11580aef6126SJeff Roberson 	return (0);
11595300d9ddSJeff Roberson }
11605300d9ddSJeff Roberson 
11615300d9ddSJeff Roberson /*
116264f051e9SJeff Roberson  * Expands the hash table for HASH zones.  This is done from zone_timeout
116364f051e9SJeff Roberson  * to reduce collisions.  This must not be done in the regular allocation
116464f051e9SJeff Roberson  * path; otherwise, we can recurse on the VM while allocating pages.
11658355f576SJeff Roberson  *
11668355f576SJeff Roberson  * Arguments:
11670aef6126SJeff Roberson  *	oldhash  The hash you want to expand
11680aef6126SJeff Roberson  *	newhash  The hash structure for the new table
11698355f576SJeff Roberson  *
11708355f576SJeff Roberson  * Returns:
11718355f576SJeff Roberson  *	1 if the entries were migrated into the new table, 0 otherwise.
11728355f576SJeff Roberson  *
11738355f576SJeff Roberson  * Discussion:
11748355f576SJeff Roberson  */
11750aef6126SJeff Roberson static int
11760aef6126SJeff Roberson hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
11778355f576SJeff Roberson {
11781e0701e1SJeff Roberson 	uma_hash_slab_t slab;
11796929b7d1SPedro F. Giffuni 	u_int hval;
11806929b7d1SPedro F. Giffuni 	u_int idx;
11818355f576SJeff Roberson 
11820aef6126SJeff Roberson 	if (!newhash->uh_slab_hash)
11830aef6126SJeff Roberson 		return (0);
11848355f576SJeff Roberson 
11850aef6126SJeff Roberson 	if (oldhash->uh_hashsize >= newhash->uh_hashsize)
11860aef6126SJeff Roberson 		return (0);
11878355f576SJeff Roberson 
11888355f576SJeff Roberson 	/*
11898355f576SJeff Roberson 	 * I need to investigate hash algorithms for resizing without a
11908355f576SJeff Roberson 	 * full rehash.
11918355f576SJeff Roberson 	 */
11928355f576SJeff Roberson 
11936929b7d1SPedro F. Giffuni 	for (idx = 0; idx < oldhash->uh_hashsize; idx++)
11941e0701e1SJeff Roberson 		while (!LIST_EMPTY(&oldhash->uh_slab_hash[idx])) {
11951e0701e1SJeff Roberson 			slab = LIST_FIRST(&oldhash->uh_slab_hash[idx]);
11961e0701e1SJeff Roberson 			LIST_REMOVE(slab, uhs_hlink);
11971e0701e1SJeff Roberson 			hval = UMA_HASH(newhash, slab->uhs_data);
11981e0701e1SJeff Roberson 			LIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
11991e0701e1SJeff Roberson 			    slab, uhs_hlink);
12008355f576SJeff Roberson 		}
12018355f576SJeff Roberson 
12020aef6126SJeff Roberson 	return (1);
12039c2cd7e5SJeff Roberson }
12049c2cd7e5SJeff Roberson 
12055300d9ddSJeff Roberson /*
12065300d9ddSJeff Roberson  * Free the hash table's bucket storage to the appropriate backing store.
12075300d9ddSJeff Roberson  *
12085300d9ddSJeff Roberson  * Arguments:
12095300d9ddSJeff Roberson  *	hash  The hash structure whose bucket storage is being freed
12115300d9ddSJeff Roberson  *
12125300d9ddSJeff Roberson  * Returns:
12135300d9ddSJeff Roberson  *	Nothing
12145300d9ddSJeff Roberson  */
12159c2cd7e5SJeff Roberson static void
12160aef6126SJeff Roberson hash_free(struct uma_hash *hash)
12179c2cd7e5SJeff Roberson {
12180aef6126SJeff Roberson 	if (hash->uh_slab_hash == NULL)
12190aef6126SJeff Roberson 		return;
12200aef6126SJeff Roberson 	if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
12210095a784SJeff Roberson 		zone_free_item(hashzone, hash->uh_slab_hash, NULL, SKIP_NONE);
12228355f576SJeff Roberson 	else
1223961647dfSJeff Roberson 		free(hash->uh_slab_hash, M_UMAHASH);
12248355f576SJeff Roberson }
12258355f576SJeff Roberson 
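/*
 * A minimal sketch (not compiled) of how hash_alloc(), hash_expand() and
 * hash_free() combine, mirroring the staged resize in zone_timeout()
 * above: allocate the new table unlocked, swap it in under the keg lock,
 * and free whichever table is no longer needed after dropping the lock.
 * The keg_hash_grow name is hypothetical.
 */
#if 0
static void
keg_hash_grow(uma_keg_t keg, u_int slabs)
{
	struct uma_hash newhash, oldhash;

	if (hash_alloc(&newhash, 1 << fls(slabs)) == 0)
		return;
	KEG_LOCK(keg, 0);
	if (hash_expand(&keg->uk_hash, &newhash)) {
		oldhash = keg->uk_hash;
		keg->uk_hash = newhash;
	} else
		oldhash = newhash;
	KEG_UNLOCK(keg, 0);
	hash_free(&oldhash);
}
#endif
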
12268355f576SJeff Roberson /*
12278355f576SJeff Roberson  * Frees all outstanding items in a bucket
12288355f576SJeff Roberson  *
12298355f576SJeff Roberson  * Arguments:
12308355f576SJeff Roberson  *	zone   The zone to free to, must be unlocked.
12314bd61e19SJeff Roberson  *	bucket The free/alloc bucket with items.
12328355f576SJeff Roberson  *
12338355f576SJeff Roberson  * Returns:
12348355f576SJeff Roberson  *	Nothing
12358355f576SJeff Roberson  */
12368355f576SJeff Roberson static void
12378355f576SJeff Roberson bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
12388355f576SJeff Roberson {
12390095a784SJeff Roberson 	int i;
12408355f576SJeff Roberson 
1241c6fd3e23SJeff Roberson 	if (bucket->ub_cnt == 0)
12428355f576SJeff Roberson 		return;
12438355f576SJeff Roberson 
1244d4665eaaSJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_SMR) != 0 &&
1245d4665eaaSJeff Roberson 	    bucket->ub_seq != SMR_SEQ_INVALID) {
1246d4665eaaSJeff Roberson 		smr_wait(zone->uz_smr, bucket->ub_seq);
1247543117beSJeff Roberson 		bucket->ub_seq = SMR_SEQ_INVALID;
1248d4665eaaSJeff Roberson 		for (i = 0; i < bucket->ub_cnt; i++)
1249d4665eaaSJeff Roberson 			item_dtor(zone, bucket->ub_bucket[i],
1250d4665eaaSJeff Roberson 			    zone->uz_size, NULL, SKIP_NONE);
1251d4665eaaSJeff Roberson 	}
12520095a784SJeff Roberson 	if (zone->uz_fini)
125309c8cb71SMark Johnston 		for (i = 0; i < bucket->ub_cnt; i++) {
125409c8cb71SMark Johnston 			kasan_mark_item_valid(zone, bucket->ub_bucket[i]);
12550095a784SJeff Roberson 			zone->uz_fini(bucket->ub_bucket[i], zone->uz_size);
125609c8cb71SMark Johnston 			kasan_mark_item_invalid(zone, bucket->ub_bucket[i]);
125709c8cb71SMark Johnston 		}
12580095a784SJeff Roberson 	zone->uz_release(zone->uz_arg, bucket->ub_bucket, bucket->ub_cnt);
12594bd61e19SJeff Roberson 	if (zone->uz_max_items > 0)
12604bd61e19SJeff Roberson 		zone_free_limit(zone, bucket->ub_cnt);
1261d4665eaaSJeff Roberson #ifdef INVARIANTS
1262d4665eaaSJeff Roberson 	bzero(bucket->ub_bucket, sizeof(void *) * bucket->ub_cnt);
1263d4665eaaSJeff Roberson #endif
12640095a784SJeff Roberson 	bucket->ub_cnt = 0;
12658355f576SJeff Roberson }
12668355f576SJeff Roberson 
12678355f576SJeff Roberson /*
12688355f576SJeff Roberson  * Drains the per cpu caches for a zone.
12698355f576SJeff Roberson  *
1270727c6918SJeff Roberson  * NOTE: This may only be called while the zone is being torn down, and not
12715d1ae027SRobert Watson  * during normal operation.  This is necessary in order that we do not have
12725d1ae027SRobert Watson  * to migrate CPUs to drain the per-CPU caches.
12735d1ae027SRobert Watson  *
12748355f576SJeff Roberson  * Arguments:
12758355f576SJeff Roberson  *	zone     The zone to drain, must be unlocked.
12768355f576SJeff Roberson  *
12778355f576SJeff Roberson  * Returns:
12788355f576SJeff Roberson  *	Nothing
12798355f576SJeff Roberson  */
12808355f576SJeff Roberson static void
12819643769aSJeff Roberson cache_drain(uma_zone_t zone)
12828355f576SJeff Roberson {
12838355f576SJeff Roberson 	uma_cache_t cache;
1284376b1ba3SJeff Roberson 	uma_bucket_t bucket;
1285543117beSJeff Roberson 	smr_seq_t seq;
12868355f576SJeff Roberson 	int cpu;
12878355f576SJeff Roberson 
12888355f576SJeff Roberson 	/*
12895d1ae027SRobert Watson 	 * XXX: It is safe to not lock the per-CPU caches, because we're
12905d1ae027SRobert Watson 	 * tearing down the zone anyway.  I.e., there will be no further use
12915d1ae027SRobert Watson 	 * of the caches at this point.
12925d1ae027SRobert Watson 	 *
12935d1ae027SRobert Watson 	 * XXX: It would be good to be able to assert that the zone is being
12945d1ae027SRobert Watson 	 * torn down to prevent improper use of cache_drain().
12958355f576SJeff Roberson 	 */
1296543117beSJeff Roberson 	seq = SMR_SEQ_INVALID;
1297543117beSJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_SMR) != 0)
1298226dd6dbSJeff Roberson 		seq = smr_advance(zone->uz_smr);
12993aa6d94eSJohn Baldwin 	CPU_FOREACH(cpu) {
13008355f576SJeff Roberson 		cache = &zone->uz_cpu[cpu];
1301376b1ba3SJeff Roberson 		bucket = cache_bucket_unload_alloc(cache);
1302c6fd3e23SJeff Roberson 		if (bucket != NULL)
1303376b1ba3SJeff Roberson 			bucket_free(zone, bucket, NULL);
1304376b1ba3SJeff Roberson 		bucket = cache_bucket_unload_free(cache);
1305376b1ba3SJeff Roberson 		if (bucket != NULL) {
1306543117beSJeff Roberson 			bucket->ub_seq = seq;
1307376b1ba3SJeff Roberson 			bucket_free(zone, bucket, NULL);
1308376b1ba3SJeff Roberson 		}
1309376b1ba3SJeff Roberson 		bucket = cache_bucket_unload_cross(cache);
1310376b1ba3SJeff Roberson 		if (bucket != NULL) {
1311543117beSJeff Roberson 			bucket->ub_seq = seq;
1312376b1ba3SJeff Roberson 			bucket_free(zone, bucket, NULL);
1313376b1ba3SJeff Roberson 		}
1314d56368d7SBosko Milekic 	}
1315*aabe13f1SMark Johnston 	bucket_cache_reclaim(zone, true, UMA_ANYDOMAIN);
1316aaa8bb16SJeff Roberson }
1317aaa8bb16SJeff Roberson 
1318a2de44abSAlexander Motin static void
131920a4e154SJeff Roberson cache_shrink(uma_zone_t zone, void *unused)
1320a2de44abSAlexander Motin {
1321a2de44abSAlexander Motin 
1322a2de44abSAlexander Motin 	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
1323a2de44abSAlexander Motin 		return;
1324a2de44abSAlexander Motin 
1325*aabe13f1SMark Johnston 	ZONE_LOCK(zone);
132620a4e154SJeff Roberson 	zone->uz_bucket_size =
132720a4e154SJeff Roberson 	    (zone->uz_bucket_size_min + zone->uz_bucket_size) / 2;
1328*aabe13f1SMark Johnston 	ZONE_UNLOCK(zone);
1329a2de44abSAlexander Motin }
1330a2de44abSAlexander Motin 
1331a2de44abSAlexander Motin static void
133220a4e154SJeff Roberson cache_drain_safe_cpu(uma_zone_t zone, void *unused)
1333a2de44abSAlexander Motin {
1334a2de44abSAlexander Motin 	uma_cache_t cache;
1335c1685086SJeff Roberson 	uma_bucket_t b1, b2, b3;
1336ab3185d1SJeff Roberson 	int domain;
1337a2de44abSAlexander Motin 
1338a2de44abSAlexander Motin 	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
1339a2de44abSAlexander Motin 		return;
1340a2de44abSAlexander Motin 
1341c1685086SJeff Roberson 	b1 = b2 = b3 = NULL;
1342a2de44abSAlexander Motin 	critical_enter();
1343a2de44abSAlexander Motin 	cache = &zone->uz_cpu[curcpu];
1344c6fd3e23SJeff Roberson 	domain = PCPU_GET(domain);
1345376b1ba3SJeff Roberson 	b1 = cache_bucket_unload_alloc(cache);
1346d4665eaaSJeff Roberson 
1347d4665eaaSJeff Roberson 	/*
1348d4665eaaSJeff Roberson 	 * Don't flush SMR zone buckets.  This leaves the zone without a
1349d4665eaaSJeff Roberson 	 * bucket and forces every free to synchronize().
1350d4665eaaSJeff Roberson 	 */
1351543117beSJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_SMR) == 0) {
1352376b1ba3SJeff Roberson 		b2 = cache_bucket_unload_free(cache);
1353543117beSJeff Roberson 		b3 = cache_bucket_unload_cross(cache);
1354543117beSJeff Roberson 	}
1355543117beSJeff Roberson 	critical_exit();
1356543117beSJeff Roberson 
1357543117beSJeff Roberson 	if (b1 != NULL)
1358c6fd3e23SJeff Roberson 		zone_free_bucket(zone, b1, NULL, domain, false);
1359543117beSJeff Roberson 	if (b2 != NULL)
1360c6fd3e23SJeff Roberson 		zone_free_bucket(zone, b2, NULL, domain, false);
1361543117beSJeff Roberson 	if (b3 != NULL) {
1362c6fd3e23SJeff Roberson 		/* Adjust the domain so it goes to zone_free_cross. */
1363c6fd3e23SJeff Roberson 		domain = (domain + 1) % vm_ndomains;
1364c6fd3e23SJeff Roberson 		zone_free_bucket(zone, b3, NULL, domain, false);
1365c1685086SJeff Roberson 	}
1366a2de44abSAlexander Motin }
1367a2de44abSAlexander Motin 
1368a2de44abSAlexander Motin /*
1369a2de44abSAlexander Motin  * Safely drain the per-CPU caches of a zone, or of all zones, to the alloc bucket.
1370a2de44abSAlexander Motin  * This is an expensive call because it needs to bind to all CPUs
1371a2de44abSAlexander Motin  * one by one and enter a critical section on each of them in order
1372a2de44abSAlexander Motin  * to safely access their cache buckets.
1373a2de44abSAlexander Motin  * The zone lock must not be held when calling this function.
1374a2de44abSAlexander Motin  */
1375a2de44abSAlexander Motin static void
137608cfa56eSMark Johnston pcpu_cache_drain_safe(uma_zone_t zone)
1377a2de44abSAlexander Motin {
1378a2de44abSAlexander Motin 	int cpu;
1379a2de44abSAlexander Motin 
1380a2de44abSAlexander Motin 	/*
1381727c6918SJeff Roberson 	 * Polite bucket size shrinking was not enough; shrink aggressively.
1382a2de44abSAlexander Motin 	 */
1383a2de44abSAlexander Motin 	if (zone)
138420a4e154SJeff Roberson 		cache_shrink(zone, NULL);
1385a2de44abSAlexander Motin 	else
138620a4e154SJeff Roberson 		zone_foreach(cache_shrink, NULL);
1387a2de44abSAlexander Motin 
1388a2de44abSAlexander Motin 	CPU_FOREACH(cpu) {
1389a2de44abSAlexander Motin 		thread_lock(curthread);
1390a2de44abSAlexander Motin 		sched_bind(curthread, cpu);
1391a2de44abSAlexander Motin 		thread_unlock(curthread);
1392a2de44abSAlexander Motin 
1393a2de44abSAlexander Motin 		if (zone)
139420a4e154SJeff Roberson 			cache_drain_safe_cpu(zone, NULL);
1395a2de44abSAlexander Motin 		else
139620a4e154SJeff Roberson 			zone_foreach(cache_drain_safe_cpu, NULL);
1397a2de44abSAlexander Motin 	}
1398a2de44abSAlexander Motin 	thread_lock(curthread);
1399a2de44abSAlexander Motin 	sched_unbind(curthread);
1400a2de44abSAlexander Motin 	thread_unlock(curthread);
1401a2de44abSAlexander Motin }
1402a2de44abSAlexander Motin 
1403aaa8bb16SJeff Roberson /*
140408cfa56eSMark Johnston  * Reclaim cached buckets from a zone.  All buckets are reclaimed if the caller
140508cfa56eSMark Johnston  * requested a drain, otherwise the per-domain caches are trimmed to their
140608cfa56eSMark Johnston  * estimated working set size.
1407aaa8bb16SJeff Roberson  */
1408aaa8bb16SJeff Roberson static void
140954f421f9SMark Johnston bucket_cache_reclaim_domain(uma_zone_t zone, bool drain, int domain)
1410aaa8bb16SJeff Roberson {
1411ab3185d1SJeff Roberson 	uma_zone_domain_t zdom;
1412aaa8bb16SJeff Roberson 	uma_bucket_t bucket;
1413c6fd3e23SJeff Roberson 	long target;
14148355f576SJeff Roberson 
1415c6fd3e23SJeff Roberson 	/*
141691d947bfSJeff Roberson 	 * The cross bucket is partially filled and not part of
141791d947bfSJeff Roberson 	 * the item count.  Reclaim it individually here.
141891d947bfSJeff Roberson 	 */
141954f421f9SMark Johnston 	zdom = ZDOM_GET(zone, domain);
1420226dd6dbSJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_SMR) == 0 || drain) {
142191d947bfSJeff Roberson 		ZONE_CROSS_LOCK(zone);
142291d947bfSJeff Roberson 		bucket = zdom->uzd_cross;
142391d947bfSJeff Roberson 		zdom->uzd_cross = NULL;
142491d947bfSJeff Roberson 		ZONE_CROSS_UNLOCK(zone);
1425c6fd3e23SJeff Roberson 		if (bucket != NULL)
142691d947bfSJeff Roberson 			bucket_free(zone, bucket, NULL);
142791d947bfSJeff Roberson 	}
142891d947bfSJeff Roberson 
142991d947bfSJeff Roberson 	/*
143008cfa56eSMark Johnston 	 * If we were asked to drain the zone, we are done only once
143108cfa56eSMark Johnston 	 * this bucket cache is empty.  Otherwise, we reclaim items in
143208cfa56eSMark Johnston 	 * excess of the zone's estimated working set size.  If the
143308cfa56eSMark Johnston 	 * difference nitems - imin is larger than the WSS estimate,
143408cfa56eSMark Johnston 	 * then the estimate will grow at the end of this interval and
143508cfa56eSMark Johnston 	 * we ignore the historical average.
143608cfa56eSMark Johnston 	 */
1437c6fd3e23SJeff Roberson 	ZDOM_LOCK(zdom);
143808cfa56eSMark Johnston 	target = drain ? 0 : lmax(zdom->uzd_wss, zdom->uzd_nitems -
143908cfa56eSMark Johnston 	    zdom->uzd_imin);
144008cfa56eSMark Johnston 	while (zdom->uzd_nitems > target) {
1441c6fd3e23SJeff Roberson 		bucket = zone_fetch_bucket(zone, zdom, true);
144208cfa56eSMark Johnston 		if (bucket == NULL)
144308cfa56eSMark Johnston 			break;
14446fd34d6fSJeff Roberson 		bucket_free(zone, bucket, NULL);
1445c6fd3e23SJeff Roberson 		ZDOM_LOCK(zdom);
14468355f576SJeff Roberson 	}
1447c6fd3e23SJeff Roberson 	ZDOM_UNLOCK(zdom);
1448ab3185d1SJeff Roberson }
144954f421f9SMark Johnston 
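/*
 * Worked example of the trim target above (illustrative): with
 * uzd_nitems == 1000, uzd_imin == 400 and uzd_wss == 500, a trim uses
 * target = lmax(500, 1000 - 400) == 600 and frees whole buckets until the
 * domain's item count is no longer above that level; a drain uses
 * target == 0 and empties the bucket cache entirely.
 */
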
145054f421f9SMark Johnston static void
1451*aabe13f1SMark Johnston bucket_cache_reclaim(uma_zone_t zone, bool drain, int domain)
145254f421f9SMark Johnston {
145354f421f9SMark Johnston 	int i;
145454f421f9SMark Johnston 
145554f421f9SMark Johnston 	/*
145654f421f9SMark Johnston 	 * Shrink the zone bucket size to ensure that the per-CPU caches
145754f421f9SMark Johnston 	 * don't grow too large.
145854f421f9SMark Johnston 	 */
145954f421f9SMark Johnston 	if (zone->uz_bucket_size > zone->uz_bucket_size_min)
146054f421f9SMark Johnston 		zone->uz_bucket_size--;
146154f421f9SMark Johnston 
1462*aabe13f1SMark Johnston 	if (domain != UMA_ANYDOMAIN &&
1463*aabe13f1SMark Johnston 	    (zone->uz_flags & UMA_ZONE_ROUNDROBIN) == 0) {
1464*aabe13f1SMark Johnston 		bucket_cache_reclaim_domain(zone, drain, domain);
1465*aabe13f1SMark Johnston 	} else {
146654f421f9SMark Johnston 		for (i = 0; i < vm_ndomains; i++)
146754f421f9SMark Johnston 			bucket_cache_reclaim_domain(zone, drain, i);
14688355f576SJeff Roberson 	}
1469*aabe13f1SMark Johnston }
1470fc03d22bSJeff Roberson 
1471fc03d22bSJeff Roberson static void
1472fc03d22bSJeff Roberson keg_free_slab(uma_keg_t keg, uma_slab_t slab, int start)
1473fc03d22bSJeff Roberson {
1474fc03d22bSJeff Roberson 	uint8_t *mem;
147509c8cb71SMark Johnston 	size_t size;
1476fc03d22bSJeff Roberson 	int i;
1477fc03d22bSJeff Roberson 	uint8_t flags;
1478fc03d22bSJeff Roberson 
14791431a748SGleb Smirnoff 	CTR4(KTR_UMA, "keg_free_slab keg %s(%p) slab %p, returning %d bytes",
14801431a748SGleb Smirnoff 	    keg->uk_name, keg, slab, PAGE_SIZE * keg->uk_ppera);
14811431a748SGleb Smirnoff 
14821e0701e1SJeff Roberson 	mem = slab_data(slab, keg);
148309c8cb71SMark Johnston 	size = PAGE_SIZE * keg->uk_ppera;
148409c8cb71SMark Johnston 
148509c8cb71SMark Johnston 	kasan_mark_slab_valid(keg, mem);
1486fc03d22bSJeff Roberson 	if (keg->uk_fini != NULL) {
148709c8cb71SMark Johnston 		for (i = start - 1; i > -1; i--)
1488c5deaf04SGleb Smirnoff #ifdef INVARIANTS
1489c5deaf04SGleb Smirnoff 		/*
1490c5deaf04SGleb Smirnoff 		 * trash_fini implies that dtor was trash_dtor. trash_fini
1491c5deaf04SGleb Smirnoff 		 * would check that memory hasn't been modified since free,
1492c5deaf04SGleb Smirnoff 		 * which executed trash_dtor.
1493c5deaf04SGleb Smirnoff 		 * That's why we need to run the uma_dbg_kskip() check here,
1494c5deaf04SGleb Smirnoff 		 * albeit we don't make this skip check for other init/fini
1495c5deaf04SGleb Smirnoff 		 * invocations.
1496c5deaf04SGleb Smirnoff 		 */
14971e0701e1SJeff Roberson 		if (!uma_dbg_kskip(keg, slab_item(slab, keg, i)) ||
1498c5deaf04SGleb Smirnoff 		    keg->uk_fini != trash_fini)
1499c5deaf04SGleb Smirnoff #endif
15001e0701e1SJeff Roberson 			keg->uk_fini(slab_item(slab, keg, i), keg->uk_size);
1501fc03d22bSJeff Roberson 	}
150209c8cb71SMark Johnston 	flags = slab->us_flags;
150309c8cb71SMark Johnston 	if (keg->uk_flags & UMA_ZFLAG_OFFPAGE) {
15049b8db4d0SRyan Libby 		zone_free_item(slabzone(keg->uk_ipers), slab_tohashslab(slab),
15059b8db4d0SRyan Libby 		    NULL, SKIP_NONE);
150609c8cb71SMark Johnston 	}
150709c8cb71SMark Johnston 	keg->uk_freef(mem, size, flags);
150809c8cb71SMark Johnston 	uma_total_dec(size);
15098355f576SJeff Roberson }
15108355f576SJeff Roberson 
1511f09cbea3SMark Johnston static void
1512f09cbea3SMark Johnston keg_drain_domain(uma_keg_t keg, int domain)
1513f09cbea3SMark Johnston {
1514f09cbea3SMark Johnston 	struct slabhead freeslabs;
1515f09cbea3SMark Johnston 	uma_domain_t dom;
1516f09cbea3SMark Johnston 	uma_slab_t slab, tmp;
1517f09cbea3SMark Johnston 	uint32_t i, stofree, stokeep, partial;
1518f09cbea3SMark Johnston 
1519f09cbea3SMark Johnston 	dom = &keg->uk_domain[domain];
1520f09cbea3SMark Johnston 	LIST_INIT(&freeslabs);
1521f09cbea3SMark Johnston 
1522f09cbea3SMark Johnston 	CTR4(KTR_UMA, "keg_drain %s(%p) domain %d free items: %u",
1523575a4437SEd Maste 	    keg->uk_name, keg, domain, dom->ud_free_items);
1524f09cbea3SMark Johnston 
1525f09cbea3SMark Johnston 	KEG_LOCK(keg, domain);
1526f09cbea3SMark Johnston 
1527f09cbea3SMark Johnston 	/*
1528f09cbea3SMark Johnston 	 * Are the free items in partially allocated slabs sufficient to meet
1529f09cbea3SMark Johnston 	 * the reserve? If not, compute the number of fully free slabs that must
1530f09cbea3SMark Johnston 	 * be kept.
1531f09cbea3SMark Johnston 	 */
1532f09cbea3SMark Johnston 	partial = dom->ud_free_items - dom->ud_free_slabs * keg->uk_ipers;
1533f09cbea3SMark Johnston 	if (partial < keg->uk_reserve) {
1534f09cbea3SMark Johnston 		stokeep = min(dom->ud_free_slabs,
1535f09cbea3SMark Johnston 		    howmany(keg->uk_reserve - partial, keg->uk_ipers));
1536f09cbea3SMark Johnston 	} else {
1537f09cbea3SMark Johnston 		stokeep = 0;
1538f09cbea3SMark Johnston 	}
1539f09cbea3SMark Johnston 	stofree = dom->ud_free_slabs - stokeep;
1540f09cbea3SMark Johnston 
1541f09cbea3SMark Johnston 	/*
1542f09cbea3SMark Johnston 	 * Partition the free slabs into two sets: those that must be kept in
1543f09cbea3SMark Johnston 	 * order to maintain the reserve, and those that may be released back to
1544f09cbea3SMark Johnston 	 * the system.  Since one set may be much larger than the other,
1545f09cbea3SMark Johnston 	 * populate the smaller of the two sets and swap them if necessary.
1546f09cbea3SMark Johnston 	 */
1547f09cbea3SMark Johnston 	for (i = min(stofree, stokeep); i > 0; i--) {
1548f09cbea3SMark Johnston 		slab = LIST_FIRST(&dom->ud_free_slab);
1549f09cbea3SMark Johnston 		LIST_REMOVE(slab, us_link);
1550f09cbea3SMark Johnston 		LIST_INSERT_HEAD(&freeslabs, slab, us_link);
1551f09cbea3SMark Johnston 	}
1552f09cbea3SMark Johnston 	if (stofree > stokeep)
1553f09cbea3SMark Johnston 		LIST_SWAP(&freeslabs, &dom->ud_free_slab, uma_slab, us_link);
1554f09cbea3SMark Johnston 
1555f09cbea3SMark Johnston 	if ((keg->uk_flags & UMA_ZFLAG_HASH) != 0) {
1556f09cbea3SMark Johnston 		LIST_FOREACH(slab, &freeslabs, us_link)
1557f09cbea3SMark Johnston 			UMA_HASH_REMOVE(&keg->uk_hash, slab);
1558f09cbea3SMark Johnston 	}
1559f09cbea3SMark Johnston 	dom->ud_free_items -= stofree * keg->uk_ipers;
1560f09cbea3SMark Johnston 	dom->ud_free_slabs -= stofree;
1561f09cbea3SMark Johnston 	dom->ud_pages -= stofree * keg->uk_ppera;
1562f09cbea3SMark Johnston 	KEG_UNLOCK(keg, domain);
1563f09cbea3SMark Johnston 
1564f09cbea3SMark Johnston 	LIST_FOREACH_SAFE(slab, &freeslabs, us_link, tmp)
1565f09cbea3SMark Johnston 		keg_free_slab(keg, slab, keg->uk_ipers);
1566f09cbea3SMark Johnston }
1567f09cbea3SMark Johnston 
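/*
 * Worked example of the reserve accounting above (illustrative): with
 * uk_ipers == 8, uk_reserve == 20, ud_free_items == 84 and
 * ud_free_slabs == 10, the partially allocated slabs hold
 * 84 - 10 * 8 == 4 free items, so stokeep = min(10, howmany(16, 8)) == 2
 * fully free slabs are retained and the remaining stofree == 8 slabs are
 * released back to the system.
 */
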
15688355f576SJeff Roberson /*
1569e20a199fSJeff Roberson  * Frees pages from a keg back to the system.  This is done on demand from
15708355f576SJeff Roberson  * the pageout daemon.
15718355f576SJeff Roberson  *
1572e20a199fSJeff Roberson  * Returns nothing.
15738355f576SJeff Roberson  */
1574e20a199fSJeff Roberson static void
1575*aabe13f1SMark Johnston keg_drain(uma_keg_t keg, int domain)
15768355f576SJeff Roberson {
1577f09cbea3SMark Johnston 	int i;
15788355f576SJeff Roberson 
1579f09cbea3SMark Johnston 	if ((keg->uk_flags & UMA_ZONE_NOFREE) != 0)
15808355f576SJeff Roberson 		return;
1581*aabe13f1SMark Johnston 	if (domain != UMA_ANYDOMAIN) {
1582*aabe13f1SMark Johnston 		keg_drain_domain(keg, domain);
1583*aabe13f1SMark Johnston 	} else {
1584f09cbea3SMark Johnston 		for (i = 0; i < vm_ndomains; i++)
1585f09cbea3SMark Johnston 			keg_drain_domain(keg, i);
15868355f576SJeff Roberson 	}
1587*aabe13f1SMark Johnston }
15888355f576SJeff Roberson 
1589e20a199fSJeff Roberson static void
1590*aabe13f1SMark Johnston zone_reclaim(uma_zone_t zone, int domain, int waitok, bool drain)
1591e20a199fSJeff Roberson {
15928355f576SJeff Roberson 	/*
1593*aabe13f1SMark Johnston 	 * Count active reclaim operations in order to interlock with
1594*aabe13f1SMark Johnston 	 * zone_dtor(), which removes the zone from global lists before
1595*aabe13f1SMark Johnston 	 * attempting to reclaim items itself.
1596*aabe13f1SMark Johnston 	 *
1597*aabe13f1SMark Johnston 	 * The zone may be destroyed while sleeping, so only zone_dtor() should
1598*aabe13f1SMark Johnston 	 * specify M_WAITOK.
1599e20a199fSJeff Roberson 	 */
1600e20a199fSJeff Roberson 	ZONE_LOCK(zone);
1601*aabe13f1SMark Johnston 	if (waitok == M_WAITOK) {
1602*aabe13f1SMark Johnston 		while (zone->uz_reclaimers > 0)
1603*aabe13f1SMark Johnston 			msleep(zone, ZONE_LOCKPTR(zone), PVM, "zonedrain", 1);
1604e20a199fSJeff Roberson 	}
1605*aabe13f1SMark Johnston 	zone->uz_reclaimers++;
1606e20a199fSJeff Roberson 	ZONE_UNLOCK(zone);
1607*aabe13f1SMark Johnston 	bucket_cache_reclaim(zone, drain, domain);
160808cfa56eSMark Johnston 
160908034d10SKonstantin Belousov 	if ((zone->uz_flags & UMA_ZFLAG_CACHE) == 0)
1610*aabe13f1SMark Johnston 		keg_drain(zone->uz_keg, domain);
1611e20a199fSJeff Roberson 	ZONE_LOCK(zone);
1612*aabe13f1SMark Johnston 	zone->uz_reclaimers--;
1613*aabe13f1SMark Johnston 	if (zone->uz_reclaimers == 0)
1614e20a199fSJeff Roberson 		wakeup(zone);
1615e20a199fSJeff Roberson 	ZONE_UNLOCK(zone);
1616e20a199fSJeff Roberson }
1617e20a199fSJeff Roberson 
161808cfa56eSMark Johnston static void
1619*aabe13f1SMark Johnston zone_drain(uma_zone_t zone, void *arg)
1620e20a199fSJeff Roberson {
1621*aabe13f1SMark Johnston 	int domain;
1622e20a199fSJeff Roberson 
1623*aabe13f1SMark Johnston 	domain = (int)(uintptr_t)arg;
1624*aabe13f1SMark Johnston 	zone_reclaim(zone, domain, M_NOWAIT, true);
162508cfa56eSMark Johnston }
162608cfa56eSMark Johnston 
162708cfa56eSMark Johnston static void
1628*aabe13f1SMark Johnston zone_trim(uma_zone_t zone, void *arg)
162908cfa56eSMark Johnston {
1630*aabe13f1SMark Johnston 	int domain;
163108cfa56eSMark Johnston 
1632*aabe13f1SMark Johnston 	domain = (int)(uintptr_t)arg;
1633*aabe13f1SMark Johnston 	zone_reclaim(zone, domain, M_NOWAIT, false);
1634e20a199fSJeff Roberson }
1635e20a199fSJeff Roberson 
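/*
 * A minimal sketch (not compiled) of how these callbacks receive their
 * domain argument: the reclaim path is expected to pack the domain number
 * into the opaque pointer passed through zone_foreach(), matching the
 * (int)(uintptr_t)arg unpacking above.  The reclaim_all name is
 * hypothetical.
 */
#if 0
static void
reclaim_all(bool drain, int domain)
{
	zone_foreach(drain ? zone_drain : zone_trim,
	    (void *)(uintptr_t)domain);
}
#endif
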
1636e20a199fSJeff Roberson /*
16378b987a77SJeff Roberson  * Allocate a new slab for a keg and insert it into the partial slab list.
16388b987a77SJeff Roberson  * The keg should be unlocked on entry.  If the allocation succeeds it will
16398b987a77SJeff Roberson  * be locked on return.
16408355f576SJeff Roberson  *
16418355f576SJeff Roberson  * Arguments:
164286220393SMark Johnston  *	flags   Wait flags for the item initialization routine
164386220393SMark Johnston  *	aflags  Wait flags for the slab allocation
16448355f576SJeff Roberson  *
16458355f576SJeff Roberson  * Returns:
16468355f576SJeff Roberson  *	The slab that was allocated or NULL if there is no memory and the
16478355f576SJeff Roberson  *	caller specified M_NOWAIT.
16488355f576SJeff Roberson  */
16498355f576SJeff Roberson static uma_slab_t
165086220393SMark Johnston keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int domain, int flags,
165186220393SMark Johnston     int aflags)
16528355f576SJeff Roberson {
16538b987a77SJeff Roberson 	uma_domain_t dom;
1654099a0e58SBosko Milekic 	uma_slab_t slab;
16552e47807cSJeff Roberson 	unsigned long size;
165685dcf349SGleb Smirnoff 	uint8_t *mem;
165786220393SMark Johnston 	uint8_t sflags;
16588355f576SJeff Roberson 	int i;
16598355f576SJeff Roberson 
1660ab3185d1SJeff Roberson 	KASSERT(domain >= 0 && domain < vm_ndomains,
1661ab3185d1SJeff Roberson 	    ("keg_alloc_slab: domain %d out of range", domain));
1662a553d4b8SJeff Roberson 
1663194a979eSMark Johnston 	slab = NULL;
1664194a979eSMark Johnston 	mem = NULL;
166554c5ae80SRyan Libby 	if (keg->uk_flags & UMA_ZFLAG_OFFPAGE) {
16669b8db4d0SRyan Libby 		uma_hash_slab_t hslab;
16679b8db4d0SRyan Libby 		hslab = zone_alloc_item(slabzone(keg->uk_ipers), NULL,
16689b8db4d0SRyan Libby 		    domain, aflags);
16699b8db4d0SRyan Libby 		if (hslab == NULL)
1670727c6918SJeff Roberson 			goto fail;
16719b8db4d0SRyan Libby 		slab = &hslab->uhs_slab;
1672a553d4b8SJeff Roberson 	}
1673a553d4b8SJeff Roberson 
16743370c5bfSJeff Roberson 	/*
16753370c5bfSJeff Roberson 	 * This reproduces the old vm_zone behavior of zero filling pages the
16763370c5bfSJeff Roberson 	 * first time they are added to a zone.
16773370c5bfSJeff Roberson 	 *
16783370c5bfSJeff Roberson 	 * Malloced items are zeroed in uma_zalloc.
16793370c5bfSJeff Roberson 	 */
16803370c5bfSJeff Roberson 
1681099a0e58SBosko Milekic 	if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
168286220393SMark Johnston 		aflags |= M_ZERO;
16833370c5bfSJeff Roberson 	else
168486220393SMark Johnston 		aflags &= ~M_ZERO;
16853370c5bfSJeff Roberson 
1686263811f7SKip Macy 	if (keg->uk_flags & UMA_ZONE_NODUMP)
168786220393SMark Johnston 		aflags |= M_NODUMP;
1688263811f7SKip Macy 
1689e20a199fSJeff Roberson 	/* zone is passed for legacy reasons. */
1690194a979eSMark Johnston 	size = keg->uk_ppera * PAGE_SIZE;
169109c8cb71SMark Johnston 	mem = keg->uk_allocf(zone, size, domain, &sflags, aflags);
1692a553d4b8SJeff Roberson 	if (mem == NULL) {
169354c5ae80SRyan Libby 		if (keg->uk_flags & UMA_ZFLAG_OFFPAGE)
16949b8db4d0SRyan Libby 			zone_free_item(slabzone(keg->uk_ipers),
16959b8db4d0SRyan Libby 			    slab_tohashslab(slab), NULL, SKIP_NONE);
1696727c6918SJeff Roberson 		goto fail;
1697a553d4b8SJeff Roberson 	}
16982e47807cSJeff Roberson 	uma_total_inc(size);
16998355f576SJeff Roberson 
17008b987a77SJeff Roberson 	/* For HASH zones all pages go to the same uma_domain. */
170154c5ae80SRyan Libby 	if ((keg->uk_flags & UMA_ZFLAG_HASH) != 0)
17028b987a77SJeff Roberson 		domain = 0;
17038b987a77SJeff Roberson 
17045c0e403bSJeff Roberson 	/* Point the slab into the allocated memory */
170554c5ae80SRyan Libby 	if (!(keg->uk_flags & UMA_ZFLAG_OFFPAGE))
1706099a0e58SBosko Milekic 		slab = (uma_slab_t)(mem + keg->uk_pgoff);
17071e0701e1SJeff Roberson 	else
17089b8db4d0SRyan Libby 		slab_tohashslab(slab)->uhs_data = mem;
17095c0e403bSJeff Roberson 
171054c5ae80SRyan Libby 	if (keg->uk_flags & UMA_ZFLAG_VTOSLAB)
1711099a0e58SBosko Milekic 		for (i = 0; i < keg->uk_ppera; i++)
1712584061b4SJeff Roberson 			vsetzoneslab((vm_offset_t)mem + (i * PAGE_SIZE),
1713584061b4SJeff Roberson 			    zone, slab);
17148355f576SJeff Roberson 
1715099a0e58SBosko Milekic 	slab->us_freecount = keg->uk_ipers;
171686220393SMark Johnston 	slab->us_flags = sflags;
1717ab3185d1SJeff Roberson 	slab->us_domain = domain;
17188b987a77SJeff Roberson 
17199b78b1f4SJeff Roberson 	BIT_FILL(keg->uk_ipers, &slab->us_free);
1720ef72505eSJeff Roberson #ifdef INVARIANTS
1721815db204SRyan Libby 	BIT_ZERO(keg->uk_ipers, slab_dbg_bits(slab, keg));
1722ef72505eSJeff Roberson #endif
1723099a0e58SBosko Milekic 
1724b23f72e9SBrian Feldman 	if (keg->uk_init != NULL) {
1725099a0e58SBosko Milekic 		for (i = 0; i < keg->uk_ipers; i++)
17261e0701e1SJeff Roberson 			if (keg->uk_init(slab_item(slab, keg, i),
172786220393SMark Johnston 			    keg->uk_size, flags) != 0)
1728b23f72e9SBrian Feldman 				break;
1729b23f72e9SBrian Feldman 		if (i != keg->uk_ipers) {
1730fc03d22bSJeff Roberson 			keg_free_slab(keg, slab, i);
1731727c6918SJeff Roberson 			goto fail;
1732b23f72e9SBrian Feldman 		}
1733b23f72e9SBrian Feldman 	}
173409c8cb71SMark Johnston 	kasan_mark_slab_invalid(keg, mem);
17358b987a77SJeff Roberson 	KEG_LOCK(keg, domain);
17365c0e403bSJeff Roberson 
17371431a748SGleb Smirnoff 	CTR3(KTR_UMA, "keg_alloc_slab: allocated slab %p for %s(%p)",
17381431a748SGleb Smirnoff 	    slab, keg->uk_name, keg);
17391431a748SGleb Smirnoff 
174054c5ae80SRyan Libby 	if (keg->uk_flags & UMA_ZFLAG_HASH)
1741099a0e58SBosko Milekic 		UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
17428355f576SJeff Roberson 
17438b987a77SJeff Roberson 	/*
17448b987a77SJeff Roberson 	 * If we got a slab here it's safe to mark it partially used
17458b987a77SJeff Roberson 	 * and return.  We assume that the caller is going to remove
17468b987a77SJeff Roberson 	 * at least one item.
17478b987a77SJeff Roberson 	 */
17488b987a77SJeff Roberson 	dom = &keg->uk_domain[domain];
17498b987a77SJeff Roberson 	LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link);
17508b987a77SJeff Roberson 	dom->ud_pages += keg->uk_ppera;
17514ab3aee8SMark Johnston 	dom->ud_free_items += keg->uk_ipers;
17528355f576SJeff Roberson 
17538355f576SJeff Roberson 	return (slab);
1754727c6918SJeff Roberson 
1755727c6918SJeff Roberson fail:
1756727c6918SJeff Roberson 	return (NULL);
17578355f576SJeff Roberson }
17588355f576SJeff Roberson 
17598355f576SJeff Roberson /*
1760537f92cdSMark Johnston  * This function is intended to be used early on in place of page_alloc().  It
1761537f92cdSMark Johnston  * performs contiguous physical memory allocations and uses a bump allocator for
1762537f92cdSMark Johnston  * KVA, so is usable before the kernel map is initialized.
1763009b6fcbSJeff Roberson  */
1764009b6fcbSJeff Roberson static void *
1765ab3185d1SJeff Roberson startup_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
1766ab3185d1SJeff Roberson     int wait)
1767009b6fcbSJeff Roberson {
1768a81c400eSJeff Roberson 	vm_paddr_t pa;
1769a81c400eSJeff Roberson 	vm_page_t m;
1770ac0a6fd0SGleb Smirnoff 	void *mem;
1771ac0a6fd0SGleb Smirnoff 	int pages;
1772a81c400eSJeff Roberson 	int i;
1773099a0e58SBosko Milekic 
1774f7d35785SGleb Smirnoff 	pages = howmany(bytes, PAGE_SIZE);
1775f7d35785SGleb Smirnoff 	KASSERT(pages > 0, ("%s can't reserve 0 pages", __func__));
1776a81c400eSJeff Roberson 
1777f7d35785SGleb Smirnoff 	*pflag = UMA_SLAB_BOOT;
1778a81c400eSJeff Roberson 	m = vm_page_alloc_contig_domain(NULL, 0, domain,
1779a81c400eSJeff Roberson 	    malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED, pages,
1780a81c400eSJeff Roberson 	    (vm_paddr_t)0, ~(vm_paddr_t)0, 1, 0, VM_MEMATTR_DEFAULT);
1781a81c400eSJeff Roberson 	if (m == NULL)
1782a81c400eSJeff Roberson 		return (NULL);
1783a81c400eSJeff Roberson 
1784a81c400eSJeff Roberson 	pa = VM_PAGE_TO_PHYS(m);
1785a81c400eSJeff Roberson 	for (i = 0; i < pages; i++, pa += PAGE_SIZE) {
1786a81c400eSJeff Roberson #if defined(__aarch64__) || defined(__amd64__) || defined(__mips__) || \
1787a81c400eSJeff Roberson     defined(__riscv) || defined(__powerpc64__)
1788a81c400eSJeff Roberson 		if ((wait & M_NODUMP) == 0)
1789a81c400eSJeff Roberson 			dump_add_page(pa);
1790a81c400eSJeff Roberson #endif
1791a81c400eSJeff Roberson 	}
1792a81c400eSJeff Roberson 	/* Allocate KVA and indirectly advance bootmem. */
1793a81c400eSJeff Roberson 	mem = (void *)pmap_map(&bootmem, m->phys_addr,
1794a81c400eSJeff Roberson 	    m->phys_addr + (pages * PAGE_SIZE), VM_PROT_READ | VM_PROT_WRITE);
1795a81c400eSJeff Roberson         if ((wait & M_ZERO) != 0)
1796a81c400eSJeff Roberson                 bzero(mem, pages * PAGE_SIZE);
1797f7d35785SGleb Smirnoff 
1798f7d35785SGleb Smirnoff         return (mem);
1799f7d35785SGleb Smirnoff }
1800f7d35785SGleb Smirnoff 
1801a81c400eSJeff Roberson static void
1802a81c400eSJeff Roberson startup_free(void *mem, vm_size_t bytes)
1803a81c400eSJeff Roberson {
1804a81c400eSJeff Roberson 	vm_offset_t va;
1805a81c400eSJeff Roberson 	vm_page_t m;
1806a81c400eSJeff Roberson 
1807a81c400eSJeff Roberson 	va = (vm_offset_t)mem;
1808a81c400eSJeff Roberson 	m = PHYS_TO_VM_PAGE(pmap_kextract(va));
1809663de81fSMark Johnston 
1810663de81fSMark Johnston 	/*
1811663de81fSMark Johnston 	 * startup_alloc() returns direct-mapped slabs on some platforms.  Avoid
1812663de81fSMark Johnston 	 * unmapping ranges of the direct map.
1813663de81fSMark Johnston 	 */
1814663de81fSMark Johnston 	if (va >= bootstart && va + bytes <= bootmem)
1815a81c400eSJeff Roberson 		pmap_remove(kernel_pmap, va, va + bytes);
1816a81c400eSJeff Roberson 	for (; bytes != 0; bytes -= PAGE_SIZE, m++) {
1817a81c400eSJeff Roberson #if defined(__aarch64__) || defined(__amd64__) || defined(__mips__) || \
1818a81c400eSJeff Roberson     defined(__riscv) || defined(__powerpc64__)
1819a81c400eSJeff Roberson 		dump_drop_page(VM_PAGE_TO_PHYS(m));
1820a81c400eSJeff Roberson #endif
1821a81c400eSJeff Roberson 		vm_page_unwire_noq(m);
1822a81c400eSJeff Roberson 		vm_page_free(m);
1823a81c400eSJeff Roberson 	}
1824a81c400eSJeff Roberson }
1825a81c400eSJeff Roberson 
1826f7d35785SGleb Smirnoff /*
18278355f576SJeff Roberson  * Allocates a number of pages from the system
18288355f576SJeff Roberson  *
18298355f576SJeff Roberson  * Arguments:
18308355f576SJeff Roberson  *	bytes  The number of bytes requested
18318355f576SJeff Roberson  *	wait  Shall we wait?
18328355f576SJeff Roberson  *
18338355f576SJeff Roberson  * Returns:
18348355f576SJeff Roberson  *	A pointer to the alloced memory or possibly
18358355f576SJeff Roberson  *	NULL if M_NOWAIT is set.
18368355f576SJeff Roberson  */
18378355f576SJeff Roberson static void *
1838ab3185d1SJeff Roberson page_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
1839ab3185d1SJeff Roberson     int wait)
18408355f576SJeff Roberson {
18418355f576SJeff Roberson 	void *p;	/* Returned page */
18428355f576SJeff Roberson 
18432e47807cSJeff Roberson 	*pflag = UMA_SLAB_KERNEL;
18449978bd99SMark Johnston 	p = (void *)kmem_malloc_domainset(DOMAINSET_FIXED(domain), bytes, wait);
18458355f576SJeff Roberson 
18468355f576SJeff Roberson 	return (p);
18478355f576SJeff Roberson }
18488355f576SJeff Roberson 
1849ab3059a8SMatt Macy static void *
1850ab3059a8SMatt Macy pcpu_page_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
1851ab3059a8SMatt Macy     int wait)
1852ab3059a8SMatt Macy {
1853ab3059a8SMatt Macy 	struct pglist alloctail;
1854ab3059a8SMatt Macy 	vm_offset_t addr, zkva;
1855ab3059a8SMatt Macy 	int cpu, flags;
1856ab3059a8SMatt Macy 	vm_page_t p, p_next;
1857ab3059a8SMatt Macy #ifdef NUMA
1858ab3059a8SMatt Macy 	struct pcpu *pc;
1859ab3059a8SMatt Macy #endif
1860ab3059a8SMatt Macy 
1861ab3059a8SMatt Macy 	MPASS(bytes == (mp_maxid + 1) * PAGE_SIZE);
1862ab3059a8SMatt Macy 
1863013072f0SMark Johnston 	TAILQ_INIT(&alloctail);
1864ab3059a8SMatt Macy 	flags = VM_ALLOC_SYSTEM | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ |
1865013072f0SMark Johnston 	    malloc2vm_flags(wait);
1866013072f0SMark Johnston 	*pflag = UMA_SLAB_KERNEL;
1867ab3059a8SMatt Macy 	for (cpu = 0; cpu <= mp_maxid; cpu++) {
1868ab3059a8SMatt Macy 		if (CPU_ABSENT(cpu)) {
1869ab3059a8SMatt Macy 			p = vm_page_alloc(NULL, 0, flags);
1870ab3059a8SMatt Macy 		} else {
1871ab3059a8SMatt Macy #ifndef NUMA
1872ab3059a8SMatt Macy 			p = vm_page_alloc(NULL, 0, flags);
1873ab3059a8SMatt Macy #else
1874ab3059a8SMatt Macy 			pc = pcpu_find(cpu);
187520526802SAndrew Gallatin 			if (__predict_false(VM_DOMAIN_EMPTY(pc->pc_domain)))
187620526802SAndrew Gallatin 				p = NULL;
187720526802SAndrew Gallatin 			else
187820526802SAndrew Gallatin 				p = vm_page_alloc_domain(NULL, 0,
187920526802SAndrew Gallatin 				    pc->pc_domain, flags);
1880ab3059a8SMatt Macy 			if (__predict_false(p == NULL))
1881ab3059a8SMatt Macy 				p = vm_page_alloc(NULL, 0, flags);
1882ab3059a8SMatt Macy #endif
1883ab3059a8SMatt Macy 		}
1884ab3059a8SMatt Macy 		if (__predict_false(p == NULL))
1885ab3059a8SMatt Macy 			goto fail;
1886ab3059a8SMatt Macy 		TAILQ_INSERT_TAIL(&alloctail, p, listq);
1887ab3059a8SMatt Macy 	}
1888ab3059a8SMatt Macy 	if ((addr = kva_alloc(bytes)) == 0)
1889ab3059a8SMatt Macy 		goto fail;
1890ab3059a8SMatt Macy 	zkva = addr;
1891ab3059a8SMatt Macy 	TAILQ_FOREACH(p, &alloctail, listq) {
1892ab3059a8SMatt Macy 		pmap_qenter(zkva, &p, 1);
1893ab3059a8SMatt Macy 		zkva += PAGE_SIZE;
1894ab3059a8SMatt Macy 	}
1895ab3059a8SMatt Macy 	return ((void*)addr);
1896ab3059a8SMatt Macy fail:
1897ab3059a8SMatt Macy 	TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) {
189888ea538aSMark Johnston 		vm_page_unwire_noq(p);
1899ab3059a8SMatt Macy 		vm_page_free(p);
1900ab3059a8SMatt Macy 	}
1901ab3059a8SMatt Macy 	return (NULL);
1902ab3059a8SMatt Macy }
1903ab3059a8SMatt Macy 
19048355f576SJeff Roberson /*
19058355f576SJeff Roberson  * Allocates a number of pages not belonging to a VM object
19068355f576SJeff Roberson  *
19078355f576SJeff Roberson  * Arguments:
19088355f576SJeff Roberson  *	bytes  The number of bytes requested
19098355f576SJeff Roberson  *	wait   Shall we wait?
19108355f576SJeff Roberson  *
19118355f576SJeff Roberson  * Returns:
19128355f576SJeff Roberson  *	A pointer to the alloced memory or possibly
19138355f576SJeff Roberson  *	NULL if M_NOWAIT is set.
19148355f576SJeff Roberson  */
19158355f576SJeff Roberson static void *
1916ab3185d1SJeff Roberson noobj_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *flags,
1917ab3185d1SJeff Roberson     int wait)
19188355f576SJeff Roberson {
1919a4915c21SAttilio Rao 	TAILQ_HEAD(, vm_page) alloctail;
1920a4915c21SAttilio Rao 	u_long npages;
1921b245ac95SAlan Cox 	vm_offset_t retkva, zkva;
1922a4915c21SAttilio Rao 	vm_page_t p, p_next;
1923e20a199fSJeff Roberson 	uma_keg_t keg;
19248355f576SJeff Roberson 
1925a4915c21SAttilio Rao 	TAILQ_INIT(&alloctail);
1926bb15d1c7SGleb Smirnoff 	keg = zone->uz_keg;
1927a4915c21SAttilio Rao 
1928a4915c21SAttilio Rao 	npages = howmany(bytes, PAGE_SIZE);
1929a4915c21SAttilio Rao 	while (npages > 0) {
1930ab3185d1SJeff Roberson 		p = vm_page_alloc_domain(NULL, 0, domain, VM_ALLOC_INTERRUPT |
19318d6fbbb8SJeff Roberson 		    VM_ALLOC_WIRED | VM_ALLOC_NOOBJ |
1932772c8b67SKonstantin Belousov 		    ((wait & M_WAITOK) != 0 ? VM_ALLOC_WAITOK :
1933772c8b67SKonstantin Belousov 		    VM_ALLOC_NOWAIT));
1934a4915c21SAttilio Rao 		if (p != NULL) {
1935a4915c21SAttilio Rao 			/*
1936a4915c21SAttilio Rao 			 * Since the page does not belong to an object, its
1937a4915c21SAttilio Rao 			 * listq is unused.
1938a4915c21SAttilio Rao 			 */
1939a4915c21SAttilio Rao 			TAILQ_INSERT_TAIL(&alloctail, p, listq);
1940a4915c21SAttilio Rao 			npages--;
1941a4915c21SAttilio Rao 			continue;
1942a4915c21SAttilio Rao 		}
19438355f576SJeff Roberson 		/*
1944a4915c21SAttilio Rao 		 * Page allocation failed, free intermediate pages and
1945a4915c21SAttilio Rao 		 * exit.
19468355f576SJeff Roberson 		 */
1947a4915c21SAttilio Rao 		TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) {
194888ea538aSMark Johnston 			vm_page_unwire_noq(p);
1949b245ac95SAlan Cox 			vm_page_free(p);
1950b245ac95SAlan Cox 		}
1951a4915c21SAttilio Rao 		return (NULL);
1952b245ac95SAlan Cox 	}
19538355f576SJeff Roberson 	*flags = UMA_SLAB_PRIV;
1954a4915c21SAttilio Rao 	zkva = keg->uk_kva +
1955a4915c21SAttilio Rao 	    atomic_fetchadd_long(&keg->uk_offset, round_page(bytes));
1956a4915c21SAttilio Rao 	retkva = zkva;
1957a4915c21SAttilio Rao 	TAILQ_FOREACH(p, &alloctail, listq) {
1958a4915c21SAttilio Rao 		pmap_qenter(zkva, &p, 1);
1959a4915c21SAttilio Rao 		zkva += PAGE_SIZE;
1960a4915c21SAttilio Rao 	}
19618355f576SJeff Roberson 
19628355f576SJeff Roberson 	return ((void *)retkva);
19638355f576SJeff Roberson }
19648355f576SJeff Roberson 
19658355f576SJeff Roberson /*
1966ec0d8280SRyan Libby  * Allocate physically contiguous pages.
1967ec0d8280SRyan Libby  */
1968ec0d8280SRyan Libby static void *
1969ec0d8280SRyan Libby contig_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
1970ec0d8280SRyan Libby     int wait)
1971ec0d8280SRyan Libby {
1972ec0d8280SRyan Libby 
1973ec0d8280SRyan Libby 	*pflag = UMA_SLAB_KERNEL;
1974ec0d8280SRyan Libby 	return ((void *)kmem_alloc_contig_domainset(DOMAINSET_FIXED(domain),
1975ec0d8280SRyan Libby 	    bytes, wait, 0, ~(vm_paddr_t)0, 1, 0, VM_MEMATTR_DEFAULT));
1976ec0d8280SRyan Libby }
1977ec0d8280SRyan Libby 
1978ec0d8280SRyan Libby /*
19798355f576SJeff Roberson  * Frees a number of pages to the system
19808355f576SJeff Roberson  *
19818355f576SJeff Roberson  * Arguments:
19828355f576SJeff Roberson  *	mem   A pointer to the memory to be freed
19838355f576SJeff Roberson  *	size  The size of the memory being freed
19848355f576SJeff Roberson  *	flags The original p->us_flags field
19858355f576SJeff Roberson  *
19868355f576SJeff Roberson  * Returns:
19878355f576SJeff Roberson  *	Nothing
19888355f576SJeff Roberson  */
19898355f576SJeff Roberson static void
1990f2c2231eSRyan Stone page_free(void *mem, vm_size_t size, uint8_t flags)
19918355f576SJeff Roberson {
19923370c5bfSJeff Roberson 
1993a81c400eSJeff Roberson 	if ((flags & UMA_SLAB_BOOT) != 0) {
1994a81c400eSJeff Roberson 		startup_free(mem, size);
1995a81c400eSJeff Roberson 		return;
1996a81c400eSJeff Roberson 	}
1997a81c400eSJeff Roberson 
1998ec0d8280SRyan Libby 	KASSERT((flags & UMA_SLAB_KERNEL) != 0,
1999ec0d8280SRyan Libby 	    ("UMA: page_free used with invalid flags %x", flags));
20008355f576SJeff Roberson 
200149bfa624SAlan Cox 	kmem_free((vm_offset_t)mem, size);
20028355f576SJeff Roberson }
20038355f576SJeff Roberson 
20048355f576SJeff Roberson /*
2005ab3059a8SMatt Macy  * Frees pcpu zone allocations
2006ab3059a8SMatt Macy  *
2007ab3059a8SMatt Macy  * Arguments:
2008ab3059a8SMatt Macy  *	mem   A pointer to the memory to be freed
2009ab3059a8SMatt Macy  *	size  The size of the memory being freed
2010ab3059a8SMatt Macy  *	flags The original p->us_flags field
2011ab3059a8SMatt Macy  *
2012ab3059a8SMatt Macy  * Returns:
2013ab3059a8SMatt Macy  *	Nothing
2014ab3059a8SMatt Macy  */
2015ab3059a8SMatt Macy static void
2016ab3059a8SMatt Macy pcpu_page_free(void *mem, vm_size_t size, uint8_t flags)
2017ab3059a8SMatt Macy {
2018ab3059a8SMatt Macy 	vm_offset_t sva, curva;
2019ab3059a8SMatt Macy 	vm_paddr_t paddr;
2020ab3059a8SMatt Macy 	vm_page_t m;
2021ab3059a8SMatt Macy 
2022ab3059a8SMatt Macy 	MPASS(size == (mp_maxid+1)*PAGE_SIZE);
20235ba16cf3SRyan Libby 
20245ba16cf3SRyan Libby 	if ((flags & UMA_SLAB_BOOT) != 0) {
20255ba16cf3SRyan Libby 		startup_free(mem, size);
20265ba16cf3SRyan Libby 		return;
20275ba16cf3SRyan Libby 	}
20285ba16cf3SRyan Libby 
2029ab3059a8SMatt Macy 	sva = (vm_offset_t)mem;
2030ab3059a8SMatt Macy 	for (curva = sva; curva < sva + size; curva += PAGE_SIZE) {
2031ab3059a8SMatt Macy 		paddr = pmap_kextract(curva);
2032ab3059a8SMatt Macy 		m = PHYS_TO_VM_PAGE(paddr);
203388ea538aSMark Johnston 		vm_page_unwire_noq(m);
2034ab3059a8SMatt Macy 		vm_page_free(m);
2035ab3059a8SMatt Macy 	}
2036ab3059a8SMatt Macy 	pmap_qremove(sva, size >> PAGE_SHIFT);
2037ab3059a8SMatt Macy 	kva_free(sva, size);
2038ab3059a8SMatt Macy }
2039ab3059a8SMatt Macy 
2040ab3059a8SMatt Macy /*
20418355f576SJeff Roberson  * Zero fill initializer
20428355f576SJeff Roberson  *
20438355f576SJeff Roberson  * Arguments/Returns follow uma_init specifications
20448355f576SJeff Roberson  */
2045b23f72e9SBrian Feldman static int
2046b23f72e9SBrian Feldman zero_init(void *mem, int size, int flags)
20478355f576SJeff Roberson {
20488355f576SJeff Roberson 	bzero(mem, size);
2049b23f72e9SBrian Feldman 	return (0);
20508355f576SJeff Roberson }
20518355f576SJeff Roberson 
2052815db204SRyan Libby #ifdef INVARIANTS
205354007ce8SMark Johnston static struct noslabbits *
2054815db204SRyan Libby slab_dbg_bits(uma_slab_t slab, uma_keg_t keg)
2055815db204SRyan Libby {
2056815db204SRyan Libby 
2057815db204SRyan Libby 	return ((void *)((char *)&slab->us_free + BITSET_SIZE(keg->uk_ipers)));
2058815db204SRyan Libby }
2059815db204SRyan Libby #endif
2060815db204SRyan Libby 
20618355f576SJeff Roberson /*
20629b78b1f4SJeff Roberson  * Actual size of embedded struct slab (!OFFPAGE).
20639b78b1f4SJeff Roberson  */
206454007ce8SMark Johnston static size_t
20659b78b1f4SJeff Roberson slab_sizeof(int nitems)
20669b78b1f4SJeff Roberson {
20679b78b1f4SJeff Roberson 	size_t s;
20689b78b1f4SJeff Roberson 
2069815db204SRyan Libby 	s = sizeof(struct uma_slab) + BITSET_SIZE(nitems) * SLAB_BITSETS;
20709b78b1f4SJeff Roberson 	return (roundup(s, UMA_ALIGN_PTR + 1));
20719b78b1f4SJeff Roberson }
20729b78b1f4SJeff Roberson 
20734a8b575cSRyan Libby #define	UMA_FIXPT_SHIFT	31
20744a8b575cSRyan Libby #define	UMA_FRAC_FIXPT(n, d)						\
20754a8b575cSRyan Libby 	((uint32_t)(((uint64_t)(n) << UMA_FIXPT_SHIFT) / (d)))
20764a8b575cSRyan Libby #define	UMA_FIXPT_PCT(f)						\
20774a8b575cSRyan Libby 	((u_int)(((uint64_t)100 * (f)) >> UMA_FIXPT_SHIFT))
20784a8b575cSRyan Libby #define	UMA_PCT_FIXPT(pct)	UMA_FRAC_FIXPT((pct), 100)
20794a8b575cSRyan Libby #define	UMA_MIN_EFF	UMA_PCT_FIXPT(100 - UMA_MAX_WASTE)
20804a8b575cSRyan Libby 
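/*
 * Worked example of the fixed-point helpers above (illustrative):
 * UMA_FRAC_FIXPT(3, 4) == ((uint64_t)3 << 31) / 4 == 0x60000000 and
 * UMA_FIXPT_PCT(0x60000000) == (100 * 0x60000000) >> 31 == 75, so a
 * layout that uses three quarters of its slab space reports 75%
 * efficiency.  UMA_MIN_EFF is the same encoding of
 * (100 - UMA_MAX_WASTE) percent.
 */
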
20819b78b1f4SJeff Roberson /*
20824a8b575cSRyan Libby  * Compute the number of items that will fit in a slab.  If hdr is true, the
20834a8b575cSRyan Libby  * item count may be limited to provide space in the slab for an inline slab
20844a8b575cSRyan Libby  * header.  Otherwise, all slab space will be provided for item storage.
20854a8b575cSRyan Libby  */
20864a8b575cSRyan Libby static u_int
20874a8b575cSRyan Libby slab_ipers_hdr(u_int size, u_int rsize, u_int slabsize, bool hdr)
20884a8b575cSRyan Libby {
20894a8b575cSRyan Libby 	u_int ipers;
20904a8b575cSRyan Libby 	u_int padpi;
20914a8b575cSRyan Libby 
20924a8b575cSRyan Libby 	/* The padding between items is not needed after the last item. */
20934a8b575cSRyan Libby 	padpi = rsize - size;
20944a8b575cSRyan Libby 
20954a8b575cSRyan Libby 	if (hdr) {
20964a8b575cSRyan Libby 		/*
20974a8b575cSRyan Libby 		 * Start with the maximum item count and remove items until
20984a8b575cSRyan Libby 		 * the slab header fits alongside the allocatable memory.
20994a8b575cSRyan Libby 		 */
21004a8b575cSRyan Libby 		for (ipers = MIN(SLAB_MAX_SETSIZE,
21014a8b575cSRyan Libby 		    (slabsize + padpi - slab_sizeof(1)) / rsize);
21024a8b575cSRyan Libby 		    ipers > 0 &&
21034a8b575cSRyan Libby 		    ipers * rsize - padpi + slab_sizeof(ipers) > slabsize;
21044a8b575cSRyan Libby 		    ipers--)
21054a8b575cSRyan Libby 			continue;
21064a8b575cSRyan Libby 	} else {
21074a8b575cSRyan Libby 		ipers = MIN((slabsize + padpi) / rsize, SLAB_MAX_SETSIZE);
21084a8b575cSRyan Libby 	}
21094a8b575cSRyan Libby 
21104a8b575cSRyan Libby 	return (ipers);
21114a8b575cSRyan Libby }
21124a8b575cSRyan Libby 
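/*
 * Worked example of the item count above (illustrative, assuming a 4KB
 * page): for a 256-byte item with 64-byte alignment, rsize == 256 and
 * padpi == 0, so an offpage (hdr == false) layout packs
 * MIN(4096 / 256, SLAB_MAX_SETSIZE) == 16 items into a one-page slab;
 * with an inline header (hdr == true) the loop sheds items until
 * slab_sizeof(ipers) also fits in the page.
 */
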
211327ca37acSRyan Libby struct keg_layout_result {
211427ca37acSRyan Libby 	u_int format;
211527ca37acSRyan Libby 	u_int slabsize;
211627ca37acSRyan Libby 	u_int ipers;
211727ca37acSRyan Libby 	u_int eff;
211827ca37acSRyan Libby };
211927ca37acSRyan Libby 
212027ca37acSRyan Libby static void
212127ca37acSRyan Libby keg_layout_one(uma_keg_t keg, u_int rsize, u_int slabsize, u_int fmt,
212227ca37acSRyan Libby     struct keg_layout_result *kl)
212327ca37acSRyan Libby {
212427ca37acSRyan Libby 	u_int total;
212527ca37acSRyan Libby 
212627ca37acSRyan Libby 	kl->format = fmt;
212727ca37acSRyan Libby 	kl->slabsize = slabsize;
212827ca37acSRyan Libby 
212927ca37acSRyan Libby 	/* Handle INTERNAL as inline with an extra page. */
213027ca37acSRyan Libby 	if ((fmt & UMA_ZFLAG_INTERNAL) != 0) {
213127ca37acSRyan Libby 		kl->format &= ~UMA_ZFLAG_INTERNAL;
213227ca37acSRyan Libby 		kl->slabsize += PAGE_SIZE;
213327ca37acSRyan Libby 	}
213427ca37acSRyan Libby 
213527ca37acSRyan Libby 	kl->ipers = slab_ipers_hdr(keg->uk_size, rsize, kl->slabsize,
213627ca37acSRyan Libby 	    (fmt & UMA_ZFLAG_OFFPAGE) == 0);
213727ca37acSRyan Libby 
213827ca37acSRyan Libby 	/* Account for memory used by an offpage slab header. */
213927ca37acSRyan Libby 	total = kl->slabsize;
214027ca37acSRyan Libby 	if ((fmt & UMA_ZFLAG_OFFPAGE) != 0)
214127ca37acSRyan Libby 		total += slabzone(kl->ipers)->uz_keg->uk_rsize;
214227ca37acSRyan Libby 
214327ca37acSRyan Libby 	kl->eff = UMA_FRAC_FIXPT(kl->ipers * rsize, total);
214427ca37acSRyan Libby }
214527ca37acSRyan Libby 
21469b78b1f4SJeff Roberson /*
21474a8b575cSRyan Libby  * Determine the format of a uma keg.  This determines where the slab header
21484a8b575cSRyan Libby  * will be placed (inline or offpage) and calculates ipers, rsize, and ppera.
21498355f576SJeff Roberson  *
21508355f576SJeff Roberson  * Arguments
2151e20a199fSJeff Roberson  *	keg  The zone we should initialize
21528355f576SJeff Roberson  *
21538355f576SJeff Roberson  * Returns
21548355f576SJeff Roberson  *	Nothing
21558355f576SJeff Roberson  */
21568355f576SJeff Roberson static void
21574a8b575cSRyan Libby keg_layout(uma_keg_t keg)
21588355f576SJeff Roberson {
215927ca37acSRyan Libby 	struct keg_layout_result kl = {}, kl_tmp;
216027ca37acSRyan Libby 	u_int fmts[2];
21614a8b575cSRyan Libby 	u_int alignsize;
216227ca37acSRyan Libby 	u_int nfmt;
21634a8b575cSRyan Libby 	u_int pages;
2164244f4554SBosko Milekic 	u_int rsize;
2165a55ebb7cSAndriy Gapon 	u_int slabsize;
216627ca37acSRyan Libby 	u_int i, j;
21678355f576SJeff Roberson 
21684a8b575cSRyan Libby 	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 ||
21694a8b575cSRyan Libby 	    (keg->uk_size <= UMA_PCPU_ALLOC_SIZE &&
21704a8b575cSRyan Libby 	     (keg->uk_flags & UMA_ZONE_CACHESPREAD) == 0),
21714a8b575cSRyan Libby 	    ("%s: cannot configure for PCPU: keg=%s, size=%u, flags=0x%b",
21724a8b575cSRyan Libby 	     __func__, keg->uk_name, keg->uk_size, keg->uk_flags,
21734a8b575cSRyan Libby 	     PRINT_UMA_ZFLAGS));
2174bae55c4aSRyan Libby 	KASSERT((keg->uk_flags & (UMA_ZFLAG_INTERNAL | UMA_ZONE_VM)) == 0 ||
21754a8b575cSRyan Libby 	    (keg->uk_flags & (UMA_ZONE_NOTOUCH | UMA_ZONE_PCPU)) == 0,
21764a8b575cSRyan Libby 	    ("%s: incompatible flags 0x%b", __func__, keg->uk_flags,
21774a8b575cSRyan Libby 	     PRINT_UMA_ZFLAGS));
2178e28a647dSGleb Smirnoff 
21794a8b575cSRyan Libby 	alignsize = keg->uk_align + 1;
2180ad97af7eSGleb Smirnoff 
2181ef72505eSJeff Roberson 	/*
2182ef72505eSJeff Roberson 	 * Calculate the size of each allocation (rsize) according to
2183ef72505eSJeff Roberson 	 * alignment.  If the requested size is smaller than we have
2184ef72505eSJeff Roberson 	 * allocation bits for we round it up.
2185ef72505eSJeff Roberson 	 */
21869b8db4d0SRyan Libby 	rsize = MAX(keg->uk_size, UMA_SMALLEST_UNIT);
21874a8b575cSRyan Libby 	rsize = roundup2(rsize, alignsize);
2188ad97af7eSGleb Smirnoff 
218927ca37acSRyan Libby 	if ((keg->uk_flags & UMA_ZONE_CACHESPREAD) != 0) {
21909b78b1f4SJeff Roberson 		/*
21914a8b575cSRyan Libby 		 * We want one item to start on every align boundary in a page.
21924a8b575cSRyan Libby 		 * To do this we will span pages.  We will also extend the item
21934a8b575cSRyan Libby 		 * by the size of align if it is an even multiple of align.
21944a8b575cSRyan Libby 		 * Otherwise, it would fall on the same boundary every time.
21959b78b1f4SJeff Roberson 		 */
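		/*
		 * Worked example (illustrative, ignoring the MIN() caps
		 * below): with a 64 byte cache line (alignsize = 64) and
		 * rsize = 128, 128 is an even multiple of 64, so rsize is
		 * bumped to 192.  A slab of 192 * (4096 / 64) = 12288
		 * bytes (3 pages) then holds 64 items whose starting
		 * offsets within a page step through every 64 byte
		 * boundary, which an even multiple could not do.
		 */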
21964a8b575cSRyan Libby 		if ((rsize & alignsize) == 0)
21974a8b575cSRyan Libby 			rsize += alignsize;
21984a8b575cSRyan Libby 		slabsize = rsize * (PAGE_SIZE / alignsize);
21994a8b575cSRyan Libby 		slabsize = MIN(slabsize, rsize * SLAB_MAX_SETSIZE);
22004a8b575cSRyan Libby 		slabsize = MIN(slabsize, UMA_CACHESPREAD_MAX_SIZE);
220127ca37acSRyan Libby 		slabsize = round_page(slabsize);
22024a8b575cSRyan Libby 	} else {
22034a8b575cSRyan Libby 		/*
220427ca37acSRyan Libby 		 * Start with a slab size of as many pages as it takes to
220527ca37acSRyan Libby 		 * represent a single item.  We will try to fit as many
220627ca37acSRyan Libby 		 * additional items into the slab as possible.
22074a8b575cSRyan Libby 		 */
220827ca37acSRyan Libby 		slabsize = round_page(keg->uk_size);
22091ca6ed45SGleb Smirnoff 	}
2210ad97af7eSGleb Smirnoff 
221127ca37acSRyan Libby 	/* Build a list of all of the available formats for this keg. */
221227ca37acSRyan Libby 	nfmt = 0;
221327ca37acSRyan Libby 
22144a8b575cSRyan Libby 	/* Evaluate an inline slab layout. */
22154a8b575cSRyan Libby 	if ((keg->uk_flags & (UMA_ZONE_NOTOUCH | UMA_ZONE_PCPU)) == 0)
221627ca37acSRyan Libby 		fmts[nfmt++] = 0;
22174a8b575cSRyan Libby 
22184a8b575cSRyan Libby 	/* TODO: vm_page-embedded slab. */
2219244f4554SBosko Milekic 
222020e8e865SBosko Milekic 	/*
2221244f4554SBosko Milekic 	 * We can't do OFFPAGE if we're internal or if we've been
222220e8e865SBosko Milekic 	 * asked to not go to the VM for buckets.  If we do this we
2223bae55c4aSRyan Libby 	 * may end up going to the VM for slabs which we do not want
2224bae55c4aSRyan Libby 	 * to do if we're UMA_ZONE_VM, which clearly forbids it.
2225bae55c4aSRyan Libby 	 * In those cases, evaluate a pseudo-format called INTERNAL
2226bae55c4aSRyan Libby 	 * which has an inline slab header and one extra page to
2227bae55c4aSRyan Libby 	 * guarantee that it fits.
222827ca37acSRyan Libby 	 *
222927ca37acSRyan Libby 	 * Otherwise, see if using an OFFPAGE slab will improve our
223027ca37acSRyan Libby 	 * efficiency.
223120e8e865SBosko Milekic 	 */
2232bae55c4aSRyan Libby 	if ((keg->uk_flags & (UMA_ZFLAG_INTERNAL | UMA_ZONE_VM)) != 0)
223327ca37acSRyan Libby 		fmts[nfmt++] = UMA_ZFLAG_INTERNAL;
223427ca37acSRyan Libby 	else
223527ca37acSRyan Libby 		fmts[nfmt++] = UMA_ZFLAG_OFFPAGE;
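	/*
	 * To illustrate: an ordinary zone ends up evaluating the inline
	 * format and OFFPAGE; an internal or UMA_ZONE_VM zone evaluates
	 * inline and the INTERNAL pseudo-format; a NOTOUCH or PCPU zone
	 * skips the inline format and is left with a single candidate.
	 */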
2236244f4554SBosko Milekic 
2237ef72505eSJeff Roberson 	/*
223827ca37acSRyan Libby 	 * Choose a slab size and format which satisfy the minimum efficiency.
223927ca37acSRyan Libby 	 * Prefer the smallest slab size that meets the constraints.
2240ef72505eSJeff Roberson 	 *
224127ca37acSRyan Libby 	 * Start with a minimum slab size, to accommodate CACHESPREAD.  Then,
224227ca37acSRyan Libby 	 * for small items (up to PAGE_SIZE), the iteration increment is one
224327ca37acSRyan Libby 	 * page; and for large items, the increment is one item.
2244ef72505eSJeff Roberson 	 */
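	/*
	 * For instance (illustrative numbers, 4K pages): a 200 byte item
	 * with 8 byte alignment has rsize = 200, so the loop starts at
	 * i = 1 and tries slabs of 1 page, 2 pages, and so on.  A 5000
	 * byte item instead grows the candidate slab one item at a time:
	 * round_page(5000) = 8192, round_page(10000) = 12288, ...
	 */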
224527ca37acSRyan Libby 	i = (slabsize + rsize - keg->uk_size) / MAX(PAGE_SIZE, rsize);
224627ca37acSRyan Libby 	KASSERT(i >= 1, ("keg %s(%p) flags=0x%b slabsize=%u, rsize=%u, i=%u",
224727ca37acSRyan Libby 	    keg->uk_name, keg, keg->uk_flags, PRINT_UMA_ZFLAGS, slabsize,
224827ca37acSRyan Libby 	    rsize, i));
224927ca37acSRyan Libby 	for ( ; ; i++) {
225027ca37acSRyan Libby 		slabsize = (rsize <= PAGE_SIZE) ? ptoa(i) :
225127ca37acSRyan Libby 		    round_page(rsize * (i - 1) + keg->uk_size);
225227ca37acSRyan Libby 
225327ca37acSRyan Libby 		for (j = 0; j < nfmt; j++) {
225427ca37acSRyan Libby 			/* Only if we have no viable format yet. */
225527ca37acSRyan Libby 			if ((fmts[j] & UMA_ZFLAG_INTERNAL) != 0 &&
225627ca37acSRyan Libby 			    kl.ipers > 0)
225727ca37acSRyan Libby 				continue;
225827ca37acSRyan Libby 
225927ca37acSRyan Libby 			keg_layout_one(keg, rsize, slabsize, fmts[j], &kl_tmp);
226027ca37acSRyan Libby 			if (kl_tmp.eff <= kl.eff)
226127ca37acSRyan Libby 				continue;
226227ca37acSRyan Libby 
226327ca37acSRyan Libby 			kl = kl_tmp;
226427ca37acSRyan Libby 
226527ca37acSRyan Libby 			CTR6(KTR_UMA, "keg %s layout: format %#x "
226627ca37acSRyan Libby 			    "(ipers %u * rsize %u) / slabsize %#x = %u%% eff",
226727ca37acSRyan Libby 			    keg->uk_name, kl.format, kl.ipers, rsize,
226827ca37acSRyan Libby 			    kl.slabsize, UMA_FIXPT_PCT(kl.eff));
226927ca37acSRyan Libby 
227027ca37acSRyan Libby 			/* Stop when we reach the minimum efficiency. */
227127ca37acSRyan Libby 			if (kl.eff >= UMA_MIN_EFF)
227227ca37acSRyan Libby 				break;
22738355f576SJeff Roberson 		}
2274ad97af7eSGleb Smirnoff 
227533e5a1eaSRyan Libby 		if (kl.eff >= UMA_MIN_EFF || !multipage_slabs ||
227627ca37acSRyan Libby 		    slabsize >= SLAB_MAX_SETSIZE * rsize ||
227727ca37acSRyan Libby 		    (keg->uk_flags & (UMA_ZONE_PCPU | UMA_ZONE_CONTIG)) != 0)
227827ca37acSRyan Libby 			break;
227927ca37acSRyan Libby 	}
228027ca37acSRyan Libby 
228127ca37acSRyan Libby 	pages = atop(kl.slabsize);
228227ca37acSRyan Libby 	if ((keg->uk_flags & UMA_ZONE_PCPU) != 0)
228327ca37acSRyan Libby 		pages *= mp_maxid + 1;
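	/*
	 * For example, with mp_maxid == 7 (8 CPU slots) a PCPU keg whose
	 * chosen slab size is one page ends up with uk_ppera = 8, since
	 * PCPU slabs carry a copy of their pages for each CPU slot.
	 */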
228427ca37acSRyan Libby 
228527ca37acSRyan Libby 	keg->uk_rsize = rsize;
228627ca37acSRyan Libby 	keg->uk_ipers = kl.ipers;
228727ca37acSRyan Libby 	keg->uk_ppera = pages;
228827ca37acSRyan Libby 	keg->uk_flags |= kl.format;
228927ca37acSRyan Libby 
22904a8b575cSRyan Libby 	/*
22914a8b575cSRyan Libby 	 * How do we find the slab header if it is offpage or if not all item
22924a8b575cSRyan Libby 	 * start addresses are in the same page?  We could solve the latter
22934a8b575cSRyan Libby 	 * case with vaddr alignment, but we don't.
22944a8b575cSRyan Libby 	 */
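	/*
	 * Illustrative case: with ipers = 32 and rsize = 256, the last item
	 * starts at offset 31 * 256 = 7936 >= PAGE_SIZE (assuming 4K
	 * pages), so not every item's start shares a page with the slab
	 * header and a hash or vtoslab lookup is needed.
	 */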
229527ca37acSRyan Libby 	if ((keg->uk_flags & UMA_ZFLAG_OFFPAGE) != 0 ||
229627ca37acSRyan Libby 	    (keg->uk_ipers - 1) * rsize >= PAGE_SIZE) {
229754c5ae80SRyan Libby 		if ((keg->uk_flags & UMA_ZONE_NOTPAGE) != 0)
229827ca37acSRyan Libby 			keg->uk_flags |= UMA_ZFLAG_HASH;
229954c5ae80SRyan Libby 		else
230027ca37acSRyan Libby 			keg->uk_flags |= UMA_ZFLAG_VTOSLAB;
230154c5ae80SRyan Libby 	}
230227ca37acSRyan Libby 
2303e63a1c2fSRyan Libby 	CTR6(KTR_UMA, "%s: keg=%s, flags=%#x, rsize=%u, ipers=%u, ppera=%u",
230427ca37acSRyan Libby 	    __func__, keg->uk_name, keg->uk_flags, rsize, keg->uk_ipers,
230527ca37acSRyan Libby 	    pages);
23064a8b575cSRyan Libby 	KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_MAX_SETSIZE,
23074a8b575cSRyan Libby 	    ("%s: keg=%s, flags=0x%b, rsize=%u, ipers=%u, ppera=%u", __func__,
230827ca37acSRyan Libby 	     keg->uk_name, keg->uk_flags, PRINT_UMA_ZFLAGS, rsize,
230927ca37acSRyan Libby 	     keg->uk_ipers, pages));
2310e20a199fSJeff Roberson }
2311e20a199fSJeff Roberson 
23128355f576SJeff Roberson /*
2313099a0e58SBosko Milekic  * Keg header ctor.  This initializes all fields, locks, etc., and inserts
2314099a0e58SBosko Milekic  * the keg onto the global keg list.
23158355f576SJeff Roberson  *
23168355f576SJeff Roberson  * Arguments/Returns follow uma_ctor specifications
2317099a0e58SBosko Milekic  *	udata  Actually uma_kctor_args
2318099a0e58SBosko Milekic  */
2319b23f72e9SBrian Feldman static int
2320b23f72e9SBrian Feldman keg_ctor(void *mem, int size, void *udata, int flags)
2321099a0e58SBosko Milekic {
2322099a0e58SBosko Milekic 	struct uma_kctor_args *arg = udata;
2323099a0e58SBosko Milekic 	uma_keg_t keg = mem;
2324099a0e58SBosko Milekic 	uma_zone_t zone;
23258b987a77SJeff Roberson 	int i;
2326099a0e58SBosko Milekic 
2327099a0e58SBosko Milekic 	bzero(keg, size);
2328099a0e58SBosko Milekic 	keg->uk_size = arg->size;
2329099a0e58SBosko Milekic 	keg->uk_init = arg->uminit;
2330099a0e58SBosko Milekic 	keg->uk_fini = arg->fini;
2331099a0e58SBosko Milekic 	keg->uk_align = arg->align;
23326fd34d6fSJeff Roberson 	keg->uk_reserve = 0;
2333099a0e58SBosko Milekic 	keg->uk_flags = arg->flags;
2334099a0e58SBosko Milekic 
2335099a0e58SBosko Milekic 	/*
2336194a979eSMark Johnston 	 * We use a global round-robin policy by default.  Zones with
2337dfe13344SJeff Roberson 	 * UMA_ZONE_FIRSTTOUCH set will use first-touch instead, in which
2338dfe13344SJeff Roberson 	 * case the iterator is never run.
2339194a979eSMark Johnston 	 */
2340194a979eSMark Johnston 	keg->uk_dr.dr_policy = DOMAINSET_RR();
2341194a979eSMark Johnston 	keg->uk_dr.dr_iter = 0;
2342194a979eSMark Johnston 
2343194a979eSMark Johnston 	/*
2344c8b0a88bSJeff Roberson 	 * The primary zone is passed to us at keg-creation time.
2345099a0e58SBosko Milekic 	 */
2346099a0e58SBosko Milekic 	zone = arg->zone;
2347e20a199fSJeff Roberson 	keg->uk_name = zone->uz_name;
2348099a0e58SBosko Milekic 
2349099a0e58SBosko Milekic 	if (arg->flags & UMA_ZONE_ZINIT)
2350099a0e58SBosko Milekic 		keg->uk_init = zero_init;
2351099a0e58SBosko Milekic 
2352cfcae3f8SGleb Smirnoff 	if (arg->flags & UMA_ZONE_MALLOC)
235354c5ae80SRyan Libby 		keg->uk_flags |= UMA_ZFLAG_VTOSLAB;
2354e20a199fSJeff Roberson 
235554c5ae80SRyan Libby #ifndef SMP
2356ad97af7eSGleb Smirnoff 	keg->uk_flags &= ~UMA_ZONE_PCPU;
2357ad97af7eSGleb Smirnoff #endif
2358ad97af7eSGleb Smirnoff 
23594a8b575cSRyan Libby 	keg_layout(keg);
2360099a0e58SBosko Milekic 
23618b987a77SJeff Roberson 	/*
2362c6fd3e23SJeff Roberson 	 * Use a first-touch NUMA policy for kegs that pmap_extract() will
2363c6fd3e23SJeff Roberson 	 * work on.  Use round-robin for everything else.
2364dfe13344SJeff Roberson 	 *
2365dfe13344SJeff Roberson 	 * Zones may override the default by specifying either.
23668b987a77SJeff Roberson 	 */
2367dfe13344SJeff Roberson #ifdef NUMA
2368dfe13344SJeff Roberson 	if ((keg->uk_flags &
2369c6fd3e23SJeff Roberson 	    (UMA_ZONE_ROUNDROBIN | UMA_ZFLAG_CACHE | UMA_ZONE_NOTPAGE)) == 0)
2370dfe13344SJeff Roberson 		keg->uk_flags |= UMA_ZONE_FIRSTTOUCH;
2371dfe13344SJeff Roberson 	else if ((keg->uk_flags & UMA_ZONE_FIRSTTOUCH) == 0)
2372dfe13344SJeff Roberson 		keg->uk_flags |= UMA_ZONE_ROUNDROBIN;
23738b987a77SJeff Roberson #endif
23748b987a77SJeff Roberson 
2375099a0e58SBosko Milekic 	/*
2376099a0e58SBosko Milekic 	 * If we haven't booted yet, we need allocations to go through the
2377099a0e58SBosko Milekic 	 * startup cache until the VM is ready.
2378099a0e58SBosko Milekic 	 */
237977e19437SGleb Smirnoff #ifdef UMA_MD_SMALL_ALLOC
2380a81c400eSJeff Roberson 	if (keg->uk_ppera == 1)
238177e19437SGleb Smirnoff 		keg->uk_allocf = uma_small_alloc;
2382a81c400eSJeff Roberson 	else
23838cd02d00SAlan Cox #endif
2384a81c400eSJeff Roberson 	if (booted < BOOT_KVA)
2385a81c400eSJeff Roberson 		keg->uk_allocf = startup_alloc;
2386ab3059a8SMatt Macy 	else if (keg->uk_flags & UMA_ZONE_PCPU)
2387ab3059a8SMatt Macy 		keg->uk_allocf = pcpu_page_alloc;
2388ec0d8280SRyan Libby 	else if ((keg->uk_flags & UMA_ZONE_CONTIG) != 0 && keg->uk_ppera > 1)
2389ec0d8280SRyan Libby 		keg->uk_allocf = contig_alloc;
239077e19437SGleb Smirnoff 	else
239177e19437SGleb Smirnoff 		keg->uk_allocf = page_alloc;
239277e19437SGleb Smirnoff #ifdef UMA_MD_SMALL_ALLOC
239377e19437SGleb Smirnoff 	if (keg->uk_ppera == 1)
239477e19437SGleb Smirnoff 		keg->uk_freef = uma_small_free;
239577e19437SGleb Smirnoff 	else
239677e19437SGleb Smirnoff #endif
2397ab3059a8SMatt Macy 	if (keg->uk_flags & UMA_ZONE_PCPU)
2398ab3059a8SMatt Macy 		keg->uk_freef = pcpu_page_free;
2399ab3059a8SMatt Macy 	else
240077e19437SGleb Smirnoff 		keg->uk_freef = page_free;
2401099a0e58SBosko Milekic 
2402099a0e58SBosko Milekic 	/*
24038b987a77SJeff Roberson 	 * Initialize keg's locks.
2404099a0e58SBosko Milekic 	 */
24058b987a77SJeff Roberson 	for (i = 0; i < vm_ndomains; i++)
24068b987a77SJeff Roberson 		KEG_LOCK_INIT(keg, i, (arg->flags & UMA_ZONE_MTXCLASS));
2407099a0e58SBosko Milekic 
2408099a0e58SBosko Milekic 	/*
2409099a0e58SBosko Milekic 	 * If we're putting the slab header in the actual page we need to
24109b78b1f4SJeff Roberson 	 * figure out where in each page it goes.  See slab_sizeof
24119b78b1f4SJeff Roberson 	 * definition.
2412099a0e58SBosko Milekic 	 */
241354c5ae80SRyan Libby 	if (!(keg->uk_flags & UMA_ZFLAG_OFFPAGE)) {
24149b78b1f4SJeff Roberson 		size_t shsize;
24159b78b1f4SJeff Roberson 
24169b78b1f4SJeff Roberson 		shsize = slab_sizeof(keg->uk_ipers);
24179b78b1f4SJeff Roberson 		keg->uk_pgoff = (PAGE_SIZE * keg->uk_ppera) - shsize;
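		/*
		 * Illustrative example: for a one-page slab whose inline
		 * header is assumed to occupy 96 bytes, uk_pgoff would be
		 * 4096 - 96 = 4000, i.e. the header sits at the very end
		 * of the slab's last page.
		 */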
2418244f4554SBosko Milekic 		/*
2419244f4554SBosko Milekic 		 * The only way the following is possible is if, with our
2420244f4554SBosko Milekic 		 * UMA_ALIGN_PTR adjustments, we are now bigger than
2421244f4554SBosko Milekic 		 * UMA_SLAB_SIZE.  I haven't checked whether this is
2422244f4554SBosko Milekic 		 * mathematically possible for all cases, so we make
2423244f4554SBosko Milekic 		 * sure here anyway.
2424244f4554SBosko Milekic 		 */
24259b78b1f4SJeff Roberson 		KASSERT(keg->uk_pgoff + shsize <= PAGE_SIZE * keg->uk_ppera,
24263d5e3df7SGleb Smirnoff 		    ("zone %s ipers %d rsize %d size %d slab won't fit",
24273d5e3df7SGleb Smirnoff 		    zone->uz_name, keg->uk_ipers, keg->uk_rsize, keg->uk_size));
2428099a0e58SBosko Milekic 	}
2429099a0e58SBosko Milekic 
243054c5ae80SRyan Libby 	if (keg->uk_flags & UMA_ZFLAG_HASH)
24313b2f2cb8SAlexander Motin 		hash_alloc(&keg->uk_hash, 0);
2432099a0e58SBosko Milekic 
2433e63a1c2fSRyan Libby 	CTR3(KTR_UMA, "keg_ctor %p zone %s(%p)", keg, zone->uz_name, zone);
2434099a0e58SBosko Milekic 
2435099a0e58SBosko Milekic 	LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
2436099a0e58SBosko Milekic 
2437111fbcd5SBryan Venteicher 	rw_wlock(&uma_rwlock);
2438099a0e58SBosko Milekic 	LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
2439111fbcd5SBryan Venteicher 	rw_wunlock(&uma_rwlock);
2440b23f72e9SBrian Feldman 	return (0);
2441099a0e58SBosko Milekic }
2442099a0e58SBosko Milekic 
24432efcc8cbSGleb Smirnoff static void
2444a81c400eSJeff Roberson zone_kva_available(uma_zone_t zone, void *unused)
2445a81c400eSJeff Roberson {
2446a81c400eSJeff Roberson 	uma_keg_t keg;
2447a81c400eSJeff Roberson 
2448a81c400eSJeff Roberson 	if ((zone->uz_flags & UMA_ZFLAG_CACHE) != 0)
2449a81c400eSJeff Roberson 		return;
2450a81c400eSJeff Roberson 	KEG_GET(zone, keg);
2451ec0d8280SRyan Libby 
2452ec0d8280SRyan Libby 	if (keg->uk_allocf == startup_alloc) {
2453ec0d8280SRyan Libby 		/* Switch to the real allocator. */
2454f96d4157SJeff Roberson 		if (keg->uk_flags & UMA_ZONE_PCPU)
2455f96d4157SJeff Roberson 			keg->uk_allocf = pcpu_page_alloc;
2456ec0d8280SRyan Libby 		else if ((keg->uk_flags & UMA_ZONE_CONTIG) != 0 &&
2457ec0d8280SRyan Libby 		    keg->uk_ppera > 1)
2458ec0d8280SRyan Libby 			keg->uk_allocf = contig_alloc;
2459ec0d8280SRyan Libby 		else
2460a81c400eSJeff Roberson 			keg->uk_allocf = page_alloc;
2461a81c400eSJeff Roberson 	}
2462ec0d8280SRyan Libby }
2463a81c400eSJeff Roberson 
2464a81c400eSJeff Roberson static void
246520a4e154SJeff Roberson zone_alloc_counters(uma_zone_t zone, void *unused)
24662efcc8cbSGleb Smirnoff {
24672efcc8cbSGleb Smirnoff 
24682efcc8cbSGleb Smirnoff 	zone->uz_allocs = counter_u64_alloc(M_WAITOK);
24692efcc8cbSGleb Smirnoff 	zone->uz_frees = counter_u64_alloc(M_WAITOK);
24702efcc8cbSGleb Smirnoff 	zone->uz_fails = counter_u64_alloc(M_WAITOK);
2471c6fd3e23SJeff Roberson 	zone->uz_xdomain = counter_u64_alloc(M_WAITOK);
24722efcc8cbSGleb Smirnoff }
24732efcc8cbSGleb Smirnoff 
247420a4e154SJeff Roberson static void
247520a4e154SJeff Roberson zone_alloc_sysctl(uma_zone_t zone, void *unused)
247620a4e154SJeff Roberson {
247720a4e154SJeff Roberson 	uma_zone_domain_t zdom;
24788b987a77SJeff Roberson 	uma_domain_t dom;
247920a4e154SJeff Roberson 	uma_keg_t keg;
248020a4e154SJeff Roberson 	struct sysctl_oid *oid, *domainoid;
24813b490537SJeff Roberson 	int domains, i, cnt;
248220a4e154SJeff Roberson 	static const char *nokeg = "cache zone";
248320a4e154SJeff Roberson 	char *c;
248420a4e154SJeff Roberson 
248520a4e154SJeff Roberson 	/*
248620a4e154SJeff Roberson 	 * Make a sysctl-safe copy of the zone name by removing
248720a4e154SJeff Roberson 	 * any special characters and handling duplicates by appending
248820a4e154SJeff Roberson 	 * an index.
248920a4e154SJeff Roberson 	 */
249020a4e154SJeff Roberson 	if (zone->uz_namecnt != 0) {
24913b490537SJeff Roberson 		/* Count the number of decimal digits and '_' separator. */
24923b490537SJeff Roberson 		for (i = 1, cnt = zone->uz_namecnt; cnt != 0; i++)
24933b490537SJeff Roberson 			cnt /= 10;
24943b490537SJeff Roberson 		zone->uz_ctlname = malloc(strlen(zone->uz_name) + i + 1,
24953b490537SJeff Roberson 		    M_UMA, M_WAITOK);
249620a4e154SJeff Roberson 		sprintf(zone->uz_ctlname, "%s_%d", zone->uz_name,
249720a4e154SJeff Roberson 		    zone->uz_namecnt);
249820a4e154SJeff Roberson 	} else
249920a4e154SJeff Roberson 		zone->uz_ctlname = strdup(zone->uz_name, M_UMA);
250020a4e154SJeff Roberson 	for (c = zone->uz_ctlname; *c != '\0'; c++)
250120a4e154SJeff Roberson 		if (strchr("./\\ -", *c) != NULL)
250220a4e154SJeff Roberson 			*c = '_';
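	/*
	 * For example, a hypothetical zone named "foo bar" becomes the
	 * sysctl node "foo_bar", and a second zone created with the same
	 * name (uz_namecnt == 1) becomes "foo_bar_1".
	 */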
250320a4e154SJeff Roberson 
250420a4e154SJeff Roberson 	/*
250520a4e154SJeff Roberson 	 * Basic parameters at the root.
250620a4e154SJeff Roberson 	 */
250720a4e154SJeff Roberson 	zone->uz_oid = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_vm_uma),
25087029da5cSPawel Biernacki 	    OID_AUTO, zone->uz_ctlname, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
250920a4e154SJeff Roberson 	oid = zone->uz_oid;
251020a4e154SJeff Roberson 	SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
251120a4e154SJeff Roberson 	    "size", CTLFLAG_RD, &zone->uz_size, 0, "Allocation size");
25126d204a6aSRyan Libby 	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
25136d204a6aSRyan Libby 	    "flags", CTLFLAG_RD | CTLTYPE_STRING | CTLFLAG_MPSAFE,
25146d204a6aSRyan Libby 	    zone, 0, sysctl_handle_uma_zone_flags, "A",
251520a4e154SJeff Roberson 	    "Allocator configuration flags");
251620a4e154SJeff Roberson 	SYSCTL_ADD_U16(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
251720a4e154SJeff Roberson 	    "bucket_size", CTLFLAG_RD, &zone->uz_bucket_size, 0,
251820a4e154SJeff Roberson 	    "Desired per-cpu cache size");
251920a4e154SJeff Roberson 	SYSCTL_ADD_U16(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
252020a4e154SJeff Roberson 	    "bucket_size_max", CTLFLAG_RD, &zone->uz_bucket_size_max, 0,
252120a4e154SJeff Roberson 	    "Maximum allowed per-cpu cache size");
252220a4e154SJeff Roberson 
252320a4e154SJeff Roberson 	/*
252420a4e154SJeff Roberson 	 * keg if present.
252520a4e154SJeff Roberson 	 */
252654c5ae80SRyan Libby 	if ((zone->uz_flags & UMA_ZFLAG_HASH) == 0)
25278b987a77SJeff Roberson 		domains = vm_ndomains;
25288b987a77SJeff Roberson 	else
25298b987a77SJeff Roberson 		domains = 1;
253020a4e154SJeff Roberson 	oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid), OID_AUTO,
25317029da5cSPawel Biernacki 	    "keg", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
253220a4e154SJeff Roberson 	keg = zone->uz_keg;
25333b490537SJeff Roberson 	if ((zone->uz_flags & UMA_ZFLAG_CACHE) == 0) {
253420a4e154SJeff Roberson 		SYSCTL_ADD_CONST_STRING(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
253520a4e154SJeff Roberson 		    "name", CTLFLAG_RD, keg->uk_name, "Keg name");
253620a4e154SJeff Roberson 		SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
253720a4e154SJeff Roberson 		    "rsize", CTLFLAG_RD, &keg->uk_rsize, 0,
253820a4e154SJeff Roberson 		    "Real object size with alignment");
253920a4e154SJeff Roberson 		SYSCTL_ADD_U16(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
254020a4e154SJeff Roberson 		    "ppera", CTLFLAG_RD, &keg->uk_ppera, 0,
254120a4e154SJeff Roberson 		    "pages per-slab allocation");
254220a4e154SJeff Roberson 		SYSCTL_ADD_U16(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
254320a4e154SJeff Roberson 		    "ipers", CTLFLAG_RD, &keg->uk_ipers, 0,
254420a4e154SJeff Roberson 		    "items available per-slab");
254520a4e154SJeff Roberson 		SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
254620a4e154SJeff Roberson 		    "align", CTLFLAG_RD, &keg->uk_align, 0,
254720a4e154SJeff Roberson 		    "item alignment mask");
2548f09cbea3SMark Johnston 		SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2549f09cbea3SMark Johnston 		    "reserve", CTLFLAG_RD, &keg->uk_reserve, 0,
2550f09cbea3SMark Johnston 		    "number of reserved items");
2551f7af5015SRyan Libby 		SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2552f7af5015SRyan Libby 		    "efficiency", CTLFLAG_RD | CTLTYPE_INT | CTLFLAG_MPSAFE,
2553f7af5015SRyan Libby 		    keg, 0, sysctl_handle_uma_slab_efficiency, "I",
2554f7af5015SRyan Libby 		    "Slab utilization (100 - internal fragmentation %)");
25558b987a77SJeff Roberson 		domainoid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(oid),
25567029da5cSPawel Biernacki 		    OID_AUTO, "domain", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
25578b987a77SJeff Roberson 		for (i = 0; i < domains; i++) {
25588b987a77SJeff Roberson 			dom = &keg->uk_domain[i];
25598b987a77SJeff Roberson 			oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(domainoid),
25607029da5cSPawel Biernacki 			    OID_AUTO, VM_DOMAIN(i)->vmd_name,
25617029da5cSPawel Biernacki 			    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
25628b987a77SJeff Roberson 			SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
25638b987a77SJeff Roberson 			    "pages", CTLFLAG_RD, &dom->ud_pages, 0,
25648b987a77SJeff Roberson 			    "Total pages currently allocated from VM");
25658b987a77SJeff Roberson 			SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
25664ab3aee8SMark Johnston 			    "free_items", CTLFLAG_RD, &dom->ud_free_items, 0,
25678b987a77SJeff Roberson 			    "items free in the slab layer");
25688b987a77SJeff Roberson 		}
256920a4e154SJeff Roberson 	} else
257020a4e154SJeff Roberson 		SYSCTL_ADD_CONST_STRING(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
257120a4e154SJeff Roberson 		    "name", CTLFLAG_RD, nokeg, "Keg name");
257220a4e154SJeff Roberson 
257320a4e154SJeff Roberson 	/*
257420a4e154SJeff Roberson 	 * Information about zone limits.
257520a4e154SJeff Roberson 	 */
257620a4e154SJeff Roberson 	oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid), OID_AUTO,
25777029da5cSPawel Biernacki 	    "limit", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
25784bd61e19SJeff Roberson 	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
25794bd61e19SJeff Roberson 	    "items", CTLFLAG_RD | CTLTYPE_U64 | CTLFLAG_MPSAFE,
25804bd61e19SJeff Roberson 	    zone, 0, sysctl_handle_uma_zone_items, "QU",
2581e574d407SMark Johnston 	    "Current number of allocated items if limit is set");
258220a4e154SJeff Roberson 	SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
258320a4e154SJeff Roberson 	    "max_items", CTLFLAG_RD, &zone->uz_max_items, 0,
2584e574d407SMark Johnston 	    "Maximum number of allocated and cached items");
258520a4e154SJeff Roberson 	SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
258620a4e154SJeff Roberson 	    "sleepers", CTLFLAG_RD, &zone->uz_sleepers, 0,
258720a4e154SJeff Roberson 	    "Number of threads sleeping at limit");
258820a4e154SJeff Roberson 	SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
258920a4e154SJeff Roberson 	    "sleeps", CTLFLAG_RD, &zone->uz_sleeps, 0,
259020a4e154SJeff Roberson 	    "Total zone limit sleeps");
25914bd61e19SJeff Roberson 	SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2592c6fd3e23SJeff Roberson 	    "bucket_max", CTLFLAG_RD, &zone->uz_bucket_max, 0,
2593c6fd3e23SJeff Roberson 	    "Maximum number of items in each domain's bucket cache");
259420a4e154SJeff Roberson 
259520a4e154SJeff Roberson 	/*
25968b987a77SJeff Roberson 	 * Per-domain zone information.
259720a4e154SJeff Roberson 	 */
259820a4e154SJeff Roberson 	domainoid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid),
25997029da5cSPawel Biernacki 	    OID_AUTO, "domain", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
260020a4e154SJeff Roberson 	for (i = 0; i < domains; i++) {
2601c6fd3e23SJeff Roberson 		zdom = ZDOM_GET(zone, i);
260220a4e154SJeff Roberson 		oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(domainoid),
26037029da5cSPawel Biernacki 		    OID_AUTO, VM_DOMAIN(i)->vmd_name,
26047029da5cSPawel Biernacki 		    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
260520a4e154SJeff Roberson 		SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
260620a4e154SJeff Roberson 		    "nitems", CTLFLAG_RD, &zdom->uzd_nitems,
260720a4e154SJeff Roberson 		    "number of items in this domain");
260820a4e154SJeff Roberson 		SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
260920a4e154SJeff Roberson 		    "imax", CTLFLAG_RD, &zdom->uzd_imax,
261020a4e154SJeff Roberson 		    "maximum item count in this period");
261120a4e154SJeff Roberson 		SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
261220a4e154SJeff Roberson 		    "imin", CTLFLAG_RD, &zdom->uzd_imin,
261320a4e154SJeff Roberson 		    "minimum item count in this period");
261420a4e154SJeff Roberson 		SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
261520a4e154SJeff Roberson 		    "wss", CTLFLAG_RD, &zdom->uzd_wss,
261620a4e154SJeff Roberson 		    "Working set size");
261720a4e154SJeff Roberson 	}
261820a4e154SJeff Roberson 
261920a4e154SJeff Roberson 	/*
262020a4e154SJeff Roberson 	 * General statistics.
262120a4e154SJeff Roberson 	 */
262220a4e154SJeff Roberson 	oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid), OID_AUTO,
26237029da5cSPawel Biernacki 	    "stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
262420a4e154SJeff Roberson 	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
262520a4e154SJeff Roberson 	    "current", CTLFLAG_RD | CTLTYPE_INT | CTLFLAG_MPSAFE,
262620a4e154SJeff Roberson 	    zone, 1, sysctl_handle_uma_zone_cur, "I",
262720a4e154SJeff Roberson 	    "Current number of allocated items");
262820a4e154SJeff Roberson 	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
262920a4e154SJeff Roberson 	    "allocs", CTLFLAG_RD | CTLTYPE_U64 | CTLFLAG_MPSAFE,
263020a4e154SJeff Roberson 	    zone, 0, sysctl_handle_uma_zone_allocs, "QU",
263120a4e154SJeff Roberson 	    "Total allocation calls");
263220a4e154SJeff Roberson 	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
263320a4e154SJeff Roberson 	    "frees", CTLFLAG_RD | CTLTYPE_U64 | CTLFLAG_MPSAFE,
263420a4e154SJeff Roberson 	    zone, 0, sysctl_handle_uma_zone_frees, "QU",
263520a4e154SJeff Roberson 	    "Total free calls");
263620a4e154SJeff Roberson 	SYSCTL_ADD_COUNTER_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
263720a4e154SJeff Roberson 	    "fails", CTLFLAG_RD, &zone->uz_fails,
263820a4e154SJeff Roberson 	    "Number of allocation failures");
2639c6fd3e23SJeff Roberson 	SYSCTL_ADD_COUNTER_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2640c6fd3e23SJeff Roberson 	    "xdomain", CTLFLAG_RD, &zone->uz_xdomain,
264120a4e154SJeff Roberson 	    "Free calls from the wrong domain");
264220a4e154SJeff Roberson }
264320a4e154SJeff Roberson 
264420a4e154SJeff Roberson struct uma_zone_count {
264520a4e154SJeff Roberson 	const char	*name;
264620a4e154SJeff Roberson 	int		count;
264720a4e154SJeff Roberson };
264820a4e154SJeff Roberson 
264920a4e154SJeff Roberson static void
265020a4e154SJeff Roberson zone_count(uma_zone_t zone, void *arg)
265120a4e154SJeff Roberson {
265220a4e154SJeff Roberson 	struct uma_zone_count *cnt;
265320a4e154SJeff Roberson 
265420a4e154SJeff Roberson 	cnt = arg;
26553b490537SJeff Roberson 	/*
26563b490537SJeff Roberson 	 * Some zones are rapidly created with identical names and
26573b490537SJeff Roberson 	 * destroyed out of order.  This can lead to gaps in the count.
26583b490537SJeff Roberson 	 * Use one greater than the maximum observed for this name.
26593b490537SJeff Roberson 	 */
266020a4e154SJeff Roberson 	if (strcmp(zone->uz_name, cnt->name) == 0)
26613b490537SJeff Roberson 		cnt->count = MAX(cnt->count,
26623b490537SJeff Roberson 		    zone->uz_namecnt + 1);
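	/*
	 * E.g. if zones named "foo" with uz_namecnt 0 and 2 still exist,
	 * the count becomes 3 and the next "foo" is named "foo_3", even
	 * though the "_1" suffix is currently unused.
	 */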
266320a4e154SJeff Roberson }
266420a4e154SJeff Roberson 
2665cc7ce83aSJeff Roberson static void
2666cc7ce83aSJeff Roberson zone_update_caches(uma_zone_t zone)
2667cc7ce83aSJeff Roberson {
2668cc7ce83aSJeff Roberson 	int i;
2669cc7ce83aSJeff Roberson 
2670cc7ce83aSJeff Roberson 	for (i = 0; i <= mp_maxid; i++) {
2671cc7ce83aSJeff Roberson 		cache_set_uz_size(&zone->uz_cpu[i], zone->uz_size);
2672cc7ce83aSJeff Roberson 		cache_set_uz_flags(&zone->uz_cpu[i], zone->uz_flags);
2673cc7ce83aSJeff Roberson 	}
2674cc7ce83aSJeff Roberson }
2675cc7ce83aSJeff Roberson 
2676099a0e58SBosko Milekic /*
2677099a0e58SBosko Milekic  * Zone header ctor.  This initializes all fields, locks, etc.
2678099a0e58SBosko Milekic  *
2679099a0e58SBosko Milekic  * Arguments/Returns follow uma_ctor specifications
2680099a0e58SBosko Milekic  *	udata  Actually uma_zctor_args
26818355f576SJeff Roberson  */
2682b23f72e9SBrian Feldman static int
2683b23f72e9SBrian Feldman zone_ctor(void *mem, int size, void *udata, int flags)
26848355f576SJeff Roberson {
268520a4e154SJeff Roberson 	struct uma_zone_count cnt;
26868355f576SJeff Roberson 	struct uma_zctor_args *arg = udata;
2687c6fd3e23SJeff Roberson 	uma_zone_domain_t zdom;
26888355f576SJeff Roberson 	uma_zone_t zone = mem;
2689099a0e58SBosko Milekic 	uma_zone_t z;
2690099a0e58SBosko Milekic 	uma_keg_t keg;
269108cfa56eSMark Johnston 	int i;
26928355f576SJeff Roberson 
26938355f576SJeff Roberson 	bzero(zone, size);
26948355f576SJeff Roberson 	zone->uz_name = arg->name;
26958355f576SJeff Roberson 	zone->uz_ctor = arg->ctor;
26968355f576SJeff Roberson 	zone->uz_dtor = arg->dtor;
2697099a0e58SBosko Milekic 	zone->uz_init = NULL;
2698099a0e58SBosko Milekic 	zone->uz_fini = NULL;
2699bf965959SSean Bruno 	zone->uz_sleeps = 0;
270020a4e154SJeff Roberson 	zone->uz_bucket_size = 0;
270120a4e154SJeff Roberson 	zone->uz_bucket_size_min = 0;
270220a4e154SJeff Roberson 	zone->uz_bucket_size_max = BUCKET_MAX;
2703d4665eaaSJeff Roberson 	zone->uz_flags = (arg->flags & UMA_ZONE_SMR);
27042f891cd5SPawel Jakub Dawidek 	zone->uz_warning = NULL;
2705ab3185d1SJeff Roberson 	/* The domain structures follow the cpu structures. */
2706c6fd3e23SJeff Roberson 	zone->uz_bucket_max = ULONG_MAX;
27072f891cd5SPawel Jakub Dawidek 	timevalclear(&zone->uz_ratecheck);
2708af526374SJeff Roberson 
270920a4e154SJeff Roberson 	/* Count the number of duplicate names. */
271020a4e154SJeff Roberson 	cnt.name = arg->name;
271120a4e154SJeff Roberson 	cnt.count = 0;
271220a4e154SJeff Roberson 	zone_foreach(zone_count, &cnt);
271320a4e154SJeff Roberson 	zone->uz_namecnt = cnt.count;
271491d947bfSJeff Roberson 	ZONE_CROSS_LOCK_INIT(zone);
27152efcc8cbSGleb Smirnoff 
2716c6fd3e23SJeff Roberson 	for (i = 0; i < vm_ndomains; i++) {
2717c6fd3e23SJeff Roberson 		zdom = ZDOM_GET(zone, i);
2718c6fd3e23SJeff Roberson 		ZDOM_LOCK_INIT(zone, zdom, (arg->flags & UMA_ZONE_MTXCLASS));
2719c6fd3e23SJeff Roberson 		STAILQ_INIT(&zdom->uzd_buckets);
2720c6fd3e23SJeff Roberson 	}
272108cfa56eSMark Johnston 
272209c8cb71SMark Johnston #if defined(INVARIANTS) && !defined(KASAN)
2723ca293436SRyan Libby 	if (arg->uminit == trash_init && arg->fini == trash_fini)
2724cc7ce83aSJeff Roberson 		zone->uz_flags |= UMA_ZFLAG_TRASH | UMA_ZFLAG_CTORDTOR;
272509c8cb71SMark Johnston #elif defined(KASAN)
272609c8cb71SMark Johnston 	if ((arg->flags & (UMA_ZONE_NOFREE | UMA_ZFLAG_CACHE)) != 0)
272709c8cb71SMark Johnston 		arg->flags |= UMA_ZONE_NOKASAN;
2728ca293436SRyan Libby #endif
2729ca293436SRyan Libby 
27300095a784SJeff Roberson 	/*
27310095a784SJeff Roberson 	 * This is a pure cache zone, no kegs.
27320095a784SJeff Roberson 	 */
27330095a784SJeff Roberson 	if (arg->import) {
2734727c6918SJeff Roberson 		KASSERT((arg->flags & UMA_ZFLAG_CACHE) != 0,
2735727c6918SJeff Roberson 		    ("zone_ctor: Import specified for non-cache zone."));
27366fd34d6fSJeff Roberson 		zone->uz_flags = arg->flags;
2737af526374SJeff Roberson 		zone->uz_size = arg->size;
27380095a784SJeff Roberson 		zone->uz_import = arg->import;
27390095a784SJeff Roberson 		zone->uz_release = arg->release;
27400095a784SJeff Roberson 		zone->uz_arg = arg->arg;
2741c6fd3e23SJeff Roberson #ifdef NUMA
2742c6fd3e23SJeff Roberson 		/*
2743c6fd3e23SJeff Roberson 		 * Cache zones are round-robin unless a policy is
2744c6fd3e23SJeff Roberson 		 * specified because they may have incompatible
2745c6fd3e23SJeff Roberson 		 * constraints.
2746c6fd3e23SJeff Roberson 		 */
2747c6fd3e23SJeff Roberson 		if ((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) == 0)
2748c6fd3e23SJeff Roberson 			zone->uz_flags |= UMA_ZONE_ROUNDROBIN;
2749c6fd3e23SJeff Roberson #endif
2750111fbcd5SBryan Venteicher 		rw_wlock(&uma_rwlock);
275103175483SAlexander Motin 		LIST_INSERT_HEAD(&uma_cachezones, zone, uz_link);
2752111fbcd5SBryan Venteicher 		rw_wunlock(&uma_rwlock);
2753af526374SJeff Roberson 		goto out;
27540095a784SJeff Roberson 	}
27550095a784SJeff Roberson 
27560095a784SJeff Roberson 	/*
27570095a784SJeff Roberson 	 * Use the regular zone/keg/slab allocator.
27580095a784SJeff Roberson 	 */
2759b75c4efcSAndrew Turner 	zone->uz_import = zone_import;
2760b75c4efcSAndrew Turner 	zone->uz_release = zone_release;
27610095a784SJeff Roberson 	zone->uz_arg = zone;
2762bb15d1c7SGleb Smirnoff 	keg = arg->keg;
27630095a784SJeff Roberson 
2764099a0e58SBosko Milekic 	if (arg->flags & UMA_ZONE_SECONDARY) {
276520a4e154SJeff Roberson 		KASSERT((zone->uz_flags & UMA_ZONE_SECONDARY) == 0,
276620a4e154SJeff Roberson 		    ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
2767099a0e58SBosko Milekic 		KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
27688355f576SJeff Roberson 		zone->uz_init = arg->uminit;
2769e221e841SJeff Roberson 		zone->uz_fini = arg->fini;
2770e20a199fSJeff Roberson 		zone->uz_flags |= UMA_ZONE_SECONDARY;
2771111fbcd5SBryan Venteicher 		rw_wlock(&uma_rwlock);
2772099a0e58SBosko Milekic 		ZONE_LOCK(zone);
2773099a0e58SBosko Milekic 		LIST_FOREACH(z, &keg->uk_zones, uz_link) {
2774099a0e58SBosko Milekic 			if (LIST_NEXT(z, uz_link) == NULL) {
2775099a0e58SBosko Milekic 				LIST_INSERT_AFTER(z, zone, uz_link);
2776099a0e58SBosko Milekic 				break;
2777099a0e58SBosko Milekic 			}
2778099a0e58SBosko Milekic 		}
2779099a0e58SBosko Milekic 		ZONE_UNLOCK(zone);
2780111fbcd5SBryan Venteicher 		rw_wunlock(&uma_rwlock);
2781e20a199fSJeff Roberson 	} else if (keg == NULL) {
2782e20a199fSJeff Roberson 		if ((keg = uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
2783e20a199fSJeff Roberson 		    arg->align, arg->flags)) == NULL)
2784b23f72e9SBrian Feldman 			return (ENOMEM);
2785099a0e58SBosko Milekic 	} else {
2786099a0e58SBosko Milekic 		struct uma_kctor_args karg;
2787b23f72e9SBrian Feldman 		int error;
2788099a0e58SBosko Milekic 
2789099a0e58SBosko Milekic 		/* We should only be here from uma_startup() */
2790099a0e58SBosko Milekic 		karg.size = arg->size;
2791099a0e58SBosko Milekic 		karg.uminit = arg->uminit;
2792099a0e58SBosko Milekic 		karg.fini = arg->fini;
2793099a0e58SBosko Milekic 		karg.align = arg->align;
2794d4665eaaSJeff Roberson 		karg.flags = (arg->flags & ~UMA_ZONE_SMR);
2795099a0e58SBosko Milekic 		karg.zone = zone;
2796b23f72e9SBrian Feldman 		error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
2797b23f72e9SBrian Feldman 		    flags);
2798b23f72e9SBrian Feldman 		if (error)
2799b23f72e9SBrian Feldman 			return (error);
2800099a0e58SBosko Milekic 	}
28010095a784SJeff Roberson 
280220a4e154SJeff Roberson 	/* Inherit properties from the keg. */
2803bb15d1c7SGleb Smirnoff 	zone->uz_keg = keg;
2804e20a199fSJeff Roberson 	zone->uz_size = keg->uk_size;
2805e20a199fSJeff Roberson 	zone->uz_flags |= (keg->uk_flags &
2806e20a199fSJeff Roberson 	    (UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT));
28078355f576SJeff Roberson 
280820a4e154SJeff Roberson out:
2809dc2b3205SMark Johnston 	if (booted >= BOOT_PCPU) {
281020a4e154SJeff Roberson 		zone_alloc_counters(zone, NULL);
2811dc2b3205SMark Johnston 		if (booted >= BOOT_RUNNING)
281220a4e154SJeff Roberson 			zone_alloc_sysctl(zone, NULL);
281320a4e154SJeff Roberson 	} else {
281420a4e154SJeff Roberson 		zone->uz_allocs = EARLY_COUNTER;
281520a4e154SJeff Roberson 		zone->uz_frees = EARLY_COUNTER;
281620a4e154SJeff Roberson 		zone->uz_fails = EARLY_COUNTER;
2817099a0e58SBosko Milekic 	}
28188355f576SJeff Roberson 
2819d4665eaaSJeff Roberson 	/* Caller requests a private SMR context. */
2820d4665eaaSJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_SMR) != 0)
2821226dd6dbSJeff Roberson 		zone->uz_smr = smr_create(zone->uz_name, 0, 0);
2822d4665eaaSJeff Roberson 
28237e28037aSMark Johnston 	KASSERT((arg->flags & (UMA_ZONE_MAXBUCKET | UMA_ZONE_NOBUCKET)) !=
28247e28037aSMark Johnston 	    (UMA_ZONE_MAXBUCKET | UMA_ZONE_NOBUCKET),
28257e28037aSMark Johnston 	    ("Invalid zone flag combination"));
282620a4e154SJeff Roberson 	if (arg->flags & UMA_ZFLAG_INTERNAL)
282720a4e154SJeff Roberson 		zone->uz_bucket_size_max = zone->uz_bucket_size = 0;
282820a4e154SJeff Roberson 	if ((arg->flags & UMA_ZONE_MAXBUCKET) != 0)
282920a4e154SJeff Roberson 		zone->uz_bucket_size = BUCKET_MAX;
283020a4e154SJeff Roberson 	else if ((arg->flags & UMA_ZONE_NOBUCKET) != 0)
283120a4e154SJeff Roberson 		zone->uz_bucket_size = 0;
28327e28037aSMark Johnston 	else
283320a4e154SJeff Roberson 		zone->uz_bucket_size = bucket_select(zone->uz_size);
283420a4e154SJeff Roberson 	zone->uz_bucket_size_min = zone->uz_bucket_size;
2835cc7ce83aSJeff Roberson 	if (zone->uz_dtor != NULL || zone->uz_ctor != NULL)
2836cc7ce83aSJeff Roberson 		zone->uz_flags |= UMA_ZFLAG_CTORDTOR;
2837cc7ce83aSJeff Roberson 	zone_update_caches(zone);
2838fc03d22bSJeff Roberson 
2839b23f72e9SBrian Feldman 	return (0);
28408355f576SJeff Roberson }
28418355f576SJeff Roberson 
28428355f576SJeff Roberson /*
2843099a0e58SBosko Milekic  * Keg header dtor.  This frees all data, destroys locks, frees the hash
2844099a0e58SBosko Milekic  * table and removes the keg from the global list.
28459c2cd7e5SJeff Roberson  *
28469c2cd7e5SJeff Roberson  * Arguments/Returns follow uma_dtor specifications
28479c2cd7e5SJeff Roberson  *	udata  unused
28489c2cd7e5SJeff Roberson  */
2849099a0e58SBosko Milekic static void
2850099a0e58SBosko Milekic keg_dtor(void *arg, int size, void *udata)
2851099a0e58SBosko Milekic {
2852099a0e58SBosko Milekic 	uma_keg_t keg;
28538b987a77SJeff Roberson 	uint32_t free, pages;
28548b987a77SJeff Roberson 	int i;
28559c2cd7e5SJeff Roberson 
2856099a0e58SBosko Milekic 	keg = (uma_keg_t)arg;
28578b987a77SJeff Roberson 	free = pages = 0;
28588b987a77SJeff Roberson 	for (i = 0; i < vm_ndomains; i++) {
28594ab3aee8SMark Johnston 		free += keg->uk_domain[i].ud_free_items;
28608b987a77SJeff Roberson 		pages += keg->uk_domain[i].ud_pages;
28618b987a77SJeff Roberson 		KEG_LOCK_FINI(keg, i);
2862099a0e58SBosko Milekic 	}
28637e240677SRyan Libby 	if (pages != 0)
28648b987a77SJeff Roberson 		printf("Freed UMA keg (%s) was not empty (%u items). "
28658b987a77SJeff Roberson 		    " Lost %u pages of memory.\n",
28668b987a77SJeff Roberson 		    keg->uk_name ? keg->uk_name : "",
28677e240677SRyan Libby 		    pages / keg->uk_ppera * keg->uk_ipers - free, pages);
2868099a0e58SBosko Milekic 
2869099a0e58SBosko Milekic 	hash_free(&keg->uk_hash);
2870099a0e58SBosko Milekic }
2871099a0e58SBosko Milekic 
2872099a0e58SBosko Milekic /*
2873099a0e58SBosko Milekic  * Zone header dtor.
2874099a0e58SBosko Milekic  *
2875099a0e58SBosko Milekic  * Arguments/Returns follow uma_dtor specifications
2876099a0e58SBosko Milekic  *	udata  unused
2877099a0e58SBosko Milekic  */
28789c2cd7e5SJeff Roberson static void
28799c2cd7e5SJeff Roberson zone_dtor(void *arg, int size, void *udata)
28809c2cd7e5SJeff Roberson {
28819c2cd7e5SJeff Roberson 	uma_zone_t zone;
2882099a0e58SBosko Milekic 	uma_keg_t keg;
2883c6fd3e23SJeff Roberson 	int i;
28849c2cd7e5SJeff Roberson 
28859c2cd7e5SJeff Roberson 	zone = (uma_zone_t)arg;
28869643769aSJeff Roberson 
288720a4e154SJeff Roberson 	sysctl_remove_oid(zone->uz_oid, 1, 1);
288820a4e154SJeff Roberson 
2889e20a199fSJeff Roberson 	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
28909643769aSJeff Roberson 		cache_drain(zone);
2891099a0e58SBosko Milekic 
2892111fbcd5SBryan Venteicher 	rw_wlock(&uma_rwlock);
2893099a0e58SBosko Milekic 	LIST_REMOVE(zone, uz_link);
2894111fbcd5SBryan Venteicher 	rw_wunlock(&uma_rwlock);
28957b516613SJonathan T. Looney 	if ((zone->uz_flags & (UMA_ZONE_SECONDARY | UMA_ZFLAG_CACHE)) == 0) {
28967b516613SJonathan T. Looney 		keg = zone->uz_keg;
28977b516613SJonathan T. Looney 		keg->uk_reserve = 0;
28987b516613SJonathan T. Looney 	}
2899*aabe13f1SMark Johnston 	zone_reclaim(zone, UMA_ANYDOMAIN, M_WAITOK, true);
2900c6fd3e23SJeff Roberson 
2901e20a199fSJeff Roberson 	/*
2902323ad386STycho Nightingale 	 * We only destroy kegs from non-secondary/non-cache zones.
2903e20a199fSJeff Roberson 	 */
2904323ad386STycho Nightingale 	if ((zone->uz_flags & (UMA_ZONE_SECONDARY | UMA_ZFLAG_CACHE)) == 0) {
2905323ad386STycho Nightingale 		keg = zone->uz_keg;
2906111fbcd5SBryan Venteicher 		rw_wlock(&uma_rwlock);
2907099a0e58SBosko Milekic 		LIST_REMOVE(keg, uk_link);
2908111fbcd5SBryan Venteicher 		rw_wunlock(&uma_rwlock);
29090095a784SJeff Roberson 		zone_free_item(kegs, keg, NULL, SKIP_NONE);
29109c2cd7e5SJeff Roberson 	}
29112efcc8cbSGleb Smirnoff 	counter_u64_free(zone->uz_allocs);
29122efcc8cbSGleb Smirnoff 	counter_u64_free(zone->uz_frees);
29132efcc8cbSGleb Smirnoff 	counter_u64_free(zone->uz_fails);
2914c6fd3e23SJeff Roberson 	counter_u64_free(zone->uz_xdomain);
291520a4e154SJeff Roberson 	free(zone->uz_ctlname, M_UMA);
2916c6fd3e23SJeff Roberson 	for (i = 0; i < vm_ndomains; i++)
2917c6fd3e23SJeff Roberson 		ZDOM_LOCK_FINI(ZDOM_GET(zone, i));
291891d947bfSJeff Roberson 	ZONE_CROSS_LOCK_FINI(zone);
2919099a0e58SBosko Milekic }
2920099a0e58SBosko Milekic 
2921a81c400eSJeff Roberson static void
2922a81c400eSJeff Roberson zone_foreach_unlocked(void (*zfunc)(uma_zone_t, void *arg), void *arg)
2923a81c400eSJeff Roberson {
2924a81c400eSJeff Roberson 	uma_keg_t keg;
2925a81c400eSJeff Roberson 	uma_zone_t zone;
2926a81c400eSJeff Roberson 
2927a81c400eSJeff Roberson 	LIST_FOREACH(keg, &uma_kegs, uk_link) {
2928a81c400eSJeff Roberson 		LIST_FOREACH(zone, &keg->uk_zones, uz_link)
2929a81c400eSJeff Roberson 			zfunc(zone, arg);
2930a81c400eSJeff Roberson 	}
2931a81c400eSJeff Roberson 	LIST_FOREACH(zone, &uma_cachezones, uz_link)
2932a81c400eSJeff Roberson 		zfunc(zone, arg);
2933a81c400eSJeff Roberson }
2934a81c400eSJeff Roberson 
29359c2cd7e5SJeff Roberson /*
29368355f576SJeff Roberson  * Traverses every zone in the system and calls a callback
29378355f576SJeff Roberson  *
29388355f576SJeff Roberson  * Arguments:
29398355f576SJeff Roberson  *	zfunc  A pointer to a function which accepts a zone
29408355f576SJeff Roberson  *		as an argument.
29418355f576SJeff Roberson  *
29428355f576SJeff Roberson  * Returns:
29438355f576SJeff Roberson  *	Nothing
29448355f576SJeff Roberson  */
29458355f576SJeff Roberson static void
294620a4e154SJeff Roberson zone_foreach(void (*zfunc)(uma_zone_t, void *arg), void *arg)
29478355f576SJeff Roberson {
29488355f576SJeff Roberson 
2949111fbcd5SBryan Venteicher 	rw_rlock(&uma_rwlock);
2950a81c400eSJeff Roberson 	zone_foreach_unlocked(zfunc, arg);
2951111fbcd5SBryan Venteicher 	rw_runlock(&uma_rwlock);
29528355f576SJeff Roberson }
29538355f576SJeff Roberson 
2954f4bef67cSGleb Smirnoff /*
2955a81c400eSJeff Roberson  * Initialize the kernel memory allocator.  This is done after pages can be
2956a81c400eSJeff Roberson  * allocated but before general KVA is available.
2957f4bef67cSGleb Smirnoff  */
2958a81c400eSJeff Roberson void
2959a81c400eSJeff Roberson uma_startup1(vm_offset_t virtual_avail)
2960f4bef67cSGleb Smirnoff {
2961a81c400eSJeff Roberson 	struct uma_zctor_args args;
2962a81c400eSJeff Roberson 	size_t ksize, zsize, size;
2963c8b0a88bSJeff Roberson 	uma_keg_t primarykeg;
2964a81c400eSJeff Roberson 	uintptr_t m;
296581302f1dSMark Johnston 	int domain;
2966a81c400eSJeff Roberson 	uint8_t pflag;
2967a81c400eSJeff Roberson 
2968a81c400eSJeff Roberson 	bootstart = bootmem = virtual_avail;
2969a81c400eSJeff Roberson 
2970a81c400eSJeff Roberson 	rw_init(&uma_rwlock, "UMA lock");
2971a81c400eSJeff Roberson 	sx_init(&uma_reclaim_lock, "umareclaim");
2972f4bef67cSGleb Smirnoff 
2973f4bef67cSGleb Smirnoff 	ksize = sizeof(struct uma_keg) +
2974f4bef67cSGleb Smirnoff 	    (sizeof(struct uma_domain) * vm_ndomains);
297579c9f942SJeff Roberson 	ksize = roundup(ksize, UMA_SUPER_ALIGN);
2976f4bef67cSGleb Smirnoff 	zsize = sizeof(struct uma_zone) +
2977f4bef67cSGleb Smirnoff 	    (sizeof(struct uma_cache) * (mp_maxid + 1)) +
2978f4bef67cSGleb Smirnoff 	    (sizeof(struct uma_zone_domain) * vm_ndomains);
297979c9f942SJeff Roberson 	zsize = roundup(zsize, UMA_SUPER_ALIGN);
2980f4bef67cSGleb Smirnoff 
2981a81c400eSJeff Roberson 	/* Allocate the zone of zones, zone of kegs, and zone of zones keg. */
2982a81c400eSJeff Roberson 	size = (zsize * 2) + ksize;
298381302f1dSMark Johnston 	for (domain = 0; domain < vm_ndomains; domain++) {
298481302f1dSMark Johnston 		m = (uintptr_t)startup_alloc(NULL, size, domain, &pflag,
298581302f1dSMark Johnston 		    M_NOWAIT | M_ZERO);
298681302f1dSMark Johnston 		if (m != 0)
298781302f1dSMark Johnston 			break;
298881302f1dSMark Johnston 	}
2989ab3185d1SJeff Roberson 	zones = (uma_zone_t)m;
299079c9f942SJeff Roberson 	m += zsize;
2991ab3185d1SJeff Roberson 	kegs = (uma_zone_t)m;
299279c9f942SJeff Roberson 	m += zsize;
2993c8b0a88bSJeff Roberson 	primarykeg = (uma_keg_t)m;
2994ab3185d1SJeff Roberson 
2995099a0e58SBosko Milekic 	/* "manually" create the initial zone */
29960095a784SJeff Roberson 	memset(&args, 0, sizeof(args));
2997099a0e58SBosko Milekic 	args.name = "UMA Kegs";
2998ab3185d1SJeff Roberson 	args.size = ksize;
2999099a0e58SBosko Milekic 	args.ctor = keg_ctor;
3000099a0e58SBosko Milekic 	args.dtor = keg_dtor;
30018355f576SJeff Roberson 	args.uminit = zero_init;
30028355f576SJeff Roberson 	args.fini = NULL;
3003c8b0a88bSJeff Roberson 	args.keg = primarykeg;
300479c9f942SJeff Roberson 	args.align = UMA_SUPER_ALIGN - 1;
3005b60f5b79SJeff Roberson 	args.flags = UMA_ZFLAG_INTERNAL;
3006ab3185d1SJeff Roberson 	zone_ctor(kegs, zsize, &args, M_WAITOK);
30078355f576SJeff Roberson 
3008099a0e58SBosko Milekic 	args.name = "UMA Zones";
3009f4bef67cSGleb Smirnoff 	args.size = zsize;
3010099a0e58SBosko Milekic 	args.ctor = zone_ctor;
3011099a0e58SBosko Milekic 	args.dtor = zone_dtor;
3012099a0e58SBosko Milekic 	args.uminit = zero_init;
3013099a0e58SBosko Milekic 	args.fini = NULL;
3014099a0e58SBosko Milekic 	args.keg = NULL;
301579c9f942SJeff Roberson 	args.align = UMA_SUPER_ALIGN - 1;
3016099a0e58SBosko Milekic 	args.flags = UMA_ZFLAG_INTERNAL;
3017ab3185d1SJeff Roberson 	zone_ctor(zones, zsize, &args, M_WAITOK);
3018099a0e58SBosko Milekic 
30199b8db4d0SRyan Libby 	/* Now make zones for slab headers */
30209b8db4d0SRyan Libby 	slabzones[0] = uma_zcreate("UMA Slabs 0", SLABZONE0_SIZE,
30219b8db4d0SRyan Libby 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
30229b8db4d0SRyan Libby 	slabzones[1] = uma_zcreate("UMA Slabs 1", SLABZONE1_SIZE,
30231e0701e1SJeff Roberson 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
30248355f576SJeff Roberson 
30258355f576SJeff Roberson 	hashzone = uma_zcreate("UMA Hash",
30268355f576SJeff Roberson 	    sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
30271e0701e1SJeff Roberson 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
30288355f576SJeff Roberson 
3029a81c400eSJeff Roberson 	bucket_init();
3030d4665eaaSJeff Roberson 	smr_init();
30318355f576SJeff Roberson }
30328355f576SJeff Roberson 
3033a81c400eSJeff Roberson #ifndef UMA_MD_SMALL_ALLOC
3034a81c400eSJeff Roberson extern void vm_radix_reserve_kva(void);
3035f4bef67cSGleb Smirnoff #endif
3036f4bef67cSGleb Smirnoff 
3037a81c400eSJeff Roberson /*
3038a81c400eSJeff Roberson  * Advertise the availability of normal kva allocations and switch to
3039a81c400eSJeff Roberson  * the default back-end allocator.  Marks the KVA we consumed on startup
3040a81c400eSJeff Roberson  * as used in the map.
3041a81c400eSJeff Roberson  */
30428355f576SJeff Roberson void
304399571dc3SJeff Roberson uma_startup2(void)
30448355f576SJeff Roberson {
3045f4bef67cSGleb Smirnoff 
3046530cc6a2SJeff Roberson 	if (bootstart != bootmem) {
3047a81c400eSJeff Roberson 		vm_map_lock(kernel_map);
3048a81c400eSJeff Roberson 		(void)vm_map_insert(kernel_map, NULL, 0, bootstart, bootmem,
3049a81c400eSJeff Roberson 		    VM_PROT_RW, VM_PROT_RW, MAP_NOFAULT);
3050a81c400eSJeff Roberson 		vm_map_unlock(kernel_map);
3051a81c400eSJeff Roberson 	}
3052a81c400eSJeff Roberson 
3053a81c400eSJeff Roberson #ifndef UMA_MD_SMALL_ALLOC
3054a81c400eSJeff Roberson 	/* Set up radix zone to use noobj_alloc. */
3055a81c400eSJeff Roberson 	vm_radix_reserve_kva();
3056f7d35785SGleb Smirnoff #endif
3057a81c400eSJeff Roberson 
3058a81c400eSJeff Roberson 	booted = BOOT_KVA;
3059a81c400eSJeff Roberson 	zone_foreach_unlocked(zone_kva_available, NULL);
3060f4bef67cSGleb Smirnoff 	bucket_enable();
30618355f576SJeff Roberson }
30628355f576SJeff Roberson 
3063a81c400eSJeff Roberson /*
3064dc2b3205SMark Johnston  * Allocate counters as early as possible so that boot-time allocations are
3065dc2b3205SMark Johnston  * accounted more precisely.
3066dc2b3205SMark Johnston  */
3067dc2b3205SMark Johnston static void
3068dc2b3205SMark Johnston uma_startup_pcpu(void *arg __unused)
3069dc2b3205SMark Johnston {
3070dc2b3205SMark Johnston 
3071dc2b3205SMark Johnston 	zone_foreach_unlocked(zone_alloc_counters, NULL);
3072dc2b3205SMark Johnston 	booted = BOOT_PCPU;
3073dc2b3205SMark Johnston }
3074dc2b3205SMark Johnston SYSINIT(uma_startup_pcpu, SI_SUB_COUNTER, SI_ORDER_ANY, uma_startup_pcpu, NULL);
3075dc2b3205SMark Johnston 
3076dc2b3205SMark Johnston /*
3077a81c400eSJeff Roberson  * Finish our initialization steps.
3078a81c400eSJeff Roberson  */
30798355f576SJeff Roberson static void
3080dc2b3205SMark Johnston uma_startup3(void *arg __unused)
30818355f576SJeff Roberson {
30821431a748SGleb Smirnoff 
3083c5deaf04SGleb Smirnoff #ifdef INVARIANTS
3084c5deaf04SGleb Smirnoff 	TUNABLE_INT_FETCH("vm.debug.divisor", &dbg_divisor);
3085c5deaf04SGleb Smirnoff 	uma_dbg_cnt = counter_u64_alloc(M_WAITOK);
3086c5deaf04SGleb Smirnoff 	uma_skip_cnt = counter_u64_alloc(M_WAITOK);
3087c5deaf04SGleb Smirnoff #endif
3088a81c400eSJeff Roberson 	zone_foreach_unlocked(zone_alloc_sysctl, NULL);
3089fd90e2edSJung-uk Kim 	callout_init(&uma_callout, 1);
30909643769aSJeff Roberson 	callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
3091c5deaf04SGleb Smirnoff 	booted = BOOT_RUNNING;
3092860bb7a0SMark Johnston 
3093860bb7a0SMark Johnston 	EVENTHANDLER_REGISTER(shutdown_post_sync, uma_shutdown, NULL,
3094860bb7a0SMark Johnston 	    EVENTHANDLER_PRI_FIRST);
3095860bb7a0SMark Johnston }
3096dc2b3205SMark Johnston SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
3097860bb7a0SMark Johnston 
3098860bb7a0SMark Johnston static void
3099860bb7a0SMark Johnston uma_shutdown(void)
3100860bb7a0SMark Johnston {
3101860bb7a0SMark Johnston 
3102860bb7a0SMark Johnston 	booted = BOOT_SHUTDOWN;
31038355f576SJeff Roberson }
31048355f576SJeff Roberson 
3105e20a199fSJeff Roberson static uma_keg_t
3106099a0e58SBosko Milekic uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
310785dcf349SGleb Smirnoff 		int align, uint32_t flags)
3108099a0e58SBosko Milekic {
3109099a0e58SBosko Milekic 	struct uma_kctor_args args;
3110099a0e58SBosko Milekic 
3111099a0e58SBosko Milekic 	args.size = size;
3112099a0e58SBosko Milekic 	args.uminit = uminit;
3113099a0e58SBosko Milekic 	args.fini = fini;
31141e319f6dSRobert Watson 	args.align = (align == UMA_ALIGN_CACHE) ? uma_align_cache : align;
3115099a0e58SBosko Milekic 	args.flags = flags;
3116099a0e58SBosko Milekic 	args.zone = zone;
3117ab3185d1SJeff Roberson 	return (zone_alloc_item(kegs, &args, UMA_ANYDOMAIN, M_WAITOK));
3118099a0e58SBosko Milekic }
3119099a0e58SBosko Milekic 
3120f4bef67cSGleb Smirnoff /* Public functions */
31218355f576SJeff Roberson /* See uma.h */
31221e319f6dSRobert Watson void
31231e319f6dSRobert Watson uma_set_align(int align)
31241e319f6dSRobert Watson {
31251e319f6dSRobert Watson 
31261e319f6dSRobert Watson 	if (align != UMA_ALIGN_CACHE)
31271e319f6dSRobert Watson 		uma_align_cache = align;
31281e319f6dSRobert Watson }
31291e319f6dSRobert Watson 
31301e319f6dSRobert Watson /* See uma.h */
31318355f576SJeff Roberson uma_zone_t
3132bb196eb4SMatthew D Fleming uma_zcreate(const char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
313385dcf349SGleb Smirnoff 		uma_init uminit, uma_fini fini, int align, uint32_t flags)
31348355f576SJeff Roberson 
31358355f576SJeff Roberson {
31368355f576SJeff Roberson 	struct uma_zctor_args args;
313795c4bf75SKonstantin Belousov 	uma_zone_t res;
31388355f576SJeff Roberson 
3139a5a35578SJohn Baldwin 	KASSERT(powerof2(align + 1), ("invalid zone alignment %d for \"%s\"",
3140a5a35578SJohn Baldwin 	    align, name));
3141a5a35578SJohn Baldwin 
31428355f576SJeff Roberson 	/* This stuff is essential for the zone ctor */
31430095a784SJeff Roberson 	memset(&args, 0, sizeof(args));
31448355f576SJeff Roberson 	args.name = name;
31458355f576SJeff Roberson 	args.size = size;
31468355f576SJeff Roberson 	args.ctor = ctor;
31478355f576SJeff Roberson 	args.dtor = dtor;
31488355f576SJeff Roberson 	args.uminit = uminit;
31498355f576SJeff Roberson 	args.fini = fini;
315009c8cb71SMark Johnston #if defined(INVARIANTS) && !defined(KASAN)
3151afc6dc36SJohn-Mark Gurney 	/*
3152ca293436SRyan Libby 	 * Inject procedures which check for memory use after free if we are
3153ca293436SRyan Libby 	 * allowed to scramble the memory while it is not allocated.  This
3154ca293436SRyan Libby 	 * requires that: UMA is actually able to access the memory, no init
3155ca293436SRyan Libby 	 * or fini procedures, no dependency on the initial value of the
3156ca293436SRyan Libby 	 * memory, and no (legitimate) use of the memory after free.  Note,
3157ca293436SRyan Libby 	 * the ctor and dtor do not need to be empty.
3158afc6dc36SJohn-Mark Gurney 	 */
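	/*
	 * For instance, a hypothetical uma_zcreate("foo", 128, NULL, NULL,
	 * NULL, NULL, UMA_ALIGN_PTR, 0) zone qualifies for trashing, while
	 * one created with UMA_ZONE_NOFREE or with a uminit routine does
	 * not.
	 */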
315954c5ae80SRyan Libby 	if ((!(flags & (UMA_ZONE_ZINIT | UMA_ZONE_NOTOUCH |
316054c5ae80SRyan Libby 	    UMA_ZONE_NOFREE))) && uminit == NULL && fini == NULL) {
3161afc6dc36SJohn-Mark Gurney 		args.uminit = trash_init;
3162afc6dc36SJohn-Mark Gurney 		args.fini = trash_fini;
3163afc6dc36SJohn-Mark Gurney 	}
3164afc6dc36SJohn-Mark Gurney #endif
31658355f576SJeff Roberson 	args.align = align;
31668355f576SJeff Roberson 	args.flags = flags;
3167099a0e58SBosko Milekic 	args.keg = NULL;
3168099a0e58SBosko Milekic 
3169*aabe13f1SMark Johnston 	sx_xlock(&uma_reclaim_lock);
3170ab3185d1SJeff Roberson 	res = zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK);
3171*aabe13f1SMark Johnston 	sx_xunlock(&uma_reclaim_lock);
3172a81c400eSJeff Roberson 
317395c4bf75SKonstantin Belousov 	return (res);
3174099a0e58SBosko Milekic }
3175099a0e58SBosko Milekic 
3176099a0e58SBosko Milekic /* See uma.h */
3177099a0e58SBosko Milekic uma_zone_t
31780464f16eSMark Johnston uma_zsecond_create(const char *name, uma_ctor ctor, uma_dtor dtor,
3179c8b0a88bSJeff Roberson     uma_init zinit, uma_fini zfini, uma_zone_t primary)
3180099a0e58SBosko Milekic {
3181099a0e58SBosko Milekic 	struct uma_zctor_args args;
3182e20a199fSJeff Roberson 	uma_keg_t keg;
318395c4bf75SKonstantin Belousov 	uma_zone_t res;
3184099a0e58SBosko Milekic 
3185c8b0a88bSJeff Roberson 	keg = primary->uz_keg;
31860095a784SJeff Roberson 	memset(&args, 0, sizeof(args));
3187099a0e58SBosko Milekic 	args.name = name;
3188e20a199fSJeff Roberson 	args.size = keg->uk_size;
3189099a0e58SBosko Milekic 	args.ctor = ctor;
3190099a0e58SBosko Milekic 	args.dtor = dtor;
3191099a0e58SBosko Milekic 	args.uminit = zinit;
3192099a0e58SBosko Milekic 	args.fini = zfini;
3193e20a199fSJeff Roberson 	args.align = keg->uk_align;
3194e20a199fSJeff Roberson 	args.flags = keg->uk_flags | UMA_ZONE_SECONDARY;
3195e20a199fSJeff Roberson 	args.keg = keg;
31968355f576SJeff Roberson 
3197*aabe13f1SMark Johnston 	sx_xlock(&uma_reclaim_lock);
3198ab3185d1SJeff Roberson 	res = zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK);
3199*aabe13f1SMark Johnston 	sx_xunlock(&uma_reclaim_lock);
3200a81c400eSJeff Roberson 
320195c4bf75SKonstantin Belousov 	return (res);
32028355f576SJeff Roberson }
32038355f576SJeff Roberson 
32040095a784SJeff Roberson /* See uma.h */
32050095a784SJeff Roberson uma_zone_t
32060464f16eSMark Johnston uma_zcache_create(const char *name, int size, uma_ctor ctor, uma_dtor dtor,
32070464f16eSMark Johnston     uma_init zinit, uma_fini zfini, uma_import zimport, uma_release zrelease,
32080464f16eSMark Johnston     void *arg, int flags)
32090095a784SJeff Roberson {
32100095a784SJeff Roberson 	struct uma_zctor_args args;
32110095a784SJeff Roberson 
32120095a784SJeff Roberson 	memset(&args, 0, sizeof(args));
32130095a784SJeff Roberson 	args.name = name;
3214af526374SJeff Roberson 	args.size = size;
32150095a784SJeff Roberson 	args.ctor = ctor;
32160095a784SJeff Roberson 	args.dtor = dtor;
32170095a784SJeff Roberson 	args.uminit = zinit;
32180095a784SJeff Roberson 	args.fini = zfini;
32190095a784SJeff Roberson 	args.import = zimport;
32200095a784SJeff Roberson 	args.release = zrelease;
32210095a784SJeff Roberson 	args.arg = arg;
32220095a784SJeff Roberson 	args.align = 0;
3223bb15d1c7SGleb Smirnoff 	args.flags = flags | UMA_ZFLAG_CACHE;
32240095a784SJeff Roberson 
3225ab3185d1SJeff Roberson 	return (zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK));
32260095a784SJeff Roberson }
32270095a784SJeff Roberson 
32288355f576SJeff Roberson /* See uma.h */
32299c2cd7e5SJeff Roberson void
32309c2cd7e5SJeff Roberson uma_zdestroy(uma_zone_t zone)
32319c2cd7e5SJeff Roberson {
3232f4ff923bSRobert Watson 
3233860bb7a0SMark Johnston 	/*
3234860bb7a0SMark Johnston 	 * Large slabs are expensive to reclaim, so don't bother doing
3235860bb7a0SMark Johnston 	 * unnecessary work if we're shutting down.
3236860bb7a0SMark Johnston 	 */
3237860bb7a0SMark Johnston 	if (booted == BOOT_SHUTDOWN &&
3238860bb7a0SMark Johnston 	    zone->uz_fini == NULL && zone->uz_release == zone_release)
3239860bb7a0SMark Johnston 		return;
3240*aabe13f1SMark Johnston 	sx_xlock(&uma_reclaim_lock);
32410095a784SJeff Roberson 	zone_free_item(zones, zone, NULL, SKIP_NONE);
3242*aabe13f1SMark Johnston 	sx_xunlock(&uma_reclaim_lock);
32439c2cd7e5SJeff Roberson }
32449c2cd7e5SJeff Roberson 
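/*
 * Block until at least one item can be allocated from the zone: allocate a
 * single item with M_WAITOK through the appropriate path (SMR, per-CPU or
 * regular) and immediately free it again.
 */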
32458d6fbbb8SJeff Roberson void
32468d6fbbb8SJeff Roberson uma_zwait(uma_zone_t zone)
32478d6fbbb8SJeff Roberson {
32488d6fbbb8SJeff Roberson 
324970260874SJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_SMR) != 0)
325070260874SJeff Roberson 		uma_zfree_smr(zone, uma_zalloc_smr(zone, M_WAITOK));
325170260874SJeff Roberson 	else if ((zone->uz_flags & UMA_ZONE_PCPU) != 0)
325270260874SJeff Roberson 		uma_zfree_pcpu(zone, uma_zalloc_pcpu(zone, M_WAITOK));
325370260874SJeff Roberson 	else
325470260874SJeff Roberson 		uma_zfree(zone, uma_zalloc(zone, M_WAITOK));
32558d6fbbb8SJeff Roberson }
32568d6fbbb8SJeff Roberson 
32574e180881SMateusz Guzik void *
32584e180881SMateusz Guzik uma_zalloc_pcpu_arg(uma_zone_t zone, void *udata, int flags)
32594e180881SMateusz Guzik {
32603acb6572SMateusz Guzik 	void *item, *pcpu_item;
3261b4799947SRuslan Bukin #ifdef SMP
32624e180881SMateusz Guzik 	int i;
32634e180881SMateusz Guzik 
32644e180881SMateusz Guzik 	MPASS(zone->uz_flags & UMA_ZONE_PCPU);
3265b4799947SRuslan Bukin #endif
32664e180881SMateusz Guzik 	item = uma_zalloc_arg(zone, udata, flags & ~M_ZERO);
32673acb6572SMateusz Guzik 	if (item == NULL)
32683acb6572SMateusz Guzik 		return (NULL);
32693acb6572SMateusz Guzik 	pcpu_item = zpcpu_base_to_offset(item);
32703acb6572SMateusz Guzik 	if (flags & M_ZERO) {
3271b4799947SRuslan Bukin #ifdef SMP
3272013072f0SMark Johnston 		for (i = 0; i <= mp_maxid; i++)
32733acb6572SMateusz Guzik 			bzero(zpcpu_get_cpu(pcpu_item, i), zone->uz_size);
3274b4799947SRuslan Bukin #else
3275b4799947SRuslan Bukin 		bzero(item, zone->uz_size);
3276b4799947SRuslan Bukin #endif
32774e180881SMateusz Guzik 	}
32783acb6572SMateusz Guzik 	return (pcpu_item);
32794e180881SMateusz Guzik }
32804e180881SMateusz Guzik 
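/*
 * The pointer returned by uma_zalloc_pcpu_arg() is a per-CPU base; callers
 * dereference it with zpcpu_get_cpu() (as the M_ZERO loop above does) or
 * the related zpcpu accessors.  A sketch of typical use, assuming a
 * hypothetical UMA_ZONE_PCPU zone of 64-bit counters:
 *
 *	uint64_t *p = uma_zalloc_pcpu(zone, M_WAITOK | M_ZERO);
 *	...
 *	uma_zfree_pcpu(zone, p);
 */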
32814e180881SMateusz Guzik /*
32824e180881SMateusz Guzik  * A stub while both regular and pcpu cases are identical.
32834e180881SMateusz Guzik  */
32844e180881SMateusz Guzik void
32853acb6572SMateusz Guzik uma_zfree_pcpu_arg(uma_zone_t zone, void *pcpu_item, void *udata)
32864e180881SMateusz Guzik {
32873acb6572SMateusz Guzik 	void *item;
32884e180881SMateusz Guzik 
3289c5b7751fSIan Lepore #ifdef SMP
32904e180881SMateusz Guzik 	MPASS(zone->uz_flags & UMA_ZONE_PCPU);
3291c5b7751fSIan Lepore #endif
3292b8f7267dSKristof Provost 
3293b8f7267dSKristof Provost         /* uma_zfree_pcpu_*(..., NULL) does nothing, to match free(9). */
3294b8f7267dSKristof Provost         if (pcpu_item == NULL)
3295b8f7267dSKristof Provost                 return;
3296b8f7267dSKristof Provost 
32973acb6572SMateusz Guzik 	item = zpcpu_offset_to_base(pcpu_item);
32984e180881SMateusz Guzik 	uma_zfree_arg(zone, item, udata);
32994e180881SMateusz Guzik }
33004e180881SMateusz Guzik 
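/*
 * Finish constructing an item on the allocation path: mark it valid for
 * KASAN, apply the trash and debug checks under INVARIANTS, run the zone's
 * constructor if one is set, and zero the item when M_ZERO was requested.
 * Returns NULL, and frees the item back to the zone, on constructor failure.
 */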
3301d4665eaaSJeff Roberson static inline void *
3302d4665eaaSJeff Roberson item_ctor(uma_zone_t zone, int uz_flags, int size, void *udata, int flags,
3303d4665eaaSJeff Roberson     void *item)
3304beb8beefSJeff Roberson {
3305beb8beefSJeff Roberson #ifdef INVARIANTS
3306ca293436SRyan Libby 	bool skipdbg;
330709c8cb71SMark Johnston #endif
3308beb8beefSJeff Roberson 
330909c8cb71SMark Johnston 	kasan_mark_item_valid(zone, item);
331009c8cb71SMark Johnston 
331109c8cb71SMark Johnston #ifdef INVARIANTS
3312beb8beefSJeff Roberson 	skipdbg = uma_dbg_zskip(zone, item);
331309c8cb71SMark Johnston 	if (!skipdbg && (uz_flags & UMA_ZFLAG_TRASH) != 0 &&
3314ca293436SRyan Libby 	    zone->uz_ctor != trash_ctor)
3315cc7ce83aSJeff Roberson 		trash_ctor(item, size, udata, flags);
3316beb8beefSJeff Roberson #endif
331709c8cb71SMark Johnston 
3318d4665eaaSJeff Roberson 	/* Check flags before loading ctor pointer. */
3319d4665eaaSJeff Roberson 	if (__predict_false((uz_flags & UMA_ZFLAG_CTORDTOR) != 0) &&
3320d4665eaaSJeff Roberson 	    __predict_false(zone->uz_ctor != NULL) &&
3321cc7ce83aSJeff Roberson 	    zone->uz_ctor(item, size, udata, flags) != 0) {
3322beb8beefSJeff Roberson 		counter_u64_add(zone->uz_fails, 1);
3323beb8beefSJeff Roberson 		zone_free_item(zone, item, udata, SKIP_DTOR | SKIP_CNT);
3324beb8beefSJeff Roberson 		return (NULL);
3325beb8beefSJeff Roberson 	}
3326beb8beefSJeff Roberson #ifdef INVARIANTS
3327beb8beefSJeff Roberson 	if (!skipdbg)
3328beb8beefSJeff Roberson 		uma_dbg_alloc(zone, NULL, item);
3329beb8beefSJeff Roberson #endif
33306d88d784SJeff Roberson 	if (__predict_false(flags & M_ZERO))
33316d88d784SJeff Roberson 		return (memset(item, 0, size));
3332beb8beefSJeff Roberson 
3333beb8beefSJeff Roberson 	return (item);
3334beb8beefSJeff Roberson }
3335beb8beefSJeff Roberson 
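/*
 * Counterpart of item_ctor() for the free path: perform the debug free
 * checks, invoke the zone's destructor unless 'skip' says otherwise, trash
 * the memory under INVARIANTS, and mark the item invalid for KASAN.
 */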
3336ca293436SRyan Libby static inline void
3337cc7ce83aSJeff Roberson item_dtor(uma_zone_t zone, void *item, int size, void *udata,
3338cc7ce83aSJeff Roberson     enum zfreeskip skip)
3339ca293436SRyan Libby {
3340ca293436SRyan Libby #ifdef INVARIANTS
3341ca293436SRyan Libby 	bool skipdbg;
3342ca293436SRyan Libby 
3343ca293436SRyan Libby 	skipdbg = uma_dbg_zskip(zone, item);
3344ca293436SRyan Libby 	if (skip == SKIP_NONE && !skipdbg) {
3345ca293436SRyan Libby 		if ((zone->uz_flags & UMA_ZONE_MALLOC) != 0)
3346ca293436SRyan Libby 			uma_dbg_free(zone, udata, item);
3347ca293436SRyan Libby 		else
3348ca293436SRyan Libby 			uma_dbg_free(zone, NULL, item);
3349ca293436SRyan Libby 	}
3350ca293436SRyan Libby #endif
3351cc7ce83aSJeff Roberson 	if (__predict_true(skip < SKIP_DTOR)) {
3352ca293436SRyan Libby 		if (zone->uz_dtor != NULL)
3353cc7ce83aSJeff Roberson 			zone->uz_dtor(item, size, udata);
3354ca293436SRyan Libby #ifdef INVARIANTS
3355ca293436SRyan Libby 		if (!skipdbg && (zone->uz_flags & UMA_ZFLAG_TRASH) != 0 &&
3356ca293436SRyan Libby 		    zone->uz_dtor != trash_dtor)
3357cc7ce83aSJeff Roberson 			trash_dtor(item, size, udata);
3358ca293436SRyan Libby #endif
3359ca293436SRyan Libby 	}
336009c8cb71SMark Johnston 	kasan_mark_item_invalid(zone, item);
3361ca293436SRyan Libby }
3362ca293436SRyan Libby 
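/*
 * Map an item back to the NUMA domain of its backing physical memory.
 * Only needed (and only compiled) when NUMA is configured.
 */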
33631c58c09fSMateusz Guzik #ifdef NUMA
336481302f1dSMark Johnston static int
336581302f1dSMark Johnston item_domain(void *item)
336681302f1dSMark Johnston {
336781302f1dSMark Johnston 	int domain;
336881302f1dSMark Johnston 
3369431fb8abSMark Johnston 	domain = vm_phys_domain(vtophys(item));
337081302f1dSMark Johnston 	KASSERT(domain >= 0 && domain < vm_ndomains,
337181302f1dSMark Johnston 	    ("%s: unknown domain for item %p", __func__, item));
337281302f1dSMark Johnston 	return (domain);
337381302f1dSMark Johnston }
33741c58c09fSMateusz Guzik #endif
337581302f1dSMark Johnston 
3376d4665eaaSJeff Roberson #if defined(INVARIANTS) || defined(DEBUG_MEMGUARD) || defined(WITNESS)
3377d4665eaaSJeff Roberson #define	UMA_ZALLOC_DEBUG
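/*
 * Debug-only allocation hook: warn about potentially sleeping allocations
 * under WITNESS, sanity-check the flags and calling context under
 * INVARIANTS, and optionally divert the allocation to memguard.  Returns
 * EJUSTRETURN when *itemp already holds the final result and the caller
 * should return immediately.
 */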
3378d4665eaaSJeff Roberson static int
3379d4665eaaSJeff Roberson uma_zalloc_debug(uma_zone_t zone, void **itemp, void *udata, int flags)
3380d4665eaaSJeff Roberson {
3381d4665eaaSJeff Roberson 	int error;
3382d4665eaaSJeff Roberson 
3383d4665eaaSJeff Roberson 	error = 0;
3384d4665eaaSJeff Roberson #ifdef WITNESS
3385d4665eaaSJeff Roberson 	if (flags & M_WAITOK) {
3386d4665eaaSJeff Roberson 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
3387d4665eaaSJeff Roberson 		    "uma_zalloc_debug: zone \"%s\"", zone->uz_name);
3388d4665eaaSJeff Roberson 	}
3389d4665eaaSJeff Roberson #endif
3390d4665eaaSJeff Roberson 
3391d4665eaaSJeff Roberson #ifdef INVARIANTS
3392d4665eaaSJeff Roberson 	KASSERT((flags & M_EXEC) == 0,
3393d4665eaaSJeff Roberson 	    ("uma_zalloc_debug: called with M_EXEC"));
3394d4665eaaSJeff Roberson 	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
3395d4665eaaSJeff Roberson 	    ("uma_zalloc_debug: called within spinlock or critical section"));
3396d4665eaaSJeff Roberson 	KASSERT((zone->uz_flags & UMA_ZONE_PCPU) == 0 || (flags & M_ZERO) == 0,
3397d4665eaaSJeff Roberson 	    ("uma_zalloc_debug: allocating from a pcpu zone with M_ZERO"));
3398d4665eaaSJeff Roberson #endif
3399d4665eaaSJeff Roberson 
3400d4665eaaSJeff Roberson #ifdef DEBUG_MEMGUARD
34019e47b341SJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_SMR) == 0 && memguard_cmp_zone(zone)) {
3402d4665eaaSJeff Roberson 		void *item;
3403d4665eaaSJeff Roberson 		item = memguard_alloc(zone->uz_size, flags);
3404d4665eaaSJeff Roberson 		if (item != NULL) {
3405d4665eaaSJeff Roberson 			error = EJUSTRETURN;
3406d4665eaaSJeff Roberson 			if (zone->uz_init != NULL &&
3407d4665eaaSJeff Roberson 			    zone->uz_init(item, zone->uz_size, flags) != 0) {
3408d4665eaaSJeff Roberson 				*itemp = NULL;
3409d4665eaaSJeff Roberson 				return (error);
3410d4665eaaSJeff Roberson 			}
3411d4665eaaSJeff Roberson 			if (zone->uz_ctor != NULL &&
3412d4665eaaSJeff Roberson 			    zone->uz_ctor(item, zone->uz_size, udata,
3413d4665eaaSJeff Roberson 			    flags) != 0) {
3414d4665eaaSJeff Roberson 				counter_u64_add(zone->uz_fails, 1);
3415d4665eaaSJeff Roberson 				zone->uz_fini(item, zone->uz_size);
3416d4665eaaSJeff Roberson 				*itemp = NULL;
3417d4665eaaSJeff Roberson 				return (error);
3418d4665eaaSJeff Roberson 			}
3419d4665eaaSJeff Roberson 			*itemp = item;
3420d4665eaaSJeff Roberson 			return (error);
3421d4665eaaSJeff Roberson 		}
3422d4665eaaSJeff Roberson 		/* This is unfortunate but should not be fatal. */
3423d4665eaaSJeff Roberson 	}
3424d4665eaaSJeff Roberson #endif
3425d4665eaaSJeff Roberson 	return (error);
3426d4665eaaSJeff Roberson }
3427d4665eaaSJeff Roberson 
3428d4665eaaSJeff Roberson static int
3429d4665eaaSJeff Roberson uma_zfree_debug(uma_zone_t zone, void *item, void *udata)
3430d4665eaaSJeff Roberson {
3431d4665eaaSJeff Roberson 	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
3432d4665eaaSJeff Roberson 	    ("uma_zfree_debug: called with spinlock or critical section held"));
3433d4665eaaSJeff Roberson 
3434d4665eaaSJeff Roberson #ifdef DEBUG_MEMGUARD
34359e47b341SJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_SMR) == 0 && is_memguard_addr(item)) {
3436d4665eaaSJeff Roberson 		if (zone->uz_dtor != NULL)
3437d4665eaaSJeff Roberson 			zone->uz_dtor(item, zone->uz_size, udata);
3438d4665eaaSJeff Roberson 		if (zone->uz_fini != NULL)
3439d4665eaaSJeff Roberson 			zone->uz_fini(item, zone->uz_size);
3440d4665eaaSJeff Roberson 		memguard_free(item);
3441d4665eaaSJeff Roberson 		return (EJUSTRETURN);
3442d4665eaaSJeff Roberson 	}
3443d4665eaaSJeff Roberson #endif
3444d4665eaaSJeff Roberson 	return (0);
3445d4665eaaSJeff Roberson }
3446d4665eaaSJeff Roberson #endif
3447d4665eaaSJeff Roberson 
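/*
 * Complete a fast-path allocation: pop an item from the given per-CPU
 * bucket, capture the zone size and flags while still in the critical
 * section, then exit the critical section and run item_ctor() on the item.
 */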
34486d88d784SJeff Roberson static inline void *
34496d88d784SJeff Roberson cache_alloc_item(uma_zone_t zone, uma_cache_t cache, uma_cache_bucket_t bucket,
34506d88d784SJeff Roberson     void *udata, int flags)
3451d4665eaaSJeff Roberson {
34526d88d784SJeff Roberson 	void *item;
34536d88d784SJeff Roberson 	int size, uz_flags;
34546d88d784SJeff Roberson 
34556d88d784SJeff Roberson 	item = cache_bucket_pop(cache, bucket);
34566d88d784SJeff Roberson 	size = cache_uz_size(cache);
34576d88d784SJeff Roberson 	uz_flags = cache_uz_flags(cache);
34586d88d784SJeff Roberson 	critical_exit();
34596d88d784SJeff Roberson 	return (item_ctor(zone, uz_flags, size, udata, flags, item));
34606d88d784SJeff Roberson }
34616d88d784SJeff Roberson 
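/*
 * Slow path for the per-CPU caches: repeatedly refill the cache via
 * cache_alloc() and retry the alloc bucket, falling back to a single-item
 * allocation from the zone if no bucket can be obtained.
 */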
34626d88d784SJeff Roberson static __noinline void *
34636d88d784SJeff Roberson cache_alloc_retry(uma_zone_t zone, uma_cache_t cache, void *udata, int flags)
34646d88d784SJeff Roberson {
34656d88d784SJeff Roberson 	uma_cache_bucket_t bucket;
3466d4665eaaSJeff Roberson 	int domain;
3467d4665eaaSJeff Roberson 
34686d88d784SJeff Roberson 	while (cache_alloc(zone, cache, udata, flags)) {
34696d88d784SJeff Roberson 		cache = &zone->uz_cpu[curcpu];
34706d88d784SJeff Roberson 		bucket = &cache->uc_allocbucket;
34716d88d784SJeff Roberson 		if (__predict_false(bucket->ucb_cnt == 0))
34726d88d784SJeff Roberson 			continue;
34736d88d784SJeff Roberson 		return (cache_alloc_item(zone, cache, bucket, udata, flags));
34746d88d784SJeff Roberson 	}
34756d88d784SJeff Roberson 	critical_exit();
34766d88d784SJeff Roberson 
3477d4665eaaSJeff Roberson 	/*
3478d4665eaaSJeff Roberson 	 * We cannot get a bucket, so try to return a single item.
3479d4665eaaSJeff Roberson 	 */
3480d4665eaaSJeff Roberson 	if (zone->uz_flags & UMA_ZONE_FIRSTTOUCH)
3481d4665eaaSJeff Roberson 		domain = PCPU_GET(domain);
3482d4665eaaSJeff Roberson 	else
3483d4665eaaSJeff Roberson 		domain = UMA_ANYDOMAIN;
3484d4665eaaSJeff Roberson 	return (zone_alloc_item(zone, udata, domain, flags));
3485d4665eaaSJeff Roberson }
3486d4665eaaSJeff Roberson 
3487d4665eaaSJeff Roberson /* See uma.h */
3488d4665eaaSJeff Roberson void *
3489d4665eaaSJeff Roberson uma_zalloc_smr(uma_zone_t zone, int flags)
3490d4665eaaSJeff Roberson {
3491d4665eaaSJeff Roberson 	uma_cache_bucket_t bucket;
3492d4665eaaSJeff Roberson 	uma_cache_t cache;
3493d4665eaaSJeff Roberson 
3494d4665eaaSJeff Roberson #ifdef UMA_ZALLOC_DEBUG
34956d88d784SJeff Roberson 	void *item;
34966d88d784SJeff Roberson 
3497d4665eaaSJeff Roberson 	KASSERT((zone->uz_flags & UMA_ZONE_SMR) != 0,
3498952c8964SMark Johnston 	    ("uma_zalloc_smr: called with non-SMR zone."));
3499d4665eaaSJeff Roberson 	if (uma_zalloc_debug(zone, &item, NULL, flags) == EJUSTRETURN)
3500d4665eaaSJeff Roberson 		return (item);
3501d4665eaaSJeff Roberson #endif
3502d4665eaaSJeff Roberson 
3503d4665eaaSJeff Roberson 	critical_enter();
3504d4665eaaSJeff Roberson 	cache = &zone->uz_cpu[curcpu];
3505d4665eaaSJeff Roberson 	bucket = &cache->uc_allocbucket;
35066d88d784SJeff Roberson 	if (__predict_false(bucket->ucb_cnt == 0))
35076d88d784SJeff Roberson 		return (cache_alloc_retry(zone, cache, NULL, flags));
35086d88d784SJeff Roberson 	return (cache_alloc_item(zone, cache, bucket, NULL, flags));
3509d4665eaaSJeff Roberson }
3510d4665eaaSJeff Roberson 
35119c2cd7e5SJeff Roberson /* See uma.h */
35128355f576SJeff Roberson void *
35132cc35ff9SJeff Roberson uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
35148355f576SJeff Roberson {
3515376b1ba3SJeff Roberson 	uma_cache_bucket_t bucket;
3516ab3185d1SJeff Roberson 	uma_cache_t cache;
35178355f576SJeff Roberson 
3518e866d8f0SMark Murray 	/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
351919fa89e9SMark Murray 	random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA);
352010cb2424SMark Murray 
35218355f576SJeff Roberson 	/* This is the fast path allocation */
3522e63a1c2fSRyan Libby 	CTR3(KTR_UMA, "uma_zalloc_arg zone %s(%p) flags %d", zone->uz_name,
3523e63a1c2fSRyan Libby 	    zone, flags);
3524a553d4b8SJeff Roberson 
3525d4665eaaSJeff Roberson #ifdef UMA_ZALLOC_DEBUG
35266d88d784SJeff Roberson 	void *item;
35276d88d784SJeff Roberson 
3528d4665eaaSJeff Roberson 	KASSERT((zone->uz_flags & UMA_ZONE_SMR) == 0,
3529952c8964SMark Johnston 	    ("uma_zalloc_arg: called with SMR zone."));
3530d4665eaaSJeff Roberson 	if (uma_zalloc_debug(zone, &item, udata, flags) == EJUSTRETURN)
35318d689e04SGleb Smirnoff 		return (item);
35328d689e04SGleb Smirnoff #endif
3533d4665eaaSJeff Roberson 
35345d1ae027SRobert Watson 	/*
35355d1ae027SRobert Watson 	 * If possible, allocate from the per-CPU cache.  There are two
35365d1ae027SRobert Watson 	 * requirements for safe access to the per-CPU cache: (1) the thread
35375d1ae027SRobert Watson 	 * accessing the cache must not be preempted or yield during access,
35385d1ae027SRobert Watson 	 * and (2) the thread must not migrate CPUs without switching which
35395d1ae027SRobert Watson 	 * cache it accesses.  We rely on a critical section to prevent
35405d1ae027SRobert Watson 	 * preemption and migration.  We release the critical section in
35415d1ae027SRobert Watson 	 * order to acquire the zone mutex if we are unable to allocate from
35425d1ae027SRobert Watson 	 * the current cache; when we re-acquire the critical section, we
35435d1ae027SRobert Watson 	 * must detect and handle migration if it has occurred.
35445d1ae027SRobert Watson 	 */
35455d1ae027SRobert Watson 	critical_enter();
3546cc7ce83aSJeff Roberson 	cache = &zone->uz_cpu[curcpu];
3547376b1ba3SJeff Roberson 	bucket = &cache->uc_allocbucket;
35486d88d784SJeff Roberson 	if (__predict_false(bucket->ucb_cnt == 0))
35496d88d784SJeff Roberson 		return (cache_alloc_retry(zone, cache, udata, flags));
35506d88d784SJeff Roberson 	return (cache_alloc_item(zone, cache, bucket, udata, flags));
3551fc03d22bSJeff Roberson }
3552fc03d22bSJeff Roberson 
35538355f576SJeff Roberson /*
3554beb8beefSJeff Roberson  * Replenish an alloc bucket and possibly restore an old one.  Called in
3555beb8beefSJeff Roberson  * a critical section.  Returns in a critical section.
3556beb8beefSJeff Roberson  *
35574bd61e19SJeff Roberson  * A false return value indicates an allocation failure.
35584bd61e19SJeff Roberson  * A true return value indicates success and the caller should retry.
3559beb8beefSJeff Roberson  */
3560beb8beefSJeff Roberson static __noinline bool
3561beb8beefSJeff Roberson cache_alloc(uma_zone_t zone, uma_cache_t cache, void *udata, int flags)
3562beb8beefSJeff Roberson {
3563beb8beefSJeff Roberson 	uma_bucket_t bucket;
35648c277118SMark Johnston 	int curdomain, domain;
3565c6fd3e23SJeff Roberson 	bool new;
3566beb8beefSJeff Roberson 
3567beb8beefSJeff Roberson 	CRITICAL_ASSERT(curthread);
3568beb8beefSJeff Roberson 
3569beb8beefSJeff Roberson 	/*
3570beb8beefSJeff Roberson 	 * If we have run out of items in our alloc bucket see
3571beb8beefSJeff Roberson 	 * if we can switch with the free bucket.
3572d4665eaaSJeff Roberson 	 *
3573d4665eaaSJeff Roberson 	 * SMR Zones can't re-use the free bucket until the sequence has
3574d4665eaaSJeff Roberson 	 * expired.
35758355f576SJeff Roberson 	 */
3576c6fd3e23SJeff Roberson 	if ((cache_uz_flags(cache) & UMA_ZONE_SMR) == 0 &&
3577d4665eaaSJeff Roberson 	    cache->uc_freebucket.ucb_cnt != 0) {
3578d4665eaaSJeff Roberson 		cache_bucket_swap(&cache->uc_freebucket,
3579d4665eaaSJeff Roberson 		    &cache->uc_allocbucket);
3580beb8beefSJeff Roberson 		return (true);
35818355f576SJeff Roberson 	}
3582fc03d22bSJeff Roberson 
3583fc03d22bSJeff Roberson 	/*
3584fc03d22bSJeff Roberson 	 * Discard any empty allocation bucket while we hold no locks.
3585fc03d22bSJeff Roberson 	 */
3586376b1ba3SJeff Roberson 	bucket = cache_bucket_unload_alloc(cache);
3587fc03d22bSJeff Roberson 	critical_exit();
3588c6fd3e23SJeff Roberson 
3589c6fd3e23SJeff Roberson 	if (bucket != NULL) {
3590c6fd3e23SJeff Roberson 		KASSERT(bucket->ub_cnt == 0,
3591c6fd3e23SJeff Roberson 		    ("cache_alloc: Entered with non-empty alloc bucket."));
35926fd34d6fSJeff Roberson 		bucket_free(zone, bucket, udata);
3593c6fd3e23SJeff Roberson 	}
3594fc03d22bSJeff Roberson 
35955d1ae027SRobert Watson 	/*
35965d1ae027SRobert Watson 	 * Attempt to retrieve the item from the per-CPU cache has failed, so
3597c6fd3e23SJeff Roberson 	 * we must go back to the zone.  This requires the zdom lock, so we
35985d1ae027SRobert Watson 	 * must drop the critical section, then re-acquire it when we go back
35995d1ae027SRobert Watson 	 * to the cache.  Since the critical section is released, we may be
36005d1ae027SRobert Watson 	 * preempted or migrate.  As such, make sure not to maintain any
36015d1ae027SRobert Watson 	 * thread-local state specific to the cache from prior to releasing
36025d1ae027SRobert Watson 	 * the critical section.
36035d1ae027SRobert Watson 	 */
3604c1685086SJeff Roberson 	domain = PCPU_GET(domain);
36058c277118SMark Johnston 	if ((cache_uz_flags(cache) & UMA_ZONE_ROUNDROBIN) != 0 ||
36068c277118SMark Johnston 	    VM_DOMAIN_EMPTY(domain))
3607c6fd3e23SJeff Roberson 		domain = zone_domain_highest(zone, domain);
3608c6fd3e23SJeff Roberson 	bucket = cache_fetch_bucket(zone, cache, domain);
3609af32cefdSMark Johnston 	if (bucket == NULL && zone->uz_bucket_size != 0 && !bucketdisable) {
3610beb8beefSJeff Roberson 		bucket = zone_alloc_bucket(zone, udata, domain, flags);
3611c6fd3e23SJeff Roberson 		new = true;
3612af32cefdSMark Johnston 	} else {
3613c6fd3e23SJeff Roberson 		new = false;
3614af32cefdSMark Johnston 	}
3615c6fd3e23SJeff Roberson 
36161431a748SGleb Smirnoff 	CTR3(KTR_UMA, "uma_zalloc: zone %s(%p) bucket zone returned %p",
36171431a748SGleb Smirnoff 	    zone->uz_name, zone, bucket);
36184bd61e19SJeff Roberson 	if (bucket == NULL) {
3619fc03d22bSJeff Roberson 		critical_enter();
3620beb8beefSJeff Roberson 		return (false);
36214bd61e19SJeff Roberson 	}
36220f9b7bf3SMark Johnston 
3623fc03d22bSJeff Roberson 	/*
3624fc03d22bSJeff Roberson 	 * See if we lost the race or were migrated.  Cache the
3625fc03d22bSJeff Roberson 	 * initialized bucket to make this less likely or claim
3626fc03d22bSJeff Roberson 	 * the memory directly.
3627fc03d22bSJeff Roberson 	 */
36284bd61e19SJeff Roberson 	critical_enter();
3629cc7ce83aSJeff Roberson 	cache = &zone->uz_cpu[curcpu];
3630376b1ba3SJeff Roberson 	if (cache->uc_allocbucket.ucb_bucket == NULL &&
3631c6fd3e23SJeff Roberson 	    ((cache_uz_flags(cache) & UMA_ZONE_FIRSTTOUCH) == 0 ||
36328c277118SMark Johnston 	    (curdomain = PCPU_GET(domain)) == domain ||
36338c277118SMark Johnston 	    VM_DOMAIN_EMPTY(curdomain))) {
3634c6fd3e23SJeff Roberson 		if (new)
3635c6fd3e23SJeff Roberson 			atomic_add_long(&ZDOM_GET(zone, domain)->uzd_imax,
3636c6fd3e23SJeff Roberson 			    bucket->ub_cnt);
3637376b1ba3SJeff Roberson 		cache_bucket_load_alloc(cache, bucket);
3638beb8beefSJeff Roberson 		return (true);
3639c6fd3e23SJeff Roberson 	}
3640c6fd3e23SJeff Roberson 
3641c6fd3e23SJeff Roberson 	/*
3642c6fd3e23SJeff Roberson 	 * We lost the race, release this bucket and start over.
3643c6fd3e23SJeff Roberson 	 */
3644c6fd3e23SJeff Roberson 	critical_exit();
3645c6fd3e23SJeff Roberson 	zone_put_bucket(zone, domain, bucket, udata, false);
3646c6fd3e23SJeff Roberson 	critical_enter();
3647c6fd3e23SJeff Roberson 
3648beb8beefSJeff Roberson 	return (true);
3649bbee39c6SJeff Roberson }
3650bbee39c6SJeff Roberson 
3651ab3185d1SJeff Roberson void *
3652ab3185d1SJeff Roberson uma_zalloc_domain(uma_zone_t zone, void *udata, int domain, int flags)
3653bbee39c6SJeff Roberson {
365406d8bdcbSMark Johnston #ifdef NUMA
365506d8bdcbSMark Johnston 	uma_bucket_t bucket;
365606d8bdcbSMark Johnston 	uma_zone_domain_t zdom;
365706d8bdcbSMark Johnston 	void *item;
365806d8bdcbSMark Johnston #endif
3659ab3185d1SJeff Roberson 
3660ab3185d1SJeff Roberson 	/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
366119fa89e9SMark Murray 	random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA);
3662ab3185d1SJeff Roberson 
3663ab3185d1SJeff Roberson 	/* This is the fast path allocation */
3664e63a1c2fSRyan Libby 	CTR4(KTR_UMA, "uma_zalloc_domain zone %s(%p) domain %d flags %d",
3665e63a1c2fSRyan Libby 	    zone->uz_name, zone, domain, flags);
3666ab3185d1SJeff Roberson 
3667ab3185d1SJeff Roberson 	if (flags & M_WAITOK) {
3668ab3185d1SJeff Roberson 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
3669ab3185d1SJeff Roberson 		    "uma_zalloc_domain: zone \"%s\"", zone->uz_name);
3670ab3185d1SJeff Roberson 	}
3671ab3185d1SJeff Roberson 	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
3672ab3185d1SJeff Roberson 	    ("uma_zalloc_domain: called with spinlock or critical section held"));
367306d8bdcbSMark Johnston 	KASSERT((zone->uz_flags & UMA_ZONE_SMR) == 0,
367406d8bdcbSMark Johnston 	    ("uma_zalloc_domain: called with SMR zone."));
367506d8bdcbSMark Johnston #ifdef NUMA
367606d8bdcbSMark Johnston 	KASSERT((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) != 0,
367706d8bdcbSMark Johnston 	    ("uma_zalloc_domain: called with non-FIRSTTOUCH zone."));
3678ab3185d1SJeff Roberson 
367906d8bdcbSMark Johnston 	if (vm_ndomains == 1)
368006d8bdcbSMark Johnston 		return (uma_zalloc_arg(zone, udata, flags));
368106d8bdcbSMark Johnston 
368206d8bdcbSMark Johnston 	/*
368306d8bdcbSMark Johnston 	 * Try to allocate from the bucket cache before falling back to the keg.
368406d8bdcbSMark Johnston 	 * We could try harder and attempt to allocate from per-CPU caches or
368506d8bdcbSMark Johnston 	 * the per-domain cross-domain buckets, but the complexity is probably
368606d8bdcbSMark Johnston 	 * not worth it.  It is more important that frees of previous
368706d8bdcbSMark Johnston 	 * cross-domain allocations do not blow up the cache.
368806d8bdcbSMark Johnston 	 */
368906d8bdcbSMark Johnston 	zdom = zone_domain_lock(zone, domain);
369006d8bdcbSMark Johnston 	if ((bucket = zone_fetch_bucket(zone, zdom, false)) != NULL) {
369106d8bdcbSMark Johnston 		item = bucket->ub_bucket[bucket->ub_cnt - 1];
369206d8bdcbSMark Johnston #ifdef INVARIANTS
369306d8bdcbSMark Johnston 		bucket->ub_bucket[bucket->ub_cnt - 1] = NULL;
369406d8bdcbSMark Johnston #endif
369506d8bdcbSMark Johnston 		bucket->ub_cnt--;
369606d8bdcbSMark Johnston 		zone_put_bucket(zone, domain, bucket, udata, true);
369706d8bdcbSMark Johnston 		item = item_ctor(zone, zone->uz_flags, zone->uz_size, udata,
369806d8bdcbSMark Johnston 		    flags, item);
369906d8bdcbSMark Johnston 		if (item != NULL) {
370006d8bdcbSMark Johnston 			KASSERT(item_domain(item) == domain,
370106d8bdcbSMark Johnston 			    ("%s: bucket cache item %p from wrong domain",
370206d8bdcbSMark Johnston 			    __func__, item));
370306d8bdcbSMark Johnston 			counter_u64_add(zone->uz_allocs, 1);
370406d8bdcbSMark Johnston 		}
370506d8bdcbSMark Johnston 		return (item);
370606d8bdcbSMark Johnston 	}
370706d8bdcbSMark Johnston 	ZDOM_UNLOCK(zdom);
3708ab3185d1SJeff Roberson 	return (zone_alloc_item(zone, udata, domain, flags));
370906d8bdcbSMark Johnston #else
371006d8bdcbSMark Johnston 	return (uma_zalloc_arg(zone, udata, flags));
371106d8bdcbSMark Johnston #endif
3712ab3185d1SJeff Roberson }
3713ab3185d1SJeff Roberson 
3714ab3185d1SJeff Roberson /*
3715ab3185d1SJeff Roberson  * Find a slab with some space.  Prefer slabs that are partially used over those
3716ab3185d1SJeff Roberson  * that are completely free.  This helps to reduce fragmentation.
3717ab3185d1SJeff Roberson  *
3718ab3185d1SJeff Roberson  * If 'rr' is 1, search all domains starting from 'domain'.  Otherwise check
3719ab3185d1SJeff Roberson  * only 'domain'.
3720ab3185d1SJeff Roberson  */
3721ab3185d1SJeff Roberson static uma_slab_t
3722194a979eSMark Johnston keg_first_slab(uma_keg_t keg, int domain, bool rr)
3723ab3185d1SJeff Roberson {
3724ab3185d1SJeff Roberson 	uma_domain_t dom;
3725bbee39c6SJeff Roberson 	uma_slab_t slab;
3726ab3185d1SJeff Roberson 	int start;
3727ab3185d1SJeff Roberson 
3728ab3185d1SJeff Roberson 	KASSERT(domain >= 0 && domain < vm_ndomains,
3729ab3185d1SJeff Roberson 	    ("keg_first_slab: domain %d out of range", domain));
37308b987a77SJeff Roberson 	KEG_LOCK_ASSERT(keg, domain);
3731ab3185d1SJeff Roberson 
3732ab3185d1SJeff Roberson 	slab = NULL;
3733ab3185d1SJeff Roberson 	start = domain;
3734ab3185d1SJeff Roberson 	do {
3735ab3185d1SJeff Roberson 		dom = &keg->uk_domain[domain];
37364ab3aee8SMark Johnston 		if ((slab = LIST_FIRST(&dom->ud_part_slab)) != NULL)
37374ab3aee8SMark Johnston 			return (slab);
37384ab3aee8SMark Johnston 		if ((slab = LIST_FIRST(&dom->ud_free_slab)) != NULL) {
3739ab3185d1SJeff Roberson 			LIST_REMOVE(slab, us_link);
37404ab3aee8SMark Johnston 			dom->ud_free_slabs--;
3741ab3185d1SJeff Roberson 			LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link);
3742ab3185d1SJeff Roberson 			return (slab);
3743ab3185d1SJeff Roberson 		}
3744ab3185d1SJeff Roberson 		if (rr)
3745ab3185d1SJeff Roberson 			domain = (domain + 1) % vm_ndomains;
3746ab3185d1SJeff Roberson 	} while (domain != start);
3747ab3185d1SJeff Roberson 
3748ab3185d1SJeff Roberson 	return (NULL);
3749ab3185d1SJeff Roberson }
3750ab3185d1SJeff Roberson 
37518b987a77SJeff Roberson /*
37528b987a77SJeff Roberson  * Fetch an existing slab from a free or partial list.  Returns with the
37538b987a77SJeff Roberson  * keg domain lock held if a slab was found or unlocked if not.
37548b987a77SJeff Roberson  */
3755ab3185d1SJeff Roberson static uma_slab_t
3756194a979eSMark Johnston keg_fetch_free_slab(uma_keg_t keg, int domain, bool rr, int flags)
3757ab3185d1SJeff Roberson {
37588b987a77SJeff Roberson 	uma_slab_t slab;
3759194a979eSMark Johnston 	uint32_t reserve;
3760099a0e58SBosko Milekic 
37618b987a77SJeff Roberson 	/* HASH has a single free list. */
376254c5ae80SRyan Libby 	if ((keg->uk_flags & UMA_ZFLAG_HASH) != 0)
37638b987a77SJeff Roberson 		domain = 0;
3764194a979eSMark Johnston 
37658b987a77SJeff Roberson 	KEG_LOCK(keg, domain);
3766194a979eSMark Johnston 	reserve = (flags & M_USE_RESERVE) != 0 ? 0 : keg->uk_reserve;
37674ab3aee8SMark Johnston 	if (keg->uk_domain[domain].ud_free_items <= reserve ||
37688b987a77SJeff Roberson 	    (slab = keg_first_slab(keg, domain, rr)) == NULL) {
37698b987a77SJeff Roberson 		KEG_UNLOCK(keg, domain);
3770194a979eSMark Johnston 		return (NULL);
37718b987a77SJeff Roberson 	}
37728b987a77SJeff Roberson 	return (slab);
3773194a979eSMark Johnston }
3774194a979eSMark Johnston 
3775194a979eSMark Johnston static uma_slab_t
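/*
 * Fetch a slab with at least one free item for the zone, following the
 * keg's NUMA policy when the caller passed UMA_ANYDOMAIN.  Allocates a
 * fresh slab from the backend if nothing suitable is cached, sleeping for
 * memory when M_WAITOK allows it.
 */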
3776194a979eSMark Johnston keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int rdomain, const int flags)
3777194a979eSMark Johnston {
3778194a979eSMark Johnston 	struct vm_domainset_iter di;
3779194a979eSMark Johnston 	uma_slab_t slab;
3780194a979eSMark Johnston 	int aflags, domain;
3781194a979eSMark Johnston 	bool rr;
3782194a979eSMark Johnston 
3783194a979eSMark Johnston restart:
3784bbee39c6SJeff Roberson 	/*
3785194a979eSMark Johnston 	 * Use the keg's policy if upper layers haven't already specified a
3786194a979eSMark Johnston 	 * domain (as happens with first-touch zones).
3787194a979eSMark Johnston 	 *
3788194a979eSMark Johnston 	 * To avoid races we run the iterator with the keg lock held, but that
3789194a979eSMark Johnston 	 * means that we cannot allow the vm_domainset layer to sleep.  Thus,
3790194a979eSMark Johnston 	 * clear M_WAITOK and handle low memory conditions locally.
3791bbee39c6SJeff Roberson 	 */
3792ab3185d1SJeff Roberson 	rr = rdomain == UMA_ANYDOMAIN;
3793ab3185d1SJeff Roberson 	if (rr) {
3794194a979eSMark Johnston 		aflags = (flags & ~M_WAITOK) | M_NOWAIT;
3795194a979eSMark Johnston 		vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain,
3796194a979eSMark Johnston 		    &aflags);
3797194a979eSMark Johnston 	} else {
3798194a979eSMark Johnston 		aflags = flags;
3799194a979eSMark Johnston 		domain = rdomain;
3800194a979eSMark Johnston 	}
3801ab3185d1SJeff Roberson 
3802194a979eSMark Johnston 	for (;;) {
3803194a979eSMark Johnston 		slab = keg_fetch_free_slab(keg, domain, rr, flags);
3804584061b4SJeff Roberson 		if (slab != NULL)
3805bbee39c6SJeff Roberson 			return (slab);
3806bbee39c6SJeff Roberson 
3807bbee39c6SJeff Roberson 		/*
3808bbee39c6SJeff Roberson 		 * M_NOVM means don't ask at all!
3809bbee39c6SJeff Roberson 		 */
3810bbee39c6SJeff Roberson 		if (flags & M_NOVM)
3811bbee39c6SJeff Roberson 			break;
3812bbee39c6SJeff Roberson 
381386220393SMark Johnston 		slab = keg_alloc_slab(keg, zone, domain, flags, aflags);
38148b987a77SJeff Roberson 		if (slab != NULL)
3815bbee39c6SJeff Roberson 			return (slab);
38163639ac42SJeff Roberson 		if (!rr && (flags & M_WAITOK) == 0)
38173639ac42SJeff Roberson 			break;
3818194a979eSMark Johnston 		if (rr && vm_domainset_iter_policy(&di, &domain) != 0) {
3819194a979eSMark Johnston 			if ((flags & M_WAITOK) != 0) {
382089d2fb14SKonstantin Belousov 				vm_wait_doms(&keg->uk_dr.dr_policy->ds_mask, 0);
3821194a979eSMark Johnston 				goto restart;
382230c5525bSAndrew Gallatin 			}
3823194a979eSMark Johnston 			break;
3824194a979eSMark Johnston 		}
3825ab3185d1SJeff Roberson 	}
3826ab3185d1SJeff Roberson 
3827bbee39c6SJeff Roberson 	/*
3828bbee39c6SJeff Roberson 	 * We might not have been able to get a slab but another cpu
3829bbee39c6SJeff Roberson 	 * could have while we were unlocked.  Check again before we
3830bbee39c6SJeff Roberson 	 * fail.
3831bbee39c6SJeff Roberson 	 */
38328b987a77SJeff Roberson 	if ((slab = keg_fetch_free_slab(keg, domain, rr, flags)) != NULL)
3833bbee39c6SJeff Roberson 		return (slab);
38348b987a77SJeff Roberson 
3835ab3185d1SJeff Roberson 	return (NULL);
3836ab3185d1SJeff Roberson }
3837bbee39c6SJeff Roberson 
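/*
 * Allocate one item from a slab: claim the first free index in the slab's
 * free bitmap, update the per-domain free-item count, and move the slab to
 * the full list once its last free item is gone.
 */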
3838d56368d7SBosko Milekic static void *
38390095a784SJeff Roberson slab_alloc_item(uma_keg_t keg, uma_slab_t slab)
3840bbee39c6SJeff Roberson {
3841ab3185d1SJeff Roberson 	uma_domain_t dom;
3842bbee39c6SJeff Roberson 	void *item;
38439b8db4d0SRyan Libby 	int freei;
3844bbee39c6SJeff Roberson 
38458b987a77SJeff Roberson 	KEG_LOCK_ASSERT(keg, slab->us_domain);
3846099a0e58SBosko Milekic 
38478b987a77SJeff Roberson 	dom = &keg->uk_domain[slab->us_domain];
38489b78b1f4SJeff Roberson 	freei = BIT_FFS(keg->uk_ipers, &slab->us_free) - 1;
38499b78b1f4SJeff Roberson 	BIT_CLR(keg->uk_ipers, freei, &slab->us_free);
38501e0701e1SJeff Roberson 	item = slab_item(slab, keg, freei);
3851bbee39c6SJeff Roberson 	slab->us_freecount--;
38524ab3aee8SMark Johnston 	dom->ud_free_items--;
3853ef72505eSJeff Roberson 
38544ab3aee8SMark Johnston 	/*
38554ab3aee8SMark Johnston 	 * Move this slab to the full list.  It must be on the partial list, so
38564ab3aee8SMark Johnston 	 * we do not need to update the free slab count.  In particular,
38574ab3aee8SMark Johnston 	 * keg_fetch_slab() always returns slabs on the partial list.
38584ab3aee8SMark Johnston 	 */
3859bbee39c6SJeff Roberson 	if (slab->us_freecount == 0) {
3860bbee39c6SJeff Roberson 		LIST_REMOVE(slab, us_link);
3861ab3185d1SJeff Roberson 		LIST_INSERT_HEAD(&dom->ud_full_slab, slab, us_link);
3862bbee39c6SJeff Roberson 	}
3863bbee39c6SJeff Roberson 
3864bbee39c6SJeff Roberson 	return (item);
3865bbee39c6SJeff Roberson }
3866bbee39c6SJeff Roberson 
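/*
 * The default zone import function: fill 'bucket' with up to 'max' items
 * taken directly from keg slabs, stopping early rather than depleting the
 * keg's reserve.  Under NUMA, round-robin zones stripe the allocations
 * across domains.
 */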
3867bbee39c6SJeff Roberson static int
3868b75c4efcSAndrew Turner zone_import(void *arg, void **bucket, int max, int domain, int flags)
38690095a784SJeff Roberson {
38708b987a77SJeff Roberson 	uma_domain_t dom;
3871b75c4efcSAndrew Turner 	uma_zone_t zone;
38720095a784SJeff Roberson 	uma_slab_t slab;
38730095a784SJeff Roberson 	uma_keg_t keg;
3874a03af342SSean Bruno #ifdef NUMA
3875ab3185d1SJeff Roberson 	int stripe;
3876a03af342SSean Bruno #endif
38770095a784SJeff Roberson 	int i;
38780095a784SJeff Roberson 
3879b75c4efcSAndrew Turner 	zone = arg;
38800095a784SJeff Roberson 	slab = NULL;
3881584061b4SJeff Roberson 	keg = zone->uz_keg;
3882af526374SJeff Roberson 	/* Try to keep the buckets totally full */
38830095a784SJeff Roberson 	for (i = 0; i < max; ) {
3884584061b4SJeff Roberson 		if ((slab = keg_fetch_slab(keg, zone, domain, flags)) == NULL)
38850095a784SJeff Roberson 			break;
3886a03af342SSean Bruno #ifdef NUMA
3887ab3185d1SJeff Roberson 		stripe = howmany(max, vm_ndomains);
3888a03af342SSean Bruno #endif
38898b987a77SJeff Roberson 		dom = &keg->uk_domain[slab->us_domain];
38901b2dcc8cSMark Johnston 		do {
38910095a784SJeff Roberson 			bucket[i++] = slab_alloc_item(keg, slab);
38921b2dcc8cSMark Johnston 			if (dom->ud_free_items <= keg->uk_reserve) {
38931b2dcc8cSMark Johnston 				/*
38941b2dcc8cSMark Johnston 				 * Avoid depleting the reserve after a
38951b2dcc8cSMark Johnston 				 * successful item allocation, even if
38961b2dcc8cSMark Johnston 				 * M_USE_RESERVE is specified.
38971b2dcc8cSMark Johnston 				 */
38981b2dcc8cSMark Johnston 				KEG_UNLOCK(keg, slab->us_domain);
38991b2dcc8cSMark Johnston 				goto out;
39001b2dcc8cSMark Johnston 			}
3901b6715dabSJeff Roberson #ifdef NUMA
3902ab3185d1SJeff Roberson 			/*
3903ab3185d1SJeff Roberson 			 * If the zone is striped we pick a new slab for every
3904ab3185d1SJeff Roberson 			 * N allocations.  Eliminating this conditional will
3905ab3185d1SJeff Roberson 			 * instead pick a new domain for each bucket rather
3906ab3185d1SJeff Roberson 			 * than stripe within each bucket.  The current option
3907ab3185d1SJeff Roberson 			 * produces more fragmentation and requires more cpu
3908ab3185d1SJeff Roberson 			 * time but yields better distribution.
3909ab3185d1SJeff Roberson 			 */
3910dfe13344SJeff Roberson 			if ((zone->uz_flags & UMA_ZONE_ROUNDROBIN) != 0 &&
3911ab3185d1SJeff Roberson 			    vm_ndomains > 1 && --stripe == 0)
3912ab3185d1SJeff Roberson 				break;
3913ab3185d1SJeff Roberson #endif
39141b2dcc8cSMark Johnston 		} while (slab->us_freecount != 0 && i < max);
39158b987a77SJeff Roberson 		KEG_UNLOCK(keg, slab->us_domain);
39161b2dcc8cSMark Johnston 
3917ab3185d1SJeff Roberson 		/* Don't block if we allocated any successfully. */
39180095a784SJeff Roberson 		flags &= ~M_WAITOK;
39190095a784SJeff Roberson 		flags |= M_NOWAIT;
39200095a784SJeff Roberson 	}
39211b2dcc8cSMark Johnston out:
39220095a784SJeff Roberson 	return (i);
39230095a784SJeff Roberson }
39240095a784SJeff Roberson 
39254bd61e19SJeff Roberson static int
39264bd61e19SJeff Roberson zone_alloc_limit_hard(uma_zone_t zone, int count, int flags)
39274bd61e19SJeff Roberson {
39284bd61e19SJeff Roberson 	uint64_t old, new, total, max;
39294bd61e19SJeff Roberson 
39304bd61e19SJeff Roberson 	/*
39314bd61e19SJeff Roberson 	 * The hard case.  We're going to sleep because there were existing
39324bd61e19SJeff Roberson 	 * sleepers or because we ran out of items.  This routine enforces
39334bd61e19SJeff Roberson 	 * fairness by keeping fifo order.
39344bd61e19SJeff Roberson 	 *
39354bd61e19SJeff Roberson 	 * First release our ill gotten gains and make some noise.
39364bd61e19SJeff Roberson 	 * First release our ill-gotten gains and make some noise.
39374bd61e19SJeff Roberson 	for (;;) {
39384bd61e19SJeff Roberson 		zone_free_limit(zone, count);
39394bd61e19SJeff Roberson 		zone_log_warning(zone);
39404bd61e19SJeff Roberson 		zone_maxaction(zone);
39414bd61e19SJeff Roberson 		if (flags & M_NOWAIT)
39424bd61e19SJeff Roberson 			return (0);
39434bd61e19SJeff Roberson 
39444bd61e19SJeff Roberson 		/*
39454bd61e19SJeff Roberson 		 * We need to allocate an item or set ourselves as a sleeper
39464bd61e19SJeff Roberson 		 * while the sleepq lock is held to avoid wakeup races.  This
39474bd61e19SJeff Roberson 		 * is essentially a home rolled semaphore.
39484bd61e19SJeff Roberson 		 */
39494bd61e19SJeff Roberson 		sleepq_lock(&zone->uz_max_items);
39504bd61e19SJeff Roberson 		old = zone->uz_items;
39514bd61e19SJeff Roberson 		do {
39524bd61e19SJeff Roberson 			MPASS(UZ_ITEMS_SLEEPERS(old) < UZ_ITEMS_SLEEPERS_MAX);
39534bd61e19SJeff Roberson 			/* Cache the max since we will evaluate twice. */
39544bd61e19SJeff Roberson 			max = zone->uz_max_items;
39554bd61e19SJeff Roberson 			if (UZ_ITEMS_SLEEPERS(old) != 0 ||
39564bd61e19SJeff Roberson 			    UZ_ITEMS_COUNT(old) >= max)
39574bd61e19SJeff Roberson 				new = old + UZ_ITEMS_SLEEPER;
39584bd61e19SJeff Roberson 			else
39594bd61e19SJeff Roberson 				new = old + MIN(count, max - old);
39604bd61e19SJeff Roberson 		} while (atomic_fcmpset_64(&zone->uz_items, &old, new) == 0);
39614bd61e19SJeff Roberson 
39624bd61e19SJeff Roberson 		/* We may have successfully allocated under the sleepq lock. */
39634bd61e19SJeff Roberson 		if (UZ_ITEMS_SLEEPERS(new) == 0) {
39644bd61e19SJeff Roberson 			sleepq_release(&zone->uz_max_items);
39654bd61e19SJeff Roberson 			return (new - old);
39664bd61e19SJeff Roberson 		}
39674bd61e19SJeff Roberson 
39684bd61e19SJeff Roberson 		/*
39694bd61e19SJeff Roberson 		 * This is in a different cacheline from uz_items so that we
39704bd61e19SJeff Roberson 		 * don't constantly invalidate the fastpath cacheline when we
39714bd61e19SJeff Roberson 		 * adjust item counts.  This could be limited to toggling on
39724bd61e19SJeff Roberson 		 * transitions.
39734bd61e19SJeff Roberson 		 */
39744bd61e19SJeff Roberson 		atomic_add_32(&zone->uz_sleepers, 1);
39754bd61e19SJeff Roberson 		atomic_add_64(&zone->uz_sleeps, 1);
39764bd61e19SJeff Roberson 
39774bd61e19SJeff Roberson 		/*
39784bd61e19SJeff Roberson 		 * We have added ourselves as a sleeper.  The sleepq lock
39794bd61e19SJeff Roberson 		 * protects us from wakeup races.  Sleep now and then retry.
39804bd61e19SJeff Roberson 		 */
39814bd61e19SJeff Roberson 		sleepq_add(&zone->uz_max_items, NULL, "zonelimit", 0, 0);
39824bd61e19SJeff Roberson 		sleepq_wait(&zone->uz_max_items, PVM);
39834bd61e19SJeff Roberson 
39844bd61e19SJeff Roberson 		/*
39854bd61e19SJeff Roberson 		 * After wakeup, remove ourselves as a sleeper and try
39864bd61e19SJeff Roberson 		 * again.  We no longer have the sleepq lock for protection.
39874bd61e19SJeff Roberson 		 *
39884bd61e19SJeff Roberson 		 * Subtract ourselves as a sleeper while attempting to add
39894bd61e19SJeff Roberson 		 * our count.
39904bd61e19SJeff Roberson 		 */
39914bd61e19SJeff Roberson 		atomic_subtract_32(&zone->uz_sleepers, 1);
39924bd61e19SJeff Roberson 		old = atomic_fetchadd_64(&zone->uz_items,
39934bd61e19SJeff Roberson 		    -(UZ_ITEMS_SLEEPER - count));
39944bd61e19SJeff Roberson 		/* We're no longer a sleeper. */
39954bd61e19SJeff Roberson 		old -= UZ_ITEMS_SLEEPER;
39964bd61e19SJeff Roberson 
39974bd61e19SJeff Roberson 		/*
39984bd61e19SJeff Roberson 		 * If we're still at the limit, restart.  Notably do not
39994bd61e19SJeff Roberson 		 * block on other sleepers.  Cache the max value to protect
40004bd61e19SJeff Roberson 		 * against changes via sysctl.
40014bd61e19SJeff Roberson 		 */
40024bd61e19SJeff Roberson 		total = UZ_ITEMS_COUNT(old);
40034bd61e19SJeff Roberson 		max = zone->uz_max_items;
40044bd61e19SJeff Roberson 		if (total >= max)
40054bd61e19SJeff Roberson 			continue;
40064bd61e19SJeff Roberson 		/* Truncate if necessary, otherwise wake other sleepers. */
40074bd61e19SJeff Roberson 		if (total + count > max) {
40084bd61e19SJeff Roberson 			zone_free_limit(zone, total + count - max);
40094bd61e19SJeff Roberson 			count = max - total;
40104bd61e19SJeff Roberson 		} else if (total + count < max && UZ_ITEMS_SLEEPERS(old) != 0)
40114bd61e19SJeff Roberson 			wakeup_one(&zone->uz_max_items);
40124bd61e19SJeff Roberson 
40134bd61e19SJeff Roberson 		return (count);
40144bd61e19SJeff Roberson 	}
40154bd61e19SJeff Roberson }
40164bd61e19SJeff Roberson 
40174bd61e19SJeff Roberson /*
40184bd61e19SJeff Roberson  * Allocate 'count' items from our max_items limit.  Returns the number
40194bd61e19SJeff Roberson  * available.  If M_NOWAIT is not specified it will sleep until at least
40204bd61e19SJeff Roberson  * one item can be allocated.
40214bd61e19SJeff Roberson  */
40224bd61e19SJeff Roberson static int
40234bd61e19SJeff Roberson zone_alloc_limit(uma_zone_t zone, int count, int flags)
40244bd61e19SJeff Roberson {
40254bd61e19SJeff Roberson 	uint64_t old;
40264bd61e19SJeff Roberson 	uint64_t max;
40274bd61e19SJeff Roberson 
40284bd61e19SJeff Roberson 	max = zone->uz_max_items;
40294bd61e19SJeff Roberson 	MPASS(max > 0);
40304bd61e19SJeff Roberson 
40314bd61e19SJeff Roberson 	/*
40324bd61e19SJeff Roberson 	 * We expect normal allocations to succeed with a simple
40334bd61e19SJeff Roberson 	 * fetchadd.
40344bd61e19SJeff Roberson 	 */
40354bd61e19SJeff Roberson 	old = atomic_fetchadd_64(&zone->uz_items, count);
40364bd61e19SJeff Roberson 	if (__predict_true(old + count <= max))
40374bd61e19SJeff Roberson 		return (count);
40384bd61e19SJeff Roberson 
40394bd61e19SJeff Roberson 	/*
40404bd61e19SJeff Roberson 	 * If we had some items and no sleepers just return the
40414bd61e19SJeff Roberson 	 * truncated value.  We have to release the excess space
40424bd61e19SJeff Roberson 	 * though because that may wake sleepers who weren't woken
40434bd61e19SJeff Roberson 	 * because we were temporarily over the limit.
40444bd61e19SJeff Roberson 	 */
40454bd61e19SJeff Roberson 	if (old < max) {
40464bd61e19SJeff Roberson 		zone_free_limit(zone, (old + count) - max);
40474bd61e19SJeff Roberson 		return (max - old);
40484bd61e19SJeff Roberson 	}
40494bd61e19SJeff Roberson 	return (zone_alloc_limit_hard(zone, count, flags));
40504bd61e19SJeff Roberson }
40514bd61e19SJeff Roberson 
40524bd61e19SJeff Roberson /*
40534bd61e19SJeff Roberson  * Free a number of items back to the limit.
40544bd61e19SJeff Roberson  */
40554bd61e19SJeff Roberson static void
40564bd61e19SJeff Roberson zone_free_limit(uma_zone_t zone, int count)
40574bd61e19SJeff Roberson {
40584bd61e19SJeff Roberson 	uint64_t old;
40594bd61e19SJeff Roberson 
40604bd61e19SJeff Roberson 	MPASS(count > 0);
40614bd61e19SJeff Roberson 
40624bd61e19SJeff Roberson 	/*
40634bd61e19SJeff Roberson 	 * In the common case we either have no sleepers or
40644bd61e19SJeff Roberson 	 * are still over the limit and can just return.
40654bd61e19SJeff Roberson 	 */
40664bd61e19SJeff Roberson 	old = atomic_fetchadd_64(&zone->uz_items, -count);
40674bd61e19SJeff Roberson 	if (__predict_true(UZ_ITEMS_SLEEPERS(old) == 0 ||
40684bd61e19SJeff Roberson 	   UZ_ITEMS_COUNT(old) - count >= zone->uz_max_items))
40694bd61e19SJeff Roberson 		return;
40704bd61e19SJeff Roberson 
40714bd61e19SJeff Roberson 	/*
40724bd61e19SJeff Roberson 	 * Moderate the rate of wakeups.  Sleepers will continue
40734bd61e19SJeff Roberson 	 * to generate wakeups if necessary.
40744bd61e19SJeff Roberson 	 */
40754bd61e19SJeff Roberson 	wakeup_one(&zone->uz_max_items);
40764bd61e19SJeff Roberson }
40774bd61e19SJeff Roberson 
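/*
 * Allocate and fill a new bucket for the zone.  The bucket is populated
 * through the zone's import function, each item is passed through the zone
 * init if one exists, and items that fail initialization are released
 * again.  Returns NULL if no items could be obtained.
 */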
4078fc03d22bSJeff Roberson static uma_bucket_t
4079beb8beefSJeff Roberson zone_alloc_bucket(uma_zone_t zone, void *udata, int domain, int flags)
4080bbee39c6SJeff Roberson {
4081bbee39c6SJeff Roberson 	uma_bucket_t bucket;
408209c8cb71SMark Johnston 	int error, maxbucket, cnt;
4083bbee39c6SJeff Roberson 
4084e63a1c2fSRyan Libby 	CTR3(KTR_UMA, "zone_alloc_bucket zone %s(%p) domain %d", zone->uz_name,
4085e63a1c2fSRyan Libby 	    zone, domain);
408630c5525bSAndrew Gallatin 
4087c1685086SJeff Roberson 	/* Avoid allocs targeting empty domains. */
4088c1685086SJeff Roberson 	if (domain != UMA_ANYDOMAIN && VM_DOMAIN_EMPTY(domain))
4089c1685086SJeff Roberson 		domain = UMA_ANYDOMAIN;
40908c277118SMark Johnston 	else if ((zone->uz_flags & UMA_ZONE_ROUNDROBIN) != 0)
4091c6fd3e23SJeff Roberson 		domain = UMA_ANYDOMAIN;
4092c1685086SJeff Roberson 
40934bd61e19SJeff Roberson 	if (zone->uz_max_items > 0)
40944bd61e19SJeff Roberson 		maxbucket = zone_alloc_limit(zone, zone->uz_bucket_size,
40954bd61e19SJeff Roberson 		    M_NOWAIT);
40964bd61e19SJeff Roberson 	else
409720a4e154SJeff Roberson 		maxbucket = zone->uz_bucket_size;
40984bd61e19SJeff Roberson 	if (maxbucket == 0)
40994bd61e19SJeff Roberson 		return (NULL);
4100beb8beefSJeff Roberson 
41016fd34d6fSJeff Roberson 	/* Don't wait for buckets, preserve caller's NOVM setting. */
41026fd34d6fSJeff Roberson 	bucket = bucket_alloc(zone, udata, M_NOWAIT | (flags & M_NOVM));
4103beb8beefSJeff Roberson 	if (bucket == NULL) {
4104beb8beefSJeff Roberson 		cnt = 0;
4105beb8beefSJeff Roberson 		goto out;
4106beb8beefSJeff Roberson 	}
41070095a784SJeff Roberson 
41080095a784SJeff Roberson 	bucket->ub_cnt = zone->uz_import(zone->uz_arg, bucket->ub_bucket,
4109beb8beefSJeff Roberson 	    MIN(maxbucket, bucket->ub_entries), domain, flags);
41100095a784SJeff Roberson 
41110095a784SJeff Roberson 	/*
41120095a784SJeff Roberson 	 * Initialize the memory if necessary.
41130095a784SJeff Roberson 	 */
41140095a784SJeff Roberson 	if (bucket->ub_cnt != 0 && zone->uz_init != NULL) {
4115099a0e58SBosko Milekic 		int i;
4116bbee39c6SJeff Roberson 
411709c8cb71SMark Johnston 		for (i = 0; i < bucket->ub_cnt; i++) {
411809c8cb71SMark Johnston 			kasan_mark_item_valid(zone, bucket->ub_bucket[i]);
411909c8cb71SMark Johnston 			error = zone->uz_init(bucket->ub_bucket[i],
412009c8cb71SMark Johnston 			    zone->uz_size, flags);
412109c8cb71SMark Johnston 			kasan_mark_item_invalid(zone, bucket->ub_bucket[i]);
412209c8cb71SMark Johnston 			if (error != 0)
4123b23f72e9SBrian Feldman 				break;
412409c8cb71SMark Johnston 		}
412509c8cb71SMark Johnston 
4126b23f72e9SBrian Feldman 		/*
4127b23f72e9SBrian Feldman 		 * If we couldn't initialize the whole bucket, put the
4128b23f72e9SBrian Feldman 		 * rest back onto the freelist.
4129b23f72e9SBrian Feldman 		 */
4130b23f72e9SBrian Feldman 		if (i != bucket->ub_cnt) {
4131af526374SJeff Roberson 			zone->uz_release(zone->uz_arg, &bucket->ub_bucket[i],
41320095a784SJeff Roberson 			    bucket->ub_cnt - i);
4133a5a262c6SBosko Milekic #ifdef INVARIANTS
41340095a784SJeff Roberson 			bzero(&bucket->ub_bucket[i],
41350095a784SJeff Roberson 			    sizeof(void *) * (bucket->ub_cnt - i));
4136a5a262c6SBosko Milekic #endif
4137b23f72e9SBrian Feldman 			bucket->ub_cnt = i;
4138b23f72e9SBrian Feldman 		}
4139099a0e58SBosko Milekic 	}
4140099a0e58SBosko Milekic 
4141beb8beefSJeff Roberson 	cnt = bucket->ub_cnt;
4142f7104ccdSAlexander Motin 	if (bucket->ub_cnt == 0) {
41436fd34d6fSJeff Roberson 		bucket_free(zone, bucket, udata);
41442efcc8cbSGleb Smirnoff 		counter_u64_add(zone->uz_fails, 1);
4145beb8beefSJeff Roberson 		bucket = NULL;
4146beb8beefSJeff Roberson 	}
4147beb8beefSJeff Roberson out:
41484bd61e19SJeff Roberson 	if (zone->uz_max_items > 0 && cnt < maxbucket)
41494bd61e19SJeff Roberson 		zone_free_limit(zone, maxbucket - cnt);
4150fc03d22bSJeff Roberson 
4151fc03d22bSJeff Roberson 	return (bucket);
4152fc03d22bSJeff Roberson }
4153fc03d22bSJeff Roberson 
41548355f576SJeff Roberson /*
41550095a784SJeff Roberson  * Allocates a single item from a zone.
41568355f576SJeff Roberson  *
41578355f576SJeff Roberson  * Arguments
41588355f576SJeff Roberson  *	zone   The zone to alloc for.
41598355f576SJeff Roberson  *	udata  The data to be passed to the constructor.
4160ab3185d1SJeff Roberson  *	domain The domain to allocate from or UMA_ANYDOMAIN.
4161a163d034SWarner Losh  *	flags  M_WAITOK, M_NOWAIT, M_ZERO.
41628355f576SJeff Roberson  *
41638355f576SJeff Roberson  * Returns
41648355f576SJeff Roberson  *	NULL if there is no memory and M_NOWAIT is set
4165bbee39c6SJeff Roberson  *	An item if successful
41668355f576SJeff Roberson  */
41678355f576SJeff Roberson 
41688355f576SJeff Roberson static void *
4169ab3185d1SJeff Roberson zone_alloc_item(uma_zone_t zone, void *udata, int domain, int flags)
41708355f576SJeff Roberson {
41718355f576SJeff Roberson 	void *item;
41728355f576SJeff Roberson 
4173791dda87SAndrew Gallatin 	if (zone->uz_max_items > 0 && zone_alloc_limit(zone, 1, flags) == 0) {
4174791dda87SAndrew Gallatin 		counter_u64_add(zone->uz_fails, 1);
4175bb15d1c7SGleb Smirnoff 		return (NULL);
4176791dda87SAndrew Gallatin 	}
41778355f576SJeff Roberson 
4178c1685086SJeff Roberson 	/* Avoid allocs targeting empty domains. */
4179c1685086SJeff Roberson 	if (domain != UMA_ANYDOMAIN && VM_DOMAIN_EMPTY(domain))
418030c5525bSAndrew Gallatin 		domain = UMA_ANYDOMAIN;
4181c1685086SJeff Roberson 
4182ab3185d1SJeff Roberson 	if (zone->uz_import(zone->uz_arg, &item, 1, domain, flags) != 1)
4183beb8beefSJeff Roberson 		goto fail_cnt;
41848355f576SJeff Roberson 
4185099a0e58SBosko Milekic 	/*
4186099a0e58SBosko Milekic 	 * We have to call both the zone's init (not the keg's init)
4187099a0e58SBosko Milekic 	 * and the zone's ctor.  This is because the item is going from
4188099a0e58SBosko Milekic 	 * a keg slab directly to the user, and the user is expecting it
4189099a0e58SBosko Milekic 	 * to be both zone-init'd as well as zone-ctor'd.
4190099a0e58SBosko Milekic 	 */
4191b23f72e9SBrian Feldman 	if (zone->uz_init != NULL) {
419209c8cb71SMark Johnston 		int error;
419309c8cb71SMark Johnston 
419409c8cb71SMark Johnston 		kasan_mark_item_valid(zone, item);
419509c8cb71SMark Johnston 		error = zone->uz_init(item, zone->uz_size, flags);
419609c8cb71SMark Johnston 		kasan_mark_item_invalid(zone, item);
419709c8cb71SMark Johnston 		if (error != 0) {
4198bb15d1c7SGleb Smirnoff 			zone_free_item(zone, item, udata, SKIP_FINI | SKIP_CNT);
4199beb8beefSJeff Roberson 			goto fail_cnt;
4200beb8beefSJeff Roberson 		}
4201beb8beefSJeff Roberson 	}
4202d4665eaaSJeff Roberson 	item = item_ctor(zone, zone->uz_flags, zone->uz_size, udata, flags,
4203d4665eaaSJeff Roberson 	    item);
4204beb8beefSJeff Roberson 	if (item == NULL)
42050095a784SJeff Roberson 		goto fail;
42068355f576SJeff Roberson 
42072efcc8cbSGleb Smirnoff 	counter_u64_add(zone->uz_allocs, 1);
42081431a748SGleb Smirnoff 	CTR3(KTR_UMA, "zone_alloc_item item %p from %s(%p)", item,
42091431a748SGleb Smirnoff 	    zone->uz_name, zone);
42101431a748SGleb Smirnoff 
42118355f576SJeff Roberson 	return (item);
42120095a784SJeff Roberson 
4213beb8beefSJeff Roberson fail_cnt:
4214beb8beefSJeff Roberson 	counter_u64_add(zone->uz_fails, 1);
42150095a784SJeff Roberson fail:
42164bd61e19SJeff Roberson 	if (zone->uz_max_items > 0)
42174bd61e19SJeff Roberson 		zone_free_limit(zone, 1);
42181431a748SGleb Smirnoff 	CTR2(KTR_UMA, "zone_alloc_item failed from %s(%p)",
42191431a748SGleb Smirnoff 	    zone->uz_name, zone);
42204bd61e19SJeff Roberson 
42210095a784SJeff Roberson 	return (NULL);
42228355f576SJeff Roberson }
42238355f576SJeff Roberson 
42248355f576SJeff Roberson /* See uma.h */
42258355f576SJeff Roberson void
4226d4665eaaSJeff Roberson uma_zfree_smr(uma_zone_t zone, void *item)
4227d4665eaaSJeff Roberson {
4228d4665eaaSJeff Roberson 	uma_cache_t cache;
4229d4665eaaSJeff Roberson 	uma_cache_bucket_t bucket;
4230c6fd3e23SJeff Roberson 	int itemdomain, uz_flags;
4231d4665eaaSJeff Roberson 
4232d4665eaaSJeff Roberson #ifdef UMA_ZALLOC_DEBUG
4233d4665eaaSJeff Roberson 	KASSERT((zone->uz_flags & UMA_ZONE_SMR) != 0,
4234952c8964SMark Johnston 	    ("uma_zfree_smr: called with non-SMR zone."));
4235d4665eaaSJeff Roberson 	KASSERT(item != NULL, ("uma_zfree_smr: Called with NULL pointer."));
4236c6fd3e23SJeff Roberson 	SMR_ASSERT_NOT_ENTERED(zone->uz_smr);
4237d4665eaaSJeff Roberson 	if (uma_zfree_debug(zone, item, NULL) == EJUSTRETURN)
4238d4665eaaSJeff Roberson 		return;
4239d4665eaaSJeff Roberson #endif
4240d4665eaaSJeff Roberson 	cache = &zone->uz_cpu[curcpu];
4241d4665eaaSJeff Roberson 	uz_flags = cache_uz_flags(cache);
4242c6fd3e23SJeff Roberson 	itemdomain = 0;
4243d4665eaaSJeff Roberson #ifdef NUMA
4244d4665eaaSJeff Roberson 	if ((uz_flags & UMA_ZONE_FIRSTTOUCH) != 0)
424581302f1dSMark Johnston 		itemdomain = item_domain(item);
4246d4665eaaSJeff Roberson #endif
4247d4665eaaSJeff Roberson 	critical_enter();
4248d4665eaaSJeff Roberson 	do {
4249d4665eaaSJeff Roberson 		cache = &zone->uz_cpu[curcpu];
4250d4665eaaSJeff Roberson 		/* SMR Zones must free to the free bucket. */
4251d4665eaaSJeff Roberson 		bucket = &cache->uc_freebucket;
4252d4665eaaSJeff Roberson #ifdef NUMA
4253d4665eaaSJeff Roberson 		if ((uz_flags & UMA_ZONE_FIRSTTOUCH) != 0 &&
4254c6fd3e23SJeff Roberson 		    PCPU_GET(domain) != itemdomain) {
4255d4665eaaSJeff Roberson 			bucket = &cache->uc_crossbucket;
4256d4665eaaSJeff Roberson 		}
4257d4665eaaSJeff Roberson #endif
4258d4665eaaSJeff Roberson 		if (__predict_true(bucket->ucb_cnt < bucket->ucb_entries)) {
4259d4665eaaSJeff Roberson 			cache_bucket_push(cache, bucket, item);
4260d4665eaaSJeff Roberson 			critical_exit();
4261d4665eaaSJeff Roberson 			return;
4262d4665eaaSJeff Roberson 		}
4263d4665eaaSJeff Roberson 	} while (cache_free(zone, cache, NULL, item, itemdomain));
4264d4665eaaSJeff Roberson 	critical_exit();
4265d4665eaaSJeff Roberson 
4266d4665eaaSJeff Roberson 	/*
4267d4665eaaSJeff Roberson 	 * If nothing else caught this, we'll just do an internal free.
4268d4665eaaSJeff Roberson 	 */
4269d4665eaaSJeff Roberson 	zone_free_item(zone, item, NULL, SKIP_NONE);
4270d4665eaaSJeff Roberson }
4271d4665eaaSJeff Roberson 
4272d4665eaaSJeff Roberson /* See uma.h */
4273d4665eaaSJeff Roberson void
42748355f576SJeff Roberson uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
42758355f576SJeff Roberson {
42768355f576SJeff Roberson 	uma_cache_t cache;
4277376b1ba3SJeff Roberson 	uma_cache_bucket_t bucket;
4278c6fd3e23SJeff Roberson 	int itemdomain, uz_flags;
42798355f576SJeff Roberson 
4280e866d8f0SMark Murray 	/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
428119fa89e9SMark Murray 	random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA);
428210cb2424SMark Murray 
4283e63a1c2fSRyan Libby 	CTR2(KTR_UMA, "uma_zfree_arg zone %s(%p)", zone->uz_name, zone);
42843659f747SRobert Watson 
4285d4665eaaSJeff Roberson #ifdef UMA_ZALLOC_DEBUG
4286d4665eaaSJeff Roberson 	KASSERT((zone->uz_flags & UMA_ZONE_SMR) == 0,
4287952c8964SMark Johnston 	    ("uma_zfree_arg: called with SMR zone."));
4288d4665eaaSJeff Roberson 	if (uma_zfree_debug(zone, item, udata) == EJUSTRETURN)
4289d4665eaaSJeff Roberson 		return;
4290d4665eaaSJeff Roberson #endif
429120ed0cb0SMatthew D Fleming         /* uma_zfree(..., NULL) does nothing, to match free(9). */
429220ed0cb0SMatthew D Fleming         if (item == NULL)
429320ed0cb0SMatthew D Fleming                 return;
4294cc7ce83aSJeff Roberson 
4295cc7ce83aSJeff Roberson 	/*
4296cc7ce83aSJeff Roberson 	 * We are accessing the per-cpu cache without a critical section to
4297cc7ce83aSJeff Roberson 	 * fetch size and flags.  This is acceptable; if we are preempted we
4298cc7ce83aSJeff Roberson 	 * will simply read another cpu's line.
4299cc7ce83aSJeff Roberson 	 */
4300cc7ce83aSJeff Roberson 	cache = &zone->uz_cpu[curcpu];
4301cc7ce83aSJeff Roberson 	uz_flags = cache_uz_flags(cache);
4302d4665eaaSJeff Roberson 	if (UMA_ALWAYS_CTORDTOR ||
4303d4665eaaSJeff Roberson 	    __predict_false((uz_flags & UMA_ZFLAG_CTORDTOR) != 0))
4304cc7ce83aSJeff Roberson 		item_dtor(zone, item, cache_uz_size(cache), udata, SKIP_NONE);
4305ef72505eSJeff Roberson 
4306af7f9b97SJeff Roberson 	/*
4307af7f9b97SJeff Roberson 	 * The race here is acceptable.  If we miss it we'll just have to wait
4308af7f9b97SJeff Roberson 	 * a little longer for the limits to be reset.
4309af7f9b97SJeff Roberson 	 */
4310cc7ce83aSJeff Roberson 	if (__predict_false(uz_flags & UMA_ZFLAG_LIMIT)) {
43118a6776caSMark Johnston 		if (atomic_load_32(&zone->uz_sleepers) > 0)
4312fc03d22bSJeff Roberson 			goto zfree_item;
4313cc7ce83aSJeff Roberson 	}
4314af7f9b97SJeff Roberson 
43155d1ae027SRobert Watson 	/*
43165d1ae027SRobert Watson 	 * If possible, free to the per-CPU cache.  There are two
43175d1ae027SRobert Watson 	 * requirements for safe access to the per-CPU cache: (1) the thread
43185d1ae027SRobert Watson 	 * accessing the cache must not be preempted or yield during access,
43195d1ae027SRobert Watson 	 * and (2) the thread must not migrate CPUs without switching which
43205d1ae027SRobert Watson 	 * cache it accesses.  We rely on a critical section to prevent
43215d1ae027SRobert Watson 	 * preemption and migration.  We release the critical section in
43225d1ae027SRobert Watson 	 * order to acquire the zone mutex if we are unable to free to the
43235d1ae027SRobert Watson 	 * current cache; when we re-acquire the critical section, we must
43245d1ae027SRobert Watson 	 * detect and handle migration if it has occurred.
43255d1ae027SRobert Watson 	 */
4326c6fd3e23SJeff Roberson 	itemdomain = 0;
4327dfe13344SJeff Roberson #ifdef NUMA
4328dfe13344SJeff Roberson 	if ((uz_flags & UMA_ZONE_FIRSTTOUCH) != 0)
432981302f1dSMark Johnston 		itemdomain = item_domain(item);
4330dfe13344SJeff Roberson #endif
43315d1ae027SRobert Watson 	critical_enter();
43320a81b439SJeff Roberson 	do {
4333cc7ce83aSJeff Roberson 		cache = &zone->uz_cpu[curcpu];
4334a553d4b8SJeff Roberson 		/*
4335dfe13344SJeff Roberson 		 * Try to free into the allocbucket first to give LIFO
4336dfe13344SJeff Roberson 		 * ordering for cache-hot data structures.  Spill over
4337dfe13344SJeff Roberson 		 * into the freebucket if necessary.  Alloc will swap
4338dfe13344SJeff Roberson 		 * them if one runs dry.
4339a553d4b8SJeff Roberson 		 */
4340dfe13344SJeff Roberson 		bucket = &cache->uc_allocbucket;
4341d4665eaaSJeff Roberson #ifdef NUMA
4342d4665eaaSJeff Roberson 		if ((uz_flags & UMA_ZONE_FIRSTTOUCH) != 0 &&
4343c6fd3e23SJeff Roberson 		    PCPU_GET(domain) != itemdomain) {
4344d4665eaaSJeff Roberson 			bucket = &cache->uc_crossbucket;
4345d4665eaaSJeff Roberson 		} else
4346d4665eaaSJeff Roberson #endif
4347fe835cbfSJeff Roberson 		if (bucket->ucb_cnt == bucket->ucb_entries &&
4348fe835cbfSJeff Roberson 		   cache->uc_freebucket.ucb_cnt <
4349fe835cbfSJeff Roberson 		   cache->uc_freebucket.ucb_entries)
4350fe835cbfSJeff Roberson 			cache_bucket_swap(&cache->uc_freebucket,
4351fe835cbfSJeff Roberson 			    &cache->uc_allocbucket);
4352376b1ba3SJeff Roberson 		if (__predict_true(bucket->ucb_cnt < bucket->ucb_entries)) {
4353376b1ba3SJeff Roberson 			cache_bucket_push(cache, bucket, item);
43545d1ae027SRobert Watson 			critical_exit();
43558355f576SJeff Roberson 			return;
4356fc03d22bSJeff Roberson 		}
43570a81b439SJeff Roberson 	} while (cache_free(zone, cache, udata, item, itemdomain));
43580a81b439SJeff Roberson 	critical_exit();
4359fc03d22bSJeff Roberson 
43608355f576SJeff Roberson 	/*
43610a81b439SJeff Roberson 	 * If nothing else caught this, we'll just do an internal free.
43628355f576SJeff Roberson 	 */
43630a81b439SJeff Roberson zfree_item:
43640a81b439SJeff Roberson 	zone_free_item(zone, item, udata, SKIP_DTOR);
43650a81b439SJeff Roberson }
4366fc03d22bSJeff Roberson 
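/*
 * Illustrative sketch (not compiled as part of this file) of the ordinary
 * free path above via the uma_zalloc()/uma_zfree() wrappers from uma(9);
 * uma_zcreate() is likewise declared in uma(9) and not shown in this
 * excerpt.  The "bar" names are hypothetical.
 */
#if 0	/* Example only; never compiled. */
struct bar {
	int	b_refs;
};

static uma_zone_t bar_zone;

static void
bar_zone_init(void)
{
	bar_zone = uma_zcreate("bar", sizeof(struct bar),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
}

static struct bar *
bar_alloc(void)
{
	/* M_WAITOK may sleep; use M_NOWAIT in contexts that cannot. */
	return (uma_zalloc(bar_zone, M_WAITOK | M_ZERO));
}

static void
bar_free(struct bar *b)
{
	/* As with free(9), passing NULL is a harmless no-op. */
	uma_zfree(bar_zone, b);
}
#endif
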
4367dfe13344SJeff Roberson #ifdef NUMA
436891d947bfSJeff Roberson /*
436991d947bfSJeff Roberson  * Sort crossdomain free buckets into domain-correct buckets and cache
437091d947bfSJeff Roberson  * them.
437191d947bfSJeff Roberson  */
437291d947bfSJeff Roberson static void
437391d947bfSJeff Roberson zone_free_cross(uma_zone_t zone, uma_bucket_t bucket, void *udata)
437491d947bfSJeff Roberson {
4375991f23efSMark Johnston 	struct uma_bucketlist emptybuckets, fullbuckets;
437691d947bfSJeff Roberson 	uma_zone_domain_t zdom;
437791d947bfSJeff Roberson 	uma_bucket_t b;
4378543117beSJeff Roberson 	smr_seq_t seq;
437991d947bfSJeff Roberson 	void *item;
438091d947bfSJeff Roberson 	int domain;
438191d947bfSJeff Roberson 
438291d947bfSJeff Roberson 	CTR3(KTR_UMA,
438391d947bfSJeff Roberson 	    "uma_zfree: zone %s(%p) draining cross bucket %p",
438491d947bfSJeff Roberson 	    zone->uz_name, zone, bucket);
438591d947bfSJeff Roberson 
4386543117beSJeff Roberson 	/*
4387543117beSJeff Roberson 	 * It is possible for buckets to arrive here out of order so we fetch
4388543117beSJeff Roberson 	 * the current smr seq rather than accepting the bucket's.
4389543117beSJeff Roberson 	 */
4390543117beSJeff Roberson 	seq = SMR_SEQ_INVALID;
4391543117beSJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_SMR) != 0)
4392226dd6dbSJeff Roberson 		seq = smr_advance(zone->uz_smr);
4393226dd6dbSJeff Roberson 
4394226dd6dbSJeff Roberson 	/*
4395226dd6dbSJeff Roberson 	 * To avoid having ndomain * ndomain buckets for sorting we have a
4396226dd6dbSJeff Roberson 	 * lock on the current crossfree bucket.  A full matrix with
4397226dd6dbSJeff Roberson 	 * per-domain locking could be used if necessary.
4398226dd6dbSJeff Roberson 	 */
4399991f23efSMark Johnston 	STAILQ_INIT(&emptybuckets);
4400226dd6dbSJeff Roberson 	STAILQ_INIT(&fullbuckets);
4401226dd6dbSJeff Roberson 	ZONE_CROSS_LOCK(zone);
4402991f23efSMark Johnston 	for (; bucket->ub_cnt > 0; bucket->ub_cnt--) {
440391d947bfSJeff Roberson 		item = bucket->ub_bucket[bucket->ub_cnt - 1];
440481302f1dSMark Johnston 		domain = item_domain(item);
4405c6fd3e23SJeff Roberson 		zdom = ZDOM_GET(zone, domain);
440691d947bfSJeff Roberson 		if (zdom->uzd_cross == NULL) {
4407991f23efSMark Johnston 			if ((b = STAILQ_FIRST(&emptybuckets)) != NULL) {
4408991f23efSMark Johnston 				STAILQ_REMOVE_HEAD(&emptybuckets, ub_link);
4409991f23efSMark Johnston 				zdom->uzd_cross = b;
4410991f23efSMark Johnston 			} else {
4411991f23efSMark Johnston 				/*
4412991f23efSMark Johnston 				 * Avoid allocating a bucket with the cross lock
4413991f23efSMark Johnston 				 * held, since allocation can trigger a
4414991f23efSMark Johnston 				 * cross-domain free and bucket zones may
4415991f23efSMark Johnston 				 * allocate from each other.
4416991f23efSMark Johnston 				 */
4417991f23efSMark Johnston 				ZONE_CROSS_UNLOCK(zone);
4418991f23efSMark Johnston 				b = bucket_alloc(zone, udata, M_NOWAIT);
4419991f23efSMark Johnston 				if (b == NULL)
4420991f23efSMark Johnston 					goto out;
4421991f23efSMark Johnston 				ZONE_CROSS_LOCK(zone);
4422991f23efSMark Johnston 				if (zdom->uzd_cross != NULL) {
4423991f23efSMark Johnston 					STAILQ_INSERT_HEAD(&emptybuckets, b,
4424991f23efSMark Johnston 					    ub_link);
4425991f23efSMark Johnston 				} else {
4426991f23efSMark Johnston 					zdom->uzd_cross = b;
4427991f23efSMark Johnston 				}
4428991f23efSMark Johnston 			}
442991d947bfSJeff Roberson 		}
4430543117beSJeff Roberson 		b = zdom->uzd_cross;
4431543117beSJeff Roberson 		b->ub_bucket[b->ub_cnt++] = item;
4432543117beSJeff Roberson 		b->ub_seq = seq;
4433543117beSJeff Roberson 		if (b->ub_cnt == b->ub_entries) {
4434543117beSJeff Roberson 			STAILQ_INSERT_HEAD(&fullbuckets, b, ub_link);
4435991f23efSMark Johnston 			if ((b = STAILQ_FIRST(&emptybuckets)) != NULL)
4436991f23efSMark Johnston 				STAILQ_REMOVE_HEAD(&emptybuckets, ub_link);
4437991f23efSMark Johnston 			zdom->uzd_cross = b;
443891d947bfSJeff Roberson 		}
443991d947bfSJeff Roberson 	}
444091d947bfSJeff Roberson 	ZONE_CROSS_UNLOCK(zone);
4441991f23efSMark Johnston out:
4442c6fd3e23SJeff Roberson 	if (bucket->ub_cnt == 0)
4443d4665eaaSJeff Roberson 		bucket->ub_seq = SMR_SEQ_INVALID;
444491d947bfSJeff Roberson 	bucket_free(zone, bucket, udata);
4445c6fd3e23SJeff Roberson 
4446991f23efSMark Johnston 	while ((b = STAILQ_FIRST(&emptybuckets)) != NULL) {
4447991f23efSMark Johnston 		STAILQ_REMOVE_HEAD(&emptybuckets, ub_link);
4448991f23efSMark Johnston 		bucket_free(zone, b, udata);
4449991f23efSMark Johnston 	}
4450c6fd3e23SJeff Roberson 	while ((b = STAILQ_FIRST(&fullbuckets)) != NULL) {
4451c6fd3e23SJeff Roberson 		STAILQ_REMOVE_HEAD(&fullbuckets, ub_link);
445281302f1dSMark Johnston 		domain = item_domain(b->ub_bucket[0]);
4453c6fd3e23SJeff Roberson 		zone_put_bucket(zone, domain, b, udata, true);
4454c6fd3e23SJeff Roberson 	}
445591d947bfSJeff Roberson }
445691d947bfSJeff Roberson #endif
445791d947bfSJeff Roberson 
44580a81b439SJeff Roberson static void
44590a81b439SJeff Roberson zone_free_bucket(uma_zone_t zone, uma_bucket_t bucket, void *udata,
4460c6fd3e23SJeff Roberson     int itemdomain, bool ws)
44610a81b439SJeff Roberson {
44620a81b439SJeff Roberson 
4463dfe13344SJeff Roberson #ifdef NUMA
44640a81b439SJeff Roberson 	/*
44650a81b439SJeff Roberson 	 * Buckets coming from the wrong domain will be entirely for the
44660a81b439SJeff Roberson 	 * only other domain on two-domain systems.  In this case we can
44670a81b439SJeff Roberson 	 * simply cache them.  Otherwise we need to sort them back to the
446891d947bfSJeff Roberson 	 * correct domains.
44690a81b439SJeff Roberson 	 */
4470c6fd3e23SJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) != 0 &&
4471c6fd3e23SJeff Roberson 	    vm_ndomains > 2 && PCPU_GET(domain) != itemdomain) {
447291d947bfSJeff Roberson 		zone_free_cross(zone, bucket, udata);
44730a81b439SJeff Roberson 		return;
44740a81b439SJeff Roberson 	}
44750a81b439SJeff Roberson #endif
447691d947bfSJeff Roberson 
44770a81b439SJeff Roberson 	/*
44780a81b439SJeff Roberson 	 * Attempt to save the bucket in the zone's domain bucket cache.
44790a81b439SJeff Roberson 	 */
44800a81b439SJeff Roberson 	CTR3(KTR_UMA,
44810a81b439SJeff Roberson 	    "uma_zfree: zone %s(%p) putting bucket %p on free list",
44820a81b439SJeff Roberson 	    zone->uz_name, zone, bucket);
44830a81b439SJeff Roberson 	/* ub_cnt is pointing to the last free item */
4484c6fd3e23SJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_ROUNDROBIN) != 0)
4485c6fd3e23SJeff Roberson 		itemdomain = zone_domain_lowest(zone, itemdomain);
4486c6fd3e23SJeff Roberson 	zone_put_bucket(zone, itemdomain, bucket, udata, ws);
44878355f576SJeff Roberson }
4488fc03d22bSJeff Roberson 
44894d104ba0SAlexander Motin /*
44900a81b439SJeff Roberson  * Populate a free or cross bucket for the current cpu cache.  Free any
44910a81b439SJeff Roberson  * existing full bucket either to the zone cache or back to the slab layer.
44920a81b439SJeff Roberson  *
44930a81b439SJeff Roberson  * Enters and returns in a critical section.  A false return indicates that
44940a81b439SJeff Roberson  * we cannot satisfy this free in the cache layer.  A true return indicates
44950a81b439SJeff Roberson  * that the caller should retry.
44964d104ba0SAlexander Motin  */
44970a81b439SJeff Roberson static __noinline bool
44980a81b439SJeff Roberson cache_free(uma_zone_t zone, uma_cache_t cache, void *udata, void *item,
44990a81b439SJeff Roberson     int itemdomain)
45000a81b439SJeff Roberson {
4501dfe13344SJeff Roberson 	uma_cache_bucket_t cbucket;
4502d4665eaaSJeff Roberson 	uma_bucket_t newbucket, bucket;
45030a81b439SJeff Roberson 
45040a81b439SJeff Roberson 	CRITICAL_ASSERT(curthread);
45050a81b439SJeff Roberson 
4506d4665eaaSJeff Roberson 	if (zone->uz_bucket_size == 0)
45070a81b439SJeff Roberson 		return false;
45080a81b439SJeff Roberson 
4509cc7ce83aSJeff Roberson 	cache = &zone->uz_cpu[curcpu];
4510d4665eaaSJeff Roberson 	newbucket = NULL;
45110a81b439SJeff Roberson 
45120a81b439SJeff Roberson 	/*
4513dfe13344SJeff Roberson 	 * FIRSTTOUCH domains need to free to the correct zdom.  When
4514dfe13344SJeff Roberson 	 * enabled, this is the zdom of the item.  The bucket is the
4515dfe13344SJeff Roberson 	 * cross bucket if the current domain and itemdomain do not match.
45160a81b439SJeff Roberson 	 */
4517dfe13344SJeff Roberson 	cbucket = &cache->uc_freebucket;
4518dfe13344SJeff Roberson #ifdef NUMA
4519c6fd3e23SJeff Roberson 	if ((cache_uz_flags(cache) & UMA_ZONE_FIRSTTOUCH) != 0) {
4520c6fd3e23SJeff Roberson 		if (PCPU_GET(domain) != itemdomain) {
4521dfe13344SJeff Roberson 			cbucket = &cache->uc_crossbucket;
4522dfe13344SJeff Roberson 			if (cbucket->ucb_cnt != 0)
4523c6fd3e23SJeff Roberson 				counter_u64_add(zone->uz_xdomain,
4524dfe13344SJeff Roberson 				    cbucket->ucb_cnt);
4525dfe13344SJeff Roberson 		}
4526c6fd3e23SJeff Roberson 	}
45270a81b439SJeff Roberson #endif
4528dfe13344SJeff Roberson 	bucket = cache_bucket_unload(cbucket);
4529c6fd3e23SJeff Roberson 	KASSERT(bucket == NULL || bucket->ub_cnt == bucket->ub_entries,
4530c6fd3e23SJeff Roberson 	    ("cache_free: Entered with non-full free bucket."));
45310a81b439SJeff Roberson 
45320a81b439SJeff Roberson 	/* We are no longer associated with this CPU. */
45330a81b439SJeff Roberson 	critical_exit();
45340a81b439SJeff Roberson 
4535d4665eaaSJeff Roberson 	/*
4536d4665eaaSJeff Roberson 	 * Don't let SMR zones operate without a free bucket.  Force
4537d4665eaaSJeff Roberson 	 * a synchronize and re-use this one.  We will only degrade
4538d4665eaaSJeff Roberson 	 * to a synchronize every bucket_size items rather than every
4539d4665eaaSJeff Roberson 	 * item if we fail to allocate a bucket.
4540d4665eaaSJeff Roberson 	 */
4541d4665eaaSJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_SMR) != 0) {
4542d4665eaaSJeff Roberson 		if (bucket != NULL)
4543d4665eaaSJeff Roberson 			bucket->ub_seq = smr_advance(zone->uz_smr);
4544d4665eaaSJeff Roberson 		newbucket = bucket_alloc(zone, udata, M_NOWAIT);
4545d4665eaaSJeff Roberson 		if (newbucket == NULL && bucket != NULL) {
4546d4665eaaSJeff Roberson 			bucket_drain(zone, bucket);
4547d4665eaaSJeff Roberson 			newbucket = bucket;
4548d4665eaaSJeff Roberson 			bucket = NULL;
4549d4665eaaSJeff Roberson 		}
4550d4665eaaSJeff Roberson 	} else if (!bucketdisable)
4551d4665eaaSJeff Roberson 		newbucket = bucket_alloc(zone, udata, M_NOWAIT);
4552d4665eaaSJeff Roberson 
45530a81b439SJeff Roberson 	if (bucket != NULL)
4554c6fd3e23SJeff Roberson 		zone_free_bucket(zone, bucket, udata, itemdomain, true);
4555a553d4b8SJeff Roberson 
4556fc03d22bSJeff Roberson 	critical_enter();
4557d4665eaaSJeff Roberson 	if ((bucket = newbucket) == NULL)
45580a81b439SJeff Roberson 		return (false);
4559cc7ce83aSJeff Roberson 	cache = &zone->uz_cpu[curcpu];
4560dfe13344SJeff Roberson #ifdef NUMA
4561fc03d22bSJeff Roberson 	/*
45620a81b439SJeff Roberson 	 * Check to see if we should be populating the cross bucket.  If it
45630a81b439SJeff Roberson 	 * is already populated we will fall through and attempt to populate
45640a81b439SJeff Roberson 	 * the free bucket.
4565fc03d22bSJeff Roberson 	 */
4566c6fd3e23SJeff Roberson 	if ((cache_uz_flags(cache) & UMA_ZONE_FIRSTTOUCH) != 0) {
4567c6fd3e23SJeff Roberson 		if (PCPU_GET(domain) != itemdomain &&
4568376b1ba3SJeff Roberson 		    cache->uc_crossbucket.ucb_bucket == NULL) {
4569376b1ba3SJeff Roberson 			cache_bucket_load_cross(cache, bucket);
45700a81b439SJeff Roberson 			return (true);
45710a81b439SJeff Roberson 		}
45720a81b439SJeff Roberson 	}
45730a81b439SJeff Roberson #endif
45740a81b439SJeff Roberson 	/*
45750a81b439SJeff Roberson 	 * We may have lost the race to fill the bucket or switched CPUs.
45760a81b439SJeff Roberson 	 */
4577376b1ba3SJeff Roberson 	if (cache->uc_freebucket.ucb_bucket != NULL) {
4578fc03d22bSJeff Roberson 		critical_exit();
45796fd34d6fSJeff Roberson 		bucket_free(zone, bucket, udata);
45800a81b439SJeff Roberson 		critical_enter();
45810a81b439SJeff Roberson 	} else
4582376b1ba3SJeff Roberson 		cache_bucket_load_free(cache, bucket);
45838355f576SJeff Roberson 
45840a81b439SJeff Roberson 	return (true);
45858355f576SJeff Roberson }
45868355f576SJeff Roberson 
45878355f576SJeff Roberson static void
4588bb15d1c7SGleb Smirnoff slab_free_item(uma_zone_t zone, uma_slab_t slab, void *item)
45898355f576SJeff Roberson {
4590bb15d1c7SGleb Smirnoff 	uma_keg_t keg;
4591ab3185d1SJeff Roberson 	uma_domain_t dom;
45929b8db4d0SRyan Libby 	int freei;
4593099a0e58SBosko Milekic 
4594bb15d1c7SGleb Smirnoff 	keg = zone->uz_keg;
45958b987a77SJeff Roberson 	KEG_LOCK_ASSERT(keg, slab->us_domain);
4596ab3185d1SJeff Roberson 
45978355f576SJeff Roberson 	/* Do we need to remove from any lists? */
45988b987a77SJeff Roberson 	dom = &keg->uk_domain[slab->us_domain];
4599099a0e58SBosko Milekic 	if (slab->us_freecount + 1 == keg->uk_ipers) {
46008355f576SJeff Roberson 		LIST_REMOVE(slab, us_link);
4601ab3185d1SJeff Roberson 		LIST_INSERT_HEAD(&dom->ud_free_slab, slab, us_link);
46024ab3aee8SMark Johnston 		dom->ud_free_slabs++;
46038355f576SJeff Roberson 	} else if (slab->us_freecount == 0) {
46048355f576SJeff Roberson 		LIST_REMOVE(slab, us_link);
4605ab3185d1SJeff Roberson 		LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link);
46068355f576SJeff Roberson 	}
46078355f576SJeff Roberson 
4608ef72505eSJeff Roberson 	/* Slab management. */
46091e0701e1SJeff Roberson 	freei = slab_item_index(slab, keg, item);
46109b78b1f4SJeff Roberson 	BIT_SET(keg->uk_ipers, freei, &slab->us_free);
46118355f576SJeff Roberson 	slab->us_freecount++;
46128355f576SJeff Roberson 
4613ef72505eSJeff Roberson 	/* Keg statistics. */
46144ab3aee8SMark Johnston 	dom->ud_free_items++;
46150095a784SJeff Roberson }
46160095a784SJeff Roberson 
46170095a784SJeff Roberson static void
4618b75c4efcSAndrew Turner zone_release(void *arg, void **bucket, int cnt)
46190095a784SJeff Roberson {
46208b987a77SJeff Roberson 	struct mtx *lock;
4621b75c4efcSAndrew Turner 	uma_zone_t zone;
46220095a784SJeff Roberson 	uma_slab_t slab;
46230095a784SJeff Roberson 	uma_keg_t keg;
46240095a784SJeff Roberson 	uint8_t *mem;
46258b987a77SJeff Roberson 	void *item;
46260095a784SJeff Roberson 	int i;
46278355f576SJeff Roberson 
4628b75c4efcSAndrew Turner 	zone = arg;
4629bb15d1c7SGleb Smirnoff 	keg = zone->uz_keg;
46308b987a77SJeff Roberson 	lock = NULL;
463154c5ae80SRyan Libby 	if (__predict_false((zone->uz_flags & UMA_ZFLAG_HASH) != 0))
46328b987a77SJeff Roberson 		lock = KEG_LOCK(keg, 0);
46330095a784SJeff Roberson 	for (i = 0; i < cnt; i++) {
46340095a784SJeff Roberson 		item = bucket[i];
463554c5ae80SRyan Libby 		if (__predict_true((zone->uz_flags & UMA_ZFLAG_VTOSLAB) != 0)) {
46360095a784SJeff Roberson 			slab = vtoslab((vm_offset_t)item);
46378b987a77SJeff Roberson 		} else {
46388b987a77SJeff Roberson 			mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
463954c5ae80SRyan Libby 			if ((zone->uz_flags & UMA_ZFLAG_HASH) != 0)
46408b987a77SJeff Roberson 				slab = hash_sfind(&keg->uk_hash, mem);
46418b987a77SJeff Roberson 			else
46428b987a77SJeff Roberson 				slab = (uma_slab_t)(mem + keg->uk_pgoff);
46438b987a77SJeff Roberson 		}
46448b987a77SJeff Roberson 		if (lock != KEG_LOCKPTR(keg, slab->us_domain)) {
46458b987a77SJeff Roberson 			if (lock != NULL)
46468b987a77SJeff Roberson 				mtx_unlock(lock);
46478b987a77SJeff Roberson 			lock = KEG_LOCK(keg, slab->us_domain);
46488b987a77SJeff Roberson 		}
4649bb15d1c7SGleb Smirnoff 		slab_free_item(zone, slab, item);
46500095a784SJeff Roberson 	}
46518b987a77SJeff Roberson 	if (lock != NULL)
46528b987a77SJeff Roberson 		mtx_unlock(lock);
46538355f576SJeff Roberson }
46548355f576SJeff Roberson 
46550095a784SJeff Roberson /*
46560095a784SJeff Roberson  * Frees a single item to any zone.
46570095a784SJeff Roberson  *
46580095a784SJeff Roberson  * Arguments:
46590095a784SJeff Roberson  *	zone   The zone to free to
46600095a784SJeff Roberson  *	item   The item we're freeing
46610095a784SJeff Roberson  *	udata  User supplied data for the dtor
46620095a784SJeff Roberson  *	skip   Skip dtors and finis
46630095a784SJeff Roberson  */
46646d88d784SJeff Roberson static __noinline void
46650095a784SJeff Roberson zone_free_item(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip)
46660095a784SJeff Roberson {
4667c5deaf04SGleb Smirnoff 
4668d4665eaaSJeff Roberson 	/*
4669d4665eaaSJeff Roberson 	 * If a free is sent directly to an SMR zone we have to
4670d4665eaaSJeff Roberson 	 * synchronize immediately because the item can instantly
4671d4665eaaSJeff Roberson 	 * be reallocated. This should only happen in degenerate
4672d4665eaaSJeff Roberson 	 * cases when no memory is available for per-cpu caches.
4673d4665eaaSJeff Roberson 	 */
4674d4665eaaSJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_SMR) != 0 && skip == SKIP_NONE)
4675d4665eaaSJeff Roberson 		smr_synchronize(zone->uz_smr);
4676d4665eaaSJeff Roberson 
4677cc7ce83aSJeff Roberson 	item_dtor(zone, item, zone->uz_size, udata, skip);
46780095a784SJeff Roberson 
467909c8cb71SMark Johnston 	if (skip < SKIP_FINI && zone->uz_fini) {
468009c8cb71SMark Johnston 		kasan_mark_item_valid(zone, item);
46810095a784SJeff Roberson 		zone->uz_fini(item, zone->uz_size);
468209c8cb71SMark Johnston 		kasan_mark_item_invalid(zone, item);
468309c8cb71SMark Johnston 	}
46840095a784SJeff Roberson 
46850095a784SJeff Roberson 	zone->uz_release(zone->uz_arg, &item, 1);
4686bb15d1c7SGleb Smirnoff 
4687bb15d1c7SGleb Smirnoff 	if (skip & SKIP_CNT)
4688bb15d1c7SGleb Smirnoff 		return;
4689bb15d1c7SGleb Smirnoff 
46902efcc8cbSGleb Smirnoff 	counter_u64_add(zone->uz_frees, 1);
46912efcc8cbSGleb Smirnoff 
46924bd61e19SJeff Roberson 	if (zone->uz_max_items > 0)
46934bd61e19SJeff Roberson 		zone_free_limit(zone, 1);
4694bb45b411SGleb Smirnoff }
46950095a784SJeff Roberson 
46968355f576SJeff Roberson /* See uma.h */
46971c6cae97SLawrence Stewart int
4698736ee590SJeff Roberson uma_zone_set_max(uma_zone_t zone, int nitems)
4699736ee590SJeff Roberson {
4700e574d407SMark Johnston 
4701e574d407SMark Johnston 	/*
4702e574d407SMark Johnston 	 * If the limit is small, we may need to constrain the maximum per-CPU
4703e574d407SMark Johnston 	 * cache size, or disable caching entirely.
4704e574d407SMark Johnston 	 */
4705e574d407SMark Johnston 	uma_zone_set_maxcache(zone, nitems);
4706bb15d1c7SGleb Smirnoff 
47074bd61e19SJeff Roberson 	/*
47084bd61e19SJeff Roberson 	 * XXX This can misbehave if the zone has any allocations with
47094bd61e19SJeff Roberson 	 * no limit and a limit is imposed.  There is currently no
47104bd61e19SJeff Roberson 	 * way to clear a limit.
47114bd61e19SJeff Roberson 	 */
4712bb15d1c7SGleb Smirnoff 	ZONE_LOCK(zone);
4713bb15d1c7SGleb Smirnoff 	zone->uz_max_items = nitems;
4714cc7ce83aSJeff Roberson 	zone->uz_flags |= UMA_ZFLAG_LIMIT;
4715cc7ce83aSJeff Roberson 	zone_update_caches(zone);
47164bd61e19SJeff Roberson 	/* We may need to wake waiters. */
47174bd61e19SJeff Roberson 	wakeup(&zone->uz_max_items);
4718bb15d1c7SGleb Smirnoff 	ZONE_UNLOCK(zone);
4719bb15d1c7SGleb Smirnoff 
4720bb15d1c7SGleb Smirnoff 	return (nitems);
4721bb15d1c7SGleb Smirnoff }
4722bb15d1c7SGleb Smirnoff 
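/*
 * Illustrative sketch (not compiled as part of this file) of capping a
 * zone with uma_zone_set_max() and reading the cap back with
 * uma_zone_get_max(), both defined in this file.  The zone argument and
 * the 4096-item cap are hypothetical.
 */
#if 0	/* Example only; never compiled. */
static void
baz_limit(uma_zone_t baz_zone)
{
	int limit;

	/* Also constrains per-CPU caches via uma_zone_set_maxcache(). */
	limit = uma_zone_set_max(baz_zone, 4096);
	KASSERT(limit == uma_zone_get_max(baz_zone),
	    ("baz zone limit mismatch: %d", limit));
}
#endif
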
4723bb15d1c7SGleb Smirnoff /* See uma.h */
4724003cf08bSMark Johnston void
4725bb15d1c7SGleb Smirnoff uma_zone_set_maxcache(uma_zone_t zone, int nitems)
4726bb15d1c7SGleb Smirnoff {
4727e574d407SMark Johnston 	int bpcpu, bpdom, bsize, nb;
4728bb15d1c7SGleb Smirnoff 
4729bb15d1c7SGleb Smirnoff 	ZONE_LOCK(zone);
4730e574d407SMark Johnston 
4731e574d407SMark Johnston 	/*
4732e574d407SMark Johnston 	 * Compute a lower bound on the number of items that may be cached in
4733e574d407SMark Johnston 	 * the zone.  Each CPU gets at least two buckets, and for cross-domain
4734e574d407SMark Johnston 	 * frees we use an additional bucket per CPU and per domain.  Select the
4735e574d407SMark Johnston 	 * largest bucket size that does not exceed half of the requested limit,
4736e574d407SMark Johnston 	 * with the left over space given to the full bucket cache.
4737e574d407SMark Johnston 	 * with the leftover space given to the full bucket cache.
4738e574d407SMark Johnston 	bpdom = 0;
4739003cf08bSMark Johnston 	bpcpu = 2;
4740e574d407SMark Johnston #ifdef NUMA
4741e574d407SMark Johnston 	if ((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) != 0 && vm_ndomains > 1) {
4742003cf08bSMark Johnston 		bpcpu++;
4743e574d407SMark Johnston 		bpdom++;
4744003cf08bSMark Johnston 	}
4745e574d407SMark Johnston #endif
4746e574d407SMark Johnston 	nb = bpcpu * mp_ncpus + bpdom * vm_ndomains;
4747e574d407SMark Johnston 	bsize = nitems / nb / 2;
4748e574d407SMark Johnston 	if (bsize > BUCKET_MAX)
4749e574d407SMark Johnston 		bsize = BUCKET_MAX;
4750e574d407SMark Johnston 	else if (bsize == 0 && nitems / nb > 0)
4751e574d407SMark Johnston 		bsize = 1;
4752e574d407SMark Johnston 	zone->uz_bucket_size_max = zone->uz_bucket_size = bsize;
475320a4e154SJeff Roberson 	if (zone->uz_bucket_size_min > zone->uz_bucket_size_max)
475420a4e154SJeff Roberson 		zone->uz_bucket_size_min = zone->uz_bucket_size_max;
4755e574d407SMark Johnston 	zone->uz_bucket_max = nitems - nb * bsize;
4756bb15d1c7SGleb Smirnoff 	ZONE_UNLOCK(zone);
4757736ee590SJeff Roberson }
4758736ee590SJeff Roberson 
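/*
 * A worked example of the sizing above under assumed parameters: 8 CPUs,
 * 2 domains, a FIRSTTOUCH zone, nitems = 10000, and BUCKET_MAX large
 * enough not to cap the result:
 *
 *	bpcpu = 3, bpdom = 1
 *	nb    = 3 * 8 + 1 * 2 = 26 buckets
 *	bsize = 10000 / 26 / 2 = 192 items per bucket
 *	uz_bucket_max = 10000 - 26 * 192 = 5008 items
 *
 * so roughly half of the requested limit is reserved for per-CPU and
 * cross-domain buckets and the remainder feeds the zone's full-bucket
 * cache.
 */
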
4759736ee590SJeff Roberson /* See uma.h */
4760e49471b0SAndre Oppermann int
4761e49471b0SAndre Oppermann uma_zone_get_max(uma_zone_t zone)
4762e49471b0SAndre Oppermann {
4763e49471b0SAndre Oppermann 	int nitems;
4764e49471b0SAndre Oppermann 
4765727c6918SJeff Roberson 	nitems = atomic_load_64(&zone->uz_max_items);
4766e49471b0SAndre Oppermann 
4767e49471b0SAndre Oppermann 	return (nitems);
4768e49471b0SAndre Oppermann }
4769e49471b0SAndre Oppermann 
4770e49471b0SAndre Oppermann /* See uma.h */
47712f891cd5SPawel Jakub Dawidek void
47722f891cd5SPawel Jakub Dawidek uma_zone_set_warning(uma_zone_t zone, const char *warning)
47732f891cd5SPawel Jakub Dawidek {
47742f891cd5SPawel Jakub Dawidek 
4775727c6918SJeff Roberson 	ZONE_ASSERT_COLD(zone);
47762f891cd5SPawel Jakub Dawidek 	zone->uz_warning = warning;
47772f891cd5SPawel Jakub Dawidek }
47782f891cd5SPawel Jakub Dawidek 
47792f891cd5SPawel Jakub Dawidek /* See uma.h */
478054503a13SJonathan T. Looney void
478154503a13SJonathan T. Looney uma_zone_set_maxaction(uma_zone_t zone, uma_maxaction_t maxaction)
478254503a13SJonathan T. Looney {
478354503a13SJonathan T. Looney 
4784727c6918SJeff Roberson 	ZONE_ASSERT_COLD(zone);
4785e60b2fcbSGleb Smirnoff 	TASK_INIT(&zone->uz_maxaction, 0, (task_fn_t *)maxaction, zone);
478654503a13SJonathan T. Looney }
478754503a13SJonathan T. Looney 
478854503a13SJonathan T. Looney /* See uma.h */
4789c4ae7908SLawrence Stewart int
4790c4ae7908SLawrence Stewart uma_zone_get_cur(uma_zone_t zone)
4791c4ae7908SLawrence Stewart {
4792c4ae7908SLawrence Stewart 	int64_t nitems;
4793c4ae7908SLawrence Stewart 	u_int i;
4794c4ae7908SLawrence Stewart 
4795bfb6b7a1SJeff Roberson 	nitems = 0;
4796bfb6b7a1SJeff Roberson 	if (zone->uz_allocs != EARLY_COUNTER && zone->uz_frees != EARLY_COUNTER)
47972efcc8cbSGleb Smirnoff 		nitems = counter_u64_fetch(zone->uz_allocs) -
47982efcc8cbSGleb Smirnoff 		    counter_u64_fetch(zone->uz_frees);
4799727c6918SJeff Roberson 	CPU_FOREACH(i)
4800727c6918SJeff Roberson 		nitems += atomic_load_64(&zone->uz_cpu[i].uc_allocs) -
4801727c6918SJeff Roberson 		    atomic_load_64(&zone->uz_cpu[i].uc_frees);
4802c4ae7908SLawrence Stewart 
4803c4ae7908SLawrence Stewart 	return (nitems < 0 ? 0 : nitems);
4804c4ae7908SLawrence Stewart }
4805c4ae7908SLawrence Stewart 
480620a4e154SJeff Roberson static uint64_t
480720a4e154SJeff Roberson uma_zone_get_allocs(uma_zone_t zone)
480820a4e154SJeff Roberson {
480920a4e154SJeff Roberson 	uint64_t nitems;
481020a4e154SJeff Roberson 	u_int i;
481120a4e154SJeff Roberson 
4812bfb6b7a1SJeff Roberson 	nitems = 0;
4813bfb6b7a1SJeff Roberson 	if (zone->uz_allocs != EARLY_COUNTER)
481420a4e154SJeff Roberson 		nitems = counter_u64_fetch(zone->uz_allocs);
4815727c6918SJeff Roberson 	CPU_FOREACH(i)
4816727c6918SJeff Roberson 		nitems += atomic_load_64(&zone->uz_cpu[i].uc_allocs);
481720a4e154SJeff Roberson 
481820a4e154SJeff Roberson 	return (nitems);
481920a4e154SJeff Roberson }
482020a4e154SJeff Roberson 
482120a4e154SJeff Roberson static uint64_t
482220a4e154SJeff Roberson uma_zone_get_frees(uma_zone_t zone)
482320a4e154SJeff Roberson {
482420a4e154SJeff Roberson 	uint64_t nitems;
482520a4e154SJeff Roberson 	u_int i;
482620a4e154SJeff Roberson 
4827bfb6b7a1SJeff Roberson 	nitems = 0;
4828bfb6b7a1SJeff Roberson 	if (zone->uz_frees != EARLY_COUNTER)
482920a4e154SJeff Roberson 		nitems = counter_u64_fetch(zone->uz_frees);
4830727c6918SJeff Roberson 	CPU_FOREACH(i)
4831727c6918SJeff Roberson 		nitems += atomic_load_64(&zone->uz_cpu[i].uc_frees);
483220a4e154SJeff Roberson 
483320a4e154SJeff Roberson 	return (nitems);
483420a4e154SJeff Roberson }
483520a4e154SJeff Roberson 
483631c251a0SJeff Roberson #ifdef INVARIANTS
483731c251a0SJeff Roberson /* Used only for KEG_ASSERT_COLD(). */
483831c251a0SJeff Roberson static uint64_t
483931c251a0SJeff Roberson uma_keg_get_allocs(uma_keg_t keg)
484031c251a0SJeff Roberson {
484131c251a0SJeff Roberson 	uma_zone_t z;
484231c251a0SJeff Roberson 	uint64_t nitems;
484331c251a0SJeff Roberson 
484431c251a0SJeff Roberson 	nitems = 0;
484531c251a0SJeff Roberson 	LIST_FOREACH(z, &keg->uk_zones, uz_link)
484631c251a0SJeff Roberson 		nitems += uma_zone_get_allocs(z);
484731c251a0SJeff Roberson 
484831c251a0SJeff Roberson 	return (nitems);
484931c251a0SJeff Roberson }
485031c251a0SJeff Roberson #endif
485131c251a0SJeff Roberson 
4852c4ae7908SLawrence Stewart /* See uma.h */
4853736ee590SJeff Roberson void
4854099a0e58SBosko Milekic uma_zone_set_init(uma_zone_t zone, uma_init uminit)
4855099a0e58SBosko Milekic {
4856e20a199fSJeff Roberson 	uma_keg_t keg;
4857e20a199fSJeff Roberson 
4858bb15d1c7SGleb Smirnoff 	KEG_GET(zone, keg);
4859727c6918SJeff Roberson 	KEG_ASSERT_COLD(keg);
4860e20a199fSJeff Roberson 	keg->uk_init = uminit;
4861099a0e58SBosko Milekic }
4862099a0e58SBosko Milekic 
4863099a0e58SBosko Milekic /* See uma.h */
4864099a0e58SBosko Milekic void
4865099a0e58SBosko Milekic uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
4866099a0e58SBosko Milekic {
4867e20a199fSJeff Roberson 	uma_keg_t keg;
4868e20a199fSJeff Roberson 
4869bb15d1c7SGleb Smirnoff 	KEG_GET(zone, keg);
4870727c6918SJeff Roberson 	KEG_ASSERT_COLD(keg);
4871e20a199fSJeff Roberson 	keg->uk_fini = fini;
4872099a0e58SBosko Milekic }
4873099a0e58SBosko Milekic 
4874099a0e58SBosko Milekic /* See uma.h */
4875099a0e58SBosko Milekic void
4876099a0e58SBosko Milekic uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
4877099a0e58SBosko Milekic {
4878af526374SJeff Roberson 
4879727c6918SJeff Roberson 	ZONE_ASSERT_COLD(zone);
4880099a0e58SBosko Milekic 	zone->uz_init = zinit;
4881099a0e58SBosko Milekic }
4882099a0e58SBosko Milekic 
4883099a0e58SBosko Milekic /* See uma.h */
4884099a0e58SBosko Milekic void
4885099a0e58SBosko Milekic uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
4886099a0e58SBosko Milekic {
4887af526374SJeff Roberson 
4888727c6918SJeff Roberson 	ZONE_ASSERT_COLD(zone);
4889099a0e58SBosko Milekic 	zone->uz_fini = zfini;
4890099a0e58SBosko Milekic }
4891099a0e58SBosko Milekic 
4892099a0e58SBosko Milekic /* See uma.h */
4893099a0e58SBosko Milekic void
48948355f576SJeff Roberson uma_zone_set_freef(uma_zone_t zone, uma_free freef)
48958355f576SJeff Roberson {
48960095a784SJeff Roberson 	uma_keg_t keg;
4897e20a199fSJeff Roberson 
4898bb15d1c7SGleb Smirnoff 	KEG_GET(zone, keg);
4899727c6918SJeff Roberson 	KEG_ASSERT_COLD(keg);
49000095a784SJeff Roberson 	keg->uk_freef = freef;
49018355f576SJeff Roberson }
49028355f576SJeff Roberson 
49038355f576SJeff Roberson /* See uma.h */
49048355f576SJeff Roberson void
49058355f576SJeff Roberson uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
49068355f576SJeff Roberson {
4907e20a199fSJeff Roberson 	uma_keg_t keg;
4908e20a199fSJeff Roberson 
4909bb15d1c7SGleb Smirnoff 	KEG_GET(zone, keg);
4910727c6918SJeff Roberson 	KEG_ASSERT_COLD(keg);
4911e20a199fSJeff Roberson 	keg->uk_allocf = allocf;
49128355f576SJeff Roberson }
49138355f576SJeff Roberson 
49148355f576SJeff Roberson /* See uma.h */
49156fd34d6fSJeff Roberson void
4916d4665eaaSJeff Roberson uma_zone_set_smr(uma_zone_t zone, smr_t smr)
4917d4665eaaSJeff Roberson {
4918d4665eaaSJeff Roberson 
4919d4665eaaSJeff Roberson 	ZONE_ASSERT_COLD(zone);
4920d4665eaaSJeff Roberson 
49217f746c9fSMateusz Guzik 	KASSERT(smr != NULL, ("Got NULL smr"));
49227f746c9fSMateusz Guzik 	KASSERT((zone->uz_flags & UMA_ZONE_SMR) == 0,
49237f746c9fSMateusz Guzik 	    ("zone %p (%s) already uses SMR", zone, zone->uz_name));
4924d4665eaaSJeff Roberson 	zone->uz_flags |= UMA_ZONE_SMR;
4925d4665eaaSJeff Roberson 	zone->uz_smr = smr;
4926d4665eaaSJeff Roberson 	zone_update_caches(zone);
4927d4665eaaSJeff Roberson }
4928d4665eaaSJeff Roberson 
4929d4665eaaSJeff Roberson smr_t
4930d4665eaaSJeff Roberson uma_zone_get_smr(uma_zone_t zone)
4931d4665eaaSJeff Roberson {
4932d4665eaaSJeff Roberson 
4933d4665eaaSJeff Roberson 	return (zone->uz_smr);
4934d4665eaaSJeff Roberson }
4935d4665eaaSJeff Roberson 
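/*
 * Illustrative sketch (not compiled as part of this file): one in-tree
 * pattern is to pass UMA_ZONE_SMR to uma_zcreate() (declared in uma(9),
 * not shown in this excerpt) and then retrieve the zone's SMR state with
 * uma_zone_get_smr() above for use with smr_enter()/smr_exit().  The
 * "qux" names are hypothetical.
 */
#if 0	/* Example only; never compiled. */
struct qux {
	uint64_t	q_gen;
};

static uma_zone_t qux_zone;
static smr_t qux_smr;

static void
qux_zone_init(void)
{
	qux_zone = uma_zcreate("qux", sizeof(struct qux),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_SMR);
	qux_smr = uma_zone_get_smr(qux_zone);
}
#endif
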
4936d4665eaaSJeff Roberson /* See uma.h */
4937d4665eaaSJeff Roberson void
49386fd34d6fSJeff Roberson uma_zone_reserve(uma_zone_t zone, int items)
49396fd34d6fSJeff Roberson {
49406fd34d6fSJeff Roberson 	uma_keg_t keg;
49416fd34d6fSJeff Roberson 
4942bb15d1c7SGleb Smirnoff 	KEG_GET(zone, keg);
4943727c6918SJeff Roberson 	KEG_ASSERT_COLD(keg);
49446fd34d6fSJeff Roberson 	keg->uk_reserve = items;
49456fd34d6fSJeff Roberson }
49466fd34d6fSJeff Roberson 
49476fd34d6fSJeff Roberson /* See uma.h */
49488355f576SJeff Roberson int
4949a4915c21SAttilio Rao uma_zone_reserve_kva(uma_zone_t zone, int count)
49508355f576SJeff Roberson {
4951099a0e58SBosko Milekic 	uma_keg_t keg;
49528355f576SJeff Roberson 	vm_offset_t kva;
49539ba30bcbSZbigniew Bodek 	u_int pages;
49548355f576SJeff Roberson 
4955bb15d1c7SGleb Smirnoff 	KEG_GET(zone, keg);
4956727c6918SJeff Roberson 	KEG_ASSERT_COLD(keg);
4957727c6918SJeff Roberson 	ZONE_ASSERT_COLD(zone);
49588355f576SJeff Roberson 
495979c9f942SJeff Roberson 	pages = howmany(count, keg->uk_ipers) * keg->uk_ppera;
4960a553d4b8SJeff Roberson 
4961a4915c21SAttilio Rao #ifdef UMA_MD_SMALL_ALLOC
4962a4915c21SAttilio Rao 	if (keg->uk_ppera > 1) {
4963a4915c21SAttilio Rao #else
4964a4915c21SAttilio Rao 	if (1) {
4965a4915c21SAttilio Rao #endif
496657223e99SAndriy Gapon 		kva = kva_alloc((vm_size_t)pages * PAGE_SIZE);
4967d1f42ac2SAlan Cox 		if (kva == 0)
49688355f576SJeff Roberson 			return (0);
4969a4915c21SAttilio Rao 	} else
4970a4915c21SAttilio Rao 		kva = 0;
4971bb15d1c7SGleb Smirnoff 
4972bb15d1c7SGleb Smirnoff 	MPASS(keg->uk_kva == 0);
4973099a0e58SBosko Milekic 	keg->uk_kva = kva;
4974a4915c21SAttilio Rao 	keg->uk_offset = 0;
4975bb15d1c7SGleb Smirnoff 	zone->uz_max_items = pages * keg->uk_ipers;
4976a4915c21SAttilio Rao #ifdef UMA_MD_SMALL_ALLOC
4977a4915c21SAttilio Rao 	keg->uk_allocf = (keg->uk_ppera > 1) ? noobj_alloc : uma_small_alloc;
4978a4915c21SAttilio Rao #else
4979a4915c21SAttilio Rao 	keg->uk_allocf = noobj_alloc;
4980a4915c21SAttilio Rao #endif
4981cc7ce83aSJeff Roberson 	keg->uk_flags |= UMA_ZFLAG_LIMIT | UMA_ZONE_NOFREE;
4982cc7ce83aSJeff Roberson 	zone->uz_flags |= UMA_ZFLAG_LIMIT | UMA_ZONE_NOFREE;
4983cc7ce83aSJeff Roberson 	zone_update_caches(zone);
4984af526374SJeff Roberson 
49858355f576SJeff Roberson 	return (1);
49868355f576SJeff Roberson }
49878355f576SJeff Roberson 
49888355f576SJeff Roberson /* See uma.h */
49898355f576SJeff Roberson void
49908355f576SJeff Roberson uma_prealloc(uma_zone_t zone, int items)
49918355f576SJeff Roberson {
4992920239efSMark Johnston 	struct vm_domainset_iter di;
4993ab3185d1SJeff Roberson 	uma_domain_t dom;
49948355f576SJeff Roberson 	uma_slab_t slab;
4995099a0e58SBosko Milekic 	uma_keg_t keg;
499686220393SMark Johnston 	int aflags, domain, slabs;
49978355f576SJeff Roberson 
4998bb15d1c7SGleb Smirnoff 	KEG_GET(zone, keg);
499979c9f942SJeff Roberson 	slabs = howmany(items, keg->uk_ipers);
5000194a979eSMark Johnston 	while (slabs-- > 0) {
500186220393SMark Johnston 		aflags = M_NOWAIT;
500286220393SMark Johnston 		vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain,
500386220393SMark Johnston 		    &aflags);
500486220393SMark Johnston 		for (;;) {
500586220393SMark Johnston 			slab = keg_alloc_slab(keg, zone, domain, M_WAITOK,
500686220393SMark Johnston 			    aflags);
500786220393SMark Johnston 			if (slab != NULL) {
5008ab3185d1SJeff Roberson 				dom = &keg->uk_domain[slab->us_domain];
50094ab3aee8SMark Johnston 				/*
50104ab3aee8SMark Johnston 				 * keg_alloc_slab() always returns a slab on the
50114ab3aee8SMark Johnston 				 * partial list.
50124ab3aee8SMark Johnston 				 */
50138b987a77SJeff Roberson 				LIST_REMOVE(slab, us_link);
501486220393SMark Johnston 				LIST_INSERT_HEAD(&dom->ud_free_slab, slab,
501586220393SMark Johnston 				    us_link);
50164ab3aee8SMark Johnston 				dom->ud_free_slabs++;
50178b987a77SJeff Roberson 				KEG_UNLOCK(keg, slab->us_domain);
5018920239efSMark Johnston 				break;
50198355f576SJeff Roberson 			}
50208b987a77SJeff Roberson 			if (vm_domainset_iter_policy(&di, &domain) != 0)
502189d2fb14SKonstantin Belousov 				vm_wait_doms(&keg->uk_dr.dr_policy->ds_mask, 0);
502286220393SMark Johnston 		}
502386220393SMark Johnston 	}
502486220393SMark Johnston }
50258355f576SJeff Roberson 
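/*
 * Illustrative sketch (not compiled as part of this file) pairing
 * uma_zone_reserve() and uma_prealloc(), both defined in this file: the
 * reserve backs allocations that pass M_USE_RESERVE, and preallocating
 * the same number of items populates slabs for it up front.  The zone and
 * the item count are hypothetical.
 */
#if 0	/* Example only; never compiled. */
static void
quux_zone_reserve(uma_zone_t quux_zone)
{
	uma_zone_reserve(quux_zone, 32);
	uma_prealloc(quux_zone, 32);
}
#endif
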
5026ed581bf6SJeff Roberson /*
5027ed581bf6SJeff Roberson  * Returns a snapshot of memory consumption in bytes.
5028ed581bf6SJeff Roberson  */
5029ed581bf6SJeff Roberson size_t
5030ed581bf6SJeff Roberson uma_zone_memory(uma_zone_t zone)
5031ed581bf6SJeff Roberson {
5032ed581bf6SJeff Roberson 	size_t sz;
5033ed581bf6SJeff Roberson 	int i;
5034ed581bf6SJeff Roberson 
5035ed581bf6SJeff Roberson 	sz = 0;
5036ed581bf6SJeff Roberson 	if (zone->uz_flags & UMA_ZFLAG_CACHE) {
5037ed581bf6SJeff Roberson 		for (i = 0; i < vm_ndomains; i++)
5038c6fd3e23SJeff Roberson 			sz += ZDOM_GET(zone, i)->uzd_nitems;
5039ed581bf6SJeff Roberson 		return (sz * zone->uz_size);
5040ed581bf6SJeff Roberson 	}
5041ed581bf6SJeff Roberson 	for (i = 0; i < vm_ndomains; i++)
5042ed581bf6SJeff Roberson 		sz += zone->uz_keg->uk_domain[i].ud_pages;
5043ed581bf6SJeff Roberson 
5044ed581bf6SJeff Roberson 	return (sz * PAGE_SIZE);
5045ed581bf6SJeff Roberson }
5046ed581bf6SJeff Roberson 
50478355f576SJeff Roberson /* See uma.h */
504808cfa56eSMark Johnston void
504908cfa56eSMark Johnston uma_reclaim(int req)
50508355f576SJeff Roberson {
5051*aabe13f1SMark Johnston 	uma_reclaim_domain(req, UMA_ANYDOMAIN);
5052*aabe13f1SMark Johnston }
505344ec2b63SKonstantin Belousov 
5054*aabe13f1SMark Johnston void
5055*aabe13f1SMark Johnston uma_reclaim_domain(int req, int domain)
5056*aabe13f1SMark Johnston {
5057*aabe13f1SMark Johnston 	void *arg;
5058*aabe13f1SMark Johnston 
505986bbae32SJeff Roberson 	bucket_enable();
506008cfa56eSMark Johnston 
5061*aabe13f1SMark Johnston 	arg = (void *)(uintptr_t)domain;
5062*aabe13f1SMark Johnston 	sx_slock(&uma_reclaim_lock);
506308cfa56eSMark Johnston 	switch (req) {
506408cfa56eSMark Johnston 	case UMA_RECLAIM_TRIM:
5065*aabe13f1SMark Johnston 		zone_foreach(zone_trim, arg);
506608cfa56eSMark Johnston 		break;
506708cfa56eSMark Johnston 	case UMA_RECLAIM_DRAIN:
5068*aabe13f1SMark Johnston 		zone_foreach(zone_drain, arg);
5069*aabe13f1SMark Johnston 		break;
507008cfa56eSMark Johnston 	case UMA_RECLAIM_DRAIN_CPU:
5071*aabe13f1SMark Johnston 		zone_foreach(zone_drain, arg);
507208cfa56eSMark Johnston 		pcpu_cache_drain_safe(NULL);
5073*aabe13f1SMark Johnston 		zone_foreach(zone_drain, arg);
507408cfa56eSMark Johnston 		break;
507508cfa56eSMark Johnston 	default:
507608cfa56eSMark Johnston 		panic("unhandled reclamation request %d", req);
507708cfa56eSMark Johnston 	}
50780f9b7bf3SMark Johnston 
50798355f576SJeff Roberson 	/*
50808355f576SJeff Roberson 	 * Some slabs may have been freed, but this zone was visited early, so
50818355f576SJeff Roberson 	 * visit it again to free pages that became empty once the other zones
50828355f576SJeff Roberson 	 * were drained.  We have to do the same for buckets.
50838355f576SJeff Roberson 	 */
5084*aabe13f1SMark Johnston 	zone_drain(slabzones[0], arg);
5085*aabe13f1SMark Johnston 	zone_drain(slabzones[1], arg);
5086*aabe13f1SMark Johnston 	bucket_zone_drain(domain);
5087*aabe13f1SMark Johnston 	sx_sunlock(&uma_reclaim_lock);
50888355f576SJeff Roberson }
50898355f576SJeff Roberson 
50902e47807cSJeff Roberson static volatile int uma_reclaim_needed;
509144ec2b63SKonstantin Belousov 
509244ec2b63SKonstantin Belousov void
509344ec2b63SKonstantin Belousov uma_reclaim_wakeup(void)
509444ec2b63SKonstantin Belousov {
509544ec2b63SKonstantin Belousov 
50962e47807cSJeff Roberson 	if (atomic_fetchadd_int(&uma_reclaim_needed, 1) == 0)
50972e47807cSJeff Roberson 		wakeup(uma_reclaim);
509844ec2b63SKonstantin Belousov }
509944ec2b63SKonstantin Belousov 
510044ec2b63SKonstantin Belousov void
510144ec2b63SKonstantin Belousov uma_reclaim_worker(void *arg __unused)
510244ec2b63SKonstantin Belousov {
510344ec2b63SKonstantin Belousov 
510444ec2b63SKonstantin Belousov 	for (;;) {
510508cfa56eSMark Johnston 		sx_xlock(&uma_reclaim_lock);
5106200f8117SKonstantin Belousov 		while (atomic_load_int(&uma_reclaim_needed) == 0)
510708cfa56eSMark Johnston 			sx_sleep(uma_reclaim, &uma_reclaim_lock, PVM, "umarcl",
51082e47807cSJeff Roberson 			    hz);
510908cfa56eSMark Johnston 		sx_xunlock(&uma_reclaim_lock);
51109b43bc27SAndriy Gapon 		EVENTHANDLER_INVOKE(vm_lowmem, VM_LOW_KMEM);
511108cfa56eSMark Johnston 		uma_reclaim(UMA_RECLAIM_DRAIN_CPU);
5112200f8117SKonstantin Belousov 		atomic_store_int(&uma_reclaim_needed, 0);
51132e47807cSJeff Roberson 		/* Don't fire more than once per second. */
51142e47807cSJeff Roberson 		pause("umarclslp", hz);
511544ec2b63SKonstantin Belousov 	}
511644ec2b63SKonstantin Belousov }
511744ec2b63SKonstantin Belousov 
5118663b416fSJohn Baldwin /* See uma.h */
511908cfa56eSMark Johnston void
512008cfa56eSMark Johnston uma_zone_reclaim(uma_zone_t zone, int req)
512108cfa56eSMark Johnston {
5122*aabe13f1SMark Johnston 	uma_zone_reclaim_domain(zone, req, UMA_ANYDOMAIN);
5123*aabe13f1SMark Johnston }
512408cfa56eSMark Johnston 
5125*aabe13f1SMark Johnston void
5126*aabe13f1SMark Johnston uma_zone_reclaim_domain(uma_zone_t zone, int req, int domain)
5127*aabe13f1SMark Johnston {
5128*aabe13f1SMark Johnston 	void *arg;
5129*aabe13f1SMark Johnston 
5130*aabe13f1SMark Johnston 	arg = (void *)(uintptr_t)domain;
513108cfa56eSMark Johnston 	switch (req) {
513208cfa56eSMark Johnston 	case UMA_RECLAIM_TRIM:
5133*aabe13f1SMark Johnston 		zone_trim(zone, arg);
513408cfa56eSMark Johnston 		break;
513508cfa56eSMark Johnston 	case UMA_RECLAIM_DRAIN:
5136*aabe13f1SMark Johnston 		zone_drain(zone, arg);
513708cfa56eSMark Johnston 		break;
513808cfa56eSMark Johnston 	case UMA_RECLAIM_DRAIN_CPU:
513908cfa56eSMark Johnston 		pcpu_cache_drain_safe(zone);
5140*aabe13f1SMark Johnston 		zone_drain(zone, arg);
514108cfa56eSMark Johnston 		break;
514208cfa56eSMark Johnston 	default:
514308cfa56eSMark Johnston 		panic("unhandled reclamation request %d", req);
514408cfa56eSMark Johnston 	}
514508cfa56eSMark Johnston }
514608cfa56eSMark Johnston 
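/*
 * Illustrative sketch (not compiled as part of this file) of the per-zone
 * reclamation entry points above: a subsystem shrinking its own cache can
 * trim excess items, or drain the caches of a single NUMA domain.  The
 * zone and domain arguments are hypothetical.
 */
#if 0	/* Example only; never compiled. */
static void
quuz_shrink(uma_zone_t quuz_zone, int domain)
{
	/* Return items above the zone's estimated working set, all domains. */
	uma_zone_reclaim(quuz_zone, UMA_RECLAIM_TRIM);

	/* Or empty the cache of one domain only. */
	uma_zone_reclaim_domain(quuz_zone, UMA_RECLAIM_DRAIN, domain);
}
#endif
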
514708cfa56eSMark Johnston /* See uma.h */
5148663b416fSJohn Baldwin int
5149663b416fSJohn Baldwin uma_zone_exhausted(uma_zone_t zone)
5150663b416fSJohn Baldwin {
5151663b416fSJohn Baldwin 
5152727c6918SJeff Roberson 	return (atomic_load_32(&zone->uz_sleepers) > 0);
51536c125b8dSMohan Srinivasan }
51546c125b8dSMohan Srinivasan 
51552e47807cSJeff Roberson unsigned long
51562e47807cSJeff Roberson uma_limit(void)
51572e47807cSJeff Roberson {
51582e47807cSJeff Roberson 
51592e47807cSJeff Roberson 	return (uma_kmem_limit);
51602e47807cSJeff Roberson }
51612e47807cSJeff Roberson 
51622e47807cSJeff Roberson void
51632e47807cSJeff Roberson uma_set_limit(unsigned long limit)
51642e47807cSJeff Roberson {
51652e47807cSJeff Roberson 
51662e47807cSJeff Roberson 	uma_kmem_limit = limit;
51672e47807cSJeff Roberson }
51682e47807cSJeff Roberson 
51692e47807cSJeff Roberson unsigned long
51702e47807cSJeff Roberson uma_size(void)
51712e47807cSJeff Roberson {
51722e47807cSJeff Roberson 
5173058f0f74SMark Johnston 	return (atomic_load_long(&uma_kmem_total));
5174ad5b0f5bSJeff Roberson }
5175ad5b0f5bSJeff Roberson 
5176ad5b0f5bSJeff Roberson long
5177ad5b0f5bSJeff Roberson uma_avail(void)
5178ad5b0f5bSJeff Roberson {
5179ad5b0f5bSJeff Roberson 
5180058f0f74SMark Johnston 	return (uma_kmem_limit - uma_size());
51812e47807cSJeff Roberson }
51822e47807cSJeff Roberson 
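/*
 * Illustrative sketch (not compiled as part of this file) of the
 * accounting helpers above; the three-quarters threshold is an arbitrary
 * example and the function name is hypothetical.
 */
#if 0	/* Example only; never compiled. */
static bool
example_uma_nearly_full(void)
{
	/* True once UMA has consumed more than 3/4 of its kmem limit. */
	return (uma_size() > uma_limit() / 4 * 3);
}
#endif
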
5183a0d4b0aeSRobert Watson #ifdef DDB
51848355f576SJeff Roberson /*
51857a52a97eSRobert Watson  * Generate statistics across both the zone and its per-cpu caches.  Return
51867a52a97eSRobert Watson  * desired statistics if the pointer is non-NULL for that statistic.
51877a52a97eSRobert Watson  *
51887a52a97eSRobert Watson  * Note: does not update the zone statistics, as it can't safely clear the
51897a52a97eSRobert Watson  * per-CPU cache statistic.
51907a52a97eSRobert Watson  *
51917a52a97eSRobert Watson  */
51927a52a97eSRobert Watson static void
51930f9b7bf3SMark Johnston uma_zone_sumstat(uma_zone_t z, long *cachefreep, uint64_t *allocsp,
5194c1685086SJeff Roberson     uint64_t *freesp, uint64_t *sleepsp, uint64_t *xdomainp)
51957a52a97eSRobert Watson {
51967a52a97eSRobert Watson 	uma_cache_t cache;
5197c1685086SJeff Roberson 	uint64_t allocs, frees, sleeps, xdomain;
51987a52a97eSRobert Watson 	int cachefree, cpu;
51997a52a97eSRobert Watson 
5200c1685086SJeff Roberson 	allocs = frees = sleeps = xdomain = 0;
52017a52a97eSRobert Watson 	cachefree = 0;
52023aa6d94eSJohn Baldwin 	CPU_FOREACH(cpu) {
52037a52a97eSRobert Watson 		cache = &z->uz_cpu[cpu];
5204376b1ba3SJeff Roberson 		cachefree += cache->uc_allocbucket.ucb_cnt;
5205376b1ba3SJeff Roberson 		cachefree += cache->uc_freebucket.ucb_cnt;
5206376b1ba3SJeff Roberson 		xdomain += cache->uc_crossbucket.ucb_cnt;
5207376b1ba3SJeff Roberson 		cachefree += cache->uc_crossbucket.ucb_cnt;
52087a52a97eSRobert Watson 		allocs += cache->uc_allocs;
52097a52a97eSRobert Watson 		frees += cache->uc_frees;
52107a52a97eSRobert Watson 	}
52112efcc8cbSGleb Smirnoff 	allocs += counter_u64_fetch(z->uz_allocs);
52122efcc8cbSGleb Smirnoff 	frees += counter_u64_fetch(z->uz_frees);
5213c6fd3e23SJeff Roberson 	xdomain += counter_u64_fetch(z->uz_xdomain);
5214bf965959SSean Bruno 	sleeps += z->uz_sleeps;
52157a52a97eSRobert Watson 	if (cachefreep != NULL)
52167a52a97eSRobert Watson 		*cachefreep = cachefree;
52177a52a97eSRobert Watson 	if (allocsp != NULL)
52187a52a97eSRobert Watson 		*allocsp = allocs;
52197a52a97eSRobert Watson 	if (freesp != NULL)
52207a52a97eSRobert Watson 		*freesp = frees;
5221bf965959SSean Bruno 	if (sleepsp != NULL)
5222bf965959SSean Bruno 		*sleepsp = sleeps;
5223c1685086SJeff Roberson 	if (xdomainp != NULL)
5224c1685086SJeff Roberson 		*xdomainp = xdomain;
52257a52a97eSRobert Watson }
5226a0d4b0aeSRobert Watson #endif /* DDB */
52277a52a97eSRobert Watson 
52287a52a97eSRobert Watson static int
52297a52a97eSRobert Watson sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
52307a52a97eSRobert Watson {
52317a52a97eSRobert Watson 	uma_keg_t kz;
52327a52a97eSRobert Watson 	uma_zone_t z;
52337a52a97eSRobert Watson 	int count;
52347a52a97eSRobert Watson 
52357a52a97eSRobert Watson 	count = 0;
5236111fbcd5SBryan Venteicher 	rw_rlock(&uma_rwlock);
52377a52a97eSRobert Watson 	LIST_FOREACH(kz, &uma_kegs, uk_link) {
52387a52a97eSRobert Watson 		LIST_FOREACH(z, &kz->uk_zones, uz_link)
52397a52a97eSRobert Watson 			count++;
52407a52a97eSRobert Watson 	}
5241b47acb0aSGleb Smirnoff 	LIST_FOREACH(z, &uma_cachezones, uz_link)
5242b47acb0aSGleb Smirnoff 		count++;
5243b47acb0aSGleb Smirnoff 
5244111fbcd5SBryan Venteicher 	rw_runlock(&uma_rwlock);
52457a52a97eSRobert Watson 	return (sysctl_handle_int(oidp, &count, 0, req));
52467a52a97eSRobert Watson }
52477a52a97eSRobert Watson 
5248b47acb0aSGleb Smirnoff static void
5249b47acb0aSGleb Smirnoff uma_vm_zone_stats(struct uma_type_header *uth, uma_zone_t z, struct sbuf *sbuf,
5250b47acb0aSGleb Smirnoff     struct uma_percpu_stat *ups, bool internal)
5251b47acb0aSGleb Smirnoff {
5252b47acb0aSGleb Smirnoff 	uma_zone_domain_t zdom;
5253b47acb0aSGleb Smirnoff 	uma_cache_t cache;
5254b47acb0aSGleb Smirnoff 	int i;
5255b47acb0aSGleb Smirnoff 
5256b47acb0aSGleb Smirnoff 	for (i = 0; i < vm_ndomains; i++) {
5257c6fd3e23SJeff Roberson 		zdom = ZDOM_GET(z, i);
5258b47acb0aSGleb Smirnoff 		uth->uth_zone_free += zdom->uzd_nitems;
5259b47acb0aSGleb Smirnoff 	}
5260b47acb0aSGleb Smirnoff 	uth->uth_allocs = counter_u64_fetch(z->uz_allocs);
5261b47acb0aSGleb Smirnoff 	uth->uth_frees = counter_u64_fetch(z->uz_frees);
5262b47acb0aSGleb Smirnoff 	uth->uth_fails = counter_u64_fetch(z->uz_fails);
5263c6fd3e23SJeff Roberson 	uth->uth_xdomain = counter_u64_fetch(z->uz_xdomain);
5264b47acb0aSGleb Smirnoff 	uth->uth_sleeps = z->uz_sleeps;
52651de9724eSMark Johnston 
5266b47acb0aSGleb Smirnoff 	for (i = 0; i < mp_maxid + 1; i++) {
5267b47acb0aSGleb Smirnoff 		bzero(&ups[i], sizeof(*ups));
5268b47acb0aSGleb Smirnoff 		if (internal || CPU_ABSENT(i))
5269b47acb0aSGleb Smirnoff 			continue;
5270b47acb0aSGleb Smirnoff 		cache = &z->uz_cpu[i];
5271376b1ba3SJeff Roberson 		ups[i].ups_cache_free += cache->uc_allocbucket.ucb_cnt;
5272376b1ba3SJeff Roberson 		ups[i].ups_cache_free += cache->uc_freebucket.ucb_cnt;
5273376b1ba3SJeff Roberson 		ups[i].ups_cache_free += cache->uc_crossbucket.ucb_cnt;
5274b47acb0aSGleb Smirnoff 		ups[i].ups_allocs = cache->uc_allocs;
5275b47acb0aSGleb Smirnoff 		ups[i].ups_frees = cache->uc_frees;
5276b47acb0aSGleb Smirnoff 	}
5277b47acb0aSGleb Smirnoff }
5278b47acb0aSGleb Smirnoff 
52797a52a97eSRobert Watson static int
52807a52a97eSRobert Watson sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
52817a52a97eSRobert Watson {
52827a52a97eSRobert Watson 	struct uma_stream_header ush;
52837a52a97eSRobert Watson 	struct uma_type_header uth;
528463b5d112SKonstantin Belousov 	struct uma_percpu_stat *ups;
52857a52a97eSRobert Watson 	struct sbuf sbuf;
52867a52a97eSRobert Watson 	uma_keg_t kz;
52877a52a97eSRobert Watson 	uma_zone_t z;
52884bd61e19SJeff Roberson 	uint64_t items;
52898b987a77SJeff Roberson 	uint32_t kfree, pages;
52904e657159SMatthew D Fleming 	int count, error, i;
52917a52a97eSRobert Watson 
529200f0e671SMatthew D Fleming 	error = sysctl_wire_old_buffer(req, 0);
529300f0e671SMatthew D Fleming 	if (error != 0)
529400f0e671SMatthew D Fleming 		return (error);
52954e657159SMatthew D Fleming 	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
52961eafc078SIan Lepore 	sbuf_clear_flags(&sbuf, SBUF_INCLUDENUL);
529763b5d112SKonstantin Belousov 	ups = malloc((mp_maxid + 1) * sizeof(*ups), M_TEMP, M_WAITOK);
52984e657159SMatthew D Fleming 
5299404a593eSMatthew D Fleming 	count = 0;
5300111fbcd5SBryan Venteicher 	rw_rlock(&uma_rwlock);
53017a52a97eSRobert Watson 	LIST_FOREACH(kz, &uma_kegs, uk_link) {
53027a52a97eSRobert Watson 		LIST_FOREACH(z, &kz->uk_zones, uz_link)
53037a52a97eSRobert Watson 			count++;
53047a52a97eSRobert Watson 	}
53057a52a97eSRobert Watson 
5306b47acb0aSGleb Smirnoff 	LIST_FOREACH(z, &uma_cachezones, uz_link)
5307b47acb0aSGleb Smirnoff 		count++;
5308b47acb0aSGleb Smirnoff 
53097a52a97eSRobert Watson 	/*
53107a52a97eSRobert Watson 	 * Insert stream header.
53117a52a97eSRobert Watson 	 */
53127a52a97eSRobert Watson 	bzero(&ush, sizeof(ush));
53137a52a97eSRobert Watson 	ush.ush_version = UMA_STREAM_VERSION;
5314ab3a57c0SRobert Watson 	ush.ush_maxcpus = (mp_maxid + 1);
53157a52a97eSRobert Watson 	ush.ush_count = count;
53164e657159SMatthew D Fleming 	(void)sbuf_bcat(&sbuf, &ush, sizeof(ush));
53177a52a97eSRobert Watson 
53187a52a97eSRobert Watson 	LIST_FOREACH(kz, &uma_kegs, uk_link) {
53198b987a77SJeff Roberson 		kfree = pages = 0;
53208b987a77SJeff Roberson 		for (i = 0; i < vm_ndomains; i++) {
53214ab3aee8SMark Johnston 			kfree += kz->uk_domain[i].ud_free_items;
53228b987a77SJeff Roberson 			pages += kz->uk_domain[i].ud_pages;
53238b987a77SJeff Roberson 		}
53247a52a97eSRobert Watson 		LIST_FOREACH(z, &kz->uk_zones, uz_link) {
53257a52a97eSRobert Watson 			bzero(&uth, sizeof(uth));
5326cbbb4a00SRobert Watson 			strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
53277a52a97eSRobert Watson 			uth.uth_align = kz->uk_align;
53287a52a97eSRobert Watson 			uth.uth_size = kz->uk_size;
53297a52a97eSRobert Watson 			uth.uth_rsize = kz->uk_rsize;
53304bd61e19SJeff Roberson 			if (z->uz_max_items > 0) {
53314bd61e19SJeff Roberson 				items = UZ_ITEMS_COUNT(z->uz_items);
53324bd61e19SJeff Roberson 				uth.uth_pages = (items / kz->uk_ipers) *
5333bb15d1c7SGleb Smirnoff 					kz->uk_ppera;
53344bd61e19SJeff Roberson 			} else
53358b987a77SJeff Roberson 				uth.uth_pages = pages;
5336f8c86a5fSGleb Smirnoff 			uth.uth_maxpages = (z->uz_max_items / kz->uk_ipers) *
5337bb15d1c7SGleb Smirnoff 			    kz->uk_ppera;
5338bb15d1c7SGleb Smirnoff 			uth.uth_limit = z->uz_max_items;
53398b987a77SJeff Roberson 			uth.uth_keg_free = kfree;
5340cbbb4a00SRobert Watson 
5341cbbb4a00SRobert Watson 			/*
5342cbbb4a00SRobert Watson 			 * A zone is secondary if it is not the first entry
5343cbbb4a00SRobert Watson 			 * on the keg's zone list.
5344cbbb4a00SRobert Watson 			 */
5345e20a199fSJeff Roberson 			if ((z->uz_flags & UMA_ZONE_SECONDARY) &&
5346cbbb4a00SRobert Watson 			    (LIST_FIRST(&kz->uk_zones) != z))
5347cbbb4a00SRobert Watson 				uth.uth_zone_flags = UTH_ZONE_SECONDARY;
5348b47acb0aSGleb Smirnoff 			uma_vm_zone_stats(&uth, z, &sbuf, ups,
5349b47acb0aSGleb Smirnoff 			    kz->uk_flags & UMA_ZFLAG_INTERNAL);
535063b5d112SKonstantin Belousov 			(void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
535163b5d112SKonstantin Belousov 			for (i = 0; i < mp_maxid + 1; i++)
535263b5d112SKonstantin Belousov 				(void)sbuf_bcat(&sbuf, &ups[i], sizeof(ups[i]));
53537a52a97eSRobert Watson 		}
53547a52a97eSRobert Watson 	}
5355b47acb0aSGleb Smirnoff 	LIST_FOREACH(z, &uma_cachezones, uz_link) {
5356b47acb0aSGleb Smirnoff 		bzero(&uth, sizeof(uth));
5357b47acb0aSGleb Smirnoff 		strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
5358b47acb0aSGleb Smirnoff 		uth.uth_size = z->uz_size;
5359b47acb0aSGleb Smirnoff 		uma_vm_zone_stats(&uth, z, &sbuf, ups, false);
5360b47acb0aSGleb Smirnoff 		(void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
5361b47acb0aSGleb Smirnoff 		for (i = 0; i < mp_maxid + 1; i++)
5362b47acb0aSGleb Smirnoff 			(void)sbuf_bcat(&sbuf, &ups[i], sizeof(ups[i]));
5363b47acb0aSGleb Smirnoff 	}
5364b47acb0aSGleb Smirnoff 
5365111fbcd5SBryan Venteicher 	rw_runlock(&uma_rwlock);
53664e657159SMatthew D Fleming 	error = sbuf_finish(&sbuf);
53674e657159SMatthew D Fleming 	sbuf_delete(&sbuf);
536863b5d112SKonstantin Belousov 	free(ups, M_TEMP);
53697a52a97eSRobert Watson 	return (error);
53707a52a97eSRobert Watson }
537148c5777eSRobert Watson 
53720a5a3ccbSGleb Smirnoff int
53730a5a3ccbSGleb Smirnoff sysctl_handle_uma_zone_max(SYSCTL_HANDLER_ARGS)
53740a5a3ccbSGleb Smirnoff {
53750a5a3ccbSGleb Smirnoff 	uma_zone_t zone = *(uma_zone_t *)arg1;
537616be9f54SGleb Smirnoff 	int error, max;
53770a5a3ccbSGleb Smirnoff 
537816be9f54SGleb Smirnoff 	max = uma_zone_get_max(zone);
53790a5a3ccbSGleb Smirnoff 	error = sysctl_handle_int(oidp, &max, 0, req);
53800a5a3ccbSGleb Smirnoff 	if (error || !req->newptr)
53810a5a3ccbSGleb Smirnoff 		return (error);
53820a5a3ccbSGleb Smirnoff 
53830a5a3ccbSGleb Smirnoff 	uma_zone_set_max(zone, max);
53840a5a3ccbSGleb Smirnoff 
53850a5a3ccbSGleb Smirnoff 	return (0);
53860a5a3ccbSGleb Smirnoff }
53870a5a3ccbSGleb Smirnoff 
53880a5a3ccbSGleb Smirnoff int
53890a5a3ccbSGleb Smirnoff sysctl_handle_uma_zone_cur(SYSCTL_HANDLER_ARGS)
53900a5a3ccbSGleb Smirnoff {
539120a4e154SJeff Roberson 	uma_zone_t zone;
53920a5a3ccbSGleb Smirnoff 	int cur;
53930a5a3ccbSGleb Smirnoff 
539420a4e154SJeff Roberson 	/*
539520a4e154SJeff Roberson 	 * Some callers want to add sysctls for global zones that
539620a4e154SJeff Roberson 	 * may not yet exist, so they pass a pointer to a pointer.
539720a4e154SJeff Roberson 	 */
539820a4e154SJeff Roberson 	if (arg2 == 0)
539920a4e154SJeff Roberson 		zone = *(uma_zone_t *)arg1;
540020a4e154SJeff Roberson 	else
540120a4e154SJeff Roberson 		zone = arg1;
54020a5a3ccbSGleb Smirnoff 	cur = uma_zone_get_cur(zone);
54030a5a3ccbSGleb Smirnoff 	return (sysctl_handle_int(oidp, &cur, 0, req));
54040a5a3ccbSGleb Smirnoff }
54050a5a3ccbSGleb Smirnoff 
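/*
 * Read-only sysctl handler exporting a zone's lifetime allocation count.
 */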
540620a4e154SJeff Roberson static int
540720a4e154SJeff Roberson sysctl_handle_uma_zone_allocs(SYSCTL_HANDLER_ARGS)
540820a4e154SJeff Roberson {
540920a4e154SJeff Roberson 	uma_zone_t zone = arg1;
541020a4e154SJeff Roberson 	uint64_t cur;
541120a4e154SJeff Roberson 
541220a4e154SJeff Roberson 	cur = uma_zone_get_allocs(zone);
541320a4e154SJeff Roberson 	return (sysctl_handle_64(oidp, &cur, 0, req));
541420a4e154SJeff Roberson }
541520a4e154SJeff Roberson 
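/*
 * Read-only sysctl handler exporting a zone's lifetime free count.
 */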
541620a4e154SJeff Roberson static int
541720a4e154SJeff Roberson sysctl_handle_uma_zone_frees(SYSCTL_HANDLER_ARGS)
541820a4e154SJeff Roberson {
541920a4e154SJeff Roberson 	uma_zone_t zone = arg1;
542020a4e154SJeff Roberson 	uint64_t cur;
542120a4e154SJeff Roberson 
542220a4e154SJeff Roberson 	cur = uma_zone_get_frees(zone);
542320a4e154SJeff Roberson 	return (sysctl_handle_64(oidp, &cur, 0, req));
542420a4e154SJeff Roberson }
542520a4e154SJeff Roberson 
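/*
 * Formats the zone's flag word as a "%b" bit string (PRINT_UMA_ZFLAGS),
 * or "0" if no flags are set.
 */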
54266d204a6aSRyan Libby static int
54276d204a6aSRyan Libby sysctl_handle_uma_zone_flags(SYSCTL_HANDLER_ARGS)
54286d204a6aSRyan Libby {
54296d204a6aSRyan Libby 	struct sbuf sbuf;
54306d204a6aSRyan Libby 	uma_zone_t zone = arg1;
54316d204a6aSRyan Libby 	int error;
54326d204a6aSRyan Libby 
54336d204a6aSRyan Libby 	sbuf_new_for_sysctl(&sbuf, NULL, 0, req);
54346d204a6aSRyan Libby 	if (zone->uz_flags != 0)
54356d204a6aSRyan Libby 		sbuf_printf(&sbuf, "0x%b", zone->uz_flags, PRINT_UMA_ZFLAGS);
54366d204a6aSRyan Libby 	else
54376d204a6aSRyan Libby 		sbuf_printf(&sbuf, "0");
54386d204a6aSRyan Libby 	error = sbuf_finish(&sbuf);
54396d204a6aSRyan Libby 	sbuf_delete(&sbuf);
54406d204a6aSRyan Libby 
54416d204a6aSRyan Libby 	return (error);
54426d204a6aSRyan Libby }
54436d204a6aSRyan Libby 
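/*
 * Reports slab space efficiency as a percentage: the bytes usable for items
 * (ipers times the client size rounded up to the requested alignment)
 * divided by the total backing store (ppera pages plus any off-page slab
 * header).  For instance, seven 560-byte items packed into one 4 KB page
 * would report 100 * (7 * 560) / 4096 = 95%.
 */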
5444f7af5015SRyan Libby static int
5445f7af5015SRyan Libby sysctl_handle_uma_slab_efficiency(SYSCTL_HANDLER_ARGS)
5446f7af5015SRyan Libby {
5447f7af5015SRyan Libby 	uma_keg_t keg = arg1;
5448f7af5015SRyan Libby 	int avail, effpct, total;
5449f7af5015SRyan Libby 
5450f7af5015SRyan Libby 	total = keg->uk_ppera * PAGE_SIZE;
545154c5ae80SRyan Libby 	if ((keg->uk_flags & UMA_ZFLAG_OFFPAGE) != 0)
54529b8db4d0SRyan Libby 		total += slabzone(keg->uk_ipers)->uz_keg->uk_rsize;
5453f7af5015SRyan Libby 	/*
5454f7af5015SRyan Libby 	 * We consider the client's requested size and alignment here, not the
5455f7af5015SRyan Libby 	 * computed real size (uk_rsize), because the real size is also
5456f7af5015SRyan Libby 	 * adjusted for internal implementation reasons (max bitset size).
5457f7af5015SRyan Libby 	 */
5458f7af5015SRyan Libby 	avail = keg->uk_ipers * roundup2(keg->uk_size, keg->uk_align + 1);
5459f7af5015SRyan Libby 	if ((keg->uk_flags & UMA_ZONE_PCPU) != 0)
5460f7af5015SRyan Libby 		avail *= mp_maxid + 1;
5461f7af5015SRyan Libby 	effpct = 100 * avail / total;
5462f7af5015SRyan Libby 	return (sysctl_handle_int(oidp, &effpct, 0, req));
5463f7af5015SRyan Libby }
5464f7af5015SRyan Libby 
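/*
 * Reports the item count encoded in the zone's uz_items word (the counter
 * used for limit accounting), loaded atomically.
 */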
54654bd61e19SJeff Roberson static int
54664bd61e19SJeff Roberson sysctl_handle_uma_zone_items(SYSCTL_HANDLER_ARGS)
54674bd61e19SJeff Roberson {
54684bd61e19SJeff Roberson 	uma_zone_t zone = arg1;
54694bd61e19SJeff Roberson 	uint64_t cur;
54704bd61e19SJeff Roberson 
54714bd61e19SJeff Roberson 	cur = UZ_ITEMS_COUNT(atomic_load_64(&zone->uz_items));
54724bd61e19SJeff Roberson 	return (sysctl_handle_64(oidp, &cur, 0, req));
54734bd61e19SJeff Roberson }
54744bd61e19SJeff Roberson 
54759542ea7bSGleb Smirnoff #ifdef INVARIANTS
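/*
 * Find the slab backing an item for the debugging checks below: cache zones
 * have no slabs, VTOSLAB kegs record the slab in the vm_page, kegs without a
 * hash keep the slab header at uk_pgoff within the slab's memory, and hashed
 * kegs require a keg-locked lookup in uk_hash.
 */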
54769542ea7bSGleb Smirnoff static uma_slab_t
54779542ea7bSGleb Smirnoff uma_dbg_getslab(uma_zone_t zone, void *item)
54789542ea7bSGleb Smirnoff {
54799542ea7bSGleb Smirnoff 	uma_slab_t slab;
54809542ea7bSGleb Smirnoff 	uma_keg_t keg;
54819542ea7bSGleb Smirnoff 	uint8_t *mem;
54829542ea7bSGleb Smirnoff 
54839542ea7bSGleb Smirnoff 	/*
54849542ea7bSGleb Smirnoff 	 * It is safe to return the slab here even though the
54859542ea7bSGleb Smirnoff 	 * zone is unlocked because the item's allocation state
54869542ea7bSGleb Smirnoff 	 * essentially holds a reference.
54879542ea7bSGleb Smirnoff 	 */
5488727c6918SJeff Roberson 	mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
5489727c6918SJeff Roberson 	if ((zone->uz_flags & UMA_ZFLAG_CACHE) != 0)
5490bb15d1c7SGleb Smirnoff 		return (NULL);
549154c5ae80SRyan Libby 	if (zone->uz_flags & UMA_ZFLAG_VTOSLAB)
5492727c6918SJeff Roberson 		return (vtoslab((vm_offset_t)mem));
5493bb15d1c7SGleb Smirnoff 	keg = zone->uz_keg;
549454c5ae80SRyan Libby 	if ((keg->uk_flags & UMA_ZFLAG_HASH) == 0)
5495727c6918SJeff Roberson 		return ((uma_slab_t)(mem + keg->uk_pgoff));
54968b987a77SJeff Roberson 	KEG_LOCK(keg, 0);
54979542ea7bSGleb Smirnoff 	slab = hash_sfind(&keg->uk_hash, mem);
54988b987a77SJeff Roberson 	KEG_UNLOCK(keg, 0);
54999542ea7bSGleb Smirnoff 
55009542ea7bSGleb Smirnoff 	return (slab);
55019542ea7bSGleb Smirnoff }
55029542ea7bSGleb Smirnoff 
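/*
 * Zone-level skip check: items from cache-only zones are never verified;
 * otherwise defer to the keg-level decision below.
 */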
5503c5deaf04SGleb Smirnoff static bool
5504c5deaf04SGleb Smirnoff uma_dbg_zskip(uma_zone_t zone, void *mem)
5505c5deaf04SGleb Smirnoff {
5506c5deaf04SGleb Smirnoff 
5507727c6918SJeff Roberson 	if ((zone->uz_flags & UMA_ZFLAG_CACHE) != 0)
5508c5deaf04SGleb Smirnoff 		return (true);
5509c5deaf04SGleb Smirnoff 
5510bb15d1c7SGleb Smirnoff 	return (uma_dbg_kskip(zone->uz_keg, mem));
5511c5deaf04SGleb Smirnoff }
5512c5deaf04SGleb Smirnoff 
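/*
 * Decide whether debugging checks should be skipped for an item.  An index
 * is derived from the item's page number and its slot within the page, and
 * only items whose index is a multiple of dbg_divisor are checked: a divisor
 * of 0 disables checking entirely, 1 checks every item, and e.g. 10 samples
 * roughly one item in ten.
 */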
5513c5deaf04SGleb Smirnoff static bool
5514c5deaf04SGleb Smirnoff uma_dbg_kskip(uma_keg_t keg, void *mem)
5515c5deaf04SGleb Smirnoff {
5516c5deaf04SGleb Smirnoff 	uintptr_t idx;
5517c5deaf04SGleb Smirnoff 
5518c5deaf04SGleb Smirnoff 	if (dbg_divisor == 0)
5519c5deaf04SGleb Smirnoff 		return (true);
5520c5deaf04SGleb Smirnoff 
5521c5deaf04SGleb Smirnoff 	if (dbg_divisor == 1)
5522c5deaf04SGleb Smirnoff 		return (false);
5523c5deaf04SGleb Smirnoff 
5524c5deaf04SGleb Smirnoff 	idx = (uintptr_t)mem >> PAGE_SHIFT;
5525c5deaf04SGleb Smirnoff 	if (keg->uk_ipers > 1) {
5526c5deaf04SGleb Smirnoff 		idx *= keg->uk_ipers;
5527c5deaf04SGleb Smirnoff 		idx += ((uintptr_t)mem & PAGE_MASK) / keg->uk_rsize;
5528c5deaf04SGleb Smirnoff 	}
5529c5deaf04SGleb Smirnoff 
5530c5deaf04SGleb Smirnoff 	if ((idx / dbg_divisor) * dbg_divisor != idx) {
5531c5deaf04SGleb Smirnoff 		counter_u64_add(uma_skip_cnt, 1);
5532c5deaf04SGleb Smirnoff 		return (true);
5533c5deaf04SGleb Smirnoff 	}
5534c5deaf04SGleb Smirnoff 	counter_u64_add(uma_dbg_cnt, 1);
5535c5deaf04SGleb Smirnoff 
5536c5deaf04SGleb Smirnoff 	return (false);
5537c5deaf04SGleb Smirnoff }
5538c5deaf04SGleb Smirnoff 
55399542ea7bSGleb Smirnoff /*
55409542ea7bSGleb Smirnoff  * Set up the slab's freei data such that uma_dbg_free can function.
55419542ea7bSGleb Smirnoff  *
55429542ea7bSGleb Smirnoff  */
55439542ea7bSGleb Smirnoff static void
55449542ea7bSGleb Smirnoff uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item)
55459542ea7bSGleb Smirnoff {
55469542ea7bSGleb Smirnoff 	uma_keg_t keg;
55479542ea7bSGleb Smirnoff 	int freei;
55489542ea7bSGleb Smirnoff 
55499542ea7bSGleb Smirnoff 	if (slab == NULL) {
55509542ea7bSGleb Smirnoff 		slab = uma_dbg_getslab(zone, item);
55519542ea7bSGleb Smirnoff 		if (slab == NULL)
5552952c8964SMark Johnston 			panic("uma: item %p did not belong to zone %s",
55539542ea7bSGleb Smirnoff 			    item, zone->uz_name);
55549542ea7bSGleb Smirnoff 	}
5555584061b4SJeff Roberson 	keg = zone->uz_keg;
55561e0701e1SJeff Roberson 	freei = slab_item_index(slab, keg, item);
55579542ea7bSGleb Smirnoff 
5558942951baSRyan Libby 	if (BIT_TEST_SET_ATOMIC(keg->uk_ipers, freei,
5559942951baSRyan Libby 	    slab_dbg_bits(slab, keg)))
5560952c8964SMark Johnston 		panic("Duplicate alloc of %p from zone %p(%s) slab %p(%d)",
55619542ea7bSGleb Smirnoff 		    item, zone, zone->uz_name, slab, freei);
55629542ea7bSGleb Smirnoff }
55639542ea7bSGleb Smirnoff 
55649542ea7bSGleb Smirnoff /*
55659542ea7bSGleb Smirnoff  * Verifies freed addresses.  Checks for alignment, valid slab membership
55669542ea7bSGleb Smirnoff  * and duplicate frees.
55679542ea7bSGleb Smirnoff  *
55689542ea7bSGleb Smirnoff  */
55699542ea7bSGleb Smirnoff static void
55709542ea7bSGleb Smirnoff uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item)
55719542ea7bSGleb Smirnoff {
55729542ea7bSGleb Smirnoff 	uma_keg_t keg;
55739542ea7bSGleb Smirnoff 	int freei;
55749542ea7bSGleb Smirnoff 
55759542ea7bSGleb Smirnoff 	if (slab == NULL) {
55769542ea7bSGleb Smirnoff 		slab = uma_dbg_getslab(zone, item);
55779542ea7bSGleb Smirnoff 		if (slab == NULL)
5578952c8964SMark Johnston 			panic("uma: Freed item %p did not belong to zone %s",
55799542ea7bSGleb Smirnoff 			    item, zone->uz_name);
55809542ea7bSGleb Smirnoff 	}
5581584061b4SJeff Roberson 	keg = zone->uz_keg;
55821e0701e1SJeff Roberson 	freei = slab_item_index(slab, keg, item);
55839542ea7bSGleb Smirnoff 
55849542ea7bSGleb Smirnoff 	if (freei >= keg->uk_ipers)
5585952c8964SMark Johnston 		panic("Invalid free of %p from zone %p(%s) slab %p(%d)",
55869542ea7bSGleb Smirnoff 		    item, zone, zone->uz_name, slab, freei);
55879542ea7bSGleb Smirnoff 
55881e0701e1SJeff Roberson 	if (slab_item(slab, keg, freei) != item)
5589952c8964SMark Johnston 		panic("Unaligned free of %p from zone %p(%s) slab %p(%d)",
55909542ea7bSGleb Smirnoff 		    item, zone, zone->uz_name, slab, freei);
55919542ea7bSGleb Smirnoff 
5592942951baSRyan Libby 	if (!BIT_TEST_CLR_ATOMIC(keg->uk_ipers, freei,
5593942951baSRyan Libby 	    slab_dbg_bits(slab, keg)))
5594952c8964SMark Johnston 		panic("Duplicate free of %p from zone %p(%s) slab %p(%d)",
55959542ea7bSGleb Smirnoff 		    item, zone, zone->uz_name, slab, freei);
55969542ea7bSGleb Smirnoff }
55979542ea7bSGleb Smirnoff #endif /* INVARIANTS */
55989542ea7bSGleb Smirnoff 
559948c5777eSRobert Watson #ifdef DDB
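/*
 * Collect one zone's statistics for the DDB display: allocation, free and
 * sleep counters, items cached in per-domain buckets and on keg free lists,
 * and, as the return value used for sorting, the total memory footprint
 * ((used + cached) * item size).
 */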
560046d70077SConrad Meyer static int64_t
560146d70077SConrad Meyer get_uma_stats(uma_keg_t kz, uma_zone_t z, uint64_t *allocs, uint64_t *used,
56020223790fSConrad Meyer     uint64_t *sleeps, long *cachefree, uint64_t *xdomain)
560348c5777eSRobert Watson {
560446d70077SConrad Meyer 	uint64_t frees;
56050f9b7bf3SMark Johnston 	int i;
560648c5777eSRobert Watson 
560748c5777eSRobert Watson 	if (kz->uk_flags & UMA_ZFLAG_INTERNAL) {
560846d70077SConrad Meyer 		*allocs = counter_u64_fetch(z->uz_allocs);
56092efcc8cbSGleb Smirnoff 		frees = counter_u64_fetch(z->uz_frees);
561046d70077SConrad Meyer 		*sleeps = z->uz_sleeps;
561146d70077SConrad Meyer 		*cachefree = 0;
561246d70077SConrad Meyer 		*xdomain = 0;
561348c5777eSRobert Watson 	} else
561446d70077SConrad Meyer 		uma_zone_sumstat(z, cachefree, allocs, &frees, sleeps,
561546d70077SConrad Meyer 		    xdomain);
56168b987a77SJeff Roberson 	for (i = 0; i < vm_ndomains; i++) {
5617c6fd3e23SJeff Roberson 		*cachefree += ZDOM_GET(z, i)->uzd_nitems;
5618e20a199fSJeff Roberson 		if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
561948c5777eSRobert Watson 		    (LIST_FIRST(&kz->uk_zones) != z)))
56204ab3aee8SMark Johnston 			*cachefree += kz->uk_domain[i].ud_free_items;
56218b987a77SJeff Roberson 	}
562246d70077SConrad Meyer 	*used = *allocs - frees;
562346d70077SConrad Meyer 	return (((int64_t)*used + *cachefree) * kz->uk_size);
562446d70077SConrad Meyer }
56250f9b7bf3SMark Johnston 
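/*
 * "show uma" DDB command: print every keg-backed zone, largest memory
 * footprint first.  The sort is a repeated selection over the keg list so
 * that no memory needs to be allocated while in the debugger.
 */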
562646d70077SConrad Meyer DB_SHOW_COMMAND(uma, db_show_uma)
562746d70077SConrad Meyer {
562846d70077SConrad Meyer 	const char *fmt_hdr, *fmt_entry;
562946d70077SConrad Meyer 	uma_keg_t kz;
563046d70077SConrad Meyer 	uma_zone_t z;
563146d70077SConrad Meyer 	uint64_t allocs, used, sleeps, xdomain;
563246d70077SConrad Meyer 	long cachefree;
563346d70077SConrad Meyer 	/* variables for sorting */
563446d70077SConrad Meyer 	uma_keg_t cur_keg;
563546d70077SConrad Meyer 	uma_zone_t cur_zone, last_zone;
563646d70077SConrad Meyer 	int64_t cur_size, last_size, size;
563746d70077SConrad Meyer 	int ties;
563846d70077SConrad Meyer 
563946d70077SConrad Meyer 	/* /i option produces machine-parseable CSV output */
564046d70077SConrad Meyer 	if (modif[0] == 'i') {
564146d70077SConrad Meyer 		fmt_hdr = "%s,%s,%s,%s,%s,%s,%s,%s,%s\n";
564246d70077SConrad Meyer 		fmt_entry = "\"%s\",%ju,%jd,%ld,%ju,%ju,%u,%jd,%ju\n";
564346d70077SConrad Meyer 	} else {
564446d70077SConrad Meyer 		fmt_hdr = "%18s %6s %7s %7s %11s %7s %7s %10s %8s\n";
564546d70077SConrad Meyer 		fmt_entry = "%18s %6ju %7jd %7ld %11ju %7ju %7u %10jd %8ju\n";
564646d70077SConrad Meyer 	}
564746d70077SConrad Meyer 
564846d70077SConrad Meyer 	db_printf(fmt_hdr, "Zone", "Size", "Used", "Free", "Requests",
564946d70077SConrad Meyer 	    "Sleeps", "Bucket", "Total Mem", "XFree");
565046d70077SConrad Meyer 
565146d70077SConrad Meyer 	/* Sort the zones with largest size first. */
565246d70077SConrad Meyer 	last_zone = NULL;
565346d70077SConrad Meyer 	last_size = INT64_MAX;
565446d70077SConrad Meyer 	for (;;) {
565546d70077SConrad Meyer 		cur_zone = NULL;
565646d70077SConrad Meyer 		cur_size = -1;
565746d70077SConrad Meyer 		ties = 0;
565846d70077SConrad Meyer 		LIST_FOREACH(kz, &uma_kegs, uk_link) {
565946d70077SConrad Meyer 			LIST_FOREACH(z, &kz->uk_zones, uz_link) {
566046d70077SConrad Meyer 				/*
566146d70077SConrad Meyer 				 * In the case of size ties, print out zones
566246d70077SConrad Meyer 				 * in the order they are encountered.  That is,
566346d70077SConrad Meyer 				 * when we encounter the most recently output
566446d70077SConrad Meyer 				 * zone, we have already printed all preceding
566546d70077SConrad Meyer 				 * ties, and we must print all following ties.
566646d70077SConrad Meyer 				 */
566746d70077SConrad Meyer 				if (z == last_zone) {
566846d70077SConrad Meyer 					ties = 1;
566946d70077SConrad Meyer 					continue;
567046d70077SConrad Meyer 				}
567146d70077SConrad Meyer 				size = get_uma_stats(kz, z, &allocs, &used,
567246d70077SConrad Meyer 				    &sleeps, &cachefree, &xdomain);
567346d70077SConrad Meyer 				if (size > cur_size && size < last_size + ties)
567446d70077SConrad Meyer 				{
567546d70077SConrad Meyer 					cur_size = size;
567646d70077SConrad Meyer 					cur_zone = z;
567746d70077SConrad Meyer 					cur_keg = kz;
567846d70077SConrad Meyer 				}
567946d70077SConrad Meyer 			}
568046d70077SConrad Meyer 		}
568146d70077SConrad Meyer 		if (cur_zone == NULL)
568246d70077SConrad Meyer 			break;
568346d70077SConrad Meyer 
568446d70077SConrad Meyer 		size = get_uma_stats(cur_keg, cur_zone, &allocs, &used,
568546d70077SConrad Meyer 		    &sleeps, &cachefree, &xdomain);
568646d70077SConrad Meyer 		db_printf(fmt_entry, cur_zone->uz_name,
568746d70077SConrad Meyer 		    (uintmax_t)cur_keg->uk_size, (intmax_t)used, cachefree,
568846d70077SConrad Meyer 		    (uintmax_t)allocs, (uintmax_t)sleeps,
568920a4e154SJeff Roberson 		    (unsigned)cur_zone->uz_bucket_size, (intmax_t)size,
569020a4e154SJeff Roberson 		    xdomain);
569146d70077SConrad Meyer 
5692687c94aaSJohn Baldwin 		if (db_pager_quit)
5693687c94aaSJohn Baldwin 			return;
569446d70077SConrad Meyer 		last_zone = cur_zone;
569546d70077SConrad Meyer 		last_size = cur_size;
569648c5777eSRobert Watson 	}
569748c5777eSRobert Watson }
569803175483SAlexander Motin 
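/*
 * "show umacache" DDB command: print the cache-only zones, deriving the used
 * count from the allocation and free counters and the free count from the
 * per-domain bucket caches.
 */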
569903175483SAlexander Motin DB_SHOW_COMMAND(umacache, db_show_umacache)
570003175483SAlexander Motin {
570103175483SAlexander Motin 	uma_zone_t z;
5702ab3185d1SJeff Roberson 	uint64_t allocs, frees;
57030f9b7bf3SMark Johnston 	long cachefree;
57040f9b7bf3SMark Johnston 	int i;
570503175483SAlexander Motin 
570603175483SAlexander Motin 	db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free",
570703175483SAlexander Motin 	    "Requests", "Bucket");
570803175483SAlexander Motin 	LIST_FOREACH(z, &uma_cachezones, uz_link) {
5709c1685086SJeff Roberson 		uma_zone_sumstat(z, &cachefree, &allocs, &frees, NULL, NULL);
57100f9b7bf3SMark Johnston 		for (i = 0; i < vm_ndomains; i++)
5711c6fd3e23SJeff Roberson 			cachefree += ZDOM_GET(z, i)->uzd_nitems;
57120f9b7bf3SMark Johnston 		db_printf("%18s %8ju %8jd %8ld %12ju %8u\n",
571303175483SAlexander Motin 		    z->uz_name, (uintmax_t)z->uz_size,
571403175483SAlexander Motin 		    (intmax_t)(allocs - frees), cachefree,
571520a4e154SJeff Roberson 		    (uintmax_t)allocs, z->uz_bucket_size);
571603175483SAlexander Motin 		if (db_pager_quit)
571703175483SAlexander Motin 			return;
571803175483SAlexander Motin 	}
571903175483SAlexander Motin }
57209542ea7bSGleb Smirnoff #endif	/* DDB */
5721