xref: /freebsd/sys/vm/uma_core.c (revision f506d5af50fccc37f5aa9fe090e9a0d5f05506c8)
160727d8bSWarner Losh /*-
24d846d26SWarner Losh  * SPDX-License-Identifier: BSD-2-Clause
3fe267a55SPedro F. Giffuni  *
4584061b4SJeff Roberson  * Copyright (c) 2002-2019 Jeffrey Roberson <jeff@FreeBSD.org>
508ecce74SRobert Watson  * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
6ae4e9636SRobert Watson  * Copyright (c) 2004-2006 Robert N. M. Watson
708ecce74SRobert Watson  * All rights reserved.
88355f576SJeff Roberson  *
98355f576SJeff Roberson  * Redistribution and use in source and binary forms, with or without
108355f576SJeff Roberson  * modification, are permitted provided that the following conditions
118355f576SJeff Roberson  * are met:
128355f576SJeff Roberson  * 1. Redistributions of source code must retain the above copyright
138355f576SJeff Roberson  *    notice unmodified, this list of conditions, and the following
148355f576SJeff Roberson  *    disclaimer.
158355f576SJeff Roberson  * 2. Redistributions in binary form must reproduce the above copyright
168355f576SJeff Roberson  *    notice, this list of conditions and the following disclaimer in the
178355f576SJeff Roberson  *    documentation and/or other materials provided with the distribution.
188355f576SJeff Roberson  *
198355f576SJeff Roberson  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
208355f576SJeff Roberson  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
218355f576SJeff Roberson  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
228355f576SJeff Roberson  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
238355f576SJeff Roberson  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
248355f576SJeff Roberson  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
258355f576SJeff Roberson  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
268355f576SJeff Roberson  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
278355f576SJeff Roberson  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
288355f576SJeff Roberson  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
298355f576SJeff Roberson  */
308355f576SJeff Roberson 
318355f576SJeff Roberson /*
328355f576SJeff Roberson  * uma_core.c  Implementation of the Universal Memory allocator
338355f576SJeff Roberson  *
348355f576SJeff Roberson  * This allocator is intended to replace the multitude of similar object caches
358355f576SJeff Roberson  * in the standard FreeBSD kernel.  The intent is to be flexible as well as
36763df3ecSPedro F. Giffuni  * efficient.  A primary design goal is to return unused memory to the rest of
378355f576SJeff Roberson  * the system.  This will make the system as a whole more flexible due to the
388355f576SJeff Roberson  * ability to move memory to subsystems which most need it instead of leaving
398355f576SJeff Roberson  * pools of reserved memory unused.
408355f576SJeff Roberson  *
418355f576SJeff Roberson  * The basic ideas stem from similar slab/zone based allocators whose algorithms
428355f576SJeff Roberson  * are well known.
438355f576SJeff Roberson  *
448355f576SJeff Roberson  */
458355f576SJeff Roberson 
468355f576SJeff Roberson /*
478355f576SJeff Roberson  * TODO:
488355f576SJeff Roberson  *	- Improve memory usage for large allocations
498355f576SJeff Roberson  *	- Investigate cache size adjustments
508355f576SJeff Roberson  */
518355f576SJeff Roberson 
52874651b1SDavid E. O'Brien #include <sys/cdefs.h>
5348c5777eSRobert Watson #include "opt_ddb.h"
548355f576SJeff Roberson #include "opt_param.h"
558d689e04SGleb Smirnoff #include "opt_vm.h"
5648c5777eSRobert Watson 
578355f576SJeff Roberson #include <sys/param.h>
588355f576SJeff Roberson #include <sys/systm.h>
5909c8cb71SMark Johnston #include <sys/asan.h>
60ef72505eSJeff Roberson #include <sys/bitset.h>
61194a979eSMark Johnston #include <sys/domainset.h>
629b43bc27SAndriy Gapon #include <sys/eventhandler.h>
638355f576SJeff Roberson #include <sys/kernel.h>
648355f576SJeff Roberson #include <sys/types.h>
65ad5b0f5bSJeff Roberson #include <sys/limits.h>
668355f576SJeff Roberson #include <sys/queue.h>
678355f576SJeff Roberson #include <sys/malloc.h>
683659f747SRobert Watson #include <sys/ktr.h>
698355f576SJeff Roberson #include <sys/lock.h>
7010094910SMark Johnston #include <sys/msan.h>
718355f576SJeff Roberson #include <sys/mutex.h>
724c1cc01cSJohn Baldwin #include <sys/proc.h>
7310cb2424SMark Murray #include <sys/random.h>
7489f6b863SAttilio Rao #include <sys/rwlock.h>
757a52a97eSRobert Watson #include <sys/sbuf.h>
76a2de44abSAlexander Motin #include <sys/sched.h>
774bd61e19SJeff Roberson #include <sys/sleepqueue.h>
788355f576SJeff Roberson #include <sys/smp.h>
79d4665eaaSJeff Roberson #include <sys/smr.h>
8010094910SMark Johnston #include <sys/sysctl.h>
81e60b2fcbSGleb Smirnoff #include <sys/taskqueue.h>
8286bbae32SJeff Roberson #include <sys/vmmeter.h>
8386bbae32SJeff Roberson 
848355f576SJeff Roberson #include <vm/vm.h>
856f3b523cSKonstantin Belousov #include <vm/vm_param.h>
86194a979eSMark Johnston #include <vm/vm_domainset.h>
878355f576SJeff Roberson #include <vm/vm_object.h>
888355f576SJeff Roberson #include <vm/vm_page.h>
89a4915c21SAttilio Rao #include <vm/vm_pageout.h>
90ab3185d1SJeff Roberson #include <vm/vm_phys.h>
9130c5525bSAndrew Gallatin #include <vm/vm_pagequeue.h>
928355f576SJeff Roberson #include <vm/vm_map.h>
938355f576SJeff Roberson #include <vm/vm_kern.h>
948355f576SJeff Roberson #include <vm/vm_extern.h>
956f3b523cSKonstantin Belousov #include <vm/vm_dumpset.h>
968355f576SJeff Roberson #include <vm/uma.h>
978355f576SJeff Roberson #include <vm/uma_int.h>
98639c9550SJeff Roberson #include <vm/uma_dbg.h>
998355f576SJeff Roberson 
10048c5777eSRobert Watson #include <ddb/ddb.h>
10148c5777eSRobert Watson 
1028d689e04SGleb Smirnoff #ifdef DEBUG_MEMGUARD
1038d689e04SGleb Smirnoff #include <vm/memguard.h>
1048d689e04SGleb Smirnoff #endif
1058d689e04SGleb Smirnoff 
106a81c400eSJeff Roberson #include <machine/md_var.h>
107a81c400eSJeff Roberson 
108d4665eaaSJeff Roberson #ifdef INVARIANTS
109d4665eaaSJeff Roberson #define	UMA_ALWAYS_CTORDTOR	1
110d4665eaaSJeff Roberson #else
111d4665eaaSJeff Roberson #define	UMA_ALWAYS_CTORDTOR	0
112d4665eaaSJeff Roberson #endif
113d4665eaaSJeff Roberson 
1148355f576SJeff Roberson /*
115ab3185d1SJeff Roberson  * These are the zones from which all other kegs and zones are spawned.
1168355f576SJeff Roberson  */
117ab3185d1SJeff Roberson static uma_zone_t kegs;
118ab3185d1SJeff Roberson static uma_zone_t zones;
1198355f576SJeff Roberson 
1209b8db4d0SRyan Libby /*
12154007ce8SMark Johnston  * On INVARIANTS builds, the slab contains a second bitset of the same size,
12254007ce8SMark Johnston  * "dbg_bits", which is laid out immediately after us_free.
12354007ce8SMark Johnston  */
12454007ce8SMark Johnston #ifdef INVARIANTS
12554007ce8SMark Johnston #define	SLAB_BITSETS	2
12654007ce8SMark Johnston #else
12754007ce8SMark Johnston #define	SLAB_BITSETS	1
12854007ce8SMark Johnston #endif
12954007ce8SMark Johnston 
13054007ce8SMark Johnston /*
1319b8db4d0SRyan Libby  * These are the two zones from which all offpage uma_slab_ts are allocated.
1329b8db4d0SRyan Libby  *
1339b8db4d0SRyan Libby  * One zone is for slab headers that can represent a larger number of items,
1349b8db4d0SRyan Libby  * making the slabs themselves more efficient, and the other zone is for
1359b8db4d0SRyan Libby  * headers that are smaller and represent fewer items, making the headers more
1369b8db4d0SRyan Libby  * efficient.
1379b8db4d0SRyan Libby  */
1389b8db4d0SRyan Libby #define	SLABZONE_SIZE(setsize)					\
1399b8db4d0SRyan Libby     (sizeof(struct uma_hash_slab) + BITSET_SIZE(setsize) * SLAB_BITSETS)
1409b8db4d0SRyan Libby #define	SLABZONE0_SETSIZE	(PAGE_SIZE / 16)
1419b8db4d0SRyan Libby #define	SLABZONE1_SETSIZE	SLAB_MAX_SETSIZE
1429b8db4d0SRyan Libby #define	SLABZONE0_SIZE	SLABZONE_SIZE(SLABZONE0_SETSIZE)
1439b8db4d0SRyan Libby #define	SLABZONE1_SIZE	SLABZONE_SIZE(SLABZONE1_SETSIZE)
1449b8db4d0SRyan Libby static uma_zone_t slabzones[2];
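
/*
 * Illustrative arithmetic (a sketch, assuming 4 KB pages, 64-bit longs, and a
 * non-INVARIANTS kernel where SLAB_BITSETS is 1): SLABZONE0_SETSIZE is
 * 4096 / 16 = 256 items, BITSET_SIZE(256) is 32 bytes, so SLABZONE0_SIZE is
 * the uma_hash_slab header plus 32 bytes of free-item bitset.
 */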
1458355f576SJeff Roberson 
1468355f576SJeff Roberson /*
1478355f576SJeff Roberson  * The initial hash tables come out of this zone so they can be allocated
1488355f576SJeff Roberson  * prior to malloc coming up.
1498355f576SJeff Roberson  */
1508355f576SJeff Roberson static uma_zone_t hashzone;
1518355f576SJeff Roberson 
1521e319f6dSRobert Watson /* The boot-time adjusted value for cache line alignment. */
1533d8f548bSOlivier Certner static unsigned int uma_cache_align_mask = 64 - 1;
1541e319f6dSRobert Watson 
155961647dfSJeff Roberson static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
15620a4e154SJeff Roberson static MALLOC_DEFINE(M_UMA, "UMA", "UMA Misc");
157961647dfSJeff Roberson 
1588355f576SJeff Roberson /*
15986bbae32SJeff Roberson  * Are we allowed to allocate buckets?
16086bbae32SJeff Roberson  */
16186bbae32SJeff Roberson static int bucketdisable = 1;
16286bbae32SJeff Roberson 
163099a0e58SBosko Milekic /* Linked list of all kegs in the system */
16413e403fdSAntoine Brodin static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs);
1658355f576SJeff Roberson 
16603175483SAlexander Motin /* Linked list of all cache-only zones in the system */
16703175483SAlexander Motin static LIST_HEAD(,uma_zone) uma_cachezones =
16803175483SAlexander Motin     LIST_HEAD_INITIALIZER(uma_cachezones);
16903175483SAlexander Motin 
170aabe13f1SMark Johnston /*
171aabe13f1SMark Johnston  * Mutex for global lists: uma_kegs, uma_cachezones, and the per-keg list of
172aabe13f1SMark Johnston  * zones.
173aabe13f1SMark Johnston  */
174fe933c1dSMateusz Guzik static struct rwlock_padalign __exclusive_cache_line uma_rwlock;
1758355f576SJeff Roberson 
176aabe13f1SMark Johnston static struct sx uma_reclaim_lock;
177aabe13f1SMark Johnston 
178ac0a6fd0SGleb Smirnoff /*
179a81c400eSJeff Roberson  * First available virtual address for boot time allocations.
180ac0a6fd0SGleb Smirnoff  */
181a81c400eSJeff Roberson static vm_offset_t bootstart;
182a81c400eSJeff Roberson static vm_offset_t bootmem;
1838355f576SJeff Roberson 
184fbd95859SMark Johnston /*
185fbd95859SMark Johnston  * kmem soft limit, initialized by uma_set_limit().  Ensure that early
186fbd95859SMark Johnston  * allocations don't trigger a wakeup of the reclaim thread.
187fbd95859SMark Johnston  */
1886d6a03d7SJeff Roberson unsigned long uma_kmem_limit = LONG_MAX;
189fbd95859SMark Johnston SYSCTL_ULONG(_vm, OID_AUTO, uma_kmem_limit, CTLFLAG_RD, &uma_kmem_limit, 0,
190fbd95859SMark Johnston     "UMA kernel memory soft limit");
1916d6a03d7SJeff Roberson unsigned long uma_kmem_total;
192fbd95859SMark Johnston SYSCTL_ULONG(_vm, OID_AUTO, uma_kmem_total, CTLFLAG_RD, &uma_kmem_total, 0,
193fbd95859SMark Johnston     "UMA kernel memory usage");
1942e47807cSJeff Roberson 
1958355f576SJeff Roberson /* Is the VM done starting up? */
196860bb7a0SMark Johnston static enum {
197860bb7a0SMark Johnston 	BOOT_COLD,
198a81c400eSJeff Roberson 	BOOT_KVA,
199dc2b3205SMark Johnston 	BOOT_PCPU,
200860bb7a0SMark Johnston 	BOOT_RUNNING,
201860bb7a0SMark Johnston 	BOOT_SHUTDOWN,
202860bb7a0SMark Johnston } booted = BOOT_COLD;
2038355f576SJeff Roberson 
204ef72505eSJeff Roberson /*
2059643769aSJeff Roberson  * This is the handle used to schedule events that need to happen
2069643769aSJeff Roberson  * outside of the allocation fast path.
2079643769aSJeff Roberson  */
20893cd28eaSMark Johnston static struct timeout_task uma_timeout_task;
2099643769aSJeff Roberson #define	UMA_TIMEOUT	20		/* Seconds for callout interval. */
2108355f576SJeff Roberson 
2118355f576SJeff Roberson /*
2128355f576SJeff Roberson  * This structure is passed as the zone ctor arg so that I don't have to create
2138355f576SJeff Roberson  * a special allocation function just for zones.
2148355f576SJeff Roberson  */
2158355f576SJeff Roberson struct uma_zctor_args {
216bb196eb4SMatthew D Fleming 	const char *name;
217c3bdc05fSAndrew R. Reiter 	size_t size;
2188355f576SJeff Roberson 	uma_ctor ctor;
2198355f576SJeff Roberson 	uma_dtor dtor;
2208355f576SJeff Roberson 	uma_init uminit;
2218355f576SJeff Roberson 	uma_fini fini;
2220095a784SJeff Roberson 	uma_import import;
2230095a784SJeff Roberson 	uma_release release;
2240095a784SJeff Roberson 	void *arg;
225099a0e58SBosko Milekic 	uma_keg_t keg;
226099a0e58SBosko Milekic 	int align;
22785dcf349SGleb Smirnoff 	uint32_t flags;
228099a0e58SBosko Milekic };
229099a0e58SBosko Milekic 
230099a0e58SBosko Milekic struct uma_kctor_args {
231099a0e58SBosko Milekic 	uma_zone_t zone;
232099a0e58SBosko Milekic 	size_t size;
233099a0e58SBosko Milekic 	uma_init uminit;
234099a0e58SBosko Milekic 	uma_fini fini;
2358355f576SJeff Roberson 	int align;
23685dcf349SGleb Smirnoff 	uint32_t flags;
2378355f576SJeff Roberson };
2388355f576SJeff Roberson 
239cae33c14SJeff Roberson struct uma_bucket_zone {
240cae33c14SJeff Roberson 	uma_zone_t	ubz_zone;
241eaa17d42SRyan Libby 	const char	*ubz_name;
242fc03d22bSJeff Roberson 	int		ubz_entries;	/* Number of items it can hold. */
243fc03d22bSJeff Roberson 	int		ubz_maxsize;	/* Maximum allocation size per-item. */
244cae33c14SJeff Roberson };
245cae33c14SJeff Roberson 
246f9d27e75SRobert Watson /*
247fc03d22bSJeff Roberson  * Compute the actual number of bucket entries that fit when buckets are
248fc03d22bSJeff Roberson  * packed into power-of-two allocation sizes, for more efficient space utilization.
249f9d27e75SRobert Watson  */
250fc03d22bSJeff Roberson #define	BUCKET_SIZE(n)						\
251fc03d22bSJeff Roberson     (((sizeof(void *) * (n)) - sizeof(struct uma_bucket)) / sizeof(void *))
252fc03d22bSJeff Roberson 
2531aa6c758SAlexander Motin #define	BUCKET_MAX	BUCKET_SIZE(256)
254fc03d22bSJeff Roberson 
255fc03d22bSJeff Roberson struct uma_bucket_zone bucket_zones[] = {
256e84130a0SJeff Roberson 	/* Literal bucket sizes. */
257e84130a0SJeff Roberson 	{ NULL, "2 Bucket", 2, 4096 },
258e84130a0SJeff Roberson 	{ NULL, "4 Bucket", 4, 3072 },
259e84130a0SJeff Roberson 	{ NULL, "8 Bucket", 8, 2048 },
260e84130a0SJeff Roberson 	{ NULL, "16 Bucket", 16, 1024 },
261e84130a0SJeff Roberson 	/* Rounded down power of 2 sizes for efficiency. */
262fc03d22bSJeff Roberson 	{ NULL, "32 Bucket", BUCKET_SIZE(32), 512 },
263fc03d22bSJeff Roberson 	{ NULL, "64 Bucket", BUCKET_SIZE(64), 256 },
264fc03d22bSJeff Roberson 	{ NULL, "128 Bucket", BUCKET_SIZE(128), 128 },
2651aa6c758SAlexander Motin 	{ NULL, "256 Bucket", BUCKET_SIZE(256), 64 },
266fc03d22bSJeff Roberson 	{ NULL, NULL, 0}
267fc03d22bSJeff Roberson };
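
/*
 * Worked example (informal, assuming 8-byte pointers and a 16-byte
 * struct uma_bucket header): BUCKET_SIZE(32) = ((32 * 8) - 16) / 8 = 30,
 * so the "32 Bucket" zone really holds 30 item pointers while the whole
 * bucket still fits in a 256-byte allocation.
 */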
268cae33c14SJeff Roberson 
2692019094aSRobert Watson /*
2702019094aSRobert Watson  * Flags and enumerations to be passed to internal functions.
2712019094aSRobert Watson  */
272bb15d1c7SGleb Smirnoff enum zfreeskip {
273bb15d1c7SGleb Smirnoff 	SKIP_NONE =	0,
274bb15d1c7SGleb Smirnoff 	SKIP_CNT =	0x00000001,
275bb15d1c7SGleb Smirnoff 	SKIP_DTOR =	0x00010000,
276bb15d1c7SGleb Smirnoff 	SKIP_FINI =	0x00020000,
277bb15d1c7SGleb Smirnoff };
278b23f72e9SBrian Feldman 
2798355f576SJeff Roberson /* Prototypes.. */
2808355f576SJeff Roberson 
281a81c400eSJeff Roberson void	uma_startup1(vm_offset_t);
282f4bef67cSGleb Smirnoff void	uma_startup2(void);
283f4bef67cSGleb Smirnoff 
284ab3185d1SJeff Roberson static void *noobj_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
285ab3185d1SJeff Roberson static void *page_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
286ab3059a8SMatt Macy static void *pcpu_page_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
287ab3185d1SJeff Roberson static void *startup_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
288ec0d8280SRyan Libby static void *contig_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
289f2c2231eSRyan Stone static void page_free(void *, vm_size_t, uint8_t);
290ab3059a8SMatt Macy static void pcpu_page_free(void *, vm_size_t, uint8_t);
29186220393SMark Johnston static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int, int, int);
2929643769aSJeff Roberson static void cache_drain(uma_zone_t);
2938355f576SJeff Roberson static void bucket_drain(uma_zone_t, uma_bucket_t);
294aabe13f1SMark Johnston static void bucket_cache_reclaim(uma_zone_t zone, bool, int);
2952760658bSAlexander Motin static bool bucket_cache_reclaim_domain(uma_zone_t, bool, bool, int);
296b23f72e9SBrian Feldman static int keg_ctor(void *, int, void *, int);
297099a0e58SBosko Milekic static void keg_dtor(void *, int, void *);
2982760658bSAlexander Motin static void keg_drain(uma_keg_t keg, int domain);
299b23f72e9SBrian Feldman static int zone_ctor(void *, int, void *, int);
3009c2cd7e5SJeff Roberson static void zone_dtor(void *, int, void *);
301d4665eaaSJeff Roberson static inline void item_dtor(uma_zone_t zone, void *item, int size,
302d4665eaaSJeff Roberson     void *udata, enum zfreeskip skip);
303b23f72e9SBrian Feldman static int zero_init(void *, int, int);
304c6fd3e23SJeff Roberson static void zone_free_bucket(uma_zone_t zone, uma_bucket_t bucket, void *udata,
305c6fd3e23SJeff Roberson     int itemdomain, bool ws);
30620a4e154SJeff Roberson static void zone_foreach(void (*zfunc)(uma_zone_t, void *), void *);
307a81c400eSJeff Roberson static void zone_foreach_unlocked(void (*zfunc)(uma_zone_t, void *), void *);
30820a4e154SJeff Roberson static void zone_timeout(uma_zone_t zone, void *);
3093b2f2cb8SAlexander Motin static int hash_alloc(struct uma_hash *, u_int);
3100aef6126SJeff Roberson static int hash_expand(struct uma_hash *, struct uma_hash *);
3110aef6126SJeff Roberson static void hash_free(struct uma_hash *hash);
31293cd28eaSMark Johnston static void uma_timeout(void *, int);
313860bb7a0SMark Johnston static void uma_shutdown(void);
314ab3185d1SJeff Roberson static void *zone_alloc_item(uma_zone_t, void *, int, int);
3150095a784SJeff Roberson static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip);
3164bd61e19SJeff Roberson static int zone_alloc_limit(uma_zone_t zone, int count, int flags);
3174bd61e19SJeff Roberson static void zone_free_limit(uma_zone_t zone, int count);
31886bbae32SJeff Roberson static void bucket_enable(void);
319cae33c14SJeff Roberson static void bucket_init(void);
3206fd34d6fSJeff Roberson static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int);
3216fd34d6fSJeff Roberson static void bucket_free(uma_zone_t zone, uma_bucket_t, void *);
322aabe13f1SMark Johnston static void bucket_zone_drain(int domain);
323beb8beefSJeff Roberson static uma_bucket_t zone_alloc_bucket(uma_zone_t, void *, int, int);
3240095a784SJeff Roberson static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab);
325bb15d1c7SGleb Smirnoff static void slab_free_item(uma_zone_t zone, uma_slab_t slab, void *item);
32609c8cb71SMark Johnston static size_t slab_sizeof(int nitems);
327e20a199fSJeff Roberson static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
32885dcf349SGleb Smirnoff     uma_fini fini, int align, uint32_t flags);
329b75c4efcSAndrew Turner static int zone_import(void *, void **, int, int, int);
330b75c4efcSAndrew Turner static void zone_release(void *, void **, int);
331beb8beefSJeff Roberson static bool cache_alloc(uma_zone_t, uma_cache_t, void *, int);
3322cb67bd7SGleb Smirnoff static bool cache_free(uma_zone_t, uma_cache_t, void *, int);
333bbee39c6SJeff Roberson 
3347a52a97eSRobert Watson static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
3357a52a97eSRobert Watson static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
33620a4e154SJeff Roberson static int sysctl_handle_uma_zone_allocs(SYSCTL_HANDLER_ARGS);
33720a4e154SJeff Roberson static int sysctl_handle_uma_zone_frees(SYSCTL_HANDLER_ARGS);
3386d204a6aSRyan Libby static int sysctl_handle_uma_zone_flags(SYSCTL_HANDLER_ARGS);
339f7af5015SRyan Libby static int sysctl_handle_uma_slab_efficiency(SYSCTL_HANDLER_ARGS);
3404bd61e19SJeff Roberson static int sysctl_handle_uma_zone_items(SYSCTL_HANDLER_ARGS);
3418355f576SJeff Roberson 
34231c251a0SJeff Roberson static uint64_t uma_zone_get_allocs(uma_zone_t zone);
34331c251a0SJeff Roberson 
3447029da5cSPawel Biernacki static SYSCTL_NODE(_vm, OID_AUTO, debug, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
34533e5a1eaSRyan Libby     "Memory allocation debugging");
34633e5a1eaSRyan Libby 
3479542ea7bSGleb Smirnoff #ifdef INVARIANTS
34831c251a0SJeff Roberson static uint64_t uma_keg_get_allocs(uma_keg_t zone);
349815db204SRyan Libby static inline struct noslabbits *slab_dbg_bits(uma_slab_t slab, uma_keg_t keg);
350815db204SRyan Libby 
351c5deaf04SGleb Smirnoff static bool uma_dbg_kskip(uma_keg_t keg, void *mem);
352c5deaf04SGleb Smirnoff static bool uma_dbg_zskip(uma_zone_t zone, void *mem);
3539542ea7bSGleb Smirnoff static void uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item);
3549542ea7bSGleb Smirnoff static void uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item);
355c5deaf04SGleb Smirnoff 
356c5deaf04SGleb Smirnoff static u_int dbg_divisor = 1;
357c5deaf04SGleb Smirnoff SYSCTL_UINT(_vm_debug, OID_AUTO, divisor,
358c5deaf04SGleb Smirnoff     CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &dbg_divisor, 0,
359c5deaf04SGleb Smirnoff     "Debug & thrash every nth item in the memory allocator");
360c5deaf04SGleb Smirnoff 
361c5deaf04SGleb Smirnoff static counter_u64_t uma_dbg_cnt = EARLY_COUNTER;
362c5deaf04SGleb Smirnoff static counter_u64_t uma_skip_cnt = EARLY_COUNTER;
363c5deaf04SGleb Smirnoff SYSCTL_COUNTER_U64(_vm_debug, OID_AUTO, trashed, CTLFLAG_RD,
364c5deaf04SGleb Smirnoff     &uma_dbg_cnt, "memory items debugged");
365c5deaf04SGleb Smirnoff SYSCTL_COUNTER_U64(_vm_debug, OID_AUTO, skipped, CTLFLAG_RD,
366c5deaf04SGleb Smirnoff     &uma_skip_cnt, "memory items skipped, not debugged");
3679542ea7bSGleb Smirnoff #endif
3689542ea7bSGleb Smirnoff 
3697029da5cSPawel Biernacki SYSCTL_NODE(_vm, OID_AUTO, uma, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
3707029da5cSPawel Biernacki     "Universal Memory Allocator");
37135ec24f3SRyan Libby 
372a314aba8SMateusz Guzik SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLFLAG_MPSAFE|CTLTYPE_INT,
3737a52a97eSRobert Watson     0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
3747a52a97eSRobert Watson 
375a314aba8SMateusz Guzik SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLFLAG_MPSAFE|CTLTYPE_STRUCT,
3767a52a97eSRobert Watson     0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");
3777a52a97eSRobert Watson 
3782f891cd5SPawel Jakub Dawidek static int zone_warnings = 1;
379af3b2549SHans Petter Selasky SYSCTL_INT(_vm, OID_AUTO, zone_warnings, CTLFLAG_RWTUN, &zone_warnings, 0,
3802f891cd5SPawel Jakub Dawidek     "Warn when a UMA zone becomes full");
3812f891cd5SPawel Jakub Dawidek 
38233e5a1eaSRyan Libby static int multipage_slabs = 1;
38333e5a1eaSRyan Libby TUNABLE_INT("vm.debug.uma_multipage_slabs", &multipage_slabs);
38433e5a1eaSRyan Libby SYSCTL_INT(_vm_debug, OID_AUTO, uma_multipage_slabs,
38533e5a1eaSRyan Libby     CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &multipage_slabs, 0,
38633e5a1eaSRyan Libby     "UMA may choose larger slab sizes for better efficiency");
38733e5a1eaSRyan Libby 
38886bbae32SJeff Roberson /*
3899b8db4d0SRyan Libby  * Select the slab zone for an offpage slab with the given maximum item count.
3909b8db4d0SRyan Libby  */
3919b8db4d0SRyan Libby static inline uma_zone_t
3929b8db4d0SRyan Libby slabzone(int ipers)
3939b8db4d0SRyan Libby {
3949b8db4d0SRyan Libby 
3959b8db4d0SRyan Libby 	return (slabzones[ipers > SLABZONE0_SETSIZE]);
3969b8db4d0SRyan Libby }
3979b8db4d0SRyan Libby 
3989b8db4d0SRyan Libby /*
39986bbae32SJeff Roberson  * This routine checks to see whether or not it's safe to enable buckets.
40086bbae32SJeff Roberson  */
40186bbae32SJeff Roberson static void
40286bbae32SJeff Roberson bucket_enable(void)
40386bbae32SJeff Roberson {
4043182660aSRyan Libby 
405a81c400eSJeff Roberson 	KASSERT(booted >= BOOT_KVA, ("Bucket enable before init"));
406251386b4SMaksim Yevmenkin 	bucketdisable = vm_page_count_min();
40786bbae32SJeff Roberson }
40886bbae32SJeff Roberson 
409dc2c7965SRobert Watson /*
410dc2c7965SRobert Watson  * Initialize bucket_zones, the array of zones of buckets of various sizes.
411dc2c7965SRobert Watson  *
412dc2c7965SRobert Watson  * For each zone, calculate the memory required for each bucket, consisting
413fc03d22bSJeff Roberson  * of the header and an array of pointers.
414dc2c7965SRobert Watson  */
415cae33c14SJeff Roberson static void
416cae33c14SJeff Roberson bucket_init(void)
417cae33c14SJeff Roberson {
418cae33c14SJeff Roberson 	struct uma_bucket_zone *ubz;
419cae33c14SJeff Roberson 	int size;
420cae33c14SJeff Roberson 
421d74e6a1dSAlan Cox 	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++) {
422cae33c14SJeff Roberson 		size = roundup(sizeof(struct uma_bucket), sizeof(void *));
423cae33c14SJeff Roberson 		size += sizeof(void *) * ubz->ubz_entries;
424cae33c14SJeff Roberson 		ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
425e20a199fSJeff Roberson 		    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
426dfe13344SJeff Roberson 		    UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET |
427dfe13344SJeff Roberson 		    UMA_ZONE_FIRSTTOUCH);
428cae33c14SJeff Roberson 	}
429cae33c14SJeff Roberson }
430cae33c14SJeff Roberson 
431dc2c7965SRobert Watson /*
432dc2c7965SRobert Watson  * Given a desired number of entries for a bucket, return the zone from which
433dc2c7965SRobert Watson  * to allocate the bucket.
434dc2c7965SRobert Watson  */
435dc2c7965SRobert Watson static struct uma_bucket_zone *
436dc2c7965SRobert Watson bucket_zone_lookup(int entries)
437dc2c7965SRobert Watson {
438fc03d22bSJeff Roberson 	struct uma_bucket_zone *ubz;
439dc2c7965SRobert Watson 
440fc03d22bSJeff Roberson 	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
441fc03d22bSJeff Roberson 		if (ubz->ubz_entries >= entries)
442fc03d22bSJeff Roberson 			return (ubz);
443fc03d22bSJeff Roberson 	ubz--;
444fc03d22bSJeff Roberson 	return (ubz);
445fc03d22bSJeff Roberson }
446fc03d22bSJeff Roberson 
447fc03d22bSJeff Roberson static int
448fc03d22bSJeff Roberson bucket_select(int size)
449fc03d22bSJeff Roberson {
450fc03d22bSJeff Roberson 	struct uma_bucket_zone *ubz;
451fc03d22bSJeff Roberson 
452fc03d22bSJeff Roberson 	ubz = &bucket_zones[0];
453fc03d22bSJeff Roberson 	if (size > ubz->ubz_maxsize)
454fc03d22bSJeff Roberson 		return MAX((ubz->ubz_maxsize * ubz->ubz_entries) / size, 1);
455fc03d22bSJeff Roberson 
456fc03d22bSJeff Roberson 	for (; ubz->ubz_entries != 0; ubz++)
457fc03d22bSJeff Roberson 		if (ubz->ubz_maxsize < size)
458fc03d22bSJeff Roberson 			break;
459fc03d22bSJeff Roberson 	ubz--;
460fc03d22bSJeff Roberson 	return (ubz->ubz_entries);
461dc2c7965SRobert Watson }
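
/*
 * For example (informal, using the bucket_zones table above):
 * bucket_select(600) walks past the 1024-byte entry, stops at the
 * 512-byte entry, steps back one entry and returns 16.  An 8192-byte
 * item exceeds the first entry's 4096-byte ubz_maxsize, so the early
 * return yields MAX((4096 * 2) / 8192, 1) = 1.
 */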
462dc2c7965SRobert Watson 
463cae33c14SJeff Roberson static uma_bucket_t
4646fd34d6fSJeff Roberson bucket_alloc(uma_zone_t zone, void *udata, int flags)
465cae33c14SJeff Roberson {
466cae33c14SJeff Roberson 	struct uma_bucket_zone *ubz;
467cae33c14SJeff Roberson 	uma_bucket_t bucket;
468cae33c14SJeff Roberson 
469cae33c14SJeff Roberson 	/*
470d4665eaaSJeff Roberson 	 * Don't allocate buckets early in boot.
471cae33c14SJeff Roberson 	 */
472d4665eaaSJeff Roberson 	if (__predict_false(booted < BOOT_KVA))
473cae33c14SJeff Roberson 		return (NULL);
474a81c400eSJeff Roberson 
4756fd34d6fSJeff Roberson 	/*
4766fd34d6fSJeff Roberson 	 * To limit bucket recursion we store the original zone flags
4776fd34d6fSJeff Roberson 	 * in a cookie passed via zalloc_arg/zfree_arg.  This allows the
4786fd34d6fSJeff Roberson 	 * NOVM flag to persist even through deep recursions.  We also
4796fd34d6fSJeff Roberson 	 * store ZFLAG_BUCKET once we have recursed attempting to allocate
4806fd34d6fSJeff Roberson 	 * a bucket for a bucket zone so we do not allow infinite bucket
4816fd34d6fSJeff Roberson 	 * recursion.  This cookie will even persist to frees of unused
4826fd34d6fSJeff Roberson 	 * buckets via the allocation path or bucket allocations in the
4836fd34d6fSJeff Roberson 	 * free path.
4846fd34d6fSJeff Roberson 	 */
4856fd34d6fSJeff Roberson 	if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
4866fd34d6fSJeff Roberson 		udata = (void *)(uintptr_t)zone->uz_flags;
487e8a720feSAlexander Motin 	else {
488e8a720feSAlexander Motin 		if ((uintptr_t)udata & UMA_ZFLAG_BUCKET)
489e8a720feSAlexander Motin 			return (NULL);
4906fd34d6fSJeff Roberson 		udata = (void *)((uintptr_t)udata | UMA_ZFLAG_BUCKET);
491e8a720feSAlexander Motin 	}
492bae55c4aSRyan Libby 	if (((uintptr_t)udata & UMA_ZONE_VM) != 0)
493af526374SJeff Roberson 		flags |= M_NOVM;
494f8b6c515SMark Johnston 	ubz = bucket_zone_lookup(atomic_load_16(&zone->uz_bucket_size));
49520d3ab87SAlexander Motin 	if (ubz->ubz_zone == zone && (ubz + 1)->ubz_entries != 0)
49620d3ab87SAlexander Motin 		ubz++;
4976fd34d6fSJeff Roberson 	bucket = uma_zalloc_arg(ubz->ubz_zone, udata, flags);
498cae33c14SJeff Roberson 	if (bucket) {
499cae33c14SJeff Roberson #ifdef INVARIANTS
500cae33c14SJeff Roberson 		bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
501cae33c14SJeff Roberson #endif
502cae33c14SJeff Roberson 		bucket->ub_cnt = 0;
503f8b6c515SMark Johnston 		bucket->ub_entries = min(ubz->ubz_entries,
504f8b6c515SMark Johnston 		    zone->uz_bucket_size_max);
505d4665eaaSJeff Roberson 		bucket->ub_seq = SMR_SEQ_INVALID;
506d4665eaaSJeff Roberson 		CTR3(KTR_UMA, "bucket_alloc: zone %s(%p) allocated bucket %p",
507d4665eaaSJeff Roberson 		    zone->uz_name, zone, bucket);
508cae33c14SJeff Roberson 	}
509cae33c14SJeff Roberson 
510cae33c14SJeff Roberson 	return (bucket);
511cae33c14SJeff Roberson }
512cae33c14SJeff Roberson 
513cae33c14SJeff Roberson static void
5146fd34d6fSJeff Roberson bucket_free(uma_zone_t zone, uma_bucket_t bucket, void *udata)
515cae33c14SJeff Roberson {
516cae33c14SJeff Roberson 	struct uma_bucket_zone *ubz;
517cae33c14SJeff Roberson 
518c6fd3e23SJeff Roberson 	if (bucket->ub_cnt != 0)
519c6fd3e23SJeff Roberson 		bucket_drain(zone, bucket);
520c6fd3e23SJeff Roberson 
521fc03d22bSJeff Roberson 	KASSERT(bucket->ub_cnt == 0,
522fc03d22bSJeff Roberson 	    ("bucket_free: Freeing a non free bucket."));
523d4665eaaSJeff Roberson 	KASSERT(bucket->ub_seq == SMR_SEQ_INVALID,
524d4665eaaSJeff Roberson 	    ("bucket_free: Freeing an SMR bucket."));
5256fd34d6fSJeff Roberson 	if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
5266fd34d6fSJeff Roberson 		udata = (void *)(uintptr_t)zone->uz_flags;
527dc2c7965SRobert Watson 	ubz = bucket_zone_lookup(bucket->ub_entries);
5286fd34d6fSJeff Roberson 	uma_zfree_arg(ubz->ubz_zone, bucket, udata);
529cae33c14SJeff Roberson }
530cae33c14SJeff Roberson 
531cae33c14SJeff Roberson static void
532aabe13f1SMark Johnston bucket_zone_drain(int domain)
533cae33c14SJeff Roberson {
534cae33c14SJeff Roberson 	struct uma_bucket_zone *ubz;
535cae33c14SJeff Roberson 
536cae33c14SJeff Roberson 	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
537aabe13f1SMark Johnston 		uma_zone_reclaim_domain(ubz->ubz_zone, UMA_RECLAIM_DRAIN,
538aabe13f1SMark Johnston 		    domain);
539cae33c14SJeff Roberson }
540cae33c14SJeff Roberson 
54109c8cb71SMark Johnston #ifdef KASAN
5429a7c2de3SMark Johnston _Static_assert(UMA_SMALLEST_UNIT % KASAN_SHADOW_SCALE == 0,
5439a7c2de3SMark Johnston     "Base UMA allocation size not a multiple of the KASAN scale factor");
5449a7c2de3SMark Johnston 
54509c8cb71SMark Johnston static void
54609c8cb71SMark Johnston kasan_mark_item_valid(uma_zone_t zone, void *item)
54709c8cb71SMark Johnston {
54809c8cb71SMark Johnston 	void *pcpu_item;
54909c8cb71SMark Johnston 	size_t sz, rsz;
55009c8cb71SMark Johnston 	int i;
55109c8cb71SMark Johnston 
55209c8cb71SMark Johnston 	if ((zone->uz_flags & UMA_ZONE_NOKASAN) != 0)
55309c8cb71SMark Johnston 		return;
55409c8cb71SMark Johnston 
55509c8cb71SMark Johnston 	sz = zone->uz_size;
55609c8cb71SMark Johnston 	rsz = roundup2(sz, KASAN_SHADOW_SCALE);
55709c8cb71SMark Johnston 	if ((zone->uz_flags & UMA_ZONE_PCPU) == 0) {
558b0dfc486SMark Johnston 		kasan_mark(item, sz, rsz, KASAN_GENERIC_REDZONE);
55909c8cb71SMark Johnston 	} else {
56009c8cb71SMark Johnston 		pcpu_item = zpcpu_base_to_offset(item);
56109c8cb71SMark Johnston 		for (i = 0; i <= mp_maxid; i++)
562b0dfc486SMark Johnston 			kasan_mark(zpcpu_get_cpu(pcpu_item, i), sz, rsz,
563b0dfc486SMark Johnston 			    KASAN_GENERIC_REDZONE);
56409c8cb71SMark Johnston 	}
56509c8cb71SMark Johnston }
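
/*
 * Example (a sketch, assuming KASAN_SHADOW_SCALE is 8): a zone with
 * uz_size of 100 gives rsz = 104, so kasan_mark() above unpoisons the
 * first 100 bytes of the item and leaves the trailing 4 bytes poisoned
 * as a redzone to catch small overflows.
 */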
56609c8cb71SMark Johnston 
56709c8cb71SMark Johnston static void
56809c8cb71SMark Johnston kasan_mark_item_invalid(uma_zone_t zone, void *item)
56909c8cb71SMark Johnston {
57009c8cb71SMark Johnston 	void *pcpu_item;
57109c8cb71SMark Johnston 	size_t sz;
57209c8cb71SMark Johnston 	int i;
57309c8cb71SMark Johnston 
57409c8cb71SMark Johnston 	if ((zone->uz_flags & UMA_ZONE_NOKASAN) != 0)
57509c8cb71SMark Johnston 		return;
57609c8cb71SMark Johnston 
57709c8cb71SMark Johnston 	sz = roundup2(zone->uz_size, KASAN_SHADOW_SCALE);
57809c8cb71SMark Johnston 	if ((zone->uz_flags & UMA_ZONE_PCPU) == 0) {
57909c8cb71SMark Johnston 		kasan_mark(item, 0, sz, KASAN_UMA_FREED);
58009c8cb71SMark Johnston 	} else {
58109c8cb71SMark Johnston 		pcpu_item = zpcpu_base_to_offset(item);
58209c8cb71SMark Johnston 		for (i = 0; i <= mp_maxid; i++)
583b0dfc486SMark Johnston 			kasan_mark(zpcpu_get_cpu(pcpu_item, i), 0, sz,
584b0dfc486SMark Johnston 			    KASAN_UMA_FREED);
58509c8cb71SMark Johnston 	}
58609c8cb71SMark Johnston }
58709c8cb71SMark Johnston 
58809c8cb71SMark Johnston static void
58909c8cb71SMark Johnston kasan_mark_slab_valid(uma_keg_t keg, void *mem)
59009c8cb71SMark Johnston {
59109c8cb71SMark Johnston 	size_t sz;
59209c8cb71SMark Johnston 
59309c8cb71SMark Johnston 	if ((keg->uk_flags & UMA_ZONE_NOKASAN) == 0) {
59409c8cb71SMark Johnston 		sz = keg->uk_ppera * PAGE_SIZE;
59509c8cb71SMark Johnston 		kasan_mark(mem, sz, sz, 0);
59609c8cb71SMark Johnston 	}
59709c8cb71SMark Johnston }
59809c8cb71SMark Johnston 
59909c8cb71SMark Johnston static void
60009c8cb71SMark Johnston kasan_mark_slab_invalid(uma_keg_t keg, void *mem)
60109c8cb71SMark Johnston {
60209c8cb71SMark Johnston 	size_t sz;
60309c8cb71SMark Johnston 
60409c8cb71SMark Johnston 	if ((keg->uk_flags & UMA_ZONE_NOKASAN) == 0) {
60509c8cb71SMark Johnston 		if ((keg->uk_flags & UMA_ZFLAG_OFFPAGE) != 0)
60609c8cb71SMark Johnston 			sz = keg->uk_ppera * PAGE_SIZE;
60709c8cb71SMark Johnston 		else
60809c8cb71SMark Johnston 			sz = keg->uk_pgoff;
60909c8cb71SMark Johnston 		kasan_mark(mem, 0, sz, KASAN_UMA_FREED);
61009c8cb71SMark Johnston 	}
61109c8cb71SMark Johnston }
61209c8cb71SMark Johnston #else /* !KASAN */
61309c8cb71SMark Johnston static void
61409c8cb71SMark Johnston kasan_mark_item_valid(uma_zone_t zone __unused, void *item __unused)
61509c8cb71SMark Johnston {
61609c8cb71SMark Johnston }
61709c8cb71SMark Johnston 
61809c8cb71SMark Johnston static void
61909c8cb71SMark Johnston kasan_mark_item_invalid(uma_zone_t zone __unused, void *item __unused)
62009c8cb71SMark Johnston {
62109c8cb71SMark Johnston }
62209c8cb71SMark Johnston 
62309c8cb71SMark Johnston static void
62409c8cb71SMark Johnston kasan_mark_slab_valid(uma_keg_t keg __unused, void *mem __unused)
62509c8cb71SMark Johnston {
62609c8cb71SMark Johnston }
62709c8cb71SMark Johnston 
62809c8cb71SMark Johnston static void
62909c8cb71SMark Johnston kasan_mark_slab_invalid(uma_keg_t keg __unused, void *mem __unused)
63009c8cb71SMark Johnston {
63109c8cb71SMark Johnston }
63209c8cb71SMark Johnston #endif /* KASAN */
63309c8cb71SMark Johnston 
63410094910SMark Johnston #ifdef KMSAN
63510094910SMark Johnston static inline void
63610094910SMark Johnston kmsan_mark_item_uninitialized(uma_zone_t zone, void *item)
63710094910SMark Johnston {
63810094910SMark Johnston 	void *pcpu_item;
63910094910SMark Johnston 	size_t sz;
64010094910SMark Johnston 	int i;
64110094910SMark Johnston 
64210094910SMark Johnston 	if ((zone->uz_flags &
64310094910SMark Johnston 	    (UMA_ZFLAG_CACHE | UMA_ZONE_SECONDARY | UMA_ZONE_MALLOC)) != 0) {
64410094910SMark Johnston 		/*
64510094910SMark Johnston 		 * Cache zones should not be instrumented by default, as UMA
64610094910SMark Johnston 		 * does not have enough information to do so correctly.
64710094910SMark Johnston 		 * Consumers can mark items themselves if it makes sense to do
64810094910SMark Johnston 		 * so.
64910094910SMark Johnston 		 *
65010094910SMark Johnston 		 * Items from secondary zones are initialized by the parent
65110094910SMark Johnston 		 * zone and thus cannot safely be marked by UMA.
65210094910SMark Johnston 		 *
65310094910SMark Johnston 		 * malloc zones are handled directly by malloc(9) and friends,
65410094910SMark Johnston 		 * since they can provide more precise origin tracking.
65510094910SMark Johnston 		 */
65610094910SMark Johnston 		return;
65710094910SMark Johnston 	}
65810094910SMark Johnston 	if (zone->uz_keg->uk_init != NULL) {
65910094910SMark Johnston 		/*
66010094910SMark Johnston 		 * By definition, initialized items cannot be marked.  The
66110094910SMark Johnston 		 * best we can do is mark items from these zones after they
66210094910SMark Johnston 		 * are freed to the keg.
66310094910SMark Johnston 		 */
66410094910SMark Johnston 		return;
66510094910SMark Johnston 	}
66610094910SMark Johnston 
66710094910SMark Johnston 	sz = zone->uz_size;
66810094910SMark Johnston 	if ((zone->uz_flags & UMA_ZONE_PCPU) == 0) {
66910094910SMark Johnston 		kmsan_orig(item, sz, KMSAN_TYPE_UMA, KMSAN_RET_ADDR);
67010094910SMark Johnston 		kmsan_mark(item, sz, KMSAN_STATE_UNINIT);
67110094910SMark Johnston 	} else {
67210094910SMark Johnston 		pcpu_item = zpcpu_base_to_offset(item);
67310094910SMark Johnston 		for (i = 0; i <= mp_maxid; i++) {
67410094910SMark Johnston 			kmsan_orig(zpcpu_get_cpu(pcpu_item, i), sz,
67510094910SMark Johnston 			    KMSAN_TYPE_UMA, KMSAN_RET_ADDR);
67610094910SMark Johnston 			kmsan_mark(zpcpu_get_cpu(pcpu_item, i), sz,
67710094910SMark Johnston 			    KMSAN_STATE_INITED);
67810094910SMark Johnston 		}
67910094910SMark Johnston 	}
68010094910SMark Johnston }
68110094910SMark Johnston #else /* !KMSAN */
68210094910SMark Johnston static inline void
68310094910SMark Johnston kmsan_mark_item_uninitialized(uma_zone_t zone __unused, void *item __unused)
68410094910SMark Johnston {
68510094910SMark Johnston }
68610094910SMark Johnston #endif /* KMSAN */
68710094910SMark Johnston 
68808cfa56eSMark Johnston /*
689c6fd3e23SJeff Roberson  * Acquire the domain lock and record contention.
690c6fd3e23SJeff Roberson  */
691c6fd3e23SJeff Roberson static uma_zone_domain_t
692c6fd3e23SJeff Roberson zone_domain_lock(uma_zone_t zone, int domain)
693c6fd3e23SJeff Roberson {
694c6fd3e23SJeff Roberson 	uma_zone_domain_t zdom;
695c6fd3e23SJeff Roberson 	bool lockfail;
696c6fd3e23SJeff Roberson 
697c6fd3e23SJeff Roberson 	zdom = ZDOM_GET(zone, domain);
698c6fd3e23SJeff Roberson 	lockfail = false;
699c6fd3e23SJeff Roberson 	if (ZDOM_OWNED(zdom))
700c6fd3e23SJeff Roberson 		lockfail = true;
701c6fd3e23SJeff Roberson 	ZDOM_LOCK(zdom);
702c6fd3e23SJeff Roberson 	/* This is unsynchronized.  The counter does not need to be precise. */
703c6fd3e23SJeff Roberson 	if (lockfail && zone->uz_bucket_size < zone->uz_bucket_size_max)
704c6fd3e23SJeff Roberson 		zone->uz_bucket_size++;
705c6fd3e23SJeff Roberson 	return (zdom);
706c6fd3e23SJeff Roberson }
707c6fd3e23SJeff Roberson 
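
/*
 * Concretely: if another CPU already holds this domain's lock when we
 * arrive, ZDOM_OWNED() is true and uz_bucket_size is nudged upward
 * (bounded by uz_bucket_size_max), so future cache fills move more
 * items per lock acquisition and contention drops.
 */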
708c6fd3e23SJeff Roberson /*
709fe835cbfSJeff Roberson  * Search for the domain with the fewest cached items and return it if it
710fe835cbfSJeff Roberson  * is out of balance with the preferred domain.
711c6fd3e23SJeff Roberson  */
712c6fd3e23SJeff Roberson static __noinline int
713c6fd3e23SJeff Roberson zone_domain_lowest(uma_zone_t zone, int pref)
714c6fd3e23SJeff Roberson {
715fe835cbfSJeff Roberson 	long least, nitems, prefitems;
716c6fd3e23SJeff Roberson 	int domain;
717c6fd3e23SJeff Roberson 	int i;
718c6fd3e23SJeff Roberson 
719fe835cbfSJeff Roberson 	prefitems = least = LONG_MAX;
720c6fd3e23SJeff Roberson 	domain = 0;
721c6fd3e23SJeff Roberson 	for (i = 0; i < vm_ndomains; i++) {
722c6fd3e23SJeff Roberson 		nitems = ZDOM_GET(zone, i)->uzd_nitems;
723c6fd3e23SJeff Roberson 		if (nitems < least) {
724c6fd3e23SJeff Roberson 			domain = i;
725c6fd3e23SJeff Roberson 			least = nitems;
726c6fd3e23SJeff Roberson 		}
727fe835cbfSJeff Roberson 		if (domain == pref)
728fe835cbfSJeff Roberson 			prefitems = nitems;
729fe835cbfSJeff Roberson 	}
730fe835cbfSJeff Roberson 	if (prefitems < least * 2)
731fe835cbfSJeff Roberson 		return (pref);
732c6fd3e23SJeff Roberson 
733c6fd3e23SJeff Roberson 	return (domain);
734c6fd3e23SJeff Roberson }
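
/*
 * For example: if the preferred domain caches 10 items and the emptiest
 * domain caches 6, then 10 < 2 * 6 and the preferred domain is still
 * returned; only once the preferred domain holds at least twice as many
 * cached items as the emptiest one is the emptiest domain chosen.
 */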
735c6fd3e23SJeff Roberson 
736c6fd3e23SJeff Roberson /*
737c6fd3e23SJeff Roberson  * Search for the domain with the most cached items and return it or the
738c6fd3e23SJeff Roberson  * preferred domain if it has enough to proceed.
739c6fd3e23SJeff Roberson  */
740c6fd3e23SJeff Roberson static __noinline int
741c6fd3e23SJeff Roberson zone_domain_highest(uma_zone_t zone, int pref)
742c6fd3e23SJeff Roberson {
743c6fd3e23SJeff Roberson 	long most, nitems;
744c6fd3e23SJeff Roberson 	int domain;
745c6fd3e23SJeff Roberson 	int i;
746c6fd3e23SJeff Roberson 
747c6fd3e23SJeff Roberson 	if (ZDOM_GET(zone, pref)->uzd_nitems > BUCKET_MAX)
748c6fd3e23SJeff Roberson 		return (pref);
749c6fd3e23SJeff Roberson 
750c6fd3e23SJeff Roberson 	most = 0;
751c6fd3e23SJeff Roberson 	domain = 0;
752c6fd3e23SJeff Roberson 	for (i = 0; i < vm_ndomains; i++) {
753c6fd3e23SJeff Roberson 		nitems = ZDOM_GET(zone, i)->uzd_nitems;
754c6fd3e23SJeff Roberson 		if (nitems > most) {
755c6fd3e23SJeff Roberson 			domain = i;
756c6fd3e23SJeff Roberson 			most = nitems;
757c6fd3e23SJeff Roberson 		}
758c6fd3e23SJeff Roberson 	}
759c6fd3e23SJeff Roberson 
760c6fd3e23SJeff Roberson 	return (domain);
761c6fd3e23SJeff Roberson }
762c6fd3e23SJeff Roberson 
763c6fd3e23SJeff Roberson /*
764c6fd3e23SJeff Roberson  * Update the domain's imax if nitems is a new maximum.
765c6fd3e23SJeff Roberson  */
766c6fd3e23SJeff Roberson static void
767c6fd3e23SJeff Roberson zone_domain_imax_set(uma_zone_domain_t zdom, int nitems)
768c6fd3e23SJeff Roberson {
769c6fd3e23SJeff Roberson 	long old;
770c6fd3e23SJeff Roberson 
771c6fd3e23SJeff Roberson 	old = zdom->uzd_imax;
772c6fd3e23SJeff Roberson 	do {
773c6fd3e23SJeff Roberson 		if (old >= nitems)
7742760658bSAlexander Motin 			return;
775c6fd3e23SJeff Roberson 	} while (atomic_fcmpset_long(&zdom->uzd_imax, &old, nitems) == 0);
7762760658bSAlexander Motin 
7772760658bSAlexander Motin 	/*
7782760658bSAlexander Motin 	 * We are at new maximum, so do the last WSS update for the old
7792760658bSAlexander Motin 	 * bimin and prepare to measure next allocation batch.
7802760658bSAlexander Motin 	 */
7812760658bSAlexander Motin 	if (zdom->uzd_wss < old - zdom->uzd_bimin)
7822760658bSAlexander Motin 		zdom->uzd_wss = old - zdom->uzd_bimin;
7832760658bSAlexander Motin 	zdom->uzd_bimin = nitems;
784c6fd3e23SJeff Roberson }
785c6fd3e23SJeff Roberson 
786c6fd3e23SJeff Roberson /*
78708cfa56eSMark Johnston  * Attempt to satisfy an allocation by retrieving a full bucket from one of the
788d4665eaaSJeff Roberson  * zone's caches.  If a bucket is found, the zone is not locked on return.
78908cfa56eSMark Johnston  */
7900f9b7bf3SMark Johnston static uma_bucket_t
791c6fd3e23SJeff Roberson zone_fetch_bucket(uma_zone_t zone, uma_zone_domain_t zdom, bool reclaim)
7920f9b7bf3SMark Johnston {
7930f9b7bf3SMark Johnston 	uma_bucket_t bucket;
7942760658bSAlexander Motin 	long cnt;
795d4665eaaSJeff Roberson 	int i;
796d4665eaaSJeff Roberson 	bool dtor = false;
7970f9b7bf3SMark Johnston 
798c6fd3e23SJeff Roberson 	ZDOM_LOCK_ASSERT(zdom);
7990f9b7bf3SMark Johnston 
800dc3915c8SJeff Roberson 	if ((bucket = STAILQ_FIRST(&zdom->uzd_buckets)) == NULL)
801d4665eaaSJeff Roberson 		return (NULL);
802d4665eaaSJeff Roberson 
803543117beSJeff Roberson 	/* SMR Buckets can not be re-used until readers expire. */
804d4665eaaSJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_SMR) != 0 &&
805d4665eaaSJeff Roberson 	    bucket->ub_seq != SMR_SEQ_INVALID) {
806d4665eaaSJeff Roberson 		if (!smr_poll(zone->uz_smr, bucket->ub_seq, false))
807d4665eaaSJeff Roberson 			return (NULL);
808d4665eaaSJeff Roberson 		bucket->ub_seq = SMR_SEQ_INVALID;
809543117beSJeff Roberson 		dtor = (zone->uz_dtor != NULL) || UMA_ALWAYS_CTORDTOR;
810c6fd3e23SJeff Roberson 		if (STAILQ_NEXT(bucket, ub_link) != NULL)
811c6fd3e23SJeff Roberson 			zdom->uzd_seq = STAILQ_NEXT(bucket, ub_link)->ub_seq;
812d4665eaaSJeff Roberson 	}
813dc3915c8SJeff Roberson 	STAILQ_REMOVE_HEAD(&zdom->uzd_buckets, ub_link);
81406d8bdcbSMark Johnston 
81506d8bdcbSMark Johnston 	KASSERT(zdom->uzd_nitems >= bucket->ub_cnt,
81606d8bdcbSMark Johnston 	    ("%s: item count underflow (%ld, %d)",
81706d8bdcbSMark Johnston 	    __func__, zdom->uzd_nitems, bucket->ub_cnt));
81806d8bdcbSMark Johnston 	KASSERT(bucket->ub_cnt > 0,
81906d8bdcbSMark Johnston 	    ("%s: empty bucket in bucket cache", __func__));
8200f9b7bf3SMark Johnston 	zdom->uzd_nitems -= bucket->ub_cnt;
821c6fd3e23SJeff Roberson 
8222760658bSAlexander Motin 	if (reclaim) {
823c6fd3e23SJeff Roberson 		/*
824c6fd3e23SJeff Roberson 		 * Shift the bounds of the current WSS interval to avoid
8252760658bSAlexander Motin 		 * perturbing the estimates.
826c6fd3e23SJeff Roberson 		 */
8272760658bSAlexander Motin 		cnt = lmin(zdom->uzd_bimin, bucket->ub_cnt);
8282760658bSAlexander Motin 		atomic_subtract_long(&zdom->uzd_imax, cnt);
8292760658bSAlexander Motin 		zdom->uzd_bimin -= cnt;
830c6fd3e23SJeff Roberson 		zdom->uzd_imin -= lmin(zdom->uzd_imin, bucket->ub_cnt);
8312760658bSAlexander Motin 		if (zdom->uzd_limin >= bucket->ub_cnt) {
8322760658bSAlexander Motin 			zdom->uzd_limin -= bucket->ub_cnt;
8332760658bSAlexander Motin 		} else {
8342760658bSAlexander Motin 			zdom->uzd_limin = 0;
8352760658bSAlexander Motin 			zdom->uzd_timin = 0;
8362760658bSAlexander Motin 		}
8372760658bSAlexander Motin 	} else if (zdom->uzd_bimin > zdom->uzd_nitems) {
8382760658bSAlexander Motin 		zdom->uzd_bimin = zdom->uzd_nitems;
8392760658bSAlexander Motin 		if (zdom->uzd_imin > zdom->uzd_nitems)
8400f9b7bf3SMark Johnston 			zdom->uzd_imin = zdom->uzd_nitems;
8412760658bSAlexander Motin 	}
842c6fd3e23SJeff Roberson 
843c6fd3e23SJeff Roberson 	ZDOM_UNLOCK(zdom);
844d4665eaaSJeff Roberson 	if (dtor)
845d4665eaaSJeff Roberson 		for (i = 0; i < bucket->ub_cnt; i++)
846d4665eaaSJeff Roberson 			item_dtor(zone, bucket->ub_bucket[i], zone->uz_size,
847d4665eaaSJeff Roberson 			    NULL, SKIP_NONE);
848d4665eaaSJeff Roberson 
8490f9b7bf3SMark Johnston 	return (bucket);
8500f9b7bf3SMark Johnston }
8510f9b7bf3SMark Johnston 
85208cfa56eSMark Johnston /*
85308cfa56eSMark Johnston  * Insert a full bucket into the specified cache.  The "ws" parameter indicates
85408cfa56eSMark Johnston  * whether the bucket's contents should be counted as part of the zone's working
855c6fd3e23SJeff Roberson  * set.  The bucket may be freed if the cache would exceed the zone's bucket limit.
85608cfa56eSMark Johnston  */
8570f9b7bf3SMark Johnston static void
858c6fd3e23SJeff Roberson zone_put_bucket(uma_zone_t zone, int domain, uma_bucket_t bucket, void *udata,
8590f9b7bf3SMark Johnston     const bool ws)
8600f9b7bf3SMark Johnston {
861c6fd3e23SJeff Roberson 	uma_zone_domain_t zdom;
8620f9b7bf3SMark Johnston 
863c6fd3e23SJeff Roberson 	/* We don't cache empty buckets.  This can happen after a reclaim. */
864c6fd3e23SJeff Roberson 	if (bucket->ub_cnt == 0)
865c6fd3e23SJeff Roberson 		goto out;
866c6fd3e23SJeff Roberson 	zdom = zone_domain_lock(zone, domain);
867c6fd3e23SJeff Roberson 
868c6fd3e23SJeff Roberson 	/*
869c6fd3e23SJeff Roberson 	 * Conditionally set the maximum number of items.
870c6fd3e23SJeff Roberson 	 */
8710f9b7bf3SMark Johnston 	zdom->uzd_nitems += bucket->ub_cnt;
872c6fd3e23SJeff Roberson 	if (__predict_true(zdom->uzd_nitems < zone->uz_bucket_max)) {
8732760658bSAlexander Motin 		if (ws) {
874c6fd3e23SJeff Roberson 			zone_domain_imax_set(zdom, zdom->uzd_nitems);
8752760658bSAlexander Motin 		} else {
8762760658bSAlexander Motin 			/*
8772760658bSAlexander Motin 			 * Shift the bounds of the current WSS interval to
8782760658bSAlexander Motin 			 * avoid perturbing the estimates.
8792760658bSAlexander Motin 			 */
8802760658bSAlexander Motin 			atomic_add_long(&zdom->uzd_imax, bucket->ub_cnt);
8812760658bSAlexander Motin 			zdom->uzd_imin += bucket->ub_cnt;
8822760658bSAlexander Motin 			zdom->uzd_bimin += bucket->ub_cnt;
8832760658bSAlexander Motin 			zdom->uzd_limin += bucket->ub_cnt;
8842760658bSAlexander Motin 		}
885c6fd3e23SJeff Roberson 		if (STAILQ_EMPTY(&zdom->uzd_buckets))
886c6fd3e23SJeff Roberson 			zdom->uzd_seq = bucket->ub_seq;
8875afdf5c1SMark Johnston 
8885afdf5c1SMark Johnston 		/*
8895afdf5c1SMark Johnston 		 * Try to promote reuse of recently used items.  For items
8905afdf5c1SMark Johnston 		 * protected by SMR, try to defer reuse to minimize polling.
8915afdf5c1SMark Johnston 		 */
8925afdf5c1SMark Johnston 		if (bucket->ub_seq == SMR_SEQ_INVALID)
8935afdf5c1SMark Johnston 			STAILQ_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link);
8945afdf5c1SMark Johnston 		else
895c6fd3e23SJeff Roberson 			STAILQ_INSERT_TAIL(&zdom->uzd_buckets, bucket, ub_link);
896c6fd3e23SJeff Roberson 		ZDOM_UNLOCK(zdom);
897c6fd3e23SJeff Roberson 		return;
898c6fd3e23SJeff Roberson 	}
899c6fd3e23SJeff Roberson 	zdom->uzd_nitems -= bucket->ub_cnt;
900c6fd3e23SJeff Roberson 	ZDOM_UNLOCK(zdom);
901c6fd3e23SJeff Roberson out:
902c6fd3e23SJeff Roberson 	bucket_free(zone, bucket, udata);
9030f9b7bf3SMark Johnston }
9040f9b7bf3SMark Johnston 
905376b1ba3SJeff Roberson /* Pops an item out of a per-cpu cache bucket. */
906376b1ba3SJeff Roberson static inline void *
907376b1ba3SJeff Roberson cache_bucket_pop(uma_cache_t cache, uma_cache_bucket_t bucket)
908376b1ba3SJeff Roberson {
909376b1ba3SJeff Roberson 	void *item;
910376b1ba3SJeff Roberson 
911376b1ba3SJeff Roberson 	CRITICAL_ASSERT(curthread);
912376b1ba3SJeff Roberson 
913376b1ba3SJeff Roberson 	bucket->ucb_cnt--;
914376b1ba3SJeff Roberson 	item = bucket->ucb_bucket->ub_bucket[bucket->ucb_cnt];
915376b1ba3SJeff Roberson #ifdef INVARIANTS
916376b1ba3SJeff Roberson 	bucket->ucb_bucket->ub_bucket[bucket->ucb_cnt] = NULL;
917376b1ba3SJeff Roberson 	KASSERT(item != NULL, ("uma_zalloc: Bucket pointer mangled."));
918376b1ba3SJeff Roberson #endif
919376b1ba3SJeff Roberson 	cache->uc_allocs++;
920376b1ba3SJeff Roberson 
921376b1ba3SJeff Roberson 	return (item);
922376b1ba3SJeff Roberson }
923376b1ba3SJeff Roberson 
924376b1ba3SJeff Roberson /* Pushes an item into a per-cpu cache bucket. */
925376b1ba3SJeff Roberson static inline void
926376b1ba3SJeff Roberson cache_bucket_push(uma_cache_t cache, uma_cache_bucket_t bucket, void *item)
927376b1ba3SJeff Roberson {
928376b1ba3SJeff Roberson 
929376b1ba3SJeff Roberson 	CRITICAL_ASSERT(curthread);
930376b1ba3SJeff Roberson 	KASSERT(bucket->ucb_bucket->ub_bucket[bucket->ucb_cnt] == NULL,
931376b1ba3SJeff Roberson 	    ("uma_zfree: Freeing to non free bucket index."));
932376b1ba3SJeff Roberson 
933376b1ba3SJeff Roberson 	bucket->ucb_bucket->ub_bucket[bucket->ucb_cnt] = item;
934376b1ba3SJeff Roberson 	bucket->ucb_cnt++;
935376b1ba3SJeff Roberson 	cache->uc_frees++;
936376b1ba3SJeff Roberson }
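
/*
 * Illustrative fast-path usage (simplified; the real consumers are
 * uma_zalloc_arg() and uma_zfree_arg() later in this file):
 *
 *	critical_enter();
 *	cache = &zone->uz_cpu[curcpu];
 *	if (cache->uc_allocbucket.ucb_cnt != 0)
 *		item = cache_bucket_pop(cache, &cache->uc_allocbucket);
 *	critical_exit();
 */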
937376b1ba3SJeff Roberson 
938376b1ba3SJeff Roberson /*
939376b1ba3SJeff Roberson  * Unload a UMA bucket from a per-cpu cache.
940376b1ba3SJeff Roberson  */
941376b1ba3SJeff Roberson static inline uma_bucket_t
942376b1ba3SJeff Roberson cache_bucket_unload(uma_cache_bucket_t bucket)
943376b1ba3SJeff Roberson {
944376b1ba3SJeff Roberson 	uma_bucket_t b;
945376b1ba3SJeff Roberson 
946376b1ba3SJeff Roberson 	b = bucket->ucb_bucket;
947376b1ba3SJeff Roberson 	if (b != NULL) {
948376b1ba3SJeff Roberson 		MPASS(b->ub_entries == bucket->ucb_entries);
949376b1ba3SJeff Roberson 		b->ub_cnt = bucket->ucb_cnt;
950376b1ba3SJeff Roberson 		bucket->ucb_bucket = NULL;
951376b1ba3SJeff Roberson 		bucket->ucb_entries = bucket->ucb_cnt = 0;
952376b1ba3SJeff Roberson 	}
953376b1ba3SJeff Roberson 
954376b1ba3SJeff Roberson 	return (b);
955376b1ba3SJeff Roberson }
956376b1ba3SJeff Roberson 
957376b1ba3SJeff Roberson static inline uma_bucket_t
958376b1ba3SJeff Roberson cache_bucket_unload_alloc(uma_cache_t cache)
959376b1ba3SJeff Roberson {
960376b1ba3SJeff Roberson 
961376b1ba3SJeff Roberson 	return (cache_bucket_unload(&cache->uc_allocbucket));
962376b1ba3SJeff Roberson }
963376b1ba3SJeff Roberson 
964376b1ba3SJeff Roberson static inline uma_bucket_t
965376b1ba3SJeff Roberson cache_bucket_unload_free(uma_cache_t cache)
966376b1ba3SJeff Roberson {
967376b1ba3SJeff Roberson 
968376b1ba3SJeff Roberson 	return (cache_bucket_unload(&cache->uc_freebucket));
969376b1ba3SJeff Roberson }
970376b1ba3SJeff Roberson 
971376b1ba3SJeff Roberson static inline uma_bucket_t
972376b1ba3SJeff Roberson cache_bucket_unload_cross(uma_cache_t cache)
973376b1ba3SJeff Roberson {
974376b1ba3SJeff Roberson 
975376b1ba3SJeff Roberson 	return (cache_bucket_unload(&cache->uc_crossbucket));
976376b1ba3SJeff Roberson }
977376b1ba3SJeff Roberson 
978376b1ba3SJeff Roberson /*
979376b1ba3SJeff Roberson  * Load a bucket into a per-cpu cache bucket.
980376b1ba3SJeff Roberson  */
981376b1ba3SJeff Roberson static inline void
982376b1ba3SJeff Roberson cache_bucket_load(uma_cache_bucket_t bucket, uma_bucket_t b)
983376b1ba3SJeff Roberson {
984376b1ba3SJeff Roberson 
985376b1ba3SJeff Roberson 	CRITICAL_ASSERT(curthread);
986376b1ba3SJeff Roberson 	MPASS(bucket->ucb_bucket == NULL);
987543117beSJeff Roberson 	MPASS(b->ub_seq == SMR_SEQ_INVALID);
988376b1ba3SJeff Roberson 
989376b1ba3SJeff Roberson 	bucket->ucb_bucket = b;
990376b1ba3SJeff Roberson 	bucket->ucb_cnt = b->ub_cnt;
991376b1ba3SJeff Roberson 	bucket->ucb_entries = b->ub_entries;
992376b1ba3SJeff Roberson }
993376b1ba3SJeff Roberson 
994376b1ba3SJeff Roberson static inline void
995376b1ba3SJeff Roberson cache_bucket_load_alloc(uma_cache_t cache, uma_bucket_t b)
996376b1ba3SJeff Roberson {
997376b1ba3SJeff Roberson 
998376b1ba3SJeff Roberson 	cache_bucket_load(&cache->uc_allocbucket, b);
999376b1ba3SJeff Roberson }
1000376b1ba3SJeff Roberson 
1001376b1ba3SJeff Roberson static inline void
1002376b1ba3SJeff Roberson cache_bucket_load_free(uma_cache_t cache, uma_bucket_t b)
1003376b1ba3SJeff Roberson {
1004376b1ba3SJeff Roberson 
1005376b1ba3SJeff Roberson 	cache_bucket_load(&cache->uc_freebucket, b);
1006376b1ba3SJeff Roberson }
1007376b1ba3SJeff Roberson 
1008dfe13344SJeff Roberson #ifdef NUMA
1009376b1ba3SJeff Roberson static inline void
1010376b1ba3SJeff Roberson cache_bucket_load_cross(uma_cache_t cache, uma_bucket_t b)
1011376b1ba3SJeff Roberson {
1012376b1ba3SJeff Roberson 
1013376b1ba3SJeff Roberson 	cache_bucket_load(&cache->uc_crossbucket, b);
1014376b1ba3SJeff Roberson }
1015376b1ba3SJeff Roberson #endif
1016376b1ba3SJeff Roberson 
1017376b1ba3SJeff Roberson /*
1018376b1ba3SJeff Roberson  * Copy and preserve ucb_spare.
1019376b1ba3SJeff Roberson  */
1020376b1ba3SJeff Roberson static inline void
1021376b1ba3SJeff Roberson cache_bucket_copy(uma_cache_bucket_t b1, uma_cache_bucket_t b2)
1022376b1ba3SJeff Roberson {
1023376b1ba3SJeff Roberson 
1024376b1ba3SJeff Roberson 	b1->ucb_bucket = b2->ucb_bucket;
1025376b1ba3SJeff Roberson 	b1->ucb_entries = b2->ucb_entries;
1026376b1ba3SJeff Roberson 	b1->ucb_cnt = b2->ucb_cnt;
1027376b1ba3SJeff Roberson }
1028376b1ba3SJeff Roberson 
1029376b1ba3SJeff Roberson /*
1030376b1ba3SJeff Roberson  * Swap two cache buckets.
1031376b1ba3SJeff Roberson  */
1032376b1ba3SJeff Roberson static inline void
1033376b1ba3SJeff Roberson cache_bucket_swap(uma_cache_bucket_t b1, uma_cache_bucket_t b2)
1034376b1ba3SJeff Roberson {
1035376b1ba3SJeff Roberson 	struct uma_cache_bucket b3;
1036376b1ba3SJeff Roberson 
1037376b1ba3SJeff Roberson 	CRITICAL_ASSERT(curthread);
1038376b1ba3SJeff Roberson 
1039376b1ba3SJeff Roberson 	cache_bucket_copy(&b3, b1);
1040376b1ba3SJeff Roberson 	cache_bucket_copy(b1, b2);
1041376b1ba3SJeff Roberson 	cache_bucket_copy(b2, &b3);
1042376b1ba3SJeff Roberson }
1043376b1ba3SJeff Roberson 
1044c6fd3e23SJeff Roberson /*
1045c6fd3e23SJeff Roberson  * Attempt to fetch a bucket from a zone on behalf of the current cpu cache.
1046c6fd3e23SJeff Roberson  */
1047c6fd3e23SJeff Roberson static uma_bucket_t
1048c6fd3e23SJeff Roberson cache_fetch_bucket(uma_zone_t zone, uma_cache_t cache, int domain)
1049c6fd3e23SJeff Roberson {
1050c6fd3e23SJeff Roberson 	uma_zone_domain_t zdom;
1051c6fd3e23SJeff Roberson 	uma_bucket_t bucket;
1052a04ce833SMark Johnston 	smr_seq_t seq;
1053c6fd3e23SJeff Roberson 
1054c6fd3e23SJeff Roberson 	/*
1055c6fd3e23SJeff Roberson 	 * Avoid the lock if possible.
1056c6fd3e23SJeff Roberson 	 */
1057c6fd3e23SJeff Roberson 	zdom = ZDOM_GET(zone, domain);
1058c6fd3e23SJeff Roberson 	if (zdom->uzd_nitems == 0)
1059c6fd3e23SJeff Roberson 		return (NULL);
1060c6fd3e23SJeff Roberson 
1061c6fd3e23SJeff Roberson 	if ((cache_uz_flags(cache) & UMA_ZONE_SMR) != 0 &&
1062a04ce833SMark Johnston 	    (seq = atomic_load_32(&zdom->uzd_seq)) != SMR_SEQ_INVALID &&
1063a04ce833SMark Johnston 	    !smr_poll(zone->uz_smr, seq, false))
1064c6fd3e23SJeff Roberson 		return (NULL);
1065c6fd3e23SJeff Roberson 
1066c6fd3e23SJeff Roberson 	/*
1067c6fd3e23SJeff Roberson 	 * Check the zone's cache of buckets.
1068c6fd3e23SJeff Roberson 	 */
1069c6fd3e23SJeff Roberson 	zdom = zone_domain_lock(zone, domain);
107006d8bdcbSMark Johnston 	if ((bucket = zone_fetch_bucket(zone, zdom, false)) != NULL)
1071c6fd3e23SJeff Roberson 		return (bucket);
1072c6fd3e23SJeff Roberson 	ZDOM_UNLOCK(zdom);
1073c6fd3e23SJeff Roberson 
1074c6fd3e23SJeff Roberson 	return (NULL);
1075c6fd3e23SJeff Roberson }
1076c6fd3e23SJeff Roberson 
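/*
 * Illustrative sketch, not part of uma_core.c: the "avoid the lock if
 * possible" shape used in cache_fetch_bucket(), modelled in userspace with
 * C11 atomics and pthreads.  The demo_ names are hypothetical and the SMR
 * check is omitted; the block is wrapped in #if 0 so it is never built.
 * A stale zero on the unlocked read only costs a missed bucket, and any
 * nonzero value is re-validated under the lock.
 */
#if 0
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

struct demo_dom {
	_Atomic long	nitems;		/* unlocked fast-path hint */
	pthread_mutex_t	lock;		/* protects the real bucket list */
};

static bool
demo_try_fetch(struct demo_dom *dom)
{
	bool found = false;

	/* Cheap unlocked check first; most misses stop here. */
	if (atomic_load_explicit(&dom->nitems, memory_order_relaxed) == 0)
		return (false);

	/* Only now pay for the lock and re-check authoritatively. */
	pthread_mutex_lock(&dom->lock);
	if (atomic_load_explicit(&dom->nitems, memory_order_relaxed) > 0) {
		atomic_fetch_sub_explicit(&dom->nitems, 1,
		    memory_order_relaxed);
		found = true;
	}
	pthread_mutex_unlock(&dom->lock);
	return (found);
}
#endif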
10772f891cd5SPawel Jakub Dawidek static void
10782f891cd5SPawel Jakub Dawidek zone_log_warning(uma_zone_t zone)
10792f891cd5SPawel Jakub Dawidek {
10802f891cd5SPawel Jakub Dawidek 	static const struct timeval warninterval = { 300, 0 };
10812f891cd5SPawel Jakub Dawidek 
10822f891cd5SPawel Jakub Dawidek 	if (!zone_warnings || zone->uz_warning == NULL)
10832f891cd5SPawel Jakub Dawidek 		return;
10842f891cd5SPawel Jakub Dawidek 
10852f891cd5SPawel Jakub Dawidek 	if (ratecheck(&zone->uz_ratecheck, &warninterval))
10862f891cd5SPawel Jakub Dawidek 		printf("[zone: %s] %s\n", zone->uz_name, zone->uz_warning);
10872f891cd5SPawel Jakub Dawidek }
10882f891cd5SPawel Jakub Dawidek 
108954503a13SJonathan T. Looney static inline void
109054503a13SJonathan T. Looney zone_maxaction(uma_zone_t zone)
109154503a13SJonathan T. Looney {
1092e60b2fcbSGleb Smirnoff 
1093e60b2fcbSGleb Smirnoff 	if (zone->uz_maxaction.ta_func != NULL)
1094e60b2fcbSGleb Smirnoff 		taskqueue_enqueue(taskqueue_thread, &zone->uz_maxaction);
109554503a13SJonathan T. Looney }
109654503a13SJonathan T. Looney 
10978355f576SJeff Roberson /*
10988355f576SJeff Roberson  * Routine fired periodically from a taskqueue to perform time interval
10999643769aSJeff Roberson  * based calculations.  (stats, hash size, etc.)
11008355f576SJeff Roberson  *
11018355f576SJeff Roberson  * Arguments:
11028355f576SJeff Roberson  *	arg   Unused
11038355f576SJeff Roberson  *
11048355f576SJeff Roberson  * Returns:
11058355f576SJeff Roberson  *	Nothing
11068355f576SJeff Roberson  */
11078355f576SJeff Roberson static void
110831508912SMark Johnston uma_timeout(void *context __unused, int pending __unused)
11098355f576SJeff Roberson {
111086bbae32SJeff Roberson 	bucket_enable();
111120a4e154SJeff Roberson 	zone_foreach(zone_timeout, NULL);
11128355f576SJeff Roberson 
11138355f576SJeff Roberson 	/* Reschedule this event */
111493cd28eaSMark Johnston 	taskqueue_enqueue_timeout(taskqueue_thread, &uma_timeout_task,
111593cd28eaSMark Johnston 	    UMA_TIMEOUT * hz);
11168355f576SJeff Roberson }
11178355f576SJeff Roberson 
11188355f576SJeff Roberson /*
11192760658bSAlexander Motin  * Update the working set size estimates for the zone's bucket cache.
11202760658bSAlexander Motin  * The constants chosen here are somewhat arbitrary.
11210f9b7bf3SMark Johnston  */
11220f9b7bf3SMark Johnston static void
11230f9b7bf3SMark Johnston zone_domain_update_wss(uma_zone_domain_t zdom)
11240f9b7bf3SMark Johnston {
11252760658bSAlexander Motin 	long m;
11260f9b7bf3SMark Johnston 
11272760658bSAlexander Motin 	ZDOM_LOCK_ASSERT(zdom);
11282760658bSAlexander Motin 	MPASS(zdom->uzd_imax >= zdom->uzd_nitems);
11292760658bSAlexander Motin 	MPASS(zdom->uzd_nitems >= zdom->uzd_bimin);
11302760658bSAlexander Motin 	MPASS(zdom->uzd_bimin >= zdom->uzd_imin);
11312760658bSAlexander Motin 
11322760658bSAlexander Motin 	/*
11332760658bSAlexander Motin 	 * Estimate WSS as modified moving average of biggest allocation
11342760658bSAlexander Motin 	 * batches for each period over few minutes (UMA_TIMEOUT of 20s).
11352760658bSAlexander Motin 	 */
11362760658bSAlexander Motin 	zdom->uzd_wss = lmax(zdom->uzd_wss * 3 / 4,
11372760658bSAlexander Motin 	    zdom->uzd_imax - zdom->uzd_bimin);
11382760658bSAlexander Motin 
11392760658bSAlexander Motin 	/*
11402760658bSAlexander Motin 	 * Estimate longtime minimum item count as a combination of recent
11412760658bSAlexander Motin 	 * minimum item count, adjusted by WSS for safety, and the modified
11422760658bSAlexander Motin 	 * moving average over the last several hours (UMA_TIMEOUT of 20s).
11432760658bSAlexander Motin 	 * timin measures the time since limin last tried to go negative, which
11442760658bSAlexander Motin 	 * means we were dangerously close to, or actually did, run out of cache.
11452760658bSAlexander Motin 	 */
11462760658bSAlexander Motin 	m = zdom->uzd_imin - zdom->uzd_wss;
11472760658bSAlexander Motin 	if (m >= 0) {
11482760658bSAlexander Motin 		if (zdom->uzd_limin >= m)
11492760658bSAlexander Motin 			zdom->uzd_limin = m;
11502760658bSAlexander Motin 		else
11512760658bSAlexander Motin 			zdom->uzd_limin = (m + zdom->uzd_limin * 255) / 256;
11522760658bSAlexander Motin 		zdom->uzd_timin++;
11532760658bSAlexander Motin 	} else {
11542760658bSAlexander Motin 		zdom->uzd_limin = 0;
11552760658bSAlexander Motin 		zdom->uzd_timin = 0;
11562760658bSAlexander Motin 	}
11572760658bSAlexander Motin 
11582760658bSAlexander Motin 	/* To reduce period edge effects on WSS keep half of the imax. */
11592760658bSAlexander Motin 	atomic_subtract_long(&zdom->uzd_imax,
11602760658bSAlexander Motin 	    (zdom->uzd_imax - zdom->uzd_nitems + 1) / 2);
11612760658bSAlexander Motin 	zdom->uzd_imin = zdom->uzd_bimin = zdom->uzd_nitems;
11620f9b7bf3SMark Johnston }
11630f9b7bf3SMark Johnston 
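/*
 * Illustrative sketch, not part of uma_core.c: a standalone model of the
 * estimator above.  The struct and the demo_ names are hypothetical
 * stand-ins for the uma_zone_domain counters; the block is wrapped in
 * #if 0 so it is never built.  The working-set estimate decays by a
 * quarter each period unless the latest allocation batch (imax - bimin)
 * exceeds it, and the long-term minimum is a slow 1/256-weight moving
 * average that resets whenever the recent minimum dips below the WSS.
 */
#if 0
struct demo_wss {
	long nitems;	/* current cached item count */
	long imax;	/* maximum items seen this period */
	long imin;	/* minimum items seen this period */
	long bimin;	/* minimum since the last batch allocation */
	long wss;	/* working-set size estimate */
	long limin;	/* long-term minimum estimate */
	long timin;	/* periods since limin last hit zero */
};

static void
demo_wss_update(struct demo_wss *d)
{
	long spare;

	/* Decay the WSS, but never below the largest batch this period. */
	d->wss = d->wss * 3 / 4;
	if (d->imax - d->bimin > d->wss)
		d->wss = d->imax - d->bimin;

	/* Long-term minimum: slow moving average, reset when we run dry. */
	spare = d->imin - d->wss;
	if (spare >= 0) {
		if (d->limin >= spare)
			d->limin = spare;
		else
			d->limin = (spare + d->limin * 255) / 256;
		d->timin++;
	} else {
		d->limin = 0;
		d->timin = 0;
	}

	/* Keep half of the excess imax to smooth period edges. */
	d->imax -= (d->imax - d->nitems + 1) / 2;
	d->imin = d->bimin = d->nitems;
}
#endif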
11640f9b7bf3SMark Johnston /*
11659643769aSJeff Roberson  * Routine to perform timeout driven calculations.  This expands the
11669643769aSJeff Roberson  * hashes and does per cpu statistics aggregation.
11678355f576SJeff Roberson  *
1168e20a199fSJeff Roberson  *  Returns nothing.
11698355f576SJeff Roberson  */
11708355f576SJeff Roberson static void
117120a4e154SJeff Roberson zone_timeout(uma_zone_t zone, void *unused)
11728355f576SJeff Roberson {
117308034d10SKonstantin Belousov 	uma_keg_t keg;
11748b987a77SJeff Roberson 	u_int slabs, pages;
11758355f576SJeff Roberson 
117654c5ae80SRyan Libby 	if ((zone->uz_flags & UMA_ZFLAG_HASH) == 0)
11772760658bSAlexander Motin 		goto trim;
117808034d10SKonstantin Belousov 
117908034d10SKonstantin Belousov 	keg = zone->uz_keg;
11808b987a77SJeff Roberson 
11818b987a77SJeff Roberson 	/*
11828b987a77SJeff Roberson 	 * Hash zones are non-NUMA by definition, so the first domain
11838b987a77SJeff Roberson 	 * is the only one present.
11848b987a77SJeff Roberson 	 */
11858b987a77SJeff Roberson 	KEG_LOCK(keg, 0);
11868b987a77SJeff Roberson 	pages = keg->uk_domain[0].ud_pages;
11878b987a77SJeff Roberson 
11888355f576SJeff Roberson 	/*
1189e20a199fSJeff Roberson 	 * Expand the keg hash table.
11908355f576SJeff Roberson 	 *
11918355f576SJeff Roberson 	 * This is done if the number of slabs is larger than the hash size.
11928355f576SJeff Roberson 	 * What I'm trying to do here is completely reduce collisions.  This
11938355f576SJeff Roberson 	 * may be a little aggressive.  Should I allow for two collisions max?
11948355f576SJeff Roberson 	 */
11958b987a77SJeff Roberson 	if ((slabs = pages / keg->uk_ppera) > keg->uk_hash.uh_hashsize) {
11960aef6126SJeff Roberson 		struct uma_hash newhash;
11970aef6126SJeff Roberson 		struct uma_hash oldhash;
11980aef6126SJeff Roberson 		int ret;
11995300d9ddSJeff Roberson 
12000aef6126SJeff Roberson 		/*
12010aef6126SJeff Roberson 		 * This is so involved because allocating and freeing
1202e20a199fSJeff Roberson 		 * while the keg lock is held will lead to deadlock.
12030aef6126SJeff Roberson 		 * I have to do everything in stages and check for
12040aef6126SJeff Roberson 		 * races.
12050aef6126SJeff Roberson 		 */
12068b987a77SJeff Roberson 		KEG_UNLOCK(keg, 0);
12073b2f2cb8SAlexander Motin 		ret = hash_alloc(&newhash, 1 << fls(slabs));
12088b987a77SJeff Roberson 		KEG_LOCK(keg, 0);
12090aef6126SJeff Roberson 		if (ret) {
1210099a0e58SBosko Milekic 			if (hash_expand(&keg->uk_hash, &newhash)) {
1211099a0e58SBosko Milekic 				oldhash = keg->uk_hash;
1212099a0e58SBosko Milekic 				keg->uk_hash = newhash;
12130aef6126SJeff Roberson 			} else
12140aef6126SJeff Roberson 				oldhash = newhash;
12150aef6126SJeff Roberson 
12168b987a77SJeff Roberson 			KEG_UNLOCK(keg, 0);
12170aef6126SJeff Roberson 			hash_free(&oldhash);
12182760658bSAlexander Motin 			goto trim;
12190aef6126SJeff Roberson 		}
12205300d9ddSJeff Roberson 	}
12218b987a77SJeff Roberson 	KEG_UNLOCK(keg, 0);
1222e20a199fSJeff Roberson 
12232760658bSAlexander Motin trim:
12242760658bSAlexander Motin 	/* Trim caches not used for a long time. */
1225cf907074SAndrew Gallatin 	if ((zone->uz_flags & (UMA_ZONE_UNMANAGED | UMA_ZONE_NOTRIM)) == 0) {
12262760658bSAlexander Motin 		for (int i = 0; i < vm_ndomains; i++) {
12272760658bSAlexander Motin 			if (bucket_cache_reclaim_domain(zone, false, false, i) &&
12282760658bSAlexander Motin 			    (zone->uz_flags & UMA_ZFLAG_CACHE) == 0)
12292760658bSAlexander Motin 				keg_drain(zone->uz_keg, i);
12302760658bSAlexander Motin 		}
12318355f576SJeff Roberson 	}
1232389a3fa6SMark Johnston }
12338355f576SJeff Roberson 
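/*
 * Illustrative sketch, not part of uma_core.c: what the "1 << fls(slabs)"
 * sizing used in zone_timeout() produces.  fls() returns the 1-based index
 * of the most significant set bit, so the new table always gets a
 * power-of-two bucket count strictly greater than the slab count
 * (5 slabs -> 8 buckets, 8 slabs -> 16 buckets).  The demo_ names are
 * hypothetical; the block is wrapped in #if 0 so it is never built.
 */
#if 0
static int
demo_fls(unsigned int n)
{
	int bit;

	for (bit = 0; n != 0; bit++)
		n >>= 1;
	return (bit);
}

static unsigned int
demo_new_hash_size(unsigned int slabs)
{

	return (1u << demo_fls(slabs));
}
#endif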
12348355f576SJeff Roberson /*
12355300d9ddSJeff Roberson  * Allocate and zero-fill a hash table of the requested size from the appropriate
12365300d9ddSJeff Roberson  * backing store.
12375300d9ddSJeff Roberson  *
12385300d9ddSJeff Roberson  * Arguments:
12390aef6126SJeff Roberson  *	hash  A new hash structure to initialize for at least 'size' buckets
12405300d9ddSJeff Roberson  *
12415300d9ddSJeff Roberson  * Returns:
1242763df3ecSPedro F. Giffuni  *	1 on success and 0 on failure.
12435300d9ddSJeff Roberson  */
124437c84183SPoul-Henning Kamp static int
12453b2f2cb8SAlexander Motin hash_alloc(struct uma_hash *hash, u_int size)
12465300d9ddSJeff Roberson {
124759568a0eSAlexander Motin 	size_t alloc;
12485300d9ddSJeff Roberson 
12493b2f2cb8SAlexander Motin 	KASSERT(powerof2(size), ("hash size must be power of 2"));
12503b2f2cb8SAlexander Motin 	if (size > UMA_HASH_SIZE_INIT)  {
12513b2f2cb8SAlexander Motin 		hash->uh_hashsize = size;
12520aef6126SJeff Roberson 		alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
12531e0701e1SJeff Roberson 		hash->uh_slab_hash = malloc(alloc, M_UMAHASH, M_NOWAIT);
12545300d9ddSJeff Roberson 	} else {
12550aef6126SJeff Roberson 		alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
1256e20a199fSJeff Roberson 		hash->uh_slab_hash = zone_alloc_item(hashzone, NULL,
1257ab3185d1SJeff Roberson 		    UMA_ANYDOMAIN, M_WAITOK);
12580aef6126SJeff Roberson 		hash->uh_hashsize = UMA_HASH_SIZE_INIT;
12595300d9ddSJeff Roberson 	}
12600aef6126SJeff Roberson 	if (hash->uh_slab_hash) {
12610aef6126SJeff Roberson 		bzero(hash->uh_slab_hash, alloc);
12620aef6126SJeff Roberson 		hash->uh_hashmask = hash->uh_hashsize - 1;
12630aef6126SJeff Roberson 		return (1);
12640aef6126SJeff Roberson 	}
12655300d9ddSJeff Roberson 
12660aef6126SJeff Roberson 	return (0);
12675300d9ddSJeff Roberson }
12685300d9ddSJeff Roberson 
12695300d9ddSJeff Roberson /*
127064f051e9SJeff Roberson  * Expands the hash table for HASH zones.  This is done from zone_timeout
127164f051e9SJeff Roberson  * to reduce collisions.  This must not be done in the regular allocation
127264f051e9SJeff Roberson  * path, otherwise, we can recurse on the vm while allocating pages.
12738355f576SJeff Roberson  *
12748355f576SJeff Roberson  * Arguments:
12750aef6126SJeff Roberson  *	oldhash  The hash you want to expand
12760aef6126SJeff Roberson  *	newhash  The hash structure for the new table
12778355f576SJeff Roberson  *
12788355f576SJeff Roberson  * Returns:
12798355f576SJeff Roberson  *	Nothing
12808355f576SJeff Roberson  *	1 if the slabs were rehashed into newhash, 0 otherwise.
12830aef6126SJeff Roberson static int
12840aef6126SJeff Roberson hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
12858355f576SJeff Roberson {
12861e0701e1SJeff Roberson 	uma_hash_slab_t slab;
12876929b7d1SPedro F. Giffuni 	u_int hval;
12886929b7d1SPedro F. Giffuni 	u_int idx;
12898355f576SJeff Roberson 
12900aef6126SJeff Roberson 	if (!newhash->uh_slab_hash)
12910aef6126SJeff Roberson 		return (0);
12928355f576SJeff Roberson 
12930aef6126SJeff Roberson 	if (oldhash->uh_hashsize >= newhash->uh_hashsize)
12940aef6126SJeff Roberson 		return (0);
12958355f576SJeff Roberson 
12968355f576SJeff Roberson 	/*
12978355f576SJeff Roberson 	 * I need to investigate hash algorithms for resizing without a
12988355f576SJeff Roberson 	 * full rehash.
12998355f576SJeff Roberson 	 */
13008355f576SJeff Roberson 
13016929b7d1SPedro F. Giffuni 	for (idx = 0; idx < oldhash->uh_hashsize; idx++)
13021e0701e1SJeff Roberson 		while (!LIST_EMPTY(&oldhash->uh_slab_hash[idx])) {
13031e0701e1SJeff Roberson 			slab = LIST_FIRST(&oldhash->uh_slab_hash[idx]);
13041e0701e1SJeff Roberson 			LIST_REMOVE(slab, uhs_hlink);
13051e0701e1SJeff Roberson 			hval = UMA_HASH(newhash, slab->uhs_data);
13061e0701e1SJeff Roberson 			LIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
13071e0701e1SJeff Roberson 			    slab, uhs_hlink);
13088355f576SJeff Roberson 		}
13098355f576SJeff Roberson 
13100aef6126SJeff Roberson 	return (1);
13119c2cd7e5SJeff Roberson }
13129c2cd7e5SJeff Roberson 
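/*
 * Illustrative sketch, not part of uma_core.c: the rehash loop above in
 * miniature.  Each entry is popped from its old chain and pushed onto the
 * chain selected by the new, larger power-of-two mask; nothing but the
 * bucket array itself is reallocated.  The demo_ names are hypothetical;
 * the block is wrapped in #if 0 so it is never built.
 */
#if 0
struct demo_ent {
	struct demo_ent	*next;
	unsigned long	 key;
};

static void
demo_rehash(struct demo_ent **oldtbl, unsigned int oldsize,
    struct demo_ent **newtbl, unsigned int newsize)
{
	struct demo_ent *e;
	unsigned int i, hval;

	for (i = 0; i < oldsize; i++) {
		while ((e = oldtbl[i]) != NULL) {
			oldtbl[i] = e->next;
			hval = e->key & (newsize - 1);	/* newsize is 2^n */
			e->next = newtbl[hval];
			newtbl[hval] = e;
		}
	}
}
#endif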
13135300d9ddSJeff Roberson /*
13145300d9ddSJeff Roberson  * Free the hash table's backing storage to the appropriate allocator.
13155300d9ddSJeff Roberson  *
13165300d9ddSJeff Roberson  * Arguments:
13175300d9ddSJeff Roberson  *	hash  The hash structure whose slab hash array is being freed
13195300d9ddSJeff Roberson  *
13205300d9ddSJeff Roberson  * Returns:
13215300d9ddSJeff Roberson  *	Nothing
13225300d9ddSJeff Roberson  */
13239c2cd7e5SJeff Roberson static void
13240aef6126SJeff Roberson hash_free(struct uma_hash *hash)
13259c2cd7e5SJeff Roberson {
13260aef6126SJeff Roberson 	if (hash->uh_slab_hash == NULL)
13270aef6126SJeff Roberson 		return;
13280aef6126SJeff Roberson 	if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
13290095a784SJeff Roberson 		zone_free_item(hashzone, hash->uh_slab_hash, NULL, SKIP_NONE);
13308355f576SJeff Roberson 	else
1331961647dfSJeff Roberson 		free(hash->uh_slab_hash, M_UMAHASH);
13328355f576SJeff Roberson }
13338355f576SJeff Roberson 
13348355f576SJeff Roberson /*
13358355f576SJeff Roberson  * Frees all outstanding items in a bucket
13368355f576SJeff Roberson  *
13378355f576SJeff Roberson  * Arguments:
13388355f576SJeff Roberson  *	zone   The zone to free to, must be unlocked.
13394bd61e19SJeff Roberson  *	bucket The free/alloc bucket with items.
13408355f576SJeff Roberson  *
13418355f576SJeff Roberson  * Returns:
13428355f576SJeff Roberson  *	Nothing
13438355f576SJeff Roberson  */
13448355f576SJeff Roberson static void
13458355f576SJeff Roberson bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
13468355f576SJeff Roberson {
13470095a784SJeff Roberson 	int i;
13488355f576SJeff Roberson 
1349c6fd3e23SJeff Roberson 	if (bucket->ub_cnt == 0)
13508355f576SJeff Roberson 		return;
13518355f576SJeff Roberson 
1352d4665eaaSJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_SMR) != 0 &&
1353d4665eaaSJeff Roberson 	    bucket->ub_seq != SMR_SEQ_INVALID) {
1354d4665eaaSJeff Roberson 		smr_wait(zone->uz_smr, bucket->ub_seq);
1355543117beSJeff Roberson 		bucket->ub_seq = SMR_SEQ_INVALID;
1356d4665eaaSJeff Roberson 		for (i = 0; i < bucket->ub_cnt; i++)
1357d4665eaaSJeff Roberson 			item_dtor(zone, bucket->ub_bucket[i],
1358d4665eaaSJeff Roberson 			    zone->uz_size, NULL, SKIP_NONE);
1359d4665eaaSJeff Roberson 	}
13600095a784SJeff Roberson 	if (zone->uz_fini)
136109c8cb71SMark Johnston 		for (i = 0; i < bucket->ub_cnt; i++) {
136209c8cb71SMark Johnston 			kasan_mark_item_valid(zone, bucket->ub_bucket[i]);
13630095a784SJeff Roberson 			zone->uz_fini(bucket->ub_bucket[i], zone->uz_size);
136409c8cb71SMark Johnston 			kasan_mark_item_invalid(zone, bucket->ub_bucket[i]);
136509c8cb71SMark Johnston 		}
13660095a784SJeff Roberson 	zone->uz_release(zone->uz_arg, bucket->ub_bucket, bucket->ub_cnt);
13674bd61e19SJeff Roberson 	if (zone->uz_max_items > 0)
13684bd61e19SJeff Roberson 		zone_free_limit(zone, bucket->ub_cnt);
1369d4665eaaSJeff Roberson #ifdef INVARIANTS
1370d4665eaaSJeff Roberson 	bzero(bucket->ub_bucket, sizeof(void *) * bucket->ub_cnt);
1371d4665eaaSJeff Roberson #endif
13720095a784SJeff Roberson 	bucket->ub_cnt = 0;
13738355f576SJeff Roberson }
13748355f576SJeff Roberson 
13758355f576SJeff Roberson /*
13768355f576SJeff Roberson  * Drains the per cpu caches for a zone.
13778355f576SJeff Roberson  *
1378727c6918SJeff Roberson  * NOTE: This may only be called while the zone is being torn down, and not
13795d1ae027SRobert Watson  * during normal operation.  This is necessary in order that we do not have
13805d1ae027SRobert Watson  * to migrate CPUs to drain the per-CPU caches.
13815d1ae027SRobert Watson  *
13828355f576SJeff Roberson  * Arguments:
13838355f576SJeff Roberson  *	zone     The zone to drain, must be unlocked.
13848355f576SJeff Roberson  *
13858355f576SJeff Roberson  * Returns:
13868355f576SJeff Roberson  *	Nothing
13878355f576SJeff Roberson  */
13888355f576SJeff Roberson static void
13899643769aSJeff Roberson cache_drain(uma_zone_t zone)
13908355f576SJeff Roberson {
13918355f576SJeff Roberson 	uma_cache_t cache;
1392376b1ba3SJeff Roberson 	uma_bucket_t bucket;
1393543117beSJeff Roberson 	smr_seq_t seq;
13948355f576SJeff Roberson 	int cpu;
13958355f576SJeff Roberson 
13968355f576SJeff Roberson 	/*
13975d1ae027SRobert Watson 	 * XXX: It is safe to not lock the per-CPU caches, because we're
13985d1ae027SRobert Watson 	 * tearing down the zone anyway.  I.e., there will be no further use
13995d1ae027SRobert Watson 	 * of the caches at this point.
14005d1ae027SRobert Watson 	 *
14025d1ae027SRobert Watson 	 * XXX: It would be good to be able to assert that the zone is being
14025d1ae027SRobert Watson 	 * torn down to prevent improper use of cache_drain().
14038355f576SJeff Roberson 	 */
1404543117beSJeff Roberson 	seq = SMR_SEQ_INVALID;
1405543117beSJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_SMR) != 0)
1406226dd6dbSJeff Roberson 		seq = smr_advance(zone->uz_smr);
14073aa6d94eSJohn Baldwin 	CPU_FOREACH(cpu) {
14088355f576SJeff Roberson 		cache = &zone->uz_cpu[cpu];
1409376b1ba3SJeff Roberson 		bucket = cache_bucket_unload_alloc(cache);
1410c6fd3e23SJeff Roberson 		if (bucket != NULL)
1411376b1ba3SJeff Roberson 			bucket_free(zone, bucket, NULL);
1412376b1ba3SJeff Roberson 		bucket = cache_bucket_unload_free(cache);
1413376b1ba3SJeff Roberson 		if (bucket != NULL) {
1414543117beSJeff Roberson 			bucket->ub_seq = seq;
1415376b1ba3SJeff Roberson 			bucket_free(zone, bucket, NULL);
1416376b1ba3SJeff Roberson 		}
1417376b1ba3SJeff Roberson 		bucket = cache_bucket_unload_cross(cache);
1418376b1ba3SJeff Roberson 		if (bucket != NULL) {
1419543117beSJeff Roberson 			bucket->ub_seq = seq;
1420376b1ba3SJeff Roberson 			bucket_free(zone, bucket, NULL);
1421376b1ba3SJeff Roberson 		}
1422d56368d7SBosko Milekic 	}
1423aabe13f1SMark Johnston 	bucket_cache_reclaim(zone, true, UMA_ANYDOMAIN);
1424aaa8bb16SJeff Roberson }
1425aaa8bb16SJeff Roberson 
1426a2de44abSAlexander Motin static void
142720a4e154SJeff Roberson cache_shrink(uma_zone_t zone, void *unused)
1428a2de44abSAlexander Motin {
1429a2de44abSAlexander Motin 
1430a2de44abSAlexander Motin 	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
1431a2de44abSAlexander Motin 		return;
1432a2de44abSAlexander Motin 
1433aabe13f1SMark Johnston 	ZONE_LOCK(zone);
143420a4e154SJeff Roberson 	zone->uz_bucket_size =
143520a4e154SJeff Roberson 	    (zone->uz_bucket_size_min + zone->uz_bucket_size) / 2;
1436aabe13f1SMark Johnston 	ZONE_UNLOCK(zone);
1437a2de44abSAlexander Motin }
1438a2de44abSAlexander Motin 
1439a2de44abSAlexander Motin static void
144020a4e154SJeff Roberson cache_drain_safe_cpu(uma_zone_t zone, void *unused)
1441a2de44abSAlexander Motin {
1442a2de44abSAlexander Motin 	uma_cache_t cache;
1443c1685086SJeff Roberson 	uma_bucket_t b1, b2, b3;
1444ab3185d1SJeff Roberson 	int domain;
1445a2de44abSAlexander Motin 
1446a2de44abSAlexander Motin 	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
1447a2de44abSAlexander Motin 		return;
1448a2de44abSAlexander Motin 
1449c1685086SJeff Roberson 	b1 = b2 = b3 = NULL;
1450a2de44abSAlexander Motin 	critical_enter();
1451a2de44abSAlexander Motin 	cache = &zone->uz_cpu[curcpu];
1452c6fd3e23SJeff Roberson 	domain = PCPU_GET(domain);
1453376b1ba3SJeff Roberson 	b1 = cache_bucket_unload_alloc(cache);
1454d4665eaaSJeff Roberson 
1455d4665eaaSJeff Roberson 	/*
1456d4665eaaSJeff Roberson 	 * Don't flush SMR zone buckets.  This leaves the zone without a
1457d4665eaaSJeff Roberson 	 * bucket and forces every free to synchronize().
1458d4665eaaSJeff Roberson 	 */
1459543117beSJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_SMR) == 0) {
1460376b1ba3SJeff Roberson 		b2 = cache_bucket_unload_free(cache);
1461543117beSJeff Roberson 		b3 = cache_bucket_unload_cross(cache);
1462543117beSJeff Roberson 	}
1463543117beSJeff Roberson 	critical_exit();
1464543117beSJeff Roberson 
1465543117beSJeff Roberson 	if (b1 != NULL)
1466c6fd3e23SJeff Roberson 		zone_free_bucket(zone, b1, NULL, domain, false);
1467543117beSJeff Roberson 	if (b2 != NULL)
1468c6fd3e23SJeff Roberson 		zone_free_bucket(zone, b2, NULL, domain, false);
1469543117beSJeff Roberson 	if (b3 != NULL) {
1470c6fd3e23SJeff Roberson 		/* Adjust the domain so it goes to zone_free_cross. */
1471c6fd3e23SJeff Roberson 		domain = (domain + 1) % vm_ndomains;
1472c6fd3e23SJeff Roberson 		zone_free_bucket(zone, b3, NULL, domain, false);
1473c1685086SJeff Roberson 	}
1474a2de44abSAlexander Motin }
1475a2de44abSAlexander Motin 
1476a2de44abSAlexander Motin /*
1477a2de44abSAlexander Motin  * Safely drain per-CPU caches of a zone(s) to alloc bucket.
1478a2de44abSAlexander Motin  * This is an expensive call because it needs to bind to all CPUs
1479a2de44abSAlexander Motin  * one by one and enter a critical section on each of them in order
1480a2de44abSAlexander Motin  * to safely access their cache buckets.
1481a2de44abSAlexander Motin  * The zone lock must not be held when calling this function.
1482a2de44abSAlexander Motin  */
1483a2de44abSAlexander Motin static void
148408cfa56eSMark Johnston pcpu_cache_drain_safe(uma_zone_t zone)
1485a2de44abSAlexander Motin {
1486a2de44abSAlexander Motin 	int cpu;
1487a2de44abSAlexander Motin 
1488a2de44abSAlexander Motin 	/*
1489727c6918SJeff Roberson 	 * Polite bucket size shrinking was not enough; shrink aggressively.
1490a2de44abSAlexander Motin 	 */
1491a2de44abSAlexander Motin 	if (zone)
149220a4e154SJeff Roberson 		cache_shrink(zone, NULL);
1493a2de44abSAlexander Motin 	else
149420a4e154SJeff Roberson 		zone_foreach(cache_shrink, NULL);
1495a2de44abSAlexander Motin 
1496a2de44abSAlexander Motin 	CPU_FOREACH(cpu) {
1497a2de44abSAlexander Motin 		thread_lock(curthread);
1498a2de44abSAlexander Motin 		sched_bind(curthread, cpu);
1499a2de44abSAlexander Motin 		thread_unlock(curthread);
1500a2de44abSAlexander Motin 
1501a2de44abSAlexander Motin 		if (zone)
150220a4e154SJeff Roberson 			cache_drain_safe_cpu(zone, NULL);
1503a2de44abSAlexander Motin 		else
150420a4e154SJeff Roberson 			zone_foreach(cache_drain_safe_cpu, NULL);
1505a2de44abSAlexander Motin 	}
1506a2de44abSAlexander Motin 	thread_lock(curthread);
1507a2de44abSAlexander Motin 	sched_unbind(curthread);
1508a2de44abSAlexander Motin 	thread_unlock(curthread);
1509a2de44abSAlexander Motin }
1510a2de44abSAlexander Motin 
1511aaa8bb16SJeff Roberson /*
151208cfa56eSMark Johnston  * Reclaim cached buckets from a zone.  All buckets are reclaimed if the caller
151308cfa56eSMark Johnston  * requested a drain, otherwise the per-domain caches are trimmed to their
151408cfa56eSMark Johnston  * estimated working set size.
1515aaa8bb16SJeff Roberson  */
15162760658bSAlexander Motin static bool
15172760658bSAlexander Motin bucket_cache_reclaim_domain(uma_zone_t zone, bool drain, bool trim, int domain)
1518aaa8bb16SJeff Roberson {
1519ab3185d1SJeff Roberson 	uma_zone_domain_t zdom;
1520aaa8bb16SJeff Roberson 	uma_bucket_t bucket;
1521c6fd3e23SJeff Roberson 	long target;
15222760658bSAlexander Motin 	bool done = false;
15238355f576SJeff Roberson 
1524c6fd3e23SJeff Roberson 	/*
152591d947bfSJeff Roberson 	 * The cross bucket is partially filled and not part of
152691d947bfSJeff Roberson 	 * the item count.  Reclaim it individually here.
152791d947bfSJeff Roberson 	 */
152854f421f9SMark Johnston 	zdom = ZDOM_GET(zone, domain);
1529226dd6dbSJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_SMR) == 0 || drain) {
153091d947bfSJeff Roberson 		ZONE_CROSS_LOCK(zone);
153191d947bfSJeff Roberson 		bucket = zdom->uzd_cross;
153291d947bfSJeff Roberson 		zdom->uzd_cross = NULL;
153391d947bfSJeff Roberson 		ZONE_CROSS_UNLOCK(zone);
1534c6fd3e23SJeff Roberson 		if (bucket != NULL)
153591d947bfSJeff Roberson 			bucket_free(zone, bucket, NULL);
153691d947bfSJeff Roberson 	}
153791d947bfSJeff Roberson 
153891d947bfSJeff Roberson 	/*
153908cfa56eSMark Johnston 	 * If we were asked to drain the zone, we are done only once
15402760658bSAlexander Motin 	 * this bucket cache is empty.  If trim, we reclaim items in
15412760658bSAlexander Motin 	 * excess of the zone's estimated working set size.  Multiple
15422760658bSAlexander Motin 	 * consecutive calls will shrink the WSS and so reclaim more.
15432760658bSAlexander Motin 	 * If neither drain nor trim, then voluntarily reclaim 1/4
15442760658bSAlexander Motin 	 * (to reduce the initial spike) of items not used for a long time.
154508cfa56eSMark Johnston 	 */
1546c6fd3e23SJeff Roberson 	ZDOM_LOCK(zdom);
15472760658bSAlexander Motin 	zone_domain_update_wss(zdom);
15482760658bSAlexander Motin 	if (drain)
15492760658bSAlexander Motin 		target = 0;
15502760658bSAlexander Motin 	else if (trim)
15512760658bSAlexander Motin 		target = zdom->uzd_wss;
15522760658bSAlexander Motin 	else if (zdom->uzd_timin > 900 / UMA_TIMEOUT)
15532760658bSAlexander Motin 		target = zdom->uzd_nitems - zdom->uzd_limin / 4;
15542760658bSAlexander Motin 	else {
15552760658bSAlexander Motin 		ZDOM_UNLOCK(zdom);
15562760658bSAlexander Motin 		return (done);
15572760658bSAlexander Motin 	}
15582760658bSAlexander Motin 	while ((bucket = STAILQ_FIRST(&zdom->uzd_buckets)) != NULL &&
15592760658bSAlexander Motin 	    zdom->uzd_nitems >= target + bucket->ub_cnt) {
1560c6fd3e23SJeff Roberson 		bucket = zone_fetch_bucket(zone, zdom, true);
156108cfa56eSMark Johnston 		if (bucket == NULL)
156208cfa56eSMark Johnston 			break;
15636fd34d6fSJeff Roberson 		bucket_free(zone, bucket, NULL);
15642760658bSAlexander Motin 		done = true;
1565c6fd3e23SJeff Roberson 		ZDOM_LOCK(zdom);
15668355f576SJeff Roberson 	}
1567c6fd3e23SJeff Roberson 	ZDOM_UNLOCK(zdom);
15682760658bSAlexander Motin 	return (done);
1569ab3185d1SJeff Roberson }
157054f421f9SMark Johnston 
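/*
 * Illustrative sketch, not part of uma_core.c: the three reclaim targets
 * chosen in bucket_cache_reclaim_domain(), written as a plain function.
 * The struct and demo_ names are hypothetical; the block is wrapped in
 * #if 0 so it is never built.  With UMA_TIMEOUT at 20 seconds, the idle
 * path only triggers after roughly 15 minutes without pressure.
 */
#if 0
#define	DEMO_UMA_TIMEOUT	20	/* seconds between zone timeouts */

struct demo_dom_stats {
	long	nitems;	/* items cached in this domain */
	long	wss;	/* estimated working set size */
	long	limin;	/* long-term minimum item count */
	long	timin;	/* periods since the cache nearly ran dry */
};

static long
demo_reclaim_target(const struct demo_dom_stats *d, int drain, int trim)
{

	if (drain)				/* tear down: empty the cache */
		return (0);
	if (trim)				/* explicit trim: keep the WSS */
		return (d->wss);
	if (d->timin > 900 / DEMO_UMA_TIMEOUT)	/* long idle: release limin/4 */
		return (d->nitems - d->limin / 4);
	return (d->nitems);			/* otherwise reclaim nothing */
}
#endif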
157154f421f9SMark Johnston static void
1572aabe13f1SMark Johnston bucket_cache_reclaim(uma_zone_t zone, bool drain, int domain)
157354f421f9SMark Johnston {
157454f421f9SMark Johnston 	int i;
157554f421f9SMark Johnston 
157654f421f9SMark Johnston 	/*
157754f421f9SMark Johnston 	 * Shrink the zone bucket size to ensure that the per-CPU caches
157854f421f9SMark Johnston 	 * don't grow too large.
157954f421f9SMark Johnston 	 */
158054f421f9SMark Johnston 	if (zone->uz_bucket_size > zone->uz_bucket_size_min)
158154f421f9SMark Johnston 		zone->uz_bucket_size--;
158254f421f9SMark Johnston 
1583aabe13f1SMark Johnston 	if (domain != UMA_ANYDOMAIN &&
1584aabe13f1SMark Johnston 	    (zone->uz_flags & UMA_ZONE_ROUNDROBIN) == 0) {
15852760658bSAlexander Motin 		bucket_cache_reclaim_domain(zone, drain, true, domain);
1586aabe13f1SMark Johnston 	} else {
158754f421f9SMark Johnston 		for (i = 0; i < vm_ndomains; i++)
15882760658bSAlexander Motin 			bucket_cache_reclaim_domain(zone, drain, true, i);
15898355f576SJeff Roberson 	}
1590aabe13f1SMark Johnston }
1591fc03d22bSJeff Roberson 
1592fc03d22bSJeff Roberson static void
1593fc03d22bSJeff Roberson keg_free_slab(uma_keg_t keg, uma_slab_t slab, int start)
1594fc03d22bSJeff Roberson {
1595fc03d22bSJeff Roberson 	uint8_t *mem;
159609c8cb71SMark Johnston 	size_t size;
1597fc03d22bSJeff Roberson 	int i;
1598fc03d22bSJeff Roberson 	uint8_t flags;
1599fc03d22bSJeff Roberson 
16001431a748SGleb Smirnoff 	CTR4(KTR_UMA, "keg_free_slab keg %s(%p) slab %p, returning %d bytes",
16011431a748SGleb Smirnoff 	    keg->uk_name, keg, slab, PAGE_SIZE * keg->uk_ppera);
16021431a748SGleb Smirnoff 
16031e0701e1SJeff Roberson 	mem = slab_data(slab, keg);
160409c8cb71SMark Johnston 	size = PAGE_SIZE * keg->uk_ppera;
160509c8cb71SMark Johnston 
160609c8cb71SMark Johnston 	kasan_mark_slab_valid(keg, mem);
1607fc03d22bSJeff Roberson 	if (keg->uk_fini != NULL) {
160809c8cb71SMark Johnston 		for (i = start - 1; i > -1; i--)
1609c5deaf04SGleb Smirnoff #ifdef INVARIANTS
1610c5deaf04SGleb Smirnoff 		/*
1611c5deaf04SGleb Smirnoff 		 * trash_fini implies that dtor was trash_dtor. trash_fini
1612c5deaf04SGleb Smirnoff 		 * would check that memory hasn't been modified since free,
1613c5deaf04SGleb Smirnoff 		 * which executed trash_dtor.
1614c5deaf04SGleb Smirnoff 		 * That's why we need to run uma_dbg_kskip() check here,
1615c5deaf04SGleb Smirnoff 		 * albeit we don't make skip check for other init/fini
1616c5deaf04SGleb Smirnoff 		 * invocations.
1617c5deaf04SGleb Smirnoff 		 */
16181e0701e1SJeff Roberson 		if (!uma_dbg_kskip(keg, slab_item(slab, keg, i)) ||
1619c5deaf04SGleb Smirnoff 		    keg->uk_fini != trash_fini)
1620c5deaf04SGleb Smirnoff #endif
16211e0701e1SJeff Roberson 			keg->uk_fini(slab_item(slab, keg, i), keg->uk_size);
1622fc03d22bSJeff Roberson 	}
162309c8cb71SMark Johnston 	flags = slab->us_flags;
162409c8cb71SMark Johnston 	if (keg->uk_flags & UMA_ZFLAG_OFFPAGE) {
16259b8db4d0SRyan Libby 		zone_free_item(slabzone(keg->uk_ipers), slab_tohashslab(slab),
16269b8db4d0SRyan Libby 		    NULL, SKIP_NONE);
162709c8cb71SMark Johnston 	}
162809c8cb71SMark Johnston 	keg->uk_freef(mem, size, flags);
162909c8cb71SMark Johnston 	uma_total_dec(size);
16308355f576SJeff Roberson }
16318355f576SJeff Roberson 
1632f09cbea3SMark Johnston static void
1633f09cbea3SMark Johnston keg_drain_domain(uma_keg_t keg, int domain)
1634f09cbea3SMark Johnston {
1635f09cbea3SMark Johnston 	struct slabhead freeslabs;
1636f09cbea3SMark Johnston 	uma_domain_t dom;
1637f09cbea3SMark Johnston 	uma_slab_t slab, tmp;
1638f09cbea3SMark Johnston 	uint32_t i, stofree, stokeep, partial;
1639f09cbea3SMark Johnston 
1640f09cbea3SMark Johnston 	dom = &keg->uk_domain[domain];
1641f09cbea3SMark Johnston 	LIST_INIT(&freeslabs);
1642f09cbea3SMark Johnston 
1643f09cbea3SMark Johnston 	CTR4(KTR_UMA, "keg_drain %s(%p) domain %d free items: %u",
1644575a4437SEd Maste 	    keg->uk_name, keg, domain, dom->ud_free_items);
1645f09cbea3SMark Johnston 
1646f09cbea3SMark Johnston 	KEG_LOCK(keg, domain);
1647f09cbea3SMark Johnston 
1648f09cbea3SMark Johnston 	/*
1649f09cbea3SMark Johnston 	 * Are the free items in partially allocated slabs sufficient to meet
1650f09cbea3SMark Johnston 	 * the reserve? If not, compute the number of fully free slabs that must
1651f09cbea3SMark Johnston 	 * be kept.
1652f09cbea3SMark Johnston 	 */
1653f09cbea3SMark Johnston 	partial = dom->ud_free_items - dom->ud_free_slabs * keg->uk_ipers;
1654f09cbea3SMark Johnston 	if (partial < keg->uk_reserve) {
1655f09cbea3SMark Johnston 		stokeep = min(dom->ud_free_slabs,
1656f09cbea3SMark Johnston 		    howmany(keg->uk_reserve - partial, keg->uk_ipers));
1657f09cbea3SMark Johnston 	} else {
1658f09cbea3SMark Johnston 		stokeep = 0;
1659f09cbea3SMark Johnston 	}
1660f09cbea3SMark Johnston 	stofree = dom->ud_free_slabs - stokeep;
1661f09cbea3SMark Johnston 
1662f09cbea3SMark Johnston 	/*
1663f09cbea3SMark Johnston 	 * Partition the free slabs into two sets: those that must be kept in
1664f09cbea3SMark Johnston 	 * order to maintain the reserve, and those that may be released back to
1665f09cbea3SMark Johnston 	 * the system.  Since one set may be much larger than the other,
1666f09cbea3SMark Johnston 	 * populate the smaller of the two sets and swap them if necessary.
1667f09cbea3SMark Johnston 	 */
1668f09cbea3SMark Johnston 	for (i = min(stofree, stokeep); i > 0; i--) {
1669f09cbea3SMark Johnston 		slab = LIST_FIRST(&dom->ud_free_slab);
1670f09cbea3SMark Johnston 		LIST_REMOVE(slab, us_link);
1671f09cbea3SMark Johnston 		LIST_INSERT_HEAD(&freeslabs, slab, us_link);
1672f09cbea3SMark Johnston 	}
1673f09cbea3SMark Johnston 	if (stofree > stokeep)
1674f09cbea3SMark Johnston 		LIST_SWAP(&freeslabs, &dom->ud_free_slab, uma_slab, us_link);
1675f09cbea3SMark Johnston 
1676f09cbea3SMark Johnston 	if ((keg->uk_flags & UMA_ZFLAG_HASH) != 0) {
1677f09cbea3SMark Johnston 		LIST_FOREACH(slab, &freeslabs, us_link)
1678f09cbea3SMark Johnston 			UMA_HASH_REMOVE(&keg->uk_hash, slab);
1679f09cbea3SMark Johnston 	}
1680f09cbea3SMark Johnston 	dom->ud_free_items -= stofree * keg->uk_ipers;
1681f09cbea3SMark Johnston 	dom->ud_free_slabs -= stofree;
1682f09cbea3SMark Johnston 	dom->ud_pages -= stofree * keg->uk_ppera;
1683f09cbea3SMark Johnston 	KEG_UNLOCK(keg, domain);
1684f09cbea3SMark Johnston 
1685f09cbea3SMark Johnston 	LIST_FOREACH_SAFE(slab, &freeslabs, us_link, tmp)
1686f09cbea3SMark Johnston 		keg_free_slab(keg, slab, keg->uk_ipers);
1687f09cbea3SMark Johnston }
1688f09cbea3SMark Johnston 
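/*
 * Illustrative sketch, not part of uma_core.c: the reserve arithmetic used
 * in keg_drain_domain(), with hypothetical numbers.  A keg with 4 items per
 * slab, 10 free items of which 2 whole slabs (8 items) are free, and a
 * reserve of 5 has partial = 10 - 2 * 4 = 2 free items in partially used
 * slabs, so howmany(5 - 2, 4) = 1 free slab must be kept and 1 may be
 * released.  The demo_ names are hypothetical; the block is never built.
 */
#if 0
#define	demo_howmany(x, y)	(((x) + ((y) - 1)) / (y))
#define	demo_min(a, b)		((a) < (b) ? (a) : (b))

static unsigned int
demo_slabs_to_free(unsigned int free_items, unsigned int free_slabs,
    unsigned int ipers, unsigned int reserve)
{
	unsigned int partial, stokeep;

	/* Free items sitting in partially used slabs. */
	partial = free_items - free_slabs * ipers;
	if (partial < reserve)
		stokeep = demo_min(free_slabs,
		    demo_howmany(reserve - partial, ipers));
	else
		stokeep = 0;
	return (free_slabs - stokeep);
}
#endif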
16898355f576SJeff Roberson /*
1690e20a199fSJeff Roberson  * Frees pages from a keg back to the system.  This is done on demand from
16918355f576SJeff Roberson  * the pageout daemon.
16928355f576SJeff Roberson  *
1693e20a199fSJeff Roberson  * Returns nothing.
16948355f576SJeff Roberson  */
1695e20a199fSJeff Roberson static void
1696aabe13f1SMark Johnston keg_drain(uma_keg_t keg, int domain)
16978355f576SJeff Roberson {
1698f09cbea3SMark Johnston 	int i;
16998355f576SJeff Roberson 
1700f09cbea3SMark Johnston 	if ((keg->uk_flags & UMA_ZONE_NOFREE) != 0)
17018355f576SJeff Roberson 		return;
1702aabe13f1SMark Johnston 	if (domain != UMA_ANYDOMAIN) {
1703aabe13f1SMark Johnston 		keg_drain_domain(keg, domain);
1704aabe13f1SMark Johnston 	} else {
1705f09cbea3SMark Johnston 		for (i = 0; i < vm_ndomains; i++)
1706f09cbea3SMark Johnston 			keg_drain_domain(keg, i);
17078355f576SJeff Roberson 	}
1708aabe13f1SMark Johnston }
17098355f576SJeff Roberson 
1710e20a199fSJeff Roberson static void
1711aabe13f1SMark Johnston zone_reclaim(uma_zone_t zone, int domain, int waitok, bool drain)
1712e20a199fSJeff Roberson {
17138355f576SJeff Roberson 	/*
1714aabe13f1SMark Johnston 	 * Count active reclaim operations in order to interlock with
1715aabe13f1SMark Johnston 	 * zone_dtor(), which removes the zone from global lists before
1716aabe13f1SMark Johnston 	 * attempting to reclaim items itself.
1717aabe13f1SMark Johnston 	 *
1718aabe13f1SMark Johnston 	 * The zone may be destroyed while sleeping, so only zone_dtor() should
1719aabe13f1SMark Johnston 	 * specify M_WAITOK.
1720e20a199fSJeff Roberson 	 */
1721e20a199fSJeff Roberson 	ZONE_LOCK(zone);
1722aabe13f1SMark Johnston 	if (waitok == M_WAITOK) {
1723aabe13f1SMark Johnston 		while (zone->uz_reclaimers > 0)
1724aabe13f1SMark Johnston 			msleep(zone, ZONE_LOCKPTR(zone), PVM, "zonedrain", 1);
1725e20a199fSJeff Roberson 	}
1726aabe13f1SMark Johnston 	zone->uz_reclaimers++;
1727e20a199fSJeff Roberson 	ZONE_UNLOCK(zone);
1728aabe13f1SMark Johnston 	bucket_cache_reclaim(zone, drain, domain);
172908cfa56eSMark Johnston 
173008034d10SKonstantin Belousov 	if ((zone->uz_flags & UMA_ZFLAG_CACHE) == 0)
1731aabe13f1SMark Johnston 		keg_drain(zone->uz_keg, domain);
1732e20a199fSJeff Roberson 	ZONE_LOCK(zone);
1733aabe13f1SMark Johnston 	zone->uz_reclaimers--;
1734aabe13f1SMark Johnston 	if (zone->uz_reclaimers == 0)
1735e20a199fSJeff Roberson 		wakeup(zone);
1736e20a199fSJeff Roberson 	ZONE_UNLOCK(zone);
1737e20a199fSJeff Roberson }
1738e20a199fSJeff Roberson 
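/*
 * Illustrative sketch, not part of uma_core.c: a userspace analogue of the
 * reclaimer interlock in zone_reclaim(), using pthreads and hypothetical
 * demo_ names (initialization of the lock and condvar is omitted).  Each
 * reclaim bumps a counter under the lock; a caller that passes "wait"
 * (the zone destructor in the real code) sleeps until all in-flight
 * reclaims have finished.  Wrapped in #if 0 so it is never built.
 */
#if 0
#include <pthread.h>
#include <stdbool.h>

struct demo_zone {
	pthread_mutex_t	lock;
	pthread_cond_t	cv;
	int		reclaimers;
};

static void
demo_reclaim(struct demo_zone *z, bool wait)
{

	pthread_mutex_lock(&z->lock);
	if (wait) {
		while (z->reclaimers > 0)
			pthread_cond_wait(&z->cv, &z->lock);
	}
	z->reclaimers++;
	pthread_mutex_unlock(&z->lock);

	/* ... reclaim bucket caches without holding the lock ... */

	pthread_mutex_lock(&z->lock);
	if (--z->reclaimers == 0)
		pthread_cond_broadcast(&z->cv);
	pthread_mutex_unlock(&z->lock);
}
#endif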
1739e20a199fSJeff Roberson /*
17408b987a77SJeff Roberson  * Allocates a new slab for a keg and inserts it into the partial slab list.
17418b987a77SJeff Roberson  * The keg should be unlocked on entry.  If the allocation succeeds it will
17428b987a77SJeff Roberson  * be locked on return.
17438355f576SJeff Roberson  *
17448355f576SJeff Roberson  * Arguments:
174586220393SMark Johnston  *	flags   Wait flags for the item initialization routine
174686220393SMark Johnston  *	aflags  Wait flags for the slab allocation
17478355f576SJeff Roberson  *
17488355f576SJeff Roberson  * Returns:
17498355f576SJeff Roberson  *	The slab that was allocated or NULL if there is no memory and the
17508355f576SJeff Roberson  *	caller specified M_NOWAIT.
17518355f576SJeff Roberson  */
17528355f576SJeff Roberson static uma_slab_t
175386220393SMark Johnston keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int domain, int flags,
175486220393SMark Johnston     int aflags)
17558355f576SJeff Roberson {
17568b987a77SJeff Roberson 	uma_domain_t dom;
1757099a0e58SBosko Milekic 	uma_slab_t slab;
17582e47807cSJeff Roberson 	unsigned long size;
175985dcf349SGleb Smirnoff 	uint8_t *mem;
176086220393SMark Johnston 	uint8_t sflags;
17618355f576SJeff Roberson 	int i;
17628355f576SJeff Roberson 
1763b9fd884aSColin Percival 	TSENTER();
1764b9fd884aSColin Percival 
1765ab3185d1SJeff Roberson 	KASSERT(domain >= 0 && domain < vm_ndomains,
1766ab3185d1SJeff Roberson 	    ("keg_alloc_slab: domain %d out of range", domain));
1767a553d4b8SJeff Roberson 
1768194a979eSMark Johnston 	slab = NULL;
1769194a979eSMark Johnston 	mem = NULL;
177054c5ae80SRyan Libby 	if (keg->uk_flags & UMA_ZFLAG_OFFPAGE) {
17719b8db4d0SRyan Libby 		uma_hash_slab_t hslab;
17729b8db4d0SRyan Libby 		hslab = zone_alloc_item(slabzone(keg->uk_ipers), NULL,
17739b8db4d0SRyan Libby 		    domain, aflags);
17749b8db4d0SRyan Libby 		if (hslab == NULL)
1775727c6918SJeff Roberson 			goto fail;
17769b8db4d0SRyan Libby 		slab = &hslab->uhs_slab;
1777a553d4b8SJeff Roberson 	}
1778a553d4b8SJeff Roberson 
17793370c5bfSJeff Roberson 	/*
17803370c5bfSJeff Roberson 	 * This reproduces the old vm_zone behavior of zero filling pages the
17813370c5bfSJeff Roberson 	 * first time they are added to a zone.
17823370c5bfSJeff Roberson 	 *
17833370c5bfSJeff Roberson 	 * Malloced items are zeroed in uma_zalloc.
17843370c5bfSJeff Roberson 	 */
17853370c5bfSJeff Roberson 
1786099a0e58SBosko Milekic 	if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
178786220393SMark Johnston 		aflags |= M_ZERO;
17883370c5bfSJeff Roberson 	else
178986220393SMark Johnston 		aflags &= ~M_ZERO;
17903370c5bfSJeff Roberson 
1791263811f7SKip Macy 	if (keg->uk_flags & UMA_ZONE_NODUMP)
179286220393SMark Johnston 		aflags |= M_NODUMP;
1793263811f7SKip Macy 
179478f3e0f6SBojan Novković 	if (keg->uk_flags & UMA_ZONE_NOFREE)
179578f3e0f6SBojan Novković 		aflags |= M_NEVERFREED;
179678f3e0f6SBojan Novković 
1797e20a199fSJeff Roberson 	/* zone is passed for legacy reasons. */
1798194a979eSMark Johnston 	size = keg->uk_ppera * PAGE_SIZE;
179909c8cb71SMark Johnston 	mem = keg->uk_allocf(zone, size, domain, &sflags, aflags);
1800a553d4b8SJeff Roberson 	if (mem == NULL) {
180154c5ae80SRyan Libby 		if (keg->uk_flags & UMA_ZFLAG_OFFPAGE)
18029b8db4d0SRyan Libby 			zone_free_item(slabzone(keg->uk_ipers),
18039b8db4d0SRyan Libby 			    slab_tohashslab(slab), NULL, SKIP_NONE);
1804727c6918SJeff Roberson 		goto fail;
1805a553d4b8SJeff Roberson 	}
18062e47807cSJeff Roberson 	uma_total_inc(size);
18078355f576SJeff Roberson 
18088b987a77SJeff Roberson 	/* For HASH zones all pages go to the same uma_domain. */
180954c5ae80SRyan Libby 	if ((keg->uk_flags & UMA_ZFLAG_HASH) != 0)
18108b987a77SJeff Roberson 		domain = 0;
18118b987a77SJeff Roberson 
1812a932a5a6SMark Johnston 	kmsan_mark(mem, size,
1813a932a5a6SMark Johnston 	    (aflags & M_ZERO) != 0 ? KMSAN_STATE_INITED : KMSAN_STATE_UNINIT);
1814a932a5a6SMark Johnston 
18155c0e403bSJeff Roberson 	/* Point the slab into the allocated memory */
181654c5ae80SRyan Libby 	if (!(keg->uk_flags & UMA_ZFLAG_OFFPAGE))
1817099a0e58SBosko Milekic 		slab = (uma_slab_t)(mem + keg->uk_pgoff);
18181e0701e1SJeff Roberson 	else
18199b8db4d0SRyan Libby 		slab_tohashslab(slab)->uhs_data = mem;
18205c0e403bSJeff Roberson 
182154c5ae80SRyan Libby 	if (keg->uk_flags & UMA_ZFLAG_VTOSLAB)
1822099a0e58SBosko Milekic 		for (i = 0; i < keg->uk_ppera; i++)
1823584061b4SJeff Roberson 			vsetzoneslab((vm_offset_t)mem + (i * PAGE_SIZE),
1824584061b4SJeff Roberson 			    zone, slab);
18258355f576SJeff Roberson 
1826099a0e58SBosko Milekic 	slab->us_freecount = keg->uk_ipers;
182786220393SMark Johnston 	slab->us_flags = sflags;
1828ab3185d1SJeff Roberson 	slab->us_domain = domain;
18298b987a77SJeff Roberson 
18309b78b1f4SJeff Roberson 	BIT_FILL(keg->uk_ipers, &slab->us_free);
1831ef72505eSJeff Roberson #ifdef INVARIANTS
1832815db204SRyan Libby 	BIT_ZERO(keg->uk_ipers, slab_dbg_bits(slab, keg));
1833ef72505eSJeff Roberson #endif
1834099a0e58SBosko Milekic 
1835b23f72e9SBrian Feldman 	if (keg->uk_init != NULL) {
1836099a0e58SBosko Milekic 		for (i = 0; i < keg->uk_ipers; i++)
18371e0701e1SJeff Roberson 			if (keg->uk_init(slab_item(slab, keg, i),
183886220393SMark Johnston 			    keg->uk_size, flags) != 0)
1839b23f72e9SBrian Feldman 				break;
1840b23f72e9SBrian Feldman 		if (i != keg->uk_ipers) {
1841fc03d22bSJeff Roberson 			keg_free_slab(keg, slab, i);
1842727c6918SJeff Roberson 			goto fail;
1843b23f72e9SBrian Feldman 		}
1844b23f72e9SBrian Feldman 	}
184509c8cb71SMark Johnston 	kasan_mark_slab_invalid(keg, mem);
18468b987a77SJeff Roberson 	KEG_LOCK(keg, domain);
18475c0e403bSJeff Roberson 
18481431a748SGleb Smirnoff 	CTR3(KTR_UMA, "keg_alloc_slab: allocated slab %p for %s(%p)",
18491431a748SGleb Smirnoff 	    slab, keg->uk_name, keg);
18501431a748SGleb Smirnoff 
185154c5ae80SRyan Libby 	if (keg->uk_flags & UMA_ZFLAG_HASH)
1852099a0e58SBosko Milekic 		UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
18538355f576SJeff Roberson 
18548b987a77SJeff Roberson 	/*
18558b987a77SJeff Roberson 	 * If we got a slab here it's safe to mark it partially used
18568b987a77SJeff Roberson 	 * and return.  We assume that the caller is going to remove
18578b987a77SJeff Roberson 	 * at least one item.
18588b987a77SJeff Roberson 	 */
18598b987a77SJeff Roberson 	dom = &keg->uk_domain[domain];
18608b987a77SJeff Roberson 	LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link);
18618b987a77SJeff Roberson 	dom->ud_pages += keg->uk_ppera;
18624ab3aee8SMark Johnston 	dom->ud_free_items += keg->uk_ipers;
18638355f576SJeff Roberson 
1864b9fd884aSColin Percival 	TSEXIT();
18658355f576SJeff Roberson 	return (slab);
1866727c6918SJeff Roberson 
1867727c6918SJeff Roberson fail:
1868727c6918SJeff Roberson 	return (NULL);
18698355f576SJeff Roberson }
18708355f576SJeff Roberson 
18718355f576SJeff Roberson /*
1872537f92cdSMark Johnston  * This function is intended to be used early on in place of page_alloc().  It
1873537f92cdSMark Johnston  * performs contiguous physical memory allocations and uses a bump allocator for
1874537f92cdSMark Johnston  * KVA, so is usable before the kernel map is initialized.
1875009b6fcbSJeff Roberson  */
1876009b6fcbSJeff Roberson static void *
1877ab3185d1SJeff Roberson startup_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
1878ab3185d1SJeff Roberson     int wait)
1879009b6fcbSJeff Roberson {
1880a81c400eSJeff Roberson 	vm_paddr_t pa;
1881a81c400eSJeff Roberson 	vm_page_t m;
188284c39222SMark Johnston 	int i, pages;
1883099a0e58SBosko Milekic 
1884f7d35785SGleb Smirnoff 	pages = howmany(bytes, PAGE_SIZE);
1885f7d35785SGleb Smirnoff 	KASSERT(pages > 0, ("%s can't reserve 0 pages", __func__));
1886a81c400eSJeff Roberson 
1887f7d35785SGleb Smirnoff 	*pflag = UMA_SLAB_BOOT;
188884c39222SMark Johnston 	m = vm_page_alloc_noobj_contig_domain(domain, malloc2vm_flags(wait) |
188984c39222SMark Johnston 	    VM_ALLOC_WIRED, pages, (vm_paddr_t)0, ~(vm_paddr_t)0, 1, 0,
189084c39222SMark Johnston 	    VM_MEMATTR_DEFAULT);
1891a81c400eSJeff Roberson 	if (m == NULL)
1892a81c400eSJeff Roberson 		return (NULL);
1893a81c400eSJeff Roberson 
1894a81c400eSJeff Roberson 	pa = VM_PAGE_TO_PHYS(m);
1895a81c400eSJeff Roberson 	for (i = 0; i < pages; i++, pa += PAGE_SIZE) {
18960a44b8a5SBojan Novković #if MINIDUMP_PAGE_TRACKING && MINIDUMP_STARTUP_PAGE_TRACKING
1897a81c400eSJeff Roberson 		if ((wait & M_NODUMP) == 0)
1898a81c400eSJeff Roberson 			dump_add_page(pa);
1899a81c400eSJeff Roberson #endif
1900a81c400eSJeff Roberson 	}
1901f7d35785SGleb Smirnoff 
190284c39222SMark Johnston 	/* Allocate KVA and indirectly advance bootmem. */
190384c39222SMark Johnston 	return ((void *)pmap_map(&bootmem, m->phys_addr,
190484c39222SMark Johnston 	    m->phys_addr + (pages * PAGE_SIZE), VM_PROT_READ | VM_PROT_WRITE));
1905f7d35785SGleb Smirnoff }
1906f7d35785SGleb Smirnoff 
1907a81c400eSJeff Roberson static void
1908a81c400eSJeff Roberson startup_free(void *mem, vm_size_t bytes)
1909a81c400eSJeff Roberson {
1910a81c400eSJeff Roberson 	vm_offset_t va;
1911a81c400eSJeff Roberson 	vm_page_t m;
1912a81c400eSJeff Roberson 
1913a81c400eSJeff Roberson 	va = (vm_offset_t)mem;
1914a81c400eSJeff Roberson 	m = PHYS_TO_VM_PAGE(pmap_kextract(va));
1915663de81fSMark Johnston 
1916663de81fSMark Johnston 	/*
1917663de81fSMark Johnston 	 * startup_alloc() returns direct-mapped slabs on some platforms.  Avoid
1918663de81fSMark Johnston 	 * unmapping ranges of the direct map.
1919663de81fSMark Johnston 	 */
1920663de81fSMark Johnston 	if (va >= bootstart && va + bytes <= bootmem)
1921a81c400eSJeff Roberson 		pmap_remove(kernel_pmap, va, va + bytes);
1922a81c400eSJeff Roberson 	for (; bytes != 0; bytes -= PAGE_SIZE, m++) {
19230a44b8a5SBojan Novković #if MINIDUMP_PAGE_TRACKING && MINIDUMP_STARTUP_PAGE_TRACKING
1924a81c400eSJeff Roberson 		dump_drop_page(VM_PAGE_TO_PHYS(m));
1925a81c400eSJeff Roberson #endif
1926a81c400eSJeff Roberson 		vm_page_unwire_noq(m);
1927a81c400eSJeff Roberson 		vm_page_free(m);
1928a81c400eSJeff Roberson 	}
1929a81c400eSJeff Roberson }
1930a81c400eSJeff Roberson 
1931f7d35785SGleb Smirnoff /*
19328355f576SJeff Roberson  * Allocates a number of pages from the system
19338355f576SJeff Roberson  *
19348355f576SJeff Roberson  * Arguments:
19358355f576SJeff Roberson  *	bytes  The number of bytes requested
19368355f576SJeff Roberson  *	wait  Shall we wait?
19378355f576SJeff Roberson  *
19388355f576SJeff Roberson  * Returns:
19398355f576SJeff Roberson  *	A pointer to the alloced memory or possibly
19408355f576SJeff Roberson  *	NULL if M_NOWAIT is set.
19418355f576SJeff Roberson  */
19428355f576SJeff Roberson static void *
1943ab3185d1SJeff Roberson page_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
1944ab3185d1SJeff Roberson     int wait)
19458355f576SJeff Roberson {
19468355f576SJeff Roberson 	void *p;	/* Returned page */
19478355f576SJeff Roberson 
19482e47807cSJeff Roberson 	*pflag = UMA_SLAB_KERNEL;
1949f49fd63aSJohn Baldwin 	p = kmem_malloc_domainset(DOMAINSET_FIXED(domain), bytes, wait);
19508355f576SJeff Roberson 
19518355f576SJeff Roberson 	return (p);
19528355f576SJeff Roberson }
19538355f576SJeff Roberson 
1954ab3059a8SMatt Macy static void *
1955ab3059a8SMatt Macy pcpu_page_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
1956ab3059a8SMatt Macy     int wait)
1957ab3059a8SMatt Macy {
1958ab3059a8SMatt Macy 	struct pglist alloctail;
1959ab3059a8SMatt Macy 	vm_offset_t addr, zkva;
1960ab3059a8SMatt Macy 	int cpu, flags;
1961ab3059a8SMatt Macy 	vm_page_t p, p_next;
1962ab3059a8SMatt Macy #ifdef NUMA
1963ab3059a8SMatt Macy 	struct pcpu *pc;
1964ab3059a8SMatt Macy #endif
1965ab3059a8SMatt Macy 
1966ab3059a8SMatt Macy 	MPASS(bytes == (mp_maxid + 1) * PAGE_SIZE);
1967ab3059a8SMatt Macy 
1968013072f0SMark Johnston 	TAILQ_INIT(&alloctail);
1969a4667e09SMark Johnston 	flags = VM_ALLOC_SYSTEM | VM_ALLOC_WIRED | malloc2vm_flags(wait);
1970013072f0SMark Johnston 	*pflag = UMA_SLAB_KERNEL;
1971ab3059a8SMatt Macy 	for (cpu = 0; cpu <= mp_maxid; cpu++) {
1972ab3059a8SMatt Macy 		if (CPU_ABSENT(cpu)) {
1973a4667e09SMark Johnston 			p = vm_page_alloc_noobj(flags);
1974ab3059a8SMatt Macy 		} else {
1975ab3059a8SMatt Macy #ifndef NUMA
1976a4667e09SMark Johnston 			p = vm_page_alloc_noobj(flags);
1977ab3059a8SMatt Macy #else
1978ab3059a8SMatt Macy 			pc = pcpu_find(cpu);
197920526802SAndrew Gallatin 			if (__predict_false(VM_DOMAIN_EMPTY(pc->pc_domain)))
198020526802SAndrew Gallatin 				p = NULL;
198120526802SAndrew Gallatin 			else
1982a4667e09SMark Johnston 				p = vm_page_alloc_noobj_domain(pc->pc_domain,
1983a4667e09SMark Johnston 				    flags);
1984ab3059a8SMatt Macy 			if (__predict_false(p == NULL))
1985a4667e09SMark Johnston 				p = vm_page_alloc_noobj(flags);
1986ab3059a8SMatt Macy #endif
1987ab3059a8SMatt Macy 		}
1988ab3059a8SMatt Macy 		if (__predict_false(p == NULL))
1989ab3059a8SMatt Macy 			goto fail;
1990ab3059a8SMatt Macy 		TAILQ_INSERT_TAIL(&alloctail, p, listq);
1991ab3059a8SMatt Macy 	}
1992ab3059a8SMatt Macy 	if ((addr = kva_alloc(bytes)) == 0)
1993ab3059a8SMatt Macy 		goto fail;
1994ab3059a8SMatt Macy 	zkva = addr;
1995ab3059a8SMatt Macy 	TAILQ_FOREACH(p, &alloctail, listq) {
1996ab3059a8SMatt Macy 		pmap_qenter(zkva, &p, 1);
1997ab3059a8SMatt Macy 		zkva += PAGE_SIZE;
1998ab3059a8SMatt Macy 	}
1999ab3059a8SMatt Macy 	return ((void*)addr);
2000ab3059a8SMatt Macy fail:
2001ab3059a8SMatt Macy 	TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) {
200288ea538aSMark Johnston 		vm_page_unwire_noq(p);
2003ab3059a8SMatt Macy 		vm_page_free(p);
2004ab3059a8SMatt Macy 	}
2005ab3059a8SMatt Macy 	return (NULL);
2006ab3059a8SMatt Macy }
2007ab3059a8SMatt Macy 
20088355f576SJeff Roberson /*
2009a9d6f1feSMark Johnston  * Allocates a number of pages not belonging to a VM object
20108355f576SJeff Roberson  *
20118355f576SJeff Roberson  * Arguments:
20128355f576SJeff Roberson  *	bytes  The number of bytes requested
20138355f576SJeff Roberson  *	wait   Shall we wait?
20148355f576SJeff Roberson  *
20158355f576SJeff Roberson  * Returns:
20168355f576SJeff Roberson  *	A pointer to the alloced memory or possibly
20178355f576SJeff Roberson  *	NULL if M_NOWAIT is set.
20188355f576SJeff Roberson  */
20198355f576SJeff Roberson static void *
2020ab3185d1SJeff Roberson noobj_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *flags,
2021ab3185d1SJeff Roberson     int wait)
20228355f576SJeff Roberson {
2023a4915c21SAttilio Rao 	TAILQ_HEAD(, vm_page) alloctail;
2024a4915c21SAttilio Rao 	u_long npages;
2025b245ac95SAlan Cox 	vm_offset_t retkva, zkva;
2026a4915c21SAttilio Rao 	vm_page_t p, p_next;
2027e20a199fSJeff Roberson 	uma_keg_t keg;
2028a4667e09SMark Johnston 	int req;
20298355f576SJeff Roberson 
2030a4915c21SAttilio Rao 	TAILQ_INIT(&alloctail);
2031bb15d1c7SGleb Smirnoff 	keg = zone->uz_keg;
2032a4667e09SMark Johnston 	req = VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED;
2033a4667e09SMark Johnston 	if ((wait & M_WAITOK) != 0)
2034a4667e09SMark Johnston 		req |= VM_ALLOC_WAITOK;
2035a4915c21SAttilio Rao 
2036a4915c21SAttilio Rao 	npages = howmany(bytes, PAGE_SIZE);
2037a4915c21SAttilio Rao 	while (npages > 0) {
2038a4667e09SMark Johnston 		p = vm_page_alloc_noobj_domain(domain, req);
2039a4915c21SAttilio Rao 		if (p != NULL) {
2040a4915c21SAttilio Rao 			/*
2041a4915c21SAttilio Rao 			 * Since the page does not belong to an object, its
2042a4915c21SAttilio Rao 			 * listq is unused.
2043a4915c21SAttilio Rao 			 */
2044a4915c21SAttilio Rao 			TAILQ_INSERT_TAIL(&alloctail, p, listq);
2045a4915c21SAttilio Rao 			npages--;
2046a4915c21SAttilio Rao 			continue;
2047a4915c21SAttilio Rao 		}
20488355f576SJeff Roberson 		/*
2049a4915c21SAttilio Rao 		 * Page allocation failed, free intermediate pages and
2050a4915c21SAttilio Rao 		 * exit.
20518355f576SJeff Roberson 		 */
2052a4915c21SAttilio Rao 		TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) {
205388ea538aSMark Johnston 			vm_page_unwire_noq(p);
2054b245ac95SAlan Cox 			vm_page_free(p);
2055b245ac95SAlan Cox 		}
2056a4915c21SAttilio Rao 		return (NULL);
2057b245ac95SAlan Cox 	}
20588355f576SJeff Roberson 	*flags = UMA_SLAB_PRIV;
2059a4915c21SAttilio Rao 	zkva = keg->uk_kva +
2060a4915c21SAttilio Rao 	    atomic_fetchadd_long(&keg->uk_offset, round_page(bytes));
2061a4915c21SAttilio Rao 	retkva = zkva;
2062a4915c21SAttilio Rao 	TAILQ_FOREACH(p, &alloctail, listq) {
2063a4915c21SAttilio Rao 		pmap_qenter(zkva, &p, 1);
2064a4915c21SAttilio Rao 		zkva += PAGE_SIZE;
2065a4915c21SAttilio Rao 	}
20668355f576SJeff Roberson 
20678355f576SJeff Roberson 	return ((void *)retkva);
20688355f576SJeff Roberson }
20698355f576SJeff Roberson 
20708355f576SJeff Roberson /*
2071ec0d8280SRyan Libby  * Allocate physically contiguous pages.
2072ec0d8280SRyan Libby  */
2073ec0d8280SRyan Libby static void *
2074ec0d8280SRyan Libby contig_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
2075ec0d8280SRyan Libby     int wait)
2076ec0d8280SRyan Libby {
2077ec0d8280SRyan Libby 
2078ec0d8280SRyan Libby 	*pflag = UMA_SLAB_KERNEL;
2079ec0d8280SRyan Libby 	return ((void *)kmem_alloc_contig_domainset(DOMAINSET_FIXED(domain),
2080ec0d8280SRyan Libby 	    bytes, wait, 0, ~(vm_paddr_t)0, 1, 0, VM_MEMATTR_DEFAULT));
2081ec0d8280SRyan Libby }
2082ec0d8280SRyan Libby 
2083da76d349SBojan Novković #if defined(UMA_USE_DMAP) && !defined(UMA_MD_SMALL_ALLOC)
2084da76d349SBojan Novković void *
2085da76d349SBojan Novković uma_small_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *flags,
2086da76d349SBojan Novković     int wait)
2087da76d349SBojan Novković {
2088da76d349SBojan Novković 	vm_page_t m;
2089da76d349SBojan Novković 	vm_paddr_t pa;
2090da76d349SBojan Novković 	void *va;
2091da76d349SBojan Novković 
2092da76d349SBojan Novković 	*flags = UMA_SLAB_PRIV;
2093da76d349SBojan Novković 	m = vm_page_alloc_noobj_domain(domain,
2094da76d349SBojan Novković 	    malloc2vm_flags(wait) | VM_ALLOC_WIRED);
2095da76d349SBojan Novković 	if (m == NULL)
2096da76d349SBojan Novković 		return (NULL);
2097da76d349SBojan Novković 	pa = m->phys_addr;
2098da76d349SBojan Novković 	if ((wait & M_NODUMP) == 0)
2099da76d349SBojan Novković 		dump_add_page(pa);
2100da76d349SBojan Novković 	va = (void *)PHYS_TO_DMAP(pa);
2101da76d349SBojan Novković 	return (va);
2102da76d349SBojan Novković }
2103da76d349SBojan Novković #endif
2104da76d349SBojan Novković 
2105ec0d8280SRyan Libby /*
21068355f576SJeff Roberson  * Frees a number of pages to the system
21078355f576SJeff Roberson  *
21088355f576SJeff Roberson  * Arguments:
21098355f576SJeff Roberson  *	mem   A pointer to the memory to be freed
21108355f576SJeff Roberson  *	size  The size of the memory being freed
21118355f576SJeff Roberson  *	flags The original p->us_flags field
21128355f576SJeff Roberson  *
21138355f576SJeff Roberson  * Returns:
21148355f576SJeff Roberson  *	Nothing
21158355f576SJeff Roberson  */
21168355f576SJeff Roberson static void
2117f2c2231eSRyan Stone page_free(void *mem, vm_size_t size, uint8_t flags)
21188355f576SJeff Roberson {
21193370c5bfSJeff Roberson 
2120a81c400eSJeff Roberson 	if ((flags & UMA_SLAB_BOOT) != 0) {
2121a81c400eSJeff Roberson 		startup_free(mem, size);
2122a81c400eSJeff Roberson 		return;
2123a81c400eSJeff Roberson 	}
2124a81c400eSJeff Roberson 
2125ec0d8280SRyan Libby 	KASSERT((flags & UMA_SLAB_KERNEL) != 0,
2126ec0d8280SRyan Libby 	    ("UMA: page_free used with invalid flags %x", flags));
21278355f576SJeff Roberson 
2128f49fd63aSJohn Baldwin 	kmem_free(mem, size);
21298355f576SJeff Roberson }
21308355f576SJeff Roberson 
21318355f576SJeff Roberson /*
2132ab3059a8SMatt Macy  * Frees pcpu zone allocations
2133ab3059a8SMatt Macy  *
2134ab3059a8SMatt Macy  * Arguments:
2135ab3059a8SMatt Macy  *	mem   A pointer to the memory to be freed
2136ab3059a8SMatt Macy  *	size  The size of the memory being freed
2137ab3059a8SMatt Macy  *	flags The original p->us_flags field
2138ab3059a8SMatt Macy  *
2139ab3059a8SMatt Macy  * Returns:
2140ab3059a8SMatt Macy  *	Nothing
2141ab3059a8SMatt Macy  */
2142ab3059a8SMatt Macy static void
2143ab3059a8SMatt Macy pcpu_page_free(void *mem, vm_size_t size, uint8_t flags)
2144ab3059a8SMatt Macy {
2145ab3059a8SMatt Macy 	vm_offset_t sva, curva;
2146ab3059a8SMatt Macy 	vm_paddr_t paddr;
2147ab3059a8SMatt Macy 	vm_page_t m;
2148ab3059a8SMatt Macy 
2149ab3059a8SMatt Macy 	MPASS(size == (mp_maxid+1)*PAGE_SIZE);
21505ba16cf3SRyan Libby 
21515ba16cf3SRyan Libby 	if ((flags & UMA_SLAB_BOOT) != 0) {
21525ba16cf3SRyan Libby 		startup_free(mem, size);
21535ba16cf3SRyan Libby 		return;
21545ba16cf3SRyan Libby 	}
21555ba16cf3SRyan Libby 
2156ab3059a8SMatt Macy 	sva = (vm_offset_t)mem;
2157ab3059a8SMatt Macy 	for (curva = sva; curva < sva + size; curva += PAGE_SIZE) {
2158ab3059a8SMatt Macy 		paddr = pmap_kextract(curva);
2159ab3059a8SMatt Macy 		m = PHYS_TO_VM_PAGE(paddr);
216088ea538aSMark Johnston 		vm_page_unwire_noq(m);
2161ab3059a8SMatt Macy 		vm_page_free(m);
2162ab3059a8SMatt Macy 	}
2163ab3059a8SMatt Macy 	pmap_qremove(sva, size >> PAGE_SHIFT);
2164ab3059a8SMatt Macy 	kva_free(sva, size);
2165ab3059a8SMatt Macy }
2166ab3059a8SMatt Macy 
2167da76d349SBojan Novković #if defined(UMA_USE_DMAP) && !defined(UMA_MD_SMALL_ALLOC)
2168da76d349SBojan Novković void
2169da76d349SBojan Novković uma_small_free(void *mem, vm_size_t size, uint8_t flags)
2170da76d349SBojan Novković {
2171da76d349SBojan Novković 	vm_page_t m;
2172da76d349SBojan Novković 	vm_paddr_t pa;
2173da76d349SBojan Novković 
2174da76d349SBojan Novković 	pa = DMAP_TO_PHYS((vm_offset_t)mem);
2175da76d349SBojan Novković 	dump_drop_page(pa);
2176da76d349SBojan Novković 	m = PHYS_TO_VM_PAGE(pa);
2177da76d349SBojan Novković 	vm_page_unwire_noq(m);
2178da76d349SBojan Novković 	vm_page_free(m);
2179da76d349SBojan Novković }
2180da76d349SBojan Novković #endif
2181da76d349SBojan Novković 
2182ab3059a8SMatt Macy /*
21838355f576SJeff Roberson  * Zero fill initializer
21848355f576SJeff Roberson  *
21858355f576SJeff Roberson  * Arguments/Returns follow uma_init specifications
21868355f576SJeff Roberson  */
2187b23f72e9SBrian Feldman static int
2188b23f72e9SBrian Feldman zero_init(void *mem, int size, int flags)
21898355f576SJeff Roberson {
21908355f576SJeff Roberson 	bzero(mem, size);
2191b23f72e9SBrian Feldman 	return (0);
21928355f576SJeff Roberson }
21938355f576SJeff Roberson 
2194815db204SRyan Libby #ifdef INVARIANTS
219554007ce8SMark Johnston static struct noslabbits *
2196815db204SRyan Libby slab_dbg_bits(uma_slab_t slab, uma_keg_t keg)
2197815db204SRyan Libby {
2198815db204SRyan Libby 
2199815db204SRyan Libby 	return ((void *)((char *)&slab->us_free + BITSET_SIZE(keg->uk_ipers)));
2200815db204SRyan Libby }
2201815db204SRyan Libby #endif
2202815db204SRyan Libby 
22038355f576SJeff Roberson /*
22049b78b1f4SJeff Roberson  * Actual size of embedded struct slab (!OFFPAGE).
22059b78b1f4SJeff Roberson  */
220654007ce8SMark Johnston static size_t
22079b78b1f4SJeff Roberson slab_sizeof(int nitems)
22089b78b1f4SJeff Roberson {
22099b78b1f4SJeff Roberson 	size_t s;
22109b78b1f4SJeff Roberson 
2211815db204SRyan Libby 	s = sizeof(struct uma_slab) + BITSET_SIZE(nitems) * SLAB_BITSETS;
22129b78b1f4SJeff Roberson 	return (roundup(s, UMA_ALIGN_PTR + 1));
22139b78b1f4SJeff Roberson }
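/*
 * Illustrative sizing note (not part of the original source; assumes an LP64
 * platform where bitsets are built from 64-bit longs): each bitset in the
 * inline header costs 8 bytes per 64 items, so a 128-item slab adds 16 bytes
 * per bitset on top of sizeof(struct uma_slab) before the total is rounded
 * up to pointer alignment.
 */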
22149b78b1f4SJeff Roberson 
22154a8b575cSRyan Libby #define	UMA_FIXPT_SHIFT	31
22164a8b575cSRyan Libby #define	UMA_FRAC_FIXPT(n, d)						\
22174a8b575cSRyan Libby 	((uint32_t)(((uint64_t)(n) << UMA_FIXPT_SHIFT) / (d)))
22184a8b575cSRyan Libby #define	UMA_FIXPT_PCT(f)						\
22194a8b575cSRyan Libby 	((u_int)(((uint64_t)100 * (f)) >> UMA_FIXPT_SHIFT))
22204a8b575cSRyan Libby #define	UMA_PCT_FIXPT(pct)	UMA_FRAC_FIXPT((pct), 100)
22214a8b575cSRyan Libby #define	UMA_MIN_EFF	UMA_PCT_FIXPT(100 - UMA_MAX_WASTE)
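/*
 * Worked example (illustrative): with UMA_FIXPT_SHIFT == 31,
 * UMA_FRAC_FIXPT(1, 4) == 1 << 29 and UMA_FIXPT_PCT(1 << 29) == 25, so the
 * fixed-point fraction round-trips to 25%.  UMA_MIN_EFF is simply the
 * fixed-point encoding of the (100 - UMA_MAX_WASTE)% utilization floor.
 */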
22224a8b575cSRyan Libby 
22239b78b1f4SJeff Roberson /*
22244a8b575cSRyan Libby  * Compute the number of items that will fit in a slab.  If hdr is true, the
22254a8b575cSRyan Libby  * item count may be limited to provide space in the slab for an inline slab
22264a8b575cSRyan Libby  * header.  Otherwise, all slab space will be provided for item storage.
22274a8b575cSRyan Libby  */
22284a8b575cSRyan Libby static u_int
22294a8b575cSRyan Libby slab_ipers_hdr(u_int size, u_int rsize, u_int slabsize, bool hdr)
22304a8b575cSRyan Libby {
22314a8b575cSRyan Libby 	u_int ipers;
22324a8b575cSRyan Libby 	u_int padpi;
22334a8b575cSRyan Libby 
22344a8b575cSRyan Libby 	/* The padding between items is not needed after the last item. */
22354a8b575cSRyan Libby 	padpi = rsize - size;
22364a8b575cSRyan Libby 
22374a8b575cSRyan Libby 	if (hdr) {
22384a8b575cSRyan Libby 		/*
22394a8b575cSRyan Libby 		 * Start with the maximum item count and remove items until
22404a8b575cSRyan Libby 		 * the slab header fits alongside the allocatable memory.
22414a8b575cSRyan Libby 		 */
22424a8b575cSRyan Libby 		for (ipers = MIN(SLAB_MAX_SETSIZE,
22434a8b575cSRyan Libby 		    (slabsize + padpi - slab_sizeof(1)) / rsize);
22444a8b575cSRyan Libby 		    ipers > 0 &&
22454a8b575cSRyan Libby 		    ipers * rsize - padpi + slab_sizeof(ipers) > slabsize;
22464a8b575cSRyan Libby 		    ipers--)
22474a8b575cSRyan Libby 			continue;
22484a8b575cSRyan Libby 	} else {
22494a8b575cSRyan Libby 		ipers = MIN((slabsize + padpi) / rsize, SLAB_MAX_SETSIZE);
22504a8b575cSRyan Libby 	}
22514a8b575cSRyan Libby 
22524a8b575cSRyan Libby 	return (ipers);
22534a8b575cSRyan Libby }
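/*
 * Worked example (illustrative, assuming 4 KB pages): for 96-byte items with
 * rsize == 96 (padpi == 0) and slabsize == 4096, the header-less case yields
 * 4096 / 96 == 42 items; with an inline header the loop above trims ipers
 * until 96 * ipers + slab_sizeof(ipers) fits within the 4096-byte slab.
 */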
22544a8b575cSRyan Libby 
225527ca37acSRyan Libby struct keg_layout_result {
225627ca37acSRyan Libby 	u_int format;
225727ca37acSRyan Libby 	u_int slabsize;
225827ca37acSRyan Libby 	u_int ipers;
225927ca37acSRyan Libby 	u_int eff;
226027ca37acSRyan Libby };
226127ca37acSRyan Libby 
226227ca37acSRyan Libby static void
226327ca37acSRyan Libby keg_layout_one(uma_keg_t keg, u_int rsize, u_int slabsize, u_int fmt,
226427ca37acSRyan Libby     struct keg_layout_result *kl)
226527ca37acSRyan Libby {
226627ca37acSRyan Libby 	u_int total;
226727ca37acSRyan Libby 
226827ca37acSRyan Libby 	kl->format = fmt;
226927ca37acSRyan Libby 	kl->slabsize = slabsize;
227027ca37acSRyan Libby 
227127ca37acSRyan Libby 	/* Handle INTERNAL as inline with an extra page. */
227227ca37acSRyan Libby 	if ((fmt & UMA_ZFLAG_INTERNAL) != 0) {
227327ca37acSRyan Libby 		kl->format &= ~UMA_ZFLAG_INTERNAL;
227427ca37acSRyan Libby 		kl->slabsize += PAGE_SIZE;
227527ca37acSRyan Libby 	}
227627ca37acSRyan Libby 
227727ca37acSRyan Libby 	kl->ipers = slab_ipers_hdr(keg->uk_size, rsize, kl->slabsize,
227827ca37acSRyan Libby 	    (fmt & UMA_ZFLAG_OFFPAGE) == 0);
227927ca37acSRyan Libby 
228027ca37acSRyan Libby 	/* Account for memory used by an offpage slab header. */
228127ca37acSRyan Libby 	total = kl->slabsize;
228227ca37acSRyan Libby 	if ((fmt & UMA_ZFLAG_OFFPAGE) != 0)
228327ca37acSRyan Libby 		total += slabzone(kl->ipers)->uz_keg->uk_rsize;
228427ca37acSRyan Libby 
228527ca37acSRyan Libby 	kl->eff = UMA_FRAC_FIXPT(kl->ipers * rsize, total);
228627ca37acSRyan Libby }
228727ca37acSRyan Libby 
22889b78b1f4SJeff Roberson /*
22894a8b575cSRyan Libby  * Determine the format of a uma keg.  This determines where the slab header
22904a8b575cSRyan Libby  * will be placed (inline or offpage) and calculates ipers, rsize, and ppera.
22918355f576SJeff Roberson  *
22928355f576SJeff Roberson  * Arguments
2293e20a199fSJeff Roberson  *	keg  The zone we should initialize
22948355f576SJeff Roberson  *
22958355f576SJeff Roberson  * Returns
22968355f576SJeff Roberson  *	Nothing
22978355f576SJeff Roberson  */
22988355f576SJeff Roberson static void
22994a8b575cSRyan Libby keg_layout(uma_keg_t keg)
23008355f576SJeff Roberson {
230127ca37acSRyan Libby 	struct keg_layout_result kl = {}, kl_tmp;
230227ca37acSRyan Libby 	u_int fmts[2];
23034a8b575cSRyan Libby 	u_int alignsize;
230427ca37acSRyan Libby 	u_int nfmt;
23054a8b575cSRyan Libby 	u_int pages;
2306244f4554SBosko Milekic 	u_int rsize;
2307a55ebb7cSAndriy Gapon 	u_int slabsize;
230827ca37acSRyan Libby 	u_int i, j;
23098355f576SJeff Roberson 
23104a8b575cSRyan Libby 	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 ||
23114a8b575cSRyan Libby 	    (keg->uk_size <= UMA_PCPU_ALLOC_SIZE &&
23124a8b575cSRyan Libby 	     (keg->uk_flags & UMA_ZONE_CACHESPREAD) == 0),
23134a8b575cSRyan Libby 	    ("%s: cannot configure for PCPU: keg=%s, size=%u, flags=0x%b",
23144a8b575cSRyan Libby 	     __func__, keg->uk_name, keg->uk_size, keg->uk_flags,
23154a8b575cSRyan Libby 	     PRINT_UMA_ZFLAGS));
2316bae55c4aSRyan Libby 	KASSERT((keg->uk_flags & (UMA_ZFLAG_INTERNAL | UMA_ZONE_VM)) == 0 ||
23174a8b575cSRyan Libby 	    (keg->uk_flags & (UMA_ZONE_NOTOUCH | UMA_ZONE_PCPU)) == 0,
23184a8b575cSRyan Libby 	    ("%s: incompatible flags 0x%b", __func__, keg->uk_flags,
23194a8b575cSRyan Libby 	     PRINT_UMA_ZFLAGS));
2320e28a647dSGleb Smirnoff 
23214a8b575cSRyan Libby 	alignsize = keg->uk_align + 1;
2322b0dfc486SMark Johnston #ifdef KASAN
2323b0dfc486SMark Johnston 	/*
2324b0dfc486SMark Johnston 	 * ASAN requires that each allocation be aligned to the shadow map
2325b0dfc486SMark Johnston 	 * scale factor.
2326b0dfc486SMark Johnston 	 */
2327b0dfc486SMark Johnston 	if (alignsize < KASAN_SHADOW_SCALE)
2328b0dfc486SMark Johnston 		alignsize = KASAN_SHADOW_SCALE;
2329b0dfc486SMark Johnston #endif
2330ad97af7eSGleb Smirnoff 
2331ef72505eSJeff Roberson 	/*
2332ef72505eSJeff Roberson 	 * Calculate the size of each allocation (rsize) according to
2333ef72505eSJeff Roberson 	 * alignment.  If the requested size is smaller than we have
2334ef72505eSJeff Roberson 	 * allocation bits for we round it up.
2335ef72505eSJeff Roberson 	 */
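	/*
	 * For instance (illustrative): a 56-byte item with a 16-byte
	 * alignment requirement is padded to rsize == 64.
	 */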
23369b8db4d0SRyan Libby 	rsize = MAX(keg->uk_size, UMA_SMALLEST_UNIT);
23374a8b575cSRyan Libby 	rsize = roundup2(rsize, alignsize);
2338ad97af7eSGleb Smirnoff 
233927ca37acSRyan Libby 	if ((keg->uk_flags & UMA_ZONE_CACHESPREAD) != 0) {
23409b78b1f4SJeff Roberson 		/*
23414a8b575cSRyan Libby 		 * We want one item to start on every align boundary in a page.
23424a8b575cSRyan Libby 		 * To do this we will span pages.  We will also extend the item
23434a8b575cSRyan Libby 		 * by the size of align if it is an even multiple of align.
23444a8b575cSRyan Libby 		 * Otherwise, it would fall on the same boundary every time.
23459b78b1f4SJeff Roberson 		 */
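		/*
		 * Example (illustrative, 4 KB pages, 64-byte alignment,
		 * 256-byte items, caps not reached): 256 is an even multiple
		 * of 64, so rsize becomes 320 and slabsize is
		 * 320 * (4096 / 64) == 20480 bytes (5 pages), whose 64 items
		 * start on every distinct 64-byte boundary within a page.
		 */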
23464a8b575cSRyan Libby 		if ((rsize & alignsize) == 0)
23474a8b575cSRyan Libby 			rsize += alignsize;
23484a8b575cSRyan Libby 		slabsize = rsize * (PAGE_SIZE / alignsize);
23494a8b575cSRyan Libby 		slabsize = MIN(slabsize, rsize * SLAB_MAX_SETSIZE);
23504a8b575cSRyan Libby 		slabsize = MIN(slabsize, UMA_CACHESPREAD_MAX_SIZE);
235127ca37acSRyan Libby 		slabsize = round_page(slabsize);
23524a8b575cSRyan Libby 	} else {
23534a8b575cSRyan Libby 		/*
235427ca37acSRyan Libby 		 * Start with a slab size of as many pages as it takes to
235527ca37acSRyan Libby 		 * represent a single item.  We will try to fit as many
235627ca37acSRyan Libby 		 * additional items into the slab as possible.
23574a8b575cSRyan Libby 		 */
235827ca37acSRyan Libby 		slabsize = round_page(keg->uk_size);
23591ca6ed45SGleb Smirnoff 	}
2360ad97af7eSGleb Smirnoff 
236127ca37acSRyan Libby 	/* Build a list of all of the available formats for this keg. */
236227ca37acSRyan Libby 	nfmt = 0;
236327ca37acSRyan Libby 
23644a8b575cSRyan Libby 	/* Evaluate an inline slab layout. */
23654a8b575cSRyan Libby 	if ((keg->uk_flags & (UMA_ZONE_NOTOUCH | UMA_ZONE_PCPU)) == 0)
236627ca37acSRyan Libby 		fmts[nfmt++] = 0;
23674a8b575cSRyan Libby 
23684a8b575cSRyan Libby 	/* TODO: vm_page-embedded slab. */
2369244f4554SBosko Milekic 
237020e8e865SBosko Milekic 	/*
2371244f4554SBosko Milekic 	 * We can't do OFFPAGE if we're internal or if we've been
237220e8e865SBosko Milekic 	 * asked to not go to the VM for buckets.  If we do this we
2373bae55c4aSRyan Libby 	 * may end up going to the VM for slabs which we do not want
2374bae55c4aSRyan Libby 	 * to do if we're UMA_ZONE_VM, which clearly forbids it.
2375bae55c4aSRyan Libby 	 * In those cases, evaluate a pseudo-format called INTERNAL
2376bae55c4aSRyan Libby 	 * which has an inline slab header and one extra page to
2377bae55c4aSRyan Libby 	 * guarantee that it fits.
237827ca37acSRyan Libby 	 *
237927ca37acSRyan Libby 	 * Otherwise, see if using an OFFPAGE slab will improve our
238027ca37acSRyan Libby 	 * efficiency.
238120e8e865SBosko Milekic 	 */
2382bae55c4aSRyan Libby 	if ((keg->uk_flags & (UMA_ZFLAG_INTERNAL | UMA_ZONE_VM)) != 0)
238327ca37acSRyan Libby 		fmts[nfmt++] = UMA_ZFLAG_INTERNAL;
238427ca37acSRyan Libby 	else
238527ca37acSRyan Libby 		fmts[nfmt++] = UMA_ZFLAG_OFFPAGE;
2386244f4554SBosko Milekic 
2387ef72505eSJeff Roberson 	/*
238827ca37acSRyan Libby 	 * Choose a slab size and format which satisfy the minimum efficiency.
238927ca37acSRyan Libby 	 * Prefer the smallest slab size that meets the constraints.
2390ef72505eSJeff Roberson 	 *
239127ca37acSRyan Libby 	 * Start with a minimum slab size, to accommodate CACHESPREAD.  Then,
239227ca37acSRyan Libby 	 * for small items (up to PAGE_SIZE), the iteration increment is one
239327ca37acSRyan Libby 	 * page; and for large items, the increment is one item.
2394ef72505eSJeff Roberson 	 */
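	/*
	 * Illustrative progression (assuming 4 KB pages): a 512-byte item
	 * starts at i == 1 and the candidate slab grows one page per
	 * iteration (4 KB, 8 KB, ...); a 10 KB item starts with a 12 KB slab
	 * and each iteration instead adds room for one more item.
	 */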
239527ca37acSRyan Libby 	i = (slabsize + rsize - keg->uk_size) / MAX(PAGE_SIZE, rsize);
239627ca37acSRyan Libby 	KASSERT(i >= 1, ("keg %s(%p) flags=0x%b slabsize=%u, rsize=%u, i=%u",
239727ca37acSRyan Libby 	    keg->uk_name, keg, keg->uk_flags, PRINT_UMA_ZFLAGS, slabsize,
239827ca37acSRyan Libby 	    rsize, i));
239927ca37acSRyan Libby 	for ( ; ; i++) {
240027ca37acSRyan Libby 		slabsize = (rsize <= PAGE_SIZE) ? ptoa(i) :
240127ca37acSRyan Libby 		    round_page(rsize * (i - 1) + keg->uk_size);
240227ca37acSRyan Libby 
240327ca37acSRyan Libby 		for (j = 0; j < nfmt; j++) {
240427ca37acSRyan Libby 			/* Only if we have no viable format yet. */
240527ca37acSRyan Libby 			if ((fmts[j] & UMA_ZFLAG_INTERNAL) != 0 &&
240627ca37acSRyan Libby 			    kl.ipers > 0)
240727ca37acSRyan Libby 				continue;
240827ca37acSRyan Libby 
240927ca37acSRyan Libby 			keg_layout_one(keg, rsize, slabsize, fmts[j], &kl_tmp);
241027ca37acSRyan Libby 			if (kl_tmp.eff <= kl.eff)
241127ca37acSRyan Libby 				continue;
241227ca37acSRyan Libby 
241327ca37acSRyan Libby 			kl = kl_tmp;
241427ca37acSRyan Libby 
241527ca37acSRyan Libby 			CTR6(KTR_UMA, "keg %s layout: format %#x "
241627ca37acSRyan Libby 			    "(ipers %u * rsize %u) / slabsize %#x = %u%% eff",
241727ca37acSRyan Libby 			    keg->uk_name, kl.format, kl.ipers, rsize,
241827ca37acSRyan Libby 			    kl.slabsize, UMA_FIXPT_PCT(kl.eff));
241927ca37acSRyan Libby 
242027ca37acSRyan Libby 			/* Stop when we reach the minimum efficiency. */
242127ca37acSRyan Libby 			if (kl.eff >= UMA_MIN_EFF)
242227ca37acSRyan Libby 				break;
24238355f576SJeff Roberson 		}
2424ad97af7eSGleb Smirnoff 
242533e5a1eaSRyan Libby 		if (kl.eff >= UMA_MIN_EFF || !multipage_slabs ||
242627ca37acSRyan Libby 		    slabsize >= SLAB_MAX_SETSIZE * rsize ||
242727ca37acSRyan Libby 		    (keg->uk_flags & (UMA_ZONE_PCPU | UMA_ZONE_CONTIG)) != 0)
242827ca37acSRyan Libby 			break;
242927ca37acSRyan Libby 	}
243027ca37acSRyan Libby 
243127ca37acSRyan Libby 	pages = atop(kl.slabsize);
243227ca37acSRyan Libby 	if ((keg->uk_flags & UMA_ZONE_PCPU) != 0)
243327ca37acSRyan Libby 		pages *= mp_maxid + 1;
243427ca37acSRyan Libby 
243527ca37acSRyan Libby 	keg->uk_rsize = rsize;
243627ca37acSRyan Libby 	keg->uk_ipers = kl.ipers;
243727ca37acSRyan Libby 	keg->uk_ppera = pages;
243827ca37acSRyan Libby 	keg->uk_flags |= kl.format;
243927ca37acSRyan Libby 
24404a8b575cSRyan Libby 	/*
24414a8b575cSRyan Libby 	 * How do we find the slab header if it is offpage or if not all item
24424a8b575cSRyan Libby 	 * start addresses are in the same page?  We could solve the latter
24434a8b575cSRyan Libby 	 * case with vaddr alignment, but we don't.
24444a8b575cSRyan Libby 	 */
244527ca37acSRyan Libby 	if ((keg->uk_flags & UMA_ZFLAG_OFFPAGE) != 0 ||
244627ca37acSRyan Libby 	    (keg->uk_ipers - 1) * rsize >= PAGE_SIZE) {
244754c5ae80SRyan Libby 		if ((keg->uk_flags & UMA_ZONE_NOTPAGE) != 0)
244827ca37acSRyan Libby 			keg->uk_flags |= UMA_ZFLAG_HASH;
244954c5ae80SRyan Libby 		else
245027ca37acSRyan Libby 			keg->uk_flags |= UMA_ZFLAG_VTOSLAB;
245154c5ae80SRyan Libby 	}
245227ca37acSRyan Libby 
2453e63a1c2fSRyan Libby 	CTR6(KTR_UMA, "%s: keg=%s, flags=%#x, rsize=%u, ipers=%u, ppera=%u",
245427ca37acSRyan Libby 	    __func__, keg->uk_name, keg->uk_flags, rsize, keg->uk_ipers,
245527ca37acSRyan Libby 	    pages);
24564a8b575cSRyan Libby 	KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_MAX_SETSIZE,
24574a8b575cSRyan Libby 	    ("%s: keg=%s, flags=0x%b, rsize=%u, ipers=%u, ppera=%u", __func__,
245827ca37acSRyan Libby 	     keg->uk_name, keg->uk_flags, PRINT_UMA_ZFLAGS, rsize,
245927ca37acSRyan Libby 	     keg->uk_ipers, pages));
2460e20a199fSJeff Roberson }
2461e20a199fSJeff Roberson 
24628355f576SJeff Roberson /*
2463099a0e58SBosko Milekic  * Keg header ctor.  This initializes all fields, locks, etc.  And inserts
2464099a0e58SBosko Milekic  * the keg onto the global keg list.
24658355f576SJeff Roberson  *
24668355f576SJeff Roberson  * Arguments/Returns follow uma_ctor specifications
2467099a0e58SBosko Milekic  *	udata  Actually uma_kctor_args
2468099a0e58SBosko Milekic  */
2469b23f72e9SBrian Feldman static int
2470b23f72e9SBrian Feldman keg_ctor(void *mem, int size, void *udata, int flags)
2471099a0e58SBosko Milekic {
2472099a0e58SBosko Milekic 	struct uma_kctor_args *arg = udata;
2473099a0e58SBosko Milekic 	uma_keg_t keg = mem;
2474099a0e58SBosko Milekic 	uma_zone_t zone;
24758b987a77SJeff Roberson 	int i;
2476099a0e58SBosko Milekic 
2477099a0e58SBosko Milekic 	bzero(keg, size);
2478099a0e58SBosko Milekic 	keg->uk_size = arg->size;
2479099a0e58SBosko Milekic 	keg->uk_init = arg->uminit;
2480099a0e58SBosko Milekic 	keg->uk_fini = arg->fini;
2481099a0e58SBosko Milekic 	keg->uk_align = arg->align;
24826fd34d6fSJeff Roberson 	keg->uk_reserve = 0;
2483099a0e58SBosko Milekic 	keg->uk_flags = arg->flags;
2484099a0e58SBosko Milekic 
2485099a0e58SBosko Milekic 	/*
2486194a979eSMark Johnston 	 * We use a global round-robin policy by default.  Zones with
2487dfe13344SJeff Roberson 	 * UMA_ZONE_FIRSTTOUCH set will use first-touch instead, in which
2488dfe13344SJeff Roberson 	 * case the iterator is never run.
2489194a979eSMark Johnston 	 */
2490194a979eSMark Johnston 	keg->uk_dr.dr_policy = DOMAINSET_RR();
2491194a979eSMark Johnston 	keg->uk_dr.dr_iter = 0;
2492194a979eSMark Johnston 
2493194a979eSMark Johnston 	/*
2494c8b0a88bSJeff Roberson 	 * The primary zone is passed to us at keg-creation time.
2495099a0e58SBosko Milekic 	 */
2496099a0e58SBosko Milekic 	zone = arg->zone;
2497e20a199fSJeff Roberson 	keg->uk_name = zone->uz_name;
2498099a0e58SBosko Milekic 
2499099a0e58SBosko Milekic 	if (arg->flags & UMA_ZONE_ZINIT)
2500099a0e58SBosko Milekic 		keg->uk_init = zero_init;
2501099a0e58SBosko Milekic 
2502cfcae3f8SGleb Smirnoff 	if (arg->flags & UMA_ZONE_MALLOC)
250354c5ae80SRyan Libby 		keg->uk_flags |= UMA_ZFLAG_VTOSLAB;
2504e20a199fSJeff Roberson 
250554c5ae80SRyan Libby #ifndef SMP
2506ad97af7eSGleb Smirnoff 	keg->uk_flags &= ~UMA_ZONE_PCPU;
2507ad97af7eSGleb Smirnoff #endif
2508ad97af7eSGleb Smirnoff 
25094a8b575cSRyan Libby 	keg_layout(keg);
2510099a0e58SBosko Milekic 
25118b987a77SJeff Roberson 	/*
2512c6fd3e23SJeff Roberson 	 * Use a first-touch NUMA policy for kegs that pmap_extract() will
2513c6fd3e23SJeff Roberson 	 * work on.  Use round-robin for everything else.
2514dfe13344SJeff Roberson 	 *
2515dfe13344SJeff Roberson 	 * Zones may override the default by specifying either.
25168b987a77SJeff Roberson 	 */
2517dfe13344SJeff Roberson #ifdef NUMA
2518dfe13344SJeff Roberson 	if ((keg->uk_flags &
2519c6fd3e23SJeff Roberson 	    (UMA_ZONE_ROUNDROBIN | UMA_ZFLAG_CACHE | UMA_ZONE_NOTPAGE)) == 0)
2520dfe13344SJeff Roberson 		keg->uk_flags |= UMA_ZONE_FIRSTTOUCH;
2521dfe13344SJeff Roberson 	else if ((keg->uk_flags & UMA_ZONE_FIRSTTOUCH) == 0)
2522dfe13344SJeff Roberson 		keg->uk_flags |= UMA_ZONE_ROUNDROBIN;
25238b987a77SJeff Roberson #endif
25248b987a77SJeff Roberson 
2525099a0e58SBosko Milekic 	/*
2526099a0e58SBosko Milekic 	 * If we haven't booted yet we need allocations to go through the
2527099a0e58SBosko Milekic 	 * startup cache until the vm is ready.
2528099a0e58SBosko Milekic 	 */
2529d25ed650SBojan Novković #ifdef UMA_USE_DMAP
2530a81c400eSJeff Roberson 	if (keg->uk_ppera == 1)
253177e19437SGleb Smirnoff 		keg->uk_allocf = uma_small_alloc;
2532a81c400eSJeff Roberson 	else
25338cd02d00SAlan Cox #endif
2534a81c400eSJeff Roberson 	if (booted < BOOT_KVA)
2535a81c400eSJeff Roberson 		keg->uk_allocf = startup_alloc;
2536ab3059a8SMatt Macy 	else if (keg->uk_flags & UMA_ZONE_PCPU)
2537ab3059a8SMatt Macy 		keg->uk_allocf = pcpu_page_alloc;
2538ec0d8280SRyan Libby 	else if ((keg->uk_flags & UMA_ZONE_CONTIG) != 0 && keg->uk_ppera > 1)
2539ec0d8280SRyan Libby 		keg->uk_allocf = contig_alloc;
254077e19437SGleb Smirnoff 	else
254177e19437SGleb Smirnoff 		keg->uk_allocf = page_alloc;
2542d25ed650SBojan Novković #ifdef UMA_USE_DMAP
254377e19437SGleb Smirnoff 	if (keg->uk_ppera == 1)
254477e19437SGleb Smirnoff 		keg->uk_freef = uma_small_free;
254577e19437SGleb Smirnoff 	else
254677e19437SGleb Smirnoff #endif
2547ab3059a8SMatt Macy 	if (keg->uk_flags & UMA_ZONE_PCPU)
2548ab3059a8SMatt Macy 		keg->uk_freef = pcpu_page_free;
2549ab3059a8SMatt Macy 	else
255077e19437SGleb Smirnoff 		keg->uk_freef = page_free;
2551099a0e58SBosko Milekic 
2552099a0e58SBosko Milekic 	/*
25538b987a77SJeff Roberson 	 * Initialize keg's locks.
2554099a0e58SBosko Milekic 	 */
25558b987a77SJeff Roberson 	for (i = 0; i < vm_ndomains; i++)
25568b987a77SJeff Roberson 		KEG_LOCK_INIT(keg, i, (arg->flags & UMA_ZONE_MTXCLASS));
2557099a0e58SBosko Milekic 
2558099a0e58SBosko Milekic 	/*
2559099a0e58SBosko Milekic 	 * If we're putting the slab header in the actual page we need to
25609b78b1f4SJeff Roberson 	 * figure out where in each page it goes.  See slab_sizeof
25619b78b1f4SJeff Roberson 	 * definition.
2562099a0e58SBosko Milekic 	 */
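	/*
	 * For example (illustrative numbers): with a single 4 KB page per
	 * slab and an inline header of, say, 80 bytes, uk_pgoff becomes
	 * 4016, placing the header at the very end of the slab.
	 */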
256354c5ae80SRyan Libby 	if (!(keg->uk_flags & UMA_ZFLAG_OFFPAGE)) {
25649b78b1f4SJeff Roberson 		size_t shsize;
25659b78b1f4SJeff Roberson 
25669b78b1f4SJeff Roberson 		shsize = slab_sizeof(keg->uk_ipers);
25679b78b1f4SJeff Roberson 		keg->uk_pgoff = (PAGE_SIZE * keg->uk_ppera) - shsize;
2568244f4554SBosko Milekic 		/*
2569244f4554SBosko Milekic 		 * The only way the following is possible is if, with our
2570244f4554SBosko Milekic 		 * UMA_ALIGN_PTR adjustments, we are now bigger than
2571244f4554SBosko Milekic 		 * UMA_SLAB_SIZE.  I haven't checked whether this is
2572244f4554SBosko Milekic 		 * mathematically possible for all cases, so we make
2573244f4554SBosko Milekic 		 * sure here anyway.
2574244f4554SBosko Milekic 		 */
25759b78b1f4SJeff Roberson 		KASSERT(keg->uk_pgoff + shsize <= PAGE_SIZE * keg->uk_ppera,
25763d5e3df7SGleb Smirnoff 		    ("zone %s ipers %d rsize %d size %d slab won't fit",
25773d5e3df7SGleb Smirnoff 		    zone->uz_name, keg->uk_ipers, keg->uk_rsize, keg->uk_size));
2578099a0e58SBosko Milekic 	}
2579099a0e58SBosko Milekic 
258054c5ae80SRyan Libby 	if (keg->uk_flags & UMA_ZFLAG_HASH)
25813b2f2cb8SAlexander Motin 		hash_alloc(&keg->uk_hash, 0);
2582099a0e58SBosko Milekic 
2583e63a1c2fSRyan Libby 	CTR3(KTR_UMA, "keg_ctor %p zone %s(%p)", keg, zone->uz_name, zone);
2584099a0e58SBosko Milekic 
2585099a0e58SBosko Milekic 	LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
2586099a0e58SBosko Milekic 
2587111fbcd5SBryan Venteicher 	rw_wlock(&uma_rwlock);
2588099a0e58SBosko Milekic 	LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
2589111fbcd5SBryan Venteicher 	rw_wunlock(&uma_rwlock);
2590b23f72e9SBrian Feldman 	return (0);
2591099a0e58SBosko Milekic }
2592099a0e58SBosko Milekic 
25932efcc8cbSGleb Smirnoff static void
2594a81c400eSJeff Roberson zone_kva_available(uma_zone_t zone, void *unused)
2595a81c400eSJeff Roberson {
2596a81c400eSJeff Roberson 	uma_keg_t keg;
2597a81c400eSJeff Roberson 
2598a81c400eSJeff Roberson 	if ((zone->uz_flags & UMA_ZFLAG_CACHE) != 0)
2599a81c400eSJeff Roberson 		return;
2600a81c400eSJeff Roberson 	KEG_GET(zone, keg);
2601ec0d8280SRyan Libby 
2602ec0d8280SRyan Libby 	if (keg->uk_allocf == startup_alloc) {
2603ec0d8280SRyan Libby 		/* Switch to the real allocator. */
2604f96d4157SJeff Roberson 		if (keg->uk_flags & UMA_ZONE_PCPU)
2605f96d4157SJeff Roberson 			keg->uk_allocf = pcpu_page_alloc;
2606ec0d8280SRyan Libby 		else if ((keg->uk_flags & UMA_ZONE_CONTIG) != 0 &&
2607ec0d8280SRyan Libby 		    keg->uk_ppera > 1)
2608ec0d8280SRyan Libby 			keg->uk_allocf = contig_alloc;
2609ec0d8280SRyan Libby 		else
2610a81c400eSJeff Roberson 			keg->uk_allocf = page_alloc;
2611a81c400eSJeff Roberson 	}
2612ec0d8280SRyan Libby }
2613a81c400eSJeff Roberson 
2614a81c400eSJeff Roberson static void
261520a4e154SJeff Roberson zone_alloc_counters(uma_zone_t zone, void *unused)
26162efcc8cbSGleb Smirnoff {
26172efcc8cbSGleb Smirnoff 
26182efcc8cbSGleb Smirnoff 	zone->uz_allocs = counter_u64_alloc(M_WAITOK);
26192efcc8cbSGleb Smirnoff 	zone->uz_frees = counter_u64_alloc(M_WAITOK);
26202efcc8cbSGleb Smirnoff 	zone->uz_fails = counter_u64_alloc(M_WAITOK);
2621c6fd3e23SJeff Roberson 	zone->uz_xdomain = counter_u64_alloc(M_WAITOK);
26222efcc8cbSGleb Smirnoff }
26232efcc8cbSGleb Smirnoff 
262420a4e154SJeff Roberson static void
262520a4e154SJeff Roberson zone_alloc_sysctl(uma_zone_t zone, void *unused)
262620a4e154SJeff Roberson {
262720a4e154SJeff Roberson 	uma_zone_domain_t zdom;
26288b987a77SJeff Roberson 	uma_domain_t dom;
262920a4e154SJeff Roberson 	uma_keg_t keg;
263020a4e154SJeff Roberson 	struct sysctl_oid *oid, *domainoid;
26313b490537SJeff Roberson 	int domains, i, cnt;
263220a4e154SJeff Roberson 	static const char *nokeg = "cache zone";
263320a4e154SJeff Roberson 	char *c;
263420a4e154SJeff Roberson 
263520a4e154SJeff Roberson 	/*
263620a4e154SJeff Roberson 	 * Make a sysctl safe copy of the zone name by removing
263720a4e154SJeff Roberson 	 * any special characters and handling dups by appending
263820a4e154SJeff Roberson 	 * an index.
263920a4e154SJeff Roberson 	 */
264020a4e154SJeff Roberson 	if (zone->uz_namecnt != 0) {
26413b490537SJeff Roberson 		/* Count the number of decimal digits and '_' separator. */
26423b490537SJeff Roberson 		for (i = 1, cnt = zone->uz_namecnt; cnt != 0; i++)
26433b490537SJeff Roberson 			cnt /= 10;
26443b490537SJeff Roberson 		zone->uz_ctlname = malloc(strlen(zone->uz_name) + i + 1,
26453b490537SJeff Roberson 		    M_UMA, M_WAITOK);
264620a4e154SJeff Roberson 		sprintf(zone->uz_ctlname, "%s_%d", zone->uz_name,
264720a4e154SJeff Roberson 		    zone->uz_namecnt);
264820a4e154SJeff Roberson 	} else
264920a4e154SJeff Roberson 		zone->uz_ctlname = strdup(zone->uz_name, M_UMA);
265020a4e154SJeff Roberson 	for (c = zone->uz_ctlname; *c != '\0'; c++)
265120a4e154SJeff Roberson 		if (strchr("./\\ -", *c) != NULL)
265220a4e154SJeff Roberson 			*c = '_';
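	/*
	 * For example (hypothetical names): a zone called "64 Bucket" is
	 * exported as the sysctl node "64_Bucket", and a second zone created
	 * with an already-used name "foo" becomes "foo_1".
	 */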
265320a4e154SJeff Roberson 
265420a4e154SJeff Roberson 	/*
265520a4e154SJeff Roberson 	 * Basic parameters at the root.
265620a4e154SJeff Roberson 	 */
265720a4e154SJeff Roberson 	zone->uz_oid = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_vm_uma),
26587029da5cSPawel Biernacki 	    OID_AUTO, zone->uz_ctlname, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
265920a4e154SJeff Roberson 	oid = zone->uz_oid;
266020a4e154SJeff Roberson 	SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
266120a4e154SJeff Roberson 	    "size", CTLFLAG_RD, &zone->uz_size, 0, "Allocation size");
26626d204a6aSRyan Libby 	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
26636d204a6aSRyan Libby 	    "flags", CTLFLAG_RD | CTLTYPE_STRING | CTLFLAG_MPSAFE,
26646d204a6aSRyan Libby 	    zone, 0, sysctl_handle_uma_zone_flags, "A",
266520a4e154SJeff Roberson 	    "Allocator configuration flags");
266620a4e154SJeff Roberson 	SYSCTL_ADD_U16(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
266720a4e154SJeff Roberson 	    "bucket_size", CTLFLAG_RD, &zone->uz_bucket_size, 0,
266820a4e154SJeff Roberson 	    "Desired per-cpu cache size");
266920a4e154SJeff Roberson 	SYSCTL_ADD_U16(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
267020a4e154SJeff Roberson 	    "bucket_size_max", CTLFLAG_RD, &zone->uz_bucket_size_max, 0,
267120a4e154SJeff Roberson 	    "Maximum allowed per-cpu cache size");
267220a4e154SJeff Roberson 
267320a4e154SJeff Roberson 	/*
267420a4e154SJeff Roberson 	 * keg if present.
267520a4e154SJeff Roberson 	 */
267654c5ae80SRyan Libby 	if ((zone->uz_flags & UMA_ZFLAG_HASH) == 0)
26778b987a77SJeff Roberson 		domains = vm_ndomains;
26788b987a77SJeff Roberson 	else
26798b987a77SJeff Roberson 		domains = 1;
268020a4e154SJeff Roberson 	oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid), OID_AUTO,
26817029da5cSPawel Biernacki 	    "keg", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
268220a4e154SJeff Roberson 	keg = zone->uz_keg;
26833b490537SJeff Roberson 	if ((zone->uz_flags & UMA_ZFLAG_CACHE) == 0) {
268420a4e154SJeff Roberson 		SYSCTL_ADD_CONST_STRING(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
268520a4e154SJeff Roberson 		    "name", CTLFLAG_RD, keg->uk_name, "Keg name");
268620a4e154SJeff Roberson 		SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
268720a4e154SJeff Roberson 		    "rsize", CTLFLAG_RD, &keg->uk_rsize, 0,
268820a4e154SJeff Roberson 		    "Real object size with alignment");
268920a4e154SJeff Roberson 		SYSCTL_ADD_U16(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
269020a4e154SJeff Roberson 		    "ppera", CTLFLAG_RD, &keg->uk_ppera, 0,
269120a4e154SJeff Roberson 		    "pages per-slab allocation");
269220a4e154SJeff Roberson 		SYSCTL_ADD_U16(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
269320a4e154SJeff Roberson 		    "ipers", CTLFLAG_RD, &keg->uk_ipers, 0,
269420a4e154SJeff Roberson 		    "items available per-slab");
269520a4e154SJeff Roberson 		SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
269620a4e154SJeff Roberson 		    "align", CTLFLAG_RD, &keg->uk_align, 0,
269720a4e154SJeff Roberson 		    "item alignment mask");
2698f09cbea3SMark Johnston 		SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2699f09cbea3SMark Johnston 		    "reserve", CTLFLAG_RD, &keg->uk_reserve, 0,
2700f09cbea3SMark Johnston 		    "number of reserved items");
2701f7af5015SRyan Libby 		SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2702f7af5015SRyan Libby 		    "efficiency", CTLFLAG_RD | CTLTYPE_INT | CTLFLAG_MPSAFE,
2703f7af5015SRyan Libby 		    keg, 0, sysctl_handle_uma_slab_efficiency, "I",
2704f7af5015SRyan Libby 		    "Slab utilization (100 - internal fragmentation %)");
27058b987a77SJeff Roberson 		domainoid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(oid),
27067029da5cSPawel Biernacki 		    OID_AUTO, "domain", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
27078b987a77SJeff Roberson 		for (i = 0; i < domains; i++) {
27088b987a77SJeff Roberson 			dom = &keg->uk_domain[i];
27098b987a77SJeff Roberson 			oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(domainoid),
27107029da5cSPawel Biernacki 			    OID_AUTO, VM_DOMAIN(i)->vmd_name,
27117029da5cSPawel Biernacki 			    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
27128b987a77SJeff Roberson 			SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
27138b987a77SJeff Roberson 			    "pages", CTLFLAG_RD, &dom->ud_pages, 0,
27148b987a77SJeff Roberson 			    "Total pages currently allocated from VM");
27158b987a77SJeff Roberson 			SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
27164ab3aee8SMark Johnston 			    "free_items", CTLFLAG_RD, &dom->ud_free_items, 0,
2717d6e77cdaSMark Johnston 			    "Items free in the slab layer");
2718d6e77cdaSMark Johnston 			SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2719d6e77cdaSMark Johnston 			    "free_slabs", CTLFLAG_RD, &dom->ud_free_slabs, 0,
2720d6e77cdaSMark Johnston 			    "Unused slabs");
27218b987a77SJeff Roberson 		}
272220a4e154SJeff Roberson 	} else
272320a4e154SJeff Roberson 		SYSCTL_ADD_CONST_STRING(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
272420a4e154SJeff Roberson 		    "name", CTLFLAG_RD, nokeg, "Keg name");
272520a4e154SJeff Roberson 
272620a4e154SJeff Roberson 	/*
272720a4e154SJeff Roberson 	 * Information about zone limits.
272820a4e154SJeff Roberson 	 */
272920a4e154SJeff Roberson 	oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid), OID_AUTO,
27307029da5cSPawel Biernacki 	    "limit", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
27314bd61e19SJeff Roberson 	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
27324bd61e19SJeff Roberson 	    "items", CTLFLAG_RD | CTLTYPE_U64 | CTLFLAG_MPSAFE,
27334bd61e19SJeff Roberson 	    zone, 0, sysctl_handle_uma_zone_items, "QU",
2734e574d407SMark Johnston 	    "Current number of allocated items if limit is set");
273520a4e154SJeff Roberson 	SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
273620a4e154SJeff Roberson 	    "max_items", CTLFLAG_RD, &zone->uz_max_items, 0,
2737e574d407SMark Johnston 	    "Maximum number of allocated and cached items");
273820a4e154SJeff Roberson 	SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
273920a4e154SJeff Roberson 	    "sleepers", CTLFLAG_RD, &zone->uz_sleepers, 0,
274020a4e154SJeff Roberson 	    "Number of threads sleeping at limit");
274120a4e154SJeff Roberson 	SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
274220a4e154SJeff Roberson 	    "sleeps", CTLFLAG_RD, &zone->uz_sleeps, 0,
274320a4e154SJeff Roberson 	    "Total zone limit sleeps");
27444bd61e19SJeff Roberson 	SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2745c6fd3e23SJeff Roberson 	    "bucket_max", CTLFLAG_RD, &zone->uz_bucket_max, 0,
2746c6fd3e23SJeff Roberson 	    "Maximum number of items in each domain's bucket cache");
274720a4e154SJeff Roberson 
274820a4e154SJeff Roberson 	/*
27498b987a77SJeff Roberson 	 * Per-domain zone information.
275020a4e154SJeff Roberson 	 */
275120a4e154SJeff Roberson 	domainoid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid),
27527029da5cSPawel Biernacki 	    OID_AUTO, "domain", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
275320a4e154SJeff Roberson 	for (i = 0; i < domains; i++) {
2754c6fd3e23SJeff Roberson 		zdom = ZDOM_GET(zone, i);
275520a4e154SJeff Roberson 		oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(domainoid),
27567029da5cSPawel Biernacki 		    OID_AUTO, VM_DOMAIN(i)->vmd_name,
27577029da5cSPawel Biernacki 		    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
275820a4e154SJeff Roberson 		SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
275920a4e154SJeff Roberson 		    "nitems", CTLFLAG_RD, &zdom->uzd_nitems,
276020a4e154SJeff Roberson 		    "number of items in this domain");
276120a4e154SJeff Roberson 		SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
276220a4e154SJeff Roberson 		    "imax", CTLFLAG_RD, &zdom->uzd_imax,
276320a4e154SJeff Roberson 		    "maximum item count in this period");
276420a4e154SJeff Roberson 		SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
276520a4e154SJeff Roberson 		    "imin", CTLFLAG_RD, &zdom->uzd_imin,
276620a4e154SJeff Roberson 		    "minimum item count in this period");
276720a4e154SJeff Roberson 		SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
27682760658bSAlexander Motin 		    "bimin", CTLFLAG_RD, &zdom->uzd_bimin,
27692760658bSAlexander Motin 		    "Minimum item count in this batch");
27702760658bSAlexander Motin 		SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
277120a4e154SJeff Roberson 		    "wss", CTLFLAG_RD, &zdom->uzd_wss,
277220a4e154SJeff Roberson 		    "Working set size");
27732760658bSAlexander Motin 		SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
27742760658bSAlexander Motin 		    "limin", CTLFLAG_RD, &zdom->uzd_limin,
27752760658bSAlexander Motin 		    "Long time minimum item count");
27762760658bSAlexander Motin 		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
27772760658bSAlexander Motin 		    "timin", CTLFLAG_RD, &zdom->uzd_timin, 0,
27782760658bSAlexander Motin 		    "Time since zero long time minimum item count");
277920a4e154SJeff Roberson 	}
278020a4e154SJeff Roberson 
278120a4e154SJeff Roberson 	/*
278220a4e154SJeff Roberson 	 * General statistics.
278320a4e154SJeff Roberson 	 */
278420a4e154SJeff Roberson 	oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid), OID_AUTO,
27857029da5cSPawel Biernacki 	    "stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
278620a4e154SJeff Roberson 	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
278720a4e154SJeff Roberson 	    "current", CTLFLAG_RD | CTLTYPE_INT | CTLFLAG_MPSAFE,
278820a4e154SJeff Roberson 	    zone, 1, sysctl_handle_uma_zone_cur, "I",
278920a4e154SJeff Roberson 	    "Current number of allocated items");
279020a4e154SJeff Roberson 	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
279120a4e154SJeff Roberson 	    "allocs", CTLFLAG_RD | CTLTYPE_U64 | CTLFLAG_MPSAFE,
279220a4e154SJeff Roberson 	    zone, 0, sysctl_handle_uma_zone_allocs, "QU",
279320a4e154SJeff Roberson 	    "Total allocation calls");
279420a4e154SJeff Roberson 	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
279520a4e154SJeff Roberson 	    "frees", CTLFLAG_RD | CTLTYPE_U64 | CTLFLAG_MPSAFE,
279620a4e154SJeff Roberson 	    zone, 0, sysctl_handle_uma_zone_frees, "QU",
279720a4e154SJeff Roberson 	    "Total free calls");
279820a4e154SJeff Roberson 	SYSCTL_ADD_COUNTER_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
279920a4e154SJeff Roberson 	    "fails", CTLFLAG_RD, &zone->uz_fails,
280020a4e154SJeff Roberson 	    "Number of allocation failures");
2801c6fd3e23SJeff Roberson 	SYSCTL_ADD_COUNTER_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2802c6fd3e23SJeff Roberson 	    "xdomain", CTLFLAG_RD, &zone->uz_xdomain,
280320a4e154SJeff Roberson 	    "Free calls from the wrong domain");
280420a4e154SJeff Roberson }
280520a4e154SJeff Roberson 
280620a4e154SJeff Roberson struct uma_zone_count {
280720a4e154SJeff Roberson 	const char	*name;
280820a4e154SJeff Roberson 	int		count;
280920a4e154SJeff Roberson };
281020a4e154SJeff Roberson 
281120a4e154SJeff Roberson static void
281220a4e154SJeff Roberson zone_count(uma_zone_t zone, void *arg)
281320a4e154SJeff Roberson {
281420a4e154SJeff Roberson 	struct uma_zone_count *cnt;
281520a4e154SJeff Roberson 
281620a4e154SJeff Roberson 	cnt = arg;
28173b490537SJeff Roberson 	/*
28183b490537SJeff Roberson 	 * Some zones are rapidly created with identical names and
28193b490537SJeff Roberson 	 * destroyed out of order.  This can lead to gaps in the count.
28203b490537SJeff Roberson 	 * Use one greater than the maximum observed for this name.
28213b490537SJeff Roberson 	 */
282220a4e154SJeff Roberson 	if (strcmp(zone->uz_name, cnt->name) == 0)
28233b490537SJeff Roberson 		cnt->count = MAX(cnt->count,
28243b490537SJeff Roberson 		    zone->uz_namecnt + 1);
282520a4e154SJeff Roberson }
282620a4e154SJeff Roberson 
2827cc7ce83aSJeff Roberson static void
2828cc7ce83aSJeff Roberson zone_update_caches(uma_zone_t zone)
2829cc7ce83aSJeff Roberson {
2830cc7ce83aSJeff Roberson 	int i;
2831cc7ce83aSJeff Roberson 
2832cc7ce83aSJeff Roberson 	for (i = 0; i <= mp_maxid; i++) {
2833cc7ce83aSJeff Roberson 		cache_set_uz_size(&zone->uz_cpu[i], zone->uz_size);
2834cc7ce83aSJeff Roberson 		cache_set_uz_flags(&zone->uz_cpu[i], zone->uz_flags);
2835cc7ce83aSJeff Roberson 	}
2836cc7ce83aSJeff Roberson }
2837cc7ce83aSJeff Roberson 
2838099a0e58SBosko Milekic /*
2839099a0e58SBosko Milekic  * Zone header ctor.  This initializes all fields, locks, etc.
2840099a0e58SBosko Milekic  *
2841099a0e58SBosko Milekic  * Arguments/Returns follow uma_ctor specifications
2842099a0e58SBosko Milekic  *	udata  Actually uma_zctor_args
28438355f576SJeff Roberson  */
2844b23f72e9SBrian Feldman static int
2845b23f72e9SBrian Feldman zone_ctor(void *mem, int size, void *udata, int flags)
28468355f576SJeff Roberson {
284720a4e154SJeff Roberson 	struct uma_zone_count cnt;
28488355f576SJeff Roberson 	struct uma_zctor_args *arg = udata;
2849c6fd3e23SJeff Roberson 	uma_zone_domain_t zdom;
28508355f576SJeff Roberson 	uma_zone_t zone = mem;
2851099a0e58SBosko Milekic 	uma_zone_t z;
2852099a0e58SBosko Milekic 	uma_keg_t keg;
285308cfa56eSMark Johnston 	int i;
28548355f576SJeff Roberson 
28558355f576SJeff Roberson 	bzero(zone, size);
28568355f576SJeff Roberson 	zone->uz_name = arg->name;
28578355f576SJeff Roberson 	zone->uz_ctor = arg->ctor;
28588355f576SJeff Roberson 	zone->uz_dtor = arg->dtor;
2859099a0e58SBosko Milekic 	zone->uz_init = NULL;
2860099a0e58SBosko Milekic 	zone->uz_fini = NULL;
2861bf965959SSean Bruno 	zone->uz_sleeps = 0;
286220a4e154SJeff Roberson 	zone->uz_bucket_size = 0;
286320a4e154SJeff Roberson 	zone->uz_bucket_size_min = 0;
286420a4e154SJeff Roberson 	zone->uz_bucket_size_max = BUCKET_MAX;
2865d4665eaaSJeff Roberson 	zone->uz_flags = (arg->flags & UMA_ZONE_SMR);
28662f891cd5SPawel Jakub Dawidek 	zone->uz_warning = NULL;
2867ab3185d1SJeff Roberson 	/* The domain structures follow the cpu structures. */
2868c6fd3e23SJeff Roberson 	zone->uz_bucket_max = ULONG_MAX;
28692f891cd5SPawel Jakub Dawidek 	timevalclear(&zone->uz_ratecheck);
2870af526374SJeff Roberson 
287120a4e154SJeff Roberson 	/* Count the number of duplicate names. */
287220a4e154SJeff Roberson 	cnt.name = arg->name;
287320a4e154SJeff Roberson 	cnt.count = 0;
287420a4e154SJeff Roberson 	zone_foreach(zone_count, &cnt);
287520a4e154SJeff Roberson 	zone->uz_namecnt = cnt.count;
287691d947bfSJeff Roberson 	ZONE_CROSS_LOCK_INIT(zone);
28772efcc8cbSGleb Smirnoff 
2878c6fd3e23SJeff Roberson 	for (i = 0; i < vm_ndomains; i++) {
2879c6fd3e23SJeff Roberson 		zdom = ZDOM_GET(zone, i);
2880c6fd3e23SJeff Roberson 		ZDOM_LOCK_INIT(zone, zdom, (arg->flags & UMA_ZONE_MTXCLASS));
2881c6fd3e23SJeff Roberson 		STAILQ_INIT(&zdom->uzd_buckets);
2882c6fd3e23SJeff Roberson 	}
288308cfa56eSMark Johnston 
288410094910SMark Johnston #if defined(INVARIANTS) && !defined(KASAN) && !defined(KMSAN)
2885ca293436SRyan Libby 	if (arg->uminit == trash_init && arg->fini == trash_fini)
2886cc7ce83aSJeff Roberson 		zone->uz_flags |= UMA_ZFLAG_TRASH | UMA_ZFLAG_CTORDTOR;
288709c8cb71SMark Johnston #elif defined(KASAN)
288809c8cb71SMark Johnston 	if ((arg->flags & (UMA_ZONE_NOFREE | UMA_ZFLAG_CACHE)) != 0)
288909c8cb71SMark Johnston 		arg->flags |= UMA_ZONE_NOKASAN;
2890ca293436SRyan Libby #endif
2891ca293436SRyan Libby 
28920095a784SJeff Roberson 	/*
28930095a784SJeff Roberson 	 * This is a pure cache zone, no kegs.
28940095a784SJeff Roberson 	 */
28950095a784SJeff Roberson 	if (arg->import) {
2896727c6918SJeff Roberson 		KASSERT((arg->flags & UMA_ZFLAG_CACHE) != 0,
2897727c6918SJeff Roberson 		    ("zone_ctor: Import specified for non-cache zone."));
28986fd34d6fSJeff Roberson 		zone->uz_flags = arg->flags;
2899af526374SJeff Roberson 		zone->uz_size = arg->size;
29000095a784SJeff Roberson 		zone->uz_import = arg->import;
29010095a784SJeff Roberson 		zone->uz_release = arg->release;
29020095a784SJeff Roberson 		zone->uz_arg = arg->arg;
2903c6fd3e23SJeff Roberson #ifdef NUMA
2904c6fd3e23SJeff Roberson 		/*
2905c6fd3e23SJeff Roberson 		 * Cache zones are round-robin unless a policy is
2906c6fd3e23SJeff Roberson 		 * specified because they may have incompatible
2907c6fd3e23SJeff Roberson 		 * constraints.
2908c6fd3e23SJeff Roberson 		 */
2909c6fd3e23SJeff Roberson 		if ((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) == 0)
2910c6fd3e23SJeff Roberson 			zone->uz_flags |= UMA_ZONE_ROUNDROBIN;
2911c6fd3e23SJeff Roberson #endif
2912111fbcd5SBryan Venteicher 		rw_wlock(&uma_rwlock);
291303175483SAlexander Motin 		LIST_INSERT_HEAD(&uma_cachezones, zone, uz_link);
2914111fbcd5SBryan Venteicher 		rw_wunlock(&uma_rwlock);
2915af526374SJeff Roberson 		goto out;
29160095a784SJeff Roberson 	}
29170095a784SJeff Roberson 
29180095a784SJeff Roberson 	/*
29190095a784SJeff Roberson 	 * Use the regular zone/keg/slab allocator.
29200095a784SJeff Roberson 	 */
2921b75c4efcSAndrew Turner 	zone->uz_import = zone_import;
2922b75c4efcSAndrew Turner 	zone->uz_release = zone_release;
29230095a784SJeff Roberson 	zone->uz_arg = zone;
2924bb15d1c7SGleb Smirnoff 	keg = arg->keg;
29250095a784SJeff Roberson 
2926099a0e58SBosko Milekic 	if (arg->flags & UMA_ZONE_SECONDARY) {
292720a4e154SJeff Roberson 		KASSERT((zone->uz_flags & UMA_ZONE_SECONDARY) == 0,
292820a4e154SJeff Roberson 		    ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
2929099a0e58SBosko Milekic 		KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
29308355f576SJeff Roberson 		zone->uz_init = arg->uminit;
2931e221e841SJeff Roberson 		zone->uz_fini = arg->fini;
2932e20a199fSJeff Roberson 		zone->uz_flags |= UMA_ZONE_SECONDARY;
2933111fbcd5SBryan Venteicher 		rw_wlock(&uma_rwlock);
2934099a0e58SBosko Milekic 		ZONE_LOCK(zone);
2935099a0e58SBosko Milekic 		LIST_FOREACH(z, &keg->uk_zones, uz_link) {
2936099a0e58SBosko Milekic 			if (LIST_NEXT(z, uz_link) == NULL) {
2937099a0e58SBosko Milekic 				LIST_INSERT_AFTER(z, zone, uz_link);
2938099a0e58SBosko Milekic 				break;
2939099a0e58SBosko Milekic 			}
2940099a0e58SBosko Milekic 		}
2941099a0e58SBosko Milekic 		ZONE_UNLOCK(zone);
2942111fbcd5SBryan Venteicher 		rw_wunlock(&uma_rwlock);
2943e20a199fSJeff Roberson 	} else if (keg == NULL) {
2944e20a199fSJeff Roberson 		if ((keg = uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
2945e20a199fSJeff Roberson 		    arg->align, arg->flags)) == NULL)
2946b23f72e9SBrian Feldman 			return (ENOMEM);
2947099a0e58SBosko Milekic 	} else {
2948099a0e58SBosko Milekic 		struct uma_kctor_args karg;
2949b23f72e9SBrian Feldman 		int error;
2950099a0e58SBosko Milekic 
2951099a0e58SBosko Milekic 		/* We should only be here from uma_startup() */
2952099a0e58SBosko Milekic 		karg.size = arg->size;
2953099a0e58SBosko Milekic 		karg.uminit = arg->uminit;
2954099a0e58SBosko Milekic 		karg.fini = arg->fini;
2955099a0e58SBosko Milekic 		karg.align = arg->align;
2956d4665eaaSJeff Roberson 		karg.flags = (arg->flags & ~UMA_ZONE_SMR);
2957099a0e58SBosko Milekic 		karg.zone = zone;
2958b23f72e9SBrian Feldman 		error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
2959b23f72e9SBrian Feldman 		    flags);
2960b23f72e9SBrian Feldman 		if (error)
2961b23f72e9SBrian Feldman 			return (error);
2962099a0e58SBosko Milekic 	}
29630095a784SJeff Roberson 
296420a4e154SJeff Roberson 	/* Inherit properties from the keg. */
2965bb15d1c7SGleb Smirnoff 	zone->uz_keg = keg;
2966e20a199fSJeff Roberson 	zone->uz_size = keg->uk_size;
2967e20a199fSJeff Roberson 	zone->uz_flags |= (keg->uk_flags &
2968e20a199fSJeff Roberson 	    (UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT));
29698355f576SJeff Roberson 
297020a4e154SJeff Roberson out:
2971dc2b3205SMark Johnston 	if (booted >= BOOT_PCPU) {
297220a4e154SJeff Roberson 		zone_alloc_counters(zone, NULL);
2973dc2b3205SMark Johnston 		if (booted >= BOOT_RUNNING)
297420a4e154SJeff Roberson 			zone_alloc_sysctl(zone, NULL);
297520a4e154SJeff Roberson 	} else {
297620a4e154SJeff Roberson 		zone->uz_allocs = EARLY_COUNTER;
297720a4e154SJeff Roberson 		zone->uz_frees = EARLY_COUNTER;
297820a4e154SJeff Roberson 		zone->uz_fails = EARLY_COUNTER;
2979099a0e58SBosko Milekic 	}
29808355f576SJeff Roberson 
2981d4665eaaSJeff Roberson 	/* Caller requests a private SMR context. */
2982d4665eaaSJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_SMR) != 0)
2983226dd6dbSJeff Roberson 		zone->uz_smr = smr_create(zone->uz_name, 0, 0);
2984d4665eaaSJeff Roberson 
29857e28037aSMark Johnston 	KASSERT((arg->flags & (UMA_ZONE_MAXBUCKET | UMA_ZONE_NOBUCKET)) !=
29867e28037aSMark Johnston 	    (UMA_ZONE_MAXBUCKET | UMA_ZONE_NOBUCKET),
29877e28037aSMark Johnston 	    ("Invalid zone flag combination"));
298820a4e154SJeff Roberson 	if (arg->flags & UMA_ZFLAG_INTERNAL)
298920a4e154SJeff Roberson 		zone->uz_bucket_size_max = zone->uz_bucket_size = 0;
299020a4e154SJeff Roberson 	if ((arg->flags & UMA_ZONE_MAXBUCKET) != 0)
299120a4e154SJeff Roberson 		zone->uz_bucket_size = BUCKET_MAX;
299220a4e154SJeff Roberson 	else if ((arg->flags & UMA_ZONE_NOBUCKET) != 0)
299320a4e154SJeff Roberson 		zone->uz_bucket_size = 0;
29947e28037aSMark Johnston 	else
299520a4e154SJeff Roberson 		zone->uz_bucket_size = bucket_select(zone->uz_size);
299620a4e154SJeff Roberson 	zone->uz_bucket_size_min = zone->uz_bucket_size;
2997cc7ce83aSJeff Roberson 	if (zone->uz_dtor != NULL || zone->uz_ctor != NULL)
2998cc7ce83aSJeff Roberson 		zone->uz_flags |= UMA_ZFLAG_CTORDTOR;
2999cc7ce83aSJeff Roberson 	zone_update_caches(zone);
3000fc03d22bSJeff Roberson 
3001b23f72e9SBrian Feldman 	return (0);
30028355f576SJeff Roberson }
30038355f576SJeff Roberson 
30048355f576SJeff Roberson /*
3005099a0e58SBosko Milekic  * Keg header dtor.  This frees all data, destroys locks, frees the hash
3006099a0e58SBosko Milekic  * table and removes the keg from the global list.
30079c2cd7e5SJeff Roberson  *
30089c2cd7e5SJeff Roberson  * Arguments/Returns follow uma_dtor specifications
30099c2cd7e5SJeff Roberson  *	udata  unused
30109c2cd7e5SJeff Roberson  */
3011099a0e58SBosko Milekic static void
3012099a0e58SBosko Milekic keg_dtor(void *arg, int size, void *udata)
3013099a0e58SBosko Milekic {
3014099a0e58SBosko Milekic 	uma_keg_t keg;
30158b987a77SJeff Roberson 	uint32_t free, pages;
30168b987a77SJeff Roberson 	int i;
30179c2cd7e5SJeff Roberson 
3018099a0e58SBosko Milekic 	keg = (uma_keg_t)arg;
30198b987a77SJeff Roberson 	free = pages = 0;
30208b987a77SJeff Roberson 	for (i = 0; i < vm_ndomains; i++) {
30214ab3aee8SMark Johnston 		free += keg->uk_domain[i].ud_free_items;
30228b987a77SJeff Roberson 		pages += keg->uk_domain[i].ud_pages;
30238b987a77SJeff Roberson 		KEG_LOCK_FINI(keg, i);
3024099a0e58SBosko Milekic 	}
30257e240677SRyan Libby 	if (pages != 0)
30268b987a77SJeff Roberson 		printf("Freed UMA keg (%s) was not empty (%u items). "
30278b987a77SJeff Roberson 		    " Lost %u pages of memory.\n",
30288b987a77SJeff Roberson 		    keg->uk_name ? keg->uk_name : "",
30297e240677SRyan Libby 		    pages / keg->uk_ppera * keg->uk_ipers - free, pages);
3030099a0e58SBosko Milekic 
3031099a0e58SBosko Milekic 	hash_free(&keg->uk_hash);
3032099a0e58SBosko Milekic }
3033099a0e58SBosko Milekic 
3034099a0e58SBosko Milekic /*
3035099a0e58SBosko Milekic  * Zone header dtor.
3036099a0e58SBosko Milekic  *
3037099a0e58SBosko Milekic  * Arguments/Returns follow uma_dtor specifications
3038099a0e58SBosko Milekic  *	udata  unused
3039099a0e58SBosko Milekic  */
30409c2cd7e5SJeff Roberson static void
30419c2cd7e5SJeff Roberson zone_dtor(void *arg, int size, void *udata)
30429c2cd7e5SJeff Roberson {
30439c2cd7e5SJeff Roberson 	uma_zone_t zone;
3044099a0e58SBosko Milekic 	uma_keg_t keg;
3045c6fd3e23SJeff Roberson 	int i;
30469c2cd7e5SJeff Roberson 
30479c2cd7e5SJeff Roberson 	zone = (uma_zone_t)arg;
30489643769aSJeff Roberson 
304920a4e154SJeff Roberson 	sysctl_remove_oid(zone->uz_oid, 1, 1);
305020a4e154SJeff Roberson 
3051e20a199fSJeff Roberson 	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
30529643769aSJeff Roberson 		cache_drain(zone);
3053099a0e58SBosko Milekic 
3054111fbcd5SBryan Venteicher 	rw_wlock(&uma_rwlock);
3055099a0e58SBosko Milekic 	LIST_REMOVE(zone, uz_link);
3056111fbcd5SBryan Venteicher 	rw_wunlock(&uma_rwlock);
30577b516613SJonathan T. Looney 	if ((zone->uz_flags & (UMA_ZONE_SECONDARY | UMA_ZFLAG_CACHE)) == 0) {
30587b516613SJonathan T. Looney 		keg = zone->uz_keg;
30597b516613SJonathan T. Looney 		keg->uk_reserve = 0;
30607b516613SJonathan T. Looney 	}
3061aabe13f1SMark Johnston 	zone_reclaim(zone, UMA_ANYDOMAIN, M_WAITOK, true);
3062c6fd3e23SJeff Roberson 
3063e20a199fSJeff Roberson 	/*
3064323ad386STycho Nightingale 	 * We only destroy kegs from non secondary/non cache zones.
3065e20a199fSJeff Roberson 	 */
3066323ad386STycho Nightingale 	if ((zone->uz_flags & (UMA_ZONE_SECONDARY | UMA_ZFLAG_CACHE)) == 0) {
3067323ad386STycho Nightingale 		keg = zone->uz_keg;
3068111fbcd5SBryan Venteicher 		rw_wlock(&uma_rwlock);
3069099a0e58SBosko Milekic 		LIST_REMOVE(keg, uk_link);
3070111fbcd5SBryan Venteicher 		rw_wunlock(&uma_rwlock);
30710095a784SJeff Roberson 		zone_free_item(kegs, keg, NULL, SKIP_NONE);
30729c2cd7e5SJeff Roberson 	}
30732efcc8cbSGleb Smirnoff 	counter_u64_free(zone->uz_allocs);
30742efcc8cbSGleb Smirnoff 	counter_u64_free(zone->uz_frees);
30752efcc8cbSGleb Smirnoff 	counter_u64_free(zone->uz_fails);
3076c6fd3e23SJeff Roberson 	counter_u64_free(zone->uz_xdomain);
307720a4e154SJeff Roberson 	free(zone->uz_ctlname, M_UMA);
3078c6fd3e23SJeff Roberson 	for (i = 0; i < vm_ndomains; i++)
3079c6fd3e23SJeff Roberson 		ZDOM_LOCK_FINI(ZDOM_GET(zone, i));
308091d947bfSJeff Roberson 	ZONE_CROSS_LOCK_FINI(zone);
3081099a0e58SBosko Milekic }
3082099a0e58SBosko Milekic 
3083a81c400eSJeff Roberson static void
3084a81c400eSJeff Roberson zone_foreach_unlocked(void (*zfunc)(uma_zone_t, void *arg), void *arg)
3085a81c400eSJeff Roberson {
3086a81c400eSJeff Roberson 	uma_keg_t keg;
3087a81c400eSJeff Roberson 	uma_zone_t zone;
3088a81c400eSJeff Roberson 
3089a81c400eSJeff Roberson 	LIST_FOREACH(keg, &uma_kegs, uk_link) {
3090a81c400eSJeff Roberson 		LIST_FOREACH(zone, &keg->uk_zones, uz_link)
3091a81c400eSJeff Roberson 			zfunc(zone, arg);
3092a81c400eSJeff Roberson 	}
3093a81c400eSJeff Roberson 	LIST_FOREACH(zone, &uma_cachezones, uz_link)
3094a81c400eSJeff Roberson 		zfunc(zone, arg);
3095a81c400eSJeff Roberson }
3096a81c400eSJeff Roberson 
30979c2cd7e5SJeff Roberson /*
30988355f576SJeff Roberson  * Traverses every zone in the system and calls a callback
30998355f576SJeff Roberson  *
31008355f576SJeff Roberson  * Arguments:
31018355f576SJeff Roberson  *	zfunc  A pointer to a function which accepts a zone
31028355f576SJeff Roberson  *		as an argument.
31038355f576SJeff Roberson  *
31048355f576SJeff Roberson  * Returns:
31058355f576SJeff Roberson  *	Nothing
31068355f576SJeff Roberson  */
31078355f576SJeff Roberson static void
310820a4e154SJeff Roberson zone_foreach(void (*zfunc)(uma_zone_t, void *arg), void *arg)
31098355f576SJeff Roberson {
31108355f576SJeff Roberson 
3111111fbcd5SBryan Venteicher 	rw_rlock(&uma_rwlock);
3112a81c400eSJeff Roberson 	zone_foreach_unlocked(zfunc, arg);
3113111fbcd5SBryan Venteicher 	rw_runlock(&uma_rwlock);
31148355f576SJeff Roberson }
31158355f576SJeff Roberson 
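/*
 * Example (illustrative sketch): a callback with the signature expected by
 * zone_foreach() and zone_foreach_unlocked().  The helper name
 * "zone_print_name" is hypothetical.
 *
 *	static void
 *	zone_print_name(uma_zone_t zone, void *arg __unused)
 *	{
 *		printf("uma zone: %s\n", zone->uz_name);
 *	}
 *
 *	zone_foreach(zone_print_name, NULL);
 */
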
3116f4bef67cSGleb Smirnoff /*
3117a81c400eSJeff Roberson  * Initialize the kernel memory allocator.  This is done after pages can be
3118a81c400eSJeff Roberson  * allocated but before general KVA is available.
3119f4bef67cSGleb Smirnoff  */
3120a81c400eSJeff Roberson void
3121a81c400eSJeff Roberson uma_startup1(vm_offset_t virtual_avail)
3122f4bef67cSGleb Smirnoff {
3123a81c400eSJeff Roberson 	struct uma_zctor_args args;
3124a81c400eSJeff Roberson 	size_t ksize, zsize, size;
3125c8b0a88bSJeff Roberson 	uma_keg_t primarykeg;
3126a81c400eSJeff Roberson 	uintptr_t m;
312781302f1dSMark Johnston 	int domain;
3128a81c400eSJeff Roberson 	uint8_t pflag;
3129a81c400eSJeff Roberson 
3130a81c400eSJeff Roberson 	bootstart = bootmem = virtual_avail;
3131a81c400eSJeff Roberson 
3132a81c400eSJeff Roberson 	rw_init(&uma_rwlock, "UMA lock");
3133a81c400eSJeff Roberson 	sx_init(&uma_reclaim_lock, "umareclaim");
3134f4bef67cSGleb Smirnoff 
3135f4bef67cSGleb Smirnoff 	ksize = sizeof(struct uma_keg) +
3136f4bef67cSGleb Smirnoff 	    (sizeof(struct uma_domain) * vm_ndomains);
313779c9f942SJeff Roberson 	ksize = roundup(ksize, UMA_SUPER_ALIGN);
3138f4bef67cSGleb Smirnoff 	zsize = sizeof(struct uma_zone) +
3139f4bef67cSGleb Smirnoff 	    (sizeof(struct uma_cache) * (mp_maxid + 1)) +
3140f4bef67cSGleb Smirnoff 	    (sizeof(struct uma_zone_domain) * vm_ndomains);
314179c9f942SJeff Roberson 	zsize = roundup(zsize, UMA_SUPER_ALIGN);
3142f4bef67cSGleb Smirnoff 
3143a81c400eSJeff Roberson 	/* Allocate the zone of zones, zone of kegs, and zone of zones keg. */
3144a81c400eSJeff Roberson 	size = (zsize * 2) + ksize;
314581302f1dSMark Johnston 	for (domain = 0; domain < vm_ndomains; domain++) {
314681302f1dSMark Johnston 		m = (uintptr_t)startup_alloc(NULL, size, domain, &pflag,
314781302f1dSMark Johnston 		    M_NOWAIT | M_ZERO);
314881302f1dSMark Johnston 		if (m != 0)
314981302f1dSMark Johnston 			break;
315081302f1dSMark Johnston 	}
3151ab3185d1SJeff Roberson 	zones = (uma_zone_t)m;
315279c9f942SJeff Roberson 	m += zsize;
3153ab3185d1SJeff Roberson 	kegs = (uma_zone_t)m;
315479c9f942SJeff Roberson 	m += zsize;
3155c8b0a88bSJeff Roberson 	primarykeg = (uma_keg_t)m;
3156ab3185d1SJeff Roberson 
3157099a0e58SBosko Milekic 	/* "manually" create the initial zone */
31580095a784SJeff Roberson 	memset(&args, 0, sizeof(args));
3159099a0e58SBosko Milekic 	args.name = "UMA Kegs";
3160ab3185d1SJeff Roberson 	args.size = ksize;
3161099a0e58SBosko Milekic 	args.ctor = keg_ctor;
3162099a0e58SBosko Milekic 	args.dtor = keg_dtor;
31638355f576SJeff Roberson 	args.uminit = zero_init;
31648355f576SJeff Roberson 	args.fini = NULL;
3165c8b0a88bSJeff Roberson 	args.keg = primarykeg;
316679c9f942SJeff Roberson 	args.align = UMA_SUPER_ALIGN - 1;
3167b60f5b79SJeff Roberson 	args.flags = UMA_ZFLAG_INTERNAL;
3168ab3185d1SJeff Roberson 	zone_ctor(kegs, zsize, &args, M_WAITOK);
31698355f576SJeff Roberson 
3170099a0e58SBosko Milekic 	args.name = "UMA Zones";
3171f4bef67cSGleb Smirnoff 	args.size = zsize;
3172099a0e58SBosko Milekic 	args.ctor = zone_ctor;
3173099a0e58SBosko Milekic 	args.dtor = zone_dtor;
3174099a0e58SBosko Milekic 	args.uminit = zero_init;
3175099a0e58SBosko Milekic 	args.fini = NULL;
3176099a0e58SBosko Milekic 	args.keg = NULL;
317779c9f942SJeff Roberson 	args.align = UMA_SUPER_ALIGN - 1;
3178099a0e58SBosko Milekic 	args.flags = UMA_ZFLAG_INTERNAL;
3179ab3185d1SJeff Roberson 	zone_ctor(zones, zsize, &args, M_WAITOK);
3180099a0e58SBosko Milekic 
31819b8db4d0SRyan Libby 	/* Now make zones for slab headers */
31829b8db4d0SRyan Libby 	slabzones[0] = uma_zcreate("UMA Slabs 0", SLABZONE0_SIZE,
31839b8db4d0SRyan Libby 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
31849b8db4d0SRyan Libby 	slabzones[1] = uma_zcreate("UMA Slabs 1", SLABZONE1_SIZE,
31851e0701e1SJeff Roberson 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
31868355f576SJeff Roberson 
31878355f576SJeff Roberson 	hashzone = uma_zcreate("UMA Hash",
31888355f576SJeff Roberson 	    sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
31891e0701e1SJeff Roberson 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
31908355f576SJeff Roberson 
3191a81c400eSJeff Roberson 	bucket_init();
3192d4665eaaSJeff Roberson 	smr_init();
31938355f576SJeff Roberson }
31948355f576SJeff Roberson 
3195da76d349SBojan Novković #ifndef UMA_USE_DMAP
3196a81c400eSJeff Roberson extern void vm_radix_reserve_kva(void);
3197f4bef67cSGleb Smirnoff #endif
3198f4bef67cSGleb Smirnoff 
3199a81c400eSJeff Roberson /*
3200a81c400eSJeff Roberson  * Advertise the availability of normal kva allocations and switch to
3201a81c400eSJeff Roberson  * the default back-end allocator.  Marks the KVA we consumed on startup
3202a81c400eSJeff Roberson  * as used in the map.
3203a81c400eSJeff Roberson  */
32048355f576SJeff Roberson void
320599571dc3SJeff Roberson uma_startup2(void)
32068355f576SJeff Roberson {
3207f4bef67cSGleb Smirnoff 
3208530cc6a2SJeff Roberson 	if (bootstart != bootmem) {
3209a81c400eSJeff Roberson 		vm_map_lock(kernel_map);
3210a81c400eSJeff Roberson 		(void)vm_map_insert(kernel_map, NULL, 0, bootstart, bootmem,
3211a81c400eSJeff Roberson 		    VM_PROT_RW, VM_PROT_RW, MAP_NOFAULT);
3212a81c400eSJeff Roberson 		vm_map_unlock(kernel_map);
3213a81c400eSJeff Roberson 	}
3214a81c400eSJeff Roberson 
3215da76d349SBojan Novković #ifndef UMA_USE_DMAP
3216a81c400eSJeff Roberson 	/* Set up radix zone to use noobj_alloc. */
3217a81c400eSJeff Roberson 	vm_radix_reserve_kva();
3218f7d35785SGleb Smirnoff #endif
3219a81c400eSJeff Roberson 
3220a81c400eSJeff Roberson 	booted = BOOT_KVA;
3221a81c400eSJeff Roberson 	zone_foreach_unlocked(zone_kva_available, NULL);
3222f4bef67cSGleb Smirnoff 	bucket_enable();
32238355f576SJeff Roberson }
32248355f576SJeff Roberson 
3225a81c400eSJeff Roberson /*
3226dc2b3205SMark Johnston  * Allocate counters as early as possible so that boot-time allocations are
3227dc2b3205SMark Johnston  * accounted more precisely.
3228dc2b3205SMark Johnston  */
3229dc2b3205SMark Johnston static void
3230dc2b3205SMark Johnston uma_startup_pcpu(void *arg __unused)
3231dc2b3205SMark Johnston {
3232dc2b3205SMark Johnston 
3233dc2b3205SMark Johnston 	zone_foreach_unlocked(zone_alloc_counters, NULL);
3234dc2b3205SMark Johnston 	booted = BOOT_PCPU;
3235dc2b3205SMark Johnston }
3236dc2b3205SMark Johnston SYSINIT(uma_startup_pcpu, SI_SUB_COUNTER, SI_ORDER_ANY, uma_startup_pcpu, NULL);
3237dc2b3205SMark Johnston 
3238dc2b3205SMark Johnston /*
3239a81c400eSJeff Roberson  * Finish our initialization steps.
3240a81c400eSJeff Roberson  */
32418355f576SJeff Roberson static void
3242dc2b3205SMark Johnston uma_startup3(void *arg __unused)
32438355f576SJeff Roberson {
32441431a748SGleb Smirnoff 
3245c5deaf04SGleb Smirnoff #ifdef INVARIANTS
3246c5deaf04SGleb Smirnoff 	TUNABLE_INT_FETCH("vm.debug.divisor", &dbg_divisor);
3247c5deaf04SGleb Smirnoff 	uma_dbg_cnt = counter_u64_alloc(M_WAITOK);
3248c5deaf04SGleb Smirnoff 	uma_skip_cnt = counter_u64_alloc(M_WAITOK);
3249c5deaf04SGleb Smirnoff #endif
3250a81c400eSJeff Roberson 	zone_foreach_unlocked(zone_alloc_sysctl, NULL);
3251c5deaf04SGleb Smirnoff 	booted = BOOT_RUNNING;
3252860bb7a0SMark Johnston 
3253860bb7a0SMark Johnston 	EVENTHANDLER_REGISTER(shutdown_post_sync, uma_shutdown, NULL,
3254860bb7a0SMark Johnston 	    EVENTHANDLER_PRI_FIRST);
3255860bb7a0SMark Johnston }
3256dc2b3205SMark Johnston SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
3257860bb7a0SMark Johnston 
3258860bb7a0SMark Johnston static void
325993cd28eaSMark Johnston uma_startup4(void *arg __unused)
326093cd28eaSMark Johnston {
326193cd28eaSMark Johnston 	TIMEOUT_TASK_INIT(taskqueue_thread, &uma_timeout_task, 0, uma_timeout,
326293cd28eaSMark Johnston 	    NULL);
326393cd28eaSMark Johnston 	taskqueue_enqueue_timeout(taskqueue_thread, &uma_timeout_task,
326493cd28eaSMark Johnston 	    UMA_TIMEOUT * hz);
326593cd28eaSMark Johnston }
326693cd28eaSMark Johnston SYSINIT(uma_startup4, SI_SUB_TASKQ, SI_ORDER_ANY, uma_startup4, NULL);
326793cd28eaSMark Johnston 
326893cd28eaSMark Johnston static void
3269860bb7a0SMark Johnston uma_shutdown(void)
3270860bb7a0SMark Johnston {
3271860bb7a0SMark Johnston 
3272860bb7a0SMark Johnston 	booted = BOOT_SHUTDOWN;
32738355f576SJeff Roberson }
32748355f576SJeff Roberson 
3275e20a199fSJeff Roberson static uma_keg_t
3276099a0e58SBosko Milekic uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
327785dcf349SGleb Smirnoff 		int align, uint32_t flags)
3278099a0e58SBosko Milekic {
3279099a0e58SBosko Milekic 	struct uma_kctor_args args;
3280099a0e58SBosko Milekic 
3281099a0e58SBosko Milekic 	args.size = size;
3282099a0e58SBosko Milekic 	args.uminit = uminit;
3283099a0e58SBosko Milekic 	args.fini = fini;
3284e557eafeSOlivier Certner 	args.align = align;
3285099a0e58SBosko Milekic 	args.flags = flags;
3286099a0e58SBosko Milekic 	args.zone = zone;
3287ab3185d1SJeff Roberson 	return (zone_alloc_item(kegs, &args, UMA_ANYDOMAIN, M_WAITOK));
3288099a0e58SBosko Milekic }
3289099a0e58SBosko Milekic 
329087090f5eSOlivier Certner 
329187090f5eSOlivier Certner static void
329287090f5eSOlivier Certner check_align_mask(unsigned int mask)
32931e319f6dSRobert Watson {
32941e319f6dSRobert Watson 
329587090f5eSOlivier Certner 	KASSERT(powerof2(mask + 1),
329687090f5eSOlivier Certner 	    ("UMA: %s: Not the mask of a power of 2 (%#x)", __func__, mask));
32973d8f548bSOlivier Certner 	/*
32983d8f548bSOlivier Certner 	 * Make sure the stored align mask doesn't have its highest bit set,
32993d8f548bSOlivier Certner 	 * which would cause implementation-defined behavior when passing it as
33003d8f548bSOlivier Certner 	 * the 'align' argument of uma_zcreate().  Such very large alignments do
33013d8f548bSOlivier Certner 	 * not make sense anyway.
33023d8f548bSOlivier Certner 	 */
330387090f5eSOlivier Certner 	KASSERT(mask <= INT_MAX,
330487090f5eSOlivier Certner 	    ("UMA: %s: Mask too big (%#x)", __func__, mask));
330587090f5eSOlivier Certner }
330687090f5eSOlivier Certner 
330787090f5eSOlivier Certner /* Public functions */
330887090f5eSOlivier Certner /* See uma.h */
330987090f5eSOlivier Certner void
331087090f5eSOlivier Certner uma_set_cache_align_mask(unsigned int mask)
331187090f5eSOlivier Certner {
331287090f5eSOlivier Certner 
331387090f5eSOlivier Certner 	check_align_mask(mask);
331487090f5eSOlivier Certner 	uma_cache_align_mask = mask;
3315dc8f7692SOlivier Certner }
3316dc8f7692SOlivier Certner 
3317dc8f7692SOlivier Certner /* Returns the alignment mask to use to request cache alignment. */
33183d8f548bSOlivier Certner unsigned int
3319dc8f7692SOlivier Certner uma_get_cache_align_mask(void)
3320dc8f7692SOlivier Certner {
3321dc8f7692SOlivier Certner 	return (uma_cache_align_mask);
33221e319f6dSRobert Watson }
33231e319f6dSRobert Watson 
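/*
 * Example (illustrative sketch): the stored value is a mask, i.e. the desired
 * power-of-two alignment minus one.  To request 64-byte (cache-line-sized)
 * alignment on a hypothetical platform:
 *
 *	uma_set_cache_align_mask(64 - 1);
 *	MPASS(uma_get_cache_align_mask() == 63);
 */
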
33241e319f6dSRobert Watson /* See uma.h */
33258355f576SJeff Roberson uma_zone_t
3326bb196eb4SMatthew D Fleming uma_zcreate(const char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
332785dcf349SGleb Smirnoff 		uma_init uminit, uma_fini fini, int align, uint32_t flags)
33288355f576SJeff Roberson 
33298355f576SJeff Roberson {
33308355f576SJeff Roberson 	struct uma_zctor_args args;
333195c4bf75SKonstantin Belousov 	uma_zone_t res;
33328355f576SJeff Roberson 
333387090f5eSOlivier Certner 	check_align_mask(align);
3334a5a35578SJohn Baldwin 
33358355f576SJeff Roberson 	/* This stuff is essential for the zone ctor */
33360095a784SJeff Roberson 	memset(&args, 0, sizeof(args));
33378355f576SJeff Roberson 	args.name = name;
33388355f576SJeff Roberson 	args.size = size;
33398355f576SJeff Roberson 	args.ctor = ctor;
33408355f576SJeff Roberson 	args.dtor = dtor;
33418355f576SJeff Roberson 	args.uminit = uminit;
33428355f576SJeff Roberson 	args.fini = fini;
334310094910SMark Johnston #if defined(INVARIANTS) && !defined(KASAN) && !defined(KMSAN)
3344afc6dc36SJohn-Mark Gurney 	/*
3345ca293436SRyan Libby 	 * Inject procedures which check for memory use after free if we are
3346ca293436SRyan Libby 	 * allowed to scramble the memory while it is not allocated.  This
3347ca293436SRyan Libby 	 * requires that: UMA is actually able to access the memory, no init
3348ca293436SRyan Libby 	 * or fini procedures, no dependency on the initial value of the
3349ca293436SRyan Libby 	 * memory, and no (legitimate) use of the memory after free.  Note,
3350ca293436SRyan Libby 	 * the ctor and dtor do not need to be empty.
3351afc6dc36SJohn-Mark Gurney 	 */
335254c5ae80SRyan Libby 	if ((!(flags & (UMA_ZONE_ZINIT | UMA_ZONE_NOTOUCH |
335354c5ae80SRyan Libby 	    UMA_ZONE_NOFREE))) && uminit == NULL && fini == NULL) {
3354afc6dc36SJohn-Mark Gurney 		args.uminit = trash_init;
3355afc6dc36SJohn-Mark Gurney 		args.fini = trash_fini;
3356afc6dc36SJohn-Mark Gurney 	}
3357afc6dc36SJohn-Mark Gurney #endif
33588355f576SJeff Roberson 	args.align = align;
33598355f576SJeff Roberson 	args.flags = flags;
3360099a0e58SBosko Milekic 	args.keg = NULL;
3361099a0e58SBosko Milekic 
3362aabe13f1SMark Johnston 	sx_xlock(&uma_reclaim_lock);
3363ab3185d1SJeff Roberson 	res = zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK);
3364aabe13f1SMark Johnston 	sx_xunlock(&uma_reclaim_lock);
3365a81c400eSJeff Roberson 
336695c4bf75SKonstantin Belousov 	return (res);
3367099a0e58SBosko Milekic }
3368099a0e58SBosko Milekic 
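/*
 * Example (illustrative sketch): typical zone creation and use.  "struct foo"
 * and "foo_zone" are hypothetical; uma_zalloc() and uma_zfree() are the uma.h
 * wrappers around uma_zalloc_arg() and uma_zfree_arg().
 *
 *	static uma_zone_t foo_zone;
 *	struct foo *fp;
 *
 *	foo_zone = uma_zcreate("foo", sizeof(struct foo), NULL, NULL,
 *	    NULL, NULL, UMA_ALIGN_PTR, 0);
 *	fp = uma_zalloc(foo_zone, M_WAITOK | M_ZERO);
 *	...
 *	uma_zfree(foo_zone, fp);
 *	uma_zdestroy(foo_zone);
 */
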
3369099a0e58SBosko Milekic /* See uma.h */
3370099a0e58SBosko Milekic uma_zone_t
33710464f16eSMark Johnston uma_zsecond_create(const char *name, uma_ctor ctor, uma_dtor dtor,
3372c8b0a88bSJeff Roberson     uma_init zinit, uma_fini zfini, uma_zone_t primary)
3373099a0e58SBosko Milekic {
3374099a0e58SBosko Milekic 	struct uma_zctor_args args;
3375e20a199fSJeff Roberson 	uma_keg_t keg;
337695c4bf75SKonstantin Belousov 	uma_zone_t res;
3377099a0e58SBosko Milekic 
3378c8b0a88bSJeff Roberson 	keg = primary->uz_keg;
33790095a784SJeff Roberson 	memset(&args, 0, sizeof(args));
3380099a0e58SBosko Milekic 	args.name = name;
3381e20a199fSJeff Roberson 	args.size = keg->uk_size;
3382099a0e58SBosko Milekic 	args.ctor = ctor;
3383099a0e58SBosko Milekic 	args.dtor = dtor;
3384099a0e58SBosko Milekic 	args.uminit = zinit;
3385099a0e58SBosko Milekic 	args.fini = zfini;
3386e20a199fSJeff Roberson 	args.align = keg->uk_align;
3387e20a199fSJeff Roberson 	args.flags = keg->uk_flags | UMA_ZONE_SECONDARY;
3388e20a199fSJeff Roberson 	args.keg = keg;
33898355f576SJeff Roberson 
3390aabe13f1SMark Johnston 	sx_xlock(&uma_reclaim_lock);
3391ab3185d1SJeff Roberson 	res = zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK);
3392aabe13f1SMark Johnston 	sx_xunlock(&uma_reclaim_lock);
3393a81c400eSJeff Roberson 
339495c4bf75SKonstantin Belousov 	return (res);
33958355f576SJeff Roberson }
33968355f576SJeff Roberson 
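/*
 * Example (illustrative sketch): a secondary zone shares the primary zone's
 * keg (and therefore its item size and slabs) while layering its own
 * ctor/dtor and zinit/zfini on top.  The names here are hypothetical.
 *
 *	bar_zone = uma_zsecond_create("bar", bar_ctor, bar_dtor,
 *	    NULL, NULL, foo_zone);
 */
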
33970095a784SJeff Roberson /* See uma.h */
33980095a784SJeff Roberson uma_zone_t
33990464f16eSMark Johnston uma_zcache_create(const char *name, int size, uma_ctor ctor, uma_dtor dtor,
34000464f16eSMark Johnston     uma_init zinit, uma_fini zfini, uma_import zimport, uma_release zrelease,
34010464f16eSMark Johnston     void *arg, int flags)
34020095a784SJeff Roberson {
34030095a784SJeff Roberson 	struct uma_zctor_args args;
34040095a784SJeff Roberson 
34050095a784SJeff Roberson 	memset(&args, 0, sizeof(args));
34060095a784SJeff Roberson 	args.name = name;
3407af526374SJeff Roberson 	args.size = size;
34080095a784SJeff Roberson 	args.ctor = ctor;
34090095a784SJeff Roberson 	args.dtor = dtor;
34100095a784SJeff Roberson 	args.uminit = zinit;
34110095a784SJeff Roberson 	args.fini = zfini;
34120095a784SJeff Roberson 	args.import = zimport;
34130095a784SJeff Roberson 	args.release = zrelease;
34140095a784SJeff Roberson 	args.arg = arg;
34150095a784SJeff Roberson 	args.align = 0;
3416bb15d1c7SGleb Smirnoff 	args.flags = flags | UMA_ZFLAG_CACHE;
34170095a784SJeff Roberson 
3418ab3185d1SJeff Roberson 	return (zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK));
34190095a784SJeff Roberson }
34200095a784SJeff Roberson 
34218355f576SJeff Roberson /* See uma.h */
34229c2cd7e5SJeff Roberson void
34239c2cd7e5SJeff Roberson uma_zdestroy(uma_zone_t zone)
34249c2cd7e5SJeff Roberson {
3425f4ff923bSRobert Watson 
3426860bb7a0SMark Johnston 	/*
3427860bb7a0SMark Johnston 	 * Large slabs are expensive to reclaim, so don't bother doing
3428860bb7a0SMark Johnston 	 * unnecessary work if we're shutting down.
3429860bb7a0SMark Johnston 	 */
3430860bb7a0SMark Johnston 	if (booted == BOOT_SHUTDOWN &&
3431860bb7a0SMark Johnston 	    zone->uz_fini == NULL && zone->uz_release == zone_release)
3432860bb7a0SMark Johnston 		return;
3433aabe13f1SMark Johnston 	sx_xlock(&uma_reclaim_lock);
34340095a784SJeff Roberson 	zone_free_item(zones, zone, NULL, SKIP_NONE);
3435aabe13f1SMark Johnston 	sx_xunlock(&uma_reclaim_lock);
34369c2cd7e5SJeff Roberson }
34379c2cd7e5SJeff Roberson 
34388d6fbbb8SJeff Roberson void
34398d6fbbb8SJeff Roberson uma_zwait(uma_zone_t zone)
34408d6fbbb8SJeff Roberson {
34418d6fbbb8SJeff Roberson 
344270260874SJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_SMR) != 0)
344370260874SJeff Roberson 		uma_zfree_smr(zone, uma_zalloc_smr(zone, M_WAITOK));
344470260874SJeff Roberson 	else if ((zone->uz_flags & UMA_ZONE_PCPU) != 0)
344570260874SJeff Roberson 		uma_zfree_pcpu(zone, uma_zalloc_pcpu(zone, M_WAITOK));
344670260874SJeff Roberson 	else
344770260874SJeff Roberson 		uma_zfree(zone, uma_zalloc(zone, M_WAITOK));
34488d6fbbb8SJeff Roberson }
34498d6fbbb8SJeff Roberson 
34504e180881SMateusz Guzik void *
34514e180881SMateusz Guzik uma_zalloc_pcpu_arg(uma_zone_t zone, void *udata, int flags)
34524e180881SMateusz Guzik {
34533acb6572SMateusz Guzik 	void *item, *pcpu_item;
3454b4799947SRuslan Bukin #ifdef SMP
34554e180881SMateusz Guzik 	int i;
34564e180881SMateusz Guzik 
34574e180881SMateusz Guzik 	MPASS(zone->uz_flags & UMA_ZONE_PCPU);
3458b4799947SRuslan Bukin #endif
34594e180881SMateusz Guzik 	item = uma_zalloc_arg(zone, udata, flags & ~M_ZERO);
34603acb6572SMateusz Guzik 	if (item == NULL)
34613acb6572SMateusz Guzik 		return (NULL);
34623acb6572SMateusz Guzik 	pcpu_item = zpcpu_base_to_offset(item);
34633acb6572SMateusz Guzik 	if (flags & M_ZERO) {
3464b4799947SRuslan Bukin #ifdef SMP
3465013072f0SMark Johnston 		for (i = 0; i <= mp_maxid; i++)
34663acb6572SMateusz Guzik 			bzero(zpcpu_get_cpu(pcpu_item, i), zone->uz_size);
3467b4799947SRuslan Bukin #else
3468b4799947SRuslan Bukin 		bzero(item, zone->uz_size);
3469b4799947SRuslan Bukin #endif
34704e180881SMateusz Guzik 	}
34713acb6572SMateusz Guzik 	return (pcpu_item);
34724e180881SMateusz Guzik }
34734e180881SMateusz Guzik 
34744e180881SMateusz Guzik /*
34754e180881SMateusz Guzik  * A stub while both regular and pcpu cases are identical.
34764e180881SMateusz Guzik  */
34774e180881SMateusz Guzik void
34783acb6572SMateusz Guzik uma_zfree_pcpu_arg(uma_zone_t zone, void *pcpu_item, void *udata)
34794e180881SMateusz Guzik {
34803acb6572SMateusz Guzik 	void *item;
34814e180881SMateusz Guzik 
3482c5b7751fSIan Lepore #ifdef SMP
34834e180881SMateusz Guzik 	MPASS(zone->uz_flags & UMA_ZONE_PCPU);
3484c5b7751fSIan Lepore #endif
3485b8f7267dSKristof Provost 
3486b8f7267dSKristof Provost         /* uma_zfree_pcpu_*(..., NULL) does nothing, to match free(9). */
3487b8f7267dSKristof Provost         if (pcpu_item == NULL)
3488b8f7267dSKristof Provost                 return;
3489b8f7267dSKristof Provost 
34903acb6572SMateusz Guzik 	item = zpcpu_offset_to_base(pcpu_item);
34914e180881SMateusz Guzik 	uma_zfree_arg(zone, item, udata);
34924e180881SMateusz Guzik }
34934e180881SMateusz Guzik 
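/*
 * Example (illustrative sketch): a per-CPU zone hands back an offset pointer;
 * each CPU's private copy is reached through the zpcpu_get*() accessors from
 * sys/pcpu.h while migration is prevented.  "pcpu_zone" is hypothetical.
 *
 *	uint64_t *counters, *p;
 *
 *	pcpu_zone = uma_zcreate("foo pcpu", sizeof(uint64_t), NULL, NULL,
 *	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU);
 *	counters = uma_zalloc_pcpu(pcpu_zone, M_WAITOK | M_ZERO);
 *
 *	critical_enter();
 *	p = zpcpu_get(counters);
 *	(*p)++;
 *	critical_exit();
 *
 *	uma_zfree_pcpu(pcpu_zone, counters);
 */
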
3494d4665eaaSJeff Roberson static inline void *
3495d4665eaaSJeff Roberson item_ctor(uma_zone_t zone, int uz_flags, int size, void *udata, int flags,
3496d4665eaaSJeff Roberson     void *item)
3497beb8beefSJeff Roberson {
3498beb8beefSJeff Roberson #ifdef INVARIANTS
3499ca293436SRyan Libby 	bool skipdbg;
350009c8cb71SMark Johnston #endif
3501beb8beefSJeff Roberson 
350209c8cb71SMark Johnston 	kasan_mark_item_valid(zone, item);
350310094910SMark Johnston 	kmsan_mark_item_uninitialized(zone, item);
350409c8cb71SMark Johnston 
350509c8cb71SMark Johnston #ifdef INVARIANTS
3506beb8beefSJeff Roberson 	skipdbg = uma_dbg_zskip(zone, item);
350709c8cb71SMark Johnston 	if (!skipdbg && (uz_flags & UMA_ZFLAG_TRASH) != 0 &&
3508ca293436SRyan Libby 	    zone->uz_ctor != trash_ctor)
3509a03c2393SAlexander Motin 		trash_ctor(item, size, zone, flags);
3510beb8beefSJeff Roberson #endif
351109c8cb71SMark Johnston 
3512d4665eaaSJeff Roberson 	/* Check flags before loading ctor pointer. */
3513d4665eaaSJeff Roberson 	if (__predict_false((uz_flags & UMA_ZFLAG_CTORDTOR) != 0) &&
3514d4665eaaSJeff Roberson 	    __predict_false(zone->uz_ctor != NULL) &&
3515cc7ce83aSJeff Roberson 	    zone->uz_ctor(item, size, udata, flags) != 0) {
3516beb8beefSJeff Roberson 		counter_u64_add(zone->uz_fails, 1);
3517beb8beefSJeff Roberson 		zone_free_item(zone, item, udata, SKIP_DTOR | SKIP_CNT);
3518beb8beefSJeff Roberson 		return (NULL);
3519beb8beefSJeff Roberson 	}
3520beb8beefSJeff Roberson #ifdef INVARIANTS
3521beb8beefSJeff Roberson 	if (!skipdbg)
3522beb8beefSJeff Roberson 		uma_dbg_alloc(zone, NULL, item);
3523beb8beefSJeff Roberson #endif
35246d88d784SJeff Roberson 	if (__predict_false(flags & M_ZERO))
35256d88d784SJeff Roberson 		return (memset(item, 0, size));
3526beb8beefSJeff Roberson 
3527beb8beefSJeff Roberson 	return (item);
3528beb8beefSJeff Roberson }
3529beb8beefSJeff Roberson 
3530ca293436SRyan Libby static inline void
3531cc7ce83aSJeff Roberson item_dtor(uma_zone_t zone, void *item, int size, void *udata,
3532cc7ce83aSJeff Roberson     enum zfreeskip skip)
3533ca293436SRyan Libby {
3534ca293436SRyan Libby #ifdef INVARIANTS
3535ca293436SRyan Libby 	bool skipdbg;
3536ca293436SRyan Libby 
3537ca293436SRyan Libby 	skipdbg = uma_dbg_zskip(zone, item);
3538ca293436SRyan Libby 	if (skip == SKIP_NONE && !skipdbg) {
3539ca293436SRyan Libby 		if ((zone->uz_flags & UMA_ZONE_MALLOC) != 0)
3540ca293436SRyan Libby 			uma_dbg_free(zone, udata, item);
3541ca293436SRyan Libby 		else
3542ca293436SRyan Libby 			uma_dbg_free(zone, NULL, item);
3543ca293436SRyan Libby 	}
3544ca293436SRyan Libby #endif
3545cc7ce83aSJeff Roberson 	if (__predict_true(skip < SKIP_DTOR)) {
3546ca293436SRyan Libby 		if (zone->uz_dtor != NULL)
3547cc7ce83aSJeff Roberson 			zone->uz_dtor(item, size, udata);
3548ca293436SRyan Libby #ifdef INVARIANTS
3549ca293436SRyan Libby 		if (!skipdbg && (zone->uz_flags & UMA_ZFLAG_TRASH) != 0 &&
3550ca293436SRyan Libby 		    zone->uz_dtor != trash_dtor)
3551a03c2393SAlexander Motin 			trash_dtor(item, size, zone);
3552ca293436SRyan Libby #endif
3553ca293436SRyan Libby 	}
355409c8cb71SMark Johnston 	kasan_mark_item_invalid(zone, item);
3555ca293436SRyan Libby }
3556ca293436SRyan Libby 
35571c58c09fSMateusz Guzik #ifdef NUMA
355881302f1dSMark Johnston static int
355981302f1dSMark Johnston item_domain(void *item)
356081302f1dSMark Johnston {
356181302f1dSMark Johnston 	int domain;
356281302f1dSMark Johnston 
3563431fb8abSMark Johnston 	domain = vm_phys_domain(vtophys(item));
356481302f1dSMark Johnston 	KASSERT(domain >= 0 && domain < vm_ndomains,
356581302f1dSMark Johnston 	    ("%s: unknown domain for item %p", __func__, item));
356681302f1dSMark Johnston 	return (domain);
356781302f1dSMark Johnston }
35681c58c09fSMateusz Guzik #endif
356981302f1dSMark Johnston 
3570d4665eaaSJeff Roberson #if defined(INVARIANTS) || defined(DEBUG_MEMGUARD) || defined(WITNESS)
3571a8cbb835SEric van Gyzen #if defined(INVARIANTS) && (defined(DDB) || defined(STACK))
3572a8cbb835SEric van Gyzen #include <sys/stack.h>
3573a8cbb835SEric van Gyzen #endif
3574d4665eaaSJeff Roberson #define	UMA_ZALLOC_DEBUG
3575d4665eaaSJeff Roberson static int
3576d4665eaaSJeff Roberson uma_zalloc_debug(uma_zone_t zone, void **itemp, void *udata, int flags)
3577d4665eaaSJeff Roberson {
3578d4665eaaSJeff Roberson 	int error;
3579d4665eaaSJeff Roberson 
3580d4665eaaSJeff Roberson 	error = 0;
3581d4665eaaSJeff Roberson #ifdef WITNESS
3582d4665eaaSJeff Roberson 	if (flags & M_WAITOK) {
3583d4665eaaSJeff Roberson 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
3584d4665eaaSJeff Roberson 		    "uma_zalloc_debug: zone \"%s\"", zone->uz_name);
3585d4665eaaSJeff Roberson 	}
3586d4665eaaSJeff Roberson #endif
3587d4665eaaSJeff Roberson 
3588d4665eaaSJeff Roberson #ifdef INVARIANTS
3589d4665eaaSJeff Roberson 	KASSERT((flags & M_EXEC) == 0,
3590d4665eaaSJeff Roberson 	    ("uma_zalloc_debug: called with M_EXEC"));
3591d4665eaaSJeff Roberson 	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
3592d4665eaaSJeff Roberson 	    ("uma_zalloc_debug: called within spinlock or critical section"));
3593d4665eaaSJeff Roberson 	KASSERT((zone->uz_flags & UMA_ZONE_PCPU) == 0 || (flags & M_ZERO) == 0,
3594d4665eaaSJeff Roberson 	    ("uma_zalloc_debug: allocating from a pcpu zone with M_ZERO"));
3595a8cbb835SEric van Gyzen 
3596a8cbb835SEric van Gyzen 	_Static_assert(M_NOWAIT != 0 && M_WAITOK != 0,
3597a8cbb835SEric van Gyzen 	    "M_NOWAIT and M_WAITOK must be non-zero for this assertion:");
3598a8cbb835SEric van Gyzen #if 0
3599a8cbb835SEric van Gyzen 	/*
3600a8cbb835SEric van Gyzen 	 * Give the #elif clause time to find problems, then remove it
3601a8cbb835SEric van Gyzen 	 * and enable this.  (Remove <sys/stack.h> above, too.)
3602a8cbb835SEric van Gyzen 	 */
3603a8cbb835SEric van Gyzen 	KASSERT((flags & (M_NOWAIT|M_WAITOK)) == M_NOWAIT ||
3604a8cbb835SEric van Gyzen 	    (flags & (M_NOWAIT|M_WAITOK)) == M_WAITOK,
3605a8cbb835SEric van Gyzen 	    ("uma_zalloc_debug: must pass one of M_NOWAIT or M_WAITOK"));
3606a8cbb835SEric van Gyzen #elif defined(DDB) || defined(STACK)
3607a8cbb835SEric van Gyzen 	if (__predict_false((flags & (M_NOWAIT|M_WAITOK)) != M_NOWAIT &&
3608a8cbb835SEric van Gyzen 	    (flags & (M_NOWAIT|M_WAITOK)) != M_WAITOK)) {
3609a8cbb835SEric van Gyzen 		static int stack_count;
3610a8cbb835SEric van Gyzen 		struct stack st;
3611a8cbb835SEric van Gyzen 
3612a8cbb835SEric van Gyzen 		if (stack_count < 10) {
3613a8cbb835SEric van Gyzen 			++stack_count;
3614a8cbb835SEric van Gyzen 			printf("uma_zalloc* called with bad WAIT flags:\n");
3615a8cbb835SEric van Gyzen 			stack_save(&st);
3616a8cbb835SEric van Gyzen 			stack_print(&st);
3617a8cbb835SEric van Gyzen 		}
3618a8cbb835SEric van Gyzen 	}
3619a8cbb835SEric van Gyzen #endif
3620d4665eaaSJeff Roberson #endif
3621d4665eaaSJeff Roberson 
3622d4665eaaSJeff Roberson #ifdef DEBUG_MEMGUARD
36232dba2288SMark Johnston 	if ((zone->uz_flags & (UMA_ZONE_SMR | UMA_ZFLAG_CACHE)) == 0 &&
36242dba2288SMark Johnston 	    memguard_cmp_zone(zone)) {
3625d4665eaaSJeff Roberson 		void *item;
3626d4665eaaSJeff Roberson 		item = memguard_alloc(zone->uz_size, flags);
3627d4665eaaSJeff Roberson 		if (item != NULL) {
3628d4665eaaSJeff Roberson 			error = EJUSTRETURN;
3629d4665eaaSJeff Roberson 			if (zone->uz_init != NULL &&
3630d4665eaaSJeff Roberson 			    zone->uz_init(item, zone->uz_size, flags) != 0) {
3631d4665eaaSJeff Roberson 				*itemp = NULL;
3632d4665eaaSJeff Roberson 				return (error);
3633d4665eaaSJeff Roberson 			}
3634d4665eaaSJeff Roberson 			if (zone->uz_ctor != NULL &&
3635d4665eaaSJeff Roberson 			    zone->uz_ctor(item, zone->uz_size, udata,
3636d4665eaaSJeff Roberson 			    flags) != 0) {
3637d4665eaaSJeff Roberson 				counter_u64_add(zone->uz_fails, 1);
3638389a3fa6SMark Johnston 				if (zone->uz_fini != NULL)
3639d4665eaaSJeff Roberson 					zone->uz_fini(item, zone->uz_size);
3640d4665eaaSJeff Roberson 				*itemp = NULL;
3641d4665eaaSJeff Roberson 				return (error);
3642d4665eaaSJeff Roberson 			}
3643d4665eaaSJeff Roberson 			*itemp = item;
3644d4665eaaSJeff Roberson 			return (error);
3645d4665eaaSJeff Roberson 		}
3646d4665eaaSJeff Roberson 		/* This is unfortunate but should not be fatal. */
3647d4665eaaSJeff Roberson 	}
3648d4665eaaSJeff Roberson #endif
3649d4665eaaSJeff Roberson 	return (error);
3650d4665eaaSJeff Roberson }
3651d4665eaaSJeff Roberson 
3652d4665eaaSJeff Roberson static int
3653d4665eaaSJeff Roberson uma_zfree_debug(uma_zone_t zone, void *item, void *udata)
3654d4665eaaSJeff Roberson {
3655d4665eaaSJeff Roberson 	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
3656d4665eaaSJeff Roberson 	    ("uma_zfree_debug: called with spinlock or critical section held"));
3657d4665eaaSJeff Roberson 
3658d4665eaaSJeff Roberson #ifdef DEBUG_MEMGUARD
36592dba2288SMark Johnston 	if ((zone->uz_flags & (UMA_ZONE_SMR | UMA_ZFLAG_CACHE)) == 0 &&
36602dba2288SMark Johnston 	    is_memguard_addr(item)) {
3661d4665eaaSJeff Roberson 		if (zone->uz_dtor != NULL)
3662d4665eaaSJeff Roberson 			zone->uz_dtor(item, zone->uz_size, udata);
3663d4665eaaSJeff Roberson 		if (zone->uz_fini != NULL)
3664d4665eaaSJeff Roberson 			zone->uz_fini(item, zone->uz_size);
3665d4665eaaSJeff Roberson 		memguard_free(item);
3666d4665eaaSJeff Roberson 		return (EJUSTRETURN);
3667d4665eaaSJeff Roberson 	}
3668d4665eaaSJeff Roberson #endif
3669d4665eaaSJeff Roberson 	return (0);
3670d4665eaaSJeff Roberson }
3671d4665eaaSJeff Roberson #endif
3672d4665eaaSJeff Roberson 
36736d88d784SJeff Roberson static inline void *
36746d88d784SJeff Roberson cache_alloc_item(uma_zone_t zone, uma_cache_t cache, uma_cache_bucket_t bucket,
36756d88d784SJeff Roberson     void *udata, int flags)
3676d4665eaaSJeff Roberson {
36776d88d784SJeff Roberson 	void *item;
36786d88d784SJeff Roberson 	int size, uz_flags;
36796d88d784SJeff Roberson 
36806d88d784SJeff Roberson 	item = cache_bucket_pop(cache, bucket);
36816d88d784SJeff Roberson 	size = cache_uz_size(cache);
36826d88d784SJeff Roberson 	uz_flags = cache_uz_flags(cache);
36836d88d784SJeff Roberson 	critical_exit();
36846d88d784SJeff Roberson 	return (item_ctor(zone, uz_flags, size, udata, flags, item));
36856d88d784SJeff Roberson }
36866d88d784SJeff Roberson 
36876d88d784SJeff Roberson static __noinline void *
36886d88d784SJeff Roberson cache_alloc_retry(uma_zone_t zone, uma_cache_t cache, void *udata, int flags)
36896d88d784SJeff Roberson {
36906d88d784SJeff Roberson 	uma_cache_bucket_t bucket;
3691d4665eaaSJeff Roberson 	int domain;
3692d4665eaaSJeff Roberson 
36936d88d784SJeff Roberson 	while (cache_alloc(zone, cache, udata, flags)) {
36946d88d784SJeff Roberson 		cache = &zone->uz_cpu[curcpu];
36956d88d784SJeff Roberson 		bucket = &cache->uc_allocbucket;
36966d88d784SJeff Roberson 		if (__predict_false(bucket->ucb_cnt == 0))
36976d88d784SJeff Roberson 			continue;
36986d88d784SJeff Roberson 		return (cache_alloc_item(zone, cache, bucket, udata, flags));
36996d88d784SJeff Roberson 	}
37006d88d784SJeff Roberson 	critical_exit();
37016d88d784SJeff Roberson 
3702d4665eaaSJeff Roberson 	/*
3703d4665eaaSJeff Roberson 	 * We cannot get a bucket, so try to return a single item.
3704d4665eaaSJeff Roberson 	 */
3705d4665eaaSJeff Roberson 	if (zone->uz_flags & UMA_ZONE_FIRSTTOUCH)
3706d4665eaaSJeff Roberson 		domain = PCPU_GET(domain);
3707d4665eaaSJeff Roberson 	else
3708d4665eaaSJeff Roberson 		domain = UMA_ANYDOMAIN;
3709d4665eaaSJeff Roberson 	return (zone_alloc_item(zone, udata, domain, flags));
3710d4665eaaSJeff Roberson }
3711d4665eaaSJeff Roberson 
3712d4665eaaSJeff Roberson /* See uma.h */
3713d4665eaaSJeff Roberson void *
3714d4665eaaSJeff Roberson uma_zalloc_smr(uma_zone_t zone, int flags)
3715d4665eaaSJeff Roberson {
3716d4665eaaSJeff Roberson 	uma_cache_bucket_t bucket;
3717d4665eaaSJeff Roberson 	uma_cache_t cache;
3718d4665eaaSJeff Roberson 
3719841e0a87SGleb Smirnoff 	CTR3(KTR_UMA, "uma_zalloc_smr zone %s(%p) flags %d", zone->uz_name,
3720841e0a87SGleb Smirnoff 	    zone, flags);
3721841e0a87SGleb Smirnoff 
3722d4665eaaSJeff Roberson #ifdef UMA_ZALLOC_DEBUG
37236d88d784SJeff Roberson 	void *item;
37246d88d784SJeff Roberson 
3725d4665eaaSJeff Roberson 	KASSERT((zone->uz_flags & UMA_ZONE_SMR) != 0,
3726952c8964SMark Johnston 	    ("uma_zalloc_arg: called with non-SMR zone."));
3727d4665eaaSJeff Roberson 	if (uma_zalloc_debug(zone, &item, NULL, flags) == EJUSTRETURN)
3728d4665eaaSJeff Roberson 		return (item);
3729d4665eaaSJeff Roberson #endif
3730d4665eaaSJeff Roberson 
3731d4665eaaSJeff Roberson 	critical_enter();
3732d4665eaaSJeff Roberson 	cache = &zone->uz_cpu[curcpu];
3733d4665eaaSJeff Roberson 	bucket = &cache->uc_allocbucket;
37346d88d784SJeff Roberson 	if (__predict_false(bucket->ucb_cnt == 0))
37356d88d784SJeff Roberson 		return (cache_alloc_retry(zone, cache, NULL, flags));
37366d88d784SJeff Roberson 	return (cache_alloc_item(zone, cache, bucket, NULL, flags));
3737d4665eaaSJeff Roberson }
3738d4665eaaSJeff Roberson 
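/*
 * Example (illustrative sketch): SMR zones pair uma_zalloc_smr() and
 * uma_zfree_smr() with read-side SMR sections (see smr(9)); a freed item is
 * not reused until readers that may still see it have drained.  "foo_zone"
 * is hypothetical.
 *
 *	foo_zone = uma_zcreate("foo", sizeof(struct foo), NULL, NULL,
 *	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_SMR);
 *	fp = uma_zalloc_smr(foo_zone, M_WAITOK);
 *	... publish fp to lock-free readers ...
 *	... later, once fp has been unlinked ...
 *	uma_zfree_smr(foo_zone, fp);
 */
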
37399c2cd7e5SJeff Roberson /* See uma.h */
37408355f576SJeff Roberson void *
37412cc35ff9SJeff Roberson uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
37428355f576SJeff Roberson {
3743376b1ba3SJeff Roberson 	uma_cache_bucket_t bucket;
3744ab3185d1SJeff Roberson 	uma_cache_t cache;
37458355f576SJeff Roberson 
3746e866d8f0SMark Murray 	/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
374719fa89e9SMark Murray 	random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA);
374810cb2424SMark Murray 
37498355f576SJeff Roberson 	/* This is the fast path allocation */
3750e63a1c2fSRyan Libby 	CTR3(KTR_UMA, "uma_zalloc_arg zone %s(%p) flags %d", zone->uz_name,
3751e63a1c2fSRyan Libby 	    zone, flags);
3752a553d4b8SJeff Roberson 
3753d4665eaaSJeff Roberson #ifdef UMA_ZALLOC_DEBUG
37546d88d784SJeff Roberson 	void *item;
37556d88d784SJeff Roberson 
3756d4665eaaSJeff Roberson 	KASSERT((zone->uz_flags & UMA_ZONE_SMR) == 0,
3757952c8964SMark Johnston 	    ("uma_zalloc_arg: called with SMR zone."));
3758d4665eaaSJeff Roberson 	if (uma_zalloc_debug(zone, &item, udata, flags) == EJUSTRETURN)
37598d689e04SGleb Smirnoff 		return (item);
37608d689e04SGleb Smirnoff #endif
3761d4665eaaSJeff Roberson 
37625d1ae027SRobert Watson 	/*
37635d1ae027SRobert Watson 	 * If possible, allocate from the per-CPU cache.  There are two
37645d1ae027SRobert Watson 	 * requirements for safe access to the per-CPU cache: (1) the thread
37655d1ae027SRobert Watson 	 * accessing the cache must not be preempted or yield during access,
37665d1ae027SRobert Watson 	 * and (2) the thread must not migrate CPUs without switching which
37675d1ae027SRobert Watson 	 * cache it accesses.  We rely on a critical section to prevent
37685d1ae027SRobert Watson 	 * preemption and migration.  We release the critical section in
37695d1ae027SRobert Watson 	 * order to acquire the zone mutex if we are unable to allocate from
37705d1ae027SRobert Watson 	 * the current cache; when we re-acquire the critical section, we
37715d1ae027SRobert Watson 	 * must detect and handle migration if it has occurred.
37725d1ae027SRobert Watson 	 */
37735d1ae027SRobert Watson 	critical_enter();
3774cc7ce83aSJeff Roberson 	cache = &zone->uz_cpu[curcpu];
3775376b1ba3SJeff Roberson 	bucket = &cache->uc_allocbucket;
37766d88d784SJeff Roberson 	if (__predict_false(bucket->ucb_cnt == 0))
37776d88d784SJeff Roberson 		return (cache_alloc_retry(zone, cache, udata, flags));
37786d88d784SJeff Roberson 	return (cache_alloc_item(zone, cache, bucket, udata, flags));
3779fc03d22bSJeff Roberson }
3780fc03d22bSJeff Roberson 
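/*
 * Example (illustrative sketch): callers pass exactly one of M_WAITOK or
 * M_NOWAIT (as checked by uma_zalloc_debug() on debug kernels) and must
 * handle a NULL return when sleeping is not permitted.  "foo_zone" is
 * hypothetical.
 *
 *	fp = uma_zalloc(foo_zone, M_NOWAIT);
 *	if (fp == NULL)
 *		return (ENOMEM);
 */
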
37818355f576SJeff Roberson /*
3782beb8beefSJeff Roberson  * Replenish an alloc bucket and possibly restore an old one.  Called in
3783beb8beefSJeff Roberson  * a critical section.  Returns in a critical section.
3784beb8beefSJeff Roberson  *
37854bd61e19SJeff Roberson  * A false return value indicates an allocation failure.
37864bd61e19SJeff Roberson  * A true return value indicates success and the caller should retry.
3787beb8beefSJeff Roberson  */
3788beb8beefSJeff Roberson static __noinline bool
3789beb8beefSJeff Roberson cache_alloc(uma_zone_t zone, uma_cache_t cache, void *udata, int flags)
3790beb8beefSJeff Roberson {
3791beb8beefSJeff Roberson 	uma_bucket_t bucket;
37928c277118SMark Johnston 	int curdomain, domain;
3793c6fd3e23SJeff Roberson 	bool new;
3794beb8beefSJeff Roberson 
3795beb8beefSJeff Roberson 	CRITICAL_ASSERT(curthread);
3796beb8beefSJeff Roberson 
3797beb8beefSJeff Roberson 	/*
3798beb8beefSJeff Roberson 	 * If we have run out of items in our alloc bucket see
3799beb8beefSJeff Roberson 	 * if we can switch with the free bucket.
3800d4665eaaSJeff Roberson 	 *
3801d4665eaaSJeff Roberson 	 * SMR Zones can't re-use the free bucket until the sequence has
3802d4665eaaSJeff Roberson 	 * expired.
38038355f576SJeff Roberson 	 */
3804c6fd3e23SJeff Roberson 	if ((cache_uz_flags(cache) & UMA_ZONE_SMR) == 0 &&
3805d4665eaaSJeff Roberson 	    cache->uc_freebucket.ucb_cnt != 0) {
3806d4665eaaSJeff Roberson 		cache_bucket_swap(&cache->uc_freebucket,
3807d4665eaaSJeff Roberson 		    &cache->uc_allocbucket);
3808beb8beefSJeff Roberson 		return (true);
38098355f576SJeff Roberson 	}
3810fc03d22bSJeff Roberson 
3811fc03d22bSJeff Roberson 	/*
3812fc03d22bSJeff Roberson 	 * Discard any empty allocation bucket while we hold no locks.
3813fc03d22bSJeff Roberson 	 */
3814376b1ba3SJeff Roberson 	bucket = cache_bucket_unload_alloc(cache);
3815fc03d22bSJeff Roberson 	critical_exit();
3816c6fd3e23SJeff Roberson 
3817c6fd3e23SJeff Roberson 	if (bucket != NULL) {
3818c6fd3e23SJeff Roberson 		KASSERT(bucket->ub_cnt == 0,
3819c6fd3e23SJeff Roberson 		    ("cache_alloc: Entered with non-empty alloc bucket."));
38206fd34d6fSJeff Roberson 		bucket_free(zone, bucket, udata);
3821c6fd3e23SJeff Roberson 	}
3822fc03d22bSJeff Roberson 
38235d1ae027SRobert Watson 	/*
38255d1ae027SRobert Watson 	 * The attempt to retrieve an item from the per-CPU cache has failed, so
3825c6fd3e23SJeff Roberson 	 * we must go back to the zone.  This requires the zdom lock, so we
38265d1ae027SRobert Watson 	 * must drop the critical section, then re-acquire it when we go back
38275d1ae027SRobert Watson 	 * to the cache.  Since the critical section is released, we may be
38285d1ae027SRobert Watson 	 * preempted or migrate.  As such, make sure not to maintain any
38295d1ae027SRobert Watson 	 * thread-local state specific to the cache from prior to releasing
38305d1ae027SRobert Watson 	 * the critical section.
38315d1ae027SRobert Watson 	 */
3832c1685086SJeff Roberson 	domain = PCPU_GET(domain);
38338c277118SMark Johnston 	if ((cache_uz_flags(cache) & UMA_ZONE_ROUNDROBIN) != 0 ||
38348c277118SMark Johnston 	    VM_DOMAIN_EMPTY(domain))
3835c6fd3e23SJeff Roberson 		domain = zone_domain_highest(zone, domain);
3836c6fd3e23SJeff Roberson 	bucket = cache_fetch_bucket(zone, cache, domain);
3837af32cefdSMark Johnston 	if (bucket == NULL && zone->uz_bucket_size != 0 && !bucketdisable) {
3838beb8beefSJeff Roberson 		bucket = zone_alloc_bucket(zone, udata, domain, flags);
3839c6fd3e23SJeff Roberson 		new = true;
3840af32cefdSMark Johnston 	} else {
3841c6fd3e23SJeff Roberson 		new = false;
3842af32cefdSMark Johnston 	}
3843c6fd3e23SJeff Roberson 
38441431a748SGleb Smirnoff 	CTR3(KTR_UMA, "uma_zalloc: zone %s(%p) bucket zone returned %p",
38451431a748SGleb Smirnoff 	    zone->uz_name, zone, bucket);
38464bd61e19SJeff Roberson 	if (bucket == NULL) {
3847fc03d22bSJeff Roberson 		critical_enter();
3848beb8beefSJeff Roberson 		return (false);
38494bd61e19SJeff Roberson 	}
38500f9b7bf3SMark Johnston 
3851fc03d22bSJeff Roberson 	/*
3852fc03d22bSJeff Roberson 	 * See if we lost the race or were migrated.  Cache the
3853fc03d22bSJeff Roberson 	 * initialized bucket to make this less likely or claim
3854fc03d22bSJeff Roberson 	 * the memory directly.
3855fc03d22bSJeff Roberson 	 */
38564bd61e19SJeff Roberson 	critical_enter();
3857cc7ce83aSJeff Roberson 	cache = &zone->uz_cpu[curcpu];
3858376b1ba3SJeff Roberson 	if (cache->uc_allocbucket.ucb_bucket == NULL &&
3859c6fd3e23SJeff Roberson 	    ((cache_uz_flags(cache) & UMA_ZONE_FIRSTTOUCH) == 0 ||
38608c277118SMark Johnston 	    (curdomain = PCPU_GET(domain)) == domain ||
38618c277118SMark Johnston 	    VM_DOMAIN_EMPTY(curdomain))) {
3862c6fd3e23SJeff Roberson 		if (new)
3863c6fd3e23SJeff Roberson 			atomic_add_long(&ZDOM_GET(zone, domain)->uzd_imax,
3864c6fd3e23SJeff Roberson 			    bucket->ub_cnt);
3865376b1ba3SJeff Roberson 		cache_bucket_load_alloc(cache, bucket);
3866beb8beefSJeff Roberson 		return (true);
3867c6fd3e23SJeff Roberson 	}
3868c6fd3e23SJeff Roberson 
3869c6fd3e23SJeff Roberson 	/*
3870c6fd3e23SJeff Roberson 	 * We lost the race, release this bucket and start over.
3871c6fd3e23SJeff Roberson 	 */
3872c6fd3e23SJeff Roberson 	critical_exit();
38732760658bSAlexander Motin 	zone_put_bucket(zone, domain, bucket, udata, !new);
3874c6fd3e23SJeff Roberson 	critical_enter();
3875c6fd3e23SJeff Roberson 
3876beb8beefSJeff Roberson 	return (true);
3877bbee39c6SJeff Roberson }
3878bbee39c6SJeff Roberson 
3879ab3185d1SJeff Roberson void *
3880ab3185d1SJeff Roberson uma_zalloc_domain(uma_zone_t zone, void *udata, int domain, int flags)
3881bbee39c6SJeff Roberson {
388206d8bdcbSMark Johnston #ifdef NUMA
388306d8bdcbSMark Johnston 	uma_bucket_t bucket;
388406d8bdcbSMark Johnston 	uma_zone_domain_t zdom;
388506d8bdcbSMark Johnston 	void *item;
388606d8bdcbSMark Johnston #endif
3887ab3185d1SJeff Roberson 
3888ab3185d1SJeff Roberson 	/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
388919fa89e9SMark Murray 	random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA);
3890ab3185d1SJeff Roberson 
3891ab3185d1SJeff Roberson 	/* This is the fast path allocation */
3892e63a1c2fSRyan Libby 	CTR4(KTR_UMA, "uma_zalloc_domain zone %s(%p) domain %d flags %d",
3893e63a1c2fSRyan Libby 	    zone->uz_name, zone, domain, flags);
3894ab3185d1SJeff Roberson 
389506d8bdcbSMark Johnston 	KASSERT((zone->uz_flags & UMA_ZONE_SMR) == 0,
389606d8bdcbSMark Johnston 	    ("uma_zalloc_domain: called with SMR zone."));
389706d8bdcbSMark Johnston #ifdef NUMA
389806d8bdcbSMark Johnston 	KASSERT((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) != 0,
389906d8bdcbSMark Johnston 	    ("uma_zalloc_domain: called with non-FIRSTTOUCH zone."));
3900ab3185d1SJeff Roberson 
390106d8bdcbSMark Johnston 	if (vm_ndomains == 1)
390206d8bdcbSMark Johnston 		return (uma_zalloc_arg(zone, udata, flags));
390306d8bdcbSMark Johnston 
3904490b09f2SEric van Gyzen #ifdef UMA_ZALLOC_DEBUG
3905490b09f2SEric van Gyzen 	if (uma_zalloc_debug(zone, &item, udata, flags) == EJUSTRETURN)
3906490b09f2SEric van Gyzen 		return (item);
3907490b09f2SEric van Gyzen #endif
3908490b09f2SEric van Gyzen 
390906d8bdcbSMark Johnston 	/*
391006d8bdcbSMark Johnston 	 * Try to allocate from the bucket cache before falling back to the keg.
391106d8bdcbSMark Johnston 	 * We could try harder and attempt to allocate from per-CPU caches or
391206d8bdcbSMark Johnston 	 * the per-domain cross-domain buckets, but the complexity is probably
391306d8bdcbSMark Johnston 	 * not worth it.  It is more important that frees of previous
391406d8bdcbSMark Johnston 	 * cross-domain allocations do not blow up the cache.
391506d8bdcbSMark Johnston 	 */
391606d8bdcbSMark Johnston 	zdom = zone_domain_lock(zone, domain);
391706d8bdcbSMark Johnston 	if ((bucket = zone_fetch_bucket(zone, zdom, false)) != NULL) {
391806d8bdcbSMark Johnston 		item = bucket->ub_bucket[bucket->ub_cnt - 1];
391906d8bdcbSMark Johnston #ifdef INVARIANTS
392006d8bdcbSMark Johnston 		bucket->ub_bucket[bucket->ub_cnt - 1] = NULL;
392106d8bdcbSMark Johnston #endif
392206d8bdcbSMark Johnston 		bucket->ub_cnt--;
392306d8bdcbSMark Johnston 		zone_put_bucket(zone, domain, bucket, udata, true);
392406d8bdcbSMark Johnston 		item = item_ctor(zone, zone->uz_flags, zone->uz_size, udata,
392506d8bdcbSMark Johnston 		    flags, item);
392606d8bdcbSMark Johnston 		if (item != NULL) {
392706d8bdcbSMark Johnston 			KASSERT(item_domain(item) == domain,
392806d8bdcbSMark Johnston 			    ("%s: bucket cache item %p from wrong domain",
392906d8bdcbSMark Johnston 			    __func__, item));
393006d8bdcbSMark Johnston 			counter_u64_add(zone->uz_allocs, 1);
393106d8bdcbSMark Johnston 		}
393206d8bdcbSMark Johnston 		return (item);
393306d8bdcbSMark Johnston 	}
393406d8bdcbSMark Johnston 	ZDOM_UNLOCK(zdom);
3935ab3185d1SJeff Roberson 	return (zone_alloc_item(zone, udata, domain, flags));
393606d8bdcbSMark Johnston #else
393706d8bdcbSMark Johnston 	return (uma_zalloc_arg(zone, udata, flags));
393806d8bdcbSMark Johnston #endif
3939ab3185d1SJeff Roberson }
3940ab3185d1SJeff Roberson 
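/*
 * Example (illustrative sketch): domain-specific allocation is only valid for
 * first-touch zones; a caller that merely prefers locality might fall back to
 * the ordinary allocation path on failure.  "foo_zone" is hypothetical.
 *
 *	fp = uma_zalloc_domain(foo_zone, NULL, PCPU_GET(domain), M_NOWAIT);
 *	if (fp == NULL)
 *		fp = uma_zalloc(foo_zone, M_NOWAIT);
 */
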
3941ab3185d1SJeff Roberson /*
3942ab3185d1SJeff Roberson  * Find a slab with some space.  Prefer slabs that are partially used over those
3943ab3185d1SJeff Roberson  * that are completely free.  This helps to reduce fragmentation.
3944ab3185d1SJeff Roberson  *
3945ab3185d1SJeff Roberson  * If 'rr' is 1, search all domains starting from 'domain'.  Otherwise check
3946ab3185d1SJeff Roberson  * only 'domain'.
3947ab3185d1SJeff Roberson  */
3948ab3185d1SJeff Roberson static uma_slab_t
3949194a979eSMark Johnston keg_first_slab(uma_keg_t keg, int domain, bool rr)
3950ab3185d1SJeff Roberson {
3951ab3185d1SJeff Roberson 	uma_domain_t dom;
3952bbee39c6SJeff Roberson 	uma_slab_t slab;
3953ab3185d1SJeff Roberson 	int start;
3954ab3185d1SJeff Roberson 
3955ab3185d1SJeff Roberson 	KASSERT(domain >= 0 && domain < vm_ndomains,
3956ab3185d1SJeff Roberson 	    ("keg_first_slab: domain %d out of range", domain));
39578b987a77SJeff Roberson 	KEG_LOCK_ASSERT(keg, domain);
3958ab3185d1SJeff Roberson 
3959ab3185d1SJeff Roberson 	slab = NULL;
3960ab3185d1SJeff Roberson 	start = domain;
3961ab3185d1SJeff Roberson 	do {
3962ab3185d1SJeff Roberson 		dom = &keg->uk_domain[domain];
39634ab3aee8SMark Johnston 		if ((slab = LIST_FIRST(&dom->ud_part_slab)) != NULL)
39644ab3aee8SMark Johnston 			return (slab);
39654ab3aee8SMark Johnston 		if ((slab = LIST_FIRST(&dom->ud_free_slab)) != NULL) {
3966ab3185d1SJeff Roberson 			LIST_REMOVE(slab, us_link);
39674ab3aee8SMark Johnston 			dom->ud_free_slabs--;
3968ab3185d1SJeff Roberson 			LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link);
3969ab3185d1SJeff Roberson 			return (slab);
3970ab3185d1SJeff Roberson 		}
3971ab3185d1SJeff Roberson 		if (rr)
3972ab3185d1SJeff Roberson 			domain = (domain + 1) % vm_ndomains;
3973ab3185d1SJeff Roberson 	} while (domain != start);
3974ab3185d1SJeff Roberson 
3975ab3185d1SJeff Roberson 	return (NULL);
3976ab3185d1SJeff Roberson }
3977ab3185d1SJeff Roberson 
39788b987a77SJeff Roberson /*
39798b987a77SJeff Roberson  * Fetch an existing slab from a free or partial list.  Returns with the
39808b987a77SJeff Roberson  * keg domain lock held if a slab was found or unlocked if not.
39818b987a77SJeff Roberson  */
3982ab3185d1SJeff Roberson static uma_slab_t
3983194a979eSMark Johnston keg_fetch_free_slab(uma_keg_t keg, int domain, bool rr, int flags)
3984ab3185d1SJeff Roberson {
39858b987a77SJeff Roberson 	uma_slab_t slab;
3986194a979eSMark Johnston 	uint32_t reserve;
3987099a0e58SBosko Milekic 
39888b987a77SJeff Roberson 	/* HASH has a single free list. */
398954c5ae80SRyan Libby 	if ((keg->uk_flags & UMA_ZFLAG_HASH) != 0)
39908b987a77SJeff Roberson 		domain = 0;
3991194a979eSMark Johnston 
39928b987a77SJeff Roberson 	KEG_LOCK(keg, domain);
3993194a979eSMark Johnston 	reserve = (flags & M_USE_RESERVE) != 0 ? 0 : keg->uk_reserve;
39944ab3aee8SMark Johnston 	if (keg->uk_domain[domain].ud_free_items <= reserve ||
39958b987a77SJeff Roberson 	    (slab = keg_first_slab(keg, domain, rr)) == NULL) {
39968b987a77SJeff Roberson 		KEG_UNLOCK(keg, domain);
3997194a979eSMark Johnston 		return (NULL);
39988b987a77SJeff Roberson 	}
39998b987a77SJeff Roberson 	return (slab);
4000194a979eSMark Johnston }
4001194a979eSMark Johnston 
4002194a979eSMark Johnston static uma_slab_t
4003194a979eSMark Johnston keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int rdomain, const int flags)
4004194a979eSMark Johnston {
4005194a979eSMark Johnston 	struct vm_domainset_iter di;
4006194a979eSMark Johnston 	uma_slab_t slab;
4007194a979eSMark Johnston 	int aflags, domain;
4008194a979eSMark Johnston 	bool rr;
4009194a979eSMark Johnston 
4010fab343a7SMark Johnston 	KASSERT((flags & (M_WAITOK | M_NOVM)) != (M_WAITOK | M_NOVM),
4011fab343a7SMark Johnston 	    ("%s: invalid flags %#x", __func__, flags));
4012fab343a7SMark Johnston 
4013194a979eSMark Johnston restart:
4014bbee39c6SJeff Roberson 	/*
4015194a979eSMark Johnston 	 * Use the keg's policy if upper layers haven't already specified a
4016194a979eSMark Johnston 	 * domain (as happens with first-touch zones).
4017194a979eSMark Johnston 	 *
4018194a979eSMark Johnston 	 * To avoid races we run the iterator with the keg lock held, but that
4019194a979eSMark Johnston 	 * means that we cannot allow the vm_domainset layer to sleep.  Thus,
4020194a979eSMark Johnston 	 * clear M_WAITOK and handle low memory conditions locally.
4021bbee39c6SJeff Roberson 	 */
4022ab3185d1SJeff Roberson 	rr = rdomain == UMA_ANYDOMAIN;
4023ab3185d1SJeff Roberson 	if (rr) {
4024194a979eSMark Johnston 		aflags = (flags & ~M_WAITOK) | M_NOWAIT;
4025194a979eSMark Johnston 		vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain,
4026194a979eSMark Johnston 		    &aflags);
4027194a979eSMark Johnston 	} else {
4028194a979eSMark Johnston 		aflags = flags;
4029194a979eSMark Johnston 		domain = rdomain;
4030194a979eSMark Johnston 	}
4031ab3185d1SJeff Roberson 
4032194a979eSMark Johnston 	for (;;) {
4033194a979eSMark Johnston 		slab = keg_fetch_free_slab(keg, domain, rr, flags);
4034584061b4SJeff Roberson 		if (slab != NULL)
4035bbee39c6SJeff Roberson 			return (slab);
4036bbee39c6SJeff Roberson 
4037bbee39c6SJeff Roberson 		/*
4038fab343a7SMark Johnston 		 * M_NOVM is used to break the recursion that can otherwise
4039fab343a7SMark Johnston 		 * occur if low-level memory management routines use UMA.
4040bbee39c6SJeff Roberson 		 */
4041fab343a7SMark Johnston 		if ((flags & M_NOVM) == 0) {
404286220393SMark Johnston 			slab = keg_alloc_slab(keg, zone, domain, flags, aflags);
40438b987a77SJeff Roberson 			if (slab != NULL)
4044bbee39c6SJeff Roberson 				return (slab);
4045fab343a7SMark Johnston 		}
4046fab343a7SMark Johnston 
4047fab343a7SMark Johnston 		if (!rr) {
4048fab343a7SMark Johnston 			if ((flags & M_USE_RESERVE) != 0) {
4049fab343a7SMark Johnston 				/*
4050fab343a7SMark Johnston 				 * Drain reserves from other domains before
4051fab343a7SMark Johnston 				 * giving up or sleeping.  It may be useful to
4052fab343a7SMark Johnston 				 * support per-domain reserves eventually.
4053fab343a7SMark Johnston 				 */
4054fab343a7SMark Johnston 				rdomain = UMA_ANYDOMAIN;
4055fab343a7SMark Johnston 				goto restart;
4056fab343a7SMark Johnston 			}
4057fab343a7SMark Johnston 			if ((flags & M_WAITOK) == 0)
40583639ac42SJeff Roberson 				break;
4059fab343a7SMark Johnston 			vm_wait_domain(domain);
4060fab343a7SMark Johnston 		} else if (vm_domainset_iter_policy(&di, &domain) != 0) {
4061194a979eSMark Johnston 			if ((flags & M_WAITOK) != 0) {
406289d2fb14SKonstantin Belousov 				vm_wait_doms(&keg->uk_dr.dr_policy->ds_mask, 0);
4063194a979eSMark Johnston 				goto restart;
406430c5525bSAndrew Gallatin 			}
4065194a979eSMark Johnston 			break;
4066194a979eSMark Johnston 		}
4067ab3185d1SJeff Roberson 	}
4068ab3185d1SJeff Roberson 
4069bbee39c6SJeff Roberson 	/*
4070bbee39c6SJeff Roberson 	 * We might not have been able to get a slab but another cpu
4071bbee39c6SJeff Roberson 	 * could have while we were unlocked.  Check again before we
4072bbee39c6SJeff Roberson 	 * fail.
4073bbee39c6SJeff Roberson 	 */
40748b987a77SJeff Roberson 	if ((slab = keg_fetch_free_slab(keg, domain, rr, flags)) != NULL)
4075bbee39c6SJeff Roberson 		return (slab);
40768b987a77SJeff Roberson 
4077ab3185d1SJeff Roberson 	return (NULL);
4078ab3185d1SJeff Roberson }
4079bbee39c6SJeff Roberson 
4080d56368d7SBosko Milekic static void *
40810095a784SJeff Roberson slab_alloc_item(uma_keg_t keg, uma_slab_t slab)
4082bbee39c6SJeff Roberson {
4083ab3185d1SJeff Roberson 	uma_domain_t dom;
4084bbee39c6SJeff Roberson 	void *item;
40859b8db4d0SRyan Libby 	int freei;
4086bbee39c6SJeff Roberson 
40878b987a77SJeff Roberson 	KEG_LOCK_ASSERT(keg, slab->us_domain);
4088099a0e58SBosko Milekic 
40898b987a77SJeff Roberson 	dom = &keg->uk_domain[slab->us_domain];
40909b78b1f4SJeff Roberson 	freei = BIT_FFS(keg->uk_ipers, &slab->us_free) - 1;
40919b78b1f4SJeff Roberson 	BIT_CLR(keg->uk_ipers, freei, &slab->us_free);
40921e0701e1SJeff Roberson 	item = slab_item(slab, keg, freei);
4093bbee39c6SJeff Roberson 	slab->us_freecount--;
40944ab3aee8SMark Johnston 	dom->ud_free_items--;
4095ef72505eSJeff Roberson 
40964ab3aee8SMark Johnston 	/*
40974ab3aee8SMark Johnston 	 * Move this slab to the full list.  It must be on the partial list, so
40984ab3aee8SMark Johnston 	 * we do not need to update the free slab count.  In particular,
40994ab3aee8SMark Johnston 	 * keg_fetch_slab() always returns slabs on the partial list.
41004ab3aee8SMark Johnston 	 */
4101bbee39c6SJeff Roberson 	if (slab->us_freecount == 0) {
4102bbee39c6SJeff Roberson 		LIST_REMOVE(slab, us_link);
4103ab3185d1SJeff Roberson 		LIST_INSERT_HEAD(&dom->ud_full_slab, slab, us_link);
4104bbee39c6SJeff Roberson 	}
4105bbee39c6SJeff Roberson 
4106bbee39c6SJeff Roberson 	return (item);
4107bbee39c6SJeff Roberson }
4108bbee39c6SJeff Roberson 
4109bbee39c6SJeff Roberson static int
4110b75c4efcSAndrew Turner zone_import(void *arg, void **bucket, int max, int domain, int flags)
41110095a784SJeff Roberson {
41128b987a77SJeff Roberson 	uma_domain_t dom;
4113b75c4efcSAndrew Turner 	uma_zone_t zone;
41140095a784SJeff Roberson 	uma_slab_t slab;
41150095a784SJeff Roberson 	uma_keg_t keg;
4116a03af342SSean Bruno #ifdef NUMA
4117ab3185d1SJeff Roberson 	int stripe;
4118a03af342SSean Bruno #endif
41190095a784SJeff Roberson 	int i;
41200095a784SJeff Roberson 
4121b75c4efcSAndrew Turner 	zone = arg;
41220095a784SJeff Roberson 	slab = NULL;
4123584061b4SJeff Roberson 	keg = zone->uz_keg;
4124af526374SJeff Roberson 	/* Try to keep the buckets totally full */
41250095a784SJeff Roberson 	for (i = 0; i < max; ) {
4126584061b4SJeff Roberson 		if ((slab = keg_fetch_slab(keg, zone, domain, flags)) == NULL)
41270095a784SJeff Roberson 			break;
4128a03af342SSean Bruno #ifdef NUMA
4129ab3185d1SJeff Roberson 		stripe = howmany(max, vm_ndomains);
4130a03af342SSean Bruno #endif
41318b987a77SJeff Roberson 		dom = &keg->uk_domain[slab->us_domain];
41321b2dcc8cSMark Johnston 		do {
41330095a784SJeff Roberson 			bucket[i++] = slab_alloc_item(keg, slab);
41347585c5dbSMark Johnston 			if (keg->uk_reserve > 0 &&
41357585c5dbSMark Johnston 			    dom->ud_free_items <= keg->uk_reserve) {
41361b2dcc8cSMark Johnston 				/*
41371b2dcc8cSMark Johnston 				 * Avoid depleting the reserve after a
41381b2dcc8cSMark Johnston 				 * successful item allocation, even if
41391b2dcc8cSMark Johnston 				 * M_USE_RESERVE is specified.
41401b2dcc8cSMark Johnston 				 */
41411b2dcc8cSMark Johnston 				KEG_UNLOCK(keg, slab->us_domain);
41421b2dcc8cSMark Johnston 				goto out;
41431b2dcc8cSMark Johnston 			}
4144b6715dabSJeff Roberson #ifdef NUMA
4145ab3185d1SJeff Roberson 			/*
4146ab3185d1SJeff Roberson 			 * If the zone is striped we pick a new slab for every
4147ab3185d1SJeff Roberson 			 * N allocations.  Eliminating this conditional will
4148ab3185d1SJeff Roberson 			 * instead pick a new domain for each bucket rather
4149ab3185d1SJeff Roberson 			 * than stripe within each bucket.  The current option
4150ab3185d1SJeff Roberson 			 * produces more fragmentation and requires more cpu
4151ab3185d1SJeff Roberson 			 * time but yields better distribution.
4152ab3185d1SJeff Roberson 			 */
4153dfe13344SJeff Roberson 			if ((zone->uz_flags & UMA_ZONE_ROUNDROBIN) != 0 &&
4154ab3185d1SJeff Roberson 			    vm_ndomains > 1 && --stripe == 0)
4155ab3185d1SJeff Roberson 				break;
4156ab3185d1SJeff Roberson #endif
41571b2dcc8cSMark Johnston 		} while (slab->us_freecount != 0 && i < max);
41588b987a77SJeff Roberson 		KEG_UNLOCK(keg, slab->us_domain);
41591b2dcc8cSMark Johnston 
4160ab3185d1SJeff Roberson 		/* Don't block if we allocated any successfully. */
41610095a784SJeff Roberson 		flags &= ~M_WAITOK;
41620095a784SJeff Roberson 		flags |= M_NOWAIT;
41630095a784SJeff Roberson 	}
41641b2dcc8cSMark Johnston out:
41650095a784SJeff Roberson 	return i;
41660095a784SJeff Roberson }
41670095a784SJeff Roberson 
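/*
 * Illustrative sketch (not part of the allocator): zone_import() above and
 * its counterpart zone_release() below form the default keg-backed back end.
 * A cache zone may supply its own pair with the same signatures through
 * uma_zcache_create(); the "foo" names here are hypothetical.
 *
 *	static int
 *	foo_import(void *arg, void **store, int count, int domain, int flags)
 *	{
 *		...fill store[0..count-1] from a private pool and return the
 *		number of items actually filled...
 *	}
 *
 *	static void
 *	foo_release(void *arg, void **store, int count)
 *	{
 *		...return store[0..count-1] to the private pool...
 *	}
 *
 *	zone = uma_zcache_create("foo cache", sizeof(struct foo), NULL, NULL,
 *	    NULL, NULL, foo_import, foo_release, NULL, 0);
 */
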
41684bd61e19SJeff Roberson static int
41694bd61e19SJeff Roberson zone_alloc_limit_hard(uma_zone_t zone, int count, int flags)
41704bd61e19SJeff Roberson {
41714bd61e19SJeff Roberson 	uint64_t old, new, total, max;
41724bd61e19SJeff Roberson 
41734bd61e19SJeff Roberson 	/*
41744bd61e19SJeff Roberson 	 * The hard case.  We're going to sleep because there were existing
41754bd61e19SJeff Roberson 	 * sleepers or because we ran out of items.  This routine enforces
41764bd61e19SJeff Roberson 	 * fairness by keeping fifo order.
41774bd61e19SJeff Roberson 	 *
41784bd61e19SJeff Roberson 	 * First release our ill gotten gains and make some noise.
41794bd61e19SJeff Roberson 	 * First release our ill-gotten gains and make some noise.
41804bd61e19SJeff Roberson 	for (;;) {
41814bd61e19SJeff Roberson 		zone_free_limit(zone, count);
41824bd61e19SJeff Roberson 		zone_log_warning(zone);
41834bd61e19SJeff Roberson 		zone_maxaction(zone);
41844bd61e19SJeff Roberson 		if (flags & M_NOWAIT)
41854bd61e19SJeff Roberson 			return (0);
41864bd61e19SJeff Roberson 
41874bd61e19SJeff Roberson 		/*
41884bd61e19SJeff Roberson 		 * We need to allocate an item or set ourselves as a sleeper
41894bd61e19SJeff Roberson 		 * while the sleepq lock is held to avoid wakeup races.  This
41904bd61e19SJeff Roberson 		 * is essentially a home-rolled semaphore.
41914bd61e19SJeff Roberson 		 */
41924bd61e19SJeff Roberson 		sleepq_lock(&zone->uz_max_items);
41934bd61e19SJeff Roberson 		old = zone->uz_items;
41944bd61e19SJeff Roberson 		do {
41954bd61e19SJeff Roberson 			MPASS(UZ_ITEMS_SLEEPERS(old) < UZ_ITEMS_SLEEPERS_MAX);
41964bd61e19SJeff Roberson 			/* Cache the max since we will evaluate twice. */
41974bd61e19SJeff Roberson 			max = zone->uz_max_items;
41984bd61e19SJeff Roberson 			if (UZ_ITEMS_SLEEPERS(old) != 0 ||
41994bd61e19SJeff Roberson 			    UZ_ITEMS_COUNT(old) >= max)
42004bd61e19SJeff Roberson 				new = old + UZ_ITEMS_SLEEPER;
42014bd61e19SJeff Roberson 			else
42024bd61e19SJeff Roberson 				new = old + MIN(count, max - old);
42034bd61e19SJeff Roberson 		} while (atomic_fcmpset_64(&zone->uz_items, &old, new) == 0);
42044bd61e19SJeff Roberson 
42054bd61e19SJeff Roberson 		/* We may have successfully allocated under the sleepq lock. */
42064bd61e19SJeff Roberson 		if (UZ_ITEMS_SLEEPERS(new) == 0) {
42074bd61e19SJeff Roberson 			sleepq_release(&zone->uz_max_items);
42084bd61e19SJeff Roberson 			return (new - old);
42094bd61e19SJeff Roberson 		}
42104bd61e19SJeff Roberson 
42114bd61e19SJeff Roberson 		/*
42124bd61e19SJeff Roberson 		 * This is in a different cacheline from uz_items so that we
42134bd61e19SJeff Roberson 		 * don't constantly invalidate the fastpath cacheline when we
42144bd61e19SJeff Roberson 		 * adjust item counts.  This could be limited to toggling on
42154bd61e19SJeff Roberson 		 * transitions.
42164bd61e19SJeff Roberson 		 */
42174bd61e19SJeff Roberson 		atomic_add_32(&zone->uz_sleepers, 1);
42184bd61e19SJeff Roberson 		atomic_add_64(&zone->uz_sleeps, 1);
42194bd61e19SJeff Roberson 
42204bd61e19SJeff Roberson 		/*
42214bd61e19SJeff Roberson 		 * We have added ourselves as a sleeper.  The sleepq lock
42224bd61e19SJeff Roberson 		 * protects us from wakeup races.  Sleep now and then retry.
42234bd61e19SJeff Roberson 		 */
42244bd61e19SJeff Roberson 		sleepq_add(&zone->uz_max_items, NULL, "zonelimit", 0, 0);
42254bd61e19SJeff Roberson 		sleepq_wait(&zone->uz_max_items, PVM);
42264bd61e19SJeff Roberson 
42274bd61e19SJeff Roberson 		/*
42284bd61e19SJeff Roberson 		 * After wakeup, remove ourselves as a sleeper and try
42294bd61e19SJeff Roberson 		 * again.  We no longer have the sleepq lock for protection.
42304bd61e19SJeff Roberson 		 *
42314bd61e19SJeff Roberson 		 * Subract ourselves as a sleeper while attempting to add
42324bd61e19SJeff Roberson 		 * our count.
42334bd61e19SJeff Roberson 		 * Subtract ourselves as a sleeper while attempting to add
42344bd61e19SJeff Roberson 		atomic_subtract_32(&zone->uz_sleepers, 1);
42354bd61e19SJeff Roberson 		old = atomic_fetchadd_64(&zone->uz_items,
42364bd61e19SJeff Roberson 		    -(UZ_ITEMS_SLEEPER - count));
42374bd61e19SJeff Roberson 		/* We're no longer a sleeper. */
42384bd61e19SJeff Roberson 		old -= UZ_ITEMS_SLEEPER;
42394bd61e19SJeff Roberson 
42404bd61e19SJeff Roberson 		/*
42414bd61e19SJeff Roberson 		 * If we're still at the limit, restart.  Notably do not
42424bd61e19SJeff Roberson 		 * block on other sleepers.  Cache the max value to protect
42434bd61e19SJeff Roberson 		 * against changes via sysctl.
42444bd61e19SJeff Roberson 		 */
42454bd61e19SJeff Roberson 		total = UZ_ITEMS_COUNT(old);
42464bd61e19SJeff Roberson 		max = zone->uz_max_items;
42474bd61e19SJeff Roberson 		if (total >= max)
42484bd61e19SJeff Roberson 			continue;
42494bd61e19SJeff Roberson 		/* Truncate if necessary, otherwise wake other sleepers. */
42504bd61e19SJeff Roberson 		if (total + count > max) {
42514bd61e19SJeff Roberson 			zone_free_limit(zone, total + count - max);
42524bd61e19SJeff Roberson 			count = max - total;
42534bd61e19SJeff Roberson 		} else if (total + count < max && UZ_ITEMS_SLEEPERS(old) != 0)
42544bd61e19SJeff Roberson 			wakeup_one(&zone->uz_max_items);
42554bd61e19SJeff Roberson 
42564bd61e19SJeff Roberson 		return (count);
42574bd61e19SJeff Roberson 	}
42584bd61e19SJeff Roberson }
42594bd61e19SJeff Roberson 
42604bd61e19SJeff Roberson /*
42614bd61e19SJeff Roberson  * Allocate 'count' items from our max_items limit.  Returns the number
42624bd61e19SJeff Roberson  * available.  If M_NOWAIT is not specified it will sleep until at least
42634bd61e19SJeff Roberson  * one item can be allocated.
42644bd61e19SJeff Roberson  */
42654bd61e19SJeff Roberson static int
42664bd61e19SJeff Roberson zone_alloc_limit(uma_zone_t zone, int count, int flags)
42674bd61e19SJeff Roberson {
42684bd61e19SJeff Roberson 	uint64_t old;
42694bd61e19SJeff Roberson 	uint64_t max;
42704bd61e19SJeff Roberson 
42714bd61e19SJeff Roberson 	max = zone->uz_max_items;
42724bd61e19SJeff Roberson 	MPASS(max > 0);
42734bd61e19SJeff Roberson 
42744bd61e19SJeff Roberson 	/*
42754bd61e19SJeff Roberson 	 * We expect normal allocations to succeed with a simple
42764bd61e19SJeff Roberson 	 * fetchadd.
42774bd61e19SJeff Roberson 	 */
42784bd61e19SJeff Roberson 	old = atomic_fetchadd_64(&zone->uz_items, count);
42794bd61e19SJeff Roberson 	if (__predict_true(old + count <= max))
42804bd61e19SJeff Roberson 		return (count);
42814bd61e19SJeff Roberson 
42824bd61e19SJeff Roberson 	/*
42834bd61e19SJeff Roberson 	 * If we had some items and no sleepers just return the
42844bd61e19SJeff Roberson 	 * truncated value.  We have to release the excess space
42854bd61e19SJeff Roberson 	 * though because that may wake sleepers who weren't woken
42864bd61e19SJeff Roberson 	 * because we were temporarily over the limit.
42874bd61e19SJeff Roberson 	 */
42884bd61e19SJeff Roberson 	if (old < max) {
42894bd61e19SJeff Roberson 		zone_free_limit(zone, (old + count) - max);
42904bd61e19SJeff Roberson 		return (max - old);
42914bd61e19SJeff Roberson 	}
42924bd61e19SJeff Roberson 	return (zone_alloc_limit_hard(zone, count, flags));
42934bd61e19SJeff Roberson }
42944bd61e19SJeff Roberson 
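/*
 * A worked example of zone_alloc_limit() above, with hypothetical numbers:
 * with uz_max_items == 100, uz_items == 96 and count == 8, the fetchadd
 * returns old == 96, so old + count > max and the fast return is skipped;
 * since old < max we release (96 + 8) - 100 == 4 items back through
 * zone_free_limit() and return 100 - 96 == 4 items to the caller.
 */
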
42954bd61e19SJeff Roberson /*
42964bd61e19SJeff Roberson  * Free a number of items back to the limit.
42974bd61e19SJeff Roberson  */
42984bd61e19SJeff Roberson static void
42994bd61e19SJeff Roberson zone_free_limit(uma_zone_t zone, int count)
43004bd61e19SJeff Roberson {
43014bd61e19SJeff Roberson 	uint64_t old;
43024bd61e19SJeff Roberson 
43034bd61e19SJeff Roberson 	MPASS(count > 0);
43044bd61e19SJeff Roberson 
43054bd61e19SJeff Roberson 	/*
43064bd61e19SJeff Roberson 	 * In the common case we either have no sleepers or
43074bd61e19SJeff Roberson 	 * are still over the limit and can just return.
43084bd61e19SJeff Roberson 	 */
43094bd61e19SJeff Roberson 	old = atomic_fetchadd_64(&zone->uz_items, -count);
43104bd61e19SJeff Roberson 	if (__predict_true(UZ_ITEMS_SLEEPERS(old) == 0 ||
43114bd61e19SJeff Roberson 	   UZ_ITEMS_COUNT(old) - count >= zone->uz_max_items))
43124bd61e19SJeff Roberson 		return;
43134bd61e19SJeff Roberson 
43144bd61e19SJeff Roberson 	/*
43154bd61e19SJeff Roberson 	 * Moderate the rate of wakeups.  Sleepers will continue
43164bd61e19SJeff Roberson 	 * to generate wakeups if necessary.
43174bd61e19SJeff Roberson 	 */
43184bd61e19SJeff Roberson 	wakeup_one(&zone->uz_max_items);
43194bd61e19SJeff Roberson }
43204bd61e19SJeff Roberson 
4321fc03d22bSJeff Roberson static uma_bucket_t
4322beb8beefSJeff Roberson zone_alloc_bucket(uma_zone_t zone, void *udata, int domain, int flags)
4323bbee39c6SJeff Roberson {
4324bbee39c6SJeff Roberson 	uma_bucket_t bucket;
432509c8cb71SMark Johnston 	int error, maxbucket, cnt;
4326bbee39c6SJeff Roberson 
4327e63a1c2fSRyan Libby 	CTR3(KTR_UMA, "zone_alloc_bucket zone %s(%p) domain %d", zone->uz_name,
4328e63a1c2fSRyan Libby 	    zone, domain);
432930c5525bSAndrew Gallatin 
4330c1685086SJeff Roberson 	/* Avoid allocs targeting empty domains. */
4331c1685086SJeff Roberson 	if (domain != UMA_ANYDOMAIN && VM_DOMAIN_EMPTY(domain))
4332c1685086SJeff Roberson 		domain = UMA_ANYDOMAIN;
43338c277118SMark Johnston 	else if ((zone->uz_flags & UMA_ZONE_ROUNDROBIN) != 0)
4334c6fd3e23SJeff Roberson 		domain = UMA_ANYDOMAIN;
4335c1685086SJeff Roberson 
43364bd61e19SJeff Roberson 	if (zone->uz_max_items > 0)
43374bd61e19SJeff Roberson 		maxbucket = zone_alloc_limit(zone, zone->uz_bucket_size,
43384bd61e19SJeff Roberson 		    M_NOWAIT);
43394bd61e19SJeff Roberson 	else
434020a4e154SJeff Roberson 		maxbucket = zone->uz_bucket_size;
43414bd61e19SJeff Roberson 	if (maxbucket == 0)
434254361f90SMark Johnston 		return (NULL);
4343beb8beefSJeff Roberson 
43446fd34d6fSJeff Roberson 	/* Don't wait for buckets, preserve caller's NOVM setting. */
43456fd34d6fSJeff Roberson 	bucket = bucket_alloc(zone, udata, M_NOWAIT | (flags & M_NOVM));
4346beb8beefSJeff Roberson 	if (bucket == NULL) {
4347beb8beefSJeff Roberson 		cnt = 0;
4348beb8beefSJeff Roberson 		goto out;
4349beb8beefSJeff Roberson 	}
43500095a784SJeff Roberson 
43510095a784SJeff Roberson 	bucket->ub_cnt = zone->uz_import(zone->uz_arg, bucket->ub_bucket,
4352beb8beefSJeff Roberson 	    MIN(maxbucket, bucket->ub_entries), domain, flags);
43530095a784SJeff Roberson 
43540095a784SJeff Roberson 	/*
43550095a784SJeff Roberson 	 * Initialize the memory if necessary.
43560095a784SJeff Roberson 	 */
43570095a784SJeff Roberson 	if (bucket->ub_cnt != 0 && zone->uz_init != NULL) {
4358099a0e58SBosko Milekic 		int i;
4359bbee39c6SJeff Roberson 
436009c8cb71SMark Johnston 		for (i = 0; i < bucket->ub_cnt; i++) {
436109c8cb71SMark Johnston 			kasan_mark_item_valid(zone, bucket->ub_bucket[i]);
436209c8cb71SMark Johnston 			error = zone->uz_init(bucket->ub_bucket[i],
436309c8cb71SMark Johnston 			    zone->uz_size, flags);
436409c8cb71SMark Johnston 			kasan_mark_item_invalid(zone, bucket->ub_bucket[i]);
436509c8cb71SMark Johnston 			if (error != 0)
4366b23f72e9SBrian Feldman 				break;
436709c8cb71SMark Johnston 		}
436809c8cb71SMark Johnston 
4369b23f72e9SBrian Feldman 		/*
4370b23f72e9SBrian Feldman 		 * If we couldn't initialize the whole bucket, put the
4371b23f72e9SBrian Feldman 		 * rest back onto the freelist.
4372b23f72e9SBrian Feldman 		 */
4373b23f72e9SBrian Feldman 		if (i != bucket->ub_cnt) {
4374af526374SJeff Roberson 			zone->uz_release(zone->uz_arg, &bucket->ub_bucket[i],
43750095a784SJeff Roberson 			    bucket->ub_cnt - i);
4376a5a262c6SBosko Milekic #ifdef INVARIANTS
43770095a784SJeff Roberson 			bzero(&bucket->ub_bucket[i],
43780095a784SJeff Roberson 			    sizeof(void *) * (bucket->ub_cnt - i));
4379a5a262c6SBosko Milekic #endif
4380b23f72e9SBrian Feldman 			bucket->ub_cnt = i;
4381b23f72e9SBrian Feldman 		}
4382099a0e58SBosko Milekic 	}
4383099a0e58SBosko Milekic 
4384beb8beefSJeff Roberson 	cnt = bucket->ub_cnt;
4385f7104ccdSAlexander Motin 	if (bucket->ub_cnt == 0) {
43866fd34d6fSJeff Roberson 		bucket_free(zone, bucket, udata);
43872efcc8cbSGleb Smirnoff 		counter_u64_add(zone->uz_fails, 1);
4388beb8beefSJeff Roberson 		bucket = NULL;
4389beb8beefSJeff Roberson 	}
4390beb8beefSJeff Roberson out:
43914bd61e19SJeff Roberson 	if (zone->uz_max_items > 0 && cnt < maxbucket)
43924bd61e19SJeff Roberson 		zone_free_limit(zone, maxbucket - cnt);
4393fc03d22bSJeff Roberson 
4394fc03d22bSJeff Roberson 	return (bucket);
4395fc03d22bSJeff Roberson }
4396fc03d22bSJeff Roberson 
43978355f576SJeff Roberson /*
43980095a784SJeff Roberson  * Allocates a single item from a zone.
43998355f576SJeff Roberson  *
44008355f576SJeff Roberson  * Arguments
44018355f576SJeff Roberson  *	zone   The zone to alloc for.
44028355f576SJeff Roberson  *	udata  The data to be passed to the constructor.
4403ab3185d1SJeff Roberson  *	domain The domain to allocate from or UMA_ANYDOMAIN.
4404a163d034SWarner Losh  *	flags  M_WAITOK, M_NOWAIT, M_ZERO.
44058355f576SJeff Roberson  *
44068355f576SJeff Roberson  * Returns
44078355f576SJeff Roberson  *	NULL if there is no memory and M_NOWAIT is set
4408bbee39c6SJeff Roberson  *	An item if successful
44098355f576SJeff Roberson  */
44108355f576SJeff Roberson 
44118355f576SJeff Roberson static void *
4412ab3185d1SJeff Roberson zone_alloc_item(uma_zone_t zone, void *udata, int domain, int flags)
44138355f576SJeff Roberson {
44148355f576SJeff Roberson 	void *item;
44158355f576SJeff Roberson 
4416791dda87SAndrew Gallatin 	if (zone->uz_max_items > 0 && zone_alloc_limit(zone, 1, flags) == 0) {
4417791dda87SAndrew Gallatin 		counter_u64_add(zone->uz_fails, 1);
4418bb15d1c7SGleb Smirnoff 		return (NULL);
4419791dda87SAndrew Gallatin 	}
44208355f576SJeff Roberson 
4421c1685086SJeff Roberson 	/* Avoid allocs targeting empty domains. */
4422c1685086SJeff Roberson 	if (domain != UMA_ANYDOMAIN && VM_DOMAIN_EMPTY(domain))
442330c5525bSAndrew Gallatin 		domain = UMA_ANYDOMAIN;
4424c1685086SJeff Roberson 
4425ab3185d1SJeff Roberson 	if (zone->uz_import(zone->uz_arg, &item, 1, domain, flags) != 1)
4426beb8beefSJeff Roberson 		goto fail_cnt;
44278355f576SJeff Roberson 
4428099a0e58SBosko Milekic 	/*
4429099a0e58SBosko Milekic 	 * We have to call both the zone's init (not the keg's init)
4430099a0e58SBosko Milekic 	 * and the zone's ctor.  This is because the item is going from
4431099a0e58SBosko Milekic 	 * a keg slab directly to the user, and the user is expecting it
4432099a0e58SBosko Milekic 	 * to be both zone-init'd as well as zone-ctor'd.
4433099a0e58SBosko Milekic 	 */
4434b23f72e9SBrian Feldman 	if (zone->uz_init != NULL) {
443509c8cb71SMark Johnston 		int error;
443609c8cb71SMark Johnston 
443709c8cb71SMark Johnston 		kasan_mark_item_valid(zone, item);
443809c8cb71SMark Johnston 		error = zone->uz_init(item, zone->uz_size, flags);
443909c8cb71SMark Johnston 		kasan_mark_item_invalid(zone, item);
444009c8cb71SMark Johnston 		if (error != 0) {
4441bb15d1c7SGleb Smirnoff 			zone_free_item(zone, item, udata, SKIP_FINI | SKIP_CNT);
4442beb8beefSJeff Roberson 			goto fail_cnt;
4443beb8beefSJeff Roberson 		}
4444beb8beefSJeff Roberson 	}
4445d4665eaaSJeff Roberson 	item = item_ctor(zone, zone->uz_flags, zone->uz_size, udata, flags,
4446d4665eaaSJeff Roberson 	    item);
4447beb8beefSJeff Roberson 	if (item == NULL)
44480095a784SJeff Roberson 		goto fail;
44498355f576SJeff Roberson 
44502efcc8cbSGleb Smirnoff 	counter_u64_add(zone->uz_allocs, 1);
44511431a748SGleb Smirnoff 	CTR3(KTR_UMA, "zone_alloc_item item %p from %s(%p)", item,
44521431a748SGleb Smirnoff 	    zone->uz_name, zone);
44531431a748SGleb Smirnoff 
44548355f576SJeff Roberson 	return (item);
44550095a784SJeff Roberson 
4456beb8beefSJeff Roberson fail_cnt:
4457beb8beefSJeff Roberson 	counter_u64_add(zone->uz_fails, 1);
44580095a784SJeff Roberson fail:
44594bd61e19SJeff Roberson 	if (zone->uz_max_items > 0)
44604bd61e19SJeff Roberson 		zone_free_limit(zone, 1);
44611431a748SGleb Smirnoff 	CTR2(KTR_UMA, "zone_alloc_item failed from %s(%p)",
44621431a748SGleb Smirnoff 	    zone->uz_name, zone);
44634bd61e19SJeff Roberson 
44640095a784SJeff Roberson 	return (NULL);
44658355f576SJeff Roberson }
44668355f576SJeff Roberson 
44678355f576SJeff Roberson /* See uma.h */
44688355f576SJeff Roberson void
4469d4665eaaSJeff Roberson uma_zfree_smr(uma_zone_t zone, void *item)
4470d4665eaaSJeff Roberson {
4471d4665eaaSJeff Roberson 	uma_cache_t cache;
4472d4665eaaSJeff Roberson 	uma_cache_bucket_t bucket;
4473a7e1a585SJohn Baldwin 	int itemdomain;
4474a7e1a585SJohn Baldwin #ifdef NUMA
4475a7e1a585SJohn Baldwin 	int uz_flags;
4476a7e1a585SJohn Baldwin #endif
4477d4665eaaSJeff Roberson 
4478841e0a87SGleb Smirnoff 	CTR3(KTR_UMA, "uma_zfree_smr zone %s(%p) item %p",
4479841e0a87SGleb Smirnoff 	    zone->uz_name, zone, item);
4480841e0a87SGleb Smirnoff 
4481d4665eaaSJeff Roberson #ifdef UMA_ZALLOC_DEBUG
4482d4665eaaSJeff Roberson 	KASSERT((zone->uz_flags & UMA_ZONE_SMR) != 0,
4483952c8964SMark Johnston 	    ("uma_zfree_smr: called with non-SMR zone."));
4484d4665eaaSJeff Roberson 	KASSERT(item != NULL, ("uma_zfree_smr: Called with NULL pointer."));
4485c6fd3e23SJeff Roberson 	SMR_ASSERT_NOT_ENTERED(zone->uz_smr);
4486d4665eaaSJeff Roberson 	if (uma_zfree_debug(zone, item, NULL) == EJUSTRETURN)
4487d4665eaaSJeff Roberson 		return;
4488d4665eaaSJeff Roberson #endif
4489d4665eaaSJeff Roberson 	cache = &zone->uz_cpu[curcpu];
4490c6fd3e23SJeff Roberson 	itemdomain = 0;
4491d4665eaaSJeff Roberson #ifdef NUMA
4492a7e1a585SJohn Baldwin 	uz_flags = cache_uz_flags(cache);
4493d4665eaaSJeff Roberson 	if ((uz_flags & UMA_ZONE_FIRSTTOUCH) != 0)
449481302f1dSMark Johnston 		itemdomain = item_domain(item);
4495d4665eaaSJeff Roberson #endif
4496d4665eaaSJeff Roberson 	critical_enter();
4497d4665eaaSJeff Roberson 	do {
4498d4665eaaSJeff Roberson 		cache = &zone->uz_cpu[curcpu];
4499d4665eaaSJeff Roberson 		/* SMR Zones must free to the free bucket. */
4500d4665eaaSJeff Roberson 		bucket = &cache->uc_freebucket;
4501d4665eaaSJeff Roberson #ifdef NUMA
4502d4665eaaSJeff Roberson 		if ((uz_flags & UMA_ZONE_FIRSTTOUCH) != 0 &&
4503c6fd3e23SJeff Roberson 		    PCPU_GET(domain) != itemdomain) {
4504d4665eaaSJeff Roberson 			bucket = &cache->uc_crossbucket;
4505d4665eaaSJeff Roberson 		}
4506d4665eaaSJeff Roberson #endif
4507d4665eaaSJeff Roberson 		if (__predict_true(bucket->ucb_cnt < bucket->ucb_entries)) {
4508d4665eaaSJeff Roberson 			cache_bucket_push(cache, bucket, item);
4509d4665eaaSJeff Roberson 			critical_exit();
4510d4665eaaSJeff Roberson 			return;
4511d4665eaaSJeff Roberson 		}
45122cb67bd7SGleb Smirnoff 	} while (cache_free(zone, cache, NULL, itemdomain));
4513d4665eaaSJeff Roberson 	critical_exit();
4514d4665eaaSJeff Roberson 
4515d4665eaaSJeff Roberson 	/*
4516d4665eaaSJeff Roberson 	 * If nothing else caught this, we'll just do an internal free.
4517d4665eaaSJeff Roberson 	 */
4518d4665eaaSJeff Roberson 	zone_free_item(zone, item, NULL, SKIP_NONE);
4519d4665eaaSJeff Roberson }
4520d4665eaaSJeff Roberson 
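/*
 * Consumer-side sketch for SMR zones (illustrative only; "foo" is a
 * hypothetical type).  A zone created with UMA_ZONE_SMR frees through
 * uma_zfree_smr() above, readers bracket lookups with the zone's SMR
 * section, and freed memory is only reused after a grace period:
 *
 *	zone = uma_zcreate("foo", sizeof(struct foo), NULL, NULL, NULL,
 *	    NULL, UMA_ALIGN_PTR, UMA_ZONE_SMR);
 *	smr = uma_zone_get_smr(zone);
 *
 *	Writer:	p = uma_zalloc_smr(zone, M_NOWAIT); ...publish p...
 *		...unpublish p...; uma_zfree_smr(zone, p);
 *	Reader:	smr_enter(smr); ...dereference published items...
 *		smr_exit(smr);
 */
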
4521d4665eaaSJeff Roberson /* See uma.h */
4522d4665eaaSJeff Roberson void
45238355f576SJeff Roberson uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
45248355f576SJeff Roberson {
45258355f576SJeff Roberson 	uma_cache_t cache;
4526376b1ba3SJeff Roberson 	uma_cache_bucket_t bucket;
4527c6fd3e23SJeff Roberson 	int itemdomain, uz_flags;
45288355f576SJeff Roberson 
4529e866d8f0SMark Murray 	/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
453019fa89e9SMark Murray 	random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA);
453110cb2424SMark Murray 
453228782f73SGleb Smirnoff 	CTR3(KTR_UMA, "uma_zfree_arg zone %s(%p) item %p",
453328782f73SGleb Smirnoff 	    zone->uz_name, zone, item);
45343659f747SRobert Watson 
4535d4665eaaSJeff Roberson #ifdef UMA_ZALLOC_DEBUG
4536d4665eaaSJeff Roberson 	KASSERT((zone->uz_flags & UMA_ZONE_SMR) == 0,
4537952c8964SMark Johnston 	    ("uma_zfree_arg: called with SMR zone."));
4538d4665eaaSJeff Roberson 	if (uma_zfree_debug(zone, item, udata) == EJUSTRETURN)
4539d4665eaaSJeff Roberson 		return;
4540d4665eaaSJeff Roberson #endif
454120ed0cb0SMatthew D Fleming 	/* uma_zfree(..., NULL) does nothing, to match free(9). */
454220ed0cb0SMatthew D Fleming 	if (item == NULL)
454320ed0cb0SMatthew D Fleming 		return;
4544cc7ce83aSJeff Roberson 
4545cc7ce83aSJeff Roberson 	/*
4546cc7ce83aSJeff Roberson 	 * We are accessing the per-cpu cache without a critical section to
4547cc7ce83aSJeff Roberson 	 * fetch size and flags.  This is acceptable, if we are preempted we
4548cc7ce83aSJeff Roberson 	 * will simply read another cpu's line.
4549cc7ce83aSJeff Roberson 	 */
4550cc7ce83aSJeff Roberson 	cache = &zone->uz_cpu[curcpu];
4551cc7ce83aSJeff Roberson 	uz_flags = cache_uz_flags(cache);
4552d4665eaaSJeff Roberson 	if (UMA_ALWAYS_CTORDTOR ||
4553d4665eaaSJeff Roberson 	    __predict_false((uz_flags & UMA_ZFLAG_CTORDTOR) != 0))
4554cc7ce83aSJeff Roberson 		item_dtor(zone, item, cache_uz_size(cache), udata, SKIP_NONE);
4555ef72505eSJeff Roberson 
4556af7f9b97SJeff Roberson 	/*
4557af7f9b97SJeff Roberson 	 * The race here is acceptable.  If we miss it we'll just have to wait
4558af7f9b97SJeff Roberson 	 * a little longer for the limits to be reset.
4559af7f9b97SJeff Roberson 	 */
4560cc7ce83aSJeff Roberson 	if (__predict_false(uz_flags & UMA_ZFLAG_LIMIT)) {
45618a6776caSMark Johnston 		if (atomic_load_32(&zone->uz_sleepers) > 0)
4562fc03d22bSJeff Roberson 			goto zfree_item;
4563cc7ce83aSJeff Roberson 	}
4564af7f9b97SJeff Roberson 
45655d1ae027SRobert Watson 	/*
45665d1ae027SRobert Watson 	 * If possible, free to the per-CPU cache.  There are two
45675d1ae027SRobert Watson 	 * requirements for safe access to the per-CPU cache: (1) the thread
45685d1ae027SRobert Watson 	 * accessing the cache must not be preempted or yield during access,
45695d1ae027SRobert Watson 	 * and (2) the thread must not migrate CPUs without switching which
45705d1ae027SRobert Watson 	 * cache it accesses.  We rely on a critical section to prevent
45715d1ae027SRobert Watson 	 * preemption and migration.  We release the critical section in
45725d1ae027SRobert Watson 	 * order to acquire the zone mutex if we are unable to free to the
45735d1ae027SRobert Watson 	 * current cache; when we re-acquire the critical section, we must
45745d1ae027SRobert Watson 	 * detect and handle migration if it has occurred.
45755d1ae027SRobert Watson 	 */
4576c6fd3e23SJeff Roberson 	itemdomain = 0;
4577dfe13344SJeff Roberson #ifdef NUMA
4578dfe13344SJeff Roberson 	if ((uz_flags & UMA_ZONE_FIRSTTOUCH) != 0)
457981302f1dSMark Johnston 		itemdomain = item_domain(item);
4580dfe13344SJeff Roberson #endif
45815d1ae027SRobert Watson 	critical_enter();
45820a81b439SJeff Roberson 	do {
4583cc7ce83aSJeff Roberson 		cache = &zone->uz_cpu[curcpu];
4584a553d4b8SJeff Roberson 		/*
4585dfe13344SJeff Roberson 		 * Try to free into the allocbucket first to give LIFO
4586dfe13344SJeff Roberson 		 * ordering for cache-hot datastructures.  Spill over
4587dfe13344SJeff Roberson 		 * into the freebucket if necessary.  Alloc will swap
4588dfe13344SJeff Roberson 		 * them if one runs dry.
4589a553d4b8SJeff Roberson 		 */
4590dfe13344SJeff Roberson 		bucket = &cache->uc_allocbucket;
4591d4665eaaSJeff Roberson #ifdef NUMA
4592d4665eaaSJeff Roberson 		if ((uz_flags & UMA_ZONE_FIRSTTOUCH) != 0 &&
4593c6fd3e23SJeff Roberson 		    PCPU_GET(domain) != itemdomain) {
4594d4665eaaSJeff Roberson 			bucket = &cache->uc_crossbucket;
4595d4665eaaSJeff Roberson 		} else
4596d4665eaaSJeff Roberson #endif
4597fe835cbfSJeff Roberson 		if (bucket->ucb_cnt == bucket->ucb_entries &&
4598fe835cbfSJeff Roberson 		   cache->uc_freebucket.ucb_cnt <
4599fe835cbfSJeff Roberson 		   cache->uc_freebucket.ucb_entries)
4600fe835cbfSJeff Roberson 			cache_bucket_swap(&cache->uc_freebucket,
4601fe835cbfSJeff Roberson 			    &cache->uc_allocbucket);
4602376b1ba3SJeff Roberson 		if (__predict_true(bucket->ucb_cnt < bucket->ucb_entries)) {
4603376b1ba3SJeff Roberson 			cache_bucket_push(cache, bucket, item);
46045d1ae027SRobert Watson 			critical_exit();
46058355f576SJeff Roberson 			return;
4606fc03d22bSJeff Roberson 		}
46072cb67bd7SGleb Smirnoff 	} while (cache_free(zone, cache, udata, itemdomain));
46080a81b439SJeff Roberson 	critical_exit();
4609fc03d22bSJeff Roberson 
46108355f576SJeff Roberson 	/*
46110a81b439SJeff Roberson 	 * If nothing else caught this, we'll just do an internal free.
46128355f576SJeff Roberson 	 */
46130a81b439SJeff Roberson zfree_item:
46140a81b439SJeff Roberson 	zone_free_item(zone, item, udata, SKIP_DTOR);
46150a81b439SJeff Roberson }
4616fc03d22bSJeff Roberson 
4617dfe13344SJeff Roberson #ifdef NUMA
461891d947bfSJeff Roberson /*
461991d947bfSJeff Roberson  * sort crossdomain free buckets to domain correct buckets and cache
462091d947bfSJeff Roberson  * Sort cross-domain free buckets into domain-correct buckets and cache
462191d947bfSJeff Roberson  * them.
462291d947bfSJeff Roberson static void
462391d947bfSJeff Roberson zone_free_cross(uma_zone_t zone, uma_bucket_t bucket, void *udata)
462491d947bfSJeff Roberson {
4625991f23efSMark Johnston 	struct uma_bucketlist emptybuckets, fullbuckets;
462691d947bfSJeff Roberson 	uma_zone_domain_t zdom;
462791d947bfSJeff Roberson 	uma_bucket_t b;
4628543117beSJeff Roberson 	smr_seq_t seq;
462991d947bfSJeff Roberson 	void *item;
463091d947bfSJeff Roberson 	int domain;
463191d947bfSJeff Roberson 
463291d947bfSJeff Roberson 	CTR3(KTR_UMA,
463391d947bfSJeff Roberson 	    "uma_zfree: zone %s(%p) draining cross bucket %p",
463491d947bfSJeff Roberson 	    zone->uz_name, zone, bucket);
463591d947bfSJeff Roberson 
4636543117beSJeff Roberson 	/*
4637543117beSJeff Roberson 	 * It is possible for buckets to arrive here out of order so we fetch
4638543117beSJeff Roberson 	 * the current smr seq rather than accepting the bucket's.
4639543117beSJeff Roberson 	 */
4640543117beSJeff Roberson 	seq = SMR_SEQ_INVALID;
4641543117beSJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_SMR) != 0)
4642226dd6dbSJeff Roberson 		seq = smr_advance(zone->uz_smr);
4643226dd6dbSJeff Roberson 
4644226dd6dbSJeff Roberson 	/*
4645226dd6dbSJeff Roberson 	 * To avoid having ndomain * ndomain buckets for sorting we have a
4646226dd6dbSJeff Roberson 	 * lock on the current crossfree bucket.  A full matrix with
4647226dd6dbSJeff Roberson 	 * per-domain locking could be used if necessary.
4648226dd6dbSJeff Roberson 	 */
4649991f23efSMark Johnston 	STAILQ_INIT(&emptybuckets);
4650226dd6dbSJeff Roberson 	STAILQ_INIT(&fullbuckets);
4651226dd6dbSJeff Roberson 	ZONE_CROSS_LOCK(zone);
4652991f23efSMark Johnston 	for (; bucket->ub_cnt > 0; bucket->ub_cnt--) {
465391d947bfSJeff Roberson 		item = bucket->ub_bucket[bucket->ub_cnt - 1];
465481302f1dSMark Johnston 		domain = item_domain(item);
4655c6fd3e23SJeff Roberson 		zdom = ZDOM_GET(zone, domain);
465691d947bfSJeff Roberson 		if (zdom->uzd_cross == NULL) {
4657991f23efSMark Johnston 			if ((b = STAILQ_FIRST(&emptybuckets)) != NULL) {
4658991f23efSMark Johnston 				STAILQ_REMOVE_HEAD(&emptybuckets, ub_link);
4659991f23efSMark Johnston 				zdom->uzd_cross = b;
4660991f23efSMark Johnston 			} else {
4661991f23efSMark Johnston 				/*
4662991f23efSMark Johnston 				 * Avoid allocating a bucket with the cross lock
4663991f23efSMark Johnston 				 * held, since allocation can trigger a
4664991f23efSMark Johnston 				 * cross-domain free and bucket zones may
4665991f23efSMark Johnston 				 * allocate from each other.
4666991f23efSMark Johnston 				 */
4667991f23efSMark Johnston 				ZONE_CROSS_UNLOCK(zone);
4668991f23efSMark Johnston 				b = bucket_alloc(zone, udata, M_NOWAIT);
4669991f23efSMark Johnston 				if (b == NULL)
4670991f23efSMark Johnston 					goto out;
4671991f23efSMark Johnston 				ZONE_CROSS_LOCK(zone);
4672991f23efSMark Johnston 				if (zdom->uzd_cross != NULL) {
4673991f23efSMark Johnston 					STAILQ_INSERT_HEAD(&emptybuckets, b,
4674991f23efSMark Johnston 					    ub_link);
4675991f23efSMark Johnston 				} else {
4676991f23efSMark Johnston 					zdom->uzd_cross = b;
4677991f23efSMark Johnston 				}
4678991f23efSMark Johnston 			}
467991d947bfSJeff Roberson 		}
4680543117beSJeff Roberson 		b = zdom->uzd_cross;
4681543117beSJeff Roberson 		b->ub_bucket[b->ub_cnt++] = item;
4682543117beSJeff Roberson 		b->ub_seq = seq;
4683543117beSJeff Roberson 		if (b->ub_cnt == b->ub_entries) {
4684543117beSJeff Roberson 			STAILQ_INSERT_HEAD(&fullbuckets, b, ub_link);
4685991f23efSMark Johnston 			if ((b = STAILQ_FIRST(&emptybuckets)) != NULL)
4686991f23efSMark Johnston 				STAILQ_REMOVE_HEAD(&emptybuckets, ub_link);
4687991f23efSMark Johnston 			zdom->uzd_cross = b;
468891d947bfSJeff Roberson 		}
468991d947bfSJeff Roberson 	}
469091d947bfSJeff Roberson 	ZONE_CROSS_UNLOCK(zone);
4691991f23efSMark Johnston out:
4692c6fd3e23SJeff Roberson 	if (bucket->ub_cnt == 0)
4693d4665eaaSJeff Roberson 		bucket->ub_seq = SMR_SEQ_INVALID;
469491d947bfSJeff Roberson 	bucket_free(zone, bucket, udata);
4695c6fd3e23SJeff Roberson 
4696991f23efSMark Johnston 	while ((b = STAILQ_FIRST(&emptybuckets)) != NULL) {
4697991f23efSMark Johnston 		STAILQ_REMOVE_HEAD(&emptybuckets, ub_link);
4698991f23efSMark Johnston 		bucket_free(zone, b, udata);
4699991f23efSMark Johnston 	}
4700c6fd3e23SJeff Roberson 	while ((b = STAILQ_FIRST(&fullbuckets)) != NULL) {
4701c6fd3e23SJeff Roberson 		STAILQ_REMOVE_HEAD(&fullbuckets, ub_link);
470281302f1dSMark Johnston 		domain = item_domain(b->ub_bucket[0]);
4703c6fd3e23SJeff Roberson 		zone_put_bucket(zone, domain, b, udata, true);
4704c6fd3e23SJeff Roberson 	}
470591d947bfSJeff Roberson }
470691d947bfSJeff Roberson #endif
470791d947bfSJeff Roberson 
47080a81b439SJeff Roberson static void
47090a81b439SJeff Roberson zone_free_bucket(uma_zone_t zone, uma_bucket_t bucket, void *udata,
4710c6fd3e23SJeff Roberson     int itemdomain, bool ws)
47110a81b439SJeff Roberson {
47120a81b439SJeff Roberson 
4713dfe13344SJeff Roberson #ifdef NUMA
47140a81b439SJeff Roberson 	/*
47150a81b439SJeff Roberson 	 * Buckets coming from the wrong domain will be entirely for the
47160a81b439SJeff Roberson 	 * only other domain on two-domain systems.  In this case we can
47170a81b439SJeff Roberson 	 * simply cache them.  Otherwise we need to sort them back to the
471891d947bfSJeff Roberson 	 * correct domains.
47190a81b439SJeff Roberson 	 */
4720c6fd3e23SJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) != 0 &&
4721c6fd3e23SJeff Roberson 	    vm_ndomains > 2 && PCPU_GET(domain) != itemdomain) {
472291d947bfSJeff Roberson 		zone_free_cross(zone, bucket, udata);
47230a81b439SJeff Roberson 		return;
47240a81b439SJeff Roberson 	}
47250a81b439SJeff Roberson #endif
472691d947bfSJeff Roberson 
47270a81b439SJeff Roberson 	/*
47280a81b439SJeff Roberson 	 * Attempt to save the bucket in the zone's domain bucket cache.
47290a81b439SJeff Roberson 	 */
47300a81b439SJeff Roberson 	CTR3(KTR_UMA,
47310a81b439SJeff Roberson 	    "uma_zfree: zone %s(%p) putting bucket %p on free list",
47320a81b439SJeff Roberson 	    zone->uz_name, zone, bucket);
47330a81b439SJeff Roberson 	/* ub_cnt is pointing to the last free item */
4734c6fd3e23SJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_ROUNDROBIN) != 0)
4735c6fd3e23SJeff Roberson 		itemdomain = zone_domain_lowest(zone, itemdomain);
4736c6fd3e23SJeff Roberson 	zone_put_bucket(zone, itemdomain, bucket, udata, ws);
47378355f576SJeff Roberson }
4738fc03d22bSJeff Roberson 
47394d104ba0SAlexander Motin /*
47400a81b439SJeff Roberson  * Populate a free or cross bucket for the current cpu cache.  Free any
47410a81b439SJeff Roberson  * existing full bucket either to the zone cache or back to the slab layer.
47420a81b439SJeff Roberson  *
47430a81b439SJeff Roberson  * Enters and returns in a critical section.  false return indicates that
47440a81b439SJeff Roberson  * we can not satisfy this free in the cache layer.  true indicates that
47450a81b439SJeff Roberson  * the caller should retry.
47464d104ba0SAlexander Motin  */
47470a81b439SJeff Roberson static __noinline bool
47482cb67bd7SGleb Smirnoff cache_free(uma_zone_t zone, uma_cache_t cache, void *udata, int itemdomain)
47490a81b439SJeff Roberson {
4750dfe13344SJeff Roberson 	uma_cache_bucket_t cbucket;
4751d4665eaaSJeff Roberson 	uma_bucket_t newbucket, bucket;
47520a81b439SJeff Roberson 
47530a81b439SJeff Roberson 	CRITICAL_ASSERT(curthread);
47540a81b439SJeff Roberson 
4755d4665eaaSJeff Roberson 	if (zone->uz_bucket_size == 0)
47560a81b439SJeff Roberson 		return false;
47570a81b439SJeff Roberson 
4758cc7ce83aSJeff Roberson 	cache = &zone->uz_cpu[curcpu];
4759d4665eaaSJeff Roberson 	newbucket = NULL;
47600a81b439SJeff Roberson 
47610a81b439SJeff Roberson 	/*
4762dfe13344SJeff Roberson 	 * FIRSTTOUCH domains need to free to the correct zdom.  When
4763dfe13344SJeff Roberson 	 * enabled this is the zdom of the item.   The bucket is the
4764dfe13344SJeff Roberson 	 * cross bucket if the current domain and itemdomain do not match.
47650a81b439SJeff Roberson 	 */
4766dfe13344SJeff Roberson 	cbucket = &cache->uc_freebucket;
4767dfe13344SJeff Roberson #ifdef NUMA
4768c6fd3e23SJeff Roberson 	if ((cache_uz_flags(cache) & UMA_ZONE_FIRSTTOUCH) != 0) {
4769c6fd3e23SJeff Roberson 		if (PCPU_GET(domain) != itemdomain) {
4770dfe13344SJeff Roberson 			cbucket = &cache->uc_crossbucket;
4771dfe13344SJeff Roberson 			if (cbucket->ucb_cnt != 0)
4772c6fd3e23SJeff Roberson 				counter_u64_add(zone->uz_xdomain,
4773dfe13344SJeff Roberson 				    cbucket->ucb_cnt);
4774dfe13344SJeff Roberson 		}
4775c6fd3e23SJeff Roberson 	}
47760a81b439SJeff Roberson #endif
4777dfe13344SJeff Roberson 	bucket = cache_bucket_unload(cbucket);
4778c6fd3e23SJeff Roberson 	KASSERT(bucket == NULL || bucket->ub_cnt == bucket->ub_entries,
4779c6fd3e23SJeff Roberson 	    ("cache_free: Entered with non-full free bucket."));
47800a81b439SJeff Roberson 
47810a81b439SJeff Roberson 	/* We are no longer associated with this CPU. */
47820a81b439SJeff Roberson 	critical_exit();
47830a81b439SJeff Roberson 
4784d4665eaaSJeff Roberson 	/*
4785d4665eaaSJeff Roberson 	 * Don't let SMR zones operate without a free bucket.  Force
4786d4665eaaSJeff Roberson 	 * a synchronize and re-use this one.  We will only degrade
4787d4665eaaSJeff Roberson 	 * to a synchronize every bucket_size items rather than every
4788d4665eaaSJeff Roberson 	 * item if we fail to allocate a bucket.
4789d4665eaaSJeff Roberson 	 */
4790d4665eaaSJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_SMR) != 0) {
4791d4665eaaSJeff Roberson 		if (bucket != NULL)
4792d4665eaaSJeff Roberson 			bucket->ub_seq = smr_advance(zone->uz_smr);
4793d4665eaaSJeff Roberson 		newbucket = bucket_alloc(zone, udata, M_NOWAIT);
4794d4665eaaSJeff Roberson 		if (newbucket == NULL && bucket != NULL) {
4795d4665eaaSJeff Roberson 			bucket_drain(zone, bucket);
4796d4665eaaSJeff Roberson 			newbucket = bucket;
4797d4665eaaSJeff Roberson 			bucket = NULL;
4798d4665eaaSJeff Roberson 		}
4799d4665eaaSJeff Roberson 	} else if (!bucketdisable)
4800d4665eaaSJeff Roberson 		newbucket = bucket_alloc(zone, udata, M_NOWAIT);
4801d4665eaaSJeff Roberson 
48020a81b439SJeff Roberson 	if (bucket != NULL)
4803c6fd3e23SJeff Roberson 		zone_free_bucket(zone, bucket, udata, itemdomain, true);
4804a553d4b8SJeff Roberson 
4805fc03d22bSJeff Roberson 	critical_enter();
4806d4665eaaSJeff Roberson 	if ((bucket = newbucket) == NULL)
48070a81b439SJeff Roberson 		return (false);
4808cc7ce83aSJeff Roberson 	cache = &zone->uz_cpu[curcpu];
4809dfe13344SJeff Roberson #ifdef NUMA
4810fc03d22bSJeff Roberson 	/*
48110a81b439SJeff Roberson 	 * Check to see if we should be populating the cross bucket.  If it
48120a81b439SJeff Roberson 	 * is already populated we will fall through and attempt to populate
48130a81b439SJeff Roberson 	 * the free bucket.
4814fc03d22bSJeff Roberson 	 */
4815c6fd3e23SJeff Roberson 	if ((cache_uz_flags(cache) & UMA_ZONE_FIRSTTOUCH) != 0) {
4816c6fd3e23SJeff Roberson 		if (PCPU_GET(domain) != itemdomain &&
4817376b1ba3SJeff Roberson 		    cache->uc_crossbucket.ucb_bucket == NULL) {
4818376b1ba3SJeff Roberson 			cache_bucket_load_cross(cache, bucket);
48190a81b439SJeff Roberson 			return (true);
48200a81b439SJeff Roberson 		}
48210a81b439SJeff Roberson 	}
48220a81b439SJeff Roberson #endif
48230a81b439SJeff Roberson 	/*
48240a81b439SJeff Roberson 	 * We may have lost the race to fill the bucket or switched CPUs.
48250a81b439SJeff Roberson 	 */
4826376b1ba3SJeff Roberson 	if (cache->uc_freebucket.ucb_bucket != NULL) {
4827fc03d22bSJeff Roberson 		critical_exit();
48286fd34d6fSJeff Roberson 		bucket_free(zone, bucket, udata);
48290a81b439SJeff Roberson 		critical_enter();
48300a81b439SJeff Roberson 	} else
4831376b1ba3SJeff Roberson 		cache_bucket_load_free(cache, bucket);
48328355f576SJeff Roberson 
48330a81b439SJeff Roberson 	return (true);
48348355f576SJeff Roberson }
48358355f576SJeff Roberson 
48368355f576SJeff Roberson static void
4837bb15d1c7SGleb Smirnoff slab_free_item(uma_zone_t zone, uma_slab_t slab, void *item)
48388355f576SJeff Roberson {
4839bb15d1c7SGleb Smirnoff 	uma_keg_t keg;
4840ab3185d1SJeff Roberson 	uma_domain_t dom;
48419b8db4d0SRyan Libby 	int freei;
4842099a0e58SBosko Milekic 
4843bb15d1c7SGleb Smirnoff 	keg = zone->uz_keg;
48448b987a77SJeff Roberson 	KEG_LOCK_ASSERT(keg, slab->us_domain);
4845ab3185d1SJeff Roberson 
48468355f576SJeff Roberson 	/* Do we need to remove from any lists? */
48478b987a77SJeff Roberson 	dom = &keg->uk_domain[slab->us_domain];
4848099a0e58SBosko Milekic 	if (slab->us_freecount + 1 == keg->uk_ipers) {
48498355f576SJeff Roberson 		LIST_REMOVE(slab, us_link);
4850ab3185d1SJeff Roberson 		LIST_INSERT_HEAD(&dom->ud_free_slab, slab, us_link);
48514ab3aee8SMark Johnston 		dom->ud_free_slabs++;
48528355f576SJeff Roberson 	} else if (slab->us_freecount == 0) {
48538355f576SJeff Roberson 		LIST_REMOVE(slab, us_link);
4854ab3185d1SJeff Roberson 		LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link);
48558355f576SJeff Roberson 	}
48568355f576SJeff Roberson 
4857ef72505eSJeff Roberson 	/* Slab management. */
48581e0701e1SJeff Roberson 	freei = slab_item_index(slab, keg, item);
48599b78b1f4SJeff Roberson 	BIT_SET(keg->uk_ipers, freei, &slab->us_free);
48608355f576SJeff Roberson 	slab->us_freecount++;
48618355f576SJeff Roberson 
4862ef72505eSJeff Roberson 	/* Keg statistics. */
48634ab3aee8SMark Johnston 	dom->ud_free_items++;
48640095a784SJeff Roberson }
48650095a784SJeff Roberson 
48660095a784SJeff Roberson static void
4867b75c4efcSAndrew Turner zone_release(void *arg, void **bucket, int cnt)
48680095a784SJeff Roberson {
48698b987a77SJeff Roberson 	struct mtx *lock;
4870b75c4efcSAndrew Turner 	uma_zone_t zone;
48710095a784SJeff Roberson 	uma_slab_t slab;
48720095a784SJeff Roberson 	uma_keg_t keg;
48730095a784SJeff Roberson 	uint8_t *mem;
48748b987a77SJeff Roberson 	void *item;
48750095a784SJeff Roberson 	int i;
48768355f576SJeff Roberson 
4877b75c4efcSAndrew Turner 	zone = arg;
4878bb15d1c7SGleb Smirnoff 	keg = zone->uz_keg;
48798b987a77SJeff Roberson 	lock = NULL;
488054c5ae80SRyan Libby 	if (__predict_false((zone->uz_flags & UMA_ZFLAG_HASH) != 0))
48818b987a77SJeff Roberson 		lock = KEG_LOCK(keg, 0);
48820095a784SJeff Roberson 	for (i = 0; i < cnt; i++) {
48830095a784SJeff Roberson 		item = bucket[i];
488454c5ae80SRyan Libby 		if (__predict_true((zone->uz_flags & UMA_ZFLAG_VTOSLAB) != 0)) {
48850095a784SJeff Roberson 			slab = vtoslab((vm_offset_t)item);
48868b987a77SJeff Roberson 		} else {
48878b987a77SJeff Roberson 			mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
488854c5ae80SRyan Libby 			if ((zone->uz_flags & UMA_ZFLAG_HASH) != 0)
48898b987a77SJeff Roberson 				slab = hash_sfind(&keg->uk_hash, mem);
48908b987a77SJeff Roberson 			else
48918b987a77SJeff Roberson 				slab = (uma_slab_t)(mem + keg->uk_pgoff);
48928b987a77SJeff Roberson 		}
48938b987a77SJeff Roberson 		if (lock != KEG_LOCKPTR(keg, slab->us_domain)) {
48948b987a77SJeff Roberson 			if (lock != NULL)
48958b987a77SJeff Roberson 				mtx_unlock(lock);
48968b987a77SJeff Roberson 			lock = KEG_LOCK(keg, slab->us_domain);
48978b987a77SJeff Roberson 		}
4898bb15d1c7SGleb Smirnoff 		slab_free_item(zone, slab, item);
48990095a784SJeff Roberson 	}
49008b987a77SJeff Roberson 	if (lock != NULL)
49018b987a77SJeff Roberson 		mtx_unlock(lock);
49028355f576SJeff Roberson }
49038355f576SJeff Roberson 
49040095a784SJeff Roberson /*
49050095a784SJeff Roberson  * Frees a single item to any zone.
49060095a784SJeff Roberson  *
49070095a784SJeff Roberson  * Arguments:
49080095a784SJeff Roberson  *	zone   The zone to free to
49090095a784SJeff Roberson  *	item   The item we're freeing
49100095a784SJeff Roberson  *	udata  User supplied data for the dtor
49110095a784SJeff Roberson  *	skip   Skip dtors and finis
49120095a784SJeff Roberson  */
49136d88d784SJeff Roberson static __noinline void
49140095a784SJeff Roberson zone_free_item(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip)
49150095a784SJeff Roberson {
4916c5deaf04SGleb Smirnoff 
4917d4665eaaSJeff Roberson 	/*
4918d4665eaaSJeff Roberson 	 * If a free is sent directly to an SMR zone we have to
4919d4665eaaSJeff Roberson 	 * synchronize immediately because the item can instantly
4920d4665eaaSJeff Roberson 	 * be reallocated. This should only happen in degenerate
4921d4665eaaSJeff Roberson 	 * cases when no memory is available for per-cpu caches.
4922d4665eaaSJeff Roberson 	 */
4923d4665eaaSJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_SMR) != 0 && skip == SKIP_NONE)
4924d4665eaaSJeff Roberson 		smr_synchronize(zone->uz_smr);
4925d4665eaaSJeff Roberson 
4926cc7ce83aSJeff Roberson 	item_dtor(zone, item, zone->uz_size, udata, skip);
49270095a784SJeff Roberson 
492809c8cb71SMark Johnston 	if (skip < SKIP_FINI && zone->uz_fini) {
492909c8cb71SMark Johnston 		kasan_mark_item_valid(zone, item);
49300095a784SJeff Roberson 		zone->uz_fini(item, zone->uz_size);
493109c8cb71SMark Johnston 		kasan_mark_item_invalid(zone, item);
493209c8cb71SMark Johnston 	}
49330095a784SJeff Roberson 
49340095a784SJeff Roberson 	zone->uz_release(zone->uz_arg, &item, 1);
4935bb15d1c7SGleb Smirnoff 
4936bb15d1c7SGleb Smirnoff 	if (skip & SKIP_CNT)
4937bb15d1c7SGleb Smirnoff 		return;
4938bb15d1c7SGleb Smirnoff 
49392efcc8cbSGleb Smirnoff 	counter_u64_add(zone->uz_frees, 1);
49402efcc8cbSGleb Smirnoff 
49414bd61e19SJeff Roberson 	if (zone->uz_max_items > 0)
49424bd61e19SJeff Roberson 		zone_free_limit(zone, 1);
4943bb45b411SGleb Smirnoff }
49440095a784SJeff Roberson 
49458355f576SJeff Roberson /* See uma.h */
49461c6cae97SLawrence Stewart int
4947736ee590SJeff Roberson uma_zone_set_max(uma_zone_t zone, int nitems)
4948736ee590SJeff Roberson {
4949e574d407SMark Johnston 
4950e574d407SMark Johnston 	/*
4951e574d407SMark Johnston 	 * If the limit is small, we may need to constrain the maximum per-CPU
4952e574d407SMark Johnston 	 * cache size, or disable caching entirely.
4953e574d407SMark Johnston 	 */
4954e574d407SMark Johnston 	uma_zone_set_maxcache(zone, nitems);
4955bb15d1c7SGleb Smirnoff 
49564bd61e19SJeff Roberson 	/*
49574bd61e19SJeff Roberson 	 * XXX This can misbehave if the zone has any allocations with
49584bd61e19SJeff Roberson 	 * no limit and a limit is imposed.  There is currently no
49594bd61e19SJeff Roberson 	 * way to clear a limit.
49604bd61e19SJeff Roberson 	 */
4961bb15d1c7SGleb Smirnoff 	ZONE_LOCK(zone);
4962d53927b0SMark Johnston 	if (zone->uz_max_items == 0)
4963d53927b0SMark Johnston 		ZONE_ASSERT_COLD(zone);
4964bb15d1c7SGleb Smirnoff 	zone->uz_max_items = nitems;
4965cc7ce83aSJeff Roberson 	zone->uz_flags |= UMA_ZFLAG_LIMIT;
4966cc7ce83aSJeff Roberson 	zone_update_caches(zone);
49674bd61e19SJeff Roberson 	/* We may need to wake waiters. */
49684bd61e19SJeff Roberson 	wakeup(&zone->uz_max_items);
4969bb15d1c7SGleb Smirnoff 	ZONE_UNLOCK(zone);
4970bb15d1c7SGleb Smirnoff 
4971bb15d1c7SGleb Smirnoff 	return (nitems);
4972bb15d1c7SGleb Smirnoff }
4973bb15d1c7SGleb Smirnoff 
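/*
 * Usage sketch (hypothetical zone name and limit): a consumer that wants a
 * hard item limit typically sets it once, right after creation, and handles
 * NULL from M_NOWAIT allocations once the limit is reached:
 *
 *	zone = uma_zcreate("foo", sizeof(struct foo), NULL, NULL, NULL,
 *	    NULL, UMA_ALIGN_PTR, 0);
 *	uma_zone_set_max(zone, 1024);
 *	p = uma_zalloc(zone, M_NOWAIT);
 *	if (p == NULL)
 *		...back off or fail the request...
 */
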
4974bb15d1c7SGleb Smirnoff /* See uma.h */
4975003cf08bSMark Johnston void
4976bb15d1c7SGleb Smirnoff uma_zone_set_maxcache(uma_zone_t zone, int nitems)
4977bb15d1c7SGleb Smirnoff {
4978e574d407SMark Johnston 	int bpcpu, bpdom, bsize, nb;
4979bb15d1c7SGleb Smirnoff 
4980bb15d1c7SGleb Smirnoff 	ZONE_LOCK(zone);
4981e574d407SMark Johnston 
4982e574d407SMark Johnston 	/*
4983e574d407SMark Johnston 	 * Compute a lower bound on the number of items that may be cached in
4984e574d407SMark Johnston 	 * the zone.  Each CPU gets at least two buckets, and for cross-domain
4985e574d407SMark Johnston 	 * frees we use an additional bucket per CPU and per domain.  Select the
4986e574d407SMark Johnston 	 * largest bucket size that does not exceed half of the requested limit,
4987e574d407SMark Johnston 	 * with the leftover space given to the full bucket cache.
4988e574d407SMark Johnston 	 */
4989e574d407SMark Johnston 	bpdom = 0;
4990003cf08bSMark Johnston 	bpcpu = 2;
4991e574d407SMark Johnston #ifdef NUMA
4992e574d407SMark Johnston 	if ((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) != 0 && vm_ndomains > 1) {
4993003cf08bSMark Johnston 		bpcpu++;
4994e574d407SMark Johnston 		bpdom++;
4995003cf08bSMark Johnston 	}
4996e574d407SMark Johnston #endif
4997e574d407SMark Johnston 	nb = bpcpu * mp_ncpus + bpdom * vm_ndomains;
4998e574d407SMark Johnston 	bsize = nitems / nb / 2;
4999e574d407SMark Johnston 	if (bsize > BUCKET_MAX)
5000e574d407SMark Johnston 		bsize = BUCKET_MAX;
5001e574d407SMark Johnston 	else if (bsize == 0 && nitems / nb > 0)
5002e574d407SMark Johnston 		bsize = 1;
5003e574d407SMark Johnston 	zone->uz_bucket_size_max = zone->uz_bucket_size = bsize;
500420a4e154SJeff Roberson 	if (zone->uz_bucket_size_min > zone->uz_bucket_size_max)
500520a4e154SJeff Roberson 		zone->uz_bucket_size_min = zone->uz_bucket_size_max;
5006e574d407SMark Johnston 	zone->uz_bucket_max = nitems - nb * bsize;
5007bb15d1c7SGleb Smirnoff 	ZONE_UNLOCK(zone);
5008736ee590SJeff Roberson }
5009736ee590SJeff Roberson 
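/*
 * Worked example for the computation above (hypothetical machine: 8 CPUs,
 * a single memory domain, nitems == 4096): bpcpu == 2 and bpdom == 0, so
 * nb == 16 buckets and bsize == 4096 / 16 / 2 == 128, assuming that does
 * not exceed BUCKET_MAX.  Up to half of the limit may then sit in per-CPU
 * buckets, and uz_bucket_max == 4096 - 16 * 128 == 2048 items remain for
 * the full-bucket cache.
 */
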
5010736ee590SJeff Roberson /* See uma.h */
5011e49471b0SAndre Oppermann int
5012e49471b0SAndre Oppermann uma_zone_get_max(uma_zone_t zone)
5013e49471b0SAndre Oppermann {
5014e49471b0SAndre Oppermann 	int nitems;
5015e49471b0SAndre Oppermann 
5016727c6918SJeff Roberson 	nitems = atomic_load_64(&zone->uz_max_items);
5017e49471b0SAndre Oppermann 
5018e49471b0SAndre Oppermann 	return (nitems);
5019e49471b0SAndre Oppermann }
5020e49471b0SAndre Oppermann 
5021e49471b0SAndre Oppermann /* See uma.h */
50222f891cd5SPawel Jakub Dawidek void
50232f891cd5SPawel Jakub Dawidek uma_zone_set_warning(uma_zone_t zone, const char *warning)
50242f891cd5SPawel Jakub Dawidek {
50252f891cd5SPawel Jakub Dawidek 
5026727c6918SJeff Roberson 	ZONE_ASSERT_COLD(zone);
50272f891cd5SPawel Jakub Dawidek 	zone->uz_warning = warning;
50282f891cd5SPawel Jakub Dawidek }
50292f891cd5SPawel Jakub Dawidek 
50302f891cd5SPawel Jakub Dawidek /* See uma.h */
503154503a13SJonathan T. Looney void
503254503a13SJonathan T. Looney uma_zone_set_maxaction(uma_zone_t zone, uma_maxaction_t maxaction)
503354503a13SJonathan T. Looney {
503454503a13SJonathan T. Looney 
5035727c6918SJeff Roberson 	ZONE_ASSERT_COLD(zone);
5036e60b2fcbSGleb Smirnoff 	TASK_INIT(&zone->uz_maxaction, 0, (task_fn_t *)maxaction, zone);
503754503a13SJonathan T. Looney }
503854503a13SJonathan T. Looney 
503954503a13SJonathan T. Looney /* See uma.h */
5040c4ae7908SLawrence Stewart int
5041c4ae7908SLawrence Stewart uma_zone_get_cur(uma_zone_t zone)
5042c4ae7908SLawrence Stewart {
5043c4ae7908SLawrence Stewart 	int64_t nitems;
5044c4ae7908SLawrence Stewart 	u_int i;
5045c4ae7908SLawrence Stewart 
5046bfb6b7a1SJeff Roberson 	nitems = 0;
5047bfb6b7a1SJeff Roberson 	if (zone->uz_allocs != EARLY_COUNTER && zone->uz_frees != EARLY_COUNTER)
50482efcc8cbSGleb Smirnoff 		nitems = counter_u64_fetch(zone->uz_allocs) -
50492efcc8cbSGleb Smirnoff 		    counter_u64_fetch(zone->uz_frees);
5050727c6918SJeff Roberson 	CPU_FOREACH(i)
5051727c6918SJeff Roberson 		nitems += atomic_load_64(&zone->uz_cpu[i].uc_allocs) -
5052727c6918SJeff Roberson 		    atomic_load_64(&zone->uz_cpu[i].uc_frees);
5053c4ae7908SLawrence Stewart 
5054c4ae7908SLawrence Stewart 	return (nitems < 0 ? 0 : nitems);
5055c4ae7908SLawrence Stewart }
5056c4ae7908SLawrence Stewart 
505720a4e154SJeff Roberson static uint64_t
505820a4e154SJeff Roberson uma_zone_get_allocs(uma_zone_t zone)
505920a4e154SJeff Roberson {
506020a4e154SJeff Roberson 	uint64_t nitems;
506120a4e154SJeff Roberson 	u_int i;
506220a4e154SJeff Roberson 
5063bfb6b7a1SJeff Roberson 	nitems = 0;
5064bfb6b7a1SJeff Roberson 	if (zone->uz_allocs != EARLY_COUNTER)
506520a4e154SJeff Roberson 		nitems = counter_u64_fetch(zone->uz_allocs);
5066727c6918SJeff Roberson 	CPU_FOREACH(i)
5067727c6918SJeff Roberson 		nitems += atomic_load_64(&zone->uz_cpu[i].uc_allocs);
506820a4e154SJeff Roberson 
506920a4e154SJeff Roberson 	return (nitems);
507020a4e154SJeff Roberson }
507120a4e154SJeff Roberson 
507220a4e154SJeff Roberson static uint64_t
507320a4e154SJeff Roberson uma_zone_get_frees(uma_zone_t zone)
507420a4e154SJeff Roberson {
507520a4e154SJeff Roberson 	uint64_t nitems;
507620a4e154SJeff Roberson 	u_int i;
507720a4e154SJeff Roberson 
5078bfb6b7a1SJeff Roberson 	nitems = 0;
5079bfb6b7a1SJeff Roberson 	if (zone->uz_frees != EARLY_COUNTER)
508020a4e154SJeff Roberson 		nitems = counter_u64_fetch(zone->uz_frees);
5081727c6918SJeff Roberson 	CPU_FOREACH(i)
5082727c6918SJeff Roberson 		nitems += atomic_load_64(&zone->uz_cpu[i].uc_frees);
508320a4e154SJeff Roberson 
508420a4e154SJeff Roberson 	return (nitems);
508520a4e154SJeff Roberson }
508620a4e154SJeff Roberson 
508731c251a0SJeff Roberson #ifdef INVARIANTS
508831c251a0SJeff Roberson /* Used only for KEG_ASSERT_COLD(). */
508931c251a0SJeff Roberson static uint64_t
509031c251a0SJeff Roberson uma_keg_get_allocs(uma_keg_t keg)
509131c251a0SJeff Roberson {
509231c251a0SJeff Roberson 	uma_zone_t z;
509331c251a0SJeff Roberson 	uint64_t nitems;
509431c251a0SJeff Roberson 
509531c251a0SJeff Roberson 	nitems = 0;
509631c251a0SJeff Roberson 	LIST_FOREACH(z, &keg->uk_zones, uz_link)
509731c251a0SJeff Roberson 		nitems += uma_zone_get_allocs(z);
509831c251a0SJeff Roberson 
509931c251a0SJeff Roberson 	return (nitems);
510031c251a0SJeff Roberson }
510131c251a0SJeff Roberson #endif
510231c251a0SJeff Roberson 
5103c4ae7908SLawrence Stewart /* See uma.h */
5104736ee590SJeff Roberson void
5105099a0e58SBosko Milekic uma_zone_set_init(uma_zone_t zone, uma_init uminit)
5106099a0e58SBosko Milekic {
5107e20a199fSJeff Roberson 	uma_keg_t keg;
5108e20a199fSJeff Roberson 
5109bb15d1c7SGleb Smirnoff 	KEG_GET(zone, keg);
5110727c6918SJeff Roberson 	KEG_ASSERT_COLD(keg);
5111e20a199fSJeff Roberson 	keg->uk_init = uminit;
5112099a0e58SBosko Milekic }
5113099a0e58SBosko Milekic 
5114099a0e58SBosko Milekic /* See uma.h */
5115099a0e58SBosko Milekic void
5116099a0e58SBosko Milekic uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
5117099a0e58SBosko Milekic {
5118e20a199fSJeff Roberson 	uma_keg_t keg;
5119e20a199fSJeff Roberson 
5120bb15d1c7SGleb Smirnoff 	KEG_GET(zone, keg);
5121727c6918SJeff Roberson 	KEG_ASSERT_COLD(keg);
5122e20a199fSJeff Roberson 	keg->uk_fini = fini;
5123099a0e58SBosko Milekic }
5124099a0e58SBosko Milekic 
5125099a0e58SBosko Milekic /* See uma.h */
5126099a0e58SBosko Milekic void
5127099a0e58SBosko Milekic uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
5128099a0e58SBosko Milekic {
5129af526374SJeff Roberson 
5130727c6918SJeff Roberson 	ZONE_ASSERT_COLD(zone);
5131099a0e58SBosko Milekic 	zone->uz_init = zinit;
5132099a0e58SBosko Milekic }
5133099a0e58SBosko Milekic 
5134099a0e58SBosko Milekic /* See uma.h */
5135099a0e58SBosko Milekic void
5136099a0e58SBosko Milekic uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
5137099a0e58SBosko Milekic {
5138af526374SJeff Roberson 
5139727c6918SJeff Roberson 	ZONE_ASSERT_COLD(zone);
5140099a0e58SBosko Milekic 	zone->uz_fini = zfini;
5141099a0e58SBosko Milekic }
5142099a0e58SBosko Milekic 
5143099a0e58SBosko Milekic /* See uma.h */
5144099a0e58SBosko Milekic void
51458355f576SJeff Roberson uma_zone_set_freef(uma_zone_t zone, uma_free freef)
51468355f576SJeff Roberson {
51470095a784SJeff Roberson 	uma_keg_t keg;
5148e20a199fSJeff Roberson 
5149bb15d1c7SGleb Smirnoff 	KEG_GET(zone, keg);
5150727c6918SJeff Roberson 	KEG_ASSERT_COLD(keg);
51510095a784SJeff Roberson 	keg->uk_freef = freef;
51528355f576SJeff Roberson }
51538355f576SJeff Roberson 
51548355f576SJeff Roberson /* See uma.h */
51558355f576SJeff Roberson void
51568355f576SJeff Roberson uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
51578355f576SJeff Roberson {
5158e20a199fSJeff Roberson 	uma_keg_t keg;
5159e20a199fSJeff Roberson 
5160bb15d1c7SGleb Smirnoff 	KEG_GET(zone, keg);
5161727c6918SJeff Roberson 	KEG_ASSERT_COLD(keg);
5162e20a199fSJeff Roberson 	keg->uk_allocf = allocf;
51638355f576SJeff Roberson }
51648355f576SJeff Roberson 
51658355f576SJeff Roberson /* See uma.h */
51666fd34d6fSJeff Roberson void
5167d4665eaaSJeff Roberson uma_zone_set_smr(uma_zone_t zone, smr_t smr)
5168d4665eaaSJeff Roberson {
5169d4665eaaSJeff Roberson 
5170d4665eaaSJeff Roberson 	ZONE_ASSERT_COLD(zone);
5171d4665eaaSJeff Roberson 
51727f746c9fSMateusz Guzik 	KASSERT(smr != NULL, ("Got NULL smr"));
51737f746c9fSMateusz Guzik 	KASSERT((zone->uz_flags & UMA_ZONE_SMR) == 0,
51747f746c9fSMateusz Guzik 	    ("zone %p (%s) already uses SMR", zone, zone->uz_name));
5175d4665eaaSJeff Roberson 	zone->uz_flags |= UMA_ZONE_SMR;
5176d4665eaaSJeff Roberson 	zone->uz_smr = smr;
5177d4665eaaSJeff Roberson 	zone_update_caches(zone);
5178d4665eaaSJeff Roberson }
5179d4665eaaSJeff Roberson 
5180d4665eaaSJeff Roberson smr_t
5181d4665eaaSJeff Roberson uma_zone_get_smr(uma_zone_t zone)
5182d4665eaaSJeff Roberson {
5183d4665eaaSJeff Roberson 
5184d4665eaaSJeff Roberson 	return (zone->uz_smr);
5185d4665eaaSJeff Roberson }
5186d4665eaaSJeff Roberson 
5187d4665eaaSJeff Roberson /* See uma.h */
5188d4665eaaSJeff Roberson void
51896fd34d6fSJeff Roberson uma_zone_reserve(uma_zone_t zone, int items)
51906fd34d6fSJeff Roberson {
51916fd34d6fSJeff Roberson 	uma_keg_t keg;
51926fd34d6fSJeff Roberson 
5193bb15d1c7SGleb Smirnoff 	KEG_GET(zone, keg);
5194727c6918SJeff Roberson 	KEG_ASSERT_COLD(keg);
51956fd34d6fSJeff Roberson 	keg->uk_reserve = items;
51966fd34d6fSJeff Roberson }
51976fd34d6fSJeff Roberson 
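/*
 * Illustrative sketch (hypothetical names): a zone keeps a small reserve of
 * items that only allocations passing M_USE_RESERVE may dip into, typically
 * paired with uma_prealloc() so the reserve is backed by real slabs.
 *
 *	uma_zone_reserve(example_zone, 32);
 *	uma_prealloc(example_zone, 32);
 *	...
 *	item = uma_zalloc(example_zone, M_NOWAIT | M_USE_RESERVE);
 */
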
51986fd34d6fSJeff Roberson /* See uma.h */
51998355f576SJeff Roberson int
5200a4915c21SAttilio Rao uma_zone_reserve_kva(uma_zone_t zone, int count)
52018355f576SJeff Roberson {
5202099a0e58SBosko Milekic 	uma_keg_t keg;
52038355f576SJeff Roberson 	vm_offset_t kva;
52049ba30bcbSZbigniew Bodek 	u_int pages;
52058355f576SJeff Roberson 
5206bb15d1c7SGleb Smirnoff 	KEG_GET(zone, keg);
5207727c6918SJeff Roberson 	KEG_ASSERT_COLD(keg);
5208727c6918SJeff Roberson 	ZONE_ASSERT_COLD(zone);
52098355f576SJeff Roberson 
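	/*
	 * Worked example (hypothetical numbers): reserving KVA for 100 items
	 * in a keg with 25 items per slab and 1 page per slab requires
	 * howmany(100, 25) * 1 = 4 pages of kernel virtual address space.
	 */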
521079c9f942SJeff Roberson 	pages = howmany(count, keg->uk_ipers) * keg->uk_ppera;
5211a553d4b8SJeff Roberson 
5212da76d349SBojan Novković #ifdef UMA_USE_DMAP
5213a4915c21SAttilio Rao 	if (keg->uk_ppera > 1) {
5214a4915c21SAttilio Rao #else
5215a4915c21SAttilio Rao 	if (1) {
5216a4915c21SAttilio Rao #endif
521757223e99SAndriy Gapon 		kva = kva_alloc((vm_size_t)pages * PAGE_SIZE);
5218d1f42ac2SAlan Cox 		if (kva == 0)
52198355f576SJeff Roberson 			return (0);
5220a4915c21SAttilio Rao 	} else
5221a4915c21SAttilio Rao 		kva = 0;
5222bb15d1c7SGleb Smirnoff 
5223bb15d1c7SGleb Smirnoff 	MPASS(keg->uk_kva == 0);
5224099a0e58SBosko Milekic 	keg->uk_kva = kva;
5225a4915c21SAttilio Rao 	keg->uk_offset = 0;
5226bb15d1c7SGleb Smirnoff 	zone->uz_max_items = pages * keg->uk_ipers;
5227d25ed650SBojan Novković #ifdef UMA_USE_DMAP
5228a4915c21SAttilio Rao 	keg->uk_allocf = (keg->uk_ppera > 1) ? noobj_alloc : uma_small_alloc;
5229a4915c21SAttilio Rao #else
5230a4915c21SAttilio Rao 	keg->uk_allocf = noobj_alloc;
5231a4915c21SAttilio Rao #endif
5232cc7ce83aSJeff Roberson 	keg->uk_flags |= UMA_ZFLAG_LIMIT | UMA_ZONE_NOFREE;
5233cc7ce83aSJeff Roberson 	zone->uz_flags |= UMA_ZFLAG_LIMIT | UMA_ZONE_NOFREE;
5234cc7ce83aSJeff Roberson 	zone_update_caches(zone);
5235af526374SJeff Roberson 
52368355f576SJeff Roberson 	return (1);
52378355f576SJeff Roberson }
52388355f576SJeff Roberson 
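/*
 * Illustrative sketch (hypothetical names): a subsystem that knows its
 * worst-case item count can pre-reserve the backing KVA at boot.  Note that,
 * per the code above, this also imposes a hard item limit and marks the keg
 * UMA_ZONE_NOFREE, so the pages are never returned.
 *
 *	if (uma_zone_reserve_kva(example_zone, example_max_items) == 0)
 *		panic("example: could not reserve KVA");
 */
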
52398355f576SJeff Roberson /* See uma.h */
52408355f576SJeff Roberson void
52418355f576SJeff Roberson uma_prealloc(uma_zone_t zone, int items)
52428355f576SJeff Roberson {
5243920239efSMark Johnston 	struct vm_domainset_iter di;
5244ab3185d1SJeff Roberson 	uma_domain_t dom;
52458355f576SJeff Roberson 	uma_slab_t slab;
5246099a0e58SBosko Milekic 	uma_keg_t keg;
524786220393SMark Johnston 	int aflags, domain, slabs;
52488355f576SJeff Roberson 
5249bb15d1c7SGleb Smirnoff 	KEG_GET(zone, keg);
525079c9f942SJeff Roberson 	slabs = howmany(items, keg->uk_ipers);
5251194a979eSMark Johnston 	while (slabs-- > 0) {
525286220393SMark Johnston 		aflags = M_NOWAIT;
525386220393SMark Johnston 		vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain,
525486220393SMark Johnston 		    &aflags);
525586220393SMark Johnston 		for (;;) {
525686220393SMark Johnston 			slab = keg_alloc_slab(keg, zone, domain, M_WAITOK,
525786220393SMark Johnston 			    aflags);
525886220393SMark Johnston 			if (slab != NULL) {
5259ab3185d1SJeff Roberson 				dom = &keg->uk_domain[slab->us_domain];
52604ab3aee8SMark Johnston 				/*
52614ab3aee8SMark Johnston 				 * keg_alloc_slab() always returns a slab on the
52624ab3aee8SMark Johnston 				 * partial list.
52634ab3aee8SMark Johnston 				 */
52648b987a77SJeff Roberson 				LIST_REMOVE(slab, us_link);
526586220393SMark Johnston 				LIST_INSERT_HEAD(&dom->ud_free_slab, slab,
526686220393SMark Johnston 				    us_link);
52674ab3aee8SMark Johnston 				dom->ud_free_slabs++;
52688b987a77SJeff Roberson 				KEG_UNLOCK(keg, slab->us_domain);
5269920239efSMark Johnston 				break;
52708355f576SJeff Roberson 			}
52718b987a77SJeff Roberson 			if (vm_domainset_iter_policy(&di, &domain) != 0)
527289d2fb14SKonstantin Belousov 				vm_wait_doms(&keg->uk_dr.dr_policy->ds_mask, 0);
527386220393SMark Johnston 		}
527486220393SMark Johnston 	}
527586220393SMark Johnston }
52768355f576SJeff Roberson 
5277ed581bf6SJeff Roberson /*
5278ed581bf6SJeff Roberson  * Returns a snapshot of memory consumption in bytes.
5279ed581bf6SJeff Roberson  */
5280ed581bf6SJeff Roberson size_t
5281ed581bf6SJeff Roberson uma_zone_memory(uma_zone_t zone)
5282ed581bf6SJeff Roberson {
5283ed581bf6SJeff Roberson 	size_t sz;
5284ed581bf6SJeff Roberson 	int i;
5285ed581bf6SJeff Roberson 
5286ed581bf6SJeff Roberson 	sz = 0;
5287ed581bf6SJeff Roberson 	if (zone->uz_flags & UMA_ZFLAG_CACHE) {
5288ed581bf6SJeff Roberson 		for (i = 0; i < vm_ndomains; i++)
5289c6fd3e23SJeff Roberson 			sz += ZDOM_GET(zone, i)->uzd_nitems;
5290ed581bf6SJeff Roberson 		return (sz * zone->uz_size);
5291ed581bf6SJeff Roberson 	}
5292ed581bf6SJeff Roberson 	for (i = 0; i < vm_ndomains; i++)
5293ed581bf6SJeff Roberson 		sz += zone->uz_keg->uk_domain[i].ud_pages;
5294ed581bf6SJeff Roberson 
5295ed581bf6SJeff Roberson 	return (sz * PAGE_SIZE);
5296ed581bf6SJeff Roberson }
5297ed581bf6SJeff Roberson 
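/*
 * Illustrative use (hypothetical zone and budget): a caller might trim a
 * zone once its footprint grows past some threshold.
 *
 *	if (uma_zone_memory(example_zone) > example_budget)
 *		uma_zone_reclaim(example_zone, UMA_RECLAIM_TRIM);
 */
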
5298389a3fa6SMark Johnston struct uma_reclaim_args {
5299389a3fa6SMark Johnston 	int	domain;
5300389a3fa6SMark Johnston 	int	req;
5301389a3fa6SMark Johnston };
5302389a3fa6SMark Johnston 
5303389a3fa6SMark Johnston static void
5304389a3fa6SMark Johnston uma_reclaim_domain_cb(uma_zone_t zone, void *arg)
5305389a3fa6SMark Johnston {
5306389a3fa6SMark Johnston 	struct uma_reclaim_args *args;
5307389a3fa6SMark Johnston 
5308389a3fa6SMark Johnston 	args = arg;
5309cf907074SAndrew Gallatin 	if ((zone->uz_flags & UMA_ZONE_UNMANAGED) != 0)
5310cf907074SAndrew Gallatin 		return;
5311cf907074SAndrew Gallatin 	if ((args->req == UMA_RECLAIM_TRIM) &&
5312cf907074SAndrew Gallatin 	    (zone->uz_flags & UMA_ZONE_NOTRIM) != 0)
5313cf907074SAndrew Gallatin 		return;
5314cf907074SAndrew Gallatin 
5315389a3fa6SMark Johnston 	uma_zone_reclaim_domain(zone, args->req, args->domain);
5316389a3fa6SMark Johnston }
5317389a3fa6SMark Johnston 
53188355f576SJeff Roberson /* See uma.h */
531908cfa56eSMark Johnston void
532008cfa56eSMark Johnston uma_reclaim(int req)
53218355f576SJeff Roberson {
5322aabe13f1SMark Johnston 	uma_reclaim_domain(req, UMA_ANYDOMAIN);
5323aabe13f1SMark Johnston }
532444ec2b63SKonstantin Belousov 
5325aabe13f1SMark Johnston void
5326aabe13f1SMark Johnston uma_reclaim_domain(int req, int domain)
5327aabe13f1SMark Johnston {
5328389a3fa6SMark Johnston 	struct uma_reclaim_args args;
5329aabe13f1SMark Johnston 
533086bbae32SJeff Roberson 	bucket_enable();
533108cfa56eSMark Johnston 
5332389a3fa6SMark Johnston 	args.domain = domain;
5333389a3fa6SMark Johnston 	args.req = req;
5334389a3fa6SMark Johnston 
5335aabe13f1SMark Johnston 	sx_slock(&uma_reclaim_lock);
533608cfa56eSMark Johnston 	switch (req) {
533708cfa56eSMark Johnston 	case UMA_RECLAIM_TRIM:
533808cfa56eSMark Johnston 	case UMA_RECLAIM_DRAIN:
5339389a3fa6SMark Johnston 		zone_foreach(uma_reclaim_domain_cb, &args);
5340aabe13f1SMark Johnston 		break;
534108cfa56eSMark Johnston 	case UMA_RECLAIM_DRAIN_CPU:
5342*f506d5afSMark Johnston 		/*
5343*f506d5afSMark Johnston 		 * Reclaim globally visible free items from all zones, then drain
5344*f506d5afSMark Johnston 		 * per-CPU buckets, then reclaim items freed while draining.
5345*f506d5afSMark Johnston 		 * This approach minimizes expensive context switching needed to
5346*f506d5afSMark Johnston 		 * drain each zone's per-CPU buckets.
5347*f506d5afSMark Johnston 		 */
5348*f506d5afSMark Johnston 		args.req = UMA_RECLAIM_DRAIN;
5349389a3fa6SMark Johnston 		zone_foreach(uma_reclaim_domain_cb, &args);
535008cfa56eSMark Johnston 		pcpu_cache_drain_safe(NULL);
5351389a3fa6SMark Johnston 		zone_foreach(uma_reclaim_domain_cb, &args);
535208cfa56eSMark Johnston 		break;
535308cfa56eSMark Johnston 	default:
535408cfa56eSMark Johnston 		panic("unhandled reclamation request %d", req);
535508cfa56eSMark Johnston 	}
53560f9b7bf3SMark Johnston 
53578355f576SJeff Roberson 	/*
53588355f576SJeff Roberson 	 * Some slabs may have been freed while other zones were drained, but
53598355f576SJeff Roberson 	 * the slab zones were visited early in the list, so visit them again
53608355f576SJeff Roberson 	 * to free pages that are now empty.  Do the same for the bucket zones.
53618355f576SJeff Roberson 	 */
5362389a3fa6SMark Johnston 	uma_zone_reclaim_domain(slabzones[0], UMA_RECLAIM_DRAIN, domain);
5363389a3fa6SMark Johnston 	uma_zone_reclaim_domain(slabzones[1], UMA_RECLAIM_DRAIN, domain);
5364aabe13f1SMark Johnston 	bucket_zone_drain(domain);
5365aabe13f1SMark Johnston 	sx_sunlock(&uma_reclaim_lock);
53668355f576SJeff Roberson }
53678355f576SJeff Roberson 
53682e47807cSJeff Roberson static volatile int uma_reclaim_needed;
536944ec2b63SKonstantin Belousov 
537044ec2b63SKonstantin Belousov void
537144ec2b63SKonstantin Belousov uma_reclaim_wakeup(void)
537244ec2b63SKonstantin Belousov {
537344ec2b63SKonstantin Belousov 
53742e47807cSJeff Roberson 	if (atomic_fetchadd_int(&uma_reclaim_needed, 1) == 0)
53752e47807cSJeff Roberson 		wakeup(uma_reclaim);
537644ec2b63SKonstantin Belousov }
537744ec2b63SKonstantin Belousov 
537844ec2b63SKonstantin Belousov void
537944ec2b63SKonstantin Belousov uma_reclaim_worker(void *arg __unused)
538044ec2b63SKonstantin Belousov {
538144ec2b63SKonstantin Belousov 
538244ec2b63SKonstantin Belousov 	for (;;) {
538308cfa56eSMark Johnston 		sx_xlock(&uma_reclaim_lock);
5384200f8117SKonstantin Belousov 		while (atomic_load_int(&uma_reclaim_needed) == 0)
538508cfa56eSMark Johnston 			sx_sleep(uma_reclaim, &uma_reclaim_lock, PVM, "umarcl",
53862e47807cSJeff Roberson 			    hz);
538708cfa56eSMark Johnston 		sx_xunlock(&uma_reclaim_lock);
53889b43bc27SAndriy Gapon 		EVENTHANDLER_INVOKE(vm_lowmem, VM_LOW_KMEM);
538908cfa56eSMark Johnston 		uma_reclaim(UMA_RECLAIM_DRAIN_CPU);
5390200f8117SKonstantin Belousov 		atomic_store_int(&uma_reclaim_needed, 0);
53912e47807cSJeff Roberson 		/* Don't fire more than once per-second. */
53922e47807cSJeff Roberson 		pause("umarclslp", hz);
539344ec2b63SKonstantin Belousov 	}
539444ec2b63SKonstantin Belousov }
539544ec2b63SKonstantin Belousov 
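/*
 * The worker above fires the vm_lowmem event before draining UMA itself, so
 * subsystems with private caches can register to release memory at the same
 * time.  Illustrative registration (hypothetical handler name):
 *
 *	EVENTHANDLER_REGISTER(vm_lowmem, example_lowmem, NULL,
 *	    EVENTHANDLER_PRI_FIRST);
 */
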
5396663b416fSJohn Baldwin /* See uma.h */
539708cfa56eSMark Johnston void
539808cfa56eSMark Johnston uma_zone_reclaim(uma_zone_t zone, int req)
539908cfa56eSMark Johnston {
5400aabe13f1SMark Johnston 	uma_zone_reclaim_domain(zone, req, UMA_ANYDOMAIN);
5401aabe13f1SMark Johnston }
540208cfa56eSMark Johnston 
5403aabe13f1SMark Johnston void
5404aabe13f1SMark Johnston uma_zone_reclaim_domain(uma_zone_t zone, int req, int domain)
5405aabe13f1SMark Johnston {
540608cfa56eSMark Johnston 	switch (req) {
540708cfa56eSMark Johnston 	case UMA_RECLAIM_TRIM:
5408389a3fa6SMark Johnston 		zone_reclaim(zone, domain, M_NOWAIT, false);
540908cfa56eSMark Johnston 		break;
541008cfa56eSMark Johnston 	case UMA_RECLAIM_DRAIN:
5411389a3fa6SMark Johnston 		zone_reclaim(zone, domain, M_NOWAIT, true);
541208cfa56eSMark Johnston 		break;
541308cfa56eSMark Johnston 	case UMA_RECLAIM_DRAIN_CPU:
541408cfa56eSMark Johnston 		pcpu_cache_drain_safe(zone);
5415389a3fa6SMark Johnston 		zone_reclaim(zone, domain, M_NOWAIT, true);
541608cfa56eSMark Johnston 		break;
541708cfa56eSMark Johnston 	default:
541808cfa56eSMark Johnston 		panic("unhandled reclamation request %d", req);
541908cfa56eSMark Johnston 	}
542008cfa56eSMark Johnston }
542108cfa56eSMark Johnston 
542208cfa56eSMark Johnston /* See uma.h */
5423663b416fSJohn Baldwin int
5424663b416fSJohn Baldwin uma_zone_exhausted(uma_zone_t zone)
5425663b416fSJohn Baldwin {
5426663b416fSJohn Baldwin 
5427727c6918SJeff Roberson 	return (atomic_load_32(&zone->uz_sleepers) > 0);
54286c125b8dSMohan Srinivasan }
54296c125b8dSMohan Srinivasan 
54302e47807cSJeff Roberson unsigned long
54312e47807cSJeff Roberson uma_limit(void)
54322e47807cSJeff Roberson {
54332e47807cSJeff Roberson 
54342e47807cSJeff Roberson 	return (uma_kmem_limit);
54352e47807cSJeff Roberson }
54362e47807cSJeff Roberson 
54372e47807cSJeff Roberson void
54382e47807cSJeff Roberson uma_set_limit(unsigned long limit)
54392e47807cSJeff Roberson {
54402e47807cSJeff Roberson 
54412e47807cSJeff Roberson 	uma_kmem_limit = limit;
54422e47807cSJeff Roberson }
54432e47807cSJeff Roberson 
54442e47807cSJeff Roberson unsigned long
54452e47807cSJeff Roberson uma_size(void)
54462e47807cSJeff Roberson {
54472e47807cSJeff Roberson 
5448058f0f74SMark Johnston 	return (atomic_load_long(&uma_kmem_total));
5449ad5b0f5bSJeff Roberson }
5450ad5b0f5bSJeff Roberson 
5451ad5b0f5bSJeff Roberson long
5452ad5b0f5bSJeff Roberson uma_avail(void)
5453ad5b0f5bSJeff Roberson {
5454ad5b0f5bSJeff Roberson 
5455058f0f74SMark Johnston 	return (uma_kmem_limit - uma_size());
54562e47807cSJeff Roberson }
54572e47807cSJeff Roberson 
5458a0d4b0aeSRobert Watson #ifdef DDB
54598355f576SJeff Roberson /*
54607a52a97eSRobert Watson  * Generate statistics across both the zone and its per-cpu caches.  Return
54617a52a97eSRobert Watson  * desired statistics if the pointer is non-NULL for that statistic.
54627a52a97eSRobert Watson  *
54637a52a97eSRobert Watson  * Note: does not update the zone statistics, as it can't safely clear the
54647a52a97eSRobert Watson  * per-CPU cache statistic.
54657a52a97eSRobert Watson  *
54667a52a97eSRobert Watson  */
54677a52a97eSRobert Watson static void
54680f9b7bf3SMark Johnston uma_zone_sumstat(uma_zone_t z, long *cachefreep, uint64_t *allocsp,
5469c1685086SJeff Roberson     uint64_t *freesp, uint64_t *sleepsp, uint64_t *xdomainp)
54707a52a97eSRobert Watson {
54717a52a97eSRobert Watson 	uma_cache_t cache;
5472c1685086SJeff Roberson 	uint64_t allocs, frees, sleeps, xdomain;
54737a52a97eSRobert Watson 	int cachefree, cpu;
54747a52a97eSRobert Watson 
5475c1685086SJeff Roberson 	allocs = frees = sleeps = xdomain = 0;
54767a52a97eSRobert Watson 	cachefree = 0;
54773aa6d94eSJohn Baldwin 	CPU_FOREACH(cpu) {
54787a52a97eSRobert Watson 		cache = &z->uz_cpu[cpu];
5479376b1ba3SJeff Roberson 		cachefree += cache->uc_allocbucket.ucb_cnt;
5480376b1ba3SJeff Roberson 		cachefree += cache->uc_freebucket.ucb_cnt;
5481376b1ba3SJeff Roberson 		xdomain += cache->uc_crossbucket.ucb_cnt;
5482376b1ba3SJeff Roberson 		cachefree += cache->uc_crossbucket.ucb_cnt;
54837a52a97eSRobert Watson 		allocs += cache->uc_allocs;
54847a52a97eSRobert Watson 		frees += cache->uc_frees;
54857a52a97eSRobert Watson 	}
54862efcc8cbSGleb Smirnoff 	allocs += counter_u64_fetch(z->uz_allocs);
54872efcc8cbSGleb Smirnoff 	frees += counter_u64_fetch(z->uz_frees);
5488c6fd3e23SJeff Roberson 	xdomain += counter_u64_fetch(z->uz_xdomain);
5489bf965959SSean Bruno 	sleeps += z->uz_sleeps;
54907a52a97eSRobert Watson 	if (cachefreep != NULL)
54917a52a97eSRobert Watson 		*cachefreep = cachefree;
54927a52a97eSRobert Watson 	if (allocsp != NULL)
54937a52a97eSRobert Watson 		*allocsp = allocs;
54947a52a97eSRobert Watson 	if (freesp != NULL)
54957a52a97eSRobert Watson 		*freesp = frees;
5496bf965959SSean Bruno 	if (sleepsp != NULL)
5497bf965959SSean Bruno 		*sleepsp = sleeps;
5498c1685086SJeff Roberson 	if (xdomainp != NULL)
5499c1685086SJeff Roberson 		*xdomainp = xdomain;
55007a52a97eSRobert Watson }
5501a0d4b0aeSRobert Watson #endif /* DDB */
55027a52a97eSRobert Watson 
55037a52a97eSRobert Watson static int
55047a52a97eSRobert Watson sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
55057a52a97eSRobert Watson {
55067a52a97eSRobert Watson 	uma_keg_t kz;
55077a52a97eSRobert Watson 	uma_zone_t z;
55087a52a97eSRobert Watson 	int count;
55097a52a97eSRobert Watson 
55107a52a97eSRobert Watson 	count = 0;
5511111fbcd5SBryan Venteicher 	rw_rlock(&uma_rwlock);
55127a52a97eSRobert Watson 	LIST_FOREACH(kz, &uma_kegs, uk_link) {
55137a52a97eSRobert Watson 		LIST_FOREACH(z, &kz->uk_zones, uz_link)
55147a52a97eSRobert Watson 			count++;
55157a52a97eSRobert Watson 	}
5516b47acb0aSGleb Smirnoff 	LIST_FOREACH(z, &uma_cachezones, uz_link)
5517b47acb0aSGleb Smirnoff 		count++;
5518b47acb0aSGleb Smirnoff 
5519111fbcd5SBryan Venteicher 	rw_runlock(&uma_rwlock);
55207a52a97eSRobert Watson 	return (sysctl_handle_int(oidp, &count, 0, req));
55217a52a97eSRobert Watson }
55227a52a97eSRobert Watson 
5523b47acb0aSGleb Smirnoff static void
5524b47acb0aSGleb Smirnoff uma_vm_zone_stats(struct uma_type_header *uth, uma_zone_t z, struct sbuf *sbuf,
5525b47acb0aSGleb Smirnoff     struct uma_percpu_stat *ups, bool internal)
5526b47acb0aSGleb Smirnoff {
5527b47acb0aSGleb Smirnoff 	uma_zone_domain_t zdom;
5528b47acb0aSGleb Smirnoff 	uma_cache_t cache;
5529b47acb0aSGleb Smirnoff 	int i;
5530b47acb0aSGleb Smirnoff 
5531b47acb0aSGleb Smirnoff 	for (i = 0; i < vm_ndomains; i++) {
5532c6fd3e23SJeff Roberson 		zdom = ZDOM_GET(z, i);
5533b47acb0aSGleb Smirnoff 		uth->uth_zone_free += zdom->uzd_nitems;
5534b47acb0aSGleb Smirnoff 	}
5535b47acb0aSGleb Smirnoff 	uth->uth_allocs = counter_u64_fetch(z->uz_allocs);
5536b47acb0aSGleb Smirnoff 	uth->uth_frees = counter_u64_fetch(z->uz_frees);
5537b47acb0aSGleb Smirnoff 	uth->uth_fails = counter_u64_fetch(z->uz_fails);
5538c6fd3e23SJeff Roberson 	uth->uth_xdomain = counter_u64_fetch(z->uz_xdomain);
5539b47acb0aSGleb Smirnoff 	uth->uth_sleeps = z->uz_sleeps;
55401de9724eSMark Johnston 
5541b47acb0aSGleb Smirnoff 	for (i = 0; i < mp_maxid + 1; i++) {
5542b47acb0aSGleb Smirnoff 		bzero(&ups[i], sizeof(*ups));
5543b47acb0aSGleb Smirnoff 		if (internal || CPU_ABSENT(i))
5544b47acb0aSGleb Smirnoff 			continue;
5545b47acb0aSGleb Smirnoff 		cache = &z->uz_cpu[i];
5546376b1ba3SJeff Roberson 		ups[i].ups_cache_free += cache->uc_allocbucket.ucb_cnt;
5547376b1ba3SJeff Roberson 		ups[i].ups_cache_free += cache->uc_freebucket.ucb_cnt;
5548376b1ba3SJeff Roberson 		ups[i].ups_cache_free += cache->uc_crossbucket.ucb_cnt;
5549b47acb0aSGleb Smirnoff 		ups[i].ups_allocs = cache->uc_allocs;
5550b47acb0aSGleb Smirnoff 		ups[i].ups_frees = cache->uc_frees;
5551b47acb0aSGleb Smirnoff 	}
5552b47acb0aSGleb Smirnoff }
5553b47acb0aSGleb Smirnoff 
55547a52a97eSRobert Watson static int
55557a52a97eSRobert Watson sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
55567a52a97eSRobert Watson {
55577a52a97eSRobert Watson 	struct uma_stream_header ush;
55587a52a97eSRobert Watson 	struct uma_type_header uth;
555963b5d112SKonstantin Belousov 	struct uma_percpu_stat *ups;
55607a52a97eSRobert Watson 	struct sbuf sbuf;
55617a52a97eSRobert Watson 	uma_keg_t kz;
55627a52a97eSRobert Watson 	uma_zone_t z;
55634bd61e19SJeff Roberson 	uint64_t items;
55648b987a77SJeff Roberson 	uint32_t kfree, pages;
55654e657159SMatthew D Fleming 	int count, error, i;
55667a52a97eSRobert Watson 
556700f0e671SMatthew D Fleming 	error = sysctl_wire_old_buffer(req, 0);
556800f0e671SMatthew D Fleming 	if (error != 0)
556900f0e671SMatthew D Fleming 		return (error);
55704e657159SMatthew D Fleming 	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
55711eafc078SIan Lepore 	sbuf_clear_flags(&sbuf, SBUF_INCLUDENUL);
557263b5d112SKonstantin Belousov 	ups = malloc((mp_maxid + 1) * sizeof(*ups), M_TEMP, M_WAITOK);
55734e657159SMatthew D Fleming 
5574404a593eSMatthew D Fleming 	count = 0;
5575111fbcd5SBryan Venteicher 	rw_rlock(&uma_rwlock);
55767a52a97eSRobert Watson 	LIST_FOREACH(kz, &uma_kegs, uk_link) {
55777a52a97eSRobert Watson 		LIST_FOREACH(z, &kz->uk_zones, uz_link)
55787a52a97eSRobert Watson 			count++;
55797a52a97eSRobert Watson 	}
55807a52a97eSRobert Watson 
5581b47acb0aSGleb Smirnoff 	LIST_FOREACH(z, &uma_cachezones, uz_link)
5582b47acb0aSGleb Smirnoff 		count++;
5583b47acb0aSGleb Smirnoff 
55847a52a97eSRobert Watson 	/*
55857a52a97eSRobert Watson 	 * Insert stream header.
55867a52a97eSRobert Watson 	 */
55877a52a97eSRobert Watson 	bzero(&ush, sizeof(ush));
55887a52a97eSRobert Watson 	ush.ush_version = UMA_STREAM_VERSION;
5589ab3a57c0SRobert Watson 	ush.ush_maxcpus = (mp_maxid + 1);
55907a52a97eSRobert Watson 	ush.ush_count = count;
55914e657159SMatthew D Fleming 	(void)sbuf_bcat(&sbuf, &ush, sizeof(ush));
55927a52a97eSRobert Watson 
55937a52a97eSRobert Watson 	LIST_FOREACH(kz, &uma_kegs, uk_link) {
55948b987a77SJeff Roberson 		kfree = pages = 0;
55958b987a77SJeff Roberson 		for (i = 0; i < vm_ndomains; i++) {
55964ab3aee8SMark Johnston 			kfree += kz->uk_domain[i].ud_free_items;
55978b987a77SJeff Roberson 			pages += kz->uk_domain[i].ud_pages;
55988b987a77SJeff Roberson 		}
55997a52a97eSRobert Watson 		LIST_FOREACH(z, &kz->uk_zones, uz_link) {
56007a52a97eSRobert Watson 			bzero(&uth, sizeof(uth));
5601cbbb4a00SRobert Watson 			strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
56027a52a97eSRobert Watson 			uth.uth_align = kz->uk_align;
56037a52a97eSRobert Watson 			uth.uth_size = kz->uk_size;
56047a52a97eSRobert Watson 			uth.uth_rsize = kz->uk_rsize;
56054bd61e19SJeff Roberson 			if (z->uz_max_items > 0) {
56064bd61e19SJeff Roberson 				items = UZ_ITEMS_COUNT(z->uz_items);
56074bd61e19SJeff Roberson 				uth.uth_pages = (items / kz->uk_ipers) *
5608bb15d1c7SGleb Smirnoff 					kz->uk_ppera;
56094bd61e19SJeff Roberson 			} else
56108b987a77SJeff Roberson 				uth.uth_pages = pages;
5611f8c86a5fSGleb Smirnoff 			uth.uth_maxpages = (z->uz_max_items / kz->uk_ipers) *
5612bb15d1c7SGleb Smirnoff 			    kz->uk_ppera;
5613bb15d1c7SGleb Smirnoff 			uth.uth_limit = z->uz_max_items;
56148b987a77SJeff Roberson 			uth.uth_keg_free = kfree;
5615cbbb4a00SRobert Watson 
5616cbbb4a00SRobert Watson 			/*
5617cbbb4a00SRobert Watson 			 * A zone is secondary if it is not the first entry
5618cbbb4a00SRobert Watson 			 * on the keg's zone list.
5619cbbb4a00SRobert Watson 			 */
5620e20a199fSJeff Roberson 			if ((z->uz_flags & UMA_ZONE_SECONDARY) &&
5621cbbb4a00SRobert Watson 			    (LIST_FIRST(&kz->uk_zones) != z))
5622cbbb4a00SRobert Watson 				uth.uth_zone_flags = UTH_ZONE_SECONDARY;
5623b47acb0aSGleb Smirnoff 			uma_vm_zone_stats(&uth, z, &sbuf, ups,
5624b47acb0aSGleb Smirnoff 			    kz->uk_flags & UMA_ZFLAG_INTERNAL);
562563b5d112SKonstantin Belousov 			(void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
562663b5d112SKonstantin Belousov 			for (i = 0; i < mp_maxid + 1; i++)
562763b5d112SKonstantin Belousov 				(void)sbuf_bcat(&sbuf, &ups[i], sizeof(ups[i]));
56287a52a97eSRobert Watson 		}
56297a52a97eSRobert Watson 	}
5630b47acb0aSGleb Smirnoff 	LIST_FOREACH(z, &uma_cachezones, uz_link) {
5631b47acb0aSGleb Smirnoff 		bzero(&uth, sizeof(uth));
5632b47acb0aSGleb Smirnoff 		strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
5633b47acb0aSGleb Smirnoff 		uth.uth_size = z->uz_size;
5634b47acb0aSGleb Smirnoff 		uma_vm_zone_stats(&uth, z, &sbuf, ups, false);
5635b47acb0aSGleb Smirnoff 		(void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
5636b47acb0aSGleb Smirnoff 		for (i = 0; i < mp_maxid + 1; i++)
5637b47acb0aSGleb Smirnoff 			(void)sbuf_bcat(&sbuf, &ups[i], sizeof(ups[i]));
5638b47acb0aSGleb Smirnoff 	}
5639b47acb0aSGleb Smirnoff 
5640111fbcd5SBryan Venteicher 	rw_runlock(&uma_rwlock);
56414e657159SMatthew D Fleming 	error = sbuf_finish(&sbuf);
56424e657159SMatthew D Fleming 	sbuf_delete(&sbuf);
564363b5d112SKonstantin Belousov 	free(ups, M_TEMP);
56447a52a97eSRobert Watson 	return (error);
56457a52a97eSRobert Watson }
564648c5777eSRobert Watson 
56470a5a3ccbSGleb Smirnoff int
56480a5a3ccbSGleb Smirnoff sysctl_handle_uma_zone_max(SYSCTL_HANDLER_ARGS)
56490a5a3ccbSGleb Smirnoff {
56500a5a3ccbSGleb Smirnoff 	uma_zone_t zone = *(uma_zone_t *)arg1;
565116be9f54SGleb Smirnoff 	int error, max;
56520a5a3ccbSGleb Smirnoff 
565316be9f54SGleb Smirnoff 	max = uma_zone_get_max(zone);
56540a5a3ccbSGleb Smirnoff 	error = sysctl_handle_int(oidp, &max, 0, req);
56550a5a3ccbSGleb Smirnoff 	if (error || !req->newptr)
56560a5a3ccbSGleb Smirnoff 		return (error);
56570a5a3ccbSGleb Smirnoff 
56580a5a3ccbSGleb Smirnoff 	uma_zone_set_max(zone, max);
56590a5a3ccbSGleb Smirnoff 
56600a5a3ccbSGleb Smirnoff 	return (0);
56610a5a3ccbSGleb Smirnoff }
56620a5a3ccbSGleb Smirnoff 
56630a5a3ccbSGleb Smirnoff int
56640a5a3ccbSGleb Smirnoff sysctl_handle_uma_zone_cur(SYSCTL_HANDLER_ARGS)
56650a5a3ccbSGleb Smirnoff {
566620a4e154SJeff Roberson 	uma_zone_t zone;
56670a5a3ccbSGleb Smirnoff 	int cur;
56680a5a3ccbSGleb Smirnoff 
566920a4e154SJeff Roberson 	/*
567020a4e154SJeff Roberson 	 * Some callers want to add sysctls for global zones that
567120a4e154SJeff Roberson 	 * may not yet exist so they pass a pointer to a pointer.
567220a4e154SJeff Roberson 	 */
567320a4e154SJeff Roberson 	if (arg2 == 0)
567420a4e154SJeff Roberson 		zone = *(uma_zone_t *)arg1;
567520a4e154SJeff Roberson 	else
567620a4e154SJeff Roberson 		zone = arg1;
56770a5a3ccbSGleb Smirnoff 	cur = uma_zone_get_cur(zone);
56780a5a3ccbSGleb Smirnoff 	return (sysctl_handle_int(oidp, &cur, 0, req));
56790a5a3ccbSGleb Smirnoff }
56800a5a3ccbSGleb Smirnoff 
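/*
 * Illustrative sketch (hypothetical names): exporting the current item count
 * of a zone that is created later.  Passing the address of the zone pointer
 * with arg2 == 0 lets the handler above resolve it at read time.
 *
 *	static uma_zone_t example_zone;
 *
 *	SYSCTL_PROC(_vm, OID_AUTO, example_zone_cur,
 *	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
 *	    &example_zone, 0, sysctl_handle_uma_zone_cur, "I",
 *	    "Current number of example_zone items");
 */
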
568120a4e154SJeff Roberson static int
568220a4e154SJeff Roberson sysctl_handle_uma_zone_allocs(SYSCTL_HANDLER_ARGS)
568320a4e154SJeff Roberson {
568420a4e154SJeff Roberson 	uma_zone_t zone = arg1;
568520a4e154SJeff Roberson 	uint64_t cur;
568620a4e154SJeff Roberson 
568720a4e154SJeff Roberson 	cur = uma_zone_get_allocs(zone);
568820a4e154SJeff Roberson 	return (sysctl_handle_64(oidp, &cur, 0, req));
568920a4e154SJeff Roberson }
569020a4e154SJeff Roberson 
569120a4e154SJeff Roberson static int
569220a4e154SJeff Roberson sysctl_handle_uma_zone_frees(SYSCTL_HANDLER_ARGS)
569320a4e154SJeff Roberson {
569420a4e154SJeff Roberson 	uma_zone_t zone = arg1;
569520a4e154SJeff Roberson 	uint64_t cur;
569620a4e154SJeff Roberson 
569720a4e154SJeff Roberson 	cur = uma_zone_get_frees(zone);
569820a4e154SJeff Roberson 	return (sysctl_handle_64(oidp, &cur, 0, req));
569920a4e154SJeff Roberson }
570020a4e154SJeff Roberson 
57016d204a6aSRyan Libby static int
57026d204a6aSRyan Libby sysctl_handle_uma_zone_flags(SYSCTL_HANDLER_ARGS)
57036d204a6aSRyan Libby {
57046d204a6aSRyan Libby 	struct sbuf sbuf;
57056d204a6aSRyan Libby 	uma_zone_t zone = arg1;
57066d204a6aSRyan Libby 	int error;
57076d204a6aSRyan Libby 
57086d204a6aSRyan Libby 	sbuf_new_for_sysctl(&sbuf, NULL, 0, req);
57096d204a6aSRyan Libby 	if (zone->uz_flags != 0)
57106d204a6aSRyan Libby 		sbuf_printf(&sbuf, "0x%b", zone->uz_flags, PRINT_UMA_ZFLAGS);
57116d204a6aSRyan Libby 	else
57126d204a6aSRyan Libby 		sbuf_printf(&sbuf, "0");
57136d204a6aSRyan Libby 	error = sbuf_finish(&sbuf);
57146d204a6aSRyan Libby 	sbuf_delete(&sbuf);
57156d204a6aSRyan Libby 
57166d204a6aSRyan Libby 	return (error);
57176d204a6aSRyan Libby }
57186d204a6aSRyan Libby 
5719f7af5015SRyan Libby static int
5720f7af5015SRyan Libby sysctl_handle_uma_slab_efficiency(SYSCTL_HANDLER_ARGS)
5721f7af5015SRyan Libby {
5722f7af5015SRyan Libby 	uma_keg_t keg = arg1;
5723f7af5015SRyan Libby 	int avail, effpct, total;
5724f7af5015SRyan Libby 
5725f7af5015SRyan Libby 	total = keg->uk_ppera * PAGE_SIZE;
572654c5ae80SRyan Libby 	if ((keg->uk_flags & UMA_ZFLAG_OFFPAGE) != 0)
57279b8db4d0SRyan Libby 		total += slabzone(keg->uk_ipers)->uz_keg->uk_rsize;
5728f7af5015SRyan Libby 	/*
5729f7af5015SRyan Libby 	 * We consider the client's requested size and alignment here, not the
5730f7af5015SRyan Libby 	 * real size determination uk_rsize, because we also adjust the real
5731f7af5015SRyan Libby 	 * size for internal implementation reasons (max bitset size).
5732f7af5015SRyan Libby 	 */
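	/*
	 * Worked example (hypothetical keg): uk_size = 100, uk_align = 15,
	 * uk_ipers = 36, uk_ppera = 1 and PAGE_SIZE = 4096 give
	 * avail = 36 * roundup2(100, 16) = 4032, so effpct = 98.
	 */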
5733f7af5015SRyan Libby 	avail = keg->uk_ipers * roundup2(keg->uk_size, keg->uk_align + 1);
5734f7af5015SRyan Libby 	if ((keg->uk_flags & UMA_ZONE_PCPU) != 0)
5735f7af5015SRyan Libby 		avail *= mp_maxid + 1;
5736f7af5015SRyan Libby 	effpct = 100 * avail / total;
5737f7af5015SRyan Libby 	return (sysctl_handle_int(oidp, &effpct, 0, req));
5738f7af5015SRyan Libby }
5739f7af5015SRyan Libby 
57404bd61e19SJeff Roberson static int
57414bd61e19SJeff Roberson sysctl_handle_uma_zone_items(SYSCTL_HANDLER_ARGS)
57424bd61e19SJeff Roberson {
57434bd61e19SJeff Roberson 	uma_zone_t zone = arg1;
57444bd61e19SJeff Roberson 	uint64_t cur;
57454bd61e19SJeff Roberson 
57464bd61e19SJeff Roberson 	cur = UZ_ITEMS_COUNT(atomic_load_64(&zone->uz_items));
57474bd61e19SJeff Roberson 	return (sysctl_handle_64(oidp, &cur, 0, req));
57484bd61e19SJeff Roberson }
57494bd61e19SJeff Roberson 
57509542ea7bSGleb Smirnoff #ifdef INVARIANTS
57519542ea7bSGleb Smirnoff static uma_slab_t
57529542ea7bSGleb Smirnoff uma_dbg_getslab(uma_zone_t zone, void *item)
57539542ea7bSGleb Smirnoff {
57549542ea7bSGleb Smirnoff 	uma_slab_t slab;
57559542ea7bSGleb Smirnoff 	uma_keg_t keg;
57569542ea7bSGleb Smirnoff 	uint8_t *mem;
57579542ea7bSGleb Smirnoff 
57589542ea7bSGleb Smirnoff 	/*
57599542ea7bSGleb Smirnoff 	 * It is safe to return the slab here even though the
57609542ea7bSGleb Smirnoff 	 * zone is unlocked because the item's allocation state
57619542ea7bSGleb Smirnoff 	 * essentially holds a reference.
57629542ea7bSGleb Smirnoff 	 */
5763727c6918SJeff Roberson 	mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
5764727c6918SJeff Roberson 	if ((zone->uz_flags & UMA_ZFLAG_CACHE) != 0)
5765bb15d1c7SGleb Smirnoff 		return (NULL);
576654c5ae80SRyan Libby 	if (zone->uz_flags & UMA_ZFLAG_VTOSLAB)
5767727c6918SJeff Roberson 		return (vtoslab((vm_offset_t)mem));
5768bb15d1c7SGleb Smirnoff 	keg = zone->uz_keg;
576954c5ae80SRyan Libby 	if ((keg->uk_flags & UMA_ZFLAG_HASH) == 0)
5770727c6918SJeff Roberson 		return ((uma_slab_t)(mem + keg->uk_pgoff));
57718b987a77SJeff Roberson 	KEG_LOCK(keg, 0);
57729542ea7bSGleb Smirnoff 	slab = hash_sfind(&keg->uk_hash, mem);
57738b987a77SJeff Roberson 	KEG_UNLOCK(keg, 0);
57749542ea7bSGleb Smirnoff 
57759542ea7bSGleb Smirnoff 	return (slab);
57769542ea7bSGleb Smirnoff }
57779542ea7bSGleb Smirnoff 
5778c5deaf04SGleb Smirnoff static bool
5779c5deaf04SGleb Smirnoff uma_dbg_zskip(uma_zone_t zone, void *mem)
5780c5deaf04SGleb Smirnoff {
5781c5deaf04SGleb Smirnoff 
5782727c6918SJeff Roberson 	if ((zone->uz_flags & UMA_ZFLAG_CACHE) != 0)
5783c5deaf04SGleb Smirnoff 		return (true);
5784c5deaf04SGleb Smirnoff 
5785bb15d1c7SGleb Smirnoff 	return (uma_dbg_kskip(zone->uz_keg, mem));
5786c5deaf04SGleb Smirnoff }
5787c5deaf04SGleb Smirnoff 
5788c5deaf04SGleb Smirnoff static bool
5789c5deaf04SGleb Smirnoff uma_dbg_kskip(uma_keg_t keg, void *mem)
5790c5deaf04SGleb Smirnoff {
5791c5deaf04SGleb Smirnoff 	uintptr_t idx;
5792c5deaf04SGleb Smirnoff 
5793c5deaf04SGleb Smirnoff 	if (dbg_divisor == 0)
5794c5deaf04SGleb Smirnoff 		return (true);
5795c5deaf04SGleb Smirnoff 
5796c5deaf04SGleb Smirnoff 	if (dbg_divisor == 1)
5797c5deaf04SGleb Smirnoff 		return (false);
5798c5deaf04SGleb Smirnoff 
5799c5deaf04SGleb Smirnoff 	idx = (uintptr_t)mem >> PAGE_SHIFT;
5800c5deaf04SGleb Smirnoff 	if (keg->uk_ipers > 1) {
5801c5deaf04SGleb Smirnoff 		idx *= keg->uk_ipers;
5802c5deaf04SGleb Smirnoff 		idx += ((uintptr_t)mem & PAGE_MASK) / keg->uk_rsize;
5803c5deaf04SGleb Smirnoff 	}
5804c5deaf04SGleb Smirnoff 
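	/*
	 * Sample roughly one out of every dbg_divisor items: only items whose
	 * global index is a multiple of dbg_divisor are subjected to the
	 * debugging checks (e.g. dbg_divisor == 3 selects indices 0, 3, 6, ...).
	 */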
5805c5deaf04SGleb Smirnoff 	if ((idx / dbg_divisor) * dbg_divisor != idx) {
5806c5deaf04SGleb Smirnoff 		counter_u64_add(uma_skip_cnt, 1);
5807c5deaf04SGleb Smirnoff 		return (true);
5808c5deaf04SGleb Smirnoff 	}
5809c5deaf04SGleb Smirnoff 	counter_u64_add(uma_dbg_cnt, 1);
5810c5deaf04SGleb Smirnoff 
5811c5deaf04SGleb Smirnoff 	return (false);
5812c5deaf04SGleb Smirnoff }
5813c5deaf04SGleb Smirnoff 
58149542ea7bSGleb Smirnoff /*
58159542ea7bSGleb Smirnoff  * Set up the slab's freei data such that uma_dbg_free can function.
58169542ea7bSGleb Smirnoff  *
58179542ea7bSGleb Smirnoff  */
58189542ea7bSGleb Smirnoff static void
58199542ea7bSGleb Smirnoff uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item)
58209542ea7bSGleb Smirnoff {
58219542ea7bSGleb Smirnoff 	uma_keg_t keg;
58229542ea7bSGleb Smirnoff 	int freei;
58239542ea7bSGleb Smirnoff 
58249542ea7bSGleb Smirnoff 	if (slab == NULL) {
58259542ea7bSGleb Smirnoff 		slab = uma_dbg_getslab(zone, item);
58269542ea7bSGleb Smirnoff 		if (slab == NULL)
5827952c8964SMark Johnston 			panic("uma: item %p did not belong to zone %s",
58289542ea7bSGleb Smirnoff 			    item, zone->uz_name);
58299542ea7bSGleb Smirnoff 	}
5830584061b4SJeff Roberson 	keg = zone->uz_keg;
58311e0701e1SJeff Roberson 	freei = slab_item_index(slab, keg, item);
58329542ea7bSGleb Smirnoff 
5833942951baSRyan Libby 	if (BIT_TEST_SET_ATOMIC(keg->uk_ipers, freei,
5834942951baSRyan Libby 	    slab_dbg_bits(slab, keg)))
5835952c8964SMark Johnston 		panic("Duplicate alloc of %p from zone %p(%s) slab %p(%d)",
58369542ea7bSGleb Smirnoff 		    item, zone, zone->uz_name, slab, freei);
58379542ea7bSGleb Smirnoff }
58389542ea7bSGleb Smirnoff 
58399542ea7bSGleb Smirnoff /*
58409542ea7bSGleb Smirnoff  * Verifies freed addresses.  Checks for alignment, valid slab membership
58419542ea7bSGleb Smirnoff  * and duplicate frees.
58429542ea7bSGleb Smirnoff  *
58439542ea7bSGleb Smirnoff  */
58449542ea7bSGleb Smirnoff static void
58459542ea7bSGleb Smirnoff uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item)
58469542ea7bSGleb Smirnoff {
58479542ea7bSGleb Smirnoff 	uma_keg_t keg;
58489542ea7bSGleb Smirnoff 	int freei;
58499542ea7bSGleb Smirnoff 
58509542ea7bSGleb Smirnoff 	if (slab == NULL) {
58519542ea7bSGleb Smirnoff 		slab = uma_dbg_getslab(zone, item);
58529542ea7bSGleb Smirnoff 		if (slab == NULL)
5853952c8964SMark Johnston 			panic("uma: Freed item %p did not belong to zone %s",
58549542ea7bSGleb Smirnoff 			    item, zone->uz_name);
58559542ea7bSGleb Smirnoff 	}
5856584061b4SJeff Roberson 	keg = zone->uz_keg;
58571e0701e1SJeff Roberson 	freei = slab_item_index(slab, keg, item);
58589542ea7bSGleb Smirnoff 
58599542ea7bSGleb Smirnoff 	if (freei >= keg->uk_ipers)
5860952c8964SMark Johnston 		panic("Invalid free of %p from zone %p(%s) slab %p(%d)",
58619542ea7bSGleb Smirnoff 		    item, zone, zone->uz_name, slab, freei);
58629542ea7bSGleb Smirnoff 
58631e0701e1SJeff Roberson 	if (slab_item(slab, keg, freei) != item)
5864952c8964SMark Johnston 		panic("Unaligned free of %p from zone %p(%s) slab %p(%d)",
58659542ea7bSGleb Smirnoff 		    item, zone, zone->uz_name, slab, freei);
58669542ea7bSGleb Smirnoff 
5867942951baSRyan Libby 	if (!BIT_TEST_CLR_ATOMIC(keg->uk_ipers, freei,
5868942951baSRyan Libby 	    slab_dbg_bits(slab, keg)))
5869952c8964SMark Johnston 		panic("Duplicate free of %p from zone %p(%s) slab %p(%d)",
58709542ea7bSGleb Smirnoff 		    item, zone, zone->uz_name, slab, freei);
58719542ea7bSGleb Smirnoff }
58729542ea7bSGleb Smirnoff #endif /* INVARIANTS */
58739542ea7bSGleb Smirnoff 
587448c5777eSRobert Watson #ifdef DDB
587546d70077SConrad Meyer static int64_t
587646d70077SConrad Meyer get_uma_stats(uma_keg_t kz, uma_zone_t z, uint64_t *allocs, uint64_t *used,
58770223790fSConrad Meyer     uint64_t *sleeps, long *cachefree, uint64_t *xdomain)
587848c5777eSRobert Watson {
587946d70077SConrad Meyer 	uint64_t frees;
58800f9b7bf3SMark Johnston 	int i;
588148c5777eSRobert Watson 
588248c5777eSRobert Watson 	if (kz->uk_flags & UMA_ZFLAG_INTERNAL) {
588346d70077SConrad Meyer 		*allocs = counter_u64_fetch(z->uz_allocs);
58842efcc8cbSGleb Smirnoff 		frees = counter_u64_fetch(z->uz_frees);
588546d70077SConrad Meyer 		*sleeps = z->uz_sleeps;
588646d70077SConrad Meyer 		*cachefree = 0;
588746d70077SConrad Meyer 		*xdomain = 0;
588848c5777eSRobert Watson 	} else
588946d70077SConrad Meyer 		uma_zone_sumstat(z, cachefree, allocs, &frees, sleeps,
589046d70077SConrad Meyer 		    xdomain);
58918b987a77SJeff Roberson 	for (i = 0; i < vm_ndomains; i++) {
5892c6fd3e23SJeff Roberson 		*cachefree += ZDOM_GET(z, i)->uzd_nitems;
5893e20a199fSJeff Roberson 		if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
589448c5777eSRobert Watson 		    (LIST_FIRST(&kz->uk_zones) != z)))
58954ab3aee8SMark Johnston 			*cachefree += kz->uk_domain[i].ud_free_items;
58968b987a77SJeff Roberson 	}
589746d70077SConrad Meyer 	*used = *allocs - frees;
589846d70077SConrad Meyer 	return (((int64_t)*used + *cachefree) * kz->uk_size);
589946d70077SConrad Meyer }
59000f9b7bf3SMark Johnston 
5901c84c5e00SMitchell Horne DB_SHOW_COMMAND_FLAGS(uma, db_show_uma, DB_CMD_MEMSAFE)
590246d70077SConrad Meyer {
590346d70077SConrad Meyer 	const char *fmt_hdr, *fmt_entry;
590446d70077SConrad Meyer 	uma_keg_t kz;
590546d70077SConrad Meyer 	uma_zone_t z;
590646d70077SConrad Meyer 	uint64_t allocs, used, sleeps, xdomain;
590746d70077SConrad Meyer 	long cachefree;
590846d70077SConrad Meyer 	/* variables for sorting */
590946d70077SConrad Meyer 	uma_keg_t cur_keg;
591046d70077SConrad Meyer 	uma_zone_t cur_zone, last_zone;
591146d70077SConrad Meyer 	int64_t cur_size, last_size, size;
591246d70077SConrad Meyer 	int ties;
591346d70077SConrad Meyer 
591446d70077SConrad Meyer 	/* /i option produces machine-parseable CSV output */
591546d70077SConrad Meyer 	if (modif[0] == 'i') {
591646d70077SConrad Meyer 		fmt_hdr = "%s,%s,%s,%s,%s,%s,%s,%s,%s\n";
591746d70077SConrad Meyer 		fmt_entry = "\"%s\",%ju,%jd,%ld,%ju,%ju,%u,%jd,%ju\n";
591846d70077SConrad Meyer 	} else {
591946d70077SConrad Meyer 		fmt_hdr = "%18s %6s %7s %7s %11s %7s %7s %10s %8s\n";
592046d70077SConrad Meyer 		fmt_entry = "%18s %6ju %7jd %7ld %11ju %7ju %7u %10jd %8ju\n";
592146d70077SConrad Meyer 	}
592246d70077SConrad Meyer 
592346d70077SConrad Meyer 	db_printf(fmt_hdr, "Zone", "Size", "Used", "Free", "Requests",
592446d70077SConrad Meyer 	    "Sleeps", "Bucket", "Total Mem", "XFree");
592546d70077SConrad Meyer 
592646d70077SConrad Meyer 	/* Sort the zones with largest size first. */
592746d70077SConrad Meyer 	last_zone = NULL;
592846d70077SConrad Meyer 	last_size = INT64_MAX;
592946d70077SConrad Meyer 	for (;;) {
593046d70077SConrad Meyer 		cur_zone = NULL;
593146d70077SConrad Meyer 		cur_size = -1;
593246d70077SConrad Meyer 		ties = 0;
593346d70077SConrad Meyer 		LIST_FOREACH(kz, &uma_kegs, uk_link) {
593446d70077SConrad Meyer 			LIST_FOREACH(z, &kz->uk_zones, uz_link) {
593546d70077SConrad Meyer 				/*
593646d70077SConrad Meyer 				 * In the case of size ties, print out zones
593746d70077SConrad Meyer 				 * in the order they are encountered.  That is,
593846d70077SConrad Meyer 				 * when we encounter the most recently output
593946d70077SConrad Meyer 				 * zone, we have already printed all preceding
594046d70077SConrad Meyer 				 * ties, and we must print all following ties.
594146d70077SConrad Meyer 				 */
594246d70077SConrad Meyer 				if (z == last_zone) {
594346d70077SConrad Meyer 					ties = 1;
594446d70077SConrad Meyer 					continue;
594546d70077SConrad Meyer 				}
594646d70077SConrad Meyer 				size = get_uma_stats(kz, z, &allocs, &used,
594746d70077SConrad Meyer 				    &sleeps, &cachefree, &xdomain);
594846d70077SConrad Meyer 				if (size > cur_size && size < last_size + ties)
594946d70077SConrad Meyer 				{
595046d70077SConrad Meyer 					cur_size = size;
595146d70077SConrad Meyer 					cur_zone = z;
595246d70077SConrad Meyer 					cur_keg = kz;
595346d70077SConrad Meyer 				}
595446d70077SConrad Meyer 			}
595546d70077SConrad Meyer 		}
595646d70077SConrad Meyer 		if (cur_zone == NULL)
595746d70077SConrad Meyer 			break;
595846d70077SConrad Meyer 
595946d70077SConrad Meyer 		size = get_uma_stats(cur_keg, cur_zone, &allocs, &used,
596046d70077SConrad Meyer 		    &sleeps, &cachefree, &xdomain);
596146d70077SConrad Meyer 		db_printf(fmt_entry, cur_zone->uz_name,
596246d70077SConrad Meyer 		    (uintmax_t)cur_keg->uk_size, (intmax_t)used, cachefree,
596346d70077SConrad Meyer 		    (uintmax_t)allocs, (uintmax_t)sleeps,
596420a4e154SJeff Roberson 		    (unsigned)cur_zone->uz_bucket_size, (intmax_t)size,
596520a4e154SJeff Roberson 		    xdomain);
596646d70077SConrad Meyer 
5967687c94aaSJohn Baldwin 		if (db_pager_quit)
5968687c94aaSJohn Baldwin 			return;
596946d70077SConrad Meyer 		last_zone = cur_zone;
597046d70077SConrad Meyer 		last_size = cur_size;
597148c5777eSRobert Watson 	}
597248c5777eSRobert Watson }
597303175483SAlexander Motin 
5974c84c5e00SMitchell Horne DB_SHOW_COMMAND_FLAGS(umacache, db_show_umacache, DB_CMD_MEMSAFE)
597503175483SAlexander Motin {
597603175483SAlexander Motin 	uma_zone_t z;
5977ab3185d1SJeff Roberson 	uint64_t allocs, frees;
59780f9b7bf3SMark Johnston 	long cachefree;
59790f9b7bf3SMark Johnston 	int i;
598003175483SAlexander Motin 
598103175483SAlexander Motin 	db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free",
598203175483SAlexander Motin 	    "Requests", "Bucket");
598303175483SAlexander Motin 	LIST_FOREACH(z, &uma_cachezones, uz_link) {
5984c1685086SJeff Roberson 		uma_zone_sumstat(z, &cachefree, &allocs, &frees, NULL, NULL);
59850f9b7bf3SMark Johnston 		for (i = 0; i < vm_ndomains; i++)
5986c6fd3e23SJeff Roberson 			cachefree += ZDOM_GET(z, i)->uzd_nitems;
59870f9b7bf3SMark Johnston 		db_printf("%18s %8ju %8jd %8ld %12ju %8u\n",
598803175483SAlexander Motin 		    z->uz_name, (uintmax_t)z->uz_size,
598903175483SAlexander Motin 		    (intmax_t)(allocs - frees), cachefree,
599020a4e154SJeff Roberson 		    (uintmax_t)allocs, z->uz_bucket_size);
599103175483SAlexander Motin 		if (db_pager_quit)
599203175483SAlexander Motin 			return;
599303175483SAlexander Motin 	}
599403175483SAlexander Motin }
59959542ea7bSGleb Smirnoff #endif	/* DDB */
5996