xref: /freebsd/sys/vm/uma_core.c (revision 20a4e15451cd1bc0302931dda10c13310183117d)
160727d8bSWarner Losh /*-
2fe267a55SPedro F. Giffuni  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3fe267a55SPedro F. Giffuni  *
4ef72505eSJeff Roberson  * Copyright (c) 2002-2005, 2009, 2013 Jeffrey Roberson <jeff@FreeBSD.org>
508ecce74SRobert Watson  * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
6ae4e9636SRobert Watson  * Copyright (c) 2004-2006 Robert N. M. Watson
708ecce74SRobert Watson  * All rights reserved.
88355f576SJeff Roberson  *
98355f576SJeff Roberson  * Redistribution and use in source and binary forms, with or without
108355f576SJeff Roberson  * modification, are permitted provided that the following conditions
118355f576SJeff Roberson  * are met:
128355f576SJeff Roberson  * 1. Redistributions of source code must retain the above copyright
138355f576SJeff Roberson  *    notice unmodified, this list of conditions, and the following
148355f576SJeff Roberson  *    disclaimer.
158355f576SJeff Roberson  * 2. Redistributions in binary form must reproduce the above copyright
168355f576SJeff Roberson  *    notice, this list of conditions and the following disclaimer in the
178355f576SJeff Roberson  *    documentation and/or other materials provided with the distribution.
188355f576SJeff Roberson  *
198355f576SJeff Roberson  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
208355f576SJeff Roberson  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
218355f576SJeff Roberson  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
228355f576SJeff Roberson  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
238355f576SJeff Roberson  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
248355f576SJeff Roberson  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
258355f576SJeff Roberson  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
268355f576SJeff Roberson  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
278355f576SJeff Roberson  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
288355f576SJeff Roberson  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
298355f576SJeff Roberson  */
308355f576SJeff Roberson 
318355f576SJeff Roberson /*
328355f576SJeff Roberson  * uma_core.c  Implementation of the Universal Memory allocator
338355f576SJeff Roberson  *
348355f576SJeff Roberson  * This allocator is intended to replace the multitude of similar object caches
358355f576SJeff Roberson  * in the standard FreeBSD kernel.  The intent is to be flexible as well as
36763df3ecSPedro F. Giffuni  * efficient.  A primary design goal is to return unused memory to the rest of
378355f576SJeff Roberson  * the system.  This will make the system as a whole more flexible due to the
388355f576SJeff Roberson  * ability to move memory to subsystems which most need it instead of leaving
398355f576SJeff Roberson  * pools of reserved memory unused.
408355f576SJeff Roberson  *
418355f576SJeff Roberson  * The basic ideas stem from similar slab/zone based allocators whose algorithms
428355f576SJeff Roberson  * are well known.
438355f576SJeff Roberson  *
448355f576SJeff Roberson  */
458355f576SJeff Roberson 
468355f576SJeff Roberson /*
478355f576SJeff Roberson  * TODO:
488355f576SJeff Roberson  *	- Improve memory usage for large allocations
498355f576SJeff Roberson  *	- Investigate cache size adjustments
508355f576SJeff Roberson  */
518355f576SJeff Roberson 
52874651b1SDavid E. O'Brien #include <sys/cdefs.h>
53874651b1SDavid E. O'Brien __FBSDID("$FreeBSD$");
54874651b1SDavid E. O'Brien 
5548c5777eSRobert Watson #include "opt_ddb.h"
568355f576SJeff Roberson #include "opt_param.h"
578d689e04SGleb Smirnoff #include "opt_vm.h"
5848c5777eSRobert Watson 
598355f576SJeff Roberson #include <sys/param.h>
608355f576SJeff Roberson #include <sys/systm.h>
61ef72505eSJeff Roberson #include <sys/bitset.h>
62194a979eSMark Johnston #include <sys/domainset.h>
639b43bc27SAndriy Gapon #include <sys/eventhandler.h>
648355f576SJeff Roberson #include <sys/kernel.h>
658355f576SJeff Roberson #include <sys/types.h>
66ad5b0f5bSJeff Roberson #include <sys/limits.h>
678355f576SJeff Roberson #include <sys/queue.h>
688355f576SJeff Roberson #include <sys/malloc.h>
693659f747SRobert Watson #include <sys/ktr.h>
708355f576SJeff Roberson #include <sys/lock.h>
718355f576SJeff Roberson #include <sys/sysctl.h>
728355f576SJeff Roberson #include <sys/mutex.h>
734c1cc01cSJohn Baldwin #include <sys/proc.h>
7410cb2424SMark Murray #include <sys/random.h>
7589f6b863SAttilio Rao #include <sys/rwlock.h>
767a52a97eSRobert Watson #include <sys/sbuf.h>
77a2de44abSAlexander Motin #include <sys/sched.h>
788355f576SJeff Roberson #include <sys/smp.h>
79e60b2fcbSGleb Smirnoff #include <sys/taskqueue.h>
8086bbae32SJeff Roberson #include <sys/vmmeter.h>
8186bbae32SJeff Roberson 
828355f576SJeff Roberson #include <vm/vm.h>
83194a979eSMark Johnston #include <vm/vm_domainset.h>
848355f576SJeff Roberson #include <vm/vm_object.h>
858355f576SJeff Roberson #include <vm/vm_page.h>
86a4915c21SAttilio Rao #include <vm/vm_pageout.h>
878355f576SJeff Roberson #include <vm/vm_param.h>
88ab3185d1SJeff Roberson #include <vm/vm_phys.h>
8930c5525bSAndrew Gallatin #include <vm/vm_pagequeue.h>
908355f576SJeff Roberson #include <vm/vm_map.h>
918355f576SJeff Roberson #include <vm/vm_kern.h>
928355f576SJeff Roberson #include <vm/vm_extern.h>
938355f576SJeff Roberson #include <vm/uma.h>
948355f576SJeff Roberson #include <vm/uma_int.h>
95639c9550SJeff Roberson #include <vm/uma_dbg.h>
968355f576SJeff Roberson 
9748c5777eSRobert Watson #include <ddb/ddb.h>
9848c5777eSRobert Watson 
998d689e04SGleb Smirnoff #ifdef DEBUG_MEMGUARD
1008d689e04SGleb Smirnoff #include <vm/memguard.h>
1018d689e04SGleb Smirnoff #endif
1028d689e04SGleb Smirnoff 
1038355f576SJeff Roberson /*
104ab3185d1SJeff Roberson  * These are the zone and keg from which all zones are spawned.
1058355f576SJeff Roberson  */
106ab3185d1SJeff Roberson static uma_zone_t kegs;
107ab3185d1SJeff Roberson static uma_zone_t zones;
1088355f576SJeff Roberson 
109ab3185d1SJeff Roberson /* This is the zone from which all offpage uma_slab_ts are allocated. */
1108355f576SJeff Roberson static uma_zone_t slabzone;
1118355f576SJeff Roberson 
1128355f576SJeff Roberson /*
1138355f576SJeff Roberson  * The initial hash tables come out of this zone so they can be allocated
1148355f576SJeff Roberson  * prior to malloc coming up.
1158355f576SJeff Roberson  */
1168355f576SJeff Roberson static uma_zone_t hashzone;
1178355f576SJeff Roberson 
1181e319f6dSRobert Watson /* The boot-time adjusted value for cache line alignment. */
119e4cd31ddSJeff Roberson int uma_align_cache = 64 - 1;
1201e319f6dSRobert Watson 
121961647dfSJeff Roberson static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
122*20a4e154SJeff Roberson static MALLOC_DEFINE(M_UMA, "UMA", "UMA Misc");
123961647dfSJeff Roberson 
1248355f576SJeff Roberson /*
12586bbae32SJeff Roberson  * Are we allowed to allocate buckets?
12686bbae32SJeff Roberson  */
12786bbae32SJeff Roberson static int bucketdisable = 1;
12886bbae32SJeff Roberson 
129099a0e58SBosko Milekic /* Linked list of all kegs in the system */
13013e403fdSAntoine Brodin static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs);
1318355f576SJeff Roberson 
13203175483SAlexander Motin /* Linked list of all cache-only zones in the system */
13303175483SAlexander Motin static LIST_HEAD(,uma_zone) uma_cachezones =
13403175483SAlexander Motin     LIST_HEAD_INITIALIZER(uma_cachezones);
13503175483SAlexander Motin 
136111fbcd5SBryan Venteicher /* This RW lock protects the keg list */
137fe933c1dSMateusz Guzik static struct rwlock_padalign __exclusive_cache_line uma_rwlock;
1388355f576SJeff Roberson 
139ac0a6fd0SGleb Smirnoff /*
140ac0a6fd0SGleb Smirnoff  * Pointer and counter for the pool of pages that is preallocated at
141f7d35785SGleb Smirnoff  * startup to bootstrap UMA.
142ac0a6fd0SGleb Smirnoff  */
143ac0a6fd0SGleb Smirnoff static char *bootmem;
144ac0a6fd0SGleb Smirnoff static int boot_pages;
1458355f576SJeff Roberson 
14608cfa56eSMark Johnston static struct sx uma_reclaim_lock;
14795c4bf75SKonstantin Belousov 
148fbd95859SMark Johnston /*
149fbd95859SMark Johnston  * kmem soft limit, initialized by uma_set_limit().  Ensure that early
150fbd95859SMark Johnston  * allocations don't trigger a wakeup of the reclaim thread.
151fbd95859SMark Johnston  */
152ad5b0f5bSJeff Roberson static unsigned long uma_kmem_limit = LONG_MAX;
153fbd95859SMark Johnston SYSCTL_ULONG(_vm, OID_AUTO, uma_kmem_limit, CTLFLAG_RD, &uma_kmem_limit, 0,
154fbd95859SMark Johnston     "UMA kernel memory soft limit");
155058f0f74SMark Johnston static unsigned long uma_kmem_total;
156fbd95859SMark Johnston SYSCTL_ULONG(_vm, OID_AUTO, uma_kmem_total, CTLFLAG_RD, &uma_kmem_total, 0,
157fbd95859SMark Johnston     "UMA kernel memory usage");
1582e47807cSJeff Roberson 
1598355f576SJeff Roberson /* Is the VM done starting up? */
160f4bef67cSGleb Smirnoff static enum { BOOT_COLD = 0, BOOT_STRAPPED, BOOT_PAGEALLOC, BOOT_BUCKETS,
161f4bef67cSGleb Smirnoff     BOOT_RUNNING } booted = BOOT_COLD;
1628355f576SJeff Roberson 
163ef72505eSJeff Roberson /*
1649643769aSJeff Roberson  * This is the handle used to schedule events that need to happen
1659643769aSJeff Roberson  * outside of the allocation fast path.
1669643769aSJeff Roberson  */
1678355f576SJeff Roberson static struct callout uma_callout;
1689643769aSJeff Roberson #define	UMA_TIMEOUT	20		/* Seconds for callout interval. */
1698355f576SJeff Roberson 
1708355f576SJeff Roberson /*
1718355f576SJeff Roberson  * This structure is passed as the zone ctor arg so that I don't have to create
1728355f576SJeff Roberson  * a special allocation function just for zones.
1738355f576SJeff Roberson  */
1748355f576SJeff Roberson struct uma_zctor_args {
175bb196eb4SMatthew D Fleming 	const char *name;
176c3bdc05fSAndrew R. Reiter 	size_t size;
1778355f576SJeff Roberson 	uma_ctor ctor;
1788355f576SJeff Roberson 	uma_dtor dtor;
1798355f576SJeff Roberson 	uma_init uminit;
1808355f576SJeff Roberson 	uma_fini fini;
1810095a784SJeff Roberson 	uma_import import;
1820095a784SJeff Roberson 	uma_release release;
1830095a784SJeff Roberson 	void *arg;
184099a0e58SBosko Milekic 	uma_keg_t keg;
185099a0e58SBosko Milekic 	int align;
18685dcf349SGleb Smirnoff 	uint32_t flags;
187099a0e58SBosko Milekic };
188099a0e58SBosko Milekic 
189099a0e58SBosko Milekic struct uma_kctor_args {
190099a0e58SBosko Milekic 	uma_zone_t zone;
191099a0e58SBosko Milekic 	size_t size;
192099a0e58SBosko Milekic 	uma_init uminit;
193099a0e58SBosko Milekic 	uma_fini fini;
1948355f576SJeff Roberson 	int align;
19585dcf349SGleb Smirnoff 	uint32_t flags;
1968355f576SJeff Roberson };
1978355f576SJeff Roberson 
198cae33c14SJeff Roberson struct uma_bucket_zone {
199cae33c14SJeff Roberson 	uma_zone_t	ubz_zone;
200cae33c14SJeff Roberson 	char		*ubz_name;
201fc03d22bSJeff Roberson 	int		ubz_entries;	/* Number of items it can hold. */
202fc03d22bSJeff Roberson 	int		ubz_maxsize;	/* Maximum allocation size per-item. */
203cae33c14SJeff Roberson };
204cae33c14SJeff Roberson 
205f9d27e75SRobert Watson /*
206fc03d22bSJeff Roberson  * Compute the actual number of bucket entries that fit, after the header,
207fc03d22bSJeff Roberson  * when the whole bucket is packed into an allocation of n pointer-sized
207fc03d22bSJeff Roberson  * words, for more efficient space utilization.
208f9d27e75SRobert Watson  */
209fc03d22bSJeff Roberson #define	BUCKET_SIZE(n)						\
210fc03d22bSJeff Roberson     (((sizeof(void *) * (n)) - sizeof(struct uma_bucket)) / sizeof(void *))
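/*
 * A worked example (a sketch, assuming LP64 pointers and a 24-byte
 * struct uma_bucket header): BUCKET_SIZE(4) = (4 * 8 - 24) / 8 = 1, a
 * 32-byte bucket holding one item, while BUCKET_SIZE(256) =
 * (256 * 8 - 24) / 8 = 253 items in a 2KB allocation.
 */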
211fc03d22bSJeff Roberson 
2121aa6c758SAlexander Motin #define	BUCKET_MAX	BUCKET_SIZE(256)
213eda1b016SJeff Roberson #define	BUCKET_MIN	BUCKET_SIZE(4)
214fc03d22bSJeff Roberson 
215fc03d22bSJeff Roberson struct uma_bucket_zone bucket_zones[] = {
2166fd34d6fSJeff Roberson 	{ NULL, "4 Bucket", BUCKET_SIZE(4), 4096 },
217f3932e90SAlexander Motin 	{ NULL, "6 Bucket", BUCKET_SIZE(6), 3072 },
2186fd34d6fSJeff Roberson 	{ NULL, "8 Bucket", BUCKET_SIZE(8), 2048 },
219f3932e90SAlexander Motin 	{ NULL, "12 Bucket", BUCKET_SIZE(12), 1536 },
2206fd34d6fSJeff Roberson 	{ NULL, "16 Bucket", BUCKET_SIZE(16), 1024 },
221fc03d22bSJeff Roberson 	{ NULL, "32 Bucket", BUCKET_SIZE(32), 512 },
222fc03d22bSJeff Roberson 	{ NULL, "64 Bucket", BUCKET_SIZE(64), 256 },
223fc03d22bSJeff Roberson 	{ NULL, "128 Bucket", BUCKET_SIZE(128), 128 },
2241aa6c758SAlexander Motin 	{ NULL, "256 Bucket", BUCKET_SIZE(256), 64 },
225fc03d22bSJeff Roberson 	{ NULL, NULL, 0}
226fc03d22bSJeff Roberson };
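/*
 * An observation about the table above (not from the original source):
 * rows pair larger buckets with smaller maximum item sizes, so a full
 * bucket chosen by bucket_select() caches at most roughly 16KB of items,
 * e.g. 253 entries * 64 bytes or 125 entries * 128 bytes (LP64 assumed).
 */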
227cae33c14SJeff Roberson 
2282019094aSRobert Watson /*
2292019094aSRobert Watson  * Flags and enumerations to be passed to internal functions.
2302019094aSRobert Watson  */
231bb15d1c7SGleb Smirnoff enum zfreeskip {
232bb15d1c7SGleb Smirnoff 	SKIP_NONE =	0,
233bb15d1c7SGleb Smirnoff 	SKIP_CNT =	0x00000001,
234bb15d1c7SGleb Smirnoff 	SKIP_DTOR =	0x00010000,
235bb15d1c7SGleb Smirnoff 	SKIP_FINI =	0x00020000,
236bb15d1c7SGleb Smirnoff };
237b23f72e9SBrian Feldman 
2388355f576SJeff Roberson /* Prototypes. */
2398355f576SJeff Roberson 
240f4bef67cSGleb Smirnoff int	uma_startup_count(int);
241f4bef67cSGleb Smirnoff void	uma_startup(void *, int);
242f4bef67cSGleb Smirnoff void	uma_startup1(void);
243f4bef67cSGleb Smirnoff void	uma_startup2(void);
244f4bef67cSGleb Smirnoff 
245ab3185d1SJeff Roberson static void *noobj_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
246ab3185d1SJeff Roberson static void *page_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
247ab3059a8SMatt Macy static void *pcpu_page_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
248ab3185d1SJeff Roberson static void *startup_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
249f2c2231eSRyan Stone static void page_free(void *, vm_size_t, uint8_t);
250ab3059a8SMatt Macy static void pcpu_page_free(void *, vm_size_t, uint8_t);
25186220393SMark Johnston static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int, int, int);
2529643769aSJeff Roberson static void cache_drain(uma_zone_t);
2538355f576SJeff Roberson static void bucket_drain(uma_zone_t, uma_bucket_t);
25408cfa56eSMark Johnston static void bucket_cache_reclaim(uma_zone_t zone, bool);
255b23f72e9SBrian Feldman static int keg_ctor(void *, int, void *, int);
256099a0e58SBosko Milekic static void keg_dtor(void *, int, void *);
257b23f72e9SBrian Feldman static int zone_ctor(void *, int, void *, int);
2589c2cd7e5SJeff Roberson static void zone_dtor(void *, int, void *);
259b23f72e9SBrian Feldman static int zero_init(void *, int, int);
260e20a199fSJeff Roberson static void keg_small_init(uma_keg_t keg);
261e20a199fSJeff Roberson static void keg_large_init(uma_keg_t keg);
262*20a4e154SJeff Roberson static void zone_foreach(void (*zfunc)(uma_zone_t, void *), void *);
263*20a4e154SJeff Roberson static void zone_timeout(uma_zone_t zone, void *);
2643b2f2cb8SAlexander Motin static int hash_alloc(struct uma_hash *, u_int);
2650aef6126SJeff Roberson static int hash_expand(struct uma_hash *, struct uma_hash *);
2660aef6126SJeff Roberson static void hash_free(struct uma_hash *hash);
2678355f576SJeff Roberson static void uma_timeout(void *);
2688355f576SJeff Roberson static void uma_startup3(void);
269ab3185d1SJeff Roberson static void *zone_alloc_item(uma_zone_t, void *, int, int);
270bb15d1c7SGleb Smirnoff static void *zone_alloc_item_locked(uma_zone_t, void *, int, int);
2710095a784SJeff Roberson static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip);
27286bbae32SJeff Roberson static void bucket_enable(void);
273cae33c14SJeff Roberson static void bucket_init(void);
2746fd34d6fSJeff Roberson static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int);
2756fd34d6fSJeff Roberson static void bucket_free(uma_zone_t zone, uma_bucket_t, void *);
276cae33c14SJeff Roberson static void bucket_zone_drain(void);
277beb8beefSJeff Roberson static uma_bucket_t zone_alloc_bucket(uma_zone_t, void *, int, int);
278ab3185d1SJeff Roberson static uma_slab_t zone_fetch_slab(uma_zone_t, uma_keg_t, int, int);
2790095a784SJeff Roberson static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab);
280bb15d1c7SGleb Smirnoff static void slab_free_item(uma_zone_t zone, uma_slab_t slab, void *item);
281e20a199fSJeff Roberson static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
28285dcf349SGleb Smirnoff     uma_fini fini, int align, uint32_t flags);
283ab3185d1SJeff Roberson static int zone_import(uma_zone_t, void **, int, int, int);
284ab3185d1SJeff Roberson static void zone_release(uma_zone_t, void **, int);
285ab3185d1SJeff Roberson static void uma_zero_item(void *, uma_zone_t);
286beb8beefSJeff Roberson static bool cache_alloc(uma_zone_t, uma_cache_t, void *, int);
2870a81b439SJeff Roberson static bool cache_free(uma_zone_t, uma_cache_t, void *, void *, int);
288bbee39c6SJeff Roberson 
2897a52a97eSRobert Watson static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
2907a52a97eSRobert Watson static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
291*20a4e154SJeff Roberson static int sysctl_handle_uma_zone_allocs(SYSCTL_HANDLER_ARGS);
292*20a4e154SJeff Roberson static int sysctl_handle_uma_zone_frees(SYSCTL_HANDLER_ARGS);
2938355f576SJeff Roberson 
2949542ea7bSGleb Smirnoff #ifdef INVARIANTS
295c5deaf04SGleb Smirnoff static bool uma_dbg_kskip(uma_keg_t keg, void *mem);
296c5deaf04SGleb Smirnoff static bool uma_dbg_zskip(uma_zone_t zone, void *mem);
2979542ea7bSGleb Smirnoff static void uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item);
2989542ea7bSGleb Smirnoff static void uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item);
299c5deaf04SGleb Smirnoff 
300*20a4e154SJeff Roberson SYSCTL_NODE(_vm, OID_AUTO, uma, CTLFLAG_RW, 0, "Universal Memory Allocator");
301c5deaf04SGleb Smirnoff static SYSCTL_NODE(_vm, OID_AUTO, debug, CTLFLAG_RD, 0,
302c5deaf04SGleb Smirnoff     "Memory allocation debugging");
303c5deaf04SGleb Smirnoff 
304c5deaf04SGleb Smirnoff static u_int dbg_divisor = 1;
305c5deaf04SGleb Smirnoff SYSCTL_UINT(_vm_debug, OID_AUTO, divisor,
306c5deaf04SGleb Smirnoff     CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &dbg_divisor, 0,
307c5deaf04SGleb Smirnoff     "Debug & thrash every nth item in the memory allocator");
308c5deaf04SGleb Smirnoff 
309c5deaf04SGleb Smirnoff static counter_u64_t uma_dbg_cnt = EARLY_COUNTER;
310c5deaf04SGleb Smirnoff static counter_u64_t uma_skip_cnt = EARLY_COUNTER;
311c5deaf04SGleb Smirnoff SYSCTL_COUNTER_U64(_vm_debug, OID_AUTO, trashed, CTLFLAG_RD,
312c5deaf04SGleb Smirnoff     &uma_dbg_cnt, "memory items debugged");
313c5deaf04SGleb Smirnoff SYSCTL_COUNTER_U64(_vm_debug, OID_AUTO, skipped, CTLFLAG_RD,
314c5deaf04SGleb Smirnoff     &uma_skip_cnt, "memory items skipped, not debugged");
3159542ea7bSGleb Smirnoff #endif
3169542ea7bSGleb Smirnoff 
3178355f576SJeff Roberson SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
3188355f576SJeff Roberson 
3197a52a97eSRobert Watson SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
3207a52a97eSRobert Watson     0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
3217a52a97eSRobert Watson 
3227a52a97eSRobert Watson SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
3237a52a97eSRobert Watson     0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");
3247a52a97eSRobert Watson 
3252f891cd5SPawel Jakub Dawidek static int zone_warnings = 1;
326af3b2549SHans Petter Selasky SYSCTL_INT(_vm, OID_AUTO, zone_warnings, CTLFLAG_RWTUN, &zone_warnings, 0,
3272f891cd5SPawel Jakub Dawidek     "Warn when a UMA zone becomes full");
3282f891cd5SPawel Jakub Dawidek 
3292e47807cSJeff Roberson /* Adjust bytes under management by UMA. */
3302e47807cSJeff Roberson static inline void
3312e47807cSJeff Roberson uma_total_dec(unsigned long size)
3322e47807cSJeff Roberson {
3332e47807cSJeff Roberson 
3342e47807cSJeff Roberson 	atomic_subtract_long(&uma_kmem_total, size);
3352e47807cSJeff Roberson }
3362e47807cSJeff Roberson 
3372e47807cSJeff Roberson static inline void
3382e47807cSJeff Roberson uma_total_inc(unsigned long size)
3392e47807cSJeff Roberson {
3402e47807cSJeff Roberson 
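	/*
	 * Note: atomic_fetchadd_long() returns the value *before* the
	 * addition, so the reclaim thread is woken only once the running
	 * total had already crossed the soft limit before this allocation.
	 */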
3412e47807cSJeff Roberson 	if (atomic_fetchadd_long(&uma_kmem_total, size) > uma_kmem_limit)
3422e47807cSJeff Roberson 		uma_reclaim_wakeup();
3432e47807cSJeff Roberson }
3442e47807cSJeff Roberson 
34586bbae32SJeff Roberson /*
34686bbae32SJeff Roberson  * This routine checks to see whether or not it's safe to enable buckets.
34786bbae32SJeff Roberson  */
34886bbae32SJeff Roberson static void
34986bbae32SJeff Roberson bucket_enable(void)
35086bbae32SJeff Roberson {
351251386b4SMaksim Yevmenkin 	bucketdisable = vm_page_count_min();
35286bbae32SJeff Roberson }
35386bbae32SJeff Roberson 
354dc2c7965SRobert Watson /*
355dc2c7965SRobert Watson  * Initialize bucket_zones, the array of zones of buckets of various sizes.
356dc2c7965SRobert Watson  *
357dc2c7965SRobert Watson  * For each zone, calculate the memory required for each bucket, consisting
358fc03d22bSJeff Roberson  * of the header and an array of pointers.
359dc2c7965SRobert Watson  */
360cae33c14SJeff Roberson static void
361cae33c14SJeff Roberson bucket_init(void)
362cae33c14SJeff Roberson {
363cae33c14SJeff Roberson 	struct uma_bucket_zone *ubz;
364cae33c14SJeff Roberson 	int size;
365cae33c14SJeff Roberson 
366d74e6a1dSAlan Cox 	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++) {
367cae33c14SJeff Roberson 		size = roundup(sizeof(struct uma_bucket), sizeof(void *));
368cae33c14SJeff Roberson 		size += sizeof(void *) * ubz->ubz_entries;
369cae33c14SJeff Roberson 		ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
370e20a199fSJeff Roberson 		    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
371ab3185d1SJeff Roberson 		    UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET | UMA_ZONE_NUMA);
372cae33c14SJeff Roberson 	}
373cae33c14SJeff Roberson }
374cae33c14SJeff Roberson 
375dc2c7965SRobert Watson /*
376dc2c7965SRobert Watson  * Given a desired number of entries for a bucket, return the zone from which
377dc2c7965SRobert Watson  * to allocate the bucket.
378dc2c7965SRobert Watson  */
379dc2c7965SRobert Watson static struct uma_bucket_zone *
380dc2c7965SRobert Watson bucket_zone_lookup(int entries)
381dc2c7965SRobert Watson {
382fc03d22bSJeff Roberson 	struct uma_bucket_zone *ubz;
383dc2c7965SRobert Watson 
384fc03d22bSJeff Roberson 	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
385fc03d22bSJeff Roberson 		if (ubz->ubz_entries >= entries)
386fc03d22bSJeff Roberson 			return (ubz);
387fc03d22bSJeff Roberson 	ubz--;
388fc03d22bSJeff Roberson 	return (ubz);
389fc03d22bSJeff Roberson }
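/*
 * For example (a sketch; entry counts assume LP64 and a 24-byte bucket
 * header): bucket_zone_lookup(8) returns the first row holding at least
 * 8 items, the 9-entry "12 Bucket" zone, while a request larger than the
 * biggest row falls off the end of the loop and steps back to the
 * 253-entry "256 Bucket" zone.
 */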
390fc03d22bSJeff Roberson 
391003cf08bSMark Johnston static struct uma_bucket_zone *
392003cf08bSMark Johnston bucket_zone_max(uma_zone_t zone, int nitems)
393003cf08bSMark Johnston {
394003cf08bSMark Johnston 	struct uma_bucket_zone *ubz;
395003cf08bSMark Johnston 	int bpcpu;
396003cf08bSMark Johnston 
397003cf08bSMark Johnston 	bpcpu = 2;
398003cf08bSMark Johnston #ifdef UMA_XDOMAIN
399003cf08bSMark Johnston 	if ((zone->uz_flags & UMA_ZONE_NUMA) != 0)
400003cf08bSMark Johnston 		/* Count the cross-domain bucket. */
401003cf08bSMark Johnston 		bpcpu++;
402003cf08bSMark Johnston #endif
403003cf08bSMark Johnston 
404003cf08bSMark Johnston 	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
405003cf08bSMark Johnston 		if (ubz->ubz_entries * bpcpu * mp_ncpus > nitems)
406003cf08bSMark Johnston 			break;
407003cf08bSMark Johnston 	if (ubz == &bucket_zones[0])
408003cf08bSMark Johnston 		ubz = NULL;
409003cf08bSMark Johnston 	else
410003cf08bSMark Johnston 		ubz--;
411003cf08bSMark Johnston 	return (ubz);
412003cf08bSMark Johnston }
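/*
 * A worked example (numbers assumed, LP64 entry counts): with
 * nitems = 2000, mp_ncpus = 8 and bpcpu = 2, the loop stops at the first
 * row where entries * 16 exceeds 2000 (the 253-entry row, at 4048) and
 * steps back to the 125-entry "128 Bucket" zone.
 */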
413003cf08bSMark Johnston 
414fc03d22bSJeff Roberson static int
415fc03d22bSJeff Roberson bucket_select(int size)
416fc03d22bSJeff Roberson {
417fc03d22bSJeff Roberson 	struct uma_bucket_zone *ubz;
418fc03d22bSJeff Roberson 
419fc03d22bSJeff Roberson 	ubz = &bucket_zones[0];
420fc03d22bSJeff Roberson 	if (size > ubz->ubz_maxsize)
421fc03d22bSJeff Roberson 		return MAX((ubz->ubz_maxsize * ubz->ubz_entries) / size, 1);
422fc03d22bSJeff Roberson 
423fc03d22bSJeff Roberson 	for (; ubz->ubz_entries != 0; ubz++)
424fc03d22bSJeff Roberson 		if (ubz->ubz_maxsize < size)
425fc03d22bSJeff Roberson 			break;
426fc03d22bSJeff Roberson 	ubz--;
427fc03d22bSJeff Roberson 	return (ubz->ubz_entries);
428dc2c7965SRobert Watson }
429dc2c7965SRobert Watson 
430cae33c14SJeff Roberson static uma_bucket_t
4316fd34d6fSJeff Roberson bucket_alloc(uma_zone_t zone, void *udata, int flags)
432cae33c14SJeff Roberson {
433cae33c14SJeff Roberson 	struct uma_bucket_zone *ubz;
434cae33c14SJeff Roberson 	uma_bucket_t bucket;
435cae33c14SJeff Roberson 
436cae33c14SJeff Roberson 	/*
437cae33c14SJeff Roberson 	 * This is to stop us from allocating per cpu buckets while we're
4383803b26bSDag-Erling Smørgrav 	 * running out of vm.boot_pages.  Otherwise, we would exhaust the
439cae33c14SJeff Roberson 	 * boot pages.  This also prevents us from allocating buckets in
440cae33c14SJeff Roberson 	 * low memory situations.
441cae33c14SJeff Roberson 	 */
442cae33c14SJeff Roberson 	if (bucketdisable)
443cae33c14SJeff Roberson 		return (NULL);
4446fd34d6fSJeff Roberson 	/*
4456fd34d6fSJeff Roberson 	 * To limit bucket recursion we store the original zone flags
4466fd34d6fSJeff Roberson 	 * in a cookie passed via zalloc_arg/zfree_arg.  This allows the
4476fd34d6fSJeff Roberson 	 * NOVM flag to persist even through deep recursions.  We also
4486fd34d6fSJeff Roberson 	 * store ZFLAG_BUCKET once we have recursed attempting to allocate
4496fd34d6fSJeff Roberson 	 * a bucket for a bucket zone so we do not allow infinite bucket
4506fd34d6fSJeff Roberson 	 * recursion.  This cookie will even persist to frees of unused
4516fd34d6fSJeff Roberson 	 * buckets via the allocation path or bucket allocations in the
4526fd34d6fSJeff Roberson 	 * free path.
4536fd34d6fSJeff Roberson 	 */
4546fd34d6fSJeff Roberson 	if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
4556fd34d6fSJeff Roberson 		udata = (void *)(uintptr_t)zone->uz_flags;
456e8a720feSAlexander Motin 	else {
457e8a720feSAlexander Motin 		if ((uintptr_t)udata & UMA_ZFLAG_BUCKET)
458e8a720feSAlexander Motin 			return (NULL);
4596fd34d6fSJeff Roberson 		udata = (void *)((uintptr_t)udata | UMA_ZFLAG_BUCKET);
460e8a720feSAlexander Motin 	}
4616fd34d6fSJeff Roberson 	if ((uintptr_t)udata & UMA_ZFLAG_CACHEONLY)
462af526374SJeff Roberson 		flags |= M_NOVM;
463*20a4e154SJeff Roberson 	ubz = bucket_zone_lookup(zone->uz_bucket_size);
46420d3ab87SAlexander Motin 	if (ubz->ubz_zone == zone && (ubz + 1)->ubz_entries != 0)
46520d3ab87SAlexander Motin 		ubz++;
4666fd34d6fSJeff Roberson 	bucket = uma_zalloc_arg(ubz->ubz_zone, udata, flags);
467cae33c14SJeff Roberson 	if (bucket) {
468cae33c14SJeff Roberson #ifdef INVARIANTS
469cae33c14SJeff Roberson 		bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
470cae33c14SJeff Roberson #endif
471cae33c14SJeff Roberson 		bucket->ub_cnt = 0;
472cae33c14SJeff Roberson 		bucket->ub_entries = ubz->ubz_entries;
473cae33c14SJeff Roberson 	}
474cae33c14SJeff Roberson 
475cae33c14SJeff Roberson 	return (bucket);
476cae33c14SJeff Roberson }
477cae33c14SJeff Roberson 
478cae33c14SJeff Roberson static void
4796fd34d6fSJeff Roberson bucket_free(uma_zone_t zone, uma_bucket_t bucket, void *udata)
480cae33c14SJeff Roberson {
481cae33c14SJeff Roberson 	struct uma_bucket_zone *ubz;
482cae33c14SJeff Roberson 
483fc03d22bSJeff Roberson 	KASSERT(bucket->ub_cnt == 0,
484fc03d22bSJeff Roberson 	    ("bucket_free: Freeing a non free bucket."));
4856fd34d6fSJeff Roberson 	if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
4866fd34d6fSJeff Roberson 		udata = (void *)(uintptr_t)zone->uz_flags;
487dc2c7965SRobert Watson 	ubz = bucket_zone_lookup(bucket->ub_entries);
4886fd34d6fSJeff Roberson 	uma_zfree_arg(ubz->ubz_zone, bucket, udata);
489cae33c14SJeff Roberson }
490cae33c14SJeff Roberson 
491cae33c14SJeff Roberson static void
492cae33c14SJeff Roberson bucket_zone_drain(void)
493cae33c14SJeff Roberson {
494cae33c14SJeff Roberson 	struct uma_bucket_zone *ubz;
495cae33c14SJeff Roberson 
496cae33c14SJeff Roberson 	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
49708cfa56eSMark Johnston 		uma_zone_reclaim(ubz->ubz_zone, UMA_RECLAIM_DRAIN);
498cae33c14SJeff Roberson }
499cae33c14SJeff Roberson 
50008cfa56eSMark Johnston /*
50108cfa56eSMark Johnston  * Attempt to satisfy an allocation by retrieving a full bucket from one of the
50208cfa56eSMark Johnston  * zone's caches.
50308cfa56eSMark Johnston  */
5040f9b7bf3SMark Johnston static uma_bucket_t
50508cfa56eSMark Johnston zone_fetch_bucket(uma_zone_t zone, uma_zone_domain_t zdom)
5060f9b7bf3SMark Johnston {
5070f9b7bf3SMark Johnston 	uma_bucket_t bucket;
5080f9b7bf3SMark Johnston 
5090f9b7bf3SMark Johnston 	ZONE_LOCK_ASSERT(zone);
5100f9b7bf3SMark Johnston 
51108cfa56eSMark Johnston 	if ((bucket = TAILQ_FIRST(&zdom->uzd_buckets)) != NULL) {
5120f9b7bf3SMark Johnston 		MPASS(zdom->uzd_nitems >= bucket->ub_cnt);
51308cfa56eSMark Johnston 		TAILQ_REMOVE(&zdom->uzd_buckets, bucket, ub_link);
5140f9b7bf3SMark Johnston 		zdom->uzd_nitems -= bucket->ub_cnt;
51508cfa56eSMark Johnston 		if (zdom->uzd_imin > zdom->uzd_nitems)
5160f9b7bf3SMark Johnston 			zdom->uzd_imin = zdom->uzd_nitems;
517bb15d1c7SGleb Smirnoff 		zone->uz_bkt_count -= bucket->ub_cnt;
5180f9b7bf3SMark Johnston 	}
5190f9b7bf3SMark Johnston 	return (bucket);
5200f9b7bf3SMark Johnston }
5210f9b7bf3SMark Johnston 
52208cfa56eSMark Johnston /*
52308cfa56eSMark Johnston  * Insert a full bucket into the specified cache.  The "ws" parameter indicates
52408cfa56eSMark Johnston  * whether the bucket's contents should be counted as part of the zone's working
52508cfa56eSMark Johnston  * set.
52608cfa56eSMark Johnston  */
5270f9b7bf3SMark Johnston static void
5280f9b7bf3SMark Johnston zone_put_bucket(uma_zone_t zone, uma_zone_domain_t zdom, uma_bucket_t bucket,
5290f9b7bf3SMark Johnston     const bool ws)
5300f9b7bf3SMark Johnston {
5310f9b7bf3SMark Johnston 
5320f9b7bf3SMark Johnston 	ZONE_LOCK_ASSERT(zone);
53308034d10SKonstantin Belousov 	KASSERT(!ws || zone->uz_bkt_count < zone->uz_bkt_max,
53408034d10SKonstantin Belousov 	    ("%s: zone %p overflow", __func__, zone));
5350f9b7bf3SMark Johnston 
53608cfa56eSMark Johnston 	if (ws)
53708cfa56eSMark Johnston 		TAILQ_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link);
53808cfa56eSMark Johnston 	else
53908cfa56eSMark Johnston 		TAILQ_INSERT_TAIL(&zdom->uzd_buckets, bucket, ub_link);
5400f9b7bf3SMark Johnston 	zdom->uzd_nitems += bucket->ub_cnt;
5410f9b7bf3SMark Johnston 	if (ws && zdom->uzd_imax < zdom->uzd_nitems)
5420f9b7bf3SMark Johnston 		zdom->uzd_imax = zdom->uzd_nitems;
543bb15d1c7SGleb Smirnoff 	zone->uz_bkt_count += bucket->ub_cnt;
5440f9b7bf3SMark Johnston }
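/*
 * An observation on the two functions above: zone_fetch_bucket() consumes
 * from the head of the queue, working-set buckets are inserted at the head
 * and non-working-set buckets at the tail, and bucket_cache_reclaim()
 * trims from the tail, so hot buckets are reused LIFO while reclamation
 * removes the coldest buckets first.
 */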
5450f9b7bf3SMark Johnston 
5462f891cd5SPawel Jakub Dawidek static void
5472f891cd5SPawel Jakub Dawidek zone_log_warning(uma_zone_t zone)
5482f891cd5SPawel Jakub Dawidek {
5492f891cd5SPawel Jakub Dawidek 	static const struct timeval warninterval = { 300, 0 };
5502f891cd5SPawel Jakub Dawidek 
5512f891cd5SPawel Jakub Dawidek 	if (!zone_warnings || zone->uz_warning == NULL)
5522f891cd5SPawel Jakub Dawidek 		return;
5532f891cd5SPawel Jakub Dawidek 
5542f891cd5SPawel Jakub Dawidek 	if (ratecheck(&zone->uz_ratecheck, &warninterval))
5552f891cd5SPawel Jakub Dawidek 		printf("[zone: %s] %s\n", zone->uz_name, zone->uz_warning);
5562f891cd5SPawel Jakub Dawidek }
5572f891cd5SPawel Jakub Dawidek 
55854503a13SJonathan T. Looney static inline void
55954503a13SJonathan T. Looney zone_maxaction(uma_zone_t zone)
56054503a13SJonathan T. Looney {
561e60b2fcbSGleb Smirnoff 
562e60b2fcbSGleb Smirnoff 	if (zone->uz_maxaction.ta_func != NULL)
563e60b2fcbSGleb Smirnoff 		taskqueue_enqueue(taskqueue_thread, &zone->uz_maxaction);
56454503a13SJonathan T. Looney }
56554503a13SJonathan T. Looney 
5668355f576SJeff Roberson /*
5678355f576SJeff Roberson  * Routine called by timeout which is used to fire off some time interval
5689643769aSJeff Roberson  * based calculations.  (stats, hash size, etc.)
5698355f576SJeff Roberson  *
5708355f576SJeff Roberson  * Arguments:
5718355f576SJeff Roberson  *	arg   Unused
5728355f576SJeff Roberson  *
5738355f576SJeff Roberson  * Returns:
5748355f576SJeff Roberson  *	Nothing
5758355f576SJeff Roberson  */
5768355f576SJeff Roberson static void
5778355f576SJeff Roberson uma_timeout(void *unused)
5788355f576SJeff Roberson {
57986bbae32SJeff Roberson 	bucket_enable();
580*20a4e154SJeff Roberson 	zone_foreach(zone_timeout, NULL);
5818355f576SJeff Roberson 
5828355f576SJeff Roberson 	/* Reschedule this event */
5839643769aSJeff Roberson 	callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
5848355f576SJeff Roberson }
5858355f576SJeff Roberson 
5868355f576SJeff Roberson /*
5870f9b7bf3SMark Johnston  * Update the working set size estimate for the zone's bucket cache.
5880f9b7bf3SMark Johnston  * The constants chosen here are somewhat arbitrary.  With an update period of
5890f9b7bf3SMark Johnston  * 20s (UMA_TIMEOUT), this estimate is dominated by zone activity over the
5900f9b7bf3SMark Johnston  * last 100s.
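 *
 * As a sketch of the arithmetic: the update below computes
 * uzd_wss = (4 * wss + uzd_wss) / 5, an exponentially weighted moving
 * average in which each new 20s sample carries weight 4/5 and prior
 * history decays by a factor of 5 per interval, so activity older than
 * five intervals (100s) contributes less than 0.04% of the estimate.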
5910f9b7bf3SMark Johnston  */
5920f9b7bf3SMark Johnston static void
5930f9b7bf3SMark Johnston zone_domain_update_wss(uma_zone_domain_t zdom)
5940f9b7bf3SMark Johnston {
5950f9b7bf3SMark Johnston 	long wss;
5960f9b7bf3SMark Johnston 
5970f9b7bf3SMark Johnston 	MPASS(zdom->uzd_imax >= zdom->uzd_imin);
5980f9b7bf3SMark Johnston 	wss = zdom->uzd_imax - zdom->uzd_imin;
5990f9b7bf3SMark Johnston 	zdom->uzd_imax = zdom->uzd_imin = zdom->uzd_nitems;
60008cfa56eSMark Johnston 	zdom->uzd_wss = (4 * wss + zdom->uzd_wss) / 5;
6010f9b7bf3SMark Johnston }
6020f9b7bf3SMark Johnston 
6030f9b7bf3SMark Johnston /*
6049643769aSJeff Roberson  * Routine to perform timeout driven calculations.  This expands the
6059643769aSJeff Roberson  * hashes and does per cpu statistics aggregation.
6068355f576SJeff Roberson  *
607e20a199fSJeff Roberson  *  Returns nothing.
6088355f576SJeff Roberson  */
6098355f576SJeff Roberson static void
610*20a4e154SJeff Roberson zone_timeout(uma_zone_t zone, void *unused)
6118355f576SJeff Roberson {
61208034d10SKonstantin Belousov 	uma_keg_t keg;
6133b2f2cb8SAlexander Motin 	u_int slabs;
6148355f576SJeff Roberson 
61571353f7aSJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_HASH) == 0)
61608034d10SKonstantin Belousov 		goto update_wss;
61708034d10SKonstantin Belousov 
61808034d10SKonstantin Belousov 	keg = zone->uz_keg;
619e20a199fSJeff Roberson 	KEG_LOCK(keg);
6208355f576SJeff Roberson 	/*
621e20a199fSJeff Roberson 	 * Expand the keg hash table.
6228355f576SJeff Roberson 	 *
6238355f576SJeff Roberson 	 * This is done if the number of slabs is larger than the hash size.
6248355f576SJeff Roberson 	 * What I'm trying to do here is completely reduce collisions.  This
6258355f576SJeff Roberson 	 * may be a little aggressive.  Should I allow for two collisions max?
6268355f576SJeff Roberson 	 */
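	/*
	 * A concrete sketch (numbers assumed): with 3000 slabs and a
	 * 2048-bucket table, 1 << fls(3000) == 4096, so the code below
	 * rebuilds the table with 4096 buckets.
	 */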
627099a0e58SBosko Milekic 	if (keg->uk_flags & UMA_ZONE_HASH &&
6283b2f2cb8SAlexander Motin 	    (slabs = keg->uk_pages / keg->uk_ppera) >
6293b2f2cb8SAlexander Motin 	     keg->uk_hash.uh_hashsize) {
6300aef6126SJeff Roberson 		struct uma_hash newhash;
6310aef6126SJeff Roberson 		struct uma_hash oldhash;
6320aef6126SJeff Roberson 		int ret;
6335300d9ddSJeff Roberson 
6340aef6126SJeff Roberson 		/*
6350aef6126SJeff Roberson 		 * This is so involved because allocating and freeing
636e20a199fSJeff Roberson 		 * while the keg lock is held will lead to deadlock.
6370aef6126SJeff Roberson 		 * I have to do everything in stages and check for
6380aef6126SJeff Roberson 		 * races.
6390aef6126SJeff Roberson 		 */
640e20a199fSJeff Roberson 		KEG_UNLOCK(keg);
6413b2f2cb8SAlexander Motin 		ret = hash_alloc(&newhash, 1 << fls(slabs));
642e20a199fSJeff Roberson 		KEG_LOCK(keg);
6430aef6126SJeff Roberson 		if (ret) {
644099a0e58SBosko Milekic 			if (hash_expand(&keg->uk_hash, &newhash)) {
645099a0e58SBosko Milekic 				oldhash = keg->uk_hash;
646099a0e58SBosko Milekic 				keg->uk_hash = newhash;
6470aef6126SJeff Roberson 			} else
6480aef6126SJeff Roberson 				oldhash = newhash;
6490aef6126SJeff Roberson 
650e20a199fSJeff Roberson 			KEG_UNLOCK(keg);
6510aef6126SJeff Roberson 			hash_free(&oldhash);
652a1dff920SDavide Italiano 			return;
6530aef6126SJeff Roberson 		}
6545300d9ddSJeff Roberson 	}
65508cfa56eSMark Johnston 	KEG_UNLOCK(keg);
656e20a199fSJeff Roberson 
65708034d10SKonstantin Belousov update_wss:
65808cfa56eSMark Johnston 	ZONE_LOCK(zone);
659bb15d1c7SGleb Smirnoff 	for (int i = 0; i < vm_ndomains; i++)
6600f9b7bf3SMark Johnston 		zone_domain_update_wss(&zone->uz_domain[i]);
66108cfa56eSMark Johnston 	ZONE_UNLOCK(zone);
6628355f576SJeff Roberson }
6638355f576SJeff Roberson 
6648355f576SJeff Roberson /*
6655300d9ddSJeff Roberson  * Allocate and zero fill a hash table of the requested size from the
6665300d9ddSJeff Roberson  * appropriate backing store.
6675300d9ddSJeff Roberson  *
6685300d9ddSJeff Roberson  * Arguments:
6690aef6126SJeff Roberson  *	hash  The new hash structure to fill in
6690aef6126SJeff Roberson  *	size  The requested number of hash buckets; must be a power of 2
6705300d9ddSJeff Roberson  *
6715300d9ddSJeff Roberson  * Returns:
672763df3ecSPedro F. Giffuni  *	1 on success and 0 on failure.
6735300d9ddSJeff Roberson  */
67437c84183SPoul-Henning Kamp static int
6753b2f2cb8SAlexander Motin hash_alloc(struct uma_hash *hash, u_int size)
6765300d9ddSJeff Roberson {
67759568a0eSAlexander Motin 	size_t alloc;
6785300d9ddSJeff Roberson 
6793b2f2cb8SAlexander Motin 	KASSERT(powerof2(size), ("hash size must be power of 2"));
6803b2f2cb8SAlexander Motin 	if (size > UMA_HASH_SIZE_INIT)  {
6813b2f2cb8SAlexander Motin 		hash->uh_hashsize = size;
6820aef6126SJeff Roberson 		alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
6830aef6126SJeff Roberson 		hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
684961647dfSJeff Roberson 		    M_UMAHASH, M_NOWAIT);
6855300d9ddSJeff Roberson 	} else {
6860aef6126SJeff Roberson 		alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
687e20a199fSJeff Roberson 		hash->uh_slab_hash = zone_alloc_item(hashzone, NULL,
688ab3185d1SJeff Roberson 		    UMA_ANYDOMAIN, M_WAITOK);
6890aef6126SJeff Roberson 		hash->uh_hashsize = UMA_HASH_SIZE_INIT;
6905300d9ddSJeff Roberson 	}
6910aef6126SJeff Roberson 	if (hash->uh_slab_hash) {
6920aef6126SJeff Roberson 		bzero(hash->uh_slab_hash, alloc);
6930aef6126SJeff Roberson 		hash->uh_hashmask = hash->uh_hashsize - 1;
6940aef6126SJeff Roberson 		return (1);
6950aef6126SJeff Roberson 	}
6965300d9ddSJeff Roberson 
6970aef6126SJeff Roberson 	return (0);
6985300d9ddSJeff Roberson }
6995300d9ddSJeff Roberson 
7005300d9ddSJeff Roberson /*
70164f051e9SJeff Roberson  * Expands the hash table for HASH zones.  This is done from zone_timeout
70264f051e9SJeff Roberson  * to reduce collisions.  This must not be done in the regular allocation
70364f051e9SJeff Roberson  * path, otherwise, we can recurse on the vm while allocating pages.
7048355f576SJeff Roberson  *
7058355f576SJeff Roberson  * Arguments:
7060aef6126SJeff Roberson  *	oldhash  The hash you want to expand
7070aef6126SJeff Roberson  *	newhash  The hash structure for the new table
7088355f576SJeff Roberson  *
7098355f576SJeff Roberson  * Returns:
7108355f576SJeff Roberson  *	1 on success and 0 on failure.
7138355f576SJeff Roberson  */
7140aef6126SJeff Roberson static int
7150aef6126SJeff Roberson hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
7168355f576SJeff Roberson {
7178355f576SJeff Roberson 	uma_slab_t slab;
7186929b7d1SPedro F. Giffuni 	u_int hval;
7196929b7d1SPedro F. Giffuni 	u_int idx;
7208355f576SJeff Roberson 
7210aef6126SJeff Roberson 	if (!newhash->uh_slab_hash)
7220aef6126SJeff Roberson 		return (0);
7238355f576SJeff Roberson 
7240aef6126SJeff Roberson 	if (oldhash->uh_hashsize >= newhash->uh_hashsize)
7250aef6126SJeff Roberson 		return (0);
7268355f576SJeff Roberson 
7278355f576SJeff Roberson 	/*
7288355f576SJeff Roberson 	 * I need to investigate hash algorithms for resizing without a
7298355f576SJeff Roberson 	 * full rehash.
7308355f576SJeff Roberson 	 */
7318355f576SJeff Roberson 
7326929b7d1SPedro F. Giffuni 	for (idx = 0; idx < oldhash->uh_hashsize; idx++)
7336929b7d1SPedro F. Giffuni 		while (!SLIST_EMPTY(&oldhash->uh_slab_hash[idx])) {
7346929b7d1SPedro F. Giffuni 			slab = SLIST_FIRST(&oldhash->uh_slab_hash[idx]);
7356929b7d1SPedro F. Giffuni 			SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[idx], us_hlink);
7360aef6126SJeff Roberson 			hval = UMA_HASH(newhash, slab->us_data);
7370aef6126SJeff Roberson 			SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
7380aef6126SJeff Roberson 			    slab, us_hlink);
7398355f576SJeff Roberson 		}
7408355f576SJeff Roberson 
7410aef6126SJeff Roberson 	return (1);
7429c2cd7e5SJeff Roberson }
7439c2cd7e5SJeff Roberson 
7445300d9ddSJeff Roberson /*
7455300d9ddSJeff Roberson  * Free the hash table to the appropriate backing store.
7465300d9ddSJeff Roberson  *
7475300d9ddSJeff Roberson  * Arguments:
7485300d9ddSJeff Roberson  *	hash  The hash structure whose table is being freed
7505300d9ddSJeff Roberson  *
7515300d9ddSJeff Roberson  * Returns:
7525300d9ddSJeff Roberson  *	Nothing
7535300d9ddSJeff Roberson  */
7549c2cd7e5SJeff Roberson static void
7550aef6126SJeff Roberson hash_free(struct uma_hash *hash)
7569c2cd7e5SJeff Roberson {
7570aef6126SJeff Roberson 	if (hash->uh_slab_hash == NULL)
7580aef6126SJeff Roberson 		return;
7590aef6126SJeff Roberson 	if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
7600095a784SJeff Roberson 		zone_free_item(hashzone, hash->uh_slab_hash, NULL, SKIP_NONE);
7618355f576SJeff Roberson 	else
762961647dfSJeff Roberson 		free(hash->uh_slab_hash, M_UMAHASH);
7638355f576SJeff Roberson }
7648355f576SJeff Roberson 
7658355f576SJeff Roberson /*
7668355f576SJeff Roberson  * Frees all outstanding items in a bucket
7678355f576SJeff Roberson  *
7688355f576SJeff Roberson  * Arguments:
7698355f576SJeff Roberson  *	zone   The zone to free to, must be unlocked.
7708355f576SJeff Roberson  *	bucket The free/alloc bucket with items, cpu queue must be locked.
7718355f576SJeff Roberson  *
7728355f576SJeff Roberson  * Returns:
7738355f576SJeff Roberson  *	Nothing
7748355f576SJeff Roberson  */
7758355f576SJeff Roberson 
7768355f576SJeff Roberson static void
7778355f576SJeff Roberson bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
7788355f576SJeff Roberson {
7790095a784SJeff Roberson 	int i;
7808355f576SJeff Roberson 
7818355f576SJeff Roberson 	if (bucket == NULL)
7828355f576SJeff Roberson 		return;
7838355f576SJeff Roberson 
7840095a784SJeff Roberson 	if (zone->uz_fini)
7850095a784SJeff Roberson 		for (i = 0; i < bucket->ub_cnt; i++)
7860095a784SJeff Roberson 			zone->uz_fini(bucket->ub_bucket[i], zone->uz_size);
7870095a784SJeff Roberson 	zone->uz_release(zone->uz_arg, bucket->ub_bucket, bucket->ub_cnt);
788bb45b411SGleb Smirnoff 	if (zone->uz_max_items > 0) {
789bb15d1c7SGleb Smirnoff 		ZONE_LOCK(zone);
790bb15d1c7SGleb Smirnoff 		zone->uz_items -= bucket->ub_cnt;
791bb15d1c7SGleb Smirnoff 		if (zone->uz_sleepers && zone->uz_items < zone->uz_max_items)
792bb15d1c7SGleb Smirnoff 			wakeup_one(zone);
793bb15d1c7SGleb Smirnoff 		ZONE_UNLOCK(zone);
794bb45b411SGleb Smirnoff 	}
7950095a784SJeff Roberson 	bucket->ub_cnt = 0;
7968355f576SJeff Roberson }
7978355f576SJeff Roberson 
7988355f576SJeff Roberson /*
7998355f576SJeff Roberson  * Drains the per cpu caches for a zone.
8008355f576SJeff Roberson  *
8015d1ae027SRobert Watson  * NOTE: This may only be called while the zone is being torn down, and not
8025d1ae027SRobert Watson  * during normal operation.  This is necessary in order that we do not have
8035d1ae027SRobert Watson  * to migrate CPUs to drain the per-CPU caches.
8045d1ae027SRobert Watson  *
8058355f576SJeff Roberson  * Arguments:
8068355f576SJeff Roberson  *	zone     The zone to drain, must be unlocked.
8078355f576SJeff Roberson  *
8088355f576SJeff Roberson  * Returns:
8098355f576SJeff Roberson  *	Nothing
8108355f576SJeff Roberson  */
8118355f576SJeff Roberson static void
8129643769aSJeff Roberson cache_drain(uma_zone_t zone)
8138355f576SJeff Roberson {
8148355f576SJeff Roberson 	uma_cache_t cache;
8158355f576SJeff Roberson 	int cpu;
8168355f576SJeff Roberson 
8178355f576SJeff Roberson 	/*
8185d1ae027SRobert Watson 	 * XXX: It is safe to not lock the per-CPU caches, because we're
8195d1ae027SRobert Watson 	 * tearing down the zone anyway.  I.e., there will be no further use
8205d1ae027SRobert Watson 	 * of the caches at this point.
8215d1ae027SRobert Watson 	 *
8225d1ae027SRobert Watson 	 * XXX: It would be good to be able to assert that the zone is being
8235d1ae027SRobert Watson 	 * torn down to prevent improper use of cache_drain().
8245d1ae027SRobert Watson 	 *
82508cfa56eSMark Johnston 	 * XXX: We lock the zone before passing into bucket_cache_reclaim() as
8265d1ae027SRobert Watson 	 * it is used elsewhere.  Should the tear-down path be made special
8275d1ae027SRobert Watson 	 * there in some form?
8288355f576SJeff Roberson 	 */
8293aa6d94eSJohn Baldwin 	CPU_FOREACH(cpu) {
8308355f576SJeff Roberson 		cache = &zone->uz_cpu[cpu];
8318355f576SJeff Roberson 		bucket_drain(zone, cache->uc_allocbucket);
832174ab450SBosko Milekic 		if (cache->uc_allocbucket != NULL)
8336fd34d6fSJeff Roberson 			bucket_free(zone, cache->uc_allocbucket, NULL);
834c1685086SJeff Roberson 		cache->uc_allocbucket = NULL;
835c1685086SJeff Roberson 		bucket_drain(zone, cache->uc_freebucket);
836174ab450SBosko Milekic 		if (cache->uc_freebucket != NULL)
8376fd34d6fSJeff Roberson 			bucket_free(zone, cache->uc_freebucket, NULL);
838c1685086SJeff Roberson 		cache->uc_freebucket = NULL;
839c1685086SJeff Roberson 		bucket_drain(zone, cache->uc_crossbucket);
840c1685086SJeff Roberson 		if (cache->uc_crossbucket != NULL)
841c1685086SJeff Roberson 			bucket_free(zone, cache->uc_crossbucket, NULL);
842c1685086SJeff Roberson 		cache->uc_crossbucket = NULL;
843d56368d7SBosko Milekic 	}
844aaa8bb16SJeff Roberson 	ZONE_LOCK(zone);
84508cfa56eSMark Johnston 	bucket_cache_reclaim(zone, true);
846aaa8bb16SJeff Roberson 	ZONE_UNLOCK(zone);
847aaa8bb16SJeff Roberson }
848aaa8bb16SJeff Roberson 
849a2de44abSAlexander Motin static void
850*20a4e154SJeff Roberson cache_shrink(uma_zone_t zone, void *unused)
851a2de44abSAlexander Motin {
852a2de44abSAlexander Motin 
853a2de44abSAlexander Motin 	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
854a2de44abSAlexander Motin 		return;
855a2de44abSAlexander Motin 
856a2de44abSAlexander Motin 	ZONE_LOCK(zone);
857*20a4e154SJeff Roberson 	zone->uz_bucket_size =
858*20a4e154SJeff Roberson 	    (zone->uz_bucket_size_min + zone->uz_bucket_size) / 2;
859a2de44abSAlexander Motin 	ZONE_UNLOCK(zone);
860a2de44abSAlexander Motin }
861a2de44abSAlexander Motin 
862a2de44abSAlexander Motin static void
863*20a4e154SJeff Roberson cache_drain_safe_cpu(uma_zone_t zone, void *unused)
864a2de44abSAlexander Motin {
865a2de44abSAlexander Motin 	uma_cache_t cache;
866c1685086SJeff Roberson 	uma_bucket_t b1, b2, b3;
867ab3185d1SJeff Roberson 	int domain;
868a2de44abSAlexander Motin 
869a2de44abSAlexander Motin 	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
870a2de44abSAlexander Motin 		return;
871a2de44abSAlexander Motin 
872c1685086SJeff Roberson 	b1 = b2 = b3 = NULL;
873a2de44abSAlexander Motin 	ZONE_LOCK(zone);
874a2de44abSAlexander Motin 	critical_enter();
875ab3185d1SJeff Roberson 	if (zone->uz_flags & UMA_ZONE_NUMA)
876ab3185d1SJeff Roberson 		domain = PCPU_GET(domain);
877ab3185d1SJeff Roberson 	else
878ab3185d1SJeff Roberson 		domain = 0;
879a2de44abSAlexander Motin 	cache = &zone->uz_cpu[curcpu];
880a2de44abSAlexander Motin 	if (cache->uc_allocbucket) {
8818a8d9d14SAlexander Motin 		if (cache->uc_allocbucket->ub_cnt != 0)
8820f9b7bf3SMark Johnston 			zone_put_bucket(zone, &zone->uz_domain[domain],
8830f9b7bf3SMark Johnston 			    cache->uc_allocbucket, false);
8848a8d9d14SAlexander Motin 		else
8858a8d9d14SAlexander Motin 			b1 = cache->uc_allocbucket;
886a2de44abSAlexander Motin 		cache->uc_allocbucket = NULL;
887a2de44abSAlexander Motin 	}
888a2de44abSAlexander Motin 	if (cache->uc_freebucket) {
8898a8d9d14SAlexander Motin 		if (cache->uc_freebucket->ub_cnt != 0)
8900f9b7bf3SMark Johnston 			zone_put_bucket(zone, &zone->uz_domain[domain],
8910f9b7bf3SMark Johnston 			    cache->uc_freebucket, false);
8928a8d9d14SAlexander Motin 		else
8938a8d9d14SAlexander Motin 			b2 = cache->uc_freebucket;
894a2de44abSAlexander Motin 		cache->uc_freebucket = NULL;
895a2de44abSAlexander Motin 	}
896c1685086SJeff Roberson 	b3 = cache->uc_crossbucket;
897c1685086SJeff Roberson 	cache->uc_crossbucket = NULL;
898a2de44abSAlexander Motin 	critical_exit();
899a2de44abSAlexander Motin 	ZONE_UNLOCK(zone);
9008a8d9d14SAlexander Motin 	if (b1)
9018a8d9d14SAlexander Motin 		bucket_free(zone, b1, NULL);
9028a8d9d14SAlexander Motin 	if (b2)
9038a8d9d14SAlexander Motin 		bucket_free(zone, b2, NULL);
904c1685086SJeff Roberson 	if (b3) {
905c1685086SJeff Roberson 		bucket_drain(zone, b3);
906c1685086SJeff Roberson 		bucket_free(zone, b3, NULL);
907c1685086SJeff Roberson 	}
908a2de44abSAlexander Motin }
909a2de44abSAlexander Motin 
910a2de44abSAlexander Motin /*
910a2de44abSAlexander Motin  * Safely drain the per-CPU caches of a zone, or of all zones, into the
910a2de44abSAlexander Motin  * per-domain bucket caches.
911a2de44abSAlexander Motin  * This is an expensive call because it needs to bind to all CPUs
912a2de44abSAlexander Motin  * one by one and enter a critical section on each of them in order
913a2de44abSAlexander Motin  * to safely access their cache buckets.
914a2de44abSAlexander Motin  * The zone lock must not be held when calling this function.
916a2de44abSAlexander Motin  */
917a2de44abSAlexander Motin static void
91808cfa56eSMark Johnston pcpu_cache_drain_safe(uma_zone_t zone)
919a2de44abSAlexander Motin {
920a2de44abSAlexander Motin 	int cpu;
921a2de44abSAlexander Motin 
922a2de44abSAlexander Motin 	/*
923a2de44abSAlexander Motin 	 * Polite bucket size shrinking was not enough; shrink aggressively.
924a2de44abSAlexander Motin 	 */
925a2de44abSAlexander Motin 	if (zone)
926*20a4e154SJeff Roberson 		cache_shrink(zone, NULL);
927a2de44abSAlexander Motin 	else
928*20a4e154SJeff Roberson 		zone_foreach(cache_shrink, NULL);
929a2de44abSAlexander Motin 
930a2de44abSAlexander Motin 	CPU_FOREACH(cpu) {
931a2de44abSAlexander Motin 		thread_lock(curthread);
932a2de44abSAlexander Motin 		sched_bind(curthread, cpu);
933a2de44abSAlexander Motin 		thread_unlock(curthread);
934a2de44abSAlexander Motin 
935a2de44abSAlexander Motin 		if (zone)
936*20a4e154SJeff Roberson 			cache_drain_safe_cpu(zone, NULL);
937a2de44abSAlexander Motin 		else
938*20a4e154SJeff Roberson 			zone_foreach(cache_drain_safe_cpu, NULL);
939a2de44abSAlexander Motin 	}
940a2de44abSAlexander Motin 	thread_lock(curthread);
941a2de44abSAlexander Motin 	sched_unbind(curthread);
942a2de44abSAlexander Motin 	thread_unlock(curthread);
943a2de44abSAlexander Motin }
944a2de44abSAlexander Motin 
945aaa8bb16SJeff Roberson /*
94608cfa56eSMark Johnston  * Reclaim cached buckets from a zone.  All buckets are reclaimed if the caller
94708cfa56eSMark Johnston  * requested a drain, otherwise the per-domain caches are trimmed to the
94808cfa56eSMark Johnston  * estimated working set size.
949aaa8bb16SJeff Roberson  */
950aaa8bb16SJeff Roberson static void
95108cfa56eSMark Johnston bucket_cache_reclaim(uma_zone_t zone, bool drain)
952aaa8bb16SJeff Roberson {
953ab3185d1SJeff Roberson 	uma_zone_domain_t zdom;
954aaa8bb16SJeff Roberson 	uma_bucket_t bucket;
95508cfa56eSMark Johnston 	long target, tofree;
956ab3185d1SJeff Roberson 	int i;
9578355f576SJeff Roberson 
958ab3185d1SJeff Roberson 	for (i = 0; i < vm_ndomains; i++) {
959ab3185d1SJeff Roberson 		zdom = &zone->uz_domain[i];
96008cfa56eSMark Johnston 
96108cfa56eSMark Johnston 		/*
96208cfa56eSMark Johnston 		 * If we were asked to drain the zone, we are done only once
96308cfa56eSMark Johnston 		 * this bucket cache is empty.  Otherwise, we reclaim items in
96408cfa56eSMark Johnston 		 * excess of the zone's estimated working set size.  If the
96508cfa56eSMark Johnston 		 * difference nitems - imin is larger than the WSS estimate,
96608cfa56eSMark Johnston 		 * then the estimate will grow at the end of this interval and
96708cfa56eSMark Johnston 		 * we ignore the historical average.
96808cfa56eSMark Johnston 		 */
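		/*
		 * A worked example (numbers assumed): with uzd_nitems = 500,
		 * uzd_imin = 200 and uzd_wss = 250, nitems - imin = 300
		 * exceeds the WSS estimate, so target = 300 and up to 200
		 * cached items may be freed below.
		 */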
96908cfa56eSMark Johnston 		target = drain ? 0 : lmax(zdom->uzd_wss, zdom->uzd_nitems -
97008cfa56eSMark Johnston 		    zdom->uzd_imin);
97108cfa56eSMark Johnston 		while (zdom->uzd_nitems > target) {
97208cfa56eSMark Johnston 			bucket = TAILQ_LAST(&zdom->uzd_buckets, uma_bucketlist);
97308cfa56eSMark Johnston 			if (bucket == NULL)
97408cfa56eSMark Johnston 				break;
97508cfa56eSMark Johnston 			tofree = bucket->ub_cnt;
97608cfa56eSMark Johnston 			TAILQ_REMOVE(&zdom->uzd_buckets, bucket, ub_link);
97708cfa56eSMark Johnston 			zdom->uzd_nitems -= tofree;
97808cfa56eSMark Johnston 
97908cfa56eSMark Johnston 			/*
98008cfa56eSMark Johnston 			 * Shift the bounds of the current WSS interval to avoid
98108cfa56eSMark Johnston 			 * perturbing the estimate.
98208cfa56eSMark Johnston 			 */
98308cfa56eSMark Johnston 			zdom->uzd_imax -= lmin(zdom->uzd_imax, tofree);
98408cfa56eSMark Johnston 			zdom->uzd_imin -= lmin(zdom->uzd_imin, tofree);
98508cfa56eSMark Johnston 
9868355f576SJeff Roberson 			ZONE_UNLOCK(zone);
9878355f576SJeff Roberson 			bucket_drain(zone, bucket);
9886fd34d6fSJeff Roberson 			bucket_free(zone, bucket, NULL);
9898355f576SJeff Roberson 			ZONE_LOCK(zone);
9908355f576SJeff Roberson 		}
991ab3185d1SJeff Roberson 	}
992ace66b56SAlexander Motin 
993ace66b56SAlexander Motin 	/*
99408cfa56eSMark Johnston 	 * Shrink the zone bucket size to ensure that the per-CPU caches
99508cfa56eSMark Johnston 	 * don't grow too large.
996ace66b56SAlexander Motin 	 */
997*20a4e154SJeff Roberson 	if (zone->uz_bucket_size > zone->uz_bucket_size_min)
998*20a4e154SJeff Roberson 		zone->uz_bucket_size--;
9998355f576SJeff Roberson }
1000fc03d22bSJeff Roberson 
1001fc03d22bSJeff Roberson static void
1002fc03d22bSJeff Roberson keg_free_slab(uma_keg_t keg, uma_slab_t slab, int start)
1003fc03d22bSJeff Roberson {
1004fc03d22bSJeff Roberson 	uint8_t *mem;
1005fc03d22bSJeff Roberson 	int i;
1006fc03d22bSJeff Roberson 	uint8_t flags;
1007fc03d22bSJeff Roberson 
10081431a748SGleb Smirnoff 	CTR4(KTR_UMA, "keg_free_slab keg %s(%p) slab %p, returning %d bytes",
10091431a748SGleb Smirnoff 	    keg->uk_name, keg, slab, PAGE_SIZE * keg->uk_ppera);
10101431a748SGleb Smirnoff 
1011fc03d22bSJeff Roberson 	mem = slab->us_data;
1012fc03d22bSJeff Roberson 	flags = slab->us_flags;
1013fc03d22bSJeff Roberson 	i = start;
1014fc03d22bSJeff Roberson 	if (keg->uk_fini != NULL) {
1015fc03d22bSJeff Roberson 		for (i--; i > -1; i--)
1016c5deaf04SGleb Smirnoff #ifdef INVARIANTS
1017c5deaf04SGleb Smirnoff 		/*
1018c5deaf04SGleb Smirnoff 		 * trash_fini implies that dtor was trash_dtor. trash_fini
1019c5deaf04SGleb Smirnoff 		 * would check that memory hasn't been modified since free,
1020c5deaf04SGleb Smirnoff 		 * which executed trash_dtor.
1021c5deaf04SGleb Smirnoff 		 * That's why we need to run the uma_dbg_kskip() check here,
1022c5deaf04SGleb Smirnoff 		 * even though we don't apply the skip check to other init/fini
1023c5deaf04SGleb Smirnoff 		 * invocations.
1024c5deaf04SGleb Smirnoff 		 */
1025c5deaf04SGleb Smirnoff 		if (!uma_dbg_kskip(keg, slab->us_data + (keg->uk_rsize * i)) ||
1026c5deaf04SGleb Smirnoff 		    keg->uk_fini != trash_fini)
1027c5deaf04SGleb Smirnoff #endif
1028fc03d22bSJeff Roberson 			keg->uk_fini(slab->us_data + (keg->uk_rsize * i),
1029fc03d22bSJeff Roberson 			    keg->uk_size);
1030fc03d22bSJeff Roberson 	}
1031fc03d22bSJeff Roberson 	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
1032fc03d22bSJeff Roberson 		zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
1033fc03d22bSJeff Roberson 	keg->uk_freef(mem, PAGE_SIZE * keg->uk_ppera, flags);
10342e47807cSJeff Roberson 	uma_total_dec(PAGE_SIZE * keg->uk_ppera);
10358355f576SJeff Roberson }
10368355f576SJeff Roberson 
10378355f576SJeff Roberson /*
1038e20a199fSJeff Roberson  * Frees pages from a keg back to the system.  This is done on demand from
10398355f576SJeff Roberson  * the pageout daemon.
10408355f576SJeff Roberson  *
1041e20a199fSJeff Roberson  * Returns nothing.
10428355f576SJeff Roberson  */
1043e20a199fSJeff Roberson static void
1044e20a199fSJeff Roberson keg_drain(uma_keg_t keg)
10458355f576SJeff Roberson {
10461e183df2SStefan Farfeleder 	struct slabhead freeslabs = { 0 };
1047ab3185d1SJeff Roberson 	uma_domain_t dom;
1048829be516SMark Johnston 	uma_slab_t slab, tmp;
1049ab3185d1SJeff Roberson 	int i;
10508355f576SJeff Roberson 
10518355f576SJeff Roberson 	/*
1052e20a199fSJeff Roberson 	 * We don't want to take pages from statically allocated kegs at this
10538355f576SJeff Roberson 	 * time.
10548355f576SJeff Roberson 	 */
1055099a0e58SBosko Milekic 	if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
10568355f576SJeff Roberson 		return;
10578355f576SJeff Roberson 
10581431a748SGleb Smirnoff 	CTR3(KTR_UMA, "keg_drain %s(%p) free items: %u",
10591431a748SGleb Smirnoff 	    keg->uk_name, keg, keg->uk_free);
1060e20a199fSJeff Roberson 	KEG_LOCK(keg);
1061099a0e58SBosko Milekic 	if (keg->uk_free == 0)
10628355f576SJeff Roberson 		goto finished;
10638355f576SJeff Roberson 
1064ab3185d1SJeff Roberson 	for (i = 0; i < vm_ndomains; i++) {
1065ab3185d1SJeff Roberson 		dom = &keg->uk_domain[i];
1066ab3185d1SJeff Roberson 		LIST_FOREACH_SAFE(slab, &dom->ud_free_slab, us_link, tmp) {
1067829be516SMark Johnston 			/* We have nowhere to free these to. */
1068829be516SMark Johnston 			if (slab->us_flags & UMA_SLAB_BOOT)
10698355f576SJeff Roberson 				continue;
10708355f576SJeff Roberson 
10718355f576SJeff Roberson 			LIST_REMOVE(slab, us_link);
1072099a0e58SBosko Milekic 			keg->uk_pages -= keg->uk_ppera;
1073099a0e58SBosko Milekic 			keg->uk_free -= keg->uk_ipers;
1074713deb36SJeff Roberson 
1075099a0e58SBosko Milekic 			if (keg->uk_flags & UMA_ZONE_HASH)
1076ab3185d1SJeff Roberson 				UMA_HASH_REMOVE(&keg->uk_hash, slab,
1077ab3185d1SJeff Roberson 				    slab->us_data);
1078713deb36SJeff Roberson 
1079713deb36SJeff Roberson 			SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
1080713deb36SJeff Roberson 		}
1081ab3185d1SJeff Roberson 	}
1082ab3185d1SJeff Roberson 
1083713deb36SJeff Roberson finished:
1084e20a199fSJeff Roberson 	KEG_UNLOCK(keg);
1085713deb36SJeff Roberson 
1086713deb36SJeff Roberson 	while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
1087713deb36SJeff Roberson 		SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
10881645995bSKirk McKusick 		keg_free_slab(keg, slab, keg->uk_ipers);
10898355f576SJeff Roberson 	}
10908355f576SJeff Roberson }
10918355f576SJeff Roberson 
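/*
 * keg_drain() above uses a common pattern: unlink work onto a private
 * list while the lock is held, then do the expensive frees after
 * dropping it.  A condensed sketch of the pattern (hypothetical types;
 * the real code filters slab-by-slab rather than swapping wholesale):
 */
struct sketch_item {
	SLIST_ENTRY(sketch_item) si_link;
};
SLIST_HEAD(sketch_head, sketch_item);

static void
drain_pattern_sketch(struct mtx *lock, struct sketch_head *src)
{
	struct sketch_head tofree = SLIST_HEAD_INITIALIZER(tofree);
	struct sketch_item *item;

	mtx_lock(lock);
	SLIST_SWAP(src, &tofree, sketch_item);	/* O(1) detach under lock. */
	mtx_unlock(lock);

	while ((item = SLIST_FIRST(&tofree)) != NULL) {
		SLIST_REMOVE_HEAD(&tofree, si_link);
		free(item, M_TEMP);	/* Expensive work, lock released. */
	}
}
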
1092e20a199fSJeff Roberson static void
109308cfa56eSMark Johnston zone_reclaim(uma_zone_t zone, int waitok, bool drain)
1094e20a199fSJeff Roberson {
1095e20a199fSJeff Roberson 
10968355f576SJeff Roberson 	/*
1097e20a199fSJeff Roberson 	 * Set the RECLAIMING flag to interlock with zone_dtor() so we can
1098e20a199fSJeff Roberson 	 * release our locks as we go.  Only dtor() should do a WAITOK call
1099e20a199fSJeff Roberson 	 * since it is the only call that knows the structure will still be
1100e20a199fSJeff Roberson 	 * available when it wakes up.
1101e20a199fSJeff Roberson 	 */
1102e20a199fSJeff Roberson 	ZONE_LOCK(zone);
110308cfa56eSMark Johnston 	while (zone->uz_flags & UMA_ZFLAG_RECLAIMING) {
1104e20a199fSJeff Roberson 		if (waitok == M_NOWAIT)
1105e20a199fSJeff Roberson 			goto out;
1106af526374SJeff Roberson 		msleep(zone, zone->uz_lockptr, PVM, "zonedrain", 1);
1107e20a199fSJeff Roberson 	}
110808cfa56eSMark Johnston 	zone->uz_flags |= UMA_ZFLAG_RECLAIMING;
110908cfa56eSMark Johnston 	bucket_cache_reclaim(zone, drain);
1110e20a199fSJeff Roberson 	ZONE_UNLOCK(zone);
111108cfa56eSMark Johnston 
1112e20a199fSJeff Roberson 	/*
1113e20a199fSJeff Roberson 	 * The RECLAIMING flag protects us from being freed while
1114111fbcd5SBryan Venteicher 	 * we're running.  Normally the uma_rwlock would protect us but we
1115e20a199fSJeff Roberson 	 * must be able to release and acquire the right lock for each keg.
1116e20a199fSJeff Roberson 	 */
111708034d10SKonstantin Belousov 	if ((zone->uz_flags & UMA_ZFLAG_CACHE) == 0)
1118bb15d1c7SGleb Smirnoff 		keg_drain(zone->uz_keg);
1119e20a199fSJeff Roberson 	ZONE_LOCK(zone);
112008cfa56eSMark Johnston 	zone->uz_flags &= ~UMA_ZFLAG_RECLAIMING;
1121e20a199fSJeff Roberson 	wakeup(zone);
1122e20a199fSJeff Roberson out:
1123e20a199fSJeff Roberson 	ZONE_UNLOCK(zone);
1124e20a199fSJeff Roberson }
1125e20a199fSJeff Roberson 
112608cfa56eSMark Johnston static void
1127*20a4e154SJeff Roberson zone_drain(uma_zone_t zone, void *unused)
1128e20a199fSJeff Roberson {
1129e20a199fSJeff Roberson 
113008cfa56eSMark Johnston 	zone_reclaim(zone, M_NOWAIT, true);
113108cfa56eSMark Johnston }
113208cfa56eSMark Johnston 
113308cfa56eSMark Johnston static void
1134*20a4e154SJeff Roberson zone_trim(uma_zone_t zone, void *unused)
113508cfa56eSMark Johnston {
113608cfa56eSMark Johnston 
113708cfa56eSMark Johnston 	zone_reclaim(zone, M_NOWAIT, false);
1138e20a199fSJeff Roberson }
1139e20a199fSJeff Roberson 
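/*
 * These two callbacks back the public reclaim interface.  An
 * illustrative consumer-side sketch (not part of this file):
 *
 *	uma_zone_reclaim(zone, UMA_RECLAIM_TRIM);	trim to the WSS
 *	uma_zone_reclaim(zone, UMA_RECLAIM_DRAIN);	empty the caches
 *
 * TRIM keeps roughly a working-set-size estimate's worth of cached
 * items; DRAIN discards the entire bucket layer and lets keg_drain()
 * return unused slabs to the VM.
 */
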
1140e20a199fSJeff Roberson /*
1141e20a199fSJeff Roberson  * Allocate a new slab for a keg.  This does not insert the slab onto a list.
1142194a979eSMark Johnston  * If the allocation was successful, the keg lock will be held upon return,
1143194a979eSMark Johnston  * otherwise the keg will be left unlocked.
11448355f576SJeff Roberson  *
11458355f576SJeff Roberson  * Arguments:
114686220393SMark Johnston  *	flags   Wait flags for the item initialization routine
114786220393SMark Johnston  *	aflags  Wait flags for the slab allocation
11488355f576SJeff Roberson  *
11498355f576SJeff Roberson  * Returns:
11508355f576SJeff Roberson  *	The slab that was allocated or NULL if there is no memory and the
11518355f576SJeff Roberson  *	caller specified M_NOWAIT.
11528355f576SJeff Roberson  */
11538355f576SJeff Roberson static uma_slab_t
115486220393SMark Johnston keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int domain, int flags,
115586220393SMark Johnston     int aflags)
11568355f576SJeff Roberson {
1157e20a199fSJeff Roberson 	uma_alloc allocf;
1158099a0e58SBosko Milekic 	uma_slab_t slab;
11592e47807cSJeff Roberson 	unsigned long size;
116085dcf349SGleb Smirnoff 	uint8_t *mem;
116186220393SMark Johnston 	uint8_t sflags;
11628355f576SJeff Roberson 	int i;
11638355f576SJeff Roberson 
1164ab3185d1SJeff Roberson 	KASSERT(domain >= 0 && domain < vm_ndomains,
1165ab3185d1SJeff Roberson 	    ("keg_alloc_slab: domain %d out of range", domain));
1166bb15d1c7SGleb Smirnoff 	KEG_LOCK_ASSERT(keg);
1167bb15d1c7SGleb Smirnoff 	MPASS(zone->uz_lockptr == &keg->uk_lock);
1168a553d4b8SJeff Roberson 
1169e20a199fSJeff Roberson 	allocf = keg->uk_allocf;
1170e20a199fSJeff Roberson 	KEG_UNLOCK(keg);
1171a553d4b8SJeff Roberson 
1172194a979eSMark Johnston 	slab = NULL;
1173194a979eSMark Johnston 	mem = NULL;
1174099a0e58SBosko Milekic 	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
117586220393SMark Johnston 		slab = zone_alloc_item(keg->uk_slabzone, NULL, domain, aflags);
1176fc03d22bSJeff Roberson 		if (slab == NULL)
1177fc03d22bSJeff Roberson 			goto out;
1178a553d4b8SJeff Roberson 	}
1179a553d4b8SJeff Roberson 
11803370c5bfSJeff Roberson 	/*
11813370c5bfSJeff Roberson 	 * This reproduces the old vm_zone behavior of zero filling pages the
11823370c5bfSJeff Roberson 	 * first time they are added to a zone.
11833370c5bfSJeff Roberson 	 *
11843370c5bfSJeff Roberson 	 * Malloced items are zeroed in uma_zalloc.
11853370c5bfSJeff Roberson 	 */
11863370c5bfSJeff Roberson 
1187099a0e58SBosko Milekic 	if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
118886220393SMark Johnston 		aflags |= M_ZERO;
11893370c5bfSJeff Roberson 	else
119086220393SMark Johnston 		aflags &= ~M_ZERO;
11913370c5bfSJeff Roberson 
1192263811f7SKip Macy 	if (keg->uk_flags & UMA_ZONE_NODUMP)
119386220393SMark Johnston 		aflags |= M_NODUMP;
1194263811f7SKip Macy 
1195e20a199fSJeff Roberson 	/* zone is passed for legacy reasons. */
1196194a979eSMark Johnston 	size = keg->uk_ppera * PAGE_SIZE;
119786220393SMark Johnston 	mem = allocf(zone, size, domain, &sflags, aflags);
1198a553d4b8SJeff Roberson 	if (mem == NULL) {
1199b23f72e9SBrian Feldman 		if (keg->uk_flags & UMA_ZONE_OFFPAGE)
12000095a784SJeff Roberson 			zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
1201fc03d22bSJeff Roberson 		slab = NULL;
1202fc03d22bSJeff Roberson 		goto out;
1203a553d4b8SJeff Roberson 	}
12042e47807cSJeff Roberson 	uma_total_inc(size);
12058355f576SJeff Roberson 
12065c0e403bSJeff Roberson 	/* Point the slab into the allocated memory */
1207099a0e58SBosko Milekic 	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
1208099a0e58SBosko Milekic 		slab = (uma_slab_t )(mem + keg->uk_pgoff);
12095c0e403bSJeff Roberson 
1210e20a199fSJeff Roberson 	if (keg->uk_flags & UMA_ZONE_VTOSLAB)
1211099a0e58SBosko Milekic 		for (i = 0; i < keg->uk_ppera; i++)
121299571dc3SJeff Roberson 			vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
12138355f576SJeff Roberson 
1214099a0e58SBosko Milekic 	slab->us_keg = keg;
12158355f576SJeff Roberson 	slab->us_data = mem;
1216099a0e58SBosko Milekic 	slab->us_freecount = keg->uk_ipers;
121786220393SMark Johnston 	slab->us_flags = sflags;
1218ab3185d1SJeff Roberson 	slab->us_domain = domain;
1219ef72505eSJeff Roberson 	BIT_FILL(SLAB_SETSIZE, &slab->us_free);
1220ef72505eSJeff Roberson #ifdef INVARIANTS
1221ef72505eSJeff Roberson 	BIT_ZERO(SLAB_SETSIZE, &slab->us_debugfree);
1222ef72505eSJeff Roberson #endif
1223099a0e58SBosko Milekic 
1224b23f72e9SBrian Feldman 	if (keg->uk_init != NULL) {
1225099a0e58SBosko Milekic 		for (i = 0; i < keg->uk_ipers; i++)
1226b23f72e9SBrian Feldman 			if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
122786220393SMark Johnston 			    keg->uk_size, flags) != 0)
1228b23f72e9SBrian Feldman 				break;
1229b23f72e9SBrian Feldman 		if (i != keg->uk_ipers) {
1230fc03d22bSJeff Roberson 			keg_free_slab(keg, slab, i);
1231fc03d22bSJeff Roberson 			slab = NULL;
1232fc03d22bSJeff Roberson 			goto out;
1233b23f72e9SBrian Feldman 		}
1234b23f72e9SBrian Feldman 	}
1235e20a199fSJeff Roberson 	KEG_LOCK(keg);
12365c0e403bSJeff Roberson 
12371431a748SGleb Smirnoff 	CTR3(KTR_UMA, "keg_alloc_slab: allocated slab %p for %s(%p)",
12381431a748SGleb Smirnoff 	    slab, keg->uk_name, keg);
12391431a748SGleb Smirnoff 
1240099a0e58SBosko Milekic 	if (keg->uk_flags & UMA_ZONE_HASH)
1241099a0e58SBosko Milekic 		UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
12428355f576SJeff Roberson 
1243099a0e58SBosko Milekic 	keg->uk_pages += keg->uk_ppera;
1244099a0e58SBosko Milekic 	keg->uk_free += keg->uk_ipers;
12458355f576SJeff Roberson 
1246194a979eSMark Johnston out:
12478355f576SJeff Roberson 	return (slab);
12488355f576SJeff Roberson }
12498355f576SJeff Roberson 
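/*
 * The init/unwind contract above, condensed: if uk_init fails on item
 * i, keg_free_slab(keg, slab, i) runs the fini over exactly the
 * already-initialized prefix [0, i).  A self-contained sketch with
 * hypothetical callbacks:
 */
static int
init_prefix_sketch(char *base, int nitems, size_t rsize,
    int (*init_fn)(void *), void (*fini_fn)(void *))
{
	int i;

	for (i = 0; i < nitems; i++)
		if (init_fn(base + rsize * i) != 0)
			break;
	if (i != nitems) {
		while (--i >= 0)	/* Unwind only what succeeded. */
			fini_fn(base + rsize * i);
		return (ENOMEM);
	}
	return (0);
}
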
12508355f576SJeff Roberson /*
1251009b6fcbSJeff Roberson  * This function is intended to be used early on in place of page_alloc() so
1252009b6fcbSJeff Roberson  * that we may use the boot time page cache to satisfy allocations before
1253009b6fcbSJeff Roberson  * the VM is ready.
1254009b6fcbSJeff Roberson  */
1255009b6fcbSJeff Roberson static void *
1256ab3185d1SJeff Roberson startup_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
1257ab3185d1SJeff Roberson     int wait)
1258009b6fcbSJeff Roberson {
1259099a0e58SBosko Milekic 	uma_keg_t keg;
1260ac0a6fd0SGleb Smirnoff 	void *mem;
1261ac0a6fd0SGleb Smirnoff 	int pages;
1262099a0e58SBosko Milekic 
1263bb15d1c7SGleb Smirnoff 	keg = zone->uz_keg;
1264009b6fcbSJeff Roberson 	/*
1265f7d35785SGleb Smirnoff 	 * If we are in BOOT_BUCKETS or higher, then switch to the real
1266f7d35785SGleb Smirnoff 	 * allocator.  Zones with page-sized slabs switch at BOOT_PAGEALLOC.
1267009b6fcbSJeff Roberson 	 */
1268f7d35785SGleb Smirnoff 	switch (booted) {
1269f7d35785SGleb Smirnoff 		case BOOT_COLD:
1270f7d35785SGleb Smirnoff 		case BOOT_STRAPPED:
1271f7d35785SGleb Smirnoff 			break;
1272f7d35785SGleb Smirnoff 		case BOOT_PAGEALLOC:
1273f7d35785SGleb Smirnoff 			if (keg->uk_ppera > 1)
1274f7d35785SGleb Smirnoff 				break;
1275f7d35785SGleb Smirnoff 		case BOOT_BUCKETS:
1276f7d35785SGleb Smirnoff 		case BOOT_RUNNING:
1277009b6fcbSJeff Roberson #ifdef UMA_MD_SMALL_ALLOC
1278f7d35785SGleb Smirnoff 			keg->uk_allocf = (keg->uk_ppera > 1) ?
1279f7d35785SGleb Smirnoff 			    page_alloc : uma_small_alloc;
1280009b6fcbSJeff Roberson #else
1281099a0e58SBosko Milekic 			keg->uk_allocf = page_alloc;
1282009b6fcbSJeff Roberson #endif
1283ab3185d1SJeff Roberson 			return keg->uk_allocf(zone, bytes, domain, pflag, wait);
1284009b6fcbSJeff Roberson 	}
1285009b6fcbSJeff Roberson 
1286009b6fcbSJeff Roberson 	/*
1287f7d35785SGleb Smirnoff 	 * Check our small startup cache to see if it has pages remaining.
1288f7d35785SGleb Smirnoff 	 */
1289f7d35785SGleb Smirnoff 	pages = howmany(bytes, PAGE_SIZE);
1290f7d35785SGleb Smirnoff 	KASSERT(pages > 0, ("%s can't reserve 0 pages", __func__));
1291f7d35785SGleb Smirnoff 	if (pages > boot_pages)
1292f7d35785SGleb Smirnoff 		panic("UMA zone \"%s\": Increase vm.boot_pages", zone->uz_name);
1293f7d35785SGleb Smirnoff #ifdef DIAGNOSTIC
1294f7d35785SGleb Smirnoff 	printf("%s from \"%s\", %d boot pages left\n", __func__, zone->uz_name,
1295f7d35785SGleb Smirnoff 	    boot_pages);
1296f7d35785SGleb Smirnoff #endif
1297f7d35785SGleb Smirnoff 	mem = bootmem;
1298f7d35785SGleb Smirnoff 	boot_pages -= pages;
1299f7d35785SGleb Smirnoff 	bootmem += pages * PAGE_SIZE;
1300f7d35785SGleb Smirnoff 	*pflag = UMA_SLAB_BOOT;
1301f7d35785SGleb Smirnoff 
1302f7d35785SGleb Smirnoff 	return (mem);
1303f7d35785SGleb Smirnoff }
1304f7d35785SGleb Smirnoff 
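/*
 * Stripped of its bookkeeping, the startup path above is a plain bump
 * allocator over a static reservation.  A minimal sketch, with
 * hypothetical variables standing in for bootmem/boot_pages:
 */
static char *sketch_bootmem;	/* Base of the static reservation. */
static int sketch_boot_pages;	/* Pages remaining in it. */

static void *
bump_alloc_sketch(size_t bytes)
{
	void *mem;
	int pages;

	pages = howmany(bytes, PAGE_SIZE);
	if (pages > sketch_boot_pages)
		return (NULL);	/* The real code panics instead. */
	mem = sketch_bootmem;
	sketch_bootmem += pages * PAGE_SIZE;
	sketch_boot_pages -= pages;
	return (mem);
}
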
1305f7d35785SGleb Smirnoff /*
13068355f576SJeff Roberson  * Allocates a number of pages from the system
13078355f576SJeff Roberson  *
13088355f576SJeff Roberson  * Arguments:
13098355f576SJeff Roberson  *	bytes  The number of bytes requested
13108355f576SJeff Roberson  *	wait  Shall we wait?
13118355f576SJeff Roberson  *
13128355f576SJeff Roberson  * Returns:
13138355f576SJeff Roberson  *	A pointer to the allocated memory or possibly
13148355f576SJeff Roberson  *	NULL if M_NOWAIT is set.
13158355f576SJeff Roberson  */
13168355f576SJeff Roberson static void *
1317ab3185d1SJeff Roberson page_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
1318ab3185d1SJeff Roberson     int wait)
13198355f576SJeff Roberson {
13208355f576SJeff Roberson 	void *p;	/* Returned page */
13218355f576SJeff Roberson 
13222e47807cSJeff Roberson 	*pflag = UMA_SLAB_KERNEL;
13239978bd99SMark Johnston 	p = (void *)kmem_malloc_domainset(DOMAINSET_FIXED(domain), bytes, wait);
13248355f576SJeff Roberson 
13258355f576SJeff Roberson 	return (p);
13268355f576SJeff Roberson }
13278355f576SJeff Roberson 
1328ab3059a8SMatt Macy static void *
1329ab3059a8SMatt Macy pcpu_page_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
1330ab3059a8SMatt Macy     int wait)
1331ab3059a8SMatt Macy {
1332ab3059a8SMatt Macy 	struct pglist alloctail;
1333ab3059a8SMatt Macy 	vm_offset_t addr, zkva;
1334ab3059a8SMatt Macy 	int cpu, flags;
1335ab3059a8SMatt Macy 	vm_page_t p, p_next;
1336ab3059a8SMatt Macy #ifdef NUMA
1337ab3059a8SMatt Macy 	struct pcpu *pc;
1338ab3059a8SMatt Macy #endif
1339ab3059a8SMatt Macy 
1340ab3059a8SMatt Macy 	MPASS(bytes == (mp_maxid + 1) * PAGE_SIZE);
1341ab3059a8SMatt Macy 
1342013072f0SMark Johnston 	TAILQ_INIT(&alloctail);
1343ab3059a8SMatt Macy 	flags = VM_ALLOC_SYSTEM | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ |
1344013072f0SMark Johnston 	    malloc2vm_flags(wait);
1345013072f0SMark Johnston 	*pflag = UMA_SLAB_KERNEL;
1346ab3059a8SMatt Macy 	for (cpu = 0; cpu <= mp_maxid; cpu++) {
1347ab3059a8SMatt Macy 		if (CPU_ABSENT(cpu)) {
1348ab3059a8SMatt Macy 			p = vm_page_alloc(NULL, 0, flags);
1349ab3059a8SMatt Macy 		} else {
1350ab3059a8SMatt Macy #ifndef NUMA
1351ab3059a8SMatt Macy 			p = vm_page_alloc(NULL, 0, flags);
1352ab3059a8SMatt Macy #else
1353ab3059a8SMatt Macy 			pc = pcpu_find(cpu);
1354ab3059a8SMatt Macy 			p = vm_page_alloc_domain(NULL, 0, pc->pc_domain, flags);
1355ab3059a8SMatt Macy 			if (__predict_false(p == NULL))
1356ab3059a8SMatt Macy 				p = vm_page_alloc(NULL, 0, flags);
1357ab3059a8SMatt Macy #endif
1358ab3059a8SMatt Macy 		}
1359ab3059a8SMatt Macy 		if (__predict_false(p == NULL))
1360ab3059a8SMatt Macy 			goto fail;
1361ab3059a8SMatt Macy 		TAILQ_INSERT_TAIL(&alloctail, p, listq);
1362ab3059a8SMatt Macy 	}
1363ab3059a8SMatt Macy 	if ((addr = kva_alloc(bytes)) == 0)
1364ab3059a8SMatt Macy 		goto fail;
1365ab3059a8SMatt Macy 	zkva = addr;
1366ab3059a8SMatt Macy 	TAILQ_FOREACH(p, &alloctail, listq) {
1367ab3059a8SMatt Macy 		pmap_qenter(zkva, &p, 1);
1368ab3059a8SMatt Macy 		zkva += PAGE_SIZE;
1369ab3059a8SMatt Macy 	}
1370ab3059a8SMatt Macy 	return ((void*)addr);
1371ab3059a8SMatt Macy fail:
1372ab3059a8SMatt Macy 	TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) {
137388ea538aSMark Johnston 		vm_page_unwire_noq(p);
1374ab3059a8SMatt Macy 		vm_page_free(p);
1375ab3059a8SMatt Macy 	}
1376ab3059a8SMatt Macy 	return (NULL);
1377ab3059a8SMatt Macy }
1378ab3059a8SMatt Macy 
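/*
 * Layout note for the function above: CPU n's page (NUMA-local when
 * possible) lands at offset n * PAGE_SIZE in one contiguous KVA range,
 * so a consumer can find its slice with plain offset arithmetic,
 * roughly as in this hypothetical helper (the kernel's zpcpu_get()
 * macros encapsulate the same computation):
 */
static inline void *
pcpu_slice_sketch(void *base, int cpu)
{

	return ((char *)base + cpu * PAGE_SIZE);
}
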
13798355f576SJeff Roberson /*
13808355f576SJeff Roberson  * Allocates a number of pages not belonging to a VM object
13818355f576SJeff Roberson  *
13828355f576SJeff Roberson  * Arguments:
13838355f576SJeff Roberson  *	bytes  The number of bytes requested
13848355f576SJeff Roberson  *	wait   Shall we wait?
13858355f576SJeff Roberson  *
13868355f576SJeff Roberson  * Returns:
13878355f576SJeff Roberson  *	A pointer to the allocated memory or possibly
13888355f576SJeff Roberson  *	NULL if M_NOWAIT is set.
13898355f576SJeff Roberson  */
13908355f576SJeff Roberson static void *
1391ab3185d1SJeff Roberson noobj_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *flags,
1392ab3185d1SJeff Roberson     int wait)
13938355f576SJeff Roberson {
1394a4915c21SAttilio Rao 	TAILQ_HEAD(, vm_page) alloctail;
1395a4915c21SAttilio Rao 	u_long npages;
1396b245ac95SAlan Cox 	vm_offset_t retkva, zkva;
1397a4915c21SAttilio Rao 	vm_page_t p, p_next;
1398e20a199fSJeff Roberson 	uma_keg_t keg;
13998355f576SJeff Roberson 
1400a4915c21SAttilio Rao 	TAILQ_INIT(&alloctail);
1401bb15d1c7SGleb Smirnoff 	keg = zone->uz_keg;
1402a4915c21SAttilio Rao 
1403a4915c21SAttilio Rao 	npages = howmany(bytes, PAGE_SIZE);
1404a4915c21SAttilio Rao 	while (npages > 0) {
1405ab3185d1SJeff Roberson 		p = vm_page_alloc_domain(NULL, 0, domain, VM_ALLOC_INTERRUPT |
14068d6fbbb8SJeff Roberson 		    VM_ALLOC_WIRED | VM_ALLOC_NOOBJ |
1407772c8b67SKonstantin Belousov 		    ((wait & M_WAITOK) != 0 ? VM_ALLOC_WAITOK :
1408772c8b67SKonstantin Belousov 		    VM_ALLOC_NOWAIT));
1409a4915c21SAttilio Rao 		if (p != NULL) {
1410a4915c21SAttilio Rao 			/*
1411a4915c21SAttilio Rao 			 * Since the page does not belong to an object, its
1412a4915c21SAttilio Rao 			 * listq is unused.
1413a4915c21SAttilio Rao 			 */
1414a4915c21SAttilio Rao 			TAILQ_INSERT_TAIL(&alloctail, p, listq);
1415a4915c21SAttilio Rao 			npages--;
1416a4915c21SAttilio Rao 			continue;
1417a4915c21SAttilio Rao 		}
14188355f576SJeff Roberson 		/*
1419a4915c21SAttilio Rao 		 * Page allocation failed, free intermediate pages and
1420a4915c21SAttilio Rao 		 * exit.
14218355f576SJeff Roberson 		 */
1422a4915c21SAttilio Rao 		TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) {
142388ea538aSMark Johnston 			vm_page_unwire_noq(p);
1424b245ac95SAlan Cox 			vm_page_free(p);
1425b245ac95SAlan Cox 		}
1426a4915c21SAttilio Rao 		return (NULL);
1427b245ac95SAlan Cox 	}
14288355f576SJeff Roberson 	*flags = UMA_SLAB_PRIV;
1429a4915c21SAttilio Rao 	zkva = keg->uk_kva +
1430a4915c21SAttilio Rao 	    atomic_fetchadd_long(&keg->uk_offset, round_page(bytes));
1431a4915c21SAttilio Rao 	retkva = zkva;
1432a4915c21SAttilio Rao 	TAILQ_FOREACH(p, &alloctail, listq) {
1433a4915c21SAttilio Rao 		pmap_qenter(zkva, &p, 1);
1434a4915c21SAttilio Rao 		zkva += PAGE_SIZE;
1435a4915c21SAttilio Rao 	}
14368355f576SJeff Roberson 
14378355f576SJeff Roberson 	return ((void *)retkva);
14388355f576SJeff Roberson }
14398355f576SJeff Roberson 
14408355f576SJeff Roberson /*
14418355f576SJeff Roberson  * Frees a number of pages to the system
14428355f576SJeff Roberson  *
14438355f576SJeff Roberson  * Arguments:
14448355f576SJeff Roberson  *	mem   A pointer to the memory to be freed
14458355f576SJeff Roberson  *	size  The size of the memory being freed
14468355f576SJeff Roberson  *	flags The original slab->us_flags field
14478355f576SJeff Roberson  *
14488355f576SJeff Roberson  * Returns:
14498355f576SJeff Roberson  *	Nothing
14508355f576SJeff Roberson  */
14518355f576SJeff Roberson static void
1452f2c2231eSRyan Stone page_free(void *mem, vm_size_t size, uint8_t flags)
14538355f576SJeff Roberson {
14543370c5bfSJeff Roberson 
145549bfa624SAlan Cox 	if ((flags & UMA_SLAB_KERNEL) == 0)
1456b5345ef1SJustin Hibbits 		panic("UMA: page_free used with invalid flags %x", flags);
14578355f576SJeff Roberson 
145849bfa624SAlan Cox 	kmem_free((vm_offset_t)mem, size);
14598355f576SJeff Roberson }
14608355f576SJeff Roberson 
14618355f576SJeff Roberson /*
1462ab3059a8SMatt Macy  * Frees pcpu zone allocations
1463ab3059a8SMatt Macy  *
1464ab3059a8SMatt Macy  * Arguments:
1465ab3059a8SMatt Macy  *	mem   A pointer to the memory to be freed
1466ab3059a8SMatt Macy  *	size  The size of the memory being freed
1467ab3059a8SMatt Macy  *	flags The original slab->us_flags field
1468ab3059a8SMatt Macy  *
1469ab3059a8SMatt Macy  * Returns:
1470ab3059a8SMatt Macy  *	Nothing
1471ab3059a8SMatt Macy  */
1472ab3059a8SMatt Macy static void
1473ab3059a8SMatt Macy pcpu_page_free(void *mem, vm_size_t size, uint8_t flags)
1474ab3059a8SMatt Macy {
1475ab3059a8SMatt Macy 	vm_offset_t sva, curva;
1476ab3059a8SMatt Macy 	vm_paddr_t paddr;
1477ab3059a8SMatt Macy 	vm_page_t m;
1478ab3059a8SMatt Macy 
1479ab3059a8SMatt Macy 	MPASS(size == (mp_maxid + 1) * PAGE_SIZE);
1480ab3059a8SMatt Macy 	sva = (vm_offset_t)mem;
1481ab3059a8SMatt Macy 	for (curva = sva; curva < sva + size; curva += PAGE_SIZE) {
1482ab3059a8SMatt Macy 		paddr = pmap_kextract(curva);
1483ab3059a8SMatt Macy 		m = PHYS_TO_VM_PAGE(paddr);
148488ea538aSMark Johnston 		vm_page_unwire_noq(m);
1485ab3059a8SMatt Macy 		vm_page_free(m);
1486ab3059a8SMatt Macy 	}
1487ab3059a8SMatt Macy 	pmap_qremove(sva, size >> PAGE_SHIFT);
1488ab3059a8SMatt Macy 	kva_free(sva, size);
1489ab3059a8SMatt Macy }
1490ab3059a8SMatt Macy 
1491ab3059a8SMatt Macy 
1492ab3059a8SMatt Macy /*
14938355f576SJeff Roberson  * Zero fill initializer
14948355f576SJeff Roberson  *
14958355f576SJeff Roberson  * Arguments/Returns follow uma_init specifications
14968355f576SJeff Roberson  */
1497b23f72e9SBrian Feldman static int
1498b23f72e9SBrian Feldman zero_init(void *mem, int size, int flags)
14998355f576SJeff Roberson {
15008355f576SJeff Roberson 	bzero(mem, size);
1501b23f72e9SBrian Feldman 	return (0);
15028355f576SJeff Roberson }
15038355f576SJeff Roberson 
15048355f576SJeff Roberson /*
1505e20a199fSJeff Roberson  * Finish creating a small uma keg.  This calculates ipers and the keg size.
15068355f576SJeff Roberson  *
15078355f576SJeff Roberson  * Arguments
1508e20a199fSJeff Roberson  *	keg  The keg we should initialize
15098355f576SJeff Roberson  *
15108355f576SJeff Roberson  * Returns
15118355f576SJeff Roberson  *	Nothing
15128355f576SJeff Roberson  */
15138355f576SJeff Roberson static void
1514e20a199fSJeff Roberson keg_small_init(uma_keg_t keg)
15158355f576SJeff Roberson {
1516244f4554SBosko Milekic 	u_int rsize;
1517244f4554SBosko Milekic 	u_int memused;
1518244f4554SBosko Milekic 	u_int wastedspace;
1519244f4554SBosko Milekic 	u_int shsize;
1520a55ebb7cSAndriy Gapon 	u_int slabsize;
15218355f576SJeff Roberson 
1522ad97af7eSGleb Smirnoff 	if (keg->uk_flags & UMA_ZONE_PCPU) {
152396c85efbSNathan Whitehorn 		u_int ncpus = (mp_maxid + 1) ? (mp_maxid + 1) : MAXCPU;
1524e28a647dSGleb Smirnoff 
1525ab3059a8SMatt Macy 		slabsize = UMA_PCPU_ALLOC_SIZE;
1526ab3059a8SMatt Macy 		keg->uk_ppera = ncpus;
1527ad97af7eSGleb Smirnoff 	} else {
1528a55ebb7cSAndriy Gapon 		slabsize = UMA_SLAB_SIZE;
1529ad97af7eSGleb Smirnoff 		keg->uk_ppera = 1;
1530ad97af7eSGleb Smirnoff 	}
1531ad97af7eSGleb Smirnoff 
1532ef72505eSJeff Roberson 	/*
1533ef72505eSJeff Roberson 	 * Calculate the size of each allocation (rsize) according to
1534ef72505eSJeff Roberson 	 * alignment.  If the requested size is smaller than we have
1535ef72505eSJeff Roberson 	 * allocation bits for we round it up.
1536ef72505eSJeff Roberson 	 */
1537099a0e58SBosko Milekic 	rsize = keg->uk_size;
1538a55ebb7cSAndriy Gapon 	if (rsize < slabsize / SLAB_SETSIZE)
1539a55ebb7cSAndriy Gapon 		rsize = slabsize / SLAB_SETSIZE;
1540099a0e58SBosko Milekic 	if (rsize & keg->uk_align)
1541099a0e58SBosko Milekic 		rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
1542099a0e58SBosko Milekic 	keg->uk_rsize = rsize;
1543ad97af7eSGleb Smirnoff 
1544ad97af7eSGleb Smirnoff 	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 ||
1545ab3059a8SMatt Macy 	    keg->uk_rsize < UMA_PCPU_ALLOC_SIZE,
1546ad97af7eSGleb Smirnoff 	    ("%s: size %u too large", __func__, keg->uk_rsize));
15478355f576SJeff Roberson 
1548ef72505eSJeff Roberson 	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
15492864dbbfSGleb Smirnoff 		shsize = 0;
1550ef72505eSJeff Roberson 	else
15513d5e3df7SGleb Smirnoff 		shsize = SIZEOF_UMA_SLAB;
15528355f576SJeff Roberson 
15531ca6ed45SGleb Smirnoff 	if (rsize <= slabsize - shsize)
1554a55ebb7cSAndriy Gapon 		keg->uk_ipers = (slabsize - shsize) / rsize;
15551ca6ed45SGleb Smirnoff 	else {
15561ca6ed45SGleb Smirnoff 		/* Handle the special case of one item per slab, where
15571ca6ed45SGleb Smirnoff 		 * the alignment requirement can be relaxed. */
15581ca6ed45SGleb Smirnoff 		KASSERT(keg->uk_size <= slabsize - shsize,
15591ca6ed45SGleb Smirnoff 		    ("%s: size %u greater than slab", __func__, keg->uk_size));
15601ca6ed45SGleb Smirnoff 		keg->uk_ipers = 1;
15611ca6ed45SGleb Smirnoff 	}
1562ef72505eSJeff Roberson 	KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
1563ad97af7eSGleb Smirnoff 	    ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
1564ad97af7eSGleb Smirnoff 
1565244f4554SBosko Milekic 	memused = keg->uk_ipers * rsize + shsize;
1566a55ebb7cSAndriy Gapon 	wastedspace = slabsize - memused;
1567244f4554SBosko Milekic 
156820e8e865SBosko Milekic 	/*
1569244f4554SBosko Milekic 	 * We can't do OFFPAGE if we're internal or if we've been
157020e8e865SBosko Milekic 	 * asked not to go to the VM for buckets.  If we do, we may
15716fd34d6fSJeff Roberson 	 * end up going to the VM for slabs, which we do not want
15726fd34d6fSJeff Roberson 	 * to do if we're UMA_ZFLAG_CACHEONLY as a result of
15736fd34d6fSJeff Roberson 	 * UMA_ZONE_VM, which clearly forbids it.
157420e8e865SBosko Milekic 	 */
1575099a0e58SBosko Milekic 	if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
1576099a0e58SBosko Milekic 	    (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
15778355f576SJeff Roberson 		return;
1578244f4554SBosko Milekic 
1579ef72505eSJeff Roberson 	/*
1580ef72505eSJeff Roberson 	 * See if using an OFFPAGE slab will limit our waste.  Only do
1581ef72505eSJeff Roberson 	 * this if it permits more items per-slab.
1582ef72505eSJeff Roberson 	 *
1583ef72505eSJeff Roberson 	 * XXX We could try growing slabsize to limit max waste as well.
1584ef72505eSJeff Roberson 	 * Historically this was not done because the VM could not
1585ef72505eSJeff Roberson 	 * efficiently handle contiguous allocations.
1586ef72505eSJeff Roberson 	 */
1587a55ebb7cSAndriy Gapon 	if ((wastedspace >= slabsize / UMA_MAX_WASTE) &&
1588a55ebb7cSAndriy Gapon 	    (keg->uk_ipers < (slabsize / keg->uk_rsize))) {
1589a55ebb7cSAndriy Gapon 		keg->uk_ipers = slabsize / keg->uk_rsize;
1590ef72505eSJeff Roberson 		KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
1591ad97af7eSGleb Smirnoff 		    ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
15921431a748SGleb Smirnoff 		CTR6(KTR_UMA, "UMA decided we need offpage slab headers for "
15931431a748SGleb Smirnoff 		    "keg: %s(%p), calculated wastedspace = %d, "
1594244f4554SBosko Milekic 		    "maximum wasted space allowed = %d, "
1595244f4554SBosko Milekic 		    "calculated ipers = %d, "
15961431a748SGleb Smirnoff 		    "new wasted space = %d\n", keg->uk_name, keg, wastedspace,
1597a55ebb7cSAndriy Gapon 		    slabsize / UMA_MAX_WASTE, keg->uk_ipers,
1598a55ebb7cSAndriy Gapon 		    slabsize - keg->uk_ipers * keg->uk_rsize);
159971353f7aSJeff Roberson 		/*
160071353f7aSJeff Roberson 		 * If we had access to memory to embed a slab header we
160171353f7aSJeff Roberson 		 * also have a page structure to use vtoslab() instead of
160271353f7aSJeff Roberson 		 * hash to find slabs.  If the zone was explicitly created
160371353f7aSJeff Roberson 		 * OFFPAGE we can't necessarily touch the memory.
160471353f7aSJeff Roberson 		 */
160571353f7aSJeff Roberson 		if ((keg->uk_flags & UMA_ZONE_OFFPAGE) == 0)
160671353f7aSJeff Roberson 			keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
16078355f576SJeff Roberson 	}
1608ad97af7eSGleb Smirnoff 
1609ad97af7eSGleb Smirnoff 	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
1610ad97af7eSGleb Smirnoff 	    (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
1611ad97af7eSGleb Smirnoff 		keg->uk_flags |= UMA_ZONE_HASH;
16128355f576SJeff Roberson }
16138355f576SJeff Roberson 
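/*
 * A worked example of the sizing above, assuming 4KB slabs and a
 * hypothetical 120-byte item with (16 - 1) alignment: rsize rounds up
 * to 128, and with the slab header kept inline ipers becomes
 * (4096 - SIZEOF_UMA_SLAB) / 128.  The core arithmetic in isolation:
 */
static u_int
small_ipers_sketch(u_int size, u_int align, u_int slabsize, u_int shsize)
{
	u_int rsize;

	rsize = size;
	if (rsize & align)	/* Round up to the alignment boundary. */
		rsize = (rsize & ~align) + (align + 1);
	return ((slabsize - shsize) / rsize);
}
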
16148355f576SJeff Roberson /*
1615e20a199fSJeff Roberson  * Finish creating a large (> UMA_SLAB_SIZE) uma keg.  Just give in and do
16168355f576SJeff Roberson  * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
16178355f576SJeff Roberson  * more complicated.
16188355f576SJeff Roberson  *
16198355f576SJeff Roberson  * Arguments
1620e20a199fSJeff Roberson  *	keg  The keg we should initialize
16218355f576SJeff Roberson  *
16228355f576SJeff Roberson  * Returns
16238355f576SJeff Roberson  *	Nothing
16248355f576SJeff Roberson  */
16258355f576SJeff Roberson static void
1626e20a199fSJeff Roberson keg_large_init(uma_keg_t keg)
16278355f576SJeff Roberson {
16288355f576SJeff Roberson 
1629e20a199fSJeff Roberson 	KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
1630ad97af7eSGleb Smirnoff 	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
1631ad97af7eSGleb Smirnoff 	    ("%s: Cannot large-init a UMA_ZONE_PCPU keg", __func__));
163220e8e865SBosko Milekic 
1633ad97af7eSGleb Smirnoff 	keg->uk_ppera = howmany(keg->uk_size, PAGE_SIZE);
1634099a0e58SBosko Milekic 	keg->uk_ipers = 1;
1635e9a069d8SJohn Baldwin 	keg->uk_rsize = keg->uk_size;
1636e9a069d8SJohn Baldwin 
1637cec48e00SAlexander Motin 	/* Check whether we have enough space to not do OFFPAGE. */
16383d5e3df7SGleb Smirnoff 	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) == 0 &&
16393d5e3df7SGleb Smirnoff 	    PAGE_SIZE * keg->uk_ppera - keg->uk_rsize < SIZEOF_UMA_SLAB) {
16402934eb8aSMark Johnston 		/*
16412934eb8aSMark Johnston 		 * We can't do OFFPAGE if we're internal, in which case
16422934eb8aSMark Johnston 		 * we need an extra page per allocation to contain the
16432934eb8aSMark Johnston 		 * slab header.
16442934eb8aSMark Johnston 		 */
16452934eb8aSMark Johnston 		if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) == 0)
164671353f7aSJeff Roberson 			keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
16472934eb8aSMark Johnston 		else
16482934eb8aSMark Johnston 			keg->uk_ppera++;
16492934eb8aSMark Johnston 	}
1650cec48e00SAlexander Motin 
1651cec48e00SAlexander Motin 	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
1652cec48e00SAlexander Motin 	    (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
1653099a0e58SBosko Milekic 		keg->uk_flags |= UMA_ZONE_HASH;
16548355f576SJeff Roberson }
16558355f576SJeff Roberson 
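/*
 * Worked example for keg_large_init() above: a hypothetical 9000-byte
 * item on 4KB pages gives uk_ppera = howmany(9000, 4096) = 3, leaving
 * 3 * 4096 - 9000 = 3288 bytes of tail space; since that exceeds
 * SIZEOF_UMA_SLAB, the slab header stays inline and neither OFFPAGE
 * nor an extra page is needed.
 */
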
1656e20a199fSJeff Roberson static void
1657e20a199fSJeff Roberson keg_cachespread_init(uma_keg_t keg)
1658e20a199fSJeff Roberson {
1659e20a199fSJeff Roberson 	int alignsize;
1660e20a199fSJeff Roberson 	int trailer;
1661e20a199fSJeff Roberson 	int pages;
1662e20a199fSJeff Roberson 	int rsize;
1663e20a199fSJeff Roberson 
1664ad97af7eSGleb Smirnoff 	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
1665ad97af7eSGleb Smirnoff 	    ("%s: Cannot cachespread-init a UMA_ZONE_PCPU keg", __func__));
1666ad97af7eSGleb Smirnoff 
1667e20a199fSJeff Roberson 	alignsize = keg->uk_align + 1;
1668e20a199fSJeff Roberson 	rsize = keg->uk_size;
1669e20a199fSJeff Roberson 	/*
1670e20a199fSJeff Roberson 	 * We want one item to start on every align boundary in a page.  To
1671e20a199fSJeff Roberson 	 * do this we will span pages.  We will also extend the item by the
1672e20a199fSJeff Roberson 	 * size of align if it is an even multiple of align.  Otherwise, it
1673e20a199fSJeff Roberson 	 * would fall on the same boundary every time.
1674e20a199fSJeff Roberson 	 */
1675e20a199fSJeff Roberson 	if (rsize & keg->uk_align)
1676e20a199fSJeff Roberson 		rsize = (rsize & ~keg->uk_align) + alignsize;
1677e20a199fSJeff Roberson 	if ((rsize & alignsize) == 0)
1678e20a199fSJeff Roberson 		rsize += alignsize;
1679e20a199fSJeff Roberson 	trailer = rsize - keg->uk_size;
1680e20a199fSJeff Roberson 	pages = (rsize * (PAGE_SIZE / alignsize)) / PAGE_SIZE;
1681e20a199fSJeff Roberson 	pages = MIN(pages, (128 * 1024) / PAGE_SIZE);
1682e20a199fSJeff Roberson 	keg->uk_rsize = rsize;
1683e20a199fSJeff Roberson 	keg->uk_ppera = pages;
1684e20a199fSJeff Roberson 	keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize;
1685e20a199fSJeff Roberson 	keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
16862367b4ddSDimitry Andric 	KASSERT(keg->uk_ipers <= SLAB_SETSIZE,
168742321809SGleb Smirnoff 	    ("%s: keg->uk_ipers too high(%d) increase max_ipers", __func__,
1688e20a199fSJeff Roberson 	    keg->uk_ipers));
1689e20a199fSJeff Roberson }
1690e20a199fSJeff Roberson 
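/*
 * Worked example for keg_cachespread_init() above, assuming 64-byte
 * cache lines, 4KB pages, and a hypothetical 256-byte item: 256 is an
 * even multiple of 64, so rsize becomes 320 to keep successive items
 * off the same line offset; then pages = (320 * 64) / 4096 = 5 and
 * ipers = (5 * 4096 + 64) / 320 = 64, with item k starting at offset
 * 320 * k, hitting each of the 64 possible cache-line offsets within
 * a page exactly once.
 */
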
16918355f576SJeff Roberson /*
1692099a0e58SBosko Milekic  * Keg header ctor.  This initializes all fields, locks, etc., and inserts
1693099a0e58SBosko Milekic  * the keg onto the global keg list.
16948355f576SJeff Roberson  *
16958355f576SJeff Roberson  * Arguments/Returns follow uma_ctor specifications
1696099a0e58SBosko Milekic  *	udata  Actually uma_kctor_args
1697099a0e58SBosko Milekic  */
1698b23f72e9SBrian Feldman static int
1699b23f72e9SBrian Feldman keg_ctor(void *mem, int size, void *udata, int flags)
1700099a0e58SBosko Milekic {
1701099a0e58SBosko Milekic 	struct uma_kctor_args *arg = udata;
1702099a0e58SBosko Milekic 	uma_keg_t keg = mem;
1703099a0e58SBosko Milekic 	uma_zone_t zone;
1704099a0e58SBosko Milekic 
1705099a0e58SBosko Milekic 	bzero(keg, size);
1706099a0e58SBosko Milekic 	keg->uk_size = arg->size;
1707099a0e58SBosko Milekic 	keg->uk_init = arg->uminit;
1708099a0e58SBosko Milekic 	keg->uk_fini = arg->fini;
1709099a0e58SBosko Milekic 	keg->uk_align = arg->align;
1710099a0e58SBosko Milekic 	keg->uk_free = 0;
17116fd34d6fSJeff Roberson 	keg->uk_reserve = 0;
1712099a0e58SBosko Milekic 	keg->uk_pages = 0;
1713099a0e58SBosko Milekic 	keg->uk_flags = arg->flags;
1714099a0e58SBosko Milekic 	keg->uk_slabzone = NULL;
1715099a0e58SBosko Milekic 
1716099a0e58SBosko Milekic 	/*
1717194a979eSMark Johnston 	 * We use a global round-robin policy by default.  Zones with
1718194a979eSMark Johnston 	 * UMA_ZONE_NUMA set will use first-touch instead, in which case the
1719194a979eSMark Johnston 	 * iterator is never run.
1720194a979eSMark Johnston 	 */
1721194a979eSMark Johnston 	keg->uk_dr.dr_policy = DOMAINSET_RR();
1722194a979eSMark Johnston 	keg->uk_dr.dr_iter = 0;
1723194a979eSMark Johnston 
1724194a979eSMark Johnston 	/*
1725099a0e58SBosko Milekic 	 * The master zone is passed to us at keg-creation time.
1726099a0e58SBosko Milekic 	 */
1727099a0e58SBosko Milekic 	zone = arg->zone;
1728e20a199fSJeff Roberson 	keg->uk_name = zone->uz_name;
1729099a0e58SBosko Milekic 
1730099a0e58SBosko Milekic 	if (arg->flags & UMA_ZONE_VM)
1731099a0e58SBosko Milekic 		keg->uk_flags |= UMA_ZFLAG_CACHEONLY;
1732099a0e58SBosko Milekic 
1733099a0e58SBosko Milekic 	if (arg->flags & UMA_ZONE_ZINIT)
1734099a0e58SBosko Milekic 		keg->uk_init = zero_init;
1735099a0e58SBosko Milekic 
1736cfcae3f8SGleb Smirnoff 	if (arg->flags & UMA_ZONE_MALLOC)
1737e20a199fSJeff Roberson 		keg->uk_flags |= UMA_ZONE_VTOSLAB;
1738e20a199fSJeff Roberson 
1739ad97af7eSGleb Smirnoff 	if (arg->flags & UMA_ZONE_PCPU)
1740ad97af7eSGleb Smirnoff #ifdef SMP
1741ad97af7eSGleb Smirnoff 		keg->uk_flags |= UMA_ZONE_OFFPAGE;
1742ad97af7eSGleb Smirnoff #else
1743ad97af7eSGleb Smirnoff 		keg->uk_flags &= ~UMA_ZONE_PCPU;
1744ad97af7eSGleb Smirnoff #endif
1745ad97af7eSGleb Smirnoff 
1746ef72505eSJeff Roberson 	if (keg->uk_flags & UMA_ZONE_CACHESPREAD) {
1747e20a199fSJeff Roberson 		keg_cachespread_init(keg);
1748244f4554SBosko Milekic 	} else {
1749b92b26adSGleb Smirnoff 		if (keg->uk_size > UMA_SLAB_SPACE)
1750e20a199fSJeff Roberson 			keg_large_init(keg);
1751244f4554SBosko Milekic 		else
1752e20a199fSJeff Roberson 			keg_small_init(keg);
1753244f4554SBosko Milekic 	}
1754099a0e58SBosko Milekic 
1755cfcae3f8SGleb Smirnoff 	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
1756099a0e58SBosko Milekic 		keg->uk_slabzone = slabzone;
1757099a0e58SBosko Milekic 
1758099a0e58SBosko Milekic 	/*
1759099a0e58SBosko Milekic 	 * If we haven't booted yet, we need allocations to go through the
1760099a0e58SBosko Milekic 	 * startup cache until the VM is ready.
1761099a0e58SBosko Milekic 	 */
1762f4bef67cSGleb Smirnoff 	if (booted < BOOT_PAGEALLOC)
17638cd02d00SAlan Cox 		keg->uk_allocf = startup_alloc;
176477e19437SGleb Smirnoff #ifdef UMA_MD_SMALL_ALLOC
176577e19437SGleb Smirnoff 	else if (keg->uk_ppera == 1)
176677e19437SGleb Smirnoff 		keg->uk_allocf = uma_small_alloc;
17678cd02d00SAlan Cox #endif
1768ab3059a8SMatt Macy 	else if (keg->uk_flags & UMA_ZONE_PCPU)
1769ab3059a8SMatt Macy 		keg->uk_allocf = pcpu_page_alloc;
177077e19437SGleb Smirnoff 	else
177177e19437SGleb Smirnoff 		keg->uk_allocf = page_alloc;
177277e19437SGleb Smirnoff #ifdef UMA_MD_SMALL_ALLOC
177377e19437SGleb Smirnoff 	if (keg->uk_ppera == 1)
177477e19437SGleb Smirnoff 		keg->uk_freef = uma_small_free;
177577e19437SGleb Smirnoff 	else
177677e19437SGleb Smirnoff #endif
1777ab3059a8SMatt Macy 	if (keg->uk_flags & UMA_ZONE_PCPU)
1778ab3059a8SMatt Macy 		keg->uk_freef = pcpu_page_free;
1779ab3059a8SMatt Macy 	else
178077e19437SGleb Smirnoff 		keg->uk_freef = page_free;
1781099a0e58SBosko Milekic 
1782099a0e58SBosko Milekic 	/*
1783af526374SJeff Roberson 	 * Initialize the keg's lock
1784099a0e58SBosko Milekic 	 */
1785af526374SJeff Roberson 	KEG_LOCK_INIT(keg, (arg->flags & UMA_ZONE_MTXCLASS));
1786099a0e58SBosko Milekic 
1787099a0e58SBosko Milekic 	/*
1788099a0e58SBosko Milekic 	 * If we're putting the slab header in the actual page we need to
17893d5e3df7SGleb Smirnoff 	 * figure out where in each page it goes.  See SIZEOF_UMA_SLAB
17903d5e3df7SGleb Smirnoff 	 * macro definition.
1791099a0e58SBosko Milekic 	 */
1792099a0e58SBosko Milekic 	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
17933d5e3df7SGleb Smirnoff 		keg->uk_pgoff = (PAGE_SIZE * keg->uk_ppera) - SIZEOF_UMA_SLAB;
1794244f4554SBosko Milekic 		/*
1795244f4554SBosko Milekic 		 * The only way the following check can fail is if our
1796244f4554SBosko Milekic 		 * UMA_ALIGN_PTR adjustments have made us bigger than
1797244f4554SBosko Milekic 		 * UMA_SLAB_SIZE.  I haven't verified whether this is
1798244f4554SBosko Milekic 		 * mathematically possible in all cases, so we assert
1799244f4554SBosko Milekic 		 * it here anyway.
1800244f4554SBosko Milekic 		 */
18013d5e3df7SGleb Smirnoff 		KASSERT(keg->uk_pgoff + sizeof(struct uma_slab) <=
18023d5e3df7SGleb Smirnoff 		    PAGE_SIZE * keg->uk_ppera,
18033d5e3df7SGleb Smirnoff 		    ("zone %s ipers %d rsize %d size %d slab won't fit",
18043d5e3df7SGleb Smirnoff 		    zone->uz_name, keg->uk_ipers, keg->uk_rsize, keg->uk_size));
1805099a0e58SBosko Milekic 	}
1806099a0e58SBosko Milekic 
1807099a0e58SBosko Milekic 	if (keg->uk_flags & UMA_ZONE_HASH)
18083b2f2cb8SAlexander Motin 		hash_alloc(&keg->uk_hash, 0);
1809099a0e58SBosko Milekic 
18101431a748SGleb Smirnoff 	CTR5(KTR_UMA, "keg_ctor %p zone %s(%p) out %d free %d\n",
18111431a748SGleb Smirnoff 	    keg, zone->uz_name, zone,
181257223e99SAndriy Gapon 	    (keg->uk_pages / keg->uk_ppera) * keg->uk_ipers - keg->uk_free,
181357223e99SAndriy Gapon 	    keg->uk_free);
1814099a0e58SBosko Milekic 
1815099a0e58SBosko Milekic 	LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
1816099a0e58SBosko Milekic 
1817111fbcd5SBryan Venteicher 	rw_wlock(&uma_rwlock);
1818099a0e58SBosko Milekic 	LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
1819111fbcd5SBryan Venteicher 	rw_wunlock(&uma_rwlock);
1820b23f72e9SBrian Feldman 	return (0);
1821099a0e58SBosko Milekic }
1822099a0e58SBosko Milekic 
18232efcc8cbSGleb Smirnoff static void
1824*20a4e154SJeff Roberson zone_alloc_counters(uma_zone_t zone, void *unused)
18252efcc8cbSGleb Smirnoff {
18262efcc8cbSGleb Smirnoff 
18272efcc8cbSGleb Smirnoff 	zone->uz_allocs = counter_u64_alloc(M_WAITOK);
18282efcc8cbSGleb Smirnoff 	zone->uz_frees = counter_u64_alloc(M_WAITOK);
18292efcc8cbSGleb Smirnoff 	zone->uz_fails = counter_u64_alloc(M_WAITOK);
18302efcc8cbSGleb Smirnoff }
18312efcc8cbSGleb Smirnoff 
1832*20a4e154SJeff Roberson #define	UMA_MAX_DUP	999
1833*20a4e154SJeff Roberson static void
1834*20a4e154SJeff Roberson zone_alloc_sysctl(uma_zone_t zone, void *unused)
1835*20a4e154SJeff Roberson {
1836*20a4e154SJeff Roberson 	uma_zone_domain_t zdom;
1837*20a4e154SJeff Roberson 	uma_keg_t keg;
1838*20a4e154SJeff Roberson 	struct sysctl_oid *oid, *domainoid;
1839*20a4e154SJeff Roberson 	int domains, i;
1840*20a4e154SJeff Roberson 	static const char *nokeg = "cache zone";
1841*20a4e154SJeff Roberson 	char *c;
1842*20a4e154SJeff Roberson 
1843*20a4e154SJeff Roberson 	/*
1844*20a4e154SJeff Roberson 	 * Make a sysctl-safe copy of the zone name by removing
1845*20a4e154SJeff Roberson 	 * any special characters and handling duplicates by
1846*20a4e154SJeff Roberson 	 * appending an index.
1847*20a4e154SJeff Roberson 	 */
1848*20a4e154SJeff Roberson 	if (zone->uz_namecnt != 0) {
1849*20a4e154SJeff Roberson 		if (zone->uz_namecnt > UMA_MAX_DUP)
1850*20a4e154SJeff Roberson 			zone->uz_namecnt = UMA_MAX_DUP;
1851*20a4e154SJeff Roberson 		zone->uz_ctlname = malloc(strlen(zone->uz_name) +
1852*20a4e154SJeff Roberson 		    sizeof(__XSTRING(UMA_MAX_DUP)) + 1, M_UMA, M_WAITOK);
1853*20a4e154SJeff Roberson 		sprintf(zone->uz_ctlname, "%s_%d", zone->uz_name,
1854*20a4e154SJeff Roberson 		    zone->uz_namecnt);
1855*20a4e154SJeff Roberson 	} else
1856*20a4e154SJeff Roberson 		zone->uz_ctlname = strdup(zone->uz_name, M_UMA);
1857*20a4e154SJeff Roberson 	for (c = zone->uz_ctlname; *c != '\0'; c++)
1858*20a4e154SJeff Roberson 		if (strchr("./\\ -", *c) != NULL)
1859*20a4e154SJeff Roberson 			*c = '_';
1860*20a4e154SJeff Roberson 
1861*20a4e154SJeff Roberson 	/*
1862*20a4e154SJeff Roberson 	 * Basic parameters at the root.
1863*20a4e154SJeff Roberson 	 */
1864*20a4e154SJeff Roberson 	zone->uz_oid = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_vm_uma),
1865*20a4e154SJeff Roberson 	    OID_AUTO, zone->uz_ctlname, CTLFLAG_RD, NULL, "");
1866*20a4e154SJeff Roberson 	oid = zone->uz_oid;
1867*20a4e154SJeff Roberson 	SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
1868*20a4e154SJeff Roberson 	    "size", CTLFLAG_RD, &zone->uz_size, 0, "Allocation size");
1869*20a4e154SJeff Roberson 	SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
1870*20a4e154SJeff Roberson 	    "flags", CTLFLAG_RD, &zone->uz_flags, 0,
1871*20a4e154SJeff Roberson 	    "Allocator configuration flags");
1872*20a4e154SJeff Roberson 	SYSCTL_ADD_U16(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
1873*20a4e154SJeff Roberson 	    "bucket_size", CTLFLAG_RD, &zone->uz_bucket_size, 0,
1874*20a4e154SJeff Roberson 	    "Desired per-cpu cache size");
1875*20a4e154SJeff Roberson 	SYSCTL_ADD_U16(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
1876*20a4e154SJeff Roberson 	    "bucket_size_max", CTLFLAG_RD, &zone->uz_bucket_size_max, 0,
1877*20a4e154SJeff Roberson 	    "Maximum allowed per-cpu cache size");
1878*20a4e154SJeff Roberson 
1879*20a4e154SJeff Roberson 	/*
1880*20a4e154SJeff Roberson 	 * Keg, if present.
1881*20a4e154SJeff Roberson 	 */
1882*20a4e154SJeff Roberson 	oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid), OID_AUTO,
1883*20a4e154SJeff Roberson 	    "keg", CTLFLAG_RD, NULL, "");
1884*20a4e154SJeff Roberson 	keg = zone->uz_keg;
1885*20a4e154SJeff Roberson 	if ((zone->uz_flags & UMA_ZFLAG_CACHEONLY) == 0) {
1886*20a4e154SJeff Roberson 		SYSCTL_ADD_CONST_STRING(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
1887*20a4e154SJeff Roberson 		    "name", CTLFLAG_RD, keg->uk_name, "Keg name");
1888*20a4e154SJeff Roberson 		SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
1889*20a4e154SJeff Roberson 		    "rsize", CTLFLAG_RD, &keg->uk_rsize, 0,
1890*20a4e154SJeff Roberson 		    "Real object size with alignment");
1891*20a4e154SJeff Roberson 		SYSCTL_ADD_U16(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
1892*20a4e154SJeff Roberson 		    "ppera", CTLFLAG_RD, &keg->uk_ppera, 0,
1893*20a4e154SJeff Roberson 		    "ppera", CTLFLAG_RD, &keg->uk_ppera, 0,
1894*20a4e154SJeff Roberson 		SYSCTL_ADD_U16(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
1895*20a4e154SJeff Roberson 		    "ipers", CTLFLAG_RD, &keg->uk_ipers, 0,
1896*20a4e154SJeff Roberson 		    "Items available per slab");
1897*20a4e154SJeff Roberson 		SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
1898*20a4e154SJeff Roberson 		    "align", CTLFLAG_RD, &keg->uk_align, 0,
1899*20a4e154SJeff Roberson 		    "Item alignment mask");
1900*20a4e154SJeff Roberson 		SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
1901*20a4e154SJeff Roberson 		    "pages", CTLFLAG_RD, &keg->uk_pages, 0,
1902*20a4e154SJeff Roberson 		    "Total pages currently allocated from VM");
1903*20a4e154SJeff Roberson 		SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
1904*20a4e154SJeff Roberson 		    "free", CTLFLAG_RD, &keg->uk_free, 0,
1905*20a4e154SJeff Roberson 		    "Items free in the slab layer");
1906*20a4e154SJeff Roberson 	} else
1907*20a4e154SJeff Roberson 		SYSCTL_ADD_CONST_STRING(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
1908*20a4e154SJeff Roberson 		    "name", CTLFLAG_RD, nokeg, "Keg name");
1909*20a4e154SJeff Roberson 
1910*20a4e154SJeff Roberson 	/*
1911*20a4e154SJeff Roberson 	 * Information about zone limits.
1912*20a4e154SJeff Roberson 	 */
1913*20a4e154SJeff Roberson 	oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid), OID_AUTO,
1914*20a4e154SJeff Roberson 	    "limit", CTLFLAG_RD, NULL, "");
1915*20a4e154SJeff Roberson 	SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
1916*20a4e154SJeff Roberson 	    "items", CTLFLAG_RD, &zone->uz_items, 0,
1917*20a4e154SJeff Roberson 	    "Current number of cached items");
1918*20a4e154SJeff Roberson 	SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
1919*20a4e154SJeff Roberson 	    "max_items", CTLFLAG_RD, &zone->uz_max_items, 0,
1920*20a4e154SJeff Roberson 	    "Maximum number of cached items");
1921*20a4e154SJeff Roberson 	SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
1922*20a4e154SJeff Roberson 	    "sleepers", CTLFLAG_RD, &zone->uz_sleepers, 0,
1923*20a4e154SJeff Roberson 	    "Number of threads sleeping at limit");
1924*20a4e154SJeff Roberson 	SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
1925*20a4e154SJeff Roberson 	    "sleeps", CTLFLAG_RD, &zone->uz_sleeps, 0,
1926*20a4e154SJeff Roberson 	    "Total zone limit sleeps");
1927*20a4e154SJeff Roberson 
1928*20a4e154SJeff Roberson 	/*
1929*20a4e154SJeff Roberson 	 * Per-domain information.
1930*20a4e154SJeff Roberson 	 */
1931*20a4e154SJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_NUMA) != 0)
1932*20a4e154SJeff Roberson 		domains = vm_ndomains;
1933*20a4e154SJeff Roberson 	else
1934*20a4e154SJeff Roberson 		domains = 1;
1935*20a4e154SJeff Roberson 	domainoid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid),
1936*20a4e154SJeff Roberson 	    OID_AUTO, "domain", CTLFLAG_RD, NULL, "");
1937*20a4e154SJeff Roberson 	for (i = 0; i < domains; i++) {
1938*20a4e154SJeff Roberson 		zdom = &zone->uz_domain[i];
1939*20a4e154SJeff Roberson 		oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(domainoid),
1940*20a4e154SJeff Roberson 		    OID_AUTO, VM_DOMAIN(i)->vmd_name, CTLFLAG_RD, NULL, "");
1941*20a4e154SJeff Roberson 		SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
1942*20a4e154SJeff Roberson 		    "nitems", CTLFLAG_RD, &zdom->uzd_nitems,
1943*20a4e154SJeff Roberson 		    "Number of items in this domain");
1944*20a4e154SJeff Roberson 		SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
1945*20a4e154SJeff Roberson 		    "imax", CTLFLAG_RD, &zdom->uzd_imax,
1946*20a4e154SJeff Roberson 		    "Maximum item count in this period");
1947*20a4e154SJeff Roberson 		SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
1948*20a4e154SJeff Roberson 		    "imin", CTLFLAG_RD, &zdom->uzd_imin,
1949*20a4e154SJeff Roberson 		    "Minimum item count in this period");
1950*20a4e154SJeff Roberson 		SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
1951*20a4e154SJeff Roberson 		    "wss", CTLFLAG_RD, &zdom->uzd_wss,
1952*20a4e154SJeff Roberson 		    "Working set size");
1953*20a4e154SJeff Roberson 	}
1954*20a4e154SJeff Roberson 
1955*20a4e154SJeff Roberson 	/*
1956*20a4e154SJeff Roberson 	 * General statistics.
1957*20a4e154SJeff Roberson 	 */
1958*20a4e154SJeff Roberson 	oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid), OID_AUTO,
1959*20a4e154SJeff Roberson 	    "stats", CTLFLAG_RD, NULL, "");
1960*20a4e154SJeff Roberson 	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
1961*20a4e154SJeff Roberson 	    "current", CTLFLAG_RD | CTLTYPE_INT | CTLFLAG_MPSAFE,
1962*20a4e154SJeff Roberson 	    zone, 1, sysctl_handle_uma_zone_cur, "I",
1963*20a4e154SJeff Roberson 	    "Current number of allocated items");
1964*20a4e154SJeff Roberson 	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
1965*20a4e154SJeff Roberson 	    "allocs", CTLFLAG_RD | CTLTYPE_U64 | CTLFLAG_MPSAFE,
1966*20a4e154SJeff Roberson 	    zone, 0, sysctl_handle_uma_zone_allocs, "QU",
1967*20a4e154SJeff Roberson 	    "Total allocation calls");
1968*20a4e154SJeff Roberson 	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
1969*20a4e154SJeff Roberson 	    "frees", CTLFLAG_RD | CTLTYPE_U64 | CTLFLAG_MPSAFE,
1970*20a4e154SJeff Roberson 	    zone, 0, sysctl_handle_uma_zone_frees, "QU",
1971*20a4e154SJeff Roberson 	    "Total free calls");
1972*20a4e154SJeff Roberson 	SYSCTL_ADD_COUNTER_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
1973*20a4e154SJeff Roberson 	    "fails", CTLFLAG_RD, &zone->uz_fails,
1974*20a4e154SJeff Roberson 	    "Number of allocation failures");
1975*20a4e154SJeff Roberson 	SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
1976*20a4e154SJeff Roberson 	    "xdomain", CTLFLAG_RD, &zone->uz_xdomain, 0,
1977*20a4e154SJeff Roberson 	    "Free calls from the wrong domain");
1978*20a4e154SJeff Roberson }
1979*20a4e154SJeff Roberson 
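/*
 * A condensed sketch of the name mangling in zone_alloc_sysctl()
 * (hypothetical helper): a zone named "vm pgcache" with namecnt 1
 * becomes the node "vm_pgcache_1", so every zone gets a unique,
 * sysctl-safe name under vm.uma.
 */
static void
ctlname_sketch(char *dst, size_t len, const char *name, int namecnt)
{
	char *c;

	if (namecnt != 0)
		snprintf(dst, len, "%s_%d", name, namecnt);
	else
		strlcpy(dst, name, len);
	for (c = dst; *c != '\0'; c++)
		if (strchr("./\\ -", *c) != NULL)
			*c = '_';
}
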
1980*20a4e154SJeff Roberson struct uma_zone_count {
1981*20a4e154SJeff Roberson 	const char	*name;
1982*20a4e154SJeff Roberson 	int		count;
1983*20a4e154SJeff Roberson };
1984*20a4e154SJeff Roberson 
1985*20a4e154SJeff Roberson static void
1986*20a4e154SJeff Roberson zone_count(uma_zone_t zone, void *arg)
1987*20a4e154SJeff Roberson {
1988*20a4e154SJeff Roberson 	struct uma_zone_count *cnt;
1989*20a4e154SJeff Roberson 
1990*20a4e154SJeff Roberson 	cnt = arg;
1991*20a4e154SJeff Roberson 	if (strcmp(zone->uz_name, cnt->name) == 0)
1992*20a4e154SJeff Roberson 		cnt->count++;
1993*20a4e154SJeff Roberson }
1994*20a4e154SJeff Roberson 
1995099a0e58SBosko Milekic /*
1996099a0e58SBosko Milekic  * Zone header ctor.  This initializes all fields, locks, etc.
1997099a0e58SBosko Milekic  *
1998099a0e58SBosko Milekic  * Arguments/Returns follow uma_ctor specifications
1999099a0e58SBosko Milekic  *	udata  Actually uma_zctor_args
20008355f576SJeff Roberson  */
2001b23f72e9SBrian Feldman static int
2002b23f72e9SBrian Feldman zone_ctor(void *mem, int size, void *udata, int flags)
20038355f576SJeff Roberson {
2004*20a4e154SJeff Roberson 	struct uma_zone_count cnt;
20058355f576SJeff Roberson 	struct uma_zctor_args *arg = udata;
20068355f576SJeff Roberson 	uma_zone_t zone = mem;
2007099a0e58SBosko Milekic 	uma_zone_t z;
2008099a0e58SBosko Milekic 	uma_keg_t keg;
200908cfa56eSMark Johnston 	int i;
20108355f576SJeff Roberson 
20118355f576SJeff Roberson 	bzero(zone, size);
20128355f576SJeff Roberson 	zone->uz_name = arg->name;
20138355f576SJeff Roberson 	zone->uz_ctor = arg->ctor;
20148355f576SJeff Roberson 	zone->uz_dtor = arg->dtor;
2015099a0e58SBosko Milekic 	zone->uz_init = NULL;
2016099a0e58SBosko Milekic 	zone->uz_fini = NULL;
2017bf965959SSean Bruno 	zone->uz_sleeps = 0;
2018c1685086SJeff Roberson 	zone->uz_xdomain = 0;
2019*20a4e154SJeff Roberson 	zone->uz_bucket_size = 0;
2020*20a4e154SJeff Roberson 	zone->uz_bucket_size_min = 0;
2021*20a4e154SJeff Roberson 	zone->uz_bucket_size_max = BUCKET_MAX;
2022e20a199fSJeff Roberson 	zone->uz_flags = 0;
20232f891cd5SPawel Jakub Dawidek 	zone->uz_warning = NULL;
2024ab3185d1SJeff Roberson 	/* The domain structures follow the cpu structures. */
2025ab3185d1SJeff Roberson 	zone->uz_domain = (struct uma_zone_domain *)&zone->uz_cpu[mp_ncpus];
2026bb15d1c7SGleb Smirnoff 	zone->uz_bkt_max = ULONG_MAX;
20272f891cd5SPawel Jakub Dawidek 	timevalclear(&zone->uz_ratecheck);
2028af526374SJeff Roberson 
2029*20a4e154SJeff Roberson 	/* Count the number of duplicate names. */
2030*20a4e154SJeff Roberson 	cnt.name = arg->name;
2031*20a4e154SJeff Roberson 	cnt.count = 0;
2032*20a4e154SJeff Roberson 	zone_foreach(zone_count, &cnt);
2033*20a4e154SJeff Roberson 	zone->uz_namecnt = cnt.count;
20342efcc8cbSGleb Smirnoff 
203508cfa56eSMark Johnston 	for (i = 0; i < vm_ndomains; i++)
203608cfa56eSMark Johnston 		TAILQ_INIT(&zone->uz_domain[i].uzd_buckets);
203708cfa56eSMark Johnston 
2038ca293436SRyan Libby #ifdef INVARIANTS
2039ca293436SRyan Libby 	if (arg->uminit == trash_init && arg->fini == trash_fini)
2040ca293436SRyan Libby 		zone->uz_flags |= UMA_ZFLAG_TRASH;
2041ca293436SRyan Libby #endif
2042ca293436SRyan Libby 
20430095a784SJeff Roberson 	/*
20440095a784SJeff Roberson 	 * This is a pure cache zone, no kegs.
20450095a784SJeff Roberson 	 */
20460095a784SJeff Roberson 	if (arg->import) {
20476fd34d6fSJeff Roberson 		if (arg->flags & UMA_ZONE_VM)
20486fd34d6fSJeff Roberson 			arg->flags |= UMA_ZFLAG_CACHEONLY;
20496fd34d6fSJeff Roberson 		zone->uz_flags = arg->flags;
2050af526374SJeff Roberson 		zone->uz_size = arg->size;
20510095a784SJeff Roberson 		zone->uz_import = arg->import;
20520095a784SJeff Roberson 		zone->uz_release = arg->release;
20530095a784SJeff Roberson 		zone->uz_arg = arg->arg;
2054af526374SJeff Roberson 		zone->uz_lockptr = &zone->uz_lock;
2055bb15d1c7SGleb Smirnoff 		ZONE_LOCK_INIT(zone, (arg->flags & UMA_ZONE_MTXCLASS));
2056111fbcd5SBryan Venteicher 		rw_wlock(&uma_rwlock);
205703175483SAlexander Motin 		LIST_INSERT_HEAD(&uma_cachezones, zone, uz_link);
2058111fbcd5SBryan Venteicher 		rw_wunlock(&uma_rwlock);
2059af526374SJeff Roberson 		goto out;
20600095a784SJeff Roberson 	}
20610095a784SJeff Roberson 
20620095a784SJeff Roberson 	/*
20630095a784SJeff Roberson 	 * Use the regular zone/keg/slab allocator.
20640095a784SJeff Roberson 	 */
20650095a784SJeff Roberson 	zone->uz_import = (uma_import)zone_import;
20660095a784SJeff Roberson 	zone->uz_release = (uma_release)zone_release;
20670095a784SJeff Roberson 	zone->uz_arg = zone;
2068bb15d1c7SGleb Smirnoff 	keg = arg->keg;
20690095a784SJeff Roberson 
2070099a0e58SBosko Milekic 	if (arg->flags & UMA_ZONE_SECONDARY) {
2071*20a4e154SJeff Roberson 		KASSERT((zone->uz_flags & UMA_ZFLAG_INTERNAL) == 0,
2072*20a4e154SJeff Roberson 		    ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
2073099a0e58SBosko Milekic 		KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
20748355f576SJeff Roberson 		zone->uz_init = arg->uminit;
2075e221e841SJeff Roberson 		zone->uz_fini = arg->fini;
2076af526374SJeff Roberson 		zone->uz_lockptr = &keg->uk_lock;
2077e20a199fSJeff Roberson 		zone->uz_flags |= UMA_ZONE_SECONDARY;
2078111fbcd5SBryan Venteicher 		rw_wlock(&uma_rwlock);
2079099a0e58SBosko Milekic 		ZONE_LOCK(zone);
2080099a0e58SBosko Milekic 		LIST_FOREACH(z, &keg->uk_zones, uz_link) {
2081099a0e58SBosko Milekic 			if (LIST_NEXT(z, uz_link) == NULL) {
2082099a0e58SBosko Milekic 				LIST_INSERT_AFTER(z, zone, uz_link);
2083099a0e58SBosko Milekic 				break;
2084099a0e58SBosko Milekic 			}
2085099a0e58SBosko Milekic 		}
2086099a0e58SBosko Milekic 		ZONE_UNLOCK(zone);
2087111fbcd5SBryan Venteicher 		rw_wunlock(&uma_rwlock);
2088e20a199fSJeff Roberson 	} else if (keg == NULL) {
2089e20a199fSJeff Roberson 		if ((keg = uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
2090e20a199fSJeff Roberson 		    arg->align, arg->flags)) == NULL)
2091b23f72e9SBrian Feldman 			return (ENOMEM);
2092099a0e58SBosko Milekic 	} else {
2093099a0e58SBosko Milekic 		struct uma_kctor_args karg;
2094b23f72e9SBrian Feldman 		int error;
2095099a0e58SBosko Milekic 
2096099a0e58SBosko Milekic 		/* We should only be here from uma_startup() */
2097099a0e58SBosko Milekic 		karg.size = arg->size;
2098099a0e58SBosko Milekic 		karg.uminit = arg->uminit;
2099099a0e58SBosko Milekic 		karg.fini = arg->fini;
2100099a0e58SBosko Milekic 		karg.align = arg->align;
2101099a0e58SBosko Milekic 		karg.flags = arg->flags;
2102099a0e58SBosko Milekic 		karg.zone = zone;
2103b23f72e9SBrian Feldman 		error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
2104b23f72e9SBrian Feldman 		    flags);
2105b23f72e9SBrian Feldman 		if (error)
2106b23f72e9SBrian Feldman 			return (error);
2107099a0e58SBosko Milekic 	}
21080095a784SJeff Roberson 
2109*20a4e154SJeff Roberson 	/* Inherit properties from the keg. */
2110bb15d1c7SGleb Smirnoff 	zone->uz_keg = keg;
2111e20a199fSJeff Roberson 	zone->uz_size = keg->uk_size;
2112e20a199fSJeff Roberson 	zone->uz_flags |= (keg->uk_flags &
2113e20a199fSJeff Roberson 	    (UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT));
21148355f576SJeff Roberson 
2115*20a4e154SJeff Roberson out:
2116*20a4e154SJeff Roberson 	if (__predict_true(booted == BOOT_RUNNING)) {
2117*20a4e154SJeff Roberson 		zone_alloc_counters(zone, NULL);
2118*20a4e154SJeff Roberson 		zone_alloc_sysctl(zone, NULL);
2119*20a4e154SJeff Roberson 	} else {
2120*20a4e154SJeff Roberson 		zone->uz_allocs = EARLY_COUNTER;
2121*20a4e154SJeff Roberson 		zone->uz_frees = EARLY_COUNTER;
2122*20a4e154SJeff Roberson 		zone->uz_fails = EARLY_COUNTER;
2123099a0e58SBosko Milekic 	}
21248355f576SJeff Roberson 
21257e28037aSMark Johnston 	KASSERT((arg->flags & (UMA_ZONE_MAXBUCKET | UMA_ZONE_NOBUCKET)) !=
21267e28037aSMark Johnston 	    (UMA_ZONE_MAXBUCKET | UMA_ZONE_NOBUCKET),
21277e28037aSMark Johnston 	    ("Invalid zone flag combination"));
2128*20a4e154SJeff Roberson 	if (arg->flags & UMA_ZFLAG_INTERNAL)
2129*20a4e154SJeff Roberson 		zone->uz_bucket_size_max = zone->uz_bucket_size = 0;
2130*20a4e154SJeff Roberson 	if ((arg->flags & UMA_ZONE_MAXBUCKET) != 0)
2131*20a4e154SJeff Roberson 		zone->uz_bucket_size = BUCKET_MAX;
2132*20a4e154SJeff Roberson 	else if ((arg->flags & UMA_ZONE_MINBUCKET) != 0)
2133*20a4e154SJeff Roberson 		zone->uz_bucket_size_max = zone->uz_bucket_size = BUCKET_MIN;
2134*20a4e154SJeff Roberson 	else if ((arg->flags & UMA_ZONE_NOBUCKET) != 0)
2135*20a4e154SJeff Roberson 		zone->uz_bucket_size = 0;
21367e28037aSMark Johnston 	else
2137*20a4e154SJeff Roberson 		zone->uz_bucket_size = bucket_select(zone->uz_size);
2138*20a4e154SJeff Roberson 	zone->uz_bucket_size_min = zone->uz_bucket_size;
2139fc03d22bSJeff Roberson 
2140b23f72e9SBrian Feldman 	return (0);
21418355f576SJeff Roberson }
21428355f576SJeff Roberson 
21438355f576SJeff Roberson /*
2144099a0e58SBosko Milekic  * Keg header dtor.  This frees all data, destroys locks and frees the hash
2145099a0e58SBosko Milekic  * table; the keg is removed from the global list by zone_dtor() beforehand.
21469c2cd7e5SJeff Roberson  *
21479c2cd7e5SJeff Roberson  * Arguments/Returns follow uma_dtor specifications
21489c2cd7e5SJeff Roberson  *	udata  unused
21499c2cd7e5SJeff Roberson  */
2150099a0e58SBosko Milekic static void
2151099a0e58SBosko Milekic keg_dtor(void *arg, int size, void *udata)
2152099a0e58SBosko Milekic {
2153099a0e58SBosko Milekic 	uma_keg_t keg;
21549c2cd7e5SJeff Roberson 
2155099a0e58SBosko Milekic 	keg = (uma_keg_t)arg;
2156e20a199fSJeff Roberson 	KEG_LOCK(keg);
2157099a0e58SBosko Milekic 	if (keg->uk_free != 0) {
2158a3845534SCraig Rodrigues 		printf("Freed UMA keg (%s) was not empty (%d items).  "
2159099a0e58SBosko Milekic 		    "Lost %d pages of memory.\n",
2160a3845534SCraig Rodrigues 		    keg->uk_name ? keg->uk_name : "",
2161099a0e58SBosko Milekic 		    keg->uk_free, keg->uk_pages);
2162099a0e58SBosko Milekic 	}
2163e20a199fSJeff Roberson 	KEG_UNLOCK(keg);
2164099a0e58SBosko Milekic 
2165099a0e58SBosko Milekic 	hash_free(&keg->uk_hash);
2166099a0e58SBosko Milekic 
2167e20a199fSJeff Roberson 	KEG_LOCK_FINI(keg);
2168099a0e58SBosko Milekic }
2169099a0e58SBosko Milekic 
2170099a0e58SBosko Milekic /*
2171099a0e58SBosko Milekic  * Zone header dtor.
2172099a0e58SBosko Milekic  *
2173099a0e58SBosko Milekic  * Arguments/Returns follow uma_dtor specifications
2174099a0e58SBosko Milekic  *	udata  unused
2175099a0e58SBosko Milekic  */
21769c2cd7e5SJeff Roberson static void
21779c2cd7e5SJeff Roberson zone_dtor(void *arg, int size, void *udata)
21789c2cd7e5SJeff Roberson {
21799c2cd7e5SJeff Roberson 	uma_zone_t zone;
2180099a0e58SBosko Milekic 	uma_keg_t keg;
21819c2cd7e5SJeff Roberson 
21829c2cd7e5SJeff Roberson 	zone = (uma_zone_t)arg;
21839643769aSJeff Roberson 
2184*20a4e154SJeff Roberson 	sysctl_remove_oid(zone->uz_oid, 1, 1);
2185*20a4e154SJeff Roberson 
2186e20a199fSJeff Roberson 	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
21879643769aSJeff Roberson 		cache_drain(zone);
2188099a0e58SBosko Milekic 
2189111fbcd5SBryan Venteicher 	rw_wlock(&uma_rwlock);
2190099a0e58SBosko Milekic 	LIST_REMOVE(zone, uz_link);
2191111fbcd5SBryan Venteicher 	rw_wunlock(&uma_rwlock);
2192099a0e58SBosko Milekic 	/*
2193099a0e58SBosko Milekic 	 * XXX there are some races here where the zone can be
2194099a0e58SBosko Milekic 	 * drained, the zone lock released, and the zone refilled
2195099a0e58SBosko Milekic 	 * before we remove it... we don't care for now.
2197099a0e58SBosko Milekic 	 */
219808cfa56eSMark Johnston 	zone_reclaim(zone, M_WAITOK, true);
2199e20a199fSJeff Roberson 	/*
2200323ad386STycho Nightingale 	 * We only destroy kegs from non-secondary/non-cache zones.
2201e20a199fSJeff Roberson 	 */
2202323ad386STycho Nightingale 	if ((zone->uz_flags & (UMA_ZONE_SECONDARY | UMA_ZFLAG_CACHE)) == 0) {
2203323ad386STycho Nightingale 		keg = zone->uz_keg;
2204111fbcd5SBryan Venteicher 		rw_wlock(&uma_rwlock);
2205099a0e58SBosko Milekic 		LIST_REMOVE(keg, uk_link);
2206111fbcd5SBryan Venteicher 		rw_wunlock(&uma_rwlock);
22070095a784SJeff Roberson 		zone_free_item(kegs, keg, NULL, SKIP_NONE);
22089c2cd7e5SJeff Roberson 	}
22092efcc8cbSGleb Smirnoff 	counter_u64_free(zone->uz_allocs);
22102efcc8cbSGleb Smirnoff 	counter_u64_free(zone->uz_frees);
22112efcc8cbSGleb Smirnoff 	counter_u64_free(zone->uz_fails);
2212*20a4e154SJeff Roberson 	free(zone->uz_ctlname, M_UMA);
2213bb15d1c7SGleb Smirnoff 	if (zone->uz_lockptr == &zone->uz_lock)
2214af526374SJeff Roberson 		ZONE_LOCK_FINI(zone);
2215099a0e58SBosko Milekic }
2216099a0e58SBosko Milekic 
22179c2cd7e5SJeff Roberson /*
22188355f576SJeff Roberson  * Traverses every zone in the system and calls a callback
22198355f576SJeff Roberson  *
22208355f576SJeff Roberson  * Arguments:
22218355f576SJeff Roberson  *	zfunc  A pointer to a function which accepts a zone and an
22228355f576SJeff Roberson  *		opaque argument; arg is passed through to each call.
22238355f576SJeff Roberson  *
22248355f576SJeff Roberson  * Returns:
22258355f576SJeff Roberson  *	Nothing
22268355f576SJeff Roberson  */
22278355f576SJeff Roberson static void
2228*20a4e154SJeff Roberson zone_foreach(void (*zfunc)(uma_zone_t, void *arg), void *arg)
22298355f576SJeff Roberson {
2230099a0e58SBosko Milekic 	uma_keg_t keg;
22318355f576SJeff Roberson 	uma_zone_t zone;
22328355f576SJeff Roberson 
22332efcc8cbSGleb Smirnoff 	/*
22342efcc8cbSGleb Smirnoff 	 * Before BOOT_RUNNING we are guaranteed to be single
22352efcc8cbSGleb Smirnoff 	 * threaded, so locking isn't needed. Startup functions
22362efcc8cbSGleb Smirnoff 	 * are allowed to use M_WAITOK.
22372efcc8cbSGleb Smirnoff 	 */
22382efcc8cbSGleb Smirnoff 	if (__predict_true(booted == BOOT_RUNNING))
2239111fbcd5SBryan Venteicher 		rw_rlock(&uma_rwlock);
2240099a0e58SBosko Milekic 	LIST_FOREACH(keg, &uma_kegs, uk_link) {
2241099a0e58SBosko Milekic 		LIST_FOREACH(zone, &keg->uk_zones, uz_link)
2242*20a4e154SJeff Roberson 			zfunc(zone, arg);
2243099a0e58SBosko Milekic 	}
224408034d10SKonstantin Belousov 	LIST_FOREACH(zone, &uma_cachezones, uz_link)
2245*20a4e154SJeff Roberson 		zfunc(zone, arg);
22462efcc8cbSGleb Smirnoff 	if (__predict_true(booted == BOOT_RUNNING))
2247111fbcd5SBryan Venteicher 		rw_runlock(&uma_rwlock);
22488355f576SJeff Roberson }
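/*
 * A minimal sketch (not part of the original file) of how a caller in
 * this file might use zone_foreach() with its opaque argument; the
 * zone_count() helper and count_all_zones() are hypothetical names.
 */
#if 0
static void
zone_count(uma_zone_t zone, void *arg)
{
	int *cnt;

	cnt = arg;
	(*cnt)++;
}

static int
count_all_zones(void)
{
	int cnt;

	cnt = 0;
	zone_foreach(zone_count, &cnt);
	return (cnt);
}
#endif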
22498355f576SJeff Roberson 
2250f4bef67cSGleb Smirnoff /*
2251f4bef67cSGleb Smirnoff  * Count how many pages we need to bootstrap.  VM supplies
2252f4bef67cSGleb Smirnoff  * its need for early zones in the argument; we add our own zones,
2253f4bef67cSGleb Smirnoff  * which consist of: UMA Slabs, UMA Hash and 9 Bucket zones.  The
2254f4bef67cSGleb Smirnoff  * zone of zones and zone of kegs are accounted for separately.
2255f4bef67cSGleb Smirnoff  */
2256f4bef67cSGleb Smirnoff #define	UMA_BOOT_ZONES	11
22575073a083SGleb Smirnoff /* Zone of zones and zone of kegs have arbitrary alignment. */
22585073a083SGleb Smirnoff #define	UMA_BOOT_ALIGN	32
2259f4bef67cSGleb Smirnoff static int zsize, ksize;
2260f4bef67cSGleb Smirnoff int
2261f7d35785SGleb Smirnoff uma_startup_count(int vm_zones)
2262f4bef67cSGleb Smirnoff {
2263f7d35785SGleb Smirnoff 	int zones, pages;
2264f4bef67cSGleb Smirnoff 
2265f4bef67cSGleb Smirnoff 	ksize = sizeof(struct uma_keg) +
2266f4bef67cSGleb Smirnoff 	    (sizeof(struct uma_domain) * vm_ndomains);
2267f4bef67cSGleb Smirnoff 	zsize = sizeof(struct uma_zone) +
2268f4bef67cSGleb Smirnoff 	    (sizeof(struct uma_cache) * (mp_maxid + 1)) +
2269f4bef67cSGleb Smirnoff 	    (sizeof(struct uma_zone_domain) * vm_ndomains);
2270f4bef67cSGleb Smirnoff 
22715073a083SGleb Smirnoff 	/*
22725073a083SGleb Smirnoff 	 * Memory for the zone of kegs and its keg,
22735073a083SGleb Smirnoff 	 * and for zone of zones.
22745073a083SGleb Smirnoff 	 */
2275f4bef67cSGleb Smirnoff 	pages = howmany(roundup(zsize, CACHE_LINE_SIZE) * 2 +
2276f4bef67cSGleb Smirnoff 	    roundup(ksize, CACHE_LINE_SIZE), PAGE_SIZE);
2277f4bef67cSGleb Smirnoff 
2278f7d35785SGleb Smirnoff #ifdef	UMA_MD_SMALL_ALLOC
2279f7d35785SGleb Smirnoff 	zones = UMA_BOOT_ZONES;
2280f7d35785SGleb Smirnoff #else
2281f7d35785SGleb Smirnoff 	zones = UMA_BOOT_ZONES + vm_zones;
2282f7d35785SGleb Smirnoff 	vm_zones = 0;
2283f7d35785SGleb Smirnoff #endif
2284f4bef67cSGleb Smirnoff 
22855073a083SGleb Smirnoff 	/* Memory for the rest of the startup zones, both UMA and VM, ... */
22860b2e3aeaSGleb Smirnoff 	if (zsize > UMA_SLAB_SPACE) {
22870b2e3aeaSGleb Smirnoff 		/* See keg_large_init(). */
22880b2e3aeaSGleb Smirnoff 		u_int ppera;
22890b2e3aeaSGleb Smirnoff 
22900b2e3aeaSGleb Smirnoff 		ppera = howmany(roundup2(zsize, UMA_BOOT_ALIGN), PAGE_SIZE);
22910b2e3aeaSGleb Smirnoff 		if (PAGE_SIZE * ppera - roundup2(zsize, UMA_BOOT_ALIGN) <
22920b2e3aeaSGleb Smirnoff 		    SIZEOF_UMA_SLAB)
22930b2e3aeaSGleb Smirnoff 			ppera++;
22940b2e3aeaSGleb Smirnoff 		pages += (zones + vm_zones) * ppera;
22950b2e3aeaSGleb Smirnoff 	} else if (roundup2(zsize, UMA_BOOT_ALIGN) > UMA_SLAB_SPACE)
22960b2e3aeaSGleb Smirnoff 		/* See keg_small_init() special case for uk_ppera = 1. */
229796a10340SGleb Smirnoff 		pages += zones;
2298f4bef67cSGleb Smirnoff 	else
22995073a083SGleb Smirnoff 		pages += howmany(zones,
23005073a083SGleb Smirnoff 		    UMA_SLAB_SPACE / roundup2(zsize, UMA_BOOT_ALIGN));
2301f4bef67cSGleb Smirnoff 
23025073a083SGleb Smirnoff 	/* ... and their kegs. Note that zone of zones allocates a keg! */
23035073a083SGleb Smirnoff 	pages += howmany(zones + 1,
23045073a083SGleb Smirnoff 	    UMA_SLAB_SPACE / roundup2(ksize, UMA_BOOT_ALIGN));
2305f4bef67cSGleb Smirnoff 
2306f4bef67cSGleb Smirnoff 	/*
23075073a083SGleb Smirnoff 	 * Most of the startup zones are not going to be offpage, which is
23085073a083SGleb Smirnoff 	 * why we use UMA_SLAB_SPACE instead of UMA_SLAB_SIZE in all of the
23095073a083SGleb Smirnoff 	 * calculations.  Some large bucket zones will be offpage, and
23105073a083SGleb Smirnoff 	 * thus will allocate hashes.  We take a conservative approach
23115073a083SGleb Smirnoff 	 * and assume that all zones may allocate a hash.  This may give
23125073a083SGleb Smirnoff 	 * us some positive inaccuracy, usually an extra single page.
2313f4bef67cSGleb Smirnoff 	 */
23145073a083SGleb Smirnoff 	pages += howmany(zones, UMA_SLAB_SPACE /
2315d2be4a1eSGleb Smirnoff 	    (sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT));
2316f4bef67cSGleb Smirnoff 
2317f4bef67cSGleb Smirnoff 	return (pages);
2318f4bef67cSGleb Smirnoff }
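/*
 * To illustrate the computation above for the common case where
 * roundup2(zsize, UMA_BOOT_ALIGN) fits within UMA_SLAB_SPACE, the
 * total reduces to (a restatement of the code, not an additional
 * constraint):
 *
 *	pages = howmany(2 * roundup(zsize, CACHE_LINE_SIZE) +
 *		    roundup(ksize, CACHE_LINE_SIZE), PAGE_SIZE) +
 *	    howmany(zones, UMA_SLAB_SPACE /
 *		roundup2(zsize, UMA_BOOT_ALIGN)) +
 *	    howmany(zones + 1, UMA_SLAB_SPACE /
 *		roundup2(ksize, UMA_BOOT_ALIGN)) +
 *	    howmany(zones, UMA_SLAB_SPACE /
 *		(sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT));
 *
 * i.e. bootstrap structures, zone slabs, keg slabs and hash slabs,
 * in that order.
 */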
2319f4bef67cSGleb Smirnoff 
23208355f576SJeff Roberson void
2321ac0a6fd0SGleb Smirnoff uma_startup(void *mem, int npages)
23228355f576SJeff Roberson {
23238355f576SJeff Roberson 	struct uma_zctor_args args;
2324ab3185d1SJeff Roberson 	uma_keg_t masterkeg;
2325ab3185d1SJeff Roberson 	uintptr_t m;
2326f4bef67cSGleb Smirnoff 
2327f4bef67cSGleb Smirnoff #ifdef DIAGNOSTIC
2328f4bef67cSGleb Smirnoff 	printf("Entering %s with %d boot pages configured\n", __func__, npages);
2329f4bef67cSGleb Smirnoff #endif
23308355f576SJeff Roberson 
2331111fbcd5SBryan Venteicher 	rw_init(&uma_rwlock, "UMA lock");
2332099a0e58SBosko Milekic 
2333ab3185d1SJeff Roberson 	/* Use bootpages memory for the zone of zones and zone of kegs. */
2334ab3185d1SJeff Roberson 	m = (uintptr_t)mem;
2335ab3185d1SJeff Roberson 	zones = (uma_zone_t)m;
2336ab3185d1SJeff Roberson 	m += roundup(zsize, CACHE_LINE_SIZE);
2337ab3185d1SJeff Roberson 	kegs = (uma_zone_t)m;
2338ab3185d1SJeff Roberson 	m += roundup(zsize, CACHE_LINE_SIZE);
2339ab3185d1SJeff Roberson 	masterkeg = (uma_keg_t)m;
2340ab3185d1SJeff Roberson 	m += roundup(ksize, CACHE_LINE_SIZE);
2341ab3185d1SJeff Roberson 	m = roundup(m, PAGE_SIZE);
2342ab3185d1SJeff Roberson 	npages -= (m - (uintptr_t)mem) / PAGE_SIZE;
2343ab3185d1SJeff Roberson 	mem = (void *)m;
2344ab3185d1SJeff Roberson 
2345099a0e58SBosko Milekic 	/* "manually" create the initial zone */
23460095a784SJeff Roberson 	memset(&args, 0, sizeof(args));
2347099a0e58SBosko Milekic 	args.name = "UMA Kegs";
2348ab3185d1SJeff Roberson 	args.size = ksize;
2349099a0e58SBosko Milekic 	args.ctor = keg_ctor;
2350099a0e58SBosko Milekic 	args.dtor = keg_dtor;
23518355f576SJeff Roberson 	args.uminit = zero_init;
23528355f576SJeff Roberson 	args.fini = NULL;
2353ab3185d1SJeff Roberson 	args.keg = masterkeg;
23545073a083SGleb Smirnoff 	args.align = UMA_BOOT_ALIGN - 1;
2355b60f5b79SJeff Roberson 	args.flags = UMA_ZFLAG_INTERNAL;
2356ab3185d1SJeff Roberson 	zone_ctor(kegs, zsize, &args, M_WAITOK);
23578355f576SJeff Roberson 
2358ac0a6fd0SGleb Smirnoff 	bootmem = mem;
2359ac0a6fd0SGleb Smirnoff 	boot_pages = npages;
23608355f576SJeff Roberson 
2361099a0e58SBosko Milekic 	args.name = "UMA Zones";
2362f4bef67cSGleb Smirnoff 	args.size = zsize;
2363099a0e58SBosko Milekic 	args.ctor = zone_ctor;
2364099a0e58SBosko Milekic 	args.dtor = zone_dtor;
2365099a0e58SBosko Milekic 	args.uminit = zero_init;
2366099a0e58SBosko Milekic 	args.fini = NULL;
2367099a0e58SBosko Milekic 	args.keg = NULL;
23685073a083SGleb Smirnoff 	args.align = UMA_BOOT_ALIGN - 1;
2369099a0e58SBosko Milekic 	args.flags = UMA_ZFLAG_INTERNAL;
2370ab3185d1SJeff Roberson 	zone_ctor(zones, zsize, &args, M_WAITOK);
2371099a0e58SBosko Milekic 
23728355f576SJeff Roberson 	/* Now make a zone for slab headers */
23738355f576SJeff Roberson 	slabzone = uma_zcreate("UMA Slabs",
2374ef72505eSJeff Roberson 				sizeof(struct uma_slab),
23758355f576SJeff Roberson 				NULL, NULL, NULL, NULL,
2376b60f5b79SJeff Roberson 				UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
23778355f576SJeff Roberson 
23788355f576SJeff Roberson 	hashzone = uma_zcreate("UMA Hash",
23798355f576SJeff Roberson 	    sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
23808355f576SJeff Roberson 	    NULL, NULL, NULL, NULL,
2381b60f5b79SJeff Roberson 	    UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
23828355f576SJeff Roberson 
2383cae33c14SJeff Roberson 	bucket_init();
23848355f576SJeff Roberson 
2385f4bef67cSGleb Smirnoff 	booted = BOOT_STRAPPED;
23868355f576SJeff Roberson }
23878355f576SJeff Roberson 
2388f4bef67cSGleb Smirnoff void
2389f4bef67cSGleb Smirnoff uma_startup1(void)
2390f4bef67cSGleb Smirnoff {
2391f4bef67cSGleb Smirnoff 
2392f4bef67cSGleb Smirnoff #ifdef DIAGNOSTIC
2393f4bef67cSGleb Smirnoff 	printf("Entering %s with %d boot pages left\n", __func__, boot_pages);
2394f4bef67cSGleb Smirnoff #endif
2395f4bef67cSGleb Smirnoff 	booted = BOOT_PAGEALLOC;
2396f4bef67cSGleb Smirnoff }
2397f4bef67cSGleb Smirnoff 
23988355f576SJeff Roberson void
239999571dc3SJeff Roberson uma_startup2(void)
24008355f576SJeff Roberson {
2401f4bef67cSGleb Smirnoff 
2402f7d35785SGleb Smirnoff #ifdef DIAGNOSTIC
2403f7d35785SGleb Smirnoff 	printf("Entering %s with %d boot pages left\n", __func__, boot_pages);
2404f7d35785SGleb Smirnoff #endif
2405f4bef67cSGleb Smirnoff 	booted = BOOT_BUCKETS;
240608cfa56eSMark Johnston 	sx_init(&uma_reclaim_lock, "umareclaim");
2407f4bef67cSGleb Smirnoff 	bucket_enable();
24088355f576SJeff Roberson }
24098355f576SJeff Roberson 
24108355f576SJeff Roberson /*
24118355f576SJeff Roberson  * Initialize our callout handle, and allocate the per-zone counters
24128355f576SJeff Roberson  * and sysctl nodes now that it is safe to do so.
24138355f576SJeff Roberson  */
24148355f576SJeff Roberson static void
24158355f576SJeff Roberson uma_startup3(void)
24168355f576SJeff Roberson {
24171431a748SGleb Smirnoff 
2418c5deaf04SGleb Smirnoff #ifdef INVARIANTS
2419c5deaf04SGleb Smirnoff 	TUNABLE_INT_FETCH("vm.debug.divisor", &dbg_divisor);
2420c5deaf04SGleb Smirnoff 	uma_dbg_cnt = counter_u64_alloc(M_WAITOK);
2421c5deaf04SGleb Smirnoff 	uma_skip_cnt = counter_u64_alloc(M_WAITOK);
2422c5deaf04SGleb Smirnoff #endif
2423*20a4e154SJeff Roberson 	zone_foreach(zone_alloc_counters, NULL);
2424*20a4e154SJeff Roberson 	zone_foreach(zone_alloc_sysctl, NULL);
2425fd90e2edSJung-uk Kim 	callout_init(&uma_callout, 1);
24269643769aSJeff Roberson 	callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
2427c5deaf04SGleb Smirnoff 	booted = BOOT_RUNNING;
24288355f576SJeff Roberson }
24298355f576SJeff Roberson 
2430e20a199fSJeff Roberson static uma_keg_t
2431099a0e58SBosko Milekic uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
243285dcf349SGleb Smirnoff 		int align, uint32_t flags)
2433099a0e58SBosko Milekic {
2434099a0e58SBosko Milekic 	struct uma_kctor_args args;
2435099a0e58SBosko Milekic 
2436099a0e58SBosko Milekic 	args.size = size;
2437099a0e58SBosko Milekic 	args.uminit = uminit;
2438099a0e58SBosko Milekic 	args.fini = fini;
24391e319f6dSRobert Watson 	args.align = (align == UMA_ALIGN_CACHE) ? uma_align_cache : align;
2440099a0e58SBosko Milekic 	args.flags = flags;
2441099a0e58SBosko Milekic 	args.zone = zone;
2442ab3185d1SJeff Roberson 	return (zone_alloc_item(kegs, &args, UMA_ANYDOMAIN, M_WAITOK));
2443099a0e58SBosko Milekic }
2444099a0e58SBosko Milekic 
2445f4bef67cSGleb Smirnoff /* Public functions */
24468355f576SJeff Roberson /* See uma.h */
24471e319f6dSRobert Watson void
24481e319f6dSRobert Watson uma_set_align(int align)
24491e319f6dSRobert Watson {
24501e319f6dSRobert Watson 
24511e319f6dSRobert Watson 	if (align != UMA_ALIGN_CACHE)
24521e319f6dSRobert Watson 		uma_align_cache = align;
24531e319f6dSRobert Watson }
24541e319f6dSRobert Watson 
24551e319f6dSRobert Watson /* See uma.h */
24568355f576SJeff Roberson uma_zone_t
2457bb196eb4SMatthew D Fleming uma_zcreate(const char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
245885dcf349SGleb Smirnoff 		uma_init uminit, uma_fini fini, int align, uint32_t flags)
24598355f576SJeff Roberson 
24608355f576SJeff Roberson {
24618355f576SJeff Roberson 	struct uma_zctor_args args;
246295c4bf75SKonstantin Belousov 	uma_zone_t res;
246395c4bf75SKonstantin Belousov 	bool locked;
24648355f576SJeff Roberson 
2465a5a35578SJohn Baldwin 	KASSERT(powerof2(align + 1), ("invalid zone alignment %d for \"%s\"",
2466a5a35578SJohn Baldwin 	    align, name));
2467a5a35578SJohn Baldwin 
2468c1685086SJeff Roberson 	/* Sets all zones to a first-touch domain policy. */
2469c1685086SJeff Roberson #ifdef UMA_FIRSTTOUCH
2470c1685086SJeff Roberson 	flags |= UMA_ZONE_NUMA;
2471c1685086SJeff Roberson #endif
2472c1685086SJeff Roberson 
24738355f576SJeff Roberson 	/* This stuff is essential for the zone ctor */
24740095a784SJeff Roberson 	memset(&args, 0, sizeof(args));
24758355f576SJeff Roberson 	args.name = name;
24768355f576SJeff Roberson 	args.size = size;
24778355f576SJeff Roberson 	args.ctor = ctor;
24788355f576SJeff Roberson 	args.dtor = dtor;
24798355f576SJeff Roberson 	args.uminit = uminit;
24808355f576SJeff Roberson 	args.fini = fini;
2481afc6dc36SJohn-Mark Gurney #ifdef  INVARIANTS
2482afc6dc36SJohn-Mark Gurney 	/*
2483ca293436SRyan Libby 	 * Inject procedures which check for memory use after free if we are
2484ca293436SRyan Libby 	 * allowed to scramble the memory while it is not allocated.  This
2485ca293436SRyan Libby 	 * requires that: UMA is actually able to access the memory, no init
2486ca293436SRyan Libby 	 * or fini procedures, no dependency on the initial value of the
2487ca293436SRyan Libby 	 * memory, and no (legitimate) use of the memory after free.  Note,
2488ca293436SRyan Libby 	 * the ctor and dtor do not need to be empty.
2489ca293436SRyan Libby 	 *
2490ca293436SRyan Libby 	 * XXX UMA_ZONE_OFFPAGE.
2491afc6dc36SJohn-Mark Gurney 	 */
249219c591bfSMateusz Guzik 	if ((!(flags & (UMA_ZONE_ZINIT | UMA_ZONE_NOFREE))) &&
2493ca293436SRyan Libby 	    uminit == NULL && fini == NULL) {
2494afc6dc36SJohn-Mark Gurney 		args.uminit = trash_init;
2495afc6dc36SJohn-Mark Gurney 		args.fini = trash_fini;
2496afc6dc36SJohn-Mark Gurney 	}
2497afc6dc36SJohn-Mark Gurney #endif
24988355f576SJeff Roberson 	args.align = align;
24998355f576SJeff Roberson 	args.flags = flags;
2500099a0e58SBosko Milekic 	args.keg = NULL;
2501099a0e58SBosko Milekic 
2502f4bef67cSGleb Smirnoff 	if (booted < BOOT_BUCKETS) {
250395c4bf75SKonstantin Belousov 		locked = false;
250495c4bf75SKonstantin Belousov 	} else {
250508cfa56eSMark Johnston 		sx_slock(&uma_reclaim_lock);
250695c4bf75SKonstantin Belousov 		locked = true;
250795c4bf75SKonstantin Belousov 	}
2508ab3185d1SJeff Roberson 	res = zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK);
250995c4bf75SKonstantin Belousov 	if (locked)
251008cfa56eSMark Johnston 		sx_sunlock(&uma_reclaim_lock);
251195c4bf75SKonstantin Belousov 	return (res);
2512099a0e58SBosko Milekic }
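/*
 * A minimal usage sketch (hypothetical "foo" names, not part of the
 * original file): a zone of fixed-size objects with no ctor/dtor or
 * init/fini, allocated and freed through the uma_zalloc()/uma_zfree()
 * wrappers from uma.h.
 */
#if 0
struct foo {
	int	f_refs;
	char	f_buf[64];
};

static uma_zone_t foo_zone;

static void
foo_zone_init(void)
{

	foo_zone = uma_zcreate("foo", sizeof(struct foo),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
}

static struct foo *
foo_alloc(void)
{

	return (uma_zalloc(foo_zone, M_WAITOK | M_ZERO));
}

static void
foo_free(struct foo *fp)
{

	uma_zfree(foo_zone, fp);
}
#endif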
2513099a0e58SBosko Milekic 
2514099a0e58SBosko Milekic /* See uma.h */
2515099a0e58SBosko Milekic uma_zone_t
2516099a0e58SBosko Milekic uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
2517099a0e58SBosko Milekic 		    uma_init zinit, uma_fini zfini, uma_zone_t master)
2518099a0e58SBosko Milekic {
2519099a0e58SBosko Milekic 	struct uma_zctor_args args;
2520e20a199fSJeff Roberson 	uma_keg_t keg;
252195c4bf75SKonstantin Belousov 	uma_zone_t res;
252295c4bf75SKonstantin Belousov 	bool locked;
2523099a0e58SBosko Milekic 
2524bb15d1c7SGleb Smirnoff 	keg = master->uz_keg;
25250095a784SJeff Roberson 	memset(&args, 0, sizeof(args));
2526099a0e58SBosko Milekic 	args.name = name;
2527e20a199fSJeff Roberson 	args.size = keg->uk_size;
2528099a0e58SBosko Milekic 	args.ctor = ctor;
2529099a0e58SBosko Milekic 	args.dtor = dtor;
2530099a0e58SBosko Milekic 	args.uminit = zinit;
2531099a0e58SBosko Milekic 	args.fini = zfini;
2532e20a199fSJeff Roberson 	args.align = keg->uk_align;
2533e20a199fSJeff Roberson 	args.flags = keg->uk_flags | UMA_ZONE_SECONDARY;
2534e20a199fSJeff Roberson 	args.keg = keg;
25358355f576SJeff Roberson 
2536f4bef67cSGleb Smirnoff 	if (booted < BOOT_BUCKETS) {
253795c4bf75SKonstantin Belousov 		locked = false;
253895c4bf75SKonstantin Belousov 	} else {
253908cfa56eSMark Johnston 		sx_slock(&uma_reclaim_lock);
254095c4bf75SKonstantin Belousov 		locked = true;
254195c4bf75SKonstantin Belousov 	}
2542e20a199fSJeff Roberson 	/* XXX Attaches only one keg of potentially many. */
2543ab3185d1SJeff Roberson 	res = zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK);
254495c4bf75SKonstantin Belousov 	if (locked)
254508cfa56eSMark Johnston 		sx_sunlock(&uma_reclaim_lock);
254695c4bf75SKonstantin Belousov 	return (res);
25478355f576SJeff Roberson }
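/*
 * A sketch of a secondary zone (hypothetical "obj" names; the ctor
 * and dtor are assumed to be defined elsewhere): both zones share the
 * master's keg, and thus its backing slabs, but layer different
 * ctor/dtor work on top, in the same way the mbuf packet zone sits on
 * the mbuf zone's keg.
 */
#if 0
static uma_zone_t obj_zone, obj_pack_zone;

static void
obj_zones_init(void)
{

	obj_zone = uma_zcreate("obj", sizeof(struct obj),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	obj_pack_zone = uma_zsecond_create("obj pack",
	    obj_pack_ctor, obj_pack_dtor, NULL, NULL, obj_zone);
}
#endif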
25488355f576SJeff Roberson 
25490095a784SJeff Roberson /* See uma.h */
25500095a784SJeff Roberson uma_zone_t
2551af526374SJeff Roberson uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor,
2552af526374SJeff Roberson 		    uma_init zinit, uma_fini zfini, uma_import zimport,
2553af526374SJeff Roberson 		    uma_release zrelease, void *arg, int flags)
25540095a784SJeff Roberson {
25550095a784SJeff Roberson 	struct uma_zctor_args args;
25560095a784SJeff Roberson 
25570095a784SJeff Roberson 	memset(&args, 0, sizeof(args));
25580095a784SJeff Roberson 	args.name = name;
2559af526374SJeff Roberson 	args.size = size;
25600095a784SJeff Roberson 	args.ctor = ctor;
25610095a784SJeff Roberson 	args.dtor = dtor;
25620095a784SJeff Roberson 	args.uminit = zinit;
25630095a784SJeff Roberson 	args.fini = zfini;
25640095a784SJeff Roberson 	args.import = zimport;
25650095a784SJeff Roberson 	args.release = zrelease;
25660095a784SJeff Roberson 	args.arg = arg;
25670095a784SJeff Roberson 	args.align = 0;
2568bb15d1c7SGleb Smirnoff 	args.flags = flags | UMA_ZFLAG_CACHE;
25690095a784SJeff Roberson 
2570ab3185d1SJeff Roberson 	return (zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK));
25710095a784SJeff Roberson }
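/*
 * A sketch of a cache-only zone (hypothetical "pool" names; struct
 * pool, pool_take(), pool_put() and POOL_ITEM_SIZE are assumptions):
 * there is no keg, so the import/release callbacks move items between
 * the bucket cache and an external backing store passed via 'arg'.
 * The callback signatures mirror uma_import and uma_release.
 */
#if 0
static uma_zone_t pool_zone;

static int
pool_import(void *arg, void **store, int count, int domain, int flags)
{
	struct pool *pp;
	int i;

	pp = arg;
	for (i = 0; i < count; i++) {
		if ((store[i] = pool_take(pp, domain, flags)) == NULL)
			break;
	}
	return (i);
}

static void
pool_release(void *arg, void **store, int count)
{
	struct pool *pp;
	int i;

	pp = arg;
	for (i = 0; i < count; i++)
		pool_put(pp, store[i]);
}

static void
pool_cache_init(struct pool *pp)
{

	pool_zone = uma_zcache_create("pool cache", POOL_ITEM_SIZE,
	    NULL, NULL, NULL, NULL, pool_import, pool_release, pp, 0);
}
#endif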
25720095a784SJeff Roberson 
25738355f576SJeff Roberson /* See uma.h */
25749c2cd7e5SJeff Roberson void
25759c2cd7e5SJeff Roberson uma_zdestroy(uma_zone_t zone)
25769c2cd7e5SJeff Roberson {
2577f4ff923bSRobert Watson 
257808cfa56eSMark Johnston 	sx_slock(&uma_reclaim_lock);
25790095a784SJeff Roberson 	zone_free_item(zones, zone, NULL, SKIP_NONE);
258008cfa56eSMark Johnston 	sx_sunlock(&uma_reclaim_lock);
25819c2cd7e5SJeff Roberson }
25829c2cd7e5SJeff Roberson 
25838d6fbbb8SJeff Roberson void
25848d6fbbb8SJeff Roberson uma_zwait(uma_zone_t zone)
25858d6fbbb8SJeff Roberson {
25868d6fbbb8SJeff Roberson 	void *item;
25878d6fbbb8SJeff Roberson 
25888d6fbbb8SJeff Roberson 	item = uma_zalloc_arg(zone, NULL, M_WAITOK);
25898d6fbbb8SJeff Roberson 	uma_zfree(zone, item);
25908d6fbbb8SJeff Roberson }
25918d6fbbb8SJeff Roberson 
25924e180881SMateusz Guzik void *
25934e180881SMateusz Guzik uma_zalloc_pcpu_arg(uma_zone_t zone, void *udata, int flags)
25944e180881SMateusz Guzik {
25954e180881SMateusz Guzik 	void *item;
2596b4799947SRuslan Bukin #ifdef SMP
25974e180881SMateusz Guzik 	int i;
25984e180881SMateusz Guzik 
25994e180881SMateusz Guzik 	MPASS(zone->uz_flags & UMA_ZONE_PCPU);
2600b4799947SRuslan Bukin #endif
26014e180881SMateusz Guzik 	item = uma_zalloc_arg(zone, udata, flags & ~M_ZERO);
26024e180881SMateusz Guzik 	if (item != NULL && (flags & M_ZERO)) {
2603b4799947SRuslan Bukin #ifdef SMP
2604013072f0SMark Johnston 		for (i = 0; i <= mp_maxid; i++)
26054e180881SMateusz Guzik 			bzero(zpcpu_get_cpu(item, i), zone->uz_size);
2606b4799947SRuslan Bukin #else
2607b4799947SRuslan Bukin 		bzero(item, zone->uz_size);
2608b4799947SRuslan Bukin #endif
26094e180881SMateusz Guzik 	}
26104e180881SMateusz Guzik 	return (item);
26114e180881SMateusz Guzik }
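/*
 * A sketch of a pcpu zone consumer (hypothetical "pcnt" names,
 * assuming the zpcpu_get() accessor from sys/pcpu.h): each allocation
 * returns an array of per-CPU slices, and M_ZERO is honored above by
 * zeroing every slice rather than only the first one.
 */
#if 0
static uma_zone_t pcnt_zone;

static void
pcnt_zone_init(void)
{

	pcnt_zone = uma_zcreate("pcnt", sizeof(uint64_t),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU);
}

static uint64_t *
pcnt_alloc(void)
{

	return (uma_zalloc_pcpu(pcnt_zone, M_WAITOK | M_ZERO));
}

static void
pcnt_bump(uint64_t *pcnt)
{

	critical_enter();
	(*zpcpu_get(pcnt))++;
	critical_exit();
}
#endif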
26124e180881SMateusz Guzik 
26134e180881SMateusz Guzik /*
26144e180881SMateusz Guzik  * A stub while both regular and pcpu cases are identical.
26154e180881SMateusz Guzik  */
26164e180881SMateusz Guzik void
26174e180881SMateusz Guzik uma_zfree_pcpu_arg(uma_zone_t zone, void *item, void *udata)
26184e180881SMateusz Guzik {
26194e180881SMateusz Guzik 
2620c5b7751fSIan Lepore #ifdef SMP
26214e180881SMateusz Guzik 	MPASS(zone->uz_flags & UMA_ZONE_PCPU);
2622c5b7751fSIan Lepore #endif
26234e180881SMateusz Guzik 	uma_zfree_arg(zone, item, udata);
26244e180881SMateusz Guzik }
26254e180881SMateusz Guzik 
2626beb8beefSJeff Roberson static inline void *
2627beb8beefSJeff Roberson bucket_pop(uma_zone_t zone, uma_cache_t cache, uma_bucket_t bucket)
2628beb8beefSJeff Roberson {
2629beb8beefSJeff Roberson 	void *item;
2630beb8beefSJeff Roberson 
2631beb8beefSJeff Roberson 	bucket->ub_cnt--;
2632beb8beefSJeff Roberson 	item = bucket->ub_bucket[bucket->ub_cnt];
2633beb8beefSJeff Roberson #ifdef INVARIANTS
2634beb8beefSJeff Roberson 	bucket->ub_bucket[bucket->ub_cnt] = NULL;
2635beb8beefSJeff Roberson 	KASSERT(item != NULL, ("uma_zalloc: Bucket pointer mangled."));
2636beb8beefSJeff Roberson #endif
2637beb8beefSJeff Roberson 	cache->uc_allocs++;
2638beb8beefSJeff Roberson 
2639beb8beefSJeff Roberson 	return (item);
2640beb8beefSJeff Roberson }
2641beb8beefSJeff Roberson 
26420a81b439SJeff Roberson static inline void
26430a81b439SJeff Roberson bucket_push(uma_zone_t zone, uma_cache_t cache, uma_bucket_t bucket,
26440a81b439SJeff Roberson     void *item)
26450a81b439SJeff Roberson {
26460a81b439SJeff Roberson 	KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
26470a81b439SJeff Roberson 	    ("uma_zfree: Freeing to non-free bucket index."));
26480a81b439SJeff Roberson 	bucket->ub_bucket[bucket->ub_cnt] = item;
26490a81b439SJeff Roberson 	bucket->ub_cnt++;
26500a81b439SJeff Roberson 	cache->uc_frees++;
26510a81b439SJeff Roberson }
26520a81b439SJeff Roberson 
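/*
 * Buckets behave as LIFO stacks of item pointers, so a push followed
 * immediately by a pop hands back the same, cache-warm item (a sketch
 * assuming the bucket is neither full nor empty):
 *
 *	bucket_push(zone, cache, bucket, item);
 *	MPASS(bucket_pop(zone, cache, bucket) == item);
 */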
2653beb8beefSJeff Roberson static void *
2654beb8beefSJeff Roberson item_ctor(uma_zone_t zone, void *udata, int flags, void *item)
2655beb8beefSJeff Roberson {
2656beb8beefSJeff Roberson #ifdef INVARIANTS
2657ca293436SRyan Libby 	bool skipdbg;
2658beb8beefSJeff Roberson 
2659beb8beefSJeff Roberson 	skipdbg = uma_dbg_zskip(zone, item);
2660ca293436SRyan Libby 	if (!skipdbg && (zone->uz_flags & UMA_ZFLAG_TRASH) != 0 &&
2661ca293436SRyan Libby 	    zone->uz_ctor != trash_ctor)
2662ca293436SRyan Libby 		trash_ctor(item, zone->uz_size, udata, flags);
2663beb8beefSJeff Roberson #endif
2664ca293436SRyan Libby 	if (__predict_false(zone->uz_ctor != NULL) &&
2665beb8beefSJeff Roberson 	    zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
2666beb8beefSJeff Roberson 		counter_u64_add(zone->uz_fails, 1);
2667beb8beefSJeff Roberson 		zone_free_item(zone, item, udata, SKIP_DTOR | SKIP_CNT);
2668beb8beefSJeff Roberson 		return (NULL);
2669beb8beefSJeff Roberson 	}
2670beb8beefSJeff Roberson #ifdef INVARIANTS
2671beb8beefSJeff Roberson 	if (!skipdbg)
2672beb8beefSJeff Roberson 		uma_dbg_alloc(zone, NULL, item);
2673beb8beefSJeff Roberson #endif
2674beb8beefSJeff Roberson 	if (flags & M_ZERO)
2675beb8beefSJeff Roberson 		uma_zero_item(item, zone);
2676beb8beefSJeff Roberson 
2677beb8beefSJeff Roberson 	return (item);
2678beb8beefSJeff Roberson }
2679beb8beefSJeff Roberson 
2680ca293436SRyan Libby static inline void
2681ca293436SRyan Libby item_dtor(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip)
2682ca293436SRyan Libby {
2683ca293436SRyan Libby #ifdef INVARIANTS
2684ca293436SRyan Libby 	bool skipdbg;
2685ca293436SRyan Libby 
2686ca293436SRyan Libby 	skipdbg = uma_dbg_zskip(zone, item);
2687ca293436SRyan Libby 	if (skip == SKIP_NONE && !skipdbg) {
2688ca293436SRyan Libby 		if ((zone->uz_flags & UMA_ZONE_MALLOC) != 0)
2689ca293436SRyan Libby 			uma_dbg_free(zone, udata, item);
2690ca293436SRyan Libby 		else
2691ca293436SRyan Libby 			uma_dbg_free(zone, NULL, item);
2692ca293436SRyan Libby 	}
2693ca293436SRyan Libby #endif
2694ca293436SRyan Libby 	if (skip < SKIP_DTOR) {
2695ca293436SRyan Libby 		if (zone->uz_dtor != NULL)
2696ca293436SRyan Libby 			zone->uz_dtor(item, zone->uz_size, udata);
2697ca293436SRyan Libby #ifdef INVARIANTS
2698ca293436SRyan Libby 		if (!skipdbg && (zone->uz_flags & UMA_ZFLAG_TRASH) != 0 &&
2699ca293436SRyan Libby 		    zone->uz_dtor != trash_dtor)
2700ca293436SRyan Libby 			trash_dtor(item, zone->uz_size, udata);
2701ca293436SRyan Libby #endif
2702ca293436SRyan Libby 	}
2703ca293436SRyan Libby }
2704ca293436SRyan Libby 
27059c2cd7e5SJeff Roberson /* See uma.h */
27068355f576SJeff Roberson void *
27072cc35ff9SJeff Roberson uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
27088355f576SJeff Roberson {
27098355f576SJeff Roberson 	uma_bucket_t bucket;
2710ab3185d1SJeff Roberson 	uma_cache_t cache;
2711ab3185d1SJeff Roberson 	void *item;
2712beb8beefSJeff Roberson 	int cpu, domain;
27138355f576SJeff Roberson 
2714e866d8f0SMark Murray 	/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
271519fa89e9SMark Murray 	random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA);
271610cb2424SMark Murray 
27178355f576SJeff Roberson 	/* This is the fast path allocation */
27181431a748SGleb Smirnoff 	CTR4(KTR_UMA, "uma_zalloc_arg thread %x zone %s(%p) flags %d",
27191431a748SGleb Smirnoff 	    curthread, zone->uz_name, zone, flags);
2720a553d4b8SJeff Roberson 
2721635fd505SRobert Watson 	if (flags & M_WAITOK) {
2722b23f72e9SBrian Feldman 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
2723635fd505SRobert Watson 		    "uma_zalloc_arg: zone \"%s\"", zone->uz_name);
27244c1cc01cSJohn Baldwin 	}
27250766f278SJonathan T. Looney 	KASSERT((flags & M_EXEC) == 0, ("uma_zalloc_arg: called with M_EXEC"));
2726d9e2e68dSMark Johnston 	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
27271067a2baSJonathan T. Looney 	    ("uma_zalloc_arg: called with spinlock or critical section held"));
2728ea99223eSMateusz Guzik 	if (zone->uz_flags & UMA_ZONE_PCPU)
2729b8af2820SMateusz Guzik 		KASSERT((flags & M_ZERO) == 0, ("allocating from a pcpu zone "
2730b8af2820SMateusz Guzik 		    "with M_ZERO passed"));
27311067a2baSJonathan T. Looney 
27328d689e04SGleb Smirnoff #ifdef DEBUG_MEMGUARD
27338d689e04SGleb Smirnoff 	if (memguard_cmp_zone(zone)) {
27348d689e04SGleb Smirnoff 		item = memguard_alloc(zone->uz_size, flags);
27358d689e04SGleb Smirnoff 		if (item != NULL) {
27368d689e04SGleb Smirnoff 			if (zone->uz_init != NULL &&
27378d689e04SGleb Smirnoff 			    zone->uz_init(item, zone->uz_size, flags) != 0)
27388d689e04SGleb Smirnoff 				return (NULL);
27398d689e04SGleb Smirnoff 			if (zone->uz_ctor != NULL &&
2740fc03d22bSJeff Roberson 			    zone->uz_ctor(item, zone->uz_size, udata,
2741fc03d22bSJeff Roberson 			    flags) != 0) {
2742ca293436SRyan Libby 				counter_u64_add(zone->uz_fails, 1);
27438d689e04SGleb Smirnoff 			    	zone->uz_fini(item, zone->uz_size);
27448d689e04SGleb Smirnoff 				return (NULL);
27458d689e04SGleb Smirnoff 			}
27468d689e04SGleb Smirnoff 			return (item);
27478d689e04SGleb Smirnoff 		}
27488d689e04SGleb Smirnoff 		/* This is unfortunate but should not be fatal. */
27498d689e04SGleb Smirnoff 	}
27508d689e04SGleb Smirnoff #endif
27515d1ae027SRobert Watson 	/*
27525d1ae027SRobert Watson 	 * If possible, allocate from the per-CPU cache.  There are two
27535d1ae027SRobert Watson 	 * requirements for safe access to the per-CPU cache: (1) the thread
27545d1ae027SRobert Watson 	 * accessing the cache must not be preempted or yield during access,
27555d1ae027SRobert Watson 	 * and (2) the thread must not migrate CPUs without switching which
27565d1ae027SRobert Watson 	 * cache it accesses.  We rely on a critical section to prevent
27575d1ae027SRobert Watson 	 * preemption and migration.  We release the critical section in
27585d1ae027SRobert Watson 	 * order to acquire the zone mutex if we are unable to allocate from
27595d1ae027SRobert Watson 	 * the current cache; when we re-acquire the critical section, we
27605d1ae027SRobert Watson 	 * must detect and handle migration if it has occurred.
27615d1ae027SRobert Watson 	 */
27625d1ae027SRobert Watson 	critical_enter();
2763beb8beefSJeff Roberson 	do {
27645d1ae027SRobert Watson 		cpu = curcpu;
27658355f576SJeff Roberson 		cache = &zone->uz_cpu[cpu];
27668355f576SJeff Roberson 		bucket = cache->uc_allocbucket;
2767beb8beefSJeff Roberson 		if (__predict_true(bucket != NULL && bucket->ub_cnt != 0)) {
2768beb8beefSJeff Roberson 			item = bucket_pop(zone, cache, bucket);
27695d1ae027SRobert Watson 			critical_exit();
2770beb8beefSJeff Roberson 			return (item_ctor(zone, udata, flags, item));
2771b23f72e9SBrian Feldman 		}
2772beb8beefSJeff Roberson 	} while (cache_alloc(zone, cache, udata, flags));
2773beb8beefSJeff Roberson 	critical_exit();
2774beb8beefSJeff Roberson 
2775beb8beefSJeff Roberson 	/*
2776beb8beefSJeff Roberson 	 * We could not get a bucket, so try to allocate a single item.
2777beb8beefSJeff Roberson 	 */
2778beb8beefSJeff Roberson 	if (zone->uz_flags & UMA_ZONE_NUMA)
2779beb8beefSJeff Roberson 		domain = PCPU_GET(domain);
2780beb8beefSJeff Roberson 	else
2781beb8beefSJeff Roberson 		domain = UMA_ANYDOMAIN;
2782beb8beefSJeff Roberson 	return (zone_alloc_item_locked(zone, udata, domain, flags));
2783fc03d22bSJeff Roberson }
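/*
 * A condensed sketch of the per-CPU access pattern used above; the
 * critical section pins the thread, so 'cpu' and 'cache' remain
 * valid only until the matching critical_exit(), after which both
 * must be re-read:
 *
 *	critical_enter();
 *	cpu = curcpu;
 *	cache = &zone->uz_cpu[cpu];
 *	... use cache; no sleeping, no zone lock acquisition ...
 *	critical_exit();
 */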
2784fc03d22bSJeff Roberson 
27858355f576SJeff Roberson /*
2786beb8beefSJeff Roberson  * Replenish an alloc bucket and possibly restore an old one.  Called in
2787beb8beefSJeff Roberson  * a critical section.  Returns in a critical section.
2788beb8beefSJeff Roberson  *
2789beb8beefSJeff Roberson  * A false return value indicates failure, with the zone lock held on
2790beb8beefSJeff Roberson  * return.  A true return value indicates success and the caller should retry.
2791beb8beefSJeff Roberson  */
2792beb8beefSJeff Roberson static __noinline bool
2793beb8beefSJeff Roberson cache_alloc(uma_zone_t zone, uma_cache_t cache, void *udata, int flags)
2794beb8beefSJeff Roberson {
2795beb8beefSJeff Roberson 	uma_zone_domain_t zdom;
2796beb8beefSJeff Roberson 	uma_bucket_t bucket;
2797beb8beefSJeff Roberson 	int cpu, domain;
2798beb8beefSJeff Roberson 	bool lockfail;
2799beb8beefSJeff Roberson 
2800beb8beefSJeff Roberson 	CRITICAL_ASSERT(curthread);
2801beb8beefSJeff Roberson 
2802beb8beefSJeff Roberson 	/*
2803beb8beefSJeff Roberson 	 * If we have run out of items in our alloc bucket, see
2804beb8beefSJeff Roberson 	 * if we can switch with the free bucket.
28058355f576SJeff Roberson 	 */
2806b983089aSJeff Roberson 	bucket = cache->uc_freebucket;
2807beb8beefSJeff Roberson 	if (bucket != NULL && bucket->ub_cnt != 0) {
28088355f576SJeff Roberson 		cache->uc_freebucket = cache->uc_allocbucket;
2809b983089aSJeff Roberson 		cache->uc_allocbucket = bucket;
2810beb8beefSJeff Roberson 		return (true);
28118355f576SJeff Roberson 	}
2812fc03d22bSJeff Roberson 
2813fc03d22bSJeff Roberson 	/*
2814fc03d22bSJeff Roberson 	 * Discard any empty allocation bucket while we hold no locks.
2815fc03d22bSJeff Roberson 	 */
2816fc03d22bSJeff Roberson 	bucket = cache->uc_allocbucket;
2817fc03d22bSJeff Roberson 	cache->uc_allocbucket = NULL;
2818fc03d22bSJeff Roberson 	critical_exit();
2819fc03d22bSJeff Roberson 	if (bucket != NULL)
28206fd34d6fSJeff Roberson 		bucket_free(zone, bucket, udata);
2821fc03d22bSJeff Roberson 
28225d1ae027SRobert Watson 	/*
28235d1ae027SRobert Watson 	 * Attempt to retrieve the item from the per-CPU cache has failed, so
28245d1ae027SRobert Watson 	 * we must go back to the zone.  This requires the zone lock, so we
28255d1ae027SRobert Watson 	 * must drop the critical section, then re-acquire it when we go back
28265d1ae027SRobert Watson 	 * to the cache.  Since the critical section is released, we may be
28275d1ae027SRobert Watson 	 * preempted or migrate.  As such, make sure not to maintain any
28285d1ae027SRobert Watson 	 * thread-local state specific to the cache from prior to releasing
28295d1ae027SRobert Watson 	 * the critical section.
28305d1ae027SRobert Watson 	 */
2831fc03d22bSJeff Roberson 	lockfail = false;
2832fc03d22bSJeff Roberson 	if (ZONE_TRYLOCK(zone) == 0) {
2833fc03d22bSJeff Roberson 		/* Record contention to size the buckets. */
2834a553d4b8SJeff Roberson 		ZONE_LOCK(zone);
2835fc03d22bSJeff Roberson 		lockfail = true;
2836fc03d22bSJeff Roberson 	}
2837beb8beefSJeff Roberson 
28385d1ae027SRobert Watson 	critical_enter();
2839beb8beefSJeff Roberson 	/* Short-circuit for zones without buckets or when memory is low. */
2840*20a4e154SJeff Roberson 	if (zone->uz_bucket_size == 0 || bucketdisable)
2841beb8beefSJeff Roberson 		return (false);
2842beb8beefSJeff Roberson 
28435d1ae027SRobert Watson 	cpu = curcpu;
28445d1ae027SRobert Watson 	cache = &zone->uz_cpu[cpu];
28455d1ae027SRobert Watson 
2846fc03d22bSJeff Roberson 	/* See if we lost the race to fill the cache. */
2847fc03d22bSJeff Roberson 	if (cache->uc_allocbucket != NULL) {
2848fc03d22bSJeff Roberson 		ZONE_UNLOCK(zone);
2849beb8beefSJeff Roberson 		return (true);
2850a553d4b8SJeff Roberson 	}
28518355f576SJeff Roberson 
2852fc03d22bSJeff Roberson 	/*
2853fc03d22bSJeff Roberson 	 * Check the zone's cache of buckets.
2854fc03d22bSJeff Roberson 	 */
2855c1685086SJeff Roberson 	if (zone->uz_flags & UMA_ZONE_NUMA) {
2856c1685086SJeff Roberson 		domain = PCPU_GET(domain);
2857ab3185d1SJeff Roberson 		zdom = &zone->uz_domain[domain];
2858c1685086SJeff Roberson 	} else {
2859c1685086SJeff Roberson 		domain = UMA_ANYDOMAIN;
2860c1685086SJeff Roberson 		zdom = &zone->uz_domain[0];
2861c1685086SJeff Roberson 	}
2862c1685086SJeff Roberson 
286308cfa56eSMark Johnston 	if ((bucket = zone_fetch_bucket(zone, zdom)) != NULL) {
2864beb8beefSJeff Roberson 		ZONE_UNLOCK(zone);
2865cae33c14SJeff Roberson 		KASSERT(bucket->ub_cnt != 0,
2866a553d4b8SJeff Roberson 		    ("uma_zalloc_arg: Returning an empty bucket."));
2867a553d4b8SJeff Roberson 		cache->uc_allocbucket = bucket;
2868beb8beefSJeff Roberson 		return (true);
2869a553d4b8SJeff Roberson 	}
28705d1ae027SRobert Watson 	/* We are no longer associated with this CPU. */
28715d1ae027SRobert Watson 	critical_exit();
2872bbee39c6SJeff Roberson 
2873fc03d22bSJeff Roberson 	/*
2874fc03d22bSJeff Roberson 	 * We bump the desired bucket size (uz_bucket_size) when the current
2875fc03d22bSJeff Roberson 	 * size is insufficient to handle the working set.
2876fc03d22bSJeff Roberson 	 */
2877*20a4e154SJeff Roberson 	if (lockfail && zone->uz_bucket_size < zone->uz_bucket_size_max)
2878*20a4e154SJeff Roberson 		zone->uz_bucket_size++;
2879bb15d1c7SGleb Smirnoff 
28808355f576SJeff Roberson 	/*
2881beb8beefSJeff Roberson 	 * Fill a bucket and attempt to use it as the alloc bucket.
2882bbee39c6SJeff Roberson 	 */
2883beb8beefSJeff Roberson 	bucket = zone_alloc_bucket(zone, udata, domain, flags);
28841431a748SGleb Smirnoff 	CTR3(KTR_UMA, "uma_zalloc: zone %s(%p) bucket zone returned %p",
28851431a748SGleb Smirnoff 	    zone->uz_name, zone, bucket);
2886fc03d22bSJeff Roberson 	critical_enter();
2887beb8beefSJeff Roberson 	if (bucket == NULL)
2888beb8beefSJeff Roberson 		return (false);
28890f9b7bf3SMark Johnston 
2890fc03d22bSJeff Roberson 	/*
2891fc03d22bSJeff Roberson 	 * See if we lost the race or were migrated.  Cache the
2892fc03d22bSJeff Roberson 	 * initialized bucket to make this less likely or claim
2893fc03d22bSJeff Roberson 	 * the memory directly.
2894fc03d22bSJeff Roberson 	 */
2895beb8beefSJeff Roberson 	cpu = curcpu;
2896beb8beefSJeff Roberson 	cache = &zone->uz_cpu[cpu];
289781c0d72cSGleb Smirnoff 	if (cache->uc_allocbucket == NULL &&
289881c0d72cSGleb Smirnoff 	    ((zone->uz_flags & UMA_ZONE_NUMA) == 0 ||
289981c0d72cSGleb Smirnoff 	    domain == PCPU_GET(domain))) {
2900ab3185d1SJeff Roberson 		cache->uc_allocbucket = bucket;
29010f9b7bf3SMark Johnston 		zdom->uzd_imax += bucket->ub_cnt;
2902bb15d1c7SGleb Smirnoff 	} else if (zone->uz_bkt_count >= zone->uz_bkt_max) {
290381c0d72cSGleb Smirnoff 		critical_exit();
290481c0d72cSGleb Smirnoff 		ZONE_UNLOCK(zone);
290581c0d72cSGleb Smirnoff 		bucket_drain(zone, bucket);
290681c0d72cSGleb Smirnoff 		bucket_free(zone, bucket, udata);
2907beb8beefSJeff Roberson 		critical_enter();
2908beb8beefSJeff Roberson 		return (true);
290981c0d72cSGleb Smirnoff 	} else
29100f9b7bf3SMark Johnston 		zone_put_bucket(zone, zdom, bucket, false);
2911bbee39c6SJeff Roberson 	ZONE_UNLOCK(zone);
2912beb8beefSJeff Roberson 	return (true);
2913bbee39c6SJeff Roberson }
2914bbee39c6SJeff Roberson 
2915ab3185d1SJeff Roberson void *
2916ab3185d1SJeff Roberson uma_zalloc_domain(uma_zone_t zone, void *udata, int domain, int flags)
2917bbee39c6SJeff Roberson {
2918ab3185d1SJeff Roberson 
2919ab3185d1SJeff Roberson 	/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
292019fa89e9SMark Murray 	random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA);
2921ab3185d1SJeff Roberson 
2922ab3185d1SJeff Roberson 	/* This is the fast path allocation */
2923ab3185d1SJeff Roberson 	CTR5(KTR_UMA,
2924ab3185d1SJeff Roberson 	    "uma_zalloc_domain thread %x zone %s(%p) domain %d flags %d",
2925ab3185d1SJeff Roberson 	    curthread, zone->uz_name, zone, domain, flags);
2926ab3185d1SJeff Roberson 
2927ab3185d1SJeff Roberson 	if (flags & M_WAITOK) {
2928ab3185d1SJeff Roberson 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
2929ab3185d1SJeff Roberson 		    "uma_zalloc_domain: zone \"%s\"", zone->uz_name);
2930ab3185d1SJeff Roberson 	}
2931ab3185d1SJeff Roberson 	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
2932ab3185d1SJeff Roberson 	    ("uma_zalloc_domain: called with spinlock or critical section held"));
2933ab3185d1SJeff Roberson 
2934ab3185d1SJeff Roberson 	return (zone_alloc_item(zone, udata, domain, flags));
2935ab3185d1SJeff Roberson }
2936ab3185d1SJeff Roberson 
2937ab3185d1SJeff Roberson /*
2938ab3185d1SJeff Roberson  * Find a slab with some space.  Prefer slabs that are partially used over those
2939ab3185d1SJeff Roberson  * that are completely free.  This helps to reduce fragmentation.
2940ab3185d1SJeff Roberson  *
2941ab3185d1SJeff Roberson  * If 'rr' is true, search all domains starting from 'domain'.  Otherwise check
2942ab3185d1SJeff Roberson  * only 'domain'.
2943ab3185d1SJeff Roberson  */
2944ab3185d1SJeff Roberson static uma_slab_t
2945194a979eSMark Johnston keg_first_slab(uma_keg_t keg, int domain, bool rr)
2946ab3185d1SJeff Roberson {
2947ab3185d1SJeff Roberson 	uma_domain_t dom;
2948bbee39c6SJeff Roberson 	uma_slab_t slab;
2949ab3185d1SJeff Roberson 	int start;
2950ab3185d1SJeff Roberson 
2951ab3185d1SJeff Roberson 	KASSERT(domain >= 0 && domain < vm_ndomains,
2952ab3185d1SJeff Roberson 	    ("keg_first_slab: domain %d out of range", domain));
2953bb15d1c7SGleb Smirnoff 	KEG_LOCK_ASSERT(keg);
2954ab3185d1SJeff Roberson 
2955ab3185d1SJeff Roberson 	slab = NULL;
2956ab3185d1SJeff Roberson 	start = domain;
2957ab3185d1SJeff Roberson 	do {
2958ab3185d1SJeff Roberson 		dom = &keg->uk_domain[domain];
2959ab3185d1SJeff Roberson 		if (!LIST_EMPTY(&dom->ud_part_slab))
2960ab3185d1SJeff Roberson 			return (LIST_FIRST(&dom->ud_part_slab));
2961ab3185d1SJeff Roberson 		if (!LIST_EMPTY(&dom->ud_free_slab)) {
2962ab3185d1SJeff Roberson 			slab = LIST_FIRST(&dom->ud_free_slab);
2963ab3185d1SJeff Roberson 			LIST_REMOVE(slab, us_link);
2964ab3185d1SJeff Roberson 			LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link);
2965ab3185d1SJeff Roberson 			return (slab);
2966ab3185d1SJeff Roberson 		}
2967ab3185d1SJeff Roberson 		if (rr)
2968ab3185d1SJeff Roberson 			domain = (domain + 1) % vm_ndomains;
2969ab3185d1SJeff Roberson 	} while (domain != start);
2970ab3185d1SJeff Roberson 
2971ab3185d1SJeff Roberson 	return (NULL);
2972ab3185d1SJeff Roberson }
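/*
 * For example (a sketch): with 'rr' true, vm_ndomains == 4 and a
 * starting domain of 2, the loop above visits the domains in the
 * order 2, 3, 0, 1 before giving up and returning NULL.
 */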
2973ab3185d1SJeff Roberson 
2974ab3185d1SJeff Roberson static uma_slab_t
2975194a979eSMark Johnston keg_fetch_free_slab(uma_keg_t keg, int domain, bool rr, int flags)
2976ab3185d1SJeff Roberson {
2977194a979eSMark Johnston 	uint32_t reserve;
2978099a0e58SBosko Milekic 
2979bb15d1c7SGleb Smirnoff 	KEG_LOCK_ASSERT(keg);
2980194a979eSMark Johnston 
2981194a979eSMark Johnston 	reserve = (flags & M_USE_RESERVE) != 0 ? 0 : keg->uk_reserve;
2982194a979eSMark Johnston 	if (keg->uk_free <= reserve)
2983194a979eSMark Johnston 		return (NULL);
2984194a979eSMark Johnston 	return (keg_first_slab(keg, domain, rr));
2985194a979eSMark Johnston }
2986194a979eSMark Johnston 
2987194a979eSMark Johnston static uma_slab_t
2988194a979eSMark Johnston keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int rdomain, const int flags)
2989194a979eSMark Johnston {
2990194a979eSMark Johnston 	struct vm_domainset_iter di;
2991194a979eSMark Johnston 	uma_domain_t dom;
2992194a979eSMark Johnston 	uma_slab_t slab;
2993194a979eSMark Johnston 	int aflags, domain;
2994194a979eSMark Johnston 	bool rr;
2995194a979eSMark Johnston 
2996194a979eSMark Johnston restart:
2997bb15d1c7SGleb Smirnoff 	KEG_LOCK_ASSERT(keg);
2998bbee39c6SJeff Roberson 
2999bbee39c6SJeff Roberson 	/*
3000194a979eSMark Johnston 	 * Use the keg's policy if upper layers haven't already specified a
3001194a979eSMark Johnston 	 * domain (as happens with first-touch zones).
3002194a979eSMark Johnston 	 *
3003194a979eSMark Johnston 	 * To avoid races we run the iterator with the keg lock held, but that
3004194a979eSMark Johnston 	 * means that we cannot allow the vm_domainset layer to sleep.  Thus,
3005194a979eSMark Johnston 	 * clear M_WAITOK and handle low memory conditions locally.
3006bbee39c6SJeff Roberson 	 */
3007ab3185d1SJeff Roberson 	rr = rdomain == UMA_ANYDOMAIN;
3008ab3185d1SJeff Roberson 	if (rr) {
3009194a979eSMark Johnston 		aflags = (flags & ~M_WAITOK) | M_NOWAIT;
3010194a979eSMark Johnston 		vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain,
3011194a979eSMark Johnston 		    &aflags);
3012194a979eSMark Johnston 	} else {
3013194a979eSMark Johnston 		aflags = flags;
3014194a979eSMark Johnston 		domain = rdomain;
3015194a979eSMark Johnston 	}
3016ab3185d1SJeff Roberson 
3017194a979eSMark Johnston 	for (;;) {
3018194a979eSMark Johnston 		slab = keg_fetch_free_slab(keg, domain, rr, flags);
3019194a979eSMark Johnston 		if (slab != NULL) {
3020e20a199fSJeff Roberson 			MPASS(slab->us_keg == keg);
3021bbee39c6SJeff Roberson 			return (slab);
3022bbee39c6SJeff Roberson 		}
3023bbee39c6SJeff Roberson 
3024bbee39c6SJeff Roberson 		/*
3025bbee39c6SJeff Roberson 		 * M_NOVM means don't ask at all!
3026bbee39c6SJeff Roberson 		 */
3027bbee39c6SJeff Roberson 		if (flags & M_NOVM)
3028bbee39c6SJeff Roberson 			break;
3029bbee39c6SJeff Roberson 
3030bb15d1c7SGleb Smirnoff 		KASSERT(zone->uz_max_items == 0 ||
3031bb15d1c7SGleb Smirnoff 		    zone->uz_items <= zone->uz_max_items,
3032bb15d1c7SGleb Smirnoff 		    ("%s: zone %p overflow", __func__, zone));
3033bb15d1c7SGleb Smirnoff 
303486220393SMark Johnston 		slab = keg_alloc_slab(keg, zone, domain, flags, aflags);
3035bbee39c6SJeff Roberson 		/*
3036bbee39c6SJeff Roberson 		 * If we got a slab here it's safe to mark it partially used
3037bbee39c6SJeff Roberson 		 * and return.  We assume that the caller is going to remove
3038bbee39c6SJeff Roberson 		 * at least one item.
3039bbee39c6SJeff Roberson 		 */
3040bbee39c6SJeff Roberson 		if (slab) {
3041e20a199fSJeff Roberson 			MPASS(slab->us_keg == keg);
3042ab3185d1SJeff Roberson 			dom = &keg->uk_domain[slab->us_domain];
3043ab3185d1SJeff Roberson 			LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link);
3044bbee39c6SJeff Roberson 			return (slab);
3045bbee39c6SJeff Roberson 		}
3046194a979eSMark Johnston 		KEG_LOCK(keg);
3047194a979eSMark Johnston 		if (rr && vm_domainset_iter_policy(&di, &domain) != 0) {
3048194a979eSMark Johnston 			if ((flags & M_WAITOK) != 0) {
3049194a979eSMark Johnston 				KEG_UNLOCK(keg);
3050194a979eSMark Johnston 				vm_wait_doms(&keg->uk_dr.dr_policy->ds_mask);
3051194a979eSMark Johnston 				KEG_LOCK(keg);
3052194a979eSMark Johnston 				goto restart;
305330c5525bSAndrew Gallatin 			}
3054194a979eSMark Johnston 			break;
3055194a979eSMark Johnston 		}
3056ab3185d1SJeff Roberson 	}
3057ab3185d1SJeff Roberson 
3058bbee39c6SJeff Roberson 	/*
3059bbee39c6SJeff Roberson 	 * We might not have been able to get a slab but another CPU
3060bbee39c6SJeff Roberson 	 * could have while we were unlocked.  Check again before we
3061bbee39c6SJeff Roberson 	 * fail.
3062bbee39c6SJeff Roberson 	 */
3063194a979eSMark Johnston 	if ((slab = keg_fetch_free_slab(keg, domain, rr, flags)) != NULL) {
3064ab3185d1SJeff Roberson 		MPASS(slab->us_keg == keg);
3065bbee39c6SJeff Roberson 		return (slab);
3066bbee39c6SJeff Roberson 	}
3067ab3185d1SJeff Roberson 	return (NULL);
3068ab3185d1SJeff Roberson }
3069bbee39c6SJeff Roberson 
3070e20a199fSJeff Roberson static uma_slab_t
3071ab3185d1SJeff Roberson zone_fetch_slab(uma_zone_t zone, uma_keg_t keg, int domain, int flags)
3072e20a199fSJeff Roberson {
3073e20a199fSJeff Roberson 	uma_slab_t slab;
3074e20a199fSJeff Roberson 
3075af526374SJeff Roberson 	if (keg == NULL) {
3076bb15d1c7SGleb Smirnoff 		keg = zone->uz_keg;
3077af526374SJeff Roberson 		KEG_LOCK(keg);
3078af526374SJeff Roberson 	}
3079e20a199fSJeff Roberson 
3080e20a199fSJeff Roberson 	for (;;) {
3081ab3185d1SJeff Roberson 		slab = keg_fetch_slab(keg, zone, domain, flags);
3082e20a199fSJeff Roberson 		if (slab)
3083e20a199fSJeff Roberson 			return (slab);
3084e20a199fSJeff Roberson 		if (flags & (M_NOWAIT | M_NOVM))
3085e20a199fSJeff Roberson 			break;
3086e20a199fSJeff Roberson 	}
3087af526374SJeff Roberson 	KEG_UNLOCK(keg);
3088e20a199fSJeff Roberson 	return (NULL);
3089e20a199fSJeff Roberson }
3090e20a199fSJeff Roberson 
3091d56368d7SBosko Milekic static void *
30920095a784SJeff Roberson slab_alloc_item(uma_keg_t keg, uma_slab_t slab)
3093bbee39c6SJeff Roberson {
3094ab3185d1SJeff Roberson 	uma_domain_t dom;
3095bbee39c6SJeff Roberson 	void *item;
309685dcf349SGleb Smirnoff 	uint8_t freei;
3097bbee39c6SJeff Roberson 
30980095a784SJeff Roberson 	MPASS(keg == slab->us_keg);
3099bb15d1c7SGleb Smirnoff 	KEG_LOCK_ASSERT(keg);
3100099a0e58SBosko Milekic 
3101ef72505eSJeff Roberson 	freei = BIT_FFS(SLAB_SETSIZE, &slab->us_free) - 1;
3102ef72505eSJeff Roberson 	BIT_CLR(SLAB_SETSIZE, freei, &slab->us_free);
3103099a0e58SBosko Milekic 	item = slab->us_data + (keg->uk_rsize * freei);
3104bbee39c6SJeff Roberson 	slab->us_freecount--;
3105099a0e58SBosko Milekic 	keg->uk_free--;
3106ef72505eSJeff Roberson 
3107bbee39c6SJeff Roberson 	/* Move this slab to the full list */
3108bbee39c6SJeff Roberson 	if (slab->us_freecount == 0) {
3109bbee39c6SJeff Roberson 		LIST_REMOVE(slab, us_link);
3110ab3185d1SJeff Roberson 		dom = &keg->uk_domain[slab->us_domain];
3111ab3185d1SJeff Roberson 		LIST_INSERT_HEAD(&dom->ud_full_slab, slab, us_link);
3112bbee39c6SJeff Roberson 	}
3113bbee39c6SJeff Roberson 
3114bbee39c6SJeff Roberson 	return (item);
3115bbee39c6SJeff Roberson }
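/*
 * For example (a sketch): if us_free has bits 0 and 2 set, BIT_FFS()
 * above returns 1 (it is 1-based), so freei is 0, bit 0 is cleared
 * and the item at us_data + uk_rsize * 0 is returned; the next call
 * would then pick bit 2.
 */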
3116bbee39c6SJeff Roberson 
3117bbee39c6SJeff Roberson static int
3118ab3185d1SJeff Roberson zone_import(uma_zone_t zone, void **bucket, int max, int domain, int flags)
31190095a784SJeff Roberson {
31200095a784SJeff Roberson 	uma_slab_t slab;
31210095a784SJeff Roberson 	uma_keg_t keg;
3122a03af342SSean Bruno #ifdef NUMA
3123ab3185d1SJeff Roberson 	int stripe;
3124a03af342SSean Bruno #endif
31250095a784SJeff Roberson 	int i;
31260095a784SJeff Roberson 
31270095a784SJeff Roberson 	slab = NULL;
31280095a784SJeff Roberson 	keg = NULL;
3129af526374SJeff Roberson 	/* Try to keep the buckets totally full */
31300095a784SJeff Roberson 	for (i = 0; i < max; ) {
3131ad66f958SGleb Smirnoff 		if ((slab = zone_fetch_slab(zone, keg, domain, flags)) == NULL)
31320095a784SJeff Roberson 			break;
31330095a784SJeff Roberson 		keg = slab->us_keg;
3134a03af342SSean Bruno #ifdef NUMA
3135ab3185d1SJeff Roberson 		stripe = howmany(max, vm_ndomains);
3136a03af342SSean Bruno #endif
31376fd34d6fSJeff Roberson 		while (slab->us_freecount && i < max) {
31380095a784SJeff Roberson 			bucket[i++] = slab_alloc_item(keg, slab);
31396fd34d6fSJeff Roberson 			if (keg->uk_free <= keg->uk_reserve)
31406fd34d6fSJeff Roberson 				break;
3141b6715dabSJeff Roberson #ifdef NUMA
3142ab3185d1SJeff Roberson 			/*
3143ab3185d1SJeff Roberson 			 * If the zone is striped we pick a new slab for every
3144ab3185d1SJeff Roberson 			 * N allocations.  Eliminating this conditional will
3145ab3185d1SJeff Roberson 			 * instead pick a new domain for each bucket rather
3146ab3185d1SJeff Roberson 			 * than stripe within each bucket.  The current option
3147ab3185d1SJeff Roberson 			 * produces more fragmentation and requires more cpu
3148ab3185d1SJeff Roberson 			 * time but yields better distribution.
3149ab3185d1SJeff Roberson 			 */
3150ab3185d1SJeff Roberson 			if ((zone->uz_flags & UMA_ZONE_NUMA) == 0 &&
3151ab3185d1SJeff Roberson 			    vm_ndomains > 1 && --stripe == 0)
3152ab3185d1SJeff Roberson 				break;
3153ab3185d1SJeff Roberson #endif
31546fd34d6fSJeff Roberson 		}
3155ab3185d1SJeff Roberson 		/* Don't block if we allocated any successfully. */
31560095a784SJeff Roberson 		flags &= ~M_WAITOK;
31570095a784SJeff Roberson 		flags |= M_NOWAIT;
31580095a784SJeff Roberson 	}
31590095a784SJeff Roberson 	if (slab != NULL)
31600095a784SJeff Roberson 		KEG_UNLOCK(keg);
31610095a784SJeff Roberson 
31620095a784SJeff Roberson 	return (i);
31630095a784SJeff Roberson }
31640095a784SJeff Roberson 
3165fc03d22bSJeff Roberson static uma_bucket_t
3166beb8beefSJeff Roberson zone_alloc_bucket(uma_zone_t zone, void *udata, int domain, int flags)
3167bbee39c6SJeff Roberson {
3168bbee39c6SJeff Roberson 	uma_bucket_t bucket;
3169beb8beefSJeff Roberson 	int maxbucket, cnt;
3170bbee39c6SJeff Roberson 
317130c5525bSAndrew Gallatin 	CTR1(KTR_UMA, "zone_alloc_bucket: domain %d", domain);
317230c5525bSAndrew Gallatin 
3173c1685086SJeff Roberson 	/* Avoid allocs targeting empty domains. */
3174c1685086SJeff Roberson 	if (domain != UMA_ANYDOMAIN && VM_DOMAIN_EMPTY(domain))
3175c1685086SJeff Roberson 		domain = UMA_ANYDOMAIN;
3176c1685086SJeff Roberson 
3177beb8beefSJeff Roberson 	if (zone->uz_max_items > 0) {
3178beb8beefSJeff Roberson 		if (zone->uz_items >= zone->uz_max_items)
3179beb8beefSJeff Roberson 			return (NULL);
3180*20a4e154SJeff Roberson 		maxbucket = MIN(zone->uz_bucket_size,
3181beb8beefSJeff Roberson 		    zone->uz_max_items - zone->uz_items);
3182beb8beefSJeff Roberson 		zone->uz_items += maxbucket;
3183beb8beefSJeff Roberson 	} else
3184*20a4e154SJeff Roberson 		maxbucket = zone->uz_bucket_size;
3185beb8beefSJeff Roberson 	ZONE_UNLOCK(zone);
3186beb8beefSJeff Roberson 
31876fd34d6fSJeff Roberson 	/* Don't wait for buckets, preserve caller's NOVM setting. */
31886fd34d6fSJeff Roberson 	bucket = bucket_alloc(zone, udata, M_NOWAIT | (flags & M_NOVM));
3189beb8beefSJeff Roberson 	if (bucket == NULL) {
3190beb8beefSJeff Roberson 		cnt = 0;
3191beb8beefSJeff Roberson 		goto out;
3192beb8beefSJeff Roberson 	}
31930095a784SJeff Roberson 
31940095a784SJeff Roberson 	bucket->ub_cnt = zone->uz_import(zone->uz_arg, bucket->ub_bucket,
3195beb8beefSJeff Roberson 	    MIN(maxbucket, bucket->ub_entries), domain, flags);
31960095a784SJeff Roberson 
31970095a784SJeff Roberson 	/*
31980095a784SJeff Roberson 	 * Initialize the memory if necessary.
31990095a784SJeff Roberson 	 */
32000095a784SJeff Roberson 	if (bucket->ub_cnt != 0 && zone->uz_init != NULL) {
3201099a0e58SBosko Milekic 		int i;
3202bbee39c6SJeff Roberson 
32030095a784SJeff Roberson 		for (i = 0; i < bucket->ub_cnt; i++)
3204e20a199fSJeff Roberson 			if (zone->uz_init(bucket->ub_bucket[i], zone->uz_size,
32050095a784SJeff Roberson 			    flags) != 0)
3206b23f72e9SBrian Feldman 				break;
3207b23f72e9SBrian Feldman 		/*
3208b23f72e9SBrian Feldman 		 * If we couldn't initialize the whole bucket, put the
3209b23f72e9SBrian Feldman 		 * rest back onto the freelist.
3210b23f72e9SBrian Feldman 		 */
3211b23f72e9SBrian Feldman 		if (i != bucket->ub_cnt) {
3212af526374SJeff Roberson 			zone->uz_release(zone->uz_arg, &bucket->ub_bucket[i],
32130095a784SJeff Roberson 			    bucket->ub_cnt - i);
3214a5a262c6SBosko Milekic #ifdef INVARIANTS
32150095a784SJeff Roberson 			bzero(&bucket->ub_bucket[i],
32160095a784SJeff Roberson 			    sizeof(void *) * (bucket->ub_cnt - i));
3217a5a262c6SBosko Milekic #endif
3218b23f72e9SBrian Feldman 			bucket->ub_cnt = i;
3219b23f72e9SBrian Feldman 		}
3220099a0e58SBosko Milekic 	}
3221099a0e58SBosko Milekic 
3222beb8beefSJeff Roberson 	cnt = bucket->ub_cnt;
3223f7104ccdSAlexander Motin 	if (bucket->ub_cnt == 0) {
32246fd34d6fSJeff Roberson 		bucket_free(zone, bucket, udata);
32252efcc8cbSGleb Smirnoff 		counter_u64_add(zone->uz_fails, 1);
3226beb8beefSJeff Roberson 		bucket = NULL;
3227beb8beefSJeff Roberson 	}
3228beb8beefSJeff Roberson out:
3229beb8beefSJeff Roberson 	ZONE_LOCK(zone);
3230beb8beefSJeff Roberson 	if (zone->uz_max_items > 0 && cnt < maxbucket) {
3231beb8beefSJeff Roberson 		MPASS(zone->uz_items >= maxbucket - cnt);
3232beb8beefSJeff Roberson 		zone->uz_items -= maxbucket - cnt;
3233beb8beefSJeff Roberson 		if (zone->uz_sleepers > 0 &&
3234beb8beefSJeff Roberson 		    (cnt == 0 ? zone->uz_items + 1 : zone->uz_items) <
3235beb8beefSJeff Roberson 		    zone->uz_max_items)
3236beb8beefSJeff Roberson 			wakeup_one(zone);
3237bbee39c6SJeff Roberson 	}
3238fc03d22bSJeff Roberson 
3239fc03d22bSJeff Roberson 	return (bucket);
3240fc03d22bSJeff Roberson }
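/*
 * Illustrative sketch, not part of this file: zone_alloc_bucket() charges
 * the whole bucket against uz_items before dropping the zone lock, then
 * refunds the shortfall under the lock and wakes one sleeper if room opened
 * up.  The same reserve/refund pattern in miniature; all names are
 * hypothetical and the locking is elided:
 */
#include <sys/param.h>		/* MIN() */

static int
limited_fill(int *used, int limit, int want, int (*produce)(int))
{
	int got;

	want = MIN(want, limit - *used);
	*used += want;			/* reserve while "locked" */
	got = produce(want);		/* may fall short, "unlocked" */
	*used -= want - got;		/* refund the shortfall */
	return (got);
}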
3241fc03d22bSJeff Roberson 
32428355f576SJeff Roberson /*
32430095a784SJeff Roberson  * Allocates a single item from a zone.
32448355f576SJeff Roberson  *
32458355f576SJeff Roberson  * Arguments
32468355f576SJeff Roberson  *	zone   The zone to alloc for.
32478355f576SJeff Roberson  *	udata  The data to be passed to the constructor.
3248ab3185d1SJeff Roberson  *	domain The domain to allocate from or UMA_ANYDOMAIN.
3249a163d034SWarner Losh  *	flags  M_WAITOK, M_NOWAIT, M_ZERO.
32508355f576SJeff Roberson  *
32518355f576SJeff Roberson  * Returns
32528355f576SJeff Roberson  *	NULL if there is no memory and M_NOWAIT is set
3253bbee39c6SJeff Roberson  *	An item if successful
32548355f576SJeff Roberson  */
32568355f576SJeff Roberson static void *
3257ab3185d1SJeff Roberson zone_alloc_item(uma_zone_t zone, void *udata, int domain, int flags)
32588355f576SJeff Roberson {
3259bb15d1c7SGleb Smirnoff 
3260bb15d1c7SGleb Smirnoff 	ZONE_LOCK(zone);
3261bb15d1c7SGleb Smirnoff 	return (zone_alloc_item_locked(zone, udata, domain, flags));
3262bb15d1c7SGleb Smirnoff }
3263bb15d1c7SGleb Smirnoff 
3264bb15d1c7SGleb Smirnoff /*
3265bb15d1c7SGleb Smirnoff  * Returns with zone unlocked.
3266bb15d1c7SGleb Smirnoff  */
3267bb15d1c7SGleb Smirnoff static void *
3268bb15d1c7SGleb Smirnoff zone_alloc_item_locked(uma_zone_t zone, void *udata, int domain, int flags)
3269bb15d1c7SGleb Smirnoff {
32708355f576SJeff Roberson 	void *item;
32718355f576SJeff Roberson 
3272bb15d1c7SGleb Smirnoff 	ZONE_LOCK_ASSERT(zone);
3273bb15d1c7SGleb Smirnoff 
3274bb45b411SGleb Smirnoff 	if (zone->uz_max_items > 0) {
3275bb45b411SGleb Smirnoff 		if (zone->uz_items >= zone->uz_max_items) {
3276bb15d1c7SGleb Smirnoff 			zone_log_warning(zone);
3277bb15d1c7SGleb Smirnoff 			zone_maxaction(zone);
3278bb15d1c7SGleb Smirnoff 			if (flags & M_NOWAIT) {
3279bb15d1c7SGleb Smirnoff 				ZONE_UNLOCK(zone);
3280bb15d1c7SGleb Smirnoff 				return (NULL);
3281bb15d1c7SGleb Smirnoff 			}
3282bb15d1c7SGleb Smirnoff 			zone->uz_sleeps++;
3283bb15d1c7SGleb Smirnoff 			zone->uz_sleepers++;
3284bb15d1c7SGleb Smirnoff 			while (zone->uz_items >= zone->uz_max_items)
3285e7e4bcd8SGleb Smirnoff 				mtx_sleep(zone, zone->uz_lockptr, PVM,
3286e7e4bcd8SGleb Smirnoff 				    "zonelimit", 0);
3287bb15d1c7SGleb Smirnoff 			zone->uz_sleepers--;
3288bb15d1c7SGleb Smirnoff 			if (zone->uz_sleepers > 0 &&
3289bb15d1c7SGleb Smirnoff 			    zone->uz_items + 1 < zone->uz_max_items)
3290bb15d1c7SGleb Smirnoff 				wakeup_one(zone);
3291bb15d1c7SGleb Smirnoff 		}
3292bb15d1c7SGleb Smirnoff 		zone->uz_items++;
3293bb45b411SGleb Smirnoff 	}
3294bb15d1c7SGleb Smirnoff 	ZONE_UNLOCK(zone);
32958355f576SJeff Roberson 
3296c1685086SJeff Roberson 	/* Avoid allocs targeting empty domains. */
3297c1685086SJeff Roberson 	if (domain != UMA_ANYDOMAIN && VM_DOMAIN_EMPTY(domain))
329830c5525bSAndrew Gallatin 		domain = UMA_ANYDOMAIN;
3299c1685086SJeff Roberson 
3300ab3185d1SJeff Roberson 	if (zone->uz_import(zone->uz_arg, &item, 1, domain, flags) != 1)
3301beb8beefSJeff Roberson 		goto fail_cnt;
33028355f576SJeff Roberson 
3303099a0e58SBosko Milekic 	/*
3304099a0e58SBosko Milekic 	 * We have to call both the zone's init (not the keg's init)
3305099a0e58SBosko Milekic 	 * and the zone's ctor.  This is because the item is going from
3306099a0e58SBosko Milekic 	 * a keg slab directly to the user, and the user is expecting it
3307099a0e58SBosko Milekic 	 * to be both zone-init'd as well as zone-ctor'd.
3308099a0e58SBosko Milekic 	 */
3309b23f72e9SBrian Feldman 	if (zone->uz_init != NULL) {
3310e20a199fSJeff Roberson 		if (zone->uz_init(item, zone->uz_size, flags) != 0) {
3311bb15d1c7SGleb Smirnoff 			zone_free_item(zone, item, udata, SKIP_FINI | SKIP_CNT);
3312beb8beefSJeff Roberson 			goto fail_cnt;
3313beb8beefSJeff Roberson 		}
3314beb8beefSJeff Roberson 	}
3315beb8beefSJeff Roberson 	item = item_ctor(zone, udata, flags, item);
3316beb8beefSJeff Roberson 	if (item == NULL)
33170095a784SJeff Roberson 		goto fail;
33188355f576SJeff Roberson 
33192efcc8cbSGleb Smirnoff 	counter_u64_add(zone->uz_allocs, 1);
33201431a748SGleb Smirnoff 	CTR3(KTR_UMA, "zone_alloc_item item %p from %s(%p)", item,
33211431a748SGleb Smirnoff 	    zone->uz_name, zone);
33221431a748SGleb Smirnoff 
33238355f576SJeff Roberson 	return (item);
33240095a784SJeff Roberson 
3325beb8beefSJeff Roberson fail_cnt:
3326beb8beefSJeff Roberson 	counter_u64_add(zone->uz_fails, 1);
33270095a784SJeff Roberson fail:
3328bb45b411SGleb Smirnoff 	if (zone->uz_max_items > 0) {
3329bb15d1c7SGleb Smirnoff 		ZONE_LOCK(zone);
3330beb8beefSJeff Roberson 		/* XXX Decrement without wakeup */
3331bb15d1c7SGleb Smirnoff 		zone->uz_items--;
3332bb15d1c7SGleb Smirnoff 		ZONE_UNLOCK(zone);
3333bb45b411SGleb Smirnoff 	}
33341431a748SGleb Smirnoff 	CTR2(KTR_UMA, "zone_alloc_item failed from %s(%p)",
33351431a748SGleb Smirnoff 	    zone->uz_name, zone);
33360095a784SJeff Roberson 	return (NULL);
33378355f576SJeff Roberson }
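/*
 * Illustrative sketch, not part of this file: at the uz_max_items limit,
 * M_WAITOK callers sleep on the "zonelimit" channel above while M_NOWAIT
 * callers fail immediately.  A hypothetical consumer-side view using the
 * public KPI (foo_zone is an assumption):
 */
#include <sys/param.h>
#include <sys/malloc.h>		/* M_NOWAIT */
#include <vm/uma.h>

static void *
foo_try_alloc(uma_zone_t foo_zone)
{

	/* Never sleeps: returns NULL when the zone is at its limit. */
	return (uma_zalloc(foo_zone, M_NOWAIT));
}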
33388355f576SJeff Roberson 
33398355f576SJeff Roberson /* See uma.h */
33408355f576SJeff Roberson void
33418355f576SJeff Roberson uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
33428355f576SJeff Roberson {
33438355f576SJeff Roberson 	uma_cache_t cache;
33448355f576SJeff Roberson 	uma_bucket_t bucket;
33450a81b439SJeff Roberson 	int cpu, domain, itemdomain;
33468355f576SJeff Roberson 
3347e866d8f0SMark Murray 	/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
334819fa89e9SMark Murray 	random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA);
334910cb2424SMark Murray 
33503659f747SRobert Watson 	CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
33513659f747SRobert Watson 	    zone->uz_name);
33523659f747SRobert Watson 
3353d9e2e68dSMark Johnston 	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
33541067a2baSJonathan T. Looney 	    ("uma_zfree_arg: called with spinlock or critical section held"));
33551067a2baSJonathan T. Looney 
335620ed0cb0SMatthew D Fleming         /* uma_zfree(..., NULL) does nothing, to match free(9). */
335720ed0cb0SMatthew D Fleming         if (item == NULL)
335820ed0cb0SMatthew D Fleming                 return;
33598d689e04SGleb Smirnoff #ifdef DEBUG_MEMGUARD
33608d689e04SGleb Smirnoff 	if (is_memguard_addr(item)) {
3361bc9d08e1SMark Johnston 		if (zone->uz_dtor != NULL)
33628d689e04SGleb Smirnoff 			zone->uz_dtor(item, zone->uz_size, udata);
3363bc9d08e1SMark Johnston 		if (zone->uz_fini != NULL)
33648d689e04SGleb Smirnoff 			zone->uz_fini(item, zone->uz_size);
33658d689e04SGleb Smirnoff 		memguard_free(item);
33668d689e04SGleb Smirnoff 		return;
33678d689e04SGleb Smirnoff 	}
33688d689e04SGleb Smirnoff #endif
3369ca293436SRyan Libby 	item_dtor(zone, item, udata, SKIP_NONE);
3370ef72505eSJeff Roberson 
3371af7f9b97SJeff Roberson 	/*
3372af7f9b97SJeff Roberson 	 * The race here is acceptable.  If we miss it we'll just have to wait
3373af7f9b97SJeff Roberson 	 * a little longer for the limits to be reset.
3374af7f9b97SJeff Roberson 	 */
3375bb15d1c7SGleb Smirnoff 	if (zone->uz_sleepers > 0)
3376fc03d22bSJeff Roberson 		goto zfree_item;
3377af7f9b97SJeff Roberson 
33785d1ae027SRobert Watson 	/*
33795d1ae027SRobert Watson 	 * If possible, free to the per-CPU cache.  There are two
33805d1ae027SRobert Watson 	 * requirements for safe access to the per-CPU cache: (1) the thread
33815d1ae027SRobert Watson 	 * accessing the cache must not be preempted or yield during access,
33825d1ae027SRobert Watson 	 * and (2) the thread must not migrate CPUs without switching which
33835d1ae027SRobert Watson 	 * cache it accesses.  We rely on a critical section to prevent
33845d1ae027SRobert Watson 	 * preemption and migration.  We release the critical section in
33855d1ae027SRobert Watson 	 * order to acquire the zone mutex if we are unable to free to the
33865d1ae027SRobert Watson 	 * current cache; when we re-acquire the critical section, we must
33875d1ae027SRobert Watson 	 * detect and handle migration if it has occurred.
33885d1ae027SRobert Watson 	 */
33890a81b439SJeff Roberson 	domain = itemdomain = 0;
33905d1ae027SRobert Watson 	critical_enter();
33910a81b439SJeff Roberson 	do {
33925d1ae027SRobert Watson 		cpu = curcpu;
33938355f576SJeff Roberson 		cache = &zone->uz_cpu[cpu];
33940a81b439SJeff Roberson 		bucket = cache->uc_allocbucket;
3395c1685086SJeff Roberson #ifdef UMA_XDOMAIN
33960a81b439SJeff Roberson 		if ((zone->uz_flags & UMA_ZONE_NUMA) != 0) {
33970a81b439SJeff Roberson 			itemdomain = _vm_phys_domain(pmap_kextract((vm_offset_t)item));
33980a81b439SJeff Roberson 			domain = PCPU_GET(domain);
33990a81b439SJeff Roberson 		}
34000a81b439SJeff Roberson 		if ((zone->uz_flags & UMA_ZONE_NUMA) != 0 &&
34010a81b439SJeff Roberson 		    domain != itemdomain) {
34020a81b439SJeff Roberson 			bucket = cache->uc_crossbucket;
34030a81b439SJeff Roberson 		} else
3404c1685086SJeff Roberson #endif
3406a553d4b8SJeff Roberson 		/*
3407fc03d22bSJeff Roberson 		 * Try to free into the allocbucket first to give LIFO ordering
3408fc03d22bSJeff Roberson 		 * for cache-hot datastructures.  Spill over into the freebucket
3409fc03d22bSJeff Roberson 		 * if necessary.  Alloc will swap them if one runs dry.
3410a553d4b8SJeff Roberson 		 */
3411fc03d22bSJeff Roberson 		if (bucket == NULL || bucket->ub_cnt >= bucket->ub_entries)
3412fc03d22bSJeff Roberson 			bucket = cache->uc_freebucket;
34130a81b439SJeff Roberson 		if (__predict_true(bucket != NULL &&
34140a81b439SJeff Roberson 		    bucket->ub_cnt < bucket->ub_entries)) {
34150a81b439SJeff Roberson 			bucket_push(zone, cache, bucket, item);
34165d1ae027SRobert Watson 			critical_exit();
34178355f576SJeff Roberson 			return;
3418fc03d22bSJeff Roberson 		}
34190a81b439SJeff Roberson 	} while (cache_free(zone, cache, udata, item, itemdomain));
34200a81b439SJeff Roberson 	critical_exit();
3421fc03d22bSJeff Roberson 
34228355f576SJeff Roberson 	/*
34230a81b439SJeff Roberson 	 * If nothing else caught this, we'll just do an internal free.
34248355f576SJeff Roberson 	 */
34250a81b439SJeff Roberson zfree_item:
34260a81b439SJeff Roberson 	zone_free_item(zone, item, udata, SKIP_DTOR);
34270a81b439SJeff Roberson }
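/*
 * Illustrative sketch, not part of this file: uma_zfree(zone, NULL) is a
 * no-op to match free(9), so teardown paths may free unconditionally.
 * foo_zone and struct foo_softc are hypothetical:
 */
#include <vm/uma.h>

struct foo_softc {
	void	*fs_buf;
};

static void
foo_teardown(uma_zone_t foo_zone, struct foo_softc *sc)
{

	/* Safe even when the allocation never happened. */
	uma_zfree(foo_zone, sc->fs_buf);
	sc->fs_buf = NULL;
}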
3428fc03d22bSJeff Roberson 
34290a81b439SJeff Roberson static void
34300a81b439SJeff Roberson zone_free_bucket(uma_zone_t zone, uma_bucket_t bucket, void *udata,
34310a81b439SJeff Roberson     int domain, int itemdomain)
34320a81b439SJeff Roberson {
34330a81b439SJeff Roberson 	uma_zone_domain_t zdom;
34340a81b439SJeff Roberson 
34350a81b439SJeff Roberson #ifdef UMA_XDOMAIN
34360a81b439SJeff Roberson 	/*
34370a81b439SJeff Roberson 	 * On a two-domain system, a bucket from the wrong domain is made
34380a81b439SJeff Roberson 	 * up entirely of items from the only other domain, so we can
34390a81b439SJeff Roberson 	 * simply cache it.  Otherwise we need to sort the items back to
34400a81b439SJeff Roberson 	 * their correct domains by freeing the contents to the slab layer.
34410a81b439SJeff Roberson 	 */
34420a81b439SJeff Roberson 	if (domain != itemdomain && vm_ndomains > 2) {
34430a81b439SJeff Roberson 		CTR3(KTR_UMA,
34440a81b439SJeff Roberson 		    "uma_zfree: zone %s(%p) draining cross bucket %p",
34450a81b439SJeff Roberson 		    zone->uz_name, zone, bucket);
34460a81b439SJeff Roberson 		bucket_drain(zone, bucket);
34470a81b439SJeff Roberson 		bucket_free(zone, bucket, udata);
34480a81b439SJeff Roberson 		return;
34490a81b439SJeff Roberson 	}
34500a81b439SJeff Roberson #endif
34510a81b439SJeff Roberson 	/*
34520a81b439SJeff Roberson 	 * Attempt to save the bucket in the zone's domain bucket cache.
34530a81b439SJeff Roberson 	 *
34540a81b439SJeff Roberson 	 * We bump the zone's bucket size when lock contention suggests
34550a81b439SJeff Roberson 	 * the cache size is insufficient to handle the working set.
34560a81b439SJeff Roberson 	 */
34574d104ba0SAlexander Motin 	if (ZONE_TRYLOCK(zone) == 0) {
34584d104ba0SAlexander Motin 		/* Record contention to size the buckets. */
34598355f576SJeff Roberson 		ZONE_LOCK(zone);
3460*20a4e154SJeff Roberson 		if (zone->uz_bucket_size < zone->uz_bucket_size_max)
3461*20a4e154SJeff Roberson 			zone->uz_bucket_size++;
34624d104ba0SAlexander Motin 	}
34638355f576SJeff Roberson 
34640a81b439SJeff Roberson 	CTR3(KTR_UMA,
34650a81b439SJeff Roberson 	    "uma_zfree: zone %s(%p) putting bucket %p on free list",
34660a81b439SJeff Roberson 	    zone->uz_name, zone, bucket);
34670a81b439SJeff Roberson 	/* ub_cnt is pointing to the last free item */
34680a81b439SJeff Roberson 	KASSERT(bucket->ub_cnt == bucket->ub_entries,
34690a81b439SJeff Roberson 	    ("uma_zfree: Attempting to insert partial bucket onto the full list.\n"));
34700a81b439SJeff Roberson 	if (zone->uz_bkt_count >= zone->uz_bkt_max) {
3471c1685086SJeff Roberson 		ZONE_UNLOCK(zone);
3472c1685086SJeff Roberson 		bucket_drain(zone, bucket);
3473c1685086SJeff Roberson 		bucket_free(zone, bucket, udata);
3474c1685086SJeff Roberson 	} else {
3475c1685086SJeff Roberson 		zdom = &zone->uz_domain[itemdomain];
3476c1685086SJeff Roberson 		zone_put_bucket(zone, zdom, bucket, true);
3477c1685086SJeff Roberson 		ZONE_UNLOCK(zone);
3478c1685086SJeff Roberson 	}
34798355f576SJeff Roberson }
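/*
 * Illustrative sketch, not part of this file: the ZONE_TRYLOCK() above
 * doubles as a contention probe; every failed try grows uz_bucket_size
 * toward uz_bucket_size_max so future frees batch more work per lock
 * acquisition.  The shape of that pattern with a plain mutex; all names
 * are hypothetical:
 */
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>

static void
adaptive_lock(struct mtx *m, int *batch, int batch_max)
{

	if (mtx_trylock(m) == 0) {
		/* Contended: block, and grow the batch size. */
		mtx_lock(m);
		if (*batch < batch_max)
			(*batch)++;
	}
}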
3480fc03d22bSJeff Roberson 
34814d104ba0SAlexander Motin /*
34820a81b439SJeff Roberson  * Populate a free or cross bucket for the current cpu cache.  Free any
34830a81b439SJeff Roberson  * existing full bucket either to the zone cache or back to the slab layer.
34840a81b439SJeff Roberson  *
34850a81b439SJeff Roberson  * Enters and returns in a critical section.  A false return indicates
34860a81b439SJeff Roberson  * that we cannot satisfy this free in the cache layer; a true return
34870a81b439SJeff Roberson  * indicates that the caller should retry.
34884d104ba0SAlexander Motin  */
34890a81b439SJeff Roberson static __noinline bool
34900a81b439SJeff Roberson cache_free(uma_zone_t zone, uma_cache_t cache, void *udata, void *item,
34910a81b439SJeff Roberson     int itemdomain)
34920a81b439SJeff Roberson {
34930a81b439SJeff Roberson 	uma_bucket_t bucket;
34940a81b439SJeff Roberson 	int cpu, domain;
34950a81b439SJeff Roberson 
34960a81b439SJeff Roberson 	CRITICAL_ASSERT(curthread);
34970a81b439SJeff Roberson 
3498*20a4e154SJeff Roberson 	if (zone->uz_bucket_size == 0 || bucketdisable)
34990a81b439SJeff Roberson 		return false;
35000a81b439SJeff Roberson 
35010a81b439SJeff Roberson 	cpu = curcpu;
35020a81b439SJeff Roberson 	cache = &zone->uz_cpu[cpu];
35030a81b439SJeff Roberson 
35040a81b439SJeff Roberson 	/*
35050a81b439SJeff Roberson 	 * NUMA domains need to free to the correct zdom.  When XDOMAIN
35060a81b439SJeff Roberson 	 * is enabled this is the zdom of the item and the bucket may be
35070a81b439SJeff Roberson 	 * the cross bucket if they do not match.
35080a81b439SJeff Roberson 	 */
35090a81b439SJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_NUMA) != 0)
35100a81b439SJeff Roberson #ifdef UMA_XDOMAIN
35110a81b439SJeff Roberson 		domain = PCPU_GET(domain);
35120a81b439SJeff Roberson #else
35130a81b439SJeff Roberson 		itemdomain = domain = PCPU_GET(domain);
35140a81b439SJeff Roberson #endif
35150a81b439SJeff Roberson 	else
35160a81b439SJeff Roberson 		itemdomain = domain = 0;
35170a81b439SJeff Roberson #ifdef UMA_XDOMAIN
35180a81b439SJeff Roberson 	if (domain != itemdomain) {
35190a81b439SJeff Roberson 		bucket = cache->uc_crossbucket;
35200a81b439SJeff Roberson 		cache->uc_crossbucket = NULL;
35210a81b439SJeff Roberson 		if (bucket != NULL)
35220a81b439SJeff Roberson 			atomic_add_64(&zone->uz_xdomain, bucket->ub_cnt);
35230a81b439SJeff Roberson 	} else
35240a81b439SJeff Roberson #endif
35250a81b439SJeff Roberson 	{
35260a81b439SJeff Roberson 		bucket = cache->uc_freebucket;
35270a81b439SJeff Roberson 		cache->uc_freebucket = NULL;
35280a81b439SJeff Roberson 	}
35290a81b439SJeff Roberson 
35310a81b439SJeff Roberson 	/* We are no longer associated with this CPU. */
35320a81b439SJeff Roberson 	critical_exit();
35330a81b439SJeff Roberson 
35340a81b439SJeff Roberson 	if (bucket != NULL)
35350a81b439SJeff Roberson 		zone_free_bucket(zone, bucket, udata, domain, itemdomain);
3536a553d4b8SJeff Roberson 
35376fd34d6fSJeff Roberson 	bucket = bucket_alloc(zone, udata, M_NOWAIT);
35381431a748SGleb Smirnoff 	CTR3(KTR_UMA, "uma_zfree: zone %s(%p) allocated bucket %p",
35391431a748SGleb Smirnoff 	    zone->uz_name, zone, bucket);
3540fc03d22bSJeff Roberson 	critical_enter();
35410a81b439SJeff Roberson 	if (bucket == NULL)
35420a81b439SJeff Roberson 		return (false);
3543fc03d22bSJeff Roberson 	cpu = curcpu;
3544fc03d22bSJeff Roberson 	cache = &zone->uz_cpu[cpu];
35450a81b439SJeff Roberson #ifdef UMA_XDOMAIN
3546fc03d22bSJeff Roberson 	/*
35470a81b439SJeff Roberson 	 * Check to see if we should be populating the cross bucket.  If it
35480a81b439SJeff Roberson 	 * is already populated we will fall through and attempt to populate
35490a81b439SJeff Roberson 	 * the free bucket.
3550fc03d22bSJeff Roberson 	 */
35510a81b439SJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_NUMA) != 0) {
35520a81b439SJeff Roberson 		domain = PCPU_GET(domain);
35530a81b439SJeff Roberson 		if (domain != itemdomain && cache->uc_crossbucket == NULL) {
35540a81b439SJeff Roberson 			cache->uc_crossbucket = bucket;
35550a81b439SJeff Roberson 			return (true);
35560a81b439SJeff Roberson 		}
35570a81b439SJeff Roberson 	}
35580a81b439SJeff Roberson #endif
35590a81b439SJeff Roberson 	/*
35600a81b439SJeff Roberson 	 * We may have lost the race to fill the bucket or switched CPUs.
35610a81b439SJeff Roberson 	 */
35620a81b439SJeff Roberson 	if (cache->uc_freebucket != NULL) {
3563fc03d22bSJeff Roberson 		critical_exit();
35646fd34d6fSJeff Roberson 		bucket_free(zone, bucket, udata);
35650a81b439SJeff Roberson 		critical_enter();
35660a81b439SJeff Roberson 	} else
35670a81b439SJeff Roberson 		cache->uc_freebucket = bucket;
35688355f576SJeff Roberson 
35690a81b439SJeff Roberson 	return (true);
35708355f576SJeff Roberson }
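/*
 * Illustrative sketch, not part of this file: cache_free() exits its
 * critical section before calling the (potentially blocking) bucket
 * allocator and must re-read curcpu on re-entry, because the thread may
 * have migrated in between.  The skeleton of that discipline; the per-CPU
 * state being protected is left as a comment:
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/pcpu.h>

static int
percpu_slow_path(void)
{
	int cpu_before, cpu_after;

	critical_enter();
	cpu_before = curcpu;	/* stable only inside the section */
	/* ... fast path on this CPU's private state ... */
	critical_exit();
	/* ... blocking work; the thread may migrate here ... */
	critical_enter();
	cpu_after = curcpu;	/* must be re-read after re-entry */
	/* ... revalidate per-CPU state before reusing it ... */
	critical_exit();
	return (cpu_before == cpu_after);	/* did we stay put? */
}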
35718355f576SJeff Roberson 
3572ab3185d1SJeff Roberson void
3573ab3185d1SJeff Roberson uma_zfree_domain(uma_zone_t zone, void *item, void *udata)
3574ab3185d1SJeff Roberson {
3575ab3185d1SJeff Roberson 
3576ab3185d1SJeff Roberson 	/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
357719fa89e9SMark Murray 	random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA);
3578ab3185d1SJeff Roberson 
3579ab3185d1SJeff Roberson 	CTR2(KTR_UMA, "uma_zfree_domain thread %x zone %s", curthread,
3580ab3185d1SJeff Roberson 	    zone->uz_name);
3581ab3185d1SJeff Roberson 
3582ab3185d1SJeff Roberson 	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
3583ab3185d1SJeff Roberson 	    ("uma_zfree_domain: called with spinlock or critical section held"));
3584ab3185d1SJeff Roberson 
3585ab3185d1SJeff Roberson         /* uma_zfree(..., NULL) does nothing, to match free(9). */
3586ab3185d1SJeff Roberson         if (item == NULL)
3587ab3185d1SJeff Roberson                 return;
3588ab3185d1SJeff Roberson 	zone_free_item(zone, item, udata, SKIP_NONE);
3589ab3185d1SJeff Roberson }
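/*
 * Illustrative sketch, not part of this file: uma_zfree_domain() pairs
 * with uma_zalloc_domain() and, as shown above, goes straight to
 * zone_free_item(), bypassing the per-CPU caches.  foo_zone is
 * hypothetical:
 */
#include <sys/param.h>
#include <sys/malloc.h>
#include <vm/uma.h>

static void *
foo_alloc_on_domain(uma_zone_t foo_zone, int domain)
{

	return (uma_zalloc_domain(foo_zone, NULL, domain, M_WAITOK));
}

static void
foo_free_domain_item(uma_zone_t foo_zone, void *item)
{

	uma_zfree_domain(foo_zone, item, NULL);
}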
3590ab3185d1SJeff Roberson 
35918355f576SJeff Roberson static void
3592bb15d1c7SGleb Smirnoff slab_free_item(uma_zone_t zone, uma_slab_t slab, void *item)
35938355f576SJeff Roberson {
3594bb15d1c7SGleb Smirnoff 	uma_keg_t keg;
3595ab3185d1SJeff Roberson 	uma_domain_t dom;
359685dcf349SGleb Smirnoff 	uint8_t freei;
3597099a0e58SBosko Milekic 
3598bb15d1c7SGleb Smirnoff 	keg = zone->uz_keg;
3599bb15d1c7SGleb Smirnoff 	MPASS(zone->uz_lockptr == &keg->uk_lock);
3600bb15d1c7SGleb Smirnoff 	KEG_LOCK_ASSERT(keg);
3601e20a199fSJeff Roberson 	MPASS(keg == slab->us_keg);
36028355f576SJeff Roberson 
3603ab3185d1SJeff Roberson 	dom = &keg->uk_domain[slab->us_domain];
3604ab3185d1SJeff Roberson 
36058355f576SJeff Roberson 	/* Do we need to remove from any lists? */
3606099a0e58SBosko Milekic 	if (slab->us_freecount+1 == keg->uk_ipers) {
36078355f576SJeff Roberson 		LIST_REMOVE(slab, us_link);
3608ab3185d1SJeff Roberson 		LIST_INSERT_HEAD(&dom->ud_free_slab, slab, us_link);
36098355f576SJeff Roberson 	} else if (slab->us_freecount == 0) {
36108355f576SJeff Roberson 		LIST_REMOVE(slab, us_link);
3611ab3185d1SJeff Roberson 		LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link);
36128355f576SJeff Roberson 	}
36138355f576SJeff Roberson 
3614ef72505eSJeff Roberson 	/* Slab management. */
3615ef72505eSJeff Roberson 	freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
3616ef72505eSJeff Roberson 	BIT_SET(SLAB_SETSIZE, freei, &slab->us_free);
36178355f576SJeff Roberson 	slab->us_freecount++;
36188355f576SJeff Roberson 
3619ef72505eSJeff Roberson 	/* Keg statistics. */
3620099a0e58SBosko Milekic 	keg->uk_free++;
36210095a784SJeff Roberson }
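/*
 * Illustrative sketch, not part of this file: the free index above is pure
 * pointer arithmetic, the inverse of item = us_data + freei * uk_rsize.
 * item_index() is a hypothetical stand-alone version:
 */
#include <sys/types.h>

static uint8_t
item_index(uintptr_t base, uintptr_t item, size_t rsize)
{

	/* Inverse of: item = base + index * rsize. */
	return ((item - base) / rsize);
}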
36220095a784SJeff Roberson 
36230095a784SJeff Roberson static void
36240095a784SJeff Roberson zone_release(uma_zone_t zone, void **bucket, int cnt)
36250095a784SJeff Roberson {
36260095a784SJeff Roberson 	void *item;
36270095a784SJeff Roberson 	uma_slab_t slab;
36280095a784SJeff Roberson 	uma_keg_t keg;
36290095a784SJeff Roberson 	uint8_t *mem;
36300095a784SJeff Roberson 	int i;
36318355f576SJeff Roberson 
3632bb15d1c7SGleb Smirnoff 	keg = zone->uz_keg;
3633af526374SJeff Roberson 	KEG_LOCK(keg);
36340095a784SJeff Roberson 	for (i = 0; i < cnt; i++) {
36350095a784SJeff Roberson 		item = bucket[i];
36360095a784SJeff Roberson 		if (!(zone->uz_flags & UMA_ZONE_VTOSLAB)) {
36370095a784SJeff Roberson 			mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
36380095a784SJeff Roberson 			if (zone->uz_flags & UMA_ZONE_HASH) {
36390095a784SJeff Roberson 				slab = hash_sfind(&keg->uk_hash, mem);
36400095a784SJeff Roberson 			} else {
36410095a784SJeff Roberson 				mem += keg->uk_pgoff;
36420095a784SJeff Roberson 				slab = (uma_slab_t)mem;
36430095a784SJeff Roberson 			}
36440095a784SJeff Roberson 		} else {
36450095a784SJeff Roberson 			slab = vtoslab((vm_offset_t)item);
3646bb15d1c7SGleb Smirnoff 			MPASS(slab->us_keg == keg);
36470095a784SJeff Roberson 		}
3648bb15d1c7SGleb Smirnoff 		slab_free_item(zone, slab, item);
36490095a784SJeff Roberson 	}
3650af526374SJeff Roberson 	KEG_UNLOCK(keg);
36518355f576SJeff Roberson }
36528355f576SJeff Roberson 
36530095a784SJeff Roberson /*
36540095a784SJeff Roberson  * Frees a single item to any zone.
36550095a784SJeff Roberson  *
36560095a784SJeff Roberson  * Arguments:
36570095a784SJeff Roberson  *	zone   The zone to free to
36580095a784SJeff Roberson  *	item   The item we're freeing
36590095a784SJeff Roberson  *	udata  User supplied data for the dtor
36600095a784SJeff Roberson  *	skip   Skip dtors and finis
36610095a784SJeff Roberson  */
36620095a784SJeff Roberson static void
36630095a784SJeff Roberson zone_free_item(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip)
36640095a784SJeff Roberson {
3665c5deaf04SGleb Smirnoff 
3666ca293436SRyan Libby 	item_dtor(zone, item, udata, skip);
36670095a784SJeff Roberson 
36680095a784SJeff Roberson 	if (skip < SKIP_FINI && zone->uz_fini)
36690095a784SJeff Roberson 		zone->uz_fini(item, zone->uz_size);
36700095a784SJeff Roberson 
36710095a784SJeff Roberson 	zone->uz_release(zone->uz_arg, &item, 1);
3672bb15d1c7SGleb Smirnoff 
3673bb15d1c7SGleb Smirnoff 	if (skip & SKIP_CNT)
3674bb15d1c7SGleb Smirnoff 		return;
3675bb15d1c7SGleb Smirnoff 
36762efcc8cbSGleb Smirnoff 	counter_u64_add(zone->uz_frees, 1);
36772efcc8cbSGleb Smirnoff 
3678bb45b411SGleb Smirnoff 	if (zone->uz_max_items > 0) {
3679bb15d1c7SGleb Smirnoff 		ZONE_LOCK(zone);
3680bb15d1c7SGleb Smirnoff 		zone->uz_items--;
3681bb45b411SGleb Smirnoff 		if (zone->uz_sleepers > 0 &&
3682bb45b411SGleb Smirnoff 		    zone->uz_items < zone->uz_max_items)
3683bb15d1c7SGleb Smirnoff 			wakeup_one(zone);
3684bb15d1c7SGleb Smirnoff 		ZONE_UNLOCK(zone);
36850095a784SJeff Roberson 	}
3686bb45b411SGleb Smirnoff }
36870095a784SJeff Roberson 
36888355f576SJeff Roberson /* See uma.h */
36891c6cae97SLawrence Stewart int
3690736ee590SJeff Roberson uma_zone_set_max(uma_zone_t zone, int nitems)
3691736ee590SJeff Roberson {
3692bb15d1c7SGleb Smirnoff 	struct uma_bucket_zone *ubz;
3693003cf08bSMark Johnston 	int count;
3694bb15d1c7SGleb Smirnoff 
3695bb15d1c7SGleb Smirnoff 	ZONE_LOCK(zone);
3696003cf08bSMark Johnston 	ubz = bucket_zone_max(zone, nitems);
3697003cf08bSMark Johnston 	count = ubz != NULL ? ubz->ubz_entries : 0;
3698*20a4e154SJeff Roberson 	zone->uz_bucket_size_max = zone->uz_bucket_size = count;
3699*20a4e154SJeff Roberson 	if (zone->uz_bucket_size_min > zone->uz_bucket_size_max)
3700*20a4e154SJeff Roberson 		zone->uz_bucket_size_min = zone->uz_bucket_size_max;
3701bb15d1c7SGleb Smirnoff 	zone->uz_max_items = nitems;
3702bb15d1c7SGleb Smirnoff 	ZONE_UNLOCK(zone);
3703bb15d1c7SGleb Smirnoff 
3704bb15d1c7SGleb Smirnoff 	return (nitems);
3705bb15d1c7SGleb Smirnoff }
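/*
 * Illustrative sketch, not part of this file: uma_zone_set_max() returns
 * the limit that was actually installed, so callers should record the
 * return value rather than the requested count.  foo_zone and foo_limit
 * are hypothetical:
 */
#include <vm/uma.h>

static int foo_limit;

static void
foo_set_limit(uma_zone_t foo_zone, int nitems)
{

	/* The effective limit is what the zone reports back. */
	foo_limit = uma_zone_set_max(foo_zone, nitems);
}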
3706bb15d1c7SGleb Smirnoff 
3707bb15d1c7SGleb Smirnoff /* See uma.h */
3708003cf08bSMark Johnston void
3709bb15d1c7SGleb Smirnoff uma_zone_set_maxcache(uma_zone_t zone, int nitems)
3710bb15d1c7SGleb Smirnoff {
3711003cf08bSMark Johnston 	struct uma_bucket_zone *ubz;
3712003cf08bSMark Johnston 	int bpcpu;
3713bb15d1c7SGleb Smirnoff 
3714bb15d1c7SGleb Smirnoff 	ZONE_LOCK(zone);
3715003cf08bSMark Johnston 	ubz = bucket_zone_max(zone, nitems);
3716003cf08bSMark Johnston 	if (ubz != NULL) {
3717003cf08bSMark Johnston 		bpcpu = 2;
3718003cf08bSMark Johnston #ifdef UMA_XDOMAIN
3719003cf08bSMark Johnston 		if ((zone->uz_flags & UMA_ZONE_NUMA) != 0)
3720003cf08bSMark Johnston 			/* Count the cross-domain bucket. */
3721003cf08bSMark Johnston 			bpcpu++;
3722003cf08bSMark Johnston #endif
3723003cf08bSMark Johnston 		nitems -= ubz->ubz_entries * bpcpu * mp_ncpus;
3724*20a4e154SJeff Roberson 		zone->uz_bucket_size_max = ubz->ubz_entries;
3725003cf08bSMark Johnston 	} else {
3726*20a4e154SJeff Roberson 		zone->uz_bucket_size_max = zone->uz_bucket_size = 0;
3727003cf08bSMark Johnston 	}
3728*20a4e154SJeff Roberson 	if (zone->uz_bucket_size_min > zone->uz_bucket_size_max)
3729*20a4e154SJeff Roberson 		zone->uz_bucket_size_min = zone->uz_bucket_size_max;
3730bb15d1c7SGleb Smirnoff 	zone->uz_bkt_max = nitems;
3731bb15d1c7SGleb Smirnoff 	ZONE_UNLOCK(zone);
3732736ee590SJeff Roberson }
3733736ee590SJeff Roberson 
3734736ee590SJeff Roberson /* See uma.h */
3735e49471b0SAndre Oppermann int
3736e49471b0SAndre Oppermann uma_zone_get_max(uma_zone_t zone)
3737e49471b0SAndre Oppermann {
3738e49471b0SAndre Oppermann 	int nitems;
3739e49471b0SAndre Oppermann 
3740bb15d1c7SGleb Smirnoff 	ZONE_LOCK(zone);
3741bb15d1c7SGleb Smirnoff 	nitems = zone->uz_max_items;
3742bb15d1c7SGleb Smirnoff 	ZONE_UNLOCK(zone);
3743e49471b0SAndre Oppermann 
3744e49471b0SAndre Oppermann 	return (nitems);
3745e49471b0SAndre Oppermann }
3746e49471b0SAndre Oppermann 
3747e49471b0SAndre Oppermann /* See uma.h */
37482f891cd5SPawel Jakub Dawidek void
37492f891cd5SPawel Jakub Dawidek uma_zone_set_warning(uma_zone_t zone, const char *warning)
37502f891cd5SPawel Jakub Dawidek {
37512f891cd5SPawel Jakub Dawidek 
37522f891cd5SPawel Jakub Dawidek 	ZONE_LOCK(zone);
37532f891cd5SPawel Jakub Dawidek 	zone->uz_warning = warning;
37542f891cd5SPawel Jakub Dawidek 	ZONE_UNLOCK(zone);
37552f891cd5SPawel Jakub Dawidek }
37562f891cd5SPawel Jakub Dawidek 
37572f891cd5SPawel Jakub Dawidek /* See uma.h */
375854503a13SJonathan T. Looney void
375954503a13SJonathan T. Looney uma_zone_set_maxaction(uma_zone_t zone, uma_maxaction_t maxaction)
376054503a13SJonathan T. Looney {
376154503a13SJonathan T. Looney 
376254503a13SJonathan T. Looney 	ZONE_LOCK(zone);
3763e60b2fcbSGleb Smirnoff 	TASK_INIT(&zone->uz_maxaction, 0, (task_fn_t *)maxaction, zone);
376454503a13SJonathan T. Looney 	ZONE_UNLOCK(zone);
376554503a13SJonathan T. Looney }
376654503a13SJonathan T. Looney 
376754503a13SJonathan T. Looney /* See uma.h */
3768c4ae7908SLawrence Stewart int
3769c4ae7908SLawrence Stewart uma_zone_get_cur(uma_zone_t zone)
3770c4ae7908SLawrence Stewart {
3771c4ae7908SLawrence Stewart 	int64_t nitems;
3772c4ae7908SLawrence Stewart 	u_int i;
3773c4ae7908SLawrence Stewart 
3774c4ae7908SLawrence Stewart 	ZONE_LOCK(zone);
37752efcc8cbSGleb Smirnoff 	nitems = counter_u64_fetch(zone->uz_allocs) -
37762efcc8cbSGleb Smirnoff 	    counter_u64_fetch(zone->uz_frees);
3777*20a4e154SJeff Roberson 	if ((zone->uz_flags & UMA_ZFLAG_INTERNAL) == 0) {
3778c4ae7908SLawrence Stewart 		CPU_FOREACH(i) {
3779c4ae7908SLawrence Stewart 			/*
3780*20a4e154SJeff Roberson 			 * See the comment in uma_vm_zone_stats() regarding
3781*20a4e154SJeff Roberson 			 * the safety of accessing the per-cpu caches. With
3782*20a4e154SJeff Roberson 			 * the zone lock held, it is safe, but can potentially
3783*20a4e154SJeff Roberson 			 * result in stale data.
3784c4ae7908SLawrence Stewart 			 */
3785c4ae7908SLawrence Stewart 			nitems += zone->uz_cpu[i].uc_allocs -
3786c4ae7908SLawrence Stewart 			    zone->uz_cpu[i].uc_frees;
3787c4ae7908SLawrence Stewart 		}
3788*20a4e154SJeff Roberson 	}
3789c4ae7908SLawrence Stewart 	ZONE_UNLOCK(zone);
3790c4ae7908SLawrence Stewart 
3791c4ae7908SLawrence Stewart 	return (nitems < 0 ? 0 : nitems);
3792c4ae7908SLawrence Stewart }
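/*
 * Illustrative sketch, not part of this file: uma_zone_get_cur() is a racy
 * snapshot (the per-CPU counters may be stale), which suits watermark
 * checks rather than exact accounting.  foo_zone and hiwat are
 * hypothetical:
 */
#include <vm/uma.h>

static int
foo_above_hiwat(uma_zone_t foo_zone, int hiwat)
{

	/* Racy by design; good enough for pacing decisions. */
	return (uma_zone_get_cur(foo_zone) >= hiwat);
}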
3793c4ae7908SLawrence Stewart 
3794*20a4e154SJeff Roberson static uint64_t
3795*20a4e154SJeff Roberson uma_zone_get_allocs(uma_zone_t zone)
3796*20a4e154SJeff Roberson {
3797*20a4e154SJeff Roberson 	uint64_t nitems;
3798*20a4e154SJeff Roberson 	u_int i;
3799*20a4e154SJeff Roberson 
3800*20a4e154SJeff Roberson 	ZONE_LOCK(zone);
3801*20a4e154SJeff Roberson 	nitems = counter_u64_fetch(zone->uz_allocs);
3802*20a4e154SJeff Roberson 	if ((zone->uz_flags & UMA_ZFLAG_INTERNAL) == 0) {
3803*20a4e154SJeff Roberson 		CPU_FOREACH(i) {
3804*20a4e154SJeff Roberson 			/*
3805*20a4e154SJeff Roberson 			 * See the comment in uma_vm_zone_stats() regarding
3806*20a4e154SJeff Roberson 			 * the safety of accessing the per-cpu caches. With
3807*20a4e154SJeff Roberson 			 * the zone lock held, it is safe, but can potentially
3808*20a4e154SJeff Roberson 			 * result in stale data.
3809*20a4e154SJeff Roberson 			 */
3810*20a4e154SJeff Roberson 			nitems += zone->uz_cpu[i].uc_allocs;
3811*20a4e154SJeff Roberson 		}
3812*20a4e154SJeff Roberson 	}
3813*20a4e154SJeff Roberson 	ZONE_UNLOCK(zone);
3814*20a4e154SJeff Roberson 
3815*20a4e154SJeff Roberson 	return (nitems);
3816*20a4e154SJeff Roberson }
3817*20a4e154SJeff Roberson 
3818*20a4e154SJeff Roberson static uint64_t
3819*20a4e154SJeff Roberson uma_zone_get_frees(uma_zone_t zone)
3820*20a4e154SJeff Roberson {
3821*20a4e154SJeff Roberson 	uint64_t nitems;
3822*20a4e154SJeff Roberson 	u_int i;
3823*20a4e154SJeff Roberson 
3824*20a4e154SJeff Roberson 	ZONE_LOCK(zone);
3825*20a4e154SJeff Roberson 	nitems = counter_u64_fetch(zone->uz_frees);
3826*20a4e154SJeff Roberson 	if ((zone->uz_flags & UMA_ZFLAG_INTERNAL) == 0) {
3827*20a4e154SJeff Roberson 		CPU_FOREACH(i) {
3828*20a4e154SJeff Roberson 			/*
3829*20a4e154SJeff Roberson 			 * See the comment in uma_vm_zone_stats() regarding
3830*20a4e154SJeff Roberson 			 * the safety of accessing the per-cpu caches. With
3831*20a4e154SJeff Roberson 			 * the zone lock held, it is safe, but can potentially
3832*20a4e154SJeff Roberson 			 * result in stale data.
3833*20a4e154SJeff Roberson 			 */
3834*20a4e154SJeff Roberson 			nitems += zone->uz_cpu[i].uc_frees;
3835*20a4e154SJeff Roberson 		}
3836*20a4e154SJeff Roberson 	}
3837*20a4e154SJeff Roberson 	ZONE_UNLOCK(zone);
3838*20a4e154SJeff Roberson 
3839*20a4e154SJeff Roberson 	return (nitems);
3840*20a4e154SJeff Roberson }
3841*20a4e154SJeff Roberson 
3842c4ae7908SLawrence Stewart /* See uma.h */
3843736ee590SJeff Roberson void
3844099a0e58SBosko Milekic uma_zone_set_init(uma_zone_t zone, uma_init uminit)
3845099a0e58SBosko Milekic {
3846e20a199fSJeff Roberson 	uma_keg_t keg;
3847e20a199fSJeff Roberson 
3848bb15d1c7SGleb Smirnoff 	KEG_GET(zone, keg);
3849af526374SJeff Roberson 	KEG_LOCK(keg);
3850e20a199fSJeff Roberson 	KASSERT(keg->uk_pages == 0,
3851099a0e58SBosko Milekic 	    ("uma_zone_set_init on non-empty keg"));
3852e20a199fSJeff Roberson 	keg->uk_init = uminit;
3853af526374SJeff Roberson 	KEG_UNLOCK(keg);
3854099a0e58SBosko Milekic }
3855099a0e58SBosko Milekic 
3856099a0e58SBosko Milekic /* See uma.h */
3857099a0e58SBosko Milekic void
3858099a0e58SBosko Milekic uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
3859099a0e58SBosko Milekic {
3860e20a199fSJeff Roberson 	uma_keg_t keg;
3861e20a199fSJeff Roberson 
3862bb15d1c7SGleb Smirnoff 	KEG_GET(zone, keg);
3863af526374SJeff Roberson 	KEG_LOCK(keg);
3864e20a199fSJeff Roberson 	KASSERT(keg->uk_pages == 0,
3865099a0e58SBosko Milekic 	    ("uma_zone_set_fini on non-empty keg"));
3866e20a199fSJeff Roberson 	keg->uk_fini = fini;
3867af526374SJeff Roberson 	KEG_UNLOCK(keg);
3868099a0e58SBosko Milekic }
3869099a0e58SBosko Milekic 
3870099a0e58SBosko Milekic /* See uma.h */
3871099a0e58SBosko Milekic void
3872099a0e58SBosko Milekic uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
3873099a0e58SBosko Milekic {
3874af526374SJeff Roberson 
3875099a0e58SBosko Milekic 	ZONE_LOCK(zone);
3876bb15d1c7SGleb Smirnoff 	KASSERT(zone->uz_keg->uk_pages == 0,
3877099a0e58SBosko Milekic 	    ("uma_zone_set_zinit on non-empty keg"));
3878099a0e58SBosko Milekic 	zone->uz_init = zinit;
3879099a0e58SBosko Milekic 	ZONE_UNLOCK(zone);
3880099a0e58SBosko Milekic }
3881099a0e58SBosko Milekic 
3882099a0e58SBosko Milekic /* See uma.h */
3883099a0e58SBosko Milekic void
3884099a0e58SBosko Milekic uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
3885099a0e58SBosko Milekic {
3886af526374SJeff Roberson 
3887099a0e58SBosko Milekic 	ZONE_LOCK(zone);
3888bb15d1c7SGleb Smirnoff 	KASSERT(zone->uz_keg->uk_pages == 0,
3889099a0e58SBosko Milekic 	    ("uma_zone_set_zfini on non-empty keg"));
3890099a0e58SBosko Milekic 	zone->uz_fini = zfini;
3891099a0e58SBosko Milekic 	ZONE_UNLOCK(zone);
3892099a0e58SBosko Milekic }
3893099a0e58SBosko Milekic 
3894099a0e58SBosko Milekic /* See uma.h */
3895b23f72e9SBrian Feldman /* XXX uk_freef is not actually used with the zone locked */
3896099a0e58SBosko Milekic void
38978355f576SJeff Roberson uma_zone_set_freef(uma_zone_t zone, uma_free freef)
38988355f576SJeff Roberson {
38990095a784SJeff Roberson 	uma_keg_t keg;
3900e20a199fSJeff Roberson 
3901bb15d1c7SGleb Smirnoff 	KEG_GET(zone, keg);
39021d2c0c46SDmitry Chagin 	KASSERT(keg != NULL, ("uma_zone_set_freef: Invalid zone type"));
3903af526374SJeff Roberson 	KEG_LOCK(keg);
39040095a784SJeff Roberson 	keg->uk_freef = freef;
3905af526374SJeff Roberson 	KEG_UNLOCK(keg);
39068355f576SJeff Roberson }
39078355f576SJeff Roberson 
39088355f576SJeff Roberson /* See uma.h */
3909b23f72e9SBrian Feldman /* XXX uk_allocf is not actually used with the zone locked */
39108355f576SJeff Roberson void
39118355f576SJeff Roberson uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
39128355f576SJeff Roberson {
3913e20a199fSJeff Roberson 	uma_keg_t keg;
3914e20a199fSJeff Roberson 
3915bb15d1c7SGleb Smirnoff 	KEG_GET(zone, keg);
3916af526374SJeff Roberson 	KEG_LOCK(keg);
3917e20a199fSJeff Roberson 	keg->uk_allocf = allocf;
3918af526374SJeff Roberson 	KEG_UNLOCK(keg);
39198355f576SJeff Roberson }
39208355f576SJeff Roberson 
39218355f576SJeff Roberson /* See uma.h */
39226fd34d6fSJeff Roberson void
39236fd34d6fSJeff Roberson uma_zone_reserve(uma_zone_t zone, int items)
39246fd34d6fSJeff Roberson {
39256fd34d6fSJeff Roberson 	uma_keg_t keg;
39266fd34d6fSJeff Roberson 
3927bb15d1c7SGleb Smirnoff 	KEG_GET(zone, keg);
39286fd34d6fSJeff Roberson 	KEG_LOCK(keg);
39296fd34d6fSJeff Roberson 	keg->uk_reserve = items;
39306fd34d6fSJeff Roberson 	KEG_UNLOCK(keg);
39316fd34d6fSJeff Roberson }
39326fd34d6fSJeff Roberson 
39336fd34d6fSJeff Roberson /* See uma.h */
39348355f576SJeff Roberson int
3935a4915c21SAttilio Rao uma_zone_reserve_kva(uma_zone_t zone, int count)
39368355f576SJeff Roberson {
3937099a0e58SBosko Milekic 	uma_keg_t keg;
39388355f576SJeff Roberson 	vm_offset_t kva;
39399ba30bcbSZbigniew Bodek 	u_int pages;
39408355f576SJeff Roberson 
3941bb15d1c7SGleb Smirnoff 	KEG_GET(zone, keg);
39428355f576SJeff Roberson 
3943bb15d1c7SGleb Smirnoff 	pages = count / keg->uk_ipers;
3944099a0e58SBosko Milekic 	if (pages * keg->uk_ipers < count)
39458355f576SJeff Roberson 		pages++;
394657223e99SAndriy Gapon 	pages *= keg->uk_ppera;
3947a553d4b8SJeff Roberson 
3948a4915c21SAttilio Rao #ifdef UMA_MD_SMALL_ALLOC
3949a4915c21SAttilio Rao 	if (keg->uk_ppera > 1) {
3950a4915c21SAttilio Rao #else
3951a4915c21SAttilio Rao 	if (1) {
3952a4915c21SAttilio Rao #endif
395357223e99SAndriy Gapon 		kva = kva_alloc((vm_size_t)pages * PAGE_SIZE);
3954d1f42ac2SAlan Cox 		if (kva == 0)
39558355f576SJeff Roberson 			return (0);
3956a4915c21SAttilio Rao 	} else
3957a4915c21SAttilio Rao 		kva = 0;
3958bb15d1c7SGleb Smirnoff 
3959bb15d1c7SGleb Smirnoff 	ZONE_LOCK(zone);
3960bb15d1c7SGleb Smirnoff 	MPASS(keg->uk_kva == 0);
3961099a0e58SBosko Milekic 	keg->uk_kva = kva;
3962a4915c21SAttilio Rao 	keg->uk_offset = 0;
3963bb15d1c7SGleb Smirnoff 	zone->uz_max_items = pages * keg->uk_ipers;
3964a4915c21SAttilio Rao #ifdef UMA_MD_SMALL_ALLOC
3965a4915c21SAttilio Rao 	keg->uk_allocf = (keg->uk_ppera > 1) ? noobj_alloc : uma_small_alloc;
3966a4915c21SAttilio Rao #else
3967a4915c21SAttilio Rao 	keg->uk_allocf = noobj_alloc;
3968a4915c21SAttilio Rao #endif
39696fd34d6fSJeff Roberson 	keg->uk_flags |= UMA_ZONE_NOFREE;
3970bb15d1c7SGleb Smirnoff 	ZONE_UNLOCK(zone);
3971af526374SJeff Roberson 
39728355f576SJeff Roberson 	return (1);
39738355f576SJeff Roberson }
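/*
 * Illustrative sketch, not part of this file: a zone backed by reserved
 * KVA is capped at `count' items and never returns its pages (the function
 * sets UMA_ZONE_NOFREE above).  Hypothetical boot-time setup; struct foo
 * and foo_zone are assumptions:
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <vm/uma.h>

struct foo {
	uint64_t	f_data[8];
};
static uma_zone_t foo_zone;

static void
foo_zone_startup(int count)
{

	foo_zone = uma_zcreate("foo", sizeof(struct foo), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, 0);
	if (uma_zone_reserve_kva(foo_zone, count) == 0)
		panic("foo: cannot reserve KVA for %d items", count);
}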
39748355f576SJeff Roberson 
39758355f576SJeff Roberson /* See uma.h */
39768355f576SJeff Roberson void
39778355f576SJeff Roberson uma_prealloc(uma_zone_t zone, int items)
39788355f576SJeff Roberson {
3979920239efSMark Johnston 	struct vm_domainset_iter di;
3980ab3185d1SJeff Roberson 	uma_domain_t dom;
39818355f576SJeff Roberson 	uma_slab_t slab;
3982099a0e58SBosko Milekic 	uma_keg_t keg;
398386220393SMark Johnston 	int aflags, domain, slabs;
39848355f576SJeff Roberson 
3985bb15d1c7SGleb Smirnoff 	KEG_GET(zone, keg);
3986af526374SJeff Roberson 	KEG_LOCK(keg);
3987099a0e58SBosko Milekic 	slabs = items / keg->uk_ipers;
3988099a0e58SBosko Milekic 	if (slabs * keg->uk_ipers < items)
39898355f576SJeff Roberson 		slabs++;
3990194a979eSMark Johnston 	while (slabs-- > 0) {
399186220393SMark Johnston 		aflags = M_NOWAIT;
399286220393SMark Johnston 		vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain,
399386220393SMark Johnston 		    &aflags);
399486220393SMark Johnston 		for (;;) {
399586220393SMark Johnston 			slab = keg_alloc_slab(keg, zone, domain, M_WAITOK,
399686220393SMark Johnston 			    aflags);
399786220393SMark Johnston 			if (slab != NULL) {
3998e20a199fSJeff Roberson 				MPASS(slab->us_keg == keg);
3999ab3185d1SJeff Roberson 				dom = &keg->uk_domain[slab->us_domain];
400086220393SMark Johnston 				LIST_INSERT_HEAD(&dom->ud_free_slab, slab,
400186220393SMark Johnston 				    us_link);
4002920239efSMark Johnston 				break;
40038355f576SJeff Roberson 			}
400486220393SMark Johnston 			KEG_LOCK(keg);
400586220393SMark Johnston 			if (vm_domainset_iter_policy(&di, &domain) != 0) {
400686220393SMark Johnston 				KEG_UNLOCK(keg);
400786220393SMark Johnston 				vm_wait_doms(&keg->uk_dr.dr_policy->ds_mask);
400886220393SMark Johnston 				KEG_LOCK(keg);
400986220393SMark Johnston 			}
401086220393SMark Johnston 		}
401186220393SMark Johnston 	}
4012af526374SJeff Roberson 	KEG_UNLOCK(keg);
40138355f576SJeff Roberson }
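/*
 * Illustrative sketch, not part of this file: uma_zone_reserve() and
 * uma_prealloc() are commonly paired; the first holds `n' items back for
 * M_USE_RESERVE callers and the second populates slabs up front so the
 * reserve is backed by real memory.  foo_zone is hypothetical:
 */
#include <vm/uma.h>

static void
foo_zone_guarantee(uma_zone_t foo_zone, int n)
{

	uma_zone_reserve(foo_zone, n);	/* keep n items for the reserve */
	uma_prealloc(foo_zone, n);	/* back the reserve with real slabs */
}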
40148355f576SJeff Roberson 
40158355f576SJeff Roberson /* See uma.h */
401608cfa56eSMark Johnston void
401708cfa56eSMark Johnston uma_reclaim(int req)
40188355f576SJeff Roberson {
401944ec2b63SKonstantin Belousov 
40201431a748SGleb Smirnoff 	CTR0(KTR_UMA, "UMA: vm asked us to release pages!");
402108cfa56eSMark Johnston 	sx_xlock(&uma_reclaim_lock);
402286bbae32SJeff Roberson 	bucket_enable();
402308cfa56eSMark Johnston 
402408cfa56eSMark Johnston 	switch (req) {
402508cfa56eSMark Johnston 	case UMA_RECLAIM_TRIM:
4026*20a4e154SJeff Roberson 		zone_foreach(zone_trim, NULL);
402708cfa56eSMark Johnston 		break;
402808cfa56eSMark Johnston 	case UMA_RECLAIM_DRAIN:
402908cfa56eSMark Johnston 	case UMA_RECLAIM_DRAIN_CPU:
4030*20a4e154SJeff Roberson 		zone_foreach(zone_drain, NULL);
403108cfa56eSMark Johnston 		if (req == UMA_RECLAIM_DRAIN_CPU) {
403208cfa56eSMark Johnston 			pcpu_cache_drain_safe(NULL);
4033*20a4e154SJeff Roberson 			zone_foreach(zone_drain, NULL);
4034a2de44abSAlexander Motin 		}
403508cfa56eSMark Johnston 		break;
403608cfa56eSMark Johnston 	default:
403708cfa56eSMark Johnston 		panic("unhandled reclamation request %d", req);
403808cfa56eSMark Johnston 	}
40390f9b7bf3SMark Johnston 
40408355f576SJeff Roberson 	/*
40418355f576SJeff Roberson 	 * Some slabs may have been freed while the zones above were drained,
40428355f576SJeff Roberson 	 * so visit this zone again to free pages that became empty once the
40438355f576SJeff Roberson 	 * other zones were drained.  We have to do the same for buckets.
40448355f576SJeff Roberson 	 */
4045*20a4e154SJeff Roberson 	zone_drain(slabzone, NULL);
4046cae33c14SJeff Roberson 	bucket_zone_drain();
404708cfa56eSMark Johnston 	sx_xunlock(&uma_reclaim_lock);
40488355f576SJeff Roberson }
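/*
 * Illustrative sketch, not part of this file: the reclaim levels are
 * increasingly aggressive, and UMA_RECLAIM_DRAIN_CPU additionally flushes
 * every per-CPU cache, which is the most disruptive option.  A
 * hypothetical low-memory response:
 */
#include <vm/uma.h>

static void
foo_lowmem(int severity)
{

	if (severity == 0)
		uma_reclaim(UMA_RECLAIM_TRIM);	/* release excess only */
	else if (severity == 1)
		uma_reclaim(UMA_RECLAIM_DRAIN);	/* empty zone caches too */
	else
		uma_reclaim(UMA_RECLAIM_DRAIN_CPU); /* also per-CPU caches */
}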
40498355f576SJeff Roberson 
40502e47807cSJeff Roberson static volatile int uma_reclaim_needed;
405144ec2b63SKonstantin Belousov 
405244ec2b63SKonstantin Belousov void
405344ec2b63SKonstantin Belousov uma_reclaim_wakeup(void)
405444ec2b63SKonstantin Belousov {
405544ec2b63SKonstantin Belousov 
40562e47807cSJeff Roberson 	if (atomic_fetchadd_int(&uma_reclaim_needed, 1) == 0)
40572e47807cSJeff Roberson 		wakeup(uma_reclaim);
405844ec2b63SKonstantin Belousov }
405944ec2b63SKonstantin Belousov 
406044ec2b63SKonstantin Belousov void
406144ec2b63SKonstantin Belousov uma_reclaim_worker(void *arg __unused)
406244ec2b63SKonstantin Belousov {
406344ec2b63SKonstantin Belousov 
406444ec2b63SKonstantin Belousov 	for (;;) {
406508cfa56eSMark Johnston 		sx_xlock(&uma_reclaim_lock);
4066200f8117SKonstantin Belousov 		while (atomic_load_int(&uma_reclaim_needed) == 0)
406708cfa56eSMark Johnston 			sx_sleep(uma_reclaim, &uma_reclaim_lock, PVM, "umarcl",
40682e47807cSJeff Roberson 			    hz);
406908cfa56eSMark Johnston 		sx_xunlock(&uma_reclaim_lock);
40709b43bc27SAndriy Gapon 		EVENTHANDLER_INVOKE(vm_lowmem, VM_LOW_KMEM);
407108cfa56eSMark Johnston 		uma_reclaim(UMA_RECLAIM_DRAIN_CPU);
4072200f8117SKonstantin Belousov 		atomic_store_int(&uma_reclaim_needed, 0);
40732e47807cSJeff Roberson 		/* Don't fire more than once per-second. */
40742e47807cSJeff Roberson 		pause("umarclslp", hz);
407544ec2b63SKonstantin Belousov 	}
407644ec2b63SKonstantin Belousov }
407744ec2b63SKonstantin Belousov 
4078663b416fSJohn Baldwin /* See uma.h */
407908cfa56eSMark Johnston void
408008cfa56eSMark Johnston uma_zone_reclaim(uma_zone_t zone, int req)
408108cfa56eSMark Johnston {
408208cfa56eSMark Johnston 
408308cfa56eSMark Johnston 	switch (req) {
408408cfa56eSMark Johnston 	case UMA_RECLAIM_TRIM:
4085*20a4e154SJeff Roberson 		zone_trim(zone, NULL);
408608cfa56eSMark Johnston 		break;
408708cfa56eSMark Johnston 	case UMA_RECLAIM_DRAIN:
4088*20a4e154SJeff Roberson 		zone_drain(zone, NULL);
408908cfa56eSMark Johnston 		break;
409008cfa56eSMark Johnston 	case UMA_RECLAIM_DRAIN_CPU:
409108cfa56eSMark Johnston 		pcpu_cache_drain_safe(zone);
4092*20a4e154SJeff Roberson 		zone_drain(zone, NULL);
409308cfa56eSMark Johnston 		break;
409408cfa56eSMark Johnston 	default:
409508cfa56eSMark Johnston 		panic("unhandled reclamation request %d", req);
409608cfa56eSMark Johnston 	}
409708cfa56eSMark Johnston }
409808cfa56eSMark Johnston 
409908cfa56eSMark Johnston /* See uma.h */
4100663b416fSJohn Baldwin int
4101663b416fSJohn Baldwin uma_zone_exhausted(uma_zone_t zone)
4102663b416fSJohn Baldwin {
4103663b416fSJohn Baldwin 	int full;
4104663b416fSJohn Baldwin 
4105663b416fSJohn Baldwin 	ZONE_LOCK(zone);
4106bb15d1c7SGleb Smirnoff 	full = zone->uz_sleepers > 0;
4107663b416fSJohn Baldwin 	ZONE_UNLOCK(zone);
4108663b416fSJohn Baldwin 	return (full);
4109663b416fSJohn Baldwin }
4110663b416fSJohn Baldwin 
41116c125b8dSMohan Srinivasan int
41126c125b8dSMohan Srinivasan uma_zone_exhausted_nolock(uma_zone_t zone)
41136c125b8dSMohan Srinivasan {
4114bb15d1c7SGleb Smirnoff 	return (zone->uz_sleepers > 0);
41156c125b8dSMohan Srinivasan }
41166c125b8dSMohan Srinivasan 
41178355f576SJeff Roberson void *
4118ab3185d1SJeff Roberson uma_large_malloc_domain(vm_size_t size, int domain, int wait)
41198355f576SJeff Roberson {
41209978bd99SMark Johnston 	struct domainset *policy;
4121ab3185d1SJeff Roberson 	vm_offset_t addr;
41228355f576SJeff Roberson 	uma_slab_t slab;
41238355f576SJeff Roberson 
412430c5525bSAndrew Gallatin 	if (domain != UMA_ANYDOMAIN) {
412530c5525bSAndrew Gallatin 		/* Avoid allocs targeting empty domains. */
412630c5525bSAndrew Gallatin 		if (VM_DOMAIN_EMPTY(domain))
412730c5525bSAndrew Gallatin 			domain = UMA_ANYDOMAIN;
412830c5525bSAndrew Gallatin 	}
4129ab3185d1SJeff Roberson 	slab = zone_alloc_item(slabzone, NULL, domain, wait);
41308355f576SJeff Roberson 	if (slab == NULL)
41318355f576SJeff Roberson 		return (NULL);
41329978bd99SMark Johnston 	policy = (domain == UMA_ANYDOMAIN) ? DOMAINSET_RR() :
41339978bd99SMark Johnston 	    DOMAINSET_FIXED(domain);
41349978bd99SMark Johnston 	addr = kmem_malloc_domainset(policy, size, wait);
4135ab3185d1SJeff Roberson 	if (addr != 0) {
4136ab3185d1SJeff Roberson 		vsetslab(addr, slab);
4137ab3185d1SJeff Roberson 		slab->us_data = (void *)addr;
4138ab3185d1SJeff Roberson 		slab->us_flags = UMA_SLAB_KERNEL | UMA_SLAB_MALLOC;
41398355f576SJeff Roberson 		slab->us_size = size;
4140e2068d0bSJeff Roberson 		slab->us_domain = vm_phys_domain(PHYS_TO_VM_PAGE(
4141ab3185d1SJeff Roberson 		    pmap_kextract(addr)));
41422e47807cSJeff Roberson 		uma_total_inc(size);
41438355f576SJeff Roberson 	} else {
41440095a784SJeff Roberson 		zone_free_item(slabzone, slab, NULL, SKIP_NONE);
41458355f576SJeff Roberson 	}
41468355f576SJeff Roberson 
4147ab3185d1SJeff Roberson 	return ((void *)addr);
4148ab3185d1SJeff Roberson }
4149ab3185d1SJeff Roberson 
4150ab3185d1SJeff Roberson void *
4151ab3185d1SJeff Roberson uma_large_malloc(vm_size_t size, int wait)
4152ab3185d1SJeff Roberson {
4153ab3185d1SJeff Roberson 
4154ab3185d1SJeff Roberson 	return uma_large_malloc_domain(size, UMA_ANYDOMAIN, wait);
41558355f576SJeff Roberson }
41568355f576SJeff Roberson 
41578355f576SJeff Roberson void
41588355f576SJeff Roberson uma_large_free(uma_slab_t slab)
41598355f576SJeff Roberson {
4160c325e866SKonstantin Belousov 
4161ab3185d1SJeff Roberson 	KASSERT((slab->us_flags & UMA_SLAB_KERNEL) != 0,
4162ab3185d1SJeff Roberson 	    ("uma_large_free:  Memory not allocated with uma_large_malloc."));
416349bfa624SAlan Cox 	kmem_free((vm_offset_t)slab->us_data, slab->us_size);
41642e47807cSJeff Roberson 	uma_total_dec(slab->us_size);
41650095a784SJeff Roberson 	zone_free_item(slabzone, slab, NULL, SKIP_NONE);
41668355f576SJeff Roberson }
41678355f576SJeff Roberson 
416848343a2fSGleb Smirnoff static void
416948343a2fSGleb Smirnoff uma_zero_item(void *item, uma_zone_t zone)
417048343a2fSGleb Smirnoff {
417148343a2fSGleb Smirnoff 
417248343a2fSGleb Smirnoff 	bzero(item, zone->uz_size);
417348343a2fSGleb Smirnoff }
417448343a2fSGleb Smirnoff 
41752e47807cSJeff Roberson unsigned long
41762e47807cSJeff Roberson uma_limit(void)
41772e47807cSJeff Roberson {
41782e47807cSJeff Roberson 
41792e47807cSJeff Roberson 	return (uma_kmem_limit);
41802e47807cSJeff Roberson }
41812e47807cSJeff Roberson 
41822e47807cSJeff Roberson void
41832e47807cSJeff Roberson uma_set_limit(unsigned long limit)
41842e47807cSJeff Roberson {
41852e47807cSJeff Roberson 
41862e47807cSJeff Roberson 	uma_kmem_limit = limit;
41872e47807cSJeff Roberson }
41882e47807cSJeff Roberson 
41892e47807cSJeff Roberson unsigned long
41902e47807cSJeff Roberson uma_size(void)
41912e47807cSJeff Roberson {
41922e47807cSJeff Roberson 
4193058f0f74SMark Johnston 	return (atomic_load_long(&uma_kmem_total));
4194ad5b0f5bSJeff Roberson }
4195ad5b0f5bSJeff Roberson 
4196ad5b0f5bSJeff Roberson long
4197ad5b0f5bSJeff Roberson uma_avail(void)
4198ad5b0f5bSJeff Roberson {
4199ad5b0f5bSJeff Roberson 
4200058f0f74SMark Johnston 	return (uma_kmem_limit - uma_size());
42012e47807cSJeff Roberson }
42022e47807cSJeff Roberson 
4203a0d4b0aeSRobert Watson #ifdef DDB
42048355f576SJeff Roberson /*
42057a52a97eSRobert Watson  * Generate statistics across both the zone and its per-cpu caches.  Return
42067a52a97eSRobert Watson  * desired statistics if the pointer is non-NULL for that statistic.
42077a52a97eSRobert Watson  *
42087a52a97eSRobert Watson  * Note: does not update the zone statistics, as it can't safely clear the
42097a52a97eSRobert Watson  * per-CPU cache statistic.
42107a52a97eSRobert Watson  *
42117a52a97eSRobert Watson  * XXXRW: Following the uc_allocbucket and uc_freebucket pointers here isn't
42127a52a97eSRobert Watson  * safe from off-CPU; we should modify the caches to track this information
42137a52a97eSRobert Watson  * directly so that we don't have to.
42147a52a97eSRobert Watson  */
42157a52a97eSRobert Watson static void
42160f9b7bf3SMark Johnston uma_zone_sumstat(uma_zone_t z, long *cachefreep, uint64_t *allocsp,
4217c1685086SJeff Roberson     uint64_t *freesp, uint64_t *sleepsp, uint64_t *xdomainp)
42187a52a97eSRobert Watson {
42197a52a97eSRobert Watson 	uma_cache_t cache;
4220c1685086SJeff Roberson 	uint64_t allocs, frees, sleeps, xdomain;
42217a52a97eSRobert Watson 	int cachefree, cpu;
42227a52a97eSRobert Watson 
4223c1685086SJeff Roberson 	allocs = frees = sleeps = xdomain = 0;
42247a52a97eSRobert Watson 	cachefree = 0;
42253aa6d94eSJohn Baldwin 	CPU_FOREACH(cpu) {
42267a52a97eSRobert Watson 		cache = &z->uz_cpu[cpu];
42277a52a97eSRobert Watson 		if (cache->uc_allocbucket != NULL)
42287a52a97eSRobert Watson 			cachefree += cache->uc_allocbucket->ub_cnt;
42297a52a97eSRobert Watson 		if (cache->uc_freebucket != NULL)
42307a52a97eSRobert Watson 			cachefree += cache->uc_freebucket->ub_cnt;
4231c1685086SJeff Roberson 		if (cache->uc_crossbucket != NULL) {
4232c1685086SJeff Roberson 			xdomain += cache->uc_crossbucket->ub_cnt;
4233c1685086SJeff Roberson 			cachefree += cache->uc_crossbucket->ub_cnt;
4234c1685086SJeff Roberson 		}
42357a52a97eSRobert Watson 		allocs += cache->uc_allocs;
42367a52a97eSRobert Watson 		frees += cache->uc_frees;
42377a52a97eSRobert Watson 	}
42382efcc8cbSGleb Smirnoff 	allocs += counter_u64_fetch(z->uz_allocs);
42392efcc8cbSGleb Smirnoff 	frees += counter_u64_fetch(z->uz_frees);
4240bf965959SSean Bruno 	sleeps += z->uz_sleeps;
4241c1685086SJeff Roberson 	xdomain += z->uz_xdomain;
42427a52a97eSRobert Watson 	if (cachefreep != NULL)
42437a52a97eSRobert Watson 		*cachefreep = cachefree;
42447a52a97eSRobert Watson 	if (allocsp != NULL)
42457a52a97eSRobert Watson 		*allocsp = allocs;
42467a52a97eSRobert Watson 	if (freesp != NULL)
42477a52a97eSRobert Watson 		*freesp = frees;
4248bf965959SSean Bruno 	if (sleepsp != NULL)
4249bf965959SSean Bruno 		*sleepsp = sleeps;
4250c1685086SJeff Roberson 	if (xdomainp != NULL)
4251c1685086SJeff Roberson 		*xdomainp = xdomain;
42527a52a97eSRobert Watson }
4253a0d4b0aeSRobert Watson #endif /* DDB */
42547a52a97eSRobert Watson 
42557a52a97eSRobert Watson static int
42567a52a97eSRobert Watson sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
42577a52a97eSRobert Watson {
42587a52a97eSRobert Watson 	uma_keg_t kz;
42597a52a97eSRobert Watson 	uma_zone_t z;
42607a52a97eSRobert Watson 	int count;
42617a52a97eSRobert Watson 
42627a52a97eSRobert Watson 	count = 0;
4263111fbcd5SBryan Venteicher 	rw_rlock(&uma_rwlock);
42647a52a97eSRobert Watson 	LIST_FOREACH(kz, &uma_kegs, uk_link) {
42657a52a97eSRobert Watson 		LIST_FOREACH(z, &kz->uk_zones, uz_link)
42667a52a97eSRobert Watson 			count++;
42677a52a97eSRobert Watson 	}
4268b47acb0aSGleb Smirnoff 	LIST_FOREACH(z, &uma_cachezones, uz_link)
4269b47acb0aSGleb Smirnoff 		count++;
4270b47acb0aSGleb Smirnoff 
4271111fbcd5SBryan Venteicher 	rw_runlock(&uma_rwlock);
42727a52a97eSRobert Watson 	return (sysctl_handle_int(oidp, &count, 0, req));
42737a52a97eSRobert Watson }
42747a52a97eSRobert Watson 
4275b47acb0aSGleb Smirnoff static void
4276b47acb0aSGleb Smirnoff uma_vm_zone_stats(struct uma_type_header *uth, uma_zone_t z, struct sbuf *sbuf,
4277b47acb0aSGleb Smirnoff     struct uma_percpu_stat *ups, bool internal)
4278b47acb0aSGleb Smirnoff {
4279b47acb0aSGleb Smirnoff 	uma_zone_domain_t zdom;
42801de9724eSMark Johnston 	uma_bucket_t bucket;
4281b47acb0aSGleb Smirnoff 	uma_cache_t cache;
4282b47acb0aSGleb Smirnoff 	int i;
4283b47acb0aSGleb Smirnoff 
4285b47acb0aSGleb Smirnoff 	for (i = 0; i < vm_ndomains; i++) {
4286b47acb0aSGleb Smirnoff 		zdom = &z->uz_domain[i];
4287b47acb0aSGleb Smirnoff 		uth->uth_zone_free += zdom->uzd_nitems;
4288b47acb0aSGleb Smirnoff 	}
4289b47acb0aSGleb Smirnoff 	uth->uth_allocs = counter_u64_fetch(z->uz_allocs);
4290b47acb0aSGleb Smirnoff 	uth->uth_frees = counter_u64_fetch(z->uz_frees);
4291b47acb0aSGleb Smirnoff 	uth->uth_fails = counter_u64_fetch(z->uz_fails);
4292b47acb0aSGleb Smirnoff 	uth->uth_sleeps = z->uz_sleeps;
4293c1685086SJeff Roberson 	uth->uth_xdomain = z->uz_xdomain;
42941de9724eSMark Johnston 
4295b47acb0aSGleb Smirnoff 	/*
42961de9724eSMark Johnston 	 * While it is not normally safe to access the cache bucket pointers
42971de9724eSMark Johnston 	 * while not on the CPU that owns the cache, we only allow the pointers
42981de9724eSMark Johnston 	 * to be exchanged without the zone lock held, not invalidated, so
42991de9724eSMark Johnston 	 * accept the possible race associated with bucket exchange during
43001de9724eSMark Johnston 	 * monitoring.  Use atomic_load_ptr() to ensure that the bucket pointers
43011de9724eSMark Johnston 	 * are loaded only once.
4302b47acb0aSGleb Smirnoff 	 */
4303b47acb0aSGleb Smirnoff 	for (i = 0; i < mp_maxid + 1; i++) {
4304b47acb0aSGleb Smirnoff 		bzero(&ups[i], sizeof(*ups));
4305b47acb0aSGleb Smirnoff 		if (internal || CPU_ABSENT(i))
4306b47acb0aSGleb Smirnoff 			continue;
4307b47acb0aSGleb Smirnoff 		cache = &z->uz_cpu[i];
43081de9724eSMark Johnston 		bucket = (uma_bucket_t)atomic_load_ptr(&cache->uc_allocbucket);
43091de9724eSMark Johnston 		if (bucket != NULL)
43101de9724eSMark Johnston 			ups[i].ups_cache_free += bucket->ub_cnt;
43111de9724eSMark Johnston 		bucket = (uma_bucket_t)atomic_load_ptr(&cache->uc_freebucket);
43121de9724eSMark Johnston 		if (bucket != NULL)
43131de9724eSMark Johnston 			ups[i].ups_cache_free += bucket->ub_cnt;
43141de9724eSMark Johnston 		bucket = (uma_bucket_t)atomic_load_ptr(&cache->uc_crossbucket);
43151de9724eSMark Johnston 		if (bucket != NULL)
43161de9724eSMark Johnston 			ups[i].ups_cache_free += bucket->ub_cnt;
4317b47acb0aSGleb Smirnoff 		ups[i].ups_allocs = cache->uc_allocs;
4318b47acb0aSGleb Smirnoff 		ups[i].ups_frees = cache->uc_frees;
4319b47acb0aSGleb Smirnoff 	}
4320b47acb0aSGleb Smirnoff }
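
/*
 * A sketch of the race tolerated in uma_vm_zone_stats() above: without
 * atomic_load_ptr(), the compiler would be free to reload the bucket
 * pointer between the NULL check and the dereference,
 *
 *	if (cache->uc_allocbucket != NULL)		// load #1
 *		free += cache->uc_allocbucket->ub_cnt;	// load #2 may see a
 *							// different pointer
 *
 * whereas a single load makes the worst case a slightly stale ub_cnt,
 * which is acceptable for monitoring output.
 */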
4321b47acb0aSGleb Smirnoff 
43227a52a97eSRobert Watson static int
43237a52a97eSRobert Watson sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
43247a52a97eSRobert Watson {
43257a52a97eSRobert Watson 	struct uma_stream_header ush;
43267a52a97eSRobert Watson 	struct uma_type_header uth;
432763b5d112SKonstantin Belousov 	struct uma_percpu_stat *ups;
43287a52a97eSRobert Watson 	struct sbuf sbuf;
43297a52a97eSRobert Watson 	uma_keg_t kz;
43307a52a97eSRobert Watson 	uma_zone_t z;
43314e657159SMatthew D Fleming 	int count, error, i;
43327a52a97eSRobert Watson 
433300f0e671SMatthew D Fleming 	error = sysctl_wire_old_buffer(req, 0);
433400f0e671SMatthew D Fleming 	if (error != 0)
433500f0e671SMatthew D Fleming 		return (error);
43364e657159SMatthew D Fleming 	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
43371eafc078SIan Lepore 	sbuf_clear_flags(&sbuf, SBUF_INCLUDENUL);
433863b5d112SKonstantin Belousov 	ups = malloc((mp_maxid + 1) * sizeof(*ups), M_TEMP, M_WAITOK);
43394e657159SMatthew D Fleming 
4340404a593eSMatthew D Fleming 	count = 0;
4341111fbcd5SBryan Venteicher 	rw_rlock(&uma_rwlock);
43427a52a97eSRobert Watson 	LIST_FOREACH(kz, &uma_kegs, uk_link) {
43437a52a97eSRobert Watson 		LIST_FOREACH(z, &kz->uk_zones, uz_link)
43447a52a97eSRobert Watson 			count++;
43457a52a97eSRobert Watson 	}
43467a52a97eSRobert Watson 
4347b47acb0aSGleb Smirnoff 	LIST_FOREACH(z, &uma_cachezones, uz_link)
4348b47acb0aSGleb Smirnoff 		count++;
4349b47acb0aSGleb Smirnoff 
43507a52a97eSRobert Watson 	/*
43517a52a97eSRobert Watson 	 * Insert stream header.
43527a52a97eSRobert Watson 	 */
43537a52a97eSRobert Watson 	bzero(&ush, sizeof(ush));
43547a52a97eSRobert Watson 	ush.ush_version = UMA_STREAM_VERSION;
4355ab3a57c0SRobert Watson 	ush.ush_maxcpus = (mp_maxid + 1);
43567a52a97eSRobert Watson 	ush.ush_count = count;
43574e657159SMatthew D Fleming 	(void)sbuf_bcat(&sbuf, &ush, sizeof(ush));
43587a52a97eSRobert Watson 
43597a52a97eSRobert Watson 	LIST_FOREACH(kz, &uma_kegs, uk_link) {
43607a52a97eSRobert Watson 		LIST_FOREACH(z, &kz->uk_zones, uz_link) {
43617a52a97eSRobert Watson 			bzero(&uth, sizeof(uth));
43627a52a97eSRobert Watson 			ZONE_LOCK(z);
4363cbbb4a00SRobert Watson 			strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
43647a52a97eSRobert Watson 			uth.uth_align = kz->uk_align;
43657a52a97eSRobert Watson 			uth.uth_size = kz->uk_size;
43667a52a97eSRobert Watson 			uth.uth_rsize = kz->uk_rsize;
4367bb45b411SGleb Smirnoff 			if (z->uz_max_items > 0)
4368bb45b411SGleb Smirnoff 				uth.uth_pages = (z->uz_items / kz->uk_ipers) *
4369bb15d1c7SGleb Smirnoff 				    kz->uk_ppera;
4370bb45b411SGleb Smirnoff 			else
4371bb45b411SGleb Smirnoff 				uth.uth_pages = kz->uk_pages;
4372f8c86a5fSGleb Smirnoff 			uth.uth_maxpages = (z->uz_max_items / kz->uk_ipers) *
4373bb15d1c7SGleb Smirnoff 			    kz->uk_ppera;
4374bb15d1c7SGleb Smirnoff 			uth.uth_limit = z->uz_max_items;
4375f8c86a5fSGleb Smirnoff 			uth.uth_keg_free = z->uz_keg->uk_free;
4376cbbb4a00SRobert Watson 
4377cbbb4a00SRobert Watson 			/*
4378cbbb4a00SRobert Watson 			 * A zone is secondary if it is not the first entry
4379cbbb4a00SRobert Watson 			 * on the keg's zone list.
4380cbbb4a00SRobert Watson 			 */
4381e20a199fSJeff Roberson 			if ((z->uz_flags & UMA_ZONE_SECONDARY) &&
4382cbbb4a00SRobert Watson 			    (LIST_FIRST(&kz->uk_zones) != z))
4383cbbb4a00SRobert Watson 				uth.uth_zone_flags = UTH_ZONE_SECONDARY;
4384b47acb0aSGleb Smirnoff 			uma_vm_zone_stats(&uth, z, &sbuf, ups,
4385b47acb0aSGleb Smirnoff 			    kz->uk_flags & UMA_ZFLAG_INTERNAL);
43862450bbb8SRobert Watson 			ZONE_UNLOCK(z);
438763b5d112SKonstantin Belousov 			(void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
438863b5d112SKonstantin Belousov 			for (i = 0; i < mp_maxid + 1; i++)
438963b5d112SKonstantin Belousov 				(void)sbuf_bcat(&sbuf, &ups[i], sizeof(ups[i]));
43907a52a97eSRobert Watson 		}
43917a52a97eSRobert Watson 	}
4392b47acb0aSGleb Smirnoff 	LIST_FOREACH(z, &uma_cachezones, uz_link) {
4393b47acb0aSGleb Smirnoff 		bzero(&uth, sizeof(uth));
4394b47acb0aSGleb Smirnoff 		ZONE_LOCK(z);
4395b47acb0aSGleb Smirnoff 		strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
4396b47acb0aSGleb Smirnoff 		uth.uth_size = z->uz_size;
4397b47acb0aSGleb Smirnoff 		uma_vm_zone_stats(&uth, z, &sbuf, ups, false);
4398b47acb0aSGleb Smirnoff 		ZONE_UNLOCK(z);
4399b47acb0aSGleb Smirnoff 		(void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
4400b47acb0aSGleb Smirnoff 		for (i = 0; i < mp_maxid + 1; i++)
4401b47acb0aSGleb Smirnoff 			(void)sbuf_bcat(&sbuf, &ups[i], sizeof(ups[i]));
4402b47acb0aSGleb Smirnoff 	}
4403b47acb0aSGleb Smirnoff 
4404111fbcd5SBryan Venteicher 	rw_runlock(&uma_rwlock);
44054e657159SMatthew D Fleming 	error = sbuf_finish(&sbuf);
44064e657159SMatthew D Fleming 	sbuf_delete(&sbuf);
440763b5d112SKonstantin Belousov 	free(ups, M_TEMP);
44087a52a97eSRobert Watson 	return (error);
44097a52a97eSRobert Watson }
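
/*
 * The sysctl above emits a binary stream laid out as:
 *
 *	struct uma_stream_header ush;		(version, maxcpus, count)
 *	followed by ush_count records, each:
 *		struct uma_type_header uth;	(one zone)
 *		struct uma_percpu_stat ups[ush_maxcpus];
 *
 * A consumer (e.g. libmemstat) can walk it roughly as follows; a sketch
 * with error and version checks omitted:
 *
 *	memcpy(&ush, p, sizeof(ush));
 *	p += sizeof(ush);
 *	for (i = 0; i < ush.ush_count; i++) {
 *		memcpy(&uth, p, sizeof(uth));
 *		p += sizeof(uth) +
 *		    ush.ush_maxcpus * sizeof(struct uma_percpu_stat);
 *	}
 */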
441048c5777eSRobert Watson 
44110a5a3ccbSGleb Smirnoff int
44120a5a3ccbSGleb Smirnoff sysctl_handle_uma_zone_max(SYSCTL_HANDLER_ARGS)
44130a5a3ccbSGleb Smirnoff {
44140a5a3ccbSGleb Smirnoff 	uma_zone_t zone = *(uma_zone_t *)arg1;
441516be9f54SGleb Smirnoff 	int error, max;
44160a5a3ccbSGleb Smirnoff 
441716be9f54SGleb Smirnoff 	max = uma_zone_get_max(zone);
44180a5a3ccbSGleb Smirnoff 	error = sysctl_handle_int(oidp, &max, 0, req);
44190a5a3ccbSGleb Smirnoff 	if (error || !req->newptr)
44200a5a3ccbSGleb Smirnoff 		return (error);
44210a5a3ccbSGleb Smirnoff 
44220a5a3ccbSGleb Smirnoff 	uma_zone_set_max(zone, max);
44230a5a3ccbSGleb Smirnoff 
44240a5a3ccbSGleb Smirnoff 	return (0);
44250a5a3ccbSGleb Smirnoff }
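
/*
 * Example registration of the handler above (hypothetical oid names; real
 * registrations live with the owning subsystems).  Note that arg1 is a
 * pointer to the zone variable, not the zone itself:
 *
 *	SYSCTL_PROC(_vm_example, OID_AUTO, zone_max,
 *	    CTLTYPE_INT | CTLFLAG_RW, &example_zone, 0,
 *	    sysctl_handle_uma_zone_max, "I", "Example zone item limit");
 *
 * Reads report uma_zone_get_max(); writes apply the new limit with
 * uma_zone_set_max().
 */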
44260a5a3ccbSGleb Smirnoff 
44270a5a3ccbSGleb Smirnoff int
44280a5a3ccbSGleb Smirnoff sysctl_handle_uma_zone_cur(SYSCTL_HANDLER_ARGS)
44290a5a3ccbSGleb Smirnoff {
4430*20a4e154SJeff Roberson 	uma_zone_t zone;
44310a5a3ccbSGleb Smirnoff 	int cur;
44320a5a3ccbSGleb Smirnoff 
4433*20a4e154SJeff Roberson 	/*
4434*20a4e154SJeff Roberson 	 * Some callers want to add sysctls for global zones that
4435*20a4e154SJeff Roberson 	 * may not yet exist, so they pass a pointer to a pointer.
4436*20a4e154SJeff Roberson 	 */
4437*20a4e154SJeff Roberson 	if (arg2 == 0)
4438*20a4e154SJeff Roberson 		zone = *(uma_zone_t *)arg1;
4439*20a4e154SJeff Roberson 	else
4440*20a4e154SJeff Roberson 		zone = arg1;
44410a5a3ccbSGleb Smirnoff 	cur = uma_zone_get_cur(zone);
44420a5a3ccbSGleb Smirnoff 	return (sysctl_handle_int(oidp, &cur, 0, req));
44430a5a3ccbSGleb Smirnoff }
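
/*
 * The arg2 convention above, illustrated with hypothetical registrations:
 *
 *	// Zone variable may not be initialized yet: pass its address and
 *	// arg2 == 0 so the handler dereferences it at read time.
 *	SYSCTL_PROC(_vm_example, OID_AUTO, zone_cur,
 *	    CTLTYPE_INT | CTLFLAG_RD, &example_zone, 0,
 *	    sysctl_handle_uma_zone_cur, "I", "Current example zone items");
 *
 *	// Zone known to exist already: pass the zone itself and nonzero arg2.
 *	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "zone_cur",
 *	    CTLTYPE_INT | CTLFLAG_RD, example_zone, 1,
 *	    sysctl_handle_uma_zone_cur, "I", "Current example zone items");
 */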
44440a5a3ccbSGleb Smirnoff 
4445*20a4e154SJeff Roberson static int
4446*20a4e154SJeff Roberson sysctl_handle_uma_zone_allocs(SYSCTL_HANDLER_ARGS)
4447*20a4e154SJeff Roberson {
4448*20a4e154SJeff Roberson 	uma_zone_t zone = arg1;
4449*20a4e154SJeff Roberson 	uint64_t cur;
4450*20a4e154SJeff Roberson 
4451*20a4e154SJeff Roberson 	cur = uma_zone_get_allocs(zone);
4452*20a4e154SJeff Roberson 	return (sysctl_handle_64(oidp, &cur, 0, req));
4453*20a4e154SJeff Roberson }
4454*20a4e154SJeff Roberson 
4455*20a4e154SJeff Roberson static int
4456*20a4e154SJeff Roberson sysctl_handle_uma_zone_frees(SYSCTL_HANDLER_ARGS)
4457*20a4e154SJeff Roberson {
4458*20a4e154SJeff Roberson 	uma_zone_t zone = arg1;
4459*20a4e154SJeff Roberson 	uint64_t cur;
4460*20a4e154SJeff Roberson 
4461*20a4e154SJeff Roberson 	cur = uma_zone_get_frees(zone);
4462*20a4e154SJeff Roberson 	return (sysctl_handle_64(oidp, &cur, 0, req));
4463*20a4e154SJeff Roberson }
4464*20a4e154SJeff Roberson 
44659542ea7bSGleb Smirnoff #ifdef INVARIANTS
44669542ea7bSGleb Smirnoff static uma_slab_t
44679542ea7bSGleb Smirnoff uma_dbg_getslab(uma_zone_t zone, void *item)
44689542ea7bSGleb Smirnoff {
44699542ea7bSGleb Smirnoff 	uma_slab_t slab;
44709542ea7bSGleb Smirnoff 	uma_keg_t keg;
44719542ea7bSGleb Smirnoff 	uint8_t *mem;
44729542ea7bSGleb Smirnoff 
44739542ea7bSGleb Smirnoff 	mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
44749542ea7bSGleb Smirnoff 	if (zone->uz_flags & UMA_ZONE_VTOSLAB) {
44759542ea7bSGleb Smirnoff 		slab = vtoslab((vm_offset_t)mem);
44769542ea7bSGleb Smirnoff 	} else {
44779542ea7bSGleb Smirnoff 		/*
44789542ea7bSGleb Smirnoff 		 * It is safe to return the slab here even though the
44799542ea7bSGleb Smirnoff 		 * zone is unlocked because the item's allocation state
44809542ea7bSGleb Smirnoff 		 * essentially holds a reference.
44819542ea7bSGleb Smirnoff 		 */
4482bb15d1c7SGleb Smirnoff 		if (zone->uz_lockptr == &zone->uz_lock)
4483bb15d1c7SGleb Smirnoff 			return (NULL);
44849542ea7bSGleb Smirnoff 		ZONE_LOCK(zone);
4485bb15d1c7SGleb Smirnoff 		keg = zone->uz_keg;
44869542ea7bSGleb Smirnoff 		if (keg->uk_flags & UMA_ZONE_HASH)
44879542ea7bSGleb Smirnoff 			slab = hash_sfind(&keg->uk_hash, mem);
44889542ea7bSGleb Smirnoff 		else
44899542ea7bSGleb Smirnoff 			slab = (uma_slab_t)(mem + keg->uk_pgoff);
44909542ea7bSGleb Smirnoff 		ZONE_UNLOCK(zone);
44919542ea7bSGleb Smirnoff 	}
44929542ea7bSGleb Smirnoff 
44939542ea7bSGleb Smirnoff 	return (slab);
44949542ea7bSGleb Smirnoff }
44959542ea7bSGleb Smirnoff 
4496c5deaf04SGleb Smirnoff static bool
4497c5deaf04SGleb Smirnoff uma_dbg_zskip(uma_zone_t zone, void *mem)
4498c5deaf04SGleb Smirnoff {
4499c5deaf04SGleb Smirnoff 
4500bb15d1c7SGleb Smirnoff 	if (zone->uz_lockptr == &zone->uz_lock)
4501c5deaf04SGleb Smirnoff 		return (true);
4502c5deaf04SGleb Smirnoff 
4503bb15d1c7SGleb Smirnoff 	return (uma_dbg_kskip(zone->uz_keg, mem));
4504c5deaf04SGleb Smirnoff }
4505c5deaf04SGleb Smirnoff 
4506c5deaf04SGleb Smirnoff static bool
4507c5deaf04SGleb Smirnoff uma_dbg_kskip(uma_keg_t keg, void *mem)
4508c5deaf04SGleb Smirnoff {
4509c5deaf04SGleb Smirnoff 	uintptr_t idx;
4510c5deaf04SGleb Smirnoff 
4511c5deaf04SGleb Smirnoff 	if (dbg_divisor == 0)
4512c5deaf04SGleb Smirnoff 		return (true);
4513c5deaf04SGleb Smirnoff 
4514c5deaf04SGleb Smirnoff 	if (dbg_divisor == 1)
4515c5deaf04SGleb Smirnoff 		return (false);
4516c5deaf04SGleb Smirnoff 
4517c5deaf04SGleb Smirnoff 	idx = (uintptr_t)mem >> PAGE_SHIFT;
4518c5deaf04SGleb Smirnoff 	if (keg->uk_ipers > 1) {
4519c5deaf04SGleb Smirnoff 		idx *= keg->uk_ipers;
4520c5deaf04SGleb Smirnoff 		idx += ((uintptr_t)mem & PAGE_MASK) / keg->uk_rsize;
4521c5deaf04SGleb Smirnoff 	}
4522c5deaf04SGleb Smirnoff 
4523c5deaf04SGleb Smirnoff 	if ((idx / dbg_divisor) * dbg_divisor != idx) {
4524c5deaf04SGleb Smirnoff 		counter_u64_add(uma_skip_cnt, 1);
4525c5deaf04SGleb Smirnoff 		return (true);
4526c5deaf04SGleb Smirnoff 	}
4527c5deaf04SGleb Smirnoff 	counter_u64_add(uma_dbg_cnt, 1);
4528c5deaf04SGleb Smirnoff 
4529c5deaf04SGleb Smirnoff 	return (false);
4530c5deaf04SGleb Smirnoff }
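
/*
 * A worked example of the sampling in uma_dbg_kskip() above: with
 * uk_ipers == 4 items per page and dbg_divisor == 3, an item in slot 2
 * of page index 10 gets idx = 10 * 4 + 2 = 42, and the check
 *
 *	(idx / dbg_divisor) * dbg_divisor != idx
 *
 * is just "idx % dbg_divisor != 0" spelled with a single division, so
 * item 42 is verified (42 % 3 == 0) while items 41 and 43 are skipped.
 * Roughly one item in every dbg_divisor is checked.
 */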
4531c5deaf04SGleb Smirnoff 
45329542ea7bSGleb Smirnoff /*
45339542ea7bSGleb Smirnoff  * Set up the slab's freei data such that uma_dbg_free can function.
45359542ea7bSGleb Smirnoff  */
45369542ea7bSGleb Smirnoff static void
45379542ea7bSGleb Smirnoff uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item)
45389542ea7bSGleb Smirnoff {
45399542ea7bSGleb Smirnoff 	uma_keg_t keg;
45409542ea7bSGleb Smirnoff 	int freei;
45419542ea7bSGleb Smirnoff 
45429542ea7bSGleb Smirnoff 	if (slab == NULL) {
45439542ea7bSGleb Smirnoff 		slab = uma_dbg_getslab(zone, item);
45449542ea7bSGleb Smirnoff 		if (slab == NULL)
45459542ea7bSGleb Smirnoff 			panic("uma: item %p did not belong to zone %s\n",
45469542ea7bSGleb Smirnoff 			    item, zone->uz_name);
45479542ea7bSGleb Smirnoff 	}
45489542ea7bSGleb Smirnoff 	keg = slab->us_keg;
45499542ea7bSGleb Smirnoff 	freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
45509542ea7bSGleb Smirnoff 
45519542ea7bSGleb Smirnoff 	if (BIT_ISSET(SLAB_SETSIZE, freei, &slab->us_debugfree))
45529542ea7bSGleb Smirnoff 		panic("Duplicate alloc of %p from zone %p(%s) slab %p(%d)\n",
45539542ea7bSGleb Smirnoff 		    item, zone, zone->uz_name, slab, freei);
45549542ea7bSGleb Smirnoff 	BIT_SET_ATOMIC(SLAB_SETSIZE, freei, &slab->us_debugfree);
45579542ea7bSGleb Smirnoff }
45589542ea7bSGleb Smirnoff 
45599542ea7bSGleb Smirnoff /*
45609542ea7bSGleb Smirnoff  * Verifies freed addresses.  Checks for alignment, valid slab membership
45619542ea7bSGleb Smirnoff  * and duplicate frees.
45639542ea7bSGleb Smirnoff  */
45649542ea7bSGleb Smirnoff static void
45659542ea7bSGleb Smirnoff uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item)
45669542ea7bSGleb Smirnoff {
45679542ea7bSGleb Smirnoff 	uma_keg_t keg;
45689542ea7bSGleb Smirnoff 	int freei;
45699542ea7bSGleb Smirnoff 
45709542ea7bSGleb Smirnoff 	if (slab == NULL) {
45719542ea7bSGleb Smirnoff 		slab = uma_dbg_getslab(zone, item);
45729542ea7bSGleb Smirnoff 		if (slab == NULL)
45739542ea7bSGleb Smirnoff 			panic("uma: Freed item %p did not belong to zone %s\n",
45749542ea7bSGleb Smirnoff 			    item, zone->uz_name);
45759542ea7bSGleb Smirnoff 	}
45769542ea7bSGleb Smirnoff 	keg = slab->us_keg;
45779542ea7bSGleb Smirnoff 	freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
45789542ea7bSGleb Smirnoff 
45799542ea7bSGleb Smirnoff 	if (freei >= keg->uk_ipers)
45809542ea7bSGleb Smirnoff 		panic("Invalid free of %p from zone %p(%s) slab %p(%d)\n",
45819542ea7bSGleb Smirnoff 		    item, zone, zone->uz_name, slab, freei);
45829542ea7bSGleb Smirnoff 
45839542ea7bSGleb Smirnoff 	if (((freei * keg->uk_rsize) + slab->us_data) != item)
45849542ea7bSGleb Smirnoff 		panic("Unaligned free of %p from zone %p(%s) slab %p(%d)\n",
45859542ea7bSGleb Smirnoff 		    item, zone, zone->uz_name, slab, freei);
45869542ea7bSGleb Smirnoff 
45879542ea7bSGleb Smirnoff 	if (!BIT_ISSET(SLAB_SETSIZE, freei, &slab->us_debugfree))
45889542ea7bSGleb Smirnoff 		panic("Duplicate free of %p from zone %p(%s) slab %p(%d)\n",
45899542ea7bSGleb Smirnoff 		    item, zone, zone->uz_name, slab, freei);
45909542ea7bSGleb Smirnoff 
45919542ea7bSGleb Smirnoff 	BIT_CLR_ATOMIC(SLAB_SETSIZE, freei, &slab->us_debugfree);
45929542ea7bSGleb Smirnoff }
45939542ea7bSGleb Smirnoff #endif /* INVARIANTS */
45949542ea7bSGleb Smirnoff 
459548c5777eSRobert Watson #ifdef DDB
459646d70077SConrad Meyer static int64_t
459746d70077SConrad Meyer get_uma_stats(uma_keg_t kz, uma_zone_t z, uint64_t *allocs, uint64_t *used,
45980223790fSConrad Meyer     uint64_t *sleeps, long *cachefree, uint64_t *xdomain)
459948c5777eSRobert Watson {
460046d70077SConrad Meyer 	uint64_t frees;
46010f9b7bf3SMark Johnston 	int i;
460248c5777eSRobert Watson 
460348c5777eSRobert Watson 	if (kz->uk_flags & UMA_ZFLAG_INTERNAL) {
460446d70077SConrad Meyer 		*allocs = counter_u64_fetch(z->uz_allocs);
46052efcc8cbSGleb Smirnoff 		frees = counter_u64_fetch(z->uz_frees);
460646d70077SConrad Meyer 		*sleeps = z->uz_sleeps;
460746d70077SConrad Meyer 		*cachefree = 0;
460846d70077SConrad Meyer 		*xdomain = 0;
460948c5777eSRobert Watson 	} else
461046d70077SConrad Meyer 		uma_zone_sumstat(z, cachefree, allocs, &frees, sleeps,
461146d70077SConrad Meyer 		    xdomain);
4612e20a199fSJeff Roberson 	if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
461348c5777eSRobert Watson 	    (LIST_FIRST(&kz->uk_zones) != z)))
461446d70077SConrad Meyer 		*cachefree += kz->uk_free;
46150f9b7bf3SMark Johnston 	for (i = 0; i < vm_ndomains; i++)
461646d70077SConrad Meyer 		*cachefree += z->uz_domain[i].uzd_nitems;
461746d70077SConrad Meyer 	*used = *allocs - frees;
461846d70077SConrad Meyer 	return (((int64_t)*used + *cachefree) * kz->uk_size);
461946d70077SConrad Meyer }
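
/*
 * Example of the arithmetic above: a zone with uk_size == 256 bytes,
 * 1000 allocs, 900 frees, and 50 free items cached reports
 * used = 1000 - 900 = 100 items and (100 + 50) * 256 = 38400 bytes,
 * which is the "Total Mem" figure db_show_uma sorts on below.
 */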
46200f9b7bf3SMark Johnston 
462146d70077SConrad Meyer DB_SHOW_COMMAND(uma, db_show_uma)
462246d70077SConrad Meyer {
462346d70077SConrad Meyer 	const char *fmt_hdr, *fmt_entry;
462446d70077SConrad Meyer 	uma_keg_t kz;
462546d70077SConrad Meyer 	uma_zone_t z;
462646d70077SConrad Meyer 	uint64_t allocs, used, sleeps, xdomain;
462746d70077SConrad Meyer 	long cachefree;
462846d70077SConrad Meyer 	/* variables for sorting */
462946d70077SConrad Meyer 	uma_keg_t cur_keg;
463046d70077SConrad Meyer 	uma_zone_t cur_zone, last_zone;
463146d70077SConrad Meyer 	int64_t cur_size, last_size, size;
463246d70077SConrad Meyer 	int ties;
463346d70077SConrad Meyer 
463446d70077SConrad Meyer 	/* /i option produces machine-parseable CSV output */
463546d70077SConrad Meyer 	if (modif[0] == 'i') {
463646d70077SConrad Meyer 		fmt_hdr = "%s,%s,%s,%s,%s,%s,%s,%s,%s\n";
463746d70077SConrad Meyer 		fmt_entry = "\"%s\",%ju,%jd,%ld,%ju,%ju,%u,%jd,%ju\n";
463846d70077SConrad Meyer 	} else {
463946d70077SConrad Meyer 		fmt_hdr = "%18s %6s %7s %7s %11s %7s %7s %10s %8s\n";
464046d70077SConrad Meyer 		fmt_entry = "%18s %6ju %7jd %7ld %11ju %7ju %7u %10jd %8ju\n";
464146d70077SConrad Meyer 	}
464246d70077SConrad Meyer 
464346d70077SConrad Meyer 	db_printf(fmt_hdr, "Zone", "Size", "Used", "Free", "Requests",
464446d70077SConrad Meyer 	    "Sleeps", "Bucket", "Total Mem", "XFree");
464546d70077SConrad Meyer 
464646d70077SConrad Meyer 	/* Sort the zones with largest size first. */
464746d70077SConrad Meyer 	last_zone = NULL;
464846d70077SConrad Meyer 	last_size = INT64_MAX;
464946d70077SConrad Meyer 	for (;;) {
465046d70077SConrad Meyer 		cur_zone = NULL;
465146d70077SConrad Meyer 		cur_size = -1;
465246d70077SConrad Meyer 		ties = 0;
465346d70077SConrad Meyer 		LIST_FOREACH(kz, &uma_kegs, uk_link) {
465446d70077SConrad Meyer 			LIST_FOREACH(z, &kz->uk_zones, uz_link) {
465546d70077SConrad Meyer 				/*
465646d70077SConrad Meyer 				 * In the case of size ties, print out zones
465746d70077SConrad Meyer 				 * in the order they are encountered.  That is,
465846d70077SConrad Meyer 				 * when we encounter the most recently output
465946d70077SConrad Meyer 				 * zone, we have already printed all preceding
466046d70077SConrad Meyer 				 * ties, and we must print all following ties.
466146d70077SConrad Meyer 				 */
466246d70077SConrad Meyer 				if (z == last_zone) {
466346d70077SConrad Meyer 					ties = 1;
466446d70077SConrad Meyer 					continue;
466546d70077SConrad Meyer 				}
466646d70077SConrad Meyer 				size = get_uma_stats(kz, z, &allocs, &used,
466746d70077SConrad Meyer 				    &sleeps, &cachefree, &xdomain);
466846d70077SConrad Meyer 				if (size > cur_size &&
466946d70077SConrad Meyer 				    size < last_size + ties) {
467046d70077SConrad Meyer 					cur_size = size;
467146d70077SConrad Meyer 					cur_zone = z;
467246d70077SConrad Meyer 					cur_keg = kz;
467346d70077SConrad Meyer 				}
467446d70077SConrad Meyer 			}
467546d70077SConrad Meyer 		}
467646d70077SConrad Meyer 		if (cur_zone == NULL)
467746d70077SConrad Meyer 			break;
467846d70077SConrad Meyer 
467946d70077SConrad Meyer 		size = get_uma_stats(cur_keg, cur_zone, &allocs, &used,
468046d70077SConrad Meyer 		    &sleeps, &cachefree, &xdomain);
468146d70077SConrad Meyer 		db_printf(fmt_entry, cur_zone->uz_name,
468246d70077SConrad Meyer 		    (uintmax_t)cur_keg->uk_size, (intmax_t)used, cachefree,
468346d70077SConrad Meyer 		    (uintmax_t)allocs, (uintmax_t)sleeps,
4684*20a4e154SJeff Roberson 		    (unsigned)cur_zone->uz_bucket_size, (intmax_t)size,
4685*20a4e154SJeff Roberson 		    xdomain);
468646d70077SConrad Meyer 
4687687c94aaSJohn Baldwin 		if (db_pager_quit)
4688687c94aaSJohn Baldwin 			return;
468946d70077SConrad Meyer 		last_zone = cur_zone;
469046d70077SConrad Meyer 		last_size = cur_size;
469148c5777eSRobert Watson 	}
469248c5777eSRobert Watson }
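
/*
 * Design note: the loop above is an O(n^2) repeated selection rather
 * than a true sort, likely because DDB cannot safely allocate a scratch
 * array; each pass rescans every zone for the largest size not yet
 * printed, and the "ties" flag steps through equal-sized zones in list
 * order.
 */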
469303175483SAlexander Motin 
469403175483SAlexander Motin DB_SHOW_COMMAND(umacache, db_show_umacache)
469503175483SAlexander Motin {
469603175483SAlexander Motin 	uma_zone_t z;
4697ab3185d1SJeff Roberson 	uint64_t allocs, frees;
46980f9b7bf3SMark Johnston 	long cachefree;
46990f9b7bf3SMark Johnston 	int i;
470003175483SAlexander Motin 
470103175483SAlexander Motin 	db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free",
470203175483SAlexander Motin 	    "Requests", "Bucket");
470303175483SAlexander Motin 	LIST_FOREACH(z, &uma_cachezones, uz_link) {
4704c1685086SJeff Roberson 		uma_zone_sumstat(z, &cachefree, &allocs, &frees, NULL, NULL);
47050f9b7bf3SMark Johnston 		for (i = 0; i < vm_ndomains; i++)
47060f9b7bf3SMark Johnston 			cachefree += z->uz_domain[i].uzd_nitems;
47070f9b7bf3SMark Johnston 		db_printf("%18s %8ju %8jd %8ld %12ju %8u\n",
470803175483SAlexander Motin 		    z->uz_name, (uintmax_t)z->uz_size,
470903175483SAlexander Motin 		    (intmax_t)(allocs - frees), cachefree,
4710*20a4e154SJeff Roberson 		    (uintmax_t)allocs, z->uz_bucket_size);
471103175483SAlexander Motin 		if (db_pager_quit)
471203175483SAlexander Motin 			return;
471303175483SAlexander Motin 	}
471403175483SAlexander Motin }
47159542ea7bSGleb Smirnoff #endif	/* DDB */
4716