/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2002-2005, 2009, 2013 Jeffrey Roberson <jeff@FreeBSD.org>
 * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
 * Copyright (c) 2004-2006 Robert N. M. Watson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * uma_core.c  Implementation of the Universal Memory allocator
 *
 * This allocator is intended to replace the multitude of similar object caches
 * in the standard FreeBSD kernel.  The intent is to be flexible as well as
 * efficient.  A primary design goal is to return unused memory to the rest of
 * the system.  This will make the system as a whole more flexible due to the
 * ability to move memory to subsystems which most need it instead of leaving
 * pools of reserved memory unused.
 *
 * The basic ideas stem from similar slab/zone based allocators whose algorithms
 * are well known.
 *
 */

/*
 * TODO:
 *	- Improve memory usage for large allocations
 *	- Investigate cache size adjustments
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_param.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bitset.h>
#include <sys/domainset.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <sys/types.h>
#include <sys/limits.h>
#include <sys/queue.h>
#include <sys/malloc.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/sysctl.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/random.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/taskqueue.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_domainset.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_param.h>
#include <vm/vm_phys.h>
#include <vm/vm_pagequeue.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>
#include <vm/uma_int.h>
#include <vm/uma_dbg.h>

#include <ddb/ddb.h>

#ifdef DEBUG_MEMGUARD
#include <vm/memguard.h>
#endif

/*
 * This is the zone and keg from which all zones are spawned.
 */
static uma_zone_t kegs;
static uma_zone_t zones;

/* This is the zone from which all offpage uma_slab_ts are allocated. */
static uma_zone_t slabzone;

/*
 * The initial hash tables come out of this zone so they can be allocated
 * prior to malloc coming up.
 */
static uma_zone_t hashzone;

/* The boot-time adjusted value for cache line alignment. */
int uma_align_cache = 64 - 1;

static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");

/*
 * Are we allowed to allocate buckets?
 */
static int bucketdisable = 1;

/* Linked list of all kegs in the system */
static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs);

/* Linked list of all cache-only zones in the system */
static LIST_HEAD(,uma_zone) uma_cachezones =
    LIST_HEAD_INITIALIZER(uma_cachezones);

/* This RW lock protects the keg list */
static struct rwlock_padalign __exclusive_cache_line uma_rwlock;

/*
 * Pointer to and counter of the pool of pages that is preallocated at
 * startup to bootstrap UMA.
 */
static char *bootmem;
static int boot_pages;

static struct sx uma_reclaim_lock;

/*
 * kmem soft limit, initialized by uma_set_limit().  Ensure that early
 * allocations don't trigger a wakeup of the reclaim thread.
 */
static unsigned long uma_kmem_limit = LONG_MAX;
SYSCTL_ULONG(_vm, OID_AUTO, uma_kmem_limit, CTLFLAG_RD, &uma_kmem_limit, 0,
    "UMA kernel memory soft limit");
static unsigned long uma_kmem_total;
SYSCTL_ULONG(_vm, OID_AUTO, uma_kmem_total, CTLFLAG_RD, &uma_kmem_total, 0,
    "UMA kernel memory usage");

/* Is the VM done starting up? */
static enum { BOOT_COLD = 0, BOOT_STRAPPED, BOOT_PAGEALLOC, BOOT_BUCKETS,
    BOOT_RUNNING } booted = BOOT_COLD;

/*
 * This is the handle used to schedule events that need to happen
 * outside of the allocation fast path.
 */
static struct callout uma_callout;
#define	UMA_TIMEOUT	20		/* Seconds for callout interval. */

/*
 * This structure is passed as the zone ctor arg so that I don't have to create
 * a special allocation function just for zones.
 */
struct uma_zctor_args {
	const char *name;
	size_t size;
	uma_ctor ctor;
	uma_dtor dtor;
	uma_init uminit;
	uma_fini fini;
	uma_import import;
	uma_release release;
	void *arg;
	uma_keg_t keg;
	int align;
	uint32_t flags;
};

struct uma_kctor_args {
	uma_zone_t zone;
	size_t size;
	uma_init uminit;
	uma_fini fini;
	int align;
	uint32_t flags;
};

struct uma_bucket_zone {
	uma_zone_t	ubz_zone;
	char		*ubz_name;
	int		ubz_entries;	/* Number of items it can hold. */
	int		ubz_maxsize;	/* Maximum allocation size per-item. */
};

/*
 * Compute the actual number of bucket entries so that bucket allocations
 * pack into power-of-two sizes for more efficient space utilization.
 */
#define	BUCKET_SIZE(n)						\
    (((sizeof(void *) * (n)) - sizeof(struct uma_bucket)) / sizeof(void *))

#define	BUCKET_MAX	BUCKET_SIZE(256)
#define	BUCKET_MIN	BUCKET_SIZE(4)

struct uma_bucket_zone bucket_zones[] = {
	{ NULL, "4 Bucket", BUCKET_SIZE(4), 4096 },
	{ NULL, "6 Bucket", BUCKET_SIZE(6), 3072 },
	{ NULL, "8 Bucket", BUCKET_SIZE(8), 2048 },
	{ NULL, "12 Bucket", BUCKET_SIZE(12), 1536 },
	{ NULL, "16 Bucket", BUCKET_SIZE(16), 1024 },
	{ NULL, "32 Bucket", BUCKET_SIZE(32), 512 },
	{ NULL, "64 Bucket", BUCKET_SIZE(64), 256 },
	{ NULL, "128 Bucket", BUCKET_SIZE(128), 128 },
	{ NULL, "256 Bucket", BUCKET_SIZE(256), 64 },
	{ NULL, NULL, 0}
};

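/*
 * Worked example (added commentary, not from the original source): on LP64,
 * sizeof(void *) == 8, and assuming a 24-byte struct uma_bucket header,
 *
 *	BUCKET_SIZE(32) = ((8 * 32) - 24) / 8 = 29
 *
 * so the "32 Bucket" zone holds 29 item pointers, and bucket_init() below
 * sizes the allocation as roundup(24, 8) + 29 * 8 = 256 bytes, an exact
 * power of two.  The 24-byte header is an assumption for the arithmetic;
 * the real value depends on struct uma_bucket's layout.
 */
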
/*
 * Flags and enumerations to be passed to internal functions.
 */
enum zfreeskip {
	SKIP_NONE =	0,
	SKIP_CNT =	0x00000001,
	SKIP_DTOR =	0x00010000,
	SKIP_FINI =	0x00020000,
};

/* Prototypes. */

int	uma_startup_count(int);
void	uma_startup(void *, int);
void	uma_startup1(void);
void	uma_startup2(void);

static void *noobj_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
static void *page_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
static void *pcpu_page_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
static void *startup_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
static void page_free(void *, vm_size_t, uint8_t);
static void pcpu_page_free(void *, vm_size_t, uint8_t);
static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int, int, int);
static void cache_drain(uma_zone_t);
static void bucket_drain(uma_zone_t, uma_bucket_t);
static void bucket_cache_reclaim(uma_zone_t zone, bool);
static int keg_ctor(void *, int, void *, int);
static void keg_dtor(void *, int, void *);
static int zone_ctor(void *, int, void *, int);
static void zone_dtor(void *, int, void *);
static int zero_init(void *, int, int);
static void keg_small_init(uma_keg_t keg);
static void keg_large_init(uma_keg_t keg);
static void zone_foreach(void (*zfunc)(uma_zone_t));
static void zone_timeout(uma_zone_t zone);
static int hash_alloc(struct uma_hash *, u_int);
static int hash_expand(struct uma_hash *, struct uma_hash *);
static void hash_free(struct uma_hash *hash);
static void uma_timeout(void *);
static void uma_startup3(void);
static void *zone_alloc_item(uma_zone_t, void *, int, int);
static void *zone_alloc_item_locked(uma_zone_t, void *, int, int);
static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip);
static void bucket_enable(void);
static void bucket_init(void);
static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int);
static void bucket_free(uma_zone_t zone, uma_bucket_t, void *);
static void bucket_zone_drain(void);
static uma_bucket_t zone_alloc_bucket(uma_zone_t, void *, int, int, int);
static uma_slab_t zone_fetch_slab(uma_zone_t, uma_keg_t, int, int);
static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab);
static void slab_free_item(uma_zone_t zone, uma_slab_t slab, void *item);
static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
    uma_fini fini, int align, uint32_t flags);
static int zone_import(uma_zone_t, void **, int, int, int);
static void zone_release(uma_zone_t, void **, int);
static void uma_zero_item(void *, uma_zone_t);

void uma_print_zone(uma_zone_t);
void uma_print_stats(void);
static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);

#ifdef INVARIANTS
static bool uma_dbg_kskip(uma_keg_t keg, void *mem);
static bool uma_dbg_zskip(uma_zone_t zone, void *mem);
static void uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item);
static void uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item);

static SYSCTL_NODE(_vm, OID_AUTO, debug, CTLFLAG_RD, 0,
    "Memory allocation debugging");

static u_int dbg_divisor = 1;
SYSCTL_UINT(_vm_debug, OID_AUTO, divisor,
    CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &dbg_divisor, 0,
    "Debug & thrash every Nth item in the memory allocator");

static counter_u64_t uma_dbg_cnt = EARLY_COUNTER;
static counter_u64_t uma_skip_cnt = EARLY_COUNTER;
SYSCTL_COUNTER_U64(_vm_debug, OID_AUTO, trashed, CTLFLAG_RD,
    &uma_dbg_cnt, "memory items debugged");
SYSCTL_COUNTER_U64(_vm_debug, OID_AUTO, skipped, CTLFLAG_RD,
    &uma_skip_cnt, "memory items skipped, not debugged");
#endif

SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);

SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
    0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");

SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
    0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");

static int zone_warnings = 1;
SYSCTL_INT(_vm, OID_AUTO, zone_warnings, CTLFLAG_RWTUN, &zone_warnings, 0,
    "Warn when a UMA zone becomes full");

/* Adjust bytes under management by UMA. */
static inline void
uma_total_dec(unsigned long size)
{

	atomic_subtract_long(&uma_kmem_total, size);
}

static inline void
uma_total_inc(unsigned long size)
{

	if (atomic_fetchadd_long(&uma_kmem_total, size) > uma_kmem_limit)
		uma_reclaim_wakeup();
}

/*
 * This routine checks to see whether or not it's safe to enable buckets.
 */
static void
bucket_enable(void)
{
	bucketdisable = vm_page_count_min();
}

/*
 * Initialize bucket_zones, the array of zones of buckets of various sizes.
 *
 * For each zone, calculate the memory required for each bucket, consisting
 * of the header and an array of pointers.
 */
static void
bucket_init(void)
{
	struct uma_bucket_zone *ubz;
	int size;

	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++) {
		size = roundup(sizeof(struct uma_bucket), sizeof(void *));
		size += sizeof(void *) * ubz->ubz_entries;
		ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
		    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
		    UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET | UMA_ZONE_NUMA);
	}
}

/*
 * Given a desired number of entries for a bucket, return the zone from which
 * to allocate the bucket.
 */
static struct uma_bucket_zone *
bucket_zone_lookup(int entries)
{
	struct uma_bucket_zone *ubz;

	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
		if (ubz->ubz_entries >= entries)
			return (ubz);
	ubz--;
	return (ubz);
}

static int
bucket_select(int size)
{
	struct uma_bucket_zone *ubz;

	ubz = &bucket_zones[0];
	if (size > ubz->ubz_maxsize)
		return (MAX((ubz->ubz_maxsize * ubz->ubz_entries) / size, 1));

	for (; ubz->ubz_entries != 0; ubz++)
		if (ubz->ubz_maxsize < size)
			break;
	ubz--;
	return (ubz->ubz_entries);
}

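/*
 * Example (added commentary): for a zone with 100-byte items,
 * bucket_select(100) scans the table until ubz_maxsize drops below 100,
 * which first happens at "256 Bucket" (ubz_maxsize 64), then backs up one
 * entry and returns the "128 Bucket" capacity, BUCKET_SIZE(128).  Items
 * larger than the biggest ubz_maxsize (4096) instead get a scaled-down
 * entry count so that a full bucket caches roughly as many bytes as the
 * "4 Bucket" zone would at its 4096-byte maximum item size.
 */
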
static uma_bucket_t
bucket_alloc(uma_zone_t zone, void *udata, int flags)
{
	struct uma_bucket_zone *ubz;
	uma_bucket_t bucket;

	/*
	 * This is to stop us from allocating per cpu buckets while we're
	 * running out of vm.boot_pages.  Otherwise, we would exhaust the
	 * boot pages.  This also prevents us from allocating buckets in
	 * low memory situations.
	 */
	if (bucketdisable)
		return (NULL);
	/*
	 * To limit bucket recursion we store the original zone flags
	 * in a cookie passed via zalloc_arg/zfree_arg.  This allows the
	 * NOVM flag to persist even through deep recursions.  We also
	 * store ZFLAG_BUCKET once we have recursed attempting to allocate
	 * a bucket for a bucket zone so we do not allow infinite bucket
	 * recursion.  This cookie will even persist to frees of unused
	 * buckets via the allocation path or bucket allocations in the
	 * free path.
	 */
	if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
		udata = (void *)(uintptr_t)zone->uz_flags;
	else {
		if ((uintptr_t)udata & UMA_ZFLAG_BUCKET)
			return (NULL);
		udata = (void *)((uintptr_t)udata | UMA_ZFLAG_BUCKET);
	}
	if ((uintptr_t)udata & UMA_ZFLAG_CACHEONLY)
		flags |= M_NOVM;
	ubz = bucket_zone_lookup(zone->uz_count);
	if (ubz->ubz_zone == zone && (ubz + 1)->ubz_entries != 0)
		ubz++;
	bucket = uma_zalloc_arg(ubz->ubz_zone, udata, flags);
	if (bucket) {
#ifdef INVARIANTS
		bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
#endif
		bucket->ub_cnt = 0;
		bucket->ub_entries = ubz->ubz_entries;
	}

	return (bucket);
}

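/*
 * Illustrative trace of the recursion cookie (added commentary): when a
 * bucket zone needs a bucket to hold its own free items, the first pass
 * through bucket_alloc() ORs UMA_ZFLAG_BUCKET into the cookie; if that
 * allocation recurses back into bucket_alloc(), the flag is already set
 * and NULL is returned, so the inner zone simply operates bucketless
 * instead of recursing without bound:
 *
 *	bucket_alloc(bucket zone A, udata)
 *	  udata |= UMA_ZFLAG_BUCKET;
 *	  uma_zalloc_arg(bucket zone B, udata, flags)
 *	    bucket_alloc(bucket zone B, udata)	sees the flag, returns NULL
 */
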
static void
bucket_free(uma_zone_t zone, uma_bucket_t bucket, void *udata)
{
	struct uma_bucket_zone *ubz;

	KASSERT(bucket->ub_cnt == 0,
	    ("bucket_free: Freeing a non free bucket."));
	if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
		udata = (void *)(uintptr_t)zone->uz_flags;
	ubz = bucket_zone_lookup(bucket->ub_entries);
	uma_zfree_arg(ubz->ubz_zone, bucket, udata);
}

static void
bucket_zone_drain(void)
{
	struct uma_bucket_zone *ubz;

	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
		uma_zone_reclaim(ubz->ubz_zone, UMA_RECLAIM_DRAIN);
}

/*
 * Attempt to satisfy an allocation by retrieving a full bucket from one of the
 * zone's caches.
 */
static uma_bucket_t
zone_fetch_bucket(uma_zone_t zone, uma_zone_domain_t zdom)
{
	uma_bucket_t bucket;

	ZONE_LOCK_ASSERT(zone);

	if ((bucket = TAILQ_FIRST(&zdom->uzd_buckets)) != NULL) {
		MPASS(zdom->uzd_nitems >= bucket->ub_cnt);
		TAILQ_REMOVE(&zdom->uzd_buckets, bucket, ub_link);
		zdom->uzd_nitems -= bucket->ub_cnt;
		if (zdom->uzd_imin > zdom->uzd_nitems)
			zdom->uzd_imin = zdom->uzd_nitems;
		zone->uz_bkt_count -= bucket->ub_cnt;
	}
	return (bucket);
}

/*
 * Insert a full bucket into the specified cache.  The "ws" parameter indicates
 * whether the bucket's contents should be counted as part of the zone's
 * working set.
 */
static void
zone_put_bucket(uma_zone_t zone, uma_zone_domain_t zdom, uma_bucket_t bucket,
    const bool ws)
{

	ZONE_LOCK_ASSERT(zone);
	KASSERT(zone->uz_bkt_count < zone->uz_bkt_max, ("%s: zone %p overflow",
	    __func__, zone));

	if (ws)
		TAILQ_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link);
	else
		TAILQ_INSERT_TAIL(&zdom->uzd_buckets, bucket, ub_link);
	zdom->uzd_nitems += bucket->ub_cnt;
	if (ws && zdom->uzd_imax < zdom->uzd_nitems)
		zdom->uzd_imax = zdom->uzd_nitems;
	zone->uz_bkt_count += bucket->ub_cnt;
}

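/*
 * Ordering note with a usage sketch (added commentary): buckets queued
 * with ws == true go to the head of the list, where zone_fetch_bucket()
 * takes from, so recently filled buckets are recycled first; buckets
 * queued with ws == false go to the tail, which is where
 * bucket_cache_reclaim() trims from, so they are freed first.  A typical
 * free-path caller, assuming the zone lock is held:
 *
 *	ZONE_LOCK(zone);
 *	zone_put_bucket(zone, &zone->uz_domain[domain], bucket, true);
 *	ZONE_UNLOCK(zone);
 */
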
static void
zone_log_warning(uma_zone_t zone)
{
	static const struct timeval warninterval = { 300, 0 };

	if (!zone_warnings || zone->uz_warning == NULL)
		return;

	if (ratecheck(&zone->uz_ratecheck, &warninterval))
		printf("[zone: %s] %s\n", zone->uz_name, zone->uz_warning);
}

static inline void
zone_maxaction(uma_zone_t zone)
{

	if (zone->uz_maxaction.ta_func != NULL)
		taskqueue_enqueue(taskqueue_thread, &zone->uz_maxaction);
}

/*
 * Routine called by the callout to fire off time-interval-based
 * calculations (stats, hash size, etc.).
 *
 * Arguments:
 *	arg   Unused
 *
 * Returns:
 *	Nothing
 */
static void
uma_timeout(void *unused)
{
	bucket_enable();
	zone_foreach(zone_timeout);

	/* Reschedule this event */
	callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
}

/*
 * Update the working set size estimate for the zone's bucket cache.
 * The constants chosen here are somewhat arbitrary.  With an update period of
 * 20s (UMA_TIMEOUT), this estimate is dominated by zone activity over the
 * last 100s.
 */
static void
zone_domain_update_wss(uma_zone_domain_t zdom)
{
	long wss;

	MPASS(zdom->uzd_imax >= zdom->uzd_imin);
	wss = zdom->uzd_imax - zdom->uzd_imin;
	zdom->uzd_imax = zdom->uzd_imin = zdom->uzd_nitems;
	zdom->uzd_wss = (4 * wss + zdom->uzd_wss) / 5;
}

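/*
 * Worked example (added commentary): the update above is an exponential
 * moving average that weights the current interval 4/5, so an interval
 * measured k periods ago contributes (4/5) * (1/5)^(k-1) of its value.
 * With UMA_TIMEOUT == 20 seconds, a burst that measured wss == 100 decays
 * as 80, 16, 3, ... over successive updates and is effectively forgotten
 * after about five intervals (100 seconds), matching the comment above.
 */
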
5760f9b7bf3SMark Johnston /*
5779643769aSJeff Roberson  * Routine to perform timeout driven calculations.  This expands the
5789643769aSJeff Roberson  * hashes and does per cpu statistics aggregation.
5798355f576SJeff Roberson  *
580e20a199fSJeff Roberson  *  Returns nothing.
5818355f576SJeff Roberson  */
5828355f576SJeff Roberson static void
583bb15d1c7SGleb Smirnoff zone_timeout(uma_zone_t zone)
5848355f576SJeff Roberson {
585bb15d1c7SGleb Smirnoff 	uma_keg_t keg = zone->uz_keg;
5863b2f2cb8SAlexander Motin 	u_int slabs;
5878355f576SJeff Roberson 
588e20a199fSJeff Roberson 	KEG_LOCK(keg);
5898355f576SJeff Roberson 	/*
590e20a199fSJeff Roberson 	 * Expand the keg hash table.
5918355f576SJeff Roberson 	 *
5928355f576SJeff Roberson 	 * This is done if the number of slabs is larger than the hash size.
5938355f576SJeff Roberson 	 * What I'm trying to do here is completely reduce collisions.  This
5948355f576SJeff Roberson 	 * may be a little aggressive.  Should I allow for two collisions max?
5958355f576SJeff Roberson 	 */
596099a0e58SBosko Milekic 	if (keg->uk_flags & UMA_ZONE_HASH &&
5973b2f2cb8SAlexander Motin 	    (slabs = keg->uk_pages / keg->uk_ppera) >
5983b2f2cb8SAlexander Motin 	     keg->uk_hash.uh_hashsize) {
5990aef6126SJeff Roberson 		struct uma_hash newhash;
6000aef6126SJeff Roberson 		struct uma_hash oldhash;
6010aef6126SJeff Roberson 		int ret;
6025300d9ddSJeff Roberson 
6030aef6126SJeff Roberson 		/*
6040aef6126SJeff Roberson 		 * This is so involved because allocating and freeing
605e20a199fSJeff Roberson 		 * while the keg lock is held will lead to deadlock.
6060aef6126SJeff Roberson 		 * I have to do everything in stages and check for
6070aef6126SJeff Roberson 		 * races.
6080aef6126SJeff Roberson 		 */
609e20a199fSJeff Roberson 		KEG_UNLOCK(keg);
6103b2f2cb8SAlexander Motin 		ret = hash_alloc(&newhash, 1 << fls(slabs));
611e20a199fSJeff Roberson 		KEG_LOCK(keg);
6120aef6126SJeff Roberson 		if (ret) {
613099a0e58SBosko Milekic 			if (hash_expand(&keg->uk_hash, &newhash)) {
614099a0e58SBosko Milekic 				oldhash = keg->uk_hash;
615099a0e58SBosko Milekic 				keg->uk_hash = newhash;
6160aef6126SJeff Roberson 			} else
6170aef6126SJeff Roberson 				oldhash = newhash;
6180aef6126SJeff Roberson 
619e20a199fSJeff Roberson 			KEG_UNLOCK(keg);
6200aef6126SJeff Roberson 			hash_free(&oldhash);
621a1dff920SDavide Italiano 			return;
6220aef6126SJeff Roberson 		}
6235300d9ddSJeff Roberson 	}
624*08cfa56eSMark Johnston 	KEG_UNLOCK(keg);
625e20a199fSJeff Roberson 
626*08cfa56eSMark Johnston 	ZONE_LOCK(zone);
627bb15d1c7SGleb Smirnoff 	for (int i = 0; i < vm_ndomains; i++)
6280f9b7bf3SMark Johnston 		zone_domain_update_wss(&zone->uz_domain[i]);
629*08cfa56eSMark Johnston 	ZONE_UNLOCK(zone);
6308355f576SJeff Roberson }
6318355f576SJeff Roberson 
6328355f576SJeff Roberson /*
6335300d9ddSJeff Roberson  * Allocate and zero fill the next sized hash table from the appropriate
6345300d9ddSJeff Roberson  * backing store.
6355300d9ddSJeff Roberson  *
6365300d9ddSJeff Roberson  * Arguments:
6370aef6126SJeff Roberson  *	hash  A new hash structure with the old hash size in uh_hashsize
6385300d9ddSJeff Roberson  *
6395300d9ddSJeff Roberson  * Returns:
640763df3ecSPedro F. Giffuni  *	1 on success and 0 on failure.
6415300d9ddSJeff Roberson  */
64237c84183SPoul-Henning Kamp static int
6433b2f2cb8SAlexander Motin hash_alloc(struct uma_hash *hash, u_int size)
6445300d9ddSJeff Roberson {
64559568a0eSAlexander Motin 	size_t alloc;
6465300d9ddSJeff Roberson 
6473b2f2cb8SAlexander Motin 	KASSERT(powerof2(size), ("hash size must be power of 2"));
6483b2f2cb8SAlexander Motin 	if (size > UMA_HASH_SIZE_INIT)  {
6493b2f2cb8SAlexander Motin 		hash->uh_hashsize = size;
6500aef6126SJeff Roberson 		alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
6510aef6126SJeff Roberson 		hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
652961647dfSJeff Roberson 		    M_UMAHASH, M_NOWAIT);
6535300d9ddSJeff Roberson 	} else {
6540aef6126SJeff Roberson 		alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
655e20a199fSJeff Roberson 		hash->uh_slab_hash = zone_alloc_item(hashzone, NULL,
656ab3185d1SJeff Roberson 		    UMA_ANYDOMAIN, M_WAITOK);
6570aef6126SJeff Roberson 		hash->uh_hashsize = UMA_HASH_SIZE_INIT;
6585300d9ddSJeff Roberson 	}
6590aef6126SJeff Roberson 	if (hash->uh_slab_hash) {
6600aef6126SJeff Roberson 		bzero(hash->uh_slab_hash, alloc);
6610aef6126SJeff Roberson 		hash->uh_hashmask = hash->uh_hashsize - 1;
6620aef6126SJeff Roberson 		return (1);
6630aef6126SJeff Roberson 	}
6645300d9ddSJeff Roberson 
6650aef6126SJeff Roberson 	return (0);
6665300d9ddSJeff Roberson }
6675300d9ddSJeff Roberson 
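/*
 * Added note: uh_hashsize is always a power of two, so uh_hashmask
 * (uh_hashsize - 1) lets the hash index be computed with a mask rather
 * than a modulo.  As a sketch (assuming the UMA_HASH() macro in
 * uma_int.h takes this usual form):
 *
 *	idx = ((uintptr_t)data >> UMA_SLAB_SHIFT) & hash->uh_hashmask;
 */
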
6685300d9ddSJeff Roberson /*
66964f051e9SJeff Roberson  * Expands the hash table for HASH zones.  This is done from zone_timeout
67064f051e9SJeff Roberson  * to reduce collisions.  This must not be done in the regular allocation
67164f051e9SJeff Roberson  * path, otherwise, we can recurse on the vm while allocating pages.
6728355f576SJeff Roberson  *
6738355f576SJeff Roberson  * Arguments:
6740aef6126SJeff Roberson  *	oldhash  The hash you want to expand
6750aef6126SJeff Roberson  *	newhash  The hash structure for the new table
6768355f576SJeff Roberson  *
6778355f576SJeff Roberson  * Returns:
6788355f576SJeff Roberson  *	Nothing
6798355f576SJeff Roberson  *
6808355f576SJeff Roberson  * Discussion:
6818355f576SJeff Roberson  */
6820aef6126SJeff Roberson static int
6830aef6126SJeff Roberson hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
6848355f576SJeff Roberson {
6858355f576SJeff Roberson 	uma_slab_t slab;
6866929b7d1SPedro F. Giffuni 	u_int hval;
6876929b7d1SPedro F. Giffuni 	u_int idx;
6888355f576SJeff Roberson 
6890aef6126SJeff Roberson 	if (!newhash->uh_slab_hash)
6900aef6126SJeff Roberson 		return (0);
6918355f576SJeff Roberson 
6920aef6126SJeff Roberson 	if (oldhash->uh_hashsize >= newhash->uh_hashsize)
6930aef6126SJeff Roberson 		return (0);
6948355f576SJeff Roberson 
6958355f576SJeff Roberson 	/*
6968355f576SJeff Roberson 	 * I need to investigate hash algorithms for resizing without a
6978355f576SJeff Roberson 	 * full rehash.
6988355f576SJeff Roberson 	 */
6998355f576SJeff Roberson 
7006929b7d1SPedro F. Giffuni 	for (idx = 0; idx < oldhash->uh_hashsize; idx++)
7016929b7d1SPedro F. Giffuni 		while (!SLIST_EMPTY(&oldhash->uh_slab_hash[idx])) {
7026929b7d1SPedro F. Giffuni 			slab = SLIST_FIRST(&oldhash->uh_slab_hash[idx]);
7036929b7d1SPedro F. Giffuni 			SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[idx], us_hlink);
7040aef6126SJeff Roberson 			hval = UMA_HASH(newhash, slab->us_data);
7050aef6126SJeff Roberson 			SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
7060aef6126SJeff Roberson 			    slab, us_hlink);
7078355f576SJeff Roberson 		}
7088355f576SJeff Roberson 
7090aef6126SJeff Roberson 	return (1);
7109c2cd7e5SJeff Roberson }
7119c2cd7e5SJeff Roberson 
/*
 * Free the hash bucket to the appropriate backing store.
 *
 * Arguments:
 *	hash  The hash structure whose uh_slab_hash bucket we're freeing;
 *	      uh_hashsize determines which backing store it came from
 *
 * Returns:
 *	Nothing
 */
static void
hash_free(struct uma_hash *hash)
{
	if (hash->uh_slab_hash == NULL)
		return;
	if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
		zone_free_item(hashzone, hash->uh_slab_hash, NULL, SKIP_NONE);
	else
		free(hash->uh_slab_hash, M_UMAHASH);
}

/*
 * Frees all outstanding items in a bucket
 *
 * Arguments:
 *	zone   The zone to free to, must be unlocked.
 *	bucket The free/alloc bucket with items, cpu queue must be locked.
 *
 * Returns:
 *	Nothing
 */

static void
bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
{
	int i;

	if (bucket == NULL)
		return;

	if (zone->uz_fini)
		for (i = 0; i < bucket->ub_cnt; i++)
			zone->uz_fini(bucket->ub_bucket[i], zone->uz_size);
	zone->uz_release(zone->uz_arg, bucket->ub_bucket, bucket->ub_cnt);
	if (zone->uz_max_items > 0) {
		ZONE_LOCK(zone);
		zone->uz_items -= bucket->ub_cnt;
		if (zone->uz_sleepers && zone->uz_items < zone->uz_max_items)
			wakeup_one(zone);
		ZONE_UNLOCK(zone);
	}
	bucket->ub_cnt = 0;
}

/*
 * Drains the per cpu caches for a zone.
 *
 * NOTE: This may only be called while the zone is being torn down, and not
 * during normal operation.  This is necessary in order that we do not have
 * to migrate CPUs to drain the per-CPU caches.
 *
 * Arguments:
 *	zone     The zone to drain, must be unlocked.
 *
 * Returns:
 *	Nothing
 */
static void
cache_drain(uma_zone_t zone)
{
	uma_cache_t cache;
	int cpu;

	/*
	 * XXX: It is safe to not lock the per-CPU caches, because we're
	 * tearing down the zone anyway.  I.e., there will be no further use
	 * of the caches at this point.
	 *
	 * XXX: It would be good to be able to assert that the zone is being
	 * torn down to prevent improper use of cache_drain().
	 *
	 * XXX: We lock the zone before passing into bucket_cache_reclaim() as
	 * it is used elsewhere.  Should the tear-down path be made special
	 * there in some form?
	 */
	CPU_FOREACH(cpu) {
		cache = &zone->uz_cpu[cpu];
		bucket_drain(zone, cache->uc_allocbucket);
		if (cache->uc_allocbucket != NULL)
			bucket_free(zone, cache->uc_allocbucket, NULL);
		cache->uc_allocbucket = NULL;
		bucket_drain(zone, cache->uc_freebucket);
		if (cache->uc_freebucket != NULL)
			bucket_free(zone, cache->uc_freebucket, NULL);
		cache->uc_freebucket = NULL;
		bucket_drain(zone, cache->uc_crossbucket);
		if (cache->uc_crossbucket != NULL)
			bucket_free(zone, cache->uc_crossbucket, NULL);
		cache->uc_crossbucket = NULL;
	}
	ZONE_LOCK(zone);
	bucket_cache_reclaim(zone, true);
	ZONE_UNLOCK(zone);
}

static void
cache_shrink(uma_zone_t zone)
{

	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
		return;

	ZONE_LOCK(zone);
	zone->uz_count = (zone->uz_count_min + zone->uz_count) / 2;
	ZONE_UNLOCK(zone);
}

static void
cache_drain_safe_cpu(uma_zone_t zone)
{
	uma_cache_t cache;
	uma_bucket_t b1, b2, b3;
	int domain;

	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
		return;

	b1 = b2 = b3 = NULL;
	ZONE_LOCK(zone);
	critical_enter();
	if (zone->uz_flags & UMA_ZONE_NUMA)
		domain = PCPU_GET(domain);
	else
		domain = 0;
	cache = &zone->uz_cpu[curcpu];
	if (cache->uc_allocbucket) {
		if (cache->uc_allocbucket->ub_cnt != 0)
			zone_put_bucket(zone, &zone->uz_domain[domain],
			    cache->uc_allocbucket, false);
		else
			b1 = cache->uc_allocbucket;
		cache->uc_allocbucket = NULL;
	}
	if (cache->uc_freebucket) {
		if (cache->uc_freebucket->ub_cnt != 0)
			zone_put_bucket(zone, &zone->uz_domain[domain],
			    cache->uc_freebucket, false);
		else
			b2 = cache->uc_freebucket;
		cache->uc_freebucket = NULL;
	}
	b3 = cache->uc_crossbucket;
	cache->uc_crossbucket = NULL;
	critical_exit();
	ZONE_UNLOCK(zone);
	if (b1)
		bucket_free(zone, b1, NULL);
	if (b2)
		bucket_free(zone, b2, NULL);
	if (b3) {
		bucket_drain(zone, b3);
		bucket_free(zone, b3, NULL);
	}
}

/*
 * Safely drain the per-CPU caches of a zone (or of all zones) into the
 * per-domain bucket caches.  This is an expensive call because it needs
 * to bind to all CPUs one by one and enter a critical section on each of
 * them in order to safely access their cache buckets.
 * The zone lock must not be held when calling this function.
 */
static void
pcpu_cache_drain_safe(uma_zone_t zone)
{
	int cpu;

	/*
	 * Polite bucket size shrinking was not enough, shrink aggressively.
	 */
	if (zone)
		cache_shrink(zone);
	else
		zone_foreach(cache_shrink);

	CPU_FOREACH(cpu) {
		thread_lock(curthread);
		sched_bind(curthread, cpu);
		thread_unlock(curthread);

		if (zone)
			cache_drain_safe_cpu(zone);
		else
			zone_foreach(cache_drain_safe_cpu);
	}
	thread_lock(curthread);
	sched_unbind(curthread);
	thread_unlock(curthread);
}

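/*
 * Usage sketch (added commentary; the exact caller is an assumption): a
 * desperate reclaim pass can flush every CPU's buckets for all zones with
 *
 *	pcpu_cache_drain_safe(NULL);
 *
 * Binding to each CPU in turn is what makes this safe without adding any
 * locking to the per-CPU allocation fast path: the critical section in
 * cache_drain_safe_cpu() always runs on the CPU that owns the cache.
 */
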
/*
 * Reclaim cached buckets from a zone.  All buckets are reclaimed if the caller
 * requested a drain, otherwise the per-domain caches are trimmed to their
 * estimated working set sizes.
 */
static void
bucket_cache_reclaim(uma_zone_t zone, bool drain)
{
	uma_zone_domain_t zdom;
	uma_bucket_t bucket;
	long target, tofree;
	int i;

	for (i = 0; i < vm_ndomains; i++) {
		zdom = &zone->uz_domain[i];

		/*
		 * If we were asked to drain the zone, we are done only once
		 * this bucket cache is empty.  Otherwise, we reclaim items in
		 * excess of the zone's estimated working set size.  If the
		 * difference nitems - imin is larger than the WSS estimate,
		 * then the estimate will grow at the end of this interval and
		 * we ignore the historical average.
		 */
		target = drain ? 0 : lmax(zdom->uzd_wss, zdom->uzd_nitems -
		    zdom->uzd_imin);
		while (zdom->uzd_nitems > target) {
			bucket = TAILQ_LAST(&zdom->uzd_buckets, uma_bucketlist);
			if (bucket == NULL)
				break;
			tofree = bucket->ub_cnt;
			TAILQ_REMOVE(&zdom->uzd_buckets, bucket, ub_link);
			zdom->uzd_nitems -= tofree;

			/*
			 * Shift the bounds of the current WSS interval to avoid
			 * perturbing the estimate.
			 */
			zdom->uzd_imax -= lmin(zdom->uzd_imax, tofree);
			zdom->uzd_imin -= lmin(zdom->uzd_imin, tofree);

			ZONE_UNLOCK(zone);
			bucket_drain(zone, bucket);
			bucket_free(zone, bucket, NULL);
			ZONE_LOCK(zone);
		}
	}

	/*
	 * Shrink the zone bucket size to ensure that the per-CPU caches
	 * don't grow too large.
	 */
	if (zone->uz_count > zone->uz_count_min)
		zone->uz_count--;
}

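/*
 * Worked example (added commentary): suppose a domain cache has
 * uzd_nitems == 150, uzd_imin == 40 and uzd_wss == 80.  A trim computes
 * target = lmax(80, 150 - 40) = 110 and frees whole buckets from the tail
 * until at most 110 items remain; a drain uses target = 0 and empties the
 * cache entirely.
 */
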
static void
keg_free_slab(uma_keg_t keg, uma_slab_t slab, int start)
{
	uint8_t *mem;
	int i;
	uint8_t flags;

	CTR4(KTR_UMA, "keg_free_slab keg %s(%p) slab %p, returning %d bytes",
	    keg->uk_name, keg, slab, PAGE_SIZE * keg->uk_ppera);

	mem = slab->us_data;
	flags = slab->us_flags;
	i = start;
	if (keg->uk_fini != NULL) {
		for (i--; i > -1; i--)
#ifdef INVARIANTS
		/*
		 * trash_fini implies that dtor was trash_dtor.  trash_fini
		 * would check that memory hasn't been modified since free,
		 * which executed trash_dtor.
		 * That's why we need to run the uma_dbg_kskip() check here,
		 * even though we don't make this skip check for other
		 * init/fini invocations.
		 */
		if (!uma_dbg_kskip(keg, slab->us_data + (keg->uk_rsize * i)) ||
		    keg->uk_fini != trash_fini)
#endif
995fc03d22bSJeff Roberson 			keg->uk_fini(slab->us_data + (keg->uk_rsize * i),
996fc03d22bSJeff Roberson 			    keg->uk_size);
997fc03d22bSJeff Roberson 	}
998fc03d22bSJeff Roberson 	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
999fc03d22bSJeff Roberson 		zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
1000fc03d22bSJeff Roberson 	keg->uk_freef(mem, PAGE_SIZE * keg->uk_ppera, flags);
10012e47807cSJeff Roberson 	uma_total_dec(PAGE_SIZE * keg->uk_ppera);
10028355f576SJeff Roberson }
10038355f576SJeff Roberson 
10048355f576SJeff Roberson /*
1005e20a199fSJeff Roberson  * Frees pages from a keg back to the system.  This is done on demand from
10068355f576SJeff Roberson  * the pageout daemon.
10078355f576SJeff Roberson  *
1008e20a199fSJeff Roberson  * Returns nothing.
10098355f576SJeff Roberson  */
1010e20a199fSJeff Roberson static void
1011e20a199fSJeff Roberson keg_drain(uma_keg_t keg)
10128355f576SJeff Roberson {
10131e183df2SStefan Farfeleder 	struct slabhead freeslabs = { 0 };
1014ab3185d1SJeff Roberson 	uma_domain_t dom;
1015829be516SMark Johnston 	uma_slab_t slab, tmp;
1016ab3185d1SJeff Roberson 	int i;
10178355f576SJeff Roberson 
10188355f576SJeff Roberson 	/*
1019e20a199fSJeff Roberson 	 * We don't want to take pages from statically allocated kegs at this
10208355f576SJeff Roberson 	 * time
10218355f576SJeff Roberson 	 */
1022099a0e58SBosko Milekic 	if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
10238355f576SJeff Roberson 		return;
10248355f576SJeff Roberson 
10251431a748SGleb Smirnoff 	CTR3(KTR_UMA, "keg_drain %s(%p) free items: %u",
10261431a748SGleb Smirnoff 	    keg->uk_name, keg, keg->uk_free);
1027e20a199fSJeff Roberson 	KEG_LOCK(keg);
1028099a0e58SBosko Milekic 	if (keg->uk_free == 0)
10298355f576SJeff Roberson 		goto finished;
10308355f576SJeff Roberson 
1031ab3185d1SJeff Roberson 	for (i = 0; i < vm_ndomains; i++) {
1032ab3185d1SJeff Roberson 		dom = &keg->uk_domain[i];
1033ab3185d1SJeff Roberson 		LIST_FOREACH_SAFE(slab, &dom->ud_free_slab, us_link, tmp) {
1034829be516SMark Johnston 			/* We have nowhere to free these to. */
1035829be516SMark Johnston 			if (slab->us_flags & UMA_SLAB_BOOT)
10368355f576SJeff Roberson 				continue;
10378355f576SJeff Roberson 
10388355f576SJeff Roberson 			LIST_REMOVE(slab, us_link);
1039099a0e58SBosko Milekic 			keg->uk_pages -= keg->uk_ppera;
1040099a0e58SBosko Milekic 			keg->uk_free -= keg->uk_ipers;
1041713deb36SJeff Roberson 
1042099a0e58SBosko Milekic 			if (keg->uk_flags & UMA_ZONE_HASH)
1043ab3185d1SJeff Roberson 				UMA_HASH_REMOVE(&keg->uk_hash, slab,
1044ab3185d1SJeff Roberson 				    slab->us_data);
1045713deb36SJeff Roberson 
1046713deb36SJeff Roberson 			SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
1047713deb36SJeff Roberson 		}
1048ab3185d1SJeff Roberson 	}
1049ab3185d1SJeff Roberson 
1050713deb36SJeff Roberson finished:
1051e20a199fSJeff Roberson 	KEG_UNLOCK(keg);
1052713deb36SJeff Roberson 
1053713deb36SJeff Roberson 	while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
1054713deb36SJeff Roberson 		SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
10551645995bSKirk McKusick 		keg_free_slab(keg, slab, keg->uk_ipers);
10568355f576SJeff Roberson 	}
10578355f576SJeff Roberson }
10588355f576SJeff Roberson 
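/*
 * Reclaim memory from a zone: release cached buckets via
 * bucket_cache_reclaim() and then ask keg_drain() to return free slabs to
 * the VM.  The drain argument selects whether the entire bucket cache is
 * discarded or merely trimmed; waitok controls whether we may sleep waiting
 * for a concurrent reclaim of the same zone to finish.
 */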
1059e20a199fSJeff Roberson static void
1060*08cfa56eSMark Johnston zone_reclaim(uma_zone_t zone, int waitok, bool drain)
1061e20a199fSJeff Roberson {
1062e20a199fSJeff Roberson 
10638355f576SJeff Roberson 	/*
1064e20a199fSJeff Roberson 	 * Set the RECLAIMING flag to interlock with zone_dtor() so we can
1065e20a199fSJeff Roberson 	 * release our locks as we go.  Only zone_dtor() should make a WAITOK
1066e20a199fSJeff Roberson 	 * call since it is the only caller that knows the structure will
1067e20a199fSJeff Roberson 	 * still be available when it wakes up.
1068e20a199fSJeff Roberson 	 */
1069e20a199fSJeff Roberson 	ZONE_LOCK(zone);
1070*08cfa56eSMark Johnston 	while (zone->uz_flags & UMA_ZFLAG_RECLAIMING) {
1071e20a199fSJeff Roberson 		if (waitok == M_NOWAIT)
1072e20a199fSJeff Roberson 			goto out;
1073af526374SJeff Roberson 		msleep(zone, zone->uz_lockptr, PVM, "zonedrain", 1);
1074e20a199fSJeff Roberson 	}
1075*08cfa56eSMark Johnston 	zone->uz_flags |= UMA_ZFLAG_RECLAIMING;
1076*08cfa56eSMark Johnston 	bucket_cache_reclaim(zone, drain);
1077e20a199fSJeff Roberson 	ZONE_UNLOCK(zone);
1078*08cfa56eSMark Johnston 
1079e20a199fSJeff Roberson 	/*
1080e20a199fSJeff Roberson 	 * The RECLAIMING flag protects us from being freed while
1081111fbcd5SBryan Venteicher 	 * we're running.  Normally the uma_rwlock would protect us but we
1082e20a199fSJeff Roberson 	 * must be able to release and acquire the right lock for each keg.
1083e20a199fSJeff Roberson 	 */
1084bb15d1c7SGleb Smirnoff 	keg_drain(zone->uz_keg);
1085e20a199fSJeff Roberson 	ZONE_LOCK(zone);
1086*08cfa56eSMark Johnston 	zone->uz_flags &= ~UMA_ZFLAG_RECLAIMING;
1087e20a199fSJeff Roberson 	wakeup(zone);
1088e20a199fSJeff Roberson out:
1089e20a199fSJeff Roberson 	ZONE_UNLOCK(zone);
1090e20a199fSJeff Roberson }
1091e20a199fSJeff Roberson 
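/* Discard the zone's entire bucket cache and return the keg's free slabs. */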
1092*08cfa56eSMark Johnston static void
1093e20a199fSJeff Roberson zone_drain(uma_zone_t zone)
1094e20a199fSJeff Roberson {
1095e20a199fSJeff Roberson 
1096*08cfa56eSMark Johnston 	zone_reclaim(zone, M_NOWAIT, true);
1097*08cfa56eSMark Johnston }
1098*08cfa56eSMark Johnston 
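/*
 * Trim excess buckets from the zone's cache, rather than discarding them
 * all, and return the keg's free slabs to the VM.
 */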
1099*08cfa56eSMark Johnston static void
1100*08cfa56eSMark Johnston zone_trim(uma_zone_t zone)
1101*08cfa56eSMark Johnston {
1102*08cfa56eSMark Johnston 
1103*08cfa56eSMark Johnston 	zone_reclaim(zone, M_NOWAIT, false);
1104e20a199fSJeff Roberson }
1105e20a199fSJeff Roberson 
1106e20a199fSJeff Roberson /*
1107e20a199fSJeff Roberson  * Allocate a new slab for a keg.  This does not insert the slab onto a list.
1108194a979eSMark Johnston  * If the allocation was successful, the keg lock will be held upon return,
1109194a979eSMark Johnston  * otherwise the keg will be left unlocked.
11108355f576SJeff Roberson  *
11118355f576SJeff Roberson  * Arguments:
111286220393SMark Johnston  *	flags   Wait flags for the item initialization routine
111386220393SMark Johnston  *	aflags  Wait flags for the slab allocation
11148355f576SJeff Roberson  *
11158355f576SJeff Roberson  * Returns:
11168355f576SJeff Roberson  *	The slab that was allocated or NULL if there is no memory and the
11178355f576SJeff Roberson  *	caller specified M_NOWAIT.
11188355f576SJeff Roberson  */
11198355f576SJeff Roberson static uma_slab_t
112086220393SMark Johnston keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int domain, int flags,
112186220393SMark Johnston     int aflags)
11228355f576SJeff Roberson {
1123e20a199fSJeff Roberson 	uma_alloc allocf;
1124099a0e58SBosko Milekic 	uma_slab_t slab;
11252e47807cSJeff Roberson 	unsigned long size;
112685dcf349SGleb Smirnoff 	uint8_t *mem;
112786220393SMark Johnston 	uint8_t sflags;
11288355f576SJeff Roberson 	int i;
11298355f576SJeff Roberson 
1130ab3185d1SJeff Roberson 	KASSERT(domain >= 0 && domain < vm_ndomains,
1131ab3185d1SJeff Roberson 	    ("keg_alloc_slab: domain %d out of range", domain));
1132bb15d1c7SGleb Smirnoff 	KEG_LOCK_ASSERT(keg);
1133bb15d1c7SGleb Smirnoff 	MPASS(zone->uz_lockptr == &keg->uk_lock);
1134a553d4b8SJeff Roberson 
1135e20a199fSJeff Roberson 	allocf = keg->uk_allocf;
1136e20a199fSJeff Roberson 	KEG_UNLOCK(keg);
1137a553d4b8SJeff Roberson 
1138194a979eSMark Johnston 	slab = NULL;
1139194a979eSMark Johnston 	mem = NULL;
1140099a0e58SBosko Milekic 	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
114186220393SMark Johnston 		slab = zone_alloc_item(keg->uk_slabzone, NULL, domain, aflags);
1142fc03d22bSJeff Roberson 		if (slab == NULL)
1143fc03d22bSJeff Roberson 			goto out;
1144a553d4b8SJeff Roberson 	}
1145a553d4b8SJeff Roberson 
11463370c5bfSJeff Roberson 	/*
11473370c5bfSJeff Roberson 	 * This reproduces the old vm_zone behavior of zero filling pages the
11483370c5bfSJeff Roberson 	 * first time they are added to a zone.
11493370c5bfSJeff Roberson 	 *
11503370c5bfSJeff Roberson 	 * Malloced items are zeroed in uma_zalloc.
11513370c5bfSJeff Roberson 	 */
11523370c5bfSJeff Roberson 
1153099a0e58SBosko Milekic 	if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
115486220393SMark Johnston 		aflags |= M_ZERO;
11553370c5bfSJeff Roberson 	else
115686220393SMark Johnston 		aflags &= ~M_ZERO;
11573370c5bfSJeff Roberson 
1158263811f7SKip Macy 	if (keg->uk_flags & UMA_ZONE_NODUMP)
115986220393SMark Johnston 		aflags |= M_NODUMP;
1160263811f7SKip Macy 
1161e20a199fSJeff Roberson 	/* zone is passed for legacy reasons. */
1162194a979eSMark Johnston 	size = keg->uk_ppera * PAGE_SIZE;
116386220393SMark Johnston 	mem = allocf(zone, size, domain, &sflags, aflags);
1164a553d4b8SJeff Roberson 	if (mem == NULL) {
1165b23f72e9SBrian Feldman 		if (keg->uk_flags & UMA_ZONE_OFFPAGE)
11660095a784SJeff Roberson 			zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
1167fc03d22bSJeff Roberson 		slab = NULL;
1168fc03d22bSJeff Roberson 		goto out;
1169a553d4b8SJeff Roberson 	}
11702e47807cSJeff Roberson 	uma_total_inc(size);
11718355f576SJeff Roberson 
11725c0e403bSJeff Roberson 	/* Point the slab into the allocated memory */
1173099a0e58SBosko Milekic 	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
1174099a0e58SBosko Milekic 		slab = (uma_slab_t )(mem + keg->uk_pgoff);
11755c0e403bSJeff Roberson 
1176e20a199fSJeff Roberson 	if (keg->uk_flags & UMA_ZONE_VTOSLAB)
1177099a0e58SBosko Milekic 		for (i = 0; i < keg->uk_ppera; i++)
117899571dc3SJeff Roberson 			vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
11798355f576SJeff Roberson 
1180099a0e58SBosko Milekic 	slab->us_keg = keg;
11818355f576SJeff Roberson 	slab->us_data = mem;
1182099a0e58SBosko Milekic 	slab->us_freecount = keg->uk_ipers;
118386220393SMark Johnston 	slab->us_flags = sflags;
1184ab3185d1SJeff Roberson 	slab->us_domain = domain;
1185ef72505eSJeff Roberson 	BIT_FILL(SLAB_SETSIZE, &slab->us_free);
1186ef72505eSJeff Roberson #ifdef INVARIANTS
1187ef72505eSJeff Roberson 	BIT_ZERO(SLAB_SETSIZE, &slab->us_debugfree);
1188ef72505eSJeff Roberson #endif
1189099a0e58SBosko Milekic 
1190b23f72e9SBrian Feldman 	if (keg->uk_init != NULL) {
1191099a0e58SBosko Milekic 		for (i = 0; i < keg->uk_ipers; i++)
1192b23f72e9SBrian Feldman 			if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
119386220393SMark Johnston 			    keg->uk_size, flags) != 0)
1194b23f72e9SBrian Feldman 				break;
1195b23f72e9SBrian Feldman 		if (i != keg->uk_ipers) {
1196fc03d22bSJeff Roberson 			keg_free_slab(keg, slab, i);
1197fc03d22bSJeff Roberson 			slab = NULL;
1198fc03d22bSJeff Roberson 			goto out;
1199b23f72e9SBrian Feldman 		}
1200b23f72e9SBrian Feldman 	}
1201e20a199fSJeff Roberson 	KEG_LOCK(keg);
12025c0e403bSJeff Roberson 
12031431a748SGleb Smirnoff 	CTR3(KTR_UMA, "keg_alloc_slab: allocated slab %p for %s(%p)",
12041431a748SGleb Smirnoff 	    slab, keg->uk_name, keg);
12051431a748SGleb Smirnoff 
1206099a0e58SBosko Milekic 	if (keg->uk_flags & UMA_ZONE_HASH)
1207099a0e58SBosko Milekic 		UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
12088355f576SJeff Roberson 
1209099a0e58SBosko Milekic 	keg->uk_pages += keg->uk_ppera;
1210099a0e58SBosko Milekic 	keg->uk_free += keg->uk_ipers;
12118355f576SJeff Roberson 
1212194a979eSMark Johnston out:
12138355f576SJeff Roberson 	return (slab);
12148355f576SJeff Roberson }
12158355f576SJeff Roberson 
12168355f576SJeff Roberson /*
1217009b6fcbSJeff Roberson  * This function is intended to be used early on in place of page_alloc() so
1218009b6fcbSJeff Roberson  * that we may use the boot time page cache to satisfy allocations before
1219009b6fcbSJeff Roberson  * the VM is ready.
1220009b6fcbSJeff Roberson  */
1221009b6fcbSJeff Roberson static void *
1222ab3185d1SJeff Roberson startup_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
1223ab3185d1SJeff Roberson     int wait)
1224009b6fcbSJeff Roberson {
1225099a0e58SBosko Milekic 	uma_keg_t keg;
1226ac0a6fd0SGleb Smirnoff 	void *mem;
1227ac0a6fd0SGleb Smirnoff 	int pages;
1228099a0e58SBosko Milekic 
1229bb15d1c7SGleb Smirnoff 	keg = zone->uz_keg;
1230009b6fcbSJeff Roberson 	/*
1231f7d35785SGleb Smirnoff 	 * If we are in BOOT_BUCKETS or higher, then switch to the real
1232f7d35785SGleb Smirnoff 	 * allocator.  Zones with page sized slabs switch at BOOT_PAGEALLOC.
1233009b6fcbSJeff Roberson 	 */
1234f7d35785SGleb Smirnoff 	switch (booted) {
1235f7d35785SGleb Smirnoff 		case BOOT_COLD:
1236f7d35785SGleb Smirnoff 		case BOOT_STRAPPED:
1237f7d35785SGleb Smirnoff 			break;
1238f7d35785SGleb Smirnoff 		case BOOT_PAGEALLOC:
1239f7d35785SGleb Smirnoff 			if (keg->uk_ppera > 1)
1240f7d35785SGleb Smirnoff 				break;
1241f7d35785SGleb Smirnoff 		case BOOT_BUCKETS:
1242f7d35785SGleb Smirnoff 		case BOOT_RUNNING:
1243009b6fcbSJeff Roberson #ifdef UMA_MD_SMALL_ALLOC
1244f7d35785SGleb Smirnoff 			keg->uk_allocf = (keg->uk_ppera > 1) ?
1245f7d35785SGleb Smirnoff 			    page_alloc : uma_small_alloc;
1246009b6fcbSJeff Roberson #else
1247099a0e58SBosko Milekic 			keg->uk_allocf = page_alloc;
1248009b6fcbSJeff Roberson #endif
1249ab3185d1SJeff Roberson 			return keg->uk_allocf(zone, bytes, domain, pflag, wait);
1250009b6fcbSJeff Roberson 	}
1251009b6fcbSJeff Roberson 
1252009b6fcbSJeff Roberson 	/*
1253f7d35785SGleb Smirnoff 	 * Check our small startup cache to see if it has pages remaining.
1254f7d35785SGleb Smirnoff 	 */
1255f7d35785SGleb Smirnoff 	pages = howmany(bytes, PAGE_SIZE);
1256f7d35785SGleb Smirnoff 	KASSERT(pages > 0, ("%s can't reserve 0 pages", __func__));
1257f7d35785SGleb Smirnoff 	if (pages > boot_pages)
1258f7d35785SGleb Smirnoff 		panic("UMA zone \"%s\": Increase vm.boot_pages", zone->uz_name);
1259f7d35785SGleb Smirnoff #ifdef DIAGNOSTIC
1260f7d35785SGleb Smirnoff 	printf("%s from \"%s\", %d boot pages left\n", __func__, zone->uz_name,
1261f7d35785SGleb Smirnoff 	    boot_pages);
1262f7d35785SGleb Smirnoff #endif
1263f7d35785SGleb Smirnoff 	mem = bootmem;
1264f7d35785SGleb Smirnoff 	boot_pages -= pages;
1265f7d35785SGleb Smirnoff 	bootmem += pages * PAGE_SIZE;
1266f7d35785SGleb Smirnoff 	*pflag = UMA_SLAB_BOOT;
1267f7d35785SGleb Smirnoff 
1268f7d35785SGleb Smirnoff 	return (mem);
1269f7d35785SGleb Smirnoff }
1270f7d35785SGleb Smirnoff 
1271f7d35785SGleb Smirnoff /*
12728355f576SJeff Roberson  * Allocates a number of pages from the system
12738355f576SJeff Roberson  *
12748355f576SJeff Roberson  * Arguments:
12758355f576SJeff Roberson  *	bytes   The number of bytes requested
 *	domain  The NUMA domain to allocate from
 *	pflag   On return, the slab flags (UMA_SLAB_KERNEL here)
12768355f576SJeff Roberson  *	wait    Shall we wait?
12778355f576SJeff Roberson  *
12788355f576SJeff Roberson  * Returns:
12798355f576SJeff Roberson  *	A pointer to the allocated memory or possibly
12808355f576SJeff Roberson  *	NULL if M_NOWAIT is set.
12818355f576SJeff Roberson  */
12828355f576SJeff Roberson static void *
1283ab3185d1SJeff Roberson page_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
1284ab3185d1SJeff Roberson     int wait)
12858355f576SJeff Roberson {
12868355f576SJeff Roberson 	void *p;	/* Returned page */
12878355f576SJeff Roberson 
12882e47807cSJeff Roberson 	*pflag = UMA_SLAB_KERNEL;
12899978bd99SMark Johnston 	p = (void *)kmem_malloc_domainset(DOMAINSET_FIXED(domain), bytes, wait);
12908355f576SJeff Roberson 
12918355f576SJeff Roberson 	return (p);
12928355f576SJeff Roberson }
12938355f576SJeff Roberson 
1294ab3059a8SMatt Macy static void *
1295ab3059a8SMatt Macy pcpu_page_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
1296ab3059a8SMatt Macy     int wait)
1297ab3059a8SMatt Macy {
1298ab3059a8SMatt Macy 	struct pglist alloctail;
1299ab3059a8SMatt Macy 	vm_offset_t addr, zkva;
1300ab3059a8SMatt Macy 	int cpu, flags;
1301ab3059a8SMatt Macy 	vm_page_t p, p_next;
1302ab3059a8SMatt Macy #ifdef NUMA
1303ab3059a8SMatt Macy 	struct pcpu *pc;
1304ab3059a8SMatt Macy #endif
1305ab3059a8SMatt Macy 
1306ab3059a8SMatt Macy 	MPASS(bytes == (mp_maxid + 1) * PAGE_SIZE);
1307ab3059a8SMatt Macy 
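	/*
	 * Allocate one wired page for each CPU slot up to mp_maxid,
	 * preferring a present CPU's own NUMA domain where it is known,
	 * then map the pages at consecutive kernel virtual addresses so
	 * that per-CPU items end up one page apart.
	 */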
1308013072f0SMark Johnston 	TAILQ_INIT(&alloctail);
1309ab3059a8SMatt Macy 	flags = VM_ALLOC_SYSTEM | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ |
1310013072f0SMark Johnston 	    malloc2vm_flags(wait);
1311013072f0SMark Johnston 	*pflag = UMA_SLAB_KERNEL;
1312ab3059a8SMatt Macy 	for (cpu = 0; cpu <= mp_maxid; cpu++) {
1313ab3059a8SMatt Macy 		if (CPU_ABSENT(cpu)) {
1314ab3059a8SMatt Macy 			p = vm_page_alloc(NULL, 0, flags);
1315ab3059a8SMatt Macy 		} else {
1316ab3059a8SMatt Macy #ifndef NUMA
1317ab3059a8SMatt Macy 			p = vm_page_alloc(NULL, 0, flags);
1318ab3059a8SMatt Macy #else
1319ab3059a8SMatt Macy 			pc = pcpu_find(cpu);
1320ab3059a8SMatt Macy 			p = vm_page_alloc_domain(NULL, 0, pc->pc_domain, flags);
1321ab3059a8SMatt Macy 			if (__predict_false(p == NULL))
1322ab3059a8SMatt Macy 				p = vm_page_alloc(NULL, 0, flags);
1323ab3059a8SMatt Macy #endif
1324ab3059a8SMatt Macy 		}
1325ab3059a8SMatt Macy 		if (__predict_false(p == NULL))
1326ab3059a8SMatt Macy 			goto fail;
1327ab3059a8SMatt Macy 		TAILQ_INSERT_TAIL(&alloctail, p, listq);
1328ab3059a8SMatt Macy 	}
1329ab3059a8SMatt Macy 	if ((addr = kva_alloc(bytes)) == 0)
1330ab3059a8SMatt Macy 		goto fail;
1331ab3059a8SMatt Macy 	zkva = addr;
1332ab3059a8SMatt Macy 	TAILQ_FOREACH(p, &alloctail, listq) {
1333ab3059a8SMatt Macy 		pmap_qenter(zkva, &p, 1);
1334ab3059a8SMatt Macy 		zkva += PAGE_SIZE;
1335ab3059a8SMatt Macy 	}
1336ab3059a8SMatt Macy 	return ((void*)addr);
1337ab3059a8SMatt Macy fail:
1338ab3059a8SMatt Macy 	TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) {
133988ea538aSMark Johnston 		vm_page_unwire_noq(p);
1340ab3059a8SMatt Macy 		vm_page_free(p);
1341ab3059a8SMatt Macy 	}
1342ab3059a8SMatt Macy 	return (NULL);
1343ab3059a8SMatt Macy }
1344ab3059a8SMatt Macy 
13458355f576SJeff Roberson /*
13468355f576SJeff Roberson  * Allocates a number of pages not belonging to a VM object
13478355f576SJeff Roberson  *
13488355f576SJeff Roberson  * Arguments:
13498355f576SJeff Roberson  *	bytes  The number of bytes requested
13508355f576SJeff Roberson  *	wait   Shall we wait?
13518355f576SJeff Roberson  *
13528355f576SJeff Roberson  * Returns:
13538355f576SJeff Roberson  *	A pointer to the allocated memory or possibly
13548355f576SJeff Roberson  *	NULL if M_NOWAIT is set.
13558355f576SJeff Roberson  */
13568355f576SJeff Roberson static void *
1357ab3185d1SJeff Roberson noobj_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *flags,
1358ab3185d1SJeff Roberson     int wait)
13598355f576SJeff Roberson {
1360a4915c21SAttilio Rao 	TAILQ_HEAD(, vm_page) alloctail;
1361a4915c21SAttilio Rao 	u_long npages;
1362b245ac95SAlan Cox 	vm_offset_t retkva, zkva;
1363a4915c21SAttilio Rao 	vm_page_t p, p_next;
1364e20a199fSJeff Roberson 	uma_keg_t keg;
13658355f576SJeff Roberson 
1366a4915c21SAttilio Rao 	TAILQ_INIT(&alloctail);
1367bb15d1c7SGleb Smirnoff 	keg = zone->uz_keg;
1368a4915c21SAttilio Rao 
1369a4915c21SAttilio Rao 	npages = howmany(bytes, PAGE_SIZE);
1370a4915c21SAttilio Rao 	while (npages > 0) {
1371ab3185d1SJeff Roberson 		p = vm_page_alloc_domain(NULL, 0, domain, VM_ALLOC_INTERRUPT |
13728d6fbbb8SJeff Roberson 		    VM_ALLOC_WIRED | VM_ALLOC_NOOBJ |
1373772c8b67SKonstantin Belousov 		    ((wait & M_WAITOK) != 0 ? VM_ALLOC_WAITOK :
1374772c8b67SKonstantin Belousov 		    VM_ALLOC_NOWAIT));
1375a4915c21SAttilio Rao 		if (p != NULL) {
1376a4915c21SAttilio Rao 			/*
1377a4915c21SAttilio Rao 			 * Since the page does not belong to an object, its
1378a4915c21SAttilio Rao 			 * listq is unused.
1379a4915c21SAttilio Rao 			 */
1380a4915c21SAttilio Rao 			TAILQ_INSERT_TAIL(&alloctail, p, listq);
1381a4915c21SAttilio Rao 			npages--;
1382a4915c21SAttilio Rao 			continue;
1383a4915c21SAttilio Rao 		}
13848355f576SJeff Roberson 		/*
1385a4915c21SAttilio Rao 		 * Page allocation failed, free intermediate pages and
1386a4915c21SAttilio Rao 		 * exit.
13878355f576SJeff Roberson 		 */
1388a4915c21SAttilio Rao 		TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) {
138988ea538aSMark Johnston 			vm_page_unwire_noq(p);
1390b245ac95SAlan Cox 			vm_page_free(p);
1391b245ac95SAlan Cox 		}
1392a4915c21SAttilio Rao 		return (NULL);
1393b245ac95SAlan Cox 	}
13948355f576SJeff Roberson 	*flags = UMA_SLAB_PRIV;
1395a4915c21SAttilio Rao 	zkva = keg->uk_kva +
1396a4915c21SAttilio Rao 	    atomic_fetchadd_long(&keg->uk_offset, round_page(bytes));
1397a4915c21SAttilio Rao 	retkva = zkva;
1398a4915c21SAttilio Rao 	TAILQ_FOREACH(p, &alloctail, listq) {
1399a4915c21SAttilio Rao 		pmap_qenter(zkva, &p, 1);
1400a4915c21SAttilio Rao 		zkva += PAGE_SIZE;
1401a4915c21SAttilio Rao 	}
14028355f576SJeff Roberson 
14038355f576SJeff Roberson 	return ((void *)retkva);
14048355f576SJeff Roberson }
14058355f576SJeff Roberson 
14068355f576SJeff Roberson /*
14078355f576SJeff Roberson  * Frees a number of pages to the system
14088355f576SJeff Roberson  *
14098355f576SJeff Roberson  * Arguments:
14108355f576SJeff Roberson  *	mem   A pointer to the memory to be freed
14118355f576SJeff Roberson  *	size  The size of the memory being freed
14128355f576SJeff Roberson  *	flags The original p->us_flags field
14138355f576SJeff Roberson  *
14148355f576SJeff Roberson  * Returns:
14158355f576SJeff Roberson  *	Nothing
14168355f576SJeff Roberson  */
14178355f576SJeff Roberson static void
1418f2c2231eSRyan Stone page_free(void *mem, vm_size_t size, uint8_t flags)
14198355f576SJeff Roberson {
14203370c5bfSJeff Roberson 
142149bfa624SAlan Cox 	if ((flags & UMA_SLAB_KERNEL) == 0)
1422b5345ef1SJustin Hibbits 		panic("UMA: page_free used with invalid flags %x", flags);
14238355f576SJeff Roberson 
142449bfa624SAlan Cox 	kmem_free((vm_offset_t)mem, size);
14258355f576SJeff Roberson }
14268355f576SJeff Roberson 
14278355f576SJeff Roberson /*
1428ab3059a8SMatt Macy  * Frees pcpu zone allocations
1429ab3059a8SMatt Macy  *
1430ab3059a8SMatt Macy  * Arguments:
1431ab3059a8SMatt Macy  *	mem   A pointer to the memory to be freed
1432ab3059a8SMatt Macy  *	size  The size of the memory being freed
1433ab3059a8SMatt Macy  *	flags The original p->us_flags field
1434ab3059a8SMatt Macy  *
1435ab3059a8SMatt Macy  * Returns:
1436ab3059a8SMatt Macy  *	Nothing
1437ab3059a8SMatt Macy  */
1438ab3059a8SMatt Macy static void
1439ab3059a8SMatt Macy pcpu_page_free(void *mem, vm_size_t size, uint8_t flags)
1440ab3059a8SMatt Macy {
1441ab3059a8SMatt Macy 	vm_offset_t sva, curva;
1442ab3059a8SMatt Macy 	vm_paddr_t paddr;
1443ab3059a8SMatt Macy 	vm_page_t m;
1444ab3059a8SMatt Macy 
1445ab3059a8SMatt Macy 	MPASS(size == (mp_maxid + 1) * PAGE_SIZE);
1446ab3059a8SMatt Macy 	sva = (vm_offset_t)mem;
1447ab3059a8SMatt Macy 	for (curva = sva; curva < sva + size; curva += PAGE_SIZE) {
1448ab3059a8SMatt Macy 		paddr = pmap_kextract(curva);
1449ab3059a8SMatt Macy 		m = PHYS_TO_VM_PAGE(paddr);
145088ea538aSMark Johnston 		vm_page_unwire_noq(m);
1451ab3059a8SMatt Macy 		vm_page_free(m);
1452ab3059a8SMatt Macy 	}
1453ab3059a8SMatt Macy 	pmap_qremove(sva, size >> PAGE_SHIFT);
1454ab3059a8SMatt Macy 	kva_free(sva, size);
1455ab3059a8SMatt Macy }
1456ab3059a8SMatt Macy 
1458ab3059a8SMatt Macy /*
14598355f576SJeff Roberson  * Zero fill initializer
14608355f576SJeff Roberson  *
14618355f576SJeff Roberson  * Arguments/Returns follow uma_init specifications
14628355f576SJeff Roberson  */
1463b23f72e9SBrian Feldman static int
1464b23f72e9SBrian Feldman zero_init(void *mem, int size, int flags)
14658355f576SJeff Roberson {
14668355f576SJeff Roberson 	bzero(mem, size);
1467b23f72e9SBrian Feldman 	return (0);
14688355f576SJeff Roberson }
14698355f576SJeff Roberson 
14708355f576SJeff Roberson /*
1471e20a199fSJeff Roberson  * Finish creating a small uma keg.  This calculates ipers and the keg size.
14728355f576SJeff Roberson  *
14738355f576SJeff Roberson  * Arguments
1474e20a199fSJeff Roberson  *	keg  The keg we should initialize
14758355f576SJeff Roberson  *
14768355f576SJeff Roberson  * Returns
14778355f576SJeff Roberson  *	Nothing
14788355f576SJeff Roberson  */
14798355f576SJeff Roberson static void
1480e20a199fSJeff Roberson keg_small_init(uma_keg_t keg)
14818355f576SJeff Roberson {
1482244f4554SBosko Milekic 	u_int rsize;
1483244f4554SBosko Milekic 	u_int memused;
1484244f4554SBosko Milekic 	u_int wastedspace;
1485244f4554SBosko Milekic 	u_int shsize;
1486a55ebb7cSAndriy Gapon 	u_int slabsize;
14878355f576SJeff Roberson 
1488ad97af7eSGleb Smirnoff 	if (keg->uk_flags & UMA_ZONE_PCPU) {
148996c85efbSNathan Whitehorn 		u_int ncpus = (mp_maxid + 1) ? (mp_maxid + 1) : MAXCPU;
1490e28a647dSGleb Smirnoff 
1491ab3059a8SMatt Macy 		slabsize = UMA_PCPU_ALLOC_SIZE;
1492ab3059a8SMatt Macy 		keg->uk_ppera = ncpus;
1493ad97af7eSGleb Smirnoff 	} else {
1494a55ebb7cSAndriy Gapon 		slabsize = UMA_SLAB_SIZE;
1495ad97af7eSGleb Smirnoff 		keg->uk_ppera = 1;
1496ad97af7eSGleb Smirnoff 	}
1497ad97af7eSGleb Smirnoff 
1498ef72505eSJeff Roberson 	/*
1499ef72505eSJeff Roberson 	 * Calculate the size of each allocation (rsize) according to
1500ef72505eSJeff Roberson 	 * alignment.  If the requested size is smaller than the smallest
1501ef72505eSJeff Roberson 	 * unit we have allocation bits for, we round it up.
1502ef72505eSJeff Roberson 	 */
1503099a0e58SBosko Milekic 	rsize = keg->uk_size;
1504a55ebb7cSAndriy Gapon 	if (rsize < slabsize / SLAB_SETSIZE)
1505a55ebb7cSAndriy Gapon 		rsize = slabsize / SLAB_SETSIZE;
1506099a0e58SBosko Milekic 	if (rsize & keg->uk_align)
1507099a0e58SBosko Milekic 		rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
1508099a0e58SBosko Milekic 	keg->uk_rsize = rsize;
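	/*
	 * Illustrative example with hypothetical values: for a 16-byte
	 * alignment mask (uk_align == 15) and a 25-byte request, the low
	 * bits are set (25 & 15 == 9), so rsize is rounded up to
	 * (25 & ~15) + 16 == 32 bytes per item.
	 */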
1509ad97af7eSGleb Smirnoff 
1510ad97af7eSGleb Smirnoff 	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 ||
1511ab3059a8SMatt Macy 	    keg->uk_rsize < UMA_PCPU_ALLOC_SIZE,
1512ad97af7eSGleb Smirnoff 	    ("%s: size %u too large", __func__, keg->uk_rsize));
15138355f576SJeff Roberson 
1514ef72505eSJeff Roberson 	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
15152864dbbfSGleb Smirnoff 		shsize = 0;
1516ef72505eSJeff Roberson 	else
15173d5e3df7SGleb Smirnoff 		shsize = SIZEOF_UMA_SLAB;
15188355f576SJeff Roberson 
15191ca6ed45SGleb Smirnoff 	if (rsize <= slabsize - shsize)
1520a55ebb7cSAndriy Gapon 		keg->uk_ipers = (slabsize - shsize) / rsize;
15211ca6ed45SGleb Smirnoff 	else {
15221ca6ed45SGleb Smirnoff 		/* Handle the special case when we have 1 item per slab, so
15231ca6ed45SGleb Smirnoff 		 * that the alignment requirement can be relaxed. */
15241ca6ed45SGleb Smirnoff 		KASSERT(keg->uk_size <= slabsize - shsize,
15251ca6ed45SGleb Smirnoff 		    ("%s: size %u greater than slab", __func__, keg->uk_size));
15261ca6ed45SGleb Smirnoff 		keg->uk_ipers = 1;
15271ca6ed45SGleb Smirnoff 	}
1528ef72505eSJeff Roberson 	KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
1529ad97af7eSGleb Smirnoff 	    ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
1530ad97af7eSGleb Smirnoff 
1531244f4554SBosko Milekic 	memused = keg->uk_ipers * rsize + shsize;
1532a55ebb7cSAndriy Gapon 	wastedspace = slabsize - memused;
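	/*
	 * Worked example with hypothetical numbers: for a 4096-byte slab
	 * with a 32-byte inline header and rsize == 256, ipers is
	 * (4096 - 32) / 256 == 15, so memused is 15 * 256 + 32 == 3872
	 * and wastedspace is 224 bytes.
	 */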
1533244f4554SBosko Milekic 
153420e8e865SBosko Milekic 	/*
1535244f4554SBosko Milekic 	 * We can't do OFFPAGE if we're internal or if we've been
153620e8e865SBosko Milekic 	 * asked not to go to the VM for buckets.  If we do this we
15376fd34d6fSJeff Roberson 	 * may end up going to the VM for slabs, which we do not
15386fd34d6fSJeff Roberson 	 * want to do if we're UMA_ZFLAG_CACHEONLY as a result
15396fd34d6fSJeff Roberson 	 * of UMA_ZONE_VM, which clearly forbids it.
154020e8e865SBosko Milekic 	 */
1541099a0e58SBosko Milekic 	if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
1542099a0e58SBosko Milekic 	    (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
15438355f576SJeff Roberson 		return;
1544244f4554SBosko Milekic 
1545ef72505eSJeff Roberson 	/*
1546ef72505eSJeff Roberson 	 * See if using an OFFPAGE slab will limit our waste.  Only do
1547ef72505eSJeff Roberson 	 * this if it permits more items per-slab.
1548ef72505eSJeff Roberson 	 *
1549ef72505eSJeff Roberson 	 * XXX We could try growing slabsize to limit max waste as well.
1550ef72505eSJeff Roberson 	 * Historically this was not done because the VM could not
1551ef72505eSJeff Roberson 	 * efficiently handle contiguous allocations.
1552ef72505eSJeff Roberson 	 */
1553a55ebb7cSAndriy Gapon 	if ((wastedspace >= slabsize / UMA_MAX_WASTE) &&
1554a55ebb7cSAndriy Gapon 	    (keg->uk_ipers < (slabsize / keg->uk_rsize))) {
1555a55ebb7cSAndriy Gapon 		keg->uk_ipers = slabsize / keg->uk_rsize;
1556ef72505eSJeff Roberson 		KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
1557ad97af7eSGleb Smirnoff 		    ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
15581431a748SGleb Smirnoff 		CTR6(KTR_UMA, "UMA decided we need offpage slab headers for "
15591431a748SGleb Smirnoff 		    "keg: %s(%p), calculated wastedspace = %d, "
1560244f4554SBosko Milekic 		    "maximum wasted space allowed = %d, "
1561244f4554SBosko Milekic 		    "calculated ipers = %d, "
15621431a748SGleb Smirnoff 		    "new wasted space = %d\n", keg->uk_name, keg, wastedspace,
1563a55ebb7cSAndriy Gapon 		    slabsize / UMA_MAX_WASTE, keg->uk_ipers,
1564a55ebb7cSAndriy Gapon 		    slabsize - keg->uk_ipers * keg->uk_rsize);
1565099a0e58SBosko Milekic 		keg->uk_flags |= UMA_ZONE_OFFPAGE;
15668355f576SJeff Roberson 	}
1567ad97af7eSGleb Smirnoff 
1568ad97af7eSGleb Smirnoff 	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
1569ad97af7eSGleb Smirnoff 	    (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
1570ad97af7eSGleb Smirnoff 		keg->uk_flags |= UMA_ZONE_HASH;
15718355f576SJeff Roberson }
15728355f576SJeff Roberson 
15738355f576SJeff Roberson /*
1574e20a199fSJeff Roberson  * Finish creating a large (> UMA_SLAB_SIZE) uma keg.  Just give in and do
15758355f576SJeff Roberson  * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
15768355f576SJeff Roberson  * more complicated.
15778355f576SJeff Roberson  *
15788355f576SJeff Roberson  * Arguments
1579e20a199fSJeff Roberson  *	keg  The keg we should initialize
15808355f576SJeff Roberson  *
15818355f576SJeff Roberson  * Returns
15828355f576SJeff Roberson  *	Nothing
15838355f576SJeff Roberson  */
15848355f576SJeff Roberson static void
1585e20a199fSJeff Roberson keg_large_init(uma_keg_t keg)
15868355f576SJeff Roberson {
15878355f576SJeff Roberson 
1588e20a199fSJeff Roberson 	KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
1589ad97af7eSGleb Smirnoff 	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
1590ad97af7eSGleb Smirnoff 	    ("%s: Cannot large-init a UMA_ZONE_PCPU keg", __func__));
159120e8e865SBosko Milekic 
1592ad97af7eSGleb Smirnoff 	keg->uk_ppera = howmany(keg->uk_size, PAGE_SIZE);
1593099a0e58SBosko Milekic 	keg->uk_ipers = 1;
1594e9a069d8SJohn Baldwin 	keg->uk_rsize = keg->uk_size;
1595e9a069d8SJohn Baldwin 
1596cec48e00SAlexander Motin 	/* Check whether we have enough space to not do OFFPAGE. */
15973d5e3df7SGleb Smirnoff 	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) == 0 &&
15983d5e3df7SGleb Smirnoff 	    PAGE_SIZE * keg->uk_ppera - keg->uk_rsize < SIZEOF_UMA_SLAB) {
15992934eb8aSMark Johnston 		/*
16002934eb8aSMark Johnston 		 * We can't do OFFPAGE if we're internal, in which case
16012934eb8aSMark Johnston 		 * we need an extra page per allocation to contain the
16022934eb8aSMark Johnston 		 * slab header.
16032934eb8aSMark Johnston 		 */
16042934eb8aSMark Johnston 		if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) == 0)
1605099a0e58SBosko Milekic 			keg->uk_flags |= UMA_ZONE_OFFPAGE;
16062934eb8aSMark Johnston 		else
16072934eb8aSMark Johnston 			keg->uk_ppera++;
16082934eb8aSMark Johnston 	}
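	/*
	 * Illustrative arithmetic, assuming PAGE_SIZE == 4096 and a
	 * hypothetical 96-byte SIZEOF_UMA_SLAB: a 6000-byte keg gets
	 * uk_ppera == 2, and the 8192 - 6000 == 2192 leftover bytes can
	 * hold the slab header inline; an 8150-byte keg leaves only 42
	 * bytes, so it goes OFFPAGE (or grows uk_ppera by one if internal).
	 */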
1609cec48e00SAlexander Motin 
1610cec48e00SAlexander Motin 	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
1611cec48e00SAlexander Motin 	    (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
1612099a0e58SBosko Milekic 		keg->uk_flags |= UMA_ZONE_HASH;
16138355f576SJeff Roberson }
16148355f576SJeff Roberson 
1615e20a199fSJeff Roberson static void
1616e20a199fSJeff Roberson keg_cachespread_init(uma_keg_t keg)
1617e20a199fSJeff Roberson {
1618e20a199fSJeff Roberson 	int alignsize;
1619e20a199fSJeff Roberson 	int trailer;
1620e20a199fSJeff Roberson 	int pages;
1621e20a199fSJeff Roberson 	int rsize;
1622e20a199fSJeff Roberson 
1623ad97af7eSGleb Smirnoff 	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
1624ad97af7eSGleb Smirnoff 	    ("%s: Cannot cachespread-init a UMA_ZONE_PCPU keg", __func__));
1625ad97af7eSGleb Smirnoff 
1626e20a199fSJeff Roberson 	alignsize = keg->uk_align + 1;
1627e20a199fSJeff Roberson 	rsize = keg->uk_size;
1628e20a199fSJeff Roberson 	/*
1629e20a199fSJeff Roberson 	 * We want one item to start on every align boundary in a page.  To
1630e20a199fSJeff Roberson 	 * do this we will span pages.  We will also extend the item by the
1631e20a199fSJeff Roberson 	 * size of align if it is an even multiple of align.  Otherwise, it
1632e20a199fSJeff Roberson 	 * would fall on the same boundary every time.
1633e20a199fSJeff Roberson 	 */
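	/*
	 * Hypothetical example: with 64-byte alignment (alignsize == 64)
	 * and uk_size == 128, rsize is already aligned and is an even
	 * multiple of 64, so it is padded to 192.  Because 192 and a
	 * 4096-byte page share only a factor of 64, consecutive items
	 * cycle through every 64-byte offset in a page rather than
	 * landing on the same boundary each time.
	 */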
1634e20a199fSJeff Roberson 	if (rsize & keg->uk_align)
1635e20a199fSJeff Roberson 		rsize = (rsize & ~keg->uk_align) + alignsize;
1636e20a199fSJeff Roberson 	if ((rsize & alignsize) == 0)
1637e20a199fSJeff Roberson 		rsize += alignsize;
1638e20a199fSJeff Roberson 	trailer = rsize - keg->uk_size;
1639e20a199fSJeff Roberson 	pages = (rsize * (PAGE_SIZE / alignsize)) / PAGE_SIZE;
1640e20a199fSJeff Roberson 	pages = MIN(pages, (128 * 1024) / PAGE_SIZE);
1641e20a199fSJeff Roberson 	keg->uk_rsize = rsize;
1642e20a199fSJeff Roberson 	keg->uk_ppera = pages;
1643e20a199fSJeff Roberson 	keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize;
1644e20a199fSJeff Roberson 	keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
16452367b4ddSDimitry Andric 	KASSERT(keg->uk_ipers <= SLAB_SETSIZE,
164642321809SGleb Smirnoff 	    ("%s: keg->uk_ipers too high(%d) increase max_ipers", __func__,
1647e20a199fSJeff Roberson 	    keg->uk_ipers));
1648e20a199fSJeff Roberson }
1649e20a199fSJeff Roberson 
16508355f576SJeff Roberson /*
1651099a0e58SBosko Milekic  * Keg header ctor.  This initializes all fields, locks, etc., and inserts
1652099a0e58SBosko Milekic  * the keg onto the global keg list.
16538355f576SJeff Roberson  *
16548355f576SJeff Roberson  * Arguments/Returns follow uma_ctor specifications
1655099a0e58SBosko Milekic  *	udata  Actually uma_kctor_args
1656099a0e58SBosko Milekic  */
1657b23f72e9SBrian Feldman static int
1658b23f72e9SBrian Feldman keg_ctor(void *mem, int size, void *udata, int flags)
1659099a0e58SBosko Milekic {
1660099a0e58SBosko Milekic 	struct uma_kctor_args *arg = udata;
1661099a0e58SBosko Milekic 	uma_keg_t keg = mem;
1662099a0e58SBosko Milekic 	uma_zone_t zone;
1663099a0e58SBosko Milekic 
1664099a0e58SBosko Milekic 	bzero(keg, size);
1665099a0e58SBosko Milekic 	keg->uk_size = arg->size;
1666099a0e58SBosko Milekic 	keg->uk_init = arg->uminit;
1667099a0e58SBosko Milekic 	keg->uk_fini = arg->fini;
1668099a0e58SBosko Milekic 	keg->uk_align = arg->align;
1669099a0e58SBosko Milekic 	keg->uk_free = 0;
16706fd34d6fSJeff Roberson 	keg->uk_reserve = 0;
1671099a0e58SBosko Milekic 	keg->uk_pages = 0;
1672099a0e58SBosko Milekic 	keg->uk_flags = arg->flags;
1673099a0e58SBosko Milekic 	keg->uk_slabzone = NULL;
1674099a0e58SBosko Milekic 
1675099a0e58SBosko Milekic 	/*
1676194a979eSMark Johnston 	 * We use a global round-robin policy by default.  Zones with
1677194a979eSMark Johnston 	 * UMA_ZONE_NUMA set will use first-touch instead, in which case the
1678194a979eSMark Johnston 	 * iterator is never run.
1679194a979eSMark Johnston 	 */
1680194a979eSMark Johnston 	keg->uk_dr.dr_policy = DOMAINSET_RR();
1681194a979eSMark Johnston 	keg->uk_dr.dr_iter = 0;
1682194a979eSMark Johnston 
1683194a979eSMark Johnston 	/*
1684099a0e58SBosko Milekic 	 * The master zone is passed to us at keg-creation time.
1685099a0e58SBosko Milekic 	 */
1686099a0e58SBosko Milekic 	zone = arg->zone;
1687e20a199fSJeff Roberson 	keg->uk_name = zone->uz_name;
1688099a0e58SBosko Milekic 
1689099a0e58SBosko Milekic 	if (arg->flags & UMA_ZONE_VM)
1690099a0e58SBosko Milekic 		keg->uk_flags |= UMA_ZFLAG_CACHEONLY;
1691099a0e58SBosko Milekic 
1692099a0e58SBosko Milekic 	if (arg->flags & UMA_ZONE_ZINIT)
1693099a0e58SBosko Milekic 		keg->uk_init = zero_init;
1694099a0e58SBosko Milekic 
1695cfcae3f8SGleb Smirnoff 	if (arg->flags & UMA_ZONE_MALLOC)
1696e20a199fSJeff Roberson 		keg->uk_flags |= UMA_ZONE_VTOSLAB;
1697e20a199fSJeff Roberson 
1698ad97af7eSGleb Smirnoff 	if (arg->flags & UMA_ZONE_PCPU)
1699ad97af7eSGleb Smirnoff #ifdef SMP
1700ad97af7eSGleb Smirnoff 		keg->uk_flags |= UMA_ZONE_OFFPAGE;
1701ad97af7eSGleb Smirnoff #else
1702ad97af7eSGleb Smirnoff 		keg->uk_flags &= ~UMA_ZONE_PCPU;
1703ad97af7eSGleb Smirnoff #endif
1704ad97af7eSGleb Smirnoff 
1705ef72505eSJeff Roberson 	if (keg->uk_flags & UMA_ZONE_CACHESPREAD) {
1706e20a199fSJeff Roberson 		keg_cachespread_init(keg);
1707244f4554SBosko Milekic 	} else {
1708b92b26adSGleb Smirnoff 		if (keg->uk_size > UMA_SLAB_SPACE)
1709e20a199fSJeff Roberson 			keg_large_init(keg);
1710244f4554SBosko Milekic 		else
1711e20a199fSJeff Roberson 			keg_small_init(keg);
1712244f4554SBosko Milekic 	}
1713099a0e58SBosko Milekic 
1714cfcae3f8SGleb Smirnoff 	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
1715099a0e58SBosko Milekic 		keg->uk_slabzone = slabzone;
1716099a0e58SBosko Milekic 
1717099a0e58SBosko Milekic 	/*
1718099a0e58SBosko Milekic 	 * If we haven't booted yet we need allocations to go through the
1719099a0e58SBosko Milekic 	 * startup cache until the vm is ready.
1720099a0e58SBosko Milekic 	 */
1721f4bef67cSGleb Smirnoff 	if (booted < BOOT_PAGEALLOC)
17228cd02d00SAlan Cox 		keg->uk_allocf = startup_alloc;
172377e19437SGleb Smirnoff #ifdef UMA_MD_SMALL_ALLOC
172477e19437SGleb Smirnoff 	else if (keg->uk_ppera == 1)
172577e19437SGleb Smirnoff 		keg->uk_allocf = uma_small_alloc;
17268cd02d00SAlan Cox #endif
1727ab3059a8SMatt Macy 	else if (keg->uk_flags & UMA_ZONE_PCPU)
1728ab3059a8SMatt Macy 		keg->uk_allocf = pcpu_page_alloc;
172977e19437SGleb Smirnoff 	else
173077e19437SGleb Smirnoff 		keg->uk_allocf = page_alloc;
173177e19437SGleb Smirnoff #ifdef UMA_MD_SMALL_ALLOC
173277e19437SGleb Smirnoff 	if (keg->uk_ppera == 1)
173377e19437SGleb Smirnoff 		keg->uk_freef = uma_small_free;
173477e19437SGleb Smirnoff 	else
173577e19437SGleb Smirnoff #endif
1736ab3059a8SMatt Macy 	if (keg->uk_flags & UMA_ZONE_PCPU)
1737ab3059a8SMatt Macy 		keg->uk_freef = pcpu_page_free;
1738ab3059a8SMatt Macy 	else
173977e19437SGleb Smirnoff 		keg->uk_freef = page_free;
1740099a0e58SBosko Milekic 
1741099a0e58SBosko Milekic 	/*
1742af526374SJeff Roberson 	 * Initialize keg's lock
1743099a0e58SBosko Milekic 	 */
1744af526374SJeff Roberson 	KEG_LOCK_INIT(keg, (arg->flags & UMA_ZONE_MTXCLASS));
1745099a0e58SBosko Milekic 
1746099a0e58SBosko Milekic 	/*
1747099a0e58SBosko Milekic 	 * If we're putting the slab header in the actual page we need to
17483d5e3df7SGleb Smirnoff 	 * figure out where in each page it goes.  See SIZEOF_UMA_SLAB
17493d5e3df7SGleb Smirnoff 	 * macro definition.
1750099a0e58SBosko Milekic 	 */
1751099a0e58SBosko Milekic 	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
17523d5e3df7SGleb Smirnoff 		keg->uk_pgoff = (PAGE_SIZE * keg->uk_ppera) - SIZEOF_UMA_SLAB;
1753244f4554SBosko Milekic 		/*
1754244f4554SBosko Milekic 		 * The only way the following is possible is if our
1755244f4554SBosko Milekic 		 * UMA_ALIGN_PTR adjustments have made us bigger than
1756244f4554SBosko Milekic 		 * UMA_SLAB_SIZE.  I haven't checked whether this is
1757244f4554SBosko Milekic 		 * mathematically possible for all cases, so we make
1758244f4554SBosko Milekic 		 * sure here anyway.
1759244f4554SBosko Milekic 		 */
17603d5e3df7SGleb Smirnoff 		KASSERT(keg->uk_pgoff + sizeof(struct uma_slab) <=
17613d5e3df7SGleb Smirnoff 		    PAGE_SIZE * keg->uk_ppera,
17623d5e3df7SGleb Smirnoff 		    ("zone %s ipers %d rsize %d size %d slab won't fit",
17633d5e3df7SGleb Smirnoff 		    zone->uz_name, keg->uk_ipers, keg->uk_rsize, keg->uk_size));
1764099a0e58SBosko Milekic 	}
1765099a0e58SBosko Milekic 
1766099a0e58SBosko Milekic 	if (keg->uk_flags & UMA_ZONE_HASH)
17673b2f2cb8SAlexander Motin 		hash_alloc(&keg->uk_hash, 0);
1768099a0e58SBosko Milekic 
17691431a748SGleb Smirnoff 	CTR5(KTR_UMA, "keg_ctor %p zone %s(%p) out %d free %d\n",
17701431a748SGleb Smirnoff 	    keg, zone->uz_name, zone,
177157223e99SAndriy Gapon 	    (keg->uk_pages / keg->uk_ppera) * keg->uk_ipers - keg->uk_free,
177257223e99SAndriy Gapon 	    keg->uk_free);
1773099a0e58SBosko Milekic 
1774099a0e58SBosko Milekic 	LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
1775099a0e58SBosko Milekic 
1776111fbcd5SBryan Venteicher 	rw_wlock(&uma_rwlock);
1777099a0e58SBosko Milekic 	LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
1778111fbcd5SBryan Venteicher 	rw_wunlock(&uma_rwlock);
1779b23f72e9SBrian Feldman 	return (0);
1780099a0e58SBosko Milekic }
1781099a0e58SBosko Milekic 
17822efcc8cbSGleb Smirnoff static void
17832efcc8cbSGleb Smirnoff zone_alloc_counters(uma_zone_t zone)
17842efcc8cbSGleb Smirnoff {
17852efcc8cbSGleb Smirnoff 
17862efcc8cbSGleb Smirnoff 	zone->uz_allocs = counter_u64_alloc(M_WAITOK);
17872efcc8cbSGleb Smirnoff 	zone->uz_frees = counter_u64_alloc(M_WAITOK);
17882efcc8cbSGleb Smirnoff 	zone->uz_fails = counter_u64_alloc(M_WAITOK);
17892efcc8cbSGleb Smirnoff }
17902efcc8cbSGleb Smirnoff 
1791099a0e58SBosko Milekic /*
1792099a0e58SBosko Milekic  * Zone header ctor.  This initializes all fields, locks, etc.
1793099a0e58SBosko Milekic  *
1794099a0e58SBosko Milekic  * Arguments/Returns follow uma_ctor specifications
1795099a0e58SBosko Milekic  *	udata  Actually uma_zctor_args
17968355f576SJeff Roberson  */
1797b23f72e9SBrian Feldman static int
1798b23f72e9SBrian Feldman zone_ctor(void *mem, int size, void *udata, int flags)
17998355f576SJeff Roberson {
18008355f576SJeff Roberson 	struct uma_zctor_args *arg = udata;
18018355f576SJeff Roberson 	uma_zone_t zone = mem;
1802099a0e58SBosko Milekic 	uma_zone_t z;
1803099a0e58SBosko Milekic 	uma_keg_t keg;
1804*08cfa56eSMark Johnston 	int i;
18058355f576SJeff Roberson 
18068355f576SJeff Roberson 	bzero(zone, size);
18078355f576SJeff Roberson 	zone->uz_name = arg->name;
18088355f576SJeff Roberson 	zone->uz_ctor = arg->ctor;
18098355f576SJeff Roberson 	zone->uz_dtor = arg->dtor;
1810099a0e58SBosko Milekic 	zone->uz_init = NULL;
1811099a0e58SBosko Milekic 	zone->uz_fini = NULL;
1812bf965959SSean Bruno 	zone->uz_sleeps = 0;
1813c1685086SJeff Roberson 	zone->uz_xdomain = 0;
1814fc03d22bSJeff Roberson 	zone->uz_count = 0;
1815ace66b56SAlexander Motin 	zone->uz_count_min = 0;
1816bb15d1c7SGleb Smirnoff 	zone->uz_count_max = BUCKET_MAX;
1817e20a199fSJeff Roberson 	zone->uz_flags = 0;
18182f891cd5SPawel Jakub Dawidek 	zone->uz_warning = NULL;
1819ab3185d1SJeff Roberson 	/* The domain structures follow the cpu structures. */
1820ab3185d1SJeff Roberson 	zone->uz_domain = (struct uma_zone_domain *)&zone->uz_cpu[mp_ncpus];
1821bb15d1c7SGleb Smirnoff 	zone->uz_bkt_max = ULONG_MAX;
18222f891cd5SPawel Jakub Dawidek 	timevalclear(&zone->uz_ratecheck);
1823af526374SJeff Roberson 
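	/*
	 * Counters cannot be allocated this early in boot, so zones created
	 * before BOOT_RUNNING start out with the EARLY_COUNTER placeholder;
	 * uma_startup3() later calls zone_foreach(zone_alloc_counters) to
	 * give every zone real counters.
	 */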
18242efcc8cbSGleb Smirnoff 	if (__predict_true(booted == BOOT_RUNNING))
18252efcc8cbSGleb Smirnoff 		zone_alloc_counters(zone);
18262efcc8cbSGleb Smirnoff 	else {
18272efcc8cbSGleb Smirnoff 		zone->uz_allocs = EARLY_COUNTER;
18282efcc8cbSGleb Smirnoff 		zone->uz_frees = EARLY_COUNTER;
18292efcc8cbSGleb Smirnoff 		zone->uz_fails = EARLY_COUNTER;
18302efcc8cbSGleb Smirnoff 	}
18312efcc8cbSGleb Smirnoff 
1832*08cfa56eSMark Johnston 	for (i = 0; i < vm_ndomains; i++)
1833*08cfa56eSMark Johnston 		TAILQ_INIT(&zone->uz_domain[i].uzd_buckets);
1834*08cfa56eSMark Johnston 
18350095a784SJeff Roberson 	/*
18360095a784SJeff Roberson 	 * This is a pure cache zone, no kegs.
18370095a784SJeff Roberson 	 */
18380095a784SJeff Roberson 	if (arg->import) {
18396fd34d6fSJeff Roberson 		if (arg->flags & UMA_ZONE_VM)
18406fd34d6fSJeff Roberson 			arg->flags |= UMA_ZFLAG_CACHEONLY;
18416fd34d6fSJeff Roberson 		zone->uz_flags = arg->flags;
1842af526374SJeff Roberson 		zone->uz_size = arg->size;
18430095a784SJeff Roberson 		zone->uz_import = arg->import;
18440095a784SJeff Roberson 		zone->uz_release = arg->release;
18450095a784SJeff Roberson 		zone->uz_arg = arg->arg;
1846af526374SJeff Roberson 		zone->uz_lockptr = &zone->uz_lock;
1847bb15d1c7SGleb Smirnoff 		ZONE_LOCK_INIT(zone, (arg->flags & UMA_ZONE_MTXCLASS));
1848111fbcd5SBryan Venteicher 		rw_wlock(&uma_rwlock);
184903175483SAlexander Motin 		LIST_INSERT_HEAD(&uma_cachezones, zone, uz_link);
1850111fbcd5SBryan Venteicher 		rw_wunlock(&uma_rwlock);
1851af526374SJeff Roberson 		goto out;
18520095a784SJeff Roberson 	}
18530095a784SJeff Roberson 
18540095a784SJeff Roberson 	/*
18550095a784SJeff Roberson 	 * Use the regular zone/keg/slab allocator.
18560095a784SJeff Roberson 	 */
18570095a784SJeff Roberson 	zone->uz_import = (uma_import)zone_import;
18580095a784SJeff Roberson 	zone->uz_release = (uma_release)zone_release;
18590095a784SJeff Roberson 	zone->uz_arg = zone;
1860bb15d1c7SGleb Smirnoff 	keg = arg->keg;
18610095a784SJeff Roberson 
1862099a0e58SBosko Milekic 	if (arg->flags & UMA_ZONE_SECONDARY) {
1863099a0e58SBosko Milekic 		KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
18648355f576SJeff Roberson 		zone->uz_init = arg->uminit;
1865e221e841SJeff Roberson 		zone->uz_fini = arg->fini;
1866af526374SJeff Roberson 		zone->uz_lockptr = &keg->uk_lock;
1867e20a199fSJeff Roberson 		zone->uz_flags |= UMA_ZONE_SECONDARY;
1868111fbcd5SBryan Venteicher 		rw_wlock(&uma_rwlock);
1869099a0e58SBosko Milekic 		ZONE_LOCK(zone);
1870099a0e58SBosko Milekic 		LIST_FOREACH(z, &keg->uk_zones, uz_link) {
1871099a0e58SBosko Milekic 			if (LIST_NEXT(z, uz_link) == NULL) {
1872099a0e58SBosko Milekic 				LIST_INSERT_AFTER(z, zone, uz_link);
1873099a0e58SBosko Milekic 				break;
1874099a0e58SBosko Milekic 			}
1875099a0e58SBosko Milekic 		}
1876099a0e58SBosko Milekic 		ZONE_UNLOCK(zone);
1877111fbcd5SBryan Venteicher 		rw_wunlock(&uma_rwlock);
1878e20a199fSJeff Roberson 	} else if (keg == NULL) {
1879e20a199fSJeff Roberson 		if ((keg = uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
1880e20a199fSJeff Roberson 		    arg->align, arg->flags)) == NULL)
1881b23f72e9SBrian Feldman 			return (ENOMEM);
1882099a0e58SBosko Milekic 	} else {
1883099a0e58SBosko Milekic 		struct uma_kctor_args karg;
1884b23f72e9SBrian Feldman 		int error;
1885099a0e58SBosko Milekic 
1886099a0e58SBosko Milekic 		/* We should only be here from uma_startup() */
1887099a0e58SBosko Milekic 		karg.size = arg->size;
1888099a0e58SBosko Milekic 		karg.uminit = arg->uminit;
1889099a0e58SBosko Milekic 		karg.fini = arg->fini;
1890099a0e58SBosko Milekic 		karg.align = arg->align;
1891099a0e58SBosko Milekic 		karg.flags = arg->flags;
1892099a0e58SBosko Milekic 		karg.zone = zone;
1893b23f72e9SBrian Feldman 		error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
1894b23f72e9SBrian Feldman 		    flags);
1895b23f72e9SBrian Feldman 		if (error)
1896b23f72e9SBrian Feldman 			return (error);
1897099a0e58SBosko Milekic 	}
18980095a784SJeff Roberson 
1899bb15d1c7SGleb Smirnoff 	zone->uz_keg = keg;
1900e20a199fSJeff Roberson 	zone->uz_size = keg->uk_size;
1901e20a199fSJeff Roberson 	zone->uz_flags |= (keg->uk_flags &
1902e20a199fSJeff Roberson 	    (UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT));
19038355f576SJeff Roberson 
19048355f576SJeff Roberson 	/*
19058355f576SJeff Roberson 	 * Some internal zones don't have room allocated for the per cpu
19068355f576SJeff Roberson 	 * caches.  If we're internal, bail out here.
19078355f576SJeff Roberson 	 */
1908099a0e58SBosko Milekic 	if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
1909e20a199fSJeff Roberson 		KASSERT((zone->uz_flags & UMA_ZONE_SECONDARY) == 0,
1910099a0e58SBosko Milekic 		    ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
1911b23f72e9SBrian Feldman 		return (0);
1912099a0e58SBosko Milekic 	}
19138355f576SJeff Roberson 
1914af526374SJeff Roberson out:
19157e28037aSMark Johnston 	KASSERT((arg->flags & (UMA_ZONE_MAXBUCKET | UMA_ZONE_NOBUCKET)) !=
19167e28037aSMark Johnston 	    (UMA_ZONE_MAXBUCKET | UMA_ZONE_NOBUCKET),
19177e28037aSMark Johnston 	    ("Invalid zone flag combination"));
1918eda1b016SJeff Roberson 	if ((arg->flags & UMA_ZONE_MAXBUCKET) != 0) {
1919cae33c14SJeff Roberson 		zone->uz_count = BUCKET_MAX;
1920eda1b016SJeff Roberson 	} else if ((arg->flags & UMA_ZONE_MINBUCKET) != 0) {
1921eda1b016SJeff Roberson 		zone->uz_count = BUCKET_MIN;
1922eda1b016SJeff Roberson 		zone->uz_count_max = BUCKET_MIN;
1923eda1b016SJeff Roberson 	} else if ((arg->flags & UMA_ZONE_NOBUCKET) != 0)
19247e28037aSMark Johnston 		zone->uz_count = 0;
19257e28037aSMark Johnston 	else
19267e28037aSMark Johnston 		zone->uz_count = bucket_select(zone->uz_size);
1927ace66b56SAlexander Motin 	zone->uz_count_min = zone->uz_count;
1928fc03d22bSJeff Roberson 
1929b23f72e9SBrian Feldman 	return (0);
19308355f576SJeff Roberson }
19318355f576SJeff Roberson 
19328355f576SJeff Roberson /*
1933099a0e58SBosko Milekic  * Keg header dtor.  This frees all data, destroys locks, frees the hash
1934099a0e58SBosko Milekic  * table and removes the keg from the global list.
19359c2cd7e5SJeff Roberson  *
19369c2cd7e5SJeff Roberson  * Arguments/Returns follow uma_dtor specifications
19379c2cd7e5SJeff Roberson  *	udata  unused
19389c2cd7e5SJeff Roberson  */
1939099a0e58SBosko Milekic static void
1940099a0e58SBosko Milekic keg_dtor(void *arg, int size, void *udata)
1941099a0e58SBosko Milekic {
1942099a0e58SBosko Milekic 	uma_keg_t keg;
19439c2cd7e5SJeff Roberson 
1944099a0e58SBosko Milekic 	keg = (uma_keg_t)arg;
1945e20a199fSJeff Roberson 	KEG_LOCK(keg);
1946099a0e58SBosko Milekic 	if (keg->uk_free != 0) {
1947a3845534SCraig Rodrigues 		printf("Freed UMA keg (%s) was not empty (%d items).  "
1948099a0e58SBosko Milekic 		    "Lost %d pages of memory.\n",
1949a3845534SCraig Rodrigues 		    keg->uk_name ? keg->uk_name : "",
1950099a0e58SBosko Milekic 		    keg->uk_free, keg->uk_pages);
1951099a0e58SBosko Milekic 	}
1952e20a199fSJeff Roberson 	KEG_UNLOCK(keg);
1953099a0e58SBosko Milekic 
1954099a0e58SBosko Milekic 	hash_free(&keg->uk_hash);
1955099a0e58SBosko Milekic 
1956e20a199fSJeff Roberson 	KEG_LOCK_FINI(keg);
1957099a0e58SBosko Milekic }
1958099a0e58SBosko Milekic 
1959099a0e58SBosko Milekic /*
1960099a0e58SBosko Milekic  * Zone header dtor.
1961099a0e58SBosko Milekic  *
1962099a0e58SBosko Milekic  * Arguments/Returns follow uma_dtor specifications
1963099a0e58SBosko Milekic  *	udata  unused
1964099a0e58SBosko Milekic  */
19659c2cd7e5SJeff Roberson static void
19669c2cd7e5SJeff Roberson zone_dtor(void *arg, int size, void *udata)
19679c2cd7e5SJeff Roberson {
19689c2cd7e5SJeff Roberson 	uma_zone_t zone;
1969099a0e58SBosko Milekic 	uma_keg_t keg;
19709c2cd7e5SJeff Roberson 
19719c2cd7e5SJeff Roberson 	zone = (uma_zone_t)arg;
19729643769aSJeff Roberson 
1973e20a199fSJeff Roberson 	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
19749643769aSJeff Roberson 		cache_drain(zone);
1975099a0e58SBosko Milekic 
1976111fbcd5SBryan Venteicher 	rw_wlock(&uma_rwlock);
1977099a0e58SBosko Milekic 	LIST_REMOVE(zone, uz_link);
1978111fbcd5SBryan Venteicher 	rw_wunlock(&uma_rwlock);
1979099a0e58SBosko Milekic 	/*
1980099a0e58SBosko Milekic 	 * XXX there are some races here where the zone can be drained but
1981099a0e58SBosko Milekic 	 * the zone lock released and then refilled before we remove it...
1982099a0e58SBosko Milekic 	 * we don't care for now.
1984099a0e58SBosko Milekic 	 */
1985*08cfa56eSMark Johnston 	zone_reclaim(zone, M_WAITOK, true);
1986e20a199fSJeff Roberson 	/*
1987323ad386STycho Nightingale 	 * We only destroy kegs from non secondary/non cache zones.
1988e20a199fSJeff Roberson 	 */
1989323ad386STycho Nightingale 	if ((zone->uz_flags & (UMA_ZONE_SECONDARY | UMA_ZFLAG_CACHE)) == 0) {
1990323ad386STycho Nightingale 		keg = zone->uz_keg;
1991111fbcd5SBryan Venteicher 		rw_wlock(&uma_rwlock);
1992099a0e58SBosko Milekic 		LIST_REMOVE(keg, uk_link);
1993111fbcd5SBryan Venteicher 		rw_wunlock(&uma_rwlock);
19940095a784SJeff Roberson 		zone_free_item(kegs, keg, NULL, SKIP_NONE);
19959c2cd7e5SJeff Roberson 	}
19962efcc8cbSGleb Smirnoff 	counter_u64_free(zone->uz_allocs);
19972efcc8cbSGleb Smirnoff 	counter_u64_free(zone->uz_frees);
19982efcc8cbSGleb Smirnoff 	counter_u64_free(zone->uz_fails);
1999bb15d1c7SGleb Smirnoff 	if (zone->uz_lockptr == &zone->uz_lock)
2000af526374SJeff Roberson 		ZONE_LOCK_FINI(zone);
2001099a0e58SBosko Milekic }
2002099a0e58SBosko Milekic 
20039c2cd7e5SJeff Roberson /*
20048355f576SJeff Roberson  * Traverses every zone in the system and calls a callback
20058355f576SJeff Roberson  *
20068355f576SJeff Roberson  * Arguments:
20078355f576SJeff Roberson  *	zfunc  A pointer to a function which accepts a zone
20088355f576SJeff Roberson  *		as an argument.
20098355f576SJeff Roberson  *
20108355f576SJeff Roberson  * Returns:
20118355f576SJeff Roberson  *	Nothing
20128355f576SJeff Roberson  */
20138355f576SJeff Roberson static void
20148355f576SJeff Roberson zone_foreach(void (*zfunc)(uma_zone_t))
20158355f576SJeff Roberson {
2016099a0e58SBosko Milekic 	uma_keg_t keg;
20178355f576SJeff Roberson 	uma_zone_t zone;
20188355f576SJeff Roberson 
20192efcc8cbSGleb Smirnoff 	/*
20202efcc8cbSGleb Smirnoff 	 * Before BOOT_RUNNING we are guaranteed to be single
20212efcc8cbSGleb Smirnoff 	 * threaded, so locking isn't needed. Startup functions
20222efcc8cbSGleb Smirnoff 	 * are allowed to use M_WAITOK.
20232efcc8cbSGleb Smirnoff 	 */
20242efcc8cbSGleb Smirnoff 	if (__predict_true(booted == BOOT_RUNNING))
2025111fbcd5SBryan Venteicher 		rw_rlock(&uma_rwlock);
2026099a0e58SBosko Milekic 	LIST_FOREACH(keg, &uma_kegs, uk_link) {
2027099a0e58SBosko Milekic 		LIST_FOREACH(zone, &keg->uk_zones, uz_link)
20288355f576SJeff Roberson 			zfunc(zone);
2029099a0e58SBosko Milekic 	}
20302efcc8cbSGleb Smirnoff 	if (__predict_true(booted == BOOT_RUNNING))
2031111fbcd5SBryan Venteicher 		rw_runlock(&uma_rwlock);
20328355f576SJeff Roberson }
20338355f576SJeff Roberson 
2034f4bef67cSGleb Smirnoff /*
2035f4bef67cSGleb Smirnoff  * Count how many pages we need to bootstrap.  VM supplies
2036f4bef67cSGleb Smirnoff  * its need in early zones in the argument; we add up our zones,
2037f4bef67cSGleb Smirnoff  * which consist of: UMA Slabs, UMA Hash and 9 Bucket zones.  The
2038f4bef67cSGleb Smirnoff  * zone of zones and zone of kegs are accounted separately.
2039f4bef67cSGleb Smirnoff  */
2040f4bef67cSGleb Smirnoff #define	UMA_BOOT_ZONES	11
20415073a083SGleb Smirnoff /* Zone of zones and zone of kegs have arbitrary alignment. */
20425073a083SGleb Smirnoff #define	UMA_BOOT_ALIGN	32
2043f4bef67cSGleb Smirnoff static int zsize, ksize;
2044f4bef67cSGleb Smirnoff int
2045f7d35785SGleb Smirnoff uma_startup_count(int vm_zones)
2046f4bef67cSGleb Smirnoff {
2047f7d35785SGleb Smirnoff 	int zones, pages;
2048f4bef67cSGleb Smirnoff 
2049f4bef67cSGleb Smirnoff 	ksize = sizeof(struct uma_keg) +
2050f4bef67cSGleb Smirnoff 	    (sizeof(struct uma_domain) * vm_ndomains);
2051f4bef67cSGleb Smirnoff 	zsize = sizeof(struct uma_zone) +
2052f4bef67cSGleb Smirnoff 	    (sizeof(struct uma_cache) * (mp_maxid + 1)) +
2053f4bef67cSGleb Smirnoff 	    (sizeof(struct uma_zone_domain) * vm_ndomains);
2054f4bef67cSGleb Smirnoff 
20555073a083SGleb Smirnoff 	/*
20565073a083SGleb Smirnoff 	 * Memory for the zone of kegs and its keg,
20575073a083SGleb Smirnoff 	 * and for the zone of zones.
20585073a083SGleb Smirnoff 	 */
2059f4bef67cSGleb Smirnoff 	pages = howmany(roundup(zsize, CACHE_LINE_SIZE) * 2 +
2060f4bef67cSGleb Smirnoff 	    roundup(ksize, CACHE_LINE_SIZE), PAGE_SIZE);
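	/*
	 * For illustration only, with hypothetical sizes: if zsize rounds
	 * to 1024 bytes and ksize to 512, this is
	 * howmany(2 * 1024 + 512, PAGE_SIZE), i.e. one bootstrap page
	 * covers both zone headers and the master keg.
	 */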
2061f4bef67cSGleb Smirnoff 
2062f7d35785SGleb Smirnoff #ifdef	UMA_MD_SMALL_ALLOC
2063f7d35785SGleb Smirnoff 	zones = UMA_BOOT_ZONES;
2064f7d35785SGleb Smirnoff #else
2065f7d35785SGleb Smirnoff 	zones = UMA_BOOT_ZONES + vm_zones;
2066f7d35785SGleb Smirnoff 	vm_zones = 0;
2067f7d35785SGleb Smirnoff #endif
2068f4bef67cSGleb Smirnoff 
20695073a083SGleb Smirnoff 	/* Memory for the rest of the startup zones, UMA and VM, ... */
20700b2e3aeaSGleb Smirnoff 	if (zsize > UMA_SLAB_SPACE) {
20710b2e3aeaSGleb Smirnoff 		/* See keg_large_init(). */
20720b2e3aeaSGleb Smirnoff 		u_int ppera;
20730b2e3aeaSGleb Smirnoff 
20740b2e3aeaSGleb Smirnoff 		ppera = howmany(roundup2(zsize, UMA_BOOT_ALIGN), PAGE_SIZE);
20750b2e3aeaSGleb Smirnoff 		if (PAGE_SIZE * ppera - roundup2(zsize, UMA_BOOT_ALIGN) <
20760b2e3aeaSGleb Smirnoff 		    SIZEOF_UMA_SLAB)
20770b2e3aeaSGleb Smirnoff 			ppera++;
20780b2e3aeaSGleb Smirnoff 		pages += (zones + vm_zones) * ppera;
20790b2e3aeaSGleb Smirnoff 	} else if (roundup2(zsize, UMA_BOOT_ALIGN) > UMA_SLAB_SPACE)
20800b2e3aeaSGleb Smirnoff 		/* See keg_small_init() special case for uk_ppera = 1. */
208196a10340SGleb Smirnoff 		pages += zones;
2082f4bef67cSGleb Smirnoff 	else
20835073a083SGleb Smirnoff 		pages += howmany(zones,
20845073a083SGleb Smirnoff 		    UMA_SLAB_SPACE / roundup2(zsize, UMA_BOOT_ALIGN));
2085f4bef67cSGleb Smirnoff 
20865073a083SGleb Smirnoff 	/* ... and their kegs.  Note that the zone of zones allocates a keg! */
20875073a083SGleb Smirnoff 	pages += howmany(zones + 1,
20885073a083SGleb Smirnoff 	    UMA_SLAB_SPACE / roundup2(ksize, UMA_BOOT_ALIGN));
2089f4bef67cSGleb Smirnoff 
2090f4bef67cSGleb Smirnoff 	/*
20915073a083SGleb Smirnoff 	 * Most of the startup zones are not going to be offpage, which is
20925073a083SGleb Smirnoff 	 * why we use UMA_SLAB_SPACE instead of UMA_SLAB_SIZE in all
20935073a083SGleb Smirnoff 	 * calculations.  Some large bucket zones will be offpage, and
20945073a083SGleb Smirnoff 	 * thus will allocate hashes.  We take a conservative approach
20955073a083SGleb Smirnoff 	 * and assume that all zones may allocate a hash.  This may give
20965073a083SGleb Smirnoff 	 * us some positive inaccuracy, usually an extra single page.
2097f4bef67cSGleb Smirnoff 	 */
20985073a083SGleb Smirnoff 	pages += howmany(zones, UMA_SLAB_SPACE /
2099d2be4a1eSGleb Smirnoff 	    (sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT));
2100f4bef67cSGleb Smirnoff 
2101f4bef67cSGleb Smirnoff 	return (pages);
2102f4bef67cSGleb Smirnoff }
2103f4bef67cSGleb Smirnoff 
21048355f576SJeff Roberson void
2105ac0a6fd0SGleb Smirnoff uma_startup(void *mem, int npages)
21068355f576SJeff Roberson {
21078355f576SJeff Roberson 	struct uma_zctor_args args;
2108ab3185d1SJeff Roberson 	uma_keg_t masterkeg;
2109ab3185d1SJeff Roberson 	uintptr_t m;
2110f4bef67cSGleb Smirnoff 
2111f4bef67cSGleb Smirnoff #ifdef DIAGNOSTIC
2112f4bef67cSGleb Smirnoff 	printf("Entering %s with %d boot pages configured\n", __func__, npages);
2113f4bef67cSGleb Smirnoff #endif
21148355f576SJeff Roberson 
2115111fbcd5SBryan Venteicher 	rw_init(&uma_rwlock, "UMA lock");
2116099a0e58SBosko Milekic 
2117ab3185d1SJeff Roberson 	/* Use bootpages memory for the zone of zones and zone of kegs. */
2118ab3185d1SJeff Roberson 	m = (uintptr_t)mem;
2119ab3185d1SJeff Roberson 	zones = (uma_zone_t)m;
2120ab3185d1SJeff Roberson 	m += roundup(zsize, CACHE_LINE_SIZE);
2121ab3185d1SJeff Roberson 	kegs = (uma_zone_t)m;
2122ab3185d1SJeff Roberson 	m += roundup(zsize, CACHE_LINE_SIZE);
2123ab3185d1SJeff Roberson 	masterkeg = (uma_keg_t)m;
2124ab3185d1SJeff Roberson 	m += roundup(ksize, CACHE_LINE_SIZE);
2125ab3185d1SJeff Roberson 	m = roundup(m, PAGE_SIZE);
2126ab3185d1SJeff Roberson 	npages -= (m - (uintptr_t)mem) / PAGE_SIZE;
2127ab3185d1SJeff Roberson 	mem = (void *)m;
2128ab3185d1SJeff Roberson 
2129099a0e58SBosko Milekic 	/* "manually" create the initial zone */
21300095a784SJeff Roberson 	memset(&args, 0, sizeof(args));
2131099a0e58SBosko Milekic 	args.name = "UMA Kegs";
2132ab3185d1SJeff Roberson 	args.size = ksize;
2133099a0e58SBosko Milekic 	args.ctor = keg_ctor;
2134099a0e58SBosko Milekic 	args.dtor = keg_dtor;
21358355f576SJeff Roberson 	args.uminit = zero_init;
21368355f576SJeff Roberson 	args.fini = NULL;
2137ab3185d1SJeff Roberson 	args.keg = masterkeg;
21385073a083SGleb Smirnoff 	args.align = UMA_BOOT_ALIGN - 1;
2139b60f5b79SJeff Roberson 	args.flags = UMA_ZFLAG_INTERNAL;
2140ab3185d1SJeff Roberson 	zone_ctor(kegs, zsize, &args, M_WAITOK);
21418355f576SJeff Roberson 
2142ac0a6fd0SGleb Smirnoff 	bootmem = mem;
2143ac0a6fd0SGleb Smirnoff 	boot_pages = npages;
21448355f576SJeff Roberson 
2145099a0e58SBosko Milekic 	args.name = "UMA Zones";
2146f4bef67cSGleb Smirnoff 	args.size = zsize;
2147099a0e58SBosko Milekic 	args.ctor = zone_ctor;
2148099a0e58SBosko Milekic 	args.dtor = zone_dtor;
2149099a0e58SBosko Milekic 	args.uminit = zero_init;
2150099a0e58SBosko Milekic 	args.fini = NULL;
2151099a0e58SBosko Milekic 	args.keg = NULL;
21525073a083SGleb Smirnoff 	args.align = UMA_BOOT_ALIGN - 1;
2153099a0e58SBosko Milekic 	args.flags = UMA_ZFLAG_INTERNAL;
2154ab3185d1SJeff Roberson 	zone_ctor(zones, zsize, &args, M_WAITOK);
2155099a0e58SBosko Milekic 
21568355f576SJeff Roberson 	/* Now make a zone for slab headers */
21578355f576SJeff Roberson 	slabzone = uma_zcreate("UMA Slabs",
2158ef72505eSJeff Roberson 				sizeof(struct uma_slab),
21598355f576SJeff Roberson 				NULL, NULL, NULL, NULL,
2160b60f5b79SJeff Roberson 				UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
21618355f576SJeff Roberson 
21628355f576SJeff Roberson 	hashzone = uma_zcreate("UMA Hash",
21638355f576SJeff Roberson 	    sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
21648355f576SJeff Roberson 	    NULL, NULL, NULL, NULL,
2165b60f5b79SJeff Roberson 	    UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
21668355f576SJeff Roberson 
2167cae33c14SJeff Roberson 	bucket_init();
21688355f576SJeff Roberson 
2169f4bef67cSGleb Smirnoff 	booted = BOOT_STRAPPED;
21708355f576SJeff Roberson }
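
/*
 * Editorial sketch of the carving performed by uma_startup() above: the
 * caller-supplied boot memory is laid out as
 *
 *	mem: [ zone of zones (zsize) ][ zone of kegs (zsize) ]
 *	     [ master keg (ksize) ][ pad to PAGE_SIZE ]
 *	     [ remaining whole pages kept as bootmem ]
 *
 * with each of the first three objects rounded up to CACHE_LINE_SIZE.
 */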
21718355f576SJeff Roberson 
2172f4bef67cSGleb Smirnoff void
2173f4bef67cSGleb Smirnoff uma_startup1(void)
2174f4bef67cSGleb Smirnoff {
2175f4bef67cSGleb Smirnoff 
2176f4bef67cSGleb Smirnoff #ifdef DIAGNOSTIC
2177f4bef67cSGleb Smirnoff 	printf("Entering %s with %d boot pages left\n", __func__, boot_pages);
2178f4bef67cSGleb Smirnoff #endif
2179f4bef67cSGleb Smirnoff 	booted = BOOT_PAGEALLOC;
2180f4bef67cSGleb Smirnoff }
2181f4bef67cSGleb Smirnoff 
21828355f576SJeff Roberson void
218399571dc3SJeff Roberson uma_startup2(void)
21848355f576SJeff Roberson {
2185f4bef67cSGleb Smirnoff 
2186f7d35785SGleb Smirnoff #ifdef DIAGNOSTIC
2187f7d35785SGleb Smirnoff 	printf("Entering %s with %d boot pages left\n", __func__, boot_pages);
2188f7d35785SGleb Smirnoff #endif
2189f4bef67cSGleb Smirnoff 	booted = BOOT_BUCKETS;
2190*08cfa56eSMark Johnston 	sx_init(&uma_reclaim_lock, "umareclaim");
2191f4bef67cSGleb Smirnoff 	bucket_enable();
21928355f576SJeff Roberson }
21938355f576SJeff Roberson 
21948355f576SJeff Roberson /*
21958355f576SJeff Roberson  * Finish UMA bootstrap: allocate the per-zone statistics counters and
21968355f576SJeff Roberson  * initialize and arm the periodic UMA timeout callout.
21978355f576SJeff Roberson  */
21988355f576SJeff Roberson static void
21998355f576SJeff Roberson uma_startup3(void)
22008355f576SJeff Roberson {
22011431a748SGleb Smirnoff 
2202c5deaf04SGleb Smirnoff #ifdef INVARIANTS
2203c5deaf04SGleb Smirnoff 	TUNABLE_INT_FETCH("vm.debug.divisor", &dbg_divisor);
2204c5deaf04SGleb Smirnoff 	uma_dbg_cnt = counter_u64_alloc(M_WAITOK);
2205c5deaf04SGleb Smirnoff 	uma_skip_cnt = counter_u64_alloc(M_WAITOK);
2206c5deaf04SGleb Smirnoff #endif
22072efcc8cbSGleb Smirnoff 	zone_foreach(zone_alloc_counters);
2208fd90e2edSJung-uk Kim 	callout_init(&uma_callout, 1);
22099643769aSJeff Roberson 	callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
2210c5deaf04SGleb Smirnoff 	booted = BOOT_RUNNING;
22118355f576SJeff Roberson }
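
/*
 * Editorial summary of the bootstrap sequence implemented above; the
 * "booted" state advances strictly in this order:
 *
 *	uma_startup()  -> BOOT_STRAPPED
 *	uma_startup1() -> BOOT_PAGEALLOC
 *	uma_startup2() -> BOOT_BUCKETS
 *	uma_startup3() -> BOOT_RUNNING
 */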
22128355f576SJeff Roberson 
2213e20a199fSJeff Roberson static uma_keg_t
2214099a0e58SBosko Milekic uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
221585dcf349SGleb Smirnoff 		int align, uint32_t flags)
2216099a0e58SBosko Milekic {
2217099a0e58SBosko Milekic 	struct uma_kctor_args args;
2218099a0e58SBosko Milekic 
2219099a0e58SBosko Milekic 	args.size = size;
2220099a0e58SBosko Milekic 	args.uminit = uminit;
2221099a0e58SBosko Milekic 	args.fini = fini;
22221e319f6dSRobert Watson 	args.align = (align == UMA_ALIGN_CACHE) ? uma_align_cache : align;
2223099a0e58SBosko Milekic 	args.flags = flags;
2224099a0e58SBosko Milekic 	args.zone = zone;
2225ab3185d1SJeff Roberson 	return (zone_alloc_item(kegs, &args, UMA_ANYDOMAIN, M_WAITOK));
2226099a0e58SBosko Milekic }
2227099a0e58SBosko Milekic 
2228f4bef67cSGleb Smirnoff /* Public functions */
22298355f576SJeff Roberson /* See uma.h */
22301e319f6dSRobert Watson void
22311e319f6dSRobert Watson uma_set_align(int align)
22321e319f6dSRobert Watson {
22331e319f6dSRobert Watson 
22341e319f6dSRobert Watson 	if (align != UMA_ALIGN_CACHE)
22351e319f6dSRobert Watson 		uma_align_cache = align;
22361e319f6dSRobert Watson }
22371e319f6dSRobert Watson 
22381e319f6dSRobert Watson /* See uma.h */
22398355f576SJeff Roberson uma_zone_t
2240bb196eb4SMatthew D Fleming uma_zcreate(const char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
224185dcf349SGleb Smirnoff 		uma_init uminit, uma_fini fini, int align, uint32_t flags)
22428355f576SJeff Roberson 
22438355f576SJeff Roberson {
22448355f576SJeff Roberson 	struct uma_zctor_args args;
224595c4bf75SKonstantin Belousov 	uma_zone_t res;
224695c4bf75SKonstantin Belousov 	bool locked;
22478355f576SJeff Roberson 
2248a5a35578SJohn Baldwin 	KASSERT(powerof2(align + 1), ("invalid zone alignment %d for \"%s\"",
2249a5a35578SJohn Baldwin 	    align, name));
2250a5a35578SJohn Baldwin 
2251c1685086SJeff Roberson 	/* Sets all zones to a first-touch domain policy. */
2252c1685086SJeff Roberson #ifdef UMA_FIRSTTOUCH
2253c1685086SJeff Roberson 	flags |= UMA_ZONE_NUMA;
2254c1685086SJeff Roberson #endif
2255c1685086SJeff Roberson 
22568355f576SJeff Roberson 	/* This stuff is essential for the zone ctor */
22570095a784SJeff Roberson 	memset(&args, 0, sizeof(args));
22588355f576SJeff Roberson 	args.name = name;
22598355f576SJeff Roberson 	args.size = size;
22608355f576SJeff Roberson 	args.ctor = ctor;
22618355f576SJeff Roberson 	args.dtor = dtor;
22628355f576SJeff Roberson 	args.uminit = uminit;
22638355f576SJeff Roberson 	args.fini = fini;
2264afc6dc36SJohn-Mark Gurney #ifdef  INVARIANTS
2265afc6dc36SJohn-Mark Gurney 	/*
2266afc6dc36SJohn-Mark Gurney 	 * If a zone is being created with an empty constructor and
2267afc6dc36SJohn-Mark Gurney 	 * destructor, pass the UMA trash constructor and destructor,
2268afc6dc36SJohn-Mark Gurney 	 * which check for memory use after free.
2269afc6dc36SJohn-Mark Gurney 	 */
227019c591bfSMateusz Guzik 	if ((!(flags & (UMA_ZONE_ZINIT | UMA_ZONE_NOFREE))) &&
227119c591bfSMateusz Guzik 	    ctor == NULL && dtor == NULL && uminit == NULL && fini == NULL) {
2272afc6dc36SJohn-Mark Gurney 		args.ctor = trash_ctor;
2273afc6dc36SJohn-Mark Gurney 		args.dtor = trash_dtor;
2274afc6dc36SJohn-Mark Gurney 		args.uminit = trash_init;
2275afc6dc36SJohn-Mark Gurney 		args.fini = trash_fini;
2276afc6dc36SJohn-Mark Gurney 	}
2277afc6dc36SJohn-Mark Gurney #endif
22788355f576SJeff Roberson 	args.align = align;
22798355f576SJeff Roberson 	args.flags = flags;
2280099a0e58SBosko Milekic 	args.keg = NULL;
2281099a0e58SBosko Milekic 
2282f4bef67cSGleb Smirnoff 	if (booted < BOOT_BUCKETS) {
228395c4bf75SKonstantin Belousov 		locked = false;
228495c4bf75SKonstantin Belousov 	} else {
2285*08cfa56eSMark Johnston 		sx_slock(&uma_reclaim_lock);
228695c4bf75SKonstantin Belousov 		locked = true;
228795c4bf75SKonstantin Belousov 	}
2288ab3185d1SJeff Roberson 	res = zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK);
228995c4bf75SKonstantin Belousov 	if (locked)
2290*08cfa56eSMark Johnston 		sx_sunlock(&uma_reclaim_lock);
229195c4bf75SKonstantin Belousov 	return (res);
2292099a0e58SBosko Milekic }
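
/*
 * Illustrative use of uma_zcreate() (an editorial sketch; "foo" and
 * struct foo are hypothetical):
 *
 *	static uma_zone_t foo_zone;
 *
 *	foo_zone = uma_zcreate("foo", sizeof(struct foo),
 *	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 *	p = uma_zalloc(foo_zone, M_WAITOK);
 *	...
 *	uma_zfree(foo_zone, p);
 */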
2293099a0e58SBosko Milekic 
2294099a0e58SBosko Milekic /* See uma.h */
2295099a0e58SBosko Milekic uma_zone_t
2296099a0e58SBosko Milekic uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
2297099a0e58SBosko Milekic 		    uma_init zinit, uma_fini zfini, uma_zone_t master)
2298099a0e58SBosko Milekic {
2299099a0e58SBosko Milekic 	struct uma_zctor_args args;
2300e20a199fSJeff Roberson 	uma_keg_t keg;
230195c4bf75SKonstantin Belousov 	uma_zone_t res;
230295c4bf75SKonstantin Belousov 	bool locked;
2303099a0e58SBosko Milekic 
2304bb15d1c7SGleb Smirnoff 	keg = master->uz_keg;
23050095a784SJeff Roberson 	memset(&args, 0, sizeof(args));
2306099a0e58SBosko Milekic 	args.name = name;
2307e20a199fSJeff Roberson 	args.size = keg->uk_size;
2308099a0e58SBosko Milekic 	args.ctor = ctor;
2309099a0e58SBosko Milekic 	args.dtor = dtor;
2310099a0e58SBosko Milekic 	args.uminit = zinit;
2311099a0e58SBosko Milekic 	args.fini = zfini;
2312e20a199fSJeff Roberson 	args.align = keg->uk_align;
2313e20a199fSJeff Roberson 	args.flags = keg->uk_flags | UMA_ZONE_SECONDARY;
2314e20a199fSJeff Roberson 	args.keg = keg;
23158355f576SJeff Roberson 
2316f4bef67cSGleb Smirnoff 	if (booted < BOOT_BUCKETS) {
231795c4bf75SKonstantin Belousov 		locked = false;
231895c4bf75SKonstantin Belousov 	} else {
2319*08cfa56eSMark Johnston 		sx_slock(&uma_reclaim_lock);
232095c4bf75SKonstantin Belousov 		locked = true;
232195c4bf75SKonstantin Belousov 	}
2322e20a199fSJeff Roberson 	/* XXX Attaches only one keg of potentially many. */
2323ab3185d1SJeff Roberson 	res = zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK);
232495c4bf75SKonstantin Belousov 	if (locked)
2325*08cfa56eSMark Johnston 		sx_sunlock(&uma_reclaim_lock);
232695c4bf75SKonstantin Belousov 	return (res);
23278355f576SJeff Roberson }
23288355f576SJeff Roberson 
23290095a784SJeff Roberson /* See uma.h */
23300095a784SJeff Roberson uma_zone_t
2331af526374SJeff Roberson uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor,
2332af526374SJeff Roberson 		    uma_init zinit, uma_fini zfini, uma_import zimport,
2333af526374SJeff Roberson 		    uma_release zrelease, void *arg, int flags)
23340095a784SJeff Roberson {
23350095a784SJeff Roberson 	struct uma_zctor_args args;
23360095a784SJeff Roberson 
23370095a784SJeff Roberson 	memset(&args, 0, sizeof(args));
23380095a784SJeff Roberson 	args.name = name;
2339af526374SJeff Roberson 	args.size = size;
23400095a784SJeff Roberson 	args.ctor = ctor;
23410095a784SJeff Roberson 	args.dtor = dtor;
23420095a784SJeff Roberson 	args.uminit = zinit;
23430095a784SJeff Roberson 	args.fini = zfini;
23440095a784SJeff Roberson 	args.import = zimport;
23450095a784SJeff Roberson 	args.release = zrelease;
23460095a784SJeff Roberson 	args.arg = arg;
23470095a784SJeff Roberson 	args.align = 0;
2348bb15d1c7SGleb Smirnoff 	args.flags = flags | UMA_ZFLAG_CACHE;
23490095a784SJeff Roberson 
2350ab3185d1SJeff Roberson 	return (zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK));
23510095a784SJeff Roberson }
23520095a784SJeff Roberson 
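
/*
 * Editorial note: a zone created by uma_zcache_create() has no keg
 * (UMA_ZFLAG_CACHE); instead of taking items from slabs it refills and
 * drains its buckets through the caller-supplied zimport and zrelease
 * callbacks, with "arg" passed through to both.
 */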
23538355f576SJeff Roberson /* See uma.h */
23549c2cd7e5SJeff Roberson void
23559c2cd7e5SJeff Roberson uma_zdestroy(uma_zone_t zone)
23569c2cd7e5SJeff Roberson {
2357f4ff923bSRobert Watson 
2358*08cfa56eSMark Johnston 	sx_slock(&uma_reclaim_lock);
23590095a784SJeff Roberson 	zone_free_item(zones, zone, NULL, SKIP_NONE);
2360*08cfa56eSMark Johnston 	sx_sunlock(&uma_reclaim_lock);
23619c2cd7e5SJeff Roberson }
23629c2cd7e5SJeff Roberson 
23638d6fbbb8SJeff Roberson void
23648d6fbbb8SJeff Roberson uma_zwait(uma_zone_t zone)
23658d6fbbb8SJeff Roberson {
23668d6fbbb8SJeff Roberson 	void *item;
23678d6fbbb8SJeff Roberson 
23688d6fbbb8SJeff Roberson 	item = uma_zalloc_arg(zone, NULL, M_WAITOK);
23698d6fbbb8SJeff Roberson 	uma_zfree(zone, item);
23708d6fbbb8SJeff Roberson }
23718d6fbbb8SJeff Roberson 
23724e180881SMateusz Guzik void *
23734e180881SMateusz Guzik uma_zalloc_pcpu_arg(uma_zone_t zone, void *udata, int flags)
23744e180881SMateusz Guzik {
23754e180881SMateusz Guzik 	void *item;
2376b4799947SRuslan Bukin #ifdef SMP
23774e180881SMateusz Guzik 	int i;
23784e180881SMateusz Guzik 
23794e180881SMateusz Guzik 	MPASS(zone->uz_flags & UMA_ZONE_PCPU);
2380b4799947SRuslan Bukin #endif
23814e180881SMateusz Guzik 	item = uma_zalloc_arg(zone, udata, flags & ~M_ZERO);
23824e180881SMateusz Guzik 	if (item != NULL && (flags & M_ZERO)) {
2383b4799947SRuslan Bukin #ifdef SMP
2384013072f0SMark Johnston 		for (i = 0; i <= mp_maxid; i++)
23854e180881SMateusz Guzik 			bzero(zpcpu_get_cpu(item, i), zone->uz_size);
2386b4799947SRuslan Bukin #else
2387b4799947SRuslan Bukin 		bzero(item, zone->uz_size);
2388b4799947SRuslan Bukin #endif
23894e180881SMateusz Guzik 	}
23904e180881SMateusz Guzik 	return (item);
23914e180881SMateusz Guzik }
23924e180881SMateusz Guzik 
23934e180881SMateusz Guzik /*
23944e180881SMateusz Guzik  * A stub while both regular and pcpu cases are identical.
23944e180881SMateusz Guzik  * A stub while the regular and pcpu cases remain identical.
23964e180881SMateusz Guzik void
23974e180881SMateusz Guzik uma_zfree_pcpu_arg(uma_zone_t zone, void *item, void *udata)
23984e180881SMateusz Guzik {
23994e180881SMateusz Guzik 
2400c5b7751fSIan Lepore #ifdef SMP
24014e180881SMateusz Guzik 	MPASS(zone->uz_flags & UMA_ZONE_PCPU);
2402c5b7751fSIan Lepore #endif
24034e180881SMateusz Guzik 	uma_zfree_arg(zone, item, udata);
24044e180881SMateusz Guzik }
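
/*
 * Illustrative use of the pcpu wrappers above (an editorial sketch;
 * "pcpu_zone" is hypothetical).  M_ZERO is handled by the wrapper
 * because one item comprises mp_maxid + 1 per-CPU copies:
 *
 *	p = uma_zalloc_pcpu_arg(pcpu_zone, NULL, M_WAITOK | M_ZERO);
 *	...
 *	uma_zfree_pcpu_arg(pcpu_zone, p, NULL);
 */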
24054e180881SMateusz Guzik 
24069c2cd7e5SJeff Roberson /* See uma.h */
24078355f576SJeff Roberson void *
24082cc35ff9SJeff Roberson uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
24098355f576SJeff Roberson {
2410ab3185d1SJeff Roberson 	uma_zone_domain_t zdom;
24118355f576SJeff Roberson 	uma_bucket_t bucket;
2412ab3185d1SJeff Roberson 	uma_cache_t cache;
2413ab3185d1SJeff Roberson 	void *item;
2414bb15d1c7SGleb Smirnoff 	int cpu, domain, lockfail, maxbucket;
2415c5deaf04SGleb Smirnoff #ifdef INVARIANTS
2416c5deaf04SGleb Smirnoff 	bool skipdbg;
2417c5deaf04SGleb Smirnoff #endif
24188355f576SJeff Roberson 
2419e866d8f0SMark Murray 	/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
242019fa89e9SMark Murray 	random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA);
242110cb2424SMark Murray 
24228355f576SJeff Roberson 	/* This is the fast path allocation */
24231431a748SGleb Smirnoff 	CTR4(KTR_UMA, "uma_zalloc_arg thread %x zone %s(%p) flags %d",
24241431a748SGleb Smirnoff 	    curthread, zone->uz_name, zone, flags);
2425a553d4b8SJeff Roberson 
2426635fd505SRobert Watson 	if (flags & M_WAITOK) {
2427b23f72e9SBrian Feldman 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
2428635fd505SRobert Watson 		    "uma_zalloc_arg: zone \"%s\"", zone->uz_name);
24294c1cc01cSJohn Baldwin 	}
24300766f278SJonathan T. Looney 	KASSERT((flags & M_EXEC) == 0, ("uma_zalloc_arg: called with M_EXEC"));
2431d9e2e68dSMark Johnston 	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
24321067a2baSJonathan T. Looney 	    ("uma_zalloc_arg: called with spinlock or critical section held"));
2433ea99223eSMateusz Guzik 	if (zone->uz_flags & UMA_ZONE_PCPU)
2434b8af2820SMateusz Guzik 		KASSERT((flags & M_ZERO) == 0, ("allocating from a pcpu zone "
2435b8af2820SMateusz Guzik 		    "with M_ZERO passed"));
24361067a2baSJonathan T. Looney 
24378d689e04SGleb Smirnoff #ifdef DEBUG_MEMGUARD
24388d689e04SGleb Smirnoff 	if (memguard_cmp_zone(zone)) {
24398d689e04SGleb Smirnoff 		item = memguard_alloc(zone->uz_size, flags);
24408d689e04SGleb Smirnoff 		if (item != NULL) {
24418d689e04SGleb Smirnoff 			if (zone->uz_init != NULL &&
24428d689e04SGleb Smirnoff 			    zone->uz_init(item, zone->uz_size, flags) != 0)
24438d689e04SGleb Smirnoff 				return (NULL);
24448d689e04SGleb Smirnoff 			if (zone->uz_ctor != NULL &&
2445fc03d22bSJeff Roberson 			    zone->uz_ctor(item, zone->uz_size, udata,
2446fc03d22bSJeff Roberson 			    flags) != 0) {
24478d689e04SGleb Smirnoff 			    	zone->uz_fini(item, zone->uz_size);
24488d689e04SGleb Smirnoff 				return (NULL);
24498d689e04SGleb Smirnoff 			}
24508d689e04SGleb Smirnoff 			return (item);
24518d689e04SGleb Smirnoff 		}
24528d689e04SGleb Smirnoff 		/* This is unfortunate but should not be fatal. */
24538d689e04SGleb Smirnoff 	}
24548d689e04SGleb Smirnoff #endif
24555d1ae027SRobert Watson 	/*
24565d1ae027SRobert Watson 	 * If possible, allocate from the per-CPU cache.  There are two
24575d1ae027SRobert Watson 	 * requirements for safe access to the per-CPU cache: (1) the thread
24585d1ae027SRobert Watson 	 * accessing the cache must not be preempted or yield during access,
24595d1ae027SRobert Watson 	 * and (2) the thread must not migrate CPUs without switching which
24605d1ae027SRobert Watson 	 * cache it accesses.  We rely on a critical section to prevent
24615d1ae027SRobert Watson 	 * preemption and migration.  We release the critical section in
24625d1ae027SRobert Watson 	 * order to acquire the zone mutex if we are unable to allocate from
24635d1ae027SRobert Watson 	 * the current cache; when we re-acquire the critical section, we
24645d1ae027SRobert Watson 	 * must detect and handle migration if it has occurred.
24655d1ae027SRobert Watson 	 */
246681c0d72cSGleb Smirnoff zalloc_restart:
24675d1ae027SRobert Watson 	critical_enter();
24685d1ae027SRobert Watson 	cpu = curcpu;
24698355f576SJeff Roberson 	cache = &zone->uz_cpu[cpu];
24708355f576SJeff Roberson 
24718355f576SJeff Roberson zalloc_start:
24728355f576SJeff Roberson 	bucket = cache->uc_allocbucket;
2473fc03d22bSJeff Roberson 	if (bucket != NULL && bucket->ub_cnt > 0) {
2474cae33c14SJeff Roberson 		bucket->ub_cnt--;
2475cae33c14SJeff Roberson 		item = bucket->ub_bucket[bucket->ub_cnt];
24768355f576SJeff Roberson #ifdef INVARIANTS
2477cae33c14SJeff Roberson 		bucket->ub_bucket[bucket->ub_cnt] = NULL;
24788355f576SJeff Roberson #endif
2479fc03d22bSJeff Roberson 		KASSERT(item != NULL, ("uma_zalloc: Bucket pointer mangled."));
24808355f576SJeff Roberson 		cache->uc_allocs++;
24815d1ae027SRobert Watson 		critical_exit();
2482c5deaf04SGleb Smirnoff #ifdef INVARIANTS
2483c5deaf04SGleb Smirnoff 		skipdbg = uma_dbg_zskip(zone, item);
2484c5deaf04SGleb Smirnoff #endif
2485fc03d22bSJeff Roberson 		if (zone->uz_ctor != NULL &&
2486c5deaf04SGleb Smirnoff #ifdef INVARIANTS
2487c5deaf04SGleb Smirnoff 		    (!skipdbg || zone->uz_ctor != trash_ctor ||
2488c5deaf04SGleb Smirnoff 		    zone->uz_dtor != trash_dtor) &&
2489c5deaf04SGleb Smirnoff #endif
2490fc03d22bSJeff Roberson 		    zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
24912efcc8cbSGleb Smirnoff 			counter_u64_add(zone->uz_fails, 1);
2492bb15d1c7SGleb Smirnoff 			zone_free_item(zone, item, udata, SKIP_DTOR | SKIP_CNT);
2493b23f72e9SBrian Feldman 			return (NULL);
2494b23f72e9SBrian Feldman 		}
2495ef72505eSJeff Roberson #ifdef INVARIANTS
2496c5deaf04SGleb Smirnoff 		if (!skipdbg)
2497ef72505eSJeff Roberson 			uma_dbg_alloc(zone, NULL, item);
2498ef72505eSJeff Roberson #endif
24992cc35ff9SJeff Roberson 		if (flags & M_ZERO)
250048343a2fSGleb Smirnoff 			uma_zero_item(item, zone);
25018355f576SJeff Roberson 		return (item);
2502fc03d22bSJeff Roberson 	}
2503fc03d22bSJeff Roberson 
25048355f576SJeff Roberson 	/*
25058355f576SJeff Roberson 	 * We have run out of items in our alloc bucket.
25068355f576SJeff Roberson 	 * See if we can switch with our free bucket.
25078355f576SJeff Roberson 	 */
2508b983089aSJeff Roberson 	bucket = cache->uc_freebucket;
2509fc03d22bSJeff Roberson 	if (bucket != NULL && bucket->ub_cnt > 0) {
25101431a748SGleb Smirnoff 		CTR2(KTR_UMA,
25111431a748SGleb Smirnoff 		    "uma_zalloc: zone %s(%p) swapping empty with alloc",
25121431a748SGleb Smirnoff 		    zone->uz_name, zone);
25138355f576SJeff Roberson 		cache->uc_freebucket = cache->uc_allocbucket;
2514b983089aSJeff Roberson 		cache->uc_allocbucket = bucket;
25158355f576SJeff Roberson 		goto zalloc_start;
25168355f576SJeff Roberson 	}
2517fc03d22bSJeff Roberson 
2518fc03d22bSJeff Roberson 	/*
2519fc03d22bSJeff Roberson 	 * Discard any empty allocation bucket while we hold no locks.
2520fc03d22bSJeff Roberson 	 */
2521fc03d22bSJeff Roberson 	bucket = cache->uc_allocbucket;
2522fc03d22bSJeff Roberson 	cache->uc_allocbucket = NULL;
2523fc03d22bSJeff Roberson 	critical_exit();
2524fc03d22bSJeff Roberson 	if (bucket != NULL)
25256fd34d6fSJeff Roberson 		bucket_free(zone, bucket, udata);
2526fc03d22bSJeff Roberson 
2527fc03d22bSJeff Roberson 	/* Short-circuit for zones without buckets and low memory. */
2528bb15d1c7SGleb Smirnoff 	if (zone->uz_count == 0 || bucketdisable) {
2529bb15d1c7SGleb Smirnoff 		ZONE_LOCK(zone);
2530c1685086SJeff Roberson 		if (zone->uz_flags & UMA_ZONE_NUMA)
2531c1685086SJeff Roberson 			domain = PCPU_GET(domain);
2532c1685086SJeff Roberson 		else
2533c1685086SJeff Roberson 			domain = UMA_ANYDOMAIN;
2534fc03d22bSJeff Roberson 		goto zalloc_item;
2535bb15d1c7SGleb Smirnoff 	}
2536fc03d22bSJeff Roberson 
25375d1ae027SRobert Watson 	/*
25385d1ae027SRobert Watson 	 * Our attempt to retrieve the item from the per-CPU cache has failed, so
25395d1ae027SRobert Watson 	 * we must go back to the zone.  This requires the zone lock, so we
25405d1ae027SRobert Watson 	 * must drop the critical section, then re-acquire it when we go back
25415d1ae027SRobert Watson 	 * to the cache.  Since the critical section is released, we may be
25425d1ae027SRobert Watson 	 * preempted or migrate.  As such, make sure not to maintain any
25435d1ae027SRobert Watson 	 * thread-local state specific to the cache from prior to releasing
25445d1ae027SRobert Watson 	 * the critical section.
25455d1ae027SRobert Watson 	 */
2546fc03d22bSJeff Roberson 	lockfail = 0;
2547fc03d22bSJeff Roberson 	if (ZONE_TRYLOCK(zone) == 0) {
2548fc03d22bSJeff Roberson 		/* Record contention to size the buckets. */
2549a553d4b8SJeff Roberson 		ZONE_LOCK(zone);
2550fc03d22bSJeff Roberson 		lockfail = 1;
2551fc03d22bSJeff Roberson 	}
25525d1ae027SRobert Watson 	critical_enter();
25535d1ae027SRobert Watson 	cpu = curcpu;
25545d1ae027SRobert Watson 	cache = &zone->uz_cpu[cpu];
25555d1ae027SRobert Watson 
2556fc03d22bSJeff Roberson 	/* See if we lost the race to fill the cache. */
2557fc03d22bSJeff Roberson 	if (cache->uc_allocbucket != NULL) {
2558fc03d22bSJeff Roberson 		ZONE_UNLOCK(zone);
2559fc03d22bSJeff Roberson 		goto zalloc_start;
2560a553d4b8SJeff Roberson 	}
25618355f576SJeff Roberson 
2562fc03d22bSJeff Roberson 	/*
2563fc03d22bSJeff Roberson 	 * Check the zone's cache of buckets.
2564fc03d22bSJeff Roberson 	 */
2565c1685086SJeff Roberson 	if (zone->uz_flags & UMA_ZONE_NUMA) {
2566c1685086SJeff Roberson 		domain = PCPU_GET(domain);
2567ab3185d1SJeff Roberson 		zdom = &zone->uz_domain[domain];
2568c1685086SJeff Roberson 	} else {
2569c1685086SJeff Roberson 		domain = UMA_ANYDOMAIN;
2570c1685086SJeff Roberson 		zdom = &zone->uz_domain[0];
2571c1685086SJeff Roberson 	}
2572c1685086SJeff Roberson 
2573*08cfa56eSMark Johnston 	if ((bucket = zone_fetch_bucket(zone, zdom)) != NULL) {
2574cae33c14SJeff Roberson 		KASSERT(bucket->ub_cnt != 0,
2575a553d4b8SJeff Roberson 		    ("uma_zalloc_arg: Returning an empty bucket."));
2576a553d4b8SJeff Roberson 		cache->uc_allocbucket = bucket;
2577a553d4b8SJeff Roberson 		ZONE_UNLOCK(zone);
25788355f576SJeff Roberson 		goto zalloc_start;
2579a553d4b8SJeff Roberson 	}
25805d1ae027SRobert Watson 	/* We are no longer associated with this CPU. */
25815d1ae027SRobert Watson 	critical_exit();
2582bbee39c6SJeff Roberson 
2583fc03d22bSJeff Roberson 	/*
2584fc03d22bSJeff Roberson 	 * We bump the uz count when the cache size is insufficient to
2585fc03d22bSJeff Roberson 	 * handle the working set.
2586fc03d22bSJeff Roberson 	 */
2587bb15d1c7SGleb Smirnoff 	if (lockfail && zone->uz_count < zone->uz_count_max)
2588a553d4b8SJeff Roberson 		zone->uz_count++;
2589bb15d1c7SGleb Smirnoff 
2590bb15d1c7SGleb Smirnoff 	if (zone->uz_max_items > 0) {
2591bb15d1c7SGleb Smirnoff 		if (zone->uz_items >= zone->uz_max_items)
2592bb15d1c7SGleb Smirnoff 			goto zalloc_item;
2593bb15d1c7SGleb Smirnoff 		maxbucket = MIN(zone->uz_count,
2594bb15d1c7SGleb Smirnoff 		    zone->uz_max_items - zone->uz_items);
2595bb45b411SGleb Smirnoff 		zone->uz_items += maxbucket;
2596bb15d1c7SGleb Smirnoff 	} else
2597bb15d1c7SGleb Smirnoff 		maxbucket = zone->uz_count;
2598fc03d22bSJeff Roberson 	ZONE_UNLOCK(zone);
2599099a0e58SBosko Milekic 
26008355f576SJeff Roberson 	/*
2601a553d4b8SJeff Roberson 	 * Now let's just fill a bucket and put it on the free list.  If that
2602763df3ecSPedro F. Giffuni 	 * works we'll restart the allocation from the beginning and it
2603fc03d22bSJeff Roberson 	 * will use the just-filled bucket.
2604bbee39c6SJeff Roberson 	 */
2605bb15d1c7SGleb Smirnoff 	bucket = zone_alloc_bucket(zone, udata, domain, flags, maxbucket);
26061431a748SGleb Smirnoff 	CTR3(KTR_UMA, "uma_zalloc: zone %s(%p) bucket zone returned %p",
26071431a748SGleb Smirnoff 	    zone->uz_name, zone, bucket);
2608fc03d22bSJeff Roberson 	ZONE_LOCK(zone);
2609bb15d1c7SGleb Smirnoff 	if (bucket != NULL) {
2610bb45b411SGleb Smirnoff 		if (zone->uz_max_items > 0 && bucket->ub_cnt < maxbucket) {
2611bb45b411SGleb Smirnoff 			MPASS(zone->uz_items >= maxbucket - bucket->ub_cnt);
2612bb15d1c7SGleb Smirnoff 			zone->uz_items -= maxbucket - bucket->ub_cnt;
2613bb15d1c7SGleb Smirnoff 			if (zone->uz_sleepers > 0 &&
2614bb15d1c7SGleb Smirnoff 			    zone->uz_items < zone->uz_max_items)
2615bb15d1c7SGleb Smirnoff 				wakeup_one(zone);
2616bb15d1c7SGleb Smirnoff 		}
2617fc03d22bSJeff Roberson 		critical_enter();
2618fc03d22bSJeff Roberson 		cpu = curcpu;
2619fc03d22bSJeff Roberson 		cache = &zone->uz_cpu[cpu];
26200f9b7bf3SMark Johnston 
2621fc03d22bSJeff Roberson 		/*
2622fc03d22bSJeff Roberson 		 * See if we lost the race or were migrated.  Cache the
2623fc03d22bSJeff Roberson 		 * initialized bucket to make this less likely or claim
2624fc03d22bSJeff Roberson 		 * the memory directly.
2625fc03d22bSJeff Roberson 		 */
262681c0d72cSGleb Smirnoff 		if (cache->uc_allocbucket == NULL &&
262781c0d72cSGleb Smirnoff 		    ((zone->uz_flags & UMA_ZONE_NUMA) == 0 ||
262881c0d72cSGleb Smirnoff 		    domain == PCPU_GET(domain))) {
2629ab3185d1SJeff Roberson 			cache->uc_allocbucket = bucket;
26300f9b7bf3SMark Johnston 			zdom->uzd_imax += bucket->ub_cnt;
2631bb15d1c7SGleb Smirnoff 		} else if (zone->uz_bkt_count >= zone->uz_bkt_max) {
263281c0d72cSGleb Smirnoff 			critical_exit();
263381c0d72cSGleb Smirnoff 			ZONE_UNLOCK(zone);
263481c0d72cSGleb Smirnoff 			bucket_drain(zone, bucket);
263581c0d72cSGleb Smirnoff 			bucket_free(zone, bucket, udata);
263681c0d72cSGleb Smirnoff 			goto zalloc_restart;
263781c0d72cSGleb Smirnoff 		} else
26380f9b7bf3SMark Johnston 			zone_put_bucket(zone, zdom, bucket, false);
2639bbee39c6SJeff Roberson 		ZONE_UNLOCK(zone);
2640fc03d22bSJeff Roberson 		goto zalloc_start;
2641bb45b411SGleb Smirnoff 	} else if (zone->uz_max_items > 0) {
2642bb15d1c7SGleb Smirnoff 		zone->uz_items -= maxbucket;
2643bb15d1c7SGleb Smirnoff 		if (zone->uz_sleepers > 0 &&
2644bb15d1c7SGleb Smirnoff 		    zone->uz_items + 1 < zone->uz_max_items)
2645bb15d1c7SGleb Smirnoff 			wakeup_one(zone);
2646bbee39c6SJeff Roberson 	}
2647fc03d22bSJeff Roberson 
2648bbee39c6SJeff Roberson 	/*
2649bbee39c6SJeff Roberson 	 * We may not be able to get a bucket, so return an actual item.
2650bbee39c6SJeff Roberson 	 */
2651fc03d22bSJeff Roberson zalloc_item:
2652bb15d1c7SGleb Smirnoff 	item = zone_alloc_item_locked(zone, udata, domain, flags);
2653fc03d22bSJeff Roberson 
2654e20a199fSJeff Roberson 	return (item);
2655bbee39c6SJeff Roberson }
2656bbee39c6SJeff Roberson 
2657ab3185d1SJeff Roberson void *
2658ab3185d1SJeff Roberson uma_zalloc_domain(uma_zone_t zone, void *udata, int domain, int flags)
2659bbee39c6SJeff Roberson {
2660ab3185d1SJeff Roberson 
2661ab3185d1SJeff Roberson 	/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
266219fa89e9SMark Murray 	random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA);
2663ab3185d1SJeff Roberson 
2664ab3185d1SJeff Roberson 	/* This is the fast path allocation */
2665ab3185d1SJeff Roberson 	CTR5(KTR_UMA,
2666ab3185d1SJeff Roberson 	    "uma_zalloc_domain thread %x zone %s(%p) domain %d flags %d",
2667ab3185d1SJeff Roberson 	    curthread, zone->uz_name, zone, domain, flags);
2668ab3185d1SJeff Roberson 
2669ab3185d1SJeff Roberson 	if (flags & M_WAITOK) {
2670ab3185d1SJeff Roberson 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
2671ab3185d1SJeff Roberson 		    "uma_zalloc_domain: zone \"%s\"", zone->uz_name);
2672ab3185d1SJeff Roberson 	}
2673ab3185d1SJeff Roberson 	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
2674ab3185d1SJeff Roberson 	    ("uma_zalloc_domain: called with spinlock or critical section held"));
2675ab3185d1SJeff Roberson 
2676ab3185d1SJeff Roberson 	return (zone_alloc_item(zone, udata, domain, flags));
2677ab3185d1SJeff Roberson }
2678ab3185d1SJeff Roberson 
2679ab3185d1SJeff Roberson /*
2680ab3185d1SJeff Roberson  * Find a slab with some space.  Prefer slabs that are partially used over those
2681ab3185d1SJeff Roberson  * that are totally full.  This helps to reduce fragmentation.
2682ab3185d1SJeff Roberson  *
2683ab3185d1SJeff Roberson  * If 'rr' is 1, search all domains starting from 'domain'.  Otherwise check
2684ab3185d1SJeff Roberson  * only 'domain'.
2685ab3185d1SJeff Roberson  */
2686ab3185d1SJeff Roberson static uma_slab_t
2687194a979eSMark Johnston keg_first_slab(uma_keg_t keg, int domain, bool rr)
2688ab3185d1SJeff Roberson {
2689ab3185d1SJeff Roberson 	uma_domain_t dom;
2690bbee39c6SJeff Roberson 	uma_slab_t slab;
2691ab3185d1SJeff Roberson 	int start;
2692ab3185d1SJeff Roberson 
2693ab3185d1SJeff Roberson 	KASSERT(domain >= 0 && domain < vm_ndomains,
2694ab3185d1SJeff Roberson 	    ("keg_first_slab: domain %d out of range", domain));
2695bb15d1c7SGleb Smirnoff 	KEG_LOCK_ASSERT(keg);
2696ab3185d1SJeff Roberson 
2697ab3185d1SJeff Roberson 	slab = NULL;
2698ab3185d1SJeff Roberson 	start = domain;
2699ab3185d1SJeff Roberson 	do {
2700ab3185d1SJeff Roberson 		dom = &keg->uk_domain[domain];
2701ab3185d1SJeff Roberson 		if (!LIST_EMPTY(&dom->ud_part_slab))
2702ab3185d1SJeff Roberson 			return (LIST_FIRST(&dom->ud_part_slab));
2703ab3185d1SJeff Roberson 		if (!LIST_EMPTY(&dom->ud_free_slab)) {
2704ab3185d1SJeff Roberson 			slab = LIST_FIRST(&dom->ud_free_slab);
2705ab3185d1SJeff Roberson 			LIST_REMOVE(slab, us_link);
2706ab3185d1SJeff Roberson 			LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link);
2707ab3185d1SJeff Roberson 			return (slab);
2708ab3185d1SJeff Roberson 		}
2709ab3185d1SJeff Roberson 		if (rr)
2710ab3185d1SJeff Roberson 			domain = (domain + 1) % vm_ndomains;
2711ab3185d1SJeff Roberson 	} while (domain != start);
2712ab3185d1SJeff Roberson 
2713ab3185d1SJeff Roberson 	return (NULL);
2714ab3185d1SJeff Roberson }
2715ab3185d1SJeff Roberson 
2716ab3185d1SJeff Roberson static uma_slab_t
2717194a979eSMark Johnston keg_fetch_free_slab(uma_keg_t keg, int domain, bool rr, int flags)
2718ab3185d1SJeff Roberson {
2719194a979eSMark Johnston 	uint32_t reserve;
2720099a0e58SBosko Milekic 
2721bb15d1c7SGleb Smirnoff 	KEG_LOCK_ASSERT(keg);
2722194a979eSMark Johnston 
2723194a979eSMark Johnston 	reserve = (flags & M_USE_RESERVE) != 0 ? 0 : keg->uk_reserve;
2724194a979eSMark Johnston 	if (keg->uk_free <= reserve)
2725194a979eSMark Johnston 		return (NULL);
2726194a979eSMark Johnston 	return (keg_first_slab(keg, domain, rr));
2727194a979eSMark Johnston }
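
/*
 * Editorial example of the reserve check above: with uk_reserve == 2,
 * an ordinary request fails once uk_free drops to 2 or below, while an
 * M_USE_RESERVE request (reserve treated as 0) may consume the last
 * free items.
 */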
2728194a979eSMark Johnston 
2729194a979eSMark Johnston static uma_slab_t
2730194a979eSMark Johnston keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int rdomain, const int flags)
2731194a979eSMark Johnston {
2732194a979eSMark Johnston 	struct vm_domainset_iter di;
2733194a979eSMark Johnston 	uma_domain_t dom;
2734194a979eSMark Johnston 	uma_slab_t slab;
2735194a979eSMark Johnston 	int aflags, domain;
2736194a979eSMark Johnston 	bool rr;
2737194a979eSMark Johnston 
2738194a979eSMark Johnston restart:
2739bb15d1c7SGleb Smirnoff 	KEG_LOCK_ASSERT(keg);
2740bbee39c6SJeff Roberson 
2741bbee39c6SJeff Roberson 	/*
2742194a979eSMark Johnston 	 * Use the keg's policy if upper layers haven't already specified a
2743194a979eSMark Johnston 	 * domain (as happens with first-touch zones).
2744194a979eSMark Johnston 	 *
2745194a979eSMark Johnston 	 * To avoid races we run the iterator with the keg lock held, but that
2746194a979eSMark Johnston 	 * means that we cannot allow the vm_domainset layer to sleep.  Thus,
2747194a979eSMark Johnston 	 * clear M_WAITOK and handle low memory conditions locally.
2748bbee39c6SJeff Roberson 	 */
2749ab3185d1SJeff Roberson 	rr = rdomain == UMA_ANYDOMAIN;
2750ab3185d1SJeff Roberson 	if (rr) {
2751194a979eSMark Johnston 		aflags = (flags & ~M_WAITOK) | M_NOWAIT;
2752194a979eSMark Johnston 		vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain,
2753194a979eSMark Johnston 		    &aflags);
2754194a979eSMark Johnston 	} else {
2755194a979eSMark Johnston 		aflags = flags;
2756194a979eSMark Johnston 		domain = rdomain;
2757194a979eSMark Johnston 	}
2758ab3185d1SJeff Roberson 
2759194a979eSMark Johnston 	for (;;) {
2760194a979eSMark Johnston 		slab = keg_fetch_free_slab(keg, domain, rr, flags);
2761194a979eSMark Johnston 		if (slab != NULL) {
2762e20a199fSJeff Roberson 			MPASS(slab->us_keg == keg);
2763bbee39c6SJeff Roberson 			return (slab);
2764bbee39c6SJeff Roberson 		}
2765bbee39c6SJeff Roberson 
2766bbee39c6SJeff Roberson 		/*
2767bbee39c6SJeff Roberson 		 * M_NOVM means don't ask at all!
2768bbee39c6SJeff Roberson 		 */
2769bbee39c6SJeff Roberson 		if (flags & M_NOVM)
2770bbee39c6SJeff Roberson 			break;
2771bbee39c6SJeff Roberson 
2772bb15d1c7SGleb Smirnoff 		KASSERT(zone->uz_max_items == 0 ||
2773bb15d1c7SGleb Smirnoff 		    zone->uz_items <= zone->uz_max_items,
2774bb15d1c7SGleb Smirnoff 		    ("%s: zone %p overflow", __func__, zone));
2775bb15d1c7SGleb Smirnoff 
277686220393SMark Johnston 		slab = keg_alloc_slab(keg, zone, domain, flags, aflags);
2777bbee39c6SJeff Roberson 		/*
2778bbee39c6SJeff Roberson 		 * If we got a slab here it's safe to mark it partially used
2779bbee39c6SJeff Roberson 		 * and return.  We assume that the caller is going to remove
2780bbee39c6SJeff Roberson 		 * at least one item.
2781bbee39c6SJeff Roberson 		 */
2782bbee39c6SJeff Roberson 		if (slab) {
2783e20a199fSJeff Roberson 			MPASS(slab->us_keg == keg);
2784ab3185d1SJeff Roberson 			dom = &keg->uk_domain[slab->us_domain];
2785ab3185d1SJeff Roberson 			LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link);
2786bbee39c6SJeff Roberson 			return (slab);
2787bbee39c6SJeff Roberson 		}
2788194a979eSMark Johnston 		KEG_LOCK(keg);
2789194a979eSMark Johnston 		if (rr && vm_domainset_iter_policy(&di, &domain) != 0) {
2790194a979eSMark Johnston 			if ((flags & M_WAITOK) != 0) {
2791194a979eSMark Johnston 				KEG_UNLOCK(keg);
2792194a979eSMark Johnston 				vm_wait_doms(&keg->uk_dr.dr_policy->ds_mask);
2793194a979eSMark Johnston 				KEG_LOCK(keg);
2794194a979eSMark Johnston 				goto restart;
279530c5525bSAndrew Gallatin 			}
2796194a979eSMark Johnston 			break;
2797194a979eSMark Johnston 		}
2798ab3185d1SJeff Roberson 	}
2799ab3185d1SJeff Roberson 
2800bbee39c6SJeff Roberson 	/*
2801bbee39c6SJeff Roberson 	 * We might not have been able to get a slab, but another CPU
2802bbee39c6SJeff Roberson 	 * could have while we were unlocked.  Check again before we
2803bbee39c6SJeff Roberson 	 * fail.
2804bbee39c6SJeff Roberson 	 */
2805194a979eSMark Johnston 	if ((slab = keg_fetch_free_slab(keg, domain, rr, flags)) != NULL) {
2806ab3185d1SJeff Roberson 		MPASS(slab->us_keg == keg);
2807bbee39c6SJeff Roberson 		return (slab);
2808bbee39c6SJeff Roberson 	}
2809ab3185d1SJeff Roberson 	return (NULL);
2810ab3185d1SJeff Roberson }
2811bbee39c6SJeff Roberson 
2812e20a199fSJeff Roberson static uma_slab_t
2813ab3185d1SJeff Roberson zone_fetch_slab(uma_zone_t zone, uma_keg_t keg, int domain, int flags)
2814e20a199fSJeff Roberson {
2815e20a199fSJeff Roberson 	uma_slab_t slab;
2816e20a199fSJeff Roberson 
2817af526374SJeff Roberson 	if (keg == NULL) {
2818bb15d1c7SGleb Smirnoff 		keg = zone->uz_keg;
2819af526374SJeff Roberson 		KEG_LOCK(keg);
2820af526374SJeff Roberson 	}
2821e20a199fSJeff Roberson 
2822e20a199fSJeff Roberson 	for (;;) {
2823ab3185d1SJeff Roberson 		slab = keg_fetch_slab(keg, zone, domain, flags);
2824e20a199fSJeff Roberson 		if (slab)
2825e20a199fSJeff Roberson 			return (slab);
2826e20a199fSJeff Roberson 		if (flags & (M_NOWAIT | M_NOVM))
2827e20a199fSJeff Roberson 			break;
2828e20a199fSJeff Roberson 	}
2829af526374SJeff Roberson 	KEG_UNLOCK(keg);
2830e20a199fSJeff Roberson 	return (NULL);
2831e20a199fSJeff Roberson }
2832e20a199fSJeff Roberson 
2833d56368d7SBosko Milekic static void *
28340095a784SJeff Roberson slab_alloc_item(uma_keg_t keg, uma_slab_t slab)
2835bbee39c6SJeff Roberson {
2836ab3185d1SJeff Roberson 	uma_domain_t dom;
2837bbee39c6SJeff Roberson 	void *item;
283885dcf349SGleb Smirnoff 	uint8_t freei;
2839bbee39c6SJeff Roberson 
28400095a784SJeff Roberson 	MPASS(keg == slab->us_keg);
2841bb15d1c7SGleb Smirnoff 	KEG_LOCK_ASSERT(keg);
2842099a0e58SBosko Milekic 
2843ef72505eSJeff Roberson 	freei = BIT_FFS(SLAB_SETSIZE, &slab->us_free) - 1;
2844ef72505eSJeff Roberson 	BIT_CLR(SLAB_SETSIZE, freei, &slab->us_free);
2845099a0e58SBosko Milekic 	item = slab->us_data + (keg->uk_rsize * freei);
2846bbee39c6SJeff Roberson 	slab->us_freecount--;
2847099a0e58SBosko Milekic 	keg->uk_free--;
2848ef72505eSJeff Roberson 
2849bbee39c6SJeff Roberson 	/* Move this slab to the full list */
2850bbee39c6SJeff Roberson 	if (slab->us_freecount == 0) {
2851bbee39c6SJeff Roberson 		LIST_REMOVE(slab, us_link);
2852ab3185d1SJeff Roberson 		dom = &keg->uk_domain[slab->us_domain];
2853ab3185d1SJeff Roberson 		LIST_INSERT_HEAD(&dom->ud_full_slab, slab, us_link);
2854bbee39c6SJeff Roberson 	}
2855bbee39c6SJeff Roberson 
2856bbee39c6SJeff Roberson 	return (item);
2857bbee39c6SJeff Roberson }
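
/*
 * Editorial example of the indexing above (illustrative numbers): with
 * uk_rsize == 256 and the lowest set bit of us_free at index 3, the
 * item returned is slab->us_data + 256 * 3, i.e. the fourth item in
 * the slab.
 */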
2858bbee39c6SJeff Roberson 
2859bbee39c6SJeff Roberson static int
2860ab3185d1SJeff Roberson zone_import(uma_zone_t zone, void **bucket, int max, int domain, int flags)
28610095a784SJeff Roberson {
28620095a784SJeff Roberson 	uma_slab_t slab;
28630095a784SJeff Roberson 	uma_keg_t keg;
2864a03af342SSean Bruno #ifdef NUMA
2865ab3185d1SJeff Roberson 	int stripe;
2866a03af342SSean Bruno #endif
28670095a784SJeff Roberson 	int i;
28680095a784SJeff Roberson 
28690095a784SJeff Roberson 	slab = NULL;
28700095a784SJeff Roberson 	keg = NULL;
2871af526374SJeff Roberson 	/* Try to keep the buckets totally full */
28720095a784SJeff Roberson 	for (i = 0; i < max; ) {
2873ad66f958SGleb Smirnoff 		if ((slab = zone_fetch_slab(zone, keg, domain, flags)) == NULL)
28740095a784SJeff Roberson 			break;
28750095a784SJeff Roberson 		keg = slab->us_keg;
2876a03af342SSean Bruno #ifdef NUMA
2877ab3185d1SJeff Roberson 		stripe = howmany(max, vm_ndomains);
2878a03af342SSean Bruno #endif
28796fd34d6fSJeff Roberson 		while (slab->us_freecount && i < max) {
28800095a784SJeff Roberson 			bucket[i++] = slab_alloc_item(keg, slab);
28816fd34d6fSJeff Roberson 			if (keg->uk_free <= keg->uk_reserve)
28826fd34d6fSJeff Roberson 				break;
2883b6715dabSJeff Roberson #ifdef NUMA
2884ab3185d1SJeff Roberson 			/*
2885ab3185d1SJeff Roberson 			 * If the zone is striped we pick a new slab for every
2886ab3185d1SJeff Roberson 			 * N allocations.  Eliminating this conditional will
2887ab3185d1SJeff Roberson 			 * instead pick a new domain for each bucket rather
2888ab3185d1SJeff Roberson 			 * than stripe within each bucket.  The current option
2889ab3185d1SJeff Roberson 			 * produces more fragmentation and requires more cpu
2890ab3185d1SJeff Roberson 			 * time but yields better distribution.
2891ab3185d1SJeff Roberson 			 */
2892ab3185d1SJeff Roberson 			if ((zone->uz_flags & UMA_ZONE_NUMA) == 0 &&
2893ab3185d1SJeff Roberson 			    vm_ndomains > 1 && --stripe == 0)
2894ab3185d1SJeff Roberson 				break;
2895ab3185d1SJeff Roberson #endif
28966fd34d6fSJeff Roberson 		}
2897ab3185d1SJeff Roberson 		/* Don't block if we allocated any successfully. */
28980095a784SJeff Roberson 		flags &= ~M_WAITOK;
28990095a784SJeff Roberson 		flags |= M_NOWAIT;
29000095a784SJeff Roberson 	}
29010095a784SJeff Roberson 	if (slab != NULL)
29020095a784SJeff Roberson 		KEG_UNLOCK(keg);
29030095a784SJeff Roberson 
29040095a784SJeff Roberson 	return (i);
29050095a784SJeff Roberson }
29060095a784SJeff Roberson 
2907fc03d22bSJeff Roberson static uma_bucket_t
2908bb15d1c7SGleb Smirnoff zone_alloc_bucket(uma_zone_t zone, void *udata, int domain, int flags, int max)
2909bbee39c6SJeff Roberson {
2910bbee39c6SJeff Roberson 	uma_bucket_t bucket;
2911bbee39c6SJeff Roberson 
291230c5525bSAndrew Gallatin 	CTR1(KTR_UMA, "zone_alloc_bucket: domain %d", domain);
291330c5525bSAndrew Gallatin 
2914c1685086SJeff Roberson 	/* Avoid allocs targeting empty domains. */
2915c1685086SJeff Roberson 	if (domain != UMA_ANYDOMAIN && VM_DOMAIN_EMPTY(domain))
2916c1685086SJeff Roberson 		domain = UMA_ANYDOMAIN;
2917c1685086SJeff Roberson 
29186fd34d6fSJeff Roberson 	/* Don't wait for buckets, preserve caller's NOVM setting. */
29196fd34d6fSJeff Roberson 	bucket = bucket_alloc(zone, udata, M_NOWAIT | (flags & M_NOVM));
29200095a784SJeff Roberson 	if (bucket == NULL)
2921f7104ccdSAlexander Motin 		return (NULL);
29220095a784SJeff Roberson 
29230095a784SJeff Roberson 	bucket->ub_cnt = zone->uz_import(zone->uz_arg, bucket->ub_bucket,
292437125720SGleb Smirnoff 	    MIN(max, bucket->ub_entries), domain, flags);
29250095a784SJeff Roberson 
29260095a784SJeff Roberson 	/*
29270095a784SJeff Roberson 	 * Initialize the memory if necessary.
29280095a784SJeff Roberson 	 */
29290095a784SJeff Roberson 	if (bucket->ub_cnt != 0 && zone->uz_init != NULL) {
2930099a0e58SBosko Milekic 		int i;
2931bbee39c6SJeff Roberson 
29320095a784SJeff Roberson 		for (i = 0; i < bucket->ub_cnt; i++)
2933e20a199fSJeff Roberson 			if (zone->uz_init(bucket->ub_bucket[i], zone->uz_size,
29340095a784SJeff Roberson 			    flags) != 0)
2935b23f72e9SBrian Feldman 				break;
2936b23f72e9SBrian Feldman 		/*
2937b23f72e9SBrian Feldman 		 * If we couldn't initialize the whole bucket, put the
2938b23f72e9SBrian Feldman 		 * rest back onto the freelist.
2939b23f72e9SBrian Feldman 		 */
2940b23f72e9SBrian Feldman 		if (i != bucket->ub_cnt) {
2941af526374SJeff Roberson 			zone->uz_release(zone->uz_arg, &bucket->ub_bucket[i],
29420095a784SJeff Roberson 			    bucket->ub_cnt - i);
2943a5a262c6SBosko Milekic #ifdef INVARIANTS
29440095a784SJeff Roberson 			bzero(&bucket->ub_bucket[i],
29450095a784SJeff Roberson 			    sizeof(void *) * (bucket->ub_cnt - i));
2946a5a262c6SBosko Milekic #endif
2947b23f72e9SBrian Feldman 			bucket->ub_cnt = i;
2948b23f72e9SBrian Feldman 		}
2949099a0e58SBosko Milekic 	}
2950099a0e58SBosko Milekic 
2951f7104ccdSAlexander Motin 	if (bucket->ub_cnt == 0) {
29526fd34d6fSJeff Roberson 		bucket_free(zone, bucket, udata);
29532efcc8cbSGleb Smirnoff 		counter_u64_add(zone->uz_fails, 1);
2954fc03d22bSJeff Roberson 		return (NULL);
2955bbee39c6SJeff Roberson 	}
2956fc03d22bSJeff Roberson 
2957fc03d22bSJeff Roberson 	return (bucket);
2958fc03d22bSJeff Roberson }
2959fc03d22bSJeff Roberson 
29608355f576SJeff Roberson /*
29610095a784SJeff Roberson  * Allocates a single item from a zone.
29628355f576SJeff Roberson  *
29638355f576SJeff Roberson  * Arguments
29648355f576SJeff Roberson  *	zone   The zone to alloc for.
29658355f576SJeff Roberson  *	udata  The data to be passed to the constructor.
2966ab3185d1SJeff Roberson  *	domain The domain to allocate from or UMA_ANYDOMAIN.
2967a163d034SWarner Losh  *	flags  M_WAITOK, M_NOWAIT, M_ZERO.
29688355f576SJeff Roberson  *
29698355f576SJeff Roberson  * Returns
29708355f576SJeff Roberson  *	NULL if there is no memory and M_NOWAIT is set
2971bbee39c6SJeff Roberson  *	An item if successful
29728355f576SJeff Roberson  */
29738355f576SJeff Roberson 
29748355f576SJeff Roberson static void *
2975ab3185d1SJeff Roberson zone_alloc_item(uma_zone_t zone, void *udata, int domain, int flags)
29768355f576SJeff Roberson {
2977bb15d1c7SGleb Smirnoff 
2978bb15d1c7SGleb Smirnoff 	ZONE_LOCK(zone);
2979bb15d1c7SGleb Smirnoff 	return (zone_alloc_item_locked(zone, udata, domain, flags));
2980bb15d1c7SGleb Smirnoff }
2981bb15d1c7SGleb Smirnoff 
2982bb15d1c7SGleb Smirnoff /*
2983bb15d1c7SGleb Smirnoff  * Returns with zone unlocked.
2984bb15d1c7SGleb Smirnoff  */
2985bb15d1c7SGleb Smirnoff static void *
2986bb15d1c7SGleb Smirnoff zone_alloc_item_locked(uma_zone_t zone, void *udata, int domain, int flags)
2987bb15d1c7SGleb Smirnoff {
29888355f576SJeff Roberson 	void *item;
2989c5deaf04SGleb Smirnoff #ifdef INVARIANTS
2990c5deaf04SGleb Smirnoff 	bool skipdbg;
2991c5deaf04SGleb Smirnoff #endif
29928355f576SJeff Roberson 
2993bb15d1c7SGleb Smirnoff 	ZONE_LOCK_ASSERT(zone);
2994bb15d1c7SGleb Smirnoff 
2995bb45b411SGleb Smirnoff 	if (zone->uz_max_items > 0) {
2996bb45b411SGleb Smirnoff 		if (zone->uz_items >= zone->uz_max_items) {
2997bb15d1c7SGleb Smirnoff 			zone_log_warning(zone);
2998bb15d1c7SGleb Smirnoff 			zone_maxaction(zone);
2999bb15d1c7SGleb Smirnoff 			if (flags & M_NOWAIT) {
3000bb15d1c7SGleb Smirnoff 				ZONE_UNLOCK(zone);
3001bb15d1c7SGleb Smirnoff 				return (NULL);
3002bb15d1c7SGleb Smirnoff 			}
3003bb15d1c7SGleb Smirnoff 			zone->uz_sleeps++;
3004bb15d1c7SGleb Smirnoff 			zone->uz_sleepers++;
3005bb15d1c7SGleb Smirnoff 			while (zone->uz_items >= zone->uz_max_items)
3006e7e4bcd8SGleb Smirnoff 				mtx_sleep(zone, zone->uz_lockptr, PVM,
3007e7e4bcd8SGleb Smirnoff 				    "zonelimit", 0);
3008bb15d1c7SGleb Smirnoff 			zone->uz_sleepers--;
3009bb15d1c7SGleb Smirnoff 			if (zone->uz_sleepers > 0 &&
3010bb15d1c7SGleb Smirnoff 			    zone->uz_items + 1 < zone->uz_max_items)
3011bb15d1c7SGleb Smirnoff 				wakeup_one(zone);
3012bb15d1c7SGleb Smirnoff 		}
3013bb15d1c7SGleb Smirnoff 		zone->uz_items++;
3014bb45b411SGleb Smirnoff 	}
3015bb15d1c7SGleb Smirnoff 	ZONE_UNLOCK(zone);
30168355f576SJeff Roberson 
3017c1685086SJeff Roberson 	/* Avoid allocs targeting empty domains. */
3018c1685086SJeff Roberson 	if (domain != UMA_ANYDOMAIN && VM_DOMAIN_EMPTY(domain))
301930c5525bSAndrew Gallatin 		domain = UMA_ANYDOMAIN;
3020c1685086SJeff Roberson 
3021ab3185d1SJeff Roberson 	if (zone->uz_import(zone->uz_arg, &item, 1, domain, flags) != 1)
30220095a784SJeff Roberson 		goto fail;
30238355f576SJeff Roberson 
3024c5deaf04SGleb Smirnoff #ifdef INVARIANTS
3025c5deaf04SGleb Smirnoff 	skipdbg = uma_dbg_zskip(zone, item);
3026c5deaf04SGleb Smirnoff #endif
3027099a0e58SBosko Milekic 	/*
3028099a0e58SBosko Milekic 	 * We have to call both the zone's init (not the keg's init)
3029099a0e58SBosko Milekic 	 * and the zone's ctor.  This is because the item is going from
3030099a0e58SBosko Milekic 	 * a keg slab directly to the user, and the user is expecting it
3031099a0e58SBosko Milekic 	 * to be both zone-init'd as well as zone-ctor'd.
3032099a0e58SBosko Milekic 	 */
3033b23f72e9SBrian Feldman 	if (zone->uz_init != NULL) {
3034e20a199fSJeff Roberson 		if (zone->uz_init(item, zone->uz_size, flags) != 0) {
3035bb15d1c7SGleb Smirnoff 			zone_free_item(zone, item, udata, SKIP_FINI | SKIP_CNT);
30360095a784SJeff Roberson 			goto fail;
3037b23f72e9SBrian Feldman 		}
3038b23f72e9SBrian Feldman 	}
3039c5deaf04SGleb Smirnoff 	if (zone->uz_ctor != NULL &&
3040c5deaf04SGleb Smirnoff #ifdef INVARIANTS
3041c5deaf04SGleb Smirnoff 	    (!skipdbg || zone->uz_ctor != trash_ctor ||
3042c5deaf04SGleb Smirnoff 	    zone->uz_dtor != trash_dtor) &&
3043c5deaf04SGleb Smirnoff #endif
3044c5deaf04SGleb Smirnoff 	    zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
3045bb15d1c7SGleb Smirnoff 		zone_free_item(zone, item, udata, SKIP_DTOR | SKIP_CNT);
30460095a784SJeff Roberson 		goto fail;
3047b23f72e9SBrian Feldman 	}
3048ef72505eSJeff Roberson #ifdef INVARIANTS
3049c5deaf04SGleb Smirnoff 	if (!skipdbg)
30500095a784SJeff Roberson 		uma_dbg_alloc(zone, NULL, item);
3051ef72505eSJeff Roberson #endif
30522cc35ff9SJeff Roberson 	if (flags & M_ZERO)
305348343a2fSGleb Smirnoff 		uma_zero_item(item, zone);
30548355f576SJeff Roberson 
30552efcc8cbSGleb Smirnoff 	counter_u64_add(zone->uz_allocs, 1);
30561431a748SGleb Smirnoff 	CTR3(KTR_UMA, "zone_alloc_item item %p from %s(%p)", item,
30571431a748SGleb Smirnoff 	    zone->uz_name, zone);
30581431a748SGleb Smirnoff 
30598355f576SJeff Roberson 	return (item);
30600095a784SJeff Roberson 
30610095a784SJeff Roberson fail:
3062bb45b411SGleb Smirnoff 	if (zone->uz_max_items > 0) {
3063bb15d1c7SGleb Smirnoff 		ZONE_LOCK(zone);
3064bb15d1c7SGleb Smirnoff 		zone->uz_items--;
3065bb15d1c7SGleb Smirnoff 		ZONE_UNLOCK(zone);
3066bb45b411SGleb Smirnoff 	}
30672efcc8cbSGleb Smirnoff 	counter_u64_add(zone->uz_fails, 1);
30681431a748SGleb Smirnoff 	CTR2(KTR_UMA, "zone_alloc_item failed from %s(%p)",
30691431a748SGleb Smirnoff 	    zone->uz_name, zone);
30700095a784SJeff Roberson 	return (NULL);
30718355f576SJeff Roberson }
30728355f576SJeff Roberson 
30738355f576SJeff Roberson /* See uma.h */
30748355f576SJeff Roberson void
30758355f576SJeff Roberson uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
30768355f576SJeff Roberson {
30778355f576SJeff Roberson 	uma_cache_t cache;
30788355f576SJeff Roberson 	uma_bucket_t bucket;
3079ab3185d1SJeff Roberson 	uma_zone_domain_t zdom;
3080bb15d1c7SGleb Smirnoff 	int cpu, domain;
3081c1685086SJeff Roberson #ifdef UMA_XDOMAIN
3082c1685086SJeff Roberson 	int itemdomain;
3083c1685086SJeff Roberson #endif
3084bb15d1c7SGleb Smirnoff 	bool lockfail;
3085c5deaf04SGleb Smirnoff #ifdef INVARIANTS
3086c5deaf04SGleb Smirnoff 	bool skipdbg;
3087c5deaf04SGleb Smirnoff #endif
30888355f576SJeff Roberson 
3089e866d8f0SMark Murray 	/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
309019fa89e9SMark Murray 	random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA);
309110cb2424SMark Murray 
30923659f747SRobert Watson 	CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
30933659f747SRobert Watson 	    zone->uz_name);
30943659f747SRobert Watson 
3095d9e2e68dSMark Johnston 	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
30961067a2baSJonathan T. Looney 	    ("uma_zfree_arg: called with spinlock or critical section held"));
30971067a2baSJonathan T. Looney 
309920ed0cb0SMatthew D Fleming 	/* uma_zfree(..., NULL) does nothing, to match free(9). */
310020ed0cb0SMatthew D Fleming 	if (item == NULL)
310120ed0cb0SMatthew D Fleming 		return;
31018d689e04SGleb Smirnoff #ifdef DEBUG_MEMGUARD
31028d689e04SGleb Smirnoff 	if (is_memguard_addr(item)) {
3103bc9d08e1SMark Johnston 		if (zone->uz_dtor != NULL)
31048d689e04SGleb Smirnoff 			zone->uz_dtor(item, zone->uz_size, udata);
3105bc9d08e1SMark Johnston 		if (zone->uz_fini != NULL)
31068d689e04SGleb Smirnoff 			zone->uz_fini(item, zone->uz_size);
31078d689e04SGleb Smirnoff 		memguard_free(item);
31088d689e04SGleb Smirnoff 		return;
31098d689e04SGleb Smirnoff 	}
31108d689e04SGleb Smirnoff #endif
31115d1ae027SRobert Watson #ifdef INVARIANTS
3112c5deaf04SGleb Smirnoff 	skipdbg = uma_dbg_zskip(zone, item);
3113c5deaf04SGleb Smirnoff 	if (skipdbg == false) {
3114e20a199fSJeff Roberson 		if (zone->uz_flags & UMA_ZONE_MALLOC)
31155d1ae027SRobert Watson 			uma_dbg_free(zone, udata, item);
31165d1ae027SRobert Watson 		else
31175d1ae027SRobert Watson 			uma_dbg_free(zone, NULL, item);
3118c5deaf04SGleb Smirnoff 	}
3119c5deaf04SGleb Smirnoff 	if (zone->uz_dtor != NULL && (!skipdbg ||
3120c5deaf04SGleb Smirnoff 	    zone->uz_dtor != trash_dtor || zone->uz_ctor != trash_ctor))
3121c5deaf04SGleb Smirnoff #else
3122fc03d22bSJeff Roberson 	if (zone->uz_dtor != NULL)
3123c5deaf04SGleb Smirnoff #endif
3124ef72505eSJeff Roberson 		zone->uz_dtor(item, zone->uz_size, udata);
3125ef72505eSJeff Roberson 
3126af7f9b97SJeff Roberson 	/*
3127af7f9b97SJeff Roberson 	 * The race here is acceptable.  If we miss it we'll just have to wait
3128af7f9b97SJeff Roberson 	 * a little longer for the limits to be reset.
3129af7f9b97SJeff Roberson 	 */
3130bb15d1c7SGleb Smirnoff 	if (zone->uz_sleepers > 0)
3131fc03d22bSJeff Roberson 		goto zfree_item;
3132af7f9b97SJeff Roberson 
3133c1685086SJeff Roberson #ifdef UMA_XDOMAIN
3134c1685086SJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_NUMA) != 0)
3135c1685086SJeff Roberson 		itemdomain = _vm_phys_domain(pmap_kextract((vm_offset_t)item));
3136c1685086SJeff Roberson #endif
3137c1685086SJeff Roberson 
31385d1ae027SRobert Watson 	/*
31395d1ae027SRobert Watson 	 * If possible, free to the per-CPU cache.  There are two
31405d1ae027SRobert Watson 	 * requirements for safe access to the per-CPU cache: (1) the thread
31415d1ae027SRobert Watson 	 * accessing the cache must not be preempted or yield during access,
31425d1ae027SRobert Watson 	 * and (2) the thread must not migrate CPUs without switching which
31435d1ae027SRobert Watson 	 * cache it accesses.  We rely on a critical section to prevent
31445d1ae027SRobert Watson 	 * preemption and migration.  We release the critical section in
31455d1ae027SRobert Watson 	 * order to acquire the zone mutex if we are unable to free to the
31465d1ae027SRobert Watson 	 * current cache; when we re-acquire the critical section, we must
31475d1ae027SRobert Watson 	 * detect and handle migration if it has occurred.
31485d1ae027SRobert Watson 	 */
3149a553d4b8SJeff Roberson zfree_restart:
31505d1ae027SRobert Watson 	critical_enter();
31515d1ae027SRobert Watson 	cpu = curcpu;
31528355f576SJeff Roberson 	cache = &zone->uz_cpu[cpu];
31538355f576SJeff Roberson 
31548355f576SJeff Roberson zfree_start:
3155c1685086SJeff Roberson 	domain = PCPU_GET(domain);
3156c1685086SJeff Roberson #ifdef UMA_XDOMAIN
3157c1685086SJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_NUMA) == 0)
3158c1685086SJeff Roberson 		itemdomain = domain;
3159c1685086SJeff Roberson #endif
3160a553d4b8SJeff Roberson 	/*
3161fc03d22bSJeff Roberson 	 * Try to free into the allocbucket first to give LIFO ordering
3162fc03d22bSJeff Roberson 	 * for cache-hot data structures.  Spill over into the freebucket
3163fc03d22bSJeff Roberson 	 * if necessary.  Alloc will swap them if one runs dry.
3164a553d4b8SJeff Roberson 	 */
3165c1685086SJeff Roberson #ifdef UMA_XDOMAIN
3166c1685086SJeff Roberson 	if (domain != itemdomain) {
3167c1685086SJeff Roberson 		bucket = cache->uc_crossbucket;
3168c1685086SJeff Roberson 	} else
3169c1685086SJeff Roberson #endif
3170c1685086SJeff Roberson 	{
3171fc03d22bSJeff Roberson 		bucket = cache->uc_allocbucket;
3172fc03d22bSJeff Roberson 		if (bucket == NULL || bucket->ub_cnt >= bucket->ub_entries)
3173fc03d22bSJeff Roberson 			bucket = cache->uc_freebucket;
3174c1685086SJeff Roberson 	}
3175fc03d22bSJeff Roberson 	if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) {
3176cae33c14SJeff Roberson 		KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
31778355f576SJeff Roberson 		    ("uma_zfree: Freeing to non free bucket index."));
3178cae33c14SJeff Roberson 		bucket->ub_bucket[bucket->ub_cnt] = item;
3179cae33c14SJeff Roberson 		bucket->ub_cnt++;
3180773df9abSRobert Watson 		cache->uc_frees++;
31815d1ae027SRobert Watson 		critical_exit();
31828355f576SJeff Roberson 		return;
3183fc03d22bSJeff Roberson 	}
3184fc03d22bSJeff Roberson 
31858355f576SJeff Roberson 	/*
31865d1ae027SRobert Watson 	 * We must go back to the zone, which requires acquiring the zone lock,
31875d1ae027SRobert Watson 	 * which in turn means we must release and re-acquire the critical
31885d1ae027SRobert Watson 	 * section.  Since the critical section is released, we may be
31895d1ae027SRobert Watson 	 * preempted or migrate.  As such, make sure not to maintain any
31905d1ae027SRobert Watson 	 * thread-local state specific to the cache from prior to releasing
31915d1ae027SRobert Watson 	 * the critical section.
31928355f576SJeff Roberson 	 */
31935d1ae027SRobert Watson 	critical_exit();
3194fc03d22bSJeff Roberson 	if (zone->uz_count == 0 || bucketdisable)
3195fc03d22bSJeff Roberson 		goto zfree_item;
3196fc03d22bSJeff Roberson 
3197bb15d1c7SGleb Smirnoff 	lockfail = false;
31984d104ba0SAlexander Motin 	if (ZONE_TRYLOCK(zone) == 0) {
31994d104ba0SAlexander Motin 		/* Record contention to size the buckets. */
32008355f576SJeff Roberson 		ZONE_LOCK(zone);
3201bb15d1c7SGleb Smirnoff 		lockfail = true;
32024d104ba0SAlexander Motin 	}
32035d1ae027SRobert Watson 	critical_enter();
32045d1ae027SRobert Watson 	cpu = curcpu;
3205c1685086SJeff Roberson 	domain = PCPU_GET(domain);
32065d1ae027SRobert Watson 	cache = &zone->uz_cpu[cpu];
32078355f576SJeff Roberson 
3208c1685086SJeff Roberson #ifdef UMA_XDOMAIN
3209c1685086SJeff Roberson 	if (domain != itemdomain)
3210c1685086SJeff Roberson 		bucket = cache->uc_crossbucket;
3211c1685086SJeff Roberson 	else
3212c1685086SJeff Roberson #endif
32138355f576SJeff Roberson 		bucket = cache->uc_freebucket;
3214fc03d22bSJeff Roberson 	if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) {
3215fc03d22bSJeff Roberson 		ZONE_UNLOCK(zone);
3216fc03d22bSJeff Roberson 		goto zfree_start;
3217fc03d22bSJeff Roberson 	}
3218c1685086SJeff Roberson #ifdef UMA_XDOMAIN
3219c1685086SJeff Roberson 	if (domain != itemdomain)
3220c1685086SJeff Roberson 		cache->uc_crossbucket = NULL;
3221c1685086SJeff Roberson 	else
3222c1685086SJeff Roberson #endif
32238355f576SJeff Roberson 		cache->uc_freebucket = NULL;
3224afa5d703SMark Johnston 	/* We are no longer associated with this CPU. */
3225afa5d703SMark Johnston 	critical_exit();
32268355f576SJeff Roberson 
3227c1685086SJeff Roberson #ifdef UMA_XDOMAIN
3228c1685086SJeff Roberson 	if (domain != itemdomain) {
3229c1685086SJeff Roberson 		if (bucket != NULL) {
3230c1685086SJeff Roberson 			zone->uz_xdomain += bucket->ub_cnt;
3231c1685086SJeff Roberson 			if (vm_ndomains > 2 ||
3232c1685086SJeff Roberson 			    zone->uz_bkt_count >= zone->uz_bkt_max) {
3233c1685086SJeff Roberson 				ZONE_UNLOCK(zone);
3234c1685086SJeff Roberson 				bucket_drain(zone, bucket);
3235c1685086SJeff Roberson 				bucket_free(zone, bucket, udata);
3236c1685086SJeff Roberson 			} else {
3237c1685086SJeff Roberson 				zdom = &zone->uz_domain[itemdomain];
3238c1685086SJeff Roberson 				zone_put_bucket(zone, zdom, bucket, true);
3239c1685086SJeff Roberson 				ZONE_UNLOCK(zone);
3240c1685086SJeff Roberson 			}
324130c5525bSAndrew Gallatin 		} else
3242c1685086SJeff Roberson 			ZONE_UNLOCK(zone);
3243c1685086SJeff Roberson 		bucket = bucket_alloc(zone, udata, M_NOWAIT);
3244c1685086SJeff Roberson 		if (bucket == NULL)
3245c1685086SJeff Roberson 			goto zfree_item;
3246c1685086SJeff Roberson 		critical_enter();
3247c1685086SJeff Roberson 		cpu = curcpu;
3248c1685086SJeff Roberson 		cache = &zone->uz_cpu[cpu];
3249c1685086SJeff Roberson 		if (cache->uc_crossbucket == NULL) {
3250c1685086SJeff Roberson 			cache->uc_crossbucket = bucket;
3251c1685086SJeff Roberson 			goto zfree_start;
3252c1685086SJeff Roberson 		}
3253c1685086SJeff Roberson 		critical_exit();
3254c1685086SJeff Roberson 		bucket_free(zone, bucket, udata);
3255c1685086SJeff Roberson 		goto zfree_restart;
3256c1685086SJeff Roberson 	}
3257c1685086SJeff Roberson #endif
3258c1685086SJeff Roberson 
3259c1685086SJeff Roberson 	if ((zone->uz_flags & UMA_ZONE_NUMA) != 0) {
3260c1685086SJeff Roberson 		zdom = &zone->uz_domain[domain];
3261c1685086SJeff Roberson 	} else {
3262ab3185d1SJeff Roberson 		domain = 0;
3263ab3185d1SJeff Roberson 		zdom = &zone->uz_domain[0];
3264c1685086SJeff Roberson 	}
3265ab3185d1SJeff Roberson 
32668355f576SJeff Roberson 	/* Can we throw this on the zone full list? */
32678355f576SJeff Roberson 	if (bucket != NULL) {
32681431a748SGleb Smirnoff 		CTR3(KTR_UMA,
32691431a748SGleb Smirnoff 		    "uma_zfree: zone %s(%p) putting bucket %p on free list",
32701431a748SGleb Smirnoff 		    zone->uz_name, zone, bucket);
3271cae33c14SJeff Roberson 		/* ub_cnt counts filled slots; the bucket must be full here. */
3272bb15d1c7SGleb Smirnoff 		KASSERT(bucket->ub_cnt == bucket->ub_entries,
3273bb15d1c7SGleb Smirnoff 		    ("uma_zfree: Attempting to insert a non-full bucket onto the full list."));
3274bb15d1c7SGleb Smirnoff 		if (zone->uz_bkt_count >= zone->uz_bkt_max) {
3275e8bb2dc7SJeff Roberson 			ZONE_UNLOCK(zone);
3276e8bb2dc7SJeff Roberson 			bucket_drain(zone, bucket);
3277e8bb2dc7SJeff Roberson 			bucket_free(zone, bucket, udata);
3278e8bb2dc7SJeff Roberson 			goto zfree_restart;
3279e8bb2dc7SJeff Roberson 		} else
32800f9b7bf3SMark Johnston 			zone_put_bucket(zone, zdom, bucket, true);
32818355f576SJeff Roberson 	}
3282fc03d22bSJeff Roberson 
32834d104ba0SAlexander Motin 	/*
32844d104ba0SAlexander Motin 	 * We bump the per-CPU bucket size (uz_count) when lock contention
32854d104ba0SAlexander Motin 	 * shows the cache size is insufficient to handle the working set.
32864d104ba0SAlexander Motin 	 */
3287bb15d1c7SGleb Smirnoff 	if (lockfail && zone->uz_count < zone->uz_count_max)
32884d104ba0SAlexander Motin 		zone->uz_count++;
3289a553d4b8SJeff Roberson 	ZONE_UNLOCK(zone);
3290a553d4b8SJeff Roberson 
32916fd34d6fSJeff Roberson 	bucket = bucket_alloc(zone, udata, M_NOWAIT);
32921431a748SGleb Smirnoff 	CTR3(KTR_UMA, "uma_zfree: zone %s(%p) allocated bucket %p",
32931431a748SGleb Smirnoff 	    zone->uz_name, zone, bucket);
32944741dcbfSJeff Roberson 	if (bucket) {
3295fc03d22bSJeff Roberson 		critical_enter();
3296fc03d22bSJeff Roberson 		cpu = curcpu;
3297fc03d22bSJeff Roberson 		cache = &zone->uz_cpu[cpu];
3298ab3185d1SJeff Roberson 		if (cache->uc_freebucket == NULL &&
3299ab3185d1SJeff Roberson 		    ((zone->uz_flags & UMA_ZONE_NUMA) == 0 ||
3300ab3185d1SJeff Roberson 		    domain == PCPU_GET(domain))) {
3301fc03d22bSJeff Roberson 			cache->uc_freebucket = bucket;
3302fc03d22bSJeff Roberson 			goto zfree_start;
3303fc03d22bSJeff Roberson 		}
3304fc03d22bSJeff Roberson 		/*
3305fc03d22bSJeff Roberson 		 * We lost the race, start over.  We have to drop our
3306fc03d22bSJeff Roberson 		 * critical section to free the bucket.
3307fc03d22bSJeff Roberson 		 */
3308fc03d22bSJeff Roberson 		critical_exit();
33096fd34d6fSJeff Roberson 		bucket_free(zone, bucket, udata);
3310a553d4b8SJeff Roberson 		goto zfree_restart;
33118355f576SJeff Roberson 	}
33128355f576SJeff Roberson 
3313a553d4b8SJeff Roberson 	/*
3314a553d4b8SJeff Roberson 	 * If nothing else caught this, we'll just do an internal free.
3315a553d4b8SJeff Roberson 	 */
3316fc03d22bSJeff Roberson zfree_item:
33170095a784SJeff Roberson 	zone_free_item(zone, item, udata, SKIP_DTOR);
33188355f576SJeff Roberson }
33198355f576SJeff Roberson 
3320ab3185d1SJeff Roberson void
3321ab3185d1SJeff Roberson uma_zfree_domain(uma_zone_t zone, void *item, void *udata)
3322ab3185d1SJeff Roberson {
3323ab3185d1SJeff Roberson 
3324ab3185d1SJeff Roberson 	/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
332519fa89e9SMark Murray 	random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA);
3326ab3185d1SJeff Roberson 
3327ab3185d1SJeff Roberson 	CTR2(KTR_UMA, "uma_zfree_domain thread %x zone %s", curthread,
3328ab3185d1SJeff Roberson 	    zone->uz_name);
3329ab3185d1SJeff Roberson 
3330ab3185d1SJeff Roberson 	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
3331ab3185d1SJeff Roberson 	    ("uma_zfree_domain: called with spinlock or critical section held"));
3332ab3185d1SJeff Roberson 
3333ab3185d1SJeff Roberson 	/* uma_zfree(..., NULL) does nothing, to match free(9). */
3334ab3185d1SJeff Roberson 	if (item == NULL)
3335ab3185d1SJeff Roberson 		return;
3336ab3185d1SJeff Roberson 	zone_free_item(zone, item, udata, SKIP_NONE);
3337ab3185d1SJeff Roberson }
3338ab3185d1SJeff Roberson 
33398355f576SJeff Roberson static void
3340bb15d1c7SGleb Smirnoff slab_free_item(uma_zone_t zone, uma_slab_t slab, void *item)
33418355f576SJeff Roberson {
3342bb15d1c7SGleb Smirnoff 	uma_keg_t keg;
3343ab3185d1SJeff Roberson 	uma_domain_t dom;
334485dcf349SGleb Smirnoff 	uint8_t freei;
3345099a0e58SBosko Milekic 
3346bb15d1c7SGleb Smirnoff 	keg = zone->uz_keg;
3347bb15d1c7SGleb Smirnoff 	MPASS(zone->uz_lockptr == &keg->uk_lock);
3348bb15d1c7SGleb Smirnoff 	KEG_LOCK_ASSERT(keg);
3349e20a199fSJeff Roberson 	MPASS(keg == slab->us_keg);
33508355f576SJeff Roberson 
3351ab3185d1SJeff Roberson 	dom = &keg->uk_domain[slab->us_domain];
3352ab3185d1SJeff Roberson 
33538355f576SJeff Roberson 	/* Do we need to remove from any lists? */
3354099a0e58SBosko Milekic 	if (slab->us_freecount+1 == keg->uk_ipers) {
33558355f576SJeff Roberson 		LIST_REMOVE(slab, us_link);
3356ab3185d1SJeff Roberson 		LIST_INSERT_HEAD(&dom->ud_free_slab, slab, us_link);
33578355f576SJeff Roberson 	} else if (slab->us_freecount == 0) {
33588355f576SJeff Roberson 		LIST_REMOVE(slab, us_link);
3359ab3185d1SJeff Roberson 		LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link);
33608355f576SJeff Roberson 	}
33618355f576SJeff Roberson 
3362ef72505eSJeff Roberson 	/* Slab management. */
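	/*
	 * Illustrative arithmetic, using hypothetical sizes: with
	 * uk_rsize = 256 and an item that starts 512 bytes into us_data,
	 * freei = 512 / 256 = 2, so bit 2 is set in the slab's free bitmap.
	 */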
3363ef72505eSJeff Roberson 	freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
3364ef72505eSJeff Roberson 	BIT_SET(SLAB_SETSIZE, freei, &slab->us_free);
33658355f576SJeff Roberson 	slab->us_freecount++;
33668355f576SJeff Roberson 
3367ef72505eSJeff Roberson 	/* Keg statistics. */
3368099a0e58SBosko Milekic 	keg->uk_free++;
33690095a784SJeff Roberson }
33700095a784SJeff Roberson 
33710095a784SJeff Roberson static void
33720095a784SJeff Roberson zone_release(uma_zone_t zone, void **bucket, int cnt)
33730095a784SJeff Roberson {
33740095a784SJeff Roberson 	void *item;
33750095a784SJeff Roberson 	uma_slab_t slab;
33760095a784SJeff Roberson 	uma_keg_t keg;
33770095a784SJeff Roberson 	uint8_t *mem;
33780095a784SJeff Roberson 	int i;
33798355f576SJeff Roberson 
3380bb15d1c7SGleb Smirnoff 	keg = zone->uz_keg;
3381af526374SJeff Roberson 	KEG_LOCK(keg);
33820095a784SJeff Roberson 	for (i = 0; i < cnt; i++) {
33830095a784SJeff Roberson 		item = bucket[i];
33840095a784SJeff Roberson 		if (!(zone->uz_flags & UMA_ZONE_VTOSLAB)) {
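			/* Round down to the page base to locate the slab. */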
33850095a784SJeff Roberson 			mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
33860095a784SJeff Roberson 			if (zone->uz_flags & UMA_ZONE_HASH) {
33870095a784SJeff Roberson 				slab = hash_sfind(&keg->uk_hash, mem);
33880095a784SJeff Roberson 			} else {
33890095a784SJeff Roberson 				mem += keg->uk_pgoff;
33900095a784SJeff Roberson 				slab = (uma_slab_t)mem;
33910095a784SJeff Roberson 			}
33920095a784SJeff Roberson 		} else {
33930095a784SJeff Roberson 			slab = vtoslab((vm_offset_t)item);
3394bb15d1c7SGleb Smirnoff 			MPASS(slab->us_keg == keg);
33950095a784SJeff Roberson 		}
3396bb15d1c7SGleb Smirnoff 		slab_free_item(zone, slab, item);
33970095a784SJeff Roberson 	}
3398af526374SJeff Roberson 	KEG_UNLOCK(keg);
33998355f576SJeff Roberson }
34008355f576SJeff Roberson 
34010095a784SJeff Roberson /*
34020095a784SJeff Roberson  * Frees a single item to any zone.
34030095a784SJeff Roberson  *
34040095a784SJeff Roberson  * Arguments:
34050095a784SJeff Roberson  *	zone   The zone to free to
34060095a784SJeff Roberson  *	item   The item we're freeing
34070095a784SJeff Roberson  *	udata  User supplied data for the dtor
34080095a784SJeff Roberson  *	skip   Skip dtors and finis
34090095a784SJeff Roberson  */
34100095a784SJeff Roberson static void
34110095a784SJeff Roberson zone_free_item(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip)
34120095a784SJeff Roberson {
34130095a784SJeff Roberson #ifdef INVARIANTS
3414c5deaf04SGleb Smirnoff 	bool skipdbg;
3415c5deaf04SGleb Smirnoff 
3416c5deaf04SGleb Smirnoff 	skipdbg = uma_dbg_zskip(zone, item);
3417c5deaf04SGleb Smirnoff 	if (skip == SKIP_NONE && !skipdbg) {
34180095a784SJeff Roberson 		if (zone->uz_flags & UMA_ZONE_MALLOC)
34190095a784SJeff Roberson 			uma_dbg_free(zone, udata, item);
34200095a784SJeff Roberson 		else
34210095a784SJeff Roberson 			uma_dbg_free(zone, NULL, item);
34220095a784SJeff Roberson 	}
3423c5deaf04SGleb Smirnoff 
3424c5deaf04SGleb Smirnoff 	if (skip < SKIP_DTOR && zone->uz_dtor != NULL &&
3425c5deaf04SGleb Smirnoff 	    (!skipdbg || zone->uz_dtor != trash_dtor ||
3426c5deaf04SGleb Smirnoff 	    zone->uz_ctor != trash_ctor))
3427c5deaf04SGleb Smirnoff #else
3428c5deaf04SGleb Smirnoff 	if (skip < SKIP_DTOR && zone->uz_dtor != NULL)
34290095a784SJeff Roberson #endif
34300095a784SJeff Roberson 		zone->uz_dtor(item, zone->uz_size, udata);
34310095a784SJeff Roberson 
34320095a784SJeff Roberson 	if (skip < SKIP_FINI && zone->uz_fini)
34330095a784SJeff Roberson 		zone->uz_fini(item, zone->uz_size);
34340095a784SJeff Roberson 
34350095a784SJeff Roberson 	zone->uz_release(zone->uz_arg, &item, 1);
3436bb15d1c7SGleb Smirnoff 
3437bb15d1c7SGleb Smirnoff 	if (skip & SKIP_CNT)
3438bb15d1c7SGleb Smirnoff 		return;
3439bb15d1c7SGleb Smirnoff 
34402efcc8cbSGleb Smirnoff 	counter_u64_add(zone->uz_frees, 1);
34412efcc8cbSGleb Smirnoff 
3442bb45b411SGleb Smirnoff 	if (zone->uz_max_items > 0) {
3443bb15d1c7SGleb Smirnoff 		ZONE_LOCK(zone);
3444bb15d1c7SGleb Smirnoff 		zone->uz_items--;
3445bb45b411SGleb Smirnoff 		if (zone->uz_sleepers > 0 &&
3446bb45b411SGleb Smirnoff 		    zone->uz_items < zone->uz_max_items)
3447bb15d1c7SGleb Smirnoff 			wakeup_one(zone);
3448bb15d1c7SGleb Smirnoff 		ZONE_UNLOCK(zone);
34490095a784SJeff Roberson 	}
3450bb45b411SGleb Smirnoff }
34510095a784SJeff Roberson 
34528355f576SJeff Roberson /* See uma.h */
34531c6cae97SLawrence Stewart int
3454736ee590SJeff Roberson uma_zone_set_max(uma_zone_t zone, int nitems)
3455736ee590SJeff Roberson {
3456bb15d1c7SGleb Smirnoff 	struct uma_bucket_zone *ubz;
3457099a0e58SBosko Milekic 
3458bb15d1c7SGleb Smirnoff 	/*
3459bb15d1c7SGleb Smirnoff 	 * If the limit is very low, we may need to cap how
3460bb15d1c7SGleb Smirnoff 	 * many items are allowed in the CPU caches.
3461bb15d1c7SGleb Smirnoff 	 */
3462bb15d1c7SGleb Smirnoff 	ubz = &bucket_zones[0];
3463bb15d1c7SGleb Smirnoff 	for (; ubz->ubz_entries != 0; ubz++)
3464bb15d1c7SGleb Smirnoff 		if (ubz->ubz_entries * 2 * mp_ncpus > nitems)
3465bb15d1c7SGleb Smirnoff 			break;
3466bb15d1c7SGleb Smirnoff 	if (ubz == &bucket_zones[0])
3467bb15d1c7SGleb Smirnoff 		nitems = ubz->ubz_entries * 2 * mp_ncpus;
3468bb15d1c7SGleb Smirnoff 	else
3469bb15d1c7SGleb Smirnoff 		ubz--;
3470bb15d1c7SGleb Smirnoff 
3471bb15d1c7SGleb Smirnoff 	ZONE_LOCK(zone);
3472bb15d1c7SGleb Smirnoff 	zone->uz_count_max = zone->uz_count = ubz->ubz_entries;
3473bb15d1c7SGleb Smirnoff 	if (zone->uz_count_min > zone->uz_count_max)
3474bb15d1c7SGleb Smirnoff 		zone->uz_count_min = zone->uz_count_max;
3475bb15d1c7SGleb Smirnoff 	zone->uz_max_items = nitems;
3476bb15d1c7SGleb Smirnoff 	ZONE_UNLOCK(zone);
3477bb15d1c7SGleb Smirnoff 
3478bb15d1c7SGleb Smirnoff 	return (nitems);
3479bb15d1c7SGleb Smirnoff }
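
/*
 * Example (illustrative, with a hypothetical "my_zone"): cap a zone at
 * roughly 1024 items and record the effective limit, which may be
 * rounded up so that the per-CPU buckets can still operate:
 *
 *	int eff;
 *
 *	eff = uma_zone_set_max(my_zone, 1024);
 *
 * The returned value, not the requested count, is the enforced limit.
 */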
3480bb15d1c7SGleb Smirnoff 
3481bb15d1c7SGleb Smirnoff /* See uma.h */
3482bb15d1c7SGleb Smirnoff int
3483bb15d1c7SGleb Smirnoff uma_zone_set_maxcache(uma_zone_t zone, int nitems)
3484bb15d1c7SGleb Smirnoff {
3485bb15d1c7SGleb Smirnoff 
3486bb15d1c7SGleb Smirnoff 	ZONE_LOCK(zone);
3487bb15d1c7SGleb Smirnoff 	zone->uz_bkt_max = nitems;
3488bb15d1c7SGleb Smirnoff 	ZONE_UNLOCK(zone);
34891c6cae97SLawrence Stewart 
34901c6cae97SLawrence Stewart 	return (nitems);
3491736ee590SJeff Roberson }
3492736ee590SJeff Roberson 
3493736ee590SJeff Roberson /* See uma.h */
3494e49471b0SAndre Oppermann int
3495e49471b0SAndre Oppermann uma_zone_get_max(uma_zone_t zone)
3496e49471b0SAndre Oppermann {
3497e49471b0SAndre Oppermann 	int nitems;
3498e49471b0SAndre Oppermann 
3499bb15d1c7SGleb Smirnoff 	ZONE_LOCK(zone);
3500bb15d1c7SGleb Smirnoff 	nitems = zone->uz_max_items;
3501bb15d1c7SGleb Smirnoff 	ZONE_UNLOCK(zone);
3502e49471b0SAndre Oppermann 
3503e49471b0SAndre Oppermann 	return (nitems);
3504e49471b0SAndre Oppermann }
3505e49471b0SAndre Oppermann 
3506e49471b0SAndre Oppermann /* See uma.h */
35072f891cd5SPawel Jakub Dawidek void
35082f891cd5SPawel Jakub Dawidek uma_zone_set_warning(uma_zone_t zone, const char *warning)
35092f891cd5SPawel Jakub Dawidek {
35102f891cd5SPawel Jakub Dawidek 
35112f891cd5SPawel Jakub Dawidek 	ZONE_LOCK(zone);
35122f891cd5SPawel Jakub Dawidek 	zone->uz_warning = warning;
35132f891cd5SPawel Jakub Dawidek 	ZONE_UNLOCK(zone);
35142f891cd5SPawel Jakub Dawidek }
35152f891cd5SPawel Jakub Dawidek 
35162f891cd5SPawel Jakub Dawidek /* See uma.h */
351754503a13SJonathan T. Looney void
351854503a13SJonathan T. Looney uma_zone_set_maxaction(uma_zone_t zone, uma_maxaction_t maxaction)
351954503a13SJonathan T. Looney {
352054503a13SJonathan T. Looney 
352154503a13SJonathan T. Looney 	ZONE_LOCK(zone);
3522e60b2fcbSGleb Smirnoff 	TASK_INIT(&zone->uz_maxaction, 0, (task_fn_t *)maxaction, zone);
352354503a13SJonathan T. Looney 	ZONE_UNLOCK(zone);
352454503a13SJonathan T. Looney }
352554503a13SJonathan T. Looney 
352654503a13SJonathan T. Looney /* See uma.h */
3527c4ae7908SLawrence Stewart int
3528c4ae7908SLawrence Stewart uma_zone_get_cur(uma_zone_t zone)
3529c4ae7908SLawrence Stewart {
3530c4ae7908SLawrence Stewart 	int64_t nitems;
3531c4ae7908SLawrence Stewart 	u_int i;
3532c4ae7908SLawrence Stewart 
3533c4ae7908SLawrence Stewart 	ZONE_LOCK(zone);
35342efcc8cbSGleb Smirnoff 	nitems = counter_u64_fetch(zone->uz_allocs) -
35352efcc8cbSGleb Smirnoff 	    counter_u64_fetch(zone->uz_frees);
3536c4ae7908SLawrence Stewart 	CPU_FOREACH(i) {
3537c4ae7908SLawrence Stewart 		/*
35384a9f6ba7SGleb Smirnoff 		 * See the comment in uma_vm_zone_stats() regarding the
3539c4ae7908SLawrence Stewart 		 * safety of accessing the per-cpu caches. With the zone lock
3540c4ae7908SLawrence Stewart 		 * held, it is safe, but can potentially result in stale data.
3541c4ae7908SLawrence Stewart 		 */
3542c4ae7908SLawrence Stewart 		nitems += zone->uz_cpu[i].uc_allocs -
3543c4ae7908SLawrence Stewart 		    zone->uz_cpu[i].uc_frees;
3544c4ae7908SLawrence Stewart 	}
3545c4ae7908SLawrence Stewart 	ZONE_UNLOCK(zone);
3546c4ae7908SLawrence Stewart 
3547c4ae7908SLawrence Stewart 	return (nitems < 0 ? 0 : nitems);
3548c4ae7908SLawrence Stewart }
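
/*
 * Illustrative use of the snapshot above (hypothetical "my_zone"):
 *
 *	if (uma_zone_get_max(my_zone) != 0 &&
 *	    uma_zone_get_cur(my_zone) > uma_zone_get_max(my_zone) / 2)
 *		printf("my_zone is more than half full\n");
 *
 * As noted in the function, the per-cpu contributions may be stale, so
 * treat the result as an estimate.
 */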
3549c4ae7908SLawrence Stewart 
3550c4ae7908SLawrence Stewart /* See uma.h */
3551736ee590SJeff Roberson void
3552099a0e58SBosko Milekic uma_zone_set_init(uma_zone_t zone, uma_init uminit)
3553099a0e58SBosko Milekic {
3554e20a199fSJeff Roberson 	uma_keg_t keg;
3555e20a199fSJeff Roberson 
3556bb15d1c7SGleb Smirnoff 	KEG_GET(zone, keg);
3557af526374SJeff Roberson 	KEG_LOCK(keg);
3558e20a199fSJeff Roberson 	KASSERT(keg->uk_pages == 0,
3559099a0e58SBosko Milekic 	    ("uma_zone_set_init on non-empty keg"));
3560e20a199fSJeff Roberson 	keg->uk_init = uminit;
3561af526374SJeff Roberson 	KEG_UNLOCK(keg);
3562099a0e58SBosko Milekic }
3563099a0e58SBosko Milekic 
3564099a0e58SBosko Milekic /* See uma.h */
3565099a0e58SBosko Milekic void
3566099a0e58SBosko Milekic uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
3567099a0e58SBosko Milekic {
3568e20a199fSJeff Roberson 	uma_keg_t keg;
3569e20a199fSJeff Roberson 
3570bb15d1c7SGleb Smirnoff 	KEG_GET(zone, keg);
3571af526374SJeff Roberson 	KEG_LOCK(keg);
3572e20a199fSJeff Roberson 	KASSERT(keg->uk_pages == 0,
3573099a0e58SBosko Milekic 	    ("uma_zone_set_fini on non-empty keg"));
3574e20a199fSJeff Roberson 	keg->uk_fini = fini;
3575af526374SJeff Roberson 	KEG_UNLOCK(keg);
3576099a0e58SBosko Milekic }
3577099a0e58SBosko Milekic 
3578099a0e58SBosko Milekic /* See uma.h */
3579099a0e58SBosko Milekic void
3580099a0e58SBosko Milekic uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
3581099a0e58SBosko Milekic {
3582af526374SJeff Roberson 
3583099a0e58SBosko Milekic 	ZONE_LOCK(zone);
3584bb15d1c7SGleb Smirnoff 	KASSERT(zone->uz_keg->uk_pages == 0,
3585099a0e58SBosko Milekic 	    ("uma_zone_set_zinit on non-empty keg"));
3586099a0e58SBosko Milekic 	zone->uz_init = zinit;
3587099a0e58SBosko Milekic 	ZONE_UNLOCK(zone);
3588099a0e58SBosko Milekic }
3589099a0e58SBosko Milekic 
3590099a0e58SBosko Milekic /* See uma.h */
3591099a0e58SBosko Milekic void
3592099a0e58SBosko Milekic uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
3593099a0e58SBosko Milekic {
3594af526374SJeff Roberson 
3595099a0e58SBosko Milekic 	ZONE_LOCK(zone);
3596bb15d1c7SGleb Smirnoff 	KASSERT(zone->uz_keg->uk_pages == 0,
3597099a0e58SBosko Milekic 	    ("uma_zone_set_zfini on non-empty keg"));
3598099a0e58SBosko Milekic 	zone->uz_fini = zfini;
3599099a0e58SBosko Milekic 	ZONE_UNLOCK(zone);
3600099a0e58SBosko Milekic }
3601099a0e58SBosko Milekic 
3602099a0e58SBosko Milekic /* See uma.h */
3603b23f72e9SBrian Feldman /* XXX uk_freef is not actually used with the zone locked */
3604099a0e58SBosko Milekic void
36058355f576SJeff Roberson uma_zone_set_freef(uma_zone_t zone, uma_free freef)
36068355f576SJeff Roberson {
36070095a784SJeff Roberson 	uma_keg_t keg;
3608e20a199fSJeff Roberson 
3609bb15d1c7SGleb Smirnoff 	KEG_GET(zone, keg);
36101d2c0c46SDmitry Chagin 	KASSERT(keg != NULL, ("uma_zone_set_freef: Invalid zone type"));
3611af526374SJeff Roberson 	KEG_LOCK(keg);
36120095a784SJeff Roberson 	keg->uk_freef = freef;
3613af526374SJeff Roberson 	KEG_UNLOCK(keg);
36148355f576SJeff Roberson }
36158355f576SJeff Roberson 
36168355f576SJeff Roberson /* See uma.h */
3617b23f72e9SBrian Feldman /* XXX uk_allocf is not actually used with the zone locked */
36188355f576SJeff Roberson void
36198355f576SJeff Roberson uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
36208355f576SJeff Roberson {
3621e20a199fSJeff Roberson 	uma_keg_t keg;
3622e20a199fSJeff Roberson 
3623bb15d1c7SGleb Smirnoff 	KEG_GET(zone, keg);
3624af526374SJeff Roberson 	KEG_LOCK(keg);
3625e20a199fSJeff Roberson 	keg->uk_allocf = allocf;
3626af526374SJeff Roberson 	KEG_UNLOCK(keg);
36278355f576SJeff Roberson }
36288355f576SJeff Roberson 
36298355f576SJeff Roberson /* See uma.h */
36306fd34d6fSJeff Roberson void
36316fd34d6fSJeff Roberson uma_zone_reserve(uma_zone_t zone, int items)
36326fd34d6fSJeff Roberson {
36336fd34d6fSJeff Roberson 	uma_keg_t keg;
36346fd34d6fSJeff Roberson 
3635bb15d1c7SGleb Smirnoff 	KEG_GET(zone, keg);
36366fd34d6fSJeff Roberson 	KEG_LOCK(keg);
36376fd34d6fSJeff Roberson 	keg->uk_reserve = items;
36386fd34d6fSJeff Roberson 	KEG_UNLOCK(keg);
36396fd34d6fSJeff Roberson }
36406fd34d6fSJeff Roberson 
36416fd34d6fSJeff Roberson /* See uma.h */
36428355f576SJeff Roberson int
3643a4915c21SAttilio Rao uma_zone_reserve_kva(uma_zone_t zone, int count)
36448355f576SJeff Roberson {
3645099a0e58SBosko Milekic 	uma_keg_t keg;
36468355f576SJeff Roberson 	vm_offset_t kva;
36479ba30bcbSZbigniew Bodek 	u_int pages;
36488355f576SJeff Roberson 
3649bb15d1c7SGleb Smirnoff 	KEG_GET(zone, keg);
36508355f576SJeff Roberson 
3651bb15d1c7SGleb Smirnoff 	pages = count / keg->uk_ipers;
3652099a0e58SBosko Milekic 	if (pages * keg->uk_ipers < count)
36538355f576SJeff Roberson 		pages++;
365457223e99SAndriy Gapon 	pages *= keg->uk_ppera;
3655a553d4b8SJeff Roberson 
3656a4915c21SAttilio Rao #ifdef UMA_MD_SMALL_ALLOC
3657a4915c21SAttilio Rao 	if (keg->uk_ppera > 1) {
3658a4915c21SAttilio Rao #else
3659a4915c21SAttilio Rao 	if (1) {
3660a4915c21SAttilio Rao #endif
366157223e99SAndriy Gapon 		kva = kva_alloc((vm_size_t)pages * PAGE_SIZE);
3662d1f42ac2SAlan Cox 		if (kva == 0)
36638355f576SJeff Roberson 			return (0);
3664a4915c21SAttilio Rao 	} else
3665a4915c21SAttilio Rao 		kva = 0;
3666bb15d1c7SGleb Smirnoff 
3667bb15d1c7SGleb Smirnoff 	ZONE_LOCK(zone);
3668bb15d1c7SGleb Smirnoff 	MPASS(keg->uk_kva == 0);
3669099a0e58SBosko Milekic 	keg->uk_kva = kva;
3670a4915c21SAttilio Rao 	keg->uk_offset = 0;
3671bb15d1c7SGleb Smirnoff 	zone->uz_max_items = pages * keg->uk_ipers;
3672a4915c21SAttilio Rao #ifdef UMA_MD_SMALL_ALLOC
3673a4915c21SAttilio Rao 	keg->uk_allocf = (keg->uk_ppera > 1) ? noobj_alloc : uma_small_alloc;
3674a4915c21SAttilio Rao #else
3675a4915c21SAttilio Rao 	keg->uk_allocf = noobj_alloc;
3676a4915c21SAttilio Rao #endif
36776fd34d6fSJeff Roberson 	keg->uk_flags |= UMA_ZONE_NOFREE;
3678bb15d1c7SGleb Smirnoff 	ZONE_UNLOCK(zone);
3679af526374SJeff Roberson 
36808355f576SJeff Roberson 	return (1);
36818355f576SJeff Roberson }
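
/*
 * Example (illustrative, hypothetical zone): a subsystem that wants its
 * zone backed by a fixed, pre-reserved KVA range can do, at init time:
 *
 *	if (uma_zone_reserve_kva(my_zone, 4096) == 0)
 *		panic("cannot reserve KVA for my_zone");
 *
 * Note that the reservation sets UMA_ZONE_NOFREE, so pages allocated to
 * this zone are never returned to the system.
 */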
36828355f576SJeff Roberson 
36838355f576SJeff Roberson /* See uma.h */
36848355f576SJeff Roberson void
36858355f576SJeff Roberson uma_prealloc(uma_zone_t zone, int items)
36868355f576SJeff Roberson {
3687920239efSMark Johnston 	struct vm_domainset_iter di;
3688ab3185d1SJeff Roberson 	uma_domain_t dom;
36898355f576SJeff Roberson 	uma_slab_t slab;
3690099a0e58SBosko Milekic 	uma_keg_t keg;
369186220393SMark Johnston 	int aflags, domain, slabs;
36928355f576SJeff Roberson 
3693bb15d1c7SGleb Smirnoff 	KEG_GET(zone, keg);
3694af526374SJeff Roberson 	KEG_LOCK(keg);
3695099a0e58SBosko Milekic 	slabs = items / keg->uk_ipers;
3696099a0e58SBosko Milekic 	if (slabs * keg->uk_ipers < items)
36978355f576SJeff Roberson 		slabs++;
3698194a979eSMark Johnston 	while (slabs-- > 0) {
369986220393SMark Johnston 		aflags = M_NOWAIT;
370086220393SMark Johnston 		vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain,
370186220393SMark Johnston 		    &aflags);
370286220393SMark Johnston 		for (;;) {
370386220393SMark Johnston 			slab = keg_alloc_slab(keg, zone, domain, M_WAITOK,
370486220393SMark Johnston 			    aflags);
370586220393SMark Johnston 			if (slab != NULL) {
3706e20a199fSJeff Roberson 				MPASS(slab->us_keg == keg);
3707ab3185d1SJeff Roberson 				dom = &keg->uk_domain[slab->us_domain];
370886220393SMark Johnston 				LIST_INSERT_HEAD(&dom->ud_free_slab, slab,
370986220393SMark Johnston 				    us_link);
3710920239efSMark Johnston 				break;
37118355f576SJeff Roberson 			}
371286220393SMark Johnston 			KEG_LOCK(keg);
371386220393SMark Johnston 			if (vm_domainset_iter_policy(&di, &domain) != 0) {
371486220393SMark Johnston 				KEG_UNLOCK(keg);
371586220393SMark Johnston 				vm_wait_doms(&keg->uk_dr.dr_policy->ds_mask);
371686220393SMark Johnston 				KEG_LOCK(keg);
371786220393SMark Johnston 			}
371886220393SMark Johnston 		}
371986220393SMark Johnston 	}
3720af526374SJeff Roberson 	KEG_UNLOCK(keg);
37218355f576SJeff Roberson }
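
/*
 * Illustrative use (hypothetical zone and count): populate slabs up
 * front so that an expected allocation burst can be served from the
 * free-slab lists without sleeping for pages:
 *
 *	uma_prealloc(my_zone, 128);
 */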
37228355f576SJeff Roberson 
37238355f576SJeff Roberson /* See uma.h */
3724*08cfa56eSMark Johnston void
3725*08cfa56eSMark Johnston uma_reclaim(int req)
37268355f576SJeff Roberson {
372744ec2b63SKonstantin Belousov 
37281431a748SGleb Smirnoff 	CTR0(KTR_UMA, "UMA: vm asked us to release pages!");
3729*08cfa56eSMark Johnston 	sx_xlock(&uma_reclaim_lock);
373086bbae32SJeff Roberson 	bucket_enable();
3731*08cfa56eSMark Johnston 
3732*08cfa56eSMark Johnston 	switch (req) {
3733*08cfa56eSMark Johnston 	case UMA_RECLAIM_TRIM:
3734*08cfa56eSMark Johnston 		zone_foreach(zone_trim);
3735*08cfa56eSMark Johnston 		break;
3736*08cfa56eSMark Johnston 	case UMA_RECLAIM_DRAIN:
3737*08cfa56eSMark Johnston 	case UMA_RECLAIM_DRAIN_CPU:
37388355f576SJeff Roberson 		zone_foreach(zone_drain);
3739*08cfa56eSMark Johnston 		if (req == UMA_RECLAIM_DRAIN_CPU) {
3740*08cfa56eSMark Johnston 			pcpu_cache_drain_safe(NULL);
3741a2de44abSAlexander Motin 			zone_foreach(zone_drain);
3742a2de44abSAlexander Motin 		}
3743*08cfa56eSMark Johnston 		break;
3744*08cfa56eSMark Johnston 	default:
3745*08cfa56eSMark Johnston 		panic("unhandled reclamation request %d", req);
3746*08cfa56eSMark Johnston 	}
37470f9b7bf3SMark Johnston 
37488355f576SJeff Roberson 	/*
37498355f576SJeff Roberson 	 * Some slabs may have been freed but this zone will be visited early;
37508355f576SJeff Roberson 	 * we visit it again so that we can free pages that become empty once
37518355f576SJeff Roberson 	 * other zones are drained.  We have to do the same for buckets.
37528355f576SJeff Roberson 	 */
37539643769aSJeff Roberson 	zone_drain(slabzone);
3754cae33c14SJeff Roberson 	bucket_zone_drain();
3755*08cfa56eSMark Johnston 	sx_xunlock(&uma_reclaim_lock);
37568355f576SJeff Roberson }
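
/*
 * Illustrative call sites for the above: a periodic trimmer would use
 * uma_reclaim(UMA_RECLAIM_TRIM), while a severe shortage handler would
 * use uma_reclaim(UMA_RECLAIM_DRAIN_CPU) to flush the per-CPU caches as
 * well, as the reclaim worker below does.
 */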
37578355f576SJeff Roberson 
37582e47807cSJeff Roberson static volatile int uma_reclaim_needed;
375944ec2b63SKonstantin Belousov 
376044ec2b63SKonstantin Belousov void
376144ec2b63SKonstantin Belousov uma_reclaim_wakeup(void)
376244ec2b63SKonstantin Belousov {
376344ec2b63SKonstantin Belousov 
37642e47807cSJeff Roberson 	if (atomic_fetchadd_int(&uma_reclaim_needed, 1) == 0)
37652e47807cSJeff Roberson 		wakeup(uma_reclaim);
376644ec2b63SKonstantin Belousov }
376744ec2b63SKonstantin Belousov 
376844ec2b63SKonstantin Belousov void
376944ec2b63SKonstantin Belousov uma_reclaim_worker(void *arg __unused)
377044ec2b63SKonstantin Belousov {
377144ec2b63SKonstantin Belousov 
377244ec2b63SKonstantin Belousov 	for (;;) {
3773*08cfa56eSMark Johnston 		sx_xlock(&uma_reclaim_lock);
3774200f8117SKonstantin Belousov 		while (atomic_load_int(&uma_reclaim_needed) == 0)
3775*08cfa56eSMark Johnston 			sx_sleep(uma_reclaim, &uma_reclaim_lock, PVM, "umarcl",
37762e47807cSJeff Roberson 			    hz);
3777*08cfa56eSMark Johnston 		sx_xunlock(&uma_reclaim_lock);
37789b43bc27SAndriy Gapon 		EVENTHANDLER_INVOKE(vm_lowmem, VM_LOW_KMEM);
3779*08cfa56eSMark Johnston 		uma_reclaim(UMA_RECLAIM_DRAIN_CPU);
3780200f8117SKonstantin Belousov 		atomic_store_int(&uma_reclaim_needed, 0);
37812e47807cSJeff Roberson 		/* Don't fire more than once per second. */
37822e47807cSJeff Roberson 		pause("umarclslp", hz);
378344ec2b63SKonstantin Belousov 	}
378444ec2b63SKonstantin Belousov }
378544ec2b63SKonstantin Belousov 
3786663b416fSJohn Baldwin /* See uma.h */
3787*08cfa56eSMark Johnston void
3788*08cfa56eSMark Johnston uma_zone_reclaim(uma_zone_t zone, int req)
3789*08cfa56eSMark Johnston {
3790*08cfa56eSMark Johnston 
3791*08cfa56eSMark Johnston 	switch (req) {
3792*08cfa56eSMark Johnston 	case UMA_RECLAIM_TRIM:
3793*08cfa56eSMark Johnston 		zone_trim(zone);
3794*08cfa56eSMark Johnston 		break;
3795*08cfa56eSMark Johnston 	case UMA_RECLAIM_DRAIN:
3796*08cfa56eSMark Johnston 		zone_drain(zone);
3797*08cfa56eSMark Johnston 		break;
3798*08cfa56eSMark Johnston 	case UMA_RECLAIM_DRAIN_CPU:
3799*08cfa56eSMark Johnston 		pcpu_cache_drain_safe(zone);
3800*08cfa56eSMark Johnston 		zone_drain(zone);
3801*08cfa56eSMark Johnston 		break;
3802*08cfa56eSMark Johnston 	default:
3803*08cfa56eSMark Johnston 		panic("unhandled reclamation request %d", req);
3804*08cfa56eSMark Johnston 	}
3805*08cfa56eSMark Johnston }
3806*08cfa56eSMark Johnston 
3807*08cfa56eSMark Johnston /* See uma.h */
3808663b416fSJohn Baldwin int
3809663b416fSJohn Baldwin uma_zone_exhausted(uma_zone_t zone)
3810663b416fSJohn Baldwin {
3811663b416fSJohn Baldwin 	int full;
3812663b416fSJohn Baldwin 
3813663b416fSJohn Baldwin 	ZONE_LOCK(zone);
3814bb15d1c7SGleb Smirnoff 	full = zone->uz_sleepers > 0;
3815663b416fSJohn Baldwin 	ZONE_UNLOCK(zone);
3816663b416fSJohn Baldwin 	return (full);
3817663b416fSJohn Baldwin }
3818663b416fSJohn Baldwin 
38196c125b8dSMohan Srinivasan int
38206c125b8dSMohan Srinivasan uma_zone_exhausted_nolock(uma_zone_t zone)
38216c125b8dSMohan Srinivasan {
3822bb15d1c7SGleb Smirnoff 	return (zone->uz_sleepers > 0);
38236c125b8dSMohan Srinivasan }
38246c125b8dSMohan Srinivasan 
38258355f576SJeff Roberson void *
3826ab3185d1SJeff Roberson uma_large_malloc_domain(vm_size_t size, int domain, int wait)
38278355f576SJeff Roberson {
38289978bd99SMark Johnston 	struct domainset *policy;
3829ab3185d1SJeff Roberson 	vm_offset_t addr;
38308355f576SJeff Roberson 	uma_slab_t slab;
38318355f576SJeff Roberson 
383230c5525bSAndrew Gallatin 	if (domain != UMA_ANYDOMAIN) {
383330c5525bSAndrew Gallatin 		/* avoid allocs targeting empty domains */
383430c5525bSAndrew Gallatin 		if (VM_DOMAIN_EMPTY(domain))
383530c5525bSAndrew Gallatin 			domain = UMA_ANYDOMAIN;
383630c5525bSAndrew Gallatin 	}
3837ab3185d1SJeff Roberson 	slab = zone_alloc_item(slabzone, NULL, domain, wait);
38388355f576SJeff Roberson 	if (slab == NULL)
38398355f576SJeff Roberson 		return (NULL);
38409978bd99SMark Johnston 	policy = (domain == UMA_ANYDOMAIN) ? DOMAINSET_RR() :
38419978bd99SMark Johnston 	    DOMAINSET_FIXED(domain);
38429978bd99SMark Johnston 	addr = kmem_malloc_domainset(policy, size, wait);
3843ab3185d1SJeff Roberson 	if (addr != 0) {
3844ab3185d1SJeff Roberson 		vsetslab(addr, slab);
3845ab3185d1SJeff Roberson 		slab->us_data = (void *)addr;
3846ab3185d1SJeff Roberson 		slab->us_flags = UMA_SLAB_KERNEL | UMA_SLAB_MALLOC;
38478355f576SJeff Roberson 		slab->us_size = size;
3848e2068d0bSJeff Roberson 		slab->us_domain = vm_phys_domain(PHYS_TO_VM_PAGE(
3849ab3185d1SJeff Roberson 		    pmap_kextract(addr)));
38502e47807cSJeff Roberson 		uma_total_inc(size);
38518355f576SJeff Roberson 	} else {
38520095a784SJeff Roberson 		zone_free_item(slabzone, slab, NULL, SKIP_NONE);
38538355f576SJeff Roberson 	}
38548355f576SJeff Roberson 
3855ab3185d1SJeff Roberson 	return ((void *)addr);
3856ab3185d1SJeff Roberson }
3857ab3185d1SJeff Roberson 
3858ab3185d1SJeff Roberson void *
3859ab3185d1SJeff Roberson uma_large_malloc(vm_size_t size, int wait)
3860ab3185d1SJeff Roberson {
3861ab3185d1SJeff Roberson 
3862ab3185d1SJeff Roberson 	return uma_large_malloc_domain(size, UMA_ANYDOMAIN, wait);
38638355f576SJeff Roberson }
38648355f576SJeff Roberson 
38658355f576SJeff Roberson void
38668355f576SJeff Roberson uma_large_free(uma_slab_t slab)
38678355f576SJeff Roberson {
3868c325e866SKonstantin Belousov 
3869ab3185d1SJeff Roberson 	KASSERT((slab->us_flags & UMA_SLAB_KERNEL) != 0,
3870ab3185d1SJeff Roberson 	    ("uma_large_free:  Memory not allocated with uma_large_malloc."));
387149bfa624SAlan Cox 	kmem_free((vm_offset_t)slab->us_data, slab->us_size);
38722e47807cSJeff Roberson 	uma_total_dec(slab->us_size);
38730095a784SJeff Roberson 	zone_free_item(slabzone, slab, NULL, SKIP_NONE);
38748355f576SJeff Roberson }
38758355f576SJeff Roberson 
387648343a2fSGleb Smirnoff static void
387748343a2fSGleb Smirnoff uma_zero_item(void *item, uma_zone_t zone)
387848343a2fSGleb Smirnoff {
387948343a2fSGleb Smirnoff 
388048343a2fSGleb Smirnoff 	bzero(item, zone->uz_size);
388148343a2fSGleb Smirnoff }
388248343a2fSGleb Smirnoff 
38832e47807cSJeff Roberson unsigned long
38842e47807cSJeff Roberson uma_limit(void)
38852e47807cSJeff Roberson {
38862e47807cSJeff Roberson 
38872e47807cSJeff Roberson 	return (uma_kmem_limit);
38882e47807cSJeff Roberson }
38892e47807cSJeff Roberson 
38902e47807cSJeff Roberson void
38912e47807cSJeff Roberson uma_set_limit(unsigned long limit)
38922e47807cSJeff Roberson {
38932e47807cSJeff Roberson 
38942e47807cSJeff Roberson 	uma_kmem_limit = limit;
38952e47807cSJeff Roberson }
38962e47807cSJeff Roberson 
38972e47807cSJeff Roberson unsigned long
38982e47807cSJeff Roberson uma_size(void)
38992e47807cSJeff Roberson {
39002e47807cSJeff Roberson 
3901058f0f74SMark Johnston 	return (atomic_load_long(&uma_kmem_total));
3902ad5b0f5bSJeff Roberson }
3903ad5b0f5bSJeff Roberson 
3904ad5b0f5bSJeff Roberson long
3905ad5b0f5bSJeff Roberson uma_avail(void)
3906ad5b0f5bSJeff Roberson {
3907ad5b0f5bSJeff Roberson 
3908058f0f74SMark Johnston 	return (uma_kmem_limit - uma_size());
39092e47807cSJeff Roberson }
39102e47807cSJeff Roberson 
39118355f576SJeff Roberson void
39128355f576SJeff Roberson uma_print_stats(void)
39138355f576SJeff Roberson {
39148355f576SJeff Roberson 	zone_foreach(uma_print_zone);
39158355f576SJeff Roberson }
39168355f576SJeff Roberson 
3917504d5de3SJeff Roberson static void
3918504d5de3SJeff Roberson slab_print(uma_slab_t slab)
3919504d5de3SJeff Roberson {
3920ef72505eSJeff Roberson 	printf("slab: keg %p, data %p, freecount %d\n",
3921ef72505eSJeff Roberson 		slab->us_keg, slab->us_data, slab->us_freecount);
3922504d5de3SJeff Roberson }
3923504d5de3SJeff Roberson 
3924504d5de3SJeff Roberson static void
3925504d5de3SJeff Roberson cache_print(uma_cache_t cache)
3926504d5de3SJeff Roberson {
3927c1685086SJeff Roberson 	printf("alloc: %p(%d), free: %p(%d), cross: %p(%d)\n",
3928504d5de3SJeff Roberson 		cache->uc_allocbucket,
3929504d5de3SJeff Roberson 		cache->uc_allocbucket?cache->uc_allocbucket->ub_cnt:0,
3930504d5de3SJeff Roberson 		cache->uc_freebucket,
3931c1685086SJeff Roberson 		cache->uc_freebucket?cache->uc_freebucket->ub_cnt:0,
3932c1685086SJeff Roberson 		cache->uc_crossbucket,
3933c1685086SJeff Roberson 		cache->uc_crossbucket?cache->uc_crossbucket->ub_cnt:0);
3934504d5de3SJeff Roberson }
3935504d5de3SJeff Roberson 
3936e20a199fSJeff Roberson static void
3937e20a199fSJeff Roberson uma_print_keg(uma_keg_t keg)
39388355f576SJeff Roberson {
3939ab3185d1SJeff Roberson 	uma_domain_t dom;
3940504d5de3SJeff Roberson 	uma_slab_t slab;
3941ab3185d1SJeff Roberson 	int i;
3942504d5de3SJeff Roberson 
39430b80c1e4SEitan Adler 	printf("keg: %s(%p) size %d(%d) flags %#x ipers %d ppera %d "
3944bb15d1c7SGleb Smirnoff 	    "out %d free %d\n",
3945e20a199fSJeff Roberson 	    keg->uk_name, keg, keg->uk_size, keg->uk_rsize, keg->uk_flags,
3946099a0e58SBosko Milekic 	    keg->uk_ipers, keg->uk_ppera,
394757223e99SAndriy Gapon 	    (keg->uk_pages / keg->uk_ppera) * keg->uk_ipers - keg->uk_free,
3948bb15d1c7SGleb Smirnoff 	    keg->uk_free);
3949ab3185d1SJeff Roberson 	for (i = 0; i < vm_ndomains; i++) {
3950ab3185d1SJeff Roberson 		dom = &keg->uk_domain[i];
3951504d5de3SJeff Roberson 		printf("Part slabs:\n");
3952ab3185d1SJeff Roberson 		LIST_FOREACH(slab, &dom->ud_part_slab, us_link)
3953504d5de3SJeff Roberson 			slab_print(slab);
3954504d5de3SJeff Roberson 		printf("Free slabs:\n");
3955ab3185d1SJeff Roberson 		LIST_FOREACH(slab, &dom->ud_free_slab, us_link)
3956504d5de3SJeff Roberson 			slab_print(slab);
3957504d5de3SJeff Roberson 		printf("Full slabs:\n");
3958ab3185d1SJeff Roberson 		LIST_FOREACH(slab, &dom->ud_full_slab, us_link)
3959504d5de3SJeff Roberson 			slab_print(slab);
3960e20a199fSJeff Roberson 	}
3961ab3185d1SJeff Roberson }
3962e20a199fSJeff Roberson 
3963e20a199fSJeff Roberson void
3964e20a199fSJeff Roberson uma_print_zone(uma_zone_t zone)
3965e20a199fSJeff Roberson {
3966e20a199fSJeff Roberson 	uma_cache_t cache;
3967e20a199fSJeff Roberson 	int i;
3968e20a199fSJeff Roberson 
39695a8eee2bSGleb Smirnoff 	printf("zone: %s(%p) size %d maxitems %ju flags %#x\n",
39705a8eee2bSGleb Smirnoff 	    zone->uz_name, zone, zone->uz_size, (uintmax_t)zone->uz_max_items,
3971bb15d1c7SGleb Smirnoff 	    zone->uz_flags);
3972bb15d1c7SGleb Smirnoff 	if (zone->uz_lockptr != &zone->uz_lock)
3973bb15d1c7SGleb Smirnoff 		uma_print_keg(zone->uz_keg);
39743aa6d94eSJohn Baldwin 	CPU_FOREACH(i) {
3975504d5de3SJeff Roberson 		cache = &zone->uz_cpu[i];
3976504d5de3SJeff Roberson 		printf("CPU %d Cache:\n", i);
3977504d5de3SJeff Roberson 		cache_print(cache);
3978504d5de3SJeff Roberson 	}
39798355f576SJeff Roberson }
39808355f576SJeff Roberson 
3981a0d4b0aeSRobert Watson #ifdef DDB
39828355f576SJeff Roberson /*
39837a52a97eSRobert Watson  * Generate statistics across both the zone and its per-cpu caches.  Return
39847a52a97eSRobert Watson  * desired statistics if the pointer is non-NULL for that statistic.
39857a52a97eSRobert Watson  *
39867a52a97eSRobert Watson  * Note: does not update the zone statistics, as it can't safely clear the
39877a52a97eSRobert Watson  * per-CPU cache statistic.
39887a52a97eSRobert Watson  *
39897a52a97eSRobert Watson  * XXXRW: Following the uc_allocbucket and uc_freebucket pointers here isn't
39907a52a97eSRobert Watson  * safe from off-CPU; we should modify the caches to track this information
39917a52a97eSRobert Watson  * directly so that we don't have to.
39927a52a97eSRobert Watson  */
39937a52a97eSRobert Watson static void
39940f9b7bf3SMark Johnston uma_zone_sumstat(uma_zone_t z, long *cachefreep, uint64_t *allocsp,
3995c1685086SJeff Roberson     uint64_t *freesp, uint64_t *sleepsp, uint64_t *xdomainp)
39967a52a97eSRobert Watson {
39977a52a97eSRobert Watson 	uma_cache_t cache;
3998c1685086SJeff Roberson 	uint64_t allocs, frees, sleeps, xdomain;
39997a52a97eSRobert Watson 	int cachefree, cpu;
40007a52a97eSRobert Watson 
4001c1685086SJeff Roberson 	allocs = frees = sleeps = xdomain = 0;
40027a52a97eSRobert Watson 	cachefree = 0;
40033aa6d94eSJohn Baldwin 	CPU_FOREACH(cpu) {
40047a52a97eSRobert Watson 		cache = &z->uz_cpu[cpu];
40057a52a97eSRobert Watson 		if (cache->uc_allocbucket != NULL)
40067a52a97eSRobert Watson 			cachefree += cache->uc_allocbucket->ub_cnt;
40077a52a97eSRobert Watson 		if (cache->uc_freebucket != NULL)
40087a52a97eSRobert Watson 			cachefree += cache->uc_freebucket->ub_cnt;
4009c1685086SJeff Roberson 		if (cache->uc_crossbucket != NULL) {
4010c1685086SJeff Roberson 			xdomain += cache->uc_crossbucket->ub_cnt;
4011c1685086SJeff Roberson 			cachefree += cache->uc_crossbucket->ub_cnt;
4012c1685086SJeff Roberson 		}
40137a52a97eSRobert Watson 		allocs += cache->uc_allocs;
40147a52a97eSRobert Watson 		frees += cache->uc_frees;
40157a52a97eSRobert Watson 	}
40162efcc8cbSGleb Smirnoff 	allocs += counter_u64_fetch(z->uz_allocs);
40172efcc8cbSGleb Smirnoff 	frees += counter_u64_fetch(z->uz_frees);
4018bf965959SSean Bruno 	sleeps += z->uz_sleeps;
4019c1685086SJeff Roberson 	xdomain += z->uz_xdomain;
40207a52a97eSRobert Watson 	if (cachefreep != NULL)
40217a52a97eSRobert Watson 		*cachefreep = cachefree;
40227a52a97eSRobert Watson 	if (allocsp != NULL)
40237a52a97eSRobert Watson 		*allocsp = allocs;
40247a52a97eSRobert Watson 	if (freesp != NULL)
40257a52a97eSRobert Watson 		*freesp = frees;
4026bf965959SSean Bruno 	if (sleepsp != NULL)
4027bf965959SSean Bruno 		*sleepsp = sleeps;
4028c1685086SJeff Roberson 	if (xdomainp != NULL)
4029c1685086SJeff Roberson 		*xdomainp = xdomain;
40307a52a97eSRobert Watson }
4031a0d4b0aeSRobert Watson #endif /* DDB */
40327a52a97eSRobert Watson 
40337a52a97eSRobert Watson static int
40347a52a97eSRobert Watson sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
40357a52a97eSRobert Watson {
40367a52a97eSRobert Watson 	uma_keg_t kz;
40377a52a97eSRobert Watson 	uma_zone_t z;
40387a52a97eSRobert Watson 	int count;
40397a52a97eSRobert Watson 
40407a52a97eSRobert Watson 	count = 0;
4041111fbcd5SBryan Venteicher 	rw_rlock(&uma_rwlock);
40427a52a97eSRobert Watson 	LIST_FOREACH(kz, &uma_kegs, uk_link) {
40437a52a97eSRobert Watson 		LIST_FOREACH(z, &kz->uk_zones, uz_link)
40447a52a97eSRobert Watson 			count++;
40457a52a97eSRobert Watson 	}
4046b47acb0aSGleb Smirnoff 	LIST_FOREACH(z, &uma_cachezones, uz_link)
4047b47acb0aSGleb Smirnoff 		count++;
4048b47acb0aSGleb Smirnoff 
4049111fbcd5SBryan Venteicher 	rw_runlock(&uma_rwlock);
40507a52a97eSRobert Watson 	return (sysctl_handle_int(oidp, &count, 0, req));
40517a52a97eSRobert Watson }
40527a52a97eSRobert Watson 
4053b47acb0aSGleb Smirnoff static void
4054b47acb0aSGleb Smirnoff uma_vm_zone_stats(struct uma_type_header *uth, uma_zone_t z, struct sbuf *sbuf,
4055b47acb0aSGleb Smirnoff     struct uma_percpu_stat *ups, bool internal)
4056b47acb0aSGleb Smirnoff {
4057b47acb0aSGleb Smirnoff 	uma_zone_domain_t zdom;
4058b47acb0aSGleb Smirnoff 	uma_cache_t cache;
4059b47acb0aSGleb Smirnoff 	int i;
4060b47acb0aSGleb Smirnoff 
4062b47acb0aSGleb Smirnoff 	for (i = 0; i < vm_ndomains; i++) {
4063b47acb0aSGleb Smirnoff 		zdom = &z->uz_domain[i];
4064b47acb0aSGleb Smirnoff 		uth->uth_zone_free += zdom->uzd_nitems;
4065b47acb0aSGleb Smirnoff 	}
4066b47acb0aSGleb Smirnoff 	uth->uth_allocs = counter_u64_fetch(z->uz_allocs);
4067b47acb0aSGleb Smirnoff 	uth->uth_frees = counter_u64_fetch(z->uz_frees);
4068b47acb0aSGleb Smirnoff 	uth->uth_fails = counter_u64_fetch(z->uz_fails);
4069b47acb0aSGleb Smirnoff 	uth->uth_sleeps = z->uz_sleeps;
4070c1685086SJeff Roberson 	uth->uth_xdomain = z->uz_xdomain;
4071b47acb0aSGleb Smirnoff 	/*
4072b47acb0aSGleb Smirnoff 	 * While it is not normally safe to access the cache
4073b47acb0aSGleb Smirnoff 	 * bucket pointers while not on the CPU that owns the
4074b47acb0aSGleb Smirnoff 	 * cache, we only allow the pointers to be exchanged
4075b47acb0aSGleb Smirnoff 	 * without the zone lock held, not invalidated, so
4076b47acb0aSGleb Smirnoff 	 * accept the possible race associated with bucket
4077b47acb0aSGleb Smirnoff 	 * exchange during monitoring.
4078b47acb0aSGleb Smirnoff 	 */
4079b47acb0aSGleb Smirnoff 	for (i = 0; i < mp_maxid + 1; i++) {
4080b47acb0aSGleb Smirnoff 		bzero(&ups[i], sizeof(*ups));
4081b47acb0aSGleb Smirnoff 		if (internal || CPU_ABSENT(i))
4082b47acb0aSGleb Smirnoff 			continue;
4083b47acb0aSGleb Smirnoff 		cache = &z->uz_cpu[i];
4084b47acb0aSGleb Smirnoff 		if (cache->uc_allocbucket != NULL)
4085b47acb0aSGleb Smirnoff 			ups[i].ups_cache_free +=
4086b47acb0aSGleb Smirnoff 			    cache->uc_allocbucket->ub_cnt;
4087b47acb0aSGleb Smirnoff 		if (cache->uc_freebucket != NULL)
4088b47acb0aSGleb Smirnoff 			ups[i].ups_cache_free +=
4089b47acb0aSGleb Smirnoff 			    cache->uc_freebucket->ub_cnt;
4090c1685086SJeff Roberson 		if (cache->uc_crossbucket != NULL)
4091c1685086SJeff Roberson 			ups[i].ups_cache_free +=
4092c1685086SJeff Roberson 			    cache->uc_crossbucket->ub_cnt;
4093b47acb0aSGleb Smirnoff 		ups[i].ups_allocs = cache->uc_allocs;
4094b47acb0aSGleb Smirnoff 		ups[i].ups_frees = cache->uc_frees;
4095b47acb0aSGleb Smirnoff 	}
4096b47acb0aSGleb Smirnoff }
4097b47acb0aSGleb Smirnoff 
40987a52a97eSRobert Watson static int
40997a52a97eSRobert Watson sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
41007a52a97eSRobert Watson {
41017a52a97eSRobert Watson 	struct uma_stream_header ush;
41027a52a97eSRobert Watson 	struct uma_type_header uth;
410363b5d112SKonstantin Belousov 	struct uma_percpu_stat *ups;
41047a52a97eSRobert Watson 	struct sbuf sbuf;
41057a52a97eSRobert Watson 	uma_keg_t kz;
41067a52a97eSRobert Watson 	uma_zone_t z;
41074e657159SMatthew D Fleming 	int count, error, i;
41087a52a97eSRobert Watson 
410900f0e671SMatthew D Fleming 	error = sysctl_wire_old_buffer(req, 0);
411000f0e671SMatthew D Fleming 	if (error != 0)
411100f0e671SMatthew D Fleming 		return (error);
41124e657159SMatthew D Fleming 	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
41131eafc078SIan Lepore 	sbuf_clear_flags(&sbuf, SBUF_INCLUDENUL);
411463b5d112SKonstantin Belousov 	ups = malloc((mp_maxid + 1) * sizeof(*ups), M_TEMP, M_WAITOK);
41154e657159SMatthew D Fleming 
4116404a593eSMatthew D Fleming 	count = 0;
4117111fbcd5SBryan Venteicher 	rw_rlock(&uma_rwlock);
41187a52a97eSRobert Watson 	LIST_FOREACH(kz, &uma_kegs, uk_link) {
41197a52a97eSRobert Watson 		LIST_FOREACH(z, &kz->uk_zones, uz_link)
41207a52a97eSRobert Watson 			count++;
41217a52a97eSRobert Watson 	}
41227a52a97eSRobert Watson 
4123b47acb0aSGleb Smirnoff 	LIST_FOREACH(z, &uma_cachezones, uz_link)
4124b47acb0aSGleb Smirnoff 		count++;
4125b47acb0aSGleb Smirnoff 
41267a52a97eSRobert Watson 	/*
41277a52a97eSRobert Watson 	 * Insert stream header.
41287a52a97eSRobert Watson 	 */
41297a52a97eSRobert Watson 	bzero(&ush, sizeof(ush));
41307a52a97eSRobert Watson 	ush.ush_version = UMA_STREAM_VERSION;
4131ab3a57c0SRobert Watson 	ush.ush_maxcpus = (mp_maxid + 1);
41327a52a97eSRobert Watson 	ush.ush_count = count;
41334e657159SMatthew D Fleming 	(void)sbuf_bcat(&sbuf, &ush, sizeof(ush));
41347a52a97eSRobert Watson 
41357a52a97eSRobert Watson 	LIST_FOREACH(kz, &uma_kegs, uk_link) {
41367a52a97eSRobert Watson 		LIST_FOREACH(z, &kz->uk_zones, uz_link) {
41377a52a97eSRobert Watson 			bzero(&uth, sizeof(uth));
41387a52a97eSRobert Watson 			ZONE_LOCK(z);
4139cbbb4a00SRobert Watson 			strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
41407a52a97eSRobert Watson 			uth.uth_align = kz->uk_align;
41417a52a97eSRobert Watson 			uth.uth_size = kz->uk_size;
41427a52a97eSRobert Watson 			uth.uth_rsize = kz->uk_rsize;
4143bb45b411SGleb Smirnoff 			if (z->uz_max_items > 0)
4144bb45b411SGleb Smirnoff 				uth.uth_pages = (z->uz_items / kz->uk_ipers) *
4145bb15d1c7SGleb Smirnoff 					kz->uk_ppera;
4146bb45b411SGleb Smirnoff 			else
4147bb45b411SGleb Smirnoff 				uth.uth_pages = kz->uk_pages;
4148f8c86a5fSGleb Smirnoff 			uth.uth_maxpages = (z->uz_max_items / kz->uk_ipers) *
4149bb15d1c7SGleb Smirnoff 			    kz->uk_ppera;
4150bb15d1c7SGleb Smirnoff 			uth.uth_limit = z->uz_max_items;
4151f8c86a5fSGleb Smirnoff 			uth.uth_keg_free = z->uz_keg->uk_free;
4152cbbb4a00SRobert Watson 
4153cbbb4a00SRobert Watson 			/*
4154cbbb4a00SRobert Watson 			 * A zone is secondary if it is not the first entry
4155cbbb4a00SRobert Watson 			 * on the keg's zone list.
4156cbbb4a00SRobert Watson 			 */
4157e20a199fSJeff Roberson 			if ((z->uz_flags & UMA_ZONE_SECONDARY) &&
4158cbbb4a00SRobert Watson 			    (LIST_FIRST(&kz->uk_zones) != z))
4159cbbb4a00SRobert Watson 				uth.uth_zone_flags = UTH_ZONE_SECONDARY;
4160b47acb0aSGleb Smirnoff 			uma_vm_zone_stats(&uth, z, &sbuf, ups,
4161b47acb0aSGleb Smirnoff 			    kz->uk_flags & UMA_ZFLAG_INTERNAL);
41622450bbb8SRobert Watson 			ZONE_UNLOCK(z);
416363b5d112SKonstantin Belousov 			(void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
416463b5d112SKonstantin Belousov 			for (i = 0; i < mp_maxid + 1; i++)
416563b5d112SKonstantin Belousov 				(void)sbuf_bcat(&sbuf, &ups[i], sizeof(ups[i]));
41667a52a97eSRobert Watson 		}
41677a52a97eSRobert Watson 	}
4168b47acb0aSGleb Smirnoff 	LIST_FOREACH(z, &uma_cachezones, uz_link) {
4169b47acb0aSGleb Smirnoff 		bzero(&uth, sizeof(uth));
4170b47acb0aSGleb Smirnoff 		ZONE_LOCK(z);
4171b47acb0aSGleb Smirnoff 		strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
4172b47acb0aSGleb Smirnoff 		uth.uth_size = z->uz_size;
4173b47acb0aSGleb Smirnoff 		uma_vm_zone_stats(&uth, z, &sbuf, ups, false);
4174b47acb0aSGleb Smirnoff 		ZONE_UNLOCK(z);
4175b47acb0aSGleb Smirnoff 		(void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
4176b47acb0aSGleb Smirnoff 		for (i = 0; i < mp_maxid + 1; i++)
4177b47acb0aSGleb Smirnoff 			(void)sbuf_bcat(&sbuf, &ups[i], sizeof(ups[i]));
4178b47acb0aSGleb Smirnoff 	}
4179b47acb0aSGleb Smirnoff 
4180111fbcd5SBryan Venteicher 	rw_runlock(&uma_rwlock);
41814e657159SMatthew D Fleming 	error = sbuf_finish(&sbuf);
41824e657159SMatthew D Fleming 	sbuf_delete(&sbuf);
418363b5d112SKonstantin Belousov 	free(ups, M_TEMP);
41847a52a97eSRobert Watson 	return (error);
41857a52a97eSRobert Watson }
418648c5777eSRobert Watson 
41870a5a3ccbSGleb Smirnoff int
41880a5a3ccbSGleb Smirnoff sysctl_handle_uma_zone_max(SYSCTL_HANDLER_ARGS)
41890a5a3ccbSGleb Smirnoff {
41900a5a3ccbSGleb Smirnoff 	uma_zone_t zone = *(uma_zone_t *)arg1;
419116be9f54SGleb Smirnoff 	int error, max;
41920a5a3ccbSGleb Smirnoff 
419316be9f54SGleb Smirnoff 	max = uma_zone_get_max(zone);
41940a5a3ccbSGleb Smirnoff 	error = sysctl_handle_int(oidp, &max, 0, req);
41950a5a3ccbSGleb Smirnoff 	if (error || !req->newptr)
41960a5a3ccbSGleb Smirnoff 		return (error);
41970a5a3ccbSGleb Smirnoff 
41980a5a3ccbSGleb Smirnoff 	uma_zone_set_max(zone, max);
41990a5a3ccbSGleb Smirnoff 
42000a5a3ccbSGleb Smirnoff 	return (0);
42010a5a3ccbSGleb Smirnoff }
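
/*
 * Illustrative (hypothetical) wiring of the handler above to a sysctl;
 * arg1 must point at a uma_zone_t variable, matching the
 * *(uma_zone_t *)arg1 dereference performed by the handler:
 *
 *	SYSCTL_PROC(_vm, OID_AUTO, my_zone_max,
 *	    CTLTYPE_INT | CTLFLAG_RW, &my_zone, 0,
 *	    sysctl_handle_uma_zone_max, "I", "Maximum my_zone items");
 */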
42020a5a3ccbSGleb Smirnoff 
42030a5a3ccbSGleb Smirnoff int
42040a5a3ccbSGleb Smirnoff sysctl_handle_uma_zone_cur(SYSCTL_HANDLER_ARGS)
42050a5a3ccbSGleb Smirnoff {
42060a5a3ccbSGleb Smirnoff 	uma_zone_t zone = *(uma_zone_t *)arg1;
42070a5a3ccbSGleb Smirnoff 	int cur;
42080a5a3ccbSGleb Smirnoff 
42090a5a3ccbSGleb Smirnoff 	cur = uma_zone_get_cur(zone);
42100a5a3ccbSGleb Smirnoff 	return (sysctl_handle_int(oidp, &cur, 0, req));
42110a5a3ccbSGleb Smirnoff }
42120a5a3ccbSGleb Smirnoff 
42139542ea7bSGleb Smirnoff #ifdef INVARIANTS
42149542ea7bSGleb Smirnoff static uma_slab_t
42159542ea7bSGleb Smirnoff uma_dbg_getslab(uma_zone_t zone, void *item)
42169542ea7bSGleb Smirnoff {
42179542ea7bSGleb Smirnoff 	uma_slab_t slab;
42189542ea7bSGleb Smirnoff 	uma_keg_t keg;
42199542ea7bSGleb Smirnoff 	uint8_t *mem;
42209542ea7bSGleb Smirnoff 
42219542ea7bSGleb Smirnoff 	mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
42229542ea7bSGleb Smirnoff 	if (zone->uz_flags & UMA_ZONE_VTOSLAB) {
42239542ea7bSGleb Smirnoff 		slab = vtoslab((vm_offset_t)mem);
42249542ea7bSGleb Smirnoff 	} else {
42259542ea7bSGleb Smirnoff 		/*
42269542ea7bSGleb Smirnoff 		 * It is safe to return the slab here even though the
42279542ea7bSGleb Smirnoff 		 * zone is unlocked because the item's allocation state
42289542ea7bSGleb Smirnoff 		 * essentially holds a reference.
42299542ea7bSGleb Smirnoff 		 */
4230bb15d1c7SGleb Smirnoff 		if (zone->uz_lockptr == &zone->uz_lock)
4231bb15d1c7SGleb Smirnoff 			return (NULL);
42329542ea7bSGleb Smirnoff 		ZONE_LOCK(zone);
4233bb15d1c7SGleb Smirnoff 		keg = zone->uz_keg;
42349542ea7bSGleb Smirnoff 		if (keg->uk_flags & UMA_ZONE_HASH)
42359542ea7bSGleb Smirnoff 			slab = hash_sfind(&keg->uk_hash, mem);
42369542ea7bSGleb Smirnoff 		else
42379542ea7bSGleb Smirnoff 			slab = (uma_slab_t)(mem + keg->uk_pgoff);
42389542ea7bSGleb Smirnoff 		ZONE_UNLOCK(zone);
42399542ea7bSGleb Smirnoff 	}
42409542ea7bSGleb Smirnoff 
42419542ea7bSGleb Smirnoff 	return (slab);
42429542ea7bSGleb Smirnoff }
42439542ea7bSGleb Smirnoff 
4244c5deaf04SGleb Smirnoff static bool
4245c5deaf04SGleb Smirnoff uma_dbg_zskip(uma_zone_t zone, void *mem)
4246c5deaf04SGleb Smirnoff {
4247c5deaf04SGleb Smirnoff 
4248bb15d1c7SGleb Smirnoff 	if (zone->uz_lockptr == &zone->uz_lock)
4249c5deaf04SGleb Smirnoff 		return (true);
4250c5deaf04SGleb Smirnoff 
4251bb15d1c7SGleb Smirnoff 	return (uma_dbg_kskip(zone->uz_keg, mem));
4252c5deaf04SGleb Smirnoff }
4253c5deaf04SGleb Smirnoff 
4254c5deaf04SGleb Smirnoff static bool
4255c5deaf04SGleb Smirnoff uma_dbg_kskip(uma_keg_t keg, void *mem)
4256c5deaf04SGleb Smirnoff {
4257c5deaf04SGleb Smirnoff 	uintptr_t idx;
4258c5deaf04SGleb Smirnoff 
4259c5deaf04SGleb Smirnoff 	if (dbg_divisor == 0)
4260c5deaf04SGleb Smirnoff 		return (true);
4261c5deaf04SGleb Smirnoff 
4262c5deaf04SGleb Smirnoff 	if (dbg_divisor == 1)
4263c5deaf04SGleb Smirnoff 		return (false);
4264c5deaf04SGleb Smirnoff 
4265c5deaf04SGleb Smirnoff 	idx = (uintptr_t)mem >> PAGE_SHIFT;
4266c5deaf04SGleb Smirnoff 	if (keg->uk_ipers > 1) {
4267c5deaf04SGleb Smirnoff 		idx *= keg->uk_ipers;
4268c5deaf04SGleb Smirnoff 		idx += ((uintptr_t)mem & PAGE_MASK) / keg->uk_rsize;
4269c5deaf04SGleb Smirnoff 	}
4270c5deaf04SGleb Smirnoff 
4271c5deaf04SGleb Smirnoff 	if ((idx / dbg_divisor) * dbg_divisor != idx) {
4272c5deaf04SGleb Smirnoff 		counter_u64_add(uma_skip_cnt, 1);
4273c5deaf04SGleb Smirnoff 		return (true);
4274c5deaf04SGleb Smirnoff 	}
4275c5deaf04SGleb Smirnoff 	counter_u64_add(uma_dbg_cnt, 1);
4276c5deaf04SGleb Smirnoff 
4277c5deaf04SGleb Smirnoff 	return (false);
4278c5deaf04SGleb Smirnoff }
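
/*
 * Worked example (hypothetical divisor): with dbg_divisor = 3, items
 * whose computed index is 0, 3, 6, ... satisfy (idx / 3) * 3 == idx and
 * are run through the debugging checks; the remaining two thirds are
 * skipped, trading coverage for allocation speed.
 */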
4279c5deaf04SGleb Smirnoff 
42809542ea7bSGleb Smirnoff /*
42819542ea7bSGleb Smirnoff  * Set up the slab's freei data such that uma_dbg_free can function.
42839542ea7bSGleb Smirnoff  */
42849542ea7bSGleb Smirnoff static void
42859542ea7bSGleb Smirnoff uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item)
42869542ea7bSGleb Smirnoff {
42879542ea7bSGleb Smirnoff 	uma_keg_t keg;
42889542ea7bSGleb Smirnoff 	int freei;
42899542ea7bSGleb Smirnoff 
42909542ea7bSGleb Smirnoff 	if (slab == NULL) {
42919542ea7bSGleb Smirnoff 		slab = uma_dbg_getslab(zone, item);
42929542ea7bSGleb Smirnoff 		if (slab == NULL)
42939542ea7bSGleb Smirnoff 			panic("uma: item %p did not belong to zone %s\n",
42949542ea7bSGleb Smirnoff 			    item, zone->uz_name);
42959542ea7bSGleb Smirnoff 	}
42969542ea7bSGleb Smirnoff 	keg = slab->us_keg;
42979542ea7bSGleb Smirnoff 	freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
42989542ea7bSGleb Smirnoff 
42999542ea7bSGleb Smirnoff 	if (BIT_ISSET(SLAB_SETSIZE, freei, &slab->us_debugfree))
43009542ea7bSGleb Smirnoff 		panic("Duplicate alloc of %p from zone %p(%s) slab %p(%d)\n",
43019542ea7bSGleb Smirnoff 		    item, zone, zone->uz_name, slab, freei);
43029542ea7bSGleb Smirnoff 	BIT_SET_ATOMIC(SLAB_SETSIZE, freei, &slab->us_debugfree);
43039542ea7bSGleb Smirnoff 
43049542ea7bSGleb Smirnoff 	return;
43059542ea7bSGleb Smirnoff }
43069542ea7bSGleb Smirnoff 
43079542ea7bSGleb Smirnoff /*
43089542ea7bSGleb Smirnoff  * Verifies freed addresses.  Checks for alignment, valid slab membership
43099542ea7bSGleb Smirnoff  * and duplicate frees.
43119542ea7bSGleb Smirnoff  */
43129542ea7bSGleb Smirnoff static void
43139542ea7bSGleb Smirnoff uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item)
43149542ea7bSGleb Smirnoff {
43159542ea7bSGleb Smirnoff 	uma_keg_t keg;
43169542ea7bSGleb Smirnoff 	int freei;
43179542ea7bSGleb Smirnoff 
43189542ea7bSGleb Smirnoff 	if (slab == NULL) {
43199542ea7bSGleb Smirnoff 		slab = uma_dbg_getslab(zone, item);
43209542ea7bSGleb Smirnoff 		if (slab == NULL)
43219542ea7bSGleb Smirnoff 			panic("uma: Freed item %p did not belong to zone %s\n",
43229542ea7bSGleb Smirnoff 			    item, zone->uz_name);
43239542ea7bSGleb Smirnoff 	}
43249542ea7bSGleb Smirnoff 	keg = slab->us_keg;
43259542ea7bSGleb Smirnoff 	freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
43269542ea7bSGleb Smirnoff 
43279542ea7bSGleb Smirnoff 	if (freei >= keg->uk_ipers)
43289542ea7bSGleb Smirnoff 		panic("Invalid free of %p from zone %p(%s) slab %p(%d)\n",
43299542ea7bSGleb Smirnoff 		    item, zone, zone->uz_name, slab, freei);
43309542ea7bSGleb Smirnoff 
43319542ea7bSGleb Smirnoff 	if (((freei * keg->uk_rsize) + slab->us_data) != item)
43329542ea7bSGleb Smirnoff 		panic("Unaligned free of %p from zone %p(%s) slab %p(%d)\n",
43339542ea7bSGleb Smirnoff 		    item, zone, zone->uz_name, slab, freei);
43349542ea7bSGleb Smirnoff 
43359542ea7bSGleb Smirnoff 	if (!BIT_ISSET(SLAB_SETSIZE, freei, &slab->us_debugfree))
43369542ea7bSGleb Smirnoff 		panic("Duplicate free of %p from zone %p(%s) slab %p(%d)\n",
43379542ea7bSGleb Smirnoff 		    item, zone, zone->uz_name, slab, freei);
43389542ea7bSGleb Smirnoff 
43399542ea7bSGleb Smirnoff 	BIT_CLR_ATOMIC(SLAB_SETSIZE, freei, &slab->us_debugfree);
43409542ea7bSGleb Smirnoff }
43419542ea7bSGleb Smirnoff #endif /* INVARIANTS */
43429542ea7bSGleb Smirnoff 
434348c5777eSRobert Watson #ifdef DDB
434448c5777eSRobert Watson DB_SHOW_COMMAND(uma, db_show_uma)
434548c5777eSRobert Watson {
434648c5777eSRobert Watson 	uma_keg_t kz;
434748c5777eSRobert Watson 	uma_zone_t z;
4348c1685086SJeff Roberson 	uint64_t allocs, frees, sleeps, xdomain;
43490f9b7bf3SMark Johnston 	long cachefree;
43500f9b7bf3SMark Johnston 	int i;
435148c5777eSRobert Watson 
4352c1685086SJeff Roberson 	db_printf("%18s %8s %8s %8s %12s %8s %8s %8s\n", "Zone", "Size", "Used",
4353c1685086SJeff Roberson 	    "Free", "Requests", "Sleeps", "Bucket", "XFree");
435448c5777eSRobert Watson 	LIST_FOREACH(kz, &uma_kegs, uk_link) {
435548c5777eSRobert Watson 		LIST_FOREACH(z, &kz->uk_zones, uz_link) {
435648c5777eSRobert Watson 			if (kz->uk_flags & UMA_ZFLAG_INTERNAL) {
43572efcc8cbSGleb Smirnoff 				allocs = counter_u64_fetch(z->uz_allocs);
43582efcc8cbSGleb Smirnoff 				frees = counter_u64_fetch(z->uz_frees);
4359bf965959SSean Bruno 				sleeps = z->uz_sleeps;
436048c5777eSRobert Watson 				cachefree = 0;
436148c5777eSRobert Watson 			} else
436248c5777eSRobert Watson 				uma_zone_sumstat(z, &cachefree, &allocs,
4363c1685086SJeff Roberson 				    &frees, &sleeps, &xdomain);
4364e20a199fSJeff Roberson 			if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
436548c5777eSRobert Watson 			    (LIST_FIRST(&kz->uk_zones) != z)))
436648c5777eSRobert Watson 				cachefree += kz->uk_free;
43670f9b7bf3SMark Johnston 			for (i = 0; i < vm_ndomains; i++)
43680f9b7bf3SMark Johnston 				cachefree += z->uz_domain[i].uzd_nitems;
43690f9b7bf3SMark Johnston 
4370c1685086SJeff Roberson 			db_printf("%18s %8ju %8jd %8ld %12ju %8ju %8u %8ju\n",
437103175483SAlexander Motin 			    z->uz_name, (uintmax_t)kz->uk_size,
4372ae4e9636SRobert Watson 			    (intmax_t)(allocs - frees), cachefree,
4373c1685086SJeff Roberson 			    (uintmax_t)allocs, sleeps, z->uz_count, xdomain);
4374687c94aaSJohn Baldwin 			if (db_pager_quit)
4375687c94aaSJohn Baldwin 				return;
437648c5777eSRobert Watson 		}
437748c5777eSRobert Watson 	}
437848c5777eSRobert Watson }
437903175483SAlexander Motin 
438003175483SAlexander Motin DB_SHOW_COMMAND(umacache, db_show_umacache)
438103175483SAlexander Motin {
438203175483SAlexander Motin 	uma_zone_t z;
4383ab3185d1SJeff Roberson 	uint64_t allocs, frees;
43840f9b7bf3SMark Johnston 	long cachefree;
43850f9b7bf3SMark Johnston 	int i;
438603175483SAlexander Motin 
438703175483SAlexander Motin 	db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free",
438803175483SAlexander Motin 	    "Requests", "Bucket");
438903175483SAlexander Motin 	LIST_FOREACH(z, &uma_cachezones, uz_link) {
4390c1685086SJeff Roberson 		uma_zone_sumstat(z, &cachefree, &allocs, &frees, NULL, NULL);
43910f9b7bf3SMark Johnston 		for (i = 0; i < vm_ndomains; i++)
43920f9b7bf3SMark Johnston 			cachefree += z->uz_domain[i].uzd_nitems;
43930f9b7bf3SMark Johnston 		db_printf("%18s %8ju %8jd %8ld %12ju %8u\n",
439403175483SAlexander Motin 		    z->uz_name, (uintmax_t)z->uz_size,
439503175483SAlexander Motin 		    (intmax_t)(allocs - frees), cachefree,
439603175483SAlexander Motin 		    (uintmax_t)allocs, z->uz_count);
439703175483SAlexander Motin 		if (db_pager_quit)
439803175483SAlexander Motin 			return;
439903175483SAlexander Motin 	}
440003175483SAlexander Motin }
44019542ea7bSGleb Smirnoff #endif	/* DDB */
4402