160727d8bSWarner Losh /*- 2fe267a55SPedro F. Giffuni * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3fe267a55SPedro F. Giffuni * 4584061b4SJeff Roberson * Copyright (c) 2002-2019 Jeffrey Roberson <jeff@FreeBSD.org> 508ecce74SRobert Watson * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org> 6ae4e9636SRobert Watson * Copyright (c) 2004-2006 Robert N. M. Watson 708ecce74SRobert Watson * All rights reserved. 88355f576SJeff Roberson * 98355f576SJeff Roberson * Redistribution and use in source and binary forms, with or without 108355f576SJeff Roberson * modification, are permitted provided that the following conditions 118355f576SJeff Roberson * are met: 128355f576SJeff Roberson * 1. Redistributions of source code must retain the above copyright 138355f576SJeff Roberson * notice unmodified, this list of conditions, and the following 148355f576SJeff Roberson * disclaimer. 158355f576SJeff Roberson * 2. Redistributions in binary form must reproduce the above copyright 168355f576SJeff Roberson * notice, this list of conditions and the following disclaimer in the 178355f576SJeff Roberson * documentation and/or other materials provided with the distribution. 188355f576SJeff Roberson * 198355f576SJeff Roberson * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 208355f576SJeff Roberson * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 218355f576SJeff Roberson * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 228355f576SJeff Roberson * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 238355f576SJeff Roberson * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 248355f576SJeff Roberson * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 258355f576SJeff Roberson * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 268355f576SJeff Roberson * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 278355f576SJeff Roberson * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 288355f576SJeff Roberson * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 298355f576SJeff Roberson */ 308355f576SJeff Roberson 318355f576SJeff Roberson /* 328355f576SJeff Roberson * uma_core.c Implementation of the Universal Memory allocator 338355f576SJeff Roberson * 348355f576SJeff Roberson * This allocator is intended to replace the multitude of similar object caches 358355f576SJeff Roberson * in the standard FreeBSD kernel. The intent is to be flexible as well as 36763df3ecSPedro F. Giffuni * efficient. A primary design goal is to return unused memory to the rest of 378355f576SJeff Roberson * the system. This will make the system as a whole more flexible due to the 388355f576SJeff Roberson * ability to move memory to subsystems which most need it instead of leaving 398355f576SJeff Roberson * pools of reserved memory unused. 408355f576SJeff Roberson * 418355f576SJeff Roberson * The basic ideas stem from similar slab/zone based allocators whose algorithms 428355f576SJeff Roberson * are well known. 438355f576SJeff Roberson * 448355f576SJeff Roberson */ 458355f576SJeff Roberson 468355f576SJeff Roberson /* 478355f576SJeff Roberson * TODO: 488355f576SJeff Roberson * - Improve memory usage for large allocations 498355f576SJeff Roberson * - Investigate cache size adjustments 508355f576SJeff Roberson */ 518355f576SJeff Roberson 52874651b1SDavid E. O'Brien #include <sys/cdefs.h> 53874651b1SDavid E. 
O'Brien __FBSDID("$FreeBSD$"); 54874651b1SDavid E. O'Brien 5548c5777eSRobert Watson #include "opt_ddb.h" 568355f576SJeff Roberson #include "opt_param.h" 578d689e04SGleb Smirnoff #include "opt_vm.h" 5848c5777eSRobert Watson 598355f576SJeff Roberson #include <sys/param.h> 608355f576SJeff Roberson #include <sys/systm.h> 6109c8cb71SMark Johnston #include <sys/asan.h> 62ef72505eSJeff Roberson #include <sys/bitset.h> 63194a979eSMark Johnston #include <sys/domainset.h> 649b43bc27SAndriy Gapon #include <sys/eventhandler.h> 658355f576SJeff Roberson #include <sys/kernel.h> 668355f576SJeff Roberson #include <sys/types.h> 67ad5b0f5bSJeff Roberson #include <sys/limits.h> 688355f576SJeff Roberson #include <sys/queue.h> 698355f576SJeff Roberson #include <sys/malloc.h> 703659f747SRobert Watson #include <sys/ktr.h> 718355f576SJeff Roberson #include <sys/lock.h> 728355f576SJeff Roberson #include <sys/sysctl.h> 738355f576SJeff Roberson #include <sys/mutex.h> 744c1cc01cSJohn Baldwin #include <sys/proc.h> 7510cb2424SMark Murray #include <sys/random.h> 7689f6b863SAttilio Rao #include <sys/rwlock.h> 777a52a97eSRobert Watson #include <sys/sbuf.h> 78a2de44abSAlexander Motin #include <sys/sched.h> 794bd61e19SJeff Roberson #include <sys/sleepqueue.h> 808355f576SJeff Roberson #include <sys/smp.h> 81d4665eaaSJeff Roberson #include <sys/smr.h> 82e60b2fcbSGleb Smirnoff #include <sys/taskqueue.h> 8386bbae32SJeff Roberson #include <sys/vmmeter.h> 8486bbae32SJeff Roberson 858355f576SJeff Roberson #include <vm/vm.h> 866f3b523cSKonstantin Belousov #include <vm/vm_param.h> 87194a979eSMark Johnston #include <vm/vm_domainset.h> 888355f576SJeff Roberson #include <vm/vm_object.h> 898355f576SJeff Roberson #include <vm/vm_page.h> 90a4915c21SAttilio Rao #include <vm/vm_pageout.h> 91ab3185d1SJeff Roberson #include <vm/vm_phys.h> 9230c5525bSAndrew Gallatin #include <vm/vm_pagequeue.h> 938355f576SJeff Roberson #include <vm/vm_map.h> 948355f576SJeff Roberson #include <vm/vm_kern.h> 958355f576SJeff Roberson #include <vm/vm_extern.h> 966f3b523cSKonstantin Belousov #include <vm/vm_dumpset.h> 978355f576SJeff Roberson #include <vm/uma.h> 988355f576SJeff Roberson #include <vm/uma_int.h> 99639c9550SJeff Roberson #include <vm/uma_dbg.h> 1008355f576SJeff Roberson 10148c5777eSRobert Watson #include <ddb/ddb.h> 10248c5777eSRobert Watson 1038d689e04SGleb Smirnoff #ifdef DEBUG_MEMGUARD 1048d689e04SGleb Smirnoff #include <vm/memguard.h> 1058d689e04SGleb Smirnoff #endif 1068d689e04SGleb Smirnoff 107a81c400eSJeff Roberson #include <machine/md_var.h> 108a81c400eSJeff Roberson 109d4665eaaSJeff Roberson #ifdef INVARIANTS 110d4665eaaSJeff Roberson #define UMA_ALWAYS_CTORDTOR 1 111d4665eaaSJeff Roberson #else 112d4665eaaSJeff Roberson #define UMA_ALWAYS_CTORDTOR 0 113d4665eaaSJeff Roberson #endif 114d4665eaaSJeff Roberson 1158355f576SJeff Roberson /* 116ab3185d1SJeff Roberson * This is the zone and keg from which all zones are spawned. 1178355f576SJeff Roberson */ 118ab3185d1SJeff Roberson static uma_zone_t kegs; 119ab3185d1SJeff Roberson static uma_zone_t zones; 1208355f576SJeff Roberson 1219b8db4d0SRyan Libby /* 12254007ce8SMark Johnston * On INVARIANTS builds, the slab contains a second bitset of the same size, 12354007ce8SMark Johnston * "dbg_bits", which is laid out immediately after us_free. 
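 * SLAB_BITSETS below counts the bitsets stored per slab header and is
 * used by SLABZONE_SIZE() when sizing the offpage slab header zones.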
12454007ce8SMark Johnston */ 12554007ce8SMark Johnston #ifdef INVARIANTS 12654007ce8SMark Johnston #define SLAB_BITSETS 2 12754007ce8SMark Johnston #else 12854007ce8SMark Johnston #define SLAB_BITSETS 1 12954007ce8SMark Johnston #endif 13054007ce8SMark Johnston 13154007ce8SMark Johnston /* 1329b8db4d0SRyan Libby * These are the two zones from which all offpage uma_slab_ts are allocated. 1339b8db4d0SRyan Libby * 1349b8db4d0SRyan Libby * One zone is for slab headers that can represent a larger number of items, 1359b8db4d0SRyan Libby * making the slabs themselves more efficient, and the other zone is for 1369b8db4d0SRyan Libby * headers that are smaller and represent fewer items, making the headers more 1379b8db4d0SRyan Libby * efficient. 1389b8db4d0SRyan Libby */ 1399b8db4d0SRyan Libby #define SLABZONE_SIZE(setsize) \ 1409b8db4d0SRyan Libby (sizeof(struct uma_hash_slab) + BITSET_SIZE(setsize) * SLAB_BITSETS) 1419b8db4d0SRyan Libby #define SLABZONE0_SETSIZE (PAGE_SIZE / 16) 1429b8db4d0SRyan Libby #define SLABZONE1_SETSIZE SLAB_MAX_SETSIZE 1439b8db4d0SRyan Libby #define SLABZONE0_SIZE SLABZONE_SIZE(SLABZONE0_SETSIZE) 1449b8db4d0SRyan Libby #define SLABZONE1_SIZE SLABZONE_SIZE(SLABZONE1_SETSIZE) 1459b8db4d0SRyan Libby static uma_zone_t slabzones[2]; 1468355f576SJeff Roberson 1478355f576SJeff Roberson /* 1488355f576SJeff Roberson * The initial hash tables come out of this zone so they can be allocated 1498355f576SJeff Roberson * prior to malloc coming up. 1508355f576SJeff Roberson */ 1518355f576SJeff Roberson static uma_zone_t hashzone; 1528355f576SJeff Roberson 1531e319f6dSRobert Watson /* The boot-time adjusted value for cache line alignment. */ 154e4cd31ddSJeff Roberson int uma_align_cache = 64 - 1; 1551e319f6dSRobert Watson 156961647dfSJeff Roberson static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets"); 15720a4e154SJeff Roberson static MALLOC_DEFINE(M_UMA, "UMA", "UMA Misc"); 158961647dfSJeff Roberson 1598355f576SJeff Roberson /* 16086bbae32SJeff Roberson * Are we allowed to allocate buckets? 16186bbae32SJeff Roberson */ 16286bbae32SJeff Roberson static int bucketdisable = 1; 16386bbae32SJeff Roberson 164099a0e58SBosko Milekic /* Linked list of all kegs in the system */ 16513e403fdSAntoine Brodin static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs); 1668355f576SJeff Roberson 16703175483SAlexander Motin /* Linked list of all cache-only zones in the system */ 16803175483SAlexander Motin static LIST_HEAD(,uma_zone) uma_cachezones = 16903175483SAlexander Motin LIST_HEAD_INITIALIZER(uma_cachezones); 17003175483SAlexander Motin 171*aabe13f1SMark Johnston /* 172*aabe13f1SMark Johnston * Mutex for global lists: uma_kegs, uma_cachezones, and the per-keg list of 173*aabe13f1SMark Johnston * zones. 174*aabe13f1SMark Johnston */ 175fe933c1dSMateusz Guzik static struct rwlock_padalign __exclusive_cache_line uma_rwlock; 1768355f576SJeff Roberson 177*aabe13f1SMark Johnston static struct sx uma_reclaim_lock; 178*aabe13f1SMark Johnston 179ac0a6fd0SGleb Smirnoff /* 180a81c400eSJeff Roberson * First available virual address for boot time allocations. 181ac0a6fd0SGleb Smirnoff */ 182a81c400eSJeff Roberson static vm_offset_t bootstart; 183a81c400eSJeff Roberson static vm_offset_t bootmem; 1848355f576SJeff Roberson 185fbd95859SMark Johnston /* 186fbd95859SMark Johnston * kmem soft limit, initialized by uma_set_limit(). Ensure that early 187fbd95859SMark Johnston * allocations don't trigger a wakeup of the reclaim thread. 
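 * The limit defaults to LONG_MAX, so it is effectively unlimited until
 * uma_set_limit() is called.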
188fbd95859SMark Johnston */ 1896d6a03d7SJeff Roberson unsigned long uma_kmem_limit = LONG_MAX; 190fbd95859SMark Johnston SYSCTL_ULONG(_vm, OID_AUTO, uma_kmem_limit, CTLFLAG_RD, &uma_kmem_limit, 0, 191fbd95859SMark Johnston "UMA kernel memory soft limit"); 1926d6a03d7SJeff Roberson unsigned long uma_kmem_total; 193fbd95859SMark Johnston SYSCTL_ULONG(_vm, OID_AUTO, uma_kmem_total, CTLFLAG_RD, &uma_kmem_total, 0, 194fbd95859SMark Johnston "UMA kernel memory usage"); 1952e47807cSJeff Roberson 1968355f576SJeff Roberson /* Is the VM done starting up? */ 197860bb7a0SMark Johnston static enum { 198860bb7a0SMark Johnston BOOT_COLD, 199a81c400eSJeff Roberson BOOT_KVA, 200dc2b3205SMark Johnston BOOT_PCPU, 201860bb7a0SMark Johnston BOOT_RUNNING, 202860bb7a0SMark Johnston BOOT_SHUTDOWN, 203860bb7a0SMark Johnston } booted = BOOT_COLD; 2048355f576SJeff Roberson 205ef72505eSJeff Roberson /* 2069643769aSJeff Roberson * This is the handle used to schedule events that need to happen 2079643769aSJeff Roberson * outside of the allocation fast path. 2089643769aSJeff Roberson */ 2098355f576SJeff Roberson static struct callout uma_callout; 2109643769aSJeff Roberson #define UMA_TIMEOUT 20 /* Seconds for callout interval. */ 2118355f576SJeff Roberson 2128355f576SJeff Roberson /* 2138355f576SJeff Roberson * This structure is passed as the zone ctor arg so that I don't have to create 2148355f576SJeff Roberson * a special allocation function just for zones. 2158355f576SJeff Roberson */ 2168355f576SJeff Roberson struct uma_zctor_args { 217bb196eb4SMatthew D Fleming const char *name; 218c3bdc05fSAndrew R. Reiter size_t size; 2198355f576SJeff Roberson uma_ctor ctor; 2208355f576SJeff Roberson uma_dtor dtor; 2218355f576SJeff Roberson uma_init uminit; 2228355f576SJeff Roberson uma_fini fini; 2230095a784SJeff Roberson uma_import import; 2240095a784SJeff Roberson uma_release release; 2250095a784SJeff Roberson void *arg; 226099a0e58SBosko Milekic uma_keg_t keg; 227099a0e58SBosko Milekic int align; 22885dcf349SGleb Smirnoff uint32_t flags; 229099a0e58SBosko Milekic }; 230099a0e58SBosko Milekic 231099a0e58SBosko Milekic struct uma_kctor_args { 232099a0e58SBosko Milekic uma_zone_t zone; 233099a0e58SBosko Milekic size_t size; 234099a0e58SBosko Milekic uma_init uminit; 235099a0e58SBosko Milekic uma_fini fini; 2368355f576SJeff Roberson int align; 23785dcf349SGleb Smirnoff uint32_t flags; 2388355f576SJeff Roberson }; 2398355f576SJeff Roberson 240cae33c14SJeff Roberson struct uma_bucket_zone { 241cae33c14SJeff Roberson uma_zone_t ubz_zone; 242eaa17d42SRyan Libby const char *ubz_name; 243fc03d22bSJeff Roberson int ubz_entries; /* Number of items it can hold. */ 244fc03d22bSJeff Roberson int ubz_maxsize; /* Maximum allocation size per-item. */ 245cae33c14SJeff Roberson }; 246cae33c14SJeff Roberson 247f9d27e75SRobert Watson /* 248fc03d22bSJeff Roberson * Compute the actual number of bucket entries to pack them in power 249fc03d22bSJeff Roberson * of two sizes for more efficient space utilization. 250f9d27e75SRobert Watson */ 251fc03d22bSJeff Roberson #define BUCKET_SIZE(n) \ 252fc03d22bSJeff Roberson (((sizeof(void *) * (n)) - sizeof(struct uma_bucket)) / sizeof(void *)) 253fc03d22bSJeff Roberson 2541aa6c758SAlexander Motin #define BUCKET_MAX BUCKET_SIZE(256) 255fc03d22bSJeff Roberson 256fc03d22bSJeff Roberson struct uma_bucket_zone bucket_zones[] = { 257e84130a0SJeff Roberson /* Literal bucket sizes. 
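 * (ubz_maxsize is the largest per-item allocation size that
 * bucket_select() will serve with each bucket size.)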
*/ 258e84130a0SJeff Roberson { NULL, "2 Bucket", 2, 4096 }, 259e84130a0SJeff Roberson { NULL, "4 Bucket", 4, 3072 }, 260e84130a0SJeff Roberson { NULL, "8 Bucket", 8, 2048 }, 261e84130a0SJeff Roberson { NULL, "16 Bucket", 16, 1024 }, 262e84130a0SJeff Roberson /* Rounded down power of 2 sizes for efficiency. */ 263fc03d22bSJeff Roberson { NULL, "32 Bucket", BUCKET_SIZE(32), 512 }, 264fc03d22bSJeff Roberson { NULL, "64 Bucket", BUCKET_SIZE(64), 256 }, 265fc03d22bSJeff Roberson { NULL, "128 Bucket", BUCKET_SIZE(128), 128 }, 2661aa6c758SAlexander Motin { NULL, "256 Bucket", BUCKET_SIZE(256), 64 }, 267fc03d22bSJeff Roberson { NULL, NULL, 0} 268fc03d22bSJeff Roberson }; 269cae33c14SJeff Roberson 2702019094aSRobert Watson /* 2712019094aSRobert Watson * Flags and enumerations to be passed to internal functions. 2722019094aSRobert Watson */ 273bb15d1c7SGleb Smirnoff enum zfreeskip { 274bb15d1c7SGleb Smirnoff SKIP_NONE = 0, 275bb15d1c7SGleb Smirnoff SKIP_CNT = 0x00000001, 276bb15d1c7SGleb Smirnoff SKIP_DTOR = 0x00010000, 277bb15d1c7SGleb Smirnoff SKIP_FINI = 0x00020000, 278bb15d1c7SGleb Smirnoff }; 279b23f72e9SBrian Feldman 2808355f576SJeff Roberson /* Prototypes.. */ 2818355f576SJeff Roberson 282a81c400eSJeff Roberson void uma_startup1(vm_offset_t); 283f4bef67cSGleb Smirnoff void uma_startup2(void); 284f4bef67cSGleb Smirnoff 285ab3185d1SJeff Roberson static void *noobj_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int); 286ab3185d1SJeff Roberson static void *page_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int); 287ab3059a8SMatt Macy static void *pcpu_page_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int); 288ab3185d1SJeff Roberson static void *startup_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int); 289ec0d8280SRyan Libby static void *contig_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int); 290f2c2231eSRyan Stone static void page_free(void *, vm_size_t, uint8_t); 291ab3059a8SMatt Macy static void pcpu_page_free(void *, vm_size_t, uint8_t); 29286220393SMark Johnston static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int, int, int); 2939643769aSJeff Roberson static void cache_drain(uma_zone_t); 2948355f576SJeff Roberson static void bucket_drain(uma_zone_t, uma_bucket_t); 295*aabe13f1SMark Johnston static void bucket_cache_reclaim(uma_zone_t zone, bool, int); 296b23f72e9SBrian Feldman static int keg_ctor(void *, int, void *, int); 297099a0e58SBosko Milekic static void keg_dtor(void *, int, void *); 298b23f72e9SBrian Feldman static int zone_ctor(void *, int, void *, int); 2999c2cd7e5SJeff Roberson static void zone_dtor(void *, int, void *); 300d4665eaaSJeff Roberson static inline void item_dtor(uma_zone_t zone, void *item, int size, 301d4665eaaSJeff Roberson void *udata, enum zfreeskip skip); 302b23f72e9SBrian Feldman static int zero_init(void *, int, int); 303c6fd3e23SJeff Roberson static void zone_free_bucket(uma_zone_t zone, uma_bucket_t bucket, void *udata, 304c6fd3e23SJeff Roberson int itemdomain, bool ws); 30520a4e154SJeff Roberson static void zone_foreach(void (*zfunc)(uma_zone_t, void *), void *); 306a81c400eSJeff Roberson static void zone_foreach_unlocked(void (*zfunc)(uma_zone_t, void *), void *); 30720a4e154SJeff Roberson static void zone_timeout(uma_zone_t zone, void *); 3083b2f2cb8SAlexander Motin static int hash_alloc(struct uma_hash *, u_int); 3090aef6126SJeff Roberson static int hash_expand(struct uma_hash *, struct uma_hash *); 3100aef6126SJeff Roberson static void hash_free(struct uma_hash *hash); 3118355f576SJeff Roberson static void uma_timeout(void *); 312860bb7a0SMark 
Johnston static void uma_shutdown(void); 313ab3185d1SJeff Roberson static void *zone_alloc_item(uma_zone_t, void *, int, int); 3140095a784SJeff Roberson static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip); 3154bd61e19SJeff Roberson static int zone_alloc_limit(uma_zone_t zone, int count, int flags); 3164bd61e19SJeff Roberson static void zone_free_limit(uma_zone_t zone, int count); 31786bbae32SJeff Roberson static void bucket_enable(void); 318cae33c14SJeff Roberson static void bucket_init(void); 3196fd34d6fSJeff Roberson static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int); 3206fd34d6fSJeff Roberson static void bucket_free(uma_zone_t zone, uma_bucket_t, void *); 321*aabe13f1SMark Johnston static void bucket_zone_drain(int domain); 322beb8beefSJeff Roberson static uma_bucket_t zone_alloc_bucket(uma_zone_t, void *, int, int); 3230095a784SJeff Roberson static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab); 324bb15d1c7SGleb Smirnoff static void slab_free_item(uma_zone_t zone, uma_slab_t slab, void *item); 32509c8cb71SMark Johnston static size_t slab_sizeof(int nitems); 326e20a199fSJeff Roberson static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, 32785dcf349SGleb Smirnoff uma_fini fini, int align, uint32_t flags); 328b75c4efcSAndrew Turner static int zone_import(void *, void **, int, int, int); 329b75c4efcSAndrew Turner static void zone_release(void *, void **, int); 330beb8beefSJeff Roberson static bool cache_alloc(uma_zone_t, uma_cache_t, void *, int); 3310a81b439SJeff Roberson static bool cache_free(uma_zone_t, uma_cache_t, void *, void *, int); 332bbee39c6SJeff Roberson 3337a52a97eSRobert Watson static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS); 3347a52a97eSRobert Watson static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS); 33520a4e154SJeff Roberson static int sysctl_handle_uma_zone_allocs(SYSCTL_HANDLER_ARGS); 33620a4e154SJeff Roberson static int sysctl_handle_uma_zone_frees(SYSCTL_HANDLER_ARGS); 3376d204a6aSRyan Libby static int sysctl_handle_uma_zone_flags(SYSCTL_HANDLER_ARGS); 338f7af5015SRyan Libby static int sysctl_handle_uma_slab_efficiency(SYSCTL_HANDLER_ARGS); 3394bd61e19SJeff Roberson static int sysctl_handle_uma_zone_items(SYSCTL_HANDLER_ARGS); 3408355f576SJeff Roberson 34131c251a0SJeff Roberson static uint64_t uma_zone_get_allocs(uma_zone_t zone); 34231c251a0SJeff Roberson 3437029da5cSPawel Biernacki static SYSCTL_NODE(_vm, OID_AUTO, debug, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 34433e5a1eaSRyan Libby "Memory allocation debugging"); 34533e5a1eaSRyan Libby 3469542ea7bSGleb Smirnoff #ifdef INVARIANTS 34731c251a0SJeff Roberson static uint64_t uma_keg_get_allocs(uma_keg_t zone); 348815db204SRyan Libby static inline struct noslabbits *slab_dbg_bits(uma_slab_t slab, uma_keg_t keg); 349815db204SRyan Libby 350c5deaf04SGleb Smirnoff static bool uma_dbg_kskip(uma_keg_t keg, void *mem); 351c5deaf04SGleb Smirnoff static bool uma_dbg_zskip(uma_zone_t zone, void *mem); 3529542ea7bSGleb Smirnoff static void uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item); 3539542ea7bSGleb Smirnoff static void uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item); 354c5deaf04SGleb Smirnoff 355c5deaf04SGleb Smirnoff static u_int dbg_divisor = 1; 356c5deaf04SGleb Smirnoff SYSCTL_UINT(_vm_debug, OID_AUTO, divisor, 357c5deaf04SGleb Smirnoff CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &dbg_divisor, 0, 358c5deaf04SGleb Smirnoff "Debug & thrash every this item in memory allocator"); 359c5deaf04SGleb Smirnoff 360c5deaf04SGleb Smirnoff static counter_u64_t 
uma_dbg_cnt = EARLY_COUNTER; 361c5deaf04SGleb Smirnoff static counter_u64_t uma_skip_cnt = EARLY_COUNTER; 362c5deaf04SGleb Smirnoff SYSCTL_COUNTER_U64(_vm_debug, OID_AUTO, trashed, CTLFLAG_RD, 363c5deaf04SGleb Smirnoff &uma_dbg_cnt, "memory items debugged"); 364c5deaf04SGleb Smirnoff SYSCTL_COUNTER_U64(_vm_debug, OID_AUTO, skipped, CTLFLAG_RD, 365c5deaf04SGleb Smirnoff &uma_skip_cnt, "memory items skipped, not debugged"); 3669542ea7bSGleb Smirnoff #endif 3679542ea7bSGleb Smirnoff 3687029da5cSPawel Biernacki SYSCTL_NODE(_vm, OID_AUTO, uma, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 3697029da5cSPawel Biernacki "Universal Memory Allocator"); 37035ec24f3SRyan Libby 371a314aba8SMateusz Guzik SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLFLAG_MPSAFE|CTLTYPE_INT, 3727a52a97eSRobert Watson 0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones"); 3737a52a97eSRobert Watson 374a314aba8SMateusz Guzik SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLFLAG_MPSAFE|CTLTYPE_STRUCT, 3757a52a97eSRobert Watson 0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats"); 3767a52a97eSRobert Watson 3772f891cd5SPawel Jakub Dawidek static int zone_warnings = 1; 378af3b2549SHans Petter Selasky SYSCTL_INT(_vm, OID_AUTO, zone_warnings, CTLFLAG_RWTUN, &zone_warnings, 0, 3792f891cd5SPawel Jakub Dawidek "Warn when UMA zones becomes full"); 3802f891cd5SPawel Jakub Dawidek 38133e5a1eaSRyan Libby static int multipage_slabs = 1; 38233e5a1eaSRyan Libby TUNABLE_INT("vm.debug.uma_multipage_slabs", &multipage_slabs); 38333e5a1eaSRyan Libby SYSCTL_INT(_vm_debug, OID_AUTO, uma_multipage_slabs, 38433e5a1eaSRyan Libby CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &multipage_slabs, 0, 38533e5a1eaSRyan Libby "UMA may choose larger slab sizes for better efficiency"); 38633e5a1eaSRyan Libby 38786bbae32SJeff Roberson /* 3889b8db4d0SRyan Libby * Select the slab zone for an offpage slab with the given maximum item count. 3899b8db4d0SRyan Libby */ 3909b8db4d0SRyan Libby static inline uma_zone_t 3919b8db4d0SRyan Libby slabzone(int ipers) 3929b8db4d0SRyan Libby { 3939b8db4d0SRyan Libby 3949b8db4d0SRyan Libby return (slabzones[ipers > SLABZONE0_SETSIZE]); 3959b8db4d0SRyan Libby } 3969b8db4d0SRyan Libby 3979b8db4d0SRyan Libby /* 39886bbae32SJeff Roberson * This routine checks to see whether or not it's safe to enable buckets. 39986bbae32SJeff Roberson */ 40086bbae32SJeff Roberson static void 40186bbae32SJeff Roberson bucket_enable(void) 40286bbae32SJeff Roberson { 4033182660aSRyan Libby 404a81c400eSJeff Roberson KASSERT(booted >= BOOT_KVA, ("Bucket enable before init")); 405251386b4SMaksim Yevmenkin bucketdisable = vm_page_count_min(); 40686bbae32SJeff Roberson } 40786bbae32SJeff Roberson 408dc2c7965SRobert Watson /* 409dc2c7965SRobert Watson * Initialize bucket_zones, the array of zones of buckets of various sizes. 410dc2c7965SRobert Watson * 411dc2c7965SRobert Watson * For each zone, calculate the memory required for each bucket, consisting 412fc03d22bSJeff Roberson * of the header and an array of pointers. 
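 * The zones are created with UMA_ZFLAG_BUCKET, which bucket_alloc() uses
 * to detect and limit recursive bucket allocation.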
413dc2c7965SRobert Watson */ 414cae33c14SJeff Roberson static void 415cae33c14SJeff Roberson bucket_init(void) 416cae33c14SJeff Roberson { 417cae33c14SJeff Roberson struct uma_bucket_zone *ubz; 418cae33c14SJeff Roberson int size; 419cae33c14SJeff Roberson 420d74e6a1dSAlan Cox for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++) { 421cae33c14SJeff Roberson size = roundup(sizeof(struct uma_bucket), sizeof(void *)); 422cae33c14SJeff Roberson size += sizeof(void *) * ubz->ubz_entries; 423cae33c14SJeff Roberson ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size, 424e20a199fSJeff Roberson NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 425dfe13344SJeff Roberson UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET | 426dfe13344SJeff Roberson UMA_ZONE_FIRSTTOUCH); 427cae33c14SJeff Roberson } 428cae33c14SJeff Roberson } 429cae33c14SJeff Roberson 430dc2c7965SRobert Watson /* 431dc2c7965SRobert Watson * Given a desired number of entries for a bucket, return the zone from which 432dc2c7965SRobert Watson * to allocate the bucket. 433dc2c7965SRobert Watson */ 434dc2c7965SRobert Watson static struct uma_bucket_zone * 435dc2c7965SRobert Watson bucket_zone_lookup(int entries) 436dc2c7965SRobert Watson { 437fc03d22bSJeff Roberson struct uma_bucket_zone *ubz; 438dc2c7965SRobert Watson 439fc03d22bSJeff Roberson for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++) 440fc03d22bSJeff Roberson if (ubz->ubz_entries >= entries) 441fc03d22bSJeff Roberson return (ubz); 442fc03d22bSJeff Roberson ubz--; 443fc03d22bSJeff Roberson return (ubz); 444fc03d22bSJeff Roberson } 445fc03d22bSJeff Roberson 446fc03d22bSJeff Roberson static int 447fc03d22bSJeff Roberson bucket_select(int size) 448fc03d22bSJeff Roberson { 449fc03d22bSJeff Roberson struct uma_bucket_zone *ubz; 450fc03d22bSJeff Roberson 451fc03d22bSJeff Roberson ubz = &bucket_zones[0]; 452fc03d22bSJeff Roberson if (size > ubz->ubz_maxsize) 453fc03d22bSJeff Roberson return MAX((ubz->ubz_maxsize * ubz->ubz_entries) / size, 1); 454fc03d22bSJeff Roberson 455fc03d22bSJeff Roberson for (; ubz->ubz_entries != 0; ubz++) 456fc03d22bSJeff Roberson if (ubz->ubz_maxsize < size) 457fc03d22bSJeff Roberson break; 458fc03d22bSJeff Roberson ubz--; 459fc03d22bSJeff Roberson return (ubz->ubz_entries); 460dc2c7965SRobert Watson } 461dc2c7965SRobert Watson 462cae33c14SJeff Roberson static uma_bucket_t 4636fd34d6fSJeff Roberson bucket_alloc(uma_zone_t zone, void *udata, int flags) 464cae33c14SJeff Roberson { 465cae33c14SJeff Roberson struct uma_bucket_zone *ubz; 466cae33c14SJeff Roberson uma_bucket_t bucket; 467cae33c14SJeff Roberson 468cae33c14SJeff Roberson /* 469d4665eaaSJeff Roberson * Don't allocate buckets early in boot. 470cae33c14SJeff Roberson */ 471d4665eaaSJeff Roberson if (__predict_false(booted < BOOT_KVA)) 472cae33c14SJeff Roberson return (NULL); 473a81c400eSJeff Roberson 4746fd34d6fSJeff Roberson /* 4756fd34d6fSJeff Roberson * To limit bucket recursion we store the original zone flags 4766fd34d6fSJeff Roberson * in a cookie passed via zalloc_arg/zfree_arg. This allows the 4776fd34d6fSJeff Roberson * NOVM flag to persist even through deep recursions. We also 4786fd34d6fSJeff Roberson * store ZFLAG_BUCKET once we have recursed attempting to allocate 4796fd34d6fSJeff Roberson * a bucket for a bucket zone so we do not allow infinite bucket 4806fd34d6fSJeff Roberson * recursion. This cookie will even persist to frees of unused 4816fd34d6fSJeff Roberson * buckets via the allocation path or bucket allocations in the 4826fd34d6fSJeff Roberson * free path. 
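 * The cookie is only the zone flag bits cast to a pointer; since bucket
 * zones have no ctor or dtor it is never dereferenced, only tested for
 * UMA_ZONE_VM and UMA_ZFLAG_BUCKET.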
4836fd34d6fSJeff Roberson */ 4846fd34d6fSJeff Roberson if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0) 4856fd34d6fSJeff Roberson udata = (void *)(uintptr_t)zone->uz_flags; 486e8a720feSAlexander Motin else { 487e8a720feSAlexander Motin if ((uintptr_t)udata & UMA_ZFLAG_BUCKET) 488e8a720feSAlexander Motin return (NULL); 4896fd34d6fSJeff Roberson udata = (void *)((uintptr_t)udata | UMA_ZFLAG_BUCKET); 490e8a720feSAlexander Motin } 491bae55c4aSRyan Libby if (((uintptr_t)udata & UMA_ZONE_VM) != 0) 492af526374SJeff Roberson flags |= M_NOVM; 493f8b6c515SMark Johnston ubz = bucket_zone_lookup(atomic_load_16(&zone->uz_bucket_size)); 49420d3ab87SAlexander Motin if (ubz->ubz_zone == zone && (ubz + 1)->ubz_entries != 0) 49520d3ab87SAlexander Motin ubz++; 4966fd34d6fSJeff Roberson bucket = uma_zalloc_arg(ubz->ubz_zone, udata, flags); 497cae33c14SJeff Roberson if (bucket) { 498cae33c14SJeff Roberson #ifdef INVARIANTS 499cae33c14SJeff Roberson bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries); 500cae33c14SJeff Roberson #endif 501cae33c14SJeff Roberson bucket->ub_cnt = 0; 502f8b6c515SMark Johnston bucket->ub_entries = min(ubz->ubz_entries, 503f8b6c515SMark Johnston zone->uz_bucket_size_max); 504d4665eaaSJeff Roberson bucket->ub_seq = SMR_SEQ_INVALID; 505d4665eaaSJeff Roberson CTR3(KTR_UMA, "bucket_alloc: zone %s(%p) allocated bucket %p", 506d4665eaaSJeff Roberson zone->uz_name, zone, bucket); 507cae33c14SJeff Roberson } 508cae33c14SJeff Roberson 509cae33c14SJeff Roberson return (bucket); 510cae33c14SJeff Roberson } 511cae33c14SJeff Roberson 512cae33c14SJeff Roberson static void 5136fd34d6fSJeff Roberson bucket_free(uma_zone_t zone, uma_bucket_t bucket, void *udata) 514cae33c14SJeff Roberson { 515cae33c14SJeff Roberson struct uma_bucket_zone *ubz; 516cae33c14SJeff Roberson 517c6fd3e23SJeff Roberson if (bucket->ub_cnt != 0) 518c6fd3e23SJeff Roberson bucket_drain(zone, bucket); 519c6fd3e23SJeff Roberson 520fc03d22bSJeff Roberson KASSERT(bucket->ub_cnt == 0, 521fc03d22bSJeff Roberson ("bucket_free: Freeing a non free bucket.")); 522d4665eaaSJeff Roberson KASSERT(bucket->ub_seq == SMR_SEQ_INVALID, 523d4665eaaSJeff Roberson ("bucket_free: Freeing an SMR bucket.")); 5246fd34d6fSJeff Roberson if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0) 5256fd34d6fSJeff Roberson udata = (void *)(uintptr_t)zone->uz_flags; 526dc2c7965SRobert Watson ubz = bucket_zone_lookup(bucket->ub_entries); 5276fd34d6fSJeff Roberson uma_zfree_arg(ubz->ubz_zone, bucket, udata); 528cae33c14SJeff Roberson } 529cae33c14SJeff Roberson 530cae33c14SJeff Roberson static void 531*aabe13f1SMark Johnston bucket_zone_drain(int domain) 532cae33c14SJeff Roberson { 533cae33c14SJeff Roberson struct uma_bucket_zone *ubz; 534cae33c14SJeff Roberson 535cae33c14SJeff Roberson for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++) 536*aabe13f1SMark Johnston uma_zone_reclaim_domain(ubz->ubz_zone, UMA_RECLAIM_DRAIN, 537*aabe13f1SMark Johnston domain); 538cae33c14SJeff Roberson } 539cae33c14SJeff Roberson 54009c8cb71SMark Johnston #ifdef KASAN 54109c8cb71SMark Johnston static void 54209c8cb71SMark Johnston kasan_mark_item_valid(uma_zone_t zone, void *item) 54309c8cb71SMark Johnston { 54409c8cb71SMark Johnston void *pcpu_item; 54509c8cb71SMark Johnston size_t sz, rsz; 54609c8cb71SMark Johnston int i; 54709c8cb71SMark Johnston 54809c8cb71SMark Johnston if ((zone->uz_flags & UMA_ZONE_NOKASAN) != 0) 54909c8cb71SMark Johnston return; 55009c8cb71SMark Johnston 55109c8cb71SMark Johnston sz = zone->uz_size; 55209c8cb71SMark Johnston rsz = roundup2(sz, 
KASAN_SHADOW_SCALE); 55309c8cb71SMark Johnston if ((zone->uz_flags & UMA_ZONE_PCPU) == 0) { 55409c8cb71SMark Johnston kasan_mark(item, sz, rsz, 0); 55509c8cb71SMark Johnston } else { 55609c8cb71SMark Johnston pcpu_item = zpcpu_base_to_offset(item); 55709c8cb71SMark Johnston for (i = 0; i <= mp_maxid; i++) 55809c8cb71SMark Johnston kasan_mark(zpcpu_get_cpu(pcpu_item, i), sz, rsz, 0); 55909c8cb71SMark Johnston } 56009c8cb71SMark Johnston } 56109c8cb71SMark Johnston 56209c8cb71SMark Johnston static void 56309c8cb71SMark Johnston kasan_mark_item_invalid(uma_zone_t zone, void *item) 56409c8cb71SMark Johnston { 56509c8cb71SMark Johnston void *pcpu_item; 56609c8cb71SMark Johnston size_t sz; 56709c8cb71SMark Johnston int i; 56809c8cb71SMark Johnston 56909c8cb71SMark Johnston if ((zone->uz_flags & UMA_ZONE_NOKASAN) != 0) 57009c8cb71SMark Johnston return; 57109c8cb71SMark Johnston 57209c8cb71SMark Johnston sz = roundup2(zone->uz_size, KASAN_SHADOW_SCALE); 57309c8cb71SMark Johnston if ((zone->uz_flags & UMA_ZONE_PCPU) == 0) { 57409c8cb71SMark Johnston kasan_mark(item, 0, sz, KASAN_UMA_FREED); 57509c8cb71SMark Johnston } else { 57609c8cb71SMark Johnston pcpu_item = zpcpu_base_to_offset(item); 57709c8cb71SMark Johnston for (i = 0; i <= mp_maxid; i++) 57809c8cb71SMark Johnston kasan_mark(zpcpu_get_cpu(pcpu_item, i), 0, sz, 0); 57909c8cb71SMark Johnston } 58009c8cb71SMark Johnston } 58109c8cb71SMark Johnston 58209c8cb71SMark Johnston static void 58309c8cb71SMark Johnston kasan_mark_slab_valid(uma_keg_t keg, void *mem) 58409c8cb71SMark Johnston { 58509c8cb71SMark Johnston size_t sz; 58609c8cb71SMark Johnston 58709c8cb71SMark Johnston if ((keg->uk_flags & UMA_ZONE_NOKASAN) == 0) { 58809c8cb71SMark Johnston sz = keg->uk_ppera * PAGE_SIZE; 58909c8cb71SMark Johnston kasan_mark(mem, sz, sz, 0); 59009c8cb71SMark Johnston } 59109c8cb71SMark Johnston } 59209c8cb71SMark Johnston 59309c8cb71SMark Johnston static void 59409c8cb71SMark Johnston kasan_mark_slab_invalid(uma_keg_t keg, void *mem) 59509c8cb71SMark Johnston { 59609c8cb71SMark Johnston size_t sz; 59709c8cb71SMark Johnston 59809c8cb71SMark Johnston if ((keg->uk_flags & UMA_ZONE_NOKASAN) == 0) { 59909c8cb71SMark Johnston if ((keg->uk_flags & UMA_ZFLAG_OFFPAGE) != 0) 60009c8cb71SMark Johnston sz = keg->uk_ppera * PAGE_SIZE; 60109c8cb71SMark Johnston else 60209c8cb71SMark Johnston sz = keg->uk_pgoff; 60309c8cb71SMark Johnston kasan_mark(mem, 0, sz, KASAN_UMA_FREED); 60409c8cb71SMark Johnston } 60509c8cb71SMark Johnston } 60609c8cb71SMark Johnston #else /* !KASAN */ 60709c8cb71SMark Johnston static void 60809c8cb71SMark Johnston kasan_mark_item_valid(uma_zone_t zone __unused, void *item __unused) 60909c8cb71SMark Johnston { 61009c8cb71SMark Johnston } 61109c8cb71SMark Johnston 61209c8cb71SMark Johnston static void 61309c8cb71SMark Johnston kasan_mark_item_invalid(uma_zone_t zone __unused, void *item __unused) 61409c8cb71SMark Johnston { 61509c8cb71SMark Johnston } 61609c8cb71SMark Johnston 61709c8cb71SMark Johnston static void 61809c8cb71SMark Johnston kasan_mark_slab_valid(uma_keg_t keg __unused, void *mem __unused) 61909c8cb71SMark Johnston { 62009c8cb71SMark Johnston } 62109c8cb71SMark Johnston 62209c8cb71SMark Johnston static void 62309c8cb71SMark Johnston kasan_mark_slab_invalid(uma_keg_t keg __unused, void *mem __unused) 62409c8cb71SMark Johnston { 62509c8cb71SMark Johnston } 62609c8cb71SMark Johnston #endif /* KASAN */ 62709c8cb71SMark Johnston 62808cfa56eSMark Johnston /* 629c6fd3e23SJeff Roberson * Acquire the domain lock and record contention. 
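 * When the lock is observed to be owned, the zone's per-CPU bucket size
 * is grown (up to uz_bucket_size_max) to reduce future pressure on the
 * domain lock.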
630c6fd3e23SJeff Roberson */ 631c6fd3e23SJeff Roberson static uma_zone_domain_t 632c6fd3e23SJeff Roberson zone_domain_lock(uma_zone_t zone, int domain) 633c6fd3e23SJeff Roberson { 634c6fd3e23SJeff Roberson uma_zone_domain_t zdom; 635c6fd3e23SJeff Roberson bool lockfail; 636c6fd3e23SJeff Roberson 637c6fd3e23SJeff Roberson zdom = ZDOM_GET(zone, domain); 638c6fd3e23SJeff Roberson lockfail = false; 639c6fd3e23SJeff Roberson if (ZDOM_OWNED(zdom)) 640c6fd3e23SJeff Roberson lockfail = true; 641c6fd3e23SJeff Roberson ZDOM_LOCK(zdom); 642c6fd3e23SJeff Roberson /* This is unsynchronized. The counter does not need to be precise. */ 643c6fd3e23SJeff Roberson if (lockfail && zone->uz_bucket_size < zone->uz_bucket_size_max) 644c6fd3e23SJeff Roberson zone->uz_bucket_size++; 645c6fd3e23SJeff Roberson return (zdom); 646c6fd3e23SJeff Roberson } 647c6fd3e23SJeff Roberson 648c6fd3e23SJeff Roberson /* 649fe835cbfSJeff Roberson * Search for the domain with the least cached items and return it if it 650fe835cbfSJeff Roberson * is out of balance with the preferred domain. 651c6fd3e23SJeff Roberson */ 652c6fd3e23SJeff Roberson static __noinline int 653c6fd3e23SJeff Roberson zone_domain_lowest(uma_zone_t zone, int pref) 654c6fd3e23SJeff Roberson { 655fe835cbfSJeff Roberson long least, nitems, prefitems; 656c6fd3e23SJeff Roberson int domain; 657c6fd3e23SJeff Roberson int i; 658c6fd3e23SJeff Roberson 659fe835cbfSJeff Roberson prefitems = least = LONG_MAX; 660c6fd3e23SJeff Roberson domain = 0; 661c6fd3e23SJeff Roberson for (i = 0; i < vm_ndomains; i++) { 662c6fd3e23SJeff Roberson nitems = ZDOM_GET(zone, i)->uzd_nitems; 663c6fd3e23SJeff Roberson if (nitems < least) { 664c6fd3e23SJeff Roberson domain = i; 665c6fd3e23SJeff Roberson least = nitems; 666c6fd3e23SJeff Roberson } 667fe835cbfSJeff Roberson if (domain == pref) 668fe835cbfSJeff Roberson prefitems = nitems; 669fe835cbfSJeff Roberson } 670fe835cbfSJeff Roberson if (prefitems < least * 2) 671fe835cbfSJeff Roberson return (pref); 672c6fd3e23SJeff Roberson 673c6fd3e23SJeff Roberson return (domain); 674c6fd3e23SJeff Roberson } 675c6fd3e23SJeff Roberson 676c6fd3e23SJeff Roberson /* 677c6fd3e23SJeff Roberson * Search for the domain with the most cached items and return it or the 678c6fd3e23SJeff Roberson * preferred domain if it has enough to proceed. 679c6fd3e23SJeff Roberson */ 680c6fd3e23SJeff Roberson static __noinline int 681c6fd3e23SJeff Roberson zone_domain_highest(uma_zone_t zone, int pref) 682c6fd3e23SJeff Roberson { 683c6fd3e23SJeff Roberson long most, nitems; 684c6fd3e23SJeff Roberson int domain; 685c6fd3e23SJeff Roberson int i; 686c6fd3e23SJeff Roberson 687c6fd3e23SJeff Roberson if (ZDOM_GET(zone, pref)->uzd_nitems > BUCKET_MAX) 688c6fd3e23SJeff Roberson return (pref); 689c6fd3e23SJeff Roberson 690c6fd3e23SJeff Roberson most = 0; 691c6fd3e23SJeff Roberson domain = 0; 692c6fd3e23SJeff Roberson for (i = 0; i < vm_ndomains; i++) { 693c6fd3e23SJeff Roberson nitems = ZDOM_GET(zone, i)->uzd_nitems; 694c6fd3e23SJeff Roberson if (nitems > most) { 695c6fd3e23SJeff Roberson domain = i; 696c6fd3e23SJeff Roberson most = nitems; 697c6fd3e23SJeff Roberson } 698c6fd3e23SJeff Roberson } 699c6fd3e23SJeff Roberson 700c6fd3e23SJeff Roberson return (domain); 701c6fd3e23SJeff Roberson } 702c6fd3e23SJeff Roberson 703c6fd3e23SJeff Roberson /* 704c6fd3e23SJeff Roberson * Safely subtract cnt from imax. 
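 * This is a lock-free compare-and-swap loop that clamps the result at
 * zero instead of letting the estimate go negative.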
705c6fd3e23SJeff Roberson */ 706c6fd3e23SJeff Roberson static void 707c6fd3e23SJeff Roberson zone_domain_imax_sub(uma_zone_domain_t zdom, int cnt) 708c6fd3e23SJeff Roberson { 709c6fd3e23SJeff Roberson long new; 710c6fd3e23SJeff Roberson long old; 711c6fd3e23SJeff Roberson 712c6fd3e23SJeff Roberson old = zdom->uzd_imax; 713c6fd3e23SJeff Roberson do { 714c6fd3e23SJeff Roberson if (old <= cnt) 715c6fd3e23SJeff Roberson new = 0; 716c6fd3e23SJeff Roberson else 717c6fd3e23SJeff Roberson new = old - cnt; 718c6fd3e23SJeff Roberson } while (atomic_fcmpset_long(&zdom->uzd_imax, &old, new) == 0); 719c6fd3e23SJeff Roberson } 720c6fd3e23SJeff Roberson 721c6fd3e23SJeff Roberson /* 722c6fd3e23SJeff Roberson * Set the maximum imax value. 723c6fd3e23SJeff Roberson */ 724c6fd3e23SJeff Roberson static void 725c6fd3e23SJeff Roberson zone_domain_imax_set(uma_zone_domain_t zdom, int nitems) 726c6fd3e23SJeff Roberson { 727c6fd3e23SJeff Roberson long old; 728c6fd3e23SJeff Roberson 729c6fd3e23SJeff Roberson old = zdom->uzd_imax; 730c6fd3e23SJeff Roberson do { 731c6fd3e23SJeff Roberson if (old >= nitems) 732c6fd3e23SJeff Roberson break; 733c6fd3e23SJeff Roberson } while (atomic_fcmpset_long(&zdom->uzd_imax, &old, nitems) == 0); 734c6fd3e23SJeff Roberson } 735c6fd3e23SJeff Roberson 736c6fd3e23SJeff Roberson /* 73708cfa56eSMark Johnston * Attempt to satisfy an allocation by retrieving a full bucket from one of the 738d4665eaaSJeff Roberson * zone's caches. If a bucket is found the zone is not locked on return. 73908cfa56eSMark Johnston */ 7400f9b7bf3SMark Johnston static uma_bucket_t 741c6fd3e23SJeff Roberson zone_fetch_bucket(uma_zone_t zone, uma_zone_domain_t zdom, bool reclaim) 7420f9b7bf3SMark Johnston { 7430f9b7bf3SMark Johnston uma_bucket_t bucket; 744d4665eaaSJeff Roberson int i; 745d4665eaaSJeff Roberson bool dtor = false; 7460f9b7bf3SMark Johnston 747c6fd3e23SJeff Roberson ZDOM_LOCK_ASSERT(zdom); 7480f9b7bf3SMark Johnston 749dc3915c8SJeff Roberson if ((bucket = STAILQ_FIRST(&zdom->uzd_buckets)) == NULL) 750d4665eaaSJeff Roberson return (NULL); 751d4665eaaSJeff Roberson 752543117beSJeff Roberson /* SMR Buckets can not be re-used until readers expire. */ 753d4665eaaSJeff Roberson if ((zone->uz_flags & UMA_ZONE_SMR) != 0 && 754d4665eaaSJeff Roberson bucket->ub_seq != SMR_SEQ_INVALID) { 755d4665eaaSJeff Roberson if (!smr_poll(zone->uz_smr, bucket->ub_seq, false)) 756d4665eaaSJeff Roberson return (NULL); 757d4665eaaSJeff Roberson bucket->ub_seq = SMR_SEQ_INVALID; 758543117beSJeff Roberson dtor = (zone->uz_dtor != NULL) || UMA_ALWAYS_CTORDTOR; 759c6fd3e23SJeff Roberson if (STAILQ_NEXT(bucket, ub_link) != NULL) 760c6fd3e23SJeff Roberson zdom->uzd_seq = STAILQ_NEXT(bucket, ub_link)->ub_seq; 761d4665eaaSJeff Roberson } 762dc3915c8SJeff Roberson STAILQ_REMOVE_HEAD(&zdom->uzd_buckets, ub_link); 76306d8bdcbSMark Johnston 76406d8bdcbSMark Johnston KASSERT(zdom->uzd_nitems >= bucket->ub_cnt, 76506d8bdcbSMark Johnston ("%s: item count underflow (%ld, %d)", 76606d8bdcbSMark Johnston __func__, zdom->uzd_nitems, bucket->ub_cnt)); 76706d8bdcbSMark Johnston KASSERT(bucket->ub_cnt > 0, 76806d8bdcbSMark Johnston ("%s: empty bucket in bucket cache", __func__)); 7690f9b7bf3SMark Johnston zdom->uzd_nitems -= bucket->ub_cnt; 770c6fd3e23SJeff Roberson 771c6fd3e23SJeff Roberson /* 772c6fd3e23SJeff Roberson * Shift the bounds of the current WSS interval to avoid 773c6fd3e23SJeff Roberson * perturbing the estimate. 
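 * Both uzd_imin and uzd_imax are reduced by the bucket's item count
 * (clamped at zero), so the width of the interval feeding the WSS
 * estimate is preserved.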
774c6fd3e23SJeff Roberson */ 775c6fd3e23SJeff Roberson if (reclaim) { 776c6fd3e23SJeff Roberson zdom->uzd_imin -= lmin(zdom->uzd_imin, bucket->ub_cnt); 777c6fd3e23SJeff Roberson zone_domain_imax_sub(zdom, bucket->ub_cnt); 778c6fd3e23SJeff Roberson } else if (zdom->uzd_imin > zdom->uzd_nitems) 7790f9b7bf3SMark Johnston zdom->uzd_imin = zdom->uzd_nitems; 780c6fd3e23SJeff Roberson 781c6fd3e23SJeff Roberson ZDOM_UNLOCK(zdom); 782d4665eaaSJeff Roberson if (dtor) 783d4665eaaSJeff Roberson for (i = 0; i < bucket->ub_cnt; i++) 784d4665eaaSJeff Roberson item_dtor(zone, bucket->ub_bucket[i], zone->uz_size, 785d4665eaaSJeff Roberson NULL, SKIP_NONE); 786d4665eaaSJeff Roberson 7870f9b7bf3SMark Johnston return (bucket); 7880f9b7bf3SMark Johnston } 7890f9b7bf3SMark Johnston 79008cfa56eSMark Johnston /* 79108cfa56eSMark Johnston * Insert a full bucket into the specified cache. The "ws" parameter indicates 79208cfa56eSMark Johnston * whether the bucket's contents should be counted as part of the zone's working 793c6fd3e23SJeff Roberson * set. The bucket may be freed if it exceeds the bucket limit. 79408cfa56eSMark Johnston */ 7950f9b7bf3SMark Johnston static void 796c6fd3e23SJeff Roberson zone_put_bucket(uma_zone_t zone, int domain, uma_bucket_t bucket, void *udata, 7970f9b7bf3SMark Johnston const bool ws) 7980f9b7bf3SMark Johnston { 799c6fd3e23SJeff Roberson uma_zone_domain_t zdom; 8000f9b7bf3SMark Johnston 801c6fd3e23SJeff Roberson /* We don't cache empty buckets. This can happen after a reclaim. */ 802c6fd3e23SJeff Roberson if (bucket->ub_cnt == 0) 803c6fd3e23SJeff Roberson goto out; 804c6fd3e23SJeff Roberson zdom = zone_domain_lock(zone, domain); 805c6fd3e23SJeff Roberson 806c6fd3e23SJeff Roberson /* 807c6fd3e23SJeff Roberson * Conditionally set the maximum number of items. 808c6fd3e23SJeff Roberson */ 8090f9b7bf3SMark Johnston zdom->uzd_nitems += bucket->ub_cnt; 810c6fd3e23SJeff Roberson if (__predict_true(zdom->uzd_nitems < zone->uz_bucket_max)) { 811c6fd3e23SJeff Roberson if (ws) 812c6fd3e23SJeff Roberson zone_domain_imax_set(zdom, zdom->uzd_nitems); 813c6fd3e23SJeff Roberson if (STAILQ_EMPTY(&zdom->uzd_buckets)) 814c6fd3e23SJeff Roberson zdom->uzd_seq = bucket->ub_seq; 8155afdf5c1SMark Johnston 8165afdf5c1SMark Johnston /* 8175afdf5c1SMark Johnston * Try to promote reuse of recently used items. For items 8185afdf5c1SMark Johnston * protected by SMR, try to defer reuse to minimize polling. 8195afdf5c1SMark Johnston */ 8205afdf5c1SMark Johnston if (bucket->ub_seq == SMR_SEQ_INVALID) 8215afdf5c1SMark Johnston STAILQ_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link); 8225afdf5c1SMark Johnston else 823c6fd3e23SJeff Roberson STAILQ_INSERT_TAIL(&zdom->uzd_buckets, bucket, ub_link); 824c6fd3e23SJeff Roberson ZDOM_UNLOCK(zdom); 825c6fd3e23SJeff Roberson return; 826c6fd3e23SJeff Roberson } 827c6fd3e23SJeff Roberson zdom->uzd_nitems -= bucket->ub_cnt; 828c6fd3e23SJeff Roberson ZDOM_UNLOCK(zdom); 829c6fd3e23SJeff Roberson out: 830c6fd3e23SJeff Roberson bucket_free(zone, bucket, udata); 8310f9b7bf3SMark Johnston } 8320f9b7bf3SMark Johnston 833376b1ba3SJeff Roberson /* Pops an item out of a per-cpu cache bucket. 
*/ 834376b1ba3SJeff Roberson static inline void * 835376b1ba3SJeff Roberson cache_bucket_pop(uma_cache_t cache, uma_cache_bucket_t bucket) 836376b1ba3SJeff Roberson { 837376b1ba3SJeff Roberson void *item; 838376b1ba3SJeff Roberson 839376b1ba3SJeff Roberson CRITICAL_ASSERT(curthread); 840376b1ba3SJeff Roberson 841376b1ba3SJeff Roberson bucket->ucb_cnt--; 842376b1ba3SJeff Roberson item = bucket->ucb_bucket->ub_bucket[bucket->ucb_cnt]; 843376b1ba3SJeff Roberson #ifdef INVARIANTS 844376b1ba3SJeff Roberson bucket->ucb_bucket->ub_bucket[bucket->ucb_cnt] = NULL; 845376b1ba3SJeff Roberson KASSERT(item != NULL, ("uma_zalloc: Bucket pointer mangled.")); 846376b1ba3SJeff Roberson #endif 847376b1ba3SJeff Roberson cache->uc_allocs++; 848376b1ba3SJeff Roberson 849376b1ba3SJeff Roberson return (item); 850376b1ba3SJeff Roberson } 851376b1ba3SJeff Roberson 852376b1ba3SJeff Roberson /* Pushes an item into a per-cpu cache bucket. */ 853376b1ba3SJeff Roberson static inline void 854376b1ba3SJeff Roberson cache_bucket_push(uma_cache_t cache, uma_cache_bucket_t bucket, void *item) 855376b1ba3SJeff Roberson { 856376b1ba3SJeff Roberson 857376b1ba3SJeff Roberson CRITICAL_ASSERT(curthread); 858376b1ba3SJeff Roberson KASSERT(bucket->ucb_bucket->ub_bucket[bucket->ucb_cnt] == NULL, 859376b1ba3SJeff Roberson ("uma_zfree: Freeing to non free bucket index.")); 860376b1ba3SJeff Roberson 861376b1ba3SJeff Roberson bucket->ucb_bucket->ub_bucket[bucket->ucb_cnt] = item; 862376b1ba3SJeff Roberson bucket->ucb_cnt++; 863376b1ba3SJeff Roberson cache->uc_frees++; 864376b1ba3SJeff Roberson } 865376b1ba3SJeff Roberson 866376b1ba3SJeff Roberson /* 867376b1ba3SJeff Roberson * Unload a UMA bucket from a per-cpu cache. 868376b1ba3SJeff Roberson */ 869376b1ba3SJeff Roberson static inline uma_bucket_t 870376b1ba3SJeff Roberson cache_bucket_unload(uma_cache_bucket_t bucket) 871376b1ba3SJeff Roberson { 872376b1ba3SJeff Roberson uma_bucket_t b; 873376b1ba3SJeff Roberson 874376b1ba3SJeff Roberson b = bucket->ucb_bucket; 875376b1ba3SJeff Roberson if (b != NULL) { 876376b1ba3SJeff Roberson MPASS(b->ub_entries == bucket->ucb_entries); 877376b1ba3SJeff Roberson b->ub_cnt = bucket->ucb_cnt; 878376b1ba3SJeff Roberson bucket->ucb_bucket = NULL; 879376b1ba3SJeff Roberson bucket->ucb_entries = bucket->ucb_cnt = 0; 880376b1ba3SJeff Roberson } 881376b1ba3SJeff Roberson 882376b1ba3SJeff Roberson return (b); 883376b1ba3SJeff Roberson } 884376b1ba3SJeff Roberson 885376b1ba3SJeff Roberson static inline uma_bucket_t 886376b1ba3SJeff Roberson cache_bucket_unload_alloc(uma_cache_t cache) 887376b1ba3SJeff Roberson { 888376b1ba3SJeff Roberson 889376b1ba3SJeff Roberson return (cache_bucket_unload(&cache->uc_allocbucket)); 890376b1ba3SJeff Roberson } 891376b1ba3SJeff Roberson 892376b1ba3SJeff Roberson static inline uma_bucket_t 893376b1ba3SJeff Roberson cache_bucket_unload_free(uma_cache_t cache) 894376b1ba3SJeff Roberson { 895376b1ba3SJeff Roberson 896376b1ba3SJeff Roberson return (cache_bucket_unload(&cache->uc_freebucket)); 897376b1ba3SJeff Roberson } 898376b1ba3SJeff Roberson 899376b1ba3SJeff Roberson static inline uma_bucket_t 900376b1ba3SJeff Roberson cache_bucket_unload_cross(uma_cache_t cache) 901376b1ba3SJeff Roberson { 902376b1ba3SJeff Roberson 903376b1ba3SJeff Roberson return (cache_bucket_unload(&cache->uc_crossbucket)); 904376b1ba3SJeff Roberson } 905376b1ba3SJeff Roberson 906376b1ba3SJeff Roberson /* 907376b1ba3SJeff Roberson * Load a bucket into a per-cpu cache bucket. 
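 * The target slot must be empty and, for SMR buckets, ub_seq must
 * already have been invalidated; both conditions are asserted below.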
908376b1ba3SJeff Roberson */ 909376b1ba3SJeff Roberson static inline void 910376b1ba3SJeff Roberson cache_bucket_load(uma_cache_bucket_t bucket, uma_bucket_t b) 911376b1ba3SJeff Roberson { 912376b1ba3SJeff Roberson 913376b1ba3SJeff Roberson CRITICAL_ASSERT(curthread); 914376b1ba3SJeff Roberson MPASS(bucket->ucb_bucket == NULL); 915543117beSJeff Roberson MPASS(b->ub_seq == SMR_SEQ_INVALID); 916376b1ba3SJeff Roberson 917376b1ba3SJeff Roberson bucket->ucb_bucket = b; 918376b1ba3SJeff Roberson bucket->ucb_cnt = b->ub_cnt; 919376b1ba3SJeff Roberson bucket->ucb_entries = b->ub_entries; 920376b1ba3SJeff Roberson } 921376b1ba3SJeff Roberson 922376b1ba3SJeff Roberson static inline void 923376b1ba3SJeff Roberson cache_bucket_load_alloc(uma_cache_t cache, uma_bucket_t b) 924376b1ba3SJeff Roberson { 925376b1ba3SJeff Roberson 926376b1ba3SJeff Roberson cache_bucket_load(&cache->uc_allocbucket, b); 927376b1ba3SJeff Roberson } 928376b1ba3SJeff Roberson 929376b1ba3SJeff Roberson static inline void 930376b1ba3SJeff Roberson cache_bucket_load_free(uma_cache_t cache, uma_bucket_t b) 931376b1ba3SJeff Roberson { 932376b1ba3SJeff Roberson 933376b1ba3SJeff Roberson cache_bucket_load(&cache->uc_freebucket, b); 934376b1ba3SJeff Roberson } 935376b1ba3SJeff Roberson 936dfe13344SJeff Roberson #ifdef NUMA 937376b1ba3SJeff Roberson static inline void 938376b1ba3SJeff Roberson cache_bucket_load_cross(uma_cache_t cache, uma_bucket_t b) 939376b1ba3SJeff Roberson { 940376b1ba3SJeff Roberson 941376b1ba3SJeff Roberson cache_bucket_load(&cache->uc_crossbucket, b); 942376b1ba3SJeff Roberson } 943376b1ba3SJeff Roberson #endif 944376b1ba3SJeff Roberson 945376b1ba3SJeff Roberson /* 946376b1ba3SJeff Roberson * Copy and preserve ucb_spare. 947376b1ba3SJeff Roberson */ 948376b1ba3SJeff Roberson static inline void 949376b1ba3SJeff Roberson cache_bucket_copy(uma_cache_bucket_t b1, uma_cache_bucket_t b2) 950376b1ba3SJeff Roberson { 951376b1ba3SJeff Roberson 952376b1ba3SJeff Roberson b1->ucb_bucket = b2->ucb_bucket; 953376b1ba3SJeff Roberson b1->ucb_entries = b2->ucb_entries; 954376b1ba3SJeff Roberson b1->ucb_cnt = b2->ucb_cnt; 955376b1ba3SJeff Roberson } 956376b1ba3SJeff Roberson 957376b1ba3SJeff Roberson /* 958376b1ba3SJeff Roberson * Swap two cache buckets. 959376b1ba3SJeff Roberson */ 960376b1ba3SJeff Roberson static inline void 961376b1ba3SJeff Roberson cache_bucket_swap(uma_cache_bucket_t b1, uma_cache_bucket_t b2) 962376b1ba3SJeff Roberson { 963376b1ba3SJeff Roberson struct uma_cache_bucket b3; 964376b1ba3SJeff Roberson 965376b1ba3SJeff Roberson CRITICAL_ASSERT(curthread); 966376b1ba3SJeff Roberson 967376b1ba3SJeff Roberson cache_bucket_copy(&b3, b1); 968376b1ba3SJeff Roberson cache_bucket_copy(b1, b2); 969376b1ba3SJeff Roberson cache_bucket_copy(b2, &b3); 970376b1ba3SJeff Roberson } 971376b1ba3SJeff Roberson 972c6fd3e23SJeff Roberson /* 973c6fd3e23SJeff Roberson * Attempt to fetch a bucket from a zone on behalf of the current cpu cache. 974c6fd3e23SJeff Roberson */ 975c6fd3e23SJeff Roberson static uma_bucket_t 976c6fd3e23SJeff Roberson cache_fetch_bucket(uma_zone_t zone, uma_cache_t cache, int domain) 977c6fd3e23SJeff Roberson { 978c6fd3e23SJeff Roberson uma_zone_domain_t zdom; 979c6fd3e23SJeff Roberson uma_bucket_t bucket; 980c6fd3e23SJeff Roberson 981c6fd3e23SJeff Roberson /* 982c6fd3e23SJeff Roberson * Avoid the lock if possible. 
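 * A racy read of uzd_nitems, and for SMR zones a poll of the first
 * bucket's sequence number, lets us skip the domain lock entirely when
 * no bucket could be returned anyway.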
983c6fd3e23SJeff Roberson */ 984c6fd3e23SJeff Roberson zdom = ZDOM_GET(zone, domain); 985c6fd3e23SJeff Roberson if (zdom->uzd_nitems == 0) 986c6fd3e23SJeff Roberson return (NULL); 987c6fd3e23SJeff Roberson 988c6fd3e23SJeff Roberson if ((cache_uz_flags(cache) & UMA_ZONE_SMR) != 0 && 989c6fd3e23SJeff Roberson !smr_poll(zone->uz_smr, zdom->uzd_seq, false)) 990c6fd3e23SJeff Roberson return (NULL); 991c6fd3e23SJeff Roberson 992c6fd3e23SJeff Roberson /* 993c6fd3e23SJeff Roberson * Check the zone's cache of buckets. 994c6fd3e23SJeff Roberson */ 995c6fd3e23SJeff Roberson zdom = zone_domain_lock(zone, domain); 99606d8bdcbSMark Johnston if ((bucket = zone_fetch_bucket(zone, zdom, false)) != NULL) 997c6fd3e23SJeff Roberson return (bucket); 998c6fd3e23SJeff Roberson ZDOM_UNLOCK(zdom); 999c6fd3e23SJeff Roberson 1000c6fd3e23SJeff Roberson return (NULL); 1001c6fd3e23SJeff Roberson } 1002c6fd3e23SJeff Roberson 10032f891cd5SPawel Jakub Dawidek static void 10042f891cd5SPawel Jakub Dawidek zone_log_warning(uma_zone_t zone) 10052f891cd5SPawel Jakub Dawidek { 10062f891cd5SPawel Jakub Dawidek static const struct timeval warninterval = { 300, 0 }; 10072f891cd5SPawel Jakub Dawidek 10082f891cd5SPawel Jakub Dawidek if (!zone_warnings || zone->uz_warning == NULL) 10092f891cd5SPawel Jakub Dawidek return; 10102f891cd5SPawel Jakub Dawidek 10112f891cd5SPawel Jakub Dawidek if (ratecheck(&zone->uz_ratecheck, &warninterval)) 10122f891cd5SPawel Jakub Dawidek printf("[zone: %s] %s\n", zone->uz_name, zone->uz_warning); 10132f891cd5SPawel Jakub Dawidek } 10142f891cd5SPawel Jakub Dawidek 101554503a13SJonathan T. Looney static inline void 101654503a13SJonathan T. Looney zone_maxaction(uma_zone_t zone) 101754503a13SJonathan T. Looney { 1018e60b2fcbSGleb Smirnoff 1019e60b2fcbSGleb Smirnoff if (zone->uz_maxaction.ta_func != NULL) 1020e60b2fcbSGleb Smirnoff taskqueue_enqueue(taskqueue_thread, &zone->uz_maxaction); 102154503a13SJonathan T. Looney } 102254503a13SJonathan T. Looney 10238355f576SJeff Roberson /* 10248355f576SJeff Roberson * Routine called by timeout which is used to fire off some time interval 10259643769aSJeff Roberson * based calculations. (stats, hash size, etc.) 10268355f576SJeff Roberson * 10278355f576SJeff Roberson * Arguments: 10288355f576SJeff Roberson * arg Unused 10298355f576SJeff Roberson * 10308355f576SJeff Roberson * Returns: 10318355f576SJeff Roberson * Nothing 10328355f576SJeff Roberson */ 10338355f576SJeff Roberson static void 10348355f576SJeff Roberson uma_timeout(void *unused) 10358355f576SJeff Roberson { 103686bbae32SJeff Roberson bucket_enable(); 103720a4e154SJeff Roberson zone_foreach(zone_timeout, NULL); 10388355f576SJeff Roberson 10398355f576SJeff Roberson /* Reschedule this event */ 10409643769aSJeff Roberson callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL); 10418355f576SJeff Roberson } 10428355f576SJeff Roberson 10438355f576SJeff Roberson /* 10440f9b7bf3SMark Johnston * Update the working set size estimate for the zone's bucket cache. 10450f9b7bf3SMark Johnston * The constants chosen here are somewhat arbitrary. With an update period of 10460f9b7bf3SMark Johnston * 20s (UMA_TIMEOUT), this estimate is dominated by zone activity over the 10470f9b7bf3SMark Johnston * last 100s. 
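 * The update below is an exponentially weighted moving average,
 * computed per domain: wss = (4 * (imax - imin) + wss_old) / 5.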
10480f9b7bf3SMark Johnston */ 10490f9b7bf3SMark Johnston static void 10500f9b7bf3SMark Johnston zone_domain_update_wss(uma_zone_domain_t zdom) 10510f9b7bf3SMark Johnston { 10520f9b7bf3SMark Johnston long wss; 10530f9b7bf3SMark Johnston 1054c6fd3e23SJeff Roberson ZDOM_LOCK(zdom); 10550f9b7bf3SMark Johnston MPASS(zdom->uzd_imax >= zdom->uzd_imin); 10560f9b7bf3SMark Johnston wss = zdom->uzd_imax - zdom->uzd_imin; 10570f9b7bf3SMark Johnston zdom->uzd_imax = zdom->uzd_imin = zdom->uzd_nitems; 105808cfa56eSMark Johnston zdom->uzd_wss = (4 * wss + zdom->uzd_wss) / 5; 1059c6fd3e23SJeff Roberson ZDOM_UNLOCK(zdom); 10600f9b7bf3SMark Johnston } 10610f9b7bf3SMark Johnston 10620f9b7bf3SMark Johnston /* 10639643769aSJeff Roberson * Routine to perform timeout driven calculations. This expands the 10649643769aSJeff Roberson * hashes and does per cpu statistics aggregation. 10658355f576SJeff Roberson * 1066e20a199fSJeff Roberson * Returns nothing. 10678355f576SJeff Roberson */ 10688355f576SJeff Roberson static void 106920a4e154SJeff Roberson zone_timeout(uma_zone_t zone, void *unused) 10708355f576SJeff Roberson { 107108034d10SKonstantin Belousov uma_keg_t keg; 10728b987a77SJeff Roberson u_int slabs, pages; 10738355f576SJeff Roberson 107454c5ae80SRyan Libby if ((zone->uz_flags & UMA_ZFLAG_HASH) == 0) 107508034d10SKonstantin Belousov goto update_wss; 107608034d10SKonstantin Belousov 107708034d10SKonstantin Belousov keg = zone->uz_keg; 10788b987a77SJeff Roberson 10798b987a77SJeff Roberson /* 10808b987a77SJeff Roberson * Hash zones are non-numa by definition so the first domain 10818b987a77SJeff Roberson * is the only one present. 10828b987a77SJeff Roberson */ 10838b987a77SJeff Roberson KEG_LOCK(keg, 0); 10848b987a77SJeff Roberson pages = keg->uk_domain[0].ud_pages; 10858b987a77SJeff Roberson 10868355f576SJeff Roberson /* 1087e20a199fSJeff Roberson * Expand the keg hash table. 10888355f576SJeff Roberson * 10898355f576SJeff Roberson * This is done if the number of slabs is larger than the hash size. 10908355f576SJeff Roberson * What I'm trying to do here is completely reduce collisions. This 10918355f576SJeff Roberson * may be a little aggressive. Should I allow for two collisions max? 10928355f576SJeff Roberson */ 10938b987a77SJeff Roberson if ((slabs = pages / keg->uk_ppera) > keg->uk_hash.uh_hashsize) { 10940aef6126SJeff Roberson struct uma_hash newhash; 10950aef6126SJeff Roberson struct uma_hash oldhash; 10960aef6126SJeff Roberson int ret; 10975300d9ddSJeff Roberson 10980aef6126SJeff Roberson /* 10990aef6126SJeff Roberson * This is so involved because allocating and freeing 1100e20a199fSJeff Roberson * while the keg lock is held will lead to deadlock. 11010aef6126SJeff Roberson * I have to do everything in stages and check for 11020aef6126SJeff Roberson * races. 
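 * The keg lock is dropped around hash_alloc(); hash_expand() then runs
 * under the lock and detects whether another thread already installed a
 * larger table, and whichever table is not kept is freed only after the
 * lock has been dropped again.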
11030aef6126SJeff Roberson */ 11048b987a77SJeff Roberson KEG_UNLOCK(keg, 0); 11053b2f2cb8SAlexander Motin ret = hash_alloc(&newhash, 1 << fls(slabs)); 11068b987a77SJeff Roberson KEG_LOCK(keg, 0); 11070aef6126SJeff Roberson if (ret) { 1108099a0e58SBosko Milekic if (hash_expand(&keg->uk_hash, &newhash)) { 1109099a0e58SBosko Milekic oldhash = keg->uk_hash; 1110099a0e58SBosko Milekic keg->uk_hash = newhash; 11110aef6126SJeff Roberson } else 11120aef6126SJeff Roberson oldhash = newhash; 11130aef6126SJeff Roberson 11148b987a77SJeff Roberson KEG_UNLOCK(keg, 0); 11150aef6126SJeff Roberson hash_free(&oldhash); 11168b987a77SJeff Roberson goto update_wss; 11170aef6126SJeff Roberson } 11185300d9ddSJeff Roberson } 11198b987a77SJeff Roberson KEG_UNLOCK(keg, 0); 1120e20a199fSJeff Roberson 112108034d10SKonstantin Belousov update_wss: 1122bb15d1c7SGleb Smirnoff for (int i = 0; i < vm_ndomains; i++) 1123c6fd3e23SJeff Roberson zone_domain_update_wss(ZDOM_GET(zone, i)); 11248355f576SJeff Roberson } 11258355f576SJeff Roberson 11268355f576SJeff Roberson /* 11275300d9ddSJeff Roberson * Allocate and zero fill the next sized hash table from the appropriate 11285300d9ddSJeff Roberson * backing store. 11295300d9ddSJeff Roberson * 11305300d9ddSJeff Roberson * Arguments: 11310aef6126SJeff Roberson * hash A new hash structure with the old hash size in uh_hashsize 11325300d9ddSJeff Roberson * 11335300d9ddSJeff Roberson * Returns: 1134763df3ecSPedro F. Giffuni * 1 on success and 0 on failure. 11355300d9ddSJeff Roberson */ 113637c84183SPoul-Henning Kamp static int 11373b2f2cb8SAlexander Motin hash_alloc(struct uma_hash *hash, u_int size) 11385300d9ddSJeff Roberson { 113959568a0eSAlexander Motin size_t alloc; 11405300d9ddSJeff Roberson 11413b2f2cb8SAlexander Motin KASSERT(powerof2(size), ("hash size must be power of 2")); 11423b2f2cb8SAlexander Motin if (size > UMA_HASH_SIZE_INIT) { 11433b2f2cb8SAlexander Motin hash->uh_hashsize = size; 11440aef6126SJeff Roberson alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize; 11451e0701e1SJeff Roberson hash->uh_slab_hash = malloc(alloc, M_UMAHASH, M_NOWAIT); 11465300d9ddSJeff Roberson } else { 11470aef6126SJeff Roberson alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT; 1148e20a199fSJeff Roberson hash->uh_slab_hash = zone_alloc_item(hashzone, NULL, 1149ab3185d1SJeff Roberson UMA_ANYDOMAIN, M_WAITOK); 11500aef6126SJeff Roberson hash->uh_hashsize = UMA_HASH_SIZE_INIT; 11515300d9ddSJeff Roberson } 11520aef6126SJeff Roberson if (hash->uh_slab_hash) { 11530aef6126SJeff Roberson bzero(hash->uh_slab_hash, alloc); 11540aef6126SJeff Roberson hash->uh_hashmask = hash->uh_hashsize - 1; 11550aef6126SJeff Roberson return (1); 11560aef6126SJeff Roberson } 11575300d9ddSJeff Roberson 11580aef6126SJeff Roberson return (0); 11595300d9ddSJeff Roberson } 11605300d9ddSJeff Roberson 11615300d9ddSJeff Roberson /* 116264f051e9SJeff Roberson * Expands the hash table for HASH zones. This is done from zone_timeout 116364f051e9SJeff Roberson * to reduce collisions. This must not be done in the regular allocation 116464f051e9SJeff Roberson * path, otherwise, we can recurse on the vm while allocating pages. 
11658355f576SJeff Roberson * 11668355f576SJeff Roberson * Arguments: 11670aef6126SJeff Roberson * oldhash The hash you want to expand 11680aef6126SJeff Roberson * newhash The hash structure for the new table 11698355f576SJeff Roberson * 11708355f576SJeff Roberson * Returns: 11718355f576SJeff Roberson * 1 if the entries were rehashed into newhash, 0 otherwise 11728355f576SJeff Roberson * 11738355f576SJeff Roberson * Discussion: 11748355f576SJeff Roberson */ 11750aef6126SJeff Roberson static int 11760aef6126SJeff Roberson hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash) 11778355f576SJeff Roberson { 11781e0701e1SJeff Roberson uma_hash_slab_t slab; 11796929b7d1SPedro F. Giffuni u_int hval; 11806929b7d1SPedro F. Giffuni u_int idx; 11818355f576SJeff Roberson 11820aef6126SJeff Roberson if (!newhash->uh_slab_hash) 11830aef6126SJeff Roberson return (0); 11848355f576SJeff Roberson 11850aef6126SJeff Roberson if (oldhash->uh_hashsize >= newhash->uh_hashsize) 11860aef6126SJeff Roberson return (0); 11878355f576SJeff Roberson 11888355f576SJeff Roberson /* 11898355f576SJeff Roberson * I need to investigate hash algorithms for resizing without a 11908355f576SJeff Roberson * full rehash. 11918355f576SJeff Roberson */ 11928355f576SJeff Roberson 11936929b7d1SPedro F. Giffuni for (idx = 0; idx < oldhash->uh_hashsize; idx++) 11941e0701e1SJeff Roberson while (!LIST_EMPTY(&oldhash->uh_slab_hash[idx])) { 11951e0701e1SJeff Roberson slab = LIST_FIRST(&oldhash->uh_slab_hash[idx]); 11961e0701e1SJeff Roberson LIST_REMOVE(slab, uhs_hlink); 11971e0701e1SJeff Roberson hval = UMA_HASH(newhash, slab->uhs_data); 11981e0701e1SJeff Roberson LIST_INSERT_HEAD(&newhash->uh_slab_hash[hval], 11991e0701e1SJeff Roberson slab, uhs_hlink); 12008355f576SJeff Roberson } 12018355f576SJeff Roberson 12020aef6126SJeff Roberson return (1); 12039c2cd7e5SJeff Roberson } 12049c2cd7e5SJeff Roberson 12055300d9ddSJeff Roberson /* 12065300d9ddSJeff Roberson * Free the hash table to the appropriate backing store. 12075300d9ddSJeff Roberson * 12085300d9ddSJeff Roberson * Arguments: 12095300d9ddSJeff Roberson * hash The hash structure whose slab hash table we're freeing; 12105300d9ddSJeff Roberson * its size selects the backing store it is returned to 12115300d9ddSJeff Roberson * 12125300d9ddSJeff Roberson * Returns: 12135300d9ddSJeff Roberson * Nothing 12145300d9ddSJeff Roberson */ 12159c2cd7e5SJeff Roberson static void 12160aef6126SJeff Roberson hash_free(struct uma_hash *hash) 12179c2cd7e5SJeff Roberson { 12180aef6126SJeff Roberson if (hash->uh_slab_hash == NULL) 12190aef6126SJeff Roberson return; 12200aef6126SJeff Roberson if (hash->uh_hashsize == UMA_HASH_SIZE_INIT) 12210095a784SJeff Roberson zone_free_item(hashzone, hash->uh_slab_hash, NULL, SKIP_NONE); 12228355f576SJeff Roberson else 1223961647dfSJeff Roberson free(hash->uh_slab_hash, M_UMAHASH); 12248355f576SJeff Roberson } 12258355f576SJeff Roberson 12268355f576SJeff Roberson /* 12278355f576SJeff Roberson * Frees all outstanding items in a bucket 12288355f576SJeff Roberson * 12298355f576SJeff Roberson * Arguments: 12308355f576SJeff Roberson * zone The zone to free to, must be unlocked. 12314bd61e19SJeff Roberson * bucket The free/alloc bucket with items.
12328355f576SJeff Roberson * 12338355f576SJeff Roberson * Returns: 12348355f576SJeff Roberson * Nothing 12358355f576SJeff Roberson */ 12368355f576SJeff Roberson static void 12378355f576SJeff Roberson bucket_drain(uma_zone_t zone, uma_bucket_t bucket) 12388355f576SJeff Roberson { 12390095a784SJeff Roberson int i; 12408355f576SJeff Roberson 1241c6fd3e23SJeff Roberson if (bucket->ub_cnt == 0) 12428355f576SJeff Roberson return; 12438355f576SJeff Roberson 1244d4665eaaSJeff Roberson if ((zone->uz_flags & UMA_ZONE_SMR) != 0 && 1245d4665eaaSJeff Roberson bucket->ub_seq != SMR_SEQ_INVALID) { 1246d4665eaaSJeff Roberson smr_wait(zone->uz_smr, bucket->ub_seq); 1247543117beSJeff Roberson bucket->ub_seq = SMR_SEQ_INVALID; 1248d4665eaaSJeff Roberson for (i = 0; i < bucket->ub_cnt; i++) 1249d4665eaaSJeff Roberson item_dtor(zone, bucket->ub_bucket[i], 1250d4665eaaSJeff Roberson zone->uz_size, NULL, SKIP_NONE); 1251d4665eaaSJeff Roberson } 12520095a784SJeff Roberson if (zone->uz_fini) 125309c8cb71SMark Johnston for (i = 0; i < bucket->ub_cnt; i++) { 125409c8cb71SMark Johnston kasan_mark_item_valid(zone, bucket->ub_bucket[i]); 12550095a784SJeff Roberson zone->uz_fini(bucket->ub_bucket[i], zone->uz_size); 125609c8cb71SMark Johnston kasan_mark_item_invalid(zone, bucket->ub_bucket[i]); 125709c8cb71SMark Johnston } 12580095a784SJeff Roberson zone->uz_release(zone->uz_arg, bucket->ub_bucket, bucket->ub_cnt); 12594bd61e19SJeff Roberson if (zone->uz_max_items > 0) 12604bd61e19SJeff Roberson zone_free_limit(zone, bucket->ub_cnt); 1261d4665eaaSJeff Roberson #ifdef INVARIANTS 1262d4665eaaSJeff Roberson bzero(bucket->ub_bucket, sizeof(void *) * bucket->ub_cnt); 1263d4665eaaSJeff Roberson #endif 12640095a784SJeff Roberson bucket->ub_cnt = 0; 12658355f576SJeff Roberson } 12668355f576SJeff Roberson 12678355f576SJeff Roberson /* 12688355f576SJeff Roberson * Drains the per cpu caches for a zone. 12698355f576SJeff Roberson * 1270727c6918SJeff Roberson * NOTE: This may only be called while the zone is being torn down, and not 12715d1ae027SRobert Watson * during normal operation. This is necessary in order that we do not have 12725d1ae027SRobert Watson * to migrate CPUs to drain the per-CPU caches. 12735d1ae027SRobert Watson * 12748355f576SJeff Roberson * Arguments: 12758355f576SJeff Roberson * zone The zone to drain, must be unlocked. 12768355f576SJeff Roberson * 12778355f576SJeff Roberson * Returns: 12788355f576SJeff Roberson * Nothing 12798355f576SJeff Roberson */ 12808355f576SJeff Roberson static void 12819643769aSJeff Roberson cache_drain(uma_zone_t zone) 12828355f576SJeff Roberson { 12838355f576SJeff Roberson uma_cache_t cache; 1284376b1ba3SJeff Roberson uma_bucket_t bucket; 1285543117beSJeff Roberson smr_seq_t seq; 12868355f576SJeff Roberson int cpu; 12878355f576SJeff Roberson 12888355f576SJeff Roberson /* 12895d1ae027SRobert Watson * XXX: It is safe to not lock the per-CPU caches, because we're 12905d1ae027SRobert Watson * tearing down the zone anyway. I.e., there will be no further use 12915d1ae027SRobert Watson * of the caches at this point. 12925d1ae027SRobert Watson * 12935d1ae027SRobert Watson * XXX: It would be good to be able to assert that the zone is being 12945d1ae027SRobert Watson * torn down to prevent improper use of cache_drain().
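 * For SMR zones, a single sequence number is advanced below and stamped on
 * each unloaded free/cross bucket so that bucket_drain() will smr_wait() on
 * it before destroying the items.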
12958355f576SJeff Roberson */ 1296543117beSJeff Roberson seq = SMR_SEQ_INVALID; 1297543117beSJeff Roberson if ((zone->uz_flags & UMA_ZONE_SMR) != 0) 1298226dd6dbSJeff Roberson seq = smr_advance(zone->uz_smr); 12993aa6d94eSJohn Baldwin CPU_FOREACH(cpu) { 13008355f576SJeff Roberson cache = &zone->uz_cpu[cpu]; 1301376b1ba3SJeff Roberson bucket = cache_bucket_unload_alloc(cache); 1302c6fd3e23SJeff Roberson if (bucket != NULL) 1303376b1ba3SJeff Roberson bucket_free(zone, bucket, NULL); 1304376b1ba3SJeff Roberson bucket = cache_bucket_unload_free(cache); 1305376b1ba3SJeff Roberson if (bucket != NULL) { 1306543117beSJeff Roberson bucket->ub_seq = seq; 1307376b1ba3SJeff Roberson bucket_free(zone, bucket, NULL); 1308376b1ba3SJeff Roberson } 1309376b1ba3SJeff Roberson bucket = cache_bucket_unload_cross(cache); 1310376b1ba3SJeff Roberson if (bucket != NULL) { 1311543117beSJeff Roberson bucket->ub_seq = seq; 1312376b1ba3SJeff Roberson bucket_free(zone, bucket, NULL); 1313376b1ba3SJeff Roberson } 1314d56368d7SBosko Milekic } 1315*aabe13f1SMark Johnston bucket_cache_reclaim(zone, true, UMA_ANYDOMAIN); 1316aaa8bb16SJeff Roberson } 1317aaa8bb16SJeff Roberson 1318a2de44abSAlexander Motin static void 131920a4e154SJeff Roberson cache_shrink(uma_zone_t zone, void *unused) 1320a2de44abSAlexander Motin { 1321a2de44abSAlexander Motin 1322a2de44abSAlexander Motin if (zone->uz_flags & UMA_ZFLAG_INTERNAL) 1323a2de44abSAlexander Motin return; 1324a2de44abSAlexander Motin 1325*aabe13f1SMark Johnston ZONE_LOCK(zone); 132620a4e154SJeff Roberson zone->uz_bucket_size = 132720a4e154SJeff Roberson (zone->uz_bucket_size_min + zone->uz_bucket_size) / 2; 1328*aabe13f1SMark Johnston ZONE_UNLOCK(zone); 1329a2de44abSAlexander Motin } 1330a2de44abSAlexander Motin 1331a2de44abSAlexander Motin static void 133220a4e154SJeff Roberson cache_drain_safe_cpu(uma_zone_t zone, void *unused) 1333a2de44abSAlexander Motin { 1334a2de44abSAlexander Motin uma_cache_t cache; 1335c1685086SJeff Roberson uma_bucket_t b1, b2, b3; 1336ab3185d1SJeff Roberson int domain; 1337a2de44abSAlexander Motin 1338a2de44abSAlexander Motin if (zone->uz_flags & UMA_ZFLAG_INTERNAL) 1339a2de44abSAlexander Motin return; 1340a2de44abSAlexander Motin 1341c1685086SJeff Roberson b1 = b2 = b3 = NULL; 1342a2de44abSAlexander Motin critical_enter(); 1343a2de44abSAlexander Motin cache = &zone->uz_cpu[curcpu]; 1344c6fd3e23SJeff Roberson domain = PCPU_GET(domain); 1345376b1ba3SJeff Roberson b1 = cache_bucket_unload_alloc(cache); 1346d4665eaaSJeff Roberson 1347d4665eaaSJeff Roberson /* 1348d4665eaaSJeff Roberson * Don't flush SMR zone buckets. This leaves the zone without a 1349d4665eaaSJeff Roberson * bucket and forces every free to synchronize(). 1350d4665eaaSJeff Roberson */ 1351543117beSJeff Roberson if ((zone->uz_flags & UMA_ZONE_SMR) == 0) { 1352376b1ba3SJeff Roberson b2 = cache_bucket_unload_free(cache); 1353543117beSJeff Roberson b3 = cache_bucket_unload_cross(cache); 1354543117beSJeff Roberson } 1355543117beSJeff Roberson critical_exit(); 1356543117beSJeff Roberson 1357543117beSJeff Roberson if (b1 != NULL) 1358c6fd3e23SJeff Roberson zone_free_bucket(zone, b1, NULL, domain, false); 1359543117beSJeff Roberson if (b2 != NULL) 1360c6fd3e23SJeff Roberson zone_free_bucket(zone, b2, NULL, domain, false); 1361543117beSJeff Roberson if (b3 != NULL) { 1362c6fd3e23SJeff Roberson /* Adjust the domain so it goes to zone_free_cross. 
*/ 1363c6fd3e23SJeff Roberson domain = (domain + 1) % vm_ndomains; 1364c6fd3e23SJeff Roberson zone_free_bucket(zone, b3, NULL, domain, false); 1365c1685086SJeff Roberson } 1366a2de44abSAlexander Motin } 1367a2de44abSAlexander Motin 1368a2de44abSAlexander Motin /* 1369a2de44abSAlexander Motin * Safely drain the per-CPU caches of a zone (or of all zones) into the zone bucket caches. 1370a2de44abSAlexander Motin * This is an expensive call because it needs to bind to all CPUs 1371a2de44abSAlexander Motin * one by one and enter a critical section on each of them in order 1372a2de44abSAlexander Motin * to safely access their cache buckets. 1373a2de44abSAlexander Motin * The zone lock must not be held when calling this function. 1374a2de44abSAlexander Motin */ 1375a2de44abSAlexander Motin static void 137608cfa56eSMark Johnston pcpu_cache_drain_safe(uma_zone_t zone) 1377a2de44abSAlexander Motin { 1378a2de44abSAlexander Motin int cpu; 1379a2de44abSAlexander Motin 1380a2de44abSAlexander Motin /* 1381727c6918SJeff Roberson * Polite bucket size shrinking was not enough; shrink aggressively. 1382a2de44abSAlexander Motin */ 1383a2de44abSAlexander Motin if (zone) 138420a4e154SJeff Roberson cache_shrink(zone, NULL); 1385a2de44abSAlexander Motin else 138620a4e154SJeff Roberson zone_foreach(cache_shrink, NULL); 1387a2de44abSAlexander Motin 1388a2de44abSAlexander Motin CPU_FOREACH(cpu) { 1389a2de44abSAlexander Motin thread_lock(curthread); 1390a2de44abSAlexander Motin sched_bind(curthread, cpu); 1391a2de44abSAlexander Motin thread_unlock(curthread); 1392a2de44abSAlexander Motin 1393a2de44abSAlexander Motin if (zone) 139420a4e154SJeff Roberson cache_drain_safe_cpu(zone, NULL); 1395a2de44abSAlexander Motin else 139620a4e154SJeff Roberson zone_foreach(cache_drain_safe_cpu, NULL); 1397a2de44abSAlexander Motin } 1398a2de44abSAlexander Motin thread_lock(curthread); 1399a2de44abSAlexander Motin sched_unbind(curthread); 1400a2de44abSAlexander Motin thread_unlock(curthread); 1401a2de44abSAlexander Motin } 1402a2de44abSAlexander Motin 1403aaa8bb16SJeff Roberson /* 140408cfa56eSMark Johnston * Reclaim cached buckets from a zone. All buckets are reclaimed if the caller 140508cfa56eSMark Johnston * requested a drain, otherwise the per-domain caches are trimmed to their 140608cfa56eSMark Johnston * estimated working set size. 1407aaa8bb16SJeff Roberson */ 1408aaa8bb16SJeff Roberson static void 140954f421f9SMark Johnston bucket_cache_reclaim_domain(uma_zone_t zone, bool drain, int domain) 1410aaa8bb16SJeff Roberson { 1411ab3185d1SJeff Roberson uma_zone_domain_t zdom; 1412aaa8bb16SJeff Roberson uma_bucket_t bucket; 1413c6fd3e23SJeff Roberson long target; 14148355f576SJeff Roberson 1415c6fd3e23SJeff Roberson /* 141691d947bfSJeff Roberson * The cross bucket is partially filled and not part of 141791d947bfSJeff Roberson * the item count. Reclaim it individually here.
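 * For SMR zones this is only done on a full drain; a trim leaves the cross
 * bucket in place.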
141891d947bfSJeff Roberson */ 141954f421f9SMark Johnston zdom = ZDOM_GET(zone, domain); 1420226dd6dbSJeff Roberson if ((zone->uz_flags & UMA_ZONE_SMR) == 0 || drain) { 142191d947bfSJeff Roberson ZONE_CROSS_LOCK(zone); 142291d947bfSJeff Roberson bucket = zdom->uzd_cross; 142391d947bfSJeff Roberson zdom->uzd_cross = NULL; 142491d947bfSJeff Roberson ZONE_CROSS_UNLOCK(zone); 1425c6fd3e23SJeff Roberson if (bucket != NULL) 142691d947bfSJeff Roberson bucket_free(zone, bucket, NULL); 142791d947bfSJeff Roberson } 142891d947bfSJeff Roberson 142991d947bfSJeff Roberson /* 143008cfa56eSMark Johnston * If we were asked to drain the zone, we are done only once 143108cfa56eSMark Johnston * this bucket cache is empty. Otherwise, we reclaim items in 143208cfa56eSMark Johnston * excess of the zone's estimated working set size. If the 143308cfa56eSMark Johnston * difference nitems - imin is larger than the WSS estimate, 143408cfa56eSMark Johnston * then the estimate will grow at the end of this interval and 143508cfa56eSMark Johnston * we ignore the historical average. 143608cfa56eSMark Johnston */ 1437c6fd3e23SJeff Roberson ZDOM_LOCK(zdom); 143808cfa56eSMark Johnston target = drain ? 0 : lmax(zdom->uzd_wss, zdom->uzd_nitems - 143908cfa56eSMark Johnston zdom->uzd_imin); 144008cfa56eSMark Johnston while (zdom->uzd_nitems > target) { 1441c6fd3e23SJeff Roberson bucket = zone_fetch_bucket(zone, zdom, true); 144208cfa56eSMark Johnston if (bucket == NULL) 144308cfa56eSMark Johnston break; 14446fd34d6fSJeff Roberson bucket_free(zone, bucket, NULL); 1445c6fd3e23SJeff Roberson ZDOM_LOCK(zdom); 14468355f576SJeff Roberson } 1447c6fd3e23SJeff Roberson ZDOM_UNLOCK(zdom); 1448ab3185d1SJeff Roberson } 144954f421f9SMark Johnston 145054f421f9SMark Johnston static void 1451*aabe13f1SMark Johnston bucket_cache_reclaim(uma_zone_t zone, bool drain, int domain) 145254f421f9SMark Johnston { 145354f421f9SMark Johnston int i; 145454f421f9SMark Johnston 145554f421f9SMark Johnston /* 145654f421f9SMark Johnston * Shrink the zone bucket size to ensure that the per-CPU caches 145754f421f9SMark Johnston * don't grow too large. 
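 * The size is stepped down by one per call and never below
 * uz_bucket_size_min, so repeated reclaim passes shrink refilled per-CPU
 * buckets gradually rather than all at once.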
145854f421f9SMark Johnston */ 145954f421f9SMark Johnston if (zone->uz_bucket_size > zone->uz_bucket_size_min) 146054f421f9SMark Johnston zone->uz_bucket_size--; 146154f421f9SMark Johnston 1462*aabe13f1SMark Johnston if (domain != UMA_ANYDOMAIN && 1463*aabe13f1SMark Johnston (zone->uz_flags & UMA_ZONE_ROUNDROBIN) == 0) { 1464*aabe13f1SMark Johnston bucket_cache_reclaim_domain(zone, drain, domain); 1465*aabe13f1SMark Johnston } else { 146654f421f9SMark Johnston for (i = 0; i < vm_ndomains; i++) 146754f421f9SMark Johnston bucket_cache_reclaim_domain(zone, drain, i); 14688355f576SJeff Roberson } 1469*aabe13f1SMark Johnston } 1470fc03d22bSJeff Roberson 1471fc03d22bSJeff Roberson static void 1472fc03d22bSJeff Roberson keg_free_slab(uma_keg_t keg, uma_slab_t slab, int start) 1473fc03d22bSJeff Roberson { 1474fc03d22bSJeff Roberson uint8_t *mem; 147509c8cb71SMark Johnston size_t size; 1476fc03d22bSJeff Roberson int i; 1477fc03d22bSJeff Roberson uint8_t flags; 1478fc03d22bSJeff Roberson 14791431a748SGleb Smirnoff CTR4(KTR_UMA, "keg_free_slab keg %s(%p) slab %p, returning %d bytes", 14801431a748SGleb Smirnoff keg->uk_name, keg, slab, PAGE_SIZE * keg->uk_ppera); 14811431a748SGleb Smirnoff 14821e0701e1SJeff Roberson mem = slab_data(slab, keg); 148309c8cb71SMark Johnston size = PAGE_SIZE * keg->uk_ppera; 148409c8cb71SMark Johnston 148509c8cb71SMark Johnston kasan_mark_slab_valid(keg, mem); 1486fc03d22bSJeff Roberson if (keg->uk_fini != NULL) { 148709c8cb71SMark Johnston for (i = start - 1; i > -1; i--) 1488c5deaf04SGleb Smirnoff #ifdef INVARIANTS 1489c5deaf04SGleb Smirnoff /* 1490c5deaf04SGleb Smirnoff * trash_fini implies that dtor was trash_dtor. trash_fini 1491c5deaf04SGleb Smirnoff * would check that memory hasn't been modified since free, 1492c5deaf04SGleb Smirnoff * which executed trash_dtor. 1493c5deaf04SGleb Smirnoff * That's why we need to run uma_dbg_kskip() check here, 1494c5deaf04SGleb Smirnoff * albeit we don't make skip check for other init/fini 1495c5deaf04SGleb Smirnoff * invocations. 
1496c5deaf04SGleb Smirnoff */ 14971e0701e1SJeff Roberson if (!uma_dbg_kskip(keg, slab_item(slab, keg, i)) || 1498c5deaf04SGleb Smirnoff keg->uk_fini != trash_fini) 1499c5deaf04SGleb Smirnoff #endif 15001e0701e1SJeff Roberson keg->uk_fini(slab_item(slab, keg, i), keg->uk_size); 1501fc03d22bSJeff Roberson } 150209c8cb71SMark Johnston flags = slab->us_flags; 150309c8cb71SMark Johnston if (keg->uk_flags & UMA_ZFLAG_OFFPAGE) { 15049b8db4d0SRyan Libby zone_free_item(slabzone(keg->uk_ipers), slab_tohashslab(slab), 15059b8db4d0SRyan Libby NULL, SKIP_NONE); 150609c8cb71SMark Johnston } 150709c8cb71SMark Johnston keg->uk_freef(mem, size, flags); 150809c8cb71SMark Johnston uma_total_dec(size); 15098355f576SJeff Roberson } 15108355f576SJeff Roberson 1511f09cbea3SMark Johnston static void 1512f09cbea3SMark Johnston keg_drain_domain(uma_keg_t keg, int domain) 1513f09cbea3SMark Johnston { 1514f09cbea3SMark Johnston struct slabhead freeslabs; 1515f09cbea3SMark Johnston uma_domain_t dom; 1516f09cbea3SMark Johnston uma_slab_t slab, tmp; 1517f09cbea3SMark Johnston uint32_t i, stofree, stokeep, partial; 1518f09cbea3SMark Johnston 1519f09cbea3SMark Johnston dom = &keg->uk_domain[domain]; 1520f09cbea3SMark Johnston LIST_INIT(&freeslabs); 1521f09cbea3SMark Johnston 1522f09cbea3SMark Johnston CTR4(KTR_UMA, "keg_drain %s(%p) domain %d free items: %u", 1523575a4437SEd Maste keg->uk_name, keg, domain, dom->ud_free_items); 1524f09cbea3SMark Johnston 1525f09cbea3SMark Johnston KEG_LOCK(keg, domain); 1526f09cbea3SMark Johnston 1527f09cbea3SMark Johnston /* 1528f09cbea3SMark Johnston * Are the free items in partially allocated slabs sufficient to meet 1529f09cbea3SMark Johnston * the reserve? If not, compute the number of fully free slabs that must 1530f09cbea3SMark Johnston * be kept. 1531f09cbea3SMark Johnston */ 1532f09cbea3SMark Johnston partial = dom->ud_free_items - dom->ud_free_slabs * keg->uk_ipers; 1533f09cbea3SMark Johnston if (partial < keg->uk_reserve) { 1534f09cbea3SMark Johnston stokeep = min(dom->ud_free_slabs, 1535f09cbea3SMark Johnston howmany(keg->uk_reserve - partial, keg->uk_ipers)); 1536f09cbea3SMark Johnston } else { 1537f09cbea3SMark Johnston stokeep = 0; 1538f09cbea3SMark Johnston } 1539f09cbea3SMark Johnston stofree = dom->ud_free_slabs - stokeep; 1540f09cbea3SMark Johnston 1541f09cbea3SMark Johnston /* 1542f09cbea3SMark Johnston * Partition the free slabs into two sets: those that must be kept in 1543f09cbea3SMark Johnston * order to maintain the reserve, and those that may be released back to 1544f09cbea3SMark Johnston * the system. Since one set may be much larger than the other, 1545f09cbea3SMark Johnston * populate the smaller of the two sets and swap them if necessary. 
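 * For example, with 10 free slabs, stofree = 7 and stokeep = 3
 * (illustrative numbers), the loop below moves min(7, 3) = 3 slabs onto
 * freeslabs; since stofree > stokeep the lists are then swapped, leaving
 * the 7 slabs to release on freeslabs and the 3 reserved slabs on
 * ud_free_slab.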
1546f09cbea3SMark Johnston */ 1547f09cbea3SMark Johnston for (i = min(stofree, stokeep); i > 0; i--) { 1548f09cbea3SMark Johnston slab = LIST_FIRST(&dom->ud_free_slab); 1549f09cbea3SMark Johnston LIST_REMOVE(slab, us_link); 1550f09cbea3SMark Johnston LIST_INSERT_HEAD(&freeslabs, slab, us_link); 1551f09cbea3SMark Johnston } 1552f09cbea3SMark Johnston if (stofree > stokeep) 1553f09cbea3SMark Johnston LIST_SWAP(&freeslabs, &dom->ud_free_slab, uma_slab, us_link); 1554f09cbea3SMark Johnston 1555f09cbea3SMark Johnston if ((keg->uk_flags & UMA_ZFLAG_HASH) != 0) { 1556f09cbea3SMark Johnston LIST_FOREACH(slab, &freeslabs, us_link) 1557f09cbea3SMark Johnston UMA_HASH_REMOVE(&keg->uk_hash, slab); 1558f09cbea3SMark Johnston } 1559f09cbea3SMark Johnston dom->ud_free_items -= stofree * keg->uk_ipers; 1560f09cbea3SMark Johnston dom->ud_free_slabs -= stofree; 1561f09cbea3SMark Johnston dom->ud_pages -= stofree * keg->uk_ppera; 1562f09cbea3SMark Johnston KEG_UNLOCK(keg, domain); 1563f09cbea3SMark Johnston 1564f09cbea3SMark Johnston LIST_FOREACH_SAFE(slab, &freeslabs, us_link, tmp) 1565f09cbea3SMark Johnston keg_free_slab(keg, slab, keg->uk_ipers); 1566f09cbea3SMark Johnston } 1567f09cbea3SMark Johnston 15688355f576SJeff Roberson /* 1569e20a199fSJeff Roberson * Frees pages from a keg back to the system. This is done on demand from 15708355f576SJeff Roberson * the pageout daemon. 15718355f576SJeff Roberson * 1572e20a199fSJeff Roberson * Returns nothing. 15738355f576SJeff Roberson */ 1574e20a199fSJeff Roberson static void 1575*aabe13f1SMark Johnston keg_drain(uma_keg_t keg, int domain) 15768355f576SJeff Roberson { 1577f09cbea3SMark Johnston int i; 15788355f576SJeff Roberson 1579f09cbea3SMark Johnston if ((keg->uk_flags & UMA_ZONE_NOFREE) != 0) 15808355f576SJeff Roberson return; 1581*aabe13f1SMark Johnston if (domain != UMA_ANYDOMAIN) { 1582*aabe13f1SMark Johnston keg_drain_domain(keg, domain); 1583*aabe13f1SMark Johnston } else { 1584f09cbea3SMark Johnston for (i = 0; i < vm_ndomains; i++) 1585f09cbea3SMark Johnston keg_drain_domain(keg, i); 15868355f576SJeff Roberson } 1587*aabe13f1SMark Johnston } 15888355f576SJeff Roberson 1589e20a199fSJeff Roberson static void 1590*aabe13f1SMark Johnston zone_reclaim(uma_zone_t zone, int domain, int waitok, bool drain) 1591e20a199fSJeff Roberson { 15928355f576SJeff Roberson /* 1593*aabe13f1SMark Johnston * Count active reclaim operations in order to interlock with 1594*aabe13f1SMark Johnston * zone_dtor(), which removes the zone from global lists before 1595*aabe13f1SMark Johnston * attempting to reclaim items itself. 1596*aabe13f1SMark Johnston * 1597*aabe13f1SMark Johnston * The zone may be destroyed while sleeping, so only zone_dtor() should 1598*aabe13f1SMark Johnston * specify M_WAITOK. 
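 * Several M_NOWAIT reclaims may run concurrently; uz_reclaimers only
 * counts them so that the M_WAITOK caller can sleep until the count
 * reaches zero.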
1599e20a199fSJeff Roberson */ 1600e20a199fSJeff Roberson ZONE_LOCK(zone); 1601*aabe13f1SMark Johnston if (waitok == M_WAITOK) { 1602*aabe13f1SMark Johnston while (zone->uz_reclaimers > 0) 1603*aabe13f1SMark Johnston msleep(zone, ZONE_LOCKPTR(zone), PVM, "zonedrain", 1); 1604e20a199fSJeff Roberson } 1605*aabe13f1SMark Johnston zone->uz_reclaimers++; 1606e20a199fSJeff Roberson ZONE_UNLOCK(zone); 1607*aabe13f1SMark Johnston bucket_cache_reclaim(zone, drain, domain); 160808cfa56eSMark Johnston 160908034d10SKonstantin Belousov if ((zone->uz_flags & UMA_ZFLAG_CACHE) == 0) 1610*aabe13f1SMark Johnston keg_drain(zone->uz_keg, domain); 1611e20a199fSJeff Roberson ZONE_LOCK(zone); 1612*aabe13f1SMark Johnston zone->uz_reclaimers--; 1613*aabe13f1SMark Johnston if (zone->uz_reclaimers == 0) 1614e20a199fSJeff Roberson wakeup(zone); 1615e20a199fSJeff Roberson ZONE_UNLOCK(zone); 1616e20a199fSJeff Roberson } 1617e20a199fSJeff Roberson 161808cfa56eSMark Johnston static void 1619*aabe13f1SMark Johnston zone_drain(uma_zone_t zone, void *arg) 1620e20a199fSJeff Roberson { 1621*aabe13f1SMark Johnston int domain; 1622e20a199fSJeff Roberson 1623*aabe13f1SMark Johnston domain = (int)(uintptr_t)arg; 1624*aabe13f1SMark Johnston zone_reclaim(zone, domain, M_NOWAIT, true); 162508cfa56eSMark Johnston } 162608cfa56eSMark Johnston 162708cfa56eSMark Johnston static void 1628*aabe13f1SMark Johnston zone_trim(uma_zone_t zone, void *arg) 162908cfa56eSMark Johnston { 1630*aabe13f1SMark Johnston int domain; 163108cfa56eSMark Johnston 1632*aabe13f1SMark Johnston domain = (int)(uintptr_t)arg; 1633*aabe13f1SMark Johnston zone_reclaim(zone, domain, M_NOWAIT, false); 1634e20a199fSJeff Roberson } 1635e20a199fSJeff Roberson 1636e20a199fSJeff Roberson /* 16378b987a77SJeff Roberson * Allocate a new slab for a keg and insert it into the partial slab list. 16388b987a77SJeff Roberson * The keg should be unlocked on entry. If the allocation succeeds it will 16398b987a77SJeff Roberson * be locked on return. 16408355f576SJeff Roberson * 16418355f576SJeff Roberson * Arguments: 164286220393SMark Johnston * flags Wait flags for the item initialization routine 164386220393SMark Johnston * aflags Wait flags for the slab allocation 16448355f576SJeff Roberson * 16458355f576SJeff Roberson * Returns: 16468355f576SJeff Roberson * The slab that was allocated or NULL if there is no memory and the 16478355f576SJeff Roberson * caller specified M_NOWAIT.
16488355f576SJeff Roberson */ 16498355f576SJeff Roberson static uma_slab_t 165086220393SMark Johnston keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int domain, int flags, 165186220393SMark Johnston int aflags) 16528355f576SJeff Roberson { 16538b987a77SJeff Roberson uma_domain_t dom; 1654099a0e58SBosko Milekic uma_slab_t slab; 16552e47807cSJeff Roberson unsigned long size; 165685dcf349SGleb Smirnoff uint8_t *mem; 165786220393SMark Johnston uint8_t sflags; 16588355f576SJeff Roberson int i; 16598355f576SJeff Roberson 1660ab3185d1SJeff Roberson KASSERT(domain >= 0 && domain < vm_ndomains, 1661ab3185d1SJeff Roberson ("keg_alloc_slab: domain %d out of range", domain)); 1662a553d4b8SJeff Roberson 1663194a979eSMark Johnston slab = NULL; 1664194a979eSMark Johnston mem = NULL; 166554c5ae80SRyan Libby if (keg->uk_flags & UMA_ZFLAG_OFFPAGE) { 16669b8db4d0SRyan Libby uma_hash_slab_t hslab; 16679b8db4d0SRyan Libby hslab = zone_alloc_item(slabzone(keg->uk_ipers), NULL, 16689b8db4d0SRyan Libby domain, aflags); 16699b8db4d0SRyan Libby if (hslab == NULL) 1670727c6918SJeff Roberson goto fail; 16719b8db4d0SRyan Libby slab = &hslab->uhs_slab; 1672a553d4b8SJeff Roberson } 1673a553d4b8SJeff Roberson 16743370c5bfSJeff Roberson /* 16753370c5bfSJeff Roberson * This reproduces the old vm_zone behavior of zero filling pages the 16763370c5bfSJeff Roberson * first time they are added to a zone. 16773370c5bfSJeff Roberson * 16783370c5bfSJeff Roberson * Malloced items are zeroed in uma_zalloc. 16793370c5bfSJeff Roberson */ 16803370c5bfSJeff Roberson 1681099a0e58SBosko Milekic if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0) 168286220393SMark Johnston aflags |= M_ZERO; 16833370c5bfSJeff Roberson else 168486220393SMark Johnston aflags &= ~M_ZERO; 16853370c5bfSJeff Roberson 1686263811f7SKip Macy if (keg->uk_flags & UMA_ZONE_NODUMP) 168786220393SMark Johnston aflags |= M_NODUMP; 1688263811f7SKip Macy 1689e20a199fSJeff Roberson /* zone is passed for legacy reasons. */ 1690194a979eSMark Johnston size = keg->uk_ppera * PAGE_SIZE; 169109c8cb71SMark Johnston mem = keg->uk_allocf(zone, size, domain, &sflags, aflags); 1692a553d4b8SJeff Roberson if (mem == NULL) { 169354c5ae80SRyan Libby if (keg->uk_flags & UMA_ZFLAG_OFFPAGE) 16949b8db4d0SRyan Libby zone_free_item(slabzone(keg->uk_ipers), 16959b8db4d0SRyan Libby slab_tohashslab(slab), NULL, SKIP_NONE); 1696727c6918SJeff Roberson goto fail; 1697a553d4b8SJeff Roberson } 16982e47807cSJeff Roberson uma_total_inc(size); 16998355f576SJeff Roberson 17008b987a77SJeff Roberson /* For HASH zones all pages go to the same uma_domain. 
*/ 170154c5ae80SRyan Libby if ((keg->uk_flags & UMA_ZFLAG_HASH) != 0) 17028b987a77SJeff Roberson domain = 0; 17038b987a77SJeff Roberson 17045c0e403bSJeff Roberson /* Point the slab into the allocated memory */ 170554c5ae80SRyan Libby if (!(keg->uk_flags & UMA_ZFLAG_OFFPAGE)) 1706099a0e58SBosko Milekic slab = (uma_slab_t)(mem + keg->uk_pgoff); 17071e0701e1SJeff Roberson else 17089b8db4d0SRyan Libby slab_tohashslab(slab)->uhs_data = mem; 17095c0e403bSJeff Roberson 171054c5ae80SRyan Libby if (keg->uk_flags & UMA_ZFLAG_VTOSLAB) 1711099a0e58SBosko Milekic for (i = 0; i < keg->uk_ppera; i++) 1712584061b4SJeff Roberson vsetzoneslab((vm_offset_t)mem + (i * PAGE_SIZE), 1713584061b4SJeff Roberson zone, slab); 17148355f576SJeff Roberson 1715099a0e58SBosko Milekic slab->us_freecount = keg->uk_ipers; 171686220393SMark Johnston slab->us_flags = sflags; 1717ab3185d1SJeff Roberson slab->us_domain = domain; 17188b987a77SJeff Roberson 17199b78b1f4SJeff Roberson BIT_FILL(keg->uk_ipers, &slab->us_free); 1720ef72505eSJeff Roberson #ifdef INVARIANTS 1721815db204SRyan Libby BIT_ZERO(keg->uk_ipers, slab_dbg_bits(slab, keg)); 1722ef72505eSJeff Roberson #endif 1723099a0e58SBosko Milekic 1724b23f72e9SBrian Feldman if (keg->uk_init != NULL) { 1725099a0e58SBosko Milekic for (i = 0; i < keg->uk_ipers; i++) 17261e0701e1SJeff Roberson if (keg->uk_init(slab_item(slab, keg, i), 172786220393SMark Johnston keg->uk_size, flags) != 0) 1728b23f72e9SBrian Feldman break; 1729b23f72e9SBrian Feldman if (i != keg->uk_ipers) { 1730fc03d22bSJeff Roberson keg_free_slab(keg, slab, i); 1731727c6918SJeff Roberson goto fail; 1732b23f72e9SBrian Feldman } 1733b23f72e9SBrian Feldman } 173409c8cb71SMark Johnston kasan_mark_slab_invalid(keg, mem); 17358b987a77SJeff Roberson KEG_LOCK(keg, domain); 17365c0e403bSJeff Roberson 17371431a748SGleb Smirnoff CTR3(KTR_UMA, "keg_alloc_slab: allocated slab %p for %s(%p)", 17381431a748SGleb Smirnoff slab, keg->uk_name, keg); 17391431a748SGleb Smirnoff 174054c5ae80SRyan Libby if (keg->uk_flags & UMA_ZFLAG_HASH) 1741099a0e58SBosko Milekic UMA_HASH_INSERT(&keg->uk_hash, slab, mem); 17428355f576SJeff Roberson 17438b987a77SJeff Roberson /* 17448b987a77SJeff Roberson * If we got a slab here it's safe to mark it partially used 17458b987a77SJeff Roberson * and return. We assume that the caller is going to remove 17468b987a77SJeff Roberson * at least one item. 17478b987a77SJeff Roberson */ 17488b987a77SJeff Roberson dom = &keg->uk_domain[domain]; 17498b987a77SJeff Roberson LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link); 17508b987a77SJeff Roberson dom->ud_pages += keg->uk_ppera; 17514ab3aee8SMark Johnston dom->ud_free_items += keg->uk_ipers; 17528355f576SJeff Roberson 17538355f576SJeff Roberson return (slab); 1754727c6918SJeff Roberson 1755727c6918SJeff Roberson fail: 1756727c6918SJeff Roberson return (NULL); 17578355f576SJeff Roberson } 17588355f576SJeff Roberson 17598355f576SJeff Roberson /* 1760537f92cdSMark Johnston * This function is intended to be used early on in place of page_alloc(). It 1761537f92cdSMark Johnston * performs contiguous physical memory allocations and uses a bump allocator for 1762537f92cdSMark Johnston * KVA, so is usable before the kernel map is initialized. 
1763009b6fcbSJeff Roberson */ 1764009b6fcbSJeff Roberson static void * 1765ab3185d1SJeff Roberson startup_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag, 1766ab3185d1SJeff Roberson int wait) 1767009b6fcbSJeff Roberson { 1768a81c400eSJeff Roberson vm_paddr_t pa; 1769a81c400eSJeff Roberson vm_page_t m; 1770ac0a6fd0SGleb Smirnoff void *mem; 1771ac0a6fd0SGleb Smirnoff int pages; 1772a81c400eSJeff Roberson int i; 1773099a0e58SBosko Milekic 1774f7d35785SGleb Smirnoff pages = howmany(bytes, PAGE_SIZE); 1775f7d35785SGleb Smirnoff KASSERT(pages > 0, ("%s can't reserve 0 pages", __func__)); 1776a81c400eSJeff Roberson 1777f7d35785SGleb Smirnoff *pflag = UMA_SLAB_BOOT; 1778a81c400eSJeff Roberson m = vm_page_alloc_contig_domain(NULL, 0, domain, 1779a81c400eSJeff Roberson malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED, pages, 1780a81c400eSJeff Roberson (vm_paddr_t)0, ~(vm_paddr_t)0, 1, 0, VM_MEMATTR_DEFAULT); 1781a81c400eSJeff Roberson if (m == NULL) 1782a81c400eSJeff Roberson return (NULL); 1783a81c400eSJeff Roberson 1784a81c400eSJeff Roberson pa = VM_PAGE_TO_PHYS(m); 1785a81c400eSJeff Roberson for (i = 0; i < pages; i++, pa += PAGE_SIZE) { 1786a81c400eSJeff Roberson #if defined(__aarch64__) || defined(__amd64__) || defined(__mips__) || \ 1787a81c400eSJeff Roberson defined(__riscv) || defined(__powerpc64__) 1788a81c400eSJeff Roberson if ((wait & M_NODUMP) == 0) 1789a81c400eSJeff Roberson dump_add_page(pa); 1790a81c400eSJeff Roberson #endif 1791a81c400eSJeff Roberson } 1792a81c400eSJeff Roberson /* Allocate KVA and indirectly advance bootmem. */ 1793a81c400eSJeff Roberson mem = (void *)pmap_map(&bootmem, m->phys_addr, 1794a81c400eSJeff Roberson m->phys_addr + (pages * PAGE_SIZE), VM_PROT_READ | VM_PROT_WRITE); 1795a81c400eSJeff Roberson if ((wait & M_ZERO) != 0) 1796a81c400eSJeff Roberson bzero(mem, pages * PAGE_SIZE); 1797f7d35785SGleb Smirnoff 1798f7d35785SGleb Smirnoff return (mem); 1799f7d35785SGleb Smirnoff } 1800f7d35785SGleb Smirnoff 1801a81c400eSJeff Roberson static void 1802a81c400eSJeff Roberson startup_free(void *mem, vm_size_t bytes) 1803a81c400eSJeff Roberson { 1804a81c400eSJeff Roberson vm_offset_t va; 1805a81c400eSJeff Roberson vm_page_t m; 1806a81c400eSJeff Roberson 1807a81c400eSJeff Roberson va = (vm_offset_t)mem; 1808a81c400eSJeff Roberson m = PHYS_TO_VM_PAGE(pmap_kextract(va)); 1809663de81fSMark Johnston 1810663de81fSMark Johnston /* 1811663de81fSMark Johnston * startup_alloc() returns direct-mapped slabs on some platforms. Avoid 1812663de81fSMark Johnston * unmapping ranges of the direct map. 1813663de81fSMark Johnston */ 1814663de81fSMark Johnston if (va >= bootstart && va + bytes <= bootmem) 1815a81c400eSJeff Roberson pmap_remove(kernel_pmap, va, va + bytes); 1816a81c400eSJeff Roberson for (; bytes != 0; bytes -= PAGE_SIZE, m++) { 1817a81c400eSJeff Roberson #if defined(__aarch64__) || defined(__amd64__) || defined(__mips__) || \ 1818a81c400eSJeff Roberson defined(__riscv) || defined(__powerpc64__) 1819a81c400eSJeff Roberson dump_drop_page(VM_PAGE_TO_PHYS(m)); 1820a81c400eSJeff Roberson #endif 1821a81c400eSJeff Roberson vm_page_unwire_noq(m); 1822a81c400eSJeff Roberson vm_page_free(m); 1823a81c400eSJeff Roberson } 1824a81c400eSJeff Roberson } 1825a81c400eSJeff Roberson 1826f7d35785SGleb Smirnoff /* 18278355f576SJeff Roberson * Allocates a number of pages from the system 18288355f576SJeff Roberson * 18298355f576SJeff Roberson * Arguments: 18308355f576SJeff Roberson * bytes The number of bytes requested 18318355f576SJeff Roberson * wait Shall we wait? 
18328355f576SJeff Roberson * 18338355f576SJeff Roberson * Returns: 18348355f576SJeff Roberson * A pointer to the alloced memory or possibly 18358355f576SJeff Roberson * NULL if M_NOWAIT is set. 18368355f576SJeff Roberson */ 18378355f576SJeff Roberson static void * 1838ab3185d1SJeff Roberson page_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag, 1839ab3185d1SJeff Roberson int wait) 18408355f576SJeff Roberson { 18418355f576SJeff Roberson void *p; /* Returned page */ 18428355f576SJeff Roberson 18432e47807cSJeff Roberson *pflag = UMA_SLAB_KERNEL; 18449978bd99SMark Johnston p = (void *)kmem_malloc_domainset(DOMAINSET_FIXED(domain), bytes, wait); 18458355f576SJeff Roberson 18468355f576SJeff Roberson return (p); 18478355f576SJeff Roberson } 18488355f576SJeff Roberson 1849ab3059a8SMatt Macy static void * 1850ab3059a8SMatt Macy pcpu_page_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag, 1851ab3059a8SMatt Macy int wait) 1852ab3059a8SMatt Macy { 1853ab3059a8SMatt Macy struct pglist alloctail; 1854ab3059a8SMatt Macy vm_offset_t addr, zkva; 1855ab3059a8SMatt Macy int cpu, flags; 1856ab3059a8SMatt Macy vm_page_t p, p_next; 1857ab3059a8SMatt Macy #ifdef NUMA 1858ab3059a8SMatt Macy struct pcpu *pc; 1859ab3059a8SMatt Macy #endif 1860ab3059a8SMatt Macy 1861ab3059a8SMatt Macy MPASS(bytes == (mp_maxid + 1) * PAGE_SIZE); 1862ab3059a8SMatt Macy 1863013072f0SMark Johnston TAILQ_INIT(&alloctail); 1864ab3059a8SMatt Macy flags = VM_ALLOC_SYSTEM | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ | 1865013072f0SMark Johnston malloc2vm_flags(wait); 1866013072f0SMark Johnston *pflag = UMA_SLAB_KERNEL; 1867ab3059a8SMatt Macy for (cpu = 0; cpu <= mp_maxid; cpu++) { 1868ab3059a8SMatt Macy if (CPU_ABSENT(cpu)) { 1869ab3059a8SMatt Macy p = vm_page_alloc(NULL, 0, flags); 1870ab3059a8SMatt Macy } else { 1871ab3059a8SMatt Macy #ifndef NUMA 1872ab3059a8SMatt Macy p = vm_page_alloc(NULL, 0, flags); 1873ab3059a8SMatt Macy #else 1874ab3059a8SMatt Macy pc = pcpu_find(cpu); 187520526802SAndrew Gallatin if (__predict_false(VM_DOMAIN_EMPTY(pc->pc_domain))) 187620526802SAndrew Gallatin p = NULL; 187720526802SAndrew Gallatin else 187820526802SAndrew Gallatin p = vm_page_alloc_domain(NULL, 0, 187920526802SAndrew Gallatin pc->pc_domain, flags); 1880ab3059a8SMatt Macy if (__predict_false(p == NULL)) 1881ab3059a8SMatt Macy p = vm_page_alloc(NULL, 0, flags); 1882ab3059a8SMatt Macy #endif 1883ab3059a8SMatt Macy } 1884ab3059a8SMatt Macy if (__predict_false(p == NULL)) 1885ab3059a8SMatt Macy goto fail; 1886ab3059a8SMatt Macy TAILQ_INSERT_TAIL(&alloctail, p, listq); 1887ab3059a8SMatt Macy } 1888ab3059a8SMatt Macy if ((addr = kva_alloc(bytes)) == 0) 1889ab3059a8SMatt Macy goto fail; 1890ab3059a8SMatt Macy zkva = addr; 1891ab3059a8SMatt Macy TAILQ_FOREACH(p, &alloctail, listq) { 1892ab3059a8SMatt Macy pmap_qenter(zkva, &p, 1); 1893ab3059a8SMatt Macy zkva += PAGE_SIZE; 1894ab3059a8SMatt Macy } 1895ab3059a8SMatt Macy return ((void*)addr); 1896ab3059a8SMatt Macy fail: 1897ab3059a8SMatt Macy TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) { 189888ea538aSMark Johnston vm_page_unwire_noq(p); 1899ab3059a8SMatt Macy vm_page_free(p); 1900ab3059a8SMatt Macy } 1901ab3059a8SMatt Macy return (NULL); 1902ab3059a8SMatt Macy } 1903ab3059a8SMatt Macy 19048355f576SJeff Roberson /* 19058355f576SJeff Roberson * Allocates a number of pages from within an object 19068355f576SJeff Roberson * 19078355f576SJeff Roberson * Arguments: 19088355f576SJeff Roberson * bytes The number of bytes requested 19098355f576SJeff Roberson * wait Shall we wait? 
19108355f576SJeff Roberson * 19118355f576SJeff Roberson * Returns: 19128355f576SJeff Roberson * A pointer to the alloced memory or possibly 19138355f576SJeff Roberson * NULL if M_NOWAIT is set. 19148355f576SJeff Roberson */ 19158355f576SJeff Roberson static void * 1916ab3185d1SJeff Roberson noobj_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *flags, 1917ab3185d1SJeff Roberson int wait) 19188355f576SJeff Roberson { 1919a4915c21SAttilio Rao TAILQ_HEAD(, vm_page) alloctail; 1920a4915c21SAttilio Rao u_long npages; 1921b245ac95SAlan Cox vm_offset_t retkva, zkva; 1922a4915c21SAttilio Rao vm_page_t p, p_next; 1923e20a199fSJeff Roberson uma_keg_t keg; 19248355f576SJeff Roberson 1925a4915c21SAttilio Rao TAILQ_INIT(&alloctail); 1926bb15d1c7SGleb Smirnoff keg = zone->uz_keg; 1927a4915c21SAttilio Rao 1928a4915c21SAttilio Rao npages = howmany(bytes, PAGE_SIZE); 1929a4915c21SAttilio Rao while (npages > 0) { 1930ab3185d1SJeff Roberson p = vm_page_alloc_domain(NULL, 0, domain, VM_ALLOC_INTERRUPT | 19318d6fbbb8SJeff Roberson VM_ALLOC_WIRED | VM_ALLOC_NOOBJ | 1932772c8b67SKonstantin Belousov ((wait & M_WAITOK) != 0 ? VM_ALLOC_WAITOK : 1933772c8b67SKonstantin Belousov VM_ALLOC_NOWAIT)); 1934a4915c21SAttilio Rao if (p != NULL) { 1935a4915c21SAttilio Rao /* 1936a4915c21SAttilio Rao * Since the page does not belong to an object, its 1937a4915c21SAttilio Rao * listq is unused. 1938a4915c21SAttilio Rao */ 1939a4915c21SAttilio Rao TAILQ_INSERT_TAIL(&alloctail, p, listq); 1940a4915c21SAttilio Rao npages--; 1941a4915c21SAttilio Rao continue; 1942a4915c21SAttilio Rao } 19438355f576SJeff Roberson /* 1944a4915c21SAttilio Rao * Page allocation failed, free intermediate pages and 1945a4915c21SAttilio Rao * exit. 19468355f576SJeff Roberson */ 1947a4915c21SAttilio Rao TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) { 194888ea538aSMark Johnston vm_page_unwire_noq(p); 1949b245ac95SAlan Cox vm_page_free(p); 1950b245ac95SAlan Cox } 1951a4915c21SAttilio Rao return (NULL); 1952b245ac95SAlan Cox } 19538355f576SJeff Roberson *flags = UMA_SLAB_PRIV; 1954a4915c21SAttilio Rao zkva = keg->uk_kva + 1955a4915c21SAttilio Rao atomic_fetchadd_long(&keg->uk_offset, round_page(bytes)); 1956a4915c21SAttilio Rao retkva = zkva; 1957a4915c21SAttilio Rao TAILQ_FOREACH(p, &alloctail, listq) { 1958a4915c21SAttilio Rao pmap_qenter(zkva, &p, 1); 1959a4915c21SAttilio Rao zkva += PAGE_SIZE; 1960a4915c21SAttilio Rao } 19618355f576SJeff Roberson 19628355f576SJeff Roberson return ((void *)retkva); 19638355f576SJeff Roberson } 19648355f576SJeff Roberson 19658355f576SJeff Roberson /* 1966ec0d8280SRyan Libby * Allocate physically contiguous pages. 
1967ec0d8280SRyan Libby */ 1968ec0d8280SRyan Libby static void * 1969ec0d8280SRyan Libby contig_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag, 1970ec0d8280SRyan Libby int wait) 1971ec0d8280SRyan Libby { 1972ec0d8280SRyan Libby 1973ec0d8280SRyan Libby *pflag = UMA_SLAB_KERNEL; 1974ec0d8280SRyan Libby return ((void *)kmem_alloc_contig_domainset(DOMAINSET_FIXED(domain), 1975ec0d8280SRyan Libby bytes, wait, 0, ~(vm_paddr_t)0, 1, 0, VM_MEMATTR_DEFAULT)); 1976ec0d8280SRyan Libby } 1977ec0d8280SRyan Libby 1978ec0d8280SRyan Libby /* 19798355f576SJeff Roberson * Frees a number of pages to the system 19808355f576SJeff Roberson * 19818355f576SJeff Roberson * Arguments: 19828355f576SJeff Roberson * mem A pointer to the memory to be freed 19838355f576SJeff Roberson * size The size of the memory being freed 19848355f576SJeff Roberson * flags The original p->us_flags field 19858355f576SJeff Roberson * 19868355f576SJeff Roberson * Returns: 19878355f576SJeff Roberson * Nothing 19888355f576SJeff Roberson */ 19898355f576SJeff Roberson static void 1990f2c2231eSRyan Stone page_free(void *mem, vm_size_t size, uint8_t flags) 19918355f576SJeff Roberson { 19923370c5bfSJeff Roberson 1993a81c400eSJeff Roberson if ((flags & UMA_SLAB_BOOT) != 0) { 1994a81c400eSJeff Roberson startup_free(mem, size); 1995a81c400eSJeff Roberson return; 1996a81c400eSJeff Roberson } 1997a81c400eSJeff Roberson 1998ec0d8280SRyan Libby KASSERT((flags & UMA_SLAB_KERNEL) != 0, 1999ec0d8280SRyan Libby ("UMA: page_free used with invalid flags %x", flags)); 20008355f576SJeff Roberson 200149bfa624SAlan Cox kmem_free((vm_offset_t)mem, size); 20028355f576SJeff Roberson } 20038355f576SJeff Roberson 20048355f576SJeff Roberson /* 2005ab3059a8SMatt Macy * Frees pcpu zone allocations 2006ab3059a8SMatt Macy * 2007ab3059a8SMatt Macy * Arguments: 2008ab3059a8SMatt Macy * mem A pointer to the memory to be freed 2009ab3059a8SMatt Macy * size The size of the memory being freed 2010ab3059a8SMatt Macy * flags The original p->us_flags field 2011ab3059a8SMatt Macy * 2012ab3059a8SMatt Macy * Returns: 2013ab3059a8SMatt Macy * Nothing 2014ab3059a8SMatt Macy */ 2015ab3059a8SMatt Macy static void 2016ab3059a8SMatt Macy pcpu_page_free(void *mem, vm_size_t size, uint8_t flags) 2017ab3059a8SMatt Macy { 2018ab3059a8SMatt Macy vm_offset_t sva, curva; 2019ab3059a8SMatt Macy vm_paddr_t paddr; 2020ab3059a8SMatt Macy vm_page_t m; 2021ab3059a8SMatt Macy 2022ab3059a8SMatt Macy MPASS(size == (mp_maxid+1)*PAGE_SIZE); 20235ba16cf3SRyan Libby 20245ba16cf3SRyan Libby if ((flags & UMA_SLAB_BOOT) != 0) { 20255ba16cf3SRyan Libby startup_free(mem, size); 20265ba16cf3SRyan Libby return; 20275ba16cf3SRyan Libby } 20285ba16cf3SRyan Libby 2029ab3059a8SMatt Macy sva = (vm_offset_t)mem; 2030ab3059a8SMatt Macy for (curva = sva; curva < sva + size; curva += PAGE_SIZE) { 2031ab3059a8SMatt Macy paddr = pmap_kextract(curva); 2032ab3059a8SMatt Macy m = PHYS_TO_VM_PAGE(paddr); 203388ea538aSMark Johnston vm_page_unwire_noq(m); 2034ab3059a8SMatt Macy vm_page_free(m); 2035ab3059a8SMatt Macy } 2036ab3059a8SMatt Macy pmap_qremove(sva, size >> PAGE_SHIFT); 2037ab3059a8SMatt Macy kva_free(sva, size); 2038ab3059a8SMatt Macy } 2039ab3059a8SMatt Macy 2040ab3059a8SMatt Macy /* 20418355f576SJeff Roberson * Zero fill initializer 20428355f576SJeff Roberson * 20438355f576SJeff Roberson * Arguments/Returns follow uma_init specifications 20448355f576SJeff Roberson */ 2045b23f72e9SBrian Feldman static int 2046b23f72e9SBrian Feldman zero_init(void *mem, int size, int flags) 20478355f576SJeff Roberson 
{ 20488355f576SJeff Roberson bzero(mem, size); 2049b23f72e9SBrian Feldman return (0); 20508355f576SJeff Roberson } 20518355f576SJeff Roberson 2052815db204SRyan Libby #ifdef INVARIANTS 205354007ce8SMark Johnston static struct noslabbits * 2054815db204SRyan Libby slab_dbg_bits(uma_slab_t slab, uma_keg_t keg) 2055815db204SRyan Libby { 2056815db204SRyan Libby 2057815db204SRyan Libby return ((void *)((char *)&slab->us_free + BITSET_SIZE(keg->uk_ipers))); 2058815db204SRyan Libby } 2059815db204SRyan Libby #endif 2060815db204SRyan Libby 20618355f576SJeff Roberson /* 20629b78b1f4SJeff Roberson * Actual size of embedded struct slab (!OFFPAGE). 20639b78b1f4SJeff Roberson */ 206454007ce8SMark Johnston static size_t 20659b78b1f4SJeff Roberson slab_sizeof(int nitems) 20669b78b1f4SJeff Roberson { 20679b78b1f4SJeff Roberson size_t s; 20689b78b1f4SJeff Roberson 2069815db204SRyan Libby s = sizeof(struct uma_slab) + BITSET_SIZE(nitems) * SLAB_BITSETS; 20709b78b1f4SJeff Roberson return (roundup(s, UMA_ALIGN_PTR + 1)); 20719b78b1f4SJeff Roberson } 20729b78b1f4SJeff Roberson 20734a8b575cSRyan Libby #define UMA_FIXPT_SHIFT 31 20744a8b575cSRyan Libby #define UMA_FRAC_FIXPT(n, d) \ 20754a8b575cSRyan Libby ((uint32_t)(((uint64_t)(n) << UMA_FIXPT_SHIFT) / (d))) 20764a8b575cSRyan Libby #define UMA_FIXPT_PCT(f) \ 20774a8b575cSRyan Libby ((u_int)(((uint64_t)100 * (f)) >> UMA_FIXPT_SHIFT)) 20784a8b575cSRyan Libby #define UMA_PCT_FIXPT(pct) UMA_FRAC_FIXPT((pct), 100) 20794a8b575cSRyan Libby #define UMA_MIN_EFF UMA_PCT_FIXPT(100 - UMA_MAX_WASTE) 20804a8b575cSRyan Libby 20819b78b1f4SJeff Roberson /* 20824a8b575cSRyan Libby * Compute the number of items that will fit in a slab. If hdr is true, the 20834a8b575cSRyan Libby * item count may be limited to provide space in the slab for an inline slab 20844a8b575cSRyan Libby * header. Otherwise, all slab space will be provided for item storage. 20854a8b575cSRyan Libby */ 20864a8b575cSRyan Libby static u_int 20874a8b575cSRyan Libby slab_ipers_hdr(u_int size, u_int rsize, u_int slabsize, bool hdr) 20884a8b575cSRyan Libby { 20894a8b575cSRyan Libby u_int ipers; 20904a8b575cSRyan Libby u_int padpi; 20914a8b575cSRyan Libby 20924a8b575cSRyan Libby /* The padding between items is not needed after the last item. */ 20934a8b575cSRyan Libby padpi = rsize - size; 20944a8b575cSRyan Libby 20954a8b575cSRyan Libby if (hdr) { 20964a8b575cSRyan Libby /* 20974a8b575cSRyan Libby * Start with the maximum item count and remove items until 20984a8b575cSRyan Libby * the slab header fits alongside the allocatable memory.
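 * For illustration, assume 4KB pages with size = rsize = 256 (padpi = 0)
 * and slabsize = 4096: without a header 16 items would fit, but 16 * 256
 * already consumes the whole slab, so the search settles on 15 items,
 * since 15 * 256 + slab_sizeof(15) still fits in 4096 bytes.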
20994a8b575cSRyan Libby */ 21004a8b575cSRyan Libby for (ipers = MIN(SLAB_MAX_SETSIZE, 21014a8b575cSRyan Libby (slabsize + padpi - slab_sizeof(1)) / rsize); 21024a8b575cSRyan Libby ipers > 0 && 21034a8b575cSRyan Libby ipers * rsize - padpi + slab_sizeof(ipers) > slabsize; 21044a8b575cSRyan Libby ipers--) 21054a8b575cSRyan Libby continue; 21064a8b575cSRyan Libby } else { 21074a8b575cSRyan Libby ipers = MIN((slabsize + padpi) / rsize, SLAB_MAX_SETSIZE); 21084a8b575cSRyan Libby } 21094a8b575cSRyan Libby 21104a8b575cSRyan Libby return (ipers); 21114a8b575cSRyan Libby } 21124a8b575cSRyan Libby 211327ca37acSRyan Libby struct keg_layout_result { 211427ca37acSRyan Libby u_int format; 211527ca37acSRyan Libby u_int slabsize; 211627ca37acSRyan Libby u_int ipers; 211727ca37acSRyan Libby u_int eff; 211827ca37acSRyan Libby }; 211927ca37acSRyan Libby 212027ca37acSRyan Libby static void 212127ca37acSRyan Libby keg_layout_one(uma_keg_t keg, u_int rsize, u_int slabsize, u_int fmt, 212227ca37acSRyan Libby struct keg_layout_result *kl) 212327ca37acSRyan Libby { 212427ca37acSRyan Libby u_int total; 212527ca37acSRyan Libby 212627ca37acSRyan Libby kl->format = fmt; 212727ca37acSRyan Libby kl->slabsize = slabsize; 212827ca37acSRyan Libby 212927ca37acSRyan Libby /* Handle INTERNAL as inline with an extra page. */ 213027ca37acSRyan Libby if ((fmt & UMA_ZFLAG_INTERNAL) != 0) { 213127ca37acSRyan Libby kl->format &= ~UMA_ZFLAG_INTERNAL; 213227ca37acSRyan Libby kl->slabsize += PAGE_SIZE; 213327ca37acSRyan Libby } 213427ca37acSRyan Libby 213527ca37acSRyan Libby kl->ipers = slab_ipers_hdr(keg->uk_size, rsize, kl->slabsize, 213627ca37acSRyan Libby (fmt & UMA_ZFLAG_OFFPAGE) == 0); 213727ca37acSRyan Libby 213827ca37acSRyan Libby /* Account for memory used by an offpage slab header. */ 213927ca37acSRyan Libby total = kl->slabsize; 214027ca37acSRyan Libby if ((fmt & UMA_ZFLAG_OFFPAGE) != 0) 214127ca37acSRyan Libby total += slabzone(kl->ipers)->uz_keg->uk_rsize; 214227ca37acSRyan Libby 214327ca37acSRyan Libby kl->eff = UMA_FRAC_FIXPT(kl->ipers * rsize, total); 214427ca37acSRyan Libby } 214527ca37acSRyan Libby 21469b78b1f4SJeff Roberson /* 21474a8b575cSRyan Libby * Determine the format of a uma keg. This determines where the slab header 21484a8b575cSRyan Libby * will be placed (inline or offpage) and calculates ipers, rsize, and ppera. 
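 * Candidate layouts are compared by storage efficiency in 31-bit fixed
 * point: eff = UMA_FRAC_FIXPT(ipers * rsize, total slab bytes), and a
 * layout is acceptable once UMA_FIXPT_PCT(eff) reaches 100 - UMA_MAX_WASTE
 * percent (UMA_MIN_EFF).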
21498355f576SJeff Roberson * 21508355f576SJeff Roberson * Arguments 2151e20a199fSJeff Roberson * keg The zone we should initialize 21528355f576SJeff Roberson * 21538355f576SJeff Roberson * Returns 21548355f576SJeff Roberson * Nothing 21558355f576SJeff Roberson */ 21568355f576SJeff Roberson static void 21574a8b575cSRyan Libby keg_layout(uma_keg_t keg) 21588355f576SJeff Roberson { 215927ca37acSRyan Libby struct keg_layout_result kl = {}, kl_tmp; 216027ca37acSRyan Libby u_int fmts[2]; 21614a8b575cSRyan Libby u_int alignsize; 216227ca37acSRyan Libby u_int nfmt; 21634a8b575cSRyan Libby u_int pages; 2164244f4554SBosko Milekic u_int rsize; 2165a55ebb7cSAndriy Gapon u_int slabsize; 216627ca37acSRyan Libby u_int i, j; 21678355f576SJeff Roberson 21684a8b575cSRyan Libby KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 || 21694a8b575cSRyan Libby (keg->uk_size <= UMA_PCPU_ALLOC_SIZE && 21704a8b575cSRyan Libby (keg->uk_flags & UMA_ZONE_CACHESPREAD) == 0), 21714a8b575cSRyan Libby ("%s: cannot configure for PCPU: keg=%s, size=%u, flags=0x%b", 21724a8b575cSRyan Libby __func__, keg->uk_name, keg->uk_size, keg->uk_flags, 21734a8b575cSRyan Libby PRINT_UMA_ZFLAGS)); 2174bae55c4aSRyan Libby KASSERT((keg->uk_flags & (UMA_ZFLAG_INTERNAL | UMA_ZONE_VM)) == 0 || 21754a8b575cSRyan Libby (keg->uk_flags & (UMA_ZONE_NOTOUCH | UMA_ZONE_PCPU)) == 0, 21764a8b575cSRyan Libby ("%s: incompatible flags 0x%b", __func__, keg->uk_flags, 21774a8b575cSRyan Libby PRINT_UMA_ZFLAGS)); 2178e28a647dSGleb Smirnoff 21794a8b575cSRyan Libby alignsize = keg->uk_align + 1; 2180ad97af7eSGleb Smirnoff 2181ef72505eSJeff Roberson /* 2182ef72505eSJeff Roberson * Calculate the size of each allocation (rsize) according to 2183ef72505eSJeff Roberson * alignment. If the requested size is smaller than we have 2184ef72505eSJeff Roberson * allocation bits for we round it up. 2185ef72505eSJeff Roberson */ 21869b8db4d0SRyan Libby rsize = MAX(keg->uk_size, UMA_SMALLEST_UNIT); 21874a8b575cSRyan Libby rsize = roundup2(rsize, alignsize); 2188ad97af7eSGleb Smirnoff 218927ca37acSRyan Libby if ((keg->uk_flags & UMA_ZONE_CACHESPREAD) != 0) { 21909b78b1f4SJeff Roberson /* 21914a8b575cSRyan Libby * We want one item to start on every align boundary in a page. 21924a8b575cSRyan Libby * To do this we will span pages. We will also extend the item 21934a8b575cSRyan Libby * by the size of align if it is an even multiple of align. 21944a8b575cSRyan Libby * Otherwise, it would fall on the same boundary every time. 21959b78b1f4SJeff Roberson */ 21964a8b575cSRyan Libby if ((rsize & alignsize) == 0) 21974a8b575cSRyan Libby rsize += alignsize; 21984a8b575cSRyan Libby slabsize = rsize * (PAGE_SIZE / alignsize); 21994a8b575cSRyan Libby slabsize = MIN(slabsize, rsize * SLAB_MAX_SETSIZE); 22004a8b575cSRyan Libby slabsize = MIN(slabsize, UMA_CACHESPREAD_MAX_SIZE); 220127ca37acSRyan Libby slabsize = round_page(slabsize); 22024a8b575cSRyan Libby } else { 22034a8b575cSRyan Libby /* 220427ca37acSRyan Libby * Start with a slab size of as many pages as it takes to 220527ca37acSRyan Libby * represent a single item. We will try to fit as many 220627ca37acSRyan Libby * additional items into the slab as possible. 22074a8b575cSRyan Libby */ 220827ca37acSRyan Libby slabsize = round_page(keg->uk_size); 22091ca6ed45SGleb Smirnoff } 2210ad97af7eSGleb Smirnoff 221127ca37acSRyan Libby /* Build a list of all of the available formats for this keg. */ 221227ca37acSRyan Libby nfmt = 0; 221327ca37acSRyan Libby 22144a8b575cSRyan Libby /* Evaluate an inline slab layout. 
*/ 22154a8b575cSRyan Libby if ((keg->uk_flags & (UMA_ZONE_NOTOUCH | UMA_ZONE_PCPU)) == 0) 221627ca37acSRyan Libby fmts[nfmt++] = 0; 22174a8b575cSRyan Libby 22184a8b575cSRyan Libby /* TODO: vm_page-embedded slab. */ 2219244f4554SBosko Milekic 222020e8e865SBosko Milekic /* 2221244f4554SBosko Milekic * We can't do OFFPAGE if we're internal or if we've been 222220e8e865SBosko Milekic * asked to not go to the VM for buckets. If we do this we 2223bae55c4aSRyan Libby * may end up going to the VM for slabs which we do not want 2224bae55c4aSRyan Libby * to do if we're UMA_ZONE_VM, which clearly forbids it. 2225bae55c4aSRyan Libby * In those cases, evaluate a pseudo-format called INTERNAL 2226bae55c4aSRyan Libby * which has an inline slab header and one extra page to 2227bae55c4aSRyan Libby * guarantee that it fits. 222827ca37acSRyan Libby * 222927ca37acSRyan Libby * Otherwise, see if using an OFFPAGE slab will improve our 223027ca37acSRyan Libby * efficiency. 223120e8e865SBosko Milekic */ 2232bae55c4aSRyan Libby if ((keg->uk_flags & (UMA_ZFLAG_INTERNAL | UMA_ZONE_VM)) != 0) 223327ca37acSRyan Libby fmts[nfmt++] = UMA_ZFLAG_INTERNAL; 223427ca37acSRyan Libby else 223527ca37acSRyan Libby fmts[nfmt++] = UMA_ZFLAG_OFFPAGE; 2236244f4554SBosko Milekic 2237ef72505eSJeff Roberson /* 223827ca37acSRyan Libby * Choose a slab size and format which satisfy the minimum efficiency. 223927ca37acSRyan Libby * Prefer the smallest slab size that meets the constraints. 2240ef72505eSJeff Roberson * 224127ca37acSRyan Libby * Start with a minimum slab size, to accommodate CACHESPREAD. Then, 224227ca37acSRyan Libby * for small items (up to PAGE_SIZE), the iteration increment is one 224327ca37acSRyan Libby * page; and for large items, the increment is one item. 2244ef72505eSJeff Roberson */ 224527ca37acSRyan Libby i = (slabsize + rsize - keg->uk_size) / MAX(PAGE_SIZE, rsize); 224627ca37acSRyan Libby KASSERT(i >= 1, ("keg %s(%p) flags=0x%b slabsize=%u, rsize=%u, i=%u", 224727ca37acSRyan Libby keg->uk_name, keg, keg->uk_flags, PRINT_UMA_ZFLAGS, slabsize, 224827ca37acSRyan Libby rsize, i)); 224927ca37acSRyan Libby for ( ; ; i++) { 225027ca37acSRyan Libby slabsize = (rsize <= PAGE_SIZE) ? ptoa(i) : 225127ca37acSRyan Libby round_page(rsize * (i - 1) + keg->uk_size); 225227ca37acSRyan Libby 225327ca37acSRyan Libby for (j = 0; j < nfmt; j++) { 225427ca37acSRyan Libby /* Only if we have no viable format yet. */ 225527ca37acSRyan Libby if ((fmts[j] & UMA_ZFLAG_INTERNAL) != 0 && 225627ca37acSRyan Libby kl.ipers > 0) 225727ca37acSRyan Libby continue; 225827ca37acSRyan Libby 225927ca37acSRyan Libby keg_layout_one(keg, rsize, slabsize, fmts[j], &kl_tmp); 226027ca37acSRyan Libby if (kl_tmp.eff <= kl.eff) 226127ca37acSRyan Libby continue; 226227ca37acSRyan Libby 226327ca37acSRyan Libby kl = kl_tmp; 226427ca37acSRyan Libby 226527ca37acSRyan Libby CTR6(KTR_UMA, "keg %s layout: format %#x " 226627ca37acSRyan Libby "(ipers %u * rsize %u) / slabsize %#x = %u%% eff", 226727ca37acSRyan Libby keg->uk_name, kl.format, kl.ipers, rsize, 226827ca37acSRyan Libby kl.slabsize, UMA_FIXPT_PCT(kl.eff)); 226927ca37acSRyan Libby 227027ca37acSRyan Libby /* Stop when we reach the minimum efficiency. 
*/ 227127ca37acSRyan Libby if (kl.eff >= UMA_MIN_EFF) 227227ca37acSRyan Libby break; 22738355f576SJeff Roberson } 2274ad97af7eSGleb Smirnoff 227533e5a1eaSRyan Libby if (kl.eff >= UMA_MIN_EFF || !multipage_slabs || 227627ca37acSRyan Libby slabsize >= SLAB_MAX_SETSIZE * rsize || 227727ca37acSRyan Libby (keg->uk_flags & (UMA_ZONE_PCPU | UMA_ZONE_CONTIG)) != 0) 227827ca37acSRyan Libby break; 227927ca37acSRyan Libby } 228027ca37acSRyan Libby 228127ca37acSRyan Libby pages = atop(kl.slabsize); 228227ca37acSRyan Libby if ((keg->uk_flags & UMA_ZONE_PCPU) != 0) 228327ca37acSRyan Libby pages *= mp_maxid + 1; 228427ca37acSRyan Libby 228527ca37acSRyan Libby keg->uk_rsize = rsize; 228627ca37acSRyan Libby keg->uk_ipers = kl.ipers; 228727ca37acSRyan Libby keg->uk_ppera = pages; 228827ca37acSRyan Libby keg->uk_flags |= kl.format; 228927ca37acSRyan Libby 22904a8b575cSRyan Libby /* 22914a8b575cSRyan Libby * How do we find the slab header if it is offpage or if not all item 22924a8b575cSRyan Libby * start addresses are in the same page? We could solve the latter 22934a8b575cSRyan Libby * case with vaddr alignment, but we don't. 22944a8b575cSRyan Libby */ 229527ca37acSRyan Libby if ((keg->uk_flags & UMA_ZFLAG_OFFPAGE) != 0 || 229627ca37acSRyan Libby (keg->uk_ipers - 1) * rsize >= PAGE_SIZE) { 229754c5ae80SRyan Libby if ((keg->uk_flags & UMA_ZONE_NOTPAGE) != 0) 229827ca37acSRyan Libby keg->uk_flags |= UMA_ZFLAG_HASH; 229954c5ae80SRyan Libby else 230027ca37acSRyan Libby keg->uk_flags |= UMA_ZFLAG_VTOSLAB; 230154c5ae80SRyan Libby } 230227ca37acSRyan Libby 2303e63a1c2fSRyan Libby CTR6(KTR_UMA, "%s: keg=%s, flags=%#x, rsize=%u, ipers=%u, ppera=%u", 230427ca37acSRyan Libby __func__, keg->uk_name, keg->uk_flags, rsize, keg->uk_ipers, 230527ca37acSRyan Libby pages); 23064a8b575cSRyan Libby KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_MAX_SETSIZE, 23074a8b575cSRyan Libby ("%s: keg=%s, flags=0x%b, rsize=%u, ipers=%u, ppera=%u", __func__, 230827ca37acSRyan Libby keg->uk_name, keg->uk_flags, PRINT_UMA_ZFLAGS, rsize, 230927ca37acSRyan Libby keg->uk_ipers, pages)); 2310e20a199fSJeff Roberson } 2311e20a199fSJeff Roberson 23128355f576SJeff Roberson /* 2313099a0e58SBosko Milekic * Keg header ctor. This initializes all fields, locks, etc. And inserts 2314099a0e58SBosko Milekic * the keg onto the global keg list. 23158355f576SJeff Roberson * 23168355f576SJeff Roberson * Arguments/Returns follow uma_ctor specifications 2317099a0e58SBosko Milekic * udata Actually uma_kctor_args 2318099a0e58SBosko Milekic */ 2319b23f72e9SBrian Feldman static int 2320b23f72e9SBrian Feldman keg_ctor(void *mem, int size, void *udata, int flags) 2321099a0e58SBosko Milekic { 2322099a0e58SBosko Milekic struct uma_kctor_args *arg = udata; 2323099a0e58SBosko Milekic uma_keg_t keg = mem; 2324099a0e58SBosko Milekic uma_zone_t zone; 23258b987a77SJeff Roberson int i; 2326099a0e58SBosko Milekic 2327099a0e58SBosko Milekic bzero(keg, size); 2328099a0e58SBosko Milekic keg->uk_size = arg->size; 2329099a0e58SBosko Milekic keg->uk_init = arg->uminit; 2330099a0e58SBosko Milekic keg->uk_fini = arg->fini; 2331099a0e58SBosko Milekic keg->uk_align = arg->align; 23326fd34d6fSJeff Roberson keg->uk_reserve = 0; 2333099a0e58SBosko Milekic keg->uk_flags = arg->flags; 2334099a0e58SBosko Milekic 2335099a0e58SBosko Milekic /* 2336194a979eSMark Johnston * We use a global round-robin policy by default. Zones with 2337dfe13344SJeff Roberson * UMA_ZONE_FIRSTTOUCH set will use first-touch instead, in which 2338dfe13344SJeff Roberson * case the iterator is never run. 
2339194a979eSMark Johnston */ 2340194a979eSMark Johnston keg->uk_dr.dr_policy = DOMAINSET_RR(); 2341194a979eSMark Johnston keg->uk_dr.dr_iter = 0; 2342194a979eSMark Johnston 2343194a979eSMark Johnston /* 2344c8b0a88bSJeff Roberson * The primary zone is passed to us at keg-creation time. 2345099a0e58SBosko Milekic */ 2346099a0e58SBosko Milekic zone = arg->zone; 2347e20a199fSJeff Roberson keg->uk_name = zone->uz_name; 2348099a0e58SBosko Milekic 2349099a0e58SBosko Milekic if (arg->flags & UMA_ZONE_ZINIT) 2350099a0e58SBosko Milekic keg->uk_init = zero_init; 2351099a0e58SBosko Milekic 2352cfcae3f8SGleb Smirnoff if (arg->flags & UMA_ZONE_MALLOC) 235354c5ae80SRyan Libby keg->uk_flags |= UMA_ZFLAG_VTOSLAB; 2354e20a199fSJeff Roberson 235554c5ae80SRyan Libby #ifndef SMP 2356ad97af7eSGleb Smirnoff keg->uk_flags &= ~UMA_ZONE_PCPU; 2357ad97af7eSGleb Smirnoff #endif 2358ad97af7eSGleb Smirnoff 23594a8b575cSRyan Libby keg_layout(keg); 2360099a0e58SBosko Milekic 23618b987a77SJeff Roberson /* 2362c6fd3e23SJeff Roberson * Use a first-touch NUMA policy for kegs that pmap_extract() will 2363c6fd3e23SJeff Roberson * work on. Use round-robin for everything else. 2364dfe13344SJeff Roberson * 2365dfe13344SJeff Roberson * Zones may override the default by specifying either. 23668b987a77SJeff Roberson */ 2367dfe13344SJeff Roberson #ifdef NUMA 2368dfe13344SJeff Roberson if ((keg->uk_flags & 2369c6fd3e23SJeff Roberson (UMA_ZONE_ROUNDROBIN | UMA_ZFLAG_CACHE | UMA_ZONE_NOTPAGE)) == 0) 2370dfe13344SJeff Roberson keg->uk_flags |= UMA_ZONE_FIRSTTOUCH; 2371dfe13344SJeff Roberson else if ((keg->uk_flags & UMA_ZONE_FIRSTTOUCH) == 0) 2372dfe13344SJeff Roberson keg->uk_flags |= UMA_ZONE_ROUNDROBIN; 23738b987a77SJeff Roberson #endif 23748b987a77SJeff Roberson 2375099a0e58SBosko Milekic /* 2376099a0e58SBosko Milekic * If we haven't booted yet we need allocations to go through the 2377099a0e58SBosko Milekic * startup cache until the vm is ready. 2378099a0e58SBosko Milekic */ 237977e19437SGleb Smirnoff #ifdef UMA_MD_SMALL_ALLOC 2380a81c400eSJeff Roberson if (keg->uk_ppera == 1) 238177e19437SGleb Smirnoff keg->uk_allocf = uma_small_alloc; 2382a81c400eSJeff Roberson else 23838cd02d00SAlan Cox #endif 2384a81c400eSJeff Roberson if (booted < BOOT_KVA) 2385a81c400eSJeff Roberson keg->uk_allocf = startup_alloc; 2386ab3059a8SMatt Macy else if (keg->uk_flags & UMA_ZONE_PCPU) 2387ab3059a8SMatt Macy keg->uk_allocf = pcpu_page_alloc; 2388ec0d8280SRyan Libby else if ((keg->uk_flags & UMA_ZONE_CONTIG) != 0 && keg->uk_ppera > 1) 2389ec0d8280SRyan Libby keg->uk_allocf = contig_alloc; 239077e19437SGleb Smirnoff else 239177e19437SGleb Smirnoff keg->uk_allocf = page_alloc; 239277e19437SGleb Smirnoff #ifdef UMA_MD_SMALL_ALLOC 239377e19437SGleb Smirnoff if (keg->uk_ppera == 1) 239477e19437SGleb Smirnoff keg->uk_freef = uma_small_free; 239577e19437SGleb Smirnoff else 239677e19437SGleb Smirnoff #endif 2397ab3059a8SMatt Macy if (keg->uk_flags & UMA_ZONE_PCPU) 2398ab3059a8SMatt Macy keg->uk_freef = pcpu_page_free; 2399ab3059a8SMatt Macy else 240077e19437SGleb Smirnoff keg->uk_freef = page_free; 2401099a0e58SBosko Milekic 2402099a0e58SBosko Milekic /* 24038b987a77SJeff Roberson * Initialize keg's locks. 
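 * One lock per NUMA domain keeps slab operations in different domains
 * from contending with one another on large machines.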
2404099a0e58SBosko Milekic */ 24058b987a77SJeff Roberson for (i = 0; i < vm_ndomains; i++) 24068b987a77SJeff Roberson KEG_LOCK_INIT(keg, i, (arg->flags & UMA_ZONE_MTXCLASS)); 2407099a0e58SBosko Milekic 2408099a0e58SBosko Milekic /* 2409099a0e58SBosko Milekic * If we're putting the slab header in the actual page we need to 24109b78b1f4SJeff Roberson * figure out where in each page it goes. See slab_sizeof 24119b78b1f4SJeff Roberson * definition. 2412099a0e58SBosko Milekic */ 241354c5ae80SRyan Libby if (!(keg->uk_flags & UMA_ZFLAG_OFFPAGE)) { 24149b78b1f4SJeff Roberson size_t shsize; 24159b78b1f4SJeff Roberson 24169b78b1f4SJeff Roberson shsize = slab_sizeof(keg->uk_ipers); 24179b78b1f4SJeff Roberson keg->uk_pgoff = (PAGE_SIZE * keg->uk_ppera) - shsize; 2418244f4554SBosko Milekic /* 2419244f4554SBosko Milekic * The only way the following is possible is if with our 2420244f4554SBosko Milekic * UMA_ALIGN_PTR adjustments we are now bigger than 2421244f4554SBosko Milekic * UMA_SLAB_SIZE. I haven't checked whether this is 2422244f4554SBosko Milekic * mathematically possible for all cases, so we make 2423244f4554SBosko Milekic * sure here anyway. 2424244f4554SBosko Milekic */ 24259b78b1f4SJeff Roberson KASSERT(keg->uk_pgoff + shsize <= PAGE_SIZE * keg->uk_ppera, 24263d5e3df7SGleb Smirnoff ("zone %s ipers %d rsize %d size %d slab won't fit", 24273d5e3df7SGleb Smirnoff zone->uz_name, keg->uk_ipers, keg->uk_rsize, keg->uk_size)); 2428099a0e58SBosko Milekic } 2429099a0e58SBosko Milekic 243054c5ae80SRyan Libby if (keg->uk_flags & UMA_ZFLAG_HASH) 24313b2f2cb8SAlexander Motin hash_alloc(&keg->uk_hash, 0); 2432099a0e58SBosko Milekic 2433e63a1c2fSRyan Libby CTR3(KTR_UMA, "keg_ctor %p zone %s(%p)", keg, zone->uz_name, zone); 2434099a0e58SBosko Milekic 2435099a0e58SBosko Milekic LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link); 2436099a0e58SBosko Milekic 2437111fbcd5SBryan Venteicher rw_wlock(&uma_rwlock); 2438099a0e58SBosko Milekic LIST_INSERT_HEAD(&uma_kegs, keg, uk_link); 2439111fbcd5SBryan Venteicher rw_wunlock(&uma_rwlock); 2440b23f72e9SBrian Feldman return (0); 2441099a0e58SBosko Milekic } 2442099a0e58SBosko Milekic 24432efcc8cbSGleb Smirnoff static void 2444a81c400eSJeff Roberson zone_kva_available(uma_zone_t zone, void *unused) 2445a81c400eSJeff Roberson { 2446a81c400eSJeff Roberson uma_keg_t keg; 2447a81c400eSJeff Roberson 2448a81c400eSJeff Roberson if ((zone->uz_flags & UMA_ZFLAG_CACHE) != 0) 2449a81c400eSJeff Roberson return; 2450a81c400eSJeff Roberson KEG_GET(zone, keg); 2451ec0d8280SRyan Libby 2452ec0d8280SRyan Libby if (keg->uk_allocf == startup_alloc) { 2453ec0d8280SRyan Libby /* Switch to the real allocator. 
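 * The selection mirrors keg_ctor(): per-CPU kegs get pcpu_page_alloc,
 * multi-page contiguous kegs get contig_alloc, and everything else
 * falls back to page_alloc.  Kegs already served by uma_small_alloc
 * never used startup_alloc and are skipped by the check above.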
*/ 2454f96d4157SJeff Roberson if (keg->uk_flags & UMA_ZONE_PCPU) 2455f96d4157SJeff Roberson keg->uk_allocf = pcpu_page_alloc; 2456ec0d8280SRyan Libby else if ((keg->uk_flags & UMA_ZONE_CONTIG) != 0 && 2457ec0d8280SRyan Libby keg->uk_ppera > 1) 2458ec0d8280SRyan Libby keg->uk_allocf = contig_alloc; 2459ec0d8280SRyan Libby else 2460a81c400eSJeff Roberson keg->uk_allocf = page_alloc; 2461a81c400eSJeff Roberson } 2462ec0d8280SRyan Libby } 2463a81c400eSJeff Roberson 2464a81c400eSJeff Roberson static void 246520a4e154SJeff Roberson zone_alloc_counters(uma_zone_t zone, void *unused) 24662efcc8cbSGleb Smirnoff { 24672efcc8cbSGleb Smirnoff 24682efcc8cbSGleb Smirnoff zone->uz_allocs = counter_u64_alloc(M_WAITOK); 24692efcc8cbSGleb Smirnoff zone->uz_frees = counter_u64_alloc(M_WAITOK); 24702efcc8cbSGleb Smirnoff zone->uz_fails = counter_u64_alloc(M_WAITOK); 2471c6fd3e23SJeff Roberson zone->uz_xdomain = counter_u64_alloc(M_WAITOK); 24722efcc8cbSGleb Smirnoff } 24732efcc8cbSGleb Smirnoff 247420a4e154SJeff Roberson static void 247520a4e154SJeff Roberson zone_alloc_sysctl(uma_zone_t zone, void *unused) 247620a4e154SJeff Roberson { 247720a4e154SJeff Roberson uma_zone_domain_t zdom; 24788b987a77SJeff Roberson uma_domain_t dom; 247920a4e154SJeff Roberson uma_keg_t keg; 248020a4e154SJeff Roberson struct sysctl_oid *oid, *domainoid; 24813b490537SJeff Roberson int domains, i, cnt; 248220a4e154SJeff Roberson static const char *nokeg = "cache zone"; 248320a4e154SJeff Roberson char *c; 248420a4e154SJeff Roberson 248520a4e154SJeff Roberson /* 248620a4e154SJeff Roberson * Make a sysctl safe copy of the zone name by removing 248720a4e154SJeff Roberson * any special characters and handling dups by appending 248820a4e154SJeff Roberson * an index. 248920a4e154SJeff Roberson */ 249020a4e154SJeff Roberson if (zone->uz_namecnt != 0) { 24913b490537SJeff Roberson /* Count the number of decimal digits and '_' separator. */ 24923b490537SJeff Roberson for (i = 1, cnt = zone->uz_namecnt; cnt != 0; i++) 24933b490537SJeff Roberson cnt /= 10; 24943b490537SJeff Roberson zone->uz_ctlname = malloc(strlen(zone->uz_name) + i + 1, 24953b490537SJeff Roberson M_UMA, M_WAITOK); 249620a4e154SJeff Roberson sprintf(zone->uz_ctlname, "%s_%d", zone->uz_name, 249720a4e154SJeff Roberson zone->uz_namecnt); 249820a4e154SJeff Roberson } else 249920a4e154SJeff Roberson zone->uz_ctlname = strdup(zone->uz_name, M_UMA); 250020a4e154SJeff Roberson for (c = zone->uz_ctlname; *c != '\0'; c++) 250120a4e154SJeff Roberson if (strchr("./\\ -", *c) != NULL) 250220a4e154SJeff Roberson *c = '_'; 250320a4e154SJeff Roberson 250420a4e154SJeff Roberson /* 250520a4e154SJeff Roberson * Basic parameters at the root. 
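 * Everything hangs off vm.uma.<zone>; for example, a zone named "mbuf"
 * is visible as vm.uma.mbuf.size, vm.uma.mbuf.keg.ipers and
 * vm.uma.mbuf.stats.current from sysctl(8).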
250620a4e154SJeff Roberson */ 250720a4e154SJeff Roberson zone->uz_oid = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_vm_uma), 25087029da5cSPawel Biernacki OID_AUTO, zone->uz_ctlname, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, ""); 250920a4e154SJeff Roberson oid = zone->uz_oid; 251020a4e154SJeff Roberson SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, 251120a4e154SJeff Roberson "size", CTLFLAG_RD, &zone->uz_size, 0, "Allocation size"); 25126d204a6aSRyan Libby SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, 25136d204a6aSRyan Libby "flags", CTLFLAG_RD | CTLTYPE_STRING | CTLFLAG_MPSAFE, 25146d204a6aSRyan Libby zone, 0, sysctl_handle_uma_zone_flags, "A", 251520a4e154SJeff Roberson "Allocator configuration flags"); 251620a4e154SJeff Roberson SYSCTL_ADD_U16(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, 251720a4e154SJeff Roberson "bucket_size", CTLFLAG_RD, &zone->uz_bucket_size, 0, 251820a4e154SJeff Roberson "Desired per-cpu cache size"); 251920a4e154SJeff Roberson SYSCTL_ADD_U16(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, 252020a4e154SJeff Roberson "bucket_size_max", CTLFLAG_RD, &zone->uz_bucket_size_max, 0, 252120a4e154SJeff Roberson "Maximum allowed per-cpu cache size"); 252220a4e154SJeff Roberson 252320a4e154SJeff Roberson /* 252420a4e154SJeff Roberson * keg if present. 252520a4e154SJeff Roberson */ 252654c5ae80SRyan Libby if ((zone->uz_flags & UMA_ZFLAG_HASH) == 0) 25278b987a77SJeff Roberson domains = vm_ndomains; 25288b987a77SJeff Roberson else 25298b987a77SJeff Roberson domains = 1; 253020a4e154SJeff Roberson oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid), OID_AUTO, 25317029da5cSPawel Biernacki "keg", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, ""); 253220a4e154SJeff Roberson keg = zone->uz_keg; 25333b490537SJeff Roberson if ((zone->uz_flags & UMA_ZFLAG_CACHE) == 0) { 253420a4e154SJeff Roberson SYSCTL_ADD_CONST_STRING(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, 253520a4e154SJeff Roberson "name", CTLFLAG_RD, keg->uk_name, "Keg name"); 253620a4e154SJeff Roberson SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, 253720a4e154SJeff Roberson "rsize", CTLFLAG_RD, &keg->uk_rsize, 0, 253820a4e154SJeff Roberson "Real object size with alignment"); 253920a4e154SJeff Roberson SYSCTL_ADD_U16(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, 254020a4e154SJeff Roberson "ppera", CTLFLAG_RD, &keg->uk_ppera, 0, 254120a4e154SJeff Roberson "pages per-slab allocation"); 254220a4e154SJeff Roberson SYSCTL_ADD_U16(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, 254320a4e154SJeff Roberson "ipers", CTLFLAG_RD, &keg->uk_ipers, 0, 254420a4e154SJeff Roberson "items available per-slab"); 254520a4e154SJeff Roberson SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, 254620a4e154SJeff Roberson "align", CTLFLAG_RD, &keg->uk_align, 0, 254720a4e154SJeff Roberson "item alignment mask"); 2548f09cbea3SMark Johnston SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, 2549f09cbea3SMark Johnston "reserve", CTLFLAG_RD, &keg->uk_reserve, 0, 2550f09cbea3SMark Johnston "number of reserved items"); 2551f7af5015SRyan Libby SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, 2552f7af5015SRyan Libby "efficiency", CTLFLAG_RD | CTLTYPE_INT | CTLFLAG_MPSAFE, 2553f7af5015SRyan Libby keg, 0, sysctl_handle_uma_slab_efficiency, "I", 2554f7af5015SRyan Libby "Slab utilization (100 - internal fragmentation %)"); 25558b987a77SJeff Roberson domainoid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(oid), 25567029da5cSPawel Biernacki OID_AUTO, "domain", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, ""); 25578b987a77SJeff Roberson for (i = 0; i < domains; i++) { 25588b987a77SJeff Roberson dom = 
&keg->uk_domain[i]; 25598b987a77SJeff Roberson oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(domainoid), 25607029da5cSPawel Biernacki OID_AUTO, VM_DOMAIN(i)->vmd_name, 25617029da5cSPawel Biernacki CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, ""); 25628b987a77SJeff Roberson SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, 25638b987a77SJeff Roberson "pages", CTLFLAG_RD, &dom->ud_pages, 0, 25648b987a77SJeff Roberson "Total pages currently allocated from VM"); 25658b987a77SJeff Roberson SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, 25664ab3aee8SMark Johnston "free_items", CTLFLAG_RD, &dom->ud_free_items, 0, 25678b987a77SJeff Roberson "items free in the slab layer"); 25688b987a77SJeff Roberson } 256920a4e154SJeff Roberson } else 257020a4e154SJeff Roberson SYSCTL_ADD_CONST_STRING(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, 257120a4e154SJeff Roberson "name", CTLFLAG_RD, nokeg, "Keg name"); 257220a4e154SJeff Roberson 257320a4e154SJeff Roberson /* 257420a4e154SJeff Roberson * Information about zone limits. 257520a4e154SJeff Roberson */ 257620a4e154SJeff Roberson oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid), OID_AUTO, 25777029da5cSPawel Biernacki "limit", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, ""); 25784bd61e19SJeff Roberson SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, 25794bd61e19SJeff Roberson "items", CTLFLAG_RD | CTLTYPE_U64 | CTLFLAG_MPSAFE, 25804bd61e19SJeff Roberson zone, 0, sysctl_handle_uma_zone_items, "QU", 2581e574d407SMark Johnston "Current number of allocated items if limit is set"); 258220a4e154SJeff Roberson SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, 258320a4e154SJeff Roberson "max_items", CTLFLAG_RD, &zone->uz_max_items, 0, 2584e574d407SMark Johnston "Maximum number of allocated and cached items"); 258520a4e154SJeff Roberson SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, 258620a4e154SJeff Roberson "sleepers", CTLFLAG_RD, &zone->uz_sleepers, 0, 258720a4e154SJeff Roberson "Number of threads sleeping at limit"); 258820a4e154SJeff Roberson SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, 258920a4e154SJeff Roberson "sleeps", CTLFLAG_RD, &zone->uz_sleeps, 0, 259020a4e154SJeff Roberson "Total zone limit sleeps"); 25914bd61e19SJeff Roberson SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, 2592c6fd3e23SJeff Roberson "bucket_max", CTLFLAG_RD, &zone->uz_bucket_max, 0, 2593c6fd3e23SJeff Roberson "Maximum number of items in each domain's bucket cache"); 259420a4e154SJeff Roberson 259520a4e154SJeff Roberson /* 25968b987a77SJeff Roberson * Per-domain zone information. 
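 * nitems counts items held in this domain's bucket cache, imin/imax
 * record its low and high watermarks over the current interval, and
 * the working set size derived from their spread guides how much of
 * the cache the periodic trimmer treats as idle.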
259720a4e154SJeff Roberson */ 259820a4e154SJeff Roberson domainoid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid), 25997029da5cSPawel Biernacki OID_AUTO, "domain", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, ""); 260020a4e154SJeff Roberson for (i = 0; i < domains; i++) { 2601c6fd3e23SJeff Roberson zdom = ZDOM_GET(zone, i); 260220a4e154SJeff Roberson oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(domainoid), 26037029da5cSPawel Biernacki OID_AUTO, VM_DOMAIN(i)->vmd_name, 26047029da5cSPawel Biernacki CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, ""); 260520a4e154SJeff Roberson SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, 260620a4e154SJeff Roberson "nitems", CTLFLAG_RD, &zdom->uzd_nitems, 260720a4e154SJeff Roberson "number of items in this domain"); 260820a4e154SJeff Roberson SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, 260920a4e154SJeff Roberson "imax", CTLFLAG_RD, &zdom->uzd_imax, 261020a4e154SJeff Roberson "maximum item count in this period"); 261120a4e154SJeff Roberson SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, 261220a4e154SJeff Roberson "imin", CTLFLAG_RD, &zdom->uzd_imin, 261320a4e154SJeff Roberson "minimum item count in this period"); 261420a4e154SJeff Roberson SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, 261520a4e154SJeff Roberson "wss", CTLFLAG_RD, &zdom->uzd_wss, 261620a4e154SJeff Roberson "Working set size"); 261720a4e154SJeff Roberson } 261820a4e154SJeff Roberson 261920a4e154SJeff Roberson /* 262020a4e154SJeff Roberson * General statistics. 262120a4e154SJeff Roberson */ 262220a4e154SJeff Roberson oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid), OID_AUTO, 26237029da5cSPawel Biernacki "stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, ""); 262420a4e154SJeff Roberson SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, 262520a4e154SJeff Roberson "current", CTLFLAG_RD | CTLTYPE_INT | CTLFLAG_MPSAFE, 262620a4e154SJeff Roberson zone, 1, sysctl_handle_uma_zone_cur, "I", 262720a4e154SJeff Roberson "Current number of allocated items"); 262820a4e154SJeff Roberson SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, 262920a4e154SJeff Roberson "allocs", CTLFLAG_RD | CTLTYPE_U64 | CTLFLAG_MPSAFE, 263020a4e154SJeff Roberson zone, 0, sysctl_handle_uma_zone_allocs, "QU", 263120a4e154SJeff Roberson "Total allocation calls"); 263220a4e154SJeff Roberson SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, 263320a4e154SJeff Roberson "frees", CTLFLAG_RD | CTLTYPE_U64 | CTLFLAG_MPSAFE, 263420a4e154SJeff Roberson zone, 0, sysctl_handle_uma_zone_frees, "QU", 263520a4e154SJeff Roberson "Total free calls"); 263620a4e154SJeff Roberson SYSCTL_ADD_COUNTER_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, 263720a4e154SJeff Roberson "fails", CTLFLAG_RD, &zone->uz_fails, 263820a4e154SJeff Roberson "Number of allocation failures"); 2639c6fd3e23SJeff Roberson SYSCTL_ADD_COUNTER_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, 2640c6fd3e23SJeff Roberson "xdomain", CTLFLAG_RD, &zone->uz_xdomain, 264120a4e154SJeff Roberson "Free calls from the wrong domain"); 264220a4e154SJeff Roberson } 264320a4e154SJeff Roberson 264420a4e154SJeff Roberson struct uma_zone_count { 264520a4e154SJeff Roberson const char *name; 264620a4e154SJeff Roberson int count; 264720a4e154SJeff Roberson }; 264820a4e154SJeff Roberson 264920a4e154SJeff Roberson static void 265020a4e154SJeff Roberson zone_count(uma_zone_t zone, void *arg) 265120a4e154SJeff Roberson { 265220a4e154SJeff Roberson struct uma_zone_count *cnt; 265320a4e154SJeff Roberson 265420a4e154SJeff Roberson cnt = arg; 26553b490537SJeff Roberson /* 26563b490537SJeff 
Roberson * Some zones are rapidly created with identical names and 26573b490537SJeff Roberson * destroyed out of order. This can lead to gaps in the count. 26583b490537SJeff Roberson * Use one greater than the maximum observed for this name. 26593b490537SJeff Roberson */ 266020a4e154SJeff Roberson if (strcmp(zone->uz_name, cnt->name) == 0) 26613b490537SJeff Roberson cnt->count = MAX(cnt->count, 26623b490537SJeff Roberson zone->uz_namecnt + 1); 266320a4e154SJeff Roberson } 266420a4e154SJeff Roberson 2665cc7ce83aSJeff Roberson static void 2666cc7ce83aSJeff Roberson zone_update_caches(uma_zone_t zone) 2667cc7ce83aSJeff Roberson { 2668cc7ce83aSJeff Roberson int i; 2669cc7ce83aSJeff Roberson 2670cc7ce83aSJeff Roberson for (i = 0; i <= mp_maxid; i++) { 2671cc7ce83aSJeff Roberson cache_set_uz_size(&zone->uz_cpu[i], zone->uz_size); 2672cc7ce83aSJeff Roberson cache_set_uz_flags(&zone->uz_cpu[i], zone->uz_flags); 2673cc7ce83aSJeff Roberson } 2674cc7ce83aSJeff Roberson } 2675cc7ce83aSJeff Roberson 2676099a0e58SBosko Milekic /* 2677099a0e58SBosko Milekic * Zone header ctor. This initializes all fields, locks, etc. 2678099a0e58SBosko Milekic * 2679099a0e58SBosko Milekic * Arguments/Returns follow uma_ctor specifications 2680099a0e58SBosko Milekic * udata Actually uma_zctor_args 26818355f576SJeff Roberson */ 2682b23f72e9SBrian Feldman static int 2683b23f72e9SBrian Feldman zone_ctor(void *mem, int size, void *udata, int flags) 26848355f576SJeff Roberson { 268520a4e154SJeff Roberson struct uma_zone_count cnt; 26868355f576SJeff Roberson struct uma_zctor_args *arg = udata; 2687c6fd3e23SJeff Roberson uma_zone_domain_t zdom; 26888355f576SJeff Roberson uma_zone_t zone = mem; 2689099a0e58SBosko Milekic uma_zone_t z; 2690099a0e58SBosko Milekic uma_keg_t keg; 269108cfa56eSMark Johnston int i; 26928355f576SJeff Roberson 26938355f576SJeff Roberson bzero(zone, size); 26948355f576SJeff Roberson zone->uz_name = arg->name; 26958355f576SJeff Roberson zone->uz_ctor = arg->ctor; 26968355f576SJeff Roberson zone->uz_dtor = arg->dtor; 2697099a0e58SBosko Milekic zone->uz_init = NULL; 2698099a0e58SBosko Milekic zone->uz_fini = NULL; 2699bf965959SSean Bruno zone->uz_sleeps = 0; 270020a4e154SJeff Roberson zone->uz_bucket_size = 0; 270120a4e154SJeff Roberson zone->uz_bucket_size_min = 0; 270220a4e154SJeff Roberson zone->uz_bucket_size_max = BUCKET_MAX; 2703d4665eaaSJeff Roberson zone->uz_flags = (arg->flags & UMA_ZONE_SMR); 27042f891cd5SPawel Jakub Dawidek zone->uz_warning = NULL; 2705ab3185d1SJeff Roberson /* The domain structures follow the cpu structures. */ 2706c6fd3e23SJeff Roberson zone->uz_bucket_max = ULONG_MAX; 27072f891cd5SPawel Jakub Dawidek timevalclear(&zone->uz_ratecheck); 2708af526374SJeff Roberson 270920a4e154SJeff Roberson /* Count the number of duplicate names. 
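 * The resulting uz_namecnt is what zone_alloc_sysctl() uses to append
 * an "_<n>" suffix, so two zones both named "foo" appear under
 * vm.uma.foo and vm.uma.foo_1 instead of colliding.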
*/ 271020a4e154SJeff Roberson cnt.name = arg->name; 271120a4e154SJeff Roberson cnt.count = 0; 271220a4e154SJeff Roberson zone_foreach(zone_count, &cnt); 271320a4e154SJeff Roberson zone->uz_namecnt = cnt.count; 271491d947bfSJeff Roberson ZONE_CROSS_LOCK_INIT(zone); 27152efcc8cbSGleb Smirnoff 2716c6fd3e23SJeff Roberson for (i = 0; i < vm_ndomains; i++) { 2717c6fd3e23SJeff Roberson zdom = ZDOM_GET(zone, i); 2718c6fd3e23SJeff Roberson ZDOM_LOCK_INIT(zone, zdom, (arg->flags & UMA_ZONE_MTXCLASS)); 2719c6fd3e23SJeff Roberson STAILQ_INIT(&zdom->uzd_buckets); 2720c6fd3e23SJeff Roberson } 272108cfa56eSMark Johnston 272209c8cb71SMark Johnston #if defined(INVARIANTS) && !defined(KASAN) 2723ca293436SRyan Libby if (arg->uminit == trash_init && arg->fini == trash_fini) 2724cc7ce83aSJeff Roberson zone->uz_flags |= UMA_ZFLAG_TRASH | UMA_ZFLAG_CTORDTOR; 272509c8cb71SMark Johnston #elif defined(KASAN) 272609c8cb71SMark Johnston if ((arg->flags & (UMA_ZONE_NOFREE | UMA_ZFLAG_CACHE)) != 0) 272709c8cb71SMark Johnston arg->flags |= UMA_ZONE_NOKASAN; 2728ca293436SRyan Libby #endif 2729ca293436SRyan Libby 27300095a784SJeff Roberson /* 27310095a784SJeff Roberson * This is a pure cache zone, no kegs. 27320095a784SJeff Roberson */ 27330095a784SJeff Roberson if (arg->import) { 2734727c6918SJeff Roberson KASSERT((arg->flags & UMA_ZFLAG_CACHE) != 0, 2735727c6918SJeff Roberson ("zone_ctor: Import specified for non-cache zone.")); 27366fd34d6fSJeff Roberson zone->uz_flags = arg->flags; 2737af526374SJeff Roberson zone->uz_size = arg->size; 27380095a784SJeff Roberson zone->uz_import = arg->import; 27390095a784SJeff Roberson zone->uz_release = arg->release; 27400095a784SJeff Roberson zone->uz_arg = arg->arg; 2741c6fd3e23SJeff Roberson #ifdef NUMA 2742c6fd3e23SJeff Roberson /* 2743c6fd3e23SJeff Roberson * Cache zones are round-robin unless a policy is 2744c6fd3e23SJeff Roberson * specified because they may have incompatible 2745c6fd3e23SJeff Roberson * constraints. 2746c6fd3e23SJeff Roberson */ 2747c6fd3e23SJeff Roberson if ((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) == 0) 2748c6fd3e23SJeff Roberson zone->uz_flags |= UMA_ZONE_ROUNDROBIN; 2749c6fd3e23SJeff Roberson #endif 2750111fbcd5SBryan Venteicher rw_wlock(&uma_rwlock); 275103175483SAlexander Motin LIST_INSERT_HEAD(&uma_cachezones, zone, uz_link); 2752111fbcd5SBryan Venteicher rw_wunlock(&uma_rwlock); 2753af526374SJeff Roberson goto out; 27540095a784SJeff Roberson } 27550095a784SJeff Roberson 27560095a784SJeff Roberson /* 27570095a784SJeff Roberson * Use the regular zone/keg/slab allocator. 
27580095a784SJeff Roberson */ 2759b75c4efcSAndrew Turner zone->uz_import = zone_import; 2760b75c4efcSAndrew Turner zone->uz_release = zone_release; 27610095a784SJeff Roberson zone->uz_arg = zone; 2762bb15d1c7SGleb Smirnoff keg = arg->keg; 27630095a784SJeff Roberson 2764099a0e58SBosko Milekic if (arg->flags & UMA_ZONE_SECONDARY) { 276520a4e154SJeff Roberson KASSERT((zone->uz_flags & UMA_ZONE_SECONDARY) == 0, 276620a4e154SJeff Roberson ("Secondary zone requested UMA_ZFLAG_INTERNAL")); 2767099a0e58SBosko Milekic KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg")); 27688355f576SJeff Roberson zone->uz_init = arg->uminit; 2769e221e841SJeff Roberson zone->uz_fini = arg->fini; 2770e20a199fSJeff Roberson zone->uz_flags |= UMA_ZONE_SECONDARY; 2771111fbcd5SBryan Venteicher rw_wlock(&uma_rwlock); 2772099a0e58SBosko Milekic ZONE_LOCK(zone); 2773099a0e58SBosko Milekic LIST_FOREACH(z, &keg->uk_zones, uz_link) { 2774099a0e58SBosko Milekic if (LIST_NEXT(z, uz_link) == NULL) { 2775099a0e58SBosko Milekic LIST_INSERT_AFTER(z, zone, uz_link); 2776099a0e58SBosko Milekic break; 2777099a0e58SBosko Milekic } 2778099a0e58SBosko Milekic } 2779099a0e58SBosko Milekic ZONE_UNLOCK(zone); 2780111fbcd5SBryan Venteicher rw_wunlock(&uma_rwlock); 2781e20a199fSJeff Roberson } else if (keg == NULL) { 2782e20a199fSJeff Roberson if ((keg = uma_kcreate(zone, arg->size, arg->uminit, arg->fini, 2783e20a199fSJeff Roberson arg->align, arg->flags)) == NULL) 2784b23f72e9SBrian Feldman return (ENOMEM); 2785099a0e58SBosko Milekic } else { 2786099a0e58SBosko Milekic struct uma_kctor_args karg; 2787b23f72e9SBrian Feldman int error; 2788099a0e58SBosko Milekic 2789099a0e58SBosko Milekic /* We should only be here from uma_startup() */ 2790099a0e58SBosko Milekic karg.size = arg->size; 2791099a0e58SBosko Milekic karg.uminit = arg->uminit; 2792099a0e58SBosko Milekic karg.fini = arg->fini; 2793099a0e58SBosko Milekic karg.align = arg->align; 2794d4665eaaSJeff Roberson karg.flags = (arg->flags & ~UMA_ZONE_SMR); 2795099a0e58SBosko Milekic karg.zone = zone; 2796b23f72e9SBrian Feldman error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg, 2797b23f72e9SBrian Feldman flags); 2798b23f72e9SBrian Feldman if (error) 2799b23f72e9SBrian Feldman return (error); 2800099a0e58SBosko Milekic } 28010095a784SJeff Roberson 280220a4e154SJeff Roberson /* Inherit properties from the keg. */ 2803bb15d1c7SGleb Smirnoff zone->uz_keg = keg; 2804e20a199fSJeff Roberson zone->uz_size = keg->uk_size; 2805e20a199fSJeff Roberson zone->uz_flags |= (keg->uk_flags & 2806e20a199fSJeff Roberson (UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT)); 28078355f576SJeff Roberson 280820a4e154SJeff Roberson out: 2809dc2b3205SMark Johnston if (booted >= BOOT_PCPU) { 281020a4e154SJeff Roberson zone_alloc_counters(zone, NULL); 2811dc2b3205SMark Johnston if (booted >= BOOT_RUNNING) 281220a4e154SJeff Roberson zone_alloc_sysctl(zone, NULL); 281320a4e154SJeff Roberson } else { 281420a4e154SJeff Roberson zone->uz_allocs = EARLY_COUNTER; 281520a4e154SJeff Roberson zone->uz_frees = EARLY_COUNTER; 281620a4e154SJeff Roberson zone->uz_fails = EARLY_COUNTER; 2817099a0e58SBosko Milekic } 28188355f576SJeff Roberson 2819d4665eaaSJeff Roberson /* Caller requests a private SMR context. 
*/ 2820d4665eaaSJeff Roberson if ((zone->uz_flags & UMA_ZONE_SMR) != 0) 2821226dd6dbSJeff Roberson zone->uz_smr = smr_create(zone->uz_name, 0, 0); 2822d4665eaaSJeff Roberson 28237e28037aSMark Johnston KASSERT((arg->flags & (UMA_ZONE_MAXBUCKET | UMA_ZONE_NOBUCKET)) != 28247e28037aSMark Johnston (UMA_ZONE_MAXBUCKET | UMA_ZONE_NOBUCKET), 28257e28037aSMark Johnston ("Invalid zone flag combination")); 282620a4e154SJeff Roberson if (arg->flags & UMA_ZFLAG_INTERNAL) 282720a4e154SJeff Roberson zone->uz_bucket_size_max = zone->uz_bucket_size = 0; 282820a4e154SJeff Roberson if ((arg->flags & UMA_ZONE_MAXBUCKET) != 0) 282920a4e154SJeff Roberson zone->uz_bucket_size = BUCKET_MAX; 283020a4e154SJeff Roberson else if ((arg->flags & UMA_ZONE_NOBUCKET) != 0) 283120a4e154SJeff Roberson zone->uz_bucket_size = 0; 28327e28037aSMark Johnston else 283320a4e154SJeff Roberson zone->uz_bucket_size = bucket_select(zone->uz_size); 283420a4e154SJeff Roberson zone->uz_bucket_size_min = zone->uz_bucket_size; 2835cc7ce83aSJeff Roberson if (zone->uz_dtor != NULL || zone->uz_ctor != NULL) 2836cc7ce83aSJeff Roberson zone->uz_flags |= UMA_ZFLAG_CTORDTOR; 2837cc7ce83aSJeff Roberson zone_update_caches(zone); 2838fc03d22bSJeff Roberson 2839b23f72e9SBrian Feldman return (0); 28408355f576SJeff Roberson } 28418355f576SJeff Roberson 28428355f576SJeff Roberson /* 2843099a0e58SBosko Milekic * Keg header dtor. This frees all data, destroys locks, frees the hash 2844099a0e58SBosko Milekic * table and removes the keg from the global list. 28459c2cd7e5SJeff Roberson * 28469c2cd7e5SJeff Roberson * Arguments/Returns follow uma_dtor specifications 28479c2cd7e5SJeff Roberson * udata unused 28489c2cd7e5SJeff Roberson */ 2849099a0e58SBosko Milekic static void 2850099a0e58SBosko Milekic keg_dtor(void *arg, int size, void *udata) 2851099a0e58SBosko Milekic { 2852099a0e58SBosko Milekic uma_keg_t keg; 28538b987a77SJeff Roberson uint32_t free, pages; 28548b987a77SJeff Roberson int i; 28559c2cd7e5SJeff Roberson 2856099a0e58SBosko Milekic keg = (uma_keg_t)arg; 28578b987a77SJeff Roberson free = pages = 0; 28588b987a77SJeff Roberson for (i = 0; i < vm_ndomains; i++) { 28594ab3aee8SMark Johnston free += keg->uk_domain[i].ud_free_items; 28608b987a77SJeff Roberson pages += keg->uk_domain[i].ud_pages; 28618b987a77SJeff Roberson KEG_LOCK_FINI(keg, i); 2862099a0e58SBosko Milekic } 28637e240677SRyan Libby if (pages != 0) 28648b987a77SJeff Roberson printf("Freed UMA keg (%s) was not empty (%u items). " 28658b987a77SJeff Roberson " Lost %u pages of memory.\n", 28668b987a77SJeff Roberson keg->uk_name ? keg->uk_name : "", 28677e240677SRyan Libby pages / keg->uk_ppera * keg->uk_ipers - free, pages); 2868099a0e58SBosko Milekic 2869099a0e58SBosko Milekic hash_free(&keg->uk_hash); 2870099a0e58SBosko Milekic } 2871099a0e58SBosko Milekic 2872099a0e58SBosko Milekic /* 2873099a0e58SBosko Milekic * Zone header dtor. 
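 * Drains the per-CPU and per-domain caches, unlinks the zone from the
 * keg and global lists, and tears down its locks and counters.  The
 * backing keg is destroyed only for primary zones, never for secondary
 * or cache zones.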
2874099a0e58SBosko Milekic * 2875099a0e58SBosko Milekic * Arguments/Returns follow uma_dtor specifications 2876099a0e58SBosko Milekic * udata unused 2877099a0e58SBosko Milekic */ 28789c2cd7e5SJeff Roberson static void 28799c2cd7e5SJeff Roberson zone_dtor(void *arg, int size, void *udata) 28809c2cd7e5SJeff Roberson { 28819c2cd7e5SJeff Roberson uma_zone_t zone; 2882099a0e58SBosko Milekic uma_keg_t keg; 2883c6fd3e23SJeff Roberson int i; 28849c2cd7e5SJeff Roberson 28859c2cd7e5SJeff Roberson zone = (uma_zone_t)arg; 28869643769aSJeff Roberson 288720a4e154SJeff Roberson sysctl_remove_oid(zone->uz_oid, 1, 1); 288820a4e154SJeff Roberson 2889e20a199fSJeff Roberson if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) 28909643769aSJeff Roberson cache_drain(zone); 2891099a0e58SBosko Milekic 2892111fbcd5SBryan Venteicher rw_wlock(&uma_rwlock); 2893099a0e58SBosko Milekic LIST_REMOVE(zone, uz_link); 2894111fbcd5SBryan Venteicher rw_wunlock(&uma_rwlock); 28957b516613SJonathan T. Looney if ((zone->uz_flags & (UMA_ZONE_SECONDARY | UMA_ZFLAG_CACHE)) == 0) { 28967b516613SJonathan T. Looney keg = zone->uz_keg; 28977b516613SJonathan T. Looney keg->uk_reserve = 0; 28987b516613SJonathan T. Looney } 2899*aabe13f1SMark Johnston zone_reclaim(zone, UMA_ANYDOMAIN, M_WAITOK, true); 2900c6fd3e23SJeff Roberson 2901e20a199fSJeff Roberson /* 2902323ad386STycho Nightingale * We only destroy kegs from non secondary/non cache zones. 2903e20a199fSJeff Roberson */ 2904323ad386STycho Nightingale if ((zone->uz_flags & (UMA_ZONE_SECONDARY | UMA_ZFLAG_CACHE)) == 0) { 2905323ad386STycho Nightingale keg = zone->uz_keg; 2906111fbcd5SBryan Venteicher rw_wlock(&uma_rwlock); 2907099a0e58SBosko Milekic LIST_REMOVE(keg, uk_link); 2908111fbcd5SBryan Venteicher rw_wunlock(&uma_rwlock); 29090095a784SJeff Roberson zone_free_item(kegs, keg, NULL, SKIP_NONE); 29109c2cd7e5SJeff Roberson } 29112efcc8cbSGleb Smirnoff counter_u64_free(zone->uz_allocs); 29122efcc8cbSGleb Smirnoff counter_u64_free(zone->uz_frees); 29132efcc8cbSGleb Smirnoff counter_u64_free(zone->uz_fails); 2914c6fd3e23SJeff Roberson counter_u64_free(zone->uz_xdomain); 291520a4e154SJeff Roberson free(zone->uz_ctlname, M_UMA); 2916c6fd3e23SJeff Roberson for (i = 0; i < vm_ndomains; i++) 2917c6fd3e23SJeff Roberson ZDOM_LOCK_FINI(ZDOM_GET(zone, i)); 291891d947bfSJeff Roberson ZONE_CROSS_LOCK_FINI(zone); 2919099a0e58SBosko Milekic } 2920099a0e58SBosko Milekic 2921a81c400eSJeff Roberson static void 2922a81c400eSJeff Roberson zone_foreach_unlocked(void (*zfunc)(uma_zone_t, void *arg), void *arg) 2923a81c400eSJeff Roberson { 2924a81c400eSJeff Roberson uma_keg_t keg; 2925a81c400eSJeff Roberson uma_zone_t zone; 2926a81c400eSJeff Roberson 2927a81c400eSJeff Roberson LIST_FOREACH(keg, &uma_kegs, uk_link) { 2928a81c400eSJeff Roberson LIST_FOREACH(zone, &keg->uk_zones, uz_link) 2929a81c400eSJeff Roberson zfunc(zone, arg); 2930a81c400eSJeff Roberson } 2931a81c400eSJeff Roberson LIST_FOREACH(zone, &uma_cachezones, uz_link) 2932a81c400eSJeff Roberson zfunc(zone, arg); 2933a81c400eSJeff Roberson } 2934a81c400eSJeff Roberson 29359c2cd7e5SJeff Roberson /* 29368355f576SJeff Roberson * Traverses every zone in the system and calls a callback 29378355f576SJeff Roberson * 29388355f576SJeff Roberson * Arguments: 29398355f576SJeff Roberson * zfunc A pointer to a function which accepts a zone 29408355f576SJeff Roberson * as an argument. 
29418355f576SJeff Roberson * 29428355f576SJeff Roberson * Returns: 29438355f576SJeff Roberson * Nothing 29448355f576SJeff Roberson */ 29458355f576SJeff Roberson static void 294620a4e154SJeff Roberson zone_foreach(void (*zfunc)(uma_zone_t, void *arg), void *arg) 29478355f576SJeff Roberson { 29488355f576SJeff Roberson 2949111fbcd5SBryan Venteicher rw_rlock(&uma_rwlock); 2950a81c400eSJeff Roberson zone_foreach_unlocked(zfunc, arg); 2951111fbcd5SBryan Venteicher rw_runlock(&uma_rwlock); 29528355f576SJeff Roberson } 29538355f576SJeff Roberson 2954f4bef67cSGleb Smirnoff /* 2955a81c400eSJeff Roberson * Initialize the kernel memory allocator. This is done after pages can be 2956a81c400eSJeff Roberson * allocated but before general KVA is available. 2957f4bef67cSGleb Smirnoff */ 2958a81c400eSJeff Roberson void 2959a81c400eSJeff Roberson uma_startup1(vm_offset_t virtual_avail) 2960f4bef67cSGleb Smirnoff { 2961a81c400eSJeff Roberson struct uma_zctor_args args; 2962a81c400eSJeff Roberson size_t ksize, zsize, size; 2963c8b0a88bSJeff Roberson uma_keg_t primarykeg; 2964a81c400eSJeff Roberson uintptr_t m; 296581302f1dSMark Johnston int domain; 2966a81c400eSJeff Roberson uint8_t pflag; 2967a81c400eSJeff Roberson 2968a81c400eSJeff Roberson bootstart = bootmem = virtual_avail; 2969a81c400eSJeff Roberson 2970a81c400eSJeff Roberson rw_init(&uma_rwlock, "UMA lock"); 2971a81c400eSJeff Roberson sx_init(&uma_reclaim_lock, "umareclaim"); 2972f4bef67cSGleb Smirnoff 2973f4bef67cSGleb Smirnoff ksize = sizeof(struct uma_keg) + 2974f4bef67cSGleb Smirnoff (sizeof(struct uma_domain) * vm_ndomains); 297579c9f942SJeff Roberson ksize = roundup(ksize, UMA_SUPER_ALIGN); 2976f4bef67cSGleb Smirnoff zsize = sizeof(struct uma_zone) + 2977f4bef67cSGleb Smirnoff (sizeof(struct uma_cache) * (mp_maxid + 1)) + 2978f4bef67cSGleb Smirnoff (sizeof(struct uma_zone_domain) * vm_ndomains); 297979c9f942SJeff Roberson zsize = roundup(zsize, UMA_SUPER_ALIGN); 2980f4bef67cSGleb Smirnoff 2981a81c400eSJeff Roberson /* Allocate the zone of zones, zone of kegs, and zone of zones keg. 
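 * All three come from a single startup_alloc() carve-out: two
 * zone-sized blocks (zones, then kegs) followed by one keg-sized block
 * for the primary keg, attempted in each domain until one succeeds.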
*/ 2982a81c400eSJeff Roberson size = (zsize * 2) + ksize; 298381302f1dSMark Johnston for (domain = 0; domain < vm_ndomains; domain++) { 298481302f1dSMark Johnston m = (uintptr_t)startup_alloc(NULL, size, domain, &pflag, 298581302f1dSMark Johnston M_NOWAIT | M_ZERO); 298681302f1dSMark Johnston if (m != 0) 298781302f1dSMark Johnston break; 298881302f1dSMark Johnston } 2989ab3185d1SJeff Roberson zones = (uma_zone_t)m; 299079c9f942SJeff Roberson m += zsize; 2991ab3185d1SJeff Roberson kegs = (uma_zone_t)m; 299279c9f942SJeff Roberson m += zsize; 2993c8b0a88bSJeff Roberson primarykeg = (uma_keg_t)m; 2994ab3185d1SJeff Roberson 2995099a0e58SBosko Milekic /* "manually" create the initial zone */ 29960095a784SJeff Roberson memset(&args, 0, sizeof(args)); 2997099a0e58SBosko Milekic args.name = "UMA Kegs"; 2998ab3185d1SJeff Roberson args.size = ksize; 2999099a0e58SBosko Milekic args.ctor = keg_ctor; 3000099a0e58SBosko Milekic args.dtor = keg_dtor; 30018355f576SJeff Roberson args.uminit = zero_init; 30028355f576SJeff Roberson args.fini = NULL; 3003c8b0a88bSJeff Roberson args.keg = primarykeg; 300479c9f942SJeff Roberson args.align = UMA_SUPER_ALIGN - 1; 3005b60f5b79SJeff Roberson args.flags = UMA_ZFLAG_INTERNAL; 3006ab3185d1SJeff Roberson zone_ctor(kegs, zsize, &args, M_WAITOK); 30078355f576SJeff Roberson 3008099a0e58SBosko Milekic args.name = "UMA Zones"; 3009f4bef67cSGleb Smirnoff args.size = zsize; 3010099a0e58SBosko Milekic args.ctor = zone_ctor; 3011099a0e58SBosko Milekic args.dtor = zone_dtor; 3012099a0e58SBosko Milekic args.uminit = zero_init; 3013099a0e58SBosko Milekic args.fini = NULL; 3014099a0e58SBosko Milekic args.keg = NULL; 301579c9f942SJeff Roberson args.align = UMA_SUPER_ALIGN - 1; 3016099a0e58SBosko Milekic args.flags = UMA_ZFLAG_INTERNAL; 3017ab3185d1SJeff Roberson zone_ctor(zones, zsize, &args, M_WAITOK); 3018099a0e58SBosko Milekic 30199b8db4d0SRyan Libby /* Now make zones for slab headers */ 30209b8db4d0SRyan Libby slabzones[0] = uma_zcreate("UMA Slabs 0", SLABZONE0_SIZE, 30219b8db4d0SRyan Libby NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL); 30229b8db4d0SRyan Libby slabzones[1] = uma_zcreate("UMA Slabs 1", SLABZONE1_SIZE, 30231e0701e1SJeff Roberson NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL); 30248355f576SJeff Roberson 30258355f576SJeff Roberson hashzone = uma_zcreate("UMA Hash", 30268355f576SJeff Roberson sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT, 30271e0701e1SJeff Roberson NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL); 30288355f576SJeff Roberson 3029a81c400eSJeff Roberson bucket_init(); 3030d4665eaaSJeff Roberson smr_init(); 30318355f576SJeff Roberson } 30328355f576SJeff Roberson 3033a81c400eSJeff Roberson #ifndef UMA_MD_SMALL_ALLOC 3034a81c400eSJeff Roberson extern void vm_radix_reserve_kva(void); 3035f4bef67cSGleb Smirnoff #endif 3036f4bef67cSGleb Smirnoff 3037a81c400eSJeff Roberson /* 3038a81c400eSJeff Roberson * Advertise the availability of normal kva allocations and switch to 3039a81c400eSJeff Roberson * the default back-end allocator. Marks the KVA we consumed on startup 3040a81c400eSJeff Roberson * as used in the map. 
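 * After this point startup_alloc() is retired: zone_kva_available()
 * rewires each keg to its permanent back-end allocator and the bucket
 * layer is enabled.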
3041a81c400eSJeff Roberson */ 30428355f576SJeff Roberson void 304399571dc3SJeff Roberson uma_startup2(void) 30448355f576SJeff Roberson { 3045f4bef67cSGleb Smirnoff 3046530cc6a2SJeff Roberson if (bootstart != bootmem) { 3047a81c400eSJeff Roberson vm_map_lock(kernel_map); 3048a81c400eSJeff Roberson (void)vm_map_insert(kernel_map, NULL, 0, bootstart, bootmem, 3049a81c400eSJeff Roberson VM_PROT_RW, VM_PROT_RW, MAP_NOFAULT); 3050a81c400eSJeff Roberson vm_map_unlock(kernel_map); 3051a81c400eSJeff Roberson } 3052a81c400eSJeff Roberson 3053a81c400eSJeff Roberson #ifndef UMA_MD_SMALL_ALLOC 3054a81c400eSJeff Roberson /* Set up radix zone to use noobj_alloc. */ 3055a81c400eSJeff Roberson vm_radix_reserve_kva(); 3056f7d35785SGleb Smirnoff #endif 3057a81c400eSJeff Roberson 3058a81c400eSJeff Roberson booted = BOOT_KVA; 3059a81c400eSJeff Roberson zone_foreach_unlocked(zone_kva_available, NULL); 3060f4bef67cSGleb Smirnoff bucket_enable(); 30618355f576SJeff Roberson } 30628355f576SJeff Roberson 3063a81c400eSJeff Roberson /* 3064dc2b3205SMark Johnston * Allocate counters as early as possible so that boot-time allocations are 3065dc2b3205SMark Johnston * accounted more precisely. 3066dc2b3205SMark Johnston */ 3067dc2b3205SMark Johnston static void 3068dc2b3205SMark Johnston uma_startup_pcpu(void *arg __unused) 3069dc2b3205SMark Johnston { 3070dc2b3205SMark Johnston 3071dc2b3205SMark Johnston zone_foreach_unlocked(zone_alloc_counters, NULL); 3072dc2b3205SMark Johnston booted = BOOT_PCPU; 3073dc2b3205SMark Johnston } 3074dc2b3205SMark Johnston SYSINIT(uma_startup_pcpu, SI_SUB_COUNTER, SI_ORDER_ANY, uma_startup_pcpu, NULL); 3075dc2b3205SMark Johnston 3076dc2b3205SMark Johnston /* 3077a81c400eSJeff Roberson * Finish our initialization steps. 3078a81c400eSJeff Roberson */ 30798355f576SJeff Roberson static void 3080dc2b3205SMark Johnston uma_startup3(void *arg __unused) 30818355f576SJeff Roberson { 30821431a748SGleb Smirnoff 3083c5deaf04SGleb Smirnoff #ifdef INVARIANTS 3084c5deaf04SGleb Smirnoff TUNABLE_INT_FETCH("vm.debug.divisor", &dbg_divisor); 3085c5deaf04SGleb Smirnoff uma_dbg_cnt = counter_u64_alloc(M_WAITOK); 3086c5deaf04SGleb Smirnoff uma_skip_cnt = counter_u64_alloc(M_WAITOK); 3087c5deaf04SGleb Smirnoff #endif 3088a81c400eSJeff Roberson zone_foreach_unlocked(zone_alloc_sysctl, NULL); 3089fd90e2edSJung-uk Kim callout_init(&uma_callout, 1); 30909643769aSJeff Roberson callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL); 3091c5deaf04SGleb Smirnoff booted = BOOT_RUNNING; 3092860bb7a0SMark Johnston 3093860bb7a0SMark Johnston EVENTHANDLER_REGISTER(shutdown_post_sync, uma_shutdown, NULL, 3094860bb7a0SMark Johnston EVENTHANDLER_PRI_FIRST); 3095860bb7a0SMark Johnston } 3096dc2b3205SMark Johnston SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL); 3097860bb7a0SMark Johnston 3098860bb7a0SMark Johnston static void 3099860bb7a0SMark Johnston uma_shutdown(void) 3100860bb7a0SMark Johnston { 3101860bb7a0SMark Johnston 3102860bb7a0SMark Johnston booted = BOOT_SHUTDOWN; 31038355f576SJeff Roberson } 31048355f576SJeff Roberson 3105e20a199fSJeff Roberson static uma_keg_t 3106099a0e58SBosko Milekic uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini, 310785dcf349SGleb Smirnoff int align, uint32_t flags) 3108099a0e58SBosko Milekic { 3109099a0e58SBosko Milekic struct uma_kctor_args args; 3110099a0e58SBosko Milekic 3111099a0e58SBosko Milekic args.size = size; 3112099a0e58SBosko Milekic args.uminit = uminit; 3113099a0e58SBosko Milekic args.fini = fini; 31141e319f6dSRobert 
Watson args.align = (align == UMA_ALIGN_CACHE) ? uma_align_cache : align; 3115099a0e58SBosko Milekic args.flags = flags; 3116099a0e58SBosko Milekic args.zone = zone; 3117ab3185d1SJeff Roberson return (zone_alloc_item(kegs, &args, UMA_ANYDOMAIN, M_WAITOK)); 3118099a0e58SBosko Milekic } 3119099a0e58SBosko Milekic 3120f4bef67cSGleb Smirnoff /* Public functions */ 31218355f576SJeff Roberson /* See uma.h */ 31221e319f6dSRobert Watson void 31231e319f6dSRobert Watson uma_set_align(int align) 31241e319f6dSRobert Watson { 31251e319f6dSRobert Watson 31261e319f6dSRobert Watson if (align != UMA_ALIGN_CACHE) 31271e319f6dSRobert Watson uma_align_cache = align; 31281e319f6dSRobert Watson } 31291e319f6dSRobert Watson 31301e319f6dSRobert Watson /* See uma.h */ 31318355f576SJeff Roberson uma_zone_t 3132bb196eb4SMatthew D Fleming uma_zcreate(const char *name, size_t size, uma_ctor ctor, uma_dtor dtor, 313385dcf349SGleb Smirnoff uma_init uminit, uma_fini fini, int align, uint32_t flags) 31348355f576SJeff Roberson 31358355f576SJeff Roberson { 31368355f576SJeff Roberson struct uma_zctor_args args; 313795c4bf75SKonstantin Belousov uma_zone_t res; 31388355f576SJeff Roberson 3139a5a35578SJohn Baldwin KASSERT(powerof2(align + 1), ("invalid zone alignment %d for \"%s\"", 3140a5a35578SJohn Baldwin align, name)); 3141a5a35578SJohn Baldwin 31428355f576SJeff Roberson /* This stuff is essential for the zone ctor */ 31430095a784SJeff Roberson memset(&args, 0, sizeof(args)); 31448355f576SJeff Roberson args.name = name; 31458355f576SJeff Roberson args.size = size; 31468355f576SJeff Roberson args.ctor = ctor; 31478355f576SJeff Roberson args.dtor = dtor; 31488355f576SJeff Roberson args.uminit = uminit; 31498355f576SJeff Roberson args.fini = fini; 315009c8cb71SMark Johnston #if defined(INVARIANTS) && !defined(KASAN) 3151afc6dc36SJohn-Mark Gurney /* 3152ca293436SRyan Libby * Inject procedures which check for memory use after free if we are 3153ca293436SRyan Libby * allowed to scramble the memory while it is not allocated. This 3154ca293436SRyan Libby * requires that: UMA is actually able to access the memory, no init 3155ca293436SRyan Libby * or fini procedures, no dependency on the initial value of the 3156ca293436SRyan Libby * memory, and no (legitimate) use of the memory after free. Note, 3157ca293436SRyan Libby * the ctor and dtor do not need to be empty. 
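 * The trash routines fill an item with a junk pattern while it sits
 * free and verify that pattern on the next allocation, so a stray
 * write to freed memory panics at the next uma_zalloc() instead of
 * silently corrupting a live object.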
3158afc6dc36SJohn-Mark Gurney */ 315954c5ae80SRyan Libby if ((!(flags & (UMA_ZONE_ZINIT | UMA_ZONE_NOTOUCH | 316054c5ae80SRyan Libby UMA_ZONE_NOFREE))) && uminit == NULL && fini == NULL) { 3161afc6dc36SJohn-Mark Gurney args.uminit = trash_init; 3162afc6dc36SJohn-Mark Gurney args.fini = trash_fini; 3163afc6dc36SJohn-Mark Gurney } 3164afc6dc36SJohn-Mark Gurney #endif 31658355f576SJeff Roberson args.align = align; 31668355f576SJeff Roberson args.flags = flags; 3167099a0e58SBosko Milekic args.keg = NULL; 3168099a0e58SBosko Milekic 3169*aabe13f1SMark Johnston sx_xlock(&uma_reclaim_lock); 3170ab3185d1SJeff Roberson res = zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK); 3171*aabe13f1SMark Johnston sx_xunlock(&uma_reclaim_lock); 3172a81c400eSJeff Roberson 317395c4bf75SKonstantin Belousov return (res); 3174099a0e58SBosko Milekic } 3175099a0e58SBosko Milekic 3176099a0e58SBosko Milekic /* See uma.h */ 3177099a0e58SBosko Milekic uma_zone_t 31780464f16eSMark Johnston uma_zsecond_create(const char *name, uma_ctor ctor, uma_dtor dtor, 3179c8b0a88bSJeff Roberson uma_init zinit, uma_fini zfini, uma_zone_t primary) 3180099a0e58SBosko Milekic { 3181099a0e58SBosko Milekic struct uma_zctor_args args; 3182e20a199fSJeff Roberson uma_keg_t keg; 318395c4bf75SKonstantin Belousov uma_zone_t res; 3184099a0e58SBosko Milekic 3185c8b0a88bSJeff Roberson keg = primary->uz_keg; 31860095a784SJeff Roberson memset(&args, 0, sizeof(args)); 3187099a0e58SBosko Milekic args.name = name; 3188e20a199fSJeff Roberson args.size = keg->uk_size; 3189099a0e58SBosko Milekic args.ctor = ctor; 3190099a0e58SBosko Milekic args.dtor = dtor; 3191099a0e58SBosko Milekic args.uminit = zinit; 3192099a0e58SBosko Milekic args.fini = zfini; 3193e20a199fSJeff Roberson args.align = keg->uk_align; 3194e20a199fSJeff Roberson args.flags = keg->uk_flags | UMA_ZONE_SECONDARY; 3195e20a199fSJeff Roberson args.keg = keg; 31968355f576SJeff Roberson 3197*aabe13f1SMark Johnston sx_xlock(&uma_reclaim_lock); 3198ab3185d1SJeff Roberson res = zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK); 3199*aabe13f1SMark Johnston sx_xunlock(&uma_reclaim_lock); 3200a81c400eSJeff Roberson 320195c4bf75SKonstantin Belousov return (res); 32028355f576SJeff Roberson } 32038355f576SJeff Roberson 32040095a784SJeff Roberson /* See uma.h */ 32050095a784SJeff Roberson uma_zone_t 32060464f16eSMark Johnston uma_zcache_create(const char *name, int size, uma_ctor ctor, uma_dtor dtor, 32070464f16eSMark Johnston uma_init zinit, uma_fini zfini, uma_import zimport, uma_release zrelease, 32080464f16eSMark Johnston void *arg, int flags) 32090095a784SJeff Roberson { 32100095a784SJeff Roberson struct uma_zctor_args args; 32110095a784SJeff Roberson 32120095a784SJeff Roberson memset(&args, 0, sizeof(args)); 32130095a784SJeff Roberson args.name = name; 3214af526374SJeff Roberson args.size = size; 32150095a784SJeff Roberson args.ctor = ctor; 32160095a784SJeff Roberson args.dtor = dtor; 32170095a784SJeff Roberson args.uminit = zinit; 32180095a784SJeff Roberson args.fini = zfini; 32190095a784SJeff Roberson args.import = zimport; 32200095a784SJeff Roberson args.release = zrelease; 32210095a784SJeff Roberson args.arg = arg; 32220095a784SJeff Roberson args.align = 0; 3223bb15d1c7SGleb Smirnoff args.flags = flags | UMA_ZFLAG_CACHE; 32240095a784SJeff Roberson 3225ab3185d1SJeff Roberson return (zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK)); 32260095a784SJeff Roberson } 32270095a784SJeff Roberson 32288355f576SJeff Roberson /* See uma.h */ 32299c2cd7e5SJeff Roberson void 32309c2cd7e5SJeff 
Roberson uma_zdestroy(uma_zone_t zone) 32319c2cd7e5SJeff Roberson { 3232f4ff923bSRobert Watson 3233860bb7a0SMark Johnston /* 3234860bb7a0SMark Johnston * Large slabs are expensive to reclaim, so don't bother doing 3235860bb7a0SMark Johnston * unnecessary work if we're shutting down. 3236860bb7a0SMark Johnston */ 3237860bb7a0SMark Johnston if (booted == BOOT_SHUTDOWN && 3238860bb7a0SMark Johnston zone->uz_fini == NULL && zone->uz_release == zone_release) 3239860bb7a0SMark Johnston return; 3240*aabe13f1SMark Johnston sx_xlock(&uma_reclaim_lock); 32410095a784SJeff Roberson zone_free_item(zones, zone, NULL, SKIP_NONE); 3242*aabe13f1SMark Johnston sx_xunlock(&uma_reclaim_lock); 32439c2cd7e5SJeff Roberson } 32449c2cd7e5SJeff Roberson 32458d6fbbb8SJeff Roberson void 32468d6fbbb8SJeff Roberson uma_zwait(uma_zone_t zone) 32478d6fbbb8SJeff Roberson { 32488d6fbbb8SJeff Roberson 324970260874SJeff Roberson if ((zone->uz_flags & UMA_ZONE_SMR) != 0) 325070260874SJeff Roberson uma_zfree_smr(zone, uma_zalloc_smr(zone, M_WAITOK)); 325170260874SJeff Roberson else if ((zone->uz_flags & UMA_ZONE_PCPU) != 0) 325270260874SJeff Roberson uma_zfree_pcpu(zone, uma_zalloc_pcpu(zone, M_WAITOK)); 325370260874SJeff Roberson else 325470260874SJeff Roberson uma_zfree(zone, uma_zalloc(zone, M_WAITOK)); 32558d6fbbb8SJeff Roberson } 32568d6fbbb8SJeff Roberson 32574e180881SMateusz Guzik void * 32584e180881SMateusz Guzik uma_zalloc_pcpu_arg(uma_zone_t zone, void *udata, int flags) 32594e180881SMateusz Guzik { 32603acb6572SMateusz Guzik void *item, *pcpu_item; 3261b4799947SRuslan Bukin #ifdef SMP 32624e180881SMateusz Guzik int i; 32634e180881SMateusz Guzik 32644e180881SMateusz Guzik MPASS(zone->uz_flags & UMA_ZONE_PCPU); 3265b4799947SRuslan Bukin #endif 32664e180881SMateusz Guzik item = uma_zalloc_arg(zone, udata, flags & ~M_ZERO); 32673acb6572SMateusz Guzik if (item == NULL) 32683acb6572SMateusz Guzik return (NULL); 32693acb6572SMateusz Guzik pcpu_item = zpcpu_base_to_offset(item); 32703acb6572SMateusz Guzik if (flags & M_ZERO) { 3271b4799947SRuslan Bukin #ifdef SMP 3272013072f0SMark Johnston for (i = 0; i <= mp_maxid; i++) 32733acb6572SMateusz Guzik bzero(zpcpu_get_cpu(pcpu_item, i), zone->uz_size); 3274b4799947SRuslan Bukin #else 3275b4799947SRuslan Bukin bzero(item, zone->uz_size); 3276b4799947SRuslan Bukin #endif 32774e180881SMateusz Guzik } 32783acb6572SMateusz Guzik return (pcpu_item); 32794e180881SMateusz Guzik } 32804e180881SMateusz Guzik 32814e180881SMateusz Guzik /* 32824e180881SMateusz Guzik * A stub while both regular and pcpu cases are identical. 32834e180881SMateusz Guzik */ 32844e180881SMateusz Guzik void 32853acb6572SMateusz Guzik uma_zfree_pcpu_arg(uma_zone_t zone, void *pcpu_item, void *udata) 32864e180881SMateusz Guzik { 32873acb6572SMateusz Guzik void *item; 32884e180881SMateusz Guzik 3289c5b7751fSIan Lepore #ifdef SMP 32904e180881SMateusz Guzik MPASS(zone->uz_flags & UMA_ZONE_PCPU); 3291c5b7751fSIan Lepore #endif 3292b8f7267dSKristof Provost 3293b8f7267dSKristof Provost /* uma_zfree_pcu_*(..., NULL) does nothing, to match free(9). 
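 * Otherwise the pointer must have come from uma_zalloc_pcpu*().  An
 * illustrative round trip with a hypothetical per-CPU zone (the names
 * below are made up for the example):
 *
 *	z = uma_zcreate("example_pcpu", sizeof(uint64_t), NULL, NULL,
 *	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU);
 *	p = uma_zalloc_pcpu(z, M_WAITOK | M_ZERO);
 *	(*(uint64_t *)zpcpu_get(p))++;
 *	uma_zfree_pcpu(z, p);
 *
 * zpcpu_get() resolves the opaque per-CPU pointer to the current CPU's
 * copy; the free releases every CPU's copy at once.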
*/ 3294b8f7267dSKristof Provost if (pcpu_item == NULL) 3295b8f7267dSKristof Provost return; 3296b8f7267dSKristof Provost 32973acb6572SMateusz Guzik item = zpcpu_offset_to_base(pcpu_item); 32984e180881SMateusz Guzik uma_zfree_arg(zone, item, udata); 32994e180881SMateusz Guzik } 33004e180881SMateusz Guzik 3301d4665eaaSJeff Roberson static inline void * 3302d4665eaaSJeff Roberson item_ctor(uma_zone_t zone, int uz_flags, int size, void *udata, int flags, 3303d4665eaaSJeff Roberson void *item) 3304beb8beefSJeff Roberson { 3305beb8beefSJeff Roberson #ifdef INVARIANTS 3306ca293436SRyan Libby bool skipdbg; 330709c8cb71SMark Johnston #endif 3308beb8beefSJeff Roberson 330909c8cb71SMark Johnston kasan_mark_item_valid(zone, item); 331009c8cb71SMark Johnston 331109c8cb71SMark Johnston #ifdef INVARIANTS 3312beb8beefSJeff Roberson skipdbg = uma_dbg_zskip(zone, item); 331309c8cb71SMark Johnston if (!skipdbg && (uz_flags & UMA_ZFLAG_TRASH) != 0 && 3314ca293436SRyan Libby zone->uz_ctor != trash_ctor) 3315cc7ce83aSJeff Roberson trash_ctor(item, size, udata, flags); 3316beb8beefSJeff Roberson #endif 331709c8cb71SMark Johnston 3318d4665eaaSJeff Roberson /* Check flags before loading ctor pointer. */ 3319d4665eaaSJeff Roberson if (__predict_false((uz_flags & UMA_ZFLAG_CTORDTOR) != 0) && 3320d4665eaaSJeff Roberson __predict_false(zone->uz_ctor != NULL) && 3321cc7ce83aSJeff Roberson zone->uz_ctor(item, size, udata, flags) != 0) { 3322beb8beefSJeff Roberson counter_u64_add(zone->uz_fails, 1); 3323beb8beefSJeff Roberson zone_free_item(zone, item, udata, SKIP_DTOR | SKIP_CNT); 3324beb8beefSJeff Roberson return (NULL); 3325beb8beefSJeff Roberson } 3326beb8beefSJeff Roberson #ifdef INVARIANTS 3327beb8beefSJeff Roberson if (!skipdbg) 3328beb8beefSJeff Roberson uma_dbg_alloc(zone, NULL, item); 3329beb8beefSJeff Roberson #endif 33306d88d784SJeff Roberson if (__predict_false(flags & M_ZERO)) 33316d88d784SJeff Roberson return (memset(item, 0, size)); 3332beb8beefSJeff Roberson 3333beb8beefSJeff Roberson return (item); 3334beb8beefSJeff Roberson } 3335beb8beefSJeff Roberson 3336ca293436SRyan Libby static inline void 3337cc7ce83aSJeff Roberson item_dtor(uma_zone_t zone, void *item, int size, void *udata, 3338cc7ce83aSJeff Roberson enum zfreeskip skip) 3339ca293436SRyan Libby { 3340ca293436SRyan Libby #ifdef INVARIANTS 3341ca293436SRyan Libby bool skipdbg; 3342ca293436SRyan Libby 3343ca293436SRyan Libby skipdbg = uma_dbg_zskip(zone, item); 3344ca293436SRyan Libby if (skip == SKIP_NONE && !skipdbg) { 3345ca293436SRyan Libby if ((zone->uz_flags & UMA_ZONE_MALLOC) != 0) 3346ca293436SRyan Libby uma_dbg_free(zone, udata, item); 3347ca293436SRyan Libby else 3348ca293436SRyan Libby uma_dbg_free(zone, NULL, item); 3349ca293436SRyan Libby } 3350ca293436SRyan Libby #endif 3351cc7ce83aSJeff Roberson if (__predict_true(skip < SKIP_DTOR)) { 3352ca293436SRyan Libby if (zone->uz_dtor != NULL) 3353cc7ce83aSJeff Roberson zone->uz_dtor(item, size, udata); 3354ca293436SRyan Libby #ifdef INVARIANTS 3355ca293436SRyan Libby if (!skipdbg && (zone->uz_flags & UMA_ZFLAG_TRASH) != 0 && 3356ca293436SRyan Libby zone->uz_dtor != trash_dtor) 3357cc7ce83aSJeff Roberson trash_dtor(item, size, udata); 3358ca293436SRyan Libby #endif 3359ca293436SRyan Libby } 336009c8cb71SMark Johnston kasan_mark_item_invalid(zone, item); 3361ca293436SRyan Libby } 3362ca293436SRyan Libby 33631c58c09fSMateusz Guzik #ifdef NUMA 336481302f1dSMark Johnston static int 336581302f1dSMark Johnston item_domain(void *item) 336681302f1dSMark Johnston { 336781302f1dSMark Johnston int 
domain; 336881302f1dSMark Johnston 3369431fb8abSMark Johnston domain = vm_phys_domain(vtophys(item)); 337081302f1dSMark Johnston KASSERT(domain >= 0 && domain < vm_ndomains, 337181302f1dSMark Johnston ("%s: unknown domain for item %p", __func__, item)); 337281302f1dSMark Johnston return (domain); 337381302f1dSMark Johnston } 33741c58c09fSMateusz Guzik #endif 337581302f1dSMark Johnston 3376d4665eaaSJeff Roberson #if defined(INVARIANTS) || defined(DEBUG_MEMGUARD) || defined(WITNESS) 3377d4665eaaSJeff Roberson #define UMA_ZALLOC_DEBUG 3378d4665eaaSJeff Roberson static int 3379d4665eaaSJeff Roberson uma_zalloc_debug(uma_zone_t zone, void **itemp, void *udata, int flags) 3380d4665eaaSJeff Roberson { 3381d4665eaaSJeff Roberson int error; 3382d4665eaaSJeff Roberson 3383d4665eaaSJeff Roberson error = 0; 3384d4665eaaSJeff Roberson #ifdef WITNESS 3385d4665eaaSJeff Roberson if (flags & M_WAITOK) { 3386d4665eaaSJeff Roberson WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, 3387d4665eaaSJeff Roberson "uma_zalloc_debug: zone \"%s\"", zone->uz_name); 3388d4665eaaSJeff Roberson } 3389d4665eaaSJeff Roberson #endif 3390d4665eaaSJeff Roberson 3391d4665eaaSJeff Roberson #ifdef INVARIANTS 3392d4665eaaSJeff Roberson KASSERT((flags & M_EXEC) == 0, 3393d4665eaaSJeff Roberson ("uma_zalloc_debug: called with M_EXEC")); 3394d4665eaaSJeff Roberson KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(), 3395d4665eaaSJeff Roberson ("uma_zalloc_debug: called within spinlock or critical section")); 3396d4665eaaSJeff Roberson KASSERT((zone->uz_flags & UMA_ZONE_PCPU) == 0 || (flags & M_ZERO) == 0, 3397d4665eaaSJeff Roberson ("uma_zalloc_debug: allocating from a pcpu zone with M_ZERO")); 3398d4665eaaSJeff Roberson #endif 3399d4665eaaSJeff Roberson 3400d4665eaaSJeff Roberson #ifdef DEBUG_MEMGUARD 34019e47b341SJeff Roberson if ((zone->uz_flags & UMA_ZONE_SMR) == 0 && memguard_cmp_zone(zone)) { 3402d4665eaaSJeff Roberson void *item; 3403d4665eaaSJeff Roberson item = memguard_alloc(zone->uz_size, flags); 3404d4665eaaSJeff Roberson if (item != NULL) { 3405d4665eaaSJeff Roberson error = EJUSTRETURN; 3406d4665eaaSJeff Roberson if (zone->uz_init != NULL && 3407d4665eaaSJeff Roberson zone->uz_init(item, zone->uz_size, flags) != 0) { 3408d4665eaaSJeff Roberson *itemp = NULL; 3409d4665eaaSJeff Roberson return (error); 3410d4665eaaSJeff Roberson } 3411d4665eaaSJeff Roberson if (zone->uz_ctor != NULL && 3412d4665eaaSJeff Roberson zone->uz_ctor(item, zone->uz_size, udata, 3413d4665eaaSJeff Roberson flags) != 0) { 3414d4665eaaSJeff Roberson counter_u64_add(zone->uz_fails, 1); 3415d4665eaaSJeff Roberson zone->uz_fini(item, zone->uz_size); 3416d4665eaaSJeff Roberson *itemp = NULL; 3417d4665eaaSJeff Roberson return (error); 3418d4665eaaSJeff Roberson } 3419d4665eaaSJeff Roberson *itemp = item; 3420d4665eaaSJeff Roberson return (error); 3421d4665eaaSJeff Roberson } 3422d4665eaaSJeff Roberson /* This is unfortunate but should not be fatal. 
*/ 3423d4665eaaSJeff Roberson } 3424d4665eaaSJeff Roberson #endif 3425d4665eaaSJeff Roberson return (error); 3426d4665eaaSJeff Roberson } 3427d4665eaaSJeff Roberson 3428d4665eaaSJeff Roberson static int 3429d4665eaaSJeff Roberson uma_zfree_debug(uma_zone_t zone, void *item, void *udata) 3430d4665eaaSJeff Roberson { 3431d4665eaaSJeff Roberson KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(), 3432d4665eaaSJeff Roberson ("uma_zfree_debug: called with spinlock or critical section held")); 3433d4665eaaSJeff Roberson 3434d4665eaaSJeff Roberson #ifdef DEBUG_MEMGUARD 34359e47b341SJeff Roberson if ((zone->uz_flags & UMA_ZONE_SMR) == 0 && is_memguard_addr(item)) { 3436d4665eaaSJeff Roberson if (zone->uz_dtor != NULL) 3437d4665eaaSJeff Roberson zone->uz_dtor(item, zone->uz_size, udata); 3438d4665eaaSJeff Roberson if (zone->uz_fini != NULL) 3439d4665eaaSJeff Roberson zone->uz_fini(item, zone->uz_size); 3440d4665eaaSJeff Roberson memguard_free(item); 3441d4665eaaSJeff Roberson return (EJUSTRETURN); 3442d4665eaaSJeff Roberson } 3443d4665eaaSJeff Roberson #endif 3444d4665eaaSJeff Roberson return (0); 3445d4665eaaSJeff Roberson } 3446d4665eaaSJeff Roberson #endif 3447d4665eaaSJeff Roberson 34486d88d784SJeff Roberson static inline void * 34496d88d784SJeff Roberson cache_alloc_item(uma_zone_t zone, uma_cache_t cache, uma_cache_bucket_t bucket, 34506d88d784SJeff Roberson void *udata, int flags) 3451d4665eaaSJeff Roberson { 34526d88d784SJeff Roberson void *item; 34536d88d784SJeff Roberson int size, uz_flags; 34546d88d784SJeff Roberson 34556d88d784SJeff Roberson item = cache_bucket_pop(cache, bucket); 34566d88d784SJeff Roberson size = cache_uz_size(cache); 34576d88d784SJeff Roberson uz_flags = cache_uz_flags(cache); 34586d88d784SJeff Roberson critical_exit(); 34596d88d784SJeff Roberson return (item_ctor(zone, uz_flags, size, udata, flags, item)); 34606d88d784SJeff Roberson } 34616d88d784SJeff Roberson 34626d88d784SJeff Roberson static __noinline void * 34636d88d784SJeff Roberson cache_alloc_retry(uma_zone_t zone, uma_cache_t cache, void *udata, int flags) 34646d88d784SJeff Roberson { 34656d88d784SJeff Roberson uma_cache_bucket_t bucket; 3466d4665eaaSJeff Roberson int domain; 3467d4665eaaSJeff Roberson 34686d88d784SJeff Roberson while (cache_alloc(zone, cache, udata, flags)) { 34696d88d784SJeff Roberson cache = &zone->uz_cpu[curcpu]; 34706d88d784SJeff Roberson bucket = &cache->uc_allocbucket; 34716d88d784SJeff Roberson if (__predict_false(bucket->ucb_cnt == 0)) 34726d88d784SJeff Roberson continue; 34736d88d784SJeff Roberson return (cache_alloc_item(zone, cache, bucket, udata, flags)); 34746d88d784SJeff Roberson } 34756d88d784SJeff Roberson critical_exit(); 34766d88d784SJeff Roberson 3477d4665eaaSJeff Roberson /* 3478d4665eaaSJeff Roberson * We can not get a bucket so try to return a single item. 
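	 * zone_alloc_item() imports a single item via uz_import and runs the
	 * zone's init and ctor on it before returning.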
3479d4665eaaSJeff Roberson */ 3480d4665eaaSJeff Roberson if (zone->uz_flags & UMA_ZONE_FIRSTTOUCH) 3481d4665eaaSJeff Roberson domain = PCPU_GET(domain); 3482d4665eaaSJeff Roberson else 3483d4665eaaSJeff Roberson domain = UMA_ANYDOMAIN; 3484d4665eaaSJeff Roberson return (zone_alloc_item(zone, udata, domain, flags)); 3485d4665eaaSJeff Roberson } 3486d4665eaaSJeff Roberson 3487d4665eaaSJeff Roberson /* See uma.h */ 3488d4665eaaSJeff Roberson void * 3489d4665eaaSJeff Roberson uma_zalloc_smr(uma_zone_t zone, int flags) 3490d4665eaaSJeff Roberson { 3491d4665eaaSJeff Roberson uma_cache_bucket_t bucket; 3492d4665eaaSJeff Roberson uma_cache_t cache; 3493d4665eaaSJeff Roberson 3494d4665eaaSJeff Roberson #ifdef UMA_ZALLOC_DEBUG 34956d88d784SJeff Roberson void *item; 34966d88d784SJeff Roberson 3497d4665eaaSJeff Roberson KASSERT((zone->uz_flags & UMA_ZONE_SMR) != 0, 3498952c8964SMark Johnston ("uma_zalloc_arg: called with non-SMR zone.")); 3499d4665eaaSJeff Roberson if (uma_zalloc_debug(zone, &item, NULL, flags) == EJUSTRETURN) 3500d4665eaaSJeff Roberson return (item); 3501d4665eaaSJeff Roberson #endif 3502d4665eaaSJeff Roberson 3503d4665eaaSJeff Roberson critical_enter(); 3504d4665eaaSJeff Roberson cache = &zone->uz_cpu[curcpu]; 3505d4665eaaSJeff Roberson bucket = &cache->uc_allocbucket; 35066d88d784SJeff Roberson if (__predict_false(bucket->ucb_cnt == 0)) 35076d88d784SJeff Roberson return (cache_alloc_retry(zone, cache, NULL, flags)); 35086d88d784SJeff Roberson return (cache_alloc_item(zone, cache, bucket, NULL, flags)); 3509d4665eaaSJeff Roberson } 3510d4665eaaSJeff Roberson 35119c2cd7e5SJeff Roberson /* See uma.h */ 35128355f576SJeff Roberson void * 35132cc35ff9SJeff Roberson uma_zalloc_arg(uma_zone_t zone, void *udata, int flags) 35148355f576SJeff Roberson { 3515376b1ba3SJeff Roberson uma_cache_bucket_t bucket; 3516ab3185d1SJeff Roberson uma_cache_t cache; 35178355f576SJeff Roberson 3518e866d8f0SMark Murray /* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */ 351919fa89e9SMark Murray random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA); 352010cb2424SMark Murray 35218355f576SJeff Roberson /* This is the fast path allocation */ 3522e63a1c2fSRyan Libby CTR3(KTR_UMA, "uma_zalloc_arg zone %s(%p) flags %d", zone->uz_name, 3523e63a1c2fSRyan Libby zone, flags); 3524a553d4b8SJeff Roberson 3525d4665eaaSJeff Roberson #ifdef UMA_ZALLOC_DEBUG 35266d88d784SJeff Roberson void *item; 35276d88d784SJeff Roberson 3528d4665eaaSJeff Roberson KASSERT((zone->uz_flags & UMA_ZONE_SMR) == 0, 3529952c8964SMark Johnston ("uma_zalloc_arg: called with SMR zone.")); 3530d4665eaaSJeff Roberson if (uma_zalloc_debug(zone, &item, udata, flags) == EJUSTRETURN) 35318d689e04SGleb Smirnoff return (item); 35328d689e04SGleb Smirnoff #endif 3533d4665eaaSJeff Roberson 35345d1ae027SRobert Watson /* 35355d1ae027SRobert Watson * If possible, allocate from the per-CPU cache. There are two 35365d1ae027SRobert Watson * requirements for safe access to the per-CPU cache: (1) the thread 35375d1ae027SRobert Watson * accessing the cache must not be preempted or yield during access, 35385d1ae027SRobert Watson * and (2) the thread must not migrate CPUs without switching which 35395d1ae027SRobert Watson * cache it accesses. We rely on a critical section to prevent 35405d1ae027SRobert Watson * preemption and migration. 
We release the critical section in 35415d1ae027SRobert Watson * order to acquire the zone mutex if we are unable to allocate from 35425d1ae027SRobert Watson * the current cache; when we re-acquire the critical section, we 35435d1ae027SRobert Watson * must detect and handle migration if it has occurred. 35445d1ae027SRobert Watson */ 35455d1ae027SRobert Watson critical_enter(); 3546cc7ce83aSJeff Roberson cache = &zone->uz_cpu[curcpu]; 3547376b1ba3SJeff Roberson bucket = &cache->uc_allocbucket; 35486d88d784SJeff Roberson if (__predict_false(bucket->ucb_cnt == 0)) 35496d88d784SJeff Roberson return (cache_alloc_retry(zone, cache, udata, flags)); 35506d88d784SJeff Roberson return (cache_alloc_item(zone, cache, bucket, udata, flags)); 3551fc03d22bSJeff Roberson } 3552fc03d22bSJeff Roberson 35538355f576SJeff Roberson /* 3554beb8beefSJeff Roberson * Replenish an alloc bucket and possibly restore an old one. Called in 3555beb8beefSJeff Roberson * a critical section. Returns in a critical section. 3556beb8beefSJeff Roberson * 35574bd61e19SJeff Roberson * A false return value indicates an allocation failure. 35584bd61e19SJeff Roberson * A true return value indicates success and the caller should retry. 3559beb8beefSJeff Roberson */ 3560beb8beefSJeff Roberson static __noinline bool 3561beb8beefSJeff Roberson cache_alloc(uma_zone_t zone, uma_cache_t cache, void *udata, int flags) 3562beb8beefSJeff Roberson { 3563beb8beefSJeff Roberson uma_bucket_t bucket; 35648c277118SMark Johnston int curdomain, domain; 3565c6fd3e23SJeff Roberson bool new; 3566beb8beefSJeff Roberson 3567beb8beefSJeff Roberson CRITICAL_ASSERT(curthread); 3568beb8beefSJeff Roberson 3569beb8beefSJeff Roberson /* 3570beb8beefSJeff Roberson * If we have run out of items in our alloc bucket see 3571beb8beefSJeff Roberson * if we can switch with the free bucket. 3572d4665eaaSJeff Roberson * 3573d4665eaaSJeff Roberson * SMR Zones can't re-use the free bucket until the sequence has 3574d4665eaaSJeff Roberson * expired. 35758355f576SJeff Roberson */ 3576c6fd3e23SJeff Roberson if ((cache_uz_flags(cache) & UMA_ZONE_SMR) == 0 && 3577d4665eaaSJeff Roberson cache->uc_freebucket.ucb_cnt != 0) { 3578d4665eaaSJeff Roberson cache_bucket_swap(&cache->uc_freebucket, 3579d4665eaaSJeff Roberson &cache->uc_allocbucket); 3580beb8beefSJeff Roberson return (true); 35818355f576SJeff Roberson } 3582fc03d22bSJeff Roberson 3583fc03d22bSJeff Roberson /* 3584fc03d22bSJeff Roberson * Discard any empty allocation bucket while we hold no locks. 3585fc03d22bSJeff Roberson */ 3586376b1ba3SJeff Roberson bucket = cache_bucket_unload_alloc(cache); 3587fc03d22bSJeff Roberson critical_exit(); 3588c6fd3e23SJeff Roberson 3589c6fd3e23SJeff Roberson if (bucket != NULL) { 3590c6fd3e23SJeff Roberson KASSERT(bucket->ub_cnt == 0, 3591c6fd3e23SJeff Roberson ("cache_alloc: Entered with non-empty alloc bucket.")); 35926fd34d6fSJeff Roberson bucket_free(zone, bucket, udata); 3593c6fd3e23SJeff Roberson } 3594fc03d22bSJeff Roberson 35955d1ae027SRobert Watson /* 35965d1ae027SRobert Watson * Attempt to retrieve the item from the per-CPU cache has failed, so 3597c6fd3e23SJeff Roberson * we must go back to the zone. This requires the zdom lock, so we 35985d1ae027SRobert Watson * must drop the critical section, then re-acquire it when we go back 35995d1ae027SRobert Watson * to the cache. Since the critical section is released, we may be 36005d1ae027SRobert Watson * preempted or migrate. 
As such, make sure not to maintain any 36015d1ae027SRobert Watson * thread-local state specific to the cache from prior to releasing 36025d1ae027SRobert Watson * the critical section. 36035d1ae027SRobert Watson */ 3604c1685086SJeff Roberson domain = PCPU_GET(domain); 36058c277118SMark Johnston if ((cache_uz_flags(cache) & UMA_ZONE_ROUNDROBIN) != 0 || 36068c277118SMark Johnston VM_DOMAIN_EMPTY(domain)) 3607c6fd3e23SJeff Roberson domain = zone_domain_highest(zone, domain); 3608c6fd3e23SJeff Roberson bucket = cache_fetch_bucket(zone, cache, domain); 3609af32cefdSMark Johnston if (bucket == NULL && zone->uz_bucket_size != 0 && !bucketdisable) { 3610beb8beefSJeff Roberson bucket = zone_alloc_bucket(zone, udata, domain, flags); 3611c6fd3e23SJeff Roberson new = true; 3612af32cefdSMark Johnston } else { 3613c6fd3e23SJeff Roberson new = false; 3614af32cefdSMark Johnston } 3615c6fd3e23SJeff Roberson 36161431a748SGleb Smirnoff CTR3(KTR_UMA, "uma_zalloc: zone %s(%p) bucket zone returned %p", 36171431a748SGleb Smirnoff zone->uz_name, zone, bucket); 36184bd61e19SJeff Roberson if (bucket == NULL) { 3619fc03d22bSJeff Roberson critical_enter(); 3620beb8beefSJeff Roberson return (false); 36214bd61e19SJeff Roberson } 36220f9b7bf3SMark Johnston 3623fc03d22bSJeff Roberson /* 3624fc03d22bSJeff Roberson * See if we lost the race or were migrated. Cache the 3625fc03d22bSJeff Roberson * initialized bucket to make this less likely or claim 3626fc03d22bSJeff Roberson * the memory directly. 3627fc03d22bSJeff Roberson */ 36284bd61e19SJeff Roberson critical_enter(); 3629cc7ce83aSJeff Roberson cache = &zone->uz_cpu[curcpu]; 3630376b1ba3SJeff Roberson if (cache->uc_allocbucket.ucb_bucket == NULL && 3631c6fd3e23SJeff Roberson ((cache_uz_flags(cache) & UMA_ZONE_FIRSTTOUCH) == 0 || 36328c277118SMark Johnston (curdomain = PCPU_GET(domain)) == domain || 36338c277118SMark Johnston VM_DOMAIN_EMPTY(curdomain))) { 3634c6fd3e23SJeff Roberson if (new) 3635c6fd3e23SJeff Roberson atomic_add_long(&ZDOM_GET(zone, domain)->uzd_imax, 3636c6fd3e23SJeff Roberson bucket->ub_cnt); 3637376b1ba3SJeff Roberson cache_bucket_load_alloc(cache, bucket); 3638beb8beefSJeff Roberson return (true); 3639c6fd3e23SJeff Roberson } 3640c6fd3e23SJeff Roberson 3641c6fd3e23SJeff Roberson /* 3642c6fd3e23SJeff Roberson * We lost the race, release this bucket and start over. 
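	 * zone_put_bucket() below stashes the freshly filled bucket in the
	 * per-domain cache so the work is not wasted.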
3643c6fd3e23SJeff Roberson */ 3644c6fd3e23SJeff Roberson critical_exit(); 3645c6fd3e23SJeff Roberson zone_put_bucket(zone, domain, bucket, udata, false); 3646c6fd3e23SJeff Roberson critical_enter(); 3647c6fd3e23SJeff Roberson 3648beb8beefSJeff Roberson return (true); 3649bbee39c6SJeff Roberson } 3650bbee39c6SJeff Roberson 3651ab3185d1SJeff Roberson void * 3652ab3185d1SJeff Roberson uma_zalloc_domain(uma_zone_t zone, void *udata, int domain, int flags) 3653bbee39c6SJeff Roberson { 365406d8bdcbSMark Johnston #ifdef NUMA 365506d8bdcbSMark Johnston uma_bucket_t bucket; 365606d8bdcbSMark Johnston uma_zone_domain_t zdom; 365706d8bdcbSMark Johnston void *item; 365806d8bdcbSMark Johnston #endif 3659ab3185d1SJeff Roberson 3660ab3185d1SJeff Roberson /* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */ 366119fa89e9SMark Murray random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA); 3662ab3185d1SJeff Roberson 3663ab3185d1SJeff Roberson /* This is the fast path allocation */ 3664e63a1c2fSRyan Libby CTR4(KTR_UMA, "uma_zalloc_domain zone %s(%p) domain %d flags %d", 3665e63a1c2fSRyan Libby zone->uz_name, zone, domain, flags); 3666ab3185d1SJeff Roberson 3667ab3185d1SJeff Roberson if (flags & M_WAITOK) { 3668ab3185d1SJeff Roberson WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, 3669ab3185d1SJeff Roberson "uma_zalloc_domain: zone \"%s\"", zone->uz_name); 3670ab3185d1SJeff Roberson } 3671ab3185d1SJeff Roberson KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(), 3672ab3185d1SJeff Roberson ("uma_zalloc_domain: called with spinlock or critical section held")); 367306d8bdcbSMark Johnston KASSERT((zone->uz_flags & UMA_ZONE_SMR) == 0, 367406d8bdcbSMark Johnston ("uma_zalloc_domain: called with SMR zone.")); 367506d8bdcbSMark Johnston #ifdef NUMA 367606d8bdcbSMark Johnston KASSERT((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) != 0, 367706d8bdcbSMark Johnston ("uma_zalloc_domain: called with non-FIRSTTOUCH zone.")); 3678ab3185d1SJeff Roberson 367906d8bdcbSMark Johnston if (vm_ndomains == 1) 368006d8bdcbSMark Johnston return (uma_zalloc_arg(zone, udata, flags)); 368106d8bdcbSMark Johnston 368206d8bdcbSMark Johnston /* 368306d8bdcbSMark Johnston * Try to allocate from the bucket cache before falling back to the keg. 368406d8bdcbSMark Johnston * We could try harder and attempt to allocate from per-CPU caches or 368506d8bdcbSMark Johnston * the per-domain cross-domain buckets, but the complexity is probably 368606d8bdcbSMark Johnston * not worth it. It is more important that frees of previous 368706d8bdcbSMark Johnston * cross-domain allocations do not blow up the cache. 
368806d8bdcbSMark Johnston */ 368906d8bdcbSMark Johnston zdom = zone_domain_lock(zone, domain); 369006d8bdcbSMark Johnston if ((bucket = zone_fetch_bucket(zone, zdom, false)) != NULL) { 369106d8bdcbSMark Johnston item = bucket->ub_bucket[bucket->ub_cnt - 1]; 369206d8bdcbSMark Johnston #ifdef INVARIANTS 369306d8bdcbSMark Johnston bucket->ub_bucket[bucket->ub_cnt - 1] = NULL; 369406d8bdcbSMark Johnston #endif 369506d8bdcbSMark Johnston bucket->ub_cnt--; 369606d8bdcbSMark Johnston zone_put_bucket(zone, domain, bucket, udata, true); 369706d8bdcbSMark Johnston item = item_ctor(zone, zone->uz_flags, zone->uz_size, udata, 369806d8bdcbSMark Johnston flags, item); 369906d8bdcbSMark Johnston if (item != NULL) { 370006d8bdcbSMark Johnston KASSERT(item_domain(item) == domain, 370106d8bdcbSMark Johnston ("%s: bucket cache item %p from wrong domain", 370206d8bdcbSMark Johnston __func__, item)); 370306d8bdcbSMark Johnston counter_u64_add(zone->uz_allocs, 1); 370406d8bdcbSMark Johnston } 370506d8bdcbSMark Johnston return (item); 370606d8bdcbSMark Johnston } 370706d8bdcbSMark Johnston ZDOM_UNLOCK(zdom); 3708ab3185d1SJeff Roberson return (zone_alloc_item(zone, udata, domain, flags)); 370906d8bdcbSMark Johnston #else 371006d8bdcbSMark Johnston return (uma_zalloc_arg(zone, udata, flags)); 371106d8bdcbSMark Johnston #endif 3712ab3185d1SJeff Roberson } 3713ab3185d1SJeff Roberson 3714ab3185d1SJeff Roberson /* 3715ab3185d1SJeff Roberson * Find a slab with some space. Prefer slabs that are partially used over those 3716ab3185d1SJeff Roberson * that are totally full. This helps to reduce fragmentation. 3717ab3185d1SJeff Roberson * 3718ab3185d1SJeff Roberson * If 'rr' is 1, search all domains starting from 'domain'. Otherwise check 3719ab3185d1SJeff Roberson * only 'domain'. 3720ab3185d1SJeff Roberson */ 3721ab3185d1SJeff Roberson static uma_slab_t 3722194a979eSMark Johnston keg_first_slab(uma_keg_t keg, int domain, bool rr) 3723ab3185d1SJeff Roberson { 3724ab3185d1SJeff Roberson uma_domain_t dom; 3725bbee39c6SJeff Roberson uma_slab_t slab; 3726ab3185d1SJeff Roberson int start; 3727ab3185d1SJeff Roberson 3728ab3185d1SJeff Roberson KASSERT(domain >= 0 && domain < vm_ndomains, 3729ab3185d1SJeff Roberson ("keg_first_slab: domain %d out of range", domain)); 37308b987a77SJeff Roberson KEG_LOCK_ASSERT(keg, domain); 3731ab3185d1SJeff Roberson 3732ab3185d1SJeff Roberson slab = NULL; 3733ab3185d1SJeff Roberson start = domain; 3734ab3185d1SJeff Roberson do { 3735ab3185d1SJeff Roberson dom = &keg->uk_domain[domain]; 37364ab3aee8SMark Johnston if ((slab = LIST_FIRST(&dom->ud_part_slab)) != NULL) 37374ab3aee8SMark Johnston return (slab); 37384ab3aee8SMark Johnston if ((slab = LIST_FIRST(&dom->ud_free_slab)) != NULL) { 3739ab3185d1SJeff Roberson LIST_REMOVE(slab, us_link); 37404ab3aee8SMark Johnston dom->ud_free_slabs--; 3741ab3185d1SJeff Roberson LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link); 3742ab3185d1SJeff Roberson return (slab); 3743ab3185d1SJeff Roberson } 3744ab3185d1SJeff Roberson if (rr) 3745ab3185d1SJeff Roberson domain = (domain + 1) % vm_ndomains; 3746ab3185d1SJeff Roberson } while (domain != start); 3747ab3185d1SJeff Roberson 3748ab3185d1SJeff Roberson return (NULL); 3749ab3185d1SJeff Roberson } 3750ab3185d1SJeff Roberson 37518b987a77SJeff Roberson /* 37528b987a77SJeff Roberson * Fetch an existing slab from a free or partial list. Returns with the 37538b987a77SJeff Roberson * keg domain lock held if a slab was found or unlocked if not. 
37548b987a77SJeff Roberson */ 3755ab3185d1SJeff Roberson static uma_slab_t 3756194a979eSMark Johnston keg_fetch_free_slab(uma_keg_t keg, int domain, bool rr, int flags) 3757ab3185d1SJeff Roberson { 37588b987a77SJeff Roberson uma_slab_t slab; 3759194a979eSMark Johnston uint32_t reserve; 3760099a0e58SBosko Milekic 37618b987a77SJeff Roberson /* HASH has a single free list. */ 376254c5ae80SRyan Libby if ((keg->uk_flags & UMA_ZFLAG_HASH) != 0) 37638b987a77SJeff Roberson domain = 0; 3764194a979eSMark Johnston 37658b987a77SJeff Roberson KEG_LOCK(keg, domain); 3766194a979eSMark Johnston reserve = (flags & M_USE_RESERVE) != 0 ? 0 : keg->uk_reserve; 37674ab3aee8SMark Johnston if (keg->uk_domain[domain].ud_free_items <= reserve || 37688b987a77SJeff Roberson (slab = keg_first_slab(keg, domain, rr)) == NULL) { 37698b987a77SJeff Roberson KEG_UNLOCK(keg, domain); 3770194a979eSMark Johnston return (NULL); 37718b987a77SJeff Roberson } 37728b987a77SJeff Roberson return (slab); 3773194a979eSMark Johnston } 3774194a979eSMark Johnston 3775194a979eSMark Johnston static uma_slab_t 3776194a979eSMark Johnston keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int rdomain, const int flags) 3777194a979eSMark Johnston { 3778194a979eSMark Johnston struct vm_domainset_iter di; 3779194a979eSMark Johnston uma_slab_t slab; 3780194a979eSMark Johnston int aflags, domain; 3781194a979eSMark Johnston bool rr; 3782194a979eSMark Johnston 3783194a979eSMark Johnston restart: 3784bbee39c6SJeff Roberson /* 3785194a979eSMark Johnston * Use the keg's policy if upper layers haven't already specified a 3786194a979eSMark Johnston * domain (as happens with first-touch zones). 3787194a979eSMark Johnston * 3788194a979eSMark Johnston * To avoid races we run the iterator with the keg lock held, but that 3789194a979eSMark Johnston * means that we cannot allow the vm_domainset layer to sleep. Thus, 3790194a979eSMark Johnston * clear M_WAITOK and handle low memory conditions locally. 3791bbee39c6SJeff Roberson */ 3792ab3185d1SJeff Roberson rr = rdomain == UMA_ANYDOMAIN; 3793ab3185d1SJeff Roberson if (rr) { 3794194a979eSMark Johnston aflags = (flags & ~M_WAITOK) | M_NOWAIT; 3795194a979eSMark Johnston vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain, 3796194a979eSMark Johnston &aflags); 3797194a979eSMark Johnston } else { 3798194a979eSMark Johnston aflags = flags; 3799194a979eSMark Johnston domain = rdomain; 3800194a979eSMark Johnston } 3801ab3185d1SJeff Roberson 3802194a979eSMark Johnston for (;;) { 3803194a979eSMark Johnston slab = keg_fetch_free_slab(keg, domain, rr, flags); 3804584061b4SJeff Roberson if (slab != NULL) 3805bbee39c6SJeff Roberson return (slab); 3806bbee39c6SJeff Roberson 3807bbee39c6SJeff Roberson /* 3808bbee39c6SJeff Roberson * M_NOVM means don't ask at all! 
3809bbee39c6SJeff Roberson */ 3810bbee39c6SJeff Roberson if (flags & M_NOVM) 3811bbee39c6SJeff Roberson break; 3812bbee39c6SJeff Roberson 381386220393SMark Johnston slab = keg_alloc_slab(keg, zone, domain, flags, aflags); 38148b987a77SJeff Roberson if (slab != NULL) 3815bbee39c6SJeff Roberson return (slab); 38163639ac42SJeff Roberson if (!rr && (flags & M_WAITOK) == 0) 38173639ac42SJeff Roberson break; 3818194a979eSMark Johnston if (rr && vm_domainset_iter_policy(&di, &domain) != 0) { 3819194a979eSMark Johnston if ((flags & M_WAITOK) != 0) { 382089d2fb14SKonstantin Belousov vm_wait_doms(&keg->uk_dr.dr_policy->ds_mask, 0); 3821194a979eSMark Johnston goto restart; 382230c5525bSAndrew Gallatin } 3823194a979eSMark Johnston break; 3824194a979eSMark Johnston } 3825ab3185d1SJeff Roberson } 3826ab3185d1SJeff Roberson 3827bbee39c6SJeff Roberson /* 3828bbee39c6SJeff Roberson * We might not have been able to get a slab but another cpu 3829bbee39c6SJeff Roberson * could have while we were unlocked. Check again before we 3830bbee39c6SJeff Roberson * fail. 3831bbee39c6SJeff Roberson */ 38328b987a77SJeff Roberson if ((slab = keg_fetch_free_slab(keg, domain, rr, flags)) != NULL) 3833bbee39c6SJeff Roberson return (slab); 38348b987a77SJeff Roberson 3835ab3185d1SJeff Roberson return (NULL); 3836ab3185d1SJeff Roberson } 3837bbee39c6SJeff Roberson 3838d56368d7SBosko Milekic static void * 38390095a784SJeff Roberson slab_alloc_item(uma_keg_t keg, uma_slab_t slab) 3840bbee39c6SJeff Roberson { 3841ab3185d1SJeff Roberson uma_domain_t dom; 3842bbee39c6SJeff Roberson void *item; 38439b8db4d0SRyan Libby int freei; 3844bbee39c6SJeff Roberson 38458b987a77SJeff Roberson KEG_LOCK_ASSERT(keg, slab->us_domain); 3846099a0e58SBosko Milekic 38478b987a77SJeff Roberson dom = &keg->uk_domain[slab->us_domain]; 38489b78b1f4SJeff Roberson freei = BIT_FFS(keg->uk_ipers, &slab->us_free) - 1; 38499b78b1f4SJeff Roberson BIT_CLR(keg->uk_ipers, freei, &slab->us_free); 38501e0701e1SJeff Roberson item = slab_item(slab, keg, freei); 3851bbee39c6SJeff Roberson slab->us_freecount--; 38524ab3aee8SMark Johnston dom->ud_free_items--; 3853ef72505eSJeff Roberson 38544ab3aee8SMark Johnston /* 38554ab3aee8SMark Johnston * Move this slab to the full list. It must be on the partial list, so 38564ab3aee8SMark Johnston * we do not need to update the free slab count. In particular, 38574ab3aee8SMark Johnston * keg_fetch_slab() always returns slabs on the partial list. 
38584ab3aee8SMark Johnston */ 3859bbee39c6SJeff Roberson if (slab->us_freecount == 0) { 3860bbee39c6SJeff Roberson LIST_REMOVE(slab, us_link); 3861ab3185d1SJeff Roberson LIST_INSERT_HEAD(&dom->ud_full_slab, slab, us_link); 3862bbee39c6SJeff Roberson } 3863bbee39c6SJeff Roberson 3864bbee39c6SJeff Roberson return (item); 3865bbee39c6SJeff Roberson } 3866bbee39c6SJeff Roberson 3867bbee39c6SJeff Roberson static int 3868b75c4efcSAndrew Turner zone_import(void *arg, void **bucket, int max, int domain, int flags) 38690095a784SJeff Roberson { 38708b987a77SJeff Roberson uma_domain_t dom; 3871b75c4efcSAndrew Turner uma_zone_t zone; 38720095a784SJeff Roberson uma_slab_t slab; 38730095a784SJeff Roberson uma_keg_t keg; 3874a03af342SSean Bruno #ifdef NUMA 3875ab3185d1SJeff Roberson int stripe; 3876a03af342SSean Bruno #endif 38770095a784SJeff Roberson int i; 38780095a784SJeff Roberson 3879b75c4efcSAndrew Turner zone = arg; 38800095a784SJeff Roberson slab = NULL; 3881584061b4SJeff Roberson keg = zone->uz_keg; 3882af526374SJeff Roberson /* Try to keep the buckets totally full */ 38830095a784SJeff Roberson for (i = 0; i < max; ) { 3884584061b4SJeff Roberson if ((slab = keg_fetch_slab(keg, zone, domain, flags)) == NULL) 38850095a784SJeff Roberson break; 3886a03af342SSean Bruno #ifdef NUMA 3887ab3185d1SJeff Roberson stripe = howmany(max, vm_ndomains); 3888a03af342SSean Bruno #endif 38898b987a77SJeff Roberson dom = &keg->uk_domain[slab->us_domain]; 38901b2dcc8cSMark Johnston do { 38910095a784SJeff Roberson bucket[i++] = slab_alloc_item(keg, slab); 38921b2dcc8cSMark Johnston if (dom->ud_free_items <= keg->uk_reserve) { 38931b2dcc8cSMark Johnston /* 38941b2dcc8cSMark Johnston * Avoid depleting the reserve after a 38951b2dcc8cSMark Johnston * successful item allocation, even if 38961b2dcc8cSMark Johnston * M_USE_RESERVE is specified. 38971b2dcc8cSMark Johnston */ 38981b2dcc8cSMark Johnston KEG_UNLOCK(keg, slab->us_domain); 38991b2dcc8cSMark Johnston goto out; 39001b2dcc8cSMark Johnston } 3901b6715dabSJeff Roberson #ifdef NUMA 3902ab3185d1SJeff Roberson /* 3903ab3185d1SJeff Roberson * If the zone is striped we pick a new slab for every 3904ab3185d1SJeff Roberson * N allocations. Eliminating this conditional will 3905ab3185d1SJeff Roberson * instead pick a new domain for each bucket rather 3906ab3185d1SJeff Roberson * than stripe within each bucket. The current option 3907ab3185d1SJeff Roberson * produces more fragmentation and requires more cpu 3908ab3185d1SJeff Roberson * time but yields better distribution. 3909ab3185d1SJeff Roberson */ 3910dfe13344SJeff Roberson if ((zone->uz_flags & UMA_ZONE_ROUNDROBIN) != 0 && 3911ab3185d1SJeff Roberson vm_ndomains > 1 && --stripe == 0) 3912ab3185d1SJeff Roberson break; 3913ab3185d1SJeff Roberson #endif 39141b2dcc8cSMark Johnston } while (slab->us_freecount != 0 && i < max); 39158b987a77SJeff Roberson KEG_UNLOCK(keg, slab->us_domain); 39161b2dcc8cSMark Johnston 3917ab3185d1SJeff Roberson /* Don't block if we allocated any successfully. 
*/ 39180095a784SJeff Roberson flags &= ~M_WAITOK; 39190095a784SJeff Roberson flags |= M_NOWAIT; 39200095a784SJeff Roberson } 39211b2dcc8cSMark Johnston out: 39220095a784SJeff Roberson return i; 39230095a784SJeff Roberson } 39240095a784SJeff Roberson 39254bd61e19SJeff Roberson static int 39264bd61e19SJeff Roberson zone_alloc_limit_hard(uma_zone_t zone, int count, int flags) 39274bd61e19SJeff Roberson { 39284bd61e19SJeff Roberson uint64_t old, new, total, max; 39294bd61e19SJeff Roberson 39304bd61e19SJeff Roberson /* 39314bd61e19SJeff Roberson * The hard case. We're going to sleep because there were existing 39324bd61e19SJeff Roberson * sleepers or because we ran out of items. This routine enforces 39334bd61e19SJeff Roberson * fairness by keeping fifo order. 39344bd61e19SJeff Roberson * 39354bd61e19SJeff Roberson * First release our ill gotten gains and make some noise. 39364bd61e19SJeff Roberson */ 39374bd61e19SJeff Roberson for (;;) { 39384bd61e19SJeff Roberson zone_free_limit(zone, count); 39394bd61e19SJeff Roberson zone_log_warning(zone); 39404bd61e19SJeff Roberson zone_maxaction(zone); 39414bd61e19SJeff Roberson if (flags & M_NOWAIT) 39424bd61e19SJeff Roberson return (0); 39434bd61e19SJeff Roberson 39444bd61e19SJeff Roberson /* 39454bd61e19SJeff Roberson * We need to allocate an item or set ourself as a sleeper 39464bd61e19SJeff Roberson * while the sleepq lock is held to avoid wakeup races. This 39474bd61e19SJeff Roberson * is essentially a home rolled semaphore. 39484bd61e19SJeff Roberson */ 39494bd61e19SJeff Roberson sleepq_lock(&zone->uz_max_items); 39504bd61e19SJeff Roberson old = zone->uz_items; 39514bd61e19SJeff Roberson do { 39524bd61e19SJeff Roberson MPASS(UZ_ITEMS_SLEEPERS(old) < UZ_ITEMS_SLEEPERS_MAX); 39534bd61e19SJeff Roberson /* Cache the max since we will evaluate twice. */ 39544bd61e19SJeff Roberson max = zone->uz_max_items; 39554bd61e19SJeff Roberson if (UZ_ITEMS_SLEEPERS(old) != 0 || 39564bd61e19SJeff Roberson UZ_ITEMS_COUNT(old) >= max) 39574bd61e19SJeff Roberson new = old + UZ_ITEMS_SLEEPER; 39584bd61e19SJeff Roberson else 39594bd61e19SJeff Roberson new = old + MIN(count, max - old); 39604bd61e19SJeff Roberson } while (atomic_fcmpset_64(&zone->uz_items, &old, new) == 0); 39614bd61e19SJeff Roberson 39624bd61e19SJeff Roberson /* We may have successfully allocated under the sleepq lock. */ 39634bd61e19SJeff Roberson if (UZ_ITEMS_SLEEPERS(new) == 0) { 39644bd61e19SJeff Roberson sleepq_release(&zone->uz_max_items); 39654bd61e19SJeff Roberson return (new - old); 39664bd61e19SJeff Roberson } 39674bd61e19SJeff Roberson 39684bd61e19SJeff Roberson /* 39694bd61e19SJeff Roberson * This is in a different cacheline from uz_items so that we 39704bd61e19SJeff Roberson * don't constantly invalidate the fastpath cacheline when we 39714bd61e19SJeff Roberson * adjust item counts. This could be limited to toggling on 39724bd61e19SJeff Roberson * transitions. 39734bd61e19SJeff Roberson */ 39744bd61e19SJeff Roberson atomic_add_32(&zone->uz_sleepers, 1); 39754bd61e19SJeff Roberson atomic_add_64(&zone->uz_sleeps, 1); 39764bd61e19SJeff Roberson 39774bd61e19SJeff Roberson /* 39784bd61e19SJeff Roberson * We have added ourselves as a sleeper. The sleepq lock 39794bd61e19SJeff Roberson * protects us from wakeup races. Sleep now and then retry. 
		 */
		sleepq_add(&zone->uz_max_items, NULL, "zonelimit", 0, 0);
		sleepq_wait(&zone->uz_max_items, PVM);

		/*
		 * After wakeup, remove ourselves as a sleeper and try
		 * again. We no longer have the sleepq lock for protection.
		 *
		 * Subtract ourselves as a sleeper while attempting to add
		 * our count.
		 */
		atomic_subtract_32(&zone->uz_sleepers, 1);
		old = atomic_fetchadd_64(&zone->uz_items,
		    -(UZ_ITEMS_SLEEPER - count));
		/* We're no longer a sleeper. */
		old -= UZ_ITEMS_SLEEPER;

		/*
		 * If we're still at the limit, restart. Notably do not
		 * block on other sleepers. Cache the max value to protect
		 * against changes via sysctl.
		 */
		total = UZ_ITEMS_COUNT(old);
		max = zone->uz_max_items;
		if (total >= max)
			continue;
		/* Truncate if necessary, otherwise wake other sleepers. */
		if (total + count > max) {
			zone_free_limit(zone, total + count - max);
			count = max - total;
		} else if (total + count < max && UZ_ITEMS_SLEEPERS(old) != 0)
			wakeup_one(&zone->uz_max_items);

		return (count);
	}
}

/*
 * Allocate 'count' items from our max_items limit. Returns the number
 * available. If M_NOWAIT is not specified it will sleep until at least
 * one item can be allocated.
 */
static int
zone_alloc_limit(uma_zone_t zone, int count, int flags)
{
	uint64_t old;
	uint64_t max;

	max = zone->uz_max_items;
	MPASS(max > 0);

	/*
	 * We expect normal allocations to succeed with a simple
	 * fetchadd.
	 */
	old = atomic_fetchadd_64(&zone->uz_items, count);
	if (__predict_true(old + count <= max))
		return (count);

	/*
	 * If we had some items and no sleepers just return the
	 * truncated value. We have to release the excess space
	 * though because that may wake sleepers who weren't woken
	 * because we were temporarily over the limit.
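	 * For example, if uz_max_items is 100 and this fetchadd raced us from
	 * 97 to 105 for a request of 8, we keep 3 items and immediately
	 * release the 5 excess back to the limit.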
40444bd61e19SJeff Roberson */ 40454bd61e19SJeff Roberson if (old < max) { 40464bd61e19SJeff Roberson zone_free_limit(zone, (old + count) - max); 40474bd61e19SJeff Roberson return (max - old); 40484bd61e19SJeff Roberson } 40494bd61e19SJeff Roberson return (zone_alloc_limit_hard(zone, count, flags)); 40504bd61e19SJeff Roberson } 40514bd61e19SJeff Roberson 40524bd61e19SJeff Roberson /* 40534bd61e19SJeff Roberson * Free a number of items back to the limit. 40544bd61e19SJeff Roberson */ 40554bd61e19SJeff Roberson static void 40564bd61e19SJeff Roberson zone_free_limit(uma_zone_t zone, int count) 40574bd61e19SJeff Roberson { 40584bd61e19SJeff Roberson uint64_t old; 40594bd61e19SJeff Roberson 40604bd61e19SJeff Roberson MPASS(count > 0); 40614bd61e19SJeff Roberson 40624bd61e19SJeff Roberson /* 40634bd61e19SJeff Roberson * In the common case we either have no sleepers or 40644bd61e19SJeff Roberson * are still over the limit and can just return. 40654bd61e19SJeff Roberson */ 40664bd61e19SJeff Roberson old = atomic_fetchadd_64(&zone->uz_items, -count); 40674bd61e19SJeff Roberson if (__predict_true(UZ_ITEMS_SLEEPERS(old) == 0 || 40684bd61e19SJeff Roberson UZ_ITEMS_COUNT(old) - count >= zone->uz_max_items)) 40694bd61e19SJeff Roberson return; 40704bd61e19SJeff Roberson 40714bd61e19SJeff Roberson /* 40724bd61e19SJeff Roberson * Moderate the rate of wakeups. Sleepers will continue 40734bd61e19SJeff Roberson * to generate wakeups if necessary. 40744bd61e19SJeff Roberson */ 40754bd61e19SJeff Roberson wakeup_one(&zone->uz_max_items); 40764bd61e19SJeff Roberson } 40774bd61e19SJeff Roberson 4078fc03d22bSJeff Roberson static uma_bucket_t 4079beb8beefSJeff Roberson zone_alloc_bucket(uma_zone_t zone, void *udata, int domain, int flags) 4080bbee39c6SJeff Roberson { 4081bbee39c6SJeff Roberson uma_bucket_t bucket; 408209c8cb71SMark Johnston int error, maxbucket, cnt; 4083bbee39c6SJeff Roberson 4084e63a1c2fSRyan Libby CTR3(KTR_UMA, "zone_alloc_bucket zone %s(%p) domain %d", zone->uz_name, 4085e63a1c2fSRyan Libby zone, domain); 408630c5525bSAndrew Gallatin 4087c1685086SJeff Roberson /* Avoid allocs targeting empty domains. */ 4088c1685086SJeff Roberson if (domain != UMA_ANYDOMAIN && VM_DOMAIN_EMPTY(domain)) 4089c1685086SJeff Roberson domain = UMA_ANYDOMAIN; 40908c277118SMark Johnston else if ((zone->uz_flags & UMA_ZONE_ROUNDROBIN) != 0) 4091c6fd3e23SJeff Roberson domain = UMA_ANYDOMAIN; 4092c1685086SJeff Roberson 40934bd61e19SJeff Roberson if (zone->uz_max_items > 0) 40944bd61e19SJeff Roberson maxbucket = zone_alloc_limit(zone, zone->uz_bucket_size, 40954bd61e19SJeff Roberson M_NOWAIT); 40964bd61e19SJeff Roberson else 409720a4e154SJeff Roberson maxbucket = zone->uz_bucket_size; 40984bd61e19SJeff Roberson if (maxbucket == 0) 40994bd61e19SJeff Roberson return (false); 4100beb8beefSJeff Roberson 41016fd34d6fSJeff Roberson /* Don't wait for buckets, preserve caller's NOVM setting. */ 41026fd34d6fSJeff Roberson bucket = bucket_alloc(zone, udata, M_NOWAIT | (flags & M_NOVM)); 4103beb8beefSJeff Roberson if (bucket == NULL) { 4104beb8beefSJeff Roberson cnt = 0; 4105beb8beefSJeff Roberson goto out; 4106beb8beefSJeff Roberson } 41070095a784SJeff Roberson 41080095a784SJeff Roberson bucket->ub_cnt = zone->uz_import(zone->uz_arg, bucket->ub_bucket, 4109beb8beefSJeff Roberson MIN(maxbucket, bucket->ub_entries), domain, flags); 41100095a784SJeff Roberson 41110095a784SJeff Roberson /* 41120095a784SJeff Roberson * Initialize the memory if necessary. 
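	 * Only the zone's init runs here; the ctor is applied later, by
	 * item_ctor(), when the item is handed out from a cache bucket.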
41130095a784SJeff Roberson */ 41140095a784SJeff Roberson if (bucket->ub_cnt != 0 && zone->uz_init != NULL) { 4115099a0e58SBosko Milekic int i; 4116bbee39c6SJeff Roberson 411709c8cb71SMark Johnston for (i = 0; i < bucket->ub_cnt; i++) { 411809c8cb71SMark Johnston kasan_mark_item_valid(zone, bucket->ub_bucket[i]); 411909c8cb71SMark Johnston error = zone->uz_init(bucket->ub_bucket[i], 412009c8cb71SMark Johnston zone->uz_size, flags); 412109c8cb71SMark Johnston kasan_mark_item_invalid(zone, bucket->ub_bucket[i]); 412209c8cb71SMark Johnston if (error != 0) 4123b23f72e9SBrian Feldman break; 412409c8cb71SMark Johnston } 412509c8cb71SMark Johnston 4126b23f72e9SBrian Feldman /* 4127b23f72e9SBrian Feldman * If we couldn't initialize the whole bucket, put the 4128b23f72e9SBrian Feldman * rest back onto the freelist. 4129b23f72e9SBrian Feldman */ 4130b23f72e9SBrian Feldman if (i != bucket->ub_cnt) { 4131af526374SJeff Roberson zone->uz_release(zone->uz_arg, &bucket->ub_bucket[i], 41320095a784SJeff Roberson bucket->ub_cnt - i); 4133a5a262c6SBosko Milekic #ifdef INVARIANTS 41340095a784SJeff Roberson bzero(&bucket->ub_bucket[i], 41350095a784SJeff Roberson sizeof(void *) * (bucket->ub_cnt - i)); 4136a5a262c6SBosko Milekic #endif 4137b23f72e9SBrian Feldman bucket->ub_cnt = i; 4138b23f72e9SBrian Feldman } 4139099a0e58SBosko Milekic } 4140099a0e58SBosko Milekic 4141beb8beefSJeff Roberson cnt = bucket->ub_cnt; 4142f7104ccdSAlexander Motin if (bucket->ub_cnt == 0) { 41436fd34d6fSJeff Roberson bucket_free(zone, bucket, udata); 41442efcc8cbSGleb Smirnoff counter_u64_add(zone->uz_fails, 1); 4145beb8beefSJeff Roberson bucket = NULL; 4146beb8beefSJeff Roberson } 4147beb8beefSJeff Roberson out: 41484bd61e19SJeff Roberson if (zone->uz_max_items > 0 && cnt < maxbucket) 41494bd61e19SJeff Roberson zone_free_limit(zone, maxbucket - cnt); 4150fc03d22bSJeff Roberson 4151fc03d22bSJeff Roberson return (bucket); 4152fc03d22bSJeff Roberson } 4153fc03d22bSJeff Roberson 41548355f576SJeff Roberson /* 41550095a784SJeff Roberson * Allocates a single item from a zone. 41568355f576SJeff Roberson * 41578355f576SJeff Roberson * Arguments 41588355f576SJeff Roberson * zone The zone to alloc for. 41598355f576SJeff Roberson * udata The data to be passed to the constructor. 4160ab3185d1SJeff Roberson * domain The domain to allocate from or UMA_ANYDOMAIN. 4161a163d034SWarner Losh * flags M_WAITOK, M_NOWAIT, M_ZERO. 41628355f576SJeff Roberson * 41638355f576SJeff Roberson * Returns 41648355f576SJeff Roberson * NULL if there is no memory and M_NOWAIT is set 4165bbee39c6SJeff Roberson * An item if successful 41668355f576SJeff Roberson */ 41678355f576SJeff Roberson 41688355f576SJeff Roberson static void * 4169ab3185d1SJeff Roberson zone_alloc_item(uma_zone_t zone, void *udata, int domain, int flags) 41708355f576SJeff Roberson { 41718355f576SJeff Roberson void *item; 41728355f576SJeff Roberson 4173791dda87SAndrew Gallatin if (zone->uz_max_items > 0 && zone_alloc_limit(zone, 1, flags) == 0) { 4174791dda87SAndrew Gallatin counter_u64_add(zone->uz_fails, 1); 4175bb15d1c7SGleb Smirnoff return (NULL); 4176791dda87SAndrew Gallatin } 41778355f576SJeff Roberson 4178c1685086SJeff Roberson /* Avoid allocs targeting empty domains. 
*/ 4179c1685086SJeff Roberson if (domain != UMA_ANYDOMAIN && VM_DOMAIN_EMPTY(domain)) 418030c5525bSAndrew Gallatin domain = UMA_ANYDOMAIN; 4181c1685086SJeff Roberson 4182ab3185d1SJeff Roberson if (zone->uz_import(zone->uz_arg, &item, 1, domain, flags) != 1) 4183beb8beefSJeff Roberson goto fail_cnt; 41848355f576SJeff Roberson 4185099a0e58SBosko Milekic /* 4186099a0e58SBosko Milekic * We have to call both the zone's init (not the keg's init) 4187099a0e58SBosko Milekic * and the zone's ctor. This is because the item is going from 4188099a0e58SBosko Milekic * a keg slab directly to the user, and the user is expecting it 4189099a0e58SBosko Milekic * to be both zone-init'd as well as zone-ctor'd. 4190099a0e58SBosko Milekic */ 4191b23f72e9SBrian Feldman if (zone->uz_init != NULL) { 419209c8cb71SMark Johnston int error; 419309c8cb71SMark Johnston 419409c8cb71SMark Johnston kasan_mark_item_valid(zone, item); 419509c8cb71SMark Johnston error = zone->uz_init(item, zone->uz_size, flags); 419609c8cb71SMark Johnston kasan_mark_item_invalid(zone, item); 419709c8cb71SMark Johnston if (error != 0) { 4198bb15d1c7SGleb Smirnoff zone_free_item(zone, item, udata, SKIP_FINI | SKIP_CNT); 4199beb8beefSJeff Roberson goto fail_cnt; 4200beb8beefSJeff Roberson } 4201beb8beefSJeff Roberson } 4202d4665eaaSJeff Roberson item = item_ctor(zone, zone->uz_flags, zone->uz_size, udata, flags, 4203d4665eaaSJeff Roberson item); 4204beb8beefSJeff Roberson if (item == NULL) 42050095a784SJeff Roberson goto fail; 42068355f576SJeff Roberson 42072efcc8cbSGleb Smirnoff counter_u64_add(zone->uz_allocs, 1); 42081431a748SGleb Smirnoff CTR3(KTR_UMA, "zone_alloc_item item %p from %s(%p)", item, 42091431a748SGleb Smirnoff zone->uz_name, zone); 42101431a748SGleb Smirnoff 42118355f576SJeff Roberson return (item); 42120095a784SJeff Roberson 4213beb8beefSJeff Roberson fail_cnt: 4214beb8beefSJeff Roberson counter_u64_add(zone->uz_fails, 1); 42150095a784SJeff Roberson fail: 42164bd61e19SJeff Roberson if (zone->uz_max_items > 0) 42174bd61e19SJeff Roberson zone_free_limit(zone, 1); 42181431a748SGleb Smirnoff CTR2(KTR_UMA, "zone_alloc_item failed from %s(%p)", 42191431a748SGleb Smirnoff zone->uz_name, zone); 42204bd61e19SJeff Roberson 42210095a784SJeff Roberson return (NULL); 42228355f576SJeff Roberson } 42238355f576SJeff Roberson 42248355f576SJeff Roberson /* See uma.h */ 42258355f576SJeff Roberson void 4226d4665eaaSJeff Roberson uma_zfree_smr(uma_zone_t zone, void *item) 4227d4665eaaSJeff Roberson { 4228d4665eaaSJeff Roberson uma_cache_t cache; 4229d4665eaaSJeff Roberson uma_cache_bucket_t bucket; 4230c6fd3e23SJeff Roberson int itemdomain, uz_flags; 4231d4665eaaSJeff Roberson 4232d4665eaaSJeff Roberson #ifdef UMA_ZALLOC_DEBUG 4233d4665eaaSJeff Roberson KASSERT((zone->uz_flags & UMA_ZONE_SMR) != 0, 4234952c8964SMark Johnston ("uma_zfree_smr: called with non-SMR zone.")); 4235d4665eaaSJeff Roberson KASSERT(item != NULL, ("uma_zfree_smr: Called with NULL pointer.")); 4236c6fd3e23SJeff Roberson SMR_ASSERT_NOT_ENTERED(zone->uz_smr); 4237d4665eaaSJeff Roberson if (uma_zfree_debug(zone, item, NULL) == EJUSTRETURN) 4238d4665eaaSJeff Roberson return; 4239d4665eaaSJeff Roberson #endif 4240d4665eaaSJeff Roberson cache = &zone->uz_cpu[curcpu]; 4241d4665eaaSJeff Roberson uz_flags = cache_uz_flags(cache); 4242c6fd3e23SJeff Roberson itemdomain = 0; 4243d4665eaaSJeff Roberson #ifdef NUMA 4244d4665eaaSJeff Roberson if ((uz_flags & UMA_ZONE_FIRSTTOUCH) != 0) 424581302f1dSMark Johnston itemdomain = item_domain(item); 4246d4665eaaSJeff Roberson #endif 
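	/*
	 * Fast path: push the item onto this CPU's free bucket (or the cross
	 * bucket when the item belongs to a remote NUMA domain) from within a
	 * critical section. cache_free() handles full buckets and returns
	 * true when the push should be retried.
	 */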
4247d4665eaaSJeff Roberson critical_enter(); 4248d4665eaaSJeff Roberson do { 4249d4665eaaSJeff Roberson cache = &zone->uz_cpu[curcpu]; 4250d4665eaaSJeff Roberson /* SMR Zones must free to the free bucket. */ 4251d4665eaaSJeff Roberson bucket = &cache->uc_freebucket; 4252d4665eaaSJeff Roberson #ifdef NUMA 4253d4665eaaSJeff Roberson if ((uz_flags & UMA_ZONE_FIRSTTOUCH) != 0 && 4254c6fd3e23SJeff Roberson PCPU_GET(domain) != itemdomain) { 4255d4665eaaSJeff Roberson bucket = &cache->uc_crossbucket; 4256d4665eaaSJeff Roberson } 4257d4665eaaSJeff Roberson #endif 4258d4665eaaSJeff Roberson if (__predict_true(bucket->ucb_cnt < bucket->ucb_entries)) { 4259d4665eaaSJeff Roberson cache_bucket_push(cache, bucket, item); 4260d4665eaaSJeff Roberson critical_exit(); 4261d4665eaaSJeff Roberson return; 4262d4665eaaSJeff Roberson } 4263d4665eaaSJeff Roberson } while (cache_free(zone, cache, NULL, item, itemdomain)); 4264d4665eaaSJeff Roberson critical_exit(); 4265d4665eaaSJeff Roberson 4266d4665eaaSJeff Roberson /* 4267d4665eaaSJeff Roberson * If nothing else caught this, we'll just do an internal free. 4268d4665eaaSJeff Roberson */ 4269d4665eaaSJeff Roberson zone_free_item(zone, item, NULL, SKIP_NONE); 4270d4665eaaSJeff Roberson } 4271d4665eaaSJeff Roberson 4272d4665eaaSJeff Roberson /* See uma.h */ 4273d4665eaaSJeff Roberson void 42748355f576SJeff Roberson uma_zfree_arg(uma_zone_t zone, void *item, void *udata) 42758355f576SJeff Roberson { 42768355f576SJeff Roberson uma_cache_t cache; 4277376b1ba3SJeff Roberson uma_cache_bucket_t bucket; 4278c6fd3e23SJeff Roberson int itemdomain, uz_flags; 42798355f576SJeff Roberson 4280e866d8f0SMark Murray /* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */ 428119fa89e9SMark Murray random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA); 428210cb2424SMark Murray 4283e63a1c2fSRyan Libby CTR2(KTR_UMA, "uma_zfree_arg zone %s(%p)", zone->uz_name, zone); 42843659f747SRobert Watson 4285d4665eaaSJeff Roberson #ifdef UMA_ZALLOC_DEBUG 4286d4665eaaSJeff Roberson KASSERT((zone->uz_flags & UMA_ZONE_SMR) == 0, 4287952c8964SMark Johnston ("uma_zfree_arg: called with SMR zone.")); 4288d4665eaaSJeff Roberson if (uma_zfree_debug(zone, item, udata) == EJUSTRETURN) 4289d4665eaaSJeff Roberson return; 4290d4665eaaSJeff Roberson #endif 429120ed0cb0SMatthew D Fleming /* uma_zfree(..., NULL) does nothing, to match free(9). */ 429220ed0cb0SMatthew D Fleming if (item == NULL) 429320ed0cb0SMatthew D Fleming return; 4294cc7ce83aSJeff Roberson 4295cc7ce83aSJeff Roberson /* 4296cc7ce83aSJeff Roberson * We are accessing the per-cpu cache without a critical section to 4297cc7ce83aSJeff Roberson * fetch size and flags. This is acceptable, if we are preempted we 4298cc7ce83aSJeff Roberson * will simply read another cpu's line. 4299cc7ce83aSJeff Roberson */ 4300cc7ce83aSJeff Roberson cache = &zone->uz_cpu[curcpu]; 4301cc7ce83aSJeff Roberson uz_flags = cache_uz_flags(cache); 4302d4665eaaSJeff Roberson if (UMA_ALWAYS_CTORDTOR || 4303d4665eaaSJeff Roberson __predict_false((uz_flags & UMA_ZFLAG_CTORDTOR) != 0)) 4304cc7ce83aSJeff Roberson item_dtor(zone, item, cache_uz_size(cache), udata, SKIP_NONE); 4305ef72505eSJeff Roberson 4306af7f9b97SJeff Roberson /* 4307af7f9b97SJeff Roberson * The race here is acceptable. If we miss it we'll just have to wait 4308af7f9b97SJeff Roberson * a little longer for the limits to be reset. 
	 */
	if (__predict_false(uz_flags & UMA_ZFLAG_LIMIT)) {
		if (atomic_load_32(&zone->uz_sleepers) > 0)
			goto zfree_item;
	}

	/*
	 * If possible, free to the per-CPU cache. There are two
	 * requirements for safe access to the per-CPU cache: (1) the thread
	 * accessing the cache must not be preempted or yield during access,
	 * and (2) the thread must not migrate CPUs without switching which
	 * cache it accesses. We rely on a critical section to prevent
	 * preemption and migration. We release the critical section in
	 * order to acquire the zone mutex if we are unable to free to the
	 * current cache; when we re-acquire the critical section, we must
	 * detect and handle migration if it has occurred.
	 */
	itemdomain = 0;
#ifdef NUMA
	if ((uz_flags & UMA_ZONE_FIRSTTOUCH) != 0)
		itemdomain = item_domain(item);
#endif
	critical_enter();
	do {
		cache = &zone->uz_cpu[curcpu];
		/*
		 * Try to free into the allocbucket first to give LIFO
		 * ordering for cache-hot data structures. Spill over
		 * into the freebucket if necessary. Alloc will swap
		 * them if one runs dry.
		 */
		bucket = &cache->uc_allocbucket;
#ifdef NUMA
		if ((uz_flags & UMA_ZONE_FIRSTTOUCH) != 0 &&
		    PCPU_GET(domain) != itemdomain) {
			bucket = &cache->uc_crossbucket;
		} else
#endif
		if (bucket->ucb_cnt == bucket->ucb_entries &&
		    cache->uc_freebucket.ucb_cnt <
		    cache->uc_freebucket.ucb_entries)
			cache_bucket_swap(&cache->uc_freebucket,
			    &cache->uc_allocbucket);
		if (__predict_true(bucket->ucb_cnt < bucket->ucb_entries)) {
			cache_bucket_push(cache, bucket, item);
			critical_exit();
			return;
		}
	} while (cache_free(zone, cache, udata, item, itemdomain));
	critical_exit();

	/*
	 * If nothing else caught this, we'll just do an internal free.
	 */
zfree_item:
	zone_free_item(zone, item, udata, SKIP_DTOR);
}

#ifdef NUMA
/*
 * Sort cross-domain free buckets into per-domain buckets and cache
 * them.
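 * Each item is pushed onto its destination domain's uzd_cross bucket; full
 * buckets are collected and, once the cross lock is dropped, cached in their
 * home domains.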
437191d947bfSJeff Roberson */ 437291d947bfSJeff Roberson static void 437391d947bfSJeff Roberson zone_free_cross(uma_zone_t zone, uma_bucket_t bucket, void *udata) 437491d947bfSJeff Roberson { 4375991f23efSMark Johnston struct uma_bucketlist emptybuckets, fullbuckets; 437691d947bfSJeff Roberson uma_zone_domain_t zdom; 437791d947bfSJeff Roberson uma_bucket_t b; 4378543117beSJeff Roberson smr_seq_t seq; 437991d947bfSJeff Roberson void *item; 438091d947bfSJeff Roberson int domain; 438191d947bfSJeff Roberson 438291d947bfSJeff Roberson CTR3(KTR_UMA, 438391d947bfSJeff Roberson "uma_zfree: zone %s(%p) draining cross bucket %p", 438491d947bfSJeff Roberson zone->uz_name, zone, bucket); 438591d947bfSJeff Roberson 4386543117beSJeff Roberson /* 4387543117beSJeff Roberson * It is possible for buckets to arrive here out of order so we fetch 4388543117beSJeff Roberson * the current smr seq rather than accepting the bucket's. 4389543117beSJeff Roberson */ 4390543117beSJeff Roberson seq = SMR_SEQ_INVALID; 4391543117beSJeff Roberson if ((zone->uz_flags & UMA_ZONE_SMR) != 0) 4392226dd6dbSJeff Roberson seq = smr_advance(zone->uz_smr); 4393226dd6dbSJeff Roberson 4394226dd6dbSJeff Roberson /* 4395226dd6dbSJeff Roberson * To avoid having ndomain * ndomain buckets for sorting we have a 4396226dd6dbSJeff Roberson * lock on the current crossfree bucket. A full matrix with 4397226dd6dbSJeff Roberson * per-domain locking could be used if necessary. 4398226dd6dbSJeff Roberson */ 4399991f23efSMark Johnston STAILQ_INIT(&emptybuckets); 4400226dd6dbSJeff Roberson STAILQ_INIT(&fullbuckets); 4401226dd6dbSJeff Roberson ZONE_CROSS_LOCK(zone); 4402991f23efSMark Johnston for (; bucket->ub_cnt > 0; bucket->ub_cnt--) { 440391d947bfSJeff Roberson item = bucket->ub_bucket[bucket->ub_cnt - 1]; 440481302f1dSMark Johnston domain = item_domain(item); 4405c6fd3e23SJeff Roberson zdom = ZDOM_GET(zone, domain); 440691d947bfSJeff Roberson if (zdom->uzd_cross == NULL) { 4407991f23efSMark Johnston if ((b = STAILQ_FIRST(&emptybuckets)) != NULL) { 4408991f23efSMark Johnston STAILQ_REMOVE_HEAD(&emptybuckets, ub_link); 4409991f23efSMark Johnston zdom->uzd_cross = b; 4410991f23efSMark Johnston } else { 4411991f23efSMark Johnston /* 4412991f23efSMark Johnston * Avoid allocating a bucket with the cross lock 4413991f23efSMark Johnston * held, since allocation can trigger a 4414991f23efSMark Johnston * cross-domain free and bucket zones may 4415991f23efSMark Johnston * allocate from each other. 
4416991f23efSMark Johnston */ 4417991f23efSMark Johnston ZONE_CROSS_UNLOCK(zone); 4418991f23efSMark Johnston b = bucket_alloc(zone, udata, M_NOWAIT); 4419991f23efSMark Johnston if (b == NULL) 4420991f23efSMark Johnston goto out; 4421991f23efSMark Johnston ZONE_CROSS_LOCK(zone); 4422991f23efSMark Johnston if (zdom->uzd_cross != NULL) { 4423991f23efSMark Johnston STAILQ_INSERT_HEAD(&emptybuckets, b, 4424991f23efSMark Johnston ub_link); 4425991f23efSMark Johnston } else { 4426991f23efSMark Johnston zdom->uzd_cross = b; 4427991f23efSMark Johnston } 4428991f23efSMark Johnston } 442991d947bfSJeff Roberson } 4430543117beSJeff Roberson b = zdom->uzd_cross; 4431543117beSJeff Roberson b->ub_bucket[b->ub_cnt++] = item; 4432543117beSJeff Roberson b->ub_seq = seq; 4433543117beSJeff Roberson if (b->ub_cnt == b->ub_entries) { 4434543117beSJeff Roberson STAILQ_INSERT_HEAD(&fullbuckets, b, ub_link); 4435991f23efSMark Johnston if ((b = STAILQ_FIRST(&emptybuckets)) != NULL) 4436991f23efSMark Johnston STAILQ_REMOVE_HEAD(&emptybuckets, ub_link); 4437991f23efSMark Johnston zdom->uzd_cross = b; 443891d947bfSJeff Roberson } 443991d947bfSJeff Roberson } 444091d947bfSJeff Roberson ZONE_CROSS_UNLOCK(zone); 4441991f23efSMark Johnston out: 4442c6fd3e23SJeff Roberson if (bucket->ub_cnt == 0) 4443d4665eaaSJeff Roberson bucket->ub_seq = SMR_SEQ_INVALID; 444491d947bfSJeff Roberson bucket_free(zone, bucket, udata); 4445c6fd3e23SJeff Roberson 4446991f23efSMark Johnston while ((b = STAILQ_FIRST(&emptybuckets)) != NULL) { 4447991f23efSMark Johnston STAILQ_REMOVE_HEAD(&emptybuckets, ub_link); 4448991f23efSMark Johnston bucket_free(zone, b, udata); 4449991f23efSMark Johnston } 4450c6fd3e23SJeff Roberson while ((b = STAILQ_FIRST(&fullbuckets)) != NULL) { 4451c6fd3e23SJeff Roberson STAILQ_REMOVE_HEAD(&fullbuckets, ub_link); 445281302f1dSMark Johnston domain = item_domain(b->ub_bucket[0]); 4453c6fd3e23SJeff Roberson zone_put_bucket(zone, domain, b, udata, true); 4454c6fd3e23SJeff Roberson } 445591d947bfSJeff Roberson } 445691d947bfSJeff Roberson #endif 445791d947bfSJeff Roberson 44580a81b439SJeff Roberson static void 44590a81b439SJeff Roberson zone_free_bucket(uma_zone_t zone, uma_bucket_t bucket, void *udata, 4460c6fd3e23SJeff Roberson int itemdomain, bool ws) 44610a81b439SJeff Roberson { 44620a81b439SJeff Roberson 4463dfe13344SJeff Roberson #ifdef NUMA 44640a81b439SJeff Roberson /* 44650a81b439SJeff Roberson * Buckets coming from the wrong domain will be entirely for the 44660a81b439SJeff Roberson * only other domain on two domain systems. In this case we can 44670a81b439SJeff Roberson * simply cache them. Otherwise we need to sort them back to 446891d947bfSJeff Roberson * correct domains. 44690a81b439SJeff Roberson */ 4470c6fd3e23SJeff Roberson if ((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) != 0 && 4471c6fd3e23SJeff Roberson vm_ndomains > 2 && PCPU_GET(domain) != itemdomain) { 447291d947bfSJeff Roberson zone_free_cross(zone, bucket, udata); 44730a81b439SJeff Roberson return; 44740a81b439SJeff Roberson } 44750a81b439SJeff Roberson #endif 447691d947bfSJeff Roberson 44770a81b439SJeff Roberson /* 44780a81b439SJeff Roberson * Attempt to save the bucket in the zone's domain bucket cache. 
44790a81b439SJeff Roberson */ 44800a81b439SJeff Roberson CTR3(KTR_UMA, 44810a81b439SJeff Roberson "uma_zfree: zone %s(%p) putting bucket %p on free list", 44820a81b439SJeff Roberson zone->uz_name, zone, bucket); 44830a81b439SJeff Roberson /* ub_cnt is pointing to the last free item */ 4484c6fd3e23SJeff Roberson if ((zone->uz_flags & UMA_ZONE_ROUNDROBIN) != 0) 4485c6fd3e23SJeff Roberson itemdomain = zone_domain_lowest(zone, itemdomain); 4486c6fd3e23SJeff Roberson zone_put_bucket(zone, itemdomain, bucket, udata, ws); 44878355f576SJeff Roberson } 4488fc03d22bSJeff Roberson 44894d104ba0SAlexander Motin /* 44900a81b439SJeff Roberson * Populate a free or cross bucket for the current cpu cache. Free any 44910a81b439SJeff Roberson * existing full bucket either to the zone cache or back to the slab layer. 44920a81b439SJeff Roberson * 44930a81b439SJeff Roberson * Enters and returns in a critical section. false return indicates that 44940a81b439SJeff Roberson * we can not satisfy this free in the cache layer. true indicates that 44950a81b439SJeff Roberson * the caller should retry. 44964d104ba0SAlexander Motin */ 44970a81b439SJeff Roberson static __noinline bool 44980a81b439SJeff Roberson cache_free(uma_zone_t zone, uma_cache_t cache, void *udata, void *item, 44990a81b439SJeff Roberson int itemdomain) 45000a81b439SJeff Roberson { 4501dfe13344SJeff Roberson uma_cache_bucket_t cbucket; 4502d4665eaaSJeff Roberson uma_bucket_t newbucket, bucket; 45030a81b439SJeff Roberson 45040a81b439SJeff Roberson CRITICAL_ASSERT(curthread); 45050a81b439SJeff Roberson 4506d4665eaaSJeff Roberson if (zone->uz_bucket_size == 0) 45070a81b439SJeff Roberson return false; 45080a81b439SJeff Roberson 4509cc7ce83aSJeff Roberson cache = &zone->uz_cpu[curcpu]; 4510d4665eaaSJeff Roberson newbucket = NULL; 45110a81b439SJeff Roberson 45120a81b439SJeff Roberson /* 4513dfe13344SJeff Roberson * FIRSTTOUCH domains need to free to the correct zdom. When 4514dfe13344SJeff Roberson * enabled this is the zdom of the item. The bucket is the 4515dfe13344SJeff Roberson * cross bucket if the current domain and itemdomain do not match. 45160a81b439SJeff Roberson */ 4517dfe13344SJeff Roberson cbucket = &cache->uc_freebucket; 4518dfe13344SJeff Roberson #ifdef NUMA 4519c6fd3e23SJeff Roberson if ((cache_uz_flags(cache) & UMA_ZONE_FIRSTTOUCH) != 0) { 4520c6fd3e23SJeff Roberson if (PCPU_GET(domain) != itemdomain) { 4521dfe13344SJeff Roberson cbucket = &cache->uc_crossbucket; 4522dfe13344SJeff Roberson if (cbucket->ucb_cnt != 0) 4523c6fd3e23SJeff Roberson counter_u64_add(zone->uz_xdomain, 4524dfe13344SJeff Roberson cbucket->ucb_cnt); 4525dfe13344SJeff Roberson } 4526c6fd3e23SJeff Roberson } 45270a81b439SJeff Roberson #endif 4528dfe13344SJeff Roberson bucket = cache_bucket_unload(cbucket); 4529c6fd3e23SJeff Roberson KASSERT(bucket == NULL || bucket->ub_cnt == bucket->ub_entries, 4530c6fd3e23SJeff Roberson ("cache_free: Entered with non-full free bucket.")); 45310a81b439SJeff Roberson 45320a81b439SJeff Roberson /* We are no longer associated with this CPU. */ 45330a81b439SJeff Roberson critical_exit(); 45340a81b439SJeff Roberson 4535d4665eaaSJeff Roberson /* 4536d4665eaaSJeff Roberson * Don't let SMR zones operate without a free bucket. Force 4537d4665eaaSJeff Roberson * a synchronize and re-use this one. We will only degrade 4538d4665eaaSJeff Roberson * to a synchronize every bucket_size items rather than every 4539d4665eaaSJeff Roberson * item if we fail to allocate a bucket. 
4540d4665eaaSJeff Roberson */ 4541d4665eaaSJeff Roberson if ((zone->uz_flags & UMA_ZONE_SMR) != 0) { 4542d4665eaaSJeff Roberson if (bucket != NULL) 4543d4665eaaSJeff Roberson bucket->ub_seq = smr_advance(zone->uz_smr); 4544d4665eaaSJeff Roberson newbucket = bucket_alloc(zone, udata, M_NOWAIT); 4545d4665eaaSJeff Roberson if (newbucket == NULL && bucket != NULL) { 4546d4665eaaSJeff Roberson bucket_drain(zone, bucket); 4547d4665eaaSJeff Roberson newbucket = bucket; 4548d4665eaaSJeff Roberson bucket = NULL; 4549d4665eaaSJeff Roberson } 4550d4665eaaSJeff Roberson } else if (!bucketdisable) 4551d4665eaaSJeff Roberson newbucket = bucket_alloc(zone, udata, M_NOWAIT); 4552d4665eaaSJeff Roberson 45530a81b439SJeff Roberson if (bucket != NULL) 4554c6fd3e23SJeff Roberson zone_free_bucket(zone, bucket, udata, itemdomain, true); 4555a553d4b8SJeff Roberson 4556fc03d22bSJeff Roberson critical_enter(); 4557d4665eaaSJeff Roberson if ((bucket = newbucket) == NULL) 45580a81b439SJeff Roberson return (false); 4559cc7ce83aSJeff Roberson cache = &zone->uz_cpu[curcpu]; 4560dfe13344SJeff Roberson #ifdef NUMA 4561fc03d22bSJeff Roberson /* 45620a81b439SJeff Roberson * Check to see if we should be populating the cross bucket. If it 45630a81b439SJeff Roberson * is already populated we will fall through and attempt to populate 45640a81b439SJeff Roberson * the free bucket. 4565fc03d22bSJeff Roberson */ 4566c6fd3e23SJeff Roberson if ((cache_uz_flags(cache) & UMA_ZONE_FIRSTTOUCH) != 0) { 4567c6fd3e23SJeff Roberson if (PCPU_GET(domain) != itemdomain && 4568376b1ba3SJeff Roberson cache->uc_crossbucket.ucb_bucket == NULL) { 4569376b1ba3SJeff Roberson cache_bucket_load_cross(cache, bucket); 45700a81b439SJeff Roberson return (true); 45710a81b439SJeff Roberson } 45720a81b439SJeff Roberson } 45730a81b439SJeff Roberson #endif 45740a81b439SJeff Roberson /* 45750a81b439SJeff Roberson * We may have lost the race to fill the bucket or switched CPUs. 45760a81b439SJeff Roberson */ 4577376b1ba3SJeff Roberson if (cache->uc_freebucket.ucb_bucket != NULL) { 4578fc03d22bSJeff Roberson critical_exit(); 45796fd34d6fSJeff Roberson bucket_free(zone, bucket, udata); 45800a81b439SJeff Roberson critical_enter(); 45810a81b439SJeff Roberson } else 4582376b1ba3SJeff Roberson cache_bucket_load_free(cache, bucket); 45838355f576SJeff Roberson 45840a81b439SJeff Roberson return (true); 45858355f576SJeff Roberson } 45868355f576SJeff Roberson 45878355f576SJeff Roberson static void 4588bb15d1c7SGleb Smirnoff slab_free_item(uma_zone_t zone, uma_slab_t slab, void *item) 45898355f576SJeff Roberson { 4590bb15d1c7SGleb Smirnoff uma_keg_t keg; 4591ab3185d1SJeff Roberson uma_domain_t dom; 45929b8db4d0SRyan Libby int freei; 4593099a0e58SBosko Milekic 4594bb15d1c7SGleb Smirnoff keg = zone->uz_keg; 45958b987a77SJeff Roberson KEG_LOCK_ASSERT(keg, slab->us_domain); 4596ab3185d1SJeff Roberson 45978355f576SJeff Roberson /* Do we need to remove from any lists? 
*/ 45988b987a77SJeff Roberson dom = &keg->uk_domain[slab->us_domain]; 4599099a0e58SBosko Milekic if (slab->us_freecount + 1 == keg->uk_ipers) { 46008355f576SJeff Roberson LIST_REMOVE(slab, us_link); 4601ab3185d1SJeff Roberson LIST_INSERT_HEAD(&dom->ud_free_slab, slab, us_link); 46024ab3aee8SMark Johnston dom->ud_free_slabs++; 46038355f576SJeff Roberson } else if (slab->us_freecount == 0) { 46048355f576SJeff Roberson LIST_REMOVE(slab, us_link); 4605ab3185d1SJeff Roberson LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link); 46068355f576SJeff Roberson } 46078355f576SJeff Roberson 4608ef72505eSJeff Roberson /* Slab management. */ 46091e0701e1SJeff Roberson freei = slab_item_index(slab, keg, item); 46109b78b1f4SJeff Roberson BIT_SET(keg->uk_ipers, freei, &slab->us_free); 46118355f576SJeff Roberson slab->us_freecount++; 46128355f576SJeff Roberson 4613ef72505eSJeff Roberson /* Keg statistics. */ 46144ab3aee8SMark Johnston dom->ud_free_items++; 46150095a784SJeff Roberson } 46160095a784SJeff Roberson 46170095a784SJeff Roberson static void 4618b75c4efcSAndrew Turner zone_release(void *arg, void **bucket, int cnt) 46190095a784SJeff Roberson { 46208b987a77SJeff Roberson struct mtx *lock; 4621b75c4efcSAndrew Turner uma_zone_t zone; 46220095a784SJeff Roberson uma_slab_t slab; 46230095a784SJeff Roberson uma_keg_t keg; 46240095a784SJeff Roberson uint8_t *mem; 46258b987a77SJeff Roberson void *item; 46260095a784SJeff Roberson int i; 46278355f576SJeff Roberson 4628b75c4efcSAndrew Turner zone = arg; 4629bb15d1c7SGleb Smirnoff keg = zone->uz_keg; 46308b987a77SJeff Roberson lock = NULL; 463154c5ae80SRyan Libby if (__predict_false((zone->uz_flags & UMA_ZFLAG_HASH) != 0)) 46328b987a77SJeff Roberson lock = KEG_LOCK(keg, 0); 46330095a784SJeff Roberson for (i = 0; i < cnt; i++) { 46340095a784SJeff Roberson item = bucket[i]; 463554c5ae80SRyan Libby if (__predict_true((zone->uz_flags & UMA_ZFLAG_VTOSLAB) != 0)) { 46360095a784SJeff Roberson slab = vtoslab((vm_offset_t)item); 46378b987a77SJeff Roberson } else { 46388b987a77SJeff Roberson mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK)); 463954c5ae80SRyan Libby if ((zone->uz_flags & UMA_ZFLAG_HASH) != 0) 46408b987a77SJeff Roberson slab = hash_sfind(&keg->uk_hash, mem); 46418b987a77SJeff Roberson else 46428b987a77SJeff Roberson slab = (uma_slab_t)(mem + keg->uk_pgoff); 46438b987a77SJeff Roberson } 46448b987a77SJeff Roberson if (lock != KEG_LOCKPTR(keg, slab->us_domain)) { 46458b987a77SJeff Roberson if (lock != NULL) 46468b987a77SJeff Roberson mtx_unlock(lock); 46478b987a77SJeff Roberson lock = KEG_LOCK(keg, slab->us_domain); 46488b987a77SJeff Roberson } 4649bb15d1c7SGleb Smirnoff slab_free_item(zone, slab, item); 46500095a784SJeff Roberson } 46518b987a77SJeff Roberson if (lock != NULL) 46528b987a77SJeff Roberson mtx_unlock(lock); 46538355f576SJeff Roberson } 46548355f576SJeff Roberson 46550095a784SJeff Roberson /* 46560095a784SJeff Roberson * Frees a single item to any zone. 
46570095a784SJeff Roberson * 46580095a784SJeff Roberson * Arguments: 46590095a784SJeff Roberson * zone The zone to free to 46600095a784SJeff Roberson * item The item we're freeing 46610095a784SJeff Roberson * udata User supplied data for the dtor 46620095a784SJeff Roberson * skip Skip dtors and finis 46630095a784SJeff Roberson */ 46646d88d784SJeff Roberson static __noinline void 46650095a784SJeff Roberson zone_free_item(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip) 46660095a784SJeff Roberson { 4667c5deaf04SGleb Smirnoff 4668d4665eaaSJeff Roberson /* 4669d4665eaaSJeff Roberson * If a free is sent directly to an SMR zone we have to 4670d4665eaaSJeff Roberson * synchronize immediately because the item can instantly 4671d4665eaaSJeff Roberson * be reallocated. This should only happen in degenerate 4672d4665eaaSJeff Roberson * cases when no memory is available for per-cpu caches. 4673d4665eaaSJeff Roberson */ 4674d4665eaaSJeff Roberson if ((zone->uz_flags & UMA_ZONE_SMR) != 0 && skip == SKIP_NONE) 4675d4665eaaSJeff Roberson smr_synchronize(zone->uz_smr); 4676d4665eaaSJeff Roberson 4677cc7ce83aSJeff Roberson item_dtor(zone, item, zone->uz_size, udata, skip); 46780095a784SJeff Roberson 467909c8cb71SMark Johnston if (skip < SKIP_FINI && zone->uz_fini) { 468009c8cb71SMark Johnston kasan_mark_item_valid(zone, item); 46810095a784SJeff Roberson zone->uz_fini(item, zone->uz_size); 468209c8cb71SMark Johnston kasan_mark_item_invalid(zone, item); 468309c8cb71SMark Johnston } 46840095a784SJeff Roberson 46850095a784SJeff Roberson zone->uz_release(zone->uz_arg, &item, 1); 4686bb15d1c7SGleb Smirnoff 4687bb15d1c7SGleb Smirnoff if (skip & SKIP_CNT) 4688bb15d1c7SGleb Smirnoff return; 4689bb15d1c7SGleb Smirnoff 46902efcc8cbSGleb Smirnoff counter_u64_add(zone->uz_frees, 1); 46912efcc8cbSGleb Smirnoff 46924bd61e19SJeff Roberson if (zone->uz_max_items > 0) 46934bd61e19SJeff Roberson zone_free_limit(zone, 1); 4694bb45b411SGleb Smirnoff } 46950095a784SJeff Roberson 46968355f576SJeff Roberson /* See uma.h */ 46971c6cae97SLawrence Stewart int 4698736ee590SJeff Roberson uma_zone_set_max(uma_zone_t zone, int nitems) 4699736ee590SJeff Roberson { 4700e574d407SMark Johnston 4701e574d407SMark Johnston /* 4702e574d407SMark Johnston * If the limit is small, we may need to constrain the maximum per-CPU 4703e574d407SMark Johnston * cache size, or disable caching entirely. 4704e574d407SMark Johnston */ 4705e574d407SMark Johnston uma_zone_set_maxcache(zone, nitems); 4706bb15d1c7SGleb Smirnoff 47074bd61e19SJeff Roberson /* 47084bd61e19SJeff Roberson * XXX This can misbehave if the zone has any allocations with 47094bd61e19SJeff Roberson * no limit and a limit is imposed. There is currently no 47104bd61e19SJeff Roberson * way to clear a limit. 47114bd61e19SJeff Roberson */ 4712bb15d1c7SGleb Smirnoff ZONE_LOCK(zone); 4713bb15d1c7SGleb Smirnoff zone->uz_max_items = nitems; 4714cc7ce83aSJeff Roberson zone->uz_flags |= UMA_ZFLAG_LIMIT; 4715cc7ce83aSJeff Roberson zone_update_caches(zone); 47164bd61e19SJeff Roberson /* We may need to wake waiters. 
*/ 47174bd61e19SJeff Roberson wakeup(&zone->uz_max_items); 4718bb15d1c7SGleb Smirnoff ZONE_UNLOCK(zone); 4719bb15d1c7SGleb Smirnoff 4720bb15d1c7SGleb Smirnoff return (nitems); 4721bb15d1c7SGleb Smirnoff } 4722bb15d1c7SGleb Smirnoff 4723bb15d1c7SGleb Smirnoff /* See uma.h */ 4724003cf08bSMark Johnston void 4725bb15d1c7SGleb Smirnoff uma_zone_set_maxcache(uma_zone_t zone, int nitems) 4726bb15d1c7SGleb Smirnoff { 4727e574d407SMark Johnston int bpcpu, bpdom, bsize, nb; 4728bb15d1c7SGleb Smirnoff 4729bb15d1c7SGleb Smirnoff ZONE_LOCK(zone); 4730e574d407SMark Johnston 4731e574d407SMark Johnston /* 4732e574d407SMark Johnston * Compute a lower bound on the number of items that may be cached in 4733e574d407SMark Johnston * the zone. Each CPU gets at least two buckets, and for cross-domain 4734e574d407SMark Johnston * frees we use an additional bucket per CPU and per domain. Select the 4735e574d407SMark Johnston * largest bucket size that does not exceed half of the requested limit, 4736e574d407SMark Johnston * with the left over space given to the full bucket cache. 4737e574d407SMark Johnston */ 4738e574d407SMark Johnston bpdom = 0; 4739003cf08bSMark Johnston bpcpu = 2; 4740e574d407SMark Johnston #ifdef NUMA 4741e574d407SMark Johnston if ((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) != 0 && vm_ndomains > 1) { 4742003cf08bSMark Johnston bpcpu++; 4743e574d407SMark Johnston bpdom++; 4744003cf08bSMark Johnston } 4745e574d407SMark Johnston #endif 4746e574d407SMark Johnston nb = bpcpu * mp_ncpus + bpdom * vm_ndomains; 4747e574d407SMark Johnston bsize = nitems / nb / 2; 4748e574d407SMark Johnston if (bsize > BUCKET_MAX) 4749e574d407SMark Johnston bsize = BUCKET_MAX; 4750e574d407SMark Johnston else if (bsize == 0 && nitems / nb > 0) 4751e574d407SMark Johnston bsize = 1; 4752e574d407SMark Johnston zone->uz_bucket_size_max = zone->uz_bucket_size = bsize; 475320a4e154SJeff Roberson if (zone->uz_bucket_size_min > zone->uz_bucket_size_max) 475420a4e154SJeff Roberson zone->uz_bucket_size_min = zone->uz_bucket_size_max; 4755e574d407SMark Johnston zone->uz_bucket_max = nitems - nb * bsize; 4756bb15d1c7SGleb Smirnoff ZONE_UNLOCK(zone); 4757736ee590SJeff Roberson } 4758736ee590SJeff Roberson 4759736ee590SJeff Roberson /* See uma.h */ 4760e49471b0SAndre Oppermann int 4761e49471b0SAndre Oppermann uma_zone_get_max(uma_zone_t zone) 4762e49471b0SAndre Oppermann { 4763e49471b0SAndre Oppermann int nitems; 4764e49471b0SAndre Oppermann 4765727c6918SJeff Roberson nitems = atomic_load_64(&zone->uz_max_items); 4766e49471b0SAndre Oppermann 4767e49471b0SAndre Oppermann return (nitems); 4768e49471b0SAndre Oppermann } 4769e49471b0SAndre Oppermann 4770e49471b0SAndre Oppermann /* See uma.h */ 47712f891cd5SPawel Jakub Dawidek void 47722f891cd5SPawel Jakub Dawidek uma_zone_set_warning(uma_zone_t zone, const char *warning) 47732f891cd5SPawel Jakub Dawidek { 47742f891cd5SPawel Jakub Dawidek 4775727c6918SJeff Roberson ZONE_ASSERT_COLD(zone); 47762f891cd5SPawel Jakub Dawidek zone->uz_warning = warning; 47772f891cd5SPawel Jakub Dawidek } 47782f891cd5SPawel Jakub Dawidek 47792f891cd5SPawel Jakub Dawidek /* See uma.h */ 478054503a13SJonathan T. Looney void 478154503a13SJonathan T. Looney uma_zone_set_maxaction(uma_zone_t zone, uma_maxaction_t maxaction) 478254503a13SJonathan T. Looney { 478354503a13SJonathan T. Looney 4784727c6918SJeff Roberson ZONE_ASSERT_COLD(zone); 4785e60b2fcbSGleb Smirnoff TASK_INIT(&zone->uz_maxaction, 0, (task_fn_t *)maxaction, zone); 478654503a13SJonathan T. Looney } 478754503a13SJonathan T. Looney 478854503a13SJonathan T. 
Looney /* See uma.h */ 4789c4ae7908SLawrence Stewart int 4790c4ae7908SLawrence Stewart uma_zone_get_cur(uma_zone_t zone) 4791c4ae7908SLawrence Stewart { 4792c4ae7908SLawrence Stewart int64_t nitems; 4793c4ae7908SLawrence Stewart u_int i; 4794c4ae7908SLawrence Stewart 4795bfb6b7a1SJeff Roberson nitems = 0; 4796bfb6b7a1SJeff Roberson if (zone->uz_allocs != EARLY_COUNTER && zone->uz_frees != EARLY_COUNTER) 47972efcc8cbSGleb Smirnoff nitems = counter_u64_fetch(zone->uz_allocs) - 47982efcc8cbSGleb Smirnoff counter_u64_fetch(zone->uz_frees); 4799727c6918SJeff Roberson CPU_FOREACH(i) 4800727c6918SJeff Roberson nitems += atomic_load_64(&zone->uz_cpu[i].uc_allocs) - 4801727c6918SJeff Roberson atomic_load_64(&zone->uz_cpu[i].uc_frees); 4802c4ae7908SLawrence Stewart 4803c4ae7908SLawrence Stewart return (nitems < 0 ? 0 : nitems); 4804c4ae7908SLawrence Stewart } 4805c4ae7908SLawrence Stewart 480620a4e154SJeff Roberson static uint64_t 480720a4e154SJeff Roberson uma_zone_get_allocs(uma_zone_t zone) 480820a4e154SJeff Roberson { 480920a4e154SJeff Roberson uint64_t nitems; 481020a4e154SJeff Roberson u_int i; 481120a4e154SJeff Roberson 4812bfb6b7a1SJeff Roberson nitems = 0; 4813bfb6b7a1SJeff Roberson if (zone->uz_allocs != EARLY_COUNTER) 481420a4e154SJeff Roberson nitems = counter_u64_fetch(zone->uz_allocs); 4815727c6918SJeff Roberson CPU_FOREACH(i) 4816727c6918SJeff Roberson nitems += atomic_load_64(&zone->uz_cpu[i].uc_allocs); 481720a4e154SJeff Roberson 481820a4e154SJeff Roberson return (nitems); 481920a4e154SJeff Roberson } 482020a4e154SJeff Roberson 482120a4e154SJeff Roberson static uint64_t 482220a4e154SJeff Roberson uma_zone_get_frees(uma_zone_t zone) 482320a4e154SJeff Roberson { 482420a4e154SJeff Roberson uint64_t nitems; 482520a4e154SJeff Roberson u_int i; 482620a4e154SJeff Roberson 4827bfb6b7a1SJeff Roberson nitems = 0; 4828bfb6b7a1SJeff Roberson if (zone->uz_frees != EARLY_COUNTER) 482920a4e154SJeff Roberson nitems = counter_u64_fetch(zone->uz_frees); 4830727c6918SJeff Roberson CPU_FOREACH(i) 4831727c6918SJeff Roberson nitems += atomic_load_64(&zone->uz_cpu[i].uc_frees); 483220a4e154SJeff Roberson 483320a4e154SJeff Roberson return (nitems); 483420a4e154SJeff Roberson } 483520a4e154SJeff Roberson 483631c251a0SJeff Roberson #ifdef INVARIANTS 483731c251a0SJeff Roberson /* Used only for KEG_ASSERT_COLD(). 
*/ 483831c251a0SJeff Roberson static uint64_t 483931c251a0SJeff Roberson uma_keg_get_allocs(uma_keg_t keg) 484031c251a0SJeff Roberson { 484131c251a0SJeff Roberson uma_zone_t z; 484231c251a0SJeff Roberson uint64_t nitems; 484331c251a0SJeff Roberson 484431c251a0SJeff Roberson nitems = 0; 484531c251a0SJeff Roberson LIST_FOREACH(z, &keg->uk_zones, uz_link) 484631c251a0SJeff Roberson nitems += uma_zone_get_allocs(z); 484731c251a0SJeff Roberson 484831c251a0SJeff Roberson return (nitems); 484931c251a0SJeff Roberson } 485031c251a0SJeff Roberson #endif 485131c251a0SJeff Roberson 4852c4ae7908SLawrence Stewart /* See uma.h */ 4853736ee590SJeff Roberson void 4854099a0e58SBosko Milekic uma_zone_set_init(uma_zone_t zone, uma_init uminit) 4855099a0e58SBosko Milekic { 4856e20a199fSJeff Roberson uma_keg_t keg; 4857e20a199fSJeff Roberson 4858bb15d1c7SGleb Smirnoff KEG_GET(zone, keg); 4859727c6918SJeff Roberson KEG_ASSERT_COLD(keg); 4860e20a199fSJeff Roberson keg->uk_init = uminit; 4861099a0e58SBosko Milekic } 4862099a0e58SBosko Milekic 4863099a0e58SBosko Milekic /* See uma.h */ 4864099a0e58SBosko Milekic void 4865099a0e58SBosko Milekic uma_zone_set_fini(uma_zone_t zone, uma_fini fini) 4866099a0e58SBosko Milekic { 4867e20a199fSJeff Roberson uma_keg_t keg; 4868e20a199fSJeff Roberson 4869bb15d1c7SGleb Smirnoff KEG_GET(zone, keg); 4870727c6918SJeff Roberson KEG_ASSERT_COLD(keg); 4871e20a199fSJeff Roberson keg->uk_fini = fini; 4872099a0e58SBosko Milekic } 4873099a0e58SBosko Milekic 4874099a0e58SBosko Milekic /* See uma.h */ 4875099a0e58SBosko Milekic void 4876099a0e58SBosko Milekic uma_zone_set_zinit(uma_zone_t zone, uma_init zinit) 4877099a0e58SBosko Milekic { 4878af526374SJeff Roberson 4879727c6918SJeff Roberson ZONE_ASSERT_COLD(zone); 4880099a0e58SBosko Milekic zone->uz_init = zinit; 4881099a0e58SBosko Milekic } 4882099a0e58SBosko Milekic 4883099a0e58SBosko Milekic /* See uma.h */ 4884099a0e58SBosko Milekic void 4885099a0e58SBosko Milekic uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini) 4886099a0e58SBosko Milekic { 4887af526374SJeff Roberson 4888727c6918SJeff Roberson ZONE_ASSERT_COLD(zone); 4889099a0e58SBosko Milekic zone->uz_fini = zfini; 4890099a0e58SBosko Milekic } 4891099a0e58SBosko Milekic 4892099a0e58SBosko Milekic /* See uma.h */ 4893099a0e58SBosko Milekic void 48948355f576SJeff Roberson uma_zone_set_freef(uma_zone_t zone, uma_free freef) 48958355f576SJeff Roberson { 48960095a784SJeff Roberson uma_keg_t keg; 4897e20a199fSJeff Roberson 4898bb15d1c7SGleb Smirnoff KEG_GET(zone, keg); 4899727c6918SJeff Roberson KEG_ASSERT_COLD(keg); 49000095a784SJeff Roberson keg->uk_freef = freef; 49018355f576SJeff Roberson } 49028355f576SJeff Roberson 49038355f576SJeff Roberson /* See uma.h */ 49048355f576SJeff Roberson void 49058355f576SJeff Roberson uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf) 49068355f576SJeff Roberson { 4907e20a199fSJeff Roberson uma_keg_t keg; 4908e20a199fSJeff Roberson 4909bb15d1c7SGleb Smirnoff KEG_GET(zone, keg); 4910727c6918SJeff Roberson KEG_ASSERT_COLD(keg); 4911e20a199fSJeff Roberson keg->uk_allocf = allocf; 49128355f576SJeff Roberson } 49138355f576SJeff Roberson 49148355f576SJeff Roberson /* See uma.h */ 49156fd34d6fSJeff Roberson void 4916d4665eaaSJeff Roberson uma_zone_set_smr(uma_zone_t zone, smr_t smr) 4917d4665eaaSJeff Roberson { 4918d4665eaaSJeff Roberson 4919d4665eaaSJeff Roberson ZONE_ASSERT_COLD(zone); 4920d4665eaaSJeff Roberson 49217f746c9fSMateusz Guzik KASSERT(smr != NULL, ("Got NULL smr")); 49227f746c9fSMateusz Guzik KASSERT((zone->uz_flags & UMA_ZONE_SMR) == 0, 
49237f746c9fSMateusz Guzik ("zone %p (%s) already uses SMR", zone, zone->uz_name)); 4924d4665eaaSJeff Roberson zone->uz_flags |= UMA_ZONE_SMR; 4925d4665eaaSJeff Roberson zone->uz_smr = smr; 4926d4665eaaSJeff Roberson zone_update_caches(zone); 4927d4665eaaSJeff Roberson } 4928d4665eaaSJeff Roberson 4929d4665eaaSJeff Roberson smr_t 4930d4665eaaSJeff Roberson uma_zone_get_smr(uma_zone_t zone) 4931d4665eaaSJeff Roberson { 4932d4665eaaSJeff Roberson 4933d4665eaaSJeff Roberson return (zone->uz_smr); 4934d4665eaaSJeff Roberson } 4935d4665eaaSJeff Roberson 4936d4665eaaSJeff Roberson /* See uma.h */ 4937d4665eaaSJeff Roberson void 49386fd34d6fSJeff Roberson uma_zone_reserve(uma_zone_t zone, int items) 49396fd34d6fSJeff Roberson { 49406fd34d6fSJeff Roberson uma_keg_t keg; 49416fd34d6fSJeff Roberson 4942bb15d1c7SGleb Smirnoff KEG_GET(zone, keg); 4943727c6918SJeff Roberson KEG_ASSERT_COLD(keg); 49446fd34d6fSJeff Roberson keg->uk_reserve = items; 49456fd34d6fSJeff Roberson } 49466fd34d6fSJeff Roberson 49476fd34d6fSJeff Roberson /* See uma.h */ 49488355f576SJeff Roberson int 4949a4915c21SAttilio Rao uma_zone_reserve_kva(uma_zone_t zone, int count) 49508355f576SJeff Roberson { 4951099a0e58SBosko Milekic uma_keg_t keg; 49528355f576SJeff Roberson vm_offset_t kva; 49539ba30bcbSZbigniew Bodek u_int pages; 49548355f576SJeff Roberson 4955bb15d1c7SGleb Smirnoff KEG_GET(zone, keg); 4956727c6918SJeff Roberson KEG_ASSERT_COLD(keg); 4957727c6918SJeff Roberson ZONE_ASSERT_COLD(zone); 49588355f576SJeff Roberson 495979c9f942SJeff Roberson pages = howmany(count, keg->uk_ipers) * keg->uk_ppera; 4960a553d4b8SJeff Roberson 4961a4915c21SAttilio Rao #ifdef UMA_MD_SMALL_ALLOC 4962a4915c21SAttilio Rao if (keg->uk_ppera > 1) { 4963a4915c21SAttilio Rao #else 4964a4915c21SAttilio Rao if (1) { 4965a4915c21SAttilio Rao #endif 496657223e99SAndriy Gapon kva = kva_alloc((vm_size_t)pages * PAGE_SIZE); 4967d1f42ac2SAlan Cox if (kva == 0) 49688355f576SJeff Roberson return (0); 4969a4915c21SAttilio Rao } else 4970a4915c21SAttilio Rao kva = 0; 4971bb15d1c7SGleb Smirnoff 4972bb15d1c7SGleb Smirnoff MPASS(keg->uk_kva == 0); 4973099a0e58SBosko Milekic keg->uk_kva = kva; 4974a4915c21SAttilio Rao keg->uk_offset = 0; 4975bb15d1c7SGleb Smirnoff zone->uz_max_items = pages * keg->uk_ipers; 4976a4915c21SAttilio Rao #ifdef UMA_MD_SMALL_ALLOC 4977a4915c21SAttilio Rao keg->uk_allocf = (keg->uk_ppera > 1) ? 
noobj_alloc : uma_small_alloc; 4978a4915c21SAttilio Rao #else 4979a4915c21SAttilio Rao keg->uk_allocf = noobj_alloc; 4980a4915c21SAttilio Rao #endif 4981cc7ce83aSJeff Roberson keg->uk_flags |= UMA_ZFLAG_LIMIT | UMA_ZONE_NOFREE; 4982cc7ce83aSJeff Roberson zone->uz_flags |= UMA_ZFLAG_LIMIT | UMA_ZONE_NOFREE; 4983cc7ce83aSJeff Roberson zone_update_caches(zone); 4984af526374SJeff Roberson 49858355f576SJeff Roberson return (1); 49868355f576SJeff Roberson } 49878355f576SJeff Roberson 49888355f576SJeff Roberson /* See uma.h */ 49898355f576SJeff Roberson void 49908355f576SJeff Roberson uma_prealloc(uma_zone_t zone, int items) 49918355f576SJeff Roberson { 4992920239efSMark Johnston struct vm_domainset_iter di; 4993ab3185d1SJeff Roberson uma_domain_t dom; 49948355f576SJeff Roberson uma_slab_t slab; 4995099a0e58SBosko Milekic uma_keg_t keg; 499686220393SMark Johnston int aflags, domain, slabs; 49978355f576SJeff Roberson 4998bb15d1c7SGleb Smirnoff KEG_GET(zone, keg); 499979c9f942SJeff Roberson slabs = howmany(items, keg->uk_ipers); 5000194a979eSMark Johnston while (slabs-- > 0) { 500186220393SMark Johnston aflags = M_NOWAIT; 500286220393SMark Johnston vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain, 500386220393SMark Johnston &aflags); 500486220393SMark Johnston for (;;) { 500586220393SMark Johnston slab = keg_alloc_slab(keg, zone, domain, M_WAITOK, 500686220393SMark Johnston aflags); 500786220393SMark Johnston if (slab != NULL) { 5008ab3185d1SJeff Roberson dom = &keg->uk_domain[slab->us_domain]; 50094ab3aee8SMark Johnston /* 50104ab3aee8SMark Johnston * keg_alloc_slab() always returns a slab on the 50114ab3aee8SMark Johnston * partial list. 50124ab3aee8SMark Johnston */ 50138b987a77SJeff Roberson LIST_REMOVE(slab, us_link); 501486220393SMark Johnston LIST_INSERT_HEAD(&dom->ud_free_slab, slab, 501586220393SMark Johnston us_link); 50164ab3aee8SMark Johnston dom->ud_free_slabs++; 50178b987a77SJeff Roberson KEG_UNLOCK(keg, slab->us_domain); 5018920239efSMark Johnston break; 50198355f576SJeff Roberson } 50208b987a77SJeff Roberson if (vm_domainset_iter_policy(&di, &domain) != 0) 502189d2fb14SKonstantin Belousov vm_wait_doms(&keg->uk_dr.dr_policy->ds_mask, 0); 502286220393SMark Johnston } 502386220393SMark Johnston } 502486220393SMark Johnston } 50258355f576SJeff Roberson 5026ed581bf6SJeff Roberson /* 5027ed581bf6SJeff Roberson * Returns a snapshot of memory consumption in bytes. 
5028ed581bf6SJeff Roberson */ 5029ed581bf6SJeff Roberson size_t 5030ed581bf6SJeff Roberson uma_zone_memory(uma_zone_t zone) 5031ed581bf6SJeff Roberson { 5032ed581bf6SJeff Roberson size_t sz; 5033ed581bf6SJeff Roberson int i; 5034ed581bf6SJeff Roberson 5035ed581bf6SJeff Roberson sz = 0; 5036ed581bf6SJeff Roberson if (zone->uz_flags & UMA_ZFLAG_CACHE) { 5037ed581bf6SJeff Roberson for (i = 0; i < vm_ndomains; i++) 5038c6fd3e23SJeff Roberson sz += ZDOM_GET(zone, i)->uzd_nitems; 5039ed581bf6SJeff Roberson return (sz * zone->uz_size); 5040ed581bf6SJeff Roberson } 5041ed581bf6SJeff Roberson for (i = 0; i < vm_ndomains; i++) 5042ed581bf6SJeff Roberson sz += zone->uz_keg->uk_domain[i].ud_pages; 5043ed581bf6SJeff Roberson 5044ed581bf6SJeff Roberson return (sz * PAGE_SIZE); 5045ed581bf6SJeff Roberson } 5046ed581bf6SJeff Roberson 50478355f576SJeff Roberson /* See uma.h */ 504808cfa56eSMark Johnston void 504908cfa56eSMark Johnston uma_reclaim(int req) 50508355f576SJeff Roberson { 5051*aabe13f1SMark Johnston uma_reclaim_domain(req, UMA_ANYDOMAIN); 5052*aabe13f1SMark Johnston } 505344ec2b63SKonstantin Belousov 5054*aabe13f1SMark Johnston void 5055*aabe13f1SMark Johnston uma_reclaim_domain(int req, int domain) 5056*aabe13f1SMark Johnston { 5057*aabe13f1SMark Johnston void *arg; 5058*aabe13f1SMark Johnston 505986bbae32SJeff Roberson bucket_enable(); 506008cfa56eSMark Johnston 5061*aabe13f1SMark Johnston arg = (void *)(uintptr_t)domain; 5062*aabe13f1SMark Johnston sx_slock(&uma_reclaim_lock); 506308cfa56eSMark Johnston switch (req) { 506408cfa56eSMark Johnston case UMA_RECLAIM_TRIM: 5065*aabe13f1SMark Johnston zone_foreach(zone_trim, arg); 506608cfa56eSMark Johnston break; 506708cfa56eSMark Johnston case UMA_RECLAIM_DRAIN: 5068*aabe13f1SMark Johnston zone_foreach(zone_drain, arg); 5069*aabe13f1SMark Johnston break; 507008cfa56eSMark Johnston case UMA_RECLAIM_DRAIN_CPU: 5071*aabe13f1SMark Johnston zone_foreach(zone_drain, arg); 507208cfa56eSMark Johnston pcpu_cache_drain_safe(NULL); 5073*aabe13f1SMark Johnston zone_foreach(zone_drain, arg); 507408cfa56eSMark Johnston break; 507508cfa56eSMark Johnston default: 507608cfa56eSMark Johnston panic("unhandled reclamation request %d", req); 507708cfa56eSMark Johnston } 50780f9b7bf3SMark Johnston 50798355f576SJeff Roberson /* 50808355f576SJeff Roberson * Some slabs may have been freed but this zone will be visited early; 50818355f576SJeff Roberson * we visit again so that we can free pages that are empty once other 50828355f576SJeff Roberson * zones are drained. We have to do the same for buckets.
50838355f576SJeff Roberson */ 5084*aabe13f1SMark Johnston zone_drain(slabzones[0], arg); 5085*aabe13f1SMark Johnston zone_drain(slabzones[1], arg); 5086*aabe13f1SMark Johnston bucket_zone_drain(domain); 5087*aabe13f1SMark Johnston sx_sunlock(&uma_reclaim_lock); 50888355f576SJeff Roberson } 50898355f576SJeff Roberson 50902e47807cSJeff Roberson static volatile int uma_reclaim_needed; 509144ec2b63SKonstantin Belousov 509244ec2b63SKonstantin Belousov void 509344ec2b63SKonstantin Belousov uma_reclaim_wakeup(void) 509444ec2b63SKonstantin Belousov { 509544ec2b63SKonstantin Belousov 50962e47807cSJeff Roberson if (atomic_fetchadd_int(&uma_reclaim_needed, 1) == 0) 50972e47807cSJeff Roberson wakeup(uma_reclaim); 509844ec2b63SKonstantin Belousov } 509944ec2b63SKonstantin Belousov 510044ec2b63SKonstantin Belousov void 510144ec2b63SKonstantin Belousov uma_reclaim_worker(void *arg __unused) 510244ec2b63SKonstantin Belousov { 510344ec2b63SKonstantin Belousov 510444ec2b63SKonstantin Belousov for (;;) { 510508cfa56eSMark Johnston sx_xlock(&uma_reclaim_lock); 5106200f8117SKonstantin Belousov while (atomic_load_int(&uma_reclaim_needed) == 0) 510708cfa56eSMark Johnston sx_sleep(uma_reclaim, &uma_reclaim_lock, PVM, "umarcl", 51082e47807cSJeff Roberson hz); 510908cfa56eSMark Johnston sx_xunlock(&uma_reclaim_lock); 51109b43bc27SAndriy Gapon EVENTHANDLER_INVOKE(vm_lowmem, VM_LOW_KMEM); 511108cfa56eSMark Johnston uma_reclaim(UMA_RECLAIM_DRAIN_CPU); 5112200f8117SKonstantin Belousov atomic_store_int(&uma_reclaim_needed, 0); 51132e47807cSJeff Roberson /* Don't fire more than once per-second. */ 51142e47807cSJeff Roberson pause("umarclslp", hz); 511544ec2b63SKonstantin Belousov } 511644ec2b63SKonstantin Belousov } 511744ec2b63SKonstantin Belousov 5118663b416fSJohn Baldwin /* See uma.h */ 511908cfa56eSMark Johnston void 512008cfa56eSMark Johnston uma_zone_reclaim(uma_zone_t zone, int req) 512108cfa56eSMark Johnston { 5122*aabe13f1SMark Johnston uma_zone_reclaim_domain(zone, req, UMA_ANYDOMAIN); 5123*aabe13f1SMark Johnston } 512408cfa56eSMark Johnston 5125*aabe13f1SMark Johnston void 5126*aabe13f1SMark Johnston uma_zone_reclaim_domain(uma_zone_t zone, int req, int domain) 5127*aabe13f1SMark Johnston { 5128*aabe13f1SMark Johnston void *arg; 5129*aabe13f1SMark Johnston 5130*aabe13f1SMark Johnston arg = (void *)(uintptr_t)domain; 513108cfa56eSMark Johnston switch (req) { 513208cfa56eSMark Johnston case UMA_RECLAIM_TRIM: 5133*aabe13f1SMark Johnston zone_trim(zone, arg); 513408cfa56eSMark Johnston break; 513508cfa56eSMark Johnston case UMA_RECLAIM_DRAIN: 5136*aabe13f1SMark Johnston zone_drain(zone, arg); 513708cfa56eSMark Johnston break; 513808cfa56eSMark Johnston case UMA_RECLAIM_DRAIN_CPU: 513908cfa56eSMark Johnston pcpu_cache_drain_safe(zone); 5140*aabe13f1SMark Johnston zone_drain(zone, arg); 514108cfa56eSMark Johnston break; 514208cfa56eSMark Johnston default: 514308cfa56eSMark Johnston panic("unhandled reclamation request %d", req); 514408cfa56eSMark Johnston } 514508cfa56eSMark Johnston } 514608cfa56eSMark Johnston 514708cfa56eSMark Johnston /* See uma.h */ 5148663b416fSJohn Baldwin int 5149663b416fSJohn Baldwin uma_zone_exhausted(uma_zone_t zone) 5150663b416fSJohn Baldwin { 5151663b416fSJohn Baldwin 5152727c6918SJeff Roberson return (atomic_load_32(&zone->uz_sleepers) > 0); 51536c125b8dSMohan Srinivasan } 51546c125b8dSMohan Srinivasan 51552e47807cSJeff Roberson unsigned long 51562e47807cSJeff Roberson uma_limit(void) 51572e47807cSJeff Roberson { 51582e47807cSJeff Roberson 51592e47807cSJeff Roberson return 
(uma_kmem_limit); 51602e47807cSJeff Roberson } 51612e47807cSJeff Roberson 51622e47807cSJeff Roberson void 51632e47807cSJeff Roberson uma_set_limit(unsigned long limit) 51642e47807cSJeff Roberson { 51652e47807cSJeff Roberson 51662e47807cSJeff Roberson uma_kmem_limit = limit; 51672e47807cSJeff Roberson } 51682e47807cSJeff Roberson 51692e47807cSJeff Roberson unsigned long 51702e47807cSJeff Roberson uma_size(void) 51712e47807cSJeff Roberson { 51722e47807cSJeff Roberson 5173058f0f74SMark Johnston return (atomic_load_long(&uma_kmem_total)); 5174ad5b0f5bSJeff Roberson } 5175ad5b0f5bSJeff Roberson 5176ad5b0f5bSJeff Roberson long 5177ad5b0f5bSJeff Roberson uma_avail(void) 5178ad5b0f5bSJeff Roberson { 5179ad5b0f5bSJeff Roberson 5180058f0f74SMark Johnston return (uma_kmem_limit - uma_size()); 51812e47807cSJeff Roberson } 51822e47807cSJeff Roberson 5183a0d4b0aeSRobert Watson #ifdef DDB 51848355f576SJeff Roberson /* 51857a52a97eSRobert Watson * Generate statistics across both the zone and its per-cpu cache's. Return 51867a52a97eSRobert Watson * desired statistics if the pointer is non-NULL for that statistic. 51877a52a97eSRobert Watson * 51887a52a97eSRobert Watson * Note: does not update the zone statistics, as it can't safely clear the 51897a52a97eSRobert Watson * per-CPU cache statistic. 51907a52a97eSRobert Watson * 51917a52a97eSRobert Watson */ 51927a52a97eSRobert Watson static void 51930f9b7bf3SMark Johnston uma_zone_sumstat(uma_zone_t z, long *cachefreep, uint64_t *allocsp, 5194c1685086SJeff Roberson uint64_t *freesp, uint64_t *sleepsp, uint64_t *xdomainp) 51957a52a97eSRobert Watson { 51967a52a97eSRobert Watson uma_cache_t cache; 5197c1685086SJeff Roberson uint64_t allocs, frees, sleeps, xdomain; 51987a52a97eSRobert Watson int cachefree, cpu; 51997a52a97eSRobert Watson 5200c1685086SJeff Roberson allocs = frees = sleeps = xdomain = 0; 52017a52a97eSRobert Watson cachefree = 0; 52023aa6d94eSJohn Baldwin CPU_FOREACH(cpu) { 52037a52a97eSRobert Watson cache = &z->uz_cpu[cpu]; 5204376b1ba3SJeff Roberson cachefree += cache->uc_allocbucket.ucb_cnt; 5205376b1ba3SJeff Roberson cachefree += cache->uc_freebucket.ucb_cnt; 5206376b1ba3SJeff Roberson xdomain += cache->uc_crossbucket.ucb_cnt; 5207376b1ba3SJeff Roberson cachefree += cache->uc_crossbucket.ucb_cnt; 52087a52a97eSRobert Watson allocs += cache->uc_allocs; 52097a52a97eSRobert Watson frees += cache->uc_frees; 52107a52a97eSRobert Watson } 52112efcc8cbSGleb Smirnoff allocs += counter_u64_fetch(z->uz_allocs); 52122efcc8cbSGleb Smirnoff frees += counter_u64_fetch(z->uz_frees); 5213c6fd3e23SJeff Roberson xdomain += counter_u64_fetch(z->uz_xdomain); 5214bf965959SSean Bruno sleeps += z->uz_sleeps; 52157a52a97eSRobert Watson if (cachefreep != NULL) 52167a52a97eSRobert Watson *cachefreep = cachefree; 52177a52a97eSRobert Watson if (allocsp != NULL) 52187a52a97eSRobert Watson *allocsp = allocs; 52197a52a97eSRobert Watson if (freesp != NULL) 52207a52a97eSRobert Watson *freesp = frees; 5221bf965959SSean Bruno if (sleepsp != NULL) 5222bf965959SSean Bruno *sleepsp = sleeps; 5223c1685086SJeff Roberson if (xdomainp != NULL) 5224c1685086SJeff Roberson *xdomainp = xdomain; 52257a52a97eSRobert Watson } 5226a0d4b0aeSRobert Watson #endif /* DDB */ 52277a52a97eSRobert Watson 52287a52a97eSRobert Watson static int 52297a52a97eSRobert Watson sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS) 52307a52a97eSRobert Watson { 52317a52a97eSRobert Watson uma_keg_t kz; 52327a52a97eSRobert Watson uma_zone_t z; 52337a52a97eSRobert Watson int count; 52347a52a97eSRobert Watson 52357a52a97eSRobert 
Watson count = 0; 5236111fbcd5SBryan Venteicher rw_rlock(&uma_rwlock); 52377a52a97eSRobert Watson LIST_FOREACH(kz, &uma_kegs, uk_link) { 52387a52a97eSRobert Watson LIST_FOREACH(z, &kz->uk_zones, uz_link) 52397a52a97eSRobert Watson count++; 52407a52a97eSRobert Watson } 5241b47acb0aSGleb Smirnoff LIST_FOREACH(z, &uma_cachezones, uz_link) 5242b47acb0aSGleb Smirnoff count++; 5243b47acb0aSGleb Smirnoff 5244111fbcd5SBryan Venteicher rw_runlock(&uma_rwlock); 52457a52a97eSRobert Watson return (sysctl_handle_int(oidp, &count, 0, req)); 52467a52a97eSRobert Watson } 52477a52a97eSRobert Watson 5248b47acb0aSGleb Smirnoff static void 5249b47acb0aSGleb Smirnoff uma_vm_zone_stats(struct uma_type_header *uth, uma_zone_t z, struct sbuf *sbuf, 5250b47acb0aSGleb Smirnoff struct uma_percpu_stat *ups, bool internal) 5251b47acb0aSGleb Smirnoff { 5252b47acb0aSGleb Smirnoff uma_zone_domain_t zdom; 5253b47acb0aSGleb Smirnoff uma_cache_t cache; 5254b47acb0aSGleb Smirnoff int i; 5255b47acb0aSGleb Smirnoff 5256b47acb0aSGleb Smirnoff for (i = 0; i < vm_ndomains; i++) { 5257c6fd3e23SJeff Roberson zdom = ZDOM_GET(z, i); 5258b47acb0aSGleb Smirnoff uth->uth_zone_free += zdom->uzd_nitems; 5259b47acb0aSGleb Smirnoff } 5260b47acb0aSGleb Smirnoff uth->uth_allocs = counter_u64_fetch(z->uz_allocs); 5261b47acb0aSGleb Smirnoff uth->uth_frees = counter_u64_fetch(z->uz_frees); 5262b47acb0aSGleb Smirnoff uth->uth_fails = counter_u64_fetch(z->uz_fails); 5263c6fd3e23SJeff Roberson uth->uth_xdomain = counter_u64_fetch(z->uz_xdomain); 5264b47acb0aSGleb Smirnoff uth->uth_sleeps = z->uz_sleeps; 52651de9724eSMark Johnston 5266b47acb0aSGleb Smirnoff for (i = 0; i < mp_maxid + 1; i++) { 5267b47acb0aSGleb Smirnoff bzero(&ups[i], sizeof(*ups)); 5268b47acb0aSGleb Smirnoff if (internal || CPU_ABSENT(i)) 5269b47acb0aSGleb Smirnoff continue; 5270b47acb0aSGleb Smirnoff cache = &z->uz_cpu[i]; 5271376b1ba3SJeff Roberson ups[i].ups_cache_free += cache->uc_allocbucket.ucb_cnt; 5272376b1ba3SJeff Roberson ups[i].ups_cache_free += cache->uc_freebucket.ucb_cnt; 5273376b1ba3SJeff Roberson ups[i].ups_cache_free += cache->uc_crossbucket.ucb_cnt; 5274b47acb0aSGleb Smirnoff ups[i].ups_allocs = cache->uc_allocs; 5275b47acb0aSGleb Smirnoff ups[i].ups_frees = cache->uc_frees; 5276b47acb0aSGleb Smirnoff } 5277b47acb0aSGleb Smirnoff } 5278b47acb0aSGleb Smirnoff 52797a52a97eSRobert Watson static int 52807a52a97eSRobert Watson sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS) 52817a52a97eSRobert Watson { 52827a52a97eSRobert Watson struct uma_stream_header ush; 52837a52a97eSRobert Watson struct uma_type_header uth; 528463b5d112SKonstantin Belousov struct uma_percpu_stat *ups; 52857a52a97eSRobert Watson struct sbuf sbuf; 52867a52a97eSRobert Watson uma_keg_t kz; 52877a52a97eSRobert Watson uma_zone_t z; 52884bd61e19SJeff Roberson uint64_t items; 52898b987a77SJeff Roberson uint32_t kfree, pages; 52904e657159SMatthew D Fleming int count, error, i; 52917a52a97eSRobert Watson 529200f0e671SMatthew D Fleming error = sysctl_wire_old_buffer(req, 0); 529300f0e671SMatthew D Fleming if (error != 0) 529400f0e671SMatthew D Fleming return (error); 52954e657159SMatthew D Fleming sbuf_new_for_sysctl(&sbuf, NULL, 128, req); 52961eafc078SIan Lepore sbuf_clear_flags(&sbuf, SBUF_INCLUDENUL); 529763b5d112SKonstantin Belousov ups = malloc((mp_maxid + 1) * sizeof(*ups), M_TEMP, M_WAITOK); 52984e657159SMatthew D Fleming 5299404a593eSMatthew D Fleming count = 0; 5300111fbcd5SBryan Venteicher rw_rlock(&uma_rwlock); 53017a52a97eSRobert Watson LIST_FOREACH(kz, &uma_kegs, uk_link) { 53027a52a97eSRobert Watson 
LIST_FOREACH(z, &kz->uk_zones, uz_link) 53037a52a97eSRobert Watson count++; 53047a52a97eSRobert Watson } 53057a52a97eSRobert Watson 5306b47acb0aSGleb Smirnoff LIST_FOREACH(z, &uma_cachezones, uz_link) 5307b47acb0aSGleb Smirnoff count++; 5308b47acb0aSGleb Smirnoff 53097a52a97eSRobert Watson /* 53107a52a97eSRobert Watson * Insert stream header. 53117a52a97eSRobert Watson */ 53127a52a97eSRobert Watson bzero(&ush, sizeof(ush)); 53137a52a97eSRobert Watson ush.ush_version = UMA_STREAM_VERSION; 5314ab3a57c0SRobert Watson ush.ush_maxcpus = (mp_maxid + 1); 53157a52a97eSRobert Watson ush.ush_count = count; 53164e657159SMatthew D Fleming (void)sbuf_bcat(&sbuf, &ush, sizeof(ush)); 53177a52a97eSRobert Watson 53187a52a97eSRobert Watson LIST_FOREACH(kz, &uma_kegs, uk_link) { 53198b987a77SJeff Roberson kfree = pages = 0; 53208b987a77SJeff Roberson for (i = 0; i < vm_ndomains; i++) { 53214ab3aee8SMark Johnston kfree += kz->uk_domain[i].ud_free_items; 53228b987a77SJeff Roberson pages += kz->uk_domain[i].ud_pages; 53238b987a77SJeff Roberson } 53247a52a97eSRobert Watson LIST_FOREACH(z, &kz->uk_zones, uz_link) { 53257a52a97eSRobert Watson bzero(&uth, sizeof(uth)); 5326cbbb4a00SRobert Watson strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME); 53277a52a97eSRobert Watson uth.uth_align = kz->uk_align; 53287a52a97eSRobert Watson uth.uth_size = kz->uk_size; 53297a52a97eSRobert Watson uth.uth_rsize = kz->uk_rsize; 53304bd61e19SJeff Roberson if (z->uz_max_items > 0) { 53314bd61e19SJeff Roberson items = UZ_ITEMS_COUNT(z->uz_items); 53324bd61e19SJeff Roberson uth.uth_pages = (items / kz->uk_ipers) * 5333bb15d1c7SGleb Smirnoff kz->uk_ppera; 53344bd61e19SJeff Roberson } else 53358b987a77SJeff Roberson uth.uth_pages = pages; 5336f8c86a5fSGleb Smirnoff uth.uth_maxpages = (z->uz_max_items / kz->uk_ipers) * 5337bb15d1c7SGleb Smirnoff kz->uk_ppera; 5338bb15d1c7SGleb Smirnoff uth.uth_limit = z->uz_max_items; 53398b987a77SJeff Roberson uth.uth_keg_free = kfree; 5340cbbb4a00SRobert Watson 5341cbbb4a00SRobert Watson /* 5342cbbb4a00SRobert Watson * A zone is secondary if it is not the first entry 5343cbbb4a00SRobert Watson * on the keg's zone list.
5344cbbb4a00SRobert Watson */ 5345e20a199fSJeff Roberson if ((z->uz_flags & UMA_ZONE_SECONDARY) && 5346cbbb4a00SRobert Watson (LIST_FIRST(&kz->uk_zones) != z)) 5347cbbb4a00SRobert Watson uth.uth_zone_flags = UTH_ZONE_SECONDARY; 5348b47acb0aSGleb Smirnoff uma_vm_zone_stats(&uth, z, &sbuf, ups, 5349b47acb0aSGleb Smirnoff kz->uk_flags & UMA_ZFLAG_INTERNAL); 535063b5d112SKonstantin Belousov (void)sbuf_bcat(&sbuf, &uth, sizeof(uth)); 535163b5d112SKonstantin Belousov for (i = 0; i < mp_maxid + 1; i++) 535263b5d112SKonstantin Belousov (void)sbuf_bcat(&sbuf, &ups[i], sizeof(ups[i])); 53537a52a97eSRobert Watson } 53547a52a97eSRobert Watson } 5355b47acb0aSGleb Smirnoff LIST_FOREACH(z, &uma_cachezones, uz_link) { 5356b47acb0aSGleb Smirnoff bzero(&uth, sizeof(uth)); 5357b47acb0aSGleb Smirnoff strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME); 5358b47acb0aSGleb Smirnoff uth.uth_size = z->uz_size; 5359b47acb0aSGleb Smirnoff uma_vm_zone_stats(&uth, z, &sbuf, ups, false); 5360b47acb0aSGleb Smirnoff (void)sbuf_bcat(&sbuf, &uth, sizeof(uth)); 5361b47acb0aSGleb Smirnoff for (i = 0; i < mp_maxid + 1; i++) 5362b47acb0aSGleb Smirnoff (void)sbuf_bcat(&sbuf, &ups[i], sizeof(ups[i])); 5363b47acb0aSGleb Smirnoff } 5364b47acb0aSGleb Smirnoff 5365111fbcd5SBryan Venteicher rw_runlock(&uma_rwlock); 53664e657159SMatthew D Fleming error = sbuf_finish(&sbuf); 53674e657159SMatthew D Fleming sbuf_delete(&sbuf); 536863b5d112SKonstantin Belousov free(ups, M_TEMP); 53697a52a97eSRobert Watson return (error); 53707a52a97eSRobert Watson } 537148c5777eSRobert Watson 53720a5a3ccbSGleb Smirnoff int 53730a5a3ccbSGleb Smirnoff sysctl_handle_uma_zone_max(SYSCTL_HANDLER_ARGS) 53740a5a3ccbSGleb Smirnoff { 53750a5a3ccbSGleb Smirnoff uma_zone_t zone = *(uma_zone_t *)arg1; 537616be9f54SGleb Smirnoff int error, max; 53770a5a3ccbSGleb Smirnoff 537816be9f54SGleb Smirnoff max = uma_zone_get_max(zone); 53790a5a3ccbSGleb Smirnoff error = sysctl_handle_int(oidp, &max, 0, req); 53800a5a3ccbSGleb Smirnoff if (error || !req->newptr) 53810a5a3ccbSGleb Smirnoff return (error); 53820a5a3ccbSGleb Smirnoff 53830a5a3ccbSGleb Smirnoff uma_zone_set_max(zone, max); 53840a5a3ccbSGleb Smirnoff 53850a5a3ccbSGleb Smirnoff return (0); 53860a5a3ccbSGleb Smirnoff } 53870a5a3ccbSGleb Smirnoff 53880a5a3ccbSGleb Smirnoff int 53890a5a3ccbSGleb Smirnoff sysctl_handle_uma_zone_cur(SYSCTL_HANDLER_ARGS) 53900a5a3ccbSGleb Smirnoff { 539120a4e154SJeff Roberson uma_zone_t zone; 53920a5a3ccbSGleb Smirnoff int cur; 53930a5a3ccbSGleb Smirnoff 539420a4e154SJeff Roberson /* 539520a4e154SJeff Roberson * Some callers want to add sysctls for global zones that 539620a4e154SJeff Roberson * may not yet exist so they pass a pointer to a pointer. 
539720a4e154SJeff Roberson */ 539820a4e154SJeff Roberson if (arg2 == 0) 539920a4e154SJeff Roberson zone = *(uma_zone_t *)arg1; 540020a4e154SJeff Roberson else 540120a4e154SJeff Roberson zone = arg1; 54020a5a3ccbSGleb Smirnoff cur = uma_zone_get_cur(zone); 54030a5a3ccbSGleb Smirnoff return (sysctl_handle_int(oidp, &cur, 0, req)); 54040a5a3ccbSGleb Smirnoff } 54050a5a3ccbSGleb Smirnoff 540620a4e154SJeff Roberson static int 540720a4e154SJeff Roberson sysctl_handle_uma_zone_allocs(SYSCTL_HANDLER_ARGS) 540820a4e154SJeff Roberson { 540920a4e154SJeff Roberson uma_zone_t zone = arg1; 541020a4e154SJeff Roberson uint64_t cur; 541120a4e154SJeff Roberson 541220a4e154SJeff Roberson cur = uma_zone_get_allocs(zone); 541320a4e154SJeff Roberson return (sysctl_handle_64(oidp, &cur, 0, req)); 541420a4e154SJeff Roberson } 541520a4e154SJeff Roberson 541620a4e154SJeff Roberson static int 541720a4e154SJeff Roberson sysctl_handle_uma_zone_frees(SYSCTL_HANDLER_ARGS) 541820a4e154SJeff Roberson { 541920a4e154SJeff Roberson uma_zone_t zone = arg1; 542020a4e154SJeff Roberson uint64_t cur; 542120a4e154SJeff Roberson 542220a4e154SJeff Roberson cur = uma_zone_get_frees(zone); 542320a4e154SJeff Roberson return (sysctl_handle_64(oidp, &cur, 0, req)); 542420a4e154SJeff Roberson } 542520a4e154SJeff Roberson 54266d204a6aSRyan Libby static int 54276d204a6aSRyan Libby sysctl_handle_uma_zone_flags(SYSCTL_HANDLER_ARGS) 54286d204a6aSRyan Libby { 54296d204a6aSRyan Libby struct sbuf sbuf; 54306d204a6aSRyan Libby uma_zone_t zone = arg1; 54316d204a6aSRyan Libby int error; 54326d204a6aSRyan Libby 54336d204a6aSRyan Libby sbuf_new_for_sysctl(&sbuf, NULL, 0, req); 54346d204a6aSRyan Libby if (zone->uz_flags != 0) 54356d204a6aSRyan Libby sbuf_printf(&sbuf, "0x%b", zone->uz_flags, PRINT_UMA_ZFLAGS); 54366d204a6aSRyan Libby else 54376d204a6aSRyan Libby sbuf_printf(&sbuf, "0"); 54386d204a6aSRyan Libby error = sbuf_finish(&sbuf); 54396d204a6aSRyan Libby sbuf_delete(&sbuf); 54406d204a6aSRyan Libby 54416d204a6aSRyan Libby return (error); 54426d204a6aSRyan Libby } 54436d204a6aSRyan Libby 5444f7af5015SRyan Libby static int 5445f7af5015SRyan Libby sysctl_handle_uma_slab_efficiency(SYSCTL_HANDLER_ARGS) 5446f7af5015SRyan Libby { 5447f7af5015SRyan Libby uma_keg_t keg = arg1; 5448f7af5015SRyan Libby int avail, effpct, total; 5449f7af5015SRyan Libby 5450f7af5015SRyan Libby total = keg->uk_ppera * PAGE_SIZE; 545154c5ae80SRyan Libby if ((keg->uk_flags & UMA_ZFLAG_OFFPAGE) != 0) 54529b8db4d0SRyan Libby total += slabzone(keg->uk_ipers)->uz_keg->uk_rsize; 5453f7af5015SRyan Libby /* 5454f7af5015SRyan Libby * We consider the client's requested size and alignment here, not the 5455f7af5015SRyan Libby * real size determination uk_rsize, because we also adjust the real 5456f7af5015SRyan Libby * size for internal implementation reasons (max bitset size). 
5457f7af5015SRyan Libby */ 5458f7af5015SRyan Libby avail = keg->uk_ipers * roundup2(keg->uk_size, keg->uk_align + 1); 5459f7af5015SRyan Libby if ((keg->uk_flags & UMA_ZONE_PCPU) != 0) 5460f7af5015SRyan Libby avail *= mp_maxid + 1; 5461f7af5015SRyan Libby effpct = 100 * avail / total; 5462f7af5015SRyan Libby return (sysctl_handle_int(oidp, &effpct, 0, req)); 5463f7af5015SRyan Libby } 5464f7af5015SRyan Libby 54654bd61e19SJeff Roberson static int 54664bd61e19SJeff Roberson sysctl_handle_uma_zone_items(SYSCTL_HANDLER_ARGS) 54674bd61e19SJeff Roberson { 54684bd61e19SJeff Roberson uma_zone_t zone = arg1; 54694bd61e19SJeff Roberson uint64_t cur; 54704bd61e19SJeff Roberson 54714bd61e19SJeff Roberson cur = UZ_ITEMS_COUNT(atomic_load_64(&zone->uz_items)); 54724bd61e19SJeff Roberson return (sysctl_handle_64(oidp, &cur, 0, req)); 54734bd61e19SJeff Roberson } 54744bd61e19SJeff Roberson 54759542ea7bSGleb Smirnoff #ifdef INVARIANTS 54769542ea7bSGleb Smirnoff static uma_slab_t 54779542ea7bSGleb Smirnoff uma_dbg_getslab(uma_zone_t zone, void *item) 54789542ea7bSGleb Smirnoff { 54799542ea7bSGleb Smirnoff uma_slab_t slab; 54809542ea7bSGleb Smirnoff uma_keg_t keg; 54819542ea7bSGleb Smirnoff uint8_t *mem; 54829542ea7bSGleb Smirnoff 54839542ea7bSGleb Smirnoff /* 54849542ea7bSGleb Smirnoff * It is safe to return the slab here even though the 54859542ea7bSGleb Smirnoff * zone is unlocked because the item's allocation state 54869542ea7bSGleb Smirnoff * essentially holds a reference. 54879542ea7bSGleb Smirnoff */ 5488727c6918SJeff Roberson mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK)); 5489727c6918SJeff Roberson if ((zone->uz_flags & UMA_ZFLAG_CACHE) != 0) 5490bb15d1c7SGleb Smirnoff return (NULL); 549154c5ae80SRyan Libby if (zone->uz_flags & UMA_ZFLAG_VTOSLAB) 5492727c6918SJeff Roberson return (vtoslab((vm_offset_t)mem)); 5493bb15d1c7SGleb Smirnoff keg = zone->uz_keg; 549454c5ae80SRyan Libby if ((keg->uk_flags & UMA_ZFLAG_HASH) == 0) 5495727c6918SJeff Roberson return ((uma_slab_t)(mem + keg->uk_pgoff)); 54968b987a77SJeff Roberson KEG_LOCK(keg, 0); 54979542ea7bSGleb Smirnoff slab = hash_sfind(&keg->uk_hash, mem); 54988b987a77SJeff Roberson KEG_UNLOCK(keg, 0); 54999542ea7bSGleb Smirnoff 55009542ea7bSGleb Smirnoff return (slab); 55019542ea7bSGleb Smirnoff } 55029542ea7bSGleb Smirnoff 5503c5deaf04SGleb Smirnoff static bool 5504c5deaf04SGleb Smirnoff uma_dbg_zskip(uma_zone_t zone, void *mem) 5505c5deaf04SGleb Smirnoff { 5506c5deaf04SGleb Smirnoff 5507727c6918SJeff Roberson if ((zone->uz_flags & UMA_ZFLAG_CACHE) != 0) 5508c5deaf04SGleb Smirnoff return (true); 5509c5deaf04SGleb Smirnoff 5510bb15d1c7SGleb Smirnoff return (uma_dbg_kskip(zone->uz_keg, mem)); 5511c5deaf04SGleb Smirnoff } 5512c5deaf04SGleb Smirnoff 5513c5deaf04SGleb Smirnoff static bool 5514c5deaf04SGleb Smirnoff uma_dbg_kskip(uma_keg_t keg, void *mem) 5515c5deaf04SGleb Smirnoff { 5516c5deaf04SGleb Smirnoff uintptr_t idx; 5517c5deaf04SGleb Smirnoff 5518c5deaf04SGleb Smirnoff if (dbg_divisor == 0) 5519c5deaf04SGleb Smirnoff return (true); 5520c5deaf04SGleb Smirnoff 5521c5deaf04SGleb Smirnoff if (dbg_divisor == 1) 5522c5deaf04SGleb Smirnoff return (false); 5523c5deaf04SGleb Smirnoff 5524c5deaf04SGleb Smirnoff idx = (uintptr_t)mem >> PAGE_SHIFT; 5525c5deaf04SGleb Smirnoff if (keg->uk_ipers > 1) { 5526c5deaf04SGleb Smirnoff idx *= keg->uk_ipers; 5527c5deaf04SGleb Smirnoff idx += ((uintptr_t)mem & PAGE_MASK) / keg->uk_rsize; 5528c5deaf04SGleb Smirnoff } 5529c5deaf04SGleb Smirnoff 5530c5deaf04SGleb Smirnoff if ((idx / dbg_divisor) * dbg_divisor != idx) { 
5531c5deaf04SGleb Smirnoff counter_u64_add(uma_skip_cnt, 1); 5532c5deaf04SGleb Smirnoff return (true); 5533c5deaf04SGleb Smirnoff } 5534c5deaf04SGleb Smirnoff counter_u64_add(uma_dbg_cnt, 1); 5535c5deaf04SGleb Smirnoff 5536c5deaf04SGleb Smirnoff return (false); 5537c5deaf04SGleb Smirnoff } 5538c5deaf04SGleb Smirnoff 55399542ea7bSGleb Smirnoff /* 55409542ea7bSGleb Smirnoff * Set up the slab's freei data such that uma_dbg_free can function. 55419542ea7bSGleb Smirnoff * 55429542ea7bSGleb Smirnoff */ 55439542ea7bSGleb Smirnoff static void 55449542ea7bSGleb Smirnoff uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item) 55459542ea7bSGleb Smirnoff { 55469542ea7bSGleb Smirnoff uma_keg_t keg; 55479542ea7bSGleb Smirnoff int freei; 55489542ea7bSGleb Smirnoff 55499542ea7bSGleb Smirnoff if (slab == NULL) { 55509542ea7bSGleb Smirnoff slab = uma_dbg_getslab(zone, item); 55519542ea7bSGleb Smirnoff if (slab == NULL) 5552952c8964SMark Johnston panic("uma: item %p did not belong to zone %s", 55539542ea7bSGleb Smirnoff item, zone->uz_name); 55549542ea7bSGleb Smirnoff } 5555584061b4SJeff Roberson keg = zone->uz_keg; 55561e0701e1SJeff Roberson freei = slab_item_index(slab, keg, item); 55579542ea7bSGleb Smirnoff 5558942951baSRyan Libby if (BIT_TEST_SET_ATOMIC(keg->uk_ipers, freei, 5559942951baSRyan Libby slab_dbg_bits(slab, keg))) 5560952c8964SMark Johnston panic("Duplicate alloc of %p from zone %p(%s) slab %p(%d)", 55619542ea7bSGleb Smirnoff item, zone, zone->uz_name, slab, freei); 55629542ea7bSGleb Smirnoff } 55639542ea7bSGleb Smirnoff 55649542ea7bSGleb Smirnoff /* 55659542ea7bSGleb Smirnoff * Verifies freed addresses. Checks for alignment, valid slab membership 55669542ea7bSGleb Smirnoff * and duplicate frees. 55679542ea7bSGleb Smirnoff * 55689542ea7bSGleb Smirnoff */ 55699542ea7bSGleb Smirnoff static void 55709542ea7bSGleb Smirnoff uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item) 55719542ea7bSGleb Smirnoff { 55729542ea7bSGleb Smirnoff uma_keg_t keg; 55739542ea7bSGleb Smirnoff int freei; 55749542ea7bSGleb Smirnoff 55759542ea7bSGleb Smirnoff if (slab == NULL) { 55769542ea7bSGleb Smirnoff slab = uma_dbg_getslab(zone, item); 55779542ea7bSGleb Smirnoff if (slab == NULL) 5578952c8964SMark Johnston panic("uma: Freed item %p did not belong to zone %s", 55799542ea7bSGleb Smirnoff item, zone->uz_name); 55809542ea7bSGleb Smirnoff } 5581584061b4SJeff Roberson keg = zone->uz_keg; 55821e0701e1SJeff Roberson freei = slab_item_index(slab, keg, item); 55839542ea7bSGleb Smirnoff 55849542ea7bSGleb Smirnoff if (freei >= keg->uk_ipers) 5585952c8964SMark Johnston panic("Invalid free of %p from zone %p(%s) slab %p(%d)", 55869542ea7bSGleb Smirnoff item, zone, zone->uz_name, slab, freei); 55879542ea7bSGleb Smirnoff 55881e0701e1SJeff Roberson if (slab_item(slab, keg, freei) != item) 5589952c8964SMark Johnston panic("Unaligned free of %p from zone %p(%s) slab %p(%d)", 55909542ea7bSGleb Smirnoff item, zone, zone->uz_name, slab, freei); 55919542ea7bSGleb Smirnoff 5592942951baSRyan Libby if (!BIT_TEST_CLR_ATOMIC(keg->uk_ipers, freei, 5593942951baSRyan Libby slab_dbg_bits(slab, keg))) 5594952c8964SMark Johnston panic("Duplicate free of %p from zone %p(%s) slab %p(%d)", 55959542ea7bSGleb Smirnoff item, zone, zone->uz_name, slab, freei); 55969542ea7bSGleb Smirnoff } 55979542ea7bSGleb Smirnoff #endif /* INVARIANTS */ 55989542ea7bSGleb Smirnoff 559948c5777eSRobert Watson #ifdef DDB 560046d70077SConrad Meyer static int64_t 560146d70077SConrad Meyer get_uma_stats(uma_keg_t kz, uma_zone_t z, uint64_t *allocs, uint64_t *used, 
56020223790fSConrad Meyer uint64_t *sleeps, long *cachefree, uint64_t *xdomain) 560348c5777eSRobert Watson { 560446d70077SConrad Meyer uint64_t frees; 56050f9b7bf3SMark Johnston int i; 560648c5777eSRobert Watson 560748c5777eSRobert Watson if (kz->uk_flags & UMA_ZFLAG_INTERNAL) { 560846d70077SConrad Meyer *allocs = counter_u64_fetch(z->uz_allocs); 56092efcc8cbSGleb Smirnoff frees = counter_u64_fetch(z->uz_frees); 561046d70077SConrad Meyer *sleeps = z->uz_sleeps; 561146d70077SConrad Meyer *cachefree = 0; 561246d70077SConrad Meyer *xdomain = 0; 561348c5777eSRobert Watson } else 561446d70077SConrad Meyer uma_zone_sumstat(z, cachefree, allocs, &frees, sleeps, 561546d70077SConrad Meyer xdomain); 56168b987a77SJeff Roberson for (i = 0; i < vm_ndomains; i++) { 5617c6fd3e23SJeff Roberson *cachefree += ZDOM_GET(z, i)->uzd_nitems; 5618e20a199fSJeff Roberson if (!((z->uz_flags & UMA_ZONE_SECONDARY) && 561948c5777eSRobert Watson (LIST_FIRST(&kz->uk_zones) != z))) 56204ab3aee8SMark Johnston *cachefree += kz->uk_domain[i].ud_free_items; 56218b987a77SJeff Roberson } 562246d70077SConrad Meyer *used = *allocs - frees; 562346d70077SConrad Meyer return (((int64_t)*used + *cachefree) * kz->uk_size); 562446d70077SConrad Meyer } 56250f9b7bf3SMark Johnston 562646d70077SConrad Meyer DB_SHOW_COMMAND(uma, db_show_uma) 562746d70077SConrad Meyer { 562846d70077SConrad Meyer const char *fmt_hdr, *fmt_entry; 562946d70077SConrad Meyer uma_keg_t kz; 563046d70077SConrad Meyer uma_zone_t z; 563146d70077SConrad Meyer uint64_t allocs, used, sleeps, xdomain; 563246d70077SConrad Meyer long cachefree; 563346d70077SConrad Meyer /* variables for sorting */ 563446d70077SConrad Meyer uma_keg_t cur_keg; 563546d70077SConrad Meyer uma_zone_t cur_zone, last_zone; 563646d70077SConrad Meyer int64_t cur_size, last_size, size; 563746d70077SConrad Meyer int ties; 563846d70077SConrad Meyer 563946d70077SConrad Meyer /* /i option produces machine-parseable CSV output */ 564046d70077SConrad Meyer if (modif[0] == 'i') { 564146d70077SConrad Meyer fmt_hdr = "%s,%s,%s,%s,%s,%s,%s,%s,%s\n"; 564246d70077SConrad Meyer fmt_entry = "\"%s\",%ju,%jd,%ld,%ju,%ju,%u,%jd,%ju\n"; 564346d70077SConrad Meyer } else { 564446d70077SConrad Meyer fmt_hdr = "%18s %6s %7s %7s %11s %7s %7s %10s %8s\n"; 564546d70077SConrad Meyer fmt_entry = "%18s %6ju %7jd %7ld %11ju %7ju %7u %10jd %8ju\n"; 564646d70077SConrad Meyer } 564746d70077SConrad Meyer 564846d70077SConrad Meyer db_printf(fmt_hdr, "Zone", "Size", "Used", "Free", "Requests", 564946d70077SConrad Meyer "Sleeps", "Bucket", "Total Mem", "XFree"); 565046d70077SConrad Meyer 565146d70077SConrad Meyer /* Sort the zones with largest size first. */ 565246d70077SConrad Meyer last_zone = NULL; 565346d70077SConrad Meyer last_size = INT64_MAX; 565446d70077SConrad Meyer for (;;) { 565546d70077SConrad Meyer cur_zone = NULL; 565646d70077SConrad Meyer cur_size = -1; 565746d70077SConrad Meyer ties = 0; 565846d70077SConrad Meyer LIST_FOREACH(kz, &uma_kegs, uk_link) { 565946d70077SConrad Meyer LIST_FOREACH(z, &kz->uk_zones, uz_link) { 566046d70077SConrad Meyer /* 566146d70077SConrad Meyer * In the case of size ties, print out zones 566246d70077SConrad Meyer * in the order they are encountered. That is, 566346d70077SConrad Meyer * when we encounter the most recently output 566446d70077SConrad Meyer * zone, we have already printed all preceding 566546d70077SConrad Meyer * ties, and we must print all following ties. 
566646d70077SConrad Meyer */ 566746d70077SConrad Meyer if (z == last_zone) { 566846d70077SConrad Meyer ties = 1; 566946d70077SConrad Meyer continue; 567046d70077SConrad Meyer } 567146d70077SConrad Meyer size = get_uma_stats(kz, z, &allocs, &used, 567246d70077SConrad Meyer &sleeps, &cachefree, &xdomain); 567346d70077SConrad Meyer if (size > cur_size && size < last_size + ties) 567446d70077SConrad Meyer { 567546d70077SConrad Meyer cur_size = size; 567646d70077SConrad Meyer cur_zone = z; 567746d70077SConrad Meyer cur_keg = kz; 567846d70077SConrad Meyer } 567946d70077SConrad Meyer } 568046d70077SConrad Meyer } 568146d70077SConrad Meyer if (cur_zone == NULL) 568246d70077SConrad Meyer break; 568346d70077SConrad Meyer 568446d70077SConrad Meyer size = get_uma_stats(cur_keg, cur_zone, &allocs, &used, 568546d70077SConrad Meyer &sleeps, &cachefree, &xdomain); 568646d70077SConrad Meyer db_printf(fmt_entry, cur_zone->uz_name, 568746d70077SConrad Meyer (uintmax_t)cur_keg->uk_size, (intmax_t)used, cachefree, 568846d70077SConrad Meyer (uintmax_t)allocs, (uintmax_t)sleeps, 568920a4e154SJeff Roberson (unsigned)cur_zone->uz_bucket_size, (intmax_t)size, 569020a4e154SJeff Roberson xdomain); 569146d70077SConrad Meyer 5692687c94aaSJohn Baldwin if (db_pager_quit) 5693687c94aaSJohn Baldwin return; 569446d70077SConrad Meyer last_zone = cur_zone; 569546d70077SConrad Meyer last_size = cur_size; 569648c5777eSRobert Watson } 569748c5777eSRobert Watson } 569803175483SAlexander Motin 569903175483SAlexander Motin DB_SHOW_COMMAND(umacache, db_show_umacache) 570003175483SAlexander Motin { 570103175483SAlexander Motin uma_zone_t z; 5702ab3185d1SJeff Roberson uint64_t allocs, frees; 57030f9b7bf3SMark Johnston long cachefree; 57040f9b7bf3SMark Johnston int i; 570503175483SAlexander Motin 570603175483SAlexander Motin db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free", 570703175483SAlexander Motin "Requests", "Bucket"); 570803175483SAlexander Motin LIST_FOREACH(z, &uma_cachezones, uz_link) { 5709c1685086SJeff Roberson uma_zone_sumstat(z, &cachefree, &allocs, &frees, NULL, NULL); 57100f9b7bf3SMark Johnston for (i = 0; i < vm_ndomains; i++) 5711c6fd3e23SJeff Roberson cachefree += ZDOM_GET(z, i)->uzd_nitems; 57120f9b7bf3SMark Johnston db_printf("%18s %8ju %8jd %8ld %12ju %8u\n", 571303175483SAlexander Motin z->uz_name, (uintmax_t)z->uz_size, 571403175483SAlexander Motin (intmax_t)(allocs - frees), cachefree, 571520a4e154SJeff Roberson (uintmax_t)allocs, z->uz_bucket_size); 571603175483SAlexander Motin if (db_pager_quit) 571703175483SAlexander Motin return; 571803175483SAlexander Motin } 571903175483SAlexander Motin } 57209542ea7bSGleb Smirnoff #endif /* DDB */ 5721
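/*
 * The blocks below are illustrative usage sketches for the public
 * interfaces implemented above; they are not part of uma_core.c itself,
 * and every consumer name (zones, item types, helper functions) is
 * hypothetical.
 *
 * Sketch: limiting a zone with uma_zone_set_max() and
 * uma_zone_set_maxcache().  uma_zone_set_max() returns the effective
 * limit and also constrains the per-CPU bucket caches so that a small
 * limit is not consumed entirely by cached free items.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <vm/uma.h>

struct foo {
	uint64_t	f_id;
	char		f_name[32];
};

static uma_zone_t foo_zone;

static void
foo_zone_create(void)
{
	int limit;

	foo_zone = uma_zcreate("foo", sizeof(struct foo),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);

	/* Never allow more than 1000 items, counting cached ones. */
	limit = uma_zone_set_max(foo_zone, 1000);

	/* Independently cap how many free items may sit in caches. */
	uma_zone_set_maxcache(foo_zone, 256);

	printf("foo zone: limit %d, in use %d\n",
	    limit, uma_zone_get_cur(foo_zone));
}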
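/*
 * Sketch: pairing a zone limit with a rate-limited warning and an
 * exhaustion check, using uma_zone_set_warning() and
 * uma_zone_exhausted() as implemented above.  The "conn" zone is
 * hypothetical.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <vm/uma.h>

static uma_zone_t conn_zone;

static void
conn_zone_create(void)
{
	conn_zone = uma_zcreate("conn", 256,
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	uma_zone_set_max(conn_zone, 10000);
	/* Logged (rate-limited) when the limit blocks allocations. */
	uma_zone_set_warning(conn_zone, "connection limit reached");
}

static void *
conn_alloc(void)
{
	/* Shed load instead of queueing behind sleeping allocators. */
	if (uma_zone_exhausted(conn_zone))
		return (NULL);
	return (uma_zalloc(conn_zone, M_NOWAIT));
}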
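/*
 * Sketch: keg-level init/fini attached with uma_zone_set_init() and
 * uma_zone_set_fini().  These run when backing slabs are created and
 * destroyed, so expensive per-item state (here a mutex) survives while
 * items cycle through the zone's caches.  The setters must run while
 * the zone is still cold, which the KEG_ASSERT_COLD() checks above
 * enforce.  Names are hypothetical.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <vm/uma.h>

struct buf_like {
	struct mtx	b_lock;
	int		b_flags;
};

static uma_zone_t buf_like_zone;

static int
buf_like_init(void *mem, int size, int flags)
{
	struct buf_like *b = mem;

	mtx_init(&b->b_lock, "buf_like", NULL, MTX_DEF);
	return (0);
}

static void
buf_like_fini(void *mem, int size)
{
	struct buf_like *b = mem;

	mtx_destroy(&b->b_lock);
}

static void
buf_like_zone_create(void)
{
	buf_like_zone = uma_zcreate("buf_like", sizeof(struct buf_like),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	uma_zone_set_init(buf_like_zone, buf_like_init);
	uma_zone_set_fini(buf_like_zone, buf_like_fini);
}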
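/*
 * Sketch: an SMR-protected zone, matching the UMA_ZONE_SMR and
 * uma_zone_set_smr()/uma_zone_get_smr() handling above.  Readers
 * bracket their accesses with smr_enter()/smr_exit(); uma_zfree_smr()
 * defers reuse of an item until readers that may still see it have
 * left their sections.  A real lock-free structure would additionally
 * load its SMR-protected pointers with the smr(9) accessors; plain
 * loads are used here only to keep the sketch short.  Names are
 * hypothetical.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/smr.h>
#include <vm/uma.h>

struct node {
	struct node	*n_next;
	int		n_key;
};

static uma_zone_t node_zone;

static void
node_zone_create(void)
{
	/* UMA creates and owns the SMR state for UMA_ZONE_SMR zones. */
	node_zone = uma_zcreate("node", sizeof(struct node),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_SMR);
}

static struct node *
node_alloc(int key)
{
	struct node *n;

	n = uma_zalloc_smr(node_zone, M_WAITOK);
	n->n_key = key;
	n->n_next = NULL;
	return (n);
}

static bool
node_contains(struct node *head, int key)
{
	struct node *n;
	smr_t smr;
	bool found;

	smr = uma_zone_get_smr(node_zone);
	found = false;
	smr_enter(smr);
	for (n = head; n != NULL; n = n->n_next) {
		if (n->n_key == key) {
			found = true;
			break;
		}
	}
	smr_exit(smr);
	return (found);
}

static void
node_free(struct node *n)
{
	/* Safe even if concurrent readers may still reference the item. */
	uma_zfree_smr(node_zone, n);
}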
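/*
 * Sketch: pre-populating a zone for use in memory-shortage paths via
 * uma_zone_reserve(), uma_prealloc() and M_USE_RESERVE, as implemented
 * above.  Names are hypothetical.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <vm/uma.h>

static uma_zone_t iodesc_zone;

static void
iodesc_zone_create(void)
{
	iodesc_zone = uma_zcreate("iodesc", 128,
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);

	/* Hold back 32 items that only M_USE_RESERVE callers may take. */
	uma_zone_reserve(iodesc_zone, 32);

	/* Allocate the backing slabs for the reserve up front. */
	uma_prealloc(iodesc_zone, 32);
}

static void *
iodesc_alloc_for_pageout(void)
{
	/* Paths that must not wait for memory may dip into the reserve. */
	return (uma_zalloc(iodesc_zone, M_NOWAIT | M_USE_RESERVE));
}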
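/*
 * Sketch: returning cached memory to the system with the reclamation
 * requests handled by uma_reclaim() and uma_zone_reclaim() above.  The
 * zone argument is hypothetical.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <vm/uma.h>

static void
reclaim_examples(uma_zone_t zone)
{
	/* Trim every zone's caches back to its recent working set. */
	uma_reclaim(UMA_RECLAIM_TRIM);

	/* Free one zone's cached buckets, leaving per-CPU caches alone. */
	uma_zone_reclaim(zone, UMA_RECLAIM_DRAIN);

	/* Most aggressive: also flush that zone's per-CPU caches. */
	uma_zone_reclaim(zone, UMA_RECLAIM_DRAIN_CPU);
}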