1.\"- 2.\" Copyright (c) 2001 Dag-Erling Coïdan Smørgrav 3.\" All rights reserved. 4.\" 5.\" Redistribution and use in source and binary forms, with or without 6.\" modification, are permitted provided that the following conditions 7.\" are met: 8.\" 1. Redistributions of source code must retain the above copyright 9.\" notice, this list of conditions and the following disclaimer. 10.\" 2. Redistributions in binary form must reproduce the above copyright 11.\" notice, this list of conditions and the following disclaimer in the 12.\" documentation and/or other materials provided with the distribution. 13.\" 14.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24.\" SUCH DAMAGE. 
25.\" 26.\" $FreeBSD$ 27.\" 28.Dd April 14, 2021 29.Dt UMA 9 30.Os 31.Sh NAME 32.Nm UMA 33.Nd general-purpose kernel object allocator 34.Sh SYNOPSIS 35.In sys/param.h 36.In sys/queue.h 37.In vm/uma.h 38.Cd "options UMA_FIRSTTOUCH" 39.Cd "options UMA_XDOMAIN" 40.Bd -literal 41typedef int (*uma_ctor)(void *mem, int size, void *arg, int flags); 42typedef void (*uma_dtor)(void *mem, int size, void *arg); 43typedef int (*uma_init)(void *mem, int size, int flags); 44typedef void (*uma_fini)(void *mem, int size); 45typedef int (*uma_import)(void *arg, void **store, int count, int domain, 46 int flags); 47typedef void (*uma_release)(void *arg, void **store, int count); 48typedef void *(*uma_alloc)(uma_zone_t zone, vm_size_t size, int domain, 49 uint8_t *pflag, int wait); 50typedef void (*uma_free)(void *item, vm_size_t size, uint8_t pflag); 51 52.Ed 53.Ft uma_zone_t 54.Fo uma_zcreate 55.Fa "char *name" "int size" 56.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini" 57.Fa "int align" "uint16_t flags" 58.Fc 59.Ft uma_zone_t 60.Fo uma_zcache_create 61.Fa "char *name" "int size" 62.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini" 63.Fa "uma_import zimport" "uma_release zrelease" 64.Fa "void *arg" "int flags" 65.Fc 66.Ft uma_zone_t 67.Fo uma_zsecond_create 68.Fa "char *name" 69.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini" 70.Fa "uma_zone_t master" 71.Fc 72.Ft void 73.Fn uma_zdestroy "uma_zone_t zone" 74.Ft "void *" 75.Fn uma_zalloc "uma_zone_t zone" "int flags" 76.Ft "void *" 77.Fn uma_zalloc_arg "uma_zone_t zone" "void *arg" "int flags" 78.Ft "void *" 79.Fn uma_zalloc_domain "uma_zone_t zone" "void *arg" "int domain" "int flags" 80.Ft "void *" 81.Fn uma_zalloc_pcpu "uma_zone_t zone" "int flags" 82.Ft "void *" 83.Fn uma_zalloc_pcpu_arg "uma_zone_t zone" "void *arg" "int flags" 84.Ft void 85.Fn uma_zfree "uma_zone_t zone" "void *item" 86.Ft void 87.Fn uma_zfree_arg "uma_zone_t zone" "void *item" "void *arg" 88.Ft 
void 89.Fn uma_zfree_pcpu "uma_zone_t zone" "void *item" 90.Ft void 91.Fn uma_zfree_pcpu_arg "uma_zone_t zone" "void *item" "void *arg" 92.Ft void 93.Fn uma_prealloc "uma_zone_t zone" "int nitems" 94.Ft void 95.Fn uma_zone_reserve "uma_zone_t zone" "int nitems" 96.Ft void 97.Fn uma_zone_reserve_kva "uma_zone_t zone" "int nitems" 98.Ft void 99.Fn uma_reclaim "int req" 100.Ft void 101.Fn uma_reclaim_domain "int req" "int domain" 102.Ft void 103.Fn uma_zone_reclaim "uma_zone_t zone" "int req" 104.Ft void 105.Fn uma_zone_reclaim_domain "uma_zone_t zone" "int req" "int domain" 106.Ft void 107.Fn uma_zone_set_allocf "uma_zone_t zone" "uma_alloc allocf" 108.Ft void 109.Fn uma_zone_set_freef "uma_zone_t zone" "uma_free freef" 110.Ft int 111.Fn uma_zone_set_max "uma_zone_t zone" "int nitems" 112.Ft void 113.Fn uma_zone_set_maxcache "uma_zone_t zone" "int nitems" 114.Ft int 115.Fn uma_zone_get_max "uma_zone_t zone" 116.Ft int 117.Fn uma_zone_get_cur "uma_zone_t zone" 118.Ft void 119.Fn uma_zone_set_warning "uma_zone_t zone" "const char *warning" 120.Ft void 121.Fn uma_zone_set_maxaction "uma_zone_t zone" "void (*maxaction)(uma_zone_t)" 122.Ft void 123.Fn uma_reclaim 124.In sys/sysctl.h 125.Fn SYSCTL_UMA_MAX parent nbr name access zone descr 126.Fn SYSCTL_ADD_UMA_MAX ctx parent nbr name access zone descr 127.Fn SYSCTL_UMA_CUR parent nbr name access zone descr 128.Fn SYSCTL_ADD_UMA_CUR ctx parent nbr name access zone descr 129.Sh DESCRIPTION 130UMA (Universal Memory Allocator) provides an efficient interface for managing 131dynamically-sized collections of items of identical size, referred to as zones. 132Zones keep track of which items are in use and which 133are not, and UMA provides functions for allocating items from a zone and 134for releasing them back, making them available for subsequent allocation requests. 135Zones maintain per-CPU caches with linear scalability on SMP 136systems as well as round-robin and first-touch policies for NUMA 137systems. 
138The number of items cached per CPU is bounded, and each zone additionally 139maintains an unbounded cache of items that is used to quickly satisfy 140per-CPU cache allocation misses. 141.Pp 142Two types of zones exist: regular zones and cache zones. 143In a regular zone, items are allocated from a slab, which is one or more 144virtually contiguous memory pages that have been allocated from the kernel's 145page allocator. 146Internally, slabs are managed by a UMA keg, which is responsible for allocating 147slabs and keeping track of their usage by one or more zones. 148In typical usage, there is one keg per zone, so slabs are not shared among 149multiple zones. 150.Pp 151Regular zones import items from a keg, and release items back to that keg if 152requested. 153Cache zones do not have a keg, and instead use custom import and release 154methods. 155For example, some collections of kernel objects are statically allocated 156at boot time, and the size of the collection does not change. 157A cache zone can be used to implement an efficient allocator for the objects in 158such a collection. 159.Pp 160The 161.Fn uma_zcreate 162and 163.Fn uma_zcache_create 164functions create a new regular zone and cache zone, respectively. 165The 166.Fn uma_zsecond_create 167function creates a regular zone which shares the keg of the zone 168specified by the 169.Fa master 170argument. 171The 172.Fa name 173argument is a text name of the zone for debugging and stats; this memory 174should not be freed until the zone has been deallocated. 175.Pp 176The 177.Fa ctor 178and 179.Fa dtor 180arguments are callback functions that are called by 181the UMA subsystem at the time of the call to 182.Fn uma_zalloc 183and 184.Fn uma_zfree 185respectively. 186Their purpose is to provide hooks for initialization or 187teardown work that needs to be done at the time of the allocation 188or release of a resource. 
189A good usage for the 190.Fa ctor 191and 192.Fa dtor 193callbacks might be to initialize a data structure embedded in the item, 194such as a 195.Xr queue 3 196head. 197.Pp 198The 199.Fa zinit 200and 201.Fa zfini 202arguments are used to optimize the allocation of items from the zone. 203They are called by the UMA subsystem whenever 204it needs to allocate or free items to satisfy requests or memory pressure. 205A good use for the 206.Fa zinit 207and 208.Fa zfini 209callbacks might be to 210initialize and destroy a mutex contained within an item. 211This would allow one to avoid destroying and re-initializing the mutex 212each time the item is freed and re-allocated. 213They are not called on each call to 214.Fn uma_zalloc 215and 216.Fn uma_zfree 217but rather when an item is imported into a zone's cache, and when a zone 218releases an item to the slab allocator, typically as a response to memory 219pressure. 220.Pp 221For 222.Fn uma_zcache_create , 223the 224.Fa zimport 225and 226.Fa zrelease 227functions are called to import items into the zone and to release items 228from the zone, respectively. 229The 230.Fa zimport 231function should store pointers to items in the 232.Fa store 233array, which contains a maximum of 234.Fa count 235entries. 236The function must return the number of imported items, which may be less than 237the maximum. 238Similarly, the 239.Fa store 240parameter to the 241.Fa zrelease 242function contains an array of 243.Fa count 244pointers to items. 245The 246.Fa arg 247parameter passed to 248.Fn uma_zcache_create 249is provided to the import and release functions. 250The 251.Fa domain 252parameter to 253.Fa zimport 254specifies the requested 255.Xr numa 4 256domain for the allocation. 257It is either a NUMA domain number or the special value 258.Dv UMA_ANYDOMAIN . 
259.Pp 260The 261.Fa flags 262argument of 263.Fn uma_zcreate 264and 265.Fn uma_zcache_create 266is a subset of the following flags: 267.Bl -tag -width "foo" 268.It Dv UMA_ZONE_NOFREE 269Slabs allocated to the zone's keg are never freed. 270.It Dv UMA_ZONE_NODUMP 271Pages belonging to the zone will not be included in minidumps. 272.It Dv UMA_ZONE_PCPU 273An allocation from the zone has 274.Va mp_ncpu 275shadow copies that are privately assigned to CPUs. 276A CPU can address its private copy using the base allocation address plus 277a multiple of the current CPU ID and 278.Fn sizeof "struct pcpu" : 279.Bd -literal -offset indent 280foo_zone = uma_zcreate(..., UMA_ZONE_PCPU); 281 ... 282foo_base = uma_zalloc(foo_zone, ...); 283 ... 284critical_enter(); 285foo_pcpu = (foo_t *)zpcpu_get(foo_base); 286/* do something with foo_pcpu */ 287critical_exit(); 288 289.Ed 290Note that 291.Dv M_ZERO 292cannot be used when allocating items from a PCPU zone. 293To obtain zeroed memory from a PCPU zone, use the 294.Fn uma_zalloc_pcpu 295function and its variants instead, and pass 296.Dv M_ZERO . 297.It Dv UMA_ZONE_NOTOUCH 298The UMA subsystem may not directly touch (i.e. read or write) the slab memory. 299Otherwise, by default, book-keeping of items within a slab may be done in the 300slab page itself, and 301.Dv INVARIANTS 302kernels may also do use-after-free checking by accessing the slab memory. 303.It Dv UMA_ZONE_ZINIT 304The zone will have its 305.Ft uma_init 306method set to an internal method that initializes a newly allocated slab 307to all zeros. 308Do not confuse the 309.Ft uma_init 310method with 311.Ft uma_ctor . 312A zone with the 313.Dv UMA_ZONE_ZINIT 314flag does not return zeroed memory on every 315.Fn uma_zalloc . 316.It Dv UMA_ZONE_NOTPAGE 317An allocator function will be supplied with 318.Fn uma_zone_set_allocf 319and the memory that it returns may not be kernel virtual memory backed by VM 320pages in the page array. 
321.It Dv UMA_ZONE_MALLOC 322The zone is for the 323.Xr malloc 9 324subsystem. 325.It Dv UMA_ZONE_VM 326The zone is for the VM subsystem. 327.It Dv UMA_ZONE_NUMA 328The zone should use a first-touch NUMA policy rather than the round-robin 329default. 330If the 331.Dv UMA_FIRSTTOUCH 332kernel option is configured, all zones implicitly use a first-touch policy, 333and the 334.Dv UMA_ZONE_NUMA 335flag has no effect. 336The 337.Dv UMA_XDOMAIN 338kernel option, when configured, causes UMA to do the extra tracking to ensure 339that allocations from first-touch zones are always local. 340Otherwise, consumers that do not free memory on the same domain from which it 341was allocated will cause mixing in per-CPU caches. 342See 343.Xr numa 4 344for more details. 345.It Dv UMA_ZONE_CONTIG 346Items in this zone must be contiguous in physical address space. 347Items will follow normal alignment constraints and may span page boundaries 348between pages with contiguous physical addresses. 349.El 350.Pp 351Zones can be destroyed using 352.Fn uma_zdestroy , 353freeing all memory that is cached in the zone. 354All items allocated from the zone must be freed to the zone before the zone 355may be safely destroyed. 356.Pp 357To allocate an item from a zone, simply call 358.Fn uma_zalloc 359with a pointer to that zone and set the 360.Fa flags 361argument to selected flags as documented in 362.Xr malloc 9 . 363It will return a pointer to an item if successful, or 364.Dv NULL 365in the rare case where all items in the zone are in use and the 366allocator is unable to grow the zone and 367.Dv M_NOWAIT 368is specified. 369.Pp 370Items are released back to the zone from which they were allocated by 371calling 372.Fn uma_zfree 373with a pointer to the zone and a pointer to the item. 374If 375.Fa item 376is 377.Dv NULL , 378then 379.Fn uma_zfree 380does nothing. 
381.Pp 382The variants 383.Fn uma_zalloc_arg 384and 385.Fn uma_zfree_arg 386allow callers to 387specify an argument for the 388.Fa ctor 389and 390.Fa dtor 391functions of the zone, respectively. 392The variants 393.Fn uma_zalloc_pcpu 394and 395.Fn uma_zfree_pcpu 396allocate and free 397.Va mp_ncpu 398shadow copies as described for 399.Dv UMA_ZONE_PCPU . 400If 401.Fa item 402is 403.Dv NULL , 404then 405.Fn uma_zfree_pcpu 406does nothing. 407.Pp 408The 409.Fn uma_zalloc_domain 410function allows callers to specify a fixed 411.Xr numa 4 412domain to allocate from. 413This uses a guaranteed but slow path in the allocator, which reduces 414concurrency. 415.Pp 416The 417.Fn uma_prealloc 418function allocates slabs for the requested number of items, typically following 419the initial creation of a zone. 420Subsequent allocations from the zone will be satisfied using the pre-allocated 421slabs. 422Note that slab allocation is performed with the 423.Dv M_WAITOK 424flag, so 425.Fn uma_prealloc 426may sleep. 427.Pp 428The 429.Fn uma_zone_reserve 430function sets the number of reserved items for the zone. 431.Fn uma_zalloc 432and variants will ensure that the zone contains at least the reserved number 433of free items. 434Reserved items may be allocated by specifying 435.Dv M_USE_RESERVE 436in the allocation request flags. 437.Fn uma_zone_reserve 438does not perform any pre-allocation by itself. 439.Pp 440The 441.Fn uma_zone_reserve_kva 442function pre-allocates kernel virtual address space for the requested 443number of items. 444Subsequent allocations from the zone will be satisfied using the pre-allocated 445address space. 446Note that unlike 447.Fn uma_zone_reserve , 448.Fn uma_zone_reserve_kva 449does not restrict the use of the pre-allocation to 450.Dv M_USE_RESERVE 451requests. 452.Pp 453The 454.Fn uma_reclaim 455and 456.Fn uma_zone_reclaim 457functions reclaim cached items from UMA zones, releasing unused memory. 
458The 459.Fn uma_reclaim 460function reclaims items from all regular zones, while 461.Fn uma_zone_reclaim 462reclaims items only from the specified zone. 463The 464.Fa req 465parameter must be one of three values which specify how aggressively 466items are to be reclaimed: 467.Bl -tag -width indent 468.It Dv UMA_RECLAIM_TRIM 469Reclaim items only in excess of the zone's estimated working set size. 470The working set size is periodically updated and tracks the recent history 471of the zone's usage. 472.It Dv UMA_RECLAIM_DRAIN 473Reclaim all items from the unbounded cache. 474Free items in the per-CPU caches are left alone. 475.It Dv UMA_RECLAIM_DRAIN_CPU 476Reclaim all cached items. 477.El 478The 479.Fn uma_reclaim_domain 480and 481.Fn uma_zone_reclaim_domain 482functions apply only to items allocated from the specified domain. 483In the case of zones using a round-robin NUMA policy, cached items from all 484domains are freed to the keg, but only slabs from the specified domain will 485be freed. 486.Pp 487The 488.Fn uma_zone_set_allocf 489and 490.Fn uma_zone_set_freef 491functions allow a zone's default slab allocation and free functions to be 492overridden. 493This is useful if memory with special constraints such as attributes, 494alignment, or address ranges must be used. 495.Pp 496The 497.Fn uma_zone_set_max 498function limits the number of items 499.Pq and therefore memory 500that can be allocated to 501.Fa zone . 502The 503.Fa nitems 504argument specifies the requested upper limit number of items. 505The effective limit is returned to the caller, as it may end up being higher 506than requested due to the implementation rounding up to ensure all memory pages 507allocated to the zone are utilised to capacity. 508The limit applies to the total number of items in the zone, which includes 509allocated items, free items and free items in the per-CPU caches. 
510On systems with more than one CPU it may not be possible to allocate 511the specified number of items even when there is no shortage of memory, 512because all of the remaining free items may be in the caches of the 513other CPUs when the limit is hit. 514.Pp 515The 516.Fn uma_zone_set_maxcache 517function limits the number of free items which may be cached in the zone. 518This limit applies to both the per-CPU caches and the cache of free buckets. 519.Pp 520The 521.Fn uma_zone_get_max 522function returns the effective upper limit number of items for a zone. 523.Pp 524The 525.Fn uma_zone_get_cur 526function returns an approximation of the number of items currently allocated 527from the zone. 528The returned value is approximate because appropriate synchronisation to 529determine an exact value is not performed by the implementation. 530This ensures low overhead at the expense of potentially stale data being used 531in the calculation. 532.Pp 533The 534.Fn uma_zone_set_warning 535function sets a warning that will be printed on the system console when the 536given zone becomes full and fails to allocate an item. 537The warning will be printed no more often than every five minutes. 538Warnings can be turned off globally by setting the 539.Va vm.zone_warnings 540sysctl tunable to 541.Va 0 . 542.Pp 543The 544.Fn uma_zone_set_maxaction 545function sets a function that will be called when the given zone becomes full 546and fails to allocate an item. 547The function will be called with the zone locked. 548Also, the function 549that called the allocation function may have held additional locks. 550Therefore, 551this function should do very little work (similar to a signal handler). 552.Pp 553The 554.Fn SYSCTL_UMA_MAX parent nbr name access zone descr 555macro declares a static 556.Xr sysctl 9 557oid that exports the effective upper limit number of items for a zone. 558The 559.Fa zone 560argument should be a pointer to 561.Vt uma_zone_t . 
562A read of the oid returns the value obtained through 563.Fn uma_zone_get_max . 564A write to the oid sets a new value via 565.Fn uma_zone_set_max . 566The 567.Fn SYSCTL_ADD_UMA_MAX ctx parent nbr name access zone descr 568macro is provided to create this type of oid dynamically. 569.Pp 570The 571.Fn SYSCTL_UMA_CUR parent nbr name access zone descr 572macro declares a static read-only 573.Xr sysctl 9 574oid that exports the approximate current occupancy of the zone. 575The 576.Fa zone 577argument should be a pointer to 578.Vt uma_zone_t . 579A read of the oid returns the value obtained through 580.Fn uma_zone_get_cur . 581The 582.Fn SYSCTL_ADD_UMA_CUR ctx parent nbr name access zone descr 583macro is provided to create this type of oid dynamically. 584.Sh IMPLEMENTATION NOTES 585The memory that these allocation calls return is not executable. 586The 587.Fn uma_zalloc 588function does not support the 589.Dv M_EXEC 590flag to allocate executable memory. 591Not all platforms enforce a distinction between executable and 592non-executable memory. 593.Sh SEE ALSO 594.Xr numa 4 , 595.Xr vmstat 8 , 596.Xr malloc 9 597.Rs 598.%A Jeff Bonwick 599.%T "The Slab Allocator: An Object-Caching Kernel Memory Allocator" 600.%D 1994 601.Re 602.Sh HISTORY 603The zone allocator first appeared in 604.Fx 3.0 . 605It was radically changed in 606.Fx 5.0 607to function as a slab allocator. 608.Sh AUTHORS 609.An -nosplit 610The zone allocator was written by 611.An John S. Dyson . 612The zone allocator was rewritten in large parts by 613.An Jeff Roberson Aq Mt jeff@FreeBSD.org 614to function as a slab allocator. 615.Pp 616This manual page was written by 617.An Dag-Erling Sm\(/orgrav Aq Mt des@FreeBSD.org . 618Changes for UMA by 619.An Jeroen Ruigrok van der Werven Aq Mt asmodai@FreeBSD.org . 620