1.\"- 2.\" Copyright (c) 2001 Dag-Erling Coïdan Smørgrav 3.\" All rights reserved. 4.\" 5.\" Redistribution and use in source and binary forms, with or without 6.\" modification, are permitted provided that the following conditions 7.\" are met: 8.\" 1. Redistributions of source code must retain the above copyright 9.\" notice, this list of conditions and the following disclaimer. 10.\" 2. Redistributions in binary form must reproduce the above copyright 11.\" notice, this list of conditions and the following disclaimer in the 12.\" documentation and/or other materials provided with the distribution. 13.\" 14.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24.\" SUCH DAMAGE. 
25.\" 26.\" $FreeBSD$ 27.\" 28.Dd April 14, 2021 29.Dt UMA 9 30.Os 31.Sh NAME 32.Nm UMA 33.Nd general-purpose kernel object allocator 34.Sh SYNOPSIS 35.In sys/param.h 36.In sys/queue.h 37.In vm/uma.h 38.Cd "options UMA_FIRSTTOUCH" 39.Cd "options UMA_XDOMAIN" 40.Bd -literal 41typedef int (*uma_ctor)(void *mem, int size, void *arg, int flags); 42typedef void (*uma_dtor)(void *mem, int size, void *arg); 43typedef int (*uma_init)(void *mem, int size, int flags); 44typedef void (*uma_fini)(void *mem, int size); 45typedef int (*uma_import)(void *arg, void **store, int count, int domain, 46 int flags); 47typedef void (*uma_release)(void *arg, void **store, int count); 48typedef void *(*uma_alloc)(uma_zone_t zone, vm_size_t size, int domain, 49 uint8_t *pflag, int wait); 50typedef void (*uma_free)(void *item, vm_size_t size, uint8_t pflag); 51 52.Ed 53.Ft uma_zone_t 54.Fo uma_zcreate 55.Fa "char *name" "int size" 56.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini" 57.Fa "int align" "uint16_t flags" 58.Fc 59.Ft uma_zone_t 60.Fo uma_zcache_create 61.Fa "char *name" "int size" 62.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini" 63.Fa "uma_import zimport" "uma_release zrelease" 64.Fa "void *arg" "int flags" 65.Fc 66.Ft uma_zone_t 67.Fo uma_zsecond_create 68.Fa "char *name" 69.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini" 70.Fa "uma_zone_t master" 71.Fc 72.Ft void 73.Fn uma_zdestroy "uma_zone_t zone" 74.Ft "void *" 75.Fn uma_zalloc "uma_zone_t zone" "int flags" 76.Ft "void *" 77.Fn uma_zalloc_arg "uma_zone_t zone" "void *arg" "int flags" 78.Ft "void *" 79.Fn uma_zalloc_domain "uma_zone_t zone" "void *arg" "int domain" "int flags" 80.Ft "void *" 81.Fn uma_zalloc_pcpu "uma_zone_t zone" "int flags" 82.Ft "void *" 83.Fn uma_zalloc_pcpu_arg "uma_zone_t zone" "void *arg" "int flags" 84.Ft void 85.Fn uma_zfree "uma_zone_t zone" "void *item" 86.Ft void 87.Fn uma_zfree_arg "uma_zone_t zone" "void *item" "void *arg" 88.Ft 
void 89.Fn uma_zfree_pcpu "uma_zone_t zone" "void *item" 90.Ft void 91.Fn uma_zfree_pcpu_arg "uma_zone_t zone" "void *item" "void *arg" 92.Ft void 93.Fn uma_prealloc "uma_zone_t zone" "int nitems" 94.Ft void 95.Fn uma_zone_reserve "uma_zone_t zone" "int nitems" 96.Ft void 97.Fn uma_zone_reserve_kva "uma_zone_t zone" "int nitems" 98.Ft void 99.Fn uma_reclaim "int req" 100.Ft void 101.Fn uma_reclaim_domain "int req" "int domain" 102.Ft void 103.Fn uma_zone_reclaim "uma_zone_t zone" "int req" 104.Ft void 105.Fn uma_zone_reclaim_domain "uma_zone_t zone" "int req" "int domain" 106.Ft void 107.Fn uma_zone_set_allocf "uma_zone_t zone" "uma_alloc allocf" 108.Ft void 109.Fn uma_zone_set_freef "uma_zone_t zone" "uma_free freef" 110.Ft int 111.Fn uma_zone_set_max "uma_zone_t zone" "int nitems" 112.Ft void 113.Fn uma_zone_set_maxcache "uma_zone_t zone" "int nitems" 114.Ft int 115.Fn uma_zone_get_max "uma_zone_t zone" 116.Ft int 117.Fn uma_zone_get_cur "uma_zone_t zone" 118.Ft void 119.Fn uma_zone_set_warning "uma_zone_t zone" "const char *warning" 120.Ft void 121.Fn uma_zone_set_maxaction "uma_zone_t zone" "void (*maxaction)(uma_zone_t)" 122.In sys/sysctl.h 123.Fn SYSCTL_UMA_MAX parent nbr name access zone descr 124.Fn SYSCTL_ADD_UMA_MAX ctx parent nbr name access zone descr 125.Fn SYSCTL_UMA_CUR parent nbr name access zone descr 126.Fn SYSCTL_ADD_UMA_CUR ctx parent nbr name access zone descr 127.Sh DESCRIPTION 128UMA (Universal Memory Allocator) provides an efficient interface for managing 129dynamically-sized collections of items of identical size, referred to as zones. 130Zones keep track of which items are in use and which 131are not, and UMA provides functions for allocating items from a zone and 132for releasing them back, making them available for subsequent allocation requests. 133Zones maintain per-CPU caches with linear scalability on SMP 134systems as well as round-robin and first-touch policies for NUMA 135systems. 
136The number of items cached per CPU is bounded, and each zone additionally 137maintains an unbounded cache of items that is used to quickly satisfy 138per-CPU cache allocation misses. 139.Pp 140Two types of zones exist: regular zones and cache zones. 141In a regular zone, items are allocated from a slab, which is one or more 142virtually contiguous memory pages that have been allocated from the kernel's 143page allocator. 144Internally, slabs are managed by a UMA keg, which is responsible for allocating 145slabs and keeping track of their usage by one or more zones. 146In typical usage, there is one keg per zone, so slabs are not shared among 147multiple zones. 148.Pp 149Regular zones import items from a keg, and release items back to that keg if 150requested. 151Cache zones do not have a keg, and instead use custom import and release 152methods. 153For example, some collections of kernel objects are statically allocated 154at boot-time, and the size of the collection does not change. 155A cache zone can be used to implement an efficient allocator for the objects in 156such a collection. 157.Pp 158The 159.Fn uma_zcreate 160and 161.Fn uma_zcache_create 162functions create a new regular zone and cache zone, respectively. 163The 164.Fn uma_zsecond_create 165function creates a regular zone which shares the keg of the zone 166specified by the 167.Fa master 168argument. 169The 170.Fa name 171argument is a text name of the zone for debugging and stats; this memory 172should not be freed until the zone has been deallocated. 173.Pp 174The 175.Fa ctor 176and 177.Fa dtor 178arguments are callback functions that are called by 179the UMA subsystem at the time of the call to 180.Fn uma_zalloc 181and 182.Fn uma_zfree 183respectively. 184Their purpose is to provide hooks for initializing or 185destroying things that need to be done at the time of the allocation 186or release of a resource. 
187A good usage for the 188.Fa ctor 189and 190.Fa dtor 191callbacks might be to initialize a data structure embedded in the item, 192such as a 193.Xr queue 3 194head. 195.Pp 196The 197.Fa zinit 198and 199.Fa zfini 200arguments are used to optimize the allocation of items from the zone. 201They are called by the UMA subsystem whenever 202it needs to allocate or free items to satisfy requests or memory pressure. 203A good use for the 204.Fa zinit 205and 206.Fa zfini 207callbacks might be to 208initialize and destroy a mutex contained within an item. 209This would allow one to avoid destroying and re-initializing the mutex 210each time the item is freed and re-allocated. 211They are not called on each call to 212.Fn uma_zalloc 213and 214.Fn uma_zfree 215but rather when an item is imported into a zone's cache, and when a zone 216releases an item to the slab allocator, typically as a response to memory 217pressure. 218.Pp 219For 220.Fn uma_zcache_create , 221the 222.Fa zimport 223and 224.Fa zrelease 225functions are called to import items into the zone and to release items 226from the zone, respectively. 227The 228.Fa zimport 229function should store pointers to items in the 230.Fa store 231array, which contains a maximum of 232.Fa count 233entries. 234The function must return the number of imported items, which may be less than 235the maximum. 236Similarly, the 237.Fa store 238parameter to the 239.Fa zrelease 240function contains an array of 241.Fa count 242pointers to items. 243The 244.Fa arg 245parameter passed to 246.Fn uma_zcache_create 247is provided to the import and release functions. 248The 249.Fa domain 250parameter to 251.Fa zimport 252specifies the requested 253.Xr numa 4 254domain for the allocation. 255It is either a NUMA domain number or the special value 256.Dv UMA_ANYDOMAIN . 
257.Pp 258The 259.Fa flags 260argument of 261.Fn uma_zcreate 262and 263.Fn uma_zcache_create 264is a subset of the following flags: 265.Bl -tag -width "foo" 266.It Dv UMA_ZONE_NOFREE 267Slabs allocated to the zone's keg are never freed. 268.It Dv UMA_ZONE_NODUMP 269Pages belonging to the zone will not be included in minidumps. 270.It Dv UMA_ZONE_PCPU 271An allocation from the zone would have 272.Va mp_ncpu 273shadow copies that are privately assigned to CPUs. 274A CPU can address its private copy using the base allocation address plus 275a multiple of the current CPU ID and 276.Fn sizeof "struct pcpu" : 277.Bd -literal -offset indent 278foo_zone = uma_zcreate(..., UMA_ZONE_PCPU); 279 ... 280foo_base = uma_zalloc(foo_zone, ...); 281 ... 282critical_enter(); 283foo_pcpu = (foo_t *)zpcpu_get(foo_base); 284/* do something with foo_pcpu */ 285critical_exit(); 286 287.Ed 288Note that 289.Dv M_ZERO 290cannot be used when allocating items from a PCPU zone. 291To obtain zeroed memory from a PCPU zone, use the 292.Fn uma_zalloc_pcpu 293function and its variants instead, and pass 294.Dv M_ZERO . 295.It Dv UMA_ZONE_NOTOUCH 296The UMA subsystem may not directly touch (i.e. read or write) the slab memory. 297Otherwise, by default, book-keeping of items within a slab may be done in the 298slab page itself, and 299.Dv INVARIANTS 300kernels may also do use-after-free checking by accessing the slab memory. 301.It Dv UMA_ZONE_ZINIT 302The zone will have its 303.Ft uma_init 304method set to an internal method that initializes a newly allocated slab 305to all zeros. 306Do not mistake the 307.Ft uma_init 308method for 309.Ft uma_ctor . 310A zone with the 311.Dv UMA_ZONE_ZINIT 312flag would not return zeroed memory on every 313.Fn uma_zalloc . 314.It Dv UMA_ZONE_NOTPAGE 315An allocator function will be supplied with 316.Fn uma_zone_set_allocf 317and the memory that it returns may not be kernel virtual memory backed by VM 318pages in the page array. 
319.It Dv UMA_ZONE_MALLOC 320The zone is for the 321.Xr malloc 9 322subsystem. 323.It Dv UMA_ZONE_VM 324The zone is for the VM subsystem. 325.It Dv UMA_ZONE_NUMA 326The zone should use a first-touch NUMA policy rather than the round-robin 327default. 328If the 329.Dv UMA_FIRSTTOUCH 330kernel option is configured, all zones implicitly use a first-touch policy, 331and the 332.Dv UMA_ZONE_NUMA 333flag has no effect. 334The 335.Dv UMA_XDOMAIN 336kernel option, when configured, causes UMA to do the extra tracking to ensure 337that allocations from first-touch zones are always local. 338Otherwise, consumers that do not free memory on the same domain from which it 339was allocated will cause mixing in per-CPU caches. 340See 341.Xr numa 4 342for more details. 343.It Dv UMA_ZONE_CONTIG 344Items in this zone must be contiguous in physical address space. 345Items will follow normal alignment constraints and may span page boundaries 346between pages with contiguous physical addresses. 347.El 348.Pp 349Zones can be destroyed using 350.Fn uma_zdestroy , 351freeing all memory that is cached in the zone. 352All items allocated from the zone must be freed to the zone before the zone 353may be safely destroyed. 354.Pp 355To allocate an item from a zone, simply call 356.Fn uma_zalloc 357with a pointer to that zone and set the 358.Fa flags 359argument to selected flags as documented in 360.Xr malloc 9 . 361It will return a pointer to an item if successful, or 362.Dv NULL 363in the rare case where all items in the zone are in use and the 364allocator is unable to grow the zone and 365.Dv M_NOWAIT 366is specified. 367.Pp 368Items are released back to the zone from which they were allocated by 369calling 370.Fn uma_zfree 371with a pointer to the zone and a pointer to the item. 372If 373.Fa item 374is 375.Dv NULL , 376then 377.Fn uma_zfree 378does nothing. 
379.Pp 380The variants 381.Fn uma_zalloc_arg 382and 383.Fn uma_zfree_arg 384allow callers to 385specify an argument for the 386.Dv ctor 387and 388.Dv dtor 389functions of the zone, respectively. 390The variants 391.Fn uma_zalloc_pcpu 392and 393.Fn uma_zfree_pcpu 394allocate and free 395.Va mp_ncpu 396shadow copies as described for 397.Dv UMA_ZONE_PCPU . 398If 399.Fa item 400is 401.Dv NULL , 402then 403.Fn uma_zfree_pcpu 404does nothing. 405.Pp 406The 407.Fn uma_zalloc_domain 408function allows callers to specify a fixed 409.Xr numa 4 410domain to allocate from. 411This uses a guaranteed but slow path in the allocator which reduces 412concurrency. 413.Pp 414The 415.Fn uma_prealloc 416function allocates slabs for the requested number of items, typically following 417the initial creation of a zone. 418Subsequent allocations from the zone will be satisfied using the pre-allocated 419slabs. 420Note that slab allocation is performed with the 421.Dv M_WAITOK 422flag, so 423.Fn uma_prealloc 424may sleep. 425.Pp 426The 427.Fn uma_zone_reserve 428function sets the number of reserved items for the zone. 429.Fn uma_zalloc 430and variants will ensure that the zone contains at least the reserved number 431of free items. 432Reserved items may be allocated by specifying 433.Dv M_USE_RESERVE 434in the allocation request flags. 435.Fn uma_zone_reserve 436does not perform any pre-allocation by itself. 437.Pp 438The 439.Fn uma_zone_reserve_kva 440function pre-allocates kernel virtual address space for the requested 441number of items. 442Subsequent allocations from the zone will be satisfied using the pre-allocated 443address space. 444Note that unlike 445.Fn uma_zone_reserve , 446.Fn uma_zone_reserve_kva 447does not restrict the use of the pre-allocation to 448.Dv M_USE_RESERVE 449requests. 450.Pp 451The 452.Fn uma_reclaim 453and 454.Fn uma_zone_reclaim 455functions reclaim cached items from UMA zones, releasing unused memory. 
456The 457.Fn uma_reclaim 458function reclaims items from all regular zones, while 459.Fn uma_zone_reclaim 460reclaims items only from the specified zone. 461The 462.Fa req 463parameter must be one of three values which specify how aggressively 464items are to be reclaimed: 465.Bl -tag -width indent 466.It Dv UMA_RECLAIM_TRIM 467Reclaim items only in excess of the zone's estimated working set size. 468The working set size is periodically updated and tracks the recent history 469of the zone's usage. 470.It Dv UMA_RECLAIM_DRAIN 471Reclaim all items from the unbounded cache. 472Free items in the per-CPU caches are left alone. 473.It Dv UMA_RECLAIM_DRAIN_CPU 474Reclaim all cached items. 475.El 476The 477.Fn uma_reclaim_domain 478and 479.Fn uma_zone_reclaim_domain 480functions apply only to items allocated from the specified domain. 481In the case of zones using a round-robin NUMA policy, cached items from all 482domains are freed to the keg, but only slabs from the specified domain will 483be freed. 484.Pp 485The 486.Fn uma_zone_set_allocf 487and 488.Fn uma_zone_set_freef 489functions allow a zone's default slab allocation and free functions to be 490overridden. 491This is useful if memory with special constraints such as attributes, 492alignment, or address ranges must be used. 493.Pp 494The 495.Fn uma_zone_set_max 496function limits the number of items 497.Pq and therefore memory 498that can be allocated to 499.Fa zone . 500The 501.Fa nitems 502argument specifies the requested upper limit number of items. 503The effective limit is returned to the caller, as it may end up being higher 504than requested due to the implementation rounding up to ensure all memory pages 505allocated to the zone are utilised to capacity. 506The limit applies to the total number of items in the zone, which includes 507allocated items, free items and free items in the per-CPU caches. 
508On systems with more than one CPU it may not be possible to allocate 509the specified number of items even when there is no shortage of memory, 510because all of the remaining free items may be in the caches of the 511other CPUs when the limit is hit. 512.Pp 513The 514.Fn uma_zone_set_maxcache 515function limits the number of free items which may be cached in the zone. 516This limit applies to both the per-CPU caches and the cache of free buckets. 517.Pp 518The 519.Fn uma_zone_get_max 520function returns the effective upper limit number of items for a zone. 521.Pp 522The 523.Fn uma_zone_get_cur 524function returns an approximation of the number of items currently allocated 525from the zone. 526The returned value is approximate because appropriate synchronisation to 527determine an exact value is not performed by the implementation. 528This ensures low overhead at the expense of potentially stale data being used 529in the calculation. 530.Pp 531The 532.Fn uma_zone_set_warning 533function sets a warning that will be printed on the system console when the 534given zone becomes full and fails to allocate an item. 535The warning will be printed no more often than every five minutes. 536Warnings can be turned off globally by setting the 537.Va vm.zone_warnings 538sysctl tunable to 539.Va 0 . 540.Pp 541The 542.Fn uma_zone_set_maxaction 543function sets a function that will be called when the given zone becomes full 544and fails to allocate an item. 545The function will be called with the zone locked. 546Also, the function 547that called the allocation function may have held additional locks. 548Therefore, 549this function should do very little work (similar to a signal handler). 550.Pp 551The 552.Fn SYSCTL_UMA_MAX parent nbr name access zone descr 553macro declares a static 554.Xr sysctl 9 555oid that exports the effective upper limit number of items for a zone. 556The 557.Fa zone 558argument should be a pointer to 559.Vt uma_zone_t . 
560A read of the oid returns the value obtained through 561.Fn uma_zone_get_max . 562A write to the oid sets a new value via 563.Fn uma_zone_set_max . 564The 565.Fn SYSCTL_ADD_UMA_MAX ctx parent nbr name access zone descr 566macro is provided to create this type of oid dynamically. 567.Pp 568The 569.Fn SYSCTL_UMA_CUR parent nbr name access zone descr 570macro declares a static read-only 571.Xr sysctl 9 572oid that exports the approximate current occupancy of the zone. 573The 574.Fa zone 575argument should be a pointer to 576.Vt uma_zone_t . 577A read of the oid returns the value obtained through 578.Fn uma_zone_get_cur . 579The 580.Fn SYSCTL_ADD_UMA_CUR ctx parent nbr name access zone descr 581macro is provided to create this type of oid dynamically. 582.Sh IMPLEMENTATION NOTES 583The memory that these allocation calls return is not executable. 584The 585.Fn uma_zalloc 586function does not support the 587.Dv M_EXEC 588flag to allocate executable memory. 589Not all platforms enforce a distinction between executable and 590non-executable memory. 591.Sh SEE ALSO 592.Xr numa 4 , 593.Xr vmstat 8 , 594.Xr malloc 9 595.Rs 596.%A Jeff Bonwick 597.%T "The Slab Allocator: An Object-Caching Kernel Memory Allocator" 598.%D 1994 599.Re 600.Sh HISTORY 601The zone allocator first appeared in 602.Fx 3.0 . 603It was radically changed in 604.Fx 5.0 605to function as a slab allocator. 606.Sh AUTHORS 607.An -nosplit 608The zone allocator was written by 609.An John S. Dyson . 610The zone allocator was rewritten in large parts by 611.An Jeff Roberson Aq Mt jeff@FreeBSD.org 612to function as a slab allocator. 613.Pp 614This manual page was written by 615.An Dag-Erling Sm\(/orgrav Aq Mt des@FreeBSD.org . 616Changes for UMA by 617.An Jeroen Ruigrok van der Werven Aq Mt asmodai@FreeBSD.org . 618