1.\"- 2.\" Copyright (c) 2001 Dag-Erling Smørgrav 3.\" All rights reserved. 4.\" 5.\" Redistribution and use in source and binary forms, with or without 6.\" modification, are permitted provided that the following conditions 7.\" are met: 8.\" 1. Redistributions of source code must retain the above copyright 9.\" notice, this list of conditions and the following disclaimer. 10.\" 2. Redistributions in binary form must reproduce the above copyright 11.\" notice, this list of conditions and the following disclaimer in the 12.\" documentation and/or other materials provided with the distribution. 13.\" 14.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24.\" SUCH DAMAGE. 
25.\" 26.Dd January 16, 2023 27.Dt UMA 9 28.Os 29.Sh NAME 30.Nm UMA 31.Nd general-purpose kernel object allocator 32.Sh SYNOPSIS 33.In sys/param.h 34.In sys/queue.h 35.In vm/uma.h 36.Bd -literal 37typedef int (*uma_ctor)(void *mem, int size, void *arg, int flags); 38typedef void (*uma_dtor)(void *mem, int size, void *arg); 39typedef int (*uma_init)(void *mem, int size, int flags); 40typedef void (*uma_fini)(void *mem, int size); 41typedef int (*uma_import)(void *arg, void **store, int count, int domain, 42 int flags); 43typedef void (*uma_release)(void *arg, void **store, int count); 44typedef void *(*uma_alloc)(uma_zone_t zone, vm_size_t size, int domain, 45 uint8_t *pflag, int wait); 46typedef void (*uma_free)(void *item, vm_size_t size, uint8_t pflag); 47 48.Ed 49.Ft uma_zone_t 50.Fo uma_zcreate 51.Fa "char *name" "size_t size" 52.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini" 53.Fa "int align" "uint16_t flags" 54.Fc 55.Ft uma_zone_t 56.Fo uma_zcache_create 57.Fa "char *name" "int size" 58.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini" 59.Fa "uma_import zimport" "uma_release zrelease" 60.Fa "void *arg" "int flags" 61.Fc 62.Ft uma_zone_t 63.Fo uma_zsecond_create 64.Fa "char *name" 65.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini" 66.Fa "uma_zone_t master" 67.Fc 68.Ft void 69.Fn uma_zdestroy "uma_zone_t zone" 70.Ft "void *" 71.Fn uma_zalloc "uma_zone_t zone" "int flags" 72.Ft "void *" 73.Fn uma_zalloc_arg "uma_zone_t zone" "void *arg" "int flags" 74.Ft "void *" 75.Fn uma_zalloc_domain "uma_zone_t zone" "void *arg" "int domain" "int flags" 76.Ft "void *" 77.Fn uma_zalloc_pcpu "uma_zone_t zone" "int flags" 78.Ft "void *" 79.Fn uma_zalloc_pcpu_arg "uma_zone_t zone" "void *arg" "int flags" 80.Ft "void *" 81.Fn uma_zalloc_smr "uma_zone_t zone" "int flags" 82.Ft void 83.Fn uma_zfree "uma_zone_t zone" "void *item" 84.Ft void 85.Fn uma_zfree_arg "uma_zone_t zone" "void *item" "void *arg" 86.Ft void 87.Fn 
uma_zfree_pcpu "uma_zone_t zone" "void *item" 88.Ft void 89.Fn uma_zfree_pcpu_arg "uma_zone_t zone" "void *item" "void *arg" 90.Ft void 91.Fn uma_zfree_smr "uma_zone_t zone" "void *item" 92.Ft void 93.Fn uma_prealloc "uma_zone_t zone" "int nitems" 94.Ft void 95.Fn uma_zone_reserve "uma_zone_t zone" "int nitems" 96.Ft void 97.Fn uma_zone_reserve_kva "uma_zone_t zone" "int nitems" 98.Ft void 99.Fn uma_reclaim "int req" 100.Ft void 101.Fn uma_reclaim_domain "int req" "int domain" 102.Ft void 103.Fn uma_zone_reclaim "uma_zone_t zone" "int req" 104.Ft void 105.Fn uma_zone_reclaim_domain "uma_zone_t zone" "int req" "int domain" 106.Ft void 107.Fn uma_zone_set_allocf "uma_zone_t zone" "uma_alloc allocf" 108.Ft void 109.Fn uma_zone_set_freef "uma_zone_t zone" "uma_free freef" 110.Ft int 111.Fn uma_zone_set_max "uma_zone_t zone" "int nitems" 112.Ft void 113.Fn uma_zone_set_maxcache "uma_zone_t zone" "int nitems" 114.Ft int 115.Fn uma_zone_get_max "uma_zone_t zone" 116.Ft int 117.Fn uma_zone_get_cur "uma_zone_t zone" 118.Ft void 119.Fn uma_zone_set_warning "uma_zone_t zone" "const char *warning" 120.Ft void 121.Fn uma_zone_set_maxaction "uma_zone_t zone" "void (*maxaction)(uma_zone_t)" 122.Ft smr_t 123.Fn uma_zone_get_smr "uma_zone_t zone" 124.Ft void 125.Fn uma_zone_set_smr "uma_zone_t zone" "smr_t smr" 126.In sys/sysctl.h 127.Fn SYSCTL_UMA_MAX parent nbr name access zone descr 128.Fn SYSCTL_ADD_UMA_MAX ctx parent nbr name access zone descr 129.Fn SYSCTL_UMA_CUR parent nbr name access zone descr 130.Fn SYSCTL_ADD_UMA_CUR ctx parent nbr name access zone descr 131.Sh DESCRIPTION 132UMA (Universal Memory Allocator) provides an efficient interface for managing 133dynamically-sized collections of items of identical size, referred to as zones. 134Zones keep track of which items are in use and which 135are not, and UMA provides functions for allocating items from a zone and 136for releasing them back, making them available for subsequent allocation requests. 
137Zones maintain per-CPU caches with linear scalability on SMP 138systems as well as round-robin and first-touch policies for NUMA 139systems. 140The number of items cached per CPU is bounded, and each zone additionally 141maintains an unbounded cache of items that is used to quickly satisfy 142per-CPU cache allocation misses. 143.Pp 144Two types of zones exist: regular zones and cache zones. 145In a regular zone, items are allocated from a slab, which is one or more 146virtually contiguous memory pages that have been allocated from the kernel's 147page allocator. 148Internally, slabs are managed by a UMA keg, which is responsible for allocating 149slabs and keeping track of their usage by one or more zones. 150In typical usage, there is one keg per zone, so slabs are not shared among 151multiple zones. 152.Pp 153Regular zones import items from a keg, and release items back to that keg if 154requested. 155Cache zones do not have a keg, and instead use custom import and release 156methods. 157For example, some collections of kernel objects are statically allocated 158at boot time, and the size of the collection does not change. 159A cache zone can be used to implement an efficient allocator for the objects in 160such a collection. 161.Pp 162The 163.Fn uma_zcreate 164and 165.Fn uma_zcache_create 166functions create a new regular zone and cache zone, respectively. 167The 168.Fn uma_zsecond_create 169function creates a regular zone which shares the keg of the zone 170specified by the 171.Fa master 172argument. 173The 174.Fa name 175argument is a text name of the zone for debugging and stats; this memory 176should not be freed until the zone has been deallocated. 177.Pp 178The 179.Fa ctor 180and 181.Fa dtor 182arguments are callback functions that are called by 183the UMA subsystem at the time of the call to 184.Fn uma_zalloc 185and 186.Fn uma_zfree , 187respectively. 
188Their purpose is to provide hooks for initializing or 189destroying things that need to be done at the time of the allocation 190or release of a resource. 191A good usage for the 192.Fa ctor 193and 194.Fa dtor 195callbacks might be to initialize a data structure embedded in the item, 196such as a 197.Xr queue 3 198head. 199.Pp 200The 201.Fa zinit 202and 203.Fa zfini 204arguments are used to optimize the allocation of items from the zone. 205They are called by the UMA subsystem whenever 206it needs to allocate or free items to satisfy requests or memory pressure. 207A good use for the 208.Fa zinit 209and 210.Fa zfini 211callbacks might be to 212initialize and destroy a mutex contained within an item. 213This would allow one to avoid destroying and re-initializing the mutex 214each time the item is freed and re-allocated. 215They are not called on each call to 216.Fn uma_zalloc 217and 218.Fn uma_zfree 219but rather when an item is imported into a zone's cache, and when a zone 220releases an item to the slab allocator, typically as a response to memory 221pressure. 222.Pp 223For 224.Fn uma_zcache_create , 225the 226.Fa zimport 227and 228.Fa zrelease 229functions are called to import items into the zone and to release items 230from the zone, respectively. 231The 232.Fa zimport 233function should store pointers to items in the 234.Fa store 235array, which contains a maximum of 236.Fa count 237entries. 238The function must return the number of imported items, which may be less than 239the maximum. 240Similarly, the 241.Fa store 242parameter to the 243.Fa zrelease 244function contains an array of 245.Fa count 246pointers to items. 247The 248.Fa arg 249parameter passed to 250.Fn uma_zcache_create 251is provided to the import and release functions. 252The 253.Fa domain 254parameter to 255.Fa zimport 256specifies the requested 257.Xr numa 4 258domain for the allocation. 259It is either a NUMA domain number or the special value 260.Dv UMA_ANYDOMAIN . 
261.Pp 262The 263.Fa flags 264argument of 265.Fn uma_zcreate 266and 267.Fn uma_zcache_create 268is a subset of the following flags: 269.Bl -tag -width "foo" 270.It Dv UMA_ZONE_NOFREE 271Slabs allocated to the zone's keg are never freed. 272.It Dv UMA_ZONE_NODUMP 273Pages belonging to the zone will not be included in minidumps. 274.It Dv UMA_ZONE_PCPU 275An allocation from the zone would have 276.Va mp_ncpu 277shadow copies that are privately assigned to CPUs. 278A CPU can address its private copy using the base allocation address plus 279a multiple of the current CPU ID and 280.Fn sizeof "struct pcpu" : 281.Bd -literal -offset indent 282foo_zone = uma_zcreate(..., UMA_ZONE_PCPU); 283 ... 284foo_base = uma_zalloc(foo_zone, ...); 285 ... 286critical_enter(); 287foo_pcpu = (foo_t *)zpcpu_get(foo_base); 288/* do something with foo_pcpu */ 289critical_exit(); 290 291.Ed 292Note that 293.Dv M_ZERO 294cannot be used when allocating items from a PCPU zone. 295To obtain zeroed memory from a PCPU zone, use the 296.Fn uma_zalloc_pcpu 297function and its variants instead, and pass 298.Dv M_ZERO . 299.It Dv UMA_ZONE_NOTOUCH 300The UMA subsystem may not directly touch (i.e. read or write) the slab memory. 301Otherwise, by default, book-keeping of items within a slab may be done in the 302slab page itself, and 303.Dv INVARIANTS 304kernels may also do use-after-free checking by accessing the slab memory. 305.It Dv UMA_ZONE_ZINIT 306The zone will have its 307.Ft uma_init 308method set to an internal method that initializes a newly allocated slab 309to all zeros. 310Do not mistake the 311.Ft uma_init 312method for 313.Ft uma_ctor . 314A zone with the 315.Dv UMA_ZONE_ZINIT 316flag would not return zeroed memory on every 317.Fn uma_zalloc . 318.It Dv UMA_ZONE_NOTPAGE 319An allocator function will be supplied with 320.Fn uma_zone_set_allocf 321and the memory that it returns may not be kernel virtual memory backed by VM 322pages in the page array. 
323.It Dv UMA_ZONE_MALLOC 324The zone is for the 325.Xr malloc 9 326subsystem. 327.It Dv UMA_ZONE_VM 328The zone is for the VM subsystem. 329.It Dv UMA_ZONE_CONTIG 330Items in this zone must be contiguous in physical address space. 331Items will follow normal alignment constraints and may span page boundaries 332between pages with contiguous physical addresses. 333.It Dv UMA_ZONE_UNMANAGED 334By default, UMA zone caches are shrunk to help resolve free page shortages. 335Cached items that have not been used for a long period may also be freed from 336the zone. 337When this flag is set, the system will not reclaim memory from the zone's 338caches. 339.It Dv UMA_ZONE_SMR 340Create a zone whose items will be synchronized using the 341.Xr smr 9 342mechanism. 343Upon creation the zone will have an associated 344.Vt smr_t 345structure which can be fetched using 346.Fn uma_zone_get_smr . 347.El 348.Pp 349Zones can be destroyed using 350.Fn uma_zdestroy , 351freeing all memory that is cached in the zone. 352All items allocated from the zone must be freed to the zone before the zone 353may be safely destroyed. 354.Pp 355To allocate an item from a zone, simply call 356.Fn uma_zalloc 357with a pointer to that zone and set the 358.Fa flags 359argument to selected flags as documented in 360.Xr malloc 9 . 361It will return a pointer to an item if successful, or 362.Dv NULL 363in the rare case where all items in the zone are in use and the 364allocator is unable to grow the zone and 365.Dv M_NOWAIT 366is specified. 367.Pp 368Items are released back to the zone from which they were allocated by 369calling 370.Fn uma_zfree 371with a pointer to the zone and a pointer to the item. 372If 373.Fa item 374is 375.Dv NULL , 376then 377.Fn uma_zfree 378does nothing. 379.Pp 380The variants 381.Fn uma_zalloc_arg 382and 383.Fn uma_zfree_arg 384allow callers to 385specify an argument for the 386.Fa ctor 387and 388.Fa dtor 389functions of the zone, respectively. 
390The variants 391.Fn uma_zalloc_pcpu 392and 393.Fn uma_zfree_pcpu 394allocate and free 395.Va mp_ncpu 396shadow copies as described for 397.Dv UMA_ZONE_PCPU . 398If 399.Fa item 400is 401.Dv NULL , 402then 403.Fn uma_zfree_pcpu 404does nothing. 405.Pp 406The 407.Fn uma_zalloc_smr 408and 409.Fn uma_zfree_smr 410functions allocate and free items from an SMR-enabled zone, that is, 411a zone created with 412.Dv UMA_ZONE_SMR 413or a zone that has had 414.Fn uma_zone_set_smr 415called. 416.Pp 417The 418.Fn uma_zalloc_domain 419function allows callers to specify a fixed 420.Xr numa 4 421domain to allocate from. 422This uses a guaranteed but slow path in the allocator which reduces 423concurrency. 424.Pp 425The 426.Fn uma_prealloc 427function allocates slabs for the requested number of items, typically following 428the initial creation of a zone. 429Subsequent allocations from the zone will be satisfied using the pre-allocated 430slabs. 431Note that slab allocation is performed with the 432.Dv M_WAITOK 433flag, so 434.Fn uma_prealloc 435may sleep. 436.Pp 437The 438.Fn uma_zone_reserve 439function sets the number of reserved items for the zone. 440.Fn uma_zalloc 441and variants will ensure that the zone contains at least the reserved number 442of free items. 443Reserved items may be allocated by specifying 444.Dv M_USE_RESERVE 445in the allocation request flags. 446.Fn uma_zone_reserve 447does not perform any pre-allocation by itself. 448.Pp 449The 450.Fn uma_zone_reserve_kva 451function pre-allocates kernel virtual address space for the requested 452number of items. 453Subsequent allocations from the zone will be satisfied using the pre-allocated 454address space. 455Note that unlike 456.Fn uma_zone_reserve , 457.Fn uma_zone_reserve_kva 458does not restrict the use of the pre-allocation to 459.Dv M_USE_RESERVE 460requests. 461.Pp 462The 463.Fn uma_reclaim 464and 465.Fn uma_zone_reclaim 466functions reclaim cached items from UMA zones, releasing unused memory. 
467The 468.Fn uma_reclaim 469function reclaims items from all regular zones, while 470.Fn uma_zone_reclaim 471reclaims items only from the specified zone. 472The 473.Fa req 474parameter must be one of three values which specify how aggressively 475items are to be reclaimed: 476.Bl -tag -width indent 477.It Dv UMA_RECLAIM_TRIM 478Reclaim items only in excess of the zone's estimated working set size. 479The working set size is periodically updated and tracks the recent history 480of the zone's usage. 481.It Dv UMA_RECLAIM_DRAIN 482Reclaim all items from the unbounded cache. 483Free items in the per-CPU caches are left alone. 484.It Dv UMA_RECLAIM_DRAIN_CPU 485Reclaim all cached items. 486.El 487The 488.Fn uma_reclaim_domain 489and 490.Fn uma_zone_reclaim_domain 491functions apply only to items allocated from the specified domain. 492In the case of domains using a round-robin NUMA policy, cached items from all 493domains are freed to the keg, but only slabs from the specific domain will 494be freed. 495.Pp 496The 497.Fn uma_zone_set_allocf 498and 499.Fn uma_zone_set_freef 500functions allow a zone's default slab allocation and free functions to be 501overridden. 502This is useful if memory with special constraints such as attributes, 503alignment, or address ranges must be used. 504.Pp 505The 506.Fn uma_zone_set_max 507function limits the number of items 508.Pq and therefore memory 509that can be allocated to 510.Fa zone . 511The 512.Fa nitems 513argument specifies the requested upper limit number of items. 514The effective limit is returned to the caller, as it may end up being higher 515than requested due to the implementation rounding up to ensure all memory pages 516allocated to the zone are utilised to capacity. 517The limit applies to the total number of items in the zone, which includes 518allocated items, free items and free items in the per-cpu caches. 
519On systems with more than one CPU it may not be possible to allocate 520the specified number of items even when there is no shortage of memory, 521because all of the remaining free items may be in the caches of the 522other CPUs when the limit is hit. 523.Pp 524The 525.Fn uma_zone_set_maxcache 526function limits the number of free items which may be cached in the zone. 527This limit applies to both the per-CPU caches and the cache of free buckets. 528.Pp 529The 530.Fn uma_zone_get_max 531function returns the effective upper limit number of items for a zone. 532.Pp 533The 534.Fn uma_zone_get_cur 535function returns an approximation of the number of items currently allocated 536from the zone. 537The returned value is approximate because appropriate synchronisation to 538determine an exact value is not performed by the implementation. 539This ensures low overhead at the expense of potentially stale data being used 540in the calculation. 541.Pp 542The 543.Fn uma_zone_set_warning 544function sets a warning that will be printed on the system console when the 545given zone becomes full and fails to allocate an item. 546The warning will be printed no more often than every five minutes. 547Warnings can be turned off globally by setting the 548.Va vm.zone_warnings 549sysctl tunable to 550.Va 0 . 551.Pp 552The 553.Fn uma_zone_set_maxaction 554function sets a function that will be called when the given zone becomes full 555and fails to allocate an item. 556The function will be called with the zone locked. 557Also, the function 558that called the allocation function may have held additional locks. 559Therefore, 560this function should do very little work (similar to a signal handler). 561.Pp 562The 563.Fn uma_zone_set_smr 564function associates an existing 565.Xr smr 9 566structure with a UMA zone. 567The effect is similar to creating a zone with the 568.Dv UMA_ZONE_SMR 569flag, except that a new SMR structure is not created. 
570This function must be called before any allocations from the zone are performed. 571.Pp 572The 573.Fn SYSCTL_UMA_MAX parent nbr name access zone descr 574macro declares a static 575.Xr sysctl 9 576oid that exports the effective upper limit number of items for a zone. 577The 578.Fa zone 579argument should be a pointer to 580.Vt uma_zone_t . 581A read of the oid returns the value obtained through 582.Fn uma_zone_get_max . 583A write to the oid sets a new value via 584.Fn uma_zone_set_max . 585The 586.Fn SYSCTL_ADD_UMA_MAX ctx parent nbr name access zone descr 587macro is provided to create this type of oid dynamically. 588.Pp 589The 590.Fn SYSCTL_UMA_CUR parent nbr name access zone descr 591macro declares a static read-only 592.Xr sysctl 9 593oid that exports the approximate current occupancy of the zone. 594The 595.Fa zone 596argument should be a pointer to 597.Vt uma_zone_t . 598A read of the oid returns the value obtained through 599.Fn uma_zone_get_cur . 600The 601.Fn SYSCTL_ADD_UMA_CUR ctx parent nbr name access zone descr 602macro is provided to create this type of oid dynamically. 603.Sh IMPLEMENTATION NOTES 604The memory that these allocation calls return is not executable. 605The 606.Fn uma_zalloc 607function does not support the 608.Dv M_EXEC 609flag to allocate executable memory. 610Not all platforms enforce a distinction between executable and 611non-executable memory. 612.Sh SEE ALSO 613.Xr numa 4 , 614.Xr vmstat 8 , 615.Xr malloc 9 , 616.Xr smr 9 617.Rs 618.%A Jeff Bonwick 619.%T "The Slab Allocator: An Object-Caching Kernel Memory Allocator" 620.%D 1994 621.Re 622.Sh HISTORY 623The zone allocator first appeared in 624.Fx 3.0 . 625It was radically changed in 626.Fx 5.0 627to function as a slab allocator. 628.Sh AUTHORS 629.An -nosplit 630The zone allocator was written by 631.An John S. Dyson . 632The zone allocator was rewritten in large parts by 633.An Jeff Roberson Aq Mt jeff@FreeBSD.org 634to function as a slab allocator. 
635.Pp 636This manual page was written by 637.An Dag-Erling Sm\(/orgrav Aq Mt des@FreeBSD.org . 638Changes for UMA by 639.An Jeroen Ruigrok van der Werven Aq Mt asmodai@FreeBSD.org . 640