1.\"- 2.\" Copyright (c) 2001 Dag-Erling Coïdan Smørgrav 3.\" All rights reserved. 4.\" 5.\" Redistribution and use in source and binary forms, with or without 6.\" modification, are permitted provided that the following conditions 7.\" are met: 8.\" 1. Redistributions of source code must retain the above copyright 9.\" notice, this list of conditions and the following disclaimer. 10.\" 2. Redistributions in binary form must reproduce the above copyright 11.\" notice, this list of conditions and the following disclaimer in the 12.\" documentation and/or other materials provided with the distribution. 13.\" 14.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24.\" SUCH DAMAGE. 
25.\" 26.\" $FreeBSD$ 27.\" 28.Dd January 16, 2023 29.Dt UMA 9 30.Os 31.Sh NAME 32.Nm UMA 33.Nd general-purpose kernel object allocator 34.Sh SYNOPSIS 35.In sys/param.h 36.In sys/queue.h 37.In vm/uma.h 38.Bd -literal 39typedef int (*uma_ctor)(void *mem, int size, void *arg, int flags); 40typedef void (*uma_dtor)(void *mem, int size, void *arg); 41typedef int (*uma_init)(void *mem, int size, int flags); 42typedef void (*uma_fini)(void *mem, int size); 43typedef int (*uma_import)(void *arg, void **store, int count, int domain, 44 int flags); 45typedef void (*uma_release)(void *arg, void **store, int count); 46typedef void *(*uma_alloc)(uma_zone_t zone, vm_size_t size, int domain, 47 uint8_t *pflag, int wait); 48typedef void (*uma_free)(void *item, vm_size_t size, uint8_t pflag); 49 50.Ed 51.Ft uma_zone_t 52.Fo uma_zcreate 53.Fa "char *name" "size_t size" 54.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini" 55.Fa "int align" "uint16_t flags" 56.Fc 57.Ft uma_zone_t 58.Fo uma_zcache_create 59.Fa "char *name" "int size" 60.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini" 61.Fa "uma_import zimport" "uma_release zrelease" 62.Fa "void *arg" "int flags" 63.Fc 64.Ft uma_zone_t 65.Fo uma_zsecond_create 66.Fa "char *name" 67.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini" 68.Fa "uma_zone_t master" 69.Fc 70.Ft void 71.Fn uma_zdestroy "uma_zone_t zone" 72.Ft "void *" 73.Fn uma_zalloc "uma_zone_t zone" "int flags" 74.Ft "void *" 75.Fn uma_zalloc_arg "uma_zone_t zone" "void *arg" "int flags" 76.Ft "void *" 77.Fn uma_zalloc_domain "uma_zone_t zone" "void *arg" "int domain" "int flags" 78.Ft "void *" 79.Fn uma_zalloc_pcpu "uma_zone_t zone" "int flags" 80.Ft "void *" 81.Fn uma_zalloc_pcpu_arg "uma_zone_t zone" "void *arg" "int flags" 82.Ft "void *" 83.Fn uma_zalloc_smr "uma_zone_t zone" "int flags" 84.Ft void 85.Fn uma_zfree "uma_zone_t zone" "void *item" 86.Ft void 87.Fn uma_zfree_arg "uma_zone_t zone" "void *item" "void 
*arg" 88.Ft void 89.Fn uma_zfree_pcpu "uma_zone_t zone" "void *item" 90.Ft void 91.Fn uma_zfree_pcpu_arg "uma_zone_t zone" "void *item" "void *arg" 92.Ft void 93.Fn uma_zfree_smr "uma_zone_t zone" "void *item" 94.Ft void 95.Fn uma_prealloc "uma_zone_t zone" "int nitems" 96.Ft void 97.Fn uma_zone_reserve "uma_zone_t zone" "int nitems" 98.Ft void 99.Fn uma_zone_reserve_kva "uma_zone_t zone" "int nitems" 100.Ft void 101.Fn uma_reclaim "int req" 102.Ft void 103.Fn uma_reclaim_domain "int req" "int domain" 104.Ft void 105.Fn uma_zone_reclaim "uma_zone_t zone" "int req" 106.Ft void 107.Fn uma_zone_reclaim_domain "uma_zone_t zone" "int req" "int domain" 108.Ft void 109.Fn uma_zone_set_allocf "uma_zone_t zone" "uma_alloc allocf" 110.Ft void 111.Fn uma_zone_set_freef "uma_zone_t zone" "uma_free freef" 112.Ft int 113.Fn uma_zone_set_max "uma_zone_t zone" "int nitems" 114.Ft void 115.Fn uma_zone_set_maxcache "uma_zone_t zone" "int nitems" 116.Ft int 117.Fn uma_zone_get_max "uma_zone_t zone" 118.Ft int 119.Fn uma_zone_get_cur "uma_zone_t zone" 120.Ft void 121.Fn uma_zone_set_warning "uma_zone_t zone" "const char *warning" 122.Ft void 123.Fn uma_zone_set_maxaction "uma_zone_t zone" "void (*maxaction)(uma_zone_t)" 124.Ft smr_t 125.Fn uma_zone_get_smr "uma_zone_t zone" 126.Ft void 127.Fn uma_zone_set_smr "uma_zone_t zone" "smr_t smr" 128.In sys/sysctl.h 129.Fn SYSCTL_UMA_MAX parent nbr name access zone descr 130.Fn SYSCTL_ADD_UMA_MAX ctx parent nbr name access zone descr 131.Fn SYSCTL_UMA_CUR parent nbr name access zone descr 132.Fn SYSCTL_ADD_UMA_CUR ctx parent nbr name access zone descr 133.Sh DESCRIPTION 134UMA (Universal Memory Allocator) provides an efficient interface for managing 135dynamically-sized collections of items of identical size, referred to as zones. 
136Zones keep track of which items are in use and which 137are not, and UMA provides functions for allocating items from a zone and 138for releasing them back, making them available for subsequent allocation requests. 139Zones maintain per-CPU caches with linear scalability on SMP 140systems as well as round-robin and first-touch policies for NUMA 141systems. 142The number of items cached per CPU is bounded, and each zone additionally 143maintains an unbounded cache of items that is used to quickly satisfy 144per-CPU cache allocation misses. 145.Pp 146Two types of zones exist: regular zones and cache zones. 147In a regular zone, items are allocated from a slab, which is one or more 148virtually contiguous memory pages that have been allocated from the kernel's 149page allocator. 150Internally, slabs are managed by a UMA keg, which is responsible for allocating 151slabs and keeping track of their usage by one or more zones. 152In typical usage, there is one keg per zone, so slabs are not shared among 153multiple zones. 154.Pp 155Regular zones import items from a keg, and release items back to that keg if 156requested. 157Cache zones do not have a keg, and instead use custom import and release 158methods. 159For example, some collections of kernel objects are statically allocated 160at boot-time, and the size of the collection does not change. 161A cache zone can be used to implement an efficient allocator for the objects in 162such a collection. 163.Pp 164The 165.Fn uma_zcreate 166and 167.Fn uma_zcache_create 168functions create a new regular zone and cache zone, respectively. 169The 170.Fn uma_zsecond_create 171function creates a regular zone which shares the keg of the zone 172specified by the 173.Fa master 174argument. 175The 176.Fa name 177argument is a text name of the zone for debugging and stats; this memory 178should not be freed until the zone has been deallocated. 
179.Pp 180The 181.Fa ctor 182and 183.Fa dtor 184arguments are callback functions that are called by 185the UMA subsystem at the time of the call to 186.Fn uma_zalloc 187and 188.Fn uma_zfree 189respectively. 190Their purpose is to provide hooks for initializing or 191destroying things that need to be done at the time of the allocation 192or release of a resource. 193A good usage for the 194.Fa ctor 195and 196.Fa dtor 197callbacks might be to initialize a data structure embedded in the item, 198such as a 199.Xr queue 3 200head. 201.Pp 202The 203.Fa zinit 204and 205.Fa zfini 206arguments are used to optimize the allocation of items from the zone. 207They are called by the UMA subsystem whenever 208it needs to allocate or free items to satisfy requests or memory pressure. 209A good use for the 210.Fa zinit 211and 212.Fa zfini 213callbacks might be to 214initialize and destroy a mutex contained within an item. 215This would allow one to avoid destroying and re-initializing the mutex 216each time the item is freed and re-allocated. 217They are not called on each call to 218.Fn uma_zalloc 219and 220.Fn uma_zfree 221but rather when an item is imported into a zone's cache, and when a zone 222releases an item to the slab allocator, typically as a response to memory 223pressure. 224.Pp 225For 226.Fn uma_zcache_create , 227the 228.Fa zimport 229and 230.Fa zrelease 231functions are called to import items into the zone and to release items 232from the zone, respectively. 233The 234.Fa zimport 235function should store pointers to items in the 236.Fa store 237array, which contains a maximum of 238.Fa count 239entries. 240The function must return the number of imported items, which may be less than 241the maximum. 242Similarly, the 243.Fa store 244parameter to the 245.Fa zrelease 246function contains an array of 247.Fa count 248pointers to items. 249The 250.Fa arg 251parameter passed to 252.Fn uma_zcache_create 253is provided to the import and release functions. 
254The 255.Fa domain 256parameter to 257.Fa zimport 258specifies the requested 259.Xr numa 4 260domain for the allocation. 261It is either a NUMA domain number or the special value 262.Dv UMA_ANYDOMAIN . 263.Pp 264The 265.Fa flags 266argument of 267.Fn uma_zcreate 268and 269.Fn uma_zcache_create 270is a subset of the following flags: 271.Bl -tag -width "foo" 272.It Dv UMA_ZONE_NOFREE 273Slabs allocated to the zone's keg are never freed. 274.It Dv UMA_ZONE_NODUMP 275Pages belonging to the zone will not be included in minidumps. 276.It Dv UMA_ZONE_PCPU 277An allocation from the zone would have 278.Va mp_ncpu 279shadow copies, that are privately assigned to CPUs. 280A CPU can address its private copy using the base allocation address plus 281a multiple of the current CPU ID and 282.Fn sizeof "struct pcpu" : 283.Bd -literal -offset indent 284foo_zone = uma_zcreate(..., UMA_ZONE_PCPU); 285 ... 286foo_base = uma_zalloc(foo_zone, ...); 287 ... 288critical_enter(); 289foo_pcpu = (foo_t *)zpcpu_get(foo_base); 290/* do something with foo_pcpu */ 291critical_exit(); 292 293.Ed 294Note that 295.Dv M_ZERO 296cannot be used when allocating items from a PCPU zone. 297To obtain zeroed memory from a PCPU zone, use the 298.Fn uma_zalloc_pcpu 299function and its variants instead, and pass 300.Dv M_ZERO . 301.It Dv UMA_ZONE_NOTOUCH 302The UMA subsystem may not directly touch (i.e. read or write) the slab memory. 303Otherwise, by default, book-keeping of items within a slab may be done in the 304slab page itself, and 305.Dv INVARIANTS 306kernels may also do use-after-free checking by accessing the slab memory. 307.It Dv UMA_ZONE_ZINIT 308The zone will have its 309.Ft uma_init 310method set to an internal method that initializes a newly allocated slab 311to all zeros. 312Do not confuse the 313.Ft uma_init 314method with 315.Ft uma_ctor . 316A zone with the 317.Dv UMA_ZONE_ZINIT 318flag would not return zeroed memory on every 319.Fn uma_zalloc . 
320.It Dv UMA_ZONE_NOTPAGE 321An allocator function will be supplied with 322.Fn uma_zone_set_allocf 323and the memory that it returns may not be kernel virtual memory backed by VM 324pages in the page array. 325.It Dv UMA_ZONE_MALLOC 326The zone is for the 327.Xr malloc 9 328subsystem. 329.It Dv UMA_ZONE_VM 330The zone is for the VM subsystem. 331.It Dv UMA_ZONE_CONTIG 332Items in this zone must be contiguous in physical address space. 333Items will follow normal alignment constraints and may span page boundaries 334between pages with contiguous physical addresses. 335.It Dv UMA_ZONE_UNMANAGED 336By default, UMA zone caches are shrunk to help resolve free page shortages. 337Cached items that have not been used for a long period may also be freed from the 338zone. 339When this flag is set, the system will not reclaim memory from the zone's 340caches. 341.It Dv UMA_ZONE_SMR 342Create a zone whose items will be synchronized using the 343.Xr smr 9 344mechanism. 345Upon creation the zone will have an associated 346.Vt smr_t 347structure which can be fetched using 348.Fn uma_zone_get_smr . 349.El 350.Pp 351Zones can be destroyed using 352.Fn uma_zdestroy , 353freeing all memory that is cached in the zone. 354All items allocated from the zone must be freed to the zone before the zone 355may be safely destroyed. 356.Pp 357To allocate an item from a zone, simply call 358.Fn uma_zalloc 359with a pointer to that zone and set the 360.Fa flags 361argument to selected flags as documented in 362.Xr malloc 9 . 363It will return a pointer to an item if successful, or 364.Dv NULL 365in the rare case where all items in the zone are in use and the 366allocator is unable to grow the zone and 367.Dv M_NOWAIT 368is specified. 369.Pp 370Items are released back to the zone from which they were allocated by 371calling 372.Fn uma_zfree 373with a pointer to the zone and a pointer to the item. 374If 375.Fa item 376is 377.Dv NULL , 378then 379.Fn uma_zfree 380does nothing. 
381.Pp 382The variants 383.Fn uma_zalloc_arg 384and 385.Fn uma_zfree_arg 386allow callers to 387specify an argument for the 388.Fa ctor 389and 390.Fa dtor 391functions of the zone, respectively. 392The variants 393.Fn uma_zalloc_pcpu 394and 395.Fn uma_zfree_pcpu 396allocate and free 397.Va mp_ncpu 398shadow copies as described for 399.Dv UMA_ZONE_PCPU . 400If 401.Fa item 402is 403.Dv NULL , 404then 405.Fn uma_zfree_pcpu 406does nothing. 407.Pp 408The 409.Fn uma_zalloc_smr 410and 411.Fn uma_zfree_smr 412functions allocate and free items from an SMR-enabled zone, that is, 413a zone created with 414.Dv UMA_ZONE_SMR 415or a zone that has had 416.Fn uma_zone_set_smr 417called. 418.Pp 419The 420.Fn uma_zalloc_domain 421function allows callers to specify a fixed 422.Xr numa 4 423domain to allocate from. 424This uses a guaranteed but slow path in the allocator which reduces 425concurrency. 426.Pp 427The 428.Fn uma_prealloc 429function allocates slabs for the requested number of items, typically following 430the initial creation of a zone. 431Subsequent allocations from the zone will be satisfied using the pre-allocated 432slabs. 433Note that slab allocation is performed with the 434.Dv M_WAITOK 435flag, so 436.Fn uma_prealloc 437may sleep. 438.Pp 439The 440.Fn uma_zone_reserve 441function sets the number of reserved items for the zone. 442.Fn uma_zalloc 443and variants will ensure that the zone contains at least the reserved number 444of free items. 445Reserved items may be allocated by specifying 446.Dv M_USE_RESERVE 447in the allocation request flags. 448.Fn uma_zone_reserve 449does not perform any pre-allocation by itself. 450.Pp 451The 452.Fn uma_zone_reserve_kva 453function pre-allocates kernel virtual address space for the requested 454number of items. 455Subsequent allocations from the zone will be satisfied using the pre-allocated 456address space. 
457Note that unlike 458.Fn uma_zone_reserve , 459.Fn uma_zone_reserve_kva 460does not restrict the use of the pre-allocation to 461.Dv M_USE_RESERVE 462requests. 463.Pp 464The 465.Fn uma_reclaim 466and 467.Fn uma_zone_reclaim 468functions reclaim cached items from UMA zones, releasing unused memory. 469The 470.Fn uma_reclaim 471function reclaims items from all regular zones, while 472.Fn uma_zone_reclaim 473reclaims items only from the specified zone. 474The 475.Fa req 476parameter must be one of three values which specify how aggressively 477items are to be reclaimed: 478.Bl -tag -width indent 479.It Dv UMA_RECLAIM_TRIM 480Reclaim items only in excess of the zone's estimated working set size. 481The working set size is periodically updated and tracks the recent history 482of the zone's usage. 483.It Dv UMA_RECLAIM_DRAIN 484Reclaim all items from the unbounded cache. 485Free items in the per-CPU caches are left alone. 486.It Dv UMA_RECLAIM_DRAIN_CPU 487Reclaim all cached items. 488.El 489The 490.Fn uma_reclaim_domain 491and 492.Fn uma_zone_reclaim_domain 493functions apply only to items allocated from the specified domain. 494In the case of domains using a round-robin NUMA policy, cached items from all 495domains are freed to the keg, but only slabs from the specific domain will 496be freed. 497.Pp 498The 499.Fn uma_zone_set_allocf 500and 501.Fn uma_zone_set_freef 502functions allow a zone's default slab allocation and free functions to be 503overridden. 504This is useful if memory with special constraints such as attributes, 505alignment, or address ranges must be used. 506.Pp 507The 508.Fn uma_zone_set_max 509function limits the number of items 510.Pq and therefore memory 511that can be allocated to 512.Fa zone . 513The 514.Fa nitems 515argument specifies the requested upper limit number of items. 
516The effective limit is returned to the caller, as it may end up being higher 517than requested due to the implementation rounding up to ensure all memory pages 518allocated to the zone are utilised to capacity. 519The limit applies to the total number of items in the zone, which includes 520allocated items, free items and free items in the per-cpu caches. 521On systems with more than one CPU it may not be possible to allocate 522the specified number of items even when there is no shortage of memory, 523because all of the remaining free items may be in the caches of the 524other CPUs when the limit is hit. 525.Pp 526The 527.Fn uma_zone_set_maxcache 528function limits the number of free items which may be cached in the zone. 529This limit applies to both the per-CPU caches and the cache of free buckets. 530.Pp 531The 532.Fn uma_zone_get_max 533function returns the effective upper limit number of items for a zone. 534.Pp 535The 536.Fn uma_zone_get_cur 537function returns an approximation of the number of items currently allocated 538from the zone. 539The returned value is approximate because appropriate synchronisation to 540determine an exact value is not performed by the implementation. 541This ensures low overhead at the expense of potentially stale data being used 542in the calculation. 543.Pp 544The 545.Fn uma_zone_set_warning 546function sets a warning that will be printed on the system console when the 547given zone becomes full and fails to allocate an item. 548The warning will be printed no more often than every five minutes. 549Warnings can be turned off globally by setting the 550.Va vm.zone_warnings 551sysctl tunable to 552.Va 0 . 553.Pp 554The 555.Fn uma_zone_set_maxaction 556function sets a function that will be called when the given zone becomes full 557and fails to allocate an item. 558The function will be called with the zone locked. 559Also, the function 560that called the allocation function may have held additional locks. 
561Therefore, 562this function should do very little work (similar to a signal handler). 563.Pp 564The 565.Fn uma_zone_set_smr 566function associates an existing 567.Xr smr 9 568structure with a UMA zone. 569The effect is similar to creating a zone with the 570.Dv UMA_ZONE_SMR 571flag, except that a new SMR structure is not created. 572This function must be called before any allocations from the zone are performed. 573.Pp 574The 575.Fn SYSCTL_UMA_MAX parent nbr name access zone descr 576macro declares a static 577.Xr sysctl 9 578oid that exports the effective upper limit number of items for a zone. 579The 580.Fa zone 581argument should be a pointer to 582.Vt uma_zone_t . 583A read of the oid returns the value obtained through 584.Fn uma_zone_get_max . 585A write to the oid sets a new value via 586.Fn uma_zone_set_max . 587The 588.Fn SYSCTL_ADD_UMA_MAX ctx parent nbr name access zone descr 589macro is provided to create this type of oid dynamically. 590.Pp 591The 592.Fn SYSCTL_UMA_CUR parent nbr name access zone descr 593macro declares a static read-only 594.Xr sysctl 9 595oid that exports the approximate current occupancy of the zone. 596The 597.Fa zone 598argument should be a pointer to 599.Vt uma_zone_t . 600A read of the oid returns the value obtained through 601.Fn uma_zone_get_cur . 602The 603.Fn SYSCTL_ADD_UMA_CUR ctx parent nbr name access zone descr 604macro is provided to create this type of oid dynamically. 605.Sh IMPLEMENTATION NOTES 606The memory that these allocation calls return is not executable. 607The 608.Fn uma_zalloc 609function does not support the 610.Dv M_EXEC 611flag to allocate executable memory. 612Not all platforms enforce a distinction between executable and 613non-executable memory. 614.Sh SEE ALSO 615.Xr numa 4 , 616.Xr vmstat 8 , 617.Xr malloc 9 , 618.Xr smr 9 619.Rs 620.%A Jeff Bonwick 621.%T "The Slab Allocator: An Object-Caching Kernel Memory Allocator" 622.%D 1994 623.Re 624.Sh HISTORY 625The zone allocator first appeared in 626.Fx 3.0 . 
627It was radically changed in 628.Fx 5.0 629to function as a slab allocator. 630.Sh AUTHORS 631.An -nosplit 632The zone allocator was written by 633.An John S. Dyson . 634The zone allocator was rewritten in large parts by 635.An Jeff Roberson Aq Mt jeff@FreeBSD.org 636to function as a slab allocator. 637.Pp 638This manual page was written by 639.An Dag-Erling Sm\(/orgrav Aq Mt des@FreeBSD.org . 640Changes for UMA by 641.An Jeroen Ruigrok van der Werven Aq Mt asmodai@FreeBSD.org . 642