1.\"- 2.\" Copyright (c) 2001 Dag-Erling Coïdan Smørgrav 3.\" All rights reserved. 4.\" 5.\" Redistribution and use in source and binary forms, with or without 6.\" modification, are permitted provided that the following conditions 7.\" are met: 8.\" 1. Redistributions of source code must retain the above copyright 9.\" notice, this list of conditions and the following disclaimer. 10.\" 2. Redistributions in binary form must reproduce the above copyright 11.\" notice, this list of conditions and the following disclaimer in the 12.\" documentation and/or other materials provided with the distribution. 13.\" 14.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24.\" SUCH DAMAGE. 25.\" 26.\" $FreeBSD$ 27.\" 28.Dd August 30, 2019 29.Dt UMA 9 30.Os 31.Sh NAME 32.Nm UMA 33.Nd general-purpose kernel object allocator 34.Sh SYNOPSIS 35.In sys/param.h 36.In sys/queue.h 37.In vm/uma.h 38.Cd "options UMA_FIRSTTOUCH" 39.Cd "options UMA_XDOMAIN" 40.Bd -literal 41typedef int (*uma_ctor)(void *mem, int size, void *arg, int flags); 42typedef void (*uma_dtor)(void *mem, int size, void *arg); 43typedef int (*uma_init)(void *mem, int size, int flags); 44typedef void (*uma_fini)(void *mem, int size); 45typedef int (*uma_import)(void *arg, void **store, int count, int domain, 46 int flags); 47typedef void (*uma_release)(void *arg, void **store, int count); 48typedef void *(*uma_alloc)(uma_zone_t zone, vm_size_t size, int domain, 49 uint8_t *pflag, int wait); 50typedef void (*uma_free)(void *item, vm_size_t size, uint8_t pflag); 51 52.Ed 53.Ft uma_zone_t 54.Fo uma_zcreate 55.Fa "char *name" "int size" 56.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini" 57.Fa "int align" "uint16_t flags" 58.Fc 59.Ft uma_zone_t 60.Fo uma_zcache_create 61.Fa "char *name" "int size" 62.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini" 63.Fa "uma_import zimport" "uma_release zrelease" 64.Fa "void *arg" "int flags" 65.Fc 66.Ft uma_zone_t 67.Fo uma_zsecond_create 68.Fa "char *name" 69.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini" 70.Fa "uma_zone_t master" 71.Fc 72.Ft void 73.Fn uma_zdestroy "uma_zone_t zone" 74.Ft "void *" 75.Fn uma_zalloc "uma_zone_t zone" "int flags" 76.Ft "void *" 77.Fn uma_zalloc_arg "uma_zone_t zone" "void *arg" "int flags" 78.Ft "void *" 79.Fn uma_zalloc_domain "uma_zone_t zone" "void *arg" "int domain" "int flags" 80.Ft "void *" 81.Fn uma_zalloc_pcpu "uma_zone_t zone" "int flags" 82.Ft "void *" 83.Fn uma_zalloc_pcpu_arg "uma_zone_t zone" "void *arg" "int flags" 84.Ft void 85.Fn uma_zfree "uma_zone_t zone" "void *item" 86.Ft void 87.Fn uma_zfree_arg "uma_zone_t zone" "void *item" "void *arg" 88.Ft void 89.Fn uma_zfree_domain "uma_zone_t zone" "void *item" "void *arg" 90.Ft void 91.Fn uma_zfree_pcpu "uma_zone_t zone" "void *item" 92.Ft void 93.Fn uma_zfree_pcpu_arg "uma_zone_t zone" "void *item" "void *arg" 94.Ft void 95.Fn uma_prealloc "uma_zone_t zone" "int nitems" 96.Ft void 97.Fn uma_zone_reserve "uma_zone_t zone" "int nitems" 98.Ft void 99.Fn uma_zone_reserve_kva "uma_zone_t zone" "int nitems" 100.Ft void 101.Fn uma_zone_set_allocf "uma_zone_t zone" "uma_alloc allocf" 102.Ft void 103.Fn uma_zone_set_freef "uma_zone_t zone" "uma_free freef" 104.Ft int 105.Fn uma_zone_set_max "uma_zone_t zone" "int nitems" 106.Ft int 107.Fn uma_zone_set_maxcache "uma_zone_t zone" "int nitems" 108.Ft int 109.Fn uma_zone_get_max "uma_zone_t zone" 110.Ft int 111.Fn uma_zone_get_cur "uma_zone_t zone" 112.Ft void 113.Fn uma_zone_set_warning "uma_zone_t zone" "const char *warning" 114.Ft void 115.Fn uma_zone_set_maxaction "uma_zone_t zone" "void (*maxaction)(uma_zone_t)" 116.Ft void 117.Fn uma_reclaim 118.In sys/sysctl.h 119.Fn SYSCTL_UMA_MAX parent nbr name access zone descr 120.Fn SYSCTL_ADD_UMA_MAX ctx parent nbr name access zone descr 121.Fn SYSCTL_UMA_CUR parent nbr name access zone descr 122.Fn SYSCTL_ADD_UMA_CUR ctx parent nbr name access zone descr 123.Sh DESCRIPTION 124UMA (Universal Memory Allocator) provides an efficient interface for managing 125dynamically-sized collections of items of identical size, referred to as zones. 126Zones keep track of which items are in use and which 127are not, and UMA provides functions for allocating items from a zone and 128for releasing them back, making them available for subsequent allocation requests. 129Zones maintain per-CPU caches with linear scalability on SMP 130systems as well as round-robin and first-touch policies for NUMA 131systems. 132The number of items cached per CPU is bounded, and each zone additionally 133maintains an unbounded cache of items that is used to quickly satisfy 134per-CPU cache allocation misses. 135.Pp 136Two types of zones exist: regular zones and cache zones. 137In a regular zone, items are allocated from a slab, which is one or more 138virtually contiguous memory pages that have been allocated from the kernel's 139page allocator. 140Internally, slabs are managed by a UMA keg, which is responsible for allocating 141slabs and keeping track of their usage by one or more zones. 142In typical usage, there is one keg per zone, so slabs are not shared among 143multiple zones. 144.Pp 145Normal zones import items from a keg, and release items back to that keg if 146requested. 147Cache zones do not have a keg, and instead use custom import and release 148methods. 149For example, some collections of kernel objects are statically allocated 150at boot-time, and the size of the collection does not change. 151A cache zone can be used to implement an efficient allocator for the objects in 152such a collection. 153.Pp 154The 155.Fn uma_zcreate 156and 157.Fn uma_zcache_create 158functions create a new regular zone and cache zone, respectively. 159The 160.Fn uma_zsecond_create 161function creates a regular zone which shares the keg of the zone 162specified by the 163.Fa master 164argument. 165The 166.Fa name 167argument is a text name of the zone for debugging and stats; this memory 168should not be freed until the zone has been deallocated. 169.Pp 170The 171.Fa ctor 172and 173.Fa dtor 174arguments are callback functions that are called by 175the UMA subsystem at the time of the call to 176.Fn uma_zalloc 177and 178.Fn uma_zfree 179respectively. 180Their purpose is to provide hooks for initializing or 181destroying things that need to be done at the time of the allocation 182or release of a resource. 183A good usage for the 184.Fa ctor 185and 186.Fa dtor 187callbacks might be to initialize a data structure embedded in the item, 188such as a 189.Xr queue 3 190head. 191.Pp 192The 193.Fa zinit 194and 195.Fa zfini 196arguments are used to optimize the allocation of items from the zone. 197They are called by the UMA subsystem whenever 198it needs to allocate or free items to satisfy requests or memory pressure. 199A good use for the 200.Fa zinit 201and 202.Fa zfini 203callbacks might be to 204initialize and destroy a mutex contained within an item. 205This would allow one to avoid destroying and re-initializing the mutex 206each time the item is freed and re-allocated. 207They are not called on each call to 208.Fn uma_zalloc 209and 210.Fn uma_zfree 211but rather when an item is imported into a zone's cache, and when a zone 212releases an item to the slab allocator, typically as a response to memory 213pressure. 214.Pp 215For 216.Fn uma_zcache_create , 217the 218.Fa zimport 219and 220.Fa zrelease 221functions are called to import items into the zone and to release items 222from the zone, respectively. 223The 224.Fa zimport 225function should store pointers to items in the 226.Fa store 227array, which contains a maximum of 228.Fa count 229entries. 230The function must return the number of imported items, which may be less than 231the maximum. 232Similarly, the 233.Fa store 234parameter to the 235.Fa zrelease 236function contains an array of 237.Fa count 238pointers to items. 239The 240.Fa arg 241parameter passed to 242.Fn uma_zcache_create 243is provided to the import and release functions. 244The 245.Fa domain 246parameter to 247.Fa zimport 248specifies the requested 249.Xr numa 4 250domain for the allocation. 251It is either a NUMA domain number or the special value 252.Dv UMA_ANYDOMAIN . 253.Pp 254The 255.Fa flags 256argument of 257.Fn uma_zcreate 258and 259.Fn uma_zcache_create 260is a subset of the following flags: 261.Bl -tag -width "foo" 262.It Dv UMA_ZONE_NOFREE 263Slabs allocated to the zone's keg are never freed. 264.It Dv UMA_ZONE_NODUMP 265Pages belonging to the zone will not be included in minidumps. 266.It Dv UMA_ZONE_PCPU 267An allocation from zone would have 268.Va mp_ncpu 269shadow copies, that are privately assigned to CPUs. 270A CPU can address its private copy using base the allocation address plus 271a multiple of the current CPU ID and 272.Fn sizeof "struct pcpu" : 273.Bd -literal -offset indent 274foo_zone = uma_zcreate(..., UMA_ZONE_PCPU); 275 ... 276foo_base = uma_zalloc(foo_zone, ...); 277 ... 278critical_enter(); 279foo_pcpu = (foo_t *)zpcpu_get(foo_base); 280/* do something with foo_pcpu */ 281critical_exit(); 282 283.Ed 284Note that 285.Dv M_ZERO 286cannot be used when allocating items from a PCPU zone. 287To obtain zeroed memory from a PCPU zone, use the 288.Fn uma_zalloc_pcpu 289function and its variants instead, and pass 290.Dv M_ZERO . 291.It Dv UMA_ZONE_OFFPAGE 292By default book-keeping of items within a slab is done in the slab page itself. 293This flag explicitly tells subsystem that book-keeping structure should be 294allocated separately from special internal zone. 295This flag requires either 296.Dv UMA_ZONE_VTOSLAB 297or 298.Dv UMA_ZONE_HASH , 299since subsystem requires a mechanism to find a book-keeping structure 300to an item being freed. 301The subsystem may choose to prefer offpage book-keeping for certain zones 302implicitly. 303.It Dv UMA_ZONE_ZINIT 304The zone will have its 305.Ft uma_init 306method set to internal method that initializes a new allocated slab 307to all zeros. 308Do not mistake 309.Ft uma_init 310method with 311.Ft uma_ctor . 312A zone with 313.Dv UMA_ZONE_ZINIT 314flag would not return zeroed memory on every 315.Fn uma_zalloc . 316.It Dv UMA_ZONE_HASH 317The zone should use an internal hash table to find slab book-keeping 318structure where an allocation being freed belongs to. 319.It Dv UMA_ZONE_VTOSLAB 320The zone should use special field of 321.Vt vm_page_t 322to find slab book-keeping structure where an allocation being freed belongs to. 323.It Dv UMA_ZONE_MALLOC 324The zone is for the 325.Xr malloc 9 326subsystem. 327.It Dv UMA_ZONE_VM 328The zone is for the VM subsystem. 329.It Dv UMA_ZONE_NUMA 330The zone should use a first-touch NUMA policy rather than the round-robin 331default. 332If the 333.Dv UMA_FIRSTTOUCH 334kernel option is configured, all zones implicitly use a first-touch policy, 335and the 336.Dv UMA_ZONE_NUMA 337flag has no effect. 338The 339.Dv UMA_XDOMAIN 340kernel option, when configured, causes UMA to do the extra tracking to ensure 341that allocations from first-touch zones are always local. 342Otherwise, consumers that do not free memory on the same domain from which it 343was allocated will cause mixing in per-CPU caches. 344See 345.Xr numa 4 346for more details. 347.El 348.Pp 349Zones can be destroyed using 350.Fn uma_zdestroy , 351freeing all memory that is cached in the zone. 352All items allocated from the zone must be freed to the zone before the zone 353may be safely destroyed. 354.Pp 355To allocate an item from a zone, simply call 356.Fn uma_zalloc 357with a pointer to that zone and set the 358.Fa flags 359argument to selected flags as documented in 360.Xr malloc 9 . 361It will return a pointer to an item if successful, or 362.Dv NULL 363in the rare case where all items in the zone are in use and the 364allocator is unable to grow the zone and 365.Dv M_NOWAIT 366is specified. 367.Pp 368Items are released back to the zone from which they were allocated by 369calling 370.Fn uma_zfree 371with a pointer to the zone and a pointer to the item. 372If 373.Fa item 374is 375.Dv NULL , 376then 377.Fn uma_zfree 378does nothing. 379.Pp 380The variants 381.Fn uma_zalloc_arg 382and 383.Fn uma_zfree_arg 384allow callers to 385specify an argument for the 386.Dv ctor 387and 388.Dv dtor 389functions of the zone, respectively. 390The 391.Fn uma_zalloc_domain 392function allows callers to specify a fixed 393.Xr numa 4 394domain to allocate from. 395This uses a guaranteed but slow path in the allocator which reduces 396concurrency. 397The 398.Fn uma_zfree_domain 399function should be used to return memory allocated in this fashion. 400This function infers the domain from the pointer and does not require it as an 401argument. 402.Pp 403The 404.Fn uma_zone_prealloc 405function allocates slabs for the requested number of items, typically following 406the initial creation of a zone. 407Subsequent allocations from the zone will be satisfied using the pre-allocated 408slabs. 409Note that slab allocation is performed with the 410.Dv M_WAITOK 411flag, so 412.Fn uma_zone_prealloc 413may sleep. 414.Pp 415The 416.Fn uma_zone_reserve 417function sets the number of reserved items for the zone. 418.Fn uma_zalloc 419and variants will ensure that the zone contains at least the reserved number 420of free items. 421Reserved items may be allocated by specifying 422.Dv M_USE_RESERVE 423in the allocation request flags. 424.Fn uma_zone_reserve 425does not perform any pre-allocation by itself. 426.Pp 427The 428.Fn uma_zone_reserve_kva 429function pre-allocates kernel virtual address space for the requested 430number of items. 431Subsequent allocations from the zone will be satisfied using the pre-allocated 432address space. 433Note that unlike 434.Fn uma_zone_reserve , 435.Fn uma_zone_reserve_kva 436does not restrict the use of the pre-allocation to 437.Dv M_USE_RESERVE 438requests. 439.Pp 440The 441.Fn uma_zone_set_allocf 442and 443.Fn uma_zone_set_freef 444functions allow a zone's default slab allocation and free functions to be 445overridden. 446This is useful if the zone's items have special memory allocation constraints. 447For example, if multi-page objects are required to be physically contiguous, 448an 449.Fa allocf 450function which requests contiguous memory from the kernel's page allocator 451may be used. 452.Pp 453The 454.Fn uma_zone_set_max 455function limits the number of items 456.Pq and therefore memory 457that can be allocated to 458.Fa zone . 459The 460.Fa nitems 461argument specifies the requested upper limit number of items. 462The effective limit is returned to the caller, as it may end up being higher 463than requested due to the implementation rounding up to ensure all memory pages 464allocated to the zone are utilised to capacity. 465The limit applies to the total number of items in the zone, which includes 466allocated items, free items and free items in the per-cpu caches. 467On systems with more than one CPU it may not be possible to allocate 468the specified number of items even when there is no shortage of memory, 469because all of the remaining free items may be in the caches of the 470other CPUs when the limit is hit. 471.Pp 472The 473.Fn uma_zone_set_maxcache 474function limits the number of free items which may be cached in the zone, 475excluding the per-CPU caches, which are bounded in size. 476For example, to implement a 477.Ql pure 478per-CPU cache, a cache zone may be configured with a maximum cache size of 0. 479.Pp 480The 481.Fn uma_zone_get_max 482function returns the effective upper limit number of items for a zone. 483.Pp 484The 485.Fn uma_zone_get_cur 486function returns an approximation of the number of items currently allocated 487from the zone. 488The returned value is approximate because appropriate synchronisation to 489determine an exact value is not performed by the implementation. 490This ensures low overhead at the expense of potentially stale data being used 491in the calculation. 492.Pp 493The 494.Fn uma_zone_set_warning 495function sets a warning that will be printed on the system console when the 496given zone becomes full and fails to allocate an item. 497The warning will be printed no more often than every five minutes. 498Warnings can be turned off globally by setting the 499.Va vm.zone_warnings 500sysctl tunable to 501.Va 0 . 502.Pp 503The 504.Fn uma_zone_set_maxaction 505function sets a function that will be called when the given zone becomes full 506and fails to allocate an item. 507The function will be called with the zone locked. 508Also, the function 509that called the allocation function may have held additional locks. 510Therefore, 511this function should do very little work (similar to a signal handler). 512.Pp 513The 514.Fn SYSCTL_UMA_MAX parent nbr name access zone descr 515macro declares a static 516.Xr sysctl 9 517oid that exports the effective upper limit number of items for a zone. 518The 519.Fa zone 520argument should be a pointer to 521.Vt uma_zone_t . 522A read of the oid returns value obtained through 523.Fn uma_zone_get_max . 524A write to the oid sets new value via 525.Fn uma_zone_set_max . 526The 527.Fn SYSCTL_ADD_UMA_MAX ctx parent nbr name access zone descr 528macro is provided to create this type of oid dynamically. 529.Pp 530The 531.Fn SYSCTL_UMA_CUR parent nbr name access zone descr 532macro declares a static read-only 533.Xr sysctl 9 534oid that exports the approximate current occupancy of the zone. 535The 536.Fa zone 537argument should be a pointer to 538.Vt uma_zone_t . 539A read of the oid returns value obtained through 540.Fn uma_zone_get_cur . 541The 542.Fn SYSCTL_ADD_UMA_CUR ctx parent nbr name zone descr 543macro is provided to create this type of oid dynamically. 544.Sh IMPLEMENTATION NOTES 545The memory that these allocation calls return is not executable. 546The 547.Fn uma_zalloc 548function does not support the 549.Dv M_EXEC 550flag to allocate executable memory. 551Not all platforms enforce a distinction between executable and 552non-executable memory. 553.Sh SEE ALSO 554.Xr numa 4 , 555.Xr vmstat 8 , 556.Xr malloc 9 557.Rs 558.%A Jeff Bonwick 559.%T "The Slab Allocator: An Object-Caching Kernel Memory Allocator" 560.%D 1994 561.Re 562.Sh HISTORY 563The zone allocator first appeared in 564.Fx 3.0 . 565It was radically changed in 566.Fx 5.0 567to function as a slab allocator. 568.Sh AUTHORS 569.An -nosplit 570The zone allocator was written by 571.An John S. Dyson . 572The zone allocator was rewritten in large parts by 573.An Jeff Roberson Aq Mt jeff@FreeBSD.org 574to function as a slab allocator. 575.Pp 576This manual page was written by 577.An Dag-Erling Sm\(/orgrav Aq Mt des@FreeBSD.org . 578Changes for UMA by 579.An Jeroen Ruigrok van der Werven Aq Mt asmodai@FreeBSD.org . 580