.\"-
.\" Copyright (c) 2001 Dag-Erling Coïdan Smørgrav
.\" All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\"    notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\"    notice, this list of conditions and the following disclaimer in the
.\"    documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" $FreeBSD$
.\"
.Dd March 11, 2021
.Dt UMA 9
.Os
.Sh NAME
.Nm UMA
.Nd general-purpose kernel object allocator
.Sh SYNOPSIS
.In sys/param.h
.In sys/queue.h
.In vm/uma.h
.Cd "options UMA_FIRSTTOUCH"
.Cd "options UMA_XDOMAIN"
.Bd -literal
typedef int (*uma_ctor)(void *mem, int size, void *arg, int flags);
typedef void (*uma_dtor)(void *mem, int size, void *arg);
typedef int (*uma_init)(void *mem, int size, int flags);
typedef void (*uma_fini)(void *mem, int size);
typedef int (*uma_import)(void *arg, void **store, int count, int domain,
    int flags);
typedef void (*uma_release)(void *arg, void **store, int count);
typedef void *(*uma_alloc)(uma_zone_t zone, vm_size_t size, int domain,
    uint8_t *pflag, int wait);
typedef void (*uma_free)(void *item, vm_size_t size, uint8_t pflag);

.Ed
.Ft uma_zone_t
.Fo uma_zcreate
.Fa "char *name" "int size"
.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini"
.Fa "int align" "uint16_t flags"
.Fc
.Ft uma_zone_t
.Fo uma_zcache_create
.Fa "char *name" "int size"
.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini"
.Fa "uma_import zimport" "uma_release zrelease"
.Fa "void *arg" "int flags"
.Fc
.Ft uma_zone_t
.Fo uma_zsecond_create
.Fa "char *name"
.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini"
.Fa "uma_zone_t master"
.Fc
.Ft void
.Fn uma_zdestroy "uma_zone_t zone"
.Ft "void *"
.Fn uma_zalloc "uma_zone_t zone" "int flags"
.Ft "void *"
.Fn uma_zalloc_arg "uma_zone_t zone" "void *arg" "int flags"
.Ft "void *"
.Fn uma_zalloc_domain "uma_zone_t zone" "void *arg" "int domain" "int flags"
.Ft "void *"
.Fn uma_zalloc_pcpu "uma_zone_t zone" "int flags"
.Ft "void *"
.Fn uma_zalloc_pcpu_arg "uma_zone_t zone" "void *arg" "int flags"
.Ft void
.Fn uma_zfree "uma_zone_t zone" "void *item"
.Ft void
.Fn uma_zfree_arg "uma_zone_t zone" "void *item" "void *arg"
.Ft void
.Fn uma_zfree_pcpu "uma_zone_t zone" "void *item"
.Ft void
.Fn uma_zfree_pcpu_arg "uma_zone_t zone" "void *item" "void *arg"
.Ft void
.Fn uma_prealloc "uma_zone_t zone" "int nitems"
.Ft void
.Fn uma_zone_reserve "uma_zone_t zone" "int nitems"
.Ft void
.Fn uma_zone_reserve_kva "uma_zone_t zone" "int nitems"
.Ft void
.Fn uma_reclaim "int req"
.Ft void
.Fn uma_zone_reclaim "uma_zone_t zone" "int req"
.Ft void
.Fn uma_zone_set_allocf "uma_zone_t zone" "uma_alloc allocf"
.Ft void
.Fn uma_zone_set_freef "uma_zone_t zone" "uma_free freef"
.Ft int
.Fn uma_zone_set_max "uma_zone_t zone" "int nitems"
.Ft void
.Fn uma_zone_set_maxcache "uma_zone_t zone" "int nitems"
.Ft int
.Fn uma_zone_get_max "uma_zone_t zone"
.Ft int
.Fn uma_zone_get_cur "uma_zone_t zone"
.Ft void
.Fn uma_zone_set_warning "uma_zone_t zone" "const char *warning"
.Ft void
.Fn uma_zone_set_maxaction "uma_zone_t zone" "void (*maxaction)(uma_zone_t)"
.In sys/sysctl.h
.Fn SYSCTL_UMA_MAX parent nbr name access zone descr
.Fn SYSCTL_ADD_UMA_MAX ctx parent nbr name access zone descr
.Fn SYSCTL_UMA_CUR parent nbr name access zone descr
.Fn SYSCTL_ADD_UMA_CUR ctx parent nbr name access zone descr
.Sh DESCRIPTION
UMA (Universal Memory Allocator) provides an efficient interface for managing
dynamically sized collections of items of identical size, referred to as zones.
Zones keep track of which items are in use and which
are not, and UMA provides functions for allocating items from a zone and
for releasing them back, making them available for subsequent allocation requests.
Zones maintain per-CPU caches with linear scalability on SMP
systems as well as round-robin and first-touch policies for NUMA
systems.
The number of items cached per CPU is bounded, and each zone additionally
maintains an unbounded cache of items that is used to quickly satisfy
per-CPU cache allocation misses.
.Pp
Two types of zones exist: regular zones and cache zones.
In a regular zone, items are allocated from a slab, which is one or more
virtually contiguous memory pages that have been allocated from the kernel's
page allocator.
Internally, slabs are managed by a UMA keg, which is responsible for allocating
slabs and keeping track of their usage by one or more zones.
In typical usage, there is one keg per zone, so slabs are not shared among
multiple zones.
.Pp
Regular zones import items from a keg, and release items back to that keg if
requested.
Cache zones do not have a keg, and instead use custom import and release
methods.
For example, some collections of kernel objects are statically allocated
at boot time, and the size of the collection does not change.
A cache zone can be used to implement an efficient allocator for the objects in
such a collection.
.Pp
The
.Fn uma_zcreate
and
.Fn uma_zcache_create
functions create a new regular zone and cache zone, respectively.
The
.Fn uma_zsecond_create
function creates a regular zone which shares the keg of the zone
specified by the
.Fa master
argument.
The
.Fa name
argument is a text name of the zone, used for debugging and statistics;
this memory should not be freed until the zone has been destroyed.
.Pp
The
.Fa ctor
and
.Fa dtor
arguments are callback functions that are called by
the UMA subsystem at the time of the call to
.Fn uma_zalloc
and
.Fn uma_zfree ,
respectively.
Their purpose is to provide hooks for performing initialization or teardown
that must happen when an item is allocated from, or released back to, the
zone.
A good use for the
.Fa ctor
and
.Fa dtor
callbacks might be to initialize a data structure embedded in the item,
such as a
.Xr queue 3
head.
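.Pp
The following is a brief sketch of such a pair; the
.Vt struct foo
type, the
.Fn foo_ctor
and
.Fn foo_dtor
callbacks, and the
.Va foo_zone
variable are hypothetical examples rather than part of the UMA interface:
.Bd -literal -offset indent
struct foo {
    LIST_HEAD(, foo_item) fo_items;   /* embedded queue(3) head */
    int                   fo_flags;
};

static int
foo_ctor(void *mem, int size, void *arg, int flags)
{
    struct foo *fo = mem;

    /* Called from uma_zalloc(); prepare the item for use. */
    LIST_INIT(&fo->fo_items);
    fo->fo_flags = 0;
    return (0);
}

static void
foo_dtor(void *mem, int size, void *arg)
{
    struct foo *fo = mem;

    /* Called from uma_zfree(); the item must no longer be in use. */
    KASSERT(LIST_EMPTY(&fo->fo_items), ("foo item still has entries"));
}

foo_zone = uma_zcreate("foo", sizeof(struct foo), foo_ctor, foo_dtor,
    NULL, NULL, UMA_ALIGN_PTR, 0);
.Ed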
.Pp
The
.Fa zinit
and
.Fa zfini
arguments are used to optimize the allocation of items from the zone.
They are called by the UMA subsystem whenever
it needs to allocate or free items to satisfy requests or memory pressure.
A good use for the
.Fa zinit
and
.Fa zfini
callbacks might be to
initialize and destroy a mutex contained within an item.
This would allow one to avoid destroying and re-initializing the mutex
each time the item is freed and re-allocated.
They are not called on each call to
.Fn uma_zalloc
and
.Fn uma_zfree ,
but rather when an item is imported into a zone's cache, and when a zone
releases an item to the slab allocator, typically in response to memory
pressure.
.Pp
For
.Fn uma_zcache_create ,
the
.Fa zimport
and
.Fa zrelease
functions are called to import items into the zone and to release items
from the zone, respectively.
The
.Fa zimport
function should store pointers to items in the
.Fa store
array, which contains a maximum of
.Fa count
entries.
The function must return the number of imported items, which may be less than
the maximum.
Similarly, the
.Fa store
parameter to the
.Fa zrelease
function contains an array of
.Fa count
pointers to items.
The
.Fa arg
parameter passed to
.Fn uma_zcache_create
is provided to the import and release functions.
The
.Fa domain
parameter to
.Fa zimport
specifies the requested
.Xr numa 4
domain for the allocation.
It is either a NUMA domain number or the special value
.Dv UMA_ANYDOMAIN .
A sketch of such an import and release pair follows the flag list below.
.Pp
The
.Fa flags
argument of
.Fn uma_zcreate
and
.Fn uma_zcache_create
is a subset of the following flags:
.Bl -tag -width "foo"
.It Dv UMA_ZONE_NOFREE
Slabs allocated to the zone's keg are never freed.
.It Dv UMA_ZONE_NODUMP
Pages belonging to the zone will not be included in minidumps.
.It Dv UMA_ZONE_PCPU
An allocation from the zone will have
.Va mp_ncpu
shadow copies, which are privately assigned to CPUs.
A CPU can address its private copy using the base allocation address plus
the product of the current CPU ID and
.Fn sizeof "struct pcpu" :
.Bd -literal -offset indent
foo_zone = uma_zcreate(..., UMA_ZONE_PCPU);
 ...
foo_base = uma_zalloc(foo_zone, ...);
 ...
critical_enter();
foo_pcpu = (foo_t *)zpcpu_get(foo_base);
/* do something with foo_pcpu */
critical_exit();

.Ed
Note that
.Dv M_ZERO
cannot be used when allocating items from a PCPU zone.
To obtain zeroed memory from a PCPU zone, use the
.Fn uma_zalloc_pcpu
function and its variants instead, and pass
.Dv M_ZERO .
.It Dv UMA_ZONE_NOTOUCH
The UMA subsystem may not directly touch (i.e. read or write) the slab memory.
Otherwise, by default, book-keeping of items within a slab may be done in the
slab page itself, and
.Dv INVARIANTS
kernels may also do use-after-free checking by accessing the slab memory.
.It Dv UMA_ZONE_ZINIT
The zone will have its
.Ft uma_init
method set to an internal method that initializes a newly allocated slab
to all zeros.
Do not confuse the
.Ft uma_init
method with
.Ft uma_ctor .
A zone with the
.Dv UMA_ZONE_ZINIT
flag will not return zeroed memory on every
.Fn uma_zalloc .
.It Dv UMA_ZONE_NOTPAGE
An allocator function will be supplied with
.Fn uma_zone_set_allocf
and the memory that it returns may not be kernel virtual memory backed by VM
pages in the page array.
.It Dv UMA_ZONE_MALLOC
The zone is for the
.Xr malloc 9
subsystem.
.It Dv UMA_ZONE_VM
The zone is for the VM subsystem.
.It Dv UMA_ZONE_NUMA
The zone should use a first-touch NUMA policy rather than the round-robin
default.
If the
.Dv UMA_FIRSTTOUCH
kernel option is configured, all zones implicitly use a first-touch policy,
and the
.Dv UMA_ZONE_NUMA
flag has no effect.
The
.Dv UMA_XDOMAIN
kernel option, when configured, causes UMA to do the extra tracking to ensure
that allocations from first-touch zones are always local.
Otherwise, consumers that do not free memory on the same domain from which it
was allocated will cause mixing in per-CPU caches.
See
.Xr numa 4
for more details.
.It Dv UMA_ZONE_CONTIG
Items in this zone must be contiguous in physical address space.
Items will follow normal alignment constraints and may span page boundaries
between pages with contiguous physical addresses.
.El
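.Pp
The following is a minimal sketch of an import and release pair for a cache
zone backed by a statically allocated pool; the
.Vt struct foo
and
.Vt struct foo_pool
types, their fields, and the callback names are hypothetical and only
illustrate the expected calling convention:
.Bd -literal -offset indent
struct foo {
    SLIST_ENTRY(foo) fo_link;
    /* ... */
};

struct foo_pool {
    struct mtx       fp_lock;
    SLIST_HEAD(, foo) fp_free;    /* populated at boot time */
} foo_pool;

static int
foo_import(void *arg, void **store, int count, int domain, int flags)
{
    struct foo_pool *pool = arg;
    struct foo *fo;
    int i;

    /* This simple pool ignores the domain and flags hints. */
    mtx_lock(&pool->fp_lock);
    for (i = 0; i < count; i++) {
        fo = SLIST_FIRST(&pool->fp_free);
        if (fo == NULL)
            break;
        SLIST_REMOVE_HEAD(&pool->fp_free, fo_link);
        store[i] = fo;
    }
    mtx_unlock(&pool->fp_lock);
    return (i);            /* may be less than count */
}

static void
foo_release(void *arg, void **store, int count)
{
    struct foo_pool *pool = arg;
    int i;

    mtx_lock(&pool->fp_lock);
    for (i = 0; i < count; i++)
        SLIST_INSERT_HEAD(&pool->fp_free, (struct foo *)store[i],
            fo_link);
    mtx_unlock(&pool->fp_lock);
}

foo_cache_zone = uma_zcache_create("foo cache", sizeof(struct foo),
    NULL, NULL, NULL, NULL, foo_import, foo_release, &foo_pool, 0);
.Ed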
.Pp
Zones can be destroyed using
.Fn uma_zdestroy ,
freeing all memory that is cached in the zone.
All items allocated from the zone must be freed to the zone before the zone
may be safely destroyed.
.Pp
To allocate an item from a zone, simply call
.Fn uma_zalloc
with a pointer to that zone and set the
.Fa flags
argument to selected flags as documented in
.Xr malloc 9 .
It will return a pointer to an item if successful, or
.Dv NULL
in the rare case where all items in the zone are in use and the
allocator is unable to grow the zone and
.Dv M_NOWAIT
is specified.
.Pp
Items are released back to the zone from which they were allocated by
calling
.Fn uma_zfree
with a pointer to the zone and a pointer to the item.
If
.Fa item
is
.Dv NULL ,
then
.Fn uma_zfree
does nothing.
.Pp
The variants
.Fn uma_zalloc_arg
and
.Fn uma_zfree_arg
allow callers to
specify an argument for the
.Fa ctor
and
.Fa dtor
functions of the zone, respectively.
The variants
.Fn uma_zalloc_pcpu
and
.Fn uma_zfree_pcpu
allocate and free
.Va mp_ncpu
shadow copies as described for
.Dv UMA_ZONE_PCPU .
If
.Fa item
is
.Dv NULL ,
then
.Fn uma_zfree_pcpu
does nothing.
.Pp
The
.Fn uma_zalloc_domain
function allows callers to specify a fixed
.Xr numa 4
domain to allocate from.
This uses a guaranteed but slow path in the allocator which reduces
concurrency.
.Pp
The
.Fn uma_prealloc
function allocates slabs for the requested number of items, typically following
the initial creation of a zone.
Subsequent allocations from the zone will be satisfied using the pre-allocated
slabs.
Note that slab allocation is performed with the
.Dv M_WAITOK
flag, so
.Fn uma_prealloc
may sleep.
.Pp
The
.Fn uma_zone_reserve
function sets the number of reserved items for the zone.
.Fn uma_zalloc
and variants will ensure that the zone contains at least the reserved number
of free items.
Reserved items may be allocated by specifying
.Dv M_USE_RESERVE
in the allocation request flags.
.Fn uma_zone_reserve
does not perform any pre-allocation by itself.
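.Pp
The following is a brief sketch of how a consumer might combine
.Fn uma_zone_reserve ,
.Fn uma_prealloc ,
and
.Dv M_USE_RESERVE
to keep a small number of items available in contexts that cannot sleep;
the zone and the item count are hypothetical:
.Bd -literal -offset indent
foo_zone = uma_zcreate("foo", sizeof(struct foo), NULL, NULL,
    NULL, NULL, UMA_ALIGN_PTR, 0);
uma_zone_reserve(foo_zone, 32);    /* keep 32 items in reserve */
uma_prealloc(foo_zone, 32);        /* populate the reserve; may sleep */
 ...
/* Later, in a context that must not sleep: */
fo = uma_zalloc(foo_zone, M_NOWAIT | M_USE_RESERVE);
if (fo == NULL)
    return (ENOMEM);
.Ed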
.Pp
The
.Fn uma_zone_reserve_kva
function pre-allocates kernel virtual address space for the requested
number of items.
Subsequent allocations from the zone will be satisfied using the pre-allocated
address space.
Note that unlike
.Fn uma_zone_reserve ,
.Fn uma_zone_reserve_kva
does not restrict the use of the pre-allocation to
.Dv M_USE_RESERVE
requests.
.Pp
The
.Fn uma_reclaim
and
.Fn uma_zone_reclaim
functions reclaim cached items from UMA zones, releasing unused memory.
The
.Fn uma_reclaim
function reclaims items from all regular zones, while
.Fn uma_zone_reclaim
reclaims items only from the specified zone.
The
.Fa req
parameter must be one of three values which specify how aggressively
items are to be reclaimed:
.Bl -tag -width indent
.It Dv UMA_RECLAIM_TRIM
Reclaim items only in excess of the zone's estimated working set size.
The working set size is periodically updated and tracks the recent history
of the zone's usage.
.It Dv UMA_RECLAIM_DRAIN
Reclaim all items from the unbounded cache.
Free items in the per-CPU caches are left alone.
.It Dv UMA_RECLAIM_DRAIN_CPU
Reclaim all cached items.
.El
.Pp
The
.Fn uma_zone_set_allocf
and
.Fn uma_zone_set_freef
functions allow a zone's default slab allocation and free functions to be
overridden.
This is useful if memory with special constraints such as attributes,
alignment, or address ranges must be used.
.Pp
The
.Fn uma_zone_set_max
function limits the number of items
.Pq and therefore memory
that can be allocated to
.Fa zone .
The
.Fa nitems
argument specifies the requested upper limit number of items.
The effective limit is returned to the caller, as it may end up being higher
than requested due to the implementation rounding up to ensure all memory pages
allocated to the zone are utilised to capacity.
The limit applies to the total number of items in the zone, which includes
allocated items, free items and free items in the per-CPU caches.
On systems with more than one CPU it may not be possible to allocate
the specified number of items even when there is no shortage of memory,
because all of the remaining free items may be in the caches of the
other CPUs when the limit is hit.
.Pp
The
.Fn uma_zone_set_maxcache
function limits the number of free items which may be cached in the zone.
This limit applies to both the per-CPU caches and the cache of free buckets.
.Pp
The
.Fn uma_zone_get_max
function returns the effective upper limit number of items for a zone.
.Pp
The
.Fn uma_zone_get_cur
function returns an approximation of the number of items currently allocated
from the zone.
The returned value is approximate because appropriate synchronisation to
determine an exact value is not performed by the implementation.
This ensures low overhead at the expense of potentially stale data being used
in the calculation.
.Pp
The
.Fn uma_zone_set_warning
function sets a warning that will be printed on the system console when the
given zone becomes full and fails to allocate an item.
The warning will be printed no more often than every five minutes.
Warnings can be turned off globally by setting the
.Va vm.zone_warnings
sysctl tunable to
.Va 0 .
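.Pp
The following is a brief sketch of how a consumer might cap a zone and
arrange for such a warning; the zone and the chosen limit are hypothetical:
.Bd -literal -offset indent
foo_zone = uma_zcreate("foo", sizeof(struct foo), NULL, NULL,
    NULL, NULL, UMA_ALIGN_PTR, 0);
/* The effective limit may be rounded up from the requested 1024. */
nitems = uma_zone_set_max(foo_zone, 1024);
uma_zone_set_warning(foo_zone, "foo zone limit reached");
.Ed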
.Pp
The
.Fn uma_zone_set_maxaction
function sets a function that will be called when the given zone becomes full
and fails to allocate an item.
The function will be called with the zone locked.
The caller of the allocation function may itself hold additional locks.
Therefore, this function should do very little work (similar to a signal
handler).
.Pp
The
.Fn SYSCTL_UMA_MAX parent nbr name access zone descr
macro declares a static
.Xr sysctl 9
oid that exports the effective upper limit number of items for a zone.
The
.Fa zone
argument should be a pointer to
.Vt uma_zone_t .
A read of the oid returns the value obtained through
.Fn uma_zone_get_max .
A write to the oid sets a new value via
.Fn uma_zone_set_max .
The
.Fn SYSCTL_ADD_UMA_MAX ctx parent nbr name access zone descr
macro is provided to create this type of oid dynamically.
.Pp
The
.Fn SYSCTL_UMA_CUR parent nbr name access zone descr
macro declares a static read-only
.Xr sysctl 9
oid that exports the approximate current occupancy of the zone.
The
.Fa zone
argument should be a pointer to
.Vt uma_zone_t .
A read of the oid returns the value obtained through
.Fn uma_zone_get_cur .
The
.Fn SYSCTL_ADD_UMA_CUR ctx parent nbr name zone descr
macro is provided to create this type of oid dynamically.
.Sh IMPLEMENTATION NOTES
The memory that these allocation calls return is not executable.
The
.Fn uma_zalloc
function does not support the
.Dv M_EXEC
flag to allocate executable memory.
Not all platforms enforce a distinction between executable and
non-executable memory.
.Sh SEE ALSO
.Xr numa 4 ,
.Xr vmstat 8 ,
.Xr malloc 9
.Rs
.%A Jeff Bonwick
.%T "The Slab Allocator: An Object-Caching Kernel Memory Allocator"
.%D 1994
.Re
.Sh HISTORY
The zone allocator first appeared in
.Fx 3.0 .
It was radically changed in
.Fx 5.0
to function as a slab allocator.
.Sh AUTHORS
.An -nosplit
The zone allocator was written by
.An John S. Dyson .
The zone allocator was rewritten in large parts by
.An Jeff Roberson Aq Mt jeff@FreeBSD.org
to function as a slab allocator.
.Pp
This manual page was written by
.An Dag-Erling Sm\(/orgrav Aq Mt des@FreeBSD.org .
Changes for UMA by
.An Jeroen Ruigrok van der Werven Aq Mt asmodai@FreeBSD.org .