xref: /freebsd/share/man/man9/zone.9 (revision a466cc55373fc3cf86837f09da729535b57e69a1)
1.\"-
2.\" Copyright (c) 2001 Dag-Erling Coïdan Smørgrav
3.\" All rights reserved.
4.\"
5.\" Redistribution and use in source and binary forms, with or without
6.\" modification, are permitted provided that the following conditions
7.\" are met:
8.\" 1. Redistributions of source code must retain the above copyright
9.\"    notice, this list of conditions and the following disclaimer.
10.\" 2. Redistributions in binary form must reproduce the above copyright
11.\"    notice, this list of conditions and the following disclaimer in the
12.\"    documentation and/or other materials provided with the distribution.
13.\"
14.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24.\" SUCH DAMAGE.
25.\"
26.\" $FreeBSD$
27.\"
28.Dd January 16, 2023
29.Dt UMA 9
30.Os
31.Sh NAME
32.Nm UMA
33.Nd general-purpose kernel object allocator
34.Sh SYNOPSIS
35.In sys/param.h
36.In sys/queue.h
37.In vm/uma.h
38.Bd -literal
39typedef int (*uma_ctor)(void *mem, int size, void *arg, int flags);
40typedef void (*uma_dtor)(void *mem, int size, void *arg);
41typedef int (*uma_init)(void *mem, int size, int flags);
42typedef void (*uma_fini)(void *mem, int size);
43typedef int (*uma_import)(void *arg, void **store, int count, int domain,
44    int flags);
45typedef void (*uma_release)(void *arg, void **store, int count);
46typedef void *(*uma_alloc)(uma_zone_t zone, vm_size_t size, int domain,
47    uint8_t *pflag, int wait);
48typedef void (*uma_free)(void *item, vm_size_t size, uint8_t pflag);
49
50.Ed
51.Ft uma_zone_t
52.Fo uma_zcreate
53.Fa "char *name" "size_t size"
54.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini"
55.Fa "int align" "uint16_t flags"
56.Fc
57.Ft uma_zone_t
58.Fo uma_zcache_create
59.Fa "char *name" "int size"
60.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini"
61.Fa "uma_import zimport" "uma_release zrelease"
62.Fa "void *arg" "int flags"
63.Fc
64.Ft uma_zone_t
65.Fo uma_zsecond_create
66.Fa "char *name"
67.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini"
68.Fa "uma_zone_t master"
69.Fc
70.Ft void
71.Fn uma_zdestroy "uma_zone_t zone"
72.Ft "void *"
73.Fn uma_zalloc "uma_zone_t zone" "int flags"
74.Ft "void *"
75.Fn uma_zalloc_arg "uma_zone_t zone" "void *arg" "int flags"
76.Ft "void *"
77.Fn uma_zalloc_domain "uma_zone_t zone" "void *arg" "int domain" "int flags"
78.Ft "void *"
79.Fn uma_zalloc_pcpu "uma_zone_t zone" "int flags"
80.Ft "void *"
81.Fn uma_zalloc_pcpu_arg "uma_zone_t zone" "void *arg" "int flags"
82.Ft "void *"
83.Fn uma_zalloc_smr "uma_zone_t zone" "int flags"
84.Ft void
85.Fn uma_zfree "uma_zone_t zone" "void *item"
86.Ft void
87.Fn uma_zfree_arg "uma_zone_t zone" "void *item" "void *arg"
88.Ft void
89.Fn uma_zfree_pcpu "uma_zone_t zone" "void *item"
90.Ft void
91.Fn uma_zfree_pcpu_arg "uma_zone_t zone" "void *item" "void *arg"
92.Ft void
93.Fn uma_zfree_smr "uma_zone_t zone" "void *item"
94.Ft void
95.Fn uma_prealloc "uma_zone_t zone" "int nitems"
96.Ft void
97.Fn uma_zone_reserve "uma_zone_t zone" "int nitems"
98.Ft void
99.Fn uma_zone_reserve_kva "uma_zone_t zone" "int nitems"
100.Ft void
101.Fn uma_reclaim "int req"
102.Ft void
103.Fn uma_reclaim_domain "int req" "int domain"
104.Ft void
105.Fn uma_zone_reclaim "uma_zone_t zone" "int req"
106.Ft void
107.Fn uma_zone_reclaim_domain "uma_zone_t zone" "int req" "int domain"
108.Ft void
109.Fn uma_zone_set_allocf "uma_zone_t zone" "uma_alloc allocf"
110.Ft void
111.Fn uma_zone_set_freef "uma_zone_t zone" "uma_free freef"
112.Ft int
113.Fn uma_zone_set_max "uma_zone_t zone" "int nitems"
114.Ft void
115.Fn uma_zone_set_maxcache "uma_zone_t zone" "int nitems"
116.Ft int
117.Fn uma_zone_get_max "uma_zone_t zone"
118.Ft int
119.Fn uma_zone_get_cur "uma_zone_t zone"
120.Ft void
121.Fn uma_zone_set_warning "uma_zone_t zone" "const char *warning"
122.Ft void
123.Fn uma_zone_set_maxaction "uma_zone_t zone" "void (*maxaction)(uma_zone_t)"
124.Ft smr_t
125.Fn uma_zone_get_smr "uma_zone_t zone"
126.Ft void
127.Fn uma_zone_set_smr "uma_zone_t zone" "smr_t smr"
128.In sys/sysctl.h
129.Fn SYSCTL_UMA_MAX parent nbr name access zone descr
130.Fn SYSCTL_ADD_UMA_MAX ctx parent nbr name access zone descr
131.Fn SYSCTL_UMA_CUR parent nbr name access zone descr
132.Fn SYSCTL_ADD_UMA_CUR ctx parent nbr name access zone descr
133.Sh DESCRIPTION
134UMA (Universal Memory Allocator) provides an efficient interface for managing
135dynamically-sized collections of items of identical size, referred to as zones.
136Zones keep track of which items are in use and which
137are not, and UMA provides functions for allocating items from a zone and
138for releasing them back, making them available for subsequent allocation requests.
139Zones maintain per-CPU caches with linear scalability on SMP
140systems as well as round-robin and first-touch policies for NUMA
141systems.
142The number of items cached per CPU is bounded, and each zone additionally
143maintains an unbounded cache of items that is used to quickly satisfy
144per-CPU cache allocation misses.
145.Pp
146Two types of zones exist: regular zones and cache zones.
147In a regular zone, items are allocated from a slab, which is one or more
148virtually contiguous memory pages that have been allocated from the kernel's
149page allocator.
150Internally, slabs are managed by a UMA keg, which is responsible for allocating
151slabs and keeping track of their usage by one or more zones.
152In typical usage, there is one keg per zone, so slabs are not shared among
153multiple zones.
154.Pp
155Regular zones import items from a keg, and release items back to that keg if
156requested.
157Cache zones do not have a keg, and instead use custom import and release
158methods.
159For example, some collections of kernel objects are statically allocated
160at boot-time, and the size of the collection does not change.
161A cache zone can be used to implement an efficient allocator for the objects in
162such a collection.
163.Pp
164The
165.Fn uma_zcreate
166and
167.Fn uma_zcache_create
168functions create a new regular zone and cache zone, respectively.
169The
170.Fn uma_zsecond_create
171function creates a regular zone which shares the keg of the zone
172specified by the
173.Fa master
174argument.
175The
176.Fa name
177argument is a text name of the zone for debugging and stats; this memory
178should not be freed until the zone has been deallocated.
179.Pp
180The
181.Fa ctor
182and
183.Fa dtor
184arguments are callback functions that are called by
185the UMA subsystem at the time of the call to
186.Fn uma_zalloc
187and
188.Fn uma_zfree
189respectively.
190Their purpose is to provide hooks for any initialization or
191teardown work that needs to be done at the time of the allocation
192or release of a resource.
193A good usage for the
194.Fa ctor
195and
196.Fa dtor
197callbacks might be to initialize a data structure embedded in the item,
198such as a
199.Xr queue 3
200head.
201.Pp
202The
203.Fa zinit
204and
205.Fa zfini
206arguments are used to optimize the allocation of items from the zone.
207They are called by the UMA subsystem whenever
208it needs to allocate or free items to satisfy requests or memory pressure.
209A good use for the
210.Fa zinit
211and
212.Fa zfini
213callbacks might be to
214initialize and destroy a mutex contained within an item.
215This would allow one to avoid destroying and re-initializing the mutex
216each time the item is freed and re-allocated.
217They are not called on each call to
218.Fn uma_zalloc
219and
220.Fn uma_zfree
221but rather when an item is imported into a zone's cache, and when a zone
222releases an item to the slab allocator, typically as a response to memory
223pressure.
224.Pp
225For
226.Fn uma_zcache_create ,
227the
228.Fa zimport
229and
230.Fa zrelease
231functions are called to import items into the zone and to release items
232from the zone, respectively.
233The
234.Fa zimport
235function should store pointers to items in the
236.Fa store
237array, which contains a maximum of
238.Fa count
239entries.
240The function must return the number of imported items, which may be less than
241the maximum.
242Similarly, the
243.Fa store
244parameter to the
245.Fa zrelease
246function contains an array of
247.Fa count
248pointers to items.
249The
250.Fa arg
251parameter passed to
252.Fn uma_zcache_create
253is provided to the import and release functions.
254The
255.Fa domain
256parameter to
257.Fa zimport
258specifies the requested
259.Xr numa 4
260domain for the allocation.
261It is either a NUMA domain number or the special value
262.Dv UMA_ANYDOMAIN .
263.Pp
264The
265.Fa flags
266argument of
267.Fn uma_zcreate
268and
269.Fn uma_zcache_create
270is a subset of the following flags:
271.Bl -tag -width "foo"
272.It Dv UMA_ZONE_NOFREE
273Slabs allocated to the zone's keg are never freed.
274.It Dv UMA_ZONE_NODUMP
275Pages belonging to the zone will not be included in minidumps.
276.It Dv UMA_ZONE_PCPU
277An allocation from the zone will have
278.Va mp_ncpu
279shadow copies, that are privately assigned to CPUs.
280A CPU can address its private copy using the base allocation address plus
281a multiple of the current CPU ID and
282.Fn sizeof "struct pcpu" :
283.Bd -literal -offset indent
284foo_zone = uma_zcreate(..., UMA_ZONE_PCPU);
285 ...
286foo_base = uma_zalloc(foo_zone, ...);
287 ...
288critical_enter();
289foo_pcpu = (foo_t *)zpcpu_get(foo_base);
290/* do something with foo_pcpu */
291critical_exit();
292
293.Ed
294Note that
295.Dv M_ZERO
296cannot be used when allocating items from a PCPU zone.
297To obtain zeroed memory from a PCPU zone, use the
298.Fn uma_zalloc_pcpu
299function and its variants instead, and pass
300.Dv M_ZERO .
301.It Dv UMA_ZONE_NOTOUCH
302The UMA subsystem may not directly touch (i.e., read or write) the slab memory.
303Otherwise, by default, book-keeping of items within a slab may be done in the
304slab page itself, and
305.Dv INVARIANTS
306kernels may also do use-after-free checking by accessing the slab memory.
307.It Dv UMA_ZONE_ZINIT
308The zone will have its
309.Ft uma_init
310method set to an internal method that initializes a newly allocated slab
311to all zeros.
312Do not mistake
313.Ft uma_init
314method with
315.Ft uma_ctor .
316A zone with
317.Dv UMA_ZONE_ZINIT
318flag would not return zeroed memory on every
319.Fn uma_zalloc .
320.It Dv UMA_ZONE_NOTPAGE
321An allocator function will be supplied with
322.Fn uma_zone_set_allocf
323and the memory that it returns may not be kernel virtual memory backed by VM
324pages in the page array.
325.It Dv UMA_ZONE_MALLOC
326The zone is for the
327.Xr malloc 9
328subsystem.
329.It Dv UMA_ZONE_VM
330The zone is for the VM subsystem.
331.It Dv UMA_ZONE_CONTIG
332Items in this zone must be contiguous in physical address space.
333Items will follow normal alignment constraints and may span page boundaries
334between pages with contiguous physical addresses.
335.It Dv UMA_ZONE_UNMANAGED
336By default, UMA zone caches are shrunk to help resolve free page shortages.
337Cached items that have not been used for a long period may also be freed from
338the zone.
339When this flag is set, the system will not reclaim memory from the zone's
340caches.
341.It Dv UMA_ZONE_SMR
342Create a zone whose items will be synchronized using the
343.Xr smr 9
344mechanism.
345Upon creation the zone will have an associated
346.Vt smr_t
347structure which can be fetched using
348.Fn uma_zone_get_smr .
349.El
350.Pp
351Zones can be destroyed using
352.Fn uma_zdestroy ,
353freeing all memory that is cached in the zone.
354All items allocated from the zone must be freed to the zone before the zone
355may be safely destroyed.
356.Pp
357To allocate an item from a zone, simply call
358.Fn uma_zalloc
359with a pointer to that zone and set the
360.Fa flags
361argument to selected flags as documented in
362.Xr malloc 9 .
363It will return a pointer to an item if successful, or
364.Dv NULL
365in the rare case where all items in the zone are in use and the
366allocator is unable to grow the zone and
367.Dv M_NOWAIT
368is specified.
369.Pp
370Items are released back to the zone from which they were allocated by
371calling
372.Fn uma_zfree
373with a pointer to the zone and a pointer to the item.
374If
375.Fa item
376is
377.Dv NULL ,
378then
379.Fn uma_zfree
380does nothing.
381.Pp
382The variants
383.Fn uma_zalloc_arg
384and
385.Fn uma_zfree_arg
386allow callers to
387specify an argument for the
388.Fa ctor
389and
390.Fa dtor
391functions of the zone, respectively.
392The variants
393.Fn uma_zalloc_pcpu
394and
395.Fn uma_zfree_pcpu
396allocate and free
397.Va mp_ncpu
398shadow copies as described for
399.Dv UMA_ZONE_PCPU .
400If
401.Fa item
402is
403.Dv NULL ,
404then
405.Fn uma_zfree_pcpu
406does nothing.
407.Pp
408The
409.Fn uma_zalloc_smr
410and
411.Fn uma_zfree_smr
412functions allocate and free items from an SMR-enabled zone, that is,
413a zone created with
414.Dv UMA_ZONE_SMR
415or a zone that has had
416.Fn uma_zone_set_smr
417called.
418.Pp
419The
420.Fn uma_zalloc_domain
421function allows callers to specify a fixed
422.Xr numa 4
423domain to allocate from.
424This uses a guaranteed but slow path in the allocator which reduces
425concurrency.
426.Pp
427The
428.Fn uma_prealloc
429function allocates slabs for the requested number of items, typically following
430the initial creation of a zone.
431Subsequent allocations from the zone will be satisfied using the pre-allocated
432slabs.
433Note that slab allocation is performed with the
434.Dv M_WAITOK
435flag, so
436.Fn uma_prealloc
437may sleep.
438.Pp
439The
440.Fn uma_zone_reserve
441function sets the number of reserved items for the zone.
442.Fn uma_zalloc
443and variants will ensure that the zone contains at least the reserved number
444of free items.
445Reserved items may be allocated by specifying
446.Dv M_USE_RESERVE
447in the allocation request flags.
448.Fn uma_zone_reserve
449does not perform any pre-allocation by itself.
450.Pp
451The
452.Fn uma_zone_reserve_kva
453function pre-allocates kernel virtual address space for the requested
454number of items.
455Subsequent allocations from the zone will be satisfied using the pre-allocated
456address space.
457Note that unlike
458.Fn uma_zone_reserve ,
459.Fn uma_zone_reserve_kva
460does not restrict the use of the pre-allocation to
461.Dv M_USE_RESERVE
462requests.
463.Pp
464The
465.Fn uma_reclaim
466and
467.Fn uma_zone_reclaim
468functions reclaim cached items from UMA zones, releasing unused memory.
469The
470.Fn uma_reclaim
471function reclaims items from all regular zones, while
472.Fn uma_zone_reclaim
473reclaims items only from the specified zone.
474The
475.Fa req
476parameter must be one of three values which specify how aggressively
477items are to be reclaimed:
478.Bl -tag -width indent
479.It Dv UMA_RECLAIM_TRIM
480Reclaim items only in excess of the zone's estimated working set size.
481The working set size is periodically updated and tracks the recent history
482of the zone's usage.
483.It Dv UMA_RECLAIM_DRAIN
484Reclaim all items from the unbounded cache.
485Free items in the per-CPU caches are left alone.
486.It Dv UMA_RECLAIM_DRAIN_CPU
487Reclaim all cached items.
488.El
489The
490.Fn uma_reclaim_domain
491and
492.Fn uma_zone_reclaim_domain
493functions apply only to items allocated from the specified domain.
494In the case of domains using a round-robin NUMA policy, cached items from all
495domains are freed to the keg, but only slabs from the specific domain will
496be freed.
497.Pp
498The
499.Fn uma_zone_set_allocf
500and
501.Fn uma_zone_set_freef
502functions allow a zone's default slab allocation and free functions to be
503overridden.
504This is useful if memory with special constraints such as attributes,
505alignment, or address ranges must be used.
506.Pp
507The
508.Fn uma_zone_set_max
509function limits the number of items
510.Pq and therefore memory
511that can be allocated to
512.Fa zone .
513The
514.Fa nitems
515argument specifies the requested upper limit number of items.
516The effective limit is returned to the caller, as it may end up being higher
517than requested due to the implementation rounding up to ensure all memory pages
518allocated to the zone are utilised to capacity.
519The limit applies to the total number of items in the zone, which includes
520allocated items, free items and free items in the per-CPU caches.
521On systems with more than one CPU it may not be possible to allocate
522the specified number of items even when there is no shortage of memory,
523because all of the remaining free items may be in the caches of the
524other CPUs when the limit is hit.
525.Pp
526The
527.Fn uma_zone_set_maxcache
528function limits the number of free items which may be cached in the zone.
529This limit applies to both the per-CPU caches and the cache of free buckets.
530.Pp
531The
532.Fn uma_zone_get_max
533function returns the effective upper limit number of items for a zone.
534.Pp
535The
536.Fn uma_zone_get_cur
537function returns an approximation of the number of items currently allocated
538from the zone.
539The returned value is approximate because appropriate synchronisation to
540determine an exact value is not performed by the implementation.
541This ensures low overhead at the expense of potentially stale data being used
542in the calculation.
543.Pp
544The
545.Fn uma_zone_set_warning
546function sets a warning that will be printed on the system console when the
547given zone becomes full and fails to allocate an item.
548The warning will be printed no more often than every five minutes.
549Warnings can be turned off globally by setting the
550.Va vm.zone_warnings
551sysctl tunable to
552.Va 0 .
553.Pp
554The
555.Fn uma_zone_set_maxaction
556function sets a function that will be called when the given zone becomes full
557and fails to allocate an item.
558The function will be called with the zone locked.
559Also, the function
560that called the allocation function may have held additional locks.
561Therefore,
562this function should do very little work (similar to a signal handler).
563.Pp
564The
565.Fn uma_zone_set_smr
566function associates an existing
567.Xr smr 9
568structure with a UMA zone.
569The effect is similar to creating a zone with the
570.Dv UMA_ZONE_SMR
571flag, except that a new SMR structure is not created.
572This function must be called before any allocations from the zone are performed.
573.Pp
574The
575.Fn SYSCTL_UMA_MAX parent nbr name access zone descr
576macro declares a static
577.Xr sysctl 9
578oid that exports the effective upper limit number of items for a zone.
579The
580.Fa zone
581argument should be a pointer to
582.Vt uma_zone_t .
583A read of the oid returns the value obtained through
584.Fn uma_zone_get_max .
585A write to the oid sets a new value via
586.Fn uma_zone_set_max .
587The
588.Fn SYSCTL_ADD_UMA_MAX ctx parent nbr name access zone descr
589macro is provided to create this type of oid dynamically.
590.Pp
591The
592.Fn SYSCTL_UMA_CUR parent nbr name access zone descr
593macro declares a static read-only
594.Xr sysctl 9
595oid that exports the approximate current occupancy of the zone.
596The
597.Fa zone
598argument should be a pointer to
599.Vt uma_zone_t .
600A read of the oid returns the value obtained through
601.Fn uma_zone_get_cur .
602The
603.Fn SYSCTL_ADD_UMA_CUR ctx parent nbr name access zone descr
604macro is provided to create this type of oid dynamically.
605.Sh IMPLEMENTATION NOTES
606The memory that these allocation calls return is not executable.
607The
608.Fn uma_zalloc
609function does not support the
610.Dv M_EXEC
611flag to allocate executable memory.
612Not all platforms enforce a distinction between executable and
613non-executable memory.
614.Sh SEE ALSO
615.Xr numa 4 ,
616.Xr vmstat 8 ,
617.Xr malloc 9 ,
618.Xr smr 9
619.Rs
620.%A Jeff Bonwick
621.%T "The Slab Allocator: An Object-Caching Kernel Memory Allocator"
622.%D 1994
623.Re
624.Sh HISTORY
625The zone allocator first appeared in
626.Fx 3.0 .
627It was radically changed in
628.Fx 5.0
629to function as a slab allocator.
630.Sh AUTHORS
631.An -nosplit
632The zone allocator was written by
633.An John S. Dyson .
634The zone allocator was rewritten in large parts by
635.An Jeff Roberson Aq Mt jeff@FreeBSD.org
636to function as a slab allocator.
637.Pp
638This manual page was written by
639.An Dag-Erling Sm\(/orgrav Aq Mt des@FreeBSD.org .
640Changes for UMA by
641.An Jeroen Ruigrok van der Werven Aq Mt asmodai@FreeBSD.org .
642