xref: /freebsd/share/man/man9/zone.9 (revision 5bb3134a8c21cb87b30e135ef168483f0333dabb)
1.\"-
2.\" Copyright (c) 2001 Dag-Erling Coïdan Smørgrav
3.\" All rights reserved.
4.\"
5.\" Redistribution and use in source and binary forms, with or without
6.\" modification, are permitted provided that the following conditions
7.\" are met:
8.\" 1. Redistributions of source code must retain the above copyright
9.\"    notice, this list of conditions and the following disclaimer.
10.\" 2. Redistributions in binary form must reproduce the above copyright
11.\"    notice, this list of conditions and the following disclaimer in the
12.\"    documentation and/or other materials provided with the distribution.
13.\"
14.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24.\" SUCH DAMAGE.
25.\"
26.\" $FreeBSD$
27.\"
28.Dd April 14, 2021
29.Dt UMA 9
30.Os
31.Sh NAME
32.Nm UMA
33.Nd general-purpose kernel object allocator
34.Sh SYNOPSIS
35.In sys/param.h
36.In sys/queue.h
37.In vm/uma.h
38.Cd "options UMA_FIRSTTOUCH"
39.Cd "options UMA_XDOMAIN"
40.Bd -literal
41typedef int (*uma_ctor)(void *mem, int size, void *arg, int flags);
42typedef void (*uma_dtor)(void *mem, int size, void *arg);
43typedef int (*uma_init)(void *mem, int size, int flags);
44typedef void (*uma_fini)(void *mem, int size);
45typedef int (*uma_import)(void *arg, void **store, int count, int domain,
46    int flags);
47typedef void (*uma_release)(void *arg, void **store, int count);
48typedef void *(*uma_alloc)(uma_zone_t zone, vm_size_t size, int domain,
49    uint8_t *pflag, int wait);
50typedef void (*uma_free)(void *item, vm_size_t size, uint8_t pflag);
51
52.Ed
53.Ft uma_zone_t
54.Fo uma_zcreate
55.Fa "char *name" "int size"
56.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini"
57.Fa "int align" "uint16_t flags"
58.Fc
59.Ft uma_zone_t
60.Fo uma_zcache_create
61.Fa "char *name" "int size"
62.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini"
63.Fa "uma_import zimport" "uma_release zrelease"
64.Fa "void *arg" "int flags"
65.Fc
66.Ft uma_zone_t
67.Fo uma_zsecond_create
68.Fa "char *name"
69.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini"
70.Fa "uma_zone_t master"
71.Fc
72.Ft void
73.Fn uma_zdestroy "uma_zone_t zone"
74.Ft "void *"
75.Fn uma_zalloc "uma_zone_t zone" "int flags"
76.Ft "void *"
77.Fn uma_zalloc_arg "uma_zone_t zone" "void *arg" "int flags"
78.Ft "void *"
79.Fn uma_zalloc_domain "uma_zone_t zone" "void *arg" "int domain" "int flags"
80.Ft "void *"
81.Fn uma_zalloc_pcpu "uma_zone_t zone" "int flags"
82.Ft "void *"
83.Fn uma_zalloc_pcpu_arg "uma_zone_t zone" "void *arg" "int flags"
84.Ft void
85.Fn uma_zfree "uma_zone_t zone" "void *item"
86.Ft void
87.Fn uma_zfree_arg "uma_zone_t zone" "void *item" "void *arg"
88.Ft void
89.Fn uma_zfree_pcpu "uma_zone_t zone" "void *item"
90.Ft void
91.Fn uma_zfree_pcpu_arg "uma_zone_t zone" "void *item" "void *arg"
92.Ft void
93.Fn uma_prealloc "uma_zone_t zone" "int nitems"
94.Ft void
95.Fn uma_zone_reserve "uma_zone_t zone" "int nitems"
96.Ft void
97.Fn uma_zone_reserve_kva "uma_zone_t zone" "int nitems"
98.Ft void
99.Fn uma_reclaim "int req"
100.Ft void
101.Fn uma_reclaim_domain "int req" "int domain"
102.Ft void
103.Fn uma_zone_reclaim "uma_zone_t zone" "int req"
104.Ft void
105.Fn uma_zone_reclaim_domain "uma_zone_t zone" "int req" "int domain"
106.Ft void
107.Fn uma_zone_set_allocf "uma_zone_t zone" "uma_alloc allocf"
108.Ft void
109.Fn uma_zone_set_freef "uma_zone_t zone" "uma_free freef"
110.Ft int
111.Fn uma_zone_set_max "uma_zone_t zone" "int nitems"
112.Ft void
113.Fn uma_zone_set_maxcache "uma_zone_t zone" "int nitems"
114.Ft int
115.Fn uma_zone_get_max "uma_zone_t zone"
116.Ft int
117.Fn uma_zone_get_cur "uma_zone_t zone"
118.Ft void
119.Fn uma_zone_set_warning "uma_zone_t zone" "const char *warning"
120.Ft void
121.Fn uma_zone_set_maxaction "uma_zone_t zone" "void (*maxaction)(uma_zone_t)"
122.In sys/sysctl.h
123.Fn SYSCTL_UMA_MAX parent nbr name access zone descr
124.Fn SYSCTL_ADD_UMA_MAX ctx parent nbr name access zone descr
125.Fn SYSCTL_UMA_CUR parent nbr name access zone descr
126.Fn SYSCTL_ADD_UMA_CUR ctx parent nbr name access zone descr
127.Sh DESCRIPTION
128UMA (Universal Memory Allocator) provides an efficient interface for managing
129dynamically-sized collections of items of identical size, referred to as zones.
130Zones keep track of which items are in use and which
131are not, and UMA provides functions for allocating items from a zone and
132for releasing them back, making them available for subsequent allocation requests.
133Zones maintain per-CPU caches with linear scalability on SMP
134systems as well as round-robin and first-touch policies for NUMA
135systems.
136The number of items cached per CPU is bounded, and each zone additionally
137maintains an unbounded cache of items that is used to quickly satisfy
138per-CPU cache allocation misses.
139.Pp
140Two types of zones exist: regular zones and cache zones.
141In a regular zone, items are allocated from a slab, which is one or more
142virtually contiguous memory pages that have been allocated from the kernel's
143page allocator.
144Internally, slabs are managed by a UMA keg, which is responsible for allocating
145slabs and keeping track of their usage by one or more zones.
146In typical usage, there is one keg per zone, so slabs are not shared among
147multiple zones.
148.Pp
149Regular zones import items from a keg, and release items back to that keg if
150requested.
151Cache zones do not have a keg, and instead use custom import and release
152methods.
153For example, some collections of kernel objects are statically allocated
154at boot-time, and the size of the collection does not change.
155A cache zone can be used to implement an efficient allocator for the objects in
156such a collection.
157.Pp
158The
159.Fn uma_zcreate
160and
161.Fn uma_zcache_create
162functions create a new regular zone and cache zone, respectively.
163The
164.Fn uma_zsecond_create
165function creates a regular zone which shares the keg of the zone
166specified by the
167.Fa master
168argument.
169The
170.Fa name
171argument is a text name of the zone for debugging and stats; this memory
172should not be freed until the zone has been deallocated.
173.Pp
174The
175.Fa ctor
176and
177.Fa dtor
178arguments are callback functions that are called by
179the UMA subsystem at the time of the call to
180.Fn uma_zalloc
181and
182.Fn uma_zfree
183respectively.
184Their purpose is to provide hooks for initializing or
185destroying things that need to be done at the time of the allocation
186or release of a resource.
187A good usage for the
188.Fa ctor
189and
190.Fa dtor
191callbacks might be to initialize a data structure embedded in the item,
192such as a
193.Xr queue 3
194head.
195.Pp
196The
197.Fa zinit
198and
199.Fa zfini
200arguments are used to optimize the allocation of items from the zone.
201They are called by the UMA subsystem whenever
202it needs to allocate or free items to satisfy requests or memory pressure.
203A good use for the
204.Fa zinit
205and
206.Fa zfini
207callbacks might be to
208initialize and destroy a mutex contained within an item.
209This would allow one to avoid destroying and re-initializing the mutex
210each time the item is freed and re-allocated.
211They are not called on each call to
212.Fn uma_zalloc
213and
214.Fn uma_zfree
215but rather when an item is imported into a zone's cache, and when a zone
216releases an item to the slab allocator, typically as a response to memory
217pressure.
218.Pp
219For
220.Fn uma_zcache_create ,
221the
222.Fa zimport
223and
224.Fa zrelease
225functions are called to import items into the zone and to release items
226from the zone, respectively.
227The
228.Fa zimport
229function should store pointers to items in the
230.Fa store
231array, which contains a maximum of
232.Fa count
233entries.
234The function must return the number of imported items, which may be less than
235the maximum.
236Similarly, the
237.Fa store
238parameter to the
239.Fa zrelease
240function contains an array of
241.Fa count
242pointers to items.
243The
244.Fa arg
245parameter passed to
246.Fn uma_zcache_create
247is provided to the import and release functions.
248The
249.Fa domain
250parameter to
251.Fa zimport
252specifies the requested
253.Xr numa 4
254domain for the allocation.
255It is either a NUMA domain number or the special value
256.Dv UMA_ANYDOMAIN .
257.Pp
258The
259.Fa flags
260argument of
261.Fn uma_zcreate
262and
263.Fn uma_zcache_create
264is a subset of the following flags:
265.Bl -tag -width "foo"
266.It Dv UMA_ZONE_NOFREE
267Slabs allocated to the zone's keg are never freed.
268.It Dv UMA_ZONE_NODUMP
269Pages belonging to the zone will not be included in minidumps.
270.It Dv UMA_ZONE_PCPU
271An allocation from the zone would have
272.Va mp_ncpu
273shadow copies that are privately assigned to CPUs.
274A CPU can address its private copy using the base allocation address plus
275a multiple of the current CPU ID and
276.Fn sizeof "struct pcpu" :
277.Bd -literal -offset indent
278foo_zone = uma_zcreate(..., UMA_ZONE_PCPU);
279 ...
280foo_base = uma_zalloc(foo_zone, ...);
281 ...
282critical_enter();
283foo_pcpu = (foo_t *)zpcpu_get(foo_base);
284/* do something with foo_pcpu */
285critical_exit();
286
287.Ed
288Note that
289.Dv M_ZERO
290cannot be used when allocating items from a PCPU zone.
291To obtain zeroed memory from a PCPU zone, use the
292.Fn uma_zalloc_pcpu
293function and its variants instead, and pass
294.Dv M_ZERO .
295.It Dv UMA_ZONE_NOTOUCH
296The UMA subsystem may not directly touch (i.e. read or write) the slab memory.
297Otherwise, by default, book-keeping of items within a slab may be done in the
298slab page itself, and
299.Dv INVARIANTS
300kernels may also do use-after-free checking by accessing the slab memory.
301.It Dv UMA_ZONE_ZINIT
302The zone will have its
303.Ft uma_init
304method set to an internal method that initializes a newly allocated slab
305to all zeros.
306Do not confuse the
307.Ft uma_init
308method with
309.Ft uma_ctor .
310A zone with the
311.Dv UMA_ZONE_ZINIT
312flag would not return zeroed memory on every
313.Fn uma_zalloc .
314.It Dv UMA_ZONE_NOTPAGE
315An allocator function will be supplied with
316.Fn uma_zone_set_allocf
317and the memory that it returns may not be kernel virtual memory backed by VM
318pages in the page array.
319.It Dv UMA_ZONE_MALLOC
320The zone is for the
321.Xr malloc 9
322subsystem.
323.It Dv UMA_ZONE_VM
324The zone is for the VM subsystem.
325.It Dv UMA_ZONE_NUMA
326The zone should use a first-touch NUMA policy rather than the round-robin
327default.
328If the
329.Dv UMA_FIRSTTOUCH
330kernel option is configured, all zones implicitly use a first-touch policy,
331and the
332.Dv UMA_ZONE_NUMA
333flag has no effect.
334The
335.Dv UMA_XDOMAIN
336kernel option, when configured, causes UMA to do the extra tracking to ensure
337that allocations from first-touch zones are always local.
338Otherwise, consumers that do not free memory on the same domain from which it
339was allocated will cause mixing in per-CPU caches.
340See
341.Xr numa 4
342for more details.
343.It Dv UMA_ZONE_CONTIG
344Items in this zone must be contiguous in physical address space.
345Items will follow normal alignment constraints and may span page boundaries
346between pages with contiguous physical addresses.
347.El
348.Pp
349Zones can be destroyed using
350.Fn uma_zdestroy ,
351freeing all memory that is cached in the zone.
352All items allocated from the zone must be freed to the zone before the zone
353may be safely destroyed.
354.Pp
355To allocate an item from a zone, simply call
356.Fn uma_zalloc
357with a pointer to that zone and set the
358.Fa flags
359argument to selected flags as documented in
360.Xr malloc 9 .
361It will return a pointer to an item if successful, or
362.Dv NULL
363in the rare case where all items in the zone are in use and the
364allocator is unable to grow the zone and
365.Dv M_NOWAIT
366is specified.
367.Pp
368Items are released back to the zone from which they were allocated by
369calling
370.Fn uma_zfree
371with a pointer to the zone and a pointer to the item.
372If
373.Fa item
374is
375.Dv NULL ,
376then
377.Fn uma_zfree
378does nothing.
379.Pp
380The variants
381.Fn uma_zalloc_arg
382and
383.Fn uma_zfree_arg
384allow callers to
385specify an argument for the
386.Fa ctor
387and
388.Fa dtor
389functions of the zone, respectively.
390The variants
391.Fn uma_zalloc_pcpu
392and
393.Fn uma_zfree_pcpu
394allocate and free
395.Va mp_ncpu
396shadow copies as described for
397.Dv UMA_ZONE_PCPU .
398If
399.Fa item
400is
401.Dv NULL ,
402then
403.Fn uma_zfree_pcpu
404does nothing.
405.Pp
406The
407.Fn uma_zalloc_domain
408function allows callers to specify a fixed
409.Xr numa 4
410domain to allocate from.
411This uses a guaranteed but slow path in the allocator which reduces
412concurrency.
413.Pp
414The
415.Fn uma_prealloc
416function allocates slabs for the requested number of items, typically following
417the initial creation of a zone.
418Subsequent allocations from the zone will be satisfied using the pre-allocated
419slabs.
420Note that slab allocation is performed with the
421.Dv M_WAITOK
422flag, so
423.Fn uma_prealloc
424may sleep.
425.Pp
426The
427.Fn uma_zone_reserve
428function sets the number of reserved items for the zone.
429.Fn uma_zalloc
430and variants will ensure that the zone contains at least the reserved number
431of free items.
432Reserved items may be allocated by specifying
433.Dv M_USE_RESERVE
434in the allocation request flags.
435.Fn uma_zone_reserve
436does not perform any pre-allocation by itself.
437.Pp
438The
439.Fn uma_zone_reserve_kva
440function pre-allocates kernel virtual address space for the requested
441number of items.
442Subsequent allocations from the zone will be satisfied using the pre-allocated
443address space.
444Note that unlike
445.Fn uma_zone_reserve ,
446.Fn uma_zone_reserve_kva
447does not restrict the use of the pre-allocation to
448.Dv M_USE_RESERVE
449requests.
450.Pp
451The
452.Fn uma_reclaim
453and
454.Fn uma_zone_reclaim
455functions reclaim cached items from UMA zones, releasing unused memory.
456The
457.Fn uma_reclaim
458function reclaims items from all regular zones, while
459.Fn uma_zone_reclaim
460reclaims items only from the specified zone.
461The
462.Fa req
463parameter must be one of three values which specify how aggressively
464items are to be reclaimed:
465.Bl -tag -width indent
466.It Dv UMA_RECLAIM_TRIM
467Reclaim items only in excess of the zone's estimated working set size.
468The working set size is periodically updated and tracks the recent history
469of the zone's usage.
470.It Dv UMA_RECLAIM_DRAIN
471Reclaim all items from the unbounded cache.
472Free items in the per-CPU caches are left alone.
473.It Dv UMA_RECLAIM_DRAIN_CPU
474Reclaim all cached items.
475.El
476The
477.Fn uma_reclaim_domain
478and
479.Fn uma_zone_reclaim_domain
480functions apply only to items allocated from the specified domain.
481In the case of zones using a round-robin NUMA policy, cached items from all
482domains are freed to the keg, but only slabs from the specific domain will
483be freed.
484.Pp
485The
486.Fn uma_zone_set_allocf
487and
488.Fn uma_zone_set_freef
489functions allow a zone's default slab allocation and free functions to be
490overridden.
491This is useful if memory with special constraints such as attributes,
492alignment, or address ranges must be used.
493.Pp
494The
495.Fn uma_zone_set_max
496function limits the number of items
497.Pq and therefore memory
498that can be allocated to
499.Fa zone .
500The
501.Fa nitems
502argument specifies the requested upper limit number of items.
503The effective limit is returned to the caller, as it may end up being higher
504than requested due to the implementation rounding up to ensure all memory pages
505allocated to the zone are utilised to capacity.
506The limit applies to the total number of items in the zone, which includes
507allocated items, free items and free items in the per-CPU caches.
508On systems with more than one CPU it may not be possible to allocate
509the specified number of items even when there is no shortage of memory,
510because all of the remaining free items may be in the caches of the
511other CPUs when the limit is hit.
512.Pp
513The
514.Fn uma_zone_set_maxcache
515function limits the number of free items which may be cached in the zone.
516This limit applies to both the per-CPU caches and the cache of free buckets.
517.Pp
518The
519.Fn uma_zone_get_max
520function returns the effective upper limit number of items for a zone.
521.Pp
522The
523.Fn uma_zone_get_cur
524function returns an approximation of the number of items currently allocated
525from the zone.
526The returned value is approximate because appropriate synchronisation to
527determine an exact value is not performed by the implementation.
528This ensures low overhead at the expense of potentially stale data being used
529in the calculation.
530.Pp
531The
532.Fn uma_zone_set_warning
533function sets a warning that will be printed on the system console when the
534given zone becomes full and fails to allocate an item.
535The warning will be printed no more often than every five minutes.
536Warnings can be turned off globally by setting the
537.Va vm.zone_warnings
538sysctl tunable to
539.Va 0 .
540.Pp
541The
542.Fn uma_zone_set_maxaction
543function sets a function that will be called when the given zone becomes full
544and fails to allocate an item.
545The function will be called with the zone locked.
546Also, the function
547that called the allocation function may have held additional locks.
548Therefore,
549this function should do very little work (similar to a signal handler).
550.Pp
551The
552.Fn SYSCTL_UMA_MAX parent nbr name access zone descr
553macro declares a static
554.Xr sysctl 9
555oid that exports the effective upper limit number of items for a zone.
556The
557.Fa zone
558argument should be a pointer to
559.Vt uma_zone_t .
560A read of the oid returns the value obtained through
561.Fn uma_zone_get_max .
562A write to the oid sets a new value via
563.Fn uma_zone_set_max .
564The
565.Fn SYSCTL_ADD_UMA_MAX ctx parent nbr name access zone descr
566macro is provided to create this type of oid dynamically.
567.Pp
568The
569.Fn SYSCTL_UMA_CUR parent nbr name access zone descr
570macro declares a static read-only
571.Xr sysctl 9
572oid that exports the approximate current occupancy of the zone.
573The
574.Fa zone
575argument should be a pointer to
576.Vt uma_zone_t .
577A read of the oid returns the value obtained through
578.Fn uma_zone_get_cur .
579The
580.Fn SYSCTL_ADD_UMA_CUR ctx parent nbr name access zone descr
581macro is provided to create this type of oid dynamically.
582.Sh IMPLEMENTATION NOTES
583The memory that these allocation calls return is not executable.
584The
585.Fn uma_zalloc
586function does not support the
587.Dv M_EXEC
588flag to allocate executable memory.
589Not all platforms enforce a distinction between executable and
590non-executable memory.
591.Sh SEE ALSO
592.Xr numa 4 ,
593.Xr vmstat 8 ,
594.Xr malloc 9
595.Rs
596.%A Jeff Bonwick
597.%T "The Slab Allocator: An Object-Caching Kernel Memory Allocator"
598.%D 1994
599.Re
600.Sh HISTORY
601The zone allocator first appeared in
602.Fx 3.0 .
603It was radically changed in
604.Fx 5.0
605to function as a slab allocator.
606.Sh AUTHORS
607.An -nosplit
608The zone allocator was written by
609.An John S. Dyson .
610The zone allocator was rewritten in large parts by
611.An Jeff Roberson Aq Mt jeff@FreeBSD.org
612to function as a slab allocator.
613.Pp
614This manual page was written by
615.An Dag-Erling Sm\(/orgrav Aq Mt des@FreeBSD.org .
616Changes for UMA by
617.An Jeroen Ruigrok van der Werven Aq Mt asmodai@FreeBSD.org .
618