xref: /freebsd/share/man/man9/zone.9 (revision ac099daf6742ead81ea7ea86351a8ef4e783041b)
1.\"-
2.\" Copyright (c) 2001 Dag-Erling Coïdan Smørgrav
3.\" All rights reserved.
4.\"
5.\" Redistribution and use in source and binary forms, with or without
6.\" modification, are permitted provided that the following conditions
7.\" are met:
8.\" 1. Redistributions of source code must retain the above copyright
9.\"    notice, this list of conditions and the following disclaimer.
10.\" 2. Redistributions in binary form must reproduce the above copyright
11.\"    notice, this list of conditions and the following disclaimer in the
12.\"    documentation and/or other materials provided with the distribution.
13.\"
14.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24.\" SUCH DAMAGE.
25.\"
26.\" $FreeBSD$
27.\"
28.Dd April 14, 2021
29.Dt UMA 9
30.Os
31.Sh NAME
32.Nm UMA
33.Nd general-purpose kernel object allocator
34.Sh SYNOPSIS
35.In sys/param.h
36.In sys/queue.h
37.In vm/uma.h
38.Cd "options UMA_FIRSTTOUCH"
39.Cd "options UMA_XDOMAIN"
40.Bd -literal
41typedef int (*uma_ctor)(void *mem, int size, void *arg, int flags);
42typedef void (*uma_dtor)(void *mem, int size, void *arg);
43typedef int (*uma_init)(void *mem, int size, int flags);
44typedef void (*uma_fini)(void *mem, int size);
45typedef int (*uma_import)(void *arg, void **store, int count, int domain,
46    int flags);
47typedef void (*uma_release)(void *arg, void **store, int count);
48typedef void *(*uma_alloc)(uma_zone_t zone, vm_size_t size, int domain,
49    uint8_t *pflag, int wait);
50typedef void (*uma_free)(void *item, vm_size_t size, uint8_t pflag);
51
52.Ed
53.Ft uma_zone_t
54.Fo uma_zcreate
55.Fa "char *name" "int size"
56.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini"
57.Fa "int align" "uint16_t flags"
58.Fc
59.Ft uma_zone_t
60.Fo uma_zcache_create
61.Fa "char *name" "int size"
62.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini"
63.Fa "uma_import zimport" "uma_release zrelease"
64.Fa "void *arg" "int flags"
65.Fc
66.Ft uma_zone_t
67.Fo uma_zsecond_create
68.Fa "char *name"
69.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini"
70.Fa "uma_zone_t master"
71.Fc
72.Ft void
73.Fn uma_zdestroy "uma_zone_t zone"
74.Ft "void *"
75.Fn uma_zalloc "uma_zone_t zone" "int flags"
76.Ft "void *"
77.Fn uma_zalloc_arg "uma_zone_t zone" "void *arg" "int flags"
78.Ft "void *"
79.Fn uma_zalloc_domain "uma_zone_t zone" "void *arg" "int domain" "int flags"
80.Ft "void *"
81.Fn uma_zalloc_pcpu "uma_zone_t zone" "int flags"
82.Ft "void *"
83.Fn uma_zalloc_pcpu_arg "uma_zone_t zone" "void *arg" "int flags"
84.Ft void
85.Fn uma_zfree "uma_zone_t zone" "void *item"
86.Ft void
87.Fn uma_zfree_arg "uma_zone_t zone" "void *item" "void *arg"
88.Ft void
89.Fn uma_zfree_pcpu "uma_zone_t zone" "void *item"
90.Ft void
91.Fn uma_zfree_pcpu_arg "uma_zone_t zone" "void *item" "void *arg"
92.Ft void
93.Fn uma_prealloc "uma_zone_t zone" "int nitems"
94.Ft void
95.Fn uma_zone_reserve "uma_zone_t zone" "int nitems"
96.Ft void
97.Fn uma_zone_reserve_kva "uma_zone_t zone" "int nitems"
98.Ft void
99.Fn uma_reclaim "int req"
100.Ft void
101.Fn uma_reclaim_domain "int req" "int domain"
102.Ft void
103.Fn uma_zone_reclaim "uma_zone_t zone" "int req"
104.Ft void
105.Fn uma_zone_reclaim_domain "uma_zone_t zone" "int req" "int domain"
106.Ft void
107.Fn uma_zone_set_allocf "uma_zone_t zone" "uma_alloc allocf"
108.Ft void
109.Fn uma_zone_set_freef "uma_zone_t zone" "uma_free freef"
110.Ft int
111.Fn uma_zone_set_max "uma_zone_t zone" "int nitems"
112.Ft void
113.Fn uma_zone_set_maxcache "uma_zone_t zone" "int nitems"
114.Ft int
115.Fn uma_zone_get_max "uma_zone_t zone"
116.Ft int
117.Fn uma_zone_get_cur "uma_zone_t zone"
118.Ft void
119.Fn uma_zone_set_warning "uma_zone_t zone" "const char *warning"
120.Ft void
121.Fn uma_zone_set_maxaction "uma_zone_t zone" "void (*maxaction)(uma_zone_t)"
122.Ft void
123.Fn uma_reclaim
124.In sys/sysctl.h
125.Fn SYSCTL_UMA_MAX parent nbr name access zone descr
126.Fn SYSCTL_ADD_UMA_MAX ctx parent nbr name access zone descr
127.Fn SYSCTL_UMA_CUR parent nbr name access zone descr
128.Fn SYSCTL_ADD_UMA_CUR ctx parent nbr name access zone descr
129.Sh DESCRIPTION
130UMA (Universal Memory Allocator) provides an efficient interface for managing
131dynamically-sized collections of items of identical size, referred to as zones.
132Zones keep track of which items are in use and which
133are not, and UMA provides functions for allocating items from a zone and
134for releasing them back, making them available for subsequent allocation requests.
135Zones maintain per-CPU caches with linear scalability on SMP
136systems as well as round-robin and first-touch policies for NUMA
137systems.
138The number of items cached per CPU is bounded, and each zone additionally
139maintains an unbounded cache of items that is used to quickly satisfy
140per-CPU cache allocation misses.
141.Pp
142Two types of zones exist: regular zones and cache zones.
143In a regular zone, items are allocated from a slab, which is one or more
144virtually contiguous memory pages that have been allocated from the kernel's
145page allocator.
146Internally, slabs are managed by a UMA keg, which is responsible for allocating
147slabs and keeping track of their usage by one or more zones.
148In typical usage, there is one keg per zone, so slabs are not shared among
149multiple zones.
150.Pp
151Regular zones import items from a keg, and release items back to that keg if
152requested.
153Cache zones do not have a keg, and instead use custom import and release
154methods.
155For example, some collections of kernel objects are statically allocated
156at boot-time, and the size of the collection does not change.
157A cache zone can be used to implement an efficient allocator for the objects in
158such a collection.
159.Pp
160The
161.Fn uma_zcreate
162and
163.Fn uma_zcache_create
164functions create a new regular zone and cache zone, respectively.
165The
166.Fn uma_zsecond_create
167function creates a regular zone which shares the keg of the zone
168specified by the
169.Fa master
170argument.
171The
172.Fa name
173argument is a text name of the zone for debugging and stats; this memory
174should not be freed until the zone has been deallocated.
175.Pp
176The
177.Fa ctor
178and
179.Fa dtor
180arguments are callback functions that are called by
181the UMA subsystem at the time of the call to
182.Fn uma_zalloc
183and
184.Fn uma_zfree
185respectively.
186Their purpose is to provide hooks for initializing or
187destroying things that need to be done at the time of the allocation
188or release of a resource.
189A good usage for the
190.Fa ctor
191and
192.Fa dtor
193callbacks might be to initialize a data structure embedded in the item,
194such as a
195.Xr queue 3
196head.
197.Pp
198The
199.Fa zinit
200and
201.Fa zfini
202arguments are used to optimize the allocation of items from the zone.
203They are called by the UMA subsystem whenever
204it needs to allocate or free items to satisfy requests or memory pressure.
205A good use for the
206.Fa zinit
207and
208.Fa zfini
209callbacks might be to
210initialize and destroy a mutex contained within an item.
211This would allow one to avoid destroying and re-initializing the mutex
212each time the item is freed and re-allocated.
213They are not called on each call to
214.Fn uma_zalloc
215and
216.Fn uma_zfree
217but rather when an item is imported into a zone's cache, and when a zone
218releases an item to the slab allocator, typically as a response to memory
219pressure.
220.Pp
221For
222.Fn uma_zcache_create ,
223the
224.Fa zimport
225and
226.Fa zrelease
227functions are called to import items into the zone and to release items
228from the zone, respectively.
229The
230.Fa zimport
231function should store pointers to items in the
232.Fa store
233array, which contains a maximum of
234.Fa count
235entries.
236The function must return the number of imported items, which may be less than
237the maximum.
238Similarly, the
239.Fa store
240parameter to the
241.Fa zrelease
242function contains an array of
243.Fa count
244pointers to items.
245The
246.Fa arg
247parameter passed to
248.Fn uma_zcache_create
249is provided to the import and release functions.
250The
251.Fa domain
252parameter to
253.Fa zimport
254specifies the requested
255.Xr numa 4
256domain for the allocation.
257It is either a NUMA domain number or the special value
258.Dv UMA_ANYDOMAIN .
259.Pp
260The
261.Fa flags
262argument of
263.Fn uma_zcreate
264and
265.Fn uma_zcache_create
266is a subset of the following flags:
267.Bl -tag -width "foo"
268.It Dv UMA_ZONE_NOFREE
269Slabs allocated to the zone's keg are never freed.
270.It Dv UMA_ZONE_NODUMP
271Pages belonging to the zone will not be included in minidumps.
272.It Dv UMA_ZONE_PCPU
273An allocation from the zone has
274.Va mp_ncpu
275shadow copies, each of which is privately assigned to a CPU.
276A CPU can address its private copy using the base allocation address plus
277a multiple of the current CPU ID and
278.Fn sizeof "struct pcpu" :
279.Bd -literal -offset indent
280foo_zone = uma_zcreate(..., UMA_ZONE_PCPU);
281 ...
282foo_base = uma_zalloc(foo_zone, ...);
283 ...
284critical_enter();
285foo_pcpu = (foo_t *)zpcpu_get(foo_base);
286/* do something with foo_pcpu */
287critical_exit();
288
289.Ed
290Note that
291.Dv M_ZERO
292cannot be used when allocating items from a PCPU zone.
293To obtain zeroed memory from a PCPU zone, use the
294.Fn uma_zalloc_pcpu
295function and its variants instead, and pass
296.Dv M_ZERO .
297.It Dv UMA_ZONE_NOTOUCH
298The UMA subsystem may not directly touch (i.e. read or write) the slab memory.
299Otherwise, by default, book-keeping of items within a slab may be done in the
300slab page itself, and
301.Dv INVARIANTS
302kernels may also do use-after-free checking by accessing the slab memory.
303.It Dv UMA_ZONE_ZINIT
304The zone will have its
305.Ft uma_init
306method set to an internal method that initializes a newly allocated slab
307to all zeros.
308Do not confuse the
309.Ft uma_init
310method with
311.Ft uma_ctor .
312A zone with the
313.Dv UMA_ZONE_ZINIT
314flag does not return zeroed memory on every call to
315.Fn uma_zalloc .
316.It Dv UMA_ZONE_NOTPAGE
317An allocator function will be supplied with
318.Fn uma_zone_set_allocf
319and the memory that it returns may not be kernel virtual memory backed by VM
320pages in the page array.
321.It Dv UMA_ZONE_MALLOC
322The zone is for the
323.Xr malloc 9
324subsystem.
325.It Dv UMA_ZONE_VM
326The zone is for the VM subsystem.
327.It Dv UMA_ZONE_NUMA
328The zone should use a first-touch NUMA policy rather than the round-robin
329default.
330If the
331.Dv UMA_FIRSTTOUCH
332kernel option is configured, all zones implicitly use a first-touch policy,
333and the
334.Dv UMA_ZONE_NUMA
335flag has no effect.
336The
337.Dv UMA_XDOMAIN
338kernel option, when configured, causes UMA to do the extra tracking to ensure
339that allocations from first-touch zones are always local.
340Otherwise, consumers that do not free memory on the same domain from which it
341was allocated will cause mixing in per-CPU caches.
342See
343.Xr numa 4
344for more details.
345.It Dv UMA_ZONE_CONTIG
346Items in this zone must be contiguous in physical address space.
347Items will follow normal alignment constraints and may span page boundaries
348between pages with contiguous physical addresses.
349.El
350.Pp
351Zones can be destroyed using
352.Fn uma_zdestroy ,
353freeing all memory that is cached in the zone.
354All items allocated from the zone must be freed to the zone before the zone
355may be safely destroyed.
356.Pp
357To allocate an item from a zone, simply call
358.Fn uma_zalloc
359with a pointer to that zone and set the
360.Fa flags
361argument to selected flags as documented in
362.Xr malloc 9 .
363It will return a pointer to an item if successful, or
364.Dv NULL
365in the rare case where all items in the zone are in use and the
366allocator is unable to grow the zone and
367.Dv M_NOWAIT
368is specified.
369.Pp
370Items are released back to the zone from which they were allocated by
371calling
372.Fn uma_zfree
373with a pointer to the zone and a pointer to the item.
374If
375.Fa item
376is
377.Dv NULL ,
378then
379.Fn uma_zfree
380does nothing.
381.Pp
382The variants
383.Fn uma_zalloc_arg
384and
385.Fn uma_zfree_arg
386allow callers to
387specify an argument for the
388.Fa ctor
389and
390.Fa dtor
391functions of the zone, respectively.
392The variants
393.Fn uma_zalloc_pcpu
394and
395.Fn uma_zfree_pcpu
396allocate and free
397.Va mp_ncpu
398shadow copies as described for
399.Dv UMA_ZONE_PCPU .
400If
401.Fa item
402is
403.Dv NULL ,
404then
405.Fn uma_zfree_pcpu
406does nothing.
407.Pp
408The
409.Fn uma_zalloc_domain
410function allows callers to specify a fixed
411.Xr numa 4
412domain to allocate from.
413This uses a guaranteed but slow path in the allocator which reduces
414concurrency.
415.Pp
416The
417.Fn uma_prealloc
418function allocates slabs for the requested number of items, typically following
419the initial creation of a zone.
420Subsequent allocations from the zone will be satisfied using the pre-allocated
421slabs.
422Note that slab allocation is performed with the
423.Dv M_WAITOK
424flag, so
425.Fn uma_prealloc
426may sleep.
427.Pp
428The
429.Fn uma_zone_reserve
430function sets the number of reserved items for the zone.
431.Fn uma_zalloc
432and variants will ensure that the zone contains at least the reserved number
433of free items.
434Reserved items may be allocated by specifying
435.Dv M_USE_RESERVE
436in the allocation request flags.
437.Fn uma_zone_reserve
438does not perform any pre-allocation by itself.
439.Pp
440The
441.Fn uma_zone_reserve_kva
442function pre-allocates kernel virtual address space for the requested
443number of items.
444Subsequent allocations from the zone will be satisfied using the pre-allocated
445address space.
446Note that unlike
447.Fn uma_zone_reserve ,
448.Fn uma_zone_reserve_kva
449does not restrict the use of the pre-allocation to
450.Dv M_USE_RESERVE
451requests.
452.Pp
453The
454.Fn uma_reclaim
455and
456.Fn uma_zone_reclaim
457functions reclaim cached items from UMA zones, releasing unused memory.
458The
459.Fn uma_reclaim
460function reclaims items from all regular zones, while
461.Fn uma_zone_reclaim
462reclaims items only from the specified zone.
463The
464.Fa req
465parameter must be one of three values which specify how aggressively
466items are to be reclaimed:
467.Bl -tag -width indent
468.It Dv UMA_RECLAIM_TRIM
469Reclaim items only in excess of the zone's estimated working set size.
470The working set size is periodically updated and tracks the recent history
471of the zone's usage.
472.It Dv UMA_RECLAIM_DRAIN
473Reclaim all items from the unbounded cache.
474Free items in the per-CPU caches are left alone.
475.It Dv UMA_RECLAIM_DRAIN_CPU
476Reclaim all cached items.
477.El
478The
479.Fn uma_reclaim_domain
480and
481.Fn uma_zone_reclaim_domain
482functions apply only to items allocated from the specified domain.
483In the case of domains using a round-robin NUMA policy, cached items from all
484domains are freed to the keg, but only slabs from the specific domain will
485be freed.
486.Pp
487The
488.Fn uma_zone_set_allocf
489and
490.Fn uma_zone_set_freef
491functions allow a zone's default slab allocation and free functions to be
492overridden.
493This is useful if memory with special constraints such as attributes,
494alignment, or address ranges must be used.
495.Pp
496The
497.Fn uma_zone_set_max
498function limits the number of items
499.Pq and therefore memory
500that can be allocated to
501.Fa zone .
502The
503.Fa nitems
504argument specifies the requested upper limit number of items.
505The effective limit is returned to the caller, as it may end up being higher
506than requested due to the implementation rounding up to ensure all memory pages
507allocated to the zone are utilised to capacity.
508The limit applies to the total number of items in the zone, which includes
509allocated items, free items and free items in the per-CPU caches.
510On systems with more than one CPU it may not be possible to allocate
511the specified number of items even when there is no shortage of memory,
512because all of the remaining free items may be in the caches of the
513other CPUs when the limit is hit.
514.Pp
515The
516.Fn uma_zone_set_maxcache
517function limits the number of free items which may be cached in the zone.
518This limit applies to both the per-CPU caches and the cache of free buckets.
519.Pp
520The
521.Fn uma_zone_get_max
522function returns the effective upper limit number of items for a zone.
523.Pp
524The
525.Fn uma_zone_get_cur
526function returns an approximation of the number of items currently allocated
527from the zone.
528The returned value is approximate because appropriate synchronisation to
529determine an exact value is not performed by the implementation.
530This ensures low overhead at the expense of potentially stale data being used
531in the calculation.
532.Pp
533The
534.Fn uma_zone_set_warning
535function sets a warning that will be printed on the system console when the
536given zone becomes full and fails to allocate an item.
537The warning will be printed no more often than every five minutes.
538Warnings can be turned off globally by setting the
539.Va vm.zone_warnings
540sysctl tunable to
541.Va 0 .
542.Pp
543The
544.Fn uma_zone_set_maxaction
545function sets a function that will be called when the given zone becomes full
546and fails to allocate an item.
547The function will be called with the zone locked.
548Also, the function
549that called the allocation function may have held additional locks.
550Therefore,
551this function should do very little work (similar to a signal handler).
552.Pp
553The
554.Fn SYSCTL_UMA_MAX parent nbr name access zone descr
555macro declares a static
556.Xr sysctl 9
557oid that exports the effective upper limit number of items for a zone.
558The
559.Fa zone
560argument should be a pointer to
561.Vt uma_zone_t .
562A read of the oid returns the value obtained through
563.Fn uma_zone_get_max .
564A write to the oid sets a new value via
565.Fn uma_zone_set_max .
566The
567.Fn SYSCTL_ADD_UMA_MAX ctx parent nbr name access zone descr
568macro is provided to create this type of oid dynamically.
569.Pp
570The
571.Fn SYSCTL_UMA_CUR parent nbr name access zone descr
572macro declares a static read-only
573.Xr sysctl 9
574oid that exports the approximate current occupancy of the zone.
575The
576.Fa zone
577argument should be a pointer to
578.Vt uma_zone_t .
579A read of the oid returns the value obtained through
580.Fn uma_zone_get_cur .
581The
582.Fn SYSCTL_ADD_UMA_CUR ctx parent nbr name access zone descr
583macro is provided to create this type of oid dynamically.
584.Sh IMPLEMENTATION NOTES
585The memory that these allocation calls return is not executable.
586The
587.Fn uma_zalloc
588function does not support the
589.Dv M_EXEC
590flag to allocate executable memory.
591Not all platforms enforce a distinction between executable and
592non-executable memory.
593.Sh SEE ALSO
594.Xr numa 4 ,
595.Xr vmstat 8 ,
596.Xr malloc 9
597.Rs
598.%A Jeff Bonwick
599.%T "The Slab Allocator: An Object-Caching Kernel Memory Allocator"
600.%D 1994
601.Re
602.Sh HISTORY
603The zone allocator first appeared in
604.Fx 3.0 .
605It was radically changed in
606.Fx 5.0
607to function as a slab allocator.
608.Sh AUTHORS
609.An -nosplit
610The zone allocator was written by
611.An John S. Dyson .
612The zone allocator was rewritten in large parts by
613.An Jeff Roberson Aq Mt jeff@FreeBSD.org
614to function as a slab allocator.
615.Pp
616This manual page was written by
617.An Dag-Erling Sm\(/orgrav Aq Mt des@FreeBSD.org .
618Changes for UMA by
619.An Jeroen Ruigrok van der Werven Aq Mt asmodai@FreeBSD.org .
620