xref: /freebsd/share/man/man9/zone.9 (revision 058ac3e8063366dafa634d9107642e12b038bf09)
1.\"-
2.\" Copyright (c) 2001 Dag-Erling Coïdan Smørgrav
3.\" All rights reserved.
4.\"
5.\" Redistribution and use in source and binary forms, with or without
6.\" modification, are permitted provided that the following conditions
7.\" are met:
8.\" 1. Redistributions of source code must retain the above copyright
9.\"    notice, this list of conditions and the following disclaimer.
10.\" 2. Redistributions in binary form must reproduce the above copyright
11.\"    notice, this list of conditions and the following disclaimer in the
12.\"    documentation and/or other materials provided with the distribution.
13.\"
14.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24.\" SUCH DAMAGE.
25.\"
26.\" $FreeBSD$
27.\"
28.Dd February 15, 2022
29.Dt UMA 9
30.Os
31.Sh NAME
32.Nm UMA
33.Nd general-purpose kernel object allocator
34.Sh SYNOPSIS
35.In sys/param.h
36.In sys/queue.h
37.In vm/uma.h
38.Bd -literal
39typedef int (*uma_ctor)(void *mem, int size, void *arg, int flags);
40typedef void (*uma_dtor)(void *mem, int size, void *arg);
41typedef int (*uma_init)(void *mem, int size, int flags);
42typedef void (*uma_fini)(void *mem, int size);
43typedef int (*uma_import)(void *arg, void **store, int count, int domain,
44    int flags);
45typedef void (*uma_release)(void *arg, void **store, int count);
46typedef void *(*uma_alloc)(uma_zone_t zone, vm_size_t size, int domain,
47    uint8_t *pflag, int wait);
48typedef void (*uma_free)(void *item, vm_size_t size, uint8_t pflag);
49
50.Ed
51.Ft uma_zone_t
52.Fo uma_zcreate
53.Fa "char *name" "size_t size"
54.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini"
55.Fa "int align" "uint16_t flags"
56.Fc
57.Ft uma_zone_t
58.Fo uma_zcache_create
59.Fa "char *name" "int size"
60.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini"
61.Fa "uma_import zimport" "uma_release zrelease"
62.Fa "void *arg" "int flags"
63.Fc
64.Ft uma_zone_t
65.Fo uma_zsecond_create
66.Fa "char *name"
67.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini"
68.Fa "uma_zone_t master"
69.Fc
70.Ft void
71.Fn uma_zdestroy "uma_zone_t zone"
72.Ft "void *"
73.Fn uma_zalloc "uma_zone_t zone" "int flags"
74.Ft "void *"
75.Fn uma_zalloc_arg "uma_zone_t zone" "void *arg" "int flags"
76.Ft "void *"
77.Fn uma_zalloc_domain "uma_zone_t zone" "void *arg" "int domain" "int flags"
78.Ft "void *"
79.Fn uma_zalloc_pcpu "uma_zone_t zone" "int flags"
80.Ft "void *"
81.Fn uma_zalloc_pcpu_arg "uma_zone_t zone" "void *arg" "int flags"
82.Ft void
83.Fn uma_zfree "uma_zone_t zone" "void *item"
84.Ft void
85.Fn uma_zfree_arg "uma_zone_t zone" "void *item" "void *arg"
86.Ft void
87.Fn uma_zfree_pcpu "uma_zone_t zone" "void *item"
88.Ft void
89.Fn uma_zfree_pcpu_arg "uma_zone_t zone" "void *item" "void *arg"
90.Ft void
91.Fn uma_prealloc "uma_zone_t zone" "int nitems"
92.Ft void
93.Fn uma_zone_reserve "uma_zone_t zone" "int nitems"
94.Ft void
95.Fn uma_zone_reserve_kva "uma_zone_t zone" "int nitems"
96.Ft void
97.Fn uma_reclaim "int req"
98.Ft void
99.Fn uma_reclaim_domain "int req" "int domain"
100.Ft void
101.Fn uma_zone_reclaim "uma_zone_t zone" "int req"
102.Ft void
103.Fn uma_zone_reclaim_domain "uma_zone_t zone" "int req" "int domain"
104.Ft void
105.Fn uma_zone_set_allocf "uma_zone_t zone" "uma_alloc allocf"
106.Ft void
107.Fn uma_zone_set_freef "uma_zone_t zone" "uma_free freef"
108.Ft int
109.Fn uma_zone_set_max "uma_zone_t zone" "int nitems"
110.Ft void
111.Fn uma_zone_set_maxcache "uma_zone_t zone" "int nitems"
112.Ft int
113.Fn uma_zone_get_max "uma_zone_t zone"
114.Ft int
115.Fn uma_zone_get_cur "uma_zone_t zone"
116.Ft void
117.Fn uma_zone_set_warning "uma_zone_t zone" "const char *warning"
118.Ft void
119.Fn uma_zone_set_maxaction "uma_zone_t zone" "void (*maxaction)(uma_zone_t)"
120.In sys/sysctl.h
121.Fn SYSCTL_UMA_MAX parent nbr name access zone descr
122.Fn SYSCTL_ADD_UMA_MAX ctx parent nbr name access zone descr
123.Fn SYSCTL_UMA_CUR parent nbr name access zone descr
124.Fn SYSCTL_ADD_UMA_CUR ctx parent nbr name access zone descr
125.Sh DESCRIPTION
126UMA (Universal Memory Allocator) provides an efficient interface for managing
127dynamically-sized collections of items of identical size, referred to as zones.
128Zones keep track of which items are in use and which
129are not, and UMA provides functions for allocating items from a zone and
130for releasing them back, making them available for subsequent allocation requests.
131Zones maintain per-CPU caches with linear scalability on SMP
132systems as well as round-robin and first-touch policies for NUMA
133systems.
134The number of items cached per CPU is bounded, and each zone additionally
135maintains an unbounded cache of items that is used to quickly satisfy
136per-CPU cache allocation misses.
137.Pp
138Two types of zones exist: regular zones and cache zones.
139In a regular zone, items are allocated from a slab, which is one or more
140virtually contiguous memory pages that have been allocated from the kernel's
141page allocator.
142Internally, slabs are managed by a UMA keg, which is responsible for allocating
143slabs and keeping track of their usage by one or more zones.
144In typical usage, there is one keg per zone, so slabs are not shared among
145multiple zones.
146.Pp
147Regular zones import items from a keg, and release items back to that keg if
148requested.
149Cache zones do not have a keg, and instead use custom import and release
150methods.
151For example, some collections of kernel objects are statically allocated
152at boot-time, and the size of the collection does not change.
153A cache zone can be used to implement an efficient allocator for the objects in
154such a collection.
155.Pp
156The
157.Fn uma_zcreate
158and
159.Fn uma_zcache_create
160functions create a new regular zone and cache zone, respectively.
161The
162.Fn uma_zsecond_create
163function creates a regular zone which shares the keg of the zone
164specified by the
165.Fa master
166argument.
167The
168.Fa name
169argument is a text name of the zone for debugging and stats; this memory
170should not be freed until the zone has been deallocated.
171.Pp
172The
173.Fa ctor
174and
175.Fa dtor
176arguments are callback functions that are called by
177the UMA subsystem at the time of the call to
178.Fn uma_zalloc
179and
180.Fn uma_zfree
181respectively.
182Their purpose is to provide hooks for any initialization or teardown
183that must be performed each time an item is allocated or
184released.
185A good use for the
186.Fa ctor
187and
188.Fa dtor
189callbacks might be to initialize a data structure embedded in the item,
190such as a
191.Xr queue 3
192head.
193.Pp
194The
195.Fa zinit
196and
197.Fa zfini
198arguments are used to optimize the allocation of items from the zone.
199They are called by the UMA subsystem whenever
200it needs to allocate or free items to satisfy requests or memory pressure.
201A good use for the
202.Fa zinit
203and
204.Fa zfini
205callbacks might be to
206initialize and destroy a mutex contained within an item.
207This would allow one to avoid destroying and re-initializing the mutex
208each time the item is freed and re-allocated.
209They are not called on each call to
210.Fn uma_zalloc
211and
212.Fn uma_zfree
213but rather when an item is imported into a zone's cache, and when a zone
214releases an item to the slab allocator, typically as a response to memory
215pressure.
216.Pp
217For
218.Fn uma_zcache_create ,
219the
220.Fa zimport
221and
222.Fa zrelease
223functions are called to import items into the zone and to release items
224from the zone, respectively.
225The
226.Fa zimport
227function should store pointers to items in the
228.Fa store
229array, which contains a maximum of
230.Fa count
231entries.
232The function must return the number of imported items, which may be less than
233the maximum.
234Similarly, the
235.Fa store
236parameter to the
237.Fa zrelease
238function contains an array of
239.Fa count
240pointers to items.
241The
242.Fa arg
243parameter passed to
244.Fn uma_zcache_create
245is provided to the import and release functions.
246The
247.Fa domain
248parameter to
249.Fa zimport
250specifies the requested
251.Xr numa 4
252domain for the allocation.
253It is either a NUMA domain number or the special value
254.Dv UMA_ANYDOMAIN .
255.Pp
256The
257.Fa flags
258argument of
259.Fn uma_zcreate
260and
261.Fn uma_zcache_create
262is a subset of the following flags:
263.Bl -tag -width "foo"
264.It Dv UMA_ZONE_NOFREE
265Slabs allocated to the zone's keg are never freed.
266.It Dv UMA_ZONE_NODUMP
267Pages belonging to the zone will not be included in minidumps.
268.It Dv UMA_ZONE_PCPU
269An allocation from the zone has
270.Va mp_ncpu
271shadow copies that are privately assigned to CPUs.
272A CPU can address its private copy using the base allocation address plus
273a multiple of the current CPU ID and
274.Fn sizeof "struct pcpu" :
275.Bd -literal -offset indent
276foo_zone = uma_zcreate(..., UMA_ZONE_PCPU);
277 ...
278foo_base = uma_zalloc(foo_zone, ...);
279 ...
280critical_enter();
281foo_pcpu = (foo_t *)zpcpu_get(foo_base);
282/* do something with foo_pcpu */
283critical_exit();
284
285.Ed
286Note that
287.Dv M_ZERO
288cannot be used when allocating items from a PCPU zone.
289To obtain zeroed memory from a PCPU zone, use the
290.Fn uma_zalloc_pcpu
291function and its variants instead, and pass
292.Dv M_ZERO .
293.It Dv UMA_ZONE_NOTOUCH
294The UMA subsystem may not directly touch (i.e. read or write) the slab memory.
295Otherwise, by default, book-keeping of items within a slab may be done in the
296slab page itself, and
297.Dv INVARIANTS
298kernels may also do use-after-free checking by accessing the slab memory.
299.It Dv UMA_ZONE_ZINIT
300The zone will have its
301.Ft uma_init
302method set to an internal method that initializes a newly allocated slab
303to all zeros.
304Do not confuse the
305.Ft uma_init
306method with
307.Ft uma_ctor .
308A zone with the
309.Dv UMA_ZONE_ZINIT
310flag does not return zeroed memory on every call to
311.Fn uma_zalloc .
312.It Dv UMA_ZONE_NOTPAGE
313An allocator function will be supplied with
314.Fn uma_zone_set_allocf
315and the memory that it returns may not be kernel virtual memory backed by VM
316pages in the page array.
317.It Dv UMA_ZONE_MALLOC
318The zone is for the
319.Xr malloc 9
320subsystem.
321.It Dv UMA_ZONE_VM
322The zone is for the VM subsystem.
323.It Dv UMA_ZONE_CONTIG
324Items in this zone must be contiguous in physical address space.
325Items will follow normal alignment constraints and may span page boundaries
326between pages with contiguous physical addresses.
327.It Dv UMA_ZONE_UNMANAGED
328By default, UMA zone caches are shrunk to help resolve free page shortages.
329Cached items that have not been used for a long period may also be freed from
330the zone.
331When this flag is set, the system will not reclaim memory from the zone's
332caches.
333.El
334.Pp
335Zones can be destroyed using
336.Fn uma_zdestroy ,
337freeing all memory that is cached in the zone.
338All items allocated from the zone must be freed to the zone before the zone
339may be safely destroyed.
340.Pp
341To allocate an item from a zone, simply call
342.Fn uma_zalloc
343with a pointer to that zone and set the
344.Fa flags
345argument to selected flags as documented in
346.Xr malloc 9 .
347It will return a pointer to an item if successful, or
348.Dv NULL
349in the rare case where all items in the zone are in use and the
350allocator is unable to grow the zone and
351.Dv M_NOWAIT
352is specified.
353.Pp
354Items are released back to the zone from which they were allocated by
355calling
356.Fn uma_zfree
357with a pointer to the zone and a pointer to the item.
358If
359.Fa item
360is
361.Dv NULL ,
362then
363.Fn uma_zfree
364does nothing.
365.Pp
366The variants
367.Fn uma_zalloc_arg
368and
369.Fn uma_zfree_arg
370allow callers to
371specify an argument for the
372.Fa ctor
373and
374.Fa dtor
375functions of the zone, respectively.
376The variants
377.Fn uma_zalloc_pcpu
378and
379.Fn uma_zfree_pcpu
380allocate and free
381.Va mp_ncpu
382shadow copies as described for
383.Dv UMA_ZONE_PCPU .
384If
385.Fa item
386is
387.Dv NULL ,
388then
389.Fn uma_zfree_pcpu
390does nothing.
391.Pp
392The
393.Fn uma_zalloc_domain
394function allows callers to specify a fixed
395.Xr numa 4
396domain to allocate from.
397This uses a guaranteed but slow path in the allocator which reduces
398concurrency.
399.Pp
400The
401.Fn uma_prealloc
402function allocates slabs for the requested number of items, typically following
403the initial creation of a zone.
404Subsequent allocations from the zone will be satisfied using the pre-allocated
405slabs.
406Note that slab allocation is performed with the
407.Dv M_WAITOK
408flag, so
409.Fn uma_prealloc
410may sleep.
411.Pp
412The
413.Fn uma_zone_reserve
414function sets the number of reserved items for the zone.
415.Fn uma_zalloc
416and variants will ensure that the zone contains at least the reserved number
417of free items.
418Reserved items may be allocated by specifying
419.Dv M_USE_RESERVE
420in the allocation request flags.
421.Fn uma_zone_reserve
422does not perform any pre-allocation by itself.
423.Pp
424The
425.Fn uma_zone_reserve_kva
426function pre-allocates kernel virtual address space for the requested
427number of items.
428Subsequent allocations from the zone will be satisfied using the pre-allocated
429address space.
430Note that unlike
431.Fn uma_zone_reserve ,
432.Fn uma_zone_reserve_kva
433does not restrict the use of the pre-allocation to
434.Dv M_USE_RESERVE
435requests.
436.Pp
437The
438.Fn uma_reclaim
439and
440.Fn uma_zone_reclaim
441functions reclaim cached items from UMA zones, releasing unused memory.
442The
443.Fn uma_reclaim
444function reclaims items from all regular zones, while
445.Fn uma_zone_reclaim
446reclaims items only from the specified zone.
447The
448.Fa req
449parameter must be one of three values which specify how aggressively
450items are to be reclaimed:
451.Bl -tag -width indent
452.It Dv UMA_RECLAIM_TRIM
453Reclaim items only in excess of the zone's estimated working set size.
454The working set size is periodically updated and tracks the recent history
455of the zone's usage.
456.It Dv UMA_RECLAIM_DRAIN
457Reclaim all items from the unbounded cache.
458Free items in the per-CPU caches are left alone.
459.It Dv UMA_RECLAIM_DRAIN_CPU
460Reclaim all cached items.
461.El
462The
463.Fn uma_reclaim_domain
464and
465.Fn uma_zone_reclaim_domain
466functions apply only to items allocated from the specified domain.
467In the case of domains using a round-robin NUMA policy, cached items from all
468domains are freed to the keg, but only slabs from the specific domain will
469be freed.
470.Pp
471The
472.Fn uma_zone_set_allocf
473and
474.Fn uma_zone_set_freef
475functions allow a zone's default slab allocation and free functions to be
476overridden.
477This is useful if memory with special constraints such as attributes,
478alignment, or address ranges must be used.
479.Pp
480The
481.Fn uma_zone_set_max
482function limits the number of items
483.Pq and therefore memory
484that can be allocated to
485.Fa zone .
486The
487.Fa nitems
488argument specifies the requested upper limit number of items.
489The effective limit is returned to the caller, as it may end up being higher
490than requested due to the implementation rounding up to ensure all memory pages
491allocated to the zone are utilised to capacity.
492The limit applies to the total number of items in the zone, which includes
493allocated items, free items and free items in the per-CPU caches.
494On systems with more than one CPU it may not be possible to allocate
495the specified number of items even when there is no shortage of memory,
496because all of the remaining free items may be in the caches of the
497other CPUs when the limit is hit.
498.Pp
499The
500.Fn uma_zone_set_maxcache
501function limits the number of free items which may be cached in the zone.
502This limit applies to both the per-CPU caches and the cache of free buckets.
503.Pp
504The
505.Fn uma_zone_get_max
506function returns the effective upper limit number of items for a zone.
507.Pp
508The
509.Fn uma_zone_get_cur
510function returns an approximation of the number of items currently allocated
511from the zone.
512The returned value is approximate because appropriate synchronisation to
513determine an exact value is not performed by the implementation.
514This ensures low overhead at the expense of potentially stale data being used
515in the calculation.
516.Pp
517The
518.Fn uma_zone_set_warning
519function sets a warning that will be printed on the system console when the
520given zone becomes full and fails to allocate an item.
521The warning will be printed no more often than every five minutes.
522Warnings can be turned off globally by setting the
523.Va vm.zone_warnings
524sysctl tunable to
525.Va 0 .
526.Pp
527The
528.Fn uma_zone_set_maxaction
529function sets a function that will be called when the given zone becomes full
530and fails to allocate an item.
531The function will be called with the zone locked.
532Also, the function
533that called the allocation function may have held additional locks.
534Therefore,
535this function should do very little work (similar to a signal handler).
536.Pp
537The
538.Fn SYSCTL_UMA_MAX parent nbr name access zone descr
539macro declares a static
540.Xr sysctl 9
541oid that exports the effective upper limit number of items for a zone.
542The
543.Fa zone
544argument should be a pointer to
545.Vt uma_zone_t .
546A read of the oid returns the value obtained through
547.Fn uma_zone_get_max .
548A write to the oid sets a new value via
549.Fn uma_zone_set_max .
550The
551.Fn SYSCTL_ADD_UMA_MAX ctx parent nbr name access zone descr
552macro is provided to create this type of oid dynamically.
553.Pp
554The
555.Fn SYSCTL_UMA_CUR parent nbr name access zone descr
556macro declares a static read-only
557.Xr sysctl 9
558oid that exports the approximate current occupancy of the zone.
559The
560.Fa zone
561argument should be a pointer to
562.Vt uma_zone_t .
563A read of the oid returns the value obtained through
564.Fn uma_zone_get_cur .
565The
566.Fn SYSCTL_ADD_UMA_CUR ctx parent nbr name access zone descr
567macro is provided to create this type of oid dynamically.
568.Sh IMPLEMENTATION NOTES
569The memory that these allocation calls return is not executable.
570The
571.Fn uma_zalloc
572function does not support the
573.Dv M_EXEC
574flag to allocate executable memory.
575Not all platforms enforce a distinction between executable and
576non-executable memory.
577.Sh SEE ALSO
578.Xr numa 4 ,
579.Xr vmstat 8 ,
580.Xr malloc 9
581.Rs
582.%A Jeff Bonwick
583.%T "The Slab Allocator: An Object-Caching Kernel Memory Allocator"
584.%D 1994
585.Re
586.Sh HISTORY
587The zone allocator first appeared in
588.Fx 3.0 .
589It was radically changed in
590.Fx 5.0
591to function as a slab allocator.
592.Sh AUTHORS
593.An -nosplit
594The zone allocator was written by
595.An John S. Dyson .
596The zone allocator was rewritten in large parts by
597.An Jeff Roberson Aq Mt jeff@FreeBSD.org
598to function as a slab allocator.
599.Pp
600This manual page was written by
601.An Dag-Erling Sm\(/orgrav Aq Mt des@FreeBSD.org .
602Changes for UMA by
603.An Jeroen Ruigrok van der Werven Aq Mt asmodai@FreeBSD.org .
604