xref: /freebsd/share/man/man9/zone.9 (revision 840aca288042eaf625a23908e807abdfde0bc21d)
1.\"-
2.\" Copyright (c) 2001 Dag-Erling Coïdan Smørgrav
3.\" All rights reserved.
4.\"
5.\" Redistribution and use in source and binary forms, with or without
6.\" modification, are permitted provided that the following conditions
7.\" are met:
8.\" 1. Redistributions of source code must retain the above copyright
9.\"    notice, this list of conditions and the following disclaimer.
10.\" 2. Redistributions in binary form must reproduce the above copyright
11.\"    notice, this list of conditions and the following disclaimer in the
12.\"    documentation and/or other materials provided with the distribution.
13.\"
14.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24.\" SUCH DAMAGE.
25.\"
26.\" $FreeBSD$
27.\"
28.Dd August 30, 2019
29.Dt UMA 9
30.Os
31.Sh NAME
32.Nm UMA
33.Nd general-purpose kernel object allocator
34.Sh SYNOPSIS
35.In sys/param.h
36.In sys/queue.h
37.In vm/uma.h
38.Cd "options UMA_FIRSTTOUCH"
39.Cd "options UMA_XDOMAIN"
40.Bd -literal
41typedef int (*uma_ctor)(void *mem, int size, void *arg, int flags);
42typedef void (*uma_dtor)(void *mem, int size, void *arg);
43typedef int (*uma_init)(void *mem, int size, int flags);
44typedef void (*uma_fini)(void *mem, int size);
45typedef int (*uma_import)(void *arg, void **store, int count, int domain,
46    int flags);
47typedef void (*uma_release)(void *arg, void **store, int count);
48typedef void *(*uma_alloc)(uma_zone_t zone, vm_size_t size, int domain,
49    uint8_t *pflag, int wait);
50typedef void (*uma_free)(void *item, vm_size_t size, uint8_t pflag);
51
52.Ed
53.Ft uma_zone_t
54.Fo uma_zcreate
55.Fa "char *name" "int size"
56.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini"
57.Fa "int align" "uint16_t flags"
58.Fc
59.Ft uma_zone_t
60.Fo uma_zcache_create
61.Fa "char *name" "int size"
62.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini"
63.Fa "uma_import zimport" "uma_release zrelease"
64.Fa "void *arg" "int flags"
65.Fc
66.Ft uma_zone_t
67.Fo uma_zsecond_create
68.Fa "char *name"
69.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini"
70.Fa "uma_zone_t master"
71.Fc
72.Ft void
73.Fn uma_zdestroy "uma_zone_t zone"
74.Ft "void *"
75.Fn uma_zalloc "uma_zone_t zone" "int flags"
76.Ft "void *"
77.Fn uma_zalloc_arg "uma_zone_t zone" "void *arg" "int flags"
78.Ft "void *"
79.Fn uma_zalloc_domain "uma_zone_t zone" "void *arg" "int domain" "int flags"
80.Ft "void *"
81.Fn uma_zalloc_pcpu "uma_zone_t zone" "int flags"
82.Ft "void *"
83.Fn uma_zalloc_pcpu_arg "uma_zone_t zone" "void *arg" "int flags"
84.Ft void
85.Fn uma_zfree "uma_zone_t zone" "void *item"
86.Ft void
87.Fn uma_zfree_arg "uma_zone_t zone" "void *item" "void *arg"
88.Ft void
89.Fn uma_zfree_domain "uma_zone_t zone" "void *item" "void *arg"
90.Ft void
91.Fn uma_zfree_pcpu "uma_zone_t zone" "void *item"
92.Ft void
93.Fn uma_zfree_pcpu_arg "uma_zone_t zone" "void *item" "void *arg"
94.Ft void
95.Fn uma_prealloc "uma_zone_t zone" "int nitems"
96.Ft void
97.Fn uma_zone_reserve "uma_zone_t zone" "int nitems"
98.Ft void
99.Fn uma_zone_reserve_kva "uma_zone_t zone" "int nitems"
100.Ft void
101.Fn uma_zone_set_allocf "uma_zone_t zone" "uma_alloc allocf"
102.Ft void
103.Fn uma_zone_set_freef "uma_zone_t zone" "uma_free freef"
104.Ft int
105.Fn uma_zone_set_max "uma_zone_t zone" "int nitems"
106.Ft int
107.Fn uma_zone_set_maxcache "uma_zone_t zone" "int nitems"
108.Ft int
109.Fn uma_zone_get_max "uma_zone_t zone"
110.Ft int
111.Fn uma_zone_get_cur "uma_zone_t zone"
112.Ft void
113.Fn uma_zone_set_warning "uma_zone_t zone" "const char *warning"
114.Ft void
115.Fn uma_zone_set_maxaction "uma_zone_t zone" "void (*maxaction)(uma_zone_t)"
116.Ft void
117.Fn uma_reclaim
118.In sys/sysctl.h
119.Fn SYSCTL_UMA_MAX parent nbr name access zone descr
120.Fn SYSCTL_ADD_UMA_MAX ctx parent nbr name access zone descr
121.Fn SYSCTL_UMA_CUR parent nbr name access zone descr
122.Fn SYSCTL_ADD_UMA_CUR ctx parent nbr name access zone descr
123.Sh DESCRIPTION
124UMA (Universal Memory Allocator) provides an efficient interface for managing
125dynamically-sized collections of items of identical size, referred to as zones.
126Zones keep track of which items are in use and which
127are not, and UMA provides functions for allocating items from a zone and
128for releasing them back, making them available for subsequent allocation requests.
129Zones maintain per-CPU caches with linear scalability on SMP
130systems as well as round-robin and first-touch policies for NUMA
131systems.
132The number of items cached per CPU is bounded, and each zone additionally
133maintains an unbounded cache of items that is used to quickly satisfy
134per-CPU cache allocation misses.
135.Pp
136Two types of zones exist: regular zones and cache zones.
137In a regular zone, items are allocated from a slab, which is one or more
138virtually contiguous memory pages that have been allocated from the kernel's
139page allocator.
140Internally, slabs are managed by a UMA keg, which is responsible for allocating
141slabs and keeping track of their usage by one or more zones.
142In typical usage, there is one keg per zone, so slabs are not shared among
143multiple zones.
144.Pp
145Regular zones import items from a keg, and release items back to that keg if
146requested.
147Cache zones do not have a keg, and instead use custom import and release
148methods.
149For example, some collections of kernel objects are statically allocated
150at boot-time, and the size of the collection does not change.
151A cache zone can be used to implement an efficient allocator for the objects in
152such a collection.
153.Pp
154The
155.Fn uma_zcreate
156and
157.Fn uma_zcache_create
158functions create a new regular zone and cache zone, respectively.
159The
160.Fn uma_zsecond_create
161function creates a regular zone which shares the keg of the zone
162specified by the
163.Fa master
164argument.
165The
166.Fa name
167argument is a text name of the zone for debugging and stats; this memory
168should not be freed until the zone has been deallocated.
169.Pp
170The
171.Fa ctor
172and
173.Fa dtor
174arguments are callback functions that are called by
175the UMA subsystem at the time of the call to
176.Fn uma_zalloc
177and
178.Fn uma_zfree
179respectively.
180Their purpose is to provide hooks for initializing or
181destroying things that need to be done at the time of the allocation
182or release of a resource.
183A good usage for the
184.Fa ctor
185and
186.Fa dtor
187callbacks might be to initialize a data structure embedded in the item,
188such as a
189.Xr queue 3
190head.
191.Pp
192The
193.Fa zinit
194and
195.Fa zfini
196arguments are used to optimize the allocation of items from the zone.
197They are called by the UMA subsystem whenever
198it needs to allocate or free items to satisfy requests or memory pressure.
199A good use for the
200.Fa zinit
201and
202.Fa zfini
203callbacks might be to
204initialize and destroy a mutex contained within an item.
205This would allow one to avoid destroying and re-initializing the mutex
206each time the item is freed and re-allocated.
207They are not called on each call to
208.Fn uma_zalloc
209and
210.Fn uma_zfree
211but rather when an item is imported into a zone's cache, and when a zone
212releases an item to the slab allocator, typically as a response to memory
213pressure.
214.Pp
215For
216.Fn uma_zcache_create ,
217the
218.Fa zimport
219and
220.Fa zrelease
221functions are called to import items into the zone and to release items
222from the zone, respectively.
223The
224.Fa zimport
225function should store pointers to items in the
226.Fa store
227array, which contains a maximum of
228.Fa count
229entries.
230The function must return the number of imported items, which may be less than
231the maximum.
232Similarly, the
233.Fa store
234parameter to the
235.Fa zrelease
236function contains an array of
237.Fa count
238pointers to items.
239The
240.Fa arg
241parameter passed to
242.Fn uma_zcache_create
243is provided to the import and release functions.
244The
245.Fa domain
246parameter to
247.Fa zimport
248specifies the requested
249.Xr numa 4
250domain for the allocation.
251It is either a NUMA domain number or the special value
252.Dv UMA_ANYDOMAIN .
253.Pp
254The
255.Fa flags
256argument of
257.Fn uma_zcreate
258and
259.Fn uma_zcache_create
260is a subset of the following flags:
261.Bl -tag -width "foo"
262.It Dv UMA_ZONE_NOFREE
263Slabs allocated to the zone's keg are never freed.
264.It Dv UMA_ZONE_NODUMP
265Pages belonging to the zone will not be included in minidumps.
266.It Dv UMA_ZONE_PCPU
267An allocation from the zone will have
268.Va mp_ncpu
269shadow copies that are privately assigned to CPUs.
270A CPU can address its private copy using the base allocation address plus
271a multiple of the current CPU ID and
272.Fn sizeof "struct pcpu" :
273.Bd -literal -offset indent
274foo_zone = uma_zcreate(..., UMA_ZONE_PCPU);
275 ...
276foo_base = uma_zalloc(foo_zone, ...);
277 ...
278critical_enter();
279foo_pcpu = (foo_t *)zpcpu_get(foo_base);
280/* do something with foo_pcpu */
281critical_exit();
282
283.Ed
284Note that
285.Dv M_ZERO
286cannot be used when allocating items from a PCPU zone.
287To obtain zeroed memory from a PCPU zone, use the
288.Fn uma_zalloc_pcpu
289function and its variants instead, and pass
290.Dv M_ZERO .
291.It Dv UMA_ZONE_OFFPAGE
292By default book-keeping of items within a slab is done in the slab page itself.
293This flag explicitly tells the subsystem that the book-keeping structure should be
294allocated separately, from a special internal zone.
295This flag requires either
296.Dv UMA_ZONE_VTOSLAB
297or
298.Dv UMA_ZONE_HASH ,
299since the subsystem requires a mechanism to find the book-keeping structure
300for an item being freed.
301The subsystem may choose to prefer offpage book-keeping for certain zones
302implicitly.
303.It Dv UMA_ZONE_ZINIT
304The zone will have its
305.Ft uma_init
306method set to an internal method that initializes a newly allocated slab
307to all zeros.
308Do not confuse the
309.Ft uma_init
310method with
311.Ft uma_ctor .
312A zone with the
313.Dv UMA_ZONE_ZINIT
314flag does not return zeroed memory on every
315.Fn uma_zalloc .
316.It Dv UMA_ZONE_HASH
317The zone should use an internal hash table to find the slab book-keeping
318structure to which an allocation being freed belongs.
319.It Dv UMA_ZONE_VTOSLAB
320The zone should use a special field of
321.Vt vm_page_t
322to find the slab book-keeping structure to which an allocation being freed belongs.
323.It Dv UMA_ZONE_MALLOC
324The zone is for the
325.Xr malloc 9
326subsystem.
327.It Dv UMA_ZONE_VM
328The zone is for the VM subsystem.
329.It Dv UMA_ZONE_NUMA
330The zone should use a first-touch NUMA policy rather than the round-robin
331default.
332If the
333.Dv UMA_FIRSTTOUCH
334kernel option is configured, all zones implicitly use a first-touch policy,
335and the
336.Dv UMA_ZONE_NUMA
337flag has no effect.
338The
339.Dv UMA_XDOMAIN
340kernel option, when configured, causes UMA to do the extra tracking to ensure
341that allocations from first-touch zones are always local.
342Otherwise, consumers that do not free memory on the same domain from which it
343was allocated will cause mixing in per-CPU caches.
344See
345.Xr numa 4
346for more details.
347.El
348.Pp
349Zones can be destroyed using
350.Fn uma_zdestroy ,
351freeing all memory that is cached in the zone.
352All items allocated from the zone must be freed to the zone before the zone
353may be safely destroyed.
354.Pp
355To allocate an item from a zone, simply call
356.Fn uma_zalloc
357with a pointer to that zone and set the
358.Fa flags
359argument to selected flags as documented in
360.Xr malloc 9 .
361It will return a pointer to an item if successful, or
362.Dv NULL
363in the rare case where all items in the zone are in use and the
364allocator is unable to grow the zone and
365.Dv M_NOWAIT
366is specified.
367.Pp
368Items are released back to the zone from which they were allocated by
369calling
370.Fn uma_zfree
371with a pointer to the zone and a pointer to the item.
372If
373.Fa item
374is
375.Dv NULL ,
376then
377.Fn uma_zfree
378does nothing.
379.Pp
380The variants
381.Fn uma_zalloc_arg
382and
383.Fn uma_zfree_arg
384allow callers to
385specify an argument for the
386.Fa ctor
387and
388.Fa dtor
389functions of the zone, respectively.
390The
391.Fn uma_zalloc_domain
392function allows callers to specify a fixed
393.Xr numa 4
394domain to allocate from.
395This uses a guaranteed but slow path in the allocator which reduces
396concurrency.
397The
398.Fn uma_zfree_domain
399function should be used to return memory allocated in this fashion.
400This function infers the domain from the pointer and does not require it as an
401argument.
402.Pp
403The
404.Fn uma_prealloc
405function allocates slabs for the requested number of items, typically following
406the initial creation of a zone.
407Subsequent allocations from the zone will be satisfied using the pre-allocated
408slabs.
409Note that slab allocation is performed with the
410.Dv M_WAITOK
411flag, so
412.Fn uma_prealloc
413may sleep.
414.Pp
415The
416.Fn uma_zone_reserve
417function sets the number of reserved items for the zone.
418.Fn uma_zalloc
419and variants will ensure that the zone contains at least the reserved number
420of free items.
421Reserved items may be allocated by specifying
422.Dv M_USE_RESERVE
423in the allocation request flags.
424.Fn uma_zone_reserve
425does not perform any pre-allocation by itself.
426.Pp
427The
428.Fn uma_zone_reserve_kva
429function pre-allocates kernel virtual address space for the requested
430number of items.
431Subsequent allocations from the zone will be satisfied using the pre-allocated
432address space.
433Note that unlike
434.Fn uma_zone_reserve ,
435.Fn uma_zone_reserve_kva
436does not restrict the use of the pre-allocation to
437.Dv M_USE_RESERVE
438requests.
439.Pp
440The
441.Fn uma_zone_set_allocf
442and
443.Fn uma_zone_set_freef
444functions allow a zone's default slab allocation and free functions to be
445overridden.
446This is useful if the zone's items have special memory allocation constraints.
447For example, if multi-page objects are required to be physically contiguous,
448an
449.Fa allocf
450function which requests contiguous memory from the kernel's page allocator
451may be used.
452.Pp
453The
454.Fn uma_zone_set_max
455function limits the number of items
456.Pq and therefore memory
457that can be allocated to
458.Fa zone .
459The
460.Fa nitems
461argument specifies the requested upper limit number of items.
462The effective limit is returned to the caller, as it may end up being higher
463than requested due to the implementation rounding up to ensure all memory pages
464allocated to the zone are utilised to capacity.
465The limit applies to the total number of items in the zone, which includes
466allocated items, free items and free items in the per-CPU caches.
467On systems with more than one CPU it may not be possible to allocate
468the specified number of items even when there is no shortage of memory,
469because all of the remaining free items may be in the caches of the
470other CPUs when the limit is hit.
471.Pp
472The
473.Fn uma_zone_set_maxcache
474function limits the number of free items which may be cached in the zone,
475excluding the per-CPU caches, which are bounded in size.
476For example, to implement a
477.Ql pure
478per-CPU cache, a cache zone may be configured with a maximum cache size of 0.
479.Pp
480The
481.Fn uma_zone_get_max
482function returns the effective upper limit number of items for a zone.
483.Pp
484The
485.Fn uma_zone_get_cur
486function returns an approximation of the number of items currently allocated
487from the zone.
488The returned value is approximate because appropriate synchronisation to
489determine an exact value is not performed by the implementation.
490This ensures low overhead at the expense of potentially stale data being used
491in the calculation.
492.Pp
493The
494.Fn uma_zone_set_warning
495function sets a warning that will be printed on the system console when the
496given zone becomes full and fails to allocate an item.
497The warning will be printed no more often than every five minutes.
498Warnings can be turned off globally by setting the
499.Va vm.zone_warnings
500sysctl tunable to
501.Va 0 .
502.Pp
503The
504.Fn uma_zone_set_maxaction
505function sets a function that will be called when the given zone becomes full
506and fails to allocate an item.
507The function will be called with the zone locked.
508Also, the function
509that called the allocation function may have held additional locks.
510Therefore,
511this function should do very little work (similar to a signal handler).
512.Pp
513The
514.Fn SYSCTL_UMA_MAX parent nbr name access zone descr
515macro declares a static
516.Xr sysctl 9
517oid that exports the effective upper limit number of items for a zone.
518The
519.Fa zone
520argument should be a pointer to
521.Vt uma_zone_t .
522A read of the oid returns the value obtained through
523.Fn uma_zone_get_max .
524A write to the oid sets a new value via
525.Fn uma_zone_set_max .
526The
527.Fn SYSCTL_ADD_UMA_MAX ctx parent nbr name access zone descr
528macro is provided to create this type of oid dynamically.
529.Pp
530The
531.Fn SYSCTL_UMA_CUR parent nbr name access zone descr
532macro declares a static read-only
533.Xr sysctl 9
534oid that exports the approximate current occupancy of the zone.
535The
536.Fa zone
537argument should be a pointer to
538.Vt uma_zone_t .
539A read of the oid returns the value obtained through
540.Fn uma_zone_get_cur .
541The
542.Fn SYSCTL_ADD_UMA_CUR ctx parent nbr name access zone descr
543macro is provided to create this type of oid dynamically.
544.Sh IMPLEMENTATION NOTES
545The memory that these allocation calls return is not executable.
546The
547.Fn uma_zalloc
548function does not support the
549.Dv M_EXEC
550flag to allocate executable memory.
551Not all platforms enforce a distinction between executable and
552non-executable memory.
553.Sh SEE ALSO
554.Xr numa 4 ,
555.Xr vmstat 8 ,
556.Xr malloc 9
557.Rs
558.%A Jeff Bonwick
559.%T "The Slab Allocator: An Object-Caching Kernel Memory Allocator"
560.%D 1994
561.Re
562.Sh HISTORY
563The zone allocator first appeared in
564.Fx 3.0 .
565It was radically changed in
566.Fx 5.0
567to function as a slab allocator.
568.Sh AUTHORS
569.An -nosplit
570The zone allocator was written by
571.An John S. Dyson .
572The zone allocator was rewritten in large parts by
573.An Jeff Roberson Aq Mt jeff@FreeBSD.org
574to function as a slab allocator.
575.Pp
576This manual page was written by
577.An Dag-Erling Sm\(/orgrav Aq Mt des@FreeBSD.org .
578Changes for UMA by
579.An Jeroen Ruigrok van der Werven Aq Mt asmodai@FreeBSD.org .
580