xref: /freebsd/share/man/man9/epoch.9 (revision b4af4f93c682e445bf159f0d1ec90b636296c946)
1.\"
2.\" Copyright (C) 2018 Matthew Macy <mmacy@FreeBSD.org>.
3.\"
4.\" Redistribution and use in source and binary forms, with or without
5.\" modification, are permitted provided that the following conditions
6.\" are met:
7.\" 1. Redistributions of source code must retain the above copyright
8.\"    notice(s), this list of conditions and the following disclaimer as
9.\"    the first lines of this file unmodified other than the possible
10.\"    addition of one or more copyright notices.
11.\" 2. Redistributions in binary form must reproduce the above copyright
12.\"    notice(s), this list of conditions and the following disclaimer in the
13.\"    documentation and/or other materials provided with the distribution.
14.\"
15.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
16.\" EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18.\" DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
19.\" DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20.\" (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21.\" SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22.\" CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
25.\" DAMAGE.
26.\"
27.\" $FreeBSD$
28.\"
29.Dd April 30, 2020
30.Dt EPOCH 9
31.Os
32.Sh NAME
33.Nm epoch ,
34.Nm epoch_context ,
35.Nm epoch_alloc ,
36.Nm epoch_free ,
37.Nm epoch_enter ,
38.Nm epoch_exit ,
39.Nm epoch_wait ,
40.Nm epoch_enter_preempt ,
41.Nm epoch_exit_preempt ,
42.Nm epoch_wait_preempt ,
43.Nm epoch_call ,
44.Nm epoch_drain_callbacks ,
45.Nm in_epoch ,
46.Nm in_epoch_verbose
47.Nd kernel epoch based reclamation
48.Sh SYNOPSIS
49.In sys/param.h
50.In sys/proc.h
51.In sys/epoch.h
52.\" Types
53.Bd -literal
54struct epoch;		/* Opaque */
55.Ed
56.Vt typedef "struct epoch *epoch_t" ;
57.Bd -literal
58struct epoch_context {
59	void	*data[2];
60};
61.Ed
62.Vt typedef "struct epoch_context *epoch_context_t" ;
63.Vt typedef "void epoch_callback_t(epoch_context_t)" ;
64.Bd -literal
65struct epoch_tracker;	/* Opaque */
66.Ed
67.Vt typedef "struct epoch_tracker *epoch_tracker_t" ;
68.\" Declarations
69.Ft epoch_t
70.Fn epoch_alloc "const char *name" "int flags"
71.Ft void
72.Fn epoch_free "epoch_t epoch"
73.Ft void
74.Fn epoch_enter "epoch_t epoch"
75.Ft void
76.Fn epoch_exit "epoch_t epoch"
77.Ft void
78.Fn epoch_wait "epoch_t epoch"
79.Ft void
80.Fn epoch_enter_preempt "epoch_t epoch" "epoch_tracker_t et"
81.Ft void
82.Fn epoch_exit_preempt "epoch_t epoch" "epoch_tracker_t et"
83.Ft void
84.Fn epoch_wait_preempt "epoch_t epoch"
85.Ft void
86.Fn epoch_call "epoch_t epoch" "epoch_callback_t callback" "epoch_context_t ctx"
87.Ft void
88.Fn epoch_drain_callbacks "epoch_t epoch"
89.Ft int
90.Fn in_epoch "epoch_t epoch"
91.Ft int
92.Fn in_epoch_verbose "epoch_t epoch" "int dump_onfail"
93.Sh DESCRIPTION
94Epochs are used to guarantee liveness and immutability of data by
95deferring reclamation and mutation until a grace period has elapsed.
96Epochs do not have any lock ordering issues.
97Entering and leaving an epoch section will never block.
98.Pp
99Epochs are allocated with
100.Fn epoch_alloc .
101The
102.Fa name
103argument is used for debugging convenience when the
104.Cd EPOCH_TRACE
105kernel option is configured.
106By default, epochs do not allow preemption during sections.
107By default, mutexes cannot be held across
108.Fn epoch_wait_preempt .
109The
110.Fa flags
111specified are formed by
112.Em OR Ns 'ing
113the following values:
114.Bl -tag -offset indent -width Ds
115.It Dv EPOCH_LOCKED
116Permit holding mutexes across
117.Fn epoch_wait_preempt
118(requires
119.Dv EPOCH_PREEMPT ) .
120When doing this one must be cautious of creating a situation where a deadlock
121is possible.
122.It Dv EPOCH_PREEMPT
123The
124.Vt epoch
125will allow preemption during sections.
126Only non-sleepable locks may be acquired during a preemptible epoch.
127The functions
128.Fn epoch_enter_preempt ,
129.Fn epoch_exit_preempt ,
130and
131.Fn epoch_wait_preempt
132must be used in place of
133.Fn epoch_enter ,
134.Fn epoch_exit ,
135and
136.Fn epoch_wait ,
137respectively.
138.El
139.Pp
140.Vt epoch Ns s
141are freed with
142.Fn epoch_free .
143.Pp
144Threads indicate the start of an epoch critical section by calling
145.Fn epoch_enter
146(or
147.Fn epoch_enter_preempt
148for preemptible epochs).
149Threads call
150.Fn epoch_exit
151(or
152.Fn epoch_exit_preempt
153for preemptible epochs)
154to indicate the end of a critical section.
155.Vt struct epoch_tracker Ns s
156are stack objects whose pointers are passed to
157.Fn epoch_enter_preempt
158and
159.Fn epoch_exit_preempt
160(much like
161.Vt struct rm_priotracker ) .
162.Pp
163Threads can defer work, either synchronously or asynchronously, until a grace
164period has expired since any thread has entered the epoch.
165.Fn epoch_call
166defers work asynchronously by invoking the provided
167.Fa callback
168at a later time.
169.Fn epoch_wait
170(or
171.Fn epoch_wait_preempt )
172blocks the current thread until the grace period has expired and the work can be
173done safely.
174.Pp
175Default, non-preemptible epoch wait
176.Fn ( epoch_wait )
177is guaranteed to have much shorter completion times relative to
178preemptible epoch wait
179.Fn ( epoch_wait_preempt ) .
180(In the default type, none of the threads in an epoch section will be preempted
181before completing its section.)
182.Pp
183INVARIANTS can assert that a thread is in an epoch by using
184.Fn in_epoch .
185.Fn in_epoch "epoch"
186is equivalent to invoking
187.Fn in_epoch_verbose "epoch" "0" .
188If
189.Cd EPOCH_TRACE
190is enabled,
191.Fn in_epoch_verbose "epoch" "1"
192provides additional verbose debugging information.
193.Pp
194The epoch API currently does not support sleeping in epoch_preempt sections.
195A caller should never call
196.Fn epoch_wait
197in the middle of an epoch section for the same epoch as this will lead to a deadlock.
198.Pp
199The
200.Fn epoch_drain_callbacks
201function is used to drain all pending callbacks which have been scheduled by prior
202.Fn epoch_call
203function calls on the same epoch.
204This function is useful when there are shared memory structure(s)
205referred to by the epoch callback(s) which are not refcounted and are
206rarely freed.
207The typical place for calling this function is right before freeing or
208invalidating the shared resource(s) used by the epoch callback(s).
209This function can sleep and is not optimized for performance.
210.Sh RETURN VALUES
211.Fn in_epoch curepoch
212will return 1 if curthread is in curepoch, 0 otherwise.
213.Sh CAVEATS
214One must be cautious when using
215.Fn epoch_wait_preempt .
216Threads are pinned during epoch sections, so if a thread in a section is then
217preempted by a higher priority compute bound thread on that CPU, it can be
218prevented from leaving the section indefinitely.
219.Pp
220Epochs are not a straight replacement for read locks.
221Callers must use safe list and tailq traversal routines in an epoch (see ck_queue).
222When modifying a list referenced from an epoch section, safe removal
223routines must be used and the caller can no longer modify a list entry
224in place.
225An item to be modified must be handled with copy on write
226and frees must be deferred until after a grace period has elapsed.
227.Sh EXAMPLES
228Async free example:
229Thread 1:
230.Bd -literal
231int
232in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
233    struct ucred *cred)
234{
235    /* ... */
236    epoch_enter(net_epoch);
237    CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
238        sa = ifa->ifa_addr;
239	if (sa->sa_family != AF_INET)
240	    continue;
241	sin = (struct sockaddr_in *)sa;
242	if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
243	     ia = (struct in_ifaddr *)ifa;
244	     break;
245	}
246    }
247    epoch_exit(net_epoch);
248    /* ... */
249}
250.Ed
251Thread 2:
252.Bd -literal
253void
254ifa_free(struct ifaddr *ifa)
255{
256
257    if (refcount_release(&ifa->ifa_refcnt))
258        epoch_call(net_epoch, ifa_destroy, &ifa->ifa_epoch_ctx);
259}
260
261void
262if_purgeaddrs(struct ifnet *ifp)
263{
264
265    /* ... */
266    IF_ADDR_WLOCK(ifp);
267    CK_STAILQ_REMOVE(&ifp->if_addrhead, ifa, ifaddr, ifa_link);
268    IF_ADDR_WUNLOCK(ifp);
269    ifa_free(ifa);
270}
271.Ed
272.Pp
273Thread 1 traverses the ifaddr list in an epoch.
274Thread 2 unlinks the entry with the corresponding epoch-safe macro, marks it as logically free,
275and then defers deletion.
276More general mutation or a synchronous
277free would have to follow a call to
278.Fn epoch_wait .
279.Sh NOTES
280The
281.Nm
282kernel programming interface is under development and is subject to change.
284.Sh HISTORY
285The
286.Nm
287framework first appeared in
288.Fx 11.0 .
289.Sh SEE ALSO
290.Xr locking 9 ,
291.Xr mtx_pool 9 ,
292.Xr mutex 9 ,
293.Xr rwlock 9 ,
294.Xr sema 9 ,
295.Xr sleep 9 ,
296.Xr sx 9 ,
297.Xr timeout 9
298