1.\"
2.\" Copyright (C) 2018 Matthew Macy <mmacy@FreeBSD.org>.
3.\"
4.\" Redistribution and use in source and binary forms, with or without
5.\" modification, are permitted provided that the following conditions
6.\" are met:
7.\" 1. Redistributions of source code must retain the above copyright
8.\"    notice(s), this list of conditions and the following disclaimer as
9.\"    the first lines of this file unmodified other than the possible
10.\"    addition of one or more copyright notices.
11.\" 2. Redistributions in binary form must reproduce the above copyright
12.\"    notice(s), this list of conditions and the following disclaimer in the
13.\"    documentation and/or other materials provided with the distribution.
14.\"
15.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
16.\" EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18.\" DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
19.\" DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20.\" (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21.\" SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22.\" CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
25.\" DAMAGE.
26.\"
.Dd April 30, 2020
.Dt EPOCH 9
.Os
.Sh NAME
.Nm epoch ,
.Nm epoch_context ,
.Nm epoch_alloc ,
.Nm epoch_free ,
.Nm epoch_enter ,
.Nm epoch_exit ,
.Nm epoch_wait ,
.Nm epoch_enter_preempt ,
.Nm epoch_exit_preempt ,
.Nm epoch_wait_preempt ,
.Nm epoch_call ,
.Nm epoch_drain_callbacks ,
.Nm in_epoch ,
.Nm in_epoch_verbose
.Nd kernel epoch based reclamation
.Sh SYNOPSIS
.In sys/param.h
.In sys/proc.h
.In sys/epoch.h
.\" Types
.Bd -literal
struct epoch;		/* Opaque */
.Ed
.Vt typedef "struct epoch *epoch_t" ;
.Bd -literal
struct epoch_context {
	void	*data[2];
};
.Ed
.Vt typedef "struct epoch_context *epoch_context_t" ;
.Vt typedef "void epoch_callback_t(epoch_context_t)" ;
.Bd -literal
struct epoch_tracker;	/* Opaque */
.Ed
.Vt typedef "struct epoch_tracker *epoch_tracker_t" ;
.\" Declarations
.Ft epoch_t
.Fn epoch_alloc "const char *name" "int flags"
.Ft void
.Fn epoch_free "epoch_t epoch"
.Ft void
.Fn epoch_enter "epoch_t epoch"
.Ft void
.Fn epoch_exit "epoch_t epoch"
.Ft void
.Fn epoch_wait "epoch_t epoch"
.Ft void
.Fn epoch_enter_preempt "epoch_t epoch" "epoch_tracker_t et"
.Ft void
.Fn epoch_exit_preempt "epoch_t epoch" "epoch_tracker_t et"
.Ft void
.Fn epoch_wait_preempt "epoch_t epoch"
.Ft void
.Fn epoch_call "epoch_t epoch" "epoch_callback_t callback" "epoch_context_t ctx"
.Ft void
.Fn epoch_drain_callbacks "epoch_t epoch"
.Ft int
.Fn in_epoch "epoch_t epoch"
.Ft int
.Fn in_epoch_verbose "epoch_t epoch" "int dump_onfail"
.Sh DESCRIPTION
Epochs are used to guarantee liveness and immutability of data by
deferring reclamation and mutation until a grace period has elapsed.
Epochs do not have any lock ordering issues.
Entering and leaving an epoch section will never block.
.Pp
Epochs are allocated with
.Fn epoch_alloc .
The
.Fa name
argument is used for debugging convenience when the
.Cd EPOCH_TRACE
kernel option is configured.
By default, epochs do not allow preemption during sections and mutexes
cannot be held across
.Fn epoch_wait_preempt .
The
.Fa flags
argument is formed by
.Em OR Ns 'ing
the following values:
.Bl -tag -offset indent -width Ds
.It Dv EPOCH_LOCKED
Permit holding mutexes across
.Fn epoch_wait_preempt
(requires
.Dv EPOCH_PREEMPT ) .
When doing so, care must be taken to avoid creating a situation in which
deadlock is possible.
.It Dv EPOCH_PREEMPT
The
.Vt epoch
will allow preemption during sections.
Only non-sleepable locks may be acquired during a preemptible epoch.
The functions
.Fn epoch_enter_preempt ,
.Fn epoch_exit_preempt ,
and
.Fn epoch_wait_preempt
must be used in place of
.Fn epoch_enter ,
.Fn epoch_exit ,
and
.Fn epoch_wait ,
respectively.
.El
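.Pp
For illustration, a subsystem might allocate a private preemptible epoch
when it is initialized and release it when it is torn down.
This is a minimal sketch; the variable
.Va foo_epoch
and the functions shown are hypothetical.
.Bd -literal
static epoch_t foo_epoch;

static void
foo_init(void)
{
	/* Allocate a preemptible epoch protecting foo's data structures. */
	foo_epoch = epoch_alloc("foo", EPOCH_PREEMPT);
}

static void
foo_uninit(void)
{
	/* All epoch sections must have exited by this point. */
	epoch_free(foo_epoch);
}
.Ed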
.Pp
.Vt epoch Ns s
are freed with
.Fn epoch_free .
.Pp
Threads indicate the start of an epoch critical section by calling
.Fn epoch_enter
(or
.Fn epoch_enter_preempt
for preemptible epochs).
Threads call
.Fn epoch_exit
(or
.Fn epoch_exit_preempt
for preemptible epochs)
to indicate the end of a critical section.
.Vt struct epoch_tracker Ns s
are stack objects whose pointers are passed to
.Fn epoch_enter_preempt
and
.Fn epoch_exit_preempt
(much like
.Vt struct rm_priotracker ) .
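.Pp
For example, a preemptible epoch section brackets read-only accesses to
epoch-protected data with an on-stack tracker.
This is a sketch only; the epoch
.Va foo_epoch
and the function are hypothetical.
.Bd -literal
static void
foo_lookup(void)
{
	struct epoch_tracker et;

	epoch_enter_preempt(foo_epoch, &et);
	/* Read-only traversal of data protected by foo_epoch. */
	epoch_exit_preempt(foo_epoch, &et);
}
.Ed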
.Pp
Threads can defer work, either synchronously or asynchronously, until a
grace period has elapsed, that is, until every thread that was in an epoch
section at the time of the call has exited its section.
.Fn epoch_call
defers work asynchronously by invoking the provided
.Fa callback
at a later time.
.Fn epoch_wait
(or
.Fn epoch_wait_preempt )
blocks the current thread until the grace period has expired and the work
can be done safely.
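.Pp
The following sketch shows how an asynchronously freed structure might
embed a
.Vt struct epoch_context
and recover itself in the callback.
The structure, the malloc type, and the functions shown are hypothetical.
.Bd -literal
struct foo {
	/* ... other fields ... */
	struct epoch_context	foo_epoch_ctx;
};

static void
foo_destroy(epoch_context_t ctx)
{
	struct foo *fp;

	/* Recover the enclosing structure from its context member. */
	fp = __containerof(ctx, struct foo, foo_epoch_ctx);
	free(fp, M_FOO);
}

static void
foo_release(struct foo *fp)
{
	/* Defer the free until a grace period has elapsed. */
	epoch_call(foo_epoch, foo_destroy, &fp->foo_epoch_ctx);
}
.Ed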
.Pp
Default, non-preemptible epoch wait
.Fn ( epoch_wait )
is guaranteed to have much shorter completion times relative to
preemptible epoch wait
.Fn ( epoch_wait_preempt ) .
(In the default type, no thread in an epoch section will be preempted
before completing its section.)
.Pp
Code compiled with the
.Cd INVARIANTS
kernel option can assert that a thread is in an epoch by using
.Fn in_epoch .
.Fn in_epoch "epoch"
is equivalent to invoking
.Fn in_epoch_verbose "epoch" "0" .
If
.Cd EPOCH_TRACE
is enabled,
.Fn in_epoch_verbose "epoch" "1"
provides additional verbose debugging information.
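.Pp
For example, a function that requires its caller to be inside an epoch
section might assert this under
.Cd INVARIANTS
as follows; the names are hypothetical.
.Bd -literal
static void
foo_modify(struct foo *fp)
{

	KASSERT(in_epoch(foo_epoch),
	    ("%s: caller is not in an epoch section", __func__));
	/* ... */
}
.Ed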
.Pp
The epoch API currently does not support sleeping in preemptible epoch
sections.
A caller should never call
.Fn epoch_wait
in the middle of an epoch section for the same epoch, as doing so will
lead to deadlock.
.Pp
The
.Fn epoch_drain_callbacks
function is used to drain all pending callbacks scheduled by prior
.Fn epoch_call
invocations on the same epoch.
This function is useful when the shared memory structures referred to by
the epoch callbacks are not reference counted and are rarely freed.
The typical place for calling this function is right before freeing or
invalidating the shared resources used by the epoch callbacks.
This function can sleep and is not optimized for performance.
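.Pp
A teardown path might therefore drain the epoch before freeing the shared
state that pending callbacks could still reference.
The functions, softc structure, and malloc type in this sketch are
hypothetical.
.Bd -literal
static void
foo_detach(struct foo_softc *sc)
{

	/* Stop scheduling new callbacks that reference sc. */
	foo_disconnect(sc);
	/* Wait for callbacks already scheduled via epoch_call(). */
	epoch_drain_callbacks(foo_epoch);
	free(sc, M_FOO);
}
.Ed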
.Sh RETURN VALUES
.Fn in_epoch curepoch
will return 1 if the current thread is in an epoch section of
.Fa curepoch ,
and 0 otherwise.
.Sh EXAMPLES
Async free example:
.Pp
Thread 1:
.Bd -literal
int
in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
    struct ucred *cred)
{
	/* ... */
	epoch_enter(net_epoch);
	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
		sa = ifa->ifa_addr;
		if (sa->sa_family != AF_INET)
			continue;
		sin = (struct sockaddr_in *)sa;
		if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
			ia = (struct in_ifaddr *)ifa;
			break;
		}
	}
	epoch_exit(net_epoch);
	/* ... */
}
.Ed
.Pp
Thread 2:
.Bd -literal
void
ifa_free(struct ifaddr *ifa)
{

	if (refcount_release(&ifa->ifa_refcnt))
		epoch_call(net_epoch, ifa_destroy, &ifa->ifa_epoch_ctx);
}

void
if_purgeaddrs(struct ifnet *ifp)
{

	/* ... */
	IF_ADDR_WLOCK(ifp);
	CK_STAILQ_REMOVE(&ifp->if_addrhead, ifa, ifaddr, ifa_link);
	IF_ADDR_WUNLOCK(ifp);
	ifa_free(ifa);
}
.Ed
.Pp
Thread 1 traverses the ifaddr list in an epoch.
Thread 2 unlinks the entry with the corresponding epoch-safe macro, marks
it as logically free, and then defers deletion.
More general mutation or a synchronous free would have to follow a call to
.Fn epoch_wait .
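.Pp
A synchronous variant might look like the following sketch; the function
name and
.Fn ifa_destroy_sync
are hypothetical, and the caller must not itself be in a section of
.Va net_epoch .
.Bd -literal
void
if_purgeaddr_sync(struct ifnet *ifp, struct ifaddr *ifa)
{

	IF_ADDR_WLOCK(ifp);
	CK_STAILQ_REMOVE(&ifp->if_addrhead, ifa, ifaddr, ifa_link);
	IF_ADDR_WUNLOCK(ifp);
	/* Block until every thread already in the epoch has exited. */
	epoch_wait(net_epoch);
	/* No epoch section can still observe ifa; free it directly. */
	ifa_destroy_sync(ifa);
}
.Ed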
.Sh NOTES
The
.Nm
kernel programming interface is under development and is subject to change.
.Sh SEE ALSO
.Xr callout 9 ,
.Xr locking 9 ,
.Xr mtx_pool 9 ,
.Xr mutex 9 ,
.Xr rwlock 9 ,
.Xr sema 9 ,
.Xr sleep 9 ,
.Xr sx 9
.Sh HISTORY
The
.Nm
framework first appeared in
.Fx 11.0 .
.Sh CAVEATS
One must be cautious when using
.Fn epoch_wait_preempt .
Threads are pinned during epoch sections, so if a thread in a section is
then preempted by a higher-priority, compute-bound thread on that CPU, it
can be prevented from leaving the section indefinitely.
.Pp
Epochs are not a straight replacement for read locks.
Callers must use safe list and tailq traversal routines in an epoch (see
ck_queue).
When modifying a list referenced from an epoch section, safe removal
routines must be used and the caller can no longer modify list entries
in place.
Instead, an item to be modified must be handled with copy-on-write, and
its free must be deferred until after a grace period has elapsed.
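.Pp
For example, a writer might update an epoch-protected list element by
replacing it rather than modifying it in place.
The list, lock macros, structure, and callback in this sketch are
hypothetical.
.Bd -literal
struct foo *ofp, *nfp;

nfp = malloc(sizeof(*nfp), M_FOO, M_WAITOK);
*nfp = *ofp;			/* copy the element */
nfp->foo_value = newvalue;	/* modify the copy */

FOO_LIST_WLOCK();
/* Insert the copy before unlinking the original. */
CK_STAILQ_INSERT_AFTER(&foo_list, ofp, nfp, foo_link);
CK_STAILQ_REMOVE(&foo_list, ofp, foo, foo_link);
FOO_LIST_WUNLOCK();

/* Readers may still hold a pointer to ofp; defer its free. */
epoch_call(foo_epoch, foo_destroy, &ofp->foo_epoch_ctx);
.Ed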