xref: /freebsd/share/man/man9/epoch.9 (revision 2c1962aba6e73dd0c430636082145f92d636ff13)
133346ed6SMatt Macy.\"
233346ed6SMatt Macy.\" Copyright (C) 2018 Matthew Macy <mmacy@FreeBSD.org>.
333346ed6SMatt Macy.\"
433346ed6SMatt Macy.\" Redistribution and use in source and binary forms, with or without
533346ed6SMatt Macy.\" modification, are permitted provided that the following conditions
633346ed6SMatt Macy.\" are met:
733346ed6SMatt Macy.\" 1. Redistributions of source code must retain the above copyright
833346ed6SMatt Macy.\"    notice(s), this list of conditions and the following disclaimer as
933346ed6SMatt Macy.\"    the first lines of this file unmodified other than the possible
1033346ed6SMatt Macy.\"    addition of one or more copyright notices.
1133346ed6SMatt Macy.\" 2. Redistributions in binary form must reproduce the above copyright
1233346ed6SMatt Macy.\"    notice(s), this list of conditions and the following disclaimer in the
1333346ed6SMatt Macy.\"    documentation and/or other materials provided with the distribution.
1433346ed6SMatt Macy.\"
1533346ed6SMatt Macy.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
1633346ed6SMatt Macy.\" EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1733346ed6SMatt Macy.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
1833346ed6SMatt Macy.\" DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
1933346ed6SMatt Macy.\" DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
2033346ed6SMatt Macy.\" (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
2133346ed6SMatt Macy.\" SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
2233346ed6SMatt Macy.\" CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2333346ed6SMatt Macy.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2433346ed6SMatt Macy.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
2533346ed6SMatt Macy.\" DAMAGE.
2633346ed6SMatt Macy.\"
2733346ed6SMatt Macy.\" $FreeBSD$
2833346ed6SMatt Macy.\"
29*2c1962abSConrad Meyer.Dd December 27, 2019
3033346ed6SMatt Macy.Dt EPOCH 9
3133346ed6SMatt Macy.Os
3233346ed6SMatt Macy.Sh NAME
3333346ed6SMatt Macy.Nm epoch ,
3433346ed6SMatt Macy.Nm epoch_context ,
3533346ed6SMatt Macy.Nm epoch_alloc ,
3633346ed6SMatt Macy.Nm epoch_free ,
3733346ed6SMatt Macy.Nm epoch_enter ,
3833346ed6SMatt Macy.Nm epoch_exit ,
3933346ed6SMatt Macy.Nm epoch_wait ,
40*2c1962abSConrad Meyer.Nm epoch_enter_preempt ,
41*2c1962abSConrad Meyer.Nm epoch_exit_preempt ,
42*2c1962abSConrad Meyer.Nm epoch_wait_preempt ,
4333346ed6SMatt Macy.Nm epoch_call ,
44131b2b76SHans Petter Selasky.Nm epoch_drain_callbacks ,
4533346ed6SMatt Macy.Nm in_epoch ,
46*2c1962abSConrad Meyer.Nm in_epoch_verbose
477739f6e5SEd Schouten.Nd kernel epoch based reclamation
4833346ed6SMatt Macy.Sh SYNOPSIS
4933346ed6SMatt Macy.In sys/param.h
5033346ed6SMatt Macy.In sys/proc.h
5133346ed6SMatt Macy.In sys/epoch.h
52*2c1962abSConrad Meyer.\" Types
53*2c1962abSConrad Meyer.Bd -literal
54*2c1962abSConrad Meyerstruct epoch;		/* Opaque */
55*2c1962abSConrad Meyer.Ed
56*2c1962abSConrad Meyer.Vt typedef "struct epoch *epoch_t" ;
57*2c1962abSConrad Meyer.Bd -literal
58*2c1962abSConrad Meyerstruct epoch_context {
59*2c1962abSConrad Meyer	void	*data[2];
60*2c1962abSConrad Meyer};
61*2c1962abSConrad Meyer.Ed
62*2c1962abSConrad Meyer.Vt typedef "struct epoch_context *epoch_context_t" ;
63*2c1962abSConrad Meyer.Bd -literal
64*2c1962abSConrad Meyerstruct epoch_tracker;	/* Opaque */
65*2c1962abSConrad Meyer.Ed
66*2c1962abSConrad Meyer.Vt typedef "struct epoch_tracker *epoch_tracker_t" ;
67*2c1962abSConrad Meyer.\" Declarations
6833346ed6SMatt Macy.Ft epoch_t
69*2c1962abSConrad Meyer.Fn epoch_alloc "const char *name" "int flags"
70*2c1962abSConrad Meyer.Ft void
71*2c1962abSConrad Meyer.Fn epoch_free "epoch_t epoch"
7233346ed6SMatt Macy.Ft void
7333346ed6SMatt Macy.Fn epoch_enter "epoch_t epoch"
7433346ed6SMatt Macy.Ft void
7533346ed6SMatt Macy.Fn epoch_exit "epoch_t epoch"
7633346ed6SMatt Macy.Ft void
7733346ed6SMatt Macy.Fn epoch_wait "epoch_t epoch"
7833346ed6SMatt Macy.Ft void
79*2c1962abSConrad Meyer.Fn epoch_enter_preempt "epoch_t epoch" "epoch_tracker_t et"
80*2c1962abSConrad Meyer.Ft void
81*2c1962abSConrad Meyer.Fn epoch_exit_preempt "epoch_t epoch" "epoch_tracker_t et"
82*2c1962abSConrad Meyer.Ft void
8370398c2fSMatt Macy.Fn epoch_wait_preempt "epoch_t epoch"
846e36248fSMatt Macy.Ft void
8533346ed6SMatt Macy.Fn epoch_call "epoch_t epoch" "epoch_context_t ctx" "void (*callback)(epoch_context_t)"
86131b2b76SHans Petter Selasky.Ft void
87131b2b76SHans Petter Selasky.Fn epoch_drain_callbacks "epoch_t epoch"
8833346ed6SMatt Macy.Ft int
894619bce8SMatt Macy.Fn in_epoch "epoch_t epoch"
90*2c1962abSConrad Meyer.Ft int
91*2c1962abSConrad Meyer.Fn in_epoch_verbose "epoch_t epoch" "int dump_onfail"
9233346ed6SMatt Macy.Sh DESCRIPTION
9333346ed6SMatt MacyEpochs are used to guarantee liveness and immutability of data by
9433346ed6SMatt Macydeferring reclamation and mutation until a grace period has elapsed.
957d9389b0SEitan AdlerEpochs do not have any lock ordering issues.
967d9389b0SEitan AdlerEntering and leaving an epoch section will never block.
9733346ed6SMatt Macy.Pp
9833346ed6SMatt MacyEpochs are allocated with
99*2c1962abSConrad Meyer.Fn epoch_alloc .
100*2c1962abSConrad MeyerThe
101*2c1962abSConrad Meyer.Fa name
102*2c1962abSConrad Meyerargument is used for debugging convenience when the
103*2c1962abSConrad Meyer.Cd EPOCH_TRACE
104*2c1962abSConrad Meyerkernel option is configured.
105*2c1962abSConrad MeyerBy default, epochs do not allow preemption during sections.
106*2c1962abSConrad MeyerBy default mutexes cannot be held across
107*2c1962abSConrad Meyer.Fn epoch_wait_preempt .
108*2c1962abSConrad MeyerThe
109*2c1962abSConrad Meyer.Fa flags
110*2c1962abSConrad Meyerspecified are formed by
111*2c1962abSConrad Meyer.Em OR Ns 'ing
112*2c1962abSConrad Meyerthe following values:
113*2c1962abSConrad Meyer.Bl -tag -offset indent -width Ds
114*2c1962abSConrad Meyer.It Dv EPOCH_LOCKED
115*2c1962abSConrad MeyerPermit holding mutexes across
116*2c1962abSConrad Meyer.Fn epoch_wait_preempt
117*2c1962abSConrad Meyer(requires
118*2c1962abSConrad Meyer.Dv EPOCH_PREEMPT ) .
119*2c1962abSConrad MeyerWhen doing this one must be cautious of creating a situation where a deadlock
120*2c1962abSConrad Meyeris possible.
121*2c1962abSConrad Meyer.It Dv EPOCH_PREEMPT
122*2c1962abSConrad MeyerThe
123*2c1962abSConrad Meyer.Vt epoch
124*2c1962abSConrad Meyerwill allow preemption during sections.
125*2c1962abSConrad MeyerOnly non-sleepable locks may be acquired during a preemptible epoch.
126*2c1962abSConrad MeyerThe functions
127*2c1962abSConrad Meyer.Fn epoch_enter_preempt ,
128*2c1962abSConrad Meyer.Fn epoch_exit_preempt ,
129*2c1962abSConrad Meyerand
130*2c1962abSConrad Meyer.Fn epoch_wait_preempt
131*2c1962abSConrad Meyermust be used in place of
132*2c1962abSConrad Meyer.Fn epoch_enter ,
133*2c1962abSConrad Meyer.Fn epoch_exit ,
134*2c1962abSConrad Meyerand
135*2c1962abSConrad Meyer.Fn epoch_wait ,
136*2c1962abSConrad Meyerrespectively.
137*2c1962abSConrad Meyer.El
138*2c1962abSConrad Meyer.Pp
139*2c1962abSConrad Meyer.Vt epoch Ns s
140*2c1962abSConrad Meyerare freed with
14133346ed6SMatt Macy.Fn epoch_free .
142*2c1962abSConrad Meyer.Pp
14333346ed6SMatt MacyThreads indicate the start of an epoch critical section by calling
144*2c1962abSConrad Meyer.Fn epoch_enter
145*2c1962abSConrad Meyer(or
146*2c1962abSConrad Meyer.Fn epoch_enter_preempt
147*2c1962abSConrad Meyerfor preemptible epochs).
148*2c1962abSConrad MeyerThreads call
149*2c1962abSConrad Meyer.Fn epoch_exit
150*2c1962abSConrad Meyer(or
151*2c1962abSConrad Meyer.Fn epoch_exit_preempt
152*2c1962abSConrad Meyerfor preemptible epochs)
153*2c1962abSConrad Meyerto indicate the end of a critical section.
154*2c1962abSConrad Meyer.Vt struct epoch_tracker Ns s
155*2c1962abSConrad Meyerare stack objects whose pointers are passed to
15670398c2fSMatt Macy.Fn epoch_enter_preempt
15733346ed6SMatt Macyand
158*2c1962abSConrad Meyer.Fn epoch_exit_preempt
159*2c1962abSConrad Meyer(much like
160*2c1962abSConrad Meyer.Vt struct rm_priotracker ) .
161*2c1962abSConrad Meyer.Pp
162*2c1962abSConrad MeyerThreads can defer work until a grace period has expired since any thread has
163*2c1962abSConrad Meyerentered the epoch either synchronously or asynchronously.
164*2c1962abSConrad Meyer.Fn epoch_call
165*2c1962abSConrad Meyerdefers work asynchronously by invoking the provided
166*2c1962abSConrad Meyer.Fa callback
167*2c1962abSConrad Meyerat a later time.
168*2c1962abSConrad Meyer.Fn epoch_wait
169*2c1962abSConrad Meyer(or
170*2c1962abSConrad Meyer.Fn epoch_wait_preempt )
171*2c1962abSConrad Meyerblocks the current thread until the grace period has expired and the work can be
172*2c1962abSConrad Meyerdone safely.
173*2c1962abSConrad Meyer.Pp
174*2c1962abSConrad MeyerDefault, non-preemptible epoch wait
175*2c1962abSConrad Meyer.Fn ( epoch_wait )
176*2c1962abSConrad Meyeris guaranteed to have much shorter completion times relative to
177*2c1962abSConrad Meyerpreemptible epoch wait
178*2c1962abSConrad Meyer.Fn ( epoch_wait_preempt ) .
179*2c1962abSConrad Meyer(In the default type, no thread in an epoch section will be preempted
180*2c1962abSConrad Meyerbefore completing its section.)
181*2c1962abSConrad Meyer.Pp
18233346ed6SMatt MacyINVARIANTS can assert that a thread is in an epoch by using
18333346ed6SMatt Macy.Fn in_epoch .
184*2c1962abSConrad Meyer.Fn in_epoch "epoch"
185*2c1962abSConrad Meyeris equivalent to invoking
186*2c1962abSConrad Meyer.Fn in_epoch_verbose "epoch" "0" .
187*2c1962abSConrad MeyerIf
188*2c1962abSConrad Meyer.Cd EPOCH_TRACE
189*2c1962abSConrad Meyeris enabled,
190*2c1962abSConrad Meyer.Fn in_epoch_verbose "epoch" "1"
191*2c1962abSConrad Meyerprovides additional verbose debugging information.
19233346ed6SMatt Macy.Pp
19370398c2fSMatt MacyThe epoch API currently does not support sleeping in epoch_preempt sections.
1947d9389b0SEitan AdlerA caller should never call
19533346ed6SMatt Macy.Fn epoch_wait
19668195696SMatt Macyin the middle of an epoch section for the same epoch, as this will lead to a deadlock.
19733346ed6SMatt Macy.Pp
198131b2b76SHans Petter SelaskyThe
199131b2b76SHans Petter Selasky.Fn epoch_drain_callbacks
200131b2b76SHans Petter Selaskyfunction is used to drain all pending callbacks which have been invoked by prior
201131b2b76SHans Petter Selasky.Fn epoch_call
202131b2b76SHans Petter Selaskyfunction calls on the same epoch.
203131b2b76SHans Petter SelaskyThis function is useful when there are shared memory structure(s)
204131b2b76SHans Petter Selaskyreferred to by the epoch callback(s) which are not refcounted and are
205131b2b76SHans Petter Selaskyrarely freed.
206131b2b76SHans Petter SelaskyThe typical place for calling this function is right before freeing or
207131b2b76SHans Petter Selaskyinvalidating the shared resource(s) used by the epoch callback(s).
208131b2b76SHans Petter SelaskyThis function can sleep and is not optimized for performance.
20933346ed6SMatt Macy.Sh RETURN VALUES
2104619bce8SMatt Macy.Fn in_epoch curepoch
2114619bce8SMatt Macywill return 1 if curthread is in curepoch, 0 otherwise.
21270398c2fSMatt Macy.Sh CAVEATS
21370398c2fSMatt MacyOne must be cautious when using
214*2c1962abSConrad Meyer.Fn epoch_wait_preempt .
215*2c1962abSConrad MeyerThreads are pinned during epoch sections, so if a thread in a section is then
216*2c1962abSConrad Meyerpreempted by a higher priority compute bound thread on that CPU, it can be
217*2c1962abSConrad Meyerprevented from leaving the section indefinitely.
218*2c1962abSConrad Meyer.Pp
219*2c1962abSConrad MeyerEpochs are not a straight replacement for read locks.
220*2c1962abSConrad MeyerCallers must use safe list and tailq traversal routines in an epoch (see ck_queue).
221*2c1962abSConrad MeyerWhen modifying a list referenced from an epoch section safe removal
222*2c1962abSConrad Meyerroutines must be used and the caller can no longer modify a list entry
223*2c1962abSConrad Meyerin place.
224*2c1962abSConrad MeyerAn item to be modified must be handled with copy on write
225*2c1962abSConrad Meyerand frees must be deferred until after a grace period has elapsed.
22633346ed6SMatt Macy.Sh EXAMPLES
22733346ed6SMatt MacyAsync free example:
22833346ed6SMatt MacyThread 1:
22933346ed6SMatt Macy.Bd -literal
2306e36248fSMatt Macyint
2316e36248fSMatt Macyin_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_laddr *laddr,
2326e36248fSMatt Macy    struct ucred *cred)
23333346ed6SMatt Macy{
2346e36248fSMatt Macy    /* ... */
23533346ed6SMatt Macy    epoch_enter(net_epoch);
23633346ed6SMatt Macy    CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
23733346ed6SMatt Macy        sa = ifa->ifa_addr;
23833346ed6SMatt Macy	if (sa->sa_family != AF_INET)
23933346ed6SMatt Macy	    continue;
24033346ed6SMatt Macy	sin = (struct sockaddr_in *)sa;
24133346ed6SMatt Macy	if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
24233346ed6SMatt Macy	     ia = (struct in_ifaddr *)ifa;
24333346ed6SMatt Macy	     break;
24433346ed6SMatt Macy	}
24533346ed6SMatt Macy    }
24633346ed6SMatt Macy    epoch_exit(net_epoch);
2476e36248fSMatt Macy    /* ... */
24833346ed6SMatt Macy}
24933346ed6SMatt Macy.Ed
25033346ed6SMatt MacyThread 2:
25133346ed6SMatt Macy.Bd -literal
25233346ed6SMatt Macyvoid
25333346ed6SMatt Macyifa_free(struct ifaddr *ifa)
25433346ed6SMatt Macy{
25533346ed6SMatt Macy
25633346ed6SMatt Macy    if (refcount_release(&ifa->ifa_refcnt))
25733346ed6SMatt Macy        epoch_call(net_epoch, &ifa->ifa_epoch_ctx, ifa_destroy);
25833346ed6SMatt Macy}
25933346ed6SMatt Macy
2606e36248fSMatt Macyvoid
2616e36248fSMatt Macyif_purgeaddrs(struct ifnet *ifp)
26233346ed6SMatt Macy{
26333346ed6SMatt Macy
2647d9389b0SEitan Adler    /* .... */
26533346ed6SMatt Macy    IF_ADDR_WLOCK(ifp);
26633346ed6SMatt Macy    CK_STAILQ_REMOVE(&ifp->if_addrhead, ifa, ifaddr, ifa_link);
26733346ed6SMatt Macy    IF_ADDR_WUNLOCK(ifp);
26833346ed6SMatt Macy    ifa_free(ifa);
26933346ed6SMatt Macy}
27033346ed6SMatt Macy.Ed
27133346ed6SMatt Macy.Pp
2727d9389b0SEitan AdlerThread 1 traverses the ifaddr list in an epoch.
2737d9389b0SEitan AdlerThread 2 unlinks with the corresponding epoch safe macro, marks as logically free,
2747d9389b0SEitan Adlerand then defers deletion.
2757d9389b0SEitan AdlerMore general mutation or a synchronous
2767d9389b0SEitan Adlerfree would have to follow a call to
27733346ed6SMatt Macy.Fn epoch_wait .
27824929e2cSGleb Smirnoff.Sh NOTES
27924929e2cSGleb SmirnoffThe
28024929e2cSGleb Smirnoff.Nm
28124929e2cSGleb Smirnoffkernel programming interface is under development and is subject to change.
28233346ed6SMatt Macy.\" (stray .El removed: no matching .Bl is open at this point)
28333346ed6SMatt Macy.Sh SEE ALSO
28433346ed6SMatt Macy.Xr locking 9 ,
28533346ed6SMatt Macy.Xr mtx_pool 9 ,
28633346ed6SMatt Macy.Xr mutex 9 ,
28733346ed6SMatt Macy.Xr rwlock 9 ,
28833346ed6SMatt Macy.Xr sema 9 ,
28933346ed6SMatt Macy.Xr sleep 9 ,
29033346ed6SMatt Macy.Xr sx 9 ,
29133346ed6SMatt Macy.Xr timeout 9
292