xref: /freebsd/share/man/man9/epoch.9 (revision 131b2b7658d7e961a245697cb5af55306388fc54)
133346ed6SMatt Macy.\"
233346ed6SMatt Macy.\" Copyright (C) 2018 Matthew Macy <mmacy@FreeBSD.org>.
333346ed6SMatt Macy.\"
433346ed6SMatt Macy.\" Redistribution and use in source and binary forms, with or without
533346ed6SMatt Macy.\" modification, are permitted provided that the following conditions
633346ed6SMatt Macy.\" are met:
733346ed6SMatt Macy.\" 1. Redistributions of source code must retain the above copyright
833346ed6SMatt Macy.\"    notice(s), this list of conditions and the following disclaimer as
933346ed6SMatt Macy.\"    the first lines of this file unmodified other than the possible
1033346ed6SMatt Macy.\"    addition of one or more copyright notices.
1133346ed6SMatt Macy.\" 2. Redistributions in binary form must reproduce the above copyright
1233346ed6SMatt Macy.\"    notice(s), this list of conditions and the following disclaimer in the
1333346ed6SMatt Macy.\"    documentation and/or other materials provided with the distribution.
1433346ed6SMatt Macy.\"
1533346ed6SMatt Macy.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
1633346ed6SMatt Macy.\" EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1733346ed6SMatt Macy.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
1833346ed6SMatt Macy.\" DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
1933346ed6SMatt Macy.\" DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
2033346ed6SMatt Macy.\" (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
2133346ed6SMatt Macy.\" SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
2233346ed6SMatt Macy.\" CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2333346ed6SMatt Macy.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2433346ed6SMatt Macy.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
2533346ed6SMatt Macy.\" DAMAGE.
2633346ed6SMatt Macy.\"
2733346ed6SMatt Macy.\" $FreeBSD$
2833346ed6SMatt Macy.\"
29*131b2b76SHans Petter Selasky.Dd June 28, 2019
3033346ed6SMatt Macy.Dt EPOCH 9
3133346ed6SMatt Macy.Os
3233346ed6SMatt Macy.Sh NAME
3333346ed6SMatt Macy.Nm epoch ,
3433346ed6SMatt Macy.Nm epoch_context ,
3533346ed6SMatt Macy.Nm epoch_alloc ,
3633346ed6SMatt Macy.Nm epoch_free ,
3733346ed6SMatt Macy.Nm epoch_enter ,
3833346ed6SMatt Macy.Nm epoch_exit ,
3933346ed6SMatt Macy.Nm epoch_wait ,
4033346ed6SMatt Macy.Nm epoch_call ,
41*131b2b76SHans Petter Selasky.Nm epoch_drain_callbacks ,
4233346ed6SMatt Macy.Nm in_epoch
437739f6e5SEd Schouten.Nd kernel epoch based reclamation
4433346ed6SMatt Macy.Sh SYNOPSIS
4533346ed6SMatt Macy.In sys/param.h
4633346ed6SMatt Macy.In sys/proc.h
4733346ed6SMatt Macy.In sys/epoch.h
4833346ed6SMatt Macy.Ft epoch_t
496e36248fSMatt Macy.Fn epoch_alloc "int flags"
5033346ed6SMatt Macy.Ft void
5133346ed6SMatt Macy.Fn epoch_enter "epoch_t epoch"
5233346ed6SMatt Macy.Ft void
5368195696SMatt Macy.Fn epoch_enter_preempt "epoch_t epoch" "epoch_tracker_t et"
546e36248fSMatt Macy.Ft void
5533346ed6SMatt Macy.Fn epoch_exit "epoch_t epoch"
5633346ed6SMatt Macy.Ft void
5768195696SMatt Macy.Fn epoch_exit_preempt "epoch_t epoch" "epoch_tracker_t et"
586e36248fSMatt Macy.Ft void
5933346ed6SMatt Macy.Fn epoch_wait "epoch_t epoch"
6033346ed6SMatt Macy.Ft void
6170398c2fSMatt Macy.Fn epoch_wait_preempt "epoch_t epoch"
626e36248fSMatt Macy.Ft void
6333346ed6SMatt Macy.Fn epoch_call "epoch_t epoch" "epoch_context_t ctx" "void (*callback) (epoch_context_t)"
64*131b2b76SHans Petter Selasky.Ft void
65*131b2b76SHans Petter Selasky.Fn epoch_drain_callbacks "epoch_t epoch"
6633346ed6SMatt Macy.Ft int
674619bce8SMatt Macy.Fn in_epoch "epoch_t epoch"
6833346ed6SMatt Macy.Sh DESCRIPTION
6933346ed6SMatt MacyEpochs are used to guarantee liveness and immutability of data by
7033346ed6SMatt Macydeferring reclamation and mutation until a grace period has elapsed.
717d9389b0SEitan AdlerEpochs do not have any lock ordering issues.
727d9389b0SEitan AdlerEntering and leaving an epoch section will never block.
7333346ed6SMatt Macy.Pp
7433346ed6SMatt MacyEpochs are allocated with
7533346ed6SMatt Macy.Fn epoch_alloc
7633346ed6SMatt Macyand freed with
7733346ed6SMatt Macy.Fn epoch_free .
786e36248fSMatt MacyThe flags passed to epoch_alloc determine whether preemption is
797d9389b0SEitan Adlerallowed during a section or not (the default), as specified by
8070398c2fSMatt MacyEPOCH_PREEMPT.
8133346ed6SMatt MacyThreads indicate the start of an epoch critical section by calling
8233346ed6SMatt Macy.Fn epoch_enter .
8333346ed6SMatt MacyThe end of a critical section is indicated by calling
8433346ed6SMatt Macy.Fn epoch_exit .
8570398c2fSMatt MacyThe _preempt variants can be used around code which requires preemption.
8633346ed6SMatt MacyA thread can wait until a grace period has elapsed
8733346ed6SMatt Macysince any threads have entered
8833346ed6SMatt Macythe epoch by calling
8970398c2fSMatt Macy.Fn epoch_wait
9070398c2fSMatt Macyor
9170398c2fSMatt Macy.Fn epoch_wait_preempt ,
9270398c2fSMatt Macydepending on the epoch_type.
9370398c2fSMatt MacyThe use of a default epoch type allows one to use
9470398c2fSMatt Macy.Fn epoch_wait
956e36248fSMatt Macywhich is guaranteed to have much shorter completion times since
966e36248fSMatt Macywe know that none of the threads in an epoch section will be preempted
976e36248fSMatt Macybefore completing its section.
9833346ed6SMatt MacyIf the thread cannot sleep or is otherwise in a performance sensitive
9933346ed6SMatt Macypath it can ensure that a grace period has elapsed by calling
10033346ed6SMatt Macy.Fn epoch_call
10133346ed6SMatt Macywith a callback with any work that needs to wait for an epoch to elapse.
10233346ed6SMatt MacyOnly non-sleepable locks can be acquired during a section protected by
10370398c2fSMatt Macy.Fn epoch_enter_preempt
10433346ed6SMatt Macyand
10570398c2fSMatt Macy.Fn epoch_exit_preempt .
10633346ed6SMatt MacyINVARIANTS can assert that a thread is in an epoch by using
10733346ed6SMatt Macy.Fn in_epoch .
10833346ed6SMatt Macy.Pp
10970398c2fSMatt MacyThe epoch API currently does not support sleeping in epoch_preempt sections.
1107d9389b0SEitan AdlerA caller should never call
11133346ed6SMatt Macy.Fn epoch_wait
11268195696SMatt Macyin the middle of an epoch section for the same epoch as this will lead to a deadlock.
11333346ed6SMatt Macy.Pp
114b3dfc518SMatt MacyBy default mutexes cannot be held across
11568195696SMatt Macy.Fn epoch_wait_preempt .
11668195696SMatt MacyTo permit this the epoch must be allocated with
1174619bce8SMatt MacyEPOCH_LOCKED.
11868195696SMatt MacyWhen doing this one must be cautious of creating a situation where a deadlock is
11968195696SMatt Macypossible. Note that epochs are not a straight replacement for read locks.
1207d9389b0SEitan AdlerCallers must use safe list and tailq traversal routines in an epoch (see ck_queue).
12133346ed6SMatt MacyWhen modifying a list referenced from an epoch section safe removal
12233346ed6SMatt Macyroutines must be used and the caller can no longer modify a list entry
1237d9389b0SEitan Adlerin place.
1247d9389b0SEitan AdlerAn item to be modified must be handled with copy on write
12533346ed6SMatt Macyand frees must be deferred until after a grace period has elapsed.
126*131b2b76SHans Petter Selasky.Pp
127*131b2b76SHans Petter SelaskyThe
128*131b2b76SHans Petter Selasky.Fn epoch_drain_callbacks
129*131b2b76SHans Petter Selaskyfunction is used to drain all pending callbacks which have been invoked by prior
130*131b2b76SHans Petter Selasky.Fn epoch_call
131*131b2b76SHans Petter Selaskyfunction calls on the same epoch.
132*131b2b76SHans Petter SelaskyThis function is useful when there are shared memory structure(s)
133*131b2b76SHans Petter Selaskyreferred to by the epoch callback(s) which are not refcounted and are
134*131b2b76SHans Petter Selaskyrarely freed.
135*131b2b76SHans Petter SelaskyThe typical place for calling this function is right before freeing or
136*131b2b76SHans Petter Selaskyinvalidating the shared resource(s) used by the epoch callback(s).
137*131b2b76SHans Petter SelaskyThis function can sleep and is not optimized for performance.
13833346ed6SMatt Macy.Sh RETURN VALUES
1394619bce8SMatt Macy.Fn in_epoch curepoch
1404619bce8SMatt Macywill return 1 if curthread is in curepoch, 0 otherwise.
14170398c2fSMatt Macy.Sh CAVEATS
14270398c2fSMatt MacyOne must be cautious when using
14370398c2fSMatt Macy.Fn epoch_wait_preempt .
14470398c2fSMatt MacyThreads are pinned during epoch sections so if a thread in a section is then
14570398c2fSMatt Macypreempted by a higher priority compute bound thread on that CPU it can be
1467d9389b0SEitan Adlerprevented from leaving the section.
1477d9389b0SEitan AdlerThus the wait time for the waiter is
14870398c2fSMatt Macypotentially unbounded.
14933346ed6SMatt Macy.Sh EXAMPLES
15033346ed6SMatt MacyAsync free example:
15133346ed6SMatt MacyThread 1:
15233346ed6SMatt Macy.Bd -literal
1536e36248fSMatt Macyint
1546e36248fSMatt Macyin_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_laddr *laddr,
1556e36248fSMatt Macy    struct ucred *cred)
15633346ed6SMatt Macy{
1576e36248fSMatt Macy   /* ... */
15833346ed6SMatt Macy   epoch_enter(net_epoch);
15933346ed6SMatt Macy    CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
16033346ed6SMatt Macy        sa = ifa->ifa_addr;
16133346ed6SMatt Macy	if (sa->sa_family != AF_INET)
16233346ed6SMatt Macy	    continue;
16333346ed6SMatt Macy	sin = (struct sockaddr_in *)sa;
16433346ed6SMatt Macy	if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
16533346ed6SMatt Macy	     ia = (struct in_ifaddr *)ifa;
16633346ed6SMatt Macy	     break;
16733346ed6SMatt Macy	}
16833346ed6SMatt Macy    }
16933346ed6SMatt Macy    epoch_exit(net_epoch);
1706e36248fSMatt Macy   /* ... */
17133346ed6SMatt Macy}
17233346ed6SMatt Macy.Ed
17333346ed6SMatt MacyThread 2:
17433346ed6SMatt Macy.Bd -literal
17533346ed6SMatt Macyvoid
17633346ed6SMatt Macyifa_free(struct ifaddr *ifa)
17733346ed6SMatt Macy{
17833346ed6SMatt Macy
17933346ed6SMatt Macy    if (refcount_release(&ifa->ifa_refcnt))
18033346ed6SMatt Macy        epoch_call(net_epoch, &ifa->ifa_epoch_ctx, ifa_destroy);
18133346ed6SMatt Macy}
18233346ed6SMatt Macy
1836e36248fSMatt Macyvoid
1846e36248fSMatt Macyif_purgeaddrs(struct ifnet *ifp)
18533346ed6SMatt Macy{
18633346ed6SMatt Macy
1877d9389b0SEitan Adler    /* .... */
18833346ed6SMatt Macy    IF_ADDR_WLOCK(ifp);
18933346ed6SMatt Macy    CK_STAILQ_REMOVE(&ifp->if_addrhead, ifa, ifaddr, ifa_link);
19033346ed6SMatt Macy    IF_ADDR_WUNLOCK(ifp);
19133346ed6SMatt Macy    ifa_free(ifa);
19233346ed6SMatt Macy}
19333346ed6SMatt Macy.Ed
19433346ed6SMatt Macy.Pp
1957d9389b0SEitan AdlerThread 1 traverses the ifaddr list in an epoch.
1967d9389b0SEitan AdlerThread 2 unlinks with the corresponding epoch safe macro, marks as logically free,
1977d9389b0SEitan Adlerand then defers deletion.
1987d9389b0SEitan AdlerMore general mutation or a synchronous
1997d9389b0SEitan Adlerfree would have to follow a call to
20033346ed6SMatt Macy.Fn epoch_wait .
20133346ed6SMatt Macy.Sh ERRORS
20233346ed6SMatt MacyNone.
20324929e2cSGleb Smirnoff.Sh NOTES
20424929e2cSGleb SmirnoffThe
20524929e2cSGleb Smirnoff.Nm
20624929e2cSGleb Smirnoffkernel programming interface is under development and is subject to change.
20833346ed6SMatt Macy.Sh SEE ALSO
20933346ed6SMatt Macy.Xr locking 9 ,
21033346ed6SMatt Macy.Xr mtx_pool 9 ,
21133346ed6SMatt Macy.Xr mutex 9 ,
21233346ed6SMatt Macy.Xr rwlock 9 ,
21333346ed6SMatt Macy.Xr sema 9 ,
21433346ed6SMatt Macy.Xr sleep 9 ,
21533346ed6SMatt Macy.Xr sx 9 ,
21633346ed6SMatt Macy.Xr timeout 9
217