xref: /freebsd/share/man/man9/epoch.9 (revision 04b90ac0f9e883a485bad4b0edf516395596d17f)
133346ed6SMatt Macy.\"
233346ed6SMatt Macy.\" Copyright (C) 2018 Matthew Macy <mmacy@FreeBSD.org>.
333346ed6SMatt Macy.\"
433346ed6SMatt Macy.\" Redistribution and use in source and binary forms, with or without
533346ed6SMatt Macy.\" modification, are permitted provided that the following conditions
633346ed6SMatt Macy.\" are met:
733346ed6SMatt Macy.\" 1. Redistributions of source code must retain the above copyright
833346ed6SMatt Macy.\"    notice(s), this list of conditions and the following disclaimer as
933346ed6SMatt Macy.\"    the first lines of this file unmodified other than the possible
1033346ed6SMatt Macy.\"    addition of one or more copyright notices.
1133346ed6SMatt Macy.\" 2. Redistributions in binary form must reproduce the above copyright
1233346ed6SMatt Macy.\"    notice(s), this list of conditions and the following disclaimer in the
1333346ed6SMatt Macy.\"    documentation and/or other materials provided with the distribution.
1433346ed6SMatt Macy.\"
1533346ed6SMatt Macy.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
1633346ed6SMatt Macy.\" EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1733346ed6SMatt Macy.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
1833346ed6SMatt Macy.\" DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
1933346ed6SMatt Macy.\" DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
2033346ed6SMatt Macy.\" (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
2133346ed6SMatt Macy.\" SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
2233346ed6SMatt Macy.\" CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2333346ed6SMatt Macy.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2433346ed6SMatt Macy.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
2533346ed6SMatt Macy.\" DAMAGE.
2633346ed6SMatt Macy.\"
27*04b90ac0SKa Ho Ng.Dd March 25, 2024
2833346ed6SMatt Macy.Dt EPOCH 9
2933346ed6SMatt Macy.Os
3033346ed6SMatt Macy.Sh NAME
3133346ed6SMatt Macy.Nm epoch ,
3233346ed6SMatt Macy.Nm epoch_context ,
3333346ed6SMatt Macy.Nm epoch_alloc ,
3433346ed6SMatt Macy.Nm epoch_free ,
3533346ed6SMatt Macy.Nm epoch_enter ,
3633346ed6SMatt Macy.Nm epoch_exit ,
3733346ed6SMatt Macy.Nm epoch_wait ,
382c1962abSConrad Meyer.Nm epoch_enter_preempt ,
392c1962abSConrad Meyer.Nm epoch_exit_preempt ,
402c1962abSConrad Meyer.Nm epoch_wait_preempt ,
4133346ed6SMatt Macy.Nm epoch_call ,
42131b2b76SHans Petter Selasky.Nm epoch_drain_callbacks ,
4333346ed6SMatt Macy.Nm in_epoch ,
4432068667SChristian Brueffer.Nm in_epoch_verbose
457739f6e5SEd Schouten.Nd kernel epoch based reclamation
4633346ed6SMatt Macy.Sh SYNOPSIS
4733346ed6SMatt Macy.In sys/param.h
4833346ed6SMatt Macy.In sys/proc.h
4933346ed6SMatt Macy.In sys/epoch.h
502c1962abSConrad Meyer.\" Types
512c1962abSConrad Meyer.Bd -literal
522c1962abSConrad Meyerstruct epoch;		/* Opaque */
532c1962abSConrad Meyer.Ed
542c1962abSConrad Meyer.Vt typedef "struct epoch *epoch_t" ;
552c1962abSConrad Meyer.Bd -literal
562c1962abSConrad Meyerstruct epoch_context {
572c1962abSConrad Meyer	void	*data[2];
582c1962abSConrad Meyer};
592c1962abSConrad Meyer.Ed
602c1962abSConrad Meyer.Vt typedef "struct epoch_context *epoch_context_t" ;
611d110928SGleb Smirnoff.Vt typedef "void epoch_callback_t(epoch_context_t)" ;
622c1962abSConrad Meyer.Bd -literal
632c1962abSConrad Meyerstruct epoch_tracker;	/* Opaque */
642c1962abSConrad Meyer.Ed
652c1962abSConrad Meyer.Vt typedef "struct epoch_tracker *epoch_tracker_t" ;
662c1962abSConrad Meyer.\" Declarations
6733346ed6SMatt Macy.Ft epoch_t
682c1962abSConrad Meyer.Fn epoch_alloc "const char *name" "int flags"
692c1962abSConrad Meyer.Ft void
702c1962abSConrad Meyer.Fn epoch_free "epoch_t epoch"
7133346ed6SMatt Macy.Ft void
7233346ed6SMatt Macy.Fn epoch_enter "epoch_t epoch"
7333346ed6SMatt Macy.Ft void
7433346ed6SMatt Macy.Fn epoch_exit "epoch_t epoch"
7533346ed6SMatt Macy.Ft void
7633346ed6SMatt Macy.Fn epoch_wait "epoch_t epoch"
7733346ed6SMatt Macy.Ft void
782c1962abSConrad Meyer.Fn epoch_enter_preempt "epoch_t epoch" "epoch_tracker_t et"
792c1962abSConrad Meyer.Ft void
802c1962abSConrad Meyer.Fn epoch_exit_preempt "epoch_t epoch" "epoch_tracker_t et"
812c1962abSConrad Meyer.Ft void
8270398c2fSMatt Macy.Fn epoch_wait_preempt "epoch_t epoch"
836e36248fSMatt Macy.Ft void
841d110928SGleb Smirnoff.Fn epoch_call "epoch_t epoch" "epoch_callback_t callback" "epoch_context_t ctx"
85131b2b76SHans Petter Selasky.Ft void
86131b2b76SHans Petter Selasky.Fn epoch_drain_callbacks "epoch_t epoch"
8733346ed6SMatt Macy.Ft int
884619bce8SMatt Macy.Fn in_epoch "epoch_t epoch"
892c1962abSConrad Meyer.Ft int
902c1962abSConrad Meyer.Fn in_epoch_verbose "epoch_t epoch" "int dump_onfail"
9133346ed6SMatt Macy.Sh DESCRIPTION
9233346ed6SMatt MacyEpochs are used to guarantee liveness and immutability of data by
9333346ed6SMatt Macydeferring reclamation and mutation until a grace period has elapsed.
947d9389b0SEitan AdlerEpochs do not have any lock ordering issues.
957d9389b0SEitan AdlerEntering and leaving an epoch section will never block.
9633346ed6SMatt Macy.Pp
9733346ed6SMatt MacyEpochs are allocated with
982c1962abSConrad Meyer.Fn epoch_alloc .
992c1962abSConrad MeyerThe
1002c1962abSConrad Meyer.Fa name
1012c1962abSConrad Meyerargument is used for debugging convenience when the
1022c1962abSConrad Meyer.Cd EPOCH_TRACE
1032c1962abSConrad Meyerkernel option is configured.
1042c1962abSConrad MeyerBy default, epochs do not allow preemption during sections.
1052c1962abSConrad MeyerBy default, mutexes cannot be held across
1062c1962abSConrad Meyer.Fn epoch_wait_preempt .
1072c1962abSConrad MeyerThe
1082c1962abSConrad Meyer.Fa flags
1092c1962abSConrad Meyerspecified are formed by
1102c1962abSConrad Meyer.Em OR Ns 'ing
1112c1962abSConrad Meyerthe following values:
1122c1962abSConrad Meyer.Bl -tag -offset indent -width Ds
1132c1962abSConrad Meyer.It Dv EPOCH_LOCKED
1142c1962abSConrad MeyerPermit holding mutexes across
1152c1962abSConrad Meyer.Fn epoch_wait_preempt
1162c1962abSConrad Meyer(requires
1172c1962abSConrad Meyer.Dv EPOCH_PREEMPT ) .
1182c1962abSConrad MeyerWhen doing this, one must be cautious of creating a situation where a deadlock
1192c1962abSConrad Meyeris possible.
1202c1962abSConrad Meyer.It Dv EPOCH_PREEMPT
1212c1962abSConrad MeyerThe
1222c1962abSConrad Meyer.Vt epoch
1232c1962abSConrad Meyerwill allow preemption during sections.
1242c1962abSConrad MeyerOnly non-sleepable locks may be acquired during a preemptible epoch.
1252c1962abSConrad MeyerThe functions
1262c1962abSConrad Meyer.Fn epoch_enter_preempt ,
1272c1962abSConrad Meyer.Fn epoch_exit_preempt ,
1282c1962abSConrad Meyerand
1292c1962abSConrad Meyer.Fn epoch_wait_preempt
1302c1962abSConrad Meyermust be used in place of
1312c1962abSConrad Meyer.Fn epoch_enter ,
1322c1962abSConrad Meyer.Fn epoch_exit ,
1332c1962abSConrad Meyerand
1342c1962abSConrad Meyer.Fn epoch_wait ,
1352c1962abSConrad Meyerrespectively.
1362c1962abSConrad Meyer.El
1372c1962abSConrad Meyer.Pp
1382c1962abSConrad Meyer.Vt epoch Ns s
1392c1962abSConrad Meyerare freed with
14033346ed6SMatt Macy.Fn epoch_free .
1412c1962abSConrad Meyer.Pp
14233346ed6SMatt MacyThreads indicate the start of an epoch critical section by calling
1432c1962abSConrad Meyer.Fn epoch_enter
1442c1962abSConrad Meyer(or
1452c1962abSConrad Meyer.Fn epoch_enter_preempt
1462c1962abSConrad Meyerfor preemptible epochs).
1472c1962abSConrad MeyerThreads call
1482c1962abSConrad Meyer.Fn epoch_exit
1492c1962abSConrad Meyer(or
1502c1962abSConrad Meyer.Fn epoch_exit_preempt
1512c1962abSConrad Meyerfor preemptible epochs)
1522c1962abSConrad Meyerto indicate the end of a critical section.
1532c1962abSConrad Meyer.Vt struct epoch_tracker Ns s
1542c1962abSConrad Meyerare stack objects whose pointers are passed to
15570398c2fSMatt Macy.Fn epoch_enter_preempt
15633346ed6SMatt Macyand
1572c1962abSConrad Meyer.Fn epoch_exit_preempt
1582c1962abSConrad Meyer(much like
1592c1962abSConrad Meyer.Vt struct rm_priotracker ) .
1602c1962abSConrad Meyer.Pp
1612c1962abSConrad MeyerThreads can defer work until a grace period has expired since any thread has
1622c1962abSConrad Meyerentered the epoch either synchronously or asynchronously.
1632c1962abSConrad Meyer.Fn epoch_call
1642c1962abSConrad Meyerdefers work asynchronously by invoking the provided
1652c1962abSConrad Meyer.Fa callback
1662c1962abSConrad Meyerat a later time.
1672c1962abSConrad Meyer.Fn epoch_wait
1682c1962abSConrad Meyer(or
1692c1962abSConrad Meyer.Fn epoch_wait_preempt )
1702c1962abSConrad Meyerblocks the current thread until the grace period has expired and the work can be
1712c1962abSConrad Meyerdone safely.
1722c1962abSConrad Meyer.Pp
1732c1962abSConrad MeyerDefault, non-preemptible epoch wait
1742c1962abSConrad Meyer.Fn ( epoch_wait )
1752c1962abSConrad Meyeris guaranteed to have much shorter completion times relative to
1762c1962abSConrad Meyerpreemptible epoch wait
1772c1962abSConrad Meyer.Fn ( epoch_wait_preempt ) .
1782c1962abSConrad Meyer(In the default type, none of the threads in an epoch section will be preempted
1792c1962abSConrad Meyerbefore completing its section.)
1802c1962abSConrad Meyer.Pp
18133346ed6SMatt MacyINVARIANTS can assert that a thread is in an epoch by using
18233346ed6SMatt Macy.Fn in_epoch .
1832c1962abSConrad Meyer.Fn in_epoch "epoch"
1842c1962abSConrad Meyeris equivalent to invoking
1852c1962abSConrad Meyer.Fn in_epoch_verbose "epoch" "0" .
1862c1962abSConrad MeyerIf
1872c1962abSConrad Meyer.Cd EPOCH_TRACE
1882c1962abSConrad Meyeris enabled,
1892c1962abSConrad Meyer.Fn in_epoch_verbose "epoch" "1"
1902c1962abSConrad Meyerprovides additional verbose debugging information.
19133346ed6SMatt Macy.Pp
19270398c2fSMatt MacyThe epoch API currently does not support sleeping in epoch_preempt sections.
1937d9389b0SEitan AdlerA caller should never call
19433346ed6SMatt Macy.Fn epoch_wait
19568195696SMatt Macyin the middle of an epoch section for the same epoch, as this will lead to a deadlock.
19633346ed6SMatt Macy.Pp
197131b2b76SHans Petter SelaskyThe
198131b2b76SHans Petter Selasky.Fn epoch_drain_callbacks
199131b2b76SHans Petter Selaskyfunction is used to drain all pending callbacks which have been invoked by prior
200131b2b76SHans Petter Selasky.Fn epoch_call
201131b2b76SHans Petter Selaskyfunction calls on the same epoch.
202131b2b76SHans Petter SelaskyThis function is useful when there are shared memory structure(s)
203131b2b76SHans Petter Selaskyreferred to by the epoch callback(s) which are not refcounted and are
204131b2b76SHans Petter Selaskyrarely freed.
205131b2b76SHans Petter SelaskyThe typical place for calling this function is right before freeing or
206131b2b76SHans Petter Selaskyinvalidating the shared resource(s) used by the epoch callback(s).
207131b2b76SHans Petter SelaskyThis function can sleep and is not optimized for performance.
20833346ed6SMatt Macy.Sh RETURN VALUES
2094619bce8SMatt Macy.Fn in_epoch curepoch
2104619bce8SMatt Macywill return 1 if curthread is in curepoch, 0 otherwise.
21133346ed6SMatt Macy.Sh EXAMPLES
21233346ed6SMatt MacyAsync free example:
21333346ed6SMatt MacyThread 1:
21433346ed6SMatt Macy.Bd -literal
2156e36248fSMatt Macyint
2166e36248fSMatt Macyin_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_laddr *laddr,
2176e36248fSMatt Macy    struct ucred *cred)
21833346ed6SMatt Macy{
2196e36248fSMatt Macy    /* ... */
22033346ed6SMatt Macy    epoch_enter(net_epoch);
22133346ed6SMatt Macy    CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
22233346ed6SMatt Macy        sa = ifa->ifa_addr;
22333346ed6SMatt Macy	if (sa->sa_family != AF_INET)
22433346ed6SMatt Macy	    continue;
22533346ed6SMatt Macy	sin = (struct sockaddr_in *)sa;
22633346ed6SMatt Macy	if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
22733346ed6SMatt Macy	     ia = (struct in_ifaddr *)ifa;
22833346ed6SMatt Macy	     break;
22933346ed6SMatt Macy	}
23033346ed6SMatt Macy    }
23133346ed6SMatt Macy    epoch_exit(net_epoch);
2326e36248fSMatt Macy    /* ... */
23333346ed6SMatt Macy}
23433346ed6SMatt Macy.Ed
23533346ed6SMatt MacyThread 2:
23633346ed6SMatt Macy.Bd -literal
23733346ed6SMatt Macyvoid
23833346ed6SMatt Macyifa_free(struct ifaddr *ifa)
23933346ed6SMatt Macy{
24033346ed6SMatt Macy
24133346ed6SMatt Macy    if (refcount_release(&ifa->ifa_refcnt))
2421d110928SGleb Smirnoff        epoch_call(net_epoch, ifa_destroy, &ifa->ifa_epoch_ctx);
24333346ed6SMatt Macy}
24433346ed6SMatt Macy
2456e36248fSMatt Macyvoid
2466e36248fSMatt Macyif_purgeaddrs(struct ifnet *ifp)
24733346ed6SMatt Macy{
24833346ed6SMatt Macy
2497d9389b0SEitan Adler    /* ... */
25033346ed6SMatt Macy    IF_ADDR_WLOCK(ifp);
25133346ed6SMatt Macy    CK_STAILQ_REMOVE(&ifp->if_addrhead, ifa, ifaddr, ifa_link);
25233346ed6SMatt Macy    IF_ADDR_WUNLOCK(ifp);
25333346ed6SMatt Macy    ifa_free(ifa);
25433346ed6SMatt Macy}
25533346ed6SMatt Macy.Ed
25633346ed6SMatt Macy.Pp
2577d9389b0SEitan AdlerThread 1 traverses the ifaddr list in an epoch.
2587d9389b0SEitan AdlerThread 2 unlinks with the corresponding epoch-safe macro, marks it as logically free,
2597d9389b0SEitan Adlerand then defers deletion.
2607d9389b0SEitan AdlerMore general mutation or a synchronous
2617d9389b0SEitan Adlerfree would have to follow a call to
26233346ed6SMatt Macy.Fn epoch_wait .
26333346ed6SMatt Macy.Sh SEE ALSO
2648965b303SMitchell Horne.Xr callout 9 ,
26533346ed6SMatt Macy.Xr locking 9 ,
26633346ed6SMatt Macy.Xr mtx_pool 9 ,
26733346ed6SMatt Macy.Xr mutex 9 ,
26833346ed6SMatt Macy.Xr rwlock 9 ,
26933346ed6SMatt Macy.Xr sema 9 ,
27033346ed6SMatt Macy.Xr sleep 9 ,
2718965b303SMitchell Horne.Xr sx 9
2722f3e7fb2SGordon Bergling.Sh HISTORY
2732f3e7fb2SGordon BerglingThe
2742f3e7fb2SGordon Bergling.Nm
2752f3e7fb2SGordon Berglingframework first appeared in
2762f3e7fb2SGordon Bergling.Fx 11.0 .
2772f3e7fb2SGordon Bergling.Sh CAVEATS
2782f3e7fb2SGordon BerglingOne must be cautious when using
2792f3e7fb2SGordon Bergling.Fn epoch_wait_preempt .
2802f3e7fb2SGordon BerglingThreads are pinned during epoch sections, so if a thread in a section is then
2812f3e7fb2SGordon Berglingpreempted by a higher priority compute bound thread on that CPU, it can be
2822f3e7fb2SGordon Berglingprevented from leaving the section indefinitely.
2832f3e7fb2SGordon Bergling.Pp
2842f3e7fb2SGordon BerglingEpochs are not a straight replacement for read locks.
2852f3e7fb2SGordon BerglingCallers must use safe list and tailq traversal routines in an epoch (see ck_queue).
2862f3e7fb2SGordon BerglingWhen modifying a list referenced from an epoch section safe removal
2872f3e7fb2SGordon Berglingroutines must be used and the caller can no longer modify a list entry
2882f3e7fb2SGordon Berglingin place.
2892f3e7fb2SGordon BerglingAn item to be modified must be handled with copy on write
2902f3e7fb2SGordon Berglingand frees must be deferred until after a grace period has elapsed.
291