.\"
.\" Copyright (C) 2018 Matthew Macy <mmacy@FreeBSD.org>.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\"    notice(s), this list of conditions and the following disclaimer as
.\"    the first lines of this file unmodified other than the possible
.\"    addition of one or more copyright notices.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\"    notice(s), this list of conditions and the following disclaimer in the
.\"    documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
.\" EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
.\" DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
.\" DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
.\" (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
.\" SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
.\" CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
.\" DAMAGE.
.\"
.\" $FreeBSD$
.\"
.Dd April 30, 2020
.Dt EPOCH 9
.Os
.Sh NAME
.Nm epoch ,
.Nm epoch_context ,
.Nm epoch_alloc ,
.Nm epoch_free ,
.Nm epoch_enter ,
.Nm epoch_exit ,
.Nm epoch_wait ,
.Nm epoch_enter_preempt ,
.Nm epoch_exit_preempt ,
.Nm epoch_wait_preempt ,
.Nm epoch_call ,
.Nm epoch_drain_callbacks ,
.Nm in_epoch ,
.Nm in_epoch_verbose
.Nd kernel epoch based reclamation
.Sh SYNOPSIS
.In sys/param.h
.In sys/proc.h
.In sys/epoch.h
.\" Types
.Bd -literal
struct epoch;		/* Opaque */
.Ed
.Vt typedef "struct epoch *epoch_t" ;
.Bd -literal
struct epoch_context {
	void	*data[2];
};
.Ed
.Vt typedef "struct epoch_context *epoch_context_t" ;
.Vt typedef "void epoch_callback_t(epoch_context_t)" ;
.Bd -literal
struct epoch_tracker;	/* Opaque */
.Ed
.Vt typedef "struct epoch_tracker *epoch_tracker_t" ;
.\" Declarations
.Ft epoch_t
.Fn epoch_alloc "const char *name" "int flags"
.Ft void
.Fn epoch_free "epoch_t epoch"
.Ft void
.Fn epoch_enter "epoch_t epoch"
.Ft void
.Fn epoch_exit "epoch_t epoch"
.Ft void
.Fn epoch_wait "epoch_t epoch"
.Ft void
.Fn epoch_enter_preempt "epoch_t epoch" "epoch_tracker_t et"
.Ft void
.Fn epoch_exit_preempt "epoch_t epoch" "epoch_tracker_t et"
.Ft void
.Fn epoch_wait_preempt "epoch_t epoch"
.Ft void
.Fn epoch_call "epoch_t epoch" "epoch_callback_t callback" "epoch_context_t ctx"
.Ft void
.Fn epoch_drain_callbacks "epoch_t epoch"
.Ft int
.Fn in_epoch "epoch_t epoch"
.Ft int
.Fn in_epoch_verbose "epoch_t epoch" "int dump_onfail"
.Sh DESCRIPTION
Epochs are used to guarantee liveness and immutability of data by
deferring reclamation and mutation until a grace period has elapsed.
Epochs do not have any lock ordering issues.
Entering and leaving an epoch section will never block.
.Pp
Epochs are allocated with
.Fn epoch_alloc .
The
.Fa name
argument is used for debugging convenience when the
.Cd EPOCH_TRACE
kernel option is configured.
By default, epochs do not allow preemption during sections.
By default, mutexes cannot be held across
.Fn epoch_wait_preempt .
The
.Fa flags
specified are formed by
.Em OR Ns 'ing
the following values:
.Bl -tag -offset indent -width Ds
.It Dv EPOCH_LOCKED
Permit holding mutexes across
.Fn epoch_wait_preempt
(requires
.Dv EPOCH_PREEMPT ) .
When doing this, one must be cautious of creating a situation where a deadlock
is possible.
.It Dv EPOCH_PREEMPT
The
.Vt epoch
will allow preemption during sections.
Only non-sleepable locks may be acquired during a preemptible epoch.
The functions
.Fn epoch_enter_preempt ,
.Fn epoch_exit_preempt ,
and
.Fn epoch_wait_preempt
must be used in place of
.Fn epoch_enter ,
.Fn epoch_exit ,
and
.Fn epoch_wait ,
respectively.
.El
.Pp
.Vt epoch Ns s
are freed with
.Fn epoch_free .
.Pp
Threads indicate the start of an epoch critical section by calling
.Fn epoch_enter
(or
.Fn epoch_enter_preempt
for preemptible epochs).
Threads call
.Fn epoch_exit
(or
.Fn epoch_exit_preempt
for preemptible epochs)
to indicate the end of a critical section.
.Vt struct epoch_tracker Ns s
are stack objects whose pointers are passed to
.Fn epoch_enter_preempt
and
.Fn epoch_exit_preempt
(much like
.Vt struct rm_priotracker ) .
.Pp
Threads can defer work, either synchronously or asynchronously, until a grace
period has expired since any thread has entered the epoch.
.Fn epoch_call
defers work asynchronously by invoking the provided
.Fa callback
at a later time.
.Fn epoch_wait
(or
.Fn epoch_wait_preempt )
blocks the current thread until the grace period has expired and the work can be
done safely.
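.Pp
For illustration, a preemptible epoch section using a stack-allocated
.Vt struct epoch_tracker
might look like the following sketch, where
.Va foo_epoch ,
.Va foo_list ,
and the list entries are hypothetical:
.Bd -literal
struct epoch_tracker et;

epoch_enter_preempt(foo_epoch, &et);
CK_LIST_FOREACH(f, &foo_list, f_link) {
        /* Entries remain valid until the section is exited. */
        if (f->f_id == id)
                break;
}
epoch_exit_preempt(foo_epoch, &et);
.Ed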
.Pp
Default, non-preemptible epoch wait
.Fn ( epoch_wait )
is guaranteed to have much shorter completion times relative to
preemptible epoch wait
.Fn ( epoch_wait_preempt ) .
(In the default type, none of the threads in an epoch section will be preempted
before completing their sections.)
.Pp
INVARIANTS can assert that a thread is in an epoch by using
.Fn in_epoch .
.Fn in_epoch "epoch"
is equivalent to invoking
.Fn in_epoch_verbose "epoch" "0" .
If
.Cd EPOCH_TRACE
is enabled,
.Fn in_epoch_verbose "epoch" "1"
provides additional verbose debugging information.
.Pp
The epoch API currently does not support sleeping in epoch_preempt sections.
A caller should never call
.Fn epoch_wait
in the middle of an epoch section for the same epoch, as this will lead to a
deadlock.
.Pp
The
.Fn epoch_drain_callbacks
function is used to drain all pending callbacks registered by prior
.Fn epoch_call
invocations on the same epoch.
This function is useful when there are shared memory structure(s)
referred to by the epoch callback(s) which are not refcounted and are
rarely freed.
The typical place for calling this function is right before freeing or
invalidating the shared resource(s) used by the epoch callback(s).
This function can sleep and is not optimized for performance.
.Sh RETURN VALUES
.Fn in_epoch curepoch
will return 1 if curthread is in curepoch, and 0 otherwise.
.Sh EXAMPLES
Async free example:
Thread 1:
.Bd -literal
int
in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
    struct ucred *cred)
{
        /* ... */
        epoch_enter(net_epoch);
        CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
                sa = ifa->ifa_addr;
                if (sa->sa_family != AF_INET)
                        continue;
                sin = (struct sockaddr_in *)sa;
                if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
                        ia = (struct in_ifaddr *)ifa;
                        break;
                }
        }
        epoch_exit(net_epoch);
        /* ... */
}
.Ed
Thread 2:
.Bd -literal
void
ifa_free(struct ifaddr *ifa)
{

        if (refcount_release(&ifa->ifa_refcnt))
                epoch_call(net_epoch, ifa_destroy, &ifa->ifa_epoch_ctx);
}

void
if_purgeaddrs(struct ifnet *ifp)
{

        /* ... */
        IF_ADDR_WLOCK(ifp);
        CK_STAILQ_REMOVE(&ifp->if_addrhead, ifa, ifaddr, ifa_link);
        IF_ADDR_WUNLOCK(ifp);
        ifa_free(ifa);
}
.Ed
.Pp
Thread 1 traverses the ifaddr list in an epoch.
Thread 2 unlinks the entry with the corresponding epoch-safe macro, marks it
as logically free, and then defers deletion.
More general mutation or a synchronous
free would have to follow a call to
.Fn epoch_wait .
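.Pp
As a further sketch (the epoch, structure, and function names below are
hypothetical), a subsystem whose epoch callbacks reference a shared,
non-refcounted structure can use
.Fn epoch_drain_callbacks
to wait for pending callbacks before freeing that structure:
.Bd -literal
void
foo_detach(struct foo_softc *sc)
{

        /*
         * All deferred work referencing sc has already been scheduled
         * with epoch_call(); wait for those callbacks to run before
         * the memory they use goes away.  This may sleep.
         */
        epoch_drain_callbacks(foo_epoch);
        free(sc->sc_shared, M_FOO);
}
.Ed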
.Sh NOTES
The
.Nm
kernel programming interface is under development and is subject to change.
.Sh SEE ALSO
.Xr locking 9 ,
.Xr mtx_pool 9 ,
.Xr mutex 9 ,
.Xr rwlock 9 ,
.Xr sema 9 ,
.Xr sleep 9 ,
.Xr sx 9 ,
.Xr timeout 9
.Sh HISTORY
The
.Nm
framework first appeared in
.Fx 11.0 .
.Sh CAVEATS
One must be cautious when using
.Fn epoch_wait_preempt .
Threads are pinned during epoch sections, so if a thread in a section is then
preempted by a higher priority compute bound thread on that CPU, it can be
prevented from leaving the section indefinitely.
.Pp
Epochs are not a straight replacement for read locks.
Callers must use safe list and tailq traversal routines in an epoch (see
ck_queue).
When modifying a list referenced from an epoch section, safe removal
routines must be used and the caller can no longer modify a list entry
in place.
An item to be modified must be handled with copy-on-write
and frees must be deferred until after a grace period has elapsed.
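.Pp
As a sketch of the copy-on-write pattern (all names are hypothetical), an
entry is modified by publishing an updated copy and deferring the free of the
old version until a grace period has elapsed:
.Bd -literal
new = malloc(sizeof(*new), M_FOO, M_WAITOK);
*new = *old;                    /* copy the existing entry */
new->f_value = value;           /* modify the copy, not the original */

FOO_LIST_WLOCK();               /* writers are serialized by a lock */
CK_LIST_INSERT_AFTER(old, new, f_link);
CK_LIST_REMOVE(old, f_link);
FOO_LIST_WUNLOCK();

/* Readers may still see "old"; free it only after a grace period. */
epoch_call(foo_epoch, foo_destroy, &old->f_epoch_ctx);
.Ed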