1.\" 2.\" Copyright (C) 2018 Matthew Macy <mmacy@FreeBSD.org>. 3.\" 4.\" Redistribution and use in source and binary forms, with or without 5.\" modification, are permitted provided that the following conditions 6.\" are met: 7.\" 1. Redistributions of source code must retain the above copyright 8.\" notice(s), this list of conditions and the following disclaimer as 9.\" the first lines of this file unmodified other than the possible 10.\" addition of one or more copyright notices. 11.\" 2. Redistributions in binary form must reproduce the above copyright 12.\" notice(s), this list of conditions and the following disclaimer in the 13.\" documentation and/or other materials provided with the distribution. 14.\" 15.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY 16.\" EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18.\" DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY 19.\" DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20.\" (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21.\" SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22.\" CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 25.\" DAMAGE. 
26.\" 27.\" $FreeBSD$ 28.\" 29.Dd April 30, 2020 30.Dt EPOCH 9 31.Os 32.Sh NAME 33.Nm epoch , 34.Nm epoch_context , 35.Nm epoch_alloc , 36.Nm epoch_free , 37.Nm epoch_enter , 38.Nm epoch_exit , 39.Nm epoch_wait , 40.Nm epoch_enter_preempt , 41.Nm epoch_exit_preempt , 42.Nm epoch_wait_preempt , 43.Nm epoch_call , 44.Nm epoch_drain_callbacks , 45.Nm in_epoch , 46.Nm in_epoch_verbose 47.Nd kernel epoch based reclamation 48.Sh SYNOPSIS 49.In sys/param.h 50.In sys/proc.h 51.In sys/epoch.h 52.\" Types 53.Bd -literal 54struct epoch; /* Opaque */ 55.Ed 56.Vt typedef "struct epoch *epoch_t" ; 57.Bd -literal 58struct epoch_context { 59 void *data[2]; 60}; 61.Ed 62.Vt typedef "struct epoch_context *epoch_context_t" ; 63.Vt typedef "void epoch_callback_t(epoch_context_t)" ; 64.Bd -literal 65struct epoch_tracker; /* Opaque */ 66.Ed 67.Vt typedef "struct epoch_tracker *epoch_tracker_t" ; 68.\" Declarations 69.Ft epoch_t 70.Fn epoch_alloc "const char *name" "int flags" 71.Ft void 72.Fn epoch_free "epoch_t epoch" 73.Ft void 74.Fn epoch_enter "epoch_t epoch" 75.Ft void 76.Fn epoch_exit "epoch_t epoch" 77.Ft void 78.Fn epoch_wait "epoch_t epoch" 79.Ft void 80.Fn epoch_enter_preempt "epoch_t epoch" "epoch_tracker_t et" 81.Ft void 82.Fn epoch_exit_preempt "epoch_t epoch" "epoch_tracker_t et" 83.Ft void 84.Fn epoch_wait_preempt "epoch_t epoch" 85.Ft void 86.Fn epoch_call "epoch_t epoch" "epoch_callback_t callback" "epoch_context_t ctx" 87.Ft void 88.Fn epoch_drain_callbacks "epoch_t epoch" 89.Ft int 90.Fn in_epoch "epoch_t epoch" 91.Ft int 92.Fn in_epoch_verbose "epoch_t epoch" "int dump_onfail" 93.Sh DESCRIPTION 94Epochs are used to guarantee liveness and immutability of data by 95deferring reclamation and mutation until a grace period has elapsed. 96Epochs do not have any lock ordering issues. 97Entering and leaving an epoch section will never block. 98.Pp 99Epochs are allocated with 100.Fn epoch_alloc . 
101The 102.Fa name 103argument is used for debugging convenience when the 104.Cd EPOCH_TRACE 105kernel option is configured. 106By default, epochs do not allow preemption during sections. 107By default mutexes cannot be held across 108.Fn epoch_wait_preempt . 109The 110.Fa flags 111specified are formed by 112.Em OR Ns 'ing 113the following values: 114.Bl -tag -offset indent -width Ds 115.It Dv EPOCH_LOCKED 116Permit holding mutexes across 117.Fn epoch_wait_preempt 118(requires 119.Dv EPOCH_PREEMPT ) . 120When doing this one must be cautious of creating a situation where a deadlock 121is possible. 122.It Dv EPOCH_PREEMPT 123The 124.Vt epoch 125will allow preemption during sections. 126Only non-sleepable locks may be acquired during a preemptible epoch. 127The functions 128.Fn epoch_enter_preempt , 129.Fn epoch_exit_preempt , 130and 131.Fn epoch_wait_preempt 132must be used in place of 133.Fn epoch_enter , 134.Fn epoch_exit , 135and 136.Fn epoch_wait , 137respectively. 138.El 139.Pp 140.Vt epoch Ns s 141are freed with 142.Fn epoch_free . 143.Pp 144Threads indicate the start of an epoch critical section by calling 145.Fn epoch_enter 146(or 147.Fn epoch_enter_preempt 148for preemptible epochs). 149Threads call 150.Fn epoch_exit 151(or 152.Fn epoch_exit_preempt 153for preemptible epochs) 154to indicate the end of a critical section. 155.Vt struct epoch_tracker Ns s 156are stack objects whose pointers are passed to 157.Fn epoch_enter_preempt 158and 159.Fn epoch_exit_preempt 160(much like 161.Vt struct rm_priotracker ) . 162.Pp 163Threads can defer work until a grace period has expired since any thread has 164entered the epoch either synchronously or asynchronously. 165.Fn epoch_call 166defers work asynchronously by invoking the provided 167.Fa callback 168at a later time. 169.Fn epoch_wait 170(or 171.Fn epoch_wait_preempt ) 172blocks the current thread until the grace period has expired and the work can be 173done safely. 
174.Pp 175Default, non-preemptible epoch wait 176.Fn ( epoch_wait ) 177is guaranteed to have much shorter completion times relative to 178preemptible epoch wait 179.Fn ( epoch_wait_preempt ) . 180(In the default type, none of the threads in an epoch section will be preempted 181before completing its section.) 182.Pp 183INVARIANTS can assert that a thread is in an epoch by using 184.Fn in_epoch . 185.Fn in_epoch "epoch" 186is equivalent to invoking 187.Fn in_epoch_verbose "epoch" "0" . 188If 189.Cd EPOCH_TRACE 190is enabled, 191.Fn in_epoch_verbose "epoch" "1" 192provides additional verbose debugging information. 193.Pp 194The epoch API currently does not support sleeping in epoch_preempt sections. 195A caller should never call 196.Fn epoch_wait 197in the middle of an epoch section for the same epoch as this will lead to a deadlock. 198.Pp 199The 200.Fn epoch_drain_callbacks 201function is used to drain all pending callbacks which have been scheduled by prior 202.Fn epoch_call 203function calls on the same epoch. 204This function is useful when there are shared memory structure(s) 205referred to by the epoch callback(s) which are not refcounted and are 206rarely freed. 207The typical place for calling this function is right before freeing or 208invalidating the shared resource(s) used by the epoch callback(s). 209This function can sleep and is not optimized for performance. 210.Sh RETURN VALUES 211.Fn in_epoch curepoch 212will return 1 if curthread is in curepoch, 0 otherwise. 213.Sh EXAMPLES 214Async free example: 215Thread 1: 216.Bd -literal 217int 218in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr, 219 struct ucred *cred) 220{ 221 /* ... 
*/ 222 epoch_enter(net_epoch); 223 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 224 sa = ifa->ifa_addr; 225 if (sa->sa_family != AF_INET) 226 continue; 227 sin = (struct sockaddr_in *)sa; 228 if (prison_check_ip4(cred, &sin->sin_addr) == 0) { 229 ia = (struct in_ifaddr *)ifa; 230 break; 231 } 232 } 233 epoch_exit(net_epoch); 234 /* ... */ 235} 236.Ed 237Thread 2: 238.Bd -literal 239void 240ifa_free(struct ifaddr *ifa) 241{ 242 243 if (refcount_release(&ifa->ifa_refcnt)) 244 epoch_call(net_epoch, ifa_destroy, &ifa->ifa_epoch_ctx); 245} 246 247void 248if_purgeaddrs(struct ifnet *ifp) 249{ 250 251 /* .... */ 252 IF_ADDR_WLOCK(ifp); 253 CK_STAILQ_REMOVE(&ifp->if_addrhead, ifa, ifaddr, ifa_link); 254 IF_ADDR_WUNLOCK(ifp); 255 ifa_free(ifa); 256} 257.Ed 258.Pp 259Thread 1 traverses the ifaddr list in an epoch. 260Thread 2 unlinks with the corresponding epoch-safe macro, marks it as logically free, 261and then defers deletion. 262More general mutation or a synchronous 263free would have to follow a call to 264.Fn epoch_wait . 265.Sh NOTES 266The 267.Nm 268kernel programming interface is under development and is subject to change. 269.Sh SEE ALSO 270.Xr locking 9 , 271.Xr mtx_pool 9 , 272.Xr mutex 9 , 273.Xr rwlock 9 , 274.Xr sema 9 , 275.Xr sleep 9 , 276.Xr sx 9 , 277.Xr timeout 9 278.Sh HISTORY 279The 280.Nm 281framework first appeared in 282.Fx 11.0 . 283.Sh CAVEATS 284One must be cautious when using 285.Fn epoch_wait_preempt . 286Threads are pinned during epoch sections, so if a thread in a section is then 287preempted by a higher priority compute bound thread on that CPU, it can be 288prevented from leaving the section indefinitely. 289.Pp 290Epochs are not a straight replacement for read locks. 291Callers must use safe list and tailq traversal routines in an epoch (see ck_queue). 292When modifying a list referenced from an epoch section safe removal 293routines must be used and the caller can no longer modify a list entry 294in place. 
295An item to be modified must be handled with copy on write 296and frees must be deferred until after a grace period has elapsed. 297