.\"
.\" Copyright (C) 2018 Matthew Macy <mmacy@FreeBSD.org>.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\"    notice(s), this list of conditions and the following disclaimer as
.\"    the first lines of this file unmodified other than the possible
.\"    addition of one or more copyright notices.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\"    notice(s), this list of conditions and the following disclaimer in the
.\"    documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
.\" EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
.\" DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
.\" DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
.\" (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
.\" SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
.\" CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
.\" DAMAGE.
.\"
.Dd April 30, 2020
.Dt EPOCH 9
.Os
.Sh NAME
.Nm epoch ,
.Nm epoch_context ,
.Nm epoch_alloc ,
.Nm epoch_free ,
.Nm epoch_enter ,
.Nm epoch_exit ,
.Nm epoch_wait ,
.Nm epoch_enter_preempt ,
.Nm epoch_exit_preempt ,
.Nm epoch_wait_preempt ,
.Nm epoch_call ,
.Nm epoch_drain_callbacks ,
.Nm in_epoch ,
.Nm in_epoch_verbose
.Nd kernel epoch based reclamation
.Sh SYNOPSIS
.In sys/param.h
.In sys/proc.h
.In sys/epoch.h
.\" Types
.Bd -literal
struct epoch;		/* Opaque */
.Ed
.Vt typedef "struct epoch *epoch_t" ;
.Bd -literal
struct epoch_context {
	void	*data[2];
};
.Ed
.Vt typedef "struct epoch_context *epoch_context_t" ;
.Vt typedef "void epoch_callback_t(epoch_context_t)" ;
.Bd -literal
struct epoch_tracker;	/* Opaque */
.Ed
.Vt typedef "struct epoch_tracker *epoch_tracker_t" ;
.\" Declarations
.Ft epoch_t
.Fn epoch_alloc "const char *name" "int flags"
.Ft void
.Fn epoch_free "epoch_t epoch"
.Ft void
.Fn epoch_enter "epoch_t epoch"
.Ft void
.Fn epoch_exit "epoch_t epoch"
.Ft void
.Fn epoch_wait "epoch_t epoch"
.Ft void
.Fn epoch_enter_preempt "epoch_t epoch" "epoch_tracker_t et"
.Ft void
.Fn epoch_exit_preempt "epoch_t epoch" "epoch_tracker_t et"
.Ft void
.Fn epoch_wait_preempt "epoch_t epoch"
.Ft void
.Fn epoch_call "epoch_t epoch" "epoch_callback_t callback" "epoch_context_t ctx"
.Ft void
.Fn epoch_drain_callbacks "epoch_t epoch"
.Ft int
.Fn in_epoch "epoch_t epoch"
.Ft int
.Fn in_epoch_verbose "epoch_t epoch" "int dump_onfail"
.Sh DESCRIPTION
Epochs are used to guarantee liveness and immutability of data by
deferring reclamation and mutation until a grace period has elapsed.
Epochs do not have any lock ordering issues.
Entering and leaving an epoch section will never block.
.Pp
Epochs are allocated with
.Fn epoch_alloc .
The
.Fa name
argument is used for debugging convenience when the
.Cd EPOCH_TRACE
kernel option is configured.
By default, epochs do not allow preemption during sections.
By default, mutexes cannot be held across
.Fn epoch_wait_preempt .
The
.Fa flags
specified are formed by
.Em OR Ns 'ing
the following values:
.Bl -tag -offset indent -width Ds
.It Dv EPOCH_LOCKED
Permit holding mutexes across
.Fn epoch_wait_preempt
(requires
.Dv EPOCH_PREEMPT ) .
When doing this one must be cautious of creating a situation where a deadlock
is possible.
.It Dv EPOCH_PREEMPT
The
.Vt epoch
will allow preemption during sections.
Only non-sleepable locks may be acquired during a preemptible epoch.
The functions
.Fn epoch_enter_preempt ,
.Fn epoch_exit_preempt ,
and
.Fn epoch_wait_preempt
must be used in place of
.Fn epoch_enter ,
.Fn epoch_exit ,
and
.Fn epoch_wait ,
respectively.
.El
.Pp
.Vt epoch Ns s
are freed with
.Fn epoch_free .
.Pp
Threads indicate the start of an epoch critical section by calling
.Fn epoch_enter
(or
.Fn epoch_enter_preempt
for preemptible epochs).
Threads call
.Fn epoch_exit
(or
.Fn epoch_exit_preempt
for preemptible epochs)
to indicate the end of a critical section.
.Vt struct epoch_tracker Ns s
are stack objects whose pointers are passed to
.Fn epoch_enter_preempt
and
.Fn epoch_exit_preempt
(much like
.Vt struct rm_priotracker ) .
.Pp
Threads can defer work until a grace period has expired since any thread has
entered the epoch either synchronously or asynchronously.
.Fn epoch_call
defers work asynchronously by invoking the provided
.Fa callback
at a later time.
.Fn epoch_wait
(or
.Fn epoch_wait_preempt )
blocks the current thread until the grace period has expired and the work can be
done safely.
.Pp
Default, non-preemptible epoch wait
.Fn ( epoch_wait )
is guaranteed to have much shorter completion times relative to
preemptible epoch wait
.Fn ( epoch_wait_preempt ) .
(In the default type, none of the threads in an epoch section will be preempted
before completing their sections.)
.Pp
INVARIANTS can assert that a thread is in an epoch by using
.Fn in_epoch .
.Fn in_epoch "epoch"
is equivalent to invoking
.Fn in_epoch_verbose "epoch" "0" .
If
.Cd EPOCH_TRACE
is enabled,
.Fn in_epoch_verbose "epoch" "1"
provides additional verbose debugging information.
.Pp
The epoch API currently does not support sleeping in epoch_preempt sections.
A caller should never call
.Fn epoch_wait
in the middle of an epoch section for the same epoch, as this will lead to a
deadlock.
.Pp
The
.Fn epoch_drain_callbacks
function is used to drain all pending callbacks which have been scheduled by
prior
.Fn epoch_call
function calls on the same epoch.
This function is useful when there are shared memory structure(s)
referred to by the epoch callback(s) which are not refcounted and are
rarely freed.
The typical place for calling this function is right before freeing or
invalidating the shared resource(s) used by the epoch callback(s).
This function can sleep and is not optimized for performance.
.Sh RETURN VALUES
.Fn in_epoch curepoch
will return 1 if curthread is in curepoch, 0 otherwise.
.Sh EXAMPLES
Async free example:
Thread 1:
.Bd -literal
int
in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_laddr *laddr,
    struct ucred *cred)
{
	/* ...
*/
	epoch_enter(net_epoch);
	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
		sa = ifa->ifa_addr;
		if (sa->sa_family != AF_INET)
			continue;
		sin = (struct sockaddr_in *)sa;
		if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
			ia = (struct in_ifaddr *)ifa;
			break;
		}
	}
	epoch_exit(net_epoch);
	/* ... */
}
.Ed
Thread 2:
.Bd -literal
void
ifa_free(struct ifaddr *ifa)
{

	if (refcount_release(&ifa->ifa_refcnt))
		epoch_call(net_epoch, ifa_destroy, &ifa->ifa_epoch_ctx);
}

void
if_purgeaddrs(struct ifnet *ifp)
{

	/* .... */
	IF_ADDR_WLOCK(ifp);
	CK_STAILQ_REMOVE(&ifp->if_addrhead, ifa, ifaddr, ifa_link);
	IF_ADDR_WUNLOCK(ifp);
	ifa_free(ifa);
}
.Ed
.Pp
Thread 1 traverses the ifaddr list in an epoch.
Thread 2 unlinks with the corresponding epoch safe macro, marks as logically free,
and then defers deletion.
More general mutation or a synchronous
free would have to follow a call to
.Fn epoch_wait .
.Sh NOTES
The
.Nm
kernel programming interface is under development and is subject to change.
.Sh SEE ALSO
.Xr callout 9 ,
.Xr locking 9 ,
.Xr mtx_pool 9 ,
.Xr mutex 9 ,
.Xr rwlock 9 ,
.Xr sema 9 ,
.Xr sleep 9 ,
.Xr sx 9
.Sh HISTORY
The
.Nm
framework first appeared in
.Fx 11.0 .
.Sh CAVEATS
One must be cautious when using
.Fn epoch_wait_preempt .
Threads are pinned during epoch sections, so if a thread in a section is then
preempted by a higher priority compute bound thread on that CPU, it can be
prevented from leaving the section indefinitely.
.Pp
Epochs are not a straight replacement for read locks.
Callers must use safe list and tailq traversal routines in an epoch (see ck_queue).
When modifying a list referenced from an epoch section safe removal
routines must be used and the caller can no longer modify a list entry
in place.
An item to be modified must be handled with copy-on-write
and frees must be deferred until after a grace period has elapsed.