1.\" 2.\" Copyright (C) 2018 Matthew Macy <mmacy@FreeBSD.org>. 3.\" 4.\" Redistribution and use in source and binary forms, with or without 5.\" modification, are permitted provided that the following conditions 6.\" are met: 7.\" 1. Redistributions of source code must retain the above copyright 8.\" notice(s), this list of conditions and the following disclaimer as 9.\" the first lines of this file unmodified other than the possible 10.\" addition of one or more copyright notices. 11.\" 2. Redistributions in binary form must reproduce the above copyright 12.\" notice(s), this list of conditions and the following disclaimer in the 13.\" documentation and/or other materials provided with the distribution. 14.\" 15.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY 16.\" EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18.\" DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY 19.\" DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20.\" (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21.\" SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22.\" CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 25.\" DAMAGE. 
26.\" 27.\" $FreeBSD$ 28.\" 29.Dd December 27, 2019 30.Dt EPOCH 9 31.Os 32.Sh NAME 33.Nm epoch , 34.Nm epoch_context , 35.Nm epoch_alloc , 36.Nm epoch_free , 37.Nm epoch_enter , 38.Nm epoch_exit , 39.Nm epoch_wait , 40.Nm epoch_enter_preempt , 41.Nm epoch_exit_preempt , 42.Nm epoch_wait_preempt , 43.Nm epoch_call , 44.Nm epoch_drain_callbacks , 45.Nm in_epoch , 46.Nm in_epoch_verbose 47.Nd kernel epoch based reclamation 48.Sh SYNOPSIS 49.In sys/param.h 50.In sys/proc.h 51.In sys/epoch.h 52.\" Types 53.Bd -literal 54struct epoch; /* Opaque */ 55.Ed 56.Vt typedef "struct epoch *epoch_t" ; 57.Bd -literal 58struct epoch_context { 59 void *data[2]; 60}; 61.Ed 62.Vt typedef "struct epoch_context *epoch_context_t" ; 63.Bd -literal 64struct epoch_tracker; /* Opaque */ 65.Ed 66.Vt typedef "struct epoch_tracker *epoch_tracker_t" ; 67.\" Declarations 68.Ft epoch_t 69.Fn epoch_alloc "const char *name" "int flags" 70.Ft void 71.Fn epoch_free "epoch_t epoch" 72.Ft void 73.Fn epoch_enter "epoch_t epoch" 74.Ft void 75.Fn epoch_exit "epoch_t epoch" 76.Ft void 77.Fn epoch_wait "epoch_t epoch" 78.Ft void 79.Fn epoch_enter_preempt "epoch_t epoch" "epoch_tracker_t et" 80.Ft void 81.Fn epoch_exit_preempt "epoch_t epoch" "epoch_tracker_t et" 82.Ft void 83.Fn epoch_wait_preempt "epoch_t epoch" 84.Ft void 85.Fn epoch_call "epoch_t epoch" "epoch_context_t ctx" "void (*callback)(epoch_context_t)" 86.Ft void 87.Fn epoch_drain_callbacks "epoch_t epoch" 88.Ft int 89.Fn in_epoch "epoch_t epoch" 90.Ft int 91.Fn in_epoch_verbose "epoch_t epoch" "int dump_onfail" 92.Sh DESCRIPTION 93Epochs are used to guarantee liveness and immutability of data by 94deferring reclamation and mutation until a grace period has elapsed. 95Epochs do not have any lock ordering issues. 96Entering and leaving an epoch section will never block. 97.Pp 98Epochs are allocated with 99.Fn epoch_alloc . 100The 101.Fa name 102argument is used for debugging convenience when the 103.Cd EPOCH_TRACE 104kernel option is configured. 
105By default, epochs do not allow preemption during sections. 106By default mutexes cannot be held across 107.Fn epoch_wait_preempt . 108The 109.Fa flags 110specified are formed by 111.Em OR Ns 'ing 112the following values: 113.Bl -tag -offset indent -width Ds 114.It Dv EPOCH_LOCKED 115Permit holding mutexes across 116.Fn epoch_wait_preempt 117(requires 118.Dv EPOCH_PREEMPT ) . 119When doing this one must be cautious of creating a situation where a deadlock 120is possible. 121.It Dv EPOCH_PREEMPT 122The 123.Vt epoch 124will allow preemption during sections. 125Only non-sleepable locks may be acquired during a preemptible epoch. 126The functions 127.Fn epoch_enter_preempt , 128.Fn epoch_exit_preempt , 129and 130.Fn epoch_wait_preempt 131must be used in place of 132.Fn epoch_enter , 133.Fn epoch_exit , 134and 135.Fn epoch_wait , 136respectively. 137.El 138.Pp 139.Vt epoch Ns s 140are freed with 141.Fn epoch_free . 142.Pp 143Threads indicate the start of an epoch critical section by calling 144.Fn epoch_enter 145(or 146.Fn epoch_enter_preempt 147for preemptible epochs). 148Threads call 149.Fn epoch_exit 150(or 151.Fn epoch_exit_preempt 152for preemptible epochs) 153to indicate the end of a critical section. 154.Vt struct epoch_tracker Ns s 155are stack objects whose pointers are passed to 156.Fn epoch_enter_preempt 157and 158.Fn epoch_exit_preempt 159(much like 160.Vt struct rm_priotracker ) . 161.Pp 162Threads can defer work until a grace period has expired since any thread has 163entered the epoch either synchronously or asynchronously. 164.Fn epoch_call 165defers work asynchronously by invoking the provided 166.Fa callback 167at a later time. 168.Fn epoch_wait 169(or 170.Fn epoch_wait_preempt ) 171blocks the current thread until the grace period has expired and the work can be 172done safely. 
173.Pp 174Default, non-preemptible epoch wait 175.Fn ( epoch_wait ) 176is guaranteed to have much shorter completion times relative to 177preemptible epoch wait 178.Fn ( epoch_wait_preempt ) . 179(In the default type, none of the threads in an epoch section will be preempted 180before completing its section.) 181.Pp 182INVARIANTS can assert that a thread is in an epoch by using 183.Fn in_epoch . 184.Fn in_epoch "epoch" 185is equivalent to invoking 186.Fn in_epoch_verbose "epoch" "0" . 187If 188.Cd EPOCH_TRACE 189is enabled, 190.Fn in_epoch_verbose "epoch" "1" 191provides additional verbose debugging information. 192.Pp 193The epoch API currently does not support sleeping in epoch_preempt sections. 194A caller should never call 195.Fn epoch_wait 196in the middle of an epoch section for the same epoch as this will lead to a deadlock. 197.Pp 198The 199.Fn epoch_drain_callbacks 200function is used to drain all pending callbacks which have been invoked by prior 201.Fn epoch_call 202function calls on the same epoch. 203This function is useful when there are shared memory structure(s) 204referred to by the epoch callback(s) which are not refcounted and are 205rarely freed. 206The typical place for calling this function is right before freeing or 207invalidating the shared resource(s) used by the epoch callback(s). 208This function can sleep and is not optimized for performance. 209.Sh RETURN VALUES 210.Fn in_epoch curepoch 211will return 1 if curthread is in curepoch, 0 otherwise. 212.Sh CAVEATS 213One must be cautious when using 214.Fn epoch_wait_preempt . 215Threads are pinned during epoch sections, so if a thread in a section is then 216preempted by a higher priority compute bound thread on that CPU, it can be 217prevented from leaving the section indefinitely. 218.Pp 219Epochs are not a straight replacement for read locks. 220Callers must use safe list and tailq traversal routines in an epoch (see ck_queue). 
221When modifying a list referenced from an epoch section safe removal 222routines must be used and the caller can no longer modify a list entry 223in place. 224An item to be modified must be handled with copy on write 225and frees must be deferred until after a grace period has elapsed. 226.Sh EXAMPLES 227Async free example: 228Thread 1: 229.Bd -literal 230int 231in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr, 232 struct ucred *cred) 233{ 234 /* ... */ 235 epoch_enter(net_epoch); 236 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 237 sa = ifa->ifa_addr; 238 if (sa->sa_family != AF_INET) 239 continue; 240 sin = (struct sockaddr_in *)sa; 241 if (prison_check_ip4(cred, &sin->sin_addr) == 0) { 242 ia = (struct in_ifaddr *)ifa; 243 break; 244 } 245 } 246 epoch_exit(net_epoch); 247 /* ... */ 248} 249.Ed 250Thread 2: 251.Bd -literal 252void 253ifa_free(struct ifaddr *ifa) 254{ 255 256 if (refcount_release(&ifa->ifa_refcnt)) 257 epoch_call(net_epoch, &ifa->ifa_epoch_ctx, ifa_destroy); 258} 259 260void 261if_purgeaddrs(struct ifnet *ifp) 262{ 263 264 /* .... */ 265 IF_ADDR_WLOCK(ifp); 266 CK_STAILQ_REMOVE(&ifp->if_addrhead, ifa, ifaddr, ifa_link); 267 IF_ADDR_WUNLOCK(ifp); 268 ifa_free(ifa); 269} 270.Ed 271.Pp 272Thread 1 traverses the ifaddr list in an epoch. 273Thread 2 unlinks with the corresponding epoch safe macro, marks as logically free, 274and then defers deletion. 275More general mutation or a synchronous 276free would have to follow a call to 277.Fn epoch_wait . 278.Sh NOTES 279The 280.Nm 281kernel programming interface is under development and is subject to change. 283.Sh SEE ALSO 284.Xr locking 9 , 285.Xr mtx_pool 9 , 286.Xr mutex 9 , 287.Xr rwlock 9 , 288.Xr sema 9 , 289.Xr sleep 9 , 290.Xr sx 9 , 291.Xr timeout 9 292