epoch.9 (a63915c2d7ff177ce364488f86eff99949402051) epoch.9 (2c1962aba6e73dd0c430636082145f92d636ff13)
1.\"
2.\" Copyright (C) 2018 Matthew Macy <mmacy@FreeBSD.org>.
3.\"
4.\" Redistribution and use in source and binary forms, with or without
5.\" modification, are permitted provided that the following conditions
6.\" are met:
7.\" 1. Redistributions of source code must retain the above copyright
8.\" notice(s), this list of conditions and the following disclaimer as

--- 12 unchanged lines hidden (view full) ---

21.\" SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22.\" CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
25.\" DAMAGE.
26.\"
27.\" $FreeBSD$
28.\"
1.\"
2.\" Copyright (C) 2018 Matthew Macy <mmacy@FreeBSD.org>.
3.\"
4.\" Redistribution and use in source and binary forms, with or without
5.\" modification, are permitted provided that the following conditions
6.\" are met:
7.\" 1. Redistributions of source code must retain the above copyright
8.\" notice(s), this list of conditions and the following disclaimer as

--- 12 unchanged lines hidden (view full) ---

21.\" SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22.\" CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
25.\" DAMAGE.
26.\"
27.\" $FreeBSD$
28.\"
29.Dd June 28, 2019
29.Dd December 27, 2019
30.Dt EPOCH 9
31.Os
32.Sh NAME
33.Nm epoch ,
34.Nm epoch_context ,
35.Nm epoch_alloc ,
36.Nm epoch_free ,
37.Nm epoch_enter ,
38.Nm epoch_exit ,
39.Nm epoch_wait ,
30.Dt EPOCH 9
31.Os
32.Sh NAME
33.Nm epoch ,
34.Nm epoch_context ,
35.Nm epoch_alloc ,
36.Nm epoch_free ,
37.Nm epoch_enter ,
38.Nm epoch_exit ,
39.Nm epoch_wait ,
40.Nm epoch_enter_preempt ,
41.Nm epoch_exit_preempt ,
42.Nm epoch_wait_preempt ,
40.Nm epoch_call ,
41.Nm epoch_drain_callbacks ,
42.Nm in_epoch ,
43.Nm epoch_call ,
44.Nm epoch_drain_callbacks ,
45.Nm in_epoch ,
46.Nm in_epoch_verbose ,
43.Nd kernel epoch based reclamation
44.Sh SYNOPSIS
45.In sys/param.h
46.In sys/proc.h
47.In sys/epoch.h
47.Nd kernel epoch based reclamation
48.Sh SYNOPSIS
49.In sys/param.h
50.In sys/proc.h
51.In sys/epoch.h
52.\" Types
53.Bd -literal
54struct epoch; /* Opaque */
55.Ed
56.Vt typedef "struct epoch *epoch_t" ;
57.Bd -literal
58struct epoch_context {
59 void *data[2];
60};
61.Ed
62.Vt typedef "struct epoch_context *epoch_context_t" ;
63.Bd -literal
64struct epoch_tracker; /* Opaque */
65.Ed
66.Vt typedef "struct epoch_tracker *epoch_tracker_t" ;
67.\" Declarations
48.Ft epoch_t
68.Ft epoch_t
49.Fn epoch_alloc "int flags"
69.Fn epoch_alloc "const char *name" "int flags"
50.Ft void
70.Ft void
71.Fn epoch_free "epoch_t epoch"
72.Ft void
51.Fn epoch_enter "epoch_t epoch"
52.Ft void
73.Fn epoch_enter "epoch_t epoch"
74.Ft void
53.Fn epoch_enter_preempt "epoch_t epoch" "epoch_tracker_t et"
54.Ft void
55.Fn epoch_exit "epoch_t epoch"
56.Ft void
75.Fn epoch_exit "epoch_t epoch"
76.Ft void
57.Fn epoch_exit_preempt "epoch_t epoch" "epoch_tracker_t et"
58.Ft void
59.Fn epoch_wait "epoch_t epoch"
60.Ft void
77.Fn epoch_wait "epoch_t epoch"
78.Ft void
79.Fn epoch_enter_preempt "epoch_t epoch" "epoch_tracker_t et"
80.Ft void
81.Fn epoch_exit_preempt "epoch_t epoch" "epoch_tracker_t et"
82.Ft void
61.Fn epoch_wait_preempt "epoch_t epoch"
62.Ft void
83.Fn epoch_wait_preempt "epoch_t epoch"
84.Ft void
63.Fn epoch_call "epoch_t epoch" "epoch_context_t ctx" "void (*callback) (epoch_context_t)"
85.Fn epoch_call "epoch_t epoch" "epoch_context_t ctx" "void (*callback)(epoch_context_t)"
64.Ft void
65.Fn epoch_drain_callbacks "epoch_t epoch"
66.Ft int
67.Fn in_epoch "epoch_t epoch"
86.Ft void
87.Fn epoch_drain_callbacks "epoch_t epoch"
88.Ft int
89.Fn in_epoch "epoch_t epoch"
90.Ft int
91.Fn in_epoch_verbose "epoch_t epoch" "int dump_onfail"
68.Sh DESCRIPTION
69Epochs are used to guarantee liveness and immutability of data by
70deferring reclamation and mutation until a grace period has elapsed.
71Epochs do not have any lock ordering issues.
72Entering and leaving an epoch section will never block.
73.Pp
74Epochs are allocated with
92.Sh DESCRIPTION
93Epochs are used to guarantee liveness and immutability of data by
94deferring reclamation and mutation until a grace period has elapsed.
95Epochs do not have any lock ordering issues.
96Entering and leaving an epoch section will never block.
97.Pp
98Epochs are allocated with
75.Fn epoch_alloc
76and freed with
99.Fn epoch_alloc .
100The
101.Fa name
102argument is used for debugging convenience when the
103.Cd EPOCH_TRACE
104kernel option is configured.
105By default, epochs do not allow preemption during sections.
106By default mutexes cannot be held across
107.Fn epoch_wait_preempt .
108The
109.Fa flags
110specified are formed by
111.Em OR Ns 'ing
112the following values:
113.Bl -tag -offset indent -width Ds
114.It Dv EPOCH_LOCKED
115Permit holding mutexes across
116.Fn epoch_wait_preempt
117(requires
118.Dv EPOCH_PREEMPT ) .
119When doing this one must be cautious of creating a situation where a deadlock
120is possible.
121.It Dv EPOCH_PREEMPT
122The
123.Vt epoch
124will allow preemption during sections.
125Only non-sleepable locks may be acquired during a preemptible epoch.
126The functions
127.Fn epoch_enter_preempt ,
128.Fn epoch_exit_preempt ,
129and
130.Fn epoch_wait_preempt
131must be used in place of
132.Fn epoch_enter ,
133.Fn epoch_exit ,
134and
135.Fn epoch_wait ,
136respectively.
137.El
138.Pp
139.Vt epoch Ns s
140are freed with
77.Fn epoch_free .
141.Fn epoch_free .
78The flags passed to epoch_alloc determine whether preemption is
79allowed during a section or not (the default), as specified by
80EPOCH_PREEMPT.
142.Pp
81Threads indicate the start of an epoch critical section by calling
143Threads indicate the start of an epoch critical section by calling
82.Fn epoch_enter .
83The end of a critical section is indicated by calling
84.Fn epoch_exit .
85The _preempt variants can be used around code which requires preemption.
86A thread can wait until a grace period has elapsed
87since any threads have entered
88the epoch by calling
89.Fn epoch_wait
90or
91.Fn epoch_wait_preempt ,
92depending on the epoch type.
93The use of a default epoch type allows one to use
94.Fn epoch_wait
95which is guaranteed to have much shorter completion times since
96we know that none of the threads in an epoch section will be preempted
97before completing their sections.
98If the thread can't sleep or is otherwise in a performance sensitive
99path it can ensure that a grace period has elapsed by calling
100.Fn epoch_call
101with a callback with any work that needs to wait for an epoch to elapse.
102Only non-sleepable locks can be acquired during a section protected by
144.Fn epoch_enter
145(or
103.Fn epoch_enter_preempt
146.Fn epoch_enter_preempt
147for preemptible epochs).
148Threads call
149.Fn epoch_exit
150(or
151.Fn epoch_exit_preempt
152for preemptible epochs)
153to indicate the end of a critical section.
154.Vt struct epoch_tracker Ns s
155are stack objects whose pointers are passed to
156.Fn epoch_enter_preempt
104and
157and
105.Fn epoch_exit_preempt .
158.Fn epoch_exit_preempt
159(much like
160.Vt struct rm_priotracker ) .
161.Pp
162Threads can defer work until a grace period has expired since any thread has
163entered the epoch either synchronously or asynchronously.
164.Fn epoch_call
165defers work asynchronously by invoking the provided
166.Fa callback
167at a later time.
168.Fn epoch_wait
169(or
170.Fn epoch_wait_preempt )
171blocks the current thread until the grace period has expired and the work can be
172done safely.
173.Pp
174Default, non-preemptible epoch wait
175.Fn ( epoch_wait )
176is guaranteed to have much shorter completion times relative to
177preemptible epoch wait
178.Fn ( epoch_wait_preempt ) .
179(In the default type, none of the threads in an epoch section will be preempted
180before completing their sections.)
181.Pp
106INVARIANTS can assert that a thread is in an epoch by using
107.Fn in_epoch .
182INVARIANTS can assert that a thread is in an epoch by using
183.Fn in_epoch .
184.Fn in_epoch "epoch"
185is equivalent to invoking
186.Fn in_epoch_verbose "epoch" "0" .
187If
188.Cd EPOCH_TRACE
189is enabled,
190.Fn in_epoch_verbose "epoch" "1"
191provides additional verbose debugging information.
108.Pp
109The epoch API currently does not support sleeping in epoch_preempt sections.
110A caller should never call
111.Fn epoch_wait
112in the middle of an epoch section for the same epoch, as this will lead to a deadlock.
113.Pp
192.Pp
193The epoch API currently does not support sleeping in epoch_preempt sections.
194A caller should never call
195.Fn epoch_wait
196in the middle of an epoch section for the same epoch, as this will lead to a deadlock.
197.Pp
114By default mutexes cannot be held across
115.Fn epoch_wait_preempt .
116To permit this the epoch must be allocated with
117EPOCH_LOCKED.
118When doing this one must be cautious of creating a situation where a deadlock is
119possible.
Note that epochs are not a straight replacement for read locks.
120Callers must use safe list and tailq traversal routines in an epoch (see ck_queue).
121When modifying a list referenced from an epoch section safe removal
122routines must be used and the caller can no longer modify a list entry
123in place.
124An item to be modified must be handled with copy on write
125and frees must be deferred until after a grace period has elapsed.
126.Pp
127The
128.Fn epoch_drain_callbacks
129function is used to drain all pending callbacks which have been invoked by prior
130.Fn epoch_call
131function calls on the same epoch.
132This function is useful when there are shared memory structure(s)
133referred to by the epoch callback(s) which are not refcounted and are
134rarely freed.
135The typical place for calling this function is right before freeing or
136invalidating the shared resource(s) used by the epoch callback(s).
137This function can sleep and is not optimized for performance.
138.Sh RETURN VALUES
139.Fn in_epoch curepoch
140will return 1 if curthread is in curepoch, 0 otherwise.
141.Sh CAVEATS
142One must be cautious when using
198The
199.Fn epoch_drain_callbacks
200function is used to drain all pending callbacks which have been invoked by prior
201.Fn epoch_call
202function calls on the same epoch.
203This function is useful when there are shared memory structure(s)
204referred to by the epoch callback(s) which are not refcounted and are
205rarely freed.
206The typical place for calling this function is right before freeing or
207invalidating the shared resource(s) used by the epoch callback(s).
208This function can sleep and is not optimized for performance.
209.Sh RETURN VALUES
210.Fn in_epoch curepoch
211will return 1 if curthread is in curepoch, 0 otherwise.
212.Sh CAVEATS
213One must be cautious when using
143.Fn epoch_wait_preempt .
144Threads are pinned during epoch sections, so if a thread in a section is then
145preempted by a higher priority compute bound thread on that CPU it can be
146prevented from leaving the section.
147Thus the wait time for the waiter is
148potentially unbounded.
214.Fn epoch_wait_preempt .
215Threads are pinned during epoch sections, so if a thread in a section is then
216preempted by a higher priority compute bound thread on that CPU, it can be
217prevented from leaving the section indefinitely.
218.Pp
219Epochs are not a straight replacement for read locks.
220Callers must use safe list and tailq traversal routines in an epoch (see ck_queue).
221When modifying a list referenced from an epoch section safe removal
222routines must be used and the caller can no longer modify a list entry
223in place.
224An item to be modified must be handled with copy on write
225and frees must be deferred until after a grace period has elapsed.
149.Sh EXAMPLES
150Async free example:
151Thread 1:
152.Bd -literal
153int
154in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_laddr *laddr,
155 struct ucred *cred)
156{
226.Sh EXAMPLES
227Async free example:
228Thread 1:
229.Bd -literal
230int
231in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_laddr *laddr,
232 struct ucred *cred)
233{
157 /* ... */
158 epoch_enter(net_epoch);
234 /* ... */
235 epoch_enter(net_epoch);
159 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
160 sa = ifa->ifa_addr;
161 if (sa->sa_family != AF_INET)
162 continue;
163 sin = (struct sockaddr_in *)sa;
164 if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
165 ia = (struct in_ifaddr *)ifa;
166 break;
167 }
168 }
169 epoch_exit(net_epoch);
236 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
237 sa = ifa->ifa_addr;
238 if (sa->sa_family != AF_INET)
239 continue;
240 sin = (struct sockaddr_in *)sa;
241 if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
242 ia = (struct in_ifaddr *)ifa;
243 break;
244 }
245 }
246 epoch_exit(net_epoch);
170 /* ... */
247 /* ... */
171}
172.Ed
173Thread 2:
174.Bd -literal
175void
176ifa_free(struct ifaddr *ifa)
177{
178

--- 14 unchanged lines hidden (view full) ---

193.Ed
194.Pp
195Thread 1 traverses the ifaddr list in an epoch.
196Thread 2 unlinks with the corresponding epoch safe macro, marks as logically free,
197and then defers deletion.
198More general mutation or a synchronous
199free would have to follow a call to
200.Fn epoch_wait .
248}
249.Ed
250Thread 2:
251.Bd -literal
252void
253ifa_free(struct ifaddr *ifa)
254{
255

--- 14 unchanged lines hidden (view full) ---

270.Ed
271.Pp
272Thread 1 traverses the ifaddr list in an epoch.
273Thread 2 unlinks with the corresponding epoch safe macro, marks as logically free,
274and then defers deletion.
275More general mutation or a synchronous
276free would have to follow a call to
277.Fn epoch_wait .
201.Sh ERRORS
202None.
203.Sh NOTES
204The
205.Nm
206kernel programming interface is under development and is subject to change.
207.El
208.Sh SEE ALSO
209.Xr locking 9 ,
210.Xr mtx_pool 9 ,
211.Xr mutex 9 ,
212.Xr rwlock 9 ,
213.Xr sema 9 ,
214.Xr sleep 9 ,
215.Xr sx 9 ,
216.Xr timeout 9
278.Sh NOTES
279The
280.Nm
281kernel programming interface is under development and is subject to change.
282.El
283.Sh SEE ALSO
284.Xr locking 9 ,
285.Xr mtx_pool 9 ,
286.Xr mutex 9 ,
287.Xr rwlock 9 ,
288.Xr sema 9 ,
289.Xr sleep 9 ,
290.Xr sx 9 ,
291.Xr timeout 9