/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2019, 2020 Jeffrey Roberson <jeff@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#ifndef _SYS_SMR_H_
#define	_SYS_SMR_H_

#include <sys/_smr.h>

/*
 * Safe memory reclamation.  See subr_smr.c for a description of the
 * algorithm, and smr_types.h for macros to define and access SMR-protected
 * data structures.
 *
 * Readers synchronize with smr_enter()/smr_exit() and writers may either
 * free directly to an SMR UMA zone or use smr_synchronize() or smr_wait().
 */

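/*
 * A minimal usage sketch (illustrative only; the names 'obj_smr' and
 * 'objp' are hypothetical, and the pointer accessors are provided by
 * smr_types.h):
 *
 *	smr_t obj_smr = smr_create("obj", 0, 0);
 *
 *	Writer retiring the object currently published in 'objp':
 *		old = smr_serialized_load(&objp, true);
 *		smr_serialized_store(&objp, NULL, true);
 *		smr_synchronize(obj_smr);
 *		... no reader can still observe 'old'; free it ...
 *
 * Objects allocated from an SMR UMA zone may instead be freed directly
 * back to the zone, which defers reuse until readers have drained.
 */
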
/*
 * Modular arithmetic for comparing sequence numbers that have
 * potentially wrapped.  Copied from tcp_seq.h.
 */
#define	SMR_SEQ_LT(a, b)	((smr_delta_t)((a)-(b)) < 0)
#define	SMR_SEQ_LEQ(a, b)	((smr_delta_t)((a)-(b)) <= 0)
#define	SMR_SEQ_GT(a, b)	((smr_delta_t)((a)-(b)) > 0)
#define	SMR_SEQ_GEQ(a, b)	((smr_delta_t)((a)-(b)) >= 0)
#define	SMR_SEQ_DELTA(a, b)	((smr_delta_t)((a)-(b)))
#define	SMR_SEQ_MIN(a, b)	(SMR_SEQ_LT((a), (b)) ? (a) : (b))
#define	SMR_SEQ_MAX(a, b)	(SMR_SEQ_GT((a), (b)) ? (a) : (b))

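/*
 * For example, assuming smr_seq_t is a 32-bit unsigned type (see
 * sys/_smr.h), the signed delta handles wraparound:
 *
 *	SMR_SEQ_GT(3, 0xfffffff0U) is true; the delta is a small positive
 *	value even though 3 < 0xfffffff0 as unsigned integers.
 *
 * Comparisons are only meaningful while the two sequence numbers are
 * within 2^31 of each other.
 */
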
#define	SMR_SEQ_INVALID		0

/* Shared SMR state. */
union s_wr {
	struct {
		smr_seq_t	seq;	/* Current write sequence #. */
		int		ticks;	/* tick of last update (LAZY) */
	};
	uint64_t	_pair;
};
struct smr_shared {
	const char	*s_name;	/* Name for debugging/reporting. */
	union s_wr	s_wr;		/* Write sequence */
	smr_seq_t	s_rd_seq;	/* Minimum observed read sequence. */
};
typedef struct smr_shared *smr_shared_t;

/* Per-cpu SMR state. */
struct smr {
	smr_seq_t	c_seq;		/* Current observed sequence. */
	smr_shared_t	c_shared;	/* Shared SMR state. */
	int		c_deferred;	/* Deferred advance counter. */
	int		c_limit;	/* Deferred advance limit. */
	int		c_flags;	/* SMR Configuration */
};

#define	SMR_LAZY	0x0001		/* Higher latency write, fast read. */
#define	SMR_DEFERRED	0x0002		/* Aggregate updates to wr_seq. */

/*
 * Return the current write sequence number.  This is not the same as the
 * current goal which may be in the future.
 */
static inline smr_seq_t
smr_shared_current(smr_shared_t s)
{

	return (atomic_load_int(&s->s_wr.seq));
}

static inline smr_seq_t
smr_current(smr_t smr)
{

	return (smr_shared_current(zpcpu_get(smr)->c_shared));
}

/*
 * Enter a read section.
 */
static inline void
smr_enter(smr_t smr)
{

	critical_enter();
	smr = zpcpu_get(smr);
	KASSERT((smr->c_flags & SMR_LAZY) == 0,
	    ("smr_enter(%s) lazy smr.", smr->c_shared->s_name));
	KASSERT(smr->c_seq == 0,
	    ("smr_enter(%s) does not support recursion.",
	    smr->c_shared->s_name));

	/*
	 * Store the current observed write sequence number in our
	 * per-cpu state so that it can be queried via smr_poll().
	 * Frees that are newer than this stored value will be
	 * deferred until we call smr_exit().
	 *
	 * Subsequent loads must not be re-ordered with the store.  On
	 * x86 platforms, any locked instruction will provide this
	 * guarantee, so as an optimization we use a single operation to
	 * both store the cached write sequence number and provide the
	 * requisite barrier, taking advantage of the fact that
	 * SMR_SEQ_INVALID is zero.
	 *
	 * It is possible that a long delay between loading the wr_seq
	 * and storing the c_seq could create a situation where the
	 * rd_seq advances beyond our stored c_seq.  In this situation
	 * only the observed wr_seq is stale; the fence still orders
	 * the load.  See smr_poll() for details on how this condition
	 * is detected and handled there.
	 */
#if defined(__amd64__) || defined(__i386__)
	atomic_add_acq_int(&smr->c_seq, smr_shared_current(smr->c_shared));
#else
	atomic_store_int(&smr->c_seq, smr_shared_current(smr->c_shared));
	atomic_thread_fence_seq_cst();
#endif
}

/*
 * Exit a read section.
 */
static inline void
smr_exit(smr_t smr)
{

	smr = zpcpu_get(smr);
	CRITICAL_ASSERT(curthread);
	KASSERT((smr->c_flags & SMR_LAZY) == 0,
	    ("smr_exit(%s) lazy smr.", smr->c_shared->s_name));
	KASSERT(smr->c_seq != SMR_SEQ_INVALID,
	    ("smr_exit(%s) not in a smr section.", smr->c_shared->s_name));

	/*
	 * Clear the recorded sequence number.  This allows poll() to
	 * detect CPUs not in read sections.
	 *
	 * Use release semantics to retire any stores before the sequence
	 * number is cleared.
	 */
	atomic_store_rel_int(&smr->c_seq, SMR_SEQ_INVALID);
	critical_exit();
}

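/*
 * A read section runs inside a critical section: it must not sleep and
 * must not recursively enter the same smr (see the KASSERTs above).  A
 * minimal reader sketch, using the hypothetical names 'obj_smr' and
 * 'objp' and the smr_entered_load() accessor from smr_types.h:
 *
 *	smr_enter(obj_smr);
 *	o = smr_entered_load(&objp, obj_smr);
 *	if (o != NULL)
 *		... read-only access; 'o' is not reclaimed while the
 *		    section is open ...
 *	smr_exit(obj_smr);
 */
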
/*
 * Enter a lazy smr section.  This is used for read-mostly state that
 * can tolerate a high free latency.
 */
static inline void
smr_lazy_enter(smr_t smr)
{

	critical_enter();
	smr = zpcpu_get(smr);
	KASSERT((smr->c_flags & SMR_LAZY) != 0,
	    ("smr_lazy_enter(%s) non-lazy smr.", smr->c_shared->s_name));
	KASSERT(smr->c_seq == 0,
	    ("smr_lazy_enter(%s) does not support recursion.",
	    smr->c_shared->s_name));

	/*
	 * This needs no serialization.  If an interrupt occurs before we
	 * assign wr_seq to c_seq any speculative loads will be discarded.
	 * If we assign a stale wr_seq value due to interrupt we use the
	 * same algorithm that renders smr_enter() safe.
	 */
	atomic_store_int(&smr->c_seq, smr_shared_current(smr->c_shared));
}

/*
 * Exit a lazy smr section.  This is used for read-mostly state that
 * can tolerate a high free latency.
 */
static inline void
smr_lazy_exit(smr_t smr)
{

	smr = zpcpu_get(smr);
	CRITICAL_ASSERT(curthread);
	KASSERT((smr->c_flags & SMR_LAZY) != 0,
	    ("smr_lazy_exit(%s) non-lazy smr.", smr->c_shared->s_name));
	KASSERT(smr->c_seq != SMR_SEQ_INVALID,
	    ("smr_lazy_exit(%s) not in a smr section.", smr->c_shared->s_name));

	/*
	 * All loads/stores must be retired before the sequence becomes
	 * visible.  The fence compiles away on amd64.  An alternative
	 * would be to omit the fence but store the exit time and wait
	 * 1 tick longer.
	 */
	atomic_thread_fence_rel();
	atomic_store_int(&smr->c_seq, SMR_SEQ_INVALID);
	critical_exit();
}

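/*
 * A lazy smr is created with the SMR_LAZY flag and pairs
 * smr_lazy_enter()/smr_lazy_exit() around read sections.  Write-side
 * latency is higher (on the order of clock ticks; see subr_smr.c), so
 * this is suited to read-mostly, rarely freed state.  Hypothetical
 * sketch:
 *
 *	smr_t lazy_smr = smr_create("lazy obj", 0, SMR_LAZY);
 *
 *	smr_lazy_enter(lazy_smr);
 *	o = smr_entered_load(&objp, lazy_smr);
 *	... read-only access ...
 *	smr_lazy_exit(lazy_smr);
 */
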
/*
 * Advances the write sequence number.  Returns the sequence number
 * required to ensure that all modifications are visible to readers.
 */
smr_seq_t smr_advance(smr_t smr);

/*
 * Returns true if a goal sequence has been reached.  If
 * wait is true this will busy-loop until success.
 */
bool smr_poll(smr_t smr, smr_seq_t goal, bool wait);

/* Create a new SMR context. */
smr_t smr_create(const char *name, int limit, int flags);

/* Destroy the context. */
void smr_destroy(smr_t smr);

/*
 * Blocking wait for all readers to observe 'goal'.
 */
static inline void
smr_wait(smr_t smr, smr_seq_t goal)
{

	(void)smr_poll(smr, goal, true);
}

/*
 * smr_synchronize() advances the write sequence and returns when all
 * readers have observed it.
 *
 * If your application can cache a sequence number returned from
 * smr_advance() and poll or wait at a later time, there will
 * be less chance of busy looping while waiting for readers.
 */
static inline void
smr_synchronize(smr_t smr)
{

	smr_wait(smr, smr_advance(smr));
}

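/*
 * When retiring objects in batches, the goal returned by smr_advance()
 * can be cached with each retired object and checked later with a
 * non-blocking smr_poll(), which avoids most busy waiting.  Hypothetical
 * sketch, with 'item' carrying a cached 'goal' field:
 *
 *	item->goal = smr_advance(obj_smr);	(at retire time)
 *	...
 *	if (smr_poll(obj_smr, item->goal, false))
 *		... all readers have observed the goal; free 'item' ...
 *	else
 *		... defer, or block with smr_wait(obj_smr, item->goal) ...
 */
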
/* Only at startup. */
void smr_init(void);

#endif	/* _SYS_SMR_H_ */