xref: /linux/include/linux/srcutree.h (revision 98e7dcbb82fa57de8dfad357f9b851c3625797fa)
/* SPDX-License-Identifier: GPL-2.0+ */
/*
 * Sleepable Read-Copy Update mechanism for mutual exclusion,
 *	tree variant.
 *
 * Copyright (C) IBM Corporation, 2017
 *
 * Author: Paul McKenney <paulmck@linux.ibm.com>
 */

#ifndef _LINUX_SRCU_TREE_H
#define _LINUX_SRCU_TREE_H

#include <linux/rcu_node_tree.h>
#include <linux/completion.h>

struct srcu_node;
struct srcu_struct;

/* One element of the srcu_data srcu_ctrs array. */
struct srcu_ctr {
	atomic_long_t srcu_locks;	/* Locks per CPU. */
	atomic_long_t srcu_unlocks;	/* Unlocks per CPU. */
};

/*
 * Per-CPU structure feeding into leaf srcu_node, similar in function
 * to rcu_data.
 */
struct srcu_data {
	/* Read-side state. */
	struct srcu_ctr srcu_ctrs[2];		/* Locks and unlocks per CPU. */
	int srcu_reader_flavor;			/* Reader flavor for srcu_struct structure? */
						/* Values: SRCU_READ_FLAVOR_.*  */

	/* Update-side state. */
	spinlock_t __private lock ____cacheline_internodealigned_in_smp;
	struct rcu_segcblist srcu_cblist;	/* List of callbacks. */
	unsigned long srcu_gp_seq_needed;	/* Furthest future GP needed. */
	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
	bool srcu_cblist_invoking;		/* Invoking these CBs? */
	struct timer_list delay_work;		/* Delay for CB invoking. */
	struct work_struct work;		/* Context for CB invoking. */
	struct rcu_head srcu_barrier_head;	/* For srcu_barrier() use. */
	struct rcu_head srcu_ec_head;		/* For srcu_expedite_current() use. */
	int srcu_ec_state;			/* State for srcu_expedite_current(). */
	struct srcu_node *mynode;		/* Leaf srcu_node. */
	unsigned long grpmask;			/* Mask for leaf srcu_node */
						/*  ->srcu_data_have_cbs[]. */
	int cpu;
	struct srcu_struct *ssp;
};

/*
 * Node in SRCU combining tree, similar in function to rcu_node.
 */
struct srcu_node {
	spinlock_t __private lock;
	unsigned long srcu_have_cbs[4];		/* GP seq for children having CBs, but only */
						/*  if greater than ->srcu_gp_seq. */
	unsigned long srcu_data_have_cbs[4];	/* Which srcu_data structs have CBs for given GP? */
	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
	struct srcu_node *srcu_parent;		/* Next up in tree. */
	int grplo;				/* Least CPU for node. */
	int grphi;				/* Biggest CPU for node. */
};

/*
 * Per-SRCU-domain structure, update-side data linked from srcu_struct.
 */
struct srcu_usage {
	struct srcu_node *node;			/* Combining tree. */
	struct srcu_node *level[RCU_NUM_LVLS + 1];
						/* First node at each level. */
	int srcu_size_state;			/* Small-to-big transition state. */
	struct mutex srcu_cb_mutex;		/* Serialize CB preparation. */
	spinlock_t __private lock;		/* Protect counters and size state. */
	struct mutex srcu_gp_mutex;		/* Serialize GP work. */
	unsigned long srcu_gp_seq;		/* Grace-period seq #. */
	unsigned long srcu_gp_seq_needed;	/* Latest gp_seq needed. */
	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
	unsigned long srcu_gp_start;		/* Last GP start timestamp (jiffies) */
	unsigned long srcu_last_gp_end;		/* Last GP end timestamp (ns) */
	unsigned long srcu_size_jiffies;	/* Current contention-measurement interval. */
	unsigned long srcu_n_lock_retries;	/* Contention events in current interval. */
	unsigned long srcu_n_exp_nodelay;	/* # expedited no-delays in current GP phase. */
	bool sda_is_static;			/* May ->sda be passed to free_percpu()? */
	unsigned long srcu_barrier_seq;		/* srcu_barrier seq #. */
	struct mutex srcu_barrier_mutex;	/* Serialize barrier ops. */
	struct completion srcu_barrier_completion;
						/* Awaken barrier rq at end. */
	atomic_t srcu_barrier_cpu_cnt;		/* # CPUs not yet posting a */
						/*  callback for the barrier */
						/*  operation. */
	unsigned long reschedule_jiffies;
	unsigned long reschedule_count;
	struct delayed_work work;
	struct srcu_struct *srcu_ssp;
};

/*
 * Per-SRCU-domain structure, similar in function to rcu_state.
 */
struct srcu_struct {
	struct srcu_ctr __percpu *srcu_ctrp;
	struct srcu_data __percpu *sda;		/* Per-CPU srcu_data array. */
	u8 srcu_reader_flavor;
	struct lockdep_map dep_map;
	struct srcu_usage *srcu_sup;		/* Update-side data. */
};

// Values for size state variable (->srcu_size_state).  Once the state
// has been set to SRCU_SIZE_ALLOC, the grace-period code advances through
// this state machine one step per grace period until the SRCU_SIZE_BIG state
// is reached.  Otherwise, the state machine remains in the SRCU_SIZE_SMALL
// state indefinitely.
#define SRCU_SIZE_SMALL		0	// No srcu_node combining tree, ->node == NULL
#define SRCU_SIZE_ALLOC		1	// An srcu_node tree is being allocated, initialized,
					//  and then referenced by ->node.  It is not yet used.
#define SRCU_SIZE_WAIT_BARRIER	2	// The srcu_node tree starts being used by everything
					//  except call_srcu(), especially by srcu_barrier().
					//  By the end of this state, all CPUs and threads
					//  are aware of this tree's existence.
#define SRCU_SIZE_WAIT_CALL	3	// The srcu_node tree starts being used by call_srcu().
					//  By the end of this state, all of the call_srcu()
					//  invocations that were running on a non-boot CPU
					//  and using the boot CPU's callback queue will have
					//  completed.
#define SRCU_SIZE_WAIT_CBS1	4	// Don't trust the ->srcu_have_cbs[] grace-period
#define SRCU_SIZE_WAIT_CBS2	5	//  sequence elements or the ->srcu_data_have_cbs[]
#define SRCU_SIZE_WAIT_CBS3	6	//  CPU-bitmask elements until all four elements of
#define SRCU_SIZE_WAIT_CBS4	7	//  each array have been initialized.
#define SRCU_SIZE_BIG		8	// The srcu_node combining tree is fully initialized
					//  and all aspects of it are being put to use.

/* Values for state variable (bottom bits of ->srcu_gp_seq). */
#define SRCU_STATE_IDLE		0
#define SRCU_STATE_SCAN1	1
#define SRCU_STATE_SCAN2	2

/* Values for srcu_expedite_current() state (->srcu_ec_state). */
#define SRCU_EC_IDLE		0
#define SRCU_EC_PENDING		1
#define SRCU_EC_REPOST		2

/*
 * Values for initializing gp sequence fields.  Higher values allow wraparounds
 * to occur earlier.
 * The second value, which includes state bits, is used for static initialization
 * of srcu_usage, where srcu_gp_seq_needed is expected to have some state value
 * in its lower bits (otherwise check_init_srcu_struct() would conclude that the
 * structure had already been initialized).
 */
#define SRCU_GP_SEQ_INITIAL_VAL ((0UL - 100UL) << RCU_SEQ_CTR_SHIFT)
#define SRCU_GP_SEQ_INITIAL_VAL_WITH_STATE (SRCU_GP_SEQ_INITIAL_VAL - 1)

#define __SRCU_USAGE_INIT(name)									\
{												\
	.lock = __SPIN_LOCK_UNLOCKED(name.lock),						\
	.srcu_gp_seq = SRCU_GP_SEQ_INITIAL_VAL,							\
	.srcu_gp_seq_needed = SRCU_GP_SEQ_INITIAL_VAL_WITH_STATE,				\
	.srcu_gp_seq_needed_exp = SRCU_GP_SEQ_INITIAL_VAL,					\
	.work = __DELAYED_WORK_INITIALIZER(name.work, NULL, 0),					\
}

#define __SRCU_STRUCT_INIT_COMMON(name, usage_name, fast)					\
	.srcu_sup = &usage_name,								\
	.srcu_reader_flavor = fast,								\
	__SRCU_DEP_MAP_INIT(name)

#define __SRCU_STRUCT_INIT_MODULE(name, usage_name, fast)					\
{												\
	__SRCU_STRUCT_INIT_COMMON(name, usage_name, fast)					\
}

#define __SRCU_STRUCT_INIT(name, usage_name, pcpu_name, fast)					\
{												\
	.sda = &pcpu_name,									\
	.srcu_ctrp = &pcpu_name.srcu_ctrs[0],							\
	__SRCU_STRUCT_INIT_COMMON(name, usage_name, fast)					\
}

/*
 * Define and initialize an srcu_struct at build time.
 * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it.
 *
 * Note that although DEFINE_STATIC_SRCU() hides the name from other
 * files, the per-CPU variable rules nevertheless require that the
 * chosen name be globally unique.  These rules also prohibit use of
 * DEFINE_STATIC_SRCU() within a function.  If these rules are too
 * restrictive, declare the srcu_struct manually.  For example, in
 * each file:
 *
 *	static struct srcu_struct my_srcu;
 *
 * Then, before the first use of each my_srcu, manually initialize it:
 *
 *	init_srcu_struct(&my_srcu);
 *
 * See include/linux/percpu-defs.h for the rules on per-CPU variables.
 *
 * DEFINE_SRCU_FAST() and DEFINE_STATIC_SRCU_FAST() create an srcu_struct
 * and associated structures whose readers must be of the SRCU-fast variety.
 * DEFINE_SRCU_FAST_UPDOWN() and DEFINE_STATIC_SRCU_FAST_UPDOWN() create
 * an srcu_struct and associated structures whose readers must be of the
 * SRCU-fast-updown variety.  The key point (aside from error checking) with
 * both varieties is that the grace periods must use synchronize_rcu()
 * instead of smp_mb(), and given that the first (for example)
 * srcu_read_lock_fast() might race with the first synchronize_srcu(),
 * this difference must be specified at initialization time.
 */
#ifdef MODULE
# define __DEFINE_SRCU(name, fast, is_static)							\
	static struct srcu_usage name##_srcu_usage = __SRCU_USAGE_INIT(name##_srcu_usage);	\
	is_static struct srcu_struct name = __SRCU_STRUCT_INIT_MODULE(name, name##_srcu_usage,	\
								       fast);			\
	extern struct srcu_struct * const __srcu_struct_##name;				\
	struct srcu_struct * const __srcu_struct_##name						\
		__section("___srcu_struct_ptrs") = &name
#else
# define __DEFINE_SRCU(name, fast, is_static)							\
	static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data);				\
	static struct srcu_usage name##_srcu_usage = __SRCU_USAGE_INIT(name##_srcu_usage);	\
	is_static struct srcu_struct name =							\
		__SRCU_STRUCT_INIT(name, name##_srcu_usage, name##_srcu_data, fast)
#endif
#define DEFINE_SRCU(name)		__DEFINE_SRCU(name, 0, /* not static */)
#define DEFINE_STATIC_SRCU(name)	__DEFINE_SRCU(name, 0, static)
#define DEFINE_SRCU_FAST(name)		__DEFINE_SRCU(name, SRCU_READ_FLAVOR_FAST, /* not static */)
#define DEFINE_STATIC_SRCU_FAST(name)	__DEFINE_SRCU(name, SRCU_READ_FLAVOR_FAST, static)
#define DEFINE_SRCU_FAST_UPDOWN(name)	__DEFINE_SRCU(name, SRCU_READ_FLAVOR_FAST_UPDOWN, \
						      /* not static */)
#define DEFINE_STATIC_SRCU_FAST_UPDOWN(name) \
					__DEFINE_SRCU(name, SRCU_READ_FLAVOR_FAST_UPDOWN, static)
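
/*
 * For illustration only, a minimal usage sketch of the build-time
 * initializers above, using the classic SRCU reader and updater APIs
 * declared in include/linux/srcu.h (the my_srcu, my_data, and old_p
 * names are made up for this example):
 *
 *	DEFINE_STATIC_SRCU(my_srcu);
 *
 *	// Reader:
 *	int idx = srcu_read_lock(&my_srcu);
 *	p = srcu_dereference(my_data, &my_srcu);
 *	// ... use p ...
 *	srcu_read_unlock(&my_srcu, idx);
 *
 *	// Updater, after unpublishing the old data:
 *	synchronize_srcu(&my_srcu);
 *	kfree(old_p);
 *
 * The _FAST variants defined above instead pair with the SRCU-fast
 * read-side APIs, for example srcu_read_lock_fast() and
 * srcu_read_unlock_fast().
 */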

int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp);
void synchronize_srcu_expedited(struct srcu_struct *ssp);
void srcu_barrier(struct srcu_struct *ssp);
void srcu_expedite_current(struct srcu_struct *ssp);
void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf);

// Converts a per-CPU pointer to an ->srcu_ctrs[] array element to that
// element's index.
static inline bool __srcu_ptr_to_ctr(struct srcu_struct *ssp, struct srcu_ctr __percpu *scpp)
{
	return scpp - &ssp->sda->srcu_ctrs[0];
}

// Converts an integer to a per-CPU pointer to the corresponding
// ->srcu_ctrs[] array element.
static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ssp, int idx)
{
	return &ssp->sda->srcu_ctrs[idx];
}
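
// Informally, these two helpers are inverses: for a valid index idx,
// __srcu_ptr_to_ctr(ssp, __srcu_ctr_to_ptr(ssp, idx)) == idx.  The bool
// return type suffices only because ->srcu_ctrs[] has exactly two elements.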

/*
 * Counts the new reader in the appropriate per-CPU element of the
 * srcu_struct.  Returns a pointer that must be passed to the matching
 * srcu_read_unlock_fast().
 *
 * Note that both this_cpu_inc() and atomic_long_inc() are RCU read-side
 * critical sections, either because they disable interrupts, because
 * they are a single instruction, or because they are read-modify-write
 * atomic operations, depending on the whims of the architecture.
 * This matters because the SRCU-fast grace-period mechanism uses either
 * synchronize_rcu() or synchronize_rcu_expedited(), that is, RCU,
 * *not* SRCU, in order to eliminate the need for the read-side smp_mb()
 * invocations that are used by srcu_read_lock() and srcu_read_unlock().
 * The __srcu_read_unlock_fast() function also relies on this same RCU
 * (again, *not* SRCU) trick to eliminate the need for smp_mb().
 *
 * The key point behind this RCU trick is that if any part of a given
 * RCU reader precedes the beginning of a given RCU grace period, then
 * the entirety of that RCU reader and everything preceding it happens
 * before the end of that same RCU grace period.  Similarly, if any part
 * of a given RCU reader follows the end of a given RCU grace period,
 * then the entirety of that RCU reader and everything following it
 * happens after the beginning of that same RCU grace period.  Therefore,
 * the operations labeled Y in __srcu_read_lock_fast() and those labeled Z
 * in __srcu_read_unlock_fast() are ordered against the corresponding SRCU
 * read-side critical section from the viewpoint of the SRCU grace period.
 * This is all the ordering that is required, hence no calls to smp_mb().
 *
 * This means that __srcu_read_lock_fast() is not all that fast
 * on architectures that support NMIs but do not supply NMI-safe
 * implementations of this_cpu_inc().
 */
static inline struct srcu_ctr __percpu notrace *__srcu_read_lock_fast(struct srcu_struct *ssp)
{
	struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);

	if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
		this_cpu_inc(scp->srcu_locks.counter); // Y, and implicit RCU reader.
	else
		atomic_long_inc(raw_cpu_ptr(&scp->srcu_locks));  // Y, and implicit RCU reader.
	barrier(); /* Avoid leaking the critical section. */
	return scp;
}

/*
 * Removes the count for the old reader from the appropriate
 * per-CPU element of the srcu_struct.  Note that this may well be a
 * different CPU from the one on which the corresponding
 * srcu_read_lock_fast() incremented its counter, but it must be within
 * the same task.
 *
 * Please see the __srcu_read_lock_fast() function's header comment for
 * information on implicit RCU readers and NMI safety.
 */
static inline void notrace
__srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp)
{
	barrier();  /* Avoid leaking the critical section. */
	if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
		this_cpu_inc(scp->srcu_unlocks.counter);  // Z, and implicit RCU reader.
	else
		atomic_long_inc(raw_cpu_ptr(&scp->srcu_unlocks));  // Z, and implicit RCU reader.
}
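
/*
 * As a rough sketch of how the two helpers above pair up (the public
 * srcu_read_lock_fast()/srcu_read_unlock_fast() wrappers layered on top
 * of them add lockdep and reader-flavor checking):
 *
 *	struct srcu_ctr __percpu *scp;
 *
 *	scp = __srcu_read_lock_fast(ssp);
 *	// SRCU-fast read-side critical section: no smp_mb() on either side.
 *	__srcu_read_unlock_fast(ssp, scp);
 */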

/*
 * Counts the new reader in the appropriate per-CPU element of the
 * srcu_struct.  Returns a pointer that must be passed to the matching
 * srcu_read_unlock_fast_updown().  This type of reader is compatible
 * with srcu_down_read_fast() and srcu_up_read_fast().
 *
 * See the __srcu_read_lock_fast() comment for more details.
 */
static inline
struct srcu_ctr __percpu notrace *__srcu_read_lock_fast_updown(struct srcu_struct *ssp)
{
	struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);

	if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
		this_cpu_inc(scp->srcu_locks.counter); // Y, and implicit RCU reader.
	else
		atomic_long_inc(raw_cpu_ptr(&scp->srcu_locks));  // Y, and implicit RCU reader.
	barrier(); /* Avoid leaking the critical section. */
	return scp;
}

/*
 * Removes the count for the old reader from the appropriate
 * per-CPU element of the srcu_struct.  Note that this may well be a
 * different CPU from the one on which the corresponding
 * srcu_read_lock_fast_updown() incremented its counter, but it must be
 * within the same task.
 *
 * Please see the __srcu_read_lock_fast() function's header comment for
 * information on implicit RCU readers and NMI safety.
 */
static inline void notrace
__srcu_read_unlock_fast_updown(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp)
{
	barrier();  /* Avoid leaking the critical section. */
	if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
		this_cpu_inc(scp->srcu_unlocks.counter);  // Z, and implicit RCU reader.
	else
		atomic_long_inc(raw_cpu_ptr(&scp->srcu_unlocks));  // Z, and implicit RCU reader.
}

void __srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor);

// Record SRCU-reader usage type only for CONFIG_PROVE_RCU=y kernels.
static inline void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor)
{
	if (IS_ENABLED(CONFIG_PROVE_RCU))
		__srcu_check_read_flavor(ssp, read_flavor);
}

#endif