xref: /linux/kernel/rcu/srcutree.c (revision 881ec9d209d5371c21db89ca1bb19afd3fcadab3)
1dad81a20SPaul E. McKenney /*
2dad81a20SPaul E. McKenney  * Sleepable Read-Copy Update mechanism for mutual exclusion.
3dad81a20SPaul E. McKenney  *
4dad81a20SPaul E. McKenney  * This program is free software; you can redistribute it and/or modify
5dad81a20SPaul E. McKenney  * it under the terms of the GNU General Public License as published by
6dad81a20SPaul E. McKenney  * the Free Software Foundation; either version 2 of the License, or
7dad81a20SPaul E. McKenney  * (at your option) any later version.
8dad81a20SPaul E. McKenney  *
9dad81a20SPaul E. McKenney  * This program is distributed in the hope that it will be useful,
10dad81a20SPaul E. McKenney  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11dad81a20SPaul E. McKenney  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12dad81a20SPaul E. McKenney  * GNU General Public License for more details.
13dad81a20SPaul E. McKenney  *
14dad81a20SPaul E. McKenney  * You should have received a copy of the GNU General Public License
15dad81a20SPaul E. McKenney  * along with this program; if not, you can access it online at
16dad81a20SPaul E. McKenney  * http://www.gnu.org/licenses/gpl-2.0.html.
17dad81a20SPaul E. McKenney  *
18dad81a20SPaul E. McKenney  * Copyright (C) IBM Corporation, 2006
19dad81a20SPaul E. McKenney  * Copyright (C) Fujitsu, 2012
20dad81a20SPaul E. McKenney  *
21dad81a20SPaul E. McKenney  * Author: Paul McKenney <paulmck@us.ibm.com>
22dad81a20SPaul E. McKenney  *	   Lai Jiangshan <laijs@cn.fujitsu.com>
23dad81a20SPaul E. McKenney  *
24dad81a20SPaul E. McKenney  * For detailed explanation of Read-Copy Update mechanism see -
25dad81a20SPaul E. McKenney  *		Documentation/RCU/ *.txt
26dad81a20SPaul E. McKenney  *
27dad81a20SPaul E. McKenney  */
28dad81a20SPaul E. McKenney 
29dad81a20SPaul E. McKenney #include <linux/export.h>
30dad81a20SPaul E. McKenney #include <linux/mutex.h>
31dad81a20SPaul E. McKenney #include <linux/percpu.h>
32dad81a20SPaul E. McKenney #include <linux/preempt.h>
33dad81a20SPaul E. McKenney #include <linux/rcupdate_wait.h>
34dad81a20SPaul E. McKenney #include <linux/sched.h>
35dad81a20SPaul E. McKenney #include <linux/smp.h>
36dad81a20SPaul E. McKenney #include <linux/delay.h>
3722607d66SPaul E. McKenney #include <linux/module.h>
38dad81a20SPaul E. McKenney #include <linux/srcu.h>
39dad81a20SPaul E. McKenney 
40dad81a20SPaul E. McKenney #include "rcu.h"
4145753c5fSIngo Molnar #include "rcu_segcblist.h"
42dad81a20SPaul E. McKenney 
43b5fe223aSPaul E. McKenney ulong exp_holdoff = 25 * 1000; /* Holdoff (ns) for auto-expediting. */
4422607d66SPaul E. McKenney module_param(exp_holdoff, ulong, 0444);
4522607d66SPaul E. McKenney 
46da915ad5SPaul E. McKenney static void srcu_invoke_callbacks(struct work_struct *work);
47da915ad5SPaul E. McKenney static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay);
48da915ad5SPaul E. McKenney 
49da915ad5SPaul E. McKenney /*
50da915ad5SPaul E. McKenney  * Initialize SRCU combining tree.  Note that statically allocated
51da915ad5SPaul E. McKenney  * srcu_struct structures might already have srcu_read_lock() and
52da915ad5SPaul E. McKenney  * srcu_read_unlock() running against them.  So if the is_static parameter
53da915ad5SPaul E. McKenney  * is set, don't initialize ->srcu_lock_count[] and ->srcu_unlock_count[].
54da915ad5SPaul E. McKenney  */
55da915ad5SPaul E. McKenney static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static)
56dad81a20SPaul E. McKenney {
57da915ad5SPaul E. McKenney 	int cpu;
58da915ad5SPaul E. McKenney 	int i;
59da915ad5SPaul E. McKenney 	int level = 0;
60da915ad5SPaul E. McKenney 	int levelspread[RCU_NUM_LVLS];
61da915ad5SPaul E. McKenney 	struct srcu_data *sdp;
62da915ad5SPaul E. McKenney 	struct srcu_node *snp;
63da915ad5SPaul E. McKenney 	struct srcu_node *snp_first;
64da915ad5SPaul E. McKenney 
65da915ad5SPaul E. McKenney 	/* Work out the overall tree geometry. */
66da915ad5SPaul E. McKenney 	sp->level[0] = &sp->node[0];
67da915ad5SPaul E. McKenney 	for (i = 1; i < rcu_num_lvls; i++)
68da915ad5SPaul E. McKenney 		sp->level[i] = sp->level[i - 1] + num_rcu_lvl[i - 1];
69da915ad5SPaul E. McKenney 	rcu_init_levelspread(levelspread, num_rcu_lvl);
70da915ad5SPaul E. McKenney 
71da915ad5SPaul E. McKenney 	/* Each pass through this loop initializes one srcu_node structure. */
72da915ad5SPaul E. McKenney 	rcu_for_each_node_breadth_first(sp, snp) {
73da915ad5SPaul E. McKenney 		spin_lock_init(&snp->lock);
74c7e88067SPaul E. McKenney 		WARN_ON_ONCE(ARRAY_SIZE(snp->srcu_have_cbs) !=
75c7e88067SPaul E. McKenney 			     ARRAY_SIZE(snp->srcu_data_have_cbs));
76c7e88067SPaul E. McKenney 		for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) {
77da915ad5SPaul E. McKenney 			snp->srcu_have_cbs[i] = 0;
78c7e88067SPaul E. McKenney 			snp->srcu_data_have_cbs[i] = 0;
79c7e88067SPaul E. McKenney 		}
801e9a038bSPaul E. McKenney 		snp->srcu_gp_seq_needed_exp = 0;
81da915ad5SPaul E. McKenney 		snp->grplo = -1;
82da915ad5SPaul E. McKenney 		snp->grphi = -1;
83da915ad5SPaul E. McKenney 		if (snp == &sp->node[0]) {
84da915ad5SPaul E. McKenney 			/* Root node, special case. */
85da915ad5SPaul E. McKenney 			snp->srcu_parent = NULL;
86da915ad5SPaul E. McKenney 			continue;
87da915ad5SPaul E. McKenney 		}
88da915ad5SPaul E. McKenney 
89da915ad5SPaul E. McKenney 		/* Non-root node. */
90da915ad5SPaul E. McKenney 		if (snp == sp->level[level + 1])
91da915ad5SPaul E. McKenney 			level++;
92da915ad5SPaul E. McKenney 		snp->srcu_parent = sp->level[level - 1] +
93da915ad5SPaul E. McKenney 				   (snp - sp->level[level]) /
94da915ad5SPaul E. McKenney 				   levelspread[level - 1];
95da915ad5SPaul E. McKenney 	}
96da915ad5SPaul E. McKenney 
97da915ad5SPaul E. McKenney 	/*
98da915ad5SPaul E. McKenney 	 * Initialize the per-CPU srcu_data array, which feeds into the
99da915ad5SPaul E. McKenney 	 * leaves of the srcu_node tree.
100da915ad5SPaul E. McKenney 	 */
101da915ad5SPaul E. McKenney 	WARN_ON_ONCE(ARRAY_SIZE(sdp->srcu_lock_count) !=
102da915ad5SPaul E. McKenney 		     ARRAY_SIZE(sdp->srcu_unlock_count));
103da915ad5SPaul E. McKenney 	level = rcu_num_lvls - 1;
104da915ad5SPaul E. McKenney 	snp_first = sp->level[level];
105da915ad5SPaul E. McKenney 	for_each_possible_cpu(cpu) {
106da915ad5SPaul E. McKenney 		sdp = per_cpu_ptr(sp->sda, cpu);
107da915ad5SPaul E. McKenney 		spin_lock_init(&sdp->lock);
108da915ad5SPaul E. McKenney 		rcu_segcblist_init(&sdp->srcu_cblist);
109da915ad5SPaul E. McKenney 		sdp->srcu_cblist_invoking = false;
110da915ad5SPaul E. McKenney 		sdp->srcu_gp_seq_needed = sp->srcu_gp_seq;
1111e9a038bSPaul E. McKenney 		sdp->srcu_gp_seq_needed_exp = sp->srcu_gp_seq;
112da915ad5SPaul E. McKenney 		sdp->mynode = &snp_first[cpu / levelspread[level]];
113da915ad5SPaul E. McKenney 		for (snp = sdp->mynode; snp != NULL; snp = snp->srcu_parent) {
114da915ad5SPaul E. McKenney 			if (snp->grplo < 0)
115da915ad5SPaul E. McKenney 				snp->grplo = cpu;
116da915ad5SPaul E. McKenney 			snp->grphi = cpu;
117da915ad5SPaul E. McKenney 		}
118da915ad5SPaul E. McKenney 		sdp->cpu = cpu;
119da915ad5SPaul E. McKenney 		INIT_DELAYED_WORK(&sdp->work, srcu_invoke_callbacks);
120da915ad5SPaul E. McKenney 		sdp->sp = sp;
121c7e88067SPaul E. McKenney 		sdp->grpmask = 1 << (cpu - sdp->mynode->grplo);
122da915ad5SPaul E. McKenney 		if (is_static)
123da915ad5SPaul E. McKenney 			continue;
124da915ad5SPaul E. McKenney 
125da915ad5SPaul E. McKenney 		/* Dynamically allocated, better be no srcu_read_locks()! */
126da915ad5SPaul E. McKenney 		for (i = 0; i < ARRAY_SIZE(sdp->srcu_lock_count); i++) {
127da915ad5SPaul E. McKenney 			sdp->srcu_lock_count[i] = 0;
128da915ad5SPaul E. McKenney 			sdp->srcu_unlock_count[i] = 0;
129da915ad5SPaul E. McKenney 		}
130da915ad5SPaul E. McKenney 	}
131da915ad5SPaul E. McKenney }
132da915ad5SPaul E. McKenney 
133da915ad5SPaul E. McKenney /*
134da915ad5SPaul E. McKenney  * Initialize non-compile-time initialized fields, including the
135da915ad5SPaul E. McKenney  * associated srcu_node and srcu_data structures.  The is_static
136da915ad5SPaul E. McKenney  * parameter is passed through to init_srcu_struct_nodes(), and
137da915ad5SPaul E. McKenney  * also tells us that ->sda has already been wired up to srcu_data.
138da915ad5SPaul E. McKenney  */
139da915ad5SPaul E. McKenney static int init_srcu_struct_fields(struct srcu_struct *sp, bool is_static)
140da915ad5SPaul E. McKenney {
141da915ad5SPaul E. McKenney 	mutex_init(&sp->srcu_cb_mutex);
142da915ad5SPaul E. McKenney 	mutex_init(&sp->srcu_gp_mutex);
143da915ad5SPaul E. McKenney 	sp->srcu_idx = 0;
144dad81a20SPaul E. McKenney 	sp->srcu_gp_seq = 0;
145da915ad5SPaul E. McKenney 	sp->srcu_barrier_seq = 0;
146da915ad5SPaul E. McKenney 	mutex_init(&sp->srcu_barrier_mutex);
147da915ad5SPaul E. McKenney 	atomic_set(&sp->srcu_barrier_cpu_cnt, 0);
148dad81a20SPaul E. McKenney 	INIT_DELAYED_WORK(&sp->work, process_srcu);
149da915ad5SPaul E. McKenney 	if (!is_static)
150da915ad5SPaul E. McKenney 		sp->sda = alloc_percpu(struct srcu_data);
151da915ad5SPaul E. McKenney 	init_srcu_struct_nodes(sp, is_static);
1521e9a038bSPaul E. McKenney 	sp->srcu_gp_seq_needed_exp = 0;
15322607d66SPaul E. McKenney 	sp->srcu_last_gp_end = ktime_get_mono_fast_ns();
154da915ad5SPaul E. McKenney 	smp_store_release(&sp->srcu_gp_seq_needed, 0); /* Init done. */
155da915ad5SPaul E. McKenney 	return sp->sda ? 0 : -ENOMEM;
156dad81a20SPaul E. McKenney }
157dad81a20SPaul E. McKenney 
158dad81a20SPaul E. McKenney #ifdef CONFIG_DEBUG_LOCK_ALLOC
159dad81a20SPaul E. McKenney 
160dad81a20SPaul E. McKenney int __init_srcu_struct(struct srcu_struct *sp, const char *name,
161dad81a20SPaul E. McKenney 		       struct lock_class_key *key)
162dad81a20SPaul E. McKenney {
163dad81a20SPaul E. McKenney 	/* Don't re-initialize a lock while it is held. */
164dad81a20SPaul E. McKenney 	debug_check_no_locks_freed((void *)sp, sizeof(*sp));
165dad81a20SPaul E. McKenney 	lockdep_init_map(&sp->dep_map, name, key, 0);
166da915ad5SPaul E. McKenney 	spin_lock_init(&sp->gp_lock);
167da915ad5SPaul E. McKenney 	return init_srcu_struct_fields(sp, false);
168dad81a20SPaul E. McKenney }
169dad81a20SPaul E. McKenney EXPORT_SYMBOL_GPL(__init_srcu_struct);
170dad81a20SPaul E. McKenney 
171dad81a20SPaul E. McKenney #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
172dad81a20SPaul E. McKenney 
173dad81a20SPaul E. McKenney /**
174dad81a20SPaul E. McKenney  * init_srcu_struct - initialize a sleep-RCU structure
175dad81a20SPaul E. McKenney  * @sp: structure to initialize.
176dad81a20SPaul E. McKenney  *
177dad81a20SPaul E. McKenney  * Must invoke this on a given srcu_struct before passing that srcu_struct
178dad81a20SPaul E. McKenney  * to any other function.  Each srcu_struct represents a separate domain
179dad81a20SPaul E. McKenney  * of SRCU protection.
180dad81a20SPaul E. McKenney  */
181dad81a20SPaul E. McKenney int init_srcu_struct(struct srcu_struct *sp)
182dad81a20SPaul E. McKenney {
183da915ad5SPaul E. McKenney 	spin_lock_init(&sp->gp_lock);
184da915ad5SPaul E. McKenney 	return init_srcu_struct_fields(sp, false);
185dad81a20SPaul E. McKenney }
186dad81a20SPaul E. McKenney EXPORT_SYMBOL_GPL(init_srcu_struct);
187dad81a20SPaul E. McKenney 
188dad81a20SPaul E. McKenney #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
189dad81a20SPaul E. McKenney 
190dad81a20SPaul E. McKenney /*
191da915ad5SPaul E. McKenney  * First-use initialization of statically allocated srcu_struct
192da915ad5SPaul E. McKenney  * structure.  Wiring up the combining tree is more than can be
193da915ad5SPaul E. McKenney  * done with compile-time initialization, so this check is added
194da915ad5SPaul E. McKenney  * to each update-side SRCU primitive.  Use ->gp_lock, which -is-
195da915ad5SPaul E. McKenney  * compile-time initialized, to resolve races involving multiple
196da915ad5SPaul E. McKenney  * CPUs trying to garner first-use privileges.
197da915ad5SPaul E. McKenney  */
198da915ad5SPaul E. McKenney static void check_init_srcu_struct(struct srcu_struct *sp)
199da915ad5SPaul E. McKenney {
200da915ad5SPaul E. McKenney 	unsigned long flags;
201da915ad5SPaul E. McKenney 
202da915ad5SPaul E. McKenney 	WARN_ON_ONCE(rcu_scheduler_active == RCU_SCHEDULER_INIT);
203da915ad5SPaul E. McKenney 	/* The smp_load_acquire() pairs with the smp_store_release(). */
204da915ad5SPaul E. McKenney 	if (!rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq_needed))) /*^^^*/
205da915ad5SPaul E. McKenney 		return; /* Already initialized. */
206da915ad5SPaul E. McKenney 	spin_lock_irqsave(&sp->gp_lock, flags);
207da915ad5SPaul E. McKenney 	if (!rcu_seq_state(sp->srcu_gp_seq_needed)) {
208da915ad5SPaul E. McKenney 		spin_unlock_irqrestore(&sp->gp_lock, flags);
209da915ad5SPaul E. McKenney 		return;
210da915ad5SPaul E. McKenney 	}
211da915ad5SPaul E. McKenney 	init_srcu_struct_fields(sp, true);
212da915ad5SPaul E. McKenney 	spin_unlock_irqrestore(&sp->gp_lock, flags);
213da915ad5SPaul E. McKenney }
214da915ad5SPaul E. McKenney 
215da915ad5SPaul E. McKenney /*
216da915ad5SPaul E. McKenney  * Returns approximate total of the readers' ->srcu_lock_count[] values
217da915ad5SPaul E. McKenney  * for the rank of per-CPU counters specified by idx.
218dad81a20SPaul E. McKenney  */
219dad81a20SPaul E. McKenney static unsigned long srcu_readers_lock_idx(struct srcu_struct *sp, int idx)
220dad81a20SPaul E. McKenney {
221dad81a20SPaul E. McKenney 	int cpu;
222dad81a20SPaul E. McKenney 	unsigned long sum = 0;
223dad81a20SPaul E. McKenney 
224dad81a20SPaul E. McKenney 	for_each_possible_cpu(cpu) {
225da915ad5SPaul E. McKenney 		struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu);
226dad81a20SPaul E. McKenney 
227da915ad5SPaul E. McKenney 		sum += READ_ONCE(cpuc->srcu_lock_count[idx]);
228dad81a20SPaul E. McKenney 	}
229dad81a20SPaul E. McKenney 	return sum;
230dad81a20SPaul E. McKenney }
231dad81a20SPaul E. McKenney 
232dad81a20SPaul E. McKenney /*
233da915ad5SPaul E. McKenney  * Returns approximate total of the readers' ->srcu_unlock_count[] values
234da915ad5SPaul E. McKenney  * for the rank of per-CPU counters specified by idx.
235dad81a20SPaul E. McKenney  */
236dad81a20SPaul E. McKenney static unsigned long srcu_readers_unlock_idx(struct srcu_struct *sp, int idx)
237dad81a20SPaul E. McKenney {
238dad81a20SPaul E. McKenney 	int cpu;
239dad81a20SPaul E. McKenney 	unsigned long sum = 0;
240dad81a20SPaul E. McKenney 
241dad81a20SPaul E. McKenney 	for_each_possible_cpu(cpu) {
242da915ad5SPaul E. McKenney 		struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu);
243dad81a20SPaul E. McKenney 
244da915ad5SPaul E. McKenney 		sum += READ_ONCE(cpuc->srcu_unlock_count[idx]);
245dad81a20SPaul E. McKenney 	}
246dad81a20SPaul E. McKenney 	return sum;
247dad81a20SPaul E. McKenney }
248dad81a20SPaul E. McKenney 
249dad81a20SPaul E. McKenney /*
250dad81a20SPaul E. McKenney  * Return true if the number of pre-existing readers is determined to
251dad81a20SPaul E. McKenney  * be zero.
252dad81a20SPaul E. McKenney  */
253dad81a20SPaul E. McKenney static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx)
254dad81a20SPaul E. McKenney {
255dad81a20SPaul E. McKenney 	unsigned long unlocks;
256dad81a20SPaul E. McKenney 
257dad81a20SPaul E. McKenney 	unlocks = srcu_readers_unlock_idx(sp, idx);
258dad81a20SPaul E. McKenney 
259dad81a20SPaul E. McKenney 	/*
260dad81a20SPaul E. McKenney 	 * Make sure that a lock is always counted if the corresponding
261dad81a20SPaul E. McKenney 	 * unlock is counted. Needs to be a smp_mb() as the read side may
262dad81a20SPaul E. McKenney 	 * contain a read from a variable that is written to before the
263dad81a20SPaul E. McKenney 	 * synchronize_srcu() in the write side. In this case smp_mb()s
264dad81a20SPaul E. McKenney 	 * A and B act like the store buffering pattern.
265dad81a20SPaul E. McKenney 	 *
266dad81a20SPaul E. McKenney 	 * This smp_mb() also pairs with smp_mb() C to prevent accesses
267dad81a20SPaul E. McKenney 	 * after the synchronize_srcu() from being executed before the
268dad81a20SPaul E. McKenney 	 * grace period ends.
269dad81a20SPaul E. McKenney 	 */
270dad81a20SPaul E. McKenney 	smp_mb(); /* A */
271dad81a20SPaul E. McKenney 
272dad81a20SPaul E. McKenney 	/*
273dad81a20SPaul E. McKenney 	 * If the locks are the same as the unlocks, then there must have
274dad81a20SPaul E. McKenney 	 * been no readers on this index at some time in between. This does
275dad81a20SPaul E. McKenney 	 * not mean that there are no more readers, as one could have read
276dad81a20SPaul E. McKenney 	 * the current index but not have incremented the lock counter yet.
277dad81a20SPaul E. McKenney 	 *
278*881ec9d2SPaul E. McKenney 	 * So suppose that the updater is preempted here for so long
279*881ec9d2SPaul E. McKenney 	 * that more than ULONG_MAX non-nested readers come and go in
280*881ec9d2SPaul E. McKenney 	 * the meantime.  It turns out that this cannot result in overflow
281*881ec9d2SPaul E. McKenney 	 * because if a reader modifies its unlock count after we read it
282*881ec9d2SPaul E. McKenney 	 * above, then that reader's next load of ->srcu_idx is guaranteed
283*881ec9d2SPaul E. McKenney 	 * to get the new value, which will cause it to operate on the
284*881ec9d2SPaul E. McKenney 	 * other bank of counters, where it cannot contribute to the
285*881ec9d2SPaul E. McKenney 	 * overflow of these counters.  This means that there is a maximum
286*881ec9d2SPaul E. McKenney 	 * of 2*NR_CPUS increments, which cannot overflow given current
287*881ec9d2SPaul E. McKenney 	 * systems, especially not on 64-bit systems.
288*881ec9d2SPaul E. McKenney 	 *
289*881ec9d2SPaul E. McKenney 	 * OK, how about nesting?  This does impose a limit on nesting
290*881ec9d2SPaul E. McKenney 	 * of floor(ULONG_MAX/NR_CPUS/2), which should be sufficient,
291*881ec9d2SPaul E. McKenney 	 * especially on 64-bit systems.
292dad81a20SPaul E. McKenney 	 */
293dad81a20SPaul E. McKenney 	return srcu_readers_lock_idx(sp, idx) == unlocks;
294dad81a20SPaul E. McKenney }
295dad81a20SPaul E. McKenney 
296dad81a20SPaul E. McKenney /**
297dad81a20SPaul E. McKenney  * srcu_readers_active - returns true if there are readers. and false
298dad81a20SPaul E. McKenney  *                       otherwise
299dad81a20SPaul E. McKenney  * @sp: which srcu_struct to count active readers (holding srcu_read_lock).
300dad81a20SPaul E. McKenney  *
301dad81a20SPaul E. McKenney  * Note that this is not an atomic primitive, and can therefore suffer
302dad81a20SPaul E. McKenney  * severe errors when invoked on an active srcu_struct.  That said, it
303dad81a20SPaul E. McKenney  * can be useful as an error check at cleanup time.
304dad81a20SPaul E. McKenney  */
305dad81a20SPaul E. McKenney static bool srcu_readers_active(struct srcu_struct *sp)
306dad81a20SPaul E. McKenney {
307dad81a20SPaul E. McKenney 	int cpu;
308dad81a20SPaul E. McKenney 	unsigned long sum = 0;
309dad81a20SPaul E. McKenney 
310dad81a20SPaul E. McKenney 	for_each_possible_cpu(cpu) {
311da915ad5SPaul E. McKenney 		struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu);
312dad81a20SPaul E. McKenney 
313da915ad5SPaul E. McKenney 		sum += READ_ONCE(cpuc->srcu_lock_count[0]);
314da915ad5SPaul E. McKenney 		sum += READ_ONCE(cpuc->srcu_lock_count[1]);
315da915ad5SPaul E. McKenney 		sum -= READ_ONCE(cpuc->srcu_unlock_count[0]);
316da915ad5SPaul E. McKenney 		sum -= READ_ONCE(cpuc->srcu_unlock_count[1]);
317dad81a20SPaul E. McKenney 	}
318dad81a20SPaul E. McKenney 	return sum;
319dad81a20SPaul E. McKenney }
320dad81a20SPaul E. McKenney 
321dad81a20SPaul E. McKenney #define SRCU_INTERVAL		1
322dad81a20SPaul E. McKenney 
3231e9a038bSPaul E. McKenney /*
3241e9a038bSPaul E. McKenney  * Return grace-period delay, zero if there are expedited grace
3251e9a038bSPaul E. McKenney  * periods pending, SRCU_INTERVAL otherwise.
3261e9a038bSPaul E. McKenney  */
3271e9a038bSPaul E. McKenney static unsigned long srcu_get_delay(struct srcu_struct *sp)
3281e9a038bSPaul E. McKenney {
3291e9a038bSPaul E. McKenney 	if (ULONG_CMP_LT(READ_ONCE(sp->srcu_gp_seq),
3301e9a038bSPaul E. McKenney 			 READ_ONCE(sp->srcu_gp_seq_needed_exp)))
3311e9a038bSPaul E. McKenney 		return 0;
3321e9a038bSPaul E. McKenney 	return SRCU_INTERVAL;
3331e9a038bSPaul E. McKenney }
3341e9a038bSPaul E. McKenney 
335dad81a20SPaul E. McKenney /**
336dad81a20SPaul E. McKenney  * cleanup_srcu_struct - deconstruct a sleep-RCU structure
337dad81a20SPaul E. McKenney  * @sp: structure to clean up.
338dad81a20SPaul E. McKenney  *
339dad81a20SPaul E. McKenney  * Must invoke this after you are finished using a given srcu_struct that
340dad81a20SPaul E. McKenney  * was initialized via init_srcu_struct(), else you leak memory.
341dad81a20SPaul E. McKenney  */
342dad81a20SPaul E. McKenney void cleanup_srcu_struct(struct srcu_struct *sp)
343dad81a20SPaul E. McKenney {
344da915ad5SPaul E. McKenney 	int cpu;
345da915ad5SPaul E. McKenney 
3461e9a038bSPaul E. McKenney 	if (WARN_ON(!srcu_get_delay(sp)))
3471e9a038bSPaul E. McKenney 		return; /* Leakage unless caller handles error. */
348dad81a20SPaul E. McKenney 	if (WARN_ON(srcu_readers_active(sp)))
349dad81a20SPaul E. McKenney 		return; /* Leakage unless caller handles error. */
350dad81a20SPaul E. McKenney 	flush_delayed_work(&sp->work);
351da915ad5SPaul E. McKenney 	for_each_possible_cpu(cpu)
352da915ad5SPaul E. McKenney 		flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work);
353da915ad5SPaul E. McKenney 	if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
354da915ad5SPaul E. McKenney 	    WARN_ON(srcu_readers_active(sp))) {
355da915ad5SPaul E. McKenney 		pr_info("cleanup_srcu_struct: Active srcu_struct %p state: %d\n", sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)));
356dad81a20SPaul E. McKenney 		return; /* Caller forgot to stop doing call_srcu()? */
357dad81a20SPaul E. McKenney 	}
358da915ad5SPaul E. McKenney 	free_percpu(sp->sda);
359da915ad5SPaul E. McKenney 	sp->sda = NULL;
360dad81a20SPaul E. McKenney }
361dad81a20SPaul E. McKenney EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
362dad81a20SPaul E. McKenney 
363dad81a20SPaul E. McKenney /*
364dad81a20SPaul E. McKenney  * Counts the new reader in the appropriate per-CPU element of the
365cdf7abc4SPaolo Bonzini  * srcu_struct.
366dad81a20SPaul E. McKenney  * Returns an index that must be passed to the matching srcu_read_unlock().
367dad81a20SPaul E. McKenney  */
368dad81a20SPaul E. McKenney int __srcu_read_lock(struct srcu_struct *sp)
369dad81a20SPaul E. McKenney {
370dad81a20SPaul E. McKenney 	int idx;
371dad81a20SPaul E. McKenney 
372da915ad5SPaul E. McKenney 	idx = READ_ONCE(sp->srcu_idx) & 0x1;
373cdf7abc4SPaolo Bonzini 	this_cpu_inc(sp->sda->srcu_lock_count[idx]);
374dad81a20SPaul E. McKenney 	smp_mb(); /* B */  /* Avoid leaking the critical section. */
375dad81a20SPaul E. McKenney 	return idx;
376dad81a20SPaul E. McKenney }
377dad81a20SPaul E. McKenney EXPORT_SYMBOL_GPL(__srcu_read_lock);
378dad81a20SPaul E. McKenney 
379dad81a20SPaul E. McKenney /*
380dad81a20SPaul E. McKenney  * Removes the count for the old reader from the appropriate per-CPU
381dad81a20SPaul E. McKenney  * element of the srcu_struct.  Note that this may well be a different
382dad81a20SPaul E. McKenney  * CPU than that which was incremented by the corresponding srcu_read_lock().
383dad81a20SPaul E. McKenney  */
384dad81a20SPaul E. McKenney void __srcu_read_unlock(struct srcu_struct *sp, int idx)
385dad81a20SPaul E. McKenney {
386dad81a20SPaul E. McKenney 	smp_mb(); /* C */  /* Avoid leaking the critical section. */
387da915ad5SPaul E. McKenney 	this_cpu_inc(sp->sda->srcu_unlock_count[idx]);
388dad81a20SPaul E. McKenney }
389dad81a20SPaul E. McKenney EXPORT_SYMBOL_GPL(__srcu_read_unlock);
390dad81a20SPaul E. McKenney 
391dad81a20SPaul E. McKenney /*
392dad81a20SPaul E. McKenney  * We use an adaptive strategy for synchronize_srcu() and especially for
393dad81a20SPaul E. McKenney  * synchronize_srcu_expedited().  We spin for a fixed time period
394dad81a20SPaul E. McKenney  * (defined below) to allow SRCU readers to exit their read-side critical
395dad81a20SPaul E. McKenney  * sections.  If there are still some readers after a few microseconds,
396dad81a20SPaul E. McKenney  * we repeatedly block for 1-millisecond time periods.
397dad81a20SPaul E. McKenney  */
398dad81a20SPaul E. McKenney #define SRCU_RETRY_CHECK_DELAY		5
399dad81a20SPaul E. McKenney 
400dad81a20SPaul E. McKenney /*
401dad81a20SPaul E. McKenney  * Start an SRCU grace period.
402dad81a20SPaul E. McKenney  */
403dad81a20SPaul E. McKenney static void srcu_gp_start(struct srcu_struct *sp)
404dad81a20SPaul E. McKenney {
405da915ad5SPaul E. McKenney 	struct srcu_data *sdp = this_cpu_ptr(sp->sda);
406dad81a20SPaul E. McKenney 	int state;
407dad81a20SPaul E. McKenney 
408da915ad5SPaul E. McKenney 	RCU_LOCKDEP_WARN(!lockdep_is_held(&sp->gp_lock),
409da915ad5SPaul E. McKenney 			 "Invoked srcu_gp_start() without ->gp_lock!");
410da915ad5SPaul E. McKenney 	WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed));
411da915ad5SPaul E. McKenney 	rcu_segcblist_advance(&sdp->srcu_cblist,
412da915ad5SPaul E. McKenney 			      rcu_seq_current(&sp->srcu_gp_seq));
413da915ad5SPaul E. McKenney 	(void)rcu_segcblist_accelerate(&sdp->srcu_cblist,
414dad81a20SPaul E. McKenney 				       rcu_seq_snap(&sp->srcu_gp_seq));
4152da4b2a7SPaul E. McKenney 	smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */
416dad81a20SPaul E. McKenney 	rcu_seq_start(&sp->srcu_gp_seq);
417dad81a20SPaul E. McKenney 	state = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq));
418dad81a20SPaul E. McKenney 	WARN_ON_ONCE(state != SRCU_STATE_SCAN1);
419dad81a20SPaul E. McKenney }
420dad81a20SPaul E. McKenney 
421dad81a20SPaul E. McKenney /*
422da915ad5SPaul E. McKenney  * Track online CPUs to guide callback workqueue placement.
423da915ad5SPaul E. McKenney  */
424da915ad5SPaul E. McKenney DEFINE_PER_CPU(bool, srcu_online);
425da915ad5SPaul E. McKenney 
426da915ad5SPaul E. McKenney void srcu_online_cpu(unsigned int cpu)
427da915ad5SPaul E. McKenney {
428da915ad5SPaul E. McKenney 	WRITE_ONCE(per_cpu(srcu_online, cpu), true);
429da915ad5SPaul E. McKenney }
430da915ad5SPaul E. McKenney 
431da915ad5SPaul E. McKenney void srcu_offline_cpu(unsigned int cpu)
432da915ad5SPaul E. McKenney {
433da915ad5SPaul E. McKenney 	WRITE_ONCE(per_cpu(srcu_online, cpu), false);
434da915ad5SPaul E. McKenney }
435da915ad5SPaul E. McKenney 
436da915ad5SPaul E. McKenney /*
437da915ad5SPaul E. McKenney  * Place the workqueue handler on the specified CPU if online, otherwise
438da915ad5SPaul E. McKenney  * just run it whereever.  This is useful for placing workqueue handlers
439da915ad5SPaul E. McKenney  * that are to invoke the specified CPU's callbacks.
440da915ad5SPaul E. McKenney  */
441da915ad5SPaul E. McKenney static bool srcu_queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
442da915ad5SPaul E. McKenney 				       struct delayed_work *dwork,
443da915ad5SPaul E. McKenney 				       unsigned long delay)
444da915ad5SPaul E. McKenney {
445da915ad5SPaul E. McKenney 	bool ret;
446da915ad5SPaul E. McKenney 
447da915ad5SPaul E. McKenney 	preempt_disable();
448da915ad5SPaul E. McKenney 	if (READ_ONCE(per_cpu(srcu_online, cpu)))
449da915ad5SPaul E. McKenney 		ret = queue_delayed_work_on(cpu, wq, dwork, delay);
450da915ad5SPaul E. McKenney 	else
451da915ad5SPaul E. McKenney 		ret = queue_delayed_work(wq, dwork, delay);
452da915ad5SPaul E. McKenney 	preempt_enable();
453da915ad5SPaul E. McKenney 	return ret;
454da915ad5SPaul E. McKenney }
455da915ad5SPaul E. McKenney 
456da915ad5SPaul E. McKenney /*
457da915ad5SPaul E. McKenney  * Schedule callback invocation for the specified srcu_data structure,
458da915ad5SPaul E. McKenney  * if possible, on the corresponding CPU.
459da915ad5SPaul E. McKenney  */
460da915ad5SPaul E. McKenney static void srcu_schedule_cbs_sdp(struct srcu_data *sdp, unsigned long delay)
461da915ad5SPaul E. McKenney {
462da915ad5SPaul E. McKenney 	srcu_queue_delayed_work_on(sdp->cpu, system_power_efficient_wq,
463da915ad5SPaul E. McKenney 				   &sdp->work, delay);
464da915ad5SPaul E. McKenney }
465da915ad5SPaul E. McKenney 
466da915ad5SPaul E. McKenney /*
467da915ad5SPaul E. McKenney  * Schedule callback invocation for all srcu_data structures associated
468c7e88067SPaul E. McKenney  * with the specified srcu_node structure that have callbacks for the
469c7e88067SPaul E. McKenney  * just-completed grace period, the one corresponding to idx.  If possible,
470c7e88067SPaul E. McKenney  * schedule this invocation on the corresponding CPUs.
471da915ad5SPaul E. McKenney  */
472c7e88067SPaul E. McKenney static void srcu_schedule_cbs_snp(struct srcu_struct *sp, struct srcu_node *snp,
4731e9a038bSPaul E. McKenney 				  unsigned long mask, unsigned long delay)
474da915ad5SPaul E. McKenney {
475da915ad5SPaul E. McKenney 	int cpu;
476da915ad5SPaul E. McKenney 
477c7e88067SPaul E. McKenney 	for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) {
478c7e88067SPaul E. McKenney 		if (!(mask & (1 << (cpu - snp->grplo))))
479c7e88067SPaul E. McKenney 			continue;
4801e9a038bSPaul E. McKenney 		srcu_schedule_cbs_sdp(per_cpu_ptr(sp->sda, cpu), delay);
481da915ad5SPaul E. McKenney 	}
482c7e88067SPaul E. McKenney }
483da915ad5SPaul E. McKenney 
484da915ad5SPaul E. McKenney /*
485da915ad5SPaul E. McKenney  * Note the end of an SRCU grace period.  Initiates callback invocation
486da915ad5SPaul E. McKenney  * and starts a new grace period if needed.
487da915ad5SPaul E. McKenney  *
488da915ad5SPaul E. McKenney  * The ->srcu_cb_mutex acquisition does not protect any data, but
489da915ad5SPaul E. McKenney  * instead prevents more than one grace period from starting while we
490da915ad5SPaul E. McKenney  * are initiating callback invocation.  This allows the ->srcu_have_cbs[]
491da915ad5SPaul E. McKenney  * array to have a finite number of elements.
492da915ad5SPaul E. McKenney  */
493da915ad5SPaul E. McKenney static void srcu_gp_end(struct srcu_struct *sp)
494da915ad5SPaul E. McKenney {
4951e9a038bSPaul E. McKenney 	unsigned long cbdelay;
496da915ad5SPaul E. McKenney 	bool cbs;
497da915ad5SPaul E. McKenney 	unsigned long gpseq;
498da915ad5SPaul E. McKenney 	int idx;
499da915ad5SPaul E. McKenney 	int idxnext;
500c7e88067SPaul E. McKenney 	unsigned long mask;
501da915ad5SPaul E. McKenney 	struct srcu_node *snp;
502da915ad5SPaul E. McKenney 
503da915ad5SPaul E. McKenney 	/* Prevent more than one additional grace period. */
504da915ad5SPaul E. McKenney 	mutex_lock(&sp->srcu_cb_mutex);
505da915ad5SPaul E. McKenney 
506da915ad5SPaul E. McKenney 	/* End the current grace period. */
507da915ad5SPaul E. McKenney 	spin_lock_irq(&sp->gp_lock);
508da915ad5SPaul E. McKenney 	idx = rcu_seq_state(sp->srcu_gp_seq);
509da915ad5SPaul E. McKenney 	WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
5101e9a038bSPaul E. McKenney 	cbdelay = srcu_get_delay(sp);
51122607d66SPaul E. McKenney 	sp->srcu_last_gp_end = ktime_get_mono_fast_ns();
512da915ad5SPaul E. McKenney 	rcu_seq_end(&sp->srcu_gp_seq);
513da915ad5SPaul E. McKenney 	gpseq = rcu_seq_current(&sp->srcu_gp_seq);
5141e9a038bSPaul E. McKenney 	if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, gpseq))
5151e9a038bSPaul E. McKenney 		sp->srcu_gp_seq_needed_exp = gpseq;
516da915ad5SPaul E. McKenney 	spin_unlock_irq(&sp->gp_lock);
517da915ad5SPaul E. McKenney 	mutex_unlock(&sp->srcu_gp_mutex);
518da915ad5SPaul E. McKenney 	/* A new grace period can start at this point.  But only one. */
519da915ad5SPaul E. McKenney 
520da915ad5SPaul E. McKenney 	/* Initiate callback invocation as needed. */
521da915ad5SPaul E. McKenney 	idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs);
522da915ad5SPaul E. McKenney 	idxnext = (idx + 1) % ARRAY_SIZE(snp->srcu_have_cbs);
523da915ad5SPaul E. McKenney 	rcu_for_each_node_breadth_first(sp, snp) {
524da915ad5SPaul E. McKenney 		spin_lock_irq(&snp->lock);
525da915ad5SPaul E. McKenney 		cbs = false;
526da915ad5SPaul E. McKenney 		if (snp >= sp->level[rcu_num_lvls - 1])
527da915ad5SPaul E. McKenney 			cbs = snp->srcu_have_cbs[idx] == gpseq;
528da915ad5SPaul E. McKenney 		snp->srcu_have_cbs[idx] = gpseq;
529da915ad5SPaul E. McKenney 		rcu_seq_set_state(&snp->srcu_have_cbs[idx], 1);
5301e9a038bSPaul E. McKenney 		if (ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, gpseq))
5311e9a038bSPaul E. McKenney 			snp->srcu_gp_seq_needed_exp = gpseq;
532c7e88067SPaul E. McKenney 		mask = snp->srcu_data_have_cbs[idx];
533c7e88067SPaul E. McKenney 		snp->srcu_data_have_cbs[idx] = 0;
534da915ad5SPaul E. McKenney 		spin_unlock_irq(&snp->lock);
535da915ad5SPaul E. McKenney 		if (cbs) {
536da915ad5SPaul E. McKenney 			smp_mb(); /* GP end before CB invocation. */
5371e9a038bSPaul E. McKenney 			srcu_schedule_cbs_snp(sp, snp, mask, cbdelay);
538da915ad5SPaul E. McKenney 		}
539da915ad5SPaul E. McKenney 	}
540da915ad5SPaul E. McKenney 
541da915ad5SPaul E. McKenney 	/* Callback initiation done, allow grace periods after next. */
542da915ad5SPaul E. McKenney 	mutex_unlock(&sp->srcu_cb_mutex);
543da915ad5SPaul E. McKenney 
544da915ad5SPaul E. McKenney 	/* Start a new grace period if needed. */
545da915ad5SPaul E. McKenney 	spin_lock_irq(&sp->gp_lock);
546da915ad5SPaul E. McKenney 	gpseq = rcu_seq_current(&sp->srcu_gp_seq);
547da915ad5SPaul E. McKenney 	if (!rcu_seq_state(gpseq) &&
548da915ad5SPaul E. McKenney 	    ULONG_CMP_LT(gpseq, sp->srcu_gp_seq_needed)) {
549da915ad5SPaul E. McKenney 		srcu_gp_start(sp);
550da915ad5SPaul E. McKenney 		spin_unlock_irq(&sp->gp_lock);
551da915ad5SPaul E. McKenney 		/* Throttle expedited grace periods: Should be rare! */
5521e9a038bSPaul E. McKenney 		srcu_reschedule(sp, rcu_seq_ctr(gpseq) & 0x3ff
5531e9a038bSPaul E. McKenney 				    ? 0 : SRCU_INTERVAL);
554da915ad5SPaul E. McKenney 	} else {
555da915ad5SPaul E. McKenney 		spin_unlock_irq(&sp->gp_lock);
556da915ad5SPaul E. McKenney 	}
557da915ad5SPaul E. McKenney }
558da915ad5SPaul E. McKenney 
559da915ad5SPaul E. McKenney /*
5601e9a038bSPaul E. McKenney  * Funnel-locking scheme to scalably mediate many concurrent expedited
5611e9a038bSPaul E. McKenney  * grace-period requests.  This function is invoked for the first known
5621e9a038bSPaul E. McKenney  * expedited request for a grace period that has already been requested,
5631e9a038bSPaul E. McKenney  * but without expediting.  To start a completely new grace period,
5641e9a038bSPaul E. McKenney  * whether expedited or not, use srcu_funnel_gp_start() instead.
5651e9a038bSPaul E. McKenney  */
5661e9a038bSPaul E. McKenney static void srcu_funnel_exp_start(struct srcu_struct *sp, struct srcu_node *snp,
5671e9a038bSPaul E. McKenney 				  unsigned long s)
5681e9a038bSPaul E. McKenney {
5691e9a038bSPaul E. McKenney 	unsigned long flags;
5701e9a038bSPaul E. McKenney 
5711e9a038bSPaul E. McKenney 	for (; snp != NULL; snp = snp->srcu_parent) {
5721e9a038bSPaul E. McKenney 		if (rcu_seq_done(&sp->srcu_gp_seq, s) ||
5731e9a038bSPaul E. McKenney 		    ULONG_CMP_GE(READ_ONCE(snp->srcu_gp_seq_needed_exp), s))
5741e9a038bSPaul E. McKenney 			return;
5751e9a038bSPaul E. McKenney 		spin_lock_irqsave(&snp->lock, flags);
5761e9a038bSPaul E. McKenney 		if (ULONG_CMP_GE(snp->srcu_gp_seq_needed_exp, s)) {
5771e9a038bSPaul E. McKenney 			spin_unlock_irqrestore(&snp->lock, flags);
5781e9a038bSPaul E. McKenney 			return;
5791e9a038bSPaul E. McKenney 		}
5801e9a038bSPaul E. McKenney 		WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
5811e9a038bSPaul E. McKenney 		spin_unlock_irqrestore(&snp->lock, flags);
5821e9a038bSPaul E. McKenney 	}
5831e9a038bSPaul E. McKenney 	spin_lock_irqsave(&sp->gp_lock, flags);
5841e9a038bSPaul E. McKenney 	if (!ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s))
5851e9a038bSPaul E. McKenney 		sp->srcu_gp_seq_needed_exp = s;
5861e9a038bSPaul E. McKenney 	spin_unlock_irqrestore(&sp->gp_lock, flags);
5871e9a038bSPaul E. McKenney }
5881e9a038bSPaul E. McKenney 
5891e9a038bSPaul E. McKenney /*
590da915ad5SPaul E. McKenney  * Funnel-locking scheme to scalably mediate many concurrent grace-period
591da915ad5SPaul E. McKenney  * requests.  The winner has to do the work of actually starting grace
592da915ad5SPaul E. McKenney  * period s.  Losers must either ensure that their desired grace-period
593da915ad5SPaul E. McKenney  * number is recorded on at least their leaf srcu_node structure, or they
594da915ad5SPaul E. McKenney  * must take steps to invoke their own callbacks.
595da915ad5SPaul E. McKenney  */
5961e9a038bSPaul E. McKenney static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
5971e9a038bSPaul E. McKenney 				 unsigned long s, bool do_norm)
598da915ad5SPaul E. McKenney {
599da915ad5SPaul E. McKenney 	unsigned long flags;
600da915ad5SPaul E. McKenney 	int idx = rcu_seq_ctr(s) % ARRAY_SIZE(sdp->mynode->srcu_have_cbs);
601da915ad5SPaul E. McKenney 	struct srcu_node *snp = sdp->mynode;
602da915ad5SPaul E. McKenney 	unsigned long snp_seq;
603da915ad5SPaul E. McKenney 
604da915ad5SPaul E. McKenney 	/* Each pass through the loop does one level of the srcu_node tree. */
605da915ad5SPaul E. McKenney 	for (; snp != NULL; snp = snp->srcu_parent) {
606da915ad5SPaul E. McKenney 		if (rcu_seq_done(&sp->srcu_gp_seq, s) && snp != sdp->mynode)
607da915ad5SPaul E. McKenney 			return; /* GP already done and CBs recorded. */
608da915ad5SPaul E. McKenney 		spin_lock_irqsave(&snp->lock, flags);
609da915ad5SPaul E. McKenney 		if (ULONG_CMP_GE(snp->srcu_have_cbs[idx], s)) {
610da915ad5SPaul E. McKenney 			snp_seq = snp->srcu_have_cbs[idx];
611c7e88067SPaul E. McKenney 			if (snp == sdp->mynode && snp_seq == s)
612c7e88067SPaul E. McKenney 				snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
613da915ad5SPaul E. McKenney 			spin_unlock_irqrestore(&snp->lock, flags);
614da915ad5SPaul E. McKenney 			if (snp == sdp->mynode && snp_seq != s) {
615da915ad5SPaul E. McKenney 				smp_mb(); /* CBs after GP! */
6161e9a038bSPaul E. McKenney 				srcu_schedule_cbs_sdp(sdp, do_norm
6171e9a038bSPaul E. McKenney 							   ? SRCU_INTERVAL
6181e9a038bSPaul E. McKenney 							   : 0);
6191e9a038bSPaul E. McKenney 				return;
620da915ad5SPaul E. McKenney 			}
6211e9a038bSPaul E. McKenney 			if (!do_norm)
6221e9a038bSPaul E. McKenney 				srcu_funnel_exp_start(sp, snp, s);
623da915ad5SPaul E. McKenney 			return;
624da915ad5SPaul E. McKenney 		}
625da915ad5SPaul E. McKenney 		snp->srcu_have_cbs[idx] = s;
626c7e88067SPaul E. McKenney 		if (snp == sdp->mynode)
627c7e88067SPaul E. McKenney 			snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
6281e9a038bSPaul E. McKenney 		if (!do_norm && ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, s))
6291e9a038bSPaul E. McKenney 			snp->srcu_gp_seq_needed_exp = s;
630da915ad5SPaul E. McKenney 		spin_unlock_irqrestore(&snp->lock, flags);
631da915ad5SPaul E. McKenney 	}
632da915ad5SPaul E. McKenney 
633da915ad5SPaul E. McKenney 	/* Top of tree, must ensure the grace period will be started. */
634da915ad5SPaul E. McKenney 	spin_lock_irqsave(&sp->gp_lock, flags);
635da915ad5SPaul E. McKenney 	if (ULONG_CMP_LT(sp->srcu_gp_seq_needed, s)) {
636da915ad5SPaul E. McKenney 		/*
637da915ad5SPaul E. McKenney 		 * Record need for grace period s.  Pair with load
638da915ad5SPaul E. McKenney 		 * acquire setting up for initialization.
639da915ad5SPaul E. McKenney 		 */
640da915ad5SPaul E. McKenney 		smp_store_release(&sp->srcu_gp_seq_needed, s); /*^^^*/
641da915ad5SPaul E. McKenney 	}
6421e9a038bSPaul E. McKenney 	if (!do_norm && ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s))
6431e9a038bSPaul E. McKenney 		sp->srcu_gp_seq_needed_exp = s;
644da915ad5SPaul E. McKenney 
645da915ad5SPaul E. McKenney 	/* If grace period not already done and none in progress, start it. */
646da915ad5SPaul E. McKenney 	if (!rcu_seq_done(&sp->srcu_gp_seq, s) &&
647da915ad5SPaul E. McKenney 	    rcu_seq_state(sp->srcu_gp_seq) == SRCU_STATE_IDLE) {
648da915ad5SPaul E. McKenney 		WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed));
649da915ad5SPaul E. McKenney 		srcu_gp_start(sp);
650da915ad5SPaul E. McKenney 		queue_delayed_work(system_power_efficient_wq, &sp->work,
6511e9a038bSPaul E. McKenney 				   srcu_get_delay(sp));
652da915ad5SPaul E. McKenney 	}
653da915ad5SPaul E. McKenney 	spin_unlock_irqrestore(&sp->gp_lock, flags);
654da915ad5SPaul E. McKenney }
655da915ad5SPaul E. McKenney 
656da915ad5SPaul E. McKenney /*
657dad81a20SPaul E. McKenney  * Wait until all readers counted by array index idx complete, but
658dad81a20SPaul E. McKenney  * loop an additional time if there is an expedited grace period pending.
659da915ad5SPaul E. McKenney  * The caller must ensure that ->srcu_idx is not changed while checking.
660dad81a20SPaul E. McKenney  */
661dad81a20SPaul E. McKenney static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount)
662dad81a20SPaul E. McKenney {
663dad81a20SPaul E. McKenney 	for (;;) {
664dad81a20SPaul E. McKenney 		if (srcu_readers_active_idx_check(sp, idx))
665dad81a20SPaul E. McKenney 			return true;
6661e9a038bSPaul E. McKenney 		if (--trycount + !srcu_get_delay(sp) <= 0)
667dad81a20SPaul E. McKenney 			return false;
668dad81a20SPaul E. McKenney 		udelay(SRCU_RETRY_CHECK_DELAY);
669dad81a20SPaul E. McKenney 	}
670dad81a20SPaul E. McKenney }
671dad81a20SPaul E. McKenney 
672dad81a20SPaul E. McKenney /*
673da915ad5SPaul E. McKenney  * Increment the ->srcu_idx counter so that future SRCU readers will
674da915ad5SPaul E. McKenney  * use the other rank of the ->srcu_(un)lock_count[] arrays.  This allows
675dad81a20SPaul E. McKenney  * us to wait for pre-existing readers in a starvation-free manner.
676dad81a20SPaul E. McKenney  */
677dad81a20SPaul E. McKenney static void srcu_flip(struct srcu_struct *sp)
678dad81a20SPaul E. McKenney {
679*881ec9d2SPaul E. McKenney 	/*
680*881ec9d2SPaul E. McKenney 	 * Ensure that if this updater saw a given reader's increment
681*881ec9d2SPaul E. McKenney 	 * from __srcu_read_lock(), that reader was using an old value
682*881ec9d2SPaul E. McKenney 	 * of ->srcu_idx.  Also ensure that if a given reader sees the
683*881ec9d2SPaul E. McKenney 	 * new value of ->srcu_idx, this updater's earlier scans cannot
684*881ec9d2SPaul E. McKenney 	 * have seen that reader's increments (which is OK, because this
685*881ec9d2SPaul E. McKenney 	 * grace period need not wait on that reader).
686*881ec9d2SPaul E. McKenney 	 */
687*881ec9d2SPaul E. McKenney 	smp_mb(); /* E */  /* Pairs with B and C. */
688*881ec9d2SPaul E. McKenney 
689da915ad5SPaul E. McKenney 	WRITE_ONCE(sp->srcu_idx, sp->srcu_idx + 1);
690dad81a20SPaul E. McKenney 
691dad81a20SPaul E. McKenney 	/*
692dad81a20SPaul E. McKenney 	 * Ensure that if the updater misses an __srcu_read_unlock()
693dad81a20SPaul E. McKenney 	 * increment, that task's next __srcu_read_lock() will see the
694dad81a20SPaul E. McKenney 	 * above counter update.  Note that both this memory barrier
695dad81a20SPaul E. McKenney 	 * and the one in srcu_readers_active_idx_check() provide the
696dad81a20SPaul E. McKenney 	 * guarantee for __srcu_read_lock().
697dad81a20SPaul E. McKenney 	 */
698dad81a20SPaul E. McKenney 	smp_mb(); /* D */  /* Pairs with C. */
699dad81a20SPaul E. McKenney }
700dad81a20SPaul E. McKenney 
701dad81a20SPaul E. McKenney /*
7022da4b2a7SPaul E. McKenney  * If SRCU is likely idle, return true, otherwise return false.
7032da4b2a7SPaul E. McKenney  *
7042da4b2a7SPaul E. McKenney  * Note that it is OK for several current from-idle requests for a new
7052da4b2a7SPaul E. McKenney  * grace period from idle to specify expediting because they will all end
7062da4b2a7SPaul E. McKenney  * up requesting the same grace period anyhow.  So no loss.
7072da4b2a7SPaul E. McKenney  *
7082da4b2a7SPaul E. McKenney  * Note also that if any CPU (including the current one) is still invoking
7092da4b2a7SPaul E. McKenney  * callbacks, this function will nevertheless say "idle".  This is not
7102da4b2a7SPaul E. McKenney  * ideal, but the overhead of checking all CPUs' callback lists is even
7112da4b2a7SPaul E. McKenney  * less ideal, especially on large systems.  Furthermore, the wakeup
7122da4b2a7SPaul E. McKenney  * can happen before the callback is fully removed, so we have no choice
7132da4b2a7SPaul E. McKenney  * but to accept this type of error.
7142da4b2a7SPaul E. McKenney  *
7152da4b2a7SPaul E. McKenney  * This function is also subject to counter-wrap errors, but let's face
7162da4b2a7SPaul E. McKenney  * it, if this function was preempted for enough time for the counters
7172da4b2a7SPaul E. McKenney  * to wrap, it really doesn't matter whether or not we expedite the grace
7182da4b2a7SPaul E. McKenney  * period.  The extra overhead of a needlessly expedited grace period is
7192da4b2a7SPaul E. McKenney  * negligible when amoritized over that time period, and the extra latency
7202da4b2a7SPaul E. McKenney  * of a needlessly non-expedited grace period is similarly negligible.
7212da4b2a7SPaul E. McKenney  */
7222da4b2a7SPaul E. McKenney static bool srcu_might_be_idle(struct srcu_struct *sp)
7232da4b2a7SPaul E. McKenney {
72422607d66SPaul E. McKenney 	unsigned long curseq;
7252da4b2a7SPaul E. McKenney 	unsigned long flags;
7262da4b2a7SPaul E. McKenney 	struct srcu_data *sdp;
72722607d66SPaul E. McKenney 	unsigned long t;
7282da4b2a7SPaul E. McKenney 
7292da4b2a7SPaul E. McKenney 	/* If the local srcu_data structure has callbacks, not idle.  */
7302da4b2a7SPaul E. McKenney 	local_irq_save(flags);
7312da4b2a7SPaul E. McKenney 	sdp = this_cpu_ptr(sp->sda);
7322da4b2a7SPaul E. McKenney 	if (rcu_segcblist_pend_cbs(&sdp->srcu_cblist)) {
7332da4b2a7SPaul E. McKenney 		local_irq_restore(flags);
7342da4b2a7SPaul E. McKenney 		return false; /* Callbacks already present, so not idle. */
7352da4b2a7SPaul E. McKenney 	}
7362da4b2a7SPaul E. McKenney 	local_irq_restore(flags);
7372da4b2a7SPaul E. McKenney 
7382da4b2a7SPaul E. McKenney 	/*
7392da4b2a7SPaul E. McKenney 	 * No local callbacks, so probabalistically probe global state.
7402da4b2a7SPaul E. McKenney 	 * Exact information would require acquiring locks, which would
7412da4b2a7SPaul E. McKenney 	 * kill scalability, hence the probabalistic nature of the probe.
7422da4b2a7SPaul E. McKenney 	 */
74322607d66SPaul E. McKenney 
74422607d66SPaul E. McKenney 	/* First, see if enough time has passed since the last GP. */
74522607d66SPaul E. McKenney 	t = ktime_get_mono_fast_ns();
74622607d66SPaul E. McKenney 	if (exp_holdoff == 0 ||
74722607d66SPaul E. McKenney 	    time_in_range_open(t, sp->srcu_last_gp_end,
74822607d66SPaul E. McKenney 			       sp->srcu_last_gp_end + exp_holdoff))
74922607d66SPaul E. McKenney 		return false; /* Too soon after last GP. */
75022607d66SPaul E. McKenney 
75122607d66SPaul E. McKenney 	/* Next, check for probable idleness. */
7522da4b2a7SPaul E. McKenney 	curseq = rcu_seq_current(&sp->srcu_gp_seq);
7532da4b2a7SPaul E. McKenney 	smp_mb(); /* Order ->srcu_gp_seq with ->srcu_gp_seq_needed. */
7542da4b2a7SPaul E. McKenney 	if (ULONG_CMP_LT(curseq, READ_ONCE(sp->srcu_gp_seq_needed)))
7552da4b2a7SPaul E. McKenney 		return false; /* Grace period in progress, so not idle. */
7562da4b2a7SPaul E. McKenney 	smp_mb(); /* Order ->srcu_gp_seq with prior access. */
7572da4b2a7SPaul E. McKenney 	if (curseq != rcu_seq_current(&sp->srcu_gp_seq))
7582da4b2a7SPaul E. McKenney 		return false; /* GP # changed, so not idle. */
7592da4b2a7SPaul E. McKenney 	return true; /* With reasonable probability, idle! */
7602da4b2a7SPaul E. McKenney }
7612da4b2a7SPaul E. McKenney 
7622da4b2a7SPaul E. McKenney /*
763da915ad5SPaul E. McKenney  * Enqueue an SRCU callback on the srcu_data structure associated with
764da915ad5SPaul E. McKenney  * the current CPU and the specified srcu_struct structure, initiating
765da915ad5SPaul E. McKenney  * grace-period processing if it is not already running.
766dad81a20SPaul E. McKenney  *
767dad81a20SPaul E. McKenney  * Note that all CPUs must agree that the grace period extended beyond
768dad81a20SPaul E. McKenney  * all pre-existing SRCU read-side critical section.  On systems with
769dad81a20SPaul E. McKenney  * more than one CPU, this means that when "func()" is invoked, each CPU
770dad81a20SPaul E. McKenney  * is guaranteed to have executed a full memory barrier since the end of
771dad81a20SPaul E. McKenney  * its last corresponding SRCU read-side critical section whose beginning
772dad81a20SPaul E. McKenney  * preceded the call to call_rcu().  It also means that each CPU executing
773dad81a20SPaul E. McKenney  * an SRCU read-side critical section that continues beyond the start of
774dad81a20SPaul E. McKenney  * "func()" must have executed a memory barrier after the call_rcu()
775dad81a20SPaul E. McKenney  * but before the beginning of that SRCU read-side critical section.
776dad81a20SPaul E. McKenney  * Note that these guarantees include CPUs that are offline, idle, or
777dad81a20SPaul E. McKenney  * executing in user mode, as well as CPUs that are executing in the kernel.
778dad81a20SPaul E. McKenney  *
779dad81a20SPaul E. McKenney  * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
780dad81a20SPaul E. McKenney  * resulting SRCU callback function "func()", then both CPU A and CPU
781dad81a20SPaul E. McKenney  * B are guaranteed to execute a full memory barrier during the time
782dad81a20SPaul E. McKenney  * interval between the call to call_rcu() and the invocation of "func()".
783dad81a20SPaul E. McKenney  * This guarantee applies even if CPU A and CPU B are the same CPU (but
784dad81a20SPaul E. McKenney  * again only if the system has more than one CPU).
785dad81a20SPaul E. McKenney  *
786dad81a20SPaul E. McKenney  * Of course, these guarantees apply only for invocations of call_srcu(),
787dad81a20SPaul E. McKenney  * srcu_read_lock(), and srcu_read_unlock() that are all passed the same
788dad81a20SPaul E. McKenney  * srcu_struct structure.
789dad81a20SPaul E. McKenney  */
7901e9a038bSPaul E. McKenney void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
7911e9a038bSPaul E. McKenney 		 rcu_callback_t func, bool do_norm)
792dad81a20SPaul E. McKenney {
793dad81a20SPaul E. McKenney 	unsigned long flags;
7941e9a038bSPaul E. McKenney 	bool needexp = false;
795da915ad5SPaul E. McKenney 	bool needgp = false;
796da915ad5SPaul E. McKenney 	unsigned long s;
797da915ad5SPaul E. McKenney 	struct srcu_data *sdp;
798dad81a20SPaul E. McKenney 
799da915ad5SPaul E. McKenney 	check_init_srcu_struct(sp);
800da915ad5SPaul E. McKenney 	rhp->func = func;
801da915ad5SPaul E. McKenney 	local_irq_save(flags);
802da915ad5SPaul E. McKenney 	sdp = this_cpu_ptr(sp->sda);
803da915ad5SPaul E. McKenney 	spin_lock(&sdp->lock);
804da915ad5SPaul E. McKenney 	rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp, false);
805da915ad5SPaul E. McKenney 	rcu_segcblist_advance(&sdp->srcu_cblist,
806da915ad5SPaul E. McKenney 			      rcu_seq_current(&sp->srcu_gp_seq));
807da915ad5SPaul E. McKenney 	s = rcu_seq_snap(&sp->srcu_gp_seq);
808da915ad5SPaul E. McKenney 	(void)rcu_segcblist_accelerate(&sdp->srcu_cblist, s);
809da915ad5SPaul E. McKenney 	if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) {
810da915ad5SPaul E. McKenney 		sdp->srcu_gp_seq_needed = s;
811da915ad5SPaul E. McKenney 		needgp = true;
812dad81a20SPaul E. McKenney 	}
8131e9a038bSPaul E. McKenney 	if (!do_norm && ULONG_CMP_LT(sdp->srcu_gp_seq_needed_exp, s)) {
8141e9a038bSPaul E. McKenney 		sdp->srcu_gp_seq_needed_exp = s;
8151e9a038bSPaul E. McKenney 		needexp = true;
8161e9a038bSPaul E. McKenney 	}
817da915ad5SPaul E. McKenney 	spin_unlock_irqrestore(&sdp->lock, flags);
818da915ad5SPaul E. McKenney 	if (needgp)
8191e9a038bSPaul E. McKenney 		srcu_funnel_gp_start(sp, sdp, s, do_norm);
8201e9a038bSPaul E. McKenney 	else if (needexp)
8211e9a038bSPaul E. McKenney 		srcu_funnel_exp_start(sp, sdp->mynode, s);
8221e9a038bSPaul E. McKenney }
8231e9a038bSPaul E. McKenney 
8241e9a038bSPaul E. McKenney void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
8251e9a038bSPaul E. McKenney 	       rcu_callback_t func)
8261e9a038bSPaul E. McKenney {
8271e9a038bSPaul E. McKenney 	__call_srcu(sp, rhp, func, true);
828dad81a20SPaul E. McKenney }
829dad81a20SPaul E. McKenney EXPORT_SYMBOL_GPL(call_srcu);
830dad81a20SPaul E. McKenney 
831dad81a20SPaul E. McKenney /*
832dad81a20SPaul E. McKenney  * Helper function for synchronize_srcu() and synchronize_srcu_expedited().
833dad81a20SPaul E. McKenney  */
8341e9a038bSPaul E. McKenney static void __synchronize_srcu(struct srcu_struct *sp, bool do_norm)
835dad81a20SPaul E. McKenney {
836dad81a20SPaul E. McKenney 	struct rcu_synchronize rcu;
837dad81a20SPaul E. McKenney 
838dad81a20SPaul E. McKenney 	RCU_LOCKDEP_WARN(lock_is_held(&sp->dep_map) ||
839dad81a20SPaul E. McKenney 			 lock_is_held(&rcu_bh_lock_map) ||
840dad81a20SPaul E. McKenney 			 lock_is_held(&rcu_lock_map) ||
841dad81a20SPaul E. McKenney 			 lock_is_held(&rcu_sched_lock_map),
842dad81a20SPaul E. McKenney 			 "Illegal synchronize_srcu() in same-type SRCU (or in RCU) read-side critical section");
843dad81a20SPaul E. McKenney 
844dad81a20SPaul E. McKenney 	if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
845dad81a20SPaul E. McKenney 		return;
846dad81a20SPaul E. McKenney 	might_sleep();
847da915ad5SPaul E. McKenney 	check_init_srcu_struct(sp);
848dad81a20SPaul E. McKenney 	init_completion(&rcu.completion);
849da915ad5SPaul E. McKenney 	init_rcu_head_on_stack(&rcu.head);
8501e9a038bSPaul E. McKenney 	__call_srcu(sp, &rcu.head, wakeme_after_rcu, do_norm);
851dad81a20SPaul E. McKenney 	wait_for_completion(&rcu.completion);
852da915ad5SPaul E. McKenney 	destroy_rcu_head_on_stack(&rcu.head);
853dad81a20SPaul E. McKenney }
854dad81a20SPaul E. McKenney 
855dad81a20SPaul E. McKenney /**
856dad81a20SPaul E. McKenney  * synchronize_srcu_expedited - Brute-force SRCU grace period
857dad81a20SPaul E. McKenney  * @sp: srcu_struct with which to synchronize.
858dad81a20SPaul E. McKenney  *
859dad81a20SPaul E. McKenney  * Wait for an SRCU grace period to elapse, but be more aggressive about
860dad81a20SPaul E. McKenney  * spinning rather than blocking when waiting.
861dad81a20SPaul E. McKenney  *
862dad81a20SPaul E. McKenney  * Note that synchronize_srcu_expedited() has the same deadlock and
863dad81a20SPaul E. McKenney  * memory-ordering properties as does synchronize_srcu().
864dad81a20SPaul E. McKenney  */
865dad81a20SPaul E. McKenney void synchronize_srcu_expedited(struct srcu_struct *sp)
866dad81a20SPaul E. McKenney {
8671e9a038bSPaul E. McKenney 	__synchronize_srcu(sp, rcu_gp_is_normal());
868dad81a20SPaul E. McKenney }
869dad81a20SPaul E. McKenney EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
870dad81a20SPaul E. McKenney 
871dad81a20SPaul E. McKenney /**
872dad81a20SPaul E. McKenney  * synchronize_srcu - wait for prior SRCU read-side critical-section completion
873dad81a20SPaul E. McKenney  * @sp: srcu_struct with which to synchronize.
874dad81a20SPaul E. McKenney  *
875dad81a20SPaul E. McKenney  * Wait for the count to drain to zero of both indexes. To avoid the
876dad81a20SPaul E. McKenney  * possible starvation of synchronize_srcu(), it waits for the count of
877da915ad5SPaul E. McKenney  * the index=((->srcu_idx & 1) ^ 1) to drain to zero at first,
878da915ad5SPaul E. McKenney  * and then flip the srcu_idx and wait for the count of the other index.
879dad81a20SPaul E. McKenney  *
880dad81a20SPaul E. McKenney  * Can block; must be called from process context.
881dad81a20SPaul E. McKenney  *
882dad81a20SPaul E. McKenney  * Note that it is illegal to call synchronize_srcu() from the corresponding
883dad81a20SPaul E. McKenney  * SRCU read-side critical section; doing so will result in deadlock.
884dad81a20SPaul E. McKenney  * However, it is perfectly legal to call synchronize_srcu() on one
885dad81a20SPaul E. McKenney  * srcu_struct from some other srcu_struct's read-side critical section,
886dad81a20SPaul E. McKenney  * as long as the resulting graph of srcu_structs is acyclic.
887dad81a20SPaul E. McKenney  *
888dad81a20SPaul E. McKenney  * There are memory-ordering constraints implied by synchronize_srcu().
889dad81a20SPaul E. McKenney  * On systems with more than one CPU, when synchronize_srcu() returns,
890dad81a20SPaul E. McKenney  * each CPU is guaranteed to have executed a full memory barrier since
891dad81a20SPaul E. McKenney  * the end of its last corresponding SRCU-sched read-side critical section
892dad81a20SPaul E. McKenney  * whose beginning preceded the call to synchronize_srcu().  In addition,
893dad81a20SPaul E. McKenney  * each CPU having an SRCU read-side critical section that extends beyond
894dad81a20SPaul E. McKenney  * the return from synchronize_srcu() is guaranteed to have executed a
895dad81a20SPaul E. McKenney  * full memory barrier after the beginning of synchronize_srcu() and before
896dad81a20SPaul E. McKenney  * the beginning of that SRCU read-side critical section.  Note that these
897dad81a20SPaul E. McKenney  * guarantees include CPUs that are offline, idle, or executing in user mode,
898dad81a20SPaul E. McKenney  * as well as CPUs that are executing in the kernel.
899dad81a20SPaul E. McKenney  *
900dad81a20SPaul E. McKenney  * Furthermore, if CPU A invoked synchronize_srcu(), which returned
901dad81a20SPaul E. McKenney  * to its caller on CPU B, then both CPU A and CPU B are guaranteed
902dad81a20SPaul E. McKenney  * to have executed a full memory barrier during the execution of
903dad81a20SPaul E. McKenney  * synchronize_srcu().  This guarantee applies even if CPU A and CPU B
904dad81a20SPaul E. McKenney  * are the same CPU, but again only if the system has more than one CPU.
905dad81a20SPaul E. McKenney  *
906dad81a20SPaul E. McKenney  * Of course, these memory-ordering guarantees apply only when
907dad81a20SPaul E. McKenney  * synchronize_srcu(), srcu_read_lock(), and srcu_read_unlock() are
908dad81a20SPaul E. McKenney  * passed the same srcu_struct structure.
9092da4b2a7SPaul E. McKenney  *
9102da4b2a7SPaul E. McKenney  * If SRCU is likely idle, expedite the first request.  This semantic
9112da4b2a7SPaul E. McKenney  * was provided by Classic SRCU, and is relied upon by its users, so TREE
9122da4b2a7SPaul E. McKenney  * SRCU must also provide it.  Note that detecting idleness is heuristic
9132da4b2a7SPaul E. McKenney  * and subject to both false positives and negatives.
914dad81a20SPaul E. McKenney  */
915dad81a20SPaul E. McKenney void synchronize_srcu(struct srcu_struct *sp)
916dad81a20SPaul E. McKenney {
9172da4b2a7SPaul E. McKenney 	if (srcu_might_be_idle(sp) || rcu_gp_is_expedited())
918dad81a20SPaul E. McKenney 		synchronize_srcu_expedited(sp);
919dad81a20SPaul E. McKenney 	else
9201e9a038bSPaul E. McKenney 		__synchronize_srcu(sp, true);
921dad81a20SPaul E. McKenney }
922dad81a20SPaul E. McKenney EXPORT_SYMBOL_GPL(synchronize_srcu);
923dad81a20SPaul E. McKenney 
924da915ad5SPaul E. McKenney /*
925da915ad5SPaul E. McKenney  * Callback function for srcu_barrier() use.
926da915ad5SPaul E. McKenney  */
927da915ad5SPaul E. McKenney static void srcu_barrier_cb(struct rcu_head *rhp)
928da915ad5SPaul E. McKenney {
929da915ad5SPaul E. McKenney 	struct srcu_data *sdp;
930da915ad5SPaul E. McKenney 	struct srcu_struct *sp;
931da915ad5SPaul E. McKenney 
932da915ad5SPaul E. McKenney 	sdp = container_of(rhp, struct srcu_data, srcu_barrier_head);
933da915ad5SPaul E. McKenney 	sp = sdp->sp;
934da915ad5SPaul E. McKenney 	if (atomic_dec_and_test(&sp->srcu_barrier_cpu_cnt))
935da915ad5SPaul E. McKenney 		complete(&sp->srcu_barrier_completion);
936da915ad5SPaul E. McKenney }
937da915ad5SPaul E. McKenney 
938dad81a20SPaul E. McKenney /**
939dad81a20SPaul E. McKenney  * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete.
940dad81a20SPaul E. McKenney  * @sp: srcu_struct on which to wait for in-flight callbacks.
941dad81a20SPaul E. McKenney  */
942dad81a20SPaul E. McKenney void srcu_barrier(struct srcu_struct *sp)
943dad81a20SPaul E. McKenney {
944da915ad5SPaul E. McKenney 	int cpu;
945da915ad5SPaul E. McKenney 	struct srcu_data *sdp;
946da915ad5SPaul E. McKenney 	unsigned long s = rcu_seq_snap(&sp->srcu_barrier_seq);
947da915ad5SPaul E. McKenney 
948da915ad5SPaul E. McKenney 	check_init_srcu_struct(sp);
949da915ad5SPaul E. McKenney 	mutex_lock(&sp->srcu_barrier_mutex);
950da915ad5SPaul E. McKenney 	if (rcu_seq_done(&sp->srcu_barrier_seq, s)) {
951da915ad5SPaul E. McKenney 		smp_mb(); /* Force ordering following return. */
952da915ad5SPaul E. McKenney 		mutex_unlock(&sp->srcu_barrier_mutex);
953da915ad5SPaul E. McKenney 		return; /* Someone else did our work for us. */
954da915ad5SPaul E. McKenney 	}
955da915ad5SPaul E. McKenney 	rcu_seq_start(&sp->srcu_barrier_seq);
956da915ad5SPaul E. McKenney 	init_completion(&sp->srcu_barrier_completion);
957da915ad5SPaul E. McKenney 
958da915ad5SPaul E. McKenney 	/* Initial count prevents reaching zero until all CBs are posted. */
959da915ad5SPaul E. McKenney 	atomic_set(&sp->srcu_barrier_cpu_cnt, 1);
960da915ad5SPaul E. McKenney 
961da915ad5SPaul E. McKenney 	/*
962da915ad5SPaul E. McKenney 	 * Each pass through this loop enqueues a callback, but only
963da915ad5SPaul E. McKenney 	 * on CPUs already having callbacks enqueued.  Note that if
964da915ad5SPaul E. McKenney 	 * a CPU already has callbacks enqueue, it must have already
965da915ad5SPaul E. McKenney 	 * registered the need for a future grace period, so all we
966da915ad5SPaul E. McKenney 	 * need do is enqueue a callback that will use the same
967da915ad5SPaul E. McKenney 	 * grace period as the last callback already in the queue.
968da915ad5SPaul E. McKenney 	 */
969da915ad5SPaul E. McKenney 	for_each_possible_cpu(cpu) {
970da915ad5SPaul E. McKenney 		sdp = per_cpu_ptr(sp->sda, cpu);
971da915ad5SPaul E. McKenney 		spin_lock_irq(&sdp->lock);
972da915ad5SPaul E. McKenney 		atomic_inc(&sp->srcu_barrier_cpu_cnt);
973da915ad5SPaul E. McKenney 		sdp->srcu_barrier_head.func = srcu_barrier_cb;
974da915ad5SPaul E. McKenney 		if (!rcu_segcblist_entrain(&sdp->srcu_cblist,
975da915ad5SPaul E. McKenney 					   &sdp->srcu_barrier_head, 0))
976da915ad5SPaul E. McKenney 			atomic_dec(&sp->srcu_barrier_cpu_cnt);
977da915ad5SPaul E. McKenney 		spin_unlock_irq(&sdp->lock);
978da915ad5SPaul E. McKenney 	}
979da915ad5SPaul E. McKenney 
980da915ad5SPaul E. McKenney 	/* Remove the initial count, at which point reaching zero can happen. */
981da915ad5SPaul E. McKenney 	if (atomic_dec_and_test(&sp->srcu_barrier_cpu_cnt))
982da915ad5SPaul E. McKenney 		complete(&sp->srcu_barrier_completion);
983da915ad5SPaul E. McKenney 	wait_for_completion(&sp->srcu_barrier_completion);
984da915ad5SPaul E. McKenney 
985da915ad5SPaul E. McKenney 	rcu_seq_end(&sp->srcu_barrier_seq);
986da915ad5SPaul E. McKenney 	mutex_unlock(&sp->srcu_barrier_mutex);
987dad81a20SPaul E. McKenney }
988dad81a20SPaul E. McKenney EXPORT_SYMBOL_GPL(srcu_barrier);
989dad81a20SPaul E. McKenney 
990dad81a20SPaul E. McKenney /**
991dad81a20SPaul E. McKenney  * srcu_batches_completed - return batches completed.
992dad81a20SPaul E. McKenney  * @sp: srcu_struct on which to report batch completion.
993dad81a20SPaul E. McKenney  *
994dad81a20SPaul E. McKenney  * Report the number of batches, correlated with, but not necessarily
995dad81a20SPaul E. McKenney  * precisely the same as, the number of grace periods that have elapsed.
996dad81a20SPaul E. McKenney  */
997dad81a20SPaul E. McKenney unsigned long srcu_batches_completed(struct srcu_struct *sp)
998dad81a20SPaul E. McKenney {
999da915ad5SPaul E. McKenney 	return sp->srcu_idx;
1000dad81a20SPaul E. McKenney }
1001dad81a20SPaul E. McKenney EXPORT_SYMBOL_GPL(srcu_batches_completed);
1002dad81a20SPaul E. McKenney 
1003dad81a20SPaul E. McKenney /*
1004da915ad5SPaul E. McKenney  * Core SRCU state machine.  Push state bits of ->srcu_gp_seq
1005da915ad5SPaul E. McKenney  * to SRCU_STATE_SCAN2, and invoke srcu_gp_end() when scan has
1006da915ad5SPaul E. McKenney  * completed in that state.
1007dad81a20SPaul E. McKenney  */
1008da915ad5SPaul E. McKenney static void srcu_advance_state(struct srcu_struct *sp)
1009dad81a20SPaul E. McKenney {
1010dad81a20SPaul E. McKenney 	int idx;
1011dad81a20SPaul E. McKenney 
1012da915ad5SPaul E. McKenney 	mutex_lock(&sp->srcu_gp_mutex);
1013da915ad5SPaul E. McKenney 
1014dad81a20SPaul E. McKenney 	/*
1015dad81a20SPaul E. McKenney 	 * Because readers might be delayed for an extended period after
1016da915ad5SPaul E. McKenney 	 * fetching ->srcu_idx for their index, at any point in time there
1017dad81a20SPaul E. McKenney 	 * might well be readers using both idx=0 and idx=1.  We therefore
1018dad81a20SPaul E. McKenney 	 * need to wait for readers to clear from both index values before
1019dad81a20SPaul E. McKenney 	 * invoking a callback.
1020dad81a20SPaul E. McKenney 	 *
1021dad81a20SPaul E. McKenney 	 * The load-acquire ensures that we see the accesses performed
1022dad81a20SPaul E. McKenney 	 * by the prior grace period.
1023dad81a20SPaul E. McKenney 	 */
1024dad81a20SPaul E. McKenney 	idx = rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq)); /* ^^^ */
1025dad81a20SPaul E. McKenney 	if (idx == SRCU_STATE_IDLE) {
1026da915ad5SPaul E. McKenney 		spin_lock_irq(&sp->gp_lock);
1027da915ad5SPaul E. McKenney 		if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) {
1028da915ad5SPaul E. McKenney 			WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq));
1029da915ad5SPaul E. McKenney 			spin_unlock_irq(&sp->gp_lock);
1030da915ad5SPaul E. McKenney 			mutex_unlock(&sp->srcu_gp_mutex);
1031dad81a20SPaul E. McKenney 			return;
1032dad81a20SPaul E. McKenney 		}
1033dad81a20SPaul E. McKenney 		idx = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq));
1034dad81a20SPaul E. McKenney 		if (idx == SRCU_STATE_IDLE)
1035dad81a20SPaul E. McKenney 			srcu_gp_start(sp);
1036da915ad5SPaul E. McKenney 		spin_unlock_irq(&sp->gp_lock);
1037da915ad5SPaul E. McKenney 		if (idx != SRCU_STATE_IDLE) {
1038da915ad5SPaul E. McKenney 			mutex_unlock(&sp->srcu_gp_mutex);
1039dad81a20SPaul E. McKenney 			return; /* Someone else started the grace period. */
1040dad81a20SPaul E. McKenney 		}
1041da915ad5SPaul E. McKenney 	}
1042dad81a20SPaul E. McKenney 
1043dad81a20SPaul E. McKenney 	if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_SCAN1) {
1044da915ad5SPaul E. McKenney 		idx = 1 ^ (sp->srcu_idx & 1);
1045da915ad5SPaul E. McKenney 		if (!try_check_zero(sp, idx, 1)) {
1046da915ad5SPaul E. McKenney 			mutex_unlock(&sp->srcu_gp_mutex);
1047dad81a20SPaul E. McKenney 			return; /* readers present, retry later. */
1048da915ad5SPaul E. McKenney 		}
1049dad81a20SPaul E. McKenney 		srcu_flip(sp);
1050dad81a20SPaul E. McKenney 		rcu_seq_set_state(&sp->srcu_gp_seq, SRCU_STATE_SCAN2);
1051dad81a20SPaul E. McKenney 	}
1052dad81a20SPaul E. McKenney 
1053dad81a20SPaul E. McKenney 	if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_SCAN2) {
1054dad81a20SPaul E. McKenney 
1055dad81a20SPaul E. McKenney 		/*
1056dad81a20SPaul E. McKenney 		 * SRCU read-side critical sections are normally short,
1057dad81a20SPaul E. McKenney 		 * so check at least twice in quick succession after a flip.
1058dad81a20SPaul E. McKenney 		 */
1059da915ad5SPaul E. McKenney 		idx = 1 ^ (sp->srcu_idx & 1);
1060da915ad5SPaul E. McKenney 		if (!try_check_zero(sp, idx, 2)) {
1061da915ad5SPaul E. McKenney 			mutex_unlock(&sp->srcu_gp_mutex);
1062da915ad5SPaul E. McKenney 			return; /* readers present, retry later. */
1063da915ad5SPaul E. McKenney 		}
1064da915ad5SPaul E. McKenney 		srcu_gp_end(sp);  /* Releases ->srcu_gp_mutex. */
1065dad81a20SPaul E. McKenney 	}
1066dad81a20SPaul E. McKenney }
1067dad81a20SPaul E. McKenney 
1068dad81a20SPaul E. McKenney /*
1069dad81a20SPaul E. McKenney  * Invoke a limited number of SRCU callbacks that have passed through
1070dad81a20SPaul E. McKenney  * their grace period.  If there are more to do, SRCU will reschedule
1071dad81a20SPaul E. McKenney  * the workqueue.  Note that needed memory barriers have been executed
1072dad81a20SPaul E. McKenney  * in this task's context by srcu_readers_active_idx_check().
1073dad81a20SPaul E. McKenney  */
1074da915ad5SPaul E. McKenney static void srcu_invoke_callbacks(struct work_struct *work)
1075dad81a20SPaul E. McKenney {
1076da915ad5SPaul E. McKenney 	bool more;
1077dad81a20SPaul E. McKenney 	struct rcu_cblist ready_cbs;
1078dad81a20SPaul E. McKenney 	struct rcu_head *rhp;
1079da915ad5SPaul E. McKenney 	struct srcu_data *sdp;
1080da915ad5SPaul E. McKenney 	struct srcu_struct *sp;
1081dad81a20SPaul E. McKenney 
1082da915ad5SPaul E. McKenney 	sdp = container_of(work, struct srcu_data, work.work);
1083da915ad5SPaul E. McKenney 	sp = sdp->sp;
1084dad81a20SPaul E. McKenney 	rcu_cblist_init(&ready_cbs);
1085da915ad5SPaul E. McKenney 	spin_lock_irq(&sdp->lock);
1086da915ad5SPaul E. McKenney 	smp_mb(); /* Old grace periods before callback invocation! */
1087da915ad5SPaul E. McKenney 	rcu_segcblist_advance(&sdp->srcu_cblist,
1088da915ad5SPaul E. McKenney 			      rcu_seq_current(&sp->srcu_gp_seq));
1089da915ad5SPaul E. McKenney 	if (sdp->srcu_cblist_invoking ||
1090da915ad5SPaul E. McKenney 	    !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) {
1091da915ad5SPaul E. McKenney 		spin_unlock_irq(&sdp->lock);
1092da915ad5SPaul E. McKenney 		return;  /* Someone else on the job or nothing to do. */
1093da915ad5SPaul E. McKenney 	}
1094da915ad5SPaul E. McKenney 
1095da915ad5SPaul E. McKenney 	/* We are on the job!  Extract and invoke ready callbacks. */
1096da915ad5SPaul E. McKenney 	sdp->srcu_cblist_invoking = true;
1097da915ad5SPaul E. McKenney 	rcu_segcblist_extract_done_cbs(&sdp->srcu_cblist, &ready_cbs);
1098da915ad5SPaul E. McKenney 	spin_unlock_irq(&sdp->lock);
1099dad81a20SPaul E. McKenney 	rhp = rcu_cblist_dequeue(&ready_cbs);
1100dad81a20SPaul E. McKenney 	for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
1101dad81a20SPaul E. McKenney 		local_bh_disable();
1102dad81a20SPaul E. McKenney 		rhp->func(rhp);
1103dad81a20SPaul E. McKenney 		local_bh_enable();
1104dad81a20SPaul E. McKenney 	}
1105da915ad5SPaul E. McKenney 
1106da915ad5SPaul E. McKenney 	/*
1107da915ad5SPaul E. McKenney 	 * Update counts, accelerate new callbacks, and if needed,
1108da915ad5SPaul E. McKenney 	 * schedule another round of callback invocation.
1109da915ad5SPaul E. McKenney 	 */
1110da915ad5SPaul E. McKenney 	spin_lock_irq(&sdp->lock);
1111da915ad5SPaul E. McKenney 	rcu_segcblist_insert_count(&sdp->srcu_cblist, &ready_cbs);
1112da915ad5SPaul E. McKenney 	(void)rcu_segcblist_accelerate(&sdp->srcu_cblist,
1113da915ad5SPaul E. McKenney 				       rcu_seq_snap(&sp->srcu_gp_seq));
1114da915ad5SPaul E. McKenney 	sdp->srcu_cblist_invoking = false;
1115da915ad5SPaul E. McKenney 	more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist);
1116da915ad5SPaul E. McKenney 	spin_unlock_irq(&sdp->lock);
1117da915ad5SPaul E. McKenney 	if (more)
1118da915ad5SPaul E. McKenney 		srcu_schedule_cbs_sdp(sdp, 0);
1119dad81a20SPaul E. McKenney }
1120dad81a20SPaul E. McKenney 
1121dad81a20SPaul E. McKenney /*
1122dad81a20SPaul E. McKenney  * Finished one round of SRCU grace period.  Start another if there are
1123dad81a20SPaul E. McKenney  * more SRCU callbacks queued, otherwise put SRCU into not-running state.
1124dad81a20SPaul E. McKenney  */
1125dad81a20SPaul E. McKenney static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay)
1126dad81a20SPaul E. McKenney {
1127da915ad5SPaul E. McKenney 	bool pushgp = true;
1128dad81a20SPaul E. McKenney 
1129da915ad5SPaul E. McKenney 	spin_lock_irq(&sp->gp_lock);
1130da915ad5SPaul E. McKenney 	if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) {
1131da915ad5SPaul E. McKenney 		if (!WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq))) {
1132da915ad5SPaul E. McKenney 			/* All requests fulfilled, time to go idle. */
1133da915ad5SPaul E. McKenney 			pushgp = false;
1134dad81a20SPaul E. McKenney 		}
1135da915ad5SPaul E. McKenney 	} else if (!rcu_seq_state(sp->srcu_gp_seq)) {
1136da915ad5SPaul E. McKenney 		/* Outstanding request and no GP.  Start one. */
1137da915ad5SPaul E. McKenney 		srcu_gp_start(sp);
1138da915ad5SPaul E. McKenney 	}
1139da915ad5SPaul E. McKenney 	spin_unlock_irq(&sp->gp_lock);
1140dad81a20SPaul E. McKenney 
1141da915ad5SPaul E. McKenney 	if (pushgp)
1142dad81a20SPaul E. McKenney 		queue_delayed_work(system_power_efficient_wq, &sp->work, delay);
1143dad81a20SPaul E. McKenney }
1144dad81a20SPaul E. McKenney 
1145dad81a20SPaul E. McKenney /*
1146dad81a20SPaul E. McKenney  * This is the work-queue function that handles SRCU grace periods.
1147dad81a20SPaul E. McKenney  */
1148dad81a20SPaul E. McKenney void process_srcu(struct work_struct *work)
1149dad81a20SPaul E. McKenney {
1150dad81a20SPaul E. McKenney 	struct srcu_struct *sp;
1151dad81a20SPaul E. McKenney 
1152dad81a20SPaul E. McKenney 	sp = container_of(work, struct srcu_struct, work.work);
1153dad81a20SPaul E. McKenney 
1154da915ad5SPaul E. McKenney 	srcu_advance_state(sp);
11551e9a038bSPaul E. McKenney 	srcu_reschedule(sp, srcu_get_delay(sp));
1156dad81a20SPaul E. McKenney }
1157dad81a20SPaul E. McKenney EXPORT_SYMBOL_GPL(process_srcu);
11587f6733c3SPaul E. McKenney 
11597f6733c3SPaul E. McKenney void srcutorture_get_gp_data(enum rcutorture_type test_type,
11607f6733c3SPaul E. McKenney 			     struct srcu_struct *sp, int *flags,
11611e9a038bSPaul E. McKenney 			     unsigned long *gpnum, unsigned long *completed)
11627f6733c3SPaul E. McKenney {
11637f6733c3SPaul E. McKenney 	if (test_type != SRCU_FLAVOR)
11647f6733c3SPaul E. McKenney 		return;
11657f6733c3SPaul E. McKenney 	*flags = 0;
11667f6733c3SPaul E. McKenney 	*completed = rcu_seq_ctr(sp->srcu_gp_seq);
11677f6733c3SPaul E. McKenney 	*gpnum = rcu_seq_ctr(sp->srcu_gp_seq_needed);
11687f6733c3SPaul E. McKenney }
11697f6733c3SPaul E. McKenney EXPORT_SYMBOL_GPL(srcutorture_get_gp_data);
1170