Lines Matching +full:idle +full:- +full:wait +full:- +full:delay
1 // SPDX-License-Identifier: GPL-2.0+
3 * Sleepable Read-Copy Update mechanism for mutual exclusion.
11 * For detailed explanation of Read-Copy Update mechanism see -
25 #include <linux/delay.h>
33 /* Holdoff in nanoseconds for auto-expediting. */
38 /* Overflow-check frequency. N bits roughly says every 2**N grace periods. */
63 /* Number of CPUs to trigger init_srcu_struct()-time transition to big. */
71 /* Early-boot callback-management, so early that no lock is required! */
76 static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay);
117 * Initialize SRCU per-CPU data. Note that statically allocated
120 * is set, don't initialize ->srcu_lock_count[] and ->srcu_unlock_count[].
128 * Initialize the per-CPU srcu_data array, which feeds into the
131 BUILD_BUG_ON(ARRAY_SIZE(sdp->srcu_lock_count) !=
132 ARRAY_SIZE(sdp->srcu_unlock_count));
134 sdp = per_cpu_ptr(ssp->sda, cpu);
136 rcu_segcblist_init(&sdp->srcu_cblist);
137 sdp->srcu_cblist_invoking = false;
138 sdp->srcu_gp_seq_needed = ssp->srcu_sup->srcu_gp_seq;
139 sdp->srcu_gp_seq_needed_exp = ssp->srcu_sup->srcu_gp_seq;
140 sdp->srcu_barrier_head.next = &sdp->srcu_barrier_head;
141 sdp->mynode = NULL;
142 sdp->cpu = cpu;
143 INIT_WORK(&sdp->work, srcu_invoke_callbacks);
144 timer_setup(&sdp->delay_work, srcu_delay_timer, 0);
145 sdp->ssp = ssp;
177 ssp->srcu_sup->node = kcalloc(rcu_num_nodes, sizeof(*ssp->srcu_sup->node), gfp_flags);
178 if (!ssp->srcu_sup->node)
182 ssp->srcu_sup->level[0] = &ssp->srcu_sup->node[0];
184 ssp->srcu_sup->level[i] = ssp->srcu_sup->level[i - 1] + num_rcu_lvl[i - 1];
190 BUILD_BUG_ON(ARRAY_SIZE(snp->srcu_have_cbs) !=
191 ARRAY_SIZE(snp->srcu_data_have_cbs));
192 for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) {
193 snp->srcu_have_cbs[i] = SRCU_SNP_INIT_SEQ;
194 snp->srcu_data_have_cbs[i] = 0;
196 snp->srcu_gp_seq_needed_exp = SRCU_SNP_INIT_SEQ;
197 snp->grplo = -1;
198 snp->grphi = -1;
199 if (snp == &ssp->srcu_sup->node[0]) {
201 snp->srcu_parent = NULL;
205 /* Non-root node. */
206 if (snp == ssp->srcu_sup->level[level + 1])
208 snp->srcu_parent = ssp->srcu_sup->level[level - 1] +
209 (snp - ssp->srcu_sup->level[level]) /
210 levelspread[level - 1];
214 * Initialize the per-CPU srcu_data array, which feeds into the
217 level = rcu_num_lvls - 1;
218 snp_first = ssp->srcu_sup->level[level];
220 sdp = per_cpu_ptr(ssp->sda, cpu);
221 sdp->mynode = &snp_first[cpu / levelspread[level]];
222 for (snp = sdp->mynode; snp != NULL; snp = snp->srcu_parent) {
223 if (snp->grplo < 0)
224 snp->grplo = cpu;
225 snp->grphi = cpu;
227 sdp->grpmask = 1UL << (cpu - sdp->mynode->grplo);
229 smp_store_release(&ssp->srcu_sup->srcu_size_state, SRCU_SIZE_WAIT_BARRIER);
234 * Initialize non-compile-time initialized fields, including the
236 * tells us that ->sda has already been wired up to srcu_data.
241 ssp->srcu_sup = kzalloc(sizeof(*ssp->srcu_sup), GFP_KERNEL);
242 if (!ssp->srcu_sup)
243 return -ENOMEM;
245 spin_lock_init(&ACCESS_PRIVATE(ssp->srcu_sup, lock));
246 ssp->srcu_sup->srcu_size_state = SRCU_SIZE_SMALL;
247 ssp->srcu_sup->node = NULL;
248 mutex_init(&ssp->srcu_sup->srcu_cb_mutex);
249 mutex_init(&ssp->srcu_sup->srcu_gp_mutex);
250 ssp->srcu_idx = 0;
251 ssp->srcu_sup->srcu_gp_seq = SRCU_GP_SEQ_INITIAL_VAL;
252 ssp->srcu_sup->srcu_barrier_seq = 0;
253 mutex_init(&ssp->srcu_sup->srcu_barrier_mutex);
254 atomic_set(&ssp->srcu_sup->srcu_barrier_cpu_cnt, 0);
255 INIT_DELAYED_WORK(&ssp->srcu_sup->work, process_srcu);
256 ssp->srcu_sup->sda_is_static = is_static;
258 ssp->sda = alloc_percpu(struct srcu_data);
259 if (!ssp->sda)
262 ssp->srcu_sup->srcu_gp_seq_needed_exp = SRCU_GP_SEQ_INITIAL_VAL;
263 ssp->srcu_sup->srcu_last_gp_end = ktime_get_mono_fast_ns();
264 if (READ_ONCE(ssp->srcu_sup->srcu_size_state) == SRCU_SIZE_SMALL && SRCU_SIZING_IS_INIT()) {
267 WRITE_ONCE(ssp->srcu_sup->srcu_size_state, SRCU_SIZE_BIG);
269 ssp->srcu_sup->srcu_ssp = ssp;
270 smp_store_release(&ssp->srcu_sup->srcu_gp_seq_needed,
276 free_percpu(ssp->sda);
277 ssp->sda = NULL;
281 kfree(ssp->srcu_sup);
282 ssp->srcu_sup = NULL;
284 return -ENOMEM;
292 /* Don't re-initialize a lock while it is held. */
294 lockdep_init_map(&ssp->dep_map, name, key, 0);
302 * init_srcu_struct - initialize a sleep-RCU structure
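init_srcu_struct() above is the entry point for dynamically allocated srcu_struct structures; statically allocated ones are compile-time initialized with DEFINE_SRCU()/DEFINE_STATIC_SRCU() instead. A minimal usage sketch follows; the my_dev structure and helpers are hypothetical and not part of this file.

#include <linux/srcu.h>
#include <linux/slab.h>

struct my_dev {				/* hypothetical driver state */
	struct srcu_struct srcu;	/* per-device SRCU domain */
	void __rcu *cfg;		/* SRCU-protected pointer */
};

static struct my_dev *my_dev_create(void)
{
	struct my_dev *d = kzalloc(sizeof(*d), GFP_KERNEL);

	if (!d)
		return NULL;
	if (init_srcu_struct(&d->srcu)) {	/* dynamic init; may fail */
		kfree(d);
		return NULL;
	}
	return d;
}

/* Statically allocated domains are compile-time initialized instead: */
DEFINE_STATIC_SRCU(my_static_srcu);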
322 lockdep_assert_held(&ACCESS_PRIVATE(ssp->srcu_sup, lock));
323 smp_store_release(&ssp->srcu_sup->srcu_size_state, SRCU_SIZE_ALLOC);
333 /* Double-checked locking on ->srcu_size_state. */
334 if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) != SRCU_SIZE_SMALL)
336 spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags);
337 if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) != SRCU_SIZE_SMALL) {
338 spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
342 spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
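The srcu_size_state handling above follows the classic double-checked locking idiom: a cheap acquire-load on the fast path, then a recheck under the lock before performing the one-time transition. A generic sketch of that idiom, using hypothetical names unrelated to this file:

#include <linux/spinlock.h>
#include <linux/atomic.h>

static DEFINE_SPINLOCK(once_lock);
static int once_state;			/* 0 = not yet transitioned */

static void do_transition_once(void)
{
	unsigned long flags;

	if (smp_load_acquire(&once_state))	/* fast path: already done */
		return;
	spin_lock_irqsave(&once_lock, flags);
	if (once_state) {			/* recheck under the lock */
		spin_unlock_irqrestore(&once_lock, flags);
		return;
	}
	/* ... perform the one-time work here ... */
	smp_store_release(&once_state, 1);	/* publish completion */
	spin_unlock_irqrestore(&once_lock, flags);
}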
346 * Check to see if the just-encountered contention event justifies
353 if (!SRCU_SIZING_IS_CONTEND() || ssp->srcu_sup->srcu_size_state)
356 if (ssp->srcu_sup->srcu_size_jiffies != j) {
357 ssp->srcu_sup->srcu_size_jiffies = j;
358 ssp->srcu_sup->srcu_n_lock_retries = 0;
360 if (++ssp->srcu_sup->srcu_n_lock_retries <= small_contention_lim)
366 * Acquire the specified srcu_data structure's ->lock, but check for
373 struct srcu_struct *ssp = sdp->ssp;
377 spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags);
379 spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, *flags);
384 * Acquire the specified srcu_struct structure's ->lock, but check for
391 if (spin_trylock_irqsave_rcu_node(ssp->srcu_sup, *flags))
393 spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags);
398 * First-use initialization of statically allocated srcu_struct
400 * done with compile-time initialization, so this check is added
401 * to each update-side SRCU primitive. Use ssp->lock, which -is-
402 * compile-time initialized, to resolve races involving multiple
403 * CPUs trying to garner first-use privileges.
410 if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq_needed))) /*^^^*/
412 spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags);
413 if (!rcu_seq_state(ssp->srcu_sup->srcu_gp_seq_needed)) {
414 spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
418 spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
426 struct srcu_usage *sup = ssp->srcu_sup;
428 return ULONG_CMP_LT(READ_ONCE(sup->srcu_gp_seq), READ_ONCE(sup->srcu_gp_seq_needed_exp));
432 * Computes approximate total of the readers' ->srcu_lock_count[] values
433 * for the rank of per-CPU counters specified by idx, and returns true if
444 struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
446 sum += atomic_long_read(&sdp->srcu_lock_count[idx]);
448 mask = mask | READ_ONCE(sdp->srcu_reader_flavor);
450 WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && (mask & (mask - 1)),
458 * Returns approximate total of the readers' ->srcu_unlock_count[] values
459 * for the rank of per-CPU counters specified by idx.
468 struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
470 sum += atomic_long_read(&sdp->srcu_unlock_count[idx]);
471 mask = mask | READ_ONCE(sdp->srcu_reader_flavor);
473 WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && (mask & (mask - 1)),
480 * Return true if the number of pre-existing readers is determined to
512 * the current ->srcu_idx but not yet have incremented its CPU's
513 * ->srcu_lock_count[idx] counter. In fact, it is possible
515 * ->srcu_idx and incrementing ->srcu_lock_count[idx]. And there
521 * code for a long time. That now-preempted updater has already
522 * flipped ->srcu_idx (possibly during the preceding grace period),
524 * period), and summed up the ->srcu_unlock_count[idx] counters.
526 * increment the old ->srcu_idx value's ->srcu_lock_count[idx]
530 * the old value of ->srcu_idx and is just about to use that value
531 * to index its increment of ->srcu_lock_count[idx]. But as soon as
532 * it leaves that SRCU read-side critical section, it will increment
533 * ->srcu_unlock_count[idx], which must follow the updater's above
535 * an smp_mb() and a later fetch from ->srcu_idx, that task will be
537 * ->srcu_unlock_count[idx] in __srcu_read_unlock() is after the
538 * smp_mb(), and the fetch from ->srcu_idx in __srcu_read_lock()
540 * value of ->srcu_idx until the -second- __srcu_read_lock(),
542 * ->srcu_lock_count[idx] for the old value of ->srcu_idx twice,
550 * ->srcu_lock_count[idx] for the old index, where Nc is the number
560 * comfortably beyond excessive. Especially on 64-bit systems,
568 * srcu_readers_active - returns true if there are readers, and false
582 struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
584 sum += atomic_long_read(&sdp->srcu_lock_count[0]);
585 sum += atomic_long_read(&sdp->srcu_lock_count[1]);
586 sum -= atomic_long_read(&sdp->srcu_unlock_count[0]);
587 sum -= atomic_long_read(&sdp->srcu_unlock_count[1]);
596 * their read-side critical sections. If there are still some readers
598 * The blocking time is increased as the grace-period age increases,
606 #define SRCU_INTERVAL 1 // Base delay if no expedited GPs pending.
607 #define SRCU_MAX_INTERVAL 10 // Maximum incremental delay from slow readers.
609 #define SRCU_DEFAULT_MAX_NODELAY_PHASE_LO 3UL // Lowmark on default per-GP-phase
610 // no-delay instances.
611 #define SRCU_DEFAULT_MAX_NODELAY_PHASE_HI 1000UL // Highmark on default per-GP-phase
612 // no-delay instances.
617 // per-GP-phase no-delay instances adjusted to allow non-sleeping poll up to
623 // Maximum per-GP-phase consecutive no-delay instances.
632 // Maximum consecutive no-delay instances.
640 * Return grace-period delay, zero if there are expedited grace
648 struct srcu_usage *sup = ssp->srcu_sup;
652 if (rcu_seq_state(READ_ONCE(sup->srcu_gp_seq))) {
653 j = jiffies - 1;
654 gpstart = READ_ONCE(sup->srcu_gp_start);
656 jbase += j - gpstart;
658 ASSERT_EXCLUSIVE_WRITER(sup->srcu_n_exp_nodelay);
659 WRITE_ONCE(sup->srcu_n_exp_nodelay, READ_ONCE(sup->srcu_n_exp_nodelay) + 1);
660 if (READ_ONCE(sup->srcu_n_exp_nodelay) > srcu_max_nodelay_phase)
668 * cleanup_srcu_struct - deconstruct a sleep-RCU structure
677 struct srcu_usage *sup = ssp->srcu_sup;
683 flush_delayed_work(&sup->work);
685 struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
687 del_timer_sync(&sdp->delay_work);
688 flush_work(&sdp->work);
689 if (WARN_ON(rcu_segcblist_n_cbs(&sdp->srcu_cblist)))
692 if (WARN_ON(rcu_seq_state(READ_ONCE(sup->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
693 WARN_ON(rcu_seq_current(&sup->srcu_gp_seq) != sup->srcu_gp_seq_needed) ||
696 __func__, ssp, rcu_seq_state(READ_ONCE(sup->srcu_gp_seq)),
697 rcu_seq_current(&sup->srcu_gp_seq), sup->srcu_gp_seq_needed);
703 kfree(sup->node);
704 sup->node = NULL;
705 sup->srcu_size_state = SRCU_SIZE_SMALL;
706 if (!sup->sda_is_static) {
707 free_percpu(ssp->sda);
708 ssp->sda = NULL;
710 ssp->srcu_sup = NULL;
723 /* NMI-unsafe use in NMI is a bad sign, as are multi-bit read_flavor values. */
725 WARN_ON_ONCE(read_flavor & (read_flavor - 1));
727 sdp = raw_cpu_ptr(ssp->sda);
728 old_read_flavor = READ_ONCE(sdp->srcu_reader_flavor);
730 old_read_flavor = cmpxchg(&sdp->srcu_reader_flavor, 0, read_flavor);
734 WARN_ONCE(old_read_flavor != read_flavor, "CPU %d old state %d new state %d\n", sdp->cpu, old_read_flavor, read_flavor);
739 * Counts the new reader in the appropriate per-CPU element of the
741 * Returns a guaranteed non-negative index that must be passed to the
748 idx = READ_ONCE(ssp->srcu_idx) & 0x1;
749 this_cpu_inc(ssp->sda->srcu_lock_count[idx].counter);
756 * Removes the count for the old reader from the appropriate per-CPU
763 this_cpu_inc(ssp->sda->srcu_unlock_count[idx].counter);
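The counters manipulated above back the public reader API: srcu_read_lock() returns the index that must later be handed to srcu_read_unlock(). A minimal reader sketch; the my_cfg type, my_srcu domain, and cur_cfg pointer are hypothetical and not part of this file.

#include <linux/srcu.h>

struct my_cfg {				/* hypothetical SRCU-protected structure */
	int value;
};

DEFINE_STATIC_SRCU(my_srcu);
static struct my_cfg __rcu *cur_cfg;

static int read_cfg_value(void)
{
	struct my_cfg *cfg;
	int idx, val = -1;

	idx = srcu_read_lock(&my_srcu);		/* bumps ->srcu_lock_count[idx] */
	cfg = srcu_dereference(cur_cfg, &my_srcu);
	if (cfg)
		val = cfg->value;		/* readers may block/sleep here */
	srcu_read_unlock(&my_srcu, idx);	/* bumps ->srcu_unlock_count[idx] */
	return val;
}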
770 * Counts the new reader in the appropriate per-CPU element of the
771 * srcu_struct, but in an NMI-safe manner using RMW atomics.
777 struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
779 idx = READ_ONCE(ssp->srcu_idx) & 0x1;
780 atomic_long_inc(&sdp->srcu_lock_count[idx]);
787 * Removes the count for the old reader from the appropriate per-CPU
793 struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
796 atomic_long_inc(&sdp->srcu_unlock_count[idx]);
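These atomic RMW counters back the NMI-safe reader flavor. A minimal sketch, assuming the srcu_read_lock_nmisafe()/srcu_read_unlock_nmisafe() wrappers are available in this kernel; the domain and handler below are hypothetical. The flavor-checking code shown earlier warns if a single srcu_struct mixes reader flavors.

DEFINE_STATIC_SRCU(my_nmi_srcu);	/* hypothetical domain used only NMI-safely */

static void my_nmi_handler(void)
{
	int idx;

	/* Atomic RMW counters make this safe even if the NMI arrived in the
	 * middle of a this_cpu_inc()-based __srcu_read_lock(). */
	idx = srcu_read_lock_nmisafe(&my_nmi_srcu);
	/* ... read SRCU-protected state ... */
	srcu_read_unlock_nmisafe(&my_nmi_srcu, idx);
}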
809 lockdep_assert_held(&ACCESS_PRIVATE(ssp->srcu_sup, lock));
810 WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed));
811 WRITE_ONCE(ssp->srcu_sup->srcu_gp_start, jiffies);
812 WRITE_ONCE(ssp->srcu_sup->srcu_n_exp_nodelay, 0);
813 smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */
814 rcu_seq_start(&ssp->srcu_sup->srcu_gp_seq);
815 state = rcu_seq_state(ssp->srcu_sup->srcu_gp_seq);
824 queue_work_on(sdp->cpu, rcu_gp_wq, &sdp->work);
828 unsigned long delay)
830 if (!delay) {
831 queue_work_on(sdp->cpu, rcu_gp_wq, &sdp->work);
835 timer_reduce(&sdp->delay_work, jiffies + delay);
842 static void srcu_schedule_cbs_sdp(struct srcu_data *sdp, unsigned long delay)
844 srcu_queue_delayed_work_on(sdp, delay);
850 * just-completed grace period, the one corresponding to idx. If possible,
854 unsigned long mask, unsigned long delay)
858 for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) {
859 if (!(mask & (1UL << (cpu - snp->grplo))))
861 srcu_schedule_cbs_sdp(per_cpu_ptr(ssp->sda, cpu), delay);
869 * The ->srcu_cb_mutex acquisition does not protect any data, but
871 * are initiating callback invocation. This allows the ->srcu_have_cbs[]
887 struct srcu_usage *sup = ssp->srcu_sup;
890 mutex_lock(&sup->srcu_cb_mutex);
894 idx = rcu_seq_state(sup->srcu_gp_seq);
899 WRITE_ONCE(sup->srcu_last_gp_end, ktime_get_mono_fast_ns());
900 rcu_seq_end(&sup->srcu_gp_seq);
901 gpseq = rcu_seq_current(&sup->srcu_gp_seq);
902 if (ULONG_CMP_LT(sup->srcu_gp_seq_needed_exp, gpseq))
903 WRITE_ONCE(sup->srcu_gp_seq_needed_exp, gpseq);
905 mutex_unlock(&sup->srcu_gp_mutex);
909 ss_state = smp_load_acquire(&sup->srcu_size_state);
911 srcu_schedule_cbs_sdp(per_cpu_ptr(ssp->sda, get_boot_cpu_id()),
914 idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs);
918 last_lvl = snp >= sup->level[rcu_num_lvls - 1];
920 cbs = ss_state < SRCU_SIZE_BIG || snp->srcu_have_cbs[idx] == gpseq;
921 snp->srcu_have_cbs[idx] = gpseq;
922 rcu_seq_set_state(&snp->srcu_have_cbs[idx], 1);
923 sgsne = snp->srcu_gp_seq_needed_exp;
925 WRITE_ONCE(snp->srcu_gp_seq_needed_exp, gpseq);
929 mask = snp->srcu_data_have_cbs[idx];
930 snp->srcu_data_have_cbs[idx] = 0;
940 sdp = per_cpu_ptr(ssp->sda, cpu);
942 if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed + 100))
943 sdp->srcu_gp_seq_needed = gpseq;
944 if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed_exp + 100))
945 sdp->srcu_gp_seq_needed_exp = gpseq;
950 mutex_unlock(&sup->srcu_cb_mutex);
954 gpseq = rcu_seq_current(&sup->srcu_gp_seq);
956 ULONG_CMP_LT(gpseq, sup->srcu_gp_seq_needed)) {
969 smp_store_release(&sup->srcu_size_state, ss_state + 1);
974 * Funnel-locking scheme to scalably mediate many concurrent expedited
975 * grace-period requests. This function is invoked for the first known
987 for (; snp != NULL; snp = snp->srcu_parent) {
988 sgsne = READ_ONCE(snp->srcu_gp_seq_needed_exp);
989 if (WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, s)) ||
993 sgsne = snp->srcu_gp_seq_needed_exp;
998 WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
1002 if (ULONG_CMP_LT(ssp->srcu_sup->srcu_gp_seq_needed_exp, s))
1003 WRITE_ONCE(ssp->srcu_sup->srcu_gp_seq_needed_exp, s);
1004 spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
1008 * Funnel-locking scheme to scalably mediate many concurrent grace-period
1010 * period s. Losers must either ensure that their desired grace-period
1024 int idx = rcu_seq_ctr(s) % ARRAY_SIZE(sdp->mynode->srcu_have_cbs);
1029 struct srcu_usage *sup = ssp->srcu_sup;
1032 if (smp_load_acquire(&sup->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
1035 snp_leaf = sdp->mynode;
1039 for (snp = snp_leaf; snp != NULL; snp = snp->srcu_parent) {
1040 if (WARN_ON_ONCE(rcu_seq_done(&sup->srcu_gp_seq, s)) && snp != snp_leaf)
1043 snp_seq = snp->srcu_have_cbs[idx];
1046 snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
1056 snp->srcu_have_cbs[idx] = s;
1058 snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
1059 sgsne = snp->srcu_gp_seq_needed_exp;
1061 WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
1067 if (ULONG_CMP_LT(sup->srcu_gp_seq_needed, s)) {
1072 smp_store_release(&sup->srcu_gp_seq_needed, s); /*^^^*/
1074 if (!do_norm && ULONG_CMP_LT(sup->srcu_gp_seq_needed_exp, s))
1075 WRITE_ONCE(sup->srcu_gp_seq_needed_exp, s);
1078 if (!WARN_ON_ONCE(rcu_seq_done(&sup->srcu_gp_seq, s)) &&
1079 rcu_seq_state(sup->srcu_gp_seq) == SRCU_STATE_IDLE) {
1088 queue_delayed_work(rcu_gp_wq, &sup->work,
1090 else if (list_empty(&sup->work.work.entry))
1091 list_add(&sup->work.work.entry, &srcu_boot_list);
1097 * Wait until all readers counted by array index idx complete, but
1099 * The caller must ensure that ->srcu_idx is not changed while checking.
1110 if ((--trycount + curdelay) <= 0)
1117 * Increment the ->srcu_idx counter so that future SRCU readers will
1118 * use the other rank of the ->srcu_(un)lock_count[] arrays. This allows
1119 * us to wait for pre-existing readers in a starvation-free manner.
1124 * Because the flip of ->srcu_idx is executed only if the
1126 * the ->srcu_unlock_count[] and ->srcu_lock_count[] sums matched
1131 * __srcu_read_lock(), that reader was using a value of ->srcu_idx
1136 * value of ->srcu_idx.
1138 * This sum-equality check and ordering also ensures that if
1140 * ->srcu_idx, this updater's earlier scans cannot have seen
1142 * this grace period need not wait on that reader. After all,
1155 WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1); // Flip the counter.
1169 * If SRCU is likely idle, in other words, the next SRCU grace period
1173 * Note that it is OK for several concurrent from-idle requests for a new
1174 * grace period to specify expediting because they will all end
1178 * callbacks, this function will nevertheless say "idle". This is not
1184 * This function is also subject to counter-wrap errors, but let's face
1189 * of a needlessly non-expedited grace period is similarly negligible.
1201 if (this_cpu_read(ssp->sda->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE)
1203 /* If the local srcu_data structure has callbacks, not idle. */
1204 sdp = raw_cpu_ptr(ssp->sda);
1206 if (rcu_segcblist_pend_cbs(&sdp->srcu_cblist)) {
1208 return false; /* Callbacks already present, so not idle. */
1220 tlast = READ_ONCE(ssp->srcu_sup->srcu_last_gp_end);
1226 curseq = rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq);
1227 smp_mb(); /* Order ->srcu_gp_seq with ->srcu_gp_seq_needed. */
1228 if (ULONG_CMP_LT(curseq, READ_ONCE(ssp->srcu_sup->srcu_gp_seq_needed)))
1229 return false; /* Grace period in progress, so not idle. */
1230 smp_mb(); /* Order ->srcu_gp_seq with prior access. */
1231 if (curseq != rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq))
1232 return false; /* GP # changed, so not idle. */
1233 return true; /* With reasonable probability, idle! */
1244 * Start an SRCU grace period, and also queue the callback if non-NULL.
1261 * SRCU read-side critical section so that the grace-period
1265 ss_state = smp_load_acquire(&ssp->srcu_sup->srcu_size_state);
1267 sdp = per_cpu_ptr(ssp->sda, get_boot_cpu_id());
1269 sdp = raw_cpu_ptr(ssp->sda);
1272 rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp);
1309 s = rcu_seq_snap(&ssp->srcu_sup->srcu_gp_seq);
1311 rcu_segcblist_advance(&sdp->srcu_cblist,
1312 rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq));
1320 WARN_ON_ONCE(!rcu_segcblist_accelerate(&sdp->srcu_cblist, s));
1322 if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) {
1323 sdp->srcu_gp_seq_needed = s;
1326 if (!do_norm && ULONG_CMP_LT(sdp->srcu_gp_seq_needed_exp, s)) {
1327 sdp->srcu_gp_seq_needed_exp = s;
1336 sdp_mynode = sdp->mynode;
1349 * grace-period processing if it is not already running.
1352 * all pre-existing SRCU read-side critical sections. On systems with
1355 * its last corresponding SRCU read-side critical section whose beginning
1357 * an SRCU read-side critical section that continues beyond the start of
1359 * but before the beginning of that SRCU read-side critical section.
1360 * Note that these guarantees include CPUs that are offline, idle, or
1379 WRITE_ONCE(rhp->func, srcu_leak_callback);
1383 rhp->func = func;
1388 * call_srcu() - Queue a callback for invocation after an SRCU grace period
1394 * grace period elapses, in other words after all pre-existing SRCU
1395 * read-side critical sections have completed. However, the callback
1396 * function might well execute concurrently with other SRCU read-side
1398 * read-side critical sections are delimited by srcu_read_lock() and
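A minimal call_srcu() sketch matching the description above: retire a node and free it from the callback once all pre-existing readers are done. The my_node type and helpers are hypothetical and not part of this file.

#include <linux/slab.h>
#include <linux/srcu.h>

struct my_node {			/* hypothetical SRCU-protected node */
	struct rcu_head rh;
	int data;
};

static void my_node_free_cb(struct rcu_head *rhp)
{
	kfree(container_of(rhp, struct my_node, rh));
}

static void my_node_retire(struct srcu_struct *ssp, struct my_node *node)
{
	/* Frees the node only after all pre-existing SRCU readers finish;
	 * readers that start after this call may still run concurrently
	 * with the callback. */
	call_srcu(ssp, &node->rh, my_node_free_cb);
}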
1418 srcu_lock_sync(&ssp->dep_map);
1424 "Illegal synchronize_srcu() in same-type SRCU (or in RCU) read-side critical section");
1447 * synchronize_srcu_expedited - Brute-force SRCU grace period
1450 * Wait for an SRCU grace period to elapse, but be more aggressive about
1454 * memory-ordering properties as does synchronize_srcu().
1463 * synchronize_srcu - wait for prior SRCU read-side critical-section completion
1466 * Wait for the count of both indexes to drain to zero. To avoid the
1468 * the index=((->srcu_idx & 1) ^ 1) to drain to zero first,
1469 * and then flips ->srcu_idx and waits for the count of the other index.
1474 * SRCU read-side critical section; doing so will result in deadlock.
1476 * srcu_struct from some other srcu_struct's read-side critical section,
1479 * There are memory-ordering constraints implied by synchronize_srcu().
1482 * the end of its last corresponding SRCU read-side critical section
1484 * each CPU having an SRCU read-side critical section that extends beyond
1487 * the beginning of that SRCU read-side critical section. Note that these
1488 * guarantees include CPUs that are offline, idle, or executing in user mode,
1497 * Of course, these memory-ordering guarantees apply only when
1501 * Implementation of these memory-ordering guarantees is similar to
1504 * If SRCU is likely idle as determined by srcu_should_expedite(),
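The classic updater pattern for the guarantees described above: unpublish the old pointer, wait with synchronize_srcu(), then reclaim. This sketch reuses the hypothetical my_srcu domain and cur_cfg pointer from the reader sketch earlier; cfg_mutex is likewise hypothetical.

#include <linux/mutex.h>
#include <linux/slab.h>

static DEFINE_MUTEX(cfg_mutex);		/* serializes updaters */

static void publish_cfg(struct my_cfg *new_cfg)
{
	struct my_cfg *old;

	mutex_lock(&cfg_mutex);
	old = rcu_replace_pointer(cur_cfg, new_cfg, lockdep_is_held(&cfg_mutex));
	mutex_unlock(&cfg_mutex);
	synchronize_srcu(&my_srcu);	/* may block; never call from a reader */
	kfree(old);			/* no pre-existing reader can still see it */
}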
1520 * get_state_synchronize_srcu - Provide an end-of-grace-period cookie
1531 // Any prior manipulation of SRCU-protected data must happen
1532 // before the load from ->srcu_gp_seq.
1534 return rcu_seq_snap(&ssp->srcu_sup->srcu_gp_seq);
1539 * start_poll_synchronize_srcu - Provide cookie and start grace period
1555 * poll_state_synchronize_srcu - Has cookie's grace period ended?
1565 * This is more pronounced on 32-bit systems where cookies are 32 bits,
1567 * 25-microsecond expedited SRCU grace periods. However, a more likely
1569 * one-millisecond SRCU grace periods. Of course, wrapping in a 64-bit
1575 * a 16-bit cookie, which rcutorture routinely wraps in a matter of a
1582 !rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, cookie))
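A minimal sketch of the polled grace-period API documented above: take a cookie, and later free immediately if the grace period has already elapsed, otherwise fall back to a callback. It reuses the hypothetical my_node type and my_node_free_cb() from the call_srcu() sketch earlier.

static void lazy_retire(struct srcu_struct *ssp, struct my_node *node)
{
	unsigned long cookie;

	cookie = start_poll_synchronize_srcu(ssp);	/* also starts a GP if needed */

	/* ... do other work; check back later ... */

	if (poll_state_synchronize_srcu(ssp, cookie))
		kfree(node);				/* grace period already elapsed */
	else
		call_srcu(ssp, &node->rh, my_node_free_cb);	/* defer instead */
}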
1599 rhp->next = rhp; // Mark the callback as having been invoked.
1601 ssp = sdp->ssp;
1602 if (atomic_dec_and_test(&ssp->srcu_sup->srcu_barrier_cpu_cnt))
1603 complete(&ssp->srcu_sup->srcu_barrier_completion);
1608 * structure's ->cblist, but only if that ->cblist already has at least one
1617 atomic_inc(&ssp->srcu_sup->srcu_barrier_cpu_cnt);
1618 sdp->srcu_barrier_head.func = srcu_barrier_cb;
1619 debug_rcu_head_queue(&sdp->srcu_barrier_head);
1620 if (!rcu_segcblist_entrain(&sdp->srcu_cblist,
1621 &sdp->srcu_barrier_head)) {
1622 debug_rcu_head_unqueue(&sdp->srcu_barrier_head);
1623 atomic_dec(&ssp->srcu_sup->srcu_barrier_cpu_cnt);
1629 * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete.
1630 * @ssp: srcu_struct on which to wait for in-flight callbacks.
1636 unsigned long s = rcu_seq_snap(&ssp->srcu_sup->srcu_barrier_seq);
1639 mutex_lock(&ssp->srcu_sup->srcu_barrier_mutex);
1640 if (rcu_seq_done(&ssp->srcu_sup->srcu_barrier_seq, s)) {
1642 mutex_unlock(&ssp->srcu_sup->srcu_barrier_mutex);
1645 rcu_seq_start(&ssp->srcu_sup->srcu_barrier_seq);
1646 init_completion(&ssp->srcu_sup->srcu_barrier_completion);
1649 atomic_set(&ssp->srcu_sup->srcu_barrier_cpu_cnt, 1);
1652 if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
1653 srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, get_boot_cpu_id()));
1656 srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, cpu));
1660 if (atomic_dec_and_test(&ssp->srcu_sup->srcu_barrier_cpu_cnt))
1661 complete(&ssp->srcu_sup->srcu_barrier_completion);
1662 wait_for_completion(&ssp->srcu_sup->srcu_barrier_completion);
1664 rcu_seq_end(&ssp->srcu_sup->srcu_barrier_seq);
1665 mutex_unlock(&ssp->srcu_sup->srcu_barrier_mutex);
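Typical teardown ordering implied by srcu_barrier(): stop posting callbacks, flush the in-flight ones, then destroy the domain. This sketch reuses the hypothetical my_dev structure from the init_srcu_struct() sketch earlier.

static void my_dev_destroy(struct my_dev *d)
{
	/* Caller guarantees no further call_srcu() on d->srcu past this point. */
	srcu_barrier(&d->srcu);		/* wait for all queued callbacks to run */
	cleanup_srcu_struct(&d->srcu);	/* now legal: no pending GPs or callbacks */
	kfree(d);
}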
1670 * srcu_batches_completed - return batches completed.
1678 return READ_ONCE(ssp->srcu_idx);
1683 * Core SRCU state machine. Push state bits of ->srcu_gp_seq
1691 mutex_lock(&ssp->srcu_sup->srcu_gp_mutex);
1695 * fetching ->srcu_idx for their index, at any point in time there
1697 * need to wait for readers to clear from both index values before
1700 * The load-acquire ensures that we see the accesses performed
1703 idx = rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq)); /* ^^^ */
1705 spin_lock_irq_rcu_node(ssp->srcu_sup);
1706 if (ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed)) {
1707 WARN_ON_ONCE(rcu_seq_state(ssp->srcu_sup->srcu_gp_seq));
1708 spin_unlock_irq_rcu_node(ssp->srcu_sup);
1709 mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex);
1712 idx = rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq));
1715 spin_unlock_irq_rcu_node(ssp->srcu_sup);
1717 mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex);
1722 if (rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)) == SRCU_STATE_SCAN1) {
1723 idx = 1 ^ (ssp->srcu_idx & 1);
1725 mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex);
1729 spin_lock_irq_rcu_node(ssp->srcu_sup);
1730 rcu_seq_set_state(&ssp->srcu_sup->srcu_gp_seq, SRCU_STATE_SCAN2);
1731 ssp->srcu_sup->srcu_n_exp_nodelay = 0;
1732 spin_unlock_irq_rcu_node(ssp->srcu_sup);
1735 if (rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)) == SRCU_STATE_SCAN2) {
1738 * SRCU read-side critical sections are normally short,
1741 idx = 1 ^ (ssp->srcu_idx & 1);
1743 mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex);
1746 ssp->srcu_sup->srcu_n_exp_nodelay = 0;
1747 srcu_gp_end(ssp); /* Releases ->srcu_gp_mutex. */
1768 ssp = sdp->ssp;
1771 WARN_ON_ONCE(!rcu_segcblist_segempty(&sdp->srcu_cblist, RCU_NEXT_TAIL));
1772 rcu_segcblist_advance(&sdp->srcu_cblist,
1773 rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq));
1775 * Although this function is theoretically re-entrant, concurrent
1779 if (sdp->srcu_cblist_invoking ||
1780 !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) {
1786 sdp->srcu_cblist_invoking = true;
1787 rcu_segcblist_extract_done_cbs(&sdp->srcu_cblist, &ready_cbs);
1795 rhp->func(rhp);
1805 rcu_segcblist_add_len(&sdp->srcu_cblist, -len);
1806 sdp->srcu_cblist_invoking = false;
1807 more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist);
1816 * more SRCU callbacks queued, otherwise put SRCU into not-running state.
1818 static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay)
1822 spin_lock_irq_rcu_node(ssp->srcu_sup);
1823 if (ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed)) {
1824 if (!WARN_ON_ONCE(rcu_seq_state(ssp->srcu_sup->srcu_gp_seq))) {
1825 /* All requests fulfilled, time to go idle. */
1828 } else if (!rcu_seq_state(ssp->srcu_sup->srcu_gp_seq)) {
1832 spin_unlock_irq_rcu_node(ssp->srcu_sup);
1835 queue_delayed_work(rcu_gp_wq, &ssp->srcu_sup->work, delay);
1839 * This is the work-queue function that handles SRCU grace periods.
1849 ssp = sup->srcu_ssp;
1854 WRITE_ONCE(sup->reschedule_count, 0);
1857 if (READ_ONCE(sup->reschedule_jiffies) == j) {
1858 ASSERT_EXCLUSIVE_WRITER(sup->reschedule_count);
1859 WRITE_ONCE(sup->reschedule_count, READ_ONCE(sup->reschedule_count) + 1);
1860 if (READ_ONCE(sup->reschedule_count) > srcu_max_nodelay)
1863 WRITE_ONCE(sup->reschedule_count, 1);
1864 WRITE_ONCE(sup->reschedule_jiffies, j);
1874 *gp_seq = rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq);
1896 int ss_state = READ_ONCE(ssp->srcu_sup->srcu_size_state);
1899 idx = ssp->srcu_idx & 0x1;
1901 ss_state_idx = ARRAY_SIZE(srcu_size_state_name) - 1;
1903 tt, tf, rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq), ss_state,
1905 if (!ssp->sda) {
1907 pr_cont(" No per-CPU srcu_data structures (->sda == NULL).\n");
1909 pr_cont(" per-CPU(idx=%d):", idx);
1916 sdp = per_cpu_ptr(ssp->sda, cpu);
1917 u0 = data_race(atomic_long_read(&sdp->srcu_unlock_count[!idx]));
1918 u1 = data_race(atomic_long_read(&sdp->srcu_unlock_count[idx]));
1926 l0 = data_race(atomic_long_read(&sdp->srcu_lock_count[!idx]));
1927 l1 = data_race(atomic_long_read(&sdp->srcu_lock_count[idx]));
1929 c0 = l0 - u0;
1930 c1 = l1 - u1;
1933 "C."[rcu_segcblist_empty(&sdp->srcu_cblist)]);
1948 pr_info("\tNon-default auto-expedite holdoff of %lu ns.\n", exp_holdoff);
1950 pr_info("\tNon-default retry check delay of %lu us.\n", srcu_retry_check_delay);
1952 pr_info("\tNon-default max no-delay of %lu.\n", srcu_max_nodelay);
1953 pr_info("\tMax phase no-delay instances is %lu.\n", srcu_max_nodelay_phase);
1962 /* Decide on srcu_struct-size strategy. */
1982 list_del_init(&sup->work.work.entry);
1984 sup->srcu_size_state == SRCU_SIZE_SMALL)
1985 sup->srcu_size_state = SRCU_SIZE_ALLOC;
1986 queue_work(rcu_gp_wq, &sup->work.work);
1992 /* Initialize any global-scope srcu_struct structures used by this module. */
1997 struct srcu_struct **sspp = mod->srcu_struct_ptrs;
1999 for (i = 0; i < mod->num_srcu_structs; i++) {
2001 ssp->sda = alloc_percpu(struct srcu_data);
2002 if (WARN_ON_ONCE(!ssp->sda))
2003 return -ENOMEM;
2008 /* Clean up any global-scope srcu_struct structures used by this module. */
2013 struct srcu_struct **sspp = mod->srcu_struct_ptrs;
2015 for (i = 0; i < mod->num_srcu_structs; i++) {
2017 if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq_needed)) &&
2018 !WARN_ON_ONCE(!ssp->srcu_sup->sda_is_static))
2021 free_percpu(ssp->sda);