Lines Matching +full:idle +full:- +full:wait +full:- +full:delay
1 // SPDX-License-Identifier: GPL-2.0+
3 * Sleepable Read-Copy Update mechanism for mutual exclusion.
11 * For detailed explanation of Read-Copy Update mechanism see -
25 #include <linux/delay.h>
33 /* Holdoff in nanoseconds for auto-expediting. */
38 /* Overflow-check frequency. N bits roughly says every 2**N grace periods. */
63 /* Number of CPUs to trigger init_srcu_struct()-time transition to big. */
71 /* Early-boot callback-management, so early that no lock is required! */
76 static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay);
117 * Initialize SRCU per-CPU data. Note that statically allocated
120 * is set, don't initialize ->srcu_lock_count[] and ->srcu_unlock_count[].
128 * Initialize the per-CPU srcu_data array, which feeds into the
131 BUILD_BUG_ON(ARRAY_SIZE(sdp->srcu_lock_count) !=
132 ARRAY_SIZE(sdp->srcu_unlock_count));
134 sdp = per_cpu_ptr(ssp->sda, cpu);
136 rcu_segcblist_init(&sdp->srcu_cblist);
137 sdp->srcu_cblist_invoking = false;
138 sdp->srcu_gp_seq_needed = ssp->srcu_sup->srcu_gp_seq;
139 sdp->srcu_gp_seq_needed_exp = ssp->srcu_sup->srcu_gp_seq;
140 sdp->srcu_barrier_head.next = &sdp->srcu_barrier_head;
141 sdp->mynode = NULL;
142 sdp->cpu = cpu;
143 INIT_WORK(&sdp->work, srcu_invoke_callbacks);
144 timer_setup(&sdp->delay_work, srcu_delay_timer, 0);
145 sdp->ssp = ssp;
177 ssp->srcu_sup->node = kcalloc(rcu_num_nodes, sizeof(*ssp->srcu_sup->node), gfp_flags);
178 if (!ssp->srcu_sup->node)
182 ssp->srcu_sup->level[0] = &ssp->srcu_sup->node[0];
184 ssp->srcu_sup->level[i] = ssp->srcu_sup->level[i - 1] + num_rcu_lvl[i - 1];
190 BUILD_BUG_ON(ARRAY_SIZE(snp->srcu_have_cbs) !=
191 ARRAY_SIZE(snp->srcu_data_have_cbs));
192 for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) {
193 snp->srcu_have_cbs[i] = SRCU_SNP_INIT_SEQ;
194 snp->srcu_data_have_cbs[i] = 0;
196 snp->srcu_gp_seq_needed_exp = SRCU_SNP_INIT_SEQ;
197 snp->grplo = -1;
198 snp->grphi = -1;
199 if (snp == &ssp->srcu_sup->node[0]) {
201 snp->srcu_parent = NULL;
205 /* Non-root node. */
206 if (snp == ssp->srcu_sup->level[level + 1])
208 snp->srcu_parent = ssp->srcu_sup->level[level - 1] +
209 (snp - ssp->srcu_sup->level[level]) /
210 levelspread[level - 1];
214 * Initialize the per-CPU srcu_data array, which feeds into the
217 level = rcu_num_lvls - 1;
218 snp_first = ssp->srcu_sup->level[level];
220 sdp = per_cpu_ptr(ssp->sda, cpu);
221 sdp->mynode = &snp_first[cpu / levelspread[level]];
222 for (snp = sdp->mynode; snp != NULL; snp = snp->srcu_parent) {
223 if (snp->grplo < 0)
224 snp->grplo = cpu;
225 snp->grphi = cpu;
227 sdp->grpmask = 1UL << (cpu - sdp->mynode->grplo);
229 smp_store_release(&ssp->srcu_sup->srcu_size_state, SRCU_SIZE_WAIT_BARRIER);
234 * Initialize non-compile-time initialized fields, including the
236 * tells us that ->sda has already been wired up to srcu_data.
241 ssp->srcu_sup = kzalloc(sizeof(*ssp->srcu_sup), GFP_KERNEL);
242 if (!ssp->srcu_sup)
243 return -ENOMEM;
245 spin_lock_init(&ACCESS_PRIVATE(ssp->srcu_sup, lock));
246 ssp->srcu_sup->srcu_size_state = SRCU_SIZE_SMALL;
247 ssp->srcu_sup->node = NULL;
248 mutex_init(&ssp->srcu_sup->srcu_cb_mutex);
249 mutex_init(&ssp->srcu_sup->srcu_gp_mutex);
250 ssp->srcu_idx = 0;
251 ssp->srcu_sup->srcu_gp_seq = SRCU_GP_SEQ_INITIAL_VAL;
252 ssp->srcu_sup->srcu_barrier_seq = 0;
253 mutex_init(&ssp->srcu_sup->srcu_barrier_mutex);
254 atomic_set(&ssp->srcu_sup->srcu_barrier_cpu_cnt, 0);
255 INIT_DELAYED_WORK(&ssp->srcu_sup->work, process_srcu);
256 ssp->srcu_sup->sda_is_static = is_static;
258 ssp->sda = alloc_percpu(struct srcu_data);
259 if (!ssp->sda)
262 ssp->srcu_sup->srcu_gp_seq_needed_exp = SRCU_GP_SEQ_INITIAL_VAL;
263 ssp->srcu_sup->srcu_last_gp_end = ktime_get_mono_fast_ns();
264 if (READ_ONCE(ssp->srcu_sup->srcu_size_state) == SRCU_SIZE_SMALL && SRCU_SIZING_IS_INIT()) {
267 WRITE_ONCE(ssp->srcu_sup->srcu_size_state, SRCU_SIZE_BIG);
269 ssp->srcu_sup->srcu_ssp = ssp;
270 smp_store_release(&ssp->srcu_sup->srcu_gp_seq_needed,
276 free_percpu(ssp->sda);
277 ssp->sda = NULL;
281 kfree(ssp->srcu_sup);
282 ssp->srcu_sup = NULL;
284 return -ENOMEM;
292 /* Don't re-initialize a lock while it is held. */
294 lockdep_init_map(&ssp->dep_map, name, key, 0);
302 * init_srcu_struct - initialize a sleep-RCU structure
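init_srcu_struct() above is the entry point for dynamically allocated srcu_struct structures; statically allocated ones are compile-time initialized with DEFINE_SRCU()/DEFINE_STATIC_SRCU() instead. A minimal usage sketch follows; the my_dev structure and helpers are hypothetical and not part of this file.

#include <linux/srcu.h>
#include <linux/slab.h>

struct my_dev {				/* hypothetical driver state */
	struct srcu_struct srcu;	/* per-device SRCU domain */
	void __rcu *cfg;		/* SRCU-protected pointer */
};

static struct my_dev *my_dev_create(void)
{
	struct my_dev *d = kzalloc(sizeof(*d), GFP_KERNEL);

	if (!d)
		return NULL;
	if (init_srcu_struct(&d->srcu)) {	/* dynamic init; may fail */
		kfree(d);
		return NULL;
	}
	return d;
}

/* Statically allocated domains are compile-time initialized instead: */
DEFINE_STATIC_SRCU(my_static_srcu);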
322 lockdep_assert_held(&ACCESS_PRIVATE(ssp->srcu_sup, lock));
323 smp_store_release(&ssp->srcu_sup->srcu_size_state, SRCU_SIZE_ALLOC);
333 /* Double-checked locking on ->srcu_size_state. */
334 if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) != SRCU_SIZE_SMALL)
336 spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags);
337 if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) != SRCU_SIZE_SMALL) {
338 spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
342 spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
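The srcu_size_state handling above follows the classic double-checked locking idiom: a cheap acquire-load on the fast path, then a recheck under the lock before performing the one-time transition. A generic sketch of that idiom, using hypothetical names unrelated to this file:

#include <linux/spinlock.h>
#include <linux/atomic.h>

static DEFINE_SPINLOCK(once_lock);
static int once_state;			/* 0 = not yet transitioned */

static void do_transition_once(void)
{
	unsigned long flags;

	if (smp_load_acquire(&once_state))	/* fast path: already done */
		return;
	spin_lock_irqsave(&once_lock, flags);
	if (once_state) {			/* recheck under the lock */
		spin_unlock_irqrestore(&once_lock, flags);
		return;
	}
	/* ... perform the one-time work here ... */
	smp_store_release(&once_state, 1);	/* publish completion */
	spin_unlock_irqrestore(&once_lock, flags);
}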
346 * Check to see if the just-encountered contention event justifies
353 if (!SRCU_SIZING_IS_CONTEND() || ssp->srcu_sup->srcu_size_state)
356 if (ssp->srcu_sup->srcu_size_jiffies != j) {
357 ssp->srcu_sup->srcu_size_jiffies = j;
358 ssp->srcu_sup->srcu_n_lock_retries = 0;
360 if (++ssp->srcu_sup->srcu_n_lock_retries <= small_contention_lim)
366 * Acquire the specified srcu_data structure's ->lock, but check for
373 struct srcu_struct *ssp = sdp->ssp;
377 spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags);
379 spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, *flags);
384 * Acquire the specified srcu_struct structure's ->lock, but check for
391 if (spin_trylock_irqsave_rcu_node(ssp->srcu_sup, *flags))
393 spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags);
398 * First-use initialization of statically allocated srcu_struct
400 * done with compile-time initialization, so this check is added
401 * to each update-side SRCU primitive. Use ssp->lock, which -is-
402 * compile-time initialized, to resolve races involving multiple
403 * CPUs trying to garner first-use privileges.
410 if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq_needed))) /*^^^*/
412 spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags);
413 if (!rcu_seq_state(ssp->srcu_sup->srcu_gp_seq_needed)) {
414 spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
418 spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
426 struct srcu_usage *sup = ssp->srcu_sup;
428 return ULONG_CMP_LT(READ_ONCE(sup->srcu_gp_seq), READ_ONCE(sup->srcu_gp_seq_needed_exp));
432 * Computes approximate total of the readers' ->srcu_lock_count[] values
433 * for the rank of per-CPU counters specified by idx, and returns true if
444 struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
446 sum += atomic_long_read(&sdp->srcu_lock_count[idx]);
448 mask = mask | READ_ONCE(sdp->srcu_reader_flavor);
450 WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && (mask & (mask - 1)),
458 * Returns approximate total of the readers' ->srcu_unlock_count[] values
459 * for the rank of per-CPU counters specified by idx.
468 struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
470 sum += atomic_long_read(&sdp->srcu_unlock_count[idx]);
471 mask = mask | READ_ONCE(sdp->srcu_reader_flavor);
473 WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && (mask & (mask - 1)),
480 * Return true if the number of pre-existing readers is determined to
512 * the current ->srcu_idx but not yet have incremented its CPU's
513 * ->srcu_lock_count[idx] counter. In fact, it is possible
515 * ->srcu_idx and incrementing ->srcu_lock_count[idx]. And there
521 * code for a long time. That now-preempted updater has already
522 * flipped ->srcu_idx (possibly during the preceding grace period),
524 * period), and summed up the ->srcu_unlock_count[idx] counters.
526 * increment the old ->srcu_idx value's ->srcu_lock_count[idx]
530 * the old value of ->srcu_idx and is just about to use that value
531 * to index its increment of ->srcu_lock_count[idx]. But as soon as
532 * it leaves that SRCU read-side critical section, it will increment
533 * ->srcu_unlock_count[idx], which must follow the updater's above
535 * an smp_mb() and a later fetch from ->srcu_idx, that task will be
537 * ->srcu_unlock_count[idx] in __srcu_read_unlock() is after the
538 * smp_mb(), and the fetch from ->srcu_idx in __srcu_read_lock()
540 * value of ->srcu_idx until the -second- __srcu_read_lock(),
542 * ->srcu_lock_count[idx] for the old value of ->srcu_idx twice,
550 * ->srcu_lock_count[idx] for the old index, where Nc is the number
560 * comfortably beyond excessive. Especially on 64-bit systems,
568 * srcu_readers_active - returns true if there are readers, and false
582 struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
584 sum += atomic_long_read(&sdp->srcu_lock_count[0]);
585 sum += atomic_long_read(&sdp->srcu_lock_count[1]);
586 sum -= atomic_long_read(&sdp->srcu_unlock_count[0]);
587 sum -= atomic_long_read(&sdp->srcu_unlock_count[1]);
596 * their read-side critical sections. If there are still some readers
598 * The blocking time is increased as the grace-period age increases,
606 #define SRCU_INTERVAL 1 // Base delay if no expedited GPs pending.
607 #define SRCU_MAX_INTERVAL 10 // Maximum incremental delay from slow readers.
609 #define SRCU_DEFAULT_MAX_NODELAY_PHASE_LO 3UL // Lowmark on default per-GP-phase
610 // no-delay instances.
611 #define SRCU_DEFAULT_MAX_NODELAY_PHASE_HI 1000UL // Highmark on default per-GP-phase
612 // no-delay instances.
617 // per-GP-phase no-delay instances adjusted to allow non-sleeping poll up to
623 // Maximum per-GP-phase consecutive no-delay instances.
632 // Maximum consecutive no-delay instances.
640 * Return grace-period delay, zero if there are expedited grace
648 struct srcu_usage *sup = ssp->srcu_sup;
652 if (rcu_seq_state(READ_ONCE(sup->srcu_gp_seq))) {
653 j = jiffies - 1;
654 gpstart = READ_ONCE(sup->srcu_gp_start);
656 jbase += j - gpstart;
658 ASSERT_EXCLUSIVE_WRITER(sup->srcu_n_exp_nodelay);
659 WRITE_ONCE(sup->srcu_n_exp_nodelay, READ_ONCE(sup->srcu_n_exp_nodelay) + 1);
660 if (READ_ONCE(sup->srcu_n_exp_nodelay) > srcu_max_nodelay_phase)
668 * cleanup_srcu_struct - deconstruct a sleep-RCU structure
677 struct srcu_usage *sup = ssp->srcu_sup;
683 flush_delayed_work(&sup->work);
685 struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
687 del_timer_sync(&sdp->delay_work);
688 flush_work(&sdp->work);
689 if (WARN_ON(rcu_segcblist_n_cbs(&sdp->srcu_cblist)))
692 if (WARN_ON(rcu_seq_state(READ_ONCE(sup->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
693 WARN_ON(rcu_seq_current(&sup->srcu_gp_seq) != sup->srcu_gp_seq_needed) ||
696 __func__, ssp, rcu_seq_state(READ_ONCE(sup->srcu_gp_seq)),
697 rcu_seq_current(&sup->srcu_gp_seq), sup->srcu_gp_seq_needed);
703 kfree(sup->node);
704 sup->node = NULL;
705 sup->srcu_size_state = SRCU_SIZE_SMALL;
706 if (!sup->sda_is_static) {
707 free_percpu(ssp->sda);
708 ssp->sda = NULL;
710 ssp->srcu_sup = NULL;
723 /* NMI-unsafe use in NMI is a bad sign, as are multi-bit read_flavor values. */
725 WARN_ON_ONCE(read_flavor & (read_flavor - 1));
727 sdp = raw_cpu_ptr(ssp->sda);
728 old_read_flavor = READ_ONCE(sdp->srcu_reader_flavor);
730 old_read_flavor = cmpxchg(&sdp->srcu_reader_flavor, 0, read_flavor);
734 WARN_ONCE(old_read_flavor != read_flavor, "CPU %d old state %d new state %d\n", sdp->cpu, old_read_flavor, read_flavor);
739 * Counts the new reader in the appropriate per-CPU element of the
741 * Returns a guaranteed non-negative index that must be passed to the
748 idx = READ_ONCE(ssp->srcu_idx) & 0x1;
749 this_cpu_inc(ssp->sda->srcu_lock_count[idx].counter);
756 * Removes the count for the old reader from the appropriate per-CPU
763 this_cpu_inc(ssp->sda->srcu_unlock_count[idx].counter);
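The counters manipulated above back the public reader API: srcu_read_lock() returns the index that must later be handed to srcu_read_unlock(). A minimal reader sketch; the my_cfg type, my_srcu domain, and cur_cfg pointer are hypothetical and not part of this file.

#include <linux/srcu.h>

struct my_cfg {				/* hypothetical SRCU-protected structure */
	int value;
};

DEFINE_STATIC_SRCU(my_srcu);
static struct my_cfg __rcu *cur_cfg;

static int read_cfg_value(void)
{
	struct my_cfg *cfg;
	int idx, val = -1;

	idx = srcu_read_lock(&my_srcu);		/* bumps ->srcu_lock_count[idx] */
	cfg = srcu_dereference(cur_cfg, &my_srcu);
	if (cfg)
		val = cfg->value;		/* readers may block/sleep here */
	srcu_read_unlock(&my_srcu, idx);	/* bumps ->srcu_unlock_count[idx] */
	return val;
}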
770 * Counts the new reader in the appropriate per-CPU element of the
771 * srcu_struct, but in an NMI-safe manner using RMW atomics.
777 struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
779 idx = READ_ONCE(ssp->srcu_idx) & 0x1;
780 atomic_long_inc(&sdp->srcu_lock_count[idx]);
787 * Removes the count for the old reader from the appropriate per-CPU
793 struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
796 atomic_long_inc(&sdp->srcu_unlock_count[idx]);
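These atomic RMW counters back the NMI-safe reader flavor. A minimal sketch, assuming the srcu_read_lock_nmisafe()/srcu_read_unlock_nmisafe() wrappers are available in this kernel; the domain and handler below are hypothetical. The flavor-checking code shown earlier warns if a single srcu_struct mixes reader flavors.

DEFINE_STATIC_SRCU(my_nmi_srcu);	/* hypothetical domain used only NMI-safely */

static void my_nmi_handler(void)
{
	int idx;

	/* Atomic RMW counters make this safe even if the NMI arrived in the
	 * middle of a this_cpu_inc()-based __srcu_read_lock(). */
	idx = srcu_read_lock_nmisafe(&my_nmi_srcu);
	/* ... read SRCU-protected state ... */
	srcu_read_unlock_nmisafe(&my_nmi_srcu, idx);
}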
809 lockdep_assert_held(&ACCESS_PRIVATE(ssp->srcu_sup, lock));
810 WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed));
811 WRITE_ONCE(ssp->srcu_sup->srcu_gp_start, jiffies);
812 WRITE_ONCE(ssp->srcu_sup->srcu_n_exp_nodelay, 0);
813 smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */
814 rcu_seq_start(&ssp->srcu_sup->srcu_gp_seq);
815 state = rcu_seq_state(ssp->srcu_sup->srcu_gp_seq);
824 queue_work_on(sdp->cpu, rcu_gp_wq, &sdp->work);
828 unsigned long delay)
830 if (!delay) {
831 queue_work_on(sdp->cpu, rcu_gp_wq, &sdp->work);
835 timer_reduce(&sdp->delay_work, jiffies + delay);
842 static void srcu_schedule_cbs_sdp(struct srcu_data *sdp, unsigned long delay)
844 srcu_queue_delayed_work_on(sdp, delay);
850 * just-completed grace period, the one corresponding to idx. If possible,
854 unsigned long mask, unsigned long delay)
858 for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) {
859 if (!(mask & (1UL << (cpu - snp->grplo))))
861 srcu_schedule_cbs_sdp(per_cpu_ptr(ssp->sda, cpu), delay);
869 * The ->srcu_cb_mutex acquisition does not protect any data, but
871 * are initiating callback invocation. This allows the ->srcu_have_cbs[]
887 struct srcu_usage *sup = ssp->srcu_sup;
890 mutex_lock(&sup->srcu_cb_mutex);
894 idx = rcu_seq_state(sup->srcu_gp_seq);
899 WRITE_ONCE(sup->srcu_last_gp_end, ktime_get_mono_fast_ns());
900 rcu_seq_end(&sup->srcu_gp_seq);
901 gpseq = rcu_seq_current(&sup->srcu_gp_seq);
902 if (ULONG_CMP_LT(sup->srcu_gp_seq_needed_exp, gpseq))
903 WRITE_ONCE(sup->srcu_gp_seq_needed_exp, gpseq);
905 mutex_unlock(&sup->srcu_gp_mutex);
909 ss_state = smp_load_acquire(&sup->srcu_size_state);
911 srcu_schedule_cbs_sdp(per_cpu_ptr(ssp->sda, get_boot_cpu_id()),
914 idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs);
918 last_lvl = snp >= sup->level[rcu_num_lvls - 1];
920 cbs = ss_state < SRCU_SIZE_BIG || snp->srcu_have_cbs[idx] == gpseq;
921 snp->srcu_have_cbs[idx] = gpseq;
922 rcu_seq_set_state(&snp->srcu_have_cbs[idx], 1);
923 sgsne = snp->srcu_gp_seq_needed_exp;
925 WRITE_ONCE(snp->srcu_gp_seq_needed_exp, gpseq);
929 mask = snp->srcu_data_have_cbs[idx];
930 snp->srcu_data_have_cbs[idx] = 0;
940 sdp = per_cpu_ptr(ssp->sda, cpu);
942 if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed + 100))
943 sdp->srcu_gp_seq_needed = gpseq;
944 if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed_exp + 100))
945 sdp->srcu_gp_seq_needed_exp = gpseq;
950 mutex_unlock(&sup->srcu_cb_mutex);
954 gpseq = rcu_seq_current(&sup->srcu_gp_seq);
956 ULONG_CMP_LT(gpseq, sup->srcu_gp_seq_needed)) {
969 smp_store_release(&sup->srcu_size_state, ss_state + 1);
974 * Funnel-locking scheme to scalably mediate many concurrent expedited
975 * grace-period requests. This function is invoked for the first known
987 for (; snp != NULL; snp = snp->srcu_parent) {
988 sgsne = READ_ONCE(snp->srcu_gp_seq_needed_exp);
989 if (WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, s)) ||
993 sgsne = snp->srcu_gp_seq_needed_exp;
998 WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
1002 if (ULONG_CMP_LT(ssp->srcu_sup->srcu_gp_seq_needed_exp, s))
1003 WRITE_ONCE(ssp->srcu_sup->srcu_gp_seq_needed_exp, s);
1004 spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
1008 * Funnel-locking scheme to scalably mediate many concurrent grace-period
1010 * period s. Losers must either ensure that their desired grace-period
1024 int idx = rcu_seq_ctr(s) % ARRAY_SIZE(sdp->mynode->srcu_have_cbs);
1029 struct srcu_usage *sup = ssp->srcu_sup;
1032 if (smp_load_acquire(&sup->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
1035 snp_leaf = sdp->mynode;
1039 for (snp = snp_leaf; snp != NULL; snp = snp->srcu_parent) {
1040 if (WARN_ON_ONCE(rcu_seq_done(&sup->srcu_gp_seq, s)) && snp != snp_leaf)
1043 snp_seq = snp->srcu_have_cbs[idx];
1046 snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
1056 snp->srcu_have_cbs[idx] = s;
1058 snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
1059 sgsne = snp->srcu_gp_seq_needed_exp;
1061 WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
1067 if (ULONG_CMP_LT(sup->srcu_gp_seq_needed, s)) {
1072 smp_store_release(&sup->srcu_gp_seq_needed, s); /*^^^*/
1074 if (!do_norm && ULONG_CMP_LT(sup->srcu_gp_seq_needed_exp, s))
1075 WRITE_ONCE(sup->srcu_gp_seq_needed_exp, s);
1078 if (!WARN_ON_ONCE(rcu_seq_done(&sup->srcu_gp_seq, s)) &&
1079 rcu_seq_state(sup->srcu_gp_seq) == SRCU_STATE_IDLE) {
1088 queue_delayed_work(rcu_gp_wq, &sup->work,
1090 else if (list_empty(&sup->work.work.entry))
1091 list_add(&sup->work.work.entry, &srcu_boot_list);
1097 * Wait until all readers counted by array index idx complete, but
1099 * The caller must ensure that ->srcu_idx is not changed while checking.
1110 if ((--trycount + curdelay) <= 0)
1117 * Increment the ->srcu_idx counter so that future SRCU readers will
1118 * use the other rank of the ->srcu_(un)lock_count[] arrays. This allows
1119 * us to wait for pre-existing readers in a starvation-free manner.
1124 * Because the flip of ->srcu_idx is executed only if the
1126 * the ->srcu_unlock_count[] and ->srcu_lock_count[] sums matched
1131 * __srcu_read_lock(), that reader was using a value of ->srcu_idx
1136 * value of ->srcu_idx.
1138 * This sum-equality check and ordering also ensures that if
1140 * ->srcu_idx, this updater's earlier scans cannot have seen
1142 * this grace period need not wait on that reader. After all,
1155 WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1); // Flip the counter.
1169 * If SRCU is likely idle, in other words, the next SRCU grace period
1173 * Note that it is OK for several concurrent from-idle requests for a new
1174 * grace period to specify expediting because they will all end
1178 * callbacks, this function will nevertheless say "idle". This is not
1184 * This function is also subject to counter-wrap errors, but let's face
1189 * of a needlessly non-expedited grace period is similarly negligible.
1201 if (this_cpu_read(ssp->sda->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE)
1203 /* If the local srcu_data structure has callbacks, not idle. */
1204 sdp = raw_cpu_ptr(ssp->sda);
1206 if (rcu_segcblist_pend_cbs(&sdp->srcu_cblist)) {
1208 return false; /* Callbacks already present, so not idle. */
1220 tlast = READ_ONCE(ssp->srcu_sup->srcu_last_gp_end);
1226 curseq = rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq);
1227 smp_mb(); /* Order ->srcu_gp_seq with ->srcu_gp_seq_needed. */
1228 if (ULONG_CMP_LT(curseq, READ_ONCE(ssp->srcu_sup->srcu_gp_seq_needed)))
1229 return false; /* Grace period in progress, so not idle. */
1230 smp_mb(); /* Order ->srcu_gp_seq with prior access. */
1231 if (curseq != rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq))
1232 return false; /* GP # changed, so not idle. */
1233 return true; /* With reasonable probability, idle! */
1244 * Start an SRCU grace period, and also queue the callback if non-NULL.
1261 * SRCU read-side critical section so that the grace-period
1265 ss_state = smp_load_acquire(&ssp->srcu_sup->srcu_size_state);
1267 sdp = per_cpu_ptr(ssp->sda, get_boot_cpu_id());
1269 sdp = raw_cpu_ptr(ssp->sda);
1272 rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp);
1309 s = rcu_seq_snap(&ssp->srcu_sup->srcu_gp_seq);
1311 rcu_segcblist_advance(&sdp->srcu_cblist,
1312 rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq));
1320 WARN_ON_ONCE(!rcu_segcblist_accelerate(&sdp->srcu_cblist, s));
1322 if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) {
1323 sdp->srcu_gp_seq_needed = s;
1326 if (!do_norm && ULONG_CMP_LT(sdp->srcu_gp_seq_needed_exp, s)) {
1327 sdp->srcu_gp_seq_needed_exp = s;
1336 sdp_mynode = sdp->mynode;
1349 * grace-period processing if it is not already running.
1352 * all pre-existing SRCU read-side critical sections. On systems with
1355 * its last corresponding SRCU read-side critical section whose beginning
1357 * an SRCU read-side critical section that continues beyond the start of
1359 * but before the beginning of that SRCU read-side critical section.
1360 * Note that these guarantees include CPUs that are offline, idle, or
1379 WRITE_ONCE(rhp->func, srcu_leak_callback);
1383 rhp->func = func;
1388 * call_srcu() - Queue a callback for invocation after an SRCU grace period
1394 * grace period elapses, in other words after all pre-existing SRCU
1395 * read-side critical sections have completed. However, the callback
1396 * function might well execute concurrently with other SRCU read-side
1398 * read-side critical sections are delimited by srcu_read_lock() and
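A minimal call_srcu() sketch matching the description above: retire a node and free it from the callback once all pre-existing readers are done. The my_node type and helpers are hypothetical and not part of this file.

#include <linux/slab.h>
#include <linux/srcu.h>

struct my_node {			/* hypothetical SRCU-protected node */
	struct rcu_head rh;
	int data;
};

static void my_node_free_cb(struct rcu_head *rhp)
{
	kfree(container_of(rhp, struct my_node, rh));
}

static void my_node_retire(struct srcu_struct *ssp, struct my_node *node)
{
	/* Frees the node only after all pre-existing SRCU readers finish;
	 * readers that start after this call may still run concurrently
	 * with the callback. */
	call_srcu(ssp, &node->rh, my_node_free_cb);
}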
1418 srcu_lock_sync(&ssp->dep_map);
1424 "Illegal synchronize_srcu() in same-type SRCU (or in RCU) read-side critical section");
1447 * synchronize_srcu_expedited - Brute-force SRCU grace period
1450 * Wait for an SRCU grace period to elapse, but be more aggressive about
1454 * memory-ordering properties as does synchronize_srcu().
1463 * synchronize_srcu - wait for prior SRCU read-side critical-section completion
1466 * Wait for the count of both indexes to drain to zero. To avoid the
1468 * the index=((->srcu_idx & 1) ^ 1) to drain to zero first,
1469 * and then flips ->srcu_idx and waits for the count of the other index.
1474 * SRCU read-side critical section; doing so will result in deadlock.
1476 * srcu_struct from some other srcu_struct's read-side critical section,
1479 * There are memory-ordering constraints implied by synchronize_srcu().
1482 * the end of its last corresponding SRCU read-side critical section
1484 * each CPU having an SRCU read-side critical section that extends beyond
1487 * the beginning of that SRCU read-side critical section. Note that these
1488 * guarantees include CPUs that are offline, idle, or executing in user mode,
1497 * Of course, these memory-ordering guarantees apply only when
1501 * Implementation of these memory-ordering guarantees is similar to
1504 * If SRCU is likely idle as determined by srcu_should_expedite(),
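The classic updater pattern for the guarantees described above: unpublish the old pointer, wait with synchronize_srcu(), then reclaim. This sketch reuses the hypothetical my_srcu domain and cur_cfg pointer from the reader sketch earlier; cfg_mutex is likewise hypothetical.

#include <linux/mutex.h>
#include <linux/slab.h>

static DEFINE_MUTEX(cfg_mutex);		/* serializes updaters */

static void publish_cfg(struct my_cfg *new_cfg)
{
	struct my_cfg *old;

	mutex_lock(&cfg_mutex);
	old = rcu_replace_pointer(cur_cfg, new_cfg, lockdep_is_held(&cfg_mutex));
	mutex_unlock(&cfg_mutex);
	synchronize_srcu(&my_srcu);	/* may block; never call from a reader */
	kfree(old);			/* no pre-existing reader can still see it */
}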
1520 * get_state_synchronize_srcu - Provide an end-of-grace-period cookie
1531 // Any prior manipulation of SRCU-protected data must happen
1532 // before the load from ->srcu_gp_seq.
1534 return rcu_seq_snap(&ssp->srcu_sup->srcu_gp_seq);
1539 * start_poll_synchronize_srcu - Provide cookie and start grace period
1555 * poll_state_synchronize_srcu - Has cookie's grace period ended?
1565 * This is more pronounced on 32-bit systems where cookies are 32 bits,
1567 * 25-microsecond expedited SRCU grace periods. However, a more likely
1569 * one-millisecond SRCU grace periods. Of course, wrapping in a 64-bit
1575 * a 16-bit cookie, which rcutorture routinely wraps in a matter of a
1582 !rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, cookie))
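A minimal sketch of the polled grace-period API documented above: take a cookie, and later free immediately if the grace period has already elapsed, otherwise fall back to a callback. It reuses the hypothetical my_node type and my_node_free_cb() from the call_srcu() sketch earlier.

static void lazy_retire(struct srcu_struct *ssp, struct my_node *node)
{
	unsigned long cookie;

	cookie = start_poll_synchronize_srcu(ssp);	/* also starts a GP if needed */

	/* ... do other work; check back later ... */

	if (poll_state_synchronize_srcu(ssp, cookie))
		kfree(node);				/* grace period already elapsed */
	else
		call_srcu(ssp, &node->rh, my_node_free_cb);	/* defer instead */
}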
1599 rhp->next = rhp; // Mark the callback as having been invoked.
1601 ssp = sdp->ssp;
1602 if (atomic_dec_and_test(&ssp->srcu_sup->srcu_barrier_cpu_cnt))
1603 complete(&ssp->srcu_sup->srcu_barrier_completion);
1608 * structure's ->cblist, but only if that ->cblist already has at least one
1617 atomic_inc(&ssp->srcu_sup->srcu_barrier_cpu_cnt);
1618 sdp->srcu_barrier_head.func = srcu_barrier_cb;
1619 debug_rcu_head_queue(&sdp->srcu_barrier_head);
1620 if (!rcu_segcblist_entrain(&sdp->srcu_cblist,
1621 &sdp->srcu_barrier_head)) {
1622 debug_rcu_head_unqueue(&sdp->srcu_barrier_head);
1623 atomic_dec(&ssp->srcu_sup->srcu_barrier_cpu_cnt);
1629 * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete.
1630 * @ssp: srcu_struct on which to wait for in-flight callbacks.
1636 unsigned long s = rcu_seq_snap(&ssp->srcu_sup->srcu_barrier_seq);
1639 mutex_lock(&ssp->srcu_sup->srcu_barrier_mutex);
1640 if (rcu_seq_done(&ssp->srcu_sup->srcu_barrier_seq, s)) {
1642 mutex_unlock(&ssp->srcu_sup->srcu_barrier_mutex);
1645 rcu_seq_start(&ssp->srcu_sup->srcu_barrier_seq);
1646 init_completion(&ssp->srcu_sup->srcu_barrier_completion);
1649 atomic_set(&ssp->srcu_sup->srcu_barrier_cpu_cnt, 1);
1652 if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
1653 srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, get_boot_cpu_id()));
1656 srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, cpu));
1660 if (atomic_dec_and_test(&ssp->srcu_sup->srcu_barrier_cpu_cnt))
1661 complete(&ssp->srcu_sup->srcu_barrier_completion);
1662 wait_for_completion(&ssp->srcu_sup->srcu_barrier_completion);
1664 rcu_seq_end(&ssp->srcu_sup->srcu_barrier_seq);
1665 mutex_unlock(&ssp->srcu_sup->srcu_barrier_mutex);
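Typical teardown ordering implied by srcu_barrier(): stop posting callbacks, flush the in-flight ones, then destroy the domain. This sketch reuses the hypothetical my_dev structure from the init_srcu_struct() sketch earlier.

static void my_dev_destroy(struct my_dev *d)
{
	/* Caller guarantees no further call_srcu() on d->srcu past this point. */
	srcu_barrier(&d->srcu);		/* wait for all queued callbacks to run */
	cleanup_srcu_struct(&d->srcu);	/* now legal: no pending GPs or callbacks */
	kfree(d);
}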
1670 * srcu_batches_completed - return batches completed.
1678 return READ_ONCE(ssp->srcu_idx);
1683 * Core SRCU state machine. Push state bits of ->srcu_gp_seq
1691 mutex_lock(&ssp->srcu_sup->srcu_gp_mutex);
1695 * fetching ->srcu_idx for their index, at any point in time there
1697 * need to wait for readers to clear from both index values before
1700 * The load-acquire ensures that we see the accesses performed
1703 idx = rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq)); /* ^^^ */
1705 spin_lock_irq_rcu_node(ssp->srcu_sup);
1706 if (ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed)) {
1707 WARN_ON_ONCE(rcu_seq_state(ssp->srcu_sup->srcu_gp_seq));
1708 spin_unlock_irq_rcu_node(ssp->srcu_sup);
1709 mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex);
1712 idx = rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq));
1715 spin_unlock_irq_rcu_node(ssp->srcu_sup);
1717 mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex);
1722 if (rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)) == SRCU_STATE_SCAN1) {
1723 idx = 1 ^ (ssp->srcu_idx & 1);
1725 mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex);
1729 spin_lock_irq_rcu_node(ssp->srcu_sup);
1730 rcu_seq_set_state(&ssp->srcu_sup->srcu_gp_seq, SRCU_STATE_SCAN2);
1731 ssp->srcu_sup->srcu_n_exp_nodelay = 0;
1732 spin_unlock_irq_rcu_node(ssp->srcu_sup);
1735 if (rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)) == SRCU_STATE_SCAN2) {
1738 * SRCU read-side critical sections are normally short,
1741 idx = 1 ^ (ssp->srcu_idx & 1);
1743 mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex);
1746 ssp->srcu_sup->srcu_n_exp_nodelay = 0;
1747 srcu_gp_end(ssp); /* Releases ->srcu_gp_mutex. */
1768 ssp = sdp->ssp;
1771 WARN_ON_ONCE(!rcu_segcblist_segempty(&sdp->srcu_cblist, RCU_NEXT_TAIL));
1772 rcu_segcblist_advance(&sdp->srcu_cblist,
1773 rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq));
1775 * Although this function is theoretically re-entrant, concurrent
1779 if (sdp->srcu_cblist_invoking ||
1780 !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) {
1786 sdp->srcu_cblist_invoking = true;
1787 rcu_segcblist_extract_done_cbs(&sdp->srcu_cblist, &ready_cbs);
1795 rhp->func(rhp);
1805 rcu_segcblist_add_len(&sdp->srcu_cblist, -len);
1806 sdp->srcu_cblist_invoking = false;
1807 more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist);
1816 * more SRCU callbacks queued, otherwise put SRCU into not-running state.
1818 static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay)
1822 spin_lock_irq_rcu_node(ssp->srcu_sup);
1823 if (ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed)) {
1824 if (!WARN_ON_ONCE(rcu_seq_state(ssp->srcu_sup->srcu_gp_seq))) {
1825 /* All requests fulfilled, time to go idle. */
1828 } else if (!rcu_seq_state(ssp->srcu_sup->srcu_gp_seq)) {
1832 spin_unlock_irq_rcu_node(ssp->srcu_sup);
1835 queue_delayed_work(rcu_gp_wq, &ssp->srcu_sup->work, delay);
1839 * This is the work-queue function that handles SRCU grace periods.
1849 ssp = sup->srcu_ssp;
1854 WRITE_ONCE(sup->reschedule_count, 0);
1857 if (READ_ONCE(sup->reschedule_jiffies) == j) {
1858 ASSERT_EXCLUSIVE_WRITER(sup->reschedule_count);
1859 WRITE_ONCE(sup->reschedule_count, READ_ONCE(sup->reschedule_count) + 1);
1860 if (READ_ONCE(sup->reschedule_count) > srcu_max_nodelay)
1863 WRITE_ONCE(sup->reschedule_count, 1);
1864 WRITE_ONCE(sup->reschedule_jiffies, j);
1874 *gp_seq = rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq);
1896 int ss_state = READ_ONCE(ssp->srcu_sup->srcu_size_state);
1899 idx = ssp->srcu_idx & 0x1;
1901 ss_state_idx = ARRAY_SIZE(srcu_size_state_name) - 1;
1903 tt, tf, rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq), ss_state,
1905 if (!ssp->sda) {
1907 pr_cont(" No per-CPU srcu_data structures (->sda == NULL).\n");
1909 pr_cont(" per-CPU(idx=%d):", idx);
1916 sdp = per_cpu_ptr(ssp->sda, cpu);
1917 u0 = data_race(atomic_long_read(&sdp->srcu_unlock_count[!idx]));
1918 u1 = data_race(atomic_long_read(&sdp->srcu_unlock_count[idx]));
1926 l0 = data_race(atomic_long_read(&sdp->srcu_lock_count[!idx]));
1927 l1 = data_race(atomic_long_read(&sdp->srcu_lock_count[idx]));
1929 c0 = l0 - u0;
1930 c1 = l1 - u1;
1933 "C."[rcu_segcblist_empty(&sdp->srcu_cblist)]);
1948 pr_info("\tNon-default auto-expedite holdoff of %lu ns.\n", exp_holdoff);
1950 pr_info("\tNon-default retry check delay of %lu us.\n", srcu_retry_check_delay);
1952 pr_info("\tNon-default max no-delay of %lu.\n", srcu_max_nodelay);
1953 pr_info("\tMax phase no-delay instances is %lu.\n", srcu_max_nodelay_phase);
1962 /* Decide on srcu_struct-size strategy. */
1982 list_del_init(&sup->work.work.entry);
1984 sup->srcu_size_state == SRCU_SIZE_SMALL)
1985 sup->srcu_size_state = SRCU_SIZE_ALLOC;
1986 queue_work(rcu_gp_wq, &sup->work.work);
1992 /* Initialize any global-scope srcu_struct structures used by this module. */
1997 struct srcu_struct **sspp = mod->srcu_struct_ptrs;
1999 for (i = 0; i < mod->num_srcu_structs; i++) {
2001 ssp->sda = alloc_percpu(struct srcu_data);
2002 if (WARN_ON_ONCE(!ssp->sda))
2003 return -ENOMEM;
2008 /* Clean up any global-scope srcu_struct structures used by this module. */
2013 struct srcu_struct **sspp = mod->srcu_struct_ptrs;
2015 for (i = 0; i < mod->num_srcu_structs; i++) {
2017 if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq_needed)) &&
2018 !WARN_ON_ONCE(!ssp->srcu_sup->sda_is_static))
2021 free_percpu(ssp->sda);