/*
 * Sleepable Read-Copy Update mechanism for mutual exclusion.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 * Copyright (C) IBM Corporation, 2006
 * Copyright (C) Fujitsu, 2012
 *
 * Author: Paul McKenney <paulmck@us.ibm.com>
 *	   Lai Jiangshan <laijs@cn.fujitsu.com>
 *
 * For detailed explanation of Read-Copy Update mechanism see -
 *		Documentation/RCU/ *.txt
 *
 */

#include <linux/export.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/rcupdate_wait.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/srcu.h>

#include "rcu.h"
#include "rcu_segcblist.h"

ulong exp_holdoff = 25 * 1000; /* Holdoff (ns) for auto-expediting. */
module_param(exp_holdoff, ulong, 0444);

static void srcu_invoke_callbacks(struct work_struct *work);
static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay);

/*
 * Initialize SRCU combining tree.  Note that statically allocated
 * srcu_struct structures might already have srcu_read_lock() and
 * srcu_read_unlock() running against them.  So if the is_static parameter
 * is set, don't initialize ->srcu_lock_count[] and ->srcu_unlock_count[].
 */
static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static)
{
	int cpu;
	int i;
	int level = 0;
	int levelspread[RCU_NUM_LVLS];
	struct srcu_data *sdp;
	struct srcu_node *snp;
	struct srcu_node *snp_first;

	/* Work out the overall tree geometry. */
	sp->level[0] = &sp->node[0];
	for (i = 1; i < rcu_num_lvls; i++)
		sp->level[i] = sp->level[i - 1] + num_rcu_lvl[i - 1];
	rcu_init_levelspread(levelspread, num_rcu_lvl);

	/* Each pass through this loop initializes one srcu_node structure. */
	rcu_for_each_node_breadth_first(sp, snp) {
		spin_lock_init(&snp->lock);
		WARN_ON_ONCE(ARRAY_SIZE(snp->srcu_have_cbs) !=
			     ARRAY_SIZE(snp->srcu_data_have_cbs));
		for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) {
			snp->srcu_have_cbs[i] = 0;
			snp->srcu_data_have_cbs[i] = 0;
		}
		snp->srcu_gp_seq_needed_exp = 0;
		snp->grplo = -1;
		snp->grphi = -1;
		if (snp == &sp->node[0]) {
			/* Root node, special case. */
			snp->srcu_parent = NULL;
			continue;
		}

		/* Non-root node. */
		if (snp == sp->level[level + 1])
			level++;
		snp->srcu_parent = sp->level[level - 1] +
				   (snp - sp->level[level]) /
				   levelspread[level - 1];
	}

	/*
	 * Initialize the per-CPU srcu_data array, which feeds into the
	 * leaves of the srcu_node tree.
	 */
	WARN_ON_ONCE(ARRAY_SIZE(sdp->srcu_lock_count) !=
		     ARRAY_SIZE(sdp->srcu_unlock_count));
	level = rcu_num_lvls - 1;
	snp_first = sp->level[level];
	for_each_possible_cpu(cpu) {
		sdp = per_cpu_ptr(sp->sda, cpu);
		spin_lock_init(&sdp->lock);
		rcu_segcblist_init(&sdp->srcu_cblist);
		sdp->srcu_cblist_invoking = false;
		sdp->srcu_gp_seq_needed = sp->srcu_gp_seq;
		sdp->srcu_gp_seq_needed_exp = sp->srcu_gp_seq;
		sdp->mynode = &snp_first[cpu / levelspread[level]];
		for (snp = sdp->mynode; snp != NULL; snp = snp->srcu_parent) {
			if (snp->grplo < 0)
				snp->grplo = cpu;
			snp->grphi = cpu;
		}
		sdp->cpu = cpu;
		INIT_DELAYED_WORK(&sdp->work, srcu_invoke_callbacks);
		sdp->sp = sp;
		sdp->grpmask = 1 << (cpu - sdp->mynode->grplo);
		if (is_static)
			continue;

		/* Dynamically allocated, better be no srcu_read_locks()! */
		for (i = 0; i < ARRAY_SIZE(sdp->srcu_lock_count); i++) {
			sdp->srcu_lock_count[i] = 0;
			sdp->srcu_unlock_count[i] = 0;
		}
	}
}

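/*
 * Worked example of the leaf-assignment arithmetic above (a sketch
 * using a hypothetical levelspread value): if the leaf level's
 * levelspread is 16, then CPU 37's srcu_data structure feeds into
 * leaf snp_first[37 / 16] == snp_first[2], and its bit in that leaf's
 * ->srcu_data_have_cbs[] masks is ->grpmask == 1 << (37 - snp->grplo).
 */
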
/*
 * Initialize non-compile-time initialized fields, including the
 * associated srcu_node and srcu_data structures.  The is_static
 * parameter is passed through to init_srcu_struct_nodes(), and
 * also tells us that ->sda has already been wired up to srcu_data.
 */
static int init_srcu_struct_fields(struct srcu_struct *sp, bool is_static)
{
	mutex_init(&sp->srcu_cb_mutex);
	mutex_init(&sp->srcu_gp_mutex);
	sp->srcu_idx = 0;
	sp->srcu_gp_seq = 0;
	sp->srcu_barrier_seq = 0;
	mutex_init(&sp->srcu_barrier_mutex);
	atomic_set(&sp->srcu_barrier_cpu_cnt, 0);
	INIT_DELAYED_WORK(&sp->work, process_srcu);
	if (!is_static)
		sp->sda = alloc_percpu(struct srcu_data);
	init_srcu_struct_nodes(sp, is_static);
	sp->srcu_gp_seq_needed_exp = 0;
	sp->srcu_last_gp_end = ktime_get_mono_fast_ns();
	smp_store_release(&sp->srcu_gp_seq_needed, 0); /* Init done. */
	return sp->sda ? 0 : -ENOMEM;
}

#ifdef CONFIG_DEBUG_LOCK_ALLOC

int __init_srcu_struct(struct srcu_struct *sp, const char *name,
		       struct lock_class_key *key)
{
	/* Don't re-initialize a lock while it is held. */
	debug_check_no_locks_freed((void *)sp, sizeof(*sp));
	lockdep_init_map(&sp->dep_map, name, key, 0);
	spin_lock_init(&sp->gp_lock);
	return init_srcu_struct_fields(sp, false);
}
EXPORT_SYMBOL_GPL(__init_srcu_struct);

#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */

/**
 * init_srcu_struct - initialize a sleep-RCU structure
 * @sp: structure to initialize.
 *
 * Must invoke this on a given srcu_struct before passing that srcu_struct
 * to any other function.  Each srcu_struct represents a separate domain
 * of SRCU protection.
 */
int init_srcu_struct(struct srcu_struct *sp)
{
	spin_lock_init(&sp->gp_lock);
	return init_srcu_struct_fields(sp, false);
}
EXPORT_SYMBOL_GPL(init_srcu_struct);

#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */

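/*
 * Example usage of the initialization API (an illustrative sketch, not
 * part of this file; the names are hypothetical).  Static domains are
 * ready at boot, dynamic ones must be initialized and cleaned up:
 *
 *	DEFINE_SRCU(my_srcu);
 *
 *	struct srcu_struct dyn_srcu;
 *	...
 *	if (init_srcu_struct(&dyn_srcu))
 *		return -ENOMEM;
 *	...
 *	cleanup_srcu_struct(&dyn_srcu);
 */
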
/*
 * First-use initialization of statically allocated srcu_struct
 * structure.  Wiring up the combining tree is more than can be
 * done with compile-time initialization, so this check is added
 * to each update-side SRCU primitive.  Use ->gp_lock, which -is-
 * compile-time initialized, to resolve races involving multiple
 * CPUs trying to garner first-use privileges.
 */
static void check_init_srcu_struct(struct srcu_struct *sp)
{
	unsigned long flags;

	WARN_ON_ONCE(rcu_scheduler_active == RCU_SCHEDULER_INIT);
	/* The smp_load_acquire() pairs with the smp_store_release(). */
	if (!rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq_needed))) /*^^^*/
		return; /* Already initialized. */
	spin_lock_irqsave(&sp->gp_lock, flags);
	if (!rcu_seq_state(sp->srcu_gp_seq_needed)) {
		spin_unlock_irqrestore(&sp->gp_lock, flags);
		return;
	}
	init_srcu_struct_fields(sp, true);
	spin_unlock_irqrestore(&sp->gp_lock, flags);
}

/*
 * Returns approximate total of the readers' ->srcu_lock_count[] values
 * for the rank of per-CPU counters specified by idx.
 */
static unsigned long srcu_readers_lock_idx(struct srcu_struct *sp, int idx)
{
	int cpu;
	unsigned long sum = 0;

	for_each_possible_cpu(cpu) {
		struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu);

		sum += READ_ONCE(cpuc->srcu_lock_count[idx]);
	}
	return sum;
}

/*
 * Returns approximate total of the readers' ->srcu_unlock_count[] values
 * for the rank of per-CPU counters specified by idx.
 */
static unsigned long srcu_readers_unlock_idx(struct srcu_struct *sp, int idx)
{
	int cpu;
	unsigned long sum = 0;

	for_each_possible_cpu(cpu) {
		struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu);

		sum += READ_ONCE(cpuc->srcu_unlock_count[idx]);
	}
	return sum;
}

/*
 * Return true if the number of pre-existing readers is determined to
 * be zero.
 */
static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx)
{
	unsigned long unlocks;

	unlocks = srcu_readers_unlock_idx(sp, idx);

	/*
	 * Make sure that a lock is always counted if the corresponding
	 * unlock is counted. Needs to be a smp_mb() as the read side may
	 * contain a read from a variable that is written to before the
	 * synchronize_srcu() in the write side. In this case smp_mb()s
	 * A and B act like the store buffering pattern.
	 *
	 * This smp_mb() also pairs with smp_mb() C to prevent accesses
	 * after the synchronize_srcu() from being executed before the
	 * grace period ends.
	 */
	smp_mb(); /* A */

	/*
	 * If the locks are the same as the unlocks, then there must have
	 * been no readers on this index at some time in between. This does
	 * not mean that there are no more readers, as one could have read
	 * the current index but not have incremented the lock counter yet.
	 *
	 * So suppose that the updater is preempted here for so long
	 * that more than ULONG_MAX non-nested readers come and go in
	 * the meantime.  It turns out that this cannot result in overflow
	 * because if a reader modifies its unlock count after we read it
	 * above, then that reader's next load of ->srcu_idx is guaranteed
	 * to get the new value, which will cause it to operate on the
	 * other bank of counters, where it cannot contribute to the
	 * overflow of these counters.  This means that there is a maximum
	 * of 2*NR_CPUS increments, which cannot overflow given current
	 * systems, especially not on 64-bit systems.
	 *
	 * OK, how about nesting?  This does impose a limit on nesting
	 * of floor(ULONG_MAX/NR_CPUS/2), which should be sufficient,
	 * especially on 64-bit systems.
	 */
	return srcu_readers_lock_idx(sp, idx) == unlocks;
}

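/*
 * An illustrative store-buffering sketch of the check above ("x" is a
 * hypothetical variable written by an updater before its call to
 * synchronize_srcu() and read inside the read-side critical section):
 *
 *	Reader (__srcu_read_lock)		Updater (scan)
 *	-------------------------		--------------
 *	this_cpu_inc(->srcu_lock_count[i]);	WRITE_ONCE(x, 1);
 *	smp_mb();   (B)				smp_mb();   (A)
 *	r1 = READ_ONCE(x);			r2 = srcu_readers_lock_idx(sp, i);
 *
 * As in the classic store-buffering pattern, the outcome r1 == 0 with
 * r2 missing the reader's increment is forbidden: either the updater's
 * scan counts the reader, or the reader sees the pre-grace-period store.
 */
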
/**
 * srcu_readers_active - returns true if there are readers, and false
 *			 otherwise
 * @sp: which srcu_struct to count active readers (holding srcu_read_lock).
 *
 * Note that this is not an atomic primitive, and can therefore suffer
 * severe errors when invoked on an active srcu_struct.  That said, it
 * can be useful as an error check at cleanup time.
 */
static bool srcu_readers_active(struct srcu_struct *sp)
{
	int cpu;
	unsigned long sum = 0;

	for_each_possible_cpu(cpu) {
		struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu);

		sum += READ_ONCE(cpuc->srcu_lock_count[0]);
		sum += READ_ONCE(cpuc->srcu_lock_count[1]);
		sum -= READ_ONCE(cpuc->srcu_unlock_count[0]);
		sum -= READ_ONCE(cpuc->srcu_unlock_count[1]);
	}
	return sum;
}

#define SRCU_INTERVAL	1

/*
 * Return grace-period delay, zero if there are expedited grace
 * periods pending, SRCU_INTERVAL otherwise.
 */
static unsigned long srcu_get_delay(struct srcu_struct *sp)
{
	if (ULONG_CMP_LT(READ_ONCE(sp->srcu_gp_seq),
			 READ_ONCE(sp->srcu_gp_seq_needed_exp)))
		return 0;
	return SRCU_INTERVAL;
}

/**
 * cleanup_srcu_struct - deconstruct a sleep-RCU structure
 * @sp: structure to clean up.
 *
 * Must invoke this after you are finished using a given srcu_struct that
 * was initialized via init_srcu_struct(), else you leak memory.
 */
void cleanup_srcu_struct(struct srcu_struct *sp)
{
	int cpu;

	if (WARN_ON(!srcu_get_delay(sp)))
		return; /* Leakage unless caller handles error. */
	if (WARN_ON(srcu_readers_active(sp)))
		return; /* Leakage unless caller handles error. */
	flush_delayed_work(&sp->work);
	for_each_possible_cpu(cpu)
		flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work);
	if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
	    WARN_ON(srcu_readers_active(sp))) {
		pr_info("cleanup_srcu_struct: Active srcu_struct %p state: %d\n", sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)));
		return; /* Caller forgot to stop doing call_srcu()? */
	}
	free_percpu(sp->sda);
	sp->sda = NULL;
}
EXPORT_SYMBOL_GPL(cleanup_srcu_struct);

/*
 * Counts the new reader in the appropriate per-CPU element of the
 * srcu_struct.
 * Returns an index that must be passed to the matching srcu_read_unlock().
 */
int __srcu_read_lock(struct srcu_struct *sp)
{
	int idx;

	idx = READ_ONCE(sp->srcu_idx) & 0x1;
	this_cpu_inc(sp->sda->srcu_lock_count[idx]);
	smp_mb(); /* B */  /* Avoid leaking the critical section. */
	return idx;
}
EXPORT_SYMBOL_GPL(__srcu_read_lock);

/*
 * Removes the count for the old reader from the appropriate per-CPU
 * element of the srcu_struct.  Note that this may well run on a
 * different CPU than the one on which the corresponding
 * srcu_read_lock() incremented the count.
 */
void __srcu_read_unlock(struct srcu_struct *sp, int idx)
{
	smp_mb(); /* C */  /* Avoid leaking the critical section. */
	this_cpu_inc(sp->sda->srcu_unlock_count[idx]);
}
EXPORT_SYMBOL_GPL(__srcu_read_unlock);

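/*
 * Example read-side usage (an illustrative sketch; "my_srcu", "gp", and
 * struct foo are hypothetical).  Readers normally use the srcu_read_lock()
 * and srcu_read_unlock() wrappers, which map onto the functions above;
 * the critical section may block:
 *
 *	int idx;
 *	struct foo *p;
 *
 *	idx = srcu_read_lock(&my_srcu);
 *	p = srcu_dereference(gp, &my_srcu);
 *	if (p)
 *		do_something_with(p);
 *	srcu_read_unlock(&my_srcu, idx);
 */
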
/*
 * We use an adaptive strategy for synchronize_srcu() and especially for
 * synchronize_srcu_expedited().  We spin for a fixed time period
 * (defined below) to allow SRCU readers to exit their read-side critical
 * sections.  If there are still some readers after a few microseconds,
 * we repeatedly block for 1-millisecond time periods.
 */
#define SRCU_RETRY_CHECK_DELAY	5

/*
 * Start an SRCU grace period.
 */
static void srcu_gp_start(struct srcu_struct *sp)
{
	struct srcu_data *sdp = this_cpu_ptr(sp->sda);
	int state;

	RCU_LOCKDEP_WARN(!lockdep_is_held(&sp->gp_lock),
			 "Invoked srcu_gp_start() without ->gp_lock!");
	WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed));
	rcu_segcblist_advance(&sdp->srcu_cblist,
			      rcu_seq_current(&sp->srcu_gp_seq));
	(void)rcu_segcblist_accelerate(&sdp->srcu_cblist,
				       rcu_seq_snap(&sp->srcu_gp_seq));
	smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */
	rcu_seq_start(&sp->srcu_gp_seq);
	state = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq));
	WARN_ON_ONCE(state != SRCU_STATE_SCAN1);
}

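/*
 * Sketch of the ->srcu_gp_seq encoding assumed above (per the rcu_seq_*()
 * helpers in rcu.h): the low-order state bits hold the grace-period phase
 * (SRCU_STATE_IDLE, then SRCU_STATE_SCAN1, then SRCU_STATE_SCAN2), and
 * the remaining high-order bits count grace periods.  Thus rcu_seq_start()
 * advances ->srcu_gp_seq from IDLE to SCAN1, rcu_seq_snap() returns the
 * value the sequence will have once a full grace period from now has
 * completed, and rcu_seq_done() compares a snapshot against the current
 * sequence number.
 */
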
/*
 * Track online CPUs to guide callback workqueue placement.
 */
DEFINE_PER_CPU(bool, srcu_online);

void srcu_online_cpu(unsigned int cpu)
{
	WRITE_ONCE(per_cpu(srcu_online, cpu), true);
}

void srcu_offline_cpu(unsigned int cpu)
{
	WRITE_ONCE(per_cpu(srcu_online, cpu), false);
}

/*
 * Place the workqueue handler on the specified CPU if online, otherwise
 * just run it wherever.  This is useful for placing workqueue handlers
 * that are to invoke the specified CPU's callbacks.
 */
static bool srcu_queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
				       struct delayed_work *dwork,
				       unsigned long delay)
{
	bool ret;

	preempt_disable();
	if (READ_ONCE(per_cpu(srcu_online, cpu)))
		ret = queue_delayed_work_on(cpu, wq, dwork, delay);
	else
		ret = queue_delayed_work(wq, dwork, delay);
	preempt_enable();
	return ret;
}

/*
 * Schedule callback invocation for the specified srcu_data structure,
 * if possible, on the corresponding CPU.
 */
static void srcu_schedule_cbs_sdp(struct srcu_data *sdp, unsigned long delay)
{
	srcu_queue_delayed_work_on(sdp->cpu, system_power_efficient_wq,
				   &sdp->work, delay);
}

/*
 * Schedule callback invocation for all srcu_data structures associated
 * with the specified srcu_node structure that have callbacks for the
 * just-completed grace period, the one corresponding to idx.  If possible,
 * schedule this invocation on the corresponding CPUs.
 */
static void srcu_schedule_cbs_snp(struct srcu_struct *sp, struct srcu_node *snp,
				  unsigned long mask, unsigned long delay)
{
	int cpu;

	for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) {
		if (!(mask & (1 << (cpu - snp->grplo))))
			continue;
		srcu_schedule_cbs_sdp(per_cpu_ptr(sp->sda, cpu), delay);
	}
}

/*
 * Note the end of an SRCU grace period.  Initiates callback invocation
 * and starts a new grace period if needed.
 *
 * The ->srcu_cb_mutex acquisition does not protect any data, but
 * instead prevents more than one grace period from starting while we
 * are initiating callback invocation.  This allows the ->srcu_have_cbs[]
 * array to have a finite number of elements.
 */
static void srcu_gp_end(struct srcu_struct *sp)
{
	unsigned long cbdelay;
	bool cbs;
	unsigned long gpseq;
	int idx;
	int idxnext;
	unsigned long mask;
	struct srcu_node *snp;

	/* Prevent more than one additional grace period. */
	mutex_lock(&sp->srcu_cb_mutex);

	/* End the current grace period. */
	spin_lock_irq(&sp->gp_lock);
	idx = rcu_seq_state(sp->srcu_gp_seq);
	WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
	cbdelay = srcu_get_delay(sp);
	sp->srcu_last_gp_end = ktime_get_mono_fast_ns();
	rcu_seq_end(&sp->srcu_gp_seq);
	gpseq = rcu_seq_current(&sp->srcu_gp_seq);
	if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, gpseq))
		sp->srcu_gp_seq_needed_exp = gpseq;
	spin_unlock_irq(&sp->gp_lock);
	mutex_unlock(&sp->srcu_gp_mutex);
	/* A new grace period can start at this point.  But only one. */

	/* Initiate callback invocation as needed. */
	idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs);
	idxnext = (idx + 1) % ARRAY_SIZE(snp->srcu_have_cbs);
	rcu_for_each_node_breadth_first(sp, snp) {
		spin_lock_irq(&snp->lock);
		cbs = false;
		if (snp >= sp->level[rcu_num_lvls - 1])
			cbs = snp->srcu_have_cbs[idx] == gpseq;
		snp->srcu_have_cbs[idx] = gpseq;
		rcu_seq_set_state(&snp->srcu_have_cbs[idx], 1);
		if (ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, gpseq))
			snp->srcu_gp_seq_needed_exp = gpseq;
		mask = snp->srcu_data_have_cbs[idx];
		snp->srcu_data_have_cbs[idx] = 0;
		spin_unlock_irq(&snp->lock);
		if (cbs) {
			smp_mb(); /* GP end before CB invocation. */
			srcu_schedule_cbs_snp(sp, snp, mask, cbdelay);
		}
	}

	/* Callback initiation done, allow grace periods after next. */
	mutex_unlock(&sp->srcu_cb_mutex);

	/* Start a new grace period if needed. */
	spin_lock_irq(&sp->gp_lock);
	gpseq = rcu_seq_current(&sp->srcu_gp_seq);
	if (!rcu_seq_state(gpseq) &&
	    ULONG_CMP_LT(gpseq, sp->srcu_gp_seq_needed)) {
		srcu_gp_start(sp);
		spin_unlock_irq(&sp->gp_lock);
		/* Throttle expedited grace periods: Should be rare! */
		srcu_reschedule(sp, rcu_seq_ctr(gpseq) & 0x3ff
				    ? 0 : SRCU_INTERVAL);
	} else {
		spin_unlock_irq(&sp->gp_lock);
	}
}

/*
 * Funnel-locking scheme to scalably mediate many concurrent expedited
 * grace-period requests.  This function is invoked for the first known
 * expedited request for a grace period that has already been requested,
 * but without expediting.  To start a completely new grace period,
 * whether expedited or not, use srcu_funnel_gp_start() instead.
 */
static void srcu_funnel_exp_start(struct srcu_struct *sp, struct srcu_node *snp,
				  unsigned long s)
{
	unsigned long flags;

	for (; snp != NULL; snp = snp->srcu_parent) {
		if (rcu_seq_done(&sp->srcu_gp_seq, s) ||
		    ULONG_CMP_GE(READ_ONCE(snp->srcu_gp_seq_needed_exp), s))
			return;
		spin_lock_irqsave(&snp->lock, flags);
		if (ULONG_CMP_GE(snp->srcu_gp_seq_needed_exp, s)) {
			spin_unlock_irqrestore(&snp->lock, flags);
			return;
		}
		WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
		spin_unlock_irqrestore(&snp->lock, flags);
	}
	spin_lock_irqsave(&sp->gp_lock, flags);
	if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s))
		sp->srcu_gp_seq_needed_exp = s;
	spin_unlock_irqrestore(&sp->gp_lock, flags);
}

/*
 * Funnel-locking scheme to scalably mediate many concurrent grace-period
 * requests.  The winner has to do the work of actually starting grace
 * period s.  Losers must either ensure that their desired grace-period
 * number is recorded on at least their leaf srcu_node structure, or they
 * must take steps to invoke their own callbacks.
 */
static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
				 unsigned long s, bool do_norm)
{
	unsigned long flags;
	int idx = rcu_seq_ctr(s) % ARRAY_SIZE(sdp->mynode->srcu_have_cbs);
	struct srcu_node *snp = sdp->mynode;
	unsigned long snp_seq;

	/* Each pass through the loop does one level of the srcu_node tree. */
	for (; snp != NULL; snp = snp->srcu_parent) {
		if (rcu_seq_done(&sp->srcu_gp_seq, s) && snp != sdp->mynode)
			return; /* GP already done and CBs recorded. */
		spin_lock_irqsave(&snp->lock, flags);
		if (ULONG_CMP_GE(snp->srcu_have_cbs[idx], s)) {
			snp_seq = snp->srcu_have_cbs[idx];
			if (snp == sdp->mynode && snp_seq == s)
				snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
			spin_unlock_irqrestore(&snp->lock, flags);
			if (snp == sdp->mynode && snp_seq != s) {
				smp_mb(); /* CBs after GP! */
				srcu_schedule_cbs_sdp(sdp, do_norm
							   ? SRCU_INTERVAL
							   : 0);
				return;
			}
			if (!do_norm)
				srcu_funnel_exp_start(sp, snp, s);
			return;
		}
		snp->srcu_have_cbs[idx] = s;
		if (snp == sdp->mynode)
			snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
		if (!do_norm && ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, s))
			snp->srcu_gp_seq_needed_exp = s;
		spin_unlock_irqrestore(&snp->lock, flags);
	}

	/* Top of tree, must ensure the grace period will be started. */
	spin_lock_irqsave(&sp->gp_lock, flags);
	if (ULONG_CMP_LT(sp->srcu_gp_seq_needed, s)) {
		/*
		 * Record need for grace period s.  Pair with load
		 * acquire setting up for initialization.
		 */
		smp_store_release(&sp->srcu_gp_seq_needed, s); /*^^^*/
	}
	if (!do_norm && ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s))
		sp->srcu_gp_seq_needed_exp = s;

	/* If grace period not already done and none in progress, start it. */
	if (!rcu_seq_done(&sp->srcu_gp_seq, s) &&
	    rcu_seq_state(sp->srcu_gp_seq) == SRCU_STATE_IDLE) {
		WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed));
		srcu_gp_start(sp);
		queue_delayed_work(system_power_efficient_wq, &sp->work,
				   srcu_get_delay(sp));
	}
	spin_unlock_irqrestore(&sp->gp_lock, flags);
}

/*
 * Wait until all readers counted by array index idx complete, but
 * loop an additional time if there is an expedited grace period pending.
 * The caller must ensure that ->srcu_idx is not changed while checking.
 */
static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount)
{
	for (;;) {
		if (srcu_readers_active_idx_check(sp, idx))
			return true;
		if (--trycount + !srcu_get_delay(sp) <= 0)
			return false;
		udelay(SRCU_RETRY_CHECK_DELAY);
	}
}

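/*
 * For example (a sketch of the retry arithmetic above): with trycount == 2
 * and no expedited grace period pending, the counters are checked at most
 * twice, separated by a udelay(SRCU_RETRY_CHECK_DELAY) spin of 5
 * microseconds.  A pending expedited grace period makes !srcu_get_delay()
 * equal to 1 and thus buys one additional check before the caller falls
 * back to rescheduling the workqueue.
 */
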
/*
 * Increment the ->srcu_idx counter so that future SRCU readers will
 * use the other rank of the ->srcu_(un)lock_count[] arrays.  This allows
 * us to wait for pre-existing readers in a starvation-free manner.
 */
static void srcu_flip(struct srcu_struct *sp)
{
	/*
	 * Ensure that if this updater saw a given reader's increment
	 * from __srcu_read_lock(), that reader was using an old value
	 * of ->srcu_idx.  Also ensure that if a given reader sees the
	 * new value of ->srcu_idx, this updater's earlier scans cannot
	 * have seen that reader's increments (which is OK, because this
	 * grace period need not wait on that reader).
	 */
	smp_mb(); /* E */  /* Pairs with B and C. */

	WRITE_ONCE(sp->srcu_idx, sp->srcu_idx + 1);

	/*
	 * Ensure that if the updater misses an __srcu_read_unlock()
	 * increment, that task's next __srcu_read_lock() will see the
	 * above counter update.  Note that both this memory barrier
	 * and the one in srcu_readers_active_idx_check() provide the
	 * guarantee for __srcu_read_lock().
	 */
	smp_mb(); /* D */  /* Pairs with C. */
}

/*
 * If SRCU is likely idle, return true, otherwise return false.
 *
 * Note that it is OK for several concurrent from-idle requests for a new
 * grace period to specify expediting, because they will all end up
 * requesting the same grace period anyhow.  So no loss.
 *
 * Note also that if any CPU (including the current one) is still invoking
 * callbacks, this function will nevertheless say "idle".  This is not
 * ideal, but the overhead of checking all CPUs' callback lists is even
 * less ideal, especially on large systems.  Furthermore, the wakeup
 * can happen before the callback is fully removed, so we have no choice
 * but to accept this type of error.
 *
 * This function is also subject to counter-wrap errors, but let's face
 * it, if this function was preempted for enough time for the counters
 * to wrap, it really doesn't matter whether or not we expedite the grace
 * period.  The extra overhead of a needlessly expedited grace period is
 * negligible when amortized over that time period, and the extra latency
 * of a needlessly non-expedited grace period is similarly negligible.
 */
static bool srcu_might_be_idle(struct srcu_struct *sp)
{
	unsigned long curseq;
	unsigned long flags;
	struct srcu_data *sdp;
	unsigned long t;

	/* If the local srcu_data structure has callbacks, not idle. */
	local_irq_save(flags);
	sdp = this_cpu_ptr(sp->sda);
	if (rcu_segcblist_pend_cbs(&sdp->srcu_cblist)) {
		local_irq_restore(flags);
		return false; /* Callbacks already present, so not idle. */
	}
	local_irq_restore(flags);

	/*
	 * No local callbacks, so probabilistically probe global state.
	 * Exact information would require acquiring locks, which would
	 * kill scalability, hence the probabilistic nature of the probe.
	 */

	/* First, see if enough time has passed since the last GP. */
	t = ktime_get_mono_fast_ns();
	if (exp_holdoff == 0 ||
	    time_in_range_open(t, sp->srcu_last_gp_end,
			       sp->srcu_last_gp_end + exp_holdoff))
		return false; /* Too soon after last GP. */

	/* Next, check for probable idleness. */
	curseq = rcu_seq_current(&sp->srcu_gp_seq);
	smp_mb(); /* Order ->srcu_gp_seq with ->srcu_gp_seq_needed. */
	if (ULONG_CMP_LT(curseq, READ_ONCE(sp->srcu_gp_seq_needed)))
		return false; /* Grace period in progress, so not idle. */
	smp_mb(); /* Order ->srcu_gp_seq with prior access. */
	if (curseq != rcu_seq_current(&sp->srcu_gp_seq))
		return false; /* GP # changed, so not idle. */
	return true; /* With reasonable probability, idle! */
}

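/*
 * For example, with the default exp_holdoff of 25 * 1000 ns (25
 * microseconds), a synchronize_srcu() arriving within 25 microseconds of
 * the recorded ->srcu_last_gp_end is never treated as from-idle, and so
 * is not auto-expedited by this heuristic (though rcu_gp_is_expedited()
 * can still force expediting).
 */
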
/*
 * Enqueue an SRCU callback on the srcu_data structure associated with
 * the current CPU and the specified srcu_struct structure, initiating
 * grace-period processing if it is not already running.
 *
 * Note that all CPUs must agree that the grace period extended beyond
 * all pre-existing SRCU read-side critical sections.  On systems with
 * more than one CPU, this means that when "func()" is invoked, each CPU
 * is guaranteed to have executed a full memory barrier since the end of
 * its last corresponding SRCU read-side critical section whose beginning
 * preceded the call to call_srcu().  It also means that each CPU executing
 * an SRCU read-side critical section that continues beyond the start of
 * "func()" must have executed a memory barrier after the call_srcu()
 * but before the beginning of that SRCU read-side critical section.
 * Note that these guarantees include CPUs that are offline, idle, or
 * executing in user mode, as well as CPUs that are executing in the kernel.
 *
 * Furthermore, if CPU A invoked call_srcu() and CPU B invoked the
 * resulting SRCU callback function "func()", then both CPU A and CPU
 * B are guaranteed to execute a full memory barrier during the time
 * interval between the call to call_srcu() and the invocation of "func()".
 * This guarantee applies even if CPU A and CPU B are the same CPU (but
 * again only if the system has more than one CPU).
 *
 * Of course, these guarantees apply only for invocations of call_srcu(),
 * srcu_read_lock(), and srcu_read_unlock() that are all passed the same
 * srcu_struct structure.
 */
void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
		 rcu_callback_t func, bool do_norm)
{
	unsigned long flags;
	bool needexp = false;
	bool needgp = false;
	unsigned long s;
	struct srcu_data *sdp;

	check_init_srcu_struct(sp);
	rhp->func = func;
	local_irq_save(flags);
	sdp = this_cpu_ptr(sp->sda);
	spin_lock(&sdp->lock);
	rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp, false);
	rcu_segcblist_advance(&sdp->srcu_cblist,
			      rcu_seq_current(&sp->srcu_gp_seq));
	s = rcu_seq_snap(&sp->srcu_gp_seq);
	(void)rcu_segcblist_accelerate(&sdp->srcu_cblist, s);
	if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) {
		sdp->srcu_gp_seq_needed = s;
		needgp = true;
	}
	if (!do_norm && ULONG_CMP_LT(sdp->srcu_gp_seq_needed_exp, s)) {
		sdp->srcu_gp_seq_needed_exp = s;
		needexp = true;
	}
	spin_unlock_irqrestore(&sdp->lock, flags);
	if (needgp)
		srcu_funnel_gp_start(sp, sdp, s, do_norm);
	else if (needexp)
		srcu_funnel_exp_start(sp, sdp->mynode, s);
}

void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
	       rcu_callback_t func)
{
	__call_srcu(sp, rhp, func, true);
}
EXPORT_SYMBOL_GPL(call_srcu);

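/*
 * Example call_srcu() usage (an illustrative sketch; struct foo, "gp",
 * "my_srcu", and foo_reclaim() are hypothetical).  The rcu_head is
 * embedded in the protected structure, and the callback runs only after
 * a full SRCU grace period has elapsed:
 *
 *	struct foo {
 *		struct rcu_head rh;
 *		int data;
 *	};
 *
 *	static void foo_reclaim(struct rcu_head *rhp)
 *	{
 *		kfree(container_of(rhp, struct foo, rh));
 *	}
 *
 *	...
 *	rcu_assign_pointer(gp, new_fp);
 *	call_srcu(&my_srcu, &old_fp->rh, foo_reclaim);
 */
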
/*
 * Helper function for synchronize_srcu() and synchronize_srcu_expedited().
 */
static void __synchronize_srcu(struct srcu_struct *sp, bool do_norm)
{
	struct rcu_synchronize rcu;

	RCU_LOCKDEP_WARN(lock_is_held(&sp->dep_map) ||
			 lock_is_held(&rcu_bh_lock_map) ||
			 lock_is_held(&rcu_lock_map) ||
			 lock_is_held(&rcu_sched_lock_map),
			 "Illegal synchronize_srcu() in same-type SRCU (or in RCU) read-side critical section");

	if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
		return;
	might_sleep();
	check_init_srcu_struct(sp);
	init_completion(&rcu.completion);
	init_rcu_head_on_stack(&rcu.head);
	__call_srcu(sp, &rcu.head, wakeme_after_rcu, do_norm);
	wait_for_completion(&rcu.completion);
	destroy_rcu_head_on_stack(&rcu.head);
}

/**
 * synchronize_srcu_expedited - Brute-force SRCU grace period
 * @sp: srcu_struct with which to synchronize.
 *
 * Wait for an SRCU grace period to elapse, but be more aggressive about
 * spinning rather than blocking when waiting.
 *
 * Note that synchronize_srcu_expedited() has the same deadlock and
 * memory-ordering properties as does synchronize_srcu().
 */
void synchronize_srcu_expedited(struct srcu_struct *sp)
{
	__synchronize_srcu(sp, rcu_gp_is_normal());
}
EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);

/**
 * synchronize_srcu - wait for prior SRCU read-side critical-section completion
 * @sp: srcu_struct with which to synchronize.
 *
 * Wait for the counts of both indexes to drain to zero.  To avoid the
 * possible starvation of synchronize_srcu(), it first waits for the count
 * of index = ((->srcu_idx & 1) ^ 1) to drain to zero, then flips
 * ->srcu_idx and waits for the count of the other index to drain.
 *
 * Can block; must be called from process context.
 *
 * Note that it is illegal to call synchronize_srcu() from the corresponding
 * SRCU read-side critical section; doing so will result in deadlock.
 * However, it is perfectly legal to call synchronize_srcu() on one
 * srcu_struct from some other srcu_struct's read-side critical section,
 * as long as the resulting graph of srcu_structs is acyclic.
 *
 * There are memory-ordering constraints implied by synchronize_srcu().
 * On systems with more than one CPU, when synchronize_srcu() returns,
 * each CPU is guaranteed to have executed a full memory barrier since
 * the end of its last corresponding SRCU read-side critical section
 * whose beginning preceded the call to synchronize_srcu().  In addition,
 * each CPU having an SRCU read-side critical section that extends beyond
 * the return from synchronize_srcu() is guaranteed to have executed a
 * full memory barrier after the beginning of synchronize_srcu() and before
 * the beginning of that SRCU read-side critical section.  Note that these
 * guarantees include CPUs that are offline, idle, or executing in user mode,
 * as well as CPUs that are executing in the kernel.
 *
 * Furthermore, if CPU A invoked synchronize_srcu(), which returned
 * to its caller on CPU B, then both CPU A and CPU B are guaranteed
 * to have executed a full memory barrier during the execution of
 * synchronize_srcu().  This guarantee applies even if CPU A and CPU B
 * are the same CPU, but again only if the system has more than one CPU.
 *
 * Of course, these memory-ordering guarantees apply only when
 * synchronize_srcu(), srcu_read_lock(), and srcu_read_unlock() are
 * passed the same srcu_struct structure.
 *
 * If SRCU is likely idle, expedite the first request.  This semantic
 * was provided by Classic SRCU, and is relied upon by its users, so TREE
 * SRCU must also provide it.  Note that detecting idleness is heuristic
 * and subject to both false positives and negatives.
 */
void synchronize_srcu(struct srcu_struct *sp)
{
	if (srcu_might_be_idle(sp) || rcu_gp_is_expedited())
		synchronize_srcu_expedited(sp);
	else
		__synchronize_srcu(sp, true);
}
EXPORT_SYMBOL_GPL(synchronize_srcu);

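/*
 * Example update-side usage (an illustrative sketch; "gp", "gp_lock",
 * "my_srcu", and struct foo are hypothetical): unpublish the old version,
 * wait for pre-existing readers, then free:
 *
 *	struct foo *old_fp;
 *
 *	spin_lock(&gp_lock);
 *	old_fp = rcu_dereference_protected(gp, lockdep_is_held(&gp_lock));
 *	rcu_assign_pointer(gp, new_fp);
 *	spin_unlock(&gp_lock);
 *	synchronize_srcu(&my_srcu);
 *	kfree(old_fp);
 */
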

/**
 * srcu_batches_completed - return batches completed.
 * @sp: srcu_struct on which to report batch completion.
 *
 * Report the number of batches, correlated with, but not necessarily
 * precisely the same as, the number of grace periods that have elapsed.
 */
unsigned long srcu_batches_completed(struct srcu_struct *sp)
{
	return sp->srcu_idx;
}
EXPORT_SYMBOL_GPL(srcu_batches_completed);

/*
 * Core SRCU state machine.  Push state bits of ->srcu_gp_seq
 * to SRCU_STATE_SCAN2, and invoke srcu_gp_end() when scan has
 * completed in that state.
 */
static void srcu_advance_state(struct srcu_struct *sp)
{
	int idx;

	mutex_lock(&sp->srcu_gp_mutex);

	/*
	 * Because readers might be delayed for an extended period after
	 * fetching ->srcu_idx for their index, at any point in time there
	 * might well be readers using both idx=0 and idx=1.  We therefore
	 * need to wait for readers to clear from both index values before
	 * invoking a callback.
	 *
	 * The load-acquire ensures that we see the accesses performed
	 * by the prior grace period.
	 */
	idx = rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq)); /* ^^^ */
	if (idx == SRCU_STATE_IDLE) {
		spin_lock_irq(&sp->gp_lock);
		if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) {
			WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq));
			spin_unlock_irq(&sp->gp_lock);
			mutex_unlock(&sp->srcu_gp_mutex);
			return;
		}
		idx = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq));
		if (idx == SRCU_STATE_IDLE)
			srcu_gp_start(sp);
		spin_unlock_irq(&sp->gp_lock);
		if (idx != SRCU_STATE_IDLE) {
			mutex_unlock(&sp->srcu_gp_mutex);
			return; /* Someone else started the grace period. */
		}
	}

	if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_SCAN1) {
		idx = 1 ^ (sp->srcu_idx & 1);
		if (!try_check_zero(sp, idx, 1)) {
			mutex_unlock(&sp->srcu_gp_mutex);
			return; /* readers present, retry later. */
		}
		srcu_flip(sp);
		rcu_seq_set_state(&sp->srcu_gp_seq, SRCU_STATE_SCAN2);
	}

	if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_SCAN2) {

		/*
		 * SRCU read-side critical sections are normally short,
		 * so check at least twice in quick succession after a flip.
		 */
		idx = 1 ^ (sp->srcu_idx & 1);
		if (!try_check_zero(sp, idx, 2)) {
			mutex_unlock(&sp->srcu_gp_mutex);
			return; /* readers present, retry later. */
		}
		srcu_gp_end(sp); /* Releases ->srcu_gp_mutex. */
	}
}
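
/*
 * Sketch of the ->srcu_gp_seq encoding that drives the state machine
 * above, assuming the two low-order bits hold the phase (per
 * RCU_SEQ_CTR_SHIFT in kernel/rcu/rcu.h); illustrative, not normative:
 *
 *	initially:				seq == 0x0 (ctr 0, SRCU_STATE_IDLE)
 *	srcu_gp_start():			seq == 0x1 (SRCU_STATE_SCAN1)
 *	rcu_seq_set_state(, SRCU_STATE_SCAN2):	seq == 0x2 (SRCU_STATE_SCAN2)
 *	srcu_gp_end():				seq == 0x4 (ctr 1, SRCU_STATE_IDLE)
 *
 * rcu_seq_state() extracts the low-order phase bits and rcu_seq_ctr()
 * the remaining grace-period counter bits.
 */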

/*
 * Invoke a limited number of SRCU callbacks that have passed through
 * their grace period.  If there are more to do, SRCU will reschedule
 * the workqueue.  Note that needed memory barriers have been executed
 * in this task's context by srcu_readers_active_idx_check().
 */
static void srcu_invoke_callbacks(struct work_struct *work)
{
	bool more;
	struct rcu_cblist ready_cbs;
	struct rcu_head *rhp;
	struct srcu_data *sdp;
	struct srcu_struct *sp;

	sdp = container_of(work, struct srcu_data, work.work);
	sp = sdp->sp;
	rcu_cblist_init(&ready_cbs);
	spin_lock_irq(&sdp->lock);
	smp_mb(); /* Old grace periods before callback invocation! */
	rcu_segcblist_advance(&sdp->srcu_cblist,
			      rcu_seq_current(&sp->srcu_gp_seq));
	if (sdp->srcu_cblist_invoking ||
	    !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) {
		spin_unlock_irq(&sdp->lock);
		return; /* Someone else on the job or nothing to do. */
	}

	/* We are on the job!  Extract and invoke ready callbacks. */
	sdp->srcu_cblist_invoking = true;
	rcu_segcblist_extract_done_cbs(&sdp->srcu_cblist, &ready_cbs);
	spin_unlock_irq(&sdp->lock);
	rhp = rcu_cblist_dequeue(&ready_cbs);
	for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
		local_bh_disable();
		rhp->func(rhp);
		local_bh_enable();
	}

	/*
	 * Update counts, accelerate new callbacks, and if needed,
	 * schedule another round of callback invocation.
	 */
	spin_lock_irq(&sdp->lock);
	rcu_segcblist_insert_count(&sdp->srcu_cblist, &ready_cbs);
	(void)rcu_segcblist_accelerate(&sdp->srcu_cblist,
				       rcu_seq_snap(&sp->srcu_gp_seq));
	sdp->srcu_cblist_invoking = false;
	more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist);
	spin_unlock_irq(&sdp->lock);
	if (more)
		srcu_schedule_cbs_sdp(sdp, 0);
}
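
/*
 * Because rhp->func(rhp) above runs in workqueue context with BH disabled,
 * SRCU callbacks must not block.  A callback that needs to sleep can
 * instead punt to process context, as in this sketch (my_obj, ->rh,
 * ->release_work, and my_cb() are hypothetical names):
 *
 *	static void my_cb(struct rcu_head *rhp)
 *	{
 *		struct my_obj *p = container_of(rhp, struct my_obj, rh);
 *
 *		schedule_work(&p->release_work);
 *	}
 */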

/*
 * Finished one round of SRCU grace period.  Start another if there are
 * more SRCU callbacks queued, otherwise put SRCU into not-running state.
 */
static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay)
{
	bool pushgp = true;

	spin_lock_irq(&sp->gp_lock);
	if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) {
		if (!WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq))) {
			/* All requests fulfilled, time to go idle. */
			pushgp = false;
		}
	} else if (!rcu_seq_state(sp->srcu_gp_seq)) {
		/* Outstanding request and no GP.  Start one. */
		srcu_gp_start(sp);
	}
	spin_unlock_irq(&sp->gp_lock);

	if (pushgp)
		queue_delayed_work(system_power_efficient_wq, &sp->work, delay);
}
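
/*
 * The sequence-number comparisons here use ULONG_CMP_GE() rather than a
 * plain >= so the result stays correct across counter wraparound.
 * Sketch, assuming the rcupdate.h definition
 * ULONG_CMP_GE(a, b) == (ULONG_MAX / 2 >= (a) - (b)):
 *
 *	ULONG_CMP_GE(5, 3)		-> true  (5 is at or after 3)
 *	ULONG_CMP_GE(3, 5)		-> false (unsigned 3 - 5 is huge)
 *	ULONG_CMP_GE(1, ULONG_MAX)	-> true  (1 - ULONG_MAX wraps to 2)
 */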

/*
 * This is the work-queue function that handles SRCU grace periods.
 */
void process_srcu(struct work_struct *work)
{
	struct srcu_struct *sp;

	sp = container_of(work, struct srcu_struct, work.work);

	srcu_advance_state(sp);
	srcu_reschedule(sp, srcu_get_delay(sp));
}
EXPORT_SYMBOL_GPL(process_srcu);

void srcutorture_get_gp_data(enum rcutorture_type test_type,
			     struct srcu_struct *sp, int *flags,
			     unsigned long *gpnum, unsigned long *completed)
{
	if (test_type != SRCU_FLAVOR)
		return;
	*flags = 0;
	*completed = rcu_seq_ctr(sp->srcu_gp_seq);
	*gpnum = rcu_seq_ctr(sp->srcu_gp_seq_needed);
}
EXPORT_SYMBOL_GPL(srcutorture_get_gp_data);