/*
 * Sleepable Read-Copy Update mechanism for mutual exclusion.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 * Copyright (C) IBM Corporation, 2006
 * Copyright (C) Fujitsu, 2012
 *
 * Author: Paul McKenney <paulmck@us.ibm.com>
 *	   Lai Jiangshan <laijs@cn.fujitsu.com>
 *
 * For detailed explanation of Read-Copy Update mechanism see -
 *		Documentation/RCU/ *.txt
 *
 */

#include <linux/export.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
McKenney #include <linux/rcupdate_wait.h> 34dad81a20SPaul E. McKenney #include <linux/sched.h> 35dad81a20SPaul E. McKenney #include <linux/smp.h> 36dad81a20SPaul E. McKenney #include <linux/delay.h> 37*22607d66SPaul E. McKenney #include <linux/module.h> 38dad81a20SPaul E. McKenney #include <linux/srcu.h> 39dad81a20SPaul E. McKenney 40dad81a20SPaul E. McKenney #include "rcu.h" 41dad81a20SPaul E. McKenney 42*22607d66SPaul E. McKenney ulong exp_holdoff = 50 * 1000; /* Holdoff (ns) for auto-expediting. */ 43*22607d66SPaul E. McKenney module_param(exp_holdoff, ulong, 0444); 44*22607d66SPaul E. McKenney 45da915ad5SPaul E. McKenney static void srcu_invoke_callbacks(struct work_struct *work); 46da915ad5SPaul E. McKenney static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay); 47da915ad5SPaul E. McKenney 48da915ad5SPaul E. McKenney /* 49da915ad5SPaul E. McKenney * Initialize SRCU combining tree. Note that statically allocated 50da915ad5SPaul E. McKenney * srcu_struct structures might already have srcu_read_lock() and 51da915ad5SPaul E. McKenney * srcu_read_unlock() running against them. So if the is_static parameter 52da915ad5SPaul E. McKenney * is set, don't initialize ->srcu_lock_count[] and ->srcu_unlock_count[]. 53da915ad5SPaul E. McKenney */ 54da915ad5SPaul E. McKenney static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static) 55dad81a20SPaul E. McKenney { 56da915ad5SPaul E. McKenney int cpu; 57da915ad5SPaul E. McKenney int i; 58da915ad5SPaul E. McKenney int level = 0; 59da915ad5SPaul E. McKenney int levelspread[RCU_NUM_LVLS]; 60da915ad5SPaul E. McKenney struct srcu_data *sdp; 61da915ad5SPaul E. McKenney struct srcu_node *snp; 62da915ad5SPaul E. McKenney struct srcu_node *snp_first; 63da915ad5SPaul E. McKenney 64da915ad5SPaul E. McKenney /* Work out the overall tree geometry. */ 65da915ad5SPaul E. McKenney sp->level[0] = &sp->node[0]; 66da915ad5SPaul E. McKenney for (i = 1; i < rcu_num_lvls; i++) 67da915ad5SPaul E. 
McKenney sp->level[i] = sp->level[i - 1] + num_rcu_lvl[i - 1]; 68da915ad5SPaul E. McKenney rcu_init_levelspread(levelspread, num_rcu_lvl); 69da915ad5SPaul E. McKenney 70da915ad5SPaul E. McKenney /* Each pass through this loop initializes one srcu_node structure. */ 71da915ad5SPaul E. McKenney rcu_for_each_node_breadth_first(sp, snp) { 72da915ad5SPaul E. McKenney spin_lock_init(&snp->lock); 73c7e88067SPaul E. McKenney WARN_ON_ONCE(ARRAY_SIZE(snp->srcu_have_cbs) != 74c7e88067SPaul E. McKenney ARRAY_SIZE(snp->srcu_data_have_cbs)); 75c7e88067SPaul E. McKenney for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) { 76da915ad5SPaul E. McKenney snp->srcu_have_cbs[i] = 0; 77c7e88067SPaul E. McKenney snp->srcu_data_have_cbs[i] = 0; 78c7e88067SPaul E. McKenney } 791e9a038bSPaul E. McKenney snp->srcu_gp_seq_needed_exp = 0; 80da915ad5SPaul E. McKenney snp->grplo = -1; 81da915ad5SPaul E. McKenney snp->grphi = -1; 82da915ad5SPaul E. McKenney if (snp == &sp->node[0]) { 83da915ad5SPaul E. McKenney /* Root node, special case. */ 84da915ad5SPaul E. McKenney snp->srcu_parent = NULL; 85da915ad5SPaul E. McKenney continue; 86da915ad5SPaul E. McKenney } 87da915ad5SPaul E. McKenney 88da915ad5SPaul E. McKenney /* Non-root node. */ 89da915ad5SPaul E. McKenney if (snp == sp->level[level + 1]) 90da915ad5SPaul E. McKenney level++; 91da915ad5SPaul E. McKenney snp->srcu_parent = sp->level[level - 1] + 92da915ad5SPaul E. McKenney (snp - sp->level[level]) / 93da915ad5SPaul E. McKenney levelspread[level - 1]; 94da915ad5SPaul E. McKenney } 95da915ad5SPaul E. McKenney 96da915ad5SPaul E. McKenney /* 97da915ad5SPaul E. McKenney * Initialize the per-CPU srcu_data array, which feeds into the 98da915ad5SPaul E. McKenney * leaves of the srcu_node tree. 99da915ad5SPaul E. McKenney */ 100da915ad5SPaul E. McKenney WARN_ON_ONCE(ARRAY_SIZE(sdp->srcu_lock_count) != 101da915ad5SPaul E. McKenney ARRAY_SIZE(sdp->srcu_unlock_count)); 102da915ad5SPaul E. McKenney level = rcu_num_lvls - 1; 103da915ad5SPaul E. 
McKenney snp_first = sp->level[level]; 104da915ad5SPaul E. McKenney for_each_possible_cpu(cpu) { 105da915ad5SPaul E. McKenney sdp = per_cpu_ptr(sp->sda, cpu); 106da915ad5SPaul E. McKenney spin_lock_init(&sdp->lock); 107da915ad5SPaul E. McKenney rcu_segcblist_init(&sdp->srcu_cblist); 108da915ad5SPaul E. McKenney sdp->srcu_cblist_invoking = false; 109da915ad5SPaul E. McKenney sdp->srcu_gp_seq_needed = sp->srcu_gp_seq; 1101e9a038bSPaul E. McKenney sdp->srcu_gp_seq_needed_exp = sp->srcu_gp_seq; 111da915ad5SPaul E. McKenney sdp->mynode = &snp_first[cpu / levelspread[level]]; 112da915ad5SPaul E. McKenney for (snp = sdp->mynode; snp != NULL; snp = snp->srcu_parent) { 113da915ad5SPaul E. McKenney if (snp->grplo < 0) 114da915ad5SPaul E. McKenney snp->grplo = cpu; 115da915ad5SPaul E. McKenney snp->grphi = cpu; 116da915ad5SPaul E. McKenney } 117da915ad5SPaul E. McKenney sdp->cpu = cpu; 118da915ad5SPaul E. McKenney INIT_DELAYED_WORK(&sdp->work, srcu_invoke_callbacks); 119da915ad5SPaul E. McKenney sdp->sp = sp; 120c7e88067SPaul E. McKenney sdp->grpmask = 1 << (cpu - sdp->mynode->grplo); 121da915ad5SPaul E. McKenney if (is_static) 122da915ad5SPaul E. McKenney continue; 123da915ad5SPaul E. McKenney 124da915ad5SPaul E. McKenney /* Dynamically allocated, better be no srcu_read_locks()! */ 125da915ad5SPaul E. McKenney for (i = 0; i < ARRAY_SIZE(sdp->srcu_lock_count); i++) { 126da915ad5SPaul E. McKenney sdp->srcu_lock_count[i] = 0; 127da915ad5SPaul E. McKenney sdp->srcu_unlock_count[i] = 0; 128da915ad5SPaul E. McKenney } 129da915ad5SPaul E. McKenney } 130da915ad5SPaul E. McKenney } 131da915ad5SPaul E. McKenney 132da915ad5SPaul E. McKenney /* 133da915ad5SPaul E. McKenney * Initialize non-compile-time initialized fields, including the 134da915ad5SPaul E. McKenney * associated srcu_node and srcu_data structures. The is_static 135da915ad5SPaul E. McKenney * parameter is passed through to init_srcu_struct_nodes(), and 136da915ad5SPaul E. 
McKenney * also tells us that ->sda has already been wired up to srcu_data. 137da915ad5SPaul E. McKenney */ 138da915ad5SPaul E. McKenney static int init_srcu_struct_fields(struct srcu_struct *sp, bool is_static) 139da915ad5SPaul E. McKenney { 140da915ad5SPaul E. McKenney mutex_init(&sp->srcu_cb_mutex); 141da915ad5SPaul E. McKenney mutex_init(&sp->srcu_gp_mutex); 142da915ad5SPaul E. McKenney sp->srcu_idx = 0; 143dad81a20SPaul E. McKenney sp->srcu_gp_seq = 0; 144da915ad5SPaul E. McKenney sp->srcu_barrier_seq = 0; 145da915ad5SPaul E. McKenney mutex_init(&sp->srcu_barrier_mutex); 146da915ad5SPaul E. McKenney atomic_set(&sp->srcu_barrier_cpu_cnt, 0); 147dad81a20SPaul E. McKenney INIT_DELAYED_WORK(&sp->work, process_srcu); 148da915ad5SPaul E. McKenney if (!is_static) 149da915ad5SPaul E. McKenney sp->sda = alloc_percpu(struct srcu_data); 150da915ad5SPaul E. McKenney init_srcu_struct_nodes(sp, is_static); 1511e9a038bSPaul E. McKenney sp->srcu_gp_seq_needed_exp = 0; 152*22607d66SPaul E. McKenney sp->srcu_last_gp_end = ktime_get_mono_fast_ns(); 153da915ad5SPaul E. McKenney smp_store_release(&sp->srcu_gp_seq_needed, 0); /* Init done. */ 154da915ad5SPaul E. McKenney return sp->sda ? 0 : -ENOMEM; 155dad81a20SPaul E. McKenney } 156dad81a20SPaul E. McKenney 157dad81a20SPaul E. McKenney #ifdef CONFIG_DEBUG_LOCK_ALLOC 158dad81a20SPaul E. McKenney 159dad81a20SPaul E. McKenney int __init_srcu_struct(struct srcu_struct *sp, const char *name, 160dad81a20SPaul E. McKenney struct lock_class_key *key) 161dad81a20SPaul E. McKenney { 162dad81a20SPaul E. McKenney /* Don't re-initialize a lock while it is held. */ 163dad81a20SPaul E. McKenney debug_check_no_locks_freed((void *)sp, sizeof(*sp)); 164dad81a20SPaul E. McKenney lockdep_init_map(&sp->dep_map, name, key, 0); 165da915ad5SPaul E. McKenney spin_lock_init(&sp->gp_lock); 166da915ad5SPaul E. McKenney return init_srcu_struct_fields(sp, false); 167dad81a20SPaul E. McKenney } 168dad81a20SPaul E. 
McKenney EXPORT_SYMBOL_GPL(__init_srcu_struct); 169dad81a20SPaul E. McKenney 170dad81a20SPaul E. McKenney #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 171dad81a20SPaul E. McKenney 172dad81a20SPaul E. McKenney /** 173dad81a20SPaul E. McKenney * init_srcu_struct - initialize a sleep-RCU structure 174dad81a20SPaul E. McKenney * @sp: structure to initialize. 175dad81a20SPaul E. McKenney * 176dad81a20SPaul E. McKenney * Must invoke this on a given srcu_struct before passing that srcu_struct 177dad81a20SPaul E. McKenney * to any other function. Each srcu_struct represents a separate domain 178dad81a20SPaul E. McKenney * of SRCU protection. 179dad81a20SPaul E. McKenney */ 180dad81a20SPaul E. McKenney int init_srcu_struct(struct srcu_struct *sp) 181dad81a20SPaul E. McKenney { 182da915ad5SPaul E. McKenney spin_lock_init(&sp->gp_lock); 183da915ad5SPaul E. McKenney return init_srcu_struct_fields(sp, false); 184dad81a20SPaul E. McKenney } 185dad81a20SPaul E. McKenney EXPORT_SYMBOL_GPL(init_srcu_struct); 186dad81a20SPaul E. McKenney 187dad81a20SPaul E. McKenney #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 188dad81a20SPaul E. McKenney 189dad81a20SPaul E. McKenney /* 190da915ad5SPaul E. McKenney * First-use initialization of statically allocated srcu_struct 191da915ad5SPaul E. McKenney * structure. Wiring up the combining tree is more than can be 192da915ad5SPaul E. McKenney * done with compile-time initialization, so this check is added 193da915ad5SPaul E. McKenney * to each update-side SRCU primitive. Use ->gp_lock, which -is- 194da915ad5SPaul E. McKenney * compile-time initialized, to resolve races involving multiple 195da915ad5SPaul E. McKenney * CPUs trying to garner first-use privileges. 196da915ad5SPaul E. McKenney */ 197da915ad5SPaul E. McKenney static void check_init_srcu_struct(struct srcu_struct *sp) 198da915ad5SPaul E. McKenney { 199da915ad5SPaul E. McKenney unsigned long flags; 200da915ad5SPaul E. McKenney 201da915ad5SPaul E. 
McKenney WARN_ON_ONCE(rcu_scheduler_active == RCU_SCHEDULER_INIT); 202da915ad5SPaul E. McKenney /* The smp_load_acquire() pairs with the smp_store_release(). */ 203da915ad5SPaul E. McKenney if (!rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq_needed))) /*^^^*/ 204da915ad5SPaul E. McKenney return; /* Already initialized. */ 205da915ad5SPaul E. McKenney spin_lock_irqsave(&sp->gp_lock, flags); 206da915ad5SPaul E. McKenney if (!rcu_seq_state(sp->srcu_gp_seq_needed)) { 207da915ad5SPaul E. McKenney spin_unlock_irqrestore(&sp->gp_lock, flags); 208da915ad5SPaul E. McKenney return; 209da915ad5SPaul E. McKenney } 210da915ad5SPaul E. McKenney init_srcu_struct_fields(sp, true); 211da915ad5SPaul E. McKenney spin_unlock_irqrestore(&sp->gp_lock, flags); 212da915ad5SPaul E. McKenney } 213da915ad5SPaul E. McKenney 214da915ad5SPaul E. McKenney /* 215da915ad5SPaul E. McKenney * Returns approximate total of the readers' ->srcu_lock_count[] values 216da915ad5SPaul E. McKenney * for the rank of per-CPU counters specified by idx. 217dad81a20SPaul E. McKenney */ 218dad81a20SPaul E. McKenney static unsigned long srcu_readers_lock_idx(struct srcu_struct *sp, int idx) 219dad81a20SPaul E. McKenney { 220dad81a20SPaul E. McKenney int cpu; 221dad81a20SPaul E. McKenney unsigned long sum = 0; 222dad81a20SPaul E. McKenney 223dad81a20SPaul E. McKenney for_each_possible_cpu(cpu) { 224da915ad5SPaul E. McKenney struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu); 225dad81a20SPaul E. McKenney 226da915ad5SPaul E. McKenney sum += READ_ONCE(cpuc->srcu_lock_count[idx]); 227dad81a20SPaul E. McKenney } 228dad81a20SPaul E. McKenney return sum; 229dad81a20SPaul E. McKenney } 230dad81a20SPaul E. McKenney 231dad81a20SPaul E. McKenney /* 232da915ad5SPaul E. McKenney * Returns approximate total of the readers' ->srcu_unlock_count[] values 233da915ad5SPaul E. McKenney * for the rank of per-CPU counters specified by idx. 234dad81a20SPaul E. McKenney */ 235dad81a20SPaul E. 
McKenney static unsigned long srcu_readers_unlock_idx(struct srcu_struct *sp, int idx) 236dad81a20SPaul E. McKenney { 237dad81a20SPaul E. McKenney int cpu; 238dad81a20SPaul E. McKenney unsigned long sum = 0; 239dad81a20SPaul E. McKenney 240dad81a20SPaul E. McKenney for_each_possible_cpu(cpu) { 241da915ad5SPaul E. McKenney struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu); 242dad81a20SPaul E. McKenney 243da915ad5SPaul E. McKenney sum += READ_ONCE(cpuc->srcu_unlock_count[idx]); 244dad81a20SPaul E. McKenney } 245dad81a20SPaul E. McKenney return sum; 246dad81a20SPaul E. McKenney } 247dad81a20SPaul E. McKenney 248dad81a20SPaul E. McKenney /* 249dad81a20SPaul E. McKenney * Return true if the number of pre-existing readers is determined to 250dad81a20SPaul E. McKenney * be zero. 251dad81a20SPaul E. McKenney */ 252dad81a20SPaul E. McKenney static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx) 253dad81a20SPaul E. McKenney { 254dad81a20SPaul E. McKenney unsigned long unlocks; 255dad81a20SPaul E. McKenney 256dad81a20SPaul E. McKenney unlocks = srcu_readers_unlock_idx(sp, idx); 257dad81a20SPaul E. McKenney 258dad81a20SPaul E. McKenney /* 259dad81a20SPaul E. McKenney * Make sure that a lock is always counted if the corresponding 260dad81a20SPaul E. McKenney * unlock is counted. Needs to be a smp_mb() as the read side may 261dad81a20SPaul E. McKenney * contain a read from a variable that is written to before the 262dad81a20SPaul E. McKenney * synchronize_srcu() in the write side. In this case smp_mb()s 263dad81a20SPaul E. McKenney * A and B act like the store buffering pattern. 264dad81a20SPaul E. McKenney * 265dad81a20SPaul E. McKenney * This smp_mb() also pairs with smp_mb() C to prevent accesses 266dad81a20SPaul E. McKenney * after the synchronize_srcu() from being executed before the 267dad81a20SPaul E. McKenney * grace period ends. 268dad81a20SPaul E. McKenney */ 269dad81a20SPaul E. McKenney smp_mb(); /* A */ 270dad81a20SPaul E. 
McKenney 271dad81a20SPaul E. McKenney /* 272dad81a20SPaul E. McKenney * If the locks are the same as the unlocks, then there must have 273dad81a20SPaul E. McKenney * been no readers on this index at some time in between. This does 274dad81a20SPaul E. McKenney * not mean that there are no more readers, as one could have read 275dad81a20SPaul E. McKenney * the current index but not have incremented the lock counter yet. 276dad81a20SPaul E. McKenney * 277dad81a20SPaul E. McKenney * Possible bug: There is no guarantee that there haven't been 278da915ad5SPaul E. McKenney * ULONG_MAX increments of ->srcu_lock_count[] since the unlocks were 279dad81a20SPaul E. McKenney * counted, meaning that this could return true even if there are 280dad81a20SPaul E. McKenney * still active readers. Since there are no memory barriers around 281da915ad5SPaul E. McKenney * srcu_flip(), the CPU is not required to increment ->srcu_idx 282dad81a20SPaul E. McKenney * before running srcu_readers_unlock_idx(), which means that there 283dad81a20SPaul E. McKenney * could be an arbitrarily large number of critical sections that 284dad81a20SPaul E. McKenney * execute after srcu_readers_unlock_idx() but use the old value 285da915ad5SPaul E. McKenney * of ->srcu_idx. 286dad81a20SPaul E. McKenney */ 287dad81a20SPaul E. McKenney return srcu_readers_lock_idx(sp, idx) == unlocks; 288dad81a20SPaul E. McKenney } 289dad81a20SPaul E. McKenney 290dad81a20SPaul E. McKenney /** 291dad81a20SPaul E. McKenney * srcu_readers_active - returns true if there are readers. and false 292dad81a20SPaul E. McKenney * otherwise 293dad81a20SPaul E. McKenney * @sp: which srcu_struct to count active readers (holding srcu_read_lock). 294dad81a20SPaul E. McKenney * 295dad81a20SPaul E. McKenney * Note that this is not an atomic primitive, and can therefore suffer 296dad81a20SPaul E. McKenney * severe errors when invoked on an active srcu_struct. That said, it 297dad81a20SPaul E. 
McKenney * can be useful as an error check at cleanup time. 298dad81a20SPaul E. McKenney */ 299dad81a20SPaul E. McKenney static bool srcu_readers_active(struct srcu_struct *sp) 300dad81a20SPaul E. McKenney { 301dad81a20SPaul E. McKenney int cpu; 302dad81a20SPaul E. McKenney unsigned long sum = 0; 303dad81a20SPaul E. McKenney 304dad81a20SPaul E. McKenney for_each_possible_cpu(cpu) { 305da915ad5SPaul E. McKenney struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu); 306dad81a20SPaul E. McKenney 307da915ad5SPaul E. McKenney sum += READ_ONCE(cpuc->srcu_lock_count[0]); 308da915ad5SPaul E. McKenney sum += READ_ONCE(cpuc->srcu_lock_count[1]); 309da915ad5SPaul E. McKenney sum -= READ_ONCE(cpuc->srcu_unlock_count[0]); 310da915ad5SPaul E. McKenney sum -= READ_ONCE(cpuc->srcu_unlock_count[1]); 311dad81a20SPaul E. McKenney } 312dad81a20SPaul E. McKenney return sum; 313dad81a20SPaul E. McKenney } 314dad81a20SPaul E. McKenney 315dad81a20SPaul E. McKenney #define SRCU_INTERVAL 1 316dad81a20SPaul E. McKenney 3171e9a038bSPaul E. McKenney /* 3181e9a038bSPaul E. McKenney * Return grace-period delay, zero if there are expedited grace 3191e9a038bSPaul E. McKenney * periods pending, SRCU_INTERVAL otherwise. 3201e9a038bSPaul E. McKenney */ 3211e9a038bSPaul E. McKenney static unsigned long srcu_get_delay(struct srcu_struct *sp) 3221e9a038bSPaul E. McKenney { 3231e9a038bSPaul E. McKenney if (ULONG_CMP_LT(READ_ONCE(sp->srcu_gp_seq), 3241e9a038bSPaul E. McKenney READ_ONCE(sp->srcu_gp_seq_needed_exp))) 3251e9a038bSPaul E. McKenney return 0; 3261e9a038bSPaul E. McKenney return SRCU_INTERVAL; 3271e9a038bSPaul E. McKenney } 3281e9a038bSPaul E. McKenney 329dad81a20SPaul E. McKenney /** 330dad81a20SPaul E. McKenney * cleanup_srcu_struct - deconstruct a sleep-RCU structure 331dad81a20SPaul E. McKenney * @sp: structure to clean up. 332dad81a20SPaul E. McKenney * 333dad81a20SPaul E. McKenney * Must invoke this after you are finished using a given srcu_struct that 334dad81a20SPaul E. 
McKenney * was initialized via init_srcu_struct(), else you leak memory. 335dad81a20SPaul E. McKenney */ 336dad81a20SPaul E. McKenney void cleanup_srcu_struct(struct srcu_struct *sp) 337dad81a20SPaul E. McKenney { 338da915ad5SPaul E. McKenney int cpu; 339da915ad5SPaul E. McKenney 3401e9a038bSPaul E. McKenney if (WARN_ON(!srcu_get_delay(sp))) 3411e9a038bSPaul E. McKenney return; /* Leakage unless caller handles error. */ 342dad81a20SPaul E. McKenney if (WARN_ON(srcu_readers_active(sp))) 343dad81a20SPaul E. McKenney return; /* Leakage unless caller handles error. */ 344dad81a20SPaul E. McKenney flush_delayed_work(&sp->work); 345da915ad5SPaul E. McKenney for_each_possible_cpu(cpu) 346da915ad5SPaul E. McKenney flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work); 347da915ad5SPaul E. McKenney if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE) || 348da915ad5SPaul E. McKenney WARN_ON(srcu_readers_active(sp))) { 349da915ad5SPaul E. McKenney pr_info("cleanup_srcu_struct: Active srcu_struct %p state: %d\n", sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq))); 350dad81a20SPaul E. McKenney return; /* Caller forgot to stop doing call_srcu()? */ 351dad81a20SPaul E. McKenney } 352da915ad5SPaul E. McKenney free_percpu(sp->sda); 353da915ad5SPaul E. McKenney sp->sda = NULL; 354dad81a20SPaul E. McKenney } 355dad81a20SPaul E. McKenney EXPORT_SYMBOL_GPL(cleanup_srcu_struct); 356dad81a20SPaul E. McKenney 357dad81a20SPaul E. McKenney /* 358dad81a20SPaul E. McKenney * Counts the new reader in the appropriate per-CPU element of the 359dad81a20SPaul E. McKenney * srcu_struct. Must be called from process context. 360dad81a20SPaul E. McKenney * Returns an index that must be passed to the matching srcu_read_unlock(). 361dad81a20SPaul E. McKenney */ 362dad81a20SPaul E. McKenney int __srcu_read_lock(struct srcu_struct *sp) 363dad81a20SPaul E. McKenney { 364dad81a20SPaul E. McKenney int idx; 365dad81a20SPaul E. McKenney 366da915ad5SPaul E. 
McKenney idx = READ_ONCE(sp->srcu_idx) & 0x1; 367da915ad5SPaul E. McKenney __this_cpu_inc(sp->sda->srcu_lock_count[idx]); 368dad81a20SPaul E. McKenney smp_mb(); /* B */ /* Avoid leaking the critical section. */ 369dad81a20SPaul E. McKenney return idx; 370dad81a20SPaul E. McKenney } 371dad81a20SPaul E. McKenney EXPORT_SYMBOL_GPL(__srcu_read_lock); 372dad81a20SPaul E. McKenney 373dad81a20SPaul E. McKenney /* 374dad81a20SPaul E. McKenney * Removes the count for the old reader from the appropriate per-CPU 375dad81a20SPaul E. McKenney * element of the srcu_struct. Note that this may well be a different 376dad81a20SPaul E. McKenney * CPU than that which was incremented by the corresponding srcu_read_lock(). 377dad81a20SPaul E. McKenney * Must be called from process context. 378dad81a20SPaul E. McKenney */ 379dad81a20SPaul E. McKenney void __srcu_read_unlock(struct srcu_struct *sp, int idx) 380dad81a20SPaul E. McKenney { 381dad81a20SPaul E. McKenney smp_mb(); /* C */ /* Avoid leaking the critical section. */ 382da915ad5SPaul E. McKenney this_cpu_inc(sp->sda->srcu_unlock_count[idx]); 383dad81a20SPaul E. McKenney } 384dad81a20SPaul E. McKenney EXPORT_SYMBOL_GPL(__srcu_read_unlock); 385dad81a20SPaul E. McKenney 386dad81a20SPaul E. McKenney /* 387dad81a20SPaul E. McKenney * We use an adaptive strategy for synchronize_srcu() and especially for 388dad81a20SPaul E. McKenney * synchronize_srcu_expedited(). We spin for a fixed time period 389dad81a20SPaul E. McKenney * (defined below) to allow SRCU readers to exit their read-side critical 390dad81a20SPaul E. McKenney * sections. If there are still some readers after a few microseconds, 391dad81a20SPaul E. McKenney * we repeatedly block for 1-millisecond time periods. 392dad81a20SPaul E. McKenney */ 393dad81a20SPaul E. McKenney #define SRCU_RETRY_CHECK_DELAY 5 394dad81a20SPaul E. McKenney 395dad81a20SPaul E. McKenney /* 396dad81a20SPaul E. McKenney * Start an SRCU grace period. 397dad81a20SPaul E. McKenney */ 398dad81a20SPaul E. 
McKenney static void srcu_gp_start(struct srcu_struct *sp) 399dad81a20SPaul E. McKenney { 400da915ad5SPaul E. McKenney struct srcu_data *sdp = this_cpu_ptr(sp->sda); 401dad81a20SPaul E. McKenney int state; 402dad81a20SPaul E. McKenney 403da915ad5SPaul E. McKenney RCU_LOCKDEP_WARN(!lockdep_is_held(&sp->gp_lock), 404da915ad5SPaul E. McKenney "Invoked srcu_gp_start() without ->gp_lock!"); 405da915ad5SPaul E. McKenney WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)); 406da915ad5SPaul E. McKenney rcu_segcblist_advance(&sdp->srcu_cblist, 407da915ad5SPaul E. McKenney rcu_seq_current(&sp->srcu_gp_seq)); 408da915ad5SPaul E. McKenney (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, 409dad81a20SPaul E. McKenney rcu_seq_snap(&sp->srcu_gp_seq)); 4102da4b2a7SPaul E. McKenney smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */ 411dad81a20SPaul E. McKenney rcu_seq_start(&sp->srcu_gp_seq); 412dad81a20SPaul E. McKenney state = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)); 413dad81a20SPaul E. McKenney WARN_ON_ONCE(state != SRCU_STATE_SCAN1); 414dad81a20SPaul E. McKenney } 415dad81a20SPaul E. McKenney 416dad81a20SPaul E. McKenney /* 417da915ad5SPaul E. McKenney * Track online CPUs to guide callback workqueue placement. 418da915ad5SPaul E. McKenney */ 419da915ad5SPaul E. McKenney DEFINE_PER_CPU(bool, srcu_online); 420da915ad5SPaul E. McKenney 421da915ad5SPaul E. McKenney void srcu_online_cpu(unsigned int cpu) 422da915ad5SPaul E. McKenney { 423da915ad5SPaul E. McKenney WRITE_ONCE(per_cpu(srcu_online, cpu), true); 424da915ad5SPaul E. McKenney } 425da915ad5SPaul E. McKenney 426da915ad5SPaul E. McKenney void srcu_offline_cpu(unsigned int cpu) 427da915ad5SPaul E. McKenney { 428da915ad5SPaul E. McKenney WRITE_ONCE(per_cpu(srcu_online, cpu), false); 429da915ad5SPaul E. McKenney } 430da915ad5SPaul E. McKenney 431da915ad5SPaul E. McKenney /* 432da915ad5SPaul E. 
McKenney * Place the workqueue handler on the specified CPU if online, otherwise 433da915ad5SPaul E. McKenney * just run it whereever. This is useful for placing workqueue handlers 434da915ad5SPaul E. McKenney * that are to invoke the specified CPU's callbacks. 435da915ad5SPaul E. McKenney */ 436da915ad5SPaul E. McKenney static bool srcu_queue_delayed_work_on(int cpu, struct workqueue_struct *wq, 437da915ad5SPaul E. McKenney struct delayed_work *dwork, 438da915ad5SPaul E. McKenney unsigned long delay) 439da915ad5SPaul E. McKenney { 440da915ad5SPaul E. McKenney bool ret; 441da915ad5SPaul E. McKenney 442da915ad5SPaul E. McKenney preempt_disable(); 443da915ad5SPaul E. McKenney if (READ_ONCE(per_cpu(srcu_online, cpu))) 444da915ad5SPaul E. McKenney ret = queue_delayed_work_on(cpu, wq, dwork, delay); 445da915ad5SPaul E. McKenney else 446da915ad5SPaul E. McKenney ret = queue_delayed_work(wq, dwork, delay); 447da915ad5SPaul E. McKenney preempt_enable(); 448da915ad5SPaul E. McKenney return ret; 449da915ad5SPaul E. McKenney } 450da915ad5SPaul E. McKenney 451da915ad5SPaul E. McKenney /* 452da915ad5SPaul E. McKenney * Schedule callback invocation for the specified srcu_data structure, 453da915ad5SPaul E. McKenney * if possible, on the corresponding CPU. 454da915ad5SPaul E. McKenney */ 455da915ad5SPaul E. McKenney static void srcu_schedule_cbs_sdp(struct srcu_data *sdp, unsigned long delay) 456da915ad5SPaul E. McKenney { 457da915ad5SPaul E. McKenney srcu_queue_delayed_work_on(sdp->cpu, system_power_efficient_wq, 458da915ad5SPaul E. McKenney &sdp->work, delay); 459da915ad5SPaul E. McKenney } 460da915ad5SPaul E. McKenney 461da915ad5SPaul E. McKenney /* 462da915ad5SPaul E. McKenney * Schedule callback invocation for all srcu_data structures associated 463c7e88067SPaul E. McKenney * with the specified srcu_node structure that have callbacks for the 464c7e88067SPaul E. McKenney * just-completed grace period, the one corresponding to idx. If possible, 465c7e88067SPaul E. 
McKenney * schedule this invocation on the corresponding CPUs. 466da915ad5SPaul E. McKenney */ 467c7e88067SPaul E. McKenney static void srcu_schedule_cbs_snp(struct srcu_struct *sp, struct srcu_node *snp, 4681e9a038bSPaul E. McKenney unsigned long mask, unsigned long delay) 469da915ad5SPaul E. McKenney { 470da915ad5SPaul E. McKenney int cpu; 471da915ad5SPaul E. McKenney 472c7e88067SPaul E. McKenney for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) { 473c7e88067SPaul E. McKenney if (!(mask & (1 << (cpu - snp->grplo)))) 474c7e88067SPaul E. McKenney continue; 4751e9a038bSPaul E. McKenney srcu_schedule_cbs_sdp(per_cpu_ptr(sp->sda, cpu), delay); 476da915ad5SPaul E. McKenney } 477c7e88067SPaul E. McKenney } 478da915ad5SPaul E. McKenney 479da915ad5SPaul E. McKenney /* 480da915ad5SPaul E. McKenney * Note the end of an SRCU grace period. Initiates callback invocation 481da915ad5SPaul E. McKenney * and starts a new grace period if needed. 482da915ad5SPaul E. McKenney * 483da915ad5SPaul E. McKenney * The ->srcu_cb_mutex acquisition does not protect any data, but 484da915ad5SPaul E. McKenney * instead prevents more than one grace period from starting while we 485da915ad5SPaul E. McKenney * are initiating callback invocation. This allows the ->srcu_have_cbs[] 486da915ad5SPaul E. McKenney * array to have a finite number of elements. 487da915ad5SPaul E. McKenney */ 488da915ad5SPaul E. McKenney static void srcu_gp_end(struct srcu_struct *sp) 489da915ad5SPaul E. McKenney { 4901e9a038bSPaul E. McKenney unsigned long cbdelay; 491da915ad5SPaul E. McKenney bool cbs; 492da915ad5SPaul E. McKenney unsigned long gpseq; 493da915ad5SPaul E. McKenney int idx; 494da915ad5SPaul E. McKenney int idxnext; 495c7e88067SPaul E. McKenney unsigned long mask; 496da915ad5SPaul E. McKenney struct srcu_node *snp; 497da915ad5SPaul E. McKenney 498da915ad5SPaul E. McKenney /* Prevent more than one additional grace period. */ 499da915ad5SPaul E. McKenney mutex_lock(&sp->srcu_cb_mutex); 500da915ad5SPaul E. 
McKenney 501da915ad5SPaul E. McKenney /* End the current grace period. */ 502da915ad5SPaul E. McKenney spin_lock_irq(&sp->gp_lock); 503da915ad5SPaul E. McKenney idx = rcu_seq_state(sp->srcu_gp_seq); 504da915ad5SPaul E. McKenney WARN_ON_ONCE(idx != SRCU_STATE_SCAN2); 5051e9a038bSPaul E. McKenney cbdelay = srcu_get_delay(sp); 506*22607d66SPaul E. McKenney sp->srcu_last_gp_end = ktime_get_mono_fast_ns(); 507da915ad5SPaul E. McKenney rcu_seq_end(&sp->srcu_gp_seq); 508da915ad5SPaul E. McKenney gpseq = rcu_seq_current(&sp->srcu_gp_seq); 5091e9a038bSPaul E. McKenney if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, gpseq)) 5101e9a038bSPaul E. McKenney sp->srcu_gp_seq_needed_exp = gpseq; 511da915ad5SPaul E. McKenney spin_unlock_irq(&sp->gp_lock); 512da915ad5SPaul E. McKenney mutex_unlock(&sp->srcu_gp_mutex); 513da915ad5SPaul E. McKenney /* A new grace period can start at this point. But only one. */ 514da915ad5SPaul E. McKenney 515da915ad5SPaul E. McKenney /* Initiate callback invocation as needed. */ 516da915ad5SPaul E. McKenney idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs); 517da915ad5SPaul E. McKenney idxnext = (idx + 1) % ARRAY_SIZE(snp->srcu_have_cbs); 518da915ad5SPaul E. McKenney rcu_for_each_node_breadth_first(sp, snp) { 519da915ad5SPaul E. McKenney spin_lock_irq(&snp->lock); 520da915ad5SPaul E. McKenney cbs = false; 521da915ad5SPaul E. McKenney if (snp >= sp->level[rcu_num_lvls - 1]) 522da915ad5SPaul E. McKenney cbs = snp->srcu_have_cbs[idx] == gpseq; 523da915ad5SPaul E. McKenney snp->srcu_have_cbs[idx] = gpseq; 524da915ad5SPaul E. McKenney rcu_seq_set_state(&snp->srcu_have_cbs[idx], 1); 5251e9a038bSPaul E. McKenney if (ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, gpseq)) 5261e9a038bSPaul E. McKenney snp->srcu_gp_seq_needed_exp = gpseq; 527c7e88067SPaul E. McKenney mask = snp->srcu_data_have_cbs[idx]; 528c7e88067SPaul E. McKenney snp->srcu_data_have_cbs[idx] = 0; 529da915ad5SPaul E. McKenney spin_unlock_irq(&snp->lock); 530da915ad5SPaul E. 
McKenney if (cbs) { 531da915ad5SPaul E. McKenney smp_mb(); /* GP end before CB invocation. */ 5321e9a038bSPaul E. McKenney srcu_schedule_cbs_snp(sp, snp, mask, cbdelay); 533da915ad5SPaul E. McKenney } 534da915ad5SPaul E. McKenney } 535da915ad5SPaul E. McKenney 536da915ad5SPaul E. McKenney /* Callback initiation done, allow grace periods after next. */ 537da915ad5SPaul E. McKenney mutex_unlock(&sp->srcu_cb_mutex); 538da915ad5SPaul E. McKenney 539da915ad5SPaul E. McKenney /* Start a new grace period if needed. */ 540da915ad5SPaul E. McKenney spin_lock_irq(&sp->gp_lock); 541da915ad5SPaul E. McKenney gpseq = rcu_seq_current(&sp->srcu_gp_seq); 542da915ad5SPaul E. McKenney if (!rcu_seq_state(gpseq) && 543da915ad5SPaul E. McKenney ULONG_CMP_LT(gpseq, sp->srcu_gp_seq_needed)) { 544da915ad5SPaul E. McKenney srcu_gp_start(sp); 545da915ad5SPaul E. McKenney spin_unlock_irq(&sp->gp_lock); 546da915ad5SPaul E. McKenney /* Throttle expedited grace periods: Should be rare! */ 5471e9a038bSPaul E. McKenney srcu_reschedule(sp, rcu_seq_ctr(gpseq) & 0x3ff 5481e9a038bSPaul E. McKenney ? 0 : SRCU_INTERVAL); 549da915ad5SPaul E. McKenney } else { 550da915ad5SPaul E. McKenney spin_unlock_irq(&sp->gp_lock); 551da915ad5SPaul E. McKenney } 552da915ad5SPaul E. McKenney } 553da915ad5SPaul E. McKenney 554da915ad5SPaul E. McKenney /* 5551e9a038bSPaul E. McKenney * Funnel-locking scheme to scalably mediate many concurrent expedited 5561e9a038bSPaul E. McKenney * grace-period requests. This function is invoked for the first known 5571e9a038bSPaul E. McKenney * expedited request for a grace period that has already been requested, 5581e9a038bSPaul E. McKenney * but without expediting. To start a completely new grace period, 5591e9a038bSPaul E. McKenney * whether expedited or not, use srcu_funnel_gp_start() instead. 5601e9a038bSPaul E. McKenney */ 5611e9a038bSPaul E. McKenney static void srcu_funnel_exp_start(struct srcu_struct *sp, struct srcu_node *snp, 5621e9a038bSPaul E. 
McKenney unsigned long s) 5631e9a038bSPaul E. McKenney { 5641e9a038bSPaul E. McKenney unsigned long flags; 5651e9a038bSPaul E. McKenney 5661e9a038bSPaul E. McKenney for (; snp != NULL; snp = snp->srcu_parent) { 5671e9a038bSPaul E. McKenney if (rcu_seq_done(&sp->srcu_gp_seq, s) || 5681e9a038bSPaul E. McKenney ULONG_CMP_GE(READ_ONCE(snp->srcu_gp_seq_needed_exp), s)) 5691e9a038bSPaul E. McKenney return; 5701e9a038bSPaul E. McKenney spin_lock_irqsave(&snp->lock, flags); 5711e9a038bSPaul E. McKenney if (ULONG_CMP_GE(snp->srcu_gp_seq_needed_exp, s)) { 5721e9a038bSPaul E. McKenney spin_unlock_irqrestore(&snp->lock, flags); 5731e9a038bSPaul E. McKenney return; 5741e9a038bSPaul E. McKenney } 5751e9a038bSPaul E. McKenney WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s); 5761e9a038bSPaul E. McKenney spin_unlock_irqrestore(&snp->lock, flags); 5771e9a038bSPaul E. McKenney } 5781e9a038bSPaul E. McKenney spin_lock_irqsave(&sp->gp_lock, flags); /* Top of tree: only ever advance ->srcu_gp_seq_needed_exp to s, never move it backwards. */ 5791e9a038bSPaul E. McKenney if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s)) 5801e9a038bSPaul E. McKenney sp->srcu_gp_seq_needed_exp = s; 5811e9a038bSPaul E. McKenney spin_unlock_irqrestore(&sp->gp_lock, flags); 5821e9a038bSPaul E. McKenney } 5831e9a038bSPaul E. McKenney 5841e9a038bSPaul E. McKenney /* 585da915ad5SPaul E. McKenney * Funnel-locking scheme to scalably mediate many concurrent grace-period 586da915ad5SPaul E. McKenney * requests. The winner has to do the work of actually starting grace 587da915ad5SPaul E. McKenney * period s. Losers must either ensure that their desired grace-period 588da915ad5SPaul E. McKenney * number is recorded on at least their leaf srcu_node structure, or they 589da915ad5SPaul E. McKenney * must take steps to invoke their own callbacks. 590da915ad5SPaul E. McKenney */ 5911e9a038bSPaul E. McKenney static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp, 5921e9a038bSPaul E. McKenney unsigned long s, bool do_norm) 593da915ad5SPaul E. McKenney { 594da915ad5SPaul E.
McKenney unsigned long flags; 595da915ad5SPaul E. McKenney int idx = rcu_seq_ctr(s) % ARRAY_SIZE(sdp->mynode->srcu_have_cbs); 596da915ad5SPaul E. McKenney struct srcu_node *snp = sdp->mynode; 597da915ad5SPaul E. McKenney unsigned long snp_seq; 598da915ad5SPaul E. McKenney 599da915ad5SPaul E. McKenney /* Each pass through the loop does one level of the srcu_node tree. */ 600da915ad5SPaul E. McKenney for (; snp != NULL; snp = snp->srcu_parent) { 601da915ad5SPaul E. McKenney if (rcu_seq_done(&sp->srcu_gp_seq, s) && snp != sdp->mynode) 602da915ad5SPaul E. McKenney return; /* GP already done and CBs recorded. */ 603da915ad5SPaul E. McKenney spin_lock_irqsave(&snp->lock, flags); 604da915ad5SPaul E. McKenney if (ULONG_CMP_GE(snp->srcu_have_cbs[idx], s)) { 605da915ad5SPaul E. McKenney snp_seq = snp->srcu_have_cbs[idx]; 606c7e88067SPaul E. McKenney if (snp == sdp->mynode && snp_seq == s) 607c7e88067SPaul E. McKenney snp->srcu_data_have_cbs[idx] |= sdp->grpmask; 608da915ad5SPaul E. McKenney spin_unlock_irqrestore(&snp->lock, flags); 609da915ad5SPaul E. McKenney if (snp == sdp->mynode && snp_seq != s) { 610da915ad5SPaul E. McKenney smp_mb(); /* CBs after GP! */ 6111e9a038bSPaul E. McKenney srcu_schedule_cbs_sdp(sdp, do_norm 6121e9a038bSPaul E. McKenney ? SRCU_INTERVAL 6131e9a038bSPaul E. McKenney : 0); 6141e9a038bSPaul E. McKenney return; 615da915ad5SPaul E. McKenney } 6161e9a038bSPaul E. McKenney if (!do_norm) 6171e9a038bSPaul E. McKenney srcu_funnel_exp_start(sp, snp, s); 618da915ad5SPaul E. McKenney return; 619da915ad5SPaul E. McKenney } 620da915ad5SPaul E. McKenney snp->srcu_have_cbs[idx] = s; 621c7e88067SPaul E. McKenney if (snp == sdp->mynode) 622c7e88067SPaul E. McKenney snp->srcu_data_have_cbs[idx] |= sdp->grpmask; 6231e9a038bSPaul E. McKenney if (!do_norm && ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, s)) 6241e9a038bSPaul E. McKenney snp->srcu_gp_seq_needed_exp = s; 625da915ad5SPaul E. McKenney spin_unlock_irqrestore(&snp->lock, flags); 626da915ad5SPaul E. 
McKenney } 627da915ad5SPaul E. McKenney 628da915ad5SPaul E. McKenney /* Top of tree, must ensure the grace period will be started. */ 629da915ad5SPaul E. McKenney spin_lock_irqsave(&sp->gp_lock, flags); 630da915ad5SPaul E. McKenney if (ULONG_CMP_LT(sp->srcu_gp_seq_needed, s)) { 631da915ad5SPaul E. McKenney /* 632da915ad5SPaul E. McKenney * Record need for grace period s. Pair with load 633da915ad5SPaul E. McKenney * acquire setting up for initialization. 634da915ad5SPaul E. McKenney */ 635da915ad5SPaul E. McKenney smp_store_release(&sp->srcu_gp_seq_needed, s); /*^^^*/ 636da915ad5SPaul E. McKenney } 6371e9a038bSPaul E. McKenney if (!do_norm && ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s)) 6381e9a038bSPaul E. McKenney sp->srcu_gp_seq_needed_exp = s; 639da915ad5SPaul E. McKenney 640da915ad5SPaul E. McKenney /* If grace period not already done and none in progress, start it. */ 641da915ad5SPaul E. McKenney if (!rcu_seq_done(&sp->srcu_gp_seq, s) && 642da915ad5SPaul E. McKenney rcu_seq_state(sp->srcu_gp_seq) == SRCU_STATE_IDLE) { 643da915ad5SPaul E. McKenney WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)); 644da915ad5SPaul E. McKenney srcu_gp_start(sp); 645da915ad5SPaul E. McKenney queue_delayed_work(system_power_efficient_wq, &sp->work, 6461e9a038bSPaul E. McKenney srcu_get_delay(sp)); 647da915ad5SPaul E. McKenney } 648da915ad5SPaul E. McKenney spin_unlock_irqrestore(&sp->gp_lock, flags); 649da915ad5SPaul E. McKenney } 650da915ad5SPaul E. McKenney 651da915ad5SPaul E. McKenney /* 652dad81a20SPaul E. McKenney * Wait until all readers counted by array index idx complete, but 653dad81a20SPaul E. McKenney * loop an additional time if there is an expedited grace period pending. 654da915ad5SPaul E. McKenney * The caller must ensure that ->srcu_idx is not changed while checking. 655dad81a20SPaul E. McKenney */ 656dad81a20SPaul E. McKenney static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount) 657dad81a20SPaul E. 
McKenney { 658dad81a20SPaul E. McKenney for (;;) { 659dad81a20SPaul E. McKenney if (srcu_readers_active_idx_check(sp, idx)) 660dad81a20SPaul E. McKenney return true; 6611e9a038bSPaul E. McKenney if (--trycount + !srcu_get_delay(sp) <= 0) 662dad81a20SPaul E. McKenney return false; 663dad81a20SPaul E. McKenney udelay(SRCU_RETRY_CHECK_DELAY); 664dad81a20SPaul E. McKenney } 665dad81a20SPaul E. McKenney } 666dad81a20SPaul E. McKenney 667dad81a20SPaul E. McKenney /* 668da915ad5SPaul E. McKenney * Increment the ->srcu_idx counter so that future SRCU readers will 669da915ad5SPaul E. McKenney * use the other rank of the ->srcu_(un)lock_count[] arrays. This allows 670dad81a20SPaul E. McKenney * us to wait for pre-existing readers in a starvation-free manner. 671dad81a20SPaul E. McKenney */ 672dad81a20SPaul E. McKenney static void srcu_flip(struct srcu_struct *sp) 673dad81a20SPaul E. McKenney { 674da915ad5SPaul E. McKenney WRITE_ONCE(sp->srcu_idx, sp->srcu_idx + 1); 675dad81a20SPaul E. McKenney 676dad81a20SPaul E. McKenney /* 677dad81a20SPaul E. McKenney * Ensure that if the updater misses an __srcu_read_unlock() 678dad81a20SPaul E. McKenney * increment, that task's next __srcu_read_lock() will see the 679dad81a20SPaul E. McKenney * above counter update. Note that both this memory barrier 680dad81a20SPaul E. McKenney * and the one in srcu_readers_active_idx_check() provide the 681dad81a20SPaul E. McKenney * guarantee for __srcu_read_lock(). 682dad81a20SPaul E. McKenney */ 683dad81a20SPaul E. McKenney smp_mb(); /* D */ /* Pairs with C. */ 684dad81a20SPaul E. McKenney } 685dad81a20SPaul E. McKenney 686dad81a20SPaul E. McKenney /* 6872da4b2a7SPaul E. McKenney * If SRCU is likely idle, return true, otherwise return false. 6882da4b2a7SPaul E. McKenney * 6892da4b2a7SPaul E. McKenney * Note that it is OK for several current from-idle requests for a new 6902da4b2a7SPaul E. McKenney * grace period from idle to specify expediting because they will all end 6912da4b2a7SPaul E. 
McKenney * up requesting the same grace period anyhow. So no loss. 6922da4b2a7SPaul E. McKenney * 6932da4b2a7SPaul E. McKenney * Note also that if any CPU (including the current one) is still invoking 6942da4b2a7SPaul E. McKenney * callbacks, this function will nevertheless say "idle". This is not 6952da4b2a7SPaul E. McKenney * ideal, but the overhead of checking all CPUs' callback lists is even 6962da4b2a7SPaul E. McKenney * less ideal, especially on large systems. Furthermore, the wakeup 6972da4b2a7SPaul E. McKenney * can happen before the callback is fully removed, so we have no choice 6982da4b2a7SPaul E. McKenney * but to accept this type of error. 6992da4b2a7SPaul E. McKenney * 7002da4b2a7SPaul E. McKenney * This function is also subject to counter-wrap errors, but let's face 7012da4b2a7SPaul E. McKenney * it, if this function was preempted for enough time for the counters 7022da4b2a7SPaul E. McKenney * to wrap, it really doesn't matter whether or not we expedite the grace 7032da4b2a7SPaul E. McKenney * period. The extra overhead of a needlessly expedited grace period is 7042da4b2a7SPaul E. McKenney * negligible when amortized over that time period, and the extra latency 7052da4b2a7SPaul E. McKenney * of a needlessly non-expedited grace period is similarly negligible. 7062da4b2a7SPaul E. McKenney */ 7072da4b2a7SPaul E. McKenney static bool srcu_might_be_idle(struct srcu_struct *sp) 7082da4b2a7SPaul E. McKenney { 709*22607d66SPaul E. McKenney unsigned long curseq; 7102da4b2a7SPaul E. McKenney unsigned long flags; 7112da4b2a7SPaul E. McKenney struct srcu_data *sdp; 712*22607d66SPaul E. McKenney unsigned long t; 7132da4b2a7SPaul E. McKenney 7142da4b2a7SPaul E. McKenney /* If the local srcu_data structure has callbacks, not idle. */ 7152da4b2a7SPaul E. McKenney local_irq_save(flags); 7162da4b2a7SPaul E. McKenney sdp = this_cpu_ptr(sp->sda); 7172da4b2a7SPaul E. McKenney if (rcu_segcblist_pend_cbs(&sdp->srcu_cblist)) { 7182da4b2a7SPaul E.
McKenney local_irq_restore(flags); 7192da4b2a7SPaul E. McKenney return false; /* Callbacks already present, so not idle. */ 7202da4b2a7SPaul E. McKenney } 7212da4b2a7SPaul E. McKenney local_irq_restore(flags); 7222da4b2a7SPaul E. McKenney 7232da4b2a7SPaul E. McKenney /* 7242da4b2a7SPaul E. McKenney * No local callbacks, so probabilistically probe global state. 7252da4b2a7SPaul E. McKenney * Exact information would require acquiring locks, which would 7262da4b2a7SPaul E. McKenney * kill scalability, hence the probabilistic nature of the probe. 7272da4b2a7SPaul E. McKenney */ 728*22607d66SPaul E. McKenney 729*22607d66SPaul E. McKenney /* First, see if enough time has passed since the last GP. */ 730*22607d66SPaul E. McKenney t = ktime_get_mono_fast_ns(); 731*22607d66SPaul E. McKenney if (exp_holdoff == 0 || 732*22607d66SPaul E. McKenney time_in_range_open(t, sp->srcu_last_gp_end, 733*22607d66SPaul E. McKenney sp->srcu_last_gp_end + exp_holdoff)) 734*22607d66SPaul E. McKenney return false; /* exp_holdoff is zero, or too soon after last GP. */ 735*22607d66SPaul E. McKenney 736*22607d66SPaul E. McKenney /* Next, check for probable idleness. */ 7372da4b2a7SPaul E. McKenney curseq = rcu_seq_current(&sp->srcu_gp_seq); 7382da4b2a7SPaul E. McKenney smp_mb(); /* Order ->srcu_gp_seq with ->srcu_gp_seq_needed. */ 7392da4b2a7SPaul E. McKenney if (ULONG_CMP_LT(curseq, READ_ONCE(sp->srcu_gp_seq_needed))) 7402da4b2a7SPaul E. McKenney return false; /* Grace period in progress, so not idle. */ 7412da4b2a7SPaul E. McKenney smp_mb(); /* Order ->srcu_gp_seq with prior access. */ 7422da4b2a7SPaul E. McKenney if (curseq != rcu_seq_current(&sp->srcu_gp_seq)) 7432da4b2a7SPaul E. McKenney return false; /* GP # changed, so not idle. */ 7442da4b2a7SPaul E. McKenney return true; /* With reasonable probability, idle! */ 7452da4b2a7SPaul E. McKenney } 7462da4b2a7SPaul E. McKenney 7472da4b2a7SPaul E. McKenney /* 748da915ad5SPaul E.
McKenney * Enqueue an SRCU callback on the srcu_data structure associated with 749da915ad5SPaul E. McKenney * the current CPU and the specified srcu_struct structure, initiating 750da915ad5SPaul E. McKenney * grace-period processing if it is not already running. 751dad81a20SPaul E. McKenney * 752dad81a20SPaul E. McKenney * Note that all CPUs must agree that the grace period extended beyond 753dad81a20SPaul E. McKenney * all pre-existing SRCU read-side critical sections. On systems with 754dad81a20SPaul E. McKenney * more than one CPU, this means that when "func()" is invoked, each CPU 755dad81a20SPaul E. McKenney * is guaranteed to have executed a full memory barrier since the end of 756dad81a20SPaul E. McKenney * its last corresponding SRCU read-side critical section whose beginning 757dad81a20SPaul E. McKenney * preceded the call to call_srcu(). It also means that each CPU executing 758dad81a20SPaul E. McKenney * an SRCU read-side critical section that continues beyond the start of 759dad81a20SPaul E. McKenney * "func()" must have executed a memory barrier after the call_srcu() 760dad81a20SPaul E. McKenney * but before the beginning of that SRCU read-side critical section. 761dad81a20SPaul E. McKenney * Note that these guarantees include CPUs that are offline, idle, or 762dad81a20SPaul E. McKenney * executing in user mode, as well as CPUs that are executing in the kernel. 763dad81a20SPaul E. McKenney * 764dad81a20SPaul E. McKenney * Furthermore, if CPU A invoked call_srcu() and CPU B invoked the 765dad81a20SPaul E. McKenney * resulting SRCU callback function "func()", then both CPU A and CPU 766dad81a20SPaul E. McKenney * B are guaranteed to execute a full memory barrier during the time 767dad81a20SPaul E. McKenney * interval between the call to call_srcu() and the invocation of "func()". 768dad81a20SPaul E. McKenney * This guarantee applies even if CPU A and CPU B are the same CPU (but 769dad81a20SPaul E. McKenney * again only if the system has more than one CPU).
770dad81a20SPaul E. McKenney * 771dad81a20SPaul E. McKenney * Of course, these guarantees apply only for invocations of call_srcu(), 772dad81a20SPaul E. McKenney * srcu_read_lock(), and srcu_read_unlock() that are all passed the same 773dad81a20SPaul E. McKenney * srcu_struct structure. 774dad81a20SPaul E. McKenney */ 7751e9a038bSPaul E. McKenney void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, 7761e9a038bSPaul E. McKenney rcu_callback_t func, bool do_norm) 777dad81a20SPaul E. McKenney { 778dad81a20SPaul E. McKenney unsigned long flags; 7791e9a038bSPaul E. McKenney bool needexp = false; 780da915ad5SPaul E. McKenney bool needgp = false; 781da915ad5SPaul E. McKenney unsigned long s; 782da915ad5SPaul E. McKenney struct srcu_data *sdp; 783dad81a20SPaul E. McKenney 784da915ad5SPaul E. McKenney check_init_srcu_struct(sp); 785da915ad5SPaul E. McKenney rhp->func = func; 786da915ad5SPaul E. McKenney local_irq_save(flags); 787da915ad5SPaul E. McKenney sdp = this_cpu_ptr(sp->sda); 788da915ad5SPaul E. McKenney spin_lock(&sdp->lock); 789da915ad5SPaul E. McKenney rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp, false); 790da915ad5SPaul E. McKenney rcu_segcblist_advance(&sdp->srcu_cblist, 791da915ad5SPaul E. McKenney rcu_seq_current(&sp->srcu_gp_seq)); 792da915ad5SPaul E. McKenney s = rcu_seq_snap(&sp->srcu_gp_seq); 793da915ad5SPaul E. McKenney (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, s); 794da915ad5SPaul E. McKenney if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) { 795da915ad5SPaul E. McKenney sdp->srcu_gp_seq_needed = s; 796da915ad5SPaul E. McKenney needgp = true; 797dad81a20SPaul E. McKenney } 7981e9a038bSPaul E. McKenney if (!do_norm && ULONG_CMP_LT(sdp->srcu_gp_seq_needed_exp, s)) { 7991e9a038bSPaul E. McKenney sdp->srcu_gp_seq_needed_exp = s; 8001e9a038bSPaul E. McKenney needexp = true; 8011e9a038bSPaul E. McKenney } 802da915ad5SPaul E. McKenney spin_unlock_irqrestore(&sdp->lock, flags); 803da915ad5SPaul E. McKenney if (needgp) 8041e9a038bSPaul E. 
McKenney srcu_funnel_gp_start(sp, sdp, s, do_norm); 8051e9a038bSPaul E. McKenney else if (needexp) 8061e9a038bSPaul E. McKenney srcu_funnel_exp_start(sp, sdp->mynode, s); 8071e9a038bSPaul E. McKenney } 8081e9a038bSPaul E. McKenney 8091e9a038bSPaul E. McKenney void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, 8101e9a038bSPaul E. McKenney rcu_callback_t func) 8111e9a038bSPaul E. McKenney { 8121e9a038bSPaul E. McKenney __call_srcu(sp, rhp, func, true); 813dad81a20SPaul E. McKenney } 814dad81a20SPaul E. McKenney EXPORT_SYMBOL_GPL(call_srcu); 815dad81a20SPaul E. McKenney 816dad81a20SPaul E. McKenney /* 817dad81a20SPaul E. McKenney * Helper function for synchronize_srcu() and synchronize_srcu_expedited(). 818dad81a20SPaul E. McKenney */ 8191e9a038bSPaul E. McKenney static void __synchronize_srcu(struct srcu_struct *sp, bool do_norm) 820dad81a20SPaul E. McKenney { 821dad81a20SPaul E. McKenney struct rcu_synchronize rcu; 822dad81a20SPaul E. McKenney 823dad81a20SPaul E. McKenney RCU_LOCKDEP_WARN(lock_is_held(&sp->dep_map) || 824dad81a20SPaul E. McKenney lock_is_held(&rcu_bh_lock_map) || 825dad81a20SPaul E. McKenney lock_is_held(&rcu_lock_map) || 826dad81a20SPaul E. McKenney lock_is_held(&rcu_sched_lock_map), 827dad81a20SPaul E. McKenney "Illegal synchronize_srcu() in same-type SRCU (or in RCU) read-side critical section"); 828dad81a20SPaul E. McKenney 829dad81a20SPaul E. McKenney if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE) 830dad81a20SPaul E. McKenney return; 831dad81a20SPaul E. McKenney might_sleep(); 832da915ad5SPaul E. McKenney check_init_srcu_struct(sp); 833dad81a20SPaul E. McKenney init_completion(&rcu.completion); 834da915ad5SPaul E. McKenney init_rcu_head_on_stack(&rcu.head); 8351e9a038bSPaul E. McKenney __call_srcu(sp, &rcu.head, wakeme_after_rcu, do_norm); 836dad81a20SPaul E. McKenney wait_for_completion(&rcu.completion); 837da915ad5SPaul E. McKenney destroy_rcu_head_on_stack(&rcu.head); 838dad81a20SPaul E. McKenney } 839dad81a20SPaul E. 
McKenney 840dad81a20SPaul E. McKenney /** 841dad81a20SPaul E. McKenney * synchronize_srcu_expedited - Brute-force SRCU grace period 842dad81a20SPaul E. McKenney * @sp: srcu_struct with which to synchronize. 843dad81a20SPaul E. McKenney * 844dad81a20SPaul E. McKenney * Wait for an SRCU grace period to elapse, but be more aggressive about 845dad81a20SPaul E. McKenney * spinning rather than blocking when waiting. 846dad81a20SPaul E. McKenney * 847dad81a20SPaul E. McKenney * Note that synchronize_srcu_expedited() has the same deadlock and 848dad81a20SPaul E. McKenney * memory-ordering properties as does synchronize_srcu(). 849dad81a20SPaul E. McKenney */ 850dad81a20SPaul E. McKenney void synchronize_srcu_expedited(struct srcu_struct *sp) 851dad81a20SPaul E. McKenney { 8521e9a038bSPaul E. McKenney __synchronize_srcu(sp, rcu_gp_is_normal()); 853dad81a20SPaul E. McKenney } 854dad81a20SPaul E. McKenney EXPORT_SYMBOL_GPL(synchronize_srcu_expedited); 855dad81a20SPaul E. McKenney 856dad81a20SPaul E. McKenney /** 857dad81a20SPaul E. McKenney * synchronize_srcu - wait for prior SRCU read-side critical-section completion 858dad81a20SPaul E. McKenney * @sp: srcu_struct with which to synchronize. 859dad81a20SPaul E. McKenney * 860dad81a20SPaul E. McKenney * Wait for the count to drain to zero of both indexes. To avoid the 861dad81a20SPaul E. McKenney * possible starvation of synchronize_srcu(), it waits for the count of 862da915ad5SPaul E. McKenney * the index=((->srcu_idx & 1) ^ 1) to drain to zero at first, 863da915ad5SPaul E. McKenney * and then flip the srcu_idx and wait for the count of the other index. 864dad81a20SPaul E. McKenney * 865dad81a20SPaul E. McKenney * Can block; must be called from process context. 866dad81a20SPaul E. McKenney * 867dad81a20SPaul E. McKenney * Note that it is illegal to call synchronize_srcu() from the corresponding 868dad81a20SPaul E. McKenney * SRCU read-side critical section; doing so will result in deadlock. 869dad81a20SPaul E. 
McKenney * However, it is perfectly legal to call synchronize_srcu() on one 870dad81a20SPaul E. McKenney * srcu_struct from some other srcu_struct's read-side critical section, 871dad81a20SPaul E. McKenney * as long as the resulting graph of srcu_structs is acyclic. 872dad81a20SPaul E. McKenney * 873dad81a20SPaul E. McKenney * There are memory-ordering constraints implied by synchronize_srcu(). 874dad81a20SPaul E. McKenney * On systems with more than one CPU, when synchronize_srcu() returns, 875dad81a20SPaul E. McKenney * each CPU is guaranteed to have executed a full memory barrier since 876dad81a20SPaul E. McKenney * the end of its last corresponding SRCU read-side critical section 877dad81a20SPaul E. McKenney * whose beginning preceded the call to synchronize_srcu(). In addition, 878dad81a20SPaul E. McKenney * each CPU having an SRCU read-side critical section that extends beyond 879dad81a20SPaul E. McKenney * the return from synchronize_srcu() is guaranteed to have executed a 880dad81a20SPaul E. McKenney * full memory barrier after the beginning of synchronize_srcu() and before 881dad81a20SPaul E. McKenney * the beginning of that SRCU read-side critical section. Note that these 882dad81a20SPaul E. McKenney * guarantees include CPUs that are offline, idle, or executing in user mode, 883dad81a20SPaul E. McKenney * as well as CPUs that are executing in the kernel. 884dad81a20SPaul E. McKenney * 885dad81a20SPaul E. McKenney * Furthermore, if CPU A invoked synchronize_srcu(), which returned 886dad81a20SPaul E. McKenney * to its caller on CPU B, then both CPU A and CPU B are guaranteed 887dad81a20SPaul E. McKenney * to have executed a full memory barrier during the execution of 888dad81a20SPaul E. McKenney * synchronize_srcu(). This guarantee applies even if CPU A and CPU B 889dad81a20SPaul E. McKenney * are the same CPU, but again only if the system has more than one CPU. 890dad81a20SPaul E. McKenney * 891dad81a20SPaul E.
McKenney * Of course, these memory-ordering guarantees apply only when 892dad81a20SPaul E. McKenney * synchronize_srcu(), srcu_read_lock(), and srcu_read_unlock() are 893dad81a20SPaul E. McKenney * passed the same srcu_struct structure. 8942da4b2a7SPaul E. McKenney * 8952da4b2a7SPaul E. McKenney * If SRCU is likely idle, expedite the first request. This semantic 8962da4b2a7SPaul E. McKenney * was provided by Classic SRCU, and is relied upon by its users, so TREE 8972da4b2a7SPaul E. McKenney * SRCU must also provide it. Note that detecting idleness is heuristic 8982da4b2a7SPaul E. McKenney * and subject to both false positives and negatives. 899dad81a20SPaul E. McKenney */ 900dad81a20SPaul E. McKenney void synchronize_srcu(struct srcu_struct *sp) 901dad81a20SPaul E. McKenney { 9022da4b2a7SPaul E. McKenney if (srcu_might_be_idle(sp) || rcu_gp_is_expedited()) 903dad81a20SPaul E. McKenney synchronize_srcu_expedited(sp); 904dad81a20SPaul E. McKenney else 9051e9a038bSPaul E. McKenney __synchronize_srcu(sp, true); 906dad81a20SPaul E. McKenney } 907dad81a20SPaul E. McKenney EXPORT_SYMBOL_GPL(synchronize_srcu); 908dad81a20SPaul E. McKenney 909da915ad5SPaul E. McKenney /* 910da915ad5SPaul E. McKenney * Callback function for srcu_barrier() use. 911da915ad5SPaul E. McKenney */ 912da915ad5SPaul E. McKenney static void srcu_barrier_cb(struct rcu_head *rhp) 913da915ad5SPaul E. McKenney { 914da915ad5SPaul E. McKenney struct srcu_data *sdp; 915da915ad5SPaul E. McKenney struct srcu_struct *sp; 916da915ad5SPaul E. McKenney 917da915ad5SPaul E. McKenney sdp = container_of(rhp, struct srcu_data, srcu_barrier_head); 918da915ad5SPaul E. McKenney sp = sdp->sp; 919da915ad5SPaul E. McKenney if (atomic_dec_and_test(&sp->srcu_barrier_cpu_cnt)) 920da915ad5SPaul E. McKenney complete(&sp->srcu_barrier_completion); 921da915ad5SPaul E. McKenney } 922da915ad5SPaul E. McKenney 923dad81a20SPaul E. McKenney /** 924dad81a20SPaul E. 
McKenney * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete. 925dad81a20SPaul E. McKenney * @sp: srcu_struct on which to wait for in-flight callbacks. 926dad81a20SPaul E. McKenney */ 927dad81a20SPaul E. McKenney void srcu_barrier(struct srcu_struct *sp) 928dad81a20SPaul E. McKenney { 929da915ad5SPaul E. McKenney int cpu; 930da915ad5SPaul E. McKenney struct srcu_data *sdp; 931da915ad5SPaul E. McKenney unsigned long s = rcu_seq_snap(&sp->srcu_barrier_seq); 932da915ad5SPaul E. McKenney 933da915ad5SPaul E. McKenney check_init_srcu_struct(sp); 934da915ad5SPaul E. McKenney mutex_lock(&sp->srcu_barrier_mutex); 935da915ad5SPaul E. McKenney if (rcu_seq_done(&sp->srcu_barrier_seq, s)) { 936da915ad5SPaul E. McKenney smp_mb(); /* Force ordering following return. */ 937da915ad5SPaul E. McKenney mutex_unlock(&sp->srcu_barrier_mutex); 938da915ad5SPaul E. McKenney return; /* Someone else did our work for us. */ 939da915ad5SPaul E. McKenney } 940da915ad5SPaul E. McKenney rcu_seq_start(&sp->srcu_barrier_seq); 941da915ad5SPaul E. McKenney init_completion(&sp->srcu_barrier_completion); 942da915ad5SPaul E. McKenney 943da915ad5SPaul E. McKenney /* Initial count prevents reaching zero until all CBs are posted. */ 944da915ad5SPaul E. McKenney atomic_set(&sp->srcu_barrier_cpu_cnt, 1); 945da915ad5SPaul E. McKenney 946da915ad5SPaul E. McKenney /* 947da915ad5SPaul E. McKenney * Each pass through this loop enqueues a callback, but only 948da915ad5SPaul E. McKenney * on CPUs already having callbacks enqueued. Note that if 949da915ad5SPaul E. McKenney * a CPU already has callbacks enqueued, it must have already 950da915ad5SPaul E. McKenney * registered the need for a future grace period, so all we 951da915ad5SPaul E. McKenney * need do is enqueue a callback that will use the same 952da915ad5SPaul E. McKenney * grace period as the last callback already in the queue. 953da915ad5SPaul E. McKenney */ 954da915ad5SPaul E. McKenney for_each_possible_cpu(cpu) { 955da915ad5SPaul E.
McKenney sdp = per_cpu_ptr(sp->sda, cpu); 956da915ad5SPaul E. McKenney spin_lock_irq(&sdp->lock); 957da915ad5SPaul E. McKenney atomic_inc(&sp->srcu_barrier_cpu_cnt); 958da915ad5SPaul E. McKenney sdp->srcu_barrier_head.func = srcu_barrier_cb; 959da915ad5SPaul E. McKenney if (!rcu_segcblist_entrain(&sdp->srcu_cblist, 960da915ad5SPaul E. McKenney &sdp->srcu_barrier_head, 0)) 961da915ad5SPaul E. McKenney atomic_dec(&sp->srcu_barrier_cpu_cnt); 962da915ad5SPaul E. McKenney spin_unlock_irq(&sdp->lock); 963da915ad5SPaul E. McKenney } 964da915ad5SPaul E. McKenney 965da915ad5SPaul E. McKenney /* Remove the initial count, at which point reaching zero can happen. */ 966da915ad5SPaul E. McKenney if (atomic_dec_and_test(&sp->srcu_barrier_cpu_cnt)) 967da915ad5SPaul E. McKenney complete(&sp->srcu_barrier_completion); 968da915ad5SPaul E. McKenney wait_for_completion(&sp->srcu_barrier_completion); 969da915ad5SPaul E. McKenney 970da915ad5SPaul E. McKenney rcu_seq_end(&sp->srcu_barrier_seq); 971da915ad5SPaul E. McKenney mutex_unlock(&sp->srcu_barrier_mutex); 972dad81a20SPaul E. McKenney } 973dad81a20SPaul E. McKenney EXPORT_SYMBOL_GPL(srcu_barrier); 974dad81a20SPaul E. McKenney 975dad81a20SPaul E. McKenney /** 976dad81a20SPaul E. McKenney * srcu_batches_completed - return batches completed. 977dad81a20SPaul E. McKenney * @sp: srcu_struct on which to report batch completion. 978dad81a20SPaul E. McKenney * 979dad81a20SPaul E. McKenney * Report the number of batches, correlated with, but not necessarily 980dad81a20SPaul E. McKenney * precisely the same as, the number of grace periods that have elapsed. 981dad81a20SPaul E. McKenney */ 982dad81a20SPaul E. McKenney unsigned long srcu_batches_completed(struct srcu_struct *sp) 983dad81a20SPaul E. McKenney { 984da915ad5SPaul E. McKenney return sp->srcu_idx; 985dad81a20SPaul E. McKenney } 986dad81a20SPaul E. McKenney EXPORT_SYMBOL_GPL(srcu_batches_completed); 987dad81a20SPaul E. McKenney 988dad81a20SPaul E. McKenney /* 989da915ad5SPaul E. 
McKenney * Core SRCU state machine. Push state bits of ->srcu_gp_seq 990da915ad5SPaul E. McKenney * to SRCU_STATE_SCAN2, and invoke srcu_gp_end() when scan has 991da915ad5SPaul E. McKenney * completed in that state. 992dad81a20SPaul E. McKenney */ 993da915ad5SPaul E. McKenney static void srcu_advance_state(struct srcu_struct *sp) 994dad81a20SPaul E. McKenney { 995dad81a20SPaul E. McKenney int idx; 996dad81a20SPaul E. McKenney 997da915ad5SPaul E. McKenney mutex_lock(&sp->srcu_gp_mutex); 998da915ad5SPaul E. McKenney 999dad81a20SPaul E. McKenney /* 1000dad81a20SPaul E. McKenney * Because readers might be delayed for an extended period after 1001da915ad5SPaul E. McKenney * fetching ->srcu_idx for their index, at any point in time there 1002dad81a20SPaul E. McKenney * might well be readers using both idx=0 and idx=1. We therefore 1003dad81a20SPaul E. McKenney * need to wait for readers to clear from both index values before 1004dad81a20SPaul E. McKenney * invoking a callback. 1005dad81a20SPaul E. McKenney * 1006dad81a20SPaul E. McKenney * The load-acquire ensures that we see the accesses performed 1007dad81a20SPaul E. McKenney * by the prior grace period. 1008dad81a20SPaul E. McKenney */ 1009dad81a20SPaul E. McKenney idx = rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq)); /* ^^^ */ 1010dad81a20SPaul E. McKenney if (idx == SRCU_STATE_IDLE) { 1011da915ad5SPaul E. McKenney spin_lock_irq(&sp->gp_lock); 1012da915ad5SPaul E. McKenney if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) { 1013da915ad5SPaul E. McKenney WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq)); 1014da915ad5SPaul E. McKenney spin_unlock_irq(&sp->gp_lock); 1015da915ad5SPaul E. McKenney mutex_unlock(&sp->srcu_gp_mutex); 1016dad81a20SPaul E. McKenney return; 1017dad81a20SPaul E. McKenney } 1018dad81a20SPaul E. McKenney idx = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)); 1019dad81a20SPaul E. McKenney if (idx == SRCU_STATE_IDLE) 1020dad81a20SPaul E. McKenney srcu_gp_start(sp); 1021da915ad5SPaul E. 
McKenney spin_unlock_irq(&sp->gp_lock); 1022da915ad5SPaul E. McKenney if (idx != SRCU_STATE_IDLE) { 1023da915ad5SPaul E. McKenney mutex_unlock(&sp->srcu_gp_mutex); 1024dad81a20SPaul E. McKenney return; /* Someone else started the grace period. */ 1025dad81a20SPaul E. McKenney } 1026da915ad5SPaul E. McKenney } 1027dad81a20SPaul E. McKenney 1028dad81a20SPaul E. McKenney if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_SCAN1) { 1029da915ad5SPaul E. McKenney idx = 1 ^ (sp->srcu_idx & 1); 1030da915ad5SPaul E. McKenney if (!try_check_zero(sp, idx, 1)) { 1031da915ad5SPaul E. McKenney mutex_unlock(&sp->srcu_gp_mutex); 1032dad81a20SPaul E. McKenney return; /* readers present, retry later. */ 1033da915ad5SPaul E. McKenney } 1034dad81a20SPaul E. McKenney srcu_flip(sp); 1035dad81a20SPaul E. McKenney rcu_seq_set_state(&sp->srcu_gp_seq, SRCU_STATE_SCAN2); 1036dad81a20SPaul E. McKenney } 1037dad81a20SPaul E. McKenney 1038dad81a20SPaul E. McKenney if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_SCAN2) { 1039dad81a20SPaul E. McKenney 1040dad81a20SPaul E. McKenney /* 1041dad81a20SPaul E. McKenney * SRCU read-side critical sections are normally short, 1042dad81a20SPaul E. McKenney * so check at least twice in quick succession after a flip. 1043dad81a20SPaul E. McKenney */ 1044da915ad5SPaul E. McKenney idx = 1 ^ (sp->srcu_idx & 1); 1045da915ad5SPaul E. McKenney if (!try_check_zero(sp, idx, 2)) { 1046da915ad5SPaul E. McKenney mutex_unlock(&sp->srcu_gp_mutex); 1047da915ad5SPaul E. McKenney return; /* readers present, retry later. */ 1048da915ad5SPaul E. McKenney } 1049da915ad5SPaul E. McKenney srcu_gp_end(sp); /* Releases ->srcu_gp_mutex. */ 1050dad81a20SPaul E. McKenney } 1051dad81a20SPaul E. McKenney } 1052dad81a20SPaul E. McKenney 1053dad81a20SPaul E. McKenney /* 1054dad81a20SPaul E. McKenney * Invoke a limited number of SRCU callbacks that have passed through 1055dad81a20SPaul E. McKenney * their grace period. 
If there are more to do, SRCU will reschedule 1056dad81a20SPaul E. McKenney * the workqueue. Note that needed memory barriers have been executed 1057dad81a20SPaul E. McKenney * in this task's context by srcu_readers_active_idx_check(). 1058dad81a20SPaul E. McKenney */ 1059da915ad5SPaul E. McKenney static void srcu_invoke_callbacks(struct work_struct *work) 1060dad81a20SPaul E. McKenney { 1061da915ad5SPaul E. McKenney bool more; 1062dad81a20SPaul E. McKenney struct rcu_cblist ready_cbs; 1063dad81a20SPaul E. McKenney struct rcu_head *rhp; 1064da915ad5SPaul E. McKenney struct srcu_data *sdp; 1065da915ad5SPaul E. McKenney struct srcu_struct *sp; 1066dad81a20SPaul E. McKenney 1067da915ad5SPaul E. McKenney sdp = container_of(work, struct srcu_data, work.work); 1068da915ad5SPaul E. McKenney sp = sdp->sp; 1069dad81a20SPaul E. McKenney rcu_cblist_init(&ready_cbs); 1070da915ad5SPaul E. McKenney spin_lock_irq(&sdp->lock); 1071da915ad5SPaul E. McKenney smp_mb(); /* Old grace periods before callback invocation! */ 1072da915ad5SPaul E. McKenney rcu_segcblist_advance(&sdp->srcu_cblist, 1073da915ad5SPaul E. McKenney rcu_seq_current(&sp->srcu_gp_seq)); 1074da915ad5SPaul E. McKenney if (sdp->srcu_cblist_invoking || 1075da915ad5SPaul E. McKenney !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) { 1076da915ad5SPaul E. McKenney spin_unlock_irq(&sdp->lock); 1077da915ad5SPaul E. McKenney return; /* Someone else on the job or nothing to do. */ 1078da915ad5SPaul E. McKenney } 1079da915ad5SPaul E. McKenney 1080da915ad5SPaul E. McKenney /* We are on the job! Extract and invoke ready callbacks. */ 1081da915ad5SPaul E. McKenney sdp->srcu_cblist_invoking = true; 1082da915ad5SPaul E. McKenney rcu_segcblist_extract_done_cbs(&sdp->srcu_cblist, &ready_cbs); 1083da915ad5SPaul E. McKenney spin_unlock_irq(&sdp->lock); 1084dad81a20SPaul E. McKenney rhp = rcu_cblist_dequeue(&ready_cbs); 1085dad81a20SPaul E. McKenney for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) { 1086dad81a20SPaul E. 
McKenney local_bh_disable(); 1087dad81a20SPaul E. McKenney rhp->func(rhp); 1088dad81a20SPaul E. McKenney local_bh_enable(); 1089dad81a20SPaul E. McKenney } 1090da915ad5SPaul E. McKenney 1091da915ad5SPaul E. McKenney /* 1092da915ad5SPaul E. McKenney * Update counts, accelerate new callbacks, and if needed, 1093da915ad5SPaul E. McKenney * schedule another round of callback invocation. 1094da915ad5SPaul E. McKenney */ 1095da915ad5SPaul E. McKenney spin_lock_irq(&sdp->lock); 1096da915ad5SPaul E. McKenney rcu_segcblist_insert_count(&sdp->srcu_cblist, &ready_cbs); 1097da915ad5SPaul E. McKenney (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, 1098da915ad5SPaul E. McKenney rcu_seq_snap(&sp->srcu_gp_seq)); 1099da915ad5SPaul E. McKenney sdp->srcu_cblist_invoking = false; 1100da915ad5SPaul E. McKenney more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist); 1101da915ad5SPaul E. McKenney spin_unlock_irq(&sdp->lock); 1102da915ad5SPaul E. McKenney if (more) 1103da915ad5SPaul E. McKenney srcu_schedule_cbs_sdp(sdp, 0); 1104dad81a20SPaul E. McKenney } 1105dad81a20SPaul E. McKenney 1106dad81a20SPaul E. McKenney /* 1107dad81a20SPaul E. McKenney * Finished one round of SRCU grace period. Start another if there are 1108dad81a20SPaul E. McKenney * more SRCU callbacks queued, otherwise put SRCU into not-running state. 1109dad81a20SPaul E. McKenney */ 1110dad81a20SPaul E. McKenney static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay) 1111dad81a20SPaul E. McKenney { 1112da915ad5SPaul E. McKenney bool pushgp = true; 1113dad81a20SPaul E. McKenney 1114da915ad5SPaul E. McKenney spin_lock_irq(&sp->gp_lock); 1115da915ad5SPaul E. McKenney if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) { 1116da915ad5SPaul E. McKenney if (!WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq))) { 1117da915ad5SPaul E. McKenney /* All requests fulfilled, time to go idle. */ 1118da915ad5SPaul E. McKenney pushgp = false; 1119dad81a20SPaul E. McKenney } 1120da915ad5SPaul E. 
McKenney } else if (!rcu_seq_state(sp->srcu_gp_seq)) { 1121da915ad5SPaul E. McKenney /* Outstanding request and no GP. Start one. */ 1122da915ad5SPaul E. McKenney srcu_gp_start(sp); 1123da915ad5SPaul E. McKenney } 1124da915ad5SPaul E. McKenney spin_unlock_irq(&sp->gp_lock); 1125dad81a20SPaul E. McKenney 1126da915ad5SPaul E. McKenney if (pushgp) 1127dad81a20SPaul E. McKenney queue_delayed_work(system_power_efficient_wq, &sp->work, delay); 1128dad81a20SPaul E. McKenney } 1129dad81a20SPaul E. McKenney 1130dad81a20SPaul E. McKenney /* 1131dad81a20SPaul E. McKenney * This is the work-queue function that handles SRCU grace periods. 1132dad81a20SPaul E. McKenney */ 1133dad81a20SPaul E. McKenney void process_srcu(struct work_struct *work) 1134dad81a20SPaul E. McKenney { 1135dad81a20SPaul E. McKenney struct srcu_struct *sp; 1136dad81a20SPaul E. McKenney 1137dad81a20SPaul E. McKenney sp = container_of(work, struct srcu_struct, work.work); 1138dad81a20SPaul E. McKenney 1139da915ad5SPaul E. McKenney srcu_advance_state(sp); 11401e9a038bSPaul E. McKenney srcu_reschedule(sp, srcu_get_delay(sp)); 1141dad81a20SPaul E. McKenney } 1142dad81a20SPaul E. McKenney EXPORT_SYMBOL_GPL(process_srcu); 11437f6733c3SPaul E. McKenney 11447f6733c3SPaul E. McKenney void srcutorture_get_gp_data(enum rcutorture_type test_type, 11457f6733c3SPaul E. McKenney struct srcu_struct *sp, int *flags, 11461e9a038bSPaul E. McKenney unsigned long *gpnum, unsigned long *completed) 11477f6733c3SPaul E. McKenney { 11487f6733c3SPaul E. McKenney if (test_type != SRCU_FLAVOR) 11497f6733c3SPaul E. McKenney return; 11507f6733c3SPaul E. McKenney *flags = 0; 11517f6733c3SPaul E. McKenney *completed = rcu_seq_ctr(sp->srcu_gp_seq); 11527f6733c3SPaul E. McKenney *gpnum = rcu_seq_ctr(sp->srcu_gp_seq_needed); 11537f6733c3SPaul E. McKenney } 11547f6733c3SPaul E. McKenney EXPORT_SYMBOL_GPL(srcutorture_get_gp_data); 1155