/* SPDX-License-Identifier: GPL-2.0+ */
/*
 * Sleepable Read-Copy Update mechanism for mutual exclusion,
 * tree variant.
 *
 * Copyright (C) IBM Corporation, 2017
 *
 * Author: Paul McKenney <paulmck@linux.ibm.com>
 */

#ifndef _LINUX_SRCU_TREE_H
#define _LINUX_SRCU_TREE_H

#include <linux/rcu_node_tree.h>
#include <linux/completion.h>

struct srcu_node;
struct srcu_struct;

/* One element of the srcu_data srcu_ctrs array. */
struct srcu_ctr {
	atomic_long_t srcu_locks;		/* Locks per CPU. */
	atomic_long_t srcu_unlocks;		/* Unlocks per CPU. */
};

/*
 * Per-CPU structure feeding into leaf srcu_node, similar in function
 * to rcu_data.
 */
struct srcu_data {
	/* Read-side state. */
	struct srcu_ctr srcu_ctrs[2];		/* Locks and unlocks per CPU. */
	int srcu_reader_flavor;			/* Reader flavor for srcu_struct structure? */
						/* Values: SRCU_READ_FLAVOR_.* */

	/* Update-side state. */
	raw_spinlock_t __private lock ____cacheline_internodealigned_in_smp;
	struct rcu_segcblist srcu_cblist;	/* List of callbacks.*/
	unsigned long srcu_gp_seq_needed;	/* Furthest future GP needed. */
	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
	bool srcu_cblist_invoking;		/* Invoking these CBs? */
	struct timer_list delay_work;		/* Delay for CB invoking */
	struct work_struct work;		/* Context for CB invoking. */
	struct rcu_head srcu_barrier_head;	/* For srcu_barrier() use. */
	struct rcu_head srcu_ec_head;		/* For srcu_expedite_current() use. */
	int srcu_ec_state;			/* State for srcu_expedite_current(). */
	struct srcu_node *mynode;		/* Leaf srcu_node. */
	unsigned long grpmask;			/* Mask for leaf srcu_node */
						/*  ->srcu_data_have_cbs[]. */
	int cpu;
	struct srcu_struct *ssp;
};

/*
 * Node in SRCU combining tree, similar in function to rcu_node.
 */
struct srcu_node {
	raw_spinlock_t __private lock;
	unsigned long srcu_have_cbs[4];		/* GP seq for children having CBs, but only */
						/*  if greater than ->srcu_gp_seq. */
	unsigned long srcu_data_have_cbs[4];	/* Which srcu_data structs have CBs for given GP? */
	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
	struct srcu_node *srcu_parent;		/* Next up in tree. */
	int grplo;				/* Least CPU for node. */
	int grphi;				/* Biggest CPU for node. */
};

/*
 * Per-SRCU-domain structure, update-side data linked from srcu_struct.
 */
struct srcu_usage {
	struct srcu_node *node;			/* Combining tree. */
	struct srcu_node *level[RCU_NUM_LVLS + 1];
						/* First node at each level. */
	int srcu_size_state;			/* Small-to-big transition state. */
	struct mutex srcu_cb_mutex;		/* Serialize CB preparation. */
	raw_spinlock_t __private lock;		/* Protect counters and size state. */
	struct mutex srcu_gp_mutex;		/* Serialize GP work. */
	unsigned long srcu_gp_seq;		/* Grace-period seq #. */
	unsigned long srcu_gp_seq_needed;	/* Latest gp_seq needed. */
	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
	unsigned long srcu_gp_start;		/* Last GP start timestamp (jiffies) */
	unsigned long srcu_last_gp_end;		/* Last GP end timestamp (ns) */
	unsigned long srcu_size_jiffies;	/* Current contention-measurement interval. */
	unsigned long srcu_n_lock_retries;	/* Contention events in current interval. */
	unsigned long srcu_n_exp_nodelay;	/* # expedited no-delays in current GP phase. */
	bool sda_is_static;			/* May ->sda be passed to free_percpu()? */
	unsigned long srcu_barrier_seq;		/* srcu_barrier seq #. */
	struct mutex srcu_barrier_mutex;	/* Serialize barrier ops. */
	struct completion srcu_barrier_completion;
						/* Awaken barrier rq at end. */
	atomic_t srcu_barrier_cpu_cnt;		/* # CPUs not yet posting a */
						/*  callback for the barrier */
						/*  operation. */
	unsigned long reschedule_jiffies;
	unsigned long reschedule_count;
	struct delayed_work work;
	struct irq_work irq_work;
	struct srcu_struct *srcu_ssp;
};

/*
 * Per-SRCU-domain structure, similar in function to rcu_state.
 */
struct srcu_struct {
	struct srcu_ctr __percpu *srcu_ctrp;
	struct srcu_data __percpu *sda;		/* Per-CPU srcu_data array. */
	u8 srcu_reader_flavor;
	struct lockdep_map dep_map;
	struct srcu_usage *srcu_sup;		/* Update-side data. */
};

// Values for size state variable (->srcu_size_state).  Once the state
// has been set to SRCU_SIZE_ALLOC, the grace-period code advances through
// this state machine one step per grace period until the SRCU_SIZE_BIG state
// is reached.  Otherwise, the state machine remains in the SRCU_SIZE_SMALL
// state indefinitely.
#define SRCU_SIZE_SMALL		0	// No srcu_node combining tree, ->node == NULL
#define SRCU_SIZE_ALLOC		1	// An srcu_node tree is being allocated, initialized,
					//  and then referenced by ->node.  It will not yet be used.
#define SRCU_SIZE_WAIT_BARRIER	2	// The srcu_node tree starts being used by everything
					//  except call_srcu(), especially by srcu_barrier().
					//  By the end of this state, all CPUs and threads
					//  are aware of this tree's existence.
#define SRCU_SIZE_WAIT_CALL	3	// The srcu_node tree starts being used by call_srcu().
					//  By the end of this state, all of the call_srcu()
					//  invocations that were running on a non-boot CPU
					//  and using the boot CPU's callback queue will have
					//  completed.
#define SRCU_SIZE_WAIT_CBS1	4	// Don't trust the ->srcu_have_cbs[] grace-period
#define SRCU_SIZE_WAIT_CBS2	5	//  sequence elements or the ->srcu_data_have_cbs[]
#define SRCU_SIZE_WAIT_CBS3	6	//  CPU-bitmask elements until all four elements of
#define SRCU_SIZE_WAIT_CBS4	7	//  each array have been initialized.
#define SRCU_SIZE_BIG		8	// The srcu_node combining tree is fully initialized
					//  and all aspects of it are being put to use.

/* Values for state variable (bottom bits of ->srcu_gp_seq). */
#define SRCU_STATE_IDLE		0
#define SRCU_STATE_SCAN1	1
#define SRCU_STATE_SCAN2	2

/* Values for srcu_expedite_current() state (->srcu_ec_state). */
#define SRCU_EC_IDLE	0
#define SRCU_EC_PENDING	1
#define SRCU_EC_REPOST	2

/*
 * Values for initializing gp sequence fields. Higher values allow wrap arounds to
 * occur earlier.
 * The second value with state is useful in the case of static initialization of
 * srcu_usage where srcu_gp_seq_needed is expected to have some state value in its
 * lower bits (or else it will appear to be already initialized within
 * the call check_init_srcu_struct()).
 */
#define SRCU_GP_SEQ_INITIAL_VAL ((0UL - 100UL) << RCU_SEQ_CTR_SHIFT)
#define SRCU_GP_SEQ_INITIAL_VAL_WITH_STATE (SRCU_GP_SEQ_INITIAL_VAL - 1)

#define __SRCU_USAGE_INIT(name)									\
{												\
	.lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock),						\
	.srcu_gp_seq = SRCU_GP_SEQ_INITIAL_VAL,							\
	.srcu_gp_seq_needed = SRCU_GP_SEQ_INITIAL_VAL_WITH_STATE,				\
	.srcu_gp_seq_needed_exp = SRCU_GP_SEQ_INITIAL_VAL,					\
	.work = __DELAYED_WORK_INITIALIZER(name.work, NULL, 0),					\
}

#define __SRCU_STRUCT_INIT_COMMON(name, usage_name, fast)					\
	.srcu_sup = &usage_name,								\
	.srcu_reader_flavor = fast,								\
	__SRCU_DEP_MAP_INIT(name)

#define __SRCU_STRUCT_INIT_MODULE(name, usage_name, fast)					\
{												\
	__SRCU_STRUCT_INIT_COMMON(name, usage_name, fast)					\
}

#define __SRCU_STRUCT_INIT(name, usage_name, pcpu_name, fast)					\
{												\
	.sda = &pcpu_name,									\
	.srcu_ctrp = &pcpu_name.srcu_ctrs[0],							\
	__SRCU_STRUCT_INIT_COMMON(name, usage_name, fast)					\
}

/*
 * Define and initialize a srcu struct at build time.
 * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it.
 *
 * Note that although DEFINE_STATIC_SRCU() hides the name from other
 * files, the per-CPU variable rules nevertheless require that the
 * chosen name be globally unique.  These rules also prohibit use of
 * DEFINE_STATIC_SRCU() within a function.  If these rules are too
 * restrictive, declare the srcu_struct manually.  For example, in
 * each file:
 *
 * static struct srcu_struct my_srcu;
 *
 * Then, before the first use of each my_srcu, manually initialize it:
 *
 * init_srcu_struct(&my_srcu);
 *
 * See include/linux/percpu-defs.h for the rules on per-CPU variables.
 *
 * DEFINE_SRCU_FAST() and DEFINE_STATIC_SRCU_FAST() create an srcu_struct
 * and associated structures whose readers must be of the SRCU-fast variety.
 * DEFINE_SRCU_FAST_UPDOWN() and DEFINE_STATIC_SRCU_FAST_UPDOWN() create
 * an srcu_struct and associated structures whose readers must be of the
 * SRCU-fast-updown variety.  The key point (aside from error checking) with
 * both varieties is that the grace periods must use synchronize_rcu()
 * instead of smp_mb(), and given that the first (for example)
 * srcu_read_lock_fast() might race with the first synchronize_srcu(),
 * this difference must be specified at initialization time.
 */
#ifdef MODULE
// In modules, the srcu_struct is registered via a section-placed pointer
// so that module load/unload can initialize and clean it up; the per-CPU
// data is allocated at module-load time rather than statically.
# define __DEFINE_SRCU(name, fast, is_static)							\
	static struct srcu_usage name##_srcu_usage = __SRCU_USAGE_INIT(name##_srcu_usage);	\
	is_static struct srcu_struct name = __SRCU_STRUCT_INIT_MODULE(name, name##_srcu_usage,	\
								      fast);			\
	extern struct srcu_struct * const __srcu_struct_##name;					\
	struct srcu_struct * const __srcu_struct_##name					\
		__section("___srcu_struct_ptrs") = &name
#else
// Built-in case: the per-CPU srcu_data array can be defined statically.
# define __DEFINE_SRCU(name, fast, is_static)							\
	static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data);				\
	static struct srcu_usage name##_srcu_usage = __SRCU_USAGE_INIT(name##_srcu_usage);	\
	is_static struct srcu_struct name =							\
		__SRCU_STRUCT_INIT(name, name##_srcu_usage, name##_srcu_data, fast)
#endif
#define DEFINE_SRCU(name)		__DEFINE_SRCU(name, 0, /* not static */)
#define DEFINE_STATIC_SRCU(name)	__DEFINE_SRCU(name, 0, static)
#define DEFINE_SRCU_FAST(name)		__DEFINE_SRCU(name, SRCU_READ_FLAVOR_FAST, /* not static */)
#define DEFINE_STATIC_SRCU_FAST(name)	__DEFINE_SRCU(name, SRCU_READ_FLAVOR_FAST, static)
#define DEFINE_SRCU_FAST_UPDOWN(name)	__DEFINE_SRCU(name, SRCU_READ_FLAVOR_FAST_UPDOWN,	\
						      /* not static */)
#define DEFINE_STATIC_SRCU_FAST_UPDOWN(name)							\
	__DEFINE_SRCU(name, SRCU_READ_FLAVOR_FAST_UPDOWN, static)

/* Update-side entry points; see their definitions for full descriptions. */
int __srcu_read_lock(struct srcu_struct *ssp) __acquires_shared(ssp);
void synchronize_srcu_expedited(struct srcu_struct *ssp);
void srcu_barrier(struct srcu_struct *ssp);
void srcu_expedite_current(struct srcu_struct *ssp);
void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf);

// Converts a per-CPU pointer to an ->srcu_ctrs[] array element to that
// element's index.
245 static inline bool __srcu_ptr_to_ctr(struct srcu_struct *ssp, struct srcu_ctr __percpu *scpp) 246 { 247 return scpp - &ssp->sda->srcu_ctrs[0]; 248 } 249 250 // Converts an integer to a per-CPU pointer to the corresponding 251 // ->srcu_ctrs[] array element. 252 static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ssp, int idx) 253 { 254 return &ssp->sda->srcu_ctrs[idx]; 255 } 256 257 /* 258 * Counts the new reader in the appropriate per-CPU element of the 259 * srcu_struct. Returns a pointer that must be passed to the matching 260 * srcu_read_unlock_fast(). 261 * 262 * Note that both this_cpu_inc() and atomic_long_inc() are RCU read-side 263 * critical sections either because they disables interrupts, because 264 * they are a single instruction, or because they are read-modify-write 265 * atomic operations, depending on the whims of the architecture. 266 * This matters because the SRCU-fast grace-period mechanism uses either 267 * synchronize_rcu() or synchronize_rcu_expedited(), that is, RCU, 268 * *not* SRCU, in order to eliminate the need for the read-side smp_mb() 269 * invocations that are used by srcu_read_lock() and srcu_read_unlock(). 270 * The __srcu_read_unlock_fast() function also relies on this same RCU 271 * (again, *not* SRCU) trick to eliminate the need for smp_mb(). 272 * 273 * The key point behind this RCU trick is that if any part of a given 274 * RCU reader precedes the beginning of a given RCU grace period, then 275 * the entirety of that RCU reader and everything preceding it happens 276 * before the end of that same RCU grace period. Similarly, if any part 277 * of a given RCU reader follows the end of a given RCU grace period, 278 * then the entirety of that RCU reader and everything following it 279 * happens after the beginning of that same RCU grace period. 
Therefore, 280 * the operations labeled Y in __srcu_read_lock_fast() and those labeled Z 281 * in __srcu_read_unlock_fast() are ordered against the corresponding SRCU 282 * read-side critical section from the viewpoint of the SRCU grace period. 283 * This is all the ordering that is required, hence no calls to smp_mb(). 284 * 285 * This means that __srcu_read_lock_fast() is not all that fast 286 * on architectures that support NMIs but do not supply NMI-safe 287 * implementations of this_cpu_inc(). 288 */ 289 static inline struct srcu_ctr __percpu notrace *__srcu_read_lock_fast(struct srcu_struct *ssp) 290 __acquires_shared(ssp) 291 { 292 struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp); 293 294 if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE)) 295 this_cpu_inc(scp->srcu_locks.counter); // Y, and implicit RCU reader. 296 else 297 atomic_long_inc(raw_cpu_ptr(&scp->srcu_locks)); // Y, and implicit RCU reader. 298 barrier(); /* Avoid leaking the critical section. */ 299 __acquire_shared(ssp); 300 return scp; 301 } 302 303 /* 304 * Removes the count for the old reader from the appropriate 305 * per-CPU element of the srcu_struct. Note that this may well be a 306 * different CPU than that which was incremented by the corresponding 307 * srcu_read_lock_fast(), but it must be within the same task. 308 * 309 * Please see the __srcu_read_lock_fast() function's header comment for 310 * information on implicit RCU readers and NMI safety. 311 */ 312 static inline void notrace 313 __srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) 314 __releases_shared(ssp) 315 { 316 __release_shared(ssp); 317 barrier(); /* Avoid leaking the critical section. */ 318 if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE)) 319 this_cpu_inc(scp->srcu_unlocks.counter); // Z, and implicit RCU reader. 320 else 321 atomic_long_inc(raw_cpu_ptr(&scp->srcu_unlocks)); // Z, and implicit RCU reader. 
322 } 323 324 /* 325 * Counts the new reader in the appropriate per-CPU element of the 326 * srcu_struct. Returns a pointer that must be passed to the matching 327 * srcu_read_unlock_fast_updown(). This type of reader is compatible 328 * with srcu_down_read_fast() and srcu_up_read_fast(). 329 * 330 * See the __srcu_read_lock_fast() comment for more details. 331 */ 332 static inline 333 struct srcu_ctr __percpu notrace *__srcu_read_lock_fast_updown(struct srcu_struct *ssp) 334 __acquires_shared(ssp) 335 { 336 struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp); 337 338 if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE)) 339 this_cpu_inc(scp->srcu_locks.counter); // Y, and implicit RCU reader. 340 else 341 atomic_long_inc(raw_cpu_ptr(&scp->srcu_locks)); // Y, and implicit RCU reader. 342 barrier(); /* Avoid leaking the critical section. */ 343 __acquire_shared(ssp); 344 return scp; 345 } 346 347 /* 348 * Removes the count for the old reader from the appropriate 349 * per-CPU element of the srcu_struct. Note that this may well be a 350 * different CPU than that which was incremented by the corresponding 351 * srcu_read_lock_fast(), but it must be within the same task. 352 * 353 * Please see the __srcu_read_lock_fast() function's header comment for 354 * information on implicit RCU readers and NMI safety. 355 */ 356 static inline void notrace 357 __srcu_read_unlock_fast_updown(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) 358 __releases_shared(ssp) 359 { 360 __release_shared(ssp); 361 barrier(); /* Avoid leaking the critical section. */ 362 if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE)) 363 this_cpu_inc(scp->srcu_unlocks.counter); // Z, and implicit RCU reader. 364 else 365 atomic_long_inc(raw_cpu_ptr(&scp->srcu_unlocks)); // Z, and implicit RCU reader. 366 } 367 368 void __srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor); 369 370 // Record SRCU-reader usage type only for CONFIG_PROVE_RCU=y kernels. 
371 static inline void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor) 372 { 373 if (IS_ENABLED(CONFIG_PROVE_RCU)) 374 __srcu_check_read_flavor(ssp, read_flavor); 375 } 376 377 #endif 378