// SPDX-License-Identifier: GPL-2.0-only
#define pr_fmt(fmt) "%s: " fmt, __func__

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/slab.h>
#include <linux/percpu-refcount.h>

/*
 * Initially, a percpu refcount is just a set of percpu counters. Initially, we
 * don't try to detect the ref hitting 0 - which means that get/put can just
 * increment or decrement the local counter. Note that the counter on a
 * particular cpu can (and will) wrap - this is fine; when we go to shut down,
 * the percpu counters will all sum to the correct value.
 *
 * (More precisely: because modular arithmetic is commutative, the sum of all
 * the percpu_count vars will be equal to what it would have been if all the
 * gets and puts were done to a single integer, even if some of the percpu
 * integers overflow or underflow.)
 *
 * The real trick to implementing percpu refcounts is shutdown. We can't detect
 * the ref hitting 0 on every put - this would require global synchronization
 * and defeat the whole purpose of using percpu refs.
 *
 * What we do is require the user to keep track of the initial refcount; we know
 * the ref can't hit 0 before the user drops the initial ref, so as long as we
 * convert to non-percpu mode before the initial ref is dropped, everything
 * works.
 *
 * Converting to non-percpu mode is done with some RCUish stuff in
 * percpu_ref_kill. Additionally, we need a bias value so that the
 * atomic_long_t can't hit 0 before we've added up all the percpu refs.
 */
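/*
 * Worked example of the wrap-around argument above (illustrative only, using
 * hypothetical 8-bit counters for brevity): if CPU0 does 200 gets (its counter
 * reads 200) and CPU1 does 300 gets followed by 500 puts (its counter wraps
 * and ends at 56), the per-cpu sum is 200 + 56 = 256 == 0 (mod 256), matching
 * the true balance of 200 + 300 - 500 = 0. Overflow on any single cpu never
 * corrupts the total, because only the final modular sum matters.
 */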
#define PERCPU_COUNT_BIAS	(1LU << (BITS_PER_LONG - 1))

static DEFINE_SPINLOCK(percpu_ref_switch_lock);
static DECLARE_WAIT_QUEUE_HEAD(percpu_ref_switch_waitq);

static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref)
{
	return (unsigned long __percpu *)
		(ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC_DEAD);
}

/**
 * percpu_ref_init - initialize a percpu refcount
 * @ref: percpu_ref to initialize
 * @release: function which will be called when refcount hits 0
 * @flags: PERCPU_REF_INIT_* flags
 * @gfp: allocation mask to use
 *
 * Initializes @ref. @ref starts out in percpu mode with a refcount of 1 unless
 * @flags contains PERCPU_REF_INIT_ATOMIC or PERCPU_REF_INIT_DEAD. These flags
 * change the start state to atomic with the latter setting the initial refcount
 * to 0. See the definitions of PERCPU_REF_INIT_* flags for flag behaviors.
 *
 * Note that @release must not sleep - it may potentially be called from RCU
 * callback context by percpu_ref_kill().
 */
int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release,
		    unsigned int flags, gfp_t gfp)
{
	size_t align = max_t(size_t, 1 << __PERCPU_REF_FLAG_BITS,
			     __alignof__(unsigned long));
	unsigned long start_count = 0;
	struct percpu_ref_data *data;

	ref->percpu_count_ptr = (unsigned long)
		__alloc_percpu_gfp(sizeof(unsigned long), align, gfp);
	if (!ref->percpu_count_ptr)
		return -ENOMEM;

	data = kzalloc(sizeof(*ref->data), gfp);
	if (!data) {
		free_percpu((void __percpu *)ref->percpu_count_ptr);
		return -ENOMEM;
	}

	data->force_atomic = flags & PERCPU_REF_INIT_ATOMIC;
	data->allow_reinit = flags & PERCPU_REF_ALLOW_REINIT;

	if (flags & (PERCPU_REF_INIT_ATOMIC | PERCPU_REF_INIT_DEAD)) {
		ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;
		data->allow_reinit = true;
	} else {
		start_count += PERCPU_COUNT_BIAS;
	}

	if (flags & PERCPU_REF_INIT_DEAD)
		ref->percpu_count_ptr |= __PERCPU_REF_DEAD;
	else
		start_count++;

	atomic_long_set(&data->count, start_count);

	data->release = release;
	data->confirm_switch = NULL;
	data->ref = ref;
	ref->data = data;
	return 0;
}
EXPORT_SYMBOL_GPL(percpu_ref_init);
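/*
 * Illustrative usage sketch (hypothetical "struct foo" and helpers, not part
 * of this file): a typical user embeds the percpu_ref in an object and tears
 * everything down from the release callback, which runs once the last
 * reference is dropped after percpu_ref_kill():
 *
 *	struct foo {
 *		struct percpu_ref ref;
 *		...
 *	};
 *
 *	static void foo_release(struct percpu_ref *ref)
 *	{
 *		struct foo *foo = container_of(ref, struct foo, ref);
 *
 *		percpu_ref_exit(&foo->ref);
 *		kfree(foo);
 *	}
 *
 *	err = percpu_ref_init(&foo->ref, foo_release, 0, GFP_KERNEL);
 */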
static void __percpu_ref_exit(struct percpu_ref *ref)
{
	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);

	if (percpu_count) {
		/* non-NULL confirm_switch indicates switching in progress */
		WARN_ON_ONCE(ref->data && ref->data->confirm_switch);
		free_percpu(percpu_count);
		ref->percpu_count_ptr = __PERCPU_REF_ATOMIC_DEAD;
	}
}

/**
 * percpu_ref_exit - undo percpu_ref_init()
 * @ref: percpu_ref to exit
 *
 * This function exits @ref. The caller is responsible for ensuring that
 * @ref is no longer in active use. The usual places to invoke this
 * function from are the @ref->release() callback or the init failure path
 * where percpu_ref_init() succeeded but other parts of the initialization
 * of the embedding object failed.
 */
void percpu_ref_exit(struct percpu_ref *ref)
{
	struct percpu_ref_data *data = ref->data;
	unsigned long flags;

	__percpu_ref_exit(ref);

	if (!data)
		return;

	spin_lock_irqsave(&percpu_ref_switch_lock, flags);
	ref->percpu_count_ptr |= atomic_long_read(&ref->data->count) <<
		__PERCPU_REF_FLAG_BITS;
	ref->data = NULL;
	spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);

	kfree(data);
}
EXPORT_SYMBOL_GPL(percpu_ref_exit);

static void percpu_ref_call_confirm_rcu(struct rcu_head *rcu)
{
	struct percpu_ref_data *data = container_of(rcu,
			struct percpu_ref_data, rcu);
	struct percpu_ref *ref = data->ref;

	data->confirm_switch(ref);
	data->confirm_switch = NULL;
	wake_up_all(&percpu_ref_switch_waitq);

	if (!data->allow_reinit)
		__percpu_ref_exit(ref);

	/* drop ref from percpu_ref_switch_to_atomic() */
	percpu_ref_put(ref);
}

static void percpu_ref_switch_to_atomic_rcu(struct rcu_head *rcu)
{
	struct percpu_ref_data *data = container_of(rcu,
			struct percpu_ref_data, rcu);
	struct percpu_ref *ref = data->ref;
	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
	unsigned long count = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		count += *per_cpu_ptr(percpu_count, cpu);

	pr_debug("global %lu percpu %lu\n",
		 atomic_long_read(&data->count), count);

	/*
	 * It's crucial that we sum the percpu counters _before_ adding the sum
	 * to &ref->count; since gets could be happening on one cpu while puts
	 * happen on another, adding a single cpu's count could cause
	 * @ref->count to hit 0 before we've got a consistent value - but the
	 * sum of all the counts will be consistent and correct.
	 *
	 * Subtracting the bias value then has to happen _after_ adding count to
	 * &ref->count; we need the bias value to prevent &ref->count from
	 * reaching 0 before we add the percpu counts. But doing it at the same
	 * time is equivalent and saves us atomic operations:
	 */
	atomic_long_add((long)count - PERCPU_COUNT_BIAS, &data->count);

	WARN_ONCE(atomic_long_read(&data->count) <= 0,
		  "percpu ref (%ps) <= 0 (%ld) after switching to atomic",
		  data->release, atomic_long_read(&data->count));

	/* @ref is viewed as dead on all CPUs, send out switch confirmation */
	percpu_ref_call_confirm_rcu(rcu);
}
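/*
 * Illustrative numbers for the bias handling above (not part of the
 * algorithm): a ref initialized in percpu mode starts &data->count at
 * PERCPU_COUNT_BIAS + 1. While the switch is in flight, CPUs that already
 * see __PERCPU_REF_ATOMIC apply their gets/puts directly to the atomic
 * counter, and the bias keeps it positive even if those are mostly puts.
 * If the percpu counters then sum to D (= percpu gets - puts), the single
 * atomic_long_add(D - PERCPU_COUNT_BIAS) above leaves 1 + D plus the net
 * atomic-mode activity, i.e. exactly the number of outstanding references.
 */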
static void percpu_ref_noop_confirm_switch(struct percpu_ref *ref)
{
}

static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref,
					  percpu_ref_func_t *confirm_switch)
{
	if (ref->percpu_count_ptr & __PERCPU_REF_ATOMIC) {
		if (confirm_switch)
			confirm_switch(ref);
		return;
	}

	/* switching from percpu to atomic */
	ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;

	/*
	 * Non-NULL ->confirm_switch is used to indicate that switching is
	 * in progress. Use noop one if unspecified.
	 */
	ref->data->confirm_switch = confirm_switch ?:
		percpu_ref_noop_confirm_switch;

	percpu_ref_get(ref);	/* put after confirmation */
	call_rcu(&ref->data->rcu, percpu_ref_switch_to_atomic_rcu);
}

static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
{
	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
	int cpu;

	BUG_ON(!percpu_count);

	if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC))
		return;

	if (WARN_ON_ONCE(!ref->data->allow_reinit))
		return;

	atomic_long_add(PERCPU_COUNT_BIAS, &ref->data->count);

	/*
	 * Restore per-cpu operation. smp_store_release() is paired
	 * with READ_ONCE() in __ref_is_percpu() and guarantees that the
	 * zeroing is visible to all percpu accesses which can see the
	 * following __PERCPU_REF_ATOMIC clearing.
	 */
	for_each_possible_cpu(cpu)
		*per_cpu_ptr(percpu_count, cpu) = 0;

	smp_store_release(&ref->percpu_count_ptr,
			  ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC);
}

static void __percpu_ref_switch_mode(struct percpu_ref *ref,
				     percpu_ref_func_t *confirm_switch)
{
	struct percpu_ref_data *data = ref->data;

	lockdep_assert_held(&percpu_ref_switch_lock);

	/*
	 * If the previous ATOMIC switching hasn't finished yet, wait for
	 * its completion. If the caller ensures that ATOMIC switching
	 * isn't in progress, this function can be called from any context.
	 */
	wait_event_lock_irq(percpu_ref_switch_waitq, !data->confirm_switch,
			    percpu_ref_switch_lock);

	if (data->force_atomic || (ref->percpu_count_ptr & __PERCPU_REF_DEAD))
		__percpu_ref_switch_to_atomic(ref, confirm_switch);
	else
		__percpu_ref_switch_to_percpu(ref);
}

/**
 * percpu_ref_switch_to_atomic - switch a percpu_ref to atomic mode
 * @ref: percpu_ref to switch to atomic mode
 * @confirm_switch: optional confirmation callback
 *
 * There's no reason to use this function for the usual reference counting.
 * Use percpu_ref_kill[_and_confirm]().
 *
 * Schedule switching of @ref to atomic mode. All its percpu counts will
 * be collected to the main atomic counter. On completion, when all CPUs
 * are guaranteed to be in atomic mode, @confirm_switch, which may not
 * block, is invoked. This function may be invoked concurrently with all
 * the get/put operations and can safely be mixed with kill and reinit
 * operations. Note that @ref will stay in atomic mode across kill/reinit
 * cycles until percpu_ref_switch_to_percpu() is called.
 *
 * This function may block if @ref is in the process of switching to atomic
 * mode. If the caller ensures that @ref is not in the process of
 * switching to atomic mode, this function can be called from any context.
 */
void percpu_ref_switch_to_atomic(struct percpu_ref *ref,
				 percpu_ref_func_t *confirm_switch)
{
	unsigned long flags;

	spin_lock_irqsave(&percpu_ref_switch_lock, flags);

	ref->data->force_atomic = true;
	__percpu_ref_switch_mode(ref, confirm_switch);

	spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
}
EXPORT_SYMBOL_GPL(percpu_ref_switch_to_atomic);

/**
 * percpu_ref_switch_to_atomic_sync - switch a percpu_ref to atomic mode
 * @ref: percpu_ref to switch to atomic mode
 *
 * Schedule switching the ref to atomic mode, and wait for the
 * switch to complete. Caller must ensure that no other thread
 * will switch back to percpu mode.
 */
void percpu_ref_switch_to_atomic_sync(struct percpu_ref *ref)
{
	percpu_ref_switch_to_atomic(ref, NULL);
	wait_event(percpu_ref_switch_waitq, !ref->data->confirm_switch);
}
EXPORT_SYMBOL_GPL(percpu_ref_switch_to_atomic_sync);
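/*
 * Illustrative sketch (hypothetical caller, not part of this file): because
 * the atomic state set here is sticky across kill/reinit cycles, a user that
 * expects a burst of kill/reinit churn could pin the ref in atomic mode and
 * only restore the per-cpu fast path once the churn is over:
 *
 *	percpu_ref_switch_to_atomic_sync(&foo->ref);
 *	... repeated percpu_ref_kill() / percpu_ref_reinit() cycles ...
 *	percpu_ref_switch_to_percpu(&foo->ref);
 */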
/**
 * percpu_ref_switch_to_percpu - switch a percpu_ref to percpu mode
 * @ref: percpu_ref to switch to percpu mode
 *
 * There's no reason to use this function for the usual reference counting.
 * To re-use an expired ref, use percpu_ref_reinit().
 *
 * Switch @ref to percpu mode. This function may be invoked concurrently
 * with all the get/put operations and can safely be mixed with kill and
 * reinit operations. This function reverses the sticky atomic state set
 * by PERCPU_REF_INIT_ATOMIC or percpu_ref_switch_to_atomic(). If @ref is
 * dying or dead, the actual switching takes place on the following
 * percpu_ref_reinit().
 *
 * This function may block if @ref is in the process of switching to atomic
 * mode. If the caller ensures that @ref is not in the process of
 * switching to atomic mode, this function can be called from any context.
 */
void percpu_ref_switch_to_percpu(struct percpu_ref *ref)
{
	unsigned long flags;

	spin_lock_irqsave(&percpu_ref_switch_lock, flags);

	ref->data->force_atomic = false;
	__percpu_ref_switch_mode(ref, NULL);

	spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
}
EXPORT_SYMBOL_GPL(percpu_ref_switch_to_percpu);

/**
 * percpu_ref_kill_and_confirm - drop the initial ref and schedule confirmation
 * @ref: percpu_ref to kill
 * @confirm_kill: optional confirmation callback
 *
 * Equivalent to percpu_ref_kill() but also schedules kill confirmation if
 * @confirm_kill is not NULL. @confirm_kill, which may not block, will be
 * called after @ref is seen as dead from all CPUs at which point all
 * further invocations of percpu_ref_tryget_live() will fail. See
 * percpu_ref_tryget_live() for details.
 *
 * This function normally doesn't block and can be called from any context
 * but it may block if @confirm_kill is specified and @ref is in the
 * process of switching to atomic mode by percpu_ref_switch_to_atomic().
 *
 * There are no implied RCU grace periods between kill and release.
 */
void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
				 percpu_ref_func_t *confirm_kill)
{
	unsigned long flags;

	spin_lock_irqsave(&percpu_ref_switch_lock, flags);

	WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD,
		  "%s called more than once on %ps!", __func__,
		  ref->data->release);

	ref->percpu_count_ptr |= __PERCPU_REF_DEAD;
	__percpu_ref_switch_mode(ref, confirm_kill);
	percpu_ref_put(ref);

	spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
}
EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
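/*
 * Illustrative shutdown sketch (hypothetical "struct foo" from the init
 * example above, not part of this file): dropping the initial reference with
 * percpu_ref_kill() marks the ref dead, so percpu_ref_tryget_live() starts
 * failing, and foo_release() runs once every remaining get has been balanced
 * by a put:
 *
 *	percpu_ref_kill(&foo->ref);
 *	...
 *	percpu_ref_put(&foo->ref);	<- last outstanding user
 *	foo_release() is then invoked and frees foo
 */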
/**
 * percpu_ref_is_zero - test whether a percpu refcount reached zero
 * @ref: percpu_ref to test
 *
 * Returns %true if @ref reached zero.
 *
 * This function is safe to call as long as @ref is between init and exit.
 */
bool percpu_ref_is_zero(struct percpu_ref *ref)
{
	unsigned long __percpu *percpu_count;
	unsigned long count, flags;

	if (__ref_is_percpu(ref, &percpu_count))
		return false;

	/* protect us from being destroyed */
	spin_lock_irqsave(&percpu_ref_switch_lock, flags);
	if (ref->data)
		count = atomic_long_read(&ref->data->count);
	else
		count = ref->percpu_count_ptr >> __PERCPU_REF_FLAG_BITS;
	spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);

	return count == 0;
}
EXPORT_SYMBOL_GPL(percpu_ref_is_zero);

/**
 * percpu_ref_reinit - re-initialize a percpu refcount
 * @ref: percpu_ref to re-initialize
 *
 * Re-initialize @ref so that it's in the same state as when it finished
 * percpu_ref_init() ignoring %PERCPU_REF_INIT_DEAD. @ref must have been
 * initialized successfully and reached 0 but not exited.
 *
 * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while
 * this function is in progress.
 */
void percpu_ref_reinit(struct percpu_ref *ref)
{
	WARN_ON_ONCE(!percpu_ref_is_zero(ref));

	percpu_ref_resurrect(ref);
}
EXPORT_SYMBOL_GPL(percpu_ref_reinit);

/**
 * percpu_ref_resurrect - modify a percpu refcount from dead to live
 * @ref: percpu_ref to resurrect
 *
 * Modify @ref so that it's in the same state as before percpu_ref_kill() was
 * called. @ref must be dead but must not yet have exited.
 *
 * If @ref->release() frees @ref then the caller is responsible for
 * guaranteeing that @ref->release() does not get called while this
 * function is in progress.
 *
 * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while
 * this function is in progress.
 */
void percpu_ref_resurrect(struct percpu_ref *ref)
{
	unsigned long __percpu *percpu_count;
	unsigned long flags;

	spin_lock_irqsave(&percpu_ref_switch_lock, flags);

	WARN_ON_ONCE(!(ref->percpu_count_ptr & __PERCPU_REF_DEAD));
	WARN_ON_ONCE(__ref_is_percpu(ref, &percpu_count));

	ref->percpu_count_ptr &= ~__PERCPU_REF_DEAD;
	percpu_ref_get(ref);
	__percpu_ref_switch_mode(ref, NULL);

	spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
}
EXPORT_SYMBOL_GPL(percpu_ref_resurrect);
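/*
 * Illustrative reinit sketch (hypothetical object and completion, not part of
 * this file): an object that is repeatedly quiesced and reused can cycle its
 * ref through kill and reinit instead of tearing it down, provided the ref
 * was initialized with PERCPU_REF_ALLOW_REINIT (or started atomic/dead) and
 * its release callback signals completion rather than freeing the object:
 *
 *	percpu_ref_kill(&foo->ref);
 *	wait_for_completion(&foo->ref_done);	(completed from the release
 *						 callback once the count is 0)
 *	percpu_ref_reinit(&foo->ref);
 */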