/*
 * Read-Copy Update mechanism for mutual exclusion
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 * Copyright IBM Corporation, 2001
 *
 * Authors: Dipankar Sarma <dipankar@in.ibm.com>
 *	    Manfred Spraul <manfred@colorfullife.com>
 *
 * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
 * Papers:
 * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
 * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
 *
 * For detailed explanation of Read-Copy Update mechanism see -
 *		http://lse.sourceforge.net/locking/rcupdate.html
 *
 */
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/export.h>
#include <linux/hardirq.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/tick.h>

#define CREATE_TRACE_POINTS

#include "rcu.h"

MODULE_ALIAS("rcupdate");
#ifdef MODULE_PARAM_PREFIX
#undef MODULE_PARAM_PREFIX
#endif
#define MODULE_PARAM_PREFIX "rcupdate."

module_param(rcu_expedited, int, 0);

#ifndef CONFIG_TINY_RCU

static atomic_t rcu_expedited_nesting =
	ATOMIC_INIT(IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT) ? 1 : 0);

/*
 * Should normal grace-period primitives be expedited?  Intended for
 * use within RCU.  Note that this function takes the rcu_expedited
 * sysfs/boot variable into account as well as the rcu_expedite_gp()
 * nesting.  So looping on rcu_unexpedite_gp() until rcu_gp_is_expedited()
 * returns false is a -really- bad idea.
 */
bool rcu_gp_is_expedited(void)
{
	return rcu_expedited || atomic_read(&rcu_expedited_nesting);
}
EXPORT_SYMBOL_GPL(rcu_gp_is_expedited);

/**
 * rcu_expedite_gp - Expedite future RCU grace periods
 *
 * After a call to this function, future calls to synchronize_rcu() and
 * friends act as if the corresponding synchronize_rcu_expedited() function
 * had instead been called.
 */
void rcu_expedite_gp(void)
{
	atomic_inc(&rcu_expedited_nesting);
}
EXPORT_SYMBOL_GPL(rcu_expedite_gp);

/**
 * rcu_unexpedite_gp - Cancel prior rcu_expedite_gp() invocation
 *
 * Undo a prior call to rcu_expedite_gp().  If all prior calls to
 * rcu_expedite_gp() are undone by a subsequent call to rcu_unexpedite_gp(),
 * and if the rcu_expedited sysfs/boot parameter is not set, then all
 * subsequent calls to synchronize_rcu() and friends will return to
 * their normal non-expedited behavior.
 */
void rcu_unexpedite_gp(void)
{
	atomic_dec(&rcu_expedited_nesting);
}
EXPORT_SYMBOL_GPL(rcu_unexpedite_gp);

#endif /* #ifndef CONFIG_TINY_RCU */

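/*
 * Illustrative sketch (not part of the original implementation): one
 * plausible way a caller might pair rcu_expedite_gp() with
 * rcu_unexpedite_gp() around a latency-sensitive burst of grace periods.
 * The function name example_expedited_burst() is hypothetical.  Because
 * the nesting is tracked in rcu_expedited_nesting above, concurrent
 * callers compose correctly.
 */
#if 0	/* example only, never compiled */
static void example_expedited_burst(void)
{
	rcu_expedite_gp();	/* subsequent synchronize_rcu() calls are expedited */
	synchronize_rcu();	/* behaves as if synchronize_rcu_expedited() */
	rcu_unexpedite_gp();	/* undo our nesting level; others may still expedite */
}
#endif
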
/*
 * Inform RCU of the end of the in-kernel boot sequence.
 */
void rcu_end_inkernel_boot(void)
{
	if (IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT))
		rcu_unexpedite_gp();
}

#ifdef CONFIG_PREEMPT_RCU

/*
 * Preemptible RCU implementation for rcu_read_lock().
 * Just increment ->rcu_read_lock_nesting, shared state will be updated
 * if we block.
 */
void __rcu_read_lock(void)
{
	current->rcu_read_lock_nesting++;
	barrier();  /* critical section after entry code. */
}
EXPORT_SYMBOL_GPL(__rcu_read_lock);

/*
 * Preemptible RCU implementation for rcu_read_unlock().
 * Decrement ->rcu_read_lock_nesting.  If the result is zero (outermost
 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
 * invoke rcu_read_unlock_special() to clean up after a context switch
 * in an RCU read-side critical section and other special cases.
 */
void __rcu_read_unlock(void)
{
	struct task_struct *t = current;

	if (t->rcu_read_lock_nesting != 1) {
		--t->rcu_read_lock_nesting;
	} else {
		barrier();  /* critical section before exit code. */
		t->rcu_read_lock_nesting = INT_MIN;
		barrier();  /* assign before ->rcu_read_unlock_special load */
		if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s)))
			rcu_read_unlock_special(t);
		barrier();  /* ->rcu_read_unlock_special load before assign */
		t->rcu_read_lock_nesting = 0;
	}
#ifdef CONFIG_PROVE_LOCKING
	{
		int rrln = READ_ONCE(t->rcu_read_lock_nesting);

		WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
	}
#endif /* #ifdef CONFIG_PROVE_LOCKING */
}
EXPORT_SYMBOL_GPL(__rcu_read_unlock);

#endif /* #ifdef CONFIG_PREEMPT_RCU */

#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key rcu_lock_key;
struct lockdep_map rcu_lock_map =
	STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
EXPORT_SYMBOL_GPL(rcu_lock_map);

static struct lock_class_key rcu_bh_lock_key;
struct lockdep_map rcu_bh_lock_map =
	STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_bh", &rcu_bh_lock_key);
EXPORT_SYMBOL_GPL(rcu_bh_lock_map);

static struct lock_class_key rcu_sched_lock_key;
struct lockdep_map rcu_sched_lock_map =
	STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_sched", &rcu_sched_lock_key);
EXPORT_SYMBOL_GPL(rcu_sched_lock_map);

static struct lock_class_key rcu_callback_key;
struct lockdep_map rcu_callback_map =
	STATIC_LOCKDEP_MAP_INIT("rcu_callback", &rcu_callback_key);
EXPORT_SYMBOL_GPL(rcu_callback_map);

int notrace debug_lockdep_rcu_enabled(void)
{
	return rcu_scheduler_active && debug_locks &&
	       current->lockdep_recursion == 0;
}
EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);

/**
 * rcu_read_lock_held() - might we be in RCU read-side critical section?
 *
 * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU
 * read-side critical section.  In the absence of CONFIG_DEBUG_LOCK_ALLOC,
 * this assumes we are in an RCU read-side critical section unless it can
 * prove otherwise.  This is useful for debug checks in functions that
 * require that they be called within an RCU read-side critical section.
 *
 * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
 * and while lockdep is disabled.
 *
 * Note that rcu_read_lock() and the matching rcu_read_unlock() must
 * occur in the same context, for example, it is illegal to invoke
 * rcu_read_unlock() in process context if the matching rcu_read_lock()
 * was invoked from within an irq handler.
 *
 * Note that rcu_read_lock() is disallowed if the CPU is either idle or
 * offline from an RCU perspective, so check for those as well.
 */
int rcu_read_lock_held(void)
{
	if (!debug_lockdep_rcu_enabled())
		return 1;
	if (!rcu_is_watching())
		return 0;
	if (!rcu_lockdep_current_cpu_online())
		return 0;
	return lock_is_held(&rcu_lock_map);
}
EXPORT_SYMBOL_GPL(rcu_read_lock_held);

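/*
 * Illustrative sketch (not part of the original implementation): the
 * typical consumer of rcu_read_lock_held() is a debug check, for example
 * the condition argument of rcu_dereference_check().  The names
 * example_data, example_gp, example_lock, and example_fetch() below are
 * hypothetical.
 */
#if 0	/* example only, never compiled */
static struct example_data __rcu *example_gp;
static DEFINE_SPINLOCK(example_lock);

static struct example_data *example_fetch(void)
{
	/* Legal either under rcu_read_lock() or with example_lock held. */
	return rcu_dereference_check(example_gp,
				     rcu_read_lock_held() ||
				     lockdep_is_held(&example_lock));
}
#endif
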
/**
 * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section?
 *
 * Check for bottom half being disabled, which covers both the
 * CONFIG_PROVE_RCU and !CONFIG_PROVE_RCU cases.  Note that if someone uses
 * rcu_read_lock_bh(), but then later enables BH, lockdep (if enabled)
 * will show the situation.  This is useful for debug checks in functions
 * that require that they be called within an RCU read-side critical
 * section.
 *
 * Check debug_lockdep_rcu_enabled() to prevent false positives during boot.
 *
 * Note that rcu_read_lock() is disallowed if the CPU is either idle or
 * offline from an RCU perspective, so check for those as well.
 */
int rcu_read_lock_bh_held(void)
{
	if (!debug_lockdep_rcu_enabled())
		return 1;
	if (!rcu_is_watching())
		return 0;
	if (!rcu_lockdep_current_cpu_online())
		return 0;
	return in_softirq() || irqs_disabled();
}
EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);

#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */

/**
 * wakeme_after_rcu() - Callback function to awaken a task after grace period
 * @head: Pointer to rcu_head member within rcu_synchronize structure
 *
 * Awaken the corresponding task now that a grace period has elapsed.
 */
void wakeme_after_rcu(struct rcu_head *head)
{
	struct rcu_synchronize *rcu;

	rcu = container_of(head, struct rcu_synchronize, head);
	complete(&rcu->completion);
}

void wait_rcu_gp(call_rcu_func_t crf)
{
	struct rcu_synchronize rcu;

	init_rcu_head_on_stack(&rcu.head);
	init_completion(&rcu.completion);
	/* Will wake me after RCU finished. */
	crf(&rcu.head, wakeme_after_rcu);
	/* Wait for it. */
	wait_for_completion(&rcu.completion);
	destroy_rcu_head_on_stack(&rcu.head);
}
EXPORT_SYMBOL_GPL(wait_rcu_gp);

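/*
 * Illustrative sketch (not part of the original implementation): a
 * synchronize-style primitive is essentially wait_rcu_gp() applied to the
 * flavor's call_rcu() variant, which is roughly how the tree-RCU
 * synchronize_*() functions use it.  The wrapper name
 * example_synchronize_bh() is hypothetical.
 */
#if 0	/* example only, never compiled */
static void example_synchronize_bh(void)
{
	/* Queue wakeme_after_rcu() and sleep until that callback runs. */
	wait_rcu_gp(call_rcu_bh);
}
#endif
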
#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
void init_rcu_head(struct rcu_head *head)
{
	debug_object_init(head, &rcuhead_debug_descr);
}

void destroy_rcu_head(struct rcu_head *head)
{
	debug_object_free(head, &rcuhead_debug_descr);
}

/*
 * fixup_activate is called when:
 * - an active object is activated
 * - an unknown object is activated (might be a statically initialized object)
 * Activation is performed internally by call_rcu().
 */
static int rcuhead_fixup_activate(void *addr, enum debug_obj_state state)
{
	struct rcu_head *head = addr;

	switch (state) {

	case ODEBUG_STATE_NOTAVAILABLE:
		/*
		 * This is not really a fixup. We just make sure that it is
		 * tracked in the object tracker.
		 */
		debug_object_init(head, &rcuhead_debug_descr);
		debug_object_activate(head, &rcuhead_debug_descr);
		return 0;
	default:
		return 1;
	}
}

/**
 * init_rcu_head_on_stack() - initialize on-stack rcu_head for debugobjects
 * @head: pointer to rcu_head structure to be initialized
 *
 * This function informs debugobjects of a new rcu_head structure that
 * has been allocated as an auto variable on the stack.  This function
 * is not required for rcu_head structures that are statically defined or
 * that are dynamically allocated on the heap.  This function has no
 * effect for !CONFIG_DEBUG_OBJECTS_RCU_HEAD kernel builds.
 */
void init_rcu_head_on_stack(struct rcu_head *head)
{
	debug_object_init_on_stack(head, &rcuhead_debug_descr);
}
EXPORT_SYMBOL_GPL(init_rcu_head_on_stack);

/**
 * destroy_rcu_head_on_stack() - destroy on-stack rcu_head for debugobjects
 * @head: pointer to rcu_head structure that is about to go out of scope
 *
 * This function informs debugobjects that an on-stack rcu_head structure
 * is about to go out of scope.  As with init_rcu_head_on_stack(), this
 * function is not required for rcu_head structures that are statically
 * defined or that are dynamically allocated on the heap.  Also as with
 * init_rcu_head_on_stack(), this function has no effect for
 * !CONFIG_DEBUG_OBJECTS_RCU_HEAD kernel builds.
 */
void destroy_rcu_head_on_stack(struct rcu_head *head)
{
	debug_object_free(head, &rcuhead_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_rcu_head_on_stack);

struct debug_obj_descr rcuhead_debug_descr = {
	.name = "rcu_head",
	.fixup_activate = rcuhead_fixup_activate,
};
EXPORT_SYMBOL_GPL(rcuhead_debug_descr);
#endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */

#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU) || defined(CONFIG_RCU_TRACE)
void do_trace_rcu_torture_read(const char *rcutorturename, struct rcu_head *rhp,
			       unsigned long secs,
			       unsigned long c_old, unsigned long c)
{
	trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c);
}
EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read);
#else
#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
	do { } while (0)
#endif

#ifdef CONFIG_RCU_STALL_COMMON

#ifdef CONFIG_PROVE_RCU
#define RCU_STALL_DELAY_DELTA		(5 * HZ)
#else
#define RCU_STALL_DELAY_DELTA		0
#endif

int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
static int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;

module_param(rcu_cpu_stall_suppress, int, 0644);
module_param(rcu_cpu_stall_timeout, int, 0644);

int rcu_jiffies_till_stall_check(void)
{
	int till_stall_check = READ_ONCE(rcu_cpu_stall_timeout);

	/*
	 * Limit check must be consistent with the Kconfig limits
	 * for CONFIG_RCU_CPU_STALL_TIMEOUT.
	 */
	if (till_stall_check < 3) {
		WRITE_ONCE(rcu_cpu_stall_timeout, 3);
		till_stall_check = 3;
	} else if (till_stall_check > 300) {
		WRITE_ONCE(rcu_cpu_stall_timeout, 300);
		till_stall_check = 300;
	}
	return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
}

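/*
 * Illustrative note (not part of the original implementation): because of
 * the "rcupdate." MODULE_PARAM_PREFIX above, these knobs are typically set
 * on the kernel command line, for example
 *
 *	rcupdate.rcu_cpu_stall_timeout=60
 *	rcupdate.rcu_cpu_stall_suppress=1
 *
 * or, given their 0644 permissions, at runtime via
 * /sys/module/rcupdate/parameters/.  Values written to
 * rcu_cpu_stall_timeout are clamped to the 3..300 second range by
 * rcu_jiffies_till_stall_check() above.
 */
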
void rcu_sysrq_start(void)
{
	if (!rcu_cpu_stall_suppress)
		rcu_cpu_stall_suppress = 2;
}

void rcu_sysrq_end(void)
{
	if (rcu_cpu_stall_suppress == 2)
		rcu_cpu_stall_suppress = 0;
}

static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
{
	rcu_cpu_stall_suppress = 1;
	return NOTIFY_DONE;
}

static struct notifier_block rcu_panic_block = {
	.notifier_call = rcu_panic,
};

static int __init check_cpu_stall_init(void)
{
	atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
	return 0;
}
early_initcall(check_cpu_stall_init);

#endif /* #ifdef CONFIG_RCU_STALL_COMMON */

#ifdef CONFIG_TASKS_RCU

/*
 * Simple variant of RCU whose quiescent states are voluntary context switch,
 * user-space execution, and idle.  As such, grace periods can take one good
 * long time.  There are no read-side primitives similar to rcu_read_lock()
 * and rcu_read_unlock() because this implementation is intended to get
 * the system into a safe state for some of the manipulations involved in
 * tracing and the like.  Finally, this implementation does not support
 * high call_rcu_tasks() rates from multiple CPUs.  If this is required,
 * per-CPU callback lists will be needed.
 */

/* Global list of callbacks and associated lock. */
static struct rcu_head *rcu_tasks_cbs_head;
static struct rcu_head **rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
static DECLARE_WAIT_QUEUE_HEAD(rcu_tasks_cbs_wq);
static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock);

/* Track exiting tasks in order to allow them to be waited for. */
DEFINE_SRCU(tasks_rcu_exit_srcu);

/* Control stall timeouts.  Disable with <= 0, otherwise jiffies till stall. */
static int rcu_task_stall_timeout __read_mostly = HZ * 60 * 10;
module_param(rcu_task_stall_timeout, int, 0644);

static void rcu_spawn_tasks_kthread(void);

/*
 * Post an RCU-tasks callback.  First call must be from process context
 * after the scheduler is fully operational.
 */
void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp))
{
	unsigned long flags;
	bool needwake;

	rhp->next = NULL;
	rhp->func = func;
	raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags);
	needwake = !rcu_tasks_cbs_head;
	*rcu_tasks_cbs_tail = rhp;
	rcu_tasks_cbs_tail = &rhp->next;
	raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);
	if (needwake) {
		rcu_spawn_tasks_kthread();
		wake_up(&rcu_tasks_cbs_wq);
	}
}
EXPORT_SYMBOL_GPL(call_rcu_tasks);

/**
 * synchronize_rcu_tasks - wait until an rcu-tasks grace period has elapsed.
 *
 * Control will return to the caller some time after a full rcu-tasks
 * grace period has elapsed, in other words after all currently
 * executing rcu-tasks read-side critical sections have elapsed.  These
 * read-side critical sections are delimited by calls to schedule(),
 * cond_resched_rcu_qs(), idle execution, userspace execution, calls
 * to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched().
 *
 * This is a very specialized primitive, intended only for a few uses in
 * tracing and other situations requiring manipulation of function
 * preambles and profiling hooks.  The synchronize_rcu_tasks() function
 * is not (yet) intended for heavy use from multiple CPUs.
 *
 * Note that this guarantee implies further memory-ordering guarantees.
 * On systems with more than one CPU, when synchronize_rcu_tasks() returns,
 * each CPU is guaranteed to have executed a full memory barrier since the
 * end of its last RCU-tasks read-side critical section whose beginning
 * preceded the call to synchronize_rcu_tasks().  In addition, each CPU
 * having an RCU-tasks read-side critical section that extends beyond
 * the return from synchronize_rcu_tasks() is guaranteed to have executed
 * a full memory barrier after the beginning of synchronize_rcu_tasks()
 * and before the beginning of that RCU-tasks read-side critical section.
 * Note that these guarantees include CPUs that are offline, idle, or
 * executing in user mode, as well as CPUs that are executing in the kernel.
 *
 * Furthermore, if CPU A invoked synchronize_rcu_tasks(), which returned
 * to its caller on CPU B, then both CPU A and CPU B are guaranteed
 * to have executed a full memory barrier during the execution of
 * synchronize_rcu_tasks() -- even if CPU A and CPU B are the same CPU
 * (but again only if the system has more than one CPU).
 */
void synchronize_rcu_tasks(void)
{
	/* Complain if the scheduler has not started.  */
	rcu_lockdep_assert(!rcu_scheduler_active,
			   "synchronize_rcu_tasks called too soon");

	/* Wait for the grace period. */
	wait_rcu_gp(call_rcu_tasks);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_tasks);

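/*
 * Illustrative sketch (not part of the original implementation): a tracer
 * that patches a function preamble to jump through a trampoline must not
 * free that trampoline while some preempted task might still be executing
 * in it.  One hypothetical way to defer the free via RCU-tasks is shown
 * below; struct example_tramp, example_tramp_free(), and
 * example_tramp_release() are made-up names.
 */
#if 0	/* example only, never compiled */
struct example_tramp {
	struct rcu_head rh;
	void *text;
};

static void example_tramp_free(struct rcu_head *rhp)
{
	struct example_tramp *tp = container_of(rhp, struct example_tramp, rh);

	kfree(tp);
}

static void example_tramp_release(struct example_tramp *tp)
{
	/* Free only after every task has passed a voluntary context switch. */
	call_rcu_tasks(&tp->rh, example_tramp_free);
}
#endif
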
/**
 * rcu_barrier_tasks - Wait for in-flight call_rcu_tasks() callbacks.
 *
 * Although the current implementation is guaranteed to wait, it is not
 * obligated to, for example, if there are no pending callbacks.
 */
void rcu_barrier_tasks(void)
{
	/* There is only one callback queue, so this is easy.  ;-) */
	synchronize_rcu_tasks();
}
EXPORT_SYMBOL_GPL(rcu_barrier_tasks);

/* See if tasks are still holding out, complain if so. */
static void check_holdout_task(struct task_struct *t,
			       bool needreport, bool *firstreport)
{
	int cpu;

	if (!READ_ONCE(t->rcu_tasks_holdout) ||
	    t->rcu_tasks_nvcsw != READ_ONCE(t->nvcsw) ||
	    !READ_ONCE(t->on_rq) ||
	    (IS_ENABLED(CONFIG_NO_HZ_FULL) &&
	     !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) {
		WRITE_ONCE(t->rcu_tasks_holdout, false);
		list_del_init(&t->rcu_tasks_holdout_list);
		put_task_struct(t);
		return;
	}
	if (!needreport)
		return;
	if (*firstreport) {
		pr_err("INFO: rcu_tasks detected stalls on tasks:\n");
		*firstreport = false;
	}
	cpu = task_cpu(t);
	pr_alert("%p: %c%c nvcsw: %lu/%lu holdout: %d idle_cpu: %d/%d\n",
		 t, ".I"[is_idle_task(t)],
		 "N."[cpu < 0 || !tick_nohz_full_cpu(cpu)],
		 t->rcu_tasks_nvcsw, t->nvcsw, t->rcu_tasks_holdout,
		 t->rcu_tasks_idle_cpu, cpu);
	sched_show_task(t);
}

/* RCU-tasks kthread that detects grace periods and invokes callbacks. */
static int __noreturn rcu_tasks_kthread(void *arg)
{
	unsigned long flags;
	struct task_struct *g, *t;
	unsigned long lastreport;
	struct rcu_head *list;
	struct rcu_head *next;
	LIST_HEAD(rcu_tasks_holdouts);

	/* Run on housekeeping CPUs by default.  Sysadm can move if desired. */
	housekeeping_affine(current);

	/*
	 * Each pass through the following loop makes one check for
	 * newly arrived callbacks, and, if there are some, waits for
	 * one RCU-tasks grace period and then invokes the callbacks.
	 * This loop is terminated by the system going down.  ;-)
	 */
	for (;;) {

		/* Pick up any new callbacks. */
		raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags);
		list = rcu_tasks_cbs_head;
		rcu_tasks_cbs_head = NULL;
		rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
		raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);

		/* If there were none, wait a bit and start over. */
		if (!list) {
			wait_event_interruptible(rcu_tasks_cbs_wq,
						 rcu_tasks_cbs_head);
			if (!rcu_tasks_cbs_head) {
				WARN_ON(signal_pending(current));
				schedule_timeout_interruptible(HZ/10);
			}
			continue;
		}

		/*
		 * Wait for all pre-existing t->on_rq and t->nvcsw
		 * transitions to complete.  Invoking synchronize_sched()
		 * suffices because all these transitions occur with
		 * interrupts disabled.  Without this synchronize_sched(),
		 * a read-side critical section that started before the
		 * grace period might be incorrectly seen as having started
		 * after the grace period.
		 *
		 * This synchronize_sched() also dispenses with the
		 * need for a memory barrier on the first store to
		 * ->rcu_tasks_holdout, as it forces the store to happen
		 * after the beginning of the grace period.
		 */
		synchronize_sched();

		/*
		 * There were callbacks, so we need to wait for an
		 * RCU-tasks grace period.  Start off by scanning
		 * the task list for tasks that are not already
		 * voluntarily blocked.  Mark these tasks and make
		 * a list of them in rcu_tasks_holdouts.
		 */
		rcu_read_lock();
		for_each_process_thread(g, t) {
			if (t != current && READ_ONCE(t->on_rq) &&
			    !is_idle_task(t)) {
				get_task_struct(t);
				t->rcu_tasks_nvcsw = READ_ONCE(t->nvcsw);
				WRITE_ONCE(t->rcu_tasks_holdout, true);
				list_add(&t->rcu_tasks_holdout_list,
					 &rcu_tasks_holdouts);
			}
		}
		rcu_read_unlock();

		/*
		 * Wait for tasks that are in the process of exiting.
		 * This does only part of the job, ensuring that all
		 * tasks that were previously exiting reach the point
		 * where they have disabled preemption, allowing the
		 * later synchronize_sched() to finish the job.
		 */
		synchronize_srcu(&tasks_rcu_exit_srcu);

		/*
		 * Each pass through the following loop scans the list
		 * of holdout tasks, removing any that are no longer
		 * holdouts.  When the list is empty, we are done.
		 */
		lastreport = jiffies;
		while (!list_empty(&rcu_tasks_holdouts)) {
			bool firstreport;
			bool needreport;
			int rtst;
			struct task_struct *t1;

			schedule_timeout_interruptible(HZ);
			rtst = READ_ONCE(rcu_task_stall_timeout);
			needreport = rtst > 0 &&
				     time_after(jiffies, lastreport + rtst);
			if (needreport)
				lastreport = jiffies;
			firstreport = true;
			WARN_ON(signal_pending(current));
			list_for_each_entry_safe(t, t1, &rcu_tasks_holdouts,
						 rcu_tasks_holdout_list) {
				check_holdout_task(t, needreport, &firstreport);
				cond_resched();
			}
		}

		/*
		 * Because ->on_rq and ->nvcsw are not guaranteed
		 * to have full memory barriers prior to them in the
		 * schedule() path, memory reordering on other CPUs could
		 * cause their RCU-tasks read-side critical sections to
		 * extend past the end of the grace period.  However,
		 * because these ->nvcsw updates are carried out with
		 * interrupts disabled, we can use synchronize_sched()
		 * to force the needed ordering on all such CPUs.
		 *
		 * This synchronize_sched() also confines all
		 * ->rcu_tasks_holdout accesses to be within the grace
		 * period, avoiding the need for memory barriers for
		 * ->rcu_tasks_holdout accesses.
		 *
		 * In addition, this synchronize_sched() waits for exiting
		 * tasks to complete their final preempt_disable() region
		 * of execution, cleaning up after the synchronize_srcu()
		 * above.
		 */
		synchronize_sched();

		/* Invoke the callbacks. */
		while (list) {
			next = list->next;
			local_bh_disable();
			list->func(list);
			local_bh_enable();
			list = next;
			cond_resched();
		}
		schedule_timeout_uninterruptible(HZ/10);
	}
}

/* Spawn rcu_tasks_kthread() at first call to call_rcu_tasks(). */
static void rcu_spawn_tasks_kthread(void)
{
	static DEFINE_MUTEX(rcu_tasks_kthread_mutex);
	static struct task_struct *rcu_tasks_kthread_ptr;
	struct task_struct *t;

	if (READ_ONCE(rcu_tasks_kthread_ptr)) {
		smp_mb(); /* Ensure caller sees full kthread. */
		return;
	}
	mutex_lock(&rcu_tasks_kthread_mutex);
	if (rcu_tasks_kthread_ptr) {
		mutex_unlock(&rcu_tasks_kthread_mutex);
		return;
	}
	t = kthread_run(rcu_tasks_kthread, NULL, "rcu_tasks_kthread");
	BUG_ON(IS_ERR(t));
	smp_mb(); /* Ensure others see full kthread. */
	WRITE_ONCE(rcu_tasks_kthread_ptr, t);
	mutex_unlock(&rcu_tasks_kthread_mutex);
}

#endif /* #ifdef CONFIG_TASKS_RCU */

#ifdef CONFIG_PROVE_RCU

/*
 * Early boot self test parameters, one for each flavor
 */
static bool rcu_self_test;
static bool rcu_self_test_bh;
static bool rcu_self_test_sched;

module_param(rcu_self_test, bool, 0444);
module_param(rcu_self_test_bh, bool, 0444);
module_param(rcu_self_test_sched, bool, 0444);

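/*
 * Illustrative note (not part of the original implementation): with the
 * "rcupdate." MODULE_PARAM_PREFIX and the read-only 0444 permissions above,
 * these self tests are enabled at boot time, for example by adding
 *
 *	rcupdate.rcu_self_test=1 rcupdate.rcu_self_test_bh=1
 *
 * to the kernel command line; the results are then checked by
 * rcu_verify_early_boot_tests() below via late_initcall().
 */
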
static int rcu_self_test_counter;

static void test_callback(struct rcu_head *r)
{
	rcu_self_test_counter++;
	pr_info("RCU test callback executed %d\n", rcu_self_test_counter);
}

static void early_boot_test_call_rcu(void)
{
	static struct rcu_head head;

	call_rcu(&head, test_callback);
}

static void early_boot_test_call_rcu_bh(void)
{
	static struct rcu_head head;

	call_rcu_bh(&head, test_callback);
}

static void early_boot_test_call_rcu_sched(void)
{
	static struct rcu_head head;

	call_rcu_sched(&head, test_callback);
}

void rcu_early_boot_tests(void)
{
	pr_info("Running RCU self tests\n");

	if (rcu_self_test)
		early_boot_test_call_rcu();
	if (rcu_self_test_bh)
		early_boot_test_call_rcu_bh();
	if (rcu_self_test_sched)
		early_boot_test_call_rcu_sched();
}

static int rcu_verify_early_boot_tests(void)
{
	int ret = 0;
	int early_boot_test_counter = 0;

	if (rcu_self_test) {
		early_boot_test_counter++;
		rcu_barrier();
	}
	if (rcu_self_test_bh) {
		early_boot_test_counter++;
		rcu_barrier_bh();
	}
	if (rcu_self_test_sched) {
		early_boot_test_counter++;
		rcu_barrier_sched();
	}

	if (rcu_self_test_counter != early_boot_test_counter) {
		WARN_ON(1);
		ret = -1;
	}

	return ret;
}
late_initcall(rcu_verify_early_boot_tests);
#else
void rcu_early_boot_tests(void) {}
#endif /* CONFIG_PROVE_RCU */