1 /* SPDX-License-Identifier: GPL-2.0+ */
3 * Read-Copy Update mechanism for mutual exclusion (tree-based version)
4 * Internal non-public definitions that provide either classic
19 * In order to read the offloaded state of an rdp in a safe
23 * non-preemptible reads are also safe. NOCB kthreads and
30 lockdep_is_held(&rdp->nocb_lock) ||
38 return rcu_segcblist_is_offloaded(&rdp->cblist);
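/*
 * Editorial sketch, not kernel code: one way a caller could satisfy the
 * safety rule sketched above for reading the offloaded state -- either
 * hold rdp->nocb_lock, or sample this CPU's own rdp while non-preemptible,
 * as done below.  The wrapper name is invented for illustration; it assumes
 * the declarations in this file plus <linux/preempt.h>.
 */
static bool sample_local_offloaded_state(struct rcu_data *rdp)
{
	bool offloaded;

	/* A non-preemptible read of the local rdp is one of the safe cases. */
	preempt_disable();
	offloaded = rcu_rdp_is_offloaded(rdp);
	preempt_enable();

	return offloaded;
}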
51 pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d.\n",
58 pr_info("\tRCU strict (and thus non-scalable) grace periods are enabled.\n");
60 pr_info("\tFour(or more)-level hierarchy is enabled.\n");
62 pr_info("\tBuild-time adjustment of leaf fanout to %d.\n",
65 pr_info("\tBoot-time adjustment of leaf fanout to %d.\n",
74 pr_info("\tBoot-time adjustment of callback invocation limit to %ld.\n", blimit);
76 pr_info("\tBoot-time adjustment of callback high-water mark to %ld.\n", qhimark);
78 pr_info("\tBoot-time adjustment of callback low-water mark to %ld.\n", qlowmark);
80 pr_info("\tBoot-time adjustment of callback overload level to %ld.\n", qovld);
82 pr_info("\tBoot-time adjustment of first FQS scan delay to %ld jiffies.\n", jiffies_till_first_fqs);
84 pr_info("\tBoot-time adjustment of subsequent FQS scan delay to %ld jiffies.\n", jiffies_till_next_fqs);
86 pr_info("\tBoot-time adjustment of scheduler-enlistment delay to %ld jiffies.\n", jiffies_till_sched_qs);
88 pr_info("\tKick kthreads if too-long grace period.\n");
90 pr_info("\tRCU callback double-/use-after-free debug is enabled.\n");
92 pr_info("\tRCU debug GP pre-init slowdown %d jiffies.\n", gp_preinit_delay);
135 * Queues a task preempted within an RCU-preempt read-side critical
136 * section into the appropriate location within the ->blkd_tasks list,
138 * periods. The ->gp_tasks pointer indicates which element the normal
139 * grace period is waiting on (NULL if none), and the ->exp_tasks pointer
141 * NULL if none). If a grace period is waiting on a given element in the
142 * ->blkd_tasks list, it also waits on all subsequent elements. Thus,
143 * adding a task to the tail of the list blocks any grace period that is
144 * already waiting on one of the elements. In contrast, adding a task
149 * period wait for a task that is not strictly speaking blocking it.
150 * Given the choice, we needlessly block a normal grace period rather than
154 * indefinitely postpone a normal grace period. Eventually, all of the
157 * their RCU read-side critical sections. At that point, the ->gp_tasks
158 * pointer will equal the ->exp_tasks pointer, at which point the end of
163 __releases(rnp->lock) /* But leaves rrupts disabled. */
165 int blkd_state = (rnp->gp_tasks ? RCU_GP_TASKS : 0) +
166 (rnp->exp_tasks ? RCU_EXP_TASKS : 0) +
167 (rnp->qsmask & rdp->grpmask ? RCU_GP_BLKD : 0) +
168 (rnp->expmask & rdp->grpmask ? RCU_EXP_BLKD : 0);
172 WARN_ON_ONCE(rdp->mynode != rnp);
175 WARN_ON_ONCE(rnp->qsmaskinitnext & ~rnp->qsmaskinit & rnp->qsmask &
176 rdp->grpmask);
180 * this could be an if-statement. In practice, when I tried
192 * GP but not blocking the already-waiting expedited GP.
194 * blocking the already-waiting GPs.
196 list_add(&t->rcu_node_entry, &rnp->blkd_tasks);
209 * GP already waiting), or a task arriving that blocks
214 list_add_tail(&t->rcu_node_entry, &rnp->blkd_tasks);
227 list_add(&t->rcu_node_entry, rnp->exp_tasks);
238 list_add(&t->rcu_node_entry, rnp->gp_tasks);
250 * block either grace period, update the ->gp_tasks and/or
251 * ->exp_tasks pointers, respectively, to reference the newly
254 if (!rnp->gp_tasks && (blkd_state & RCU_GP_BLKD)) {
255 WRITE_ONCE(rnp->gp_tasks, &t->rcu_node_entry);
256 WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq);
258 if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD))
259 WRITE_ONCE(rnp->exp_tasks, &t->rcu_node_entry);
261 !(rnp->qsmask & rdp->grpmask));
263 !(rnp->expmask & rdp->grpmask));
269 * no need to check for a subsequent expedited GP. (Though we are
270 * still in a quiescent state in any case.)
272 * Interrupts are disabled, so ->cpu_no_qs.b.exp cannot change.
274 if (blkd_state & RCU_EXP_BLKD && rdp->cpu_no_qs.b.exp)
277 WARN_ON_ONCE(rdp->cpu_no_qs.b.exp);
278 ASSERT_EXCLUSIVE_WRITER_SCOPED(rdp->cpu_no_qs.b.exp);
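/*
 * Editorial sketch, not kernel code: a tiny user-space model of the
 * ->blkd_tasks ordering rule described before rcu_preempt_ctxt_queue().
 * A grace period waits on the element its pointer references and on all
 * subsequent elements, so a task added at the tail blocks an
 * already-waiting grace period while a task added at the head does not.
 * All names here (struct model_task, model_gp_waits_on, ...) are invented
 * for illustration.
 */
#include <stdbool.h>
#include <stdio.h>

struct model_task {
	const char *name;
	struct model_task *next;	/* head -> ... -> tail */
};

/* Does a GP whose pointer is @gp_tasks wait on @t?  Only if @t is at or
 * after the pointed-to element, i.e. on "all subsequent elements". */
static bool model_gp_waits_on(struct model_task *gp_tasks, struct model_task *t)
{
	for (struct model_task *p = gp_tasks; p; p = p->next)
		if (p == t)
			return true;
	return false;
}

int main(void)
{
	struct model_task tail_task = { "added-at-tail", NULL };
	struct model_task old_task  = { "blocking-GP", &tail_task };
	struct model_task head_task = { "added-at-head", &old_task };
	struct model_task *gp_tasks = &old_task;	/* GP waits from here on */

	printf("waits on head insert? %d\n", model_gp_waits_on(gp_tasks, &head_task)); /* 0 */
	printf("waits on tail insert? %d\n", model_gp_waits_on(gp_tasks, &tail_task)); /* 1 */
	return 0;
}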
282 * Record a preemptible-RCU quiescent state for the specified CPU.
284 * on the CPU is in a quiescent state: Instead, it means that the current
285 * grace period need not wait on any RCU read-side critical section that
287 * in an RCU read-side critical section, it has already added itself to
288 * some leaf rcu_node structure's ->blkd_tasks list. In addition to the
290 * in an RCU read-side critical section.
292 * Unlike non-preemptible-RCU, quiescent state reports for expedited
307 WRITE_ONCE(current->rcu_read_unlock_special.b.need_qs, false);
313 * context-switched away from. If this task is in an RCU read-side
317 * RCU read-side critical section. Therefore, the current grace period
320 * rnp->gp_tasks becomes NULL.
332 WARN_ONCE(!preempt && rcu_preempt_depth() > 0, "Voluntary context switch within RCU read-side critical section!");
334 !t->rcu_read_unlock_special.b.blocked) {
336 /* Possibly blocking in an RCU read-side critical section. */
337 rnp = rdp->mynode;
339 t->rcu_read_unlock_special.b.blocked = true;
340 t->rcu_blocked_node = rnp;
348 WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
350 t->pid,
351 (rnp->qsmask & rdp->grpmask)
352 ? rnp->gp_seq
353 : rcu_seq_snap(&rnp->gp_seq));
360 * Either we were not in an RCU read-side critical section to
362 * globally. Either way, we can now note a quiescent state
363 * for this CPU. Again, if we were in an RCU read-side critical
369 if (rdp->cpu_no_qs.b.exp)
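/*
 * Editorial sketch, not kernel code: the reader-side pattern that the
 * context-switch handling above supports.  A preemptible-RCU reader can be
 * preempted between rcu_read_lock() and rcu_read_unlock(); at that point
 * the task is queued on a leaf rcu_node's ->blkd_tasks list as described
 * earlier.  struct my_data, global_ptr and read_value() are invented for
 * illustration and assume <linux/rcupdate.h>.
 */
struct my_data {
	int value;
};

static struct my_data __rcu *global_ptr;

static int read_value(void)
{
	struct my_data *p;
	int val = -1;

	rcu_read_lock();
	p = rcu_dereference(global_ptr);	/* reader may be preempted after this */
	if (p)
		val = p->value;
	rcu_read_unlock();	/* outermost unlock may report a deferred QS */

	return val;
}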
378 * for the specified rcu_node structure. If the caller needs a reliable
379 * answer, it must hold the rcu_node's ->lock.
383 return READ_ONCE(rnp->gp_tasks) != NULL;
386 /* limit value for ->rcu_read_lock_nesting. */
391 WRITE_ONCE(current->rcu_read_lock_nesting, READ_ONCE(current->rcu_read_lock_nesting) + 1);
396 int ret = READ_ONCE(current->rcu_read_lock_nesting) - 1;
398 WRITE_ONCE(current->rcu_read_lock_nesting, ret);
404 WRITE_ONCE(current->rcu_read_lock_nesting, val);
409 * Just increment ->rcu_read_lock_nesting, shared state will be updated
418 WRITE_ONCE(current->rcu_read_unlock_special.b.need_qs, true);
425 * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
426 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
427 * invoke rcu_read_unlock_special() to clean up after a context switch
428 * in an RCU read-side critical section and other special cases.
436 barrier(); // critical-section exit before .s check.
437 if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s)))
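/*
 * Editorial sketch, not kernel code: nesting in practice.  Only the
 * outermost rcu_read_unlock() -- the one that brings ->rcu_read_lock_nesting
 * back to zero -- examines ->rcu_read_unlock_special and may invoke
 * rcu_read_unlock_special(); inner unlocks just decrement the counter.
 */
static void nested_reader_example(void)
{
	rcu_read_lock();	/* nesting 0 -> 1 */
	rcu_read_lock();	/* nesting 1 -> 2 */
	rcu_read_unlock();	/* nesting 2 -> 1, no special processing */
	rcu_read_unlock();	/* nesting 1 -> 0, may do special processing */
}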
449 * Advance a ->blkd_tasks-list pointer to the next entry, instead
457 np = t->rcu_node_entry.next;
458 if (np == &rnp->blkd_tasks)
465 * preempted within an RCU read-side critical section.
469 return !list_empty(&rnp->blkd_tasks);
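/*
 * Editorial sketch, not kernel code: the generic shape of the idiom used by
 * rcu_next_node_entry() above -- advance a cursor within a circular
 * list_head list and map "wrapped back to the list head" onto NULL, so the
 * grace-period pointers never reference the head itself.  Assumes
 * <linux/list.h>; the helper name is invented.
 */
static struct list_head *advance_or_null(struct list_head *pos,
					 struct list_head *head)
{
	struct list_head *np = pos->next;

	return np == head ? NULL : np;
}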
490 if (rdp->defer_qs_iw_pending == DEFER_QS_PENDING)
491 rdp->defer_qs_iw_pending = DEFER_QS_IDLE;
496 * t->rcu_read_unlock_special cannot change.
498 special = t->rcu_read_unlock_special;
499 if (!special.s && !rdp->cpu_no_qs.b.exp) {
503 t->rcu_read_unlock_special.s = 0;
506 rdp->cpu_no_qs.b.norm = false;
515 * Respond to a request by an expedited grace period for a
518 * blocked-tasks list below.
520 if (rdp->cpu_no_qs.b.exp)
523 /* Clean up if blocked during RCU read-side critical section. */
530 * to loop. Retain a WARN_ON_ONCE() out of sheer paranoia.
532 rnp = t->rcu_blocked_node;
534 WARN_ON_ONCE(rnp != t->rcu_blocked_node);
537 WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq &&
538 (!empty_norm || rnp->qsmask));
541 list_del_init(&t->rcu_node_entry);
542 t->rcu_blocked_node = NULL;
544 rnp->gp_seq, t->pid);
545 if (&t->rcu_node_entry == rnp->gp_tasks)
546 WRITE_ONCE(rnp->gp_tasks, np);
547 if (&t->rcu_node_entry == rnp->exp_tasks)
548 WRITE_ONCE(rnp->exp_tasks, np);
550 /* Snapshot ->boost_mtx ownership w/rnp->lock held. */
551 drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx.rtmutex) == t;
552 if (&t->rcu_node_entry == rnp->boost_tasks)
553 WRITE_ONCE(rnp->boost_tasks, np);
559 * Note that rcu_report_unblock_qs_rnp() releases rnp->lock,
560 * so we must take a snapshot of the expedited state.
565 rnp->gp_seq,
566 0, rnp->qsmask,
567 rnp->level,
568 rnp->grplo,
569 rnp->grphi,
570 !!rnp->gp_tasks);
585 rt_mutex_futex_unlock(&rnp->boost_mtx.rtmutex);
592 * Is a deferred quiescent-state pending, and are we also not in
593 * an RCU read-side critical section? It is the caller's responsibility
595 * states. The reason for this is that it is safe to report a
603 READ_ONCE(t->rcu_read_unlock_special.s)) &&
608 * Report a deferred quiescent state if needed and safe to do so.
610 * not being in an RCU read-side critical section. The caller must
625 * Minimal handler to give the scheduler a chance to re-evaluate.
635 * If the IRQ work handler happens to run in the middle of RCU read-side
637 * attention to report a deferred quiescent state (the whole point of the
641 * 1. rcu_read_unlock() queues IRQ work (state -> DEFER_QS_PENDING)
644 * 4. rcu_read_unlock() does not re-queue work (state still PENDING)
648 WRITE_ONCE(rdp->defer_qs_iw_pending, DEFER_QS_IDLE);
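/*
 * Editorial sketch, not kernel code: the calling convention implied by
 * rcu_preempt_need_deferred_qs() and rcu_preempt_deferred_qs() above -- a
 * context that is not in an RCU read-side critical section checks whether a
 * deferred quiescent state is pending and safe to report, and reports it if
 * so.  The wrapper name is invented for illustration.
 */
static void maybe_report_deferred_qs(struct task_struct *t)
{
	if (rcu_preempt_need_deferred_qs(t))
		rcu_preempt_deferred_qs(t);
}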
655 * 1. Task blocking an expedited grace period (based on a heuristic, could be
656 * false-positive, see below.)
662 * @rdp: The per-CPU RCU data
675 * task was preempted within an RCU read-side critical section and is
678  * check because 't' might not be on the exp_tasks list at all - it's
679 * just a fast heuristic that can be false-positive sometimes.
681 if (t->rcu_blocked_node && READ_ONCE(t->rcu_blocked_node->exp_tasks))
690 if (rdp->grpmask & READ_ONCE(rnp->expmask))
697 * - This CPU has not yet reported a quiescent state, or
698 * - This task was preempted within an RCU critical section
702 ((rdp->grpmask & READ_ONCE(rnp->qsmask)) || t->rcu_blocked_node))
706 * RCU priority boosting case: If a task is subject to RCU priority
707 * boosting and exits an RCU read-side critical section with interrupts
709 * Without this, a low-priority task could incorrectly run at high
710  * real-time priority for an extended period, degrading real-time
714 if (IS_ENABLED(CONFIG_RCU_BOOST) && irqs_were_disabled && t->rcu_blocked_node)
723 * read-side critical section.
741 struct rcu_node *rnp = rdp->mynode;
749 // GP in flight or a potential need to deboost.
759 needs_exp && rdp->defer_qs_iw_pending != DEFER_QS_PENDING &&
760 cpu_online(rdp->cpu)) {
761 // Get scheduler to re-evaluate and call hooks.
763 rdp->defer_qs_iw_pending = DEFER_QS_PENDING;
764 irq_work_queue_on(&rdp->defer_qs_iw, rdp->cpu);
775 * period is in fact empty. It is a serious bug to complete a grace
777 * invoked -before- updating this rnp's ->gp_seq.
780 * block the newly created grace period, so set up ->gp_tasks accordingly.
791 (rnp->qsmaskinit || rnp->wait_blkd_tasks)) {
792 WRITE_ONCE(rnp->gp_tasks, rnp->blkd_tasks.next);
793 t = container_of(rnp->gp_tasks, struct task_struct,
795 trace_rcu_unlock_preempted_task(TPS("rcu_preempt-GPS"),
796 rnp->gp_seq, t->pid);
798 WARN_ON_ONCE(rnp->qsmask);
802 * Check for a quiescent state from the current CPU, including voluntary
803 * context switches for Tasks RCU. When a task blocks, the task is
832 !t->rcu_read_unlock_special.b.need_qs &&
834 t->rcu_read_unlock_special.b.need_qs = true;
838 * Check for a task exiting while in a preemptible-RCU read-side
842 * return, there was a bug of some sort. Spewing warnings from this
849   if (unlikely(!list_empty(&current->rcu_node_entry))) {
852 WRITE_ONCE(t->rcu_read_unlock_special.b.blocked, true);
863 * Dump the blocked-tasks state, but limit the list dump to the
876 pr_info("%s: grp: %d-%d level: %d ->gp_seq %ld ->completedqs %ld\n",
877 __func__, rnp->grplo, rnp->grphi, rnp->level,
878 (long)READ_ONCE(rnp->gp_seq), (long)rnp->completedqs);
879 for (rnp1 = rnp; rnp1; rnp1 = rnp1->parent)
880 pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx\n",
881 __func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext);
882 pr_info("%s: ->gp_tasks %p ->boost_tasks %p ->exp_tasks %p\n",
883 __func__, READ_ONCE(rnp->gp_tasks), data_race(rnp->boost_tasks),
884 READ_ONCE(rnp->exp_tasks));
885 pr_info("%s: ->blkd_tasks", __func__);
887 list_for_each(lhp, &rnp->blkd_tasks) {
893 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) {
897 (long)rdp->rcu_onl_gp_seq, rdp->rcu_onl_gp_state,
898 (long)rdp->rcu_ofl_gp_seq, rdp->rcu_ofl_gp_state);
904 rdp->defer_qs_iw = IRQ_WORK_INIT_HARD(rcu_preempt_deferred_qs_handler);
910 * __rcu_read_unlock() marks the beginning of a quiescent state, immediately
911 * report that quiescent state and, if requested, spin for a bit.
921 * rcu_report_qs_rdp() can only be invoked with a stable rdp and
929 rdp->cpu_no_qs.b.norm = false;
945 * Note a quiescent state for PREEMPTION=n. Because we do not need to know
947 * the start of the grace period, this just sets a flag. The caller must
964 * emergency, invoke rcu_momentary_eqs() to do a heavy-weight
965 * dyntick-idle quiescent state visible to other CPUs, which will in
967 * Either way, register a lightweight quiescent state.
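/*
 * Editorial sketch, not kernel code: on PREEMPTION=n kernels, long-running
 * kernel loops typically provide the quiescent states described above by
 * calling cond_resched(), which is where lightweight quiescent-state
 * reporting is hooked in.  process_many_items() and its list are invented
 * for illustration and assume <linux/list.h> and <linux/sched.h>.
 */
static void process_many_items(struct list_head *items)
{
	struct list_head *pos;

	list_for_each(pos, items) {
		/* ... per-item work ... */
		cond_resched();	/* lets RCU note a quiescent state on !PREEMPT */
	}
}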
993 * Note a PREEMPTION=n context switch. The caller must have disabled interrupts.
1037 // Except that we do need to respond to a request by an expedited
1038 // grace period for a quiescent state from this CPU. Note that in
1039 // non-preemptible kernels, there can be no context switches within RCU
1040 // read-side critical sections, which in turn means that the leaf rcu_node
1041 // structure's blocked-tasks list is always empty. There is therefore no need to
1042 // actually check it. Instead, a quiescent state from this CPU suffices,
1043 // and this function is only called from such a quiescent state.
1048 if (READ_ONCE(rdp->cpu_no_qs.b.exp))
1059 WARN_ON_ONCE(rnp->qsmask);
1063 * Check to see if this CPU is in a non-context-switch quiescent state,
1074 * mode, from the idle loop without this being a nested
1076 * (with PREEMPT_COUNT=y). In this case, the CPU is in a
1080 * references only CPU-local variables that other CPUs
1090 * while in preemptible RCU read-side critical sections.
1097 * Dump the guaranteed-empty blocked-tasks state. Trust but verify.
1102 WARN_ON_ONCE(!list_empty(&rnp->blkd_tasks));
1122 WRITE_ONCE(rdp->rcuc_activity, jiffies);
1128 return rdp->nocb_cb_kthread == current;
1135 * Is the current CPU running the RCU-callbacks kthread?
1140 return rdp->rcu_cpu_kthread_task == current ||
1147 * Carry out RCU priority boosting on the task indicated by ->exp_tasks
1148 * or ->boost_tasks, advancing the pointer to the next task in the
1149 * ->blkd_tasks list.
1160 if (READ_ONCE(rnp->exp_tasks) == NULL &&
1161 READ_ONCE(rnp->boost_tasks) == NULL)
1168 * might exit their RCU read-side critical sections on their own.
1170 if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) {
1177 * This cannot starve the normal grace periods because a second
1179 * those blocking the pre-existing normal grace period.
1181 if (rnp->exp_tasks != NULL)
1182 tb = rnp->exp_tasks;
1184 tb = rnp->boost_tasks;
1188 * be held by task t. We leave a pointer to that rt_mutex where
1190 * exits its outermost RCU read-side critical section. Then
1194 * Note that task t must acquire rnp->lock to remove itself from
1195 * the ->blkd_tasks list, which it will do from exit() if from
1197 * stay around at least until we drop rnp->lock. Note that
1198 * rnp->lock also resolves races between our priority boosting
1199 * and task t's exiting its outermost RCU read-side critical
1203 rt_mutex_init_proxy_locked(&rnp->boost_mtx.rtmutex, t);
1205 /* Lock only for side effect: boosts task t's priority. */
1206 rt_mutex_lock(&rnp->boost_mtx);
1207 rt_mutex_unlock(&rnp->boost_mtx); /* Then keep lockdep happy. */
1208 rnp->n_boosts++;
1210 return READ_ONCE(rnp->exp_tasks) != NULL ||
1211 READ_ONCE(rnp->boost_tasks) != NULL;
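/*
 * Editorial sketch, not kernel code: the boosting above relies on rt_mutex
 * priority inheritance.  The same idea is visible from user space with a
 * PTHREAD_PRIO_INHERIT mutex, where a low-priority holder is temporarily
 * raised to the priority of a high-priority waiter.  This is an analogy
 * only; the kernel path is the proxy-locked rt_mutex shown in rcu_boost().
 */
#include <pthread.h>

static pthread_mutex_t pi_lock;

static int init_pi_lock(void)
{
	pthread_mutexattr_t attr;
	int ret;

	ret = pthread_mutexattr_init(&attr);
	if (ret)
		return ret;
	/* Request priority inheritance, analogous to rt_mutex boosting. */
	ret = pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_INHERIT);
	if (!ret)
		ret = pthread_mutex_init(&pi_lock, &attr);
	pthread_mutexattr_destroy(&attr);
	return ret;
}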
1215 * Priority-boosting kthread, one per leaf rcu_node.
1225 WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_WAITING);
1227 rcu_wait(READ_ONCE(rnp->boost_tasks) ||
1228 READ_ONCE(rnp->exp_tasks));
1230 WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_RUNNING);
1237 WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_YIELDING);
1251 * blocking the current grace period, and, if so, tell the per-rcu_node
1255 * The caller must hold rnp->lock, which this function releases.
1256 * The ->boost_kthread_task is immortal, so we don't need to worry
1260 __releases(rnp->lock)
1263 if (!rnp->boost_kthread_task ||
1264 (!rcu_preempt_blocked_readers_cgp(rnp) && !rnp->exp_tasks)) {
1268 if (rnp->exp_tasks != NULL ||
1269 (rnp->gp_tasks != NULL &&
1270 rnp->boost_tasks == NULL &&
1271 rnp->qsmask == 0 &&
1272 (!time_after(rnp->boost_time, jiffies) || rcu_state.cbovld ||
1274 if (rnp->exp_tasks == NULL)
1275 WRITE_ONCE(rnp->boost_tasks, rnp->gp_tasks);
1277 rcu_wake_cond(rnp->boost_kthread_task,
1278 READ_ONCE(rnp->boost_kthread_status));
1287 * Do priority-boost accounting for the start of a new grace period.
1291 rnp->boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
1295 * Create an RCU-boost kthread for the specified node if one does not
1301 int rnp_index = rnp - rcu_get_root();
1305 if (rnp->boost_kthread_task)
1314 rnp->boost_kthread_task = t;
1326 __releases(rnp->lock)
1342 * Is this CPU a NO_HZ_FULL CPU that should ignore RCU so that the
1343 * grace-period kthread will do force_quiescent_state() processing?
1344 * The idea is to avoid waking up RCU core processing on such a
1362 * Bind the RCU grace-period kthreads to the housekeeping CPU.
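/*
 * Editorial note: the NO_HZ_FULL handling above is typically paired with
 * boot-time CPU isolation.  An illustrative command line (CPU ranges are
 * arbitrary examples):
 *
 *   nohz_full=2-7 rcu_nocbs=2-7 irqaffinity=0-1
 *
 * CPUs outside those masks remain housekeeping CPUs, and the RCU
 * grace-period kthreads are bound there as noted above.
 */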