1 /* SPDX-License-Identifier: GPL-2.0+ */
3 * Read-Copy Update mechanism for mutual exclusion (tree-based version)
4 * Internal non-public definitions that provide either classic
19 * In order to read the offloaded state of an rdp in a safe
23 * non-preemptible reads are also safe. NOCB kthreads and
30 lockdep_is_held(&rdp->nocb_lock) ||
38 return rcu_segcblist_is_offloaded(&rdp->cblist);
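/*
 * Editorial sketch, not kernel code: one way a caller could satisfy the
 * safety rule sketched above for reading the offloaded state -- either
 * hold rdp->nocb_lock, or sample this CPU's own rdp while non-preemptible,
 * as done below.  The wrapper name is invented for illustration; it assumes
 * the declarations in this file plus <linux/preempt.h>.
 */
static bool sample_local_offloaded_state(struct rcu_data *rdp)
{
	bool offloaded;

	/* A non-preemptible read of the local rdp is one of the safe cases. */
	preempt_disable();
	offloaded = rcu_rdp_is_offloaded(rdp);
	preempt_enable();

	return offloaded;
}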
51 pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d.\n",
58 pr_info("\tRCU strict (and thus non-scalable) grace periods are enabled.\n");
60 pr_info("\tFour(or more)-level hierarchy is enabled.\n");
62 pr_info("\tBuild-time adjustment of leaf fanout to %d.\n",
65 pr_info("\tBoot-time adjustment of leaf fanout to %d.\n",
74 pr_info("\tBoot-time adjustment of callback invocation limit to %ld.\n", blimit);
76 pr_info("\tBoot-time adjustment of callback high-water mark to %ld.\n", qhimark);
78 pr_info("\tBoot-time adjustment of callback low-water mark to %ld.\n", qlowmark);
80 pr_info("\tBoot-time adjustment of callback overload level to %ld.\n", qovld);
82 pr_info("\tBoot-time adjustment of first FQS scan delay to %ld jiffies.\n", jiffies_till_first_fqs);
84 pr_info("\tBoot-time adjustment of subsequent FQS scan delay to %ld jiffies.\n", jiffies_till_next_fqs);
86 pr_info("\tBoot-time adjustment of scheduler-enlistment delay to %ld jiffies.\n", jiffies_till_sched_qs);
88 pr_info("\tKick kthreads if too-long grace period.\n");
90 pr_info("\tRCU callback double-/use-after-free debug is enabled.\n");
92 pr_info("\tRCU debug GP pre-init slowdown %d jiffies.\n", gp_preinit_delay);
135 * Queues a task preempted within an RCU-preempt read-side critical
136 * section into the appropriate location within the ->blkd_tasks list,
138 * periods. The ->gp_tasks pointer indicates which element the normal
139 * grace period is waiting on (NULL if none), and the ->exp_tasks pointer
141 * NULL if none). If a grace period is waiting on a given element in the
142 * ->blkd_tasks list, it also waits on all subsequent elements. Thus,
143 * adding a task to the tail of the list blocks any grace period that is
144 * already waiting on one of the elements. In contrast, adding a task
149 * period wait for a task that is not strictly speaking blocking it.
150 * Given the choice, we needlessly block a normal grace period rather than
154 * indefinitely postpone a normal grace period. Eventually, all of the
157 * their RCU read-side critical sections. At that point, the ->gp_tasks
158 * pointer will equal the ->exp_tasks pointer, at which point the end of
163 __releases(rnp->lock) /* But leaves rrupts disabled. */
165 int blkd_state = (rnp->gp_tasks ? RCU_GP_TASKS : 0) +
166 (rnp->exp_tasks ? RCU_EXP_TASKS : 0) +
167 (rnp->qsmask & rdp->grpmask ? RCU_GP_BLKD : 0) +
168 (rnp->expmask & rdp->grpmask ? RCU_EXP_BLKD : 0);
172 WARN_ON_ONCE(rdp->mynode != rnp);
175 WARN_ON_ONCE(rnp->qsmaskinitnext & ~rnp->qsmaskinit & rnp->qsmask &
176 rdp->grpmask);
180 * this could be an if-statement. In practice, when I tried
192 * GP but not blocking the already-waiting expedited GP.
194 * blocking the already-waiting GPs.
196 list_add(&t->rcu_node_entry, &rnp->blkd_tasks);
209 * GP already waiting), or a task arriving that blocks
214 list_add_tail(&t->rcu_node_entry, &rnp->blkd_tasks);
227 list_add(&t->rcu_node_entry, rnp->exp_tasks);
238 list_add(&t->rcu_node_entry, rnp->gp_tasks);
250 * block either grace period, update the ->gp_tasks and/or
251 * ->exp_tasks pointers, respectively, to reference the newly
254 if (!rnp->gp_tasks && (blkd_state & RCU_GP_BLKD)) {
255 WRITE_ONCE(rnp->gp_tasks, &t->rcu_node_entry);
256 WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq);
258 if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD))
259 WRITE_ONCE(rnp->exp_tasks, &t->rcu_node_entry);
261 !(rnp->qsmask & rdp->grpmask));
263 !(rnp->expmask & rdp->grpmask));
269 * no need to check for a subsequent expedited GP. (Though we are
270 * still in a quiescent state in any case.)
272 * Interrupts are disabled, so ->cpu_no_qs.b.exp cannot change.
274 if (blkd_state & RCU_EXP_BLKD && rdp->cpu_no_qs.b.exp)
277 WARN_ON_ONCE(rdp->cpu_no_qs.b.exp);
278 ASSERT_EXCLUSIVE_WRITER_SCOPED(rdp->cpu_no_qs.b.exp);
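/*
 * Editorial sketch, not kernel code: a tiny user-space model of the
 * ->blkd_tasks ordering rule described before rcu_preempt_ctxt_queue().
 * A grace period waits on the element its pointer references and on all
 * subsequent elements, so a task added at the tail blocks an
 * already-waiting grace period while a task added at the head does not.
 * All names here (struct model_task, model_gp_waits_on, ...) are invented
 * for illustration.
 */
#include <stdbool.h>
#include <stdio.h>

struct model_task {
	const char *name;
	struct model_task *next;	/* head -> ... -> tail */
};

/* Does a GP whose pointer is @gp_tasks wait on @t?  Only if @t is at or
 * after the pointed-to element, i.e. on "all subsequent elements". */
static bool model_gp_waits_on(struct model_task *gp_tasks, struct model_task *t)
{
	for (struct model_task *p = gp_tasks; p; p = p->next)
		if (p == t)
			return true;
	return false;
}

int main(void)
{
	struct model_task tail_task = { "added-at-tail", NULL };
	struct model_task old_task  = { "blocking-GP", &tail_task };
	struct model_task head_task = { "added-at-head", &old_task };
	struct model_task *gp_tasks = &old_task;	/* GP waits from here on */

	printf("waits on head insert? %d\n", model_gp_waits_on(gp_tasks, &head_task)); /* 0 */
	printf("waits on tail insert? %d\n", model_gp_waits_on(gp_tasks, &tail_task)); /* 1 */
	return 0;
}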
282 * Record a preemptible-RCU quiescent state for the specified CPU.
284 * on the CPU is in a quiescent state: Instead, it means that the current
285 * grace period need not wait on any RCU read-side critical section that
287 * in an RCU read-side critical section, it has already added itself to
288 * some leaf rcu_node structure's ->blkd_tasks list. In addition to the
290 * in an RCU read-side critical section.
292 * Unlike non-preemptible-RCU, quiescent state reports for expedited
307 WRITE_ONCE(current->rcu_read_unlock_special.b.need_qs, false);
313 * context-switched away from. If this task is in an RCU read-side
317 * RCU read-side critical section. Therefore, the current grace period
320 * rnp->gp_tasks becomes NULL.
332 WARN_ONCE(!preempt && rcu_preempt_depth() > 0, "Voluntary context switch within RCU read-side critical section!");
334 !t->rcu_read_unlock_special.b.blocked) {
336 /* Possibly blocking in an RCU read-side critical section. */
337 rnp = rdp->mynode;
339 t->rcu_read_unlock_special.b.blocked = true;
340 t->rcu_blocked_node = rnp;
348 WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
350 t->pid,
351 (rnp->qsmask & rdp->grpmask)
352 ? rnp->gp_seq
353 : rcu_seq_snap(&rnp->gp_seq));
360 * Either we were not in an RCU read-side critical section to
362 * globally. Either way, we can now note a quiescent state
363 * for this CPU. Again, if we were in an RCU read-side critical
369 if (rdp->cpu_no_qs.b.exp)
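/*
 * Editorial sketch, not kernel code: the reader-side pattern that the
 * context-switch handling above supports.  A preemptible-RCU reader can be
 * preempted between rcu_read_lock() and rcu_read_unlock(); at that point
 * the task is queued on a leaf rcu_node's ->blkd_tasks list as described
 * earlier.  struct my_data, global_ptr and read_value() are invented for
 * illustration and assume <linux/rcupdate.h>.
 */
struct my_data {
	int value;
};

static struct my_data __rcu *global_ptr;

static int read_value(void)
{
	struct my_data *p;
	int val = -1;

	rcu_read_lock();
	p = rcu_dereference(global_ptr);	/* reader may be preempted after this */
	if (p)
		val = p->value;
	rcu_read_unlock();	/* outermost unlock may report a deferred QS */

	return val;
}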
378 * for the specified rcu_node structure. If the caller needs a reliable
379 * answer, it must hold the rcu_node's ->lock.
383 return READ_ONCE(rnp->gp_tasks) != NULL;
386 /* limit value for ->rcu_read_lock_nesting. */
391 WRITE_ONCE(current->rcu_read_lock_nesting, READ_ONCE(current->rcu_read_lock_nesting) + 1);
396 int ret = READ_ONCE(current->rcu_read_lock_nesting) - 1;
398 WRITE_ONCE(current->rcu_read_lock_nesting, ret);
404 WRITE_ONCE(current->rcu_read_lock_nesting, val);
409 * Just increment ->rcu_read_lock_nesting, shared state will be updated
418 WRITE_ONCE(current->rcu_read_unlock_special.b.need_qs, true);
425 * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
426 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
427 * invoke rcu_read_unlock_special() to clean up after a context switch
428 * in an RCU read-side critical section and other special cases.
436 barrier(); // critical-section exit before .s check.
437 if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s)))
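/*
 * Editorial sketch, not kernel code: nesting in practice.  Only the
 * outermost rcu_read_unlock() -- the one that brings ->rcu_read_lock_nesting
 * back to zero -- examines ->rcu_read_unlock_special and may invoke
 * rcu_read_unlock_special(); inner unlocks just decrement the counter.
 */
static void nested_reader_example(void)
{
	rcu_read_lock();	/* nesting 0 -> 1 */
	rcu_read_lock();	/* nesting 1 -> 2 */
	rcu_read_unlock();	/* nesting 2 -> 1, no special processing */
	rcu_read_unlock();	/* nesting 1 -> 0, may do special processing */
}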
449 * Advance a ->blkd_tasks-list pointer to the next entry, instead
457 np = t->rcu_node_entry.next;
458 if (np == &rnp->blkd_tasks)
465 * preempted within an RCU read-side critical section.
469 return !list_empty(&rnp->blkd_tasks);
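/*
 * Editorial sketch, not kernel code: the generic shape of the idiom used by
 * rcu_next_node_entry() above -- advance a cursor within a circular
 * list_head list and map "wrapped back to the list head" onto NULL, so the
 * grace-period pointers never reference the head itself.  Assumes
 * <linux/list.h>; the helper name is invented.
 */
static struct list_head *advance_or_null(struct list_head *pos,
					 struct list_head *head)
{
	struct list_head *np = pos->next;

	return np == head ? NULL : np;
}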
490 if (rdp->defer_qs_iw_pending == DEFER_QS_PENDING)
491 rdp->defer_qs_iw_pending = DEFER_QS_IDLE;
496 * t->rcu_read_unlock_special cannot change.
498 special = t->rcu_read_unlock_special;
499 if (!special.s && !rdp->cpu_no_qs.b.exp) {
503 t->rcu_read_unlock_special.s = 0;
506 rdp->cpu_no_qs.b.norm = false;
515 * Respond to a request by an expedited grace period for a
518 * blocked-tasks list below.
520 if (rdp->cpu_no_qs.b.exp)
523 /* Clean up if blocked during RCU read-side critical section. */
530 * to loop. Retain a WARN_ON_ONCE() out of sheer paranoia.
532 rnp = t->rcu_blocked_node;
534 WARN_ON_ONCE(rnp != t->rcu_blocked_node);
537 WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq &&
538 (!empty_norm || rnp->qsmask));
541 list_del_init(&t->rcu_node_entry);
542 t->rcu_blocked_node = NULL;
544 rnp->gp_seq, t->pid);
545 if (&t->rcu_node_entry == rnp->gp_tasks)
546 WRITE_ONCE(rnp->gp_tasks, np);
547 if (&t->rcu_node_entry == rnp->exp_tasks)
548 WRITE_ONCE(rnp->exp_tasks, np);
550 /* Snapshot ->boost_mtx ownership w/rnp->lock held. */
551 drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx.rtmutex) == t;
552 if (&t->rcu_node_entry == rnp->boost_tasks)
553 WRITE_ONCE(rnp->boost_tasks, np);
559 * Note that rcu_report_unblock_qs_rnp() releases rnp->lock,
560 * so we must take a snapshot of the expedited state.
565 rnp->gp_seq,
566 0, rnp->qsmask,
567 rnp->level,
568 rnp->grplo,
569 rnp->grphi,
570 !!rnp->gp_tasks);
585 rt_mutex_futex_unlock(&rnp->boost_mtx.rtmutex);
592 * Is a deferred quiescent-state pending, and are we also not in
593 * an RCU read-side critical section? It is the caller's responsibility
595 * states. The reason for this is that it is safe to report a
603 READ_ONCE(t->rcu_read_unlock_special.s)) &&
608 * Report a deferred quiescent state if needed and safe to do so.
610 * not being in an RCU read-side critical section. The caller must
625 * Minimal handler to give the scheduler a chance to re-evaluate.
635 * If the IRQ work handler happens to run in the middle of RCU read-side
637 * attention to report a deferred quiescent state (the whole point of the
641 * 1. rcu_read_unlock() queues IRQ work (state -> DEFER_QS_PENDING)
644 * 4. rcu_read_unlock() does not re-queue work (state still PENDING)
648 WRITE_ONCE(rdp->defer_qs_iw_pending, DEFER_QS_IDLE);
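/*
 * Editorial sketch, not kernel code: the calling convention implied by
 * rcu_preempt_need_deferred_qs() and rcu_preempt_deferred_qs() above -- a
 * context that is not in an RCU read-side critical section checks whether a
 * deferred quiescent state is pending and safe to report, and reports it if
 * so.  The wrapper name is invented for illustration.
 */
static void maybe_report_deferred_qs(struct task_struct *t)
{
	if (rcu_preempt_need_deferred_qs(t))
		rcu_preempt_deferred_qs(t);
}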
655 * 1. Task blocking an expedited grace period (based on a heuristic, could be
656 * false-positive, see below.)
662 * @rdp: The per-CPU RCU data
675 * task was preempted within an RCU read-side critical section and is
678  * check because 't' might not be on the exp_tasks list at all - it's
679 * just a fast heuristic that can be false-positive sometimes.
681 if (t->rcu_blocked_node && READ_ONCE(t->rcu_blocked_node->exp_tasks))
690 if (rdp->grpmask & READ_ONCE(rnp->expmask))
697 * - This CPU has not yet reported a quiescent state, or
698 * - This task was preempted within an RCU critical section
702 ((rdp->grpmask & READ_ONCE(rnp->qsmask)) || t->rcu_blocked_node))
706 * RCU priority boosting case: If a task is subject to RCU priority
707 * boosting and exits an RCU read-side critical section with interrupts
709 * Without this, a low-priority task could incorrectly run at high
710  * real-time priority for an extended period, degrading real-time
714 if (IS_ENABLED(CONFIG_RCU_BOOST) && irqs_were_disabled && t->rcu_blocked_node)
723 * read-side critical section.
741 struct rcu_node *rnp = rdp->mynode;
749 // GP in flight or a potential need to deboost.
759 needs_exp && rdp->defer_qs_iw_pending != DEFER_QS_PENDING &&
760 cpu_online(rdp->cpu)) {
761 // Get scheduler to re-evaluate and call hooks.
763 rdp->defer_qs_iw_pending = DEFER_QS_PENDING;
764 irq_work_queue_on(&rdp->defer_qs_iw, rdp->cpu);
775 * period is in fact empty. It is a serious bug to complete a grace
777 * invoked -before- updating this rnp's ->gp_seq.
780 * block the newly created grace period, so set up ->gp_tasks accordingly.
791 (rnp->qsmaskinit || rnp->wait_blkd_tasks)) {
792 WRITE_ONCE(rnp->gp_tasks, rnp->blkd_tasks.next);
793 t = container_of(rnp->gp_tasks, struct task_struct,
795 trace_rcu_unlock_preempted_task(TPS("rcu_preempt-GPS"),
796 rnp->gp_seq, t->pid);
798 WARN_ON_ONCE(rnp->qsmask);
802 * Check for a quiescent state from the current CPU, including voluntary
803 * context switches for Tasks RCU. When a task blocks, the task is
832 !t->rcu_read_unlock_special.b.need_qs &&
834 t->rcu_read_unlock_special.b.need_qs = true;
838 * Check for a task exiting while in a preemptible-RCU read-side
842 * return, there was a bug of some sort. Spewing warnings from this
849   if (unlikely(!list_empty(&current->rcu_node_entry))) {
852 WRITE_ONCE(t->rcu_read_unlock_special.b.blocked, true);
863 * Dump the blocked-tasks state, but limit the list dump to the
876 pr_info("%s: grp: %d-%d level: %d ->gp_seq %ld ->completedqs %ld\n",
877 __func__, rnp->grplo, rnp->grphi, rnp->level,
878 (long)READ_ONCE(rnp->gp_seq), (long)rnp->completedqs);
879 for (rnp1 = rnp; rnp1; rnp1 = rnp1->parent)
880 pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx\n",
881 __func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext);
882 pr_info("%s: ->gp_tasks %p ->boost_tasks %p ->exp_tasks %p\n",
883 __func__, READ_ONCE(rnp->gp_tasks), data_race(rnp->boost_tasks),
884 READ_ONCE(rnp->exp_tasks));
885 pr_info("%s: ->blkd_tasks", __func__);
887 list_for_each(lhp, &rnp->blkd_tasks) {
893 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) {
897 (long)rdp->rcu_onl_gp_seq, rdp->rcu_onl_gp_state,
898 (long)rdp->rcu_ofl_gp_seq, rdp->rcu_ofl_gp_state);
904 rdp->defer_qs_iw = IRQ_WORK_INIT_HARD(rcu_preempt_deferred_qs_handler);
910 * __rcu_read_unlock() marks the beginning of a quiescent state, immediately
911 * report that quiescent state and, if requested, spin for a bit.
921 * rcu_report_qs_rdp() can only be invoked with a stable rdp and
929 rdp->cpu_no_qs.b.norm = false;
945 * Note a quiescent state for PREEMPTION=n. Because we do not need to know
947 * the start of the grace period, this just sets a flag. The caller must
964 * emergency, invoke rcu_momentary_eqs() to do a heavy-weight
965 * dyntick-idle quiescent state visible to other CPUs, which will in
967 * Either way, register a lightweight quiescent state.
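/*
 * Editorial sketch, not kernel code: on PREEMPTION=n kernels, long-running
 * kernel loops typically provide the quiescent states described above by
 * calling cond_resched(), which is where lightweight quiescent-state
 * reporting is hooked in.  process_many_items() and its list are invented
 * for illustration and assume <linux/list.h> and <linux/sched.h>.
 */
static void process_many_items(struct list_head *items)
{
	struct list_head *pos;

	list_for_each(pos, items) {
		/* ... per-item work ... */
		cond_resched();	/* lets RCU note a quiescent state on !PREEMPT */
	}
}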
993 * Note a PREEMPTION=n context switch. The caller must have disabled interrupts.
1037 // Except that we do need to respond to a request by an expedited
1038 // grace period for a quiescent state from this CPU. Note that in
1039 // non-preemptible kernels, there can be no context switches within RCU
1040 // read-side critical sections, which in turn means that the leaf rcu_node
1041 // structure's blocked-tasks list is always empty. There is therefore no need to
1042 // actually check it. Instead, a quiescent state from this CPU suffices,
1043 // and this function is only called from such a quiescent state.
1048 if (READ_ONCE(rdp->cpu_no_qs.b.exp))
1059 WARN_ON_ONCE(rnp->qsmask);
1063 * Check to see if this CPU is in a non-context-switch quiescent state,
1074 * mode, from the idle loop without this being a nested
1076 * (with PREEMPT_COUNT=y). In this case, the CPU is in a
1080 * references only CPU-local variables that other CPUs
1090 * while in preemptible RCU read-side critical sections.
1097 * Dump the guaranteed-empty blocked-tasks state. Trust but verify.
1102 WARN_ON_ONCE(!list_empty(&rnp->blkd_tasks));
1122 WRITE_ONCE(rdp->rcuc_activity, jiffies);
1128 return rdp->nocb_cb_kthread == current;
1135 * Is the current CPU running the RCU-callbacks kthread?
1140 return rdp->rcu_cpu_kthread_task == current ||
1147 * Carry out RCU priority boosting on the task indicated by ->exp_tasks
1148 * or ->boost_tasks, advancing the pointer to the next task in the
1149 * ->blkd_tasks list.
1160 if (READ_ONCE(rnp->exp_tasks) == NULL &&
1161 READ_ONCE(rnp->boost_tasks) == NULL)
1168 * might exit their RCU read-side critical sections on their own.
1170 if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) {
1177 * This cannot starve the normal grace periods because a second
1179 * those blocking the pre-existing normal grace period.
1181 if (rnp->exp_tasks != NULL)
1182 tb = rnp->exp_tasks;
1184 tb = rnp->boost_tasks;
1188 * be held by task t. We leave a pointer to that rt_mutex where
1190 * exits its outermost RCU read-side critical section. Then
1194 * Note that task t must acquire rnp->lock to remove itself from
1195 * the ->blkd_tasks list, which it will do from exit() if from
1197 * stay around at least until we drop rnp->lock. Note that
1198 * rnp->lock also resolves races between our priority boosting
1199 * and task t's exiting its outermost RCU read-side critical
1203 rt_mutex_init_proxy_locked(&rnp->boost_mtx.rtmutex, t);
1205 /* Lock only for side effect: boosts task t's priority. */
1206 rt_mutex_lock(&rnp->boost_mtx);
1207 rt_mutex_unlock(&rnp->boost_mtx); /* Then keep lockdep happy. */
1208 rnp->n_boosts++;
1210 return READ_ONCE(rnp->exp_tasks) != NULL ||
1211 READ_ONCE(rnp->boost_tasks) != NULL;
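/*
 * Editorial sketch, not kernel code: the boosting above relies on rt_mutex
 * priority inheritance.  The same idea is visible from user space with a
 * PTHREAD_PRIO_INHERIT mutex, where a low-priority holder is temporarily
 * raised to the priority of a high-priority waiter.  This is an analogy
 * only; the kernel path is the proxy-locked rt_mutex shown in rcu_boost().
 */
#include <pthread.h>

static pthread_mutex_t pi_lock;

static int init_pi_lock(void)
{
	pthread_mutexattr_t attr;
	int ret;

	ret = pthread_mutexattr_init(&attr);
	if (ret)
		return ret;
	/* Request priority inheritance, analogous to rt_mutex boosting. */
	ret = pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_INHERIT);
	if (!ret)
		ret = pthread_mutex_init(&pi_lock, &attr);
	pthread_mutexattr_destroy(&attr);
	return ret;
}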
1215 * Priority-boosting kthread, one per leaf rcu_node.
1225 WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_WAITING);
1227 rcu_wait(READ_ONCE(rnp->boost_tasks) ||
1228 READ_ONCE(rnp->exp_tasks));
1230 WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_RUNNING);
1237 WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_YIELDING);
1251 * blocking the current grace period, and, if so, tell the per-rcu_node
1255 * The caller must hold rnp->lock, which this function releases.
1256 * The ->boost_kthread_task is immortal, so we don't need to worry
1260 __releases(rnp->lock)
1263 if (!rnp->boost_kthread_task ||
1264 (!rcu_preempt_blocked_readers_cgp(rnp) && !rnp->exp_tasks)) {
1268 if (rnp->exp_tasks != NULL ||
1269 (rnp->gp_tasks != NULL &&
1270 rnp->boost_tasks == NULL &&
1271 rnp->qsmask == 0 &&
1272 (!time_after(rnp->boost_time, jiffies) || rcu_state.cbovld ||
1274 if (rnp->exp_tasks == NULL)
1275 WRITE_ONCE(rnp->boost_tasks, rnp->gp_tasks);
1277 rcu_wake_cond(rnp->boost_kthread_task,
1278 READ_ONCE(rnp->boost_kthread_status));
1287 * Do priority-boost accounting for the start of a new grace period.
1291 rnp->boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
1295 * Create an RCU-boost kthread for the specified node if one does not
1301 int rnp_index = rnp - rcu_get_root();
1305 if (rnp->boost_kthread_task)
1314 rnp->boost_kthread_task = t;
1326 __releases(rnp->lock)
1342 * Is this CPU a NO_HZ_FULL CPU that should ignore RCU so that the
1343 * grace-period kthread will do force_quiescent_state() processing?
1344 * The idea is to avoid waking up RCU core processing on such a
1362 * Bind the RCU grace-period kthreads to the housekeeping CPU.
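/*
 * Editorial note: the NO_HZ_FULL handling above is typically paired with
 * boot-time CPU isolation.  An illustrative command line (CPU ranges are
 * arbitrary examples):
 *
 *   nohz_full=2-7 rcu_nocbs=2-7 irqaffinity=0-1
 *
 * CPUs outside those masks remain housekeeping CPUs, and the RCU
 * grace-period kthreads are bound there as noted above.
 */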