xref: /linux/kernel/rcu/tasks.h (revision 23b0f90ba871f096474e1c27c3d14f455189d2d9)
1 /* SPDX-License-Identifier: GPL-2.0+ */
2 /*
3  * Task-based RCU implementations.
4  *
5  * Copyright (C) 2020 Paul E. McKenney
6  */
7 
8 #ifdef CONFIG_TASKS_RCU_GENERIC
9 #include "rcu_segcblist.h"
10 
11 ////////////////////////////////////////////////////////////////////////
12 //
13 // Generic data structures.
14 
15 struct rcu_tasks;
16 typedef void (*rcu_tasks_gp_func_t)(struct rcu_tasks *rtp);
17 typedef void (*pregp_func_t)(struct list_head *hop);
18 typedef void (*pertask_func_t)(struct task_struct *t, struct list_head *hop);
19 typedef void (*postscan_func_t)(struct list_head *hop);
20 typedef void (*holdouts_func_t)(struct list_head *hop, bool ndrpt, bool *frptp);
21 typedef void (*postgp_func_t)(struct rcu_tasks *rtp);
22 
23 /**
24  * struct rcu_tasks_percpu - Per-CPU component of definition for a Tasks-RCU-like mechanism.
25  * @cblist: Callback list.
26  * @lock: Lock protecting per-CPU callback list.
27  * @rtp_jiffies: Jiffies counter value for statistics.
28  * @lazy_timer: Timer to unlazify callbacks.
29  * @urgent_gp: Number of additional non-lazy grace periods.
30  * @rtp_n_lock_retries: Rough lock-contention statistic.
31  * @rtp_work: Work queue for invoking callbacks.
32  * @rtp_irq_work: IRQ work queue for deferred wakeups.
33  * @barrier_q_head: RCU callback for barrier operation.
34  * @rtp_blkd_tasks: List of tasks blocked as readers.
35  * @rtp_exit_list: List of tasks in the latter portion of do_exit().
36  * @cpu: CPU number corresponding to this entry.
37  * @index: Index of this CPU in rtpcp_array of the rcu_tasks structure.
38  * @rtpp: Pointer to the rcu_tasks structure.
39  */
40 struct rcu_tasks_percpu {
41 	struct rcu_segcblist cblist;
42 	raw_spinlock_t __private lock;
43 	unsigned long rtp_jiffies;
44 	unsigned long rtp_n_lock_retries;
45 	struct timer_list lazy_timer;
46 	unsigned int urgent_gp;
47 	struct work_struct rtp_work;
48 	struct irq_work rtp_irq_work;
49 	struct rcu_head barrier_q_head;
50 	struct list_head rtp_blkd_tasks;
51 	struct list_head rtp_exit_list;
52 	int cpu;
53 	int index;
54 	struct rcu_tasks *rtpp;
55 };
56 
57 /**
58  * struct rcu_tasks - Definition for a Tasks-RCU-like mechanism.
59  * @cbs_wait: RCU wait allowing a new callback to get kthread's attention.
60  * @cbs_gbl_lock: Lock protecting callback list.
61  * @tasks_gp_mutex: Mutex protecting grace period, needed during mid-boot dead zone.
62  * @gp_func: This flavor's grace-period-wait function.
63  * @gp_state: Grace period's most recent state transition (debugging).
64  * @gp_sleep: Per-grace-period sleep to prevent CPU-bound looping.
65  * @init_fract: Initial backoff sleep interval.
66  * @gp_jiffies: Time of last @gp_state transition.
67  * @gp_start: Most recent grace-period start in jiffies.
68  * @tasks_gp_seq: Number of grace periods completed since boot in upper bits.
69  * @n_ipis: Number of IPIs sent to encourage grace periods to end.
70  * @n_ipis_fails: Number of IPI-send failures.
71  * @kthread_ptr: This flavor's grace-period/callback-invocation kthread.
72  * @lazy_jiffies: Number of jiffies to allow callbacks to be lazy.
73  * @pregp_func: This flavor's pre-grace-period function (optional).
74  * @pertask_func: This flavor's per-task scan function (optional).
75  * @postscan_func: This flavor's post-task scan function (optional).
76  * @holdouts_func: This flavor's holdout-list scan function (optional).
77  * @postgp_func: This flavor's post-grace-period function (optional).
78  * @call_func: This flavor's call_rcu()-equivalent function.
79  * @wait_state: Task state for synchronous grace-period waits (default TASK_UNINTERRUPTIBLE).
80  * @rtpcpu: This flavor's rcu_tasks_percpu structure.
81  * @rtpcp_array: Array of pointers to the rcu_tasks_percpu structures of the CPUs in cpu_possible_mask.
82  * @percpu_enqueue_shift: Shift down CPU ID this much when enqueuing callbacks.
83  * @percpu_enqueue_lim: Number of per-CPU callback queues in use for enqueuing.
84  * @percpu_dequeue_lim: Number of per-CPU callback queues in use for dequeuing.
85  * @percpu_dequeue_gpseq: RCU grace-period number to propagate enqueue limit to dequeuers.
86  * @barrier_q_mutex: Serialize barrier operations.
87  * @barrier_q_count: Number of queues being waited on.
88  * @barrier_q_completion: Barrier wait/wakeup mechanism.
89  * @barrier_q_seq: Sequence number for barrier operations.
90  * @barrier_q_start: Most recent barrier start in jiffies.
91  * @name: This flavor's textual name.
92  * @kname: This flavor's kthread name.
93  */
94 struct rcu_tasks {
95 	struct rcuwait cbs_wait;
96 	raw_spinlock_t cbs_gbl_lock;
97 	struct mutex tasks_gp_mutex;
98 	int gp_state;
99 	int gp_sleep;
100 	int init_fract;
101 	unsigned long gp_jiffies;
102 	unsigned long gp_start;
103 	unsigned long tasks_gp_seq;
104 	unsigned long n_ipis;
105 	unsigned long n_ipis_fails;
106 	struct task_struct *kthread_ptr;
107 	unsigned long lazy_jiffies;
108 	rcu_tasks_gp_func_t gp_func;
109 	pregp_func_t pregp_func;
110 	pertask_func_t pertask_func;
111 	postscan_func_t postscan_func;
112 	holdouts_func_t holdouts_func;
113 	postgp_func_t postgp_func;
114 	call_rcu_func_t call_func;
115 	unsigned int wait_state;
116 	struct rcu_tasks_percpu __percpu *rtpcpu;
117 	struct rcu_tasks_percpu **rtpcp_array;
118 	int percpu_enqueue_shift;
119 	int percpu_enqueue_lim;
120 	int percpu_dequeue_lim;
121 	unsigned long percpu_dequeue_gpseq;
122 	struct mutex barrier_q_mutex;
123 	atomic_t barrier_q_count;
124 	struct completion barrier_q_completion;
125 	unsigned long barrier_q_seq;
126 	unsigned long barrier_q_start;
127 	char *name;
128 	char *kname;
129 };
130 
131 static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp);
132 
133 #define DEFINE_RCU_TASKS(rt_name, gp, call, n)						\
134 static DEFINE_PER_CPU(struct rcu_tasks_percpu, rt_name ## __percpu) = {			\
135 	.lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name ## __percpu.cbs_pcpu_lock),		\
136 	.rtp_irq_work = IRQ_WORK_INIT_HARD(call_rcu_tasks_iw_wakeup),			\
137 };											\
138 static struct rcu_tasks rt_name =							\
139 {											\
140 	.cbs_wait = __RCUWAIT_INITIALIZER(rt_name.wait),				\
141 	.cbs_gbl_lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name.cbs_gbl_lock),			\
142 	.tasks_gp_mutex = __MUTEX_INITIALIZER(rt_name.tasks_gp_mutex),			\
143 	.gp_func = gp,									\
144 	.call_func = call,								\
145 	.wait_state = TASK_UNINTERRUPTIBLE,						\
146 	.rtpcpu = &rt_name ## __percpu,							\
147 	.lazy_jiffies = DIV_ROUND_UP(HZ, 4),						\
148 	.name = n,									\
149 	.percpu_enqueue_shift = order_base_2(CONFIG_NR_CPUS),				\
150 	.percpu_enqueue_lim = 1,							\
151 	.percpu_dequeue_lim = 1,							\
152 	.barrier_q_mutex = __MUTEX_INITIALIZER(rt_name.barrier_q_mutex),		\
153 	.barrier_q_seq = (0UL - 50UL) << RCU_SEQ_CTR_SHIFT,				\
154 	.kname = #rt_name,								\
155 }
156 
157 #ifdef CONFIG_TASKS_RCU
158 
159 /* Report delays in rcu_tasks_postscan()'s scan of exiting tasks. */
160 static void tasks_rcu_exit_srcu_stall(struct timer_list *unused);
161 static DEFINE_TIMER(tasks_rcu_exit_srcu_stall_timer, tasks_rcu_exit_srcu_stall);
162 #endif /* #ifdef CONFIG_TASKS_RCU */
163 
164 /* Control stall timeouts.  Disable with <= 0, otherwise jiffies till stall. */
165 #define RCU_TASK_BOOT_STALL_TIMEOUT (HZ * 30)
166 #define RCU_TASK_STALL_TIMEOUT (HZ * 60 * 10)
167 static int rcu_task_stall_timeout __read_mostly = RCU_TASK_STALL_TIMEOUT;
168 module_param(rcu_task_stall_timeout, int, 0644);
169 #define RCU_TASK_STALL_INFO (HZ * 10)
170 static int rcu_task_stall_info __read_mostly = RCU_TASK_STALL_INFO;
171 module_param(rcu_task_stall_info, int, 0644);
172 static int rcu_task_stall_info_mult __read_mostly = 3;
173 module_param(rcu_task_stall_info_mult, int, 0444);
174 
175 static int rcu_task_enqueue_lim __read_mostly = -1;
176 module_param(rcu_task_enqueue_lim, int, 0444);
177 
178 static bool rcu_task_cb_adjust;
179 static int rcu_task_contend_lim __read_mostly = 100;
180 module_param(rcu_task_contend_lim, int, 0444);
181 static int rcu_task_collapse_lim __read_mostly = 10;
182 module_param(rcu_task_collapse_lim, int, 0444);
183 static int rcu_task_lazy_lim __read_mostly = 32;
184 module_param(rcu_task_lazy_lim, int, 0444);
185 
186 static int rcu_task_cpu_ids;
187 
188 /* RCU tasks grace-period state for debugging. */
189 #define RTGS_INIT		 0
190 #define RTGS_WAIT_WAIT_CBS	 1
191 #define RTGS_WAIT_GP		 2
192 #define RTGS_PRE_WAIT_GP	 3
193 #define RTGS_SCAN_TASKLIST	 4
194 #define RTGS_POST_SCAN_TASKLIST	 5
195 #define RTGS_WAIT_SCAN_HOLDOUTS	 6
196 #define RTGS_SCAN_HOLDOUTS	 7
197 #define RTGS_POST_GP		 8
198 #define RTGS_WAIT_READERS	 9
199 #define RTGS_INVOKE_CBS		10
200 #define RTGS_WAIT_CBS		11
201 #ifndef CONFIG_TINY_RCU
202 static const char * const rcu_tasks_gp_state_names[] = {
203 	"RTGS_INIT",
204 	"RTGS_WAIT_WAIT_CBS",
205 	"RTGS_WAIT_GP",
206 	"RTGS_PRE_WAIT_GP",
207 	"RTGS_SCAN_TASKLIST",
208 	"RTGS_POST_SCAN_TASKLIST",
209 	"RTGS_WAIT_SCAN_HOLDOUTS",
210 	"RTGS_SCAN_HOLDOUTS",
211 	"RTGS_POST_GP",
212 	"RTGS_WAIT_READERS",
213 	"RTGS_INVOKE_CBS",
214 	"RTGS_WAIT_CBS",
215 };
216 #endif /* #ifndef CONFIG_TINY_RCU */
217 
218 ////////////////////////////////////////////////////////////////////////
219 //
220 // Generic code.
221 
222 static void rcu_tasks_invoke_cbs_wq(struct work_struct *wp);
223 
224 /* Record grace-period phase and time. */
225 static void set_tasks_gp_state(struct rcu_tasks *rtp, int newstate)
226 {
227 	rtp->gp_state = newstate;
228 	rtp->gp_jiffies = jiffies;
229 }
230 
231 #ifndef CONFIG_TINY_RCU
232 /* Return state name. */
233 static const char *tasks_gp_state_getname(struct rcu_tasks *rtp)
234 {
235 	int i = data_race(rtp->gp_state); // Let KCSAN detect update races
236 	int j = READ_ONCE(i); // Prevent the compiler from reading twice
237 
238 	if (j >= ARRAY_SIZE(rcu_tasks_gp_state_names))
239 		return "???";
240 	return rcu_tasks_gp_state_names[j];
241 }
242 #endif /* #ifndef CONFIG_TINY_RCU */
243 
244 // Initialize per-CPU callback lists for the specified flavor of
245 // Tasks RCU.  Do not enqueue callbacks before this function is invoked.
246 static void cblist_init_generic(struct rcu_tasks *rtp)
247 {
248 	int cpu;
249 	int lim;
250 	int shift;
251 	int maxcpu;
252 	int index = 0;
253 
254 	if (rcu_task_enqueue_lim < 0) {
255 		rcu_task_enqueue_lim = 1;
256 		rcu_task_cb_adjust = true;
257 	} else if (rcu_task_enqueue_lim == 0) {
258 		rcu_task_enqueue_lim = 1;
259 	}
260 	lim = rcu_task_enqueue_lim;
261 
262 	rtp->rtpcp_array = kcalloc(num_possible_cpus(), sizeof(struct rcu_tasks_percpu *), GFP_KERNEL);
263 	BUG_ON(!rtp->rtpcp_array);
264 
265 	for_each_possible_cpu(cpu) {
266 		struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);
267 
268 		WARN_ON_ONCE(!rtpcp);
269 		if (cpu)
270 			raw_spin_lock_init(&ACCESS_PRIVATE(rtpcp, lock));
271 		if (rcu_segcblist_empty(&rtpcp->cblist))
272 			rcu_segcblist_init(&rtpcp->cblist);
273 		INIT_WORK(&rtpcp->rtp_work, rcu_tasks_invoke_cbs_wq);
274 		rtpcp->cpu = cpu;
275 		rtpcp->rtpp = rtp;
276 		rtpcp->index = index;
277 		rtp->rtpcp_array[index] = rtpcp;
278 		index++;
279 		if (!rtpcp->rtp_blkd_tasks.next)
280 			INIT_LIST_HEAD(&rtpcp->rtp_blkd_tasks);
281 		if (!rtpcp->rtp_exit_list.next)
282 			INIT_LIST_HEAD(&rtpcp->rtp_exit_list);
283 		rtpcp->barrier_q_head.next = &rtpcp->barrier_q_head;
284 		maxcpu = cpu;
285 	}
286 
287 	rcu_task_cpu_ids = maxcpu + 1;
288 	if (lim > rcu_task_cpu_ids)
289 		lim = rcu_task_cpu_ids;
290 	shift = ilog2(rcu_task_cpu_ids / lim);
291 	if (((rcu_task_cpu_ids - 1) >> shift) >= lim)
292 		shift++;
293 	WRITE_ONCE(rtp->percpu_enqueue_shift, shift);
294 	WRITE_ONCE(rtp->percpu_dequeue_lim, lim);
295 	smp_store_release(&rtp->percpu_enqueue_lim, lim);
296 
297 	pr_info("%s: Setting shift to %d and lim to %d rcu_task_cb_adjust=%d rcu_task_cpu_ids=%d.\n",
298 			rtp->name, data_race(rtp->percpu_enqueue_shift), data_race(rtp->percpu_enqueue_lim),
299 			rcu_task_cb_adjust, rcu_task_cpu_ids);
300 }
301 
302 // Compute wakeup time for lazy callback timer.
303 static unsigned long rcu_tasks_lazy_time(struct rcu_tasks *rtp)
304 {
305 	return jiffies + rtp->lazy_jiffies;
306 }
307 
308 // Timer handler that unlazifies lazy callbacks.
309 static void call_rcu_tasks_generic_timer(struct timer_list *tlp)
310 {
311 	unsigned long flags;
312 	bool needwake = false;
313 	struct rcu_tasks *rtp;
314 	struct rcu_tasks_percpu *rtpcp = timer_container_of(rtpcp, tlp,
315 						            lazy_timer);
316 
317 	rtp = rtpcp->rtpp;
318 	raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
319 	if (!rcu_segcblist_empty(&rtpcp->cblist) && rtp->lazy_jiffies) {
320 		if (!rtpcp->urgent_gp)
321 			rtpcp->urgent_gp = 1;
322 		needwake = true;
323 		mod_timer(&rtpcp->lazy_timer, rcu_tasks_lazy_time(rtp));
324 	}
325 	raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
326 	if (needwake)
327 		rcuwait_wake_up(&rtp->cbs_wait);
328 }
329 
330 // IRQ-work handler that does deferred wakeup for call_rcu_tasks_generic().
331 static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp)
332 {
333 	struct rcu_tasks *rtp;
334 	struct rcu_tasks_percpu *rtpcp = container_of(iwp, struct rcu_tasks_percpu, rtp_irq_work);
335 
336 	rtp = rtpcp->rtpp;
337 	rcuwait_wake_up(&rtp->cbs_wait);
338 }
339 
340 // Enqueue a callback for the specified flavor of Tasks RCU.
341 static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
342 				   struct rcu_tasks *rtp)
343 {
344 	int chosen_cpu;
345 	unsigned long flags;
346 	bool havekthread = smp_load_acquire(&rtp->kthread_ptr);
347 	int ideal_cpu;
348 	unsigned long j;
349 	bool needadjust = false;
350 	bool needwake;
351 	struct rcu_tasks_percpu *rtpcp;
352 
353 	rhp->next = NULL;
354 	rhp->func = func;
355 	local_irq_save(flags);
356 	rcu_read_lock();
357 	ideal_cpu = smp_processor_id() >> READ_ONCE(rtp->percpu_enqueue_shift);
358 	chosen_cpu = cpumask_next(ideal_cpu - 1, cpu_possible_mask);
359 	WARN_ON_ONCE(chosen_cpu >= rcu_task_cpu_ids);
360 	rtpcp = per_cpu_ptr(rtp->rtpcpu, chosen_cpu);
361 	if (!raw_spin_trylock_rcu_node(rtpcp)) { // irqs already disabled.
362 		raw_spin_lock_rcu_node(rtpcp); // irqs already disabled.
363 		j = jiffies;
364 		if (rtpcp->rtp_jiffies != j) {
365 			rtpcp->rtp_jiffies = j;
366 			rtpcp->rtp_n_lock_retries = 0;
367 		}
368 		if (rcu_task_cb_adjust && ++rtpcp->rtp_n_lock_retries > rcu_task_contend_lim &&
369 		    READ_ONCE(rtp->percpu_enqueue_lim) != rcu_task_cpu_ids)
370 			needadjust = true;  // Defer adjustment to avoid deadlock.
371 	}
372 	// Queuing callbacks before initialization not yet supported.
373 	if (WARN_ON_ONCE(!rcu_segcblist_is_enabled(&rtpcp->cblist)))
374 		rcu_segcblist_init(&rtpcp->cblist);
375 	needwake = (func == wakeme_after_rcu) ||
376 		   (rcu_segcblist_n_cbs(&rtpcp->cblist) == rcu_task_lazy_lim);
377 	if (havekthread && !needwake && !timer_pending(&rtpcp->lazy_timer)) {
378 		if (rtp->lazy_jiffies)
379 			mod_timer(&rtpcp->lazy_timer, rcu_tasks_lazy_time(rtp));
380 		else
381 			needwake = rcu_segcblist_empty(&rtpcp->cblist);
382 	}
383 	if (needwake)
384 		rtpcp->urgent_gp = 3;
385 	rcu_segcblist_enqueue(&rtpcp->cblist, rhp);
386 	raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
387 	if (unlikely(needadjust)) {
388 		raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
389 		if (rtp->percpu_enqueue_lim != rcu_task_cpu_ids) {
390 			WRITE_ONCE(rtp->percpu_enqueue_shift, 0);
391 			WRITE_ONCE(rtp->percpu_dequeue_lim, rcu_task_cpu_ids);
392 			smp_store_release(&rtp->percpu_enqueue_lim, rcu_task_cpu_ids);
393 			pr_info("Switching %s to per-CPU callback queuing.\n", rtp->name);
394 		}
395 		raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
396 	}
397 	rcu_read_unlock();
398 	/* No point in a wakeup unless the grace-period kthread already exists. */
399 	if (needwake && READ_ONCE(rtp->kthread_ptr))
400 		irq_work_queue(&rtpcp->rtp_irq_work);
401 }
402 
403 // RCU callback function for rcu_barrier_tasks_generic().
404 static void rcu_barrier_tasks_generic_cb(struct rcu_head *rhp)
405 {
406 	struct rcu_tasks *rtp;
407 	struct rcu_tasks_percpu *rtpcp;
408 
409 	rhp->next = rhp; // Mark the callback as having been invoked.
410 	rtpcp = container_of(rhp, struct rcu_tasks_percpu, barrier_q_head);
411 	rtp = rtpcp->rtpp;
412 	if (atomic_dec_and_test(&rtp->barrier_q_count))
413 		complete(&rtp->barrier_q_completion);
414 }
415 
416 // Wait for all in-flight callbacks for the specified RCU Tasks flavor.
417 // Operates in a manner similar to rcu_barrier().
418 static void __maybe_unused rcu_barrier_tasks_generic(struct rcu_tasks *rtp)
419 {
420 	int cpu;
421 	unsigned long flags;
422 	struct rcu_tasks_percpu *rtpcp;
423 	unsigned long s = rcu_seq_snap(&rtp->barrier_q_seq);
424 
425 	mutex_lock(&rtp->barrier_q_mutex);
426 	if (rcu_seq_done(&rtp->barrier_q_seq, s)) {
427 		smp_mb();
428 		mutex_unlock(&rtp->barrier_q_mutex);
429 		return;
430 	}
431 	rtp->barrier_q_start = jiffies;
432 	rcu_seq_start(&rtp->barrier_q_seq);
433 	init_completion(&rtp->barrier_q_completion);
434 	atomic_set(&rtp->barrier_q_count, 2);
435 	for_each_possible_cpu(cpu) {
436 		if (cpu >= smp_load_acquire(&rtp->percpu_dequeue_lim))
437 			break;
438 		rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);
439 		rtpcp->barrier_q_head.func = rcu_barrier_tasks_generic_cb;
440 		raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
441 		if (rcu_segcblist_entrain(&rtpcp->cblist, &rtpcp->barrier_q_head))
442 			atomic_inc(&rtp->barrier_q_count);
443 		raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
444 	}
445 	if (atomic_sub_and_test(2, &rtp->barrier_q_count))
446 		complete(&rtp->barrier_q_completion);
447 	wait_for_completion(&rtp->barrier_q_completion);
448 	rcu_seq_end(&rtp->barrier_q_seq);
449 	mutex_unlock(&rtp->barrier_q_mutex);
450 }
451 
452 // Advance callbacks and return a bitmask saying what is needed: 0x1 if
453 // callbacks are ready to be invoked, 0x2 if a new grace period is needed.
454 static int rcu_tasks_need_gpcb(struct rcu_tasks *rtp)
455 {
456 	int cpu;
457 	int dequeue_limit;
458 	unsigned long flags;
459 	bool gpdone = poll_state_synchronize_rcu(rtp->percpu_dequeue_gpseq);
460 	long n;
461 	long ncbs = 0;
462 	long ncbsnz = 0;
463 	int needgpcb = 0;
464 
465 	dequeue_limit = smp_load_acquire(&rtp->percpu_dequeue_lim);
466 	for (cpu = 0; cpu < dequeue_limit; cpu++) {
467 		if (!cpu_possible(cpu))
468 			continue;
469 		struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);
470 
471 		/* Advance and accelerate any new callbacks. */
472 		if (!rcu_segcblist_n_cbs(&rtpcp->cblist))
473 			continue;
474 		raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
475 		// Should we shrink down to a single callback queue?
476 		n = rcu_segcblist_n_cbs(&rtpcp->cblist);
477 		if (n) {
478 			ncbs += n;
479 			if (cpu > 0)
480 				ncbsnz += n;
481 		}
482 		rcu_segcblist_advance(&rtpcp->cblist, rcu_seq_current(&rtp->tasks_gp_seq));
483 		(void)rcu_segcblist_accelerate(&rtpcp->cblist, rcu_seq_snap(&rtp->tasks_gp_seq));
484 		if (rtpcp->urgent_gp > 0 && rcu_segcblist_pend_cbs(&rtpcp->cblist)) {
485 			if (rtp->lazy_jiffies)
486 				rtpcp->urgent_gp--;
487 			needgpcb |= 0x3;
488 		} else if (rcu_segcblist_empty(&rtpcp->cblist)) {
489 			rtpcp->urgent_gp = 0;
490 		}
491 		if (rcu_segcblist_ready_cbs(&rtpcp->cblist))
492 			needgpcb |= 0x1;
493 		raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
494 	}
495 
496 	// Shrink down to a single callback queue if appropriate.
497 	// This is done in two stages: (1) If there are no more than
498 	// rcu_task_collapse_lim callbacks on CPU 0 and none on any other
499 	// CPU, limit enqueueing to CPU 0.  (2) After an RCU grace period,
500 	// if there has not been an increase in callbacks, limit dequeuing
501 	// to CPU 0.  Note the matching RCU read-side critical section in
502 	// call_rcu_tasks_generic().
503 	if (rcu_task_cb_adjust && ncbs <= rcu_task_collapse_lim) {
504 		raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
505 		if (rtp->percpu_enqueue_lim > 1) {
506 			WRITE_ONCE(rtp->percpu_enqueue_shift, order_base_2(rcu_task_cpu_ids));
507 			smp_store_release(&rtp->percpu_enqueue_lim, 1);
508 			rtp->percpu_dequeue_gpseq = get_state_synchronize_rcu();
509 			gpdone = false;
510 			pr_info("Starting switch %s to CPU-0 callback queuing.\n", rtp->name);
511 		}
512 		raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
513 	}
514 	if (rcu_task_cb_adjust && !ncbsnz && gpdone) {
515 		raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
516 		if (rtp->percpu_enqueue_lim < rtp->percpu_dequeue_lim) {
517 			WRITE_ONCE(rtp->percpu_dequeue_lim, 1);
518 			pr_info("Completing switch %s to CPU-0 callback queuing.\n", rtp->name);
519 		}
520 		if (rtp->percpu_dequeue_lim == 1) {
521 			for (cpu = rtp->percpu_dequeue_lim; cpu < rcu_task_cpu_ids; cpu++) {
522 				if (!cpu_possible(cpu))
523 					continue;
524 				struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);
525 
526 				WARN_ON_ONCE(rcu_segcblist_n_cbs(&rtpcp->cblist));
527 			}
528 		}
529 		raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
530 	}
531 
532 	return needgpcb;
533 }
534 
535 // Advance callbacks and invoke any that are ready.
536 static void rcu_tasks_invoke_cbs(struct rcu_tasks *rtp, struct rcu_tasks_percpu *rtpcp)
537 {
538 	int cpuwq;
539 	unsigned long flags;
540 	int len;
541 	int index;
542 	struct rcu_head *rhp;
543 	struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl);
544 	struct rcu_tasks_percpu *rtpcp_next;
545 
546 	index = rtpcp->index * 2 + 1;
547 	if (index < num_possible_cpus()) {
548 		rtpcp_next = rtp->rtpcp_array[index];
549 		if (rtpcp_next->cpu < smp_load_acquire(&rtp->percpu_dequeue_lim)) {
550 			cpuwq = rcu_cpu_beenfullyonline(rtpcp_next->cpu) ? rtpcp_next->cpu : WORK_CPU_UNBOUND;
551 			queue_work_on(cpuwq, system_percpu_wq, &rtpcp_next->rtp_work);
552 			index++;
553 			if (index < num_possible_cpus()) {
554 				rtpcp_next = rtp->rtpcp_array[index];
555 				if (rtpcp_next->cpu < smp_load_acquire(&rtp->percpu_dequeue_lim)) {
556 					cpuwq = rcu_cpu_beenfullyonline(rtpcp_next->cpu) ? rtpcp_next->cpu : WORK_CPU_UNBOUND;
557 					queue_work_on(cpuwq, system_percpu_wq, &rtpcp_next->rtp_work);
558 				}
559 			}
560 		}
561 	}
562 
563 	if (rcu_segcblist_empty(&rtpcp->cblist))
564 		return;
565 	raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
566 	rcu_segcblist_advance(&rtpcp->cblist, rcu_seq_current(&rtp->tasks_gp_seq));
567 	rcu_segcblist_extract_done_cbs(&rtpcp->cblist, &rcl);
568 	raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
569 	len = rcl.len;
570 	for (rhp = rcu_cblist_dequeue(&rcl); rhp; rhp = rcu_cblist_dequeue(&rcl)) {
571 		debug_rcu_head_callback(rhp);
572 		local_bh_disable();
573 		rhp->func(rhp);
574 		local_bh_enable();
575 		cond_resched();
576 	}
577 	raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
578 	rcu_segcblist_add_len(&rtpcp->cblist, -len);
579 	(void)rcu_segcblist_accelerate(&rtpcp->cblist, rcu_seq_snap(&rtp->tasks_gp_seq));
580 	raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
581 }
582 
583 // Workqueue flood to advance callbacks and invoke any that are ready.
584 static void rcu_tasks_invoke_cbs_wq(struct work_struct *wp)
585 {
586 	struct rcu_tasks *rtp;
587 	struct rcu_tasks_percpu *rtpcp = container_of(wp, struct rcu_tasks_percpu, rtp_work);
588 
589 	rtp = rtpcp->rtpp;
590 	rcu_tasks_invoke_cbs(rtp, rtpcp);
591 }
592 
593 // Wait for one grace period.
594 static void rcu_tasks_one_gp(struct rcu_tasks *rtp, bool midboot)
595 {
596 	int needgpcb;
597 
598 	mutex_lock(&rtp->tasks_gp_mutex);
599 
600 	// If mid-boot, force a grace period; otherwise wait for callbacks needing one.
601 	if (unlikely(midboot)) {
602 		needgpcb = 0x2;
603 	} else {
604 		mutex_unlock(&rtp->tasks_gp_mutex);
605 		set_tasks_gp_state(rtp, RTGS_WAIT_CBS);
606 		rcuwait_wait_event(&rtp->cbs_wait,
607 				   (needgpcb = rcu_tasks_need_gpcb(rtp)),
608 				   TASK_IDLE);
609 		mutex_lock(&rtp->tasks_gp_mutex);
610 	}
611 
612 	if (needgpcb & 0x2) {
613 		// Wait for one grace period.
614 		set_tasks_gp_state(rtp, RTGS_WAIT_GP);
615 		rtp->gp_start = jiffies;
616 		rcu_seq_start(&rtp->tasks_gp_seq);
617 		rtp->gp_func(rtp);
618 		rcu_seq_end(&rtp->tasks_gp_seq);
619 	}
620 
621 	// Invoke callbacks.
622 	set_tasks_gp_state(rtp, RTGS_INVOKE_CBS);
623 	rcu_tasks_invoke_cbs(rtp, per_cpu_ptr(rtp->rtpcpu, 0));
624 	mutex_unlock(&rtp->tasks_gp_mutex);
625 }
626 
627 // RCU-tasks kthread that detects grace periods and invokes callbacks.
628 static int __noreturn rcu_tasks_kthread(void *arg)
629 {
630 	int cpu;
631 	struct rcu_tasks *rtp = arg;
632 
633 	for_each_possible_cpu(cpu) {
634 		struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);
635 
636 		timer_setup(&rtpcp->lazy_timer, call_rcu_tasks_generic_timer, 0);
637 		rtpcp->urgent_gp = 1;
638 	}
639 
640 	/* Run on housekeeping CPUs by default.  Sysadm can move if desired. */
641 	housekeeping_affine(current, HK_TYPE_RCU);
642 	smp_store_release(&rtp->kthread_ptr, current); // Let GPs start!
643 
644 	/*
645 	 * Each pass through the following loop makes one check for
646 	 * newly arrived callbacks, and, if there are some, waits for
647 	 * one RCU-tasks grace period and then invokes the callbacks.
648 	 * This loop is terminated by the system going down.  ;-)
649 	 */
650 	for (;;) {
651 		// Wait for one grace period and invoke any callbacks
652 		// that are ready.
653 		rcu_tasks_one_gp(rtp, false);
654 
655 		// Paranoid sleep to keep this from entering a tight loop.
656 		schedule_timeout_idle(rtp->gp_sleep);
657 	}
658 }
659 
660 // Wait for a grace period for the specified flavor of Tasks RCU.
661 static void synchronize_rcu_tasks_generic(struct rcu_tasks *rtp)
662 {
663 	/* Complain if the scheduler has not started.  */
664 	if (WARN_ONCE(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE,
665 			 "synchronize_%s() called too soon", rtp->name))
666 		return;
667 
668 	// If the grace-period kthread is running, use it.
669 	if (READ_ONCE(rtp->kthread_ptr)) {
670 		wait_rcu_gp_state(rtp->wait_state, rtp->call_func);
671 		return;
672 	}
673 	rcu_tasks_one_gp(rtp, true);
674 }
675 
676 /* Spawn RCU-tasks grace-period kthread. */
677 static void __init rcu_spawn_tasks_kthread_generic(struct rcu_tasks *rtp)
678 {
679 	struct task_struct *t;
680 
681 	t = kthread_run(rcu_tasks_kthread, rtp, "%s_kthread", rtp->kname);
682 	if (WARN_ONCE(IS_ERR(t), "%s: Could not start %s grace-period kthread, OOM is now expected behavior\n", __func__, rtp->name))
683 		return;
684 	smp_mb(); /* Ensure others see full kthread. */
685 }
686 
687 #ifndef CONFIG_TINY_RCU
688 
689 /*
690  * Print any non-default Tasks RCU settings.
691  */
692 static void __init rcu_tasks_bootup_oddness(void)
693 {
694 #if defined(CONFIG_TASKS_RCU) || defined(CONFIG_TASKS_TRACE_RCU)
695 	int rtsimc;
696 
697 	if (rcu_task_stall_timeout != RCU_TASK_STALL_TIMEOUT)
698 		pr_info("\tTasks-RCU CPU stall warnings timeout set to %d (rcu_task_stall_timeout).\n", rcu_task_stall_timeout);
699 	rtsimc = clamp(rcu_task_stall_info_mult, 1, 10);
700 	if (rtsimc != rcu_task_stall_info_mult) {
701 		pr_info("\tTasks-RCU CPU stall info multiplier clamped to %d (rcu_task_stall_info_mult).\n", rtsimc);
702 		rcu_task_stall_info_mult = rtsimc;
703 	}
704 #endif /* #if defined(CONFIG_TASKS_RCU) || defined(CONFIG_TASKS_TRACE_RCU) */
705 #ifdef CONFIG_TASKS_RCU
706 	pr_info("\tTrampoline variant of Tasks RCU enabled.\n");
707 #endif /* #ifdef CONFIG_TASKS_RCU */
708 #ifdef CONFIG_TASKS_RUDE_RCU
709 	pr_info("\tRude variant of Tasks RCU enabled.\n");
710 #endif /* #ifdef CONFIG_TASKS_RUDE_RCU */
711 #ifdef CONFIG_TASKS_TRACE_RCU
712 	pr_info("\tTracing variant of Tasks RCU enabled.\n");
713 #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
714 }
715 
716 /* Dump out rcutorture-relevant state common to all RCU-tasks flavors. */
717 static void show_rcu_tasks_generic_gp_kthread(struct rcu_tasks *rtp, char *s)
718 {
719 	int cpu;
720 	bool havecbs = false;
721 	bool haveurgent = false;
722 	bool haveurgentcbs = false;
723 
724 	for_each_possible_cpu(cpu) {
725 		struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);
726 
727 		if (!data_race(rcu_segcblist_empty(&rtpcp->cblist)))
728 			havecbs = true;
729 		if (data_race(rtpcp->urgent_gp))
730 			haveurgent = true;
731 		if (!data_race(rcu_segcblist_empty(&rtpcp->cblist)) && data_race(rtpcp->urgent_gp))
732 			haveurgentcbs = true;
733 		if (havecbs && haveurgent && haveurgentcbs)
734 			break;
735 	}
736 	pr_info("%s: %s(%d) since %lu g:%lu i:%lu/%lu %c%c%c%c l:%lu %s\n",
737 		rtp->kname,
738 		tasks_gp_state_getname(rtp), data_race(rtp->gp_state),
739 		jiffies - data_race(rtp->gp_jiffies),
740 		data_race(rcu_seq_current(&rtp->tasks_gp_seq)),
741 		data_race(rtp->n_ipis_fails), data_race(rtp->n_ipis),
742 		".k"[!!data_race(rtp->kthread_ptr)],
743 		".C"[havecbs],
744 		".u"[haveurgent],
745 		".U"[haveurgentcbs],
746 		rtp->lazy_jiffies,
747 		s);
748 }
749 
750 /* Dump out more rcutorture-relevant state common to all RCU-tasks flavors. */
751 static void rcu_tasks_torture_stats_print_generic(struct rcu_tasks *rtp, char *tt,
752 						  char *tf, char *tst)
753 {
754 	cpumask_var_t cm;
755 	int cpu;
756 	bool gotcb = false;
757 	unsigned long j = jiffies;
758 
759 	pr_alert("%s%s Tasks%s RCU g%ld gp_start %lu gp_jiffies %lu gp_state %d (%s).\n",
760 		 tt, tf, tst, data_race(rtp->tasks_gp_seq),
761 		 j - data_race(rtp->gp_start), j - data_race(rtp->gp_jiffies),
762 		 data_race(rtp->gp_state), tasks_gp_state_getname(rtp));
763 	pr_alert("\tEnqueue shift %d limit %d Dequeue limit %d gpseq %lu.\n",
764 		 data_race(rtp->percpu_enqueue_shift),
765 		 data_race(rtp->percpu_enqueue_lim),
766 		 data_race(rtp->percpu_dequeue_lim),
767 		 data_race(rtp->percpu_dequeue_gpseq));
768 	(void)zalloc_cpumask_var(&cm, GFP_KERNEL);
769 	pr_alert("\tCallback counts:");
770 	for_each_possible_cpu(cpu) {
771 		long n;
772 		struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);
773 
774 		if (cpumask_available(cm) && !rcu_barrier_cb_is_done(&rtpcp->barrier_q_head))
775 			cpumask_set_cpu(cpu, cm);
776 		n = rcu_segcblist_n_cbs(&rtpcp->cblist);
777 		if (!n)
778 			continue;
779 		pr_cont(" %d:%ld", cpu, n);
780 		gotcb = true;
781 	}
782 	if (gotcb)
783 		pr_cont(".\n");
784 	else
785 		pr_cont(" (none).\n");
786 	pr_alert("\tBarrier seq %lu start %lu count %d holdout CPUs ",
787 		 data_race(rtp->barrier_q_seq), j - data_race(rtp->barrier_q_start),
788 		 atomic_read(&rtp->barrier_q_count));
789 	if (cpumask_available(cm) && !cpumask_empty(cm))
790 		pr_cont(" %*pbl.\n", cpumask_pr_args(cm));
791 	else
792 		pr_cont("(none).\n");
793 	free_cpumask_var(cm);
794 }
795 
796 #endif // #ifndef CONFIG_TINY_RCU
797 
798 #if defined(CONFIG_TASKS_RCU)
799 
800 ////////////////////////////////////////////////////////////////////////
801 //
802 // Shared code between task-list-scanning variants of Tasks RCU.
803 
804 /* Wait for one RCU-tasks grace period. */
805 static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
806 {
807 	struct task_struct *g;
808 	int fract;
809 	LIST_HEAD(holdouts);
810 	unsigned long j;
811 	unsigned long lastinfo;
812 	unsigned long lastreport;
813 	bool reported = false;
814 	int rtsi;
815 	struct task_struct *t;
816 
817 	set_tasks_gp_state(rtp, RTGS_PRE_WAIT_GP);
818 	rtp->pregp_func(&holdouts);
819 
820 	/*
821 	 * There were callbacks, so we need to wait for an RCU-tasks
822 	 * grace period.  Start off by scanning the task list for tasks
823 	 * that are not already voluntarily blocked.  Mark these tasks
824 	 * and make a list of them in holdouts.
825 	 */
826 	set_tasks_gp_state(rtp, RTGS_SCAN_TASKLIST);
827 	if (rtp->pertask_func) {
828 		rcu_read_lock();
829 		for_each_process_thread(g, t)
830 			rtp->pertask_func(t, &holdouts);
831 		rcu_read_unlock();
832 	}
833 
834 	set_tasks_gp_state(rtp, RTGS_POST_SCAN_TASKLIST);
835 	rtp->postscan_func(&holdouts);
836 
837 	/*
838 	 * Each pass through the following loop scans the list of holdout
839 	 * tasks, removing any that are no longer holdouts.  When the list
840 	 * is empty, we are done.
841 	 */
842 	lastreport = jiffies;
843 	lastinfo = lastreport;
844 	rtsi = READ_ONCE(rcu_task_stall_info);
845 
846 	// Start off with initial wait and slowly back off to 1 HZ wait.
847 	fract = rtp->init_fract;
848 
849 	while (!list_empty(&holdouts)) {
850 		ktime_t exp;
851 		bool firstreport;
852 		bool needreport;
853 		int rtst;
854 
855 		// Slowly back off waiting for holdouts
856 		set_tasks_gp_state(rtp, RTGS_WAIT_SCAN_HOLDOUTS);
857 		if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
858 			schedule_timeout_idle(fract);
859 		} else {
860 			exp = jiffies_to_nsecs(fract);
861 			__set_current_state(TASK_IDLE);
862 			schedule_hrtimeout_range(&exp, jiffies_to_nsecs(HZ / 2), HRTIMER_MODE_REL_HARD);
863 		}
864 
865 		if (fract < HZ)
866 			fract++;
867 
868 		rtst = READ_ONCE(rcu_task_stall_timeout);
869 		needreport = rtst > 0 && time_after(jiffies, lastreport + rtst);
870 		if (needreport) {
871 			lastreport = jiffies;
872 			reported = true;
873 		}
874 		firstreport = true;
875 		WARN_ON(signal_pending(current));
876 		set_tasks_gp_state(rtp, RTGS_SCAN_HOLDOUTS);
877 		rtp->holdouts_func(&holdouts, needreport, &firstreport);
878 
879 		// Print pre-stall informational messages if needed.
880 		j = jiffies;
881 		if (rtsi > 0 && !reported && time_after(j, lastinfo + rtsi)) {
882 			lastinfo = j;
883 			rtsi = rtsi * rcu_task_stall_info_mult;
884 			pr_info("%s: %s grace period number %lu (since boot) is %lu jiffies old.\n",
885 				__func__, rtp->kname, rtp->tasks_gp_seq, j - rtp->gp_start);
886 		}
887 	}
888 
889 	set_tasks_gp_state(rtp, RTGS_POST_GP);
890 	rtp->postgp_func(rtp);
891 }
892 
893 #endif /* #if defined(CONFIG_TASKS_RCU) */
894 
895 #ifdef CONFIG_TASKS_RCU
896 
897 ////////////////////////////////////////////////////////////////////////
898 //
899 // Simple variant of RCU whose quiescent states are voluntary context
900 // switch, cond_resched_tasks_rcu_qs(), user-space execution, and idle.
901 // As such, grace periods can take one good long time.  There are no
902 // read-side primitives similar to rcu_read_lock() and rcu_read_unlock()
903 // because this implementation is intended to get the system into a safe
904 // state for some of the manipulations involved in tracing and the like.
905 // Finally, this implementation does not support high call_rcu_tasks()
906 // rates from multiple CPUs.  If this is required, per-CPU callback lists
907 // will be needed.
908 //
909 // The implementation uses rcu_tasks_wait_gp(), which relies on function
910 // pointers in the rcu_tasks structure.  The rcu_spawn_tasks_kthread()
911 // function sets these function pointers up so that rcu_tasks_wait_gp()
912 // invokes these functions in this order:
913 //
914 // rcu_tasks_pregp_step():
915 //	Invokes synchronize_rcu() in order to wait for all in-flight
916 //	t->on_rq and t->nvcsw transitions to complete.	This works because
917 //	all such transitions are carried out with interrupts disabled.
918 // rcu_tasks_pertask(), invoked on every non-idle task:
919 //	For every runnable non-idle task other than the current one, use
920 //	get_task_struct() to pin down that task, snapshot that task's
921 //	number of voluntary context switches, and add that task to the
922 //	holdout list.
923 // rcu_tasks_postscan():
924 //	Gather per-CPU lists of tasks in do_exit() to ensure that all
925 //	tasks that were in the process of exiting (and which thus might
926 //	not know to synchronize with this RCU Tasks grace period) have
927 //	completed exiting.  The synchronize_rcu() in rcu_tasks_postgp()
928 //	will take care of any tasks stuck in the non-preemptible region
929 //	of do_exit() following its call to exit_tasks_rcu_finish().
930 // check_all_holdout_tasks(), repeatedly until holdout list is empty:
931 //	Scans the holdout list, attempting to identify a quiescent state
932 //	for each task on the list.  If there is a quiescent state, the
933 //	corresponding task is removed from the holdout list.
934 // rcu_tasks_postgp():
935 //	Invokes synchronize_rcu() in order to ensure that all prior
936 //	t->on_rq and t->nvcsw transitions are seen by all CPUs and tasks
937 //	to have happened before the end of this RCU Tasks grace period.
938 //	Again, this works because all such transitions are carried out
939 //	with interrupts disabled.
940 //
941 // For each exiting task, the exit_tasks_rcu_start() and
942 // exit_tasks_rcu_finish() functions add and remove, respectively, the
943 // current task to a per-CPU list of tasks that rcu_tasks_postscan() must
944 // wait on.  This is necessary because rcu_tasks_postscan() must wait on
945 // tasks that have already been removed from the global list of tasks.
946 //
947 // Pre-grace-period update-side code is ordered before the grace period
948 // via the raw_spin_lock.*rcu_node().  Pre-grace-period read-side code
949 // is ordered before the grace period via synchronize_rcu() call in
950 // rcu_tasks_pregp_step() and by the scheduler's locks and interrupt
951 // disabling.
952 
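// To make the above sequence concrete, here is an informal sketch (not
// compilable code) of what rcu_tasks_wait_gp() ends up executing for this
// flavor, given the function pointers installed by
// rcu_spawn_tasks_kthread(), with locking, backoff, and stall reporting
// omitted:
//
//	rcu_tasks_pregp_step(&holdouts);		// synchronize_rcu()
//	for_each_process_thread(g, t)
//		rcu_tasks_pertask(t, &holdouts);	// build holdout list
//	rcu_tasks_postscan(&holdouts);			// pick up exiting tasks
//	while (!list_empty(&holdouts))
//		check_all_holdout_tasks(&holdouts, needreport, &firstreport);
//	rcu_tasks_postgp(&rcu_tasks);			// synchronize_rcu()
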
953 /* Pre-grace-period preparation. */
954 static void rcu_tasks_pregp_step(struct list_head *hop)
955 {
956 	/*
957 	 * Wait for all pre-existing t->on_rq and t->nvcsw transitions
958 	 * to complete.  Invoking synchronize_rcu() suffices because all
959 	 * these transitions occur with interrupts disabled.  Without this
960 	 * synchronize_rcu(), a read-side critical section that started
961 	 * before the grace period might be incorrectly seen as having
962 	 * started after the grace period.
963 	 *
964 	 * This synchronize_rcu() also dispenses with the need for a
965 	 * memory barrier on the first store to t->rcu_tasks_holdout,
966 	 * as it forces the store to happen after the beginning of the
967 	 * grace period.
968 	 */
969 	synchronize_rcu();
970 }
971 
972 /* Check for quiescent states since the pregp's synchronize_rcu() */
973 static bool rcu_tasks_is_holdout(struct task_struct *t)
974 {
975 	int cpu;
976 
977 	/* Has the task been seen voluntarily sleeping? */
978 	if (!READ_ONCE(t->on_rq))
979 		return false;
980 
981 	/*
982 	 * A task with t->on_rq set and t->se.sched_delayed set *could* be considered
983 	 * sleeping, but since that is a spurious state (it will transition into the
984 	 * traditional blocked state or get woken up without outside dependencies),
985 	 * not treating it as sleeping should only affect timing.
986 	 *
987 	 * Be conservative for now and do not treat it as a quiescent state.
988 	 */
989 
990 	/*
991 	 * Idle tasks (or idle injection) within the idle loop are RCU-tasks
992 	 * quiescent states. But CPU boot code performed by the idle task
993 	 * isn't a quiescent state.
994 	 */
995 	if (is_idle_task(t))
996 		return false;
997 
998 	cpu = task_cpu(t);
999 
1000 	/* Idle tasks on offline CPUs are RCU-tasks quiescent states. */
1001 	if (t == idle_task(cpu) && !rcu_cpu_online(cpu))
1002 		return false;
1003 
1004 	return true;
1005 }
1006 
1007 /* Per-task initial processing. */
1008 static void rcu_tasks_pertask(struct task_struct *t, struct list_head *hop)
1009 {
1010 	if (t != current && rcu_tasks_is_holdout(t)) {
1011 		get_task_struct(t);
1012 		t->rcu_tasks_nvcsw = READ_ONCE(t->nvcsw);
1013 		WRITE_ONCE(t->rcu_tasks_holdout, true);
1014 		list_add(&t->rcu_tasks_holdout_list, hop);
1015 	}
1016 }
1017 
1018 void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func);
1019 DEFINE_RCU_TASKS(rcu_tasks, rcu_tasks_wait_gp, call_rcu_tasks, "RCU Tasks");
1020 
1021 /* Processing between scanning the tasklist and draining the holdout list. */
1022 static void rcu_tasks_postscan(struct list_head *hop)
1023 {
1024 	int cpu;
1025 	int rtsi = READ_ONCE(rcu_task_stall_info);
1026 
1027 	if (!IS_ENABLED(CONFIG_TINY_RCU)) {
1028 		tasks_rcu_exit_srcu_stall_timer.expires = jiffies + rtsi;
1029 		add_timer(&tasks_rcu_exit_srcu_stall_timer);
1030 	}
1031 
1032 	/*
1033 	 * Exiting tasks may escape the tasklist scan. Those are vulnerable
1034 	 * until their final schedule() with TASK_DEAD state. To cope with
1035 	 * this, divide the fragile part of the exit path into two intersecting
1036 	 * read-side critical sections:
1037 	 *
1038 	 * 1) A task_struct list addition before calling exit_notify(),
1039 	 *    which may remove the task from the tasklist, with the
1040 	 *    removal after the final preempt_disable() call in do_exit().
1041 	 *
1042 	 * 2) An _RCU_ read side starting with the final preempt_disable()
1043 	 *    call in do_exit() and ending with the final call to schedule()
1044 	 *    with TASK_DEAD state.
1045 	 *
1046 	 * This function handles part 1).  The synchronize_rcu() call in
1047 	 * rcu_tasks_postgp() handles part 2).
1048 	 */
1049 
1050 	for_each_possible_cpu(cpu) {
1051 		unsigned long j = jiffies + 1;
1052 		struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rcu_tasks.rtpcpu, cpu);
1053 		struct task_struct *t;
1054 		struct task_struct *t1;
1055 		struct list_head tmp;
1056 
1057 		raw_spin_lock_irq_rcu_node(rtpcp);
1058 		list_for_each_entry_safe(t, t1, &rtpcp->rtp_exit_list, rcu_tasks_exit_list) {
1059 			if (list_empty(&t->rcu_tasks_holdout_list))
1060 				rcu_tasks_pertask(t, hop);
1061 
1062 			// RT kernels need frequent pauses; other kernels
1063 			// pause at least once per pair of jiffies.
1064 			if (!IS_ENABLED(CONFIG_PREEMPT_RT) && time_before(jiffies, j))
1065 				continue;
1066 
1067 			// Keep our place in the list while pausing.
1068 			// Nothing else traverses this list, so adding a
1069 			// bare list_head is OK.
1070 			list_add(&tmp, &t->rcu_tasks_exit_list);
1071 			raw_spin_unlock_irq_rcu_node(rtpcp);
1072 			cond_resched(); // For CONFIG_PREEMPT=n kernels
1073 			raw_spin_lock_irq_rcu_node(rtpcp);
1074 			t1 = list_entry(tmp.next, struct task_struct, rcu_tasks_exit_list);
1075 			list_del(&tmp);
1076 			j = jiffies + 1;
1077 		}
1078 		raw_spin_unlock_irq_rcu_node(rtpcp);
1079 	}
1080 
1081 	if (!IS_ENABLED(CONFIG_TINY_RCU))
1082 		timer_delete_sync(&tasks_rcu_exit_srcu_stall_timer);
1083 }
1084 
1085 /* See if tasks are still holding out, complain if so. */
1086 static void check_holdout_task(struct task_struct *t,
1087 			       bool needreport, bool *firstreport)
1088 {
1089 	int cpu;
1090 
1091 	if (!READ_ONCE(t->rcu_tasks_holdout) ||
1092 	    t->rcu_tasks_nvcsw != READ_ONCE(t->nvcsw) ||
1093 	    !rcu_tasks_is_holdout(t) ||
1094 	    (IS_ENABLED(CONFIG_NO_HZ_FULL) &&
1095 	     !is_idle_task(t) && READ_ONCE(t->rcu_tasks_idle_cpu) >= 0)) {
1096 		WRITE_ONCE(t->rcu_tasks_holdout, false);
1097 		list_del_init(&t->rcu_tasks_holdout_list);
1098 		put_task_struct(t);
1099 		return;
1100 	}
1101 	rcu_request_urgent_qs_task(t);
1102 	if (!needreport)
1103 		return;
1104 	if (*firstreport) {
1105 		pr_err("INFO: rcu_tasks detected stalls on tasks:\n");
1106 		*firstreport = false;
1107 	}
1108 	cpu = task_cpu(t);
1109 	pr_alert("%p: %c%c nvcsw: %lu/%lu holdout: %d idle_cpu: %d/%d\n",
1110 		 t, ".I"[is_idle_task(t)],
1111 		 "N."[cpu < 0 || !tick_nohz_full_cpu(cpu)],
1112 		 t->rcu_tasks_nvcsw, t->nvcsw, t->rcu_tasks_holdout,
1113 		 data_race(t->rcu_tasks_idle_cpu), cpu);
1114 	sched_show_task(t);
1115 }
1116 
1117 /* Scan the holdout lists for tasks no longer holding out. */
1118 static void check_all_holdout_tasks(struct list_head *hop,
1119 				    bool needreport, bool *firstreport)
1120 {
1121 	struct task_struct *t, *t1;
1122 
1123 	list_for_each_entry_safe(t, t1, hop, rcu_tasks_holdout_list) {
1124 		check_holdout_task(t, needreport, firstreport);
1125 		cond_resched();
1126 	}
1127 }
1128 
1129 /* Finish off the Tasks-RCU grace period. */
1130 static void rcu_tasks_postgp(struct rcu_tasks *rtp)
1131 {
1132 	 * Because ->on_rq and ->nvcsw are not guaranteed to have full
1133 	 * Because ->on_rq and ->nvcsw are not guaranteed to have a full
1134 	 * memory barriers prior to them in the schedule() path, memory
1135 	 * reordering on other CPUs could cause their RCU-tasks read-side
1136 	 * critical sections to extend past the end of the grace period.
1137 	 * However, because these ->nvcsw updates are carried out with
1138 	 * interrupts disabled, we can use synchronize_rcu() to force the
1139 	 * needed ordering on all such CPUs.
1140 	 *
1141 	 * This synchronize_rcu() also confines all ->rcu_tasks_holdout
1142 	 * accesses to be within the grace period, avoiding the need for
1143 	 * memory barriers for ->rcu_tasks_holdout accesses.
1144 	 *
1145 	 * In addition, this synchronize_rcu() waits for exiting tasks
1146 	 * to complete their final preempt_disable() region of execution,
1147 	 * thereby enforcing that the whole region from before tasklist removal
1148 	 * until the final schedule() with TASK_DEAD state acts as an RCU
1149 	 * Tasks read-side critical section.
1150 	 */
1151 	synchronize_rcu();
1152 }
1153 
1154 static void tasks_rcu_exit_srcu_stall(struct timer_list *unused)
1155 {
1156 #ifndef CONFIG_TINY_RCU
1157 	int rtsi;
1158 
1159 	rtsi = READ_ONCE(rcu_task_stall_info);
1160 	pr_info("%s: %s grace period number %lu (since boot) gp_state: %s is %lu jiffies old.\n",
1161 		__func__, rcu_tasks.kname, rcu_tasks.tasks_gp_seq,
1162 		tasks_gp_state_getname(&rcu_tasks), jiffies - rcu_tasks.gp_jiffies);
1163 	pr_info("Please check any exiting tasks stuck between calls to exit_tasks_rcu_start() and exit_tasks_rcu_finish()\n");
1164 	tasks_rcu_exit_srcu_stall_timer.expires = jiffies + rtsi;
1165 	add_timer(&tasks_rcu_exit_srcu_stall_timer);
1166 #endif // #ifndef CONFIG_TINY_RCU
1167 }
1168 
1169 /**
1170  * call_rcu_tasks() - Queue an RCU callback for invocation after a task-based grace period
1171  * @rhp: structure to be used for queueing the RCU updates.
1172  * @func: actual callback function to be invoked after the grace period
1173  *
1174  * The callback function will be invoked some time after a full grace
1175  * period elapses, in other words after all currently executing RCU
1176  * read-side critical sections have completed. call_rcu_tasks() assumes
1177  * that the read-side critical sections end at a voluntary context
1178  * switch (not a preemption!), cond_resched_tasks_rcu_qs(), entry into idle,
1179  * or transition to usermode execution.  As such, there are no read-side
1180  * primitives analogous to rcu_read_lock() and rcu_read_unlock() because
1181  * this primitive is intended to determine that all tasks have passed
1182  * through a safe state, not so much for data-structure synchronization.
1183  *
1184  * See the description of call_rcu() for more detailed information on
1185  * memory ordering guarantees.
1186  */
1187 void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func)
1188 {
1189 	call_rcu_tasks_generic(rhp, func, &rcu_tasks);
1190 }
1191 EXPORT_SYMBOL_GPL(call_rcu_tasks);
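
/*
 * Illustrative usage sketch.  The structure, callback, and helper names
 * below are hypothetical (they are not defined in this file or elsewhere),
 * but the pattern is the intended one: once a dynamically allocated
 * trampoline can no longer be entered, defer freeing it until every task
 * has passed through a safe state.
 *
 *	struct my_tramp {
 *		void *text;
 *		struct rcu_head rh;
 *	};
 *
 *	static void my_tramp_free_cb(struct rcu_head *rhp)
 *	{
 *		struct my_tramp *tp = container_of(rhp, struct my_tramp, rh);
 *
 *		free_my_tramp_text(tp->text);	// hypothetical helper
 *		kfree(tp);
 *	}
 *
 *	// After unhooking tp so that no task can newly enter the trampoline:
 *	call_rcu_tasks(&tp->rh, my_tramp_free_cb);
 */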
1192 
1193 /**
1194  * synchronize_rcu_tasks - wait until an rcu-tasks grace period has elapsed.
1195  *
1196  * Control will return to the caller some time after a full rcu-tasks
1197  * grace period has elapsed, in other words after all currently
1198  * executing rcu-tasks read-side critical sections have completed.  These
1199  * read-side critical sections are delimited by calls to schedule(),
1200  * cond_resched_tasks_rcu_qs(), idle execution, userspace execution, calls
1201  * to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched().
1202  *
1203  * This is a very specialized primitive, intended only for a few uses in
1204  * tracing and other situations requiring manipulation of function
1205  * preambles and profiling hooks.  The synchronize_rcu_tasks() function
1206  * is not (yet) intended for heavy use from multiple CPUs.
1207  *
1208  * See the description of synchronize_rcu() for more detailed information
1209  * on memory ordering guarantees.
1210  */
1211 void synchronize_rcu_tasks(void)
1212 {
1213 	synchronize_rcu_tasks_generic(&rcu_tasks);
1214 }
1215 EXPORT_SYMBOL_GPL(synchronize_rcu_tasks);
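
/*
 * Illustrative synchronous counterpart to the call_rcu_tasks() sketch
 * above (again with hypothetical helper names): an updater that is
 * allowed to sleep can simply wait out the grace period itself.
 *
 *	unhook_my_tramp(tp);		// hypothetical: prevent new entries
 *	synchronize_rcu_tasks();	// all tasks now clear of the old code
 *	free_my_tramp_text(tp->text);	// hypothetical helper
 *	kfree(tp);
 */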
1216 
1217 /**
1218  * rcu_barrier_tasks - Wait for in-flight call_rcu_tasks() callbacks.
1219  *
1220  * Although the current implementation is guaranteed to wait, it is not
1221  * obligated to, for example, if there are no pending callbacks.
1222  */
1223 void rcu_barrier_tasks(void)
1224 {
1225 	rcu_barrier_tasks_generic(&rcu_tasks);
1226 }
1227 EXPORT_SYMBOL_GPL(rcu_barrier_tasks);
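
/*
 * Illustrative teardown pattern (function names are hypothetical): before
 * memory or module text containing callback functions passed to
 * call_rcu_tasks() can go away, wait for the already-queued callbacks to
 * be invoked.
 *
 *	static void my_module_cleanup(void)
 *	{
 *		stop_queueing_my_callbacks();	// hypothetical helper
 *		rcu_barrier_tasks();		// wait for queued callbacks
 *	}
 */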
1228 
1229 static int rcu_tasks_lazy_ms = -1;
1230 module_param(rcu_tasks_lazy_ms, int, 0444);
1231 
1232 static int __init rcu_spawn_tasks_kthread(void)
1233 {
1234 	rcu_tasks.gp_sleep = HZ / 10;
1235 	rcu_tasks.init_fract = HZ / 10;
1236 	if (rcu_tasks_lazy_ms >= 0)
1237 		rcu_tasks.lazy_jiffies = msecs_to_jiffies(rcu_tasks_lazy_ms);
1238 	rcu_tasks.pregp_func = rcu_tasks_pregp_step;
1239 	rcu_tasks.pertask_func = rcu_tasks_pertask;
1240 	rcu_tasks.postscan_func = rcu_tasks_postscan;
1241 	rcu_tasks.holdouts_func = check_all_holdout_tasks;
1242 	rcu_tasks.postgp_func = rcu_tasks_postgp;
1243 	rcu_tasks.wait_state = TASK_IDLE;
1244 	rcu_spawn_tasks_kthread_generic(&rcu_tasks);
1245 	return 0;
1246 }
1247 
1248 #if !defined(CONFIG_TINY_RCU)
1249 void show_rcu_tasks_classic_gp_kthread(void)
1250 {
1251 	show_rcu_tasks_generic_gp_kthread(&rcu_tasks, "");
1252 }
1253 EXPORT_SYMBOL_GPL(show_rcu_tasks_classic_gp_kthread);
1254 
1255 void rcu_tasks_torture_stats_print(char *tt, char *tf)
1256 {
1257 	rcu_tasks_torture_stats_print_generic(&rcu_tasks, tt, tf, "");
1258 }
1259 EXPORT_SYMBOL_GPL(rcu_tasks_torture_stats_print);
1260 #endif // !defined(CONFIG_TINY_RCU)
1261 
1262 struct task_struct *get_rcu_tasks_gp_kthread(void)
1263 {
1264 	return rcu_tasks.kthread_ptr;
1265 }
1266 EXPORT_SYMBOL_GPL(get_rcu_tasks_gp_kthread);
1267 
1268 void rcu_tasks_get_gp_data(int *flags, unsigned long *gp_seq)
1269 {
1270 	*flags = 0;
1271 	*gp_seq = rcu_seq_current(&rcu_tasks.tasks_gp_seq);
1272 }
1273 EXPORT_SYMBOL_GPL(rcu_tasks_get_gp_data);
1274 
1275 /*
1276  * Protect against tasklist scan blind spot while the task is exiting and
1277  * may be removed from the tasklist.  Do this by adding the task to yet
1278  * another list.
1279  *
1280  * Note that the task will remove itself from this list, so there is no
1281  * need for get_task_struct(), except in the case where rcu_tasks_pertask()
1282  * adds it to the holdout list, in which case rcu_tasks_pertask() supplies
1283  * the needed get_task_struct().
1284  */
1285 void exit_tasks_rcu_start(void)
1286 {
1287 	unsigned long flags;
1288 	struct rcu_tasks_percpu *rtpcp;
1289 	struct task_struct *t = current;
1290 
1291 	WARN_ON_ONCE(!list_empty(&t->rcu_tasks_exit_list));
1292 	preempt_disable();
1293 	rtpcp = this_cpu_ptr(rcu_tasks.rtpcpu);
1294 	t->rcu_tasks_exit_cpu = smp_processor_id();
1295 	raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
1296 	WARN_ON_ONCE(!rtpcp->rtp_exit_list.next);
1297 	list_add(&t->rcu_tasks_exit_list, &rtpcp->rtp_exit_list);
1298 	raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
1299 	preempt_enable();
1300 }
1301 
1302 /*
1303  * Remove the task from the "yet another list" because do_exit() is now
1304  * non-preemptible, allowing synchronize_rcu() to wait beyond this point.
1305  */
1306 void exit_tasks_rcu_finish(void)
1307 {
1308 	unsigned long flags;
1309 	struct rcu_tasks_percpu *rtpcp;
1310 	struct task_struct *t = current;
1311 
1312 	WARN_ON_ONCE(list_empty(&t->rcu_tasks_exit_list));
1313 	rtpcp = per_cpu_ptr(rcu_tasks.rtpcpu, t->rcu_tasks_exit_cpu);
1314 	raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
1315 	list_del_init(&t->rcu_tasks_exit_list);
1316 	raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
1317 }
1318 
1319 #else /* #ifdef CONFIG_TASKS_RCU */
1320 void exit_tasks_rcu_start(void) { }
1321 void exit_tasks_rcu_finish(void) { }
1322 #endif /* #else #ifdef CONFIG_TASKS_RCU */
1323 
1324 #ifdef CONFIG_TASKS_RUDE_RCU
1325 
1326 ////////////////////////////////////////////////////////////////////////
1327 //
1328 // "Rude" variant of Tasks RCU, inspired by Steve Rostedt's
1329 // trick of passing an empty function to schedule_on_each_cpu().
1330 // This approach provides batching of concurrent calls to the synchronous
1331 // synchronize_rcu_tasks_rude() API.  This invokes schedule_on_each_cpu()
1332 // in order to send IPIs far and wide and induces otherwise unnecessary
1333 // context switches on all online CPUs, whether idle or not.
1334 //
1335 // Callback handling is provided by the rcu_tasks_kthread() function.
1336 //
1337 // Ordering is provided by the scheduler's context-switch code.
1338 
1339 // Empty function to allow workqueues to force a context switch.
1340 static void rcu_tasks_be_rude(struct work_struct *work)
1341 {
1342 }
1343 
1344 // Wait for one rude RCU-tasks grace period.
1345 static void rcu_tasks_rude_wait_gp(struct rcu_tasks *rtp)
1346 {
1347 	rtp->n_ipis += cpumask_weight(cpu_online_mask);
1348 	schedule_on_each_cpu(rcu_tasks_be_rude);
1349 }
1350 
1351 static void call_rcu_tasks_rude(struct rcu_head *rhp, rcu_callback_t func);
1352 DEFINE_RCU_TASKS(rcu_tasks_rude, rcu_tasks_rude_wait_gp, call_rcu_tasks_rude,
1353 		 "RCU Tasks Rude");
1354 
1355 /*
1356  * call_rcu_tasks_rude() - Queue a callback for invocation after a rude task-based grace period
1357  * @rhp: structure to be used for queueing the RCU updates.
1358  * @func: actual callback function to be invoked after the grace period
1359  *
1360  * The callback function will be invoked some time after a full grace
1361  * period elapses, in other words after all currently executing RCU
1362  * read-side critical sections have completed. call_rcu_tasks_rude()
1363  * assumes that the read-side critical sections end at context switch,
1364  * cond_resched_tasks_rcu_qs(), or transition to usermode execution (as
1365  * usermode execution is schedulable). As such, there are no read-side
1366  * primitives analogous to rcu_read_lock() and rcu_read_unlock() because
1367  * this primitive is intended to determine that all tasks have passed
1368  * through a safe state, not so much for data-structure synchronization.
1369  *
1370  * See the description of call_rcu() for more detailed information on
1371  * memory ordering guarantees.
1372  *
1373  * This is no longer exported, and is instead reserved for use by
1374  * synchronize_rcu_tasks_rude().
1375  */
1376 static void call_rcu_tasks_rude(struct rcu_head *rhp, rcu_callback_t func)
1377 {
1378 	call_rcu_tasks_generic(rhp, func, &rcu_tasks_rude);
1379 }
1380 
1381 /**
1382  * synchronize_rcu_tasks_rude - wait for a rude rcu-tasks grace period
1383  *
1384  * Control will return to the caller some time after a rude rcu-tasks
1385  * grace period has elapsed, in other words after all currently
1386  * executing rcu-tasks read-side critical sections have completed.  These
1387  * read-side critical sections are delimited by calls to schedule(),
1388  * cond_resched_tasks_rcu_qs(), userspace execution (which is a schedulable
1389  * context), and (in theory, anyway) cond_resched().
1390  *
1391  * This is a very specialized primitive, intended only for a few uses in
1392  * tracing and other situations requiring manipulation of function preambles
1393  * and profiling hooks.  The synchronize_rcu_tasks_rude() function is not
1394  * (yet) intended for heavy use from multiple CPUs.
1395  *
1396  * See the description of synchronize_rcu() for more detailed information
1397  * on memory ordering guarantees.
1398  */
1399 void synchronize_rcu_tasks_rude(void)
1400 {
1401 	if (!IS_ENABLED(CONFIG_ARCH_WANTS_NO_INSTR) || IS_ENABLED(CONFIG_FORCE_TASKS_RUDE_RCU))
1402 		synchronize_rcu_tasks_generic(&rcu_tasks_rude);
1403 }
1404 EXPORT_SYMBOL_GPL(synchronize_rcu_tasks_rude);
1405 
1406 static int __init rcu_spawn_tasks_rude_kthread(void)
1407 {
1408 	rcu_tasks_rude.gp_sleep = HZ / 10;
1409 	rcu_spawn_tasks_kthread_generic(&rcu_tasks_rude);
1410 	return 0;
1411 }
1412 
1413 #if !defined(CONFIG_TINY_RCU)
1414 void show_rcu_tasks_rude_gp_kthread(void)
1415 {
1416 	show_rcu_tasks_generic_gp_kthread(&rcu_tasks_rude, "");
1417 }
1418 EXPORT_SYMBOL_GPL(show_rcu_tasks_rude_gp_kthread);
1419 
1420 void rcu_tasks_rude_torture_stats_print(char *tt, char *tf)
1421 {
1422 	rcu_tasks_torture_stats_print_generic(&rcu_tasks_rude, tt, tf, "");
1423 }
1424 EXPORT_SYMBOL_GPL(rcu_tasks_rude_torture_stats_print);
1425 #endif // !defined(CONFIG_TINY_RCU)
1426 
1427 struct task_struct *get_rcu_tasks_rude_gp_kthread(void)
1428 {
1429 	return rcu_tasks_rude.kthread_ptr;
1430 }
1431 EXPORT_SYMBOL_GPL(get_rcu_tasks_rude_gp_kthread);
1432 
1433 void rcu_tasks_rude_get_gp_data(int *flags, unsigned long *gp_seq)
1434 {
1435 	*flags = 0;
1436 	*gp_seq = rcu_seq_current(&rcu_tasks_rude.tasks_gp_seq);
1437 }
1438 EXPORT_SYMBOL_GPL(rcu_tasks_rude_get_gp_data);
1439 
1440 #endif /* #ifdef CONFIG_TASKS_RUDE_RCU */
1441 
1442 #ifndef CONFIG_TINY_RCU
1443 void show_rcu_tasks_gp_kthreads(void)
1444 {
1445 	show_rcu_tasks_classic_gp_kthread();
1446 	show_rcu_tasks_rude_gp_kthread();
1447 }
1448 #endif /* #ifndef CONFIG_TINY_RCU */
1449 
1450 #ifdef CONFIG_PROVE_RCU
1451 struct rcu_tasks_test_desc {
1452 	struct rcu_head rh;
1453 	const char *name;
1454 	bool notrun;
1455 	unsigned long runstart;
1456 };
1457 
1458 static struct rcu_tasks_test_desc tests[] = {
1459 	{
1460 		.name = "call_rcu_tasks()",
1461 		/* If not defined, the test is skipped. */
1462 		.notrun = IS_ENABLED(CONFIG_TASKS_RCU),
1463 	},
1464 	{
1465 		.name = "call_rcu_tasks_trace()",
1466 		/* If not defined, the test is skipped. */
1467 		.notrun = IS_ENABLED(CONFIG_TASKS_TRACE_RCU)
1468 	}
1469 };
1470 
1471 #if defined(CONFIG_TASKS_RCU) || defined(CONFIG_TASKS_TRACE_RCU)
1472 static void test_rcu_tasks_callback(struct rcu_head *rhp)
1473 {
1474 	struct rcu_tasks_test_desc *rttd =
1475 		container_of(rhp, struct rcu_tasks_test_desc, rh);
1476 
1477 	pr_info("Callback from %s invoked.\n", rttd->name);
1478 
1479 	rttd->notrun = false;
1480 }
1481 #endif // #if defined(CONFIG_TASKS_RCU) || defined(CONFIG_TASKS_TRACE_RCU)
1482 
1483 static void rcu_tasks_initiate_self_tests(void)
1484 {
1485 #ifdef CONFIG_TASKS_RCU
1486 	pr_info("Running RCU Tasks wait API self tests\n");
1487 	tests[0].runstart = jiffies;
1488 	synchronize_rcu_tasks();
1489 	call_rcu_tasks(&tests[0].rh, test_rcu_tasks_callback);
1490 #endif
1491 
1492 #ifdef CONFIG_TASKS_RUDE_RCU
1493 	pr_info("Running RCU Tasks Rude wait API self tests\n");
1494 	synchronize_rcu_tasks_rude();
1495 #endif
1496 
1497 #ifdef CONFIG_TASKS_TRACE_RCU
1498 	pr_info("Running RCU Tasks Trace wait API self tests\n");
1499 	tests[1].runstart = jiffies;
1500 	synchronize_rcu_tasks_trace();
1501 	call_rcu_tasks_trace(&tests[1].rh, test_rcu_tasks_callback);
1502 #endif
1503 }
1504 
1505 /*
1506  * Return:  0 - test passed
1507  *	    1 - test failed, but have not timed out yet
1508  *	   -1 - test failed and timed out
1509  */
1510 static int rcu_tasks_verify_self_tests(void)
1511 {
1512 	int ret = 0;
1513 	int i;
1514 	unsigned long bst = rcu_task_stall_timeout;
1515 
1516 	if (bst <= 0 || bst > RCU_TASK_BOOT_STALL_TIMEOUT)
1517 		bst = RCU_TASK_BOOT_STALL_TIMEOUT;
1518 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
1519 		while (tests[i].notrun) {		// still hanging.
1520 			if (time_after(jiffies, tests[i].runstart + bst)) {
1521 				pr_err("%s has failed boot-time tests.\n", tests[i].name);
1522 				ret = -1;
1523 				break;
1524 			}
1525 			ret = 1;
1526 			break;
1527 		}
1528 	}
1529 	WARN_ON(ret < 0);
1530 
1531 	return ret;
1532 }
1533 
1534 /*
1535  * Repeat the rcu_tasks_verify_self_tests() call once every second until the
1536  * test passes or has timed out.
1537  */
1538 static struct delayed_work rcu_tasks_verify_work;
1539 static void rcu_tasks_verify_work_fn(struct work_struct *work __maybe_unused)
1540 {
1541 	int ret = rcu_tasks_verify_self_tests();
1542 
1543 	if (ret <= 0)
1544 		return;
1545 
1546 	/* Test fails but not timed out yet, reschedule another check */
1547 	schedule_delayed_work(&rcu_tasks_verify_work, HZ);
1548 }
1549 
1550 static int rcu_tasks_verify_schedule_work(void)
1551 {
1552 	INIT_DELAYED_WORK(&rcu_tasks_verify_work, rcu_tasks_verify_work_fn);
1553 	rcu_tasks_verify_work_fn(NULL);
1554 	return 0;
1555 }
1556 late_initcall(rcu_tasks_verify_schedule_work);
1557 #else /* #ifdef CONFIG_PROVE_RCU */
1558 static void rcu_tasks_initiate_self_tests(void) { }
1559 #endif /* #else #ifdef CONFIG_PROVE_RCU */
1560 
1561 void __init tasks_cblist_init_generic(void)
1562 {
1563 	lockdep_assert_irqs_disabled();
1564 	WARN_ON(num_online_cpus() > 1);
1565 
1566 #ifdef CONFIG_TASKS_RCU
1567 	cblist_init_generic(&rcu_tasks);
1568 #endif
1569 
1570 #ifdef CONFIG_TASKS_RUDE_RCU
1571 	cblist_init_generic(&rcu_tasks_rude);
1572 #endif
1573 }
1574 
1575 static int __init rcu_init_tasks_generic(void)
1576 {
1577 #ifdef CONFIG_TASKS_RCU
1578 	rcu_spawn_tasks_kthread();
1579 #endif
1580 
1581 #ifdef CONFIG_TASKS_RUDE_RCU
1582 	rcu_spawn_tasks_rude_kthread();
1583 #endif
1584 
1585 	// Run the self-tests.
1586 	rcu_tasks_initiate_self_tests();
1587 
1588 	return 0;
1589 }
1590 core_initcall(rcu_init_tasks_generic);
1591 
1592 #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
1593 static inline void rcu_tasks_bootup_oddness(void) {}
1594 #endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */
1595 
1596 #ifdef CONFIG_TASKS_TRACE_RCU
1597 
1598 ////////////////////////////////////////////////////////////////////////
1599 //
1600 // Tracing variant of Tasks RCU.  This variant is designed to be used
1601 // to protect tracing hooks, including those of BPF.  This variant
1602 // is implemented via a straightforward mapping onto SRCU-fast.
1603 
1604 DEFINE_SRCU_FAST(rcu_tasks_trace_srcu_struct);
1605 EXPORT_SYMBOL_GPL(rcu_tasks_trace_srcu_struct);
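
// With this mapping, the Tasks Trace read-side markers
// (rcu_read_lock_trace()/rcu_read_unlock_trace()) and update-side APIs are
// presumably thin wrappers around the corresponding SRCU-fast operations
// on rcu_tasks_trace_srcu_struct, defined elsewhere (for example in
// include/linux/rcupdate_trace.h).  As an orientation-level assumption
// only, not a definition taken from this file:
//
//	synchronize_rcu_tasks_trace()  ~  synchronize_srcu(&rcu_tasks_trace_srcu_struct)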
1606 
1607 #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
1608