xref: /linux/kernel/printk/nbcon.c (revision c94cd9508b1335b949fd13ebd269313c65492df0)
1 // SPDX-License-Identifier: GPL-2.0-only
2 // Copyright (C) 2022 Linutronix GmbH, John Ogness
3 // Copyright (C) 2022 Intel, Thomas Gleixner
4 
5 #include <linux/atomic.h>
6 #include <linux/bug.h>
7 #include <linux/console.h>
8 #include <linux/delay.h>
9 #include <linux/errno.h>
10 #include <linux/export.h>
11 #include <linux/init.h>
12 #include <linux/irqflags.h>
13 #include <linux/kthread.h>
14 #include <linux/minmax.h>
15 #include <linux/percpu.h>
16 #include <linux/preempt.h>
17 #include <linux/slab.h>
18 #include <linux/smp.h>
19 #include <linux/stddef.h>
20 #include <linux/string.h>
21 #include <linux/types.h>
22 #include "internal.h"
23 #include "printk_ringbuffer.h"
24 /*
25  * Printk console printing implementation for consoles which do not depend
26  * on the legacy style console_lock mechanism.
27  *
28  * The state of the console is maintained in the "nbcon_state" atomic
29  * variable.
30  *
31  * The console is locked when:
32  *
33  *   - The 'prio' field contains the priority of the context that owns the
34  *     console. Only higher priority contexts are allowed to take over the
35  *     lock. A value of 0 (NBCON_PRIO_NONE) means the console is not locked.
36  *
37  *   - The 'cpu' field denotes on which CPU the console is locked. It is used
38  *     to prevent busy waiting on the same CPU. Also it informs the lock owner
39  *     that it has lost the lock in a more complex scenario when the lock was
40  *     taken over by a higher priority context, released, and taken on another
41  *     CPU with the same priority as the interrupted owner.
42  *
43  * The acquire mechanism uses a few more fields:
44  *
45  *   - The 'req_prio' field is used by the handover approach to make the
46  *     current owner aware that there is a context with a higher priority
47  *     waiting for the friendly handover.
48  *
49  *   - The 'unsafe' field allows to take over the console in a safe way in the
50  *     middle of emitting a message. The field is set only when accessing some
51  *     shared resources or when the console device is manipulated. It can be
52  *     cleared, for example, after emitting one character when the console
53  *     device is in a consistent state.
54  *
55  *   - The 'unsafe_takeover' field is set when a hostile takeover took the
56  *     console in an unsafe state. The console will stay in the unsafe state
57  *     until re-initialized.
58  *
59  * The acquire mechanism uses three approaches:
60  *
61  *   1) Direct acquire when the console is not owned or is owned by a lower
62  *      priority context and is in a safe state.
63  *
64  *   2) Friendly handover mechanism uses a request/grant handshake. It is used
65  *      when the current owner has lower priority and the console is in an
66  *      unsafe state.
67  *
68  *      The requesting context:
69  *
70  *        a) Sets its priority into the 'req_prio' field.
71  *
72  *        b) Waits (with a timeout) for the owning context to unlock the
73  *           console.
74  *
75  *        c) Takes the lock and clears the 'req_prio' field.
76  *
77  *      The owning context:
78  *
79  *        a) Observes the 'req_prio' field set on exit from the unsafe
80  *           console state.
81  *
82  *        b) Gives up console ownership by clearing the 'prio' field.
83  *
84  *   3) Unsafe hostile takeover allows to take over the lock even when the
85  *      console is in an unsafe state. It is used only in panic() by the final
86  *      attempt to flush consoles in a try and hope mode.
87  *
88  *      Note that separate record buffers are used in panic(). As a result,
89  *      the messages can be read and formatted without any risk even after
90  *      using the hostile takeover in unsafe state.
91  *
92  * The release function simply clears the 'prio' field.
93  *
94  * All operations on @console::nbcon_state are atomic cmpxchg based to
95  * handle concurrency.
96  *
97  * The acquire/release functions implement only minimal policies:
98  *
99  *   - Preference for higher priority contexts.
100  *   - Protection of the panic CPU.
101  *
102  * All other policy decisions must be made at the call sites:
103  *
104  *   - What is marked as an unsafe section.
105  *   - Whether to spin-wait if there is already an owner and the console is
106  *     in an unsafe state.
107  *   - Whether to attempt an unsafe hostile takeover.
108  *
109  * The design allows to implement the well known:
110  *
111  *     acquire()
112  *     output_one_printk_record()
113  *     release()
114  *
115  * The output of one printk record might be interrupted with a higher priority
116  * context. The new owner is supposed to reprint the entire interrupted record
117  * from scratch.
118  */
119 
120 /**
121  * nbcon_state_set - Helper function to set the console state
122  * @con:	Console to update
123  * @new:	The new state to write
124  *
125  * Only to be used when the console is not yet or no longer visible in the
126  * system. Otherwise use nbcon_state_try_cmpxchg().
127  */
128 static inline void nbcon_state_set(struct console *con, struct nbcon_state *new)
129 {
130 	atomic_set(&ACCESS_PRIVATE(con, nbcon_state), new->atom);
131 }
132 
133 /**
134  * nbcon_state_read - Helper function to read the console state
135  * @con:	Console to read
136  * @state:	The state to store the result
137  */
138 static inline void nbcon_state_read(struct console *con, struct nbcon_state *state)
139 {
140 	state->atom = atomic_read(&ACCESS_PRIVATE(con, nbcon_state));
141 }
142 
143 /**
144  * nbcon_state_try_cmpxchg() - Helper function for atomic_try_cmpxchg() on console state
145  * @con:	Console to update
146  * @cur:	Old/expected state
147  * @new:	New state
148  *
149  * Return: True on success. False on fail and @cur is updated.
150  */
151 static inline bool nbcon_state_try_cmpxchg(struct console *con, struct nbcon_state *cur,
152 					   struct nbcon_state *new)
153 {
154 	return atomic_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_state), &cur->atom, new->atom);
155 }
156 
157 /**
158  * nbcon_seq_read - Read the current console sequence
159  * @con:	Console to read the sequence of
160  *
161  * Return:	Sequence number of the next record to print on @con.
162  */
163 u64 nbcon_seq_read(struct console *con)
164 {
165 	unsigned long nbcon_seq = atomic_long_read(&ACCESS_PRIVATE(con, nbcon_seq));
166 
167 	return __ulseq_to_u64seq(prb, nbcon_seq);
168 }
169 
170 /**
171  * nbcon_seq_force - Force console sequence to a specific value
172  * @con:	Console to work on
173  * @seq:	Sequence number value to set
174  *
175  * Only to be used during init (before registration) or in extreme situations
176  * (such as panic with CONSOLE_REPLAY_ALL).
177  */
178 void nbcon_seq_force(struct console *con, u64 seq)
179 {
180 	/*
181 	 * If the specified record no longer exists, the oldest available record
182 	 * is chosen. This is especially important on 32bit systems because only
183 	 * the lower 32 bits of the sequence number are stored. The upper 32 bits
184 	 * are derived from the sequence numbers available in the ringbuffer.
185 	 */
186 	u64 valid_seq = max_t(u64, seq, prb_first_valid_seq(prb));
187 
188 	atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), __u64seq_to_ulseq(valid_seq));
189 }
190 
191 /**
192  * nbcon_seq_try_update - Try to update the console sequence number
193  * @ctxt:	Pointer to an acquire context that contains
194  *		all information about the acquire mode
195  * @new_seq:	The new sequence number to set
196  *
197  * @ctxt->seq is updated to the new value of @con::nbcon_seq (expanded to
198  * the 64bit value). This could be a different value than @new_seq if
199  * nbcon_seq_force() was used or the current context no longer owns the
200  * console. In the later case, it will stop printing anyway.
201  */
202 static void nbcon_seq_try_update(struct nbcon_context *ctxt, u64 new_seq)
203 {
204 	unsigned long nbcon_seq = __u64seq_to_ulseq(ctxt->seq);
205 	struct console *con = ctxt->console;
206 
207 	if (atomic_long_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_seq), &nbcon_seq,
208 				    __u64seq_to_ulseq(new_seq))) {
209 		ctxt->seq = new_seq;
210 	} else {
211 		ctxt->seq = nbcon_seq_read(con);
212 	}
213 }
214 
215 /**
216  * nbcon_context_try_acquire_direct - Try to acquire directly
217  * @ctxt:	The context of the caller
218  * @cur:	The current console state
219  *
220  * Acquire the console when it is released. Also acquire the console when
221  * the current owner has a lower priority and the console is in a safe state.
222  *
223  * Return:	0 on success. Otherwise, an error code on failure. Also @cur
224  *		is updated to the latest state when failed to modify it.
225  *
226  * Errors:
227  *
228  *	-EPERM:		A panic is in progress and this is not the panic CPU.
229  *			Or the current owner or waiter has the same or higher
230  *			priority. No acquire method can be successful in
231  *			this case.
232  *
233  *	-EBUSY:		The current owner has a lower priority but the console
234  *			in an unsafe state. The caller should try using
235  *			the handover acquire method.
236  */
237 static int nbcon_context_try_acquire_direct(struct nbcon_context *ctxt,
238 					    struct nbcon_state *cur)
239 {
240 	unsigned int cpu = smp_processor_id();
241 	struct console *con = ctxt->console;
242 	struct nbcon_state new;
243 
244 	do {
245 		/*
246 		 * Panic does not imply that the console is owned. However, it
247 		 * is critical that non-panic CPUs during panic are unable to
248 		 * acquire ownership in order to satisfy the assumptions of
249 		 * nbcon_waiter_matches(). In particular, the assumption that
250 		 * lower priorities are ignored during panic.
251 		 */
252 		if (other_cpu_in_panic())
253 			return -EPERM;
254 
255 		if (ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio)
256 			return -EPERM;
257 
258 		if (cur->unsafe)
259 			return -EBUSY;
260 
261 		/*
262 		 * The console should never be safe for a direct acquire
263 		 * if an unsafe hostile takeover has ever happened.
264 		 */
265 		WARN_ON_ONCE(cur->unsafe_takeover);
266 
267 		new.atom = cur->atom;
268 		new.prio	= ctxt->prio;
269 		new.req_prio	= NBCON_PRIO_NONE;
270 		new.unsafe	= cur->unsafe_takeover;
271 		new.cpu		= cpu;
272 
273 	} while (!nbcon_state_try_cmpxchg(con, cur, &new));
274 
275 	return 0;
276 }
277 
278 static bool nbcon_waiter_matches(struct nbcon_state *cur, int expected_prio)
279 {
280 	/*
281 	 * The request context is well defined by the @req_prio because:
282 	 *
283 	 * - Only a context with a priority higher than the owner can become
284 	 *   a waiter.
285 	 * - Only a context with a priority higher than the waiter can
286 	 *   directly take over the request.
287 	 * - There are only three priorities.
288 	 * - Only one CPU is allowed to request PANIC priority.
289 	 * - Lower priorities are ignored during panic() until reboot.
290 	 *
291 	 * As a result, the following scenario is *not* possible:
292 	 *
293 	 * 1. This context is currently a waiter.
294 	 * 2. Another context with a higher priority than this context
295 	 *    directly takes ownership.
296 	 * 3. The higher priority context releases the ownership.
297 	 * 4. Another lower priority context takes the ownership.
298 	 * 5. Another context with the same priority as this context
299 	 *    creates a request and starts waiting.
300 	 *
301 	 * Event #1 implies this context is EMERGENCY.
302 	 * Event #2 implies the new context is PANIC.
303 	 * Event #3 occurs when panic() has flushed the console.
304 	 * Events #4 and #5 are not possible due to the other_cpu_in_panic()
305 	 * check in nbcon_context_try_acquire_direct().
306 	 */
307 
308 	return (cur->req_prio == expected_prio);
309 }
310 
311 /**
312  * nbcon_context_try_acquire_requested - Try to acquire after having
313  *					 requested a handover
314  * @ctxt:	The context of the caller
315  * @cur:	The current console state
316  *
317  * This is a helper function for nbcon_context_try_acquire_handover().
318  * It is called when the console is in an unsafe state. The current
319  * owner will release the console on exit from the unsafe region.
320  *
321  * Return:	0 on success and @cur is updated to the new console state.
322  *		Otherwise an error code on failure.
323  *
324  * Errors:
325  *
326  *	-EPERM:		A panic is in progress and this is not the panic CPU
327  *			or this context is no longer the waiter.
328  *
329  *	-EBUSY:		The console is still locked. The caller should
330  *			continue waiting.
331  *
332  * Note: The caller must still remove the request when an error has occurred
333  *       except when this context is no longer the waiter.
334  */
335 static int nbcon_context_try_acquire_requested(struct nbcon_context *ctxt,
336 					       struct nbcon_state *cur)
337 {
338 	unsigned int cpu = smp_processor_id();
339 	struct console *con = ctxt->console;
340 	struct nbcon_state new;
341 
342 	/* Note that the caller must still remove the request! */
343 	if (other_cpu_in_panic())
344 		return -EPERM;
345 
346 	/*
347 	 * Note that the waiter will also change if there was an unsafe
348 	 * hostile takeover.
349 	 */
350 	if (!nbcon_waiter_matches(cur, ctxt->prio))
351 		return -EPERM;
352 
353 	/* If still locked, caller should continue waiting. */
354 	if (cur->prio != NBCON_PRIO_NONE)
355 		return -EBUSY;
356 
357 	/*
358 	 * The previous owner should have never released ownership
359 	 * in an unsafe region.
360 	 */
361 	WARN_ON_ONCE(cur->unsafe);
362 
363 	new.atom = cur->atom;
364 	new.prio	= ctxt->prio;
365 	new.req_prio	= NBCON_PRIO_NONE;
366 	new.unsafe	= cur->unsafe_takeover;
367 	new.cpu		= cpu;
368 
369 	if (!nbcon_state_try_cmpxchg(con, cur, &new)) {
370 		/*
371 		 * The acquire could fail only when it has been taken
372 		 * over by a higher priority context.
373 		 */
374 		WARN_ON_ONCE(nbcon_waiter_matches(cur, ctxt->prio));
375 		return -EPERM;
376 	}
377 
378 	/* Handover success. This context now owns the console. */
379 	return 0;
380 }
381 
382 /**
383  * nbcon_context_try_acquire_handover - Try to acquire via handover
384  * @ctxt:	The context of the caller
385  * @cur:	The current console state
386  *
387  * The function must be called only when the context has higher priority
388  * than the current owner and the console is in an unsafe state.
389  * It is the case when nbcon_context_try_acquire_direct() returns -EBUSY.
390  *
391  * The function sets "req_prio" field to make the current owner aware of
392  * the request. Then it waits until the current owner releases the console,
393  * or an even higher context takes over the request, or timeout expires.
394  *
395  * The current owner checks the "req_prio" field on exit from the unsafe
396  * region and releases the console. It does not touch the "req_prio" field
397  * so that the console stays reserved for the waiter.
398  *
399  * Return:	0 on success. Otherwise, an error code on failure. Also @cur
400  *		is updated to the latest state when failed to modify it.
401  *
402  * Errors:
403  *
404  *	-EPERM:		A panic is in progress and this is not the panic CPU.
405  *			Or a higher priority context has taken over the
406  *			console or the handover request.
407  *
408  *	-EBUSY:		The current owner is on the same CPU so that the hand
409  *			shake could not work. Or the current owner is not
410  *			willing to wait (zero timeout). Or the console does
411  *			not enter the safe state before timeout passed. The
412  *			caller might still use the unsafe hostile takeover
413  *			when allowed.
414  *
415  *	-EAGAIN:	@cur has changed when creating the handover request.
416  *			The caller should retry with direct acquire.
417  */
418 static int nbcon_context_try_acquire_handover(struct nbcon_context *ctxt,
419 					      struct nbcon_state *cur)
420 {
421 	unsigned int cpu = smp_processor_id();
422 	struct console *con = ctxt->console;
423 	struct nbcon_state new;
424 	int timeout;
425 	int request_err = -EBUSY;
426 
427 	/*
428 	 * Check that the handover is called when the direct acquire failed
429 	 * with -EBUSY.
430 	 */
431 	WARN_ON_ONCE(ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio);
432 	WARN_ON_ONCE(!cur->unsafe);
433 
434 	/* Handover is not possible on the same CPU. */
435 	if (cur->cpu == cpu)
436 		return -EBUSY;
437 
438 	/*
439 	 * Console stays unsafe after an unsafe takeover until re-initialized.
440 	 * Waiting is not going to help in this case.
441 	 */
442 	if (cur->unsafe_takeover)
443 		return -EBUSY;
444 
445 	/* Is the caller willing to wait? */
446 	if (ctxt->spinwait_max_us == 0)
447 		return -EBUSY;
448 
449 	/*
450 	 * Setup a request for the handover. The caller should try to acquire
451 	 * the console directly when the current state has been modified.
452 	 */
453 	new.atom = cur->atom;
454 	new.req_prio = ctxt->prio;
455 	if (!nbcon_state_try_cmpxchg(con, cur, &new))
456 		return -EAGAIN;
457 
458 	cur->atom = new.atom;
459 
460 	/* Wait until there is no owner and then acquire the console. */
461 	for (timeout = ctxt->spinwait_max_us; timeout >= 0; timeout--) {
462 		/* On successful acquire, this request is cleared. */
463 		request_err = nbcon_context_try_acquire_requested(ctxt, cur);
464 		if (!request_err)
465 			return 0;
466 
467 		/*
468 		 * If the acquire should be aborted, it must be ensured
469 		 * that the request is removed before returning to caller.
470 		 */
471 		if (request_err == -EPERM)
472 			break;
473 
474 		udelay(1);
475 
476 		/* Re-read the state because some time has passed. */
477 		nbcon_state_read(con, cur);
478 	}
479 
480 	/* Timed out or aborted. Carefully remove handover request. */
481 	do {
482 		/*
483 		 * No need to remove request if there is a new waiter. This
484 		 * can only happen if a higher priority context has taken over
485 		 * the console or the handover request.
486 		 */
487 		if (!nbcon_waiter_matches(cur, ctxt->prio))
488 			return -EPERM;
489 
490 		/* Unset request for handover. */
491 		new.atom = cur->atom;
492 		new.req_prio = NBCON_PRIO_NONE;
493 		if (nbcon_state_try_cmpxchg(con, cur, &new)) {
494 			/*
495 			 * Request successfully unset. Report failure of
496 			 * acquiring via handover.
497 			 */
498 			cur->atom = new.atom;
499 			return request_err;
500 		}
501 
502 		/*
503 		 * Unable to remove request. Try to acquire in case
504 		 * the owner has released the lock.
505 		 */
506 	} while (nbcon_context_try_acquire_requested(ctxt, cur));
507 
508 	/* Lucky timing. The acquire succeeded while removing the request. */
509 	return 0;
510 }
511 
512 /**
513  * nbcon_context_try_acquire_hostile - Acquire via unsafe hostile takeover
514  * @ctxt:	The context of the caller
515  * @cur:	The current console state
516  *
517  * Acquire the console even in the unsafe state.
518  *
519  * It can be permitted by setting the 'allow_unsafe_takeover' field only
520  * by the final attempt to flush messages in panic().
521  *
522  * Return:	0 on success. -EPERM when not allowed by the context.
523  */
524 static int nbcon_context_try_acquire_hostile(struct nbcon_context *ctxt,
525 					     struct nbcon_state *cur)
526 {
527 	unsigned int cpu = smp_processor_id();
528 	struct console *con = ctxt->console;
529 	struct nbcon_state new;
530 
531 	if (!ctxt->allow_unsafe_takeover)
532 		return -EPERM;
533 
534 	/* Ensure caller is allowed to perform unsafe hostile takeovers. */
535 	if (WARN_ON_ONCE(ctxt->prio != NBCON_PRIO_PANIC))
536 		return -EPERM;
537 
538 	/*
539 	 * Check that try_acquire_direct() and try_acquire_handover() returned
540 	 * -EBUSY in the right situation.
541 	 */
542 	WARN_ON_ONCE(ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio);
543 	WARN_ON_ONCE(cur->unsafe != true);
544 
545 	do {
546 		new.atom = cur->atom;
547 		new.cpu			= cpu;
548 		new.prio		= ctxt->prio;
549 		new.unsafe		|= cur->unsafe_takeover;
550 		new.unsafe_takeover	|= cur->unsafe;
551 
552 	} while (!nbcon_state_try_cmpxchg(con, cur, &new));
553 
554 	return 0;
555 }
556 
557 static struct printk_buffers panic_nbcon_pbufs;
558 
559 /**
560  * nbcon_context_try_acquire - Try to acquire nbcon console
561  * @ctxt:	The context of the caller
562  *
563  * Context:	Under @ctxt->con->device_lock() or local_irq_save().
564  * Return:	True if the console was acquired. False otherwise.
565  *
566  * If the caller allowed an unsafe hostile takeover, on success the
567  * caller should check the current console state to see if it is
568  * in an unsafe state. Otherwise, on success the caller may assume
569  * the console is not in an unsafe state.
570  */
571 static bool nbcon_context_try_acquire(struct nbcon_context *ctxt)
572 {
573 	unsigned int cpu = smp_processor_id();
574 	struct console *con = ctxt->console;
575 	struct nbcon_state cur;
576 	int err;
577 
578 	nbcon_state_read(con, &cur);
579 try_again:
580 	err = nbcon_context_try_acquire_direct(ctxt, &cur);
581 	if (err != -EBUSY)
582 		goto out;
583 
584 	err = nbcon_context_try_acquire_handover(ctxt, &cur);
585 	if (err == -EAGAIN)
586 		goto try_again;
587 	if (err != -EBUSY)
588 		goto out;
589 
590 	err = nbcon_context_try_acquire_hostile(ctxt, &cur);
591 out:
592 	if (err)
593 		return false;
594 
595 	/* Acquire succeeded. */
596 
597 	/* Assign the appropriate buffer for this context. */
598 	if (atomic_read(&panic_cpu) == cpu)
599 		ctxt->pbufs = &panic_nbcon_pbufs;
600 	else
601 		ctxt->pbufs = con->pbufs;
602 
603 	/* Set the record sequence for this context to print. */
604 	ctxt->seq = nbcon_seq_read(ctxt->console);
605 
606 	return true;
607 }
608 
609 static bool nbcon_owner_matches(struct nbcon_state *cur, int expected_cpu,
610 				int expected_prio)
611 {
612 	/*
613 	 * A similar function, nbcon_waiter_matches(), only deals with
614 	 * EMERGENCY and PANIC priorities. However, this function must also
615 	 * deal with the NORMAL priority, which requires additional checks
616 	 * and constraints.
617 	 *
618 	 * For the case where preemption and interrupts are disabled, it is
619 	 * enough to also verify that the owning CPU has not changed.
620 	 *
621 	 * For the case where preemption or interrupts are enabled, an
622 	 * external synchronization method *must* be used. In particular,
623 	 * the driver-specific locking mechanism used in device_lock()
624 	 * (including disabling migration) should be used. It prevents
625 	 * scenarios such as:
626 	 *
627 	 * 1. [Task A] owns a context with NBCON_PRIO_NORMAL on [CPU X] and
628 	 *    is scheduled out.
629 	 * 2. Another context takes over the lock with NBCON_PRIO_EMERGENCY
630 	 *    and releases it.
631 	 * 3. [Task B] acquires a context with NBCON_PRIO_NORMAL on [CPU X]
632 	 *    and is scheduled out.
633 	 * 4. [Task A] gets running on [CPU X] and sees that the console is
634 	 *    still owned by a task on [CPU X] with NBON_PRIO_NORMAL. Thus
635 	 *    [Task A] thinks it is the owner when it is not.
636 	 */
637 
638 	if (cur->prio != expected_prio)
639 		return false;
640 
641 	if (cur->cpu != expected_cpu)
642 		return false;
643 
644 	return true;
645 }
646 
647 /**
648  * nbcon_context_release - Release the console
649  * @ctxt:	The nbcon context from nbcon_context_try_acquire()
650  */
651 static void nbcon_context_release(struct nbcon_context *ctxt)
652 {
653 	unsigned int cpu = smp_processor_id();
654 	struct console *con = ctxt->console;
655 	struct nbcon_state cur;
656 	struct nbcon_state new;
657 
658 	nbcon_state_read(con, &cur);
659 
660 	do {
661 		if (!nbcon_owner_matches(&cur, cpu, ctxt->prio))
662 			break;
663 
664 		new.atom = cur.atom;
665 		new.prio = NBCON_PRIO_NONE;
666 
667 		/*
668 		 * If @unsafe_takeover is set, it is kept set so that
669 		 * the state remains permanently unsafe.
670 		 */
671 		new.unsafe |= cur.unsafe_takeover;
672 
673 	} while (!nbcon_state_try_cmpxchg(con, &cur, &new));
674 
675 	ctxt->pbufs = NULL;
676 }
677 
678 /**
679  * nbcon_context_can_proceed - Check whether ownership can proceed
680  * @ctxt:	The nbcon context from nbcon_context_try_acquire()
681  * @cur:	The current console state
682  *
683  * Return:	True if this context still owns the console. False if
684  *		ownership was handed over or taken.
685  *
686  * Must be invoked when entering the unsafe state to make sure that it still
687  * owns the lock. Also must be invoked when exiting the unsafe context
688  * to eventually free the lock for a higher priority context which asked
689  * for the friendly handover.
690  *
691  * It can be called inside an unsafe section when the console is just
692  * temporary in safe state instead of exiting and entering the unsafe
693  * state.
694  *
695  * Also it can be called in the safe context before doing an expensive
696  * safe operation. It does not make sense to do the operation when
697  * a higher priority context took the lock.
698  *
699  * When this function returns false then the calling context no longer owns
700  * the console and is no longer allowed to go forward. In this case it must
701  * back out immediately and carefully. The buffer content is also no longer
702  * trusted since it no longer belongs to the calling context.
703  */
704 static bool nbcon_context_can_proceed(struct nbcon_context *ctxt, struct nbcon_state *cur)
705 {
706 	unsigned int cpu = smp_processor_id();
707 
708 	/* Make sure this context still owns the console. */
709 	if (!nbcon_owner_matches(cur, cpu, ctxt->prio))
710 		return false;
711 
712 	/* The console owner can proceed if there is no waiter. */
713 	if (cur->req_prio == NBCON_PRIO_NONE)
714 		return true;
715 
716 	/*
717 	 * A console owner within an unsafe region is always allowed to
718 	 * proceed, even if there are waiters. It can perform a handover
719 	 * when exiting the unsafe region. Otherwise the waiter will
720 	 * need to perform an unsafe hostile takeover.
721 	 */
722 	if (cur->unsafe)
723 		return true;
724 
725 	/* Waiters always have higher priorities than owners. */
726 	WARN_ON_ONCE(cur->req_prio <= cur->prio);
727 
728 	/*
729 	 * Having a safe point for take over and eventually a few
730 	 * duplicated characters or a full line is way better than a
731 	 * hostile takeover. Post processing can take care of the garbage.
732 	 * Release and hand over.
733 	 */
734 	nbcon_context_release(ctxt);
735 
736 	/*
737 	 * It is not clear whether the waiter really took over ownership. The
738 	 * outermost callsite must make the final decision whether console
739 	 * ownership is needed for it to proceed. If yes, it must reacquire
740 	 * ownership (possibly hostile) before carefully proceeding.
741 	 *
742 	 * The calling context no longer owns the console so go back all the
743 	 * way instead of trying to implement reacquire heuristics in tons of
744 	 * places.
745 	 */
746 	return false;
747 }
748 
749 /**
750  * nbcon_can_proceed - Check whether ownership can proceed
751  * @wctxt:	The write context that was handed to the write function
752  *
753  * Return:	True if this context still owns the console. False if
754  *		ownership was handed over or taken.
755  *
756  * It is used in nbcon_enter_unsafe() to make sure that it still owns the
757  * lock. Also it is used in nbcon_exit_unsafe() to eventually free the lock
758  * for a higher priority context which asked for the friendly handover.
759  *
760  * It can be called inside an unsafe section when the console is just
761  * temporary in safe state instead of exiting and entering the unsafe state.
762  *
763  * Also it can be called in the safe context before doing an expensive safe
764  * operation. It does not make sense to do the operation when a higher
765  * priority context took the lock.
766  *
767  * When this function returns false then the calling context no longer owns
768  * the console and is no longer allowed to go forward. In this case it must
769  * back out immediately and carefully. The buffer content is also no longer
770  * trusted since it no longer belongs to the calling context.
771  */
772 bool nbcon_can_proceed(struct nbcon_write_context *wctxt)
773 {
774 	struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
775 	struct console *con = ctxt->console;
776 	struct nbcon_state cur;
777 
778 	nbcon_state_read(con, &cur);
779 
780 	return nbcon_context_can_proceed(ctxt, &cur);
781 }
782 EXPORT_SYMBOL_GPL(nbcon_can_proceed);
783 
784 #define nbcon_context_enter_unsafe(c)	__nbcon_context_update_unsafe(c, true)
785 #define nbcon_context_exit_unsafe(c)	__nbcon_context_update_unsafe(c, false)
786 
787 /**
788  * __nbcon_context_update_unsafe - Update the unsafe bit in @con->nbcon_state
789  * @ctxt:	The nbcon context from nbcon_context_try_acquire()
790  * @unsafe:	The new value for the unsafe bit
791  *
792  * Return:	True if the unsafe state was updated and this context still
793  *		owns the console. Otherwise false if ownership was handed
794  *		over or taken.
795  *
796  * This function allows console owners to modify the unsafe status of the
797  * console.
798  *
799  * When this function returns false then the calling context no longer owns
800  * the console and is no longer allowed to go forward. In this case it must
801  * back out immediately and carefully. The buffer content is also no longer
802  * trusted since it no longer belongs to the calling context.
803  *
804  * Internal helper to avoid duplicated code.
805  */
806 static bool __nbcon_context_update_unsafe(struct nbcon_context *ctxt, bool unsafe)
807 {
808 	struct console *con = ctxt->console;
809 	struct nbcon_state cur;
810 	struct nbcon_state new;
811 
812 	nbcon_state_read(con, &cur);
813 
814 	do {
815 		/*
816 		 * The unsafe bit must not be cleared if an
817 		 * unsafe hostile takeover has occurred.
818 		 */
819 		if (!unsafe && cur.unsafe_takeover)
820 			goto out;
821 
822 		if (!nbcon_context_can_proceed(ctxt, &cur))
823 			return false;
824 
825 		new.atom = cur.atom;
826 		new.unsafe = unsafe;
827 	} while (!nbcon_state_try_cmpxchg(con, &cur, &new));
828 
829 	cur.atom = new.atom;
830 out:
831 	return nbcon_context_can_proceed(ctxt, &cur);
832 }
833 
834 static void nbcon_write_context_set_buf(struct nbcon_write_context *wctxt,
835 					char *buf, unsigned int len)
836 {
837 	struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
838 	struct console *con = ctxt->console;
839 	struct nbcon_state cur;
840 
841 	wctxt->outbuf = buf;
842 	wctxt->len = len;
843 	nbcon_state_read(con, &cur);
844 	wctxt->unsafe_takeover = cur.unsafe_takeover;
845 }
846 
847 /**
848  * nbcon_enter_unsafe - Enter an unsafe region in the driver
849  * @wctxt:	The write context that was handed to the write function
850  *
851  * Return:	True if this context still owns the console. False if
852  *		ownership was handed over or taken.
853  *
854  * When this function returns false then the calling context no longer owns
855  * the console and is no longer allowed to go forward. In this case it must
856  * back out immediately and carefully. The buffer content is also no longer
857  * trusted since it no longer belongs to the calling context.
858  */
859 bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt)
860 {
861 	struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
862 	bool is_owner;
863 
864 	is_owner = nbcon_context_enter_unsafe(ctxt);
865 	if (!is_owner)
866 		nbcon_write_context_set_buf(wctxt, NULL, 0);
867 	return is_owner;
868 }
869 EXPORT_SYMBOL_GPL(nbcon_enter_unsafe);
870 
871 /**
872  * nbcon_exit_unsafe - Exit an unsafe region in the driver
873  * @wctxt:	The write context that was handed to the write function
874  *
875  * Return:	True if this context still owns the console. False if
876  *		ownership was handed over or taken.
877  *
878  * When this function returns false then the calling context no longer owns
879  * the console and is no longer allowed to go forward. In this case it must
880  * back out immediately and carefully. The buffer content is also no longer
881  * trusted since it no longer belongs to the calling context.
882  */
883 bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt)
884 {
885 	struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
886 	bool ret;
887 
888 	ret = nbcon_context_exit_unsafe(ctxt);
889 	if (!ret)
890 		nbcon_write_context_set_buf(wctxt, NULL, 0);
891 	return ret;
892 }
893 EXPORT_SYMBOL_GPL(nbcon_exit_unsafe);
894 
895 /**
896  * nbcon_reacquire_nobuf - Reacquire a console after losing ownership
897  *				while printing
898  * @wctxt:	The write context that was handed to the write callback
899  *
900  * Since ownership can be lost at any time due to handover or takeover, a
901  * printing context _must_ be prepared to back out immediately and
902  * carefully. However, there are scenarios where the printing context must
903  * reacquire ownership in order to finalize or revert hardware changes.
904  *
905  * This function allows a printing context to reacquire ownership using the
906  * same priority as its previous ownership.
907  *
908  * Note that after a successful reacquire the printing context will have no
909  * output buffer because that has been lost. This function cannot be used to
910  * resume printing.
911  */
912 void nbcon_reacquire_nobuf(struct nbcon_write_context *wctxt)
913 {
914 	struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
915 
916 	while (!nbcon_context_try_acquire(ctxt))
917 		cpu_relax();
918 
919 	nbcon_write_context_set_buf(wctxt, NULL, 0);
920 }
921 EXPORT_SYMBOL_GPL(nbcon_reacquire_nobuf);
922 
/**
 * nbcon_emit_next_record - Emit a record in the acquired context
 * @wctxt:	The write context that will be handed to the write function
 * @use_atomic:	True if the write_atomic() callback is to be used
 *
 * Return:	True if this context still owns the console. False if
 *		ownership was handed over or taken.
 *
 * When this function returns false then the calling context no longer owns
 * the console and is no longer allowed to go forward. In this case it must
 * back out immediately and carefully. The buffer content is also no longer
 * trusted since it no longer belongs to the calling context. If the caller
 * wants to do more it must reacquire the console first.
 *
 * When true is returned, @wctxt->ctxt.backlog indicates whether there are
 * still records pending in the ringbuffer.
 *
 * The function works in three phases: the message is formatted inside an
 * unsafe section, the driver callback is invoked outside of it, and
 * finally the dropped count and sequence number of the console are updated
 * inside a second unsafe section.
 */
static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt, bool use_atomic)
{
	struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
	struct console *con = ctxt->console;
	bool is_extended = console_srcu_read_flags(con) & CON_EXTENDED;
	struct printk_message pmsg = {
		.pbufs = ctxt->pbufs,
	};
	unsigned long con_dropped;
	struct nbcon_state cur;
	unsigned long dropped;
	unsigned long ulseq;

	/*
	 * This function should never be called for consoles that have not
	 * implemented the necessary callback for writing: i.e. legacy
	 * consoles and, when atomic, nbcon consoles with no write_atomic().
	 * Handle it as if ownership was lost and try to continue.
	 *
	 * Note that for nbcon consoles the write_thread() callback is
	 * mandatory and was already checked in nbcon_alloc().
	 */
	if (WARN_ON_ONCE((use_atomic && !con->write_atomic) ||
			 !(console_srcu_read_flags(con) & CON_NBCON))) {
		nbcon_context_release(ctxt);
		return false;
	}

	/*
	 * The printk buffers are filled within an unsafe section. This
	 * prevents NBCON_PRIO_NORMAL and NBCON_PRIO_EMERGENCY from
	 * clobbering each other.
	 */

	if (!nbcon_context_enter_unsafe(ctxt))
		return false;

	ctxt->backlog = printk_get_next_message(&pmsg, ctxt->seq, is_extended, true);
	/* Nothing to print: just leave the unsafe section again. */
	if (!ctxt->backlog)
		return nbcon_context_exit_unsafe(ctxt);

	/*
	 * @con->dropped is not protected in case of an unsafe hostile
	 * takeover. In that situation the update can be racy so
	 * annotate it accordingly.
	 */
	con_dropped = data_race(READ_ONCE(con->dropped));

	/* Total of previously dropped records plus those dropped just now. */
	dropped = con_dropped + pmsg.dropped;
	if (dropped && !is_extended)
		console_prepend_dropped(&pmsg, dropped);

	/*
	 * If the previous owner was assigned the same record, this context
	 * has taken over ownership and is replaying the record. Prepend a
	 * message to let the user know the record is replayed.
	 */
	ulseq = atomic_long_read(&ACCESS_PRIVATE(con, nbcon_prev_seq));
	if (__ulseq_to_u64seq(prb, ulseq) == pmsg.seq) {
		console_prepend_replay(&pmsg);
	} else {
		/*
		 * Ensure this context is still the owner before trying to
		 * update @nbcon_prev_seq. Otherwise the value in @ulseq may
		 * not be from the previous owner and instead be some later
		 * value from the context that took over ownership.
		 */
		nbcon_state_read(con, &cur);
		if (!nbcon_context_can_proceed(ctxt, &cur))
			return false;

		atomic_long_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_prev_seq), &ulseq,
					__u64seq_to_ulseq(pmsg.seq));
	}

	if (!nbcon_context_exit_unsafe(ctxt))
		return false;

	/* For skipped records just update seq/dropped in @con. */
	if (pmsg.outbuf_len == 0)
		goto update_con;

	/* Initialize the write context for driver callbacks. */
	nbcon_write_context_set_buf(wctxt, &pmsg.pbufs->outbuf[0], pmsg.outbuf_len);

	if (use_atomic)
		con->write_atomic(con, wctxt);
	else
		con->write_thread(con, wctxt);

	if (!wctxt->outbuf) {
		/*
		 * Ownership was lost and reacquired by the driver. Handle it
		 * as if ownership was lost.
		 */
		nbcon_context_release(ctxt);
		return false;
	}

	/*
	 * Ownership may have been lost but _not_ reacquired by the driver.
	 * This case is detected and handled when entering unsafe to update
	 * dropped/seq values.
	 */

	/*
	 * Since any dropped message was successfully output, reset the
	 * dropped count for the console.
	 */
	dropped = 0;
update_con:
	/*
	 * The dropped count and the sequence number are updated within an
	 * unsafe section. This limits update races to the panic context and
	 * allows the panic context to win.
	 */

	if (!nbcon_context_enter_unsafe(ctxt))
		return false;

	/* Only write @con->dropped when the value actually changed. */
	if (dropped != con_dropped) {
		/* Counterpart to the READ_ONCE() above. */
		WRITE_ONCE(con->dropped, dropped);
	}

	nbcon_seq_try_update(ctxt, pmsg.seq + 1);

	return nbcon_context_exit_unsafe(ctxt);
}
1069 
1070 /*
1071  * nbcon_emit_one - Print one record for an nbcon console using the
1072  *			specified callback
1073  * @wctxt:	An initialized write context struct to use for this context
1074  * @use_atomic:	True if the write_atomic() callback is to be used
1075  *
1076  * Return:	True, when a record has been printed and there are still
1077  *		pending records. The caller might want to continue flushing.
1078  *
1079  *		False, when there is no pending record, or when the console
1080  *		context cannot be acquired, or the ownership has been lost.
1081  *		The caller should give up. Either the job is done, cannot be
1082  *		done, or will be handled by the owning context.
1083  *
1084  * This is an internal helper to handle the locking of the console before
1085  * calling nbcon_emit_next_record().
1086  */
1087 static bool nbcon_emit_one(struct nbcon_write_context *wctxt, bool use_atomic)
1088 {
1089 	struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
1090 	struct console *con = ctxt->console;
1091 	unsigned long flags;
1092 	bool ret = false;
1093 
1094 	if (!use_atomic) {
1095 		con->device_lock(con, &flags);
1096 
1097 		/*
1098 		 * Ensure this stays on the CPU to make handover and
1099 		 * takeover possible.
1100 		 */
1101 		cant_migrate();
1102 	}
1103 
1104 	if (!nbcon_context_try_acquire(ctxt))
1105 		goto out;
1106 
1107 	/*
1108 	 * nbcon_emit_next_record() returns false when the console was
1109 	 * handed over or taken over. In both cases the context is no
1110 	 * longer valid.
1111 	 *
1112 	 * The higher priority printing context takes over responsibility
1113 	 * to print the pending records.
1114 	 */
1115 	if (!nbcon_emit_next_record(wctxt, use_atomic))
1116 		goto out;
1117 
1118 	nbcon_context_release(ctxt);
1119 
1120 	ret = ctxt->backlog;
1121 out:
1122 	if (!use_atomic)
1123 		con->device_unlock(con, flags);
1124 	return ret;
1125 }
1126 
1127 /**
1128  * nbcon_kthread_should_wakeup - Check whether a printer thread should wakeup
1129  * @con:	Console to operate on
1130  * @ctxt:	The nbcon context from nbcon_context_try_acquire()
1131  *
1132  * Return:	True if the thread should shutdown or if the console is
1133  *		allowed to print and a record is available. False otherwise.
1134  *
1135  * After the thread wakes up, it must first check if it should shutdown before
1136  * attempting any printing.
1137  */
1138 static bool nbcon_kthread_should_wakeup(struct console *con, struct nbcon_context *ctxt)
1139 {
1140 	bool ret = false;
1141 	short flags;
1142 	int cookie;
1143 
1144 	if (kthread_should_stop())
1145 		return true;
1146 
1147 	cookie = console_srcu_read_lock();
1148 
1149 	flags = console_srcu_read_flags(con);
1150 	if (console_is_usable(con, flags, false)) {
1151 		/* Bring the sequence in @ctxt up to date */
1152 		ctxt->seq = nbcon_seq_read(con);
1153 
1154 		ret = prb_read_valid(prb, ctxt->seq, NULL);
1155 	}
1156 
1157 	console_srcu_read_unlock(cookie);
1158 	return ret;
1159 }
1160 
1161 /**
1162  * nbcon_kthread_func - The printer thread function
1163  * @__console:	Console to operate on
1164  *
1165  * Return:	0
1166  */
1167 static int nbcon_kthread_func(void *__console)
1168 {
1169 	struct console *con = __console;
1170 	struct nbcon_write_context wctxt = {
1171 		.ctxt.console	= con,
1172 		.ctxt.prio	= NBCON_PRIO_NORMAL,
1173 	};
1174 	struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt);
1175 	short con_flags;
1176 	bool backlog;
1177 	int cookie;
1178 
1179 wait_for_event:
1180 	/*
1181 	 * Guarantee this task is visible on the rcuwait before
1182 	 * checking the wake condition.
1183 	 *
1184 	 * The full memory barrier within set_current_state() of
1185 	 * ___rcuwait_wait_event() pairs with the full memory
1186 	 * barrier within rcuwait_has_sleeper().
1187 	 *
1188 	 * This pairs with rcuwait_has_sleeper:A and nbcon_kthread_wake:A.
1189 	 */
1190 	rcuwait_wait_event(&con->rcuwait,
1191 			   nbcon_kthread_should_wakeup(con, ctxt),
1192 			   TASK_INTERRUPTIBLE); /* LMM(nbcon_kthread_func:A) */
1193 
1194 	do {
1195 		if (kthread_should_stop())
1196 			return 0;
1197 
1198 		backlog = false;
1199 
1200 		/*
1201 		 * Keep the srcu read lock around the entire operation so that
1202 		 * synchronize_srcu() can guarantee that the kthread stopped
1203 		 * or suspended printing.
1204 		 */
1205 		cookie = console_srcu_read_lock();
1206 
1207 		con_flags = console_srcu_read_flags(con);
1208 
1209 		if (console_is_usable(con, con_flags, false))
1210 			backlog = nbcon_emit_one(&wctxt, false);
1211 
1212 		console_srcu_read_unlock(cookie);
1213 
1214 		cond_resched();
1215 
1216 	} while (backlog);
1217 
1218 	goto wait_for_event;
1219 }
1220 
1221 /**
1222  * nbcon_irq_work - irq work to wake console printer thread
1223  * @irq_work:	The irq work to operate on
1224  */
1225 static void nbcon_irq_work(struct irq_work *irq_work)
1226 {
1227 	struct console *con = container_of(irq_work, struct console, irq_work);
1228 
1229 	nbcon_kthread_wake(con);
1230 }
1231 
/*
 * Check whether a task is currently registered as waiter on @w.
 *
 * Issues a full memory barrier first and then returns the result of
 * rcuwait_active(@w).
 */
static inline bool rcuwait_has_sleeper(struct rcuwait *w)
{
	/*
	 * Guarantee any new records can be seen by tasks preparing to wait
	 * before this context checks if the rcuwait is empty.
	 *
	 * This full memory barrier pairs with the full memory barrier within
	 * set_current_state() of ___rcuwait_wait_event(), which is called
	 * after prepare_to_rcuwait() adds the waiter but before it has
	 * checked the wait condition.
	 *
	 * This pairs with nbcon_kthread_func:A.
	 */
	smp_mb(); /* LMM(rcuwait_has_sleeper:A) */
	return rcuwait_active(w);
}
1248 
1249 /**
1250  * nbcon_kthreads_wake - Wake up printing threads using irq_work
1251  */
1252 void nbcon_kthreads_wake(void)
1253 {
1254 	struct console *con;
1255 	int cookie;
1256 
1257 	if (!printk_kthreads_running)
1258 		return;
1259 
1260 	cookie = console_srcu_read_lock();
1261 	for_each_console_srcu(con) {
1262 		if (!(console_srcu_read_flags(con) & CON_NBCON))
1263 			continue;
1264 
1265 		/*
1266 		 * Only schedule irq_work if the printing thread is
1267 		 * actively waiting. If not waiting, the thread will
1268 		 * notice by itself that it has work to do.
1269 		 */
1270 		if (rcuwait_has_sleeper(&con->rcuwait))
1271 			irq_work_queue(&con->irq_work);
1272 	}
1273 	console_srcu_read_unlock(cookie);
1274 }
1275 
1276 /*
1277  * nbcon_kthread_stop - Stop a console printer thread
1278  * @con:	Console to operate on
1279  */
1280 void nbcon_kthread_stop(struct console *con)
1281 {
1282 	lockdep_assert_console_list_lock_held();
1283 
1284 	if (!con->kthread)
1285 		return;
1286 
1287 	kthread_stop(con->kthread);
1288 	con->kthread = NULL;
1289 }
1290 
1291 /**
1292  * nbcon_kthread_create - Create a console printer thread
1293  * @con:	Console to operate on
1294  *
1295  * Return:	True if the kthread was started or already exists.
1296  *		Otherwise false and @con must not be registered.
1297  *
1298  * This function is called when it will be expected that nbcon consoles are
1299  * flushed using the kthread. The messages printed with NBCON_PRIO_NORMAL
1300  * will be no longer flushed by the legacy loop. This is why failure must
1301  * be fatal for console registration.
1302  *
1303  * If @con was already registered and this function fails, @con must be
1304  * unregistered before the global state variable @printk_kthreads_running
1305  * can be set.
1306  */
1307 bool nbcon_kthread_create(struct console *con)
1308 {
1309 	struct task_struct *kt;
1310 
1311 	lockdep_assert_console_list_lock_held();
1312 
1313 	if (con->kthread)
1314 		return true;
1315 
1316 	kt = kthread_run(nbcon_kthread_func, con, "pr/%s%d", con->name, con->index);
1317 	if (WARN_ON(IS_ERR(kt))) {
1318 		con_printk(KERN_ERR, con, "failed to start printing thread\n");
1319 		return false;
1320 	}
1321 
1322 	con->kthread = kt;
1323 
1324 	/*
1325 	 * It is important that console printing threads are scheduled
1326 	 * shortly after a printk call and with generous runtime budgets.
1327 	 */
1328 	sched_set_normal(con->kthread, -20);
1329 
1330 	return true;
1331 }
1332 
/*
 * Track the nbcon emergency nesting per CPU.
 *
 * @early_nbcon_pcpu_emergency_nesting is the counter that
 * nbcon_get_cpu_emergency_nesting() hands out as long as the printk per-CPU
 * data is not ready yet.
 */
static DEFINE_PER_CPU(unsigned int, nbcon_pcpu_emergency_nesting);
static unsigned int early_nbcon_pcpu_emergency_nesting __initdata;
1336 
1337 /**
1338  * nbcon_get_cpu_emergency_nesting - Get the per CPU emergency nesting pointer
1339  *
1340  * Context:	For reading, any context. For writing, any context which could
1341  *		not be migrated to another CPU.
1342  * Return:	Either a pointer to the per CPU emergency nesting counter of
1343  *		the current CPU or to the init data during early boot.
1344  *
1345  * The function is safe for reading per-CPU variables in any context because
1346  * preemption is disabled if the current CPU is in the emergency state. See
1347  * also nbcon_cpu_emergency_enter().
1348  */
1349 static __ref unsigned int *nbcon_get_cpu_emergency_nesting(void)
1350 {
1351 	/*
1352 	 * The value of __printk_percpu_data_ready gets set in normal
1353 	 * context and before SMP initialization. As a result it could
1354 	 * never change while inside an nbcon emergency section.
1355 	 */
1356 	if (!printk_percpu_data_ready())
1357 		return &early_nbcon_pcpu_emergency_nesting;
1358 
1359 	return raw_cpu_ptr(&nbcon_pcpu_emergency_nesting);
1360 }
1361 
1362 /**
1363  * nbcon_get_default_prio - The appropriate nbcon priority to use for nbcon
1364  *				printing on the current CPU
1365  *
1366  * Context:	Any context.
1367  * Return:	The nbcon_prio to use for acquiring an nbcon console in this
1368  *		context for printing.
1369  *
1370  * The function is safe for reading per-CPU data in any context because
1371  * preemption is disabled if the current CPU is in the emergency or panic
1372  * state.
1373  */
1374 enum nbcon_prio nbcon_get_default_prio(void)
1375 {
1376 	unsigned int *cpu_emergency_nesting;
1377 
1378 	if (this_cpu_in_panic())
1379 		return NBCON_PRIO_PANIC;
1380 
1381 	cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting();
1382 	if (*cpu_emergency_nesting)
1383 		return NBCON_PRIO_EMERGENCY;
1384 
1385 	return NBCON_PRIO_NORMAL;
1386 }
1387 
1388 /**
1389  * nbcon_legacy_emit_next_record - Print one record for an nbcon console
1390  *					in legacy contexts
1391  * @con:	The console to print on
1392  * @handover:	Will be set to true if a printk waiter has taken over the
1393  *		console_lock, in which case the caller is no longer holding
1394  *		both the console_lock and the SRCU read lock. Otherwise it
1395  *		is set to false.
1396  * @cookie:	The cookie from the SRCU read lock.
1397  * @use_atomic: Set true when called in an atomic or unknown context.
1398  *		It affects which nbcon callback will be used: write_atomic()
1399  *		or write_thread().
1400  *
1401  *		When false, the write_thread() callback is used and would be
1402  *		called in a preemtible context unless disabled by the
1403  *		device_lock. The legacy handover is not allowed in this mode.
1404  *
1405  * Context:	Any context except NMI.
1406  * Return:	True, when a record has been printed and there are still
1407  *		pending records. The caller might want to continue flushing.
1408  *
1409  *		False, when there is no pending record, or when the console
1410  *		context cannot be acquired, or the ownership has been lost.
1411  *		The caller should give up. Either the job is done, cannot be
1412  *		done, or will be handled by the owning context.
1413  *
1414  * This function is meant to be called by console_flush_all() to print records
1415  * on nbcon consoles from legacy context (printing via console unlocking).
1416  * Essentially it is the nbcon version of console_emit_next_record().
1417  */
1418 bool nbcon_legacy_emit_next_record(struct console *con, bool *handover,
1419 				   int cookie, bool use_atomic)
1420 {
1421 	struct nbcon_write_context wctxt = { };
1422 	struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt);
1423 	unsigned long flags;
1424 	bool progress;
1425 
1426 	ctxt->console	= con;
1427 	ctxt->prio	= nbcon_get_default_prio();
1428 
1429 	if (use_atomic) {
1430 		/*
1431 		 * In an atomic or unknown context, use the same procedure as
1432 		 * in console_emit_next_record(). It allows to handover.
1433 		 */
1434 		printk_safe_enter_irqsave(flags);
1435 		console_lock_spinning_enable();
1436 		stop_critical_timings();
1437 	}
1438 
1439 	progress = nbcon_emit_one(&wctxt, use_atomic);
1440 
1441 	if (use_atomic) {
1442 		start_critical_timings();
1443 		*handover = console_lock_spinning_disable_and_check(cookie);
1444 		printk_safe_exit_irqrestore(flags);
1445 	} else {
1446 		/* Non-atomic does not perform legacy spinning handovers. */
1447 		*handover = false;
1448 	}
1449 
1450 	return progress;
1451 }
1452 
1453 /**
1454  * __nbcon_atomic_flush_pending_con - Flush specified nbcon console using its
1455  *					write_atomic() callback
1456  * @con:			The nbcon console to flush
1457  * @stop_seq:			Flush up until this record
1458  * @allow_unsafe_takeover:	True, to allow unsafe hostile takeovers
1459  *
1460  * Return:	0 if @con was flushed up to @stop_seq Otherwise, error code on
1461  *		failure.
1462  *
1463  * Errors:
1464  *
1465  *	-EPERM:		Unable to acquire console ownership.
1466  *
1467  *	-EAGAIN:	Another context took over ownership while printing.
1468  *
1469  *	-ENOENT:	A record before @stop_seq is not available.
1470  *
1471  * If flushing up to @stop_seq was not successful, it only makes sense for the
1472  * caller to try again when -EAGAIN was returned. When -EPERM is returned,
1473  * this context is not allowed to acquire the console. When -ENOENT is
1474  * returned, it cannot be expected that the unfinalized record will become
1475  * available.
1476  */
1477 static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq,
1478 					    bool allow_unsafe_takeover)
1479 {
1480 	struct nbcon_write_context wctxt = { };
1481 	struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt);
1482 	int err = 0;
1483 
1484 	ctxt->console			= con;
1485 	ctxt->spinwait_max_us		= 2000;
1486 	ctxt->prio			= nbcon_get_default_prio();
1487 	ctxt->allow_unsafe_takeover	= allow_unsafe_takeover;
1488 
1489 	if (!nbcon_context_try_acquire(ctxt))
1490 		return -EPERM;
1491 
1492 	while (nbcon_seq_read(con) < stop_seq) {
1493 		/*
1494 		 * nbcon_emit_next_record() returns false when the console was
1495 		 * handed over or taken over. In both cases the context is no
1496 		 * longer valid.
1497 		 */
1498 		if (!nbcon_emit_next_record(&wctxt, true))
1499 			return -EAGAIN;
1500 
1501 		if (!ctxt->backlog) {
1502 			/* Are there reserved but not yet finalized records? */
1503 			if (nbcon_seq_read(con) < stop_seq)
1504 				err = -ENOENT;
1505 			break;
1506 		}
1507 	}
1508 
1509 	nbcon_context_release(ctxt);
1510 	return err;
1511 }
1512 
1513 /**
1514  * nbcon_atomic_flush_pending_con - Flush specified nbcon console using its
1515  *					write_atomic() callback
1516  * @con:			The nbcon console to flush
1517  * @stop_seq:			Flush up until this record
1518  * @allow_unsafe_takeover:	True, to allow unsafe hostile takeovers
1519  *
1520  * This will stop flushing before @stop_seq if another context has ownership.
1521  * That context is then responsible for the flushing. Likewise, if new records
1522  * are added while this context was flushing and there is no other context
1523  * to handle the printing, this context must also flush those records.
1524  */
1525 static void nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq,
1526 					   bool allow_unsafe_takeover)
1527 {
1528 	struct console_flush_type ft;
1529 	unsigned long flags;
1530 	int err;
1531 
1532 again:
1533 	/*
1534 	 * Atomic flushing does not use console driver synchronization (i.e.
1535 	 * it does not hold the port lock for uart consoles). Therefore IRQs
1536 	 * must be disabled to avoid being interrupted and then calling into
1537 	 * a driver that will deadlock trying to acquire console ownership.
1538 	 */
1539 	local_irq_save(flags);
1540 
1541 	err = __nbcon_atomic_flush_pending_con(con, stop_seq, allow_unsafe_takeover);
1542 
1543 	local_irq_restore(flags);
1544 
1545 	/*
1546 	 * If there was a new owner (-EPERM, -EAGAIN), that context is
1547 	 * responsible for completing.
1548 	 *
1549 	 * Do not wait for records not yet finalized (-ENOENT) to avoid a
1550 	 * possible deadlock. They will either get flushed by the writer or
1551 	 * eventually skipped on panic CPU.
1552 	 */
1553 	if (err)
1554 		return;
1555 
1556 	/*
1557 	 * If flushing was successful but more records are available, this
1558 	 * context must flush those remaining records if the printer thread
1559 	 * is not available do it.
1560 	 */
1561 	printk_get_console_flush_type(&ft);
1562 	if (!ft.nbcon_offload &&
1563 	    prb_read_valid(prb, nbcon_seq_read(con), NULL)) {
1564 		stop_seq = prb_next_reserve_seq(prb);
1565 		goto again;
1566 	}
1567 }
1568 
1569 /**
1570  * __nbcon_atomic_flush_pending - Flush all nbcon consoles using their
1571  *					write_atomic() callback
1572  * @stop_seq:			Flush up until this record
1573  * @allow_unsafe_takeover:	True, to allow unsafe hostile takeovers
1574  */
1575 static void __nbcon_atomic_flush_pending(u64 stop_seq, bool allow_unsafe_takeover)
1576 {
1577 	struct console *con;
1578 	int cookie;
1579 
1580 	cookie = console_srcu_read_lock();
1581 	for_each_console_srcu(con) {
1582 		short flags = console_srcu_read_flags(con);
1583 
1584 		if (!(flags & CON_NBCON))
1585 			continue;
1586 
1587 		if (!console_is_usable(con, flags, true))
1588 			continue;
1589 
1590 		if (nbcon_seq_read(con) >= stop_seq)
1591 			continue;
1592 
1593 		nbcon_atomic_flush_pending_con(con, stop_seq, allow_unsafe_takeover);
1594 	}
1595 	console_srcu_read_unlock(cookie);
1596 }
1597 
1598 /**
1599  * nbcon_atomic_flush_pending - Flush all nbcon consoles using their
1600  *				write_atomic() callback
1601  *
1602  * Flush the backlog up through the currently newest record. Any new
1603  * records added while flushing will not be flushed if there is another
1604  * context available to handle the flushing. This is to avoid one CPU
1605  * printing unbounded because other CPUs continue to add records.
1606  */
1607 void nbcon_atomic_flush_pending(void)
1608 {
1609 	__nbcon_atomic_flush_pending(prb_next_reserve_seq(prb), false);
1610 }
1611 
1612 /**
1613  * nbcon_atomic_flush_unsafe - Flush all nbcon consoles using their
1614  *	write_atomic() callback and allowing unsafe hostile takeovers
1615  *
1616  * Flush the backlog up through the currently newest record. Unsafe hostile
1617  * takeovers will be performed, if necessary.
1618  */
1619 void nbcon_atomic_flush_unsafe(void)
1620 {
1621 	__nbcon_atomic_flush_pending(prb_next_reserve_seq(prb), true);
1622 }
1623 
/**
 * nbcon_cpu_emergency_enter - Enter an emergency section where printk()
 *				messages for that CPU are flushed directly
 *
 * Context:	Any context. Disables preemption.
 *
 * While inside an emergency section, printk() calls will attempt to flush
 * any pending messages in the ringbuffer. Sections may nest; each call
 * increments the nesting counter of the current CPU.
 */
void nbcon_cpu_emergency_enter(void)
{
	unsigned int *nesting;

	/* Preemption goes off first, before the counter is looked up. */
	preempt_disable();

	nesting = nbcon_get_cpu_emergency_nesting();
	*nesting += 1;
}
1642 
/**
 * nbcon_cpu_emergency_exit - Exit an emergency section
 *
 * Context:	Within an emergency section. Enables preemption.
 *
 * Warns once, and leaves the counter untouched, if the nesting counter is
 * already zero. Preemption is enabled in either case.
 */
void nbcon_cpu_emergency_exit(void)
{
	unsigned int *cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting();

	/* Unbalanced exit: do not let the counter wrap around. */
	if (WARN_ON_ONCE(*cpu_emergency_nesting == 0))
		goto out;

	(*cpu_emergency_nesting)--;
out:
	preempt_enable();
}
1659 
1660 /**
1661  * nbcon_alloc - Allocate and init the nbcon console specific data
1662  * @con:	Console to initialize
1663  *
1664  * Return:	True if the console was fully allocated and initialized.
1665  *		Otherwise @con must not be registered.
1666  *
1667  * When allocation and init was successful, the console must be properly
1668  * freed using nbcon_free() once it is no longer needed.
1669  */
1670 bool nbcon_alloc(struct console *con)
1671 {
1672 	struct nbcon_state state = { };
1673 
1674 	/* The write_thread() callback is mandatory. */
1675 	if (WARN_ON(!con->write_thread))
1676 		return false;
1677 
1678 	rcuwait_init(&con->rcuwait);
1679 	init_irq_work(&con->irq_work, nbcon_irq_work);
1680 	atomic_long_set(&ACCESS_PRIVATE(con, nbcon_prev_seq), -1UL);
1681 	nbcon_state_set(con, &state);
1682 
1683 	/*
1684 	 * Initialize @nbcon_seq to the highest possible sequence number so
1685 	 * that practically speaking it will have nothing to print until a
1686 	 * desired initial sequence number has been set via nbcon_seq_force().
1687 	 */
1688 	atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), ULSEQ_MAX(prb));
1689 
1690 	if (con->flags & CON_BOOT) {
1691 		/*
1692 		 * Boot console printing is synchronized with legacy console
1693 		 * printing, so boot consoles can share the same global printk
1694 		 * buffers.
1695 		 */
1696 		con->pbufs = &printk_shared_pbufs;
1697 	} else {
1698 		con->pbufs = kmalloc(sizeof(*con->pbufs), GFP_KERNEL);
1699 		if (!con->pbufs) {
1700 			con_printk(KERN_ERR, con, "failed to allocate printing buffer\n");
1701 			return false;
1702 		}
1703 
1704 		if (printk_kthreads_running) {
1705 			if (!nbcon_kthread_create(con)) {
1706 				kfree(con->pbufs);
1707 				con->pbufs = NULL;
1708 				return false;
1709 			}
1710 		}
1711 	}
1712 
1713 	return true;
1714 }
1715 
1716 /**
1717  * nbcon_free - Free and cleanup the nbcon console specific data
1718  * @con:	Console to free/cleanup nbcon data
1719  */
1720 void nbcon_free(struct console *con)
1721 {
1722 	struct nbcon_state state = { };
1723 
1724 	if (printk_kthreads_running)
1725 		nbcon_kthread_stop(con);
1726 
1727 	nbcon_state_set(con, &state);
1728 
1729 	/* Boot consoles share global printk buffers. */
1730 	if (!(con->flags & CON_BOOT))
1731 		kfree(con->pbufs);
1732 
1733 	con->pbufs = NULL;
1734 }
1735 
1736 /**
1737  * nbcon_device_try_acquire - Try to acquire nbcon console and enter unsafe
1738  *				section
1739  * @con:	The nbcon console to acquire
1740  *
1741  * Context:	Under the locking mechanism implemented in
1742  *		@con->device_lock() including disabling migration.
1743  * Return:	True if the console was acquired. False otherwise.
1744  *
1745  * Console drivers will usually use their own internal synchronization
1746  * mechasism to synchronize between console printing and non-printing
1747  * activities (such as setting baud rates). However, nbcon console drivers
1748  * supporting atomic consoles may also want to mark unsafe sections when
1749  * performing non-printing activities in order to synchronize against their
1750  * atomic_write() callback.
1751  *
1752  * This function acquires the nbcon console using priority NBCON_PRIO_NORMAL
1753  * and marks it unsafe for handover/takeover.
1754  */
1755 bool nbcon_device_try_acquire(struct console *con)
1756 {
1757 	struct nbcon_context *ctxt = &ACCESS_PRIVATE(con, nbcon_device_ctxt);
1758 
1759 	cant_migrate();
1760 
1761 	memset(ctxt, 0, sizeof(*ctxt));
1762 	ctxt->console	= con;
1763 	ctxt->prio	= NBCON_PRIO_NORMAL;
1764 
1765 	if (!nbcon_context_try_acquire(ctxt))
1766 		return false;
1767 
1768 	if (!nbcon_context_enter_unsafe(ctxt))
1769 		return false;
1770 
1771 	return true;
1772 }
1773 EXPORT_SYMBOL_GPL(nbcon_device_try_acquire);
1774 
1775 /**
1776  * nbcon_device_release - Exit unsafe section and release the nbcon console
1777  * @con:	The nbcon console acquired in nbcon_device_try_acquire()
1778  */
1779 void nbcon_device_release(struct console *con)
1780 {
1781 	struct nbcon_context *ctxt = &ACCESS_PRIVATE(con, nbcon_device_ctxt);
1782 	struct console_flush_type ft;
1783 	int cookie;
1784 
1785 	if (!nbcon_context_exit_unsafe(ctxt))
1786 		return;
1787 
1788 	nbcon_context_release(ctxt);
1789 
1790 	/*
1791 	 * This context must flush any new records added while the console
1792 	 * was locked if the printer thread is not available to do it. The
1793 	 * console_srcu_read_lock must be taken to ensure the console is
1794 	 * usable throughout flushing.
1795 	 */
1796 	cookie = console_srcu_read_lock();
1797 	printk_get_console_flush_type(&ft);
1798 	if (console_is_usable(con, console_srcu_read_flags(con), true) &&
1799 	    !ft.nbcon_offload &&
1800 	    prb_read_valid(prb, nbcon_seq_read(con), NULL)) {
1801 		/*
1802 		 * If nbcon_atomic flushing is not available, fallback to
1803 		 * using the legacy loop.
1804 		 */
1805 		if (ft.nbcon_atomic) {
1806 			__nbcon_atomic_flush_pending_con(con, prb_next_reserve_seq(prb), false);
1807 		} else if (ft.legacy_direct) {
1808 			if (console_trylock())
1809 				console_unlock();
1810 		} else if (ft.legacy_offload) {
1811 			printk_trigger_flush();
1812 		}
1813 	}
1814 	console_srcu_read_unlock(cookie);
1815 }
1816 EXPORT_SYMBOL_GPL(nbcon_device_release);
1817