xref: /linux/kernel/printk/printk_ringbuffer.h (revision 60325c27d3cfe13466f6d6aa882b11bdd1c58cc8)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 
3 #ifndef _KERNEL_PRINTK_RINGBUFFER_H
4 #define _KERNEL_PRINTK_RINGBUFFER_H
5 
6 #include <linux/atomic.h>
7 #include <linux/bits.h>
8 #include <linux/dev_printk.h>
9 #include <linux/stddef.h>
10 #include <linux/types.h>
11 
12 /*
13  * Meta information about each stored message.
14  *
15  * All fields are set by the printk code except for @seq, which is
16  * set by the ringbuffer code.
17  */
18 struct printk_info {
19 	u64	seq;		/* sequence number */
20 	u64	ts_nsec;	/* timestamp in nanoseconds */
21 	u16	text_len;	/* length of text message */
22 	u8	facility;	/* syslog facility */
23 	u8	flags:5;	/* internal record flags */
24 	u8	level:3;	/* syslog level */
25 	u32	caller_id;	/* thread id or processor id */
26 #ifdef CONFIG_PRINTK_EXECUTION_CTX
27 	u32	caller_id2;	/* caller_id complement */
28 	/* name of the task that generated the message */
29 	char	comm[TASK_COMM_LEN];
30 #endif
31 
32 	struct dev_printk_info	dev_info;
33 };
34 
/*
 * The buffers exchanged between the ringbuffer and its writers/readers.
 *
 * @info:          Where the record meta-data is stored.
 * @text_buf:      Where the record text is stored.
 * @text_buf_size: The size of @text_buf.
 *
 * Writers:
 * prb_rec_init_wr() stores the needed @text_buf_size, then the writer calls
 * prb_reserve(). On success, prb_reserve() points @info and @text_buf at
 * buffers reserved for that writer.
 *
 * Readers:
 * prb_rec_init_rd() sets all fields, then the reader calls
 * prb_read_valid(). Here the reader itself provides the @info and @text_buf
 * buffers. On success, the struct pointed to by @info is filled and the
 * char array pointed to by @text_buf is filled with text data.
 */
struct printk_record {
	struct printk_info	*info;
	char			*text_buf;
	unsigned int		text_buf_size;
};
54 
/*
 * The logical position and span of a data block.
 *
 * @begin: Logical position at which the span begins.
 * @next:  Logical position at which the span ends.
 *
 * For records without a data block both fields hold one of the special
 * values FAILED_LPOS or EMPTY_LINE_LPOS instead of a real position.
 */
struct prb_data_blk_lpos {
	unsigned long	begin;
	unsigned long	next;
};
60 
61 /*
62  * A descriptor: the complete meta-data for a record.
63  *
64  * @state_var: A bitwise combination of descriptor ID and descriptor state.
65  */
66 struct prb_desc {
67 	atomic_long_t			state_var;
68 	struct prb_data_blk_lpos	text_blk_lpos;
69 };
70 
71 /* A ringbuffer of "ID + data" elements. */
72 struct prb_data_ring {
73 	unsigned int	size_bits;
74 	char		*data;
75 	atomic_long_t	head_lpos;
76 	atomic_long_t	tail_lpos;
77 };
78 
79 /* A ringbuffer of "struct prb_desc" elements. */
80 struct prb_desc_ring {
81 	unsigned int		count_bits;
82 	struct prb_desc		*descs;
83 	struct printk_info	*infos;
84 	atomic_long_t		head_id;
85 	atomic_long_t		tail_id;
86 	atomic_long_t		last_finalized_seq;
87 };
88 
89 /*
90  * The high level structure representing the printk ringbuffer.
91  *
92  * @fail: Count of failed prb_reserve() calls where not even a data-less
93  *        record was created.
94  */
95 struct printk_ringbuffer {
96 	struct prb_desc_ring	desc_ring;
97 	struct prb_data_ring	text_data_ring;
98 	atomic_long_t		fail;
99 };
100 
/*
 * The reserve/commit handle held by a writer.
 *
 * @rb:         Ringbuffer where the entry is reserved.
 * @irqflags:   Saved irq flags to restore on entry commit.
 * @id:         ID of the reserved descriptor.
 * @text_space: Total occupied buffer space in the text data ring, including
 *              ID, alignment padding, and wrapping data blocks.
 *
 * Writers must treat this structure as opaque. Only the ringbuffer
 * implementation is allowed to use its contents.
 */
struct prb_reserved_entry {
	struct printk_ringbuffer	*rb;
	unsigned long			irqflags;
	unsigned long			id;
	unsigned int			text_space;
};
119 
/*
 * The possible responses of a descriptor state-query.
 *
 * The four real states fit into the two state bits that DESC_SV() packs
 * into a state variable. desc_miss is a pseudo state that is never packed.
 */
enum desc_state {
	/* ID mismatch (pseudo state) */
	desc_miss	= -1,
	/* reserved, in use by writer */
	desc_reserved	= 0,
	/* committed by writer, could get reopened */
	desc_committed	= 1,
	/* committed, no further modification allowed */
	desc_finalized	= 2,
	/* free, not yet used by any writer */
	desc_reusable	= 3,
};
128 
/* Size of a data ring buffer for the given power-of-2 size bits. */
#define _DATA_SIZE(sz_bits)	(1UL << (sz_bits))
/* Number of descriptors for the given power-of-2 count bits. */
#define _DESCS_COUNT(ct_bits)	(1U << (ct_bits))

/*
 * Layout of a descriptor state variable (@prb_desc.state_var): the two
 * most significant bits hold the descriptor state, all lower bits hold the
 * descriptor ID.
 *
 * Every macro argument is parenthesized so that compound expressions
 * (e.g. "a | b" or "c ? x : y") are evaluated as a unit.
 */
#define DESC_SV_BITS		BITS_PER_LONG
#define DESC_FLAGS_SHIFT	(DESC_SV_BITS - 2)
#define DESC_FLAGS_MASK		(3UL << DESC_FLAGS_SHIFT)
#define DESC_STATE(sv)		(3UL & ((sv) >> DESC_FLAGS_SHIFT))
#define DESC_SV(id, state)	(((unsigned long)(state) << DESC_FLAGS_SHIFT) | (id))
#define DESC_ID_MASK		(~DESC_FLAGS_MASK)
#define DESC_ID(sv)		((sv) & DESC_ID_MASK)
138 
139 /*
140  * Special data block logical position values (for fields of
141  * @prb_desc.text_blk_lpos).
142  *
143  * - Bit0 is used to identify if the record has no data block. (Implemented in
144  *   the LPOS_DATALESS() macro.)
145  *
146  * - Bit1 specifies the reason for not having a data block.
147  *
148  * These special values could never be real lpos values because of the
149  * meta data and alignment padding of data blocks. (See to_blk_size() for
150  * details.)
151  */
/* Data-less marker with only Bit0 set. */
#define FAILED_LPOS		0x1
/* Data-less marker with Bit0 and Bit1 set. */
#define EMPTY_LINE_LPOS		0x3

/* Initializer for a struct prb_data_blk_lpos that has no data block. */
#define FAILED_BLK_LPOS	{ .begin = FAILED_LPOS, .next = FAILED_LPOS }
160 
161 /*
162  * Descriptor Bootstrap
163  *
164  * The descriptor array is minimally initialized to allow immediate usage
165  * by readers and writers. The requirements that the descriptor array
166  * initialization must satisfy:
167  *
168  *   Req1
169  *     The tail must point to an existing (committed or reusable) descriptor.
170  *     This is required by the implementation of prb_first_seq().
171  *
172  *   Req2
173  *     Readers must see that the ringbuffer is initially empty.
174  *
175  *   Req3
176  *     The first record reserved by a writer is assigned sequence number 0.
177  *
178  * To satisfy Req1, the tail initially points to a descriptor that is
179  * minimally initialized (having no data block, i.e. data-less with the
180  * data block's lpos @begin and @next values set to FAILED_LPOS).
181  *
182  * To satisfy Req2, the initial tail descriptor is initialized to the
183  * reusable state. Readers recognize reusable descriptors as existing
184  * records, but skip over them.
185  *
186  * To satisfy Req3, the last descriptor in the array is used as the initial
187  * head (and tail) descriptor. This allows the first record reserved by a
188  * writer (head + 1) to be the first descriptor in the array. (Only the first
189  * descriptor in the array could have a valid sequence number of 0.)
190  *
191  * The first time a descriptor is reserved, it is assigned a sequence number
192  * with the value of the array index. A "first time reserved" descriptor can
193  * be recognized because it has a sequence number of 0 but does not have an
194  * index of 0. (Only the first descriptor in the array could have a valid
195  * sequence number of 0.) After the first reservation, all future reservations
196  * (recycling) simply involve incrementing the sequence number by the array
197  * count.
198  *
199  *   Hack #1
200  *     Only the first descriptor in the array is allowed to have the sequence
201  *     number 0. In this case it is not possible to recognize if it is being
202  *     reserved the first time (set to index value) or has been reserved
203  *     previously (increment by the array count). This is handled by _always_
204  *     incrementing the sequence number by the array count when reserving the
205  *     first descriptor in the array. In order to satisfy Req3, the sequence
206  *     number of the first descriptor in the array is initialized to minus
207  *     the array count. Then, upon the first reservation, it is incremented
208  *     to 0, thus satisfying Req3.
209  *
210  *   Hack #2
211  *     prb_first_seq() can be called at any time by readers to retrieve the
212  *     sequence number of the tail descriptor. However, due to Req2 and Req3,
213  *     initially there are no records to report the sequence number of
214  *     (sequence numbers are u64 and there is nothing less than 0). To handle
215  *     this, the sequence number of the initial tail descriptor is initialized
216  *     to 0. Technically this is incorrect, because there is no record with
217  *     sequence number 0 (yet) and the tail descriptor is not the first
218  *     descriptor in the array. But it allows prb_read_valid() to correctly
219  *     report the existence of a record for _any_ given sequence number at all
220  *     times. Bootstrapping is complete when the tail is pushed the first
221  *     time, thus finally pointing to the first descriptor reserved by a
222  *     writer, which has the assigned sequence number 0.
223  */
224 
225 /*
226  * Initiating Logical Value Overflows
227  *
228  * Both logical position (lpos) and ID values can be mapped to array indexes
229  * but may experience overflows during the lifetime of the system. To ensure
230  * that printk_ringbuffer can handle the overflows for these types, initial
231  * values are chosen that map to the correct initial array indexes, but will
232  * result in overflows soon.
233  *
234  *   BLK0_LPOS
235  *     The initial @head_lpos and @tail_lpos for data rings. It is at index
236  *     0 and the lpos value is such that it will overflow on the first wrap.
237  *
238  *   DESC0_ID
239  *     The initial @head_id and @tail_id for the desc ring. It is at the last
240  *     index of the descriptor array (see Req3 above) and the ID value is such
241  *     that it will overflow on the second wrap.
242  */
/* Initial lpos: the negated data ring size, which maps to index 0. */
#define BLK0_LPOS(size_bits)	(-(_DATA_SIZE(size_bits)))
/*
 * Initial descriptor ID: maps to the last index of the descriptor array.
 *
 * NOTE(review): _DESCS_COUNT() is an unsigned int expression, so the
 * negated value is zero-extended before DESC_ID() masks it wherever
 * unsigned long is wider than unsigned int. Confirm that the intended
 * early ID overflow still happens on such configurations.
 */
#define DESC0_ID(count_bits)	DESC_ID(-(_DESCS_COUNT(count_bits) + 1))
/* Initial state variable: the initial ID in the reusable state. */
#define DESC0_SV(count_bits)	DESC_SV(DESC0_ID(count_bits), desc_reusable)
246 
/*
 * Define a ringbuffer with an external text data buffer. The same as
 * DEFINE_PRINTKRB() but requires specifying an external buffer for the
 * text data.
 *
 * @name:        The name of the ringbuffer variable.
 * @descbits:    The number of descriptors as a power-of-2 value.
 * @avgtextbits: The average text data size per record as a power-of-2 value.
 * @text_buf:    The external buffer used for the text data.
 *
 * Besides the ringbuffer itself, the static arrays _<name>_descs and
 * _<name>_infos are defined, each with 2 ^ descbits elements.
 *
 * All atomic_long_t fields are initialized with ATOMIC_LONG_INIT().
 *
 * Note: The specified external buffer must be of the size:
 *       2 ^ (descbits + avgtextbits)
 */
#define _DEFINE_PRINTKRB(name, descbits, avgtextbits, text_buf)			\
static struct prb_desc _##name##_descs[_DESCS_COUNT(descbits)] = {				\
	/* the initial head and tail */								\
	[_DESCS_COUNT(descbits) - 1] = {							\
		/* reusable */									\
		.state_var	= ATOMIC_LONG_INIT(DESC0_SV(descbits)),				\
		/* no associated data block */							\
		.text_blk_lpos	= FAILED_BLK_LPOS,						\
	},											\
};												\
static struct printk_info _##name##_infos[_DESCS_COUNT(descbits)] = {				\
	/* this will be the first record reserved by a writer */				\
	[0] = {											\
		/* will be incremented to 0 on the first reservation */				\
		.seq = -(u64)_DESCS_COUNT(descbits),						\
	},											\
	/* the initial head and tail */								\
	[_DESCS_COUNT(descbits) - 1] = {							\
		/* reports the first seq value during the bootstrap phase */			\
		.seq = 0,									\
	},											\
};												\
static struct printk_ringbuffer name = {							\
	.desc_ring = {										\
		.count_bits	= (descbits),							\
		.descs		= &_##name##_descs[0],						\
		.infos		= &_##name##_infos[0],						\
		.head_id	= ATOMIC_LONG_INIT(DESC0_ID(descbits)),				\
		.tail_id	= ATOMIC_LONG_INIT(DESC0_ID(descbits)),				\
		.last_finalized_seq = ATOMIC_LONG_INIT(0),					\
	},											\
	.text_data_ring = {									\
		.size_bits	= (avgtextbits) + (descbits),					\
		.data		= (text_buf),							\
		.head_lpos	= ATOMIC_LONG_INIT(BLK0_LPOS((avgtextbits) + (descbits))),	\
		.tail_lpos	= ATOMIC_LONG_INIT(BLK0_LPOS((avgtextbits) + (descbits))),	\
	},											\
	.fail			= ATOMIC_LONG_INIT(0),						\
}
294 
/**
 * DEFINE_PRINTKRB() - Define a ringbuffer.
 *
 * @name:        The name of the ringbuffer variable.
 * @descbits:    The number of descriptors as a power-of-2 value.
 * @avgtextbits: The average text data size per record as a power-of-2 value.
 *
 * This is a macro for defining a ringbuffer and all internal structures
 * such that it is ready for immediate use. See _DEFINE_PRINTKRB() for a
 * variant where the text data buffer can be specified externally.
 *
 * The text buffer _<name>_text is sized with _DATA_SIZE(), the same
 * expression that gives the size of a data ring from its size bits, so the
 * buffer size and the ring's @size_bits cannot disagree.
 */
#define DEFINE_PRINTKRB(name, descbits, avgtextbits)				\
static char _##name##_text[_DATA_SIZE((avgtextbits) + (descbits))]		\
			__aligned(__alignof__(unsigned long));			\
_DEFINE_PRINTKRB(name, descbits, avgtextbits, &_##name##_text[0])
310 
311 /* Writer Interface */
312 
313 /**
314  * prb_rec_init_wr() - Initialize a buffer for writing records.
315  *
316  * @r:             The record to initialize.
317  * @text_buf_size: The needed text buffer size.
318  */
319 static inline void prb_rec_init_wr(struct printk_record *r,
320 				   unsigned int text_buf_size)
321 {
322 	r->info = NULL;
323 	r->text_buf = NULL;
324 	r->text_buf_size = text_buf_size;
325 }
326 
327 bool prb_reserve(struct prb_reserved_entry *e, struct printk_ringbuffer *rb,
328 		 struct printk_record *r);
329 bool prb_reserve_in_last(struct prb_reserved_entry *e, struct printk_ringbuffer *rb,
330 			 struct printk_record *r, u32 caller_id, unsigned int max_size);
331 void prb_commit(struct prb_reserved_entry *e);
332 void prb_final_commit(struct prb_reserved_entry *e);
333 
334 void prb_init(struct printk_ringbuffer *rb,
335 	      char *text_buf, unsigned int text_buf_size,
336 	      struct prb_desc *descs, unsigned int descs_count_bits,
337 	      struct printk_info *infos);
338 unsigned int prb_record_text_space(struct prb_reserved_entry *e);
339 
340 /* Reader Interface */
341 
342 /**
343  * prb_rec_init_rd() - Initialize a buffer for reading records.
344  *
345  * @r:             The record to initialize.
346  * @info:          A buffer to store record meta-data.
347  * @text_buf:      A buffer to store text data.
348  * @text_buf_size: The size of @text_buf.
349  *
350  * Initialize all the fields that a reader is interested in. All arguments
351  * (except @r) are optional. Only record data for arguments that are
352  * non-NULL or non-zero will be read.
353  */
354 static inline void prb_rec_init_rd(struct printk_record *r,
355 				   struct printk_info *info,
356 				   char *text_buf, unsigned int text_buf_size)
357 {
358 	r->info = info;
359 	r->text_buf = text_buf;
360 	r->text_buf_size = text_buf_size;
361 }
362 
/**
 * prb_for_each_record() - Iterate over the records of a ringbuffer.
 *
 * @from: The sequence number to begin with.
 * @rb:   The ringbuffer to iterate over.
 * @s:    A u64 to store the sequence number on each iteration.
 * @r:    A printk_record to store the record on each iteration.
 *
 * A convenience wrapper around prb_read_valid(). The loop ends as soon as
 * prb_read_valid() returns false. After each iteration @s is advanced to
 * @r->info->seq + 1.
 *
 * Note that @s may not be the sequence number of the record on each
 * iteration. For the sequence number, @r->info->seq should be checked.
 *
 * Context: Any context.
 */
#define prb_for_each_record(from, rb, s, r)		\
for ((s) = (from);					\
     prb_read_valid(rb, s, r);				\
     (s) = (r)->info->seq + 1)
379 
/**
 * prb_for_each_info() - Iterate over the meta data of a ringbuffer.
 *
 * @from: The sequence number to begin with.
 * @rb:   The ringbuffer to iterate over.
 * @s:    A u64 to store the sequence number on each iteration.
 * @i:    A printk_info to store the record meta data on each iteration.
 * @lc:   An unsigned int to store the text line count of each record.
 *
 * This is a macro for conveniently iterating over a ringbuffer using
 * prb_read_valid_info(). The loop ends as soon as prb_read_valid_info()
 * returns false.
 *
 * Note that @s may not be the sequence number of the record on each
 * iteration. For the sequence number, @i->seq should be checked.
 *
 * Context: Any context.
 */
#define prb_for_each_info(from, rb, s, i, lc) \
for ((s) = (from); prb_read_valid_info(rb, s, i, lc); (s) = (i)->seq + 1)
397 
398 bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq,
399 		    struct printk_record *r);
400 bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq,
401 			 struct printk_info *info, unsigned int *line_count);
402 
403 u64 prb_first_seq(struct printk_ringbuffer *rb);
404 u64 prb_first_valid_seq(struct printk_ringbuffer *rb);
405 u64 prb_next_seq(struct printk_ringbuffer *rb);
406 u64 prb_next_reserve_seq(struct printk_ringbuffer *rb);
407 
408 #ifdef CONFIG_64BIT
409 
/* 64-bit: both conversions pass the sequence number through unchanged. */
#define __u64seq_to_ulseq(seq64)		(seq64)
#define __ulseq_to_u64seq(ringbuf, seq_ul)	(seq_ul)
#define ULSEQ_MAX(ringbuf)			(-1)
413 
414 #else /* CONFIG_64BIT */
415 
/*
 * 32-bit: keep only the lower 32 bits of a 64-bit sequence number. The
 * argument is parenthesized so that the cast applies to the whole
 * expression, e.g. to the complete sum passed in by ULSEQ_MAX().
 */
#define __u64seq_to_ulseq(u64seq) ((u32)(u64seq))
/* The folded sequence number 2^31 records past prb_first_seq(). */
#define ULSEQ_MAX(rb) __u64seq_to_ulseq(prb_first_seq(rb) + 0x80000000UL)
418 
419 static inline u64 __ulseq_to_u64seq(struct printk_ringbuffer *rb, u32 ulseq)
420 {
421 	u64 rb_first_seq = prb_first_seq(rb);
422 	u64 seq;
423 
424 	/*
425 	 * The provided sequence is only the lower 32 bits of the ringbuffer
426 	 * sequence. It needs to be expanded to 64bit. Get the first sequence
427 	 * number from the ringbuffer and fold it.
428 	 *
429 	 * Having a 32bit representation in the console is sufficient.
430 	 * If a console ever gets more than 2^31 records behind
431 	 * the ringbuffer then this is the least of the problems.
432 	 *
433 	 * Also the access to the ring buffer is always safe.
434 	 */
435 	seq = rb_first_seq - (s32)((u32)rb_first_seq - ulseq);
436 
437 	return seq;
438 }
439 
440 #endif /* CONFIG_64BIT */
441 
442 #endif /* _KERNEL_PRINTK_RINGBUFFER_H */
443