xref: /linux/kernel/trace/simple_ring_buffer.c (revision 34e5b958bdad0f9cf16306368bbc2dc5b2a50143)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2025 - Google LLC
4  * Author: Vincent Donnefort <vdonnefort@google.com>
5  */
6 
7 #include <linux/atomic.h>
8 #include <linux/simple_ring_buffer.h>
9 
10 #include <asm/barrier.h>
11 #include <asm/local.h>
12 
/*
 * Type of a ring-buffer page link, stored in the two low bits of the
 * link.next pointer (pages are at least 4-byte aligned, so those bits are
 * free). HEAD marks the page *before* the current head; HEAD_MOVING is a
 * transient state while the writer moves the head flag forward.
 */
enum simple_rb_link_type {
	SIMPLE_RB_LINK_NORMAL		= 0,
	SIMPLE_RB_LINK_HEAD		= 1,
	SIMPLE_RB_LINK_HEAD_MOVING	= 2,
};

/* Mask clearing the type bits, leaving the actual list_head pointer. */
#define SIMPLE_RB_LINK_MASK (~(SIMPLE_RB_LINK_HEAD | SIMPLE_RB_LINK_HEAD_MOVING))
20 
21 static void simple_bpage_set_head_link(struct simple_buffer_page *bpage)
22 {
23 	unsigned long link = (unsigned long)bpage->link.next;
24 
25 	link &= SIMPLE_RB_LINK_MASK;
26 	link |= SIMPLE_RB_LINK_HEAD;
27 
28 	/*
29 	 * Paired with simple_rb_find_head() to order access between the head
30 	 * link and overrun. It ensures we always report an up-to-date value
31 	 * after swapping the reader page.
32 	 */
33 	smp_store_release(&bpage->link.next, (struct list_head *)link);
34 }
35 
36 static bool simple_bpage_unset_head_link(struct simple_buffer_page *bpage,
37 					 struct simple_buffer_page *dst,
38 					 enum simple_rb_link_type new_type)
39 {
40 	unsigned long *link = (unsigned long *)(&bpage->link.next);
41 	unsigned long old = (*link & SIMPLE_RB_LINK_MASK) | SIMPLE_RB_LINK_HEAD;
42 	unsigned long new = (unsigned long)(&dst->link) | new_type;
43 
44 	return try_cmpxchg(link, &old, new);
45 }
46 
47 static void simple_bpage_set_normal_link(struct simple_buffer_page *bpage)
48 {
49 	unsigned long link = (unsigned long)bpage->link.next;
50 
51 	WRITE_ONCE(bpage->link.next, (struct list_head *)(link & SIMPLE_RB_LINK_MASK));
52 }
53 
54 static struct simple_buffer_page *simple_bpage_from_link(struct list_head *link)
55 {
56 	unsigned long ptr = (unsigned long)link & SIMPLE_RB_LINK_MASK;
57 
58 	return container_of((struct list_head *)ptr, struct simple_buffer_page, link);
59 }
60 
61 static struct simple_buffer_page *simple_bpage_next_page(struct simple_buffer_page *bpage)
62 {
63 	return simple_bpage_from_link(bpage->link.next);
64 }
65 
66 static void simple_bpage_reset(struct simple_buffer_page *bpage)
67 {
68 	bpage->write = 0;
69 	bpage->entries = 0;
70 
71 	local_set(&bpage->page->commit, 0);
72 }
73 
74 static void simple_bpage_init(struct simple_buffer_page *bpage, unsigned long page)
75 {
76 	INIT_LIST_HEAD(&bpage->link);
77 	bpage->page = (struct buffer_data_page *)page;
78 
79 	simple_bpage_reset(bpage);
80 }
81 
/*
 * Single-writer increment of a meta-page counter. WRITE_ONCE() prevents
 * torn stores towards concurrent readers of the meta page. Operands are
 * fully parenthesized so expression arguments cannot rebind (the original
 * expanded to `__meta + __inc` unparenthesized).
 */
#define simple_rb_meta_inc(__meta, __inc)		\
	WRITE_ONCE((__meta), ((__meta) + (__inc)))
84 
85 static bool simple_rb_loaded(struct simple_rb_per_cpu *cpu_buffer)
86 {
87 	return !!cpu_buffer->bpages;
88 }
89 
/*
 * Walk the ring from the cached head looking for the page whose prev->next
 * link carries the HEAD tag, and cache it in cpu_buffer->head_page.
 *
 * Bounded to 2 * nr_pages steps in case the writer keeps moving the head
 * under us; returns -EBUSY when the head could not be pinned down.
 */
static int simple_rb_find_head(struct simple_rb_per_cpu *cpu_buffer)
{
	int retry = cpu_buffer->nr_pages * 2;
	struct simple_buffer_page *head;

	head = cpu_buffer->head_page;

	while (retry--) {
		unsigned long link;

spin:
		/* See smp_store_release in simple_bpage_set_head_link() */
		link = (unsigned long)smp_load_acquire(&head->link.prev->next);

		switch (link & ~SIMPLE_RB_LINK_MASK) {
		/* Found the head */
		case SIMPLE_RB_LINK_HEAD:
			cpu_buffer->head_page = head;
			return 0;
		/* The writer caught the head, we can spin, that won't be long */
		case SIMPLE_RB_LINK_HEAD_MOVING:
			goto spin;
		}

		/* NORMAL link: not the head, keep walking forward. */
		head = simple_bpage_next_page(head);
	}

	return -EBUSY;
}
119 
120 /**
121  * simple_ring_buffer_swap_reader_page - Swap ring-buffer head with the reader
122  * @cpu_buffer: A simple_rb_per_cpu
123  *
124  * This function enables consuming reading. It ensures the current head page will not be overwritten
125  * and can be safely read.
126  *
127  * Returns 0 on success, -ENODEV if @cpu_buffer was unloaded or -EBUSY if we failed to catch the
128  * head page.
129  */
130 int simple_ring_buffer_swap_reader_page(struct simple_rb_per_cpu *cpu_buffer)
131 {
132 	struct simple_buffer_page *last, *head, *reader;
133 	unsigned long overrun;
134 	int retry = 8;
135 	int ret;
136 
137 	if (!simple_rb_loaded(cpu_buffer))
138 		return -ENODEV;
139 
140 	reader = cpu_buffer->reader_page;
141 
142 	do {
143 		/* Run after the writer to find the head */
144 		ret = simple_rb_find_head(cpu_buffer);
145 		if (ret)
146 			return ret;
147 
148 		head = cpu_buffer->head_page;
149 
150 		/* Connect the reader page around the header page */
151 		reader->link.next = head->link.next;
152 		reader->link.prev = head->link.prev;
153 
154 		/* The last page before the head */
155 		last = simple_bpage_from_link(head->link.prev);
156 
157 		/* The reader page points to the new header page */
158 		simple_bpage_set_head_link(reader);
159 
160 		overrun = cpu_buffer->meta->overrun;
161 	} while (!simple_bpage_unset_head_link(last, reader, SIMPLE_RB_LINK_NORMAL) && retry--);
162 
163 	if (!retry)
164 		return -EINVAL;
165 
166 	cpu_buffer->head_page = simple_bpage_from_link(reader->link.next);
167 	cpu_buffer->head_page->link.prev = &reader->link;
168 	cpu_buffer->reader_page = head;
169 	cpu_buffer->meta->reader.lost_events = overrun - cpu_buffer->last_overrun;
170 	cpu_buffer->meta->reader.id = cpu_buffer->reader_page->id;
171 	cpu_buffer->last_overrun = overrun;
172 
173 	return 0;
174 }
175 EXPORT_SYMBOL_GPL(simple_ring_buffer_swap_reader_page);
176 
/*
 * Advance the writer to the next page, resetting it for reuse. If the next
 * page is the head, the head flag is moved forward first (overwriting
 * mode), accounting the discarded entries as overrun.
 */
static struct simple_buffer_page *simple_rb_move_tail(struct simple_rb_per_cpu *cpu_buffer)
{
	struct simple_buffer_page *tail, *new_tail;

	tail = cpu_buffer->tail_page;
	new_tail = simple_bpage_next_page(tail);

	/* Try to tag the tail link HEAD_MOVING; only succeeds if it was HEAD. */
	if (simple_bpage_unset_head_link(tail, new_tail, SIMPLE_RB_LINK_HEAD_MOVING)) {
		/*
		 * Oh no! we've caught the head. There is none anymore and
		 * swap_reader will spin until we set the new one. Overrun must
		 * be written first, to make sure we report the correct number
		 * of lost events.
		 */
		simple_rb_meta_inc(cpu_buffer->meta->overrun, new_tail->entries);
		simple_rb_meta_inc(cpu_buffer->meta->pages_lost, 1);

		/* Publish the new head, then drop the MOVING tag on the old link. */
		simple_bpage_set_head_link(new_tail);
		simple_bpage_set_normal_link(tail);
	}

	simple_bpage_reset(new_tail);
	cpu_buffer->tail_page = new_tail;

	simple_rb_meta_inc(cpu_buffer->meta->pages_touched, 1);

	return new_tail;
}
205 
206 static unsigned long rb_event_size(unsigned long length)
207 {
208 	struct ring_buffer_event *event;
209 
210 	return length + RB_EVNT_HDR_SIZE + sizeof(event->array[0]);
211 }
212 
213 static struct ring_buffer_event *
214 rb_event_add_ts_extend(struct ring_buffer_event *event, u64 delta)
215 {
216 	event->type_len = RINGBUF_TYPE_TIME_EXTEND;
217 	event->time_delta = delta & TS_MASK;
218 	event->array[0] = delta >> TS_SHIFT;
219 
220 	return (struct ring_buffer_event *)((unsigned long)event + 8);
221 }
222 
/*
 * Carve out space for one event of @length payload bytes at @timestamp and
 * write its header. Single-writer: callers serialize via cpu_buffer->status.
 */
static struct ring_buffer_event *
simple_rb_reserve_next(struct simple_rb_per_cpu *cpu_buffer, unsigned long length, u64 timestamp)
{
	unsigned long ts_ext_size = 0, event_size = rb_event_size(length);
	struct simple_buffer_page *tail = cpu_buffer->tail_page;
	struct ring_buffer_event *event;
	u32 write, prev_write;
	u64 time_delta;

	time_delta = timestamp - cpu_buffer->write_stamp;

	/* Delta too large for the header field: plan a TIME_EXTEND event. */
	if (test_time_stamp(time_delta))
		ts_ext_size = 8;

	prev_write = tail->write;
	write = prev_write + event_size + ts_ext_size;

	if (unlikely(write > (PAGE_SIZE - BUF_PAGE_HDR_SIZE)))
		tail = simple_rb_move_tail(cpu_buffer);

	/*
	 * First event on the page: the page header carries the absolute
	 * timestamp, so no delta/extend is needed and the cursor restarts.
	 */
	if (!tail->entries) {
		tail->page->time_stamp = timestamp;
		time_delta = 0;
		ts_ext_size = 0;
		write = event_size;
		prev_write = 0;
	}

	tail->write = write;
	tail->entries++;

	cpu_buffer->write_stamp = timestamp;

	event = (struct ring_buffer_event *)(tail->page->data + prev_write);
	if (ts_ext_size) {
		/* The extend consumed the delta; the event itself gets 0. */
		event = rb_event_add_ts_extend(event, time_delta);
		time_delta = 0;
	}

	/* type_len 0: payload length is stored in array[0] instead. */
	event->type_len = 0;
	event->time_delta = time_delta;
	event->array[0] = event_size - RB_EVNT_HDR_SIZE;

	return event;
}
268 
269 /**
270  * simple_ring_buffer_reserve - Reserve an entry in @cpu_buffer
271  * @cpu_buffer:	A simple_rb_per_cpu
272  * @length:	Size of the entry in bytes
273  * @timestamp:	Timestamp of the entry
274  *
275  * Returns the address of the entry where to write data or NULL
276  */
277 void *simple_ring_buffer_reserve(struct simple_rb_per_cpu *cpu_buffer, unsigned long length,
278 				 u64 timestamp)
279 {
280 	struct ring_buffer_event *rb_event;
281 
282 	if (cmpxchg(&cpu_buffer->status, SIMPLE_RB_READY, SIMPLE_RB_WRITING) != SIMPLE_RB_READY)
283 		return NULL;
284 
285 	rb_event = simple_rb_reserve_next(cpu_buffer, length, timestamp);
286 
287 	return &rb_event->array[1];
288 }
289 EXPORT_SYMBOL_GPL(simple_ring_buffer_reserve);
290 
291 /**
292  * simple_ring_buffer_commit - Commit the entry reserved with simple_ring_buffer_reserve()
293  * @cpu_buffer:	The simple_rb_per_cpu where the entry has been reserved
294  */
void simple_ring_buffer_commit(struct simple_rb_per_cpu *cpu_buffer)
{
	/*
	 * Publish the reserved data: readers only trust bytes up to
	 * page->commit, so advance it to the current write cursor.
	 */
	local_set(&cpu_buffer->tail_page->page->commit,
		  cpu_buffer->tail_page->write);
	simple_rb_meta_inc(cpu_buffer->meta->entries, 1);

	/*
	 * Paired with simple_rb_enable_tracing() to ensure data is
	 * written to the ring-buffer before teardown.
	 */
	smp_store_release(&cpu_buffer->status, SIMPLE_RB_READY);
}
EXPORT_SYMBOL_GPL(simple_ring_buffer_commit);
308 
/*
 * Flip the writer status and return the status observed before the switch.
 *
 * enable=true:  UNAVAILABLE -> READY (no-op from any other state).
 * enable=false: READY -> UNAVAILABLE, spinning while a writer holds the
 *		 buffer (WRITING). The acquire pairs with the release in
 *		 simple_ring_buffer_commit() so committed data is visible
 *		 before the buffer is declared unavailable.
 */
static u32 simple_rb_enable_tracing(struct simple_rb_per_cpu *cpu_buffer, bool enable)
{
	u32 prev_status;

	if (enable)
		return cmpxchg(&cpu_buffer->status, SIMPLE_RB_UNAVAILABLE, SIMPLE_RB_READY);

	/* Wait for the buffer to be released */
	do {
		prev_status = cmpxchg_acquire(&cpu_buffer->status,
					      SIMPLE_RB_READY,
					      SIMPLE_RB_UNAVAILABLE);
	} while (prev_status == SIMPLE_RB_WRITING);

	return prev_status;
}
325 
326 /**
327  * simple_ring_buffer_reset - Reset @cpu_buffer
328  * @cpu_buffer: A simple_rb_per_cpu
329  *
330  * This will not clear the content of the data, only reset counters and pointers
331  *
332  * Returns 0 on success or -ENODEV if @cpu_buffer was unloaded.
333  */
int simple_ring_buffer_reset(struct simple_rb_per_cpu *cpu_buffer)
{
	struct simple_buffer_page *bpage;
	u32 prev_status;
	int ret;

	if (!simple_rb_loaded(cpu_buffer))
		return -ENODEV;

	/* Quiesce the writer; remember whether tracing was on to restore it. */
	prev_status = simple_rb_enable_tracing(cpu_buffer, false);

	/*
	 * NOTE(review): on failure here, tracing stays disabled even when
	 * prev_status was READY — confirm this is the intended contract.
	 */
	ret = simple_rb_find_head(cpu_buffer);
	if (ret)
		return ret;

	/* Walk the whole ring from the head, wiping every page's counters. */
	bpage = cpu_buffer->tail_page = cpu_buffer->head_page;
	do {
		simple_bpage_reset(bpage);
		bpage = simple_bpage_next_page(bpage);
	} while (bpage != cpu_buffer->head_page);

	simple_bpage_reset(cpu_buffer->reader_page);

	cpu_buffer->last_overrun = 0;
	cpu_buffer->write_stamp = 0;

	/* Clear the reader-visible statistics in the meta page. */
	cpu_buffer->meta->reader.read = 0;
	cpu_buffer->meta->reader.lost_events = 0;
	cpu_buffer->meta->entries = 0;
	cpu_buffer->meta->overrun = 0;
	cpu_buffer->meta->read = 0;
	cpu_buffer->meta->pages_lost = 0;
	cpu_buffer->meta->pages_touched = 0;

	if (prev_status == SIMPLE_RB_READY)
		simple_rb_enable_tracing(cpu_buffer, true);

	return 0;
}
EXPORT_SYMBOL_GPL(simple_ring_buffer_reset);
374 
375 /**
376  * simple_ring_buffer_init - Init @cpu_buffer based on @desc
377  * @cpu_buffer:	A simple_rb_per_cpu buffer to init, allocated by the caller.
378  * @bpages:	Array of simple_buffer_pages, with as many elements as @desc->nr_page_va
379  * @desc:	A ring_buffer_desc
380  *
381  * Returns 0 on success or -EINVAL if the content of @desc is invalid
382  */
383 int simple_ring_buffer_init(struct simple_rb_per_cpu *cpu_buffer, struct simple_buffer_page *bpages,
384 			    const struct ring_buffer_desc *desc)
385 {
386 	struct simple_buffer_page *bpage = bpages;
387 	int i;
388 
389 	/* At least 1 reader page and two pages in the ring-buffer */
390 	if (desc->nr_page_va < 3)
391 		return -EINVAL;
392 
393 	memset(cpu_buffer, 0, sizeof(*cpu_buffer));
394 
395 	cpu_buffer->bpages = bpages;
396 
397 	cpu_buffer->meta = (void *)desc->meta_va;
398 	memset(cpu_buffer->meta, 0, sizeof(*cpu_buffer->meta));
399 	cpu_buffer->meta->meta_page_size = PAGE_SIZE;
400 	cpu_buffer->meta->nr_subbufs = cpu_buffer->nr_pages;
401 
402 	/* The reader page is not part of the ring initially */
403 	simple_bpage_init(bpage, desc->page_va[0]);
404 	bpage->id = 0;
405 
406 	cpu_buffer->nr_pages = 1;
407 
408 	cpu_buffer->reader_page = bpage;
409 	cpu_buffer->tail_page = bpage + 1;
410 	cpu_buffer->head_page = bpage + 1;
411 
412 	for (i = 1; i < desc->nr_page_va; i++) {
413 		simple_bpage_init(++bpage, desc->page_va[i]);
414 
415 		bpage->link.next = &(bpage + 1)->link;
416 		bpage->link.prev = &(bpage - 1)->link;
417 		bpage->id = i;
418 
419 		cpu_buffer->nr_pages = i + 1;
420 	}
421 
422 	/* Close the ring */
423 	bpage->link.next = &cpu_buffer->tail_page->link;
424 	cpu_buffer->tail_page->link.prev = &bpage->link;
425 
426 	/* The last init'ed page points to the head page */
427 	simple_bpage_set_head_link(bpage);
428 
429 	return 0;
430 }
431 EXPORT_SYMBOL_GPL(simple_ring_buffer_init);
432 
433 /**
434  * simple_ring_buffer_unload - Prepare @cpu_buffer for deletion
435  * @cpu_buffer:	A simple_rb_per_cpu that will be deleted.
436  */
437 void simple_ring_buffer_unload(struct simple_rb_per_cpu *cpu_buffer)
438 {
439 	if (!simple_rb_loaded(cpu_buffer))
440 		return;
441 
442 	simple_rb_enable_tracing(cpu_buffer, false);
443 
444 	cpu_buffer->bpages = NULL;
445 }
446 EXPORT_SYMBOL_GPL(simple_ring_buffer_unload);
447 
448 /**
449  * simple_ring_buffer_enable_tracing - Enable or disable writing to @cpu_buffer
450  * @cpu_buffer: A simple_rb_per_cpu
451  * @enable:	True to enable tracing, False to disable it
452  *
453  * Returns 0 on success or -ENODEV if @cpu_buffer was unloaded
454  */
455 int simple_ring_buffer_enable_tracing(struct simple_rb_per_cpu *cpu_buffer, bool enable)
456 {
457 	if (!simple_rb_loaded(cpu_buffer))
458 		return -ENODEV;
459 
460 	simple_rb_enable_tracing(cpu_buffer, enable);
461 
462 	return 0;
463 }
464 EXPORT_SYMBOL_GPL(simple_ring_buffer_enable_tracing);
465