xref: /linux/kernel/trace/trace_remote.c (revision 9af4ab0e11e336e2671d303ffcc6578e3546d9fc)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2025 - Google LLC
4  * Author: Vincent Donnefort <vdonnefort@google.com>
5  */
6 
7 #include <linux/kstrtox.h>
8 #include <linux/lockdep.h>
9 #include <linux/mutex.h>
10 #include <linux/tracefs.h>
11 #include <linux/trace_remote.h>
12 #include <linux/trace_seq.h>
13 #include <linux/types.h>
14 
15 #include "trace.h"
16 
17 #define TRACEFS_DIR		"remotes"
18 #define TRACEFS_MODE_WRITE	0640
19 #define TRACEFS_MODE_READ	0440
20 
/*
 * State for a single trace_pipe reader. Allocated by trace_remote_iter() and
 * released by trace_remote_iter_free(), both under the remote's lock.
 */
struct trace_remote_iterator {
	struct trace_remote		*remote;	/* Remote being read */
	struct trace_seq		seq;		/* Formatting buffer, persists across read() calls */
	struct delayed_work		poll_work;	/* Periodic polling of the remote writer */
	unsigned long			lost_events;	/* Lost-event count reported for the peeked event */
	u64				ts;		/* Timestamp of the peeked event */
	int				cpu;		/* CPU this reader follows, or RING_BUFFER_ALL_CPUS */
	int				evt_cpu;	/* CPU owning the currently peeked event */
};
30 
/* Kernel-side representation of a remote (out-of-kernel) tracing entity */
struct trace_remote {
	struct trace_remote_callbacks	*cbs;			/* Callbacks controlling the remote */
	void				*priv;			/* Opaque cookie passed to every callback */
	struct trace_buffer		*trace_buffer;		/* Local ring-buffer view; NULL when unloaded */
	struct trace_buffer_desc	*trace_buffer_desc;	/* Descriptor returned by load_trace_buffer() */
	unsigned long			trace_buffer_size;	/* Requested buffer size, in bytes */
	struct ring_buffer_remote	rb_remote;		/* Glue handed to ring_buffer_alloc_remote() */
	struct mutex			lock;			/* Serializes load/unload and state changes */
	unsigned int			nr_readers;		/* Live trace_pipe iterators */
	unsigned int			poll_ms;		/* trace_pipe polling period, in ms */
	bool				tracing_on;		/* Remote currently allowed to write events */
};
43 
44 static bool trace_remote_loaded(struct trace_remote *remote)
45 {
46 	return !!remote->trace_buffer;
47 }
48 
49 static int trace_remote_load(struct trace_remote *remote)
50 {
51 	struct ring_buffer_remote *rb_remote = &remote->rb_remote;
52 	struct trace_buffer_desc *desc;
53 
54 	lockdep_assert_held(&remote->lock);
55 
56 	if (trace_remote_loaded(remote))
57 		return 0;
58 
59 	desc = remote->cbs->load_trace_buffer(remote->trace_buffer_size, remote->priv);
60 	if (IS_ERR(desc))
61 		return PTR_ERR(desc);
62 
63 	rb_remote->desc = desc;
64 	rb_remote->swap_reader_page = remote->cbs->swap_reader_page;
65 	rb_remote->priv = remote->priv;
66 	rb_remote->reset = remote->cbs->reset;
67 	remote->trace_buffer = ring_buffer_alloc_remote(rb_remote);
68 	if (!remote->trace_buffer) {
69 		remote->cbs->unload_trace_buffer(desc, remote->priv);
70 		return -ENOMEM;
71 	}
72 
73 	remote->trace_buffer_desc = desc;
74 
75 	return 0;
76 }
77 
78 static void trace_remote_try_unload(struct trace_remote *remote)
79 {
80 	lockdep_assert_held(&remote->lock);
81 
82 	if (!trace_remote_loaded(remote))
83 		return;
84 
85 	/* The buffer is being read or writable */
86 	if (remote->nr_readers || remote->tracing_on)
87 		return;
88 
89 	/* The buffer has readable data */
90 	if (!ring_buffer_empty(remote->trace_buffer))
91 		return;
92 
93 	ring_buffer_free(remote->trace_buffer);
94 	remote->trace_buffer = NULL;
95 	remote->cbs->unload_trace_buffer(remote->trace_buffer_desc, remote->priv);
96 }
97 
98 static int trace_remote_enable_tracing(struct trace_remote *remote)
99 {
100 	int ret;
101 
102 	lockdep_assert_held(&remote->lock);
103 
104 	if (remote->tracing_on)
105 		return 0;
106 
107 	ret = trace_remote_load(remote);
108 	if (ret)
109 		return ret;
110 
111 	ret = remote->cbs->enable_tracing(true, remote->priv);
112 	if (ret) {
113 		trace_remote_try_unload(remote);
114 		return ret;
115 	}
116 
117 	remote->tracing_on = true;
118 
119 	return 0;
120 }
121 
122 static int trace_remote_disable_tracing(struct trace_remote *remote)
123 {
124 	int ret;
125 
126 	lockdep_assert_held(&remote->lock);
127 
128 	if (!remote->tracing_on)
129 		return 0;
130 
131 	ret = remote->cbs->enable_tracing(false, remote->priv);
132 	if (ret)
133 		return ret;
134 
135 	ring_buffer_poll_remote(remote->trace_buffer, RING_BUFFER_ALL_CPUS);
136 	remote->tracing_on = false;
137 	trace_remote_try_unload(remote);
138 
139 	return 0;
140 }
141 
142 static void trace_remote_reset(struct trace_remote *remote, int cpu)
143 {
144 	lockdep_assert_held(&remote->lock);
145 
146 	if (!trace_remote_loaded(remote))
147 		return;
148 
149 	if (cpu == RING_BUFFER_ALL_CPUS)
150 		ring_buffer_reset(remote->trace_buffer);
151 	else
152 		ring_buffer_reset_cpu(remote->trace_buffer, cpu);
153 
154 	trace_remote_try_unload(remote);
155 }
156 
157 static ssize_t
158 tracing_on_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
159 {
160 	struct trace_remote *remote = filp->private_data;
161 	unsigned long val;
162 	int ret;
163 
164 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
165 	if (ret)
166 		return ret;
167 
168 	guard(mutex)(&remote->lock);
169 
170 	ret = val ? trace_remote_enable_tracing(remote) : trace_remote_disable_tracing(remote);
171 	if (ret)
172 		return ret;
173 
174 	return cnt;
175 }
176 static int tracing_on_show(struct seq_file *s, void *unused)
177 {
178 	struct trace_remote *remote = s->private;
179 
180 	seq_printf(s, "%d\n", remote->tracing_on);
181 
182 	return 0;
183 }
184 DEFINE_SHOW_STORE_ATTRIBUTE(tracing_on);
185 
186 static ssize_t buffer_size_kb_write(struct file *filp, const char __user *ubuf, size_t cnt,
187 				    loff_t *ppos)
188 {
189 	struct trace_remote *remote = filp->private_data;
190 	unsigned long val;
191 	int ret;
192 
193 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
194 	if (ret)
195 		return ret;
196 
197 	/* KiB to Bytes */
198 	if (!val || check_shl_overflow(val, 10, &val))
199 		return -EINVAL;
200 
201 	guard(mutex)(&remote->lock);
202 
203 	if (trace_remote_loaded(remote))
204 		return -EBUSY;
205 
206 	remote->trace_buffer_size = val;
207 
208 	return cnt;
209 }
210 
211 static int buffer_size_kb_show(struct seq_file *s, void *unused)
212 {
213 	struct trace_remote *remote = s->private;
214 
215 	seq_printf(s, "%lu (%s)\n", remote->trace_buffer_size >> 10,
216 		   trace_remote_loaded(remote) ? "loaded" : "unloaded");
217 
218 	return 0;
219 }
220 DEFINE_SHOW_STORE_ATTRIBUTE(buffer_size_kb);
221 
222 static int trace_remote_get(struct trace_remote *remote, int cpu)
223 {
224 	int ret;
225 
226 	if (remote->nr_readers == UINT_MAX)
227 		return -EBUSY;
228 
229 	ret = trace_remote_load(remote);
230 	if (ret)
231 		return ret;
232 
233 	remote->nr_readers++;
234 
235 	return 0;
236 }
237 
238 static void trace_remote_put(struct trace_remote *remote)
239 {
240 	if (WARN_ON(!remote->nr_readers))
241 		return;
242 
243 	remote->nr_readers--;
244 	if (remote->nr_readers)
245 		return;
246 
247 	trace_remote_try_unload(remote);
248 }
249 
250 static void __poll_remote(struct work_struct *work)
251 {
252 	struct delayed_work *dwork = to_delayed_work(work);
253 	struct trace_remote_iterator *iter;
254 
255 	iter = container_of(dwork, struct trace_remote_iterator, poll_work);
256 	ring_buffer_poll_remote(iter->remote->trace_buffer, iter->cpu);
257 	schedule_delayed_work((struct delayed_work *)work,
258 			      msecs_to_jiffies(iter->remote->poll_ms));
259 }
260 
261 static struct trace_remote_iterator *trace_remote_iter(struct trace_remote *remote, int cpu)
262 {
263 	struct trace_remote_iterator *iter = NULL;
264 	int ret;
265 
266 	lockdep_assert_held(&remote->lock);
267 
268 
269 	ret = trace_remote_get(remote, cpu);
270 	if (ret)
271 		return ERR_PTR(ret);
272 
273 	/* Test the CPU */
274 	ret = ring_buffer_poll_remote(remote->trace_buffer, cpu);
275 	if (ret)
276 		goto err;
277 
278 	iter = kzalloc_obj(*iter);
279 	if (iter) {
280 		iter->remote = remote;
281 		iter->cpu = cpu;
282 		trace_seq_init(&iter->seq);
283 		INIT_DELAYED_WORK(&iter->poll_work, __poll_remote);
284 		schedule_delayed_work(&iter->poll_work, msecs_to_jiffies(remote->poll_ms));
285 
286 		return iter;
287 	}
288 	ret = -ENOMEM;
289 
290 err:
291 	kfree(iter);
292 	trace_remote_put(remote);
293 
294 	return ERR_PTR(ret);
295 }
296 
297 static void trace_remote_iter_free(struct trace_remote_iterator *iter)
298 {
299 	struct trace_remote *remote;
300 
301 	if (!iter)
302 		return;
303 
304 	remote = iter->remote;
305 
306 	lockdep_assert_held(&remote->lock);
307 
308 	kfree(iter);
309 	trace_remote_put(remote);
310 }
311 
312 static bool trace_remote_iter_read_event(struct trace_remote_iterator *iter)
313 {
314 	struct trace_buffer *trace_buffer = iter->remote->trace_buffer;
315 	int cpu = iter->cpu;
316 
317 	if (cpu != RING_BUFFER_ALL_CPUS) {
318 		if (ring_buffer_empty_cpu(trace_buffer, cpu))
319 			return false;
320 
321 		if (!ring_buffer_peek(trace_buffer, cpu, &iter->ts, &iter->lost_events))
322 			return false;
323 
324 		iter->evt_cpu = cpu;
325 		return true;
326 	}
327 
328 	iter->ts = U64_MAX;
329 	for_each_possible_cpu(cpu) {
330 		unsigned long lost_events;
331 		u64 ts;
332 
333 		if (ring_buffer_empty_cpu(trace_buffer, cpu))
334 			continue;
335 
336 		if (!ring_buffer_peek(trace_buffer, cpu, &ts, &lost_events))
337 			continue;
338 
339 		if (ts >= iter->ts)
340 			continue;
341 
342 		iter->ts = ts;
343 		iter->evt_cpu = cpu;
344 		iter->lost_events = lost_events;
345 	}
346 
347 	return iter->ts != U64_MAX;
348 }
349 
/*
 * Format the header of the currently peeked event into iter->seq as
 * "[CPU]\t<secs>.<usecs>: ", preceded by a LOST EVENTS line when events were
 * dropped. Assumes iter->ts is in nanoseconds — TODO confirm with the remote
 * ring-buffer timestamp convention.
 *
 * Returns 0 on success, -EOVERFLOW when the seq buffer is full.
 */
static int trace_remote_iter_print_event(struct trace_remote_iterator *iter)
{
	unsigned long usecs_rem;
	u64 ts = iter->ts;

	if (iter->lost_events)
		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
				 iter->evt_cpu, iter->lost_events);

	/* do_div() divides in place and returns the remainder: ns -> us, then
	 * split into seconds (left in ts) and the microsecond remainder. */
	do_div(ts, 1000);
	usecs_rem = do_div(ts, USEC_PER_SEC);

	trace_seq_printf(&iter->seq, "[%03d]\t%5llu.%06lu: ", iter->evt_cpu,
			 ts, usecs_rem);

	return trace_seq_has_overflowed(&iter->seq) ? -EOVERFLOW : 0;
}
367 
368 static int trace_pipe_open(struct inode *inode, struct file *filp)
369 {
370 	struct trace_remote *remote = inode->i_private;
371 	struct trace_remote_iterator *iter;
372 	int cpu = RING_BUFFER_ALL_CPUS;
373 
374 	if (inode->i_cdev)
375 		cpu = (long)inode->i_cdev - 1;
376 
377 	guard(mutex)(&remote->lock);
378 	iter = trace_remote_iter(remote, cpu);
379 	filp->private_data = iter;
380 
381 	return IS_ERR(iter) ? PTR_ERR(iter) : 0;
382 }
383 
384 static int trace_pipe_release(struct inode *inode, struct file *filp)
385 {
386 	struct trace_remote_iterator *iter = filp->private_data;
387 	struct trace_remote *remote = iter->remote;
388 
389 	guard(mutex)(&remote->lock);
390 
391 	trace_remote_iter_free(iter);
392 
393 	return 0;
394 }
395 
/*
 * Read formatted events from the remote's ring-buffer.
 *
 * iter->seq persists across read() calls: text that did not fit in the user
 * buffer previously is flushed first. Once the seq is drained (presumably
 * signalled by trace_seq_to_user() returning -EBUSY — matches the re-init
 * below), wait for events, then format and consume as many as fit, and loop
 * back to copy the result out.
 */
static ssize_t trace_pipe_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
{
	struct trace_remote_iterator *iter = filp->private_data;
	struct trace_buffer *trace_buffer = iter->remote->trace_buffer;
	int ret;

copy_to_user:
	/* Flush pending seq content; anything but -EBUSY ends the read */
	ret = trace_seq_to_user(&iter->seq, ubuf, cnt);
	if (ret != -EBUSY)
		return ret;

	trace_seq_init(&iter->seq);

	/* Block until events are available (0: no fill watermark) */
	ret = ring_buffer_wait(trace_buffer, iter->cpu, 0, NULL, NULL);
	if (ret < 0)
		return ret;

	while (trace_remote_iter_read_event(iter)) {
		int prev_len = iter->seq.seq.len;

		if (trace_remote_iter_print_event(iter)) {
			/* seq full: undo the partial print, flush what we have */
			iter->seq.seq.len = prev_len;
			break;
		}

		/* Event fully formatted — safe to remove it from the buffer */
		ring_buffer_consume(trace_buffer, iter->evt_cpu, NULL, NULL);
	}

	goto copy_to_user;
}
426 
/* Consuming reader interface (global and per-CPU trace_pipe files) */
static const struct file_operations trace_pipe_fops = {
	.open		= trace_pipe_open,
	.read		= trace_pipe_read,
	.release	= trace_pipe_release,
};
432 
433 static ssize_t trace_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
434 {
435 	struct inode *inode = file_inode(filp);
436 	struct trace_remote *remote = inode->i_private;
437 	int cpu = RING_BUFFER_ALL_CPUS;
438 
439 	if (inode->i_cdev)
440 		cpu = (long)inode->i_cdev - 1;
441 
442 	guard(mutex)(&remote->lock);
443 
444 	trace_remote_reset(remote, cpu);
445 
446 	return cnt;
447 }
448 
/* Write-only "trace" files: a write of any content resets the buffer */
static const struct file_operations trace_fops = {
	.write		= trace_write,
};
452 
453 static int trace_remote_init_tracefs(const char *name, struct trace_remote *remote)
454 {
455 	struct dentry *remote_d, *percpu_d, *d;
456 	static struct dentry *root;
457 	static DEFINE_MUTEX(lock);
458 	bool root_inited = false;
459 	int cpu;
460 
461 	guard(mutex)(&lock);
462 
463 	if (!root) {
464 		root = tracefs_create_dir(TRACEFS_DIR, NULL);
465 		if (!root) {
466 			pr_err("Failed to create tracefs dir "TRACEFS_DIR"\n");
467 			return -ENOMEM;
468 		}
469 		root_inited = true;
470 	}
471 
472 	remote_d = tracefs_create_dir(name, root);
473 	if (!remote_d) {
474 		pr_err("Failed to create tracefs dir "TRACEFS_DIR"%s/\n", name);
475 		goto err;
476 	}
477 
478 	d = trace_create_file("tracing_on", TRACEFS_MODE_WRITE, remote_d, remote, &tracing_on_fops);
479 	if (!d)
480 		goto err;
481 
482 	d = trace_create_file("buffer_size_kb", TRACEFS_MODE_WRITE, remote_d, remote,
483 			      &buffer_size_kb_fops);
484 	if (!d)
485 		goto err;
486 
487 	d = trace_create_file("trace_pipe", TRACEFS_MODE_READ, remote_d, remote, &trace_pipe_fops);
488 	if (!d)
489 		goto err;
490 
491 	d = trace_create_file("trace", TRACEFS_MODE_WRITE, remote_d, remote, &trace_fops);
492 	if (!d)
493 		goto err;
494 
495 	percpu_d = tracefs_create_dir("per_cpu", remote_d);
496 	if (!percpu_d) {
497 		pr_err("Failed to create tracefs dir "TRACEFS_DIR"%s/per_cpu/\n", name);
498 		goto err;
499 	}
500 
501 	for_each_possible_cpu(cpu) {
502 		struct dentry *cpu_d;
503 		char cpu_name[16];
504 
505 		snprintf(cpu_name, sizeof(cpu_name), "cpu%d", cpu);
506 		cpu_d = tracefs_create_dir(cpu_name, percpu_d);
507 		if (!cpu_d) {
508 			pr_err("Failed to create tracefs dir "TRACEFS_DIR"%s/percpu/cpu%d\n",
509 			       name, cpu);
510 			goto err;
511 		}
512 
513 		d = trace_create_cpu_file("trace_pipe", TRACEFS_MODE_READ, cpu_d, remote, cpu,
514 					  &trace_pipe_fops);
515 		if (!d)
516 			goto err;
517 
518 		d = trace_create_cpu_file("trace", TRACEFS_MODE_WRITE, cpu_d, remote, cpu,
519 					  &trace_fops);
520 		if (!d)
521 			goto err;
522 	}
523 
524 	return 0;
525 
526 err:
527 	if (root_inited) {
528 		tracefs_remove(root);
529 		root = NULL;
530 	} else {
531 		tracefs_remove(remote_d);
532 	}
533 
534 	return -ENOMEM;
535 }
536 
537 /**
538  * trace_remote_register() - Register a Tracefs remote
539  * @name:	Name of the remote, used for the Tracefs remotes/ directory.
540  * @cbs:	Set of callbacks used to control the remote.
541  * @priv:	Private data, passed to each callback from @cbs.
542  * @events:	Array of events. &remote_event.name and &remote_event.id must be
543  *		filled by the caller.
544  * @nr_events:	Number of events in the @events array.
545  *
546  * A trace remote is an entity, outside of the kernel (most likely firmware or
547  * hypervisor) capable of writing events into a Tracefs compatible ring-buffer.
548  * The kernel would then act as a reader.
549  *
550  * The registered remote will be found under the Tracefs directory
551  * remotes/<name>.
552  *
553  * Return: 0 on success, negative error code on failure.
554  */
555 int trace_remote_register(const char *name, struct trace_remote_callbacks *cbs, void *priv)
556 {
557 	struct trace_remote *remote;
558 
559 	remote = kzalloc_obj(*remote);
560 	if (!remote)
561 		return -ENOMEM;
562 
563 	remote->cbs = cbs;
564 	remote->priv = priv;
565 	remote->trace_buffer_size = 7 << 10;
566 	remote->poll_ms = 100;
567 	mutex_init(&remote->lock);
568 
569 	if (trace_remote_init_tracefs(name, remote)) {
570 		kfree(remote);
571 		return -ENOMEM;
572 	}
573 
574 	return 0;
575 }
576 EXPORT_SYMBOL_GPL(trace_remote_register);
577 
578 /**
579  * trace_remote_free_buffer() - Free trace buffer allocated with trace_remote_alloc_buffer()
580  * @desc:	Descriptor of the per-CPU ring-buffers, originally filled by
581  *		trace_remote_alloc_buffer()
582  *
583  * Most likely called from &trace_remote_callbacks.unload_trace_buffer.
584  */
585 void trace_remote_free_buffer(struct trace_buffer_desc *desc)
586 {
587 	struct ring_buffer_desc *rb_desc;
588 	int cpu;
589 
590 	for_each_ring_buffer_desc(rb_desc, cpu, desc) {
591 		unsigned int id;
592 
593 		free_page(rb_desc->meta_va);
594 
595 		for (id = 0; id < rb_desc->nr_page_va; id++)
596 			free_page(rb_desc->page_va[id]);
597 	}
598 }
599 EXPORT_SYMBOL_GPL(trace_remote_free_buffer);
600 
601 /**
602  * trace_remote_alloc_buffer() - Dynamically allocate a trace buffer
603  * @desc:		Uninitialized trace_buffer_desc
604  * @desc_size:		Size of the trace_buffer_desc. Must be at least equal to
605  *			trace_buffer_desc_size()
606  * @buffer_size:	Size in bytes of each per-CPU ring-buffer
607  * @cpumask:		CPUs to allocate a ring-buffer for
608  *
609  * Helper to dynamically allocate a set of pages (enough to cover @buffer_size)
610  * for each CPU from @cpumask and fill @desc. Most likely called from
611  * &trace_remote_callbacks.load_trace_buffer.
612  *
613  * Return: 0 on success, negative error code on failure.
614  */
615 int trace_remote_alloc_buffer(struct trace_buffer_desc *desc, size_t desc_size, size_t buffer_size,
616 			      const struct cpumask *cpumask)
617 {
618 	unsigned int nr_pages = max(DIV_ROUND_UP(buffer_size, PAGE_SIZE), 2UL) + 1;
619 	void *desc_end = desc + desc_size;
620 	struct ring_buffer_desc *rb_desc;
621 	int cpu, ret = -ENOMEM;
622 
623 	if (desc_size < struct_size(desc, __data, 0))
624 		return -EINVAL;
625 
626 	desc->nr_cpus = 0;
627 	desc->struct_len = struct_size(desc, __data, 0);
628 
629 	rb_desc = (struct ring_buffer_desc *)&desc->__data[0];
630 
631 	for_each_cpu(cpu, cpumask) {
632 		unsigned int id;
633 
634 		if ((void *)rb_desc + struct_size(rb_desc, page_va, nr_pages) > desc_end) {
635 			ret = -EINVAL;
636 			goto err;
637 		}
638 
639 		rb_desc->cpu = cpu;
640 		rb_desc->nr_page_va = 0;
641 		rb_desc->meta_va = (unsigned long)__get_free_page(GFP_KERNEL);
642 		if (!rb_desc->meta_va)
643 			goto err;
644 
645 		for (id = 0; id < nr_pages; id++) {
646 			rb_desc->page_va[id] = (unsigned long)__get_free_page(GFP_KERNEL);
647 			if (!rb_desc->page_va[id])
648 				goto err;
649 
650 			rb_desc->nr_page_va++;
651 		}
652 		desc->nr_cpus++;
653 		desc->struct_len += offsetof(struct ring_buffer_desc, page_va);
654 		desc->struct_len += struct_size(rb_desc, page_va, rb_desc->nr_page_va);
655 		rb_desc = __next_ring_buffer_desc(rb_desc);
656 	}
657 
658 	return 0;
659 
660 err:
661 	trace_remote_free_buffer(desc);
662 	return ret;
663 }
664 EXPORT_SYMBOL_GPL(trace_remote_alloc_buffer);
665