xref: /linux/kernel/trace/trace_remote.c (revision 96e43537af5461b26f50904c6055046ba65d742f)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2025 - Google LLC
4  * Author: Vincent Donnefort <vdonnefort@google.com>
5  */
6 
7 #include <linux/kstrtox.h>
8 #include <linux/lockdep.h>
9 #include <linux/mutex.h>
10 #include <linux/tracefs.h>
11 #include <linux/trace_remote.h>
12 #include <linux/trace_seq.h>
13 #include <linux/types.h>
14 
15 #include "trace.h"
16 
17 #define TRACEFS_DIR		"remotes"
18 #define TRACEFS_MODE_WRITE	0640
19 #define TRACEFS_MODE_READ	0440
20 
/*
 * State for a single trace_pipe reader. One instance is created per open of a
 * trace_pipe file and freed on release.
 */
struct trace_remote_iterator {
	struct trace_remote		*remote;	/* Remote being read */
	struct trace_seq		seq;		/* Staging buffer for formatted output */
	struct delayed_work		poll_work;	/* Periodic poll of the remote buffer */
	unsigned long			lost_events;	/* Events lost before the current one */
	u64				ts;		/* Timestamp of the current event */
	int				cpu;		/* Target CPU, or RING_BUFFER_ALL_CPUS */
	int				evt_cpu;	/* CPU that produced the current event */
};
30 
/*
 * A registered trace remote. The mutable state (buffer load state, reader
 * count, tracing_on) is protected by @lock.
 */
struct trace_remote {
	struct trace_remote_callbacks	*cbs;			/* Callbacks controlling the remote */
	void				*priv;			/* Opaque data passed to each callback */
	struct trace_buffer		*trace_buffer;		/* Kernel-side buffer, NULL when unloaded */
	struct trace_buffer_desc	*trace_buffer_desc;	/* Descriptor returned by load_trace_buffer() */
	unsigned long			trace_buffer_size;	/* Requested buffer size in bytes */
	struct ring_buffer_remote	rb_remote;		/* Glue between ring-buffer core and remote */
	struct mutex			lock;			/* Serializes load/unload and on/off */
	unsigned int			nr_readers;		/* Number of live trace_pipe iterators */
	unsigned int			poll_ms;		/* Reader polling period */
	bool				tracing_on;		/* Remote currently allowed to write */
};
43 
44 static bool trace_remote_loaded(struct trace_remote *remote)
45 {
46 	return !!remote->trace_buffer;
47 }
48 
49 static int trace_remote_load(struct trace_remote *remote)
50 {
51 	struct ring_buffer_remote *rb_remote = &remote->rb_remote;
52 	struct trace_buffer_desc *desc;
53 
54 	lockdep_assert_held(&remote->lock);
55 
56 	if (trace_remote_loaded(remote))
57 		return 0;
58 
59 	desc = remote->cbs->load_trace_buffer(remote->trace_buffer_size, remote->priv);
60 	if (IS_ERR(desc))
61 		return PTR_ERR(desc);
62 
63 	rb_remote->desc = desc;
64 	rb_remote->swap_reader_page = remote->cbs->swap_reader_page;
65 	rb_remote->priv = remote->priv;
66 	remote->trace_buffer = ring_buffer_alloc_remote(rb_remote);
67 	if (!remote->trace_buffer) {
68 		remote->cbs->unload_trace_buffer(desc, remote->priv);
69 		return -ENOMEM;
70 	}
71 
72 	remote->trace_buffer_desc = desc;
73 
74 	return 0;
75 }
76 
77 static void trace_remote_try_unload(struct trace_remote *remote)
78 {
79 	lockdep_assert_held(&remote->lock);
80 
81 	if (!trace_remote_loaded(remote))
82 		return;
83 
84 	/* The buffer is being read or writable */
85 	if (remote->nr_readers || remote->tracing_on)
86 		return;
87 
88 	/* The buffer has readable data */
89 	if (!ring_buffer_empty(remote->trace_buffer))
90 		return;
91 
92 	ring_buffer_free(remote->trace_buffer);
93 	remote->trace_buffer = NULL;
94 	remote->cbs->unload_trace_buffer(remote->trace_buffer_desc, remote->priv);
95 }
96 
97 static int trace_remote_enable_tracing(struct trace_remote *remote)
98 {
99 	int ret;
100 
101 	lockdep_assert_held(&remote->lock);
102 
103 	if (remote->tracing_on)
104 		return 0;
105 
106 	ret = trace_remote_load(remote);
107 	if (ret)
108 		return ret;
109 
110 	ret = remote->cbs->enable_tracing(true, remote->priv);
111 	if (ret) {
112 		trace_remote_try_unload(remote);
113 		return ret;
114 	}
115 
116 	remote->tracing_on = true;
117 
118 	return 0;
119 }
120 
121 static int trace_remote_disable_tracing(struct trace_remote *remote)
122 {
123 	int ret;
124 
125 	lockdep_assert_held(&remote->lock);
126 
127 	if (!remote->tracing_on)
128 		return 0;
129 
130 	ret = remote->cbs->enable_tracing(false, remote->priv);
131 	if (ret)
132 		return ret;
133 
134 	ring_buffer_poll_remote(remote->trace_buffer, RING_BUFFER_ALL_CPUS);
135 	remote->tracing_on = false;
136 	trace_remote_try_unload(remote);
137 
138 	return 0;
139 }
140 
141 static ssize_t
142 tracing_on_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
143 {
144 	struct trace_remote *remote = filp->private_data;
145 	unsigned long val;
146 	int ret;
147 
148 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
149 	if (ret)
150 		return ret;
151 
152 	guard(mutex)(&remote->lock);
153 
154 	ret = val ? trace_remote_enable_tracing(remote) : trace_remote_disable_tracing(remote);
155 	if (ret)
156 		return ret;
157 
158 	return cnt;
159 }
160 static int tracing_on_show(struct seq_file *s, void *unused)
161 {
162 	struct trace_remote *remote = s->private;
163 
164 	seq_printf(s, "%d\n", remote->tracing_on);
165 
166 	return 0;
167 }
168 DEFINE_SHOW_STORE_ATTRIBUTE(tracing_on);
169 
170 static ssize_t buffer_size_kb_write(struct file *filp, const char __user *ubuf, size_t cnt,
171 				    loff_t *ppos)
172 {
173 	struct trace_remote *remote = filp->private_data;
174 	unsigned long val;
175 	int ret;
176 
177 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
178 	if (ret)
179 		return ret;
180 
181 	/* KiB to Bytes */
182 	if (!val || check_shl_overflow(val, 10, &val))
183 		return -EINVAL;
184 
185 	guard(mutex)(&remote->lock);
186 
187 	if (trace_remote_loaded(remote))
188 		return -EBUSY;
189 
190 	remote->trace_buffer_size = val;
191 
192 	return cnt;
193 }
194 
195 static int buffer_size_kb_show(struct seq_file *s, void *unused)
196 {
197 	struct trace_remote *remote = s->private;
198 
199 	seq_printf(s, "%lu (%s)\n", remote->trace_buffer_size >> 10,
200 		   trace_remote_loaded(remote) ? "loaded" : "unloaded");
201 
202 	return 0;
203 }
204 DEFINE_SHOW_STORE_ATTRIBUTE(buffer_size_kb);
205 
206 static int trace_remote_get(struct trace_remote *remote, int cpu)
207 {
208 	int ret;
209 
210 	if (remote->nr_readers == UINT_MAX)
211 		return -EBUSY;
212 
213 	ret = trace_remote_load(remote);
214 	if (ret)
215 		return ret;
216 
217 	remote->nr_readers++;
218 
219 	return 0;
220 }
221 
222 static void trace_remote_put(struct trace_remote *remote)
223 {
224 	if (WARN_ON(!remote->nr_readers))
225 		return;
226 
227 	remote->nr_readers--;
228 	if (remote->nr_readers)
229 		return;
230 
231 	trace_remote_try_unload(remote);
232 }
233 
234 static void __poll_remote(struct work_struct *work)
235 {
236 	struct delayed_work *dwork = to_delayed_work(work);
237 	struct trace_remote_iterator *iter;
238 
239 	iter = container_of(dwork, struct trace_remote_iterator, poll_work);
240 	ring_buffer_poll_remote(iter->remote->trace_buffer, iter->cpu);
241 	schedule_delayed_work((struct delayed_work *)work,
242 			      msecs_to_jiffies(iter->remote->poll_ms));
243 }
244 
245 static struct trace_remote_iterator *trace_remote_iter(struct trace_remote *remote, int cpu)
246 {
247 	struct trace_remote_iterator *iter = NULL;
248 	int ret;
249 
250 	lockdep_assert_held(&remote->lock);
251 
252 
253 	ret = trace_remote_get(remote, cpu);
254 	if (ret)
255 		return ERR_PTR(ret);
256 
257 	/* Test the CPU */
258 	ret = ring_buffer_poll_remote(remote->trace_buffer, cpu);
259 	if (ret)
260 		goto err;
261 
262 	iter = kzalloc_obj(*iter);
263 	if (iter) {
264 		iter->remote = remote;
265 		iter->cpu = cpu;
266 		trace_seq_init(&iter->seq);
267 		INIT_DELAYED_WORK(&iter->poll_work, __poll_remote);
268 		schedule_delayed_work(&iter->poll_work, msecs_to_jiffies(remote->poll_ms));
269 
270 		return iter;
271 	}
272 	ret = -ENOMEM;
273 
274 err:
275 	kfree(iter);
276 	trace_remote_put(remote);
277 
278 	return ERR_PTR(ret);
279 }
280 
281 static void trace_remote_iter_free(struct trace_remote_iterator *iter)
282 {
283 	struct trace_remote *remote;
284 
285 	if (!iter)
286 		return;
287 
288 	remote = iter->remote;
289 
290 	lockdep_assert_held(&remote->lock);
291 
292 	kfree(iter);
293 	trace_remote_put(remote);
294 }
295 
296 static bool trace_remote_iter_read_event(struct trace_remote_iterator *iter)
297 {
298 	struct trace_buffer *trace_buffer = iter->remote->trace_buffer;
299 	int cpu = iter->cpu;
300 
301 	if (cpu != RING_BUFFER_ALL_CPUS) {
302 		if (ring_buffer_empty_cpu(trace_buffer, cpu))
303 			return false;
304 
305 		if (!ring_buffer_peek(trace_buffer, cpu, &iter->ts, &iter->lost_events))
306 			return false;
307 
308 		iter->evt_cpu = cpu;
309 		return true;
310 	}
311 
312 	iter->ts = U64_MAX;
313 	for_each_possible_cpu(cpu) {
314 		unsigned long lost_events;
315 		u64 ts;
316 
317 		if (ring_buffer_empty_cpu(trace_buffer, cpu))
318 			continue;
319 
320 		if (!ring_buffer_peek(trace_buffer, cpu, &ts, &lost_events))
321 			continue;
322 
323 		if (ts >= iter->ts)
324 			continue;
325 
326 		iter->ts = ts;
327 		iter->evt_cpu = cpu;
328 		iter->lost_events = lost_events;
329 	}
330 
331 	return iter->ts != U64_MAX;
332 }
333 
/*
 * Format the current event header into iter->seq, preceded by a lost-events
 * banner when applicable. iter->ts is in nanoseconds and is printed as
 * seconds.microseconds.
 *
 * Returns 0 on success or -EOVERFLOW if iter->seq ran out of space.
 */
static int trace_remote_iter_print_event(struct trace_remote_iterator *iter)
{
	unsigned long usecs_rem;
	u64 ts = iter->ts;

	if (iter->lost_events)
		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
				 iter->evt_cpu, iter->lost_events);

	/* ns -> us, then split into seconds and the microsecond remainder */
	do_div(ts, 1000);
	usecs_rem = do_div(ts, USEC_PER_SEC);

	trace_seq_printf(&iter->seq, "[%03d]\t%5llu.%06lu: ", iter->evt_cpu,
			 ts, usecs_rem);

	return trace_seq_has_overflowed(&iter->seq) ? -EOVERFLOW : 0;
}
351 
352 static int trace_pipe_open(struct inode *inode, struct file *filp)
353 {
354 	struct trace_remote *remote = inode->i_private;
355 	struct trace_remote_iterator *iter;
356 	int cpu = RING_BUFFER_ALL_CPUS;
357 
358 	if (inode->i_cdev)
359 		cpu = (long)inode->i_cdev - 1;
360 
361 	guard(mutex)(&remote->lock);
362 	iter = trace_remote_iter(remote, cpu);
363 	filp->private_data = iter;
364 
365 	return IS_ERR(iter) ? PTR_ERR(iter) : 0;
366 }
367 
368 static int trace_pipe_release(struct inode *inode, struct file *filp)
369 {
370 	struct trace_remote_iterator *iter = filp->private_data;
371 	struct trace_remote *remote = iter->remote;
372 
373 	guard(mutex)(&remote->lock);
374 
375 	trace_remote_iter_free(iter);
376 
377 	return 0;
378 }
379 
/*
 * Read formatted events from the remote buffer.
 *
 * Drain/refill loop: flush whatever is already formatted in iter->seq to
 * userspace; once the seq is empty (trace_seq_to_user() returns -EBUSY),
 * wait for data, format as many events as fit, then jump back to copy them
 * out. An event is only consumed once it has been fully formatted.
 */
static ssize_t trace_pipe_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
{
	struct trace_remote_iterator *iter = filp->private_data;
	struct trace_buffer *trace_buffer = iter->remote->trace_buffer;
	int ret;

copy_to_user:
	ret = trace_seq_to_user(&iter->seq, ubuf, cnt);
	if (ret != -EBUSY)
		return ret;

	/* The seq buffer is drained, start refilling it */
	trace_seq_init(&iter->seq);

	/* May block until the remote writes events or the poll worker runs */
	ret = ring_buffer_wait(trace_buffer, iter->cpu, 0, NULL, NULL);
	if (ret < 0)
		return ret;

	while (trace_remote_iter_read_event(iter)) {
		int prev_len = iter->seq.seq.len;

		/* Seq full: drop the partial print and flush what we have */
		if (trace_remote_iter_print_event(iter)) {
			iter->seq.seq.len = prev_len;
			break;
		}

		/* Event safely formatted, it can now be consumed */
		ring_buffer_consume(trace_buffer, iter->evt_cpu, NULL, NULL);
	}

	goto copy_to_user;
}
410 
/* File operations shared by the global and per-CPU trace_pipe files */
static const struct file_operations trace_pipe_fops = {
	.open		= trace_pipe_open,
	.read		= trace_pipe_read,
	.release	= trace_pipe_release,
};
416 
417 static int trace_remote_init_tracefs(const char *name, struct trace_remote *remote)
418 {
419 	struct dentry *remote_d, *percpu_d, *d;
420 	static struct dentry *root;
421 	static DEFINE_MUTEX(lock);
422 	bool root_inited = false;
423 	int cpu;
424 
425 	guard(mutex)(&lock);
426 
427 	if (!root) {
428 		root = tracefs_create_dir(TRACEFS_DIR, NULL);
429 		if (!root) {
430 			pr_err("Failed to create tracefs dir "TRACEFS_DIR"\n");
431 			return -ENOMEM;
432 		}
433 		root_inited = true;
434 	}
435 
436 	remote_d = tracefs_create_dir(name, root);
437 	if (!remote_d) {
438 		pr_err("Failed to create tracefs dir "TRACEFS_DIR"%s/\n", name);
439 		goto err;
440 	}
441 
442 	d = trace_create_file("tracing_on", TRACEFS_MODE_WRITE, remote_d, remote, &tracing_on_fops);
443 	if (!d)
444 		goto err;
445 
446 	d = trace_create_file("buffer_size_kb", TRACEFS_MODE_WRITE, remote_d, remote,
447 			      &buffer_size_kb_fops);
448 	if (!d)
449 		goto err;
450 
451 	d = trace_create_file("trace_pipe", TRACEFS_MODE_READ, remote_d, remote, &trace_pipe_fops);
452 	if (!d)
453 		goto err;
454 
455 	percpu_d = tracefs_create_dir("per_cpu", remote_d);
456 	if (!percpu_d) {
457 		pr_err("Failed to create tracefs dir "TRACEFS_DIR"%s/per_cpu/\n", name);
458 		goto err;
459 	}
460 
461 	for_each_possible_cpu(cpu) {
462 		struct dentry *cpu_d;
463 		char cpu_name[16];
464 
465 		snprintf(cpu_name, sizeof(cpu_name), "cpu%d", cpu);
466 		cpu_d = tracefs_create_dir(cpu_name, percpu_d);
467 		if (!cpu_d) {
468 			pr_err("Failed to create tracefs dir "TRACEFS_DIR"%s/percpu/cpu%d\n",
469 			       name, cpu);
470 			goto err;
471 		}
472 
473 		d = trace_create_cpu_file("trace_pipe", TRACEFS_MODE_READ, cpu_d, remote, cpu,
474 					  &trace_pipe_fops);
475 		if (!d)
476 			goto err;
477 	}
478 
479 	return 0;
480 
481 err:
482 	if (root_inited) {
483 		tracefs_remove(root);
484 		root = NULL;
485 	} else {
486 		tracefs_remove(remote_d);
487 	}
488 
489 	return -ENOMEM;
490 }
491 
492 /**
493  * trace_remote_register() - Register a Tracefs remote
494  * @name:	Name of the remote, used for the Tracefs remotes/ directory.
495  * @cbs:	Set of callbacks used to control the remote.
496  * @priv:	Private data, passed to each callback from @cbs.
497  * @events:	Array of events. &remote_event.name and &remote_event.id must be
498  *		filled by the caller.
499  * @nr_events:	Number of events in the @events array.
500  *
501  * A trace remote is an entity, outside of the kernel (most likely firmware or
502  * hypervisor) capable of writing events into a Tracefs compatible ring-buffer.
503  * The kernel would then act as a reader.
504  *
505  * The registered remote will be found under the Tracefs directory
506  * remotes/<name>.
507  *
508  * Return: 0 on success, negative error code on failure.
509  */
510 int trace_remote_register(const char *name, struct trace_remote_callbacks *cbs, void *priv)
511 {
512 	struct trace_remote *remote;
513 
514 	remote = kzalloc_obj(*remote);
515 	if (!remote)
516 		return -ENOMEM;
517 
518 	remote->cbs = cbs;
519 	remote->priv = priv;
520 	remote->trace_buffer_size = 7 << 10;
521 	remote->poll_ms = 100;
522 	mutex_init(&remote->lock);
523 
524 	if (trace_remote_init_tracefs(name, remote)) {
525 		kfree(remote);
526 		return -ENOMEM;
527 	}
528 
529 	return 0;
530 }
531 EXPORT_SYMBOL_GPL(trace_remote_register);
532 
533 /**
534  * trace_remote_free_buffer() - Free trace buffer allocated with trace_remote_alloc_buffer()
535  * @desc:	Descriptor of the per-CPU ring-buffers, originally filled by
536  *		trace_remote_alloc_buffer()
537  *
538  * Most likely called from &trace_remote_callbacks.unload_trace_buffer.
539  */
540 void trace_remote_free_buffer(struct trace_buffer_desc *desc)
541 {
542 	struct ring_buffer_desc *rb_desc;
543 	int cpu;
544 
545 	for_each_ring_buffer_desc(rb_desc, cpu, desc) {
546 		unsigned int id;
547 
548 		free_page(rb_desc->meta_va);
549 
550 		for (id = 0; id < rb_desc->nr_page_va; id++)
551 			free_page(rb_desc->page_va[id]);
552 	}
553 }
554 EXPORT_SYMBOL_GPL(trace_remote_free_buffer);
555 
556 /**
557  * trace_remote_alloc_buffer() - Dynamically allocate a trace buffer
558  * @desc:		Uninitialized trace_buffer_desc
559  * @desc_size:		Size of the trace_buffer_desc. Must be at least equal to
560  *			trace_buffer_desc_size()
561  * @buffer_size:	Size in bytes of each per-CPU ring-buffer
562  * @cpumask:		CPUs to allocate a ring-buffer for
563  *
564  * Helper to dynamically allocate a set of pages (enough to cover @buffer_size)
565  * for each CPU from @cpumask and fill @desc. Most likely called from
566  * &trace_remote_callbacks.load_trace_buffer.
567  *
568  * Return: 0 on success, negative error code on failure.
569  */
570 int trace_remote_alloc_buffer(struct trace_buffer_desc *desc, size_t desc_size, size_t buffer_size,
571 			      const struct cpumask *cpumask)
572 {
573 	unsigned int nr_pages = max(DIV_ROUND_UP(buffer_size, PAGE_SIZE), 2UL) + 1;
574 	void *desc_end = desc + desc_size;
575 	struct ring_buffer_desc *rb_desc;
576 	int cpu, ret = -ENOMEM;
577 
578 	if (desc_size < struct_size(desc, __data, 0))
579 		return -EINVAL;
580 
581 	desc->nr_cpus = 0;
582 	desc->struct_len = struct_size(desc, __data, 0);
583 
584 	rb_desc = (struct ring_buffer_desc *)&desc->__data[0];
585 
586 	for_each_cpu(cpu, cpumask) {
587 		unsigned int id;
588 
589 		if ((void *)rb_desc + struct_size(rb_desc, page_va, nr_pages) > desc_end) {
590 			ret = -EINVAL;
591 			goto err;
592 		}
593 
594 		rb_desc->cpu = cpu;
595 		rb_desc->nr_page_va = 0;
596 		rb_desc->meta_va = (unsigned long)__get_free_page(GFP_KERNEL);
597 		if (!rb_desc->meta_va)
598 			goto err;
599 
600 		for (id = 0; id < nr_pages; id++) {
601 			rb_desc->page_va[id] = (unsigned long)__get_free_page(GFP_KERNEL);
602 			if (!rb_desc->page_va[id])
603 				goto err;
604 
605 			rb_desc->nr_page_va++;
606 		}
607 		desc->nr_cpus++;
608 		desc->struct_len += offsetof(struct ring_buffer_desc, page_va);
609 		desc->struct_len += struct_size(rb_desc, page_va, rb_desc->nr_page_va);
610 		rb_desc = __next_ring_buffer_desc(rb_desc);
611 	}
612 
613 	return 0;
614 
615 err:
616 	trace_remote_free_buffer(desc);
617 	return ret;
618 }
619 EXPORT_SYMBOL_GPL(trace_remote_alloc_buffer);
620