xref: /linux/kernel/trace/trace_remote.c (revision 01f492e1817e858d1712f2489d0afbaa552f417b)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2025 - Google LLC
4  * Author: Vincent Donnefort <vdonnefort@google.com>
5  */
6 
7 #include <linux/kstrtox.h>
8 #include <linux/lockdep.h>
9 #include <linux/mutex.h>
10 #include <linux/tracefs.h>
11 #include <linux/trace_remote.h>
12 #include <linux/trace_seq.h>
13 #include <linux/types.h>
14 
15 #include "trace.h"
16 
17 #define TRACEFS_DIR		"remotes"
18 #define TRACEFS_MODE_WRITE	0640
19 #define TRACEFS_MODE_READ	0440
20 
/* How a trace_remote_iterator reads the remote ring-buffer. */
enum tri_type {
	TRI_CONSUMING,		/* Destructive read, events are consumed (trace_pipe) */
	TRI_NONCONSUMING,	/* Non-destructive read via ring-buffer iterators (trace) */
};
25 
/*
 * State for one reader of a remote trace buffer, backing either a trace_pipe
 * file (TRI_CONSUMING) or a trace file (TRI_NONCONSUMING).
 */
struct trace_remote_iterator {
	struct trace_remote		*remote;	/* Remote being read */
	struct trace_seq		seq;		/* Text rendering of the current event */
	struct delayed_work		poll_work;	/* Periodic remote poll (TRI_CONSUMING only) */
	unsigned long			lost_events;	/* Events lost before the current one */
	u64				ts;		/* Timestamp of the current event */
	struct ring_buffer_iter		*rb_iter;	/* Single-CPU iter (TRI_NONCONSUMING) */
	struct ring_buffer_iter		**rb_iters;	/* Per-CPU iters for RING_BUFFER_ALL_CPUS */
	struct remote_event_hdr		*evt;		/* Payload of the current event */
	int				cpu;		/* Requested CPU or RING_BUFFER_ALL_CPUS */
	int				evt_cpu;	/* CPU the current event came from */
	loff_t				pos;		/* Read position for the seq_file interface */
	enum tri_type			type;		/* Consuming vs non-consuming */
};
40 
/* Kernel-side representation of a single registered trace remote. */
struct trace_remote {
	struct trace_remote_callbacks	*cbs;			/* Control callbacks for the remote */
	void				*priv;			/* Opaque data passed to every callback */
	struct trace_buffer		*trace_buffer;		/* Local view of the remote ring-buffer */
	struct trace_buffer_desc	*trace_buffer_desc;	/* Descriptor handed over by the remote */
	struct dentry			*dentry;		/* remotes/<name>/ tracefs directory */
	struct eventfs_inode		*eventfs;
	struct remote_event		*events;		/* Events supported by this remote */
	unsigned long			nr_events;
	unsigned long			trace_buffer_size;	/* Requested per-CPU buffer size (bytes) */
	struct ring_buffer_remote	rb_remote;
	struct mutex			lock;			/* Serializes all operations on this remote */
	struct rw_semaphore		reader_lock;		/* Global reader exclusion */
	struct rw_semaphore		*pcpu_reader_locks;	/* Lazily allocated per-CPU reader locks */
	unsigned int			nr_readers;		/* Live iterators on this remote */
	unsigned int			poll_ms;		/* Consuming-reader poll period */
	bool				tracing_on;		/* Remote currently allowed to write */
};
59 
60 static bool trace_remote_loaded(struct trace_remote *remote)
61 {
62 	return !!remote->trace_buffer;
63 }
64 
/*
 * Ask the remote to hand over its trace buffer and hook it into the
 * ring-buffer remote machinery. No-op when already loaded.
 *
 * Returns 0 on success or a negative error code.
 */
static int trace_remote_load(struct trace_remote *remote)
{
	struct ring_buffer_remote *rb_remote = &remote->rb_remote;
	struct trace_buffer_desc *desc;

	lockdep_assert_held(&remote->lock);

	if (trace_remote_loaded(remote))
		return 0;

	desc = remote->cbs->load_trace_buffer(remote->trace_buffer_size, remote->priv);
	if (IS_ERR(desc))
		return PTR_ERR(desc);

	rb_remote->desc = desc;
	rb_remote->swap_reader_page = remote->cbs->swap_reader_page;
	rb_remote->priv = remote->priv;
	rb_remote->reset = remote->cbs->reset;
	remote->trace_buffer = ring_buffer_alloc_remote(rb_remote);
	if (!remote->trace_buffer) {
		/* Hand the buffer back to the remote on allocation failure */
		remote->cbs->unload_trace_buffer(desc, remote->priv);
		return -ENOMEM;
	}

	remote->trace_buffer_desc = desc;

	return 0;
}
93 
/*
 * Release the remote trace buffer, unless it is still in use: readers
 * attached, tracing enabled or unconsumed events remaining.
 */
static void trace_remote_try_unload(struct trace_remote *remote)
{
	lockdep_assert_held(&remote->lock);

	if (!trace_remote_loaded(remote))
		return;

	/* The buffer is being read or writable */
	if (remote->nr_readers || remote->tracing_on)
		return;

	/* The buffer has readable data */
	if (!ring_buffer_empty(remote->trace_buffer))
		return;

	ring_buffer_free(remote->trace_buffer);
	remote->trace_buffer = NULL;
	remote->cbs->unload_trace_buffer(remote->trace_buffer_desc, remote->priv);
}
113 
/*
 * Load the trace buffer if needed and ask the remote to start writing
 * events. Returns 0 when tracing is (already) enabled.
 */
static int trace_remote_enable_tracing(struct trace_remote *remote)
{
	int ret;

	lockdep_assert_held(&remote->lock);

	if (remote->tracing_on)
		return 0;

	ret = trace_remote_load(remote);
	if (ret)
		return ret;

	ret = remote->cbs->enable_tracing(true, remote->priv);
	if (ret) {
		/* Roll back the load if the remote refused to start */
		trace_remote_try_unload(remote);
		return ret;
	}

	remote->tracing_on = true;

	return 0;
}
137 
/*
 * Ask the remote to stop writing, flush the last events into the local
 * ring-buffer and unload it if nothing is left to read.
 */
static int trace_remote_disable_tracing(struct trace_remote *remote)
{
	int ret;

	lockdep_assert_held(&remote->lock);

	if (!remote->tracing_on)
		return 0;

	ret = remote->cbs->enable_tracing(false, remote->priv);
	if (ret)
		return ret;

	/* Collect whatever the remote wrote before it stopped */
	ring_buffer_poll_remote(remote->trace_buffer, RING_BUFFER_ALL_CPUS);
	remote->tracing_on = false;
	trace_remote_try_unload(remote);

	return 0;
}
157 
158 static void trace_remote_reset(struct trace_remote *remote, int cpu)
159 {
160 	lockdep_assert_held(&remote->lock);
161 
162 	if (!trace_remote_loaded(remote))
163 		return;
164 
165 	if (cpu == RING_BUFFER_ALL_CPUS)
166 		ring_buffer_reset(remote->trace_buffer);
167 	else
168 		ring_buffer_reset_cpu(remote->trace_buffer, cpu);
169 
170 	trace_remote_try_unload(remote);
171 }
172 
/* Write 0/1 to remotes/<name>/tracing_on to stop/start remote tracing. */
static ssize_t
tracing_on_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
{
	struct seq_file *seq = filp->private_data;
	struct trace_remote *remote = seq->private;
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	guard(mutex)(&remote->lock);

	ret = val ? trace_remote_enable_tracing(remote) : trace_remote_disable_tracing(remote);
	if (ret)
		return ret;

	return cnt;
}
193 static int tracing_on_show(struct seq_file *s, void *unused)
194 {
195 	struct trace_remote *remote = s->private;
196 
197 	seq_printf(s, "%d\n", remote->tracing_on);
198 
199 	return 0;
200 }
201 DEFINE_SHOW_STORE_ATTRIBUTE(tracing_on);
202 
/*
 * Set the per-CPU ring-buffer size, in KiB. Rejected with -EBUSY while the
 * buffer is loaded, as the remote has already mapped it.
 */
static ssize_t buffer_size_kb_write(struct file *filp, const char __user *ubuf, size_t cnt,
				    loff_t *ppos)
{
	struct seq_file *seq = filp->private_data;
	struct trace_remote *remote = seq->private;
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	/* KiB to Bytes */
	if (!val || check_shl_overflow(val, 10, &val))
		return -EINVAL;

	guard(mutex)(&remote->lock);

	if (trace_remote_loaded(remote))
		return -EBUSY;

	remote->trace_buffer_size = val;

	return cnt;
}
228 
229 static int buffer_size_kb_show(struct seq_file *s, void *unused)
230 {
231 	struct trace_remote *remote = s->private;
232 
233 	seq_printf(s, "%lu (%s)\n", remote->trace_buffer_size >> 10,
234 		   trace_remote_loaded(remote) ? "loaded" : "unloaded");
235 
236 	return 0;
237 }
238 DEFINE_SHOW_STORE_ATTRIBUTE(buffer_size_kb);
239 
240 static int trace_remote_get(struct trace_remote *remote, int cpu)
241 {
242 	int ret;
243 
244 	if (remote->nr_readers == UINT_MAX)
245 		return -EBUSY;
246 
247 	ret = trace_remote_load(remote);
248 	if (ret)
249 		return ret;
250 
251 	if (cpu != RING_BUFFER_ALL_CPUS && !remote->pcpu_reader_locks) {
252 		int lock_cpu;
253 
254 		remote->pcpu_reader_locks = kcalloc(nr_cpu_ids, sizeof(*remote->pcpu_reader_locks),
255 						    GFP_KERNEL);
256 		if (!remote->pcpu_reader_locks) {
257 			trace_remote_try_unload(remote);
258 			return -ENOMEM;
259 		}
260 
261 		for_each_possible_cpu(lock_cpu)
262 			init_rwsem(&remote->pcpu_reader_locks[lock_cpu]);
263 	}
264 
265 	remote->nr_readers++;
266 
267 	return 0;
268 }
269 
270 static void trace_remote_put(struct trace_remote *remote)
271 {
272 	if (WARN_ON(!remote->nr_readers))
273 		return;
274 
275 	remote->nr_readers--;
276 	if (remote->nr_readers)
277 		return;
278 
279 	kfree(remote->pcpu_reader_locks);
280 	remote->pcpu_reader_locks = NULL;
281 
282 	trace_remote_try_unload(remote);
283 }
284 
/*
 * Check whether @cpu is backed by a ring-buffer in @remote's trace buffer.
 * Relies on ring_buffer_poll_remote() returning non-zero for CPUs that are
 * not part of the buffer — confirm against the ring-buffer remote API.
 */
static bool trace_remote_has_cpu(struct trace_remote *remote, int cpu)
{
	if (cpu == RING_BUFFER_ALL_CPUS)
		return true;

	return ring_buffer_poll_remote(remote->trace_buffer, cpu) == 0;
}
292 
293 static void __poll_remote(struct work_struct *work)
294 {
295 	struct delayed_work *dwork = to_delayed_work(work);
296 	struct trace_remote_iterator *iter;
297 
298 	iter = container_of(dwork, struct trace_remote_iterator, poll_work);
299 	ring_buffer_poll_remote(iter->remote->trace_buffer, iter->cpu);
300 	schedule_delayed_work((struct delayed_work *)work,
301 			      msecs_to_jiffies(iter->remote->poll_ms));
302 }
303 
/*
 * Release the ring-buffer iterator(s) set up by __alloc_ring_buffer_iter().
 * @cpu mirrors the value the iterators were allocated with.
 */
static void __free_ring_buffer_iter(struct trace_remote_iterator *iter, int cpu)
{
	if (cpu != RING_BUFFER_ALL_CPUS) {
		ring_buffer_read_finish(iter->rb_iter);
		return;
	}

	/* Some CPUs may have no iterator (not part of the trace buffer) */
	for_each_possible_cpu(cpu) {
		if (iter->rb_iters[cpu])
			ring_buffer_read_finish(iter->rb_iters[cpu]);
	}

	kfree(iter->rb_iters);
}
318 
/*
 * Create non-consuming read iterator(s): a single one for @cpu, or one per
 * possible CPU when @cpu == RING_BUFFER_ALL_CPUS. CPUs that are not part of
 * the remote trace buffer are silently skipped.
 *
 * Returns 0 on success, -ENOMEM otherwise.
 */
static int __alloc_ring_buffer_iter(struct trace_remote_iterator *iter, int cpu)
{
	if (cpu != RING_BUFFER_ALL_CPUS) {
		iter->rb_iter = ring_buffer_read_start(iter->remote->trace_buffer, cpu, GFP_KERNEL);

		return iter->rb_iter ? 0 : -ENOMEM;
	}

	iter->rb_iters = kcalloc(nr_cpu_ids, sizeof(*iter->rb_iters), GFP_KERNEL);
	if (!iter->rb_iters)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		iter->rb_iters[cpu] = ring_buffer_read_start(iter->remote->trace_buffer, cpu,
							     GFP_KERNEL);
		if (!iter->rb_iters[cpu]) {
			/* This CPU isn't part of trace_buffer. Skip it */
			if (!trace_remote_has_cpu(iter->remote, cpu))
				continue;

			/* Genuine allocation failure: undo everything */
			__free_ring_buffer_iter(iter, RING_BUFFER_ALL_CPUS);
			return -ENOMEM;
		}
	}

	return 0;
}
346 
/*
 * Create an iterator to read @remote. TRI_CONSUMING arms a delayed work to
 * periodically poll the remote for new events; TRI_NONCONSUMING allocates
 * ring-buffer read iterators. Takes a reader reference released by
 * trace_remote_iter_free().
 *
 * Returns NULL when a non-consuming read finds no loaded buffer (empty
 * trace), an ERR_PTR() on failure, or a valid iterator.
 */
static struct trace_remote_iterator
*trace_remote_iter(struct trace_remote *remote, int cpu, enum tri_type type)
{
	struct trace_remote_iterator *iter = NULL;
	int ret;

	lockdep_assert_held(&remote->lock);

	if (type == TRI_NONCONSUMING && !trace_remote_loaded(remote))
		return NULL;

	ret = trace_remote_get(remote, cpu);
	if (ret)
		return ERR_PTR(ret);

	if (!trace_remote_has_cpu(remote, cpu)) {
		ret = -ENODEV;
		goto err;
	}

	iter = kzalloc_obj(*iter);
	if (iter) {
		iter->remote = remote;
		iter->cpu = cpu;
		iter->type = type;
		trace_seq_init(&iter->seq);

		switch (type) {
		case TRI_CONSUMING:
			/* Catch up with the remote before the first read */
			ring_buffer_poll_remote(remote->trace_buffer, cpu);
			INIT_DELAYED_WORK(&iter->poll_work, __poll_remote);
			schedule_delayed_work(&iter->poll_work, msecs_to_jiffies(remote->poll_ms));
			break;
		case TRI_NONCONSUMING:
			ret = __alloc_ring_buffer_iter(iter, cpu);
			break;
		}

		if (ret)
			goto err;

		return iter;
	}
	ret = -ENOMEM;

err:
	kfree(iter);
	trace_remote_put(remote);

	return ERR_PTR(ret);
}
398 
/*
 * Tear down an iterator created by trace_remote_iter() and drop its reader
 * reference. NULL-safe.
 */
static void trace_remote_iter_free(struct trace_remote_iterator *iter)
{
	struct trace_remote *remote;

	if (!iter)
		return;

	remote = iter->remote;

	lockdep_assert_held(&remote->lock);

	switch (iter->type) {
	case TRI_CONSUMING:
		cancel_delayed_work_sync(&iter->poll_work);
		break;
	case TRI_NONCONSUMING:
		__free_ring_buffer_iter(iter, iter->cpu);
		break;
	}

	kfree(iter);
	trace_remote_put(remote);
}
422 
/*
 * Take the reader locks for this iterator. Consuming readers are exclusive
 * (write lock) over their scope: a consuming ALL_CPUS reader excludes every
 * other reader, while a consuming per-CPU reader only excludes readers of
 * the same CPU. Paired with trace_remote_iter_read_finished().
 */
static void trace_remote_iter_read_start(struct trace_remote_iterator *iter)
{
	struct trace_remote *remote = iter->remote;
	int cpu = iter->cpu;

	/* Acquire global reader lock */
	if (cpu == RING_BUFFER_ALL_CPUS && iter->type == TRI_CONSUMING)
		down_write(&remote->reader_lock);
	else
		down_read(&remote->reader_lock);

	if (cpu == RING_BUFFER_ALL_CPUS)
		return;

	/*
	 * No need for the remote lock here, iter holds a reference on
	 * remote->nr_readers
	 */

	/* Get the per-CPU one */
	if (WARN_ON_ONCE(!remote->pcpu_reader_locks))
		return;

	if (iter->type == TRI_CONSUMING)
		down_write(&remote->pcpu_reader_locks[cpu]);
	else
		down_read(&remote->pcpu_reader_locks[cpu]);
}
451 
/*
 * Drop the reader locks taken by trace_remote_iter_read_start(), in reverse
 * order (per-CPU first, then global).
 */
static void trace_remote_iter_read_finished(struct trace_remote_iterator *iter)
{
	struct trace_remote *remote = iter->remote;
	int cpu = iter->cpu;

	/* Release per-CPU reader lock */
	if (cpu != RING_BUFFER_ALL_CPUS) {
		/*
		 * No need for the remote lock here, iter holds a reference on
		 * remote->nr_readers
		 */
		if (iter->type == TRI_CONSUMING)
			up_write(&remote->pcpu_reader_locks[cpu]);
		else
			up_read(&remote->pcpu_reader_locks[cpu]);
	}

	/* Release global reader lock */
	if (cpu == RING_BUFFER_ALL_CPUS && iter->type == TRI_CONSUMING)
		up_write(&remote->reader_lock);
	else
		up_read(&remote->reader_lock);
}
475 
476 static struct ring_buffer_iter *__get_rb_iter(struct trace_remote_iterator *iter, int cpu)
477 {
478 	return iter->cpu != RING_BUFFER_ALL_CPUS ? iter->rb_iter : iter->rb_iters[cpu];
479 }
480 
/*
 * Peek at the next event available for @cpu without consuming or advancing,
 * filling @ts and @lost_events. Returns NULL when no event is available
 * (including when @cpu has no iterator in the non-consuming case).
 */
static struct ring_buffer_event *
__peek_event(struct trace_remote_iterator *iter, int cpu, u64 *ts, unsigned long *lost_events)
{
	struct ring_buffer_event *rb_evt;
	struct ring_buffer_iter *rb_iter;

	switch (iter->type) {
	case TRI_CONSUMING:
		return ring_buffer_peek(iter->remote->trace_buffer, cpu, ts, lost_events);
	case TRI_NONCONSUMING:
		rb_iter = __get_rb_iter(iter, cpu);
		if (!rb_iter)
			return NULL;

		rb_evt = ring_buffer_iter_peek(rb_iter, ts);
		if (!rb_evt)
			return NULL;

		*lost_events = ring_buffer_iter_dropped(rb_iter);

		return rb_evt;
	}

	return NULL;
}
506 
/*
 * Find the next event to report and cache it in iter->evt/evt_cpu/ts. For
 * RING_BUFFER_ALL_CPUS, the oldest event (smallest timestamp) across all
 * CPUs wins. Returns true when an event was found.
 */
static bool trace_remote_iter_read_event(struct trace_remote_iterator *iter)
{
	struct trace_buffer *trace_buffer = iter->remote->trace_buffer;
	struct ring_buffer_event *rb_evt;
	int cpu = iter->cpu;

	if (cpu != RING_BUFFER_ALL_CPUS) {
		if (ring_buffer_empty_cpu(trace_buffer, cpu))
			return false;

		rb_evt = __peek_event(iter, cpu, &iter->ts, &iter->lost_events);
		if (!rb_evt)
			return false;

		iter->evt_cpu = cpu;
		iter->evt = ring_buffer_event_data(rb_evt);
		return true;
	}

	/* U64_MAX doubles as the "nothing found" marker */
	iter->ts = U64_MAX;
	for_each_possible_cpu(cpu) {
		unsigned long lost_events;
		u64 ts;

		if (ring_buffer_empty_cpu(trace_buffer, cpu))
			continue;

		rb_evt = __peek_event(iter, cpu, &ts, &lost_events);
		if (!rb_evt)
			continue;

		if (ts >= iter->ts)
			continue;

		iter->ts = ts;
		iter->evt_cpu = cpu;
		iter->evt = ring_buffer_event_data(rb_evt);
		iter->lost_events = lost_events;
	}

	return iter->ts != U64_MAX;
}
549 
550 static void trace_remote_iter_move(struct trace_remote_iterator *iter)
551 {
552 	struct trace_buffer *trace_buffer = iter->remote->trace_buffer;
553 
554 	switch (iter->type) {
555 	case TRI_CONSUMING:
556 		ring_buffer_consume(trace_buffer, iter->evt_cpu, NULL, NULL);
557 		break;
558 	case TRI_NONCONSUMING:
559 		ring_buffer_iter_advance(__get_rb_iter(iter, iter->evt_cpu));
560 		break;
561 	}
562 }
563 
564 static struct remote_event *trace_remote_find_event(struct trace_remote *remote, unsigned short id);
565 
/*
 * Render the current event into iter->seq: an optional LOST-events banner, a
 * "[CPU] secs.usecs:" prefix and the event-specific print() output.
 *
 * Returns -EOVERFLOW when the event does not fit in the trace_seq.
 */
static int trace_remote_iter_print_event(struct trace_remote_iterator *iter)
{
	struct remote_event *evt;
	unsigned long usecs_rem;
	u64 ts = iter->ts;

	if (iter->lost_events)
		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
				 iter->evt_cpu, iter->lost_events);

	/* Split the timestamp into seconds and microseconds remainder */
	do_div(ts, 1000);
	usecs_rem = do_div(ts, USEC_PER_SEC);

	trace_seq_printf(&iter->seq, "[%03d]\t%5llu.%06lu: ", iter->evt_cpu,
			 ts, usecs_rem);

	evt = trace_remote_find_event(iter->remote, iter->evt->id);
	if (!evt)
		trace_seq_printf(&iter->seq, "UNKNOWN id=%d\n", iter->evt->id);
	else
		evt->print(iter->evt, &iter->seq);

	return trace_seq_has_overflowed(&iter->seq) ? -EOVERFLOW : 0;
}
590 
591 static int trace_pipe_open(struct inode *inode, struct file *filp)
592 {
593 	struct trace_remote *remote = inode->i_private;
594 	struct trace_remote_iterator *iter;
595 	int cpu = tracing_get_cpu(inode);
596 
597 	guard(mutex)(&remote->lock);
598 
599 	iter = trace_remote_iter(remote, cpu, TRI_CONSUMING);
600 	if (IS_ERR(iter))
601 		return PTR_ERR(iter);
602 
603 	filp->private_data = iter;
604 
605 	return IS_ERR(iter) ? PTR_ERR(iter) : 0;
606 }
607 
608 static int trace_pipe_release(struct inode *inode, struct file *filp)
609 {
610 	struct trace_remote_iterator *iter = filp->private_data;
611 	struct trace_remote *remote = iter->remote;
612 
613 	guard(mutex)(&remote->lock);
614 
615 	trace_remote_iter_free(iter);
616 
617 	return 0;
618 }
619 
/*
 * Consuming read: render as many events as fit into iter->seq, then copy
 * them out to user-space. Blocks in ring_buffer_wait() until data shows up.
 */
static ssize_t trace_pipe_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
{
	struct trace_remote_iterator *iter = filp->private_data;
	struct trace_buffer *trace_buffer = iter->remote->trace_buffer;
	int ret;

copy_to_user:
	/* -EBUSY means the seq is drained: go (re)fill it below */
	ret = trace_seq_to_user(&iter->seq, ubuf, cnt);
	if (ret != -EBUSY)
		return ret;

	trace_seq_init(&iter->seq);

	ret = ring_buffer_wait(trace_buffer, iter->cpu, 0, NULL, NULL);
	if (ret < 0)
		return ret;

	trace_remote_iter_read_start(iter);

	while (trace_remote_iter_read_event(iter)) {
		int prev_len = iter->seq.seq.len;

		if (trace_remote_iter_print_event(iter)) {
			/* Overflow: drop the partial print, flush what fits */
			iter->seq.seq.len = prev_len;
			break;
		}

		trace_remote_iter_move(iter);
	}

	trace_remote_iter_read_finished(iter);

	goto copy_to_user;
}
654 
/* File operations backing the consuming trace_pipe files. */
static const struct file_operations trace_pipe_fops = {
	.open		= trace_pipe_open,
	.read		= trace_pipe_read,
	.release	= trace_pipe_release,
};
660 
/* seq_file next: advance to and report the next event, if any. */
static void *trace_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_remote_iterator *iter = m->private;

	++*pos;

	if (!iter || !trace_remote_iter_read_event(iter))
		return NULL;

	trace_remote_iter_move(iter);
	iter->pos++;

	return iter;
}
675 
/*
 * seq_file start: take the reader locks and replay the iterator up to *pos.
 * A NULL iter means the trace buffer wasn't loaded at open time (the trace
 * simply reads empty).
 */
static void *trace_start(struct seq_file *m, loff_t *pos)
{
	struct trace_remote_iterator *iter = m->private;
	loff_t i;

	if (!iter)
		return NULL;

	trace_remote_iter_read_start(iter);

	if (!*pos) {
		iter->pos = -1;
		return trace_next(m, NULL, &i);
	}

	/* Resuming: catch up from the last reported position */
	i = iter->pos;
	while (i < *pos) {
		iter = trace_next(m, NULL, &i);
		if (!iter)
			return NULL;
	}

	return iter;
}
700 
/* seq_file show: print the event found by trace_next() into @m. */
static int trace_show(struct seq_file *m, void *v)
{
	struct trace_remote_iterator *iter = v;

	trace_seq_init(&iter->seq);

	if (trace_remote_iter_print_event(iter)) {
		/* Event larger than a whole trace_seq: flag it and move on */
		seq_printf(m, "[EVENT %d PRINT TOO BIG]\n", iter->evt->id);
		return 0;
	}

	return trace_print_seq(m, &iter->seq);
}
714 
715 static void trace_stop(struct seq_file *m, void *v)
716 {
717 	struct trace_remote_iterator *iter = m->private;
718 
719 	if (iter)
720 		trace_remote_iter_read_finished(iter);
721 }
722 
/* seq_file operations backing the non-consuming "trace" files. */
static const struct seq_operations trace_sops = {
	.start		= trace_start,
	.next		= trace_next,
	.show		= trace_show,
	.stop		= trace_stop,
};
729 
/*
 * Open a "trace" file for non-consuming reads. Write-only opens (used to
 * reset the buffer) skip the iterator and seq_file setup entirely.
 */
static int trace_open(struct inode *inode, struct file *filp)
{
	struct trace_remote *remote = inode->i_private;
	struct trace_remote_iterator *iter = NULL;
	int cpu = tracing_get_cpu(inode);
	int ret;

	if (!(filp->f_mode & FMODE_READ))
		return 0;

	guard(mutex)(&remote->lock);

	iter = trace_remote_iter(remote, cpu, TRI_NONCONSUMING);
	if (IS_ERR(iter))
		return PTR_ERR(iter);

	ret = seq_open(filp, &trace_sops);
	if (ret) {
		trace_remote_iter_free(iter);
		return ret;
	}

	/* iter may be NULL here (unloaded buffer): trace_start() handles it */
	((struct seq_file *)filp->private_data)->private = (void *)iter;

	return 0;
}
756 
/* Release a "trace" file: drop the seq_file and the iterator, if any. */
static int trace_release(struct inode *inode, struct file *filp)
{
	struct trace_remote_iterator *iter;

	if (!(filp->f_mode & FMODE_READ))
		return 0;

	iter = ((struct seq_file *)filp->private_data)->private;
	seq_release(inode, filp);

	if (!iter)
		return 0;

	guard(mutex)(&iter->remote->lock);

	trace_remote_iter_free(iter);

	return 0;
}
776 
/*
 * Any write to a "trace" file resets the corresponding ring-buffer(s); the
 * written data itself is ignored.
 */
static ssize_t trace_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
{
	struct inode *inode = file_inode(filp);
	struct trace_remote *remote = inode->i_private;
	int cpu = tracing_get_cpu(inode);

	guard(mutex)(&remote->lock);

	trace_remote_reset(remote, cpu);

	return cnt;
}
789 
/* File operations for "trace" files: non-consuming reads, writes reset. */
static const struct file_operations trace_fops = {
	.open		= trace_open,
	.write		= trace_write,
	.read		= seq_read,
	.read_iter	= seq_read_iter,
	.release	= trace_release,
};
797 
/*
 * Populate remotes/<name>/ with the control files and per-CPU directories.
 * The shared remotes/ root is created once; on failure during the first
 * registration the whole root is removed, otherwise only this remote's
 * directory.
 *
 * Returns 0 on success, -ENOMEM otherwise.
 */
static int trace_remote_init_tracefs(const char *name, struct trace_remote *remote)
{
	struct dentry *remote_d, *percpu_d, *d;
	static struct dentry *root;
	static DEFINE_MUTEX(lock);
	bool root_inited = false;
	int cpu;

	/* Protects the shared root across concurrent registrations */
	guard(mutex)(&lock);

	if (!root) {
		root = tracefs_create_dir(TRACEFS_DIR, NULL);
		if (!root) {
			pr_err("Failed to create tracefs dir "TRACEFS_DIR"\n");
			return -ENOMEM;
		}
		root_inited = true;
	}

	remote_d = tracefs_create_dir(name, root);
	if (!remote_d) {
		pr_err("Failed to create tracefs dir "TRACEFS_DIR"%s/\n", name);
		goto err;
	}

	d = trace_create_file("tracing_on", TRACEFS_MODE_WRITE, remote_d, remote, &tracing_on_fops);
	if (!d)
		goto err;

	d = trace_create_file("buffer_size_kb", TRACEFS_MODE_WRITE, remote_d, remote,
			      &buffer_size_kb_fops);
	if (!d)
		goto err;

	d = trace_create_file("trace_pipe", TRACEFS_MODE_READ, remote_d, remote, &trace_pipe_fops);
	if (!d)
		goto err;

	d = trace_create_file("trace", TRACEFS_MODE_WRITE, remote_d, remote, &trace_fops);
	if (!d)
		goto err;

	percpu_d = tracefs_create_dir("per_cpu", remote_d);
	if (!percpu_d) {
		pr_err("Failed to create tracefs dir "TRACEFS_DIR"%s/per_cpu/\n", name);
		goto err;
	}

	/* One cpuN/ directory per possible CPU, with its own trace files */
	for_each_possible_cpu(cpu) {
		struct dentry *cpu_d;
		char cpu_name[16];

		snprintf(cpu_name, sizeof(cpu_name), "cpu%d", cpu);
		cpu_d = tracefs_create_dir(cpu_name, percpu_d);
		if (!cpu_d) {
			pr_err("Failed to create tracefs dir "TRACEFS_DIR"%s/percpu/cpu%d\n",
			       name, cpu);
			goto err;
		}

		d = trace_create_cpu_file("trace_pipe", TRACEFS_MODE_READ, cpu_d, remote, cpu,
					  &trace_pipe_fops);
		if (!d)
			goto err;

		d = trace_create_cpu_file("trace", TRACEFS_MODE_WRITE, cpu_d, remote, cpu,
					  &trace_fops);
		if (!d)
			goto err;
	}

	remote->dentry = remote_d;

	return 0;

err:
	/* If we created the root, no other remote exists yet: remove it all */
	if (root_inited) {
		tracefs_remove(root);
		root = NULL;
	} else {
		tracefs_remove(remote_d);
	}

	return -ENOMEM;
}
883 
884 static int trace_remote_register_events(const char *remote_name, struct trace_remote *remote,
885 					struct remote_event *events, size_t nr_events);
886 
/**
 * trace_remote_register() - Register a Tracefs remote
 * @name:	Name of the remote, used for the Tracefs remotes/ directory.
 * @cbs:	Set of callbacks used to control the remote.
 * @priv:	Private data, passed to each callback from @cbs.
 * @events:	Array of events. &remote_event.name and &remote_event.id must be
 *		filled by the caller.
 * @nr_events:	Number of events in the @events array.
 *
 * A trace remote is an entity, outside of the kernel (most likely firmware or
 * hypervisor) capable of writing events into a Tracefs compatible ring-buffer.
 * The kernel would then act as a reader.
 *
 * The registered remote will be found under the Tracefs directory
 * remotes/<name>.
 *
 * Return: 0 on success, negative error code on failure.
 */
int trace_remote_register(const char *name, struct trace_remote_callbacks *cbs, void *priv,
			  struct remote_event *events, size_t nr_events)
{
	struct trace_remote *remote;
	int ret;

	remote = kzalloc_obj(*remote);
	if (!remote)
		return -ENOMEM;

	remote->cbs = cbs;
	remote->priv = priv;
	/* Default per-CPU buffer size: 7 KiB */
	remote->trace_buffer_size = 7 << 10;
	remote->poll_ms = 100;
	mutex_init(&remote->lock);
	init_rwsem(&remote->reader_lock);

	if (trace_remote_init_tracefs(name, remote)) {
		kfree(remote);
		return -ENOMEM;
	}

	ret = trace_remote_register_events(name, remote, events, nr_events);
	if (ret) {
		pr_err("Failed to register events for trace remote '%s' (%d)\n",
		       name, ret);
		/*
		 * NOTE(review): @remote and its tracefs directory are not torn
		 * down on this path (the tracefs files reference @remote, so a
		 * plain kfree() would be unsafe) — confirm this is intentional.
		 */
		return ret;
	}

	ret = cbs->init ? cbs->init(remote->dentry, priv) : 0;
	if (ret)
		pr_err("Init failed for trace remote '%s' (%d)\n", name, ret);

	return ret;
}
EXPORT_SYMBOL_GPL(trace_remote_register);
941 
/**
 * trace_remote_free_buffer() - Free trace buffer allocated with trace_remote_alloc_buffer()
 * @desc:	Descriptor of the per-CPU ring-buffers, originally filled by
 *		trace_remote_alloc_buffer()
 *
 * Most likely called from &trace_remote_callbacks.unload_trace_buffer.
 */
void trace_remote_free_buffer(struct trace_buffer_desc *desc)
{
	struct ring_buffer_desc *rb_desc;
	int cpu;

	/* Only CPUs recorded in @desc are walked */
	for_each_ring_buffer_desc(rb_desc, cpu, desc) {
		unsigned int id;

		free_page(rb_desc->meta_va);

		for (id = 0; id < rb_desc->nr_page_va; id++)
			free_page(rb_desc->page_va[id]);
	}
}
EXPORT_SYMBOL_GPL(trace_remote_free_buffer);
964 
965 /**
966  * trace_remote_alloc_buffer() - Dynamically allocate a trace buffer
967  * @desc:		Uninitialized trace_buffer_desc
968  * @desc_size:		Size of the trace_buffer_desc. Must be at least equal to
969  *			trace_buffer_desc_size()
970  * @buffer_size:	Size in bytes of each per-CPU ring-buffer
971  * @cpumask:		CPUs to allocate a ring-buffer for
972  *
973  * Helper to dynamically allocate a set of pages (enough to cover @buffer_size)
974  * for each CPU from @cpumask and fill @desc. Most likely called from
975  * &trace_remote_callbacks.load_trace_buffer.
976  *
977  * Return: 0 on success, negative error code on failure.
978  */
979 int trace_remote_alloc_buffer(struct trace_buffer_desc *desc, size_t desc_size, size_t buffer_size,
980 			      const struct cpumask *cpumask)
981 {
982 	unsigned int nr_pages = max(DIV_ROUND_UP(buffer_size, PAGE_SIZE), 2UL) + 1;
983 	void *desc_end = desc + desc_size;
984 	struct ring_buffer_desc *rb_desc;
985 	int cpu, ret = -ENOMEM;
986 
987 	if (desc_size < struct_size(desc, __data, 0))
988 		return -EINVAL;
989 
990 	desc->nr_cpus = 0;
991 	desc->struct_len = struct_size(desc, __data, 0);
992 
993 	rb_desc = (struct ring_buffer_desc *)&desc->__data[0];
994 
995 	for_each_cpu(cpu, cpumask) {
996 		unsigned int id;
997 
998 		if ((void *)rb_desc + struct_size(rb_desc, page_va, nr_pages) > desc_end) {
999 			ret = -EINVAL;
1000 			goto err;
1001 		}
1002 
1003 		rb_desc->cpu = cpu;
1004 		rb_desc->nr_page_va = 0;
1005 		rb_desc->meta_va = (unsigned long)__get_free_page(GFP_KERNEL);
1006 		if (!rb_desc->meta_va)
1007 			goto err;
1008 
1009 		for (id = 0; id < nr_pages; id++) {
1010 			rb_desc->page_va[id] = (unsigned long)__get_free_page(GFP_KERNEL);
1011 			if (!rb_desc->page_va[id])
1012 				goto err;
1013 
1014 			rb_desc->nr_page_va++;
1015 		}
1016 		desc->nr_cpus++;
1017 		desc->struct_len += offsetof(struct ring_buffer_desc, page_va);
1018 		desc->struct_len += struct_size(rb_desc, page_va, rb_desc->nr_page_va);
1019 		rb_desc = __next_ring_buffer_desc(rb_desc);
1020 	}
1021 
1022 	return 0;
1023 
1024 err:
1025 	trace_remote_free_buffer(desc);
1026 	return ret;
1027 }
1028 EXPORT_SYMBOL_GPL(trace_remote_alloc_buffer);
1029 
1030 static int
1031 trace_remote_enable_event(struct trace_remote *remote, struct remote_event *evt, bool enable)
1032 {
1033 	int ret;
1034 
1035 	lockdep_assert_held(&remote->lock);
1036 
1037 	if (evt->enabled == enable)
1038 		return 0;
1039 
1040 	ret = remote->cbs->enable_event(evt->id, enable, remote->priv);
1041 	if (ret)
1042 		return ret;
1043 
1044 	evt->enabled = enable;
1045 
1046 	return 0;
1047 }
1048 
1049 static int remote_event_enable_show(struct seq_file *s, void *unused)
1050 {
1051 	struct remote_event *evt = s->private;
1052 
1053 	seq_printf(s, "%d\n", evt->enabled);
1054 
1055 	return 0;
1056 }
1057 
/*
 * Write to an event's "enable" file: any non-zero value enables the event on
 * the remote, zero disables it.
 */
static ssize_t remote_event_enable_write(struct file *filp, const char __user *ubuf,
					 size_t count, loff_t *ppos)
{
	struct seq_file *seq = filp->private_data;
	struct remote_event *evt = seq->private;
	struct trace_remote *remote = evt->remote;
	u8 enable;
	int ret;

	ret = kstrtou8_from_user(ubuf, count, 10, &enable);
	if (ret)
		return ret;

	guard(mutex)(&remote->lock);

	ret = trace_remote_enable_event(remote, evt, enable);
	if (ret)
		return ret;

	return count;
}
DEFINE_SHOW_STORE_ATTRIBUTE(remote_event_enable);
1080 
1081 static int remote_event_id_show(struct seq_file *s, void *unused)
1082 {
1083 	struct remote_event *evt = s->private;
1084 
1085 	seq_printf(s, "%d\n", evt->id);
1086 
1087 	return 0;
1088 }
1089 DEFINE_SHOW_ATTRIBUTE(remote_event_id);
1090 
/*
 * Emit the event format description in the layout used by Tracefs "format"
 * files (common_type header followed by one line per field).
 */
static int remote_event_format_show(struct seq_file *s, void *unused)
{
	size_t offset = sizeof(struct remote_event_hdr);
	struct remote_event *evt = s->private;
	struct trace_event_fields *field;

	seq_printf(s, "name: %s\n", evt->name);
	seq_printf(s, "ID: %d\n", evt->id);
	seq_puts(s,
		 "format:\n\tfield:unsigned short common_type;\toffset:0;\tsize:2;\tsigned:0;\n\n");

	/* Fields array is terminated by a NULL name; offsets assume dense packing */
	field = &evt->fields[0];
	while (field->name) {
		seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%u;\tsigned:%d;\n",
			   field->type, field->name, offset, field->size,
			   field->is_signed);
		offset += field->size;
		field++;
	}

	if (field != &evt->fields[0])
		seq_puts(s, "\n");

	seq_printf(s, "print fmt: %s\n", evt->print_fmt);

	return 0;
}
DEFINE_SHOW_ATTRIBUTE(remote_event_format);
1119 
1120 static int remote_event_callback(const char *name, umode_t *mode, void **data,
1121 				 const struct file_operations **fops)
1122 {
1123 	if (!strcmp(name, "enable")) {
1124 		*mode = TRACEFS_MODE_WRITE;
1125 		*fops = &remote_event_enable_fops;
1126 		return 1;
1127 	}
1128 
1129 	if (!strcmp(name, "id")) {
1130 		*mode = TRACEFS_MODE_READ;
1131 		*fops = &remote_event_id_fops;
1132 		return 1;
1133 	}
1134 
1135 	if (!strcmp(name, "format")) {
1136 		*mode = TRACEFS_MODE_READ;
1137 		*fops = &remote_event_format_fops;
1138 		return 1;
1139 	}
1140 
1141 	return 0;
1142 }
1143 
/*
 * Write 0/1 to the top-level events/enable file to toggle every event of the
 * remote at once. Per-event failures are silently ignored.
 */
static ssize_t remote_events_dir_enable_write(struct file *filp, const char __user *ubuf,
					      size_t count, loff_t *ppos)
{
	struct trace_remote *remote = file_inode(filp)->i_private;
	int i, ret;
	u8 enable;

	ret = kstrtou8_from_user(ubuf, count, 10, &enable);
	if (ret)
		return ret;

	guard(mutex)(&remote->lock);

	for (i = 0; i < remote->nr_events; i++) {
		struct remote_event *evt = &remote->events[i];

		trace_remote_enable_event(remote, evt, enable);
	}

	return count;
}
1165 
/*
 * Read events/enable: '0' when every event is disabled, '1' when every event
 * is enabled, 'X' when mixed. A remote without events reads as '0'.
 */
static ssize_t remote_events_dir_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
					     loff_t *ppos)
{
	struct trace_remote *remote = file_inode(filp)->i_private;
	const char enabled_char[] = {'0', '1', 'X'};
	char enabled_str[] = " \n";
	int i, enabled = -1;

	guard(mutex)(&remote->lock);

	for (i = 0; i < remote->nr_events; i++) {
		struct remote_event *evt = &remote->events[i];

		if (enabled == -1) {
			/* First event sets the baseline */
			enabled = evt->enabled;
		} else if (enabled != evt->enabled) {
			enabled = 2;
			break;
		}
	}

	/* enabled == -1 means there are no events at all: report '0' */
	enabled_str[0] = enabled_char[enabled == -1 ? 0 : enabled];

	return simple_read_from_buffer(ubuf, cnt, ppos, enabled_str, 2);
}
1191 
/* "events/enable": bulk enable/disable of every event of the remote */
static const struct file_operations remote_events_dir_enable_fops = {
	.write = remote_events_dir_enable_write,
	.read = remote_events_dir_enable_read,
};
1196 
1197 static ssize_t
1198 remote_events_dir_header_page_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
1199 {
1200 	struct trace_seq *s;
1201 	int ret;
1202 
1203 	s = kmalloc(sizeof(*s), GFP_KERNEL);
1204 	if (!s)
1205 		return -ENOMEM;
1206 
1207 	trace_seq_init(s);
1208 
1209 	ring_buffer_print_page_header(NULL, s);
1210 	ret = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, trace_seq_used(s));
1211 	kfree(s);
1212 
1213 	return ret;
1214 }
1215 
/* "events/header_page": read-only description of the ring-buffer page header */
static const struct file_operations remote_events_dir_header_page_fops = {
	.read = remote_events_dir_header_page_read,
};
1219 
1220 static ssize_t
1221 remote_events_dir_header_event_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
1222 {
1223 	struct trace_seq *s;
1224 	int ret;
1225 
1226 	s = kmalloc(sizeof(*s), GFP_KERNEL);
1227 	if (!s)
1228 		return -ENOMEM;
1229 
1230 	trace_seq_init(s);
1231 
1232 	ring_buffer_print_entry_header(s);
1233 	ret = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, trace_seq_used(s));
1234 	kfree(s);
1235 
1236 	return ret;
1237 }
1238 
/* "events/header_event": read-only description of the ring-buffer entry header */
static const struct file_operations remote_events_dir_header_event_fops = {
	.read = remote_events_dir_header_event_read,
};
1242 
1243 static int remote_events_dir_callback(const char *name, umode_t *mode, void **data,
1244 				      const struct file_operations **fops)
1245 {
1246 	if (!strcmp(name, "enable")) {
1247 		*mode = TRACEFS_MODE_WRITE;
1248 		*fops = &remote_events_dir_enable_fops;
1249 		return 1;
1250 	}
1251 
1252 	if (!strcmp(name, "header_page")) {
1253 		*mode = TRACEFS_MODE_READ;
1254 		*fops = &remote_events_dir_header_page_fops;
1255 		return 1;
1256 	}
1257 
1258 	if (!strcmp(name, "header_event")) {
1259 		*mode = TRACEFS_MODE_READ;
1260 		*fops = &remote_events_dir_header_event_fops;
1261 		return 1;
1262 	}
1263 
1264 	return 0;
1265 }
1266 
1267 static int trace_remote_init_eventfs(const char *remote_name, struct trace_remote *remote,
1268 				     struct remote_event *evt)
1269 {
1270 	struct eventfs_inode *eventfs = remote->eventfs;
1271 	static struct eventfs_entry dir_entries[] = {
1272 		{
1273 			.name		= "enable",
1274 			.callback	= remote_events_dir_callback,
1275 		}, {
1276 			.name		= "header_page",
1277 			.callback	= remote_events_dir_callback,
1278 		}, {
1279 			.name		= "header_event",
1280 			.callback	= remote_events_dir_callback,
1281 		}
1282 	};
1283 	static struct eventfs_entry entries[] = {
1284 		{
1285 			.name		= "enable",
1286 			.callback	= remote_event_callback,
1287 		}, {
1288 			.name		= "id",
1289 			.callback	= remote_event_callback,
1290 		}, {
1291 			.name		= "format",
1292 			.callback	= remote_event_callback,
1293 		}
1294 	};
1295 	bool eventfs_create = false;
1296 
1297 	if (!eventfs) {
1298 		eventfs = eventfs_create_events_dir("events", remote->dentry, dir_entries,
1299 						    ARRAY_SIZE(dir_entries), remote);
1300 		if (IS_ERR(eventfs))
1301 			return PTR_ERR(eventfs);
1302 
1303 		/*
1304 		 * Create similar hierarchy as local events even if a single system is supported at
1305 		 * the moment
1306 		 */
1307 		eventfs = eventfs_create_dir(remote_name, eventfs, NULL, 0, NULL);
1308 		if (IS_ERR(eventfs))
1309 			return PTR_ERR(eventfs);
1310 
1311 		remote->eventfs = eventfs;
1312 		eventfs_create = true;
1313 	}
1314 
1315 	eventfs = eventfs_create_dir(evt->name, eventfs, entries, ARRAY_SIZE(entries), evt);
1316 	if (IS_ERR(eventfs)) {
1317 		if (eventfs_create) {
1318 			eventfs_remove_events_dir(remote->eventfs);
1319 			remote->eventfs = NULL;
1320 		}
1321 		return PTR_ERR(eventfs);
1322 	}
1323 
1324 	return 0;
1325 }
1326 
1327 static int trace_remote_attach_events(struct trace_remote *remote, struct remote_event *events,
1328 				      size_t nr_events)
1329 {
1330 	int i;
1331 
1332 	for (i = 0; i < nr_events; i++) {
1333 		struct remote_event *evt = &events[i];
1334 
1335 		if (evt->remote)
1336 			return -EEXIST;
1337 
1338 		evt->remote = remote;
1339 
1340 		/* We need events to be sorted for efficient lookup */
1341 		if (i && evt->id <= events[i - 1].id)
1342 			return -EINVAL;
1343 	}
1344 
1345 	remote->events = events;
1346 	remote->nr_events = nr_events;
1347 
1348 	return 0;
1349 }
1350 
1351 static int trace_remote_register_events(const char *remote_name, struct trace_remote *remote,
1352 					struct remote_event *events, size_t nr_events)
1353 {
1354 	int i, ret;
1355 
1356 	ret = trace_remote_attach_events(remote, events, nr_events);
1357 	if (ret)
1358 		return ret;
1359 
1360 	for (i = 0; i < nr_events; i++) {
1361 		struct remote_event *evt = &events[i];
1362 
1363 		ret = trace_remote_init_eventfs(remote_name, remote, evt);
1364 		if (ret)
1365 			pr_warn("Failed to init eventfs for event '%s' (%d)",
1366 				evt->name, ret);
1367 	}
1368 
1369 	return 0;
1370 }
1371 
1372 static int __cmp_events(const void *key, const void *data)
1373 {
1374 	const struct remote_event *evt = data;
1375 	int id = (int)((long)key);
1376 
1377 	return id - (int)evt->id;
1378 }
1379 
1380 static struct remote_event *trace_remote_find_event(struct trace_remote *remote, unsigned short id)
1381 {
1382 	return bsearch((const void *)(unsigned long)id, remote->events, remote->nr_events,
1383 		       sizeof(*remote->events), __cmp_events);
1384 }
1385