xref: /linux/kernel/trace/trace_remote.c (revision 330b0cceb30634864d1e9c661eb5524c52d70c07)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2025 - Google LLC
4  * Author: Vincent Donnefort <vdonnefort@google.com>
5  */
6 
7 #include <linux/kstrtox.h>
8 #include <linux/lockdep.h>
9 #include <linux/mutex.h>
10 #include <linux/tracefs.h>
11 #include <linux/trace_remote.h>
12 #include <linux/trace_seq.h>
13 #include <linux/types.h>
14 
15 #include "trace.h"
16 
17 #define TRACEFS_DIR		"remotes"
18 #define TRACEFS_MODE_WRITE	0640
19 #define TRACEFS_MODE_READ	0440
20 
/* How a trace_remote_iterator reads the remote ring-buffer */
enum tri_type {
	TRI_CONSUMING,		/* destructive read: events are consumed (trace_pipe) */
	TRI_NONCONSUMING,	/* non-destructive read via ring_buffer iterators (trace) */
};
25 
/*
 * Per-open-file reader state for a remote's ring-buffer. Created by
 * trace_remote_iter() and released by trace_remote_iter_free().
 */
struct trace_remote_iterator {
	struct trace_remote		*remote;	/* Remote being read */
	struct trace_seq		seq;		/* Formatting buffer for output */
	struct delayed_work		poll_work;	/* Periodic remote poll (TRI_CONSUMING only) */
	unsigned long			lost_events;	/* Events dropped before the peeked one */
	u64				ts;		/* Timestamp of the peeked event */
	struct ring_buffer_iter		*rb_iter;	/* Single-CPU iter (TRI_NONCONSUMING) */
	struct ring_buffer_iter		**rb_iters;	/* Per-CPU iters when cpu == ALL_CPUS */
	int				cpu;		/* CPU being read, or RING_BUFFER_ALL_CPUS */
	int				evt_cpu;	/* CPU of the most recently peeked event */
	loff_t				pos;		/* seq_file position (trace file) */
	enum tri_type			type;		/* Consuming vs non-consuming */
};
39 
/* A registered remote and its lazily-loaded local ring-buffer mapping */
struct trace_remote {
	struct trace_remote_callbacks	*cbs;			/* Control callbacks */
	void				*priv;			/* Passed to every callback */
	struct trace_buffer		*trace_buffer;		/* NULL until loaded */
	struct trace_buffer_desc	*trace_buffer_desc;	/* From cbs->load_trace_buffer() */
	unsigned long			trace_buffer_size;	/* Requested size, in bytes */
	struct ring_buffer_remote	rb_remote;		/* Glue for ring_buffer_alloc_remote() */
	struct mutex			lock;			/* Serializes load/unload/state changes */
	struct rw_semaphore		reader_lock;		/* Global reader exclusion */
	struct rw_semaphore		*pcpu_reader_locks;	/* Per-CPU reader exclusion (lazily allocated) */
	unsigned int			nr_readers;		/* Number of live iterators */
	unsigned int			poll_ms;		/* Remote polling period for consumers */
	bool				tracing_on;		/* Remote writer currently enabled */
};
54 
55 static bool trace_remote_loaded(struct trace_remote *remote)
56 {
57 	return !!remote->trace_buffer;
58 }
59 
60 static int trace_remote_load(struct trace_remote *remote)
61 {
62 	struct ring_buffer_remote *rb_remote = &remote->rb_remote;
63 	struct trace_buffer_desc *desc;
64 
65 	lockdep_assert_held(&remote->lock);
66 
67 	if (trace_remote_loaded(remote))
68 		return 0;
69 
70 	desc = remote->cbs->load_trace_buffer(remote->trace_buffer_size, remote->priv);
71 	if (IS_ERR(desc))
72 		return PTR_ERR(desc);
73 
74 	rb_remote->desc = desc;
75 	rb_remote->swap_reader_page = remote->cbs->swap_reader_page;
76 	rb_remote->priv = remote->priv;
77 	rb_remote->reset = remote->cbs->reset;
78 	remote->trace_buffer = ring_buffer_alloc_remote(rb_remote);
79 	if (!remote->trace_buffer) {
80 		remote->cbs->unload_trace_buffer(desc, remote->priv);
81 		return -ENOMEM;
82 	}
83 
84 	remote->trace_buffer_desc = desc;
85 
86 	return 0;
87 }
88 
89 static void trace_remote_try_unload(struct trace_remote *remote)
90 {
91 	lockdep_assert_held(&remote->lock);
92 
93 	if (!trace_remote_loaded(remote))
94 		return;
95 
96 	/* The buffer is being read or writable */
97 	if (remote->nr_readers || remote->tracing_on)
98 		return;
99 
100 	/* The buffer has readable data */
101 	if (!ring_buffer_empty(remote->trace_buffer))
102 		return;
103 
104 	ring_buffer_free(remote->trace_buffer);
105 	remote->trace_buffer = NULL;
106 	remote->cbs->unload_trace_buffer(remote->trace_buffer_desc, remote->priv);
107 }
108 
109 static int trace_remote_enable_tracing(struct trace_remote *remote)
110 {
111 	int ret;
112 
113 	lockdep_assert_held(&remote->lock);
114 
115 	if (remote->tracing_on)
116 		return 0;
117 
118 	ret = trace_remote_load(remote);
119 	if (ret)
120 		return ret;
121 
122 	ret = remote->cbs->enable_tracing(true, remote->priv);
123 	if (ret) {
124 		trace_remote_try_unload(remote);
125 		return ret;
126 	}
127 
128 	remote->tracing_on = true;
129 
130 	return 0;
131 }
132 
133 static int trace_remote_disable_tracing(struct trace_remote *remote)
134 {
135 	int ret;
136 
137 	lockdep_assert_held(&remote->lock);
138 
139 	if (!remote->tracing_on)
140 		return 0;
141 
142 	ret = remote->cbs->enable_tracing(false, remote->priv);
143 	if (ret)
144 		return ret;
145 
146 	ring_buffer_poll_remote(remote->trace_buffer, RING_BUFFER_ALL_CPUS);
147 	remote->tracing_on = false;
148 	trace_remote_try_unload(remote);
149 
150 	return 0;
151 }
152 
153 static void trace_remote_reset(struct trace_remote *remote, int cpu)
154 {
155 	lockdep_assert_held(&remote->lock);
156 
157 	if (!trace_remote_loaded(remote))
158 		return;
159 
160 	if (cpu == RING_BUFFER_ALL_CPUS)
161 		ring_buffer_reset(remote->trace_buffer);
162 	else
163 		ring_buffer_reset_cpu(remote->trace_buffer, cpu);
164 
165 	trace_remote_try_unload(remote);
166 }
167 
168 static ssize_t
169 tracing_on_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
170 {
171 	struct trace_remote *remote = filp->private_data;
172 	unsigned long val;
173 	int ret;
174 
175 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
176 	if (ret)
177 		return ret;
178 
179 	guard(mutex)(&remote->lock);
180 
181 	ret = val ? trace_remote_enable_tracing(remote) : trace_remote_disable_tracing(remote);
182 	if (ret)
183 		return ret;
184 
185 	return cnt;
186 }
187 static int tracing_on_show(struct seq_file *s, void *unused)
188 {
189 	struct trace_remote *remote = s->private;
190 
191 	seq_printf(s, "%d\n", remote->tracing_on);
192 
193 	return 0;
194 }
195 DEFINE_SHOW_STORE_ATTRIBUTE(tracing_on);
196 
197 static ssize_t buffer_size_kb_write(struct file *filp, const char __user *ubuf, size_t cnt,
198 				    loff_t *ppos)
199 {
200 	struct trace_remote *remote = filp->private_data;
201 	unsigned long val;
202 	int ret;
203 
204 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
205 	if (ret)
206 		return ret;
207 
208 	/* KiB to Bytes */
209 	if (!val || check_shl_overflow(val, 10, &val))
210 		return -EINVAL;
211 
212 	guard(mutex)(&remote->lock);
213 
214 	if (trace_remote_loaded(remote))
215 		return -EBUSY;
216 
217 	remote->trace_buffer_size = val;
218 
219 	return cnt;
220 }
221 
222 static int buffer_size_kb_show(struct seq_file *s, void *unused)
223 {
224 	struct trace_remote *remote = s->private;
225 
226 	seq_printf(s, "%lu (%s)\n", remote->trace_buffer_size >> 10,
227 		   trace_remote_loaded(remote) ? "loaded" : "unloaded");
228 
229 	return 0;
230 }
231 DEFINE_SHOW_STORE_ATTRIBUTE(buffer_size_kb);
232 
233 static int trace_remote_get(struct trace_remote *remote, int cpu)
234 {
235 	int ret;
236 
237 	if (remote->nr_readers == UINT_MAX)
238 		return -EBUSY;
239 
240 	ret = trace_remote_load(remote);
241 	if (ret)
242 		return ret;
243 
244 	if (cpu != RING_BUFFER_ALL_CPUS && !remote->pcpu_reader_locks) {
245 		int lock_cpu;
246 
247 		remote->pcpu_reader_locks = kcalloc(nr_cpu_ids, sizeof(*remote->pcpu_reader_locks),
248 						    GFP_KERNEL);
249 		if (!remote->pcpu_reader_locks) {
250 			trace_remote_try_unload(remote);
251 			return -ENOMEM;
252 		}
253 
254 		for_each_possible_cpu(lock_cpu)
255 			init_rwsem(&remote->pcpu_reader_locks[lock_cpu]);
256 	}
257 
258 	remote->nr_readers++;
259 
260 	return 0;
261 }
262 
263 static void trace_remote_put(struct trace_remote *remote)
264 {
265 	if (WARN_ON(!remote->nr_readers))
266 		return;
267 
268 	remote->nr_readers--;
269 	if (remote->nr_readers)
270 		return;
271 
272 	kfree(remote->pcpu_reader_locks);
273 	remote->pcpu_reader_locks = NULL;
274 
275 	trace_remote_try_unload(remote);
276 }
277 
278 static void __poll_remote(struct work_struct *work)
279 {
280 	struct delayed_work *dwork = to_delayed_work(work);
281 	struct trace_remote_iterator *iter;
282 
283 	iter = container_of(dwork, struct trace_remote_iterator, poll_work);
284 	ring_buffer_poll_remote(iter->remote->trace_buffer, iter->cpu);
285 	schedule_delayed_work((struct delayed_work *)work,
286 			      msecs_to_jiffies(iter->remote->poll_ms));
287 }
288 
289 static void __free_ring_buffer_iter(struct trace_remote_iterator *iter, int cpu)
290 {
291 	if (cpu != RING_BUFFER_ALL_CPUS) {
292 		ring_buffer_read_finish(iter->rb_iter);
293 		return;
294 	}
295 
296 	for_each_possible_cpu(cpu) {
297 		if (iter->rb_iters[cpu])
298 			ring_buffer_read_finish(iter->rb_iters[cpu]);
299 	}
300 
301 	kfree(iter->rb_iters);
302 }
303 
304 static int __alloc_ring_buffer_iter(struct trace_remote_iterator *iter, int cpu)
305 {
306 	if (cpu != RING_BUFFER_ALL_CPUS) {
307 		iter->rb_iter = ring_buffer_read_start(iter->remote->trace_buffer, cpu, GFP_KERNEL);
308 
309 		return iter->rb_iter ? 0 : -ENOMEM;
310 	}
311 
312 	iter->rb_iters = kcalloc(nr_cpu_ids, sizeof(*iter->rb_iters), GFP_KERNEL);
313 	if (!iter->rb_iters)
314 		return -ENOMEM;
315 
316 	for_each_possible_cpu(cpu) {
317 		iter->rb_iters[cpu] = ring_buffer_read_start(iter->remote->trace_buffer, cpu,
318 							     GFP_KERNEL);
319 		if (!iter->rb_iters[cpu]) {
320 			__free_ring_buffer_iter(iter, RING_BUFFER_ALL_CPUS);
321 			return -ENOMEM;
322 		}
323 	}
324 
325 	return 0;
326 }
327 
328 static struct trace_remote_iterator
329 *trace_remote_iter(struct trace_remote *remote, int cpu, enum tri_type type)
330 {
331 	struct trace_remote_iterator *iter = NULL;
332 	int ret;
333 
334 	lockdep_assert_held(&remote->lock);
335 
336 	if (type == TRI_NONCONSUMING && !trace_remote_loaded(remote))
337 		return NULL;
338 
339 	ret = trace_remote_get(remote, cpu);
340 	if (ret)
341 		return ERR_PTR(ret);
342 
343 	/* Test the CPU */
344 	ret = ring_buffer_poll_remote(remote->trace_buffer, cpu);
345 	if (ret)
346 		goto err;
347 
348 	iter = kzalloc_obj(*iter);
349 	if (iter) {
350 		iter->remote = remote;
351 		iter->cpu = cpu;
352 		iter->type = type;
353 		trace_seq_init(&iter->seq);
354 
355 		switch (type) {
356 		case TRI_CONSUMING:
357 			INIT_DELAYED_WORK(&iter->poll_work, __poll_remote);
358 			schedule_delayed_work(&iter->poll_work, msecs_to_jiffies(remote->poll_ms));
359 			break;
360 		case TRI_NONCONSUMING:
361 			ret = __alloc_ring_buffer_iter(iter, cpu);
362 			break;
363 		}
364 
365 		if (ret)
366 			goto err;
367 
368 		return iter;
369 	}
370 	ret = -ENOMEM;
371 
372 err:
373 	kfree(iter);
374 	trace_remote_put(remote);
375 
376 	return ERR_PTR(ret);
377 }
378 
/*
 * Release an iterator created by trace_remote_iter() and drop its reader
 * reference on the remote. NULL-safe. Caller holds remote->lock.
 */
static void trace_remote_iter_free(struct trace_remote_iterator *iter)
{
	struct trace_remote *remote;

	if (!iter)
		return;

	remote = iter->remote;

	lockdep_assert_held(&remote->lock);

	switch (iter->type) {
	case TRI_CONSUMING:
		/* Make sure the periodic remote poll can no longer run */
		cancel_delayed_work_sync(&iter->poll_work);
		break;
	case TRI_NONCONSUMING:
		__free_ring_buffer_iter(iter, iter->cpu);
		break;
	}

	kfree(iter);
	trace_remote_put(remote);
}
402 
/*
 * Take the reader locks covering iter->cpu before reading events.
 *
 * Consuming readers modify the ring-buffer, so they take their lock
 * exclusively:
 *  - ALL_CPUS consumer:	reader_lock (write)
 *  - single-CPU consumer:	reader_lock (read) + per-CPU lock (write)
 *  - non-consuming readers:	the same lock(s), read mode only
 */
static void trace_remote_iter_read_start(struct trace_remote_iterator *iter)
{
	struct trace_remote *remote = iter->remote;
	int cpu = iter->cpu;

	/* Acquire global reader lock */
	if (cpu == RING_BUFFER_ALL_CPUS && iter->type == TRI_CONSUMING)
		down_write(&remote->reader_lock);
	else
		down_read(&remote->reader_lock);

	if (cpu == RING_BUFFER_ALL_CPUS)
		return;

	/*
	 * No need for the remote lock here, iter holds a reference on
	 * remote->nr_readers
	 */

	/* Get the per-CPU one */
	if (WARN_ON_ONCE(!remote->pcpu_reader_locks))
		return;

	if (iter->type == TRI_CONSUMING)
		down_write(&remote->pcpu_reader_locks[cpu]);
	else
		down_read(&remote->pcpu_reader_locks[cpu]);
}
431 
/*
 * Release the locks taken by trace_remote_iter_read_start(), per-CPU lock
 * first, then the global one.
 */
static void trace_remote_iter_read_finished(struct trace_remote_iterator *iter)
{
	struct trace_remote *remote = iter->remote;
	int cpu = iter->cpu;

	/* Release per-CPU reader lock */
	if (cpu != RING_BUFFER_ALL_CPUS) {
		/*
		 * No need for the remote lock here, iter holds a reference on
		 * remote->nr_readers
		 */
		if (iter->type == TRI_CONSUMING)
			up_write(&remote->pcpu_reader_locks[cpu]);
		else
			up_read(&remote->pcpu_reader_locks[cpu]);
	}

	/* Release global reader lock */
	if (cpu == RING_BUFFER_ALL_CPUS && iter->type == TRI_CONSUMING)
		up_write(&remote->reader_lock);
	else
		up_read(&remote->reader_lock);
}
455 
456 static struct ring_buffer_iter *__get_rb_iter(struct trace_remote_iterator *iter, int cpu)
457 {
458 	return iter->cpu != RING_BUFFER_ALL_CPUS ? iter->rb_iter : iter->rb_iters[cpu];
459 }
460 
461 static struct ring_buffer_event *
462 __peek_event(struct trace_remote_iterator *iter, int cpu, u64 *ts, unsigned long *lost_events)
463 {
464 	struct ring_buffer_event *rb_evt;
465 	struct ring_buffer_iter *rb_iter;
466 
467 	switch (iter->type) {
468 	case TRI_CONSUMING:
469 		return ring_buffer_peek(iter->remote->trace_buffer, cpu, ts, lost_events);
470 	case TRI_NONCONSUMING:
471 		rb_iter = __get_rb_iter(iter, cpu);
472 		rb_evt = ring_buffer_iter_peek(rb_iter, ts);
473 		if (!rb_evt)
474 			return NULL;
475 
476 		*lost_events = ring_buffer_iter_dropped(rb_iter);
477 
478 		return rb_evt;
479 	}
480 
481 	return NULL;
482 }
483 
/*
 * Peek the next event to output into iter->ts / iter->evt_cpu /
 * iter->lost_events.
 *
 * A single-CPU iterator simply peeks that CPU. An ALL_CPUS iterator scans
 * every possible CPU and keeps the event with the lowest timestamp.
 *
 * Returns true when an event was found.
 */
static bool trace_remote_iter_read_event(struct trace_remote_iterator *iter)
{
	struct trace_buffer *trace_buffer = iter->remote->trace_buffer;
	int cpu = iter->cpu;

	if (cpu != RING_BUFFER_ALL_CPUS) {
		if (ring_buffer_empty_cpu(trace_buffer, cpu))
			return false;

		if (!__peek_event(iter, cpu, &iter->ts, &iter->lost_events))
			return false;

		iter->evt_cpu = cpu;
		return true;
	}

	/* U64_MAX doubles as the "no event found yet" marker */
	iter->ts = U64_MAX;
	for_each_possible_cpu(cpu) {
		unsigned long lost_events;
		u64 ts;

		if (ring_buffer_empty_cpu(trace_buffer, cpu))
			continue;

		if (!__peek_event(iter, cpu, &ts, &lost_events))
			continue;

		/* Keep the oldest event across all CPUs */
		if (ts >= iter->ts)
			continue;

		iter->ts = ts;
		iter->evt_cpu = cpu;
		iter->lost_events = lost_events;
	}

	return iter->ts != U64_MAX;
}
521 
522 static void trace_remote_iter_move(struct trace_remote_iterator *iter)
523 {
524 	struct trace_buffer *trace_buffer = iter->remote->trace_buffer;
525 
526 	switch (iter->type) {
527 	case TRI_CONSUMING:
528 		ring_buffer_consume(trace_buffer, iter->evt_cpu, NULL, NULL);
529 		break;
530 	case TRI_NONCONSUMING:
531 		ring_buffer_iter_advance(__get_rb_iter(iter, iter->evt_cpu));
532 		break;
533 	}
534 }
535 
/*
 * Format the header for the last peeked event ("[CPU]\tsecs.usecs: ") into
 * iter->seq, preceded by a LOST EVENTS line when some were dropped.
 *
 * Returns -EOVERFLOW when iter->seq ran out of room, 0 otherwise.
 */
static int trace_remote_iter_print_event(struct trace_remote_iterator *iter)
{
	unsigned long usecs_rem;
	u64 ts = iter->ts;

	if (iter->lost_events)
		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
				 iter->evt_cpu, iter->lost_events);

	/* Assumes ts is in ns: scale to us, then split secs/usecs remainder */
	do_div(ts, 1000);
	usecs_rem = do_div(ts, USEC_PER_SEC);

	trace_seq_printf(&iter->seq, "[%03d]\t%5llu.%06lu: ", iter->evt_cpu,
			 ts, usecs_rem);

	return trace_seq_has_overflowed(&iter->seq) ? -EOVERFLOW : 0;
}
553 
554 static int trace_pipe_open(struct inode *inode, struct file *filp)
555 {
556 	struct trace_remote *remote = inode->i_private;
557 	struct trace_remote_iterator *iter;
558 	int cpu = tracing_get_cpu(inode);
559 
560 	guard(mutex)(&remote->lock);
561 
562 	iter = trace_remote_iter(remote, cpu, TRI_CONSUMING);
563 	if (IS_ERR(iter))
564 		return PTR_ERR(iter);
565 
566 	filp->private_data = iter;
567 
568 	return IS_ERR(iter) ? PTR_ERR(iter) : 0;
569 }
570 
571 static int trace_pipe_release(struct inode *inode, struct file *filp)
572 {
573 	struct trace_remote_iterator *iter = filp->private_data;
574 	struct trace_remote *remote = iter->remote;
575 
576 	guard(mutex)(&remote->lock);
577 
578 	trace_remote_iter_free(iter);
579 
580 	return 0;
581 }
582 
/*
 * Blocking consuming read: drain whatever is already formatted in
 * iter->seq to userspace; once empty (-EBUSY), wait for ring-buffer data,
 * format as many events as fit and loop back to the copy.
 */
static ssize_t trace_pipe_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
{
	struct trace_remote_iterator *iter = filp->private_data;
	struct trace_buffer *trace_buffer = iter->remote->trace_buffer;
	int ret;

copy_to_user:
	ret = trace_seq_to_user(&iter->seq, ubuf, cnt);
	if (ret != -EBUSY)
		return ret;

	trace_seq_init(&iter->seq);

	/* Block until this reader's CPU(s) have something to consume */
	ret = ring_buffer_wait(trace_buffer, iter->cpu, 0, NULL, NULL);
	if (ret < 0)
		return ret;

	trace_remote_iter_read_start(iter);

	while (trace_remote_iter_read_event(iter)) {
		int prev_len = iter->seq.seq.len;

		/* seq full: roll back the partial print, flush what we have */
		if (trace_remote_iter_print_event(iter)) {
			iter->seq.seq.len = prev_len;
			break;
		}

		trace_remote_iter_move(iter);
	}

	trace_remote_iter_read_finished(iter);

	goto copy_to_user;
}
617 
/* remotes/<name>/trace_pipe: consuming reader */
static const struct file_operations trace_pipe_fops = {
	.open		= trace_pipe_open,
	.read		= trace_pipe_read,
	.release	= trace_pipe_release,
};
623 
/*
 * seq_file ->next: peek the next event (caching it in iter->ts/evt_cpu for
 * trace_show()) and advance the underlying iterator past it.
 */
static void *trace_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_remote_iterator *iter = m->private;

	++*pos;

	/* iter is NULL when the remote had no buffer loaded at open time */
	if (!iter || !trace_remote_iter_read_event(iter))
		return NULL;

	trace_remote_iter_move(iter);
	iter->pos++;

	return iter;
}
638 
/*
 * seq_file ->start: take the reader locks and position the iterator at
 * *pos. The non-consuming iterator cannot rewind, so we only ever skip
 * forward from the position reached on the previous read.
 */
static void *trace_start(struct seq_file *m, loff_t *pos)
{
	struct trace_remote_iterator *iter = m->private;
	loff_t i;

	if (!iter)
		return NULL;

	trace_remote_iter_read_start(iter);

	if (!*pos) {
		/* Restart: -1 so the first trace_next() lands on pos 0 */
		iter->pos = -1;
		return trace_next(m, NULL, &i);
	}

	/* Skip forward until the requested position is reached */
	i = iter->pos;
	while (i < *pos) {
		iter = trace_next(m, NULL, &i);
		if (!iter)
			return NULL;
	}

	return iter;
}
663 
664 static int trace_show(struct seq_file *m, void *v)
665 {
666 	struct trace_remote_iterator *iter = v;
667 
668 	trace_seq_init(&iter->seq);
669 
670 	if (trace_remote_iter_print_event(iter)) {
671 		seq_printf(m, "[EVENT %d PRINT TOO BIG]\n", iter->evt->id);
672 		return 0;
673 	}
674 
675 	return trace_print_seq(m, &iter->seq);
676 }
677 
678 static void trace_stop(struct seq_file *m, void *v)
679 {
680 	struct trace_remote_iterator *iter = m->private;
681 
682 	if (iter)
683 		trace_remote_iter_read_finished(iter);
684 }
685 
/* seq_file operations for the non-consuming "trace" file */
static const struct seq_operations trace_sops = {
	.start		= trace_start,
	.next		= trace_next,
	.show		= trace_show,
	.stop		= trace_stop,
};
692 
693 static int trace_open(struct inode *inode, struct file *filp)
694 {
695 	struct trace_remote *remote = inode->i_private;
696 	struct trace_remote_iterator *iter = NULL;
697 	int cpu = tracing_get_cpu(inode);
698 	int ret;
699 
700 	if (!(filp->f_mode & FMODE_READ))
701 		return 0;
702 
703 	guard(mutex)(&remote->lock);
704 
705 	iter = trace_remote_iter(remote, cpu, TRI_NONCONSUMING);
706 	if (IS_ERR(iter))
707 		return PTR_ERR(iter);
708 
709 	ret = seq_open(filp, &trace_sops);
710 	if (ret) {
711 		trace_remote_iter_free(iter);
712 		return ret;
713 	}
714 
715 	((struct seq_file *)filp->private_data)->private = (void *)iter;
716 
717 	return 0;
718 }
719 
/* Release the seq_file and, if one was created, the non-consuming iterator */
static int trace_release(struct inode *inode, struct file *filp)
{
	struct trace_remote_iterator *iter;

	if (!(filp->f_mode & FMODE_READ))
		return 0;

	/* iter may be NULL if the remote was not loaded at open time */
	iter = ((struct seq_file *)filp->private_data)->private;
	seq_release(inode, filp);

	if (!iter)
		return 0;

	guard(mutex)(&iter->remote->lock);

	trace_remote_iter_free(iter);

	return 0;
}
739 
740 static ssize_t trace_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
741 {
742 	struct inode *inode = file_inode(filp);
743 	struct trace_remote *remote = inode->i_private;
744 	int cpu = tracing_get_cpu(inode);
745 
746 	guard(mutex)(&remote->lock);
747 
748 	trace_remote_reset(remote, cpu);
749 
750 	return cnt;
751 }
752 
/* remotes/<name>/trace: non-consuming reader; writing resets the buffer */
static const struct file_operations trace_fops = {
	.open		= trace_open,
	.write		= trace_write,
	.read		= seq_read,
	.read_iter	= seq_read_iter,
	.release	= trace_release,
};
760 
761 static int trace_remote_init_tracefs(const char *name, struct trace_remote *remote)
762 {
763 	struct dentry *remote_d, *percpu_d, *d;
764 	static struct dentry *root;
765 	static DEFINE_MUTEX(lock);
766 	bool root_inited = false;
767 	int cpu;
768 
769 	guard(mutex)(&lock);
770 
771 	if (!root) {
772 		root = tracefs_create_dir(TRACEFS_DIR, NULL);
773 		if (!root) {
774 			pr_err("Failed to create tracefs dir "TRACEFS_DIR"\n");
775 			return -ENOMEM;
776 		}
777 		root_inited = true;
778 	}
779 
780 	remote_d = tracefs_create_dir(name, root);
781 	if (!remote_d) {
782 		pr_err("Failed to create tracefs dir "TRACEFS_DIR"%s/\n", name);
783 		goto err;
784 	}
785 
786 	d = trace_create_file("tracing_on", TRACEFS_MODE_WRITE, remote_d, remote, &tracing_on_fops);
787 	if (!d)
788 		goto err;
789 
790 	d = trace_create_file("buffer_size_kb", TRACEFS_MODE_WRITE, remote_d, remote,
791 			      &buffer_size_kb_fops);
792 	if (!d)
793 		goto err;
794 
795 	d = trace_create_file("trace_pipe", TRACEFS_MODE_READ, remote_d, remote, &trace_pipe_fops);
796 	if (!d)
797 		goto err;
798 
799 	d = trace_create_file("trace", TRACEFS_MODE_WRITE, remote_d, remote, &trace_fops);
800 	if (!d)
801 		goto err;
802 
803 	percpu_d = tracefs_create_dir("per_cpu", remote_d);
804 	if (!percpu_d) {
805 		pr_err("Failed to create tracefs dir "TRACEFS_DIR"%s/per_cpu/\n", name);
806 		goto err;
807 	}
808 
809 	for_each_possible_cpu(cpu) {
810 		struct dentry *cpu_d;
811 		char cpu_name[16];
812 
813 		snprintf(cpu_name, sizeof(cpu_name), "cpu%d", cpu);
814 		cpu_d = tracefs_create_dir(cpu_name, percpu_d);
815 		if (!cpu_d) {
816 			pr_err("Failed to create tracefs dir "TRACEFS_DIR"%s/percpu/cpu%d\n",
817 			       name, cpu);
818 			goto err;
819 		}
820 
821 		d = trace_create_cpu_file("trace_pipe", TRACEFS_MODE_READ, cpu_d, remote, cpu,
822 					  &trace_pipe_fops);
823 		if (!d)
824 			goto err;
825 
826 		d = trace_create_cpu_file("trace", TRACEFS_MODE_WRITE, cpu_d, remote, cpu,
827 					  &trace_fops);
828 		if (!d)
829 			goto err;
830 	}
831 
832 	return 0;
833 
834 err:
835 	if (root_inited) {
836 		tracefs_remove(root);
837 		root = NULL;
838 	} else {
839 		tracefs_remove(remote_d);
840 	}
841 
842 	return -ENOMEM;
843 }
844 
/**
 * trace_remote_register() - Register a Tracefs remote
 * @name:	Name of the remote, used for the Tracefs remotes/ directory.
 * @cbs:	Set of callbacks used to control the remote.
 * @priv:	Private data, passed to each callback from @cbs.
 *
 * A trace remote is an entity, outside of the kernel (most likely firmware or
 * hypervisor) capable of writing events into a Tracefs compatible ring-buffer.
 * The kernel would then act as a reader.
 *
 * The registered remote will be found under the Tracefs directory
 * remotes/<name>.
 *
 * Return: 0 on success, negative error code on failure.
 */
int trace_remote_register(const char *name, struct trace_remote_callbacks *cbs, void *priv)
{
	struct trace_remote *remote;

	remote = kzalloc_obj(*remote);
	if (!remote)
		return -ENOMEM;

	remote->cbs = cbs;
	remote->priv = priv;
	/* Defaults: 7 KiB buffer, poll the remote every 100ms while consuming */
	remote->trace_buffer_size = 7 << 10;
	remote->poll_ms = 100;
	mutex_init(&remote->lock);
	init_rwsem(&remote->reader_lock);

	if (trace_remote_init_tracefs(name, remote)) {
		kfree(remote);
		return -ENOMEM;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(trace_remote_register);
886 
/**
 * trace_remote_free_buffer() - Free trace buffer allocated with trace_remote_alloc_buffer()
 * @desc:	Descriptor of the per-CPU ring-buffers, originally filled by
 *		trace_remote_alloc_buffer()
 *
 * Most likely called from &trace_remote_callbacks.unload_trace_buffer.
 */
void trace_remote_free_buffer(struct trace_buffer_desc *desc)
{
	struct ring_buffer_desc *rb_desc;
	int cpu;

	for_each_ring_buffer_desc(rb_desc, cpu, desc) {
		unsigned int id;

		/* Each per-CPU ring-buffer: one meta page + nr_page_va data pages */
		free_page(rb_desc->meta_va);

		for (id = 0; id < rb_desc->nr_page_va; id++)
			free_page(rb_desc->page_va[id]);
	}
}
EXPORT_SYMBOL_GPL(trace_remote_free_buffer);
909 
910 /**
911  * trace_remote_alloc_buffer() - Dynamically allocate a trace buffer
912  * @desc:		Uninitialized trace_buffer_desc
913  * @desc_size:		Size of the trace_buffer_desc. Must be at least equal to
914  *			trace_buffer_desc_size()
915  * @buffer_size:	Size in bytes of each per-CPU ring-buffer
916  * @cpumask:		CPUs to allocate a ring-buffer for
917  *
918  * Helper to dynamically allocate a set of pages (enough to cover @buffer_size)
919  * for each CPU from @cpumask and fill @desc. Most likely called from
920  * &trace_remote_callbacks.load_trace_buffer.
921  *
922  * Return: 0 on success, negative error code on failure.
923  */
924 int trace_remote_alloc_buffer(struct trace_buffer_desc *desc, size_t desc_size, size_t buffer_size,
925 			      const struct cpumask *cpumask)
926 {
927 	unsigned int nr_pages = max(DIV_ROUND_UP(buffer_size, PAGE_SIZE), 2UL) + 1;
928 	void *desc_end = desc + desc_size;
929 	struct ring_buffer_desc *rb_desc;
930 	int cpu, ret = -ENOMEM;
931 
932 	if (desc_size < struct_size(desc, __data, 0))
933 		return -EINVAL;
934 
935 	desc->nr_cpus = 0;
936 	desc->struct_len = struct_size(desc, __data, 0);
937 
938 	rb_desc = (struct ring_buffer_desc *)&desc->__data[0];
939 
940 	for_each_cpu(cpu, cpumask) {
941 		unsigned int id;
942 
943 		if ((void *)rb_desc + struct_size(rb_desc, page_va, nr_pages) > desc_end) {
944 			ret = -EINVAL;
945 			goto err;
946 		}
947 
948 		rb_desc->cpu = cpu;
949 		rb_desc->nr_page_va = 0;
950 		rb_desc->meta_va = (unsigned long)__get_free_page(GFP_KERNEL);
951 		if (!rb_desc->meta_va)
952 			goto err;
953 
954 		for (id = 0; id < nr_pages; id++) {
955 			rb_desc->page_va[id] = (unsigned long)__get_free_page(GFP_KERNEL);
956 			if (!rb_desc->page_va[id])
957 				goto err;
958 
959 			rb_desc->nr_page_va++;
960 		}
961 		desc->nr_cpus++;
962 		desc->struct_len += offsetof(struct ring_buffer_desc, page_va);
963 		desc->struct_len += struct_size(rb_desc, page_va, rb_desc->nr_page_va);
964 		rb_desc = __next_ring_buffer_desc(rb_desc);
965 	}
966 
967 	return 0;
968 
969 err:
970 	trace_remote_free_buffer(desc);
971 	return ret;
972 }
973 EXPORT_SYMBOL_GPL(trace_remote_alloc_buffer);
974