xref: /linux/kernel/trace/trace.c (revision 32a92f8c89326985e05dce8b22d3f0aa07a3e1bd)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/syscalls.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/cleanup.h>
31 #include <linux/vmalloc.h>
32 #include <linux/ftrace.h>
33 #include <linux/module.h>
34 #include <linux/percpu.h>
35 #include <linux/splice.h>
36 #include <linux/kdebug.h>
37 #include <linux/string.h>
38 #include <linux/mount.h>
39 #include <linux/rwsem.h>
40 #include <linux/slab.h>
41 #include <linux/ctype.h>
42 #include <linux/init.h>
43 #include <linux/panic_notifier.h>
44 #include <linux/poll.h>
45 #include <linux/nmi.h>
46 #include <linux/fs.h>
47 #include <linux/trace.h>
48 #include <linux/sched/clock.h>
49 #include <linux/sched/rt.h>
50 #include <linux/fsnotify.h>
51 #include <linux/irq_work.h>
52 #include <linux/workqueue.h>
53 #include <linux/sort.h>
54 #include <linux/io.h> /* vmap_page_range() */
55 #include <linux/fs_context.h>
56 
57 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
58 
59 #include "trace.h"
60 #include "trace_output.h"
61 
62 #ifdef CONFIG_FTRACE_STARTUP_TEST
63 /*
64  * We need to change this state when a selftest is running.
65  * A selftest will lurk into the ring-buffer to count the
66  * entries inserted during the selftest although some concurrent
67  * insertions into the ring-buffer such as trace_printk could occurred
68  * at the same time, giving false positive or negative results.
69  */
70 bool __read_mostly tracing_selftest_running;
71 
72 /*
73  * If boot-time tracing including tracers/events via kernel cmdline
74  * is running, we do not want to run SELFTEST.
75  */
76 bool __read_mostly tracing_selftest_disabled;
77 
disable_tracing_selftest(const char * reason)78 void __init disable_tracing_selftest(const char *reason)
79 {
80 	if (!tracing_selftest_disabled) {
81 		tracing_selftest_disabled = true;
82 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
83 	}
84 }
85 #else
86 #define tracing_selftest_disabled	0
87 #endif
88 
89 /* Pipe tracepoints to printk */
90 static struct trace_iterator *tracepoint_print_iter;
91 int tracepoint_printk;
92 static bool tracepoint_printk_stop_on_boot __initdata;
93 static bool traceoff_after_boot __initdata;
94 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
95 
96 /* Store tracers and their flags per instance */
97 struct tracers {
98 	struct list_head	list;
99 	struct tracer		*tracer;
100 	struct tracer_flags	*flags;
101 };
102 
103 /*
104  * To prevent the comm cache from being overwritten when no
105  * tracing is active, only save the comm when a trace event
106  * occurred.
107  */
108 DEFINE_PER_CPU(bool, trace_taskinfo_save);
109 
110 /*
111  * Kill all tracing for good (never come back).
112  * It is initialized to 1 but will turn to zero if the initialization
113  * of the tracer is successful. But that is the only place that sets
114  * this back to zero.
115  */
116 int tracing_disabled = 1;
117 
118 cpumask_var_t __read_mostly	tracing_buffer_mask;
119 
120 #define MAX_TRACER_SIZE		100
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputting it to a
128  * serial console.
129  *
130  * It is default off, but you can enable it with either specifying
131  * "ftrace_dump_on_oops" in the kernel command line, or setting
132  * /proc/sys/kernel/ftrace_dump_on_oops
133  * Set 1 if you want to dump buffers of all CPUs
134  * Set 2 if you want to dump the buffer of the CPU that triggered oops
135  * Set instance name if you want to dump the specific trace instance
136  * Multiple instance dump is also supported, and instances are separated
137  * by commas.
138  */
139 /* Set to string format zero to disable by default */
140 static char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
141 
142 /* When set, tracing will stop when a WARN*() is hit */
143 static int __disable_trace_on_warning;
144 
145 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
146 			     void *buffer, size_t *lenp, loff_t *ppos);
/* Tracing knobs exposed under /proc/sys/kernel/ (registered at boot below) */
147 static const struct ctl_table trace_sysctl_table[] = {
148 	{
		/* String: "0", "1", "2" or instance names (see comment above) */
149 		.procname	= "ftrace_dump_on_oops",
150 		.data		= &ftrace_dump_on_oops,
151 		.maxlen		= MAX_TRACER_SIZE,
152 		.mode		= 0644,
153 		.proc_handler	= proc_dostring,
154 	},
155 	{
		/* Integer flag: stop tracing when a WARN*() fires */
156 		.procname	= "traceoff_on_warning",
157 		.data		= &__disable_trace_on_warning,
158 		.maxlen		= sizeof(__disable_trace_on_warning),
159 		.mode		= 0644,
160 		.proc_handler	= proc_dointvec,
161 	},
162 	{
		/* Integer flag with a custom handler to flip the static key */
163 		.procname	= "tracepoint_printk",
164 		.data		= &tracepoint_printk,
165 		.maxlen		= sizeof(tracepoint_printk),
166 		.mode		= 0644,
167 		.proc_handler	= tracepoint_printk_sysctl,
168 	},
169 };
170 
init_trace_sysctls(void)171 static int __init init_trace_sysctls(void)
172 {
	/* Publish the tracing sysctl entries under "kernel" */
173 	register_sysctl_init("kernel", trace_sysctl_table);
174 	return 0;
175 }
176 subsys_initcall(init_trace_sysctls);
177 
178 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
179 /* Map of enums to their values, for "eval_map" file */
180 struct trace_eval_map_head {
181 	struct module			*mod;	/* owning module, NULL when built in */
182 	unsigned long			length;	/* number of saved maps in this array */
183 };
184 
185 union trace_eval_map_item;
186 
187 struct trace_eval_map_tail {
188 	/*
189 	 * "end" is first and points to NULL as it must be different
190 	 * than "mod" or "eval_string"
191 	 */
192 	union trace_eval_map_item	*next;
193 	const char			*end;	/* points to NULL */
194 };
195 
/* NOTE(review): appears to guard trace_eval_maps — confirm at use sites */
196 static DEFINE_MUTEX(trace_eval_mutex);
197 
198 /*
199  * The trace_eval_maps are saved in an array with two extra elements,
200  * one at the beginning, and one at the end. The beginning item contains
201  * the count of the saved maps (head.length), and the module they
202  * belong to if not built in (head.mod). The ending item contains a
203  * pointer to the next array of saved eval_map items.
204  */
205 union trace_eval_map_item {
206 	struct trace_eval_map		map;
207 	struct trace_eval_map_head	head;
208 	struct trace_eval_map_tail	tail;
209 };
210 
211 static union trace_eval_map_item *trace_eval_maps;
212 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
213 
214 int tracing_set_tracer(struct trace_array *tr, const char *buf);
215 static void ftrace_trace_userstack(struct trace_array *tr,
216 				   struct trace_buffer *buffer,
217 				   unsigned int trace_ctx);
218 
219 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
220 static char *default_bootup_tracer;
221 
222 static bool allocate_snapshot;
223 static bool snapshot_at_boot;
224 
225 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
226 static int boot_instance_index;
227 
228 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
229 static int boot_snapshot_index;
230 
/*
 * "ftrace=" boot parameter: remember which tracer to enable once
 * tracing is initialized later in boot.
 */
set_cmdline_ftrace(char * str)231 static int __init set_cmdline_ftrace(char *str)
232 {
233 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
234 	default_bootup_tracer = bootup_tracer_buf;
235 	/* We are using ftrace early, expand it */
236 	trace_set_ring_buffer_expanded(NULL);
237 	return 1;
238 }
239 __setup("ftrace=", set_cmdline_ftrace);
240 
ftrace_dump_on_oops_enabled(void)241 int ftrace_dump_on_oops_enabled(void)
242 {
243 	if (!strcmp("0", ftrace_dump_on_oops))
244 		return 0;
245 	else
246 		return 1;
247 }
248 
/*
 * "ftrace_dump_on_oops" boot parameter parser:
 *   bare "ftrace_dump_on_oops"      -> "1" (dump buffers of all CPUs)
 *   "ftrace_dump_on_oops,inst,..."  -> "1,inst,..." (keep instance list)
 *   "ftrace_dump_on_oops=value"     -> "value" stored verbatim
 * Returns 0 for any other form (unrecognized parameter).
 */
set_ftrace_dump_on_oops(char * str)249 static int __init set_ftrace_dump_on_oops(char *str)
250 {
	/* No value: behave as "1" */
251 	if (!*str) {
252 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
253 		return 1;
254 	}
255 
	/* Leading comma: prefix the instance list with "1" */
256 	if (*str == ',') {
257 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
258 		strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
259 		return 1;
260 	}
261 
	/* "=value": store the value as given */
262 	if (*str++ == '=') {
263 		strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
264 		return 1;
265 	}
266 
267 	return 0;
268 }
269 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
270 
stop_trace_on_warning(char * str)271 static int __init stop_trace_on_warning(char *str)
272 {
273 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
274 		__disable_trace_on_warning = 1;
275 	return 1;
276 }
277 __setup("traceoff_on_warning", stop_trace_on_warning);
278 
/*
 * "alloc_snapshot" boot parameter.
 *  "=name[,name...]": append tab-terminated instance names to
 *  boot_snapshot_info for later processing; returns -1 on overflow.
 *  Bare form: allocate the top level snapshot buffer (and expand the
 *  main ring buffer).
 */
boot_alloc_snapshot(char * str)279 static int __init boot_alloc_snapshot(char *str)
280 {
281 	char *slot = boot_snapshot_info + boot_snapshot_index;
282 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
283 	int ret;
284 
285 	if (str[0] == '=') {
286 		str++;
		/* Reject the whole entry rather than store a truncated one */
287 		if (strlen(str) >= left)
288 			return -1;
289 
290 		ret = snprintf(slot, left, "%s\t", str);
291 		boot_snapshot_index += ret;
292 	} else {
293 		allocate_snapshot = true;
294 		/* We also need the main ring buffer expanded */
295 		trace_set_ring_buffer_expanded(NULL);
296 	}
297 	return 1;
298 }
299 __setup("alloc_snapshot", boot_alloc_snapshot);
300 
301 
/*
 * "ftrace_boot_snapshot" boot parameter: take a snapshot at the end of
 * boot; reuses boot_alloc_snapshot() to parse any "=name" list.
 */
boot_snapshot(char * str)302 static int __init boot_snapshot(char *str)
303 {
304 	snapshot_at_boot = true;
305 	boot_alloc_snapshot(str);
306 	return 1;
307 }
308 __setup("ftrace_boot_snapshot", boot_snapshot);
309 
310 
boot_instance(char * str)311 static int __init boot_instance(char *str)
312 {
313 	char *slot = boot_instance_info + boot_instance_index;
314 	int left = sizeof(boot_instance_info) - boot_instance_index;
315 	int ret;
316 
317 	if (strlen(str) >= left)
318 		return -1;
319 
320 	ret = snprintf(slot, left, "%s\t", str);
321 	boot_instance_index += ret;
322 
323 	return 1;
324 }
325 __setup("trace_instance=", boot_instance);
326 
327 
/* Holds the raw "trace_options=" string until tracing is initialized */
328 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
329 
/* "trace_options=" boot parameter: saved verbatim for later parsing */
set_trace_boot_options(char * str)330 static int __init set_trace_boot_options(char *str)
331 {
332 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
333 	return 1;
334 }
335 __setup("trace_options=", set_trace_boot_options);
336 
337 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
/* Non-NULL once a boot-time trace clock has been requested */
338 static char *trace_boot_clock __initdata;
339 
/* "trace_clock=" boot parameter: record the requested trace clock name */
set_trace_boot_clock(char * str)340 static int __init set_trace_boot_clock(char *str)
341 {
342 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
343 	trace_boot_clock = trace_boot_clock_buf;
344 	return 1;
345 }
346 __setup("trace_clock=", set_trace_boot_clock);
347 
set_tracepoint_printk(char * str)348 static int __init set_tracepoint_printk(char *str)
349 {
350 	/* Ignore the "tp_printk_stop_on_boot" param */
351 	if (*str == '_')
352 		return 0;
353 
354 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
355 		tracepoint_printk = 1;
356 	return 1;
357 }
358 __setup("tp_printk", set_tracepoint_printk);
359 
set_tracepoint_printk_stop(char * str)360 static int __init set_tracepoint_printk_stop(char *str)
361 {
362 	tracepoint_printk_stop_on_boot = true;
363 	return 1;
364 }
365 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
366 
set_traceoff_after_boot(char * str)367 static int __init set_traceoff_after_boot(char *str)
368 {
369 	traceoff_after_boot = true;
370 	return 1;
371 }
372 __setup("traceoff_after_boot", set_traceoff_after_boot);
373 
/*
 * Convert nanoseconds to microseconds, rounding to nearest.
 * do_div() divides @nsec by 1000 in place (64-by-32 division helper).
 */
ns2usecs(u64 nsec)374 unsigned long long ns2usecs(u64 nsec)
375 {
376 	nsec += 500;
377 	do_div(nsec, 1000);
378 	return nsec;
379 }
380 
381 static void
trace_process_export(struct trace_export * export,struct ring_buffer_event * event,int flag)382 trace_process_export(struct trace_export *export,
383 	       struct ring_buffer_event *event, int flag)
384 {
385 	struct trace_entry *entry;
386 	unsigned int size = 0;
387 
388 	if (export->flags & flag) {
389 		entry = ring_buffer_event_data(event);
390 		size = ring_buffer_event_length(event);
391 		export->write(export, entry, size);
392 	}
393 }
394 
395 static DEFINE_MUTEX(ftrace_export_lock);
396 
397 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
398 
399 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
400 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
401 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
402 
/* Bump the static key for each export type this @export subscribes to */
ftrace_exports_enable(struct trace_export * export)403 static inline void ftrace_exports_enable(struct trace_export *export)
404 {
405 	if (export->flags & TRACE_EXPORT_FUNCTION)
406 		static_branch_inc(&trace_function_exports_enabled);
407 
408 	if (export->flags & TRACE_EXPORT_EVENT)
409 		static_branch_inc(&trace_event_exports_enabled);
410 
411 	if (export->flags & TRACE_EXPORT_MARKER)
412 		static_branch_inc(&trace_marker_exports_enabled);
413 }
414 
/* Undo ftrace_exports_enable(): drop the matching static keys */
ftrace_exports_disable(struct trace_export * export)415 static inline void ftrace_exports_disable(struct trace_export *export)
416 {
417 	if (export->flags & TRACE_EXPORT_FUNCTION)
418 		static_branch_dec(&trace_function_exports_enabled);
419 
420 	if (export->flags & TRACE_EXPORT_EVENT)
421 		static_branch_dec(&trace_event_exports_enabled);
422 
423 	if (export->flags & TRACE_EXPORT_MARKER)
424 		static_branch_dec(&trace_marker_exports_enabled);
425 }
426 
/*
 * Pass one event to every registered export that subscribed to @flag.
 * The list is walked with preemption disabled (notrace variant) using
 * raw RCU dereferences, pairing with the rcu_assign_pointer()
 * publication in add_trace_export().
 */
ftrace_exports(struct ring_buffer_event * event,int flag)427 static void ftrace_exports(struct ring_buffer_event *event, int flag)
428 {
429 	struct trace_export *export;
430 
431 	guard(preempt_notrace)();
432 
433 	export = rcu_dereference_raw_check(ftrace_exports_list);
434 	while (export) {
435 		trace_process_export(export, event, flag);
436 		export = rcu_dereference_raw_check(export->next);
437 	}
438 }
439 
/*
 * Publish @export at the head of the RCU-protected @list; readers
 * (ftrace_exports()) may be walking the list concurrently.
 */
440 static inline void
add_trace_export(struct trace_export ** list,struct trace_export * export)441 add_trace_export(struct trace_export **list, struct trace_export *export)
442 {
443 	rcu_assign_pointer(export->next, *list);
444 	/*
445 	 * We are entering export into the list but another
446 	 * CPU might be walking that list. We need to make sure
447 	 * the export->next pointer is valid before another CPU sees
448 	 * the export pointer included into the list.
449 	 */
450 	rcu_assign_pointer(*list, export);
451 }
452 
/*
 * Unlink @export from @list. Returns 0 on success, -1 if @export was
 * not on the list.
 */
453 static inline int
rm_trace_export(struct trace_export ** list,struct trace_export * export)454 rm_trace_export(struct trace_export **list, struct trace_export *export)
455 {
456 	struct trace_export **p;
457 
	/* Find the pointer slot that currently points at @export */
458 	for (p = list; *p != NULL; p = &(*p)->next)
459 		if (*p == export)
460 			break;
461 
462 	if (*p != export)
463 		return -1;
464 
	/* Unlink; concurrent readers see either the old or the new chain */
465 	rcu_assign_pointer(*p, (*p)->next);
466 
467 	return 0;
468 }
469 
/* Enable this export's static keys, then publish it on @list */
470 static inline void
add_ftrace_export(struct trace_export ** list,struct trace_export * export)471 add_ftrace_export(struct trace_export **list, struct trace_export *export)
472 {
473 	ftrace_exports_enable(export);
474 
475 	add_trace_export(list, export);
476 }
477 
/* Remove @export from @list and drop its static keys */
478 static inline int
rm_ftrace_export(struct trace_export ** list,struct trace_export * export)479 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
480 {
481 	int ret;
482 
483 	ret = rm_trace_export(list, export);
	/* Keys are dropped unconditionally, even if @export was not found */
484 	ftrace_exports_disable(export);
485 
486 	return ret;
487 }
488 
/*
 * register_ftrace_export - add an export callback for trace data
 * @export: the export to register; its ->write() must be set
 *
 * Returns 0 on success, -1 (with a one-time warning) if @export has
 * no write callback.
 */
register_ftrace_export(struct trace_export * export)489 int register_ftrace_export(struct trace_export *export)
490 {
491 	if (WARN_ON_ONCE(!export->write))
492 		return -1;
493 
494 	guard(mutex)(&ftrace_export_lock);
495 
496 	add_ftrace_export(&ftrace_exports_list, export);
497 
498 	return 0;
499 }
500 EXPORT_SYMBOL_GPL(register_ftrace_export);
501 
/*
 * unregister_ftrace_export - remove a previously registered export
 * @export: the export to remove
 *
 * Returns 0 on success, -1 if @export was not registered.
 */
unregister_ftrace_export(struct trace_export * export)502 int unregister_ftrace_export(struct trace_export *export)
503 {
504 	guard(mutex)(&ftrace_export_lock);
505 	return rm_ftrace_export(&ftrace_exports_list, export);
506 }
507 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
508 
509 /* trace_flags holds trace_options default values */
510 #define TRACE_DEFAULT_FLAGS						\
511 	(FUNCTION_DEFAULT_FLAGS | FPROFILE_DEFAULT_FLAGS |		\
512 	 TRACE_ITER(PRINT_PARENT) | TRACE_ITER(PRINTK) |		\
513 	 TRACE_ITER(ANNOTATE) | TRACE_ITER(CONTEXT_INFO) |		\
514 	 TRACE_ITER(RECORD_CMD) | TRACE_ITER(OVERWRITE) |		\
515 	 TRACE_ITER(IRQ_INFO) | TRACE_ITER(MARKERS) |			\
516 	 TRACE_ITER(HASH_PTR) | TRACE_ITER(TRACE_PRINTK) |		\
517 	 TRACE_ITER(COPY_MARKER))
518 
519 /* trace_options that are only supported by global_trace */
520 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER(PRINTK) |			\
521 	       TRACE_ITER(PRINTK_MSGONLY) | TRACE_ITER(RECORD_CMD) |	\
522 	       TRACE_ITER(PROF_TEXT_OFFSET) | FPROFILE_DEFAULT_FLAGS)
523 
524 /* trace_flags that are default zero for instances */
525 #define ZEROED_TRACE_FLAGS \
526 	(TRACE_ITER(EVENT_FORK) | TRACE_ITER(FUNC_FORK) | TRACE_ITER(TRACE_PRINTK) | \
527 	 TRACE_ITER(COPY_MARKER))
528 
529 /*
530  * The global_trace is the descriptor that holds the top-level tracing
531  * buffers for the live tracing.
532  */
533 static struct trace_array global_trace = {
534 	.trace_flags = TRACE_DEFAULT_FLAGS,
535 };
536 
/* Instance holding TRACE_ITER(TRACE_PRINTK); see update_printk_trace() */
537 struct trace_array *printk_trace = &global_trace;
538 
539 /* List of trace_arrays interested in the top level trace_marker */
540 static LIST_HEAD(marker_copies);
541 
/*
 * Make @tr the printk_trace instance: move the TRACE_ITER(TRACE_PRINTK)
 * flag from the current holder to @tr.
 */
update_printk_trace(struct trace_array * tr)542 static void update_printk_trace(struct trace_array *tr)
543 {
544 	if (printk_trace == tr)
545 		return;
546 
	/* Clear the flag on the previous holder before switching over */
547 	printk_trace->trace_flags &= ~TRACE_ITER(TRACE_PRINTK);
548 	printk_trace = tr;
549 	tr->trace_flags |= TRACE_ITER(TRACE_PRINTK);
550 }
551 
552 /* Returns true if the status of tr changed */
update_marker_trace(struct trace_array * tr,int enabled)553 static bool update_marker_trace(struct trace_array *tr, int enabled)
554 {
555 	lockdep_assert_held(&event_mutex);
556 
557 	if (enabled) {
558 		if (!list_empty(&tr->marker_list))
559 			return false;
560 
561 		list_add_rcu(&tr->marker_list, &marker_copies);
562 		tr->trace_flags |= TRACE_ITER(COPY_MARKER);
563 		return true;
564 	}
565 
566 	if (list_empty(&tr->marker_list))
567 		return false;
568 
569 	list_del_init(&tr->marker_list);
570 	tr->trace_flags &= ~TRACE_ITER(COPY_MARKER);
571 	return true;
572 }
573 
trace_set_ring_buffer_expanded(struct trace_array * tr)574 void trace_set_ring_buffer_expanded(struct trace_array *tr)
575 {
576 	if (!tr)
577 		tr = &global_trace;
578 	tr->ring_buffer_expanded = true;
579 }
580 
581 LIST_HEAD(ftrace_trace_arrays);
582 
trace_array_get(struct trace_array * this_tr)583 int trace_array_get(struct trace_array *this_tr)
584 {
585 	struct trace_array *tr;
586 
587 	guard(mutex)(&trace_types_lock);
588 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
589 		if (tr == this_tr) {
590 			tr->ref++;
591 			return 0;
592 		}
593 	}
594 
595 	return -ENODEV;
596 }
597 
__trace_array_put(struct trace_array * this_tr)598 static void __trace_array_put(struct trace_array *this_tr)
599 {
600 	WARN_ON(!this_tr->ref);
601 	this_tr->ref--;
602 }
603 
604 /**
605  * trace_array_put - Decrement the reference counter for this trace array.
606  * @this_tr : pointer to the trace array
607  *
608  * NOTE: Use this when we no longer need the trace array returned by
609  * trace_array_get_by_name(). This ensures the trace array can be later
610  * destroyed.
611  *
612  */
trace_array_put(struct trace_array * this_tr)613 void trace_array_put(struct trace_array *this_tr)
614 {
615 	if (!this_tr)
616 		return;
617 
618 	guard(mutex)(&trace_types_lock);
619 	__trace_array_put(this_tr);
620 }
621 EXPORT_SYMBOL_GPL(trace_array_put);
622 
tracing_check_open_get_tr(struct trace_array * tr)623 int tracing_check_open_get_tr(struct trace_array *tr)
624 {
625 	int ret;
626 
627 	ret = security_locked_down(LOCKDOWN_TRACEFS);
628 	if (ret)
629 		return ret;
630 
631 	if (tracing_disabled)
632 		return -ENODEV;
633 
634 	if (tr && trace_array_get(tr) < 0)
635 		return -ENODEV;
636 
637 	return 0;
638 }
639 
buffer_ftrace_now(struct array_buffer * buf,int cpu)640 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
641 {
642 	u64 ts;
643 
644 	/* Early boot up does not have a buffer yet */
645 	if (!buf->buffer)
646 		return trace_clock_local();
647 
648 	ts = ring_buffer_time_stamp(buf->buffer);
649 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
650 
651 	return ts;
652 }
653 
ftrace_now(int cpu)654 u64 ftrace_now(int cpu)
655 {
656 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
657 }
658 
659 /**
660  * tracing_is_enabled - Show if global_trace has been enabled
661  *
662  * Shows if the global trace has been enabled or not. It uses the
663  * mirror flag "buffer_disabled" to be used in fast paths such as for
664  * the irqsoff tracer. But it may be inaccurate due to races. If you
665  * need to know the accurate state, use tracing_is_on() which is a little
666  * slower, but accurate.
667  */
tracing_is_enabled(void)668 int tracing_is_enabled(void)
669 {
670 	/*
671 	 * For quick access (irqsoff uses this in fast path), just
672 	 * return the mirror variable of the state of the ring buffer.
673 	 * It's a little racy, but we don't really care.
674 	 */
675 	return !global_trace.buffer_disabled;
676 }
677 
678 /*
679  * trace_buf_size is the size in bytes that is allocated
680  * for a buffer. Note, the number of bytes is always rounded
681  * to page size.
682  *
683  * This number is purposely set to a low number of 16384.
684  * If the dump on oops happens, it will be much appreciated
685  * to not have to wait for all that output. Anyway this can be
686  * boot time and run time configurable.
687  */
688 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
689 
690 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
691 
692 /* trace_types holds a link list of available tracers. */
693 static struct tracer		*trace_types __read_mostly;
694 
695 /*
696  * trace_types_lock is used to protect the trace_types list.
697  */
698 DEFINE_MUTEX(trace_types_lock);
699 
700 /*
701  * serialize the access of the ring buffer
702  *
703  * ring buffer serializes readers, but it is low level protection.
704  * The validity of the events (which returns by ring_buffer_peek() ..etc)
705  * are not protected by ring buffer.
706  *
707  * The content of events may become garbage if we allow other process consumes
708  * these events concurrently:
709  *   A) the page of the consumed events may become a normal page
710  *      (not reader page) in ring buffer, and this page will be rewritten
711  *      by events producer.
712  *   B) The page of the consumed events may become a page for splice_read,
713  *      and this page will be returned to system.
714  *
715  * These primitives allow multi process access to different cpu ring buffer
716  * concurrently.
717  *
718  * These primitives don't distinguish read-only and read-consume access.
719  * Multi read-only access are also serialized.
720  */
721 
722 #ifdef CONFIG_SMP
723 static DECLARE_RWSEM(all_cpu_access_lock);
724 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
725 
/*
 * Serialize ring buffer readers: a whole-buffer reader takes
 * all_cpu_access_lock for write; per-cpu readers take it for read plus
 * the cpu's own mutex, so readers of different cpus can run in parallel.
 */
trace_access_lock(int cpu)726 static inline void trace_access_lock(int cpu)
727 {
728 	if (cpu == RING_BUFFER_ALL_CPUS) {
729 		/* gain it for accessing the whole ring buffer. */
730 		down_write(&all_cpu_access_lock);
731 	} else {
732 		/* gain it for accessing a cpu ring buffer. */
733 
734 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
735 		down_read(&all_cpu_access_lock);
736 
737 		/* Secondly block other access to this @cpu ring buffer. */
738 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
739 	}
740 }
741 
/* Release in the reverse order of trace_access_lock() */
trace_access_unlock(int cpu)742 static inline void trace_access_unlock(int cpu)
743 {
744 	if (cpu == RING_BUFFER_ALL_CPUS) {
745 		up_write(&all_cpu_access_lock);
746 	} else {
747 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
748 		up_read(&all_cpu_access_lock);
749 	}
750 }
751 
/* One-time initialization of the per-cpu mutexes */
trace_access_lock_init(void)752 static inline void trace_access_lock_init(void)
753 {
754 	int cpu;
755 
756 	for_each_possible_cpu(cpu)
757 		mutex_init(&per_cpu(cpu_access_lock, cpu));
758 }
759 
760 #else
761 
/* UP: a single mutex serializes all ring buffer access */
762 static DEFINE_MUTEX(access_lock);
763 
trace_access_lock(int cpu)764 static inline void trace_access_lock(int cpu)
765 {
766 	(void)cpu;
767 	mutex_lock(&access_lock);
768 }
769 
trace_access_unlock(int cpu)770 static inline void trace_access_unlock(int cpu)
771 {
772 	(void)cpu;
773 	mutex_unlock(&access_lock);
774 }
775 
trace_access_lock_init(void)776 static inline void trace_access_lock_init(void)
777 {
778 }
779 
780 #endif
781 
tracer_tracing_on(struct trace_array * tr)782 void tracer_tracing_on(struct trace_array *tr)
783 {
784 	if (tr->array_buffer.buffer)
785 		ring_buffer_record_on(tr->array_buffer.buffer);
786 	/*
787 	 * This flag is looked at when buffers haven't been allocated
788 	 * yet, or by some tracers (like irqsoff), that just want to
789 	 * know if the ring buffer has been disabled, but it can handle
790 	 * races of where it gets disabled but we still do a record.
791 	 * As the check is in the fast path of the tracers, it is more
792 	 * important to be fast than accurate.
793 	 */
794 	tr->buffer_disabled = 0;
795 }
796 
797 /**
798  * tracing_on - enable tracing buffers
799  *
800  * This function enables tracing buffers that may have been
801  * disabled with tracing_off.
802  */
tracing_on(void)803 void tracing_on(void)
804 {
805 	tracer_tracing_on(&global_trace);
806 }
807 EXPORT_SYMBOL_GPL(tracing_on);
808 
809 #ifdef CONFIG_TRACER_SNAPSHOT
/*
 * Swap the live buffer of @tr with its snapshot buffer, saving
 * @cond_data along with it. Refuses (writing a notice into the trace)
 * from NMI context, when no snapshot buffer is allocated, when the
 * buffer is memory mapped, or when the current tracer itself uses the
 * snapshot buffer.
 */
tracing_snapshot_instance_cond(struct trace_array * tr,void * cond_data)810 static void tracing_snapshot_instance_cond(struct trace_array *tr,
811 					   void *cond_data)
812 {
813 	unsigned long flags;
814 
	/* The buffer swap is not NMI safe */
815 	if (in_nmi()) {
816 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
817 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
818 		return;
819 	}
820 
821 	if (!tr->allocated_snapshot) {
822 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
823 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
824 		tracer_tracing_off(tr);
825 		return;
826 	}
827 
	/* A user-space mapped buffer cannot be swapped out */
828 	if (tr->mapped) {
829 		trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
830 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
831 		return;
832 	}
833 
834 	/* Note, snapshot can not be used when the tracer uses it */
835 	if (tracer_uses_snapshot(tr->current_trace)) {
836 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
837 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
838 		return;
839 	}
840 
	/* The swap itself runs with interrupts disabled */
841 	local_irq_save(flags);
842 	update_max_tr(tr, current, smp_processor_id(), cond_data);
843 	local_irq_restore(flags);
844 }
845 
/* Unconditional snapshot of @tr (no cond_data) */
tracing_snapshot_instance(struct trace_array * tr)846 void tracing_snapshot_instance(struct trace_array *tr)
847 {
848 	tracing_snapshot_instance_cond(tr, NULL);
849 }
850 
851 /**
852  * tracing_snapshot - take a snapshot of the current buffer.
853  *
854  * This causes a swap between the snapshot buffer and the current live
855  * tracing buffer. You can use this to take snapshots of the live
856  * trace when some condition is triggered, but continue to trace.
857  *
858  * Note, make sure to allocate the snapshot with either
859  * a tracing_snapshot_alloc(), or by doing it manually
860  * with: echo 1 > /sys/kernel/tracing/snapshot
861  *
862  * If the snapshot buffer is not allocated, it will stop tracing.
863  * Basically making a permanent snapshot.
864  */
tracing_snapshot(void)865 void tracing_snapshot(void)
866 {
867 	struct trace_array *tr = &global_trace;
868 
869 	tracing_snapshot_instance(tr);
870 }
871 EXPORT_SYMBOL_GPL(tracing_snapshot);
872 
873 /**
874  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
875  * @tr:		The tracing instance to snapshot
876  * @cond_data:	The data to be tested conditionally, and possibly saved
877  *
878  * This is the same as tracing_snapshot() except that the snapshot is
879  * conditional - the snapshot will only happen if the
880  * cond_snapshot.update() implementation receiving the cond_data
881  * returns true, which means that the trace array's cond_snapshot
882  * update() operation used the cond_data to determine whether the
883  * snapshot should be taken, and if it was, presumably saved it along
884  * with the snapshot.
885  */
tracing_snapshot_cond(struct trace_array * tr,void * cond_data)886 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
887 {
	/* cond_data is passed through and saved with the snapshot swap */
888 	tracing_snapshot_instance_cond(tr, cond_data);
889 }
890 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
891 
892 /**
893  * tracing_cond_snapshot_data - get the user data associated with a snapshot
894  * @tr:		The tracing instance
895  *
896  * When the user enables a conditional snapshot using
897  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
898  * with the snapshot.  This accessor is used to retrieve it.
899  *
900  * Should not be called from cond_snapshot.update(), since it takes
901  * the tr->max_lock lock, which the code calling
902  * cond_snapshot.update() has already done.
903  *
904  * Returns the cond_data associated with the trace array's snapshot.
905  */
tracing_cond_snapshot_data(struct trace_array * tr)906 void *tracing_cond_snapshot_data(struct trace_array *tr)
907 {
908 	void *cond_data = NULL;
909 
	/* max_lock is an arch spinlock: interrupts must be off while held */
910 	local_irq_disable();
911 	arch_spin_lock(&tr->max_lock);
912 
913 	if (tr->cond_snapshot)
914 		cond_data = tr->cond_snapshot->cond_data;
915 
916 	arch_spin_unlock(&tr->max_lock);
917 	local_irq_enable();
918 
919 	return cond_data;
920 }
921 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
922 
923 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
924 					struct array_buffer *size_buf, int cpu_id);
925 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
926 
/*
 * Allocate the snapshot buffer of @tr (if not already allocated),
 * matching the sub-buffer order and size of the main ring buffer.
 * Returns 0 on success or a negative error from the resize calls.
 */
tracing_alloc_snapshot_instance(struct trace_array * tr)927 int tracing_alloc_snapshot_instance(struct trace_array *tr)
928 {
929 	int order;
930 	int ret;
931 
932 	if (!tr->allocated_snapshot) {
933 
934 		/* Make the snapshot buffer have the same order as main buffer */
935 		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
936 		ret = ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, order);
937 		if (ret < 0)
938 			return ret;
939 
940 		/* allocate spare buffer */
941 		ret = resize_buffer_duplicate_size(&tr->snapshot_buffer,
942 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
943 		if (ret < 0)
944 			return ret;
945 
946 		tr->allocated_snapshot = true;
947 	}
948 
949 	return 0;
950 }
951 
/*
 * Shrink the snapshot buffer back to its minimal size (rather than
 * freeing it) and mark it as unallocated.
 */
free_snapshot(struct trace_array * tr)952 static void free_snapshot(struct trace_array *tr)
953 {
954 	/*
955 	 * We don't free the ring buffer. instead, resize it because
956 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
957 	 * we want preserve it.
958 	 */
959 	ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, 0);
960 	ring_buffer_resize(tr->snapshot_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
961 	set_buffer_entries(&tr->snapshot_buffer, 1);
962 	tracing_reset_online_cpus(&tr->snapshot_buffer);
963 	tr->allocated_snapshot = false;
964 }
965 
/*
 * Take a reference on the snapshot usage of @tr and make sure its
 * snapshot buffer is allocated. Fails with -EBUSY when the refcount
 * is saturated or the buffer is memory mapped. Caller must hold
 * trace_types_lock.
 */
tracing_arm_snapshot_locked(struct trace_array * tr)966 static int tracing_arm_snapshot_locked(struct trace_array *tr)
967 {
968 	int ret;
969 
970 	lockdep_assert_held(&trace_types_lock);
971 
972 	spin_lock(&tr->snapshot_trigger_lock);
973 	if (tr->snapshot == UINT_MAX || tr->mapped) {
974 		spin_unlock(&tr->snapshot_trigger_lock);
975 		return -EBUSY;
976 	}
977 
978 	tr->snapshot++;
979 	spin_unlock(&tr->snapshot_trigger_lock);
980 
	/* Allocation can fail: roll the refcount back on error */
981 	ret = tracing_alloc_snapshot_instance(tr);
982 	if (ret) {
983 		spin_lock(&tr->snapshot_trigger_lock);
984 		tr->snapshot--;
985 		spin_unlock(&tr->snapshot_trigger_lock);
986 	}
987 
988 	return ret;
989 }
990 
/* Same as tracing_arm_snapshot_locked() but takes trace_types_lock itself. */
int tracing_arm_snapshot(struct trace_array *tr)
{
	guard(mutex)(&trace_types_lock);
	return tracing_arm_snapshot_locked(tr);
}
996 
/* Drop a reference taken by tracing_arm_snapshot{,_locked}(). */
void tracing_disarm_snapshot(struct trace_array *tr)
{
	spin_lock(&tr->snapshot_trigger_lock);
	/* Warn (and don't underflow) on an unbalanced disarm. */
	if (!WARN_ON(!tr->snapshot))
		tr->snapshot--;
	spin_unlock(&tr->snapshot_trigger_lock);
}
1004 
/**
 * tracing_alloc_snapshot - allocate snapshot buffer.
 *
 * This only allocates the snapshot buffer if it isn't already
 * allocated - it doesn't also take a snapshot.
 *
 * This is meant to be used in cases where the snapshot buffer needs
 * to be set up for events that can't sleep but need to be able to
 * trigger a snapshot.
 *
 * Returns 0 on success, negative errno on allocation failure.
 */
int tracing_alloc_snapshot(void)
{
	struct trace_array *tr = &global_trace;
	int ret;

	ret = tracing_alloc_snapshot_instance(tr);
	/* Failure for the global trace array is unexpected; warn loudly. */
	WARN_ON(ret < 0);

	return ret;
}
1025 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1026 
/**
 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
 *
 * Like tracing_snapshot(), but the snapshot buffer is allocated first
 * if it does not exist yet.  The allocation may sleep, so only call
 * this from a context where sleeping is safe.
 *
 * On success the snapshot buffer and the live tracing buffer are
 * swapped, so the live trace at the trigger point is preserved while
 * tracing continues.
 */
void tracing_snapshot_alloc(void)
{
	if (tracing_alloc_snapshot() < 0)
		return;

	tracing_snapshot();
}
1048 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1049 
/**
 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
 * @tr:		The tracing instance
 * @cond_data:	User data to associate with the snapshot
 * @update:	Implementation of the cond_snapshot update function
 *
 * Check whether the conditional snapshot for the given instance has
 * already been enabled, or if the current tracer is already using a
 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
 * save the cond_data and update function inside.
 *
 * Returns 0 if successful, error otherwise.
 */
int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
				 cond_update_fn_t update)
{
	/* __free(kfree): freed automatically on any early return below. */
	struct cond_snapshot *cond_snapshot __free(kfree) =
		kzalloc_obj(*cond_snapshot);
	int ret;

	if (!cond_snapshot)
		return -ENOMEM;

	cond_snapshot->cond_data = cond_data;
	cond_snapshot->update = update;

	guard(mutex)(&trace_types_lock);

	/* A tracer that owns the snapshot buffer excludes cond snapshots. */
	if (tracer_uses_snapshot(tr->current_trace))
		return -EBUSY;

	/*
	 * The cond_snapshot can only change to NULL without the
	 * trace_types_lock. We don't care if we race with it going
	 * to NULL, but we want to make sure that it's not set to
	 * something other than NULL when we get here, which we can
	 * do safely with only holding the trace_types_lock and not
	 * having to take the max_lock.
	 */
	if (tr->cond_snapshot)
		return -EBUSY;

	ret = tracing_arm_snapshot_locked(tr);
	if (ret)
		return ret;

	/* Publish under max_lock; readers take max_lock with IRQs off too. */
	local_irq_disable();
	arch_spin_lock(&tr->max_lock);
	/* no_free_ptr(): ownership moves to tr, disarm the auto-free. */
	tr->cond_snapshot = no_free_ptr(cond_snapshot);
	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	return 0;
}
1104 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1105 
/**
 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
 * @tr:		The tracing instance
 *
 * Check whether the conditional snapshot for the given instance is
 * enabled; if so, free the cond_snapshot associated with it,
 * otherwise return -EINVAL.
 *
 * Returns 0 if successful, error otherwise.
 */
int tracing_snapshot_cond_disable(struct trace_array *tr)
{
	int ret = 0;

	/* max_lock is an arch spinlock; it must be taken with IRQs off. */
	local_irq_disable();
	arch_spin_lock(&tr->max_lock);

	if (!tr->cond_snapshot)
		ret = -EINVAL;
	else {
		kfree(tr->cond_snapshot);
		tr->cond_snapshot = NULL;
	}

	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	/* Drop the reference taken when the cond snapshot was enabled. */
	tracing_disarm_snapshot(tr);

	return ret;
}
1137 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1138 #else
/* CONFIG_TRACER_SNAPSHOT=n: stubs that warn (or fail) if snapshot APIs are used. */
void tracing_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot);
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
int tracing_alloc_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
	return -ENODEV;
}
EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
void tracing_snapshot_alloc(void)
{
	/* Give warning */
	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
void *tracing_cond_snapshot_data(struct trace_array *tr)
{
	return NULL;
}
EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
{
	return -ENODEV;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
int tracing_snapshot_cond_disable(struct trace_array *tr)
{
	/*
	 * NOTE(review): returns false (0, i.e. success) while the enable
	 * stub returns -ENODEV — looks inconsistent; confirm callers
	 * ignore the return before changing it.
	 */
	return false;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
#define free_snapshot(tr)	do { } while (0)
#define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1178 #endif /* CONFIG_TRACER_SNAPSHOT */
1179 
/**
 * tracer_tracing_off - turn off recording for a specific trace array
 * @tr: the trace array to stop recording on
 */
void tracer_tracing_off(struct trace_array *tr)
{
	if (tr->array_buffer.buffer)
		ring_buffer_record_off(tr->array_buffer.buffer);
	/*
	 * This flag is looked at when buffers haven't been allocated
	 * yet, or by some tracers (like irqsoff), that just want to
	 * know if the ring buffer has been disabled, but it can handle
	 * races of where it gets disabled but we still do a record.
	 * As the check is in the fast path of the tracers, it is more
	 * important to be fast than accurate.
	 */
	tr->buffer_disabled = 1;
}
1194 
/**
 * tracer_tracing_disable() - temporary disable the buffer from write
 * @tr: The trace array to disable its buffer for
 *
 * Expects tracer_tracing_enable() to re-enable tracing.
 * The difference between this and tracer_tracing_off() is that this
 * is a counter and can nest, whereas, tracer_tracing_off() can
 * be called multiple times and a single tracer_tracing_on() will
 * enable it.
 */
void tracer_tracing_disable(struct trace_array *tr)
{
	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
		return;

	ring_buffer_record_disable(tr->array_buffer.buffer);
}
1212 
/**
 * tracer_tracing_enable() - counter part of tracer_tracing_disable()
 * @tr: The trace array that had tracer_tracing_disable() called on it
 *
 * This is called after tracer_tracing_disable() has been called on @tr,
 * when it's safe to re-enable tracing.
 */
void tracer_tracing_enable(struct trace_array *tr)
{
	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
		return;

	ring_buffer_record_enable(tr->array_buffer.buffer);
}
1227 
/**
 * tracing_off - turn off tracing buffers
 *
 * This function stops the tracing buffers from recording data.
 * It does not disable any overhead the tracers themselves may
 * be causing. This function simply causes all recording to
 * the ring buffers to fail.
 */
void tracing_off(void)
{
	tracer_tracing_off(&global_trace);
}
1240 EXPORT_SYMBOL_GPL(tracing_off);
1241 
/*
 * Called when a kernel warning fires and __disable_trace_on_warning is
 * set: leave a marker message in the affected buffers, then stop them
 * from recording so the trace leading up to the warning is preserved.
 */
void disable_trace_on_warning(void)
{
	if (__disable_trace_on_warning) {
		struct trace_array *tr = READ_ONCE(printk_trace);

		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
			"Disabling tracing due to warning\n");
		tracing_off();

		/* Disable trace_printk() buffer too */
		if (tr != &global_trace) {
			trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
					       "Disabling tracing due to warning\n");
			tracer_tracing_off(tr);
		}
	}
}
1259 
1260 /**
1261  * tracer_tracing_is_on - show real state of ring buffer enabled
1262  * @tr : the trace array to know if ring buffer is enabled
1263  *
1264  * Shows real state of the ring buffer if it is enabled or not.
1265  */
tracer_tracing_is_on(struct trace_array * tr)1266 bool tracer_tracing_is_on(struct trace_array *tr)
1267 {
1268 	if (tr->array_buffer.buffer)
1269 		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1270 	return !tr->buffer_disabled;
1271 }
1272 
/**
 * tracing_is_on - show state of ring buffers enabled
 *
 * Returns nonzero when the global trace buffer is recording.
 */
int tracing_is_on(void)
{
	return tracer_tracing_is_on(&global_trace);
}
1280 EXPORT_SYMBOL_GPL(tracing_is_on);
1281 
set_buf_size(char * str)1282 static int __init set_buf_size(char *str)
1283 {
1284 	unsigned long buf_size;
1285 
1286 	if (!str)
1287 		return 0;
1288 	buf_size = memparse(str, &str);
1289 	/*
1290 	 * nr_entries can not be zero and the startup
1291 	 * tests require some buffer space. Therefore
1292 	 * ensure we have at least 4096 bytes of buffer.
1293 	 */
1294 	trace_buf_size = max(4096UL, buf_size);
1295 	return 1;
1296 }
1297 __setup("trace_buf_size=", set_buf_size);
1298 
set_tracing_thresh(char * str)1299 static int __init set_tracing_thresh(char *str)
1300 {
1301 	unsigned long threshold;
1302 	int ret;
1303 
1304 	if (!str)
1305 		return 0;
1306 	ret = kstrtoul(str, 0, &threshold);
1307 	if (ret < 0)
1308 		return 0;
1309 	tracing_thresh = threshold * 1000;
1310 	return 1;
1311 }
1312 __setup("tracing_thresh=", set_tracing_thresh);
1313 
/*
 * nsecs_to_usecs - convert a nanosecond count to whole microseconds.
 * Fractional microseconds are truncated.
 */
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
	return nsecs / 1000;
}
1318 
/*
 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
 * of strings in the order that the evals (enum) were defined.
 */
#undef C
#define C(a, b) b

/* These must match the bit positions in trace_iterator_flags */
static const char *trace_options[] = {
	TRACE_FLAGS
	/* NULL-terminated so iteration code can stop without a count. */
	NULL
};
1333 
/*
 * Table of trace clocks selectable per trace array (tr->clock_id indexes
 * this array).  in_ns records whether the clock counts in nanoseconds,
 * which output formatting depends on.
 */
static struct {
	u64 (*func)(void);
	const char *name;
	int in_ns;		/* is this clock in nanoseconds? */
} trace_clocks[] = {
	{ trace_clock_local,		"local",	1 },
	{ trace_clock_global,		"global",	1 },
	{ trace_clock_counter,		"counter",	0 },
	{ trace_clock_jiffies,		"uptime",	0 },
	{ trace_clock,			"perf",		1 },
	{ ktime_get_mono_fast_ns,	"mono",		1 },
	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
	{ ktime_get_boot_fast_ns,	"boot",		1 },
	{ ktime_get_tai_fast_ns,	"tai",		1 },
	ARCH_TRACE_CLOCKS
};
1350 
trace_clock_in_ns(struct trace_array * tr)1351 bool trace_clock_in_ns(struct trace_array *tr)
1352 {
1353 	if (trace_clocks[tr->clock_id].in_ns)
1354 		return true;
1355 
1356 	return false;
1357 }
1358 
/*
 * trace_parser_get_init - gets the buffer for trace parser
 *
 * Zeroes @parser and allocates a @size byte working buffer for it.
 * Returns 0 on success; returns 1 (not a -errno — callers only test
 * for nonzero) when the buffer allocation fails.
 */
int trace_parser_get_init(struct trace_parser *parser, int size)
{
	memset(parser, 0, sizeof(*parser));

	parser->buffer = kmalloc(size, GFP_KERNEL);
	if (!parser->buffer)
		return 1;

	parser->size = size;
	return 0;
}
1373 
/*
 * trace_parser_put - frees the buffer for trace parser
 */
void trace_parser_put(struct trace_parser *parser)
{
	kfree(parser->buffer);
	/* NULL the pointer so a double put is harmless. */
	parser->buffer = NULL;
}
1382 
/*
 * trace_get_user - reads the user input string separated by  space
 * (matched by isspace(ch))
 *
 * For each string found the 'struct trace_parser' is updated,
 * and the function returns.
 *
 * Returns number of bytes read (possibly spanning multiple calls when
 * a token is split across writes — see parser->cont), or a negative
 * error on fault/overflow.
 *
 * See kernel/trace/trace.h for 'struct trace_parser' details.
 */
int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
	size_t cnt, loff_t *ppos)
{
	char ch;
	size_t read = 0;
	ssize_t ret;

	/* A write at offset 0 starts a fresh parse. */
	if (!*ppos)
		trace_parser_clear(parser);

	ret = get_user(ch, ubuf++);
	if (ret)
		goto fail;

	read++;
	cnt--;

	/*
	 * The parser is not finished with the last write,
	 * continue reading the user input without skipping spaces.
	 */
	if (!parser->cont) {
		/* skip white space */
		while (cnt && isspace(ch)) {
			ret = get_user(ch, ubuf++);
			if (ret)
				goto fail;
			read++;
			cnt--;
		}

		parser->idx = 0;

		/* only spaces were written */
		if (isspace(ch) || !ch) {
			*ppos += read;
			return read;
		}
	}

	/* read the non-space input */
	while (cnt && !isspace(ch) && ch) {
		if (parser->idx < parser->size - 1)
			parser->buffer[parser->idx] = ch;
		else {
			ret = -EINVAL;
			goto fail;
		}
		parser->idx++;

		ret = get_user(ch, ubuf++);
		if (ret)
			goto fail;
		read++;
		cnt--;
	}

	/* We either got finished input or we have to wait for another call. */
	if (isspace(ch) || !ch) {
		parser->buffer[parser->idx] = 0;
		parser->cont = false;
	} else if (parser->idx < parser->size - 1) {
		/* Token continues into the next write; stash the last char. */
		parser->cont = true;
		parser->buffer[parser->idx++] = ch;
		/* Make sure the parsed string always terminates with '\0'. */
		parser->buffer[parser->idx] = 0;
	} else {
		ret = -EINVAL;
		goto fail;
	}

	*ppos += read;
	return read;
fail:
	trace_parser_fail(parser);
	return ret;
}
1470 
/* TODO add a seq_buf_to_buffer() */
/*
 * Copy up to @cnt unread bytes from @s into @buf, advancing s->readpos.
 * Returns the number of bytes copied, or -EBUSY when nothing is unread.
 */
static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
{
	int len;

	if (trace_seq_used(s) <= s->readpos)
		return -EBUSY;

	len = trace_seq_used(s) - s->readpos;
	if (cnt > len)
		cnt = len;
	memcpy(buf, s->buffer + s->readpos, cnt);

	s->readpos += cnt;
	return cnt;
}
1487 
1488 unsigned long __read_mostly	tracing_thresh;
1489 
1490 #ifdef CONFIG_TRACER_MAX_TRACE
1491 #ifdef LATENCY_FS_NOTIFY
1492 static struct workqueue_struct *fsnotify_wq;
1493 
/* Workqueue half: emit the fsnotify event for tracing_max_latency. */
static void latency_fsnotify_workfn(struct work_struct *work)
{
	struct trace_array *tr = container_of(work, struct trace_array,
					      fsnotify_work);
	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
}
1500 
/* irq_work half: bounce to the workqueue, since fsnotify may sleep. */
static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
{
	struct trace_array *tr = container_of(iwork, struct trace_array,
					      fsnotify_irqwork);
	queue_work(fsnotify_wq, &tr->fsnotify_work);
}
1507 
latency_fsnotify_init(void)1508 __init static int latency_fsnotify_init(void)
1509 {
1510 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1511 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1512 	if (!fsnotify_wq) {
1513 		pr_err("Unable to allocate tr_max_lat_wq\n");
1514 		return -ENOMEM;
1515 	}
1516 	return 0;
1517 }
1518 
1519 late_initcall_sync(latency_fsnotify_init);
1520 
/* Notify user space that tracing_max_latency changed (safe from any context). */
void latency_fsnotify(struct trace_array *tr)
{
	/* Too early in boot: the workqueue doesn't exist yet. */
	if (!fsnotify_wq)
		return;
	/*
	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
	 * possible that we are called from __schedule() or do_idle(), which
	 * could cause a deadlock.
	 */
	irq_work_queue(&tr->fsnotify_irqwork);
}
1532 #endif /* !LATENCY_FS_NOTIFY */
1533 
1534 static const struct file_operations tracing_max_lat_fops;
1535 
/* Create the tracing_max_latency tracefs file (and its notify plumbing). */
static void trace_create_maxlat_file(struct trace_array *tr,
				     struct dentry *d_tracer)
{
#ifdef LATENCY_FS_NOTIFY
	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
#endif
	tr->d_max_latency = trace_create_file("tracing_max_latency",
					      TRACE_MODE_WRITE,
					      d_tracer, tr,
					      &tracing_max_lat_fops);
}
1548 
/*
 * Copy the new maximum trace into the separate maximum-trace
 * structure. (this way the maximum trace is permanently saved,
 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
 *
 * Records the per-CPU latency bookkeeping (timestamps, the offending
 * task's identity and scheduling parameters) into the snapshot buffer's
 * per-CPU data after the buffers have been swapped.  Caller holds
 * tr->max_lock with IRQs disabled.
 */
static void
__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
	struct array_buffer *trace_buf = &tr->array_buffer;
	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
	struct array_buffer *max_buf = &tr->snapshot_buffer;
	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);

	max_buf->cpu = cpu;
	max_buf->time_start = data->preempt_timestamp;

	max_data->saved_latency = tr->max_latency;
	max_data->critical_start = data->critical_start;
	max_data->critical_end = data->critical_end;

	strscpy(max_data->comm, tsk->comm);
	max_data->pid = tsk->pid;
	/*
	 * If tsk == current, then use current_uid(), as that does not use
	 * RCU. The irq tracer can be called out of RCU scope.
	 */
	if (tsk == current)
		max_data->uid = current_uid();
	else
		max_data->uid = task_uid(tsk);

	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
	max_data->policy = tsk->policy;
	max_data->rt_priority = tsk->rt_priority;

	/* record this tasks comm */
	tracing_record_cmdline(tsk);
	latency_fsnotify(tr);
}
1588 #else
/* !CONFIG_TRACER_MAX_TRACE: max-latency tracking stubs that compile away. */
static inline void trace_create_maxlat_file(struct trace_array *tr,
					    struct dentry *d_tracer) { }
static inline void __update_max_tr(struct trace_array *tr,
				   struct task_struct *tsk, int cpu) { }
1593 #endif /* CONFIG_TRACER_MAX_TRACE */
1594 
1595 #ifdef CONFIG_TRACER_SNAPSHOT
/**
 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
 * @tr: tracer
 * @tsk: the task with the latency
 * @cpu: The cpu that initiated the trace.
 * @cond_data: User data associated with a conditional snapshot
 *
 * Flip the buffers between the @tr and the max_tr and record information
 * about which task was the cause of this latency.
 *
 * Called with IRQs disabled; a no-op while tracing is stopped or the
 * snapshot buffer isn't allocated.
 */
void
update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
	      void *cond_data)
{
	if (tr->stop_count)
		return;

	WARN_ON_ONCE(!irqs_disabled());

	if (!tr->allocated_snapshot) {
		/* Only the nop tracer should hit this when disabling */
		WARN_ON_ONCE(tr->current_trace != &nop_trace);
		return;
	}

	arch_spin_lock(&tr->max_lock);

	/* Inherit the recordable setting from array_buffer */
	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
		ring_buffer_record_on(tr->snapshot_buffer.buffer);
	else
		ring_buffer_record_off(tr->snapshot_buffer.buffer);

	/* Let a registered conditional-snapshot callback veto the swap. */
	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
		arch_spin_unlock(&tr->max_lock);
		return;
	}

	swap(tr->array_buffer.buffer, tr->snapshot_buffer.buffer);

	__update_max_tr(tr, tsk, cpu);

	arch_spin_unlock(&tr->max_lock);

	/* Any waiters on the old snapshot buffer need to wake up */
	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
}
1643 
/**
 * update_max_tr_single - only copy one trace over, and reset the rest
 * @tr: tracer
 * @tsk: task with the latency
 * @cpu: the cpu of the buffer to copy.
 *
 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
 * Called with IRQs disabled.
 */
void
update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
	int ret;

	if (tr->stop_count)
		return;

	WARN_ON_ONCE(!irqs_disabled());
	if (!tr->allocated_snapshot) {
		/* Only the nop tracer should hit this when disabling */
		WARN_ON_ONCE(tr->current_trace != &nop_trace);
		return;
	}

	arch_spin_lock(&tr->max_lock);

	ret = ring_buffer_swap_cpu(tr->snapshot_buffer.buffer, tr->array_buffer.buffer, cpu);

	if (ret == -EBUSY) {
		/*
		 * We failed to swap the buffer due to a commit taking
		 * place on this CPU. We fail to record, but we reset
		 * the max trace buffer (no one writes directly to it)
		 * and flag that it failed.
		 * Another reason is resize is in progress.
		 */
		trace_array_printk_buf(tr->snapshot_buffer.buffer, _THIS_IP_,
			"Failed to swap buffers due to commit or resize in progress\n");
	}

	/* -EAGAIN/-EBUSY are expected transient failures; anything else is a bug. */
	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);

	__update_max_tr(tr, tsk, cpu);
	arch_spin_unlock(&tr->max_lock);
}
1688 #endif /* CONFIG_TRACER_SNAPSHOT */
1689 
/* State handed to wait_pipe_cond() through ring_buffer_wait(). */
struct pipe_wait {
	struct trace_iterator		*iter;	/* the iterator being waited on */
	int				wait_index;	/* wait_index sampled before sleeping */
};

/*
 * Wake-up condition for wait_on_pipe(): stop waiting when the iterator's
 * wait_index moved (someone poked the readers) or the file was closed.
 */
static bool wait_pipe_cond(void *data)
{
	struct pipe_wait *pwait = data;
	struct trace_iterator *iter = pwait->iter;

	if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
		return true;

	return iter->closed;
}
1705 
/* Block until the iterator's ring buffer has @full data (or a wake event). */
static int wait_on_pipe(struct trace_iterator *iter, int full)
{
	struct pipe_wait pwait;
	int ret;

	/* Iterators are static, they should be filled or empty */
	if (trace_buffer_iter(iter, iter->cpu_file))
		return 0;

	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
	pwait.iter = iter;

	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
			       wait_pipe_cond, &pwait);

#ifdef CONFIG_TRACER_SNAPSHOT
	/*
	 * Make sure this is still the snapshot buffer, as if a snapshot were
	 * to happen, this would now be the main buffer.
	 */
	if (iter->snapshot)
		iter->array_buffer = &iter->tr->snapshot_buffer;
#endif
	return ret;
}
1731 
1732 #ifdef CONFIG_FTRACE_STARTUP_TEST
/* Set once boot is far enough along for selftests to run (see init_trace_selftests()). */
static bool selftests_can_run;

/* A tracer whose selftest was postponed because it registered too early. */
struct trace_selftests {
	struct list_head		list;
	struct tracer			*type;
};

static LIST_HEAD(postponed_selftests);
1741 
save_selftest(struct tracer * type)1742 static int save_selftest(struct tracer *type)
1743 {
1744 	struct trace_selftests *selftest;
1745 
1746 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1747 	if (!selftest)
1748 		return -ENOMEM;
1749 
1750 	selftest->type = type;
1751 	list_add(&selftest->list, &postponed_selftests);
1752 	return 0;
1753 }
1754 
/*
 * Run @type's selftest against the global trace array, temporarily
 * installing it as the current tracer.  Returns 0 on pass or skip,
 * -1 on failure, or the result of save_selftest() when postponed.
 * Caller holds trace_types_lock (via register_tracer paths).
 */
static int run_tracer_selftest(struct tracer *type)
{
	struct trace_array *tr = &global_trace;
	/* Save current tracer state so it can be restored after the test. */
	struct tracer_flags *saved_flags = tr->current_trace_flags;
	struct tracer *saved_tracer = tr->current_trace;
	int ret;

	if (!type->selftest || tracing_selftest_disabled)
		return 0;

	/*
	 * If a tracer registers early in boot up (before scheduling is
	 * initialized and such), then do not run its selftests yet.
	 * Instead, run it a little later in the boot process.
	 */
	if (!selftests_can_run)
		return save_selftest(type);

	if (!tracing_is_on()) {
		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
			type->name);
		return 0;
	}

	/*
	 * Run a selftest on this tracer.
	 * Here we reset the trace buffer, and set the current
	 * tracer to be this tracer. The tracer can then run some
	 * internal tracing to verify that everything is in order.
	 * If we fail, we do not register this tracer.
	 */
	tracing_reset_online_cpus(&tr->array_buffer);

	tr->current_trace = type;
	tr->current_trace_flags = type->flags ? : type->default_flags;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (tracer_uses_snapshot(type)) {
		/* If we expanded the buffers, make sure the max is expanded too */
		if (tr->ring_buffer_expanded)
			ring_buffer_resize(tr->snapshot_buffer.buffer, trace_buf_size,
					   RING_BUFFER_ALL_CPUS);
		tr->allocated_snapshot = true;
	}
#endif

	/* the test is responsible for initializing and enabling */
	pr_info("Testing tracer %s: ", type->name);
	ret = type->selftest(type, tr);
	/* the test is responsible for resetting too */
	tr->current_trace = saved_tracer;
	tr->current_trace_flags = saved_flags;
	if (ret) {
		printk(KERN_CONT "FAILED!\n");
		/* Add the warning after printing 'FAILED' */
		WARN_ON(1);
		return -1;
	}
	/* Only reset on passing, to avoid touching corrupted buffers */
	tracing_reset_online_cpus(&tr->array_buffer);

#ifdef CONFIG_TRACER_MAX_TRACE
	if (tracer_uses_snapshot(type)) {
		tr->allocated_snapshot = false;

		/* Shrink the max buffer again */
		if (tr->ring_buffer_expanded)
			ring_buffer_resize(tr->snapshot_buffer.buffer, 1,
					   RING_BUFFER_ALL_CPUS);
	}
#endif

	printk(KERN_CONT "PASSED\n");
	return 0;
}
1830 
/* Wrapper around run_tracer_selftest() that flags the selftest-running state. */
static int do_run_tracer_selftest(struct tracer *type)
{
	int ret;

	/*
	 * Tests can take a long time, especially if they are run one after the
	 * other, as does happen during bootup when all the tracers are
	 * registered. This could cause the soft lockup watchdog to trigger.
	 */
	cond_resched();

	tracing_selftest_running = true;
	ret = run_tracer_selftest(type);
	tracing_selftest_running = false;

	return ret;
}
1848 
/*
 * Late-boot initcall: run all selftests postponed by save_selftest().
 * A tracer whose postponed selftest fails is unlinked from trace_types.
 */
static __init int init_trace_selftests(void)
{
	struct trace_selftests *p, *n;
	struct tracer *t, **last;
	int ret;

	selftests_can_run = true;

	guard(mutex)(&trace_types_lock);

	if (list_empty(&postponed_selftests))
		return 0;

	pr_info("Running postponed tracer tests:\n");

	tracing_selftest_running = true;
	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
		/* This loop can take minutes when sanitizers are enabled, so
		 * lets make sure we allow RCU processing.
		 */
		cond_resched();
		ret = run_tracer_selftest(p->type);
		/* If the test fails, then warn and remove from available_tracers */
		if (ret < 0) {
			WARN(1, "tracer: %s failed selftest, disabling\n",
			     p->type->name);
			/* Singly-linked list removal: find and unlink p->type. */
			last = &trace_types;
			for (t = trace_types; t; t = t->next) {
				if (t == p->type) {
					*last = t->next;
					break;
				}
				last = &t->next;
			}
		}
		list_del(&p->list);
		kfree(p);
	}
	tracing_selftest_running = false;

	return 0;
}
1891 core_initcall(init_trace_selftests);
1892 #else
/* !CONFIG_FTRACE_STARTUP_TEST: selftests compile away; always "pass". */
static inline int do_run_tracer_selftest(struct tracer *type)
{
	return 0;
}
1897 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1898 
1899 static int add_tracer(struct trace_array *tr, struct tracer *t);
1900 
1901 static void __init apply_trace_boot_options(void);
1902 
/* Free every per-instance tracer entry (and its flags copy) hanging off @tr. */
static void free_tracers(struct trace_array *tr)
{
	struct tracers *t, *n;

	lockdep_assert_held(&trace_types_lock);

	list_for_each_entry_safe(t, n, &tr->tracers, list) {
		list_del(&t->list);
		kfree(t->flags);
		kfree(t);
	}
}
1915 
/**
 * register_tracer - register a tracer with the ftrace system.
 * @type: the plugin for the tracer
 *
 * Register a new plugin tracer.
 *
 * Validates the tracer, runs its selftest, hooks it into every existing
 * trace array and the global trace_types list, and — if it matches the
 * boot-selected tracer — starts it.  Returns 0 on success, negative on
 * error (-1 for validation/duplicate failures, -EPERM under lockdown).
 */
int __init register_tracer(struct tracer *type)
{
	struct trace_array *tr;
	struct tracer *t;
	int ret = 0;

	if (!type->name) {
		pr_info("Tracer must have a name\n");
		return -1;
	}

	if (strlen(type->name) >= MAX_TRACER_SIZE) {
		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
		return -1;
	}

	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Can not register tracer %s due to lockdown\n",
			   type->name);
		return -EPERM;
	}

	mutex_lock(&trace_types_lock);

	/* Reject duplicate registrations by name. */
	for (t = trace_types; t; t = t->next) {
		if (strcmp(type->name, t->name) == 0) {
			/* already found */
			pr_info("Tracer %s already registered\n",
				type->name);
			ret = -1;
			goto out;
		}
	}

	/* store the tracer for __set_tracer_option */
	if (type->flags)
		type->flags->trace = type;

	ret = do_run_tracer_selftest(type);
	if (ret < 0)
		goto out;

	/* Make the tracer (and its options) visible in every instance. */
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		ret = add_tracer(tr, type);
		if (ret < 0) {
			/* The tracer will still exist but without options */
			pr_warn("Failed to create tracer options for %s\n", type->name);
			break;
		}
	}

	/* Link into the global singly-linked tracer list. */
	type->next = trace_types;
	trace_types = type;

 out:
	mutex_unlock(&trace_types_lock);

	if (ret || !default_bootup_tracer)
		return ret;

	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
		return 0;

	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
	/* Do we want this tracer to start on bootup? */
	WARN_ON(tracing_set_tracer(&global_trace, type->name) < 0);
	default_bootup_tracer = NULL;

	apply_trace_boot_options();

	/* disable other selftests, since this will break it. */
	disable_tracing_selftest("running a tracer");

	return 0;
}
1997 
tracing_reset_cpu(struct array_buffer * buf,int cpu)1998 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
1999 {
2000 	struct trace_buffer *buffer = buf->buffer;
2001 
2002 	if (!buffer)
2003 		return;
2004 
2005 	ring_buffer_record_disable(buffer);
2006 
2007 	/* Make sure all commits have finished */
2008 	synchronize_rcu();
2009 	ring_buffer_reset_cpu(buffer, cpu);
2010 
2011 	ring_buffer_record_enable(buffer);
2012 }
2013 
tracing_reset_online_cpus(struct array_buffer * buf)2014 void tracing_reset_online_cpus(struct array_buffer *buf)
2015 {
2016 	struct trace_buffer *buffer = buf->buffer;
2017 
2018 	if (!buffer)
2019 		return;
2020 
2021 	ring_buffer_record_disable(buffer);
2022 
2023 	/* Make sure all commits have finished */
2024 	synchronize_rcu();
2025 
2026 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2027 
2028 	ring_buffer_reset_online_cpus(buffer);
2029 
2030 	ring_buffer_record_enable(buffer);
2031 }
2032 
tracing_reset_all_cpus(struct array_buffer * buf)2033 static void tracing_reset_all_cpus(struct array_buffer *buf)
2034 {
2035 	struct trace_buffer *buffer = buf->buffer;
2036 
2037 	if (!buffer)
2038 		return;
2039 
2040 	ring_buffer_record_disable(buffer);
2041 
2042 	/* Make sure all commits have finished */
2043 	synchronize_rcu();
2044 
2045 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2046 
2047 	ring_buffer_reset(buffer);
2048 
2049 	ring_buffer_record_enable(buffer);
2050 }
2051 
2052 /* Must have trace_types_lock held */
void tracing_reset_all_online_cpus_unlocked(void)
{
	struct trace_array *tr;

	lockdep_assert_held(&trace_types_lock);

	/* Only clear instances that asked for it via tr->clear_trace. */
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (!tr->clear_trace)
			continue;
		tr->clear_trace = false;
		tracing_reset_online_cpus(&tr->array_buffer);
#ifdef CONFIG_TRACER_SNAPSHOT
		/* The snapshot buffer mirrors the main buffer; clear it too. */
		tracing_reset_online_cpus(&tr->snapshot_buffer);
#endif
	}
}
2069 
/* Locked wrapper: takes trace_types_lock around the _unlocked variant. */
void tracing_reset_all_online_cpus(void)
{
	guard(mutex)(&trace_types_lock);
	tracing_reset_all_online_cpus_unlocked();
}
2075 
/* Non-zero when tracing_stop() has been called more often than tracing_start(). */
int is_tracing_stopped(void)
{
	return global_trace.stop_count;
}
2080 
/*
 * Balance one tracing_stop_tr(); only the final start (stop_count hits
 * zero) actually re-enables recording on @tr's buffers.
 */
static void tracing_start_tr(struct trace_array *tr)
{
	struct trace_buffer *buffer;

	if (tracing_disabled)
		return;

	guard(raw_spinlock_irqsave)(&tr->start_lock);
	if (--tr->stop_count) {
		if (WARN_ON_ONCE(tr->stop_count < 0)) {
			/* Someone screwed up their debugging */
			tr->stop_count = 0;
		}
		return;
	}

	/* Prevent the buffers from switching */
	arch_spin_lock(&tr->max_lock);

	buffer = tr->array_buffer.buffer;
	if (buffer)
		ring_buffer_record_enable(buffer);

#ifdef CONFIG_TRACER_SNAPSHOT
	buffer = tr->snapshot_buffer.buffer;
	if (buffer)
		ring_buffer_record_enable(buffer);
#endif

	arch_spin_unlock(&tr->max_lock);
}
2112 
2113 /**
2114  * tracing_start - quick start of the tracer
2115  *
2116  * If tracing is enabled but was stopped by tracing_stop,
2117  * this will start the tracer back up.
2118  */
tracing_start(void)2119 void tracing_start(void)
2120 
2121 {
2122 	return tracing_start_tr(&global_trace);
2123 }
2124 
/*
 * Disable recording on @tr's buffers.  Nested: only the first stop
 * (stop_count going 0 -> 1) actually disables the buffers.
 */
static void tracing_stop_tr(struct trace_array *tr)
{
	struct trace_buffer *buffer;

	guard(raw_spinlock_irqsave)(&tr->start_lock);
	if (tr->stop_count++)
		return;

	/* Prevent the buffers from switching */
	arch_spin_lock(&tr->max_lock);

	buffer = tr->array_buffer.buffer;
	if (buffer)
		ring_buffer_record_disable(buffer);

#ifdef CONFIG_TRACER_SNAPSHOT
	buffer = tr->snapshot_buffer.buffer;
	if (buffer)
		ring_buffer_record_disable(buffer);
#endif

	arch_spin_unlock(&tr->max_lock);
}
2148 
2149 /**
2150  * tracing_stop - quick stop of the tracer
2151  *
2152  * Light weight way to stop tracing. Use in conjunction with
2153  * tracing_start.
2154  */
tracing_stop(void)2155 void tracing_stop(void)
2156 {
2157 	return tracing_stop_tr(&global_trace);
2158 }
2159 
2160 /*
2161  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2162  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2163  * simplifies those functions and keeps them in sync.
2164  */
trace_handle_return(struct trace_seq * s)2165 enum print_line_t trace_handle_return(struct trace_seq *s)
2166 {
2167 	return trace_seq_has_overflowed(s) ?
2168 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2169 }
2170 EXPORT_SYMBOL_GPL(trace_handle_return);
2171 
/* Current task's migrate-disable depth; always zero on UP kernels. */
static unsigned short migration_disable_value(void)
{
#ifdef CONFIG_SMP
	return current->migration_disabled;
#else
	return 0;
#endif
}
2180 
/*
 * Pack the current execution context into one word:
 *   bits 16+ : TRACE_FLAG_* flags (seeded from @irqs_status)
 *   bits 4-7 : migration-disable depth, clamped to 15
 *   bits 0-3 : preempt count, clamped to 15
 */
unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
{
	unsigned int trace_flags = irqs_status;
	unsigned int pc;

	pc = preempt_count();

	if (pc & NMI_MASK)
		trace_flags |= TRACE_FLAG_NMI;
	if (pc & HARDIRQ_MASK)
		trace_flags |= TRACE_FLAG_HARDIRQ;
	if (in_serving_softirq())
		trace_flags |= TRACE_FLAG_SOFTIRQ;
	/* BH disabled counts above the "serving softirq" increment. */
	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
		trace_flags |= TRACE_FLAG_BH_OFF;

	if (tif_need_resched())
		trace_flags |= TRACE_FLAG_NEED_RESCHED;
	if (test_preempt_need_resched())
		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
	if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
		trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
}
2206 
/* Public wrapper around __trace_buffer_lock_reserve(); same contract. */
struct ring_buffer_event *
trace_buffer_lock_reserve(struct trace_buffer *buffer,
			  int type,
			  unsigned long len,
			  unsigned int trace_ctx)
{
	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
}
2215 
/* Per-CPU scratch event page used while filtering (see trace_buffered_event_enable()). */
DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
/* Non-zero while a CPU's scratch event is in use (or force-disabled). */
DEFINE_PER_CPU(int, trace_buffered_event_cnt);
/* Refcount of users that requested buffered events. */
static int trace_buffered_event_ref;
2219 
2220 /**
2221  * trace_buffered_event_enable - enable buffering events
2222  *
2223  * When events are being filtered, it is quicker to use a temporary
2224  * buffer to write the event data into if there's a likely chance
2225  * that it will not be committed. The discard of the ring buffer
2226  * is not as fast as committing, and is much slower than copying
2227  * a commit.
2228  *
2229  * When an event is to be filtered, allocate per cpu buffers to
2230  * write the event data into, and if the event is filtered and discarded
2231  * it is simply dropped, otherwise, the entire data is to be committed
2232  * in one shot.
2233  */
void trace_buffered_event_enable(void)
{
	struct ring_buffer_event *event;
	struct page *page;
	int cpu;

	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));

	/* Pages are shared by all users; only the first enabler allocates. */
	if (trace_buffered_event_ref++)
		return;

	for_each_tracing_cpu(cpu) {
		page = alloc_pages_node(cpu_to_node(cpu),
					GFP_KERNEL | __GFP_NORETRY, 0);
		/* This is just an optimization and can handle failures */
		if (!page) {
			pr_err("Failed to allocate event buffer\n");
			break;
		}

		event = page_address(page);
		memset(event, 0, sizeof(*event));

		per_cpu(trace_buffered_event, cpu) = event;

		/* Sanity check: this CPU's per-cpu slot must match what we wrote. */
		scoped_guard(preempt,) {
			if (cpu == smp_processor_id() &&
			    __this_cpu_read(trace_buffered_event) !=
			    per_cpu(trace_buffered_event, cpu))
				WARN_ON_ONCE(1);
		}
	}
}
2267 
/* IPI callback: drop the "buffer in use" count, re-allowing buffered events. */
static void enable_trace_buffered_event(void *data)
{
	this_cpu_dec(trace_buffered_event_cnt);
}
2272 
/* IPI callback: raise the "buffer in use" count so the scratch page is skipped. */
static void disable_trace_buffered_event(void *data)
{
	this_cpu_inc(trace_buffered_event_cnt);
}
2277 
2278 /**
2279  * trace_buffered_event_disable - disable buffering events
2280  *
2281  * When a filter is removed, it is faster to not use the buffered
2282  * events, and to commit directly into the ring buffer. Free up
2283  * the temp buffers when there are no more users. This requires
2284  * special synchronization with current events.
2285  */
void trace_buffered_event_disable(void)
{
	int cpu;

	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));

	if (WARN_ON_ONCE(!trace_buffered_event_ref))
		return;

	/* Only the last user tears the pages down. */
	if (--trace_buffered_event_ref)
		return;

	/* For each CPU, set the buffer as used. */
	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
			 NULL, true);

	/* Wait for all current users to finish */
	synchronize_rcu();

	for_each_tracing_cpu(cpu) {
		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
		per_cpu(trace_buffered_event, cpu) = NULL;
	}

	/*
	 * Wait for all CPUs that potentially started checking if they can use
	 * their event buffer only after the previous synchronize_rcu() call and
	 * they still read a valid pointer from trace_buffered_event. It must be
	 * ensured they don't see cleared trace_buffered_event_cnt else they
	 * could wrongly decide to use the pointed-to buffer which is now freed.
	 */
	synchronize_rcu();

	/* For each CPU, relinquish the buffer */
	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
			 true);
}
2323 
2324 static struct trace_buffer *temp_buffer;
2325 
/*
 * Reserve space for an event.  When filtering is active, try the per-CPU
 * scratch page first (cheap to discard on a filter miss); otherwise
 * reserve directly in the ring buffer.  If the buffer is off but the
 * event has conditional triggers, fall back to temp_buffer so the
 * trigger can still inspect the data.  Returns with preemption disabled
 * on success (released by the matching commit/discard).
 */
struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
			  struct trace_event_file *trace_file,
			  int type, unsigned long len,
			  unsigned int trace_ctx)
{
	struct ring_buffer_event *entry;
	struct trace_array *tr = trace_file->tr;
	int val;

	*current_rb = tr->array_buffer.buffer;

	if (!tr->no_filter_buffering_ref &&
	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
		preempt_disable_notrace();
		/*
		 * Filtering is on, so try to use the per cpu buffer first.
		 * This buffer will simulate a ring_buffer_event,
		 * where the type_len is zero and the array[0] will
		 * hold the full length.
		 * (see include/linux/ring-buffer.h for details on
		 *  how the ring_buffer_event is structured).
		 *
		 * Using a temp buffer during filtering and copying it
		 * on a matched filter is quicker than writing directly
		 * into the ring buffer and then discarding it when
		 * it doesn't match. That is because the discard
		 * requires several atomic operations to get right.
		 * Copying on match and doing nothing on a failed match
		 * is still quicker than no copy on match, but having
		 * to discard out of the ring buffer on a failed match.
		 */
		if ((entry = __this_cpu_read(trace_buffered_event))) {
			int max_len = PAGE_SIZE - struct_size(entry, array, 1);

			val = this_cpu_inc_return(trace_buffered_event_cnt);

			/*
			 * Preemption is disabled, but interrupts and NMIs
			 * can still come in now. If that happens after
			 * the above increment, then it will have to go
			 * back to the old method of allocating the event
			 * on the ring buffer, and if the filter fails, it
			 * will have to call ring_buffer_discard_commit()
			 * to remove it.
			 *
			 * Need to also check the unlikely case that the
			 * length is bigger than the temp buffer size.
			 * If that happens, then the reserve is pretty much
			 * guaranteed to fail, as the ring buffer currently
			 * only allows events less than a page. But that may
			 * change in the future, so let the ring buffer reserve
			 * handle the failure in that case.
			 */
			if (val == 1 && likely(len <= max_len)) {
				trace_event_setup(entry, type, trace_ctx);
				entry->array[0] = len;
				/* Return with preemption disabled */
				return entry;
			}
			this_cpu_dec(trace_buffered_event_cnt);
		}
		/* __trace_buffer_lock_reserve() disables preemption */
		preempt_enable_notrace();
	}

	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
					    trace_ctx);
	/*
	 * If tracing is off, but we have triggers enabled
	 * we still need to look at the event data. Use the temp_buffer
	 * to store the trace event for the trigger to use. It's recursive
	 * safe and will not be recorded anywhere.
	 */
	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
		*current_rb = temp_buffer;
		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
						    trace_ctx);
	}
	return entry;
}
EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2408 
2409 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2410 static DEFINE_MUTEX(tracepoint_printk_mutex);
2411 
/*
 * tp_printk path: format the just-recorded event with its print handler
 * and emit it via printk().  Shares tracepoint_print_iter, so the
 * formatting is serialized by tracepoint_iter_lock.
 */
static void output_printk(struct trace_event_buffer *fbuffer)
{
	struct trace_event_call *event_call;
	struct trace_event_file *file;
	struct trace_event *event;
	unsigned long flags;
	struct trace_iterator *iter = tracepoint_print_iter;

	/* We should never get here if iter is NULL */
	if (WARN_ON_ONCE(!iter))
		return;

	event_call = fbuffer->trace_file->event_call;
	if (!event_call || !event_call->event.funcs ||
	    !event_call->event.funcs->trace)
		return;

	/* Honor soft-disable and per-file filters before printing. */
	file = fbuffer->trace_file;
	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
	     !filter_match_preds(file->filter, fbuffer->entry)))
		return;

	event = &fbuffer->trace_file->event_call->event;

	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
	trace_seq_init(&iter->seq);
	iter->ent = fbuffer->entry;
	event_call->event.funcs->trace(iter, 0, event);
	/* NUL-terminate the seq buffer before handing it to printk(). */
	trace_seq_putc(&iter->seq, 0);
	printk("%s", iter->seq.buffer);

	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
}
2446 
/*
 * sysctl handler for kernel.tracepoint_printk: flips the static key that
 * routes events through output_printk() when the value changes.
 */
int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
			     void *buffer, size_t *lenp,
			     loff_t *ppos)
{
	int save_tracepoint_printk;
	int ret;

	guard(mutex)(&tracepoint_printk_mutex);
	save_tracepoint_printk = tracepoint_printk;

	ret = proc_dointvec(table, write, buffer, lenp, ppos);

	/*
	 * This will force exiting early, as tracepoint_printk
	 * is always zero when tracepoint_printk_iter is not allocated
	 */
	if (!tracepoint_print_iter)
		tracepoint_printk = 0;

	/* Only touch the static key when the value actually changed. */
	if (save_tracepoint_printk == tracepoint_printk)
		return ret;

	if (tracepoint_printk)
		static_key_enable(&tracepoint_printk_key.key);
	else
		static_key_disable(&tracepoint_printk_key.key);

	return ret;
}
2476 
/*
 * Finish a reserved event: run trigger tests (possibly discarding the
 * event), optionally print/export it, commit it, and fire any
 * post-commit triggers.
 */
void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
{
	enum event_trigger_type tt = ETT_NONE;
	struct trace_event_file *file = fbuffer->trace_file;

	/* Triggers may consume the event; tt records post-call triggers. */
	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
			fbuffer->entry, &tt))
		goto discard;

	if (static_key_false(&tracepoint_printk_key.key))
		output_printk(fbuffer);

	if (static_branch_unlikely(&trace_event_exports_enabled))
		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);

	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);

discard:
	/* Post-call triggers run whether or not the event was discarded. */
	if (tt)
		event_triggers_post_call(file, tt);

}
EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2501 
2502 /*
2503  * Skip 3:
2504  *
2505  *   trace_buffer_unlock_commit_regs()
2506  *   trace_event_buffer_commit()
2507  *   trace_event_raw_event_xxx()
2508  */
2509 # define STACK_SKIP 3
2510 
/* Commit @event, then record kernel and user stack traces if enabled. */
void trace_buffer_unlock_commit_regs(struct trace_array *tr,
				     struct trace_buffer *buffer,
				     struct ring_buffer_event *event,
				     unsigned int trace_ctx,
				     struct pt_regs *regs)
{
	__buffer_unlock_commit(buffer, event);

	/*
	 * If regs is not set, then skip the necessary functions.
	 * Note, we can still get here via blktrace, wakeup tracer
	 * and mmiotrace, but that's ok if they lose a function or
	 * two. They are not that meaningful.
	 */
	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
	ftrace_trace_userstack(tr, buffer, trace_ctx);
}
2528 
2529 /*
2530  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2531  */
/* Commit @event without recording any stack traces. */
void
trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
				   struct ring_buffer_event *event)
{
	__buffer_unlock_commit(buffer, event);
}
2538 
/*
 * Record a TRACE_FN (function entry) event for @ip called from
 * @parent_ip.  When @fregs is non-NULL, the argument registers are
 * saved along with the entry (if the arch supports it).
 */
void
trace_function(struct trace_array *tr, unsigned long ip, unsigned long
	       parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs)
{
	struct trace_buffer *buffer = tr->array_buffer.buffer;
	struct ring_buffer_event *event;
	struct ftrace_entry *entry;
	int size = sizeof(*entry);

	/* Grow the reservation to hold the arguments when fregs is given. */
	size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long);

	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size,
					    trace_ctx);
	if (!event)
		return;
	entry	= ring_buffer_event_data(event);
	entry->ip			= ip;
	entry->parent_ip		= parent_ip;

#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
	if (fregs) {
		for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
			entry->args[i] = ftrace_regs_get_argument(fregs, i);
	}
#endif

	if (static_branch_unlikely(&trace_function_exports_enabled))
		ftrace_exports(event, TRACE_EXPORT_FUNCTION);
	__buffer_unlock_commit(buffer, event);
}
2569 
2570 #ifdef CONFIG_STACKTRACE
2571 
2572 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2573 #define FTRACE_KSTACK_NESTING	4
2574 
2575 #define FTRACE_KSTACK_ENTRIES	(SZ_4K / FTRACE_KSTACK_NESTING)
2576 
2577 struct ftrace_stack {
2578 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2579 };
2580 
2581 
2582 struct ftrace_stacks {
2583 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2584 };
2585 
2586 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2587 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2588 
/*
 * Record a kernel stack trace into @buffer.  Uses per-CPU scratch
 * stacks with FTRACE_KSTACK_NESTING levels so that normal, softirq,
 * irq and NMI context can each capture a trace without stomping on
 * each other.
 */
void __ftrace_trace_stack(struct trace_array *tr,
			  struct trace_buffer *buffer,
			  unsigned int trace_ctx,
			  int skip, struct pt_regs *regs)
{
	struct ring_buffer_event *event;
	unsigned int size, nr_entries;
	struct ftrace_stack *fstack;
	struct stack_entry *entry;
	int stackidx;
	int bit;

	/* Guard against this function tracing/recursing into itself. */
	bit = trace_test_and_set_recursion(_THIS_IP_, _RET_IP_, TRACE_EVENT_START);
	if (bit < 0)
		return;

	/*
	 * Add one, for this function and the call to save_stack_trace()
	 * If regs is set, then these functions will not be in the way.
	 */
#ifndef CONFIG_UNWINDER_ORC
	if (!regs)
		skip++;
#endif

	guard(preempt_notrace)();

	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;

	/* This should never happen. If it does, yell once and skip */
	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
		goto out;

	/*
	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
	 * interrupt will either see the value pre increment or post
	 * increment. If the interrupt happens pre increment it will have
	 * restored the counter when it returns.  We just need a barrier to
	 * keep gcc from moving things around.
	 */
	barrier();

	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
	size = ARRAY_SIZE(fstack->calls);

	if (regs) {
		nr_entries = stack_trace_save_regs(regs, fstack->calls,
						   size, skip);
	} else {
		nr_entries = stack_trace_save(fstack->calls, size, skip);
	}

#ifdef CONFIG_DYNAMIC_FTRACE
	/* Mark entry of stack trace as trampoline code */
	if (tr->ops && tr->ops->trampoline) {
		unsigned long tramp_start = tr->ops->trampoline;
		unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
		unsigned long *calls = fstack->calls;

		for (int i = 0; i < nr_entries; i++) {
			if (calls[i] >= tramp_start && calls[i] < tramp_end)
				calls[i] = FTRACE_TRAMPOLINE_MARKER;
		}
	}
#endif

	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
				    struct_size(entry, caller, nr_entries),
				    trace_ctx);
	if (!event)
		goto out;
	entry = ring_buffer_event_data(event);

	entry->size = nr_entries;
	memcpy(&entry->caller, fstack->calls,
	       flex_array_size(entry, caller, nr_entries));

	__buffer_unlock_commit(buffer, event);

 out:
	/* Again, don't let gcc optimize things here */
	barrier();
	__this_cpu_dec(ftrace_stack_reserve);
	trace_clear_recursion(bit);
}
2674 
/*
 * Record a kernel stack trace into @tr's main buffer, making RCU watch
 * first if needed (stack saving may use RCU-protected code).
 */
void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
		   int skip)
{
	struct trace_buffer *buffer = tr->array_buffer.buffer;

	if (rcu_is_watching()) {
		__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
		return;
	}

	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
		return;

	/*
	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
	 * but if the above rcu_is_watching() failed, then the NMI
	 * triggered someplace critical, and ct_irq_enter() should
	 * not be called from NMI.
	 */
	if (unlikely(in_nmi()))
		return;

	/* Temporarily enter "irq" context so RCU watches during the save. */
	ct_irq_enter_irqson();
	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
	ct_irq_exit_irqson();
}
2701 
2702 /**
2703  * trace_dump_stack - record a stack back trace in the trace buffer
2704  * @skip: Number of functions to skip (helper handlers)
2705  */
void trace_dump_stack(int skip)
{
	if (tracing_disabled || tracing_selftest_running)
		return;

#ifndef CONFIG_UNWINDER_ORC
	/* Skip 1 to skip this function. */
	skip++;
#endif
	/* Record into whichever trace array printk tracing is directed at. */
	__ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
				tracing_gen_ctx(), skip, NULL);
}
EXPORT_SYMBOL_GPL(trace_dump_stack);
2719 
2720 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
2721 static DEFINE_PER_CPU(int, user_stack_count);
2722 
/*
 * Record the current task's user-space stack trace, if the
 * userstacktrace option is enabled for @tr.
 */
static void
ftrace_trace_userstack(struct trace_array *tr,
		       struct trace_buffer *buffer, unsigned int trace_ctx)
{
	struct ring_buffer_event *event;
	struct userstack_entry *entry;

	if (!(tr->trace_flags & TRACE_ITER(USERSTACKTRACE)))
		return;

	/*
	 * NMIs can not handle page faults, even with fix ups.
	 * The save user stack can (and often does) fault.
	 */
	if (unlikely(in_nmi()))
		return;

	/*
	 * prevent recursion, since the user stack tracing may
	 * trigger other kernel events.
	 */
	guard(preempt)();
	if (__this_cpu_read(user_stack_count))
		return;

	__this_cpu_inc(user_stack_count);

	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
					    sizeof(*entry), trace_ctx);
	if (!event)
		goto out_drop_count;
	entry	= ring_buffer_event_data(event);

	entry->tgid		= current->tgid;
	/* Zero first: the save below may fill fewer than all slots. */
	memset(&entry->caller, 0, sizeof(entry->caller));

	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
	__buffer_unlock_commit(buffer, event);

 out_drop_count:
	__this_cpu_dec(user_stack_count);
}
2765 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
/* No user-stack support on this config: recording is a no-op. */
static void ftrace_trace_userstack(struct trace_array *tr,
				   struct trace_buffer *buffer,
				   unsigned int trace_ctx)
{
}
2771 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
2772 
2773 #endif /* CONFIG_STACKTRACE */
2774 
2775 static inline void
func_repeats_set_delta_ts(struct func_repeats_entry * entry,unsigned long long delta)2776 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
2777 			  unsigned long long delta)
2778 {
2779 	entry->bottom_delta_ts = delta & U32_MAX;
2780 	entry->top_delta_ts = (delta >> 32);
2781 }
2782 
/*
 * Emit a TRACE_FUNC_REPEATS event summarizing @last_info: the repeated
 * ip/parent_ip pair, how many times it repeated, and the time since the
 * last recorded call.
 */
void trace_last_func_repeats(struct trace_array *tr,
			     struct trace_func_repeats *last_info,
			     unsigned int trace_ctx)
{
	struct trace_buffer *buffer = tr->array_buffer.buffer;
	struct func_repeats_entry *entry;
	struct ring_buffer_event *event;
	u64 delta;

	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
					    sizeof(*entry), trace_ctx);
	if (!event)
		return;

	/* Delta from the last recorded call to this event's timestamp. */
	delta = ring_buffer_event_time_stamp(buffer, event) -
		last_info->ts_last_call;

	entry = ring_buffer_event_data(event);
	entry->ip = last_info->ip;
	entry->parent_ip = last_info->parent_ip;
	entry->count = last_info->count;
	func_repeats_set_delta_ts(entry, delta);

	__buffer_unlock_commit(buffer, event);
}
2808 
/* Advance the iterator by one entry on its current CPU. */
static void trace_iterator_increment(struct trace_iterator *iter)
{
	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);

	iter->idx++;
	/* buf_iter is NULL in non-consuming ("pipe") mode. */
	if (buf_iter)
		ring_buffer_iter_advance(buf_iter);
}
2817 
/*
 * Peek (without consuming) the next entry on @cpu.  Fills *@ts with its
 * timestamp and *@lost_events with the dropped-event count, and sets
 * iter->ent_size.  Returns NULL when the CPU's buffer is empty.
 */
static struct trace_entry *
peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
		unsigned long *lost_events)
{
	struct ring_buffer_event *event;
	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);

	if (buf_iter) {
		event = ring_buffer_iter_peek(buf_iter, ts);
		if (lost_events)
			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
				(unsigned long)-1 : 0;
	} else {
		/* Consuming-read mode: peek directly at the ring buffer. */
		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
					 lost_events);
	}

	if (event) {
		iter->ent_size = ring_buffer_event_length(event);
		return ring_buffer_event_data(event);
	}
	iter->ent_size = 0;
	return NULL;
}
2842 
/*
 * Find the chronologically next entry across all CPUs (or just
 * iter->cpu_file when a per-CPU file is open).  Returns the entry with
 * the smallest timestamp and reports its CPU, timestamp and lost-event
 * count through the out parameters.
 */
static struct trace_entry *
__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
		  unsigned long *missing_events, u64 *ent_ts)
{
	struct trace_buffer *buffer = iter->array_buffer->buffer;
	struct trace_entry *ent, *next = NULL;
	unsigned long lost_events = 0, next_lost = 0;
	int cpu_file = iter->cpu_file;
	u64 next_ts = 0, ts;
	int next_cpu = -1;
	int next_size = 0;
	int cpu;

	/*
	 * If we are in a per_cpu trace file, don't bother by iterating over
	 * all cpu and peek directly.
	 */
	if (cpu_file > RING_BUFFER_ALL_CPUS) {
		if (ring_buffer_empty_cpu(buffer, cpu_file))
			return NULL;
		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
		if (ent_cpu)
			*ent_cpu = cpu_file;

		return ent;
	}

	for_each_tracing_cpu(cpu) {

		if (ring_buffer_empty_cpu(buffer, cpu))
			continue;

		ent = peek_next_entry(iter, cpu, &ts, &lost_events);

		/*
		 * Pick the entry with the smallest timestamp:
		 */
		if (ent && (!next || ts < next_ts)) {
			next = ent;
			next_cpu = cpu;
			next_ts = ts;
			next_lost = lost_events;
			next_size = iter->ent_size;
		}
	}

	/* peek_next_entry() clobbered ent_size; restore the winner's. */
	iter->ent_size = next_size;

	if (ent_cpu)
		*ent_cpu = next_cpu;

	if (ent_ts)
		*ent_ts = next_ts;

	if (missing_events)
		*missing_events = next_lost;

	return next;
}
2902 
2903 #define STATIC_FMT_BUF_SIZE	128
2904 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
2905 
trace_iter_expand_format(struct trace_iterator * iter)2906 char *trace_iter_expand_format(struct trace_iterator *iter)
2907 {
2908 	char *tmp;
2909 
2910 	/*
2911 	 * iter->tr is NULL when used with tp_printk, which makes
2912 	 * this get called where it is not safe to call krealloc().
2913 	 */
2914 	if (!iter->tr || iter->fmt == static_fmt_buf)
2915 		return NULL;
2916 
2917 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
2918 		       GFP_KERNEL);
2919 	if (tmp) {
2920 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
2921 		iter->fmt = tmp;
2922 	}
2923 
2924 	return tmp;
2925 }
2926 
/* Returns true if the string is safe to dereference from an event */
static bool trace_safe_str(struct trace_iterator *iter, const char *str)
{
	unsigned long addr = (unsigned long)str;
	struct trace_event *trace_event;
	struct trace_event_call *event;

	/* OK if part of the event data */
	if ((addr >= (unsigned long)iter->ent) &&
	    (addr < (unsigned long)iter->ent + iter->ent_size))
		return true;

	/* OK if part of the temp seq buffer */
	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
		return true;

	/* Core rodata can not be freed */
	if (is_kernel_rodata(addr))
		return true;

	/* Strings registered with tracepoint_string() are kept around */
	if (trace_is_tracepoint_string(str))
		return true;

	/*
	 * Now this could be a module event, referencing core module
	 * data, which is OK.
	 */
	if (!iter->ent)
		return false;

	trace_event = ftrace_find_event(iter->ent->type);
	if (!trace_event)
		return false;

	event = container_of(trace_event, struct trace_event_call, event);
	/* Dynamic events and events without a module can not be vouched for */
	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
		return false;

	/* Would rather have rodata, but this will suffice */
	if (within_module_core(addr, event->module))
		return true;

	return false;
}
2972 
/**
 * ignore_event - Check dereferenced fields while writing to the seq buffer
 * @iter: The iterator that holds the seq buffer and the event being printed
 *
 * At boot up, test_event_printk() will flag any event that dereferences
 * a string with "%s" that does not exist in the ring buffer. It may still
 * be valid, as the string may point to a static string in the kernel
 * rodata that never gets freed. But if the string pointer is pointing
 * to something that was allocated, there's a chance that it can be freed
 * by the time the user reads the trace. This would cause a bad memory
 * access by the kernel and possibly crash the system.
 *
 * This function will check if the event has any fields flagged as needing
 * to be checked at runtime and perform those checks.
 *
 * If it is found that a field is unsafe, it will write into the @iter->seq
 * a message stating what was found to be unsafe.
 *
 * @return: true if the event is unsafe and should be ignored,
 *          false otherwise.
 */
bool ignore_event(struct trace_iterator *iter)
{
	struct ftrace_event_field *field;
	struct trace_event *trace_event;
	struct trace_event_call *event;
	struct list_head *head;
	struct trace_seq *seq;
	const void *ptr;

	trace_event = ftrace_find_event(iter->ent->type);

	seq = &iter->seq;

	if (!trace_event) {
		trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
		return true;
	}

	event = container_of(trace_event, struct trace_event_call, event);
	/* Nothing to do unless boot-time checking flagged this event */
	if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
		return false;

	head = trace_get_fields(event);
	if (!head) {
		trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
				 trace_event_name(event));
		return true;
	}

	/* Offsets are from the iter->ent that points to the raw event */
	ptr = iter->ent;

	list_for_each_entry(field, head, link) {
		const char *str;
		bool good;

		if (!field->needs_test)
			continue;

		/* The field holds a string pointer; fetch it from the raw event */
		str = *(const char **)(ptr + field->offset);

		good = trace_safe_str(iter, str);

		/*
		 * If you hit this warning, it is likely that the
		 * trace event in question used %s on a string that
		 * was saved at the time of the event, but may not be
		 * around when the trace is read. Use __string(),
		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
		 * instead. See samples/trace_events/trace-events-sample.h
		 * for reference.
		 */
		if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
			      trace_event_name(event), field->name)) {
			trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
					 trace_event_name(event), field->name);
			return true;
		}
	}
	return false;
}
3055 
/*
 * When the 'hash-ptr' trace option is not set, rewrite the event's format
 * string so that every "%p" becomes "%px" (pointers printed unhashed).
 * Returns @fmt unchanged when no rewrite is possible or needed.
 */
const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
{
	const char *p, *new_fmt;
	char *q;

	if (WARN_ON_ONCE(!fmt))
		return fmt;

	/* No iter->tr (tp_printk) or hashed pointers: use the format as is */
	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER(HASH_PTR))
		return fmt;

	p = fmt;
	new_fmt = q = iter->fmt;
	while (*p) {
		/* Need room for the copied char, a possible 'x', and '\0' */
		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
			if (!trace_iter_expand_format(iter))
				return fmt;

			/* krealloc() may have moved the buffer; rebase q */
			q += iter->fmt - new_fmt;
			new_fmt = iter->fmt;
		}

		*q++ = *p++;

		/* Replace %p with %px */
		if (p[-1] == '%') {
			if (p[0] == '%') {
				/* "%%" is a literal percent, copy verbatim */
				*q++ = *p++;
			} else if (p[0] == 'p' && !isalnum(p[1])) {
				*q++ = *p++;
				*q++ = 'x';
			}
		}
	}
	*q = '\0';

	return new_fmt;
}
3094 
/* Fallback entry-copy buffer used by ftrace_dump() (see trace_find_next_entry()) */
#define STATIC_TEMP_BUF_SIZE	128
static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3097 
/* Find the next real entry, without updating the iterator itself */
struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
					  int *ent_cpu, u64 *ent_ts)
{
	/* __find_next_entry will reset ent_size */
	int ent_size = iter->ent_size;
	struct trace_entry *entry;

	/*
	 * If called from ftrace_dump(), then the iter->temp buffer
	 * will be the static_temp_buf and not created from kmalloc.
	 * If the entry size is greater than the buffer, we can
	 * not save it. Just return NULL in that case. This is only
	 * used to add markers when two consecutive events' time
	 * stamps have a large delta. See trace_print_lat_context()
	 */
	if (iter->temp == static_temp_buf &&
	    STATIC_TEMP_BUF_SIZE < ent_size)
		return NULL;

	/*
	 * The __find_next_entry() may call peek_next_entry(), which may
	 * call ring_buffer_peek() that may make the contents of iter->ent
	 * undefined. Need to copy iter->ent now.
	 */
	if (iter->ent && iter->ent != iter->temp) {
		/* Grow the temp buffer if the current entry does not fit */
		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
			void *temp;
			temp = kmalloc(iter->ent_size, GFP_KERNEL);
			if (!temp)
				return NULL;
			kfree(iter->temp);
			iter->temp = temp;
			iter->temp_size = iter->ent_size;
		}
		memcpy(iter->temp, iter->ent, iter->ent_size);
		iter->ent = iter->temp;
	}
	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
	/* Put back the original ent_size */
	iter->ent_size = ent_size;

	return entry;
}
3143 
3144 /* Find the next real entry, and increment the iterator to the next entry */
trace_find_next_entry_inc(struct trace_iterator * iter)3145 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3146 {
3147 	iter->ent = __find_next_entry(iter, &iter->cpu,
3148 				      &iter->lost_events, &iter->ts);
3149 
3150 	if (iter->ent)
3151 		trace_iterator_increment(iter);
3152 
3153 	return iter->ent ? iter : NULL;
3154 }
3155 
/* Consume (remove) the entry at the iterator's position from the ring buffer */
static void trace_consume(struct trace_iterator *iter)
{
	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
			    &iter->lost_events);
}
3161 
/* seq_file ->next(): advance the trace iterator to position *pos */
static void *s_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_iterator *iter = m->private;
	int target = (int)*pos;
	void *ent;

	WARN_ON_ONCE(iter->leftover);

	(*pos)++;

	/* The iterator can only move forward */
	if (iter->idx > target)
		return NULL;

	ent = (iter->idx < 0) ? trace_find_next_entry_inc(iter) : iter;

	/* Step forward until the requested position is reached */
	while (ent && iter->idx < target)
		ent = trace_find_next_entry_inc(iter);

	iter->pos = *pos;

	return ent;
}
3188 
/*
 * Reset the iterator for @cpu to the start of its buffer, skipping any
 * entries stamped before the buffer's time_start and recording how many
 * were skipped in the per-cpu skipped_entries count.
 */
void tracing_iter_reset(struct trace_iterator *iter, int cpu)
{
	struct ring_buffer_iter *buf_iter;
	unsigned long entries = 0;
	u64 ts;

	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;

	buf_iter = trace_buffer_iter(iter, cpu);
	if (!buf_iter)
		return;

	ring_buffer_iter_reset(buf_iter);

	/*
	 * We could have the case with the max latency tracers
	 * that a reset never took place on a cpu. This is evident
	 * by the timestamp being before the start of the buffer.
	 */
	while (ring_buffer_iter_peek(buf_iter, &ts)) {
		if (ts >= iter->array_buffer->time_start)
			break;
		entries++;
		ring_buffer_iter_advance(buf_iter);
		/* This could be a big loop */
		cond_resched();
	}

	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
}
3219 
/*
 * The current tracer is copied to avoid a global locking
 * all around.
 */
static void *s_start(struct seq_file *m, loff_t *pos)
{
	struct trace_iterator *iter = m->private;
	struct trace_array *tr = iter->tr;
	int cpu_file = iter->cpu_file;
	void *p = NULL;
	loff_t l = 0;
	int cpu;

	/* The current tracer may have changed since this file was opened */
	mutex_lock(&trace_types_lock);
	if (unlikely(tr->current_trace != iter->trace)) {
		/* Close iter->trace before switching to the new current tracer */
		if (iter->trace->close)
			iter->trace->close(iter);
		iter->trace = tr->current_trace;
		/* Reopen the new current tracer */
		if (iter->trace->open)
			iter->trace->open(iter);
	}
	mutex_unlock(&trace_types_lock);

	/* Snapshot reads are refused while such a tracer is in use */
	if (iter->snapshot && tracer_uses_snapshot(iter->trace))
		return ERR_PTR(-EBUSY);

	if (*pos != iter->pos) {
		/* Not a continuation: reset and walk forward to *pos */
		iter->ent = NULL;
		iter->cpu = 0;
		iter->idx = -1;

		if (cpu_file == RING_BUFFER_ALL_CPUS) {
			for_each_tracing_cpu(cpu)
				tracing_iter_reset(iter, cpu);
		} else
			tracing_iter_reset(iter, cpu_file);

		iter->leftover = 0;
		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
			;

	} else {
		/*
		 * If we overflowed the seq_file before, then we want
		 * to just reuse the trace_seq buffer again.
		 */
		if (iter->leftover)
			p = iter;
		else {
			l = *pos - 1;
			p = s_next(m, p, &l);
		}
	}

	/* Dropped in s_stop() (unless the -EBUSY path above was taken) */
	trace_event_read_lock();
	trace_access_lock(cpu_file);
	return p;
}
3280 
s_stop(struct seq_file * m,void * p)3281 static void s_stop(struct seq_file *m, void *p)
3282 {
3283 	struct trace_iterator *iter = m->private;
3284 
3285 	if (iter->snapshot && tracer_uses_snapshot(iter->trace))
3286 		return;
3287 
3288 	trace_access_unlock(iter->cpu_file);
3289 	trace_event_read_unlock();
3290 }
3291 
3292 static void
get_total_entries_cpu(struct array_buffer * buf,unsigned long * total,unsigned long * entries,int cpu)3293 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3294 		      unsigned long *entries, int cpu)
3295 {
3296 	unsigned long count;
3297 
3298 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
3299 	/*
3300 	 * If this buffer has skipped entries, then we hold all
3301 	 * entries for the trace and we need to ignore the
3302 	 * ones before the time stamp.
3303 	 */
3304 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3305 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3306 		/* total is the same as the entries */
3307 		*total = count;
3308 	} else
3309 		*total = count +
3310 			ring_buffer_overrun_cpu(buf->buffer, cpu);
3311 	*entries = count;
3312 }
3313 
3314 static void
get_total_entries(struct array_buffer * buf,unsigned long * total,unsigned long * entries)3315 get_total_entries(struct array_buffer *buf,
3316 		  unsigned long *total, unsigned long *entries)
3317 {
3318 	unsigned long t, e;
3319 	int cpu;
3320 
3321 	*total = 0;
3322 	*entries = 0;
3323 
3324 	for_each_tracing_cpu(cpu) {
3325 		get_total_entries_cpu(buf, &t, &e, cpu);
3326 		*total += t;
3327 		*entries += e;
3328 	}
3329 }
3330 
trace_total_entries_cpu(struct trace_array * tr,int cpu)3331 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3332 {
3333 	unsigned long total, entries;
3334 
3335 	if (!tr)
3336 		tr = &global_trace;
3337 
3338 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3339 
3340 	return entries;
3341 }
3342 
trace_total_entries(struct trace_array * tr)3343 unsigned long trace_total_entries(struct trace_array *tr)
3344 {
3345 	unsigned long total, entries;
3346 
3347 	if (!tr)
3348 		tr = &global_trace;
3349 
3350 	get_total_entries(&tr->array_buffer, &total, &entries);
3351 
3352 	return entries;
3353 }
3354 
/* Print the column legend used by the latency trace output format */
static void print_lat_help_header(struct seq_file *m)
{
	seq_puts(m, "#                    _------=> CPU#            \n"
		    "#                   / _-----=> irqs-off/BH-disabled\n"
		    "#                  | / _----=> need-resched    \n"
		    "#                  || / _---=> hardirq/softirq \n"
		    "#                  ||| / _--=> preempt-depth   \n"
		    "#                  |||| / _-=> migrate-disable \n"
		    "#                  ||||| /     delay           \n"
		    "#  cmd     pid     |||||| time  |   caller     \n"
		    "#     \\   /        ||||||  \\    |    /       \n");
}
3367 
/* Report how many entries the buffer holds vs. how many were written */
static void print_event_info(struct array_buffer *buf, struct seq_file *m)
{
	unsigned long entries, total;

	get_total_entries(buf, &total, &entries);

	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
		   entries, total, num_online_cpus());
	seq_puts(m, "#\n");
}
3378 
/* Print the column legend for the plain (no irq-info) output format */
static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
				   unsigned int flags)
{
	bool tgid = flags & TRACE_ITER(RECORD_TGID);

	print_event_info(buf, m);

	/* The TGID column is only present when the record-tgid option is set */
	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
}
3389 
/* Print the column legend including the irq/preempt state columns */
static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
				       unsigned int flags)
{
	bool tgid = flags & TRACE_ITER(RECORD_TGID);
	static const char space[] = "            ";
	/* Legend is widened when the TGID column is recorded */
	int prec = tgid ? 12 : 2;

	print_event_info(buf, m);

	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
}
3408 
/*
 * Print the latency trace banner: tracer name, kernel release, buffer
 * statistics, the task that recorded the latency, and (when recorded)
 * where the critical section started and ended.
 */
void
print_trace_header(struct seq_file *m, struct trace_iterator *iter)
{
	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
	struct array_buffer *buf = iter->array_buffer;
	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
	struct tracer *type = iter->trace;
	unsigned long entries;
	unsigned long total;
	const char *name = type->name;

	get_total_entries(buf, &total, &entries);

	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
		   name, init_utsname()->release);
	seq_puts(m, "# -----------------------------------"
		 "---------------------------------\n");
	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
		   nsecs_to_usecs(data->saved_latency),
		   entries,
		   total,
		   buf->cpu,
		   preempt_model_str(),
		   /* These are reserved for later use */
		   0, 0, 0, 0);
#ifdef CONFIG_SMP
	seq_printf(m, " #P:%d)\n", num_online_cpus());
#else
	seq_puts(m, ")\n");
#endif
	seq_puts(m, "#    -----------------\n");
	seq_printf(m, "#    | task: %.16s-%d "
		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
		   data->comm, data->pid,
		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
		   data->policy, data->rt_priority);
	seq_puts(m, "#    -----------------\n");

	/* Only the irq/preempt-off style tracers record a critical section */
	if (data->critical_start) {
		seq_puts(m, "#  => started at: ");
		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
		trace_print_seq(m, &iter->seq);
		seq_puts(m, "\n#  => ended at:   ");
		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
		trace_print_seq(m, &iter->seq);
		seq_puts(m, "\n#\n");
	}

	seq_puts(m, "#\n");
}
3460 
/*
 * With the 'annotate' trace option set, print a marker the first time
 * entries from a given CPU's buffer appear in the output.
 */
static void test_cpu_buff_start(struct trace_iterator *iter)
{
	struct trace_seq *s = &iter->seq;
	struct trace_array *tr = iter->tr;

	if (!(tr->trace_flags & TRACE_ITER(ANNOTATE)))
		return;

	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
		return;

	/* Already announced this CPU */
	if (cpumask_available(iter->started) &&
	    cpumask_test_cpu(iter->cpu, iter->started))
		return;

	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
		return;

	if (cpumask_available(iter->started))
		cpumask_set_cpu(iter->cpu, iter->started);

	/* Don't print started cpu buffer for the first entry of the trace */
	if (iter->idx > 1)
		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
				iter->cpu);
}
3487 
#ifdef CONFIG_FTRACE_SYSCALLS
/* True if @event is a syscall enter or exit event */
static bool is_syscall_event(struct trace_event *event)
{
	return (event->funcs == &enter_syscall_print_funcs) ||
	       (event->funcs == &exit_syscall_print_funcs);

}
#define syscall_buf_size CONFIG_TRACE_SYSCALL_BUF_SIZE_DEFAULT
#else
/* Without syscall tracing support there are no syscall events */
static inline bool is_syscall_event(struct trace_event *event)
{
	return false;
}
#define syscall_buf_size 0
#endif /* CONFIG_FTRACE_SYSCALLS */
3503 
/* Default human-readable output for one trace entry */
static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
{
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
	struct trace_entry *entry;
	struct trace_event *event;

	entry = iter->ent;

	test_cpu_buff_start(iter);

	event = ftrace_find_event(entry->type);

	/* Print the leading context columns unless context-info is cleared */
	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
			trace_print_lat_context(iter);
		else
			trace_print_context(iter);
	}

	if (trace_seq_has_overflowed(s))
		return TRACE_TYPE_PARTIAL_LINE;

	if (event) {
		if (tr->trace_flags & TRACE_ITER(FIELDS))
			return print_event_fields(iter, event);
		/*
		 * For TRACE_EVENT() events, the print_fmt is not
		 * safe to use if the array has delta offsets
		 * Force printing via the fields.
		 */
		if ((tr->text_delta)) {
			/* ftrace and system call events are still OK */
			if ((event->type > __TRACE_LAST_TYPE) &&
			    !is_syscall_event(event))
				return print_event_fields(iter, event);
		}
		return event->funcs->trace(iter, sym_flags, event);
	}

	trace_seq_printf(s, "Unknown type %d\n", entry->type);

	return trace_handle_return(s);
}
3549 
print_raw_fmt(struct trace_iterator * iter)3550 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3551 {
3552 	struct trace_array *tr = iter->tr;
3553 	struct trace_seq *s = &iter->seq;
3554 	struct trace_entry *entry;
3555 	struct trace_event *event;
3556 
3557 	entry = iter->ent;
3558 
3559 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO))
3560 		trace_seq_printf(s, "%d %d %llu ",
3561 				 entry->pid, iter->cpu, iter->ts);
3562 
3563 	if (trace_seq_has_overflowed(s))
3564 		return TRACE_TYPE_PARTIAL_LINE;
3565 
3566 	event = ftrace_find_event(entry->type);
3567 	if (event)
3568 		return event->funcs->raw(iter, 0, event);
3569 
3570 	trace_seq_printf(s, "%d ?\n", entry->type);
3571 
3572 	return trace_handle_return(s);
3573 }
3574 
/* Output one trace entry as hex dumped fields (trace option 'hex') */
static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
{
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	unsigned char newline = '\n';
	struct trace_entry *entry;
	struct trace_event *event;

	entry = iter->ent;

	/* Optionally prefix with pid, cpu and timestamp in hex */
	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
		SEQ_PUT_HEX_FIELD(s, entry->pid);
		SEQ_PUT_HEX_FIELD(s, iter->cpu);
		SEQ_PUT_HEX_FIELD(s, iter->ts);
		if (trace_seq_has_overflowed(s))
			return TRACE_TYPE_PARTIAL_LINE;
	}

	event = ftrace_find_event(entry->type);
	if (event) {
		enum print_line_t ret = event->funcs->hex(iter, 0, event);
		if (ret != TRACE_TYPE_HANDLED)
			return ret;
	}

	SEQ_PUT_FIELD(s, newline);

	return trace_handle_return(s);
}
3604 
print_bin_fmt(struct trace_iterator * iter)3605 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3606 {
3607 	struct trace_array *tr = iter->tr;
3608 	struct trace_seq *s = &iter->seq;
3609 	struct trace_entry *entry;
3610 	struct trace_event *event;
3611 
3612 	entry = iter->ent;
3613 
3614 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
3615 		SEQ_PUT_FIELD(s, entry->pid);
3616 		SEQ_PUT_FIELD(s, iter->cpu);
3617 		SEQ_PUT_FIELD(s, iter->ts);
3618 		if (trace_seq_has_overflowed(s))
3619 			return TRACE_TYPE_PARTIAL_LINE;
3620 	}
3621 
3622 	event = ftrace_find_event(entry->type);
3623 	return event ? event->funcs->binary(iter, 0, event) :
3624 		TRACE_TYPE_HANDLED;
3625 }
3626 
trace_empty(struct trace_iterator * iter)3627 int trace_empty(struct trace_iterator *iter)
3628 {
3629 	struct ring_buffer_iter *buf_iter;
3630 	int cpu;
3631 
3632 	/* If we are looking at one CPU buffer, only check that one */
3633 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3634 		cpu = iter->cpu_file;
3635 		buf_iter = trace_buffer_iter(iter, cpu);
3636 		if (buf_iter) {
3637 			if (!ring_buffer_iter_empty(buf_iter))
3638 				return 0;
3639 		} else {
3640 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
3641 				return 0;
3642 		}
3643 		return 1;
3644 	}
3645 
3646 	for_each_tracing_cpu(cpu) {
3647 		buf_iter = trace_buffer_iter(iter, cpu);
3648 		if (buf_iter) {
3649 			if (!ring_buffer_iter_empty(buf_iter))
3650 				return 0;
3651 		} else {
3652 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
3653 				return 0;
3654 		}
3655 	}
3656 
3657 	return 1;
3658 }
3659 
/*  Called with trace_event_read_lock() held. */
enum print_line_t print_trace_line(struct trace_iterator *iter)
{
	struct trace_array *tr = iter->tr;
	unsigned long trace_flags = tr->trace_flags;
	enum print_line_t ret;

	/* Report events that were dropped before this entry, if any */
	if (iter->lost_events) {
		if (iter->lost_events == (unsigned long)-1)
			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
					 iter->cpu);
		else
			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
					 iter->cpu, iter->lost_events);
		if (trace_seq_has_overflowed(&iter->seq))
			return TRACE_TYPE_PARTIAL_LINE;
	}

	/* Give the tracer's own line formatter the first chance */
	if (iter->trace && iter->trace->print_line) {
		ret = iter->trace->print_line(iter);
		if (ret != TRACE_TYPE_UNHANDLED)
			return ret;
	}

	/* With printk-msg-only, print just the message of printk-style events */
	if (iter->ent->type == TRACE_BPUTS &&
			trace_flags & TRACE_ITER(PRINTK) &&
			trace_flags & TRACE_ITER(PRINTK_MSGONLY))
		return trace_print_bputs_msg_only(iter);

	if (iter->ent->type == TRACE_BPRINT &&
			trace_flags & TRACE_ITER(PRINTK) &&
			trace_flags & TRACE_ITER(PRINTK_MSGONLY))
		return trace_print_bprintk_msg_only(iter);

	if (iter->ent->type == TRACE_PRINT &&
			trace_flags & TRACE_ITER(PRINTK) &&
			trace_flags & TRACE_ITER(PRINTK_MSGONLY))
		return trace_print_printk_msg_only(iter);

	/* Otherwise dispatch on the selected output format */
	if (trace_flags & TRACE_ITER(BIN))
		return print_bin_fmt(iter);

	if (trace_flags & TRACE_ITER(HEX))
		return print_hex_fmt(iter);

	if (trace_flags & TRACE_ITER(RAW))
		return print_raw_fmt(iter);

	return print_trace_fmt(iter);
}
3710 
trace_latency_header(struct seq_file * m)3711 void trace_latency_header(struct seq_file *m)
3712 {
3713 	struct trace_iterator *iter = m->private;
3714 	struct trace_array *tr = iter->tr;
3715 
3716 	/* print nothing if the buffers are empty */
3717 	if (trace_empty(iter))
3718 		return;
3719 
3720 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3721 		print_trace_header(m, iter);
3722 
3723 	if (!(tr->trace_flags & TRACE_ITER(VERBOSE)))
3724 		print_lat_help_header(m);
3725 }
3726 
/* Print the header matching the current trace output format and options */
void trace_default_header(struct seq_file *m)
{
	struct trace_iterator *iter = m->private;
	struct trace_array *tr = iter->tr;
	unsigned long trace_flags = tr->trace_flags;

	/* No context columns means no header to explain them */
	if (!(trace_flags & TRACE_ITER(CONTEXT_INFO)))
		return;

	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
		/* print nothing if the buffers are empty */
		if (trace_empty(iter))
			return;
		print_trace_header(m, iter);
		if (!(trace_flags & TRACE_ITER(VERBOSE)))
			print_lat_help_header(m);
	} else {
		if (!(trace_flags & TRACE_ITER(VERBOSE))) {
			/* irq-info adds the irq/preempt state columns */
			if (trace_flags & TRACE_ITER(IRQ_INFO))
				print_func_help_header_irq(iter->array_buffer,
							   m, trace_flags);
			else
				print_func_help_header(iter->array_buffer, m,
						       trace_flags);
		}
	}
}
3754 
/* Warn the reader if function tracing was shut down by a detected problem */
static void test_ftrace_alive(struct seq_file *m)
{
	if (ftrace_is_dead())
		seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
			    "#          MAY BE MISSING FUNCTION EVENTS\n");
}
3762 
3763 #ifdef CONFIG_TRACER_SNAPSHOT
/* Usage text for the top-level snapshot file */
static void show_snapshot_main_help(struct seq_file *m)
{
	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		    "#                      Takes a snapshot of the main buffer.\n"
		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
		    "#                      (Doesn't have to be '2' works with any number that\n"
		    "#                       is not a '0' or '1')\n");
}
3773 
/* Usage text for the per-cpu snapshot files */
static void show_snapshot_percpu_help(struct seq_file *m)
{
	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
#else
	/* Per-cpu buffer swapping requires CONFIG_RING_BUFFER_ALLOW_SWAP */
	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
		    "#                     Must use main snapshot file to allocate.\n");
#endif
	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
		    "#                      (Doesn't have to be '2' works with any number that\n"
		    "#                       is not a '0' or '1')\n");
}
3788 
print_snapshot_help(struct seq_file * m,struct trace_iterator * iter)3789 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3790 {
3791 	if (iter->tr->allocated_snapshot)
3792 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3793 	else
3794 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3795 
3796 	seq_puts(m, "# Snapshot commands:\n");
3797 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3798 		show_snapshot_main_help(m);
3799 	else
3800 		show_snapshot_percpu_help(m);
3801 }
#else
/* Should never be called: snapshot files only exist with CONFIG_TRACER_SNAPSHOT */
static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
#endif
3806 
/* seq_file ->show(): print the header, a leftover line, or the next entry */
static int s_show(struct seq_file *m, void *v)
{
	struct trace_iterator *iter = v;
	int ret;

	if (iter->ent == NULL) {
		/* Before any entry is output, print the appropriate header */
		if (iter->tr) {
			seq_printf(m, "# tracer: %s\n", iter->trace->name);
			seq_puts(m, "#\n");
			test_ftrace_alive(m);
		}
		if (iter->snapshot && trace_empty(iter))
			print_snapshot_help(m, iter);
		else if (iter->trace && iter->trace->print_header)
			iter->trace->print_header(m);
		else
			trace_default_header(m);

	} else if (iter->leftover) {
		/*
		 * If we filled the seq_file buffer earlier, we
		 * want to just show it now.
		 */
		ret = trace_print_seq(m, &iter->seq);

		/* ret should this time be zero, but you never know */
		iter->leftover = ret;

	} else {
		ret = print_trace_line(iter);
		if (ret == TRACE_TYPE_PARTIAL_LINE) {
			/* The line did not fit into the trace_seq; flag it */
			iter->seq.full = 0;
			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
		}
		ret = trace_print_seq(m, &iter->seq);
		/*
		 * If we overflow the seq_file buffer, then it will
		 * ask us for this data again at start up.
		 * Use that instead.
		 *  ret is 0 if seq_file write succeeded.
		 *        -1 otherwise.
		 */
		iter->leftover = ret;
	}

	return 0;
}
3854 
3855 /*
3856  * Should be used after trace_array_get(), trace_types_lock
3857  * ensures that i_cdev was already initialized.
3858  */
tracing_get_cpu(struct inode * inode)3859 static inline int tracing_get_cpu(struct inode *inode)
3860 {
3861 	if (inode->i_cdev) /* See trace_create_cpu_file() */
3862 		return (long)inode->i_cdev - 1;
3863 	return RING_BUFFER_ALL_CPUS;
3864 }
3865 
/* seq_file operations backing the "trace" file */
static const struct seq_operations tracer_seq_ops = {
	.start		= s_start,
	.next		= s_next,
	.stop		= s_stop,
	.show		= s_show,
};
3872 
/*
 * Note, as iter itself can be allocated and freed in different
 * ways, this function is only used to free its content, and not
 * the iterator itself. The only requirement to all the allocations
 * is that it must zero all fields (kzalloc), as freeing works with
 * either allocated content or NULL.
 */
static void free_trace_iter_content(struct trace_iterator *iter)
{
	/* The fmt is either NULL, allocated or points to static_fmt_buf */
	if (iter->fmt != static_fmt_buf)
		kfree(iter->fmt);

	kfree(iter->temp);
	kfree(iter->buffer_iter);
	mutex_destroy(&iter->mutex);
	free_cpumask_var(iter->started);
}
3891 
/*
 * Create and initialize the trace_iterator backing a "trace" (or
 * "snapshot") file. On success the iterator is attached as the
 * seq_file private data and returned; on failure every partial
 * allocation is torn down and an ERR_PTR is returned.
 */
static struct trace_iterator *
__tracing_open(struct inode *inode, struct file *file, bool snapshot)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	int cpu;

	if (tracing_disabled)
		return ERR_PTR(-ENODEV);

	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
	if (!iter)
		return ERR_PTR(-ENOMEM);

	/* One ring buffer iterator slot per possible CPU */
	iter->buffer_iter = kzalloc_objs(*iter->buffer_iter, nr_cpu_ids);
	if (!iter->buffer_iter)
		goto release;

	/*
	 * trace_find_next_entry() may need to save off iter->ent.
	 * It will place it into the iter->temp buffer. As most
	 * events are less than 128, allocate a buffer of that size.
	 * If one is greater, then trace_find_next_entry() will
	 * allocate a new buffer to adjust for the bigger iter->ent.
	 * It's not critical if it fails to get allocated here.
	 */
	iter->temp = kmalloc(128, GFP_KERNEL);
	if (iter->temp)
		iter->temp_size = 128;

	/*
	 * trace_event_printf() may need to modify given format
	 * string to replace %p with %px so that it shows real address
	 * instead of hash value. However, that is only for the event
	 * tracing, other tracer may not need. Defer the allocation
	 * until it is needed.
	 */
	iter->fmt = NULL;
	iter->fmt_size = 0;

	mutex_lock(&trace_types_lock);
	iter->trace = tr->current_trace;

	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
		goto fail;

	iter->tr = tr;

#ifdef CONFIG_TRACER_SNAPSHOT
	/* Currently only the top directory has a snapshot */
	if (tr->current_trace->print_max || snapshot)
		iter->array_buffer = &tr->snapshot_buffer;
	else
#endif
		iter->array_buffer = &tr->array_buffer;
	iter->snapshot = snapshot;
	iter->pos = -1;
	iter->cpu_file = tracing_get_cpu(inode);
	mutex_init(&iter->mutex);

	/* Notify the tracer early; before we stop tracing. */
	if (iter->trace->open)
		iter->trace->open(iter);

	/* Annotate start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->array_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	/*
	 * If pause-on-trace is enabled, then stop the trace while
	 * dumping, unless this is the "snapshot" file
	 */
	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER(PAUSE_ON_TRACE))) {
		iter->iter_flags |= TRACE_FILE_PAUSE;
		tracing_stop_tr(tr);
	}

	/* Start a read iterator on either every tracing CPU or just one. */
	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
		for_each_tracing_cpu(cpu) {
			iter->buffer_iter[cpu] =
				ring_buffer_read_start(iter->array_buffer->buffer,
						       cpu, GFP_KERNEL);
			tracing_iter_reset(iter, cpu);
		}
	} else {
		cpu = iter->cpu_file;
		iter->buffer_iter[cpu] =
			ring_buffer_read_start(iter->array_buffer->buffer,
					       cpu, GFP_KERNEL);
		tracing_iter_reset(iter, cpu);
	}

	mutex_unlock(&trace_types_lock);

	return iter;

 fail:
	mutex_unlock(&trace_types_lock);
	free_trace_iter_content(iter);
release:
	seq_release_private(inode, file);
	return ERR_PTR(-ENOMEM);
}
3999 
/*
 * Generic open that takes no trace_array reference.
 * tracing_check_open_get_tr(NULL) performs only the security and
 * tracing_disabled checks (a NULL tr means no refcount is taken).
 */
int tracing_open_generic(struct inode *inode, struct file *filp)
{
	int ret;

	ret = tracing_check_open_get_tr(NULL);
	if (ret)
		return ret;

	filp->private_data = inode->i_private;
	return 0;
}
4011 
4012 /*
4013  * Open and update trace_array ref count.
4014  * Must have the current trace_array passed to it.
4015  */
tracing_open_generic_tr(struct inode * inode,struct file * filp)4016 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4017 {
4018 	struct trace_array *tr = inode->i_private;
4019 	int ret;
4020 
4021 	ret = tracing_check_open_get_tr(tr);
4022 	if (ret)
4023 		return ret;
4024 
4025 	filp->private_data = inode->i_private;
4026 
4027 	return 0;
4028 }
4029 
4030 /*
4031  * The private pointer of the inode is the trace_event_file.
4032  * Update the tr ref count associated to it.
4033  */
tracing_open_file_tr(struct inode * inode,struct file * filp)4034 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4035 {
4036 	struct trace_event_file *file = inode->i_private;
4037 	int ret;
4038 
4039 	ret = tracing_check_open_get_tr(file->tr);
4040 	if (ret)
4041 		return ret;
4042 
4043 	guard(mutex)(&event_mutex);
4044 
4045 	/* Fail if the file is marked for removal */
4046 	if (file->flags & EVENT_FILE_FL_FREED) {
4047 		trace_array_put(file->tr);
4048 		return -ENODEV;
4049 	} else {
4050 		event_file_get(file);
4051 	}
4052 
4053 	filp->private_data = inode->i_private;
4054 
4055 	return 0;
4056 }
4057 
/*
 * Release counterpart of tracing_open_file_tr(): drop the trace_array
 * and event file references taken at open time.
 */
int tracing_release_file_tr(struct inode *inode, struct file *filp)
{
	struct trace_event_file *file = inode->i_private;

	trace_array_put(file->tr);
	event_file_put(file);

	return 0;
}
4067 
/* Like tracing_release_file_tr() but for single_open() based files */
int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
{
	tracing_release_file_tr(inode, filp);
	return single_release(inode, filp);
}
4073 
/*
 * Release for the "trace" file: finish all per-cpu ring buffer
 * iterators, let the tracer clean up, restart tracing if this open
 * paused it, and free the iterator created by __tracing_open().
 */
static int tracing_release(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	struct seq_file *m = file->private_data;
	struct trace_iterator *iter;
	int cpu;

	if (!(file->f_mode & FMODE_READ)) {
		/* Write-only opens have no iterator; just drop the ref. */
		trace_array_put(tr);
		return 0;
	}

	/* Writes do not use seq_file */
	iter = m->private;
	mutex_lock(&trace_types_lock);

	for_each_tracing_cpu(cpu) {
		if (iter->buffer_iter[cpu])
			ring_buffer_read_finish(iter->buffer_iter[cpu]);
	}

	if (iter->trace && iter->trace->close)
		iter->trace->close(iter);

	if (iter->iter_flags & TRACE_FILE_PAUSE)
		/* reenable tracing if it was previously enabled */
		tracing_start_tr(tr);

	__trace_array_put(tr);

	mutex_unlock(&trace_types_lock);

	free_trace_iter_content(iter);
	seq_release_private(inode, file);

	return 0;
}
4111 
tracing_release_generic_tr(struct inode * inode,struct file * file)4112 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4113 {
4114 	struct trace_array *tr = inode->i_private;
4115 
4116 	trace_array_put(tr);
4117 	return 0;
4118 }
4119 
tracing_single_release_tr(struct inode * inode,struct file * file)4120 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4121 {
4122 	struct trace_array *tr = inode->i_private;
4123 
4124 	trace_array_put(tr);
4125 
4126 	return single_release(inode, file);
4127 }
4128 
4129 static bool update_last_data_if_empty(struct trace_array *tr);
4130 
/*
 * Open for the "trace" file. Opening for write with O_TRUNC erases
 * the buffer contents; opening for read builds a trace_iterator via
 * __tracing_open().
 */
static int tracing_open(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	/* If this file was open for write, then erase contents */
	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
		int cpu = tracing_get_cpu(inode);
		struct array_buffer *trace_buf = &tr->array_buffer;

#ifdef CONFIG_TRACER_MAX_TRACE
		/* Latency tracers present their data via the snapshot buffer */
		if (tr->current_trace->print_max)
			trace_buf = &tr->snapshot_buffer;
#endif

		if (cpu == RING_BUFFER_ALL_CPUS)
			tracing_reset_online_cpus(trace_buf);
		else
			tracing_reset_cpu(trace_buf, cpu);

		update_last_data_if_empty(tr);
	}

	if (file->f_mode & FMODE_READ) {
		iter = __tracing_open(inode, file, false);
		if (IS_ERR(iter))
			ret = PTR_ERR(iter);
		else if (tr->trace_flags & TRACE_ITER(LATENCY_FMT))
			iter->iter_flags |= TRACE_FILE_LAT_FMT;
	}

	/* On error, drop the ref taken by tracing_check_open_get_tr() */
	if (ret < 0)
		trace_array_put(tr);

	return ret;
}
4172 
4173 /*
4174  * Some tracers are not suitable for instance buffers.
4175  * A tracer is always available for the global array (toplevel)
4176  * or if it explicitly states that it is.
4177  */
4178 static bool
trace_ok_for_array(struct tracer * t,struct trace_array * tr)4179 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4180 {
4181 	/* arrays with mapped buffer range do not have snapshots */
4182 	if (tr->range_addr_start && tracer_uses_snapshot(t))
4183 		return false;
4184 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4185 }
4186 
4187 /* Find the next tracer that this trace array may use */
4188 static struct tracer *
get_tracer_for_array(struct trace_array * tr,struct tracer * t)4189 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4190 {
4191 	while (t && !trace_ok_for_array(t, tr))
4192 		t = t->next;
4193 
4194 	return t;
4195 }
4196 
4197 static void *
t_next(struct seq_file * m,void * v,loff_t * pos)4198 t_next(struct seq_file *m, void *v, loff_t *pos)
4199 {
4200 	struct trace_array *tr = m->private;
4201 	struct tracer *t = v;
4202 
4203 	(*pos)++;
4204 
4205 	if (t)
4206 		t = get_tracer_for_array(tr, t->next);
4207 
4208 	return t;
4209 }
4210 
/* seq_file ->start() for available_tracers; trace_types_lock held until t_stop() */
static void *t_start(struct seq_file *m, loff_t *pos)
{
	struct trace_array *tr = m->private;
	struct tracer *t;
	loff_t l;

	mutex_lock(&trace_types_lock);

	/* Skip forward to the requested position */
	t = get_tracer_for_array(tr, trace_types);
	for (l = 0; t && l < *pos; )
		t = t_next(m, t, &l);

	return t;
}
4225 
/* seq_file ->stop(): release the lock taken in t_start() */
static void t_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&trace_types_lock);
}
4230 
t_show(struct seq_file * m,void * v)4231 static int t_show(struct seq_file *m, void *v)
4232 {
4233 	struct tracer *t = v;
4234 
4235 	if (!t)
4236 		return 0;
4237 
4238 	seq_puts(m, t->name);
4239 	if (t->next)
4240 		seq_putc(m, ' ');
4241 	else
4242 		seq_putc(m, '\n');
4243 
4244 	return 0;
4245 }
4246 
/* seq_file operations backing the "available_tracers" file */
static const struct seq_operations show_traces_seq_ops = {
	.start		= t_start,
	.next		= t_next,
	.stop		= t_stop,
	.show		= t_show,
};
4253 
show_traces_open(struct inode * inode,struct file * file)4254 static int show_traces_open(struct inode *inode, struct file *file)
4255 {
4256 	struct trace_array *tr = inode->i_private;
4257 	struct seq_file *m;
4258 	int ret;
4259 
4260 	ret = tracing_check_open_get_tr(tr);
4261 	if (ret)
4262 		return ret;
4263 
4264 	ret = seq_open(file, &show_traces_seq_ops);
4265 	if (ret) {
4266 		trace_array_put(tr);
4267 		return ret;
4268 	}
4269 
4270 	m = file->private_data;
4271 	m->private = tr;
4272 
4273 	return 0;
4274 }
4275 
tracing_seq_release(struct inode * inode,struct file * file)4276 static int tracing_seq_release(struct inode *inode, struct file *file)
4277 {
4278 	struct trace_array *tr = inode->i_private;
4279 
4280 	trace_array_put(tr);
4281 	return seq_release(inode, file);
4282 }
4283 
/*
 * Writes to the "trace" file are accepted but do nothing here; the
 * erase side effect happens at open time (O_TRUNC in tracing_open()).
 */
static ssize_t
tracing_write_stub(struct file *filp, const char __user *ubuf,
		   size_t count, loff_t *ppos)
{
	return count;
}
4290 
/* llseek for tracing files: readers seek via seq_file, writers reset to 0 */
loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
{
	if (file->f_mode & FMODE_READ)
		return seq_lseek(file, offset, whence);

	file->f_pos = 0;
	return 0;
}
4302 
/* File operations for the "trace" file */
static const struct file_operations tracing_fops = {
	.open		= tracing_open,
	.read		= seq_read,
	.read_iter	= seq_read_iter,
	.splice_read	= copy_splice_read,
	.write		= tracing_write_stub,
	.llseek		= tracing_lseek,
	.release	= tracing_release,
};
4312 
/* File operations for the "available_tracers" file */
static const struct file_operations show_traces_fops = {
	.open		= show_traces_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_seq_release,
};
4319 
/*
 * Read for "tracing_cpumask": format the trace_array's cpumask as a
 * bitmap string. mask_str is freed automatically on every return path
 * by the __free(kfree) cleanup attribute.
 */
static ssize_t
tracing_cpumask_read(struct file *filp, char __user *ubuf,
		     size_t count, loff_t *ppos)
{
	struct trace_array *tr = file_inode(filp)->i_private;
	char *mask_str __free(kfree) = NULL;
	int len;

	/* First pass computes the required buffer length (plus NUL) */
	len = snprintf(NULL, 0, "%*pb\n",
		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
	mask_str = kmalloc(len, GFP_KERNEL);
	if (!mask_str)
		return -ENOMEM;

	len = snprintf(mask_str, len, "%*pb\n",
		       cpumask_pr_args(tr->tracing_cpumask));
	if (len >= count)
		return -EINVAL;

	return simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
}
4341 
/*
 * Apply a new tracing cpumask to @tr. For each CPU whose bit flips,
 * recording on the corresponding ring buffer(s) is enabled or disabled
 * under tr->max_lock (with IRQs off) before the new mask is committed.
 */
int tracing_set_cpumask(struct trace_array *tr,
			cpumask_var_t tracing_cpumask_new)
{
	int cpu;

	if (!tr)
		return -EINVAL;

	local_irq_disable();
	arch_spin_lock(&tr->max_lock);
	for_each_tracing_cpu(cpu) {
		/*
		 * Increase/decrease the disabled counter if we are
		 * about to flip a bit in the cpumask:
		 */
		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
#ifdef CONFIG_TRACER_SNAPSHOT
			ring_buffer_record_disable_cpu(tr->snapshot_buffer.buffer, cpu);
#endif
		}
		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
#ifdef CONFIG_TRACER_SNAPSHOT
			ring_buffer_record_enable_cpu(tr->snapshot_buffer.buffer, cpu);
#endif
		}
	}
	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);

	return 0;
}
4379 
4380 static ssize_t
tracing_cpumask_write(struct file * filp,const char __user * ubuf,size_t count,loff_t * ppos)4381 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4382 		      size_t count, loff_t *ppos)
4383 {
4384 	struct trace_array *tr = file_inode(filp)->i_private;
4385 	cpumask_var_t tracing_cpumask_new;
4386 	int err;
4387 
4388 	if (count == 0 || count > KMALLOC_MAX_SIZE)
4389 		return -EINVAL;
4390 
4391 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4392 		return -ENOMEM;
4393 
4394 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4395 	if (err)
4396 		goto err_free;
4397 
4398 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
4399 	if (err)
4400 		goto err_free;
4401 
4402 	free_cpumask_var(tracing_cpumask_new);
4403 
4404 	return count;
4405 
4406 err_free:
4407 	free_cpumask_var(tracing_cpumask_new);
4408 
4409 	return err;
4410 }
4411 
/* File operations for the "tracing_cpumask" file */
static const struct file_operations tracing_cpumask_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_cpumask_read,
	.write		= tracing_cpumask_write,
	.release	= tracing_release_generic_tr,
	.llseek		= generic_file_llseek,
};
4419 
tracing_trace_options_show(struct seq_file * m,void * v)4420 static int tracing_trace_options_show(struct seq_file *m, void *v)
4421 {
4422 	struct tracer_opt *trace_opts;
4423 	struct trace_array *tr = m->private;
4424 	struct tracer_flags *flags;
4425 	u32 tracer_flags;
4426 	int i;
4427 
4428 	guard(mutex)(&trace_types_lock);
4429 
4430 	for (i = 0; trace_options[i]; i++) {
4431 		if (tr->trace_flags & (1ULL << i))
4432 			seq_printf(m, "%s\n", trace_options[i]);
4433 		else
4434 			seq_printf(m, "no%s\n", trace_options[i]);
4435 	}
4436 
4437 	flags = tr->current_trace_flags;
4438 	if (!flags || !flags->opts)
4439 		return 0;
4440 
4441 	tracer_flags = flags->val;
4442 	trace_opts = flags->opts;
4443 
4444 	for (i = 0; trace_opts[i].name; i++) {
4445 		if (tracer_flags & trace_opts[i].bit)
4446 			seq_printf(m, "%s\n", trace_opts[i].name);
4447 		else
4448 			seq_printf(m, "no%s\n", trace_opts[i].name);
4449 	}
4450 
4451 	return 0;
4452 }
4453 
__set_tracer_option(struct trace_array * tr,struct tracer_flags * tracer_flags,struct tracer_opt * opts,int neg)4454 static int __set_tracer_option(struct trace_array *tr,
4455 			       struct tracer_flags *tracer_flags,
4456 			       struct tracer_opt *opts, int neg)
4457 {
4458 	struct tracer *trace = tracer_flags->trace;
4459 	int ret = 0;
4460 
4461 	if (trace->set_flag)
4462 		ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4463 	if (ret)
4464 		return ret;
4465 
4466 	if (neg)
4467 		tracer_flags->val &= ~opts->bit;
4468 	else
4469 		tracer_flags->val |= opts->bit;
4470 	return 0;
4471 }
4472 
4473 /* Try to assign a tracer specific option */
set_tracer_option(struct trace_array * tr,char * cmp,int neg)4474 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4475 {
4476 	struct tracer_flags *tracer_flags = tr->current_trace_flags;
4477 	struct tracer_opt *opts = NULL;
4478 	int i;
4479 
4480 	if (!tracer_flags || !tracer_flags->opts)
4481 		return 0;
4482 
4483 	for (i = 0; tracer_flags->opts[i].name; i++) {
4484 		opts = &tracer_flags->opts[i];
4485 
4486 		if (strcmp(cmp, opts->name) == 0)
4487 			return __set_tracer_option(tr, tracer_flags, opts, neg);
4488 	}
4489 
4490 	return -EINVAL;
4491 }
4492 
4493 /* Some tracers require overwrite to stay enabled */
trace_keep_overwrite(struct tracer * tracer,u64 mask,int set)4494 int trace_keep_overwrite(struct tracer *tracer, u64 mask, int set)
4495 {
4496 	if (tracer->enabled && (mask & TRACE_ITER(OVERWRITE)) && !set)
4497 		return -1;
4498 
4499 	return 0;
4500 }
4501 
/*
 * Set or clear one trace option flag on @tr, running the side effects
 * associated with the flag (cmdline/tgid recording, fork following,
 * overwrite mode, printk routing, ...). Returns 0 on success or a
 * negative errno if the tracer or the flag logic rejects the change.
 */
int set_tracer_flag(struct trace_array *tr, u64 mask, int enabled)
{
	/* These flags touch state guarded by event_mutex */
	switch (mask) {
	case TRACE_ITER(RECORD_TGID):
	case TRACE_ITER(RECORD_CMD):
	case TRACE_ITER(TRACE_PRINTK):
	case TRACE_ITER(COPY_MARKER):
		lockdep_assert_held(&event_mutex);
	}

	/* do nothing if flag is already set */
	if (!!(tr->trace_flags & mask) == !!enabled)
		return 0;

	/* Give the tracer a chance to approve the change */
	if (tr->current_trace->flag_changed)
		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
			return -EINVAL;

	/* Flags that must be handled before tr->trace_flags is updated */
	switch (mask) {
	case TRACE_ITER(TRACE_PRINTK):
		if (enabled) {
			update_printk_trace(tr);
		} else {
			/*
			 * The global_trace cannot clear this.
			 * Its flag only gets cleared if another instance sets it.
			 */
			if (printk_trace == &global_trace)
				return -EINVAL;
			/*
			 * An instance must always have it set.
			 * by default, that's the global_trace instance.
			 */
			if (printk_trace == tr)
				update_printk_trace(&global_trace);
		}
		break;

	case TRACE_ITER(COPY_MARKER):
		update_marker_trace(tr, enabled);
		/* update_marker_trace updates the tr->trace_flags */
		return 0;
	}

	if (enabled)
		tr->trace_flags |= mask;
	else
		tr->trace_flags &= ~mask;

	/* Side effects that run after the flag has been updated */
	switch (mask) {
	case TRACE_ITER(RECORD_CMD):
		trace_event_enable_cmd_record(enabled);
		break;

	case TRACE_ITER(RECORD_TGID):

		if (trace_alloc_tgid_map() < 0) {
			/* Roll the flag back if the tgid map can't be allocated */
			tr->trace_flags &= ~TRACE_ITER(RECORD_TGID);
			return -ENOMEM;
		}

		trace_event_enable_tgid_record(enabled);
		break;

	case TRACE_ITER(EVENT_FORK):
		trace_event_follow_fork(tr, enabled);
		break;

	case TRACE_ITER(FUNC_FORK):
		ftrace_pid_follow_fork(tr, enabled);
		break;

	case TRACE_ITER(OVERWRITE):
		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
#ifdef CONFIG_TRACER_SNAPSHOT
		ring_buffer_change_overwrite(tr->snapshot_buffer.buffer, enabled);
#endif
		break;

	case TRACE_ITER(PRINTK):
		trace_printk_start_stop_comm(enabled);
		trace_printk_control(enabled);
		break;

#if defined(CONFIG_FUNCTION_PROFILER) && defined(CONFIG_FUNCTION_GRAPH_TRACER)
	case TRACE_GRAPH_GRAPH_TIME:
		ftrace_graph_graph_time_control(enabled);
		break;
#endif
	}

	return 0;
}
4596 
/*
 * Parse a single option token (optionally prefixed with "no") and apply
 * it to @tr, first as a core trace flag, then as a tracer-specific
 * option. The token may arrive inside a larger comma-separated string:
 * strstrip()'s NUL is restored to a space afterward so the caller's
 * buffer can be reused (see apply_trace_boot_options()).
 */
int trace_set_options(struct trace_array *tr, char *option)
{
	char *cmp;
	int neg = 0;
	int ret;
	size_t orig_len = strlen(option);
	int len;

	cmp = strstrip(option);

	/* A "no" prefix means clearing the option */
	len = str_has_prefix(cmp, "no");
	if (len)
		neg = 1;

	cmp += len;

	mutex_lock(&event_mutex);
	mutex_lock(&trace_types_lock);

	ret = match_string(trace_options, -1, cmp);
	/* If no option could be set, test the specific tracer options */
	if (ret < 0)
		ret = set_tracer_option(tr, cmp, neg);
	else
		ret = set_tracer_flag(tr, 1ULL << ret, !neg);

	mutex_unlock(&trace_types_lock);
	mutex_unlock(&event_mutex);

	/*
	 * If the first trailing whitespace is replaced with '\0' by strstrip,
	 * turn it back into a space.
	 */
	if (orig_len > strlen(option))
		option[strlen(option)] = ' ';

	return ret;
}
4635 
apply_trace_boot_options(void)4636 static void __init apply_trace_boot_options(void)
4637 {
4638 	char *buf = trace_boot_options_buf;
4639 	char *option;
4640 
4641 	while (true) {
4642 		option = strsep(&buf, ",");
4643 
4644 		if (!option)
4645 			break;
4646 
4647 		if (*option)
4648 			trace_set_options(&global_trace, option);
4649 
4650 		/* Put back the comma to allow this to be called again */
4651 		if (buf)
4652 			*(buf - 1) = ',';
4653 	}
4654 }
4655 
4656 static ssize_t
tracing_trace_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)4657 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4658 			size_t cnt, loff_t *ppos)
4659 {
4660 	struct seq_file *m = filp->private_data;
4661 	struct trace_array *tr = m->private;
4662 	char buf[64];
4663 	int ret;
4664 
4665 	if (cnt >= sizeof(buf))
4666 		return -EINVAL;
4667 
4668 	if (copy_from_user(buf, ubuf, cnt))
4669 		return -EFAULT;
4670 
4671 	buf[cnt] = 0;
4672 
4673 	ret = trace_set_options(tr, buf);
4674 	if (ret < 0)
4675 		return ret;
4676 
4677 	*ppos += cnt;
4678 
4679 	return cnt;
4680 }
4681 
tracing_trace_options_open(struct inode * inode,struct file * file)4682 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4683 {
4684 	struct trace_array *tr = inode->i_private;
4685 	int ret;
4686 
4687 	ret = tracing_check_open_get_tr(tr);
4688 	if (ret)
4689 		return ret;
4690 
4691 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4692 	if (ret < 0)
4693 		trace_array_put(tr);
4694 
4695 	return ret;
4696 }
4697 
/* File operations for the "trace_options" file */
static const struct file_operations tracing_iter_fops = {
	.open		= tracing_trace_options_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_single_release_tr,
	.write		= tracing_trace_options_write,
};
4705 
4706 static const char readme_msg[] =
4707 	"tracing mini-HOWTO:\n\n"
4708 	"By default tracefs removes all OTH file permission bits.\n"
4709 	"When mounting tracefs an optional group id can be specified\n"
4710 	"which adds the group to every directory and file in tracefs:\n\n"
4711 	"\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
4712 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4713 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4714 	" Important files:\n"
4715 	"  trace\t\t\t- The static contents of the buffer\n"
4716 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4717 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4718 	"  current_tracer\t- function and latency tracers\n"
4719 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4720 	"  error_log\t- error log for failed commands (that support it)\n"
4721 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4722 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4723 	"  trace_clock\t\t- change the clock used to order events\n"
4724 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4725 	"      global:   Synced across CPUs but slows tracing down.\n"
4726 	"     counter:   Not a clock, but just an increment\n"
4727 	"      uptime:   Jiffy counter from time of boot\n"
4728 	"        perf:   Same clock that perf events use\n"
4729 #ifdef CONFIG_X86_64
4730 	"     x86-tsc:   TSC cycle counter\n"
4731 #endif
4732 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
4733 	"       delta:   Delta difference against a buffer-wide timestamp\n"
4734 	"    absolute:   Absolute (standalone) timestamp\n"
4735 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4736 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4737 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4738 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4739 	"\t\t\t  Remove sub-buffer with rmdir\n"
4740 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4741 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
4742 	"\t\t\t  option name\n"
4743 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4744 #ifdef CONFIG_DYNAMIC_FTRACE
4745 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4746 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4747 	"\t\t\t  functions\n"
4748 	"\t     accepts: func_full_name or glob-matching-pattern\n"
4749 	"\t     modules: Can select a group via module\n"
4750 	"\t      Format: :mod:<module-name>\n"
4751 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4752 	"\t    triggers: a command to perform when function is hit\n"
4753 	"\t      Format: <function>:<trigger>[:count]\n"
4754 	"\t     trigger: traceon, traceoff\n"
4755 	"\t\t      enable_event:<system>:<event>\n"
4756 	"\t\t      disable_event:<system>:<event>\n"
4757 #ifdef CONFIG_STACKTRACE
4758 	"\t\t      stacktrace\n"
4759 #endif
4760 #ifdef CONFIG_TRACER_SNAPSHOT
4761 	"\t\t      snapshot\n"
4762 #endif
4763 	"\t\t      dump\n"
4764 	"\t\t      cpudump\n"
4765 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4766 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4767 	"\t     The first one will disable tracing every time do_fault is hit\n"
4768 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4769 	"\t       The first time do trap is hit and it disables tracing, the\n"
4770 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4771 	"\t       the counter will not decrement. It only decrements when the\n"
4772 	"\t       trigger did work\n"
4773 	"\t     To remove trigger without count:\n"
4774 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
4775 	"\t     To remove trigger with a count:\n"
4776 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4777 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4778 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4779 	"\t    modules: Can select a group via module command :mod:\n"
4780 	"\t    Does not accept triggers\n"
4781 #endif /* CONFIG_DYNAMIC_FTRACE */
4782 #ifdef CONFIG_FUNCTION_TRACER
4783 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4784 	"\t\t    (function)\n"
4785 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
4786 	"\t\t    (function)\n"
4787 #endif
4788 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4789 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4790 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4791 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4792 #endif
4793 #ifdef CONFIG_TRACER_SNAPSHOT
4794 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4795 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4796 	"\t\t\t  information\n"
4797 #endif
4798 #ifdef CONFIG_STACK_TRACER
4799 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4800 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4801 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4802 	"\t\t\t  new trace)\n"
4803 #ifdef CONFIG_DYNAMIC_FTRACE
4804 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4805 	"\t\t\t  traces\n"
4806 #endif
4807 #endif /* CONFIG_STACK_TRACER */
4808 #ifdef CONFIG_DYNAMIC_EVENTS
4809 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
4810 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4811 #endif
4812 #ifdef CONFIG_KPROBE_EVENTS
4813 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
4814 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4815 #endif
4816 #ifdef CONFIG_UPROBE_EVENTS
4817 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
4818 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4819 #endif
4820 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
4821     defined(CONFIG_FPROBE_EVENTS)
4822 	"\t  accepts: event-definitions (one definition per line)\n"
4823 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4824 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
4825 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
4826 #endif
4827 #ifdef CONFIG_FPROBE_EVENTS
4828 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
4829 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
4830 #endif
4831 #ifdef CONFIG_HIST_TRIGGERS
4832 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
4833 #endif
4834 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
4835 	"\t           -:[<group>/][<event>]\n"
4836 #ifdef CONFIG_KPROBE_EVENTS
4837 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4838   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
4839 #endif
4840 #ifdef CONFIG_UPROBE_EVENTS
4841   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
4842 #endif
4843 	"\t     args: <name>=fetcharg[:type]\n"
4844 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
4845 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4846 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
4847 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
4848 	"\t           <argname>[->field[->field|.field...]],\n"
4849 #endif
4850 #else
4851 	"\t           $stack<index>, $stack, $retval, $comm,\n"
4852 #endif
4853 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
4854 	"\t     kernel return probes support: $retval, $arg<N>, $comm\n"
4855 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
4856 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
4857 	"\t           symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
4858 #ifdef CONFIG_HIST_TRIGGERS
4859 	"\t    field: <stype> <name>;\n"
4860 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4861 	"\t           [unsigned] char/int/long\n"
4862 #endif
	"\t    efield: For event probes ('e' types), the field is one of the fields\n"
4864 	"\t            of the <attached-group>/<attached-event>.\n"
4865 #endif
4866 	"  set_event\t\t- Enables events by name written into it\n"
4867 	"\t\t\t  Can enable module events via: :mod:<module>\n"
4868 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4869 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4870 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4871 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4872 	"\t\t\t  events\n"
4873 	"      filter\t\t- If set, only events passing filter are traced\n"
4874 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4875 	"\t\t\t  <event>:\n"
4876 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4877 	"      filter\t\t- If set, only events passing filter are traced\n"
4878 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4879 	"\t    Format: <trigger>[:count][if <filter>]\n"
4880 	"\t   trigger: traceon, traceoff\n"
4881 	"\t            enable_event:<system>:<event>\n"
4882 	"\t            disable_event:<system>:<event>\n"
4883 #ifdef CONFIG_HIST_TRIGGERS
4884 	"\t            enable_hist:<system>:<event>\n"
4885 	"\t            disable_hist:<system>:<event>\n"
4886 #endif
4887 #ifdef CONFIG_STACKTRACE
4888 	"\t\t    stacktrace\n"
4889 #endif
4890 #ifdef CONFIG_TRACER_SNAPSHOT
4891 	"\t\t    snapshot\n"
4892 #endif
4893 #ifdef CONFIG_HIST_TRIGGERS
4894 	"\t\t    hist (see below)\n"
4895 #endif
4896 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4897 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4898 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4899 	"\t                  events/block/block_unplug/trigger\n"
4900 	"\t   The first disables tracing every time block_unplug is hit.\n"
4901 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4902 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4903 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4904 	"\t   Like function triggers, the counter is only decremented if it\n"
4905 	"\t    enabled or disabled tracing.\n"
4906 	"\t   To remove a trigger without a count:\n"
4907 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
4908 	"\t   To remove a trigger with a count:\n"
4909 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
4910 	"\t   Filters can be ignored when removing a trigger.\n"
4911 #ifdef CONFIG_HIST_TRIGGERS
4912 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4913 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
4914 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
4915 	"\t            [:values=<field1[,field2,...]>]\n"
4916 	"\t            [:sort=<field1[,field2,...]>]\n"
4917 	"\t            [:size=#entries]\n"
4918 	"\t            [:pause][:continue][:clear]\n"
4919 	"\t            [:name=histname1]\n"
4920 	"\t            [:nohitcount]\n"
4921 	"\t            [:<handler>.<action>]\n"
4922 	"\t            [if <filter>]\n\n"
4923 	"\t    Note, special fields can be used as well:\n"
4924 	"\t            common_timestamp - to record current timestamp\n"
4925 	"\t            common_cpu - to record the CPU the event happened on\n"
4926 	"\n"
4927 	"\t    A hist trigger variable can be:\n"
4928 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
4929 	"\t        - a reference to another variable e.g. y=$x,\n"
4930 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
4931 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
4932 	"\n"
4933 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
4934 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
4935 	"\t    variable reference, field or numeric literal.\n"
4936 	"\n"
4937 	"\t    When a matching event is hit, an entry is added to a hash\n"
4938 	"\t    table using the key(s) and value(s) named, and the value of a\n"
4939 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
4940 	"\t    correspond to fields in the event's format description.  Keys\n"
4941 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
4942 	"\t    Compound keys consisting of up to two fields can be specified\n"
4943 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4944 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
4945 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
4946 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
4947 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
4948 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
4949 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
4950 	"\t    its histogram data will be shared with other triggers of the\n"
4951 	"\t    same name, and trigger hits will update this common data.\n\n"
4952 	"\t    Reading the 'hist' file for the event will dump the hash\n"
4953 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
4954 	"\t    triggers attached to an event, there will be a table for each\n"
4955 	"\t    trigger in the output.  The table displayed for a named\n"
4956 	"\t    trigger will be the same as any other instance having the\n"
4957 	"\t    same name.  The default format used to display a given field\n"
4958 	"\t    can be modified by appending any of the following modifiers\n"
4959 	"\t    to the field name, as applicable:\n\n"
4960 	"\t            .hex        display a number as a hex value\n"
4961 	"\t            .sym        display an address as a symbol\n"
4962 	"\t            .sym-offset display an address as a symbol and offset\n"
4963 	"\t            .execname   display a common_pid as a program name\n"
4964 	"\t            .syscall    display a syscall id as a syscall name\n"
4965 	"\t            .log2       display log2 value rather than raw number\n"
4966 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
4967 	"\t            .usecs      display a common_timestamp in microseconds\n"
4968 	"\t            .percent    display a number of percentage value\n"
4969 	"\t            .graph      display a bar-graph of a value\n\n"
4970 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
4971 	"\t    trigger or to start a hist trigger but not log any events\n"
4972 	"\t    until told to do so.  'continue' can be used to start or\n"
4973 	"\t    restart a paused hist trigger.\n\n"
4974 	"\t    The 'clear' parameter will clear the contents of a running\n"
4975 	"\t    hist trigger and leave its current paused/active state\n"
4976 	"\t    unchanged.\n\n"
4977 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
4978 	"\t    raw hitcount in the histogram.\n\n"
4979 	"\t    The enable_hist and disable_hist triggers can be used to\n"
4980 	"\t    have one event conditionally start and stop another event's\n"
4981 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
4982 	"\t    the enable_event and disable_event triggers.\n\n"
4983 	"\t    Hist trigger handlers and actions are executed whenever a\n"
4984 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
4985 	"\t        <handler>.<action>\n\n"
4986 	"\t    The available handlers are:\n\n"
4987 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
4988 	"\t        onmax(var)               - invoke if var exceeds current max\n"
4989 	"\t        onchange(var)            - invoke action if var changes\n\n"
4990 	"\t    The available actions are:\n\n"
4991 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
4992 	"\t        save(field,...)                      - save current event fields\n"
4993 #ifdef CONFIG_TRACER_SNAPSHOT
4994 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
4995 #endif
4996 #ifdef CONFIG_SYNTH_EVENTS
4997 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
4998 	"\t  Write into this file to define/undefine new synthetic events.\n"
4999 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5000 #endif
5001 #endif
5002 ;
5003 
5004 static ssize_t
tracing_readme_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5005 tracing_readme_read(struct file *filp, char __user *ubuf,
5006 		       size_t cnt, loff_t *ppos)
5007 {
5008 	return simple_read_from_buffer(ubuf, cnt, ppos,
5009 					readme_msg, strlen(readme_msg));
5010 }
5011 
/* Read-only file operations for the tracefs "README" help-text file. */
static const struct file_operations tracing_readme_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_readme_read,
	.llseek		= generic_file_llseek,
};
5017 
5018 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5019 static union trace_eval_map_item *
update_eval_map(union trace_eval_map_item * ptr)5020 update_eval_map(union trace_eval_map_item *ptr)
5021 {
5022 	if (!ptr->map.eval_string) {
5023 		if (ptr->tail.next) {
5024 			ptr = ptr->tail.next;
5025 			/* Set ptr to the next real item (skip head) */
5026 			ptr++;
5027 		} else
5028 			return NULL;
5029 	}
5030 	return ptr;
5031 }
5032 
eval_map_next(struct seq_file * m,void * v,loff_t * pos)5033 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5034 {
5035 	union trace_eval_map_item *ptr = v;
5036 
5037 	/*
5038 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5039 	 * This really should never happen.
5040 	 */
5041 	(*pos)++;
5042 	ptr = update_eval_map(ptr);
5043 	if (WARN_ON_ONCE(!ptr))
5044 		return NULL;
5045 
5046 	ptr++;
5047 	ptr = update_eval_map(ptr);
5048 
5049 	return ptr;
5050 }
5051 
eval_map_start(struct seq_file * m,loff_t * pos)5052 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5053 {
5054 	union trace_eval_map_item *v;
5055 	loff_t l = 0;
5056 
5057 	mutex_lock(&trace_eval_mutex);
5058 
5059 	v = trace_eval_maps;
5060 	if (v)
5061 		v++;
5062 
5063 	while (v && l < *pos) {
5064 		v = eval_map_next(m, v, &l);
5065 	}
5066 
5067 	return v;
5068 }
5069 
eval_map_stop(struct seq_file * m,void * v)5070 static void eval_map_stop(struct seq_file *m, void *v)
5071 {
5072 	mutex_unlock(&trace_eval_mutex);
5073 }
5074 
eval_map_show(struct seq_file * m,void * v)5075 static int eval_map_show(struct seq_file *m, void *v)
5076 {
5077 	union trace_eval_map_item *ptr = v;
5078 
5079 	seq_printf(m, "%s %ld (%s)\n",
5080 		   ptr->map.eval_string, ptr->map.eval_value,
5081 		   ptr->map.system);
5082 
5083 	return 0;
5084 }
5085 
/* seq_file iterator for the tracefs "eval_map" file. */
static const struct seq_operations tracing_eval_map_seq_ops = {
	.start		= eval_map_start,
	.next		= eval_map_next,
	.stop		= eval_map_stop,
	.show		= eval_map_show,
};
5092 
tracing_eval_map_open(struct inode * inode,struct file * filp)5093 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5094 {
5095 	int ret;
5096 
5097 	ret = tracing_check_open_get_tr(NULL);
5098 	if (ret)
5099 		return ret;
5100 
5101 	return seq_open(filp, &tracing_eval_map_seq_ops);
5102 }
5103 
/* File operations for "eval_map"; reading is driven by the seq_ops above. */
static const struct file_operations tracing_eval_map_fops = {
	.open		= tracing_eval_map_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
5110 
static inline union trace_eval_map_item *
trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
{
	/*
	 * Return tail of array given the head: the layout is
	 * [head][length map entries][tail], so skip length + 1 items.
	 */
	return ptr + ptr->head.length + 1;
}
5117 
/*
 * Copy the eval maps in [start, start + len) into a freshly allocated
 * array (head + entries + tail) and link it onto the global
 * trace_eval_maps list for the "eval_map" file.  @mod is recorded in the
 * head so module maps can be identified; allocation failure is non-fatal.
 */
static void
trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
			   int len)
{
	struct trace_eval_map **stop;
	struct trace_eval_map **map;
	union trace_eval_map_item *map_array;
	union trace_eval_map_item *ptr;

	stop = start + len;

	/*
	 * The trace_eval_maps contains the map plus a head and tail item,
	 * where the head holds the module and length of array, and the
	 * tail holds a pointer to the next list.
	 */
	map_array = kmalloc_objs(*map_array, len + 2);
	if (!map_array) {
		pr_warn("Unable to allocate trace eval mapping\n");
		return;
	}

	guard(mutex)(&trace_eval_mutex);

	if (!trace_eval_maps)
		trace_eval_maps = map_array;
	else {
		/* Walk tail links to the last sub-array and append there. */
		ptr = trace_eval_maps;
		for (;;) {
			ptr = trace_eval_jmp_to_tail(ptr);
			if (!ptr->tail.next)
				break;
			ptr = ptr->tail.next;

		}
		ptr->tail.next = map_array;
	}
	map_array->head.mod = mod;
	map_array->head.length = len;
	map_array++;

	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
		map_array->map = **map;
		map_array++;
	}
	/* Zeroed tail item terminates this sub-array (tail.next == NULL). */
	memset(map_array, 0, sizeof(*map_array));
}
5165 
trace_create_eval_file(struct dentry * d_tracer)5166 static void trace_create_eval_file(struct dentry *d_tracer)
5167 {
5168 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5169 			  NULL, &tracing_eval_map_fops);
5170 }
5171 
#else /* CONFIG_TRACE_EVAL_MAP_FILE */
/* No "eval_map" file support: both helpers collapse to no-ops. */
static inline void trace_create_eval_file(struct dentry *d_tracer) { }
static inline void trace_insert_eval_map_file(struct module *mod,
			      struct trace_eval_map **start, int len) { }
#endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5177 
/*
 * Update all trace events with the eval maps from @mod and, when there
 * are entries, expose them via the "eval_map" file.  A non-positive @len
 * is still passed through to trace_event_update_all() when the BTF
 * type-tag sanitizer configuration is enabled (see comment below).
 */
static void
trace_event_update_with_eval_map(struct module *mod,
				 struct trace_eval_map **start,
				 int len)
{
	struct trace_eval_map **map;

	/* Always run sanitizer only if btf_type_tag attr exists. */
	if (len <= 0) {
		if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) &&
		      IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) &&
		      __has_attribute(btf_type_tag)))
			return;
	}

	map = start;

	trace_event_update_all(map, len);

	/* Nothing to insert into the eval_map file without entries. */
	if (len <= 0)
		return;

	trace_insert_eval_map_file(mod, start, len);
}
5202 
5203 static ssize_t
tracing_set_trace_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5204 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5205 		       size_t cnt, loff_t *ppos)
5206 {
5207 	struct trace_array *tr = filp->private_data;
5208 	char buf[MAX_TRACER_SIZE+2];
5209 	int r;
5210 
5211 	scoped_guard(mutex, &trace_types_lock) {
5212 		r = sprintf(buf, "%s\n", tr->current_trace->name);
5213 	}
5214 
5215 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5216 }
5217 
tracer_init(struct tracer * t,struct trace_array * tr)5218 int tracer_init(struct tracer *t, struct trace_array *tr)
5219 {
5220 	tracing_reset_online_cpus(&tr->array_buffer);
5221 	update_last_data_if_empty(tr);
5222 	return t->init(tr);
5223 }
5224 
set_buffer_entries(struct array_buffer * buf,unsigned long val)5225 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5226 {
5227 	int cpu;
5228 
5229 	for_each_tracing_cpu(cpu)
5230 		per_cpu_ptr(buf->data, cpu)->entries = val;
5231 }
5232 
update_buffer_entries(struct array_buffer * buf,int cpu)5233 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5234 {
5235 	if (cpu == RING_BUFFER_ALL_CPUS) {
5236 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5237 	} else {
5238 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5239 	}
5240 }
5241 
#ifdef CONFIG_TRACER_SNAPSHOT
/*
 * resize @tr's buffer to the size of @size_tr's entries
 *
 * Returns 0 on success, or the first negative ring_buffer_resize() error.
 * On the all-CPUs path a failure stops at the failing CPU, leaving
 * earlier CPUs already resized.
 */
static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
					struct array_buffer *size_buf, int cpu_id)
{
	int cpu, ret = 0;

	if (cpu_id == RING_BUFFER_ALL_CPUS) {
		for_each_tracing_cpu(cpu) {
			ret = ring_buffer_resize(trace_buf->buffer,
				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
			if (ret < 0)
				break;
			/* Mirror the size into the cached entry count. */
			per_cpu_ptr(trace_buf->data, cpu)->entries =
				per_cpu_ptr(size_buf->data, cpu)->entries;
		}
	} else {
		ret = ring_buffer_resize(trace_buf->buffer,
				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
		if (ret == 0)
			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
				per_cpu_ptr(size_buf->data, cpu_id)->entries;
	}

	return ret;
}
#endif /* CONFIG_TRACER_SNAPSHOT */
5269 
__tracing_resize_ring_buffer(struct trace_array * tr,unsigned long size,int cpu)5270 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5271 					unsigned long size, int cpu)
5272 {
5273 	int ret;
5274 
5275 	/*
5276 	 * If kernel or user changes the size of the ring buffer
5277 	 * we use the size that was given, and we can forget about
5278 	 * expanding it later.
5279 	 */
5280 	trace_set_ring_buffer_expanded(tr);
5281 
5282 	/* May be called before buffers are initialized */
5283 	if (!tr->array_buffer.buffer)
5284 		return 0;
5285 
5286 	/* Do not allow tracing while resizing ring buffer */
5287 	tracing_stop_tr(tr);
5288 
5289 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5290 	if (ret < 0)
5291 		goto out_start;
5292 
5293 #ifdef CONFIG_TRACER_SNAPSHOT
5294 	if (!tr->allocated_snapshot)
5295 		goto out;
5296 
5297 	ret = ring_buffer_resize(tr->snapshot_buffer.buffer, size, cpu);
5298 	if (ret < 0) {
5299 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
5300 						     &tr->array_buffer, cpu);
5301 		if (r < 0) {
5302 			/*
5303 			 * AARGH! We are left with different
5304 			 * size max buffer!!!!
5305 			 * The max buffer is our "snapshot" buffer.
5306 			 * When a tracer needs a snapshot (one of the
5307 			 * latency tracers), it swaps the max buffer
5308 			 * with the saved snap shot. We succeeded to
5309 			 * update the size of the main buffer, but failed to
5310 			 * update the size of the max buffer. But when we tried
5311 			 * to reset the main buffer to the original size, we
5312 			 * failed there too. This is very unlikely to
5313 			 * happen, but if it does, warn and kill all
5314 			 * tracing.
5315 			 */
5316 			WARN_ON(1);
5317 			tracing_disabled = 1;
5318 		}
5319 		goto out_start;
5320 	}
5321 
5322 	update_buffer_entries(&tr->snapshot_buffer, cpu);
5323 
5324  out:
5325 #endif /* CONFIG_TRACER_SNAPSHOT */
5326 
5327 	update_buffer_entries(&tr->array_buffer, cpu);
5328  out_start:
5329 	tracing_start_tr(tr);
5330 	return ret;
5331 }
5332 
tracing_resize_ring_buffer(struct trace_array * tr,unsigned long size,int cpu_id)5333 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5334 				  unsigned long size, int cpu_id)
5335 {
5336 	guard(mutex)(&trace_types_lock);
5337 
5338 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5339 		/* make sure, this cpu is enabled in the mask */
5340 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
5341 			return -EINVAL;
5342 	}
5343 
5344 	return __tracing_resize_ring_buffer(tr, size, cpu_id);
5345 }
5346 
/* One module recorded in the persistent scratch area: base text address
 * plus the module's name. */
struct trace_mod_entry {
	unsigned long	mod_addr;
	char		mod_name[MODULE_NAME_LEN];
};

/* Header of the persistent ring buffer scratch area: trace clock in use,
 * kernel text address of the boot that wrote it, and the sorted module
 * list used for cross-boot address translation. */
struct trace_scratch {
	unsigned int		clock_id;
	unsigned long		text_addr;
	unsigned long		nr_entries;
	struct trace_mod_entry	entries[];
};

/* Serializes writers of the scratch module list (see save_mod()). */
static DEFINE_MUTEX(scratch_mutex);
5360 
cmp_mod_entry(const void * key,const void * pivot)5361 static int cmp_mod_entry(const void *key, const void *pivot)
5362 {
5363 	unsigned long addr = (unsigned long)key;
5364 	const struct trace_mod_entry *ent = pivot;
5365 
5366 	if (addr < ent[0].mod_addr)
5367 		return -1;
5368 
5369 	return addr >= ent[1].mod_addr;
5370 }
5371 
/**
 * trace_adjust_address() - Adjust prev boot address to current address.
 * @tr: Persistent ring buffer's trace_array.
 * @addr: Address in @tr which is adjusted.
 *
 * Returns the address translated into this boot's layout, or @addr
 * unchanged when no translation applies.
 */
unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
{
	struct trace_module_delta *module_delta;
	struct trace_scratch *tscratch;
	struct trace_mod_entry *entry;
	unsigned long raddr;
	int idx = 0, nr_entries;

	/* If we don't have last boot delta, return the address */
	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
		return addr;

	/* tr->module_delta must be protected by rcu. */
	guard(rcu)();
	tscratch = tr->scratch;
	/* if there is no tscratch, module_delta must be NULL. */
	module_delta = READ_ONCE(tr->module_delta);
	if (!module_delta || !tscratch->nr_entries ||
	    tscratch->entries[0].mod_addr > addr) {
		/*
		 * Below the first module (or no module data): treat as core
		 * kernel and apply the global text delta, but only keep the
		 * shifted address if it lands in a known kernel region.
		 */
		raddr = addr + tr->text_delta;
		return __is_kernel(raddr) || is_kernel_core_data(raddr) ||
			is_kernel_rodata(raddr) ? raddr : addr;
	}

	/* Note that entries must be sorted. */
	nr_entries = tscratch->nr_entries;
	if (nr_entries == 1 ||
	    tscratch->entries[nr_entries - 1].mod_addr < addr)
		idx = nr_entries - 1;	/* past the last boundary: last module */
	else {
		entry = __inline_bsearch((void *)addr,
				tscratch->entries,
				nr_entries - 1,
				sizeof(tscratch->entries[0]),
				cmp_mod_entry);
		if (entry)
			idx = entry - tscratch->entries;
	}

	return addr + module_delta->delta[idx];
}
5418 
#ifdef CONFIG_MODULES
/*
 * module_for_each_mod() callback: append @mod's text base and name to the
 * scratch module list of the trace_array in @data.  Returns -1 when no
 * scratch area exists or it is full; callers must hold scratch_mutex.
 */
static int save_mod(struct module *mod, void *data)
{
	struct trace_array *tr = data;
	struct trace_scratch *tscratch;
	struct trace_mod_entry *entry;
	unsigned int size;

	tscratch = tr->scratch;
	if (!tscratch)
		return -1;
	size = tr->scratch_size;

	/* Would one more entry overflow the scratch area? */
	if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size)
		return -1;

	entry = &tscratch->entries[tscratch->nr_entries];

	tscratch->nr_entries++;

	entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
	strscpy(entry->mod_name, mod->name);

	return 0;
}
#else
/* No module support: nothing to record, always succeed. */
static int save_mod(struct module *mod, void *data)
{
	return 0;
}
#endif
5450 
update_last_data(struct trace_array * tr)5451 static void update_last_data(struct trace_array *tr)
5452 {
5453 	struct trace_module_delta *module_delta;
5454 	struct trace_scratch *tscratch;
5455 
5456 	if (!(tr->flags & TRACE_ARRAY_FL_BOOT))
5457 		return;
5458 
5459 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
5460 		return;
5461 
5462 	/* Only if the buffer has previous boot data clear and update it. */
5463 	tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;
5464 
5465 	/* Reset the module list and reload them */
5466 	if (tr->scratch) {
5467 		struct trace_scratch *tscratch = tr->scratch;
5468 
5469 		tscratch->clock_id = tr->clock_id;
5470 		memset(tscratch->entries, 0,
5471 		       flex_array_size(tscratch, entries, tscratch->nr_entries));
5472 		tscratch->nr_entries = 0;
5473 
5474 		guard(mutex)(&scratch_mutex);
5475 		module_for_each_mod(save_mod, tr);
5476 	}
5477 
5478 	/*
5479 	 * Need to clear all CPU buffers as there cannot be events
5480 	 * from the previous boot mixed with events with this boot
5481 	 * as that will cause a confusing trace. Need to clear all
5482 	 * CPU buffers, even for those that may currently be offline.
5483 	 */
5484 	tracing_reset_all_cpus(&tr->array_buffer);
5485 
5486 	/* Using current data now */
5487 	tr->text_delta = 0;
5488 
5489 	if (!tr->scratch)
5490 		return;
5491 
5492 	tscratch = tr->scratch;
5493 	module_delta = READ_ONCE(tr->module_delta);
5494 	WRITE_ONCE(tr->module_delta, NULL);
5495 	kfree_rcu(module_delta, rcu);
5496 
5497 	/* Set the persistent ring buffer meta data to this address */
5498 	tscratch->text_addr = (unsigned long)_text;
5499 }
5500 
/**
 * tracing_update_buffers - used by tracing facility to expand ring buffers
 * @tr: The tracing instance (NULL means the global trace array)
 *
 * To save on memory when the tracing is never used on a system with it
 * configured in. The ring buffers are set to a minimum size. But once
 * a user starts to use the tracing facility, then they need to grow
 * to their default size.
 *
 * This function is to be called when a tracer is about to be used.
 *
 * Returns 0 on success or a negative errno from the resize.
 */
int tracing_update_buffers(struct trace_array *tr)
{
	int ret = 0;

	if (!tr)
		tr = &global_trace;

	guard(mutex)(&trace_types_lock);

	/* Flush any previous-boot data before expanding. */
	update_last_data(tr);

	if (!tr->ring_buffer_expanded)
		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
						RING_BUFFER_ALL_CPUS);
	return ret;
}
5528 
/*
 * Used to clear out the tracer before deletion of an instance.
 * Must have trace_types_lock held.
 */
static void tracing_set_nop(struct trace_array *tr)
{
	if (tr->current_trace == &nop_trace)
		return;

	/* Drop this instance's reference before resetting the tracer. */
	tr->current_trace->enabled--;

	if (tr->current_trace->reset)
		tr->current_trace->reset(tr);

	tr->current_trace = &nop_trace;
	tr->current_trace_flags = nop_trace.flags;
}
5546 
5547 static bool tracer_options_updated;
5548 
tracing_set_tracer(struct trace_array * tr,const char * buf)5549 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5550 {
5551 	struct tracer *trace = NULL;
5552 	struct tracers *t;
5553 	bool had_max_tr;
5554 	int ret;
5555 
5556 	guard(mutex)(&trace_types_lock);
5557 
5558 	update_last_data(tr);
5559 
5560 	if (!tr->ring_buffer_expanded) {
5561 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5562 						RING_BUFFER_ALL_CPUS);
5563 		if (ret < 0)
5564 			return ret;
5565 		ret = 0;
5566 	}
5567 
5568 	list_for_each_entry(t, &tr->tracers, list) {
5569 		if (strcmp(t->tracer->name, buf) == 0) {
5570 			trace = t->tracer;
5571 			break;
5572 		}
5573 	}
5574 	if (!trace)
5575 		return -EINVAL;
5576 
5577 	if (trace == tr->current_trace)
5578 		return 0;
5579 
5580 #ifdef CONFIG_TRACER_SNAPSHOT
5581 	if (tracer_uses_snapshot(trace)) {
5582 		local_irq_disable();
5583 		arch_spin_lock(&tr->max_lock);
5584 		ret = tr->cond_snapshot ? -EBUSY : 0;
5585 		arch_spin_unlock(&tr->max_lock);
5586 		local_irq_enable();
5587 		if (ret)
5588 			return ret;
5589 	}
5590 #endif
5591 	/* Some tracers won't work on kernel command line */
5592 	if (system_state < SYSTEM_RUNNING && trace->noboot) {
5593 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5594 			trace->name);
5595 		return -EINVAL;
5596 	}
5597 
5598 	/* Some tracers are only allowed for the top level buffer */
5599 	if (!trace_ok_for_array(trace, tr))
5600 		return -EINVAL;
5601 
5602 	/* If trace pipe files are being read, we can't change the tracer */
5603 	if (tr->trace_ref)
5604 		return -EBUSY;
5605 
5606 	trace_branch_disable();
5607 
5608 	tr->current_trace->enabled--;
5609 
5610 	if (tr->current_trace->reset)
5611 		tr->current_trace->reset(tr);
5612 
5613 	had_max_tr = tracer_uses_snapshot(tr->current_trace);
5614 
5615 	/* Current trace needs to be nop_trace before synchronize_rcu */
5616 	tr->current_trace = &nop_trace;
5617 	tr->current_trace_flags = nop_trace.flags;
5618 
5619 	if (had_max_tr && !tracer_uses_snapshot(trace)) {
5620 		/*
5621 		 * We need to make sure that the update_max_tr sees that
5622 		 * current_trace changed to nop_trace to keep it from
5623 		 * swapping the buffers after we resize it.
5624 		 * The update_max_tr is called from interrupts disabled
5625 		 * so a synchronized_sched() is sufficient.
5626 		 */
5627 		synchronize_rcu();
5628 		free_snapshot(tr);
5629 		tracing_disarm_snapshot(tr);
5630 	}
5631 
5632 	if (!had_max_tr && tracer_uses_snapshot(trace)) {
5633 		ret = tracing_arm_snapshot_locked(tr);
5634 		if (ret)
5635 			return ret;
5636 	}
5637 
5638 	tr->current_trace_flags = t->flags ? : t->tracer->flags;
5639 
5640 	if (trace->init) {
5641 		ret = tracer_init(trace, tr);
5642 		if (ret) {
5643 			if (tracer_uses_snapshot(trace))
5644 				tracing_disarm_snapshot(tr);
5645 			tr->current_trace_flags = nop_trace.flags;
5646 			return ret;
5647 		}
5648 	}
5649 
5650 	tr->current_trace = trace;
5651 	tr->current_trace->enabled++;
5652 	trace_branch_enable(tr);
5653 
5654 	return 0;
5655 }
5656 
5657 static ssize_t
tracing_set_trace_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)5658 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5659 			size_t cnt, loff_t *ppos)
5660 {
5661 	struct trace_array *tr = filp->private_data;
5662 	char buf[MAX_TRACER_SIZE+1];
5663 	char *name;
5664 	size_t ret;
5665 	int err;
5666 
5667 	ret = cnt;
5668 
5669 	if (cnt > MAX_TRACER_SIZE)
5670 		cnt = MAX_TRACER_SIZE;
5671 
5672 	if (copy_from_user(buf, ubuf, cnt))
5673 		return -EFAULT;
5674 
5675 	buf[cnt] = 0;
5676 
5677 	name = strim(buf);
5678 
5679 	err = tracing_set_tracer(tr, name);
5680 	if (err)
5681 		return err;
5682 
5683 	*ppos += ret;
5684 
5685 	return ret;
5686 }
5687 
/*
 * Format *ptr (nanoseconds, or -1 meaning "unset") as microseconds and
 * copy to user space.
 */
static ssize_t
tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
		   size_t cnt, loff_t *ppos)
{
	char buf[64];
	int r;

	r = snprintf(buf, sizeof(buf), "%ld\n",
		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
	/*
	 * NOTE(review): r is compared against sizeof(buf) (size_t), so a
	 * negative r would promote to a huge unsigned value — snprintf
	 * with this format can't fail here, but worth confirming.
	 */
	if (r > sizeof(buf))
		r = sizeof(buf);
	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
5701 
5702 static ssize_t
tracing_nsecs_write(unsigned long * ptr,const char __user * ubuf,size_t cnt,loff_t * ppos)5703 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5704 		    size_t cnt, loff_t *ppos)
5705 {
5706 	unsigned long val;
5707 	int ret;
5708 
5709 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5710 	if (ret)
5711 		return ret;
5712 
5713 	*ptr = val * 1000;
5714 
5715 	return cnt;
5716 }
5717 
static ssize_t
tracing_thresh_read(struct file *filp, char __user *ubuf,
		    size_t cnt, loff_t *ppos)
{
	/* Report the global tracing_thresh (stored in ns, shown in us). */
	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
}
5724 
static ssize_t
tracing_thresh_write(struct file *filp, const char __user *ubuf,
		     size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	int ret;

	/* Serialize against tracer changes while updating the threshold. */
	guard(mutex)(&trace_types_lock);
	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
	if (ret < 0)
		return ret;

	/* Give the current tracer a chance to react to the new threshold. */
	if (tr->current_trace->update_thresh) {
		ret = tr->current_trace->update_thresh(tr);
		if (ret < 0)
			return ret;
	}

	return cnt;
}
5745 
5746 #ifdef CONFIG_TRACER_MAX_TRACE
5747 
static ssize_t
tracing_max_lat_read(struct file *filp, char __user *ubuf,
		     size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;

	/* Per-instance max latency, reported in microseconds. */
	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
}
5756 
static ssize_t
tracing_max_lat_write(struct file *filp, const char __user *ubuf,
		      size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;

	/* Set the per-instance max latency; input is in microseconds. */
	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
}
5765 
5766 #endif
5767 
open_pipe_on_cpu(struct trace_array * tr,int cpu)5768 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
5769 {
5770 	if (cpu == RING_BUFFER_ALL_CPUS) {
5771 		if (cpumask_empty(tr->pipe_cpumask)) {
5772 			cpumask_setall(tr->pipe_cpumask);
5773 			return 0;
5774 		}
5775 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
5776 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
5777 		return 0;
5778 	}
5779 	return -EBUSY;
5780 }
5781 
close_pipe_on_cpu(struct trace_array * tr,int cpu)5782 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
5783 {
5784 	if (cpu == RING_BUFFER_ALL_CPUS) {
5785 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
5786 		cpumask_clear(tr->pipe_cpumask);
5787 	} else {
5788 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
5789 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
5790 	}
5791 }
5792 
/*
 * Open a "trace_pipe" file: allocate and initialize the consuming
 * iterator that tracing_read_pipe() will use.  Takes a reference on
 * the trace_array and claims the per-cpu pipe slot; both are released
 * on failure or in tracing_release_pipe().
 */
static int tracing_open_pipe(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	int cpu;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	/* Mutex is released when this function returns (guard()). */
	guard(mutex)(&trace_types_lock);
	cpu = tracing_get_cpu(inode);
	ret = open_pipe_on_cpu(tr, cpu);
	if (ret)
		goto fail_pipe_on_cpu;

	/* create a buffer to store the information to pass to userspace */
	iter = kzalloc_obj(*iter);
	if (!iter) {
		ret = -ENOMEM;
		goto fail_alloc_iter;
	}

	trace_seq_init(&iter->seq);
	iter->trace = tr->current_trace;

	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
		ret = -ENOMEM;
		goto fail;
	}

	/* trace pipe does not show start of buffer */
	cpumask_setall(iter->started);

	if (tr->trace_flags & TRACE_ITER(LATENCY_FMT))
		iter->iter_flags |= TRACE_FILE_LAT_FMT;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	iter->tr = tr;
	iter->array_buffer = &tr->array_buffer;
	iter->cpu_file = cpu;
	mutex_init(&iter->mutex);
	filp->private_data = iter;

	/* Some tracers hook the pipe open (e.g. to reset their state). */
	if (iter->trace->pipe_open)
		iter->trace->pipe_open(iter);

	nonseekable_open(inode, filp);

	/* Count this open reader; checked elsewhere before teardown. */
	tr->trace_ref++;

	return ret;

fail:
	kfree(iter);
fail_alloc_iter:
	close_pipe_on_cpu(tr, cpu);
fail_pipe_on_cpu:
	__trace_array_put(tr);
	return ret;
}
5858 
/*
 * Release a "trace_pipe" file: undo everything tracing_open_pipe() set
 * up (reader count, tracer pipe hook, per-cpu pipe slot, iterator,
 * trace_array reference).
 */
static int tracing_release_pipe(struct inode *inode, struct file *file)
{
	struct trace_iterator *iter = file->private_data;
	struct trace_array *tr = inode->i_private;

	scoped_guard(mutex, &trace_types_lock) {
		tr->trace_ref--;

		if (iter->trace->pipe_close)
			iter->trace->pipe_close(iter);
		close_pipe_on_cpu(tr, iter->cpu_file);
	}

	/* Free iterator internals outside the lock, then the iterator. */
	free_trace_iter_content(iter);
	kfree(iter);

	trace_array_put(tr);

	return 0;
}
5879 
5880 static __poll_t
trace_poll(struct trace_iterator * iter,struct file * filp,poll_table * poll_table)5881 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5882 {
5883 	struct trace_array *tr = iter->tr;
5884 
5885 	/* Iterators are static, they should be filled or empty */
5886 	if (trace_buffer_iter(iter, iter->cpu_file))
5887 		return EPOLLIN | EPOLLRDNORM;
5888 
5889 	if (tr->trace_flags & TRACE_ITER(BLOCK))
5890 		/*
5891 		 * Always select as readable when in blocking mode
5892 		 */
5893 		return EPOLLIN | EPOLLRDNORM;
5894 	else
5895 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
5896 					     filp, poll_table, iter->tr->buffer_percent);
5897 }
5898 
static __poll_t
tracing_poll_pipe(struct file *filp, poll_table *poll_table)
{
	struct trace_iterator *iter = filp->private_data;

	/* Thin file_operations adapter around trace_poll(). */
	return trace_poll(iter, filp, poll_table);
}
5906 
/* Must be called with iter->mutex held. */
static int tracing_wait_pipe(struct file *filp)
{
	struct trace_iterator *iter = filp->private_data;
	int ret;

	/*
	 * Loop until the buffer has something to read; returns 1 when data
	 * is available, 0/negative to stop (EOF or error/signal).
	 */
	while (trace_empty(iter)) {

		if ((filp->f_flags & O_NONBLOCK)) {
			return -EAGAIN;
		}

		/*
		 * We block until we read something and tracing is disabled.
		 * We still block if tracing is disabled, but we have never
		 * read anything. This allows a user to cat this file, and
		 * then enable tracing. But after we have read something,
		 * we give an EOF when tracing is again disabled.
		 *
		 * iter->pos will be 0 if we haven't read anything.
		 */
		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
			break;

		/* Drop the mutex while sleeping; re-acquire before re-checking. */
		mutex_unlock(&iter->mutex);

		ret = wait_on_pipe(iter, 0);

		mutex_lock(&iter->mutex);

		if (ret)
			return ret;
	}

	return 1;
}
5943 
/*
 * Returns true when the buffer held last-boot data and all per-cpu
 * buffers have been fully consumed, after resetting the buffer.
 */
static bool update_last_data_if_empty(struct trace_array *tr)
{
	/* Only relevant for a buffer still holding last-boot data. */
	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
		return false;

	/* Nothing to do while any per-cpu buffer still has entries. */
	if (!ring_buffer_empty(tr->array_buffer.buffer))
		return false;

	/*
	 * If the buffer contains the last boot data and all per-cpu
	 * buffers are empty, reset it from the kernel side.
	 */
	update_last_data(tr);
	return true;
}
5959 
/*
 * Consumer reader.
 *
 * Formats trace entries into iter->seq, consuming them from the ring
 * buffer, and copies the text to user space.  Blocks (unless
 * O_NONBLOCK) while the buffer is empty.
 */
static ssize_t
tracing_read_pipe(struct file *filp, char __user *ubuf,
		  size_t cnt, loff_t *ppos)
{
	struct trace_iterator *iter = filp->private_data;
	ssize_t sret;

	/*
	 * Avoid more than one consumer on a single file descriptor
	 * This is just a matter of traces coherency, the ring buffer itself
	 * is protected.
	 */
	guard(mutex)(&iter->mutex);

	/* return any leftover data */
	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
	if (sret != -EBUSY)
		return sret;

	trace_seq_init(&iter->seq);

	/* A tracer may provide its own read implementation; defer to it. */
	if (iter->trace->read) {
		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
		if (sret)
			return sret;
	}

waitagain:
	if (update_last_data_if_empty(iter->tr))
		return 0;

	sret = tracing_wait_pipe(filp);
	if (sret <= 0)
		return sret;

	/* stop when tracing is finished */
	if (trace_empty(iter))
		return 0;

	/* Never format more than one trace_seq buffer's worth at a time. */
	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
		cnt = TRACE_SEQ_BUFFER_SIZE - 1;

	/* reset all but tr, trace, and overruns */
	trace_iterator_reset(iter);
	cpumask_clear(iter->started);
	trace_seq_init(&iter->seq);

	trace_event_read_lock();
	trace_access_lock(iter->cpu_file);
	while (trace_find_next_entry_inc(iter) != NULL) {
		enum print_line_t ret;
		int save_len = iter->seq.seq.len;

		ret = print_trace_line(iter);
		if (ret == TRACE_TYPE_PARTIAL_LINE) {
			/*
			 * If one print_trace_line() fills entire trace_seq in one shot,
			 * trace_seq_to_user() will returns -EBUSY because save_len == 0,
			 * In this case, we need to consume it, otherwise, loop will peek
			 * this event next time, resulting in an infinite loop.
			 */
			if (save_len == 0) {
				iter->seq.full = 0;
				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
				trace_consume(iter);
				break;
			}

			/* In other cases, don't print partial lines */
			iter->seq.seq.len = save_len;
			break;
		}
		if (ret != TRACE_TYPE_NO_CONSUME)
			trace_consume(iter);

		if (trace_seq_used(&iter->seq) >= cnt)
			break;

		/*
		 * Setting the full flag means we reached the trace_seq buffer
		 * size and we should leave by partial output condition above.
		 * One of the trace_seq_* functions is not used properly.
		 */
		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
			  iter->ent->type);
	}
	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();

	/* Now copy what we have to the user */
	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
		trace_seq_init(&iter->seq);

	/*
	 * If there was nothing to send to user, in spite of consuming trace
	 * entries, go back to wait for more entries.
	 */
	if (sret == -EBUSY)
		goto waitagain;

	return sret;
}
6066 
/* splice_pipe_desc release callback: free the page held in slot @idx. */
static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
				     unsigned int idx)
{
	__free_page(spd->pages[idx]);
}
6072 
/*
 * Format trace lines into iter->seq until at most @rem bytes have been
 * produced (or the seq buffer fills, or entries run out), consuming
 * entries as they are printed.  Returns how much of @rem is left.
 */
static size_t
tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
{
	size_t count;
	int save_len;
	int ret;

	/* Seq buffer is page-sized, exactly what we need. */
	for (;;) {
		save_len = iter->seq.seq.len;
		ret = print_trace_line(iter);

		/* On overflow, roll back the partial line and stop. */
		if (trace_seq_has_overflowed(&iter->seq)) {
			iter->seq.seq.len = save_len;
			break;
		}

		/*
		 * This should not be hit, because it should only
		 * be set if the iter->seq overflowed. But check it
		 * anyway to be safe.
		 */
		if (ret == TRACE_TYPE_PARTIAL_LINE) {
			iter->seq.seq.len = save_len;
			break;
		}

		/* Line would exceed the caller's remaining budget: undo it. */
		count = trace_seq_used(&iter->seq) - save_len;
		if (rem < count) {
			rem = 0;
			iter->seq.seq.len = save_len;
			break;
		}

		if (ret != TRACE_TYPE_NO_CONSUME)
			trace_consume(iter);
		rem -= count;
		if (!trace_find_next_entry_inc(iter))	{
			rem = 0;
			iter->ent = NULL;
			break;
		}
	}

	return rem;
}
6119 
/*
 * splice() support for trace_pipe: fill freshly allocated pages with
 * formatted trace text and hand them to the pipe without an extra copy
 * through user space.
 */
static ssize_t tracing_splice_read_pipe(struct file *filp,
					loff_t *ppos,
					struct pipe_inode_info *pipe,
					size_t len,
					unsigned int flags)
{
	struct page *pages_def[PIPE_DEF_BUFFERS];
	struct partial_page partial_def[PIPE_DEF_BUFFERS];
	struct trace_iterator *iter = filp->private_data;
	struct splice_pipe_desc spd = {
		.pages		= pages_def,
		.partial	= partial_def,
		.nr_pages	= 0, /* This gets updated below. */
		.nr_pages_max	= PIPE_DEF_BUFFERS,
		.ops		= &default_pipe_buf_ops,
		.spd_release	= tracing_spd_release_pipe,
	};
	ssize_t ret;
	size_t rem;
	unsigned int i;

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

	mutex_lock(&iter->mutex);

	/* A tracer may provide its own splice implementation; defer to it. */
	if (iter->trace->splice_read) {
		ret = iter->trace->splice_read(iter, filp,
					       ppos, pipe, len, flags);
		if (ret)
			goto out_err;
	}

	ret = tracing_wait_pipe(filp);
	if (ret <= 0)
		goto out_err;

	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
		ret = -EFAULT;
		goto out_err;
	}

	trace_event_read_lock();
	trace_access_lock(iter->cpu_file);

	/* Fill as many pages as possible. */
	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
		spd.pages[i] = alloc_page(GFP_KERNEL);
		if (!spd.pages[i])
			break;

		rem = tracing_fill_pipe_page(rem, iter);

		/* Copy the data into the page, so we can start over. */
		ret = trace_seq_to_buffer(&iter->seq,
					  page_address(spd.pages[i]),
					  min((size_t)trace_seq_used(&iter->seq),
						  (size_t)PAGE_SIZE));
		if (ret < 0) {
			__free_page(spd.pages[i]);
			break;
		}
		spd.partial[i].offset = 0;
		spd.partial[i].len = ret;

		trace_seq_init(&iter->seq);
	}

	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();
	mutex_unlock(&iter->mutex);

	spd.nr_pages = i;

	/* Only splice if at least one page was filled. */
	if (i)
		ret = splice_to_pipe(pipe, &spd);
	else
		ret = 0;
out:
	splice_shrink_spd(&spd);
	return ret;

out_err:
	mutex_unlock(&iter->mutex);
	goto out;
}
6206 
6207 static ssize_t
tracing_syscall_buf_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6208 tracing_syscall_buf_read(struct file *filp, char __user *ubuf,
6209 			 size_t cnt, loff_t *ppos)
6210 {
6211 	struct inode *inode = file_inode(filp);
6212 	struct trace_array *tr = inode->i_private;
6213 	char buf[64];
6214 	int r;
6215 
6216 	r = snprintf(buf, 64, "%d\n", tr->syscall_buf_sz);
6217 
6218 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6219 }
6220 
6221 static ssize_t
tracing_syscall_buf_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6222 tracing_syscall_buf_write(struct file *filp, const char __user *ubuf,
6223 			  size_t cnt, loff_t *ppos)
6224 {
6225 	struct inode *inode = file_inode(filp);
6226 	struct trace_array *tr = inode->i_private;
6227 	unsigned long val;
6228 	int ret;
6229 
6230 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6231 	if (ret)
6232 		return ret;
6233 
6234 	if (val > SYSCALL_FAULT_USER_MAX)
6235 		val = SYSCALL_FAULT_USER_MAX;
6236 
6237 	tr->syscall_buf_sz = val;
6238 
6239 	*ppos += cnt;
6240 
6241 	return cnt;
6242 }
6243 
6244 static ssize_t
tracing_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6245 tracing_entries_read(struct file *filp, char __user *ubuf,
6246 		     size_t cnt, loff_t *ppos)
6247 {
6248 	struct inode *inode = file_inode(filp);
6249 	struct trace_array *tr = inode->i_private;
6250 	int cpu = tracing_get_cpu(inode);
6251 	char buf[64];
6252 	int r = 0;
6253 	ssize_t ret;
6254 
6255 	mutex_lock(&trace_types_lock);
6256 
6257 	if (cpu == RING_BUFFER_ALL_CPUS) {
6258 		int cpu, buf_size_same;
6259 		unsigned long size;
6260 
6261 		size = 0;
6262 		buf_size_same = 1;
6263 		/* check if all cpu sizes are same */
6264 		for_each_tracing_cpu(cpu) {
6265 			/* fill in the size from first enabled cpu */
6266 			if (size == 0)
6267 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6268 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6269 				buf_size_same = 0;
6270 				break;
6271 			}
6272 		}
6273 
6274 		if (buf_size_same) {
6275 			if (!tr->ring_buffer_expanded)
6276 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6277 					    size >> 10,
6278 					    trace_buf_size >> 10);
6279 			else
6280 				r = sprintf(buf, "%lu\n", size >> 10);
6281 		} else
6282 			r = sprintf(buf, "X\n");
6283 	} else
6284 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6285 
6286 	mutex_unlock(&trace_types_lock);
6287 
6288 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6289 	return ret;
6290 }
6291 
6292 static ssize_t
tracing_entries_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6293 tracing_entries_write(struct file *filp, const char __user *ubuf,
6294 		      size_t cnt, loff_t *ppos)
6295 {
6296 	struct inode *inode = file_inode(filp);
6297 	struct trace_array *tr = inode->i_private;
6298 	unsigned long val;
6299 	int ret;
6300 
6301 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6302 	if (ret)
6303 		return ret;
6304 
6305 	/* must have at least 1 entry */
6306 	if (!val)
6307 		return -EINVAL;
6308 
6309 	/* value is in KB */
6310 	val <<= 10;
6311 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6312 	if (ret < 0)
6313 		return ret;
6314 
6315 	*ppos += cnt;
6316 
6317 	return cnt;
6318 }
6319 
/*
 * Report the total ring buffer size (sum over all CPUs) in KB; an
 * unexpanded buffer also reports the size it would expand to.
 */
static ssize_t
tracing_total_entries_read(struct file *filp, char __user *ubuf,
				size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	char buf[64];
	int r, cpu;
	unsigned long size = 0, expanded_size = 0;

	mutex_lock(&trace_types_lock);
	for_each_tracing_cpu(cpu) {
		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
		if (!tr->ring_buffer_expanded)
			expanded_size += trace_buf_size >> 10;
	}
	if (tr->ring_buffer_expanded)
		r = sprintf(buf, "%lu\n", size);
	else
		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
	mutex_unlock(&trace_types_lock);

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
6343 
6344 #define LAST_BOOT_HEADER ((void *)1)
6345 
/*
 * seq_file iterator: position 0 yields the header, positions 1..n map
 * to the saved module entries of the last boot's scratch area.
 */
static void *l_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_array *tr = m->private;
	struct trace_scratch *tscratch = tr->scratch;
	unsigned int index = *pos;

	/* Note: index holds the pre-increment position. */
	(*pos)++;

	if (*pos == 1)
		return LAST_BOOT_HEADER;

	/* Only show offsets of the last boot data */
	if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
		return NULL;

	/* *pos 0 is for the header, 1 is for the first module */
	index--;

	if (index >= tscratch->nr_entries)
		return NULL;

	return &tscratch->entries[index];
}
6369 
/* Hold scratch_mutex for the whole sequence walk; released in l_stop(). */
static void *l_start(struct seq_file *m, loff_t *pos)
{
	mutex_lock(&scratch_mutex);

	return l_next(m, NULL, pos);
}
6376 
/* Counterpart of l_start(): drop scratch_mutex when the walk ends. */
static void l_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&scratch_mutex);
}
6381 
/* Emit the first line of the last_boot_info file. */
static void show_last_boot_header(struct seq_file *m, struct trace_array *tr)
{
	struct trace_scratch *tscratch = tr->scratch;

	/*
	 * Do not leak KASLR address. This only shows the KASLR address of
	 * the last boot. When the ring buffer is started, the LAST_BOOT
	 * flag gets cleared, and this should only report "current".
	 * Otherwise it shows the KASLR address from the previous boot which
	 * should not be the same as the current boot.
	 */
	if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
		seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr);
	else
		seq_puts(m, "# Current\n");
}
6398 
l_show(struct seq_file * m,void * v)6399 static int l_show(struct seq_file *m, void *v)
6400 {
6401 	struct trace_array *tr = m->private;
6402 	struct trace_mod_entry *entry = v;
6403 
6404 	if (v == LAST_BOOT_HEADER) {
6405 		show_last_boot_header(m, tr);
6406 		return 0;
6407 	}
6408 
6409 	seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name);
6410 	return 0;
6411 }
6412 
/* seq_file operations backing the last_boot_info file. */
static const struct seq_operations last_boot_seq_ops = {
	.start		= l_start,
	.next		= l_next,
	.stop		= l_stop,
	.show		= l_show,
};
6419 
/*
 * Open the last_boot_info file: take a trace_array reference and set up
 * the seq_file walker, stashing the trace_array for the seq callbacks.
 */
static int tracing_last_boot_open(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	struct seq_file *m;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	ret = seq_open(file, &last_boot_seq_ops);
	if (ret) {
		/* Drop the reference taken above on failure. */
		trace_array_put(tr);
		return ret;
	}

	m = file->private_data;
	m->private = tr;

	return 0;
}
6441 
/*
 * Open the per-cpu buffer meta file: the seq_file setup is delegated to
 * the ring buffer code; we only manage the trace_array reference.
 */
static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;
	int cpu = tracing_get_cpu(inode);
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
	if (ret < 0)
		__trace_array_put(tr);
	return ret;
}
6457 
/* Writes to free_buffer are ignored; the real work happens on release. */
static ssize_t
tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
			  size_t cnt, loff_t *ppos)
{
	/*
	 * There is no need to read what the user has written, this function
	 * is just to make sure that there is no error when "echo" is used
	 */

	*ppos += cnt;

	return cnt;
}
6471 
/*
 * Closing free_buffer shrinks the ring buffer to zero (optionally
 * stopping tracing first, depending on the stop_on_free option).
 */
static int
tracing_free_buffer_release(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;

	/* disable tracing ? */
	if (tr->trace_flags & TRACE_ITER(STOP_ON_FREE))
		tracer_tracing_off(tr);
	/* resize the ring buffer to 0 */
	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);

	trace_array_put(tr);

	return 0;
}
6487 
6488 #define TRACE_MARKER_MAX_SIZE		4096
6489 
/*
 * Record a trace_marker write as a TRACE_PRINT event.  Reserves room
 * for the text plus a trailing '\n' and '\0', retrying once with a
 * truncated size if the event would exceed the ring buffer's maximum
 * event size.  Returns the number of bytes recorded or -EBADF.
 */
static ssize_t write_marker_to_buffer(struct trace_array *tr, const char *buf,
				      size_t cnt, unsigned long ip)
{
	struct ring_buffer_event *event;
	enum event_trigger_type tt = ETT_NONE;
	struct trace_buffer *buffer;
	struct print_entry *entry;
	int meta_size;
	ssize_t written;
	size_t size;

	meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
 again:
	size = cnt + meta_size;

	buffer = tr->array_buffer.buffer;
	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
					    tracing_gen_ctx());
	if (unlikely(!event)) {
		/*
		 * If the size was greater than what was allowed, then
		 * make it smaller and try again.
		 */
		if (size > ring_buffer_max_event_size(buffer)) {
			cnt = ring_buffer_max_event_size(buffer) - meta_size;
			/* The above should only happen once */
			if (WARN_ON_ONCE(cnt + meta_size == size))
				return -EBADF;
			goto again;
		}

		/* Ring buffer disabled, return as if not open for write */
		return -EBADF;
	}

	entry = ring_buffer_event_data(event);
	entry->ip = ip;
	memcpy(&entry->buf, buf, cnt);
	written = cnt;

	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
		/* do not add \n before testing triggers, but add \0 */
		entry->buf[cnt] = '\0';
		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
	}

	/* Ensure the recorded text ends with a newline and is terminated. */
	if (entry->buf[cnt - 1] != '\n') {
		entry->buf[cnt] = '\n';
		entry->buf[cnt + 1] = '\0';
	} else
		entry->buf[cnt] = '\0';

	if (static_branch_unlikely(&trace_marker_exports_enabled))
		ftrace_exports(event, TRACE_EXPORT_MARKER);
	__buffer_unlock_commit(buffer, event);

	/* Run any post-commit trigger actions outside the commit path. */
	if (tt)
		event_triggers_post_call(tr->trace_marker_file, tt);

	return written;
}
6551 
/* Per-CPU slot holding one scratch buffer for copying from user space. */
struct trace_user_buf {
	char		*buf;	/* kmalloc'd buffer for this CPU */
};
6555 
6556 static DEFINE_MUTEX(trace_user_buffer_mutex);
6557 static struct trace_user_buf_info *trace_user_buffer;
6558 
/**
 * trace_user_fault_destroy - free up allocated memory of a trace user buffer
 * @tinfo: The descriptor to free up
 *
 * Frees any data allocated in the trace info descriptor.
 */
void trace_user_fault_destroy(struct trace_user_buf_info *tinfo)
{
	char *buf;
	int cpu;

	/* Tolerate a NULL or never-allocated descriptor. */
	if (!tinfo || !tinfo->tbuf)
		return;

	/* Free each per-CPU buffer, then the per-CPU structure itself. */
	for_each_possible_cpu(cpu) {
		buf = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
		kfree(buf);
	}
	free_percpu(tinfo->tbuf);
}
6579 
/*
 * Allocate the per-CPU buffers of @size bytes for @tinfo.  On partial
 * failure the already-allocated buffers are left in place for the
 * caller to release (see user_buffer_init() / trace_user_fault_destroy()).
 */
static int user_fault_buffer_enable(struct trace_user_buf_info *tinfo, size_t size)
{
	char *buf;
	int cpu;

	lockdep_assert_held(&trace_user_buffer_mutex);

	tinfo->tbuf = alloc_percpu(struct trace_user_buf);
	if (!tinfo->tbuf)
		return -ENOMEM;

	tinfo->ref = 1;
	tinfo->size = size;

	/* Clear each buffer in case of error */
	for_each_possible_cpu(cpu) {
		per_cpu_ptr(tinfo->tbuf, cpu)->buf = NULL;
	}

	/* NUMA-local allocation for each CPU's buffer. */
	for_each_possible_cpu(cpu) {
		buf = kmalloc_node(size, GFP_KERNEL,
				   cpu_to_node(cpu));
		if (!buf)
			return -ENOMEM;
		per_cpu_ptr(tinfo->tbuf, cpu)->buf = buf;
	}

	return 0;
}
6609 
/* For internal use. Free and reinitialize */
static void user_buffer_free(struct trace_user_buf_info **tinfo)
{
	lockdep_assert_held(&trace_user_buffer_mutex);

	/* Release buffers and descriptor, then clear the caller's pointer. */
	trace_user_fault_destroy(*tinfo);
	kfree(*tinfo);
	*tinfo = NULL;
}
6619 
/* For internal use. Initialize and allocate */
static int user_buffer_init(struct trace_user_buf_info **tinfo, size_t size)
{
	bool alloc = false;
	int ret;

	lockdep_assert_held(&trace_user_buffer_mutex);

	/* Allocate the descriptor itself only if the caller has none yet. */
	if (!*tinfo) {
		alloc = true;
		*tinfo = kzalloc_obj(**tinfo);
		if (!*tinfo)
			return -ENOMEM;
	}

	ret = user_fault_buffer_enable(*tinfo, size);
	/* Only free the descriptor here if this function allocated it. */
	if (ret < 0 && alloc)
		user_buffer_free(tinfo);

	return ret;
}
6641 
/* For internal use, dereference and free if necessary */
static void user_buffer_put(struct trace_user_buf_info **tinfo)
{
	guard(mutex)(&trace_user_buffer_mutex);

	/* Putting a NULL or zero-ref descriptor is a caller bug. */
	if (WARN_ON_ONCE(!*tinfo || !(*tinfo)->ref))
		return;

	if (--(*tinfo)->ref)
		return;

	/* Last reference gone: free buffers and descriptor. */
	user_buffer_free(tinfo);
}
6655 
/**
 * trace_user_fault_init - Allocated or reference a per CPU buffer
 * @tinfo: A pointer to the trace buffer descriptor
 * @size: The size to allocate each per CPU buffer
 *
 * Create a per CPU buffer that can be used to copy from user space
 * in a task context. When calling trace_user_fault_read(), preemption
 * must be disabled, and it will enable preemption and copy user
 * space data to the buffer. If any schedule switches occur, it will
 * retry until it succeeds without a schedule switch knowing the buffer
 * is still valid.
 *
 * Returns 0 on success, negative on failure.
 */
int trace_user_fault_init(struct trace_user_buf_info *tinfo, size_t size)
{
	int ret;

	if (!tinfo)
		return -EINVAL;

	guard(mutex)(&trace_user_buffer_mutex);

	/*
	 * @tinfo is non-NULL here, so user_buffer_init() will not
	 * allocate a new descriptor, only the per-CPU buffers.
	 */
	ret = user_buffer_init(&tinfo, size);
	if (ret < 0)
		trace_user_fault_destroy(tinfo);

	return ret;
}
6685 
/**
 * trace_user_fault_get - up the ref count for the user buffer
 * @tinfo: A pointer to the trace buffer descriptor
 *
 * Ups the ref count of the trace buffer.
 *
 * Returns the new ref count, or -1 if @tinfo is NULL.
 */
int trace_user_fault_get(struct trace_user_buf_info *tinfo)
{
	if (!tinfo)
		return -1;

	guard(mutex)(&trace_user_buffer_mutex);

	tinfo->ref++;
	return tinfo->ref;
}
6704 
/**
 * trace_user_fault_put - dereference a per cpu trace buffer
 * @tinfo: The @tinfo that was passed to trace_user_fault_get()
 *
 * Decrement the ref count of @tinfo.  Note: unlike user_buffer_put(),
 * this does not free the buffers when the count reaches zero.
 *
 * Returns the new refcount (negative on error).
 */
int trace_user_fault_put(struct trace_user_buf_info *tinfo)
{
	guard(mutex)(&trace_user_buffer_mutex);

	/* Dropping a NULL or zero-ref descriptor is a caller bug. */
	if (WARN_ON_ONCE(!tinfo || !tinfo->ref))
		return -1;

	--tinfo->ref;
	return tinfo->ref;
}
6723 
/**
 * trace_user_fault_read - Read user space into a per CPU buffer
 * @tinfo: The @tinfo allocated by trace_user_fault_get()
 * @ptr: The user space pointer to read
 * @size: The size of user space to read.
 * @copy_func: Optional function to use to copy from user space
 * @data: Data to pass to copy_func if it was supplied
 *
 * Preemption must be disabled when this is called, and must not
 * be enabled while using the returned buffer.
 * This does the copying from user space into a per CPU buffer.
 *
 * The @size must not be greater than the size passed in to
 * trace_user_fault_init().
 *
 * If @copy_func is NULL, trace_user_fault_read() will use copy_from_user(),
 * otherwise it will call @copy_func. It will call @copy_func with:
 *
 *   buffer: the per CPU buffer of the @tinfo.
 *   ptr: The pointer @ptr to user space to read
 *   size: The @size of the ptr to read
 *   data: The @data parameter
 *
 * It is expected that @copy_func will return 0 on success and non zero
 * if there was a fault.
 *
 * Returns a pointer to the buffer with the content read from @ptr.
 *   Preemption must remain disabled while the caller accesses the
 *   buffer returned by this function.
 * Returns NULL if there was a fault, or the size passed in is
 *   greater than the size passed to trace_user_fault_init().
 */
char *trace_user_fault_read(struct trace_user_buf_info *tinfo,
			     const char __user *ptr, size_t size,
			     trace_user_buf_copy copy_func, void *data)
{
	int cpu = smp_processor_id();
	char *buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
	unsigned int cnt;
	int trys = 0;
	int ret;

	lockdep_assert_preemption_disabled();

	/*
	 * It's up to the caller to not try to copy more than it said
	 * it would.
	 */
	if (size > tinfo->size)
		return NULL;

	/*
	 * This acts similar to a seqcount. The per CPU context switches are
	 * recorded, migration is disabled and preemption is enabled. The
	 * read of the user space memory is copied into the per CPU buffer.
	 * Preemption is disabled again, and if the per CPU context switches count
	 * is still the same, it means the buffer has not been corrupted.
	 * If the count is different, it is assumed the buffer is corrupted
	 * and reading must be tried again.
	 */

	do {
		/*
		 * If for some reason, copy_from_user() always causes a context
		 * switch, this would then cause an infinite loop.
		 * If this task is preempted by another user space task, it
		 * will cause this task to try again. But just in case something
		 * changes where the copying from user space causes another task
		 * to run, prevent this from going into an infinite loop.
		 * 100 tries should be plenty.
		 */
		if (WARN_ONCE(trys++ > 100, "Error: Too many tries to read user space"))
			return NULL;

		/* Read the current CPU context switch counter */
		cnt = nr_context_switches_cpu(cpu);

		/*
		 * Preemption is going to be enabled, but this task must
		 * remain on this CPU.
		 */
		migrate_disable();

		/*
		 * Now preemption is being enabled and another task can come in
		 * and use the same buffer and corrupt our data.
		 */
		preempt_enable_notrace();

		/* Make sure preemption is enabled here */
		lockdep_assert_preemption_enabled();

		if (copy_func) {
			ret = copy_func(buffer, ptr, size, data);
		} else {
			/*
			 * NOTE(review): __copy_from_user() skips the access_ok()
			 * check; presumably @ptr was already validated on the
			 * caller's path — confirm before adding new callers.
			 */
			ret = __copy_from_user(buffer, ptr, size);
		}

		preempt_disable_notrace();
		migrate_enable();

		/* if it faulted, no need to test if the buffer was corrupted */
		if (ret)
			return NULL;

		/*
		 * Preemption is disabled again, now check the per CPU context
		 * switch counter. If it doesn't match, then another user space
		 * process may have schedule in and corrupted our buffer. In that
		 * case the copying must be retried.
		 */
	} while (nr_context_switches_cpu(cpu) != cnt);

	return buffer;
}
6839 
/* Write handler for trace_marker: inject a user message into the ring buffer */
static ssize_t
tracing_mark_write(struct file *filp, const char __user *ubuf,
					size_t cnt, loff_t *fpos)
{
	struct trace_array *tr = filp->private_data;
	ssize_t written = -ENODEV;
	unsigned long ip;
	char *buf;

	if (unlikely(tracing_disabled))
		return -EINVAL;

	/* Writes are rejected when the markers trace option is off */
	if (!(tr->trace_flags & TRACE_ITER(MARKERS)))
		return -EINVAL;

	/* Reject a size_t that would read as negative in ssize_t */
	if ((ssize_t)cnt < 0)
		return -EINVAL;

	/* Silently truncate oversized writes (the raw variant rejects instead) */
	if (cnt > TRACE_MARKER_MAX_SIZE)
		cnt = TRACE_MARKER_MAX_SIZE;

	/* Must have preemption disabled while having access to the buffer */
	guard(preempt_notrace)();

	buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL);
	if (!buf)
		return -EFAULT;

	/* The selftests expect this function to be the IP address */
	ip = _THIS_IP_;

	/* The global trace_marker can go to multiple instances */
	if (tr == &global_trace) {
		guard(rcu)();
		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
			written = write_marker_to_buffer(tr, buf, cnt, ip);
			if (written < 0)
				break;
		}
	} else {
		written = write_marker_to_buffer(tr, buf, cnt, ip);
	}

	return written;
}
6885 
/*
 * Write a raw (binary) marker event into @tr's ring buffer. @buf holds the
 * user-supplied id followed by its payload, @cnt bytes in total.
 */
static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
					  const char *buf, size_t cnt)
{
	struct ring_buffer_event *event;
	struct trace_buffer *buffer;
	struct raw_data_entry *entry;
	ssize_t written;
	size_t size;

	/* cnt includes both the entry->id and the data behind it. */
	size = struct_offset(entry, id) + cnt;

	buffer = tr->array_buffer.buffer;

	/* An event must fit within a single ring buffer page */
	if (size > ring_buffer_max_event_size(buffer))
		return -EINVAL;

	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
					    tracing_gen_ctx());
	if (!event)
		/* Ring buffer disabled, return as if not open for write */
		return -EBADF;

	entry = ring_buffer_event_data(event);
	unsafe_memcpy(&entry->id, buf, cnt,
		      "id and content already reserved on ring buffer"
		      "'buf' includes the 'id' and the data."
		      "'entry' was allocated with cnt from 'id'.");
	written = cnt;

	__buffer_unlock_commit(buffer, event);

	return written;
}
6920 
/* Write handler for trace_marker_raw: inject binary data into the ring buffer */
static ssize_t
tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
					size_t cnt, loff_t *fpos)
{
	struct trace_array *tr = filp->private_data;
	ssize_t written = -ENODEV;
	char *buf;

	if (unlikely(tracing_disabled))
		return -EINVAL;

	/* Writes are rejected when the markers trace option is off */
	if (!(tr->trace_flags & TRACE_ITER(MARKERS)))
		return -EINVAL;

	/* The marker must at least have a tag id */
	if (cnt < sizeof(unsigned int))
		return -EINVAL;

	/* raw write is all or nothing */
	if (cnt > TRACE_MARKER_MAX_SIZE)
		return -EINVAL;

	/* Must have preemption disabled while having access to the buffer */
	guard(preempt_notrace)();

	buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL);
	if (!buf)
		return -EFAULT;

	/* The global trace_marker_raw can go to multiple instances */
	if (tr == &global_trace) {
		guard(rcu)();
		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
			written = write_raw_marker_to_buffer(tr, buf, cnt);
			if (written < 0)
				break;
		}
	} else {
		written = write_raw_marker_to_buffer(tr, buf, cnt);
	}

	return written;
}
6964 
/*
 * Open handler shared by trace_marker and trace_marker_raw: allocates
 * (or takes a reference on) the shared per CPU user-copy buffer that the
 * write handlers read user data into.
 */
static int tracing_mark_open(struct inode *inode, struct file *filp)
{
	int ret;

	scoped_guard(mutex, &trace_user_buffer_mutex) {
		if (!trace_user_buffer) {
			/*
			 * First opener allocates the buffer; presumably
			 * user_buffer_init() also sets the initial ref —
			 * confirm against its definition.
			 */
			ret = user_buffer_init(&trace_user_buffer, TRACE_MARKER_MAX_SIZE);
			if (ret < 0)
				return ret;
		} else {
			trace_user_buffer->ref++;
		}
	}

	stream_open(inode, filp);
	ret = tracing_open_generic_tr(inode, filp);
	if (ret < 0)
		/* Drop the reference taken above on failure */
		user_buffer_put(&trace_user_buffer);
	return ret;
}
6985 
tracing_mark_release(struct inode * inode,struct file * file)6986 static int tracing_mark_release(struct inode *inode, struct file *file)
6987 {
6988 	user_buffer_put(&trace_user_buffer);
6989 	return tracing_release_generic_tr(inode, file);
6990 }
6991 
tracing_clock_show(struct seq_file * m,void * v)6992 static int tracing_clock_show(struct seq_file *m, void *v)
6993 {
6994 	struct trace_array *tr = m->private;
6995 	int i;
6996 
6997 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6998 		seq_printf(m,
6999 			"%s%s%s%s", i ? " " : "",
7000 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7001 			i == tr->clock_id ? "]" : "");
7002 	seq_putc(m, '\n');
7003 
7004 	return 0;
7005 }
7006 
/* Set the trace clock of @tr to the clock named @clockstr */
int tracing_set_clock(struct trace_array *tr, const char *clockstr)
{
	int i;

	/* Look the name up in the table of available clocks */
	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
		if (strcmp(trace_clocks[i].name, clockstr) == 0)
			break;
	}
	if (i == ARRAY_SIZE(trace_clocks))
		return -EINVAL;

	guard(mutex)(&trace_types_lock);

	tr->clock_id = i;

	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);

	/*
	 * New clock may not be consistent with the previous clock.
	 * Reset the buffer so that it doesn't have incomparable timestamps.
	 */
	tracing_reset_online_cpus(&tr->array_buffer);

#ifdef CONFIG_TRACER_SNAPSHOT
	/* Keep the snapshot buffer's clock in sync and reset it too */
	if (tr->snapshot_buffer.buffer)
		ring_buffer_set_clock(tr->snapshot_buffer.buffer, trace_clocks[i].func);
	tracing_reset_online_cpus(&tr->snapshot_buffer);
#endif
	update_last_data_if_empty(tr);

	/*
	 * NOTE(review): looks like the scratch area persists the clock id
	 * across boots unless this array still holds last-boot data —
	 * confirm against the boot-mapped buffer code.
	 */
	if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
		struct trace_scratch *tscratch = tr->scratch;

		tscratch->clock_id = i;
	}

	return 0;
}
7045 
tracing_clock_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7046 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7047 				   size_t cnt, loff_t *fpos)
7048 {
7049 	struct seq_file *m = filp->private_data;
7050 	struct trace_array *tr = m->private;
7051 	char buf[64];
7052 	const char *clockstr;
7053 	int ret;
7054 
7055 	if (cnt >= sizeof(buf))
7056 		return -EINVAL;
7057 
7058 	if (copy_from_user(buf, ubuf, cnt))
7059 		return -EFAULT;
7060 
7061 	buf[cnt] = 0;
7062 
7063 	clockstr = strstrip(buf);
7064 
7065 	ret = tracing_set_clock(tr, clockstr);
7066 	if (ret)
7067 		return ret;
7068 
7069 	*fpos += cnt;
7070 
7071 	return cnt;
7072 }
7073 
tracing_clock_open(struct inode * inode,struct file * file)7074 static int tracing_clock_open(struct inode *inode, struct file *file)
7075 {
7076 	struct trace_array *tr = inode->i_private;
7077 	int ret;
7078 
7079 	ret = tracing_check_open_get_tr(tr);
7080 	if (ret)
7081 		return ret;
7082 
7083 	ret = single_open(file, tracing_clock_show, inode->i_private);
7084 	if (ret < 0)
7085 		trace_array_put(tr);
7086 
7087 	return ret;
7088 }
7089 
tracing_time_stamp_mode_show(struct seq_file * m,void * v)7090 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7091 {
7092 	struct trace_array *tr = m->private;
7093 
7094 	guard(mutex)(&trace_types_lock);
7095 
7096 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7097 		seq_puts(m, "delta [absolute]\n");
7098 	else
7099 		seq_puts(m, "[delta] absolute\n");
7100 
7101 	return 0;
7102 }
7103 
tracing_time_stamp_mode_open(struct inode * inode,struct file * file)7104 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7105 {
7106 	struct trace_array *tr = inode->i_private;
7107 	int ret;
7108 
7109 	ret = tracing_check_open_get_tr(tr);
7110 	if (ret)
7111 		return ret;
7112 
7113 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7114 	if (ret < 0)
7115 		trace_array_put(tr);
7116 
7117 	return ret;
7118 }
7119 
tracing_event_time_stamp(struct trace_buffer * buffer,struct ring_buffer_event * rbe)7120 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7121 {
7122 	if (rbe == this_cpu_read(trace_buffered_event))
7123 		return ring_buffer_time_stamp(buffer);
7124 
7125 	return ring_buffer_event_time_stamp(buffer, rbe);
7126 }
7127 
/*
 * Per-open state for the per CPU buffer files (see tracing_buffers_open()).
 * NOTE(review): the spare* fields appear to cache a ring-buffer read page
 * between reads — confirm in tracing_buffers_read().
 */
struct ftrace_buffer_info {
	struct trace_iterator	iter;
	void			*spare;		/* cached read page (see note above) */
	unsigned int		spare_cpu;	/* CPU the spare belongs to */
	unsigned int		spare_size;	/* size of the spare */
	unsigned int		read;		/* bytes already consumed from it */
};
7135 
7136 #ifdef CONFIG_TRACER_SNAPSHOT
/* Open handler for the snapshot file; readers get a full trace iterator */
static int tracing_snapshot_open(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	struct seq_file *m;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	if (file->f_mode & FMODE_READ) {
		iter = __tracing_open(inode, file, true);
		if (IS_ERR(iter))
			ret = PTR_ERR(iter);
	} else {
		/* Writes still need the seq_file to hold the private data */
		ret = -ENOMEM;
		m = kzalloc_obj(*m);
		if (!m)
			goto out;
		iter = kzalloc_obj(*iter);
		if (!iter) {
			kfree(m);
			goto out;
		}
		ret = 0;

		/* Minimal iterator: just enough for tracing_snapshot_write() */
		iter->tr = tr;
		iter->array_buffer = &tr->snapshot_buffer;
		iter->cpu_file = tracing_get_cpu(inode);
		m->private = iter;
		file->private_data = m;
	}
out:
	/* Drop the reference taken by tracing_check_open_get_tr() on failure */
	if (ret < 0)
		trace_array_put(tr);

	return ret;
}
7177 
tracing_swap_cpu_buffer(void * tr)7178 static void tracing_swap_cpu_buffer(void *tr)
7179 {
7180 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7181 }
7182 
/*
 * Write handler for the snapshot file:
 *   echo 0 - free the snapshot buffer (all-CPUs file only)
 *   echo 1 - allocate/arm the snapshot and swap it with the live buffer
 *   else   - clear the snapshot buffer's content
 */
static ssize_t
tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
		       loff_t *ppos)
{
	struct seq_file *m = filp->private_data;
	struct trace_iterator *iter = m->private;
	struct trace_array *tr = iter->tr;
	unsigned long val;
	int ret;

	ret = tracing_update_buffers(tr);
	if (ret < 0)
		return ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	guard(mutex)(&trace_types_lock);

	/* A tracer that manages the snapshot itself owns it exclusively */
	if (tracer_uses_snapshot(tr->current_trace))
		return -EBUSY;

	/* So does an armed conditional snapshot */
	local_irq_disable();
	arch_spin_lock(&tr->max_lock);
	if (tr->cond_snapshot)
		ret = -EBUSY;
	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();
	if (ret)
		return ret;

	switch (val) {
	case 0:
		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
			return -EINVAL;
		if (tr->allocated_snapshot)
			free_snapshot(tr);
		break;
	case 1:
/* Only allow per-cpu swap if the ring buffer supports it */
#ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
			return -EINVAL;
#endif
		if (tr->allocated_snapshot)
			ret = resize_buffer_duplicate_size(&tr->snapshot_buffer,
					&tr->array_buffer, iter->cpu_file);

		/*
		 * NOTE(review): the resize result above is overwritten here,
		 * so a resize failure is silently ignored — confirm intended.
		 */
		ret = tracing_arm_snapshot_locked(tr);
		if (ret)
			return ret;

		/* Now, we're going to swap */
		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
			local_irq_disable();
			update_max_tr(tr, current, smp_processor_id(), NULL);
			local_irq_enable();
		} else {
			/* Swap a single CPU's buffer on that CPU itself */
			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
						 (void *)tr, 1);
		}
		tracing_disarm_snapshot(tr);
		break;
	default:
		if (tr->allocated_snapshot) {
			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
				tracing_reset_online_cpus(&tr->snapshot_buffer);
			else
				tracing_reset_cpu(&tr->snapshot_buffer, iter->cpu_file);
		}
		break;
	}

	if (ret >= 0) {
		*ppos += cnt;
		ret = cnt;
	}

	return ret;
}
7264 
tracing_snapshot_release(struct inode * inode,struct file * file)7265 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7266 {
7267 	struct seq_file *m = file->private_data;
7268 	int ret;
7269 
7270 	ret = tracing_release(inode, file);
7271 
7272 	if (file->f_mode & FMODE_READ)
7273 		return ret;
7274 
7275 	/* If write only, the seq_file is just a stub */
7276 	if (m)
7277 		kfree(m->private);
7278 	kfree(m);
7279 
7280 	return 0;
7281 }
7282 
7283 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7284 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7285 				    size_t count, loff_t *ppos);
7286 static int tracing_buffers_release(struct inode *inode, struct file *file);
7287 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7288 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7289 
/*
 * Open snapshot_raw: same setup as the per CPU raw buffer files, but the
 * iterator is redirected at the snapshot buffer instead of the live one.
 */
static int snapshot_raw_open(struct inode *inode, struct file *filp)
{
	struct ftrace_buffer_info *info;
	int ret;

	/* The following checks for tracefs lockdown */
	ret = tracing_buffers_open(inode, filp);
	if (ret < 0)
		return ret;

	info = filp->private_data;

	/* A tracer that manages the snapshot itself owns it exclusively */
	if (tracer_uses_snapshot(info->iter.trace)) {
		tracing_buffers_release(inode, filp);
		return -EBUSY;
	}

	info->iter.snapshot = true;
	info->iter.array_buffer = &info->iter.tr->snapshot_buffer;

	return ret;
}
7312 
7313 #endif /* CONFIG_TRACER_SNAPSHOT */
7314 
7315 
/* fops for the tracing_thresh file */
static const struct file_operations tracing_thresh_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_thresh_read,
	.write		= tracing_thresh_write,
	.llseek		= generic_file_llseek,
};

#ifdef CONFIG_TRACER_MAX_TRACE
/* fops for the per-instance tracing_max_latency file */
static const struct file_operations tracing_max_lat_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_max_lat_read,
	.write		= tracing_max_lat_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};
#endif

/* fops for current_tracer: select the active tracer by name */
static const struct file_operations set_tracer_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_set_trace_read,
	.write		= tracing_set_trace_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};

/* fops for trace_pipe: pollable streaming reader */
static const struct file_operations tracing_pipe_fops = {
	.open		= tracing_open_pipe,
	.poll		= tracing_poll_pipe,
	.read		= tracing_read_pipe,
	.splice_read	= tracing_splice_read_pipe,
	.release	= tracing_release_pipe,
};

/* fops for the ring buffer entries (size) control file */
static const struct file_operations tracing_entries_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_entries_read,
	.write		= tracing_entries_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};

/* fops for the syscall buffer size control file */
static const struct file_operations tracing_syscall_buf_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_syscall_buf_read,
	.write		= tracing_syscall_buf_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};

/* fops for the read-only per CPU buffer meta file */
static const struct file_operations tracing_buffer_meta_fops = {
	.open		= tracing_buffer_meta_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_seq_release,
};

/* fops for the read-only total entries file */
static const struct file_operations tracing_total_entries_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_total_entries_read,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};

/* fops for free_buffer: write-only trigger to free the ring buffer */
static const struct file_operations tracing_free_buffer_fops = {
	.open		= tracing_open_generic_tr,
	.write		= tracing_free_buffer_write,
	.release	= tracing_free_buffer_release,
};

/* trace_marker: write-only; open uses stream_open() so there is no llseek */
static const struct file_operations tracing_mark_fops = {
	.open		= tracing_mark_open,
	.write		= tracing_mark_write,
	.release	= tracing_mark_release,
};

/* trace_marker_raw: same open/release as trace_marker, binary payloads */
static const struct file_operations tracing_mark_raw_fops = {
	.open		= tracing_mark_open,
	.write		= tracing_mark_raw_write,
	.release	= tracing_mark_raw_write == NULL ? NULL : tracing_mark_release,
};

/* fops for trace_clock: list/select the trace clock */
static const struct file_operations trace_clock_fops = {
	.open		= tracing_clock_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_single_release_tr,
	.write		= tracing_clock_write,
};

/* fops for timestamp_mode (read-only) */
static const struct file_operations trace_time_stamp_mode_fops = {
	.open		= tracing_time_stamp_mode_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_single_release_tr,
};

/* fops for the read-only last_boot_info file */
static const struct file_operations last_boot_fops = {
	.open		= tracing_last_boot_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_seq_release,
};

#ifdef CONFIG_TRACER_SNAPSHOT
/* fops for the snapshot file (read, write commands, lseek) */
static const struct file_operations snapshot_fops = {
	.open		= tracing_snapshot_open,
	.read		= seq_read,
	.write		= tracing_snapshot_write,
	.llseek		= tracing_lseek,
	.release	= tracing_snapshot_release,
};

/* fops for snapshot_raw: raw page reads of the snapshot buffer */
static const struct file_operations snapshot_raw_fops = {
	.open		= snapshot_raw_open,
	.read		= tracing_buffers_read,
	.release	= tracing_buffers_release,
	.splice_read	= tracing_buffers_splice_read,
};

#endif /* CONFIG_TRACER_SNAPSHOT */
7436 
7437 /*
7438  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7439  * @filp: The active open file structure
7440  * @ubuf: The userspace provided buffer to read value into
7441  * @cnt: The maximum number of bytes to read
7442  * @ppos: The current "file" position
7443  *
7444  * This function implements the write interface for a struct trace_min_max_param.
7445  * The filp->private_data must point to a trace_min_max_param structure that
7446  * defines where to write the value, the min and the max acceptable values,
7447  * and a lock to protect the write.
7448  */
7449 static ssize_t
trace_min_max_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7450 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7451 {
7452 	struct trace_min_max_param *param = filp->private_data;
7453 	u64 val;
7454 	int err;
7455 
7456 	if (!param)
7457 		return -EFAULT;
7458 
7459 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7460 	if (err)
7461 		return err;
7462 
7463 	if (param->lock)
7464 		mutex_lock(param->lock);
7465 
7466 	if (param->min && val < *param->min)
7467 		err = -EINVAL;
7468 
7469 	if (param->max && val > *param->max)
7470 		err = -EINVAL;
7471 
7472 	if (!err)
7473 		*param->val = val;
7474 
7475 	if (param->lock)
7476 		mutex_unlock(param->lock);
7477 
7478 	if (err)
7479 		return err;
7480 
7481 	return cnt;
7482 }
7483 
7484 /*
7485  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7486  * @filp: The active open file structure
7487  * @ubuf: The userspace provided buffer to read value into
7488  * @cnt: The maximum number of bytes to read
7489  * @ppos: The current "file" position
7490  *
7491  * This function implements the read interface for a struct trace_min_max_param.
7492  * The filp->private_data must point to a trace_min_max_param struct with valid
7493  * data.
7494  */
7495 static ssize_t
trace_min_max_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)7496 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7497 {
7498 	struct trace_min_max_param *param = filp->private_data;
7499 	char buf[U64_STR_SIZE];
7500 	int len;
7501 	u64 val;
7502 
7503 	if (!param)
7504 		return -EFAULT;
7505 
7506 	val = *param->val;
7507 
7508 	if (cnt > sizeof(buf))
7509 		cnt = sizeof(buf);
7510 
7511 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7512 
7513 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7514 }
7515 
/* Generic fops for u64 tracefs parameters with optional min/max bounds */
const struct file_operations trace_min_max_fops = {
	.open		= tracing_open_generic,
	.read		= trace_min_max_read,
	.write		= trace_min_max_write,
};
7521 
/* Maximum number of entries kept in tracing/error_log */
#define TRACING_LOG_ERRS_MAX	8
/* Maximum length of the "<loc>: error: " location string */
#define TRACING_LOG_LOC_MAX	128

#define CMD_PREFIX "  Command: "

struct err_info {
	const char	**errs;	/* ptr to loc-specific array of err strings */
	u8		type;	/* index into errs -> specific err string */
	u16		pos;	/* caret position */
	u64		ts;	/* local_clock() timestamp of the error */
};

struct tracing_log_err {
	struct list_head	list;
	struct err_info		info;
	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
	char			*cmd;                     /* what caused err */
};

/* Serializes all access to the per-array err_log lists */
static DEFINE_MUTEX(tracing_err_log_lock);
7542 
alloc_tracing_log_err(int len)7543 static struct tracing_log_err *alloc_tracing_log_err(int len)
7544 {
7545 	struct tracing_log_err *err;
7546 
7547 	err = kzalloc_obj(*err);
7548 	if (!err)
7549 		return ERR_PTR(-ENOMEM);
7550 
7551 	err->cmd = kzalloc(len, GFP_KERNEL);
7552 	if (!err->cmd) {
7553 		kfree(err);
7554 		return ERR_PTR(-ENOMEM);
7555 	}
7556 
7557 	return err;
7558 }
7559 
/* Free an error-log entry and the command string it owns */
static void free_tracing_log_err(struct tracing_log_err *err)
{
	kfree(err->cmd);
	kfree(err);
}
7565 
/*
 * Get an error-log entry able to hold a command of @len bytes: allocate a
 * new one until TRACING_LOG_ERRS_MAX entries exist, then recycle the
 * oldest entry of @tr's log. Caller holds tracing_err_log_lock.
 */
static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
						   int len)
{
	struct tracing_log_err *err;
	char *cmd;

	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
		err = alloc_tracing_log_err(len);
		/* Only successful allocations count toward the limit */
		if (PTR_ERR(err) != -ENOMEM)
			tr->n_err_log_entries++;

		return err;
	}
	/* At capacity: reuse the oldest entry with a freshly sized cmd buffer */
	cmd = kzalloc(len, GFP_KERNEL);
	if (!cmd)
		return ERR_PTR(-ENOMEM);
	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
	kfree(err->cmd);
	err->cmd = cmd;
	list_del(&err->list);

	return err;
}
7589 
/**
 * err_pos - find the position of a string within a command for error careting
 * @cmd: The tracing command that caused the error
 * @str: The string to position the caret at within @cmd
 *
 * Finds the position of the first occurrence of @str within @cmd.  The
 * return value can be passed to tracing_log_err() for caret placement
 * within @cmd.
 *
 * Returns the index within @cmd of the first occurrence of @str or 0
 * if @str was not found.
 */
unsigned int err_pos(char *cmd, const char *str)
{
	const char *hit;

	if (WARN_ON(!strlen(cmd)))
		return 0;

	hit = strstr(cmd, str);

	return hit ? hit - cmd : 0;
}
7615 
/**
 * tracing_log_err - write an error to the tracing error log
 * @tr: The associated trace array for the error (NULL for top level array)
 * @loc: A string describing where the error occurred
 * @cmd: The tracing command that caused the error
 * @errs: The array of loc-specific static error strings
 * @type: The index into errs[], which produces the specific static err string
 * @pos: The position the caret should be placed in the cmd
 *
 * Writes an error into tracing/error_log of the form:
 *
 * <loc>: error: <text>
 *   Command: <cmd>
 *              ^
 *
 * tracing/error_log is a small log file containing the last
 * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
 * unless there has been a tracing error, and the error log can be
 * cleared and have its memory freed by writing the empty string in
 * truncation mode to it i.e. echo > tracing/error_log.
 *
 * NOTE: the @errs array along with the @type param are used to
 * produce a static error string - this string is not copied and saved
 * when the error is logged - only a pointer to it is saved.  See
 * existing callers for examples of how static strings are typically
 * defined for use with tracing_log_err().
 */
void tracing_log_err(struct trace_array *tr,
		     const char *loc, const char *cmd,
		     const char **errs, u8 type, u16 pos)
{
	struct tracing_log_err *err;
	int len = 0;

	if (!tr)
		tr = &global_trace;

	/* Room for "\n" CMD_PREFIX "<cmd>" "\n" plus a NUL terminator */
	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;

	guard(mutex)(&tracing_err_log_lock);

	/* Silently drop the error if no entry can be (re)used */
	err = get_tracing_log_err(tr, len);
	if (PTR_ERR(err) == -ENOMEM)
		return;

	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);

	err->info.errs = errs;
	err->info.type = type;
	err->info.pos = pos;
	err->info.ts = local_clock();

	/* Newest entries go to the tail; oldest are recycled from the head */
	list_add_tail(&err->list, &tr->err_log);
}
7671 
clear_tracing_err_log(struct trace_array * tr)7672 static void clear_tracing_err_log(struct trace_array *tr)
7673 {
7674 	struct tracing_log_err *err, *next;
7675 
7676 	guard(mutex)(&tracing_err_log_lock);
7677 
7678 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7679 		list_del(&err->list);
7680 		free_tracing_log_err(err);
7681 	}
7682 
7683 	tr->n_err_log_entries = 0;
7684 }
7685 
/* Start iterating the err_log; the lock is held until ->stop() runs */
static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
{
	struct trace_array *tr = m->private;

	mutex_lock(&tracing_err_log_lock);

	return seq_list_start(&tr->err_log, *pos);
}
7694 
/* Advance to the next err_log entry (lock taken in ->start()) */
static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_array *tr = m->private;

	return seq_list_next(v, &tr->err_log, pos);
}
7701 
/* Drop the lock taken in tracing_err_log_seq_start() */
static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&tracing_err_log_lock);
}
7706 
/* Print the "^" caret under column @pos of the echoed command line */
static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
{
	unsigned int i, indent = sizeof(CMD_PREFIX) - 1 + pos;

	for (i = 0; i < indent; i++)
		seq_putc(m, ' ');
	seq_puts(m, "^\n");
}
7717 
/* Emit one error_log entry: timestamp, location, text, command and caret */
static int tracing_err_log_seq_show(struct seq_file *m, void *v)
{
	struct tracing_log_err *err = v;

	if (err) {
		const char *err_text = err->info.errs[err->info.type];
		u64 sec = err->info.ts;
		u32 nsec;

		/* do_div() divides sec in place and returns the remainder */
		nsec = do_div(sec, NSEC_PER_SEC);
		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
			   err->loc, err_text);
		seq_printf(m, "%s", err->cmd);
		tracing_err_log_show_pos(m, err->info.pos);
	}

	return 0;
}
7736 
/* seq_file iteration over a trace array's error log */
static const struct seq_operations tracing_err_log_seq_ops = {
	.start  = tracing_err_log_seq_start,
	.next   = tracing_err_log_seq_next,
	.stop   = tracing_err_log_seq_stop,
	.show   = tracing_err_log_seq_show
};
7743 
/* Open tracing/error_log; an O_TRUNC open clears the log first */
static int tracing_err_log_open(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	int ret = 0;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	/* If this file was opened for write, then erase contents */
	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
		clear_tracing_err_log(tr);

	if (file->f_mode & FMODE_READ) {
		ret = seq_open(file, &tracing_err_log_seq_ops);
		if (!ret) {
			struct seq_file *m = file->private_data;
			m->private = tr;
		} else {
			/* Drop the reference taken by tracing_check_open_get_tr() */
			trace_array_put(tr);
		}
	}
	return ret;
}
7768 
/*
 * Writes to "error_log" are accepted but discarded; clearing the log is
 * done via the O_TRUNC handling in tracing_err_log_open().
 */
static ssize_t tracing_err_log_write(struct file *file,
				     const char __user *buffer,
				     size_t count, loff_t *ppos)
{
	return count;
}
7775 
/* Release "error_log": drop the tr reference and tear down the seq_file. */
static int tracing_err_log_release(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;

	trace_array_put(tr);

	/* seq_open() was only done for readers */
	if (file->f_mode & FMODE_READ)
		seq_release(inode, file);

	return 0;
}
7787 
/* file_operations for the per-instance "error_log" tracefs file */
static const struct file_operations tracing_err_log_fops = {
	.open           = tracing_err_log_open,
	.write		= tracing_err_log_write,
	.read           = seq_read,
	.llseek         = tracing_lseek,
	.release        = tracing_err_log_release,
};
7795 
/*
 * Open a per-cpu "trace_pipe_raw" file.  Allocates the ftrace_buffer_info
 * holding the iterator state and takes both a trace_array reference and a
 * trace_ref count (the latter blocks buffer resizing while readers exist).
 */
static int tracing_buffers_open(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;
	struct ftrace_buffer_info *info;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	info = kvzalloc_obj(*info);
	if (!info) {
		trace_array_put(tr);
		return -ENOMEM;
	}

	/* trace_types_lock protects current_trace and trace_ref */
	mutex_lock(&trace_types_lock);

	info->iter.tr		= tr;
	info->iter.cpu_file	= tracing_get_cpu(inode);
	info->iter.trace	= tr->current_trace;
	info->iter.array_buffer = &tr->array_buffer;
	info->spare		= NULL;
	/* Force reading ring buffer for first read */
	info->read		= (unsigned int)-1;

	filp->private_data = info;

	tr->trace_ref++;

	mutex_unlock(&trace_types_lock);

	ret = nonseekable_open(inode, filp);
	if (ret < 0)
		trace_array_put(tr);

	return ret;
}
7834 
7835 static __poll_t
tracing_buffers_poll(struct file * filp,poll_table * poll_table)7836 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7837 {
7838 	struct ftrace_buffer_info *info = filp->private_data;
7839 	struct trace_iterator *iter = &info->iter;
7840 
7841 	return trace_poll(iter, filp, poll_table);
7842 }
7843 
/*
 * read() on trace_pipe_raw: copy raw sub-buffer pages to user space.
 * A "spare" page is kept in info so partially consumed pages survive
 * across read() calls (info->read is the offset into the spare page).
 */
static ssize_t
tracing_buffers_read(struct file *filp, char __user *ubuf,
		     size_t count, loff_t *ppos)
{
	struct ftrace_buffer_info *info = filp->private_data;
	struct trace_iterator *iter = &info->iter;
	void *trace_data;
	int page_size;
	ssize_t ret = 0;
	ssize_t size;

	if (!count)
		return 0;

	/* Raw reads of a snapshot clash with tracers that use the snapshot */
	if (iter->snapshot && tracer_uses_snapshot(iter->tr->current_trace))
		return -EBUSY;

	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);

	/* Make sure the spare matches the current sub buffer size */
	if (info->spare) {
		if (page_size != info->spare_size) {
			ring_buffer_free_read_page(iter->array_buffer->buffer,
						   info->spare_cpu, info->spare);
			info->spare = NULL;
		}
	}

	if (!info->spare) {
		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
							  iter->cpu_file);
		if (IS_ERR(info->spare)) {
			ret = PTR_ERR(info->spare);
			info->spare = NULL;
		} else {
			info->spare_cpu = iter->cpu_file;
			info->spare_size = page_size;
		}
	}
	if (!info->spare)
		return ret;

	/* Do we have previous read data to read? */
	if (info->read < page_size)
		goto read;

 again:
	trace_access_lock(iter->cpu_file);
	ret = ring_buffer_read_page(iter->array_buffer->buffer,
				    info->spare,
				    count,
				    iter->cpu_file, 0);
	trace_access_unlock(iter->cpu_file);

	if (ret < 0) {
		/* Nothing to read: block (unless O_NONBLOCK) and retry */
		if (trace_empty(iter) && !iter->closed) {
			if (update_last_data_if_empty(iter->tr))
				return 0;

			if ((filp->f_flags & O_NONBLOCK))
				return -EAGAIN;

			ret = wait_on_pipe(iter, 0);
			if (ret)
				return ret;

			goto again;
		}
		return 0;
	}

	info->read = 0;
 read:
	size = page_size - info->read;
	if (size > count)
		size = count;
	trace_data = ring_buffer_read_page_data(info->spare);
	ret = copy_to_user(ubuf, trace_data + info->read, size);
	/* ret is the number of bytes NOT copied; all failed -> -EFAULT */
	if (ret == size)
		return -EFAULT;

	size -= ret;

	*ppos += size;
	info->read += size;

	return size;
}
7932 
/*
 * flush() on trace_pipe_raw (called on close of each file descriptor):
 * mark the iterator closed and kick any blocked readers so they return.
 */
static int tracing_buffers_flush(struct file *file, fl_owner_t id)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;

	iter->closed = true;
	/* Make sure the waiters see the new wait_index */
	(void)atomic_fetch_inc_release(&iter->wait_index);

	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);

	return 0;
}
7946 
/*
 * Final release of trace_pipe_raw: drop the trace_ref and tr reference
 * taken at open, return the spare page, and free the buffer info.
 */
static int tracing_buffers_release(struct inode *inode, struct file *file)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;

	/* trace_types_lock serializes trace_ref against resizers */
	guard(mutex)(&trace_types_lock);

	iter->tr->trace_ref--;

	__trace_array_put(iter->tr);

	if (info->spare)
		ring_buffer_free_read_page(iter->array_buffer->buffer,
					   info->spare_cpu, info->spare);
	kvfree(info);

	return 0;
}
7965 
/* Refcounted handle for a ring-buffer read page spliced into a pipe */
struct buffer_ref {
	struct trace_buffer	*buffer;	/* buffer the page came from */
	void			*page;		/* page from ring_buffer_alloc_read_page() */
	int			cpu;		/* cpu the page was read from */
	refcount_t		refcount;	/* pipe buffers sharing this page */
};
7972 
buffer_ref_release(struct buffer_ref * ref)7973 static void buffer_ref_release(struct buffer_ref *ref)
7974 {
7975 	if (!refcount_dec_and_test(&ref->refcount))
7976 		return;
7977 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7978 	kfree(ref);
7979 }
7980 
/* pipe_buf ->release: drop the buffer_ref stashed in buf->private. */
static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
				    struct pipe_buffer *buf)
{
	struct buffer_ref *ref = (struct buffer_ref *)buf->private;

	buffer_ref_release(ref);
	buf->private = 0;
}
7989 
/*
 * pipe_buf ->get: take another reference on the page.  Refuse once the
 * count gets implausibly large to guard against refcount overflow.
 */
static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
				struct pipe_buffer *buf)
{
	struct buffer_ref *ref = (struct buffer_ref *)buf->private;

	if (refcount_read(&ref->refcount) > INT_MAX/2)
		return false;

	refcount_inc(&ref->refcount);
	return true;
}
8001 
/* Pipe buffer operations for a buffer. */
static const struct pipe_buf_operations buffer_pipe_buf_ops = {
	.release		= buffer_pipe_buf_release,
	.get			= buffer_pipe_buf_get,
};
8007 
/*
 * Callback from splice_to_pipe(), if we need to release some pages
 * at the end of the spd in case we error'ed out in filling the pipe.
 */
static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
	struct buffer_ref *ref =
		(struct buffer_ref *)spd->partial[i].private;

	buffer_ref_release(ref);
	spd->partial[i].private = 0;
}
8020 
8021 static ssize_t
tracing_buffers_splice_read(struct file * file,loff_t * ppos,struct pipe_inode_info * pipe,size_t len,unsigned int flags)8022 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8023 			    struct pipe_inode_info *pipe, size_t len,
8024 			    unsigned int flags)
8025 {
8026 	struct ftrace_buffer_info *info = file->private_data;
8027 	struct trace_iterator *iter = &info->iter;
8028 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8029 	struct page *pages_def[PIPE_DEF_BUFFERS];
8030 	struct splice_pipe_desc spd = {
8031 		.pages		= pages_def,
8032 		.partial	= partial_def,
8033 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8034 		.ops		= &buffer_pipe_buf_ops,
8035 		.spd_release	= buffer_spd_release,
8036 	};
8037 	struct buffer_ref *ref;
8038 	bool woken = false;
8039 	int page_size;
8040 	int entries, i;
8041 	ssize_t ret = 0;
8042 
8043 	if (iter->snapshot && tracer_uses_snapshot(iter->tr->current_trace))
8044 		return -EBUSY;
8045 
8046 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8047 	if (*ppos & (page_size - 1))
8048 		return -EINVAL;
8049 
8050 	if (len & (page_size - 1)) {
8051 		if (len < page_size)
8052 			return -EINVAL;
8053 		len &= (~(page_size - 1));
8054 	}
8055 
8056 	if (splice_grow_spd(pipe, &spd))
8057 		return -ENOMEM;
8058 
8059  again:
8060 	trace_access_lock(iter->cpu_file);
8061 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8062 
8063 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8064 		struct page *page;
8065 		int r;
8066 
8067 		ref = kzalloc_obj(*ref);
8068 		if (!ref) {
8069 			ret = -ENOMEM;
8070 			break;
8071 		}
8072 
8073 		refcount_set(&ref->refcount, 1);
8074 		ref->buffer = iter->array_buffer->buffer;
8075 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8076 		if (IS_ERR(ref->page)) {
8077 			ret = PTR_ERR(ref->page);
8078 			ref->page = NULL;
8079 			kfree(ref);
8080 			break;
8081 		}
8082 		ref->cpu = iter->cpu_file;
8083 
8084 		r = ring_buffer_read_page(ref->buffer, ref->page,
8085 					  len, iter->cpu_file, 1);
8086 		if (r < 0) {
8087 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8088 						   ref->page);
8089 			kfree(ref);
8090 			break;
8091 		}
8092 
8093 		page = virt_to_page(ring_buffer_read_page_data(ref->page));
8094 
8095 		spd.pages[i] = page;
8096 		spd.partial[i].len = page_size;
8097 		spd.partial[i].offset = 0;
8098 		spd.partial[i].private = (unsigned long)ref;
8099 		spd.nr_pages++;
8100 		*ppos += page_size;
8101 
8102 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8103 	}
8104 
8105 	trace_access_unlock(iter->cpu_file);
8106 	spd.nr_pages = i;
8107 
8108 	/* did we read anything? */
8109 	if (!spd.nr_pages) {
8110 
8111 		if (ret)
8112 			goto out;
8113 
8114 		if (woken)
8115 			goto out;
8116 
8117 		ret = -EAGAIN;
8118 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8119 			goto out;
8120 
8121 		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8122 		if (ret)
8123 			goto out;
8124 
8125 		/* No need to wait after waking up when tracing is off */
8126 		if (!tracer_tracing_is_on(iter->tr))
8127 			goto out;
8128 
8129 		/* Iterate one more time to collect any new data then exit */
8130 		woken = true;
8131 
8132 		goto again;
8133 	}
8134 
8135 	ret = splice_to_pipe(pipe, &spd);
8136 out:
8137 	splice_shrink_spd(&spd);
8138 
8139 	return ret;
8140 }
8141 
/*
 * ioctl() on trace_pipe_raw.  TRACE_MMAP_IOCTL_GET_READER flips the
 * mapped reader page (optionally waiting for data first); cmd == 0 is
 * a wake-up request for all waiters; anything else is unsupported.
 */
static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;
	int err;

	if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
		if (!(file->f_flags & O_NONBLOCK)) {
			err = ring_buffer_wait(iter->array_buffer->buffer,
					       iter->cpu_file,
					       iter->tr->buffer_percent,
					       NULL, NULL);
			if (err)
				return err;
		}

		return ring_buffer_map_get_reader(iter->array_buffer->buffer,
						  iter->cpu_file);
	} else if (cmd) {
		return -ENOTTY;
	}

	/*
	 * An ioctl call with cmd 0 to the ring buffer file will wake up all
	 * waiters
	 */
	guard(mutex)(&trace_types_lock);

	/* Make sure the waiters see the new wait_index */
	(void)atomic_fetch_inc_release(&iter->wait_index);

	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);

	return 0;
}
8177 
#ifdef CONFIG_TRACER_SNAPSHOT
/*
 * Account a user-space mapping of the buffer.  Mapping and snapshotting
 * are mutually exclusive: fail with -EBUSY if a snapshot buffer is armed
 * (or the mapped count would overflow).
 */
static int get_snapshot_map(struct trace_array *tr)
{
	int err = 0;

	/*
	 * Called with mmap_lock held. lockdep would be unhappy if we would now
	 * take trace_types_lock. Instead use the specific
	 * snapshot_trigger_lock.
	 */
	spin_lock(&tr->snapshot_trigger_lock);

	if (tr->snapshot || tr->mapped == UINT_MAX)
		err = -EBUSY;
	else
		tr->mapped++;

	spin_unlock(&tr->snapshot_trigger_lock);

	/* Wait for update_max_tr() to observe iter->tr->mapped */
	if (tr->mapped == 1)
		synchronize_rcu();

	return err;

}
/* Undo get_snapshot_map(); WARNs on an unbalanced put. */
static void put_snapshot_map(struct trace_array *tr)
{
	spin_lock(&tr->snapshot_trigger_lock);
	if (!WARN_ON(!tr->mapped))
		tr->mapped--;
	spin_unlock(&tr->snapshot_trigger_lock);
}
#else
/* Without CONFIG_TRACER_SNAPSHOT there is no snapshot to conflict with */
static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
static inline void put_snapshot_map(struct trace_array *tr) { }
#endif
8215 
/* vm_ops ->close: unmap the ring buffer and release the snapshot hold. */
static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
{
	struct ftrace_buffer_info *info = vma->vm_file->private_data;
	struct trace_iterator *iter = &info->iter;

	WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
	put_snapshot_map(iter->tr);
}
8224 
/* vm_ops ->may_split: reject splitting the mapping. */
static int tracing_buffers_may_split(struct vm_area_struct *vma, unsigned long addr)
{
	/*
	 * Trace buffer mappings require the complete buffer including
	 * the meta page. Partial mappings are not supported.
	 */
	return -EINVAL;
}
8233 
/* VMA callbacks for user-space mappings of trace_pipe_raw */
static const struct vm_operations_struct tracing_buffers_vmops = {
	.close		= tracing_buffers_mmap_close,
	.may_split      = tracing_buffers_may_split,
};
8238 
/*
 * mmap() on trace_pipe_raw: map the per-cpu ring buffer into user space.
 * Takes a snapshot-exclusion hold first; the hold is released either on
 * map failure here or via vm_ops ->close when the mapping goes away.
 */
static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct ftrace_buffer_info *info = filp->private_data;
	struct trace_iterator *iter = &info->iter;
	int ret = 0;

	/* A memmap'ed and backup buffers are not supported for user space mmap */
	if (iter->tr->flags & (TRACE_ARRAY_FL_MEMMAP | TRACE_ARRAY_FL_VMALLOC))
		return -ENODEV;

	ret = get_snapshot_map(iter->tr);
	if (ret)
		return ret;

	ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
	if (ret)
		put_snapshot_map(iter->tr);

	/*
	 * NOTE(review): vm_ops is set even on failure; presumably harmless
	 * since a failed mmap discards the vma without calling ->close.
	 */
	vma->vm_ops = &tracing_buffers_vmops;

	return ret;
}
8261 
/* file_operations for the per-cpu "trace_pipe_raw" tracefs file */
static const struct file_operations tracing_buffers_fops = {
	.open		= tracing_buffers_open,
	.read		= tracing_buffers_read,
	.poll		= tracing_buffers_poll,
	.release	= tracing_buffers_release,
	.flush		= tracing_buffers_flush,
	.splice_read	= tracing_buffers_splice_read,
	.unlocked_ioctl = tracing_buffers_ioctl,
	.mmap		= tracing_buffers_mmap,
};
8272 
/*
 * read() for the per-cpu "stats" file: format the ring-buffer counters
 * (entries, overruns, bytes, timestamps, dropped/read events) for one cpu
 * into a temporary trace_seq and copy it to user space.
 */
static ssize_t
tracing_stats_read(struct file *filp, char __user *ubuf,
		   size_t count, loff_t *ppos)
{
	struct inode *inode = file_inode(filp);
	struct trace_array *tr = inode->i_private;
	struct array_buffer *trace_buf = &tr->array_buffer;
	int cpu = tracing_get_cpu(inode);
	struct trace_seq *s;
	unsigned long cnt;
	unsigned long long t;
	unsigned long usec_rem;

	s = kmalloc_obj(*s);
	if (!s)
		return -ENOMEM;

	trace_seq_init(s);

	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "entries: %ld\n", cnt);

	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "overrun: %ld\n", cnt);

	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "commit overrun: %ld\n", cnt);

	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "bytes: %ld\n", cnt);

	if (trace_clocks[tr->clock_id].in_ns) {
		/* local or global for trace_clock */
		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
		usec_rem = do_div(t, USEC_PER_SEC);
		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
								t, usec_rem);

		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
		usec_rem = do_div(t, USEC_PER_SEC);
		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
	} else {
		/* counter or tsc mode for trace_clock */
		trace_seq_printf(s, "oldest event ts: %llu\n",
				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));

		trace_seq_printf(s, "now ts: %llu\n",
				ring_buffer_time_stamp(trace_buf->buffer));
	}

	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "dropped events: %ld\n", cnt);

	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "read events: %ld\n", cnt);

	/* count is reused as the return value of the partial copy */
	count = simple_read_from_buffer(ubuf, count, ppos,
					s->buffer, trace_seq_used(s));

	kfree(s);

	return count;
}
8336 
/* file_operations for the per-cpu "stats" tracefs file */
static const struct file_operations tracing_stats_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_stats_read,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};
8343 
#ifdef CONFIG_DYNAMIC_FTRACE

/*
 * read() for "dyn_ftrace_total_info": report how many functions ftrace
 * has patched, the pages/groups used to hold their records, and the
 * time spent updating them at boot and for modules.
 */
static ssize_t
tracing_read_dyn_info(struct file *filp, char __user *ubuf,
		  size_t cnt, loff_t *ppos)
{
	ssize_t ret;
	char *buf;
	int r;

	/* 512 should be plenty to hold the amount needed */
#define DYN_INFO_BUF_SIZE	512

	buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	r = scnprintf(buf, DYN_INFO_BUF_SIZE,
		      "%ld pages:%ld groups: %ld\n"
		      "ftrace boot update time = %llu (ns)\n"
		      "ftrace module total update time = %llu (ns)\n",
		      ftrace_update_tot_cnt,
		      ftrace_number_of_pages,
		      ftrace_number_of_groups,
		      ftrace_update_time,
		      ftrace_total_mod_time);

	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
	kfree(buf);
	return ret;
}

/* file_operations for "dyn_ftrace_total_info" */
static const struct file_operations tracing_dyn_info_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_read_dyn_info,
	.llseek		= generic_file_llseek,
};
#endif /* CONFIG_DYNAMIC_FTRACE */
8382 
8383 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
/* "snapshot" probe callback (unlimited form): take a snapshot each hit. */
static void
ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
		struct trace_array *tr, struct ftrace_probe_ops *ops,
		void *data)
{
	tracing_snapshot_instance(tr);
}
8391 
/*
 * "snapshot:count" probe callback: take a snapshot per hit while the
 * per-ip counter is positive, decrementing it each time.
 */
static void
ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
		      struct trace_array *tr, struct ftrace_probe_ops *ops,
		      void *data)
{
	struct ftrace_func_mapper *mapper = data;
	long *count = NULL;

	if (mapper)
		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);

	if (count) {

		/* counter exhausted: stop snapshotting for this ip */
		if (*count <= 0)
			return;

		(*count)--;
	}

	tracing_snapshot_instance(tr);
}
8413 
/*
 * ->print for the snapshot probes: show "func:snapshot:count=N" or
 * "func:snapshot:unlimited" when listing set_ftrace_filter.
 */
static int
ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
		      struct ftrace_probe_ops *ops, void *data)
{
	struct ftrace_func_mapper *mapper = data;
	long *count = NULL;

	seq_printf(m, "%ps:", (void *)ip);

	seq_puts(m, "snapshot");

	if (mapper)
		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);

	if (count)
		seq_printf(m, ":count=%ld\n", *count);
	else
		seq_puts(m, ":unlimited\n");

	return 0;
}
8435 
/*
 * ->init for the counted snapshot probe: lazily allocate the ip->count
 * mapper and record the initial counter for this ip.
 */
static int
ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
		     unsigned long ip, void *init_data, void **data)
{
	struct ftrace_func_mapper *mapper = *data;

	if (!mapper) {
		mapper = allocate_ftrace_func_mapper();
		if (!mapper)
			return -ENOMEM;
		*data = mapper;
	}

	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
}
8451 
/*
 * ->free for the counted snapshot probe: ip == 0 means tear down the
 * whole mapper, otherwise remove just that ip's entry.
 */
static void
ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
		     unsigned long ip, void *data)
{
	struct ftrace_func_mapper *mapper = data;

	if (!ip) {
		if (!mapper)
			return;
		free_ftrace_func_mapper(mapper, NULL);
		return;
	}

	ftrace_func_mapper_remove_ip(mapper, ip);
}
8467 
/* Probe ops for "snapshot" without a count (no per-ip state needed) */
static struct ftrace_probe_ops snapshot_probe_ops = {
	.func			= ftrace_snapshot,
	.print			= ftrace_snapshot_print,
};

/* Probe ops for "snapshot:count" (per-ip counter via func mapper) */
static struct ftrace_probe_ops snapshot_count_probe_ops = {
	.func			= ftrace_count_snapshot,
	.print			= ftrace_snapshot_print,
	.init			= ftrace_snapshot_init,
	.free			= ftrace_snapshot_free,
};
8479 
/*
 * set_ftrace_filter command handler for "func:snapshot[:count]".
 * Registers (or, with a leading '!', unregisters) a function probe that
 * snapshots the instance, arming/disarming the snapshot buffer around it.
 */
static int
ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
			       char *glob, char *cmd, char *param, int enable)
{
	struct ftrace_probe_ops *ops;
	void *count = (void *)-1;	/* -1 == unlimited */
	char *number;
	int ret;

	if (!tr)
		return -ENODEV;

	/* hash funcs only work with set_ftrace_filter */
	if (!enable)
		return -EINVAL;

	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;

	if (glob[0] == '!') {
		ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
		if (!ret)
			tracing_disarm_snapshot(tr);

		return ret;
	}

	if (!param)
		goto out_reg;

	number = strsep(&param, ":");

	if (!strlen(number))
		goto out_reg;

	/*
	 * We use the callback data field (which is a pointer)
	 * as our counter.
	 */
	ret = kstrtoul(number, 0, (unsigned long *)&count);
	if (ret)
		return ret;

 out_reg:
	ret = tracing_arm_snapshot(tr);
	if (ret < 0)
		return ret;

	ret = register_ftrace_function_probe(glob, tr, ops, count);
	if (ret < 0)
		tracing_disarm_snapshot(tr);

	return ret < 0 ? ret : 0;
}
8533 
/* The set_ftrace_filter "snapshot" command descriptor */
static struct ftrace_func_command ftrace_snapshot_cmd = {
	.name			= "snapshot",
	.func			= ftrace_trace_snapshot_callback,
};

/* Register the "snapshot" command at boot. */
static __init int register_snapshot_cmd(void)
{
	return register_ftrace_command(&ftrace_snapshot_cmd);
}
#else
static inline __init int register_snapshot_cmd(void) { return 0; }
#endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8546 
/*
 * Return the tracefs directory dentry for this instance, or NULL for
 * the top-level instance (tracefs uses NULL as the root parent).
 */
static struct dentry *tracing_get_dentry(struct trace_array *tr)
{
	/* Top directory uses NULL as the parent */
	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
		return NULL;

	if (WARN_ON(!tr->dir))
		return ERR_PTR(-ENODEV);

	/* All sub buffers have a descriptor */
	return tr->dir;
}
8559 
/*
 * Return (creating on first use) the "per_cpu" directory of an instance.
 * Returns NULL on failure; @cpu is only used for the failure message.
 */
static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
{
	struct dentry *d_tracer;

	if (tr->percpu_dir)
		return tr->percpu_dir;

	d_tracer = tracing_get_dentry(tr);
	if (IS_ERR(d_tracer))
		return NULL;

	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);

	MEM_FAIL(!tr->percpu_dir,
		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);

	return tr->percpu_dir;
}
8578 
/*
 * Like trace_create_file() but tags the inode with cpu+1 in i_cdev so
 * tracing_get_cpu() can recover which cpu the file belongs to.
 */
static struct dentry *
trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
		      void *data, long cpu, const struct file_operations *fops)
{
	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);

	if (ret) /* See tracing_get_cpu() */
		d_inode(ret)->i_cdev = (void *)(cpu + 1);
	return ret;
}
8589 
/*
 * Populate per_cpu/cpuN for one cpu of an instance with its trace files
 * (trace_pipe, trace, trace_pipe_raw, stats, buffer_size_kb, and the
 * boot-mapped or snapshot files depending on configuration).
 */
static void
tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
{
	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
	struct dentry *d_cpu;
	char cpu_dir[30]; /* 30 characters should be more than enough */

	if (!d_percpu)
		return;

	snprintf(cpu_dir, 30, "cpu%ld", cpu);
	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
	if (!d_cpu) {
		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
		return;
	}

	/* per cpu trace_pipe */
	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
				tr, cpu, &tracing_pipe_fops);

	/* per cpu trace */
	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
				tr, cpu, &tracing_fops);

	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
				tr, cpu, &tracing_buffers_fops);

	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
				tr, cpu, &tracing_stats_fops);

	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_WRITE, d_cpu,
				tr, cpu, &tracing_entries_fops);

	/* range_addr_start is set for boot-mapped (persistent) buffers */
	if (tr->range_addr_start)
		trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
				      tr, cpu, &tracing_buffer_meta_fops);
#ifdef CONFIG_TRACER_SNAPSHOT
	if (!tr->range_addr_start) {
		trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
				      tr, cpu, &snapshot_fops);

		trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
				      tr, cpu, &snapshot_raw_fops);
	}
#endif
}
8637 
8638 #ifdef CONFIG_FTRACE_SELFTEST
8639 /* Let selftest have access to static functions in this file */
8640 #include "trace_selftest.c"
8641 #endif
8642 
8643 static ssize_t
trace_options_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8644 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8645 			loff_t *ppos)
8646 {
8647 	struct trace_option_dentry *topt = filp->private_data;
8648 	char *buf;
8649 
8650 	if (topt->flags->val & topt->opt->bit)
8651 		buf = "1\n";
8652 	else
8653 		buf = "0\n";
8654 
8655 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8656 }
8657 
/*
 * Write "0" or "1" to a tracer-specific option file.  Only calls into
 * __set_tracer_option() when the value actually changes; the last
 * argument is the "neg" flag (clear the bit when val is 1 -> neg 0).
 */
static ssize_t
trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
			 loff_t *ppos)
{
	struct trace_option_dentry *topt = filp->private_data;
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	if (val != 0 && val != 1)
		return -EINVAL;

	if (!!(topt->flags->val & topt->opt->bit) != val) {
		guard(mutex)(&trace_types_lock);
		ret = __set_tracer_option(topt->tr, topt->flags,
					  topt->opt, !val);
		if (ret)
			return ret;
	}

	*ppos += cnt;

	return cnt;
}
8685 
/* Open a tracer option file: pin the trace_array for the file's lifetime. */
static int tracing_open_options(struct inode *inode, struct file *filp)
{
	struct trace_option_dentry *topt = inode->i_private;
	int ret;

	ret = tracing_check_open_get_tr(topt->tr);
	if (ret)
		return ret;

	filp->private_data = inode->i_private;
	return 0;
}
8698 
/* Release a tracer option file: drop the reference taken at open. */
static int tracing_release_options(struct inode *inode, struct file *file)
{
	struct trace_option_dentry *topt = file->private_data;

	trace_array_put(topt->tr);
	return 0;
}
8706 
/* file_operations for tracer-specific option files (options/<tracer>) */
static const struct file_operations trace_options_fops = {
	.open = tracing_open_options,
	.read = trace_options_read,
	.write = trace_options_write,
	.llseek	= generic_file_llseek,
	.release = tracing_release_options,
};
8714 
8715 /*
8716  * In order to pass in both the trace_array descriptor as well as the index
8717  * to the flag that the trace option file represents, the trace_array
8718  * has a character array of trace_flags_index[], which holds the index
8719  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8720  * The address of this character array is passed to the flag option file
8721  * read/write callbacks.
8722  *
8723  * In order to extract both the index and the trace_array descriptor,
8724  * get_tr_index() uses the following algorithm.
8725  *
8726  *   idx = *ptr;
8727  *
8728  * As the pointer itself contains the address of the index (remember
8729  * index[1] == 1).
8730  *
8731  * Then to get the trace_array descriptor, by subtracting that index
8732  * from the ptr, we get to the start of the index itself.
8733  *
8734  *   ptr - idx == &index[0]
8735  *
8736  * Then a simple container_of() from that pointer gets us to the
8737  * trace_array descriptor.
8738  */
/*
 * Decode a &tr->trace_flags_index[i] pointer into the flag index and the
 * owning trace_array (see the algorithm description in the comment above).
 */
static void get_tr_index(void *data, struct trace_array **ptr,
			 unsigned int *pindex)
{
	*pindex = *(unsigned char *)data;

	*ptr = container_of(data - *pindex, struct trace_array,
			    trace_flags_index);
}
8747 
8748 static ssize_t
trace_options_core_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8749 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8750 			loff_t *ppos)
8751 {
8752 	void *tr_index = filp->private_data;
8753 	struct trace_array *tr;
8754 	unsigned int index;
8755 	char *buf;
8756 
8757 	get_tr_index(tr_index, &tr, &index);
8758 
8759 	if (tr->trace_flags & (1ULL << index))
8760 		buf = "1\n";
8761 	else
8762 		buf = "0\n";
8763 
8764 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8765 }
8766 
/*
 * Write handler for a core trace-flag file.  Accepts "0" or "1" and
 * toggles the corresponding flag bit via set_tracer_flag().
 */
static ssize_t
trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
			 loff_t *ppos)
{
	void *tr_index = filp->private_data;
	struct trace_array *tr;
	unsigned int index;
	unsigned long val;
	int ret;

	get_tr_index(tr_index, &tr, &index);

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	/* Only 0 or 1 is a valid flag state. */
	if (val != 0 && val != 1)
		return -EINVAL;

	/* Lock order used throughout this file: event_mutex first. */
	mutex_lock(&event_mutex);
	mutex_lock(&trace_types_lock);
	ret = set_tracer_flag(tr, 1ULL << index, val);
	mutex_unlock(&trace_types_lock);
	mutex_unlock(&event_mutex);

	if (ret < 0)
		return ret;

	*ppos += cnt;

	return cnt;
}
8799 
/* File operations for the core trace-flag files under "options/". */
static const struct file_operations trace_options_core_fops = {
	.open = tracing_open_generic,
	.read = trace_options_core_read,
	.write = trace_options_core_write,
	.llseek = generic_file_llseek,
};
8806 
/*
 * Thin wrapper around tracefs_create_file() that warns on failure.
 * Returns the new dentry, or NULL if the file could not be created.
 */
struct dentry *trace_create_file(const char *name,
				 umode_t mode,
				 struct dentry *parent,
				 void *data,
				 const struct file_operations *fops)
{
	struct dentry *dentry;

	dentry = tracefs_create_file(name, mode, parent, data, fops);
	if (!dentry)
		pr_warn("Could not create tracefs '%s' entry\n", name);

	return dentry;
}
8821 
8822 
trace_options_init_dentry(struct trace_array * tr)8823 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8824 {
8825 	struct dentry *d_tracer;
8826 
8827 	if (tr->options)
8828 		return tr->options;
8829 
8830 	d_tracer = tracing_get_dentry(tr);
8831 	if (IS_ERR(d_tracer))
8832 		return NULL;
8833 
8834 	tr->options = tracefs_create_dir("options", d_tracer);
8835 	if (!tr->options) {
8836 		pr_warn("Could not create tracefs directory 'options'\n");
8837 		return NULL;
8838 	}
8839 
8840 	return tr->options;
8841 }
8842 
8843 static void
create_trace_option_file(struct trace_array * tr,struct trace_option_dentry * topt,struct tracer_flags * flags,struct tracer_opt * opt)8844 create_trace_option_file(struct trace_array *tr,
8845 			 struct trace_option_dentry *topt,
8846 			 struct tracer_flags *flags,
8847 			 struct tracer_opt *opt)
8848 {
8849 	struct dentry *t_options;
8850 
8851 	t_options = trace_options_init_dentry(tr);
8852 	if (!t_options)
8853 		return;
8854 
8855 	topt->flags = flags;
8856 	topt->opt = opt;
8857 	topt->tr = tr;
8858 
8859 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8860 					t_options, topt, &trace_options_fops);
8861 }
8862 
/*
 * Create one option file per flag of @tracer for instance @tr, and
 * record the topts array in tr->topts so it can be freed on removal.
 * Returns 0 on success (also when there is nothing to do), -ENOMEM on
 * allocation failure of the tracking array.
 */
static int
create_trace_option_files(struct trace_array *tr, struct tracer *tracer,
			  struct tracer_flags *flags)
{
	struct trace_option_dentry *topts;
	struct trace_options *tr_topts;
	struct tracer_opt *opts;
	int cnt;

	/* Nothing to create when the tracer exposes no option flags. */
	if (!flags || !flags->opts)
		return 0;

	opts = flags->opts;

	/* The opts array is terminated by an entry with a NULL name. */
	for (cnt = 0; opts[cnt].name; cnt++)
		;

	/* cnt + 1 zeroed entries; the extra one presumably terminates the array. */
	topts = kzalloc_objs(*topts, cnt + 1);
	if (!topts)
		return 0;

	/* Grow tr->topts by one slot; on failure tr->topts stays valid. */
	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
			    GFP_KERNEL);
	if (!tr_topts) {
		kfree(topts);
		return -ENOMEM;
	}

	tr->topts = tr_topts;
	tr->topts[tr->nr_topts].tracer = tracer;
	tr->topts[tr->nr_topts].topts = topts;
	tr->nr_topts++;

	for (cnt = 0; opts[cnt].name; cnt++) {
		create_trace_option_file(tr, &topts[cnt], flags,
					 &opts[cnt]);
		/* Warn (via MEM_FAIL) but keep going if one file fails. */
		MEM_FAIL(topts[cnt].entry == NULL,
			  "Failed to create trace option: %s",
			  opts[cnt].name);
	}
	return 0;
}
8905 
get_global_flags_val(struct tracer * tracer)8906 static int get_global_flags_val(struct tracer *tracer)
8907 {
8908 	struct tracers *t;
8909 
8910 	list_for_each_entry(t, &global_trace.tracers, list) {
8911 		if (t->tracer != tracer)
8912 			continue;
8913 		if (!t->flags)
8914 			return -1;
8915 		return t->flags->val;
8916 	}
8917 	return -1;
8918 }
8919 
add_tracer_options(struct trace_array * tr,struct tracers * t)8920 static int add_tracer_options(struct trace_array *tr, struct tracers *t)
8921 {
8922 	struct tracer *tracer = t->tracer;
8923 	struct tracer_flags *flags = t->flags ?: tracer->flags;
8924 
8925 	if (!flags)
8926 		return 0;
8927 
8928 	/* Only add tracer options after update_tracer_options finish */
8929 	if (!tracer_options_updated)
8930 		return 0;
8931 
8932 	return create_trace_option_files(tr, tracer, flags);
8933 }
8934 
/*
 * Register @tracer with instance @tr: add a struct tracers entry to
 * tr->tracers, clone per-instance flags when the tracer provides
 * default_flags, and create the tracer's option files.
 * Returns 0 on success or if the tracer is skipped, negative on error.
 */
static int add_tracer(struct trace_array *tr, struct tracer *tracer)
{
	struct tracer_flags *flags;
	struct tracers *t;
	int ret;

	/* Only enable if the directory has been created already. */
	if (!tr->dir && !(tr->flags & TRACE_ARRAY_FL_GLOBAL))
		return 0;

	/*
	 * If this is an instance, only create flags for tracers
	 * the instance may have.
	 */
	if (!trace_ok_for_array(tracer, tr))
		return 0;

	t = kmalloc_obj(*t);
	if (!t)
		return -ENOMEM;

	t->tracer = tracer;
	t->flags = NULL;
	list_add(&t->list, &tr->tracers);

	flags = tracer->flags;
	if (!flags) {
		if (!tracer->default_flags)
			return 0;

		/*
		 * If the tracer defines default flags, it means the flags are
		 * per trace instance.
		 */
		flags = kmalloc_obj(*flags);
		if (!flags)
			/*
			 * NOTE(review): t stays on tr->tracers here with
			 * t->flags == NULL — presumably reclaimed later by
			 * free_tracers(); confirm.
			 */
			return -ENOMEM;

		*flags = *tracer->default_flags;
		flags->trace = tracer;

		t->flags = flags;

		/* If this is an instance, inherit the global_trace flags */
		if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL)) {
			int val = get_global_flags_val(tracer);
			if (!WARN_ON_ONCE(val < 0))
				flags->val = val;
		}
	}

	ret = add_tracer_options(tr, t);
	if (ret < 0) {
		/* Undo the registration on failure. */
		list_del(&t->list);
		kfree(t->flags);
		kfree(t);
	}

	return ret;
}
8995 
8996 static struct dentry *
create_trace_option_core_file(struct trace_array * tr,const char * option,long index)8997 create_trace_option_core_file(struct trace_array *tr,
8998 			      const char *option, long index)
8999 {
9000 	struct dentry *t_options;
9001 
9002 	t_options = trace_options_init_dentry(tr);
9003 	if (!t_options)
9004 		return NULL;
9005 
9006 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9007 				 (void *)&tr->trace_flags_index[index],
9008 				 &trace_options_core_fops);
9009 }
9010 
create_trace_options_dir(struct trace_array * tr)9011 static void create_trace_options_dir(struct trace_array *tr)
9012 {
9013 	struct dentry *t_options;
9014 	bool top_level = tr == &global_trace;
9015 	int i;
9016 
9017 	t_options = trace_options_init_dentry(tr);
9018 	if (!t_options)
9019 		return;
9020 
9021 	for (i = 0; trace_options[i]; i++) {
9022 		if (top_level ||
9023 		    !((1ULL << i) & TOP_LEVEL_TRACE_FLAGS)) {
9024 			create_trace_option_core_file(tr, trace_options[i], i);
9025 		}
9026 	}
9027 }
9028 
9029 static ssize_t
rb_simple_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9030 rb_simple_read(struct file *filp, char __user *ubuf,
9031 	       size_t cnt, loff_t *ppos)
9032 {
9033 	struct trace_array *tr = filp->private_data;
9034 	char buf[64];
9035 	int r;
9036 
9037 	r = tracer_tracing_is_on(tr);
9038 	r = sprintf(buf, "%d\n", r);
9039 
9040 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9041 }
9042 
/*
 * "tracing_on" write handler: enables or disables recording into the
 * ring buffer, invoking the current tracer's start/stop hooks and
 * waking any readers blocked on the buffer when turning it off.
 */
static ssize_t
rb_simple_write(struct file *filp, const char __user *ubuf,
		size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	struct trace_buffer *buffer = tr->array_buffer.buffer;
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	if (buffer) {
		/* Serialize against tracer changes; released at scope exit. */
		guard(mutex)(&trace_types_lock);
		if (!!val == tracer_tracing_is_on(tr)) {
			val = 0; /* do nothing */
		} else if (val) {
			tracer_tracing_on(tr);
			if (tr->current_trace->start)
				tr->current_trace->start(tr);
		} else {
			tracer_tracing_off(tr);
			if (tr->current_trace->stop)
				tr->current_trace->stop(tr);
			/* Wake up any waiters */
			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
		}
	}

	(*ppos)++;

	return cnt;
}
9077 
/* File operations for the per-instance "tracing_on" file. */
static const struct file_operations rb_simple_fops = {
	.open		= tracing_open_generic_tr,
	.read		= rb_simple_read,
	.write		= rb_simple_write,
	.release	= tracing_release_generic_tr,
	.llseek		= default_llseek,
};
9085 
9086 static ssize_t
buffer_percent_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9087 buffer_percent_read(struct file *filp, char __user *ubuf,
9088 		    size_t cnt, loff_t *ppos)
9089 {
9090 	struct trace_array *tr = filp->private_data;
9091 	char buf[64];
9092 	int r;
9093 
9094 	r = tr->buffer_percent;
9095 	r = sprintf(buf, "%d\n", r);
9096 
9097 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9098 }
9099 
9100 static ssize_t
buffer_percent_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9101 buffer_percent_write(struct file *filp, const char __user *ubuf,
9102 		     size_t cnt, loff_t *ppos)
9103 {
9104 	struct trace_array *tr = filp->private_data;
9105 	unsigned long val;
9106 	int ret;
9107 
9108 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9109 	if (ret)
9110 		return ret;
9111 
9112 	if (val > 100)
9113 		return -EINVAL;
9114 
9115 	tr->buffer_percent = val;
9116 
9117 	(*ppos)++;
9118 
9119 	return cnt;
9120 }
9121 
/* File operations for the per-instance "buffer_percent" file. */
static const struct file_operations buffer_percent_fops = {
	.open		= tracing_open_generic_tr,
	.read		= buffer_percent_read,
	.write		= buffer_percent_write,
	.release	= tracing_release_generic_tr,
	.llseek		= default_llseek,
};
9129 
9130 static ssize_t
buffer_subbuf_size_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9131 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9132 {
9133 	struct trace_array *tr = filp->private_data;
9134 	size_t size;
9135 	char buf[64];
9136 	int order;
9137 	int r;
9138 
9139 	order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9140 	size = (PAGE_SIZE << order) / 1024;
9141 
9142 	r = sprintf(buf, "%zd\n", size);
9143 
9144 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9145 }
9146 
/*
 * "buffer_subbuf_size_kb" write handler: changes the sub-buffer page
 * order of the main (and, if allocated, snapshot) ring buffer.
 * Tracing is stopped for the duration of the change.
 *
 * Note: @cnt doubles as the return value and as scratch for the error
 * code of the rollback attempt — read the tail carefully.
 */
static ssize_t
buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
			 size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	unsigned long val;
	int old_order;
	int order;
	int pages;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	val *= 1024; /* value passed in is in KB */

	/* Round the byte count up to whole pages, then to a power of two. */
	pages = DIV_ROUND_UP(val, PAGE_SIZE);
	order = fls(pages - 1);

	/* limit between 1 and 128 system pages */
	if (order < 0 || order > 7)
		return -EINVAL;

	/* Do not allow tracing while changing the order of the ring buffer */
	tracing_stop_tr(tr);

	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
	if (old_order == order)
		goto out;

	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
	if (ret)
		goto out;

#ifdef CONFIG_TRACER_SNAPSHOT

	if (!tr->allocated_snapshot)
		goto out_max;

	/* Keep the snapshot buffer's order in sync with the main buffer. */
	ret = ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, order);
	if (ret) {
		/* Put back the old order */
		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
		if (WARN_ON_ONCE(cnt)) {
			/*
			 * AARGH! We are left with different orders!
			 * The max buffer is our "snapshot" buffer.
			 * When a tracer needs a snapshot (one of the
			 * latency tracers), it swaps the max buffer
			 * with the saved snap shot. We succeeded to
			 * update the order of the main buffer, but failed to
			 * update the order of the max buffer. But when we tried
			 * to reset the main buffer to the original size, we
			 * failed there too. This is very unlikely to
			 * happen, but if it does, warn and kill all
			 * tracing.
			 */
			tracing_disabled = 1;
		}
		goto out;
	}
 out_max:
#endif
	(*ppos)++;
 out:
	/* A pending error code overrides the byte count we return. */
	if (ret)
		cnt = ret;
	tracing_start_tr(tr);
	return cnt;
}
9218 
/* File operations for the per-instance "buffer_subbuf_size_kb" file. */
static const struct file_operations buffer_subbuf_size_fops = {
	.open		= tracing_open_generic_tr,
	.read		= buffer_subbuf_size_read,
	.write		= buffer_subbuf_size_write,
	.release	= tracing_release_generic_tr,
	.llseek		= default_llseek,
};
9226 
9227 static struct dentry *trace_instance_dir;
9228 
9229 static void
9230 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9231 
#ifdef CONFIG_MODULES
/*
 * module_for_each_mod() callback: for a module that appears in the
 * persistent scratch area, record the delta between its saved load
 * address and where it is mapped in this boot (or 0 when the module
 * is going away).  @data is the owning trace_array.
 */
static int make_mod_delta(struct module *mod, void *data)
{
	struct trace_module_delta *module_delta;
	struct trace_scratch *tscratch;
	struct trace_mod_entry *entry;
	struct trace_array *tr = data;
	int i;

	tscratch = tr->scratch;
	module_delta = READ_ONCE(tr->module_delta);
	for (i = 0; i < tscratch->nr_entries; i++) {
		entry = &tscratch->entries[i];
		/* Only the entry matching this module's name is updated. */
		if (strcmp(mod->name, entry->mod_name))
			continue;
		if (mod->state == MODULE_STATE_GOING)
			module_delta->delta[i] = 0;
		else
			module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base
						 - entry->mod_addr;
		break;
	}
	return 0;
}
#else
/* Module support disabled: nothing to compute. */
static int make_mod_delta(struct module *mod, void *data)
{
	return 0;
}
#endif
9262 
/*
 * sort_r() comparator ordering scratch entries by module load address.
 * Never returns 0 — presumably equal addresses do not occur here, so
 * no tie-break is needed.
 */
static int mod_addr_comp(const void *a, const void *b, const void *data)
{
	const struct trace_mod_entry *e1 = a;
	const struct trace_mod_entry *e2 = b;

	return e1->mod_addr > e2->mod_addr ? 1 : -1;
}
9270 
/*
 * Validate and adopt the persistent scratch area of a boot-mapped ring
 * buffer: compute the kernel text delta, sanity-check the saved module
 * table, sort it by address, allocate the per-module delta array, and
 * restore the previous boot's trace clock.  Any inconsistency wipes
 * the scratch area so stale data is never trusted.
 */
static void setup_trace_scratch(struct trace_array *tr,
				struct trace_scratch *tscratch, unsigned int size)
{
	struct trace_module_delta *module_delta;
	struct trace_mod_entry *entry;
	int i, nr_entries;

	if (!tscratch)
		return;

	tr->scratch = tscratch;
	tr->scratch_size = size;

	/* Offset of this boot's kernel text from the recorded one. */
	if (tscratch->text_addr)
		tr->text_delta = (unsigned long)_text - tscratch->text_addr;

	/* nr_entries must fit inside the scratch area. */
	if (struct_size(tscratch, entries, tscratch->nr_entries) > size)
		goto reset;

	/* Check if each module name is a valid string */
	for (i = 0; i < tscratch->nr_entries; i++) {
		int n;

		entry = &tscratch->entries[i];

		for (n = 0; n < MODULE_NAME_LEN; n++) {
			if (entry->mod_name[n] == '\0')
				break;
			if (!isprint(entry->mod_name[n]))
				goto reset;
		}
		/* Missing NUL terminator within MODULE_NAME_LEN bytes. */
		if (n == MODULE_NAME_LEN)
			goto reset;
	}

	/* Sort the entries so that we can find appropriate module from address. */
	nr_entries = tscratch->nr_entries;
	sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
	       mod_addr_comp, NULL, NULL);

	if (IS_ENABLED(CONFIG_MODULES)) {
		module_delta = kzalloc_flex(*module_delta, delta, nr_entries);
		if (!module_delta) {
			pr_info("module_delta allocation failed. Not able to decode module address.");
			goto reset;
		}
		init_rcu_head(&module_delta->rcu);
	} else
		module_delta = NULL;
	WRITE_ONCE(tr->module_delta, module_delta);

	/* Scan modules to make text delta for modules. */
	module_for_each_mod(make_mod_delta, tr);

	/* Set trace_clock as the same of the previous boot. */
	if (tscratch->clock_id != tr->clock_id) {
		if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) ||
		    tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
			pr_info("the previous trace_clock info is not valid.");
			goto reset;
		}
	}
	return;
 reset:
	/* Invalid trace modules */
	memset(tscratch, 0, size);
}
9338 
/*
 * Allocate the ring buffer and per-CPU data for one array_buffer of
 * @tr.  When the instance has a boot-mapped address range, the buffer
 * is placed there (with a scratch area for module address decoding)
 * and the instance is treated like a user-mapped buffer.
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int
allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
{
	enum ring_buffer_flags rb_flags;
	struct trace_scratch *tscratch;
	unsigned int scratch_size = 0;

	rb_flags = tr->trace_flags & TRACE_ITER(OVERWRITE) ? RB_FL_OVERWRITE : 0;

	buf->tr = tr;

	if (tr->range_addr_start && tr->range_addr_size) {
		/* Add scratch buffer to handle 128 modules */
		buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
						      tr->range_addr_start,
						      tr->range_addr_size,
						      struct_size(tscratch, entries, 128));

		tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size);
		setup_trace_scratch(tr, tscratch, scratch_size);

		/*
		 * This is basically the same as a mapped buffer,
		 * with the same restrictions.
		 */
		tr->mapped++;
	} else {
		buf->buffer = ring_buffer_alloc(size, rb_flags);
	}
	if (!buf->buffer)
		return -ENOMEM;

	buf->data = alloc_percpu(struct trace_array_cpu);
	if (!buf->data) {
		ring_buffer_free(buf->buffer);
		buf->buffer = NULL;
		return -ENOMEM;
	}

	/* Allocate the first page for all buffers */
	/*
	 * NOTE(review): this references tr->array_buffer even when @buf is
	 * the snapshot buffer — looks intentional upstream, but confirm it
	 * should not be &buf / buf->buffer instead.
	 */
	set_buffer_entries(&tr->array_buffer,
			   ring_buffer_size(tr->array_buffer.buffer, 0));

	return 0;
}
9384 
free_trace_buffer(struct array_buffer * buf)9385 static void free_trace_buffer(struct array_buffer *buf)
9386 {
9387 	if (buf->buffer) {
9388 		ring_buffer_free(buf->buffer);
9389 		buf->buffer = NULL;
9390 		free_percpu(buf->data);
9391 		buf->data = NULL;
9392 	}
9393 }
9394 
/*
 * Allocate the main (and, when configured, snapshot) buffers for @tr.
 * The snapshot buffer starts at one page unless boot-time snapshot
 * allocation was requested; boot-mapped instances get none at all.
 * Returns 0 on success, negative errno on failure.
 */
static int allocate_trace_buffers(struct trace_array *tr, int size)
{
	int ret;

	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
	if (ret)
		return ret;

#ifdef CONFIG_TRACER_SNAPSHOT
	/* Fix mapped buffer trace arrays do not have snapshot buffers */
	if (tr->range_addr_start)
		return 0;

	ret = allocate_trace_buffer(tr, &tr->snapshot_buffer,
				    allocate_snapshot ? size : 1);
	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
		/* Undo the main buffer allocation as well. */
		free_trace_buffer(&tr->array_buffer);
		return -ENOMEM;
	}
	tr->allocated_snapshot = allocate_snapshot;

	/* The boot-time snapshot request applies to the first array only. */
	allocate_snapshot = false;
#endif

	return 0;
}
9421 
/* Free all ring buffers of @tr plus the module-delta table. NULL-safe. */
static void free_trace_buffers(struct trace_array *tr)
{
	if (!tr)
		return;

	free_trace_buffer(&tr->array_buffer);
	kfree(tr->module_delta);

#ifdef CONFIG_TRACER_SNAPSHOT
	free_trace_buffer(&tr->snapshot_buffer);
#endif
}
9434 
init_trace_flags_index(struct trace_array * tr)9435 static void init_trace_flags_index(struct trace_array *tr)
9436 {
9437 	int i;
9438 
9439 	/* Used by the trace options files */
9440 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9441 		tr->trace_flags_index[i] = i;
9442 }
9443 
__update_tracer(struct trace_array * tr)9444 static int __update_tracer(struct trace_array *tr)
9445 {
9446 	struct tracer *t;
9447 	int ret = 0;
9448 
9449 	for (t = trace_types; t && !ret; t = t->next)
9450 		ret = add_tracer(tr, t);
9451 
9452 	return ret;
9453 }
9454 
__update_tracer_options(struct trace_array * tr)9455 static __init int __update_tracer_options(struct trace_array *tr)
9456 {
9457 	struct tracers *t;
9458 	int ret = 0;
9459 
9460 	list_for_each_entry(t, &tr->tracers, list) {
9461 		ret = add_tracer_options(tr, t);
9462 		if (ret < 0)
9463 			break;
9464 	}
9465 
9466 	return ret;
9467 }
9468 
/*
 * Late-init pass: mark option creation as enabled and create the
 * tracer option files for every existing trace array.
 */
static __init void update_tracer_options(void)
{
	struct trace_array *tr;

	guard(mutex)(&trace_types_lock);
	tracer_options_updated = true;
	list_for_each_entry(tr, &ftrace_trace_arrays, list)
		__update_tracer_options(tr);
}
9478 
9479 /* Must have trace_types_lock held */
trace_array_find(const char * instance)9480 struct trace_array *trace_array_find(const char *instance)
9481 {
9482 	struct trace_array *tr, *found = NULL;
9483 
9484 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9485 		if (tr->name && strcmp(tr->name, instance) == 0) {
9486 			found = tr;
9487 			break;
9488 		}
9489 	}
9490 
9491 	return found;
9492 }
9493 
trace_array_find_get(const char * instance)9494 struct trace_array *trace_array_find_get(const char *instance)
9495 {
9496 	struct trace_array *tr;
9497 
9498 	guard(mutex)(&trace_types_lock);
9499 	tr = trace_array_find(instance);
9500 	if (tr)
9501 		tr->ref++;
9502 
9503 	return tr;
9504 }
9505 
/*
 * Create the tracefs directory tree for instance @tr: the directory
 * itself, its event files, the standard tracefs files, and entries for
 * all registered tracers.  On any failure the partial tree is torn
 * down again.  Returns 0 on success, negative errno otherwise.
 */
static int trace_array_create_dir(struct trace_array *tr)
{
	int ret;

	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
	if (!tr->dir)
		return -EINVAL;

	ret = event_trace_add_tracer(tr->dir, tr);
	if (ret) {
		tracefs_remove(tr->dir);
		return ret;
	}

	init_tracer_tracefs(tr, tr->dir);
	ret = __update_tracer(tr);
	if (ret) {
		/* Unwind in reverse order of creation. */
		event_trace_del_tracer(tr);
		tracefs_remove(tr->dir);
		return ret;
	}
	return 0;
}
9529 
/*
 * Allocate and initialize a new trace_array instance named @name.
 * @systems optionally restricts which event systems get directories;
 * @range_addr_start/@range_addr_size are non-zero only for boot-time
 * memory-mapped ring buffers.  On success the instance is on
 * ftrace_trace_arrays with ref == 1; on failure everything is freed
 * and an ERR_PTR is returned.  Callers hold the relevant mutexes.
 */
static struct trace_array *
trace_array_create_systems(const char *name, const char *systems,
			   unsigned long range_addr_start,
			   unsigned long range_addr_size)
{
	struct trace_array *tr;
	int ret;

	/* ret stays -ENOMEM for all the unchecked allocation bailouts. */
	ret = -ENOMEM;
	tr = kzalloc_obj(*tr);
	if (!tr)
		return ERR_PTR(ret);

	tr->name = kstrdup(name, GFP_KERNEL);
	if (!tr->name)
		goto out_free_tr;

	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
		goto out_free_tr;

	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
		goto out_free_tr;

	if (systems) {
		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
		if (!tr->system_names)
			goto out_free_tr;
	}

	/* Only for boot up memory mapped ring buffers */
	tr->range_addr_start = range_addr_start;
	tr->range_addr_size = range_addr_size;

	/* Inherit the top-level flags, minus the ones reset per instance. */
	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;

	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&tr->start_lock);

	tr->syscall_buf_sz = global_trace.syscall_buf_sz;

	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
#ifdef CONFIG_TRACER_SNAPSHOT
	spin_lock_init(&tr->snapshot_trigger_lock);
#endif
	/* New instances start with the no-op tracer. */
	tr->current_trace = &nop_trace;
	tr->current_trace_flags = nop_trace.flags;

	INIT_LIST_HEAD(&tr->systems);
	INIT_LIST_HEAD(&tr->events);
	INIT_LIST_HEAD(&tr->hist_vars);
	INIT_LIST_HEAD(&tr->err_log);
	INIT_LIST_HEAD(&tr->tracers);
	INIT_LIST_HEAD(&tr->marker_list);

#ifdef CONFIG_MODULES
	INIT_LIST_HEAD(&tr->mod_events);
#endif

	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
		goto out_free_tr;

	/* The ring buffer is defaultly expanded */
	trace_set_ring_buffer_expanded(tr);

	if (ftrace_allocate_ftrace_ops(tr) < 0)
		goto out_free_tr;

	ftrace_init_trace_array(tr);

	init_trace_flags_index(tr);

	if (trace_instance_dir) {
		ret = trace_array_create_dir(tr);
		if (ret)
			goto out_free_tr;
	} else
		/* Early boot: directory comes later via create_trace_instances(). */
		__trace_early_add_events(tr);

	list_add(&tr->list, &ftrace_trace_arrays);

	tr->ref++;

	return tr;

 out_free_tr:
	/* All the free routines below tolerate partially-initialized state. */
	ftrace_free_ftrace_ops(tr);
	free_trace_buffers(tr);
	free_cpumask_var(tr->pipe_cpumask);
	free_cpumask_var(tr->tracing_cpumask);
	kfree_const(tr->system_names);
	kfree(tr->range_name);
	kfree(tr->name);
	kfree(tr);

	return ERR_PTR(ret);
}
9627 
/* Create an instance with all event systems and no boot-mapped range. */
static struct trace_array *trace_array_create(const char *name)
{
	return trace_array_create_systems(name, NULL, 0, 0);
}
9632 
instance_mkdir(const char * name)9633 static int instance_mkdir(const char *name)
9634 {
9635 	struct trace_array *tr;
9636 	int ret;
9637 
9638 	guard(mutex)(&event_mutex);
9639 	guard(mutex)(&trace_types_lock);
9640 
9641 	ret = -EEXIST;
9642 	if (trace_array_find(name))
9643 		return -EEXIST;
9644 
9645 	tr = trace_array_create(name);
9646 
9647 	ret = PTR_ERR_OR_ZERO(tr);
9648 
9649 	return ret;
9650 }
9651 
#ifdef CONFIG_MMU
/*
 * Map @size bytes starting at @start (passed straight to
 * vmap_page_range()) into a fresh VM_IOREMAP area with a
 * non-executable kernel protection.  Returns the new virtual address,
 * or 0 on failure.
 */
static u64 map_pages(unsigned long start, unsigned long size)
{
	unsigned long vmap_start, vmap_end;
	struct vm_struct *area;
	int ret;

	area = get_vm_area(size, VM_IOREMAP);
	if (!area)
		return 0;

	vmap_start = (unsigned long) area->addr;
	vmap_end = vmap_start + size;

	ret = vmap_page_range(vmap_start, vmap_end,
			      start, pgprot_nx(PAGE_KERNEL));
	if (ret < 0) {
		free_vm_area(area);
		return 0;
	}

	return (u64)vmap_start;
}
#else
/* Without an MMU the range cannot be remapped; report failure. */
static inline u64 map_pages(unsigned long start, unsigned long size)
{
	return 0;
}
#endif
9681 
9682 /**
9683  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9684  * @name: The name of the trace array to be looked up/created.
9685  * @systems: A list of systems to create event directories for (NULL for all)
9686  *
9687  * Returns pointer to trace array with given name.
9688  * NULL, if it cannot be created.
9689  *
9690  * NOTE: This function increments the reference counter associated with the
9691  * trace array returned. This makes sure it cannot be freed while in use.
9692  * Use trace_array_put() once the trace array is no longer needed.
9693  * If the trace_array is to be freed, trace_array_destroy() needs to
9694  * be called after the trace_array_put(), or simply let user space delete
9695  * it from the tracefs instances directory. But until the
9696  * trace_array_put() is called, user space can not delete it.
9697  *
9698  */
trace_array_get_by_name(const char * name,const char * systems)9699 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9700 {
9701 	struct trace_array *tr;
9702 
9703 	guard(mutex)(&event_mutex);
9704 	guard(mutex)(&trace_types_lock);
9705 
9706 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9707 		if (tr->name && strcmp(tr->name, name) == 0) {
9708 			tr->ref++;
9709 			return tr;
9710 		}
9711 	}
9712 
9713 	tr = trace_array_create_systems(name, systems, 0, 0);
9714 
9715 	if (IS_ERR(tr))
9716 		tr = NULL;
9717 	else
9718 		tr->ref++;
9719 
9720 	return tr;
9721 }
9722 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9723 
/*
 * Tear down one trace array instance: unlink it, reset its flags,
 * detach all tracing machinery, remove its tracefs tree, and free
 * every owned allocation.  Fails with -EBUSY if the instance is still
 * referenced or actively traced.  Callers hold event_mutex and
 * trace_types_lock.
 */
static int __remove_instance(struct trace_array *tr)
{
	int i;

	/* Reference counter for a newly created trace array = 1. */
	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
		return -EBUSY;

	list_del(&tr->list);

	/* Disable all the flags that were enabled coming in */
	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
		if ((1ULL << i) & ZEROED_TRACE_FLAGS)
			set_tracer_flag(tr, 1ULL << i, 0);
	}

	/* Redirect printk tracing back to the top-level array. */
	if (printk_trace == tr)
		update_printk_trace(&global_trace);

	/* Wait out RCU readers of the marker list if it was in use. */
	if (update_marker_trace(tr, 0))
		synchronize_rcu();

	tracing_set_nop(tr);
	clear_ftrace_function_probes(tr);
	event_trace_del_tracer(tr);
	ftrace_clear_pids(tr);
	ftrace_destroy_function_files(tr);
	tracefs_remove(tr->dir);
	free_percpu(tr->last_func_repeats);
	free_trace_buffers(tr);
	clear_tracing_err_log(tr);
	free_tracers(tr);

	/* Give back boot-reserved or vmalloc'ed buffer memory. */
	if (tr->range_name) {
		reserve_mem_release_by_name(tr->range_name);
		kfree(tr->range_name);
	}
	if (tr->flags & TRACE_ARRAY_FL_VMALLOC)
		vfree((void *)tr->range_addr_start);

	for (i = 0; i < tr->nr_topts; i++) {
		kfree(tr->topts[i].topts);
	}
	kfree(tr->topts);

	free_cpumask_var(tr->pipe_cpumask);
	free_cpumask_var(tr->tracing_cpumask);
	kfree_const(tr->system_names);
	kfree(tr->name);
	kfree(tr);

	return 0;
}
9777 
/*
 * Destroy a trace array previously obtained by its creator.
 * Returns 0 on success, -EINVAL for a NULL pointer, -ENODEV if the
 * array is no longer registered, or -EBUSY if still in use.
 */
int trace_array_destroy(struct trace_array *this_tr)
{
	struct trace_array *tr;

	if (!this_tr)
		return -EINVAL;

	guard(mutex)(&event_mutex);
	guard(mutex)(&trace_types_lock);


	/* Making sure trace array exists before destroying it. */
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (tr == this_tr)
			return __remove_instance(tr);
	}

	return -ENODEV;
}
EXPORT_SYMBOL_GPL(trace_array_destroy);
9798 
instance_rmdir(const char * name)9799 static int instance_rmdir(const char *name)
9800 {
9801 	struct trace_array *tr;
9802 
9803 	guard(mutex)(&event_mutex);
9804 	guard(mutex)(&trace_types_lock);
9805 
9806 	tr = trace_array_find(name);
9807 	if (!tr)
9808 		return -ENODEV;
9809 
9810 	return __remove_instance(tr);
9811 }
9812 
/*
 * Create the top-level "instances" directory and, for any named trace
 * arrays created before tracefs was ready (e.g. from boot parameters),
 * build their directory trees now.
 */
static __init void create_trace_instances(struct dentry *d_tracer)
{
	struct trace_array *tr;

	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
							 instance_mkdir,
							 instance_rmdir);
	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
		return;

	guard(mutex)(&event_mutex);
	guard(mutex)(&trace_types_lock);

	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		/* The unnamed entry is the top-level array; skip it. */
		if (!tr->name)
			continue;
		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
			     "Failed to create instance directory\n"))
			return;
	}
}
9834 
/*
 * init_tracer_tracefs - create the control files for a trace array
 * @tr: the trace array the files operate on
 * @d_tracer: directory to create the files in (NULL means tracefs top level)
 *
 * Creates the standard tracefs interface files for @tr, the option
 * directories, the max-latency file, function filter files and the
 * per-CPU sub-directories.
 */
static void
init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
{
	int cpu;

	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
			tr, &show_traces_fops);

	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
			tr, &set_tracer_fops);

	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_cpumask_fops);

	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_iter_fops);

	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_fops);

	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
			  tr, &tracing_pipe_fops);

	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_entries_fops);

	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
			  tr, &tracing_total_entries_fops);

	/* 0200: write-only (see tracing_free_buffer_fops) */
	trace_create_file("free_buffer", 0200, d_tracer,
			  tr, &tracing_free_buffer_fops);

	trace_create_file("trace_marker", 0220, d_tracer,
			  tr, &tracing_mark_fops);

	/* Cache the ftrace:print event file for this instance. */
	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");

	trace_create_file("trace_marker_raw", 0220, d_tracer,
			  tr, &tracing_mark_raw_fops);

	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
			  &trace_clock_fops);

	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
			  tr, &rb_simple_fops);

	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
			  &trace_time_stamp_mode_fops);

	/* Default value reported by the "buffer_percent" file created below. */
	tr->buffer_percent = 50;

	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
			tr, &buffer_percent_fops);

	trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
			  tr, &buffer_subbuf_size_fops);

	trace_create_file("syscall_user_buf_size", TRACE_MODE_WRITE, d_tracer,
			 tr, &tracing_syscall_buf_fops);

	create_trace_options_dir(tr);

	trace_create_maxlat_file(tr, d_tracer);

	if (ftrace_create_function_files(tr, d_tracer))
		MEM_FAIL(1, "Could not allocate function filter files");

	/*
	 * Boot-mapped buffers (non-zero range_addr_start) expose
	 * "last_boot_info"; other buffers get a "snapshot" file when
	 * snapshots are configured.
	 */
	if (tr->range_addr_start) {
		trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
				  tr, &last_boot_fops);
#ifdef CONFIG_TRACER_SNAPSHOT
	} else {
		trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
				  tr, &snapshot_fops);
#endif
	}

	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_err_log_fops);

	for_each_tracing_cpu(cpu)
		tracing_init_tracefs_percpu(tr, cpu);

	ftrace_init_tracefs(tr, d_tracer);
}
9920 
#ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
/*
 * trace_automount - automount callback for the debugfs "tracing" directory
 * @mntpt: dentry of the automount point
 * @ignore: unused callback cookie
 *
 * To maintain backward compatibility for tools that mount
 * debugfs to get to the tracing facility, tracefs is automatically
 * mounted to the debugfs/tracing directory.
 *
 * Returns the new vfsmount, NULL if tracefs is not registered, or an
 * ERR_PTR on failure.
 */
static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
{
	struct vfsmount *mnt;
	struct file_system_type *type;
	struct fs_context *fc;
	int ret;

	type = get_fs_type("tracefs");
	if (!type)
		return NULL;

	fc = fs_context_for_submount(type, mntpt);
	/* Drop our reference on the filesystem type; fc has what it needs. */
	put_filesystem(type);
	if (IS_ERR(fc))
		return ERR_CAST(fc);

	pr_warn("NOTICE: Automounting of tracing to debugfs is deprecated and will be removed in 2030\n");

	ret = vfs_parse_fs_string(fc, "source", "tracefs");
	if (!ret)
		mnt = fc_mount(fc);
	else
		mnt = ERR_PTR(ret);

	put_fs_context(fc);
	return mnt;
}
#endif
9955 
/**
 * tracing_init_dentry - initialize top level trace array
 *
 * This is called when creating files or directories in the tracing
 * directory. It is called via fs_initcall() by any of the boot up code
 * and makes sure the top level tracing directory has been set up.
 *
 * Returns 0 on success (including when already initialized), -EPERM
 * under lockdown, or -ENODEV if tracefs is not yet initialized.
 */
int tracing_init_dentry(void)
{
	struct trace_array *tr = &global_trace;

	/* Tracing is disabled entirely under LOCKDOWN_TRACEFS. */
	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Tracing disabled due to lockdown\n");
		return -EPERM;
	}

	/* The top level trace array uses NULL as parent */
	if (tr->dir)
		return 0;

	if (WARN_ON(!tracefs_initialized()))
		return -ENODEV;

#ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
	/*
	 * As there may still be users that expect the tracing
	 * files to exist in debugfs/tracing, we must automount
	 * the tracefs file system there, so older tools still
	 * work with the newer kernel.
	 */
	tr->dir = debugfs_create_automount("tracing", NULL,
					   trace_automount, NULL);
#endif

	return 0;
}
9992 
/* Linker-generated bounds of the built-in trace eval map section. */
extern struct trace_eval_map *__start_ftrace_eval_maps[];
extern struct trace_eval_map *__stop_ftrace_eval_maps[];

/* Boot-time workqueue used to defer slow eval-map and tracefs setup. */
struct workqueue_struct *trace_init_wq __initdata;
static struct work_struct eval_map_work __initdata;
static struct work_struct tracerfs_init_work __initdata;
9999 
eval_map_work_func(struct work_struct * work)10000 static void __init eval_map_work_func(struct work_struct *work)
10001 {
10002 	int len;
10003 
10004 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10005 	trace_event_update_with_eval_map(NULL, __start_ftrace_eval_maps, len);
10006 }
10007 
trace_eval_init(void)10008 static int __init trace_eval_init(void)
10009 {
10010 	INIT_WORK(&eval_map_work, eval_map_work_func);
10011 
10012 	trace_init_wq = alloc_workqueue("trace_init_wq", WQ_UNBOUND, 0);
10013 	if (!trace_init_wq) {
10014 		pr_err("Unable to allocate trace_init_wq\n");
10015 		/* Do work here */
10016 		eval_map_work_func(&eval_map_work);
10017 		return -ENOMEM;
10018 	}
10019 
10020 	queue_work(trace_init_wq, &eval_map_work);
10021 	return 0;
10022 }
10023 
10024 subsys_initcall(trace_eval_init);
10025 
/*
 * trace_eval_sync - wait for boot-time trace init work to finish
 *
 * Destroying the workqueue flushes any work still queued on it, which
 * guarantees the eval map updates have completed by late init.
 */
static int __init trace_eval_sync(void)
{
	/* Make sure the eval map updates are finished */
	if (trace_init_wq)
		destroy_workqueue(trace_init_wq);
	return 0;
}

late_initcall_sync(trace_eval_sync);
10035 
10036 
10037 #ifdef CONFIG_MODULES
10038 
module_exists(const char * module)10039 bool module_exists(const char *module)
10040 {
10041 	/* All modules have the symbol __this_module */
10042 	static const char this_mod[] = "__this_module";
10043 	char modname[MODULE_NAME_LEN + sizeof(this_mod) + 2];
10044 	unsigned long val;
10045 	int n;
10046 
10047 	n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
10048 
10049 	if (n > sizeof(modname) - 1)
10050 		return false;
10051 
10052 	val = module_kallsyms_lookup_name(modname);
10053 	return val != 0;
10054 }
10055 
/*
 * trace_module_add_evals - process a loading module's eval maps
 * @mod: the module being loaded
 */
static void trace_module_add_evals(struct module *mod)
{
	/*
	 * Modules with bad taint do not have events created, do
	 * not bother with enums either.
	 */
	if (trace_module_has_bad_taint(mod))
		return;

	/* Even if there are no trace_evals, this needs to sanitize field types. */
	trace_event_update_with_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
}
10068 
#ifdef CONFIG_TRACE_EVAL_MAP_FILE
/*
 * trace_module_remove_evals - drop a module's entries from trace_eval_maps
 * @mod: the module being unloaded
 *
 * Walks the global eval map list, finds the block contributed by @mod,
 * splices it out and frees it.
 */
static void trace_module_remove_evals(struct module *mod)
{
	union trace_eval_map_item *map;
	/* Pointer to the link that will be patched when the block is removed. */
	union trace_eval_map_item **last = &trace_eval_maps;

	if (!mod->num_trace_evals)
		return;

	/* The eval map list is modified under trace_eval_mutex. */
	guard(mutex)(&trace_eval_mutex);

	map = trace_eval_maps;

	/* Find the head item belonging to this module. */
	while (map) {
		if (map->head.mod == mod)
			break;
		map = trace_eval_jmp_to_tail(map);
		last = &map->tail.next;
		map = map->tail.next;
	}
	if (!map)
		return;

	/* Unlink the module's block from the list and free it. */
	*last = trace_eval_jmp_to_tail(map)->tail.next;
	kfree(map);
}
#else
static inline void trace_module_remove_evals(struct module *mod) { }
#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10098 
/*
 * trace_module_record - note a module load in persistent trace arrays
 * @mod: module being added or removed
 * @add: true when the module is being loaded
 *
 * For boot-mapped arrays created this boot (BOOT without LAST_BOOT), a
 * load saves the module info; for arrays still holding last boot's data
 * (LAST_BOOT set) the module address delta is updated.
 */
static void trace_module_record(struct module *mod, bool add)
{
	struct trace_array *tr;
	unsigned long flags;

	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT);
		/* Update any persistent trace array that has already been started */
		if (flags == TRACE_ARRAY_FL_BOOT && add) {
			guard(mutex)(&scratch_mutex);
			save_mod(mod, tr);
		} else if (flags & TRACE_ARRAY_FL_LAST_BOOT) {
			/* Update delta if the module loaded in previous boot */
			make_mod_delta(mod, tr);
		}
	}
}
10116 
trace_module_notify(struct notifier_block * self,unsigned long val,void * data)10117 static int trace_module_notify(struct notifier_block *self,
10118 			       unsigned long val, void *data)
10119 {
10120 	struct module *mod = data;
10121 
10122 	switch (val) {
10123 	case MODULE_STATE_COMING:
10124 		trace_module_add_evals(mod);
10125 		trace_module_record(mod, true);
10126 		break;
10127 	case MODULE_STATE_GOING:
10128 		trace_module_remove_evals(mod);
10129 		trace_module_record(mod, false);
10130 		break;
10131 	}
10132 
10133 	return NOTIFY_OK;
10134 }
10135 
10136 static struct notifier_block trace_module_nb = {
10137 	.notifier_call = trace_module_notify,
10138 	.priority = 0,
10139 };
10140 #endif /* CONFIG_MODULES */
10141 
/*
 * Creates the top level tracefs files. Normally run from the boot-time
 * workqueue; called directly (work == NULL) if the workqueue could not
 * be allocated.
 */
static __init void tracer_init_tracefs_work_func(struct work_struct *work)
{

	event_trace_init();

	init_tracer_tracefs(&global_trace, NULL);
	ftrace_init_tracefs_toplevel(&global_trace, NULL);

	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
			&global_trace, &tracing_thresh_fops);

	trace_create_file("README", TRACE_MODE_READ, NULL,
			NULL, &tracing_readme_fops);

	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
			NULL, &tracing_saved_cmdlines_fops);

	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
			  NULL, &tracing_saved_cmdlines_size_fops);

	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
			NULL, &tracing_saved_tgids_fops);

	trace_create_eval_file(NULL);

#ifdef CONFIG_MODULES
	register_module_notifier(&trace_module_nb);
#endif

#ifdef CONFIG_DYNAMIC_FTRACE
	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
			NULL, &tracing_dyn_info_fops);
#endif

	create_trace_instances(NULL);

	update_tracer_options();
}
10180 
tracer_init_tracefs(void)10181 static __init int tracer_init_tracefs(void)
10182 {
10183 	int ret;
10184 
10185 	trace_access_lock_init();
10186 
10187 	ret = tracing_init_dentry();
10188 	if (ret)
10189 		return 0;
10190 
10191 	if (trace_init_wq) {
10192 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10193 		queue_work(trace_init_wq, &tracerfs_init_work);
10194 	} else {
10195 		tracer_init_tracefs_work_func(NULL);
10196 	}
10197 
10198 	if (rv_init_interface())
10199 		pr_err("RV: Error while creating the RV interface\n");
10200 
10201 	return 0;
10202 }
10203 
10204 fs_initcall(tracer_init_tracefs);
10205 
/* Shared callback for both the panic and the die notifier chains. */
static int trace_die_panic_handler(struct notifier_block *self,
				unsigned long ev, void *unused);

static struct notifier_block trace_panic_notifier = {
	.notifier_call = trace_die_panic_handler,
	.priority = INT_MAX - 1,	/* run early, after hung_task/rcu_stall are quieted */
};

static struct notifier_block trace_die_notifier = {
	.notifier_call = trace_die_panic_handler,
	.priority = INT_MAX - 1,
};
10218 
10219 /*
10220  * The idea is to execute the following die/panic callback early, in order
10221  * to avoid showing irrelevant information in the trace (like other panic
10222  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10223  * warnings get disabled (to prevent potential log flooding).
10224  */
trace_die_panic_handler(struct notifier_block * self,unsigned long ev,void * unused)10225 static int trace_die_panic_handler(struct notifier_block *self,
10226 				unsigned long ev, void *unused)
10227 {
10228 	if (!ftrace_dump_on_oops_enabled())
10229 		return NOTIFY_DONE;
10230 
10231 	/* The die notifier requires DIE_OOPS to trigger */
10232 	if (self == &trace_die_notifier && ev != DIE_OOPS)
10233 		return NOTIFY_DONE;
10234 
10235 	ftrace_dump(DUMP_PARAM);
10236 
10237 	return NOTIFY_DONE;
10238 }
10239 
/*
 * printk is set to max of 1024, we really don't need it that big.
 * Nothing should be printing 1000 characters anyway.
 * The clamp is applied in trace_printk_seq().
 */
#define TRACE_MAX_PRINT		1000

/*
 * Define here KERN_TRACE so that we have one place to modify
 * it if we decide to change what log level the ftrace dump
 * should be at.
 */
#define KERN_TRACE		KERN_EMERG
10252 
/*
 * trace_printk_seq - print a trace_seq's contents via printk and reset it
 * @s: the trace_seq to print; reinitialized before return
 *
 * Used by the ftrace dump path to push iterator output to the console.
 */
void
trace_printk_seq(struct trace_seq *s)
{
	/* Probably should print a warning here. */
	if (s->seq.len >= TRACE_MAX_PRINT)
		s->seq.len = TRACE_MAX_PRINT;

	/*
	 * More paranoid code. Although the buffer size is set to
	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
	 * an extra layer of protection.
	 */
	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
		s->seq.len = s->seq.size - 1;

	/* should be zero ended, but we are paranoid. */
	s->buffer[s->seq.len] = 0;

	printk(KERN_TRACE "%s", s->buffer);

	trace_seq_init(s);
}
10275 
/*
 * trace_init_iter - initialize a trace_iterator over @tr's buffer
 * @iter: iterator to initialize
 * @tr: trace array whose main buffer will be iterated
 *
 * Sets @iter up to walk all CPUs of @tr's buffer. The temp and fmt
 * buffers point at static storage because the dump callers cannot
 * use kmalloc.
 */
static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
{
	iter->tr = tr;
	iter->trace = iter->tr->current_trace;
	iter->cpu_file = RING_BUFFER_ALL_CPUS;
	iter->array_buffer = &tr->array_buffer;

	/* Give the current tracer a chance to set up iterator state. */
	if (iter->trace && iter->trace->open)
		iter->trace->open(iter);

	/* Annotate start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->array_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[iter->tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	/* Can not use kmalloc for iter.temp and iter.fmt */
	iter->temp = static_temp_buf;
	iter->temp_size = STATIC_TEMP_BUF_SIZE;
	iter->fmt = static_fmt_buf;
	iter->fmt_size = STATIC_FMT_BUF_SIZE;
}
10300 
/* Initialize @iter to walk the global (top level) trace buffer. */
void trace_init_global_iter(struct trace_iterator *iter)
{
	trace_init_iter(iter, &global_trace);
}
10305 
/*
 * ftrace_dump_one - dump a trace array's ring buffer through printk
 * @tr: trace array to dump
 * @dump_mode: DUMP_ORIG dumps only the CPU we are running on,
 *             anything else dumps all CPUs
 *
 * Runs with IRQs disabled and uses a static iterator (the iterator is
 * too big for a crash-path stack), so this is not reentrant.
 */
static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
{
	/* use static because iter can be a bit big for the stack */
	static struct trace_iterator iter;
	unsigned int old_userobj;
	unsigned long flags;
	int cnt = 0;

	/*
	 * Always turn off tracing when we dump.
	 * We don't need to show trace output of what happens
	 * between multiple crashes.
	 *
	 * If the user does a sysrq-z, then they can re-enable
	 * tracing with echo 1 > tracing_on.
	 */
	tracer_tracing_off(tr);

	local_irq_save(flags);

	/* Simulate the iterator */
	trace_init_iter(&iter, tr);

	/* While dumping, do not allow the buffer to be enable */
	tracer_tracing_disable(tr);

	/* Remember the SYM_USEROBJ setting so it can be restored below. */
	old_userobj = tr->trace_flags & TRACE_ITER(SYM_USEROBJ);

	/* don't look at user memory in panic mode */
	tr->trace_flags &= ~TRACE_ITER(SYM_USEROBJ);

	if (dump_mode == DUMP_ORIG)
		iter.cpu_file = raw_smp_processor_id();
	else
		iter.cpu_file = RING_BUFFER_ALL_CPUS;

	if (tr == &global_trace)
		printk(KERN_TRACE "Dumping ftrace buffer:\n");
	else
		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);

	/* Did function tracer already get disabled? */
	if (ftrace_is_dead()) {
		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
	}

	/*
	 * We need to stop all tracing on all CPUS to read
	 * the next buffer. This is a bit expensive, but is
	 * not done often. We fill all what we can read,
	 * and then release the locks again.
	 */

	while (!trace_empty(&iter)) {

		if (!cnt)
			printk(KERN_TRACE "---------------------------------\n");

		cnt++;

		trace_iterator_reset(&iter);
		iter.iter_flags |= TRACE_FILE_LAT_FMT;

		if (trace_find_next_entry_inc(&iter) != NULL) {
			int ret;

			ret = print_trace_line(&iter);
			if (ret != TRACE_TYPE_NO_CONSUME)
				trace_consume(&iter);

			trace_printk_seq(&iter.seq);
		}
		/* Dumping can take a while; keep the NMI watchdog quiet. */
		touch_nmi_watchdog();
	}

	if (!cnt)
		printk(KERN_TRACE "   (ftrace buffer empty)\n");
	else
		printk(KERN_TRACE "---------------------------------\n");

	/* Restore the SYM_USEROBJ flag and re-enable the buffer. */
	tr->trace_flags |= old_userobj;

	tracer_tracing_enable(tr);
	local_irq_restore(flags);
}
10392 
/*
 * ftrace_dump_by_param - dump buffers per the ftrace_dump_on_oops parameter
 *
 * The parameter is a comma-separated list. The first token may be "0"
 * (no global dump), "1" (dump all CPUs of the global buffer), or
 * "2"/"orig_cpu" (dump only the originating CPU). Remaining tokens name
 * instances, optionally suffixed "=2" or "=orig_cpu" to restrict that
 * instance's dump to the originating CPU.
 */
static void ftrace_dump_by_param(void)
{
	bool first_param = true;
	char dump_param[MAX_TRACER_SIZE];
	char *buf, *token, *inst_name;
	struct trace_array *tr;

	/* Work on a copy; strsep() modifies the string it parses. */
	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
	buf = dump_param;

	while ((token = strsep(&buf, ",")) != NULL) {
		if (first_param) {
			/* The first token selects the global buffer's dump mode. */
			first_param = false;
			if (!strcmp("0", token))
				continue;
			else if (!strcmp("1", token)) {
				ftrace_dump_one(&global_trace, DUMP_ALL);
				continue;
			}
			else if (!strcmp("2", token) ||
			  !strcmp("orig_cpu", token)) {
				ftrace_dump_one(&global_trace, DUMP_ORIG);
				continue;
			}
		}

		/* "<instance>[=<mode>]" for every other token. */
		inst_name = strsep(&token, "=");
		tr = trace_array_find(inst_name);
		if (!tr) {
			printk(KERN_TRACE "Instance %s not found\n", inst_name);
			continue;
		}

		if (token && (!strcmp("2", token) ||
			  !strcmp("orig_cpu", token)))
			ftrace_dump_one(tr, DUMP_ORIG);
		else
			ftrace_dump_one(tr, DUMP_ALL);
	}
}
10433 
ftrace_dump(enum ftrace_dump_mode oops_dump_mode)10434 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10435 {
10436 	static atomic_t dump_running;
10437 
10438 	/* Only allow one dump user at a time. */
10439 	if (atomic_inc_return(&dump_running) != 1) {
10440 		atomic_dec(&dump_running);
10441 		return;
10442 	}
10443 
10444 	switch (oops_dump_mode) {
10445 	case DUMP_ALL:
10446 		ftrace_dump_one(&global_trace, DUMP_ALL);
10447 		break;
10448 	case DUMP_ORIG:
10449 		ftrace_dump_one(&global_trace, DUMP_ORIG);
10450 		break;
10451 	case DUMP_PARAM:
10452 		ftrace_dump_by_param();
10453 		break;
10454 	case DUMP_NONE:
10455 		break;
10456 	default:
10457 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10458 		ftrace_dump_one(&global_trace, DUMP_ALL);
10459 	}
10460 
10461 	atomic_dec(&dump_running);
10462 }
10463 EXPORT_SYMBOL_GPL(ftrace_dump);
10464 
/* Size of the chunk buffer used to copy in user space command writes. */
#define WRITE_BUFSIZE  4096

/*
 * trace_parse_run_command - parse and run newline-separated commands
 * @file: file being written to (not used here)
 * @buffer: user space buffer holding the commands
 * @count: number of bytes available in @buffer
 * @ppos: file position (not used here)
 * @createfn: callback invoked with each complete command line
 *
 * Copies the user buffer in WRITE_BUFSIZE chunks, splits on newlines,
 * strips '#' comments, and hands each line to @createfn. Returns the
 * number of bytes consumed, or a negative error (including the first
 * error returned by @createfn).
 */
ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
				size_t count, loff_t *ppos,
				int (*createfn)(const char *))
{
	char *kbuf __free(kfree) = NULL;
	char *buf, *tmp;
	int ret = 0;
	size_t done = 0;
	size_t size;

	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	while (done < count) {
		size = count - done;

		/* Leave room for the terminating NUL. */
		if (size >= WRITE_BUFSIZE)
			size = WRITE_BUFSIZE - 1;

		if (copy_from_user(kbuf, buffer + done, size))
			return -EFAULT;

		kbuf[size] = '\0';
		buf = kbuf;
		do {
			tmp = strchr(buf, '\n');
			if (tmp) {
				*tmp = '\0';
				size = tmp - buf + 1;
			} else {
				/* Partial line at the end of this chunk. */
				size = strlen(buf);
				if (done + size < count) {
					/* Re-read the partial line in the next chunk. */
					if (buf != kbuf)
						break;
					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
					pr_warn("Line length is too long: Should be less than %d\n",
						WRITE_BUFSIZE - 2);
					return -EINVAL;
				}
			}
			done += size;

			/* Remove comments */
			tmp = strchr(buf, '#');

			if (tmp)
				*tmp = '\0';

			ret = createfn(buf);
			if (ret)
				return ret;
			buf += size;

		} while (done < count);
	}
	return done;
}
10525 
10526 #ifdef CONFIG_TRACER_SNAPSHOT
tr_needs_alloc_snapshot(const char * name)10527 __init static bool tr_needs_alloc_snapshot(const char *name)
10528 {
10529 	char *test;
10530 	int len = strlen(name);
10531 	bool ret;
10532 
10533 	if (!boot_snapshot_index)
10534 		return false;
10535 
10536 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10537 	    boot_snapshot_info[len] == '\t')
10538 		return true;
10539 
10540 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10541 	if (!test)
10542 		return false;
10543 
10544 	sprintf(test, "\t%s\t", name);
10545 	ret = strstr(boot_snapshot_info, test) == NULL;
10546 	kfree(test);
10547 	return ret;
10548 }
10549 
do_allocate_snapshot(const char * name)10550 __init static void do_allocate_snapshot(const char *name)
10551 {
10552 	if (!tr_needs_alloc_snapshot(name))
10553 		return;
10554 
10555 	/*
10556 	 * When allocate_snapshot is set, the next call to
10557 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
10558 	 * will allocate the snapshot buffer. That will also clear
10559 	 * this flag.
10560 	 */
10561 	allocate_snapshot = true;
10562 }
10563 #else
do_allocate_snapshot(const char * name)10564 static inline void do_allocate_snapshot(const char *name) { }
10565 #endif
10566 
backup_instance_area(const char * backup,unsigned long * addr,phys_addr_t * size)10567 __init static int backup_instance_area(const char *backup,
10568 				       unsigned long *addr, phys_addr_t *size)
10569 {
10570 	struct trace_array *backup_tr;
10571 	void *allocated_vaddr = NULL;
10572 
10573 	backup_tr = trace_array_get_by_name(backup, NULL);
10574 	if (!backup_tr) {
10575 		pr_warn("Tracing: Instance %s is not found.\n", backup);
10576 		return -ENOENT;
10577 	}
10578 
10579 	if (!(backup_tr->flags & TRACE_ARRAY_FL_BOOT)) {
10580 		pr_warn("Tracing: Instance %s is not boot mapped.\n", backup);
10581 		trace_array_put(backup_tr);
10582 		return -EINVAL;
10583 	}
10584 
10585 	*size = backup_tr->range_addr_size;
10586 
10587 	allocated_vaddr = vzalloc(*size);
10588 	if (!allocated_vaddr) {
10589 		pr_warn("Tracing: Failed to allocate memory for copying instance %s (size 0x%lx)\n",
10590 			backup, (unsigned long)*size);
10591 		trace_array_put(backup_tr);
10592 		return -ENOMEM;
10593 	}
10594 
10595 	memcpy(allocated_vaddr,
10596 		(void *)backup_tr->range_addr_start, (size_t)*size);
10597 	*addr = (unsigned long)allocated_vaddr;
10598 
10599 	trace_array_put(backup_tr);
10600 	return 0;
10601 }
10602 
enable_instances(void)10603 __init static void enable_instances(void)
10604 {
10605 	struct trace_array *tr;
10606 	bool memmap_area = false;
10607 	char *curr_str;
10608 	char *name;
10609 	char *str;
10610 	char *tok;
10611 
10612 	/* A tab is always appended */
10613 	boot_instance_info[boot_instance_index - 1] = '\0';
10614 	str = boot_instance_info;
10615 
10616 	while ((curr_str = strsep(&str, "\t"))) {
10617 		phys_addr_t start = 0;
10618 		phys_addr_t size = 0;
10619 		unsigned long addr = 0;
10620 		bool traceprintk = false;
10621 		bool traceoff = false;
10622 		char *flag_delim;
10623 		char *addr_delim;
10624 		char *rname __free(kfree) = NULL;
10625 		char *backup;
10626 
10627 		tok = strsep(&curr_str, ",");
10628 
10629 		name = strsep(&tok, "=");
10630 		backup = tok;
10631 
10632 		flag_delim = strchr(name, '^');
10633 		addr_delim = strchr(name, '@');
10634 
10635 		if (addr_delim)
10636 			*addr_delim++ = '\0';
10637 
10638 		if (flag_delim)
10639 			*flag_delim++ = '\0';
10640 
10641 		if (backup) {
10642 			if (backup_instance_area(backup, &addr, &size) < 0)
10643 				continue;
10644 		}
10645 
10646 		if (flag_delim) {
10647 			char *flag;
10648 
10649 			while ((flag = strsep(&flag_delim, "^"))) {
10650 				if (strcmp(flag, "traceoff") == 0) {
10651 					traceoff = true;
10652 				} else if ((strcmp(flag, "printk") == 0) ||
10653 					   (strcmp(flag, "traceprintk") == 0) ||
10654 					   (strcmp(flag, "trace_printk") == 0)) {
10655 					traceprintk = true;
10656 				} else {
10657 					pr_info("Tracing: Invalid instance flag '%s' for %s\n",
10658 						flag, name);
10659 				}
10660 			}
10661 		}
10662 
10663 		tok = addr_delim;
10664 		if (tok && isdigit(*tok)) {
10665 			start = memparse(tok, &tok);
10666 			if (!start) {
10667 				pr_warn("Tracing: Invalid boot instance address for %s\n",
10668 					name);
10669 				continue;
10670 			}
10671 			if (*tok != ':') {
10672 				pr_warn("Tracing: No size specified for instance %s\n", name);
10673 				continue;
10674 			}
10675 			tok++;
10676 			size = memparse(tok, &tok);
10677 			if (!size) {
10678 				pr_warn("Tracing: Invalid boot instance size for %s\n",
10679 					name);
10680 				continue;
10681 			}
10682 			memmap_area = true;
10683 		} else if (tok) {
10684 			if (!reserve_mem_find_by_name(tok, &start, &size)) {
10685 				start = 0;
10686 				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
10687 				continue;
10688 			}
10689 			rname = kstrdup(tok, GFP_KERNEL);
10690 		}
10691 
10692 		if (start) {
10693 			/* Start and size must be page aligned */
10694 			if (start & ~PAGE_MASK) {
10695 				pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start);
10696 				continue;
10697 			}
10698 			if (size & ~PAGE_MASK) {
10699 				pr_warn("Tracing: mapping size %pa is not page aligned\n", &size);
10700 				continue;
10701 			}
10702 
10703 			if (memmap_area)
10704 				addr = map_pages(start, size);
10705 			else
10706 				addr = (unsigned long)phys_to_virt(start);
10707 			if (addr) {
10708 				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
10709 					name, &start, (unsigned long)size);
10710 			} else {
10711 				pr_warn("Tracing: Failed to map boot instance %s\n", name);
10712 				continue;
10713 			}
10714 		} else {
10715 			/* Only non mapped buffers have snapshot buffers */
10716 			if (IS_ENABLED(CONFIG_TRACER_SNAPSHOT))
10717 				do_allocate_snapshot(name);
10718 		}
10719 
10720 		tr = trace_array_create_systems(name, NULL, addr, size);
10721 		if (IS_ERR(tr)) {
10722 			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
10723 			continue;
10724 		}
10725 
10726 		if (traceoff)
10727 			tracer_tracing_off(tr);
10728 
10729 		if (traceprintk)
10730 			update_printk_trace(tr);
10731 
10732 		/*
10733 		 * memmap'd buffers can not be freed.
10734 		 */
10735 		if (memmap_area) {
10736 			tr->flags |= TRACE_ARRAY_FL_MEMMAP;
10737 			tr->ref++;
10738 		}
10739 
10740 		/*
10741 		 * Backup buffers can be freed but need vfree().
10742 		 */
10743 		if (backup)
10744 			tr->flags |= TRACE_ARRAY_FL_VMALLOC;
10745 
10746 		if (start || backup) {
10747 			tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
10748 			tr->range_name = no_free_ptr(rname);
10749 		}
10750 
10751 		while ((tok = strsep(&curr_str, ","))) {
10752 			early_enable_events(tr, tok, true);
10753 		}
10754 	}
10755 }
10756 
/*
 * tracer_alloc_buffers - boot-time setup of the top level trace array
 *
 * Allocates cpumasks and the global ring buffer (kept at minimal size
 * unless already expanded), registers the CPU hotplug callback and the
 * panic/die notifiers, and registers the nop tracer.
 *
 * Returns 0 on success, negative errno on failure (all partially
 * acquired resources are released on the unwind path).
 */
__init static int tracer_alloc_buffers(void)
{
	int ring_buf_size;
	int ret = -ENOMEM;


	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Tracing disabled due to lockdown\n");
		return -EPERM;
	}

	/*
	 * Make sure we don't accidentally add more trace options
	 * than we have bits for.
	 */
	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);

	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
		return -ENOMEM;

	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
		goto out_free_buffer_mask;

	/* Only allocate trace_printk buffers if a trace_printk exists */
	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
		/* Must be called before global_trace.buffer is allocated */
		trace_printk_init_buffers();

	/* To save memory, keep the ring buffer size to its minimum */
	if (global_trace.ring_buffer_expanded)
		ring_buf_size = trace_buf_size;
	else
		ring_buf_size = 1;

	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&global_trace.start_lock);

	/*
	 * The prepare callbacks allocates some memory for the ring buffer. We
	 * don't free the buffer if the CPU goes down. If we were to free
	 * the buffer, then the user would lose any trace that was in the
	 * buffer. The memory will be removed once the "instance" is removed.
	 */
	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
				      "trace/RB:prepare", trace_rb_cpu_prepare,
				      NULL);
	if (ret < 0)
		goto out_free_cpumask;
	/* Used for event triggers */
	ret = -ENOMEM;
	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
	if (!temp_buffer)
		goto out_rm_hp_state;

	if (trace_create_savedcmd() < 0)
		goto out_free_temp_buffer;

	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
		goto out_free_savedcmd;

	/* TODO: make the number of buffers hot pluggable with CPUS */
	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
		goto out_free_pipe_cpumask;
	}
	if (global_trace.buffer_disabled)
		tracing_off();

	/* Honor a trace clock selected on the kernel command line. */
	if (trace_boot_clock) {
		ret = tracing_set_clock(&global_trace, trace_boot_clock);
		if (ret < 0)
			pr_warn("Trace clock %s not defined, going back to default\n",
				trace_boot_clock);
	}

	/*
	 * register_tracer() might reference current_trace, so it
	 * needs to be set before we register anything. This is
	 * just a bootstrap of current_trace anyway.
	 */
	global_trace.current_trace = &nop_trace;
	global_trace.current_trace_flags = nop_trace.flags;

	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
#ifdef CONFIG_TRACER_SNAPSHOT
	spin_lock_init(&global_trace.snapshot_trigger_lock);
#endif
	ftrace_init_global_array_ops(&global_trace);

#ifdef CONFIG_MODULES
	INIT_LIST_HEAD(&global_trace.mod_events);
#endif

	init_trace_flags_index(&global_trace);

	INIT_LIST_HEAD(&global_trace.tracers);

	/* All seems OK, enable tracing */
	tracing_disabled = 0;

	atomic_notifier_chain_register(&panic_notifier_list,
				       &trace_panic_notifier);

	register_die_notifier(&trace_die_notifier);

	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;

	global_trace.syscall_buf_sz = syscall_buf_size;

	INIT_LIST_HEAD(&global_trace.systems);
	INIT_LIST_HEAD(&global_trace.events);
	INIT_LIST_HEAD(&global_trace.hist_vars);
	INIT_LIST_HEAD(&global_trace.err_log);
	list_add(&global_trace.marker_list, &marker_copies);
	list_add(&global_trace.list, &ftrace_trace_arrays);

	register_tracer(&nop_trace);

	/* Function tracing may start here (via kernel command line) */
	init_function_trace();

	apply_trace_boot_options();

	register_snapshot_cmd();

	return 0;

	/* Unwind in reverse order of the allocations above. */
out_free_pipe_cpumask:
	free_cpumask_var(global_trace.pipe_cpumask);
out_free_savedcmd:
	trace_free_saved_cmdlines_buffer();
out_free_temp_buffer:
	ring_buffer_free(temp_buffer);
out_rm_hp_state:
	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
out_free_cpumask:
	free_cpumask_var(global_trace.tracing_cpumask);
out_free_buffer_mask:
	free_cpumask_var(tracing_buffer_mask);
	return ret;
}
10900 
#ifdef CONFIG_FUNCTION_TRACER
/*
 * Accessor for the top-level trace_array. Only needed at boot up, to let
 * module cached ftrace filtering be attached to the global instance.
 */
struct trace_array *trace_get_global_array(void)
{
	return &global_trace;
}
#endif
10908 
ftrace_boot_snapshot(void)10909 void __init ftrace_boot_snapshot(void)
10910 {
10911 #ifdef CONFIG_TRACER_SNAPSHOT
10912 	struct trace_array *tr;
10913 
10914 	if (!snapshot_at_boot)
10915 		return;
10916 
10917 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10918 		if (!tr->allocated_snapshot)
10919 			continue;
10920 
10921 		tracing_snapshot_instance(tr);
10922 		trace_array_puts(tr, "** Boot snapshot taken **\n");
10923 	}
10924 #endif
10925 }
10926 
early_trace_init(void)10927 void __init early_trace_init(void)
10928 {
10929 	if (tracepoint_printk) {
10930 		tracepoint_print_iter = kzalloc_obj(*tracepoint_print_iter);
10931 		if (MEM_FAIL(!tracepoint_print_iter,
10932 			     "Failed to allocate trace iterator\n"))
10933 			tracepoint_printk = 0;
10934 		else
10935 			static_key_enable(&tracepoint_printk_key.key);
10936 	}
10937 	tracer_alloc_buffers();
10938 
10939 	init_events();
10940 }
10941 
trace_init(void)10942 void __init trace_init(void)
10943 {
10944 	trace_event_init();
10945 
10946 	if (boot_instance_index)
10947 		enable_instances();
10948 }
10949 
clear_boot_tracer(void)10950 __init static void clear_boot_tracer(void)
10951 {
10952 	/*
10953 	 * The default tracer at boot buffer is an init section.
10954 	 * This function is called in lateinit. If we did not
10955 	 * find the boot tracer, then clear it out, to prevent
10956 	 * later registration from accessing the buffer that is
10957 	 * about to be freed.
10958 	 */
10959 	if (!default_bootup_tracer)
10960 		return;
10961 
10962 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10963 	       default_bootup_tracer);
10964 	default_bootup_tracer = NULL;
10965 }
10966 
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
__init static void tracing_set_default_clock(void)
{
	/*
	 * sched_clock_stable() is determined in late_initcall. Leave the
	 * clock alone if the user picked one on the command line, or if
	 * the local clock is stable anyway.
	 */
	if (trace_boot_clock || sched_clock_stable())
		return;

	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Can not set tracing clock due to lockdown\n");
		return;
	}

	printk(KERN_WARNING
	       "Unstable clock detected, switching default tracing clock to \"global\"\n"
	       "If you want to keep using the local clock, then add:\n"
	       "  \"trace_clock=local\"\n"
	       "on the kernel command line\n");
	tracing_set_clock(&global_trace, "global");
}
#else
static inline void tracing_set_default_clock(void) { }
#endif
10988 
late_trace_init(void)10989 __init static int late_trace_init(void)
10990 {
10991 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10992 		static_key_disable(&tracepoint_printk_key.key);
10993 		tracepoint_printk = 0;
10994 	}
10995 
10996 	if (traceoff_after_boot)
10997 		tracing_off();
10998 
10999 	tracing_set_default_clock();
11000 	clear_boot_tracer();
11001 	return 0;
11002 }
11003 
11004 late_initcall_sync(late_trace_init);
11005