xref: /linux/kernel/trace/trace.c (revision ac57fa9faf716c6a0e30128c2c313443cf633019)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/syscalls.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/cleanup.h>
31 #include <linux/vmalloc.h>
32 #include <linux/ftrace.h>
33 #include <linux/module.h>
34 #include <linux/percpu.h>
35 #include <linux/splice.h>
36 #include <linux/kdebug.h>
37 #include <linux/string.h>
38 #include <linux/mount.h>
39 #include <linux/rwsem.h>
40 #include <linux/slab.h>
41 #include <linux/ctype.h>
42 #include <linux/init.h>
43 #include <linux/panic_notifier.h>
44 #include <linux/poll.h>
45 #include <linux/nmi.h>
46 #include <linux/fs.h>
47 #include <linux/trace.h>
48 #include <linux/sched/clock.h>
49 #include <linux/sched/rt.h>
50 #include <linux/fsnotify.h>
51 #include <linux/irq_work.h>
52 #include <linux/workqueue.h>
53 #include <linux/sort.h>
54 #include <linux/io.h> /* vmap_page_range() */
55 #include <linux/fs_context.h>
56 
57 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
58 
59 #include "trace.h"
60 #include "trace_output.h"
61 
62 #ifdef CONFIG_FTRACE_STARTUP_TEST
63 /*
64  * We need to change this state when a selftest is running.
65  * A selftest will lurk into the ring-buffer to count the
66  * entries inserted during the selftest although some concurrent
67  * insertions into the ring-buffer such as trace_printk could occurred
68  * at the same time, giving false positive or negative results.
69  */
70 bool __read_mostly tracing_selftest_running;
71 
72 /*
73  * If boot-time tracing including tracers/events via kernel cmdline
74  * is running, we do not want to run SELFTEST.
75  */
76 bool __read_mostly tracing_selftest_disabled;
77 
disable_tracing_selftest(const char * reason)78 void __init disable_tracing_selftest(const char *reason)
79 {
80 	if (!tracing_selftest_disabled) {
81 		tracing_selftest_disabled = true;
82 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
83 	}
84 }
85 #else
86 #define tracing_selftest_disabled	0
87 #endif
88 
89 /* Pipe tracepoints to printk */
90 static struct trace_iterator *tracepoint_print_iter;
91 int tracepoint_printk;
92 static bool tracepoint_printk_stop_on_boot __initdata;
93 static bool traceoff_after_boot __initdata;
94 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
95 
96 /* Store tracers and their flags per instance */
97 struct tracers {
98 	struct list_head	list;
99 	struct tracer		*tracer;
100 	struct tracer_flags	*flags;
101 };
102 
103 /*
104  * To prevent the comm cache from being overwritten when no
105  * tracing is active, only save the comm when a trace event
106  * occurred.
107  */
108 DEFINE_PER_CPU(bool, trace_taskinfo_save);
109 
110 /*
111  * Kill all tracing for good (never come back).
112  * It is initialized to 1 but will turn to zero if the initialization
113  * of the tracer is successful. But that is the only place that sets
114  * this back to zero.
115  */
116 int tracing_disabled = 1;
117 
118 cpumask_var_t __read_mostly	tracing_buffer_mask;
119 
120 #define MAX_TRACER_SIZE		100
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputting it to a
128  * serial console.
129  *
130  * It is default off, but you can enable it with either specifying
131  * "ftrace_dump_on_oops" in the kernel command line, or setting
132  * /proc/sys/kernel/ftrace_dump_on_oops
133  * Set 1 if you want to dump buffers of all CPUs
134  * Set 2 if you want to dump the buffer of the CPU that triggered oops
135  * Set instance name if you want to dump the specific trace instance
136  * Multiple instance dump is also supported, and instances are separated
137  * by commas.
138  */
139 /* Set to string format zero to disable by default */
140 static char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
141 
142 /* When set, tracing will stop when a WARN*() is hit */
143 static int __disable_trace_on_warning;
144 
145 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
146 			     void *buffer, size_t *lenp, loff_t *ppos);
147 static const struct ctl_table trace_sysctl_table[] = {
148 	{
149 		.procname	= "ftrace_dump_on_oops",
150 		.data		= &ftrace_dump_on_oops,
151 		.maxlen		= MAX_TRACER_SIZE,
152 		.mode		= 0644,
153 		.proc_handler	= proc_dostring,
154 	},
155 	{
156 		.procname	= "traceoff_on_warning",
157 		.data		= &__disable_trace_on_warning,
158 		.maxlen		= sizeof(__disable_trace_on_warning),
159 		.mode		= 0644,
160 		.proc_handler	= proc_dointvec,
161 	},
162 	{
163 		.procname	= "tracepoint_printk",
164 		.data		= &tracepoint_printk,
165 		.maxlen		= sizeof(tracepoint_printk),
166 		.mode		= 0644,
167 		.proc_handler	= tracepoint_printk_sysctl,
168 	},
169 };
170 
init_trace_sysctls(void)171 static int __init init_trace_sysctls(void)
172 {
173 	register_sysctl_init("kernel", trace_sysctl_table);
174 	return 0;
175 }
176 subsys_initcall(init_trace_sysctls);
177 
178 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
179 /* Map of enums to their values, for "eval_map" file */
180 struct trace_eval_map_head {
181 	struct module			*mod;
182 	unsigned long			length;
183 };
184 
185 union trace_eval_map_item;
186 
187 struct trace_eval_map_tail {
188 	/*
189 	 * "end" is first and points to NULL as it must be different
190 	 * than "mod" or "eval_string"
191 	 */
192 	union trace_eval_map_item	*next;
193 	const char			*end;	/* points to NULL */
194 };
195 
196 static DEFINE_MUTEX(trace_eval_mutex);
197 
198 /*
199  * The trace_eval_maps are saved in an array with two extra elements,
200  * one at the beginning, and one at the end. The beginning item contains
201  * the count of the saved maps (head.length), and the module they
202  * belong to if not built in (head.mod). The ending item contains a
203  * pointer to the next array of saved eval_map items.
204  */
205 union trace_eval_map_item {
206 	struct trace_eval_map		map;
207 	struct trace_eval_map_head	head;
208 	struct trace_eval_map_tail	tail;
209 };
210 
211 static union trace_eval_map_item *trace_eval_maps;
212 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
213 
214 int tracing_set_tracer(struct trace_array *tr, const char *buf);
215 static void ftrace_trace_userstack(struct trace_array *tr,
216 				   struct trace_buffer *buffer,
217 				   unsigned int trace_ctx);
218 
219 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
220 static char *default_bootup_tracer;
221 
222 static bool allocate_snapshot;
223 static bool snapshot_at_boot;
224 
225 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
226 static int boot_instance_index;
227 
228 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
229 static int boot_snapshot_index;
230 
set_cmdline_ftrace(char * str)231 static int __init set_cmdline_ftrace(char *str)
232 {
233 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
234 	default_bootup_tracer = bootup_tracer_buf;
235 	/* We are using ftrace early, expand it */
236 	trace_set_ring_buffer_expanded(NULL);
237 	return 1;
238 }
239 __setup("ftrace=", set_cmdline_ftrace);
240 
ftrace_dump_on_oops_enabled(void)241 int ftrace_dump_on_oops_enabled(void)
242 {
243 	if (!strcmp("0", ftrace_dump_on_oops))
244 		return 0;
245 	else
246 		return 1;
247 }
248 
set_ftrace_dump_on_oops(char * str)249 static int __init set_ftrace_dump_on_oops(char *str)
250 {
251 	if (!*str) {
252 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
253 		return 1;
254 	}
255 
256 	if (*str == ',') {
257 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
258 		strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
259 		return 1;
260 	}
261 
262 	if (*str++ == '=') {
263 		strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
264 		return 1;
265 	}
266 
267 	return 0;
268 }
269 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
270 
stop_trace_on_warning(char * str)271 static int __init stop_trace_on_warning(char *str)
272 {
273 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
274 		__disable_trace_on_warning = 1;
275 	return 1;
276 }
277 __setup("traceoff_on_warning", stop_trace_on_warning);
278 
boot_alloc_snapshot(char * str)279 static int __init boot_alloc_snapshot(char *str)
280 {
281 	char *slot = boot_snapshot_info + boot_snapshot_index;
282 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
283 	int ret;
284 
285 	if (str[0] == '=') {
286 		str++;
287 		if (strlen(str) >= left)
288 			return -1;
289 
290 		ret = snprintf(slot, left, "%s\t", str);
291 		boot_snapshot_index += ret;
292 	} else {
293 		allocate_snapshot = true;
294 		/* We also need the main ring buffer expanded */
295 		trace_set_ring_buffer_expanded(NULL);
296 	}
297 	return 1;
298 }
299 __setup("alloc_snapshot", boot_alloc_snapshot);
300 
301 
boot_snapshot(char * str)302 static int __init boot_snapshot(char *str)
303 {
304 	snapshot_at_boot = true;
305 	boot_alloc_snapshot(str);
306 	return 1;
307 }
308 __setup("ftrace_boot_snapshot", boot_snapshot);
309 
310 
boot_instance(char * str)311 static int __init boot_instance(char *str)
312 {
313 	char *slot = boot_instance_info + boot_instance_index;
314 	int left = sizeof(boot_instance_info) - boot_instance_index;
315 	int ret;
316 
317 	if (strlen(str) >= left)
318 		return -1;
319 
320 	ret = snprintf(slot, left, "%s\t", str);
321 	boot_instance_index += ret;
322 
323 	return 1;
324 }
325 __setup("trace_instance=", boot_instance);
326 
327 
328 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
329 
set_trace_boot_options(char * str)330 static int __init set_trace_boot_options(char *str)
331 {
332 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
333 	return 1;
334 }
335 __setup("trace_options=", set_trace_boot_options);
336 
337 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
338 static char *trace_boot_clock __initdata;
339 
set_trace_boot_clock(char * str)340 static int __init set_trace_boot_clock(char *str)
341 {
342 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
343 	trace_boot_clock = trace_boot_clock_buf;
344 	return 1;
345 }
346 __setup("trace_clock=", set_trace_boot_clock);
347 
set_tracepoint_printk(char * str)348 static int __init set_tracepoint_printk(char *str)
349 {
350 	/* Ignore the "tp_printk_stop_on_boot" param */
351 	if (*str == '_')
352 		return 0;
353 
354 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
355 		tracepoint_printk = 1;
356 	return 1;
357 }
358 __setup("tp_printk", set_tracepoint_printk);
359 
set_tracepoint_printk_stop(char * str)360 static int __init set_tracepoint_printk_stop(char *str)
361 {
362 	tracepoint_printk_stop_on_boot = true;
363 	return 1;
364 }
365 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
366 
set_traceoff_after_boot(char * str)367 static int __init set_traceoff_after_boot(char *str)
368 {
369 	traceoff_after_boot = true;
370 	return 1;
371 }
372 __setup("traceoff_after_boot", set_traceoff_after_boot);
373 
ns2usecs(u64 nsec)374 unsigned long long ns2usecs(u64 nsec)
375 {
376 	nsec += 500;
377 	do_div(nsec, 1000);
378 	return nsec;
379 }
380 
381 static void
trace_process_export(struct trace_export * export,struct ring_buffer_event * event,int flag)382 trace_process_export(struct trace_export *export,
383 	       struct ring_buffer_event *event, int flag)
384 {
385 	struct trace_entry *entry;
386 	unsigned int size = 0;
387 
388 	if (export->flags & flag) {
389 		entry = ring_buffer_event_data(event);
390 		size = ring_buffer_event_length(event);
391 		export->write(export, entry, size);
392 	}
393 }
394 
395 static DEFINE_MUTEX(ftrace_export_lock);
396 
397 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
398 
399 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
400 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
401 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
402 
ftrace_exports_enable(struct trace_export * export)403 static inline void ftrace_exports_enable(struct trace_export *export)
404 {
405 	if (export->flags & TRACE_EXPORT_FUNCTION)
406 		static_branch_inc(&trace_function_exports_enabled);
407 
408 	if (export->flags & TRACE_EXPORT_EVENT)
409 		static_branch_inc(&trace_event_exports_enabled);
410 
411 	if (export->flags & TRACE_EXPORT_MARKER)
412 		static_branch_inc(&trace_marker_exports_enabled);
413 }
414 
ftrace_exports_disable(struct trace_export * export)415 static inline void ftrace_exports_disable(struct trace_export *export)
416 {
417 	if (export->flags & TRACE_EXPORT_FUNCTION)
418 		static_branch_dec(&trace_function_exports_enabled);
419 
420 	if (export->flags & TRACE_EXPORT_EVENT)
421 		static_branch_dec(&trace_event_exports_enabled);
422 
423 	if (export->flags & TRACE_EXPORT_MARKER)
424 		static_branch_dec(&trace_marker_exports_enabled);
425 }
426 
ftrace_exports(struct ring_buffer_event * event,int flag)427 static void ftrace_exports(struct ring_buffer_event *event, int flag)
428 {
429 	struct trace_export *export;
430 
431 	guard(preempt_notrace)();
432 
433 	export = rcu_dereference_raw_check(ftrace_exports_list);
434 	while (export) {
435 		trace_process_export(export, event, flag);
436 		export = rcu_dereference_raw_check(export->next);
437 	}
438 }
439 
440 static inline void
add_trace_export(struct trace_export ** list,struct trace_export * export)441 add_trace_export(struct trace_export **list, struct trace_export *export)
442 {
443 	rcu_assign_pointer(export->next, *list);
444 	/*
445 	 * We are entering export into the list but another
446 	 * CPU might be walking that list. We need to make sure
447 	 * the export->next pointer is valid before another CPU sees
448 	 * the export pointer included into the list.
449 	 */
450 	rcu_assign_pointer(*list, export);
451 }
452 
453 static inline int
rm_trace_export(struct trace_export ** list,struct trace_export * export)454 rm_trace_export(struct trace_export **list, struct trace_export *export)
455 {
456 	struct trace_export **p;
457 
458 	for (p = list; *p != NULL; p = &(*p)->next)
459 		if (*p == export)
460 			break;
461 
462 	if (*p != export)
463 		return -1;
464 
465 	rcu_assign_pointer(*p, (*p)->next);
466 
467 	return 0;
468 }
469 
470 static inline void
add_ftrace_export(struct trace_export ** list,struct trace_export * export)471 add_ftrace_export(struct trace_export **list, struct trace_export *export)
472 {
473 	ftrace_exports_enable(export);
474 
475 	add_trace_export(list, export);
476 }
477 
478 static inline int
rm_ftrace_export(struct trace_export ** list,struct trace_export * export)479 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
480 {
481 	int ret;
482 
483 	ret = rm_trace_export(list, export);
484 	ftrace_exports_disable(export);
485 
486 	return ret;
487 }
488 
register_ftrace_export(struct trace_export * export)489 int register_ftrace_export(struct trace_export *export)
490 {
491 	if (WARN_ON_ONCE(!export->write))
492 		return -1;
493 
494 	guard(mutex)(&ftrace_export_lock);
495 
496 	add_ftrace_export(&ftrace_exports_list, export);
497 
498 	return 0;
499 }
500 EXPORT_SYMBOL_GPL(register_ftrace_export);
501 
unregister_ftrace_export(struct trace_export * export)502 int unregister_ftrace_export(struct trace_export *export)
503 {
504 	guard(mutex)(&ftrace_export_lock);
505 	return rm_ftrace_export(&ftrace_exports_list, export);
506 }
507 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
508 
509 /* trace_flags holds trace_options default values */
510 #define TRACE_DEFAULT_FLAGS						\
511 	(FUNCTION_DEFAULT_FLAGS | FPROFILE_DEFAULT_FLAGS |		\
512 	 TRACE_ITER(PRINT_PARENT) | TRACE_ITER(PRINTK) |		\
513 	 TRACE_ITER(ANNOTATE) | TRACE_ITER(CONTEXT_INFO) |		\
514 	 TRACE_ITER(RECORD_CMD) | TRACE_ITER(OVERWRITE) |		\
515 	 TRACE_ITER(IRQ_INFO) | TRACE_ITER(MARKERS) |			\
516 	 TRACE_ITER(HASH_PTR) | TRACE_ITER(TRACE_PRINTK) |		\
517 	 TRACE_ITER(COPY_MARKER))
518 
519 /* trace_options that are only supported by global_trace */
520 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER(PRINTK) |			\
521 	       TRACE_ITER(PRINTK_MSGONLY) | TRACE_ITER(RECORD_CMD) |	\
522 	       TRACE_ITER(PROF_TEXT_OFFSET) | FPROFILE_DEFAULT_FLAGS)
523 
524 /* trace_flags that are default zero for instances */
525 #define ZEROED_TRACE_FLAGS \
526 	(TRACE_ITER(EVENT_FORK) | TRACE_ITER(FUNC_FORK) | TRACE_ITER(TRACE_PRINTK) | \
527 	 TRACE_ITER(COPY_MARKER))
528 
529 /*
530  * The global_trace is the descriptor that holds the top-level tracing
531  * buffers for the live tracing.
532  */
533 static struct trace_array global_trace = {
534 	.trace_flags = TRACE_DEFAULT_FLAGS,
535 };
536 
537 struct trace_array *printk_trace = &global_trace;
538 
539 /* List of trace_arrays interested in the top level trace_marker */
540 static LIST_HEAD(marker_copies);
541 
update_printk_trace(struct trace_array * tr)542 static void update_printk_trace(struct trace_array *tr)
543 {
544 	if (printk_trace == tr)
545 		return;
546 
547 	printk_trace->trace_flags &= ~TRACE_ITER(TRACE_PRINTK);
548 	printk_trace = tr;
549 	tr->trace_flags |= TRACE_ITER(TRACE_PRINTK);
550 }
551 
552 /* Returns true if the status of tr changed */
update_marker_trace(struct trace_array * tr,int enabled)553 static bool update_marker_trace(struct trace_array *tr, int enabled)
554 {
555 	lockdep_assert_held(&event_mutex);
556 
557 	if (enabled) {
558 		if (tr->trace_flags & TRACE_ITER(COPY_MARKER))
559 			return false;
560 
561 		list_add_rcu(&tr->marker_list, &marker_copies);
562 		tr->trace_flags |= TRACE_ITER(COPY_MARKER);
563 		return true;
564 	}
565 
566 	if (!(tr->trace_flags & TRACE_ITER(COPY_MARKER)))
567 		return false;
568 
569 	list_del_rcu(&tr->marker_list);
570 	tr->trace_flags &= ~TRACE_ITER(COPY_MARKER);
571 	return true;
572 }
573 
trace_set_ring_buffer_expanded(struct trace_array * tr)574 void trace_set_ring_buffer_expanded(struct trace_array *tr)
575 {
576 	if (!tr)
577 		tr = &global_trace;
578 	tr->ring_buffer_expanded = true;
579 }
580 
581 LIST_HEAD(ftrace_trace_arrays);
582 
trace_array_get(struct trace_array * this_tr)583 int trace_array_get(struct trace_array *this_tr)
584 {
585 	struct trace_array *tr;
586 
587 	guard(mutex)(&trace_types_lock);
588 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
589 		if (tr == this_tr) {
590 			tr->ref++;
591 			return 0;
592 		}
593 	}
594 
595 	return -ENODEV;
596 }
597 
__trace_array_put(struct trace_array * this_tr)598 static void __trace_array_put(struct trace_array *this_tr)
599 {
600 	WARN_ON(!this_tr->ref);
601 	this_tr->ref--;
602 }
603 
604 /**
605  * trace_array_put - Decrement the reference counter for this trace array.
606  * @this_tr : pointer to the trace array
607  *
608  * NOTE: Use this when we no longer need the trace array returned by
609  * trace_array_get_by_name(). This ensures the trace array can be later
610  * destroyed.
611  *
612  */
trace_array_put(struct trace_array * this_tr)613 void trace_array_put(struct trace_array *this_tr)
614 {
615 	if (!this_tr)
616 		return;
617 
618 	guard(mutex)(&trace_types_lock);
619 	__trace_array_put(this_tr);
620 }
621 EXPORT_SYMBOL_GPL(trace_array_put);
622 
tracing_check_open_get_tr(struct trace_array * tr)623 int tracing_check_open_get_tr(struct trace_array *tr)
624 {
625 	int ret;
626 
627 	ret = security_locked_down(LOCKDOWN_TRACEFS);
628 	if (ret)
629 		return ret;
630 
631 	if (tracing_disabled)
632 		return -ENODEV;
633 
634 	if (tr && trace_array_get(tr) < 0)
635 		return -ENODEV;
636 
637 	return 0;
638 }
639 
buffer_ftrace_now(struct array_buffer * buf,int cpu)640 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
641 {
642 	u64 ts;
643 
644 	/* Early boot up does not have a buffer yet */
645 	if (!buf->buffer)
646 		return trace_clock_local();
647 
648 	ts = ring_buffer_time_stamp(buf->buffer);
649 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
650 
651 	return ts;
652 }
653 
ftrace_now(int cpu)654 u64 ftrace_now(int cpu)
655 {
656 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
657 }
658 
659 /**
660  * tracing_is_enabled - Show if global_trace has been enabled
661  *
662  * Shows if the global trace has been enabled or not. It uses the
663  * mirror flag "buffer_disabled" to be used in fast paths such as for
664  * the irqsoff tracer. But it may be inaccurate due to races. If you
665  * need to know the accurate state, use tracing_is_on() which is a little
666  * slower, but accurate.
667  */
tracing_is_enabled(void)668 int tracing_is_enabled(void)
669 {
670 	/*
671 	 * For quick access (irqsoff uses this in fast path), just
672 	 * return the mirror variable of the state of the ring buffer.
673 	 * It's a little racy, but we don't really care.
674 	 */
675 	return !global_trace.buffer_disabled;
676 }
677 
678 /*
679  * trace_buf_size is the size in bytes that is allocated
680  * for a buffer. Note, the number of bytes is always rounded
681  * to page size.
682  *
683  * This number is purposely set to a low number of 16384.
684  * If the dump on oops happens, it will be much appreciated
685  * to not have to wait for all that output. Anyway this can be
686  * boot time and run time configurable.
687  */
688 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
689 
690 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
691 
692 /* trace_types holds a link list of available tracers. */
693 static struct tracer		*trace_types __read_mostly;
694 
695 /*
696  * trace_types_lock is used to protect the trace_types list.
697  */
698 DEFINE_MUTEX(trace_types_lock);
699 
700 /*
701  * serialize the access of the ring buffer
702  *
703  * ring buffer serializes readers, but it is low level protection.
704  * The validity of the events (which returns by ring_buffer_peek() ..etc)
705  * are not protected by ring buffer.
706  *
707  * The content of events may become garbage if we allow other process consumes
708  * these events concurrently:
709  *   A) the page of the consumed events may become a normal page
710  *      (not reader page) in ring buffer, and this page will be rewritten
711  *      by events producer.
712  *   B) The page of the consumed events may become a page for splice_read,
713  *      and this page will be returned to system.
714  *
715  * These primitives allow multi process access to different cpu ring buffer
716  * concurrently.
717  *
718  * These primitives don't distinguish read-only and read-consume access.
719  * Multi read-only access are also serialized.
720  */
721 
722 #ifdef CONFIG_SMP
723 static DECLARE_RWSEM(all_cpu_access_lock);
724 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
725 
trace_access_lock(int cpu)726 static inline void trace_access_lock(int cpu)
727 {
728 	if (cpu == RING_BUFFER_ALL_CPUS) {
729 		/* gain it for accessing the whole ring buffer. */
730 		down_write(&all_cpu_access_lock);
731 	} else {
732 		/* gain it for accessing a cpu ring buffer. */
733 
734 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
735 		down_read(&all_cpu_access_lock);
736 
737 		/* Secondly block other access to this @cpu ring buffer. */
738 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
739 	}
740 }
741 
trace_access_unlock(int cpu)742 static inline void trace_access_unlock(int cpu)
743 {
744 	if (cpu == RING_BUFFER_ALL_CPUS) {
745 		up_write(&all_cpu_access_lock);
746 	} else {
747 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
748 		up_read(&all_cpu_access_lock);
749 	}
750 }
751 
trace_access_lock_init(void)752 static inline void trace_access_lock_init(void)
753 {
754 	int cpu;
755 
756 	for_each_possible_cpu(cpu)
757 		mutex_init(&per_cpu(cpu_access_lock, cpu));
758 }
759 
760 #else
761 
762 static DEFINE_MUTEX(access_lock);
763 
trace_access_lock(int cpu)764 static inline void trace_access_lock(int cpu)
765 {
766 	(void)cpu;
767 	mutex_lock(&access_lock);
768 }
769 
trace_access_unlock(int cpu)770 static inline void trace_access_unlock(int cpu)
771 {
772 	(void)cpu;
773 	mutex_unlock(&access_lock);
774 }
775 
trace_access_lock_init(void)776 static inline void trace_access_lock_init(void)
777 {
778 }
779 
780 #endif
781 
tracer_tracing_on(struct trace_array * tr)782 void tracer_tracing_on(struct trace_array *tr)
783 {
784 	if (tr->array_buffer.buffer)
785 		ring_buffer_record_on(tr->array_buffer.buffer);
786 	/*
787 	 * This flag is looked at when buffers haven't been allocated
788 	 * yet, or by some tracers (like irqsoff), that just want to
789 	 * know if the ring buffer has been disabled, but it can handle
790 	 * races of where it gets disabled but we still do a record.
791 	 * As the check is in the fast path of the tracers, it is more
792 	 * important to be fast than accurate.
793 	 */
794 	tr->buffer_disabled = 0;
795 }
796 
797 /**
798  * tracing_on - enable tracing buffers
799  *
800  * This function enables tracing buffers that may have been
801  * disabled with tracing_off.
802  */
tracing_on(void)803 void tracing_on(void)
804 {
805 	tracer_tracing_on(&global_trace);
806 }
807 EXPORT_SYMBOL_GPL(tracing_on);
808 
809 #ifdef CONFIG_TRACER_SNAPSHOT
tracing_snapshot_instance_cond(struct trace_array * tr,void * cond_data)810 static void tracing_snapshot_instance_cond(struct trace_array *tr,
811 					   void *cond_data)
812 {
813 	unsigned long flags;
814 
815 	if (in_nmi()) {
816 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
817 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
818 		return;
819 	}
820 
821 	if (!tr->allocated_snapshot) {
822 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
823 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
824 		tracer_tracing_off(tr);
825 		return;
826 	}
827 
828 	if (tr->mapped) {
829 		trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
830 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
831 		return;
832 	}
833 
834 	/* Note, snapshot can not be used when the tracer uses it */
835 	if (tracer_uses_snapshot(tr->current_trace)) {
836 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
837 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
838 		return;
839 	}
840 
841 	local_irq_save(flags);
842 	update_max_tr(tr, current, smp_processor_id(), cond_data);
843 	local_irq_restore(flags);
844 }
845 
tracing_snapshot_instance(struct trace_array * tr)846 void tracing_snapshot_instance(struct trace_array *tr)
847 {
848 	tracing_snapshot_instance_cond(tr, NULL);
849 }
850 
851 /**
852  * tracing_snapshot - take a snapshot of the current buffer.
853  *
854  * This causes a swap between the snapshot buffer and the current live
855  * tracing buffer. You can use this to take snapshots of the live
856  * trace when some condition is triggered, but continue to trace.
857  *
858  * Note, make sure to allocate the snapshot with either
859  * a tracing_snapshot_alloc(), or by doing it manually
860  * with: echo 1 > /sys/kernel/tracing/snapshot
861  *
862  * If the snapshot buffer is not allocated, it will stop tracing.
863  * Basically making a permanent snapshot.
864  */
tracing_snapshot(void)865 void tracing_snapshot(void)
866 {
867 	struct trace_array *tr = &global_trace;
868 
869 	tracing_snapshot_instance(tr);
870 }
871 EXPORT_SYMBOL_GPL(tracing_snapshot);
872 
873 /**
874  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
875  * @tr:		The tracing instance to snapshot
876  * @cond_data:	The data to be tested conditionally, and possibly saved
877  *
878  * This is the same as tracing_snapshot() except that the snapshot is
879  * conditional - the snapshot will only happen if the
880  * cond_snapshot.update() implementation receiving the cond_data
881  * returns true, which means that the trace array's cond_snapshot
882  * update() operation used the cond_data to determine whether the
883  * snapshot should be taken, and if it was, presumably saved it along
884  * with the snapshot.
885  */
tracing_snapshot_cond(struct trace_array * tr,void * cond_data)886 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
887 {
888 	tracing_snapshot_instance_cond(tr, cond_data);
889 }
890 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
891 
892 /**
893  * tracing_cond_snapshot_data - get the user data associated with a snapshot
894  * @tr:		The tracing instance
895  *
896  * When the user enables a conditional snapshot using
897  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
898  * with the snapshot.  This accessor is used to retrieve it.
899  *
900  * Should not be called from cond_snapshot.update(), since it takes
901  * the tr->max_lock lock, which the code calling
902  * cond_snapshot.update() has already done.
903  *
904  * Returns the cond_data associated with the trace array's snapshot.
905  */
tracing_cond_snapshot_data(struct trace_array * tr)906 void *tracing_cond_snapshot_data(struct trace_array *tr)
907 {
908 	void *cond_data = NULL;
909 
910 	local_irq_disable();
911 	arch_spin_lock(&tr->max_lock);
912 
913 	if (tr->cond_snapshot)
914 		cond_data = tr->cond_snapshot->cond_data;
915 
916 	arch_spin_unlock(&tr->max_lock);
917 	local_irq_enable();
918 
919 	return cond_data;
920 }
921 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
922 
923 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
924 					struct array_buffer *size_buf, int cpu_id);
925 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
926 
tracing_alloc_snapshot_instance(struct trace_array * tr)927 int tracing_alloc_snapshot_instance(struct trace_array *tr)
928 {
929 	int order;
930 	int ret;
931 
932 	if (!tr->allocated_snapshot) {
933 
934 		/* Make the snapshot buffer have the same order as main buffer */
935 		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
936 		ret = ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, order);
937 		if (ret < 0)
938 			return ret;
939 
940 		/* allocate spare buffer */
941 		ret = resize_buffer_duplicate_size(&tr->snapshot_buffer,
942 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
943 		if (ret < 0)
944 			return ret;
945 
946 		tr->allocated_snapshot = true;
947 	}
948 
949 	return 0;
950 }
951 
free_snapshot(struct trace_array * tr)952 static void free_snapshot(struct trace_array *tr)
953 {
954 	/*
955 	 * We don't free the ring buffer. instead, resize it because
956 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
957 	 * we want preserve it.
958 	 */
959 	ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, 0);
960 	ring_buffer_resize(tr->snapshot_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
961 	set_buffer_entries(&tr->snapshot_buffer, 1);
962 	tracing_reset_online_cpus(&tr->snapshot_buffer);
963 	tr->allocated_snapshot = false;
964 }
965 
tracing_arm_snapshot_locked(struct trace_array * tr)966 static int tracing_arm_snapshot_locked(struct trace_array *tr)
967 {
968 	int ret;
969 
970 	lockdep_assert_held(&trace_types_lock);
971 
972 	spin_lock(&tr->snapshot_trigger_lock);
973 	if (tr->snapshot == UINT_MAX || tr->mapped) {
974 		spin_unlock(&tr->snapshot_trigger_lock);
975 		return -EBUSY;
976 	}
977 
978 	tr->snapshot++;
979 	spin_unlock(&tr->snapshot_trigger_lock);
980 
981 	ret = tracing_alloc_snapshot_instance(tr);
982 	if (ret) {
983 		spin_lock(&tr->snapshot_trigger_lock);
984 		tr->snapshot--;
985 		spin_unlock(&tr->snapshot_trigger_lock);
986 	}
987 
988 	return ret;
989 }
990 
tracing_arm_snapshot(struct trace_array * tr)991 int tracing_arm_snapshot(struct trace_array *tr)
992 {
993 	guard(mutex)(&trace_types_lock);
994 	return tracing_arm_snapshot_locked(tr);
995 }
996 
tracing_disarm_snapshot(struct trace_array * tr)997 void tracing_disarm_snapshot(struct trace_array *tr)
998 {
999 	spin_lock(&tr->snapshot_trigger_lock);
1000 	if (!WARN_ON(!tr->snapshot))
1001 		tr->snapshot--;
1002 	spin_unlock(&tr->snapshot_trigger_lock);
1003 }
1004 
1005 /**
1006  * tracing_alloc_snapshot - allocate snapshot buffer.
1007  *
1008  * This only allocates the snapshot buffer if it isn't already
1009  * allocated - it doesn't also take a snapshot.
1010  *
1011  * This is meant to be used in cases where the snapshot buffer needs
1012  * to be set up for events that can't sleep but need to be able to
1013  * trigger a snapshot.
1014  */
tracing_alloc_snapshot(void)1015 int tracing_alloc_snapshot(void)
1016 {
1017 	struct trace_array *tr = &global_trace;
1018 	int ret;
1019 
1020 	ret = tracing_alloc_snapshot_instance(tr);
1021 	WARN_ON(ret < 0);
1022 
1023 	return ret;
1024 }
1025 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1026 
1027 /**
1028  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1029  *
1030  * This is similar to tracing_snapshot(), but it will allocate the
1031  * snapshot buffer if it isn't already allocated. Use this only
1032  * where it is safe to sleep, as the allocation may sleep.
1033  *
1034  * This causes a swap between the snapshot buffer and the current live
1035  * tracing buffer. You can use this to take snapshots of the live
1036  * trace when some condition is triggered, but continue to trace.
1037  */
tracing_snapshot_alloc(void)1038 void tracing_snapshot_alloc(void)
1039 {
1040 	int ret;
1041 
1042 	ret = tracing_alloc_snapshot();
1043 	if (ret < 0)
1044 		return;
1045 
1046 	tracing_snapshot();
1047 }
1048 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1049 
1050 /**
1051  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1052  * @tr:		The tracing instance
1053  * @cond_data:	User data to associate with the snapshot
1054  * @update:	Implementation of the cond_snapshot update function
1055  *
1056  * Check whether the conditional snapshot for the given instance has
1057  * already been enabled, or if the current tracer is already using a
1058  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1059  * save the cond_data and update function inside.
1060  *
1061  * Returns 0 if successful, error otherwise.
1062  */
tracing_snapshot_cond_enable(struct trace_array * tr,void * cond_data,cond_update_fn_t update)1063 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1064 				 cond_update_fn_t update)
1065 {
1066 	struct cond_snapshot *cond_snapshot __free(kfree) =
1067 		kzalloc_obj(*cond_snapshot);
1068 	int ret;
1069 
1070 	if (!cond_snapshot)
1071 		return -ENOMEM;
1072 
1073 	cond_snapshot->cond_data = cond_data;
1074 	cond_snapshot->update = update;
1075 
1076 	guard(mutex)(&trace_types_lock);
1077 
1078 	if (tracer_uses_snapshot(tr->current_trace))
1079 		return -EBUSY;
1080 
1081 	/*
1082 	 * The cond_snapshot can only change to NULL without the
1083 	 * trace_types_lock. We don't care if we race with it going
1084 	 * to NULL, but we want to make sure that it's not set to
1085 	 * something other than NULL when we get here, which we can
1086 	 * do safely with only holding the trace_types_lock and not
1087 	 * having to take the max_lock.
1088 	 */
1089 	if (tr->cond_snapshot)
1090 		return -EBUSY;
1091 
1092 	ret = tracing_arm_snapshot_locked(tr);
1093 	if (ret)
1094 		return ret;
1095 
1096 	local_irq_disable();
1097 	arch_spin_lock(&tr->max_lock);
1098 	tr->cond_snapshot = no_free_ptr(cond_snapshot);
1099 	arch_spin_unlock(&tr->max_lock);
1100 	local_irq_enable();
1101 
1102 	return 0;
1103 }
1104 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1105 
1106 /**
1107  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1108  * @tr:		The tracing instance
1109  *
1110  * Check whether the conditional snapshot for the given instance is
1111  * enabled; if so, free the cond_snapshot associated with it,
1112  * otherwise return -EINVAL.
1113  *
1114  * Returns 0 if successful, error otherwise.
1115  */
tracing_snapshot_cond_disable(struct trace_array * tr)1116 int tracing_snapshot_cond_disable(struct trace_array *tr)
1117 {
1118 	int ret = 0;
1119 
1120 	local_irq_disable();
1121 	arch_spin_lock(&tr->max_lock);
1122 
1123 	if (!tr->cond_snapshot)
1124 		ret = -EINVAL;
1125 	else {
1126 		kfree(tr->cond_snapshot);
1127 		tr->cond_snapshot = NULL;
1128 	}
1129 
1130 	arch_spin_unlock(&tr->max_lock);
1131 	local_irq_enable();
1132 
1133 	tracing_disarm_snapshot(tr);
1134 
1135 	return ret;
1136 }
1137 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1138 #else
tracing_snapshot(void)1139 void tracing_snapshot(void)
1140 {
1141 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1142 }
1143 EXPORT_SYMBOL_GPL(tracing_snapshot);
tracing_snapshot_cond(struct trace_array * tr,void * cond_data)1144 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1145 {
1146 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1147 }
1148 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
tracing_alloc_snapshot(void)1149 int tracing_alloc_snapshot(void)
1150 {
1151 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1152 	return -ENODEV;
1153 }
1154 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
tracing_snapshot_alloc(void)1155 void tracing_snapshot_alloc(void)
1156 {
1157 	/* Give warning */
1158 	tracing_snapshot();
1159 }
1160 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
tracing_cond_snapshot_data(struct trace_array * tr)1161 void *tracing_cond_snapshot_data(struct trace_array *tr)
1162 {
1163 	return NULL;
1164 }
1165 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
tracing_snapshot_cond_enable(struct trace_array * tr,void * cond_data,cond_update_fn_t update)1166 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1167 {
1168 	return -ENODEV;
1169 }
1170 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
tracing_snapshot_cond_disable(struct trace_array * tr)1171 int tracing_snapshot_cond_disable(struct trace_array *tr)
1172 {
1173 	return false;
1174 }
1175 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1176 #define free_snapshot(tr)	do { } while (0)
1177 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1178 #endif /* CONFIG_TRACER_SNAPSHOT */
1179 
tracer_tracing_off(struct trace_array * tr)1180 void tracer_tracing_off(struct trace_array *tr)
1181 {
1182 	if (tr->array_buffer.buffer)
1183 		ring_buffer_record_off(tr->array_buffer.buffer);
1184 	/*
1185 	 * This flag is looked at when buffers haven't been allocated
1186 	 * yet, or by some tracers (like irqsoff), that just want to
1187 	 * know if the ring buffer has been disabled, but it can handle
1188 	 * races of where it gets disabled but we still do a record.
1189 	 * As the check is in the fast path of the tracers, it is more
1190 	 * important to be fast than accurate.
1191 	 */
1192 	tr->buffer_disabled = 1;
1193 }
1194 
1195 /**
1196  * tracer_tracing_disable() - temporary disable the buffer from write
1197  * @tr: The trace array to disable its buffer for
1198  *
1199  * Expects trace_tracing_enable() to re-enable tracing.
1200  * The difference between this and tracer_tracing_off() is that this
1201  * is a counter and can nest, whereas, tracer_tracing_off() can
1202  * be called multiple times and a single trace_tracing_on() will
1203  * enable it.
1204  */
tracer_tracing_disable(struct trace_array * tr)1205 void tracer_tracing_disable(struct trace_array *tr)
1206 {
1207 	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1208 		return;
1209 
1210 	ring_buffer_record_disable(tr->array_buffer.buffer);
1211 }
1212 
1213 /**
1214  * tracer_tracing_enable() - counter part of tracer_tracing_disable()
1215  * @tr: The trace array that had tracer_tracincg_disable() called on it
1216  *
1217  * This is called after tracer_tracing_disable() has been called on @tr,
1218  * when it's safe to re-enable tracing.
1219  */
tracer_tracing_enable(struct trace_array * tr)1220 void tracer_tracing_enable(struct trace_array *tr)
1221 {
1222 	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1223 		return;
1224 
1225 	ring_buffer_record_enable(tr->array_buffer.buffer);
1226 }
1227 
1228 /**
1229  * tracing_off - turn off tracing buffers
1230  *
1231  * This function stops the tracing buffers from recording data.
1232  * It does not disable any overhead the tracers themselves may
1233  * be causing. This function simply causes all recording to
1234  * the ring buffers to fail.
1235  */
tracing_off(void)1236 void tracing_off(void)
1237 {
1238 	tracer_tracing_off(&global_trace);
1239 }
1240 EXPORT_SYMBOL_GPL(tracing_off);
1241 
disable_trace_on_warning(void)1242 void disable_trace_on_warning(void)
1243 {
1244 	if (__disable_trace_on_warning) {
1245 		struct trace_array *tr = READ_ONCE(printk_trace);
1246 
1247 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1248 			"Disabling tracing due to warning\n");
1249 		tracing_off();
1250 
1251 		/* Disable trace_printk() buffer too */
1252 		if (tr != &global_trace) {
1253 			trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
1254 					       "Disabling tracing due to warning\n");
1255 			tracer_tracing_off(tr);
1256 		}
1257 	}
1258 }
1259 
1260 /**
1261  * tracer_tracing_is_on - show real state of ring buffer enabled
1262  * @tr : the trace array to know if ring buffer is enabled
1263  *
1264  * Shows real state of the ring buffer if it is enabled or not.
1265  */
tracer_tracing_is_on(struct trace_array * tr)1266 bool tracer_tracing_is_on(struct trace_array *tr)
1267 {
1268 	if (tr->array_buffer.buffer)
1269 		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1270 	return !tr->buffer_disabled;
1271 }
1272 
1273 /**
1274  * tracing_is_on - show state of ring buffers enabled
1275  */
tracing_is_on(void)1276 int tracing_is_on(void)
1277 {
1278 	return tracer_tracing_is_on(&global_trace);
1279 }
1280 EXPORT_SYMBOL_GPL(tracing_is_on);
1281 
set_buf_size(char * str)1282 static int __init set_buf_size(char *str)
1283 {
1284 	unsigned long buf_size;
1285 
1286 	if (!str)
1287 		return 0;
1288 	buf_size = memparse(str, &str);
1289 	/*
1290 	 * nr_entries can not be zero and the startup
1291 	 * tests require some buffer space. Therefore
1292 	 * ensure we have at least 4096 bytes of buffer.
1293 	 */
1294 	trace_buf_size = max(4096UL, buf_size);
1295 	return 1;
1296 }
1297 __setup("trace_buf_size=", set_buf_size);
1298 
set_tracing_thresh(char * str)1299 static int __init set_tracing_thresh(char *str)
1300 {
1301 	unsigned long threshold;
1302 	int ret;
1303 
1304 	if (!str)
1305 		return 0;
1306 	ret = kstrtoul(str, 0, &threshold);
1307 	if (ret < 0)
1308 		return 0;
1309 	tracing_thresh = threshold * 1000;
1310 	return 1;
1311 }
1312 __setup("tracing_thresh=", set_tracing_thresh);
1313 
nsecs_to_usecs(unsigned long nsecs)1314 unsigned long nsecs_to_usecs(unsigned long nsecs)
1315 {
1316 	return nsecs / 1000;
1317 }
1318 
1319 /*
1320  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1321  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1322  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1323  * of strings in the order that the evals (enum) were defined.
1324  */
1325 #undef C
1326 #define C(a, b) b
1327 
1328 /* These must match the bit positions in trace_iterator_flags */
1329 static const char *trace_options[] = {
1330 	TRACE_FLAGS
1331 	NULL
1332 };
1333 
1334 static struct {
1335 	u64 (*func)(void);
1336 	const char *name;
1337 	int in_ns;		/* is this clock in nanoseconds? */
1338 } trace_clocks[] = {
1339 	{ trace_clock_local,		"local",	1 },
1340 	{ trace_clock_global,		"global",	1 },
1341 	{ trace_clock_counter,		"counter",	0 },
1342 	{ trace_clock_jiffies,		"uptime",	0 },
1343 	{ trace_clock,			"perf",		1 },
1344 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1345 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1346 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1347 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1348 	ARCH_TRACE_CLOCKS
1349 };
1350 
trace_clock_in_ns(struct trace_array * tr)1351 bool trace_clock_in_ns(struct trace_array *tr)
1352 {
1353 	if (trace_clocks[tr->clock_id].in_ns)
1354 		return true;
1355 
1356 	return false;
1357 }
1358 
1359 /*
1360  * trace_parser_get_init - gets the buffer for trace parser
1361  */
trace_parser_get_init(struct trace_parser * parser,int size)1362 int trace_parser_get_init(struct trace_parser *parser, int size)
1363 {
1364 	memset(parser, 0, sizeof(*parser));
1365 
1366 	parser->buffer = kmalloc(size, GFP_KERNEL);
1367 	if (!parser->buffer)
1368 		return 1;
1369 
1370 	parser->size = size;
1371 	return 0;
1372 }
1373 
1374 /*
1375  * trace_parser_put - frees the buffer for trace parser
1376  */
trace_parser_put(struct trace_parser * parser)1377 void trace_parser_put(struct trace_parser *parser)
1378 {
1379 	kfree(parser->buffer);
1380 	parser->buffer = NULL;
1381 }
1382 
1383 /*
1384  * trace_get_user - reads the user input string separated by  space
1385  * (matched by isspace(ch))
1386  *
1387  * For each string found the 'struct trace_parser' is updated,
1388  * and the function returns.
1389  *
1390  * Returns number of bytes read.
1391  *
1392  * See kernel/trace/trace.h for 'struct trace_parser' details.
1393  */
trace_get_user(struct trace_parser * parser,const char __user * ubuf,size_t cnt,loff_t * ppos)1394 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1395 	size_t cnt, loff_t *ppos)
1396 {
1397 	char ch;
1398 	size_t read = 0;
1399 	ssize_t ret;
1400 
1401 	if (!*ppos)
1402 		trace_parser_clear(parser);
1403 
1404 	ret = get_user(ch, ubuf++);
1405 	if (ret)
1406 		goto fail;
1407 
1408 	read++;
1409 	cnt--;
1410 
1411 	/*
1412 	 * The parser is not finished with the last write,
1413 	 * continue reading the user input without skipping spaces.
1414 	 */
1415 	if (!parser->cont) {
1416 		/* skip white space */
1417 		while (cnt && isspace(ch)) {
1418 			ret = get_user(ch, ubuf++);
1419 			if (ret)
1420 				goto fail;
1421 			read++;
1422 			cnt--;
1423 		}
1424 
1425 		parser->idx = 0;
1426 
1427 		/* only spaces were written */
1428 		if (isspace(ch) || !ch) {
1429 			*ppos += read;
1430 			return read;
1431 		}
1432 	}
1433 
1434 	/* read the non-space input */
1435 	while (cnt && !isspace(ch) && ch) {
1436 		if (parser->idx < parser->size - 1)
1437 			parser->buffer[parser->idx++] = ch;
1438 		else {
1439 			ret = -EINVAL;
1440 			goto fail;
1441 		}
1442 
1443 		ret = get_user(ch, ubuf++);
1444 		if (ret)
1445 			goto fail;
1446 		read++;
1447 		cnt--;
1448 	}
1449 
1450 	/* We either got finished input or we have to wait for another call. */
1451 	if (isspace(ch) || !ch) {
1452 		parser->buffer[parser->idx] = 0;
1453 		parser->cont = false;
1454 	} else if (parser->idx < parser->size - 1) {
1455 		parser->cont = true;
1456 		parser->buffer[parser->idx++] = ch;
1457 		/* Make sure the parsed string always terminates with '\0'. */
1458 		parser->buffer[parser->idx] = 0;
1459 	} else {
1460 		ret = -EINVAL;
1461 		goto fail;
1462 	}
1463 
1464 	*ppos += read;
1465 	return read;
1466 fail:
1467 	trace_parser_fail(parser);
1468 	return ret;
1469 }
1470 
1471 /* TODO add a seq_buf_to_buffer() */
trace_seq_to_buffer(struct trace_seq * s,void * buf,size_t cnt)1472 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1473 {
1474 	int len;
1475 
1476 	if (trace_seq_used(s) <= s->readpos)
1477 		return -EBUSY;
1478 
1479 	len = trace_seq_used(s) - s->readpos;
1480 	if (cnt > len)
1481 		cnt = len;
1482 	memcpy(buf, s->buffer + s->readpos, cnt);
1483 
1484 	s->readpos += cnt;
1485 	return cnt;
1486 }
1487 
1488 unsigned long __read_mostly	tracing_thresh;
1489 
1490 #ifdef CONFIG_TRACER_MAX_TRACE
1491 #ifdef LATENCY_FS_NOTIFY
1492 static struct workqueue_struct *fsnotify_wq;
1493 
latency_fsnotify_workfn(struct work_struct * work)1494 static void latency_fsnotify_workfn(struct work_struct *work)
1495 {
1496 	struct trace_array *tr = container_of(work, struct trace_array,
1497 					      fsnotify_work);
1498 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1499 }
1500 
latency_fsnotify_workfn_irq(struct irq_work * iwork)1501 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1502 {
1503 	struct trace_array *tr = container_of(iwork, struct trace_array,
1504 					      fsnotify_irqwork);
1505 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1506 }
1507 
latency_fsnotify_init(void)1508 __init static int latency_fsnotify_init(void)
1509 {
1510 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1511 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1512 	if (!fsnotify_wq) {
1513 		pr_err("Unable to allocate tr_max_lat_wq\n");
1514 		return -ENOMEM;
1515 	}
1516 	return 0;
1517 }
1518 
1519 late_initcall_sync(latency_fsnotify_init);
1520 
latency_fsnotify(struct trace_array * tr)1521 void latency_fsnotify(struct trace_array *tr)
1522 {
1523 	if (!fsnotify_wq)
1524 		return;
1525 	/*
1526 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1527 	 * possible that we are called from __schedule() or do_idle(), which
1528 	 * could cause a deadlock.
1529 	 */
1530 	irq_work_queue(&tr->fsnotify_irqwork);
1531 }
1532 #endif /* !LATENCY_FS_NOTIFY */
1533 
1534 static const struct file_operations tracing_max_lat_fops;
1535 
trace_create_maxlat_file(struct trace_array * tr,struct dentry * d_tracer)1536 static void trace_create_maxlat_file(struct trace_array *tr,
1537 				     struct dentry *d_tracer)
1538 {
1539 #ifdef LATENCY_FS_NOTIFY
1540 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1541 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1542 #endif
1543 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1544 					      TRACE_MODE_WRITE,
1545 					      d_tracer, tr,
1546 					      &tracing_max_lat_fops);
1547 }
1548 
1549 /*
1550  * Copy the new maximum trace into the separate maximum-trace
1551  * structure. (this way the maximum trace is permanently saved,
1552  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1553  */
1554 static void
__update_max_tr(struct trace_array * tr,struct task_struct * tsk,int cpu)1555 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1556 {
1557 	struct array_buffer *trace_buf = &tr->array_buffer;
1558 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1559 	struct array_buffer *max_buf = &tr->snapshot_buffer;
1560 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1561 
1562 	max_buf->cpu = cpu;
1563 	max_buf->time_start = data->preempt_timestamp;
1564 
1565 	max_data->saved_latency = tr->max_latency;
1566 	max_data->critical_start = data->critical_start;
1567 	max_data->critical_end = data->critical_end;
1568 
1569 	strscpy(max_data->comm, tsk->comm);
1570 	max_data->pid = tsk->pid;
1571 	/*
1572 	 * If tsk == current, then use current_uid(), as that does not use
1573 	 * RCU. The irq tracer can be called out of RCU scope.
1574 	 */
1575 	if (tsk == current)
1576 		max_data->uid = current_uid();
1577 	else
1578 		max_data->uid = task_uid(tsk);
1579 
1580 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1581 	max_data->policy = tsk->policy;
1582 	max_data->rt_priority = tsk->rt_priority;
1583 
1584 	/* record this tasks comm */
1585 	tracing_record_cmdline(tsk);
1586 	latency_fsnotify(tr);
1587 }
1588 #else
trace_create_maxlat_file(struct trace_array * tr,struct dentry * d_tracer)1589 static inline void trace_create_maxlat_file(struct trace_array *tr,
1590 					    struct dentry *d_tracer) { }
__update_max_tr(struct trace_array * tr,struct task_struct * tsk,int cpu)1591 static inline void __update_max_tr(struct trace_array *tr,
1592 				   struct task_struct *tsk, int cpu) { }
1593 #endif /* CONFIG_TRACER_MAX_TRACE */
1594 
1595 #ifdef CONFIG_TRACER_SNAPSHOT
1596 /**
1597  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1598  * @tr: tracer
1599  * @tsk: the task with the latency
1600  * @cpu: The cpu that initiated the trace.
1601  * @cond_data: User data associated with a conditional snapshot
1602  *
1603  * Flip the buffers between the @tr and the max_tr and record information
1604  * about which task was the cause of this latency.
1605  */
1606 void
update_max_tr(struct trace_array * tr,struct task_struct * tsk,int cpu,void * cond_data)1607 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1608 	      void *cond_data)
1609 {
1610 	if (tr->stop_count)
1611 		return;
1612 
1613 	WARN_ON_ONCE(!irqs_disabled());
1614 
1615 	if (!tr->allocated_snapshot) {
1616 		/* Only the nop tracer should hit this when disabling */
1617 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1618 		return;
1619 	}
1620 
1621 	arch_spin_lock(&tr->max_lock);
1622 
1623 	/* Inherit the recordable setting from array_buffer */
1624 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1625 		ring_buffer_record_on(tr->snapshot_buffer.buffer);
1626 	else
1627 		ring_buffer_record_off(tr->snapshot_buffer.buffer);
1628 
1629 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1630 		arch_spin_unlock(&tr->max_lock);
1631 		return;
1632 	}
1633 
1634 	swap(tr->array_buffer.buffer, tr->snapshot_buffer.buffer);
1635 
1636 	__update_max_tr(tr, tsk, cpu);
1637 
1638 	arch_spin_unlock(&tr->max_lock);
1639 
1640 	/* Any waiters on the old snapshot buffer need to wake up */
1641 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1642 }
1643 
1644 /**
1645  * update_max_tr_single - only copy one trace over, and reset the rest
1646  * @tr: tracer
1647  * @tsk: task with the latency
1648  * @cpu: the cpu of the buffer to copy.
1649  *
1650  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1651  */
1652 void
update_max_tr_single(struct trace_array * tr,struct task_struct * tsk,int cpu)1653 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1654 {
1655 	int ret;
1656 
1657 	if (tr->stop_count)
1658 		return;
1659 
1660 	WARN_ON_ONCE(!irqs_disabled());
1661 	if (!tr->allocated_snapshot) {
1662 		/* Only the nop tracer should hit this when disabling */
1663 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1664 		return;
1665 	}
1666 
1667 	arch_spin_lock(&tr->max_lock);
1668 
1669 	ret = ring_buffer_swap_cpu(tr->snapshot_buffer.buffer, tr->array_buffer.buffer, cpu);
1670 
1671 	if (ret == -EBUSY) {
1672 		/*
1673 		 * We failed to swap the buffer due to a commit taking
1674 		 * place on this CPU. We fail to record, but we reset
1675 		 * the max trace buffer (no one writes directly to it)
1676 		 * and flag that it failed.
1677 		 * Another reason is resize is in progress.
1678 		 */
1679 		trace_array_printk_buf(tr->snapshot_buffer.buffer, _THIS_IP_,
1680 			"Failed to swap buffers due to commit or resize in progress\n");
1681 	}
1682 
1683 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1684 
1685 	__update_max_tr(tr, tsk, cpu);
1686 	arch_spin_unlock(&tr->max_lock);
1687 }
1688 #endif /* CONFIG_TRACER_SNAPSHOT */
1689 
1690 struct pipe_wait {
1691 	struct trace_iterator		*iter;
1692 	int				wait_index;
1693 };
1694 
wait_pipe_cond(void * data)1695 static bool wait_pipe_cond(void *data)
1696 {
1697 	struct pipe_wait *pwait = data;
1698 	struct trace_iterator *iter = pwait->iter;
1699 
1700 	if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
1701 		return true;
1702 
1703 	return iter->closed;
1704 }
1705 
wait_on_pipe(struct trace_iterator * iter,int full)1706 static int wait_on_pipe(struct trace_iterator *iter, int full)
1707 {
1708 	struct pipe_wait pwait;
1709 	int ret;
1710 
1711 	/* Iterators are static, they should be filled or empty */
1712 	if (trace_buffer_iter(iter, iter->cpu_file))
1713 		return 0;
1714 
1715 	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
1716 	pwait.iter = iter;
1717 
1718 	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
1719 			       wait_pipe_cond, &pwait);
1720 
1721 #ifdef CONFIG_TRACER_SNAPSHOT
1722 	/*
1723 	 * Make sure this is still the snapshot buffer, as if a snapshot were
1724 	 * to happen, this would now be the main buffer.
1725 	 */
1726 	if (iter->snapshot)
1727 		iter->array_buffer = &iter->tr->snapshot_buffer;
1728 #endif
1729 	return ret;
1730 }
1731 
1732 #ifdef CONFIG_FTRACE_STARTUP_TEST
1733 static bool selftests_can_run;
1734 
1735 struct trace_selftests {
1736 	struct list_head		list;
1737 	struct tracer			*type;
1738 };
1739 
1740 static LIST_HEAD(postponed_selftests);
1741 
save_selftest(struct tracer * type)1742 static int save_selftest(struct tracer *type)
1743 {
1744 	struct trace_selftests *selftest;
1745 
1746 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1747 	if (!selftest)
1748 		return -ENOMEM;
1749 
1750 	selftest->type = type;
1751 	list_add(&selftest->list, &postponed_selftests);
1752 	return 0;
1753 }
1754 
run_tracer_selftest(struct tracer * type)1755 static int run_tracer_selftest(struct tracer *type)
1756 {
1757 	struct trace_array *tr = &global_trace;
1758 	struct tracer_flags *saved_flags = tr->current_trace_flags;
1759 	struct tracer *saved_tracer = tr->current_trace;
1760 	int ret;
1761 
1762 	if (!type->selftest || tracing_selftest_disabled)
1763 		return 0;
1764 
1765 	/*
1766 	 * If a tracer registers early in boot up (before scheduling is
1767 	 * initialized and such), then do not run its selftests yet.
1768 	 * Instead, run it a little later in the boot process.
1769 	 */
1770 	if (!selftests_can_run)
1771 		return save_selftest(type);
1772 
1773 	if (!tracing_is_on()) {
1774 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1775 			type->name);
1776 		return 0;
1777 	}
1778 
1779 	/*
1780 	 * Run a selftest on this tracer.
1781 	 * Here we reset the trace buffer, and set the current
1782 	 * tracer to be this tracer. The tracer can then run some
1783 	 * internal tracing to verify that everything is in order.
1784 	 * If we fail, we do not register this tracer.
1785 	 */
1786 	tracing_reset_online_cpus(&tr->array_buffer);
1787 
1788 	tr->current_trace = type;
1789 	tr->current_trace_flags = type->flags ? : type->default_flags;
1790 
1791 #ifdef CONFIG_TRACER_MAX_TRACE
1792 	if (tracer_uses_snapshot(type)) {
1793 		/* If we expanded the buffers, make sure the max is expanded too */
1794 		if (tr->ring_buffer_expanded)
1795 			ring_buffer_resize(tr->snapshot_buffer.buffer, trace_buf_size,
1796 					   RING_BUFFER_ALL_CPUS);
1797 		tr->allocated_snapshot = true;
1798 	}
1799 #endif
1800 
1801 	/* the test is responsible for initializing and enabling */
1802 	pr_info("Testing tracer %s: ", type->name);
1803 	ret = type->selftest(type, tr);
1804 	/* the test is responsible for resetting too */
1805 	tr->current_trace = saved_tracer;
1806 	tr->current_trace_flags = saved_flags;
1807 	if (ret) {
1808 		printk(KERN_CONT "FAILED!\n");
1809 		/* Add the warning after printing 'FAILED' */
1810 		WARN_ON(1);
1811 		return -1;
1812 	}
1813 	/* Only reset on passing, to avoid touching corrupted buffers */
1814 	tracing_reset_online_cpus(&tr->array_buffer);
1815 
1816 #ifdef CONFIG_TRACER_MAX_TRACE
1817 	if (tracer_uses_snapshot(type)) {
1818 		tr->allocated_snapshot = false;
1819 
1820 		/* Shrink the max buffer again */
1821 		if (tr->ring_buffer_expanded)
1822 			ring_buffer_resize(tr->snapshot_buffer.buffer, 1,
1823 					   RING_BUFFER_ALL_CPUS);
1824 	}
1825 #endif
1826 
1827 	printk(KERN_CONT "PASSED\n");
1828 	return 0;
1829 }
1830 
do_run_tracer_selftest(struct tracer * type)1831 static int do_run_tracer_selftest(struct tracer *type)
1832 {
1833 	int ret;
1834 
1835 	/*
1836 	 * Tests can take a long time, especially if they are run one after the
1837 	 * other, as does happen during bootup when all the tracers are
1838 	 * registered. This could cause the soft lockup watchdog to trigger.
1839 	 */
1840 	cond_resched();
1841 
1842 	tracing_selftest_running = true;
1843 	ret = run_tracer_selftest(type);
1844 	tracing_selftest_running = false;
1845 
1846 	return ret;
1847 }
1848 
init_trace_selftests(void)1849 static __init int init_trace_selftests(void)
1850 {
1851 	struct trace_selftests *p, *n;
1852 	struct tracer *t, **last;
1853 	int ret;
1854 
1855 	selftests_can_run = true;
1856 
1857 	guard(mutex)(&trace_types_lock);
1858 
1859 	if (list_empty(&postponed_selftests))
1860 		return 0;
1861 
1862 	pr_info("Running postponed tracer tests:\n");
1863 
1864 	tracing_selftest_running = true;
1865 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1866 		/* This loop can take minutes when sanitizers are enabled, so
1867 		 * lets make sure we allow RCU processing.
1868 		 */
1869 		cond_resched();
1870 		ret = run_tracer_selftest(p->type);
1871 		/* If the test fails, then warn and remove from available_tracers */
1872 		if (ret < 0) {
1873 			WARN(1, "tracer: %s failed selftest, disabling\n",
1874 			     p->type->name);
1875 			last = &trace_types;
1876 			for (t = trace_types; t; t = t->next) {
1877 				if (t == p->type) {
1878 					*last = t->next;
1879 					break;
1880 				}
1881 				last = &t->next;
1882 			}
1883 		}
1884 		list_del(&p->list);
1885 		kfree(p);
1886 	}
1887 	tracing_selftest_running = false;
1888 
1889 	return 0;
1890 }
1891 core_initcall(init_trace_selftests);
1892 #else
do_run_tracer_selftest(struct tracer * type)1893 static inline int do_run_tracer_selftest(struct tracer *type)
1894 {
1895 	return 0;
1896 }
1897 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1898 
1899 static int add_tracer(struct trace_array *tr, struct tracer *t);
1900 
1901 static void __init apply_trace_boot_options(void);
1902 
free_tracers(struct trace_array * tr)1903 static void free_tracers(struct trace_array *tr)
1904 {
1905 	struct tracers *t, *n;
1906 
1907 	lockdep_assert_held(&trace_types_lock);
1908 
1909 	list_for_each_entry_safe(t, n, &tr->tracers, list) {
1910 		list_del(&t->list);
1911 		kfree(t->flags);
1912 		kfree(t);
1913 	}
1914 }
1915 
1916 /**
1917  * register_tracer - register a tracer with the ftrace system.
1918  * @type: the plugin for the tracer
1919  *
1920  * Register a new plugin tracer.
1921  */
register_tracer(struct tracer * type)1922 int __init register_tracer(struct tracer *type)
1923 {
1924 	struct trace_array *tr;
1925 	struct tracer *t;
1926 	int ret = 0;
1927 
1928 	if (!type->name) {
1929 		pr_info("Tracer must have a name\n");
1930 		return -1;
1931 	}
1932 
1933 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1934 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1935 		return -1;
1936 	}
1937 
1938 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
1939 		pr_warn("Can not register tracer %s due to lockdown\n",
1940 			   type->name);
1941 		return -EPERM;
1942 	}
1943 
1944 	mutex_lock(&trace_types_lock);
1945 
1946 	for (t = trace_types; t; t = t->next) {
1947 		if (strcmp(type->name, t->name) == 0) {
1948 			/* already found */
1949 			pr_info("Tracer %s already registered\n",
1950 				type->name);
1951 			ret = -1;
1952 			goto out;
1953 		}
1954 	}
1955 
1956 	/* store the tracer for __set_tracer_option */
1957 	if (type->flags)
1958 		type->flags->trace = type;
1959 
1960 	ret = do_run_tracer_selftest(type);
1961 	if (ret < 0)
1962 		goto out;
1963 
1964 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1965 		ret = add_tracer(tr, type);
1966 		if (ret < 0) {
1967 			/* The tracer will still exist but without options */
1968 			pr_warn("Failed to create tracer options for %s\n", type->name);
1969 			break;
1970 		}
1971 	}
1972 
1973 	type->next = trace_types;
1974 	trace_types = type;
1975 
1976  out:
1977 	mutex_unlock(&trace_types_lock);
1978 
1979 	if (ret || !default_bootup_tracer)
1980 		return ret;
1981 
1982 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1983 		return 0;
1984 
1985 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1986 	/* Do we want this tracer to start on bootup? */
1987 	WARN_ON(tracing_set_tracer(&global_trace, type->name) < 0);
1988 	default_bootup_tracer = NULL;
1989 
1990 	apply_trace_boot_options();
1991 
1992 	/* disable other selftests, since this will break it. */
1993 	disable_tracing_selftest("running a tracer");
1994 
1995 	return 0;
1996 }
1997 
tracing_reset_cpu(struct array_buffer * buf,int cpu)1998 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
1999 {
2000 	struct trace_buffer *buffer = buf->buffer;
2001 
2002 	if (!buffer)
2003 		return;
2004 
2005 	ring_buffer_record_disable(buffer);
2006 
2007 	/* Make sure all commits have finished */
2008 	synchronize_rcu();
2009 	ring_buffer_reset_cpu(buffer, cpu);
2010 
2011 	ring_buffer_record_enable(buffer);
2012 }
2013 
tracing_reset_online_cpus(struct array_buffer * buf)2014 void tracing_reset_online_cpus(struct array_buffer *buf)
2015 {
2016 	struct trace_buffer *buffer = buf->buffer;
2017 
2018 	if (!buffer)
2019 		return;
2020 
2021 	ring_buffer_record_disable(buffer);
2022 
2023 	/* Make sure all commits have finished */
2024 	synchronize_rcu();
2025 
2026 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2027 
2028 	ring_buffer_reset_online_cpus(buffer);
2029 
2030 	ring_buffer_record_enable(buffer);
2031 }
2032 
tracing_reset_all_cpus(struct array_buffer * buf)2033 static void tracing_reset_all_cpus(struct array_buffer *buf)
2034 {
2035 	struct trace_buffer *buffer = buf->buffer;
2036 
2037 	if (!buffer)
2038 		return;
2039 
2040 	ring_buffer_record_disable(buffer);
2041 
2042 	/* Make sure all commits have finished */
2043 	synchronize_rcu();
2044 
2045 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2046 
2047 	ring_buffer_reset(buffer);
2048 
2049 	ring_buffer_record_enable(buffer);
2050 }
2051 
2052 /* Must have trace_types_lock held */
tracing_reset_all_online_cpus_unlocked(void)2053 void tracing_reset_all_online_cpus_unlocked(void)
2054 {
2055 	struct trace_array *tr;
2056 
2057 	lockdep_assert_held(&trace_types_lock);
2058 
2059 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2060 		if (!tr->clear_trace)
2061 			continue;
2062 		tr->clear_trace = false;
2063 		tracing_reset_online_cpus(&tr->array_buffer);
2064 #ifdef CONFIG_TRACER_SNAPSHOT
2065 		tracing_reset_online_cpus(&tr->snapshot_buffer);
2066 #endif
2067 	}
2068 }
2069 
tracing_reset_all_online_cpus(void)2070 void tracing_reset_all_online_cpus(void)
2071 {
2072 	guard(mutex)(&trace_types_lock);
2073 	tracing_reset_all_online_cpus_unlocked();
2074 }
2075 
is_tracing_stopped(void)2076 int is_tracing_stopped(void)
2077 {
2078 	return global_trace.stop_count;
2079 }
2080 
tracing_start_tr(struct trace_array * tr)2081 static void tracing_start_tr(struct trace_array *tr)
2082 {
2083 	struct trace_buffer *buffer;
2084 
2085 	if (tracing_disabled)
2086 		return;
2087 
2088 	guard(raw_spinlock_irqsave)(&tr->start_lock);
2089 	if (--tr->stop_count) {
2090 		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2091 			/* Someone screwed up their debugging */
2092 			tr->stop_count = 0;
2093 		}
2094 		return;
2095 	}
2096 
2097 	/* Prevent the buffers from switching */
2098 	arch_spin_lock(&tr->max_lock);
2099 
2100 	buffer = tr->array_buffer.buffer;
2101 	if (buffer)
2102 		ring_buffer_record_enable(buffer);
2103 
2104 #ifdef CONFIG_TRACER_SNAPSHOT
2105 	buffer = tr->snapshot_buffer.buffer;
2106 	if (buffer)
2107 		ring_buffer_record_enable(buffer);
2108 #endif
2109 
2110 	arch_spin_unlock(&tr->max_lock);
2111 }
2112 
2113 /**
2114  * tracing_start - quick start of the tracer
2115  *
2116  * If tracing is enabled but was stopped by tracing_stop,
2117  * this will start the tracer back up.
2118  */
tracing_start(void)2119 void tracing_start(void)
2120 
2121 {
2122 	return tracing_start_tr(&global_trace);
2123 }
2124 
tracing_stop_tr(struct trace_array * tr)2125 static void tracing_stop_tr(struct trace_array *tr)
2126 {
2127 	struct trace_buffer *buffer;
2128 
2129 	guard(raw_spinlock_irqsave)(&tr->start_lock);
2130 	if (tr->stop_count++)
2131 		return;
2132 
2133 	/* Prevent the buffers from switching */
2134 	arch_spin_lock(&tr->max_lock);
2135 
2136 	buffer = tr->array_buffer.buffer;
2137 	if (buffer)
2138 		ring_buffer_record_disable(buffer);
2139 
2140 #ifdef CONFIG_TRACER_SNAPSHOT
2141 	buffer = tr->snapshot_buffer.buffer;
2142 	if (buffer)
2143 		ring_buffer_record_disable(buffer);
2144 #endif
2145 
2146 	arch_spin_unlock(&tr->max_lock);
2147 }
2148 
2149 /**
2150  * tracing_stop - quick stop of the tracer
2151  *
2152  * Light weight way to stop tracing. Use in conjunction with
2153  * tracing_start.
2154  */
tracing_stop(void)2155 void tracing_stop(void)
2156 {
2157 	return tracing_stop_tr(&global_trace);
2158 }
2159 
2160 /*
2161  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2162  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2163  * simplifies those functions and keeps them in sync.
2164  */
trace_handle_return(struct trace_seq * s)2165 enum print_line_t trace_handle_return(struct trace_seq *s)
2166 {
2167 	return trace_seq_has_overflowed(s) ?
2168 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2169 }
2170 EXPORT_SYMBOL_GPL(trace_handle_return);
2171 
migration_disable_value(void)2172 static unsigned short migration_disable_value(void)
2173 {
2174 #if defined(CONFIG_SMP)
2175 	return current->migration_disabled;
2176 #else
2177 	return 0;
2178 #endif
2179 }
2180 
tracing_gen_ctx_irq_test(unsigned int irqs_status)2181 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2182 {
2183 	unsigned int trace_flags = irqs_status;
2184 	unsigned int pc;
2185 
2186 	pc = preempt_count();
2187 
2188 	if (pc & NMI_MASK)
2189 		trace_flags |= TRACE_FLAG_NMI;
2190 	if (pc & HARDIRQ_MASK)
2191 		trace_flags |= TRACE_FLAG_HARDIRQ;
2192 	if (in_serving_softirq())
2193 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2194 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2195 		trace_flags |= TRACE_FLAG_BH_OFF;
2196 
2197 	if (tif_need_resched())
2198 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2199 	if (test_preempt_need_resched())
2200 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2201 	if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
2202 		trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
2203 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2204 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2205 }
2206 
2207 struct ring_buffer_event *
trace_buffer_lock_reserve(struct trace_buffer * buffer,int type,unsigned long len,unsigned int trace_ctx)2208 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2209 			  int type,
2210 			  unsigned long len,
2211 			  unsigned int trace_ctx)
2212 {
2213 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2214 }
2215 
2216 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2217 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2218 static int trace_buffered_event_ref;
2219 
2220 /**
2221  * trace_buffered_event_enable - enable buffering events
2222  *
2223  * When events are being filtered, it is quicker to use a temporary
2224  * buffer to write the event data into if there's a likely chance
2225  * that it will not be committed. The discard of the ring buffer
2226  * is not as fast as committing, and is much slower than copying
2227  * a commit.
2228  *
2229  * When an event is to be filtered, allocate per cpu buffers to
2230  * write the event data into, and if the event is filtered and discarded
2231  * it is simply dropped, otherwise, the entire data is to be committed
2232  * in one shot.
2233  */
trace_buffered_event_enable(void)2234 void trace_buffered_event_enable(void)
2235 {
2236 	struct ring_buffer_event *event;
2237 	struct page *page;
2238 	int cpu;
2239 
2240 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2241 
2242 	if (trace_buffered_event_ref++)
2243 		return;
2244 
2245 	for_each_tracing_cpu(cpu) {
2246 		page = alloc_pages_node(cpu_to_node(cpu),
2247 					GFP_KERNEL | __GFP_NORETRY, 0);
2248 		/* This is just an optimization and can handle failures */
2249 		if (!page) {
2250 			pr_err("Failed to allocate event buffer\n");
2251 			break;
2252 		}
2253 
2254 		event = page_address(page);
2255 		memset(event, 0, sizeof(*event));
2256 
2257 		per_cpu(trace_buffered_event, cpu) = event;
2258 
2259 		scoped_guard(preempt,) {
2260 			if (cpu == smp_processor_id() &&
2261 			    __this_cpu_read(trace_buffered_event) !=
2262 			    per_cpu(trace_buffered_event, cpu))
2263 				WARN_ON_ONCE(1);
2264 		}
2265 	}
2266 }
2267 
enable_trace_buffered_event(void * data)2268 static void enable_trace_buffered_event(void *data)
2269 {
2270 	this_cpu_dec(trace_buffered_event_cnt);
2271 }
2272 
disable_trace_buffered_event(void * data)2273 static void disable_trace_buffered_event(void *data)
2274 {
2275 	this_cpu_inc(trace_buffered_event_cnt);
2276 }
2277 
2278 /**
2279  * trace_buffered_event_disable - disable buffering events
2280  *
2281  * When a filter is removed, it is faster to not use the buffered
2282  * events, and to commit directly into the ring buffer. Free up
2283  * the temp buffers when there are no more users. This requires
2284  * special synchronization with current events.
2285  */
trace_buffered_event_disable(void)2286 void trace_buffered_event_disable(void)
2287 {
2288 	int cpu;
2289 
2290 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2291 
2292 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2293 		return;
2294 
2295 	if (--trace_buffered_event_ref)
2296 		return;
2297 
2298 	/* For each CPU, set the buffer as used. */
2299 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2300 			 NULL, true);
2301 
2302 	/* Wait for all current users to finish */
2303 	synchronize_rcu();
2304 
2305 	for_each_tracing_cpu(cpu) {
2306 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2307 		per_cpu(trace_buffered_event, cpu) = NULL;
2308 	}
2309 
2310 	/*
2311 	 * Wait for all CPUs that potentially started checking if they can use
2312 	 * their event buffer only after the previous synchronize_rcu() call and
2313 	 * they still read a valid pointer from trace_buffered_event. It must be
2314 	 * ensured they don't see cleared trace_buffered_event_cnt else they
2315 	 * could wrongly decide to use the pointed-to buffer which is now freed.
2316 	 */
2317 	synchronize_rcu();
2318 
2319 	/* For each CPU, relinquish the buffer */
2320 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2321 			 true);
2322 }
2323 
2324 static struct trace_buffer *temp_buffer;
2325 
2326 struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct trace_buffer ** current_rb,struct trace_event_file * trace_file,int type,unsigned long len,unsigned int trace_ctx)2327 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2328 			  struct trace_event_file *trace_file,
2329 			  int type, unsigned long len,
2330 			  unsigned int trace_ctx)
2331 {
2332 	struct ring_buffer_event *entry;
2333 	struct trace_array *tr = trace_file->tr;
2334 	int val;
2335 
2336 	*current_rb = tr->array_buffer.buffer;
2337 
2338 	if (!tr->no_filter_buffering_ref &&
2339 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2340 		preempt_disable_notrace();
2341 		/*
2342 		 * Filtering is on, so try to use the per cpu buffer first.
2343 		 * This buffer will simulate a ring_buffer_event,
2344 		 * where the type_len is zero and the array[0] will
2345 		 * hold the full length.
2346 		 * (see include/linux/ring-buffer.h for details on
2347 		 *  how the ring_buffer_event is structured).
2348 		 *
2349 		 * Using a temp buffer during filtering and copying it
2350 		 * on a matched filter is quicker than writing directly
2351 		 * into the ring buffer and then discarding it when
2352 		 * it doesn't match. That is because the discard
2353 		 * requires several atomic operations to get right.
2354 		 * Copying on match and doing nothing on a failed match
2355 		 * is still quicker than no copy on match, but having
2356 		 * to discard out of the ring buffer on a failed match.
2357 		 */
2358 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2359 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2360 
2361 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2362 
2363 			/*
2364 			 * Preemption is disabled, but interrupts and NMIs
2365 			 * can still come in now. If that happens after
2366 			 * the above increment, then it will have to go
2367 			 * back to the old method of allocating the event
2368 			 * on the ring buffer, and if the filter fails, it
2369 			 * will have to call ring_buffer_discard_commit()
2370 			 * to remove it.
2371 			 *
2372 			 * Need to also check the unlikely case that the
2373 			 * length is bigger than the temp buffer size.
2374 			 * If that happens, then the reserve is pretty much
2375 			 * guaranteed to fail, as the ring buffer currently
2376 			 * only allows events less than a page. But that may
2377 			 * change in the future, so let the ring buffer reserve
2378 			 * handle the failure in that case.
2379 			 */
2380 			if (val == 1 && likely(len <= max_len)) {
2381 				trace_event_setup(entry, type, trace_ctx);
2382 				entry->array[0] = len;
2383 				/* Return with preemption disabled */
2384 				return entry;
2385 			}
2386 			this_cpu_dec(trace_buffered_event_cnt);
2387 		}
2388 		/* __trace_buffer_lock_reserve() disables preemption */
2389 		preempt_enable_notrace();
2390 	}
2391 
2392 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2393 					    trace_ctx);
2394 	/*
2395 	 * If tracing is off, but we have triggers enabled
2396 	 * we still need to look at the event data. Use the temp_buffer
2397 	 * to store the trace event for the trigger to use. It's recursive
2398 	 * safe and will not be recorded anywhere.
2399 	 */
2400 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2401 		*current_rb = temp_buffer;
2402 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2403 						    trace_ctx);
2404 	}
2405 	return entry;
2406 }
2407 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2408 
2409 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2410 static DEFINE_MUTEX(tracepoint_printk_mutex);
2411 
output_printk(struct trace_event_buffer * fbuffer)2412 static void output_printk(struct trace_event_buffer *fbuffer)
2413 {
2414 	struct trace_event_call *event_call;
2415 	struct trace_event_file *file;
2416 	struct trace_event *event;
2417 	unsigned long flags;
2418 	struct trace_iterator *iter = tracepoint_print_iter;
2419 
2420 	/* We should never get here if iter is NULL */
2421 	if (WARN_ON_ONCE(!iter))
2422 		return;
2423 
2424 	event_call = fbuffer->trace_file->event_call;
2425 	if (!event_call || !event_call->event.funcs ||
2426 	    !event_call->event.funcs->trace)
2427 		return;
2428 
2429 	file = fbuffer->trace_file;
2430 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2431 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2432 	     !filter_match_preds(file->filter, fbuffer->entry)))
2433 		return;
2434 
2435 	event = &fbuffer->trace_file->event_call->event;
2436 
2437 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2438 	trace_seq_init(&iter->seq);
2439 	iter->ent = fbuffer->entry;
2440 	event_call->event.funcs->trace(iter, 0, event);
2441 	trace_seq_putc(&iter->seq, 0);
2442 	printk("%s", iter->seq.buffer);
2443 
2444 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2445 }
2446 
tracepoint_printk_sysctl(const struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)2447 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
2448 			     void *buffer, size_t *lenp,
2449 			     loff_t *ppos)
2450 {
2451 	int save_tracepoint_printk;
2452 	int ret;
2453 
2454 	guard(mutex)(&tracepoint_printk_mutex);
2455 	save_tracepoint_printk = tracepoint_printk;
2456 
2457 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2458 
2459 	/*
2460 	 * This will force exiting early, as tracepoint_printk
2461 	 * is always zero when tracepoint_printk_iter is not allocated
2462 	 */
2463 	if (!tracepoint_print_iter)
2464 		tracepoint_printk = 0;
2465 
2466 	if (save_tracepoint_printk == tracepoint_printk)
2467 		return ret;
2468 
2469 	if (tracepoint_printk)
2470 		static_key_enable(&tracepoint_printk_key.key);
2471 	else
2472 		static_key_disable(&tracepoint_printk_key.key);
2473 
2474 	return ret;
2475 }
2476 
trace_event_buffer_commit(struct trace_event_buffer * fbuffer)2477 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2478 {
2479 	enum event_trigger_type tt = ETT_NONE;
2480 	struct trace_event_file *file = fbuffer->trace_file;
2481 
2482 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2483 			fbuffer->entry, &tt))
2484 		goto discard;
2485 
2486 	if (static_key_false(&tracepoint_printk_key.key))
2487 		output_printk(fbuffer);
2488 
2489 	if (static_branch_unlikely(&trace_event_exports_enabled))
2490 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2491 
2492 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2493 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2494 
2495 discard:
2496 	if (tt)
2497 		event_triggers_post_call(file, tt);
2498 
2499 }
2500 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2501 
2502 /*
2503  * Skip 3:
2504  *
2505  *   trace_buffer_unlock_commit_regs()
2506  *   trace_event_buffer_commit()
2507  *   trace_event_raw_event_xxx()
2508  */
2509 # define STACK_SKIP 3
2510 
trace_buffer_unlock_commit_regs(struct trace_array * tr,struct trace_buffer * buffer,struct ring_buffer_event * event,unsigned int trace_ctx,struct pt_regs * regs)2511 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2512 				     struct trace_buffer *buffer,
2513 				     struct ring_buffer_event *event,
2514 				     unsigned int trace_ctx,
2515 				     struct pt_regs *regs)
2516 {
2517 	__buffer_unlock_commit(buffer, event);
2518 
2519 	/*
2520 	 * If regs is not set, then skip the necessary functions.
2521 	 * Note, we can still get here via blktrace, wakeup tracer
2522 	 * and mmiotrace, but that's ok if they lose a function or
2523 	 * two. They are not that meaningful.
2524 	 */
2525 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2526 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2527 }
2528 
2529 /*
2530  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2531  */
2532 void
trace_buffer_unlock_commit_nostack(struct trace_buffer * buffer,struct ring_buffer_event * event)2533 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2534 				   struct ring_buffer_event *event)
2535 {
2536 	__buffer_unlock_commit(buffer, event);
2537 }
2538 
2539 void
trace_function(struct trace_array * tr,unsigned long ip,unsigned long parent_ip,unsigned int trace_ctx,struct ftrace_regs * fregs)2540 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2541 	       parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs)
2542 {
2543 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2544 	struct ring_buffer_event *event;
2545 	struct ftrace_entry *entry;
2546 	int size = sizeof(*entry);
2547 
2548 	size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long);
2549 
2550 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size,
2551 					    trace_ctx);
2552 	if (!event)
2553 		return;
2554 	entry	= ring_buffer_event_data(event);
2555 	entry->ip			= ip;
2556 	entry->parent_ip		= parent_ip;
2557 
2558 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
2559 	if (fregs) {
2560 		for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
2561 			entry->args[i] = ftrace_regs_get_argument(fregs, i);
2562 	}
2563 #endif
2564 
2565 	if (static_branch_unlikely(&trace_function_exports_enabled))
2566 		ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2567 	__buffer_unlock_commit(buffer, event);
2568 }
2569 
2570 #ifdef CONFIG_STACKTRACE
2571 
2572 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2573 #define FTRACE_KSTACK_NESTING	4
2574 
2575 #define FTRACE_KSTACK_ENTRIES	(SZ_4K / FTRACE_KSTACK_NESTING)
2576 
2577 struct ftrace_stack {
2578 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2579 };
2580 
2581 
2582 struct ftrace_stacks {
2583 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2584 };
2585 
2586 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2587 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2588 
__ftrace_trace_stack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx,int skip,struct pt_regs * regs)2589 void __ftrace_trace_stack(struct trace_array *tr,
2590 			  struct trace_buffer *buffer,
2591 			  unsigned int trace_ctx,
2592 			  int skip, struct pt_regs *regs)
2593 {
2594 	struct ring_buffer_event *event;
2595 	unsigned int size, nr_entries;
2596 	struct ftrace_stack *fstack;
2597 	struct stack_entry *entry;
2598 	int stackidx;
2599 	int bit;
2600 
2601 	bit = trace_test_and_set_recursion(_THIS_IP_, _RET_IP_, TRACE_EVENT_START);
2602 	if (bit < 0)
2603 		return;
2604 
2605 	/*
2606 	 * Add one, for this function and the call to save_stack_trace()
2607 	 * If regs is set, then these functions will not be in the way.
2608 	 */
2609 #ifndef CONFIG_UNWINDER_ORC
2610 	if (!regs)
2611 		skip++;
2612 #endif
2613 
2614 	guard(preempt_notrace)();
2615 
2616 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2617 
2618 	/* This should never happen. If it does, yell once and skip */
2619 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2620 		goto out;
2621 
2622 	/*
2623 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2624 	 * interrupt will either see the value pre increment or post
2625 	 * increment. If the interrupt happens pre increment it will have
2626 	 * restored the counter when it returns.  We just need a barrier to
2627 	 * keep gcc from moving things around.
2628 	 */
2629 	barrier();
2630 
2631 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2632 	size = ARRAY_SIZE(fstack->calls);
2633 
2634 	if (regs) {
2635 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2636 						   size, skip);
2637 	} else {
2638 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2639 	}
2640 
2641 #ifdef CONFIG_DYNAMIC_FTRACE
2642 	/* Mark entry of stack trace as trampoline code */
2643 	if (tr->ops && tr->ops->trampoline) {
2644 		unsigned long tramp_start = tr->ops->trampoline;
2645 		unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
2646 		unsigned long *calls = fstack->calls;
2647 
2648 		for (int i = 0; i < nr_entries; i++) {
2649 			if (calls[i] >= tramp_start && calls[i] < tramp_end)
2650 				calls[i] = FTRACE_TRAMPOLINE_MARKER;
2651 		}
2652 	}
2653 #endif
2654 
2655 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2656 				    struct_size(entry, caller, nr_entries),
2657 				    trace_ctx);
2658 	if (!event)
2659 		goto out;
2660 	entry = ring_buffer_event_data(event);
2661 
2662 	entry->size = nr_entries;
2663 	memcpy(&entry->caller, fstack->calls,
2664 	       flex_array_size(entry, caller, nr_entries));
2665 
2666 	__buffer_unlock_commit(buffer, event);
2667 
2668  out:
2669 	/* Again, don't let gcc optimize things here */
2670 	barrier();
2671 	__this_cpu_dec(ftrace_stack_reserve);
2672 	trace_clear_recursion(bit);
2673 }
2674 
__trace_stack(struct trace_array * tr,unsigned int trace_ctx,int skip)2675 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
2676 		   int skip)
2677 {
2678 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2679 
2680 	if (rcu_is_watching()) {
2681 		__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
2682 		return;
2683 	}
2684 
2685 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
2686 		return;
2687 
2688 	/*
2689 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
2690 	 * but if the above rcu_is_watching() failed, then the NMI
2691 	 * triggered someplace critical, and ct_irq_enter() should
2692 	 * not be called from NMI.
2693 	 */
2694 	if (unlikely(in_nmi()))
2695 		return;
2696 
2697 	ct_irq_enter_irqson();
2698 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
2699 	ct_irq_exit_irqson();
2700 }
2701 
2702 /**
2703  * trace_dump_stack - record a stack back trace in the trace buffer
2704  * @skip: Number of functions to skip (helper handlers)
2705  */
trace_dump_stack(int skip)2706 void trace_dump_stack(int skip)
2707 {
2708 	if (tracing_disabled || tracing_selftest_running)
2709 		return;
2710 
2711 #ifndef CONFIG_UNWINDER_ORC
2712 	/* Skip 1 to skip this function. */
2713 	skip++;
2714 #endif
2715 	__ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
2716 				tracing_gen_ctx(), skip, NULL);
2717 }
2718 EXPORT_SYMBOL_GPL(trace_dump_stack);
2719 
2720 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
2721 static DEFINE_PER_CPU(int, user_stack_count);
2722 
2723 static void
ftrace_trace_userstack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx)2724 ftrace_trace_userstack(struct trace_array *tr,
2725 		       struct trace_buffer *buffer, unsigned int trace_ctx)
2726 {
2727 	struct ring_buffer_event *event;
2728 	struct userstack_entry *entry;
2729 
2730 	if (!(tr->trace_flags & TRACE_ITER(USERSTACKTRACE)))
2731 		return;
2732 
2733 	/*
2734 	 * NMIs can not handle page faults, even with fix ups.
2735 	 * The save user stack can (and often does) fault.
2736 	 */
2737 	if (unlikely(in_nmi()))
2738 		return;
2739 
2740 	/*
2741 	 * prevent recursion, since the user stack tracing may
2742 	 * trigger other kernel events.
2743 	 */
2744 	guard(preempt)();
2745 	if (__this_cpu_read(user_stack_count))
2746 		return;
2747 
2748 	__this_cpu_inc(user_stack_count);
2749 
2750 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2751 					    sizeof(*entry), trace_ctx);
2752 	if (!event)
2753 		goto out_drop_count;
2754 	entry	= ring_buffer_event_data(event);
2755 
2756 	entry->tgid		= current->tgid;
2757 	memset(&entry->caller, 0, sizeof(entry->caller));
2758 
2759 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
2760 	__buffer_unlock_commit(buffer, event);
2761 
2762  out_drop_count:
2763 	__this_cpu_dec(user_stack_count);
2764 }
2765 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
ftrace_trace_userstack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx)2766 static void ftrace_trace_userstack(struct trace_array *tr,
2767 				   struct trace_buffer *buffer,
2768 				   unsigned int trace_ctx)
2769 {
2770 }
2771 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
2772 
2773 #endif /* CONFIG_STACKTRACE */
2774 
2775 static inline void
func_repeats_set_delta_ts(struct func_repeats_entry * entry,unsigned long long delta)2776 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
2777 			  unsigned long long delta)
2778 {
2779 	entry->bottom_delta_ts = delta & U32_MAX;
2780 	entry->top_delta_ts = (delta >> 32);
2781 }
2782 
trace_last_func_repeats(struct trace_array * tr,struct trace_func_repeats * last_info,unsigned int trace_ctx)2783 void trace_last_func_repeats(struct trace_array *tr,
2784 			     struct trace_func_repeats *last_info,
2785 			     unsigned int trace_ctx)
2786 {
2787 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2788 	struct func_repeats_entry *entry;
2789 	struct ring_buffer_event *event;
2790 	u64 delta;
2791 
2792 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
2793 					    sizeof(*entry), trace_ctx);
2794 	if (!event)
2795 		return;
2796 
2797 	delta = ring_buffer_event_time_stamp(buffer, event) -
2798 		last_info->ts_last_call;
2799 
2800 	entry = ring_buffer_event_data(event);
2801 	entry->ip = last_info->ip;
2802 	entry->parent_ip = last_info->parent_ip;
2803 	entry->count = last_info->count;
2804 	func_repeats_set_delta_ts(entry, delta);
2805 
2806 	__buffer_unlock_commit(buffer, event);
2807 }
2808 
trace_iterator_increment(struct trace_iterator * iter)2809 static void trace_iterator_increment(struct trace_iterator *iter)
2810 {
2811 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2812 
2813 	iter->idx++;
2814 	if (buf_iter)
2815 		ring_buffer_iter_advance(buf_iter);
2816 }
2817 
2818 static struct trace_entry *
peek_next_entry(struct trace_iterator * iter,int cpu,u64 * ts,unsigned long * lost_events)2819 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2820 		unsigned long *lost_events)
2821 {
2822 	struct ring_buffer_event *event;
2823 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2824 
2825 	if (buf_iter) {
2826 		event = ring_buffer_iter_peek(buf_iter, ts);
2827 		if (lost_events)
2828 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
2829 				(unsigned long)-1 : 0;
2830 	} else {
2831 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
2832 					 lost_events);
2833 	}
2834 
2835 	if (event) {
2836 		iter->ent_size = ring_buffer_event_length(event);
2837 		return ring_buffer_event_data(event);
2838 	}
2839 	iter->ent_size = 0;
2840 	return NULL;
2841 }
2842 
2843 static struct trace_entry *
__find_next_entry(struct trace_iterator * iter,int * ent_cpu,unsigned long * missing_events,u64 * ent_ts)2844 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2845 		  unsigned long *missing_events, u64 *ent_ts)
2846 {
2847 	struct trace_buffer *buffer = iter->array_buffer->buffer;
2848 	struct trace_entry *ent, *next = NULL;
2849 	unsigned long lost_events = 0, next_lost = 0;
2850 	int cpu_file = iter->cpu_file;
2851 	u64 next_ts = 0, ts;
2852 	int next_cpu = -1;
2853 	int next_size = 0;
2854 	int cpu;
2855 
2856 	/*
2857 	 * If we are in a per_cpu trace file, don't bother by iterating over
2858 	 * all cpu and peek directly.
2859 	 */
2860 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
2861 		if (ring_buffer_empty_cpu(buffer, cpu_file))
2862 			return NULL;
2863 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2864 		if (ent_cpu)
2865 			*ent_cpu = cpu_file;
2866 
2867 		return ent;
2868 	}
2869 
2870 	for_each_tracing_cpu(cpu) {
2871 
2872 		if (ring_buffer_empty_cpu(buffer, cpu))
2873 			continue;
2874 
2875 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2876 
2877 		/*
2878 		 * Pick the entry with the smallest timestamp:
2879 		 */
2880 		if (ent && (!next || ts < next_ts)) {
2881 			next = ent;
2882 			next_cpu = cpu;
2883 			next_ts = ts;
2884 			next_lost = lost_events;
2885 			next_size = iter->ent_size;
2886 		}
2887 	}
2888 
2889 	iter->ent_size = next_size;
2890 
2891 	if (ent_cpu)
2892 		*ent_cpu = next_cpu;
2893 
2894 	if (ent_ts)
2895 		*ent_ts = next_ts;
2896 
2897 	if (missing_events)
2898 		*missing_events = next_lost;
2899 
2900 	return next;
2901 }
2902 
2903 #define STATIC_FMT_BUF_SIZE	128
2904 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
2905 
trace_iter_expand_format(struct trace_iterator * iter)2906 char *trace_iter_expand_format(struct trace_iterator *iter)
2907 {
2908 	char *tmp;
2909 
2910 	/*
2911 	 * iter->tr is NULL when used with tp_printk, which makes
2912 	 * this get called where it is not safe to call krealloc().
2913 	 */
2914 	if (!iter->tr || iter->fmt == static_fmt_buf)
2915 		return NULL;
2916 
2917 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
2918 		       GFP_KERNEL);
2919 	if (tmp) {
2920 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
2921 		iter->fmt = tmp;
2922 	}
2923 
2924 	return tmp;
2925 }
2926 
2927 /* Returns true if the string is safe to dereference from an event */
trace_safe_str(struct trace_iterator * iter,const char * str)2928 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
2929 {
2930 	unsigned long addr = (unsigned long)str;
2931 	struct trace_event *trace_event;
2932 	struct trace_event_call *event;
2933 
2934 	/* OK if part of the event data */
2935 	if ((addr >= (unsigned long)iter->ent) &&
2936 	    (addr < (unsigned long)iter->ent + iter->ent_size))
2937 		return true;
2938 
2939 	/* OK if part of the temp seq buffer */
2940 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
2941 	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
2942 		return true;
2943 
2944 	/* Core rodata can not be freed */
2945 	if (is_kernel_rodata(addr))
2946 		return true;
2947 
2948 	if (trace_is_tracepoint_string(str))
2949 		return true;
2950 
2951 	/*
2952 	 * Now this could be a module event, referencing core module
2953 	 * data, which is OK.
2954 	 */
2955 	if (!iter->ent)
2956 		return false;
2957 
2958 	trace_event = ftrace_find_event(iter->ent->type);
2959 	if (!trace_event)
2960 		return false;
2961 
2962 	event = container_of(trace_event, struct trace_event_call, event);
2963 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
2964 		return false;
2965 
2966 	/* Would rather have rodata, but this will suffice */
2967 	if (within_module_core(addr, event->module))
2968 		return true;
2969 
2970 	return false;
2971 }
2972 
2973 /**
2974  * ignore_event - Check dereferenced fields while writing to the seq buffer
2975  * @iter: The iterator that holds the seq buffer and the event being printed
2976  *
2977  * At boot up, test_event_printk() will flag any event that dereferences
2978  * a string with "%s" that does exist in the ring buffer. It may still
2979  * be valid, as the string may point to a static string in the kernel
2980  * rodata that never gets freed. But if the string pointer is pointing
2981  * to something that was allocated, there's a chance that it can be freed
2982  * by the time the user reads the trace. This would cause a bad memory
2983  * access by the kernel and possibly crash the system.
2984  *
2985  * This function will check if the event has any fields flagged as needing
2986  * to be checked at runtime and perform those checks.
2987  *
2988  * If it is found that a field is unsafe, it will write into the @iter->seq
2989  * a message stating what was found to be unsafe.
2990  *
2991  * @return: true if the event is unsafe and should be ignored,
2992  *          false otherwise.
2993  */
ignore_event(struct trace_iterator * iter)2994 bool ignore_event(struct trace_iterator *iter)
2995 {
2996 	struct ftrace_event_field *field;
2997 	struct trace_event *trace_event;
2998 	struct trace_event_call *event;
2999 	struct list_head *head;
3000 	struct trace_seq *seq;
3001 	const void *ptr;
3002 
3003 	trace_event = ftrace_find_event(iter->ent->type);
3004 
3005 	seq = &iter->seq;
3006 
3007 	if (!trace_event) {
3008 		trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
3009 		return true;
3010 	}
3011 
3012 	event = container_of(trace_event, struct trace_event_call, event);
3013 	if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
3014 		return false;
3015 
3016 	head = trace_get_fields(event);
3017 	if (!head) {
3018 		trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
3019 				 trace_event_name(event));
3020 		return true;
3021 	}
3022 
3023 	/* Offsets are from the iter->ent that points to the raw event */
3024 	ptr = iter->ent;
3025 
3026 	list_for_each_entry(field, head, link) {
3027 		const char *str;
3028 		bool good;
3029 
3030 		if (!field->needs_test)
3031 			continue;
3032 
3033 		str = *(const char **)(ptr + field->offset);
3034 
3035 		good = trace_safe_str(iter, str);
3036 
3037 		/*
3038 		 * If you hit this warning, it is likely that the
3039 		 * trace event in question used %s on a string that
3040 		 * was saved at the time of the event, but may not be
3041 		 * around when the trace is read. Use __string(),
3042 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3043 		 * instead. See samples/trace_events/trace-events-sample.h
3044 		 * for reference.
3045 		 */
3046 		if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
3047 			      trace_event_name(event), field->name)) {
3048 			trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
3049 					 trace_event_name(event), field->name);
3050 			return true;
3051 		}
3052 	}
3053 	return false;
3054 }
3055 
trace_event_format(struct trace_iterator * iter,const char * fmt)3056 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3057 {
3058 	const char *p, *new_fmt;
3059 	char *q;
3060 
3061 	if (WARN_ON_ONCE(!fmt))
3062 		return fmt;
3063 
3064 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER(HASH_PTR))
3065 		return fmt;
3066 
3067 	p = fmt;
3068 	new_fmt = q = iter->fmt;
3069 	while (*p) {
3070 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3071 			if (!trace_iter_expand_format(iter))
3072 				return fmt;
3073 
3074 			q += iter->fmt - new_fmt;
3075 			new_fmt = iter->fmt;
3076 		}
3077 
3078 		*q++ = *p++;
3079 
3080 		/* Replace %p with %px */
3081 		if (p[-1] == '%') {
3082 			if (p[0] == '%') {
3083 				*q++ = *p++;
3084 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3085 				*q++ = *p++;
3086 				*q++ = 'x';
3087 			}
3088 		}
3089 	}
3090 	*q = '\0';
3091 
3092 	return new_fmt;
3093 }
3094 
3095 #define STATIC_TEMP_BUF_SIZE	128
3096 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3097 
3098 /* Find the next real entry, without updating the iterator itself */
trace_find_next_entry(struct trace_iterator * iter,int * ent_cpu,u64 * ent_ts)3099 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3100 					  int *ent_cpu, u64 *ent_ts)
3101 {
3102 	/* __find_next_entry will reset ent_size */
3103 	int ent_size = iter->ent_size;
3104 	struct trace_entry *entry;
3105 
3106 	/*
3107 	 * If called from ftrace_dump(), then the iter->temp buffer
3108 	 * will be the static_temp_buf and not created from kmalloc.
3109 	 * If the entry size is greater than the buffer, we can
3110 	 * not save it. Just return NULL in that case. This is only
3111 	 * used to add markers when two consecutive events' time
3112 	 * stamps have a large delta. See trace_print_lat_context()
3113 	 */
3114 	if (iter->temp == static_temp_buf &&
3115 	    STATIC_TEMP_BUF_SIZE < ent_size)
3116 		return NULL;
3117 
3118 	/*
3119 	 * The __find_next_entry() may call peek_next_entry(), which may
3120 	 * call ring_buffer_peek() that may make the contents of iter->ent
3121 	 * undefined. Need to copy iter->ent now.
3122 	 */
3123 	if (iter->ent && iter->ent != iter->temp) {
3124 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3125 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3126 			void *temp;
3127 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3128 			if (!temp)
3129 				return NULL;
3130 			kfree(iter->temp);
3131 			iter->temp = temp;
3132 			iter->temp_size = iter->ent_size;
3133 		}
3134 		memcpy(iter->temp, iter->ent, iter->ent_size);
3135 		iter->ent = iter->temp;
3136 	}
3137 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3138 	/* Put back the original ent_size */
3139 	iter->ent_size = ent_size;
3140 
3141 	return entry;
3142 }
3143 
3144 /* Find the next real entry, and increment the iterator to the next entry */
trace_find_next_entry_inc(struct trace_iterator * iter)3145 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3146 {
3147 	iter->ent = __find_next_entry(iter, &iter->cpu,
3148 				      &iter->lost_events, &iter->ts);
3149 
3150 	if (iter->ent)
3151 		trace_iterator_increment(iter);
3152 
3153 	return iter->ent ? iter : NULL;
3154 }
3155 
trace_consume(struct trace_iterator * iter)3156 static void trace_consume(struct trace_iterator *iter)
3157 {
3158 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3159 			    &iter->lost_events);
3160 }
3161 
s_next(struct seq_file * m,void * v,loff_t * pos)3162 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3163 {
3164 	struct trace_iterator *iter = m->private;
3165 	int i = (int)*pos;
3166 	void *ent;
3167 
3168 	WARN_ON_ONCE(iter->leftover);
3169 
3170 	(*pos)++;
3171 
3172 	/* can't go backwards */
3173 	if (iter->idx > i)
3174 		return NULL;
3175 
3176 	if (iter->idx < 0)
3177 		ent = trace_find_next_entry_inc(iter);
3178 	else
3179 		ent = iter;
3180 
3181 	while (ent && iter->idx < i)
3182 		ent = trace_find_next_entry_inc(iter);
3183 
3184 	iter->pos = *pos;
3185 
3186 	return ent;
3187 }
3188 
tracing_iter_reset(struct trace_iterator * iter,int cpu)3189 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3190 {
3191 	struct ring_buffer_iter *buf_iter;
3192 	unsigned long entries = 0;
3193 	u64 ts;
3194 
3195 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3196 
3197 	buf_iter = trace_buffer_iter(iter, cpu);
3198 	if (!buf_iter)
3199 		return;
3200 
3201 	ring_buffer_iter_reset(buf_iter);
3202 
3203 	/*
3204 	 * We could have the case with the max latency tracers
3205 	 * that a reset never took place on a cpu. This is evident
3206 	 * by the timestamp being before the start of the buffer.
3207 	 */
3208 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
3209 		if (ts >= iter->array_buffer->time_start)
3210 			break;
3211 		entries++;
3212 		ring_buffer_iter_advance(buf_iter);
3213 		/* This could be a big loop */
3214 		cond_resched();
3215 	}
3216 
3217 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3218 }
3219 
3220 /*
3221  * The current tracer is copied to avoid a global locking
3222  * all around.
3223  */
s_start(struct seq_file * m,loff_t * pos)3224 static void *s_start(struct seq_file *m, loff_t *pos)
3225 {
3226 	struct trace_iterator *iter = m->private;
3227 	struct trace_array *tr = iter->tr;
3228 	int cpu_file = iter->cpu_file;
3229 	void *p = NULL;
3230 	loff_t l = 0;
3231 	int cpu;
3232 
3233 	mutex_lock(&trace_types_lock);
3234 	if (unlikely(tr->current_trace != iter->trace)) {
3235 		/* Close iter->trace before switching to the new current tracer */
3236 		if (iter->trace->close)
3237 			iter->trace->close(iter);
3238 		iter->trace = tr->current_trace;
3239 		/* Reopen the new current tracer */
3240 		if (iter->trace->open)
3241 			iter->trace->open(iter);
3242 	}
3243 	mutex_unlock(&trace_types_lock);
3244 
3245 	if (iter->snapshot && tracer_uses_snapshot(iter->trace))
3246 		return ERR_PTR(-EBUSY);
3247 
3248 	if (*pos != iter->pos) {
3249 		iter->ent = NULL;
3250 		iter->cpu = 0;
3251 		iter->idx = -1;
3252 
3253 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3254 			for_each_tracing_cpu(cpu)
3255 				tracing_iter_reset(iter, cpu);
3256 		} else
3257 			tracing_iter_reset(iter, cpu_file);
3258 
3259 		iter->leftover = 0;
3260 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3261 			;
3262 
3263 	} else {
3264 		/*
3265 		 * If we overflowed the seq_file before, then we want
3266 		 * to just reuse the trace_seq buffer again.
3267 		 */
3268 		if (iter->leftover)
3269 			p = iter;
3270 		else {
3271 			l = *pos - 1;
3272 			p = s_next(m, p, &l);
3273 		}
3274 	}
3275 
3276 	trace_event_read_lock();
3277 	trace_access_lock(cpu_file);
3278 	return p;
3279 }
3280 
s_stop(struct seq_file * m,void * p)3281 static void s_stop(struct seq_file *m, void *p)
3282 {
3283 	struct trace_iterator *iter = m->private;
3284 
3285 	if (iter->snapshot && tracer_uses_snapshot(iter->trace))
3286 		return;
3287 
3288 	trace_access_unlock(iter->cpu_file);
3289 	trace_event_read_unlock();
3290 }
3291 
3292 static void
get_total_entries_cpu(struct array_buffer * buf,unsigned long * total,unsigned long * entries,int cpu)3293 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3294 		      unsigned long *entries, int cpu)
3295 {
3296 	unsigned long count;
3297 
3298 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
3299 	/*
3300 	 * If this buffer has skipped entries, then we hold all
3301 	 * entries for the trace and we need to ignore the
3302 	 * ones before the time stamp.
3303 	 */
3304 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3305 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3306 		/* total is the same as the entries */
3307 		*total = count;
3308 	} else
3309 		*total = count +
3310 			ring_buffer_overrun_cpu(buf->buffer, cpu);
3311 	*entries = count;
3312 }
3313 
3314 static void
get_total_entries(struct array_buffer * buf,unsigned long * total,unsigned long * entries)3315 get_total_entries(struct array_buffer *buf,
3316 		  unsigned long *total, unsigned long *entries)
3317 {
3318 	unsigned long t, e;
3319 	int cpu;
3320 
3321 	*total = 0;
3322 	*entries = 0;
3323 
3324 	for_each_tracing_cpu(cpu) {
3325 		get_total_entries_cpu(buf, &t, &e, cpu);
3326 		*total += t;
3327 		*entries += e;
3328 	}
3329 }
3330 
trace_total_entries_cpu(struct trace_array * tr,int cpu)3331 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3332 {
3333 	unsigned long total, entries;
3334 
3335 	if (!tr)
3336 		tr = &global_trace;
3337 
3338 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3339 
3340 	return entries;
3341 }
3342 
trace_total_entries(struct trace_array * tr)3343 unsigned long trace_total_entries(struct trace_array *tr)
3344 {
3345 	unsigned long total, entries;
3346 
3347 	if (!tr)
3348 		tr = &global_trace;
3349 
3350 	get_total_entries(&tr->array_buffer, &total, &entries);
3351 
3352 	return entries;
3353 }
3354 
print_lat_help_header(struct seq_file * m)3355 static void print_lat_help_header(struct seq_file *m)
3356 {
3357 	seq_puts(m, "#                    _------=> CPU#            \n"
3358 		    "#                   / _-----=> irqs-off/BH-disabled\n"
3359 		    "#                  | / _----=> need-resched    \n"
3360 		    "#                  || / _---=> hardirq/softirq \n"
3361 		    "#                  ||| / _--=> preempt-depth   \n"
3362 		    "#                  |||| / _-=> migrate-disable \n"
3363 		    "#                  ||||| /     delay           \n"
3364 		    "#  cmd     pid     |||||| time  |   caller     \n"
3365 		    "#     \\   /        ||||||  \\    |    /       \n");
3366 }
3367 
print_event_info(struct array_buffer * buf,struct seq_file * m)3368 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3369 {
3370 	unsigned long total;
3371 	unsigned long entries;
3372 
3373 	get_total_entries(buf, &total, &entries);
3374 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3375 		   entries, total, num_online_cpus());
3376 	seq_puts(m, "#\n");
3377 }
3378 
print_func_help_header(struct array_buffer * buf,struct seq_file * m,unsigned int flags)3379 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3380 				   unsigned int flags)
3381 {
3382 	bool tgid = flags & TRACE_ITER(RECORD_TGID);
3383 
3384 	print_event_info(buf, m);
3385 
3386 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
3387 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
3388 }
3389 
print_func_help_header_irq(struct array_buffer * buf,struct seq_file * m,unsigned int flags)3390 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3391 				       unsigned int flags)
3392 {
3393 	bool tgid = flags & TRACE_ITER(RECORD_TGID);
3394 	static const char space[] = "            ";
3395 	int prec = tgid ? 12 : 2;
3396 
3397 	print_event_info(buf, m);
3398 
3399 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
3400 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
3401 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
3402 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
3403 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
3404 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
3405 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
3406 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
3407 }
3408 
3409 void
print_trace_header(struct seq_file * m,struct trace_iterator * iter)3410 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3411 {
3412 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3413 	struct array_buffer *buf = iter->array_buffer;
3414 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3415 	struct tracer *type = iter->trace;
3416 	unsigned long entries;
3417 	unsigned long total;
3418 	const char *name = type->name;
3419 
3420 	get_total_entries(buf, &total, &entries);
3421 
3422 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3423 		   name, init_utsname()->release);
3424 	seq_puts(m, "# -----------------------------------"
3425 		 "---------------------------------\n");
3426 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3427 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3428 		   nsecs_to_usecs(data->saved_latency),
3429 		   entries,
3430 		   total,
3431 		   buf->cpu,
3432 		   preempt_model_str(),
3433 		   /* These are reserved for later use */
3434 		   0, 0, 0, 0);
3435 #ifdef CONFIG_SMP
3436 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3437 #else
3438 	seq_puts(m, ")\n");
3439 #endif
3440 	seq_puts(m, "#    -----------------\n");
3441 	seq_printf(m, "#    | task: %.16s-%d "
3442 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3443 		   data->comm, data->pid,
3444 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3445 		   data->policy, data->rt_priority);
3446 	seq_puts(m, "#    -----------------\n");
3447 
3448 	if (data->critical_start) {
3449 		seq_puts(m, "#  => started at: ");
3450 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3451 		trace_print_seq(m, &iter->seq);
3452 		seq_puts(m, "\n#  => ended at:   ");
3453 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3454 		trace_print_seq(m, &iter->seq);
3455 		seq_puts(m, "\n#\n");
3456 	}
3457 
3458 	seq_puts(m, "#\n");
3459 }
3460 
test_cpu_buff_start(struct trace_iterator * iter)3461 static void test_cpu_buff_start(struct trace_iterator *iter)
3462 {
3463 	struct trace_seq *s = &iter->seq;
3464 	struct trace_array *tr = iter->tr;
3465 
3466 	if (!(tr->trace_flags & TRACE_ITER(ANNOTATE)))
3467 		return;
3468 
3469 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3470 		return;
3471 
3472 	if (cpumask_available(iter->started) &&
3473 	    cpumask_test_cpu(iter->cpu, iter->started))
3474 		return;
3475 
3476 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3477 		return;
3478 
3479 	if (cpumask_available(iter->started))
3480 		cpumask_set_cpu(iter->cpu, iter->started);
3481 
3482 	/* Don't print started cpu buffer for the first entry of the trace */
3483 	if (iter->idx > 1)
3484 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3485 				iter->cpu);
3486 }
3487 
3488 #ifdef CONFIG_FTRACE_SYSCALLS
is_syscall_event(struct trace_event * event)3489 static bool is_syscall_event(struct trace_event *event)
3490 {
3491 	return (event->funcs == &enter_syscall_print_funcs) ||
3492 	       (event->funcs == &exit_syscall_print_funcs);
3493 
3494 }
3495 #define syscall_buf_size CONFIG_TRACE_SYSCALL_BUF_SIZE_DEFAULT
3496 #else
is_syscall_event(struct trace_event * event)3497 static inline bool is_syscall_event(struct trace_event *event)
3498 {
3499 	return false;
3500 }
3501 #define syscall_buf_size 0
3502 #endif /* CONFIG_FTRACE_SYSCALLS */
3503 
print_trace_fmt(struct trace_iterator * iter)3504 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3505 {
3506 	struct trace_array *tr = iter->tr;
3507 	struct trace_seq *s = &iter->seq;
3508 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3509 	struct trace_entry *entry;
3510 	struct trace_event *event;
3511 
3512 	entry = iter->ent;
3513 
3514 	test_cpu_buff_start(iter);
3515 
3516 	event = ftrace_find_event(entry->type);
3517 
3518 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
3519 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3520 			trace_print_lat_context(iter);
3521 		else
3522 			trace_print_context(iter);
3523 	}
3524 
3525 	if (trace_seq_has_overflowed(s))
3526 		return TRACE_TYPE_PARTIAL_LINE;
3527 
3528 	if (event) {
3529 		if (tr->trace_flags & TRACE_ITER(FIELDS))
3530 			return print_event_fields(iter, event);
3531 		/*
3532 		 * For TRACE_EVENT() events, the print_fmt is not
3533 		 * safe to use if the array has delta offsets
3534 		 * Force printing via the fields.
3535 		 */
3536 		if ((tr->text_delta)) {
3537 			/* ftrace and system call events are still OK */
3538 			if ((event->type > __TRACE_LAST_TYPE) &&
3539 			    !is_syscall_event(event))
3540 				return print_event_fields(iter, event);
3541 		}
3542 		return event->funcs->trace(iter, sym_flags, event);
3543 	}
3544 
3545 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3546 
3547 	return trace_handle_return(s);
3548 }
3549 
print_raw_fmt(struct trace_iterator * iter)3550 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3551 {
3552 	struct trace_array *tr = iter->tr;
3553 	struct trace_seq *s = &iter->seq;
3554 	struct trace_entry *entry;
3555 	struct trace_event *event;
3556 
3557 	entry = iter->ent;
3558 
3559 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO))
3560 		trace_seq_printf(s, "%d %d %llu ",
3561 				 entry->pid, iter->cpu, iter->ts);
3562 
3563 	if (trace_seq_has_overflowed(s))
3564 		return TRACE_TYPE_PARTIAL_LINE;
3565 
3566 	event = ftrace_find_event(entry->type);
3567 	if (event)
3568 		return event->funcs->raw(iter, 0, event);
3569 
3570 	trace_seq_printf(s, "%d ?\n", entry->type);
3571 
3572 	return trace_handle_return(s);
3573 }
3574 
print_hex_fmt(struct trace_iterator * iter)3575 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3576 {
3577 	struct trace_array *tr = iter->tr;
3578 	struct trace_seq *s = &iter->seq;
3579 	unsigned char newline = '\n';
3580 	struct trace_entry *entry;
3581 	struct trace_event *event;
3582 
3583 	entry = iter->ent;
3584 
3585 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
3586 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3587 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3588 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3589 		if (trace_seq_has_overflowed(s))
3590 			return TRACE_TYPE_PARTIAL_LINE;
3591 	}
3592 
3593 	event = ftrace_find_event(entry->type);
3594 	if (event) {
3595 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3596 		if (ret != TRACE_TYPE_HANDLED)
3597 			return ret;
3598 	}
3599 
3600 	SEQ_PUT_FIELD(s, newline);
3601 
3602 	return trace_handle_return(s);
3603 }
3604 
print_bin_fmt(struct trace_iterator * iter)3605 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3606 {
3607 	struct trace_array *tr = iter->tr;
3608 	struct trace_seq *s = &iter->seq;
3609 	struct trace_entry *entry;
3610 	struct trace_event *event;
3611 
3612 	entry = iter->ent;
3613 
3614 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
3615 		SEQ_PUT_FIELD(s, entry->pid);
3616 		SEQ_PUT_FIELD(s, iter->cpu);
3617 		SEQ_PUT_FIELD(s, iter->ts);
3618 		if (trace_seq_has_overflowed(s))
3619 			return TRACE_TYPE_PARTIAL_LINE;
3620 	}
3621 
3622 	event = ftrace_find_event(entry->type);
3623 	return event ? event->funcs->binary(iter, 0, event) :
3624 		TRACE_TYPE_HANDLED;
3625 }
3626 
trace_empty(struct trace_iterator * iter)3627 int trace_empty(struct trace_iterator *iter)
3628 {
3629 	struct ring_buffer_iter *buf_iter;
3630 	int cpu;
3631 
3632 	/* If we are looking at one CPU buffer, only check that one */
3633 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3634 		cpu = iter->cpu_file;
3635 		buf_iter = trace_buffer_iter(iter, cpu);
3636 		if (buf_iter) {
3637 			if (!ring_buffer_iter_empty(buf_iter))
3638 				return 0;
3639 		} else {
3640 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
3641 				return 0;
3642 		}
3643 		return 1;
3644 	}
3645 
3646 	for_each_tracing_cpu(cpu) {
3647 		buf_iter = trace_buffer_iter(iter, cpu);
3648 		if (buf_iter) {
3649 			if (!ring_buffer_iter_empty(buf_iter))
3650 				return 0;
3651 		} else {
3652 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
3653 				return 0;
3654 		}
3655 	}
3656 
3657 	return 1;
3658 }
3659 
3660 /*  Called with trace_event_read_lock() held. */
print_trace_line(struct trace_iterator * iter)3661 enum print_line_t print_trace_line(struct trace_iterator *iter)
3662 {
3663 	struct trace_array *tr = iter->tr;
3664 	unsigned long trace_flags = tr->trace_flags;
3665 	enum print_line_t ret;
3666 
3667 	if (iter->lost_events) {
3668 		if (iter->lost_events == (unsigned long)-1)
3669 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
3670 					 iter->cpu);
3671 		else
3672 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3673 					 iter->cpu, iter->lost_events);
3674 		if (trace_seq_has_overflowed(&iter->seq))
3675 			return TRACE_TYPE_PARTIAL_LINE;
3676 	}
3677 
3678 	if (iter->trace && iter->trace->print_line) {
3679 		ret = iter->trace->print_line(iter);
3680 		if (ret != TRACE_TYPE_UNHANDLED)
3681 			return ret;
3682 	}
3683 
3684 	if (iter->ent->type == TRACE_BPUTS &&
3685 			trace_flags & TRACE_ITER(PRINTK) &&
3686 			trace_flags & TRACE_ITER(PRINTK_MSGONLY))
3687 		return trace_print_bputs_msg_only(iter);
3688 
3689 	if (iter->ent->type == TRACE_BPRINT &&
3690 			trace_flags & TRACE_ITER(PRINTK) &&
3691 			trace_flags & TRACE_ITER(PRINTK_MSGONLY))
3692 		return trace_print_bprintk_msg_only(iter);
3693 
3694 	if (iter->ent->type == TRACE_PRINT &&
3695 			trace_flags & TRACE_ITER(PRINTK) &&
3696 			trace_flags & TRACE_ITER(PRINTK_MSGONLY))
3697 		return trace_print_printk_msg_only(iter);
3698 
3699 	if (trace_flags & TRACE_ITER(BIN))
3700 		return print_bin_fmt(iter);
3701 
3702 	if (trace_flags & TRACE_ITER(HEX))
3703 		return print_hex_fmt(iter);
3704 
3705 	if (trace_flags & TRACE_ITER(RAW))
3706 		return print_raw_fmt(iter);
3707 
3708 	return print_trace_fmt(iter);
3709 }
3710 
trace_latency_header(struct seq_file * m)3711 void trace_latency_header(struct seq_file *m)
3712 {
3713 	struct trace_iterator *iter = m->private;
3714 	struct trace_array *tr = iter->tr;
3715 
3716 	/* print nothing if the buffers are empty */
3717 	if (trace_empty(iter))
3718 		return;
3719 
3720 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3721 		print_trace_header(m, iter);
3722 
3723 	if (!(tr->trace_flags & TRACE_ITER(VERBOSE)))
3724 		print_lat_help_header(m);
3725 }
3726 
trace_default_header(struct seq_file * m)3727 void trace_default_header(struct seq_file *m)
3728 {
3729 	struct trace_iterator *iter = m->private;
3730 	struct trace_array *tr = iter->tr;
3731 	unsigned long trace_flags = tr->trace_flags;
3732 
3733 	if (!(trace_flags & TRACE_ITER(CONTEXT_INFO)))
3734 		return;
3735 
3736 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3737 		/* print nothing if the buffers are empty */
3738 		if (trace_empty(iter))
3739 			return;
3740 		print_trace_header(m, iter);
3741 		if (!(trace_flags & TRACE_ITER(VERBOSE)))
3742 			print_lat_help_header(m);
3743 	} else {
3744 		if (!(trace_flags & TRACE_ITER(VERBOSE))) {
3745 			if (trace_flags & TRACE_ITER(IRQ_INFO))
3746 				print_func_help_header_irq(iter->array_buffer,
3747 							   m, trace_flags);
3748 			else
3749 				print_func_help_header(iter->array_buffer, m,
3750 						       trace_flags);
3751 		}
3752 	}
3753 }
3754 
test_ftrace_alive(struct seq_file * m)3755 static void test_ftrace_alive(struct seq_file *m)
3756 {
3757 	if (!ftrace_is_dead())
3758 		return;
3759 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3760 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
3761 }
3762 
3763 #ifdef CONFIG_TRACER_SNAPSHOT
show_snapshot_main_help(struct seq_file * m)3764 static void show_snapshot_main_help(struct seq_file *m)
3765 {
3766 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3767 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3768 		    "#                      Takes a snapshot of the main buffer.\n"
3769 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3770 		    "#                      (Doesn't have to be '2' works with any number that\n"
3771 		    "#                       is not a '0' or '1')\n");
3772 }
3773 
show_snapshot_percpu_help(struct seq_file * m)3774 static void show_snapshot_percpu_help(struct seq_file *m)
3775 {
3776 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3777 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3778 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3779 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3780 #else
3781 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3782 		    "#                     Must use main snapshot file to allocate.\n");
3783 #endif
3784 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3785 		    "#                      (Doesn't have to be '2' works with any number that\n"
3786 		    "#                       is not a '0' or '1')\n");
3787 }
3788 
print_snapshot_help(struct seq_file * m,struct trace_iterator * iter)3789 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3790 {
3791 	if (iter->tr->allocated_snapshot)
3792 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3793 	else
3794 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3795 
3796 	seq_puts(m, "# Snapshot commands:\n");
3797 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3798 		show_snapshot_main_help(m);
3799 	else
3800 		show_snapshot_percpu_help(m);
3801 }
3802 #else
3803 /* Should never be called */
print_snapshot_help(struct seq_file * m,struct trace_iterator * iter)3804 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3805 #endif
3806 
s_show(struct seq_file * m,void * v)3807 static int s_show(struct seq_file *m, void *v)
3808 {
3809 	struct trace_iterator *iter = v;
3810 	int ret;
3811 
3812 	if (iter->ent == NULL) {
3813 		if (iter->tr) {
3814 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
3815 			seq_puts(m, "#\n");
3816 			test_ftrace_alive(m);
3817 		}
3818 		if (iter->snapshot && trace_empty(iter))
3819 			print_snapshot_help(m, iter);
3820 		else if (iter->trace && iter->trace->print_header)
3821 			iter->trace->print_header(m);
3822 		else
3823 			trace_default_header(m);
3824 
3825 	} else if (iter->leftover) {
3826 		/*
3827 		 * If we filled the seq_file buffer earlier, we
3828 		 * want to just show it now.
3829 		 */
3830 		ret = trace_print_seq(m, &iter->seq);
3831 
3832 		/* ret should this time be zero, but you never know */
3833 		iter->leftover = ret;
3834 
3835 	} else {
3836 		ret = print_trace_line(iter);
3837 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
3838 			iter->seq.full = 0;
3839 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
3840 		}
3841 		ret = trace_print_seq(m, &iter->seq);
3842 		/*
3843 		 * If we overflow the seq_file buffer, then it will
3844 		 * ask us for this data again at start up.
3845 		 * Use that instead.
3846 		 *  ret is 0 if seq_file write succeeded.
3847 		 *        -1 otherwise.
3848 		 */
3849 		iter->leftover = ret;
3850 	}
3851 
3852 	return 0;
3853 }
3854 
3855 /*
3856  * Should be used after trace_array_get(), trace_types_lock
3857  * ensures that i_cdev was already initialized.
3858  */
tracing_get_cpu(struct inode * inode)3859 static inline int tracing_get_cpu(struct inode *inode)
3860 {
3861 	if (inode->i_cdev) /* See trace_create_cpu_file() */
3862 		return (long)inode->i_cdev - 1;
3863 	return RING_BUFFER_ALL_CPUS;
3864 }
3865 
3866 static const struct seq_operations tracer_seq_ops = {
3867 	.start		= s_start,
3868 	.next		= s_next,
3869 	.stop		= s_stop,
3870 	.show		= s_show,
3871 };
3872 
3873 /*
3874  * Note, as iter itself can be allocated and freed in different
3875  * ways, this function is only used to free its content, and not
3876  * the iterator itself. The only requirement to all the allocations
3877  * is that it must zero all fields (kzalloc), as freeing works with
3878  * ethier allocated content or NULL.
3879  */
free_trace_iter_content(struct trace_iterator * iter)3880 static void free_trace_iter_content(struct trace_iterator *iter)
3881 {
3882 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
3883 	if (iter->fmt != static_fmt_buf)
3884 		kfree(iter->fmt);
3885 
3886 	kfree(iter->temp);
3887 	kfree(iter->buffer_iter);
3888 	mutex_destroy(&iter->mutex);
3889 	free_cpumask_var(iter->started);
3890 }
3891 
3892 static struct trace_iterator *
__tracing_open(struct inode * inode,struct file * file,bool snapshot)3893 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3894 {
3895 	struct trace_array *tr = inode->i_private;
3896 	struct trace_iterator *iter;
3897 	int cpu;
3898 
3899 	if (tracing_disabled)
3900 		return ERR_PTR(-ENODEV);
3901 
3902 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3903 	if (!iter)
3904 		return ERR_PTR(-ENOMEM);
3905 
3906 	iter->buffer_iter = kzalloc_objs(*iter->buffer_iter, nr_cpu_ids);
3907 	if (!iter->buffer_iter)
3908 		goto release;
3909 
3910 	/*
3911 	 * trace_find_next_entry() may need to save off iter->ent.
3912 	 * It will place it into the iter->temp buffer. As most
3913 	 * events are less than 128, allocate a buffer of that size.
3914 	 * If one is greater, then trace_find_next_entry() will
3915 	 * allocate a new buffer to adjust for the bigger iter->ent.
3916 	 * It's not critical if it fails to get allocated here.
3917 	 */
3918 	iter->temp = kmalloc(128, GFP_KERNEL);
3919 	if (iter->temp)
3920 		iter->temp_size = 128;
3921 
3922 	/*
3923 	 * trace_event_printf() may need to modify given format
3924 	 * string to replace %p with %px so that it shows real address
3925 	 * instead of hash value. However, that is only for the event
3926 	 * tracing, other tracer may not need. Defer the allocation
3927 	 * until it is needed.
3928 	 */
3929 	iter->fmt = NULL;
3930 	iter->fmt_size = 0;
3931 
3932 	mutex_lock(&trace_types_lock);
3933 	iter->trace = tr->current_trace;
3934 
3935 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3936 		goto fail;
3937 
3938 	iter->tr = tr;
3939 
3940 #ifdef CONFIG_TRACER_SNAPSHOT
3941 	/* Currently only the top directory has a snapshot */
3942 	if (tr->current_trace->print_max || snapshot)
3943 		iter->array_buffer = &tr->snapshot_buffer;
3944 	else
3945 #endif
3946 		iter->array_buffer = &tr->array_buffer;
3947 	iter->snapshot = snapshot;
3948 	iter->pos = -1;
3949 	iter->cpu_file = tracing_get_cpu(inode);
3950 	mutex_init(&iter->mutex);
3951 
3952 	/* Notify the tracer early; before we stop tracing. */
3953 	if (iter->trace->open)
3954 		iter->trace->open(iter);
3955 
3956 	/* Annotate start of buffers if we had overruns */
3957 	if (ring_buffer_overruns(iter->array_buffer->buffer))
3958 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
3959 
3960 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
3961 	if (trace_clocks[tr->clock_id].in_ns)
3962 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3963 
3964 	/*
3965 	 * If pause-on-trace is enabled, then stop the trace while
3966 	 * dumping, unless this is the "snapshot" file
3967 	 */
3968 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER(PAUSE_ON_TRACE))) {
3969 		iter->iter_flags |= TRACE_FILE_PAUSE;
3970 		tracing_stop_tr(tr);
3971 	}
3972 
3973 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3974 		for_each_tracing_cpu(cpu) {
3975 			iter->buffer_iter[cpu] =
3976 				ring_buffer_read_start(iter->array_buffer->buffer,
3977 						       cpu, GFP_KERNEL);
3978 			tracing_iter_reset(iter, cpu);
3979 		}
3980 	} else {
3981 		cpu = iter->cpu_file;
3982 		iter->buffer_iter[cpu] =
3983 			ring_buffer_read_start(iter->array_buffer->buffer,
3984 					       cpu, GFP_KERNEL);
3985 		tracing_iter_reset(iter, cpu);
3986 	}
3987 
3988 	mutex_unlock(&trace_types_lock);
3989 
3990 	return iter;
3991 
3992  fail:
3993 	mutex_unlock(&trace_types_lock);
3994 	free_trace_iter_content(iter);
3995 release:
3996 	seq_release_private(inode, file);
3997 	return ERR_PTR(-ENOMEM);
3998 }
3999 
tracing_open_generic(struct inode * inode,struct file * filp)4000 int tracing_open_generic(struct inode *inode, struct file *filp)
4001 {
4002 	int ret;
4003 
4004 	ret = tracing_check_open_get_tr(NULL);
4005 	if (ret)
4006 		return ret;
4007 
4008 	filp->private_data = inode->i_private;
4009 	return 0;
4010 }
4011 
4012 /*
4013  * Open and update trace_array ref count.
4014  * Must have the current trace_array passed to it.
4015  */
tracing_open_generic_tr(struct inode * inode,struct file * filp)4016 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4017 {
4018 	struct trace_array *tr = inode->i_private;
4019 	int ret;
4020 
4021 	ret = tracing_check_open_get_tr(tr);
4022 	if (ret)
4023 		return ret;
4024 
4025 	filp->private_data = inode->i_private;
4026 
4027 	return 0;
4028 }
4029 
4030 /*
4031  * The private pointer of the inode is the trace_event_file.
4032  * Update the tr ref count associated to it.
4033  */
tracing_open_file_tr(struct inode * inode,struct file * filp)4034 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4035 {
4036 	struct trace_event_file *file = inode->i_private;
4037 	int ret;
4038 
4039 	ret = tracing_check_open_get_tr(file->tr);
4040 	if (ret)
4041 		return ret;
4042 
4043 	guard(mutex)(&event_mutex);
4044 
4045 	/* Fail if the file is marked for removal */
4046 	if (file->flags & EVENT_FILE_FL_FREED) {
4047 		trace_array_put(file->tr);
4048 		return -ENODEV;
4049 	} else {
4050 		event_file_get(file);
4051 	}
4052 
4053 	filp->private_data = inode->i_private;
4054 
4055 	return 0;
4056 }
4057 
tracing_release_file_tr(struct inode * inode,struct file * filp)4058 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4059 {
4060 	struct trace_event_file *file = inode->i_private;
4061 
4062 	trace_array_put(file->tr);
4063 	event_file_put(file);
4064 
4065 	return 0;
4066 }
4067 
tracing_single_release_file_tr(struct inode * inode,struct file * filp)4068 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4069 {
4070 	tracing_release_file_tr(inode, filp);
4071 	return single_release(inode, filp);
4072 }
4073 
tracing_release(struct inode * inode,struct file * file)4074 static int tracing_release(struct inode *inode, struct file *file)
4075 {
4076 	struct trace_array *tr = inode->i_private;
4077 	struct seq_file *m = file->private_data;
4078 	struct trace_iterator *iter;
4079 	int cpu;
4080 
4081 	if (!(file->f_mode & FMODE_READ)) {
4082 		trace_array_put(tr);
4083 		return 0;
4084 	}
4085 
4086 	/* Writes do not use seq_file */
4087 	iter = m->private;
4088 	mutex_lock(&trace_types_lock);
4089 
4090 	for_each_tracing_cpu(cpu) {
4091 		if (iter->buffer_iter[cpu])
4092 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4093 	}
4094 
4095 	if (iter->trace && iter->trace->close)
4096 		iter->trace->close(iter);
4097 
4098 	if (iter->iter_flags & TRACE_FILE_PAUSE)
4099 		/* reenable tracing if it was previously enabled */
4100 		tracing_start_tr(tr);
4101 
4102 	__trace_array_put(tr);
4103 
4104 	mutex_unlock(&trace_types_lock);
4105 
4106 	free_trace_iter_content(iter);
4107 	seq_release_private(inode, file);
4108 
4109 	return 0;
4110 }
4111 
tracing_release_generic_tr(struct inode * inode,struct file * file)4112 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4113 {
4114 	struct trace_array *tr = inode->i_private;
4115 
4116 	trace_array_put(tr);
4117 	return 0;
4118 }
4119 
tracing_single_release_tr(struct inode * inode,struct file * file)4120 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4121 {
4122 	struct trace_array *tr = inode->i_private;
4123 
4124 	trace_array_put(tr);
4125 
4126 	return single_release(inode, file);
4127 }
4128 
4129 static bool update_last_data_if_empty(struct trace_array *tr);
4130 
tracing_open(struct inode * inode,struct file * file)4131 static int tracing_open(struct inode *inode, struct file *file)
4132 {
4133 	struct trace_array *tr = inode->i_private;
4134 	struct trace_iterator *iter;
4135 	int ret;
4136 
4137 	ret = tracing_check_open_get_tr(tr);
4138 	if (ret)
4139 		return ret;
4140 
4141 	/* If this file was open for write, then erase contents */
4142 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4143 		int cpu = tracing_get_cpu(inode);
4144 		struct array_buffer *trace_buf = &tr->array_buffer;
4145 
4146 #ifdef CONFIG_TRACER_MAX_TRACE
4147 		if (tr->current_trace->print_max)
4148 			trace_buf = &tr->snapshot_buffer;
4149 #endif
4150 
4151 		if (cpu == RING_BUFFER_ALL_CPUS)
4152 			tracing_reset_online_cpus(trace_buf);
4153 		else
4154 			tracing_reset_cpu(trace_buf, cpu);
4155 
4156 		update_last_data_if_empty(tr);
4157 	}
4158 
4159 	if (file->f_mode & FMODE_READ) {
4160 		iter = __tracing_open(inode, file, false);
4161 		if (IS_ERR(iter))
4162 			ret = PTR_ERR(iter);
4163 		else if (tr->trace_flags & TRACE_ITER(LATENCY_FMT))
4164 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4165 	}
4166 
4167 	if (ret < 0)
4168 		trace_array_put(tr);
4169 
4170 	return ret;
4171 }
4172 
4173 /*
4174  * Some tracers are not suitable for instance buffers.
4175  * A tracer is always available for the global array (toplevel)
4176  * or if it explicitly states that it is.
4177  */
4178 static bool
trace_ok_for_array(struct tracer * t,struct trace_array * tr)4179 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4180 {
4181 	/* arrays with mapped buffer range do not have snapshots */
4182 	if (tr->range_addr_start && tracer_uses_snapshot(t))
4183 		return false;
4184 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4185 }
4186 
4187 /* Find the next tracer that this trace array may use */
4188 static struct tracer *
get_tracer_for_array(struct trace_array * tr,struct tracer * t)4189 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4190 {
4191 	while (t && !trace_ok_for_array(t, tr))
4192 		t = t->next;
4193 
4194 	return t;
4195 }
4196 
4197 static void *
t_next(struct seq_file * m,void * v,loff_t * pos)4198 t_next(struct seq_file *m, void *v, loff_t *pos)
4199 {
4200 	struct trace_array *tr = m->private;
4201 	struct tracer *t = v;
4202 
4203 	(*pos)++;
4204 
4205 	if (t)
4206 		t = get_tracer_for_array(tr, t->next);
4207 
4208 	return t;
4209 }
4210 
t_start(struct seq_file * m,loff_t * pos)4211 static void *t_start(struct seq_file *m, loff_t *pos)
4212 {
4213 	struct trace_array *tr = m->private;
4214 	struct tracer *t;
4215 	loff_t l = 0;
4216 
4217 	mutex_lock(&trace_types_lock);
4218 
4219 	t = get_tracer_for_array(tr, trace_types);
4220 	for (; t && l < *pos; t = t_next(m, t, &l))
4221 			;
4222 
4223 	return t;
4224 }
4225 
t_stop(struct seq_file * m,void * p)4226 static void t_stop(struct seq_file *m, void *p)
4227 {
4228 	mutex_unlock(&trace_types_lock);
4229 }
4230 
t_show(struct seq_file * m,void * v)4231 static int t_show(struct seq_file *m, void *v)
4232 {
4233 	struct tracer *t = v;
4234 
4235 	if (!t)
4236 		return 0;
4237 
4238 	seq_puts(m, t->name);
4239 	if (t->next)
4240 		seq_putc(m, ' ');
4241 	else
4242 		seq_putc(m, '\n');
4243 
4244 	return 0;
4245 }
4246 
4247 static const struct seq_operations show_traces_seq_ops = {
4248 	.start		= t_start,
4249 	.next		= t_next,
4250 	.stop		= t_stop,
4251 	.show		= t_show,
4252 };
4253 
show_traces_open(struct inode * inode,struct file * file)4254 static int show_traces_open(struct inode *inode, struct file *file)
4255 {
4256 	struct trace_array *tr = inode->i_private;
4257 	struct seq_file *m;
4258 	int ret;
4259 
4260 	ret = tracing_check_open_get_tr(tr);
4261 	if (ret)
4262 		return ret;
4263 
4264 	ret = seq_open(file, &show_traces_seq_ops);
4265 	if (ret) {
4266 		trace_array_put(tr);
4267 		return ret;
4268 	}
4269 
4270 	m = file->private_data;
4271 	m->private = tr;
4272 
4273 	return 0;
4274 }
4275 
tracing_seq_release(struct inode * inode,struct file * file)4276 static int tracing_seq_release(struct inode *inode, struct file *file)
4277 {
4278 	struct trace_array *tr = inode->i_private;
4279 
4280 	trace_array_put(tr);
4281 	return seq_release(inode, file);
4282 }
4283 
4284 static ssize_t
tracing_write_stub(struct file * filp,const char __user * ubuf,size_t count,loff_t * ppos)4285 tracing_write_stub(struct file *filp, const char __user *ubuf,
4286 		   size_t count, loff_t *ppos)
4287 {
4288 	return count;
4289 }
4290 
tracing_lseek(struct file * file,loff_t offset,int whence)4291 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4292 {
4293 	int ret;
4294 
4295 	if (file->f_mode & FMODE_READ)
4296 		ret = seq_lseek(file, offset, whence);
4297 	else
4298 		file->f_pos = ret = 0;
4299 
4300 	return ret;
4301 }
4302 
4303 static const struct file_operations tracing_fops = {
4304 	.open		= tracing_open,
4305 	.read		= seq_read,
4306 	.read_iter	= seq_read_iter,
4307 	.splice_read	= copy_splice_read,
4308 	.write		= tracing_write_stub,
4309 	.llseek		= tracing_lseek,
4310 	.release	= tracing_release,
4311 };
4312 
4313 static const struct file_operations show_traces_fops = {
4314 	.open		= show_traces_open,
4315 	.read		= seq_read,
4316 	.llseek		= seq_lseek,
4317 	.release	= tracing_seq_release,
4318 };
4319 
4320 static ssize_t
tracing_cpumask_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)4321 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4322 		     size_t count, loff_t *ppos)
4323 {
4324 	struct trace_array *tr = file_inode(filp)->i_private;
4325 	char *mask_str __free(kfree) = NULL;
4326 	int len;
4327 
4328 	len = snprintf(NULL, 0, "%*pb\n",
4329 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4330 	mask_str = kmalloc(len, GFP_KERNEL);
4331 	if (!mask_str)
4332 		return -ENOMEM;
4333 
4334 	len = snprintf(mask_str, len, "%*pb\n",
4335 		       cpumask_pr_args(tr->tracing_cpumask));
4336 	if (len >= count)
4337 		return -EINVAL;
4338 
4339 	return simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4340 }
4341 
tracing_set_cpumask(struct trace_array * tr,cpumask_var_t tracing_cpumask_new)4342 int tracing_set_cpumask(struct trace_array *tr,
4343 			cpumask_var_t tracing_cpumask_new)
4344 {
4345 	int cpu;
4346 
4347 	if (!tr)
4348 		return -EINVAL;
4349 
4350 	local_irq_disable();
4351 	arch_spin_lock(&tr->max_lock);
4352 	for_each_tracing_cpu(cpu) {
4353 		/*
4354 		 * Increase/decrease the disabled counter if we are
4355 		 * about to flip a bit in the cpumask:
4356 		 */
4357 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4358 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4359 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4360 #ifdef CONFIG_TRACER_SNAPSHOT
4361 			ring_buffer_record_disable_cpu(tr->snapshot_buffer.buffer, cpu);
4362 #endif
4363 		}
4364 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4365 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4366 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4367 #ifdef CONFIG_TRACER_SNAPSHOT
4368 			ring_buffer_record_enable_cpu(tr->snapshot_buffer.buffer, cpu);
4369 #endif
4370 		}
4371 	}
4372 	arch_spin_unlock(&tr->max_lock);
4373 	local_irq_enable();
4374 
4375 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4376 
4377 	return 0;
4378 }
4379 
4380 static ssize_t
tracing_cpumask_write(struct file * filp,const char __user * ubuf,size_t count,loff_t * ppos)4381 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4382 		      size_t count, loff_t *ppos)
4383 {
4384 	struct trace_array *tr = file_inode(filp)->i_private;
4385 	cpumask_var_t tracing_cpumask_new;
4386 	int err;
4387 
4388 	if (count == 0 || count > KMALLOC_MAX_SIZE)
4389 		return -EINVAL;
4390 
4391 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4392 		return -ENOMEM;
4393 
4394 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4395 	if (err)
4396 		goto err_free;
4397 
4398 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
4399 	if (err)
4400 		goto err_free;
4401 
4402 	free_cpumask_var(tracing_cpumask_new);
4403 
4404 	return count;
4405 
4406 err_free:
4407 	free_cpumask_var(tracing_cpumask_new);
4408 
4409 	return err;
4410 }
4411 
4412 static const struct file_operations tracing_cpumask_fops = {
4413 	.open		= tracing_open_generic_tr,
4414 	.read		= tracing_cpumask_read,
4415 	.write		= tracing_cpumask_write,
4416 	.release	= tracing_release_generic_tr,
4417 	.llseek		= generic_file_llseek,
4418 };
4419 
tracing_trace_options_show(struct seq_file * m,void * v)4420 static int tracing_trace_options_show(struct seq_file *m, void *v)
4421 {
4422 	struct tracer_opt *trace_opts;
4423 	struct trace_array *tr = m->private;
4424 	struct tracer_flags *flags;
4425 	u32 tracer_flags;
4426 	int i;
4427 
4428 	guard(mutex)(&trace_types_lock);
4429 
4430 	for (i = 0; trace_options[i]; i++) {
4431 		if (tr->trace_flags & (1ULL << i))
4432 			seq_printf(m, "%s\n", trace_options[i]);
4433 		else
4434 			seq_printf(m, "no%s\n", trace_options[i]);
4435 	}
4436 
4437 	flags = tr->current_trace_flags;
4438 	if (!flags || !flags->opts)
4439 		return 0;
4440 
4441 	tracer_flags = flags->val;
4442 	trace_opts = flags->opts;
4443 
4444 	for (i = 0; trace_opts[i].name; i++) {
4445 		if (tracer_flags & trace_opts[i].bit)
4446 			seq_printf(m, "%s\n", trace_opts[i].name);
4447 		else
4448 			seq_printf(m, "no%s\n", trace_opts[i].name);
4449 	}
4450 
4451 	return 0;
4452 }
4453 
__set_tracer_option(struct trace_array * tr,struct tracer_flags * tracer_flags,struct tracer_opt * opts,int neg)4454 static int __set_tracer_option(struct trace_array *tr,
4455 			       struct tracer_flags *tracer_flags,
4456 			       struct tracer_opt *opts, int neg)
4457 {
4458 	struct tracer *trace = tracer_flags->trace;
4459 	int ret = 0;
4460 
4461 	if (trace->set_flag)
4462 		ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4463 	if (ret)
4464 		return ret;
4465 
4466 	if (neg)
4467 		tracer_flags->val &= ~opts->bit;
4468 	else
4469 		tracer_flags->val |= opts->bit;
4470 	return 0;
4471 }
4472 
4473 /* Try to assign a tracer specific option */
set_tracer_option(struct trace_array * tr,char * cmp,int neg)4474 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4475 {
4476 	struct tracer_flags *tracer_flags = tr->current_trace_flags;
4477 	struct tracer_opt *opts = NULL;
4478 	int i;
4479 
4480 	if (!tracer_flags || !tracer_flags->opts)
4481 		return 0;
4482 
4483 	for (i = 0; tracer_flags->opts[i].name; i++) {
4484 		opts = &tracer_flags->opts[i];
4485 
4486 		if (strcmp(cmp, opts->name) == 0)
4487 			return __set_tracer_option(tr, tracer_flags, opts, neg);
4488 	}
4489 
4490 	return -EINVAL;
4491 }
4492 
4493 /* Some tracers require overwrite to stay enabled */
trace_keep_overwrite(struct tracer * tracer,u64 mask,int set)4494 int trace_keep_overwrite(struct tracer *tracer, u64 mask, int set)
4495 {
4496 	if (tracer->enabled && (mask & TRACE_ITER(OVERWRITE)) && !set)
4497 		return -1;
4498 
4499 	return 0;
4500 }
4501 
set_tracer_flag(struct trace_array * tr,u64 mask,int enabled)4502 int set_tracer_flag(struct trace_array *tr, u64 mask, int enabled)
4503 {
4504 	switch (mask) {
4505 	case TRACE_ITER(RECORD_TGID):
4506 	case TRACE_ITER(RECORD_CMD):
4507 	case TRACE_ITER(TRACE_PRINTK):
4508 	case TRACE_ITER(COPY_MARKER):
4509 		lockdep_assert_held(&event_mutex);
4510 	}
4511 
4512 	/* do nothing if flag is already set */
4513 	if (!!(tr->trace_flags & mask) == !!enabled)
4514 		return 0;
4515 
4516 	/* Give the tracer a chance to approve the change */
4517 	if (tr->current_trace->flag_changed)
4518 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4519 			return -EINVAL;
4520 
4521 	switch (mask) {
4522 	case TRACE_ITER(TRACE_PRINTK):
4523 		if (enabled) {
4524 			update_printk_trace(tr);
4525 		} else {
4526 			/*
4527 			 * The global_trace cannot clear this.
4528 			 * It's flag only gets cleared if another instance sets it.
4529 			 */
4530 			if (printk_trace == &global_trace)
4531 				return -EINVAL;
4532 			/*
4533 			 * An instance must always have it set.
4534 			 * by default, that's the global_trace instance.
4535 			 */
4536 			if (printk_trace == tr)
4537 				update_printk_trace(&global_trace);
4538 		}
4539 		break;
4540 
4541 	case TRACE_ITER(COPY_MARKER):
4542 		update_marker_trace(tr, enabled);
4543 		/* update_marker_trace updates the tr->trace_flags */
4544 		return 0;
4545 	}
4546 
4547 	if (enabled)
4548 		tr->trace_flags |= mask;
4549 	else
4550 		tr->trace_flags &= ~mask;
4551 
4552 	switch (mask) {
4553 	case TRACE_ITER(RECORD_CMD):
4554 		trace_event_enable_cmd_record(enabled);
4555 		break;
4556 
4557 	case TRACE_ITER(RECORD_TGID):
4558 
4559 		if (trace_alloc_tgid_map() < 0) {
4560 			tr->trace_flags &= ~TRACE_ITER(RECORD_TGID);
4561 			return -ENOMEM;
4562 		}
4563 
4564 		trace_event_enable_tgid_record(enabled);
4565 		break;
4566 
4567 	case TRACE_ITER(EVENT_FORK):
4568 		trace_event_follow_fork(tr, enabled);
4569 		break;
4570 
4571 	case TRACE_ITER(FUNC_FORK):
4572 		ftrace_pid_follow_fork(tr, enabled);
4573 		break;
4574 
4575 	case TRACE_ITER(OVERWRITE):
4576 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4577 #ifdef CONFIG_TRACER_SNAPSHOT
4578 		ring_buffer_change_overwrite(tr->snapshot_buffer.buffer, enabled);
4579 #endif
4580 		break;
4581 
4582 	case TRACE_ITER(PRINTK):
4583 		trace_printk_start_stop_comm(enabled);
4584 		trace_printk_control(enabled);
4585 		break;
4586 
4587 #if defined(CONFIG_FUNCTION_PROFILER) && defined(CONFIG_FUNCTION_GRAPH_TRACER)
4588 	case TRACE_GRAPH_GRAPH_TIME:
4589 		ftrace_graph_graph_time_control(enabled);
4590 		break;
4591 #endif
4592 	}
4593 
4594 	return 0;
4595 }
4596 
trace_set_options(struct trace_array * tr,char * option)4597 int trace_set_options(struct trace_array *tr, char *option)
4598 {
4599 	char *cmp;
4600 	int neg = 0;
4601 	int ret;
4602 	size_t orig_len = strlen(option);
4603 	int len;
4604 
4605 	cmp = strstrip(option);
4606 
4607 	len = str_has_prefix(cmp, "no");
4608 	if (len)
4609 		neg = 1;
4610 
4611 	cmp += len;
4612 
4613 	mutex_lock(&event_mutex);
4614 	mutex_lock(&trace_types_lock);
4615 
4616 	ret = match_string(trace_options, -1, cmp);
4617 	/* If no option could be set, test the specific tracer options */
4618 	if (ret < 0)
4619 		ret = set_tracer_option(tr, cmp, neg);
4620 	else
4621 		ret = set_tracer_flag(tr, 1ULL << ret, !neg);
4622 
4623 	mutex_unlock(&trace_types_lock);
4624 	mutex_unlock(&event_mutex);
4625 
4626 	/*
4627 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4628 	 * turn it back into a space.
4629 	 */
4630 	if (orig_len > strlen(option))
4631 		option[strlen(option)] = ' ';
4632 
4633 	return ret;
4634 }
4635 
apply_trace_boot_options(void)4636 static void __init apply_trace_boot_options(void)
4637 {
4638 	char *buf = trace_boot_options_buf;
4639 	char *option;
4640 
4641 	while (true) {
4642 		option = strsep(&buf, ",");
4643 
4644 		if (!option)
4645 			break;
4646 
4647 		if (*option)
4648 			trace_set_options(&global_trace, option);
4649 
4650 		/* Put back the comma to allow this to be called again */
4651 		if (buf)
4652 			*(buf - 1) = ',';
4653 	}
4654 }
4655 
4656 static ssize_t
tracing_trace_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)4657 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4658 			size_t cnt, loff_t *ppos)
4659 {
4660 	struct seq_file *m = filp->private_data;
4661 	struct trace_array *tr = m->private;
4662 	char buf[64];
4663 	int ret;
4664 
4665 	if (cnt >= sizeof(buf))
4666 		return -EINVAL;
4667 
4668 	if (copy_from_user(buf, ubuf, cnt))
4669 		return -EFAULT;
4670 
4671 	buf[cnt] = 0;
4672 
4673 	ret = trace_set_options(tr, buf);
4674 	if (ret < 0)
4675 		return ret;
4676 
4677 	*ppos += cnt;
4678 
4679 	return cnt;
4680 }
4681 
tracing_trace_options_open(struct inode * inode,struct file * file)4682 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4683 {
4684 	struct trace_array *tr = inode->i_private;
4685 	int ret;
4686 
4687 	ret = tracing_check_open_get_tr(tr);
4688 	if (ret)
4689 		return ret;
4690 
4691 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4692 	if (ret < 0)
4693 		trace_array_put(tr);
4694 
4695 	return ret;
4696 }
4697 
4698 static const struct file_operations tracing_iter_fops = {
4699 	.open		= tracing_trace_options_open,
4700 	.read		= seq_read,
4701 	.llseek		= seq_lseek,
4702 	.release	= tracing_single_release_tr,
4703 	.write		= tracing_trace_options_write,
4704 };
4705 
4706 static const char readme_msg[] =
4707 	"tracing mini-HOWTO:\n\n"
4708 	"By default tracefs removes all OTH file permission bits.\n"
4709 	"When mounting tracefs an optional group id can be specified\n"
4710 	"which adds the group to every directory and file in tracefs:\n\n"
4711 	"\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
4712 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4713 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4714 	" Important files:\n"
4715 	"  trace\t\t\t- The static contents of the buffer\n"
4716 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4717 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4718 	"  current_tracer\t- function and latency tracers\n"
4719 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4720 	"  error_log\t- error log for failed commands (that support it)\n"
4721 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4722 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4723 	"  trace_clock\t\t- change the clock used to order events\n"
4724 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4725 	"      global:   Synced across CPUs but slows tracing down.\n"
4726 	"     counter:   Not a clock, but just an increment\n"
4727 	"      uptime:   Jiffy counter from time of boot\n"
4728 	"        perf:   Same clock that perf events use\n"
4729 #ifdef CONFIG_X86_64
4730 	"     x86-tsc:   TSC cycle counter\n"
4731 #endif
4732 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
4733 	"       delta:   Delta difference against a buffer-wide timestamp\n"
4734 	"    absolute:   Absolute (standalone) timestamp\n"
4735 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4736 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4737 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4738 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4739 	"\t\t\t  Remove sub-buffer with rmdir\n"
4740 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4741 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
4742 	"\t\t\t  option name\n"
4743 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4744 #ifdef CONFIG_DYNAMIC_FTRACE
4745 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4746 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4747 	"\t\t\t  functions\n"
4748 	"\t     accepts: func_full_name or glob-matching-pattern\n"
4749 	"\t     modules: Can select a group via module\n"
4750 	"\t      Format: :mod:<module-name>\n"
4751 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4752 	"\t    triggers: a command to perform when function is hit\n"
4753 	"\t      Format: <function>:<trigger>[:count]\n"
4754 	"\t     trigger: traceon, traceoff\n"
4755 	"\t\t      enable_event:<system>:<event>\n"
4756 	"\t\t      disable_event:<system>:<event>\n"
4757 #ifdef CONFIG_STACKTRACE
4758 	"\t\t      stacktrace\n"
4759 #endif
4760 #ifdef CONFIG_TRACER_SNAPSHOT
4761 	"\t\t      snapshot\n"
4762 #endif
4763 	"\t\t      dump\n"
4764 	"\t\t      cpudump\n"
4765 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4766 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4767 	"\t     The first one will disable tracing every time do_fault is hit\n"
4768 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4769 	"\t       The first time do trap is hit and it disables tracing, the\n"
4770 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4771 	"\t       the counter will not decrement. It only decrements when the\n"
4772 	"\t       trigger did work\n"
4773 	"\t     To remove trigger without count:\n"
4774 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
4775 	"\t     To remove trigger with a count:\n"
4776 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4777 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4778 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4779 	"\t    modules: Can select a group via module command :mod:\n"
4780 	"\t    Does not accept triggers\n"
4781 #endif /* CONFIG_DYNAMIC_FTRACE */
4782 #ifdef CONFIG_FUNCTION_TRACER
4783 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4784 	"\t\t    (function)\n"
4785 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
4786 	"\t\t    (function)\n"
4787 #endif
4788 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4789 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4790 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4791 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4792 #endif
4793 #ifdef CONFIG_TRACER_SNAPSHOT
4794 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4795 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4796 	"\t\t\t  information\n"
4797 #endif
4798 #ifdef CONFIG_STACK_TRACER
4799 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4800 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4801 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4802 	"\t\t\t  new trace)\n"
4803 #ifdef CONFIG_DYNAMIC_FTRACE
4804 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4805 	"\t\t\t  traces\n"
4806 #endif
4807 #endif /* CONFIG_STACK_TRACER */
4808 #ifdef CONFIG_DYNAMIC_EVENTS
4809 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
4810 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4811 #endif
4812 #ifdef CONFIG_KPROBE_EVENTS
4813 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
4814 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4815 #endif
4816 #ifdef CONFIG_UPROBE_EVENTS
4817 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
4818 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4819 #endif
4820 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
4821     defined(CONFIG_FPROBE_EVENTS)
4822 	"\t  accepts: event-definitions (one definition per line)\n"
4823 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4824 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
4825 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
4826 #endif
4827 #ifdef CONFIG_FPROBE_EVENTS
4828 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
4829 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
4830 #endif
4831 #ifdef CONFIG_HIST_TRIGGERS
4832 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
4833 #endif
4834 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
4835 	"\t           -:[<group>/][<event>]\n"
4836 #ifdef CONFIG_KPROBE_EVENTS
4837 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4838   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
4839 #endif
4840 #ifdef CONFIG_UPROBE_EVENTS
4841   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
4842 #endif
4843 	"\t     args: <name>=fetcharg[:type]\n"
4844 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
4845 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4846 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
4847 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
4848 	"\t           <argname>[->field[->field|.field...]],\n"
4849 #endif
4850 #else
4851 	"\t           $stack<index>, $stack, $retval, $comm,\n"
4852 #endif
4853 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
4854 	"\t     kernel return probes support: $retval, $arg<N>, $comm\n"
4855 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
4856 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
4857 	"\t           symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
4858 #ifdef CONFIG_HIST_TRIGGERS
4859 	"\t    field: <stype> <name>;\n"
4860 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4861 	"\t           [unsigned] char/int/long\n"
4862 #endif
4863 	"\t    efield: For event probes ('e' types), the field is on of the fields\n"
4864 	"\t            of the <attached-group>/<attached-event>.\n"
4865 #endif
4866 	"  set_event\t\t- Enables events by name written into it\n"
4867 	"\t\t\t  Can enable module events via: :mod:<module>\n"
4868 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4869 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4870 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4871 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4872 	"\t\t\t  events\n"
4873 	"      filter\t\t- If set, only events passing filter are traced\n"
4874 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4875 	"\t\t\t  <event>:\n"
4876 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4877 	"      filter\t\t- If set, only events passing filter are traced\n"
4878 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4879 	"\t    Format: <trigger>[:count][if <filter>]\n"
4880 	"\t   trigger: traceon, traceoff\n"
4881 	"\t            enable_event:<system>:<event>\n"
4882 	"\t            disable_event:<system>:<event>\n"
4883 #ifdef CONFIG_HIST_TRIGGERS
4884 	"\t            enable_hist:<system>:<event>\n"
4885 	"\t            disable_hist:<system>:<event>\n"
4886 #endif
4887 #ifdef CONFIG_STACKTRACE
4888 	"\t\t    stacktrace\n"
4889 #endif
4890 #ifdef CONFIG_TRACER_SNAPSHOT
4891 	"\t\t    snapshot\n"
4892 #endif
4893 #ifdef CONFIG_HIST_TRIGGERS
4894 	"\t\t    hist (see below)\n"
4895 #endif
4896 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4897 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4898 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4899 	"\t                  events/block/block_unplug/trigger\n"
4900 	"\t   The first disables tracing every time block_unplug is hit.\n"
4901 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4902 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4903 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4904 	"\t   Like function triggers, the counter is only decremented if it\n"
4905 	"\t    enabled or disabled tracing.\n"
4906 	"\t   To remove a trigger without a count:\n"
4907 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
4908 	"\t   To remove a trigger with a count:\n"
4909 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
4910 	"\t   Filters can be ignored when removing a trigger.\n"
4911 #ifdef CONFIG_HIST_TRIGGERS
4912 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4913 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
4914 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
4915 	"\t            [:values=<field1[,field2,...]>]\n"
4916 	"\t            [:sort=<field1[,field2,...]>]\n"
4917 	"\t            [:size=#entries]\n"
4918 	"\t            [:pause][:continue][:clear]\n"
4919 	"\t            [:name=histname1]\n"
4920 	"\t            [:nohitcount]\n"
4921 	"\t            [:<handler>.<action>]\n"
4922 	"\t            [if <filter>]\n\n"
4923 	"\t    Note, special fields can be used as well:\n"
4924 	"\t            common_timestamp - to record current timestamp\n"
4925 	"\t            common_cpu - to record the CPU the event happened on\n"
4926 	"\n"
4927 	"\t    A hist trigger variable can be:\n"
4928 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
4929 	"\t        - a reference to another variable e.g. y=$x,\n"
4930 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
4931 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
4932 	"\n"
4933 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
4934 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
4935 	"\t    variable reference, field or numeric literal.\n"
4936 	"\n"
4937 	"\t    When a matching event is hit, an entry is added to a hash\n"
4938 	"\t    table using the key(s) and value(s) named, and the value of a\n"
4939 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
4940 	"\t    correspond to fields in the event's format description.  Keys\n"
4941 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
4942 	"\t    Compound keys consisting of up to two fields can be specified\n"
4943 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4944 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
4945 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
4946 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
4947 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
4948 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
4949 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
4950 	"\t    its histogram data will be shared with other triggers of the\n"
4951 	"\t    same name, and trigger hits will update this common data.\n\n"
4952 	"\t    Reading the 'hist' file for the event will dump the hash\n"
4953 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
4954 	"\t    triggers attached to an event, there will be a table for each\n"
4955 	"\t    trigger in the output.  The table displayed for a named\n"
4956 	"\t    trigger will be the same as any other instance having the\n"
4957 	"\t    same name.  The default format used to display a given field\n"
4958 	"\t    can be modified by appending any of the following modifiers\n"
4959 	"\t    to the field name, as applicable:\n\n"
4960 	"\t            .hex        display a number as a hex value\n"
4961 	"\t            .sym        display an address as a symbol\n"
4962 	"\t            .sym-offset display an address as a symbol and offset\n"
4963 	"\t            .execname   display a common_pid as a program name\n"
4964 	"\t            .syscall    display a syscall id as a syscall name\n"
4965 	"\t            .log2       display log2 value rather than raw number\n"
4966 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
4967 	"\t            .usecs      display a common_timestamp in microseconds\n"
4968 	"\t            .percent    display a number of percentage value\n"
4969 	"\t            .graph      display a bar-graph of a value\n\n"
4970 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
4971 	"\t    trigger or to start a hist trigger but not log any events\n"
4972 	"\t    until told to do so.  'continue' can be used to start or\n"
4973 	"\t    restart a paused hist trigger.\n\n"
4974 	"\t    The 'clear' parameter will clear the contents of a running\n"
4975 	"\t    hist trigger and leave its current paused/active state\n"
4976 	"\t    unchanged.\n\n"
4977 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
4978 	"\t    raw hitcount in the histogram.\n\n"
4979 	"\t    The enable_hist and disable_hist triggers can be used to\n"
4980 	"\t    have one event conditionally start and stop another event's\n"
4981 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
4982 	"\t    the enable_event and disable_event triggers.\n\n"
4983 	"\t    Hist trigger handlers and actions are executed whenever a\n"
4984 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
4985 	"\t        <handler>.<action>\n\n"
4986 	"\t    The available handlers are:\n\n"
4987 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
4988 	"\t        onmax(var)               - invoke if var exceeds current max\n"
4989 	"\t        onchange(var)            - invoke action if var changes\n\n"
4990 	"\t    The available actions are:\n\n"
4991 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
4992 	"\t        save(field,...)                      - save current event fields\n"
4993 #ifdef CONFIG_TRACER_SNAPSHOT
4994 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
4995 #endif
4996 #ifdef CONFIG_SYNTH_EVENTS
4997 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
4998 	"\t  Write into this file to define/undefine new synthetic events.\n"
4999 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5000 #endif
5001 #endif
5002 ;
5003 
5004 static ssize_t
tracing_readme_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5005 tracing_readme_read(struct file *filp, char __user *ubuf,
5006 		       size_t cnt, loff_t *ppos)
5007 {
5008 	return simple_read_from_buffer(ubuf, cnt, ppos,
5009 					readme_msg, strlen(readme_msg));
5010 }
5011 
5012 static const struct file_operations tracing_readme_fops = {
5013 	.open		= tracing_open_generic,
5014 	.read		= tracing_readme_read,
5015 	.llseek		= generic_file_llseek,
5016 };
5017 
5018 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5019 static union trace_eval_map_item *
update_eval_map(union trace_eval_map_item * ptr)5020 update_eval_map(union trace_eval_map_item *ptr)
5021 {
5022 	if (!ptr->map.eval_string) {
5023 		if (ptr->tail.next) {
5024 			ptr = ptr->tail.next;
5025 			/* Set ptr to the next real item (skip head) */
5026 			ptr++;
5027 		} else
5028 			return NULL;
5029 	}
5030 	return ptr;
5031 }
5032 
eval_map_next(struct seq_file * m,void * v,loff_t * pos)5033 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5034 {
5035 	union trace_eval_map_item *ptr = v;
5036 
5037 	/*
5038 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5039 	 * This really should never happen.
5040 	 */
5041 	(*pos)++;
5042 	ptr = update_eval_map(ptr);
5043 	if (WARN_ON_ONCE(!ptr))
5044 		return NULL;
5045 
5046 	ptr++;
5047 	ptr = update_eval_map(ptr);
5048 
5049 	return ptr;
5050 }
5051 
eval_map_start(struct seq_file * m,loff_t * pos)5052 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5053 {
5054 	union trace_eval_map_item *v;
5055 	loff_t l = 0;
5056 
5057 	mutex_lock(&trace_eval_mutex);
5058 
5059 	v = trace_eval_maps;
5060 	if (v)
5061 		v++;
5062 
5063 	while (v && l < *pos) {
5064 		v = eval_map_next(m, v, &l);
5065 	}
5066 
5067 	return v;
5068 }
5069 
eval_map_stop(struct seq_file * m,void * v)5070 static void eval_map_stop(struct seq_file *m, void *v)
5071 {
5072 	mutex_unlock(&trace_eval_mutex);
5073 }
5074 
eval_map_show(struct seq_file * m,void * v)5075 static int eval_map_show(struct seq_file *m, void *v)
5076 {
5077 	union trace_eval_map_item *ptr = v;
5078 
5079 	seq_printf(m, "%s %ld (%s)\n",
5080 		   ptr->map.eval_string, ptr->map.eval_value,
5081 		   ptr->map.system);
5082 
5083 	return 0;
5084 }
5085 
5086 static const struct seq_operations tracing_eval_map_seq_ops = {
5087 	.start		= eval_map_start,
5088 	.next		= eval_map_next,
5089 	.stop		= eval_map_stop,
5090 	.show		= eval_map_show,
5091 };
5092 
tracing_eval_map_open(struct inode * inode,struct file * filp)5093 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5094 {
5095 	int ret;
5096 
5097 	ret = tracing_check_open_get_tr(NULL);
5098 	if (ret)
5099 		return ret;
5100 
5101 	return seq_open(filp, &tracing_eval_map_seq_ops);
5102 }
5103 
5104 static const struct file_operations tracing_eval_map_fops = {
5105 	.open		= tracing_eval_map_open,
5106 	.read		= seq_read,
5107 	.llseek		= seq_lseek,
5108 	.release	= seq_release,
5109 };
5110 
5111 static inline union trace_eval_map_item *
trace_eval_jmp_to_tail(union trace_eval_map_item * ptr)5112 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5113 {
5114 	/* Return tail of array given the head */
5115 	return ptr + ptr->head.length + 1;
5116 }
5117 
5118 static void
trace_insert_eval_map_file(struct module * mod,struct trace_eval_map ** start,int len)5119 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5120 			   int len)
5121 {
5122 	struct trace_eval_map **stop;
5123 	struct trace_eval_map **map;
5124 	union trace_eval_map_item *map_array;
5125 	union trace_eval_map_item *ptr;
5126 
5127 	stop = start + len;
5128 
5129 	/*
5130 	 * The trace_eval_maps contains the map plus a head and tail item,
5131 	 * where the head holds the module and length of array, and the
5132 	 * tail holds a pointer to the next list.
5133 	 */
5134 	map_array = kmalloc_objs(*map_array, len + 2);
5135 	if (!map_array) {
5136 		pr_warn("Unable to allocate trace eval mapping\n");
5137 		return;
5138 	}
5139 
5140 	guard(mutex)(&trace_eval_mutex);
5141 
5142 	if (!trace_eval_maps)
5143 		trace_eval_maps = map_array;
5144 	else {
5145 		ptr = trace_eval_maps;
5146 		for (;;) {
5147 			ptr = trace_eval_jmp_to_tail(ptr);
5148 			if (!ptr->tail.next)
5149 				break;
5150 			ptr = ptr->tail.next;
5151 
5152 		}
5153 		ptr->tail.next = map_array;
5154 	}
5155 	map_array->head.mod = mod;
5156 	map_array->head.length = len;
5157 	map_array++;
5158 
5159 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5160 		map_array->map = **map;
5161 		map_array++;
5162 	}
5163 	memset(map_array, 0, sizeof(*map_array));
5164 }
5165 
trace_create_eval_file(struct dentry * d_tracer)5166 static void trace_create_eval_file(struct dentry *d_tracer)
5167 {
5168 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5169 			  NULL, &tracing_eval_map_fops);
5170 }
5171 
5172 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
trace_create_eval_file(struct dentry * d_tracer)5173 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
trace_insert_eval_map_file(struct module * mod,struct trace_eval_map ** start,int len)5174 static inline void trace_insert_eval_map_file(struct module *mod,
5175 			      struct trace_eval_map **start, int len) { }
5176 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5177 
5178 static void
trace_event_update_with_eval_map(struct module * mod,struct trace_eval_map ** start,int len)5179 trace_event_update_with_eval_map(struct module *mod,
5180 				 struct trace_eval_map **start,
5181 				 int len)
5182 {
5183 	struct trace_eval_map **map;
5184 
5185 	/* Always run sanitizer only if btf_type_tag attr exists. */
5186 	if (len <= 0) {
5187 		if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) &&
5188 		      IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) &&
5189 		      __has_attribute(btf_type_tag)))
5190 			return;
5191 	}
5192 
5193 	map = start;
5194 
5195 	trace_event_update_all(map, len);
5196 
5197 	if (len <= 0)
5198 		return;
5199 
5200 	trace_insert_eval_map_file(mod, start, len);
5201 }
5202 
5203 static ssize_t
tracing_set_trace_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5204 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5205 		       size_t cnt, loff_t *ppos)
5206 {
5207 	struct trace_array *tr = filp->private_data;
5208 	char buf[MAX_TRACER_SIZE+2];
5209 	int r;
5210 
5211 	scoped_guard(mutex, &trace_types_lock) {
5212 		r = sprintf(buf, "%s\n", tr->current_trace->name);
5213 	}
5214 
5215 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5216 }
5217 
tracer_init(struct tracer * t,struct trace_array * tr)5218 int tracer_init(struct tracer *t, struct trace_array *tr)
5219 {
5220 	tracing_reset_online_cpus(&tr->array_buffer);
5221 	update_last_data_if_empty(tr);
5222 	return t->init(tr);
5223 }
5224 
set_buffer_entries(struct array_buffer * buf,unsigned long val)5225 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5226 {
5227 	int cpu;
5228 
5229 	for_each_tracing_cpu(cpu)
5230 		per_cpu_ptr(buf->data, cpu)->entries = val;
5231 }
5232 
update_buffer_entries(struct array_buffer * buf,int cpu)5233 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5234 {
5235 	if (cpu == RING_BUFFER_ALL_CPUS) {
5236 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5237 	} else {
5238 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5239 	}
5240 }
5241 
5242 #ifdef CONFIG_TRACER_SNAPSHOT
5243 /* resize @tr's buffer to the size of @size_tr's entries */
resize_buffer_duplicate_size(struct array_buffer * trace_buf,struct array_buffer * size_buf,int cpu_id)5244 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5245 					struct array_buffer *size_buf, int cpu_id)
5246 {
5247 	int cpu, ret = 0;
5248 
5249 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5250 		for_each_tracing_cpu(cpu) {
5251 			ret = ring_buffer_resize(trace_buf->buffer,
5252 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5253 			if (ret < 0)
5254 				break;
5255 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5256 				per_cpu_ptr(size_buf->data, cpu)->entries;
5257 		}
5258 	} else {
5259 		ret = ring_buffer_resize(trace_buf->buffer,
5260 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5261 		if (ret == 0)
5262 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5263 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5264 	}
5265 
5266 	return ret;
5267 }
5268 #endif /* CONFIG_TRACER_SNAPSHOT */
5269 
__tracing_resize_ring_buffer(struct trace_array * tr,unsigned long size,int cpu)5270 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5271 					unsigned long size, int cpu)
5272 {
5273 	int ret;
5274 
5275 	/*
5276 	 * If kernel or user changes the size of the ring buffer
5277 	 * we use the size that was given, and we can forget about
5278 	 * expanding it later.
5279 	 */
5280 	trace_set_ring_buffer_expanded(tr);
5281 
5282 	/* May be called before buffers are initialized */
5283 	if (!tr->array_buffer.buffer)
5284 		return 0;
5285 
5286 	/* Do not allow tracing while resizing ring buffer */
5287 	tracing_stop_tr(tr);
5288 
5289 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5290 	if (ret < 0)
5291 		goto out_start;
5292 
5293 #ifdef CONFIG_TRACER_SNAPSHOT
5294 	if (!tr->allocated_snapshot)
5295 		goto out;
5296 
5297 	ret = ring_buffer_resize(tr->snapshot_buffer.buffer, size, cpu);
5298 	if (ret < 0) {
5299 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
5300 						     &tr->array_buffer, cpu);
5301 		if (r < 0) {
5302 			/*
5303 			 * AARGH! We are left with different
5304 			 * size max buffer!!!!
5305 			 * The max buffer is our "snapshot" buffer.
5306 			 * When a tracer needs a snapshot (one of the
5307 			 * latency tracers), it swaps the max buffer
5308 			 * with the saved snap shot. We succeeded to
5309 			 * update the size of the main buffer, but failed to
5310 			 * update the size of the max buffer. But when we tried
5311 			 * to reset the main buffer to the original size, we
5312 			 * failed there too. This is very unlikely to
5313 			 * happen, but if it does, warn and kill all
5314 			 * tracing.
5315 			 */
5316 			WARN_ON(1);
5317 			tracing_disabled = 1;
5318 		}
5319 		goto out_start;
5320 	}
5321 
5322 	update_buffer_entries(&tr->snapshot_buffer, cpu);
5323 
5324  out:
5325 #endif /* CONFIG_TRACER_SNAPSHOT */
5326 
5327 	update_buffer_entries(&tr->array_buffer, cpu);
5328  out_start:
5329 	tracing_start_tr(tr);
5330 	return ret;
5331 }
5332 
tracing_resize_ring_buffer(struct trace_array * tr,unsigned long size,int cpu_id)5333 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5334 				  unsigned long size, int cpu_id)
5335 {
5336 	guard(mutex)(&trace_types_lock);
5337 
5338 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5339 		/* make sure, this cpu is enabled in the mask */
5340 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
5341 			return -EINVAL;
5342 	}
5343 
5344 	return __tracing_resize_ring_buffer(tr, size, cpu_id);
5345 }
5346 
5347 struct trace_mod_entry {
5348 	unsigned long	mod_addr;
5349 	char		mod_name[MODULE_NAME_LEN];
5350 };
5351 
5352 struct trace_scratch {
5353 	unsigned int		clock_id;
5354 	unsigned long		text_addr;
5355 	unsigned long		nr_entries;
5356 	struct trace_mod_entry	entries[];
5357 };
5358 
5359 static DEFINE_MUTEX(scratch_mutex);
5360 
cmp_mod_entry(const void * key,const void * pivot)5361 static int cmp_mod_entry(const void *key, const void *pivot)
5362 {
5363 	unsigned long addr = (unsigned long)key;
5364 	const struct trace_mod_entry *ent = pivot;
5365 
5366 	if (addr < ent[0].mod_addr)
5367 		return -1;
5368 
5369 	return addr >= ent[1].mod_addr;
5370 }
5371 
5372 /**
5373  * trace_adjust_address() - Adjust prev boot address to current address.
5374  * @tr: Persistent ring buffer's trace_array.
5375  * @addr: Address in @tr which is adjusted.
5376  */
trace_adjust_address(struct trace_array * tr,unsigned long addr)5377 unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
5378 {
5379 	struct trace_module_delta *module_delta;
5380 	struct trace_scratch *tscratch;
5381 	struct trace_mod_entry *entry;
5382 	unsigned long raddr;
5383 	int idx = 0, nr_entries;
5384 
5385 	/* If we don't have last boot delta, return the address */
5386 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
5387 		return addr;
5388 
5389 	/* tr->module_delta must be protected by rcu. */
5390 	guard(rcu)();
5391 	tscratch = tr->scratch;
5392 	/* if there is no tscrach, module_delta must be NULL. */
5393 	module_delta = READ_ONCE(tr->module_delta);
5394 	if (!module_delta || !tscratch->nr_entries ||
5395 	    tscratch->entries[0].mod_addr > addr) {
5396 		raddr = addr + tr->text_delta;
5397 		return __is_kernel(raddr) || is_kernel_core_data(raddr) ||
5398 			is_kernel_rodata(raddr) ? raddr : addr;
5399 	}
5400 
5401 	/* Note that entries must be sorted. */
5402 	nr_entries = tscratch->nr_entries;
5403 	if (nr_entries == 1 ||
5404 	    tscratch->entries[nr_entries - 1].mod_addr < addr)
5405 		idx = nr_entries - 1;
5406 	else {
5407 		entry = __inline_bsearch((void *)addr,
5408 				tscratch->entries,
5409 				nr_entries - 1,
5410 				sizeof(tscratch->entries[0]),
5411 				cmp_mod_entry);
5412 		if (entry)
5413 			idx = entry - tscratch->entries;
5414 	}
5415 
5416 	return addr + module_delta->delta[idx];
5417 }
5418 
5419 #ifdef CONFIG_MODULES
save_mod(struct module * mod,void * data)5420 static int save_mod(struct module *mod, void *data)
5421 {
5422 	struct trace_array *tr = data;
5423 	struct trace_scratch *tscratch;
5424 	struct trace_mod_entry *entry;
5425 	unsigned int size;
5426 
5427 	tscratch = tr->scratch;
5428 	if (!tscratch)
5429 		return -1;
5430 	size = tr->scratch_size;
5431 
5432 	if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size)
5433 		return -1;
5434 
5435 	entry = &tscratch->entries[tscratch->nr_entries];
5436 
5437 	tscratch->nr_entries++;
5438 
5439 	entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
5440 	strscpy(entry->mod_name, mod->name);
5441 
5442 	return 0;
5443 }
5444 #else
save_mod(struct module * mod,void * data)5445 static int save_mod(struct module *mod, void *data)
5446 {
5447 	return 0;
5448 }
5449 #endif
5450 
update_last_data(struct trace_array * tr)5451 static void update_last_data(struct trace_array *tr)
5452 {
5453 	struct trace_module_delta *module_delta;
5454 	struct trace_scratch *tscratch;
5455 
5456 	if (!(tr->flags & TRACE_ARRAY_FL_BOOT))
5457 		return;
5458 
5459 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
5460 		return;
5461 
5462 	/* Only if the buffer has previous boot data clear and update it. */
5463 	tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;
5464 
5465 	/* Reset the module list and reload them */
5466 	if (tr->scratch) {
5467 		struct trace_scratch *tscratch = tr->scratch;
5468 
5469 		tscratch->clock_id = tr->clock_id;
5470 		memset(tscratch->entries, 0,
5471 		       flex_array_size(tscratch, entries, tscratch->nr_entries));
5472 		tscratch->nr_entries = 0;
5473 
5474 		guard(mutex)(&scratch_mutex);
5475 		module_for_each_mod(save_mod, tr);
5476 	}
5477 
5478 	/*
5479 	 * Need to clear all CPU buffers as there cannot be events
5480 	 * from the previous boot mixed with events with this boot
5481 	 * as that will cause a confusing trace. Need to clear all
5482 	 * CPU buffers, even for those that may currently be offline.
5483 	 */
5484 	tracing_reset_all_cpus(&tr->array_buffer);
5485 
5486 	/* Using current data now */
5487 	tr->text_delta = 0;
5488 
5489 	if (!tr->scratch)
5490 		return;
5491 
5492 	tscratch = tr->scratch;
5493 	module_delta = READ_ONCE(tr->module_delta);
5494 	WRITE_ONCE(tr->module_delta, NULL);
5495 	kfree_rcu(module_delta, rcu);
5496 
5497 	/* Set the persistent ring buffer meta data to this address */
5498 	tscratch->text_addr = (unsigned long)_text;
5499 }
5500 
5501 /**
5502  * tracing_update_buffers - used by tracing facility to expand ring buffers
5503  * @tr: The tracing instance
5504  *
5505  * To save on memory when the tracing is never used on a system with it
5506  * configured in. The ring buffers are set to a minimum size. But once
5507  * a user starts to use the tracing facility, then they need to grow
5508  * to their default size.
5509  *
5510  * This function is to be called when a tracer is about to be used.
5511  */
tracing_update_buffers(struct trace_array * tr)5512 int tracing_update_buffers(struct trace_array *tr)
5513 {
5514 	int ret = 0;
5515 
5516 	if (!tr)
5517 		tr = &global_trace;
5518 
5519 	guard(mutex)(&trace_types_lock);
5520 
5521 	update_last_data(tr);
5522 
5523 	if (!tr->ring_buffer_expanded)
5524 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5525 						RING_BUFFER_ALL_CPUS);
5526 	return ret;
5527 }
5528 
5529 /*
5530  * Used to clear out the tracer before deletion of an instance.
5531  * Must have trace_types_lock held.
5532  */
tracing_set_nop(struct trace_array * tr)5533 static void tracing_set_nop(struct trace_array *tr)
5534 {
5535 	if (tr->current_trace == &nop_trace)
5536 		return;
5537 
5538 	tr->current_trace->enabled--;
5539 
5540 	if (tr->current_trace->reset)
5541 		tr->current_trace->reset(tr);
5542 
5543 	tr->current_trace = &nop_trace;
5544 	tr->current_trace_flags = nop_trace.flags;
5545 }
5546 
5547 static bool tracer_options_updated;
5548 
tracing_set_tracer(struct trace_array * tr,const char * buf)5549 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5550 {
5551 	struct tracer *trace = NULL;
5552 	struct tracers *t;
5553 	bool had_max_tr;
5554 	int ret;
5555 
5556 	guard(mutex)(&trace_types_lock);
5557 
5558 	update_last_data(tr);
5559 
5560 	if (!tr->ring_buffer_expanded) {
5561 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5562 						RING_BUFFER_ALL_CPUS);
5563 		if (ret < 0)
5564 			return ret;
5565 		ret = 0;
5566 	}
5567 
5568 	list_for_each_entry(t, &tr->tracers, list) {
5569 		if (strcmp(t->tracer->name, buf) == 0) {
5570 			trace = t->tracer;
5571 			break;
5572 		}
5573 	}
5574 	if (!trace)
5575 		return -EINVAL;
5576 
5577 	if (trace == tr->current_trace)
5578 		return 0;
5579 
5580 #ifdef CONFIG_TRACER_SNAPSHOT
5581 	if (tracer_uses_snapshot(trace)) {
5582 		local_irq_disable();
5583 		arch_spin_lock(&tr->max_lock);
5584 		ret = tr->cond_snapshot ? -EBUSY : 0;
5585 		arch_spin_unlock(&tr->max_lock);
5586 		local_irq_enable();
5587 		if (ret)
5588 			return ret;
5589 	}
5590 #endif
5591 	/* Some tracers won't work on kernel command line */
5592 	if (system_state < SYSTEM_RUNNING && trace->noboot) {
5593 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5594 			trace->name);
5595 		return -EINVAL;
5596 	}
5597 
5598 	/* Some tracers are only allowed for the top level buffer */
5599 	if (!trace_ok_for_array(trace, tr))
5600 		return -EINVAL;
5601 
5602 	/* If trace pipe files are being read, we can't change the tracer */
5603 	if (tr->trace_ref)
5604 		return -EBUSY;
5605 
5606 	trace_branch_disable();
5607 
5608 	tr->current_trace->enabled--;
5609 
5610 	if (tr->current_trace->reset)
5611 		tr->current_trace->reset(tr);
5612 
5613 	had_max_tr = tracer_uses_snapshot(tr->current_trace);
5614 
5615 	/* Current trace needs to be nop_trace before synchronize_rcu */
5616 	tr->current_trace = &nop_trace;
5617 	tr->current_trace_flags = nop_trace.flags;
5618 
5619 	if (had_max_tr && !tracer_uses_snapshot(trace)) {
5620 		/*
5621 		 * We need to make sure that the update_max_tr sees that
5622 		 * current_trace changed to nop_trace to keep it from
5623 		 * swapping the buffers after we resize it.
5624 		 * The update_max_tr is called from interrupts disabled
5625 		 * so a synchronized_sched() is sufficient.
5626 		 */
5627 		synchronize_rcu();
5628 		free_snapshot(tr);
5629 		tracing_disarm_snapshot(tr);
5630 	}
5631 
5632 	if (!had_max_tr && tracer_uses_snapshot(trace)) {
5633 		ret = tracing_arm_snapshot_locked(tr);
5634 		if (ret)
5635 			return ret;
5636 	}
5637 
5638 	tr->current_trace_flags = t->flags ? : t->tracer->flags;
5639 
5640 	if (trace->init) {
5641 		ret = tracer_init(trace, tr);
5642 		if (ret) {
5643 			if (tracer_uses_snapshot(trace))
5644 				tracing_disarm_snapshot(tr);
5645 			tr->current_trace_flags = nop_trace.flags;
5646 			return ret;
5647 		}
5648 	}
5649 
5650 	tr->current_trace = trace;
5651 	tr->current_trace->enabled++;
5652 	trace_branch_enable(tr);
5653 
5654 	return 0;
5655 }
5656 
5657 static ssize_t
tracing_set_trace_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)5658 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5659 			size_t cnt, loff_t *ppos)
5660 {
5661 	struct trace_array *tr = filp->private_data;
5662 	char buf[MAX_TRACER_SIZE+1];
5663 	char *name;
5664 	size_t ret;
5665 	int err;
5666 
5667 	ret = cnt;
5668 
5669 	if (cnt > MAX_TRACER_SIZE)
5670 		cnt = MAX_TRACER_SIZE;
5671 
5672 	if (copy_from_user(buf, ubuf, cnt))
5673 		return -EFAULT;
5674 
5675 	buf[cnt] = 0;
5676 
5677 	name = strim(buf);
5678 
5679 	err = tracing_set_tracer(tr, name);
5680 	if (err)
5681 		return err;
5682 
5683 	*ppos += ret;
5684 
5685 	return ret;
5686 }
5687 
5688 static ssize_t
tracing_nsecs_read(unsigned long * ptr,char __user * ubuf,size_t cnt,loff_t * ppos)5689 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5690 		   size_t cnt, loff_t *ppos)
5691 {
5692 	char buf[64];
5693 	int r;
5694 
5695 	r = snprintf(buf, sizeof(buf), "%ld\n",
5696 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5697 	if (r > sizeof(buf))
5698 		r = sizeof(buf);
5699 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5700 }
5701 
5702 static ssize_t
tracing_nsecs_write(unsigned long * ptr,const char __user * ubuf,size_t cnt,loff_t * ppos)5703 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5704 		    size_t cnt, loff_t *ppos)
5705 {
5706 	unsigned long val;
5707 	int ret;
5708 
5709 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5710 	if (ret)
5711 		return ret;
5712 
5713 	*ptr = val * 1000;
5714 
5715 	return cnt;
5716 }
5717 
5718 static ssize_t
tracing_thresh_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5719 tracing_thresh_read(struct file *filp, char __user *ubuf,
5720 		    size_t cnt, loff_t *ppos)
5721 {
5722 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5723 }
5724 
5725 static ssize_t
tracing_thresh_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)5726 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5727 		     size_t cnt, loff_t *ppos)
5728 {
5729 	struct trace_array *tr = filp->private_data;
5730 	int ret;
5731 
5732 	guard(mutex)(&trace_types_lock);
5733 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5734 	if (ret < 0)
5735 		return ret;
5736 
5737 	if (tr->current_trace->update_thresh) {
5738 		ret = tr->current_trace->update_thresh(tr);
5739 		if (ret < 0)
5740 			return ret;
5741 	}
5742 
5743 	return cnt;
5744 }
5745 
5746 #ifdef CONFIG_TRACER_MAX_TRACE
5747 
5748 static ssize_t
tracing_max_lat_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5749 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5750 		     size_t cnt, loff_t *ppos)
5751 {
5752 	struct trace_array *tr = filp->private_data;
5753 
5754 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
5755 }
5756 
5757 static ssize_t
tracing_max_lat_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)5758 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5759 		      size_t cnt, loff_t *ppos)
5760 {
5761 	struct trace_array *tr = filp->private_data;
5762 
5763 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
5764 }
5765 
5766 #endif
5767 
open_pipe_on_cpu(struct trace_array * tr,int cpu)5768 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
5769 {
5770 	if (cpu == RING_BUFFER_ALL_CPUS) {
5771 		if (cpumask_empty(tr->pipe_cpumask)) {
5772 			cpumask_setall(tr->pipe_cpumask);
5773 			return 0;
5774 		}
5775 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
5776 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
5777 		return 0;
5778 	}
5779 	return -EBUSY;
5780 }
5781 
close_pipe_on_cpu(struct trace_array * tr,int cpu)5782 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
5783 {
5784 	if (cpu == RING_BUFFER_ALL_CPUS) {
5785 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
5786 		cpumask_clear(tr->pipe_cpumask);
5787 	} else {
5788 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
5789 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
5790 	}
5791 }
5792 
tracing_open_pipe(struct inode * inode,struct file * filp)5793 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5794 {
5795 	struct trace_array *tr = inode->i_private;
5796 	struct trace_iterator *iter;
5797 	int cpu;
5798 	int ret;
5799 
5800 	ret = tracing_check_open_get_tr(tr);
5801 	if (ret)
5802 		return ret;
5803 
5804 	guard(mutex)(&trace_types_lock);
5805 	cpu = tracing_get_cpu(inode);
5806 	ret = open_pipe_on_cpu(tr, cpu);
5807 	if (ret)
5808 		goto fail_pipe_on_cpu;
5809 
5810 	/* create a buffer to store the information to pass to userspace */
5811 	iter = kzalloc_obj(*iter);
5812 	if (!iter) {
5813 		ret = -ENOMEM;
5814 		goto fail_alloc_iter;
5815 	}
5816 
5817 	trace_seq_init(&iter->seq);
5818 	iter->trace = tr->current_trace;
5819 
5820 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5821 		ret = -ENOMEM;
5822 		goto fail;
5823 	}
5824 
5825 	/* trace pipe does not show start of buffer */
5826 	cpumask_setall(iter->started);
5827 
5828 	if (tr->trace_flags & TRACE_ITER(LATENCY_FMT))
5829 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
5830 
5831 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
5832 	if (trace_clocks[tr->clock_id].in_ns)
5833 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5834 
5835 	iter->tr = tr;
5836 	iter->array_buffer = &tr->array_buffer;
5837 	iter->cpu_file = cpu;
5838 	mutex_init(&iter->mutex);
5839 	filp->private_data = iter;
5840 
5841 	if (iter->trace->pipe_open)
5842 		iter->trace->pipe_open(iter);
5843 
5844 	nonseekable_open(inode, filp);
5845 
5846 	tr->trace_ref++;
5847 
5848 	return ret;
5849 
5850 fail:
5851 	kfree(iter);
5852 fail_alloc_iter:
5853 	close_pipe_on_cpu(tr, cpu);
5854 fail_pipe_on_cpu:
5855 	__trace_array_put(tr);
5856 	return ret;
5857 }
5858 
tracing_release_pipe(struct inode * inode,struct file * file)5859 static int tracing_release_pipe(struct inode *inode, struct file *file)
5860 {
5861 	struct trace_iterator *iter = file->private_data;
5862 	struct trace_array *tr = inode->i_private;
5863 
5864 	scoped_guard(mutex, &trace_types_lock) {
5865 		tr->trace_ref--;
5866 
5867 		if (iter->trace->pipe_close)
5868 			iter->trace->pipe_close(iter);
5869 		close_pipe_on_cpu(tr, iter->cpu_file);
5870 	}
5871 
5872 	free_trace_iter_content(iter);
5873 	kfree(iter);
5874 
5875 	trace_array_put(tr);
5876 
5877 	return 0;
5878 }
5879 
5880 static __poll_t
trace_poll(struct trace_iterator * iter,struct file * filp,poll_table * poll_table)5881 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5882 {
5883 	struct trace_array *tr = iter->tr;
5884 
5885 	/* Iterators are static, they should be filled or empty */
5886 	if (trace_buffer_iter(iter, iter->cpu_file))
5887 		return EPOLLIN | EPOLLRDNORM;
5888 
5889 	if (tr->trace_flags & TRACE_ITER(BLOCK))
5890 		/*
5891 		 * Always select as readable when in blocking mode
5892 		 */
5893 		return EPOLLIN | EPOLLRDNORM;
5894 	else
5895 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
5896 					     filp, poll_table, iter->tr->buffer_percent);
5897 }
5898 
5899 static __poll_t
tracing_poll_pipe(struct file * filp,poll_table * poll_table)5900 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5901 {
5902 	struct trace_iterator *iter = filp->private_data;
5903 
5904 	return trace_poll(iter, filp, poll_table);
5905 }
5906 
5907 /* Must be called with iter->mutex held. */
tracing_wait_pipe(struct file * filp)5908 static int tracing_wait_pipe(struct file *filp)
5909 {
5910 	struct trace_iterator *iter = filp->private_data;
5911 	int ret;
5912 
5913 	while (trace_empty(iter)) {
5914 
5915 		if ((filp->f_flags & O_NONBLOCK)) {
5916 			return -EAGAIN;
5917 		}
5918 
5919 		/*
5920 		 * We block until we read something and tracing is disabled.
5921 		 * We still block if tracing is disabled, but we have never
5922 		 * read anything. This allows a user to cat this file, and
5923 		 * then enable tracing. But after we have read something,
5924 		 * we give an EOF when tracing is again disabled.
5925 		 *
5926 		 * iter->pos will be 0 if we haven't read anything.
5927 		 */
5928 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5929 			break;
5930 
5931 		mutex_unlock(&iter->mutex);
5932 
5933 		ret = wait_on_pipe(iter, 0);
5934 
5935 		mutex_lock(&iter->mutex);
5936 
5937 		if (ret)
5938 			return ret;
5939 	}
5940 
5941 	return 1;
5942 }
5943 
update_last_data_if_empty(struct trace_array * tr)5944 static bool update_last_data_if_empty(struct trace_array *tr)
5945 {
5946 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
5947 		return false;
5948 
5949 	if (!ring_buffer_empty(tr->array_buffer.buffer))
5950 		return false;
5951 
5952 	/*
5953 	 * If the buffer contains the last boot data and all per-cpu
5954 	 * buffers are empty, reset it from the kernel side.
5955 	 */
5956 	update_last_data(tr);
5957 	return true;
5958 }
5959 
5960 /*
5961  * Consumer reader.
5962  */
5963 static ssize_t
tracing_read_pipe(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5964 tracing_read_pipe(struct file *filp, char __user *ubuf,
5965 		  size_t cnt, loff_t *ppos)
5966 {
5967 	struct trace_iterator *iter = filp->private_data;
5968 	ssize_t sret;
5969 
5970 	/*
5971 	 * Avoid more than one consumer on a single file descriptor
5972 	 * This is just a matter of traces coherency, the ring buffer itself
5973 	 * is protected.
5974 	 */
5975 	guard(mutex)(&iter->mutex);
5976 
5977 	/* return any leftover data */
5978 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5979 	if (sret != -EBUSY)
5980 		return sret;
5981 
5982 	trace_seq_init(&iter->seq);
5983 
5984 	if (iter->trace->read) {
5985 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5986 		if (sret)
5987 			return sret;
5988 	}
5989 
5990 waitagain:
5991 	if (update_last_data_if_empty(iter->tr))
5992 		return 0;
5993 
5994 	sret = tracing_wait_pipe(filp);
5995 	if (sret <= 0)
5996 		return sret;
5997 
5998 	/* stop when tracing is finished */
5999 	if (trace_empty(iter))
6000 		return 0;
6001 
6002 	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6003 		cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6004 
6005 	/* reset all but tr, trace, and overruns */
6006 	trace_iterator_reset(iter);
6007 	cpumask_clear(iter->started);
6008 	trace_seq_init(&iter->seq);
6009 
6010 	trace_event_read_lock();
6011 	trace_access_lock(iter->cpu_file);
6012 	while (trace_find_next_entry_inc(iter) != NULL) {
6013 		enum print_line_t ret;
6014 		int save_len = iter->seq.seq.len;
6015 
6016 		ret = print_trace_line(iter);
6017 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6018 			/*
6019 			 * If one print_trace_line() fills entire trace_seq in one shot,
6020 			 * trace_seq_to_user() will returns -EBUSY because save_len == 0,
6021 			 * In this case, we need to consume it, otherwise, loop will peek
6022 			 * this event next time, resulting in an infinite loop.
6023 			 */
6024 			if (save_len == 0) {
6025 				iter->seq.full = 0;
6026 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6027 				trace_consume(iter);
6028 				break;
6029 			}
6030 
6031 			/* In other cases, don't print partial lines */
6032 			iter->seq.seq.len = save_len;
6033 			break;
6034 		}
6035 		if (ret != TRACE_TYPE_NO_CONSUME)
6036 			trace_consume(iter);
6037 
6038 		if (trace_seq_used(&iter->seq) >= cnt)
6039 			break;
6040 
6041 		/*
6042 		 * Setting the full flag means we reached the trace_seq buffer
6043 		 * size and we should leave by partial output condition above.
6044 		 * One of the trace_seq_* functions is not used properly.
6045 		 */
6046 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6047 			  iter->ent->type);
6048 	}
6049 	trace_access_unlock(iter->cpu_file);
6050 	trace_event_read_unlock();
6051 
6052 	/* Now copy what we have to the user */
6053 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6054 	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6055 		trace_seq_init(&iter->seq);
6056 
6057 	/*
6058 	 * If there was nothing to send to user, in spite of consuming trace
6059 	 * entries, go back to wait for more entries.
6060 	 */
6061 	if (sret == -EBUSY)
6062 		goto waitagain;
6063 
6064 	return sret;
6065 }
6066 
tracing_spd_release_pipe(struct splice_pipe_desc * spd,unsigned int idx)6067 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6068 				     unsigned int idx)
6069 {
6070 	__free_page(spd->pages[idx]);
6071 }
6072 
6073 static size_t
tracing_fill_pipe_page(size_t rem,struct trace_iterator * iter)6074 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6075 {
6076 	size_t count;
6077 	int save_len;
6078 	int ret;
6079 
6080 	/* Seq buffer is page-sized, exactly what we need. */
6081 	for (;;) {
6082 		save_len = iter->seq.seq.len;
6083 		ret = print_trace_line(iter);
6084 
6085 		if (trace_seq_has_overflowed(&iter->seq)) {
6086 			iter->seq.seq.len = save_len;
6087 			break;
6088 		}
6089 
6090 		/*
6091 		 * This should not be hit, because it should only
6092 		 * be set if the iter->seq overflowed. But check it
6093 		 * anyway to be safe.
6094 		 */
6095 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6096 			iter->seq.seq.len = save_len;
6097 			break;
6098 		}
6099 
6100 		count = trace_seq_used(&iter->seq) - save_len;
6101 		if (rem < count) {
6102 			rem = 0;
6103 			iter->seq.seq.len = save_len;
6104 			break;
6105 		}
6106 
6107 		if (ret != TRACE_TYPE_NO_CONSUME)
6108 			trace_consume(iter);
6109 		rem -= count;
6110 		if (!trace_find_next_entry_inc(iter))	{
6111 			rem = 0;
6112 			iter->ent = NULL;
6113 			break;
6114 		}
6115 	}
6116 
6117 	return rem;
6118 }
6119 
tracing_splice_read_pipe(struct file * filp,loff_t * ppos,struct pipe_inode_info * pipe,size_t len,unsigned int flags)6120 static ssize_t tracing_splice_read_pipe(struct file *filp,
6121 					loff_t *ppos,
6122 					struct pipe_inode_info *pipe,
6123 					size_t len,
6124 					unsigned int flags)
6125 {
6126 	struct page *pages_def[PIPE_DEF_BUFFERS];
6127 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6128 	struct trace_iterator *iter = filp->private_data;
6129 	struct splice_pipe_desc spd = {
6130 		.pages		= pages_def,
6131 		.partial	= partial_def,
6132 		.nr_pages	= 0, /* This gets updated below. */
6133 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6134 		.ops		= &default_pipe_buf_ops,
6135 		.spd_release	= tracing_spd_release_pipe,
6136 	};
6137 	ssize_t ret;
6138 	size_t rem;
6139 	unsigned int i;
6140 
6141 	if (splice_grow_spd(pipe, &spd))
6142 		return -ENOMEM;
6143 
6144 	mutex_lock(&iter->mutex);
6145 
6146 	if (iter->trace->splice_read) {
6147 		ret = iter->trace->splice_read(iter, filp,
6148 					       ppos, pipe, len, flags);
6149 		if (ret)
6150 			goto out_err;
6151 	}
6152 
6153 	ret = tracing_wait_pipe(filp);
6154 	if (ret <= 0)
6155 		goto out_err;
6156 
6157 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6158 		ret = -EFAULT;
6159 		goto out_err;
6160 	}
6161 
6162 	trace_event_read_lock();
6163 	trace_access_lock(iter->cpu_file);
6164 
6165 	/* Fill as many pages as possible. */
6166 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6167 		spd.pages[i] = alloc_page(GFP_KERNEL);
6168 		if (!spd.pages[i])
6169 			break;
6170 
6171 		rem = tracing_fill_pipe_page(rem, iter);
6172 
6173 		/* Copy the data into the page, so we can start over. */
6174 		ret = trace_seq_to_buffer(&iter->seq,
6175 					  page_address(spd.pages[i]),
6176 					  min((size_t)trace_seq_used(&iter->seq),
6177 						  (size_t)PAGE_SIZE));
6178 		if (ret < 0) {
6179 			__free_page(spd.pages[i]);
6180 			break;
6181 		}
6182 		spd.partial[i].offset = 0;
6183 		spd.partial[i].len = ret;
6184 
6185 		trace_seq_init(&iter->seq);
6186 	}
6187 
6188 	trace_access_unlock(iter->cpu_file);
6189 	trace_event_read_unlock();
6190 	mutex_unlock(&iter->mutex);
6191 
6192 	spd.nr_pages = i;
6193 
6194 	if (i)
6195 		ret = splice_to_pipe(pipe, &spd);
6196 	else
6197 		ret = 0;
6198 out:
6199 	splice_shrink_spd(&spd);
6200 	return ret;
6201 
6202 out_err:
6203 	mutex_unlock(&iter->mutex);
6204 	goto out;
6205 }
6206 
6207 static ssize_t
tracing_syscall_buf_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6208 tracing_syscall_buf_read(struct file *filp, char __user *ubuf,
6209 			 size_t cnt, loff_t *ppos)
6210 {
6211 	struct inode *inode = file_inode(filp);
6212 	struct trace_array *tr = inode->i_private;
6213 	char buf[64];
6214 	int r;
6215 
6216 	r = snprintf(buf, 64, "%d\n", tr->syscall_buf_sz);
6217 
6218 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6219 }
6220 
6221 static ssize_t
tracing_syscall_buf_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6222 tracing_syscall_buf_write(struct file *filp, const char __user *ubuf,
6223 			  size_t cnt, loff_t *ppos)
6224 {
6225 	struct inode *inode = file_inode(filp);
6226 	struct trace_array *tr = inode->i_private;
6227 	unsigned long val;
6228 	int ret;
6229 
6230 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6231 	if (ret)
6232 		return ret;
6233 
6234 	if (val > SYSCALL_FAULT_USER_MAX)
6235 		val = SYSCALL_FAULT_USER_MAX;
6236 
6237 	tr->syscall_buf_sz = val;
6238 
6239 	*ppos += cnt;
6240 
6241 	return cnt;
6242 }
6243 
6244 static ssize_t
tracing_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6245 tracing_entries_read(struct file *filp, char __user *ubuf,
6246 		     size_t cnt, loff_t *ppos)
6247 {
6248 	struct inode *inode = file_inode(filp);
6249 	struct trace_array *tr = inode->i_private;
6250 	int cpu = tracing_get_cpu(inode);
6251 	char buf[64];
6252 	int r = 0;
6253 	ssize_t ret;
6254 
6255 	mutex_lock(&trace_types_lock);
6256 
6257 	if (cpu == RING_BUFFER_ALL_CPUS) {
6258 		int cpu, buf_size_same;
6259 		unsigned long size;
6260 
6261 		size = 0;
6262 		buf_size_same = 1;
6263 		/* check if all cpu sizes are same */
6264 		for_each_tracing_cpu(cpu) {
6265 			/* fill in the size from first enabled cpu */
6266 			if (size == 0)
6267 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6268 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6269 				buf_size_same = 0;
6270 				break;
6271 			}
6272 		}
6273 
6274 		if (buf_size_same) {
6275 			if (!tr->ring_buffer_expanded)
6276 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6277 					    size >> 10,
6278 					    trace_buf_size >> 10);
6279 			else
6280 				r = sprintf(buf, "%lu\n", size >> 10);
6281 		} else
6282 			r = sprintf(buf, "X\n");
6283 	} else
6284 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6285 
6286 	mutex_unlock(&trace_types_lock);
6287 
6288 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6289 	return ret;
6290 }
6291 
6292 static ssize_t
tracing_entries_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6293 tracing_entries_write(struct file *filp, const char __user *ubuf,
6294 		      size_t cnt, loff_t *ppos)
6295 {
6296 	struct inode *inode = file_inode(filp);
6297 	struct trace_array *tr = inode->i_private;
6298 	unsigned long val;
6299 	int ret;
6300 
6301 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6302 	if (ret)
6303 		return ret;
6304 
6305 	/* must have at least 1 entry */
6306 	if (!val)
6307 		return -EINVAL;
6308 
6309 	/* value is in KB */
6310 	val <<= 10;
6311 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6312 	if (ret < 0)
6313 		return ret;
6314 
6315 	*ppos += cnt;
6316 
6317 	return cnt;
6318 }
6319 
6320 static ssize_t
tracing_total_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6321 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6322 				size_t cnt, loff_t *ppos)
6323 {
6324 	struct trace_array *tr = filp->private_data;
6325 	char buf[64];
6326 	int r, cpu;
6327 	unsigned long size = 0, expanded_size = 0;
6328 
6329 	mutex_lock(&trace_types_lock);
6330 	for_each_tracing_cpu(cpu) {
6331 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6332 		if (!tr->ring_buffer_expanded)
6333 			expanded_size += trace_buf_size >> 10;
6334 	}
6335 	if (tr->ring_buffer_expanded)
6336 		r = sprintf(buf, "%lu\n", size);
6337 	else
6338 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6339 	mutex_unlock(&trace_types_lock);
6340 
6341 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6342 }
6343 
6344 #define LAST_BOOT_HEADER ((void *)1)
6345 
l_next(struct seq_file * m,void * v,loff_t * pos)6346 static void *l_next(struct seq_file *m, void *v, loff_t *pos)
6347 {
6348 	struct trace_array *tr = m->private;
6349 	struct trace_scratch *tscratch = tr->scratch;
6350 	unsigned int index = *pos;
6351 
6352 	(*pos)++;
6353 
6354 	if (*pos == 1)
6355 		return LAST_BOOT_HEADER;
6356 
6357 	/* Only show offsets of the last boot data */
6358 	if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6359 		return NULL;
6360 
6361 	/* *pos 0 is for the header, 1 is for the first module */
6362 	index--;
6363 
6364 	if (index >= tscratch->nr_entries)
6365 		return NULL;
6366 
6367 	return &tscratch->entries[index];
6368 }
6369 
l_start(struct seq_file * m,loff_t * pos)6370 static void *l_start(struct seq_file *m, loff_t *pos)
6371 {
6372 	mutex_lock(&scratch_mutex);
6373 
6374 	return l_next(m, NULL, pos);
6375 }
6376 
l_stop(struct seq_file * m,void * p)6377 static void l_stop(struct seq_file *m, void *p)
6378 {
6379 	mutex_unlock(&scratch_mutex);
6380 }
6381 
show_last_boot_header(struct seq_file * m,struct trace_array * tr)6382 static void show_last_boot_header(struct seq_file *m, struct trace_array *tr)
6383 {
6384 	struct trace_scratch *tscratch = tr->scratch;
6385 
6386 	/*
6387 	 * Do not leak KASLR address. This only shows the KASLR address of
6388 	 * the last boot. When the ring buffer is started, the LAST_BOOT
6389 	 * flag gets cleared, and this should only report "current".
6390 	 * Otherwise it shows the KASLR address from the previous boot which
6391 	 * should not be the same as the current boot.
6392 	 */
6393 	if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6394 		seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr);
6395 	else
6396 		seq_puts(m, "# Current\n");
6397 }
6398 
l_show(struct seq_file * m,void * v)6399 static int l_show(struct seq_file *m, void *v)
6400 {
6401 	struct trace_array *tr = m->private;
6402 	struct trace_mod_entry *entry = v;
6403 
6404 	if (v == LAST_BOOT_HEADER) {
6405 		show_last_boot_header(m, tr);
6406 		return 0;
6407 	}
6408 
6409 	seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name);
6410 	return 0;
6411 }
6412 
6413 static const struct seq_operations last_boot_seq_ops = {
6414 	.start		= l_start,
6415 	.next		= l_next,
6416 	.stop		= l_stop,
6417 	.show		= l_show,
6418 };
6419 
tracing_last_boot_open(struct inode * inode,struct file * file)6420 static int tracing_last_boot_open(struct inode *inode, struct file *file)
6421 {
6422 	struct trace_array *tr = inode->i_private;
6423 	struct seq_file *m;
6424 	int ret;
6425 
6426 	ret = tracing_check_open_get_tr(tr);
6427 	if (ret)
6428 		return ret;
6429 
6430 	ret = seq_open(file, &last_boot_seq_ops);
6431 	if (ret) {
6432 		trace_array_put(tr);
6433 		return ret;
6434 	}
6435 
6436 	m = file->private_data;
6437 	m->private = tr;
6438 
6439 	return 0;
6440 }
6441 
tracing_buffer_meta_open(struct inode * inode,struct file * filp)6442 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
6443 {
6444 	struct trace_array *tr = inode->i_private;
6445 	int cpu = tracing_get_cpu(inode);
6446 	int ret;
6447 
6448 	ret = tracing_check_open_get_tr(tr);
6449 	if (ret)
6450 		return ret;
6451 
6452 	ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
6453 	if (ret < 0)
6454 		__trace_array_put(tr);
6455 	return ret;
6456 }
6457 
6458 static ssize_t
tracing_free_buffer_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6459 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6460 			  size_t cnt, loff_t *ppos)
6461 {
6462 	/*
6463 	 * There is no need to read what the user has written, this function
6464 	 * is just to make sure that there is no error when "echo" is used
6465 	 */
6466 
6467 	*ppos += cnt;
6468 
6469 	return cnt;
6470 }
6471 
6472 static int
tracing_free_buffer_release(struct inode * inode,struct file * filp)6473 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6474 {
6475 	struct trace_array *tr = inode->i_private;
6476 
6477 	/* disable tracing ? */
6478 	if (tr->trace_flags & TRACE_ITER(STOP_ON_FREE))
6479 		tracer_tracing_off(tr);
6480 	/* resize the ring buffer to 0 */
6481 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6482 
6483 	trace_array_put(tr);
6484 
6485 	return 0;
6486 }
6487 
6488 #define TRACE_MARKER_MAX_SIZE		4096
6489 
write_marker_to_buffer(struct trace_array * tr,const char * buf,size_t cnt,unsigned long ip)6490 static ssize_t write_marker_to_buffer(struct trace_array *tr, const char *buf,
6491 				      size_t cnt, unsigned long ip)
6492 {
6493 	struct ring_buffer_event *event;
6494 	enum event_trigger_type tt = ETT_NONE;
6495 	struct trace_buffer *buffer;
6496 	struct print_entry *entry;
6497 	int meta_size;
6498 	ssize_t written;
6499 	size_t size;
6500 
6501 	meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
6502  again:
6503 	size = cnt + meta_size;
6504 
6505 	buffer = tr->array_buffer.buffer;
6506 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6507 					    tracing_gen_ctx());
6508 	if (unlikely(!event)) {
6509 		/*
6510 		 * If the size was greater than what was allowed, then
6511 		 * make it smaller and try again.
6512 		 */
6513 		if (size > ring_buffer_max_event_size(buffer)) {
6514 			cnt = ring_buffer_max_event_size(buffer) - meta_size;
6515 			/* The above should only happen once */
6516 			if (WARN_ON_ONCE(cnt + meta_size == size))
6517 				return -EBADF;
6518 			goto again;
6519 		}
6520 
6521 		/* Ring buffer disabled, return as if not open for write */
6522 		return -EBADF;
6523 	}
6524 
6525 	entry = ring_buffer_event_data(event);
6526 	entry->ip = ip;
6527 	memcpy(&entry->buf, buf, cnt);
6528 	written = cnt;
6529 
6530 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6531 		/* do not add \n before testing triggers, but add \0 */
6532 		entry->buf[cnt] = '\0';
6533 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
6534 	}
6535 
6536 	if (entry->buf[cnt - 1] != '\n') {
6537 		entry->buf[cnt] = '\n';
6538 		entry->buf[cnt + 1] = '\0';
6539 	} else
6540 		entry->buf[cnt] = '\0';
6541 
6542 	if (static_branch_unlikely(&trace_marker_exports_enabled))
6543 		ftrace_exports(event, TRACE_EXPORT_MARKER);
6544 	__buffer_unlock_commit(buffer, event);
6545 
6546 	if (tt)
6547 		event_triggers_post_call(tr->trace_marker_file, tt);
6548 
6549 	return written;
6550 }
6551 
6552 struct trace_user_buf {
6553 	char		*buf;
6554 };
6555 
6556 static DEFINE_MUTEX(trace_user_buffer_mutex);
6557 static struct trace_user_buf_info *trace_user_buffer;
6558 
6559 /**
6560  * trace_user_fault_destroy - free up allocated memory of a trace user buffer
6561  * @tinfo: The descriptor to free up
6562  *
6563  * Frees any data allocated in the trace info dsecriptor.
6564  */
trace_user_fault_destroy(struct trace_user_buf_info * tinfo)6565 void trace_user_fault_destroy(struct trace_user_buf_info *tinfo)
6566 {
6567 	char *buf;
6568 	int cpu;
6569 
6570 	if (!tinfo || !tinfo->tbuf)
6571 		return;
6572 
6573 	for_each_possible_cpu(cpu) {
6574 		buf = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
6575 		kfree(buf);
6576 	}
6577 	free_percpu(tinfo->tbuf);
6578 }
6579 
user_fault_buffer_enable(struct trace_user_buf_info * tinfo,size_t size)6580 static int user_fault_buffer_enable(struct trace_user_buf_info *tinfo, size_t size)
6581 {
6582 	char *buf;
6583 	int cpu;
6584 
6585 	lockdep_assert_held(&trace_user_buffer_mutex);
6586 
6587 	tinfo->tbuf = alloc_percpu(struct trace_user_buf);
6588 	if (!tinfo->tbuf)
6589 		return -ENOMEM;
6590 
6591 	tinfo->ref = 1;
6592 	tinfo->size = size;
6593 
6594 	/* Clear each buffer in case of error */
6595 	for_each_possible_cpu(cpu) {
6596 		per_cpu_ptr(tinfo->tbuf, cpu)->buf = NULL;
6597 	}
6598 
6599 	for_each_possible_cpu(cpu) {
6600 		buf = kmalloc_node(size, GFP_KERNEL,
6601 				   cpu_to_node(cpu));
6602 		if (!buf)
6603 			return -ENOMEM;
6604 		per_cpu_ptr(tinfo->tbuf, cpu)->buf = buf;
6605 	}
6606 
6607 	return 0;
6608 }
6609 
6610 /* For internal use. Free and reinitialize */
user_buffer_free(struct trace_user_buf_info ** tinfo)6611 static void user_buffer_free(struct trace_user_buf_info **tinfo)
6612 {
6613 	lockdep_assert_held(&trace_user_buffer_mutex);
6614 
6615 	trace_user_fault_destroy(*tinfo);
6616 	kfree(*tinfo);
6617 	*tinfo = NULL;
6618 }
6619 
6620 /* For internal use. Initialize and allocate */
user_buffer_init(struct trace_user_buf_info ** tinfo,size_t size)6621 static int user_buffer_init(struct trace_user_buf_info **tinfo, size_t size)
6622 {
6623 	bool alloc = false;
6624 	int ret;
6625 
6626 	lockdep_assert_held(&trace_user_buffer_mutex);
6627 
6628 	if (!*tinfo) {
6629 		alloc = true;
6630 		*tinfo = kzalloc_obj(**tinfo);
6631 		if (!*tinfo)
6632 			return -ENOMEM;
6633 	}
6634 
6635 	ret = user_fault_buffer_enable(*tinfo, size);
6636 	if (ret < 0 && alloc)
6637 		user_buffer_free(tinfo);
6638 
6639 	return ret;
6640 }
6641 
6642 /* For internal use, derefrence and free if necessary */
user_buffer_put(struct trace_user_buf_info ** tinfo)6643 static void user_buffer_put(struct trace_user_buf_info **tinfo)
6644 {
6645 	guard(mutex)(&trace_user_buffer_mutex);
6646 
6647 	if (WARN_ON_ONCE(!*tinfo || !(*tinfo)->ref))
6648 		return;
6649 
6650 	if (--(*tinfo)->ref)
6651 		return;
6652 
6653 	user_buffer_free(tinfo);
6654 }
6655 
6656 /**
6657  * trace_user_fault_init - Allocated or reference a per CPU buffer
6658  * @tinfo: A pointer to the trace buffer descriptor
6659  * @size: The size to allocate each per CPU buffer
6660  *
6661  * Create a per CPU buffer that can be used to copy from user space
6662  * in a task context. When calling trace_user_fault_read(), preemption
6663  * must be disabled, and it will enable preemption and copy user
6664  * space data to the buffer. If any schedule switches occur, it will
6665  * retry until it succeeds without a schedule switch knowing the buffer
6666  * is still valid.
6667  *
6668  * Returns 0 on success, negative on failure.
6669  */
trace_user_fault_init(struct trace_user_buf_info * tinfo,size_t size)6670 int trace_user_fault_init(struct trace_user_buf_info *tinfo, size_t size)
6671 {
6672 	int ret;
6673 
6674 	if (!tinfo)
6675 		return -EINVAL;
6676 
6677 	guard(mutex)(&trace_user_buffer_mutex);
6678 
6679 	ret = user_buffer_init(&tinfo, size);
6680 	if (ret < 0)
6681 		trace_user_fault_destroy(tinfo);
6682 
6683 	return ret;
6684 }
6685 
6686 /**
6687  * trace_user_fault_get - up the ref count for the user buffer
6688  * @tinfo: A pointer to a pointer to the trace buffer descriptor
6689  *
6690  * Ups the ref count of the trace buffer.
6691  *
6692  * Returns the new ref count.
6693  */
trace_user_fault_get(struct trace_user_buf_info * tinfo)6694 int trace_user_fault_get(struct trace_user_buf_info *tinfo)
6695 {
6696 	if (!tinfo)
6697 		return -1;
6698 
6699 	guard(mutex)(&trace_user_buffer_mutex);
6700 
6701 	tinfo->ref++;
6702 	return tinfo->ref;
6703 }
6704 
6705 /**
6706  * trace_user_fault_put - dereference a per cpu trace buffer
6707  * @tinfo: The @tinfo that was passed to trace_user_fault_get()
6708  *
6709  * Decrement the ref count of @tinfo.
6710  *
6711  * Returns the new refcount (negative on error).
6712  */
trace_user_fault_put(struct trace_user_buf_info * tinfo)6713 int trace_user_fault_put(struct trace_user_buf_info *tinfo)
6714 {
6715 	guard(mutex)(&trace_user_buffer_mutex);
6716 
6717 	if (WARN_ON_ONCE(!tinfo || !tinfo->ref))
6718 		return -1;
6719 
6720 	--tinfo->ref;
6721 	return tinfo->ref;
6722 }
6723 
6724 /**
6725  * trace_user_fault_read - Read user space into a per CPU buffer
6726  * @tinfo: The @tinfo allocated by trace_user_fault_get()
6727  * @ptr: The user space pointer to read
6728  * @size: The size of user space to read.
6729  * @copy_func: Optional function to use to copy from user space
6730  * @data: Data to pass to copy_func if it was supplied
6731  *
6732  * Preemption must be disabled when this is called, and must not
6733  * be enabled while using the returned buffer.
6734  * This does the copying from user space into a per CPU buffer.
6735  *
6736  * The @size must not be greater than the size passed in to
6737  * trace_user_fault_init().
6738  *
6739  * If @copy_func is NULL, trace_user_fault_read() will use copy_from_user(),
6740  * otherwise it will call @copy_func. It will call @copy_func with:
6741  *
6742  *   buffer: the per CPU buffer of the @tinfo.
6743  *   ptr: The pointer @ptr to user space to read
6744  *   size: The @size of the ptr to read
6745  *   data: The @data parameter
6746  *
6747  * It is expected that @copy_func will return 0 on success and non zero
6748  * if there was a fault.
6749  *
6750  * Returns a pointer to the buffer with the content read from @ptr.
6751  *   Preemption must remain disabled while the caller accesses the
6752  *   buffer returned by this function.
6753  * Returns NULL if there was a fault, or the size passed in is
6754  *   greater than the size passed to trace_user_fault_init().
6755  */
trace_user_fault_read(struct trace_user_buf_info * tinfo,const char __user * ptr,size_t size,trace_user_buf_copy copy_func,void * data)6756 char *trace_user_fault_read(struct trace_user_buf_info *tinfo,
6757 			     const char __user *ptr, size_t size,
6758 			     trace_user_buf_copy copy_func, void *data)
6759 {
6760 	int cpu = smp_processor_id();
6761 	char *buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
6762 	unsigned int cnt;
6763 	int trys = 0;
6764 	int ret;
6765 
6766 	lockdep_assert_preemption_disabled();
6767 
6768 	/*
6769 	 * It's up to the caller to not try to copy more than it said
6770 	 * it would.
6771 	 */
6772 	if (size > tinfo->size)
6773 		return NULL;
6774 
6775 	/*
6776 	 * This acts similar to a seqcount. The per CPU context switches are
6777 	 * recorded, migration is disabled and preemption is enabled. The
6778 	 * read of the user space memory is copied into the per CPU buffer.
6779 	 * Preemption is disabled again, and if the per CPU context switches count
6780 	 * is still the same, it means the buffer has not been corrupted.
6781 	 * If the count is different, it is assumed the buffer is corrupted
6782 	 * and reading must be tried again.
6783 	 */
6784 
6785 	do {
6786 		/*
6787 		 * It is possible that something is trying to migrate this
6788 		 * task. What happens then, is when preemption is enabled,
6789 		 * the migration thread will preempt this task, try to
6790 		 * migrate it, fail, then let it run again. That will
6791 		 * cause this to loop again and never succeed.
6792 		 * On failures, enabled and disable preemption with
6793 		 * migration enabled, to allow the migration thread to
6794 		 * migrate this task.
6795 		 */
6796 		if (trys) {
6797 			preempt_enable_notrace();
6798 			preempt_disable_notrace();
6799 			cpu = smp_processor_id();
6800 			buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
6801 		}
6802 
6803 		/*
6804 		 * If for some reason, copy_from_user() always causes a context
6805 		 * switch, this would then cause an infinite loop.
6806 		 * If this task is preempted by another user space task, it
6807 		 * will cause this task to try again. But just in case something
6808 		 * changes where the copying from user space causes another task
6809 		 * to run, prevent this from going into an infinite loop.
6810 		 * 100 tries should be plenty.
6811 		 */
6812 		if (WARN_ONCE(trys++ > 100, "Error: Too many tries to read user space"))
6813 			return NULL;
6814 
6815 		/* Read the current CPU context switch counter */
6816 		cnt = nr_context_switches_cpu(cpu);
6817 
6818 		/*
6819 		 * Preemption is going to be enabled, but this task must
6820 		 * remain on this CPU.
6821 		 */
6822 		migrate_disable();
6823 
6824 		/*
6825 		 * Now preemption is being enabled and another task can come in
6826 		 * and use the same buffer and corrupt our data.
6827 		 */
6828 		preempt_enable_notrace();
6829 
6830 		/* Make sure preemption is enabled here */
6831 		lockdep_assert_preemption_enabled();
6832 
6833 		if (copy_func) {
6834 			ret = copy_func(buffer, ptr, size, data);
6835 		} else {
6836 			ret = __copy_from_user(buffer, ptr, size);
6837 		}
6838 
6839 		preempt_disable_notrace();
6840 		migrate_enable();
6841 
6842 		/* if it faulted, no need to test if the buffer was corrupted */
6843 		if (ret)
6844 			return NULL;
6845 
6846 		/*
6847 		 * Preemption is disabled again, now check the per CPU context
6848 		 * switch counter. If it doesn't match, then another user space
6849 		 * process may have schedule in and corrupted our buffer. In that
6850 		 * case the copying must be retried.
6851 		 */
6852 	} while (nr_context_switches_cpu(cpu) != cnt);
6853 
6854 	return buffer;
6855 }
6856 
6857 static ssize_t
tracing_mark_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)6858 tracing_mark_write(struct file *filp, const char __user *ubuf,
6859 					size_t cnt, loff_t *fpos)
6860 {
6861 	struct trace_array *tr = filp->private_data;
6862 	ssize_t written = -ENODEV;
6863 	unsigned long ip;
6864 	char *buf;
6865 
6866 	if (unlikely(tracing_disabled))
6867 		return -EINVAL;
6868 
6869 	if (!(tr->trace_flags & TRACE_ITER(MARKERS)))
6870 		return -EINVAL;
6871 
6872 	if ((ssize_t)cnt < 0)
6873 		return -EINVAL;
6874 
6875 	if (cnt > TRACE_MARKER_MAX_SIZE)
6876 		cnt = TRACE_MARKER_MAX_SIZE;
6877 
6878 	/* Must have preemption disabled while having access to the buffer */
6879 	guard(preempt_notrace)();
6880 
6881 	buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL);
6882 	if (!buf)
6883 		return -EFAULT;
6884 
6885 	/* The selftests expect this function to be the IP address */
6886 	ip = _THIS_IP_;
6887 
6888 	/* The global trace_marker can go to multiple instances */
6889 	if (tr == &global_trace) {
6890 		guard(rcu)();
6891 		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
6892 			written = write_marker_to_buffer(tr, buf, cnt, ip);
6893 			if (written < 0)
6894 				break;
6895 		}
6896 	} else {
6897 		written = write_marker_to_buffer(tr, buf, cnt, ip);
6898 	}
6899 
6900 	return written;
6901 }
6902 
write_raw_marker_to_buffer(struct trace_array * tr,const char * buf,size_t cnt)6903 static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
6904 					  const char *buf, size_t cnt)
6905 {
6906 	struct ring_buffer_event *event;
6907 	struct trace_buffer *buffer;
6908 	struct raw_data_entry *entry;
6909 	ssize_t written;
6910 	size_t size;
6911 
6912 	/* cnt includes both the entry->id and the data behind it. */
6913 	size = struct_offset(entry, id) + cnt;
6914 
6915 	buffer = tr->array_buffer.buffer;
6916 
6917 	if (size > ring_buffer_max_event_size(buffer))
6918 		return -EINVAL;
6919 
6920 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6921 					    tracing_gen_ctx());
6922 	if (!event)
6923 		/* Ring buffer disabled, return as if not open for write */
6924 		return -EBADF;
6925 
6926 	entry = ring_buffer_event_data(event);
6927 	unsafe_memcpy(&entry->id, buf, cnt,
6928 		      "id and content already reserved on ring buffer"
6929 		      "'buf' includes the 'id' and the data."
6930 		      "'entry' was allocated with cnt from 'id'.");
6931 	written = cnt;
6932 
6933 	__buffer_unlock_commit(buffer, event);
6934 
6935 	return written;
6936 }
6937 
6938 static ssize_t
tracing_mark_raw_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)6939 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6940 					size_t cnt, loff_t *fpos)
6941 {
6942 	struct trace_array *tr = filp->private_data;
6943 	ssize_t written = -ENODEV;
6944 	char *buf;
6945 
6946 	if (unlikely(tracing_disabled))
6947 		return -EINVAL;
6948 
6949 	if (!(tr->trace_flags & TRACE_ITER(MARKERS)))
6950 		return -EINVAL;
6951 
6952 	/* The marker must at least have a tag id */
6953 	if (cnt < sizeof(unsigned int))
6954 		return -EINVAL;
6955 
6956 	/* raw write is all or nothing */
6957 	if (cnt > TRACE_MARKER_MAX_SIZE)
6958 		return -EINVAL;
6959 
6960 	/* Must have preemption disabled while having access to the buffer */
6961 	guard(preempt_notrace)();
6962 
6963 	buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL);
6964 	if (!buf)
6965 		return -EFAULT;
6966 
6967 	/* The global trace_marker_raw can go to multiple instances */
6968 	if (tr == &global_trace) {
6969 		guard(rcu)();
6970 		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
6971 			written = write_raw_marker_to_buffer(tr, buf, cnt);
6972 			if (written < 0)
6973 				break;
6974 		}
6975 	} else {
6976 		written = write_raw_marker_to_buffer(tr, buf, cnt);
6977 	}
6978 
6979 	return written;
6980 }
6981 
tracing_mark_open(struct inode * inode,struct file * filp)6982 static int tracing_mark_open(struct inode *inode, struct file *filp)
6983 {
6984 	int ret;
6985 
6986 	scoped_guard(mutex, &trace_user_buffer_mutex) {
6987 		if (!trace_user_buffer) {
6988 			ret = user_buffer_init(&trace_user_buffer, TRACE_MARKER_MAX_SIZE);
6989 			if (ret < 0)
6990 				return ret;
6991 		} else {
6992 			trace_user_buffer->ref++;
6993 		}
6994 	}
6995 
6996 	stream_open(inode, filp);
6997 	ret = tracing_open_generic_tr(inode, filp);
6998 	if (ret < 0)
6999 		user_buffer_put(&trace_user_buffer);
7000 	return ret;
7001 }
7002 
tracing_mark_release(struct inode * inode,struct file * file)7003 static int tracing_mark_release(struct inode *inode, struct file *file)
7004 {
7005 	user_buffer_put(&trace_user_buffer);
7006 	return tracing_release_generic_tr(inode, file);
7007 }
7008 
tracing_clock_show(struct seq_file * m,void * v)7009 static int tracing_clock_show(struct seq_file *m, void *v)
7010 {
7011 	struct trace_array *tr = m->private;
7012 	int i;
7013 
7014 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7015 		seq_printf(m,
7016 			"%s%s%s%s", i ? " " : "",
7017 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7018 			i == tr->clock_id ? "]" : "");
7019 	seq_putc(m, '\n');
7020 
7021 	return 0;
7022 }
7023 
tracing_set_clock(struct trace_array * tr,const char * clockstr)7024 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7025 {
7026 	int i;
7027 
7028 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7029 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7030 			break;
7031 	}
7032 	if (i == ARRAY_SIZE(trace_clocks))
7033 		return -EINVAL;
7034 
7035 	guard(mutex)(&trace_types_lock);
7036 
7037 	tr->clock_id = i;
7038 
7039 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7040 
7041 	/*
7042 	 * New clock may not be consistent with the previous clock.
7043 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7044 	 */
7045 	tracing_reset_online_cpus(&tr->array_buffer);
7046 
7047 #ifdef CONFIG_TRACER_SNAPSHOT
7048 	if (tr->snapshot_buffer.buffer)
7049 		ring_buffer_set_clock(tr->snapshot_buffer.buffer, trace_clocks[i].func);
7050 	tracing_reset_online_cpus(&tr->snapshot_buffer);
7051 #endif
7052 	update_last_data_if_empty(tr);
7053 
7054 	if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
7055 		struct trace_scratch *tscratch = tr->scratch;
7056 
7057 		tscratch->clock_id = i;
7058 	}
7059 
7060 	return 0;
7061 }
7062 
tracing_clock_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7063 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7064 				   size_t cnt, loff_t *fpos)
7065 {
7066 	struct seq_file *m = filp->private_data;
7067 	struct trace_array *tr = m->private;
7068 	char buf[64];
7069 	const char *clockstr;
7070 	int ret;
7071 
7072 	if (cnt >= sizeof(buf))
7073 		return -EINVAL;
7074 
7075 	if (copy_from_user(buf, ubuf, cnt))
7076 		return -EFAULT;
7077 
7078 	buf[cnt] = 0;
7079 
7080 	clockstr = strstrip(buf);
7081 
7082 	ret = tracing_set_clock(tr, clockstr);
7083 	if (ret)
7084 		return ret;
7085 
7086 	*fpos += cnt;
7087 
7088 	return cnt;
7089 }
7090 
tracing_clock_open(struct inode * inode,struct file * file)7091 static int tracing_clock_open(struct inode *inode, struct file *file)
7092 {
7093 	struct trace_array *tr = inode->i_private;
7094 	int ret;
7095 
7096 	ret = tracing_check_open_get_tr(tr);
7097 	if (ret)
7098 		return ret;
7099 
7100 	ret = single_open(file, tracing_clock_show, inode->i_private);
7101 	if (ret < 0)
7102 		trace_array_put(tr);
7103 
7104 	return ret;
7105 }
7106 
tracing_time_stamp_mode_show(struct seq_file * m,void * v)7107 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7108 {
7109 	struct trace_array *tr = m->private;
7110 
7111 	guard(mutex)(&trace_types_lock);
7112 
7113 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7114 		seq_puts(m, "delta [absolute]\n");
7115 	else
7116 		seq_puts(m, "[delta] absolute\n");
7117 
7118 	return 0;
7119 }
7120 
tracing_time_stamp_mode_open(struct inode * inode,struct file * file)7121 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7122 {
7123 	struct trace_array *tr = inode->i_private;
7124 	int ret;
7125 
7126 	ret = tracing_check_open_get_tr(tr);
7127 	if (ret)
7128 		return ret;
7129 
7130 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7131 	if (ret < 0)
7132 		trace_array_put(tr);
7133 
7134 	return ret;
7135 }
7136 
tracing_event_time_stamp(struct trace_buffer * buffer,struct ring_buffer_event * rbe)7137 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7138 {
7139 	if (rbe == this_cpu_read(trace_buffered_event))
7140 		return ring_buffer_time_stamp(buffer);
7141 
7142 	return ring_buffer_event_time_stamp(buffer, rbe);
7143 }
7144 
7145 struct ftrace_buffer_info {
7146 	struct trace_iterator	iter;
7147 	void			*spare;
7148 	unsigned int		spare_cpu;
7149 	unsigned int		spare_size;
7150 	unsigned int		read;
7151 };
7152 
7153 #ifdef CONFIG_TRACER_SNAPSHOT
tracing_snapshot_open(struct inode * inode,struct file * file)7154 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7155 {
7156 	struct trace_array *tr = inode->i_private;
7157 	struct trace_iterator *iter;
7158 	struct seq_file *m;
7159 	int ret;
7160 
7161 	ret = tracing_check_open_get_tr(tr);
7162 	if (ret)
7163 		return ret;
7164 
7165 	if (file->f_mode & FMODE_READ) {
7166 		iter = __tracing_open(inode, file, true);
7167 		if (IS_ERR(iter))
7168 			ret = PTR_ERR(iter);
7169 	} else {
7170 		/* Writes still need the seq_file to hold the private data */
7171 		ret = -ENOMEM;
7172 		m = kzalloc_obj(*m);
7173 		if (!m)
7174 			goto out;
7175 		iter = kzalloc_obj(*iter);
7176 		if (!iter) {
7177 			kfree(m);
7178 			goto out;
7179 		}
7180 		ret = 0;
7181 
7182 		iter->tr = tr;
7183 		iter->array_buffer = &tr->snapshot_buffer;
7184 		iter->cpu_file = tracing_get_cpu(inode);
7185 		m->private = iter;
7186 		file->private_data = m;
7187 	}
7188 out:
7189 	if (ret < 0)
7190 		trace_array_put(tr);
7191 
7192 	return ret;
7193 }
7194 
tracing_swap_cpu_buffer(void * tr)7195 static void tracing_swap_cpu_buffer(void *tr)
7196 {
7197 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7198 }
7199 
7200 static ssize_t
tracing_snapshot_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7201 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7202 		       loff_t *ppos)
7203 {
7204 	struct seq_file *m = filp->private_data;
7205 	struct trace_iterator *iter = m->private;
7206 	struct trace_array *tr = iter->tr;
7207 	unsigned long val;
7208 	int ret;
7209 
7210 	ret = tracing_update_buffers(tr);
7211 	if (ret < 0)
7212 		return ret;
7213 
7214 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7215 	if (ret)
7216 		return ret;
7217 
7218 	guard(mutex)(&trace_types_lock);
7219 
7220 	if (tracer_uses_snapshot(tr->current_trace))
7221 		return -EBUSY;
7222 
7223 	local_irq_disable();
7224 	arch_spin_lock(&tr->max_lock);
7225 	if (tr->cond_snapshot)
7226 		ret = -EBUSY;
7227 	arch_spin_unlock(&tr->max_lock);
7228 	local_irq_enable();
7229 	if (ret)
7230 		return ret;
7231 
7232 	switch (val) {
7233 	case 0:
7234 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7235 			return -EINVAL;
7236 		if (tr->allocated_snapshot)
7237 			free_snapshot(tr);
7238 		break;
7239 	case 1:
7240 /* Only allow per-cpu swap if the ring buffer supports it */
7241 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7242 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7243 			return -EINVAL;
7244 #endif
7245 		if (tr->allocated_snapshot)
7246 			ret = resize_buffer_duplicate_size(&tr->snapshot_buffer,
7247 					&tr->array_buffer, iter->cpu_file);
7248 
7249 		ret = tracing_arm_snapshot_locked(tr);
7250 		if (ret)
7251 			return ret;
7252 
7253 		/* Now, we're going to swap */
7254 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7255 			local_irq_disable();
7256 			update_max_tr(tr, current, smp_processor_id(), NULL);
7257 			local_irq_enable();
7258 		} else {
7259 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7260 						 (void *)tr, 1);
7261 		}
7262 		tracing_disarm_snapshot(tr);
7263 		break;
7264 	default:
7265 		if (tr->allocated_snapshot) {
7266 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7267 				tracing_reset_online_cpus(&tr->snapshot_buffer);
7268 			else
7269 				tracing_reset_cpu(&tr->snapshot_buffer, iter->cpu_file);
7270 		}
7271 		break;
7272 	}
7273 
7274 	if (ret >= 0) {
7275 		*ppos += cnt;
7276 		ret = cnt;
7277 	}
7278 
7279 	return ret;
7280 }
7281 
tracing_snapshot_release(struct inode * inode,struct file * file)7282 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7283 {
7284 	struct seq_file *m = file->private_data;
7285 	int ret;
7286 
7287 	ret = tracing_release(inode, file);
7288 
7289 	if (file->f_mode & FMODE_READ)
7290 		return ret;
7291 
7292 	/* If write only, the seq_file is just a stub */
7293 	if (m)
7294 		kfree(m->private);
7295 	kfree(m);
7296 
7297 	return 0;
7298 }
7299 
7300 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7301 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7302 				    size_t count, loff_t *ppos);
7303 static int tracing_buffers_release(struct inode *inode, struct file *file);
7304 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7305 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7306 
snapshot_raw_open(struct inode * inode,struct file * filp)7307 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7308 {
7309 	struct ftrace_buffer_info *info;
7310 	int ret;
7311 
7312 	/* The following checks for tracefs lockdown */
7313 	ret = tracing_buffers_open(inode, filp);
7314 	if (ret < 0)
7315 		return ret;
7316 
7317 	info = filp->private_data;
7318 
7319 	if (tracer_uses_snapshot(info->iter.trace)) {
7320 		tracing_buffers_release(inode, filp);
7321 		return -EBUSY;
7322 	}
7323 
7324 	info->iter.snapshot = true;
7325 	info->iter.array_buffer = &info->iter.tr->snapshot_buffer;
7326 
7327 	return ret;
7328 }
7329 
7330 #endif /* CONFIG_TRACER_SNAPSHOT */
7331 
7332 
7333 static const struct file_operations tracing_thresh_fops = {
7334 	.open		= tracing_open_generic,
7335 	.read		= tracing_thresh_read,
7336 	.write		= tracing_thresh_write,
7337 	.llseek		= generic_file_llseek,
7338 };
7339 
7340 #ifdef CONFIG_TRACER_MAX_TRACE
7341 static const struct file_operations tracing_max_lat_fops = {
7342 	.open		= tracing_open_generic_tr,
7343 	.read		= tracing_max_lat_read,
7344 	.write		= tracing_max_lat_write,
7345 	.llseek		= generic_file_llseek,
7346 	.release	= tracing_release_generic_tr,
7347 };
7348 #endif
7349 
7350 static const struct file_operations set_tracer_fops = {
7351 	.open		= tracing_open_generic_tr,
7352 	.read		= tracing_set_trace_read,
7353 	.write		= tracing_set_trace_write,
7354 	.llseek		= generic_file_llseek,
7355 	.release	= tracing_release_generic_tr,
7356 };
7357 
7358 static const struct file_operations tracing_pipe_fops = {
7359 	.open		= tracing_open_pipe,
7360 	.poll		= tracing_poll_pipe,
7361 	.read		= tracing_read_pipe,
7362 	.splice_read	= tracing_splice_read_pipe,
7363 	.release	= tracing_release_pipe,
7364 };
7365 
7366 static const struct file_operations tracing_entries_fops = {
7367 	.open		= tracing_open_generic_tr,
7368 	.read		= tracing_entries_read,
7369 	.write		= tracing_entries_write,
7370 	.llseek		= generic_file_llseek,
7371 	.release	= tracing_release_generic_tr,
7372 };
7373 
7374 static const struct file_operations tracing_syscall_buf_fops = {
7375 	.open		= tracing_open_generic_tr,
7376 	.read		= tracing_syscall_buf_read,
7377 	.write		= tracing_syscall_buf_write,
7378 	.llseek		= generic_file_llseek,
7379 	.release	= tracing_release_generic_tr,
7380 };
7381 
7382 static const struct file_operations tracing_buffer_meta_fops = {
7383 	.open		= tracing_buffer_meta_open,
7384 	.read		= seq_read,
7385 	.llseek		= seq_lseek,
7386 	.release	= tracing_seq_release,
7387 };
7388 
7389 static const struct file_operations tracing_total_entries_fops = {
7390 	.open		= tracing_open_generic_tr,
7391 	.read		= tracing_total_entries_read,
7392 	.llseek		= generic_file_llseek,
7393 	.release	= tracing_release_generic_tr,
7394 };
7395 
7396 static const struct file_operations tracing_free_buffer_fops = {
7397 	.open		= tracing_open_generic_tr,
7398 	.write		= tracing_free_buffer_write,
7399 	.release	= tracing_free_buffer_release,
7400 };
7401 
7402 static const struct file_operations tracing_mark_fops = {
7403 	.open		= tracing_mark_open,
7404 	.write		= tracing_mark_write,
7405 	.release	= tracing_mark_release,
7406 };
7407 
7408 static const struct file_operations tracing_mark_raw_fops = {
7409 	.open		= tracing_mark_open,
7410 	.write		= tracing_mark_raw_write,
7411 	.release	= tracing_mark_release,
7412 };
7413 
7414 static const struct file_operations trace_clock_fops = {
7415 	.open		= tracing_clock_open,
7416 	.read		= seq_read,
7417 	.llseek		= seq_lseek,
7418 	.release	= tracing_single_release_tr,
7419 	.write		= tracing_clock_write,
7420 };
7421 
7422 static const struct file_operations trace_time_stamp_mode_fops = {
7423 	.open		= tracing_time_stamp_mode_open,
7424 	.read		= seq_read,
7425 	.llseek		= seq_lseek,
7426 	.release	= tracing_single_release_tr,
7427 };
7428 
7429 static const struct file_operations last_boot_fops = {
7430 	.open		= tracing_last_boot_open,
7431 	.read		= seq_read,
7432 	.llseek		= seq_lseek,
7433 	.release	= tracing_seq_release,
7434 };
7435 
7436 #ifdef CONFIG_TRACER_SNAPSHOT
7437 static const struct file_operations snapshot_fops = {
7438 	.open		= tracing_snapshot_open,
7439 	.read		= seq_read,
7440 	.write		= tracing_snapshot_write,
7441 	.llseek		= tracing_lseek,
7442 	.release	= tracing_snapshot_release,
7443 };
7444 
7445 static const struct file_operations snapshot_raw_fops = {
7446 	.open		= snapshot_raw_open,
7447 	.read		= tracing_buffers_read,
7448 	.release	= tracing_buffers_release,
7449 	.splice_read	= tracing_buffers_splice_read,
7450 };
7451 
7452 #endif /* CONFIG_TRACER_SNAPSHOT */
7453 
7454 /*
7455  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7456  * @filp: The active open file structure
7457  * @ubuf: The userspace provided buffer to read value into
7458  * @cnt: The maximum number of bytes to read
7459  * @ppos: The current "file" position
7460  *
7461  * This function implements the write interface for a struct trace_min_max_param.
7462  * The filp->private_data must point to a trace_min_max_param structure that
7463  * defines where to write the value, the min and the max acceptable values,
7464  * and a lock to protect the write.
7465  */
7466 static ssize_t
trace_min_max_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7467 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7468 {
7469 	struct trace_min_max_param *param = filp->private_data;
7470 	u64 val;
7471 	int err;
7472 
7473 	if (!param)
7474 		return -EFAULT;
7475 
7476 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7477 	if (err)
7478 		return err;
7479 
7480 	if (param->lock)
7481 		mutex_lock(param->lock);
7482 
7483 	if (param->min && val < *param->min)
7484 		err = -EINVAL;
7485 
7486 	if (param->max && val > *param->max)
7487 		err = -EINVAL;
7488 
7489 	if (!err)
7490 		*param->val = val;
7491 
7492 	if (param->lock)
7493 		mutex_unlock(param->lock);
7494 
7495 	if (err)
7496 		return err;
7497 
7498 	return cnt;
7499 }
7500 
7501 /*
7502  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7503  * @filp: The active open file structure
7504  * @ubuf: The userspace provided buffer to read value into
7505  * @cnt: The maximum number of bytes to read
7506  * @ppos: The current "file" position
7507  *
7508  * This function implements the read interface for a struct trace_min_max_param.
7509  * The filp->private_data must point to a trace_min_max_param struct with valid
7510  * data.
7511  */
7512 static ssize_t
trace_min_max_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)7513 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7514 {
7515 	struct trace_min_max_param *param = filp->private_data;
7516 	char buf[U64_STR_SIZE];
7517 	int len;
7518 	u64 val;
7519 
7520 	if (!param)
7521 		return -EFAULT;
7522 
7523 	val = *param->val;
7524 
7525 	if (cnt > sizeof(buf))
7526 		cnt = sizeof(buf);
7527 
7528 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7529 
7530 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7531 }
7532 
7533 const struct file_operations trace_min_max_fops = {
7534 	.open		= tracing_open_generic,
7535 	.read		= trace_min_max_read,
7536 	.write		= trace_min_max_write,
7537 };
7538 
7539 #define TRACING_LOG_ERRS_MAX	8
7540 #define TRACING_LOG_LOC_MAX	128
7541 
7542 #define CMD_PREFIX "  Command: "
7543 
7544 struct err_info {
7545 	const char	**errs;	/* ptr to loc-specific array of err strings */
7546 	u8		type;	/* index into errs -> specific err string */
7547 	u16		pos;	/* caret position */
7548 	u64		ts;
7549 };
7550 
7551 struct tracing_log_err {
7552 	struct list_head	list;
7553 	struct err_info		info;
7554 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7555 	char			*cmd;                     /* what caused err */
7556 };
7557 
7558 static DEFINE_MUTEX(tracing_err_log_lock);
7559 
alloc_tracing_log_err(int len)7560 static struct tracing_log_err *alloc_tracing_log_err(int len)
7561 {
7562 	struct tracing_log_err *err;
7563 
7564 	err = kzalloc_obj(*err);
7565 	if (!err)
7566 		return ERR_PTR(-ENOMEM);
7567 
7568 	err->cmd = kzalloc(len, GFP_KERNEL);
7569 	if (!err->cmd) {
7570 		kfree(err);
7571 		return ERR_PTR(-ENOMEM);
7572 	}
7573 
7574 	return err;
7575 }
7576 
free_tracing_log_err(struct tracing_log_err * err)7577 static void free_tracing_log_err(struct tracing_log_err *err)
7578 {
7579 	kfree(err->cmd);
7580 	kfree(err);
7581 }
7582 
get_tracing_log_err(struct trace_array * tr,int len)7583 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7584 						   int len)
7585 {
7586 	struct tracing_log_err *err;
7587 	char *cmd;
7588 
7589 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7590 		err = alloc_tracing_log_err(len);
7591 		if (PTR_ERR(err) != -ENOMEM)
7592 			tr->n_err_log_entries++;
7593 
7594 		return err;
7595 	}
7596 	cmd = kzalloc(len, GFP_KERNEL);
7597 	if (!cmd)
7598 		return ERR_PTR(-ENOMEM);
7599 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7600 	kfree(err->cmd);
7601 	err->cmd = cmd;
7602 	list_del(&err->list);
7603 
7604 	return err;
7605 }
7606 
7607 /**
7608  * err_pos - find the position of a string within a command for error careting
7609  * @cmd: The tracing command that caused the error
7610  * @str: The string to position the caret at within @cmd
7611  *
7612  * Finds the position of the first occurrence of @str within @cmd.  The
7613  * return value can be passed to tracing_log_err() for caret placement
7614  * within @cmd.
7615  *
7616  * Returns the index within @cmd of the first occurrence of @str or 0
7617  * if @str was not found.
7618  */
err_pos(char * cmd,const char * str)7619 unsigned int err_pos(char *cmd, const char *str)
7620 {
7621 	char *found;
7622 
7623 	if (WARN_ON(!strlen(cmd)))
7624 		return 0;
7625 
7626 	found = strstr(cmd, str);
7627 	if (found)
7628 		return found - cmd;
7629 
7630 	return 0;
7631 }
7632 
7633 /**
7634  * tracing_log_err - write an error to the tracing error log
7635  * @tr: The associated trace array for the error (NULL for top level array)
7636  * @loc: A string describing where the error occurred
7637  * @cmd: The tracing command that caused the error
7638  * @errs: The array of loc-specific static error strings
7639  * @type: The index into errs[], which produces the specific static err string
7640  * @pos: The position the caret should be placed in the cmd
7641  *
7642  * Writes an error into tracing/error_log of the form:
7643  *
7644  * <loc>: error: <text>
7645  *   Command: <cmd>
7646  *              ^
7647  *
7648  * tracing/error_log is a small log file containing the last
7649  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7650  * unless there has been a tracing error, and the error log can be
7651  * cleared and have its memory freed by writing the empty string in
7652  * truncation mode to it i.e. echo > tracing/error_log.
7653  *
7654  * NOTE: the @errs array along with the @type param are used to
7655  * produce a static error string - this string is not copied and saved
7656  * when the error is logged - only a pointer to it is saved.  See
7657  * existing callers for examples of how static strings are typically
7658  * defined for use with tracing_log_err().
7659  */
tracing_log_err(struct trace_array * tr,const char * loc,const char * cmd,const char ** errs,u8 type,u16 pos)7660 void tracing_log_err(struct trace_array *tr,
7661 		     const char *loc, const char *cmd,
7662 		     const char **errs, u8 type, u16 pos)
7663 {
7664 	struct tracing_log_err *err;
7665 	int len = 0;
7666 
7667 	if (!tr)
7668 		tr = &global_trace;
7669 
7670 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7671 
7672 	guard(mutex)(&tracing_err_log_lock);
7673 
7674 	err = get_tracing_log_err(tr, len);
7675 	if (PTR_ERR(err) == -ENOMEM)
7676 		return;
7677 
7678 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7679 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7680 
7681 	err->info.errs = errs;
7682 	err->info.type = type;
7683 	err->info.pos = pos;
7684 	err->info.ts = local_clock();
7685 
7686 	list_add_tail(&err->list, &tr->err_log);
7687 }
7688 
clear_tracing_err_log(struct trace_array * tr)7689 static void clear_tracing_err_log(struct trace_array *tr)
7690 {
7691 	struct tracing_log_err *err, *next;
7692 
7693 	guard(mutex)(&tracing_err_log_lock);
7694 
7695 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7696 		list_del(&err->list);
7697 		free_tracing_log_err(err);
7698 	}
7699 
7700 	tr->n_err_log_entries = 0;
7701 }
7702 
tracing_err_log_seq_start(struct seq_file * m,loff_t * pos)7703 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7704 {
7705 	struct trace_array *tr = m->private;
7706 
7707 	mutex_lock(&tracing_err_log_lock);
7708 
7709 	return seq_list_start(&tr->err_log, *pos);
7710 }
7711 
tracing_err_log_seq_next(struct seq_file * m,void * v,loff_t * pos)7712 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7713 {
7714 	struct trace_array *tr = m->private;
7715 
7716 	return seq_list_next(v, &tr->err_log, pos);
7717 }
7718 
tracing_err_log_seq_stop(struct seq_file * m,void * v)7719 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7720 {
7721 	mutex_unlock(&tracing_err_log_lock);
7722 }
7723 
tracing_err_log_show_pos(struct seq_file * m,u16 pos)7724 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7725 {
7726 	u16 i;
7727 
7728 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7729 		seq_putc(m, ' ');
7730 	for (i = 0; i < pos; i++)
7731 		seq_putc(m, ' ');
7732 	seq_puts(m, "^\n");
7733 }
7734 
tracing_err_log_seq_show(struct seq_file * m,void * v)7735 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7736 {
7737 	struct tracing_log_err *err = v;
7738 
7739 	if (err) {
7740 		const char *err_text = err->info.errs[err->info.type];
7741 		u64 sec = err->info.ts;
7742 		u32 nsec;
7743 
7744 		nsec = do_div(sec, NSEC_PER_SEC);
7745 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7746 			   err->loc, err_text);
7747 		seq_printf(m, "%s", err->cmd);
7748 		tracing_err_log_show_pos(m, err->info.pos);
7749 	}
7750 
7751 	return 0;
7752 }
7753 
7754 static const struct seq_operations tracing_err_log_seq_ops = {
7755 	.start  = tracing_err_log_seq_start,
7756 	.next   = tracing_err_log_seq_next,
7757 	.stop   = tracing_err_log_seq_stop,
7758 	.show   = tracing_err_log_seq_show
7759 };
7760 
tracing_err_log_open(struct inode * inode,struct file * file)7761 static int tracing_err_log_open(struct inode *inode, struct file *file)
7762 {
7763 	struct trace_array *tr = inode->i_private;
7764 	int ret = 0;
7765 
7766 	ret = tracing_check_open_get_tr(tr);
7767 	if (ret)
7768 		return ret;
7769 
7770 	/* If this file was opened for write, then erase contents */
7771 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7772 		clear_tracing_err_log(tr);
7773 
7774 	if (file->f_mode & FMODE_READ) {
7775 		ret = seq_open(file, &tracing_err_log_seq_ops);
7776 		if (!ret) {
7777 			struct seq_file *m = file->private_data;
7778 			m->private = tr;
7779 		} else {
7780 			trace_array_put(tr);
7781 		}
7782 	}
7783 	return ret;
7784 }
7785 
tracing_err_log_write(struct file * file,const char __user * buffer,size_t count,loff_t * ppos)7786 static ssize_t tracing_err_log_write(struct file *file,
7787 				     const char __user *buffer,
7788 				     size_t count, loff_t *ppos)
7789 {
7790 	return count;
7791 }
7792 
tracing_err_log_release(struct inode * inode,struct file * file)7793 static int tracing_err_log_release(struct inode *inode, struct file *file)
7794 {
7795 	struct trace_array *tr = inode->i_private;
7796 
7797 	trace_array_put(tr);
7798 
7799 	if (file->f_mode & FMODE_READ)
7800 		seq_release(inode, file);
7801 
7802 	return 0;
7803 }
7804 
7805 static const struct file_operations tracing_err_log_fops = {
7806 	.open           = tracing_err_log_open,
7807 	.write		= tracing_err_log_write,
7808 	.read           = seq_read,
7809 	.llseek         = tracing_lseek,
7810 	.release        = tracing_err_log_release,
7811 };
7812 
tracing_buffers_open(struct inode * inode,struct file * filp)7813 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7814 {
7815 	struct trace_array *tr = inode->i_private;
7816 	struct ftrace_buffer_info *info;
7817 	int ret;
7818 
7819 	ret = tracing_check_open_get_tr(tr);
7820 	if (ret)
7821 		return ret;
7822 
7823 	info = kvzalloc_obj(*info);
7824 	if (!info) {
7825 		trace_array_put(tr);
7826 		return -ENOMEM;
7827 	}
7828 
7829 	mutex_lock(&trace_types_lock);
7830 
7831 	info->iter.tr		= tr;
7832 	info->iter.cpu_file	= tracing_get_cpu(inode);
7833 	info->iter.trace	= tr->current_trace;
7834 	info->iter.array_buffer = &tr->array_buffer;
7835 	info->spare		= NULL;
7836 	/* Force reading ring buffer for first read */
7837 	info->read		= (unsigned int)-1;
7838 
7839 	filp->private_data = info;
7840 
7841 	tr->trace_ref++;
7842 
7843 	mutex_unlock(&trace_types_lock);
7844 
7845 	ret = nonseekable_open(inode, filp);
7846 	if (ret < 0)
7847 		trace_array_put(tr);
7848 
7849 	return ret;
7850 }
7851 
7852 static __poll_t
tracing_buffers_poll(struct file * filp,poll_table * poll_table)7853 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7854 {
7855 	struct ftrace_buffer_info *info = filp->private_data;
7856 	struct trace_iterator *iter = &info->iter;
7857 
7858 	return trace_poll(iter, filp, poll_table);
7859 }
7860 
7861 static ssize_t
tracing_buffers_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)7862 tracing_buffers_read(struct file *filp, char __user *ubuf,
7863 		     size_t count, loff_t *ppos)
7864 {
7865 	struct ftrace_buffer_info *info = filp->private_data;
7866 	struct trace_iterator *iter = &info->iter;
7867 	void *trace_data;
7868 	int page_size;
7869 	ssize_t ret = 0;
7870 	ssize_t size;
7871 
7872 	if (!count)
7873 		return 0;
7874 
7875 	if (iter->snapshot && tracer_uses_snapshot(iter->tr->current_trace))
7876 		return -EBUSY;
7877 
7878 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
7879 
7880 	/* Make sure the spare matches the current sub buffer size */
7881 	if (info->spare) {
7882 		if (page_size != info->spare_size) {
7883 			ring_buffer_free_read_page(iter->array_buffer->buffer,
7884 						   info->spare_cpu, info->spare);
7885 			info->spare = NULL;
7886 		}
7887 	}
7888 
7889 	if (!info->spare) {
7890 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7891 							  iter->cpu_file);
7892 		if (IS_ERR(info->spare)) {
7893 			ret = PTR_ERR(info->spare);
7894 			info->spare = NULL;
7895 		} else {
7896 			info->spare_cpu = iter->cpu_file;
7897 			info->spare_size = page_size;
7898 		}
7899 	}
7900 	if (!info->spare)
7901 		return ret;
7902 
7903 	/* Do we have previous read data to read? */
7904 	if (info->read < page_size)
7905 		goto read;
7906 
7907  again:
7908 	trace_access_lock(iter->cpu_file);
7909 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
7910 				    info->spare,
7911 				    count,
7912 				    iter->cpu_file, 0);
7913 	trace_access_unlock(iter->cpu_file);
7914 
7915 	if (ret < 0) {
7916 		if (trace_empty(iter) && !iter->closed) {
7917 			if (update_last_data_if_empty(iter->tr))
7918 				return 0;
7919 
7920 			if ((filp->f_flags & O_NONBLOCK))
7921 				return -EAGAIN;
7922 
7923 			ret = wait_on_pipe(iter, 0);
7924 			if (ret)
7925 				return ret;
7926 
7927 			goto again;
7928 		}
7929 		return 0;
7930 	}
7931 
7932 	info->read = 0;
7933  read:
7934 	size = page_size - info->read;
7935 	if (size > count)
7936 		size = count;
7937 	trace_data = ring_buffer_read_page_data(info->spare);
7938 	ret = copy_to_user(ubuf, trace_data + info->read, size);
7939 	if (ret == size)
7940 		return -EFAULT;
7941 
7942 	size -= ret;
7943 
7944 	*ppos += size;
7945 	info->read += size;
7946 
7947 	return size;
7948 }
7949 
tracing_buffers_flush(struct file * file,fl_owner_t id)7950 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
7951 {
7952 	struct ftrace_buffer_info *info = file->private_data;
7953 	struct trace_iterator *iter = &info->iter;
7954 
7955 	iter->closed = true;
7956 	/* Make sure the waiters see the new wait_index */
7957 	(void)atomic_fetch_inc_release(&iter->wait_index);
7958 
7959 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
7960 
7961 	return 0;
7962 }
7963 
tracing_buffers_release(struct inode * inode,struct file * file)7964 static int tracing_buffers_release(struct inode *inode, struct file *file)
7965 {
7966 	struct ftrace_buffer_info *info = file->private_data;
7967 	struct trace_iterator *iter = &info->iter;
7968 
7969 	guard(mutex)(&trace_types_lock);
7970 
7971 	iter->tr->trace_ref--;
7972 
7973 	__trace_array_put(iter->tr);
7974 
7975 	if (info->spare)
7976 		ring_buffer_free_read_page(iter->array_buffer->buffer,
7977 					   info->spare_cpu, info->spare);
7978 	kvfree(info);
7979 
7980 	return 0;
7981 }
7982 
7983 struct buffer_ref {
7984 	struct trace_buffer	*buffer;
7985 	void			*page;
7986 	int			cpu;
7987 	refcount_t		refcount;
7988 };
7989 
buffer_ref_release(struct buffer_ref * ref)7990 static void buffer_ref_release(struct buffer_ref *ref)
7991 {
7992 	if (!refcount_dec_and_test(&ref->refcount))
7993 		return;
7994 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7995 	kfree(ref);
7996 }
7997 
buffer_pipe_buf_release(struct pipe_inode_info * pipe,struct pipe_buffer * buf)7998 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7999 				    struct pipe_buffer *buf)
8000 {
8001 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8002 
8003 	buffer_ref_release(ref);
8004 	buf->private = 0;
8005 }
8006 
buffer_pipe_buf_get(struct pipe_inode_info * pipe,struct pipe_buffer * buf)8007 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8008 				struct pipe_buffer *buf)
8009 {
8010 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8011 
8012 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8013 		return false;
8014 
8015 	refcount_inc(&ref->refcount);
8016 	return true;
8017 }
8018 
8019 /* Pipe buffer operations for a buffer. */
8020 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8021 	.release		= buffer_pipe_buf_release,
8022 	.get			= buffer_pipe_buf_get,
8023 };
8024 
8025 /*
8026  * Callback from splice_to_pipe(), if we need to release some pages
8027  * at the end of the spd in case we error'ed out in filling the pipe.
8028  */
buffer_spd_release(struct splice_pipe_desc * spd,unsigned int i)8029 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8030 {
8031 	struct buffer_ref *ref =
8032 		(struct buffer_ref *)spd->partial[i].private;
8033 
8034 	buffer_ref_release(ref);
8035 	spd->partial[i].private = 0;
8036 }
8037 
8038 static ssize_t
tracing_buffers_splice_read(struct file * file,loff_t * ppos,struct pipe_inode_info * pipe,size_t len,unsigned int flags)8039 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8040 			    struct pipe_inode_info *pipe, size_t len,
8041 			    unsigned int flags)
8042 {
8043 	struct ftrace_buffer_info *info = file->private_data;
8044 	struct trace_iterator *iter = &info->iter;
8045 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8046 	struct page *pages_def[PIPE_DEF_BUFFERS];
8047 	struct splice_pipe_desc spd = {
8048 		.pages		= pages_def,
8049 		.partial	= partial_def,
8050 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8051 		.ops		= &buffer_pipe_buf_ops,
8052 		.spd_release	= buffer_spd_release,
8053 	};
8054 	struct buffer_ref *ref;
8055 	bool woken = false;
8056 	int page_size;
8057 	int entries, i;
8058 	ssize_t ret = 0;
8059 
8060 	if (iter->snapshot && tracer_uses_snapshot(iter->tr->current_trace))
8061 		return -EBUSY;
8062 
8063 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8064 	if (*ppos & (page_size - 1))
8065 		return -EINVAL;
8066 
8067 	if (len & (page_size - 1)) {
8068 		if (len < page_size)
8069 			return -EINVAL;
8070 		len &= (~(page_size - 1));
8071 	}
8072 
8073 	if (splice_grow_spd(pipe, &spd))
8074 		return -ENOMEM;
8075 
8076  again:
8077 	trace_access_lock(iter->cpu_file);
8078 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8079 
8080 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8081 		struct page *page;
8082 		int r;
8083 
8084 		ref = kzalloc_obj(*ref);
8085 		if (!ref) {
8086 			ret = -ENOMEM;
8087 			break;
8088 		}
8089 
8090 		refcount_set(&ref->refcount, 1);
8091 		ref->buffer = iter->array_buffer->buffer;
8092 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8093 		if (IS_ERR(ref->page)) {
8094 			ret = PTR_ERR(ref->page);
8095 			ref->page = NULL;
8096 			kfree(ref);
8097 			break;
8098 		}
8099 		ref->cpu = iter->cpu_file;
8100 
8101 		r = ring_buffer_read_page(ref->buffer, ref->page,
8102 					  len, iter->cpu_file, 1);
8103 		if (r < 0) {
8104 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8105 						   ref->page);
8106 			kfree(ref);
8107 			break;
8108 		}
8109 
8110 		page = virt_to_page(ring_buffer_read_page_data(ref->page));
8111 
8112 		spd.pages[i] = page;
8113 		spd.partial[i].len = page_size;
8114 		spd.partial[i].offset = 0;
8115 		spd.partial[i].private = (unsigned long)ref;
8116 		spd.nr_pages++;
8117 		*ppos += page_size;
8118 
8119 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8120 	}
8121 
8122 	trace_access_unlock(iter->cpu_file);
8123 	spd.nr_pages = i;
8124 
8125 	/* did we read anything? */
8126 	if (!spd.nr_pages) {
8127 
8128 		if (ret)
8129 			goto out;
8130 
8131 		if (woken)
8132 			goto out;
8133 
8134 		ret = -EAGAIN;
8135 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8136 			goto out;
8137 
8138 		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8139 		if (ret)
8140 			goto out;
8141 
8142 		/* No need to wait after waking up when tracing is off */
8143 		if (!tracer_tracing_is_on(iter->tr))
8144 			goto out;
8145 
8146 		/* Iterate one more time to collect any new data then exit */
8147 		woken = true;
8148 
8149 		goto again;
8150 	}
8151 
8152 	ret = splice_to_pipe(pipe, &spd);
8153 out:
8154 	splice_shrink_spd(&spd);
8155 
8156 	return ret;
8157 }
8158 
tracing_buffers_ioctl(struct file * file,unsigned int cmd,unsigned long arg)8159 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8160 {
8161 	struct ftrace_buffer_info *info = file->private_data;
8162 	struct trace_iterator *iter = &info->iter;
8163 	int err;
8164 
8165 	if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
8166 		if (!(file->f_flags & O_NONBLOCK)) {
8167 			err = ring_buffer_wait(iter->array_buffer->buffer,
8168 					       iter->cpu_file,
8169 					       iter->tr->buffer_percent,
8170 					       NULL, NULL);
8171 			if (err)
8172 				return err;
8173 		}
8174 
8175 		return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8176 						  iter->cpu_file);
8177 	} else if (cmd) {
8178 		return -ENOTTY;
8179 	}
8180 
8181 	/*
8182 	 * An ioctl call with cmd 0 to the ring buffer file will wake up all
8183 	 * waiters
8184 	 */
8185 	guard(mutex)(&trace_types_lock);
8186 
8187 	/* Make sure the waiters see the new wait_index */
8188 	(void)atomic_fetch_inc_release(&iter->wait_index);
8189 
8190 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8191 
8192 	return 0;
8193 }
8194 
8195 #ifdef CONFIG_TRACER_SNAPSHOT
get_snapshot_map(struct trace_array * tr)8196 static int get_snapshot_map(struct trace_array *tr)
8197 {
8198 	int err = 0;
8199 
8200 	/*
8201 	 * Called with mmap_lock held. lockdep would be unhappy if we would now
8202 	 * take trace_types_lock. Instead use the specific
8203 	 * snapshot_trigger_lock.
8204 	 */
8205 	spin_lock(&tr->snapshot_trigger_lock);
8206 
8207 	if (tr->snapshot || tr->mapped == UINT_MAX)
8208 		err = -EBUSY;
8209 	else
8210 		tr->mapped++;
8211 
8212 	spin_unlock(&tr->snapshot_trigger_lock);
8213 
8214 	/* Wait for update_max_tr() to observe iter->tr->mapped */
8215 	if (tr->mapped == 1)
8216 		synchronize_rcu();
8217 
8218 	return err;
8219 
8220 }
put_snapshot_map(struct trace_array * tr)8221 static void put_snapshot_map(struct trace_array *tr)
8222 {
8223 	spin_lock(&tr->snapshot_trigger_lock);
8224 	if (!WARN_ON(!tr->mapped))
8225 		tr->mapped--;
8226 	spin_unlock(&tr->snapshot_trigger_lock);
8227 }
8228 #else
get_snapshot_map(struct trace_array * tr)8229 static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
put_snapshot_map(struct trace_array * tr)8230 static inline void put_snapshot_map(struct trace_array *tr) { }
8231 #endif
8232 
8233 /*
8234  * This is called when a VMA is duplicated (e.g., on fork()) to increment
8235  * the user_mapped counter without remapping pages.
8236  */
tracing_buffers_mmap_open(struct vm_area_struct * vma)8237 static void tracing_buffers_mmap_open(struct vm_area_struct *vma)
8238 {
8239 	struct ftrace_buffer_info *info = vma->vm_file->private_data;
8240 	struct trace_iterator *iter = &info->iter;
8241 
8242 	ring_buffer_map_dup(iter->array_buffer->buffer, iter->cpu_file);
8243 }
8244 
tracing_buffers_mmap_close(struct vm_area_struct * vma)8245 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8246 {
8247 	struct ftrace_buffer_info *info = vma->vm_file->private_data;
8248 	struct trace_iterator *iter = &info->iter;
8249 
8250 	WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
8251 	put_snapshot_map(iter->tr);
8252 }
8253 
tracing_buffers_may_split(struct vm_area_struct * vma,unsigned long addr)8254 static int tracing_buffers_may_split(struct vm_area_struct *vma, unsigned long addr)
8255 {
8256 	/*
8257 	 * Trace buffer mappings require the complete buffer including
8258 	 * the meta page. Partial mappings are not supported.
8259 	 */
8260 	return -EINVAL;
8261 }
8262 
8263 static const struct vm_operations_struct tracing_buffers_vmops = {
8264 	.open		= tracing_buffers_mmap_open,
8265 	.close		= tracing_buffers_mmap_close,
8266 	.may_split      = tracing_buffers_may_split,
8267 };
8268 
tracing_buffers_mmap(struct file * filp,struct vm_area_struct * vma)8269 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
8270 {
8271 	struct ftrace_buffer_info *info = filp->private_data;
8272 	struct trace_iterator *iter = &info->iter;
8273 	int ret = 0;
8274 
8275 	/* A memmap'ed and backup buffers are not supported for user space mmap */
8276 	if (iter->tr->flags & (TRACE_ARRAY_FL_MEMMAP | TRACE_ARRAY_FL_VMALLOC))
8277 		return -ENODEV;
8278 
8279 	ret = get_snapshot_map(iter->tr);
8280 	if (ret)
8281 		return ret;
8282 
8283 	ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
8284 	if (ret)
8285 		put_snapshot_map(iter->tr);
8286 
8287 	vma->vm_ops = &tracing_buffers_vmops;
8288 
8289 	return ret;
8290 }
8291 
8292 static const struct file_operations tracing_buffers_fops = {
8293 	.open		= tracing_buffers_open,
8294 	.read		= tracing_buffers_read,
8295 	.poll		= tracing_buffers_poll,
8296 	.release	= tracing_buffers_release,
8297 	.flush		= tracing_buffers_flush,
8298 	.splice_read	= tracing_buffers_splice_read,
8299 	.unlocked_ioctl = tracing_buffers_ioctl,
8300 	.mmap		= tracing_buffers_mmap,
8301 };
8302 
8303 static ssize_t
tracing_stats_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)8304 tracing_stats_read(struct file *filp, char __user *ubuf,
8305 		   size_t count, loff_t *ppos)
8306 {
8307 	struct inode *inode = file_inode(filp);
8308 	struct trace_array *tr = inode->i_private;
8309 	struct array_buffer *trace_buf = &tr->array_buffer;
8310 	int cpu = tracing_get_cpu(inode);
8311 	struct trace_seq *s;
8312 	unsigned long cnt;
8313 	unsigned long long t;
8314 	unsigned long usec_rem;
8315 
8316 	s = kmalloc_obj(*s);
8317 	if (!s)
8318 		return -ENOMEM;
8319 
8320 	trace_seq_init(s);
8321 
8322 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8323 	trace_seq_printf(s, "entries: %ld\n", cnt);
8324 
8325 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8326 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8327 
8328 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8329 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8330 
8331 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8332 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8333 
8334 	if (trace_clocks[tr->clock_id].in_ns) {
8335 		/* local or global for trace_clock */
8336 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8337 		usec_rem = do_div(t, USEC_PER_SEC);
8338 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8339 								t, usec_rem);
8340 
8341 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8342 		usec_rem = do_div(t, USEC_PER_SEC);
8343 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8344 	} else {
8345 		/* counter or tsc mode for trace_clock */
8346 		trace_seq_printf(s, "oldest event ts: %llu\n",
8347 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8348 
8349 		trace_seq_printf(s, "now ts: %llu\n",
8350 				ring_buffer_time_stamp(trace_buf->buffer));
8351 	}
8352 
8353 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8354 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8355 
8356 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8357 	trace_seq_printf(s, "read events: %ld\n", cnt);
8358 
8359 	count = simple_read_from_buffer(ubuf, count, ppos,
8360 					s->buffer, trace_seq_used(s));
8361 
8362 	kfree(s);
8363 
8364 	return count;
8365 }
8366 
8367 static const struct file_operations tracing_stats_fops = {
8368 	.open		= tracing_open_generic_tr,
8369 	.read		= tracing_stats_read,
8370 	.llseek		= generic_file_llseek,
8371 	.release	= tracing_release_generic_tr,
8372 };
8373 
8374 #ifdef CONFIG_DYNAMIC_FTRACE
8375 
8376 static ssize_t
tracing_read_dyn_info(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8377 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8378 		  size_t cnt, loff_t *ppos)
8379 {
8380 	ssize_t ret;
8381 	char *buf;
8382 	int r;
8383 
8384 	/* 512 should be plenty to hold the amount needed */
8385 #define DYN_INFO_BUF_SIZE	512
8386 
8387 	buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
8388 	if (!buf)
8389 		return -ENOMEM;
8390 
8391 	r = scnprintf(buf, DYN_INFO_BUF_SIZE,
8392 		      "%ld pages:%ld groups: %ld\n"
8393 		      "ftrace boot update time = %llu (ns)\n"
8394 		      "ftrace module total update time = %llu (ns)\n",
8395 		      ftrace_update_tot_cnt,
8396 		      ftrace_number_of_pages,
8397 		      ftrace_number_of_groups,
8398 		      ftrace_update_time,
8399 		      ftrace_total_mod_time);
8400 
8401 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8402 	kfree(buf);
8403 	return ret;
8404 }
8405 
8406 static const struct file_operations tracing_dyn_info_fops = {
8407 	.open		= tracing_open_generic,
8408 	.read		= tracing_read_dyn_info,
8409 	.llseek		= generic_file_llseek,
8410 };
8411 #endif /* CONFIG_DYNAMIC_FTRACE */
8412 
8413 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8414 static void
ftrace_snapshot(unsigned long ip,unsigned long parent_ip,struct trace_array * tr,struct ftrace_probe_ops * ops,void * data)8415 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8416 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8417 		void *data)
8418 {
8419 	tracing_snapshot_instance(tr);
8420 }
8421 
8422 static void
ftrace_count_snapshot(unsigned long ip,unsigned long parent_ip,struct trace_array * tr,struct ftrace_probe_ops * ops,void * data)8423 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8424 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8425 		      void *data)
8426 {
8427 	struct ftrace_func_mapper *mapper = data;
8428 	long *count = NULL;
8429 
8430 	if (mapper)
8431 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8432 
8433 	if (count) {
8434 
8435 		if (*count <= 0)
8436 			return;
8437 
8438 		(*count)--;
8439 	}
8440 
8441 	tracing_snapshot_instance(tr);
8442 }
8443 
8444 static int
ftrace_snapshot_print(struct seq_file * m,unsigned long ip,struct ftrace_probe_ops * ops,void * data)8445 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8446 		      struct ftrace_probe_ops *ops, void *data)
8447 {
8448 	struct ftrace_func_mapper *mapper = data;
8449 	long *count = NULL;
8450 
8451 	seq_printf(m, "%ps:", (void *)ip);
8452 
8453 	seq_puts(m, "snapshot");
8454 
8455 	if (mapper)
8456 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8457 
8458 	if (count)
8459 		seq_printf(m, ":count=%ld\n", *count);
8460 	else
8461 		seq_puts(m, ":unlimited\n");
8462 
8463 	return 0;
8464 }
8465 
8466 static int
ftrace_snapshot_init(struct ftrace_probe_ops * ops,struct trace_array * tr,unsigned long ip,void * init_data,void ** data)8467 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8468 		     unsigned long ip, void *init_data, void **data)
8469 {
8470 	struct ftrace_func_mapper *mapper = *data;
8471 
8472 	if (!mapper) {
8473 		mapper = allocate_ftrace_func_mapper();
8474 		if (!mapper)
8475 			return -ENOMEM;
8476 		*data = mapper;
8477 	}
8478 
8479 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8480 }
8481 
8482 static void
ftrace_snapshot_free(struct ftrace_probe_ops * ops,struct trace_array * tr,unsigned long ip,void * data)8483 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8484 		     unsigned long ip, void *data)
8485 {
8486 	struct ftrace_func_mapper *mapper = data;
8487 
8488 	if (!ip) {
8489 		if (!mapper)
8490 			return;
8491 		free_ftrace_func_mapper(mapper, NULL);
8492 		return;
8493 	}
8494 
8495 	ftrace_func_mapper_remove_ip(mapper, ip);
8496 }
8497 
8498 static struct ftrace_probe_ops snapshot_probe_ops = {
8499 	.func			= ftrace_snapshot,
8500 	.print			= ftrace_snapshot_print,
8501 };
8502 
8503 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8504 	.func			= ftrace_count_snapshot,
8505 	.print			= ftrace_snapshot_print,
8506 	.init			= ftrace_snapshot_init,
8507 	.free			= ftrace_snapshot_free,
8508 };
8509 
8510 static int
ftrace_trace_snapshot_callback(struct trace_array * tr,struct ftrace_hash * hash,char * glob,char * cmd,char * param,int enable)8511 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8512 			       char *glob, char *cmd, char *param, int enable)
8513 {
8514 	struct ftrace_probe_ops *ops;
8515 	void *count = (void *)-1;
8516 	char *number;
8517 	int ret;
8518 
8519 	if (!tr)
8520 		return -ENODEV;
8521 
8522 	/* hash funcs only work with set_ftrace_filter */
8523 	if (!enable)
8524 		return -EINVAL;
8525 
8526 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8527 
8528 	if (glob[0] == '!') {
8529 		ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
8530 		if (!ret)
8531 			tracing_disarm_snapshot(tr);
8532 
8533 		return ret;
8534 	}
8535 
8536 	if (!param)
8537 		goto out_reg;
8538 
8539 	number = strsep(&param, ":");
8540 
8541 	if (!strlen(number))
8542 		goto out_reg;
8543 
8544 	/*
8545 	 * We use the callback data field (which is a pointer)
8546 	 * as our counter.
8547 	 */
8548 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8549 	if (ret)
8550 		return ret;
8551 
8552  out_reg:
8553 	ret = tracing_arm_snapshot(tr);
8554 	if (ret < 0)
8555 		return ret;
8556 
8557 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8558 	if (ret < 0)
8559 		tracing_disarm_snapshot(tr);
8560 
8561 	return ret < 0 ? ret : 0;
8562 }
8563 
8564 static struct ftrace_func_command ftrace_snapshot_cmd = {
8565 	.name			= "snapshot",
8566 	.func			= ftrace_trace_snapshot_callback,
8567 };
8568 
register_snapshot_cmd(void)8569 static __init int register_snapshot_cmd(void)
8570 {
8571 	return register_ftrace_command(&ftrace_snapshot_cmd);
8572 }
8573 #else
register_snapshot_cmd(void)8574 static inline __init int register_snapshot_cmd(void) { return 0; }
8575 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8576 
tracing_get_dentry(struct trace_array * tr)8577 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8578 {
8579 	/* Top directory uses NULL as the parent */
8580 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8581 		return NULL;
8582 
8583 	if (WARN_ON(!tr->dir))
8584 		return ERR_PTR(-ENODEV);
8585 
8586 	/* All sub buffers have a descriptor */
8587 	return tr->dir;
8588 }
8589 
tracing_dentry_percpu(struct trace_array * tr,int cpu)8590 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8591 {
8592 	struct dentry *d_tracer;
8593 
8594 	if (tr->percpu_dir)
8595 		return tr->percpu_dir;
8596 
8597 	d_tracer = tracing_get_dentry(tr);
8598 	if (IS_ERR(d_tracer))
8599 		return NULL;
8600 
8601 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8602 
8603 	MEM_FAIL(!tr->percpu_dir,
8604 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8605 
8606 	return tr->percpu_dir;
8607 }
8608 
8609 static struct dentry *
trace_create_cpu_file(const char * name,umode_t mode,struct dentry * parent,void * data,long cpu,const struct file_operations * fops)8610 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8611 		      void *data, long cpu, const struct file_operations *fops)
8612 {
8613 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8614 
8615 	if (ret) /* See tracing_get_cpu() */
8616 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8617 	return ret;
8618 }
8619 
8620 static void
tracing_init_tracefs_percpu(struct trace_array * tr,long cpu)8621 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8622 {
8623 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8624 	struct dentry *d_cpu;
8625 	char cpu_dir[30]; /* 30 characters should be more than enough */
8626 
8627 	if (!d_percpu)
8628 		return;
8629 
8630 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8631 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8632 	if (!d_cpu) {
8633 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8634 		return;
8635 	}
8636 
8637 	/* per cpu trace_pipe */
8638 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8639 				tr, cpu, &tracing_pipe_fops);
8640 
8641 	/* per cpu trace */
8642 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8643 				tr, cpu, &tracing_fops);
8644 
8645 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8646 				tr, cpu, &tracing_buffers_fops);
8647 
8648 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8649 				tr, cpu, &tracing_stats_fops);
8650 
8651 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_WRITE, d_cpu,
8652 				tr, cpu, &tracing_entries_fops);
8653 
8654 	if (tr->range_addr_start)
8655 		trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
8656 				      tr, cpu, &tracing_buffer_meta_fops);
8657 #ifdef CONFIG_TRACER_SNAPSHOT
8658 	if (!tr->range_addr_start) {
8659 		trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8660 				      tr, cpu, &snapshot_fops);
8661 
8662 		trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8663 				      tr, cpu, &snapshot_raw_fops);
8664 	}
8665 #endif
8666 }
8667 
8668 #ifdef CONFIG_FTRACE_SELFTEST
8669 /* Let selftest have access to static functions in this file */
8670 #include "trace_selftest.c"
8671 #endif
8672 
8673 static ssize_t
trace_options_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8674 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8675 			loff_t *ppos)
8676 {
8677 	struct trace_option_dentry *topt = filp->private_data;
8678 	char *buf;
8679 
8680 	if (topt->flags->val & topt->opt->bit)
8681 		buf = "1\n";
8682 	else
8683 		buf = "0\n";
8684 
8685 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8686 }
8687 
8688 static ssize_t
trace_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)8689 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8690 			 loff_t *ppos)
8691 {
8692 	struct trace_option_dentry *topt = filp->private_data;
8693 	unsigned long val;
8694 	int ret;
8695 
8696 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8697 	if (ret)
8698 		return ret;
8699 
8700 	if (val != 0 && val != 1)
8701 		return -EINVAL;
8702 
8703 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8704 		guard(mutex)(&trace_types_lock);
8705 		ret = __set_tracer_option(topt->tr, topt->flags,
8706 					  topt->opt, !val);
8707 		if (ret)
8708 			return ret;
8709 	}
8710 
8711 	*ppos += cnt;
8712 
8713 	return cnt;
8714 }
8715 
tracing_open_options(struct inode * inode,struct file * filp)8716 static int tracing_open_options(struct inode *inode, struct file *filp)
8717 {
8718 	struct trace_option_dentry *topt = inode->i_private;
8719 	int ret;
8720 
8721 	ret = tracing_check_open_get_tr(topt->tr);
8722 	if (ret)
8723 		return ret;
8724 
8725 	filp->private_data = inode->i_private;
8726 	return 0;
8727 }
8728 
tracing_release_options(struct inode * inode,struct file * file)8729 static int tracing_release_options(struct inode *inode, struct file *file)
8730 {
8731 	struct trace_option_dentry *topt = file->private_data;
8732 
8733 	trace_array_put(topt->tr);
8734 	return 0;
8735 }
8736 
8737 static const struct file_operations trace_options_fops = {
8738 	.open = tracing_open_options,
8739 	.read = trace_options_read,
8740 	.write = trace_options_write,
8741 	.llseek	= generic_file_llseek,
8742 	.release = tracing_release_options,
8743 };
8744 
8745 /*
8746  * In order to pass in both the trace_array descriptor as well as the index
8747  * to the flag that the trace option file represents, the trace_array
8748  * has a character array of trace_flags_index[], which holds the index
8749  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8750  * The address of this character array is passed to the flag option file
8751  * read/write callbacks.
8752  *
8753  * In order to extract both the index and the trace_array descriptor,
8754  * get_tr_index() uses the following algorithm.
8755  *
8756  *   idx = *ptr;
8757  *
8758  * As the pointer itself contains the address of the index (remember
8759  * index[1] == 1).
8760  *
8761  * Then to get the trace_array descriptor, by subtracting that index
8762  * from the ptr, we get to the start of the index itself.
8763  *
8764  *   ptr - idx == &index[0]
8765  *
8766  * Then a simple container_of() from that pointer gets us to the
8767  * trace_array descriptor.
8768  */
get_tr_index(void * data,struct trace_array ** ptr,unsigned int * pindex)8769 static void get_tr_index(void *data, struct trace_array **ptr,
8770 			 unsigned int *pindex)
8771 {
8772 	*pindex = *(unsigned char *)data;
8773 
8774 	*ptr = container_of(data - *pindex, struct trace_array,
8775 			    trace_flags_index);
8776 }
8777 
8778 static ssize_t
trace_options_core_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8779 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8780 			loff_t *ppos)
8781 {
8782 	void *tr_index = filp->private_data;
8783 	struct trace_array *tr;
8784 	unsigned int index;
8785 	char *buf;
8786 
8787 	get_tr_index(tr_index, &tr, &index);
8788 
8789 	if (tr->trace_flags & (1ULL << index))
8790 		buf = "1\n";
8791 	else
8792 		buf = "0\n";
8793 
8794 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8795 }
8796 
8797 static ssize_t
trace_options_core_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)8798 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8799 			 loff_t *ppos)
8800 {
8801 	void *tr_index = filp->private_data;
8802 	struct trace_array *tr;
8803 	unsigned int index;
8804 	unsigned long val;
8805 	int ret;
8806 
8807 	get_tr_index(tr_index, &tr, &index);
8808 
8809 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8810 	if (ret)
8811 		return ret;
8812 
8813 	if (val != 0 && val != 1)
8814 		return -EINVAL;
8815 
8816 	mutex_lock(&event_mutex);
8817 	mutex_lock(&trace_types_lock);
8818 	ret = set_tracer_flag(tr, 1ULL << index, val);
8819 	mutex_unlock(&trace_types_lock);
8820 	mutex_unlock(&event_mutex);
8821 
8822 	if (ret < 0)
8823 		return ret;
8824 
8825 	*ppos += cnt;
8826 
8827 	return cnt;
8828 }
8829 
8830 static const struct file_operations trace_options_core_fops = {
8831 	.open = tracing_open_generic,
8832 	.read = trace_options_core_read,
8833 	.write = trace_options_core_write,
8834 	.llseek = generic_file_llseek,
8835 };
8836 
trace_create_file(const char * name,umode_t mode,struct dentry * parent,void * data,const struct file_operations * fops)8837 struct dentry *trace_create_file(const char *name,
8838 				 umode_t mode,
8839 				 struct dentry *parent,
8840 				 void *data,
8841 				 const struct file_operations *fops)
8842 {
8843 	struct dentry *ret;
8844 
8845 	ret = tracefs_create_file(name, mode, parent, data, fops);
8846 	if (!ret)
8847 		pr_warn("Could not create tracefs '%s' entry\n", name);
8848 
8849 	return ret;
8850 }
8851 
8852 
trace_options_init_dentry(struct trace_array * tr)8853 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8854 {
8855 	struct dentry *d_tracer;
8856 
8857 	if (tr->options)
8858 		return tr->options;
8859 
8860 	d_tracer = tracing_get_dentry(tr);
8861 	if (IS_ERR(d_tracer))
8862 		return NULL;
8863 
8864 	tr->options = tracefs_create_dir("options", d_tracer);
8865 	if (!tr->options) {
8866 		pr_warn("Could not create tracefs directory 'options'\n");
8867 		return NULL;
8868 	}
8869 
8870 	return tr->options;
8871 }
8872 
8873 static void
create_trace_option_file(struct trace_array * tr,struct trace_option_dentry * topt,struct tracer_flags * flags,struct tracer_opt * opt)8874 create_trace_option_file(struct trace_array *tr,
8875 			 struct trace_option_dentry *topt,
8876 			 struct tracer_flags *flags,
8877 			 struct tracer_opt *opt)
8878 {
8879 	struct dentry *t_options;
8880 
8881 	t_options = trace_options_init_dentry(tr);
8882 	if (!t_options)
8883 		return;
8884 
8885 	topt->flags = flags;
8886 	topt->opt = opt;
8887 	topt->tr = tr;
8888 
8889 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8890 					t_options, topt, &trace_options_fops);
8891 }
8892 
8893 static int
create_trace_option_files(struct trace_array * tr,struct tracer * tracer,struct tracer_flags * flags)8894 create_trace_option_files(struct trace_array *tr, struct tracer *tracer,
8895 			  struct tracer_flags *flags)
8896 {
8897 	struct trace_option_dentry *topts;
8898 	struct trace_options *tr_topts;
8899 	struct tracer_opt *opts;
8900 	int cnt;
8901 
8902 	if (!flags || !flags->opts)
8903 		return 0;
8904 
8905 	opts = flags->opts;
8906 
8907 	for (cnt = 0; opts[cnt].name; cnt++)
8908 		;
8909 
8910 	topts = kzalloc_objs(*topts, cnt + 1);
8911 	if (!topts)
8912 		return 0;
8913 
8914 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8915 			    GFP_KERNEL);
8916 	if (!tr_topts) {
8917 		kfree(topts);
8918 		return -ENOMEM;
8919 	}
8920 
8921 	tr->topts = tr_topts;
8922 	tr->topts[tr->nr_topts].tracer = tracer;
8923 	tr->topts[tr->nr_topts].topts = topts;
8924 	tr->nr_topts++;
8925 
8926 	for (cnt = 0; opts[cnt].name; cnt++) {
8927 		create_trace_option_file(tr, &topts[cnt], flags,
8928 					 &opts[cnt]);
8929 		MEM_FAIL(topts[cnt].entry == NULL,
8930 			  "Failed to create trace option: %s",
8931 			  opts[cnt].name);
8932 	}
8933 	return 0;
8934 }
8935 
get_global_flags_val(struct tracer * tracer)8936 static int get_global_flags_val(struct tracer *tracer)
8937 {
8938 	struct tracers *t;
8939 
8940 	list_for_each_entry(t, &global_trace.tracers, list) {
8941 		if (t->tracer != tracer)
8942 			continue;
8943 		if (!t->flags)
8944 			return -1;
8945 		return t->flags->val;
8946 	}
8947 	return -1;
8948 }
8949 
add_tracer_options(struct trace_array * tr,struct tracers * t)8950 static int add_tracer_options(struct trace_array *tr, struct tracers *t)
8951 {
8952 	struct tracer *tracer = t->tracer;
8953 	struct tracer_flags *flags = t->flags ?: tracer->flags;
8954 
8955 	if (!flags)
8956 		return 0;
8957 
8958 	/* Only add tracer options after update_tracer_options finish */
8959 	if (!tracer_options_updated)
8960 		return 0;
8961 
8962 	return create_trace_option_files(tr, tracer, flags);
8963 }
8964 
add_tracer(struct trace_array * tr,struct tracer * tracer)8965 static int add_tracer(struct trace_array *tr, struct tracer *tracer)
8966 {
8967 	struct tracer_flags *flags;
8968 	struct tracers *t;
8969 	int ret;
8970 
8971 	/* Only enable if the directory has been created already. */
8972 	if (!tr->dir && !(tr->flags & TRACE_ARRAY_FL_GLOBAL))
8973 		return 0;
8974 
8975 	/*
8976 	 * If this is an instance, only create flags for tracers
8977 	 * the instance may have.
8978 	 */
8979 	if (!trace_ok_for_array(tracer, tr))
8980 		return 0;
8981 
8982 	t = kmalloc_obj(*t);
8983 	if (!t)
8984 		return -ENOMEM;
8985 
8986 	t->tracer = tracer;
8987 	t->flags = NULL;
8988 	list_add(&t->list, &tr->tracers);
8989 
8990 	flags = tracer->flags;
8991 	if (!flags) {
8992 		if (!tracer->default_flags)
8993 			return 0;
8994 
8995 		/*
8996 		 * If the tracer defines default flags, it means the flags are
8997 		 * per trace instance.
8998 		 */
8999 		flags = kmalloc_obj(*flags);
9000 		if (!flags)
9001 			return -ENOMEM;
9002 
9003 		*flags = *tracer->default_flags;
9004 		flags->trace = tracer;
9005 
9006 		t->flags = flags;
9007 
9008 		/* If this is an instance, inherit the global_trace flags */
9009 		if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL)) {
9010 			int val = get_global_flags_val(tracer);
9011 			if (!WARN_ON_ONCE(val < 0))
9012 				flags->val = val;
9013 		}
9014 	}
9015 
9016 	ret = add_tracer_options(tr, t);
9017 	if (ret < 0) {
9018 		list_del(&t->list);
9019 		kfree(t->flags);
9020 		kfree(t);
9021 	}
9022 
9023 	return ret;
9024 }
9025 
9026 static struct dentry *
create_trace_option_core_file(struct trace_array * tr,const char * option,long index)9027 create_trace_option_core_file(struct trace_array *tr,
9028 			      const char *option, long index)
9029 {
9030 	struct dentry *t_options;
9031 
9032 	t_options = trace_options_init_dentry(tr);
9033 	if (!t_options)
9034 		return NULL;
9035 
9036 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9037 				 (void *)&tr->trace_flags_index[index],
9038 				 &trace_options_core_fops);
9039 }
9040 
create_trace_options_dir(struct trace_array * tr)9041 static void create_trace_options_dir(struct trace_array *tr)
9042 {
9043 	struct dentry *t_options;
9044 	bool top_level = tr == &global_trace;
9045 	int i;
9046 
9047 	t_options = trace_options_init_dentry(tr);
9048 	if (!t_options)
9049 		return;
9050 
9051 	for (i = 0; trace_options[i]; i++) {
9052 		if (top_level ||
9053 		    !((1ULL << i) & TOP_LEVEL_TRACE_FLAGS)) {
9054 			create_trace_option_core_file(tr, trace_options[i], i);
9055 		}
9056 	}
9057 }
9058 
9059 static ssize_t
rb_simple_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9060 rb_simple_read(struct file *filp, char __user *ubuf,
9061 	       size_t cnt, loff_t *ppos)
9062 {
9063 	struct trace_array *tr = filp->private_data;
9064 	char buf[64];
9065 	int r;
9066 
9067 	r = tracer_tracing_is_on(tr);
9068 	r = sprintf(buf, "%d\n", r);
9069 
9070 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9071 }
9072 
9073 static ssize_t
rb_simple_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9074 rb_simple_write(struct file *filp, const char __user *ubuf,
9075 		size_t cnt, loff_t *ppos)
9076 {
9077 	struct trace_array *tr = filp->private_data;
9078 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9079 	unsigned long val;
9080 	int ret;
9081 
9082 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9083 	if (ret)
9084 		return ret;
9085 
9086 	if (buffer) {
9087 		guard(mutex)(&trace_types_lock);
9088 		if (!!val == tracer_tracing_is_on(tr)) {
9089 			val = 0; /* do nothing */
9090 		} else if (val) {
9091 			tracer_tracing_on(tr);
9092 			if (tr->current_trace->start)
9093 				tr->current_trace->start(tr);
9094 		} else {
9095 			tracer_tracing_off(tr);
9096 			if (tr->current_trace->stop)
9097 				tr->current_trace->stop(tr);
9098 			/* Wake up any waiters */
9099 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9100 		}
9101 	}
9102 
9103 	(*ppos)++;
9104 
9105 	return cnt;
9106 }
9107 
9108 static const struct file_operations rb_simple_fops = {
9109 	.open		= tracing_open_generic_tr,
9110 	.read		= rb_simple_read,
9111 	.write		= rb_simple_write,
9112 	.release	= tracing_release_generic_tr,
9113 	.llseek		= default_llseek,
9114 };
9115 
9116 static ssize_t
buffer_percent_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9117 buffer_percent_read(struct file *filp, char __user *ubuf,
9118 		    size_t cnt, loff_t *ppos)
9119 {
9120 	struct trace_array *tr = filp->private_data;
9121 	char buf[64];
9122 	int r;
9123 
9124 	r = tr->buffer_percent;
9125 	r = sprintf(buf, "%d\n", r);
9126 
9127 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9128 }
9129 
9130 static ssize_t
buffer_percent_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9131 buffer_percent_write(struct file *filp, const char __user *ubuf,
9132 		     size_t cnt, loff_t *ppos)
9133 {
9134 	struct trace_array *tr = filp->private_data;
9135 	unsigned long val;
9136 	int ret;
9137 
9138 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9139 	if (ret)
9140 		return ret;
9141 
9142 	if (val > 100)
9143 		return -EINVAL;
9144 
9145 	tr->buffer_percent = val;
9146 
9147 	(*ppos)++;
9148 
9149 	return cnt;
9150 }
9151 
9152 static const struct file_operations buffer_percent_fops = {
9153 	.open		= tracing_open_generic_tr,
9154 	.read		= buffer_percent_read,
9155 	.write		= buffer_percent_write,
9156 	.release	= tracing_release_generic_tr,
9157 	.llseek		= default_llseek,
9158 };
9159 
9160 static ssize_t
buffer_subbuf_size_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9161 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9162 {
9163 	struct trace_array *tr = filp->private_data;
9164 	size_t size;
9165 	char buf[64];
9166 	int order;
9167 	int r;
9168 
9169 	order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9170 	size = (PAGE_SIZE << order) / 1024;
9171 
9172 	r = sprintf(buf, "%zd\n", size);
9173 
9174 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9175 }
9176 
9177 static ssize_t
buffer_subbuf_size_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9178 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9179 			 size_t cnt, loff_t *ppos)
9180 {
9181 	struct trace_array *tr = filp->private_data;
9182 	unsigned long val;
9183 	int old_order;
9184 	int order;
9185 	int pages;
9186 	int ret;
9187 
9188 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9189 	if (ret)
9190 		return ret;
9191 
9192 	val *= 1024; /* value passed in is in KB */
9193 
9194 	pages = DIV_ROUND_UP(val, PAGE_SIZE);
9195 	order = fls(pages - 1);
9196 
9197 	/* limit between 1 and 128 system pages */
9198 	if (order < 0 || order > 7)
9199 		return -EINVAL;
9200 
9201 	/* Do not allow tracing while changing the order of the ring buffer */
9202 	tracing_stop_tr(tr);
9203 
9204 	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9205 	if (old_order == order)
9206 		goto out;
9207 
9208 	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9209 	if (ret)
9210 		goto out;
9211 
9212 #ifdef CONFIG_TRACER_SNAPSHOT
9213 
9214 	if (!tr->allocated_snapshot)
9215 		goto out_max;
9216 
9217 	ret = ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, order);
9218 	if (ret) {
9219 		/* Put back the old order */
9220 		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9221 		if (WARN_ON_ONCE(cnt)) {
9222 			/*
9223 			 * AARGH! We are left with different orders!
9224 			 * The max buffer is our "snapshot" buffer.
9225 			 * When a tracer needs a snapshot (one of the
9226 			 * latency tracers), it swaps the max buffer
9227 			 * with the saved snap shot. We succeeded to
9228 			 * update the order of the main buffer, but failed to
9229 			 * update the order of the max buffer. But when we tried
9230 			 * to reset the main buffer to the original size, we
9231 			 * failed there too. This is very unlikely to
9232 			 * happen, but if it does, warn and kill all
9233 			 * tracing.
9234 			 */
9235 			tracing_disabled = 1;
9236 		}
9237 		goto out;
9238 	}
9239  out_max:
9240 #endif
9241 	(*ppos)++;
9242  out:
9243 	if (ret)
9244 		cnt = ret;
9245 	tracing_start_tr(tr);
9246 	return cnt;
9247 }
9248 
9249 static const struct file_operations buffer_subbuf_size_fops = {
9250 	.open		= tracing_open_generic_tr,
9251 	.read		= buffer_subbuf_size_read,
9252 	.write		= buffer_subbuf_size_write,
9253 	.release	= tracing_release_generic_tr,
9254 	.llseek		= default_llseek,
9255 };
9256 
9257 static struct dentry *trace_instance_dir;
9258 
9259 static void
9260 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9261 
9262 #ifdef CONFIG_MODULES
make_mod_delta(struct module * mod,void * data)9263 static int make_mod_delta(struct module *mod, void *data)
9264 {
9265 	struct trace_module_delta *module_delta;
9266 	struct trace_scratch *tscratch;
9267 	struct trace_mod_entry *entry;
9268 	struct trace_array *tr = data;
9269 	int i;
9270 
9271 	tscratch = tr->scratch;
9272 	module_delta = READ_ONCE(tr->module_delta);
9273 	for (i = 0; i < tscratch->nr_entries; i++) {
9274 		entry = &tscratch->entries[i];
9275 		if (strcmp(mod->name, entry->mod_name))
9276 			continue;
9277 		if (mod->state == MODULE_STATE_GOING)
9278 			module_delta->delta[i] = 0;
9279 		else
9280 			module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base
9281 						 - entry->mod_addr;
9282 		break;
9283 	}
9284 	return 0;
9285 }
9286 #else
make_mod_delta(struct module * mod,void * data)9287 static int make_mod_delta(struct module *mod, void *data)
9288 {
9289 	return 0;
9290 }
9291 #endif
9292 
mod_addr_comp(const void * a,const void * b,const void * data)9293 static int mod_addr_comp(const void *a, const void *b, const void *data)
9294 {
9295 	const struct trace_mod_entry *e1 = a;
9296 	const struct trace_mod_entry *e2 = b;
9297 
9298 	return e1->mod_addr > e2->mod_addr ? 1 : -1;
9299 }
9300 
setup_trace_scratch(struct trace_array * tr,struct trace_scratch * tscratch,unsigned int size)9301 static void setup_trace_scratch(struct trace_array *tr,
9302 				struct trace_scratch *tscratch, unsigned int size)
9303 {
9304 	struct trace_module_delta *module_delta;
9305 	struct trace_mod_entry *entry;
9306 	int i, nr_entries;
9307 
9308 	if (!tscratch)
9309 		return;
9310 
9311 	tr->scratch = tscratch;
9312 	tr->scratch_size = size;
9313 
9314 	if (tscratch->text_addr)
9315 		tr->text_delta = (unsigned long)_text - tscratch->text_addr;
9316 
9317 	if (struct_size(tscratch, entries, tscratch->nr_entries) > size)
9318 		goto reset;
9319 
9320 	/* Check if each module name is a valid string */
9321 	for (i = 0; i < tscratch->nr_entries; i++) {
9322 		int n;
9323 
9324 		entry = &tscratch->entries[i];
9325 
9326 		for (n = 0; n < MODULE_NAME_LEN; n++) {
9327 			if (entry->mod_name[n] == '\0')
9328 				break;
9329 			if (!isprint(entry->mod_name[n]))
9330 				goto reset;
9331 		}
9332 		if (n == MODULE_NAME_LEN)
9333 			goto reset;
9334 	}
9335 
9336 	/* Sort the entries so that we can find appropriate module from address. */
9337 	nr_entries = tscratch->nr_entries;
9338 	sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
9339 	       mod_addr_comp, NULL, NULL);
9340 
9341 	if (IS_ENABLED(CONFIG_MODULES)) {
9342 		module_delta = kzalloc_flex(*module_delta, delta, nr_entries);
9343 		if (!module_delta) {
9344 			pr_info("module_delta allocation failed. Not able to decode module address.");
9345 			goto reset;
9346 		}
9347 		init_rcu_head(&module_delta->rcu);
9348 	} else
9349 		module_delta = NULL;
9350 	WRITE_ONCE(tr->module_delta, module_delta);
9351 
9352 	/* Scan modules to make text delta for modules. */
9353 	module_for_each_mod(make_mod_delta, tr);
9354 
9355 	/* Set trace_clock as the same of the previous boot. */
9356 	if (tscratch->clock_id != tr->clock_id) {
9357 		if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) ||
9358 		    tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
9359 			pr_info("the previous trace_clock info is not valid.");
9360 			goto reset;
9361 		}
9362 	}
9363 	return;
9364  reset:
9365 	/* Invalid trace modules */
9366 	memset(tscratch, 0, size);
9367 }
9368 
9369 static int
allocate_trace_buffer(struct trace_array * tr,struct array_buffer * buf,unsigned long size)9370 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, unsigned long size)
9371 {
9372 	enum ring_buffer_flags rb_flags;
9373 	struct trace_scratch *tscratch;
9374 	unsigned int scratch_size = 0;
9375 
9376 	rb_flags = tr->trace_flags & TRACE_ITER(OVERWRITE) ? RB_FL_OVERWRITE : 0;
9377 
9378 	buf->tr = tr;
9379 
9380 	if (tr->range_addr_start && tr->range_addr_size) {
9381 		/* Add scratch buffer to handle 128 modules */
9382 		buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
9383 						      tr->range_addr_start,
9384 						      tr->range_addr_size,
9385 						      struct_size(tscratch, entries, 128));
9386 
9387 		tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size);
9388 		setup_trace_scratch(tr, tscratch, scratch_size);
9389 
9390 		/*
9391 		 * This is basically the same as a mapped buffer,
9392 		 * with the same restrictions.
9393 		 */
9394 		tr->mapped++;
9395 	} else {
9396 		buf->buffer = ring_buffer_alloc(size, rb_flags);
9397 	}
9398 	if (!buf->buffer)
9399 		return -ENOMEM;
9400 
9401 	buf->data = alloc_percpu(struct trace_array_cpu);
9402 	if (!buf->data) {
9403 		ring_buffer_free(buf->buffer);
9404 		buf->buffer = NULL;
9405 		return -ENOMEM;
9406 	}
9407 
9408 	/* Allocate the first page for all buffers */
9409 	set_buffer_entries(&tr->array_buffer,
9410 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9411 
9412 	return 0;
9413 }
9414 
free_trace_buffer(struct array_buffer * buf)9415 static void free_trace_buffer(struct array_buffer *buf)
9416 {
9417 	if (buf->buffer) {
9418 		ring_buffer_free(buf->buffer);
9419 		buf->buffer = NULL;
9420 		free_percpu(buf->data);
9421 		buf->data = NULL;
9422 	}
9423 }
9424 
allocate_trace_buffers(struct trace_array * tr,unsigned long size)9425 static int allocate_trace_buffers(struct trace_array *tr, unsigned long size)
9426 {
9427 	int ret;
9428 
9429 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9430 	if (ret)
9431 		return ret;
9432 
9433 #ifdef CONFIG_TRACER_SNAPSHOT
9434 	/* Fix mapped buffer trace arrays do not have snapshot buffers */
9435 	if (tr->range_addr_start)
9436 		return 0;
9437 
9438 	ret = allocate_trace_buffer(tr, &tr->snapshot_buffer,
9439 				    allocate_snapshot ? size : 1);
9440 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9441 		free_trace_buffer(&tr->array_buffer);
9442 		return -ENOMEM;
9443 	}
9444 	tr->allocated_snapshot = allocate_snapshot;
9445 
9446 	allocate_snapshot = false;
9447 #endif
9448 
9449 	return 0;
9450 }
9451 
free_trace_buffers(struct trace_array * tr)9452 static void free_trace_buffers(struct trace_array *tr)
9453 {
9454 	if (!tr)
9455 		return;
9456 
9457 	free_trace_buffer(&tr->array_buffer);
9458 	kfree(tr->module_delta);
9459 
9460 #ifdef CONFIG_TRACER_SNAPSHOT
9461 	free_trace_buffer(&tr->snapshot_buffer);
9462 #endif
9463 }
9464 
init_trace_flags_index(struct trace_array * tr)9465 static void init_trace_flags_index(struct trace_array *tr)
9466 {
9467 	int i;
9468 
9469 	/* Used by the trace options files */
9470 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9471 		tr->trace_flags_index[i] = i;
9472 }
9473 
__update_tracer(struct trace_array * tr)9474 static int __update_tracer(struct trace_array *tr)
9475 {
9476 	struct tracer *t;
9477 	int ret = 0;
9478 
9479 	for (t = trace_types; t && !ret; t = t->next)
9480 		ret = add_tracer(tr, t);
9481 
9482 	return ret;
9483 }
9484 
__update_tracer_options(struct trace_array * tr)9485 static __init int __update_tracer_options(struct trace_array *tr)
9486 {
9487 	struct tracers *t;
9488 	int ret = 0;
9489 
9490 	list_for_each_entry(t, &tr->tracers, list) {
9491 		ret = add_tracer_options(tr, t);
9492 		if (ret < 0)
9493 			break;
9494 	}
9495 
9496 	return ret;
9497 }
9498 
update_tracer_options(void)9499 static __init void update_tracer_options(void)
9500 {
9501 	struct trace_array *tr;
9502 
9503 	guard(mutex)(&trace_types_lock);
9504 	tracer_options_updated = true;
9505 	list_for_each_entry(tr, &ftrace_trace_arrays, list)
9506 		__update_tracer_options(tr);
9507 }
9508 
9509 /* Must have trace_types_lock held */
trace_array_find(const char * instance)9510 struct trace_array *trace_array_find(const char *instance)
9511 {
9512 	struct trace_array *tr, *found = NULL;
9513 
9514 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9515 		if (tr->name && strcmp(tr->name, instance) == 0) {
9516 			found = tr;
9517 			break;
9518 		}
9519 	}
9520 
9521 	return found;
9522 }
9523 
trace_array_find_get(const char * instance)9524 struct trace_array *trace_array_find_get(const char *instance)
9525 {
9526 	struct trace_array *tr;
9527 
9528 	guard(mutex)(&trace_types_lock);
9529 	tr = trace_array_find(instance);
9530 	if (tr)
9531 		tr->ref++;
9532 
9533 	return tr;
9534 }
9535 
trace_array_create_dir(struct trace_array * tr)9536 static int trace_array_create_dir(struct trace_array *tr)
9537 {
9538 	int ret;
9539 
9540 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9541 	if (!tr->dir)
9542 		return -EINVAL;
9543 
9544 	ret = event_trace_add_tracer(tr->dir, tr);
9545 	if (ret) {
9546 		tracefs_remove(tr->dir);
9547 		return ret;
9548 	}
9549 
9550 	init_tracer_tracefs(tr, tr->dir);
9551 	ret = __update_tracer(tr);
9552 	if (ret) {
9553 		event_trace_del_tracer(tr);
9554 		tracefs_remove(tr->dir);
9555 		return ret;
9556 	}
9557 	return 0;
9558 }
9559 
9560 static struct trace_array *
trace_array_create_systems(const char * name,const char * systems,unsigned long range_addr_start,unsigned long range_addr_size)9561 trace_array_create_systems(const char *name, const char *systems,
9562 			   unsigned long range_addr_start,
9563 			   unsigned long range_addr_size)
9564 {
9565 	struct trace_array *tr;
9566 	int ret;
9567 
9568 	ret = -ENOMEM;
9569 	tr = kzalloc_obj(*tr);
9570 	if (!tr)
9571 		return ERR_PTR(ret);
9572 
9573 	tr->name = kstrdup(name, GFP_KERNEL);
9574 	if (!tr->name)
9575 		goto out_free_tr;
9576 
9577 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9578 		goto out_free_tr;
9579 
9580 	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9581 		goto out_free_tr;
9582 
9583 	if (systems) {
9584 		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9585 		if (!tr->system_names)
9586 			goto out_free_tr;
9587 	}
9588 
9589 	/* Only for boot up memory mapped ring buffers */
9590 	tr->range_addr_start = range_addr_start;
9591 	tr->range_addr_size = range_addr_size;
9592 
9593 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9594 
9595 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9596 
9597 	raw_spin_lock_init(&tr->start_lock);
9598 
9599 	tr->syscall_buf_sz = global_trace.syscall_buf_sz;
9600 
9601 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9602 #ifdef CONFIG_TRACER_SNAPSHOT
9603 	spin_lock_init(&tr->snapshot_trigger_lock);
9604 #endif
9605 	tr->current_trace = &nop_trace;
9606 	tr->current_trace_flags = nop_trace.flags;
9607 
9608 	INIT_LIST_HEAD(&tr->systems);
9609 	INIT_LIST_HEAD(&tr->events);
9610 	INIT_LIST_HEAD(&tr->hist_vars);
9611 	INIT_LIST_HEAD(&tr->err_log);
9612 	INIT_LIST_HEAD(&tr->tracers);
9613 	INIT_LIST_HEAD(&tr->marker_list);
9614 
9615 #ifdef CONFIG_MODULES
9616 	INIT_LIST_HEAD(&tr->mod_events);
9617 #endif
9618 
9619 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9620 		goto out_free_tr;
9621 
9622 	/* The ring buffer is defaultly expanded */
9623 	trace_set_ring_buffer_expanded(tr);
9624 
9625 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9626 		goto out_free_tr;
9627 
9628 	ftrace_init_trace_array(tr);
9629 
9630 	init_trace_flags_index(tr);
9631 
9632 	if (trace_instance_dir) {
9633 		ret = trace_array_create_dir(tr);
9634 		if (ret)
9635 			goto out_free_tr;
9636 	} else
9637 		__trace_early_add_events(tr);
9638 
9639 	list_add(&tr->list, &ftrace_trace_arrays);
9640 
9641 	tr->ref++;
9642 
9643 	return tr;
9644 
9645  out_free_tr:
9646 	ftrace_free_ftrace_ops(tr);
9647 	free_trace_buffers(tr);
9648 	free_cpumask_var(tr->pipe_cpumask);
9649 	free_cpumask_var(tr->tracing_cpumask);
9650 	kfree_const(tr->system_names);
9651 	kfree(tr->range_name);
9652 	kfree(tr->name);
9653 	kfree(tr);
9654 
9655 	return ERR_PTR(ret);
9656 }
9657 
trace_array_create(const char * name)9658 static struct trace_array *trace_array_create(const char *name)
9659 {
9660 	return trace_array_create_systems(name, NULL, 0, 0);
9661 }
9662 
instance_mkdir(const char * name)9663 static int instance_mkdir(const char *name)
9664 {
9665 	struct trace_array *tr;
9666 	int ret;
9667 
9668 	guard(mutex)(&event_mutex);
9669 	guard(mutex)(&trace_types_lock);
9670 
9671 	ret = -EEXIST;
9672 	if (trace_array_find(name))
9673 		return -EEXIST;
9674 
9675 	tr = trace_array_create(name);
9676 
9677 	ret = PTR_ERR_OR_ZERO(tr);
9678 
9679 	return ret;
9680 }
9681 
9682 #ifdef CONFIG_MMU
map_pages(unsigned long start,unsigned long size)9683 static u64 map_pages(unsigned long start, unsigned long size)
9684 {
9685 	unsigned long vmap_start, vmap_end;
9686 	struct vm_struct *area;
9687 	int ret;
9688 
9689 	area = get_vm_area(size, VM_IOREMAP);
9690 	if (!area)
9691 		return 0;
9692 
9693 	vmap_start = (unsigned long) area->addr;
9694 	vmap_end = vmap_start + size;
9695 
9696 	ret = vmap_page_range(vmap_start, vmap_end,
9697 			      start, pgprot_nx(PAGE_KERNEL));
9698 	if (ret < 0) {
9699 		free_vm_area(area);
9700 		return 0;
9701 	}
9702 
9703 	return (u64)vmap_start;
9704 }
9705 #else
map_pages(unsigned long start,unsigned long size)9706 static inline u64 map_pages(unsigned long start, unsigned long size)
9707 {
9708 	return 0;
9709 }
9710 #endif
9711 
9712 /**
9713  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9714  * @name: The name of the trace array to be looked up/created.
9715  * @systems: A list of systems to create event directories for (NULL for all)
9716  *
9717  * Returns pointer to trace array with given name.
9718  * NULL, if it cannot be created.
9719  *
9720  * NOTE: This function increments the reference counter associated with the
9721  * trace array returned. This makes sure it cannot be freed while in use.
9722  * Use trace_array_put() once the trace array is no longer needed.
9723  * If the trace_array is to be freed, trace_array_destroy() needs to
9724  * be called after the trace_array_put(), or simply let user space delete
9725  * it from the tracefs instances directory. But until the
9726  * trace_array_put() is called, user space can not delete it.
9727  *
9728  */
trace_array_get_by_name(const char * name,const char * systems)9729 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9730 {
9731 	struct trace_array *tr;
9732 
9733 	guard(mutex)(&event_mutex);
9734 	guard(mutex)(&trace_types_lock);
9735 
9736 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9737 		if (tr->name && strcmp(tr->name, name) == 0) {
9738 			tr->ref++;
9739 			return tr;
9740 		}
9741 	}
9742 
9743 	tr = trace_array_create_systems(name, systems, 0, 0);
9744 
9745 	if (IS_ERR(tr))
9746 		tr = NULL;
9747 	else
9748 		tr->ref++;
9749 
9750 	return tr;
9751 }
9752 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9753 
__remove_instance(struct trace_array * tr)9754 static int __remove_instance(struct trace_array *tr)
9755 {
9756 	int i;
9757 
9758 	/* Reference counter for a newly created trace array = 1. */
9759 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9760 		return -EBUSY;
9761 
9762 	list_del(&tr->list);
9763 
9764 	if (printk_trace == tr)
9765 		update_printk_trace(&global_trace);
9766 
9767 	/* Must be done before disabling all the flags */
9768 	if (update_marker_trace(tr, 0))
9769 		synchronize_rcu();
9770 
9771 	/* Disable all the flags that were enabled coming in */
9772 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9773 		if ((1ULL << i) & ZEROED_TRACE_FLAGS)
9774 			set_tracer_flag(tr, 1ULL << i, 0);
9775 	}
9776 
9777 	tracing_set_nop(tr);
9778 	clear_ftrace_function_probes(tr);
9779 	event_trace_del_tracer(tr);
9780 	ftrace_clear_pids(tr);
9781 	ftrace_destroy_function_files(tr);
9782 	tracefs_remove(tr->dir);
9783 	free_percpu(tr->last_func_repeats);
9784 	free_trace_buffers(tr);
9785 	clear_tracing_err_log(tr);
9786 	free_tracers(tr);
9787 
9788 	if (tr->range_name) {
9789 		reserve_mem_release_by_name(tr->range_name);
9790 		kfree(tr->range_name);
9791 	}
9792 	if (tr->flags & TRACE_ARRAY_FL_VMALLOC)
9793 		vfree((void *)tr->range_addr_start);
9794 
9795 	for (i = 0; i < tr->nr_topts; i++) {
9796 		kfree(tr->topts[i].topts);
9797 	}
9798 	kfree(tr->topts);
9799 
9800 	free_cpumask_var(tr->pipe_cpumask);
9801 	free_cpumask_var(tr->tracing_cpumask);
9802 	kfree_const(tr->system_names);
9803 	kfree(tr->name);
9804 	kfree(tr);
9805 
9806 	return 0;
9807 }
9808 
trace_array_destroy(struct trace_array * this_tr)9809 int trace_array_destroy(struct trace_array *this_tr)
9810 {
9811 	struct trace_array *tr;
9812 
9813 	if (!this_tr)
9814 		return -EINVAL;
9815 
9816 	guard(mutex)(&event_mutex);
9817 	guard(mutex)(&trace_types_lock);
9818 
9819 
9820 	/* Making sure trace array exists before destroying it. */
9821 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9822 		if (tr == this_tr)
9823 			return __remove_instance(tr);
9824 	}
9825 
9826 	return -ENODEV;
9827 }
9828 EXPORT_SYMBOL_GPL(trace_array_destroy);
9829 
instance_rmdir(const char * name)9830 static int instance_rmdir(const char *name)
9831 {
9832 	struct trace_array *tr;
9833 
9834 	guard(mutex)(&event_mutex);
9835 	guard(mutex)(&trace_types_lock);
9836 
9837 	tr = trace_array_find(name);
9838 	if (!tr)
9839 		return -ENODEV;
9840 
9841 	return __remove_instance(tr);
9842 }
9843 
create_trace_instances(struct dentry * d_tracer)9844 static __init void create_trace_instances(struct dentry *d_tracer)
9845 {
9846 	struct trace_array *tr;
9847 
9848 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9849 							 instance_mkdir,
9850 							 instance_rmdir);
9851 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9852 		return;
9853 
9854 	guard(mutex)(&event_mutex);
9855 	guard(mutex)(&trace_types_lock);
9856 
9857 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9858 		if (!tr->name)
9859 			continue;
9860 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9861 			     "Failed to create instance directory\n"))
9862 			return;
9863 	}
9864 }
9865 
9866 static void
init_tracer_tracefs(struct trace_array * tr,struct dentry * d_tracer)9867 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9868 {
9869 	int cpu;
9870 
9871 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9872 			tr, &show_traces_fops);
9873 
9874 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9875 			tr, &set_tracer_fops);
9876 
9877 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9878 			  tr, &tracing_cpumask_fops);
9879 
9880 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9881 			  tr, &tracing_iter_fops);
9882 
9883 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9884 			  tr, &tracing_fops);
9885 
9886 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9887 			  tr, &tracing_pipe_fops);
9888 
9889 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9890 			  tr, &tracing_entries_fops);
9891 
9892 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9893 			  tr, &tracing_total_entries_fops);
9894 
9895 	trace_create_file("free_buffer", 0200, d_tracer,
9896 			  tr, &tracing_free_buffer_fops);
9897 
9898 	trace_create_file("trace_marker", 0220, d_tracer,
9899 			  tr, &tracing_mark_fops);
9900 
9901 	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
9902 
9903 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9904 			  tr, &tracing_mark_raw_fops);
9905 
9906 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9907 			  &trace_clock_fops);
9908 
9909 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9910 			  tr, &rb_simple_fops);
9911 
9912 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9913 			  &trace_time_stamp_mode_fops);
9914 
9915 	tr->buffer_percent = 50;
9916 
9917 	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9918 			tr, &buffer_percent_fops);
9919 
9920 	trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
9921 			  tr, &buffer_subbuf_size_fops);
9922 
9923 	trace_create_file("syscall_user_buf_size", TRACE_MODE_WRITE, d_tracer,
9924 			 tr, &tracing_syscall_buf_fops);
9925 
9926 	create_trace_options_dir(tr);
9927 
9928 	trace_create_maxlat_file(tr, d_tracer);
9929 
9930 	if (ftrace_create_function_files(tr, d_tracer))
9931 		MEM_FAIL(1, "Could not allocate function filter files");
9932 
9933 	if (tr->range_addr_start) {
9934 		trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
9935 				  tr, &last_boot_fops);
9936 #ifdef CONFIG_TRACER_SNAPSHOT
9937 	} else {
9938 		trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9939 				  tr, &snapshot_fops);
9940 #endif
9941 	}
9942 
9943 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9944 			  tr, &tracing_err_log_fops);
9945 
9946 	for_each_tracing_cpu(cpu)
9947 		tracing_init_tracefs_percpu(tr, cpu);
9948 
9949 	ftrace_init_tracefs(tr, d_tracer);
9950 }
9951 
9952 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
trace_automount(struct dentry * mntpt,void * ingore)9953 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9954 {
9955 	struct vfsmount *mnt;
9956 	struct file_system_type *type;
9957 	struct fs_context *fc;
9958 	int ret;
9959 
9960 	/*
9961 	 * To maintain backward compatibility for tools that mount
9962 	 * debugfs to get to the tracing facility, tracefs is automatically
9963 	 * mounted to the debugfs/tracing directory.
9964 	 */
9965 	type = get_fs_type("tracefs");
9966 	if (!type)
9967 		return NULL;
9968 
9969 	fc = fs_context_for_submount(type, mntpt);
9970 	put_filesystem(type);
9971 	if (IS_ERR(fc))
9972 		return ERR_CAST(fc);
9973 
9974 	pr_warn("NOTICE: Automounting of tracing to debugfs is deprecated and will be removed in 2030\n");
9975 
9976 	ret = vfs_parse_fs_string(fc, "source", "tracefs");
9977 	if (!ret)
9978 		mnt = fc_mount(fc);
9979 	else
9980 		mnt = ERR_PTR(ret);
9981 
9982 	put_fs_context(fc);
9983 	return mnt;
9984 }
9985 #endif
9986 
9987 /**
9988  * tracing_init_dentry - initialize top level trace array
9989  *
9990  * This is called when creating files or directories in the tracing
9991  * directory. It is called via fs_initcall() by any of the boot up code
9992  * and expects to return the dentry of the top level tracing directory.
9993  */
tracing_init_dentry(void)9994 int tracing_init_dentry(void)
9995 {
9996 	struct trace_array *tr = &global_trace;
9997 
9998 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9999 		pr_warn("Tracing disabled due to lockdown\n");
10000 		return -EPERM;
10001 	}
10002 
10003 	/* The top level trace array uses  NULL as parent */
10004 	if (tr->dir)
10005 		return 0;
10006 
10007 	if (WARN_ON(!tracefs_initialized()))
10008 		return -ENODEV;
10009 
10010 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
10011 	/*
10012 	 * As there may still be users that expect the tracing
10013 	 * files to exist in debugfs/tracing, we must automount
10014 	 * the tracefs file system there, so older tools still
10015 	 * work with the newer kernel.
10016 	 */
10017 	tr->dir = debugfs_create_automount("tracing", NULL,
10018 					   trace_automount, NULL);
10019 #endif
10020 
10021 	return 0;
10022 }
10023 
10024 extern struct trace_eval_map *__start_ftrace_eval_maps[];
10025 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
10026 
10027 struct workqueue_struct *trace_init_wq __initdata;
10028 static struct work_struct eval_map_work __initdata;
10029 static struct work_struct tracerfs_init_work __initdata;
10030 
eval_map_work_func(struct work_struct * work)10031 static void __init eval_map_work_func(struct work_struct *work)
10032 {
10033 	int len;
10034 
10035 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10036 	trace_event_update_with_eval_map(NULL, __start_ftrace_eval_maps, len);
10037 }
10038 
trace_eval_init(void)10039 static int __init trace_eval_init(void)
10040 {
10041 	INIT_WORK(&eval_map_work, eval_map_work_func);
10042 
10043 	trace_init_wq = alloc_workqueue("trace_init_wq", WQ_UNBOUND, 0);
10044 	if (!trace_init_wq) {
10045 		pr_err("Unable to allocate trace_init_wq\n");
10046 		/* Do work here */
10047 		eval_map_work_func(&eval_map_work);
10048 		return -ENOMEM;
10049 	}
10050 
10051 	queue_work(trace_init_wq, &eval_map_work);
10052 	return 0;
10053 }
10054 
10055 subsys_initcall(trace_eval_init);
10056 
trace_eval_sync(void)10057 static int __init trace_eval_sync(void)
10058 {
10059 	/* Make sure the eval map updates are finished */
10060 	if (trace_init_wq)
10061 		destroy_workqueue(trace_init_wq);
10062 	return 0;
10063 }
10064 
10065 late_initcall_sync(trace_eval_sync);
10066 
10067 
10068 #ifdef CONFIG_MODULES
10069 
module_exists(const char * module)10070 bool module_exists(const char *module)
10071 {
10072 	/* All modules have the symbol __this_module */
10073 	static const char this_mod[] = "__this_module";
10074 	char modname[MODULE_NAME_LEN + sizeof(this_mod) + 2];
10075 	unsigned long val;
10076 	int n;
10077 
10078 	n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
10079 
10080 	if (n > sizeof(modname) - 1)
10081 		return false;
10082 
10083 	val = module_kallsyms_lookup_name(modname);
10084 	return val != 0;
10085 }
10086 
trace_module_add_evals(struct module * mod)10087 static void trace_module_add_evals(struct module *mod)
10088 {
10089 	/*
10090 	 * Modules with bad taint do not have events created, do
10091 	 * not bother with enums either.
10092 	 */
10093 	if (trace_module_has_bad_taint(mod))
10094 		return;
10095 
10096 	/* Even if no trace_evals, this need to sanitize field types. */
10097 	trace_event_update_with_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10098 }
10099 
10100 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
trace_module_remove_evals(struct module * mod)10101 static void trace_module_remove_evals(struct module *mod)
10102 {
10103 	union trace_eval_map_item *map;
10104 	union trace_eval_map_item **last = &trace_eval_maps;
10105 
10106 	if (!mod->num_trace_evals)
10107 		return;
10108 
10109 	guard(mutex)(&trace_eval_mutex);
10110 
10111 	map = trace_eval_maps;
10112 
10113 	while (map) {
10114 		if (map->head.mod == mod)
10115 			break;
10116 		map = trace_eval_jmp_to_tail(map);
10117 		last = &map->tail.next;
10118 		map = map->tail.next;
10119 	}
10120 	if (!map)
10121 		return;
10122 
10123 	*last = trace_eval_jmp_to_tail(map)->tail.next;
10124 	kfree(map);
10125 }
10126 #else
trace_module_remove_evals(struct module * mod)10127 static inline void trace_module_remove_evals(struct module *mod) { }
10128 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10129 
trace_module_record(struct module * mod,bool add)10130 static void trace_module_record(struct module *mod, bool add)
10131 {
10132 	struct trace_array *tr;
10133 	unsigned long flags;
10134 
10135 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10136 		flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT);
10137 		/* Update any persistent trace array that has already been started */
10138 		if (flags == TRACE_ARRAY_FL_BOOT && add) {
10139 			guard(mutex)(&scratch_mutex);
10140 			save_mod(mod, tr);
10141 		} else if (flags & TRACE_ARRAY_FL_LAST_BOOT) {
10142 			/* Update delta if the module loaded in previous boot */
10143 			make_mod_delta(mod, tr);
10144 		}
10145 	}
10146 }
10147 
trace_module_notify(struct notifier_block * self,unsigned long val,void * data)10148 static int trace_module_notify(struct notifier_block *self,
10149 			       unsigned long val, void *data)
10150 {
10151 	struct module *mod = data;
10152 
10153 	switch (val) {
10154 	case MODULE_STATE_COMING:
10155 		trace_module_add_evals(mod);
10156 		trace_module_record(mod, true);
10157 		break;
10158 	case MODULE_STATE_GOING:
10159 		trace_module_remove_evals(mod);
10160 		trace_module_record(mod, false);
10161 		break;
10162 	}
10163 
10164 	return NOTIFY_OK;
10165 }
10166 
10167 static struct notifier_block trace_module_nb = {
10168 	.notifier_call = trace_module_notify,
10169 	.priority = 0,
10170 };
10171 #endif /* CONFIG_MODULES */
10172 
tracer_init_tracefs_work_func(struct work_struct * work)10173 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10174 {
10175 
10176 	event_trace_init();
10177 
10178 	init_tracer_tracefs(&global_trace, NULL);
10179 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
10180 
10181 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10182 			&global_trace, &tracing_thresh_fops);
10183 
10184 	trace_create_file("README", TRACE_MODE_READ, NULL,
10185 			NULL, &tracing_readme_fops);
10186 
10187 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10188 			NULL, &tracing_saved_cmdlines_fops);
10189 
10190 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10191 			  NULL, &tracing_saved_cmdlines_size_fops);
10192 
10193 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10194 			NULL, &tracing_saved_tgids_fops);
10195 
10196 	trace_create_eval_file(NULL);
10197 
10198 #ifdef CONFIG_MODULES
10199 	register_module_notifier(&trace_module_nb);
10200 #endif
10201 
10202 #ifdef CONFIG_DYNAMIC_FTRACE
10203 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10204 			NULL, &tracing_dyn_info_fops);
10205 #endif
10206 
10207 	create_trace_instances(NULL);
10208 
10209 	update_tracer_options();
10210 }
10211 
tracer_init_tracefs(void)10212 static __init int tracer_init_tracefs(void)
10213 {
10214 	int ret;
10215 
10216 	trace_access_lock_init();
10217 
10218 	ret = tracing_init_dentry();
10219 	if (ret)
10220 		return 0;
10221 
10222 	if (trace_init_wq) {
10223 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10224 		queue_work(trace_init_wq, &tracerfs_init_work);
10225 	} else {
10226 		tracer_init_tracefs_work_func(NULL);
10227 	}
10228 
10229 	if (rv_init_interface())
10230 		pr_err("RV: Error while creating the RV interface\n");
10231 
10232 	return 0;
10233 }
10234 
10235 fs_initcall(tracer_init_tracefs);
10236 
10237 static int trace_die_panic_handler(struct notifier_block *self,
10238 				unsigned long ev, void *unused);
10239 
10240 static struct notifier_block trace_panic_notifier = {
10241 	.notifier_call = trace_die_panic_handler,
10242 	.priority = INT_MAX - 1,
10243 };
10244 
10245 static struct notifier_block trace_die_notifier = {
10246 	.notifier_call = trace_die_panic_handler,
10247 	.priority = INT_MAX - 1,
10248 };
10249 
10250 /*
10251  * The idea is to execute the following die/panic callback early, in order
10252  * to avoid showing irrelevant information in the trace (like other panic
10253  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10254  * warnings get disabled (to prevent potential log flooding).
10255  */
trace_die_panic_handler(struct notifier_block * self,unsigned long ev,void * unused)10256 static int trace_die_panic_handler(struct notifier_block *self,
10257 				unsigned long ev, void *unused)
10258 {
10259 	if (!ftrace_dump_on_oops_enabled())
10260 		return NOTIFY_DONE;
10261 
10262 	/* The die notifier requires DIE_OOPS to trigger */
10263 	if (self == &trace_die_notifier && ev != DIE_OOPS)
10264 		return NOTIFY_DONE;
10265 
10266 	ftrace_dump(DUMP_PARAM);
10267 
10268 	return NOTIFY_DONE;
10269 }
10270 
10271 /*
10272  * printk is set to max of 1024, we really don't need it that big.
10273  * Nothing should be printing 1000 characters anyway.
10274  */
10275 #define TRACE_MAX_PRINT		1000
10276 
10277 /*
10278  * Define here KERN_TRACE so that we have one place to modify
10279  * it if we decide to change what log level the ftrace dump
10280  * should be at.
10281  */
10282 #define KERN_TRACE		KERN_EMERG
10283 
10284 void
trace_printk_seq(struct trace_seq * s)10285 trace_printk_seq(struct trace_seq *s)
10286 {
10287 	/* Probably should print a warning here. */
10288 	if (s->seq.len >= TRACE_MAX_PRINT)
10289 		s->seq.len = TRACE_MAX_PRINT;
10290 
10291 	/*
10292 	 * More paranoid code. Although the buffer size is set to
10293 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10294 	 * an extra layer of protection.
10295 	 */
10296 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10297 		s->seq.len = s->seq.size - 1;
10298 
10299 	/* should be zero ended, but we are paranoid. */
10300 	s->buffer[s->seq.len] = 0;
10301 
10302 	printk(KERN_TRACE "%s", s->buffer);
10303 
10304 	trace_seq_init(s);
10305 }
10306 
trace_init_iter(struct trace_iterator * iter,struct trace_array * tr)10307 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
10308 {
10309 	iter->tr = tr;
10310 	iter->trace = iter->tr->current_trace;
10311 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
10312 	iter->array_buffer = &tr->array_buffer;
10313 
10314 	if (iter->trace && iter->trace->open)
10315 		iter->trace->open(iter);
10316 
10317 	/* Annotate start of buffers if we had overruns */
10318 	if (ring_buffer_overruns(iter->array_buffer->buffer))
10319 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
10320 
10321 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
10322 	if (trace_clocks[iter->tr->clock_id].in_ns)
10323 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10324 
10325 	/* Can not use kmalloc for iter.temp and iter.fmt */
10326 	iter->temp = static_temp_buf;
10327 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
10328 	iter->fmt = static_fmt_buf;
10329 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
10330 }
10331 
trace_init_global_iter(struct trace_iterator * iter)10332 void trace_init_global_iter(struct trace_iterator *iter)
10333 {
10334 	trace_init_iter(iter, &global_trace);
10335 }
10336 
ftrace_dump_one(struct trace_array * tr,enum ftrace_dump_mode dump_mode)10337 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
10338 {
10339 	/* use static because iter can be a bit big for the stack */
10340 	static struct trace_iterator iter;
10341 	unsigned int old_userobj;
10342 	unsigned long flags;
10343 	int cnt = 0;
10344 
10345 	/*
10346 	 * Always turn off tracing when we dump.
10347 	 * We don't need to show trace output of what happens
10348 	 * between multiple crashes.
10349 	 *
10350 	 * If the user does a sysrq-z, then they can re-enable
10351 	 * tracing with echo 1 > tracing_on.
10352 	 */
10353 	tracer_tracing_off(tr);
10354 
10355 	local_irq_save(flags);
10356 
10357 	/* Simulate the iterator */
10358 	trace_init_iter(&iter, tr);
10359 
10360 	/* While dumping, do not allow the buffer to be enable */
10361 	tracer_tracing_disable(tr);
10362 
10363 	old_userobj = tr->trace_flags & TRACE_ITER(SYM_USEROBJ);
10364 
10365 	/* don't look at user memory in panic mode */
10366 	tr->trace_flags &= ~TRACE_ITER(SYM_USEROBJ);
10367 
10368 	if (dump_mode == DUMP_ORIG)
10369 		iter.cpu_file = raw_smp_processor_id();
10370 	else
10371 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10372 
10373 	if (tr == &global_trace)
10374 		printk(KERN_TRACE "Dumping ftrace buffer:\n");
10375 	else
10376 		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
10377 
10378 	/* Did function tracer already get disabled? */
10379 	if (ftrace_is_dead()) {
10380 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10381 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10382 	}
10383 
10384 	/*
10385 	 * We need to stop all tracing on all CPUS to read
10386 	 * the next buffer. This is a bit expensive, but is
10387 	 * not done often. We fill all what we can read,
10388 	 * and then release the locks again.
10389 	 */
10390 
10391 	while (!trace_empty(&iter)) {
10392 
10393 		if (!cnt)
10394 			printk(KERN_TRACE "---------------------------------\n");
10395 
10396 		cnt++;
10397 
10398 		trace_iterator_reset(&iter);
10399 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
10400 
10401 		if (trace_find_next_entry_inc(&iter) != NULL) {
10402 			int ret;
10403 
10404 			ret = print_trace_line(&iter);
10405 			if (ret != TRACE_TYPE_NO_CONSUME)
10406 				trace_consume(&iter);
10407 
10408 			trace_printk_seq(&iter.seq);
10409 		}
10410 		touch_nmi_watchdog();
10411 	}
10412 
10413 	if (!cnt)
10414 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
10415 	else
10416 		printk(KERN_TRACE "---------------------------------\n");
10417 
10418 	tr->trace_flags |= old_userobj;
10419 
10420 	tracer_tracing_enable(tr);
10421 	local_irq_restore(flags);
10422 }
10423 
ftrace_dump_by_param(void)10424 static void ftrace_dump_by_param(void)
10425 {
10426 	bool first_param = true;
10427 	char dump_param[MAX_TRACER_SIZE];
10428 	char *buf, *token, *inst_name;
10429 	struct trace_array *tr;
10430 
10431 	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
10432 	buf = dump_param;
10433 
10434 	while ((token = strsep(&buf, ",")) != NULL) {
10435 		if (first_param) {
10436 			first_param = false;
10437 			if (!strcmp("0", token))
10438 				continue;
10439 			else if (!strcmp("1", token)) {
10440 				ftrace_dump_one(&global_trace, DUMP_ALL);
10441 				continue;
10442 			}
10443 			else if (!strcmp("2", token) ||
10444 			  !strcmp("orig_cpu", token)) {
10445 				ftrace_dump_one(&global_trace, DUMP_ORIG);
10446 				continue;
10447 			}
10448 		}
10449 
10450 		inst_name = strsep(&token, "=");
10451 		tr = trace_array_find(inst_name);
10452 		if (!tr) {
10453 			printk(KERN_TRACE "Instance %s not found\n", inst_name);
10454 			continue;
10455 		}
10456 
10457 		if (token && (!strcmp("2", token) ||
10458 			  !strcmp("orig_cpu", token)))
10459 			ftrace_dump_one(tr, DUMP_ORIG);
10460 		else
10461 			ftrace_dump_one(tr, DUMP_ALL);
10462 	}
10463 }
10464 
ftrace_dump(enum ftrace_dump_mode oops_dump_mode)10465 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10466 {
10467 	static atomic_t dump_running;
10468 
10469 	/* Only allow one dump user at a time. */
10470 	if (atomic_inc_return(&dump_running) != 1) {
10471 		atomic_dec(&dump_running);
10472 		return;
10473 	}
10474 
10475 	switch (oops_dump_mode) {
10476 	case DUMP_ALL:
10477 		ftrace_dump_one(&global_trace, DUMP_ALL);
10478 		break;
10479 	case DUMP_ORIG:
10480 		ftrace_dump_one(&global_trace, DUMP_ORIG);
10481 		break;
10482 	case DUMP_PARAM:
10483 		ftrace_dump_by_param();
10484 		break;
10485 	case DUMP_NONE:
10486 		break;
10487 	default:
10488 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10489 		ftrace_dump_one(&global_trace, DUMP_ALL);
10490 	}
10491 
10492 	atomic_dec(&dump_running);
10493 }
10494 EXPORT_SYMBOL_GPL(ftrace_dump);
10495 
10496 #define WRITE_BUFSIZE  4096
10497 
trace_parse_run_command(struct file * file,const char __user * buffer,size_t count,loff_t * ppos,int (* createfn)(const char *))10498 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10499 				size_t count, loff_t *ppos,
10500 				int (*createfn)(const char *))
10501 {
10502 	char *kbuf __free(kfree) = NULL;
10503 	char *buf, *tmp;
10504 	int ret = 0;
10505 	size_t done = 0;
10506 	size_t size;
10507 
10508 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10509 	if (!kbuf)
10510 		return -ENOMEM;
10511 
10512 	while (done < count) {
10513 		size = count - done;
10514 
10515 		if (size >= WRITE_BUFSIZE)
10516 			size = WRITE_BUFSIZE - 1;
10517 
10518 		if (copy_from_user(kbuf, buffer + done, size))
10519 			return -EFAULT;
10520 
10521 		kbuf[size] = '\0';
10522 		buf = kbuf;
10523 		do {
10524 			tmp = strchr(buf, '\n');
10525 			if (tmp) {
10526 				*tmp = '\0';
10527 				size = tmp - buf + 1;
10528 			} else {
10529 				size = strlen(buf);
10530 				if (done + size < count) {
10531 					if (buf != kbuf)
10532 						break;
10533 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10534 					pr_warn("Line length is too long: Should be less than %d\n",
10535 						WRITE_BUFSIZE - 2);
10536 					return -EINVAL;
10537 				}
10538 			}
10539 			done += size;
10540 
10541 			/* Remove comments */
10542 			tmp = strchr(buf, '#');
10543 
10544 			if (tmp)
10545 				*tmp = '\0';
10546 
10547 			ret = createfn(buf);
10548 			if (ret)
10549 				return ret;
10550 			buf += size;
10551 
10552 		} while (done < count);
10553 	}
10554 	return done;
10555 }
10556 
10557 #ifdef CONFIG_TRACER_SNAPSHOT
tr_needs_alloc_snapshot(const char * name)10558 __init static bool tr_needs_alloc_snapshot(const char *name)
10559 {
10560 	char *test;
10561 	int len = strlen(name);
10562 	bool ret;
10563 
10564 	if (!boot_snapshot_index)
10565 		return false;
10566 
10567 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10568 	    boot_snapshot_info[len] == '\t')
10569 		return true;
10570 
10571 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10572 	if (!test)
10573 		return false;
10574 
10575 	sprintf(test, "\t%s\t", name);
10576 	ret = strstr(boot_snapshot_info, test) == NULL;
10577 	kfree(test);
10578 	return ret;
10579 }
10580 
do_allocate_snapshot(const char * name)10581 __init static void do_allocate_snapshot(const char *name)
10582 {
10583 	if (!tr_needs_alloc_snapshot(name))
10584 		return;
10585 
10586 	/*
10587 	 * When allocate_snapshot is set, the next call to
10588 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
10589 	 * will allocate the snapshot buffer. That will also clear
10590 	 * this flag.
10591 	 */
10592 	allocate_snapshot = true;
10593 }
10594 #else
do_allocate_snapshot(const char * name)10595 static inline void do_allocate_snapshot(const char *name) { }
10596 #endif
10597 
backup_instance_area(const char * backup,unsigned long * addr,phys_addr_t * size)10598 __init static int backup_instance_area(const char *backup,
10599 				       unsigned long *addr, phys_addr_t *size)
10600 {
10601 	struct trace_array *backup_tr;
10602 	void *allocated_vaddr = NULL;
10603 
10604 	backup_tr = trace_array_get_by_name(backup, NULL);
10605 	if (!backup_tr) {
10606 		pr_warn("Tracing: Instance %s is not found.\n", backup);
10607 		return -ENOENT;
10608 	}
10609 
10610 	if (!(backup_tr->flags & TRACE_ARRAY_FL_BOOT)) {
10611 		pr_warn("Tracing: Instance %s is not boot mapped.\n", backup);
10612 		trace_array_put(backup_tr);
10613 		return -EINVAL;
10614 	}
10615 
10616 	*size = backup_tr->range_addr_size;
10617 
10618 	allocated_vaddr = vzalloc(*size);
10619 	if (!allocated_vaddr) {
10620 		pr_warn("Tracing: Failed to allocate memory for copying instance %s (size 0x%lx)\n",
10621 			backup, (unsigned long)*size);
10622 		trace_array_put(backup_tr);
10623 		return -ENOMEM;
10624 	}
10625 
10626 	memcpy(allocated_vaddr,
10627 		(void *)backup_tr->range_addr_start, (size_t)*size);
10628 	*addr = (unsigned long)allocated_vaddr;
10629 
10630 	trace_array_put(backup_tr);
10631 	return 0;
10632 }
10633 
enable_instances(void)10634 __init static void enable_instances(void)
10635 {
10636 	struct trace_array *tr;
10637 	bool memmap_area = false;
10638 	char *curr_str;
10639 	char *name;
10640 	char *str;
10641 	char *tok;
10642 
10643 	/* A tab is always appended */
10644 	boot_instance_info[boot_instance_index - 1] = '\0';
10645 	str = boot_instance_info;
10646 
10647 	while ((curr_str = strsep(&str, "\t"))) {
10648 		phys_addr_t start = 0;
10649 		phys_addr_t size = 0;
10650 		unsigned long addr = 0;
10651 		bool traceprintk = false;
10652 		bool traceoff = false;
10653 		char *flag_delim;
10654 		char *addr_delim;
10655 		char *rname __free(kfree) = NULL;
10656 		char *backup;
10657 
10658 		tok = strsep(&curr_str, ",");
10659 
10660 		name = strsep(&tok, "=");
10661 		backup = tok;
10662 
10663 		flag_delim = strchr(name, '^');
10664 		addr_delim = strchr(name, '@');
10665 
10666 		if (addr_delim)
10667 			*addr_delim++ = '\0';
10668 
10669 		if (flag_delim)
10670 			*flag_delim++ = '\0';
10671 
10672 		if (backup) {
10673 			if (backup_instance_area(backup, &addr, &size) < 0)
10674 				continue;
10675 		}
10676 
10677 		if (flag_delim) {
10678 			char *flag;
10679 
10680 			while ((flag = strsep(&flag_delim, "^"))) {
10681 				if (strcmp(flag, "traceoff") == 0) {
10682 					traceoff = true;
10683 				} else if ((strcmp(flag, "printk") == 0) ||
10684 					   (strcmp(flag, "traceprintk") == 0) ||
10685 					   (strcmp(flag, "trace_printk") == 0)) {
10686 					traceprintk = true;
10687 				} else {
10688 					pr_info("Tracing: Invalid instance flag '%s' for %s\n",
10689 						flag, name);
10690 				}
10691 			}
10692 		}
10693 
10694 		tok = addr_delim;
10695 		if (tok && isdigit(*tok)) {
10696 			start = memparse(tok, &tok);
10697 			if (!start) {
10698 				pr_warn("Tracing: Invalid boot instance address for %s\n",
10699 					name);
10700 				continue;
10701 			}
10702 			if (*tok != ':') {
10703 				pr_warn("Tracing: No size specified for instance %s\n", name);
10704 				continue;
10705 			}
10706 			tok++;
10707 			size = memparse(tok, &tok);
10708 			if (!size) {
10709 				pr_warn("Tracing: Invalid boot instance size for %s\n",
10710 					name);
10711 				continue;
10712 			}
10713 			memmap_area = true;
10714 		} else if (tok) {
10715 			if (!reserve_mem_find_by_name(tok, &start, &size)) {
10716 				start = 0;
10717 				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
10718 				continue;
10719 			}
10720 			rname = kstrdup(tok, GFP_KERNEL);
10721 		}
10722 
10723 		if (start) {
10724 			/* Start and size must be page aligned */
10725 			if (start & ~PAGE_MASK) {
10726 				pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start);
10727 				continue;
10728 			}
10729 			if (size & ~PAGE_MASK) {
10730 				pr_warn("Tracing: mapping size %pa is not page aligned\n", &size);
10731 				continue;
10732 			}
10733 
10734 			if (memmap_area)
10735 				addr = map_pages(start, size);
10736 			else
10737 				addr = (unsigned long)phys_to_virt(start);
10738 			if (addr) {
10739 				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
10740 					name, &start, (unsigned long)size);
10741 			} else {
10742 				pr_warn("Tracing: Failed to map boot instance %s\n", name);
10743 				continue;
10744 			}
10745 		} else {
10746 			/* Only non mapped buffers have snapshot buffers */
10747 			if (IS_ENABLED(CONFIG_TRACER_SNAPSHOT))
10748 				do_allocate_snapshot(name);
10749 		}
10750 
10751 		tr = trace_array_create_systems(name, NULL, addr, size);
10752 		if (IS_ERR(tr)) {
10753 			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
10754 			continue;
10755 		}
10756 
10757 		if (traceoff)
10758 			tracer_tracing_off(tr);
10759 
10760 		if (traceprintk)
10761 			update_printk_trace(tr);
10762 
10763 		/*
10764 		 * memmap'd buffers can not be freed.
10765 		 */
10766 		if (memmap_area) {
10767 			tr->flags |= TRACE_ARRAY_FL_MEMMAP;
10768 			tr->ref++;
10769 		}
10770 
10771 		/*
10772 		 * Backup buffers can be freed but need vfree().
10773 		 */
10774 		if (backup)
10775 			tr->flags |= TRACE_ARRAY_FL_VMALLOC;
10776 
10777 		if (start || backup) {
10778 			tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
10779 			tr->range_name = no_free_ptr(rname);
10780 		}
10781 
10782 		while ((tok = strsep(&curr_str, ","))) {
10783 			early_enable_events(tr, tok, true);
10784 		}
10785 	}
10786 }
10787 
tracer_alloc_buffers(void)10788 __init static int tracer_alloc_buffers(void)
10789 {
10790 	unsigned long ring_buf_size;
10791 	int ret = -ENOMEM;
10792 
10793 
10794 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10795 		pr_warn("Tracing disabled due to lockdown\n");
10796 		return -EPERM;
10797 	}
10798 
10799 	/*
10800 	 * Make sure we don't accidentally add more trace options
10801 	 * than we have bits for.
10802 	 */
10803 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10804 
10805 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10806 		return -ENOMEM;
10807 
10808 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10809 		goto out_free_buffer_mask;
10810 
10811 	/* Only allocate trace_printk buffers if a trace_printk exists */
10812 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10813 		/* Must be called before global_trace.buffer is allocated */
10814 		trace_printk_init_buffers();
10815 
10816 	/* To save memory, keep the ring buffer size to its minimum */
10817 	if (global_trace.ring_buffer_expanded)
10818 		ring_buf_size = trace_buf_size;
10819 	else
10820 		ring_buf_size = 1;
10821 
10822 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10823 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10824 
10825 	raw_spin_lock_init(&global_trace.start_lock);
10826 
10827 	/*
10828 	 * The prepare callbacks allocates some memory for the ring buffer. We
10829 	 * don't free the buffer if the CPU goes down. If we were to free
10830 	 * the buffer, then the user would lose any trace that was in the
10831 	 * buffer. The memory will be removed once the "instance" is removed.
10832 	 */
10833 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10834 				      "trace/RB:prepare", trace_rb_cpu_prepare,
10835 				      NULL);
10836 	if (ret < 0)
10837 		goto out_free_cpumask;
10838 	/* Used for event triggers */
10839 	ret = -ENOMEM;
10840 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10841 	if (!temp_buffer)
10842 		goto out_rm_hp_state;
10843 
10844 	if (trace_create_savedcmd() < 0)
10845 		goto out_free_temp_buffer;
10846 
10847 	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10848 		goto out_free_savedcmd;
10849 
10850 	/* TODO: make the number of buffers hot pluggable with CPUS */
10851 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10852 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10853 		goto out_free_pipe_cpumask;
10854 	}
10855 	if (global_trace.buffer_disabled)
10856 		tracing_off();
10857 
10858 	if (trace_boot_clock) {
10859 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10860 		if (ret < 0)
10861 			pr_warn("Trace clock %s not defined, going back to default\n",
10862 				trace_boot_clock);
10863 	}
10864 
10865 	/*
10866 	 * register_tracer() might reference current_trace, so it
10867 	 * needs to be set before we register anything. This is
10868 	 * just a bootstrap of current_trace anyway.
10869 	 */
10870 	global_trace.current_trace = &nop_trace;
10871 	global_trace.current_trace_flags = nop_trace.flags;
10872 
10873 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10874 #ifdef CONFIG_TRACER_SNAPSHOT
10875 	spin_lock_init(&global_trace.snapshot_trigger_lock);
10876 #endif
10877 	ftrace_init_global_array_ops(&global_trace);
10878 
10879 #ifdef CONFIG_MODULES
10880 	INIT_LIST_HEAD(&global_trace.mod_events);
10881 #endif
10882 
10883 	init_trace_flags_index(&global_trace);
10884 
10885 	INIT_LIST_HEAD(&global_trace.tracers);
10886 
10887 	/* All seems OK, enable tracing */
10888 	tracing_disabled = 0;
10889 
10890 	atomic_notifier_chain_register(&panic_notifier_list,
10891 				       &trace_panic_notifier);
10892 
10893 	register_die_notifier(&trace_die_notifier);
10894 
10895 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10896 
10897 	global_trace.syscall_buf_sz = syscall_buf_size;
10898 
10899 	INIT_LIST_HEAD(&global_trace.systems);
10900 	INIT_LIST_HEAD(&global_trace.events);
10901 	INIT_LIST_HEAD(&global_trace.hist_vars);
10902 	INIT_LIST_HEAD(&global_trace.err_log);
10903 	list_add(&global_trace.marker_list, &marker_copies);
10904 	list_add(&global_trace.list, &ftrace_trace_arrays);
10905 
10906 	register_tracer(&nop_trace);
10907 
10908 	/* Function tracing may start here (via kernel command line) */
10909 	init_function_trace();
10910 
10911 	apply_trace_boot_options();
10912 
10913 	register_snapshot_cmd();
10914 
10915 	return 0;
10916 
10917 out_free_pipe_cpumask:
10918 	free_cpumask_var(global_trace.pipe_cpumask);
10919 out_free_savedcmd:
10920 	trace_free_saved_cmdlines_buffer();
10921 out_free_temp_buffer:
10922 	ring_buffer_free(temp_buffer);
10923 out_rm_hp_state:
10924 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10925 out_free_cpumask:
10926 	free_cpumask_var(global_trace.tracing_cpumask);
10927 out_free_buffer_mask:
10928 	free_cpumask_var(tracing_buffer_mask);
10929 	return ret;
10930 }
10931 
10932 #ifdef CONFIG_FUNCTION_TRACER
10933 /* Used to set module cached ftrace filtering at boot up */
trace_get_global_array(void)10934 struct trace_array *trace_get_global_array(void)
10935 {
10936 	return &global_trace;
10937 }
10938 #endif
10939 
ftrace_boot_snapshot(void)10940 void __init ftrace_boot_snapshot(void)
10941 {
10942 #ifdef CONFIG_TRACER_SNAPSHOT
10943 	struct trace_array *tr;
10944 
10945 	if (!snapshot_at_boot)
10946 		return;
10947 
10948 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10949 		if (!tr->allocated_snapshot)
10950 			continue;
10951 
10952 		tracing_snapshot_instance(tr);
10953 		trace_array_puts(tr, "** Boot snapshot taken **\n");
10954 	}
10955 #endif
10956 }
10957 
early_trace_init(void)10958 void __init early_trace_init(void)
10959 {
10960 	if (tracepoint_printk) {
10961 		tracepoint_print_iter = kzalloc_obj(*tracepoint_print_iter);
10962 		if (MEM_FAIL(!tracepoint_print_iter,
10963 			     "Failed to allocate trace iterator\n"))
10964 			tracepoint_printk = 0;
10965 		else
10966 			static_key_enable(&tracepoint_printk_key.key);
10967 	}
10968 	tracer_alloc_buffers();
10969 
10970 	init_events();
10971 }
10972 
trace_init(void)10973 void __init trace_init(void)
10974 {
10975 	trace_event_init();
10976 
10977 	if (boot_instance_index)
10978 		enable_instances();
10979 }
10980 
clear_boot_tracer(void)10981 __init static void clear_boot_tracer(void)
10982 {
10983 	/*
10984 	 * The default tracer at boot buffer is an init section.
10985 	 * This function is called in lateinit. If we did not
10986 	 * find the boot tracer, then clear it out, to prevent
10987 	 * later registration from accessing the buffer that is
10988 	 * about to be freed.
10989 	 */
10990 	if (!default_bootup_tracer)
10991 		return;
10992 
10993 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10994 	       default_bootup_tracer);
10995 	default_bootup_tracer = NULL;
10996 }
10997 
10998 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
tracing_set_default_clock(void)10999 __init static void tracing_set_default_clock(void)
11000 {
11001 	/* sched_clock_stable() is determined in late_initcall */
11002 	if (!trace_boot_clock && !sched_clock_stable()) {
11003 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
11004 			pr_warn("Can not set tracing clock due to lockdown\n");
11005 			return;
11006 		}
11007 
11008 		printk(KERN_WARNING
11009 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
11010 		       "If you want to keep using the local clock, then add:\n"
11011 		       "  \"trace_clock=local\"\n"
11012 		       "on the kernel command line\n");
11013 		tracing_set_clock(&global_trace, "global");
11014 	}
11015 }
11016 #else
tracing_set_default_clock(void)11017 static inline void tracing_set_default_clock(void) { }
11018 #endif
11019 
late_trace_init(void)11020 __init static int late_trace_init(void)
11021 {
11022 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
11023 		static_key_disable(&tracepoint_printk_key.key);
11024 		tracepoint_printk = 0;
11025 	}
11026 
11027 	if (traceoff_after_boot)
11028 		tracing_off();
11029 
11030 	tracing_set_default_clock();
11031 	clear_boot_tracer();
11032 	return 0;
11033 }
11034 
11035 late_initcall_sync(late_trace_init);
11036