xref: /linux/kernel/trace/trace.c (revision 09670b8c38b37bc2d6fc5d01fa7e02c38f7adf36)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/syscalls.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/cleanup.h>
31 #include <linux/vmalloc.h>
32 #include <linux/ftrace.h>
33 #include <linux/module.h>
34 #include <linux/percpu.h>
35 #include <linux/splice.h>
36 #include <linux/kdebug.h>
37 #include <linux/string.h>
38 #include <linux/mount.h>
39 #include <linux/rwsem.h>
40 #include <linux/slab.h>
41 #include <linux/ctype.h>
42 #include <linux/init.h>
43 #include <linux/panic_notifier.h>
44 #include <linux/poll.h>
45 #include <linux/nmi.h>
46 #include <linux/fs.h>
47 #include <linux/trace.h>
48 #include <linux/sched/clock.h>
49 #include <linux/sched/rt.h>
50 #include <linux/fsnotify.h>
51 #include <linux/irq_work.h>
52 #include <linux/workqueue.h>
53 #include <linux/sort.h>
54 #include <linux/io.h> /* vmap_page_range() */
55 #include <linux/fs_context.h>
56 
57 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
58 
59 #include "trace.h"
60 #include "trace_output.h"
61 
62 #ifdef CONFIG_FTRACE_STARTUP_TEST
63 /*
64  * We need to change this state when a selftest is running.
65  * A selftest will lurk into the ring-buffer to count the
66  * entries inserted during the selftest although some concurrent
67  * insertions into the ring-buffer such as trace_printk could occurred
68  * at the same time, giving false positive or negative results.
69  */
70 static bool __read_mostly tracing_selftest_running;
71 
72 /*
73  * If boot-time tracing including tracers/events via kernel cmdline
74  * is running, we do not want to run SELFTEST.
75  */
76 bool __read_mostly tracing_selftest_disabled;
77 
disable_tracing_selftest(const char * reason)78 void __init disable_tracing_selftest(const char *reason)
79 {
80 	if (!tracing_selftest_disabled) {
81 		tracing_selftest_disabled = true;
82 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
83 	}
84 }
85 #else
86 #define tracing_selftest_running	0
87 #define tracing_selftest_disabled	0
88 #endif
89 
90 /* Pipe tracepoints to printk */
91 static struct trace_iterator *tracepoint_print_iter;
92 int tracepoint_printk;
93 static bool tracepoint_printk_stop_on_boot __initdata;
94 static bool traceoff_after_boot __initdata;
95 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
96 
97 /* Store tracers and their flags per instance */
98 struct tracers {
99 	struct list_head	list;
100 	struct tracer		*tracer;
101 	struct tracer_flags	*flags;
102 };
103 
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 DEFINE_PER_CPU(bool, trace_taskinfo_save);
110 
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will turn to zero if the initialization
114  * of the tracer is successful. But that is the only place that sets
115  * this back to zero.
116  */
117 static int tracing_disabled = 1;
118 
119 cpumask_var_t __read_mostly	tracing_buffer_mask;
120 
121 #define MAX_TRACER_SIZE		100
122 /*
123  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
124  *
125  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
126  * is set, then ftrace_dump is called. This will output the contents
127  * of the ftrace buffers to the console.  This is very useful for
128  * capturing traces that lead to crashes and outputting it to a
129  * serial console.
130  *
131  * It is default off, but you can enable it with either specifying
132  * "ftrace_dump_on_oops" in the kernel command line, or setting
133  * /proc/sys/kernel/ftrace_dump_on_oops
134  * Set 1 if you want to dump buffers of all CPUs
135  * Set 2 if you want to dump the buffer of the CPU that triggered oops
136  * Set instance name if you want to dump the specific trace instance
137  * Multiple instance dump is also supported, and instances are separated
138  * by commas.
139  */
140 /* Set to string format zero to disable by default */
141 char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
142 
143 /* When set, tracing will stop when a WARN*() is hit */
144 static int __disable_trace_on_warning;
145 
146 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
147 			     void *buffer, size_t *lenp, loff_t *ppos);
148 static const struct ctl_table trace_sysctl_table[] = {
149 	{
150 		.procname	= "ftrace_dump_on_oops",
151 		.data		= &ftrace_dump_on_oops,
152 		.maxlen		= MAX_TRACER_SIZE,
153 		.mode		= 0644,
154 		.proc_handler	= proc_dostring,
155 	},
156 	{
157 		.procname	= "traceoff_on_warning",
158 		.data		= &__disable_trace_on_warning,
159 		.maxlen		= sizeof(__disable_trace_on_warning),
160 		.mode		= 0644,
161 		.proc_handler	= proc_dointvec,
162 	},
163 	{
164 		.procname	= "tracepoint_printk",
165 		.data		= &tracepoint_printk,
166 		.maxlen		= sizeof(tracepoint_printk),
167 		.mode		= 0644,
168 		.proc_handler	= tracepoint_printk_sysctl,
169 	},
170 };
171 
init_trace_sysctls(void)172 static int __init init_trace_sysctls(void)
173 {
174 	register_sysctl_init("kernel", trace_sysctl_table);
175 	return 0;
176 }
177 subsys_initcall(init_trace_sysctls);
178 
179 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
180 /* Map of enums to their values, for "eval_map" file */
181 struct trace_eval_map_head {
182 	struct module			*mod;
183 	unsigned long			length;
184 };
185 
186 union trace_eval_map_item;
187 
188 struct trace_eval_map_tail {
189 	/*
190 	 * "end" is first and points to NULL as it must be different
191 	 * than "mod" or "eval_string"
192 	 */
193 	union trace_eval_map_item	*next;
194 	const char			*end;	/* points to NULL */
195 };
196 
197 static DEFINE_MUTEX(trace_eval_mutex);
198 
199 /*
200  * The trace_eval_maps are saved in an array with two extra elements,
201  * one at the beginning, and one at the end. The beginning item contains
202  * the count of the saved maps (head.length), and the module they
203  * belong to if not built in (head.mod). The ending item contains a
204  * pointer to the next array of saved eval_map items.
205  */
206 union trace_eval_map_item {
207 	struct trace_eval_map		map;
208 	struct trace_eval_map_head	head;
209 	struct trace_eval_map_tail	tail;
210 };
211 
212 static union trace_eval_map_item *trace_eval_maps;
213 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
214 
215 int tracing_set_tracer(struct trace_array *tr, const char *buf);
216 static void ftrace_trace_userstack(struct trace_array *tr,
217 				   struct trace_buffer *buffer,
218 				   unsigned int trace_ctx);
219 
220 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
221 static char *default_bootup_tracer;
222 
223 static bool allocate_snapshot;
224 static bool snapshot_at_boot;
225 
226 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
227 static int boot_instance_index;
228 
229 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
230 static int boot_snapshot_index;
231 
set_cmdline_ftrace(char * str)232 static int __init set_cmdline_ftrace(char *str)
233 {
234 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
235 	default_bootup_tracer = bootup_tracer_buf;
236 	/* We are using ftrace early, expand it */
237 	trace_set_ring_buffer_expanded(NULL);
238 	return 1;
239 }
240 __setup("ftrace=", set_cmdline_ftrace);
241 
ftrace_dump_on_oops_enabled(void)242 int ftrace_dump_on_oops_enabled(void)
243 {
244 	if (!strcmp("0", ftrace_dump_on_oops))
245 		return 0;
246 	else
247 		return 1;
248 }
249 
set_ftrace_dump_on_oops(char * str)250 static int __init set_ftrace_dump_on_oops(char *str)
251 {
252 	if (!*str) {
253 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
254 		return 1;
255 	}
256 
257 	if (*str == ',') {
258 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
259 		strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
260 		return 1;
261 	}
262 
263 	if (*str++ == '=') {
264 		strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
265 		return 1;
266 	}
267 
268 	return 0;
269 }
270 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
271 
stop_trace_on_warning(char * str)272 static int __init stop_trace_on_warning(char *str)
273 {
274 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
275 		__disable_trace_on_warning = 1;
276 	return 1;
277 }
278 __setup("traceoff_on_warning", stop_trace_on_warning);
279 
boot_alloc_snapshot(char * str)280 static int __init boot_alloc_snapshot(char *str)
281 {
282 	char *slot = boot_snapshot_info + boot_snapshot_index;
283 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
284 	int ret;
285 
286 	if (str[0] == '=') {
287 		str++;
288 		if (strlen(str) >= left)
289 			return -1;
290 
291 		ret = snprintf(slot, left, "%s\t", str);
292 		boot_snapshot_index += ret;
293 	} else {
294 		allocate_snapshot = true;
295 		/* We also need the main ring buffer expanded */
296 		trace_set_ring_buffer_expanded(NULL);
297 	}
298 	return 1;
299 }
300 __setup("alloc_snapshot", boot_alloc_snapshot);
301 
302 
boot_snapshot(char * str)303 static int __init boot_snapshot(char *str)
304 {
305 	snapshot_at_boot = true;
306 	boot_alloc_snapshot(str);
307 	return 1;
308 }
309 __setup("ftrace_boot_snapshot", boot_snapshot);
310 
311 
boot_instance(char * str)312 static int __init boot_instance(char *str)
313 {
314 	char *slot = boot_instance_info + boot_instance_index;
315 	int left = sizeof(boot_instance_info) - boot_instance_index;
316 	int ret;
317 
318 	if (strlen(str) >= left)
319 		return -1;
320 
321 	ret = snprintf(slot, left, "%s\t", str);
322 	boot_instance_index += ret;
323 
324 	return 1;
325 }
326 __setup("trace_instance=", boot_instance);
327 
328 
329 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
330 
set_trace_boot_options(char * str)331 static int __init set_trace_boot_options(char *str)
332 {
333 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
334 	return 1;
335 }
336 __setup("trace_options=", set_trace_boot_options);
337 
338 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
339 static char *trace_boot_clock __initdata;
340 
set_trace_boot_clock(char * str)341 static int __init set_trace_boot_clock(char *str)
342 {
343 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
344 	trace_boot_clock = trace_boot_clock_buf;
345 	return 1;
346 }
347 __setup("trace_clock=", set_trace_boot_clock);
348 
set_tracepoint_printk(char * str)349 static int __init set_tracepoint_printk(char *str)
350 {
351 	/* Ignore the "tp_printk_stop_on_boot" param */
352 	if (*str == '_')
353 		return 0;
354 
355 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
356 		tracepoint_printk = 1;
357 	return 1;
358 }
359 __setup("tp_printk", set_tracepoint_printk);
360 
set_tracepoint_printk_stop(char * str)361 static int __init set_tracepoint_printk_stop(char *str)
362 {
363 	tracepoint_printk_stop_on_boot = true;
364 	return 1;
365 }
366 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
367 
set_traceoff_after_boot(char * str)368 static int __init set_traceoff_after_boot(char *str)
369 {
370 	traceoff_after_boot = true;
371 	return 1;
372 }
373 __setup("traceoff_after_boot", set_traceoff_after_boot);
374 
ns2usecs(u64 nsec)375 unsigned long long ns2usecs(u64 nsec)
376 {
377 	nsec += 500;
378 	do_div(nsec, 1000);
379 	return nsec;
380 }
381 
382 static void
trace_process_export(struct trace_export * export,struct ring_buffer_event * event,int flag)383 trace_process_export(struct trace_export *export,
384 	       struct ring_buffer_event *event, int flag)
385 {
386 	struct trace_entry *entry;
387 	unsigned int size = 0;
388 
389 	if (export->flags & flag) {
390 		entry = ring_buffer_event_data(event);
391 		size = ring_buffer_event_length(event);
392 		export->write(export, entry, size);
393 	}
394 }
395 
396 static DEFINE_MUTEX(ftrace_export_lock);
397 
398 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
399 
400 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
401 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
402 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
403 
ftrace_exports_enable(struct trace_export * export)404 static inline void ftrace_exports_enable(struct trace_export *export)
405 {
406 	if (export->flags & TRACE_EXPORT_FUNCTION)
407 		static_branch_inc(&trace_function_exports_enabled);
408 
409 	if (export->flags & TRACE_EXPORT_EVENT)
410 		static_branch_inc(&trace_event_exports_enabled);
411 
412 	if (export->flags & TRACE_EXPORT_MARKER)
413 		static_branch_inc(&trace_marker_exports_enabled);
414 }
415 
ftrace_exports_disable(struct trace_export * export)416 static inline void ftrace_exports_disable(struct trace_export *export)
417 {
418 	if (export->flags & TRACE_EXPORT_FUNCTION)
419 		static_branch_dec(&trace_function_exports_enabled);
420 
421 	if (export->flags & TRACE_EXPORT_EVENT)
422 		static_branch_dec(&trace_event_exports_enabled);
423 
424 	if (export->flags & TRACE_EXPORT_MARKER)
425 		static_branch_dec(&trace_marker_exports_enabled);
426 }
427 
ftrace_exports(struct ring_buffer_event * event,int flag)428 static void ftrace_exports(struct ring_buffer_event *event, int flag)
429 {
430 	struct trace_export *export;
431 
432 	guard(preempt_notrace)();
433 
434 	export = rcu_dereference_raw_check(ftrace_exports_list);
435 	while (export) {
436 		trace_process_export(export, event, flag);
437 		export = rcu_dereference_raw_check(export->next);
438 	}
439 }
440 
441 static inline void
add_trace_export(struct trace_export ** list,struct trace_export * export)442 add_trace_export(struct trace_export **list, struct trace_export *export)
443 {
444 	rcu_assign_pointer(export->next, *list);
445 	/*
446 	 * We are entering export into the list but another
447 	 * CPU might be walking that list. We need to make sure
448 	 * the export->next pointer is valid before another CPU sees
449 	 * the export pointer included into the list.
450 	 */
451 	rcu_assign_pointer(*list, export);
452 }
453 
454 static inline int
rm_trace_export(struct trace_export ** list,struct trace_export * export)455 rm_trace_export(struct trace_export **list, struct trace_export *export)
456 {
457 	struct trace_export **p;
458 
459 	for (p = list; *p != NULL; p = &(*p)->next)
460 		if (*p == export)
461 			break;
462 
463 	if (*p != export)
464 		return -1;
465 
466 	rcu_assign_pointer(*p, (*p)->next);
467 
468 	return 0;
469 }
470 
471 static inline void
add_ftrace_export(struct trace_export ** list,struct trace_export * export)472 add_ftrace_export(struct trace_export **list, struct trace_export *export)
473 {
474 	ftrace_exports_enable(export);
475 
476 	add_trace_export(list, export);
477 }
478 
479 static inline int
rm_ftrace_export(struct trace_export ** list,struct trace_export * export)480 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
481 {
482 	int ret;
483 
484 	ret = rm_trace_export(list, export);
485 	ftrace_exports_disable(export);
486 
487 	return ret;
488 }
489 
register_ftrace_export(struct trace_export * export)490 int register_ftrace_export(struct trace_export *export)
491 {
492 	if (WARN_ON_ONCE(!export->write))
493 		return -1;
494 
495 	guard(mutex)(&ftrace_export_lock);
496 
497 	add_ftrace_export(&ftrace_exports_list, export);
498 
499 	return 0;
500 }
501 EXPORT_SYMBOL_GPL(register_ftrace_export);
502 
unregister_ftrace_export(struct trace_export * export)503 int unregister_ftrace_export(struct trace_export *export)
504 {
505 	guard(mutex)(&ftrace_export_lock);
506 	return rm_ftrace_export(&ftrace_exports_list, export);
507 }
508 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
509 
510 /* trace_flags holds trace_options default values */
511 #define TRACE_DEFAULT_FLAGS						\
512 	(FUNCTION_DEFAULT_FLAGS | FPROFILE_DEFAULT_FLAGS |		\
513 	 TRACE_ITER(PRINT_PARENT) | TRACE_ITER(PRINTK) |		\
514 	 TRACE_ITER(ANNOTATE) | TRACE_ITER(CONTEXT_INFO) |		\
515 	 TRACE_ITER(RECORD_CMD) | TRACE_ITER(OVERWRITE) |		\
516 	 TRACE_ITER(IRQ_INFO) | TRACE_ITER(MARKERS) |			\
517 	 TRACE_ITER(HASH_PTR) | TRACE_ITER(TRACE_PRINTK) |		\
518 	 TRACE_ITER(COPY_MARKER))
519 
520 /* trace_options that are only supported by global_trace */
521 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER(PRINTK) |			\
522 	       TRACE_ITER(PRINTK_MSGONLY) | TRACE_ITER(RECORD_CMD) |	\
523 	       TRACE_ITER(PROF_TEXT_OFFSET) | FPROFILE_DEFAULT_FLAGS)
524 
525 /* trace_flags that are default zero for instances */
526 #define ZEROED_TRACE_FLAGS \
527 	(TRACE_ITER(EVENT_FORK) | TRACE_ITER(FUNC_FORK) | TRACE_ITER(TRACE_PRINTK) | \
528 	 TRACE_ITER(COPY_MARKER))
529 
530 /*
531  * The global_trace is the descriptor that holds the top-level tracing
532  * buffers for the live tracing.
533  */
534 static struct trace_array global_trace = {
535 	.trace_flags = TRACE_DEFAULT_FLAGS,
536 };
537 
538 static struct trace_array *printk_trace = &global_trace;
539 
540 /* List of trace_arrays interested in the top level trace_marker */
541 static LIST_HEAD(marker_copies);
542 
printk_binsafe(struct trace_array * tr)543 static __always_inline bool printk_binsafe(struct trace_array *tr)
544 {
545 	/*
546 	 * The binary format of traceprintk can cause a crash if used
547 	 * by a buffer from another boot. Force the use of the
548 	 * non binary version of trace_printk if the trace_printk
549 	 * buffer is a boot mapped ring buffer.
550 	 */
551 	return !(tr->flags & TRACE_ARRAY_FL_BOOT);
552 }
553 
update_printk_trace(struct trace_array * tr)554 static void update_printk_trace(struct trace_array *tr)
555 {
556 	if (printk_trace == tr)
557 		return;
558 
559 	printk_trace->trace_flags &= ~TRACE_ITER(TRACE_PRINTK);
560 	printk_trace = tr;
561 	tr->trace_flags |= TRACE_ITER(TRACE_PRINTK);
562 }
563 
564 /* Returns true if the status of tr changed */
update_marker_trace(struct trace_array * tr,int enabled)565 static bool update_marker_trace(struct trace_array *tr, int enabled)
566 {
567 	lockdep_assert_held(&event_mutex);
568 
569 	if (enabled) {
570 		if (!list_empty(&tr->marker_list))
571 			return false;
572 
573 		list_add_rcu(&tr->marker_list, &marker_copies);
574 		tr->trace_flags |= TRACE_ITER(COPY_MARKER);
575 		return true;
576 	}
577 
578 	if (list_empty(&tr->marker_list))
579 		return false;
580 
581 	list_del_init(&tr->marker_list);
582 	tr->trace_flags &= ~TRACE_ITER(COPY_MARKER);
583 	return true;
584 }
585 
trace_set_ring_buffer_expanded(struct trace_array * tr)586 void trace_set_ring_buffer_expanded(struct trace_array *tr)
587 {
588 	if (!tr)
589 		tr = &global_trace;
590 	tr->ring_buffer_expanded = true;
591 }
592 
593 LIST_HEAD(ftrace_trace_arrays);
594 
trace_array_get(struct trace_array * this_tr)595 int trace_array_get(struct trace_array *this_tr)
596 {
597 	struct trace_array *tr;
598 
599 	guard(mutex)(&trace_types_lock);
600 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
601 		if (tr == this_tr) {
602 			tr->ref++;
603 			return 0;
604 		}
605 	}
606 
607 	return -ENODEV;
608 }
609 
__trace_array_put(struct trace_array * this_tr)610 static void __trace_array_put(struct trace_array *this_tr)
611 {
612 	WARN_ON(!this_tr->ref);
613 	this_tr->ref--;
614 }
615 
616 /**
617  * trace_array_put - Decrement the reference counter for this trace array.
618  * @this_tr : pointer to the trace array
619  *
620  * NOTE: Use this when we no longer need the trace array returned by
621  * trace_array_get_by_name(). This ensures the trace array can be later
622  * destroyed.
623  *
624  */
trace_array_put(struct trace_array * this_tr)625 void trace_array_put(struct trace_array *this_tr)
626 {
627 	if (!this_tr)
628 		return;
629 
630 	guard(mutex)(&trace_types_lock);
631 	__trace_array_put(this_tr);
632 }
633 EXPORT_SYMBOL_GPL(trace_array_put);
634 
tracing_check_open_get_tr(struct trace_array * tr)635 int tracing_check_open_get_tr(struct trace_array *tr)
636 {
637 	int ret;
638 
639 	ret = security_locked_down(LOCKDOWN_TRACEFS);
640 	if (ret)
641 		return ret;
642 
643 	if (tracing_disabled)
644 		return -ENODEV;
645 
646 	if (tr && trace_array_get(tr) < 0)
647 		return -ENODEV;
648 
649 	return 0;
650 }
651 
652 /**
653  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
654  * @filtered_pids: The list of pids to check
655  * @search_pid: The PID to find in @filtered_pids
656  *
657  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
658  */
659 bool
trace_find_filtered_pid(struct trace_pid_list * filtered_pids,pid_t search_pid)660 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
661 {
662 	return trace_pid_list_is_set(filtered_pids, search_pid);
663 }
664 
665 /**
666  * trace_ignore_this_task - should a task be ignored for tracing
667  * @filtered_pids: The list of pids to check
668  * @filtered_no_pids: The list of pids not to be traced
669  * @task: The task that should be ignored if not filtered
670  *
671  * Checks if @task should be traced or not from @filtered_pids.
672  * Returns true if @task should *NOT* be traced.
673  * Returns false if @task should be traced.
674  */
675 bool
trace_ignore_this_task(struct trace_pid_list * filtered_pids,struct trace_pid_list * filtered_no_pids,struct task_struct * task)676 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
677 		       struct trace_pid_list *filtered_no_pids,
678 		       struct task_struct *task)
679 {
680 	/*
681 	 * If filtered_no_pids is not empty, and the task's pid is listed
682 	 * in filtered_no_pids, then return true.
683 	 * Otherwise, if filtered_pids is empty, that means we can
684 	 * trace all tasks. If it has content, then only trace pids
685 	 * within filtered_pids.
686 	 */
687 
688 	return (filtered_pids &&
689 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
690 		(filtered_no_pids &&
691 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
692 }
693 
694 /**
695  * trace_filter_add_remove_task - Add or remove a task from a pid_list
696  * @pid_list: The list to modify
697  * @self: The current task for fork or NULL for exit
698  * @task: The task to add or remove
699  *
700  * If adding a task, if @self is defined, the task is only added if @self
701  * is also included in @pid_list. This happens on fork and tasks should
702  * only be added when the parent is listed. If @self is NULL, then the
703  * @task pid will be removed from the list, which would happen on exit
704  * of a task.
705  */
trace_filter_add_remove_task(struct trace_pid_list * pid_list,struct task_struct * self,struct task_struct * task)706 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
707 				  struct task_struct *self,
708 				  struct task_struct *task)
709 {
710 	if (!pid_list)
711 		return;
712 
713 	/* For forks, we only add if the forking task is listed */
714 	if (self) {
715 		if (!trace_find_filtered_pid(pid_list, self->pid))
716 			return;
717 	}
718 
719 	/* "self" is set for forks, and NULL for exits */
720 	if (self)
721 		trace_pid_list_set(pid_list, task->pid);
722 	else
723 		trace_pid_list_clear(pid_list, task->pid);
724 }
725 
726 /**
727  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
728  * @pid_list: The pid list to show
729  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
730  * @pos: The position of the file
731  *
732  * This is used by the seq_file "next" operation to iterate the pids
733  * listed in a trace_pid_list structure.
734  *
735  * Returns the pid+1 as we want to display pid of zero, but NULL would
736  * stop the iteration.
737  */
trace_pid_next(struct trace_pid_list * pid_list,void * v,loff_t * pos)738 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
739 {
740 	long pid = (unsigned long)v;
741 	unsigned int next;
742 
743 	(*pos)++;
744 
745 	/* pid already is +1 of the actual previous bit */
746 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
747 		return NULL;
748 
749 	pid = next;
750 
751 	/* Return pid + 1 to allow zero to be represented */
752 	return (void *)(pid + 1);
753 }
754 
755 /**
756  * trace_pid_start - Used for seq_file to start reading pid lists
757  * @pid_list: The pid list to show
758  * @pos: The position of the file
759  *
760  * This is used by seq_file "start" operation to start the iteration
761  * of listing pids.
762  *
763  * Returns the pid+1 as we want to display pid of zero, but NULL would
764  * stop the iteration.
765  */
trace_pid_start(struct trace_pid_list * pid_list,loff_t * pos)766 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
767 {
768 	unsigned long pid;
769 	unsigned int first;
770 	loff_t l = 0;
771 
772 	if (trace_pid_list_first(pid_list, &first) < 0)
773 		return NULL;
774 
775 	pid = first;
776 
777 	/* Return pid + 1 so that zero can be the exit value */
778 	for (pid++; pid && l < *pos;
779 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
780 		;
781 	return (void *)pid;
782 }
783 
784 /**
785  * trace_pid_show - show the current pid in seq_file processing
786  * @m: The seq_file structure to write into
787  * @v: A void pointer of the pid (+1) value to display
788  *
789  * Can be directly used by seq_file operations to display the current
790  * pid value.
791  */
trace_pid_show(struct seq_file * m,void * v)792 int trace_pid_show(struct seq_file *m, void *v)
793 {
794 	unsigned long pid = (unsigned long)v - 1;
795 
796 	seq_printf(m, "%lu\n", pid);
797 	return 0;
798 }
799 
800 /* 128 should be much more than enough */
801 #define PID_BUF_SIZE		127
802 
trace_pid_write(struct trace_pid_list * filtered_pids,struct trace_pid_list ** new_pid_list,const char __user * ubuf,size_t cnt)803 int trace_pid_write(struct trace_pid_list *filtered_pids,
804 		    struct trace_pid_list **new_pid_list,
805 		    const char __user *ubuf, size_t cnt)
806 {
807 	struct trace_pid_list *pid_list;
808 	struct trace_parser parser;
809 	unsigned long val;
810 	int nr_pids = 0;
811 	ssize_t read = 0;
812 	ssize_t ret;
813 	loff_t pos;
814 	pid_t pid;
815 
816 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
817 		return -ENOMEM;
818 
819 	/*
820 	 * Always recreate a new array. The write is an all or nothing
821 	 * operation. Always create a new array when adding new pids by
822 	 * the user. If the operation fails, then the current list is
823 	 * not modified.
824 	 */
825 	pid_list = trace_pid_list_alloc();
826 	if (!pid_list) {
827 		trace_parser_put(&parser);
828 		return -ENOMEM;
829 	}
830 
831 	if (filtered_pids) {
832 		/* copy the current bits to the new max */
833 		ret = trace_pid_list_first(filtered_pids, &pid);
834 		while (!ret) {
835 			ret = trace_pid_list_set(pid_list, pid);
836 			if (ret < 0)
837 				goto out;
838 
839 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
840 			nr_pids++;
841 		}
842 	}
843 
844 	ret = 0;
845 	while (cnt > 0) {
846 
847 		pos = 0;
848 
849 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
850 		if (ret < 0)
851 			break;
852 
853 		read += ret;
854 		ubuf += ret;
855 		cnt -= ret;
856 
857 		if (!trace_parser_loaded(&parser))
858 			break;
859 
860 		ret = -EINVAL;
861 		if (kstrtoul(parser.buffer, 0, &val))
862 			break;
863 
864 		pid = (pid_t)val;
865 
866 		if (trace_pid_list_set(pid_list, pid) < 0) {
867 			ret = -1;
868 			break;
869 		}
870 		nr_pids++;
871 
872 		trace_parser_clear(&parser);
873 		ret = 0;
874 	}
875  out:
876 	trace_parser_put(&parser);
877 
878 	if (ret < 0) {
879 		trace_pid_list_free(pid_list);
880 		return ret;
881 	}
882 
883 	if (!nr_pids) {
884 		/* Cleared the list of pids */
885 		trace_pid_list_free(pid_list);
886 		pid_list = NULL;
887 	}
888 
889 	*new_pid_list = pid_list;
890 
891 	return read;
892 }
893 
buffer_ftrace_now(struct array_buffer * buf,int cpu)894 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
895 {
896 	u64 ts;
897 
898 	/* Early boot up does not have a buffer yet */
899 	if (!buf->buffer)
900 		return trace_clock_local();
901 
902 	ts = ring_buffer_time_stamp(buf->buffer);
903 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
904 
905 	return ts;
906 }
907 
ftrace_now(int cpu)908 u64 ftrace_now(int cpu)
909 {
910 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
911 }
912 
913 /**
914  * tracing_is_enabled - Show if global_trace has been enabled
915  *
916  * Shows if the global trace has been enabled or not. It uses the
917  * mirror flag "buffer_disabled" to be used in fast paths such as for
918  * the irqsoff tracer. But it may be inaccurate due to races. If you
919  * need to know the accurate state, use tracing_is_on() which is a little
920  * slower, but accurate.
921  */
tracing_is_enabled(void)922 int tracing_is_enabled(void)
923 {
924 	/*
925 	 * For quick access (irqsoff uses this in fast path), just
926 	 * return the mirror variable of the state of the ring buffer.
927 	 * It's a little racy, but we don't really care.
928 	 */
929 	return !global_trace.buffer_disabled;
930 }
931 
932 /*
933  * trace_buf_size is the size in bytes that is allocated
934  * for a buffer. Note, the number of bytes is always rounded
935  * to page size.
936  *
937  * This number is purposely set to a low number of 16384.
938  * If the dump on oops happens, it will be much appreciated
939  * to not have to wait for all that output. Anyway this can be
940  * boot time and run time configurable.
941  */
942 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
943 
944 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
945 
946 /* trace_types holds a link list of available tracers. */
947 static struct tracer		*trace_types __read_mostly;
948 
949 /*
950  * trace_types_lock is used to protect the trace_types list.
951  */
952 DEFINE_MUTEX(trace_types_lock);
953 
954 /*
955  * serialize the access of the ring buffer
956  *
957  * ring buffer serializes readers, but it is low level protection.
958  * The validity of the events (which returns by ring_buffer_peek() ..etc)
959  * are not protected by ring buffer.
960  *
961  * The content of events may become garbage if we allow other process consumes
962  * these events concurrently:
963  *   A) the page of the consumed events may become a normal page
964  *      (not reader page) in ring buffer, and this page will be rewritten
965  *      by events producer.
966  *   B) The page of the consumed events may become a page for splice_read,
967  *      and this page will be returned to system.
968  *
969  * These primitives allow multi process access to different cpu ring buffer
970  * concurrently.
971  *
972  * These primitives don't distinguish read-only and read-consume access.
973  * Multi read-only access are also serialized.
974  */
975 
976 #ifdef CONFIG_SMP
977 static DECLARE_RWSEM(all_cpu_access_lock);
978 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
979 
trace_access_lock(int cpu)980 static inline void trace_access_lock(int cpu)
981 {
982 	if (cpu == RING_BUFFER_ALL_CPUS) {
983 		/* gain it for accessing the whole ring buffer. */
984 		down_write(&all_cpu_access_lock);
985 	} else {
986 		/* gain it for accessing a cpu ring buffer. */
987 
988 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
989 		down_read(&all_cpu_access_lock);
990 
991 		/* Secondly block other access to this @cpu ring buffer. */
992 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
993 	}
994 }
995 
trace_access_unlock(int cpu)996 static inline void trace_access_unlock(int cpu)
997 {
998 	if (cpu == RING_BUFFER_ALL_CPUS) {
999 		up_write(&all_cpu_access_lock);
1000 	} else {
1001 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
1002 		up_read(&all_cpu_access_lock);
1003 	}
1004 }
1005 
trace_access_lock_init(void)1006 static inline void trace_access_lock_init(void)
1007 {
1008 	int cpu;
1009 
1010 	for_each_possible_cpu(cpu)
1011 		mutex_init(&per_cpu(cpu_access_lock, cpu));
1012 }
1013 
1014 #else
1015 
1016 static DEFINE_MUTEX(access_lock);
1017 
trace_access_lock(int cpu)1018 static inline void trace_access_lock(int cpu)
1019 {
1020 	(void)cpu;
1021 	mutex_lock(&access_lock);
1022 }
1023 
trace_access_unlock(int cpu)1024 static inline void trace_access_unlock(int cpu)
1025 {
1026 	(void)cpu;
1027 	mutex_unlock(&access_lock);
1028 }
1029 
trace_access_lock_init(void)1030 static inline void trace_access_lock_init(void)
1031 {
1032 }
1033 
1034 #endif
1035 
1036 #ifdef CONFIG_STACKTRACE
1037 static void __ftrace_trace_stack(struct trace_array *tr,
1038 				 struct trace_buffer *buffer,
1039 				 unsigned int trace_ctx,
1040 				 int skip, struct pt_regs *regs);
1041 static inline void ftrace_trace_stack(struct trace_array *tr,
1042 				      struct trace_buffer *buffer,
1043 				      unsigned int trace_ctx,
1044 				      int skip, struct pt_regs *regs);
1045 
1046 #else
__ftrace_trace_stack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx,int skip,struct pt_regs * regs)1047 static inline void __ftrace_trace_stack(struct trace_array *tr,
1048 					struct trace_buffer *buffer,
1049 					unsigned int trace_ctx,
1050 					int skip, struct pt_regs *regs)
1051 {
1052 }
ftrace_trace_stack(struct trace_array * tr,struct trace_buffer * buffer,unsigned long trace_ctx,int skip,struct pt_regs * regs)1053 static inline void ftrace_trace_stack(struct trace_array *tr,
1054 				      struct trace_buffer *buffer,
1055 				      unsigned long trace_ctx,
1056 				      int skip, struct pt_regs *regs)
1057 {
1058 }
1059 
1060 #endif
1061 
1062 static __always_inline void
trace_event_setup(struct ring_buffer_event * event,int type,unsigned int trace_ctx)1063 trace_event_setup(struct ring_buffer_event *event,
1064 		  int type, unsigned int trace_ctx)
1065 {
1066 	struct trace_entry *ent = ring_buffer_event_data(event);
1067 
1068 	tracing_generic_entry_update(ent, type, trace_ctx);
1069 }
1070 
1071 static __always_inline struct ring_buffer_event *
__trace_buffer_lock_reserve(struct trace_buffer * buffer,int type,unsigned long len,unsigned int trace_ctx)1072 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
1073 			  int type,
1074 			  unsigned long len,
1075 			  unsigned int trace_ctx)
1076 {
1077 	struct ring_buffer_event *event;
1078 
1079 	event = ring_buffer_lock_reserve(buffer, len);
1080 	if (event != NULL)
1081 		trace_event_setup(event, type, trace_ctx);
1082 
1083 	return event;
1084 }
1085 
tracer_tracing_on(struct trace_array * tr)1086 void tracer_tracing_on(struct trace_array *tr)
1087 {
1088 	if (tr->array_buffer.buffer)
1089 		ring_buffer_record_on(tr->array_buffer.buffer);
1090 	/*
1091 	 * This flag is looked at when buffers haven't been allocated
1092 	 * yet, or by some tracers (like irqsoff), that just want to
1093 	 * know if the ring buffer has been disabled, but it can handle
1094 	 * races of where it gets disabled but we still do a record.
1095 	 * As the check is in the fast path of the tracers, it is more
1096 	 * important to be fast than accurate.
1097 	 */
1098 	tr->buffer_disabled = 0;
1099 }
1100 
1101 /**
1102  * tracing_on - enable tracing buffers
1103  *
1104  * This function enables tracing buffers that may have been
1105  * disabled with tracing_off.
1106  */
tracing_on(void)1107 void tracing_on(void)
1108 {
1109 	tracer_tracing_on(&global_trace);
1110 }
1111 EXPORT_SYMBOL_GPL(tracing_on);
1112 
1113 
1114 static __always_inline void
__buffer_unlock_commit(struct trace_buffer * buffer,struct ring_buffer_event * event)1115 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1116 {
1117 	__this_cpu_write(trace_taskinfo_save, true);
1118 
1119 	/* If this is the temp buffer, we need to commit fully */
1120 	if (this_cpu_read(trace_buffered_event) == event) {
1121 		/* Length is in event->array[0] */
1122 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1123 		/* Release the temp buffer */
1124 		this_cpu_dec(trace_buffered_event_cnt);
1125 		/* ring_buffer_unlock_commit() enables preemption */
1126 		preempt_enable_notrace();
1127 	} else
1128 		ring_buffer_unlock_commit(buffer);
1129 }
1130 
__trace_array_puts(struct trace_array * tr,unsigned long ip,const char * str,int size)1131 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1132 		       const char *str, int size)
1133 {
1134 	struct ring_buffer_event *event;
1135 	struct trace_buffer *buffer;
1136 	struct print_entry *entry;
1137 	unsigned int trace_ctx;
1138 	int alloc;
1139 
1140 	if (!(tr->trace_flags & TRACE_ITER(PRINTK)))
1141 		return 0;
1142 
1143 	if (unlikely(tracing_selftest_running && tr == &global_trace))
1144 		return 0;
1145 
1146 	if (unlikely(tracing_disabled))
1147 		return 0;
1148 
1149 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1150 
1151 	trace_ctx = tracing_gen_ctx();
1152 	buffer = tr->array_buffer.buffer;
1153 	guard(ring_buffer_nest)(buffer);
1154 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1155 					    trace_ctx);
1156 	if (!event)
1157 		return 0;
1158 
1159 	entry = ring_buffer_event_data(event);
1160 	entry->ip = ip;
1161 
1162 	memcpy(&entry->buf, str, size);
1163 
1164 	/* Add a newline if necessary */
1165 	if (entry->buf[size - 1] != '\n') {
1166 		entry->buf[size] = '\n';
1167 		entry->buf[size + 1] = '\0';
1168 	} else
1169 		entry->buf[size] = '\0';
1170 
1171 	__buffer_unlock_commit(buffer, event);
1172 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1173 	return size;
1174 }
1175 EXPORT_SYMBOL_GPL(__trace_array_puts);
1176 
1177 /**
1178  * __trace_puts - write a constant string into the trace buffer.
1179  * @ip:	   The address of the caller
1180  * @str:   The constant string to write
1181  * @size:  The size of the string.
1182  */
__trace_puts(unsigned long ip,const char * str,int size)1183 int __trace_puts(unsigned long ip, const char *str, int size)
1184 {
1185 	return __trace_array_puts(printk_trace, ip, str, size);
1186 }
1187 EXPORT_SYMBOL_GPL(__trace_puts);
1188 
1189 /**
1190  * __trace_bputs - write the pointer to a constant string into trace buffer
1191  * @ip:	   The address of the caller
1192  * @str:   The constant string to write to the buffer to
1193  */
__trace_bputs(unsigned long ip,const char * str)1194 int __trace_bputs(unsigned long ip, const char *str)
1195 {
1196 	struct trace_array *tr = READ_ONCE(printk_trace);
1197 	struct ring_buffer_event *event;
1198 	struct trace_buffer *buffer;
1199 	struct bputs_entry *entry;
1200 	unsigned int trace_ctx;
1201 	int size = sizeof(struct bputs_entry);
1202 
1203 	if (!printk_binsafe(tr))
1204 		return __trace_puts(ip, str, strlen(str));
1205 
1206 	if (!(tr->trace_flags & TRACE_ITER(PRINTK)))
1207 		return 0;
1208 
1209 	if (unlikely(tracing_selftest_running || tracing_disabled))
1210 		return 0;
1211 
1212 	trace_ctx = tracing_gen_ctx();
1213 	buffer = tr->array_buffer.buffer;
1214 
1215 	guard(ring_buffer_nest)(buffer);
1216 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1217 					    trace_ctx);
1218 	if (!event)
1219 		return 0;
1220 
1221 	entry = ring_buffer_event_data(event);
1222 	entry->ip			= ip;
1223 	entry->str			= str;
1224 
1225 	__buffer_unlock_commit(buffer, event);
1226 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1227 
1228 	return 1;
1229 }
1230 EXPORT_SYMBOL_GPL(__trace_bputs);
1231 
1232 #ifdef CONFIG_TRACER_SNAPSHOT
tracing_snapshot_instance_cond(struct trace_array * tr,void * cond_data)1233 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1234 					   void *cond_data)
1235 {
1236 	struct tracer *tracer = tr->current_trace;
1237 	unsigned long flags;
1238 
1239 	if (in_nmi()) {
1240 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1241 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1242 		return;
1243 	}
1244 
1245 	if (!tr->allocated_snapshot) {
1246 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1247 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1248 		tracer_tracing_off(tr);
1249 		return;
1250 	}
1251 
1252 	/* Note, snapshot can not be used when the tracer uses it */
1253 	if (tracer->use_max_tr) {
1254 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1255 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1256 		return;
1257 	}
1258 
1259 	if (tr->mapped) {
1260 		trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
1261 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1262 		return;
1263 	}
1264 
1265 	local_irq_save(flags);
1266 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1267 	local_irq_restore(flags);
1268 }
1269 
tracing_snapshot_instance(struct trace_array * tr)1270 void tracing_snapshot_instance(struct trace_array *tr)
1271 {
1272 	tracing_snapshot_instance_cond(tr, NULL);
1273 }
1274 
1275 /**
1276  * tracing_snapshot - take a snapshot of the current buffer.
1277  *
1278  * This causes a swap between the snapshot buffer and the current live
1279  * tracing buffer. You can use this to take snapshots of the live
1280  * trace when some condition is triggered, but continue to trace.
1281  *
1282  * Note, make sure to allocate the snapshot with either
1283  * a tracing_snapshot_alloc(), or by doing it manually
1284  * with: echo 1 > /sys/kernel/tracing/snapshot
1285  *
1286  * If the snapshot buffer is not allocated, it will stop tracing.
1287  * Basically making a permanent snapshot.
1288  */
tracing_snapshot(void)1289 void tracing_snapshot(void)
1290 {
1291 	struct trace_array *tr = &global_trace;
1292 
1293 	tracing_snapshot_instance(tr);
1294 }
1295 EXPORT_SYMBOL_GPL(tracing_snapshot);
1296 
1297 /**
1298  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1299  * @tr:		The tracing instance to snapshot
1300  * @cond_data:	The data to be tested conditionally, and possibly saved
1301  *
1302  * This is the same as tracing_snapshot() except that the snapshot is
1303  * conditional - the snapshot will only happen if the
1304  * cond_snapshot.update() implementation receiving the cond_data
1305  * returns true, which means that the trace array's cond_snapshot
1306  * update() operation used the cond_data to determine whether the
1307  * snapshot should be taken, and if it was, presumably saved it along
1308  * with the snapshot.
1309  */
tracing_snapshot_cond(struct trace_array * tr,void * cond_data)1310 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1311 {
1312 	tracing_snapshot_instance_cond(tr, cond_data);
1313 }
1314 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1315 
1316 /**
1317  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1318  * @tr:		The tracing instance
1319  *
1320  * When the user enables a conditional snapshot using
1321  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1322  * with the snapshot.  This accessor is used to retrieve it.
1323  *
1324  * Should not be called from cond_snapshot.update(), since it takes
1325  * the tr->max_lock lock, which the code calling
1326  * cond_snapshot.update() has already done.
1327  *
1328  * Returns the cond_data associated with the trace array's snapshot.
1329  */
tracing_cond_snapshot_data(struct trace_array * tr)1330 void *tracing_cond_snapshot_data(struct trace_array *tr)
1331 {
1332 	void *cond_data = NULL;
1333 
1334 	local_irq_disable();
1335 	arch_spin_lock(&tr->max_lock);
1336 
1337 	if (tr->cond_snapshot)
1338 		cond_data = tr->cond_snapshot->cond_data;
1339 
1340 	arch_spin_unlock(&tr->max_lock);
1341 	local_irq_enable();
1342 
1343 	return cond_data;
1344 }
1345 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1346 
1347 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1348 					struct array_buffer *size_buf, int cpu_id);
1349 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1350 
tracing_alloc_snapshot_instance(struct trace_array * tr)1351 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1352 {
1353 	int order;
1354 	int ret;
1355 
1356 	if (!tr->allocated_snapshot) {
1357 
1358 		/* Make the snapshot buffer have the same order as main buffer */
1359 		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1360 		ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1361 		if (ret < 0)
1362 			return ret;
1363 
1364 		/* allocate spare buffer */
1365 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1366 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1367 		if (ret < 0)
1368 			return ret;
1369 
1370 		tr->allocated_snapshot = true;
1371 	}
1372 
1373 	return 0;
1374 }
1375 
free_snapshot(struct trace_array * tr)1376 static void free_snapshot(struct trace_array *tr)
1377 {
1378 	/*
1379 	 * We don't free the ring buffer. instead, resize it because
1380 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
1381 	 * we want preserve it.
1382 	 */
1383 	ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1384 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1385 	set_buffer_entries(&tr->max_buffer, 1);
1386 	tracing_reset_online_cpus(&tr->max_buffer);
1387 	tr->allocated_snapshot = false;
1388 }
1389 
tracing_arm_snapshot_locked(struct trace_array * tr)1390 static int tracing_arm_snapshot_locked(struct trace_array *tr)
1391 {
1392 	int ret;
1393 
1394 	lockdep_assert_held(&trace_types_lock);
1395 
1396 	spin_lock(&tr->snapshot_trigger_lock);
1397 	if (tr->snapshot == UINT_MAX || tr->mapped) {
1398 		spin_unlock(&tr->snapshot_trigger_lock);
1399 		return -EBUSY;
1400 	}
1401 
1402 	tr->snapshot++;
1403 	spin_unlock(&tr->snapshot_trigger_lock);
1404 
1405 	ret = tracing_alloc_snapshot_instance(tr);
1406 	if (ret) {
1407 		spin_lock(&tr->snapshot_trigger_lock);
1408 		tr->snapshot--;
1409 		spin_unlock(&tr->snapshot_trigger_lock);
1410 	}
1411 
1412 	return ret;
1413 }
1414 
tracing_arm_snapshot(struct trace_array * tr)1415 int tracing_arm_snapshot(struct trace_array *tr)
1416 {
1417 	guard(mutex)(&trace_types_lock);
1418 	return tracing_arm_snapshot_locked(tr);
1419 }
1420 
tracing_disarm_snapshot(struct trace_array * tr)1421 void tracing_disarm_snapshot(struct trace_array *tr)
1422 {
1423 	spin_lock(&tr->snapshot_trigger_lock);
1424 	if (!WARN_ON(!tr->snapshot))
1425 		tr->snapshot--;
1426 	spin_unlock(&tr->snapshot_trigger_lock);
1427 }
1428 
1429 /**
1430  * tracing_alloc_snapshot - allocate snapshot buffer.
1431  *
1432  * This only allocates the snapshot buffer if it isn't already
1433  * allocated - it doesn't also take a snapshot.
1434  *
1435  * This is meant to be used in cases where the snapshot buffer needs
1436  * to be set up for events that can't sleep but need to be able to
1437  * trigger a snapshot.
1438  */
tracing_alloc_snapshot(void)1439 int tracing_alloc_snapshot(void)
1440 {
1441 	struct trace_array *tr = &global_trace;
1442 	int ret;
1443 
1444 	ret = tracing_alloc_snapshot_instance(tr);
1445 	WARN_ON(ret < 0);
1446 
1447 	return ret;
1448 }
1449 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1450 
1451 /**
1452  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1453  *
1454  * This is similar to tracing_snapshot(), but it will allocate the
1455  * snapshot buffer if it isn't already allocated. Use this only
1456  * where it is safe to sleep, as the allocation may sleep.
1457  *
1458  * This causes a swap between the snapshot buffer and the current live
1459  * tracing buffer. You can use this to take snapshots of the live
1460  * trace when some condition is triggered, but continue to trace.
1461  */
tracing_snapshot_alloc(void)1462 void tracing_snapshot_alloc(void)
1463 {
1464 	int ret;
1465 
1466 	ret = tracing_alloc_snapshot();
1467 	if (ret < 0)
1468 		return;
1469 
1470 	tracing_snapshot();
1471 }
1472 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1473 
1474 /**
1475  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1476  * @tr:		The tracing instance
1477  * @cond_data:	User data to associate with the snapshot
1478  * @update:	Implementation of the cond_snapshot update function
1479  *
1480  * Check whether the conditional snapshot for the given instance has
1481  * already been enabled, or if the current tracer is already using a
1482  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1483  * save the cond_data and update function inside.
1484  *
1485  * Returns 0 if successful, error otherwise.
1486  */
tracing_snapshot_cond_enable(struct trace_array * tr,void * cond_data,cond_update_fn_t update)1487 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1488 				 cond_update_fn_t update)
1489 {
1490 	struct cond_snapshot *cond_snapshot __free(kfree) =
1491 		kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1492 	int ret;
1493 
1494 	if (!cond_snapshot)
1495 		return -ENOMEM;
1496 
1497 	cond_snapshot->cond_data = cond_data;
1498 	cond_snapshot->update = update;
1499 
1500 	guard(mutex)(&trace_types_lock);
1501 
1502 	if (tr->current_trace->use_max_tr)
1503 		return -EBUSY;
1504 
1505 	/*
1506 	 * The cond_snapshot can only change to NULL without the
1507 	 * trace_types_lock. We don't care if we race with it going
1508 	 * to NULL, but we want to make sure that it's not set to
1509 	 * something other than NULL when we get here, which we can
1510 	 * do safely with only holding the trace_types_lock and not
1511 	 * having to take the max_lock.
1512 	 */
1513 	if (tr->cond_snapshot)
1514 		return -EBUSY;
1515 
1516 	ret = tracing_arm_snapshot_locked(tr);
1517 	if (ret)
1518 		return ret;
1519 
1520 	local_irq_disable();
1521 	arch_spin_lock(&tr->max_lock);
1522 	tr->cond_snapshot = no_free_ptr(cond_snapshot);
1523 	arch_spin_unlock(&tr->max_lock);
1524 	local_irq_enable();
1525 
1526 	return 0;
1527 }
1528 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1529 
1530 /**
1531  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1532  * @tr:		The tracing instance
1533  *
1534  * Check whether the conditional snapshot for the given instance is
1535  * enabled; if so, free the cond_snapshot associated with it,
1536  * otherwise return -EINVAL.
1537  *
1538  * Returns 0 if successful, error otherwise.
1539  */
tracing_snapshot_cond_disable(struct trace_array * tr)1540 int tracing_snapshot_cond_disable(struct trace_array *tr)
1541 {
1542 	int ret = 0;
1543 
1544 	local_irq_disable();
1545 	arch_spin_lock(&tr->max_lock);
1546 
1547 	if (!tr->cond_snapshot)
1548 		ret = -EINVAL;
1549 	else {
1550 		kfree(tr->cond_snapshot);
1551 		tr->cond_snapshot = NULL;
1552 	}
1553 
1554 	arch_spin_unlock(&tr->max_lock);
1555 	local_irq_enable();
1556 
1557 	tracing_disarm_snapshot(tr);
1558 
1559 	return ret;
1560 }
1561 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1562 #else
tracing_snapshot(void)1563 void tracing_snapshot(void)
1564 {
1565 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1566 }
1567 EXPORT_SYMBOL_GPL(tracing_snapshot);
tracing_snapshot_cond(struct trace_array * tr,void * cond_data)1568 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1569 {
1570 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1571 }
1572 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
tracing_alloc_snapshot(void)1573 int tracing_alloc_snapshot(void)
1574 {
1575 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1576 	return -ENODEV;
1577 }
1578 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
tracing_snapshot_alloc(void)1579 void tracing_snapshot_alloc(void)
1580 {
1581 	/* Give warning */
1582 	tracing_snapshot();
1583 }
1584 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
tracing_cond_snapshot_data(struct trace_array * tr)1585 void *tracing_cond_snapshot_data(struct trace_array *tr)
1586 {
1587 	return NULL;
1588 }
1589 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
tracing_snapshot_cond_enable(struct trace_array * tr,void * cond_data,cond_update_fn_t update)1590 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1591 {
1592 	return -ENODEV;
1593 }
1594 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
tracing_snapshot_cond_disable(struct trace_array * tr)1595 int tracing_snapshot_cond_disable(struct trace_array *tr)
1596 {
1597 	return false;
1598 }
1599 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1600 #define free_snapshot(tr)	do { } while (0)
1601 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1602 #endif /* CONFIG_TRACER_SNAPSHOT */
1603 
tracer_tracing_off(struct trace_array * tr)1604 void tracer_tracing_off(struct trace_array *tr)
1605 {
1606 	if (tr->array_buffer.buffer)
1607 		ring_buffer_record_off(tr->array_buffer.buffer);
1608 	/*
1609 	 * This flag is looked at when buffers haven't been allocated
1610 	 * yet, or by some tracers (like irqsoff), that just want to
1611 	 * know if the ring buffer has been disabled, but it can handle
1612 	 * races of where it gets disabled but we still do a record.
1613 	 * As the check is in the fast path of the tracers, it is more
1614 	 * important to be fast than accurate.
1615 	 */
1616 	tr->buffer_disabled = 1;
1617 }
1618 
1619 /**
1620  * tracer_tracing_disable() - temporary disable the buffer from write
1621  * @tr: The trace array to disable its buffer for
1622  *
1623  * Expects trace_tracing_enable() to re-enable tracing.
1624  * The difference between this and tracer_tracing_off() is that this
1625  * is a counter and can nest, whereas, tracer_tracing_off() can
1626  * be called multiple times and a single trace_tracing_on() will
1627  * enable it.
1628  */
tracer_tracing_disable(struct trace_array * tr)1629 void tracer_tracing_disable(struct trace_array *tr)
1630 {
1631 	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1632 		return;
1633 
1634 	ring_buffer_record_disable(tr->array_buffer.buffer);
1635 }
1636 
1637 /**
1638  * tracer_tracing_enable() - counter part of tracer_tracing_disable()
1639  * @tr: The trace array that had tracer_tracincg_disable() called on it
1640  *
1641  * This is called after tracer_tracing_disable() has been called on @tr,
1642  * when it's safe to re-enable tracing.
1643  */
tracer_tracing_enable(struct trace_array * tr)1644 void tracer_tracing_enable(struct trace_array *tr)
1645 {
1646 	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1647 		return;
1648 
1649 	ring_buffer_record_enable(tr->array_buffer.buffer);
1650 }
1651 
1652 /**
1653  * tracing_off - turn off tracing buffers
1654  *
1655  * This function stops the tracing buffers from recording data.
1656  * It does not disable any overhead the tracers themselves may
1657  * be causing. This function simply causes all recording to
1658  * the ring buffers to fail.
1659  */
tracing_off(void)1660 void tracing_off(void)
1661 {
1662 	tracer_tracing_off(&global_trace);
1663 }
1664 EXPORT_SYMBOL_GPL(tracing_off);
1665 
disable_trace_on_warning(void)1666 void disable_trace_on_warning(void)
1667 {
1668 	if (__disable_trace_on_warning) {
1669 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1670 			"Disabling tracing due to warning\n");
1671 		tracing_off();
1672 	}
1673 }
1674 
1675 /**
1676  * tracer_tracing_is_on - show real state of ring buffer enabled
1677  * @tr : the trace array to know if ring buffer is enabled
1678  *
1679  * Shows real state of the ring buffer if it is enabled or not.
1680  */
tracer_tracing_is_on(struct trace_array * tr)1681 bool tracer_tracing_is_on(struct trace_array *tr)
1682 {
1683 	if (tr->array_buffer.buffer)
1684 		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1685 	return !tr->buffer_disabled;
1686 }
1687 
1688 /**
1689  * tracing_is_on - show state of ring buffers enabled
1690  */
tracing_is_on(void)1691 int tracing_is_on(void)
1692 {
1693 	return tracer_tracing_is_on(&global_trace);
1694 }
1695 EXPORT_SYMBOL_GPL(tracing_is_on);
1696 
set_buf_size(char * str)1697 static int __init set_buf_size(char *str)
1698 {
1699 	unsigned long buf_size;
1700 
1701 	if (!str)
1702 		return 0;
1703 	buf_size = memparse(str, &str);
1704 	/*
1705 	 * nr_entries can not be zero and the startup
1706 	 * tests require some buffer space. Therefore
1707 	 * ensure we have at least 4096 bytes of buffer.
1708 	 */
1709 	trace_buf_size = max(4096UL, buf_size);
1710 	return 1;
1711 }
1712 __setup("trace_buf_size=", set_buf_size);
1713 
set_tracing_thresh(char * str)1714 static int __init set_tracing_thresh(char *str)
1715 {
1716 	unsigned long threshold;
1717 	int ret;
1718 
1719 	if (!str)
1720 		return 0;
1721 	ret = kstrtoul(str, 0, &threshold);
1722 	if (ret < 0)
1723 		return 0;
1724 	tracing_thresh = threshold * 1000;
1725 	return 1;
1726 }
1727 __setup("tracing_thresh=", set_tracing_thresh);
1728 
nsecs_to_usecs(unsigned long nsecs)1729 unsigned long nsecs_to_usecs(unsigned long nsecs)
1730 {
1731 	return nsecs / 1000;
1732 }
1733 
1734 /*
1735  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1736  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1737  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1738  * of strings in the order that the evals (enum) were defined.
1739  */
1740 #undef C
1741 #define C(a, b) b
1742 
1743 /* These must match the bit positions in trace_iterator_flags */
1744 static const char *trace_options[] = {
1745 	TRACE_FLAGS
1746 	NULL
1747 };
1748 
1749 static struct {
1750 	u64 (*func)(void);
1751 	const char *name;
1752 	int in_ns;		/* is this clock in nanoseconds? */
1753 } trace_clocks[] = {
1754 	{ trace_clock_local,		"local",	1 },
1755 	{ trace_clock_global,		"global",	1 },
1756 	{ trace_clock_counter,		"counter",	0 },
1757 	{ trace_clock_jiffies,		"uptime",	0 },
1758 	{ trace_clock,			"perf",		1 },
1759 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1760 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1761 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1762 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1763 	ARCH_TRACE_CLOCKS
1764 };
1765 
trace_clock_in_ns(struct trace_array * tr)1766 bool trace_clock_in_ns(struct trace_array *tr)
1767 {
1768 	if (trace_clocks[tr->clock_id].in_ns)
1769 		return true;
1770 
1771 	return false;
1772 }
1773 
1774 /*
1775  * trace_parser_get_init - gets the buffer for trace parser
1776  */
trace_parser_get_init(struct trace_parser * parser,int size)1777 int trace_parser_get_init(struct trace_parser *parser, int size)
1778 {
1779 	memset(parser, 0, sizeof(*parser));
1780 
1781 	parser->buffer = kmalloc(size, GFP_KERNEL);
1782 	if (!parser->buffer)
1783 		return 1;
1784 
1785 	parser->size = size;
1786 	return 0;
1787 }
1788 
1789 /*
1790  * trace_parser_put - frees the buffer for trace parser
1791  */
trace_parser_put(struct trace_parser * parser)1792 void trace_parser_put(struct trace_parser *parser)
1793 {
1794 	kfree(parser->buffer);
1795 	parser->buffer = NULL;
1796 }
1797 
1798 /*
1799  * trace_get_user - reads the user input string separated by  space
1800  * (matched by isspace(ch))
1801  *
1802  * For each string found the 'struct trace_parser' is updated,
1803  * and the function returns.
1804  *
1805  * Returns number of bytes read.
1806  *
1807  * See kernel/trace/trace.h for 'struct trace_parser' details.
1808  */
trace_get_user(struct trace_parser * parser,const char __user * ubuf,size_t cnt,loff_t * ppos)1809 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1810 	size_t cnt, loff_t *ppos)
1811 {
1812 	char ch;
1813 	size_t read = 0;
1814 	ssize_t ret;
1815 
1816 	if (!*ppos)
1817 		trace_parser_clear(parser);
1818 
1819 	ret = get_user(ch, ubuf++);
1820 	if (ret)
1821 		goto fail;
1822 
1823 	read++;
1824 	cnt--;
1825 
1826 	/*
1827 	 * The parser is not finished with the last write,
1828 	 * continue reading the user input without skipping spaces.
1829 	 */
1830 	if (!parser->cont) {
1831 		/* skip white space */
1832 		while (cnt && isspace(ch)) {
1833 			ret = get_user(ch, ubuf++);
1834 			if (ret)
1835 				goto fail;
1836 			read++;
1837 			cnt--;
1838 		}
1839 
1840 		parser->idx = 0;
1841 
1842 		/* only spaces were written */
1843 		if (isspace(ch) || !ch) {
1844 			*ppos += read;
1845 			return read;
1846 		}
1847 	}
1848 
1849 	/* read the non-space input */
1850 	while (cnt && !isspace(ch) && ch) {
1851 		if (parser->idx < parser->size - 1)
1852 			parser->buffer[parser->idx++] = ch;
1853 		else {
1854 			ret = -EINVAL;
1855 			goto fail;
1856 		}
1857 
1858 		ret = get_user(ch, ubuf++);
1859 		if (ret)
1860 			goto fail;
1861 		read++;
1862 		cnt--;
1863 	}
1864 
1865 	/* We either got finished input or we have to wait for another call. */
1866 	if (isspace(ch) || !ch) {
1867 		parser->buffer[parser->idx] = 0;
1868 		parser->cont = false;
1869 	} else if (parser->idx < parser->size - 1) {
1870 		parser->cont = true;
1871 		parser->buffer[parser->idx++] = ch;
1872 		/* Make sure the parsed string always terminates with '\0'. */
1873 		parser->buffer[parser->idx] = 0;
1874 	} else {
1875 		ret = -EINVAL;
1876 		goto fail;
1877 	}
1878 
1879 	*ppos += read;
1880 	return read;
1881 fail:
1882 	trace_parser_fail(parser);
1883 	return ret;
1884 }
1885 
1886 /* TODO add a seq_buf_to_buffer() */
trace_seq_to_buffer(struct trace_seq * s,void * buf,size_t cnt)1887 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1888 {
1889 	int len;
1890 
1891 	if (trace_seq_used(s) <= s->readpos)
1892 		return -EBUSY;
1893 
1894 	len = trace_seq_used(s) - s->readpos;
1895 	if (cnt > len)
1896 		cnt = len;
1897 	memcpy(buf, s->buffer + s->readpos, cnt);
1898 
1899 	s->readpos += cnt;
1900 	return cnt;
1901 }
1902 
1903 unsigned long __read_mostly	tracing_thresh;
1904 
1905 #ifdef CONFIG_TRACER_MAX_TRACE
1906 static const struct file_operations tracing_max_lat_fops;
1907 
1908 #ifdef LATENCY_FS_NOTIFY
1909 
1910 static struct workqueue_struct *fsnotify_wq;
1911 
latency_fsnotify_workfn(struct work_struct * work)1912 static void latency_fsnotify_workfn(struct work_struct *work)
1913 {
1914 	struct trace_array *tr = container_of(work, struct trace_array,
1915 					      fsnotify_work);
1916 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1917 }
1918 
latency_fsnotify_workfn_irq(struct irq_work * iwork)1919 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1920 {
1921 	struct trace_array *tr = container_of(iwork, struct trace_array,
1922 					      fsnotify_irqwork);
1923 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1924 }
1925 
trace_create_maxlat_file(struct trace_array * tr,struct dentry * d_tracer)1926 static void trace_create_maxlat_file(struct trace_array *tr,
1927 				     struct dentry *d_tracer)
1928 {
1929 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1930 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1931 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1932 					      TRACE_MODE_WRITE,
1933 					      d_tracer, tr,
1934 					      &tracing_max_lat_fops);
1935 }
1936 
latency_fsnotify_init(void)1937 __init static int latency_fsnotify_init(void)
1938 {
1939 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1940 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1941 	if (!fsnotify_wq) {
1942 		pr_err("Unable to allocate tr_max_lat_wq\n");
1943 		return -ENOMEM;
1944 	}
1945 	return 0;
1946 }
1947 
1948 late_initcall_sync(latency_fsnotify_init);
1949 
latency_fsnotify(struct trace_array * tr)1950 void latency_fsnotify(struct trace_array *tr)
1951 {
1952 	if (!fsnotify_wq)
1953 		return;
1954 	/*
1955 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1956 	 * possible that we are called from __schedule() or do_idle(), which
1957 	 * could cause a deadlock.
1958 	 */
1959 	irq_work_queue(&tr->fsnotify_irqwork);
1960 }
1961 
1962 #else /* !LATENCY_FS_NOTIFY */
1963 
1964 #define trace_create_maxlat_file(tr, d_tracer)				\
1965 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1966 			  d_tracer, tr, &tracing_max_lat_fops)
1967 
1968 #endif
1969 
1970 /*
1971  * Copy the new maximum trace into the separate maximum-trace
1972  * structure. (this way the maximum trace is permanently saved,
1973  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1974  */
1975 static void
__update_max_tr(struct trace_array * tr,struct task_struct * tsk,int cpu)1976 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1977 {
1978 	struct array_buffer *trace_buf = &tr->array_buffer;
1979 	struct array_buffer *max_buf = &tr->max_buffer;
1980 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1981 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1982 
1983 	max_buf->cpu = cpu;
1984 	max_buf->time_start = data->preempt_timestamp;
1985 
1986 	max_data->saved_latency = tr->max_latency;
1987 	max_data->critical_start = data->critical_start;
1988 	max_data->critical_end = data->critical_end;
1989 
1990 	strscpy(max_data->comm, tsk->comm);
1991 	max_data->pid = tsk->pid;
1992 	/*
1993 	 * If tsk == current, then use current_uid(), as that does not use
1994 	 * RCU. The irq tracer can be called out of RCU scope.
1995 	 */
1996 	if (tsk == current)
1997 		max_data->uid = current_uid();
1998 	else
1999 		max_data->uid = task_uid(tsk);
2000 
2001 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
2002 	max_data->policy = tsk->policy;
2003 	max_data->rt_priority = tsk->rt_priority;
2004 
2005 	/* record this tasks comm */
2006 	tracing_record_cmdline(tsk);
2007 	latency_fsnotify(tr);
2008 }
2009 
2010 /**
2011  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
2012  * @tr: tracer
2013  * @tsk: the task with the latency
2014  * @cpu: The cpu that initiated the trace.
2015  * @cond_data: User data associated with a conditional snapshot
2016  *
2017  * Flip the buffers between the @tr and the max_tr and record information
2018  * about which task was the cause of this latency.
2019  */
2020 void
update_max_tr(struct trace_array * tr,struct task_struct * tsk,int cpu,void * cond_data)2021 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
2022 	      void *cond_data)
2023 {
2024 	if (tr->stop_count)
2025 		return;
2026 
2027 	WARN_ON_ONCE(!irqs_disabled());
2028 
2029 	if (!tr->allocated_snapshot) {
2030 		/* Only the nop tracer should hit this when disabling */
2031 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
2032 		return;
2033 	}
2034 
2035 	arch_spin_lock(&tr->max_lock);
2036 
2037 	/* Inherit the recordable setting from array_buffer */
2038 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
2039 		ring_buffer_record_on(tr->max_buffer.buffer);
2040 	else
2041 		ring_buffer_record_off(tr->max_buffer.buffer);
2042 
2043 #ifdef CONFIG_TRACER_SNAPSHOT
2044 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
2045 		arch_spin_unlock(&tr->max_lock);
2046 		return;
2047 	}
2048 #endif
2049 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
2050 
2051 	__update_max_tr(tr, tsk, cpu);
2052 
2053 	arch_spin_unlock(&tr->max_lock);
2054 
2055 	/* Any waiters on the old snapshot buffer need to wake up */
2056 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
2057 }
2058 
2059 /**
2060  * update_max_tr_single - only copy one trace over, and reset the rest
2061  * @tr: tracer
2062  * @tsk: task with the latency
2063  * @cpu: the cpu of the buffer to copy.
2064  *
2065  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
2066  */
2067 void
update_max_tr_single(struct trace_array * tr,struct task_struct * tsk,int cpu)2068 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
2069 {
2070 	int ret;
2071 
2072 	if (tr->stop_count)
2073 		return;
2074 
2075 	WARN_ON_ONCE(!irqs_disabled());
2076 	if (!tr->allocated_snapshot) {
2077 		/* Only the nop tracer should hit this when disabling */
2078 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
2079 		return;
2080 	}
2081 
2082 	arch_spin_lock(&tr->max_lock);
2083 
2084 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
2085 
2086 	if (ret == -EBUSY) {
2087 		/*
2088 		 * We failed to swap the buffer due to a commit taking
2089 		 * place on this CPU. We fail to record, but we reset
2090 		 * the max trace buffer (no one writes directly to it)
2091 		 * and flag that it failed.
2092 		 * Another reason is resize is in progress.
2093 		 */
2094 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
2095 			"Failed to swap buffers due to commit or resize in progress\n");
2096 	}
2097 
2098 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
2099 
2100 	__update_max_tr(tr, tsk, cpu);
2101 	arch_spin_unlock(&tr->max_lock);
2102 }
2103 
2104 #endif /* CONFIG_TRACER_MAX_TRACE */
2105 
2106 struct pipe_wait {
2107 	struct trace_iterator		*iter;
2108 	int				wait_index;
2109 };
2110 
wait_pipe_cond(void * data)2111 static bool wait_pipe_cond(void *data)
2112 {
2113 	struct pipe_wait *pwait = data;
2114 	struct trace_iterator *iter = pwait->iter;
2115 
2116 	if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2117 		return true;
2118 
2119 	return iter->closed;
2120 }
2121 
wait_on_pipe(struct trace_iterator * iter,int full)2122 static int wait_on_pipe(struct trace_iterator *iter, int full)
2123 {
2124 	struct pipe_wait pwait;
2125 	int ret;
2126 
2127 	/* Iterators are static, they should be filled or empty */
2128 	if (trace_buffer_iter(iter, iter->cpu_file))
2129 		return 0;
2130 
2131 	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2132 	pwait.iter = iter;
2133 
2134 	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2135 			       wait_pipe_cond, &pwait);
2136 
2137 #ifdef CONFIG_TRACER_MAX_TRACE
2138 	/*
2139 	 * Make sure this is still the snapshot buffer, as if a snapshot were
2140 	 * to happen, this would now be the main buffer.
2141 	 */
2142 	if (iter->snapshot)
2143 		iter->array_buffer = &iter->tr->max_buffer;
2144 #endif
2145 	return ret;
2146 }
2147 
2148 #ifdef CONFIG_FTRACE_STARTUP_TEST
2149 static bool selftests_can_run;
2150 
2151 struct trace_selftests {
2152 	struct list_head		list;
2153 	struct tracer			*type;
2154 };
2155 
2156 static LIST_HEAD(postponed_selftests);
2157 
save_selftest(struct tracer * type)2158 static int save_selftest(struct tracer *type)
2159 {
2160 	struct trace_selftests *selftest;
2161 
2162 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2163 	if (!selftest)
2164 		return -ENOMEM;
2165 
2166 	selftest->type = type;
2167 	list_add(&selftest->list, &postponed_selftests);
2168 	return 0;
2169 }
2170 
run_tracer_selftest(struct tracer * type)2171 static int run_tracer_selftest(struct tracer *type)
2172 {
2173 	struct trace_array *tr = &global_trace;
2174 	struct tracer_flags *saved_flags = tr->current_trace_flags;
2175 	struct tracer *saved_tracer = tr->current_trace;
2176 	int ret;
2177 
2178 	if (!type->selftest || tracing_selftest_disabled)
2179 		return 0;
2180 
2181 	/*
2182 	 * If a tracer registers early in boot up (before scheduling is
2183 	 * initialized and such), then do not run its selftests yet.
2184 	 * Instead, run it a little later in the boot process.
2185 	 */
2186 	if (!selftests_can_run)
2187 		return save_selftest(type);
2188 
2189 	if (!tracing_is_on()) {
2190 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2191 			type->name);
2192 		return 0;
2193 	}
2194 
2195 	/*
2196 	 * Run a selftest on this tracer.
2197 	 * Here we reset the trace buffer, and set the current
2198 	 * tracer to be this tracer. The tracer can then run some
2199 	 * internal tracing to verify that everything is in order.
2200 	 * If we fail, we do not register this tracer.
2201 	 */
2202 	tracing_reset_online_cpus(&tr->array_buffer);
2203 
2204 	tr->current_trace = type;
2205 	tr->current_trace_flags = type->flags ? : type->default_flags;
2206 
2207 #ifdef CONFIG_TRACER_MAX_TRACE
2208 	if (type->use_max_tr) {
2209 		/* If we expanded the buffers, make sure the max is expanded too */
2210 		if (tr->ring_buffer_expanded)
2211 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2212 					   RING_BUFFER_ALL_CPUS);
2213 		tr->allocated_snapshot = true;
2214 	}
2215 #endif
2216 
2217 	/* the test is responsible for initializing and enabling */
2218 	pr_info("Testing tracer %s: ", type->name);
2219 	ret = type->selftest(type, tr);
2220 	/* the test is responsible for resetting too */
2221 	tr->current_trace = saved_tracer;
2222 	tr->current_trace_flags = saved_flags;
2223 	if (ret) {
2224 		printk(KERN_CONT "FAILED!\n");
2225 		/* Add the warning after printing 'FAILED' */
2226 		WARN_ON(1);
2227 		return -1;
2228 	}
2229 	/* Only reset on passing, to avoid touching corrupted buffers */
2230 	tracing_reset_online_cpus(&tr->array_buffer);
2231 
2232 #ifdef CONFIG_TRACER_MAX_TRACE
2233 	if (type->use_max_tr) {
2234 		tr->allocated_snapshot = false;
2235 
2236 		/* Shrink the max buffer again */
2237 		if (tr->ring_buffer_expanded)
2238 			ring_buffer_resize(tr->max_buffer.buffer, 1,
2239 					   RING_BUFFER_ALL_CPUS);
2240 	}
2241 #endif
2242 
2243 	printk(KERN_CONT "PASSED\n");
2244 	return 0;
2245 }
2246 
do_run_tracer_selftest(struct tracer * type)2247 static int do_run_tracer_selftest(struct tracer *type)
2248 {
2249 	int ret;
2250 
2251 	/*
2252 	 * Tests can take a long time, especially if they are run one after the
2253 	 * other, as does happen during bootup when all the tracers are
2254 	 * registered. This could cause the soft lockup watchdog to trigger.
2255 	 */
2256 	cond_resched();
2257 
2258 	tracing_selftest_running = true;
2259 	ret = run_tracer_selftest(type);
2260 	tracing_selftest_running = false;
2261 
2262 	return ret;
2263 }
2264 
init_trace_selftests(void)2265 static __init int init_trace_selftests(void)
2266 {
2267 	struct trace_selftests *p, *n;
2268 	struct tracer *t, **last;
2269 	int ret;
2270 
2271 	selftests_can_run = true;
2272 
2273 	guard(mutex)(&trace_types_lock);
2274 
2275 	if (list_empty(&postponed_selftests))
2276 		return 0;
2277 
2278 	pr_info("Running postponed tracer tests:\n");
2279 
2280 	tracing_selftest_running = true;
2281 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2282 		/* This loop can take minutes when sanitizers are enabled, so
2283 		 * lets make sure we allow RCU processing.
2284 		 */
2285 		cond_resched();
2286 		ret = run_tracer_selftest(p->type);
2287 		/* If the test fails, then warn and remove from available_tracers */
2288 		if (ret < 0) {
2289 			WARN(1, "tracer: %s failed selftest, disabling\n",
2290 			     p->type->name);
2291 			last = &trace_types;
2292 			for (t = trace_types; t; t = t->next) {
2293 				if (t == p->type) {
2294 					*last = t->next;
2295 					break;
2296 				}
2297 				last = &t->next;
2298 			}
2299 		}
2300 		list_del(&p->list);
2301 		kfree(p);
2302 	}
2303 	tracing_selftest_running = false;
2304 
2305 	return 0;
2306 }
2307 core_initcall(init_trace_selftests);
2308 #else
do_run_tracer_selftest(struct tracer * type)2309 static inline int do_run_tracer_selftest(struct tracer *type)
2310 {
2311 	return 0;
2312 }
2313 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2314 
2315 static int add_tracer(struct trace_array *tr, struct tracer *t);
2316 
2317 static void __init apply_trace_boot_options(void);
2318 
free_tracers(struct trace_array * tr)2319 static void free_tracers(struct trace_array *tr)
2320 {
2321 	struct tracers *t, *n;
2322 
2323 	lockdep_assert_held(&trace_types_lock);
2324 
2325 	list_for_each_entry_safe(t, n, &tr->tracers, list) {
2326 		list_del(&t->list);
2327 		kfree(t->flags);
2328 		kfree(t);
2329 	}
2330 }
2331 
2332 /**
2333  * register_tracer - register a tracer with the ftrace system.
2334  * @type: the plugin for the tracer
2335  *
2336  * Register a new plugin tracer.
2337  */
register_tracer(struct tracer * type)2338 int __init register_tracer(struct tracer *type)
2339 {
2340 	struct trace_array *tr;
2341 	struct tracer *t;
2342 	int ret = 0;
2343 
2344 	if (!type->name) {
2345 		pr_info("Tracer must have a name\n");
2346 		return -1;
2347 	}
2348 
2349 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2350 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2351 		return -1;
2352 	}
2353 
2354 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2355 		pr_warn("Can not register tracer %s due to lockdown\n",
2356 			   type->name);
2357 		return -EPERM;
2358 	}
2359 
2360 	mutex_lock(&trace_types_lock);
2361 
2362 	for (t = trace_types; t; t = t->next) {
2363 		if (strcmp(type->name, t->name) == 0) {
2364 			/* already found */
2365 			pr_info("Tracer %s already registered\n",
2366 				type->name);
2367 			ret = -1;
2368 			goto out;
2369 		}
2370 	}
2371 
2372 	/* store the tracer for __set_tracer_option */
2373 	if (type->flags)
2374 		type->flags->trace = type;
2375 
2376 	ret = do_run_tracer_selftest(type);
2377 	if (ret < 0)
2378 		goto out;
2379 
2380 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2381 		ret = add_tracer(tr, type);
2382 		if (ret < 0) {
2383 			/* The tracer will still exist but without options */
2384 			pr_warn("Failed to create tracer options for %s\n", type->name);
2385 			break;
2386 		}
2387 	}
2388 
2389 	type->next = trace_types;
2390 	trace_types = type;
2391 
2392  out:
2393 	mutex_unlock(&trace_types_lock);
2394 
2395 	if (ret || !default_bootup_tracer)
2396 		return ret;
2397 
2398 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2399 		return 0;
2400 
2401 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2402 	/* Do we want this tracer to start on bootup? */
2403 	WARN_ON(tracing_set_tracer(&global_trace, type->name) < 0);
2404 	default_bootup_tracer = NULL;
2405 
2406 	apply_trace_boot_options();
2407 
2408 	/* disable other selftests, since this will break it. */
2409 	disable_tracing_selftest("running a tracer");
2410 
2411 	return 0;
2412 }
2413 
tracing_reset_cpu(struct array_buffer * buf,int cpu)2414 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2415 {
2416 	struct trace_buffer *buffer = buf->buffer;
2417 
2418 	if (!buffer)
2419 		return;
2420 
2421 	ring_buffer_record_disable(buffer);
2422 
2423 	/* Make sure all commits have finished */
2424 	synchronize_rcu();
2425 	ring_buffer_reset_cpu(buffer, cpu);
2426 
2427 	ring_buffer_record_enable(buffer);
2428 }
2429 
tracing_reset_online_cpus(struct array_buffer * buf)2430 void tracing_reset_online_cpus(struct array_buffer *buf)
2431 {
2432 	struct trace_buffer *buffer = buf->buffer;
2433 
2434 	if (!buffer)
2435 		return;
2436 
2437 	ring_buffer_record_disable(buffer);
2438 
2439 	/* Make sure all commits have finished */
2440 	synchronize_rcu();
2441 
2442 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2443 
2444 	ring_buffer_reset_online_cpus(buffer);
2445 
2446 	ring_buffer_record_enable(buffer);
2447 }
2448 
tracing_reset_all_cpus(struct array_buffer * buf)2449 static void tracing_reset_all_cpus(struct array_buffer *buf)
2450 {
2451 	struct trace_buffer *buffer = buf->buffer;
2452 
2453 	if (!buffer)
2454 		return;
2455 
2456 	ring_buffer_record_disable(buffer);
2457 
2458 	/* Make sure all commits have finished */
2459 	synchronize_rcu();
2460 
2461 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2462 
2463 	ring_buffer_reset(buffer);
2464 
2465 	ring_buffer_record_enable(buffer);
2466 }
2467 
2468 /* Must have trace_types_lock held */
tracing_reset_all_online_cpus_unlocked(void)2469 void tracing_reset_all_online_cpus_unlocked(void)
2470 {
2471 	struct trace_array *tr;
2472 
2473 	lockdep_assert_held(&trace_types_lock);
2474 
2475 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2476 		if (!tr->clear_trace)
2477 			continue;
2478 		tr->clear_trace = false;
2479 		tracing_reset_online_cpus(&tr->array_buffer);
2480 #ifdef CONFIG_TRACER_MAX_TRACE
2481 		tracing_reset_online_cpus(&tr->max_buffer);
2482 #endif
2483 	}
2484 }
2485 
tracing_reset_all_online_cpus(void)2486 void tracing_reset_all_online_cpus(void)
2487 {
2488 	guard(mutex)(&trace_types_lock);
2489 	tracing_reset_all_online_cpus_unlocked();
2490 }
2491 
is_tracing_stopped(void)2492 int is_tracing_stopped(void)
2493 {
2494 	return global_trace.stop_count;
2495 }
2496 
tracing_start_tr(struct trace_array * tr)2497 static void tracing_start_tr(struct trace_array *tr)
2498 {
2499 	struct trace_buffer *buffer;
2500 
2501 	if (tracing_disabled)
2502 		return;
2503 
2504 	guard(raw_spinlock_irqsave)(&tr->start_lock);
2505 	if (--tr->stop_count) {
2506 		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2507 			/* Someone screwed up their debugging */
2508 			tr->stop_count = 0;
2509 		}
2510 		return;
2511 	}
2512 
2513 	/* Prevent the buffers from switching */
2514 	arch_spin_lock(&tr->max_lock);
2515 
2516 	buffer = tr->array_buffer.buffer;
2517 	if (buffer)
2518 		ring_buffer_record_enable(buffer);
2519 
2520 #ifdef CONFIG_TRACER_MAX_TRACE
2521 	buffer = tr->max_buffer.buffer;
2522 	if (buffer)
2523 		ring_buffer_record_enable(buffer);
2524 #endif
2525 
2526 	arch_spin_unlock(&tr->max_lock);
2527 }
2528 
2529 /**
2530  * tracing_start - quick start of the tracer
2531  *
2532  * If tracing is enabled but was stopped by tracing_stop,
2533  * this will start the tracer back up.
2534  */
tracing_start(void)2535 void tracing_start(void)
2536 
2537 {
2538 	return tracing_start_tr(&global_trace);
2539 }
2540 
tracing_stop_tr(struct trace_array * tr)2541 static void tracing_stop_tr(struct trace_array *tr)
2542 {
2543 	struct trace_buffer *buffer;
2544 
2545 	guard(raw_spinlock_irqsave)(&tr->start_lock);
2546 	if (tr->stop_count++)
2547 		return;
2548 
2549 	/* Prevent the buffers from switching */
2550 	arch_spin_lock(&tr->max_lock);
2551 
2552 	buffer = tr->array_buffer.buffer;
2553 	if (buffer)
2554 		ring_buffer_record_disable(buffer);
2555 
2556 #ifdef CONFIG_TRACER_MAX_TRACE
2557 	buffer = tr->max_buffer.buffer;
2558 	if (buffer)
2559 		ring_buffer_record_disable(buffer);
2560 #endif
2561 
2562 	arch_spin_unlock(&tr->max_lock);
2563 }
2564 
2565 /**
2566  * tracing_stop - quick stop of the tracer
2567  *
2568  * Light weight way to stop tracing. Use in conjunction with
2569  * tracing_start.
2570  */
tracing_stop(void)2571 void tracing_stop(void)
2572 {
2573 	return tracing_stop_tr(&global_trace);
2574 }
2575 
2576 /*
2577  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2578  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2579  * simplifies those functions and keeps them in sync.
2580  */
trace_handle_return(struct trace_seq * s)2581 enum print_line_t trace_handle_return(struct trace_seq *s)
2582 {
2583 	return trace_seq_has_overflowed(s) ?
2584 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2585 }
2586 EXPORT_SYMBOL_GPL(trace_handle_return);
2587 
migration_disable_value(void)2588 static unsigned short migration_disable_value(void)
2589 {
2590 #if defined(CONFIG_SMP)
2591 	return current->migration_disabled;
2592 #else
2593 	return 0;
2594 #endif
2595 }
2596 
tracing_gen_ctx_irq_test(unsigned int irqs_status)2597 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2598 {
2599 	unsigned int trace_flags = irqs_status;
2600 	unsigned int pc;
2601 
2602 	pc = preempt_count();
2603 
2604 	if (pc & NMI_MASK)
2605 		trace_flags |= TRACE_FLAG_NMI;
2606 	if (pc & HARDIRQ_MASK)
2607 		trace_flags |= TRACE_FLAG_HARDIRQ;
2608 	if (in_serving_softirq())
2609 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2610 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2611 		trace_flags |= TRACE_FLAG_BH_OFF;
2612 
2613 	if (tif_need_resched())
2614 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2615 	if (test_preempt_need_resched())
2616 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2617 	if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
2618 		trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
2619 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2620 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2621 }
2622 
2623 struct ring_buffer_event *
trace_buffer_lock_reserve(struct trace_buffer * buffer,int type,unsigned long len,unsigned int trace_ctx)2624 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2625 			  int type,
2626 			  unsigned long len,
2627 			  unsigned int trace_ctx)
2628 {
2629 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2630 }
2631 
2632 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2633 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2634 static int trace_buffered_event_ref;
2635 
2636 /**
2637  * trace_buffered_event_enable - enable buffering events
2638  *
2639  * When events are being filtered, it is quicker to use a temporary
2640  * buffer to write the event data into if there's a likely chance
2641  * that it will not be committed. The discard of the ring buffer
2642  * is not as fast as committing, and is much slower than copying
2643  * a commit.
2644  *
2645  * When an event is to be filtered, allocate per cpu buffers to
2646  * write the event data into, and if the event is filtered and discarded
2647  * it is simply dropped, otherwise, the entire data is to be committed
2648  * in one shot.
2649  */
trace_buffered_event_enable(void)2650 void trace_buffered_event_enable(void)
2651 {
2652 	struct ring_buffer_event *event;
2653 	struct page *page;
2654 	int cpu;
2655 
2656 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2657 
2658 	if (trace_buffered_event_ref++)
2659 		return;
2660 
2661 	for_each_tracing_cpu(cpu) {
2662 		page = alloc_pages_node(cpu_to_node(cpu),
2663 					GFP_KERNEL | __GFP_NORETRY, 0);
2664 		/* This is just an optimization and can handle failures */
2665 		if (!page) {
2666 			pr_err("Failed to allocate event buffer\n");
2667 			break;
2668 		}
2669 
2670 		event = page_address(page);
2671 		memset(event, 0, sizeof(*event));
2672 
2673 		per_cpu(trace_buffered_event, cpu) = event;
2674 
2675 		scoped_guard(preempt,) {
2676 			if (cpu == smp_processor_id() &&
2677 			    __this_cpu_read(trace_buffered_event) !=
2678 			    per_cpu(trace_buffered_event, cpu))
2679 				WARN_ON_ONCE(1);
2680 		}
2681 	}
2682 }
2683 
enable_trace_buffered_event(void * data)2684 static void enable_trace_buffered_event(void *data)
2685 {
2686 	this_cpu_dec(trace_buffered_event_cnt);
2687 }
2688 
disable_trace_buffered_event(void * data)2689 static void disable_trace_buffered_event(void *data)
2690 {
2691 	this_cpu_inc(trace_buffered_event_cnt);
2692 }
2693 
2694 /**
2695  * trace_buffered_event_disable - disable buffering events
2696  *
2697  * When a filter is removed, it is faster to not use the buffered
2698  * events, and to commit directly into the ring buffer. Free up
2699  * the temp buffers when there are no more users. This requires
2700  * special synchronization with current events.
2701  */
trace_buffered_event_disable(void)2702 void trace_buffered_event_disable(void)
2703 {
2704 	int cpu;
2705 
2706 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2707 
2708 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2709 		return;
2710 
2711 	if (--trace_buffered_event_ref)
2712 		return;
2713 
2714 	/* For each CPU, set the buffer as used. */
2715 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2716 			 NULL, true);
2717 
2718 	/* Wait for all current users to finish */
2719 	synchronize_rcu();
2720 
2721 	for_each_tracing_cpu(cpu) {
2722 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2723 		per_cpu(trace_buffered_event, cpu) = NULL;
2724 	}
2725 
2726 	/*
2727 	 * Wait for all CPUs that potentially started checking if they can use
2728 	 * their event buffer only after the previous synchronize_rcu() call and
2729 	 * they still read a valid pointer from trace_buffered_event. It must be
2730 	 * ensured they don't see cleared trace_buffered_event_cnt else they
2731 	 * could wrongly decide to use the pointed-to buffer which is now freed.
2732 	 */
2733 	synchronize_rcu();
2734 
2735 	/* For each CPU, relinquish the buffer */
2736 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2737 			 true);
2738 }
2739 
2740 static struct trace_buffer *temp_buffer;
2741 
2742 struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct trace_buffer ** current_rb,struct trace_event_file * trace_file,int type,unsigned long len,unsigned int trace_ctx)2743 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2744 			  struct trace_event_file *trace_file,
2745 			  int type, unsigned long len,
2746 			  unsigned int trace_ctx)
2747 {
2748 	struct ring_buffer_event *entry;
2749 	struct trace_array *tr = trace_file->tr;
2750 	int val;
2751 
2752 	*current_rb = tr->array_buffer.buffer;
2753 
2754 	if (!tr->no_filter_buffering_ref &&
2755 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2756 		preempt_disable_notrace();
2757 		/*
2758 		 * Filtering is on, so try to use the per cpu buffer first.
2759 		 * This buffer will simulate a ring_buffer_event,
2760 		 * where the type_len is zero and the array[0] will
2761 		 * hold the full length.
2762 		 * (see include/linux/ring-buffer.h for details on
2763 		 *  how the ring_buffer_event is structured).
2764 		 *
2765 		 * Using a temp buffer during filtering and copying it
2766 		 * on a matched filter is quicker than writing directly
2767 		 * into the ring buffer and then discarding it when
2768 		 * it doesn't match. That is because the discard
2769 		 * requires several atomic operations to get right.
2770 		 * Copying on match and doing nothing on a failed match
2771 		 * is still quicker than no copy on match, but having
2772 		 * to discard out of the ring buffer on a failed match.
2773 		 */
2774 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2775 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2776 
2777 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2778 
2779 			/*
2780 			 * Preemption is disabled, but interrupts and NMIs
2781 			 * can still come in now. If that happens after
2782 			 * the above increment, then it will have to go
2783 			 * back to the old method of allocating the event
2784 			 * on the ring buffer, and if the filter fails, it
2785 			 * will have to call ring_buffer_discard_commit()
2786 			 * to remove it.
2787 			 *
2788 			 * Need to also check the unlikely case that the
2789 			 * length is bigger than the temp buffer size.
2790 			 * If that happens, then the reserve is pretty much
2791 			 * guaranteed to fail, as the ring buffer currently
2792 			 * only allows events less than a page. But that may
2793 			 * change in the future, so let the ring buffer reserve
2794 			 * handle the failure in that case.
2795 			 */
2796 			if (val == 1 && likely(len <= max_len)) {
2797 				trace_event_setup(entry, type, trace_ctx);
2798 				entry->array[0] = len;
2799 				/* Return with preemption disabled */
2800 				return entry;
2801 			}
2802 			this_cpu_dec(trace_buffered_event_cnt);
2803 		}
2804 		/* __trace_buffer_lock_reserve() disables preemption */
2805 		preempt_enable_notrace();
2806 	}
2807 
2808 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2809 					    trace_ctx);
2810 	/*
2811 	 * If tracing is off, but we have triggers enabled
2812 	 * we still need to look at the event data. Use the temp_buffer
2813 	 * to store the trace event for the trigger to use. It's recursive
2814 	 * safe and will not be recorded anywhere.
2815 	 */
2816 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2817 		*current_rb = temp_buffer;
2818 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2819 						    trace_ctx);
2820 	}
2821 	return entry;
2822 }
2823 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2824 
2825 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2826 static DEFINE_MUTEX(tracepoint_printk_mutex);
2827 
output_printk(struct trace_event_buffer * fbuffer)2828 static void output_printk(struct trace_event_buffer *fbuffer)
2829 {
2830 	struct trace_event_call *event_call;
2831 	struct trace_event_file *file;
2832 	struct trace_event *event;
2833 	unsigned long flags;
2834 	struct trace_iterator *iter = tracepoint_print_iter;
2835 
2836 	/* We should never get here if iter is NULL */
2837 	if (WARN_ON_ONCE(!iter))
2838 		return;
2839 
2840 	event_call = fbuffer->trace_file->event_call;
2841 	if (!event_call || !event_call->event.funcs ||
2842 	    !event_call->event.funcs->trace)
2843 		return;
2844 
2845 	file = fbuffer->trace_file;
2846 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2847 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2848 	     !filter_match_preds(file->filter, fbuffer->entry)))
2849 		return;
2850 
2851 	event = &fbuffer->trace_file->event_call->event;
2852 
2853 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2854 	trace_seq_init(&iter->seq);
2855 	iter->ent = fbuffer->entry;
2856 	event_call->event.funcs->trace(iter, 0, event);
2857 	trace_seq_putc(&iter->seq, 0);
2858 	printk("%s", iter->seq.buffer);
2859 
2860 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2861 }
2862 
tracepoint_printk_sysctl(const struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)2863 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
2864 			     void *buffer, size_t *lenp,
2865 			     loff_t *ppos)
2866 {
2867 	int save_tracepoint_printk;
2868 	int ret;
2869 
2870 	guard(mutex)(&tracepoint_printk_mutex);
2871 	save_tracepoint_printk = tracepoint_printk;
2872 
2873 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2874 
2875 	/*
2876 	 * This will force exiting early, as tracepoint_printk
2877 	 * is always zero when tracepoint_printk_iter is not allocated
2878 	 */
2879 	if (!tracepoint_print_iter)
2880 		tracepoint_printk = 0;
2881 
2882 	if (save_tracepoint_printk == tracepoint_printk)
2883 		return ret;
2884 
2885 	if (tracepoint_printk)
2886 		static_key_enable(&tracepoint_printk_key.key);
2887 	else
2888 		static_key_disable(&tracepoint_printk_key.key);
2889 
2890 	return ret;
2891 }
2892 
trace_event_buffer_commit(struct trace_event_buffer * fbuffer)2893 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2894 {
2895 	enum event_trigger_type tt = ETT_NONE;
2896 	struct trace_event_file *file = fbuffer->trace_file;
2897 
2898 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2899 			fbuffer->entry, &tt))
2900 		goto discard;
2901 
2902 	if (static_key_false(&tracepoint_printk_key.key))
2903 		output_printk(fbuffer);
2904 
2905 	if (static_branch_unlikely(&trace_event_exports_enabled))
2906 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2907 
2908 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2909 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2910 
2911 discard:
2912 	if (tt)
2913 		event_triggers_post_call(file, tt);
2914 
2915 }
2916 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2917 
2918 /*
2919  * Skip 3:
2920  *
2921  *   trace_buffer_unlock_commit_regs()
2922  *   trace_event_buffer_commit()
2923  *   trace_event_raw_event_xxx()
2924  */
2925 # define STACK_SKIP 3
2926 
trace_buffer_unlock_commit_regs(struct trace_array * tr,struct trace_buffer * buffer,struct ring_buffer_event * event,unsigned int trace_ctx,struct pt_regs * regs)2927 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2928 				     struct trace_buffer *buffer,
2929 				     struct ring_buffer_event *event,
2930 				     unsigned int trace_ctx,
2931 				     struct pt_regs *regs)
2932 {
2933 	__buffer_unlock_commit(buffer, event);
2934 
2935 	/*
2936 	 * If regs is not set, then skip the necessary functions.
2937 	 * Note, we can still get here via blktrace, wakeup tracer
2938 	 * and mmiotrace, but that's ok if they lose a function or
2939 	 * two. They are not that meaningful.
2940 	 */
2941 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2942 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2943 }
2944 
2945 /*
2946  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2947  */
2948 void
trace_buffer_unlock_commit_nostack(struct trace_buffer * buffer,struct ring_buffer_event * event)2949 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2950 				   struct ring_buffer_event *event)
2951 {
2952 	__buffer_unlock_commit(buffer, event);
2953 }
2954 
2955 void
trace_function(struct trace_array * tr,unsigned long ip,unsigned long parent_ip,unsigned int trace_ctx,struct ftrace_regs * fregs)2956 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2957 	       parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs)
2958 {
2959 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2960 	struct ring_buffer_event *event;
2961 	struct ftrace_entry *entry;
2962 	int size = sizeof(*entry);
2963 
2964 	size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long);
2965 
2966 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size,
2967 					    trace_ctx);
2968 	if (!event)
2969 		return;
2970 	entry	= ring_buffer_event_data(event);
2971 	entry->ip			= ip;
2972 	entry->parent_ip		= parent_ip;
2973 
2974 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
2975 	if (fregs) {
2976 		for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
2977 			entry->args[i] = ftrace_regs_get_argument(fregs, i);
2978 	}
2979 #endif
2980 
2981 	if (static_branch_unlikely(&trace_function_exports_enabled))
2982 		ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2983 	__buffer_unlock_commit(buffer, event);
2984 }
2985 
2986 #ifdef CONFIG_STACKTRACE
2987 
2988 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2989 #define FTRACE_KSTACK_NESTING	4
2990 
2991 #define FTRACE_KSTACK_ENTRIES	(SZ_4K / FTRACE_KSTACK_NESTING)
2992 
2993 struct ftrace_stack {
2994 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2995 };
2996 
2997 
2998 struct ftrace_stacks {
2999 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
3000 };
3001 
3002 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3003 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3004 
__ftrace_trace_stack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx,int skip,struct pt_regs * regs)3005 static void __ftrace_trace_stack(struct trace_array *tr,
3006 				 struct trace_buffer *buffer,
3007 				 unsigned int trace_ctx,
3008 				 int skip, struct pt_regs *regs)
3009 {
3010 	struct ring_buffer_event *event;
3011 	unsigned int size, nr_entries;
3012 	struct ftrace_stack *fstack;
3013 	struct stack_entry *entry;
3014 	int stackidx;
3015 
3016 	/*
3017 	 * Add one, for this function and the call to save_stack_trace()
3018 	 * If regs is set, then these functions will not be in the way.
3019 	 */
3020 #ifndef CONFIG_UNWINDER_ORC
3021 	if (!regs)
3022 		skip++;
3023 #endif
3024 
3025 	guard(preempt_notrace)();
3026 
3027 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3028 
3029 	/* This should never happen. If it does, yell once and skip */
3030 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3031 		goto out;
3032 
3033 	/*
3034 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3035 	 * interrupt will either see the value pre increment or post
3036 	 * increment. If the interrupt happens pre increment it will have
3037 	 * restored the counter when it returns.  We just need a barrier to
3038 	 * keep gcc from moving things around.
3039 	 */
3040 	barrier();
3041 
3042 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3043 	size = ARRAY_SIZE(fstack->calls);
3044 
3045 	if (regs) {
3046 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3047 						   size, skip);
3048 	} else {
3049 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3050 	}
3051 
3052 #ifdef CONFIG_DYNAMIC_FTRACE
3053 	/* Mark entry of stack trace as trampoline code */
3054 	if (tr->ops && tr->ops->trampoline) {
3055 		unsigned long tramp_start = tr->ops->trampoline;
3056 		unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
3057 		unsigned long *calls = fstack->calls;
3058 
3059 		for (int i = 0; i < nr_entries; i++) {
3060 			if (calls[i] >= tramp_start && calls[i] < tramp_end)
3061 				calls[i] = FTRACE_TRAMPOLINE_MARKER;
3062 		}
3063 	}
3064 #endif
3065 
3066 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3067 				    struct_size(entry, caller, nr_entries),
3068 				    trace_ctx);
3069 	if (!event)
3070 		goto out;
3071 	entry = ring_buffer_event_data(event);
3072 
3073 	entry->size = nr_entries;
3074 	memcpy(&entry->caller, fstack->calls,
3075 	       flex_array_size(entry, caller, nr_entries));
3076 
3077 	__buffer_unlock_commit(buffer, event);
3078 
3079  out:
3080 	/* Again, don't let gcc optimize things here */
3081 	barrier();
3082 	__this_cpu_dec(ftrace_stack_reserve);
3083 }
3084 
ftrace_trace_stack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx,int skip,struct pt_regs * regs)3085 static inline void ftrace_trace_stack(struct trace_array *tr,
3086 				      struct trace_buffer *buffer,
3087 				      unsigned int trace_ctx,
3088 				      int skip, struct pt_regs *regs)
3089 {
3090 	if (!(tr->trace_flags & TRACE_ITER(STACKTRACE)))
3091 		return;
3092 
3093 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
3094 }
3095 
__trace_stack(struct trace_array * tr,unsigned int trace_ctx,int skip)3096 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3097 		   int skip)
3098 {
3099 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3100 
3101 	if (rcu_is_watching()) {
3102 		__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3103 		return;
3104 	}
3105 
3106 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3107 		return;
3108 
3109 	/*
3110 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3111 	 * but if the above rcu_is_watching() failed, then the NMI
3112 	 * triggered someplace critical, and ct_irq_enter() should
3113 	 * not be called from NMI.
3114 	 */
3115 	if (unlikely(in_nmi()))
3116 		return;
3117 
3118 	ct_irq_enter_irqson();
3119 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3120 	ct_irq_exit_irqson();
3121 }
3122 
3123 /**
3124  * trace_dump_stack - record a stack back trace in the trace buffer
3125  * @skip: Number of functions to skip (helper handlers)
3126  */
trace_dump_stack(int skip)3127 void trace_dump_stack(int skip)
3128 {
3129 	if (tracing_disabled || tracing_selftest_running)
3130 		return;
3131 
3132 #ifndef CONFIG_UNWINDER_ORC
3133 	/* Skip 1 to skip this function. */
3134 	skip++;
3135 #endif
3136 	__ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
3137 				tracing_gen_ctx(), skip, NULL);
3138 }
3139 EXPORT_SYMBOL_GPL(trace_dump_stack);
3140 
3141 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3142 static DEFINE_PER_CPU(int, user_stack_count);
3143 
3144 static void
ftrace_trace_userstack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx)3145 ftrace_trace_userstack(struct trace_array *tr,
3146 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3147 {
3148 	struct ring_buffer_event *event;
3149 	struct userstack_entry *entry;
3150 
3151 	if (!(tr->trace_flags & TRACE_ITER(USERSTACKTRACE)))
3152 		return;
3153 
3154 	/*
3155 	 * NMIs can not handle page faults, even with fix ups.
3156 	 * The save user stack can (and often does) fault.
3157 	 */
3158 	if (unlikely(in_nmi()))
3159 		return;
3160 
3161 	/*
3162 	 * prevent recursion, since the user stack tracing may
3163 	 * trigger other kernel events.
3164 	 */
3165 	guard(preempt)();
3166 	if (__this_cpu_read(user_stack_count))
3167 		return;
3168 
3169 	__this_cpu_inc(user_stack_count);
3170 
3171 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3172 					    sizeof(*entry), trace_ctx);
3173 	if (!event)
3174 		goto out_drop_count;
3175 	entry	= ring_buffer_event_data(event);
3176 
3177 	entry->tgid		= current->tgid;
3178 	memset(&entry->caller, 0, sizeof(entry->caller));
3179 
3180 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3181 	__buffer_unlock_commit(buffer, event);
3182 
3183  out_drop_count:
3184 	__this_cpu_dec(user_stack_count);
3185 }
3186 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
ftrace_trace_userstack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx)3187 static void ftrace_trace_userstack(struct trace_array *tr,
3188 				   struct trace_buffer *buffer,
3189 				   unsigned int trace_ctx)
3190 {
3191 }
3192 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3193 
3194 #endif /* CONFIG_STACKTRACE */
3195 
3196 static inline void
func_repeats_set_delta_ts(struct func_repeats_entry * entry,unsigned long long delta)3197 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3198 			  unsigned long long delta)
3199 {
3200 	entry->bottom_delta_ts = delta & U32_MAX;
3201 	entry->top_delta_ts = (delta >> 32);
3202 }
3203 
trace_last_func_repeats(struct trace_array * tr,struct trace_func_repeats * last_info,unsigned int trace_ctx)3204 void trace_last_func_repeats(struct trace_array *tr,
3205 			     struct trace_func_repeats *last_info,
3206 			     unsigned int trace_ctx)
3207 {
3208 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3209 	struct func_repeats_entry *entry;
3210 	struct ring_buffer_event *event;
3211 	u64 delta;
3212 
3213 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3214 					    sizeof(*entry), trace_ctx);
3215 	if (!event)
3216 		return;
3217 
3218 	delta = ring_buffer_event_time_stamp(buffer, event) -
3219 		last_info->ts_last_call;
3220 
3221 	entry = ring_buffer_event_data(event);
3222 	entry->ip = last_info->ip;
3223 	entry->parent_ip = last_info->parent_ip;
3224 	entry->count = last_info->count;
3225 	func_repeats_set_delta_ts(entry, delta);
3226 
3227 	__buffer_unlock_commit(buffer, event);
3228 }
3229 
3230 /* created for use with alloc_percpu */
3231 struct trace_buffer_struct {
3232 	int nesting;
3233 	char buffer[4][TRACE_BUF_SIZE];
3234 };
3235 
3236 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3237 
3238 /*
3239  * This allows for lockless recording.  If we're nested too deeply, then
3240  * this returns NULL.
3241  */
get_trace_buf(void)3242 static char *get_trace_buf(void)
3243 {
3244 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3245 
3246 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3247 		return NULL;
3248 
3249 	buffer->nesting++;
3250 
3251 	/* Interrupts must see nesting incremented before we use the buffer */
3252 	barrier();
3253 	return &buffer->buffer[buffer->nesting - 1][0];
3254 }
3255 
put_trace_buf(void)3256 static void put_trace_buf(void)
3257 {
3258 	/* Don't let the decrement of nesting leak before this */
3259 	barrier();
3260 	this_cpu_dec(trace_percpu_buffer->nesting);
3261 }
3262 
alloc_percpu_trace_buffer(void)3263 static int alloc_percpu_trace_buffer(void)
3264 {
3265 	struct trace_buffer_struct __percpu *buffers;
3266 
3267 	if (trace_percpu_buffer)
3268 		return 0;
3269 
3270 	buffers = alloc_percpu(struct trace_buffer_struct);
3271 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3272 		return -ENOMEM;
3273 
3274 	trace_percpu_buffer = buffers;
3275 	return 0;
3276 }
3277 
3278 static int buffers_allocated;
3279 
trace_printk_init_buffers(void)3280 void trace_printk_init_buffers(void)
3281 {
3282 	if (buffers_allocated)
3283 		return;
3284 
3285 	if (alloc_percpu_trace_buffer())
3286 		return;
3287 
3288 	/* trace_printk() is for debug use only. Don't use it in production. */
3289 
3290 	pr_warn("\n");
3291 	pr_warn("**********************************************************\n");
3292 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3293 	pr_warn("**                                                      **\n");
3294 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3295 	pr_warn("**                                                      **\n");
3296 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3297 	pr_warn("** unsafe for production use.                           **\n");
3298 	pr_warn("**                                                      **\n");
3299 	pr_warn("** If you see this message and you are not debugging    **\n");
3300 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3301 	pr_warn("**                                                      **\n");
3302 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3303 	pr_warn("**********************************************************\n");
3304 
3305 	/* Expand the buffers to set size */
3306 	tracing_update_buffers(&global_trace);
3307 
3308 	buffers_allocated = 1;
3309 
3310 	/*
3311 	 * trace_printk_init_buffers() can be called by modules.
3312 	 * If that happens, then we need to start cmdline recording
3313 	 * directly here. If the global_trace.buffer is already
3314 	 * allocated here, then this was called by module code.
3315 	 */
3316 	if (global_trace.array_buffer.buffer)
3317 		tracing_start_cmdline_record();
3318 }
3319 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3320 
trace_printk_start_comm(void)3321 void trace_printk_start_comm(void)
3322 {
3323 	/* Start tracing comms if trace printk is set */
3324 	if (!buffers_allocated)
3325 		return;
3326 	tracing_start_cmdline_record();
3327 }
3328 
trace_printk_start_stop_comm(int enabled)3329 static void trace_printk_start_stop_comm(int enabled)
3330 {
3331 	if (!buffers_allocated)
3332 		return;
3333 
3334 	if (enabled)
3335 		tracing_start_cmdline_record();
3336 	else
3337 		tracing_stop_cmdline_record();
3338 }
3339 
3340 /**
3341  * trace_vbprintk - write binary msg to tracing buffer
3342  * @ip:    The address of the caller
3343  * @fmt:   The string format to write to the buffer
3344  * @args:  Arguments for @fmt
3345  */
trace_vbprintk(unsigned long ip,const char * fmt,va_list args)3346 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3347 {
3348 	struct ring_buffer_event *event;
3349 	struct trace_buffer *buffer;
3350 	struct trace_array *tr = READ_ONCE(printk_trace);
3351 	struct bprint_entry *entry;
3352 	unsigned int trace_ctx;
3353 	char *tbuffer;
3354 	int len = 0, size;
3355 
3356 	if (!printk_binsafe(tr))
3357 		return trace_vprintk(ip, fmt, args);
3358 
3359 	if (unlikely(tracing_selftest_running || tracing_disabled))
3360 		return 0;
3361 
3362 	/* Don't pollute graph traces with trace_vprintk internals */
3363 	pause_graph_tracing();
3364 
3365 	trace_ctx = tracing_gen_ctx();
3366 	guard(preempt_notrace)();
3367 
3368 	tbuffer = get_trace_buf();
3369 	if (!tbuffer) {
3370 		len = 0;
3371 		goto out_nobuffer;
3372 	}
3373 
3374 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3375 
3376 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3377 		goto out_put;
3378 
3379 	size = sizeof(*entry) + sizeof(u32) * len;
3380 	buffer = tr->array_buffer.buffer;
3381 	scoped_guard(ring_buffer_nest, buffer) {
3382 		event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3383 						    trace_ctx);
3384 		if (!event)
3385 			goto out_put;
3386 		entry = ring_buffer_event_data(event);
3387 		entry->ip			= ip;
3388 		entry->fmt			= fmt;
3389 
3390 		memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3391 		__buffer_unlock_commit(buffer, event);
3392 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3393 	}
3394 out_put:
3395 	put_trace_buf();
3396 
3397 out_nobuffer:
3398 	unpause_graph_tracing();
3399 
3400 	return len;
3401 }
3402 EXPORT_SYMBOL_GPL(trace_vbprintk);
3403 
3404 static __printf(3, 0)
__trace_array_vprintk(struct trace_buffer * buffer,unsigned long ip,const char * fmt,va_list args)3405 int __trace_array_vprintk(struct trace_buffer *buffer,
3406 			  unsigned long ip, const char *fmt, va_list args)
3407 {
3408 	struct ring_buffer_event *event;
3409 	int len = 0, size;
3410 	struct print_entry *entry;
3411 	unsigned int trace_ctx;
3412 	char *tbuffer;
3413 
3414 	if (tracing_disabled)
3415 		return 0;
3416 
3417 	/* Don't pollute graph traces with trace_vprintk internals */
3418 	pause_graph_tracing();
3419 
3420 	trace_ctx = tracing_gen_ctx();
3421 	guard(preempt_notrace)();
3422 
3423 
3424 	tbuffer = get_trace_buf();
3425 	if (!tbuffer) {
3426 		len = 0;
3427 		goto out_nobuffer;
3428 	}
3429 
3430 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3431 
3432 	size = sizeof(*entry) + len + 1;
3433 	scoped_guard(ring_buffer_nest, buffer) {
3434 		event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3435 						    trace_ctx);
3436 		if (!event)
3437 			goto out;
3438 		entry = ring_buffer_event_data(event);
3439 		entry->ip = ip;
3440 
3441 		memcpy(&entry->buf, tbuffer, len + 1);
3442 		__buffer_unlock_commit(buffer, event);
3443 		ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL);
3444 	}
3445 out:
3446 	put_trace_buf();
3447 
3448 out_nobuffer:
3449 	unpause_graph_tracing();
3450 
3451 	return len;
3452 }
3453 
trace_array_vprintk(struct trace_array * tr,unsigned long ip,const char * fmt,va_list args)3454 int trace_array_vprintk(struct trace_array *tr,
3455 			unsigned long ip, const char *fmt, va_list args)
3456 {
3457 	if (tracing_selftest_running && tr == &global_trace)
3458 		return 0;
3459 
3460 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3461 }
3462 
3463 /**
3464  * trace_array_printk - Print a message to a specific instance
3465  * @tr: The instance trace_array descriptor
3466  * @ip: The instruction pointer that this is called from.
3467  * @fmt: The format to print (printf format)
3468  *
3469  * If a subsystem sets up its own instance, they have the right to
3470  * printk strings into their tracing instance buffer using this
3471  * function. Note, this function will not write into the top level
3472  * buffer (use trace_printk() for that), as writing into the top level
3473  * buffer should only have events that can be individually disabled.
3474  * trace_printk() is only used for debugging a kernel, and should not
3475  * be ever incorporated in normal use.
3476  *
3477  * trace_array_printk() can be used, as it will not add noise to the
3478  * top level tracing buffer.
3479  *
3480  * Note, trace_array_init_printk() must be called on @tr before this
3481  * can be used.
3482  */
trace_array_printk(struct trace_array * tr,unsigned long ip,const char * fmt,...)3483 int trace_array_printk(struct trace_array *tr,
3484 		       unsigned long ip, const char *fmt, ...)
3485 {
3486 	int ret;
3487 	va_list ap;
3488 
3489 	if (!tr)
3490 		return -ENOENT;
3491 
3492 	/* This is only allowed for created instances */
3493 	if (tr == &global_trace)
3494 		return 0;
3495 
3496 	if (!(tr->trace_flags & TRACE_ITER(PRINTK)))
3497 		return 0;
3498 
3499 	va_start(ap, fmt);
3500 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3501 	va_end(ap);
3502 	return ret;
3503 }
3504 EXPORT_SYMBOL_GPL(trace_array_printk);
3505 
3506 /**
3507  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3508  * @tr: The trace array to initialize the buffers for
3509  *
3510  * As trace_array_printk() only writes into instances, they are OK to
3511  * have in the kernel (unlike trace_printk()). This needs to be called
3512  * before trace_array_printk() can be used on a trace_array.
3513  */
trace_array_init_printk(struct trace_array * tr)3514 int trace_array_init_printk(struct trace_array *tr)
3515 {
3516 	if (!tr)
3517 		return -ENOENT;
3518 
3519 	/* This is only allowed for created instances */
3520 	if (tr == &global_trace)
3521 		return -EINVAL;
3522 
3523 	return alloc_percpu_trace_buffer();
3524 }
3525 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3526 
trace_array_printk_buf(struct trace_buffer * buffer,unsigned long ip,const char * fmt,...)3527 int trace_array_printk_buf(struct trace_buffer *buffer,
3528 			   unsigned long ip, const char *fmt, ...)
3529 {
3530 	int ret;
3531 	va_list ap;
3532 
3533 	if (!(printk_trace->trace_flags & TRACE_ITER(PRINTK)))
3534 		return 0;
3535 
3536 	va_start(ap, fmt);
3537 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3538 	va_end(ap);
3539 	return ret;
3540 }
3541 
trace_vprintk(unsigned long ip,const char * fmt,va_list args)3542 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3543 {
3544 	return trace_array_vprintk(printk_trace, ip, fmt, args);
3545 }
3546 EXPORT_SYMBOL_GPL(trace_vprintk);
3547 
trace_iterator_increment(struct trace_iterator * iter)3548 static void trace_iterator_increment(struct trace_iterator *iter)
3549 {
3550 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3551 
3552 	iter->idx++;
3553 	if (buf_iter)
3554 		ring_buffer_iter_advance(buf_iter);
3555 }
3556 
3557 static struct trace_entry *
peek_next_entry(struct trace_iterator * iter,int cpu,u64 * ts,unsigned long * lost_events)3558 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3559 		unsigned long *lost_events)
3560 {
3561 	struct ring_buffer_event *event;
3562 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3563 
3564 	if (buf_iter) {
3565 		event = ring_buffer_iter_peek(buf_iter, ts);
3566 		if (lost_events)
3567 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3568 				(unsigned long)-1 : 0;
3569 	} else {
3570 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3571 					 lost_events);
3572 	}
3573 
3574 	if (event) {
3575 		iter->ent_size = ring_buffer_event_length(event);
3576 		return ring_buffer_event_data(event);
3577 	}
3578 	iter->ent_size = 0;
3579 	return NULL;
3580 }
3581 
3582 static struct trace_entry *
__find_next_entry(struct trace_iterator * iter,int * ent_cpu,unsigned long * missing_events,u64 * ent_ts)3583 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3584 		  unsigned long *missing_events, u64 *ent_ts)
3585 {
3586 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3587 	struct trace_entry *ent, *next = NULL;
3588 	unsigned long lost_events = 0, next_lost = 0;
3589 	int cpu_file = iter->cpu_file;
3590 	u64 next_ts = 0, ts;
3591 	int next_cpu = -1;
3592 	int next_size = 0;
3593 	int cpu;
3594 
3595 	/*
3596 	 * If we are in a per_cpu trace file, don't bother by iterating over
3597 	 * all cpu and peek directly.
3598 	 */
3599 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3600 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3601 			return NULL;
3602 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3603 		if (ent_cpu)
3604 			*ent_cpu = cpu_file;
3605 
3606 		return ent;
3607 	}
3608 
3609 	for_each_tracing_cpu(cpu) {
3610 
3611 		if (ring_buffer_empty_cpu(buffer, cpu))
3612 			continue;
3613 
3614 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3615 
3616 		/*
3617 		 * Pick the entry with the smallest timestamp:
3618 		 */
3619 		if (ent && (!next || ts < next_ts)) {
3620 			next = ent;
3621 			next_cpu = cpu;
3622 			next_ts = ts;
3623 			next_lost = lost_events;
3624 			next_size = iter->ent_size;
3625 		}
3626 	}
3627 
3628 	iter->ent_size = next_size;
3629 
3630 	if (ent_cpu)
3631 		*ent_cpu = next_cpu;
3632 
3633 	if (ent_ts)
3634 		*ent_ts = next_ts;
3635 
3636 	if (missing_events)
3637 		*missing_events = next_lost;
3638 
3639 	return next;
3640 }
3641 
3642 #define STATIC_FMT_BUF_SIZE	128
3643 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3644 
trace_iter_expand_format(struct trace_iterator * iter)3645 char *trace_iter_expand_format(struct trace_iterator *iter)
3646 {
3647 	char *tmp;
3648 
3649 	/*
3650 	 * iter->tr is NULL when used with tp_printk, which makes
3651 	 * this get called where it is not safe to call krealloc().
3652 	 */
3653 	if (!iter->tr || iter->fmt == static_fmt_buf)
3654 		return NULL;
3655 
3656 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3657 		       GFP_KERNEL);
3658 	if (tmp) {
3659 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3660 		iter->fmt = tmp;
3661 	}
3662 
3663 	return tmp;
3664 }
3665 
3666 /* Returns true if the string is safe to dereference from an event */
trace_safe_str(struct trace_iterator * iter,const char * str)3667 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3668 {
3669 	unsigned long addr = (unsigned long)str;
3670 	struct trace_event *trace_event;
3671 	struct trace_event_call *event;
3672 
3673 	/* OK if part of the event data */
3674 	if ((addr >= (unsigned long)iter->ent) &&
3675 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3676 		return true;
3677 
3678 	/* OK if part of the temp seq buffer */
3679 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3680 	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3681 		return true;
3682 
3683 	/* Core rodata can not be freed */
3684 	if (is_kernel_rodata(addr))
3685 		return true;
3686 
3687 	if (trace_is_tracepoint_string(str))
3688 		return true;
3689 
3690 	/*
3691 	 * Now this could be a module event, referencing core module
3692 	 * data, which is OK.
3693 	 */
3694 	if (!iter->ent)
3695 		return false;
3696 
3697 	trace_event = ftrace_find_event(iter->ent->type);
3698 	if (!trace_event)
3699 		return false;
3700 
3701 	event = container_of(trace_event, struct trace_event_call, event);
3702 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3703 		return false;
3704 
3705 	/* Would rather have rodata, but this will suffice */
3706 	if (within_module_core(addr, event->module))
3707 		return true;
3708 
3709 	return false;
3710 }
3711 
3712 /**
3713  * ignore_event - Check dereferenced fields while writing to the seq buffer
3714  * @iter: The iterator that holds the seq buffer and the event being printed
3715  *
3716  * At boot up, test_event_printk() will flag any event that dereferences
3717  * a string with "%s" that does exist in the ring buffer. It may still
3718  * be valid, as the string may point to a static string in the kernel
3719  * rodata that never gets freed. But if the string pointer is pointing
3720  * to something that was allocated, there's a chance that it can be freed
3721  * by the time the user reads the trace. This would cause a bad memory
3722  * access by the kernel and possibly crash the system.
3723  *
3724  * This function will check if the event has any fields flagged as needing
3725  * to be checked at runtime and perform those checks.
3726  *
3727  * If it is found that a field is unsafe, it will write into the @iter->seq
3728  * a message stating what was found to be unsafe.
3729  *
3730  * @return: true if the event is unsafe and should be ignored,
3731  *          false otherwise.
3732  */
ignore_event(struct trace_iterator * iter)3733 bool ignore_event(struct trace_iterator *iter)
3734 {
3735 	struct ftrace_event_field *field;
3736 	struct trace_event *trace_event;
3737 	struct trace_event_call *event;
3738 	struct list_head *head;
3739 	struct trace_seq *seq;
3740 	const void *ptr;
3741 
3742 	trace_event = ftrace_find_event(iter->ent->type);
3743 
3744 	seq = &iter->seq;
3745 
3746 	if (!trace_event) {
3747 		trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
3748 		return true;
3749 	}
3750 
3751 	event = container_of(trace_event, struct trace_event_call, event);
3752 	if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
3753 		return false;
3754 
3755 	head = trace_get_fields(event);
3756 	if (!head) {
3757 		trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
3758 				 trace_event_name(event));
3759 		return true;
3760 	}
3761 
3762 	/* Offsets are from the iter->ent that points to the raw event */
3763 	ptr = iter->ent;
3764 
3765 	list_for_each_entry(field, head, link) {
3766 		const char *str;
3767 		bool good;
3768 
3769 		if (!field->needs_test)
3770 			continue;
3771 
3772 		str = *(const char **)(ptr + field->offset);
3773 
3774 		good = trace_safe_str(iter, str);
3775 
3776 		/*
3777 		 * If you hit this warning, it is likely that the
3778 		 * trace event in question used %s on a string that
3779 		 * was saved at the time of the event, but may not be
3780 		 * around when the trace is read. Use __string(),
3781 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3782 		 * instead. See samples/trace_events/trace-events-sample.h
3783 		 * for reference.
3784 		 */
3785 		if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
3786 			      trace_event_name(event), field->name)) {
3787 			trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
3788 					 trace_event_name(event), field->name);
3789 			return true;
3790 		}
3791 	}
3792 	return false;
3793 }
3794 
trace_event_format(struct trace_iterator * iter,const char * fmt)3795 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3796 {
3797 	const char *p, *new_fmt;
3798 	char *q;
3799 
3800 	if (WARN_ON_ONCE(!fmt))
3801 		return fmt;
3802 
3803 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER(HASH_PTR))
3804 		return fmt;
3805 
3806 	p = fmt;
3807 	new_fmt = q = iter->fmt;
3808 	while (*p) {
3809 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3810 			if (!trace_iter_expand_format(iter))
3811 				return fmt;
3812 
3813 			q += iter->fmt - new_fmt;
3814 			new_fmt = iter->fmt;
3815 		}
3816 
3817 		*q++ = *p++;
3818 
3819 		/* Replace %p with %px */
3820 		if (p[-1] == '%') {
3821 			if (p[0] == '%') {
3822 				*q++ = *p++;
3823 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3824 				*q++ = *p++;
3825 				*q++ = 'x';
3826 			}
3827 		}
3828 	}
3829 	*q = '\0';
3830 
3831 	return new_fmt;
3832 }
3833 
3834 #define STATIC_TEMP_BUF_SIZE	128
3835 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3836 
3837 /* Find the next real entry, without updating the iterator itself */
trace_find_next_entry(struct trace_iterator * iter,int * ent_cpu,u64 * ent_ts)3838 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3839 					  int *ent_cpu, u64 *ent_ts)
3840 {
3841 	/* __find_next_entry will reset ent_size */
3842 	int ent_size = iter->ent_size;
3843 	struct trace_entry *entry;
3844 
3845 	/*
3846 	 * If called from ftrace_dump(), then the iter->temp buffer
3847 	 * will be the static_temp_buf and not created from kmalloc.
3848 	 * If the entry size is greater than the buffer, we can
3849 	 * not save it. Just return NULL in that case. This is only
3850 	 * used to add markers when two consecutive events' time
3851 	 * stamps have a large delta. See trace_print_lat_context()
3852 	 */
3853 	if (iter->temp == static_temp_buf &&
3854 	    STATIC_TEMP_BUF_SIZE < ent_size)
3855 		return NULL;
3856 
3857 	/*
3858 	 * The __find_next_entry() may call peek_next_entry(), which may
3859 	 * call ring_buffer_peek() that may make the contents of iter->ent
3860 	 * undefined. Need to copy iter->ent now.
3861 	 */
3862 	if (iter->ent && iter->ent != iter->temp) {
3863 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3864 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3865 			void *temp;
3866 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3867 			if (!temp)
3868 				return NULL;
3869 			kfree(iter->temp);
3870 			iter->temp = temp;
3871 			iter->temp_size = iter->ent_size;
3872 		}
3873 		memcpy(iter->temp, iter->ent, iter->ent_size);
3874 		iter->ent = iter->temp;
3875 	}
3876 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3877 	/* Put back the original ent_size */
3878 	iter->ent_size = ent_size;
3879 
3880 	return entry;
3881 }
3882 
3883 /* Find the next real entry, and increment the iterator to the next entry */
trace_find_next_entry_inc(struct trace_iterator * iter)3884 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3885 {
3886 	iter->ent = __find_next_entry(iter, &iter->cpu,
3887 				      &iter->lost_events, &iter->ts);
3888 
3889 	if (iter->ent)
3890 		trace_iterator_increment(iter);
3891 
3892 	return iter->ent ? iter : NULL;
3893 }
3894 
trace_consume(struct trace_iterator * iter)3895 static void trace_consume(struct trace_iterator *iter)
3896 {
3897 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3898 			    &iter->lost_events);
3899 }
3900 
s_next(struct seq_file * m,void * v,loff_t * pos)3901 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3902 {
3903 	struct trace_iterator *iter = m->private;
3904 	int i = (int)*pos;
3905 	void *ent;
3906 
3907 	WARN_ON_ONCE(iter->leftover);
3908 
3909 	(*pos)++;
3910 
3911 	/* can't go backwards */
3912 	if (iter->idx > i)
3913 		return NULL;
3914 
3915 	if (iter->idx < 0)
3916 		ent = trace_find_next_entry_inc(iter);
3917 	else
3918 		ent = iter;
3919 
3920 	while (ent && iter->idx < i)
3921 		ent = trace_find_next_entry_inc(iter);
3922 
3923 	iter->pos = *pos;
3924 
3925 	return ent;
3926 }
3927 
tracing_iter_reset(struct trace_iterator * iter,int cpu)3928 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3929 {
3930 	struct ring_buffer_iter *buf_iter;
3931 	unsigned long entries = 0;
3932 	u64 ts;
3933 
3934 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3935 
3936 	buf_iter = trace_buffer_iter(iter, cpu);
3937 	if (!buf_iter)
3938 		return;
3939 
3940 	ring_buffer_iter_reset(buf_iter);
3941 
3942 	/*
3943 	 * We could have the case with the max latency tracers
3944 	 * that a reset never took place on a cpu. This is evident
3945 	 * by the timestamp being before the start of the buffer.
3946 	 */
3947 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
3948 		if (ts >= iter->array_buffer->time_start)
3949 			break;
3950 		entries++;
3951 		ring_buffer_iter_advance(buf_iter);
3952 		/* This could be a big loop */
3953 		cond_resched();
3954 	}
3955 
3956 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3957 }
3958 
3959 /*
3960  * The current tracer is copied to avoid a global locking
3961  * all around.
3962  */
s_start(struct seq_file * m,loff_t * pos)3963 static void *s_start(struct seq_file *m, loff_t *pos)
3964 {
3965 	struct trace_iterator *iter = m->private;
3966 	struct trace_array *tr = iter->tr;
3967 	int cpu_file = iter->cpu_file;
3968 	void *p = NULL;
3969 	loff_t l = 0;
3970 	int cpu;
3971 
3972 	mutex_lock(&trace_types_lock);
3973 	if (unlikely(tr->current_trace != iter->trace)) {
3974 		/* Close iter->trace before switching to the new current tracer */
3975 		if (iter->trace->close)
3976 			iter->trace->close(iter);
3977 		iter->trace = tr->current_trace;
3978 		/* Reopen the new current tracer */
3979 		if (iter->trace->open)
3980 			iter->trace->open(iter);
3981 	}
3982 	mutex_unlock(&trace_types_lock);
3983 
3984 #ifdef CONFIG_TRACER_MAX_TRACE
3985 	if (iter->snapshot && iter->trace->use_max_tr)
3986 		return ERR_PTR(-EBUSY);
3987 #endif
3988 
3989 	if (*pos != iter->pos) {
3990 		iter->ent = NULL;
3991 		iter->cpu = 0;
3992 		iter->idx = -1;
3993 
3994 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3995 			for_each_tracing_cpu(cpu)
3996 				tracing_iter_reset(iter, cpu);
3997 		} else
3998 			tracing_iter_reset(iter, cpu_file);
3999 
4000 		iter->leftover = 0;
4001 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4002 			;
4003 
4004 	} else {
4005 		/*
4006 		 * If we overflowed the seq_file before, then we want
4007 		 * to just reuse the trace_seq buffer again.
4008 		 */
4009 		if (iter->leftover)
4010 			p = iter;
4011 		else {
4012 			l = *pos - 1;
4013 			p = s_next(m, p, &l);
4014 		}
4015 	}
4016 
4017 	trace_event_read_lock();
4018 	trace_access_lock(cpu_file);
4019 	return p;
4020 }
4021 
s_stop(struct seq_file * m,void * p)4022 static void s_stop(struct seq_file *m, void *p)
4023 {
4024 	struct trace_iterator *iter = m->private;
4025 
4026 #ifdef CONFIG_TRACER_MAX_TRACE
4027 	if (iter->snapshot && iter->trace->use_max_tr)
4028 		return;
4029 #endif
4030 
4031 	trace_access_unlock(iter->cpu_file);
4032 	trace_event_read_unlock();
4033 }
4034 
4035 static void
get_total_entries_cpu(struct array_buffer * buf,unsigned long * total,unsigned long * entries,int cpu)4036 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4037 		      unsigned long *entries, int cpu)
4038 {
4039 	unsigned long count;
4040 
4041 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4042 	/*
4043 	 * If this buffer has skipped entries, then we hold all
4044 	 * entries for the trace and we need to ignore the
4045 	 * ones before the time stamp.
4046 	 */
4047 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4048 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4049 		/* total is the same as the entries */
4050 		*total = count;
4051 	} else
4052 		*total = count +
4053 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4054 	*entries = count;
4055 }
4056 
4057 static void
get_total_entries(struct array_buffer * buf,unsigned long * total,unsigned long * entries)4058 get_total_entries(struct array_buffer *buf,
4059 		  unsigned long *total, unsigned long *entries)
4060 {
4061 	unsigned long t, e;
4062 	int cpu;
4063 
4064 	*total = 0;
4065 	*entries = 0;
4066 
4067 	for_each_tracing_cpu(cpu) {
4068 		get_total_entries_cpu(buf, &t, &e, cpu);
4069 		*total += t;
4070 		*entries += e;
4071 	}
4072 }
4073 
trace_total_entries_cpu(struct trace_array * tr,int cpu)4074 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4075 {
4076 	unsigned long total, entries;
4077 
4078 	if (!tr)
4079 		tr = &global_trace;
4080 
4081 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4082 
4083 	return entries;
4084 }
4085 
trace_total_entries(struct trace_array * tr)4086 unsigned long trace_total_entries(struct trace_array *tr)
4087 {
4088 	unsigned long total, entries;
4089 
4090 	if (!tr)
4091 		tr = &global_trace;
4092 
4093 	get_total_entries(&tr->array_buffer, &total, &entries);
4094 
4095 	return entries;
4096 }
4097 
print_lat_help_header(struct seq_file * m)4098 static void print_lat_help_header(struct seq_file *m)
4099 {
4100 	seq_puts(m, "#                    _------=> CPU#            \n"
4101 		    "#                   / _-----=> irqs-off/BH-disabled\n"
4102 		    "#                  | / _----=> need-resched    \n"
4103 		    "#                  || / _---=> hardirq/softirq \n"
4104 		    "#                  ||| / _--=> preempt-depth   \n"
4105 		    "#                  |||| / _-=> migrate-disable \n"
4106 		    "#                  ||||| /     delay           \n"
4107 		    "#  cmd     pid     |||||| time  |   caller     \n"
4108 		    "#     \\   /        ||||||  \\    |    /       \n");
4109 }
4110 
print_event_info(struct array_buffer * buf,struct seq_file * m)4111 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4112 {
4113 	unsigned long total;
4114 	unsigned long entries;
4115 
4116 	get_total_entries(buf, &total, &entries);
4117 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4118 		   entries, total, num_online_cpus());
4119 	seq_puts(m, "#\n");
4120 }
4121 
print_func_help_header(struct array_buffer * buf,struct seq_file * m,unsigned int flags)4122 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4123 				   unsigned int flags)
4124 {
4125 	bool tgid = flags & TRACE_ITER(RECORD_TGID);
4126 
4127 	print_event_info(buf, m);
4128 
4129 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4130 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4131 }
4132 
print_func_help_header_irq(struct array_buffer * buf,struct seq_file * m,unsigned int flags)4133 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4134 				       unsigned int flags)
4135 {
4136 	bool tgid = flags & TRACE_ITER(RECORD_TGID);
4137 	static const char space[] = "            ";
4138 	int prec = tgid ? 12 : 2;
4139 
4140 	print_event_info(buf, m);
4141 
4142 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4143 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4144 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4145 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4146 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4147 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4148 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4149 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4150 }
4151 
4152 void
print_trace_header(struct seq_file * m,struct trace_iterator * iter)4153 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4154 {
4155 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4156 	struct array_buffer *buf = iter->array_buffer;
4157 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4158 	struct tracer *type = iter->trace;
4159 	unsigned long entries;
4160 	unsigned long total;
4161 	const char *name = type->name;
4162 
4163 	get_total_entries(buf, &total, &entries);
4164 
4165 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4166 		   name, init_utsname()->release);
4167 	seq_puts(m, "# -----------------------------------"
4168 		 "---------------------------------\n");
4169 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4170 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4171 		   nsecs_to_usecs(data->saved_latency),
4172 		   entries,
4173 		   total,
4174 		   buf->cpu,
4175 		   preempt_model_str(),
4176 		   /* These are reserved for later use */
4177 		   0, 0, 0, 0);
4178 #ifdef CONFIG_SMP
4179 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4180 #else
4181 	seq_puts(m, ")\n");
4182 #endif
4183 	seq_puts(m, "#    -----------------\n");
4184 	seq_printf(m, "#    | task: %.16s-%d "
4185 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4186 		   data->comm, data->pid,
4187 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4188 		   data->policy, data->rt_priority);
4189 	seq_puts(m, "#    -----------------\n");
4190 
4191 	if (data->critical_start) {
4192 		seq_puts(m, "#  => started at: ");
4193 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4194 		trace_print_seq(m, &iter->seq);
4195 		seq_puts(m, "\n#  => ended at:   ");
4196 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4197 		trace_print_seq(m, &iter->seq);
4198 		seq_puts(m, "\n#\n");
4199 	}
4200 
4201 	seq_puts(m, "#\n");
4202 }
4203 
test_cpu_buff_start(struct trace_iterator * iter)4204 static void test_cpu_buff_start(struct trace_iterator *iter)
4205 {
4206 	struct trace_seq *s = &iter->seq;
4207 	struct trace_array *tr = iter->tr;
4208 
4209 	if (!(tr->trace_flags & TRACE_ITER(ANNOTATE)))
4210 		return;
4211 
4212 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4213 		return;
4214 
4215 	if (cpumask_available(iter->started) &&
4216 	    cpumask_test_cpu(iter->cpu, iter->started))
4217 		return;
4218 
4219 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4220 		return;
4221 
4222 	if (cpumask_available(iter->started))
4223 		cpumask_set_cpu(iter->cpu, iter->started);
4224 
4225 	/* Don't print started cpu buffer for the first entry of the trace */
4226 	if (iter->idx > 1)
4227 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4228 				iter->cpu);
4229 }
4230 
4231 #ifdef CONFIG_FTRACE_SYSCALLS
is_syscall_event(struct trace_event * event)4232 static bool is_syscall_event(struct trace_event *event)
4233 {
4234 	return (event->funcs == &enter_syscall_print_funcs) ||
4235 	       (event->funcs == &exit_syscall_print_funcs);
4236 
4237 }
4238 #define syscall_buf_size CONFIG_TRACE_SYSCALL_BUF_SIZE_DEFAULT
4239 #else
is_syscall_event(struct trace_event * event)4240 static inline bool is_syscall_event(struct trace_event *event)
4241 {
4242 	return false;
4243 }
4244 #define syscall_buf_size 0
4245 #endif /* CONFIG_FTRACE_SYSCALLS */
4246 
print_trace_fmt(struct trace_iterator * iter)4247 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4248 {
4249 	struct trace_array *tr = iter->tr;
4250 	struct trace_seq *s = &iter->seq;
4251 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4252 	struct trace_entry *entry;
4253 	struct trace_event *event;
4254 
4255 	entry = iter->ent;
4256 
4257 	test_cpu_buff_start(iter);
4258 
4259 	event = ftrace_find_event(entry->type);
4260 
4261 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
4262 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4263 			trace_print_lat_context(iter);
4264 		else
4265 			trace_print_context(iter);
4266 	}
4267 
4268 	if (trace_seq_has_overflowed(s))
4269 		return TRACE_TYPE_PARTIAL_LINE;
4270 
4271 	if (event) {
4272 		if (tr->trace_flags & TRACE_ITER(FIELDS))
4273 			return print_event_fields(iter, event);
4274 		/*
4275 		 * For TRACE_EVENT() events, the print_fmt is not
4276 		 * safe to use if the array has delta offsets
4277 		 * Force printing via the fields.
4278 		 */
4279 		if ((tr->text_delta)) {
4280 			/* ftrace and system call events are still OK */
4281 			if ((event->type > __TRACE_LAST_TYPE) &&
4282 			    !is_syscall_event(event))
4283 			return print_event_fields(iter, event);
4284 		}
4285 		return event->funcs->trace(iter, sym_flags, event);
4286 	}
4287 
4288 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4289 
4290 	return trace_handle_return(s);
4291 }
4292 
print_raw_fmt(struct trace_iterator * iter)4293 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4294 {
4295 	struct trace_array *tr = iter->tr;
4296 	struct trace_seq *s = &iter->seq;
4297 	struct trace_entry *entry;
4298 	struct trace_event *event;
4299 
4300 	entry = iter->ent;
4301 
4302 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO))
4303 		trace_seq_printf(s, "%d %d %llu ",
4304 				 entry->pid, iter->cpu, iter->ts);
4305 
4306 	if (trace_seq_has_overflowed(s))
4307 		return TRACE_TYPE_PARTIAL_LINE;
4308 
4309 	event = ftrace_find_event(entry->type);
4310 	if (event)
4311 		return event->funcs->raw(iter, 0, event);
4312 
4313 	trace_seq_printf(s, "%d ?\n", entry->type);
4314 
4315 	return trace_handle_return(s);
4316 }
4317 
print_hex_fmt(struct trace_iterator * iter)4318 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4319 {
4320 	struct trace_array *tr = iter->tr;
4321 	struct trace_seq *s = &iter->seq;
4322 	unsigned char newline = '\n';
4323 	struct trace_entry *entry;
4324 	struct trace_event *event;
4325 
4326 	entry = iter->ent;
4327 
4328 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
4329 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4330 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4331 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4332 		if (trace_seq_has_overflowed(s))
4333 			return TRACE_TYPE_PARTIAL_LINE;
4334 	}
4335 
4336 	event = ftrace_find_event(entry->type);
4337 	if (event) {
4338 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4339 		if (ret != TRACE_TYPE_HANDLED)
4340 			return ret;
4341 	}
4342 
4343 	SEQ_PUT_FIELD(s, newline);
4344 
4345 	return trace_handle_return(s);
4346 }
4347 
print_bin_fmt(struct trace_iterator * iter)4348 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4349 {
4350 	struct trace_array *tr = iter->tr;
4351 	struct trace_seq *s = &iter->seq;
4352 	struct trace_entry *entry;
4353 	struct trace_event *event;
4354 
4355 	entry = iter->ent;
4356 
4357 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
4358 		SEQ_PUT_FIELD(s, entry->pid);
4359 		SEQ_PUT_FIELD(s, iter->cpu);
4360 		SEQ_PUT_FIELD(s, iter->ts);
4361 		if (trace_seq_has_overflowed(s))
4362 			return TRACE_TYPE_PARTIAL_LINE;
4363 	}
4364 
4365 	event = ftrace_find_event(entry->type);
4366 	return event ? event->funcs->binary(iter, 0, event) :
4367 		TRACE_TYPE_HANDLED;
4368 }
4369 
trace_empty(struct trace_iterator * iter)4370 int trace_empty(struct trace_iterator *iter)
4371 {
4372 	struct ring_buffer_iter *buf_iter;
4373 	int cpu;
4374 
4375 	/* If we are looking at one CPU buffer, only check that one */
4376 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4377 		cpu = iter->cpu_file;
4378 		buf_iter = trace_buffer_iter(iter, cpu);
4379 		if (buf_iter) {
4380 			if (!ring_buffer_iter_empty(buf_iter))
4381 				return 0;
4382 		} else {
4383 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4384 				return 0;
4385 		}
4386 		return 1;
4387 	}
4388 
4389 	for_each_tracing_cpu(cpu) {
4390 		buf_iter = trace_buffer_iter(iter, cpu);
4391 		if (buf_iter) {
4392 			if (!ring_buffer_iter_empty(buf_iter))
4393 				return 0;
4394 		} else {
4395 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4396 				return 0;
4397 		}
4398 	}
4399 
4400 	return 1;
4401 }
4402 
4403 /*  Called with trace_event_read_lock() held. */
print_trace_line(struct trace_iterator * iter)4404 enum print_line_t print_trace_line(struct trace_iterator *iter)
4405 {
4406 	struct trace_array *tr = iter->tr;
4407 	unsigned long trace_flags = tr->trace_flags;
4408 	enum print_line_t ret;
4409 
4410 	if (iter->lost_events) {
4411 		if (iter->lost_events == (unsigned long)-1)
4412 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4413 					 iter->cpu);
4414 		else
4415 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4416 					 iter->cpu, iter->lost_events);
4417 		if (trace_seq_has_overflowed(&iter->seq))
4418 			return TRACE_TYPE_PARTIAL_LINE;
4419 	}
4420 
4421 	if (iter->trace && iter->trace->print_line) {
4422 		ret = iter->trace->print_line(iter);
4423 		if (ret != TRACE_TYPE_UNHANDLED)
4424 			return ret;
4425 	}
4426 
4427 	if (iter->ent->type == TRACE_BPUTS &&
4428 			trace_flags & TRACE_ITER(PRINTK) &&
4429 			trace_flags & TRACE_ITER(PRINTK_MSGONLY))
4430 		return trace_print_bputs_msg_only(iter);
4431 
4432 	if (iter->ent->type == TRACE_BPRINT &&
4433 			trace_flags & TRACE_ITER(PRINTK) &&
4434 			trace_flags & TRACE_ITER(PRINTK_MSGONLY))
4435 		return trace_print_bprintk_msg_only(iter);
4436 
4437 	if (iter->ent->type == TRACE_PRINT &&
4438 			trace_flags & TRACE_ITER(PRINTK) &&
4439 			trace_flags & TRACE_ITER(PRINTK_MSGONLY))
4440 		return trace_print_printk_msg_only(iter);
4441 
4442 	if (trace_flags & TRACE_ITER(BIN))
4443 		return print_bin_fmt(iter);
4444 
4445 	if (trace_flags & TRACE_ITER(HEX))
4446 		return print_hex_fmt(iter);
4447 
4448 	if (trace_flags & TRACE_ITER(RAW))
4449 		return print_raw_fmt(iter);
4450 
4451 	return print_trace_fmt(iter);
4452 }
4453 
trace_latency_header(struct seq_file * m)4454 void trace_latency_header(struct seq_file *m)
4455 {
4456 	struct trace_iterator *iter = m->private;
4457 	struct trace_array *tr = iter->tr;
4458 
4459 	/* print nothing if the buffers are empty */
4460 	if (trace_empty(iter))
4461 		return;
4462 
4463 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4464 		print_trace_header(m, iter);
4465 
4466 	if (!(tr->trace_flags & TRACE_ITER(VERBOSE)))
4467 		print_lat_help_header(m);
4468 }
4469 
trace_default_header(struct seq_file * m)4470 void trace_default_header(struct seq_file *m)
4471 {
4472 	struct trace_iterator *iter = m->private;
4473 	struct trace_array *tr = iter->tr;
4474 	unsigned long trace_flags = tr->trace_flags;
4475 
4476 	if (!(trace_flags & TRACE_ITER(CONTEXT_INFO)))
4477 		return;
4478 
4479 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4480 		/* print nothing if the buffers are empty */
4481 		if (trace_empty(iter))
4482 			return;
4483 		print_trace_header(m, iter);
4484 		if (!(trace_flags & TRACE_ITER(VERBOSE)))
4485 			print_lat_help_header(m);
4486 	} else {
4487 		if (!(trace_flags & TRACE_ITER(VERBOSE))) {
4488 			if (trace_flags & TRACE_ITER(IRQ_INFO))
4489 				print_func_help_header_irq(iter->array_buffer,
4490 							   m, trace_flags);
4491 			else
4492 				print_func_help_header(iter->array_buffer, m,
4493 						       trace_flags);
4494 		}
4495 	}
4496 }
4497 
test_ftrace_alive(struct seq_file * m)4498 static void test_ftrace_alive(struct seq_file *m)
4499 {
4500 	if (!ftrace_is_dead())
4501 		return;
4502 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4503 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4504 }
4505 
4506 #ifdef CONFIG_TRACER_MAX_TRACE
show_snapshot_main_help(struct seq_file * m)4507 static void show_snapshot_main_help(struct seq_file *m)
4508 {
4509 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4510 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4511 		    "#                      Takes a snapshot of the main buffer.\n"
4512 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4513 		    "#                      (Doesn't have to be '2' works with any number that\n"
4514 		    "#                       is not a '0' or '1')\n");
4515 }
4516 
show_snapshot_percpu_help(struct seq_file * m)4517 static void show_snapshot_percpu_help(struct seq_file *m)
4518 {
4519 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4520 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4521 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4522 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4523 #else
4524 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4525 		    "#                     Must use main snapshot file to allocate.\n");
4526 #endif
4527 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4528 		    "#                      (Doesn't have to be '2' works with any number that\n"
4529 		    "#                       is not a '0' or '1')\n");
4530 }
4531 
print_snapshot_help(struct seq_file * m,struct trace_iterator * iter)4532 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4533 {
4534 	if (iter->tr->allocated_snapshot)
4535 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4536 	else
4537 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4538 
4539 	seq_puts(m, "# Snapshot commands:\n");
4540 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4541 		show_snapshot_main_help(m);
4542 	else
4543 		show_snapshot_percpu_help(m);
4544 }
4545 #else
4546 /* Should never be called */
print_snapshot_help(struct seq_file * m,struct trace_iterator * iter)4547 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4548 #endif
4549 
s_show(struct seq_file * m,void * v)4550 static int s_show(struct seq_file *m, void *v)
4551 {
4552 	struct trace_iterator *iter = v;
4553 	int ret;
4554 
4555 	if (iter->ent == NULL) {
4556 		if (iter->tr) {
4557 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4558 			seq_puts(m, "#\n");
4559 			test_ftrace_alive(m);
4560 		}
4561 		if (iter->snapshot && trace_empty(iter))
4562 			print_snapshot_help(m, iter);
4563 		else if (iter->trace && iter->trace->print_header)
4564 			iter->trace->print_header(m);
4565 		else
4566 			trace_default_header(m);
4567 
4568 	} else if (iter->leftover) {
4569 		/*
4570 		 * If we filled the seq_file buffer earlier, we
4571 		 * want to just show it now.
4572 		 */
4573 		ret = trace_print_seq(m, &iter->seq);
4574 
4575 		/* ret should this time be zero, but you never know */
4576 		iter->leftover = ret;
4577 
4578 	} else {
4579 		ret = print_trace_line(iter);
4580 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4581 			iter->seq.full = 0;
4582 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4583 		}
4584 		ret = trace_print_seq(m, &iter->seq);
4585 		/*
4586 		 * If we overflow the seq_file buffer, then it will
4587 		 * ask us for this data again at start up.
4588 		 * Use that instead.
4589 		 *  ret is 0 if seq_file write succeeded.
4590 		 *        -1 otherwise.
4591 		 */
4592 		iter->leftover = ret;
4593 	}
4594 
4595 	return 0;
4596 }
4597 
4598 /*
4599  * Should be used after trace_array_get(), trace_types_lock
4600  * ensures that i_cdev was already initialized.
4601  */
tracing_get_cpu(struct inode * inode)4602 static inline int tracing_get_cpu(struct inode *inode)
4603 {
4604 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4605 		return (long)inode->i_cdev - 1;
4606 	return RING_BUFFER_ALL_CPUS;
4607 }
4608 
4609 static const struct seq_operations tracer_seq_ops = {
4610 	.start		= s_start,
4611 	.next		= s_next,
4612 	.stop		= s_stop,
4613 	.show		= s_show,
4614 };
4615 
4616 /*
4617  * Note, as iter itself can be allocated and freed in different
4618  * ways, this function is only used to free its content, and not
4619  * the iterator itself. The only requirement to all the allocations
4620  * is that it must zero all fields (kzalloc), as freeing works with
4621  * ethier allocated content or NULL.
4622  */
free_trace_iter_content(struct trace_iterator * iter)4623 static void free_trace_iter_content(struct trace_iterator *iter)
4624 {
4625 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
4626 	if (iter->fmt != static_fmt_buf)
4627 		kfree(iter->fmt);
4628 
4629 	kfree(iter->temp);
4630 	kfree(iter->buffer_iter);
4631 	mutex_destroy(&iter->mutex);
4632 	free_cpumask_var(iter->started);
4633 }
4634 
4635 static struct trace_iterator *
__tracing_open(struct inode * inode,struct file * file,bool snapshot)4636 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4637 {
4638 	struct trace_array *tr = inode->i_private;
4639 	struct trace_iterator *iter;
4640 	int cpu;
4641 
4642 	if (tracing_disabled)
4643 		return ERR_PTR(-ENODEV);
4644 
4645 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4646 	if (!iter)
4647 		return ERR_PTR(-ENOMEM);
4648 
4649 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4650 				    GFP_KERNEL);
4651 	if (!iter->buffer_iter)
4652 		goto release;
4653 
4654 	/*
4655 	 * trace_find_next_entry() may need to save off iter->ent.
4656 	 * It will place it into the iter->temp buffer. As most
4657 	 * events are less than 128, allocate a buffer of that size.
4658 	 * If one is greater, then trace_find_next_entry() will
4659 	 * allocate a new buffer to adjust for the bigger iter->ent.
4660 	 * It's not critical if it fails to get allocated here.
4661 	 */
4662 	iter->temp = kmalloc(128, GFP_KERNEL);
4663 	if (iter->temp)
4664 		iter->temp_size = 128;
4665 
4666 	/*
4667 	 * trace_event_printf() may need to modify given format
4668 	 * string to replace %p with %px so that it shows real address
4669 	 * instead of hash value. However, that is only for the event
4670 	 * tracing, other tracer may not need. Defer the allocation
4671 	 * until it is needed.
4672 	 */
4673 	iter->fmt = NULL;
4674 	iter->fmt_size = 0;
4675 
4676 	mutex_lock(&trace_types_lock);
4677 	iter->trace = tr->current_trace;
4678 
4679 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4680 		goto fail;
4681 
4682 	iter->tr = tr;
4683 
4684 #ifdef CONFIG_TRACER_MAX_TRACE
4685 	/* Currently only the top directory has a snapshot */
4686 	if (tr->current_trace->print_max || snapshot)
4687 		iter->array_buffer = &tr->max_buffer;
4688 	else
4689 #endif
4690 		iter->array_buffer = &tr->array_buffer;
4691 	iter->snapshot = snapshot;
4692 	iter->pos = -1;
4693 	iter->cpu_file = tracing_get_cpu(inode);
4694 	mutex_init(&iter->mutex);
4695 
4696 	/* Notify the tracer early; before we stop tracing. */
4697 	if (iter->trace->open)
4698 		iter->trace->open(iter);
4699 
4700 	/* Annotate start of buffers if we had overruns */
4701 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4702 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4703 
4704 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4705 	if (trace_clocks[tr->clock_id].in_ns)
4706 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4707 
4708 	/*
4709 	 * If pause-on-trace is enabled, then stop the trace while
4710 	 * dumping, unless this is the "snapshot" file
4711 	 */
4712 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER(PAUSE_ON_TRACE))) {
4713 		iter->iter_flags |= TRACE_FILE_PAUSE;
4714 		tracing_stop_tr(tr);
4715 	}
4716 
4717 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4718 		for_each_tracing_cpu(cpu) {
4719 			iter->buffer_iter[cpu] =
4720 				ring_buffer_read_start(iter->array_buffer->buffer,
4721 						       cpu, GFP_KERNEL);
4722 			tracing_iter_reset(iter, cpu);
4723 		}
4724 	} else {
4725 		cpu = iter->cpu_file;
4726 		iter->buffer_iter[cpu] =
4727 			ring_buffer_read_start(iter->array_buffer->buffer,
4728 					       cpu, GFP_KERNEL);
4729 		tracing_iter_reset(iter, cpu);
4730 	}
4731 
4732 	mutex_unlock(&trace_types_lock);
4733 
4734 	return iter;
4735 
4736  fail:
4737 	mutex_unlock(&trace_types_lock);
4738 	free_trace_iter_content(iter);
4739 release:
4740 	seq_release_private(inode, file);
4741 	return ERR_PTR(-ENOMEM);
4742 }
4743 
tracing_open_generic(struct inode * inode,struct file * filp)4744 int tracing_open_generic(struct inode *inode, struct file *filp)
4745 {
4746 	int ret;
4747 
4748 	ret = tracing_check_open_get_tr(NULL);
4749 	if (ret)
4750 		return ret;
4751 
4752 	filp->private_data = inode->i_private;
4753 	return 0;
4754 }
4755 
tracing_is_disabled(void)4756 bool tracing_is_disabled(void)
4757 {
4758 	return (tracing_disabled) ? true: false;
4759 }
4760 
4761 /*
4762  * Open and update trace_array ref count.
4763  * Must have the current trace_array passed to it.
4764  */
tracing_open_generic_tr(struct inode * inode,struct file * filp)4765 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4766 {
4767 	struct trace_array *tr = inode->i_private;
4768 	int ret;
4769 
4770 	ret = tracing_check_open_get_tr(tr);
4771 	if (ret)
4772 		return ret;
4773 
4774 	filp->private_data = inode->i_private;
4775 
4776 	return 0;
4777 }
4778 
4779 /*
4780  * The private pointer of the inode is the trace_event_file.
4781  * Update the tr ref count associated to it.
4782  */
tracing_open_file_tr(struct inode * inode,struct file * filp)4783 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4784 {
4785 	struct trace_event_file *file = inode->i_private;
4786 	int ret;
4787 
4788 	ret = tracing_check_open_get_tr(file->tr);
4789 	if (ret)
4790 		return ret;
4791 
4792 	guard(mutex)(&event_mutex);
4793 
4794 	/* Fail if the file is marked for removal */
4795 	if (file->flags & EVENT_FILE_FL_FREED) {
4796 		trace_array_put(file->tr);
4797 		return -ENODEV;
4798 	} else {
4799 		event_file_get(file);
4800 	}
4801 
4802 	filp->private_data = inode->i_private;
4803 
4804 	return 0;
4805 }
4806 
tracing_release_file_tr(struct inode * inode,struct file * filp)4807 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4808 {
4809 	struct trace_event_file *file = inode->i_private;
4810 
4811 	trace_array_put(file->tr);
4812 	event_file_put(file);
4813 
4814 	return 0;
4815 }
4816 
tracing_single_release_file_tr(struct inode * inode,struct file * filp)4817 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4818 {
4819 	tracing_release_file_tr(inode, filp);
4820 	return single_release(inode, filp);
4821 }
4822 
tracing_release(struct inode * inode,struct file * file)4823 static int tracing_release(struct inode *inode, struct file *file)
4824 {
4825 	struct trace_array *tr = inode->i_private;
4826 	struct seq_file *m = file->private_data;
4827 	struct trace_iterator *iter;
4828 	int cpu;
4829 
4830 	if (!(file->f_mode & FMODE_READ)) {
4831 		trace_array_put(tr);
4832 		return 0;
4833 	}
4834 
4835 	/* Writes do not use seq_file */
4836 	iter = m->private;
4837 	mutex_lock(&trace_types_lock);
4838 
4839 	for_each_tracing_cpu(cpu) {
4840 		if (iter->buffer_iter[cpu])
4841 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4842 	}
4843 
4844 	if (iter->trace && iter->trace->close)
4845 		iter->trace->close(iter);
4846 
4847 	if (iter->iter_flags & TRACE_FILE_PAUSE)
4848 		/* reenable tracing if it was previously enabled */
4849 		tracing_start_tr(tr);
4850 
4851 	__trace_array_put(tr);
4852 
4853 	mutex_unlock(&trace_types_lock);
4854 
4855 	free_trace_iter_content(iter);
4856 	seq_release_private(inode, file);
4857 
4858 	return 0;
4859 }
4860 
tracing_release_generic_tr(struct inode * inode,struct file * file)4861 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4862 {
4863 	struct trace_array *tr = inode->i_private;
4864 
4865 	trace_array_put(tr);
4866 	return 0;
4867 }
4868 
tracing_single_release_tr(struct inode * inode,struct file * file)4869 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4870 {
4871 	struct trace_array *tr = inode->i_private;
4872 
4873 	trace_array_put(tr);
4874 
4875 	return single_release(inode, file);
4876 }
4877 
tracing_open(struct inode * inode,struct file * file)4878 static int tracing_open(struct inode *inode, struct file *file)
4879 {
4880 	struct trace_array *tr = inode->i_private;
4881 	struct trace_iterator *iter;
4882 	int ret;
4883 
4884 	ret = tracing_check_open_get_tr(tr);
4885 	if (ret)
4886 		return ret;
4887 
4888 	/* If this file was open for write, then erase contents */
4889 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4890 		int cpu = tracing_get_cpu(inode);
4891 		struct array_buffer *trace_buf = &tr->array_buffer;
4892 
4893 #ifdef CONFIG_TRACER_MAX_TRACE
4894 		if (tr->current_trace->print_max)
4895 			trace_buf = &tr->max_buffer;
4896 #endif
4897 
4898 		if (cpu == RING_BUFFER_ALL_CPUS)
4899 			tracing_reset_online_cpus(trace_buf);
4900 		else
4901 			tracing_reset_cpu(trace_buf, cpu);
4902 	}
4903 
4904 	if (file->f_mode & FMODE_READ) {
4905 		iter = __tracing_open(inode, file, false);
4906 		if (IS_ERR(iter))
4907 			ret = PTR_ERR(iter);
4908 		else if (tr->trace_flags & TRACE_ITER(LATENCY_FMT))
4909 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4910 	}
4911 
4912 	if (ret < 0)
4913 		trace_array_put(tr);
4914 
4915 	return ret;
4916 }
4917 
4918 /*
4919  * Some tracers are not suitable for instance buffers.
4920  * A tracer is always available for the global array (toplevel)
4921  * or if it explicitly states that it is.
4922  */
4923 static bool
trace_ok_for_array(struct tracer * t,struct trace_array * tr)4924 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4925 {
4926 #ifdef CONFIG_TRACER_SNAPSHOT
4927 	/* arrays with mapped buffer range do not have snapshots */
4928 	if (tr->range_addr_start && t->use_max_tr)
4929 		return false;
4930 #endif
4931 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4932 }
4933 
4934 /* Find the next tracer that this trace array may use */
4935 static struct tracer *
get_tracer_for_array(struct trace_array * tr,struct tracer * t)4936 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4937 {
4938 	while (t && !trace_ok_for_array(t, tr))
4939 		t = t->next;
4940 
4941 	return t;
4942 }
4943 
4944 static void *
t_next(struct seq_file * m,void * v,loff_t * pos)4945 t_next(struct seq_file *m, void *v, loff_t *pos)
4946 {
4947 	struct trace_array *tr = m->private;
4948 	struct tracer *t = v;
4949 
4950 	(*pos)++;
4951 
4952 	if (t)
4953 		t = get_tracer_for_array(tr, t->next);
4954 
4955 	return t;
4956 }
4957 
t_start(struct seq_file * m,loff_t * pos)4958 static void *t_start(struct seq_file *m, loff_t *pos)
4959 {
4960 	struct trace_array *tr = m->private;
4961 	struct tracer *t;
4962 	loff_t l = 0;
4963 
4964 	mutex_lock(&trace_types_lock);
4965 
4966 	t = get_tracer_for_array(tr, trace_types);
4967 	for (; t && l < *pos; t = t_next(m, t, &l))
4968 			;
4969 
4970 	return t;
4971 }
4972 
t_stop(struct seq_file * m,void * p)4973 static void t_stop(struct seq_file *m, void *p)
4974 {
4975 	mutex_unlock(&trace_types_lock);
4976 }
4977 
t_show(struct seq_file * m,void * v)4978 static int t_show(struct seq_file *m, void *v)
4979 {
4980 	struct tracer *t = v;
4981 
4982 	if (!t)
4983 		return 0;
4984 
4985 	seq_puts(m, t->name);
4986 	if (t->next)
4987 		seq_putc(m, ' ');
4988 	else
4989 		seq_putc(m, '\n');
4990 
4991 	return 0;
4992 }
4993 
4994 static const struct seq_operations show_traces_seq_ops = {
4995 	.start		= t_start,
4996 	.next		= t_next,
4997 	.stop		= t_stop,
4998 	.show		= t_show,
4999 };
5000 
show_traces_open(struct inode * inode,struct file * file)5001 static int show_traces_open(struct inode *inode, struct file *file)
5002 {
5003 	struct trace_array *tr = inode->i_private;
5004 	struct seq_file *m;
5005 	int ret;
5006 
5007 	ret = tracing_check_open_get_tr(tr);
5008 	if (ret)
5009 		return ret;
5010 
5011 	ret = seq_open(file, &show_traces_seq_ops);
5012 	if (ret) {
5013 		trace_array_put(tr);
5014 		return ret;
5015 	}
5016 
5017 	m = file->private_data;
5018 	m->private = tr;
5019 
5020 	return 0;
5021 }
5022 
tracing_seq_release(struct inode * inode,struct file * file)5023 static int tracing_seq_release(struct inode *inode, struct file *file)
5024 {
5025 	struct trace_array *tr = inode->i_private;
5026 
5027 	trace_array_put(tr);
5028 	return seq_release(inode, file);
5029 }
5030 
5031 static ssize_t
tracing_write_stub(struct file * filp,const char __user * ubuf,size_t count,loff_t * ppos)5032 tracing_write_stub(struct file *filp, const char __user *ubuf,
5033 		   size_t count, loff_t *ppos)
5034 {
5035 	return count;
5036 }
5037 
tracing_lseek(struct file * file,loff_t offset,int whence)5038 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5039 {
5040 	int ret;
5041 
5042 	if (file->f_mode & FMODE_READ)
5043 		ret = seq_lseek(file, offset, whence);
5044 	else
5045 		file->f_pos = ret = 0;
5046 
5047 	return ret;
5048 }
5049 
5050 static const struct file_operations tracing_fops = {
5051 	.open		= tracing_open,
5052 	.read		= seq_read,
5053 	.read_iter	= seq_read_iter,
5054 	.splice_read	= copy_splice_read,
5055 	.write		= tracing_write_stub,
5056 	.llseek		= tracing_lseek,
5057 	.release	= tracing_release,
5058 };
5059 
5060 static const struct file_operations show_traces_fops = {
5061 	.open		= show_traces_open,
5062 	.read		= seq_read,
5063 	.llseek		= seq_lseek,
5064 	.release	= tracing_seq_release,
5065 };
5066 
5067 static ssize_t
tracing_cpumask_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)5068 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5069 		     size_t count, loff_t *ppos)
5070 {
5071 	struct trace_array *tr = file_inode(filp)->i_private;
5072 	char *mask_str __free(kfree) = NULL;
5073 	int len;
5074 
5075 	len = snprintf(NULL, 0, "%*pb\n",
5076 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5077 	mask_str = kmalloc(len, GFP_KERNEL);
5078 	if (!mask_str)
5079 		return -ENOMEM;
5080 
5081 	len = snprintf(mask_str, len, "%*pb\n",
5082 		       cpumask_pr_args(tr->tracing_cpumask));
5083 	if (len >= count)
5084 		return -EINVAL;
5085 
5086 	return simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5087 }
5088 
tracing_set_cpumask(struct trace_array * tr,cpumask_var_t tracing_cpumask_new)5089 int tracing_set_cpumask(struct trace_array *tr,
5090 			cpumask_var_t tracing_cpumask_new)
5091 {
5092 	int cpu;
5093 
5094 	if (!tr)
5095 		return -EINVAL;
5096 
5097 	local_irq_disable();
5098 	arch_spin_lock(&tr->max_lock);
5099 	for_each_tracing_cpu(cpu) {
5100 		/*
5101 		 * Increase/decrease the disabled counter if we are
5102 		 * about to flip a bit in the cpumask:
5103 		 */
5104 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5105 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5106 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5107 #ifdef CONFIG_TRACER_MAX_TRACE
5108 			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5109 #endif
5110 		}
5111 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5112 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5113 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5114 #ifdef CONFIG_TRACER_MAX_TRACE
5115 			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5116 #endif
5117 		}
5118 	}
5119 	arch_spin_unlock(&tr->max_lock);
5120 	local_irq_enable();
5121 
5122 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5123 
5124 	return 0;
5125 }
5126 
5127 static ssize_t
tracing_cpumask_write(struct file * filp,const char __user * ubuf,size_t count,loff_t * ppos)5128 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5129 		      size_t count, loff_t *ppos)
5130 {
5131 	struct trace_array *tr = file_inode(filp)->i_private;
5132 	cpumask_var_t tracing_cpumask_new;
5133 	int err;
5134 
5135 	if (count == 0 || count > KMALLOC_MAX_SIZE)
5136 		return -EINVAL;
5137 
5138 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5139 		return -ENOMEM;
5140 
5141 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5142 	if (err)
5143 		goto err_free;
5144 
5145 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5146 	if (err)
5147 		goto err_free;
5148 
5149 	free_cpumask_var(tracing_cpumask_new);
5150 
5151 	return count;
5152 
5153 err_free:
5154 	free_cpumask_var(tracing_cpumask_new);
5155 
5156 	return err;
5157 }
5158 
5159 static const struct file_operations tracing_cpumask_fops = {
5160 	.open		= tracing_open_generic_tr,
5161 	.read		= tracing_cpumask_read,
5162 	.write		= tracing_cpumask_write,
5163 	.release	= tracing_release_generic_tr,
5164 	.llseek		= generic_file_llseek,
5165 };
5166 
tracing_trace_options_show(struct seq_file * m,void * v)5167 static int tracing_trace_options_show(struct seq_file *m, void *v)
5168 {
5169 	struct tracer_opt *trace_opts;
5170 	struct trace_array *tr = m->private;
5171 	struct tracer_flags *flags;
5172 	u32 tracer_flags;
5173 	int i;
5174 
5175 	guard(mutex)(&trace_types_lock);
5176 
5177 	for (i = 0; trace_options[i]; i++) {
5178 		if (tr->trace_flags & (1ULL << i))
5179 			seq_printf(m, "%s\n", trace_options[i]);
5180 		else
5181 			seq_printf(m, "no%s\n", trace_options[i]);
5182 	}
5183 
5184 	flags = tr->current_trace_flags;
5185 	if (!flags || !flags->opts)
5186 		return 0;
5187 
5188 	tracer_flags = flags->val;
5189 	trace_opts = flags->opts;
5190 
5191 	for (i = 0; trace_opts[i].name; i++) {
5192 		if (tracer_flags & trace_opts[i].bit)
5193 			seq_printf(m, "%s\n", trace_opts[i].name);
5194 		else
5195 			seq_printf(m, "no%s\n", trace_opts[i].name);
5196 	}
5197 
5198 	return 0;
5199 }
5200 
__set_tracer_option(struct trace_array * tr,struct tracer_flags * tracer_flags,struct tracer_opt * opts,int neg)5201 static int __set_tracer_option(struct trace_array *tr,
5202 			       struct tracer_flags *tracer_flags,
5203 			       struct tracer_opt *opts, int neg)
5204 {
5205 	struct tracer *trace = tracer_flags->trace;
5206 	int ret = 0;
5207 
5208 	if (trace->set_flag)
5209 		ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5210 	if (ret)
5211 		return ret;
5212 
5213 	if (neg)
5214 		tracer_flags->val &= ~opts->bit;
5215 	else
5216 		tracer_flags->val |= opts->bit;
5217 	return 0;
5218 }
5219 
5220 /* Try to assign a tracer specific option */
set_tracer_option(struct trace_array * tr,char * cmp,int neg)5221 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5222 {
5223 	struct tracer_flags *tracer_flags = tr->current_trace_flags;
5224 	struct tracer_opt *opts = NULL;
5225 	int i;
5226 
5227 	if (!tracer_flags || !tracer_flags->opts)
5228 		return 0;
5229 
5230 	for (i = 0; tracer_flags->opts[i].name; i++) {
5231 		opts = &tracer_flags->opts[i];
5232 
5233 		if (strcmp(cmp, opts->name) == 0)
5234 			return __set_tracer_option(tr, tracer_flags, opts, neg);
5235 	}
5236 
5237 	return -EINVAL;
5238 }
5239 
5240 /* Some tracers require overwrite to stay enabled */
trace_keep_overwrite(struct tracer * tracer,u64 mask,int set)5241 int trace_keep_overwrite(struct tracer *tracer, u64 mask, int set)
5242 {
5243 	if (tracer->enabled && (mask & TRACE_ITER(OVERWRITE)) && !set)
5244 		return -1;
5245 
5246 	return 0;
5247 }
5248 
set_tracer_flag(struct trace_array * tr,u64 mask,int enabled)5249 int set_tracer_flag(struct trace_array *tr, u64 mask, int enabled)
5250 {
5251 	switch (mask) {
5252 	case TRACE_ITER(RECORD_TGID):
5253 	case TRACE_ITER(RECORD_CMD):
5254 	case TRACE_ITER(TRACE_PRINTK):
5255 	case TRACE_ITER(COPY_MARKER):
5256 		lockdep_assert_held(&event_mutex);
5257 	}
5258 
5259 	/* do nothing if flag is already set */
5260 	if (!!(tr->trace_flags & mask) == !!enabled)
5261 		return 0;
5262 
5263 	/* Give the tracer a chance to approve the change */
5264 	if (tr->current_trace->flag_changed)
5265 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5266 			return -EINVAL;
5267 
5268 	switch (mask) {
5269 	case TRACE_ITER(TRACE_PRINTK):
5270 		if (enabled) {
5271 			update_printk_trace(tr);
5272 		} else {
5273 			/*
5274 			 * The global_trace cannot clear this.
5275 			 * It's flag only gets cleared if another instance sets it.
5276 			 */
5277 			if (printk_trace == &global_trace)
5278 				return -EINVAL;
5279 			/*
5280 			 * An instance must always have it set.
5281 			 * by default, that's the global_trace instance.
5282 			 */
5283 			if (printk_trace == tr)
5284 				update_printk_trace(&global_trace);
5285 		}
5286 		break;
5287 
5288 	case TRACE_ITER(COPY_MARKER):
5289 		update_marker_trace(tr, enabled);
5290 		/* update_marker_trace updates the tr->trace_flags */
5291 		return 0;
5292 	}
5293 
5294 	if (enabled)
5295 		tr->trace_flags |= mask;
5296 	else
5297 		tr->trace_flags &= ~mask;
5298 
5299 	switch (mask) {
5300 	case TRACE_ITER(RECORD_CMD):
5301 		trace_event_enable_cmd_record(enabled);
5302 		break;
5303 
5304 	case TRACE_ITER(RECORD_TGID):
5305 
5306 		if (trace_alloc_tgid_map() < 0) {
5307 			tr->trace_flags &= ~TRACE_ITER(RECORD_TGID);
5308 			return -ENOMEM;
5309 		}
5310 
5311 		trace_event_enable_tgid_record(enabled);
5312 		break;
5313 
5314 	case TRACE_ITER(EVENT_FORK):
5315 		trace_event_follow_fork(tr, enabled);
5316 		break;
5317 
5318 	case TRACE_ITER(FUNC_FORK):
5319 		ftrace_pid_follow_fork(tr, enabled);
5320 		break;
5321 
5322 	case TRACE_ITER(OVERWRITE):
5323 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5324 #ifdef CONFIG_TRACER_MAX_TRACE
5325 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5326 #endif
5327 		break;
5328 
5329 	case TRACE_ITER(PRINTK):
5330 		trace_printk_start_stop_comm(enabled);
5331 		trace_printk_control(enabled);
5332 		break;
5333 
5334 #if defined(CONFIG_FUNCTION_PROFILER) && defined(CONFIG_FUNCTION_GRAPH_TRACER)
5335 	case TRACE_GRAPH_GRAPH_TIME:
5336 		ftrace_graph_graph_time_control(enabled);
5337 		break;
5338 #endif
5339 	}
5340 
5341 	return 0;
5342 }
5343 
trace_set_options(struct trace_array * tr,char * option)5344 int trace_set_options(struct trace_array *tr, char *option)
5345 {
5346 	char *cmp;
5347 	int neg = 0;
5348 	int ret;
5349 	size_t orig_len = strlen(option);
5350 	int len;
5351 
5352 	cmp = strstrip(option);
5353 
5354 	len = str_has_prefix(cmp, "no");
5355 	if (len)
5356 		neg = 1;
5357 
5358 	cmp += len;
5359 
5360 	mutex_lock(&event_mutex);
5361 	mutex_lock(&trace_types_lock);
5362 
5363 	ret = match_string(trace_options, -1, cmp);
5364 	/* If no option could be set, test the specific tracer options */
5365 	if (ret < 0)
5366 		ret = set_tracer_option(tr, cmp, neg);
5367 	else
5368 		ret = set_tracer_flag(tr, 1ULL << ret, !neg);
5369 
5370 	mutex_unlock(&trace_types_lock);
5371 	mutex_unlock(&event_mutex);
5372 
5373 	/*
5374 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5375 	 * turn it back into a space.
5376 	 */
5377 	if (orig_len > strlen(option))
5378 		option[strlen(option)] = ' ';
5379 
5380 	return ret;
5381 }
5382 
apply_trace_boot_options(void)5383 static void __init apply_trace_boot_options(void)
5384 {
5385 	char *buf = trace_boot_options_buf;
5386 	char *option;
5387 
5388 	while (true) {
5389 		option = strsep(&buf, ",");
5390 
5391 		if (!option)
5392 			break;
5393 
5394 		if (*option)
5395 			trace_set_options(&global_trace, option);
5396 
5397 		/* Put back the comma to allow this to be called again */
5398 		if (buf)
5399 			*(buf - 1) = ',';
5400 	}
5401 }
5402 
5403 static ssize_t
tracing_trace_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)5404 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5405 			size_t cnt, loff_t *ppos)
5406 {
5407 	struct seq_file *m = filp->private_data;
5408 	struct trace_array *tr = m->private;
5409 	char buf[64];
5410 	int ret;
5411 
5412 	if (cnt >= sizeof(buf))
5413 		return -EINVAL;
5414 
5415 	if (copy_from_user(buf, ubuf, cnt))
5416 		return -EFAULT;
5417 
5418 	buf[cnt] = 0;
5419 
5420 	ret = trace_set_options(tr, buf);
5421 	if (ret < 0)
5422 		return ret;
5423 
5424 	*ppos += cnt;
5425 
5426 	return cnt;
5427 }
5428 
tracing_trace_options_open(struct inode * inode,struct file * file)5429 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5430 {
5431 	struct trace_array *tr = inode->i_private;
5432 	int ret;
5433 
5434 	ret = tracing_check_open_get_tr(tr);
5435 	if (ret)
5436 		return ret;
5437 
5438 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5439 	if (ret < 0)
5440 		trace_array_put(tr);
5441 
5442 	return ret;
5443 }
5444 
5445 static const struct file_operations tracing_iter_fops = {
5446 	.open		= tracing_trace_options_open,
5447 	.read		= seq_read,
5448 	.llseek		= seq_lseek,
5449 	.release	= tracing_single_release_tr,
5450 	.write		= tracing_trace_options_write,
5451 };
5452 
5453 static const char readme_msg[] =
5454 	"tracing mini-HOWTO:\n\n"
5455 	"By default tracefs removes all OTH file permission bits.\n"
5456 	"When mounting tracefs an optional group id can be specified\n"
5457 	"which adds the group to every directory and file in tracefs:\n\n"
5458 	"\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
5459 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5460 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5461 	" Important files:\n"
5462 	"  trace\t\t\t- The static contents of the buffer\n"
5463 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5464 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5465 	"  current_tracer\t- function and latency tracers\n"
5466 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5467 	"  error_log\t- error log for failed commands (that support it)\n"
5468 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5469 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5470 	"  trace_clock\t\t- change the clock used to order events\n"
5471 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5472 	"      global:   Synced across CPUs but slows tracing down.\n"
5473 	"     counter:   Not a clock, but just an increment\n"
5474 	"      uptime:   Jiffy counter from time of boot\n"
5475 	"        perf:   Same clock that perf events use\n"
5476 #ifdef CONFIG_X86_64
5477 	"     x86-tsc:   TSC cycle counter\n"
5478 #endif
5479 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5480 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5481 	"    absolute:   Absolute (standalone) timestamp\n"
5482 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5483 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5484 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5485 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5486 	"\t\t\t  Remove sub-buffer with rmdir\n"
5487 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5488 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5489 	"\t\t\t  option name\n"
5490 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5491 #ifdef CONFIG_DYNAMIC_FTRACE
5492 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5493 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5494 	"\t\t\t  functions\n"
5495 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5496 	"\t     modules: Can select a group via module\n"
5497 	"\t      Format: :mod:<module-name>\n"
5498 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5499 	"\t    triggers: a command to perform when function is hit\n"
5500 	"\t      Format: <function>:<trigger>[:count]\n"
5501 	"\t     trigger: traceon, traceoff\n"
5502 	"\t\t      enable_event:<system>:<event>\n"
5503 	"\t\t      disable_event:<system>:<event>\n"
5504 #ifdef CONFIG_STACKTRACE
5505 	"\t\t      stacktrace\n"
5506 #endif
5507 #ifdef CONFIG_TRACER_SNAPSHOT
5508 	"\t\t      snapshot\n"
5509 #endif
5510 	"\t\t      dump\n"
5511 	"\t\t      cpudump\n"
5512 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5513 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5514 	"\t     The first one will disable tracing every time do_fault is hit\n"
5515 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5516 	"\t       The first time do trap is hit and it disables tracing, the\n"
5517 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5518 	"\t       the counter will not decrement. It only decrements when the\n"
5519 	"\t       trigger did work\n"
5520 	"\t     To remove trigger without count:\n"
5521 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5522 	"\t     To remove trigger with a count:\n"
5523 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5524 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5525 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5526 	"\t    modules: Can select a group via module command :mod:\n"
5527 	"\t    Does not accept triggers\n"
5528 #endif /* CONFIG_DYNAMIC_FTRACE */
5529 #ifdef CONFIG_FUNCTION_TRACER
5530 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5531 	"\t\t    (function)\n"
5532 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5533 	"\t\t    (function)\n"
5534 #endif
5535 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5536 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5537 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5538 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5539 #endif
5540 #ifdef CONFIG_TRACER_SNAPSHOT
5541 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5542 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5543 	"\t\t\t  information\n"
5544 #endif
5545 #ifdef CONFIG_STACK_TRACER
5546 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5547 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5548 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5549 	"\t\t\t  new trace)\n"
5550 #ifdef CONFIG_DYNAMIC_FTRACE
5551 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5552 	"\t\t\t  traces\n"
5553 #endif
5554 #endif /* CONFIG_STACK_TRACER */
5555 #ifdef CONFIG_DYNAMIC_EVENTS
5556 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5557 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5558 #endif
5559 #ifdef CONFIG_KPROBE_EVENTS
5560 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5561 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5562 #endif
5563 #ifdef CONFIG_UPROBE_EVENTS
5564 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5565 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5566 #endif
5567 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5568     defined(CONFIG_FPROBE_EVENTS)
5569 	"\t  accepts: event-definitions (one definition per line)\n"
5570 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5571 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5572 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5573 #endif
5574 #ifdef CONFIG_FPROBE_EVENTS
5575 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5576 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5577 #endif
5578 #ifdef CONFIG_HIST_TRIGGERS
5579 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5580 #endif
5581 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5582 	"\t           -:[<group>/][<event>]\n"
5583 #ifdef CONFIG_KPROBE_EVENTS
5584 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5585   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5586 #endif
5587 #ifdef CONFIG_UPROBE_EVENTS
5588   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5589 #endif
5590 	"\t     args: <name>=fetcharg[:type]\n"
5591 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5592 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5593 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5594 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5595 	"\t           <argname>[->field[->field|.field...]],\n"
5596 #endif
5597 #else
5598 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5599 #endif
5600 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5601 	"\t     kernel return probes support: $retval, $arg<N>, $comm\n"
5602 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5603 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5604 	"\t           symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
5605 #ifdef CONFIG_HIST_TRIGGERS
5606 	"\t    field: <stype> <name>;\n"
5607 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5608 	"\t           [unsigned] char/int/long\n"
5609 #endif
5610 	"\t    efield: For event probes ('e' types), the field is on of the fields\n"
5611 	"\t            of the <attached-group>/<attached-event>.\n"
5612 #endif
5613 	"  set_event\t\t- Enables events by name written into it\n"
5614 	"\t\t\t  Can enable module events via: :mod:<module>\n"
5615 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5616 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5617 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5618 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5619 	"\t\t\t  events\n"
5620 	"      filter\t\t- If set, only events passing filter are traced\n"
5621 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5622 	"\t\t\t  <event>:\n"
5623 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5624 	"      filter\t\t- If set, only events passing filter are traced\n"
5625 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5626 	"\t    Format: <trigger>[:count][if <filter>]\n"
5627 	"\t   trigger: traceon, traceoff\n"
5628 	"\t            enable_event:<system>:<event>\n"
5629 	"\t            disable_event:<system>:<event>\n"
5630 #ifdef CONFIG_HIST_TRIGGERS
5631 	"\t            enable_hist:<system>:<event>\n"
5632 	"\t            disable_hist:<system>:<event>\n"
5633 #endif
5634 #ifdef CONFIG_STACKTRACE
5635 	"\t\t    stacktrace\n"
5636 #endif
5637 #ifdef CONFIG_TRACER_SNAPSHOT
5638 	"\t\t    snapshot\n"
5639 #endif
5640 #ifdef CONFIG_HIST_TRIGGERS
5641 	"\t\t    hist (see below)\n"
5642 #endif
5643 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5644 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5645 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5646 	"\t                  events/block/block_unplug/trigger\n"
5647 	"\t   The first disables tracing every time block_unplug is hit.\n"
5648 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5649 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5650 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5651 	"\t   Like function triggers, the counter is only decremented if it\n"
5652 	"\t    enabled or disabled tracing.\n"
5653 	"\t   To remove a trigger without a count:\n"
5654 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5655 	"\t   To remove a trigger with a count:\n"
5656 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5657 	"\t   Filters can be ignored when removing a trigger.\n"
5658 #ifdef CONFIG_HIST_TRIGGERS
5659 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5660 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5661 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5662 	"\t            [:values=<field1[,field2,...]>]\n"
5663 	"\t            [:sort=<field1[,field2,...]>]\n"
5664 	"\t            [:size=#entries]\n"
5665 	"\t            [:pause][:continue][:clear]\n"
5666 	"\t            [:name=histname1]\n"
5667 	"\t            [:nohitcount]\n"
5668 	"\t            [:<handler>.<action>]\n"
5669 	"\t            [if <filter>]\n\n"
5670 	"\t    Note, special fields can be used as well:\n"
5671 	"\t            common_timestamp - to record current timestamp\n"
5672 	"\t            common_cpu - to record the CPU the event happened on\n"
5673 	"\n"
5674 	"\t    A hist trigger variable can be:\n"
5675 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
5676 	"\t        - a reference to another variable e.g. y=$x,\n"
5677 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5678 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5679 	"\n"
5680 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5681 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5682 	"\t    variable reference, field or numeric literal.\n"
5683 	"\n"
5684 	"\t    When a matching event is hit, an entry is added to a hash\n"
5685 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5686 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5687 	"\t    correspond to fields in the event's format description.  Keys\n"
5688 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
5689 	"\t    Compound keys consisting of up to two fields can be specified\n"
5690 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5691 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5692 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5693 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5694 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5695 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5696 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5697 	"\t    its histogram data will be shared with other triggers of the\n"
5698 	"\t    same name, and trigger hits will update this common data.\n\n"
5699 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5700 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5701 	"\t    triggers attached to an event, there will be a table for each\n"
5702 	"\t    trigger in the output.  The table displayed for a named\n"
5703 	"\t    trigger will be the same as any other instance having the\n"
5704 	"\t    same name.  The default format used to display a given field\n"
5705 	"\t    can be modified by appending any of the following modifiers\n"
5706 	"\t    to the field name, as applicable:\n\n"
5707 	"\t            .hex        display a number as a hex value\n"
5708 	"\t            .sym        display an address as a symbol\n"
5709 	"\t            .sym-offset display an address as a symbol and offset\n"
5710 	"\t            .execname   display a common_pid as a program name\n"
5711 	"\t            .syscall    display a syscall id as a syscall name\n"
5712 	"\t            .log2       display log2 value rather than raw number\n"
5713 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5714 	"\t            .usecs      display a common_timestamp in microseconds\n"
5715 	"\t            .percent    display a number of percentage value\n"
5716 	"\t            .graph      display a bar-graph of a value\n\n"
5717 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5718 	"\t    trigger or to start a hist trigger but not log any events\n"
5719 	"\t    until told to do so.  'continue' can be used to start or\n"
5720 	"\t    restart a paused hist trigger.\n\n"
5721 	"\t    The 'clear' parameter will clear the contents of a running\n"
5722 	"\t    hist trigger and leave its current paused/active state\n"
5723 	"\t    unchanged.\n\n"
5724 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5725 	"\t    raw hitcount in the histogram.\n\n"
5726 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5727 	"\t    have one event conditionally start and stop another event's\n"
5728 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5729 	"\t    the enable_event and disable_event triggers.\n\n"
5730 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5731 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5732 	"\t        <handler>.<action>\n\n"
5733 	"\t    The available handlers are:\n\n"
5734 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5735 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5736 	"\t        onchange(var)            - invoke action if var changes\n\n"
5737 	"\t    The available actions are:\n\n"
5738 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5739 	"\t        save(field,...)                      - save current event fields\n"
5740 #ifdef CONFIG_TRACER_SNAPSHOT
5741 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5742 #endif
5743 #ifdef CONFIG_SYNTH_EVENTS
5744 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5745 	"\t  Write into this file to define/undefine new synthetic events.\n"
5746 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5747 #endif
5748 #endif
5749 ;
5750 
5751 static ssize_t
tracing_readme_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5752 tracing_readme_read(struct file *filp, char __user *ubuf,
5753 		       size_t cnt, loff_t *ppos)
5754 {
5755 	return simple_read_from_buffer(ubuf, cnt, ppos,
5756 					readme_msg, strlen(readme_msg));
5757 }
5758 
5759 static const struct file_operations tracing_readme_fops = {
5760 	.open		= tracing_open_generic,
5761 	.read		= tracing_readme_read,
5762 	.llseek		= generic_file_llseek,
5763 };
5764 
5765 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5766 static union trace_eval_map_item *
update_eval_map(union trace_eval_map_item * ptr)5767 update_eval_map(union trace_eval_map_item *ptr)
5768 {
5769 	if (!ptr->map.eval_string) {
5770 		if (ptr->tail.next) {
5771 			ptr = ptr->tail.next;
5772 			/* Set ptr to the next real item (skip head) */
5773 			ptr++;
5774 		} else
5775 			return NULL;
5776 	}
5777 	return ptr;
5778 }
5779 
eval_map_next(struct seq_file * m,void * v,loff_t * pos)5780 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5781 {
5782 	union trace_eval_map_item *ptr = v;
5783 
5784 	/*
5785 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5786 	 * This really should never happen.
5787 	 */
5788 	(*pos)++;
5789 	ptr = update_eval_map(ptr);
5790 	if (WARN_ON_ONCE(!ptr))
5791 		return NULL;
5792 
5793 	ptr++;
5794 	ptr = update_eval_map(ptr);
5795 
5796 	return ptr;
5797 }
5798 
eval_map_start(struct seq_file * m,loff_t * pos)5799 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5800 {
5801 	union trace_eval_map_item *v;
5802 	loff_t l = 0;
5803 
5804 	mutex_lock(&trace_eval_mutex);
5805 
5806 	v = trace_eval_maps;
5807 	if (v)
5808 		v++;
5809 
5810 	while (v && l < *pos) {
5811 		v = eval_map_next(m, v, &l);
5812 	}
5813 
5814 	return v;
5815 }
5816 
eval_map_stop(struct seq_file * m,void * v)5817 static void eval_map_stop(struct seq_file *m, void *v)
5818 {
5819 	mutex_unlock(&trace_eval_mutex);
5820 }
5821 
eval_map_show(struct seq_file * m,void * v)5822 static int eval_map_show(struct seq_file *m, void *v)
5823 {
5824 	union trace_eval_map_item *ptr = v;
5825 
5826 	seq_printf(m, "%s %ld (%s)\n",
5827 		   ptr->map.eval_string, ptr->map.eval_value,
5828 		   ptr->map.system);
5829 
5830 	return 0;
5831 }
5832 
5833 static const struct seq_operations tracing_eval_map_seq_ops = {
5834 	.start		= eval_map_start,
5835 	.next		= eval_map_next,
5836 	.stop		= eval_map_stop,
5837 	.show		= eval_map_show,
5838 };
5839 
tracing_eval_map_open(struct inode * inode,struct file * filp)5840 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5841 {
5842 	int ret;
5843 
5844 	ret = tracing_check_open_get_tr(NULL);
5845 	if (ret)
5846 		return ret;
5847 
5848 	return seq_open(filp, &tracing_eval_map_seq_ops);
5849 }
5850 
5851 static const struct file_operations tracing_eval_map_fops = {
5852 	.open		= tracing_eval_map_open,
5853 	.read		= seq_read,
5854 	.llseek		= seq_lseek,
5855 	.release	= seq_release,
5856 };
5857 
5858 static inline union trace_eval_map_item *
trace_eval_jmp_to_tail(union trace_eval_map_item * ptr)5859 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5860 {
5861 	/* Return tail of array given the head */
5862 	return ptr + ptr->head.length + 1;
5863 }
5864 
5865 static void
trace_insert_eval_map_file(struct module * mod,struct trace_eval_map ** start,int len)5866 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5867 			   int len)
5868 {
5869 	struct trace_eval_map **stop;
5870 	struct trace_eval_map **map;
5871 	union trace_eval_map_item *map_array;
5872 	union trace_eval_map_item *ptr;
5873 
5874 	stop = start + len;
5875 
5876 	/*
5877 	 * The trace_eval_maps contains the map plus a head and tail item,
5878 	 * where the head holds the module and length of array, and the
5879 	 * tail holds a pointer to the next list.
5880 	 */
5881 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5882 	if (!map_array) {
5883 		pr_warn("Unable to allocate trace eval mapping\n");
5884 		return;
5885 	}
5886 
5887 	guard(mutex)(&trace_eval_mutex);
5888 
5889 	if (!trace_eval_maps)
5890 		trace_eval_maps = map_array;
5891 	else {
5892 		ptr = trace_eval_maps;
5893 		for (;;) {
5894 			ptr = trace_eval_jmp_to_tail(ptr);
5895 			if (!ptr->tail.next)
5896 				break;
5897 			ptr = ptr->tail.next;
5898 
5899 		}
5900 		ptr->tail.next = map_array;
5901 	}
5902 	map_array->head.mod = mod;
5903 	map_array->head.length = len;
5904 	map_array++;
5905 
5906 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5907 		map_array->map = **map;
5908 		map_array++;
5909 	}
5910 	memset(map_array, 0, sizeof(*map_array));
5911 }
5912 
trace_create_eval_file(struct dentry * d_tracer)5913 static void trace_create_eval_file(struct dentry *d_tracer)
5914 {
5915 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5916 			  NULL, &tracing_eval_map_fops);
5917 }
5918 
5919 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
trace_create_eval_file(struct dentry * d_tracer)5920 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
trace_insert_eval_map_file(struct module * mod,struct trace_eval_map ** start,int len)5921 static inline void trace_insert_eval_map_file(struct module *mod,
5922 			      struct trace_eval_map **start, int len) { }
5923 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5924 
5925 static void
trace_event_update_with_eval_map(struct module * mod,struct trace_eval_map ** start,int len)5926 trace_event_update_with_eval_map(struct module *mod,
5927 				 struct trace_eval_map **start,
5928 				 int len)
5929 {
5930 	struct trace_eval_map **map;
5931 
5932 	/* Always run sanitizer only if btf_type_tag attr exists. */
5933 	if (len <= 0) {
5934 		if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) &&
5935 		      IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) &&
5936 		      __has_attribute(btf_type_tag)))
5937 			return;
5938 	}
5939 
5940 	map = start;
5941 
5942 	trace_event_update_all(map, len);
5943 
5944 	if (len <= 0)
5945 		return;
5946 
5947 	trace_insert_eval_map_file(mod, start, len);
5948 }
5949 
5950 static ssize_t
tracing_set_trace_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5951 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5952 		       size_t cnt, loff_t *ppos)
5953 {
5954 	struct trace_array *tr = filp->private_data;
5955 	char buf[MAX_TRACER_SIZE+2];
5956 	int r;
5957 
5958 	scoped_guard(mutex, &trace_types_lock) {
5959 		r = sprintf(buf, "%s\n", tr->current_trace->name);
5960 	}
5961 
5962 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5963 }
5964 
tracer_init(struct tracer * t,struct trace_array * tr)5965 int tracer_init(struct tracer *t, struct trace_array *tr)
5966 {
5967 	tracing_reset_online_cpus(&tr->array_buffer);
5968 	return t->init(tr);
5969 }
5970 
set_buffer_entries(struct array_buffer * buf,unsigned long val)5971 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5972 {
5973 	int cpu;
5974 
5975 	for_each_tracing_cpu(cpu)
5976 		per_cpu_ptr(buf->data, cpu)->entries = val;
5977 }
5978 
update_buffer_entries(struct array_buffer * buf,int cpu)5979 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5980 {
5981 	if (cpu == RING_BUFFER_ALL_CPUS) {
5982 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5983 	} else {
5984 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5985 	}
5986 }
5987 
5988 #ifdef CONFIG_TRACER_MAX_TRACE
5989 /* resize @tr's buffer to the size of @size_tr's entries */
resize_buffer_duplicate_size(struct array_buffer * trace_buf,struct array_buffer * size_buf,int cpu_id)5990 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5991 					struct array_buffer *size_buf, int cpu_id)
5992 {
5993 	int cpu, ret = 0;
5994 
5995 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5996 		for_each_tracing_cpu(cpu) {
5997 			ret = ring_buffer_resize(trace_buf->buffer,
5998 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5999 			if (ret < 0)
6000 				break;
6001 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6002 				per_cpu_ptr(size_buf->data, cpu)->entries;
6003 		}
6004 	} else {
6005 		ret = ring_buffer_resize(trace_buf->buffer,
6006 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6007 		if (ret == 0)
6008 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6009 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6010 	}
6011 
6012 	return ret;
6013 }
6014 #endif /* CONFIG_TRACER_MAX_TRACE */
6015 
__tracing_resize_ring_buffer(struct trace_array * tr,unsigned long size,int cpu)6016 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6017 					unsigned long size, int cpu)
6018 {
6019 	int ret;
6020 
6021 	/*
6022 	 * If kernel or user changes the size of the ring buffer
6023 	 * we use the size that was given, and we can forget about
6024 	 * expanding it later.
6025 	 */
6026 	trace_set_ring_buffer_expanded(tr);
6027 
6028 	/* May be called before buffers are initialized */
6029 	if (!tr->array_buffer.buffer)
6030 		return 0;
6031 
6032 	/* Do not allow tracing while resizing ring buffer */
6033 	tracing_stop_tr(tr);
6034 
6035 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6036 	if (ret < 0)
6037 		goto out_start;
6038 
6039 #ifdef CONFIG_TRACER_MAX_TRACE
6040 	if (!tr->allocated_snapshot)
6041 		goto out;
6042 
6043 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6044 	if (ret < 0) {
6045 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6046 						     &tr->array_buffer, cpu);
6047 		if (r < 0) {
6048 			/*
6049 			 * AARGH! We are left with different
6050 			 * size max buffer!!!!
6051 			 * The max buffer is our "snapshot" buffer.
6052 			 * When a tracer needs a snapshot (one of the
6053 			 * latency tracers), it swaps the max buffer
6054 			 * with the saved snap shot. We succeeded to
6055 			 * update the size of the main buffer, but failed to
6056 			 * update the size of the max buffer. But when we tried
6057 			 * to reset the main buffer to the original size, we
6058 			 * failed there too. This is very unlikely to
6059 			 * happen, but if it does, warn and kill all
6060 			 * tracing.
6061 			 */
6062 			WARN_ON(1);
6063 			tracing_disabled = 1;
6064 		}
6065 		goto out_start;
6066 	}
6067 
6068 	update_buffer_entries(&tr->max_buffer, cpu);
6069 
6070  out:
6071 #endif /* CONFIG_TRACER_MAX_TRACE */
6072 
6073 	update_buffer_entries(&tr->array_buffer, cpu);
6074  out_start:
6075 	tracing_start_tr(tr);
6076 	return ret;
6077 }
6078 
tracing_resize_ring_buffer(struct trace_array * tr,unsigned long size,int cpu_id)6079 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6080 				  unsigned long size, int cpu_id)
6081 {
6082 	guard(mutex)(&trace_types_lock);
6083 
6084 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6085 		/* make sure, this cpu is enabled in the mask */
6086 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
6087 			return -EINVAL;
6088 	}
6089 
6090 	return __tracing_resize_ring_buffer(tr, size, cpu_id);
6091 }
6092 
6093 struct trace_mod_entry {
6094 	unsigned long	mod_addr;
6095 	char		mod_name[MODULE_NAME_LEN];
6096 };
6097 
6098 struct trace_scratch {
6099 	unsigned int		clock_id;
6100 	unsigned long		text_addr;
6101 	unsigned long		nr_entries;
6102 	struct trace_mod_entry	entries[];
6103 };
6104 
6105 static DEFINE_MUTEX(scratch_mutex);
6106 
cmp_mod_entry(const void * key,const void * pivot)6107 static int cmp_mod_entry(const void *key, const void *pivot)
6108 {
6109 	unsigned long addr = (unsigned long)key;
6110 	const struct trace_mod_entry *ent = pivot;
6111 
6112 	if (addr >= ent[0].mod_addr && addr < ent[1].mod_addr)
6113 		return 0;
6114 	else
6115 		return addr - ent->mod_addr;
6116 }
6117 
6118 /**
6119  * trace_adjust_address() - Adjust prev boot address to current address.
6120  * @tr: Persistent ring buffer's trace_array.
6121  * @addr: Address in @tr which is adjusted.
6122  */
trace_adjust_address(struct trace_array * tr,unsigned long addr)6123 unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
6124 {
6125 	struct trace_module_delta *module_delta;
6126 	struct trace_scratch *tscratch;
6127 	struct trace_mod_entry *entry;
6128 	unsigned long raddr;
6129 	int idx = 0, nr_entries;
6130 
6131 	/* If we don't have last boot delta, return the address */
6132 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6133 		return addr;
6134 
6135 	/* tr->module_delta must be protected by rcu. */
6136 	guard(rcu)();
6137 	tscratch = tr->scratch;
6138 	/* if there is no tscrach, module_delta must be NULL. */
6139 	module_delta = READ_ONCE(tr->module_delta);
6140 	if (!module_delta || !tscratch->nr_entries ||
6141 	    tscratch->entries[0].mod_addr > addr) {
6142 		raddr = addr + tr->text_delta;
6143 		return __is_kernel(raddr) || is_kernel_core_data(raddr) ||
6144 			is_kernel_rodata(raddr) ? raddr : addr;
6145 	}
6146 
6147 	/* Note that entries must be sorted. */
6148 	nr_entries = tscratch->nr_entries;
6149 	if (nr_entries == 1 ||
6150 	    tscratch->entries[nr_entries - 1].mod_addr < addr)
6151 		idx = nr_entries - 1;
6152 	else {
6153 		entry = __inline_bsearch((void *)addr,
6154 				tscratch->entries,
6155 				nr_entries - 1,
6156 				sizeof(tscratch->entries[0]),
6157 				cmp_mod_entry);
6158 		if (entry)
6159 			idx = entry - tscratch->entries;
6160 	}
6161 
6162 	return addr + module_delta->delta[idx];
6163 }
6164 
6165 #ifdef CONFIG_MODULES
save_mod(struct module * mod,void * data)6166 static int save_mod(struct module *mod, void *data)
6167 {
6168 	struct trace_array *tr = data;
6169 	struct trace_scratch *tscratch;
6170 	struct trace_mod_entry *entry;
6171 	unsigned int size;
6172 
6173 	tscratch = tr->scratch;
6174 	if (!tscratch)
6175 		return -1;
6176 	size = tr->scratch_size;
6177 
6178 	if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size)
6179 		return -1;
6180 
6181 	entry = &tscratch->entries[tscratch->nr_entries];
6182 
6183 	tscratch->nr_entries++;
6184 
6185 	entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
6186 	strscpy(entry->mod_name, mod->name);
6187 
6188 	return 0;
6189 }
6190 #else
save_mod(struct module * mod,void * data)6191 static int save_mod(struct module *mod, void *data)
6192 {
6193 	return 0;
6194 }
6195 #endif
6196 
update_last_data(struct trace_array * tr)6197 static void update_last_data(struct trace_array *tr)
6198 {
6199 	struct trace_module_delta *module_delta;
6200 	struct trace_scratch *tscratch;
6201 
6202 	if (!(tr->flags & TRACE_ARRAY_FL_BOOT))
6203 		return;
6204 
6205 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6206 		return;
6207 
6208 	/* Only if the buffer has previous boot data clear and update it. */
6209 	tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;
6210 
6211 	/* Reset the module list and reload them */
6212 	if (tr->scratch) {
6213 		struct trace_scratch *tscratch = tr->scratch;
6214 
6215 		tscratch->clock_id = tr->clock_id;
6216 		memset(tscratch->entries, 0,
6217 		       flex_array_size(tscratch, entries, tscratch->nr_entries));
6218 		tscratch->nr_entries = 0;
6219 
6220 		guard(mutex)(&scratch_mutex);
6221 		module_for_each_mod(save_mod, tr);
6222 	}
6223 
6224 	/*
6225 	 * Need to clear all CPU buffers as there cannot be events
6226 	 * from the previous boot mixed with events with this boot
6227 	 * as that will cause a confusing trace. Need to clear all
6228 	 * CPU buffers, even for those that may currently be offline.
6229 	 */
6230 	tracing_reset_all_cpus(&tr->array_buffer);
6231 
6232 	/* Using current data now */
6233 	tr->text_delta = 0;
6234 
6235 	if (!tr->scratch)
6236 		return;
6237 
6238 	tscratch = tr->scratch;
6239 	module_delta = READ_ONCE(tr->module_delta);
6240 	WRITE_ONCE(tr->module_delta, NULL);
6241 	kfree_rcu(module_delta, rcu);
6242 
6243 	/* Set the persistent ring buffer meta data to this address */
6244 	tscratch->text_addr = (unsigned long)_text;
6245 }
6246 
6247 /**
6248  * tracing_update_buffers - used by tracing facility to expand ring buffers
6249  * @tr: The tracing instance
6250  *
6251  * To save on memory when the tracing is never used on a system with it
6252  * configured in. The ring buffers are set to a minimum size. But once
6253  * a user starts to use the tracing facility, then they need to grow
6254  * to their default size.
6255  *
6256  * This function is to be called when a tracer is about to be used.
6257  */
tracing_update_buffers(struct trace_array * tr)6258 int tracing_update_buffers(struct trace_array *tr)
6259 {
6260 	int ret = 0;
6261 
6262 	guard(mutex)(&trace_types_lock);
6263 
6264 	update_last_data(tr);
6265 
6266 	if (!tr->ring_buffer_expanded)
6267 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6268 						RING_BUFFER_ALL_CPUS);
6269 	return ret;
6270 }
6271 
6272 /*
6273  * Used to clear out the tracer before deletion of an instance.
6274  * Must have trace_types_lock held.
6275  */
tracing_set_nop(struct trace_array * tr)6276 static void tracing_set_nop(struct trace_array *tr)
6277 {
6278 	if (tr->current_trace == &nop_trace)
6279 		return;
6280 
6281 	tr->current_trace->enabled--;
6282 
6283 	if (tr->current_trace->reset)
6284 		tr->current_trace->reset(tr);
6285 
6286 	tr->current_trace = &nop_trace;
6287 	tr->current_trace_flags = nop_trace.flags;
6288 }
6289 
6290 static bool tracer_options_updated;
6291 
tracing_set_tracer(struct trace_array * tr,const char * buf)6292 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6293 {
6294 	struct tracer *trace = NULL;
6295 	struct tracers *t;
6296 #ifdef CONFIG_TRACER_MAX_TRACE
6297 	bool had_max_tr;
6298 #endif
6299 	int ret;
6300 
6301 	guard(mutex)(&trace_types_lock);
6302 
6303 	update_last_data(tr);
6304 
6305 	if (!tr->ring_buffer_expanded) {
6306 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6307 						RING_BUFFER_ALL_CPUS);
6308 		if (ret < 0)
6309 			return ret;
6310 		ret = 0;
6311 	}
6312 
6313 	list_for_each_entry(t, &tr->tracers, list) {
6314 		if (strcmp(t->tracer->name, buf) == 0) {
6315 			trace = t->tracer;
6316 			break;
6317 		}
6318 	}
6319 	if (!trace)
6320 		return -EINVAL;
6321 
6322 	if (trace == tr->current_trace)
6323 		return 0;
6324 
6325 #ifdef CONFIG_TRACER_SNAPSHOT
6326 	if (trace->use_max_tr) {
6327 		local_irq_disable();
6328 		arch_spin_lock(&tr->max_lock);
6329 		ret = tr->cond_snapshot ? -EBUSY : 0;
6330 		arch_spin_unlock(&tr->max_lock);
6331 		local_irq_enable();
6332 		if (ret)
6333 			return ret;
6334 	}
6335 #endif
6336 	/* Some tracers won't work on kernel command line */
6337 	if (system_state < SYSTEM_RUNNING && trace->noboot) {
6338 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6339 			trace->name);
6340 		return -EINVAL;
6341 	}
6342 
6343 	/* Some tracers are only allowed for the top level buffer */
6344 	if (!trace_ok_for_array(trace, tr))
6345 		return -EINVAL;
6346 
6347 	/* If trace pipe files are being read, we can't change the tracer */
6348 	if (tr->trace_ref)
6349 		return -EBUSY;
6350 
6351 	trace_branch_disable();
6352 
6353 	tr->current_trace->enabled--;
6354 
6355 	if (tr->current_trace->reset)
6356 		tr->current_trace->reset(tr);
6357 
6358 #ifdef CONFIG_TRACER_MAX_TRACE
6359 	had_max_tr = tr->current_trace->use_max_tr;
6360 
6361 	/* Current trace needs to be nop_trace before synchronize_rcu */
6362 	tr->current_trace = &nop_trace;
6363 	tr->current_trace_flags = nop_trace.flags;
6364 
6365 	if (had_max_tr && !trace->use_max_tr) {
6366 		/*
6367 		 * We need to make sure that the update_max_tr sees that
6368 		 * current_trace changed to nop_trace to keep it from
6369 		 * swapping the buffers after we resize it.
6370 		 * The update_max_tr is called from interrupts disabled
6371 		 * so a synchronized_sched() is sufficient.
6372 		 */
6373 		synchronize_rcu();
6374 		free_snapshot(tr);
6375 		tracing_disarm_snapshot(tr);
6376 	}
6377 
6378 	if (!had_max_tr && trace->use_max_tr) {
6379 		ret = tracing_arm_snapshot_locked(tr);
6380 		if (ret)
6381 			return ret;
6382 	}
6383 #else
6384 	tr->current_trace = &nop_trace;
6385 #endif
6386 
6387 	tr->current_trace_flags = t->flags ? : t->tracer->flags;
6388 
6389 	if (trace->init) {
6390 		ret = tracer_init(trace, tr);
6391 		if (ret) {
6392 #ifdef CONFIG_TRACER_MAX_TRACE
6393 			if (trace->use_max_tr)
6394 				tracing_disarm_snapshot(tr);
6395 #endif
6396 			tr->current_trace_flags = nop_trace.flags;
6397 			return ret;
6398 		}
6399 	}
6400 
6401 	tr->current_trace = trace;
6402 	tr->current_trace->enabled++;
6403 	trace_branch_enable(tr);
6404 
6405 	return 0;
6406 }
6407 
6408 static ssize_t
tracing_set_trace_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6409 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6410 			size_t cnt, loff_t *ppos)
6411 {
6412 	struct trace_array *tr = filp->private_data;
6413 	char buf[MAX_TRACER_SIZE+1];
6414 	char *name;
6415 	size_t ret;
6416 	int err;
6417 
6418 	ret = cnt;
6419 
6420 	if (cnt > MAX_TRACER_SIZE)
6421 		cnt = MAX_TRACER_SIZE;
6422 
6423 	if (copy_from_user(buf, ubuf, cnt))
6424 		return -EFAULT;
6425 
6426 	buf[cnt] = 0;
6427 
6428 	name = strim(buf);
6429 
6430 	err = tracing_set_tracer(tr, name);
6431 	if (err)
6432 		return err;
6433 
6434 	*ppos += ret;
6435 
6436 	return ret;
6437 }
6438 
6439 static ssize_t
tracing_nsecs_read(unsigned long * ptr,char __user * ubuf,size_t cnt,loff_t * ppos)6440 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6441 		   size_t cnt, loff_t *ppos)
6442 {
6443 	char buf[64];
6444 	int r;
6445 
6446 	r = snprintf(buf, sizeof(buf), "%ld\n",
6447 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6448 	if (r > sizeof(buf))
6449 		r = sizeof(buf);
6450 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6451 }
6452 
6453 static ssize_t
tracing_nsecs_write(unsigned long * ptr,const char __user * ubuf,size_t cnt,loff_t * ppos)6454 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6455 		    size_t cnt, loff_t *ppos)
6456 {
6457 	unsigned long val;
6458 	int ret;
6459 
6460 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6461 	if (ret)
6462 		return ret;
6463 
6464 	*ptr = val * 1000;
6465 
6466 	return cnt;
6467 }
6468 
6469 static ssize_t
tracing_thresh_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6470 tracing_thresh_read(struct file *filp, char __user *ubuf,
6471 		    size_t cnt, loff_t *ppos)
6472 {
6473 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6474 }
6475 
6476 static ssize_t
tracing_thresh_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6477 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6478 		     size_t cnt, loff_t *ppos)
6479 {
6480 	struct trace_array *tr = filp->private_data;
6481 	int ret;
6482 
6483 	guard(mutex)(&trace_types_lock);
6484 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6485 	if (ret < 0)
6486 		return ret;
6487 
6488 	if (tr->current_trace->update_thresh) {
6489 		ret = tr->current_trace->update_thresh(tr);
6490 		if (ret < 0)
6491 			return ret;
6492 	}
6493 
6494 	return cnt;
6495 }
6496 
6497 #ifdef CONFIG_TRACER_MAX_TRACE
6498 
6499 static ssize_t
tracing_max_lat_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6500 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6501 		     size_t cnt, loff_t *ppos)
6502 {
6503 	struct trace_array *tr = filp->private_data;
6504 
6505 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6506 }
6507 
6508 static ssize_t
tracing_max_lat_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6509 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6510 		      size_t cnt, loff_t *ppos)
6511 {
6512 	struct trace_array *tr = filp->private_data;
6513 
6514 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6515 }
6516 
6517 #endif
6518 
open_pipe_on_cpu(struct trace_array * tr,int cpu)6519 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6520 {
6521 	if (cpu == RING_BUFFER_ALL_CPUS) {
6522 		if (cpumask_empty(tr->pipe_cpumask)) {
6523 			cpumask_setall(tr->pipe_cpumask);
6524 			return 0;
6525 		}
6526 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6527 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6528 		return 0;
6529 	}
6530 	return -EBUSY;
6531 }
6532 
close_pipe_on_cpu(struct trace_array * tr,int cpu)6533 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6534 {
6535 	if (cpu == RING_BUFFER_ALL_CPUS) {
6536 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6537 		cpumask_clear(tr->pipe_cpumask);
6538 	} else {
6539 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6540 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6541 	}
6542 }
6543 
tracing_open_pipe(struct inode * inode,struct file * filp)6544 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6545 {
6546 	struct trace_array *tr = inode->i_private;
6547 	struct trace_iterator *iter;
6548 	int cpu;
6549 	int ret;
6550 
6551 	ret = tracing_check_open_get_tr(tr);
6552 	if (ret)
6553 		return ret;
6554 
6555 	guard(mutex)(&trace_types_lock);
6556 	cpu = tracing_get_cpu(inode);
6557 	ret = open_pipe_on_cpu(tr, cpu);
6558 	if (ret)
6559 		goto fail_pipe_on_cpu;
6560 
6561 	/* create a buffer to store the information to pass to userspace */
6562 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6563 	if (!iter) {
6564 		ret = -ENOMEM;
6565 		goto fail_alloc_iter;
6566 	}
6567 
6568 	trace_seq_init(&iter->seq);
6569 	iter->trace = tr->current_trace;
6570 
6571 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6572 		ret = -ENOMEM;
6573 		goto fail;
6574 	}
6575 
6576 	/* trace pipe does not show start of buffer */
6577 	cpumask_setall(iter->started);
6578 
6579 	if (tr->trace_flags & TRACE_ITER(LATENCY_FMT))
6580 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6581 
6582 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6583 	if (trace_clocks[tr->clock_id].in_ns)
6584 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6585 
6586 	iter->tr = tr;
6587 	iter->array_buffer = &tr->array_buffer;
6588 	iter->cpu_file = cpu;
6589 	mutex_init(&iter->mutex);
6590 	filp->private_data = iter;
6591 
6592 	if (iter->trace->pipe_open)
6593 		iter->trace->pipe_open(iter);
6594 
6595 	nonseekable_open(inode, filp);
6596 
6597 	tr->trace_ref++;
6598 
6599 	return ret;
6600 
6601 fail:
6602 	kfree(iter);
6603 fail_alloc_iter:
6604 	close_pipe_on_cpu(tr, cpu);
6605 fail_pipe_on_cpu:
6606 	__trace_array_put(tr);
6607 	return ret;
6608 }
6609 
tracing_release_pipe(struct inode * inode,struct file * file)6610 static int tracing_release_pipe(struct inode *inode, struct file *file)
6611 {
6612 	struct trace_iterator *iter = file->private_data;
6613 	struct trace_array *tr = inode->i_private;
6614 
6615 	scoped_guard(mutex, &trace_types_lock) {
6616 		tr->trace_ref--;
6617 
6618 		if (iter->trace->pipe_close)
6619 			iter->trace->pipe_close(iter);
6620 		close_pipe_on_cpu(tr, iter->cpu_file);
6621 	}
6622 
6623 	free_trace_iter_content(iter);
6624 	kfree(iter);
6625 
6626 	trace_array_put(tr);
6627 
6628 	return 0;
6629 }
6630 
6631 static __poll_t
trace_poll(struct trace_iterator * iter,struct file * filp,poll_table * poll_table)6632 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6633 {
6634 	struct trace_array *tr = iter->tr;
6635 
6636 	/* Iterators are static, they should be filled or empty */
6637 	if (trace_buffer_iter(iter, iter->cpu_file))
6638 		return EPOLLIN | EPOLLRDNORM;
6639 
6640 	if (tr->trace_flags & TRACE_ITER(BLOCK))
6641 		/*
6642 		 * Always select as readable when in blocking mode
6643 		 */
6644 		return EPOLLIN | EPOLLRDNORM;
6645 	else
6646 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6647 					     filp, poll_table, iter->tr->buffer_percent);
6648 }
6649 
6650 static __poll_t
tracing_poll_pipe(struct file * filp,poll_table * poll_table)6651 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6652 {
6653 	struct trace_iterator *iter = filp->private_data;
6654 
6655 	return trace_poll(iter, filp, poll_table);
6656 }
6657 
6658 /* Must be called with iter->mutex held. */
tracing_wait_pipe(struct file * filp)6659 static int tracing_wait_pipe(struct file *filp)
6660 {
6661 	struct trace_iterator *iter = filp->private_data;
6662 	int ret;
6663 
6664 	while (trace_empty(iter)) {
6665 
6666 		if ((filp->f_flags & O_NONBLOCK)) {
6667 			return -EAGAIN;
6668 		}
6669 
6670 		/*
6671 		 * We block until we read something and tracing is disabled.
6672 		 * We still block if tracing is disabled, but we have never
6673 		 * read anything. This allows a user to cat this file, and
6674 		 * then enable tracing. But after we have read something,
6675 		 * we give an EOF when tracing is again disabled.
6676 		 *
6677 		 * iter->pos will be 0 if we haven't read anything.
6678 		 */
6679 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6680 			break;
6681 
6682 		mutex_unlock(&iter->mutex);
6683 
6684 		ret = wait_on_pipe(iter, 0);
6685 
6686 		mutex_lock(&iter->mutex);
6687 
6688 		if (ret)
6689 			return ret;
6690 	}
6691 
6692 	return 1;
6693 }
6694 
update_last_data_if_empty(struct trace_array * tr)6695 static bool update_last_data_if_empty(struct trace_array *tr)
6696 {
6697 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6698 		return false;
6699 
6700 	if (!ring_buffer_empty(tr->array_buffer.buffer))
6701 		return false;
6702 
6703 	/*
6704 	 * If the buffer contains the last boot data and all per-cpu
6705 	 * buffers are empty, reset it from the kernel side.
6706 	 */
6707 	update_last_data(tr);
6708 	return true;
6709 }
6710 
6711 /*
6712  * Consumer reader.
6713  */
6714 static ssize_t
tracing_read_pipe(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6715 tracing_read_pipe(struct file *filp, char __user *ubuf,
6716 		  size_t cnt, loff_t *ppos)
6717 {
6718 	struct trace_iterator *iter = filp->private_data;
6719 	ssize_t sret;
6720 
6721 	/*
6722 	 * Avoid more than one consumer on a single file descriptor
6723 	 * This is just a matter of traces coherency, the ring buffer itself
6724 	 * is protected.
6725 	 */
6726 	guard(mutex)(&iter->mutex);
6727 
6728 	/* return any leftover data */
6729 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6730 	if (sret != -EBUSY)
6731 		return sret;
6732 
6733 	trace_seq_init(&iter->seq);
6734 
6735 	if (iter->trace->read) {
6736 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6737 		if (sret)
6738 			return sret;
6739 	}
6740 
6741 waitagain:
6742 	if (update_last_data_if_empty(iter->tr))
6743 		return 0;
6744 
6745 	sret = tracing_wait_pipe(filp);
6746 	if (sret <= 0)
6747 		return sret;
6748 
6749 	/* stop when tracing is finished */
6750 	if (trace_empty(iter))
6751 		return 0;
6752 
6753 	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6754 		cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6755 
6756 	/* reset all but tr, trace, and overruns */
6757 	trace_iterator_reset(iter);
6758 	cpumask_clear(iter->started);
6759 	trace_seq_init(&iter->seq);
6760 
6761 	trace_event_read_lock();
6762 	trace_access_lock(iter->cpu_file);
6763 	while (trace_find_next_entry_inc(iter) != NULL) {
6764 		enum print_line_t ret;
6765 		int save_len = iter->seq.seq.len;
6766 
6767 		ret = print_trace_line(iter);
6768 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6769 			/*
6770 			 * If one print_trace_line() fills entire trace_seq in one shot,
6771 			 * trace_seq_to_user() will returns -EBUSY because save_len == 0,
6772 			 * In this case, we need to consume it, otherwise, loop will peek
6773 			 * this event next time, resulting in an infinite loop.
6774 			 */
6775 			if (save_len == 0) {
6776 				iter->seq.full = 0;
6777 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6778 				trace_consume(iter);
6779 				break;
6780 			}
6781 
6782 			/* In other cases, don't print partial lines */
6783 			iter->seq.seq.len = save_len;
6784 			break;
6785 		}
6786 		if (ret != TRACE_TYPE_NO_CONSUME)
6787 			trace_consume(iter);
6788 
6789 		if (trace_seq_used(&iter->seq) >= cnt)
6790 			break;
6791 
6792 		/*
6793 		 * Setting the full flag means we reached the trace_seq buffer
6794 		 * size and we should leave by partial output condition above.
6795 		 * One of the trace_seq_* functions is not used properly.
6796 		 */
6797 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6798 			  iter->ent->type);
6799 	}
6800 	trace_access_unlock(iter->cpu_file);
6801 	trace_event_read_unlock();
6802 
6803 	/* Now copy what we have to the user */
6804 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6805 	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6806 		trace_seq_init(&iter->seq);
6807 
6808 	/*
6809 	 * If there was nothing to send to user, in spite of consuming trace
6810 	 * entries, go back to wait for more entries.
6811 	 */
6812 	if (sret == -EBUSY)
6813 		goto waitagain;
6814 
6815 	return sret;
6816 }
6817 
tracing_spd_release_pipe(struct splice_pipe_desc * spd,unsigned int idx)6818 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6819 				     unsigned int idx)
6820 {
6821 	__free_page(spd->pages[idx]);
6822 }
6823 
6824 static size_t
tracing_fill_pipe_page(size_t rem,struct trace_iterator * iter)6825 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6826 {
6827 	size_t count;
6828 	int save_len;
6829 	int ret;
6830 
6831 	/* Seq buffer is page-sized, exactly what we need. */
6832 	for (;;) {
6833 		save_len = iter->seq.seq.len;
6834 		ret = print_trace_line(iter);
6835 
6836 		if (trace_seq_has_overflowed(&iter->seq)) {
6837 			iter->seq.seq.len = save_len;
6838 			break;
6839 		}
6840 
6841 		/*
6842 		 * This should not be hit, because it should only
6843 		 * be set if the iter->seq overflowed. But check it
6844 		 * anyway to be safe.
6845 		 */
6846 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6847 			iter->seq.seq.len = save_len;
6848 			break;
6849 		}
6850 
6851 		count = trace_seq_used(&iter->seq) - save_len;
6852 		if (rem < count) {
6853 			rem = 0;
6854 			iter->seq.seq.len = save_len;
6855 			break;
6856 		}
6857 
6858 		if (ret != TRACE_TYPE_NO_CONSUME)
6859 			trace_consume(iter);
6860 		rem -= count;
6861 		if (!trace_find_next_entry_inc(iter))	{
6862 			rem = 0;
6863 			iter->ent = NULL;
6864 			break;
6865 		}
6866 	}
6867 
6868 	return rem;
6869 }
6870 
tracing_splice_read_pipe(struct file * filp,loff_t * ppos,struct pipe_inode_info * pipe,size_t len,unsigned int flags)6871 static ssize_t tracing_splice_read_pipe(struct file *filp,
6872 					loff_t *ppos,
6873 					struct pipe_inode_info *pipe,
6874 					size_t len,
6875 					unsigned int flags)
6876 {
6877 	struct page *pages_def[PIPE_DEF_BUFFERS];
6878 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6879 	struct trace_iterator *iter = filp->private_data;
6880 	struct splice_pipe_desc spd = {
6881 		.pages		= pages_def,
6882 		.partial	= partial_def,
6883 		.nr_pages	= 0, /* This gets updated below. */
6884 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6885 		.ops		= &default_pipe_buf_ops,
6886 		.spd_release	= tracing_spd_release_pipe,
6887 	};
6888 	ssize_t ret;
6889 	size_t rem;
6890 	unsigned int i;
6891 
6892 	if (splice_grow_spd(pipe, &spd))
6893 		return -ENOMEM;
6894 
6895 	mutex_lock(&iter->mutex);
6896 
6897 	if (iter->trace->splice_read) {
6898 		ret = iter->trace->splice_read(iter, filp,
6899 					       ppos, pipe, len, flags);
6900 		if (ret)
6901 			goto out_err;
6902 	}
6903 
6904 	ret = tracing_wait_pipe(filp);
6905 	if (ret <= 0)
6906 		goto out_err;
6907 
6908 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6909 		ret = -EFAULT;
6910 		goto out_err;
6911 	}
6912 
6913 	trace_event_read_lock();
6914 	trace_access_lock(iter->cpu_file);
6915 
6916 	/* Fill as many pages as possible. */
6917 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6918 		spd.pages[i] = alloc_page(GFP_KERNEL);
6919 		if (!spd.pages[i])
6920 			break;
6921 
6922 		rem = tracing_fill_pipe_page(rem, iter);
6923 
6924 		/* Copy the data into the page, so we can start over. */
6925 		ret = trace_seq_to_buffer(&iter->seq,
6926 					  page_address(spd.pages[i]),
6927 					  min((size_t)trace_seq_used(&iter->seq),
6928 						  (size_t)PAGE_SIZE));
6929 		if (ret < 0) {
6930 			__free_page(spd.pages[i]);
6931 			break;
6932 		}
6933 		spd.partial[i].offset = 0;
6934 		spd.partial[i].len = ret;
6935 
6936 		trace_seq_init(&iter->seq);
6937 	}
6938 
6939 	trace_access_unlock(iter->cpu_file);
6940 	trace_event_read_unlock();
6941 	mutex_unlock(&iter->mutex);
6942 
6943 	spd.nr_pages = i;
6944 
6945 	if (i)
6946 		ret = splice_to_pipe(pipe, &spd);
6947 	else
6948 		ret = 0;
6949 out:
6950 	splice_shrink_spd(&spd);
6951 	return ret;
6952 
6953 out_err:
6954 	mutex_unlock(&iter->mutex);
6955 	goto out;
6956 }
6957 
6958 static ssize_t
tracing_syscall_buf_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6959 tracing_syscall_buf_read(struct file *filp, char __user *ubuf,
6960 			 size_t cnt, loff_t *ppos)
6961 {
6962 	struct inode *inode = file_inode(filp);
6963 	struct trace_array *tr = inode->i_private;
6964 	char buf[64];
6965 	int r;
6966 
6967 	r = snprintf(buf, 64, "%d\n", tr->syscall_buf_sz);
6968 
6969 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6970 }
6971 
6972 static ssize_t
tracing_syscall_buf_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6973 tracing_syscall_buf_write(struct file *filp, const char __user *ubuf,
6974 			  size_t cnt, loff_t *ppos)
6975 {
6976 	struct inode *inode = file_inode(filp);
6977 	struct trace_array *tr = inode->i_private;
6978 	unsigned long val;
6979 	int ret;
6980 
6981 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6982 	if (ret)
6983 		return ret;
6984 
6985 	if (val > SYSCALL_FAULT_USER_MAX)
6986 		val = SYSCALL_FAULT_USER_MAX;
6987 
6988 	tr->syscall_buf_sz = val;
6989 
6990 	*ppos += cnt;
6991 
6992 	return cnt;
6993 }
6994 
6995 static ssize_t
tracing_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6996 tracing_entries_read(struct file *filp, char __user *ubuf,
6997 		     size_t cnt, loff_t *ppos)
6998 {
6999 	struct inode *inode = file_inode(filp);
7000 	struct trace_array *tr = inode->i_private;
7001 	int cpu = tracing_get_cpu(inode);
7002 	char buf[64];
7003 	int r = 0;
7004 	ssize_t ret;
7005 
7006 	mutex_lock(&trace_types_lock);
7007 
7008 	if (cpu == RING_BUFFER_ALL_CPUS) {
7009 		int cpu, buf_size_same;
7010 		unsigned long size;
7011 
7012 		size = 0;
7013 		buf_size_same = 1;
7014 		/* check if all cpu sizes are same */
7015 		for_each_tracing_cpu(cpu) {
7016 			/* fill in the size from first enabled cpu */
7017 			if (size == 0)
7018 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7019 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7020 				buf_size_same = 0;
7021 				break;
7022 			}
7023 		}
7024 
7025 		if (buf_size_same) {
7026 			if (!tr->ring_buffer_expanded)
7027 				r = sprintf(buf, "%lu (expanded: %lu)\n",
7028 					    size >> 10,
7029 					    trace_buf_size >> 10);
7030 			else
7031 				r = sprintf(buf, "%lu\n", size >> 10);
7032 		} else
7033 			r = sprintf(buf, "X\n");
7034 	} else
7035 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7036 
7037 	mutex_unlock(&trace_types_lock);
7038 
7039 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7040 	return ret;
7041 }
7042 
7043 static ssize_t
tracing_entries_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7044 tracing_entries_write(struct file *filp, const char __user *ubuf,
7045 		      size_t cnt, loff_t *ppos)
7046 {
7047 	struct inode *inode = file_inode(filp);
7048 	struct trace_array *tr = inode->i_private;
7049 	unsigned long val;
7050 	int ret;
7051 
7052 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7053 	if (ret)
7054 		return ret;
7055 
7056 	/* must have at least 1 entry */
7057 	if (!val)
7058 		return -EINVAL;
7059 
7060 	/* value is in KB */
7061 	val <<= 10;
7062 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7063 	if (ret < 0)
7064 		return ret;
7065 
7066 	*ppos += cnt;
7067 
7068 	return cnt;
7069 }
7070 
7071 static ssize_t
tracing_total_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)7072 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7073 				size_t cnt, loff_t *ppos)
7074 {
7075 	struct trace_array *tr = filp->private_data;
7076 	char buf[64];
7077 	int r, cpu;
7078 	unsigned long size = 0, expanded_size = 0;
7079 
7080 	mutex_lock(&trace_types_lock);
7081 	for_each_tracing_cpu(cpu) {
7082 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7083 		if (!tr->ring_buffer_expanded)
7084 			expanded_size += trace_buf_size >> 10;
7085 	}
7086 	if (tr->ring_buffer_expanded)
7087 		r = sprintf(buf, "%lu\n", size);
7088 	else
7089 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7090 	mutex_unlock(&trace_types_lock);
7091 
7092 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7093 }
7094 
7095 #define LAST_BOOT_HEADER ((void *)1)
7096 
l_next(struct seq_file * m,void * v,loff_t * pos)7097 static void *l_next(struct seq_file *m, void *v, loff_t *pos)
7098 {
7099 	struct trace_array *tr = m->private;
7100 	struct trace_scratch *tscratch = tr->scratch;
7101 	unsigned int index = *pos;
7102 
7103 	(*pos)++;
7104 
7105 	if (*pos == 1)
7106 		return LAST_BOOT_HEADER;
7107 
7108 	/* Only show offsets of the last boot data */
7109 	if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7110 		return NULL;
7111 
7112 	/* *pos 0 is for the header, 1 is for the first module */
7113 	index--;
7114 
7115 	if (index >= tscratch->nr_entries)
7116 		return NULL;
7117 
7118 	return &tscratch->entries[index];
7119 }
7120 
l_start(struct seq_file * m,loff_t * pos)7121 static void *l_start(struct seq_file *m, loff_t *pos)
7122 {
7123 	mutex_lock(&scratch_mutex);
7124 
7125 	return l_next(m, NULL, pos);
7126 }
7127 
l_stop(struct seq_file * m,void * p)7128 static void l_stop(struct seq_file *m, void *p)
7129 {
7130 	mutex_unlock(&scratch_mutex);
7131 }
7132 
show_last_boot_header(struct seq_file * m,struct trace_array * tr)7133 static void show_last_boot_header(struct seq_file *m, struct trace_array *tr)
7134 {
7135 	struct trace_scratch *tscratch = tr->scratch;
7136 
7137 	/*
7138 	 * Do not leak KASLR address. This only shows the KASLR address of
7139 	 * the last boot. When the ring buffer is started, the LAST_BOOT
7140 	 * flag gets cleared, and this should only report "current".
7141 	 * Otherwise it shows the KASLR address from the previous boot which
7142 	 * should not be the same as the current boot.
7143 	 */
7144 	if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7145 		seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr);
7146 	else
7147 		seq_puts(m, "# Current\n");
7148 }
7149 
l_show(struct seq_file * m,void * v)7150 static int l_show(struct seq_file *m, void *v)
7151 {
7152 	struct trace_array *tr = m->private;
7153 	struct trace_mod_entry *entry = v;
7154 
7155 	if (v == LAST_BOOT_HEADER) {
7156 		show_last_boot_header(m, tr);
7157 		return 0;
7158 	}
7159 
7160 	seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name);
7161 	return 0;
7162 }
7163 
7164 static const struct seq_operations last_boot_seq_ops = {
7165 	.start		= l_start,
7166 	.next		= l_next,
7167 	.stop		= l_stop,
7168 	.show		= l_show,
7169 };
7170 
tracing_last_boot_open(struct inode * inode,struct file * file)7171 static int tracing_last_boot_open(struct inode *inode, struct file *file)
7172 {
7173 	struct trace_array *tr = inode->i_private;
7174 	struct seq_file *m;
7175 	int ret;
7176 
7177 	ret = tracing_check_open_get_tr(tr);
7178 	if (ret)
7179 		return ret;
7180 
7181 	ret = seq_open(file, &last_boot_seq_ops);
7182 	if (ret) {
7183 		trace_array_put(tr);
7184 		return ret;
7185 	}
7186 
7187 	m = file->private_data;
7188 	m->private = tr;
7189 
7190 	return 0;
7191 }
7192 
tracing_buffer_meta_open(struct inode * inode,struct file * filp)7193 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
7194 {
7195 	struct trace_array *tr = inode->i_private;
7196 	int cpu = tracing_get_cpu(inode);
7197 	int ret;
7198 
7199 	ret = tracing_check_open_get_tr(tr);
7200 	if (ret)
7201 		return ret;
7202 
7203 	ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
7204 	if (ret < 0)
7205 		__trace_array_put(tr);
7206 	return ret;
7207 }
7208 
7209 static ssize_t
tracing_free_buffer_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7210 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7211 			  size_t cnt, loff_t *ppos)
7212 {
7213 	/*
7214 	 * There is no need to read what the user has written, this function
7215 	 * is just to make sure that there is no error when "echo" is used
7216 	 */
7217 
7218 	*ppos += cnt;
7219 
7220 	return cnt;
7221 }
7222 
7223 static int
tracing_free_buffer_release(struct inode * inode,struct file * filp)7224 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7225 {
7226 	struct trace_array *tr = inode->i_private;
7227 
7228 	/* disable tracing ? */
7229 	if (tr->trace_flags & TRACE_ITER(STOP_ON_FREE))
7230 		tracer_tracing_off(tr);
7231 	/* resize the ring buffer to 0 */
7232 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7233 
7234 	trace_array_put(tr);
7235 
7236 	return 0;
7237 }
7238 
7239 #define TRACE_MARKER_MAX_SIZE		4096
7240 
write_marker_to_buffer(struct trace_array * tr,const char * buf,size_t cnt,unsigned long ip)7241 static ssize_t write_marker_to_buffer(struct trace_array *tr, const char *buf,
7242 				      size_t cnt, unsigned long ip)
7243 {
7244 	struct ring_buffer_event *event;
7245 	enum event_trigger_type tt = ETT_NONE;
7246 	struct trace_buffer *buffer;
7247 	struct print_entry *entry;
7248 	int meta_size;
7249 	ssize_t written;
7250 	size_t size;
7251 
7252 	meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
7253  again:
7254 	size = cnt + meta_size;
7255 
7256 	buffer = tr->array_buffer.buffer;
7257 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7258 					    tracing_gen_ctx());
7259 	if (unlikely(!event)) {
7260 		/*
7261 		 * If the size was greater than what was allowed, then
7262 		 * make it smaller and try again.
7263 		 */
7264 		if (size > ring_buffer_max_event_size(buffer)) {
7265 			cnt = ring_buffer_max_event_size(buffer) - meta_size;
7266 			/* The above should only happen once */
7267 			if (WARN_ON_ONCE(cnt + meta_size == size))
7268 				return -EBADF;
7269 			goto again;
7270 		}
7271 
7272 		/* Ring buffer disabled, return as if not open for write */
7273 		return -EBADF;
7274 	}
7275 
7276 	entry = ring_buffer_event_data(event);
7277 	entry->ip = ip;
7278 	memcpy(&entry->buf, buf, cnt);
7279 	written = cnt;
7280 
7281 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7282 		/* do not add \n before testing triggers, but add \0 */
7283 		entry->buf[cnt] = '\0';
7284 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7285 	}
7286 
7287 	if (entry->buf[cnt - 1] != '\n') {
7288 		entry->buf[cnt] = '\n';
7289 		entry->buf[cnt + 1] = '\0';
7290 	} else
7291 		entry->buf[cnt] = '\0';
7292 
7293 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7294 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7295 	__buffer_unlock_commit(buffer, event);
7296 
7297 	if (tt)
7298 		event_triggers_post_call(tr->trace_marker_file, tt);
7299 
7300 	return written;
7301 }
7302 
7303 struct trace_user_buf {
7304 	char		*buf;
7305 };
7306 
7307 static DEFINE_MUTEX(trace_user_buffer_mutex);
7308 static struct trace_user_buf_info *trace_user_buffer;
7309 
7310 /**
7311  * trace_user_fault_destroy - free up allocated memory of a trace user buffer
7312  * @tinfo: The descriptor to free up
7313  *
7314  * Frees any data allocated in the trace info dsecriptor.
7315  */
trace_user_fault_destroy(struct trace_user_buf_info * tinfo)7316 void trace_user_fault_destroy(struct trace_user_buf_info *tinfo)
7317 {
7318 	char *buf;
7319 	int cpu;
7320 
7321 	if (!tinfo || !tinfo->tbuf)
7322 		return;
7323 
7324 	for_each_possible_cpu(cpu) {
7325 		buf = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
7326 		kfree(buf);
7327 	}
7328 	free_percpu(tinfo->tbuf);
7329 }
7330 
user_fault_buffer_enable(struct trace_user_buf_info * tinfo,size_t size)7331 static int user_fault_buffer_enable(struct trace_user_buf_info *tinfo, size_t size)
7332 {
7333 	char *buf;
7334 	int cpu;
7335 
7336 	lockdep_assert_held(&trace_user_buffer_mutex);
7337 
7338 	tinfo->tbuf = alloc_percpu(struct trace_user_buf);
7339 	if (!tinfo->tbuf)
7340 		return -ENOMEM;
7341 
7342 	tinfo->ref = 1;
7343 	tinfo->size = size;
7344 
7345 	/* Clear each buffer in case of error */
7346 	for_each_possible_cpu(cpu) {
7347 		per_cpu_ptr(tinfo->tbuf, cpu)->buf = NULL;
7348 	}
7349 
7350 	for_each_possible_cpu(cpu) {
7351 		buf = kmalloc_node(size, GFP_KERNEL,
7352 				   cpu_to_node(cpu));
7353 		if (!buf)
7354 			return -ENOMEM;
7355 		per_cpu_ptr(tinfo->tbuf, cpu)->buf = buf;
7356 	}
7357 
7358 	return 0;
7359 }
7360 
7361 /* For internal use. Free and reinitialize */
user_buffer_free(struct trace_user_buf_info ** tinfo)7362 static void user_buffer_free(struct trace_user_buf_info **tinfo)
7363 {
7364 	lockdep_assert_held(&trace_user_buffer_mutex);
7365 
7366 	trace_user_fault_destroy(*tinfo);
7367 	kfree(*tinfo);
7368 	*tinfo = NULL;
7369 }
7370 
7371 /* For internal use. Initialize and allocate */
user_buffer_init(struct trace_user_buf_info ** tinfo,size_t size)7372 static int user_buffer_init(struct trace_user_buf_info **tinfo, size_t size)
7373 {
7374 	bool alloc = false;
7375 	int ret;
7376 
7377 	lockdep_assert_held(&trace_user_buffer_mutex);
7378 
7379 	if (!*tinfo) {
7380 		alloc = true;
7381 		*tinfo = kzalloc(sizeof(**tinfo), GFP_KERNEL);
7382 		if (!*tinfo)
7383 			return -ENOMEM;
7384 	}
7385 
7386 	ret = user_fault_buffer_enable(*tinfo, size);
7387 	if (ret < 0 && alloc)
7388 		user_buffer_free(tinfo);
7389 
7390 	return ret;
7391 }
7392 
7393 /* For internal use, derefrence and free if necessary */
user_buffer_put(struct trace_user_buf_info ** tinfo)7394 static void user_buffer_put(struct trace_user_buf_info **tinfo)
7395 {
7396 	guard(mutex)(&trace_user_buffer_mutex);
7397 
7398 	if (WARN_ON_ONCE(!*tinfo || !(*tinfo)->ref))
7399 		return;
7400 
7401 	if (--(*tinfo)->ref)
7402 		return;
7403 
7404 	user_buffer_free(tinfo);
7405 }
7406 
7407 /**
7408  * trace_user_fault_init - Allocated or reference a per CPU buffer
7409  * @tinfo: A pointer to the trace buffer descriptor
7410  * @size: The size to allocate each per CPU buffer
7411  *
7412  * Create a per CPU buffer that can be used to copy from user space
7413  * in a task context. When calling trace_user_fault_read(), preemption
7414  * must be disabled, and it will enable preemption and copy user
7415  * space data to the buffer. If any schedule switches occur, it will
7416  * retry until it succeeds without a schedule switch knowing the buffer
7417  * is still valid.
7418  *
7419  * Returns 0 on success, negative on failure.
7420  */
trace_user_fault_init(struct trace_user_buf_info * tinfo,size_t size)7421 int trace_user_fault_init(struct trace_user_buf_info *tinfo, size_t size)
7422 {
7423 	int ret;
7424 
7425 	if (!tinfo)
7426 		return -EINVAL;
7427 
7428 	guard(mutex)(&trace_user_buffer_mutex);
7429 
7430 	ret = user_buffer_init(&tinfo, size);
7431 	if (ret < 0)
7432 		trace_user_fault_destroy(tinfo);
7433 
7434 	return ret;
7435 }
7436 
7437 /**
7438  * trace_user_fault_get - up the ref count for the user buffer
7439  * @tinfo: A pointer to a pointer to the trace buffer descriptor
7440  *
7441  * Ups the ref count of the trace buffer.
7442  *
7443  * Returns the new ref count.
7444  */
trace_user_fault_get(struct trace_user_buf_info * tinfo)7445 int trace_user_fault_get(struct trace_user_buf_info *tinfo)
7446 {
7447 	if (!tinfo)
7448 		return -1;
7449 
7450 	guard(mutex)(&trace_user_buffer_mutex);
7451 
7452 	tinfo->ref++;
7453 	return tinfo->ref;
7454 }
7455 
7456 /**
7457  * trace_user_fault_put - dereference a per cpu trace buffer
7458  * @tinfo: The @tinfo that was passed to trace_user_fault_get()
7459  *
7460  * Decrement the ref count of @tinfo.
7461  *
7462  * Returns the new refcount (negative on error).
7463  */
trace_user_fault_put(struct trace_user_buf_info * tinfo)7464 int trace_user_fault_put(struct trace_user_buf_info *tinfo)
7465 {
7466 	guard(mutex)(&trace_user_buffer_mutex);
7467 
7468 	if (WARN_ON_ONCE(!tinfo || !tinfo->ref))
7469 		return -1;
7470 
7471 	--tinfo->ref;
7472 	return tinfo->ref;
7473 }
7474 
7475 /**
7476  * trace_user_fault_read - Read user space into a per CPU buffer
7477  * @tinfo: The @tinfo allocated by trace_user_fault_get()
7478  * @ptr: The user space pointer to read
7479  * @size: The size of user space to read.
7480  * @copy_func: Optional function to use to copy from user space
7481  * @data: Data to pass to copy_func if it was supplied
7482  *
7483  * Preemption must be disabled when this is called, and must not
7484  * be enabled while using the returned buffer.
7485  * This does the copying from user space into a per CPU buffer.
7486  *
7487  * The @size must not be greater than the size passed in to
7488  * trace_user_fault_init().
7489  *
7490  * If @copy_func is NULL, trace_user_fault_read() will use copy_from_user(),
7491  * otherwise it will call @copy_func. It will call @copy_func with:
7492  *
7493  *   buffer: the per CPU buffer of the @tinfo.
7494  *   ptr: The pointer @ptr to user space to read
7495  *   size: The @size of the ptr to read
7496  *   data: The @data parameter
7497  *
7498  * It is expected that @copy_func will return 0 on success and non zero
7499  * if there was a fault.
7500  *
7501  * Returns a pointer to the buffer with the content read from @ptr.
7502  *   Preemption must remain disabled while the caller accesses the
7503  *   buffer returned by this function.
7504  * Returns NULL if there was a fault, or the size passed in is
7505  *   greater than the size passed to trace_user_fault_init().
7506  */
trace_user_fault_read(struct trace_user_buf_info * tinfo,const char __user * ptr,size_t size,trace_user_buf_copy copy_func,void * data)7507 char *trace_user_fault_read(struct trace_user_buf_info *tinfo,
7508 			     const char __user *ptr, size_t size,
7509 			     trace_user_buf_copy copy_func, void *data)
7510 {
7511 	int cpu = smp_processor_id();
7512 	char *buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
7513 	unsigned int cnt;
7514 	int trys = 0;
7515 	int ret;
7516 
7517 	lockdep_assert_preemption_disabled();
7518 
7519 	/*
7520 	 * It's up to the caller to not try to copy more than it said
7521 	 * it would.
7522 	 */
7523 	if (size > tinfo->size)
7524 		return NULL;
7525 
7526 	/*
7527 	 * This acts similar to a seqcount. The per CPU context switches are
7528 	 * recorded, migration is disabled and preemption is enabled. The
7529 	 * read of the user space memory is copied into the per CPU buffer.
7530 	 * Preemption is disabled again, and if the per CPU context switches count
7531 	 * is still the same, it means the buffer has not been corrupted.
7532 	 * If the count is different, it is assumed the buffer is corrupted
7533 	 * and reading must be tried again.
7534 	 */
7535 
7536 	do {
7537 		/*
7538 		 * If for some reason, copy_from_user() always causes a context
7539 		 * switch, this would then cause an infinite loop.
7540 		 * If this task is preempted by another user space task, it
7541 		 * will cause this task to try again. But just in case something
7542 		 * changes where the copying from user space causes another task
7543 		 * to run, prevent this from going into an infinite loop.
7544 		 * 100 tries should be plenty.
7545 		 */
7546 		if (WARN_ONCE(trys++ > 100, "Error: Too many tries to read user space"))
7547 			return NULL;
7548 
7549 		/* Read the current CPU context switch counter */
7550 		cnt = nr_context_switches_cpu(cpu);
7551 
7552 		/*
7553 		 * Preemption is going to be enabled, but this task must
7554 		 * remain on this CPU.
7555 		 */
7556 		migrate_disable();
7557 
7558 		/*
7559 		 * Now preemption is being enabled and another task can come in
7560 		 * and use the same buffer and corrupt our data.
7561 		 */
7562 		preempt_enable_notrace();
7563 
7564 		/* Make sure preemption is enabled here */
7565 		lockdep_assert_preemption_enabled();
7566 
7567 		if (copy_func) {
7568 			ret = copy_func(buffer, ptr, size, data);
7569 		} else {
7570 			ret = __copy_from_user(buffer, ptr, size);
7571 		}
7572 
7573 		preempt_disable_notrace();
7574 		migrate_enable();
7575 
7576 		/* if it faulted, no need to test if the buffer was corrupted */
7577 		if (ret)
7578 			return NULL;
7579 
7580 		/*
7581 		 * Preemption is disabled again, now check the per CPU context
7582 		 * switch counter. If it doesn't match, then another user space
7583 		 * process may have schedule in and corrupted our buffer. In that
7584 		 * case the copying must be retried.
7585 		 */
7586 	} while (nr_context_switches_cpu(cpu) != cnt);
7587 
7588 	return buffer;
7589 }
7590 
7591 static ssize_t
tracing_mark_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7592 tracing_mark_write(struct file *filp, const char __user *ubuf,
7593 					size_t cnt, loff_t *fpos)
7594 {
7595 	struct trace_array *tr = filp->private_data;
7596 	ssize_t written = -ENODEV;
7597 	unsigned long ip;
7598 	char *buf;
7599 
7600 	if (tracing_disabled)
7601 		return -EINVAL;
7602 
7603 	if (!(tr->trace_flags & TRACE_ITER(MARKERS)))
7604 		return -EINVAL;
7605 
7606 	if ((ssize_t)cnt < 0)
7607 		return -EINVAL;
7608 
7609 	if (cnt > TRACE_MARKER_MAX_SIZE)
7610 		cnt = TRACE_MARKER_MAX_SIZE;
7611 
7612 	/* Must have preemption disabled while having access to the buffer */
7613 	guard(preempt_notrace)();
7614 
7615 	buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL);
7616 	if (!buf)
7617 		return -EFAULT;
7618 
7619 	/* The selftests expect this function to be the IP address */
7620 	ip = _THIS_IP_;
7621 
7622 	/* The global trace_marker can go to multiple instances */
7623 	if (tr == &global_trace) {
7624 		guard(rcu)();
7625 		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7626 			written = write_marker_to_buffer(tr, buf, cnt, ip);
7627 			if (written < 0)
7628 				break;
7629 		}
7630 	} else {
7631 		written = write_marker_to_buffer(tr, buf, cnt, ip);
7632 	}
7633 
7634 	return written;
7635 }
7636 
write_raw_marker_to_buffer(struct trace_array * tr,const char * buf,size_t cnt)7637 static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
7638 					  const char *buf, size_t cnt)
7639 {
7640 	struct ring_buffer_event *event;
7641 	struct trace_buffer *buffer;
7642 	struct raw_data_entry *entry;
7643 	ssize_t written;
7644 	size_t size;
7645 
7646 	/* cnt includes both the entry->id and the data behind it. */
7647 	size = struct_offset(entry, id) + cnt;
7648 
7649 	buffer = tr->array_buffer.buffer;
7650 
7651 	if (size > ring_buffer_max_event_size(buffer))
7652 		return -EINVAL;
7653 
7654 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7655 					    tracing_gen_ctx());
7656 	if (!event)
7657 		/* Ring buffer disabled, return as if not open for write */
7658 		return -EBADF;
7659 
7660 	entry = ring_buffer_event_data(event);
7661 	unsafe_memcpy(&entry->id, buf, cnt,
7662 		      "id and content already reserved on ring buffer"
7663 		      "'buf' includes the 'id' and the data."
7664 		      "'entry' was allocated with cnt from 'id'.");
7665 	written = cnt;
7666 
7667 	__buffer_unlock_commit(buffer, event);
7668 
7669 	return written;
7670 }
7671 
7672 static ssize_t
tracing_mark_raw_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7673 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7674 					size_t cnt, loff_t *fpos)
7675 {
7676 	struct trace_array *tr = filp->private_data;
7677 	ssize_t written = -ENODEV;
7678 	char *buf;
7679 
7680 	if (tracing_disabled)
7681 		return -EINVAL;
7682 
7683 	if (!(tr->trace_flags & TRACE_ITER(MARKERS)))
7684 		return -EINVAL;
7685 
7686 	/* The marker must at least have a tag id */
7687 	if (cnt < sizeof(unsigned int))
7688 		return -EINVAL;
7689 
7690 	/* raw write is all or nothing */
7691 	if (cnt > TRACE_MARKER_MAX_SIZE)
7692 		return -EINVAL;
7693 
7694 	/* Must have preemption disabled while having access to the buffer */
7695 	guard(preempt_notrace)();
7696 
7697 	buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL);
7698 	if (!buf)
7699 		return -EFAULT;
7700 
7701 	/* The global trace_marker_raw can go to multiple instances */
7702 	if (tr == &global_trace) {
7703 		guard(rcu)();
7704 		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7705 			written = write_raw_marker_to_buffer(tr, buf, cnt);
7706 			if (written < 0)
7707 				break;
7708 		}
7709 	} else {
7710 		written = write_raw_marker_to_buffer(tr, buf, cnt);
7711 	}
7712 
7713 	return written;
7714 }
7715 
tracing_mark_open(struct inode * inode,struct file * filp)7716 static int tracing_mark_open(struct inode *inode, struct file *filp)
7717 {
7718 	int ret;
7719 
7720 	scoped_guard(mutex, &trace_user_buffer_mutex) {
7721 		if (!trace_user_buffer) {
7722 			ret = user_buffer_init(&trace_user_buffer, TRACE_MARKER_MAX_SIZE);
7723 			if (ret < 0)
7724 				return ret;
7725 		} else {
7726 			trace_user_buffer->ref++;
7727 		}
7728 	}
7729 
7730 	stream_open(inode, filp);
7731 	ret = tracing_open_generic_tr(inode, filp);
7732 	if (ret < 0)
7733 		user_buffer_put(&trace_user_buffer);
7734 	return ret;
7735 }
7736 
tracing_mark_release(struct inode * inode,struct file * file)7737 static int tracing_mark_release(struct inode *inode, struct file *file)
7738 {
7739 	user_buffer_put(&trace_user_buffer);
7740 	return tracing_release_generic_tr(inode, file);
7741 }
7742 
tracing_clock_show(struct seq_file * m,void * v)7743 static int tracing_clock_show(struct seq_file *m, void *v)
7744 {
7745 	struct trace_array *tr = m->private;
7746 	int i;
7747 
7748 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7749 		seq_printf(m,
7750 			"%s%s%s%s", i ? " " : "",
7751 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7752 			i == tr->clock_id ? "]" : "");
7753 	seq_putc(m, '\n');
7754 
7755 	return 0;
7756 }
7757 
tracing_set_clock(struct trace_array * tr,const char * clockstr)7758 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7759 {
7760 	int i;
7761 
7762 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7763 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7764 			break;
7765 	}
7766 	if (i == ARRAY_SIZE(trace_clocks))
7767 		return -EINVAL;
7768 
7769 	guard(mutex)(&trace_types_lock);
7770 
7771 	tr->clock_id = i;
7772 
7773 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7774 
7775 	/*
7776 	 * New clock may not be consistent with the previous clock.
7777 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7778 	 */
7779 	tracing_reset_online_cpus(&tr->array_buffer);
7780 
7781 #ifdef CONFIG_TRACER_MAX_TRACE
7782 	if (tr->max_buffer.buffer)
7783 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7784 	tracing_reset_online_cpus(&tr->max_buffer);
7785 #endif
7786 
7787 	if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
7788 		struct trace_scratch *tscratch = tr->scratch;
7789 
7790 		tscratch->clock_id = i;
7791 	}
7792 
7793 	return 0;
7794 }
7795 
tracing_clock_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7796 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7797 				   size_t cnt, loff_t *fpos)
7798 {
7799 	struct seq_file *m = filp->private_data;
7800 	struct trace_array *tr = m->private;
7801 	char buf[64];
7802 	const char *clockstr;
7803 	int ret;
7804 
7805 	if (cnt >= sizeof(buf))
7806 		return -EINVAL;
7807 
7808 	if (copy_from_user(buf, ubuf, cnt))
7809 		return -EFAULT;
7810 
7811 	buf[cnt] = 0;
7812 
7813 	clockstr = strstrip(buf);
7814 
7815 	ret = tracing_set_clock(tr, clockstr);
7816 	if (ret)
7817 		return ret;
7818 
7819 	*fpos += cnt;
7820 
7821 	return cnt;
7822 }
7823 
tracing_clock_open(struct inode * inode,struct file * file)7824 static int tracing_clock_open(struct inode *inode, struct file *file)
7825 {
7826 	struct trace_array *tr = inode->i_private;
7827 	int ret;
7828 
7829 	ret = tracing_check_open_get_tr(tr);
7830 	if (ret)
7831 		return ret;
7832 
7833 	ret = single_open(file, tracing_clock_show, inode->i_private);
7834 	if (ret < 0)
7835 		trace_array_put(tr);
7836 
7837 	return ret;
7838 }
7839 
tracing_time_stamp_mode_show(struct seq_file * m,void * v)7840 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7841 {
7842 	struct trace_array *tr = m->private;
7843 
7844 	guard(mutex)(&trace_types_lock);
7845 
7846 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7847 		seq_puts(m, "delta [absolute]\n");
7848 	else
7849 		seq_puts(m, "[delta] absolute\n");
7850 
7851 	return 0;
7852 }
7853 
tracing_time_stamp_mode_open(struct inode * inode,struct file * file)7854 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7855 {
7856 	struct trace_array *tr = inode->i_private;
7857 	int ret;
7858 
7859 	ret = tracing_check_open_get_tr(tr);
7860 	if (ret)
7861 		return ret;
7862 
7863 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7864 	if (ret < 0)
7865 		trace_array_put(tr);
7866 
7867 	return ret;
7868 }
7869 
tracing_event_time_stamp(struct trace_buffer * buffer,struct ring_buffer_event * rbe)7870 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7871 {
7872 	if (rbe == this_cpu_read(trace_buffered_event))
7873 		return ring_buffer_time_stamp(buffer);
7874 
7875 	return ring_buffer_event_time_stamp(buffer, rbe);
7876 }
7877 
7878 /*
7879  * Set or disable using the per CPU trace_buffer_event when possible.
7880  */
tracing_set_filter_buffering(struct trace_array * tr,bool set)7881 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7882 {
7883 	guard(mutex)(&trace_types_lock);
7884 
7885 	if (set && tr->no_filter_buffering_ref++)
7886 		return 0;
7887 
7888 	if (!set) {
7889 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref))
7890 			return -EINVAL;
7891 
7892 		--tr->no_filter_buffering_ref;
7893 	}
7894 
7895 	return 0;
7896 }
7897 
7898 struct ftrace_buffer_info {
7899 	struct trace_iterator	iter;
7900 	void			*spare;
7901 	unsigned int		spare_cpu;
7902 	unsigned int		spare_size;
7903 	unsigned int		read;
7904 };
7905 
7906 #ifdef CONFIG_TRACER_SNAPSHOT
tracing_snapshot_open(struct inode * inode,struct file * file)7907 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7908 {
7909 	struct trace_array *tr = inode->i_private;
7910 	struct trace_iterator *iter;
7911 	struct seq_file *m;
7912 	int ret;
7913 
7914 	ret = tracing_check_open_get_tr(tr);
7915 	if (ret)
7916 		return ret;
7917 
7918 	if (file->f_mode & FMODE_READ) {
7919 		iter = __tracing_open(inode, file, true);
7920 		if (IS_ERR(iter))
7921 			ret = PTR_ERR(iter);
7922 	} else {
7923 		/* Writes still need the seq_file to hold the private data */
7924 		ret = -ENOMEM;
7925 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7926 		if (!m)
7927 			goto out;
7928 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7929 		if (!iter) {
7930 			kfree(m);
7931 			goto out;
7932 		}
7933 		ret = 0;
7934 
7935 		iter->tr = tr;
7936 		iter->array_buffer = &tr->max_buffer;
7937 		iter->cpu_file = tracing_get_cpu(inode);
7938 		m->private = iter;
7939 		file->private_data = m;
7940 	}
7941 out:
7942 	if (ret < 0)
7943 		trace_array_put(tr);
7944 
7945 	return ret;
7946 }
7947 
tracing_swap_cpu_buffer(void * tr)7948 static void tracing_swap_cpu_buffer(void *tr)
7949 {
7950 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7951 }
7952 
7953 static ssize_t
tracing_snapshot_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7954 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7955 		       loff_t *ppos)
7956 {
7957 	struct seq_file *m = filp->private_data;
7958 	struct trace_iterator *iter = m->private;
7959 	struct trace_array *tr = iter->tr;
7960 	unsigned long val;
7961 	int ret;
7962 
7963 	ret = tracing_update_buffers(tr);
7964 	if (ret < 0)
7965 		return ret;
7966 
7967 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7968 	if (ret)
7969 		return ret;
7970 
7971 	guard(mutex)(&trace_types_lock);
7972 
7973 	if (tr->current_trace->use_max_tr)
7974 		return -EBUSY;
7975 
7976 	local_irq_disable();
7977 	arch_spin_lock(&tr->max_lock);
7978 	if (tr->cond_snapshot)
7979 		ret = -EBUSY;
7980 	arch_spin_unlock(&tr->max_lock);
7981 	local_irq_enable();
7982 	if (ret)
7983 		return ret;
7984 
7985 	switch (val) {
7986 	case 0:
7987 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7988 			return -EINVAL;
7989 		if (tr->allocated_snapshot)
7990 			free_snapshot(tr);
7991 		break;
7992 	case 1:
7993 /* Only allow per-cpu swap if the ring buffer supports it */
7994 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7995 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7996 			return -EINVAL;
7997 #endif
7998 		if (tr->allocated_snapshot)
7999 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
8000 					&tr->array_buffer, iter->cpu_file);
8001 
8002 		ret = tracing_arm_snapshot_locked(tr);
8003 		if (ret)
8004 			return ret;
8005 
8006 		/* Now, we're going to swap */
8007 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
8008 			local_irq_disable();
8009 			update_max_tr(tr, current, smp_processor_id(), NULL);
8010 			local_irq_enable();
8011 		} else {
8012 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
8013 						 (void *)tr, 1);
8014 		}
8015 		tracing_disarm_snapshot(tr);
8016 		break;
8017 	default:
8018 		if (tr->allocated_snapshot) {
8019 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
8020 				tracing_reset_online_cpus(&tr->max_buffer);
8021 			else
8022 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
8023 		}
8024 		break;
8025 	}
8026 
8027 	if (ret >= 0) {
8028 		*ppos += cnt;
8029 		ret = cnt;
8030 	}
8031 
8032 	return ret;
8033 }
8034 
tracing_snapshot_release(struct inode * inode,struct file * file)8035 static int tracing_snapshot_release(struct inode *inode, struct file *file)
8036 {
8037 	struct seq_file *m = file->private_data;
8038 	int ret;
8039 
8040 	ret = tracing_release(inode, file);
8041 
8042 	if (file->f_mode & FMODE_READ)
8043 		return ret;
8044 
8045 	/* If write only, the seq_file is just a stub */
8046 	if (m)
8047 		kfree(m->private);
8048 	kfree(m);
8049 
8050 	return 0;
8051 }
8052 
8053 static int tracing_buffers_open(struct inode *inode, struct file *filp);
8054 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
8055 				    size_t count, loff_t *ppos);
8056 static int tracing_buffers_release(struct inode *inode, struct file *file);
8057 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8058 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
8059 
snapshot_raw_open(struct inode * inode,struct file * filp)8060 static int snapshot_raw_open(struct inode *inode, struct file *filp)
8061 {
8062 	struct ftrace_buffer_info *info;
8063 	int ret;
8064 
8065 	/* The following checks for tracefs lockdown */
8066 	ret = tracing_buffers_open(inode, filp);
8067 	if (ret < 0)
8068 		return ret;
8069 
8070 	info = filp->private_data;
8071 
8072 	if (info->iter.trace->use_max_tr) {
8073 		tracing_buffers_release(inode, filp);
8074 		return -EBUSY;
8075 	}
8076 
8077 	info->iter.snapshot = true;
8078 	info->iter.array_buffer = &info->iter.tr->max_buffer;
8079 
8080 	return ret;
8081 }
8082 
8083 #endif /* CONFIG_TRACER_SNAPSHOT */
8084 
8085 
8086 static const struct file_operations tracing_thresh_fops = {
8087 	.open		= tracing_open_generic,
8088 	.read		= tracing_thresh_read,
8089 	.write		= tracing_thresh_write,
8090 	.llseek		= generic_file_llseek,
8091 };
8092 
8093 #ifdef CONFIG_TRACER_MAX_TRACE
8094 static const struct file_operations tracing_max_lat_fops = {
8095 	.open		= tracing_open_generic_tr,
8096 	.read		= tracing_max_lat_read,
8097 	.write		= tracing_max_lat_write,
8098 	.llseek		= generic_file_llseek,
8099 	.release	= tracing_release_generic_tr,
8100 };
8101 #endif
8102 
8103 static const struct file_operations set_tracer_fops = {
8104 	.open		= tracing_open_generic_tr,
8105 	.read		= tracing_set_trace_read,
8106 	.write		= tracing_set_trace_write,
8107 	.llseek		= generic_file_llseek,
8108 	.release	= tracing_release_generic_tr,
8109 };
8110 
8111 static const struct file_operations tracing_pipe_fops = {
8112 	.open		= tracing_open_pipe,
8113 	.poll		= tracing_poll_pipe,
8114 	.read		= tracing_read_pipe,
8115 	.splice_read	= tracing_splice_read_pipe,
8116 	.release	= tracing_release_pipe,
8117 };
8118 
8119 static const struct file_operations tracing_entries_fops = {
8120 	.open		= tracing_open_generic_tr,
8121 	.read		= tracing_entries_read,
8122 	.write		= tracing_entries_write,
8123 	.llseek		= generic_file_llseek,
8124 	.release	= tracing_release_generic_tr,
8125 };
8126 
8127 static const struct file_operations tracing_syscall_buf_fops = {
8128 	.open		= tracing_open_generic_tr,
8129 	.read		= tracing_syscall_buf_read,
8130 	.write		= tracing_syscall_buf_write,
8131 	.llseek		= generic_file_llseek,
8132 	.release	= tracing_release_generic_tr,
8133 };
8134 
8135 static const struct file_operations tracing_buffer_meta_fops = {
8136 	.open		= tracing_buffer_meta_open,
8137 	.read		= seq_read,
8138 	.llseek		= seq_lseek,
8139 	.release	= tracing_seq_release,
8140 };
8141 
8142 static const struct file_operations tracing_total_entries_fops = {
8143 	.open		= tracing_open_generic_tr,
8144 	.read		= tracing_total_entries_read,
8145 	.llseek		= generic_file_llseek,
8146 	.release	= tracing_release_generic_tr,
8147 };
8148 
8149 static const struct file_operations tracing_free_buffer_fops = {
8150 	.open		= tracing_open_generic_tr,
8151 	.write		= tracing_free_buffer_write,
8152 	.release	= tracing_free_buffer_release,
8153 };
8154 
8155 static const struct file_operations tracing_mark_fops = {
8156 	.open		= tracing_mark_open,
8157 	.write		= tracing_mark_write,
8158 	.release	= tracing_mark_release,
8159 };
8160 
8161 static const struct file_operations tracing_mark_raw_fops = {
8162 	.open		= tracing_mark_open,
8163 	.write		= tracing_mark_raw_write,
8164 	.release	= tracing_mark_release,
8165 };
8166 
8167 static const struct file_operations trace_clock_fops = {
8168 	.open		= tracing_clock_open,
8169 	.read		= seq_read,
8170 	.llseek		= seq_lseek,
8171 	.release	= tracing_single_release_tr,
8172 	.write		= tracing_clock_write,
8173 };
8174 
8175 static const struct file_operations trace_time_stamp_mode_fops = {
8176 	.open		= tracing_time_stamp_mode_open,
8177 	.read		= seq_read,
8178 	.llseek		= seq_lseek,
8179 	.release	= tracing_single_release_tr,
8180 };
8181 
8182 static const struct file_operations last_boot_fops = {
8183 	.open		= tracing_last_boot_open,
8184 	.read		= seq_read,
8185 	.llseek		= seq_lseek,
8186 	.release	= tracing_seq_release,
8187 };
8188 
8189 #ifdef CONFIG_TRACER_SNAPSHOT
8190 static const struct file_operations snapshot_fops = {
8191 	.open		= tracing_snapshot_open,
8192 	.read		= seq_read,
8193 	.write		= tracing_snapshot_write,
8194 	.llseek		= tracing_lseek,
8195 	.release	= tracing_snapshot_release,
8196 };
8197 
8198 static const struct file_operations snapshot_raw_fops = {
8199 	.open		= snapshot_raw_open,
8200 	.read		= tracing_buffers_read,
8201 	.release	= tracing_buffers_release,
8202 	.splice_read	= tracing_buffers_splice_read,
8203 };
8204 
8205 #endif /* CONFIG_TRACER_SNAPSHOT */
8206 
8207 /*
8208  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
8209  * @filp: The active open file structure
8210  * @ubuf: The userspace provided buffer to read value into
8211  * @cnt: The maximum number of bytes to read
8212  * @ppos: The current "file" position
8213  *
8214  * This function implements the write interface for a struct trace_min_max_param.
8215  * The filp->private_data must point to a trace_min_max_param structure that
8216  * defines where to write the value, the min and the max acceptable values,
8217  * and a lock to protect the write.
8218  */
8219 static ssize_t
trace_min_max_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)8220 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
8221 {
8222 	struct trace_min_max_param *param = filp->private_data;
8223 	u64 val;
8224 	int err;
8225 
8226 	if (!param)
8227 		return -EFAULT;
8228 
8229 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
8230 	if (err)
8231 		return err;
8232 
8233 	if (param->lock)
8234 		mutex_lock(param->lock);
8235 
8236 	if (param->min && val < *param->min)
8237 		err = -EINVAL;
8238 
8239 	if (param->max && val > *param->max)
8240 		err = -EINVAL;
8241 
8242 	if (!err)
8243 		*param->val = val;
8244 
8245 	if (param->lock)
8246 		mutex_unlock(param->lock);
8247 
8248 	if (err)
8249 		return err;
8250 
8251 	return cnt;
8252 }
8253 
8254 /*
8255  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
8256  * @filp: The active open file structure
8257  * @ubuf: The userspace provided buffer to read value into
8258  * @cnt: The maximum number of bytes to read
8259  * @ppos: The current "file" position
8260  *
8261  * This function implements the read interface for a struct trace_min_max_param.
8262  * The filp->private_data must point to a trace_min_max_param struct with valid
8263  * data.
8264  */
8265 static ssize_t
trace_min_max_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8266 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
8267 {
8268 	struct trace_min_max_param *param = filp->private_data;
8269 	char buf[U64_STR_SIZE];
8270 	int len;
8271 	u64 val;
8272 
8273 	if (!param)
8274 		return -EFAULT;
8275 
8276 	val = *param->val;
8277 
8278 	if (cnt > sizeof(buf))
8279 		cnt = sizeof(buf);
8280 
8281 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
8282 
8283 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
8284 }
8285 
8286 const struct file_operations trace_min_max_fops = {
8287 	.open		= tracing_open_generic,
8288 	.read		= trace_min_max_read,
8289 	.write		= trace_min_max_write,
8290 };
8291 
8292 #define TRACING_LOG_ERRS_MAX	8
8293 #define TRACING_LOG_LOC_MAX	128
8294 
8295 #define CMD_PREFIX "  Command: "
8296 
8297 struct err_info {
8298 	const char	**errs;	/* ptr to loc-specific array of err strings */
8299 	u8		type;	/* index into errs -> specific err string */
8300 	u16		pos;	/* caret position */
8301 	u64		ts;
8302 };
8303 
8304 struct tracing_log_err {
8305 	struct list_head	list;
8306 	struct err_info		info;
8307 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
8308 	char			*cmd;                     /* what caused err */
8309 };
8310 
8311 static DEFINE_MUTEX(tracing_err_log_lock);
8312 
alloc_tracing_log_err(int len)8313 static struct tracing_log_err *alloc_tracing_log_err(int len)
8314 {
8315 	struct tracing_log_err *err;
8316 
8317 	err = kzalloc(sizeof(*err), GFP_KERNEL);
8318 	if (!err)
8319 		return ERR_PTR(-ENOMEM);
8320 
8321 	err->cmd = kzalloc(len, GFP_KERNEL);
8322 	if (!err->cmd) {
8323 		kfree(err);
8324 		return ERR_PTR(-ENOMEM);
8325 	}
8326 
8327 	return err;
8328 }
8329 
free_tracing_log_err(struct tracing_log_err * err)8330 static void free_tracing_log_err(struct tracing_log_err *err)
8331 {
8332 	kfree(err->cmd);
8333 	kfree(err);
8334 }
8335 
get_tracing_log_err(struct trace_array * tr,int len)8336 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
8337 						   int len)
8338 {
8339 	struct tracing_log_err *err;
8340 	char *cmd;
8341 
8342 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
8343 		err = alloc_tracing_log_err(len);
8344 		if (PTR_ERR(err) != -ENOMEM)
8345 			tr->n_err_log_entries++;
8346 
8347 		return err;
8348 	}
8349 	cmd = kzalloc(len, GFP_KERNEL);
8350 	if (!cmd)
8351 		return ERR_PTR(-ENOMEM);
8352 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8353 	kfree(err->cmd);
8354 	err->cmd = cmd;
8355 	list_del(&err->list);
8356 
8357 	return err;
8358 }
8359 
8360 /**
8361  * err_pos - find the position of a string within a command for error careting
8362  * @cmd: The tracing command that caused the error
8363  * @str: The string to position the caret at within @cmd
8364  *
8365  * Finds the position of the first occurrence of @str within @cmd.  The
8366  * return value can be passed to tracing_log_err() for caret placement
8367  * within @cmd.
8368  *
8369  * Returns the index within @cmd of the first occurrence of @str or 0
8370  * if @str was not found.
8371  */
err_pos(char * cmd,const char * str)8372 unsigned int err_pos(char *cmd, const char *str)
8373 {
8374 	char *found;
8375 
8376 	if (WARN_ON(!strlen(cmd)))
8377 		return 0;
8378 
8379 	found = strstr(cmd, str);
8380 	if (found)
8381 		return found - cmd;
8382 
8383 	return 0;
8384 }
8385 
8386 /**
8387  * tracing_log_err - write an error to the tracing error log
8388  * @tr: The associated trace array for the error (NULL for top level array)
8389  * @loc: A string describing where the error occurred
8390  * @cmd: The tracing command that caused the error
8391  * @errs: The array of loc-specific static error strings
8392  * @type: The index into errs[], which produces the specific static err string
8393  * @pos: The position the caret should be placed in the cmd
8394  *
8395  * Writes an error into tracing/error_log of the form:
8396  *
8397  * <loc>: error: <text>
8398  *   Command: <cmd>
8399  *              ^
8400  *
8401  * tracing/error_log is a small log file containing the last
8402  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
8403  * unless there has been a tracing error, and the error log can be
8404  * cleared and have its memory freed by writing the empty string in
8405  * truncation mode to it i.e. echo > tracing/error_log.
8406  *
8407  * NOTE: the @errs array along with the @type param are used to
8408  * produce a static error string - this string is not copied and saved
8409  * when the error is logged - only a pointer to it is saved.  See
8410  * existing callers for examples of how static strings are typically
8411  * defined for use with tracing_log_err().
8412  */
tracing_log_err(struct trace_array * tr,const char * loc,const char * cmd,const char ** errs,u8 type,u16 pos)8413 void tracing_log_err(struct trace_array *tr,
8414 		     const char *loc, const char *cmd,
8415 		     const char **errs, u8 type, u16 pos)
8416 {
8417 	struct tracing_log_err *err;
8418 	int len = 0;
8419 
8420 	if (!tr)
8421 		tr = &global_trace;
8422 
8423 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8424 
8425 	guard(mutex)(&tracing_err_log_lock);
8426 
8427 	err = get_tracing_log_err(tr, len);
8428 	if (PTR_ERR(err) == -ENOMEM)
8429 		return;
8430 
8431 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8432 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8433 
8434 	err->info.errs = errs;
8435 	err->info.type = type;
8436 	err->info.pos = pos;
8437 	err->info.ts = local_clock();
8438 
8439 	list_add_tail(&err->list, &tr->err_log);
8440 }
8441 
clear_tracing_err_log(struct trace_array * tr)8442 static void clear_tracing_err_log(struct trace_array *tr)
8443 {
8444 	struct tracing_log_err *err, *next;
8445 
8446 	guard(mutex)(&tracing_err_log_lock);
8447 
8448 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
8449 		list_del(&err->list);
8450 		free_tracing_log_err(err);
8451 	}
8452 
8453 	tr->n_err_log_entries = 0;
8454 }
8455 
tracing_err_log_seq_start(struct seq_file * m,loff_t * pos)8456 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8457 {
8458 	struct trace_array *tr = m->private;
8459 
8460 	mutex_lock(&tracing_err_log_lock);
8461 
8462 	return seq_list_start(&tr->err_log, *pos);
8463 }
8464 
tracing_err_log_seq_next(struct seq_file * m,void * v,loff_t * pos)8465 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8466 {
8467 	struct trace_array *tr = m->private;
8468 
8469 	return seq_list_next(v, &tr->err_log, pos);
8470 }
8471 
tracing_err_log_seq_stop(struct seq_file * m,void * v)8472 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8473 {
8474 	mutex_unlock(&tracing_err_log_lock);
8475 }
8476 
tracing_err_log_show_pos(struct seq_file * m,u16 pos)8477 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8478 {
8479 	u16 i;
8480 
8481 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8482 		seq_putc(m, ' ');
8483 	for (i = 0; i < pos; i++)
8484 		seq_putc(m, ' ');
8485 	seq_puts(m, "^\n");
8486 }
8487 
tracing_err_log_seq_show(struct seq_file * m,void * v)8488 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8489 {
8490 	struct tracing_log_err *err = v;
8491 
8492 	if (err) {
8493 		const char *err_text = err->info.errs[err->info.type];
8494 		u64 sec = err->info.ts;
8495 		u32 nsec;
8496 
8497 		nsec = do_div(sec, NSEC_PER_SEC);
8498 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8499 			   err->loc, err_text);
8500 		seq_printf(m, "%s", err->cmd);
8501 		tracing_err_log_show_pos(m, err->info.pos);
8502 	}
8503 
8504 	return 0;
8505 }
8506 
8507 static const struct seq_operations tracing_err_log_seq_ops = {
8508 	.start  = tracing_err_log_seq_start,
8509 	.next   = tracing_err_log_seq_next,
8510 	.stop   = tracing_err_log_seq_stop,
8511 	.show   = tracing_err_log_seq_show
8512 };
8513 
tracing_err_log_open(struct inode * inode,struct file * file)8514 static int tracing_err_log_open(struct inode *inode, struct file *file)
8515 {
8516 	struct trace_array *tr = inode->i_private;
8517 	int ret = 0;
8518 
8519 	ret = tracing_check_open_get_tr(tr);
8520 	if (ret)
8521 		return ret;
8522 
8523 	/* If this file was opened for write, then erase contents */
8524 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8525 		clear_tracing_err_log(tr);
8526 
8527 	if (file->f_mode & FMODE_READ) {
8528 		ret = seq_open(file, &tracing_err_log_seq_ops);
8529 		if (!ret) {
8530 			struct seq_file *m = file->private_data;
8531 			m->private = tr;
8532 		} else {
8533 			trace_array_put(tr);
8534 		}
8535 	}
8536 	return ret;
8537 }
8538 
tracing_err_log_write(struct file * file,const char __user * buffer,size_t count,loff_t * ppos)8539 static ssize_t tracing_err_log_write(struct file *file,
8540 				     const char __user *buffer,
8541 				     size_t count, loff_t *ppos)
8542 {
8543 	return count;
8544 }
8545 
tracing_err_log_release(struct inode * inode,struct file * file)8546 static int tracing_err_log_release(struct inode *inode, struct file *file)
8547 {
8548 	struct trace_array *tr = inode->i_private;
8549 
8550 	trace_array_put(tr);
8551 
8552 	if (file->f_mode & FMODE_READ)
8553 		seq_release(inode, file);
8554 
8555 	return 0;
8556 }
8557 
8558 static const struct file_operations tracing_err_log_fops = {
8559 	.open           = tracing_err_log_open,
8560 	.write		= tracing_err_log_write,
8561 	.read           = seq_read,
8562 	.llseek         = tracing_lseek,
8563 	.release        = tracing_err_log_release,
8564 };
8565 
tracing_buffers_open(struct inode * inode,struct file * filp)8566 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8567 {
8568 	struct trace_array *tr = inode->i_private;
8569 	struct ftrace_buffer_info *info;
8570 	int ret;
8571 
8572 	ret = tracing_check_open_get_tr(tr);
8573 	if (ret)
8574 		return ret;
8575 
8576 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
8577 	if (!info) {
8578 		trace_array_put(tr);
8579 		return -ENOMEM;
8580 	}
8581 
8582 	mutex_lock(&trace_types_lock);
8583 
8584 	info->iter.tr		= tr;
8585 	info->iter.cpu_file	= tracing_get_cpu(inode);
8586 	info->iter.trace	= tr->current_trace;
8587 	info->iter.array_buffer = &tr->array_buffer;
8588 	info->spare		= NULL;
8589 	/* Force reading ring buffer for first read */
8590 	info->read		= (unsigned int)-1;
8591 
8592 	filp->private_data = info;
8593 
8594 	tr->trace_ref++;
8595 
8596 	mutex_unlock(&trace_types_lock);
8597 
8598 	ret = nonseekable_open(inode, filp);
8599 	if (ret < 0)
8600 		trace_array_put(tr);
8601 
8602 	return ret;
8603 }
8604 
8605 static __poll_t
tracing_buffers_poll(struct file * filp,poll_table * poll_table)8606 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8607 {
8608 	struct ftrace_buffer_info *info = filp->private_data;
8609 	struct trace_iterator *iter = &info->iter;
8610 
8611 	return trace_poll(iter, filp, poll_table);
8612 }
8613 
8614 static ssize_t
tracing_buffers_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)8615 tracing_buffers_read(struct file *filp, char __user *ubuf,
8616 		     size_t count, loff_t *ppos)
8617 {
8618 	struct ftrace_buffer_info *info = filp->private_data;
8619 	struct trace_iterator *iter = &info->iter;
8620 	void *trace_data;
8621 	int page_size;
8622 	ssize_t ret = 0;
8623 	ssize_t size;
8624 
8625 	if (!count)
8626 		return 0;
8627 
8628 #ifdef CONFIG_TRACER_MAX_TRACE
8629 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8630 		return -EBUSY;
8631 #endif
8632 
8633 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8634 
8635 	/* Make sure the spare matches the current sub buffer size */
8636 	if (info->spare) {
8637 		if (page_size != info->spare_size) {
8638 			ring_buffer_free_read_page(iter->array_buffer->buffer,
8639 						   info->spare_cpu, info->spare);
8640 			info->spare = NULL;
8641 		}
8642 	}
8643 
8644 	if (!info->spare) {
8645 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8646 							  iter->cpu_file);
8647 		if (IS_ERR(info->spare)) {
8648 			ret = PTR_ERR(info->spare);
8649 			info->spare = NULL;
8650 		} else {
8651 			info->spare_cpu = iter->cpu_file;
8652 			info->spare_size = page_size;
8653 		}
8654 	}
8655 	if (!info->spare)
8656 		return ret;
8657 
8658 	/* Do we have previous read data to read? */
8659 	if (info->read < page_size)
8660 		goto read;
8661 
8662  again:
8663 	trace_access_lock(iter->cpu_file);
8664 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8665 				    info->spare,
8666 				    count,
8667 				    iter->cpu_file, 0);
8668 	trace_access_unlock(iter->cpu_file);
8669 
8670 	if (ret < 0) {
8671 		if (trace_empty(iter) && !iter->closed) {
8672 			if (update_last_data_if_empty(iter->tr))
8673 				return 0;
8674 
8675 			if ((filp->f_flags & O_NONBLOCK))
8676 				return -EAGAIN;
8677 
8678 			ret = wait_on_pipe(iter, 0);
8679 			if (ret)
8680 				return ret;
8681 
8682 			goto again;
8683 		}
8684 		return 0;
8685 	}
8686 
8687 	info->read = 0;
8688  read:
8689 	size = page_size - info->read;
8690 	if (size > count)
8691 		size = count;
8692 	trace_data = ring_buffer_read_page_data(info->spare);
8693 	ret = copy_to_user(ubuf, trace_data + info->read, size);
8694 	if (ret == size)
8695 		return -EFAULT;
8696 
8697 	size -= ret;
8698 
8699 	*ppos += size;
8700 	info->read += size;
8701 
8702 	return size;
8703 }
8704 
tracing_buffers_flush(struct file * file,fl_owner_t id)8705 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
8706 {
8707 	struct ftrace_buffer_info *info = file->private_data;
8708 	struct trace_iterator *iter = &info->iter;
8709 
8710 	iter->closed = true;
8711 	/* Make sure the waiters see the new wait_index */
8712 	(void)atomic_fetch_inc_release(&iter->wait_index);
8713 
8714 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8715 
8716 	return 0;
8717 }
8718 
tracing_buffers_release(struct inode * inode,struct file * file)8719 static int tracing_buffers_release(struct inode *inode, struct file *file)
8720 {
8721 	struct ftrace_buffer_info *info = file->private_data;
8722 	struct trace_iterator *iter = &info->iter;
8723 
8724 	guard(mutex)(&trace_types_lock);
8725 
8726 	iter->tr->trace_ref--;
8727 
8728 	__trace_array_put(iter->tr);
8729 
8730 	if (info->spare)
8731 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8732 					   info->spare_cpu, info->spare);
8733 	kvfree(info);
8734 
8735 	return 0;
8736 }
8737 
8738 struct buffer_ref {
8739 	struct trace_buffer	*buffer;
8740 	void			*page;
8741 	int			cpu;
8742 	refcount_t		refcount;
8743 };
8744 
buffer_ref_release(struct buffer_ref * ref)8745 static void buffer_ref_release(struct buffer_ref *ref)
8746 {
8747 	if (!refcount_dec_and_test(&ref->refcount))
8748 		return;
8749 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8750 	kfree(ref);
8751 }
8752 
buffer_pipe_buf_release(struct pipe_inode_info * pipe,struct pipe_buffer * buf)8753 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8754 				    struct pipe_buffer *buf)
8755 {
8756 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8757 
8758 	buffer_ref_release(ref);
8759 	buf->private = 0;
8760 }
8761 
buffer_pipe_buf_get(struct pipe_inode_info * pipe,struct pipe_buffer * buf)8762 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8763 				struct pipe_buffer *buf)
8764 {
8765 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8766 
8767 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8768 		return false;
8769 
8770 	refcount_inc(&ref->refcount);
8771 	return true;
8772 }
8773 
8774 /* Pipe buffer operations for a buffer. */
8775 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8776 	.release		= buffer_pipe_buf_release,
8777 	.get			= buffer_pipe_buf_get,
8778 };
8779 
8780 /*
8781  * Callback from splice_to_pipe(), if we need to release some pages
8782  * at the end of the spd in case we error'ed out in filling the pipe.
8783  */
buffer_spd_release(struct splice_pipe_desc * spd,unsigned int i)8784 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8785 {
8786 	struct buffer_ref *ref =
8787 		(struct buffer_ref *)spd->partial[i].private;
8788 
8789 	buffer_ref_release(ref);
8790 	spd->partial[i].private = 0;
8791 }
8792 
8793 static ssize_t
tracing_buffers_splice_read(struct file * file,loff_t * ppos,struct pipe_inode_info * pipe,size_t len,unsigned int flags)8794 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8795 			    struct pipe_inode_info *pipe, size_t len,
8796 			    unsigned int flags)
8797 {
8798 	struct ftrace_buffer_info *info = file->private_data;
8799 	struct trace_iterator *iter = &info->iter;
8800 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8801 	struct page *pages_def[PIPE_DEF_BUFFERS];
8802 	struct splice_pipe_desc spd = {
8803 		.pages		= pages_def,
8804 		.partial	= partial_def,
8805 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8806 		.ops		= &buffer_pipe_buf_ops,
8807 		.spd_release	= buffer_spd_release,
8808 	};
8809 	struct buffer_ref *ref;
8810 	bool woken = false;
8811 	int page_size;
8812 	int entries, i;
8813 	ssize_t ret = 0;
8814 
8815 #ifdef CONFIG_TRACER_MAX_TRACE
8816 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8817 		return -EBUSY;
8818 #endif
8819 
8820 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8821 	if (*ppos & (page_size - 1))
8822 		return -EINVAL;
8823 
8824 	if (len & (page_size - 1)) {
8825 		if (len < page_size)
8826 			return -EINVAL;
8827 		len &= (~(page_size - 1));
8828 	}
8829 
8830 	if (splice_grow_spd(pipe, &spd))
8831 		return -ENOMEM;
8832 
8833  again:
8834 	trace_access_lock(iter->cpu_file);
8835 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8836 
8837 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8838 		struct page *page;
8839 		int r;
8840 
8841 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8842 		if (!ref) {
8843 			ret = -ENOMEM;
8844 			break;
8845 		}
8846 
8847 		refcount_set(&ref->refcount, 1);
8848 		ref->buffer = iter->array_buffer->buffer;
8849 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8850 		if (IS_ERR(ref->page)) {
8851 			ret = PTR_ERR(ref->page);
8852 			ref->page = NULL;
8853 			kfree(ref);
8854 			break;
8855 		}
8856 		ref->cpu = iter->cpu_file;
8857 
8858 		r = ring_buffer_read_page(ref->buffer, ref->page,
8859 					  len, iter->cpu_file, 1);
8860 		if (r < 0) {
8861 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8862 						   ref->page);
8863 			kfree(ref);
8864 			break;
8865 		}
8866 
8867 		page = virt_to_page(ring_buffer_read_page_data(ref->page));
8868 
8869 		spd.pages[i] = page;
8870 		spd.partial[i].len = page_size;
8871 		spd.partial[i].offset = 0;
8872 		spd.partial[i].private = (unsigned long)ref;
8873 		spd.nr_pages++;
8874 		*ppos += page_size;
8875 
8876 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8877 	}
8878 
8879 	trace_access_unlock(iter->cpu_file);
8880 	spd.nr_pages = i;
8881 
8882 	/* did we read anything? */
8883 	if (!spd.nr_pages) {
8884 
8885 		if (ret)
8886 			goto out;
8887 
8888 		if (woken)
8889 			goto out;
8890 
8891 		ret = -EAGAIN;
8892 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8893 			goto out;
8894 
8895 		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8896 		if (ret)
8897 			goto out;
8898 
8899 		/* No need to wait after waking up when tracing is off */
8900 		if (!tracer_tracing_is_on(iter->tr))
8901 			goto out;
8902 
8903 		/* Iterate one more time to collect any new data then exit */
8904 		woken = true;
8905 
8906 		goto again;
8907 	}
8908 
8909 	ret = splice_to_pipe(pipe, &spd);
8910 out:
8911 	splice_shrink_spd(&spd);
8912 
8913 	return ret;
8914 }
8915 
tracing_buffers_ioctl(struct file * file,unsigned int cmd,unsigned long arg)8916 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8917 {
8918 	struct ftrace_buffer_info *info = file->private_data;
8919 	struct trace_iterator *iter = &info->iter;
8920 	int err;
8921 
8922 	if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
8923 		if (!(file->f_flags & O_NONBLOCK)) {
8924 			err = ring_buffer_wait(iter->array_buffer->buffer,
8925 					       iter->cpu_file,
8926 					       iter->tr->buffer_percent,
8927 					       NULL, NULL);
8928 			if (err)
8929 				return err;
8930 		}
8931 
8932 		return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8933 						  iter->cpu_file);
8934 	} else if (cmd) {
8935 		return -ENOTTY;
8936 	}
8937 
8938 	/*
8939 	 * An ioctl call with cmd 0 to the ring buffer file will wake up all
8940 	 * waiters
8941 	 */
8942 	guard(mutex)(&trace_types_lock);
8943 
8944 	/* Make sure the waiters see the new wait_index */
8945 	(void)atomic_fetch_inc_release(&iter->wait_index);
8946 
8947 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8948 
8949 	return 0;
8950 }
8951 
8952 #ifdef CONFIG_TRACER_MAX_TRACE
get_snapshot_map(struct trace_array * tr)8953 static int get_snapshot_map(struct trace_array *tr)
8954 {
8955 	int err = 0;
8956 
8957 	/*
8958 	 * Called with mmap_lock held. lockdep would be unhappy if we would now
8959 	 * take trace_types_lock. Instead use the specific
8960 	 * snapshot_trigger_lock.
8961 	 */
8962 	spin_lock(&tr->snapshot_trigger_lock);
8963 
8964 	if (tr->snapshot || tr->mapped == UINT_MAX)
8965 		err = -EBUSY;
8966 	else
8967 		tr->mapped++;
8968 
8969 	spin_unlock(&tr->snapshot_trigger_lock);
8970 
8971 	/* Wait for update_max_tr() to observe iter->tr->mapped */
8972 	if (tr->mapped == 1)
8973 		synchronize_rcu();
8974 
8975 	return err;
8976 
8977 }
put_snapshot_map(struct trace_array * tr)8978 static void put_snapshot_map(struct trace_array *tr)
8979 {
8980 	spin_lock(&tr->snapshot_trigger_lock);
8981 	if (!WARN_ON(!tr->mapped))
8982 		tr->mapped--;
8983 	spin_unlock(&tr->snapshot_trigger_lock);
8984 }
8985 #else
get_snapshot_map(struct trace_array * tr)8986 static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
put_snapshot_map(struct trace_array * tr)8987 static inline void put_snapshot_map(struct trace_array *tr) { }
8988 #endif
8989 
tracing_buffers_mmap_close(struct vm_area_struct * vma)8990 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8991 {
8992 	struct ftrace_buffer_info *info = vma->vm_file->private_data;
8993 	struct trace_iterator *iter = &info->iter;
8994 
8995 	WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
8996 	put_snapshot_map(iter->tr);
8997 }
8998 
tracing_buffers_may_split(struct vm_area_struct * vma,unsigned long addr)8999 static int tracing_buffers_may_split(struct vm_area_struct *vma, unsigned long addr)
9000 {
9001 	/*
9002 	 * Trace buffer mappings require the complete buffer including
9003 	 * the meta page. Partial mappings are not supported.
9004 	 */
9005 	return -EINVAL;
9006 }
9007 
9008 static const struct vm_operations_struct tracing_buffers_vmops = {
9009 	.close		= tracing_buffers_mmap_close,
9010 	.may_split      = tracing_buffers_may_split,
9011 };
9012 
tracing_buffers_mmap(struct file * filp,struct vm_area_struct * vma)9013 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
9014 {
9015 	struct ftrace_buffer_info *info = filp->private_data;
9016 	struct trace_iterator *iter = &info->iter;
9017 	int ret = 0;
9018 
9019 	/* A memmap'ed and backup buffers are not supported for user space mmap */
9020 	if (iter->tr->flags & (TRACE_ARRAY_FL_MEMMAP | TRACE_ARRAY_FL_VMALLOC))
9021 		return -ENODEV;
9022 
9023 	ret = get_snapshot_map(iter->tr);
9024 	if (ret)
9025 		return ret;
9026 
9027 	ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
9028 	if (ret)
9029 		put_snapshot_map(iter->tr);
9030 
9031 	vma->vm_ops = &tracing_buffers_vmops;
9032 
9033 	return ret;
9034 }
9035 
9036 static const struct file_operations tracing_buffers_fops = {
9037 	.open		= tracing_buffers_open,
9038 	.read		= tracing_buffers_read,
9039 	.poll		= tracing_buffers_poll,
9040 	.release	= tracing_buffers_release,
9041 	.flush		= tracing_buffers_flush,
9042 	.splice_read	= tracing_buffers_splice_read,
9043 	.unlocked_ioctl = tracing_buffers_ioctl,
9044 	.mmap		= tracing_buffers_mmap,
9045 };
9046 
9047 static ssize_t
tracing_stats_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)9048 tracing_stats_read(struct file *filp, char __user *ubuf,
9049 		   size_t count, loff_t *ppos)
9050 {
9051 	struct inode *inode = file_inode(filp);
9052 	struct trace_array *tr = inode->i_private;
9053 	struct array_buffer *trace_buf = &tr->array_buffer;
9054 	int cpu = tracing_get_cpu(inode);
9055 	struct trace_seq *s;
9056 	unsigned long cnt;
9057 	unsigned long long t;
9058 	unsigned long usec_rem;
9059 
9060 	s = kmalloc(sizeof(*s), GFP_KERNEL);
9061 	if (!s)
9062 		return -ENOMEM;
9063 
9064 	trace_seq_init(s);
9065 
9066 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
9067 	trace_seq_printf(s, "entries: %ld\n", cnt);
9068 
9069 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
9070 	trace_seq_printf(s, "overrun: %ld\n", cnt);
9071 
9072 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
9073 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
9074 
9075 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
9076 	trace_seq_printf(s, "bytes: %ld\n", cnt);
9077 
9078 	if (trace_clocks[tr->clock_id].in_ns) {
9079 		/* local or global for trace_clock */
9080 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
9081 		usec_rem = do_div(t, USEC_PER_SEC);
9082 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
9083 								t, usec_rem);
9084 
9085 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
9086 		usec_rem = do_div(t, USEC_PER_SEC);
9087 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
9088 	} else {
9089 		/* counter or tsc mode for trace_clock */
9090 		trace_seq_printf(s, "oldest event ts: %llu\n",
9091 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
9092 
9093 		trace_seq_printf(s, "now ts: %llu\n",
9094 				ring_buffer_time_stamp(trace_buf->buffer));
9095 	}
9096 
9097 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
9098 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
9099 
9100 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
9101 	trace_seq_printf(s, "read events: %ld\n", cnt);
9102 
9103 	count = simple_read_from_buffer(ubuf, count, ppos,
9104 					s->buffer, trace_seq_used(s));
9105 
9106 	kfree(s);
9107 
9108 	return count;
9109 }
9110 
9111 static const struct file_operations tracing_stats_fops = {
9112 	.open		= tracing_open_generic_tr,
9113 	.read		= tracing_stats_read,
9114 	.llseek		= generic_file_llseek,
9115 	.release	= tracing_release_generic_tr,
9116 };
9117 
9118 #ifdef CONFIG_DYNAMIC_FTRACE
9119 
9120 static ssize_t
tracing_read_dyn_info(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9121 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
9122 		  size_t cnt, loff_t *ppos)
9123 {
9124 	ssize_t ret;
9125 	char *buf;
9126 	int r;
9127 
9128 	/* 512 should be plenty to hold the amount needed */
9129 #define DYN_INFO_BUF_SIZE	512
9130 
9131 	buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
9132 	if (!buf)
9133 		return -ENOMEM;
9134 
9135 	r = scnprintf(buf, DYN_INFO_BUF_SIZE,
9136 		      "%ld pages:%ld groups: %ld\n"
9137 		      "ftrace boot update time = %llu (ns)\n"
9138 		      "ftrace module total update time = %llu (ns)\n",
9139 		      ftrace_update_tot_cnt,
9140 		      ftrace_number_of_pages,
9141 		      ftrace_number_of_groups,
9142 		      ftrace_update_time,
9143 		      ftrace_total_mod_time);
9144 
9145 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9146 	kfree(buf);
9147 	return ret;
9148 }
9149 
9150 static const struct file_operations tracing_dyn_info_fops = {
9151 	.open		= tracing_open_generic,
9152 	.read		= tracing_read_dyn_info,
9153 	.llseek		= generic_file_llseek,
9154 };
9155 #endif /* CONFIG_DYNAMIC_FTRACE */
9156 
9157 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
9158 static void
ftrace_snapshot(unsigned long ip,unsigned long parent_ip,struct trace_array * tr,struct ftrace_probe_ops * ops,void * data)9159 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
9160 		struct trace_array *tr, struct ftrace_probe_ops *ops,
9161 		void *data)
9162 {
9163 	tracing_snapshot_instance(tr);
9164 }
9165 
9166 static void
ftrace_count_snapshot(unsigned long ip,unsigned long parent_ip,struct trace_array * tr,struct ftrace_probe_ops * ops,void * data)9167 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
9168 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
9169 		      void *data)
9170 {
9171 	struct ftrace_func_mapper *mapper = data;
9172 	long *count = NULL;
9173 
9174 	if (mapper)
9175 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
9176 
9177 	if (count) {
9178 
9179 		if (*count <= 0)
9180 			return;
9181 
9182 		(*count)--;
9183 	}
9184 
9185 	tracing_snapshot_instance(tr);
9186 }
9187 
9188 static int
ftrace_snapshot_print(struct seq_file * m,unsigned long ip,struct ftrace_probe_ops * ops,void * data)9189 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
9190 		      struct ftrace_probe_ops *ops, void *data)
9191 {
9192 	struct ftrace_func_mapper *mapper = data;
9193 	long *count = NULL;
9194 
9195 	seq_printf(m, "%ps:", (void *)ip);
9196 
9197 	seq_puts(m, "snapshot");
9198 
9199 	if (mapper)
9200 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
9201 
9202 	if (count)
9203 		seq_printf(m, ":count=%ld\n", *count);
9204 	else
9205 		seq_puts(m, ":unlimited\n");
9206 
9207 	return 0;
9208 }
9209 
9210 static int
ftrace_snapshot_init(struct ftrace_probe_ops * ops,struct trace_array * tr,unsigned long ip,void * init_data,void ** data)9211 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
9212 		     unsigned long ip, void *init_data, void **data)
9213 {
9214 	struct ftrace_func_mapper *mapper = *data;
9215 
9216 	if (!mapper) {
9217 		mapper = allocate_ftrace_func_mapper();
9218 		if (!mapper)
9219 			return -ENOMEM;
9220 		*data = mapper;
9221 	}
9222 
9223 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
9224 }
9225 
9226 static void
ftrace_snapshot_free(struct ftrace_probe_ops * ops,struct trace_array * tr,unsigned long ip,void * data)9227 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
9228 		     unsigned long ip, void *data)
9229 {
9230 	struct ftrace_func_mapper *mapper = data;
9231 
9232 	if (!ip) {
9233 		if (!mapper)
9234 			return;
9235 		free_ftrace_func_mapper(mapper, NULL);
9236 		return;
9237 	}
9238 
9239 	ftrace_func_mapper_remove_ip(mapper, ip);
9240 }
9241 
9242 static struct ftrace_probe_ops snapshot_probe_ops = {
9243 	.func			= ftrace_snapshot,
9244 	.print			= ftrace_snapshot_print,
9245 };
9246 
9247 static struct ftrace_probe_ops snapshot_count_probe_ops = {
9248 	.func			= ftrace_count_snapshot,
9249 	.print			= ftrace_snapshot_print,
9250 	.init			= ftrace_snapshot_init,
9251 	.free			= ftrace_snapshot_free,
9252 };
9253 
9254 static int
ftrace_trace_snapshot_callback(struct trace_array * tr,struct ftrace_hash * hash,char * glob,char * cmd,char * param,int enable)9255 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
9256 			       char *glob, char *cmd, char *param, int enable)
9257 {
9258 	struct ftrace_probe_ops *ops;
9259 	void *count = (void *)-1;
9260 	char *number;
9261 	int ret;
9262 
9263 	if (!tr)
9264 		return -ENODEV;
9265 
9266 	/* hash funcs only work with set_ftrace_filter */
9267 	if (!enable)
9268 		return -EINVAL;
9269 
9270 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
9271 
9272 	if (glob[0] == '!') {
9273 		ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
9274 		if (!ret)
9275 			tracing_disarm_snapshot(tr);
9276 
9277 		return ret;
9278 	}
9279 
9280 	if (!param)
9281 		goto out_reg;
9282 
9283 	number = strsep(&param, ":");
9284 
9285 	if (!strlen(number))
9286 		goto out_reg;
9287 
9288 	/*
9289 	 * We use the callback data field (which is a pointer)
9290 	 * as our counter.
9291 	 */
9292 	ret = kstrtoul(number, 0, (unsigned long *)&count);
9293 	if (ret)
9294 		return ret;
9295 
9296  out_reg:
9297 	ret = tracing_arm_snapshot(tr);
9298 	if (ret < 0)
9299 		return ret;
9300 
9301 	ret = register_ftrace_function_probe(glob, tr, ops, count);
9302 	if (ret < 0)
9303 		tracing_disarm_snapshot(tr);
9304 
9305 	return ret < 0 ? ret : 0;
9306 }
9307 
9308 static struct ftrace_func_command ftrace_snapshot_cmd = {
9309 	.name			= "snapshot",
9310 	.func			= ftrace_trace_snapshot_callback,
9311 };
9312 
register_snapshot_cmd(void)9313 static __init int register_snapshot_cmd(void)
9314 {
9315 	return register_ftrace_command(&ftrace_snapshot_cmd);
9316 }
9317 #else
register_snapshot_cmd(void)9318 static inline __init int register_snapshot_cmd(void) { return 0; }
9319 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
9320 
tracing_get_dentry(struct trace_array * tr)9321 static struct dentry *tracing_get_dentry(struct trace_array *tr)
9322 {
9323 	/* Top directory uses NULL as the parent */
9324 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
9325 		return NULL;
9326 
9327 	if (WARN_ON(!tr->dir))
9328 		return ERR_PTR(-ENODEV);
9329 
9330 	/* All sub buffers have a descriptor */
9331 	return tr->dir;
9332 }
9333 
tracing_dentry_percpu(struct trace_array * tr,int cpu)9334 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
9335 {
9336 	struct dentry *d_tracer;
9337 
9338 	if (tr->percpu_dir)
9339 		return tr->percpu_dir;
9340 
9341 	d_tracer = tracing_get_dentry(tr);
9342 	if (IS_ERR(d_tracer))
9343 		return NULL;
9344 
9345 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
9346 
9347 	MEM_FAIL(!tr->percpu_dir,
9348 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
9349 
9350 	return tr->percpu_dir;
9351 }
9352 
9353 static struct dentry *
trace_create_cpu_file(const char * name,umode_t mode,struct dentry * parent,void * data,long cpu,const struct file_operations * fops)9354 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
9355 		      void *data, long cpu, const struct file_operations *fops)
9356 {
9357 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
9358 
9359 	if (ret) /* See tracing_get_cpu() */
9360 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
9361 	return ret;
9362 }
9363 
9364 static void
tracing_init_tracefs_percpu(struct trace_array * tr,long cpu)9365 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
9366 {
9367 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
9368 	struct dentry *d_cpu;
9369 	char cpu_dir[30]; /* 30 characters should be more than enough */
9370 
9371 	if (!d_percpu)
9372 		return;
9373 
9374 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
9375 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
9376 	if (!d_cpu) {
9377 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
9378 		return;
9379 	}
9380 
9381 	/* per cpu trace_pipe */
9382 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
9383 				tr, cpu, &tracing_pipe_fops);
9384 
9385 	/* per cpu trace */
9386 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
9387 				tr, cpu, &tracing_fops);
9388 
9389 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
9390 				tr, cpu, &tracing_buffers_fops);
9391 
9392 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
9393 				tr, cpu, &tracing_stats_fops);
9394 
9395 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
9396 				tr, cpu, &tracing_entries_fops);
9397 
9398 	if (tr->range_addr_start)
9399 		trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
9400 				      tr, cpu, &tracing_buffer_meta_fops);
9401 #ifdef CONFIG_TRACER_SNAPSHOT
9402 	if (!tr->range_addr_start) {
9403 		trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
9404 				      tr, cpu, &snapshot_fops);
9405 
9406 		trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
9407 				      tr, cpu, &snapshot_raw_fops);
9408 	}
9409 #endif
9410 }
9411 
9412 #ifdef CONFIG_FTRACE_SELFTEST
9413 /* Let selftest have access to static functions in this file */
9414 #include "trace_selftest.c"
9415 #endif
9416 
9417 static ssize_t
trace_options_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9418 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
9419 			loff_t *ppos)
9420 {
9421 	struct trace_option_dentry *topt = filp->private_data;
9422 	char *buf;
9423 
9424 	if (topt->flags->val & topt->opt->bit)
9425 		buf = "1\n";
9426 	else
9427 		buf = "0\n";
9428 
9429 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9430 }
9431 
9432 static ssize_t
trace_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9433 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
9434 			 loff_t *ppos)
9435 {
9436 	struct trace_option_dentry *topt = filp->private_data;
9437 	unsigned long val;
9438 	int ret;
9439 
9440 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9441 	if (ret)
9442 		return ret;
9443 
9444 	if (val != 0 && val != 1)
9445 		return -EINVAL;
9446 
9447 	if (!!(topt->flags->val & topt->opt->bit) != val) {
9448 		guard(mutex)(&trace_types_lock);
9449 		ret = __set_tracer_option(topt->tr, topt->flags,
9450 					  topt->opt, !val);
9451 		if (ret)
9452 			return ret;
9453 	}
9454 
9455 	*ppos += cnt;
9456 
9457 	return cnt;
9458 }
9459 
tracing_open_options(struct inode * inode,struct file * filp)9460 static int tracing_open_options(struct inode *inode, struct file *filp)
9461 {
9462 	struct trace_option_dentry *topt = inode->i_private;
9463 	int ret;
9464 
9465 	ret = tracing_check_open_get_tr(topt->tr);
9466 	if (ret)
9467 		return ret;
9468 
9469 	filp->private_data = inode->i_private;
9470 	return 0;
9471 }
9472 
tracing_release_options(struct inode * inode,struct file * file)9473 static int tracing_release_options(struct inode *inode, struct file *file)
9474 {
9475 	struct trace_option_dentry *topt = file->private_data;
9476 
9477 	trace_array_put(topt->tr);
9478 	return 0;
9479 }
9480 
9481 static const struct file_operations trace_options_fops = {
9482 	.open = tracing_open_options,
9483 	.read = trace_options_read,
9484 	.write = trace_options_write,
9485 	.llseek	= generic_file_llseek,
9486 	.release = tracing_release_options,
9487 };
9488 
9489 /*
9490  * In order to pass in both the trace_array descriptor as well as the index
9491  * to the flag that the trace option file represents, the trace_array
9492  * has a character array of trace_flags_index[], which holds the index
9493  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9494  * The address of this character array is passed to the flag option file
9495  * read/write callbacks.
9496  *
9497  * In order to extract both the index and the trace_array descriptor,
9498  * get_tr_index() uses the following algorithm.
9499  *
9500  *   idx = *ptr;
9501  *
9502  * As the pointer itself contains the address of the index (remember
9503  * index[1] == 1).
9504  *
9505  * Then to get the trace_array descriptor, by subtracting that index
9506  * from the ptr, we get to the start of the index itself.
9507  *
9508  *   ptr - idx == &index[0]
9509  *
9510  * Then a simple container_of() from that pointer gets us to the
9511  * trace_array descriptor.
9512  */
get_tr_index(void * data,struct trace_array ** ptr,unsigned int * pindex)9513 static void get_tr_index(void *data, struct trace_array **ptr,
9514 			 unsigned int *pindex)
9515 {
9516 	*pindex = *(unsigned char *)data;
9517 
9518 	*ptr = container_of(data - *pindex, struct trace_array,
9519 			    trace_flags_index);
9520 }
9521 
9522 static ssize_t
trace_options_core_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9523 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9524 			loff_t *ppos)
9525 {
9526 	void *tr_index = filp->private_data;
9527 	struct trace_array *tr;
9528 	unsigned int index;
9529 	char *buf;
9530 
9531 	get_tr_index(tr_index, &tr, &index);
9532 
9533 	if (tr->trace_flags & (1ULL << index))
9534 		buf = "1\n";
9535 	else
9536 		buf = "0\n";
9537 
9538 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9539 }
9540 
9541 static ssize_t
trace_options_core_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9542 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9543 			 loff_t *ppos)
9544 {
9545 	void *tr_index = filp->private_data;
9546 	struct trace_array *tr;
9547 	unsigned int index;
9548 	unsigned long val;
9549 	int ret;
9550 
9551 	get_tr_index(tr_index, &tr, &index);
9552 
9553 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9554 	if (ret)
9555 		return ret;
9556 
9557 	if (val != 0 && val != 1)
9558 		return -EINVAL;
9559 
9560 	mutex_lock(&event_mutex);
9561 	mutex_lock(&trace_types_lock);
9562 	ret = set_tracer_flag(tr, 1ULL << index, val);
9563 	mutex_unlock(&trace_types_lock);
9564 	mutex_unlock(&event_mutex);
9565 
9566 	if (ret < 0)
9567 		return ret;
9568 
9569 	*ppos += cnt;
9570 
9571 	return cnt;
9572 }
9573 
9574 static const struct file_operations trace_options_core_fops = {
9575 	.open = tracing_open_generic,
9576 	.read = trace_options_core_read,
9577 	.write = trace_options_core_write,
9578 	.llseek = generic_file_llseek,
9579 };
9580 
trace_create_file(const char * name,umode_t mode,struct dentry * parent,void * data,const struct file_operations * fops)9581 struct dentry *trace_create_file(const char *name,
9582 				 umode_t mode,
9583 				 struct dentry *parent,
9584 				 void *data,
9585 				 const struct file_operations *fops)
9586 {
9587 	struct dentry *ret;
9588 
9589 	ret = tracefs_create_file(name, mode, parent, data, fops);
9590 	if (!ret)
9591 		pr_warn("Could not create tracefs '%s' entry\n", name);
9592 
9593 	return ret;
9594 }
9595 
9596 
trace_options_init_dentry(struct trace_array * tr)9597 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9598 {
9599 	struct dentry *d_tracer;
9600 
9601 	if (tr->options)
9602 		return tr->options;
9603 
9604 	d_tracer = tracing_get_dentry(tr);
9605 	if (IS_ERR(d_tracer))
9606 		return NULL;
9607 
9608 	tr->options = tracefs_create_dir("options", d_tracer);
9609 	if (!tr->options) {
9610 		pr_warn("Could not create tracefs directory 'options'\n");
9611 		return NULL;
9612 	}
9613 
9614 	return tr->options;
9615 }
9616 
9617 static void
create_trace_option_file(struct trace_array * tr,struct trace_option_dentry * topt,struct tracer_flags * flags,struct tracer_opt * opt)9618 create_trace_option_file(struct trace_array *tr,
9619 			 struct trace_option_dentry *topt,
9620 			 struct tracer_flags *flags,
9621 			 struct tracer_opt *opt)
9622 {
9623 	struct dentry *t_options;
9624 
9625 	t_options = trace_options_init_dentry(tr);
9626 	if (!t_options)
9627 		return;
9628 
9629 	topt->flags = flags;
9630 	topt->opt = opt;
9631 	topt->tr = tr;
9632 
9633 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9634 					t_options, topt, &trace_options_fops);
9635 }
9636 
9637 static int
create_trace_option_files(struct trace_array * tr,struct tracer * tracer,struct tracer_flags * flags)9638 create_trace_option_files(struct trace_array *tr, struct tracer *tracer,
9639 			  struct tracer_flags *flags)
9640 {
9641 	struct trace_option_dentry *topts;
9642 	struct trace_options *tr_topts;
9643 	struct tracer_opt *opts;
9644 	int cnt;
9645 
9646 	if (!flags || !flags->opts)
9647 		return 0;
9648 
9649 	opts = flags->opts;
9650 
9651 	for (cnt = 0; opts[cnt].name; cnt++)
9652 		;
9653 
9654 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9655 	if (!topts)
9656 		return 0;
9657 
9658 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9659 			    GFP_KERNEL);
9660 	if (!tr_topts) {
9661 		kfree(topts);
9662 		return -ENOMEM;
9663 	}
9664 
9665 	tr->topts = tr_topts;
9666 	tr->topts[tr->nr_topts].tracer = tracer;
9667 	tr->topts[tr->nr_topts].topts = topts;
9668 	tr->nr_topts++;
9669 
9670 	for (cnt = 0; opts[cnt].name; cnt++) {
9671 		create_trace_option_file(tr, &topts[cnt], flags,
9672 					 &opts[cnt]);
9673 		MEM_FAIL(topts[cnt].entry == NULL,
9674 			  "Failed to create trace option: %s",
9675 			  opts[cnt].name);
9676 	}
9677 	return 0;
9678 }
9679 
get_global_flags_val(struct tracer * tracer)9680 static int get_global_flags_val(struct tracer *tracer)
9681 {
9682 	struct tracers *t;
9683 
9684 	list_for_each_entry(t, &global_trace.tracers, list) {
9685 		if (t->tracer != tracer)
9686 			continue;
9687 		if (!t->flags)
9688 			return -1;
9689 		return t->flags->val;
9690 	}
9691 	return -1;
9692 }
9693 
add_tracer_options(struct trace_array * tr,struct tracers * t)9694 static int add_tracer_options(struct trace_array *tr, struct tracers *t)
9695 {
9696 	struct tracer *tracer = t->tracer;
9697 	struct tracer_flags *flags = t->flags ?: tracer->flags;
9698 
9699 	if (!flags)
9700 		return 0;
9701 
9702 	/* Only add tracer options after update_tracer_options finish */
9703 	if (!tracer_options_updated)
9704 		return 0;
9705 
9706 	return create_trace_option_files(tr, tracer, flags);
9707 }
9708 
add_tracer(struct trace_array * tr,struct tracer * tracer)9709 static int add_tracer(struct trace_array *tr, struct tracer *tracer)
9710 {
9711 	struct tracer_flags *flags;
9712 	struct tracers *t;
9713 	int ret;
9714 
9715 	/* Only enable if the directory has been created already. */
9716 	if (!tr->dir && !(tr->flags & TRACE_ARRAY_FL_GLOBAL))
9717 		return 0;
9718 
9719 	/*
9720 	 * If this is an instance, only create flags for tracers
9721 	 * the instance may have.
9722 	 */
9723 	if (!trace_ok_for_array(tracer, tr))
9724 		return 0;
9725 
9726 	t = kmalloc(sizeof(*t), GFP_KERNEL);
9727 	if (!t)
9728 		return -ENOMEM;
9729 
9730 	t->tracer = tracer;
9731 	t->flags = NULL;
9732 	list_add(&t->list, &tr->tracers);
9733 
9734 	flags = tracer->flags;
9735 	if (!flags) {
9736 		if (!tracer->default_flags)
9737 			return 0;
9738 
9739 		/*
9740 		 * If the tracer defines default flags, it means the flags are
9741 		 * per trace instance.
9742 		 */
9743 		flags = kmalloc(sizeof(*flags), GFP_KERNEL);
9744 		if (!flags)
9745 			return -ENOMEM;
9746 
9747 		*flags = *tracer->default_flags;
9748 		flags->trace = tracer;
9749 
9750 		t->flags = flags;
9751 
9752 		/* If this is an instance, inherit the global_trace flags */
9753 		if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL)) {
9754 			int val = get_global_flags_val(tracer);
9755 			if (!WARN_ON_ONCE(val < 0))
9756 				flags->val = val;
9757 		}
9758 	}
9759 
9760 	ret = add_tracer_options(tr, t);
9761 	if (ret < 0) {
9762 		list_del(&t->list);
9763 		kfree(t->flags);
9764 		kfree(t);
9765 	}
9766 
9767 	return ret;
9768 }
9769 
9770 static struct dentry *
create_trace_option_core_file(struct trace_array * tr,const char * option,long index)9771 create_trace_option_core_file(struct trace_array *tr,
9772 			      const char *option, long index)
9773 {
9774 	struct dentry *t_options;
9775 
9776 	t_options = trace_options_init_dentry(tr);
9777 	if (!t_options)
9778 		return NULL;
9779 
9780 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9781 				 (void *)&tr->trace_flags_index[index],
9782 				 &trace_options_core_fops);
9783 }
9784 
create_trace_options_dir(struct trace_array * tr)9785 static void create_trace_options_dir(struct trace_array *tr)
9786 {
9787 	struct dentry *t_options;
9788 	bool top_level = tr == &global_trace;
9789 	int i;
9790 
9791 	t_options = trace_options_init_dentry(tr);
9792 	if (!t_options)
9793 		return;
9794 
9795 	for (i = 0; trace_options[i]; i++) {
9796 		if (top_level ||
9797 		    !((1ULL << i) & TOP_LEVEL_TRACE_FLAGS)) {
9798 			create_trace_option_core_file(tr, trace_options[i], i);
9799 		}
9800 	}
9801 }
9802 
9803 static ssize_t
rb_simple_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9804 rb_simple_read(struct file *filp, char __user *ubuf,
9805 	       size_t cnt, loff_t *ppos)
9806 {
9807 	struct trace_array *tr = filp->private_data;
9808 	char buf[64];
9809 	int r;
9810 
9811 	r = tracer_tracing_is_on(tr);
9812 	r = sprintf(buf, "%d\n", r);
9813 
9814 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9815 }
9816 
9817 static ssize_t
rb_simple_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9818 rb_simple_write(struct file *filp, const char __user *ubuf,
9819 		size_t cnt, loff_t *ppos)
9820 {
9821 	struct trace_array *tr = filp->private_data;
9822 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9823 	unsigned long val;
9824 	int ret;
9825 
9826 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9827 	if (ret)
9828 		return ret;
9829 
9830 	if (buffer) {
9831 		guard(mutex)(&trace_types_lock);
9832 		if (!!val == tracer_tracing_is_on(tr)) {
9833 			val = 0; /* do nothing */
9834 		} else if (val) {
9835 			tracer_tracing_on(tr);
9836 			if (tr->current_trace->start)
9837 				tr->current_trace->start(tr);
9838 		} else {
9839 			tracer_tracing_off(tr);
9840 			if (tr->current_trace->stop)
9841 				tr->current_trace->stop(tr);
9842 			/* Wake up any waiters */
9843 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9844 		}
9845 	}
9846 
9847 	(*ppos)++;
9848 
9849 	return cnt;
9850 }
9851 
9852 static const struct file_operations rb_simple_fops = {
9853 	.open		= tracing_open_generic_tr,
9854 	.read		= rb_simple_read,
9855 	.write		= rb_simple_write,
9856 	.release	= tracing_release_generic_tr,
9857 	.llseek		= default_llseek,
9858 };
9859 
9860 static ssize_t
buffer_percent_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9861 buffer_percent_read(struct file *filp, char __user *ubuf,
9862 		    size_t cnt, loff_t *ppos)
9863 {
9864 	struct trace_array *tr = filp->private_data;
9865 	char buf[64];
9866 	int r;
9867 
9868 	r = tr->buffer_percent;
9869 	r = sprintf(buf, "%d\n", r);
9870 
9871 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9872 }
9873 
9874 static ssize_t
buffer_percent_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9875 buffer_percent_write(struct file *filp, const char __user *ubuf,
9876 		     size_t cnt, loff_t *ppos)
9877 {
9878 	struct trace_array *tr = filp->private_data;
9879 	unsigned long val;
9880 	int ret;
9881 
9882 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9883 	if (ret)
9884 		return ret;
9885 
9886 	if (val > 100)
9887 		return -EINVAL;
9888 
9889 	tr->buffer_percent = val;
9890 
9891 	(*ppos)++;
9892 
9893 	return cnt;
9894 }
9895 
9896 static const struct file_operations buffer_percent_fops = {
9897 	.open		= tracing_open_generic_tr,
9898 	.read		= buffer_percent_read,
9899 	.write		= buffer_percent_write,
9900 	.release	= tracing_release_generic_tr,
9901 	.llseek		= default_llseek,
9902 };
9903 
9904 static ssize_t
buffer_subbuf_size_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9905 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9906 {
9907 	struct trace_array *tr = filp->private_data;
9908 	size_t size;
9909 	char buf[64];
9910 	int order;
9911 	int r;
9912 
9913 	order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9914 	size = (PAGE_SIZE << order) / 1024;
9915 
9916 	r = sprintf(buf, "%zd\n", size);
9917 
9918 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9919 }
9920 
9921 static ssize_t
buffer_subbuf_size_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9922 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9923 			 size_t cnt, loff_t *ppos)
9924 {
9925 	struct trace_array *tr = filp->private_data;
9926 	unsigned long val;
9927 	int old_order;
9928 	int order;
9929 	int pages;
9930 	int ret;
9931 
9932 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9933 	if (ret)
9934 		return ret;
9935 
9936 	val *= 1024; /* value passed in is in KB */
9937 
9938 	pages = DIV_ROUND_UP(val, PAGE_SIZE);
9939 	order = fls(pages - 1);
9940 
9941 	/* limit between 1 and 128 system pages */
9942 	if (order < 0 || order > 7)
9943 		return -EINVAL;
9944 
9945 	/* Do not allow tracing while changing the order of the ring buffer */
9946 	tracing_stop_tr(tr);
9947 
9948 	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9949 	if (old_order == order)
9950 		goto out;
9951 
9952 	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9953 	if (ret)
9954 		goto out;
9955 
9956 #ifdef CONFIG_TRACER_MAX_TRACE
9957 
9958 	if (!tr->allocated_snapshot)
9959 		goto out_max;
9960 
9961 	ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9962 	if (ret) {
9963 		/* Put back the old order */
9964 		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9965 		if (WARN_ON_ONCE(cnt)) {
9966 			/*
9967 			 * AARGH! We are left with different orders!
9968 			 * The max buffer is our "snapshot" buffer.
9969 			 * When a tracer needs a snapshot (one of the
9970 			 * latency tracers), it swaps the max buffer
9971 			 * with the saved snap shot. We succeeded to
9972 			 * update the order of the main buffer, but failed to
9973 			 * update the order of the max buffer. But when we tried
9974 			 * to reset the main buffer to the original size, we
9975 			 * failed there too. This is very unlikely to
9976 			 * happen, but if it does, warn and kill all
9977 			 * tracing.
9978 			 */
9979 			tracing_disabled = 1;
9980 		}
9981 		goto out;
9982 	}
9983  out_max:
9984 #endif
9985 	(*ppos)++;
9986  out:
9987 	if (ret)
9988 		cnt = ret;
9989 	tracing_start_tr(tr);
9990 	return cnt;
9991 }
9992 
9993 static const struct file_operations buffer_subbuf_size_fops = {
9994 	.open		= tracing_open_generic_tr,
9995 	.read		= buffer_subbuf_size_read,
9996 	.write		= buffer_subbuf_size_write,
9997 	.release	= tracing_release_generic_tr,
9998 	.llseek		= default_llseek,
9999 };
10000 
10001 static struct dentry *trace_instance_dir;
10002 
10003 static void
10004 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
10005 
10006 #ifdef CONFIG_MODULES
make_mod_delta(struct module * mod,void * data)10007 static int make_mod_delta(struct module *mod, void *data)
10008 {
10009 	struct trace_module_delta *module_delta;
10010 	struct trace_scratch *tscratch;
10011 	struct trace_mod_entry *entry;
10012 	struct trace_array *tr = data;
10013 	int i;
10014 
10015 	tscratch = tr->scratch;
10016 	module_delta = READ_ONCE(tr->module_delta);
10017 	for (i = 0; i < tscratch->nr_entries; i++) {
10018 		entry = &tscratch->entries[i];
10019 		if (strcmp(mod->name, entry->mod_name))
10020 			continue;
10021 		if (mod->state == MODULE_STATE_GOING)
10022 			module_delta->delta[i] = 0;
10023 		else
10024 			module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base
10025 						 - entry->mod_addr;
10026 		break;
10027 	}
10028 	return 0;
10029 }
10030 #else
make_mod_delta(struct module * mod,void * data)10031 static int make_mod_delta(struct module *mod, void *data)
10032 {
10033 	return 0;
10034 }
10035 #endif
10036 
mod_addr_comp(const void * a,const void * b,const void * data)10037 static int mod_addr_comp(const void *a, const void *b, const void *data)
10038 {
10039 	const struct trace_mod_entry *e1 = a;
10040 	const struct trace_mod_entry *e2 = b;
10041 
10042 	return e1->mod_addr > e2->mod_addr ? 1 : -1;
10043 }
10044 
setup_trace_scratch(struct trace_array * tr,struct trace_scratch * tscratch,unsigned int size)10045 static void setup_trace_scratch(struct trace_array *tr,
10046 				struct trace_scratch *tscratch, unsigned int size)
10047 {
10048 	struct trace_module_delta *module_delta;
10049 	struct trace_mod_entry *entry;
10050 	int i, nr_entries;
10051 
10052 	if (!tscratch)
10053 		return;
10054 
10055 	tr->scratch = tscratch;
10056 	tr->scratch_size = size;
10057 
10058 	if (tscratch->text_addr)
10059 		tr->text_delta = (unsigned long)_text - tscratch->text_addr;
10060 
10061 	if (struct_size(tscratch, entries, tscratch->nr_entries) > size)
10062 		goto reset;
10063 
10064 	/* Check if each module name is a valid string */
10065 	for (i = 0; i < tscratch->nr_entries; i++) {
10066 		int n;
10067 
10068 		entry = &tscratch->entries[i];
10069 
10070 		for (n = 0; n < MODULE_NAME_LEN; n++) {
10071 			if (entry->mod_name[n] == '\0')
10072 				break;
10073 			if (!isprint(entry->mod_name[n]))
10074 				goto reset;
10075 		}
10076 		if (n == MODULE_NAME_LEN)
10077 			goto reset;
10078 	}
10079 
10080 	/* Sort the entries so that we can find appropriate module from address. */
10081 	nr_entries = tscratch->nr_entries;
10082 	sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
10083 	       mod_addr_comp, NULL, NULL);
10084 
10085 	if (IS_ENABLED(CONFIG_MODULES)) {
10086 		module_delta = kzalloc(struct_size(module_delta, delta, nr_entries), GFP_KERNEL);
10087 		if (!module_delta) {
10088 			pr_info("module_delta allocation failed. Not able to decode module address.");
10089 			goto reset;
10090 		}
10091 		init_rcu_head(&module_delta->rcu);
10092 	} else
10093 		module_delta = NULL;
10094 	WRITE_ONCE(tr->module_delta, module_delta);
10095 
10096 	/* Scan modules to make text delta for modules. */
10097 	module_for_each_mod(make_mod_delta, tr);
10098 
10099 	/* Set trace_clock as the same of the previous boot. */
10100 	if (tscratch->clock_id != tr->clock_id) {
10101 		if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) ||
10102 		    tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
10103 			pr_info("the previous trace_clock info is not valid.");
10104 			goto reset;
10105 		}
10106 	}
10107 	return;
10108  reset:
10109 	/* Invalid trace modules */
10110 	memset(tscratch, 0, size);
10111 }
10112 
10113 static int
allocate_trace_buffer(struct trace_array * tr,struct array_buffer * buf,int size)10114 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
10115 {
10116 	enum ring_buffer_flags rb_flags;
10117 	struct trace_scratch *tscratch;
10118 	unsigned int scratch_size = 0;
10119 
10120 	rb_flags = tr->trace_flags & TRACE_ITER(OVERWRITE) ? RB_FL_OVERWRITE : 0;
10121 
10122 	buf->tr = tr;
10123 
10124 	if (tr->range_addr_start && tr->range_addr_size) {
10125 		/* Add scratch buffer to handle 128 modules */
10126 		buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
10127 						      tr->range_addr_start,
10128 						      tr->range_addr_size,
10129 						      struct_size(tscratch, entries, 128));
10130 
10131 		tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size);
10132 		setup_trace_scratch(tr, tscratch, scratch_size);
10133 
10134 		/*
10135 		 * This is basically the same as a mapped buffer,
10136 		 * with the same restrictions.
10137 		 */
10138 		tr->mapped++;
10139 	} else {
10140 		buf->buffer = ring_buffer_alloc(size, rb_flags);
10141 	}
10142 	if (!buf->buffer)
10143 		return -ENOMEM;
10144 
10145 	buf->data = alloc_percpu(struct trace_array_cpu);
10146 	if (!buf->data) {
10147 		ring_buffer_free(buf->buffer);
10148 		buf->buffer = NULL;
10149 		return -ENOMEM;
10150 	}
10151 
10152 	/* Allocate the first page for all buffers */
10153 	set_buffer_entries(&tr->array_buffer,
10154 			   ring_buffer_size(tr->array_buffer.buffer, 0));
10155 
10156 	return 0;
10157 }
10158 
free_trace_buffer(struct array_buffer * buf)10159 static void free_trace_buffer(struct array_buffer *buf)
10160 {
10161 	if (buf->buffer) {
10162 		ring_buffer_free(buf->buffer);
10163 		buf->buffer = NULL;
10164 		free_percpu(buf->data);
10165 		buf->data = NULL;
10166 	}
10167 }
10168 
allocate_trace_buffers(struct trace_array * tr,int size)10169 static int allocate_trace_buffers(struct trace_array *tr, int size)
10170 {
10171 	int ret;
10172 
10173 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
10174 	if (ret)
10175 		return ret;
10176 
10177 #ifdef CONFIG_TRACER_MAX_TRACE
10178 	/* Fix mapped buffer trace arrays do not have snapshot buffers */
10179 	if (tr->range_addr_start)
10180 		return 0;
10181 
10182 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
10183 				    allocate_snapshot ? size : 1);
10184 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
10185 		free_trace_buffer(&tr->array_buffer);
10186 		return -ENOMEM;
10187 	}
10188 	tr->allocated_snapshot = allocate_snapshot;
10189 
10190 	allocate_snapshot = false;
10191 #endif
10192 
10193 	return 0;
10194 }
10195 
free_trace_buffers(struct trace_array * tr)10196 static void free_trace_buffers(struct trace_array *tr)
10197 {
10198 	if (!tr)
10199 		return;
10200 
10201 	free_trace_buffer(&tr->array_buffer);
10202 	kfree(tr->module_delta);
10203 
10204 #ifdef CONFIG_TRACER_MAX_TRACE
10205 	free_trace_buffer(&tr->max_buffer);
10206 #endif
10207 }
10208 
init_trace_flags_index(struct trace_array * tr)10209 static void init_trace_flags_index(struct trace_array *tr)
10210 {
10211 	int i;
10212 
10213 	/* Used by the trace options files */
10214 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
10215 		tr->trace_flags_index[i] = i;
10216 }
10217 
__update_tracer(struct trace_array * tr)10218 static int __update_tracer(struct trace_array *tr)
10219 {
10220 	struct tracer *t;
10221 	int ret = 0;
10222 
10223 	for (t = trace_types; t && !ret; t = t->next)
10224 		ret = add_tracer(tr, t);
10225 
10226 	return ret;
10227 }
10228 
__update_tracer_options(struct trace_array * tr)10229 static __init int __update_tracer_options(struct trace_array *tr)
10230 {
10231 	struct tracers *t;
10232 	int ret = 0;
10233 
10234 	list_for_each_entry(t, &tr->tracers, list) {
10235 		ret = add_tracer_options(tr, t);
10236 		if (ret < 0)
10237 			break;
10238 	}
10239 
10240 	return ret;
10241 }
10242 
update_tracer_options(void)10243 static __init void update_tracer_options(void)
10244 {
10245 	struct trace_array *tr;
10246 
10247 	guard(mutex)(&trace_types_lock);
10248 	tracer_options_updated = true;
10249 	list_for_each_entry(tr, &ftrace_trace_arrays, list)
10250 		__update_tracer_options(tr);
10251 }
10252 
10253 /* Must have trace_types_lock held */
trace_array_find(const char * instance)10254 struct trace_array *trace_array_find(const char *instance)
10255 {
10256 	struct trace_array *tr, *found = NULL;
10257 
10258 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10259 		if (tr->name && strcmp(tr->name, instance) == 0) {
10260 			found = tr;
10261 			break;
10262 		}
10263 	}
10264 
10265 	return found;
10266 }
10267 
trace_array_find_get(const char * instance)10268 struct trace_array *trace_array_find_get(const char *instance)
10269 {
10270 	struct trace_array *tr;
10271 
10272 	guard(mutex)(&trace_types_lock);
10273 	tr = trace_array_find(instance);
10274 	if (tr)
10275 		tr->ref++;
10276 
10277 	return tr;
10278 }
10279 
trace_array_create_dir(struct trace_array * tr)10280 static int trace_array_create_dir(struct trace_array *tr)
10281 {
10282 	int ret;
10283 
10284 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
10285 	if (!tr->dir)
10286 		return -EINVAL;
10287 
10288 	ret = event_trace_add_tracer(tr->dir, tr);
10289 	if (ret) {
10290 		tracefs_remove(tr->dir);
10291 		return ret;
10292 	}
10293 
10294 	init_tracer_tracefs(tr, tr->dir);
10295 	ret = __update_tracer(tr);
10296 	if (ret) {
10297 		event_trace_del_tracer(tr);
10298 		tracefs_remove(tr->dir);
10299 		return ret;
10300 	}
10301 	return 0;
10302 }
10303 
10304 static struct trace_array *
trace_array_create_systems(const char * name,const char * systems,unsigned long range_addr_start,unsigned long range_addr_size)10305 trace_array_create_systems(const char *name, const char *systems,
10306 			   unsigned long range_addr_start,
10307 			   unsigned long range_addr_size)
10308 {
10309 	struct trace_array *tr;
10310 	int ret;
10311 
10312 	ret = -ENOMEM;
10313 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
10314 	if (!tr)
10315 		return ERR_PTR(ret);
10316 
10317 	tr->name = kstrdup(name, GFP_KERNEL);
10318 	if (!tr->name)
10319 		goto out_free_tr;
10320 
10321 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
10322 		goto out_free_tr;
10323 
10324 	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
10325 		goto out_free_tr;
10326 
10327 	if (systems) {
10328 		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
10329 		if (!tr->system_names)
10330 			goto out_free_tr;
10331 	}
10332 
10333 	/* Only for boot up memory mapped ring buffers */
10334 	tr->range_addr_start = range_addr_start;
10335 	tr->range_addr_size = range_addr_size;
10336 
10337 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
10338 
10339 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
10340 
10341 	raw_spin_lock_init(&tr->start_lock);
10342 
10343 	tr->syscall_buf_sz = global_trace.syscall_buf_sz;
10344 
10345 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10346 #ifdef CONFIG_TRACER_MAX_TRACE
10347 	spin_lock_init(&tr->snapshot_trigger_lock);
10348 #endif
10349 	tr->current_trace = &nop_trace;
10350 	tr->current_trace_flags = nop_trace.flags;
10351 
10352 	INIT_LIST_HEAD(&tr->systems);
10353 	INIT_LIST_HEAD(&tr->events);
10354 	INIT_LIST_HEAD(&tr->hist_vars);
10355 	INIT_LIST_HEAD(&tr->err_log);
10356 	INIT_LIST_HEAD(&tr->tracers);
10357 	INIT_LIST_HEAD(&tr->marker_list);
10358 
10359 #ifdef CONFIG_MODULES
10360 	INIT_LIST_HEAD(&tr->mod_events);
10361 #endif
10362 
10363 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
10364 		goto out_free_tr;
10365 
10366 	/* The ring buffer is defaultly expanded */
10367 	trace_set_ring_buffer_expanded(tr);
10368 
10369 	if (ftrace_allocate_ftrace_ops(tr) < 0)
10370 		goto out_free_tr;
10371 
10372 	ftrace_init_trace_array(tr);
10373 
10374 	init_trace_flags_index(tr);
10375 
10376 	if (trace_instance_dir) {
10377 		ret = trace_array_create_dir(tr);
10378 		if (ret)
10379 			goto out_free_tr;
10380 	} else
10381 		__trace_early_add_events(tr);
10382 
10383 	list_add(&tr->list, &ftrace_trace_arrays);
10384 
10385 	tr->ref++;
10386 
10387 	return tr;
10388 
10389  out_free_tr:
10390 	ftrace_free_ftrace_ops(tr);
10391 	free_trace_buffers(tr);
10392 	free_cpumask_var(tr->pipe_cpumask);
10393 	free_cpumask_var(tr->tracing_cpumask);
10394 	kfree_const(tr->system_names);
10395 	kfree(tr->range_name);
10396 	kfree(tr->name);
10397 	kfree(tr);
10398 
10399 	return ERR_PTR(ret);
10400 }
10401 
trace_array_create(const char * name)10402 static struct trace_array *trace_array_create(const char *name)
10403 {
10404 	return trace_array_create_systems(name, NULL, 0, 0);
10405 }
10406 
instance_mkdir(const char * name)10407 static int instance_mkdir(const char *name)
10408 {
10409 	struct trace_array *tr;
10410 	int ret;
10411 
10412 	guard(mutex)(&event_mutex);
10413 	guard(mutex)(&trace_types_lock);
10414 
10415 	ret = -EEXIST;
10416 	if (trace_array_find(name))
10417 		return -EEXIST;
10418 
10419 	tr = trace_array_create(name);
10420 
10421 	ret = PTR_ERR_OR_ZERO(tr);
10422 
10423 	return ret;
10424 }
10425 
10426 #ifdef CONFIG_MMU
map_pages(unsigned long start,unsigned long size)10427 static u64 map_pages(unsigned long start, unsigned long size)
10428 {
10429 	unsigned long vmap_start, vmap_end;
10430 	struct vm_struct *area;
10431 	int ret;
10432 
10433 	area = get_vm_area(size, VM_IOREMAP);
10434 	if (!area)
10435 		return 0;
10436 
10437 	vmap_start = (unsigned long) area->addr;
10438 	vmap_end = vmap_start + size;
10439 
10440 	ret = vmap_page_range(vmap_start, vmap_end,
10441 			      start, pgprot_nx(PAGE_KERNEL));
10442 	if (ret < 0) {
10443 		free_vm_area(area);
10444 		return 0;
10445 	}
10446 
10447 	return (u64)vmap_start;
10448 }
10449 #else
map_pages(unsigned long start,unsigned long size)10450 static inline u64 map_pages(unsigned long start, unsigned long size)
10451 {
10452 	return 0;
10453 }
10454 #endif
10455 
10456 /**
10457  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
10458  * @name: The name of the trace array to be looked up/created.
10459  * @systems: A list of systems to create event directories for (NULL for all)
10460  *
10461  * Returns pointer to trace array with given name.
10462  * NULL, if it cannot be created.
10463  *
10464  * NOTE: This function increments the reference counter associated with the
10465  * trace array returned. This makes sure it cannot be freed while in use.
10466  * Use trace_array_put() once the trace array is no longer needed.
10467  * If the trace_array is to be freed, trace_array_destroy() needs to
10468  * be called after the trace_array_put(), or simply let user space delete
10469  * it from the tracefs instances directory. But until the
10470  * trace_array_put() is called, user space can not delete it.
10471  *
10472  */
trace_array_get_by_name(const char * name,const char * systems)10473 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
10474 {
10475 	struct trace_array *tr;
10476 
10477 	guard(mutex)(&event_mutex);
10478 	guard(mutex)(&trace_types_lock);
10479 
10480 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10481 		if (tr->name && strcmp(tr->name, name) == 0) {
10482 			tr->ref++;
10483 			return tr;
10484 		}
10485 	}
10486 
10487 	tr = trace_array_create_systems(name, systems, 0, 0);
10488 
10489 	if (IS_ERR(tr))
10490 		tr = NULL;
10491 	else
10492 		tr->ref++;
10493 
10494 	return tr;
10495 }
10496 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
10497 
__remove_instance(struct trace_array * tr)10498 static int __remove_instance(struct trace_array *tr)
10499 {
10500 	int i;
10501 
10502 	/* Reference counter for a newly created trace array = 1. */
10503 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
10504 		return -EBUSY;
10505 
10506 	list_del(&tr->list);
10507 
10508 	/* Disable all the flags that were enabled coming in */
10509 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
10510 		if ((1 << i) & ZEROED_TRACE_FLAGS)
10511 			set_tracer_flag(tr, 1ULL << i, 0);
10512 	}
10513 
10514 	if (printk_trace == tr)
10515 		update_printk_trace(&global_trace);
10516 
10517 	if (update_marker_trace(tr, 0))
10518 		synchronize_rcu();
10519 
10520 	tracing_set_nop(tr);
10521 	clear_ftrace_function_probes(tr);
10522 	event_trace_del_tracer(tr);
10523 	ftrace_clear_pids(tr);
10524 	ftrace_destroy_function_files(tr);
10525 	tracefs_remove(tr->dir);
10526 	free_percpu(tr->last_func_repeats);
10527 	free_trace_buffers(tr);
10528 	clear_tracing_err_log(tr);
10529 	free_tracers(tr);
10530 
10531 	if (tr->range_name) {
10532 		reserve_mem_release_by_name(tr->range_name);
10533 		kfree(tr->range_name);
10534 	}
10535 	if (tr->flags & TRACE_ARRAY_FL_VMALLOC)
10536 		vfree((void *)tr->range_addr_start);
10537 
10538 	for (i = 0; i < tr->nr_topts; i++) {
10539 		kfree(tr->topts[i].topts);
10540 	}
10541 	kfree(tr->topts);
10542 
10543 	free_cpumask_var(tr->pipe_cpumask);
10544 	free_cpumask_var(tr->tracing_cpumask);
10545 	kfree_const(tr->system_names);
10546 	kfree(tr->name);
10547 	kfree(tr);
10548 
10549 	return 0;
10550 }
10551 
trace_array_destroy(struct trace_array * this_tr)10552 int trace_array_destroy(struct trace_array *this_tr)
10553 {
10554 	struct trace_array *tr;
10555 
10556 	if (!this_tr)
10557 		return -EINVAL;
10558 
10559 	guard(mutex)(&event_mutex);
10560 	guard(mutex)(&trace_types_lock);
10561 
10562 
10563 	/* Making sure trace array exists before destroying it. */
10564 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10565 		if (tr == this_tr)
10566 			return __remove_instance(tr);
10567 	}
10568 
10569 	return -ENODEV;
10570 }
10571 EXPORT_SYMBOL_GPL(trace_array_destroy);
10572 
instance_rmdir(const char * name)10573 static int instance_rmdir(const char *name)
10574 {
10575 	struct trace_array *tr;
10576 
10577 	guard(mutex)(&event_mutex);
10578 	guard(mutex)(&trace_types_lock);
10579 
10580 	tr = trace_array_find(name);
10581 	if (!tr)
10582 		return -ENODEV;
10583 
10584 	return __remove_instance(tr);
10585 }
10586 
create_trace_instances(struct dentry * d_tracer)10587 static __init void create_trace_instances(struct dentry *d_tracer)
10588 {
10589 	struct trace_array *tr;
10590 
10591 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
10592 							 instance_mkdir,
10593 							 instance_rmdir);
10594 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
10595 		return;
10596 
10597 	guard(mutex)(&event_mutex);
10598 	guard(mutex)(&trace_types_lock);
10599 
10600 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10601 		if (!tr->name)
10602 			continue;
10603 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
10604 			     "Failed to create instance directory\n"))
10605 			return;
10606 	}
10607 }
10608 
10609 static void
init_tracer_tracefs(struct trace_array * tr,struct dentry * d_tracer)10610 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
10611 {
10612 	int cpu;
10613 
10614 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
10615 			tr, &show_traces_fops);
10616 
10617 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
10618 			tr, &set_tracer_fops);
10619 
10620 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
10621 			  tr, &tracing_cpumask_fops);
10622 
10623 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
10624 			  tr, &tracing_iter_fops);
10625 
10626 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
10627 			  tr, &tracing_fops);
10628 
10629 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
10630 			  tr, &tracing_pipe_fops);
10631 
10632 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
10633 			  tr, &tracing_entries_fops);
10634 
10635 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
10636 			  tr, &tracing_total_entries_fops);
10637 
10638 	trace_create_file("free_buffer", 0200, d_tracer,
10639 			  tr, &tracing_free_buffer_fops);
10640 
10641 	trace_create_file("trace_marker", 0220, d_tracer,
10642 			  tr, &tracing_mark_fops);
10643 
10644 	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
10645 
10646 	trace_create_file("trace_marker_raw", 0220, d_tracer,
10647 			  tr, &tracing_mark_raw_fops);
10648 
10649 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
10650 			  &trace_clock_fops);
10651 
10652 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
10653 			  tr, &rb_simple_fops);
10654 
10655 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
10656 			  &trace_time_stamp_mode_fops);
10657 
10658 	tr->buffer_percent = 50;
10659 
10660 	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
10661 			tr, &buffer_percent_fops);
10662 
10663 	trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
10664 			  tr, &buffer_subbuf_size_fops);
10665 
10666 	trace_create_file("syscall_user_buf_size", TRACE_MODE_WRITE, d_tracer,
10667 			 tr, &tracing_syscall_buf_fops);
10668 
10669 	create_trace_options_dir(tr);
10670 
10671 #ifdef CONFIG_TRACER_MAX_TRACE
10672 	trace_create_maxlat_file(tr, d_tracer);
10673 #endif
10674 
10675 	if (ftrace_create_function_files(tr, d_tracer))
10676 		MEM_FAIL(1, "Could not allocate function filter files");
10677 
10678 	if (tr->range_addr_start) {
10679 		trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
10680 				  tr, &last_boot_fops);
10681 #ifdef CONFIG_TRACER_SNAPSHOT
10682 	} else {
10683 		trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
10684 				  tr, &snapshot_fops);
10685 #endif
10686 	}
10687 
10688 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
10689 			  tr, &tracing_err_log_fops);
10690 
10691 	for_each_tracing_cpu(cpu)
10692 		tracing_init_tracefs_percpu(tr, cpu);
10693 
10694 	ftrace_init_tracefs(tr, d_tracer);
10695 }
10696 
10697 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
trace_automount(struct dentry * mntpt,void * ingore)10698 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
10699 {
10700 	struct vfsmount *mnt;
10701 	struct file_system_type *type;
10702 	struct fs_context *fc;
10703 	int ret;
10704 
10705 	/*
10706 	 * To maintain backward compatibility for tools that mount
10707 	 * debugfs to get to the tracing facility, tracefs is automatically
10708 	 * mounted to the debugfs/tracing directory.
10709 	 */
10710 	type = get_fs_type("tracefs");
10711 	if (!type)
10712 		return NULL;
10713 
10714 	fc = fs_context_for_submount(type, mntpt);
10715 	put_filesystem(type);
10716 	if (IS_ERR(fc))
10717 		return ERR_CAST(fc);
10718 
10719 	pr_warn("NOTICE: Automounting of tracing to debugfs is deprecated and will be removed in 2030\n");
10720 
10721 	ret = vfs_parse_fs_string(fc, "source", "tracefs");
10722 	if (!ret)
10723 		mnt = fc_mount(fc);
10724 	else
10725 		mnt = ERR_PTR(ret);
10726 
10727 	put_fs_context(fc);
10728 	return mnt;
10729 }
10730 #endif
10731 
10732 /**
10733  * tracing_init_dentry - initialize top level trace array
10734  *
10735  * This is called when creating files or directories in the tracing
10736  * directory. It is called via fs_initcall() by any of the boot up code
10737  * and expects to return the dentry of the top level tracing directory.
10738  */
tracing_init_dentry(void)10739 int tracing_init_dentry(void)
10740 {
10741 	struct trace_array *tr = &global_trace;
10742 
10743 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10744 		pr_warn("Tracing disabled due to lockdown\n");
10745 		return -EPERM;
10746 	}
10747 
10748 	/* The top level trace array uses  NULL as parent */
10749 	if (tr->dir)
10750 		return 0;
10751 
10752 	if (WARN_ON(!tracefs_initialized()))
10753 		return -ENODEV;
10754 
10755 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
10756 	/*
10757 	 * As there may still be users that expect the tracing
10758 	 * files to exist in debugfs/tracing, we must automount
10759 	 * the tracefs file system there, so older tools still
10760 	 * work with the newer kernel.
10761 	 */
10762 	tr->dir = debugfs_create_automount("tracing", NULL,
10763 					   trace_automount, NULL);
10764 #endif
10765 
10766 	return 0;
10767 }
10768 
10769 extern struct trace_eval_map *__start_ftrace_eval_maps[];
10770 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
10771 
10772 static struct workqueue_struct *eval_map_wq __initdata;
10773 static struct work_struct eval_map_work __initdata;
10774 static struct work_struct tracerfs_init_work __initdata;
10775 
eval_map_work_func(struct work_struct * work)10776 static void __init eval_map_work_func(struct work_struct *work)
10777 {
10778 	int len;
10779 
10780 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10781 	trace_event_update_with_eval_map(NULL, __start_ftrace_eval_maps, len);
10782 }
10783 
trace_eval_init(void)10784 static int __init trace_eval_init(void)
10785 {
10786 	INIT_WORK(&eval_map_work, eval_map_work_func);
10787 
10788 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
10789 	if (!eval_map_wq) {
10790 		pr_err("Unable to allocate eval_map_wq\n");
10791 		/* Do work here */
10792 		eval_map_work_func(&eval_map_work);
10793 		return -ENOMEM;
10794 	}
10795 
10796 	queue_work(eval_map_wq, &eval_map_work);
10797 	return 0;
10798 }
10799 
10800 subsys_initcall(trace_eval_init);
10801 
trace_eval_sync(void)10802 static int __init trace_eval_sync(void)
10803 {
10804 	/* Make sure the eval map updates are finished */
10805 	if (eval_map_wq)
10806 		destroy_workqueue(eval_map_wq);
10807 	return 0;
10808 }
10809 
10810 late_initcall_sync(trace_eval_sync);
10811 
10812 
10813 #ifdef CONFIG_MODULES
10814 
module_exists(const char * module)10815 bool module_exists(const char *module)
10816 {
10817 	/* All modules have the symbol __this_module */
10818 	static const char this_mod[] = "__this_module";
10819 	char modname[MODULE_NAME_LEN + sizeof(this_mod) + 2];
10820 	unsigned long val;
10821 	int n;
10822 
10823 	n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
10824 
10825 	if (n > sizeof(modname) - 1)
10826 		return false;
10827 
10828 	val = module_kallsyms_lookup_name(modname);
10829 	return val != 0;
10830 }
10831 
trace_module_add_evals(struct module * mod)10832 static void trace_module_add_evals(struct module *mod)
10833 {
10834 	/*
10835 	 * Modules with bad taint do not have events created, do
10836 	 * not bother with enums either.
10837 	 */
10838 	if (trace_module_has_bad_taint(mod))
10839 		return;
10840 
10841 	/* Even if no trace_evals, this need to sanitize field types. */
10842 	trace_event_update_with_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10843 }
10844 
10845 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
trace_module_remove_evals(struct module * mod)10846 static void trace_module_remove_evals(struct module *mod)
10847 {
10848 	union trace_eval_map_item *map;
10849 	union trace_eval_map_item **last = &trace_eval_maps;
10850 
10851 	if (!mod->num_trace_evals)
10852 		return;
10853 
10854 	guard(mutex)(&trace_eval_mutex);
10855 
10856 	map = trace_eval_maps;
10857 
10858 	while (map) {
10859 		if (map->head.mod == mod)
10860 			break;
10861 		map = trace_eval_jmp_to_tail(map);
10862 		last = &map->tail.next;
10863 		map = map->tail.next;
10864 	}
10865 	if (!map)
10866 		return;
10867 
10868 	*last = trace_eval_jmp_to_tail(map)->tail.next;
10869 	kfree(map);
10870 }
10871 #else
trace_module_remove_evals(struct module * mod)10872 static inline void trace_module_remove_evals(struct module *mod) { }
10873 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10874 
trace_module_record(struct module * mod,bool add)10875 static void trace_module_record(struct module *mod, bool add)
10876 {
10877 	struct trace_array *tr;
10878 	unsigned long flags;
10879 
10880 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10881 		flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT);
10882 		/* Update any persistent trace array that has already been started */
10883 		if (flags == TRACE_ARRAY_FL_BOOT && add) {
10884 			guard(mutex)(&scratch_mutex);
10885 			save_mod(mod, tr);
10886 		} else if (flags & TRACE_ARRAY_FL_LAST_BOOT) {
10887 			/* Update delta if the module loaded in previous boot */
10888 			make_mod_delta(mod, tr);
10889 		}
10890 	}
10891 }
10892 
trace_module_notify(struct notifier_block * self,unsigned long val,void * data)10893 static int trace_module_notify(struct notifier_block *self,
10894 			       unsigned long val, void *data)
10895 {
10896 	struct module *mod = data;
10897 
10898 	switch (val) {
10899 	case MODULE_STATE_COMING:
10900 		trace_module_add_evals(mod);
10901 		trace_module_record(mod, true);
10902 		break;
10903 	case MODULE_STATE_GOING:
10904 		trace_module_remove_evals(mod);
10905 		trace_module_record(mod, false);
10906 		break;
10907 	}
10908 
10909 	return NOTIFY_OK;
10910 }
10911 
10912 static struct notifier_block trace_module_nb = {
10913 	.notifier_call = trace_module_notify,
10914 	.priority = 0,
10915 };
10916 #endif /* CONFIG_MODULES */
10917 
tracer_init_tracefs_work_func(struct work_struct * work)10918 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10919 {
10920 
10921 	event_trace_init();
10922 
10923 	init_tracer_tracefs(&global_trace, NULL);
10924 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
10925 
10926 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10927 			&global_trace, &tracing_thresh_fops);
10928 
10929 	trace_create_file("README", TRACE_MODE_READ, NULL,
10930 			NULL, &tracing_readme_fops);
10931 
10932 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10933 			NULL, &tracing_saved_cmdlines_fops);
10934 
10935 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10936 			  NULL, &tracing_saved_cmdlines_size_fops);
10937 
10938 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10939 			NULL, &tracing_saved_tgids_fops);
10940 
10941 	trace_create_eval_file(NULL);
10942 
10943 #ifdef CONFIG_MODULES
10944 	register_module_notifier(&trace_module_nb);
10945 #endif
10946 
10947 #ifdef CONFIG_DYNAMIC_FTRACE
10948 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10949 			NULL, &tracing_dyn_info_fops);
10950 #endif
10951 
10952 	create_trace_instances(NULL);
10953 
10954 	update_tracer_options();
10955 }
10956 
tracer_init_tracefs(void)10957 static __init int tracer_init_tracefs(void)
10958 {
10959 	int ret;
10960 
10961 	trace_access_lock_init();
10962 
10963 	ret = tracing_init_dentry();
10964 	if (ret)
10965 		return 0;
10966 
10967 	if (eval_map_wq) {
10968 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10969 		queue_work(eval_map_wq, &tracerfs_init_work);
10970 	} else {
10971 		tracer_init_tracefs_work_func(NULL);
10972 	}
10973 
10974 	if (rv_init_interface())
10975 		pr_err("RV: Error while creating the RV interface\n");
10976 
10977 	return 0;
10978 }
10979 
10980 fs_initcall(tracer_init_tracefs);
10981 
10982 static int trace_die_panic_handler(struct notifier_block *self,
10983 				unsigned long ev, void *unused);
10984 
10985 static struct notifier_block trace_panic_notifier = {
10986 	.notifier_call = trace_die_panic_handler,
10987 	.priority = INT_MAX - 1,
10988 };
10989 
10990 static struct notifier_block trace_die_notifier = {
10991 	.notifier_call = trace_die_panic_handler,
10992 	.priority = INT_MAX - 1,
10993 };
10994 
10995 /*
10996  * The idea is to execute the following die/panic callback early, in order
10997  * to avoid showing irrelevant information in the trace (like other panic
10998  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10999  * warnings get disabled (to prevent potential log flooding).
11000  */
trace_die_panic_handler(struct notifier_block * self,unsigned long ev,void * unused)11001 static int trace_die_panic_handler(struct notifier_block *self,
11002 				unsigned long ev, void *unused)
11003 {
11004 	if (!ftrace_dump_on_oops_enabled())
11005 		return NOTIFY_DONE;
11006 
11007 	/* The die notifier requires DIE_OOPS to trigger */
11008 	if (self == &trace_die_notifier && ev != DIE_OOPS)
11009 		return NOTIFY_DONE;
11010 
11011 	ftrace_dump(DUMP_PARAM);
11012 
11013 	return NOTIFY_DONE;
11014 }
11015 
11016 /*
11017  * printk is set to max of 1024, we really don't need it that big.
11018  * Nothing should be printing 1000 characters anyway.
11019  */
11020 #define TRACE_MAX_PRINT		1000
11021 
11022 /*
11023  * Define here KERN_TRACE so that we have one place to modify
11024  * it if we decide to change what log level the ftrace dump
11025  * should be at.
11026  */
11027 #define KERN_TRACE		KERN_EMERG
11028 
11029 void
trace_printk_seq(struct trace_seq * s)11030 trace_printk_seq(struct trace_seq *s)
11031 {
11032 	/* Probably should print a warning here. */
11033 	if (s->seq.len >= TRACE_MAX_PRINT)
11034 		s->seq.len = TRACE_MAX_PRINT;
11035 
11036 	/*
11037 	 * More paranoid code. Although the buffer size is set to
11038 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
11039 	 * an extra layer of protection.
11040 	 */
11041 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
11042 		s->seq.len = s->seq.size - 1;
11043 
11044 	/* should be zero ended, but we are paranoid. */
11045 	s->buffer[s->seq.len] = 0;
11046 
11047 	printk(KERN_TRACE "%s", s->buffer);
11048 
11049 	trace_seq_init(s);
11050 }
11051 
trace_init_iter(struct trace_iterator * iter,struct trace_array * tr)11052 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
11053 {
11054 	iter->tr = tr;
11055 	iter->trace = iter->tr->current_trace;
11056 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
11057 	iter->array_buffer = &tr->array_buffer;
11058 
11059 	if (iter->trace && iter->trace->open)
11060 		iter->trace->open(iter);
11061 
11062 	/* Annotate start of buffers if we had overruns */
11063 	if (ring_buffer_overruns(iter->array_buffer->buffer))
11064 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
11065 
11066 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
11067 	if (trace_clocks[iter->tr->clock_id].in_ns)
11068 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
11069 
11070 	/* Can not use kmalloc for iter.temp and iter.fmt */
11071 	iter->temp = static_temp_buf;
11072 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
11073 	iter->fmt = static_fmt_buf;
11074 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
11075 }
11076 
trace_init_global_iter(struct trace_iterator * iter)11077 void trace_init_global_iter(struct trace_iterator *iter)
11078 {
11079 	trace_init_iter(iter, &global_trace);
11080 }
11081 
ftrace_dump_one(struct trace_array * tr,enum ftrace_dump_mode dump_mode)11082 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
11083 {
11084 	/* use static because iter can be a bit big for the stack */
11085 	static struct trace_iterator iter;
11086 	unsigned int old_userobj;
11087 	unsigned long flags;
11088 	int cnt = 0;
11089 
11090 	/*
11091 	 * Always turn off tracing when we dump.
11092 	 * We don't need to show trace output of what happens
11093 	 * between multiple crashes.
11094 	 *
11095 	 * If the user does a sysrq-z, then they can re-enable
11096 	 * tracing with echo 1 > tracing_on.
11097 	 */
11098 	tracer_tracing_off(tr);
11099 
11100 	local_irq_save(flags);
11101 
11102 	/* Simulate the iterator */
11103 	trace_init_iter(&iter, tr);
11104 
11105 	/* While dumping, do not allow the buffer to be enable */
11106 	tracer_tracing_disable(tr);
11107 
11108 	old_userobj = tr->trace_flags & TRACE_ITER(SYM_USEROBJ);
11109 
11110 	/* don't look at user memory in panic mode */
11111 	tr->trace_flags &= ~TRACE_ITER(SYM_USEROBJ);
11112 
11113 	if (dump_mode == DUMP_ORIG)
11114 		iter.cpu_file = raw_smp_processor_id();
11115 	else
11116 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
11117 
11118 	if (tr == &global_trace)
11119 		printk(KERN_TRACE "Dumping ftrace buffer:\n");
11120 	else
11121 		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
11122 
11123 	/* Did function tracer already get disabled? */
11124 	if (ftrace_is_dead()) {
11125 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
11126 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
11127 	}
11128 
11129 	/*
11130 	 * We need to stop all tracing on all CPUS to read
11131 	 * the next buffer. This is a bit expensive, but is
11132 	 * not done often. We fill all what we can read,
11133 	 * and then release the locks again.
11134 	 */
11135 
11136 	while (!trace_empty(&iter)) {
11137 
11138 		if (!cnt)
11139 			printk(KERN_TRACE "---------------------------------\n");
11140 
11141 		cnt++;
11142 
11143 		trace_iterator_reset(&iter);
11144 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
11145 
11146 		if (trace_find_next_entry_inc(&iter) != NULL) {
11147 			int ret;
11148 
11149 			ret = print_trace_line(&iter);
11150 			if (ret != TRACE_TYPE_NO_CONSUME)
11151 				trace_consume(&iter);
11152 
11153 			trace_printk_seq(&iter.seq);
11154 		}
11155 		touch_nmi_watchdog();
11156 	}
11157 
11158 	if (!cnt)
11159 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
11160 	else
11161 		printk(KERN_TRACE "---------------------------------\n");
11162 
11163 	tr->trace_flags |= old_userobj;
11164 
11165 	tracer_tracing_enable(tr);
11166 	local_irq_restore(flags);
11167 }
11168 
ftrace_dump_by_param(void)11169 static void ftrace_dump_by_param(void)
11170 {
11171 	bool first_param = true;
11172 	char dump_param[MAX_TRACER_SIZE];
11173 	char *buf, *token, *inst_name;
11174 	struct trace_array *tr;
11175 
11176 	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
11177 	buf = dump_param;
11178 
11179 	while ((token = strsep(&buf, ",")) != NULL) {
11180 		if (first_param) {
11181 			first_param = false;
11182 			if (!strcmp("0", token))
11183 				continue;
11184 			else if (!strcmp("1", token)) {
11185 				ftrace_dump_one(&global_trace, DUMP_ALL);
11186 				continue;
11187 			}
11188 			else if (!strcmp("2", token) ||
11189 			  !strcmp("orig_cpu", token)) {
11190 				ftrace_dump_one(&global_trace, DUMP_ORIG);
11191 				continue;
11192 			}
11193 		}
11194 
11195 		inst_name = strsep(&token, "=");
11196 		tr = trace_array_find(inst_name);
11197 		if (!tr) {
11198 			printk(KERN_TRACE "Instance %s not found\n", inst_name);
11199 			continue;
11200 		}
11201 
11202 		if (token && (!strcmp("2", token) ||
11203 			  !strcmp("orig_cpu", token)))
11204 			ftrace_dump_one(tr, DUMP_ORIG);
11205 		else
11206 			ftrace_dump_one(tr, DUMP_ALL);
11207 	}
11208 }
11209 
ftrace_dump(enum ftrace_dump_mode oops_dump_mode)11210 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
11211 {
11212 	static atomic_t dump_running;
11213 
11214 	/* Only allow one dump user at a time. */
11215 	if (atomic_inc_return(&dump_running) != 1) {
11216 		atomic_dec(&dump_running);
11217 		return;
11218 	}
11219 
11220 	switch (oops_dump_mode) {
11221 	case DUMP_ALL:
11222 		ftrace_dump_one(&global_trace, DUMP_ALL);
11223 		break;
11224 	case DUMP_ORIG:
11225 		ftrace_dump_one(&global_trace, DUMP_ORIG);
11226 		break;
11227 	case DUMP_PARAM:
11228 		ftrace_dump_by_param();
11229 		break;
11230 	case DUMP_NONE:
11231 		break;
11232 	default:
11233 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
11234 		ftrace_dump_one(&global_trace, DUMP_ALL);
11235 	}
11236 
11237 	atomic_dec(&dump_running);
11238 }
11239 EXPORT_SYMBOL_GPL(ftrace_dump);
11240 
11241 #define WRITE_BUFSIZE  4096
11242 
trace_parse_run_command(struct file * file,const char __user * buffer,size_t count,loff_t * ppos,int (* createfn)(const char *))11243 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
11244 				size_t count, loff_t *ppos,
11245 				int (*createfn)(const char *))
11246 {
11247 	char *kbuf __free(kfree) = NULL;
11248 	char *buf, *tmp;
11249 	int ret = 0;
11250 	size_t done = 0;
11251 	size_t size;
11252 
11253 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
11254 	if (!kbuf)
11255 		return -ENOMEM;
11256 
11257 	while (done < count) {
11258 		size = count - done;
11259 
11260 		if (size >= WRITE_BUFSIZE)
11261 			size = WRITE_BUFSIZE - 1;
11262 
11263 		if (copy_from_user(kbuf, buffer + done, size))
11264 			return -EFAULT;
11265 
11266 		kbuf[size] = '\0';
11267 		buf = kbuf;
11268 		do {
11269 			tmp = strchr(buf, '\n');
11270 			if (tmp) {
11271 				*tmp = '\0';
11272 				size = tmp - buf + 1;
11273 			} else {
11274 				size = strlen(buf);
11275 				if (done + size < count) {
11276 					if (buf != kbuf)
11277 						break;
11278 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
11279 					pr_warn("Line length is too long: Should be less than %d\n",
11280 						WRITE_BUFSIZE - 2);
11281 					return -EINVAL;
11282 				}
11283 			}
11284 			done += size;
11285 
11286 			/* Remove comments */
11287 			tmp = strchr(buf, '#');
11288 
11289 			if (tmp)
11290 				*tmp = '\0';
11291 
11292 			ret = createfn(buf);
11293 			if (ret)
11294 				return ret;
11295 			buf += size;
11296 
11297 		} while (done < count);
11298 	}
11299 	return done;
11300 }
11301 
11302 #ifdef CONFIG_TRACER_MAX_TRACE
tr_needs_alloc_snapshot(const char * name)11303 __init static bool tr_needs_alloc_snapshot(const char *name)
11304 {
11305 	char *test;
11306 	int len = strlen(name);
11307 	bool ret;
11308 
11309 	if (!boot_snapshot_index)
11310 		return false;
11311 
11312 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
11313 	    boot_snapshot_info[len] == '\t')
11314 		return true;
11315 
11316 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
11317 	if (!test)
11318 		return false;
11319 
11320 	sprintf(test, "\t%s\t", name);
11321 	ret = strstr(boot_snapshot_info, test) == NULL;
11322 	kfree(test);
11323 	return ret;
11324 }
11325 
do_allocate_snapshot(const char * name)11326 __init static void do_allocate_snapshot(const char *name)
11327 {
11328 	if (!tr_needs_alloc_snapshot(name))
11329 		return;
11330 
11331 	/*
11332 	 * When allocate_snapshot is set, the next call to
11333 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
11334 	 * will allocate the snapshot buffer. That will also clear
11335 	 * this flag.
11336 	 */
11337 	allocate_snapshot = true;
11338 }
11339 #else
do_allocate_snapshot(const char * name)11340 static inline void do_allocate_snapshot(const char *name) { }
11341 #endif
11342 
backup_instance_area(const char * backup,unsigned long * addr,phys_addr_t * size)11343 __init static int backup_instance_area(const char *backup,
11344 				       unsigned long *addr, phys_addr_t *size)
11345 {
11346 	struct trace_array *backup_tr;
11347 	void *allocated_vaddr = NULL;
11348 
11349 	backup_tr = trace_array_get_by_name(backup, NULL);
11350 	if (!backup_tr) {
11351 		pr_warn("Tracing: Instance %s is not found.\n", backup);
11352 		return -ENOENT;
11353 	}
11354 
11355 	if (!(backup_tr->flags & TRACE_ARRAY_FL_BOOT)) {
11356 		pr_warn("Tracing: Instance %s is not boot mapped.\n", backup);
11357 		trace_array_put(backup_tr);
11358 		return -EINVAL;
11359 	}
11360 
11361 	*size = backup_tr->range_addr_size;
11362 
11363 	allocated_vaddr = vzalloc(*size);
11364 	if (!allocated_vaddr) {
11365 		pr_warn("Tracing: Failed to allocate memory for copying instance %s (size 0x%lx)\n",
11366 			backup, (unsigned long)*size);
11367 		trace_array_put(backup_tr);
11368 		return -ENOMEM;
11369 	}
11370 
11371 	memcpy(allocated_vaddr,
11372 		(void *)backup_tr->range_addr_start, (size_t)*size);
11373 	*addr = (unsigned long)allocated_vaddr;
11374 
11375 	trace_array_put(backup_tr);
11376 	return 0;
11377 }
11378 
enable_instances(void)11379 __init static void enable_instances(void)
11380 {
11381 	struct trace_array *tr;
11382 	bool memmap_area = false;
11383 	char *curr_str;
11384 	char *name;
11385 	char *str;
11386 	char *tok;
11387 
11388 	/* A tab is always appended */
11389 	boot_instance_info[boot_instance_index - 1] = '\0';
11390 	str = boot_instance_info;
11391 
11392 	while ((curr_str = strsep(&str, "\t"))) {
11393 		phys_addr_t start = 0;
11394 		phys_addr_t size = 0;
11395 		unsigned long addr = 0;
11396 		bool traceprintk = false;
11397 		bool traceoff = false;
11398 		char *flag_delim;
11399 		char *addr_delim;
11400 		char *rname __free(kfree) = NULL;
11401 		char *backup;
11402 
11403 		tok = strsep(&curr_str, ",");
11404 
11405 		name = strsep(&tok, "=");
11406 		backup = tok;
11407 
11408 		flag_delim = strchr(name, '^');
11409 		addr_delim = strchr(name, '@');
11410 
11411 		if (addr_delim)
11412 			*addr_delim++ = '\0';
11413 
11414 		if (flag_delim)
11415 			*flag_delim++ = '\0';
11416 
11417 		if (backup) {
11418 			if (backup_instance_area(backup, &addr, &size) < 0)
11419 				continue;
11420 		}
11421 
11422 		if (flag_delim) {
11423 			char *flag;
11424 
11425 			while ((flag = strsep(&flag_delim, "^"))) {
11426 				if (strcmp(flag, "traceoff") == 0) {
11427 					traceoff = true;
11428 				} else if ((strcmp(flag, "printk") == 0) ||
11429 					   (strcmp(flag, "traceprintk") == 0) ||
11430 					   (strcmp(flag, "trace_printk") == 0)) {
11431 					traceprintk = true;
11432 				} else {
11433 					pr_info("Tracing: Invalid instance flag '%s' for %s\n",
11434 						flag, name);
11435 				}
11436 			}
11437 		}
11438 
11439 		tok = addr_delim;
11440 		if (tok && isdigit(*tok)) {
11441 			start = memparse(tok, &tok);
11442 			if (!start) {
11443 				pr_warn("Tracing: Invalid boot instance address for %s\n",
11444 					name);
11445 				continue;
11446 			}
11447 			if (*tok != ':') {
11448 				pr_warn("Tracing: No size specified for instance %s\n", name);
11449 				continue;
11450 			}
11451 			tok++;
11452 			size = memparse(tok, &tok);
11453 			if (!size) {
11454 				pr_warn("Tracing: Invalid boot instance size for %s\n",
11455 					name);
11456 				continue;
11457 			}
11458 			memmap_area = true;
11459 		} else if (tok) {
11460 			if (!reserve_mem_find_by_name(tok, &start, &size)) {
11461 				start = 0;
11462 				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
11463 				continue;
11464 			}
11465 			rname = kstrdup(tok, GFP_KERNEL);
11466 		}
11467 
11468 		if (start) {
11469 			/* Start and size must be page aligned */
11470 			if (start & ~PAGE_MASK) {
11471 				pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start);
11472 				continue;
11473 			}
11474 			if (size & ~PAGE_MASK) {
11475 				pr_warn("Tracing: mapping size %pa is not page aligned\n", &size);
11476 				continue;
11477 			}
11478 
11479 			if (memmap_area)
11480 				addr = map_pages(start, size);
11481 			else
11482 				addr = (unsigned long)phys_to_virt(start);
11483 			if (addr) {
11484 				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
11485 					name, &start, (unsigned long)size);
11486 			} else {
11487 				pr_warn("Tracing: Failed to map boot instance %s\n", name);
11488 				continue;
11489 			}
11490 		} else {
11491 			/* Only non mapped buffers have snapshot buffers */
11492 			if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
11493 				do_allocate_snapshot(name);
11494 		}
11495 
11496 		tr = trace_array_create_systems(name, NULL, addr, size);
11497 		if (IS_ERR(tr)) {
11498 			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
11499 			continue;
11500 		}
11501 
11502 		if (traceoff)
11503 			tracer_tracing_off(tr);
11504 
11505 		if (traceprintk)
11506 			update_printk_trace(tr);
11507 
11508 		/*
11509 		 * memmap'd buffers can not be freed.
11510 		 */
11511 		if (memmap_area) {
11512 			tr->flags |= TRACE_ARRAY_FL_MEMMAP;
11513 			tr->ref++;
11514 		}
11515 
11516 		/*
11517 		 * Backup buffers can be freed but need vfree().
11518 		 */
11519 		if (backup)
11520 			tr->flags |= TRACE_ARRAY_FL_VMALLOC;
11521 
11522 		if (start || backup) {
11523 			tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
11524 			tr->range_name = no_free_ptr(rname);
11525 		}
11526 
11527 		while ((tok = strsep(&curr_str, ","))) {
11528 			early_enable_events(tr, tok, true);
11529 		}
11530 	}
11531 }
11532 
tracer_alloc_buffers(void)11533 __init static int tracer_alloc_buffers(void)
11534 {
11535 	int ring_buf_size;
11536 	int ret = -ENOMEM;
11537 
11538 
11539 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
11540 		pr_warn("Tracing disabled due to lockdown\n");
11541 		return -EPERM;
11542 	}
11543 
11544 	/*
11545 	 * Make sure we don't accidentally add more trace options
11546 	 * than we have bits for.
11547 	 */
11548 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
11549 
11550 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
11551 		return -ENOMEM;
11552 
11553 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
11554 		goto out_free_buffer_mask;
11555 
11556 	/* Only allocate trace_printk buffers if a trace_printk exists */
11557 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
11558 		/* Must be called before global_trace.buffer is allocated */
11559 		trace_printk_init_buffers();
11560 
11561 	/* To save memory, keep the ring buffer size to its minimum */
11562 	if (global_trace.ring_buffer_expanded)
11563 		ring_buf_size = trace_buf_size;
11564 	else
11565 		ring_buf_size = 1;
11566 
11567 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
11568 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
11569 
11570 	raw_spin_lock_init(&global_trace.start_lock);
11571 
11572 	/*
11573 	 * The prepare callbacks allocates some memory for the ring buffer. We
11574 	 * don't free the buffer if the CPU goes down. If we were to free
11575 	 * the buffer, then the user would lose any trace that was in the
11576 	 * buffer. The memory will be removed once the "instance" is removed.
11577 	 */
11578 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
11579 				      "trace/RB:prepare", trace_rb_cpu_prepare,
11580 				      NULL);
11581 	if (ret < 0)
11582 		goto out_free_cpumask;
11583 	/* Used for event triggers */
11584 	ret = -ENOMEM;
11585 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
11586 	if (!temp_buffer)
11587 		goto out_rm_hp_state;
11588 
11589 	if (trace_create_savedcmd() < 0)
11590 		goto out_free_temp_buffer;
11591 
11592 	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
11593 		goto out_free_savedcmd;
11594 
11595 	/* TODO: make the number of buffers hot pluggable with CPUS */
11596 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
11597 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
11598 		goto out_free_pipe_cpumask;
11599 	}
11600 	if (global_trace.buffer_disabled)
11601 		tracing_off();
11602 
11603 	if (trace_boot_clock) {
11604 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
11605 		if (ret < 0)
11606 			pr_warn("Trace clock %s not defined, going back to default\n",
11607 				trace_boot_clock);
11608 	}
11609 
11610 	/*
11611 	 * register_tracer() might reference current_trace, so it
11612 	 * needs to be set before we register anything. This is
11613 	 * just a bootstrap of current_trace anyway.
11614 	 */
11615 	global_trace.current_trace = &nop_trace;
11616 	global_trace.current_trace_flags = nop_trace.flags;
11617 
11618 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
11619 #ifdef CONFIG_TRACER_MAX_TRACE
11620 	spin_lock_init(&global_trace.snapshot_trigger_lock);
11621 #endif
11622 	ftrace_init_global_array_ops(&global_trace);
11623 
11624 #ifdef CONFIG_MODULES
11625 	INIT_LIST_HEAD(&global_trace.mod_events);
11626 #endif
11627 
11628 	init_trace_flags_index(&global_trace);
11629 
11630 	INIT_LIST_HEAD(&global_trace.tracers);
11631 
11632 	/* All seems OK, enable tracing */
11633 	tracing_disabled = 0;
11634 
11635 	atomic_notifier_chain_register(&panic_notifier_list,
11636 				       &trace_panic_notifier);
11637 
11638 	register_die_notifier(&trace_die_notifier);
11639 
11640 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
11641 
11642 	global_trace.syscall_buf_sz = syscall_buf_size;
11643 
11644 	INIT_LIST_HEAD(&global_trace.systems);
11645 	INIT_LIST_HEAD(&global_trace.events);
11646 	INIT_LIST_HEAD(&global_trace.hist_vars);
11647 	INIT_LIST_HEAD(&global_trace.err_log);
11648 	list_add(&global_trace.marker_list, &marker_copies);
11649 	list_add(&global_trace.list, &ftrace_trace_arrays);
11650 
11651 	register_tracer(&nop_trace);
11652 
11653 	/* Function tracing may start here (via kernel command line) */
11654 	init_function_trace();
11655 
11656 	apply_trace_boot_options();
11657 
11658 	register_snapshot_cmd();
11659 
11660 	return 0;
11661 
11662 out_free_pipe_cpumask:
11663 	free_cpumask_var(global_trace.pipe_cpumask);
11664 out_free_savedcmd:
11665 	trace_free_saved_cmdlines_buffer();
11666 out_free_temp_buffer:
11667 	ring_buffer_free(temp_buffer);
11668 out_rm_hp_state:
11669 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
11670 out_free_cpumask:
11671 	free_cpumask_var(global_trace.tracing_cpumask);
11672 out_free_buffer_mask:
11673 	free_cpumask_var(tracing_buffer_mask);
11674 	return ret;
11675 }
11676 
11677 #ifdef CONFIG_FUNCTION_TRACER
11678 /* Used to set module cached ftrace filtering at boot up */
trace_get_global_array(void)11679 struct trace_array *trace_get_global_array(void)
11680 {
11681 	return &global_trace;
11682 }
11683 #endif
11684 
ftrace_boot_snapshot(void)11685 void __init ftrace_boot_snapshot(void)
11686 {
11687 #ifdef CONFIG_TRACER_MAX_TRACE
11688 	struct trace_array *tr;
11689 
11690 	if (!snapshot_at_boot)
11691 		return;
11692 
11693 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
11694 		if (!tr->allocated_snapshot)
11695 			continue;
11696 
11697 		tracing_snapshot_instance(tr);
11698 		trace_array_puts(tr, "** Boot snapshot taken **\n");
11699 	}
11700 #endif
11701 }
11702 
early_trace_init(void)11703 void __init early_trace_init(void)
11704 {
11705 	if (tracepoint_printk) {
11706 		tracepoint_print_iter =
11707 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
11708 		if (MEM_FAIL(!tracepoint_print_iter,
11709 			     "Failed to allocate trace iterator\n"))
11710 			tracepoint_printk = 0;
11711 		else
11712 			static_key_enable(&tracepoint_printk_key.key);
11713 	}
11714 	tracer_alloc_buffers();
11715 
11716 	init_events();
11717 }
11718 
trace_init(void)11719 void __init trace_init(void)
11720 {
11721 	trace_event_init();
11722 
11723 	if (boot_instance_index)
11724 		enable_instances();
11725 }
11726 
clear_boot_tracer(void)11727 __init static void clear_boot_tracer(void)
11728 {
11729 	/*
11730 	 * The default tracer at boot buffer is an init section.
11731 	 * This function is called in lateinit. If we did not
11732 	 * find the boot tracer, then clear it out, to prevent
11733 	 * later registration from accessing the buffer that is
11734 	 * about to be freed.
11735 	 */
11736 	if (!default_bootup_tracer)
11737 		return;
11738 
11739 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
11740 	       default_bootup_tracer);
11741 	default_bootup_tracer = NULL;
11742 }
11743 
11744 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
tracing_set_default_clock(void)11745 __init static void tracing_set_default_clock(void)
11746 {
11747 	/* sched_clock_stable() is determined in late_initcall */
11748 	if (!trace_boot_clock && !sched_clock_stable()) {
11749 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
11750 			pr_warn("Can not set tracing clock due to lockdown\n");
11751 			return;
11752 		}
11753 
11754 		printk(KERN_WARNING
11755 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
11756 		       "If you want to keep using the local clock, then add:\n"
11757 		       "  \"trace_clock=local\"\n"
11758 		       "on the kernel command line\n");
11759 		tracing_set_clock(&global_trace, "global");
11760 	}
11761 }
11762 #else
tracing_set_default_clock(void)11763 static inline void tracing_set_default_clock(void) { }
11764 #endif
11765 
late_trace_init(void)11766 __init static int late_trace_init(void)
11767 {
11768 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
11769 		static_key_disable(&tracepoint_printk_key.key);
11770 		tracepoint_printk = 0;
11771 	}
11772 
11773 	if (traceoff_after_boot)
11774 		tracing_off();
11775 
11776 	tracing_set_default_clock();
11777 	clear_boot_tracer();
11778 	return 0;
11779 }
11780 
11781 late_initcall_sync(late_trace_init);
11782