xref: /linux/kernel/trace/trace.c (revision 0b1b4a3d8ebec3c42231c306d4b9a5153d047674)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/syscalls.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/cleanup.h>
31 #include <linux/vmalloc.h>
32 #include <linux/ftrace.h>
33 #include <linux/module.h>
34 #include <linux/percpu.h>
35 #include <linux/splice.h>
36 #include <linux/kdebug.h>
37 #include <linux/string.h>
38 #include <linux/mount.h>
39 #include <linux/rwsem.h>
40 #include <linux/slab.h>
41 #include <linux/ctype.h>
42 #include <linux/init.h>
43 #include <linux/panic_notifier.h>
44 #include <linux/poll.h>
45 #include <linux/nmi.h>
46 #include <linux/fs.h>
47 #include <linux/trace.h>
48 #include <linux/sched/clock.h>
49 #include <linux/sched/rt.h>
50 #include <linux/fsnotify.h>
51 #include <linux/irq_work.h>
52 #include <linux/workqueue.h>
53 #include <linux/sort.h>
54 #include <linux/io.h> /* vmap_page_range() */
55 #include <linux/fs_context.h>
56 
57 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
58 
59 #include "trace.h"
60 #include "trace_output.h"
61 
62 #ifdef CONFIG_FTRACE_STARTUP_TEST
63 /*
64  * We need to change this state when a selftest is running.
65  * A selftest will lurk into the ring-buffer to count the
66  * entries inserted during the selftest although some concurrent
67  * insertions into the ring-buffer such as trace_printk could occurred
68  * at the same time, giving false positive or negative results.
69  */
70 static bool __read_mostly tracing_selftest_running;
71 
72 /*
73  * If boot-time tracing including tracers/events via kernel cmdline
74  * is running, we do not want to run SELFTEST.
75  */
76 bool __read_mostly tracing_selftest_disabled;
77 
disable_tracing_selftest(const char * reason)78 void __init disable_tracing_selftest(const char *reason)
79 {
80 	if (!tracing_selftest_disabled) {
81 		tracing_selftest_disabled = true;
82 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
83 	}
84 }
85 #else
86 #define tracing_selftest_running	0
87 #define tracing_selftest_disabled	0
88 #endif
89 
90 /* Pipe tracepoints to printk */
91 static struct trace_iterator *tracepoint_print_iter;
92 int tracepoint_printk;
93 static bool tracepoint_printk_stop_on_boot __initdata;
94 static bool traceoff_after_boot __initdata;
95 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
96 
97 /* Store tracers and their flags per instance */
98 struct tracers {
99 	struct list_head	list;
100 	struct tracer		*tracer;
101 	struct tracer_flags	*flags;
102 };
103 
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 DEFINE_PER_CPU(bool, trace_taskinfo_save);
110 
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will turn to zero if the initialization
114  * of the tracer is successful. But that is the only place that sets
115  * this back to zero.
116  */
117 static int tracing_disabled = 1;
118 
119 cpumask_var_t __read_mostly	tracing_buffer_mask;
120 
121 #define MAX_TRACER_SIZE		100
122 /*
123  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
124  *
125  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
126  * is set, then ftrace_dump is called. This will output the contents
127  * of the ftrace buffers to the console.  This is very useful for
128  * capturing traces that lead to crashes and outputing it to a
129  * serial console.
130  *
131  * It is default off, but you can enable it with either specifying
132  * "ftrace_dump_on_oops" in the kernel command line, or setting
133  * /proc/sys/kernel/ftrace_dump_on_oops
134  * Set 1 if you want to dump buffers of all CPUs
135  * Set 2 if you want to dump the buffer of the CPU that triggered oops
136  * Set instance name if you want to dump the specific trace instance
137  * Multiple instance dump is also supported, and instances are seperated
138  * by commas.
139  */
140 /* Set to string format zero to disable by default */
141 char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
142 
143 /* When set, tracing will stop when a WARN*() is hit */
144 static int __disable_trace_on_warning;
145 
146 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
147 			     void *buffer, size_t *lenp, loff_t *ppos);
148 static const struct ctl_table trace_sysctl_table[] = {
149 	{
150 		.procname	= "ftrace_dump_on_oops",
151 		.data		= &ftrace_dump_on_oops,
152 		.maxlen		= MAX_TRACER_SIZE,
153 		.mode		= 0644,
154 		.proc_handler	= proc_dostring,
155 	},
156 	{
157 		.procname	= "traceoff_on_warning",
158 		.data		= &__disable_trace_on_warning,
159 		.maxlen		= sizeof(__disable_trace_on_warning),
160 		.mode		= 0644,
161 		.proc_handler	= proc_dointvec,
162 	},
163 	{
164 		.procname	= "tracepoint_printk",
165 		.data		= &tracepoint_printk,
166 		.maxlen		= sizeof(tracepoint_printk),
167 		.mode		= 0644,
168 		.proc_handler	= tracepoint_printk_sysctl,
169 	},
170 };
171 
init_trace_sysctls(void)172 static int __init init_trace_sysctls(void)
173 {
174 	register_sysctl_init("kernel", trace_sysctl_table);
175 	return 0;
176 }
177 subsys_initcall(init_trace_sysctls);
178 
179 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
180 /* Map of enums to their values, for "eval_map" file */
181 struct trace_eval_map_head {
182 	struct module			*mod;
183 	unsigned long			length;
184 };
185 
186 union trace_eval_map_item;
187 
188 struct trace_eval_map_tail {
189 	/*
190 	 * "end" is first and points to NULL as it must be different
191 	 * than "mod" or "eval_string"
192 	 */
193 	union trace_eval_map_item	*next;
194 	const char			*end;	/* points to NULL */
195 };
196 
197 static DEFINE_MUTEX(trace_eval_mutex);
198 
199 /*
200  * The trace_eval_maps are saved in an array with two extra elements,
201  * one at the beginning, and one at the end. The beginning item contains
202  * the count of the saved maps (head.length), and the module they
203  * belong to if not built in (head.mod). The ending item contains a
204  * pointer to the next array of saved eval_map items.
205  */
206 union trace_eval_map_item {
207 	struct trace_eval_map		map;
208 	struct trace_eval_map_head	head;
209 	struct trace_eval_map_tail	tail;
210 };
211 
212 static union trace_eval_map_item *trace_eval_maps;
213 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
214 
215 int tracing_set_tracer(struct trace_array *tr, const char *buf);
216 static void ftrace_trace_userstack(struct trace_array *tr,
217 				   struct trace_buffer *buffer,
218 				   unsigned int trace_ctx);
219 
220 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
221 static char *default_bootup_tracer;
222 
223 static bool allocate_snapshot;
224 static bool snapshot_at_boot;
225 
226 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
227 static int boot_instance_index;
228 
229 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
230 static int boot_snapshot_index;
231 
set_cmdline_ftrace(char * str)232 static int __init set_cmdline_ftrace(char *str)
233 {
234 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
235 	default_bootup_tracer = bootup_tracer_buf;
236 	/* We are using ftrace early, expand it */
237 	trace_set_ring_buffer_expanded(NULL);
238 	return 1;
239 }
240 __setup("ftrace=", set_cmdline_ftrace);
241 
ftrace_dump_on_oops_enabled(void)242 int ftrace_dump_on_oops_enabled(void)
243 {
244 	if (!strcmp("0", ftrace_dump_on_oops))
245 		return 0;
246 	else
247 		return 1;
248 }
249 
set_ftrace_dump_on_oops(char * str)250 static int __init set_ftrace_dump_on_oops(char *str)
251 {
252 	if (!*str) {
253 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
254 		return 1;
255 	}
256 
257 	if (*str == ',') {
258 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
259 		strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
260 		return 1;
261 	}
262 
263 	if (*str++ == '=') {
264 		strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
265 		return 1;
266 	}
267 
268 	return 0;
269 }
270 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
271 
stop_trace_on_warning(char * str)272 static int __init stop_trace_on_warning(char *str)
273 {
274 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
275 		__disable_trace_on_warning = 1;
276 	return 1;
277 }
278 __setup("traceoff_on_warning", stop_trace_on_warning);
279 
boot_alloc_snapshot(char * str)280 static int __init boot_alloc_snapshot(char *str)
281 {
282 	char *slot = boot_snapshot_info + boot_snapshot_index;
283 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
284 	int ret;
285 
286 	if (str[0] == '=') {
287 		str++;
288 		if (strlen(str) >= left)
289 			return -1;
290 
291 		ret = snprintf(slot, left, "%s\t", str);
292 		boot_snapshot_index += ret;
293 	} else {
294 		allocate_snapshot = true;
295 		/* We also need the main ring buffer expanded */
296 		trace_set_ring_buffer_expanded(NULL);
297 	}
298 	return 1;
299 }
300 __setup("alloc_snapshot", boot_alloc_snapshot);
301 
302 
boot_snapshot(char * str)303 static int __init boot_snapshot(char *str)
304 {
305 	snapshot_at_boot = true;
306 	boot_alloc_snapshot(str);
307 	return 1;
308 }
309 __setup("ftrace_boot_snapshot", boot_snapshot);
310 
311 
boot_instance(char * str)312 static int __init boot_instance(char *str)
313 {
314 	char *slot = boot_instance_info + boot_instance_index;
315 	int left = sizeof(boot_instance_info) - boot_instance_index;
316 	int ret;
317 
318 	if (strlen(str) >= left)
319 		return -1;
320 
321 	ret = snprintf(slot, left, "%s\t", str);
322 	boot_instance_index += ret;
323 
324 	return 1;
325 }
326 __setup("trace_instance=", boot_instance);
327 
328 
329 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
330 
set_trace_boot_options(char * str)331 static int __init set_trace_boot_options(char *str)
332 {
333 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
334 	return 1;
335 }
336 __setup("trace_options=", set_trace_boot_options);
337 
338 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
339 static char *trace_boot_clock __initdata;
340 
set_trace_boot_clock(char * str)341 static int __init set_trace_boot_clock(char *str)
342 {
343 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
344 	trace_boot_clock = trace_boot_clock_buf;
345 	return 1;
346 }
347 __setup("trace_clock=", set_trace_boot_clock);
348 
set_tracepoint_printk(char * str)349 static int __init set_tracepoint_printk(char *str)
350 {
351 	/* Ignore the "tp_printk_stop_on_boot" param */
352 	if (*str == '_')
353 		return 0;
354 
355 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
356 		tracepoint_printk = 1;
357 	return 1;
358 }
359 __setup("tp_printk", set_tracepoint_printk);
360 
set_tracepoint_printk_stop(char * str)361 static int __init set_tracepoint_printk_stop(char *str)
362 {
363 	tracepoint_printk_stop_on_boot = true;
364 	return 1;
365 }
366 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
367 
set_traceoff_after_boot(char * str)368 static int __init set_traceoff_after_boot(char *str)
369 {
370 	traceoff_after_boot = true;
371 	return 1;
372 }
373 __setup("traceoff_after_boot", set_traceoff_after_boot);
374 
ns2usecs(u64 nsec)375 unsigned long long ns2usecs(u64 nsec)
376 {
377 	nsec += 500;
378 	do_div(nsec, 1000);
379 	return nsec;
380 }
381 
382 static void
trace_process_export(struct trace_export * export,struct ring_buffer_event * event,int flag)383 trace_process_export(struct trace_export *export,
384 	       struct ring_buffer_event *event, int flag)
385 {
386 	struct trace_entry *entry;
387 	unsigned int size = 0;
388 
389 	if (export->flags & flag) {
390 		entry = ring_buffer_event_data(event);
391 		size = ring_buffer_event_length(event);
392 		export->write(export, entry, size);
393 	}
394 }
395 
396 static DEFINE_MUTEX(ftrace_export_lock);
397 
398 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
399 
400 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
401 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
402 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
403 
ftrace_exports_enable(struct trace_export * export)404 static inline void ftrace_exports_enable(struct trace_export *export)
405 {
406 	if (export->flags & TRACE_EXPORT_FUNCTION)
407 		static_branch_inc(&trace_function_exports_enabled);
408 
409 	if (export->flags & TRACE_EXPORT_EVENT)
410 		static_branch_inc(&trace_event_exports_enabled);
411 
412 	if (export->flags & TRACE_EXPORT_MARKER)
413 		static_branch_inc(&trace_marker_exports_enabled);
414 }
415 
ftrace_exports_disable(struct trace_export * export)416 static inline void ftrace_exports_disable(struct trace_export *export)
417 {
418 	if (export->flags & TRACE_EXPORT_FUNCTION)
419 		static_branch_dec(&trace_function_exports_enabled);
420 
421 	if (export->flags & TRACE_EXPORT_EVENT)
422 		static_branch_dec(&trace_event_exports_enabled);
423 
424 	if (export->flags & TRACE_EXPORT_MARKER)
425 		static_branch_dec(&trace_marker_exports_enabled);
426 }
427 
ftrace_exports(struct ring_buffer_event * event,int flag)428 static void ftrace_exports(struct ring_buffer_event *event, int flag)
429 {
430 	struct trace_export *export;
431 
432 	guard(preempt_notrace)();
433 
434 	export = rcu_dereference_raw_check(ftrace_exports_list);
435 	while (export) {
436 		trace_process_export(export, event, flag);
437 		export = rcu_dereference_raw_check(export->next);
438 	}
439 }
440 
441 static inline void
add_trace_export(struct trace_export ** list,struct trace_export * export)442 add_trace_export(struct trace_export **list, struct trace_export *export)
443 {
444 	rcu_assign_pointer(export->next, *list);
445 	/*
446 	 * We are entering export into the list but another
447 	 * CPU might be walking that list. We need to make sure
448 	 * the export->next pointer is valid before another CPU sees
449 	 * the export pointer included into the list.
450 	 */
451 	rcu_assign_pointer(*list, export);
452 }
453 
454 static inline int
rm_trace_export(struct trace_export ** list,struct trace_export * export)455 rm_trace_export(struct trace_export **list, struct trace_export *export)
456 {
457 	struct trace_export **p;
458 
459 	for (p = list; *p != NULL; p = &(*p)->next)
460 		if (*p == export)
461 			break;
462 
463 	if (*p != export)
464 		return -1;
465 
466 	rcu_assign_pointer(*p, (*p)->next);
467 
468 	return 0;
469 }
470 
471 static inline void
add_ftrace_export(struct trace_export ** list,struct trace_export * export)472 add_ftrace_export(struct trace_export **list, struct trace_export *export)
473 {
474 	ftrace_exports_enable(export);
475 
476 	add_trace_export(list, export);
477 }
478 
479 static inline int
rm_ftrace_export(struct trace_export ** list,struct trace_export * export)480 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
481 {
482 	int ret;
483 
484 	ret = rm_trace_export(list, export);
485 	ftrace_exports_disable(export);
486 
487 	return ret;
488 }
489 
register_ftrace_export(struct trace_export * export)490 int register_ftrace_export(struct trace_export *export)
491 {
492 	if (WARN_ON_ONCE(!export->write))
493 		return -1;
494 
495 	guard(mutex)(&ftrace_export_lock);
496 
497 	add_ftrace_export(&ftrace_exports_list, export);
498 
499 	return 0;
500 }
501 EXPORT_SYMBOL_GPL(register_ftrace_export);
502 
unregister_ftrace_export(struct trace_export * export)503 int unregister_ftrace_export(struct trace_export *export)
504 {
505 	guard(mutex)(&ftrace_export_lock);
506 	return rm_ftrace_export(&ftrace_exports_list, export);
507 }
508 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
509 
510 /* trace_flags holds trace_options default values */
511 #define TRACE_DEFAULT_FLAGS						\
512 	(FUNCTION_DEFAULT_FLAGS | FPROFILE_DEFAULT_FLAGS |		\
513 	 TRACE_ITER(PRINT_PARENT) | TRACE_ITER(PRINTK) |		\
514 	 TRACE_ITER(ANNOTATE) | TRACE_ITER(CONTEXT_INFO) |		\
515 	 TRACE_ITER(RECORD_CMD) | TRACE_ITER(OVERWRITE) |		\
516 	 TRACE_ITER(IRQ_INFO) | TRACE_ITER(MARKERS) |			\
517 	 TRACE_ITER(HASH_PTR) | TRACE_ITER(TRACE_PRINTK) |		\
518 	 TRACE_ITER(COPY_MARKER))
519 
520 /* trace_options that are only supported by global_trace */
521 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER(PRINTK) |			\
522 	       TRACE_ITER(PRINTK_MSGONLY) | TRACE_ITER(RECORD_CMD) |	\
523 	       TRACE_ITER(PROF_TEXT_OFFSET) | FPROFILE_DEFAULT_FLAGS)
524 
525 /* trace_flags that are default zero for instances */
526 #define ZEROED_TRACE_FLAGS \
527 	(TRACE_ITER(EVENT_FORK) | TRACE_ITER(FUNC_FORK) | TRACE_ITER(TRACE_PRINTK) | \
528 	 TRACE_ITER(COPY_MARKER))
529 
530 /*
531  * The global_trace is the descriptor that holds the top-level tracing
532  * buffers for the live tracing.
533  */
534 static struct trace_array global_trace = {
535 	.trace_flags = TRACE_DEFAULT_FLAGS,
536 };
537 
538 static struct trace_array *printk_trace = &global_trace;
539 
540 /* List of trace_arrays interested in the top level trace_marker */
541 static LIST_HEAD(marker_copies);
542 
printk_binsafe(struct trace_array * tr)543 static __always_inline bool printk_binsafe(struct trace_array *tr)
544 {
545 	/*
546 	 * The binary format of traceprintk can cause a crash if used
547 	 * by a buffer from another boot. Force the use of the
548 	 * non binary version of trace_printk if the trace_printk
549 	 * buffer is a boot mapped ring buffer.
550 	 */
551 	return !(tr->flags & TRACE_ARRAY_FL_BOOT);
552 }
553 
update_printk_trace(struct trace_array * tr)554 static void update_printk_trace(struct trace_array *tr)
555 {
556 	if (printk_trace == tr)
557 		return;
558 
559 	printk_trace->trace_flags &= ~TRACE_ITER(TRACE_PRINTK);
560 	printk_trace = tr;
561 	tr->trace_flags |= TRACE_ITER(TRACE_PRINTK);
562 }
563 
564 /* Returns true if the status of tr changed */
update_marker_trace(struct trace_array * tr,int enabled)565 static bool update_marker_trace(struct trace_array *tr, int enabled)
566 {
567 	lockdep_assert_held(&event_mutex);
568 
569 	if (enabled) {
570 		if (!list_empty(&tr->marker_list))
571 			return false;
572 
573 		list_add_rcu(&tr->marker_list, &marker_copies);
574 		tr->trace_flags |= TRACE_ITER(COPY_MARKER);
575 		return true;
576 	}
577 
578 	if (list_empty(&tr->marker_list))
579 		return false;
580 
581 	list_del_init(&tr->marker_list);
582 	tr->trace_flags &= ~TRACE_ITER(COPY_MARKER);
583 	return true;
584 }
585 
trace_set_ring_buffer_expanded(struct trace_array * tr)586 void trace_set_ring_buffer_expanded(struct trace_array *tr)
587 {
588 	if (!tr)
589 		tr = &global_trace;
590 	tr->ring_buffer_expanded = true;
591 }
592 
593 LIST_HEAD(ftrace_trace_arrays);
594 
trace_array_get(struct trace_array * this_tr)595 int trace_array_get(struct trace_array *this_tr)
596 {
597 	struct trace_array *tr;
598 
599 	guard(mutex)(&trace_types_lock);
600 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
601 		if (tr == this_tr) {
602 			tr->ref++;
603 			return 0;
604 		}
605 	}
606 
607 	return -ENODEV;
608 }
609 
__trace_array_put(struct trace_array * this_tr)610 static void __trace_array_put(struct trace_array *this_tr)
611 {
612 	WARN_ON(!this_tr->ref);
613 	this_tr->ref--;
614 }
615 
616 /**
617  * trace_array_put - Decrement the reference counter for this trace array.
618  * @this_tr : pointer to the trace array
619  *
620  * NOTE: Use this when we no longer need the trace array returned by
621  * trace_array_get_by_name(). This ensures the trace array can be later
622  * destroyed.
623  *
624  */
trace_array_put(struct trace_array * this_tr)625 void trace_array_put(struct trace_array *this_tr)
626 {
627 	if (!this_tr)
628 		return;
629 
630 	guard(mutex)(&trace_types_lock);
631 	__trace_array_put(this_tr);
632 }
633 EXPORT_SYMBOL_GPL(trace_array_put);
634 
tracing_check_open_get_tr(struct trace_array * tr)635 int tracing_check_open_get_tr(struct trace_array *tr)
636 {
637 	int ret;
638 
639 	ret = security_locked_down(LOCKDOWN_TRACEFS);
640 	if (ret)
641 		return ret;
642 
643 	if (tracing_disabled)
644 		return -ENODEV;
645 
646 	if (tr && trace_array_get(tr) < 0)
647 		return -ENODEV;
648 
649 	return 0;
650 }
651 
652 /**
653  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
654  * @filtered_pids: The list of pids to check
655  * @search_pid: The PID to find in @filtered_pids
656  *
657  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
658  */
659 bool
trace_find_filtered_pid(struct trace_pid_list * filtered_pids,pid_t search_pid)660 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
661 {
662 	return trace_pid_list_is_set(filtered_pids, search_pid);
663 }
664 
665 /**
666  * trace_ignore_this_task - should a task be ignored for tracing
667  * @filtered_pids: The list of pids to check
668  * @filtered_no_pids: The list of pids not to be traced
669  * @task: The task that should be ignored if not filtered
670  *
671  * Checks if @task should be traced or not from @filtered_pids.
672  * Returns true if @task should *NOT* be traced.
673  * Returns false if @task should be traced.
674  */
675 bool
trace_ignore_this_task(struct trace_pid_list * filtered_pids,struct trace_pid_list * filtered_no_pids,struct task_struct * task)676 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
677 		       struct trace_pid_list *filtered_no_pids,
678 		       struct task_struct *task)
679 {
680 	/*
681 	 * If filtered_no_pids is not empty, and the task's pid is listed
682 	 * in filtered_no_pids, then return true.
683 	 * Otherwise, if filtered_pids is empty, that means we can
684 	 * trace all tasks. If it has content, then only trace pids
685 	 * within filtered_pids.
686 	 */
687 
688 	return (filtered_pids &&
689 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
690 		(filtered_no_pids &&
691 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
692 }
693 
694 /**
695  * trace_filter_add_remove_task - Add or remove a task from a pid_list
696  * @pid_list: The list to modify
697  * @self: The current task for fork or NULL for exit
698  * @task: The task to add or remove
699  *
700  * If adding a task, if @self is defined, the task is only added if @self
701  * is also included in @pid_list. This happens on fork and tasks should
702  * only be added when the parent is listed. If @self is NULL, then the
703  * @task pid will be removed from the list, which would happen on exit
704  * of a task.
705  */
trace_filter_add_remove_task(struct trace_pid_list * pid_list,struct task_struct * self,struct task_struct * task)706 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
707 				  struct task_struct *self,
708 				  struct task_struct *task)
709 {
710 	if (!pid_list)
711 		return;
712 
713 	/* For forks, we only add if the forking task is listed */
714 	if (self) {
715 		if (!trace_find_filtered_pid(pid_list, self->pid))
716 			return;
717 	}
718 
719 	/* "self" is set for forks, and NULL for exits */
720 	if (self)
721 		trace_pid_list_set(pid_list, task->pid);
722 	else
723 		trace_pid_list_clear(pid_list, task->pid);
724 }
725 
726 /**
727  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
728  * @pid_list: The pid list to show
729  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
730  * @pos: The position of the file
731  *
732  * This is used by the seq_file "next" operation to iterate the pids
733  * listed in a trace_pid_list structure.
734  *
735  * Returns the pid+1 as we want to display pid of zero, but NULL would
736  * stop the iteration.
737  */
trace_pid_next(struct trace_pid_list * pid_list,void * v,loff_t * pos)738 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
739 {
740 	long pid = (unsigned long)v;
741 	unsigned int next;
742 
743 	(*pos)++;
744 
745 	/* pid already is +1 of the actual previous bit */
746 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
747 		return NULL;
748 
749 	pid = next;
750 
751 	/* Return pid + 1 to allow zero to be represented */
752 	return (void *)(pid + 1);
753 }
754 
755 /**
756  * trace_pid_start - Used for seq_file to start reading pid lists
757  * @pid_list: The pid list to show
758  * @pos: The position of the file
759  *
760  * This is used by seq_file "start" operation to start the iteration
761  * of listing pids.
762  *
763  * Returns the pid+1 as we want to display pid of zero, but NULL would
764  * stop the iteration.
765  */
trace_pid_start(struct trace_pid_list * pid_list,loff_t * pos)766 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
767 {
768 	unsigned long pid;
769 	unsigned int first;
770 	loff_t l = 0;
771 
772 	if (trace_pid_list_first(pid_list, &first) < 0)
773 		return NULL;
774 
775 	pid = first;
776 
777 	/* Return pid + 1 so that zero can be the exit value */
778 	for (pid++; pid && l < *pos;
779 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
780 		;
781 	return (void *)pid;
782 }
783 
784 /**
785  * trace_pid_show - show the current pid in seq_file processing
786  * @m: The seq_file structure to write into
787  * @v: A void pointer of the pid (+1) value to display
788  *
789  * Can be directly used by seq_file operations to display the current
790  * pid value.
791  */
trace_pid_show(struct seq_file * m,void * v)792 int trace_pid_show(struct seq_file *m, void *v)
793 {
794 	unsigned long pid = (unsigned long)v - 1;
795 
796 	seq_printf(m, "%lu\n", pid);
797 	return 0;
798 }
799 
800 /* 128 should be much more than enough */
801 #define PID_BUF_SIZE		127
802 
trace_pid_write(struct trace_pid_list * filtered_pids,struct trace_pid_list ** new_pid_list,const char __user * ubuf,size_t cnt)803 int trace_pid_write(struct trace_pid_list *filtered_pids,
804 		    struct trace_pid_list **new_pid_list,
805 		    const char __user *ubuf, size_t cnt)
806 {
807 	struct trace_pid_list *pid_list;
808 	struct trace_parser parser;
809 	unsigned long val;
810 	int nr_pids = 0;
811 	ssize_t read = 0;
812 	ssize_t ret;
813 	loff_t pos;
814 	pid_t pid;
815 
816 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
817 		return -ENOMEM;
818 
819 	/*
820 	 * Always recreate a new array. The write is an all or nothing
821 	 * operation. Always create a new array when adding new pids by
822 	 * the user. If the operation fails, then the current list is
823 	 * not modified.
824 	 */
825 	pid_list = trace_pid_list_alloc();
826 	if (!pid_list) {
827 		trace_parser_put(&parser);
828 		return -ENOMEM;
829 	}
830 
831 	if (filtered_pids) {
832 		/* copy the current bits to the new max */
833 		ret = trace_pid_list_first(filtered_pids, &pid);
834 		while (!ret) {
835 			ret = trace_pid_list_set(pid_list, pid);
836 			if (ret < 0)
837 				goto out;
838 
839 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
840 			nr_pids++;
841 		}
842 	}
843 
844 	ret = 0;
845 	while (cnt > 0) {
846 
847 		pos = 0;
848 
849 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
850 		if (ret < 0)
851 			break;
852 
853 		read += ret;
854 		ubuf += ret;
855 		cnt -= ret;
856 
857 		if (!trace_parser_loaded(&parser))
858 			break;
859 
860 		ret = -EINVAL;
861 		if (kstrtoul(parser.buffer, 0, &val))
862 			break;
863 
864 		pid = (pid_t)val;
865 
866 		if (trace_pid_list_set(pid_list, pid) < 0) {
867 			ret = -1;
868 			break;
869 		}
870 		nr_pids++;
871 
872 		trace_parser_clear(&parser);
873 		ret = 0;
874 	}
875  out:
876 	trace_parser_put(&parser);
877 
878 	if (ret < 0) {
879 		trace_pid_list_free(pid_list);
880 		return ret;
881 	}
882 
883 	if (!nr_pids) {
884 		/* Cleared the list of pids */
885 		trace_pid_list_free(pid_list);
886 		pid_list = NULL;
887 	}
888 
889 	*new_pid_list = pid_list;
890 
891 	return read;
892 }
893 
buffer_ftrace_now(struct array_buffer * buf,int cpu)894 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
895 {
896 	u64 ts;
897 
898 	/* Early boot up does not have a buffer yet */
899 	if (!buf->buffer)
900 		return trace_clock_local();
901 
902 	ts = ring_buffer_time_stamp(buf->buffer);
903 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
904 
905 	return ts;
906 }
907 
ftrace_now(int cpu)908 u64 ftrace_now(int cpu)
909 {
910 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
911 }
912 
913 /**
914  * tracing_is_enabled - Show if global_trace has been enabled
915  *
916  * Shows if the global trace has been enabled or not. It uses the
917  * mirror flag "buffer_disabled" to be used in fast paths such as for
918  * the irqsoff tracer. But it may be inaccurate due to races. If you
919  * need to know the accurate state, use tracing_is_on() which is a little
920  * slower, but accurate.
921  */
tracing_is_enabled(void)922 int tracing_is_enabled(void)
923 {
924 	/*
925 	 * For quick access (irqsoff uses this in fast path), just
926 	 * return the mirror variable of the state of the ring buffer.
927 	 * It's a little racy, but we don't really care.
928 	 */
929 	return !global_trace.buffer_disabled;
930 }
931 
932 /*
933  * trace_buf_size is the size in bytes that is allocated
934  * for a buffer. Note, the number of bytes is always rounded
935  * to page size.
936  *
937  * This number is purposely set to a low number of 16384.
938  * If the dump on oops happens, it will be much appreciated
939  * to not have to wait for all that output. Anyway this can be
940  * boot time and run time configurable.
941  */
942 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
943 
944 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
945 
946 /* trace_types holds a link list of available tracers. */
947 static struct tracer		*trace_types __read_mostly;
948 
949 /*
950  * trace_types_lock is used to protect the trace_types list.
951  */
952 DEFINE_MUTEX(trace_types_lock);
953 
954 /*
955  * serialize the access of the ring buffer
956  *
957  * ring buffer serializes readers, but it is low level protection.
958  * The validity of the events (which returns by ring_buffer_peek() ..etc)
959  * are not protected by ring buffer.
960  *
961  * The content of events may become garbage if we allow other process consumes
962  * these events concurrently:
963  *   A) the page of the consumed events may become a normal page
964  *      (not reader page) in ring buffer, and this page will be rewritten
965  *      by events producer.
966  *   B) The page of the consumed events may become a page for splice_read,
967  *      and this page will be returned to system.
968  *
969  * These primitives allow multi process access to different cpu ring buffer
970  * concurrently.
971  *
972  * These primitives don't distinguish read-only and read-consume access.
973  * Multi read-only access are also serialized.
974  */
975 
976 #ifdef CONFIG_SMP
977 static DECLARE_RWSEM(all_cpu_access_lock);
978 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
979 
trace_access_lock(int cpu)980 static inline void trace_access_lock(int cpu)
981 {
982 	if (cpu == RING_BUFFER_ALL_CPUS) {
983 		/* gain it for accessing the whole ring buffer. */
984 		down_write(&all_cpu_access_lock);
985 	} else {
986 		/* gain it for accessing a cpu ring buffer. */
987 
988 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
989 		down_read(&all_cpu_access_lock);
990 
991 		/* Secondly block other access to this @cpu ring buffer. */
992 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
993 	}
994 }
995 
trace_access_unlock(int cpu)996 static inline void trace_access_unlock(int cpu)
997 {
998 	if (cpu == RING_BUFFER_ALL_CPUS) {
999 		up_write(&all_cpu_access_lock);
1000 	} else {
1001 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
1002 		up_read(&all_cpu_access_lock);
1003 	}
1004 }
1005 
trace_access_lock_init(void)1006 static inline void trace_access_lock_init(void)
1007 {
1008 	int cpu;
1009 
1010 	for_each_possible_cpu(cpu)
1011 		mutex_init(&per_cpu(cpu_access_lock, cpu));
1012 }
1013 
1014 #else
1015 
1016 static DEFINE_MUTEX(access_lock);
1017 
trace_access_lock(int cpu)1018 static inline void trace_access_lock(int cpu)
1019 {
1020 	(void)cpu;
1021 	mutex_lock(&access_lock);
1022 }
1023 
trace_access_unlock(int cpu)1024 static inline void trace_access_unlock(int cpu)
1025 {
1026 	(void)cpu;
1027 	mutex_unlock(&access_lock);
1028 }
1029 
trace_access_lock_init(void)1030 static inline void trace_access_lock_init(void)
1031 {
1032 }
1033 
1034 #endif
1035 
1036 #ifdef CONFIG_STACKTRACE
1037 static void __ftrace_trace_stack(struct trace_array *tr,
1038 				 struct trace_buffer *buffer,
1039 				 unsigned int trace_ctx,
1040 				 int skip, struct pt_regs *regs);
1041 static inline void ftrace_trace_stack(struct trace_array *tr,
1042 				      struct trace_buffer *buffer,
1043 				      unsigned int trace_ctx,
1044 				      int skip, struct pt_regs *regs);
1045 
1046 #else
__ftrace_trace_stack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx,int skip,struct pt_regs * regs)1047 static inline void __ftrace_trace_stack(struct trace_array *tr,
1048 					struct trace_buffer *buffer,
1049 					unsigned int trace_ctx,
1050 					int skip, struct pt_regs *regs)
1051 {
1052 }
ftrace_trace_stack(struct trace_array * tr,struct trace_buffer * buffer,unsigned long trace_ctx,int skip,struct pt_regs * regs)1053 static inline void ftrace_trace_stack(struct trace_array *tr,
1054 				      struct trace_buffer *buffer,
1055 				      unsigned long trace_ctx,
1056 				      int skip, struct pt_regs *regs)
1057 {
1058 }
1059 
1060 #endif
1061 
1062 static __always_inline void
trace_event_setup(struct ring_buffer_event * event,int type,unsigned int trace_ctx)1063 trace_event_setup(struct ring_buffer_event *event,
1064 		  int type, unsigned int trace_ctx)
1065 {
1066 	struct trace_entry *ent = ring_buffer_event_data(event);
1067 
1068 	tracing_generic_entry_update(ent, type, trace_ctx);
1069 }
1070 
1071 static __always_inline struct ring_buffer_event *
__trace_buffer_lock_reserve(struct trace_buffer * buffer,int type,unsigned long len,unsigned int trace_ctx)1072 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
1073 			  int type,
1074 			  unsigned long len,
1075 			  unsigned int trace_ctx)
1076 {
1077 	struct ring_buffer_event *event;
1078 
1079 	event = ring_buffer_lock_reserve(buffer, len);
1080 	if (event != NULL)
1081 		trace_event_setup(event, type, trace_ctx);
1082 
1083 	return event;
1084 }
1085 
tracer_tracing_on(struct trace_array * tr)1086 void tracer_tracing_on(struct trace_array *tr)
1087 {
1088 	if (tr->array_buffer.buffer)
1089 		ring_buffer_record_on(tr->array_buffer.buffer);
1090 	/*
1091 	 * This flag is looked at when buffers haven't been allocated
1092 	 * yet, or by some tracers (like irqsoff), that just want to
1093 	 * know if the ring buffer has been disabled, but it can handle
1094 	 * races of where it gets disabled but we still do a record.
1095 	 * As the check is in the fast path of the tracers, it is more
1096 	 * important to be fast than accurate.
1097 	 */
1098 	tr->buffer_disabled = 0;
1099 }
1100 
1101 /**
1102  * tracing_on - enable tracing buffers
1103  *
1104  * This function enables tracing buffers that may have been
1105  * disabled with tracing_off.
1106  */
tracing_on(void)1107 void tracing_on(void)
1108 {
1109 	tracer_tracing_on(&global_trace);
1110 }
1111 EXPORT_SYMBOL_GPL(tracing_on);
1112 
1113 
1114 static __always_inline void
__buffer_unlock_commit(struct trace_buffer * buffer,struct ring_buffer_event * event)1115 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1116 {
1117 	__this_cpu_write(trace_taskinfo_save, true);
1118 
1119 	/* If this is the temp buffer, we need to commit fully */
1120 	if (this_cpu_read(trace_buffered_event) == event) {
1121 		/* Length is in event->array[0] */
1122 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1123 		/* Release the temp buffer */
1124 		this_cpu_dec(trace_buffered_event_cnt);
1125 		/* ring_buffer_unlock_commit() enables preemption */
1126 		preempt_enable_notrace();
1127 	} else
1128 		ring_buffer_unlock_commit(buffer);
1129 }
1130 
__trace_array_puts(struct trace_array * tr,unsigned long ip,const char * str,int size)1131 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1132 		       const char *str, int size)
1133 {
1134 	struct ring_buffer_event *event;
1135 	struct trace_buffer *buffer;
1136 	struct print_entry *entry;
1137 	unsigned int trace_ctx;
1138 	int alloc;
1139 
1140 	if (!(tr->trace_flags & TRACE_ITER(PRINTK)))
1141 		return 0;
1142 
1143 	if (unlikely(tracing_selftest_running && tr == &global_trace))
1144 		return 0;
1145 
1146 	if (unlikely(tracing_disabled))
1147 		return 0;
1148 
1149 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1150 
1151 	trace_ctx = tracing_gen_ctx();
1152 	buffer = tr->array_buffer.buffer;
1153 	guard(ring_buffer_nest)(buffer);
1154 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1155 					    trace_ctx);
1156 	if (!event)
1157 		return 0;
1158 
1159 	entry = ring_buffer_event_data(event);
1160 	entry->ip = ip;
1161 
1162 	memcpy(&entry->buf, str, size);
1163 
1164 	/* Add a newline if necessary */
1165 	if (entry->buf[size - 1] != '\n') {
1166 		entry->buf[size] = '\n';
1167 		entry->buf[size + 1] = '\0';
1168 	} else
1169 		entry->buf[size] = '\0';
1170 
1171 	__buffer_unlock_commit(buffer, event);
1172 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1173 	return size;
1174 }
1175 EXPORT_SYMBOL_GPL(__trace_array_puts);
1176 
1177 /**
1178  * __trace_puts - write a constant string into the trace buffer.
1179  * @ip:	   The address of the caller
1180  * @str:   The constant string to write
1181  * @size:  The size of the string.
1182  */
__trace_puts(unsigned long ip,const char * str,int size)1183 int __trace_puts(unsigned long ip, const char *str, int size)
1184 {
1185 	return __trace_array_puts(printk_trace, ip, str, size);
1186 }
1187 EXPORT_SYMBOL_GPL(__trace_puts);
1188 
1189 /**
1190  * __trace_bputs - write the pointer to a constant string into trace buffer
1191  * @ip:	   The address of the caller
1192  * @str:   The constant string to write to the buffer to
1193  */
__trace_bputs(unsigned long ip,const char * str)1194 int __trace_bputs(unsigned long ip, const char *str)
1195 {
1196 	struct trace_array *tr = READ_ONCE(printk_trace);
1197 	struct ring_buffer_event *event;
1198 	struct trace_buffer *buffer;
1199 	struct bputs_entry *entry;
1200 	unsigned int trace_ctx;
1201 	int size = sizeof(struct bputs_entry);
1202 
1203 	if (!printk_binsafe(tr))
1204 		return __trace_puts(ip, str, strlen(str));
1205 
1206 	if (!(tr->trace_flags & TRACE_ITER(PRINTK)))
1207 		return 0;
1208 
1209 	if (unlikely(tracing_selftest_running || tracing_disabled))
1210 		return 0;
1211 
1212 	trace_ctx = tracing_gen_ctx();
1213 	buffer = tr->array_buffer.buffer;
1214 
1215 	guard(ring_buffer_nest)(buffer);
1216 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1217 					    trace_ctx);
1218 	if (!event)
1219 		return 0;
1220 
1221 	entry = ring_buffer_event_data(event);
1222 	entry->ip			= ip;
1223 	entry->str			= str;
1224 
1225 	__buffer_unlock_commit(buffer, event);
1226 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1227 
1228 	return 1;
1229 }
1230 EXPORT_SYMBOL_GPL(__trace_bputs);
1231 
1232 #ifdef CONFIG_TRACER_SNAPSHOT
tracing_snapshot_instance_cond(struct trace_array * tr,void * cond_data)1233 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1234 					   void *cond_data)
1235 {
1236 	struct tracer *tracer = tr->current_trace;
1237 	unsigned long flags;
1238 
1239 	if (in_nmi()) {
1240 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1241 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1242 		return;
1243 	}
1244 
1245 	if (!tr->allocated_snapshot) {
1246 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1247 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1248 		tracer_tracing_off(tr);
1249 		return;
1250 	}
1251 
1252 	/* Note, snapshot can not be used when the tracer uses it */
1253 	if (tracer->use_max_tr) {
1254 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1255 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1256 		return;
1257 	}
1258 
1259 	if (tr->mapped) {
1260 		trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
1261 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1262 		return;
1263 	}
1264 
1265 	local_irq_save(flags);
1266 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1267 	local_irq_restore(flags);
1268 }
1269 
tracing_snapshot_instance(struct trace_array * tr)1270 void tracing_snapshot_instance(struct trace_array *tr)
1271 {
1272 	tracing_snapshot_instance_cond(tr, NULL);
1273 }
1274 
1275 /**
1276  * tracing_snapshot - take a snapshot of the current buffer.
1277  *
1278  * This causes a swap between the snapshot buffer and the current live
1279  * tracing buffer. You can use this to take snapshots of the live
1280  * trace when some condition is triggered, but continue to trace.
1281  *
1282  * Note, make sure to allocate the snapshot with either
1283  * a tracing_snapshot_alloc(), or by doing it manually
1284  * with: echo 1 > /sys/kernel/tracing/snapshot
1285  *
1286  * If the snapshot buffer is not allocated, it will stop tracing.
1287  * Basically making a permanent snapshot.
1288  */
tracing_snapshot(void)1289 void tracing_snapshot(void)
1290 {
1291 	struct trace_array *tr = &global_trace;
1292 
1293 	tracing_snapshot_instance(tr);
1294 }
1295 EXPORT_SYMBOL_GPL(tracing_snapshot);
1296 
1297 /**
1298  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1299  * @tr:		The tracing instance to snapshot
1300  * @cond_data:	The data to be tested conditionally, and possibly saved
1301  *
1302  * This is the same as tracing_snapshot() except that the snapshot is
1303  * conditional - the snapshot will only happen if the
1304  * cond_snapshot.update() implementation receiving the cond_data
1305  * returns true, which means that the trace array's cond_snapshot
1306  * update() operation used the cond_data to determine whether the
1307  * snapshot should be taken, and if it was, presumably saved it along
1308  * with the snapshot.
1309  */
tracing_snapshot_cond(struct trace_array * tr,void * cond_data)1310 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1311 {
1312 	tracing_snapshot_instance_cond(tr, cond_data);
1313 }
1314 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1315 
1316 /**
1317  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1318  * @tr:		The tracing instance
1319  *
1320  * When the user enables a conditional snapshot using
1321  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1322  * with the snapshot.  This accessor is used to retrieve it.
1323  *
1324  * Should not be called from cond_snapshot.update(), since it takes
1325  * the tr->max_lock lock, which the code calling
1326  * cond_snapshot.update() has already done.
1327  *
1328  * Returns the cond_data associated with the trace array's snapshot.
1329  */
tracing_cond_snapshot_data(struct trace_array * tr)1330 void *tracing_cond_snapshot_data(struct trace_array *tr)
1331 {
1332 	void *cond_data = NULL;
1333 
1334 	local_irq_disable();
1335 	arch_spin_lock(&tr->max_lock);
1336 
1337 	if (tr->cond_snapshot)
1338 		cond_data = tr->cond_snapshot->cond_data;
1339 
1340 	arch_spin_unlock(&tr->max_lock);
1341 	local_irq_enable();
1342 
1343 	return cond_data;
1344 }
1345 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1346 
1347 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1348 					struct array_buffer *size_buf, int cpu_id);
1349 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1350 
tracing_alloc_snapshot_instance(struct trace_array * tr)1351 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1352 {
1353 	int order;
1354 	int ret;
1355 
1356 	if (!tr->allocated_snapshot) {
1357 
1358 		/* Make the snapshot buffer have the same order as main buffer */
1359 		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1360 		ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1361 		if (ret < 0)
1362 			return ret;
1363 
1364 		/* allocate spare buffer */
1365 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1366 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1367 		if (ret < 0)
1368 			return ret;
1369 
1370 		tr->allocated_snapshot = true;
1371 	}
1372 
1373 	return 0;
1374 }
1375 
free_snapshot(struct trace_array * tr)1376 static void free_snapshot(struct trace_array *tr)
1377 {
1378 	/*
1379 	 * We don't free the ring buffer. instead, resize it because
1380 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
1381 	 * we want preserve it.
1382 	 */
1383 	ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1384 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1385 	set_buffer_entries(&tr->max_buffer, 1);
1386 	tracing_reset_online_cpus(&tr->max_buffer);
1387 	tr->allocated_snapshot = false;
1388 }
1389 
tracing_arm_snapshot_locked(struct trace_array * tr)1390 static int tracing_arm_snapshot_locked(struct trace_array *tr)
1391 {
1392 	int ret;
1393 
1394 	lockdep_assert_held(&trace_types_lock);
1395 
1396 	spin_lock(&tr->snapshot_trigger_lock);
1397 	if (tr->snapshot == UINT_MAX || tr->mapped) {
1398 		spin_unlock(&tr->snapshot_trigger_lock);
1399 		return -EBUSY;
1400 	}
1401 
1402 	tr->snapshot++;
1403 	spin_unlock(&tr->snapshot_trigger_lock);
1404 
1405 	ret = tracing_alloc_snapshot_instance(tr);
1406 	if (ret) {
1407 		spin_lock(&tr->snapshot_trigger_lock);
1408 		tr->snapshot--;
1409 		spin_unlock(&tr->snapshot_trigger_lock);
1410 	}
1411 
1412 	return ret;
1413 }
1414 
tracing_arm_snapshot(struct trace_array * tr)1415 int tracing_arm_snapshot(struct trace_array *tr)
1416 {
1417 	guard(mutex)(&trace_types_lock);
1418 	return tracing_arm_snapshot_locked(tr);
1419 }
1420 
tracing_disarm_snapshot(struct trace_array * tr)1421 void tracing_disarm_snapshot(struct trace_array *tr)
1422 {
1423 	spin_lock(&tr->snapshot_trigger_lock);
1424 	if (!WARN_ON(!tr->snapshot))
1425 		tr->snapshot--;
1426 	spin_unlock(&tr->snapshot_trigger_lock);
1427 }
1428 
1429 /**
1430  * tracing_alloc_snapshot - allocate snapshot buffer.
1431  *
1432  * This only allocates the snapshot buffer if it isn't already
1433  * allocated - it doesn't also take a snapshot.
1434  *
1435  * This is meant to be used in cases where the snapshot buffer needs
1436  * to be set up for events that can't sleep but need to be able to
1437  * trigger a snapshot.
1438  */
tracing_alloc_snapshot(void)1439 int tracing_alloc_snapshot(void)
1440 {
1441 	struct trace_array *tr = &global_trace;
1442 	int ret;
1443 
1444 	ret = tracing_alloc_snapshot_instance(tr);
1445 	WARN_ON(ret < 0);
1446 
1447 	return ret;
1448 }
1449 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1450 
1451 /**
1452  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1453  *
1454  * This is similar to tracing_snapshot(), but it will allocate the
1455  * snapshot buffer if it isn't already allocated. Use this only
1456  * where it is safe to sleep, as the allocation may sleep.
1457  *
1458  * This causes a swap between the snapshot buffer and the current live
1459  * tracing buffer. You can use this to take snapshots of the live
1460  * trace when some condition is triggered, but continue to trace.
1461  */
tracing_snapshot_alloc(void)1462 void tracing_snapshot_alloc(void)
1463 {
1464 	int ret;
1465 
1466 	ret = tracing_alloc_snapshot();
1467 	if (ret < 0)
1468 		return;
1469 
1470 	tracing_snapshot();
1471 }
1472 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1473 
1474 /**
1475  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1476  * @tr:		The tracing instance
1477  * @cond_data:	User data to associate with the snapshot
1478  * @update:	Implementation of the cond_snapshot update function
1479  *
1480  * Check whether the conditional snapshot for the given instance has
1481  * already been enabled, or if the current tracer is already using a
1482  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1483  * save the cond_data and update function inside.
1484  *
1485  * Returns 0 if successful, error otherwise.
1486  */
tracing_snapshot_cond_enable(struct trace_array * tr,void * cond_data,cond_update_fn_t update)1487 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1488 				 cond_update_fn_t update)
1489 {
1490 	struct cond_snapshot *cond_snapshot __free(kfree) =
1491 		kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1492 	int ret;
1493 
1494 	if (!cond_snapshot)
1495 		return -ENOMEM;
1496 
1497 	cond_snapshot->cond_data = cond_data;
1498 	cond_snapshot->update = update;
1499 
1500 	guard(mutex)(&trace_types_lock);
1501 
1502 	if (tr->current_trace->use_max_tr)
1503 		return -EBUSY;
1504 
1505 	/*
1506 	 * The cond_snapshot can only change to NULL without the
1507 	 * trace_types_lock. We don't care if we race with it going
1508 	 * to NULL, but we want to make sure that it's not set to
1509 	 * something other than NULL when we get here, which we can
1510 	 * do safely with only holding the trace_types_lock and not
1511 	 * having to take the max_lock.
1512 	 */
1513 	if (tr->cond_snapshot)
1514 		return -EBUSY;
1515 
1516 	ret = tracing_arm_snapshot_locked(tr);
1517 	if (ret)
1518 		return ret;
1519 
1520 	local_irq_disable();
1521 	arch_spin_lock(&tr->max_lock);
1522 	tr->cond_snapshot = no_free_ptr(cond_snapshot);
1523 	arch_spin_unlock(&tr->max_lock);
1524 	local_irq_enable();
1525 
1526 	return 0;
1527 }
1528 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1529 
1530 /**
1531  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1532  * @tr:		The tracing instance
1533  *
1534  * Check whether the conditional snapshot for the given instance is
1535  * enabled; if so, free the cond_snapshot associated with it,
1536  * otherwise return -EINVAL.
1537  *
1538  * Returns 0 if successful, error otherwise.
1539  */
tracing_snapshot_cond_disable(struct trace_array * tr)1540 int tracing_snapshot_cond_disable(struct trace_array *tr)
1541 {
1542 	int ret = 0;
1543 
1544 	local_irq_disable();
1545 	arch_spin_lock(&tr->max_lock);
1546 
1547 	if (!tr->cond_snapshot)
1548 		ret = -EINVAL;
1549 	else {
1550 		kfree(tr->cond_snapshot);
1551 		tr->cond_snapshot = NULL;
1552 	}
1553 
1554 	arch_spin_unlock(&tr->max_lock);
1555 	local_irq_enable();
1556 
1557 	tracing_disarm_snapshot(tr);
1558 
1559 	return ret;
1560 }
1561 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1562 #else
tracing_snapshot(void)1563 void tracing_snapshot(void)
1564 {
1565 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1566 }
1567 EXPORT_SYMBOL_GPL(tracing_snapshot);
tracing_snapshot_cond(struct trace_array * tr,void * cond_data)1568 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1569 {
1570 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1571 }
1572 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
tracing_alloc_snapshot(void)1573 int tracing_alloc_snapshot(void)
1574 {
1575 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1576 	return -ENODEV;
1577 }
1578 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
tracing_snapshot_alloc(void)1579 void tracing_snapshot_alloc(void)
1580 {
1581 	/* Give warning */
1582 	tracing_snapshot();
1583 }
1584 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
tracing_cond_snapshot_data(struct trace_array * tr)1585 void *tracing_cond_snapshot_data(struct trace_array *tr)
1586 {
1587 	return NULL;
1588 }
1589 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
tracing_snapshot_cond_enable(struct trace_array * tr,void * cond_data,cond_update_fn_t update)1590 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1591 {
1592 	return -ENODEV;
1593 }
1594 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
tracing_snapshot_cond_disable(struct trace_array * tr)1595 int tracing_snapshot_cond_disable(struct trace_array *tr)
1596 {
1597 	return false;
1598 }
1599 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1600 #define free_snapshot(tr)	do { } while (0)
1601 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1602 #endif /* CONFIG_TRACER_SNAPSHOT */
1603 
tracer_tracing_off(struct trace_array * tr)1604 void tracer_tracing_off(struct trace_array *tr)
1605 {
1606 	if (tr->array_buffer.buffer)
1607 		ring_buffer_record_off(tr->array_buffer.buffer);
1608 	/*
1609 	 * This flag is looked at when buffers haven't been allocated
1610 	 * yet, or by some tracers (like irqsoff), that just want to
1611 	 * know if the ring buffer has been disabled, but it can handle
1612 	 * races of where it gets disabled but we still do a record.
1613 	 * As the check is in the fast path of the tracers, it is more
1614 	 * important to be fast than accurate.
1615 	 */
1616 	tr->buffer_disabled = 1;
1617 }
1618 
1619 /**
1620  * tracer_tracing_disable() - temporary disable the buffer from write
1621  * @tr: The trace array to disable its buffer for
1622  *
1623  * Expects trace_tracing_enable() to re-enable tracing.
1624  * The difference between this and tracer_tracing_off() is that this
1625  * is a counter and can nest, whereas, tracer_tracing_off() can
1626  * be called multiple times and a single trace_tracing_on() will
1627  * enable it.
1628  */
tracer_tracing_disable(struct trace_array * tr)1629 void tracer_tracing_disable(struct trace_array *tr)
1630 {
1631 	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1632 		return;
1633 
1634 	ring_buffer_record_disable(tr->array_buffer.buffer);
1635 }
1636 
1637 /**
1638  * tracer_tracing_enable() - counter part of tracer_tracing_disable()
1639  * @tr: The trace array that had tracer_tracincg_disable() called on it
1640  *
1641  * This is called after tracer_tracing_disable() has been called on @tr,
1642  * when it's safe to re-enable tracing.
1643  */
tracer_tracing_enable(struct trace_array * tr)1644 void tracer_tracing_enable(struct trace_array *tr)
1645 {
1646 	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1647 		return;
1648 
1649 	ring_buffer_record_enable(tr->array_buffer.buffer);
1650 }
1651 
1652 /**
1653  * tracing_off - turn off tracing buffers
1654  *
1655  * This function stops the tracing buffers from recording data.
1656  * It does not disable any overhead the tracers themselves may
1657  * be causing. This function simply causes all recording to
1658  * the ring buffers to fail.
1659  */
tracing_off(void)1660 void tracing_off(void)
1661 {
1662 	tracer_tracing_off(&global_trace);
1663 }
1664 EXPORT_SYMBOL_GPL(tracing_off);
1665 
disable_trace_on_warning(void)1666 void disable_trace_on_warning(void)
1667 {
1668 	if (__disable_trace_on_warning) {
1669 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1670 			"Disabling tracing due to warning\n");
1671 		tracing_off();
1672 	}
1673 }
1674 
1675 /**
1676  * tracer_tracing_is_on - show real state of ring buffer enabled
1677  * @tr : the trace array to know if ring buffer is enabled
1678  *
1679  * Shows real state of the ring buffer if it is enabled or not.
1680  */
tracer_tracing_is_on(struct trace_array * tr)1681 bool tracer_tracing_is_on(struct trace_array *tr)
1682 {
1683 	if (tr->array_buffer.buffer)
1684 		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1685 	return !tr->buffer_disabled;
1686 }
1687 
1688 /**
1689  * tracing_is_on - show state of ring buffers enabled
1690  */
tracing_is_on(void)1691 int tracing_is_on(void)
1692 {
1693 	return tracer_tracing_is_on(&global_trace);
1694 }
1695 EXPORT_SYMBOL_GPL(tracing_is_on);
1696 
set_buf_size(char * str)1697 static int __init set_buf_size(char *str)
1698 {
1699 	unsigned long buf_size;
1700 
1701 	if (!str)
1702 		return 0;
1703 	buf_size = memparse(str, &str);
1704 	/*
1705 	 * nr_entries can not be zero and the startup
1706 	 * tests require some buffer space. Therefore
1707 	 * ensure we have at least 4096 bytes of buffer.
1708 	 */
1709 	trace_buf_size = max(4096UL, buf_size);
1710 	return 1;
1711 }
1712 __setup("trace_buf_size=", set_buf_size);
1713 
set_tracing_thresh(char * str)1714 static int __init set_tracing_thresh(char *str)
1715 {
1716 	unsigned long threshold;
1717 	int ret;
1718 
1719 	if (!str)
1720 		return 0;
1721 	ret = kstrtoul(str, 0, &threshold);
1722 	if (ret < 0)
1723 		return 0;
1724 	tracing_thresh = threshold * 1000;
1725 	return 1;
1726 }
1727 __setup("tracing_thresh=", set_tracing_thresh);
1728 
nsecs_to_usecs(unsigned long nsecs)1729 unsigned long nsecs_to_usecs(unsigned long nsecs)
1730 {
1731 	return nsecs / 1000;
1732 }
1733 
1734 /*
1735  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1736  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1737  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1738  * of strings in the order that the evals (enum) were defined.
1739  */
1740 #undef C
1741 #define C(a, b) b
1742 
1743 /* These must match the bit positions in trace_iterator_flags */
1744 static const char *trace_options[] = {
1745 	TRACE_FLAGS
1746 	NULL
1747 };
1748 
1749 static struct {
1750 	u64 (*func)(void);
1751 	const char *name;
1752 	int in_ns;		/* is this clock in nanoseconds? */
1753 } trace_clocks[] = {
1754 	{ trace_clock_local,		"local",	1 },
1755 	{ trace_clock_global,		"global",	1 },
1756 	{ trace_clock_counter,		"counter",	0 },
1757 	{ trace_clock_jiffies,		"uptime",	0 },
1758 	{ trace_clock,			"perf",		1 },
1759 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1760 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1761 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1762 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1763 	ARCH_TRACE_CLOCKS
1764 };
1765 
trace_clock_in_ns(struct trace_array * tr)1766 bool trace_clock_in_ns(struct trace_array *tr)
1767 {
1768 	if (trace_clocks[tr->clock_id].in_ns)
1769 		return true;
1770 
1771 	return false;
1772 }
1773 
1774 /*
1775  * trace_parser_get_init - gets the buffer for trace parser
1776  */
trace_parser_get_init(struct trace_parser * parser,int size)1777 int trace_parser_get_init(struct trace_parser *parser, int size)
1778 {
1779 	memset(parser, 0, sizeof(*parser));
1780 
1781 	parser->buffer = kmalloc(size, GFP_KERNEL);
1782 	if (!parser->buffer)
1783 		return 1;
1784 
1785 	parser->size = size;
1786 	return 0;
1787 }
1788 
1789 /*
1790  * trace_parser_put - frees the buffer for trace parser
1791  */
trace_parser_put(struct trace_parser * parser)1792 void trace_parser_put(struct trace_parser *parser)
1793 {
1794 	kfree(parser->buffer);
1795 	parser->buffer = NULL;
1796 }
1797 
1798 /*
1799  * trace_get_user - reads the user input string separated by  space
1800  * (matched by isspace(ch))
1801  *
1802  * For each string found the 'struct trace_parser' is updated,
1803  * and the function returns.
1804  *
1805  * Returns number of bytes read.
1806  *
1807  * See kernel/trace/trace.h for 'struct trace_parser' details.
1808  */
trace_get_user(struct trace_parser * parser,const char __user * ubuf,size_t cnt,loff_t * ppos)1809 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1810 	size_t cnt, loff_t *ppos)
1811 {
1812 	char ch;
1813 	size_t read = 0;
1814 	ssize_t ret;
1815 
1816 	if (!*ppos)
1817 		trace_parser_clear(parser);
1818 
1819 	ret = get_user(ch, ubuf++);
1820 	if (ret)
1821 		goto fail;
1822 
1823 	read++;
1824 	cnt--;
1825 
1826 	/*
1827 	 * The parser is not finished with the last write,
1828 	 * continue reading the user input without skipping spaces.
1829 	 */
1830 	if (!parser->cont) {
1831 		/* skip white space */
1832 		while (cnt && isspace(ch)) {
1833 			ret = get_user(ch, ubuf++);
1834 			if (ret)
1835 				goto fail;
1836 			read++;
1837 			cnt--;
1838 		}
1839 
1840 		parser->idx = 0;
1841 
1842 		/* only spaces were written */
1843 		if (isspace(ch) || !ch) {
1844 			*ppos += read;
1845 			return read;
1846 		}
1847 	}
1848 
1849 	/* read the non-space input */
1850 	while (cnt && !isspace(ch) && ch) {
1851 		if (parser->idx < parser->size - 1)
1852 			parser->buffer[parser->idx++] = ch;
1853 		else {
1854 			ret = -EINVAL;
1855 			goto fail;
1856 		}
1857 
1858 		ret = get_user(ch, ubuf++);
1859 		if (ret)
1860 			goto fail;
1861 		read++;
1862 		cnt--;
1863 	}
1864 
1865 	/* We either got finished input or we have to wait for another call. */
1866 	if (isspace(ch) || !ch) {
1867 		parser->buffer[parser->idx] = 0;
1868 		parser->cont = false;
1869 	} else if (parser->idx < parser->size - 1) {
1870 		parser->cont = true;
1871 		parser->buffer[parser->idx++] = ch;
1872 		/* Make sure the parsed string always terminates with '\0'. */
1873 		parser->buffer[parser->idx] = 0;
1874 	} else {
1875 		ret = -EINVAL;
1876 		goto fail;
1877 	}
1878 
1879 	*ppos += read;
1880 	return read;
1881 fail:
1882 	trace_parser_fail(parser);
1883 	return ret;
1884 }
1885 
1886 /* TODO add a seq_buf_to_buffer() */
trace_seq_to_buffer(struct trace_seq * s,void * buf,size_t cnt)1887 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1888 {
1889 	int len;
1890 
1891 	if (trace_seq_used(s) <= s->readpos)
1892 		return -EBUSY;
1893 
1894 	len = trace_seq_used(s) - s->readpos;
1895 	if (cnt > len)
1896 		cnt = len;
1897 	memcpy(buf, s->buffer + s->readpos, cnt);
1898 
1899 	s->readpos += cnt;
1900 	return cnt;
1901 }
1902 
1903 unsigned long __read_mostly	tracing_thresh;
1904 
1905 #ifdef CONFIG_TRACER_MAX_TRACE
1906 static const struct file_operations tracing_max_lat_fops;
1907 
1908 #ifdef LATENCY_FS_NOTIFY
1909 
1910 static struct workqueue_struct *fsnotify_wq;
1911 
latency_fsnotify_workfn(struct work_struct * work)1912 static void latency_fsnotify_workfn(struct work_struct *work)
1913 {
1914 	struct trace_array *tr = container_of(work, struct trace_array,
1915 					      fsnotify_work);
1916 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1917 }
1918 
latency_fsnotify_workfn_irq(struct irq_work * iwork)1919 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1920 {
1921 	struct trace_array *tr = container_of(iwork, struct trace_array,
1922 					      fsnotify_irqwork);
1923 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1924 }
1925 
trace_create_maxlat_file(struct trace_array * tr,struct dentry * d_tracer)1926 static void trace_create_maxlat_file(struct trace_array *tr,
1927 				     struct dentry *d_tracer)
1928 {
1929 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1930 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1931 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1932 					      TRACE_MODE_WRITE,
1933 					      d_tracer, tr,
1934 					      &tracing_max_lat_fops);
1935 }
1936 
latency_fsnotify_init(void)1937 __init static int latency_fsnotify_init(void)
1938 {
1939 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1940 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1941 	if (!fsnotify_wq) {
1942 		pr_err("Unable to allocate tr_max_lat_wq\n");
1943 		return -ENOMEM;
1944 	}
1945 	return 0;
1946 }
1947 
1948 late_initcall_sync(latency_fsnotify_init);
1949 
latency_fsnotify(struct trace_array * tr)1950 void latency_fsnotify(struct trace_array *tr)
1951 {
1952 	if (!fsnotify_wq)
1953 		return;
1954 	/*
1955 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1956 	 * possible that we are called from __schedule() or do_idle(), which
1957 	 * could cause a deadlock.
1958 	 */
1959 	irq_work_queue(&tr->fsnotify_irqwork);
1960 }
1961 
1962 #else /* !LATENCY_FS_NOTIFY */
1963 
1964 #define trace_create_maxlat_file(tr, d_tracer)				\
1965 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1966 			  d_tracer, tr, &tracing_max_lat_fops)
1967 
1968 #endif
1969 
1970 /*
1971  * Copy the new maximum trace into the separate maximum-trace
1972  * structure. (this way the maximum trace is permanently saved,
1973  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1974  */
1975 static void
__update_max_tr(struct trace_array * tr,struct task_struct * tsk,int cpu)1976 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1977 {
1978 	struct array_buffer *trace_buf = &tr->array_buffer;
1979 	struct array_buffer *max_buf = &tr->max_buffer;
1980 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1981 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1982 
1983 	max_buf->cpu = cpu;
1984 	max_buf->time_start = data->preempt_timestamp;
1985 
1986 	max_data->saved_latency = tr->max_latency;
1987 	max_data->critical_start = data->critical_start;
1988 	max_data->critical_end = data->critical_end;
1989 
1990 	strscpy(max_data->comm, tsk->comm);
1991 	max_data->pid = tsk->pid;
1992 	/*
1993 	 * If tsk == current, then use current_uid(), as that does not use
1994 	 * RCU. The irq tracer can be called out of RCU scope.
1995 	 */
1996 	if (tsk == current)
1997 		max_data->uid = current_uid();
1998 	else
1999 		max_data->uid = task_uid(tsk);
2000 
2001 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
2002 	max_data->policy = tsk->policy;
2003 	max_data->rt_priority = tsk->rt_priority;
2004 
2005 	/* record this tasks comm */
2006 	tracing_record_cmdline(tsk);
2007 	latency_fsnotify(tr);
2008 }
2009 
2010 /**
2011  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
2012  * @tr: tracer
2013  * @tsk: the task with the latency
2014  * @cpu: The cpu that initiated the trace.
2015  * @cond_data: User data associated with a conditional snapshot
2016  *
2017  * Flip the buffers between the @tr and the max_tr and record information
2018  * about which task was the cause of this latency.
2019  */
2020 void
update_max_tr(struct trace_array * tr,struct task_struct * tsk,int cpu,void * cond_data)2021 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
2022 	      void *cond_data)
2023 {
2024 	if (tr->stop_count)
2025 		return;
2026 
2027 	WARN_ON_ONCE(!irqs_disabled());
2028 
2029 	if (!tr->allocated_snapshot) {
2030 		/* Only the nop tracer should hit this when disabling */
2031 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
2032 		return;
2033 	}
2034 
2035 	arch_spin_lock(&tr->max_lock);
2036 
2037 	/* Inherit the recordable setting from array_buffer */
2038 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
2039 		ring_buffer_record_on(tr->max_buffer.buffer);
2040 	else
2041 		ring_buffer_record_off(tr->max_buffer.buffer);
2042 
2043 #ifdef CONFIG_TRACER_SNAPSHOT
2044 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
2045 		arch_spin_unlock(&tr->max_lock);
2046 		return;
2047 	}
2048 #endif
2049 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
2050 
2051 	__update_max_tr(tr, tsk, cpu);
2052 
2053 	arch_spin_unlock(&tr->max_lock);
2054 
2055 	/* Any waiters on the old snapshot buffer need to wake up */
2056 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
2057 }
2058 
2059 /**
2060  * update_max_tr_single - only copy one trace over, and reset the rest
2061  * @tr: tracer
2062  * @tsk: task with the latency
2063  * @cpu: the cpu of the buffer to copy.
2064  *
2065  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
2066  */
2067 void
update_max_tr_single(struct trace_array * tr,struct task_struct * tsk,int cpu)2068 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
2069 {
2070 	int ret;
2071 
2072 	if (tr->stop_count)
2073 		return;
2074 
2075 	WARN_ON_ONCE(!irqs_disabled());
2076 	if (!tr->allocated_snapshot) {
2077 		/* Only the nop tracer should hit this when disabling */
2078 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
2079 		return;
2080 	}
2081 
2082 	arch_spin_lock(&tr->max_lock);
2083 
2084 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
2085 
2086 	if (ret == -EBUSY) {
2087 		/*
2088 		 * We failed to swap the buffer due to a commit taking
2089 		 * place on this CPU. We fail to record, but we reset
2090 		 * the max trace buffer (no one writes directly to it)
2091 		 * and flag that it failed.
2092 		 * Another reason is resize is in progress.
2093 		 */
2094 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
2095 			"Failed to swap buffers due to commit or resize in progress\n");
2096 	}
2097 
2098 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
2099 
2100 	__update_max_tr(tr, tsk, cpu);
2101 	arch_spin_unlock(&tr->max_lock);
2102 }
2103 
2104 #endif /* CONFIG_TRACER_MAX_TRACE */
2105 
2106 struct pipe_wait {
2107 	struct trace_iterator		*iter;
2108 	int				wait_index;
2109 };
2110 
wait_pipe_cond(void * data)2111 static bool wait_pipe_cond(void *data)
2112 {
2113 	struct pipe_wait *pwait = data;
2114 	struct trace_iterator *iter = pwait->iter;
2115 
2116 	if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2117 		return true;
2118 
2119 	return iter->closed;
2120 }
2121 
wait_on_pipe(struct trace_iterator * iter,int full)2122 static int wait_on_pipe(struct trace_iterator *iter, int full)
2123 {
2124 	struct pipe_wait pwait;
2125 	int ret;
2126 
2127 	/* Iterators are static, they should be filled or empty */
2128 	if (trace_buffer_iter(iter, iter->cpu_file))
2129 		return 0;
2130 
2131 	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2132 	pwait.iter = iter;
2133 
2134 	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2135 			       wait_pipe_cond, &pwait);
2136 
2137 #ifdef CONFIG_TRACER_MAX_TRACE
2138 	/*
2139 	 * Make sure this is still the snapshot buffer, as if a snapshot were
2140 	 * to happen, this would now be the main buffer.
2141 	 */
2142 	if (iter->snapshot)
2143 		iter->array_buffer = &iter->tr->max_buffer;
2144 #endif
2145 	return ret;
2146 }
2147 
2148 #ifdef CONFIG_FTRACE_STARTUP_TEST
2149 static bool selftests_can_run;
2150 
2151 struct trace_selftests {
2152 	struct list_head		list;
2153 	struct tracer			*type;
2154 };
2155 
2156 static LIST_HEAD(postponed_selftests);
2157 
save_selftest(struct tracer * type)2158 static int save_selftest(struct tracer *type)
2159 {
2160 	struct trace_selftests *selftest;
2161 
2162 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2163 	if (!selftest)
2164 		return -ENOMEM;
2165 
2166 	selftest->type = type;
2167 	list_add(&selftest->list, &postponed_selftests);
2168 	return 0;
2169 }
2170 
run_tracer_selftest(struct tracer * type)2171 static int run_tracer_selftest(struct tracer *type)
2172 {
2173 	struct trace_array *tr = &global_trace;
2174 	struct tracer_flags *saved_flags = tr->current_trace_flags;
2175 	struct tracer *saved_tracer = tr->current_trace;
2176 	int ret;
2177 
2178 	if (!type->selftest || tracing_selftest_disabled)
2179 		return 0;
2180 
2181 	/*
2182 	 * If a tracer registers early in boot up (before scheduling is
2183 	 * initialized and such), then do not run its selftests yet.
2184 	 * Instead, run it a little later in the boot process.
2185 	 */
2186 	if (!selftests_can_run)
2187 		return save_selftest(type);
2188 
2189 	if (!tracing_is_on()) {
2190 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2191 			type->name);
2192 		return 0;
2193 	}
2194 
2195 	/*
2196 	 * Run a selftest on this tracer.
2197 	 * Here we reset the trace buffer, and set the current
2198 	 * tracer to be this tracer. The tracer can then run some
2199 	 * internal tracing to verify that everything is in order.
2200 	 * If we fail, we do not register this tracer.
2201 	 */
2202 	tracing_reset_online_cpus(&tr->array_buffer);
2203 
2204 	tr->current_trace = type;
2205 	tr->current_trace_flags = type->flags ? : type->default_flags;
2206 
2207 #ifdef CONFIG_TRACER_MAX_TRACE
2208 	if (type->use_max_tr) {
2209 		/* If we expanded the buffers, make sure the max is expanded too */
2210 		if (tr->ring_buffer_expanded)
2211 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2212 					   RING_BUFFER_ALL_CPUS);
2213 		tr->allocated_snapshot = true;
2214 	}
2215 #endif
2216 
2217 	/* the test is responsible for initializing and enabling */
2218 	pr_info("Testing tracer %s: ", type->name);
2219 	ret = type->selftest(type, tr);
2220 	/* the test is responsible for resetting too */
2221 	tr->current_trace = saved_tracer;
2222 	tr->current_trace_flags = saved_flags;
2223 	if (ret) {
2224 		printk(KERN_CONT "FAILED!\n");
2225 		/* Add the warning after printing 'FAILED' */
2226 		WARN_ON(1);
2227 		return -1;
2228 	}
2229 	/* Only reset on passing, to avoid touching corrupted buffers */
2230 	tracing_reset_online_cpus(&tr->array_buffer);
2231 
2232 #ifdef CONFIG_TRACER_MAX_TRACE
2233 	if (type->use_max_tr) {
2234 		tr->allocated_snapshot = false;
2235 
2236 		/* Shrink the max buffer again */
2237 		if (tr->ring_buffer_expanded)
2238 			ring_buffer_resize(tr->max_buffer.buffer, 1,
2239 					   RING_BUFFER_ALL_CPUS);
2240 	}
2241 #endif
2242 
2243 	printk(KERN_CONT "PASSED\n");
2244 	return 0;
2245 }
2246 
do_run_tracer_selftest(struct tracer * type)2247 static int do_run_tracer_selftest(struct tracer *type)
2248 {
2249 	int ret;
2250 
2251 	/*
2252 	 * Tests can take a long time, especially if they are run one after the
2253 	 * other, as does happen during bootup when all the tracers are
2254 	 * registered. This could cause the soft lockup watchdog to trigger.
2255 	 */
2256 	cond_resched();
2257 
2258 	tracing_selftest_running = true;
2259 	ret = run_tracer_selftest(type);
2260 	tracing_selftest_running = false;
2261 
2262 	return ret;
2263 }
2264 
init_trace_selftests(void)2265 static __init int init_trace_selftests(void)
2266 {
2267 	struct trace_selftests *p, *n;
2268 	struct tracer *t, **last;
2269 	int ret;
2270 
2271 	selftests_can_run = true;
2272 
2273 	guard(mutex)(&trace_types_lock);
2274 
2275 	if (list_empty(&postponed_selftests))
2276 		return 0;
2277 
2278 	pr_info("Running postponed tracer tests:\n");
2279 
2280 	tracing_selftest_running = true;
2281 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2282 		/* This loop can take minutes when sanitizers are enabled, so
2283 		 * lets make sure we allow RCU processing.
2284 		 */
2285 		cond_resched();
2286 		ret = run_tracer_selftest(p->type);
2287 		/* If the test fails, then warn and remove from available_tracers */
2288 		if (ret < 0) {
2289 			WARN(1, "tracer: %s failed selftest, disabling\n",
2290 			     p->type->name);
2291 			last = &trace_types;
2292 			for (t = trace_types; t; t = t->next) {
2293 				if (t == p->type) {
2294 					*last = t->next;
2295 					break;
2296 				}
2297 				last = &t->next;
2298 			}
2299 		}
2300 		list_del(&p->list);
2301 		kfree(p);
2302 	}
2303 	tracing_selftest_running = false;
2304 
2305 	return 0;
2306 }
2307 core_initcall(init_trace_selftests);
2308 #else
do_run_tracer_selftest(struct tracer * type)2309 static inline int do_run_tracer_selftest(struct tracer *type)
2310 {
2311 	return 0;
2312 }
2313 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2314 
2315 static int add_tracer(struct trace_array *tr, struct tracer *t);
2316 
2317 static void __init apply_trace_boot_options(void);
2318 
free_tracers(struct trace_array * tr)2319 static void free_tracers(struct trace_array *tr)
2320 {
2321 	struct tracers *t, *n;
2322 
2323 	lockdep_assert_held(&trace_types_lock);
2324 
2325 	list_for_each_entry_safe(t, n, &tr->tracers, list) {
2326 		list_del(&t->list);
2327 		kfree(t->flags);
2328 		kfree(t);
2329 	}
2330 }
2331 
2332 /**
2333  * register_tracer - register a tracer with the ftrace system.
2334  * @type: the plugin for the tracer
2335  *
2336  * Register a new plugin tracer.
2337  */
register_tracer(struct tracer * type)2338 int __init register_tracer(struct tracer *type)
2339 {
2340 	struct trace_array *tr;
2341 	struct tracer *t;
2342 	int ret = 0;
2343 
2344 	if (!type->name) {
2345 		pr_info("Tracer must have a name\n");
2346 		return -1;
2347 	}
2348 
2349 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2350 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2351 		return -1;
2352 	}
2353 
2354 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2355 		pr_warn("Can not register tracer %s due to lockdown\n",
2356 			   type->name);
2357 		return -EPERM;
2358 	}
2359 
2360 	mutex_lock(&trace_types_lock);
2361 
2362 	for (t = trace_types; t; t = t->next) {
2363 		if (strcmp(type->name, t->name) == 0) {
2364 			/* already found */
2365 			pr_info("Tracer %s already registered\n",
2366 				type->name);
2367 			ret = -1;
2368 			goto out;
2369 		}
2370 	}
2371 
2372 	/* store the tracer for __set_tracer_option */
2373 	if (type->flags)
2374 		type->flags->trace = type;
2375 
2376 	ret = do_run_tracer_selftest(type);
2377 	if (ret < 0)
2378 		goto out;
2379 
2380 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2381 		ret = add_tracer(tr, type);
2382 		if (ret < 0) {
2383 			/* The tracer will still exist but without options */
2384 			pr_warn("Failed to create tracer options for %s\n", type->name);
2385 			break;
2386 		}
2387 	}
2388 
2389 	type->next = trace_types;
2390 	trace_types = type;
2391 
2392  out:
2393 	mutex_unlock(&trace_types_lock);
2394 
2395 	if (ret || !default_bootup_tracer)
2396 		return ret;
2397 
2398 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2399 		return 0;
2400 
2401 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2402 	/* Do we want this tracer to start on bootup? */
2403 	WARN_ON(tracing_set_tracer(&global_trace, type->name) < 0);
2404 	default_bootup_tracer = NULL;
2405 
2406 	apply_trace_boot_options();
2407 
2408 	/* disable other selftests, since this will break it. */
2409 	disable_tracing_selftest("running a tracer");
2410 
2411 	return 0;
2412 }
2413 
tracing_reset_cpu(struct array_buffer * buf,int cpu)2414 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2415 {
2416 	struct trace_buffer *buffer = buf->buffer;
2417 
2418 	if (!buffer)
2419 		return;
2420 
2421 	ring_buffer_record_disable(buffer);
2422 
2423 	/* Make sure all commits have finished */
2424 	synchronize_rcu();
2425 	ring_buffer_reset_cpu(buffer, cpu);
2426 
2427 	ring_buffer_record_enable(buffer);
2428 }
2429 
tracing_reset_online_cpus(struct array_buffer * buf)2430 void tracing_reset_online_cpus(struct array_buffer *buf)
2431 {
2432 	struct trace_buffer *buffer = buf->buffer;
2433 
2434 	if (!buffer)
2435 		return;
2436 
2437 	ring_buffer_record_disable(buffer);
2438 
2439 	/* Make sure all commits have finished */
2440 	synchronize_rcu();
2441 
2442 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2443 
2444 	ring_buffer_reset_online_cpus(buffer);
2445 
2446 	ring_buffer_record_enable(buffer);
2447 }
2448 
tracing_reset_all_cpus(struct array_buffer * buf)2449 static void tracing_reset_all_cpus(struct array_buffer *buf)
2450 {
2451 	struct trace_buffer *buffer = buf->buffer;
2452 
2453 	if (!buffer)
2454 		return;
2455 
2456 	ring_buffer_record_disable(buffer);
2457 
2458 	/* Make sure all commits have finished */
2459 	synchronize_rcu();
2460 
2461 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2462 
2463 	ring_buffer_reset(buffer);
2464 
2465 	ring_buffer_record_enable(buffer);
2466 }
2467 
2468 /* Must have trace_types_lock held */
tracing_reset_all_online_cpus_unlocked(void)2469 void tracing_reset_all_online_cpus_unlocked(void)
2470 {
2471 	struct trace_array *tr;
2472 
2473 	lockdep_assert_held(&trace_types_lock);
2474 
2475 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2476 		if (!tr->clear_trace)
2477 			continue;
2478 		tr->clear_trace = false;
2479 		tracing_reset_online_cpus(&tr->array_buffer);
2480 #ifdef CONFIG_TRACER_MAX_TRACE
2481 		tracing_reset_online_cpus(&tr->max_buffer);
2482 #endif
2483 	}
2484 }
2485 
tracing_reset_all_online_cpus(void)2486 void tracing_reset_all_online_cpus(void)
2487 {
2488 	guard(mutex)(&trace_types_lock);
2489 	tracing_reset_all_online_cpus_unlocked();
2490 }
2491 
is_tracing_stopped(void)2492 int is_tracing_stopped(void)
2493 {
2494 	return global_trace.stop_count;
2495 }
2496 
tracing_start_tr(struct trace_array * tr)2497 static void tracing_start_tr(struct trace_array *tr)
2498 {
2499 	struct trace_buffer *buffer;
2500 
2501 	if (tracing_disabled)
2502 		return;
2503 
2504 	guard(raw_spinlock_irqsave)(&tr->start_lock);
2505 	if (--tr->stop_count) {
2506 		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2507 			/* Someone screwed up their debugging */
2508 			tr->stop_count = 0;
2509 		}
2510 		return;
2511 	}
2512 
2513 	/* Prevent the buffers from switching */
2514 	arch_spin_lock(&tr->max_lock);
2515 
2516 	buffer = tr->array_buffer.buffer;
2517 	if (buffer)
2518 		ring_buffer_record_enable(buffer);
2519 
2520 #ifdef CONFIG_TRACER_MAX_TRACE
2521 	buffer = tr->max_buffer.buffer;
2522 	if (buffer)
2523 		ring_buffer_record_enable(buffer);
2524 #endif
2525 
2526 	arch_spin_unlock(&tr->max_lock);
2527 }
2528 
2529 /**
2530  * tracing_start - quick start of the tracer
2531  *
2532  * If tracing is enabled but was stopped by tracing_stop,
2533  * this will start the tracer back up.
2534  */
tracing_start(void)2535 void tracing_start(void)
2536 
2537 {
2538 	return tracing_start_tr(&global_trace);
2539 }
2540 
tracing_stop_tr(struct trace_array * tr)2541 static void tracing_stop_tr(struct trace_array *tr)
2542 {
2543 	struct trace_buffer *buffer;
2544 
2545 	guard(raw_spinlock_irqsave)(&tr->start_lock);
2546 	if (tr->stop_count++)
2547 		return;
2548 
2549 	/* Prevent the buffers from switching */
2550 	arch_spin_lock(&tr->max_lock);
2551 
2552 	buffer = tr->array_buffer.buffer;
2553 	if (buffer)
2554 		ring_buffer_record_disable(buffer);
2555 
2556 #ifdef CONFIG_TRACER_MAX_TRACE
2557 	buffer = tr->max_buffer.buffer;
2558 	if (buffer)
2559 		ring_buffer_record_disable(buffer);
2560 #endif
2561 
2562 	arch_spin_unlock(&tr->max_lock);
2563 }
2564 
2565 /**
2566  * tracing_stop - quick stop of the tracer
2567  *
2568  * Light weight way to stop tracing. Use in conjunction with
2569  * tracing_start.
2570  */
tracing_stop(void)2571 void tracing_stop(void)
2572 {
2573 	return tracing_stop_tr(&global_trace);
2574 }
2575 
2576 /*
2577  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2578  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2579  * simplifies those functions and keeps them in sync.
2580  */
trace_handle_return(struct trace_seq * s)2581 enum print_line_t trace_handle_return(struct trace_seq *s)
2582 {
2583 	return trace_seq_has_overflowed(s) ?
2584 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2585 }
2586 EXPORT_SYMBOL_GPL(trace_handle_return);
2587 
migration_disable_value(void)2588 static unsigned short migration_disable_value(void)
2589 {
2590 #if defined(CONFIG_SMP)
2591 	return current->migration_disabled;
2592 #else
2593 	return 0;
2594 #endif
2595 }
2596 
tracing_gen_ctx_irq_test(unsigned int irqs_status)2597 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2598 {
2599 	unsigned int trace_flags = irqs_status;
2600 	unsigned int pc;
2601 
2602 	pc = preempt_count();
2603 
2604 	if (pc & NMI_MASK)
2605 		trace_flags |= TRACE_FLAG_NMI;
2606 	if (pc & HARDIRQ_MASK)
2607 		trace_flags |= TRACE_FLAG_HARDIRQ;
2608 	if (in_serving_softirq())
2609 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2610 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2611 		trace_flags |= TRACE_FLAG_BH_OFF;
2612 
2613 	if (tif_need_resched())
2614 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2615 	if (test_preempt_need_resched())
2616 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2617 	if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
2618 		trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
2619 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2620 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2621 }
2622 
2623 struct ring_buffer_event *
trace_buffer_lock_reserve(struct trace_buffer * buffer,int type,unsigned long len,unsigned int trace_ctx)2624 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2625 			  int type,
2626 			  unsigned long len,
2627 			  unsigned int trace_ctx)
2628 {
2629 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2630 }
2631 
2632 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2633 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2634 static int trace_buffered_event_ref;
2635 
2636 /**
2637  * trace_buffered_event_enable - enable buffering events
2638  *
2639  * When events are being filtered, it is quicker to use a temporary
2640  * buffer to write the event data into if there's a likely chance
2641  * that it will not be committed. The discard of the ring buffer
2642  * is not as fast as committing, and is much slower than copying
2643  * a commit.
2644  *
2645  * When an event is to be filtered, allocate per cpu buffers to
2646  * write the event data into, and if the event is filtered and discarded
2647  * it is simply dropped, otherwise, the entire data is to be committed
2648  * in one shot.
2649  */
trace_buffered_event_enable(void)2650 void trace_buffered_event_enable(void)
2651 {
2652 	struct ring_buffer_event *event;
2653 	struct page *page;
2654 	int cpu;
2655 
2656 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2657 
2658 	if (trace_buffered_event_ref++)
2659 		return;
2660 
2661 	for_each_tracing_cpu(cpu) {
2662 		page = alloc_pages_node(cpu_to_node(cpu),
2663 					GFP_KERNEL | __GFP_NORETRY, 0);
2664 		/* This is just an optimization and can handle failures */
2665 		if (!page) {
2666 			pr_err("Failed to allocate event buffer\n");
2667 			break;
2668 		}
2669 
2670 		event = page_address(page);
2671 		memset(event, 0, sizeof(*event));
2672 
2673 		per_cpu(trace_buffered_event, cpu) = event;
2674 
2675 		scoped_guard(preempt,) {
2676 			if (cpu == smp_processor_id() &&
2677 			    __this_cpu_read(trace_buffered_event) !=
2678 			    per_cpu(trace_buffered_event, cpu))
2679 				WARN_ON_ONCE(1);
2680 		}
2681 	}
2682 }
2683 
enable_trace_buffered_event(void * data)2684 static void enable_trace_buffered_event(void *data)
2685 {
2686 	this_cpu_dec(trace_buffered_event_cnt);
2687 }
2688 
disable_trace_buffered_event(void * data)2689 static void disable_trace_buffered_event(void *data)
2690 {
2691 	this_cpu_inc(trace_buffered_event_cnt);
2692 }
2693 
2694 /**
2695  * trace_buffered_event_disable - disable buffering events
2696  *
2697  * When a filter is removed, it is faster to not use the buffered
2698  * events, and to commit directly into the ring buffer. Free up
2699  * the temp buffers when there are no more users. This requires
2700  * special synchronization with current events.
2701  */
trace_buffered_event_disable(void)2702 void trace_buffered_event_disable(void)
2703 {
2704 	int cpu;
2705 
2706 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2707 
2708 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2709 		return;
2710 
2711 	if (--trace_buffered_event_ref)
2712 		return;
2713 
2714 	/* For each CPU, set the buffer as used. */
2715 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2716 			 NULL, true);
2717 
2718 	/* Wait for all current users to finish */
2719 	synchronize_rcu();
2720 
2721 	for_each_tracing_cpu(cpu) {
2722 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2723 		per_cpu(trace_buffered_event, cpu) = NULL;
2724 	}
2725 
2726 	/*
2727 	 * Wait for all CPUs that potentially started checking if they can use
2728 	 * their event buffer only after the previous synchronize_rcu() call and
2729 	 * they still read a valid pointer from trace_buffered_event. It must be
2730 	 * ensured they don't see cleared trace_buffered_event_cnt else they
2731 	 * could wrongly decide to use the pointed-to buffer which is now freed.
2732 	 */
2733 	synchronize_rcu();
2734 
2735 	/* For each CPU, relinquish the buffer */
2736 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2737 			 true);
2738 }
2739 
2740 static struct trace_buffer *temp_buffer;
2741 
2742 struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct trace_buffer ** current_rb,struct trace_event_file * trace_file,int type,unsigned long len,unsigned int trace_ctx)2743 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2744 			  struct trace_event_file *trace_file,
2745 			  int type, unsigned long len,
2746 			  unsigned int trace_ctx)
2747 {
2748 	struct ring_buffer_event *entry;
2749 	struct trace_array *tr = trace_file->tr;
2750 	int val;
2751 
2752 	*current_rb = tr->array_buffer.buffer;
2753 
2754 	if (!tr->no_filter_buffering_ref &&
2755 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2756 		preempt_disable_notrace();
2757 		/*
2758 		 * Filtering is on, so try to use the per cpu buffer first.
2759 		 * This buffer will simulate a ring_buffer_event,
2760 		 * where the type_len is zero and the array[0] will
2761 		 * hold the full length.
2762 		 * (see include/linux/ring-buffer.h for details on
2763 		 *  how the ring_buffer_event is structured).
2764 		 *
2765 		 * Using a temp buffer during filtering and copying it
2766 		 * on a matched filter is quicker than writing directly
2767 		 * into the ring buffer and then discarding it when
2768 		 * it doesn't match. That is because the discard
2769 		 * requires several atomic operations to get right.
2770 		 * Copying on match and doing nothing on a failed match
2771 		 * is still quicker than no copy on match, but having
2772 		 * to discard out of the ring buffer on a failed match.
2773 		 */
2774 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2775 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2776 
2777 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2778 
2779 			/*
2780 			 * Preemption is disabled, but interrupts and NMIs
2781 			 * can still come in now. If that happens after
2782 			 * the above increment, then it will have to go
2783 			 * back to the old method of allocating the event
2784 			 * on the ring buffer, and if the filter fails, it
2785 			 * will have to call ring_buffer_discard_commit()
2786 			 * to remove it.
2787 			 *
2788 			 * Need to also check the unlikely case that the
2789 			 * length is bigger than the temp buffer size.
2790 			 * If that happens, then the reserve is pretty much
2791 			 * guaranteed to fail, as the ring buffer currently
2792 			 * only allows events less than a page. But that may
2793 			 * change in the future, so let the ring buffer reserve
2794 			 * handle the failure in that case.
2795 			 */
2796 			if (val == 1 && likely(len <= max_len)) {
2797 				trace_event_setup(entry, type, trace_ctx);
2798 				entry->array[0] = len;
2799 				/* Return with preemption disabled */
2800 				return entry;
2801 			}
2802 			this_cpu_dec(trace_buffered_event_cnt);
2803 		}
2804 		/* __trace_buffer_lock_reserve() disables preemption */
2805 		preempt_enable_notrace();
2806 	}
2807 
2808 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2809 					    trace_ctx);
2810 	/*
2811 	 * If tracing is off, but we have triggers enabled
2812 	 * we still need to look at the event data. Use the temp_buffer
2813 	 * to store the trace event for the trigger to use. It's recursive
2814 	 * safe and will not be recorded anywhere.
2815 	 */
2816 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2817 		*current_rb = temp_buffer;
2818 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2819 						    trace_ctx);
2820 	}
2821 	return entry;
2822 }
2823 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2824 
2825 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2826 static DEFINE_MUTEX(tracepoint_printk_mutex);
2827 
output_printk(struct trace_event_buffer * fbuffer)2828 static void output_printk(struct trace_event_buffer *fbuffer)
2829 {
2830 	struct trace_event_call *event_call;
2831 	struct trace_event_file *file;
2832 	struct trace_event *event;
2833 	unsigned long flags;
2834 	struct trace_iterator *iter = tracepoint_print_iter;
2835 
2836 	/* We should never get here if iter is NULL */
2837 	if (WARN_ON_ONCE(!iter))
2838 		return;
2839 
2840 	event_call = fbuffer->trace_file->event_call;
2841 	if (!event_call || !event_call->event.funcs ||
2842 	    !event_call->event.funcs->trace)
2843 		return;
2844 
2845 	file = fbuffer->trace_file;
2846 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2847 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2848 	     !filter_match_preds(file->filter, fbuffer->entry)))
2849 		return;
2850 
2851 	event = &fbuffer->trace_file->event_call->event;
2852 
2853 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2854 	trace_seq_init(&iter->seq);
2855 	iter->ent = fbuffer->entry;
2856 	event_call->event.funcs->trace(iter, 0, event);
2857 	trace_seq_putc(&iter->seq, 0);
2858 	printk("%s", iter->seq.buffer);
2859 
2860 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2861 }
2862 
tracepoint_printk_sysctl(const struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)2863 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
2864 			     void *buffer, size_t *lenp,
2865 			     loff_t *ppos)
2866 {
2867 	int save_tracepoint_printk;
2868 	int ret;
2869 
2870 	guard(mutex)(&tracepoint_printk_mutex);
2871 	save_tracepoint_printk = tracepoint_printk;
2872 
2873 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2874 
2875 	/*
2876 	 * This will force exiting early, as tracepoint_printk
2877 	 * is always zero when tracepoint_printk_iter is not allocated
2878 	 */
2879 	if (!tracepoint_print_iter)
2880 		tracepoint_printk = 0;
2881 
2882 	if (save_tracepoint_printk == tracepoint_printk)
2883 		return ret;
2884 
2885 	if (tracepoint_printk)
2886 		static_key_enable(&tracepoint_printk_key.key);
2887 	else
2888 		static_key_disable(&tracepoint_printk_key.key);
2889 
2890 	return ret;
2891 }
2892 
trace_event_buffer_commit(struct trace_event_buffer * fbuffer)2893 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2894 {
2895 	enum event_trigger_type tt = ETT_NONE;
2896 	struct trace_event_file *file = fbuffer->trace_file;
2897 
2898 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2899 			fbuffer->entry, &tt))
2900 		goto discard;
2901 
2902 	if (static_key_false(&tracepoint_printk_key.key))
2903 		output_printk(fbuffer);
2904 
2905 	if (static_branch_unlikely(&trace_event_exports_enabled))
2906 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2907 
2908 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2909 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2910 
2911 discard:
2912 	if (tt)
2913 		event_triggers_post_call(file, tt);
2914 
2915 }
2916 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2917 
2918 /*
2919  * Skip 3:
2920  *
2921  *   trace_buffer_unlock_commit_regs()
2922  *   trace_event_buffer_commit()
2923  *   trace_event_raw_event_xxx()
2924  */
2925 # define STACK_SKIP 3
2926 
trace_buffer_unlock_commit_regs(struct trace_array * tr,struct trace_buffer * buffer,struct ring_buffer_event * event,unsigned int trace_ctx,struct pt_regs * regs)2927 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2928 				     struct trace_buffer *buffer,
2929 				     struct ring_buffer_event *event,
2930 				     unsigned int trace_ctx,
2931 				     struct pt_regs *regs)
2932 {
2933 	__buffer_unlock_commit(buffer, event);
2934 
2935 	/*
2936 	 * If regs is not set, then skip the necessary functions.
2937 	 * Note, we can still get here via blktrace, wakeup tracer
2938 	 * and mmiotrace, but that's ok if they lose a function or
2939 	 * two. They are not that meaningful.
2940 	 */
2941 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2942 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2943 }
2944 
2945 /*
2946  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2947  */
2948 void
trace_buffer_unlock_commit_nostack(struct trace_buffer * buffer,struct ring_buffer_event * event)2949 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2950 				   struct ring_buffer_event *event)
2951 {
2952 	__buffer_unlock_commit(buffer, event);
2953 }
2954 
2955 void
trace_function(struct trace_array * tr,unsigned long ip,unsigned long parent_ip,unsigned int trace_ctx,struct ftrace_regs * fregs)2956 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2957 	       parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs)
2958 {
2959 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2960 	struct ring_buffer_event *event;
2961 	struct ftrace_entry *entry;
2962 	int size = sizeof(*entry);
2963 
2964 	size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long);
2965 
2966 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size,
2967 					    trace_ctx);
2968 	if (!event)
2969 		return;
2970 	entry	= ring_buffer_event_data(event);
2971 	entry->ip			= ip;
2972 	entry->parent_ip		= parent_ip;
2973 
2974 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
2975 	if (fregs) {
2976 		for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
2977 			entry->args[i] = ftrace_regs_get_argument(fregs, i);
2978 	}
2979 #endif
2980 
2981 	if (static_branch_unlikely(&trace_function_exports_enabled))
2982 		ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2983 	__buffer_unlock_commit(buffer, event);
2984 }
2985 
2986 #ifdef CONFIG_STACKTRACE
2987 
2988 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2989 #define FTRACE_KSTACK_NESTING	4
2990 
2991 #define FTRACE_KSTACK_ENTRIES	(SZ_4K / FTRACE_KSTACK_NESTING)
2992 
2993 struct ftrace_stack {
2994 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2995 };
2996 
2997 
2998 struct ftrace_stacks {
2999 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
3000 };
3001 
3002 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3003 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3004 
__ftrace_trace_stack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx,int skip,struct pt_regs * regs)3005 static void __ftrace_trace_stack(struct trace_array *tr,
3006 				 struct trace_buffer *buffer,
3007 				 unsigned int trace_ctx,
3008 				 int skip, struct pt_regs *regs)
3009 {
3010 	struct ring_buffer_event *event;
3011 	unsigned int size, nr_entries;
3012 	struct ftrace_stack *fstack;
3013 	struct stack_entry *entry;
3014 	int stackidx;
3015 
3016 	/*
3017 	 * Add one, for this function and the call to save_stack_trace()
3018 	 * If regs is set, then these functions will not be in the way.
3019 	 */
3020 #ifndef CONFIG_UNWINDER_ORC
3021 	if (!regs)
3022 		skip++;
3023 #endif
3024 
3025 	guard(preempt_notrace)();
3026 
3027 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3028 
3029 	/* This should never happen. If it does, yell once and skip */
3030 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3031 		goto out;
3032 
3033 	/*
3034 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3035 	 * interrupt will either see the value pre increment or post
3036 	 * increment. If the interrupt happens pre increment it will have
3037 	 * restored the counter when it returns.  We just need a barrier to
3038 	 * keep gcc from moving things around.
3039 	 */
3040 	barrier();
3041 
3042 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3043 	size = ARRAY_SIZE(fstack->calls);
3044 
3045 	if (regs) {
3046 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3047 						   size, skip);
3048 	} else {
3049 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3050 	}
3051 
3052 #ifdef CONFIG_DYNAMIC_FTRACE
3053 	/* Mark entry of stack trace as trampoline code */
3054 	if (tr->ops && tr->ops->trampoline) {
3055 		unsigned long tramp_start = tr->ops->trampoline;
3056 		unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
3057 		unsigned long *calls = fstack->calls;
3058 
3059 		for (int i = 0; i < nr_entries; i++) {
3060 			if (calls[i] >= tramp_start && calls[i] < tramp_end)
3061 				calls[i] = FTRACE_TRAMPOLINE_MARKER;
3062 		}
3063 	}
3064 #endif
3065 
3066 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3067 				    struct_size(entry, caller, nr_entries),
3068 				    trace_ctx);
3069 	if (!event)
3070 		goto out;
3071 	entry = ring_buffer_event_data(event);
3072 
3073 	entry->size = nr_entries;
3074 	memcpy(&entry->caller, fstack->calls,
3075 	       flex_array_size(entry, caller, nr_entries));
3076 
3077 	__buffer_unlock_commit(buffer, event);
3078 
3079  out:
3080 	/* Again, don't let gcc optimize things here */
3081 	barrier();
3082 	__this_cpu_dec(ftrace_stack_reserve);
3083 }
3084 
ftrace_trace_stack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx,int skip,struct pt_regs * regs)3085 static inline void ftrace_trace_stack(struct trace_array *tr,
3086 				      struct trace_buffer *buffer,
3087 				      unsigned int trace_ctx,
3088 				      int skip, struct pt_regs *regs)
3089 {
3090 	if (!(tr->trace_flags & TRACE_ITER(STACKTRACE)))
3091 		return;
3092 
3093 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
3094 }
3095 
__trace_stack(struct trace_array * tr,unsigned int trace_ctx,int skip)3096 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3097 		   int skip)
3098 {
3099 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3100 
3101 	if (rcu_is_watching()) {
3102 		__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3103 		return;
3104 	}
3105 
3106 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3107 		return;
3108 
3109 	/*
3110 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3111 	 * but if the above rcu_is_watching() failed, then the NMI
3112 	 * triggered someplace critical, and ct_irq_enter() should
3113 	 * not be called from NMI.
3114 	 */
3115 	if (unlikely(in_nmi()))
3116 		return;
3117 
3118 	ct_irq_enter_irqson();
3119 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3120 	ct_irq_exit_irqson();
3121 }
3122 
3123 /**
3124  * trace_dump_stack - record a stack back trace in the trace buffer
3125  * @skip: Number of functions to skip (helper handlers)
3126  */
trace_dump_stack(int skip)3127 void trace_dump_stack(int skip)
3128 {
3129 	if (tracing_disabled || tracing_selftest_running)
3130 		return;
3131 
3132 #ifndef CONFIG_UNWINDER_ORC
3133 	/* Skip 1 to skip this function. */
3134 	skip++;
3135 #endif
3136 	__ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
3137 				tracing_gen_ctx(), skip, NULL);
3138 }
3139 EXPORT_SYMBOL_GPL(trace_dump_stack);
3140 
3141 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3142 static DEFINE_PER_CPU(int, user_stack_count);
3143 
3144 static void
ftrace_trace_userstack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx)3145 ftrace_trace_userstack(struct trace_array *tr,
3146 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3147 {
3148 	struct ring_buffer_event *event;
3149 	struct userstack_entry *entry;
3150 
3151 	if (!(tr->trace_flags & TRACE_ITER(USERSTACKTRACE)))
3152 		return;
3153 
3154 	/*
3155 	 * NMIs can not handle page faults, even with fix ups.
3156 	 * The save user stack can (and often does) fault.
3157 	 */
3158 	if (unlikely(in_nmi()))
3159 		return;
3160 
3161 	/*
3162 	 * prevent recursion, since the user stack tracing may
3163 	 * trigger other kernel events.
3164 	 */
3165 	guard(preempt)();
3166 	if (__this_cpu_read(user_stack_count))
3167 		return;
3168 
3169 	__this_cpu_inc(user_stack_count);
3170 
3171 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3172 					    sizeof(*entry), trace_ctx);
3173 	if (!event)
3174 		goto out_drop_count;
3175 	entry	= ring_buffer_event_data(event);
3176 
3177 	entry->tgid		= current->tgid;
3178 	memset(&entry->caller, 0, sizeof(entry->caller));
3179 
3180 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3181 	__buffer_unlock_commit(buffer, event);
3182 
3183  out_drop_count:
3184 	__this_cpu_dec(user_stack_count);
3185 }
3186 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
ftrace_trace_userstack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx)3187 static void ftrace_trace_userstack(struct trace_array *tr,
3188 				   struct trace_buffer *buffer,
3189 				   unsigned int trace_ctx)
3190 {
3191 }
3192 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3193 
3194 #endif /* CONFIG_STACKTRACE */
3195 
3196 static inline void
func_repeats_set_delta_ts(struct func_repeats_entry * entry,unsigned long long delta)3197 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3198 			  unsigned long long delta)
3199 {
3200 	entry->bottom_delta_ts = delta & U32_MAX;
3201 	entry->top_delta_ts = (delta >> 32);
3202 }
3203 
trace_last_func_repeats(struct trace_array * tr,struct trace_func_repeats * last_info,unsigned int trace_ctx)3204 void trace_last_func_repeats(struct trace_array *tr,
3205 			     struct trace_func_repeats *last_info,
3206 			     unsigned int trace_ctx)
3207 {
3208 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3209 	struct func_repeats_entry *entry;
3210 	struct ring_buffer_event *event;
3211 	u64 delta;
3212 
3213 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3214 					    sizeof(*entry), trace_ctx);
3215 	if (!event)
3216 		return;
3217 
3218 	delta = ring_buffer_event_time_stamp(buffer, event) -
3219 		last_info->ts_last_call;
3220 
3221 	entry = ring_buffer_event_data(event);
3222 	entry->ip = last_info->ip;
3223 	entry->parent_ip = last_info->parent_ip;
3224 	entry->count = last_info->count;
3225 	func_repeats_set_delta_ts(entry, delta);
3226 
3227 	__buffer_unlock_commit(buffer, event);
3228 }
3229 
3230 /* created for use with alloc_percpu */
3231 struct trace_buffer_struct {
3232 	int nesting;
3233 	char buffer[4][TRACE_BUF_SIZE];
3234 };
3235 
3236 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3237 
3238 /*
3239  * This allows for lockless recording.  If we're nested too deeply, then
3240  * this returns NULL.
3241  */
get_trace_buf(void)3242 static char *get_trace_buf(void)
3243 {
3244 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3245 
3246 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3247 		return NULL;
3248 
3249 	buffer->nesting++;
3250 
3251 	/* Interrupts must see nesting incremented before we use the buffer */
3252 	barrier();
3253 	return &buffer->buffer[buffer->nesting - 1][0];
3254 }
3255 
put_trace_buf(void)3256 static void put_trace_buf(void)
3257 {
3258 	/* Don't let the decrement of nesting leak before this */
3259 	barrier();
3260 	this_cpu_dec(trace_percpu_buffer->nesting);
3261 }
3262 
alloc_percpu_trace_buffer(void)3263 static int alloc_percpu_trace_buffer(void)
3264 {
3265 	struct trace_buffer_struct __percpu *buffers;
3266 
3267 	if (trace_percpu_buffer)
3268 		return 0;
3269 
3270 	buffers = alloc_percpu(struct trace_buffer_struct);
3271 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3272 		return -ENOMEM;
3273 
3274 	trace_percpu_buffer = buffers;
3275 	return 0;
3276 }
3277 
3278 static int buffers_allocated;
3279 
trace_printk_init_buffers(void)3280 void trace_printk_init_buffers(void)
3281 {
3282 	if (buffers_allocated)
3283 		return;
3284 
3285 	if (alloc_percpu_trace_buffer())
3286 		return;
3287 
3288 	/* trace_printk() is for debug use only. Don't use it in production. */
3289 
3290 	pr_warn("\n");
3291 	pr_warn("**********************************************************\n");
3292 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3293 	pr_warn("**                                                      **\n");
3294 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3295 	pr_warn("**                                                      **\n");
3296 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3297 	pr_warn("** unsafe for production use.                           **\n");
3298 	pr_warn("**                                                      **\n");
3299 	pr_warn("** If you see this message and you are not debugging    **\n");
3300 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3301 	pr_warn("**                                                      **\n");
3302 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3303 	pr_warn("**********************************************************\n");
3304 
3305 	/* Expand the buffers to set size */
3306 	tracing_update_buffers(&global_trace);
3307 
3308 	buffers_allocated = 1;
3309 
3310 	/*
3311 	 * trace_printk_init_buffers() can be called by modules.
3312 	 * If that happens, then we need to start cmdline recording
3313 	 * directly here. If the global_trace.buffer is already
3314 	 * allocated here, then this was called by module code.
3315 	 */
3316 	if (global_trace.array_buffer.buffer)
3317 		tracing_start_cmdline_record();
3318 }
3319 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3320 
trace_printk_start_comm(void)3321 void trace_printk_start_comm(void)
3322 {
3323 	/* Start tracing comms if trace printk is set */
3324 	if (!buffers_allocated)
3325 		return;
3326 	tracing_start_cmdline_record();
3327 }
3328 
trace_printk_start_stop_comm(int enabled)3329 static void trace_printk_start_stop_comm(int enabled)
3330 {
3331 	if (!buffers_allocated)
3332 		return;
3333 
3334 	if (enabled)
3335 		tracing_start_cmdline_record();
3336 	else
3337 		tracing_stop_cmdline_record();
3338 }
3339 
3340 /**
3341  * trace_vbprintk - write binary msg to tracing buffer
3342  * @ip:    The address of the caller
3343  * @fmt:   The string format to write to the buffer
3344  * @args:  Arguments for @fmt
3345  */
trace_vbprintk(unsigned long ip,const char * fmt,va_list args)3346 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3347 {
3348 	struct ring_buffer_event *event;
3349 	struct trace_buffer *buffer;
3350 	struct trace_array *tr = READ_ONCE(printk_trace);
3351 	struct bprint_entry *entry;
3352 	unsigned int trace_ctx;
3353 	char *tbuffer;
3354 	int len = 0, size;
3355 
3356 	if (!printk_binsafe(tr))
3357 		return trace_vprintk(ip, fmt, args);
3358 
3359 	if (unlikely(tracing_selftest_running || tracing_disabled))
3360 		return 0;
3361 
3362 	/* Don't pollute graph traces with trace_vprintk internals */
3363 	pause_graph_tracing();
3364 
3365 	trace_ctx = tracing_gen_ctx();
3366 	guard(preempt_notrace)();
3367 
3368 	tbuffer = get_trace_buf();
3369 	if (!tbuffer) {
3370 		len = 0;
3371 		goto out_nobuffer;
3372 	}
3373 
3374 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3375 
3376 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3377 		goto out_put;
3378 
3379 	size = sizeof(*entry) + sizeof(u32) * len;
3380 	buffer = tr->array_buffer.buffer;
3381 	scoped_guard(ring_buffer_nest, buffer) {
3382 		event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3383 						    trace_ctx);
3384 		if (!event)
3385 			goto out_put;
3386 		entry = ring_buffer_event_data(event);
3387 		entry->ip			= ip;
3388 		entry->fmt			= fmt;
3389 
3390 		memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3391 		__buffer_unlock_commit(buffer, event);
3392 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3393 	}
3394 out_put:
3395 	put_trace_buf();
3396 
3397 out_nobuffer:
3398 	unpause_graph_tracing();
3399 
3400 	return len;
3401 }
3402 EXPORT_SYMBOL_GPL(trace_vbprintk);
3403 
3404 static __printf(3, 0)
__trace_array_vprintk(struct trace_buffer * buffer,unsigned long ip,const char * fmt,va_list args)3405 int __trace_array_vprintk(struct trace_buffer *buffer,
3406 			  unsigned long ip, const char *fmt, va_list args)
3407 {
3408 	struct ring_buffer_event *event;
3409 	int len = 0, size;
3410 	struct print_entry *entry;
3411 	unsigned int trace_ctx;
3412 	char *tbuffer;
3413 
3414 	if (tracing_disabled)
3415 		return 0;
3416 
3417 	/* Don't pollute graph traces with trace_vprintk internals */
3418 	pause_graph_tracing();
3419 
3420 	trace_ctx = tracing_gen_ctx();
3421 	guard(preempt_notrace)();
3422 
3423 
3424 	tbuffer = get_trace_buf();
3425 	if (!tbuffer) {
3426 		len = 0;
3427 		goto out_nobuffer;
3428 	}
3429 
3430 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3431 
3432 	size = sizeof(*entry) + len + 1;
3433 	scoped_guard(ring_buffer_nest, buffer) {
3434 		event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3435 						    trace_ctx);
3436 		if (!event)
3437 			goto out;
3438 		entry = ring_buffer_event_data(event);
3439 		entry->ip = ip;
3440 
3441 		memcpy(&entry->buf, tbuffer, len + 1);
3442 		__buffer_unlock_commit(buffer, event);
3443 		ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL);
3444 	}
3445 out:
3446 	put_trace_buf();
3447 
3448 out_nobuffer:
3449 	unpause_graph_tracing();
3450 
3451 	return len;
3452 }
3453 
trace_array_vprintk(struct trace_array * tr,unsigned long ip,const char * fmt,va_list args)3454 int trace_array_vprintk(struct trace_array *tr,
3455 			unsigned long ip, const char *fmt, va_list args)
3456 {
3457 	if (tracing_selftest_running && tr == &global_trace)
3458 		return 0;
3459 
3460 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3461 }
3462 
3463 /**
3464  * trace_array_printk - Print a message to a specific instance
3465  * @tr: The instance trace_array descriptor
3466  * @ip: The instruction pointer that this is called from.
3467  * @fmt: The format to print (printf format)
3468  *
3469  * If a subsystem sets up its own instance, they have the right to
3470  * printk strings into their tracing instance buffer using this
3471  * function. Note, this function will not write into the top level
3472  * buffer (use trace_printk() for that), as writing into the top level
3473  * buffer should only have events that can be individually disabled.
3474  * trace_printk() is only used for debugging a kernel, and should not
3475  * be ever incorporated in normal use.
3476  *
3477  * trace_array_printk() can be used, as it will not add noise to the
3478  * top level tracing buffer.
3479  *
3480  * Note, trace_array_init_printk() must be called on @tr before this
3481  * can be used.
3482  */
trace_array_printk(struct trace_array * tr,unsigned long ip,const char * fmt,...)3483 int trace_array_printk(struct trace_array *tr,
3484 		       unsigned long ip, const char *fmt, ...)
3485 {
3486 	int ret;
3487 	va_list ap;
3488 
3489 	if (!tr)
3490 		return -ENOENT;
3491 
3492 	/* This is only allowed for created instances */
3493 	if (tr == &global_trace)
3494 		return 0;
3495 
3496 	if (!(tr->trace_flags & TRACE_ITER(PRINTK)))
3497 		return 0;
3498 
3499 	va_start(ap, fmt);
3500 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3501 	va_end(ap);
3502 	return ret;
3503 }
3504 EXPORT_SYMBOL_GPL(trace_array_printk);
3505 
3506 /**
3507  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3508  * @tr: The trace array to initialize the buffers for
3509  *
3510  * As trace_array_printk() only writes into instances, they are OK to
3511  * have in the kernel (unlike trace_printk()). This needs to be called
3512  * before trace_array_printk() can be used on a trace_array.
3513  */
trace_array_init_printk(struct trace_array * tr)3514 int trace_array_init_printk(struct trace_array *tr)
3515 {
3516 	if (!tr)
3517 		return -ENOENT;
3518 
3519 	/* This is only allowed for created instances */
3520 	if (tr == &global_trace)
3521 		return -EINVAL;
3522 
3523 	return alloc_percpu_trace_buffer();
3524 }
3525 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3526 
trace_array_printk_buf(struct trace_buffer * buffer,unsigned long ip,const char * fmt,...)3527 int trace_array_printk_buf(struct trace_buffer *buffer,
3528 			   unsigned long ip, const char *fmt, ...)
3529 {
3530 	int ret;
3531 	va_list ap;
3532 
3533 	if (!(printk_trace->trace_flags & TRACE_ITER(PRINTK)))
3534 		return 0;
3535 
3536 	va_start(ap, fmt);
3537 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3538 	va_end(ap);
3539 	return ret;
3540 }
3541 
trace_vprintk(unsigned long ip,const char * fmt,va_list args)3542 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3543 {
3544 	return trace_array_vprintk(printk_trace, ip, fmt, args);
3545 }
3546 EXPORT_SYMBOL_GPL(trace_vprintk);
3547 
trace_iterator_increment(struct trace_iterator * iter)3548 static void trace_iterator_increment(struct trace_iterator *iter)
3549 {
3550 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3551 
3552 	iter->idx++;
3553 	if (buf_iter)
3554 		ring_buffer_iter_advance(buf_iter);
3555 }
3556 
3557 static struct trace_entry *
peek_next_entry(struct trace_iterator * iter,int cpu,u64 * ts,unsigned long * lost_events)3558 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3559 		unsigned long *lost_events)
3560 {
3561 	struct ring_buffer_event *event;
3562 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3563 
3564 	if (buf_iter) {
3565 		event = ring_buffer_iter_peek(buf_iter, ts);
3566 		if (lost_events)
3567 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3568 				(unsigned long)-1 : 0;
3569 	} else {
3570 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3571 					 lost_events);
3572 	}
3573 
3574 	if (event) {
3575 		iter->ent_size = ring_buffer_event_length(event);
3576 		return ring_buffer_event_data(event);
3577 	}
3578 	iter->ent_size = 0;
3579 	return NULL;
3580 }
3581 
3582 static struct trace_entry *
__find_next_entry(struct trace_iterator * iter,int * ent_cpu,unsigned long * missing_events,u64 * ent_ts)3583 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3584 		  unsigned long *missing_events, u64 *ent_ts)
3585 {
3586 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3587 	struct trace_entry *ent, *next = NULL;
3588 	unsigned long lost_events = 0, next_lost = 0;
3589 	int cpu_file = iter->cpu_file;
3590 	u64 next_ts = 0, ts;
3591 	int next_cpu = -1;
3592 	int next_size = 0;
3593 	int cpu;
3594 
3595 	/*
3596 	 * If we are in a per_cpu trace file, don't bother by iterating over
3597 	 * all cpu and peek directly.
3598 	 */
3599 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3600 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3601 			return NULL;
3602 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3603 		if (ent_cpu)
3604 			*ent_cpu = cpu_file;
3605 
3606 		return ent;
3607 	}
3608 
3609 	for_each_tracing_cpu(cpu) {
3610 
3611 		if (ring_buffer_empty_cpu(buffer, cpu))
3612 			continue;
3613 
3614 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3615 
3616 		/*
3617 		 * Pick the entry with the smallest timestamp:
3618 		 */
3619 		if (ent && (!next || ts < next_ts)) {
3620 			next = ent;
3621 			next_cpu = cpu;
3622 			next_ts = ts;
3623 			next_lost = lost_events;
3624 			next_size = iter->ent_size;
3625 		}
3626 	}
3627 
3628 	iter->ent_size = next_size;
3629 
3630 	if (ent_cpu)
3631 		*ent_cpu = next_cpu;
3632 
3633 	if (ent_ts)
3634 		*ent_ts = next_ts;
3635 
3636 	if (missing_events)
3637 		*missing_events = next_lost;
3638 
3639 	return next;
3640 }
3641 
3642 #define STATIC_FMT_BUF_SIZE	128
3643 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3644 
trace_iter_expand_format(struct trace_iterator * iter)3645 char *trace_iter_expand_format(struct trace_iterator *iter)
3646 {
3647 	char *tmp;
3648 
3649 	/*
3650 	 * iter->tr is NULL when used with tp_printk, which makes
3651 	 * this get called where it is not safe to call krealloc().
3652 	 */
3653 	if (!iter->tr || iter->fmt == static_fmt_buf)
3654 		return NULL;
3655 
3656 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3657 		       GFP_KERNEL);
3658 	if (tmp) {
3659 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3660 		iter->fmt = tmp;
3661 	}
3662 
3663 	return tmp;
3664 }
3665 
3666 /* Returns true if the string is safe to dereference from an event */
trace_safe_str(struct trace_iterator * iter,const char * str)3667 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3668 {
3669 	unsigned long addr = (unsigned long)str;
3670 	struct trace_event *trace_event;
3671 	struct trace_event_call *event;
3672 
3673 	/* OK if part of the event data */
3674 	if ((addr >= (unsigned long)iter->ent) &&
3675 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3676 		return true;
3677 
3678 	/* OK if part of the temp seq buffer */
3679 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3680 	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3681 		return true;
3682 
3683 	/* Core rodata can not be freed */
3684 	if (is_kernel_rodata(addr))
3685 		return true;
3686 
3687 	if (trace_is_tracepoint_string(str))
3688 		return true;
3689 
3690 	/*
3691 	 * Now this could be a module event, referencing core module
3692 	 * data, which is OK.
3693 	 */
3694 	if (!iter->ent)
3695 		return false;
3696 
3697 	trace_event = ftrace_find_event(iter->ent->type);
3698 	if (!trace_event)
3699 		return false;
3700 
3701 	event = container_of(trace_event, struct trace_event_call, event);
3702 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3703 		return false;
3704 
3705 	/* Would rather have rodata, but this will suffice */
3706 	if (within_module_core(addr, event->module))
3707 		return true;
3708 
3709 	return false;
3710 }
3711 
3712 /**
3713  * ignore_event - Check dereferenced fields while writing to the seq buffer
3714  * @iter: The iterator that holds the seq buffer and the event being printed
3715  *
3716  * At boot up, test_event_printk() will flag any event that dereferences
3717  * a string with "%s" that does exist in the ring buffer. It may still
3718  * be valid, as the string may point to a static string in the kernel
3719  * rodata that never gets freed. But if the string pointer is pointing
3720  * to something that was allocated, there's a chance that it can be freed
3721  * by the time the user reads the trace. This would cause a bad memory
3722  * access by the kernel and possibly crash the system.
3723  *
3724  * This function will check if the event has any fields flagged as needing
3725  * to be checked at runtime and perform those checks.
3726  *
3727  * If it is found that a field is unsafe, it will write into the @iter->seq
3728  * a message stating what was found to be unsafe.
3729  *
3730  * @return: true if the event is unsafe and should be ignored,
3731  *          false otherwise.
3732  */
ignore_event(struct trace_iterator * iter)3733 bool ignore_event(struct trace_iterator *iter)
3734 {
3735 	struct ftrace_event_field *field;
3736 	struct trace_event *trace_event;
3737 	struct trace_event_call *event;
3738 	struct list_head *head;
3739 	struct trace_seq *seq;
3740 	const void *ptr;
3741 
3742 	trace_event = ftrace_find_event(iter->ent->type);
3743 
3744 	seq = &iter->seq;
3745 
3746 	if (!trace_event) {
3747 		trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
3748 		return true;
3749 	}
3750 
3751 	event = container_of(trace_event, struct trace_event_call, event);
3752 	if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
3753 		return false;
3754 
3755 	head = trace_get_fields(event);
3756 	if (!head) {
3757 		trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
3758 				 trace_event_name(event));
3759 		return true;
3760 	}
3761 
3762 	/* Offsets are from the iter->ent that points to the raw event */
3763 	ptr = iter->ent;
3764 
3765 	list_for_each_entry(field, head, link) {
3766 		const char *str;
3767 		bool good;
3768 
3769 		if (!field->needs_test)
3770 			continue;
3771 
3772 		str = *(const char **)(ptr + field->offset);
3773 
3774 		good = trace_safe_str(iter, str);
3775 
3776 		/*
3777 		 * If you hit this warning, it is likely that the
3778 		 * trace event in question used %s on a string that
3779 		 * was saved at the time of the event, but may not be
3780 		 * around when the trace is read. Use __string(),
3781 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3782 		 * instead. See samples/trace_events/trace-events-sample.h
3783 		 * for reference.
3784 		 */
3785 		if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
3786 			      trace_event_name(event), field->name)) {
3787 			trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
3788 					 trace_event_name(event), field->name);
3789 			return true;
3790 		}
3791 	}
3792 	return false;
3793 }
3794 
trace_event_format(struct trace_iterator * iter,const char * fmt)3795 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3796 {
3797 	const char *p, *new_fmt;
3798 	char *q;
3799 
3800 	if (WARN_ON_ONCE(!fmt))
3801 		return fmt;
3802 
3803 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER(HASH_PTR))
3804 		return fmt;
3805 
3806 	p = fmt;
3807 	new_fmt = q = iter->fmt;
3808 	while (*p) {
3809 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3810 			if (!trace_iter_expand_format(iter))
3811 				return fmt;
3812 
3813 			q += iter->fmt - new_fmt;
3814 			new_fmt = iter->fmt;
3815 		}
3816 
3817 		*q++ = *p++;
3818 
3819 		/* Replace %p with %px */
3820 		if (p[-1] == '%') {
3821 			if (p[0] == '%') {
3822 				*q++ = *p++;
3823 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3824 				*q++ = *p++;
3825 				*q++ = 'x';
3826 			}
3827 		}
3828 	}
3829 	*q = '\0';
3830 
3831 	return new_fmt;
3832 }
3833 
3834 #define STATIC_TEMP_BUF_SIZE	128
3835 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3836 
3837 /* Find the next real entry, without updating the iterator itself */
trace_find_next_entry(struct trace_iterator * iter,int * ent_cpu,u64 * ent_ts)3838 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3839 					  int *ent_cpu, u64 *ent_ts)
3840 {
3841 	/* __find_next_entry will reset ent_size */
3842 	int ent_size = iter->ent_size;
3843 	struct trace_entry *entry;
3844 
3845 	/*
3846 	 * If called from ftrace_dump(), then the iter->temp buffer
3847 	 * will be the static_temp_buf and not created from kmalloc.
3848 	 * If the entry size is greater than the buffer, we can
3849 	 * not save it. Just return NULL in that case. This is only
3850 	 * used to add markers when two consecutive events' time
3851 	 * stamps have a large delta. See trace_print_lat_context()
3852 	 */
3853 	if (iter->temp == static_temp_buf &&
3854 	    STATIC_TEMP_BUF_SIZE < ent_size)
3855 		return NULL;
3856 
3857 	/*
3858 	 * The __find_next_entry() may call peek_next_entry(), which may
3859 	 * call ring_buffer_peek() that may make the contents of iter->ent
3860 	 * undefined. Need to copy iter->ent now.
3861 	 */
3862 	if (iter->ent && iter->ent != iter->temp) {
3863 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3864 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3865 			void *temp;
3866 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3867 			if (!temp)
3868 				return NULL;
3869 			kfree(iter->temp);
3870 			iter->temp = temp;
3871 			iter->temp_size = iter->ent_size;
3872 		}
3873 		memcpy(iter->temp, iter->ent, iter->ent_size);
3874 		iter->ent = iter->temp;
3875 	}
3876 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3877 	/* Put back the original ent_size */
3878 	iter->ent_size = ent_size;
3879 
3880 	return entry;
3881 }
3882 
3883 /* Find the next real entry, and increment the iterator to the next entry */
trace_find_next_entry_inc(struct trace_iterator * iter)3884 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3885 {
3886 	iter->ent = __find_next_entry(iter, &iter->cpu,
3887 				      &iter->lost_events, &iter->ts);
3888 
3889 	if (iter->ent)
3890 		trace_iterator_increment(iter);
3891 
3892 	return iter->ent ? iter : NULL;
3893 }
3894 
trace_consume(struct trace_iterator * iter)3895 static void trace_consume(struct trace_iterator *iter)
3896 {
3897 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3898 			    &iter->lost_events);
3899 }
3900 
s_next(struct seq_file * m,void * v,loff_t * pos)3901 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3902 {
3903 	struct trace_iterator *iter = m->private;
3904 	int i = (int)*pos;
3905 	void *ent;
3906 
3907 	WARN_ON_ONCE(iter->leftover);
3908 
3909 	(*pos)++;
3910 
3911 	/* can't go backwards */
3912 	if (iter->idx > i)
3913 		return NULL;
3914 
3915 	if (iter->idx < 0)
3916 		ent = trace_find_next_entry_inc(iter);
3917 	else
3918 		ent = iter;
3919 
3920 	while (ent && iter->idx < i)
3921 		ent = trace_find_next_entry_inc(iter);
3922 
3923 	iter->pos = *pos;
3924 
3925 	return ent;
3926 }
3927 
tracing_iter_reset(struct trace_iterator * iter,int cpu)3928 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3929 {
3930 	struct ring_buffer_iter *buf_iter;
3931 	unsigned long entries = 0;
3932 	u64 ts;
3933 
3934 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3935 
3936 	buf_iter = trace_buffer_iter(iter, cpu);
3937 	if (!buf_iter)
3938 		return;
3939 
3940 	ring_buffer_iter_reset(buf_iter);
3941 
3942 	/*
3943 	 * We could have the case with the max latency tracers
3944 	 * that a reset never took place on a cpu. This is evident
3945 	 * by the timestamp being before the start of the buffer.
3946 	 */
3947 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
3948 		if (ts >= iter->array_buffer->time_start)
3949 			break;
3950 		entries++;
3951 		ring_buffer_iter_advance(buf_iter);
3952 		/* This could be a big loop */
3953 		cond_resched();
3954 	}
3955 
3956 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3957 }
3958 
3959 /*
3960  * The current tracer is copied to avoid a global locking
3961  * all around.
3962  */
s_start(struct seq_file * m,loff_t * pos)3963 static void *s_start(struct seq_file *m, loff_t *pos)
3964 {
3965 	struct trace_iterator *iter = m->private;
3966 	struct trace_array *tr = iter->tr;
3967 	int cpu_file = iter->cpu_file;
3968 	void *p = NULL;
3969 	loff_t l = 0;
3970 	int cpu;
3971 
3972 	mutex_lock(&trace_types_lock);
3973 	if (unlikely(tr->current_trace != iter->trace)) {
3974 		/* Close iter->trace before switching to the new current tracer */
3975 		if (iter->trace->close)
3976 			iter->trace->close(iter);
3977 		iter->trace = tr->current_trace;
3978 		/* Reopen the new current tracer */
3979 		if (iter->trace->open)
3980 			iter->trace->open(iter);
3981 	}
3982 	mutex_unlock(&trace_types_lock);
3983 
3984 #ifdef CONFIG_TRACER_MAX_TRACE
3985 	if (iter->snapshot && iter->trace->use_max_tr)
3986 		return ERR_PTR(-EBUSY);
3987 #endif
3988 
3989 	if (*pos != iter->pos) {
3990 		iter->ent = NULL;
3991 		iter->cpu = 0;
3992 		iter->idx = -1;
3993 
3994 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3995 			for_each_tracing_cpu(cpu)
3996 				tracing_iter_reset(iter, cpu);
3997 		} else
3998 			tracing_iter_reset(iter, cpu_file);
3999 
4000 		iter->leftover = 0;
4001 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4002 			;
4003 
4004 	} else {
4005 		/*
4006 		 * If we overflowed the seq_file before, then we want
4007 		 * to just reuse the trace_seq buffer again.
4008 		 */
4009 		if (iter->leftover)
4010 			p = iter;
4011 		else {
4012 			l = *pos - 1;
4013 			p = s_next(m, p, &l);
4014 		}
4015 	}
4016 
4017 	trace_event_read_lock();
4018 	trace_access_lock(cpu_file);
4019 	return p;
4020 }
4021 
s_stop(struct seq_file * m,void * p)4022 static void s_stop(struct seq_file *m, void *p)
4023 {
4024 	struct trace_iterator *iter = m->private;
4025 
4026 #ifdef CONFIG_TRACER_MAX_TRACE
4027 	if (iter->snapshot && iter->trace->use_max_tr)
4028 		return;
4029 #endif
4030 
4031 	trace_access_unlock(iter->cpu_file);
4032 	trace_event_read_unlock();
4033 }
4034 
4035 static void
get_total_entries_cpu(struct array_buffer * buf,unsigned long * total,unsigned long * entries,int cpu)4036 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4037 		      unsigned long *entries, int cpu)
4038 {
4039 	unsigned long count;
4040 
4041 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4042 	/*
4043 	 * If this buffer has skipped entries, then we hold all
4044 	 * entries for the trace and we need to ignore the
4045 	 * ones before the time stamp.
4046 	 */
4047 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4048 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4049 		/* total is the same as the entries */
4050 		*total = count;
4051 	} else
4052 		*total = count +
4053 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4054 	*entries = count;
4055 }
4056 
4057 static void
get_total_entries(struct array_buffer * buf,unsigned long * total,unsigned long * entries)4058 get_total_entries(struct array_buffer *buf,
4059 		  unsigned long *total, unsigned long *entries)
4060 {
4061 	unsigned long t, e;
4062 	int cpu;
4063 
4064 	*total = 0;
4065 	*entries = 0;
4066 
4067 	for_each_tracing_cpu(cpu) {
4068 		get_total_entries_cpu(buf, &t, &e, cpu);
4069 		*total += t;
4070 		*entries += e;
4071 	}
4072 }
4073 
trace_total_entries_cpu(struct trace_array * tr,int cpu)4074 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4075 {
4076 	unsigned long total, entries;
4077 
4078 	if (!tr)
4079 		tr = &global_trace;
4080 
4081 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4082 
4083 	return entries;
4084 }
4085 
trace_total_entries(struct trace_array * tr)4086 unsigned long trace_total_entries(struct trace_array *tr)
4087 {
4088 	unsigned long total, entries;
4089 
4090 	if (!tr)
4091 		tr = &global_trace;
4092 
4093 	get_total_entries(&tr->array_buffer, &total, &entries);
4094 
4095 	return entries;
4096 }
4097 
print_lat_help_header(struct seq_file * m)4098 static void print_lat_help_header(struct seq_file *m)
4099 {
4100 	seq_puts(m, "#                    _------=> CPU#            \n"
4101 		    "#                   / _-----=> irqs-off/BH-disabled\n"
4102 		    "#                  | / _----=> need-resched    \n"
4103 		    "#                  || / _---=> hardirq/softirq \n"
4104 		    "#                  ||| / _--=> preempt-depth   \n"
4105 		    "#                  |||| / _-=> migrate-disable \n"
4106 		    "#                  ||||| /     delay           \n"
4107 		    "#  cmd     pid     |||||| time  |   caller     \n"
4108 		    "#     \\   /        ||||||  \\    |    /       \n");
4109 }
4110 
print_event_info(struct array_buffer * buf,struct seq_file * m)4111 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4112 {
4113 	unsigned long total;
4114 	unsigned long entries;
4115 
4116 	get_total_entries(buf, &total, &entries);
4117 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4118 		   entries, total, num_online_cpus());
4119 	seq_puts(m, "#\n");
4120 }
4121 
print_func_help_header(struct array_buffer * buf,struct seq_file * m,unsigned int flags)4122 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4123 				   unsigned int flags)
4124 {
4125 	bool tgid = flags & TRACE_ITER(RECORD_TGID);
4126 
4127 	print_event_info(buf, m);
4128 
4129 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4130 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4131 }
4132 
print_func_help_header_irq(struct array_buffer * buf,struct seq_file * m,unsigned int flags)4133 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4134 				       unsigned int flags)
4135 {
4136 	bool tgid = flags & TRACE_ITER(RECORD_TGID);
4137 	static const char space[] = "            ";
4138 	int prec = tgid ? 12 : 2;
4139 
4140 	print_event_info(buf, m);
4141 
4142 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4143 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4144 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4145 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4146 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4147 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4148 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4149 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4150 }
4151 
4152 void
print_trace_header(struct seq_file * m,struct trace_iterator * iter)4153 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4154 {
4155 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4156 	struct array_buffer *buf = iter->array_buffer;
4157 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4158 	struct tracer *type = iter->trace;
4159 	unsigned long entries;
4160 	unsigned long total;
4161 	const char *name = type->name;
4162 
4163 	get_total_entries(buf, &total, &entries);
4164 
4165 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4166 		   name, init_utsname()->release);
4167 	seq_puts(m, "# -----------------------------------"
4168 		 "---------------------------------\n");
4169 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4170 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4171 		   nsecs_to_usecs(data->saved_latency),
4172 		   entries,
4173 		   total,
4174 		   buf->cpu,
4175 		   preempt_model_str(),
4176 		   /* These are reserved for later use */
4177 		   0, 0, 0, 0);
4178 #ifdef CONFIG_SMP
4179 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4180 #else
4181 	seq_puts(m, ")\n");
4182 #endif
4183 	seq_puts(m, "#    -----------------\n");
4184 	seq_printf(m, "#    | task: %.16s-%d "
4185 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4186 		   data->comm, data->pid,
4187 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4188 		   data->policy, data->rt_priority);
4189 	seq_puts(m, "#    -----------------\n");
4190 
4191 	if (data->critical_start) {
4192 		seq_puts(m, "#  => started at: ");
4193 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4194 		trace_print_seq(m, &iter->seq);
4195 		seq_puts(m, "\n#  => ended at:   ");
4196 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4197 		trace_print_seq(m, &iter->seq);
4198 		seq_puts(m, "\n#\n");
4199 	}
4200 
4201 	seq_puts(m, "#\n");
4202 }
4203 
test_cpu_buff_start(struct trace_iterator * iter)4204 static void test_cpu_buff_start(struct trace_iterator *iter)
4205 {
4206 	struct trace_seq *s = &iter->seq;
4207 	struct trace_array *tr = iter->tr;
4208 
4209 	if (!(tr->trace_flags & TRACE_ITER(ANNOTATE)))
4210 		return;
4211 
4212 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4213 		return;
4214 
4215 	if (cpumask_available(iter->started) &&
4216 	    cpumask_test_cpu(iter->cpu, iter->started))
4217 		return;
4218 
4219 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4220 		return;
4221 
4222 	if (cpumask_available(iter->started))
4223 		cpumask_set_cpu(iter->cpu, iter->started);
4224 
4225 	/* Don't print started cpu buffer for the first entry of the trace */
4226 	if (iter->idx > 1)
4227 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4228 				iter->cpu);
4229 }
4230 
4231 #ifdef CONFIG_FTRACE_SYSCALLS
is_syscall_event(struct trace_event * event)4232 static bool is_syscall_event(struct trace_event *event)
4233 {
4234 	return (event->funcs == &enter_syscall_print_funcs) ||
4235 	       (event->funcs == &exit_syscall_print_funcs);
4236 
4237 }
4238 #define syscall_buf_size CONFIG_TRACE_SYSCALL_BUF_SIZE_DEFAULT
4239 #else
is_syscall_event(struct trace_event * event)4240 static inline bool is_syscall_event(struct trace_event *event)
4241 {
4242 	return false;
4243 }
4244 #define syscall_buf_size 0
4245 #endif /* CONFIG_FTRACE_SYSCALLS */
4246 
print_trace_fmt(struct trace_iterator * iter)4247 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4248 {
4249 	struct trace_array *tr = iter->tr;
4250 	struct trace_seq *s = &iter->seq;
4251 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4252 	struct trace_entry *entry;
4253 	struct trace_event *event;
4254 
4255 	entry = iter->ent;
4256 
4257 	test_cpu_buff_start(iter);
4258 
4259 	event = ftrace_find_event(entry->type);
4260 
4261 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
4262 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4263 			trace_print_lat_context(iter);
4264 		else
4265 			trace_print_context(iter);
4266 	}
4267 
4268 	if (trace_seq_has_overflowed(s))
4269 		return TRACE_TYPE_PARTIAL_LINE;
4270 
4271 	if (event) {
4272 		if (tr->trace_flags & TRACE_ITER(FIELDS))
4273 			return print_event_fields(iter, event);
4274 		/*
4275 		 * For TRACE_EVENT() events, the print_fmt is not
4276 		 * safe to use if the array has delta offsets
4277 		 * Force printing via the fields.
4278 		 */
4279 		if ((tr->text_delta)) {
4280 			/* ftrace and system call events are still OK */
4281 			if ((event->type > __TRACE_LAST_TYPE) &&
4282 			    !is_syscall_event(event))
4283 			return print_event_fields(iter, event);
4284 		}
4285 		return event->funcs->trace(iter, sym_flags, event);
4286 	}
4287 
4288 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4289 
4290 	return trace_handle_return(s);
4291 }
4292 
print_raw_fmt(struct trace_iterator * iter)4293 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4294 {
4295 	struct trace_array *tr = iter->tr;
4296 	struct trace_seq *s = &iter->seq;
4297 	struct trace_entry *entry;
4298 	struct trace_event *event;
4299 
4300 	entry = iter->ent;
4301 
4302 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO))
4303 		trace_seq_printf(s, "%d %d %llu ",
4304 				 entry->pid, iter->cpu, iter->ts);
4305 
4306 	if (trace_seq_has_overflowed(s))
4307 		return TRACE_TYPE_PARTIAL_LINE;
4308 
4309 	event = ftrace_find_event(entry->type);
4310 	if (event)
4311 		return event->funcs->raw(iter, 0, event);
4312 
4313 	trace_seq_printf(s, "%d ?\n", entry->type);
4314 
4315 	return trace_handle_return(s);
4316 }
4317 
print_hex_fmt(struct trace_iterator * iter)4318 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4319 {
4320 	struct trace_array *tr = iter->tr;
4321 	struct trace_seq *s = &iter->seq;
4322 	unsigned char newline = '\n';
4323 	struct trace_entry *entry;
4324 	struct trace_event *event;
4325 
4326 	entry = iter->ent;
4327 
4328 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
4329 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4330 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4331 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4332 		if (trace_seq_has_overflowed(s))
4333 			return TRACE_TYPE_PARTIAL_LINE;
4334 	}
4335 
4336 	event = ftrace_find_event(entry->type);
4337 	if (event) {
4338 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4339 		if (ret != TRACE_TYPE_HANDLED)
4340 			return ret;
4341 	}
4342 
4343 	SEQ_PUT_FIELD(s, newline);
4344 
4345 	return trace_handle_return(s);
4346 }
4347 
print_bin_fmt(struct trace_iterator * iter)4348 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4349 {
4350 	struct trace_array *tr = iter->tr;
4351 	struct trace_seq *s = &iter->seq;
4352 	struct trace_entry *entry;
4353 	struct trace_event *event;
4354 
4355 	entry = iter->ent;
4356 
4357 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
4358 		SEQ_PUT_FIELD(s, entry->pid);
4359 		SEQ_PUT_FIELD(s, iter->cpu);
4360 		SEQ_PUT_FIELD(s, iter->ts);
4361 		if (trace_seq_has_overflowed(s))
4362 			return TRACE_TYPE_PARTIAL_LINE;
4363 	}
4364 
4365 	event = ftrace_find_event(entry->type);
4366 	return event ? event->funcs->binary(iter, 0, event) :
4367 		TRACE_TYPE_HANDLED;
4368 }
4369 
trace_empty(struct trace_iterator * iter)4370 int trace_empty(struct trace_iterator *iter)
4371 {
4372 	struct ring_buffer_iter *buf_iter;
4373 	int cpu;
4374 
4375 	/* If we are looking at one CPU buffer, only check that one */
4376 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4377 		cpu = iter->cpu_file;
4378 		buf_iter = trace_buffer_iter(iter, cpu);
4379 		if (buf_iter) {
4380 			if (!ring_buffer_iter_empty(buf_iter))
4381 				return 0;
4382 		} else {
4383 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4384 				return 0;
4385 		}
4386 		return 1;
4387 	}
4388 
4389 	for_each_tracing_cpu(cpu) {
4390 		buf_iter = trace_buffer_iter(iter, cpu);
4391 		if (buf_iter) {
4392 			if (!ring_buffer_iter_empty(buf_iter))
4393 				return 0;
4394 		} else {
4395 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4396 				return 0;
4397 		}
4398 	}
4399 
4400 	return 1;
4401 }
4402 
4403 /*  Called with trace_event_read_lock() held. */
print_trace_line(struct trace_iterator * iter)4404 enum print_line_t print_trace_line(struct trace_iterator *iter)
4405 {
4406 	struct trace_array *tr = iter->tr;
4407 	unsigned long trace_flags = tr->trace_flags;
4408 	enum print_line_t ret;
4409 
4410 	if (iter->lost_events) {
4411 		if (iter->lost_events == (unsigned long)-1)
4412 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4413 					 iter->cpu);
4414 		else
4415 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4416 					 iter->cpu, iter->lost_events);
4417 		if (trace_seq_has_overflowed(&iter->seq))
4418 			return TRACE_TYPE_PARTIAL_LINE;
4419 	}
4420 
4421 	if (iter->trace && iter->trace->print_line) {
4422 		ret = iter->trace->print_line(iter);
4423 		if (ret != TRACE_TYPE_UNHANDLED)
4424 			return ret;
4425 	}
4426 
4427 	if (iter->ent->type == TRACE_BPUTS &&
4428 			trace_flags & TRACE_ITER(PRINTK) &&
4429 			trace_flags & TRACE_ITER(PRINTK_MSGONLY))
4430 		return trace_print_bputs_msg_only(iter);
4431 
4432 	if (iter->ent->type == TRACE_BPRINT &&
4433 			trace_flags & TRACE_ITER(PRINTK) &&
4434 			trace_flags & TRACE_ITER(PRINTK_MSGONLY))
4435 		return trace_print_bprintk_msg_only(iter);
4436 
4437 	if (iter->ent->type == TRACE_PRINT &&
4438 			trace_flags & TRACE_ITER(PRINTK) &&
4439 			trace_flags & TRACE_ITER(PRINTK_MSGONLY))
4440 		return trace_print_printk_msg_only(iter);
4441 
4442 	if (trace_flags & TRACE_ITER(BIN))
4443 		return print_bin_fmt(iter);
4444 
4445 	if (trace_flags & TRACE_ITER(HEX))
4446 		return print_hex_fmt(iter);
4447 
4448 	if (trace_flags & TRACE_ITER(RAW))
4449 		return print_raw_fmt(iter);
4450 
4451 	return print_trace_fmt(iter);
4452 }
4453 
trace_latency_header(struct seq_file * m)4454 void trace_latency_header(struct seq_file *m)
4455 {
4456 	struct trace_iterator *iter = m->private;
4457 	struct trace_array *tr = iter->tr;
4458 
4459 	/* print nothing if the buffers are empty */
4460 	if (trace_empty(iter))
4461 		return;
4462 
4463 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4464 		print_trace_header(m, iter);
4465 
4466 	if (!(tr->trace_flags & TRACE_ITER(VERBOSE)))
4467 		print_lat_help_header(m);
4468 }
4469 
trace_default_header(struct seq_file * m)4470 void trace_default_header(struct seq_file *m)
4471 {
4472 	struct trace_iterator *iter = m->private;
4473 	struct trace_array *tr = iter->tr;
4474 	unsigned long trace_flags = tr->trace_flags;
4475 
4476 	if (!(trace_flags & TRACE_ITER(CONTEXT_INFO)))
4477 		return;
4478 
4479 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4480 		/* print nothing if the buffers are empty */
4481 		if (trace_empty(iter))
4482 			return;
4483 		print_trace_header(m, iter);
4484 		if (!(trace_flags & TRACE_ITER(VERBOSE)))
4485 			print_lat_help_header(m);
4486 	} else {
4487 		if (!(trace_flags & TRACE_ITER(VERBOSE))) {
4488 			if (trace_flags & TRACE_ITER(IRQ_INFO))
4489 				print_func_help_header_irq(iter->array_buffer,
4490 							   m, trace_flags);
4491 			else
4492 				print_func_help_header(iter->array_buffer, m,
4493 						       trace_flags);
4494 		}
4495 	}
4496 }
4497 
test_ftrace_alive(struct seq_file * m)4498 static void test_ftrace_alive(struct seq_file *m)
4499 {
4500 	if (!ftrace_is_dead())
4501 		return;
4502 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4503 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4504 }
4505 
4506 #ifdef CONFIG_TRACER_MAX_TRACE
show_snapshot_main_help(struct seq_file * m)4507 static void show_snapshot_main_help(struct seq_file *m)
4508 {
4509 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4510 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4511 		    "#                      Takes a snapshot of the main buffer.\n"
4512 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4513 		    "#                      (Doesn't have to be '2' works with any number that\n"
4514 		    "#                       is not a '0' or '1')\n");
4515 }
4516 
show_snapshot_percpu_help(struct seq_file * m)4517 static void show_snapshot_percpu_help(struct seq_file *m)
4518 {
4519 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4520 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4521 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4522 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4523 #else
4524 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4525 		    "#                     Must use main snapshot file to allocate.\n");
4526 #endif
4527 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4528 		    "#                      (Doesn't have to be '2' works with any number that\n"
4529 		    "#                       is not a '0' or '1')\n");
4530 }
4531 
print_snapshot_help(struct seq_file * m,struct trace_iterator * iter)4532 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4533 {
4534 	if (iter->tr->allocated_snapshot)
4535 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4536 	else
4537 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4538 
4539 	seq_puts(m, "# Snapshot commands:\n");
4540 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4541 		show_snapshot_main_help(m);
4542 	else
4543 		show_snapshot_percpu_help(m);
4544 }
4545 #else
4546 /* Should never be called */
print_snapshot_help(struct seq_file * m,struct trace_iterator * iter)4547 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4548 #endif
4549 
s_show(struct seq_file * m,void * v)4550 static int s_show(struct seq_file *m, void *v)
4551 {
4552 	struct trace_iterator *iter = v;
4553 	int ret;
4554 
4555 	if (iter->ent == NULL) {
4556 		if (iter->tr) {
4557 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4558 			seq_puts(m, "#\n");
4559 			test_ftrace_alive(m);
4560 		}
4561 		if (iter->snapshot && trace_empty(iter))
4562 			print_snapshot_help(m, iter);
4563 		else if (iter->trace && iter->trace->print_header)
4564 			iter->trace->print_header(m);
4565 		else
4566 			trace_default_header(m);
4567 
4568 	} else if (iter->leftover) {
4569 		/*
4570 		 * If we filled the seq_file buffer earlier, we
4571 		 * want to just show it now.
4572 		 */
4573 		ret = trace_print_seq(m, &iter->seq);
4574 
4575 		/* ret should this time be zero, but you never know */
4576 		iter->leftover = ret;
4577 
4578 	} else {
4579 		ret = print_trace_line(iter);
4580 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4581 			iter->seq.full = 0;
4582 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4583 		}
4584 		ret = trace_print_seq(m, &iter->seq);
4585 		/*
4586 		 * If we overflow the seq_file buffer, then it will
4587 		 * ask us for this data again at start up.
4588 		 * Use that instead.
4589 		 *  ret is 0 if seq_file write succeeded.
4590 		 *        -1 otherwise.
4591 		 */
4592 		iter->leftover = ret;
4593 	}
4594 
4595 	return 0;
4596 }
4597 
4598 /*
4599  * Should be used after trace_array_get(), trace_types_lock
4600  * ensures that i_cdev was already initialized.
4601  */
tracing_get_cpu(struct inode * inode)4602 static inline int tracing_get_cpu(struct inode *inode)
4603 {
4604 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4605 		return (long)inode->i_cdev - 1;
4606 	return RING_BUFFER_ALL_CPUS;
4607 }
4608 
4609 static const struct seq_operations tracer_seq_ops = {
4610 	.start		= s_start,
4611 	.next		= s_next,
4612 	.stop		= s_stop,
4613 	.show		= s_show,
4614 };
4615 
4616 /*
4617  * Note, as iter itself can be allocated and freed in different
4618  * ways, this function is only used to free its content, and not
4619  * the iterator itself. The only requirement to all the allocations
4620  * is that it must zero all fields (kzalloc), as freeing works with
4621  * ethier allocated content or NULL.
4622  */
free_trace_iter_content(struct trace_iterator * iter)4623 static void free_trace_iter_content(struct trace_iterator *iter)
4624 {
4625 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
4626 	if (iter->fmt != static_fmt_buf)
4627 		kfree(iter->fmt);
4628 
4629 	kfree(iter->temp);
4630 	kfree(iter->buffer_iter);
4631 	mutex_destroy(&iter->mutex);
4632 	free_cpumask_var(iter->started);
4633 }
4634 
4635 static struct trace_iterator *
__tracing_open(struct inode * inode,struct file * file,bool snapshot)4636 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4637 {
4638 	struct trace_array *tr = inode->i_private;
4639 	struct trace_iterator *iter;
4640 	int cpu;
4641 
4642 	if (tracing_disabled)
4643 		return ERR_PTR(-ENODEV);
4644 
4645 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4646 	if (!iter)
4647 		return ERR_PTR(-ENOMEM);
4648 
4649 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4650 				    GFP_KERNEL);
4651 	if (!iter->buffer_iter)
4652 		goto release;
4653 
4654 	/*
4655 	 * trace_find_next_entry() may need to save off iter->ent.
4656 	 * It will place it into the iter->temp buffer. As most
4657 	 * events are less than 128, allocate a buffer of that size.
4658 	 * If one is greater, then trace_find_next_entry() will
4659 	 * allocate a new buffer to adjust for the bigger iter->ent.
4660 	 * It's not critical if it fails to get allocated here.
4661 	 */
4662 	iter->temp = kmalloc(128, GFP_KERNEL);
4663 	if (iter->temp)
4664 		iter->temp_size = 128;
4665 
4666 	/*
4667 	 * trace_event_printf() may need to modify given format
4668 	 * string to replace %p with %px so that it shows real address
4669 	 * instead of hash value. However, that is only for the event
4670 	 * tracing, other tracer may not need. Defer the allocation
4671 	 * until it is needed.
4672 	 */
4673 	iter->fmt = NULL;
4674 	iter->fmt_size = 0;
4675 
4676 	mutex_lock(&trace_types_lock);
4677 	iter->trace = tr->current_trace;
4678 
4679 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4680 		goto fail;
4681 
4682 	iter->tr = tr;
4683 
4684 #ifdef CONFIG_TRACER_MAX_TRACE
4685 	/* Currently only the top directory has a snapshot */
4686 	if (tr->current_trace->print_max || snapshot)
4687 		iter->array_buffer = &tr->max_buffer;
4688 	else
4689 #endif
4690 		iter->array_buffer = &tr->array_buffer;
4691 	iter->snapshot = snapshot;
4692 	iter->pos = -1;
4693 	iter->cpu_file = tracing_get_cpu(inode);
4694 	mutex_init(&iter->mutex);
4695 
4696 	/* Notify the tracer early; before we stop tracing. */
4697 	if (iter->trace->open)
4698 		iter->trace->open(iter);
4699 
4700 	/* Annotate start of buffers if we had overruns */
4701 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4702 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4703 
4704 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4705 	if (trace_clocks[tr->clock_id].in_ns)
4706 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4707 
4708 	/*
4709 	 * If pause-on-trace is enabled, then stop the trace while
4710 	 * dumping, unless this is the "snapshot" file
4711 	 */
4712 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER(PAUSE_ON_TRACE)))
4713 		tracing_stop_tr(tr);
4714 
4715 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4716 		for_each_tracing_cpu(cpu) {
4717 			iter->buffer_iter[cpu] =
4718 				ring_buffer_read_start(iter->array_buffer->buffer,
4719 						       cpu, GFP_KERNEL);
4720 			tracing_iter_reset(iter, cpu);
4721 		}
4722 	} else {
4723 		cpu = iter->cpu_file;
4724 		iter->buffer_iter[cpu] =
4725 			ring_buffer_read_start(iter->array_buffer->buffer,
4726 					       cpu, GFP_KERNEL);
4727 		tracing_iter_reset(iter, cpu);
4728 	}
4729 
4730 	mutex_unlock(&trace_types_lock);
4731 
4732 	return iter;
4733 
4734  fail:
4735 	mutex_unlock(&trace_types_lock);
4736 	free_trace_iter_content(iter);
4737 release:
4738 	seq_release_private(inode, file);
4739 	return ERR_PTR(-ENOMEM);
4740 }
4741 
tracing_open_generic(struct inode * inode,struct file * filp)4742 int tracing_open_generic(struct inode *inode, struct file *filp)
4743 {
4744 	int ret;
4745 
4746 	ret = tracing_check_open_get_tr(NULL);
4747 	if (ret)
4748 		return ret;
4749 
4750 	filp->private_data = inode->i_private;
4751 	return 0;
4752 }
4753 
tracing_is_disabled(void)4754 bool tracing_is_disabled(void)
4755 {
4756 	return (tracing_disabled) ? true: false;
4757 }
4758 
4759 /*
4760  * Open and update trace_array ref count.
4761  * Must have the current trace_array passed to it.
4762  */
tracing_open_generic_tr(struct inode * inode,struct file * filp)4763 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4764 {
4765 	struct trace_array *tr = inode->i_private;
4766 	int ret;
4767 
4768 	ret = tracing_check_open_get_tr(tr);
4769 	if (ret)
4770 		return ret;
4771 
4772 	filp->private_data = inode->i_private;
4773 
4774 	return 0;
4775 }
4776 
4777 /*
4778  * The private pointer of the inode is the trace_event_file.
4779  * Update the tr ref count associated to it.
4780  */
tracing_open_file_tr(struct inode * inode,struct file * filp)4781 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4782 {
4783 	struct trace_event_file *file = inode->i_private;
4784 	int ret;
4785 
4786 	ret = tracing_check_open_get_tr(file->tr);
4787 	if (ret)
4788 		return ret;
4789 
4790 	guard(mutex)(&event_mutex);
4791 
4792 	/* Fail if the file is marked for removal */
4793 	if (file->flags & EVENT_FILE_FL_FREED) {
4794 		trace_array_put(file->tr);
4795 		return -ENODEV;
4796 	} else {
4797 		event_file_get(file);
4798 	}
4799 
4800 	filp->private_data = inode->i_private;
4801 
4802 	return 0;
4803 }
4804 
tracing_release_file_tr(struct inode * inode,struct file * filp)4805 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4806 {
4807 	struct trace_event_file *file = inode->i_private;
4808 
4809 	trace_array_put(file->tr);
4810 	event_file_put(file);
4811 
4812 	return 0;
4813 }
4814 
tracing_single_release_file_tr(struct inode * inode,struct file * filp)4815 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4816 {
4817 	tracing_release_file_tr(inode, filp);
4818 	return single_release(inode, filp);
4819 }
4820 
tracing_release(struct inode * inode,struct file * file)4821 static int tracing_release(struct inode *inode, struct file *file)
4822 {
4823 	struct trace_array *tr = inode->i_private;
4824 	struct seq_file *m = file->private_data;
4825 	struct trace_iterator *iter;
4826 	int cpu;
4827 
4828 	if (!(file->f_mode & FMODE_READ)) {
4829 		trace_array_put(tr);
4830 		return 0;
4831 	}
4832 
4833 	/* Writes do not use seq_file */
4834 	iter = m->private;
4835 	mutex_lock(&trace_types_lock);
4836 
4837 	for_each_tracing_cpu(cpu) {
4838 		if (iter->buffer_iter[cpu])
4839 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4840 	}
4841 
4842 	if (iter->trace && iter->trace->close)
4843 		iter->trace->close(iter);
4844 
4845 	if (!iter->snapshot && tr->stop_count)
4846 		/* reenable tracing if it was previously enabled */
4847 		tracing_start_tr(tr);
4848 
4849 	__trace_array_put(tr);
4850 
4851 	mutex_unlock(&trace_types_lock);
4852 
4853 	free_trace_iter_content(iter);
4854 	seq_release_private(inode, file);
4855 
4856 	return 0;
4857 }
4858 
tracing_release_generic_tr(struct inode * inode,struct file * file)4859 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4860 {
4861 	struct trace_array *tr = inode->i_private;
4862 
4863 	trace_array_put(tr);
4864 	return 0;
4865 }
4866 
tracing_single_release_tr(struct inode * inode,struct file * file)4867 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4868 {
4869 	struct trace_array *tr = inode->i_private;
4870 
4871 	trace_array_put(tr);
4872 
4873 	return single_release(inode, file);
4874 }
4875 
tracing_open(struct inode * inode,struct file * file)4876 static int tracing_open(struct inode *inode, struct file *file)
4877 {
4878 	struct trace_array *tr = inode->i_private;
4879 	struct trace_iterator *iter;
4880 	int ret;
4881 
4882 	ret = tracing_check_open_get_tr(tr);
4883 	if (ret)
4884 		return ret;
4885 
4886 	/* If this file was open for write, then erase contents */
4887 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4888 		int cpu = tracing_get_cpu(inode);
4889 		struct array_buffer *trace_buf = &tr->array_buffer;
4890 
4891 #ifdef CONFIG_TRACER_MAX_TRACE
4892 		if (tr->current_trace->print_max)
4893 			trace_buf = &tr->max_buffer;
4894 #endif
4895 
4896 		if (cpu == RING_BUFFER_ALL_CPUS)
4897 			tracing_reset_online_cpus(trace_buf);
4898 		else
4899 			tracing_reset_cpu(trace_buf, cpu);
4900 	}
4901 
4902 	if (file->f_mode & FMODE_READ) {
4903 		iter = __tracing_open(inode, file, false);
4904 		if (IS_ERR(iter))
4905 			ret = PTR_ERR(iter);
4906 		else if (tr->trace_flags & TRACE_ITER(LATENCY_FMT))
4907 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4908 	}
4909 
4910 	if (ret < 0)
4911 		trace_array_put(tr);
4912 
4913 	return ret;
4914 }
4915 
4916 /*
4917  * Some tracers are not suitable for instance buffers.
4918  * A tracer is always available for the global array (toplevel)
4919  * or if it explicitly states that it is.
4920  */
4921 static bool
trace_ok_for_array(struct tracer * t,struct trace_array * tr)4922 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4923 {
4924 #ifdef CONFIG_TRACER_SNAPSHOT
4925 	/* arrays with mapped buffer range do not have snapshots */
4926 	if (tr->range_addr_start && t->use_max_tr)
4927 		return false;
4928 #endif
4929 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4930 }
4931 
4932 /* Find the next tracer that this trace array may use */
4933 static struct tracer *
get_tracer_for_array(struct trace_array * tr,struct tracer * t)4934 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4935 {
4936 	while (t && !trace_ok_for_array(t, tr))
4937 		t = t->next;
4938 
4939 	return t;
4940 }
4941 
4942 static void *
t_next(struct seq_file * m,void * v,loff_t * pos)4943 t_next(struct seq_file *m, void *v, loff_t *pos)
4944 {
4945 	struct trace_array *tr = m->private;
4946 	struct tracer *t = v;
4947 
4948 	(*pos)++;
4949 
4950 	if (t)
4951 		t = get_tracer_for_array(tr, t->next);
4952 
4953 	return t;
4954 }
4955 
t_start(struct seq_file * m,loff_t * pos)4956 static void *t_start(struct seq_file *m, loff_t *pos)
4957 {
4958 	struct trace_array *tr = m->private;
4959 	struct tracer *t;
4960 	loff_t l = 0;
4961 
4962 	mutex_lock(&trace_types_lock);
4963 
4964 	t = get_tracer_for_array(tr, trace_types);
4965 	for (; t && l < *pos; t = t_next(m, t, &l))
4966 			;
4967 
4968 	return t;
4969 }
4970 
t_stop(struct seq_file * m,void * p)4971 static void t_stop(struct seq_file *m, void *p)
4972 {
4973 	mutex_unlock(&trace_types_lock);
4974 }
4975 
t_show(struct seq_file * m,void * v)4976 static int t_show(struct seq_file *m, void *v)
4977 {
4978 	struct tracer *t = v;
4979 
4980 	if (!t)
4981 		return 0;
4982 
4983 	seq_puts(m, t->name);
4984 	if (t->next)
4985 		seq_putc(m, ' ');
4986 	else
4987 		seq_putc(m, '\n');
4988 
4989 	return 0;
4990 }
4991 
4992 static const struct seq_operations show_traces_seq_ops = {
4993 	.start		= t_start,
4994 	.next		= t_next,
4995 	.stop		= t_stop,
4996 	.show		= t_show,
4997 };
4998 
show_traces_open(struct inode * inode,struct file * file)4999 static int show_traces_open(struct inode *inode, struct file *file)
5000 {
5001 	struct trace_array *tr = inode->i_private;
5002 	struct seq_file *m;
5003 	int ret;
5004 
5005 	ret = tracing_check_open_get_tr(tr);
5006 	if (ret)
5007 		return ret;
5008 
5009 	ret = seq_open(file, &show_traces_seq_ops);
5010 	if (ret) {
5011 		trace_array_put(tr);
5012 		return ret;
5013 	}
5014 
5015 	m = file->private_data;
5016 	m->private = tr;
5017 
5018 	return 0;
5019 }
5020 
tracing_seq_release(struct inode * inode,struct file * file)5021 static int tracing_seq_release(struct inode *inode, struct file *file)
5022 {
5023 	struct trace_array *tr = inode->i_private;
5024 
5025 	trace_array_put(tr);
5026 	return seq_release(inode, file);
5027 }
5028 
5029 static ssize_t
tracing_write_stub(struct file * filp,const char __user * ubuf,size_t count,loff_t * ppos)5030 tracing_write_stub(struct file *filp, const char __user *ubuf,
5031 		   size_t count, loff_t *ppos)
5032 {
5033 	return count;
5034 }
5035 
tracing_lseek(struct file * file,loff_t offset,int whence)5036 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5037 {
5038 	int ret;
5039 
5040 	if (file->f_mode & FMODE_READ)
5041 		ret = seq_lseek(file, offset, whence);
5042 	else
5043 		file->f_pos = ret = 0;
5044 
5045 	return ret;
5046 }
5047 
5048 static const struct file_operations tracing_fops = {
5049 	.open		= tracing_open,
5050 	.read		= seq_read,
5051 	.read_iter	= seq_read_iter,
5052 	.splice_read	= copy_splice_read,
5053 	.write		= tracing_write_stub,
5054 	.llseek		= tracing_lseek,
5055 	.release	= tracing_release,
5056 };
5057 
5058 static const struct file_operations show_traces_fops = {
5059 	.open		= show_traces_open,
5060 	.read		= seq_read,
5061 	.llseek		= seq_lseek,
5062 	.release	= tracing_seq_release,
5063 };
5064 
5065 static ssize_t
tracing_cpumask_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)5066 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5067 		     size_t count, loff_t *ppos)
5068 {
5069 	struct trace_array *tr = file_inode(filp)->i_private;
5070 	char *mask_str __free(kfree) = NULL;
5071 	int len;
5072 
5073 	len = snprintf(NULL, 0, "%*pb\n",
5074 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5075 	mask_str = kmalloc(len, GFP_KERNEL);
5076 	if (!mask_str)
5077 		return -ENOMEM;
5078 
5079 	len = snprintf(mask_str, len, "%*pb\n",
5080 		       cpumask_pr_args(tr->tracing_cpumask));
5081 	if (len >= count)
5082 		return -EINVAL;
5083 
5084 	return simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5085 }
5086 
tracing_set_cpumask(struct trace_array * tr,cpumask_var_t tracing_cpumask_new)5087 int tracing_set_cpumask(struct trace_array *tr,
5088 			cpumask_var_t tracing_cpumask_new)
5089 {
5090 	int cpu;
5091 
5092 	if (!tr)
5093 		return -EINVAL;
5094 
5095 	local_irq_disable();
5096 	arch_spin_lock(&tr->max_lock);
5097 	for_each_tracing_cpu(cpu) {
5098 		/*
5099 		 * Increase/decrease the disabled counter if we are
5100 		 * about to flip a bit in the cpumask:
5101 		 */
5102 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5103 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5104 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5105 #ifdef CONFIG_TRACER_MAX_TRACE
5106 			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5107 #endif
5108 		}
5109 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5110 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5111 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5112 #ifdef CONFIG_TRACER_MAX_TRACE
5113 			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5114 #endif
5115 		}
5116 	}
5117 	arch_spin_unlock(&tr->max_lock);
5118 	local_irq_enable();
5119 
5120 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5121 
5122 	return 0;
5123 }
5124 
5125 static ssize_t
tracing_cpumask_write(struct file * filp,const char __user * ubuf,size_t count,loff_t * ppos)5126 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5127 		      size_t count, loff_t *ppos)
5128 {
5129 	struct trace_array *tr = file_inode(filp)->i_private;
5130 	cpumask_var_t tracing_cpumask_new;
5131 	int err;
5132 
5133 	if (count == 0 || count > KMALLOC_MAX_SIZE)
5134 		return -EINVAL;
5135 
5136 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5137 		return -ENOMEM;
5138 
5139 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5140 	if (err)
5141 		goto err_free;
5142 
5143 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5144 	if (err)
5145 		goto err_free;
5146 
5147 	free_cpumask_var(tracing_cpumask_new);
5148 
5149 	return count;
5150 
5151 err_free:
5152 	free_cpumask_var(tracing_cpumask_new);
5153 
5154 	return err;
5155 }
5156 
5157 static const struct file_operations tracing_cpumask_fops = {
5158 	.open		= tracing_open_generic_tr,
5159 	.read		= tracing_cpumask_read,
5160 	.write		= tracing_cpumask_write,
5161 	.release	= tracing_release_generic_tr,
5162 	.llseek		= generic_file_llseek,
5163 };
5164 
tracing_trace_options_show(struct seq_file * m,void * v)5165 static int tracing_trace_options_show(struct seq_file *m, void *v)
5166 {
5167 	struct tracer_opt *trace_opts;
5168 	struct trace_array *tr = m->private;
5169 	struct tracer_flags *flags;
5170 	u32 tracer_flags;
5171 	int i;
5172 
5173 	guard(mutex)(&trace_types_lock);
5174 
5175 	for (i = 0; trace_options[i]; i++) {
5176 		if (tr->trace_flags & (1ULL << i))
5177 			seq_printf(m, "%s\n", trace_options[i]);
5178 		else
5179 			seq_printf(m, "no%s\n", trace_options[i]);
5180 	}
5181 
5182 	flags = tr->current_trace_flags;
5183 	if (!flags || !flags->opts)
5184 		return 0;
5185 
5186 	tracer_flags = flags->val;
5187 	trace_opts = flags->opts;
5188 
5189 	for (i = 0; trace_opts[i].name; i++) {
5190 		if (tracer_flags & trace_opts[i].bit)
5191 			seq_printf(m, "%s\n", trace_opts[i].name);
5192 		else
5193 			seq_printf(m, "no%s\n", trace_opts[i].name);
5194 	}
5195 
5196 	return 0;
5197 }
5198 
__set_tracer_option(struct trace_array * tr,struct tracer_flags * tracer_flags,struct tracer_opt * opts,int neg)5199 static int __set_tracer_option(struct trace_array *tr,
5200 			       struct tracer_flags *tracer_flags,
5201 			       struct tracer_opt *opts, int neg)
5202 {
5203 	struct tracer *trace = tracer_flags->trace;
5204 	int ret = 0;
5205 
5206 	if (trace->set_flag)
5207 		ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5208 	if (ret)
5209 		return ret;
5210 
5211 	if (neg)
5212 		tracer_flags->val &= ~opts->bit;
5213 	else
5214 		tracer_flags->val |= opts->bit;
5215 	return 0;
5216 }
5217 
5218 /* Try to assign a tracer specific option */
set_tracer_option(struct trace_array * tr,char * cmp,int neg)5219 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5220 {
5221 	struct tracer_flags *tracer_flags = tr->current_trace_flags;
5222 	struct tracer_opt *opts = NULL;
5223 	int i;
5224 
5225 	if (!tracer_flags || !tracer_flags->opts)
5226 		return 0;
5227 
5228 	for (i = 0; tracer_flags->opts[i].name; i++) {
5229 		opts = &tracer_flags->opts[i];
5230 
5231 		if (strcmp(cmp, opts->name) == 0)
5232 			return __set_tracer_option(tr, tracer_flags, opts, neg);
5233 	}
5234 
5235 	return -EINVAL;
5236 }
5237 
5238 /* Some tracers require overwrite to stay enabled */
trace_keep_overwrite(struct tracer * tracer,u64 mask,int set)5239 int trace_keep_overwrite(struct tracer *tracer, u64 mask, int set)
5240 {
5241 	if (tracer->enabled && (mask & TRACE_ITER(OVERWRITE)) && !set)
5242 		return -1;
5243 
5244 	return 0;
5245 }
5246 
set_tracer_flag(struct trace_array * tr,u64 mask,int enabled)5247 int set_tracer_flag(struct trace_array *tr, u64 mask, int enabled)
5248 {
5249 	switch (mask) {
5250 	case TRACE_ITER(RECORD_TGID):
5251 	case TRACE_ITER(RECORD_CMD):
5252 	case TRACE_ITER(TRACE_PRINTK):
5253 	case TRACE_ITER(COPY_MARKER):
5254 		lockdep_assert_held(&event_mutex);
5255 	}
5256 
5257 	/* do nothing if flag is already set */
5258 	if (!!(tr->trace_flags & mask) == !!enabled)
5259 		return 0;
5260 
5261 	/* Give the tracer a chance to approve the change */
5262 	if (tr->current_trace->flag_changed)
5263 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5264 			return -EINVAL;
5265 
5266 	switch (mask) {
5267 	case TRACE_ITER(TRACE_PRINTK):
5268 		if (enabled) {
5269 			update_printk_trace(tr);
5270 		} else {
5271 			/*
5272 			 * The global_trace cannot clear this.
5273 			 * It's flag only gets cleared if another instance sets it.
5274 			 */
5275 			if (printk_trace == &global_trace)
5276 				return -EINVAL;
5277 			/*
5278 			 * An instance must always have it set.
5279 			 * by default, that's the global_trace instane.
5280 			 */
5281 			if (printk_trace == tr)
5282 				update_printk_trace(&global_trace);
5283 		}
5284 		break;
5285 
5286 	case TRACE_ITER(COPY_MARKER):
5287 		update_marker_trace(tr, enabled);
5288 		/* update_marker_trace updates the tr->trace_flags */
5289 		return 0;
5290 	}
5291 
5292 	if (enabled)
5293 		tr->trace_flags |= mask;
5294 	else
5295 		tr->trace_flags &= ~mask;
5296 
5297 	switch (mask) {
5298 	case TRACE_ITER(RECORD_CMD):
5299 		trace_event_enable_cmd_record(enabled);
5300 		break;
5301 
5302 	case TRACE_ITER(RECORD_TGID):
5303 
5304 		if (trace_alloc_tgid_map() < 0) {
5305 			tr->trace_flags &= ~TRACE_ITER(RECORD_TGID);
5306 			return -ENOMEM;
5307 		}
5308 
5309 		trace_event_enable_tgid_record(enabled);
5310 		break;
5311 
5312 	case TRACE_ITER(EVENT_FORK):
5313 		trace_event_follow_fork(tr, enabled);
5314 		break;
5315 
5316 	case TRACE_ITER(FUNC_FORK):
5317 		ftrace_pid_follow_fork(tr, enabled);
5318 		break;
5319 
5320 	case TRACE_ITER(OVERWRITE):
5321 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5322 #ifdef CONFIG_TRACER_MAX_TRACE
5323 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5324 #endif
5325 		break;
5326 
5327 	case TRACE_ITER(PRINTK):
5328 		trace_printk_start_stop_comm(enabled);
5329 		trace_printk_control(enabled);
5330 		break;
5331 
5332 #if defined(CONFIG_FUNCTION_PROFILER) && defined(CONFIG_FUNCTION_GRAPH_TRACER)
5333 	case TRACE_GRAPH_GRAPH_TIME:
5334 		ftrace_graph_graph_time_control(enabled);
5335 		break;
5336 #endif
5337 	}
5338 
5339 	return 0;
5340 }
5341 
trace_set_options(struct trace_array * tr,char * option)5342 int trace_set_options(struct trace_array *tr, char *option)
5343 {
5344 	char *cmp;
5345 	int neg = 0;
5346 	int ret;
5347 	size_t orig_len = strlen(option);
5348 	int len;
5349 
5350 	cmp = strstrip(option);
5351 
5352 	len = str_has_prefix(cmp, "no");
5353 	if (len)
5354 		neg = 1;
5355 
5356 	cmp += len;
5357 
5358 	mutex_lock(&event_mutex);
5359 	mutex_lock(&trace_types_lock);
5360 
5361 	ret = match_string(trace_options, -1, cmp);
5362 	/* If no option could be set, test the specific tracer options */
5363 	if (ret < 0)
5364 		ret = set_tracer_option(tr, cmp, neg);
5365 	else
5366 		ret = set_tracer_flag(tr, 1ULL << ret, !neg);
5367 
5368 	mutex_unlock(&trace_types_lock);
5369 	mutex_unlock(&event_mutex);
5370 
5371 	/*
5372 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5373 	 * turn it back into a space.
5374 	 */
5375 	if (orig_len > strlen(option))
5376 		option[strlen(option)] = ' ';
5377 
5378 	return ret;
5379 }
5380 
apply_trace_boot_options(void)5381 static void __init apply_trace_boot_options(void)
5382 {
5383 	char *buf = trace_boot_options_buf;
5384 	char *option;
5385 
5386 	while (true) {
5387 		option = strsep(&buf, ",");
5388 
5389 		if (!option)
5390 			break;
5391 
5392 		if (*option)
5393 			trace_set_options(&global_trace, option);
5394 
5395 		/* Put back the comma to allow this to be called again */
5396 		if (buf)
5397 			*(buf - 1) = ',';
5398 	}
5399 }
5400 
5401 static ssize_t
tracing_trace_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)5402 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5403 			size_t cnt, loff_t *ppos)
5404 {
5405 	struct seq_file *m = filp->private_data;
5406 	struct trace_array *tr = m->private;
5407 	char buf[64];
5408 	int ret;
5409 
5410 	if (cnt >= sizeof(buf))
5411 		return -EINVAL;
5412 
5413 	if (copy_from_user(buf, ubuf, cnt))
5414 		return -EFAULT;
5415 
5416 	buf[cnt] = 0;
5417 
5418 	ret = trace_set_options(tr, buf);
5419 	if (ret < 0)
5420 		return ret;
5421 
5422 	*ppos += cnt;
5423 
5424 	return cnt;
5425 }
5426 
tracing_trace_options_open(struct inode * inode,struct file * file)5427 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5428 {
5429 	struct trace_array *tr = inode->i_private;
5430 	int ret;
5431 
5432 	ret = tracing_check_open_get_tr(tr);
5433 	if (ret)
5434 		return ret;
5435 
5436 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5437 	if (ret < 0)
5438 		trace_array_put(tr);
5439 
5440 	return ret;
5441 }
5442 
5443 static const struct file_operations tracing_iter_fops = {
5444 	.open		= tracing_trace_options_open,
5445 	.read		= seq_read,
5446 	.llseek		= seq_lseek,
5447 	.release	= tracing_single_release_tr,
5448 	.write		= tracing_trace_options_write,
5449 };
5450 
5451 static const char readme_msg[] =
5452 	"tracing mini-HOWTO:\n\n"
5453 	"By default tracefs removes all OTH file permission bits.\n"
5454 	"When mounting tracefs an optional group id can be specified\n"
5455 	"which adds the group to every directory and file in tracefs:\n\n"
5456 	"\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
5457 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5458 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5459 	" Important files:\n"
5460 	"  trace\t\t\t- The static contents of the buffer\n"
5461 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5462 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5463 	"  current_tracer\t- function and latency tracers\n"
5464 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5465 	"  error_log\t- error log for failed commands (that support it)\n"
5466 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5467 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5468 	"  trace_clock\t\t- change the clock used to order events\n"
5469 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5470 	"      global:   Synced across CPUs but slows tracing down.\n"
5471 	"     counter:   Not a clock, but just an increment\n"
5472 	"      uptime:   Jiffy counter from time of boot\n"
5473 	"        perf:   Same clock that perf events use\n"
5474 #ifdef CONFIG_X86_64
5475 	"     x86-tsc:   TSC cycle counter\n"
5476 #endif
5477 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5478 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5479 	"    absolute:   Absolute (standalone) timestamp\n"
5480 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5481 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5482 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5483 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5484 	"\t\t\t  Remove sub-buffer with rmdir\n"
5485 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5486 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5487 	"\t\t\t  option name\n"
5488 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5489 #ifdef CONFIG_DYNAMIC_FTRACE
5490 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5491 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5492 	"\t\t\t  functions\n"
5493 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5494 	"\t     modules: Can select a group via module\n"
5495 	"\t      Format: :mod:<module-name>\n"
5496 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5497 	"\t    triggers: a command to perform when function is hit\n"
5498 	"\t      Format: <function>:<trigger>[:count]\n"
5499 	"\t     trigger: traceon, traceoff\n"
5500 	"\t\t      enable_event:<system>:<event>\n"
5501 	"\t\t      disable_event:<system>:<event>\n"
5502 #ifdef CONFIG_STACKTRACE
5503 	"\t\t      stacktrace\n"
5504 #endif
5505 #ifdef CONFIG_TRACER_SNAPSHOT
5506 	"\t\t      snapshot\n"
5507 #endif
5508 	"\t\t      dump\n"
5509 	"\t\t      cpudump\n"
5510 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5511 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5512 	"\t     The first one will disable tracing every time do_fault is hit\n"
5513 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5514 	"\t       The first time do trap is hit and it disables tracing, the\n"
5515 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5516 	"\t       the counter will not decrement. It only decrements when the\n"
5517 	"\t       trigger did work\n"
5518 	"\t     To remove trigger without count:\n"
5519 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5520 	"\t     To remove trigger with a count:\n"
5521 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5522 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5523 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5524 	"\t    modules: Can select a group via module command :mod:\n"
5525 	"\t    Does not accept triggers\n"
5526 #endif /* CONFIG_DYNAMIC_FTRACE */
5527 #ifdef CONFIG_FUNCTION_TRACER
5528 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5529 	"\t\t    (function)\n"
5530 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5531 	"\t\t    (function)\n"
5532 #endif
5533 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5534 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5535 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5536 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5537 #endif
5538 #ifdef CONFIG_TRACER_SNAPSHOT
5539 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5540 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5541 	"\t\t\t  information\n"
5542 #endif
5543 #ifdef CONFIG_STACK_TRACER
5544 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5545 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5546 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5547 	"\t\t\t  new trace)\n"
5548 #ifdef CONFIG_DYNAMIC_FTRACE
5549 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5550 	"\t\t\t  traces\n"
5551 #endif
5552 #endif /* CONFIG_STACK_TRACER */
5553 #ifdef CONFIG_DYNAMIC_EVENTS
5554 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5555 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5556 #endif
5557 #ifdef CONFIG_KPROBE_EVENTS
5558 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5559 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5560 #endif
5561 #ifdef CONFIG_UPROBE_EVENTS
5562 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5563 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5564 #endif
5565 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5566     defined(CONFIG_FPROBE_EVENTS)
5567 	"\t  accepts: event-definitions (one definition per line)\n"
5568 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5569 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5570 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5571 #endif
5572 #ifdef CONFIG_FPROBE_EVENTS
5573 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5574 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5575 #endif
5576 #ifdef CONFIG_HIST_TRIGGERS
5577 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5578 #endif
5579 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5580 	"\t           -:[<group>/][<event>]\n"
5581 #ifdef CONFIG_KPROBE_EVENTS
5582 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5583   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5584 #endif
5585 #ifdef CONFIG_UPROBE_EVENTS
5586   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5587 #endif
5588 	"\t     args: <name>=fetcharg[:type]\n"
5589 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5590 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5591 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5592 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5593 	"\t           <argname>[->field[->field|.field...]],\n"
5594 #endif
5595 #else
5596 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5597 #endif
5598 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5599 	"\t     kernel return probes support: $retval, $arg<N>, $comm\n"
5600 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5601 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5602 	"\t           symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
5603 #ifdef CONFIG_HIST_TRIGGERS
5604 	"\t    field: <stype> <name>;\n"
5605 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5606 	"\t           [unsigned] char/int/long\n"
5607 #endif
5608 	"\t    efield: For event probes ('e' types), the field is on of the fields\n"
5609 	"\t            of the <attached-group>/<attached-event>.\n"
5610 #endif
5611 	"  set_event\t\t- Enables events by name written into it\n"
5612 	"\t\t\t  Can enable module events via: :mod:<module>\n"
5613 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5614 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5615 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5616 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5617 	"\t\t\t  events\n"
5618 	"      filter\t\t- If set, only events passing filter are traced\n"
5619 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5620 	"\t\t\t  <event>:\n"
5621 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5622 	"      filter\t\t- If set, only events passing filter are traced\n"
5623 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5624 	"\t    Format: <trigger>[:count][if <filter>]\n"
5625 	"\t   trigger: traceon, traceoff\n"
5626 	"\t            enable_event:<system>:<event>\n"
5627 	"\t            disable_event:<system>:<event>\n"
5628 #ifdef CONFIG_HIST_TRIGGERS
5629 	"\t            enable_hist:<system>:<event>\n"
5630 	"\t            disable_hist:<system>:<event>\n"
5631 #endif
5632 #ifdef CONFIG_STACKTRACE
5633 	"\t\t    stacktrace\n"
5634 #endif
5635 #ifdef CONFIG_TRACER_SNAPSHOT
5636 	"\t\t    snapshot\n"
5637 #endif
5638 #ifdef CONFIG_HIST_TRIGGERS
5639 	"\t\t    hist (see below)\n"
5640 #endif
5641 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5642 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5643 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5644 	"\t                  events/block/block_unplug/trigger\n"
5645 	"\t   The first disables tracing every time block_unplug is hit.\n"
5646 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5647 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5648 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5649 	"\t   Like function triggers, the counter is only decremented if it\n"
5650 	"\t    enabled or disabled tracing.\n"
5651 	"\t   To remove a trigger without a count:\n"
5652 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5653 	"\t   To remove a trigger with a count:\n"
5654 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5655 	"\t   Filters can be ignored when removing a trigger.\n"
5656 #ifdef CONFIG_HIST_TRIGGERS
5657 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5658 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5659 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5660 	"\t            [:values=<field1[,field2,...]>]\n"
5661 	"\t            [:sort=<field1[,field2,...]>]\n"
5662 	"\t            [:size=#entries]\n"
5663 	"\t            [:pause][:continue][:clear]\n"
5664 	"\t            [:name=histname1]\n"
5665 	"\t            [:nohitcount]\n"
5666 	"\t            [:<handler>.<action>]\n"
5667 	"\t            [if <filter>]\n\n"
5668 	"\t    Note, special fields can be used as well:\n"
5669 	"\t            common_timestamp - to record current timestamp\n"
5670 	"\t            common_cpu - to record the CPU the event happened on\n"
5671 	"\n"
5672 	"\t    A hist trigger variable can be:\n"
5673 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
5674 	"\t        - a reference to another variable e.g. y=$x,\n"
5675 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5676 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5677 	"\n"
5678 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5679 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5680 	"\t    variable reference, field or numeric literal.\n"
5681 	"\n"
5682 	"\t    When a matching event is hit, an entry is added to a hash\n"
5683 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5684 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5685 	"\t    correspond to fields in the event's format description.  Keys\n"
5686 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
5687 	"\t    Compound keys consisting of up to two fields can be specified\n"
5688 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5689 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5690 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5691 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5692 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5693 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5694 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5695 	"\t    its histogram data will be shared with other triggers of the\n"
5696 	"\t    same name, and trigger hits will update this common data.\n\n"
5697 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5698 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5699 	"\t    triggers attached to an event, there will be a table for each\n"
5700 	"\t    trigger in the output.  The table displayed for a named\n"
5701 	"\t    trigger will be the same as any other instance having the\n"
5702 	"\t    same name.  The default format used to display a given field\n"
5703 	"\t    can be modified by appending any of the following modifiers\n"
5704 	"\t    to the field name, as applicable:\n\n"
5705 	"\t            .hex        display a number as a hex value\n"
5706 	"\t            .sym        display an address as a symbol\n"
5707 	"\t            .sym-offset display an address as a symbol and offset\n"
5708 	"\t            .execname   display a common_pid as a program name\n"
5709 	"\t            .syscall    display a syscall id as a syscall name\n"
5710 	"\t            .log2       display log2 value rather than raw number\n"
5711 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5712 	"\t            .usecs      display a common_timestamp in microseconds\n"
5713 	"\t            .percent    display a number of percentage value\n"
5714 	"\t            .graph      display a bar-graph of a value\n\n"
5715 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5716 	"\t    trigger or to start a hist trigger but not log any events\n"
5717 	"\t    until told to do so.  'continue' can be used to start or\n"
5718 	"\t    restart a paused hist trigger.\n\n"
5719 	"\t    The 'clear' parameter will clear the contents of a running\n"
5720 	"\t    hist trigger and leave its current paused/active state\n"
5721 	"\t    unchanged.\n\n"
5722 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5723 	"\t    raw hitcount in the histogram.\n\n"
5724 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5725 	"\t    have one event conditionally start and stop another event's\n"
5726 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5727 	"\t    the enable_event and disable_event triggers.\n\n"
5728 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5729 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5730 	"\t        <handler>.<action>\n\n"
5731 	"\t    The available handlers are:\n\n"
5732 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5733 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5734 	"\t        onchange(var)            - invoke action if var changes\n\n"
5735 	"\t    The available actions are:\n\n"
5736 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5737 	"\t        save(field,...)                      - save current event fields\n"
5738 #ifdef CONFIG_TRACER_SNAPSHOT
5739 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5740 #endif
5741 #ifdef CONFIG_SYNTH_EVENTS
5742 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5743 	"\t  Write into this file to define/undefine new synthetic events.\n"
5744 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5745 #endif
5746 #endif
5747 ;
5748 
5749 static ssize_t
tracing_readme_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5750 tracing_readme_read(struct file *filp, char __user *ubuf,
5751 		       size_t cnt, loff_t *ppos)
5752 {
5753 	return simple_read_from_buffer(ubuf, cnt, ppos,
5754 					readme_msg, strlen(readme_msg));
5755 }
5756 
5757 static const struct file_operations tracing_readme_fops = {
5758 	.open		= tracing_open_generic,
5759 	.read		= tracing_readme_read,
5760 	.llseek		= generic_file_llseek,
5761 };
5762 
5763 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5764 static union trace_eval_map_item *
update_eval_map(union trace_eval_map_item * ptr)5765 update_eval_map(union trace_eval_map_item *ptr)
5766 {
5767 	if (!ptr->map.eval_string) {
5768 		if (ptr->tail.next) {
5769 			ptr = ptr->tail.next;
5770 			/* Set ptr to the next real item (skip head) */
5771 			ptr++;
5772 		} else
5773 			return NULL;
5774 	}
5775 	return ptr;
5776 }
5777 
eval_map_next(struct seq_file * m,void * v,loff_t * pos)5778 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5779 {
5780 	union trace_eval_map_item *ptr = v;
5781 
5782 	/*
5783 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5784 	 * This really should never happen.
5785 	 */
5786 	(*pos)++;
5787 	ptr = update_eval_map(ptr);
5788 	if (WARN_ON_ONCE(!ptr))
5789 		return NULL;
5790 
5791 	ptr++;
5792 	ptr = update_eval_map(ptr);
5793 
5794 	return ptr;
5795 }
5796 
eval_map_start(struct seq_file * m,loff_t * pos)5797 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5798 {
5799 	union trace_eval_map_item *v;
5800 	loff_t l = 0;
5801 
5802 	mutex_lock(&trace_eval_mutex);
5803 
5804 	v = trace_eval_maps;
5805 	if (v)
5806 		v++;
5807 
5808 	while (v && l < *pos) {
5809 		v = eval_map_next(m, v, &l);
5810 	}
5811 
5812 	return v;
5813 }
5814 
eval_map_stop(struct seq_file * m,void * v)5815 static void eval_map_stop(struct seq_file *m, void *v)
5816 {
5817 	mutex_unlock(&trace_eval_mutex);
5818 }
5819 
eval_map_show(struct seq_file * m,void * v)5820 static int eval_map_show(struct seq_file *m, void *v)
5821 {
5822 	union trace_eval_map_item *ptr = v;
5823 
5824 	seq_printf(m, "%s %ld (%s)\n",
5825 		   ptr->map.eval_string, ptr->map.eval_value,
5826 		   ptr->map.system);
5827 
5828 	return 0;
5829 }
5830 
5831 static const struct seq_operations tracing_eval_map_seq_ops = {
5832 	.start		= eval_map_start,
5833 	.next		= eval_map_next,
5834 	.stop		= eval_map_stop,
5835 	.show		= eval_map_show,
5836 };
5837 
tracing_eval_map_open(struct inode * inode,struct file * filp)5838 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5839 {
5840 	int ret;
5841 
5842 	ret = tracing_check_open_get_tr(NULL);
5843 	if (ret)
5844 		return ret;
5845 
5846 	return seq_open(filp, &tracing_eval_map_seq_ops);
5847 }
5848 
5849 static const struct file_operations tracing_eval_map_fops = {
5850 	.open		= tracing_eval_map_open,
5851 	.read		= seq_read,
5852 	.llseek		= seq_lseek,
5853 	.release	= seq_release,
5854 };
5855 
5856 static inline union trace_eval_map_item *
trace_eval_jmp_to_tail(union trace_eval_map_item * ptr)5857 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5858 {
5859 	/* Return tail of array given the head */
5860 	return ptr + ptr->head.length + 1;
5861 }
5862 
5863 static void
trace_insert_eval_map_file(struct module * mod,struct trace_eval_map ** start,int len)5864 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5865 			   int len)
5866 {
5867 	struct trace_eval_map **stop;
5868 	struct trace_eval_map **map;
5869 	union trace_eval_map_item *map_array;
5870 	union trace_eval_map_item *ptr;
5871 
5872 	stop = start + len;
5873 
5874 	/*
5875 	 * The trace_eval_maps contains the map plus a head and tail item,
5876 	 * where the head holds the module and length of array, and the
5877 	 * tail holds a pointer to the next list.
5878 	 */
5879 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5880 	if (!map_array) {
5881 		pr_warn("Unable to allocate trace eval mapping\n");
5882 		return;
5883 	}
5884 
5885 	guard(mutex)(&trace_eval_mutex);
5886 
5887 	if (!trace_eval_maps)
5888 		trace_eval_maps = map_array;
5889 	else {
5890 		ptr = trace_eval_maps;
5891 		for (;;) {
5892 			ptr = trace_eval_jmp_to_tail(ptr);
5893 			if (!ptr->tail.next)
5894 				break;
5895 			ptr = ptr->tail.next;
5896 
5897 		}
5898 		ptr->tail.next = map_array;
5899 	}
5900 	map_array->head.mod = mod;
5901 	map_array->head.length = len;
5902 	map_array++;
5903 
5904 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5905 		map_array->map = **map;
5906 		map_array++;
5907 	}
5908 	memset(map_array, 0, sizeof(*map_array));
5909 }
5910 
trace_create_eval_file(struct dentry * d_tracer)5911 static void trace_create_eval_file(struct dentry *d_tracer)
5912 {
5913 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5914 			  NULL, &tracing_eval_map_fops);
5915 }
5916 
5917 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
trace_create_eval_file(struct dentry * d_tracer)5918 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
trace_insert_eval_map_file(struct module * mod,struct trace_eval_map ** start,int len)5919 static inline void trace_insert_eval_map_file(struct module *mod,
5920 			      struct trace_eval_map **start, int len) { }
5921 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5922 
5923 static void
trace_event_update_with_eval_map(struct module * mod,struct trace_eval_map ** start,int len)5924 trace_event_update_with_eval_map(struct module *mod,
5925 				 struct trace_eval_map **start,
5926 				 int len)
5927 {
5928 	struct trace_eval_map **map;
5929 
5930 	/* Always run sanitizer only if btf_type_tag attr exists. */
5931 	if (len <= 0) {
5932 		if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) &&
5933 		      IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) &&
5934 		      __has_attribute(btf_type_tag)))
5935 			return;
5936 	}
5937 
5938 	map = start;
5939 
5940 	trace_event_update_all(map, len);
5941 
5942 	if (len <= 0)
5943 		return;
5944 
5945 	trace_insert_eval_map_file(mod, start, len);
5946 }
5947 
5948 static ssize_t
tracing_set_trace_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5949 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5950 		       size_t cnt, loff_t *ppos)
5951 {
5952 	struct trace_array *tr = filp->private_data;
5953 	char buf[MAX_TRACER_SIZE+2];
5954 	int r;
5955 
5956 	scoped_guard(mutex, &trace_types_lock) {
5957 		r = sprintf(buf, "%s\n", tr->current_trace->name);
5958 	}
5959 
5960 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5961 }
5962 
tracer_init(struct tracer * t,struct trace_array * tr)5963 int tracer_init(struct tracer *t, struct trace_array *tr)
5964 {
5965 	tracing_reset_online_cpus(&tr->array_buffer);
5966 	return t->init(tr);
5967 }
5968 
set_buffer_entries(struct array_buffer * buf,unsigned long val)5969 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5970 {
5971 	int cpu;
5972 
5973 	for_each_tracing_cpu(cpu)
5974 		per_cpu_ptr(buf->data, cpu)->entries = val;
5975 }
5976 
update_buffer_entries(struct array_buffer * buf,int cpu)5977 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5978 {
5979 	if (cpu == RING_BUFFER_ALL_CPUS) {
5980 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5981 	} else {
5982 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5983 	}
5984 }
5985 
5986 #ifdef CONFIG_TRACER_MAX_TRACE
5987 /* resize @tr's buffer to the size of @size_tr's entries */
resize_buffer_duplicate_size(struct array_buffer * trace_buf,struct array_buffer * size_buf,int cpu_id)5988 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5989 					struct array_buffer *size_buf, int cpu_id)
5990 {
5991 	int cpu, ret = 0;
5992 
5993 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5994 		for_each_tracing_cpu(cpu) {
5995 			ret = ring_buffer_resize(trace_buf->buffer,
5996 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5997 			if (ret < 0)
5998 				break;
5999 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6000 				per_cpu_ptr(size_buf->data, cpu)->entries;
6001 		}
6002 	} else {
6003 		ret = ring_buffer_resize(trace_buf->buffer,
6004 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6005 		if (ret == 0)
6006 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6007 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6008 	}
6009 
6010 	return ret;
6011 }
6012 #endif /* CONFIG_TRACER_MAX_TRACE */
6013 
__tracing_resize_ring_buffer(struct trace_array * tr,unsigned long size,int cpu)6014 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6015 					unsigned long size, int cpu)
6016 {
6017 	int ret;
6018 
6019 	/*
6020 	 * If kernel or user changes the size of the ring buffer
6021 	 * we use the size that was given, and we can forget about
6022 	 * expanding it later.
6023 	 */
6024 	trace_set_ring_buffer_expanded(tr);
6025 
6026 	/* May be called before buffers are initialized */
6027 	if (!tr->array_buffer.buffer)
6028 		return 0;
6029 
6030 	/* Do not allow tracing while resizing ring buffer */
6031 	tracing_stop_tr(tr);
6032 
6033 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6034 	if (ret < 0)
6035 		goto out_start;
6036 
6037 #ifdef CONFIG_TRACER_MAX_TRACE
6038 	if (!tr->allocated_snapshot)
6039 		goto out;
6040 
6041 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6042 	if (ret < 0) {
6043 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6044 						     &tr->array_buffer, cpu);
6045 		if (r < 0) {
6046 			/*
6047 			 * AARGH! We are left with different
6048 			 * size max buffer!!!!
6049 			 * The max buffer is our "snapshot" buffer.
6050 			 * When a tracer needs a snapshot (one of the
6051 			 * latency tracers), it swaps the max buffer
6052 			 * with the saved snap shot. We succeeded to
6053 			 * update the size of the main buffer, but failed to
6054 			 * update the size of the max buffer. But when we tried
6055 			 * to reset the main buffer to the original size, we
6056 			 * failed there too. This is very unlikely to
6057 			 * happen, but if it does, warn and kill all
6058 			 * tracing.
6059 			 */
6060 			WARN_ON(1);
6061 			tracing_disabled = 1;
6062 		}
6063 		goto out_start;
6064 	}
6065 
6066 	update_buffer_entries(&tr->max_buffer, cpu);
6067 
6068  out:
6069 #endif /* CONFIG_TRACER_MAX_TRACE */
6070 
6071 	update_buffer_entries(&tr->array_buffer, cpu);
6072  out_start:
6073 	tracing_start_tr(tr);
6074 	return ret;
6075 }
6076 
tracing_resize_ring_buffer(struct trace_array * tr,unsigned long size,int cpu_id)6077 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6078 				  unsigned long size, int cpu_id)
6079 {
6080 	guard(mutex)(&trace_types_lock);
6081 
6082 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6083 		/* make sure, this cpu is enabled in the mask */
6084 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
6085 			return -EINVAL;
6086 	}
6087 
6088 	return __tracing_resize_ring_buffer(tr, size, cpu_id);
6089 }
6090 
6091 struct trace_mod_entry {
6092 	unsigned long	mod_addr;
6093 	char		mod_name[MODULE_NAME_LEN];
6094 };
6095 
6096 struct trace_scratch {
6097 	unsigned int		clock_id;
6098 	unsigned long		text_addr;
6099 	unsigned long		nr_entries;
6100 	struct trace_mod_entry	entries[];
6101 };
6102 
6103 static DEFINE_MUTEX(scratch_mutex);
6104 
cmp_mod_entry(const void * key,const void * pivot)6105 static int cmp_mod_entry(const void *key, const void *pivot)
6106 {
6107 	unsigned long addr = (unsigned long)key;
6108 	const struct trace_mod_entry *ent = pivot;
6109 
6110 	if (addr >= ent[0].mod_addr && addr < ent[1].mod_addr)
6111 		return 0;
6112 	else
6113 		return addr - ent->mod_addr;
6114 }
6115 
6116 /**
6117  * trace_adjust_address() - Adjust prev boot address to current address.
6118  * @tr: Persistent ring buffer's trace_array.
6119  * @addr: Address in @tr which is adjusted.
6120  */
trace_adjust_address(struct trace_array * tr,unsigned long addr)6121 unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
6122 {
6123 	struct trace_module_delta *module_delta;
6124 	struct trace_scratch *tscratch;
6125 	struct trace_mod_entry *entry;
6126 	unsigned long raddr;
6127 	int idx = 0, nr_entries;
6128 
6129 	/* If we don't have last boot delta, return the address */
6130 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6131 		return addr;
6132 
6133 	/* tr->module_delta must be protected by rcu. */
6134 	guard(rcu)();
6135 	tscratch = tr->scratch;
6136 	/* if there is no tscrach, module_delta must be NULL. */
6137 	module_delta = READ_ONCE(tr->module_delta);
6138 	if (!module_delta || !tscratch->nr_entries ||
6139 	    tscratch->entries[0].mod_addr > addr) {
6140 		raddr = addr + tr->text_delta;
6141 		return __is_kernel(raddr) || is_kernel_core_data(raddr) ||
6142 			is_kernel_rodata(raddr) ? raddr : addr;
6143 	}
6144 
6145 	/* Note that entries must be sorted. */
6146 	nr_entries = tscratch->nr_entries;
6147 	if (nr_entries == 1 ||
6148 	    tscratch->entries[nr_entries - 1].mod_addr < addr)
6149 		idx = nr_entries - 1;
6150 	else {
6151 		entry = __inline_bsearch((void *)addr,
6152 				tscratch->entries,
6153 				nr_entries - 1,
6154 				sizeof(tscratch->entries[0]),
6155 				cmp_mod_entry);
6156 		if (entry)
6157 			idx = entry - tscratch->entries;
6158 	}
6159 
6160 	return addr + module_delta->delta[idx];
6161 }
6162 
6163 #ifdef CONFIG_MODULES
save_mod(struct module * mod,void * data)6164 static int save_mod(struct module *mod, void *data)
6165 {
6166 	struct trace_array *tr = data;
6167 	struct trace_scratch *tscratch;
6168 	struct trace_mod_entry *entry;
6169 	unsigned int size;
6170 
6171 	tscratch = tr->scratch;
6172 	if (!tscratch)
6173 		return -1;
6174 	size = tr->scratch_size;
6175 
6176 	if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size)
6177 		return -1;
6178 
6179 	entry = &tscratch->entries[tscratch->nr_entries];
6180 
6181 	tscratch->nr_entries++;
6182 
6183 	entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
6184 	strscpy(entry->mod_name, mod->name);
6185 
6186 	return 0;
6187 }
6188 #else
save_mod(struct module * mod,void * data)6189 static int save_mod(struct module *mod, void *data)
6190 {
6191 	return 0;
6192 }
6193 #endif
6194 
update_last_data(struct trace_array * tr)6195 static void update_last_data(struct trace_array *tr)
6196 {
6197 	struct trace_module_delta *module_delta;
6198 	struct trace_scratch *tscratch;
6199 
6200 	if (!(tr->flags & TRACE_ARRAY_FL_BOOT))
6201 		return;
6202 
6203 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6204 		return;
6205 
6206 	/* Only if the buffer has previous boot data clear and update it. */
6207 	tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;
6208 
6209 	/* Reset the module list and reload them */
6210 	if (tr->scratch) {
6211 		struct trace_scratch *tscratch = tr->scratch;
6212 
6213 		tscratch->clock_id = tr->clock_id;
6214 		memset(tscratch->entries, 0,
6215 		       flex_array_size(tscratch, entries, tscratch->nr_entries));
6216 		tscratch->nr_entries = 0;
6217 
6218 		guard(mutex)(&scratch_mutex);
6219 		module_for_each_mod(save_mod, tr);
6220 	}
6221 
6222 	/*
6223 	 * Need to clear all CPU buffers as there cannot be events
6224 	 * from the previous boot mixed with events with this boot
6225 	 * as that will cause a confusing trace. Need to clear all
6226 	 * CPU buffers, even for those that may currently be offline.
6227 	 */
6228 	tracing_reset_all_cpus(&tr->array_buffer);
6229 
6230 	/* Using current data now */
6231 	tr->text_delta = 0;
6232 
6233 	if (!tr->scratch)
6234 		return;
6235 
6236 	tscratch = tr->scratch;
6237 	module_delta = READ_ONCE(tr->module_delta);
6238 	WRITE_ONCE(tr->module_delta, NULL);
6239 	kfree_rcu(module_delta, rcu);
6240 
6241 	/* Set the persistent ring buffer meta data to this address */
6242 	tscratch->text_addr = (unsigned long)_text;
6243 }
6244 
6245 /**
6246  * tracing_update_buffers - used by tracing facility to expand ring buffers
6247  * @tr: The tracing instance
6248  *
6249  * To save on memory when the tracing is never used on a system with it
6250  * configured in. The ring buffers are set to a minimum size. But once
6251  * a user starts to use the tracing facility, then they need to grow
6252  * to their default size.
6253  *
6254  * This function is to be called when a tracer is about to be used.
6255  */
tracing_update_buffers(struct trace_array * tr)6256 int tracing_update_buffers(struct trace_array *tr)
6257 {
6258 	int ret = 0;
6259 
6260 	guard(mutex)(&trace_types_lock);
6261 
6262 	update_last_data(tr);
6263 
6264 	if (!tr->ring_buffer_expanded)
6265 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6266 						RING_BUFFER_ALL_CPUS);
6267 	return ret;
6268 }
6269 
6270 /*
6271  * Used to clear out the tracer before deletion of an instance.
6272  * Must have trace_types_lock held.
6273  */
tracing_set_nop(struct trace_array * tr)6274 static void tracing_set_nop(struct trace_array *tr)
6275 {
6276 	if (tr->current_trace == &nop_trace)
6277 		return;
6278 
6279 	tr->current_trace->enabled--;
6280 
6281 	if (tr->current_trace->reset)
6282 		tr->current_trace->reset(tr);
6283 
6284 	tr->current_trace = &nop_trace;
6285 	tr->current_trace_flags = nop_trace.flags;
6286 }
6287 
6288 static bool tracer_options_updated;
6289 
tracing_set_tracer(struct trace_array * tr,const char * buf)6290 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6291 {
6292 	struct tracer *trace = NULL;
6293 	struct tracers *t;
6294 #ifdef CONFIG_TRACER_MAX_TRACE
6295 	bool had_max_tr;
6296 #endif
6297 	int ret;
6298 
6299 	guard(mutex)(&trace_types_lock);
6300 
6301 	update_last_data(tr);
6302 
6303 	if (!tr->ring_buffer_expanded) {
6304 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6305 						RING_BUFFER_ALL_CPUS);
6306 		if (ret < 0)
6307 			return ret;
6308 		ret = 0;
6309 	}
6310 
6311 	list_for_each_entry(t, &tr->tracers, list) {
6312 		if (strcmp(t->tracer->name, buf) == 0) {
6313 			trace = t->tracer;
6314 			break;
6315 		}
6316 	}
6317 	if (!trace)
6318 		return -EINVAL;
6319 
6320 	if (trace == tr->current_trace)
6321 		return 0;
6322 
6323 #ifdef CONFIG_TRACER_SNAPSHOT
6324 	if (trace->use_max_tr) {
6325 		local_irq_disable();
6326 		arch_spin_lock(&tr->max_lock);
6327 		ret = tr->cond_snapshot ? -EBUSY : 0;
6328 		arch_spin_unlock(&tr->max_lock);
6329 		local_irq_enable();
6330 		if (ret)
6331 			return ret;
6332 	}
6333 #endif
6334 	/* Some tracers won't work on kernel command line */
6335 	if (system_state < SYSTEM_RUNNING && trace->noboot) {
6336 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6337 			trace->name);
6338 		return -EINVAL;
6339 	}
6340 
6341 	/* Some tracers are only allowed for the top level buffer */
6342 	if (!trace_ok_for_array(trace, tr))
6343 		return -EINVAL;
6344 
6345 	/* If trace pipe files are being read, we can't change the tracer */
6346 	if (tr->trace_ref)
6347 		return -EBUSY;
6348 
6349 	trace_branch_disable();
6350 
6351 	tr->current_trace->enabled--;
6352 
6353 	if (tr->current_trace->reset)
6354 		tr->current_trace->reset(tr);
6355 
6356 #ifdef CONFIG_TRACER_MAX_TRACE
6357 	had_max_tr = tr->current_trace->use_max_tr;
6358 
6359 	/* Current trace needs to be nop_trace before synchronize_rcu */
6360 	tr->current_trace = &nop_trace;
6361 	tr->current_trace_flags = nop_trace.flags;
6362 
6363 	if (had_max_tr && !trace->use_max_tr) {
6364 		/*
6365 		 * We need to make sure that the update_max_tr sees that
6366 		 * current_trace changed to nop_trace to keep it from
6367 		 * swapping the buffers after we resize it.
6368 		 * The update_max_tr is called from interrupts disabled
6369 		 * so a synchronized_sched() is sufficient.
6370 		 */
6371 		synchronize_rcu();
6372 		free_snapshot(tr);
6373 		tracing_disarm_snapshot(tr);
6374 	}
6375 
6376 	if (!had_max_tr && trace->use_max_tr) {
6377 		ret = tracing_arm_snapshot_locked(tr);
6378 		if (ret)
6379 			return ret;
6380 	}
6381 #else
6382 	tr->current_trace = &nop_trace;
6383 #endif
6384 
6385 	tr->current_trace_flags = t->flags ? : t->tracer->flags;
6386 
6387 	if (trace->init) {
6388 		ret = tracer_init(trace, tr);
6389 		if (ret) {
6390 #ifdef CONFIG_TRACER_MAX_TRACE
6391 			if (trace->use_max_tr)
6392 				tracing_disarm_snapshot(tr);
6393 #endif
6394 			tr->current_trace_flags = nop_trace.flags;
6395 			return ret;
6396 		}
6397 	}
6398 
6399 	tr->current_trace = trace;
6400 	tr->current_trace->enabled++;
6401 	trace_branch_enable(tr);
6402 
6403 	return 0;
6404 }
6405 
6406 static ssize_t
tracing_set_trace_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6407 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6408 			size_t cnt, loff_t *ppos)
6409 {
6410 	struct trace_array *tr = filp->private_data;
6411 	char buf[MAX_TRACER_SIZE+1];
6412 	char *name;
6413 	size_t ret;
6414 	int err;
6415 
6416 	ret = cnt;
6417 
6418 	if (cnt > MAX_TRACER_SIZE)
6419 		cnt = MAX_TRACER_SIZE;
6420 
6421 	if (copy_from_user(buf, ubuf, cnt))
6422 		return -EFAULT;
6423 
6424 	buf[cnt] = 0;
6425 
6426 	name = strim(buf);
6427 
6428 	err = tracing_set_tracer(tr, name);
6429 	if (err)
6430 		return err;
6431 
6432 	*ppos += ret;
6433 
6434 	return ret;
6435 }
6436 
6437 static ssize_t
tracing_nsecs_read(unsigned long * ptr,char __user * ubuf,size_t cnt,loff_t * ppos)6438 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6439 		   size_t cnt, loff_t *ppos)
6440 {
6441 	char buf[64];
6442 	int r;
6443 
6444 	r = snprintf(buf, sizeof(buf), "%ld\n",
6445 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6446 	if (r > sizeof(buf))
6447 		r = sizeof(buf);
6448 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6449 }
6450 
6451 static ssize_t
tracing_nsecs_write(unsigned long * ptr,const char __user * ubuf,size_t cnt,loff_t * ppos)6452 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6453 		    size_t cnt, loff_t *ppos)
6454 {
6455 	unsigned long val;
6456 	int ret;
6457 
6458 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6459 	if (ret)
6460 		return ret;
6461 
6462 	*ptr = val * 1000;
6463 
6464 	return cnt;
6465 }
6466 
6467 static ssize_t
tracing_thresh_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6468 tracing_thresh_read(struct file *filp, char __user *ubuf,
6469 		    size_t cnt, loff_t *ppos)
6470 {
6471 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6472 }
6473 
6474 static ssize_t
tracing_thresh_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6475 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6476 		     size_t cnt, loff_t *ppos)
6477 {
6478 	struct trace_array *tr = filp->private_data;
6479 	int ret;
6480 
6481 	guard(mutex)(&trace_types_lock);
6482 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6483 	if (ret < 0)
6484 		return ret;
6485 
6486 	if (tr->current_trace->update_thresh) {
6487 		ret = tr->current_trace->update_thresh(tr);
6488 		if (ret < 0)
6489 			return ret;
6490 	}
6491 
6492 	return cnt;
6493 }
6494 
6495 #ifdef CONFIG_TRACER_MAX_TRACE
6496 
6497 static ssize_t
tracing_max_lat_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6498 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6499 		     size_t cnt, loff_t *ppos)
6500 {
6501 	struct trace_array *tr = filp->private_data;
6502 
6503 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6504 }
6505 
6506 static ssize_t
tracing_max_lat_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6507 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6508 		      size_t cnt, loff_t *ppos)
6509 {
6510 	struct trace_array *tr = filp->private_data;
6511 
6512 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6513 }
6514 
6515 #endif
6516 
open_pipe_on_cpu(struct trace_array * tr,int cpu)6517 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6518 {
6519 	if (cpu == RING_BUFFER_ALL_CPUS) {
6520 		if (cpumask_empty(tr->pipe_cpumask)) {
6521 			cpumask_setall(tr->pipe_cpumask);
6522 			return 0;
6523 		}
6524 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6525 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6526 		return 0;
6527 	}
6528 	return -EBUSY;
6529 }
6530 
close_pipe_on_cpu(struct trace_array * tr,int cpu)6531 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6532 {
6533 	if (cpu == RING_BUFFER_ALL_CPUS) {
6534 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6535 		cpumask_clear(tr->pipe_cpumask);
6536 	} else {
6537 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6538 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6539 	}
6540 }
6541 
tracing_open_pipe(struct inode * inode,struct file * filp)6542 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6543 {
6544 	struct trace_array *tr = inode->i_private;
6545 	struct trace_iterator *iter;
6546 	int cpu;
6547 	int ret;
6548 
6549 	ret = tracing_check_open_get_tr(tr);
6550 	if (ret)
6551 		return ret;
6552 
6553 	guard(mutex)(&trace_types_lock);
6554 	cpu = tracing_get_cpu(inode);
6555 	ret = open_pipe_on_cpu(tr, cpu);
6556 	if (ret)
6557 		goto fail_pipe_on_cpu;
6558 
6559 	/* create a buffer to store the information to pass to userspace */
6560 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6561 	if (!iter) {
6562 		ret = -ENOMEM;
6563 		goto fail_alloc_iter;
6564 	}
6565 
6566 	trace_seq_init(&iter->seq);
6567 	iter->trace = tr->current_trace;
6568 
6569 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6570 		ret = -ENOMEM;
6571 		goto fail;
6572 	}
6573 
6574 	/* trace pipe does not show start of buffer */
6575 	cpumask_setall(iter->started);
6576 
6577 	if (tr->trace_flags & TRACE_ITER(LATENCY_FMT))
6578 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6579 
6580 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6581 	if (trace_clocks[tr->clock_id].in_ns)
6582 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6583 
6584 	iter->tr = tr;
6585 	iter->array_buffer = &tr->array_buffer;
6586 	iter->cpu_file = cpu;
6587 	mutex_init(&iter->mutex);
6588 	filp->private_data = iter;
6589 
6590 	if (iter->trace->pipe_open)
6591 		iter->trace->pipe_open(iter);
6592 
6593 	nonseekable_open(inode, filp);
6594 
6595 	tr->trace_ref++;
6596 
6597 	return ret;
6598 
6599 fail:
6600 	kfree(iter);
6601 fail_alloc_iter:
6602 	close_pipe_on_cpu(tr, cpu);
6603 fail_pipe_on_cpu:
6604 	__trace_array_put(tr);
6605 	return ret;
6606 }
6607 
tracing_release_pipe(struct inode * inode,struct file * file)6608 static int tracing_release_pipe(struct inode *inode, struct file *file)
6609 {
6610 	struct trace_iterator *iter = file->private_data;
6611 	struct trace_array *tr = inode->i_private;
6612 
6613 	scoped_guard(mutex, &trace_types_lock) {
6614 		tr->trace_ref--;
6615 
6616 		if (iter->trace->pipe_close)
6617 			iter->trace->pipe_close(iter);
6618 		close_pipe_on_cpu(tr, iter->cpu_file);
6619 	}
6620 
6621 	free_trace_iter_content(iter);
6622 	kfree(iter);
6623 
6624 	trace_array_put(tr);
6625 
6626 	return 0;
6627 }
6628 
6629 static __poll_t
trace_poll(struct trace_iterator * iter,struct file * filp,poll_table * poll_table)6630 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6631 {
6632 	struct trace_array *tr = iter->tr;
6633 
6634 	/* Iterators are static, they should be filled or empty */
6635 	if (trace_buffer_iter(iter, iter->cpu_file))
6636 		return EPOLLIN | EPOLLRDNORM;
6637 
6638 	if (tr->trace_flags & TRACE_ITER(BLOCK))
6639 		/*
6640 		 * Always select as readable when in blocking mode
6641 		 */
6642 		return EPOLLIN | EPOLLRDNORM;
6643 	else
6644 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6645 					     filp, poll_table, iter->tr->buffer_percent);
6646 }
6647 
6648 static __poll_t
tracing_poll_pipe(struct file * filp,poll_table * poll_table)6649 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6650 {
6651 	struct trace_iterator *iter = filp->private_data;
6652 
6653 	return trace_poll(iter, filp, poll_table);
6654 }
6655 
6656 /* Must be called with iter->mutex held. */
tracing_wait_pipe(struct file * filp)6657 static int tracing_wait_pipe(struct file *filp)
6658 {
6659 	struct trace_iterator *iter = filp->private_data;
6660 	int ret;
6661 
6662 	while (trace_empty(iter)) {
6663 
6664 		if ((filp->f_flags & O_NONBLOCK)) {
6665 			return -EAGAIN;
6666 		}
6667 
6668 		/*
6669 		 * We block until we read something and tracing is disabled.
6670 		 * We still block if tracing is disabled, but we have never
6671 		 * read anything. This allows a user to cat this file, and
6672 		 * then enable tracing. But after we have read something,
6673 		 * we give an EOF when tracing is again disabled.
6674 		 *
6675 		 * iter->pos will be 0 if we haven't read anything.
6676 		 */
6677 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6678 			break;
6679 
6680 		mutex_unlock(&iter->mutex);
6681 
6682 		ret = wait_on_pipe(iter, 0);
6683 
6684 		mutex_lock(&iter->mutex);
6685 
6686 		if (ret)
6687 			return ret;
6688 	}
6689 
6690 	return 1;
6691 }
6692 
update_last_data_if_empty(struct trace_array * tr)6693 static bool update_last_data_if_empty(struct trace_array *tr)
6694 {
6695 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6696 		return false;
6697 
6698 	if (!ring_buffer_empty(tr->array_buffer.buffer))
6699 		return false;
6700 
6701 	/*
6702 	 * If the buffer contains the last boot data and all per-cpu
6703 	 * buffers are empty, reset it from the kernel side.
6704 	 */
6705 	update_last_data(tr);
6706 	return true;
6707 }
6708 
6709 /*
6710  * Consumer reader.
6711  */
6712 static ssize_t
tracing_read_pipe(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6713 tracing_read_pipe(struct file *filp, char __user *ubuf,
6714 		  size_t cnt, loff_t *ppos)
6715 {
6716 	struct trace_iterator *iter = filp->private_data;
6717 	ssize_t sret;
6718 
6719 	/*
6720 	 * Avoid more than one consumer on a single file descriptor
6721 	 * This is just a matter of traces coherency, the ring buffer itself
6722 	 * is protected.
6723 	 */
6724 	guard(mutex)(&iter->mutex);
6725 
6726 	/* return any leftover data */
6727 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6728 	if (sret != -EBUSY)
6729 		return sret;
6730 
6731 	trace_seq_init(&iter->seq);
6732 
6733 	if (iter->trace->read) {
6734 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6735 		if (sret)
6736 			return sret;
6737 	}
6738 
6739 waitagain:
6740 	if (update_last_data_if_empty(iter->tr))
6741 		return 0;
6742 
6743 	sret = tracing_wait_pipe(filp);
6744 	if (sret <= 0)
6745 		return sret;
6746 
6747 	/* stop when tracing is finished */
6748 	if (trace_empty(iter))
6749 		return 0;
6750 
6751 	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6752 		cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6753 
6754 	/* reset all but tr, trace, and overruns */
6755 	trace_iterator_reset(iter);
6756 	cpumask_clear(iter->started);
6757 	trace_seq_init(&iter->seq);
6758 
6759 	trace_event_read_lock();
6760 	trace_access_lock(iter->cpu_file);
6761 	while (trace_find_next_entry_inc(iter) != NULL) {
6762 		enum print_line_t ret;
6763 		int save_len = iter->seq.seq.len;
6764 
6765 		ret = print_trace_line(iter);
6766 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6767 			/*
6768 			 * If one print_trace_line() fills entire trace_seq in one shot,
6769 			 * trace_seq_to_user() will returns -EBUSY because save_len == 0,
6770 			 * In this case, we need to consume it, otherwise, loop will peek
6771 			 * this event next time, resulting in an infinite loop.
6772 			 */
6773 			if (save_len == 0) {
6774 				iter->seq.full = 0;
6775 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6776 				trace_consume(iter);
6777 				break;
6778 			}
6779 
6780 			/* In other cases, don't print partial lines */
6781 			iter->seq.seq.len = save_len;
6782 			break;
6783 		}
6784 		if (ret != TRACE_TYPE_NO_CONSUME)
6785 			trace_consume(iter);
6786 
6787 		if (trace_seq_used(&iter->seq) >= cnt)
6788 			break;
6789 
6790 		/*
6791 		 * Setting the full flag means we reached the trace_seq buffer
6792 		 * size and we should leave by partial output condition above.
6793 		 * One of the trace_seq_* functions is not used properly.
6794 		 */
6795 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6796 			  iter->ent->type);
6797 	}
6798 	trace_access_unlock(iter->cpu_file);
6799 	trace_event_read_unlock();
6800 
6801 	/* Now copy what we have to the user */
6802 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6803 	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6804 		trace_seq_init(&iter->seq);
6805 
6806 	/*
6807 	 * If there was nothing to send to user, in spite of consuming trace
6808 	 * entries, go back to wait for more entries.
6809 	 */
6810 	if (sret == -EBUSY)
6811 		goto waitagain;
6812 
6813 	return sret;
6814 }
6815 
tracing_spd_release_pipe(struct splice_pipe_desc * spd,unsigned int idx)6816 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6817 				     unsigned int idx)
6818 {
6819 	__free_page(spd->pages[idx]);
6820 }
6821 
6822 static size_t
tracing_fill_pipe_page(size_t rem,struct trace_iterator * iter)6823 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6824 {
6825 	size_t count;
6826 	int save_len;
6827 	int ret;
6828 
6829 	/* Seq buffer is page-sized, exactly what we need. */
6830 	for (;;) {
6831 		save_len = iter->seq.seq.len;
6832 		ret = print_trace_line(iter);
6833 
6834 		if (trace_seq_has_overflowed(&iter->seq)) {
6835 			iter->seq.seq.len = save_len;
6836 			break;
6837 		}
6838 
6839 		/*
6840 		 * This should not be hit, because it should only
6841 		 * be set if the iter->seq overflowed. But check it
6842 		 * anyway to be safe.
6843 		 */
6844 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6845 			iter->seq.seq.len = save_len;
6846 			break;
6847 		}
6848 
6849 		count = trace_seq_used(&iter->seq) - save_len;
6850 		if (rem < count) {
6851 			rem = 0;
6852 			iter->seq.seq.len = save_len;
6853 			break;
6854 		}
6855 
6856 		if (ret != TRACE_TYPE_NO_CONSUME)
6857 			trace_consume(iter);
6858 		rem -= count;
6859 		if (!trace_find_next_entry_inc(iter))	{
6860 			rem = 0;
6861 			iter->ent = NULL;
6862 			break;
6863 		}
6864 	}
6865 
6866 	return rem;
6867 }
6868 
tracing_splice_read_pipe(struct file * filp,loff_t * ppos,struct pipe_inode_info * pipe,size_t len,unsigned int flags)6869 static ssize_t tracing_splice_read_pipe(struct file *filp,
6870 					loff_t *ppos,
6871 					struct pipe_inode_info *pipe,
6872 					size_t len,
6873 					unsigned int flags)
6874 {
6875 	struct page *pages_def[PIPE_DEF_BUFFERS];
6876 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6877 	struct trace_iterator *iter = filp->private_data;
6878 	struct splice_pipe_desc spd = {
6879 		.pages		= pages_def,
6880 		.partial	= partial_def,
6881 		.nr_pages	= 0, /* This gets updated below. */
6882 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6883 		.ops		= &default_pipe_buf_ops,
6884 		.spd_release	= tracing_spd_release_pipe,
6885 	};
6886 	ssize_t ret;
6887 	size_t rem;
6888 	unsigned int i;
6889 
6890 	if (splice_grow_spd(pipe, &spd))
6891 		return -ENOMEM;
6892 
6893 	mutex_lock(&iter->mutex);
6894 
6895 	if (iter->trace->splice_read) {
6896 		ret = iter->trace->splice_read(iter, filp,
6897 					       ppos, pipe, len, flags);
6898 		if (ret)
6899 			goto out_err;
6900 	}
6901 
6902 	ret = tracing_wait_pipe(filp);
6903 	if (ret <= 0)
6904 		goto out_err;
6905 
6906 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6907 		ret = -EFAULT;
6908 		goto out_err;
6909 	}
6910 
6911 	trace_event_read_lock();
6912 	trace_access_lock(iter->cpu_file);
6913 
6914 	/* Fill as many pages as possible. */
6915 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6916 		spd.pages[i] = alloc_page(GFP_KERNEL);
6917 		if (!spd.pages[i])
6918 			break;
6919 
6920 		rem = tracing_fill_pipe_page(rem, iter);
6921 
6922 		/* Copy the data into the page, so we can start over. */
6923 		ret = trace_seq_to_buffer(&iter->seq,
6924 					  page_address(spd.pages[i]),
6925 					  min((size_t)trace_seq_used(&iter->seq),
6926 						  (size_t)PAGE_SIZE));
6927 		if (ret < 0) {
6928 			__free_page(spd.pages[i]);
6929 			break;
6930 		}
6931 		spd.partial[i].offset = 0;
6932 		spd.partial[i].len = ret;
6933 
6934 		trace_seq_init(&iter->seq);
6935 	}
6936 
6937 	trace_access_unlock(iter->cpu_file);
6938 	trace_event_read_unlock();
6939 	mutex_unlock(&iter->mutex);
6940 
6941 	spd.nr_pages = i;
6942 
6943 	if (i)
6944 		ret = splice_to_pipe(pipe, &spd);
6945 	else
6946 		ret = 0;
6947 out:
6948 	splice_shrink_spd(&spd);
6949 	return ret;
6950 
6951 out_err:
6952 	mutex_unlock(&iter->mutex);
6953 	goto out;
6954 }
6955 
6956 static ssize_t
tracing_syscall_buf_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6957 tracing_syscall_buf_read(struct file *filp, char __user *ubuf,
6958 			 size_t cnt, loff_t *ppos)
6959 {
6960 	struct inode *inode = file_inode(filp);
6961 	struct trace_array *tr = inode->i_private;
6962 	char buf[64];
6963 	int r;
6964 
6965 	r = snprintf(buf, 64, "%d\n", tr->syscall_buf_sz);
6966 
6967 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6968 }
6969 
6970 static ssize_t
tracing_syscall_buf_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6971 tracing_syscall_buf_write(struct file *filp, const char __user *ubuf,
6972 			  size_t cnt, loff_t *ppos)
6973 {
6974 	struct inode *inode = file_inode(filp);
6975 	struct trace_array *tr = inode->i_private;
6976 	unsigned long val;
6977 	int ret;
6978 
6979 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6980 	if (ret)
6981 		return ret;
6982 
6983 	if (val > SYSCALL_FAULT_USER_MAX)
6984 		val = SYSCALL_FAULT_USER_MAX;
6985 
6986 	tr->syscall_buf_sz = val;
6987 
6988 	*ppos += cnt;
6989 
6990 	return cnt;
6991 }
6992 
6993 static ssize_t
tracing_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6994 tracing_entries_read(struct file *filp, char __user *ubuf,
6995 		     size_t cnt, loff_t *ppos)
6996 {
6997 	struct inode *inode = file_inode(filp);
6998 	struct trace_array *tr = inode->i_private;
6999 	int cpu = tracing_get_cpu(inode);
7000 	char buf[64];
7001 	int r = 0;
7002 	ssize_t ret;
7003 
7004 	mutex_lock(&trace_types_lock);
7005 
7006 	if (cpu == RING_BUFFER_ALL_CPUS) {
7007 		int cpu, buf_size_same;
7008 		unsigned long size;
7009 
7010 		size = 0;
7011 		buf_size_same = 1;
7012 		/* check if all cpu sizes are same */
7013 		for_each_tracing_cpu(cpu) {
7014 			/* fill in the size from first enabled cpu */
7015 			if (size == 0)
7016 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7017 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7018 				buf_size_same = 0;
7019 				break;
7020 			}
7021 		}
7022 
7023 		if (buf_size_same) {
7024 			if (!tr->ring_buffer_expanded)
7025 				r = sprintf(buf, "%lu (expanded: %lu)\n",
7026 					    size >> 10,
7027 					    trace_buf_size >> 10);
7028 			else
7029 				r = sprintf(buf, "%lu\n", size >> 10);
7030 		} else
7031 			r = sprintf(buf, "X\n");
7032 	} else
7033 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7034 
7035 	mutex_unlock(&trace_types_lock);
7036 
7037 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7038 	return ret;
7039 }
7040 
7041 static ssize_t
tracing_entries_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7042 tracing_entries_write(struct file *filp, const char __user *ubuf,
7043 		      size_t cnt, loff_t *ppos)
7044 {
7045 	struct inode *inode = file_inode(filp);
7046 	struct trace_array *tr = inode->i_private;
7047 	unsigned long val;
7048 	int ret;
7049 
7050 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7051 	if (ret)
7052 		return ret;
7053 
7054 	/* must have at least 1 entry */
7055 	if (!val)
7056 		return -EINVAL;
7057 
7058 	/* value is in KB */
7059 	val <<= 10;
7060 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7061 	if (ret < 0)
7062 		return ret;
7063 
7064 	*ppos += cnt;
7065 
7066 	return cnt;
7067 }
7068 
7069 static ssize_t
tracing_total_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)7070 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7071 				size_t cnt, loff_t *ppos)
7072 {
7073 	struct trace_array *tr = filp->private_data;
7074 	char buf[64];
7075 	int r, cpu;
7076 	unsigned long size = 0, expanded_size = 0;
7077 
7078 	mutex_lock(&trace_types_lock);
7079 	for_each_tracing_cpu(cpu) {
7080 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7081 		if (!tr->ring_buffer_expanded)
7082 			expanded_size += trace_buf_size >> 10;
7083 	}
7084 	if (tr->ring_buffer_expanded)
7085 		r = sprintf(buf, "%lu\n", size);
7086 	else
7087 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7088 	mutex_unlock(&trace_types_lock);
7089 
7090 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7091 }
7092 
7093 #define LAST_BOOT_HEADER ((void *)1)
7094 
l_next(struct seq_file * m,void * v,loff_t * pos)7095 static void *l_next(struct seq_file *m, void *v, loff_t *pos)
7096 {
7097 	struct trace_array *tr = m->private;
7098 	struct trace_scratch *tscratch = tr->scratch;
7099 	unsigned int index = *pos;
7100 
7101 	(*pos)++;
7102 
7103 	if (*pos == 1)
7104 		return LAST_BOOT_HEADER;
7105 
7106 	/* Only show offsets of the last boot data */
7107 	if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7108 		return NULL;
7109 
7110 	/* *pos 0 is for the header, 1 is for the first module */
7111 	index--;
7112 
7113 	if (index >= tscratch->nr_entries)
7114 		return NULL;
7115 
7116 	return &tscratch->entries[index];
7117 }
7118 
l_start(struct seq_file * m,loff_t * pos)7119 static void *l_start(struct seq_file *m, loff_t *pos)
7120 {
7121 	mutex_lock(&scratch_mutex);
7122 
7123 	return l_next(m, NULL, pos);
7124 }
7125 
l_stop(struct seq_file * m,void * p)7126 static void l_stop(struct seq_file *m, void *p)
7127 {
7128 	mutex_unlock(&scratch_mutex);
7129 }
7130 
show_last_boot_header(struct seq_file * m,struct trace_array * tr)7131 static void show_last_boot_header(struct seq_file *m, struct trace_array *tr)
7132 {
7133 	struct trace_scratch *tscratch = tr->scratch;
7134 
7135 	/*
7136 	 * Do not leak KASLR address. This only shows the KASLR address of
7137 	 * the last boot. When the ring buffer is started, the LAST_BOOT
7138 	 * flag gets cleared, and this should only report "current".
7139 	 * Otherwise it shows the KASLR address from the previous boot which
7140 	 * should not be the same as the current boot.
7141 	 */
7142 	if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7143 		seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr);
7144 	else
7145 		seq_puts(m, "# Current\n");
7146 }
7147 
l_show(struct seq_file * m,void * v)7148 static int l_show(struct seq_file *m, void *v)
7149 {
7150 	struct trace_array *tr = m->private;
7151 	struct trace_mod_entry *entry = v;
7152 
7153 	if (v == LAST_BOOT_HEADER) {
7154 		show_last_boot_header(m, tr);
7155 		return 0;
7156 	}
7157 
7158 	seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name);
7159 	return 0;
7160 }
7161 
7162 static const struct seq_operations last_boot_seq_ops = {
7163 	.start		= l_start,
7164 	.next		= l_next,
7165 	.stop		= l_stop,
7166 	.show		= l_show,
7167 };
7168 
tracing_last_boot_open(struct inode * inode,struct file * file)7169 static int tracing_last_boot_open(struct inode *inode, struct file *file)
7170 {
7171 	struct trace_array *tr = inode->i_private;
7172 	struct seq_file *m;
7173 	int ret;
7174 
7175 	ret = tracing_check_open_get_tr(tr);
7176 	if (ret)
7177 		return ret;
7178 
7179 	ret = seq_open(file, &last_boot_seq_ops);
7180 	if (ret) {
7181 		trace_array_put(tr);
7182 		return ret;
7183 	}
7184 
7185 	m = file->private_data;
7186 	m->private = tr;
7187 
7188 	return 0;
7189 }
7190 
tracing_buffer_meta_open(struct inode * inode,struct file * filp)7191 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
7192 {
7193 	struct trace_array *tr = inode->i_private;
7194 	int cpu = tracing_get_cpu(inode);
7195 	int ret;
7196 
7197 	ret = tracing_check_open_get_tr(tr);
7198 	if (ret)
7199 		return ret;
7200 
7201 	ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
7202 	if (ret < 0)
7203 		__trace_array_put(tr);
7204 	return ret;
7205 }
7206 
7207 static ssize_t
tracing_free_buffer_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7208 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7209 			  size_t cnt, loff_t *ppos)
7210 {
7211 	/*
7212 	 * There is no need to read what the user has written, this function
7213 	 * is just to make sure that there is no error when "echo" is used
7214 	 */
7215 
7216 	*ppos += cnt;
7217 
7218 	return cnt;
7219 }
7220 
7221 static int
tracing_free_buffer_release(struct inode * inode,struct file * filp)7222 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7223 {
7224 	struct trace_array *tr = inode->i_private;
7225 
7226 	/* disable tracing ? */
7227 	if (tr->trace_flags & TRACE_ITER(STOP_ON_FREE))
7228 		tracer_tracing_off(tr);
7229 	/* resize the ring buffer to 0 */
7230 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7231 
7232 	trace_array_put(tr);
7233 
7234 	return 0;
7235 }
7236 
7237 #define TRACE_MARKER_MAX_SIZE		4096
7238 
write_marker_to_buffer(struct trace_array * tr,const char * buf,size_t cnt,unsigned long ip)7239 static ssize_t write_marker_to_buffer(struct trace_array *tr, const char *buf,
7240 				      size_t cnt, unsigned long ip)
7241 {
7242 	struct ring_buffer_event *event;
7243 	enum event_trigger_type tt = ETT_NONE;
7244 	struct trace_buffer *buffer;
7245 	struct print_entry *entry;
7246 	int meta_size;
7247 	ssize_t written;
7248 	size_t size;
7249 
7250 	meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
7251  again:
7252 	size = cnt + meta_size;
7253 
7254 	buffer = tr->array_buffer.buffer;
7255 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7256 					    tracing_gen_ctx());
7257 	if (unlikely(!event)) {
7258 		/*
7259 		 * If the size was greater than what was allowed, then
7260 		 * make it smaller and try again.
7261 		 */
7262 		if (size > ring_buffer_max_event_size(buffer)) {
7263 			cnt = ring_buffer_max_event_size(buffer) - meta_size;
7264 			/* The above should only happen once */
7265 			if (WARN_ON_ONCE(cnt + meta_size == size))
7266 				return -EBADF;
7267 			goto again;
7268 		}
7269 
7270 		/* Ring buffer disabled, return as if not open for write */
7271 		return -EBADF;
7272 	}
7273 
7274 	entry = ring_buffer_event_data(event);
7275 	entry->ip = ip;
7276 	memcpy(&entry->buf, buf, cnt);
7277 	written = cnt;
7278 
7279 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7280 		/* do not add \n before testing triggers, but add \0 */
7281 		entry->buf[cnt] = '\0';
7282 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7283 	}
7284 
7285 	if (entry->buf[cnt - 1] != '\n') {
7286 		entry->buf[cnt] = '\n';
7287 		entry->buf[cnt + 1] = '\0';
7288 	} else
7289 		entry->buf[cnt] = '\0';
7290 
7291 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7292 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7293 	__buffer_unlock_commit(buffer, event);
7294 
7295 	if (tt)
7296 		event_triggers_post_call(tr->trace_marker_file, tt);
7297 
7298 	return written;
7299 }
7300 
7301 struct trace_user_buf {
7302 	char		*buf;
7303 };
7304 
7305 static DEFINE_MUTEX(trace_user_buffer_mutex);
7306 static struct trace_user_buf_info *trace_user_buffer;
7307 
7308 /**
7309  * trace_user_fault_destroy - free up allocated memory of a trace user buffer
7310  * @tinfo: The descriptor to free up
7311  *
7312  * Frees any data allocated in the trace info dsecriptor.
7313  */
trace_user_fault_destroy(struct trace_user_buf_info * tinfo)7314 void trace_user_fault_destroy(struct trace_user_buf_info *tinfo)
7315 {
7316 	char *buf;
7317 	int cpu;
7318 
7319 	if (!tinfo || !tinfo->tbuf)
7320 		return;
7321 
7322 	for_each_possible_cpu(cpu) {
7323 		buf = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
7324 		kfree(buf);
7325 	}
7326 	free_percpu(tinfo->tbuf);
7327 }
7328 
user_fault_buffer_enable(struct trace_user_buf_info * tinfo,size_t size)7329 static int user_fault_buffer_enable(struct trace_user_buf_info *tinfo, size_t size)
7330 {
7331 	char *buf;
7332 	int cpu;
7333 
7334 	lockdep_assert_held(&trace_user_buffer_mutex);
7335 
7336 	tinfo->tbuf = alloc_percpu(struct trace_user_buf);
7337 	if (!tinfo->tbuf)
7338 		return -ENOMEM;
7339 
7340 	tinfo->ref = 1;
7341 	tinfo->size = size;
7342 
7343 	/* Clear each buffer in case of error */
7344 	for_each_possible_cpu(cpu) {
7345 		per_cpu_ptr(tinfo->tbuf, cpu)->buf = NULL;
7346 	}
7347 
7348 	for_each_possible_cpu(cpu) {
7349 		buf = kmalloc_node(size, GFP_KERNEL,
7350 				   cpu_to_node(cpu));
7351 		if (!buf)
7352 			return -ENOMEM;
7353 		per_cpu_ptr(tinfo->tbuf, cpu)->buf = buf;
7354 	}
7355 
7356 	return 0;
7357 }
7358 
7359 /* For internal use. Free and reinitialize */
user_buffer_free(struct trace_user_buf_info ** tinfo)7360 static void user_buffer_free(struct trace_user_buf_info **tinfo)
7361 {
7362 	lockdep_assert_held(&trace_user_buffer_mutex);
7363 
7364 	trace_user_fault_destroy(*tinfo);
7365 	kfree(*tinfo);
7366 	*tinfo = NULL;
7367 }
7368 
7369 /* For internal use. Initialize and allocate */
user_buffer_init(struct trace_user_buf_info ** tinfo,size_t size)7370 static int user_buffer_init(struct trace_user_buf_info **tinfo, size_t size)
7371 {
7372 	bool alloc = false;
7373 	int ret;
7374 
7375 	lockdep_assert_held(&trace_user_buffer_mutex);
7376 
7377 	if (!*tinfo) {
7378 		alloc = true;
7379 		*tinfo = kzalloc(sizeof(**tinfo), GFP_KERNEL);
7380 		if (!*tinfo)
7381 			return -ENOMEM;
7382 	}
7383 
7384 	ret = user_fault_buffer_enable(*tinfo, size);
7385 	if (ret < 0 && alloc)
7386 		user_buffer_free(tinfo);
7387 
7388 	return ret;
7389 }
7390 
7391 /* For internal use, derefrence and free if necessary */
user_buffer_put(struct trace_user_buf_info ** tinfo)7392 static void user_buffer_put(struct trace_user_buf_info **tinfo)
7393 {
7394 	guard(mutex)(&trace_user_buffer_mutex);
7395 
7396 	if (WARN_ON_ONCE(!*tinfo || !(*tinfo)->ref))
7397 		return;
7398 
7399 	if (--(*tinfo)->ref)
7400 		return;
7401 
7402 	user_buffer_free(tinfo);
7403 }
7404 
7405 /**
7406  * trace_user_fault_init - Allocated or reference a per CPU buffer
7407  * @tinfo: A pointer to the trace buffer descriptor
7408  * @size: The size to allocate each per CPU buffer
7409  *
7410  * Create a per CPU buffer that can be used to copy from user space
7411  * in a task context. When calling trace_user_fault_read(), preemption
7412  * must be disabled, and it will enable preemption and copy user
7413  * space data to the buffer. If any schedule switches occur, it will
7414  * retry until it succeeds without a schedule switch knowing the buffer
7415  * is still valid.
7416  *
7417  * Returns 0 on success, negative on failure.
7418  */
trace_user_fault_init(struct trace_user_buf_info * tinfo,size_t size)7419 int trace_user_fault_init(struct trace_user_buf_info *tinfo, size_t size)
7420 {
7421 	int ret;
7422 
7423 	if (!tinfo)
7424 		return -EINVAL;
7425 
7426 	guard(mutex)(&trace_user_buffer_mutex);
7427 
7428 	ret = user_buffer_init(&tinfo, size);
7429 	if (ret < 0)
7430 		trace_user_fault_destroy(tinfo);
7431 
7432 	return ret;
7433 }
7434 
7435 /**
7436  * trace_user_fault_get - up the ref count for the user buffer
7437  * @tinfo: A pointer to a pointer to the trace buffer descriptor
7438  *
7439  * Ups the ref count of the trace buffer.
7440  *
7441  * Returns the new ref count.
7442  */
trace_user_fault_get(struct trace_user_buf_info * tinfo)7443 int trace_user_fault_get(struct trace_user_buf_info *tinfo)
7444 {
7445 	if (!tinfo)
7446 		return -1;
7447 
7448 	guard(mutex)(&trace_user_buffer_mutex);
7449 
7450 	tinfo->ref++;
7451 	return tinfo->ref;
7452 }
7453 
7454 /**
7455  * trace_user_fault_put - dereference a per cpu trace buffer
7456  * @tinfo: The @tinfo that was passed to trace_user_fault_get()
7457  *
7458  * Decrement the ref count of @tinfo.
7459  *
7460  * Returns the new refcount (negative on error).
7461  */
trace_user_fault_put(struct trace_user_buf_info * tinfo)7462 int trace_user_fault_put(struct trace_user_buf_info *tinfo)
7463 {
7464 	guard(mutex)(&trace_user_buffer_mutex);
7465 
7466 	if (WARN_ON_ONCE(!tinfo || !tinfo->ref))
7467 		return -1;
7468 
7469 	--tinfo->ref;
7470 	return tinfo->ref;
7471 }
7472 
7473 /**
7474  * trace_user_fault_read - Read user space into a per CPU buffer
7475  * @tinfo: The @tinfo allocated by trace_user_fault_get()
7476  * @ptr: The user space pointer to read
7477  * @size: The size of user space to read.
7478  * @copy_func: Optional function to use to copy from user space
7479  * @data: Data to pass to copy_func if it was supplied
7480  *
7481  * Preemption must be disabled when this is called, and must not
7482  * be enabled while using the returned buffer.
7483  * This does the copying from user space into a per CPU buffer.
7484  *
7485  * The @size must not be greater than the size passed in to
7486  * trace_user_fault_init().
7487  *
7488  * If @copy_func is NULL, trace_user_fault_read() will use copy_from_user(),
7489  * otherwise it will call @copy_func. It will call @copy_func with:
7490  *
7491  *   buffer: the per CPU buffer of the @tinfo.
7492  *   ptr: The pointer @ptr to user space to read
7493  *   size: The @size of the ptr to read
7494  *   data: The @data parameter
7495  *
7496  * It is expected that @copy_func will return 0 on success and non zero
7497  * if there was a fault.
7498  *
7499  * Returns a pointer to the buffer with the content read from @ptr.
7500  *   Preemption must remain disabled while the caller accesses the
7501  *   buffer returned by this function.
7502  * Returns NULL if there was a fault, or the size passed in is
7503  *   greater than the size passed to trace_user_fault_init().
7504  */
trace_user_fault_read(struct trace_user_buf_info * tinfo,const char __user * ptr,size_t size,trace_user_buf_copy copy_func,void * data)7505 char *trace_user_fault_read(struct trace_user_buf_info *tinfo,
7506 			     const char __user *ptr, size_t size,
7507 			     trace_user_buf_copy copy_func, void *data)
7508 {
7509 	int cpu = smp_processor_id();
7510 	char *buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
7511 	unsigned int cnt;
7512 	int trys = 0;
7513 	int ret;
7514 
7515 	lockdep_assert_preemption_disabled();
7516 
7517 	/*
7518 	 * It's up to the caller to not try to copy more than it said
7519 	 * it would.
7520 	 */
7521 	if (size > tinfo->size)
7522 		return NULL;
7523 
7524 	/*
7525 	 * This acts similar to a seqcount. The per CPU context switches are
7526 	 * recorded, migration is disabled and preemption is enabled. The
7527 	 * read of the user space memory is copied into the per CPU buffer.
7528 	 * Preemption is disabled again, and if the per CPU context switches count
7529 	 * is still the same, it means the buffer has not been corrupted.
7530 	 * If the count is different, it is assumed the buffer is corrupted
7531 	 * and reading must be tried again.
7532 	 */
7533 
7534 	do {
7535 		/*
7536 		 * If for some reason, copy_from_user() always causes a context
7537 		 * switch, this would then cause an infinite loop.
7538 		 * If this task is preempted by another user space task, it
7539 		 * will cause this task to try again. But just in case something
7540 		 * changes where the copying from user space causes another task
7541 		 * to run, prevent this from going into an infinite loop.
7542 		 * 100 tries should be plenty.
7543 		 */
7544 		if (WARN_ONCE(trys++ > 100, "Error: Too many tries to read user space"))
7545 			return NULL;
7546 
7547 		/* Read the current CPU context switch counter */
7548 		cnt = nr_context_switches_cpu(cpu);
7549 
7550 		/*
7551 		 * Preemption is going to be enabled, but this task must
7552 		 * remain on this CPU.
7553 		 */
7554 		migrate_disable();
7555 
7556 		/*
7557 		 * Now preemption is being enabed and another task can come in
7558 		 * and use the same buffer and corrupt our data.
7559 		 */
7560 		preempt_enable_notrace();
7561 
7562 		/* Make sure preemption is enabled here */
7563 		lockdep_assert_preemption_enabled();
7564 
7565 		if (copy_func) {
7566 			ret = copy_func(buffer, ptr, size, data);
7567 		} else {
7568 			ret = __copy_from_user(buffer, ptr, size);
7569 		}
7570 
7571 		preempt_disable_notrace();
7572 		migrate_enable();
7573 
7574 		/* if it faulted, no need to test if the buffer was corrupted */
7575 		if (ret)
7576 			return NULL;
7577 
7578 		/*
7579 		 * Preemption is disabled again, now check the per CPU context
7580 		 * switch counter. If it doesn't match, then another user space
7581 		 * process may have schedule in and corrupted our buffer. In that
7582 		 * case the copying must be retried.
7583 		 */
7584 	} while (nr_context_switches_cpu(cpu) != cnt);
7585 
7586 	return buffer;
7587 }
7588 
7589 static ssize_t
tracing_mark_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7590 tracing_mark_write(struct file *filp, const char __user *ubuf,
7591 					size_t cnt, loff_t *fpos)
7592 {
7593 	struct trace_array *tr = filp->private_data;
7594 	ssize_t written = -ENODEV;
7595 	unsigned long ip;
7596 	char *buf;
7597 
7598 	if (tracing_disabled)
7599 		return -EINVAL;
7600 
7601 	if (!(tr->trace_flags & TRACE_ITER(MARKERS)))
7602 		return -EINVAL;
7603 
7604 	if ((ssize_t)cnt < 0)
7605 		return -EINVAL;
7606 
7607 	if (cnt > TRACE_MARKER_MAX_SIZE)
7608 		cnt = TRACE_MARKER_MAX_SIZE;
7609 
7610 	/* Must have preemption disabled while having access to the buffer */
7611 	guard(preempt_notrace)();
7612 
7613 	buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL);
7614 	if (!buf)
7615 		return -EFAULT;
7616 
7617 	/* The selftests expect this function to be the IP address */
7618 	ip = _THIS_IP_;
7619 
7620 	/* The global trace_marker can go to multiple instances */
7621 	if (tr == &global_trace) {
7622 		guard(rcu)();
7623 		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7624 			written = write_marker_to_buffer(tr, buf, cnt, ip);
7625 			if (written < 0)
7626 				break;
7627 		}
7628 	} else {
7629 		written = write_marker_to_buffer(tr, buf, cnt, ip);
7630 	}
7631 
7632 	return written;
7633 }
7634 
write_raw_marker_to_buffer(struct trace_array * tr,const char * buf,size_t cnt)7635 static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
7636 					  const char *buf, size_t cnt)
7637 {
7638 	struct ring_buffer_event *event;
7639 	struct trace_buffer *buffer;
7640 	struct raw_data_entry *entry;
7641 	ssize_t written;
7642 	size_t size;
7643 
7644 	/* cnt includes both the entry->id and the data behind it. */
7645 	size = struct_offset(entry, id) + cnt;
7646 
7647 	buffer = tr->array_buffer.buffer;
7648 
7649 	if (size > ring_buffer_max_event_size(buffer))
7650 		return -EINVAL;
7651 
7652 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7653 					    tracing_gen_ctx());
7654 	if (!event)
7655 		/* Ring buffer disabled, return as if not open for write */
7656 		return -EBADF;
7657 
7658 	entry = ring_buffer_event_data(event);
7659 	unsafe_memcpy(&entry->id, buf, cnt,
7660 		      "id and content already reserved on ring buffer"
7661 		      "'buf' includes the 'id' and the data."
7662 		      "'entry' was allocated with cnt from 'id'.");
7663 	written = cnt;
7664 
7665 	__buffer_unlock_commit(buffer, event);
7666 
7667 	return written;
7668 }
7669 
7670 static ssize_t
tracing_mark_raw_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7671 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7672 					size_t cnt, loff_t *fpos)
7673 {
7674 	struct trace_array *tr = filp->private_data;
7675 	ssize_t written = -ENODEV;
7676 	char *buf;
7677 
7678 	if (tracing_disabled)
7679 		return -EINVAL;
7680 
7681 	if (!(tr->trace_flags & TRACE_ITER(MARKERS)))
7682 		return -EINVAL;
7683 
7684 	/* The marker must at least have a tag id */
7685 	if (cnt < sizeof(unsigned int))
7686 		return -EINVAL;
7687 
7688 	/* raw write is all or nothing */
7689 	if (cnt > TRACE_MARKER_MAX_SIZE)
7690 		return -EINVAL;
7691 
7692 	/* Must have preemption disabled while having access to the buffer */
7693 	guard(preempt_notrace)();
7694 
7695 	buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL);
7696 	if (!buf)
7697 		return -EFAULT;
7698 
7699 	/* The global trace_marker_raw can go to multiple instances */
7700 	if (tr == &global_trace) {
7701 		guard(rcu)();
7702 		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7703 			written = write_raw_marker_to_buffer(tr, buf, cnt);
7704 			if (written < 0)
7705 				break;
7706 		}
7707 	} else {
7708 		written = write_raw_marker_to_buffer(tr, buf, cnt);
7709 	}
7710 
7711 	return written;
7712 }
7713 
tracing_mark_open(struct inode * inode,struct file * filp)7714 static int tracing_mark_open(struct inode *inode, struct file *filp)
7715 {
7716 	int ret;
7717 
7718 	scoped_guard(mutex, &trace_user_buffer_mutex) {
7719 		if (!trace_user_buffer) {
7720 			ret = user_buffer_init(&trace_user_buffer, TRACE_MARKER_MAX_SIZE);
7721 			if (ret < 0)
7722 				return ret;
7723 		} else {
7724 			trace_user_buffer->ref++;
7725 		}
7726 	}
7727 
7728 	stream_open(inode, filp);
7729 	ret = tracing_open_generic_tr(inode, filp);
7730 	if (ret < 0)
7731 		user_buffer_put(&trace_user_buffer);
7732 	return ret;
7733 }
7734 
tracing_mark_release(struct inode * inode,struct file * file)7735 static int tracing_mark_release(struct inode *inode, struct file *file)
7736 {
7737 	user_buffer_put(&trace_user_buffer);
7738 	return tracing_release_generic_tr(inode, file);
7739 }
7740 
tracing_clock_show(struct seq_file * m,void * v)7741 static int tracing_clock_show(struct seq_file *m, void *v)
7742 {
7743 	struct trace_array *tr = m->private;
7744 	int i;
7745 
7746 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7747 		seq_printf(m,
7748 			"%s%s%s%s", i ? " " : "",
7749 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7750 			i == tr->clock_id ? "]" : "");
7751 	seq_putc(m, '\n');
7752 
7753 	return 0;
7754 }
7755 
tracing_set_clock(struct trace_array * tr,const char * clockstr)7756 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7757 {
7758 	int i;
7759 
7760 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7761 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7762 			break;
7763 	}
7764 	if (i == ARRAY_SIZE(trace_clocks))
7765 		return -EINVAL;
7766 
7767 	guard(mutex)(&trace_types_lock);
7768 
7769 	tr->clock_id = i;
7770 
7771 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7772 
7773 	/*
7774 	 * New clock may not be consistent with the previous clock.
7775 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7776 	 */
7777 	tracing_reset_online_cpus(&tr->array_buffer);
7778 
7779 #ifdef CONFIG_TRACER_MAX_TRACE
7780 	if (tr->max_buffer.buffer)
7781 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7782 	tracing_reset_online_cpus(&tr->max_buffer);
7783 #endif
7784 
7785 	if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
7786 		struct trace_scratch *tscratch = tr->scratch;
7787 
7788 		tscratch->clock_id = i;
7789 	}
7790 
7791 	return 0;
7792 }
7793 
tracing_clock_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7794 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7795 				   size_t cnt, loff_t *fpos)
7796 {
7797 	struct seq_file *m = filp->private_data;
7798 	struct trace_array *tr = m->private;
7799 	char buf[64];
7800 	const char *clockstr;
7801 	int ret;
7802 
7803 	if (cnt >= sizeof(buf))
7804 		return -EINVAL;
7805 
7806 	if (copy_from_user(buf, ubuf, cnt))
7807 		return -EFAULT;
7808 
7809 	buf[cnt] = 0;
7810 
7811 	clockstr = strstrip(buf);
7812 
7813 	ret = tracing_set_clock(tr, clockstr);
7814 	if (ret)
7815 		return ret;
7816 
7817 	*fpos += cnt;
7818 
7819 	return cnt;
7820 }
7821 
tracing_clock_open(struct inode * inode,struct file * file)7822 static int tracing_clock_open(struct inode *inode, struct file *file)
7823 {
7824 	struct trace_array *tr = inode->i_private;
7825 	int ret;
7826 
7827 	ret = tracing_check_open_get_tr(tr);
7828 	if (ret)
7829 		return ret;
7830 
7831 	ret = single_open(file, tracing_clock_show, inode->i_private);
7832 	if (ret < 0)
7833 		trace_array_put(tr);
7834 
7835 	return ret;
7836 }
7837 
tracing_time_stamp_mode_show(struct seq_file * m,void * v)7838 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7839 {
7840 	struct trace_array *tr = m->private;
7841 
7842 	guard(mutex)(&trace_types_lock);
7843 
7844 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7845 		seq_puts(m, "delta [absolute]\n");
7846 	else
7847 		seq_puts(m, "[delta] absolute\n");
7848 
7849 	return 0;
7850 }
7851 
tracing_time_stamp_mode_open(struct inode * inode,struct file * file)7852 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7853 {
7854 	struct trace_array *tr = inode->i_private;
7855 	int ret;
7856 
7857 	ret = tracing_check_open_get_tr(tr);
7858 	if (ret)
7859 		return ret;
7860 
7861 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7862 	if (ret < 0)
7863 		trace_array_put(tr);
7864 
7865 	return ret;
7866 }
7867 
tracing_event_time_stamp(struct trace_buffer * buffer,struct ring_buffer_event * rbe)7868 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7869 {
7870 	if (rbe == this_cpu_read(trace_buffered_event))
7871 		return ring_buffer_time_stamp(buffer);
7872 
7873 	return ring_buffer_event_time_stamp(buffer, rbe);
7874 }
7875 
7876 /*
7877  * Set or disable using the per CPU trace_buffer_event when possible.
7878  */
tracing_set_filter_buffering(struct trace_array * tr,bool set)7879 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7880 {
7881 	guard(mutex)(&trace_types_lock);
7882 
7883 	if (set && tr->no_filter_buffering_ref++)
7884 		return 0;
7885 
7886 	if (!set) {
7887 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref))
7888 			return -EINVAL;
7889 
7890 		--tr->no_filter_buffering_ref;
7891 	}
7892 
7893 	return 0;
7894 }
7895 
7896 struct ftrace_buffer_info {
7897 	struct trace_iterator	iter;
7898 	void			*spare;
7899 	unsigned int		spare_cpu;
7900 	unsigned int		spare_size;
7901 	unsigned int		read;
7902 };
7903 
7904 #ifdef CONFIG_TRACER_SNAPSHOT
tracing_snapshot_open(struct inode * inode,struct file * file)7905 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7906 {
7907 	struct trace_array *tr = inode->i_private;
7908 	struct trace_iterator *iter;
7909 	struct seq_file *m;
7910 	int ret;
7911 
7912 	ret = tracing_check_open_get_tr(tr);
7913 	if (ret)
7914 		return ret;
7915 
7916 	if (file->f_mode & FMODE_READ) {
7917 		iter = __tracing_open(inode, file, true);
7918 		if (IS_ERR(iter))
7919 			ret = PTR_ERR(iter);
7920 	} else {
7921 		/* Writes still need the seq_file to hold the private data */
7922 		ret = -ENOMEM;
7923 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7924 		if (!m)
7925 			goto out;
7926 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7927 		if (!iter) {
7928 			kfree(m);
7929 			goto out;
7930 		}
7931 		ret = 0;
7932 
7933 		iter->tr = tr;
7934 		iter->array_buffer = &tr->max_buffer;
7935 		iter->cpu_file = tracing_get_cpu(inode);
7936 		m->private = iter;
7937 		file->private_data = m;
7938 	}
7939 out:
7940 	if (ret < 0)
7941 		trace_array_put(tr);
7942 
7943 	return ret;
7944 }
7945 
tracing_swap_cpu_buffer(void * tr)7946 static void tracing_swap_cpu_buffer(void *tr)
7947 {
7948 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7949 }
7950 
7951 static ssize_t
tracing_snapshot_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7952 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7953 		       loff_t *ppos)
7954 {
7955 	struct seq_file *m = filp->private_data;
7956 	struct trace_iterator *iter = m->private;
7957 	struct trace_array *tr = iter->tr;
7958 	unsigned long val;
7959 	int ret;
7960 
7961 	ret = tracing_update_buffers(tr);
7962 	if (ret < 0)
7963 		return ret;
7964 
7965 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7966 	if (ret)
7967 		return ret;
7968 
7969 	guard(mutex)(&trace_types_lock);
7970 
7971 	if (tr->current_trace->use_max_tr)
7972 		return -EBUSY;
7973 
7974 	local_irq_disable();
7975 	arch_spin_lock(&tr->max_lock);
7976 	if (tr->cond_snapshot)
7977 		ret = -EBUSY;
7978 	arch_spin_unlock(&tr->max_lock);
7979 	local_irq_enable();
7980 	if (ret)
7981 		return ret;
7982 
7983 	switch (val) {
7984 	case 0:
7985 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7986 			return -EINVAL;
7987 		if (tr->allocated_snapshot)
7988 			free_snapshot(tr);
7989 		break;
7990 	case 1:
7991 /* Only allow per-cpu swap if the ring buffer supports it */
7992 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7993 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7994 			return -EINVAL;
7995 #endif
7996 		if (tr->allocated_snapshot)
7997 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7998 					&tr->array_buffer, iter->cpu_file);
7999 
8000 		ret = tracing_arm_snapshot_locked(tr);
8001 		if (ret)
8002 			return ret;
8003 
8004 		/* Now, we're going to swap */
8005 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
8006 			local_irq_disable();
8007 			update_max_tr(tr, current, smp_processor_id(), NULL);
8008 			local_irq_enable();
8009 		} else {
8010 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
8011 						 (void *)tr, 1);
8012 		}
8013 		tracing_disarm_snapshot(tr);
8014 		break;
8015 	default:
8016 		if (tr->allocated_snapshot) {
8017 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
8018 				tracing_reset_online_cpus(&tr->max_buffer);
8019 			else
8020 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
8021 		}
8022 		break;
8023 	}
8024 
8025 	if (ret >= 0) {
8026 		*ppos += cnt;
8027 		ret = cnt;
8028 	}
8029 
8030 	return ret;
8031 }
8032 
tracing_snapshot_release(struct inode * inode,struct file * file)8033 static int tracing_snapshot_release(struct inode *inode, struct file *file)
8034 {
8035 	struct seq_file *m = file->private_data;
8036 	int ret;
8037 
8038 	ret = tracing_release(inode, file);
8039 
8040 	if (file->f_mode & FMODE_READ)
8041 		return ret;
8042 
8043 	/* If write only, the seq_file is just a stub */
8044 	if (m)
8045 		kfree(m->private);
8046 	kfree(m);
8047 
8048 	return 0;
8049 }
8050 
8051 static int tracing_buffers_open(struct inode *inode, struct file *filp);
8052 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
8053 				    size_t count, loff_t *ppos);
8054 static int tracing_buffers_release(struct inode *inode, struct file *file);
8055 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8056 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
8057 
snapshot_raw_open(struct inode * inode,struct file * filp)8058 static int snapshot_raw_open(struct inode *inode, struct file *filp)
8059 {
8060 	struct ftrace_buffer_info *info;
8061 	int ret;
8062 
8063 	/* The following checks for tracefs lockdown */
8064 	ret = tracing_buffers_open(inode, filp);
8065 	if (ret < 0)
8066 		return ret;
8067 
8068 	info = filp->private_data;
8069 
8070 	if (info->iter.trace->use_max_tr) {
8071 		tracing_buffers_release(inode, filp);
8072 		return -EBUSY;
8073 	}
8074 
8075 	info->iter.snapshot = true;
8076 	info->iter.array_buffer = &info->iter.tr->max_buffer;
8077 
8078 	return ret;
8079 }
8080 
8081 #endif /* CONFIG_TRACER_SNAPSHOT */
8082 
8083 
8084 static const struct file_operations tracing_thresh_fops = {
8085 	.open		= tracing_open_generic,
8086 	.read		= tracing_thresh_read,
8087 	.write		= tracing_thresh_write,
8088 	.llseek		= generic_file_llseek,
8089 };
8090 
8091 #ifdef CONFIG_TRACER_MAX_TRACE
8092 static const struct file_operations tracing_max_lat_fops = {
8093 	.open		= tracing_open_generic_tr,
8094 	.read		= tracing_max_lat_read,
8095 	.write		= tracing_max_lat_write,
8096 	.llseek		= generic_file_llseek,
8097 	.release	= tracing_release_generic_tr,
8098 };
8099 #endif
8100 
8101 static const struct file_operations set_tracer_fops = {
8102 	.open		= tracing_open_generic_tr,
8103 	.read		= tracing_set_trace_read,
8104 	.write		= tracing_set_trace_write,
8105 	.llseek		= generic_file_llseek,
8106 	.release	= tracing_release_generic_tr,
8107 };
8108 
8109 static const struct file_operations tracing_pipe_fops = {
8110 	.open		= tracing_open_pipe,
8111 	.poll		= tracing_poll_pipe,
8112 	.read		= tracing_read_pipe,
8113 	.splice_read	= tracing_splice_read_pipe,
8114 	.release	= tracing_release_pipe,
8115 };
8116 
8117 static const struct file_operations tracing_entries_fops = {
8118 	.open		= tracing_open_generic_tr,
8119 	.read		= tracing_entries_read,
8120 	.write		= tracing_entries_write,
8121 	.llseek		= generic_file_llseek,
8122 	.release	= tracing_release_generic_tr,
8123 };
8124 
8125 static const struct file_operations tracing_syscall_buf_fops = {
8126 	.open		= tracing_open_generic_tr,
8127 	.read		= tracing_syscall_buf_read,
8128 	.write		= tracing_syscall_buf_write,
8129 	.llseek		= generic_file_llseek,
8130 	.release	= tracing_release_generic_tr,
8131 };
8132 
8133 static const struct file_operations tracing_buffer_meta_fops = {
8134 	.open		= tracing_buffer_meta_open,
8135 	.read		= seq_read,
8136 	.llseek		= seq_lseek,
8137 	.release	= tracing_seq_release,
8138 };
8139 
8140 static const struct file_operations tracing_total_entries_fops = {
8141 	.open		= tracing_open_generic_tr,
8142 	.read		= tracing_total_entries_read,
8143 	.llseek		= generic_file_llseek,
8144 	.release	= tracing_release_generic_tr,
8145 };
8146 
8147 static const struct file_operations tracing_free_buffer_fops = {
8148 	.open		= tracing_open_generic_tr,
8149 	.write		= tracing_free_buffer_write,
8150 	.release	= tracing_free_buffer_release,
8151 };
8152 
8153 static const struct file_operations tracing_mark_fops = {
8154 	.open		= tracing_mark_open,
8155 	.write		= tracing_mark_write,
8156 	.release	= tracing_mark_release,
8157 };
8158 
8159 static const struct file_operations tracing_mark_raw_fops = {
8160 	.open		= tracing_mark_open,
8161 	.write		= tracing_mark_raw_write,
8162 	.release	= tracing_mark_release,
8163 };
8164 
8165 static const struct file_operations trace_clock_fops = {
8166 	.open		= tracing_clock_open,
8167 	.read		= seq_read,
8168 	.llseek		= seq_lseek,
8169 	.release	= tracing_single_release_tr,
8170 	.write		= tracing_clock_write,
8171 };
8172 
8173 static const struct file_operations trace_time_stamp_mode_fops = {
8174 	.open		= tracing_time_stamp_mode_open,
8175 	.read		= seq_read,
8176 	.llseek		= seq_lseek,
8177 	.release	= tracing_single_release_tr,
8178 };
8179 
8180 static const struct file_operations last_boot_fops = {
8181 	.open		= tracing_last_boot_open,
8182 	.read		= seq_read,
8183 	.llseek		= seq_lseek,
8184 	.release	= tracing_seq_release,
8185 };
8186 
8187 #ifdef CONFIG_TRACER_SNAPSHOT
8188 static const struct file_operations snapshot_fops = {
8189 	.open		= tracing_snapshot_open,
8190 	.read		= seq_read,
8191 	.write		= tracing_snapshot_write,
8192 	.llseek		= tracing_lseek,
8193 	.release	= tracing_snapshot_release,
8194 };
8195 
8196 static const struct file_operations snapshot_raw_fops = {
8197 	.open		= snapshot_raw_open,
8198 	.read		= tracing_buffers_read,
8199 	.release	= tracing_buffers_release,
8200 	.splice_read	= tracing_buffers_splice_read,
8201 };
8202 
8203 #endif /* CONFIG_TRACER_SNAPSHOT */
8204 
8205 /*
8206  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
8207  * @filp: The active open file structure
8208  * @ubuf: The userspace provided buffer to read value into
8209  * @cnt: The maximum number of bytes to read
8210  * @ppos: The current "file" position
8211  *
8212  * This function implements the write interface for a struct trace_min_max_param.
8213  * The filp->private_data must point to a trace_min_max_param structure that
8214  * defines where to write the value, the min and the max acceptable values,
8215  * and a lock to protect the write.
8216  */
8217 static ssize_t
trace_min_max_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)8218 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
8219 {
8220 	struct trace_min_max_param *param = filp->private_data;
8221 	u64 val;
8222 	int err;
8223 
8224 	if (!param)
8225 		return -EFAULT;
8226 
8227 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
8228 	if (err)
8229 		return err;
8230 
8231 	if (param->lock)
8232 		mutex_lock(param->lock);
8233 
8234 	if (param->min && val < *param->min)
8235 		err = -EINVAL;
8236 
8237 	if (param->max && val > *param->max)
8238 		err = -EINVAL;
8239 
8240 	if (!err)
8241 		*param->val = val;
8242 
8243 	if (param->lock)
8244 		mutex_unlock(param->lock);
8245 
8246 	if (err)
8247 		return err;
8248 
8249 	return cnt;
8250 }
8251 
8252 /*
8253  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
8254  * @filp: The active open file structure
8255  * @ubuf: The userspace provided buffer to read value into
8256  * @cnt: The maximum number of bytes to read
8257  * @ppos: The current "file" position
8258  *
8259  * This function implements the read interface for a struct trace_min_max_param.
8260  * The filp->private_data must point to a trace_min_max_param struct with valid
8261  * data.
8262  */
8263 static ssize_t
trace_min_max_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8264 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
8265 {
8266 	struct trace_min_max_param *param = filp->private_data;
8267 	char buf[U64_STR_SIZE];
8268 	int len;
8269 	u64 val;
8270 
8271 	if (!param)
8272 		return -EFAULT;
8273 
8274 	val = *param->val;
8275 
8276 	if (cnt > sizeof(buf))
8277 		cnt = sizeof(buf);
8278 
8279 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
8280 
8281 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
8282 }
8283 
8284 const struct file_operations trace_min_max_fops = {
8285 	.open		= tracing_open_generic,
8286 	.read		= trace_min_max_read,
8287 	.write		= trace_min_max_write,
8288 };
8289 
8290 #define TRACING_LOG_ERRS_MAX	8
8291 #define TRACING_LOG_LOC_MAX	128
8292 
8293 #define CMD_PREFIX "  Command: "
8294 
8295 struct err_info {
8296 	const char	**errs;	/* ptr to loc-specific array of err strings */
8297 	u8		type;	/* index into errs -> specific err string */
8298 	u16		pos;	/* caret position */
8299 	u64		ts;
8300 };
8301 
8302 struct tracing_log_err {
8303 	struct list_head	list;
8304 	struct err_info		info;
8305 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
8306 	char			*cmd;                     /* what caused err */
8307 };
8308 
8309 static DEFINE_MUTEX(tracing_err_log_lock);
8310 
alloc_tracing_log_err(int len)8311 static struct tracing_log_err *alloc_tracing_log_err(int len)
8312 {
8313 	struct tracing_log_err *err;
8314 
8315 	err = kzalloc(sizeof(*err), GFP_KERNEL);
8316 	if (!err)
8317 		return ERR_PTR(-ENOMEM);
8318 
8319 	err->cmd = kzalloc(len, GFP_KERNEL);
8320 	if (!err->cmd) {
8321 		kfree(err);
8322 		return ERR_PTR(-ENOMEM);
8323 	}
8324 
8325 	return err;
8326 }
8327 
free_tracing_log_err(struct tracing_log_err * err)8328 static void free_tracing_log_err(struct tracing_log_err *err)
8329 {
8330 	kfree(err->cmd);
8331 	kfree(err);
8332 }
8333 
get_tracing_log_err(struct trace_array * tr,int len)8334 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
8335 						   int len)
8336 {
8337 	struct tracing_log_err *err;
8338 	char *cmd;
8339 
8340 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
8341 		err = alloc_tracing_log_err(len);
8342 		if (PTR_ERR(err) != -ENOMEM)
8343 			tr->n_err_log_entries++;
8344 
8345 		return err;
8346 	}
8347 	cmd = kzalloc(len, GFP_KERNEL);
8348 	if (!cmd)
8349 		return ERR_PTR(-ENOMEM);
8350 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8351 	kfree(err->cmd);
8352 	err->cmd = cmd;
8353 	list_del(&err->list);
8354 
8355 	return err;
8356 }
8357 
8358 /**
8359  * err_pos - find the position of a string within a command for error careting
8360  * @cmd: The tracing command that caused the error
8361  * @str: The string to position the caret at within @cmd
8362  *
8363  * Finds the position of the first occurrence of @str within @cmd.  The
8364  * return value can be passed to tracing_log_err() for caret placement
8365  * within @cmd.
8366  *
8367  * Returns the index within @cmd of the first occurrence of @str or 0
8368  * if @str was not found.
8369  */
err_pos(char * cmd,const char * str)8370 unsigned int err_pos(char *cmd, const char *str)
8371 {
8372 	char *found;
8373 
8374 	if (WARN_ON(!strlen(cmd)))
8375 		return 0;
8376 
8377 	found = strstr(cmd, str);
8378 	if (found)
8379 		return found - cmd;
8380 
8381 	return 0;
8382 }
8383 
8384 /**
8385  * tracing_log_err - write an error to the tracing error log
8386  * @tr: The associated trace array for the error (NULL for top level array)
8387  * @loc: A string describing where the error occurred
8388  * @cmd: The tracing command that caused the error
8389  * @errs: The array of loc-specific static error strings
8390  * @type: The index into errs[], which produces the specific static err string
8391  * @pos: The position the caret should be placed in the cmd
8392  *
8393  * Writes an error into tracing/error_log of the form:
8394  *
8395  * <loc>: error: <text>
8396  *   Command: <cmd>
8397  *              ^
8398  *
8399  * tracing/error_log is a small log file containing the last
8400  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
8401  * unless there has been a tracing error, and the error log can be
8402  * cleared and have its memory freed by writing the empty string in
8403  * truncation mode to it i.e. echo > tracing/error_log.
8404  *
8405  * NOTE: the @errs array along with the @type param are used to
8406  * produce a static error string - this string is not copied and saved
8407  * when the error is logged - only a pointer to it is saved.  See
8408  * existing callers for examples of how static strings are typically
8409  * defined for use with tracing_log_err().
8410  */
tracing_log_err(struct trace_array * tr,const char * loc,const char * cmd,const char ** errs,u8 type,u16 pos)8411 void tracing_log_err(struct trace_array *tr,
8412 		     const char *loc, const char *cmd,
8413 		     const char **errs, u8 type, u16 pos)
8414 {
8415 	struct tracing_log_err *err;
8416 	int len = 0;
8417 
8418 	if (!tr)
8419 		tr = &global_trace;
8420 
8421 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8422 
8423 	guard(mutex)(&tracing_err_log_lock);
8424 
8425 	err = get_tracing_log_err(tr, len);
8426 	if (PTR_ERR(err) == -ENOMEM)
8427 		return;
8428 
8429 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8430 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8431 
8432 	err->info.errs = errs;
8433 	err->info.type = type;
8434 	err->info.pos = pos;
8435 	err->info.ts = local_clock();
8436 
8437 	list_add_tail(&err->list, &tr->err_log);
8438 }
8439 
clear_tracing_err_log(struct trace_array * tr)8440 static void clear_tracing_err_log(struct trace_array *tr)
8441 {
8442 	struct tracing_log_err *err, *next;
8443 
8444 	guard(mutex)(&tracing_err_log_lock);
8445 
8446 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
8447 		list_del(&err->list);
8448 		free_tracing_log_err(err);
8449 	}
8450 
8451 	tr->n_err_log_entries = 0;
8452 }
8453 
tracing_err_log_seq_start(struct seq_file * m,loff_t * pos)8454 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8455 {
8456 	struct trace_array *tr = m->private;
8457 
8458 	mutex_lock(&tracing_err_log_lock);
8459 
8460 	return seq_list_start(&tr->err_log, *pos);
8461 }
8462 
tracing_err_log_seq_next(struct seq_file * m,void * v,loff_t * pos)8463 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8464 {
8465 	struct trace_array *tr = m->private;
8466 
8467 	return seq_list_next(v, &tr->err_log, pos);
8468 }
8469 
tracing_err_log_seq_stop(struct seq_file * m,void * v)8470 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8471 {
8472 	mutex_unlock(&tracing_err_log_lock);
8473 }
8474 
tracing_err_log_show_pos(struct seq_file * m,u16 pos)8475 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8476 {
8477 	u16 i;
8478 
8479 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8480 		seq_putc(m, ' ');
8481 	for (i = 0; i < pos; i++)
8482 		seq_putc(m, ' ');
8483 	seq_puts(m, "^\n");
8484 }
8485 
tracing_err_log_seq_show(struct seq_file * m,void * v)8486 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8487 {
8488 	struct tracing_log_err *err = v;
8489 
8490 	if (err) {
8491 		const char *err_text = err->info.errs[err->info.type];
8492 		u64 sec = err->info.ts;
8493 		u32 nsec;
8494 
8495 		nsec = do_div(sec, NSEC_PER_SEC);
8496 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8497 			   err->loc, err_text);
8498 		seq_printf(m, "%s", err->cmd);
8499 		tracing_err_log_show_pos(m, err->info.pos);
8500 	}
8501 
8502 	return 0;
8503 }
8504 
8505 static const struct seq_operations tracing_err_log_seq_ops = {
8506 	.start  = tracing_err_log_seq_start,
8507 	.next   = tracing_err_log_seq_next,
8508 	.stop   = tracing_err_log_seq_stop,
8509 	.show   = tracing_err_log_seq_show
8510 };
8511 
tracing_err_log_open(struct inode * inode,struct file * file)8512 static int tracing_err_log_open(struct inode *inode, struct file *file)
8513 {
8514 	struct trace_array *tr = inode->i_private;
8515 	int ret = 0;
8516 
8517 	ret = tracing_check_open_get_tr(tr);
8518 	if (ret)
8519 		return ret;
8520 
8521 	/* If this file was opened for write, then erase contents */
8522 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8523 		clear_tracing_err_log(tr);
8524 
8525 	if (file->f_mode & FMODE_READ) {
8526 		ret = seq_open(file, &tracing_err_log_seq_ops);
8527 		if (!ret) {
8528 			struct seq_file *m = file->private_data;
8529 			m->private = tr;
8530 		} else {
8531 			trace_array_put(tr);
8532 		}
8533 	}
8534 	return ret;
8535 }
8536 
tracing_err_log_write(struct file * file,const char __user * buffer,size_t count,loff_t * ppos)8537 static ssize_t tracing_err_log_write(struct file *file,
8538 				     const char __user *buffer,
8539 				     size_t count, loff_t *ppos)
8540 {
8541 	return count;
8542 }
8543 
tracing_err_log_release(struct inode * inode,struct file * file)8544 static int tracing_err_log_release(struct inode *inode, struct file *file)
8545 {
8546 	struct trace_array *tr = inode->i_private;
8547 
8548 	trace_array_put(tr);
8549 
8550 	if (file->f_mode & FMODE_READ)
8551 		seq_release(inode, file);
8552 
8553 	return 0;
8554 }
8555 
8556 static const struct file_operations tracing_err_log_fops = {
8557 	.open           = tracing_err_log_open,
8558 	.write		= tracing_err_log_write,
8559 	.read           = seq_read,
8560 	.llseek         = tracing_lseek,
8561 	.release        = tracing_err_log_release,
8562 };
8563 
tracing_buffers_open(struct inode * inode,struct file * filp)8564 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8565 {
8566 	struct trace_array *tr = inode->i_private;
8567 	struct ftrace_buffer_info *info;
8568 	int ret;
8569 
8570 	ret = tracing_check_open_get_tr(tr);
8571 	if (ret)
8572 		return ret;
8573 
8574 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
8575 	if (!info) {
8576 		trace_array_put(tr);
8577 		return -ENOMEM;
8578 	}
8579 
8580 	mutex_lock(&trace_types_lock);
8581 
8582 	info->iter.tr		= tr;
8583 	info->iter.cpu_file	= tracing_get_cpu(inode);
8584 	info->iter.trace	= tr->current_trace;
8585 	info->iter.array_buffer = &tr->array_buffer;
8586 	info->spare		= NULL;
8587 	/* Force reading ring buffer for first read */
8588 	info->read		= (unsigned int)-1;
8589 
8590 	filp->private_data = info;
8591 
8592 	tr->trace_ref++;
8593 
8594 	mutex_unlock(&trace_types_lock);
8595 
8596 	ret = nonseekable_open(inode, filp);
8597 	if (ret < 0)
8598 		trace_array_put(tr);
8599 
8600 	return ret;
8601 }
8602 
8603 static __poll_t
tracing_buffers_poll(struct file * filp,poll_table * poll_table)8604 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8605 {
8606 	struct ftrace_buffer_info *info = filp->private_data;
8607 	struct trace_iterator *iter = &info->iter;
8608 
8609 	return trace_poll(iter, filp, poll_table);
8610 }
8611 
8612 static ssize_t
tracing_buffers_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)8613 tracing_buffers_read(struct file *filp, char __user *ubuf,
8614 		     size_t count, loff_t *ppos)
8615 {
8616 	struct ftrace_buffer_info *info = filp->private_data;
8617 	struct trace_iterator *iter = &info->iter;
8618 	void *trace_data;
8619 	int page_size;
8620 	ssize_t ret = 0;
8621 	ssize_t size;
8622 
8623 	if (!count)
8624 		return 0;
8625 
8626 #ifdef CONFIG_TRACER_MAX_TRACE
8627 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8628 		return -EBUSY;
8629 #endif
8630 
8631 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8632 
8633 	/* Make sure the spare matches the current sub buffer size */
8634 	if (info->spare) {
8635 		if (page_size != info->spare_size) {
8636 			ring_buffer_free_read_page(iter->array_buffer->buffer,
8637 						   info->spare_cpu, info->spare);
8638 			info->spare = NULL;
8639 		}
8640 	}
8641 
8642 	if (!info->spare) {
8643 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8644 							  iter->cpu_file);
8645 		if (IS_ERR(info->spare)) {
8646 			ret = PTR_ERR(info->spare);
8647 			info->spare = NULL;
8648 		} else {
8649 			info->spare_cpu = iter->cpu_file;
8650 			info->spare_size = page_size;
8651 		}
8652 	}
8653 	if (!info->spare)
8654 		return ret;
8655 
8656 	/* Do we have previous read data to read? */
8657 	if (info->read < page_size)
8658 		goto read;
8659 
8660  again:
8661 	trace_access_lock(iter->cpu_file);
8662 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8663 				    info->spare,
8664 				    count,
8665 				    iter->cpu_file, 0);
8666 	trace_access_unlock(iter->cpu_file);
8667 
8668 	if (ret < 0) {
8669 		if (trace_empty(iter) && !iter->closed) {
8670 			if (update_last_data_if_empty(iter->tr))
8671 				return 0;
8672 
8673 			if ((filp->f_flags & O_NONBLOCK))
8674 				return -EAGAIN;
8675 
8676 			ret = wait_on_pipe(iter, 0);
8677 			if (ret)
8678 				return ret;
8679 
8680 			goto again;
8681 		}
8682 		return 0;
8683 	}
8684 
8685 	info->read = 0;
8686  read:
8687 	size = page_size - info->read;
8688 	if (size > count)
8689 		size = count;
8690 	trace_data = ring_buffer_read_page_data(info->spare);
8691 	ret = copy_to_user(ubuf, trace_data + info->read, size);
8692 	if (ret == size)
8693 		return -EFAULT;
8694 
8695 	size -= ret;
8696 
8697 	*ppos += size;
8698 	info->read += size;
8699 
8700 	return size;
8701 }
8702 
tracing_buffers_flush(struct file * file,fl_owner_t id)8703 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
8704 {
8705 	struct ftrace_buffer_info *info = file->private_data;
8706 	struct trace_iterator *iter = &info->iter;
8707 
8708 	iter->closed = true;
8709 	/* Make sure the waiters see the new wait_index */
8710 	(void)atomic_fetch_inc_release(&iter->wait_index);
8711 
8712 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8713 
8714 	return 0;
8715 }
8716 
tracing_buffers_release(struct inode * inode,struct file * file)8717 static int tracing_buffers_release(struct inode *inode, struct file *file)
8718 {
8719 	struct ftrace_buffer_info *info = file->private_data;
8720 	struct trace_iterator *iter = &info->iter;
8721 
8722 	guard(mutex)(&trace_types_lock);
8723 
8724 	iter->tr->trace_ref--;
8725 
8726 	__trace_array_put(iter->tr);
8727 
8728 	if (info->spare)
8729 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8730 					   info->spare_cpu, info->spare);
8731 	kvfree(info);
8732 
8733 	return 0;
8734 }
8735 
8736 struct buffer_ref {
8737 	struct trace_buffer	*buffer;
8738 	void			*page;
8739 	int			cpu;
8740 	refcount_t		refcount;
8741 };
8742 
buffer_ref_release(struct buffer_ref * ref)8743 static void buffer_ref_release(struct buffer_ref *ref)
8744 {
8745 	if (!refcount_dec_and_test(&ref->refcount))
8746 		return;
8747 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8748 	kfree(ref);
8749 }
8750 
buffer_pipe_buf_release(struct pipe_inode_info * pipe,struct pipe_buffer * buf)8751 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8752 				    struct pipe_buffer *buf)
8753 {
8754 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8755 
8756 	buffer_ref_release(ref);
8757 	buf->private = 0;
8758 }
8759 
buffer_pipe_buf_get(struct pipe_inode_info * pipe,struct pipe_buffer * buf)8760 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8761 				struct pipe_buffer *buf)
8762 {
8763 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8764 
8765 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8766 		return false;
8767 
8768 	refcount_inc(&ref->refcount);
8769 	return true;
8770 }
8771 
8772 /* Pipe buffer operations for a buffer. */
8773 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8774 	.release		= buffer_pipe_buf_release,
8775 	.get			= buffer_pipe_buf_get,
8776 };
8777 
8778 /*
8779  * Callback from splice_to_pipe(), if we need to release some pages
8780  * at the end of the spd in case we error'ed out in filling the pipe.
8781  */
buffer_spd_release(struct splice_pipe_desc * spd,unsigned int i)8782 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8783 {
8784 	struct buffer_ref *ref =
8785 		(struct buffer_ref *)spd->partial[i].private;
8786 
8787 	buffer_ref_release(ref);
8788 	spd->partial[i].private = 0;
8789 }
8790 
8791 static ssize_t
tracing_buffers_splice_read(struct file * file,loff_t * ppos,struct pipe_inode_info * pipe,size_t len,unsigned int flags)8792 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8793 			    struct pipe_inode_info *pipe, size_t len,
8794 			    unsigned int flags)
8795 {
8796 	struct ftrace_buffer_info *info = file->private_data;
8797 	struct trace_iterator *iter = &info->iter;
8798 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8799 	struct page *pages_def[PIPE_DEF_BUFFERS];
8800 	struct splice_pipe_desc spd = {
8801 		.pages		= pages_def,
8802 		.partial	= partial_def,
8803 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8804 		.ops		= &buffer_pipe_buf_ops,
8805 		.spd_release	= buffer_spd_release,
8806 	};
8807 	struct buffer_ref *ref;
8808 	bool woken = false;
8809 	int page_size;
8810 	int entries, i;
8811 	ssize_t ret = 0;
8812 
8813 #ifdef CONFIG_TRACER_MAX_TRACE
8814 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8815 		return -EBUSY;
8816 #endif
8817 
8818 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8819 	if (*ppos & (page_size - 1))
8820 		return -EINVAL;
8821 
8822 	if (len & (page_size - 1)) {
8823 		if (len < page_size)
8824 			return -EINVAL;
8825 		len &= (~(page_size - 1));
8826 	}
8827 
8828 	if (splice_grow_spd(pipe, &spd))
8829 		return -ENOMEM;
8830 
8831  again:
8832 	trace_access_lock(iter->cpu_file);
8833 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8834 
8835 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8836 		struct page *page;
8837 		int r;
8838 
8839 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8840 		if (!ref) {
8841 			ret = -ENOMEM;
8842 			break;
8843 		}
8844 
8845 		refcount_set(&ref->refcount, 1);
8846 		ref->buffer = iter->array_buffer->buffer;
8847 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8848 		if (IS_ERR(ref->page)) {
8849 			ret = PTR_ERR(ref->page);
8850 			ref->page = NULL;
8851 			kfree(ref);
8852 			break;
8853 		}
8854 		ref->cpu = iter->cpu_file;
8855 
8856 		r = ring_buffer_read_page(ref->buffer, ref->page,
8857 					  len, iter->cpu_file, 1);
8858 		if (r < 0) {
8859 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8860 						   ref->page);
8861 			kfree(ref);
8862 			break;
8863 		}
8864 
8865 		page = virt_to_page(ring_buffer_read_page_data(ref->page));
8866 
8867 		spd.pages[i] = page;
8868 		spd.partial[i].len = page_size;
8869 		spd.partial[i].offset = 0;
8870 		spd.partial[i].private = (unsigned long)ref;
8871 		spd.nr_pages++;
8872 		*ppos += page_size;
8873 
8874 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8875 	}
8876 
8877 	trace_access_unlock(iter->cpu_file);
8878 	spd.nr_pages = i;
8879 
8880 	/* did we read anything? */
8881 	if (!spd.nr_pages) {
8882 
8883 		if (ret)
8884 			goto out;
8885 
8886 		if (woken)
8887 			goto out;
8888 
8889 		ret = -EAGAIN;
8890 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8891 			goto out;
8892 
8893 		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8894 		if (ret)
8895 			goto out;
8896 
8897 		/* No need to wait after waking up when tracing is off */
8898 		if (!tracer_tracing_is_on(iter->tr))
8899 			goto out;
8900 
8901 		/* Iterate one more time to collect any new data then exit */
8902 		woken = true;
8903 
8904 		goto again;
8905 	}
8906 
8907 	ret = splice_to_pipe(pipe, &spd);
8908 out:
8909 	splice_shrink_spd(&spd);
8910 
8911 	return ret;
8912 }
8913 
tracing_buffers_ioctl(struct file * file,unsigned int cmd,unsigned long arg)8914 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8915 {
8916 	struct ftrace_buffer_info *info = file->private_data;
8917 	struct trace_iterator *iter = &info->iter;
8918 	int err;
8919 
8920 	if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
8921 		if (!(file->f_flags & O_NONBLOCK)) {
8922 			err = ring_buffer_wait(iter->array_buffer->buffer,
8923 					       iter->cpu_file,
8924 					       iter->tr->buffer_percent,
8925 					       NULL, NULL);
8926 			if (err)
8927 				return err;
8928 		}
8929 
8930 		return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8931 						  iter->cpu_file);
8932 	} else if (cmd) {
8933 		return -ENOTTY;
8934 	}
8935 
8936 	/*
8937 	 * An ioctl call with cmd 0 to the ring buffer file will wake up all
8938 	 * waiters
8939 	 */
8940 	guard(mutex)(&trace_types_lock);
8941 
8942 	/* Make sure the waiters see the new wait_index */
8943 	(void)atomic_fetch_inc_release(&iter->wait_index);
8944 
8945 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8946 
8947 	return 0;
8948 }
8949 
8950 #ifdef CONFIG_TRACER_MAX_TRACE
get_snapshot_map(struct trace_array * tr)8951 static int get_snapshot_map(struct trace_array *tr)
8952 {
8953 	int err = 0;
8954 
8955 	/*
8956 	 * Called with mmap_lock held. lockdep would be unhappy if we would now
8957 	 * take trace_types_lock. Instead use the specific
8958 	 * snapshot_trigger_lock.
8959 	 */
8960 	spin_lock(&tr->snapshot_trigger_lock);
8961 
8962 	if (tr->snapshot || tr->mapped == UINT_MAX)
8963 		err = -EBUSY;
8964 	else
8965 		tr->mapped++;
8966 
8967 	spin_unlock(&tr->snapshot_trigger_lock);
8968 
8969 	/* Wait for update_max_tr() to observe iter->tr->mapped */
8970 	if (tr->mapped == 1)
8971 		synchronize_rcu();
8972 
8973 	return err;
8974 
8975 }
put_snapshot_map(struct trace_array * tr)8976 static void put_snapshot_map(struct trace_array *tr)
8977 {
8978 	spin_lock(&tr->snapshot_trigger_lock);
8979 	if (!WARN_ON(!tr->mapped))
8980 		tr->mapped--;
8981 	spin_unlock(&tr->snapshot_trigger_lock);
8982 }
8983 #else
get_snapshot_map(struct trace_array * tr)8984 static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
put_snapshot_map(struct trace_array * tr)8985 static inline void put_snapshot_map(struct trace_array *tr) { }
8986 #endif
8987 
tracing_buffers_mmap_close(struct vm_area_struct * vma)8988 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8989 {
8990 	struct ftrace_buffer_info *info = vma->vm_file->private_data;
8991 	struct trace_iterator *iter = &info->iter;
8992 
8993 	WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
8994 	put_snapshot_map(iter->tr);
8995 }
8996 
tracing_buffers_may_split(struct vm_area_struct * vma,unsigned long addr)8997 static int tracing_buffers_may_split(struct vm_area_struct *vma, unsigned long addr)
8998 {
8999 	/*
9000 	 * Trace buffer mappings require the complete buffer including
9001 	 * the meta page. Partial mappings are not supported.
9002 	 */
9003 	return -EINVAL;
9004 }
9005 
9006 static const struct vm_operations_struct tracing_buffers_vmops = {
9007 	.close		= tracing_buffers_mmap_close,
9008 	.may_split      = tracing_buffers_may_split,
9009 };
9010 
tracing_buffers_mmap(struct file * filp,struct vm_area_struct * vma)9011 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
9012 {
9013 	struct ftrace_buffer_info *info = filp->private_data;
9014 	struct trace_iterator *iter = &info->iter;
9015 	int ret = 0;
9016 
9017 	/* A memmap'ed and backup buffers are not supported for user space mmap */
9018 	if (iter->tr->flags & (TRACE_ARRAY_FL_MEMMAP | TRACE_ARRAY_FL_VMALLOC))
9019 		return -ENODEV;
9020 
9021 	ret = get_snapshot_map(iter->tr);
9022 	if (ret)
9023 		return ret;
9024 
9025 	ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
9026 	if (ret)
9027 		put_snapshot_map(iter->tr);
9028 
9029 	vma->vm_ops = &tracing_buffers_vmops;
9030 
9031 	return ret;
9032 }
9033 
9034 static const struct file_operations tracing_buffers_fops = {
9035 	.open		= tracing_buffers_open,
9036 	.read		= tracing_buffers_read,
9037 	.poll		= tracing_buffers_poll,
9038 	.release	= tracing_buffers_release,
9039 	.flush		= tracing_buffers_flush,
9040 	.splice_read	= tracing_buffers_splice_read,
9041 	.unlocked_ioctl = tracing_buffers_ioctl,
9042 	.mmap		= tracing_buffers_mmap,
9043 };
9044 
9045 static ssize_t
tracing_stats_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)9046 tracing_stats_read(struct file *filp, char __user *ubuf,
9047 		   size_t count, loff_t *ppos)
9048 {
9049 	struct inode *inode = file_inode(filp);
9050 	struct trace_array *tr = inode->i_private;
9051 	struct array_buffer *trace_buf = &tr->array_buffer;
9052 	int cpu = tracing_get_cpu(inode);
9053 	struct trace_seq *s;
9054 	unsigned long cnt;
9055 	unsigned long long t;
9056 	unsigned long usec_rem;
9057 
9058 	s = kmalloc(sizeof(*s), GFP_KERNEL);
9059 	if (!s)
9060 		return -ENOMEM;
9061 
9062 	trace_seq_init(s);
9063 
9064 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
9065 	trace_seq_printf(s, "entries: %ld\n", cnt);
9066 
9067 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
9068 	trace_seq_printf(s, "overrun: %ld\n", cnt);
9069 
9070 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
9071 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
9072 
9073 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
9074 	trace_seq_printf(s, "bytes: %ld\n", cnt);
9075 
9076 	if (trace_clocks[tr->clock_id].in_ns) {
9077 		/* local or global for trace_clock */
9078 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
9079 		usec_rem = do_div(t, USEC_PER_SEC);
9080 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
9081 								t, usec_rem);
9082 
9083 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
9084 		usec_rem = do_div(t, USEC_PER_SEC);
9085 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
9086 	} else {
9087 		/* counter or tsc mode for trace_clock */
9088 		trace_seq_printf(s, "oldest event ts: %llu\n",
9089 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
9090 
9091 		trace_seq_printf(s, "now ts: %llu\n",
9092 				ring_buffer_time_stamp(trace_buf->buffer));
9093 	}
9094 
9095 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
9096 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
9097 
9098 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
9099 	trace_seq_printf(s, "read events: %ld\n", cnt);
9100 
9101 	count = simple_read_from_buffer(ubuf, count, ppos,
9102 					s->buffer, trace_seq_used(s));
9103 
9104 	kfree(s);
9105 
9106 	return count;
9107 }
9108 
9109 static const struct file_operations tracing_stats_fops = {
9110 	.open		= tracing_open_generic_tr,
9111 	.read		= tracing_stats_read,
9112 	.llseek		= generic_file_llseek,
9113 	.release	= tracing_release_generic_tr,
9114 };
9115 
9116 #ifdef CONFIG_DYNAMIC_FTRACE
9117 
9118 static ssize_t
tracing_read_dyn_info(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9119 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
9120 		  size_t cnt, loff_t *ppos)
9121 {
9122 	ssize_t ret;
9123 	char *buf;
9124 	int r;
9125 
9126 	/* 512 should be plenty to hold the amount needed */
9127 #define DYN_INFO_BUF_SIZE	512
9128 
9129 	buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
9130 	if (!buf)
9131 		return -ENOMEM;
9132 
9133 	r = scnprintf(buf, DYN_INFO_BUF_SIZE,
9134 		      "%ld pages:%ld groups: %ld\n"
9135 		      "ftrace boot update time = %llu (ns)\n"
9136 		      "ftrace module total update time = %llu (ns)\n",
9137 		      ftrace_update_tot_cnt,
9138 		      ftrace_number_of_pages,
9139 		      ftrace_number_of_groups,
9140 		      ftrace_update_time,
9141 		      ftrace_total_mod_time);
9142 
9143 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9144 	kfree(buf);
9145 	return ret;
9146 }
9147 
9148 static const struct file_operations tracing_dyn_info_fops = {
9149 	.open		= tracing_open_generic,
9150 	.read		= tracing_read_dyn_info,
9151 	.llseek		= generic_file_llseek,
9152 };
9153 #endif /* CONFIG_DYNAMIC_FTRACE */
9154 
9155 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
9156 static void
ftrace_snapshot(unsigned long ip,unsigned long parent_ip,struct trace_array * tr,struct ftrace_probe_ops * ops,void * data)9157 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
9158 		struct trace_array *tr, struct ftrace_probe_ops *ops,
9159 		void *data)
9160 {
9161 	tracing_snapshot_instance(tr);
9162 }
9163 
9164 static void
ftrace_count_snapshot(unsigned long ip,unsigned long parent_ip,struct trace_array * tr,struct ftrace_probe_ops * ops,void * data)9165 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
9166 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
9167 		      void *data)
9168 {
9169 	struct ftrace_func_mapper *mapper = data;
9170 	long *count = NULL;
9171 
9172 	if (mapper)
9173 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
9174 
9175 	if (count) {
9176 
9177 		if (*count <= 0)
9178 			return;
9179 
9180 		(*count)--;
9181 	}
9182 
9183 	tracing_snapshot_instance(tr);
9184 }
9185 
9186 static int
ftrace_snapshot_print(struct seq_file * m,unsigned long ip,struct ftrace_probe_ops * ops,void * data)9187 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
9188 		      struct ftrace_probe_ops *ops, void *data)
9189 {
9190 	struct ftrace_func_mapper *mapper = data;
9191 	long *count = NULL;
9192 
9193 	seq_printf(m, "%ps:", (void *)ip);
9194 
9195 	seq_puts(m, "snapshot");
9196 
9197 	if (mapper)
9198 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
9199 
9200 	if (count)
9201 		seq_printf(m, ":count=%ld\n", *count);
9202 	else
9203 		seq_puts(m, ":unlimited\n");
9204 
9205 	return 0;
9206 }
9207 
9208 static int
ftrace_snapshot_init(struct ftrace_probe_ops * ops,struct trace_array * tr,unsigned long ip,void * init_data,void ** data)9209 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
9210 		     unsigned long ip, void *init_data, void **data)
9211 {
9212 	struct ftrace_func_mapper *mapper = *data;
9213 
9214 	if (!mapper) {
9215 		mapper = allocate_ftrace_func_mapper();
9216 		if (!mapper)
9217 			return -ENOMEM;
9218 		*data = mapper;
9219 	}
9220 
9221 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
9222 }
9223 
9224 static void
ftrace_snapshot_free(struct ftrace_probe_ops * ops,struct trace_array * tr,unsigned long ip,void * data)9225 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
9226 		     unsigned long ip, void *data)
9227 {
9228 	struct ftrace_func_mapper *mapper = data;
9229 
9230 	if (!ip) {
9231 		if (!mapper)
9232 			return;
9233 		free_ftrace_func_mapper(mapper, NULL);
9234 		return;
9235 	}
9236 
9237 	ftrace_func_mapper_remove_ip(mapper, ip);
9238 }
9239 
9240 static struct ftrace_probe_ops snapshot_probe_ops = {
9241 	.func			= ftrace_snapshot,
9242 	.print			= ftrace_snapshot_print,
9243 };
9244 
9245 static struct ftrace_probe_ops snapshot_count_probe_ops = {
9246 	.func			= ftrace_count_snapshot,
9247 	.print			= ftrace_snapshot_print,
9248 	.init			= ftrace_snapshot_init,
9249 	.free			= ftrace_snapshot_free,
9250 };
9251 
9252 static int
ftrace_trace_snapshot_callback(struct trace_array * tr,struct ftrace_hash * hash,char * glob,char * cmd,char * param,int enable)9253 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
9254 			       char *glob, char *cmd, char *param, int enable)
9255 {
9256 	struct ftrace_probe_ops *ops;
9257 	void *count = (void *)-1;
9258 	char *number;
9259 	int ret;
9260 
9261 	if (!tr)
9262 		return -ENODEV;
9263 
9264 	/* hash funcs only work with set_ftrace_filter */
9265 	if (!enable)
9266 		return -EINVAL;
9267 
9268 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
9269 
9270 	if (glob[0] == '!') {
9271 		ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
9272 		if (!ret)
9273 			tracing_disarm_snapshot(tr);
9274 
9275 		return ret;
9276 	}
9277 
9278 	if (!param)
9279 		goto out_reg;
9280 
9281 	number = strsep(&param, ":");
9282 
9283 	if (!strlen(number))
9284 		goto out_reg;
9285 
9286 	/*
9287 	 * We use the callback data field (which is a pointer)
9288 	 * as our counter.
9289 	 */
9290 	ret = kstrtoul(number, 0, (unsigned long *)&count);
9291 	if (ret)
9292 		return ret;
9293 
9294  out_reg:
9295 	ret = tracing_arm_snapshot(tr);
9296 	if (ret < 0)
9297 		return ret;
9298 
9299 	ret = register_ftrace_function_probe(glob, tr, ops, count);
9300 	if (ret < 0)
9301 		tracing_disarm_snapshot(tr);
9302 
9303 	return ret < 0 ? ret : 0;
9304 }
9305 
9306 static struct ftrace_func_command ftrace_snapshot_cmd = {
9307 	.name			= "snapshot",
9308 	.func			= ftrace_trace_snapshot_callback,
9309 };
9310 
register_snapshot_cmd(void)9311 static __init int register_snapshot_cmd(void)
9312 {
9313 	return register_ftrace_command(&ftrace_snapshot_cmd);
9314 }
9315 #else
register_snapshot_cmd(void)9316 static inline __init int register_snapshot_cmd(void) { return 0; }
9317 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
9318 
tracing_get_dentry(struct trace_array * tr)9319 static struct dentry *tracing_get_dentry(struct trace_array *tr)
9320 {
9321 	/* Top directory uses NULL as the parent */
9322 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
9323 		return NULL;
9324 
9325 	if (WARN_ON(!tr->dir))
9326 		return ERR_PTR(-ENODEV);
9327 
9328 	/* All sub buffers have a descriptor */
9329 	return tr->dir;
9330 }
9331 
tracing_dentry_percpu(struct trace_array * tr,int cpu)9332 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
9333 {
9334 	struct dentry *d_tracer;
9335 
9336 	if (tr->percpu_dir)
9337 		return tr->percpu_dir;
9338 
9339 	d_tracer = tracing_get_dentry(tr);
9340 	if (IS_ERR(d_tracer))
9341 		return NULL;
9342 
9343 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
9344 
9345 	MEM_FAIL(!tr->percpu_dir,
9346 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
9347 
9348 	return tr->percpu_dir;
9349 }
9350 
9351 static struct dentry *
trace_create_cpu_file(const char * name,umode_t mode,struct dentry * parent,void * data,long cpu,const struct file_operations * fops)9352 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
9353 		      void *data, long cpu, const struct file_operations *fops)
9354 {
9355 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
9356 
9357 	if (ret) /* See tracing_get_cpu() */
9358 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
9359 	return ret;
9360 }
9361 
9362 static void
tracing_init_tracefs_percpu(struct trace_array * tr,long cpu)9363 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
9364 {
9365 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
9366 	struct dentry *d_cpu;
9367 	char cpu_dir[30]; /* 30 characters should be more than enough */
9368 
9369 	if (!d_percpu)
9370 		return;
9371 
9372 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
9373 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
9374 	if (!d_cpu) {
9375 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
9376 		return;
9377 	}
9378 
9379 	/* per cpu trace_pipe */
9380 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
9381 				tr, cpu, &tracing_pipe_fops);
9382 
9383 	/* per cpu trace */
9384 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
9385 				tr, cpu, &tracing_fops);
9386 
9387 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
9388 				tr, cpu, &tracing_buffers_fops);
9389 
9390 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
9391 				tr, cpu, &tracing_stats_fops);
9392 
9393 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
9394 				tr, cpu, &tracing_entries_fops);
9395 
9396 	if (tr->range_addr_start)
9397 		trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
9398 				      tr, cpu, &tracing_buffer_meta_fops);
9399 #ifdef CONFIG_TRACER_SNAPSHOT
9400 	if (!tr->range_addr_start) {
9401 		trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
9402 				      tr, cpu, &snapshot_fops);
9403 
9404 		trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
9405 				      tr, cpu, &snapshot_raw_fops);
9406 	}
9407 #endif
9408 }
9409 
9410 #ifdef CONFIG_FTRACE_SELFTEST
9411 /* Let selftest have access to static functions in this file */
9412 #include "trace_selftest.c"
9413 #endif
9414 
9415 static ssize_t
trace_options_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9416 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
9417 			loff_t *ppos)
9418 {
9419 	struct trace_option_dentry *topt = filp->private_data;
9420 	char *buf;
9421 
9422 	if (topt->flags->val & topt->opt->bit)
9423 		buf = "1\n";
9424 	else
9425 		buf = "0\n";
9426 
9427 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9428 }
9429 
9430 static ssize_t
trace_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9431 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
9432 			 loff_t *ppos)
9433 {
9434 	struct trace_option_dentry *topt = filp->private_data;
9435 	unsigned long val;
9436 	int ret;
9437 
9438 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9439 	if (ret)
9440 		return ret;
9441 
9442 	if (val != 0 && val != 1)
9443 		return -EINVAL;
9444 
9445 	if (!!(topt->flags->val & topt->opt->bit) != val) {
9446 		guard(mutex)(&trace_types_lock);
9447 		ret = __set_tracer_option(topt->tr, topt->flags,
9448 					  topt->opt, !val);
9449 		if (ret)
9450 			return ret;
9451 	}
9452 
9453 	*ppos += cnt;
9454 
9455 	return cnt;
9456 }
9457 
tracing_open_options(struct inode * inode,struct file * filp)9458 static int tracing_open_options(struct inode *inode, struct file *filp)
9459 {
9460 	struct trace_option_dentry *topt = inode->i_private;
9461 	int ret;
9462 
9463 	ret = tracing_check_open_get_tr(topt->tr);
9464 	if (ret)
9465 		return ret;
9466 
9467 	filp->private_data = inode->i_private;
9468 	return 0;
9469 }
9470 
tracing_release_options(struct inode * inode,struct file * file)9471 static int tracing_release_options(struct inode *inode, struct file *file)
9472 {
9473 	struct trace_option_dentry *topt = file->private_data;
9474 
9475 	trace_array_put(topt->tr);
9476 	return 0;
9477 }
9478 
9479 static const struct file_operations trace_options_fops = {
9480 	.open = tracing_open_options,
9481 	.read = trace_options_read,
9482 	.write = trace_options_write,
9483 	.llseek	= generic_file_llseek,
9484 	.release = tracing_release_options,
9485 };
9486 
9487 /*
9488  * In order to pass in both the trace_array descriptor as well as the index
9489  * to the flag that the trace option file represents, the trace_array
9490  * has a character array of trace_flags_index[], which holds the index
9491  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9492  * The address of this character array is passed to the flag option file
9493  * read/write callbacks.
9494  *
9495  * In order to extract both the index and the trace_array descriptor,
9496  * get_tr_index() uses the following algorithm.
9497  *
9498  *   idx = *ptr;
9499  *
9500  * As the pointer itself contains the address of the index (remember
9501  * index[1] == 1).
9502  *
9503  * Then to get the trace_array descriptor, by subtracting that index
9504  * from the ptr, we get to the start of the index itself.
9505  *
9506  *   ptr - idx == &index[0]
9507  *
9508  * Then a simple container_of() from that pointer gets us to the
9509  * trace_array descriptor.
9510  */
get_tr_index(void * data,struct trace_array ** ptr,unsigned int * pindex)9511 static void get_tr_index(void *data, struct trace_array **ptr,
9512 			 unsigned int *pindex)
9513 {
9514 	*pindex = *(unsigned char *)data;
9515 
9516 	*ptr = container_of(data - *pindex, struct trace_array,
9517 			    trace_flags_index);
9518 }
9519 
9520 static ssize_t
trace_options_core_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9521 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9522 			loff_t *ppos)
9523 {
9524 	void *tr_index = filp->private_data;
9525 	struct trace_array *tr;
9526 	unsigned int index;
9527 	char *buf;
9528 
9529 	get_tr_index(tr_index, &tr, &index);
9530 
9531 	if (tr->trace_flags & (1ULL << index))
9532 		buf = "1\n";
9533 	else
9534 		buf = "0\n";
9535 
9536 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9537 }
9538 
9539 static ssize_t
trace_options_core_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9540 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9541 			 loff_t *ppos)
9542 {
9543 	void *tr_index = filp->private_data;
9544 	struct trace_array *tr;
9545 	unsigned int index;
9546 	unsigned long val;
9547 	int ret;
9548 
9549 	get_tr_index(tr_index, &tr, &index);
9550 
9551 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9552 	if (ret)
9553 		return ret;
9554 
9555 	if (val != 0 && val != 1)
9556 		return -EINVAL;
9557 
9558 	mutex_lock(&event_mutex);
9559 	mutex_lock(&trace_types_lock);
9560 	ret = set_tracer_flag(tr, 1ULL << index, val);
9561 	mutex_unlock(&trace_types_lock);
9562 	mutex_unlock(&event_mutex);
9563 
9564 	if (ret < 0)
9565 		return ret;
9566 
9567 	*ppos += cnt;
9568 
9569 	return cnt;
9570 }
9571 
9572 static const struct file_operations trace_options_core_fops = {
9573 	.open = tracing_open_generic,
9574 	.read = trace_options_core_read,
9575 	.write = trace_options_core_write,
9576 	.llseek = generic_file_llseek,
9577 };
9578 
trace_create_file(const char * name,umode_t mode,struct dentry * parent,void * data,const struct file_operations * fops)9579 struct dentry *trace_create_file(const char *name,
9580 				 umode_t mode,
9581 				 struct dentry *parent,
9582 				 void *data,
9583 				 const struct file_operations *fops)
9584 {
9585 	struct dentry *ret;
9586 
9587 	ret = tracefs_create_file(name, mode, parent, data, fops);
9588 	if (!ret)
9589 		pr_warn("Could not create tracefs '%s' entry\n", name);
9590 
9591 	return ret;
9592 }
9593 
9594 
trace_options_init_dentry(struct trace_array * tr)9595 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9596 {
9597 	struct dentry *d_tracer;
9598 
9599 	if (tr->options)
9600 		return tr->options;
9601 
9602 	d_tracer = tracing_get_dentry(tr);
9603 	if (IS_ERR(d_tracer))
9604 		return NULL;
9605 
9606 	tr->options = tracefs_create_dir("options", d_tracer);
9607 	if (!tr->options) {
9608 		pr_warn("Could not create tracefs directory 'options'\n");
9609 		return NULL;
9610 	}
9611 
9612 	return tr->options;
9613 }
9614 
9615 static void
create_trace_option_file(struct trace_array * tr,struct trace_option_dentry * topt,struct tracer_flags * flags,struct tracer_opt * opt)9616 create_trace_option_file(struct trace_array *tr,
9617 			 struct trace_option_dentry *topt,
9618 			 struct tracer_flags *flags,
9619 			 struct tracer_opt *opt)
9620 {
9621 	struct dentry *t_options;
9622 
9623 	t_options = trace_options_init_dentry(tr);
9624 	if (!t_options)
9625 		return;
9626 
9627 	topt->flags = flags;
9628 	topt->opt = opt;
9629 	topt->tr = tr;
9630 
9631 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9632 					t_options, topt, &trace_options_fops);
9633 }
9634 
9635 static int
create_trace_option_files(struct trace_array * tr,struct tracer * tracer,struct tracer_flags * flags)9636 create_trace_option_files(struct trace_array *tr, struct tracer *tracer,
9637 			  struct tracer_flags *flags)
9638 {
9639 	struct trace_option_dentry *topts;
9640 	struct trace_options *tr_topts;
9641 	struct tracer_opt *opts;
9642 	int cnt;
9643 
9644 	if (!flags || !flags->opts)
9645 		return 0;
9646 
9647 	opts = flags->opts;
9648 
9649 	for (cnt = 0; opts[cnt].name; cnt++)
9650 		;
9651 
9652 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9653 	if (!topts)
9654 		return 0;
9655 
9656 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9657 			    GFP_KERNEL);
9658 	if (!tr_topts) {
9659 		kfree(topts);
9660 		return -ENOMEM;
9661 	}
9662 
9663 	tr->topts = tr_topts;
9664 	tr->topts[tr->nr_topts].tracer = tracer;
9665 	tr->topts[tr->nr_topts].topts = topts;
9666 	tr->nr_topts++;
9667 
9668 	for (cnt = 0; opts[cnt].name; cnt++) {
9669 		create_trace_option_file(tr, &topts[cnt], flags,
9670 					 &opts[cnt]);
9671 		MEM_FAIL(topts[cnt].entry == NULL,
9672 			  "Failed to create trace option: %s",
9673 			  opts[cnt].name);
9674 	}
9675 	return 0;
9676 }
9677 
get_global_flags_val(struct tracer * tracer)9678 static int get_global_flags_val(struct tracer *tracer)
9679 {
9680 	struct tracers *t;
9681 
9682 	list_for_each_entry(t, &global_trace.tracers, list) {
9683 		if (t->tracer != tracer)
9684 			continue;
9685 		if (!t->flags)
9686 			return -1;
9687 		return t->flags->val;
9688 	}
9689 	return -1;
9690 }
9691 
add_tracer_options(struct trace_array * tr,struct tracers * t)9692 static int add_tracer_options(struct trace_array *tr, struct tracers *t)
9693 {
9694 	struct tracer *tracer = t->tracer;
9695 	struct tracer_flags *flags = t->flags ?: tracer->flags;
9696 
9697 	if (!flags)
9698 		return 0;
9699 
9700 	/* Only add tracer options after update_tracer_options finish */
9701 	if (!tracer_options_updated)
9702 		return 0;
9703 
9704 	return create_trace_option_files(tr, tracer, flags);
9705 }
9706 
add_tracer(struct trace_array * tr,struct tracer * tracer)9707 static int add_tracer(struct trace_array *tr, struct tracer *tracer)
9708 {
9709 	struct tracer_flags *flags;
9710 	struct tracers *t;
9711 	int ret;
9712 
9713 	/* Only enable if the directory has been created already. */
9714 	if (!tr->dir && !(tr->flags & TRACE_ARRAY_FL_GLOBAL))
9715 		return 0;
9716 
9717 	/*
9718 	 * If this is an instance, only create flags for tracers
9719 	 * the instance may have.
9720 	 */
9721 	if (!trace_ok_for_array(tracer, tr))
9722 		return 0;
9723 
9724 	t = kmalloc(sizeof(*t), GFP_KERNEL);
9725 	if (!t)
9726 		return -ENOMEM;
9727 
9728 	t->tracer = tracer;
9729 	t->flags = NULL;
9730 	list_add(&t->list, &tr->tracers);
9731 
9732 	flags = tracer->flags;
9733 	if (!flags) {
9734 		if (!tracer->default_flags)
9735 			return 0;
9736 
9737 		/*
9738 		 * If the tracer defines default flags, it means the flags are
9739 		 * per trace instance.
9740 		 */
9741 		flags = kmalloc(sizeof(*flags), GFP_KERNEL);
9742 		if (!flags)
9743 			return -ENOMEM;
9744 
9745 		*flags = *tracer->default_flags;
9746 		flags->trace = tracer;
9747 
9748 		t->flags = flags;
9749 
9750 		/* If this is an instance, inherit the global_trace flags */
9751 		if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL)) {
9752 			int val = get_global_flags_val(tracer);
9753 			if (!WARN_ON_ONCE(val < 0))
9754 				flags->val = val;
9755 		}
9756 	}
9757 
9758 	ret = add_tracer_options(tr, t);
9759 	if (ret < 0) {
9760 		list_del(&t->list);
9761 		kfree(t->flags);
9762 		kfree(t);
9763 	}
9764 
9765 	return ret;
9766 }
9767 
9768 static struct dentry *
create_trace_option_core_file(struct trace_array * tr,const char * option,long index)9769 create_trace_option_core_file(struct trace_array *tr,
9770 			      const char *option, long index)
9771 {
9772 	struct dentry *t_options;
9773 
9774 	t_options = trace_options_init_dentry(tr);
9775 	if (!t_options)
9776 		return NULL;
9777 
9778 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9779 				 (void *)&tr->trace_flags_index[index],
9780 				 &trace_options_core_fops);
9781 }
9782 
create_trace_options_dir(struct trace_array * tr)9783 static void create_trace_options_dir(struct trace_array *tr)
9784 {
9785 	struct dentry *t_options;
9786 	bool top_level = tr == &global_trace;
9787 	int i;
9788 
9789 	t_options = trace_options_init_dentry(tr);
9790 	if (!t_options)
9791 		return;
9792 
9793 	for (i = 0; trace_options[i]; i++) {
9794 		if (top_level ||
9795 		    !((1ULL << i) & TOP_LEVEL_TRACE_FLAGS)) {
9796 			create_trace_option_core_file(tr, trace_options[i], i);
9797 		}
9798 	}
9799 }
9800 
9801 static ssize_t
rb_simple_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9802 rb_simple_read(struct file *filp, char __user *ubuf,
9803 	       size_t cnt, loff_t *ppos)
9804 {
9805 	struct trace_array *tr = filp->private_data;
9806 	char buf[64];
9807 	int r;
9808 
9809 	r = tracer_tracing_is_on(tr);
9810 	r = sprintf(buf, "%d\n", r);
9811 
9812 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9813 }
9814 
9815 static ssize_t
rb_simple_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9816 rb_simple_write(struct file *filp, const char __user *ubuf,
9817 		size_t cnt, loff_t *ppos)
9818 {
9819 	struct trace_array *tr = filp->private_data;
9820 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9821 	unsigned long val;
9822 	int ret;
9823 
9824 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9825 	if (ret)
9826 		return ret;
9827 
9828 	if (buffer) {
9829 		guard(mutex)(&trace_types_lock);
9830 		if (!!val == tracer_tracing_is_on(tr)) {
9831 			val = 0; /* do nothing */
9832 		} else if (val) {
9833 			tracer_tracing_on(tr);
9834 			if (tr->current_trace->start)
9835 				tr->current_trace->start(tr);
9836 		} else {
9837 			tracer_tracing_off(tr);
9838 			if (tr->current_trace->stop)
9839 				tr->current_trace->stop(tr);
9840 			/* Wake up any waiters */
9841 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9842 		}
9843 	}
9844 
9845 	(*ppos)++;
9846 
9847 	return cnt;
9848 }
9849 
9850 static const struct file_operations rb_simple_fops = {
9851 	.open		= tracing_open_generic_tr,
9852 	.read		= rb_simple_read,
9853 	.write		= rb_simple_write,
9854 	.release	= tracing_release_generic_tr,
9855 	.llseek		= default_llseek,
9856 };
9857 
9858 static ssize_t
buffer_percent_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9859 buffer_percent_read(struct file *filp, char __user *ubuf,
9860 		    size_t cnt, loff_t *ppos)
9861 {
9862 	struct trace_array *tr = filp->private_data;
9863 	char buf[64];
9864 	int r;
9865 
9866 	r = tr->buffer_percent;
9867 	r = sprintf(buf, "%d\n", r);
9868 
9869 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9870 }
9871 
9872 static ssize_t
buffer_percent_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9873 buffer_percent_write(struct file *filp, const char __user *ubuf,
9874 		     size_t cnt, loff_t *ppos)
9875 {
9876 	struct trace_array *tr = filp->private_data;
9877 	unsigned long val;
9878 	int ret;
9879 
9880 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9881 	if (ret)
9882 		return ret;
9883 
9884 	if (val > 100)
9885 		return -EINVAL;
9886 
9887 	tr->buffer_percent = val;
9888 
9889 	(*ppos)++;
9890 
9891 	return cnt;
9892 }
9893 
9894 static const struct file_operations buffer_percent_fops = {
9895 	.open		= tracing_open_generic_tr,
9896 	.read		= buffer_percent_read,
9897 	.write		= buffer_percent_write,
9898 	.release	= tracing_release_generic_tr,
9899 	.llseek		= default_llseek,
9900 };
9901 
9902 static ssize_t
buffer_subbuf_size_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9903 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9904 {
9905 	struct trace_array *tr = filp->private_data;
9906 	size_t size;
9907 	char buf[64];
9908 	int order;
9909 	int r;
9910 
9911 	order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9912 	size = (PAGE_SIZE << order) / 1024;
9913 
9914 	r = sprintf(buf, "%zd\n", size);
9915 
9916 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9917 }
9918 
9919 static ssize_t
buffer_subbuf_size_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9920 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9921 			 size_t cnt, loff_t *ppos)
9922 {
9923 	struct trace_array *tr = filp->private_data;
9924 	unsigned long val;
9925 	int old_order;
9926 	int order;
9927 	int pages;
9928 	int ret;
9929 
9930 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9931 	if (ret)
9932 		return ret;
9933 
9934 	val *= 1024; /* value passed in is in KB */
9935 
9936 	pages = DIV_ROUND_UP(val, PAGE_SIZE);
9937 	order = fls(pages - 1);
9938 
9939 	/* limit between 1 and 128 system pages */
9940 	if (order < 0 || order > 7)
9941 		return -EINVAL;
9942 
9943 	/* Do not allow tracing while changing the order of the ring buffer */
9944 	tracing_stop_tr(tr);
9945 
9946 	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9947 	if (old_order == order)
9948 		goto out;
9949 
9950 	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9951 	if (ret)
9952 		goto out;
9953 
9954 #ifdef CONFIG_TRACER_MAX_TRACE
9955 
9956 	if (!tr->allocated_snapshot)
9957 		goto out_max;
9958 
9959 	ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9960 	if (ret) {
9961 		/* Put back the old order */
9962 		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9963 		if (WARN_ON_ONCE(cnt)) {
9964 			/*
9965 			 * AARGH! We are left with different orders!
9966 			 * The max buffer is our "snapshot" buffer.
9967 			 * When a tracer needs a snapshot (one of the
9968 			 * latency tracers), it swaps the max buffer
9969 			 * with the saved snap shot. We succeeded to
9970 			 * update the order of the main buffer, but failed to
9971 			 * update the order of the max buffer. But when we tried
9972 			 * to reset the main buffer to the original size, we
9973 			 * failed there too. This is very unlikely to
9974 			 * happen, but if it does, warn and kill all
9975 			 * tracing.
9976 			 */
9977 			tracing_disabled = 1;
9978 		}
9979 		goto out;
9980 	}
9981  out_max:
9982 #endif
9983 	(*ppos)++;
9984  out:
9985 	if (ret)
9986 		cnt = ret;
9987 	tracing_start_tr(tr);
9988 	return cnt;
9989 }
9990 
9991 static const struct file_operations buffer_subbuf_size_fops = {
9992 	.open		= tracing_open_generic_tr,
9993 	.read		= buffer_subbuf_size_read,
9994 	.write		= buffer_subbuf_size_write,
9995 	.release	= tracing_release_generic_tr,
9996 	.llseek		= default_llseek,
9997 };
9998 
9999 static struct dentry *trace_instance_dir;
10000 
10001 static void
10002 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
10003 
10004 #ifdef CONFIG_MODULES
make_mod_delta(struct module * mod,void * data)10005 static int make_mod_delta(struct module *mod, void *data)
10006 {
10007 	struct trace_module_delta *module_delta;
10008 	struct trace_scratch *tscratch;
10009 	struct trace_mod_entry *entry;
10010 	struct trace_array *tr = data;
10011 	int i;
10012 
10013 	tscratch = tr->scratch;
10014 	module_delta = READ_ONCE(tr->module_delta);
10015 	for (i = 0; i < tscratch->nr_entries; i++) {
10016 		entry = &tscratch->entries[i];
10017 		if (strcmp(mod->name, entry->mod_name))
10018 			continue;
10019 		if (mod->state == MODULE_STATE_GOING)
10020 			module_delta->delta[i] = 0;
10021 		else
10022 			module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base
10023 						 - entry->mod_addr;
10024 		break;
10025 	}
10026 	return 0;
10027 }
10028 #else
make_mod_delta(struct module * mod,void * data)10029 static int make_mod_delta(struct module *mod, void *data)
10030 {
10031 	return 0;
10032 }
10033 #endif
10034 
mod_addr_comp(const void * a,const void * b,const void * data)10035 static int mod_addr_comp(const void *a, const void *b, const void *data)
10036 {
10037 	const struct trace_mod_entry *e1 = a;
10038 	const struct trace_mod_entry *e2 = b;
10039 
10040 	return e1->mod_addr > e2->mod_addr ? 1 : -1;
10041 }
10042 
setup_trace_scratch(struct trace_array * tr,struct trace_scratch * tscratch,unsigned int size)10043 static void setup_trace_scratch(struct trace_array *tr,
10044 				struct trace_scratch *tscratch, unsigned int size)
10045 {
10046 	struct trace_module_delta *module_delta;
10047 	struct trace_mod_entry *entry;
10048 	int i, nr_entries;
10049 
10050 	if (!tscratch)
10051 		return;
10052 
10053 	tr->scratch = tscratch;
10054 	tr->scratch_size = size;
10055 
10056 	if (tscratch->text_addr)
10057 		tr->text_delta = (unsigned long)_text - tscratch->text_addr;
10058 
10059 	if (struct_size(tscratch, entries, tscratch->nr_entries) > size)
10060 		goto reset;
10061 
10062 	/* Check if each module name is a valid string */
10063 	for (i = 0; i < tscratch->nr_entries; i++) {
10064 		int n;
10065 
10066 		entry = &tscratch->entries[i];
10067 
10068 		for (n = 0; n < MODULE_NAME_LEN; n++) {
10069 			if (entry->mod_name[n] == '\0')
10070 				break;
10071 			if (!isprint(entry->mod_name[n]))
10072 				goto reset;
10073 		}
10074 		if (n == MODULE_NAME_LEN)
10075 			goto reset;
10076 	}
10077 
10078 	/* Sort the entries so that we can find appropriate module from address. */
10079 	nr_entries = tscratch->nr_entries;
10080 	sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
10081 	       mod_addr_comp, NULL, NULL);
10082 
10083 	if (IS_ENABLED(CONFIG_MODULES)) {
10084 		module_delta = kzalloc(struct_size(module_delta, delta, nr_entries), GFP_KERNEL);
10085 		if (!module_delta) {
10086 			pr_info("module_delta allocation failed. Not able to decode module address.");
10087 			goto reset;
10088 		}
10089 		init_rcu_head(&module_delta->rcu);
10090 	} else
10091 		module_delta = NULL;
10092 	WRITE_ONCE(tr->module_delta, module_delta);
10093 
10094 	/* Scan modules to make text delta for modules. */
10095 	module_for_each_mod(make_mod_delta, tr);
10096 
10097 	/* Set trace_clock as the same of the previous boot. */
10098 	if (tscratch->clock_id != tr->clock_id) {
10099 		if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) ||
10100 		    tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
10101 			pr_info("the previous trace_clock info is not valid.");
10102 			goto reset;
10103 		}
10104 	}
10105 	return;
10106  reset:
10107 	/* Invalid trace modules */
10108 	memset(tscratch, 0, size);
10109 }
10110 
10111 static int
allocate_trace_buffer(struct trace_array * tr,struct array_buffer * buf,int size)10112 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
10113 {
10114 	enum ring_buffer_flags rb_flags;
10115 	struct trace_scratch *tscratch;
10116 	unsigned int scratch_size = 0;
10117 
10118 	rb_flags = tr->trace_flags & TRACE_ITER(OVERWRITE) ? RB_FL_OVERWRITE : 0;
10119 
10120 	buf->tr = tr;
10121 
10122 	if (tr->range_addr_start && tr->range_addr_size) {
10123 		/* Add scratch buffer to handle 128 modules */
10124 		buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
10125 						      tr->range_addr_start,
10126 						      tr->range_addr_size,
10127 						      struct_size(tscratch, entries, 128));
10128 
10129 		tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size);
10130 		setup_trace_scratch(tr, tscratch, scratch_size);
10131 
10132 		/*
10133 		 * This is basically the same as a mapped buffer,
10134 		 * with the same restrictions.
10135 		 */
10136 		tr->mapped++;
10137 	} else {
10138 		buf->buffer = ring_buffer_alloc(size, rb_flags);
10139 	}
10140 	if (!buf->buffer)
10141 		return -ENOMEM;
10142 
10143 	buf->data = alloc_percpu(struct trace_array_cpu);
10144 	if (!buf->data) {
10145 		ring_buffer_free(buf->buffer);
10146 		buf->buffer = NULL;
10147 		return -ENOMEM;
10148 	}
10149 
10150 	/* Allocate the first page for all buffers */
10151 	set_buffer_entries(&tr->array_buffer,
10152 			   ring_buffer_size(tr->array_buffer.buffer, 0));
10153 
10154 	return 0;
10155 }
10156 
free_trace_buffer(struct array_buffer * buf)10157 static void free_trace_buffer(struct array_buffer *buf)
10158 {
10159 	if (buf->buffer) {
10160 		ring_buffer_free(buf->buffer);
10161 		buf->buffer = NULL;
10162 		free_percpu(buf->data);
10163 		buf->data = NULL;
10164 	}
10165 }
10166 
allocate_trace_buffers(struct trace_array * tr,int size)10167 static int allocate_trace_buffers(struct trace_array *tr, int size)
10168 {
10169 	int ret;
10170 
10171 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
10172 	if (ret)
10173 		return ret;
10174 
10175 #ifdef CONFIG_TRACER_MAX_TRACE
10176 	/* Fix mapped buffer trace arrays do not have snapshot buffers */
10177 	if (tr->range_addr_start)
10178 		return 0;
10179 
10180 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
10181 				    allocate_snapshot ? size : 1);
10182 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
10183 		free_trace_buffer(&tr->array_buffer);
10184 		return -ENOMEM;
10185 	}
10186 	tr->allocated_snapshot = allocate_snapshot;
10187 
10188 	allocate_snapshot = false;
10189 #endif
10190 
10191 	return 0;
10192 }
10193 
free_trace_buffers(struct trace_array * tr)10194 static void free_trace_buffers(struct trace_array *tr)
10195 {
10196 	if (!tr)
10197 		return;
10198 
10199 	free_trace_buffer(&tr->array_buffer);
10200 	kfree(tr->module_delta);
10201 
10202 #ifdef CONFIG_TRACER_MAX_TRACE
10203 	free_trace_buffer(&tr->max_buffer);
10204 #endif
10205 }
10206 
init_trace_flags_index(struct trace_array * tr)10207 static void init_trace_flags_index(struct trace_array *tr)
10208 {
10209 	int i;
10210 
10211 	/* Used by the trace options files */
10212 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
10213 		tr->trace_flags_index[i] = i;
10214 }
10215 
__update_tracer(struct trace_array * tr)10216 static int __update_tracer(struct trace_array *tr)
10217 {
10218 	struct tracer *t;
10219 	int ret = 0;
10220 
10221 	for (t = trace_types; t && !ret; t = t->next)
10222 		ret = add_tracer(tr, t);
10223 
10224 	return ret;
10225 }
10226 
__update_tracer_options(struct trace_array * tr)10227 static __init int __update_tracer_options(struct trace_array *tr)
10228 {
10229 	struct tracers *t;
10230 	int ret = 0;
10231 
10232 	list_for_each_entry(t, &tr->tracers, list) {
10233 		ret = add_tracer_options(tr, t);
10234 		if (ret < 0)
10235 			break;
10236 	}
10237 
10238 	return ret;
10239 }
10240 
update_tracer_options(void)10241 static __init void update_tracer_options(void)
10242 {
10243 	struct trace_array *tr;
10244 
10245 	guard(mutex)(&trace_types_lock);
10246 	tracer_options_updated = true;
10247 	list_for_each_entry(tr, &ftrace_trace_arrays, list)
10248 		__update_tracer_options(tr);
10249 }
10250 
10251 /* Must have trace_types_lock held */
trace_array_find(const char * instance)10252 struct trace_array *trace_array_find(const char *instance)
10253 {
10254 	struct trace_array *tr, *found = NULL;
10255 
10256 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10257 		if (tr->name && strcmp(tr->name, instance) == 0) {
10258 			found = tr;
10259 			break;
10260 		}
10261 	}
10262 
10263 	return found;
10264 }
10265 
trace_array_find_get(const char * instance)10266 struct trace_array *trace_array_find_get(const char *instance)
10267 {
10268 	struct trace_array *tr;
10269 
10270 	guard(mutex)(&trace_types_lock);
10271 	tr = trace_array_find(instance);
10272 	if (tr)
10273 		tr->ref++;
10274 
10275 	return tr;
10276 }
10277 
trace_array_create_dir(struct trace_array * tr)10278 static int trace_array_create_dir(struct trace_array *tr)
10279 {
10280 	int ret;
10281 
10282 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
10283 	if (!tr->dir)
10284 		return -EINVAL;
10285 
10286 	ret = event_trace_add_tracer(tr->dir, tr);
10287 	if (ret) {
10288 		tracefs_remove(tr->dir);
10289 		return ret;
10290 	}
10291 
10292 	init_tracer_tracefs(tr, tr->dir);
10293 	ret = __update_tracer(tr);
10294 	if (ret) {
10295 		event_trace_del_tracer(tr);
10296 		tracefs_remove(tr->dir);
10297 		return ret;
10298 	}
10299 	return 0;
10300 }
10301 
10302 static struct trace_array *
trace_array_create_systems(const char * name,const char * systems,unsigned long range_addr_start,unsigned long range_addr_size)10303 trace_array_create_systems(const char *name, const char *systems,
10304 			   unsigned long range_addr_start,
10305 			   unsigned long range_addr_size)
10306 {
10307 	struct trace_array *tr;
10308 	int ret;
10309 
10310 	ret = -ENOMEM;
10311 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
10312 	if (!tr)
10313 		return ERR_PTR(ret);
10314 
10315 	tr->name = kstrdup(name, GFP_KERNEL);
10316 	if (!tr->name)
10317 		goto out_free_tr;
10318 
10319 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
10320 		goto out_free_tr;
10321 
10322 	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
10323 		goto out_free_tr;
10324 
10325 	if (systems) {
10326 		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
10327 		if (!tr->system_names)
10328 			goto out_free_tr;
10329 	}
10330 
10331 	/* Only for boot up memory mapped ring buffers */
10332 	tr->range_addr_start = range_addr_start;
10333 	tr->range_addr_size = range_addr_size;
10334 
10335 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
10336 
10337 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
10338 
10339 	raw_spin_lock_init(&tr->start_lock);
10340 
10341 	tr->syscall_buf_sz = global_trace.syscall_buf_sz;
10342 
10343 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10344 #ifdef CONFIG_TRACER_MAX_TRACE
10345 	spin_lock_init(&tr->snapshot_trigger_lock);
10346 #endif
10347 	tr->current_trace = &nop_trace;
10348 	tr->current_trace_flags = nop_trace.flags;
10349 
10350 	INIT_LIST_HEAD(&tr->systems);
10351 	INIT_LIST_HEAD(&tr->events);
10352 	INIT_LIST_HEAD(&tr->hist_vars);
10353 	INIT_LIST_HEAD(&tr->err_log);
10354 	INIT_LIST_HEAD(&tr->tracers);
10355 	INIT_LIST_HEAD(&tr->marker_list);
10356 
10357 #ifdef CONFIG_MODULES
10358 	INIT_LIST_HEAD(&tr->mod_events);
10359 #endif
10360 
10361 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
10362 		goto out_free_tr;
10363 
10364 	/* The ring buffer is defaultly expanded */
10365 	trace_set_ring_buffer_expanded(tr);
10366 
10367 	if (ftrace_allocate_ftrace_ops(tr) < 0)
10368 		goto out_free_tr;
10369 
10370 	ftrace_init_trace_array(tr);
10371 
10372 	init_trace_flags_index(tr);
10373 
10374 	if (trace_instance_dir) {
10375 		ret = trace_array_create_dir(tr);
10376 		if (ret)
10377 			goto out_free_tr;
10378 	} else
10379 		__trace_early_add_events(tr);
10380 
10381 	list_add(&tr->list, &ftrace_trace_arrays);
10382 
10383 	tr->ref++;
10384 
10385 	return tr;
10386 
10387  out_free_tr:
10388 	ftrace_free_ftrace_ops(tr);
10389 	free_trace_buffers(tr);
10390 	free_cpumask_var(tr->pipe_cpumask);
10391 	free_cpumask_var(tr->tracing_cpumask);
10392 	kfree_const(tr->system_names);
10393 	kfree(tr->range_name);
10394 	kfree(tr->name);
10395 	kfree(tr);
10396 
10397 	return ERR_PTR(ret);
10398 }
10399 
trace_array_create(const char * name)10400 static struct trace_array *trace_array_create(const char *name)
10401 {
10402 	return trace_array_create_systems(name, NULL, 0, 0);
10403 }
10404 
instance_mkdir(const char * name)10405 static int instance_mkdir(const char *name)
10406 {
10407 	struct trace_array *tr;
10408 	int ret;
10409 
10410 	guard(mutex)(&event_mutex);
10411 	guard(mutex)(&trace_types_lock);
10412 
10413 	ret = -EEXIST;
10414 	if (trace_array_find(name))
10415 		return -EEXIST;
10416 
10417 	tr = trace_array_create(name);
10418 
10419 	ret = PTR_ERR_OR_ZERO(tr);
10420 
10421 	return ret;
10422 }
10423 
10424 #ifdef CONFIG_MMU
map_pages(unsigned long start,unsigned long size)10425 static u64 map_pages(unsigned long start, unsigned long size)
10426 {
10427 	unsigned long vmap_start, vmap_end;
10428 	struct vm_struct *area;
10429 	int ret;
10430 
10431 	area = get_vm_area(size, VM_IOREMAP);
10432 	if (!area)
10433 		return 0;
10434 
10435 	vmap_start = (unsigned long) area->addr;
10436 	vmap_end = vmap_start + size;
10437 
10438 	ret = vmap_page_range(vmap_start, vmap_end,
10439 			      start, pgprot_nx(PAGE_KERNEL));
10440 	if (ret < 0) {
10441 		free_vm_area(area);
10442 		return 0;
10443 	}
10444 
10445 	return (u64)vmap_start;
10446 }
10447 #else
map_pages(unsigned long start,unsigned long size)10448 static inline u64 map_pages(unsigned long start, unsigned long size)
10449 {
10450 	return 0;
10451 }
10452 #endif
10453 
10454 /**
10455  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
10456  * @name: The name of the trace array to be looked up/created.
10457  * @systems: A list of systems to create event directories for (NULL for all)
10458  *
10459  * Returns pointer to trace array with given name.
10460  * NULL, if it cannot be created.
10461  *
10462  * NOTE: This function increments the reference counter associated with the
10463  * trace array returned. This makes sure it cannot be freed while in use.
10464  * Use trace_array_put() once the trace array is no longer needed.
10465  * If the trace_array is to be freed, trace_array_destroy() needs to
10466  * be called after the trace_array_put(), or simply let user space delete
10467  * it from the tracefs instances directory. But until the
10468  * trace_array_put() is called, user space can not delete it.
10469  *
10470  */
trace_array_get_by_name(const char * name,const char * systems)10471 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
10472 {
10473 	struct trace_array *tr;
10474 
10475 	guard(mutex)(&event_mutex);
10476 	guard(mutex)(&trace_types_lock);
10477 
10478 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10479 		if (tr->name && strcmp(tr->name, name) == 0) {
10480 			tr->ref++;
10481 			return tr;
10482 		}
10483 	}
10484 
10485 	tr = trace_array_create_systems(name, systems, 0, 0);
10486 
10487 	if (IS_ERR(tr))
10488 		tr = NULL;
10489 	else
10490 		tr->ref++;
10491 
10492 	return tr;
10493 }
10494 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
10495 
__remove_instance(struct trace_array * tr)10496 static int __remove_instance(struct trace_array *tr)
10497 {
10498 	int i;
10499 
10500 	/* Reference counter for a newly created trace array = 1. */
10501 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
10502 		return -EBUSY;
10503 
10504 	list_del(&tr->list);
10505 
10506 	/* Disable all the flags that were enabled coming in */
10507 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
10508 		if ((1 << i) & ZEROED_TRACE_FLAGS)
10509 			set_tracer_flag(tr, 1ULL << i, 0);
10510 	}
10511 
10512 	if (printk_trace == tr)
10513 		update_printk_trace(&global_trace);
10514 
10515 	if (update_marker_trace(tr, 0))
10516 		synchronize_rcu();
10517 
10518 	tracing_set_nop(tr);
10519 	clear_ftrace_function_probes(tr);
10520 	event_trace_del_tracer(tr);
10521 	ftrace_clear_pids(tr);
10522 	ftrace_destroy_function_files(tr);
10523 	tracefs_remove(tr->dir);
10524 	free_percpu(tr->last_func_repeats);
10525 	free_trace_buffers(tr);
10526 	clear_tracing_err_log(tr);
10527 	free_tracers(tr);
10528 
10529 	if (tr->range_name) {
10530 		reserve_mem_release_by_name(tr->range_name);
10531 		kfree(tr->range_name);
10532 	}
10533 	if (tr->flags & TRACE_ARRAY_FL_VMALLOC)
10534 		vfree((void *)tr->range_addr_start);
10535 
10536 	for (i = 0; i < tr->nr_topts; i++) {
10537 		kfree(tr->topts[i].topts);
10538 	}
10539 	kfree(tr->topts);
10540 
10541 	free_cpumask_var(tr->pipe_cpumask);
10542 	free_cpumask_var(tr->tracing_cpumask);
10543 	kfree_const(tr->system_names);
10544 	kfree(tr->name);
10545 	kfree(tr);
10546 
10547 	return 0;
10548 }
10549 
trace_array_destroy(struct trace_array * this_tr)10550 int trace_array_destroy(struct trace_array *this_tr)
10551 {
10552 	struct trace_array *tr;
10553 
10554 	if (!this_tr)
10555 		return -EINVAL;
10556 
10557 	guard(mutex)(&event_mutex);
10558 	guard(mutex)(&trace_types_lock);
10559 
10560 
10561 	/* Making sure trace array exists before destroying it. */
10562 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10563 		if (tr == this_tr)
10564 			return __remove_instance(tr);
10565 	}
10566 
10567 	return -ENODEV;
10568 }
10569 EXPORT_SYMBOL_GPL(trace_array_destroy);
10570 
instance_rmdir(const char * name)10571 static int instance_rmdir(const char *name)
10572 {
10573 	struct trace_array *tr;
10574 
10575 	guard(mutex)(&event_mutex);
10576 	guard(mutex)(&trace_types_lock);
10577 
10578 	tr = trace_array_find(name);
10579 	if (!tr)
10580 		return -ENODEV;
10581 
10582 	return __remove_instance(tr);
10583 }
10584 
create_trace_instances(struct dentry * d_tracer)10585 static __init void create_trace_instances(struct dentry *d_tracer)
10586 {
10587 	struct trace_array *tr;
10588 
10589 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
10590 							 instance_mkdir,
10591 							 instance_rmdir);
10592 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
10593 		return;
10594 
10595 	guard(mutex)(&event_mutex);
10596 	guard(mutex)(&trace_types_lock);
10597 
10598 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10599 		if (!tr->name)
10600 			continue;
10601 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
10602 			     "Failed to create instance directory\n"))
10603 			return;
10604 	}
10605 }
10606 
10607 static void
init_tracer_tracefs(struct trace_array * tr,struct dentry * d_tracer)10608 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
10609 {
10610 	int cpu;
10611 
10612 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
10613 			tr, &show_traces_fops);
10614 
10615 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
10616 			tr, &set_tracer_fops);
10617 
10618 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
10619 			  tr, &tracing_cpumask_fops);
10620 
10621 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
10622 			  tr, &tracing_iter_fops);
10623 
10624 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
10625 			  tr, &tracing_fops);
10626 
10627 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
10628 			  tr, &tracing_pipe_fops);
10629 
10630 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
10631 			  tr, &tracing_entries_fops);
10632 
10633 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
10634 			  tr, &tracing_total_entries_fops);
10635 
10636 	trace_create_file("free_buffer", 0200, d_tracer,
10637 			  tr, &tracing_free_buffer_fops);
10638 
10639 	trace_create_file("trace_marker", 0220, d_tracer,
10640 			  tr, &tracing_mark_fops);
10641 
10642 	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
10643 
10644 	trace_create_file("trace_marker_raw", 0220, d_tracer,
10645 			  tr, &tracing_mark_raw_fops);
10646 
10647 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
10648 			  &trace_clock_fops);
10649 
10650 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
10651 			  tr, &rb_simple_fops);
10652 
10653 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
10654 			  &trace_time_stamp_mode_fops);
10655 
10656 	tr->buffer_percent = 50;
10657 
10658 	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
10659 			tr, &buffer_percent_fops);
10660 
10661 	trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
10662 			  tr, &buffer_subbuf_size_fops);
10663 
10664 	trace_create_file("syscall_user_buf_size", TRACE_MODE_WRITE, d_tracer,
10665 			 tr, &tracing_syscall_buf_fops);
10666 
10667 	create_trace_options_dir(tr);
10668 
10669 #ifdef CONFIG_TRACER_MAX_TRACE
10670 	trace_create_maxlat_file(tr, d_tracer);
10671 #endif
10672 
10673 	if (ftrace_create_function_files(tr, d_tracer))
10674 		MEM_FAIL(1, "Could not allocate function filter files");
10675 
10676 	if (tr->range_addr_start) {
10677 		trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
10678 				  tr, &last_boot_fops);
10679 #ifdef CONFIG_TRACER_SNAPSHOT
10680 	} else {
10681 		trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
10682 				  tr, &snapshot_fops);
10683 #endif
10684 	}
10685 
10686 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
10687 			  tr, &tracing_err_log_fops);
10688 
10689 	for_each_tracing_cpu(cpu)
10690 		tracing_init_tracefs_percpu(tr, cpu);
10691 
10692 	ftrace_init_tracefs(tr, d_tracer);
10693 }
10694 
10695 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
trace_automount(struct dentry * mntpt,void * ingore)10696 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
10697 {
10698 	struct vfsmount *mnt;
10699 	struct file_system_type *type;
10700 	struct fs_context *fc;
10701 	int ret;
10702 
10703 	/*
10704 	 * To maintain backward compatibility for tools that mount
10705 	 * debugfs to get to the tracing facility, tracefs is automatically
10706 	 * mounted to the debugfs/tracing directory.
10707 	 */
10708 	type = get_fs_type("tracefs");
10709 	if (!type)
10710 		return NULL;
10711 
10712 	fc = fs_context_for_submount(type, mntpt);
10713 	put_filesystem(type);
10714 	if (IS_ERR(fc))
10715 		return ERR_CAST(fc);
10716 
10717 	pr_warn("NOTICE: Automounting of tracing to debugfs is deprecated and will be removed in 2030\n");
10718 
10719 	ret = vfs_parse_fs_string(fc, "source", "tracefs");
10720 	if (!ret)
10721 		mnt = fc_mount(fc);
10722 	else
10723 		mnt = ERR_PTR(ret);
10724 
10725 	put_fs_context(fc);
10726 	return mnt;
10727 }
10728 #endif
10729 
10730 /**
10731  * tracing_init_dentry - initialize top level trace array
10732  *
10733  * This is called when creating files or directories in the tracing
10734  * directory. It is called via fs_initcall() by any of the boot up code
10735  * and expects to return the dentry of the top level tracing directory.
10736  */
tracing_init_dentry(void)10737 int tracing_init_dentry(void)
10738 {
10739 	struct trace_array *tr = &global_trace;
10740 
10741 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10742 		pr_warn("Tracing disabled due to lockdown\n");
10743 		return -EPERM;
10744 	}
10745 
10746 	/* The top level trace array uses  NULL as parent */
10747 	if (tr->dir)
10748 		return 0;
10749 
10750 	if (WARN_ON(!tracefs_initialized()))
10751 		return -ENODEV;
10752 
10753 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
10754 	/*
10755 	 * As there may still be users that expect the tracing
10756 	 * files to exist in debugfs/tracing, we must automount
10757 	 * the tracefs file system there, so older tools still
10758 	 * work with the newer kernel.
10759 	 */
10760 	tr->dir = debugfs_create_automount("tracing", NULL,
10761 					   trace_automount, NULL);
10762 #endif
10763 
10764 	return 0;
10765 }
10766 
10767 extern struct trace_eval_map *__start_ftrace_eval_maps[];
10768 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
10769 
10770 static struct workqueue_struct *eval_map_wq __initdata;
10771 static struct work_struct eval_map_work __initdata;
10772 static struct work_struct tracerfs_init_work __initdata;
10773 
eval_map_work_func(struct work_struct * work)10774 static void __init eval_map_work_func(struct work_struct *work)
10775 {
10776 	int len;
10777 
10778 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10779 	trace_event_update_with_eval_map(NULL, __start_ftrace_eval_maps, len);
10780 }
10781 
trace_eval_init(void)10782 static int __init trace_eval_init(void)
10783 {
10784 	INIT_WORK(&eval_map_work, eval_map_work_func);
10785 
10786 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
10787 	if (!eval_map_wq) {
10788 		pr_err("Unable to allocate eval_map_wq\n");
10789 		/* Do work here */
10790 		eval_map_work_func(&eval_map_work);
10791 		return -ENOMEM;
10792 	}
10793 
10794 	queue_work(eval_map_wq, &eval_map_work);
10795 	return 0;
10796 }
10797 
10798 subsys_initcall(trace_eval_init);
10799 
trace_eval_sync(void)10800 static int __init trace_eval_sync(void)
10801 {
10802 	/* Make sure the eval map updates are finished */
10803 	if (eval_map_wq)
10804 		destroy_workqueue(eval_map_wq);
10805 	return 0;
10806 }
10807 
10808 late_initcall_sync(trace_eval_sync);
10809 
10810 
10811 #ifdef CONFIG_MODULES
10812 
module_exists(const char * module)10813 bool module_exists(const char *module)
10814 {
10815 	/* All modules have the symbol __this_module */
10816 	static const char this_mod[] = "__this_module";
10817 	char modname[MODULE_NAME_LEN + sizeof(this_mod) + 2];
10818 	unsigned long val;
10819 	int n;
10820 
10821 	n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
10822 
10823 	if (n > sizeof(modname) - 1)
10824 		return false;
10825 
10826 	val = module_kallsyms_lookup_name(modname);
10827 	return val != 0;
10828 }
10829 
trace_module_add_evals(struct module * mod)10830 static void trace_module_add_evals(struct module *mod)
10831 {
10832 	/*
10833 	 * Modules with bad taint do not have events created, do
10834 	 * not bother with enums either.
10835 	 */
10836 	if (trace_module_has_bad_taint(mod))
10837 		return;
10838 
10839 	/* Even if no trace_evals, this need to sanitize field types. */
10840 	trace_event_update_with_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10841 }
10842 
10843 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
trace_module_remove_evals(struct module * mod)10844 static void trace_module_remove_evals(struct module *mod)
10845 {
10846 	union trace_eval_map_item *map;
10847 	union trace_eval_map_item **last = &trace_eval_maps;
10848 
10849 	if (!mod->num_trace_evals)
10850 		return;
10851 
10852 	guard(mutex)(&trace_eval_mutex);
10853 
10854 	map = trace_eval_maps;
10855 
10856 	while (map) {
10857 		if (map->head.mod == mod)
10858 			break;
10859 		map = trace_eval_jmp_to_tail(map);
10860 		last = &map->tail.next;
10861 		map = map->tail.next;
10862 	}
10863 	if (!map)
10864 		return;
10865 
10866 	*last = trace_eval_jmp_to_tail(map)->tail.next;
10867 	kfree(map);
10868 }
10869 #else
trace_module_remove_evals(struct module * mod)10870 static inline void trace_module_remove_evals(struct module *mod) { }
10871 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10872 
trace_module_record(struct module * mod,bool add)10873 static void trace_module_record(struct module *mod, bool add)
10874 {
10875 	struct trace_array *tr;
10876 	unsigned long flags;
10877 
10878 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10879 		flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT);
10880 		/* Update any persistent trace array that has already been started */
10881 		if (flags == TRACE_ARRAY_FL_BOOT && add) {
10882 			guard(mutex)(&scratch_mutex);
10883 			save_mod(mod, tr);
10884 		} else if (flags & TRACE_ARRAY_FL_LAST_BOOT) {
10885 			/* Update delta if the module loaded in previous boot */
10886 			make_mod_delta(mod, tr);
10887 		}
10888 	}
10889 }
10890 
trace_module_notify(struct notifier_block * self,unsigned long val,void * data)10891 static int trace_module_notify(struct notifier_block *self,
10892 			       unsigned long val, void *data)
10893 {
10894 	struct module *mod = data;
10895 
10896 	switch (val) {
10897 	case MODULE_STATE_COMING:
10898 		trace_module_add_evals(mod);
10899 		trace_module_record(mod, true);
10900 		break;
10901 	case MODULE_STATE_GOING:
10902 		trace_module_remove_evals(mod);
10903 		trace_module_record(mod, false);
10904 		break;
10905 	}
10906 
10907 	return NOTIFY_OK;
10908 }
10909 
10910 static struct notifier_block trace_module_nb = {
10911 	.notifier_call = trace_module_notify,
10912 	.priority = 0,
10913 };
10914 #endif /* CONFIG_MODULES */
10915 
tracer_init_tracefs_work_func(struct work_struct * work)10916 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10917 {
10918 
10919 	event_trace_init();
10920 
10921 	init_tracer_tracefs(&global_trace, NULL);
10922 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
10923 
10924 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10925 			&global_trace, &tracing_thresh_fops);
10926 
10927 	trace_create_file("README", TRACE_MODE_READ, NULL,
10928 			NULL, &tracing_readme_fops);
10929 
10930 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10931 			NULL, &tracing_saved_cmdlines_fops);
10932 
10933 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10934 			  NULL, &tracing_saved_cmdlines_size_fops);
10935 
10936 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10937 			NULL, &tracing_saved_tgids_fops);
10938 
10939 	trace_create_eval_file(NULL);
10940 
10941 #ifdef CONFIG_MODULES
10942 	register_module_notifier(&trace_module_nb);
10943 #endif
10944 
10945 #ifdef CONFIG_DYNAMIC_FTRACE
10946 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10947 			NULL, &tracing_dyn_info_fops);
10948 #endif
10949 
10950 	create_trace_instances(NULL);
10951 
10952 	update_tracer_options();
10953 }
10954 
tracer_init_tracefs(void)10955 static __init int tracer_init_tracefs(void)
10956 {
10957 	int ret;
10958 
10959 	trace_access_lock_init();
10960 
10961 	ret = tracing_init_dentry();
10962 	if (ret)
10963 		return 0;
10964 
10965 	if (eval_map_wq) {
10966 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10967 		queue_work(eval_map_wq, &tracerfs_init_work);
10968 	} else {
10969 		tracer_init_tracefs_work_func(NULL);
10970 	}
10971 
10972 	if (rv_init_interface())
10973 		pr_err("RV: Error while creating the RV interface\n");
10974 
10975 	return 0;
10976 }
10977 
10978 fs_initcall(tracer_init_tracefs);
10979 
10980 static int trace_die_panic_handler(struct notifier_block *self,
10981 				unsigned long ev, void *unused);
10982 
10983 static struct notifier_block trace_panic_notifier = {
10984 	.notifier_call = trace_die_panic_handler,
10985 	.priority = INT_MAX - 1,
10986 };
10987 
10988 static struct notifier_block trace_die_notifier = {
10989 	.notifier_call = trace_die_panic_handler,
10990 	.priority = INT_MAX - 1,
10991 };
10992 
10993 /*
10994  * The idea is to execute the following die/panic callback early, in order
10995  * to avoid showing irrelevant information in the trace (like other panic
10996  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10997  * warnings get disabled (to prevent potential log flooding).
10998  */
trace_die_panic_handler(struct notifier_block * self,unsigned long ev,void * unused)10999 static int trace_die_panic_handler(struct notifier_block *self,
11000 				unsigned long ev, void *unused)
11001 {
11002 	if (!ftrace_dump_on_oops_enabled())
11003 		return NOTIFY_DONE;
11004 
11005 	/* The die notifier requires DIE_OOPS to trigger */
11006 	if (self == &trace_die_notifier && ev != DIE_OOPS)
11007 		return NOTIFY_DONE;
11008 
11009 	ftrace_dump(DUMP_PARAM);
11010 
11011 	return NOTIFY_DONE;
11012 }
11013 
11014 /*
11015  * printk is set to max of 1024, we really don't need it that big.
11016  * Nothing should be printing 1000 characters anyway.
11017  */
11018 #define TRACE_MAX_PRINT		1000
11019 
11020 /*
11021  * Define here KERN_TRACE so that we have one place to modify
11022  * it if we decide to change what log level the ftrace dump
11023  * should be at.
11024  */
11025 #define KERN_TRACE		KERN_EMERG
11026 
11027 void
trace_printk_seq(struct trace_seq * s)11028 trace_printk_seq(struct trace_seq *s)
11029 {
11030 	/* Probably should print a warning here. */
11031 	if (s->seq.len >= TRACE_MAX_PRINT)
11032 		s->seq.len = TRACE_MAX_PRINT;
11033 
11034 	/*
11035 	 * More paranoid code. Although the buffer size is set to
11036 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
11037 	 * an extra layer of protection.
11038 	 */
11039 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
11040 		s->seq.len = s->seq.size - 1;
11041 
11042 	/* should be zero ended, but we are paranoid. */
11043 	s->buffer[s->seq.len] = 0;
11044 
11045 	printk(KERN_TRACE "%s", s->buffer);
11046 
11047 	trace_seq_init(s);
11048 }
11049 
trace_init_iter(struct trace_iterator * iter,struct trace_array * tr)11050 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
11051 {
11052 	iter->tr = tr;
11053 	iter->trace = iter->tr->current_trace;
11054 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
11055 	iter->array_buffer = &tr->array_buffer;
11056 
11057 	if (iter->trace && iter->trace->open)
11058 		iter->trace->open(iter);
11059 
11060 	/* Annotate start of buffers if we had overruns */
11061 	if (ring_buffer_overruns(iter->array_buffer->buffer))
11062 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
11063 
11064 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
11065 	if (trace_clocks[iter->tr->clock_id].in_ns)
11066 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
11067 
11068 	/* Can not use kmalloc for iter.temp and iter.fmt */
11069 	iter->temp = static_temp_buf;
11070 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
11071 	iter->fmt = static_fmt_buf;
11072 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
11073 }
11074 
trace_init_global_iter(struct trace_iterator * iter)11075 void trace_init_global_iter(struct trace_iterator *iter)
11076 {
11077 	trace_init_iter(iter, &global_trace);
11078 }
11079 
ftrace_dump_one(struct trace_array * tr,enum ftrace_dump_mode dump_mode)11080 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
11081 {
11082 	/* use static because iter can be a bit big for the stack */
11083 	static struct trace_iterator iter;
11084 	unsigned int old_userobj;
11085 	unsigned long flags;
11086 	int cnt = 0;
11087 
11088 	/*
11089 	 * Always turn off tracing when we dump.
11090 	 * We don't need to show trace output of what happens
11091 	 * between multiple crashes.
11092 	 *
11093 	 * If the user does a sysrq-z, then they can re-enable
11094 	 * tracing with echo 1 > tracing_on.
11095 	 */
11096 	tracer_tracing_off(tr);
11097 
11098 	local_irq_save(flags);
11099 
11100 	/* Simulate the iterator */
11101 	trace_init_iter(&iter, tr);
11102 
11103 	/* While dumping, do not allow the buffer to be enable */
11104 	tracer_tracing_disable(tr);
11105 
11106 	old_userobj = tr->trace_flags & TRACE_ITER(SYM_USEROBJ);
11107 
11108 	/* don't look at user memory in panic mode */
11109 	tr->trace_flags &= ~TRACE_ITER(SYM_USEROBJ);
11110 
11111 	if (dump_mode == DUMP_ORIG)
11112 		iter.cpu_file = raw_smp_processor_id();
11113 	else
11114 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
11115 
11116 	if (tr == &global_trace)
11117 		printk(KERN_TRACE "Dumping ftrace buffer:\n");
11118 	else
11119 		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
11120 
11121 	/* Did function tracer already get disabled? */
11122 	if (ftrace_is_dead()) {
11123 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
11124 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
11125 	}
11126 
11127 	/*
11128 	 * We need to stop all tracing on all CPUS to read
11129 	 * the next buffer. This is a bit expensive, but is
11130 	 * not done often. We fill all what we can read,
11131 	 * and then release the locks again.
11132 	 */
11133 
11134 	while (!trace_empty(&iter)) {
11135 
11136 		if (!cnt)
11137 			printk(KERN_TRACE "---------------------------------\n");
11138 
11139 		cnt++;
11140 
11141 		trace_iterator_reset(&iter);
11142 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
11143 
11144 		if (trace_find_next_entry_inc(&iter) != NULL) {
11145 			int ret;
11146 
11147 			ret = print_trace_line(&iter);
11148 			if (ret != TRACE_TYPE_NO_CONSUME)
11149 				trace_consume(&iter);
11150 
11151 			trace_printk_seq(&iter.seq);
11152 		}
11153 		touch_nmi_watchdog();
11154 	}
11155 
11156 	if (!cnt)
11157 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
11158 	else
11159 		printk(KERN_TRACE "---------------------------------\n");
11160 
11161 	tr->trace_flags |= old_userobj;
11162 
11163 	tracer_tracing_enable(tr);
11164 	local_irq_restore(flags);
11165 }
11166 
ftrace_dump_by_param(void)11167 static void ftrace_dump_by_param(void)
11168 {
11169 	bool first_param = true;
11170 	char dump_param[MAX_TRACER_SIZE];
11171 	char *buf, *token, *inst_name;
11172 	struct trace_array *tr;
11173 
11174 	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
11175 	buf = dump_param;
11176 
11177 	while ((token = strsep(&buf, ",")) != NULL) {
11178 		if (first_param) {
11179 			first_param = false;
11180 			if (!strcmp("0", token))
11181 				continue;
11182 			else if (!strcmp("1", token)) {
11183 				ftrace_dump_one(&global_trace, DUMP_ALL);
11184 				continue;
11185 			}
11186 			else if (!strcmp("2", token) ||
11187 			  !strcmp("orig_cpu", token)) {
11188 				ftrace_dump_one(&global_trace, DUMP_ORIG);
11189 				continue;
11190 			}
11191 		}
11192 
11193 		inst_name = strsep(&token, "=");
11194 		tr = trace_array_find(inst_name);
11195 		if (!tr) {
11196 			printk(KERN_TRACE "Instance %s not found\n", inst_name);
11197 			continue;
11198 		}
11199 
11200 		if (token && (!strcmp("2", token) ||
11201 			  !strcmp("orig_cpu", token)))
11202 			ftrace_dump_one(tr, DUMP_ORIG);
11203 		else
11204 			ftrace_dump_one(tr, DUMP_ALL);
11205 	}
11206 }
11207 
ftrace_dump(enum ftrace_dump_mode oops_dump_mode)11208 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
11209 {
11210 	static atomic_t dump_running;
11211 
11212 	/* Only allow one dump user at a time. */
11213 	if (atomic_inc_return(&dump_running) != 1) {
11214 		atomic_dec(&dump_running);
11215 		return;
11216 	}
11217 
11218 	switch (oops_dump_mode) {
11219 	case DUMP_ALL:
11220 		ftrace_dump_one(&global_trace, DUMP_ALL);
11221 		break;
11222 	case DUMP_ORIG:
11223 		ftrace_dump_one(&global_trace, DUMP_ORIG);
11224 		break;
11225 	case DUMP_PARAM:
11226 		ftrace_dump_by_param();
11227 		break;
11228 	case DUMP_NONE:
11229 		break;
11230 	default:
11231 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
11232 		ftrace_dump_one(&global_trace, DUMP_ALL);
11233 	}
11234 
11235 	atomic_dec(&dump_running);
11236 }
11237 EXPORT_SYMBOL_GPL(ftrace_dump);
11238 
11239 #define WRITE_BUFSIZE  4096
11240 
trace_parse_run_command(struct file * file,const char __user * buffer,size_t count,loff_t * ppos,int (* createfn)(const char *))11241 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
11242 				size_t count, loff_t *ppos,
11243 				int (*createfn)(const char *))
11244 {
11245 	char *kbuf __free(kfree) = NULL;
11246 	char *buf, *tmp;
11247 	int ret = 0;
11248 	size_t done = 0;
11249 	size_t size;
11250 
11251 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
11252 	if (!kbuf)
11253 		return -ENOMEM;
11254 
11255 	while (done < count) {
11256 		size = count - done;
11257 
11258 		if (size >= WRITE_BUFSIZE)
11259 			size = WRITE_BUFSIZE - 1;
11260 
11261 		if (copy_from_user(kbuf, buffer + done, size))
11262 			return -EFAULT;
11263 
11264 		kbuf[size] = '\0';
11265 		buf = kbuf;
11266 		do {
11267 			tmp = strchr(buf, '\n');
11268 			if (tmp) {
11269 				*tmp = '\0';
11270 				size = tmp - buf + 1;
11271 			} else {
11272 				size = strlen(buf);
11273 				if (done + size < count) {
11274 					if (buf != kbuf)
11275 						break;
11276 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
11277 					pr_warn("Line length is too long: Should be less than %d\n",
11278 						WRITE_BUFSIZE - 2);
11279 					return -EINVAL;
11280 				}
11281 			}
11282 			done += size;
11283 
11284 			/* Remove comments */
11285 			tmp = strchr(buf, '#');
11286 
11287 			if (tmp)
11288 				*tmp = '\0';
11289 
11290 			ret = createfn(buf);
11291 			if (ret)
11292 				return ret;
11293 			buf += size;
11294 
11295 		} while (done < count);
11296 	}
11297 	return done;
11298 }
11299 
11300 #ifdef CONFIG_TRACER_MAX_TRACE
tr_needs_alloc_snapshot(const char * name)11301 __init static bool tr_needs_alloc_snapshot(const char *name)
11302 {
11303 	char *test;
11304 	int len = strlen(name);
11305 	bool ret;
11306 
11307 	if (!boot_snapshot_index)
11308 		return false;
11309 
11310 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
11311 	    boot_snapshot_info[len] == '\t')
11312 		return true;
11313 
11314 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
11315 	if (!test)
11316 		return false;
11317 
11318 	sprintf(test, "\t%s\t", name);
11319 	ret = strstr(boot_snapshot_info, test) == NULL;
11320 	kfree(test);
11321 	return ret;
11322 }
11323 
do_allocate_snapshot(const char * name)11324 __init static void do_allocate_snapshot(const char *name)
11325 {
11326 	if (!tr_needs_alloc_snapshot(name))
11327 		return;
11328 
11329 	/*
11330 	 * When allocate_snapshot is set, the next call to
11331 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
11332 	 * will allocate the snapshot buffer. That will alse clear
11333 	 * this flag.
11334 	 */
11335 	allocate_snapshot = true;
11336 }
11337 #else
do_allocate_snapshot(const char * name)11338 static inline void do_allocate_snapshot(const char *name) { }
11339 #endif
11340 
backup_instance_area(const char * backup,unsigned long * addr,phys_addr_t * size)11341 __init static int backup_instance_area(const char *backup,
11342 				       unsigned long *addr, phys_addr_t *size)
11343 {
11344 	struct trace_array *backup_tr;
11345 	void *allocated_vaddr = NULL;
11346 
11347 	backup_tr = trace_array_get_by_name(backup, NULL);
11348 	if (!backup_tr) {
11349 		pr_warn("Tracing: Instance %s is not found.\n", backup);
11350 		return -ENOENT;
11351 	}
11352 
11353 	if (!(backup_tr->flags & TRACE_ARRAY_FL_BOOT)) {
11354 		pr_warn("Tracing: Instance %s is not boot mapped.\n", backup);
11355 		trace_array_put(backup_tr);
11356 		return -EINVAL;
11357 	}
11358 
11359 	*size = backup_tr->range_addr_size;
11360 
11361 	allocated_vaddr = vzalloc(*size);
11362 	if (!allocated_vaddr) {
11363 		pr_warn("Tracing: Failed to allocate memory for copying instance %s (size 0x%lx)\n",
11364 			backup, (unsigned long)*size);
11365 		trace_array_put(backup_tr);
11366 		return -ENOMEM;
11367 	}
11368 
11369 	memcpy(allocated_vaddr,
11370 		(void *)backup_tr->range_addr_start, (size_t)*size);
11371 	*addr = (unsigned long)allocated_vaddr;
11372 
11373 	trace_array_put(backup_tr);
11374 	return 0;
11375 }
11376 
enable_instances(void)11377 __init static void enable_instances(void)
11378 {
11379 	struct trace_array *tr;
11380 	bool memmap_area = false;
11381 	char *curr_str;
11382 	char *name;
11383 	char *str;
11384 	char *tok;
11385 
11386 	/* A tab is always appended */
11387 	boot_instance_info[boot_instance_index - 1] = '\0';
11388 	str = boot_instance_info;
11389 
11390 	while ((curr_str = strsep(&str, "\t"))) {
11391 		phys_addr_t start = 0;
11392 		phys_addr_t size = 0;
11393 		unsigned long addr = 0;
11394 		bool traceprintk = false;
11395 		bool traceoff = false;
11396 		char *flag_delim;
11397 		char *addr_delim;
11398 		char *rname __free(kfree) = NULL;
11399 		char *backup;
11400 
11401 		tok = strsep(&curr_str, ",");
11402 
11403 		name = strsep(&tok, "=");
11404 		backup = tok;
11405 
11406 		flag_delim = strchr(name, '^');
11407 		addr_delim = strchr(name, '@');
11408 
11409 		if (addr_delim)
11410 			*addr_delim++ = '\0';
11411 
11412 		if (flag_delim)
11413 			*flag_delim++ = '\0';
11414 
11415 		if (backup) {
11416 			if (backup_instance_area(backup, &addr, &size) < 0)
11417 				continue;
11418 		}
11419 
11420 		if (flag_delim) {
11421 			char *flag;
11422 
11423 			while ((flag = strsep(&flag_delim, "^"))) {
11424 				if (strcmp(flag, "traceoff") == 0) {
11425 					traceoff = true;
11426 				} else if ((strcmp(flag, "printk") == 0) ||
11427 					   (strcmp(flag, "traceprintk") == 0) ||
11428 					   (strcmp(flag, "trace_printk") == 0)) {
11429 					traceprintk = true;
11430 				} else {
11431 					pr_info("Tracing: Invalid instance flag '%s' for %s\n",
11432 						flag, name);
11433 				}
11434 			}
11435 		}
11436 
11437 		tok = addr_delim;
11438 		if (tok && isdigit(*tok)) {
11439 			start = memparse(tok, &tok);
11440 			if (!start) {
11441 				pr_warn("Tracing: Invalid boot instance address for %s\n",
11442 					name);
11443 				continue;
11444 			}
11445 			if (*tok != ':') {
11446 				pr_warn("Tracing: No size specified for instance %s\n", name);
11447 				continue;
11448 			}
11449 			tok++;
11450 			size = memparse(tok, &tok);
11451 			if (!size) {
11452 				pr_warn("Tracing: Invalid boot instance size for %s\n",
11453 					name);
11454 				continue;
11455 			}
11456 			memmap_area = true;
11457 		} else if (tok) {
11458 			if (!reserve_mem_find_by_name(tok, &start, &size)) {
11459 				start = 0;
11460 				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
11461 				continue;
11462 			}
11463 			rname = kstrdup(tok, GFP_KERNEL);
11464 		}
11465 
11466 		if (start) {
11467 			/* Start and size must be page aligned */
11468 			if (start & ~PAGE_MASK) {
11469 				pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start);
11470 				continue;
11471 			}
11472 			if (size & ~PAGE_MASK) {
11473 				pr_warn("Tracing: mapping size %pa is not page aligned\n", &size);
11474 				continue;
11475 			}
11476 
11477 			if (memmap_area)
11478 				addr = map_pages(start, size);
11479 			else
11480 				addr = (unsigned long)phys_to_virt(start);
11481 			if (addr) {
11482 				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
11483 					name, &start, (unsigned long)size);
11484 			} else {
11485 				pr_warn("Tracing: Failed to map boot instance %s\n", name);
11486 				continue;
11487 			}
11488 		} else {
11489 			/* Only non mapped buffers have snapshot buffers */
11490 			if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
11491 				do_allocate_snapshot(name);
11492 		}
11493 
11494 		tr = trace_array_create_systems(name, NULL, addr, size);
11495 		if (IS_ERR(tr)) {
11496 			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
11497 			continue;
11498 		}
11499 
11500 		if (traceoff)
11501 			tracer_tracing_off(tr);
11502 
11503 		if (traceprintk)
11504 			update_printk_trace(tr);
11505 
11506 		/*
11507 		 * memmap'd buffers can not be freed.
11508 		 */
11509 		if (memmap_area) {
11510 			tr->flags |= TRACE_ARRAY_FL_MEMMAP;
11511 			tr->ref++;
11512 		}
11513 
11514 		/*
11515 		 * Backup buffers can be freed but need vfree().
11516 		 */
11517 		if (backup)
11518 			tr->flags |= TRACE_ARRAY_FL_VMALLOC;
11519 
11520 		if (start || backup) {
11521 			tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
11522 			tr->range_name = no_free_ptr(rname);
11523 		}
11524 
11525 		while ((tok = strsep(&curr_str, ","))) {
11526 			early_enable_events(tr, tok, true);
11527 		}
11528 	}
11529 }
11530 
tracer_alloc_buffers(void)11531 __init static int tracer_alloc_buffers(void)
11532 {
11533 	int ring_buf_size;
11534 	int ret = -ENOMEM;
11535 
11536 
11537 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
11538 		pr_warn("Tracing disabled due to lockdown\n");
11539 		return -EPERM;
11540 	}
11541 
11542 	/*
11543 	 * Make sure we don't accidentally add more trace options
11544 	 * than we have bits for.
11545 	 */
11546 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
11547 
11548 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
11549 		return -ENOMEM;
11550 
11551 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
11552 		goto out_free_buffer_mask;
11553 
11554 	/* Only allocate trace_printk buffers if a trace_printk exists */
11555 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
11556 		/* Must be called before global_trace.buffer is allocated */
11557 		trace_printk_init_buffers();
11558 
11559 	/* To save memory, keep the ring buffer size to its minimum */
11560 	if (global_trace.ring_buffer_expanded)
11561 		ring_buf_size = trace_buf_size;
11562 	else
11563 		ring_buf_size = 1;
11564 
11565 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
11566 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
11567 
11568 	raw_spin_lock_init(&global_trace.start_lock);
11569 
11570 	/*
11571 	 * The prepare callbacks allocates some memory for the ring buffer. We
11572 	 * don't free the buffer if the CPU goes down. If we were to free
11573 	 * the buffer, then the user would lose any trace that was in the
11574 	 * buffer. The memory will be removed once the "instance" is removed.
11575 	 */
11576 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
11577 				      "trace/RB:prepare", trace_rb_cpu_prepare,
11578 				      NULL);
11579 	if (ret < 0)
11580 		goto out_free_cpumask;
11581 	/* Used for event triggers */
11582 	ret = -ENOMEM;
11583 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
11584 	if (!temp_buffer)
11585 		goto out_rm_hp_state;
11586 
11587 	if (trace_create_savedcmd() < 0)
11588 		goto out_free_temp_buffer;
11589 
11590 	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
11591 		goto out_free_savedcmd;
11592 
11593 	/* TODO: make the number of buffers hot pluggable with CPUS */
11594 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
11595 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
11596 		goto out_free_pipe_cpumask;
11597 	}
11598 	if (global_trace.buffer_disabled)
11599 		tracing_off();
11600 
11601 	if (trace_boot_clock) {
11602 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
11603 		if (ret < 0)
11604 			pr_warn("Trace clock %s not defined, going back to default\n",
11605 				trace_boot_clock);
11606 	}
11607 
11608 	/*
11609 	 * register_tracer() might reference current_trace, so it
11610 	 * needs to be set before we register anything. This is
11611 	 * just a bootstrap of current_trace anyway.
11612 	 */
11613 	global_trace.current_trace = &nop_trace;
11614 	global_trace.current_trace_flags = nop_trace.flags;
11615 
11616 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
11617 #ifdef CONFIG_TRACER_MAX_TRACE
11618 	spin_lock_init(&global_trace.snapshot_trigger_lock);
11619 #endif
11620 	ftrace_init_global_array_ops(&global_trace);
11621 
11622 #ifdef CONFIG_MODULES
11623 	INIT_LIST_HEAD(&global_trace.mod_events);
11624 #endif
11625 
11626 	init_trace_flags_index(&global_trace);
11627 
11628 	INIT_LIST_HEAD(&global_trace.tracers);
11629 
11630 	/* All seems OK, enable tracing */
11631 	tracing_disabled = 0;
11632 
11633 	atomic_notifier_chain_register(&panic_notifier_list,
11634 				       &trace_panic_notifier);
11635 
11636 	register_die_notifier(&trace_die_notifier);
11637 
11638 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
11639 
11640 	global_trace.syscall_buf_sz = syscall_buf_size;
11641 
11642 	INIT_LIST_HEAD(&global_trace.systems);
11643 	INIT_LIST_HEAD(&global_trace.events);
11644 	INIT_LIST_HEAD(&global_trace.hist_vars);
11645 	INIT_LIST_HEAD(&global_trace.err_log);
11646 	list_add(&global_trace.marker_list, &marker_copies);
11647 	list_add(&global_trace.list, &ftrace_trace_arrays);
11648 
11649 	register_tracer(&nop_trace);
11650 
11651 	/* Function tracing may start here (via kernel command line) */
11652 	init_function_trace();
11653 
11654 	apply_trace_boot_options();
11655 
11656 	register_snapshot_cmd();
11657 
11658 	return 0;
11659 
11660 out_free_pipe_cpumask:
11661 	free_cpumask_var(global_trace.pipe_cpumask);
11662 out_free_savedcmd:
11663 	trace_free_saved_cmdlines_buffer();
11664 out_free_temp_buffer:
11665 	ring_buffer_free(temp_buffer);
11666 out_rm_hp_state:
11667 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
11668 out_free_cpumask:
11669 	free_cpumask_var(global_trace.tracing_cpumask);
11670 out_free_buffer_mask:
11671 	free_cpumask_var(tracing_buffer_mask);
11672 	return ret;
11673 }
11674 
11675 #ifdef CONFIG_FUNCTION_TRACER
11676 /* Used to set module cached ftrace filtering at boot up */
trace_get_global_array(void)11677 struct trace_array *trace_get_global_array(void)
11678 {
11679 	return &global_trace;
11680 }
11681 #endif
11682 
ftrace_boot_snapshot(void)11683 void __init ftrace_boot_snapshot(void)
11684 {
11685 #ifdef CONFIG_TRACER_MAX_TRACE
11686 	struct trace_array *tr;
11687 
11688 	if (!snapshot_at_boot)
11689 		return;
11690 
11691 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
11692 		if (!tr->allocated_snapshot)
11693 			continue;
11694 
11695 		tracing_snapshot_instance(tr);
11696 		trace_array_puts(tr, "** Boot snapshot taken **\n");
11697 	}
11698 #endif
11699 }
11700 
early_trace_init(void)11701 void __init early_trace_init(void)
11702 {
11703 	if (tracepoint_printk) {
11704 		tracepoint_print_iter =
11705 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
11706 		if (MEM_FAIL(!tracepoint_print_iter,
11707 			     "Failed to allocate trace iterator\n"))
11708 			tracepoint_printk = 0;
11709 		else
11710 			static_key_enable(&tracepoint_printk_key.key);
11711 	}
11712 	tracer_alloc_buffers();
11713 
11714 	init_events();
11715 }
11716 
trace_init(void)11717 void __init trace_init(void)
11718 {
11719 	trace_event_init();
11720 
11721 	if (boot_instance_index)
11722 		enable_instances();
11723 }
11724 
clear_boot_tracer(void)11725 __init static void clear_boot_tracer(void)
11726 {
11727 	/*
11728 	 * The default tracer at boot buffer is an init section.
11729 	 * This function is called in lateinit. If we did not
11730 	 * find the boot tracer, then clear it out, to prevent
11731 	 * later registration from accessing the buffer that is
11732 	 * about to be freed.
11733 	 */
11734 	if (!default_bootup_tracer)
11735 		return;
11736 
11737 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
11738 	       default_bootup_tracer);
11739 	default_bootup_tracer = NULL;
11740 }
11741 
11742 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
tracing_set_default_clock(void)11743 __init static void tracing_set_default_clock(void)
11744 {
11745 	/* sched_clock_stable() is determined in late_initcall */
11746 	if (!trace_boot_clock && !sched_clock_stable()) {
11747 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
11748 			pr_warn("Can not set tracing clock due to lockdown\n");
11749 			return;
11750 		}
11751 
11752 		printk(KERN_WARNING
11753 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
11754 		       "If you want to keep using the local clock, then add:\n"
11755 		       "  \"trace_clock=local\"\n"
11756 		       "on the kernel command line\n");
11757 		tracing_set_clock(&global_trace, "global");
11758 	}
11759 }
11760 #else
tracing_set_default_clock(void)11761 static inline void tracing_set_default_clock(void) { }
11762 #endif
11763 
late_trace_init(void)11764 __init static int late_trace_init(void)
11765 {
11766 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
11767 		static_key_disable(&tracepoint_printk_key.key);
11768 		tracepoint_printk = 0;
11769 	}
11770 
11771 	if (traceoff_after_boot)
11772 		tracing_off();
11773 
11774 	tracing_set_default_clock();
11775 	clear_boot_tracer();
11776 	return 0;
11777 }
11778 
11779 late_initcall_sync(late_trace_init);
11780