xref: /linux/kernel/trace/trace.c (revision 5572ad8fddecd4a0db19801262072ff5916b7589)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/syscalls.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/cleanup.h>
31 #include <linux/vmalloc.h>
32 #include <linux/ftrace.h>
33 #include <linux/module.h>
34 #include <linux/percpu.h>
35 #include <linux/splice.h>
36 #include <linux/kdebug.h>
37 #include <linux/string.h>
38 #include <linux/mount.h>
39 #include <linux/rwsem.h>
40 #include <linux/slab.h>
41 #include <linux/ctype.h>
42 #include <linux/init.h>
43 #include <linux/panic_notifier.h>
44 #include <linux/poll.h>
45 #include <linux/nmi.h>
46 #include <linux/fs.h>
47 #include <linux/trace.h>
48 #include <linux/sched/clock.h>
49 #include <linux/sched/rt.h>
50 #include <linux/fsnotify.h>
51 #include <linux/irq_work.h>
52 #include <linux/workqueue.h>
53 #include <linux/sort.h>
54 #include <linux/io.h> /* vmap_page_range() */
55 #include <linux/fs_context.h>
56 
57 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
58 
59 #include "trace.h"
60 #include "trace_output.h"
61 
62 #ifdef CONFIG_FTRACE_STARTUP_TEST
63 /*
64  * We need to change this state when a selftest is running.
65  * A selftest looks into the ring-buffer to count the
66  * entries inserted during the selftest, although concurrent
67  * insertions into the ring-buffer, such as trace_printk, could occur
68  * at the same time, giving false positive or negative results.
69  */
70 static bool __read_mostly tracing_selftest_running;
71 
72 /*
73  * If boot-time tracing including tracers/events via kernel cmdline
74  * is running, we do not want to run SELFTEST.
75  */
76 bool __read_mostly tracing_selftest_disabled;
77 
78 void __init disable_tracing_selftest(const char *reason)
79 {
80 	if (!tracing_selftest_disabled) {
81 		tracing_selftest_disabled = true;
82 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
83 	}
84 }
85 #else
86 #define tracing_selftest_running	0
87 #define tracing_selftest_disabled	0
88 #endif
89 
90 /* Pipe tracepoints to printk */
91 static struct trace_iterator *tracepoint_print_iter;
92 int tracepoint_printk;
93 static bool tracepoint_printk_stop_on_boot __initdata;
94 static bool traceoff_after_boot __initdata;
95 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
96 
97 /* Store tracers and their flags per instance */
98 struct tracers {
99 	struct list_head	list;
100 	struct tracer		*tracer;
101 	struct tracer_flags	*flags;
102 };
103 
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 DEFINE_PER_CPU(bool, trace_taskinfo_save);
110 
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will turn to zero if the initialization
114  * of the tracer is successful. But that is the only place that sets
115  * this back to zero.
116  */
117 static int tracing_disabled = 1;
118 
119 cpumask_var_t __read_mostly	tracing_buffer_mask;
120 
121 #define MAX_TRACER_SIZE		100
122 /*
123  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
124  *
125  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
126  * is set, then ftrace_dump is called. This will output the contents
127  * of the ftrace buffers to the console.  This is very useful for
128  * capturing traces that lead to crashes and outputting them to a
129  * serial console.
130  *
131  * It is off by default, but you can enable it either by specifying
132  * "ftrace_dump_on_oops" on the kernel command line, or by setting
133  * /proc/sys/kernel/ftrace_dump_on_oops
134  * Set to 1 if you want to dump the buffers of all CPUs
135  * Set to 2 if you want to dump the buffer of the CPU that triggered the oops
136  * Set instance name if you want to dump the specific trace instance
137  * Multiple instance dump is also supported, and instances are separated
138  * by commas.
139  */
140 /* Set to the string "0" to disable by default */
141 static char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
142 
143 /* When set, tracing will stop when a WARN*() is hit */
144 static int __disable_trace_on_warning;
145 
146 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
147 			     void *buffer, size_t *lenp, loff_t *ppos);
148 static const struct ctl_table trace_sysctl_table[] = {
149 	{
150 		.procname	= "ftrace_dump_on_oops",
151 		.data		= &ftrace_dump_on_oops,
152 		.maxlen		= MAX_TRACER_SIZE,
153 		.mode		= 0644,
154 		.proc_handler	= proc_dostring,
155 	},
156 	{
157 		.procname	= "traceoff_on_warning",
158 		.data		= &__disable_trace_on_warning,
159 		.maxlen		= sizeof(__disable_trace_on_warning),
160 		.mode		= 0644,
161 		.proc_handler	= proc_dointvec,
162 	},
163 	{
164 		.procname	= "tracepoint_printk",
165 		.data		= &tracepoint_printk,
166 		.maxlen		= sizeof(tracepoint_printk),
167 		.mode		= 0644,
168 		.proc_handler	= tracepoint_printk_sysctl,
169 	},
170 };
171 
172 static int __init init_trace_sysctls(void)
173 {
174 	register_sysctl_init("kernel", trace_sysctl_table);
175 	return 0;
176 }
177 subsys_initcall(init_trace_sysctls);
178 
179 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
180 /* Map of enums to their values, for "eval_map" file */
181 struct trace_eval_map_head {
182 	struct module			*mod;
183 	unsigned long			length;
184 };
185 
186 union trace_eval_map_item;
187 
188 struct trace_eval_map_tail {
189 	/*
190 	 * "end" is first and points to NULL as it must be different
191 	 * than "mod" or "eval_string"
192 	 */
193 	union trace_eval_map_item	*next;
194 	const char			*end;	/* points to NULL */
195 };
196 
197 static DEFINE_MUTEX(trace_eval_mutex);
198 
199 /*
200  * The trace_eval_maps are saved in an array with two extra elements,
201  * one at the beginning, and one at the end. The beginning item contains
202  * the count of the saved maps (head.length), and the module they
203  * belong to if not built in (head.mod). The ending item contains a
204  * pointer to the next array of saved eval_map items.
205  */
206 union trace_eval_map_item {
207 	struct trace_eval_map		map;
208 	struct trace_eval_map_head	head;
209 	struct trace_eval_map_tail	tail;
210 };
211 
212 static union trace_eval_map_item *trace_eval_maps;
213 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
214 
215 int tracing_set_tracer(struct trace_array *tr, const char *buf);
216 static void ftrace_trace_userstack(struct trace_array *tr,
217 				   struct trace_buffer *buffer,
218 				   unsigned int trace_ctx);
219 
220 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
221 static char *default_bootup_tracer;
222 
223 static bool allocate_snapshot;
224 static bool snapshot_at_boot;
225 
226 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
227 static int boot_instance_index;
228 
229 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
230 static int boot_snapshot_index;
231 
232 static int __init set_cmdline_ftrace(char *str)
233 {
234 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
235 	default_bootup_tracer = bootup_tracer_buf;
236 	/* We are using ftrace early, expand it */
237 	trace_set_ring_buffer_expanded(NULL);
238 	return 1;
239 }
240 __setup("ftrace=", set_cmdline_ftrace);
241 
242 int ftrace_dump_on_oops_enabled(void)
243 {
244 	if (!strcmp("0", ftrace_dump_on_oops))
245 		return 0;
246 	else
247 		return 1;
248 }
249 
250 static int __init set_ftrace_dump_on_oops(char *str)
251 {
252 	if (!*str) {
253 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
254 		return 1;
255 	}
256 
257 	if (*str == ',') {
258 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
259 		strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
260 		return 1;
261 	}
262 
263 	if (*str++ == '=') {
264 		strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
265 		return 1;
266 	}
267 
268 	return 0;
269 }
270 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
271 
272 static int __init stop_trace_on_warning(char *str)
273 {
274 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
275 		__disable_trace_on_warning = 1;
276 	return 1;
277 }
278 __setup("traceoff_on_warning", stop_trace_on_warning);
279 
280 static int __init boot_alloc_snapshot(char *str)
281 {
282 	char *slot = boot_snapshot_info + boot_snapshot_index;
283 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
284 	int ret;
285 
286 	if (str[0] == '=') {
287 		str++;
288 		if (strlen(str) >= left)
289 			return -1;
290 
291 		ret = snprintf(slot, left, "%s\t", str);
292 		boot_snapshot_index += ret;
293 	} else {
294 		allocate_snapshot = true;
295 		/* We also need the main ring buffer expanded */
296 		trace_set_ring_buffer_expanded(NULL);
297 	}
298 	return 1;
299 }
300 __setup("alloc_snapshot", boot_alloc_snapshot);
301 
302 
303 static int __init boot_snapshot(char *str)
304 {
305 	snapshot_at_boot = true;
306 	boot_alloc_snapshot(str);
307 	return 1;
308 }
309 __setup("ftrace_boot_snapshot", boot_snapshot);
310 
311 
312 static int __init boot_instance(char *str)
313 {
314 	char *slot = boot_instance_info + boot_instance_index;
315 	int left = sizeof(boot_instance_info) - boot_instance_index;
316 	int ret;
317 
318 	if (strlen(str) >= left)
319 		return -1;
320 
321 	ret = snprintf(slot, left, "%s\t", str);
322 	boot_instance_index += ret;
323 
324 	return 1;
325 }
326 __setup("trace_instance=", boot_instance);
327 
328 
329 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
330 
331 static int __init set_trace_boot_options(char *str)
332 {
333 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
334 	return 1;
335 }
336 __setup("trace_options=", set_trace_boot_options);
337 
338 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
339 static char *trace_boot_clock __initdata;
340 
341 static int __init set_trace_boot_clock(char *str)
342 {
343 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
344 	trace_boot_clock = trace_boot_clock_buf;
345 	return 1;
346 }
347 __setup("trace_clock=", set_trace_boot_clock);
348 
349 static int __init set_tracepoint_printk(char *str)
350 {
351 	/* Ignore the "tp_printk_stop_on_boot" param */
352 	if (*str == '_')
353 		return 0;
354 
355 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
356 		tracepoint_printk = 1;
357 	return 1;
358 }
359 __setup("tp_printk", set_tracepoint_printk);
360 
361 static int __init set_tracepoint_printk_stop(char *str)
362 {
363 	tracepoint_printk_stop_on_boot = true;
364 	return 1;
365 }
366 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
367 
368 static int __init set_traceoff_after_boot(char *str)
369 {
370 	traceoff_after_boot = true;
371 	return 1;
372 }
373 __setup("traceoff_after_boot", set_traceoff_after_boot);
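/*
 * Illustrative example (editor's sketch, not part of the original file):
 * the __setup() handlers above consume kernel command-line parameters.
 * A hypothetical boot line exercising several of them could look like:
 *
 *   ftrace=function_graph trace_options=sym-offset trace_clock=global
 *   trace_buf_size=1048576 alloc_snapshot ftrace_dump_on_oops=2
 *   trace_instance=foo tp_printk traceoff_after_boot
 *
 * The parameter names come from the __setup() strings in this file; the
 * particular tracer, options and sizes shown are only examples.
 */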
374 
375 unsigned long long ns2usecs(u64 nsec)
376 {
377 	nsec += 500;
378 	do_div(nsec, 1000);
379 	return nsec;
380 }
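/*
 * Worked example (editor's note): ns2usecs() rounds to the nearest
 * microsecond, e.g. ns2usecs(1499) = (1499 + 500) / 1000 = 1 and
 * ns2usecs(1500) = (1500 + 500) / 1000 = 2.
 */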
381 
382 static void
383 trace_process_export(struct trace_export *export,
384 	       struct ring_buffer_event *event, int flag)
385 {
386 	struct trace_entry *entry;
387 	unsigned int size = 0;
388 
389 	if (export->flags & flag) {
390 		entry = ring_buffer_event_data(event);
391 		size = ring_buffer_event_length(event);
392 		export->write(export, entry, size);
393 	}
394 }
395 
396 static DEFINE_MUTEX(ftrace_export_lock);
397 
398 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
399 
400 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
401 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
402 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
403 
404 static inline void ftrace_exports_enable(struct trace_export *export)
405 {
406 	if (export->flags & TRACE_EXPORT_FUNCTION)
407 		static_branch_inc(&trace_function_exports_enabled);
408 
409 	if (export->flags & TRACE_EXPORT_EVENT)
410 		static_branch_inc(&trace_event_exports_enabled);
411 
412 	if (export->flags & TRACE_EXPORT_MARKER)
413 		static_branch_inc(&trace_marker_exports_enabled);
414 }
415 
416 static inline void ftrace_exports_disable(struct trace_export *export)
417 {
418 	if (export->flags & TRACE_EXPORT_FUNCTION)
419 		static_branch_dec(&trace_function_exports_enabled);
420 
421 	if (export->flags & TRACE_EXPORT_EVENT)
422 		static_branch_dec(&trace_event_exports_enabled);
423 
424 	if (export->flags & TRACE_EXPORT_MARKER)
425 		static_branch_dec(&trace_marker_exports_enabled);
426 }
427 
428 static void ftrace_exports(struct ring_buffer_event *event, int flag)
429 {
430 	struct trace_export *export;
431 
432 	guard(preempt_notrace)();
433 
434 	export = rcu_dereference_raw_check(ftrace_exports_list);
435 	while (export) {
436 		trace_process_export(export, event, flag);
437 		export = rcu_dereference_raw_check(export->next);
438 	}
439 }
440 
441 static inline void
442 add_trace_export(struct trace_export **list, struct trace_export *export)
443 {
444 	rcu_assign_pointer(export->next, *list);
445 	/*
446 	 * We are entering export into the list but another
447 	 * CPU might be walking that list. We need to make sure
448 	 * the export->next pointer is valid before another CPU sees
449 	 * the export pointer included into the list.
450 	 */
451 	rcu_assign_pointer(*list, export);
452 }
453 
454 static inline int
455 rm_trace_export(struct trace_export **list, struct trace_export *export)
456 {
457 	struct trace_export **p;
458 
459 	for (p = list; *p != NULL; p = &(*p)->next)
460 		if (*p == export)
461 			break;
462 
463 	if (*p != export)
464 		return -1;
465 
466 	rcu_assign_pointer(*p, (*p)->next);
467 
468 	return 0;
469 }
470 
471 static inline void
472 add_ftrace_export(struct trace_export **list, struct trace_export *export)
473 {
474 	ftrace_exports_enable(export);
475 
476 	add_trace_export(list, export);
477 }
478 
479 static inline int
480 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
481 {
482 	int ret;
483 
484 	ret = rm_trace_export(list, export);
485 	ftrace_exports_disable(export);
486 
487 	return ret;
488 }
489 
490 int register_ftrace_export(struct trace_export *export)
491 {
492 	if (WARN_ON_ONCE(!export->write))
493 		return -1;
494 
495 	guard(mutex)(&ftrace_export_lock);
496 
497 	add_ftrace_export(&ftrace_exports_list, export);
498 
499 	return 0;
500 }
501 EXPORT_SYMBOL_GPL(register_ftrace_export);
502 
503 int unregister_ftrace_export(struct trace_export *export)
504 {
505 	guard(mutex)(&ftrace_export_lock);
506 	return rm_ftrace_export(&ftrace_exports_list, export);
507 }
508 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
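/*
 * Illustrative sketch (editor's example, hypothetical names): a minimal
 * trace_export user. The ->write() callback receives the raw trace entry
 * and its length, as passed by trace_process_export() above; the
 * prototype follows struct trace_export in include/linux/trace.h.
 */
static void example_export_write(struct trace_export *export,
				 const void *entry, unsigned int size)
{
	/* Forward 'size' bytes at 'entry' to some external sink. */
}

static struct trace_export example_export = {
	.write	= example_export_write,
	.flags	= TRACE_EXPORT_FUNCTION | TRACE_EXPORT_EVENT,
};

/*
 * Registration and removal, e.g. from module init/exit:
 *	register_ftrace_export(&example_export);
 *	...
 *	unregister_ftrace_export(&example_export);
 */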
509 
510 /* trace_flags holds trace_options default values */
511 #define TRACE_DEFAULT_FLAGS						\
512 	(FUNCTION_DEFAULT_FLAGS | FPROFILE_DEFAULT_FLAGS |		\
513 	 TRACE_ITER(PRINT_PARENT) | TRACE_ITER(PRINTK) |		\
514 	 TRACE_ITER(ANNOTATE) | TRACE_ITER(CONTEXT_INFO) |		\
515 	 TRACE_ITER(RECORD_CMD) | TRACE_ITER(OVERWRITE) |		\
516 	 TRACE_ITER(IRQ_INFO) | TRACE_ITER(MARKERS) |			\
517 	 TRACE_ITER(HASH_PTR) | TRACE_ITER(TRACE_PRINTK) |		\
518 	 TRACE_ITER(COPY_MARKER))
519 
520 /* trace_options that are only supported by global_trace */
521 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER(PRINTK) |			\
522 	       TRACE_ITER(PRINTK_MSGONLY) | TRACE_ITER(RECORD_CMD) |	\
523 	       TRACE_ITER(PROF_TEXT_OFFSET) | FPROFILE_DEFAULT_FLAGS)
524 
525 /* trace_flags that are default zero for instances */
526 #define ZEROED_TRACE_FLAGS \
527 	(TRACE_ITER(EVENT_FORK) | TRACE_ITER(FUNC_FORK) | TRACE_ITER(TRACE_PRINTK) | \
528 	 TRACE_ITER(COPY_MARKER))
529 
530 /*
531  * The global_trace is the descriptor that holds the top-level tracing
532  * buffers for the live tracing.
533  */
534 static struct trace_array global_trace = {
535 	.trace_flags = TRACE_DEFAULT_FLAGS,
536 };
537 
538 static struct trace_array *printk_trace = &global_trace;
539 
540 /* List of trace_arrays interested in the top level trace_marker */
541 static LIST_HEAD(marker_copies);
542 
543 static __always_inline bool printk_binsafe(struct trace_array *tr)
544 {
545 	/*
546 	 * The binary format of trace_printk can cause a crash if used
547 	 * by a buffer from another boot. Force the use of the
548 	 * non-binary version of trace_printk if the trace_printk
549 	 * buffer is a boot-mapped ring buffer.
550 	 */
551 	return !(tr->flags & TRACE_ARRAY_FL_BOOT);
552 }
553 
554 static void update_printk_trace(struct trace_array *tr)
555 {
556 	if (printk_trace == tr)
557 		return;
558 
559 	printk_trace->trace_flags &= ~TRACE_ITER(TRACE_PRINTK);
560 	printk_trace = tr;
561 	tr->trace_flags |= TRACE_ITER(TRACE_PRINTK);
562 }
563 
564 /* Returns true if the status of tr changed */
565 static bool update_marker_trace(struct trace_array *tr, int enabled)
566 {
567 	lockdep_assert_held(&event_mutex);
568 
569 	if (enabled) {
570 		if (!list_empty(&tr->marker_list))
571 			return false;
572 
573 		list_add_rcu(&tr->marker_list, &marker_copies);
574 		tr->trace_flags |= TRACE_ITER(COPY_MARKER);
575 		return true;
576 	}
577 
578 	if (list_empty(&tr->marker_list))
579 		return false;
580 
581 	list_del_init(&tr->marker_list);
582 	tr->trace_flags &= ~TRACE_ITER(COPY_MARKER);
583 	return true;
584 }
585 
586 void trace_set_ring_buffer_expanded(struct trace_array *tr)
587 {
588 	if (!tr)
589 		tr = &global_trace;
590 	tr->ring_buffer_expanded = true;
591 }
592 
593 LIST_HEAD(ftrace_trace_arrays);
594 
595 int trace_array_get(struct trace_array *this_tr)
596 {
597 	struct trace_array *tr;
598 
599 	guard(mutex)(&trace_types_lock);
600 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
601 		if (tr == this_tr) {
602 			tr->ref++;
603 			return 0;
604 		}
605 	}
606 
607 	return -ENODEV;
608 }
609 
610 static void __trace_array_put(struct trace_array *this_tr)
611 {
612 	WARN_ON(!this_tr->ref);
613 	this_tr->ref--;
614 }
615 
616 /**
617  * trace_array_put - Decrement the reference counter for this trace array.
618  * @this_tr : pointer to the trace array
619  *
620  * NOTE: Use this when we no longer need the trace array returned by
621  * trace_array_get_by_name(). This ensures the trace array can be later
622  * destroyed.
623  *
624  */
625 void trace_array_put(struct trace_array *this_tr)
626 {
627 	if (!this_tr)
628 		return;
629 
630 	guard(mutex)(&trace_types_lock);
631 	__trace_array_put(this_tr);
632 }
633 EXPORT_SYMBOL_GPL(trace_array_put);
634 
635 int tracing_check_open_get_tr(struct trace_array *tr)
636 {
637 	int ret;
638 
639 	ret = security_locked_down(LOCKDOWN_TRACEFS);
640 	if (ret)
641 		return ret;
642 
643 	if (tracing_disabled)
644 		return -ENODEV;
645 
646 	if (tr && trace_array_get(tr) < 0)
647 		return -ENODEV;
648 
649 	return 0;
650 }
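/*
 * Illustrative sketch (editor's example, hypothetical file operations):
 * the usual pattern for a per-instance tracefs file is to take a
 * reference in ->open() via tracing_check_open_get_tr() and to drop it
 * in ->release() with trace_array_put(), as the tracing files later in
 * this file do.
 */
static int example_open(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	/* ... set up filp->private_data ... */
	return 0;
}

static int example_release(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;

	trace_array_put(tr);
	return 0;
}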
651 
652 /**
653  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
654  * @filtered_pids: The list of pids to check
655  * @search_pid: The PID to find in @filtered_pids
656  *
657  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
658  */
659 bool
660 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
661 {
662 	return trace_pid_list_is_set(filtered_pids, search_pid);
663 }
664 
665 /**
666  * trace_ignore_this_task - should a task be ignored for tracing
667  * @filtered_pids: The list of pids to check
668  * @filtered_no_pids: The list of pids not to be traced
669  * @task: The task that should be ignored if not filtered
670  *
671  * Checks if @task should be traced or not from @filtered_pids.
672  * Returns true if @task should *NOT* be traced.
673  * Returns false if @task should be traced.
674  */
675 bool
676 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
677 		       struct trace_pid_list *filtered_no_pids,
678 		       struct task_struct *task)
679 {
680 	/*
681 	 * If filtered_no_pids is not empty, and the task's pid is listed
682 	 * in filtered_no_pids, then return true.
683 	 * Otherwise, if filtered_pids is empty, that means we can
684 	 * trace all tasks. If it has content, then only trace pids
685 	 * within filtered_pids.
686 	 */
687 
688 	return (filtered_pids &&
689 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
690 		(filtered_no_pids &&
691 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
692 }
693 
694 /**
695  * trace_filter_add_remove_task - Add or remove a task from a pid_list
696  * @pid_list: The list to modify
697  * @self: The current task for fork or NULL for exit
698  * @task: The task to add or remove
699  *
700  * If adding a task, if @self is defined, the task is only added if @self
701  * is also included in @pid_list. This happens on fork and tasks should
702  * only be added when the parent is listed. If @self is NULL, then the
703  * @task pid will be removed from the list, which would happen on exit
704  * of a task.
705  */
706 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
707 				  struct task_struct *self,
708 				  struct task_struct *task)
709 {
710 	if (!pid_list)
711 		return;
712 
713 	/* For forks, we only add if the forking task is listed */
714 	if (self) {
715 		if (!trace_find_filtered_pid(pid_list, self->pid))
716 			return;
717 	}
718 
719 	/* "self" is set for forks, and NULL for exits */
720 	if (self)
721 		trace_pid_list_set(pid_list, task->pid);
722 	else
723 		trace_pid_list_clear(pid_list, task->pid);
724 }
725 
726 /**
727  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
728  * @pid_list: The pid list to show
729  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
730  * @pos: The position of the file
731  *
732  * This is used by the seq_file "next" operation to iterate the pids
733  * listed in a trace_pid_list structure.
734  *
735  * Returns the pid+1 as we want to display pid of zero, but NULL would
736  * stop the iteration.
737  */
738 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
739 {
740 	long pid = (unsigned long)v;
741 	unsigned int next;
742 
743 	(*pos)++;
744 
745 	/* pid already is +1 of the actual previous bit */
746 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
747 		return NULL;
748 
749 	pid = next;
750 
751 	/* Return pid + 1 to allow zero to be represented */
752 	return (void *)(pid + 1);
753 }
754 
755 /**
756  * trace_pid_start - Used for seq_file to start reading pid lists
757  * @pid_list: The pid list to show
758  * @pos: The position of the file
759  *
760  * This is used by seq_file "start" operation to start the iteration
761  * of listing pids.
762  *
763  * Returns the pid+1 as we want to display pid of zero, but NULL would
764  * stop the iteration.
765  */
766 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
767 {
768 	unsigned long pid;
769 	unsigned int first;
770 	loff_t l = 0;
771 
772 	if (trace_pid_list_first(pid_list, &first) < 0)
773 		return NULL;
774 
775 	pid = first;
776 
777 	/* Return pid + 1 so that zero can be the exit value */
778 	for (pid++; pid && l < *pos;
779 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
780 		;
781 	return (void *)pid;
782 }
783 
784 /**
785  * trace_pid_show - show the current pid in seq_file processing
786  * @m: The seq_file structure to write into
787  * @v: A void pointer of the pid (+1) value to display
788  *
789  * Can be directly used by seq_file operations to display the current
790  * pid value.
791  */
792 int trace_pid_show(struct seq_file *m, void *v)
793 {
794 	unsigned long pid = (unsigned long)v - 1;
795 
796 	seq_printf(m, "%lu\n", pid);
797 	return 0;
798 }
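/*
 * Illustrative sketch (editor's example, hypothetical names):
 * trace_pid_start(), trace_pid_next() and trace_pid_show() are meant to
 * back a seq_file. Thin wrappers adapt them to the seq_operations
 * prototypes; here the pid list is assumed to be stashed in m->private
 * at open time.
 */
static void *example_pid_seq_start(struct seq_file *m, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;

	return trace_pid_start(pid_list, pos);
}

static void *example_pid_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;

	return trace_pid_next(pid_list, v, pos);
}

static void example_pid_seq_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations example_pid_seq_ops = {
	.start	= example_pid_seq_start,
	.next	= example_pid_seq_next,
	.stop	= example_pid_seq_stop,
	.show	= trace_pid_show,
};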
799 
800 /* 128 should be much more than enough */
801 #define PID_BUF_SIZE		127
802 
803 int trace_pid_write(struct trace_pid_list *filtered_pids,
804 		    struct trace_pid_list **new_pid_list,
805 		    const char __user *ubuf, size_t cnt)
806 {
807 	struct trace_pid_list *pid_list;
808 	struct trace_parser parser;
809 	unsigned long val;
810 	int nr_pids = 0;
811 	ssize_t read = 0;
812 	ssize_t ret;
813 	loff_t pos;
814 	pid_t pid;
815 
816 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
817 		return -ENOMEM;
818 
819 	/*
820 	 * Always recreate a new array. The write is an all or nothing
821 	 * operation. Always create a new array when adding new pids by
822 	 * the user. If the operation fails, then the current list is
823 	 * not modified.
824 	 */
825 	pid_list = trace_pid_list_alloc();
826 	if (!pid_list) {
827 		trace_parser_put(&parser);
828 		return -ENOMEM;
829 	}
830 
831 	if (filtered_pids) {
832 		/* copy the current bits to the new max */
833 		ret = trace_pid_list_first(filtered_pids, &pid);
834 		while (!ret) {
835 			ret = trace_pid_list_set(pid_list, pid);
836 			if (ret < 0)
837 				goto out;
838 
839 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
840 			nr_pids++;
841 		}
842 	}
843 
844 	ret = 0;
845 	while (cnt > 0) {
846 
847 		pos = 0;
848 
849 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
850 		if (ret < 0)
851 			break;
852 
853 		read += ret;
854 		ubuf += ret;
855 		cnt -= ret;
856 
857 		if (!trace_parser_loaded(&parser))
858 			break;
859 
860 		ret = -EINVAL;
861 		if (kstrtoul(parser.buffer, 0, &val))
862 			break;
863 
864 		pid = (pid_t)val;
865 
866 		if (trace_pid_list_set(pid_list, pid) < 0) {
867 			ret = -1;
868 			break;
869 		}
870 		nr_pids++;
871 
872 		trace_parser_clear(&parser);
873 		ret = 0;
874 	}
875  out:
876 	trace_parser_put(&parser);
877 
878 	if (ret < 0) {
879 		trace_pid_list_free(pid_list);
880 		return ret;
881 	}
882 
883 	if (!nr_pids) {
884 		/* Cleared the list of pids */
885 		trace_pid_list_free(pid_list);
886 		pid_list = NULL;
887 	}
888 
889 	*new_pid_list = pid_list;
890 
891 	return read;
892 }
893 
894 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
895 {
896 	u64 ts;
897 
898 	/* Early boot up does not have a buffer yet */
899 	if (!buf->buffer)
900 		return trace_clock_local();
901 
902 	ts = ring_buffer_time_stamp(buf->buffer);
903 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
904 
905 	return ts;
906 }
907 
908 u64 ftrace_now(int cpu)
909 {
910 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
911 }
912 
913 /**
914  * tracing_is_enabled - Show if global_trace has been enabled
915  *
916  * Shows if the global trace has been enabled or not. It uses the
917  * mirror flag "buffer_disabled" to be used in fast paths such as for
918  * the irqsoff tracer. But it may be inaccurate due to races. If you
919  * need to know the accurate state, use tracing_is_on() which is a little
920  * slower, but accurate.
921  */
922 int tracing_is_enabled(void)
923 {
924 	/*
925 	 * For quick access (irqsoff uses this in fast path), just
926 	 * return the mirror variable of the state of the ring buffer.
927 	 * It's a little racy, but we don't really care.
928 	 */
929 	return !global_trace.buffer_disabled;
930 }
931 
932 /*
933  * trace_buf_size is the size in bytes that is allocated
934  * for a buffer. Note, the number of bytes is always rounded
935  * to page size.
936  *
937  * This number is purposely set to a low number of 16384.
938  * If a dump on oops happens, it is much appreciated not to
939  * have to wait for all that output. Anyway, this is configurable
940  * at both boot time and run time.
941  */
942 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
943 
944 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
945 
946 /* trace_types holds a link list of available tracers. */
947 static struct tracer		*trace_types __read_mostly;
948 
949 /*
950  * trace_types_lock is used to protect the trace_types list.
951  */
952 DEFINE_MUTEX(trace_types_lock);
953 
954 /*
955  * serialize the access of the ring buffer
956  *
957  * The ring buffer serializes readers, but that is only low-level protection.
958  * The validity of the events (returned by ring_buffer_peek(), etc.)
959  * is not protected by the ring buffer.
960  *
961  * The content of events may become garbage if we allow other processes to
962  * consume these events concurrently:
963  *   A) the page of the consumed events may become a normal page
964  *      (not a reader page) in the ring buffer, and this page will be
965  *      rewritten by the events producer.
966  *   B) The page of the consumed events may become a page for splice_read,
967  *      and this page will be returned to the system.
968  *
969  * These primitives allow multiple processes to access different CPU ring
970  * buffers concurrently.
971  *
972  * These primitives don't distinguish read-only and read-consume access.
973  * Multiple read-only accesses are also serialized.
974  */
975 
976 #ifdef CONFIG_SMP
977 static DECLARE_RWSEM(all_cpu_access_lock);
978 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
979 
980 static inline void trace_access_lock(int cpu)
981 {
982 	if (cpu == RING_BUFFER_ALL_CPUS) {
983 		/* gain it for accessing the whole ring buffer. */
984 		down_write(&all_cpu_access_lock);
985 	} else {
986 		/* gain it for accessing a cpu ring buffer. */
987 
988 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
989 		down_read(&all_cpu_access_lock);
990 
991 		/* Secondly block other access to this @cpu ring buffer. */
992 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
993 	}
994 }
995 
996 static inline void trace_access_unlock(int cpu)
997 {
998 	if (cpu == RING_BUFFER_ALL_CPUS) {
999 		up_write(&all_cpu_access_lock);
1000 	} else {
1001 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
1002 		up_read(&all_cpu_access_lock);
1003 	}
1004 }
1005 
1006 static inline void trace_access_lock_init(void)
1007 {
1008 	int cpu;
1009 
1010 	for_each_possible_cpu(cpu)
1011 		mutex_init(&per_cpu(cpu_access_lock, cpu));
1012 }
1013 
1014 #else
1015 
1016 static DEFINE_MUTEX(access_lock);
1017 
1018 static inline void trace_access_lock(int cpu)
1019 {
1020 	(void)cpu;
1021 	mutex_lock(&access_lock);
1022 }
1023 
1024 static inline void trace_access_unlock(int cpu)
1025 {
1026 	(void)cpu;
1027 	mutex_unlock(&access_lock);
1028 }
1029 
1030 static inline void trace_access_lock_init(void)
1031 {
1032 }
1033 
1034 #endif
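/*
 * Illustrative usage (editor's sketch): a consuming reader of a single
 * CPU buffer brackets the consumption with these primitives, so it
 * excludes whole-buffer readers (RING_BUFFER_ALL_CPUS) but can run in
 * parallel with readers of other CPUs:
 *
 *	trace_access_lock(cpu);
 *	event = ring_buffer_consume(buffer, cpu, &ts, &lost_events);
 *	...
 *	trace_access_unlock(cpu);
 */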
1035 
1036 #ifdef CONFIG_STACKTRACE
1037 static void __ftrace_trace_stack(struct trace_array *tr,
1038 				 struct trace_buffer *buffer,
1039 				 unsigned int trace_ctx,
1040 				 int skip, struct pt_regs *regs);
1041 static inline void ftrace_trace_stack(struct trace_array *tr,
1042 				      struct trace_buffer *buffer,
1043 				      unsigned int trace_ctx,
1044 				      int skip, struct pt_regs *regs);
1045 
1046 #else
1047 static inline void __ftrace_trace_stack(struct trace_array *tr,
1048 					struct trace_buffer *buffer,
1049 					unsigned int trace_ctx,
1050 					int skip, struct pt_regs *regs)
1051 {
1052 }
1053 static inline void ftrace_trace_stack(struct trace_array *tr,
1054 				      struct trace_buffer *buffer,
1055 				      unsigned long trace_ctx,
1056 				      int skip, struct pt_regs *regs)
1057 {
1058 }
1059 
1060 #endif
1061 
1062 static __always_inline void
1063 trace_event_setup(struct ring_buffer_event *event,
1064 		  int type, unsigned int trace_ctx)
1065 {
1066 	struct trace_entry *ent = ring_buffer_event_data(event);
1067 
1068 	tracing_generic_entry_update(ent, type, trace_ctx);
1069 }
1070 
1071 static __always_inline struct ring_buffer_event *
1072 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
1073 			  int type,
1074 			  unsigned long len,
1075 			  unsigned int trace_ctx)
1076 {
1077 	struct ring_buffer_event *event;
1078 
1079 	event = ring_buffer_lock_reserve(buffer, len);
1080 	if (event != NULL)
1081 		trace_event_setup(event, type, trace_ctx);
1082 
1083 	return event;
1084 }
1085 
1086 void tracer_tracing_on(struct trace_array *tr)
1087 {
1088 	if (tr->array_buffer.buffer)
1089 		ring_buffer_record_on(tr->array_buffer.buffer);
1090 	/*
1091 	 * This flag is looked at when buffers haven't been allocated
1092 	 * yet, or by some tracers (like irqsoff) that just want to
1093 	 * know if the ring buffer has been disabled, but it can handle
1094 	 * races where it gets disabled but we still do a record.
1095 	 * As the check is in the fast path of the tracers, it is more
1096 	 * important to be fast than accurate.
1097 	 */
1098 	tr->buffer_disabled = 0;
1099 }
1100 
1101 /**
1102  * tracing_on - enable tracing buffers
1103  *
1104  * This function enables tracing buffers that may have been
1105  * disabled with tracing_off.
1106  */
1107 void tracing_on(void)
1108 {
1109 	tracer_tracing_on(&global_trace);
1110 }
1111 EXPORT_SYMBOL_GPL(tracing_on);
1112 
1113 
1114 static __always_inline void
1115 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1116 {
1117 	__this_cpu_write(trace_taskinfo_save, true);
1118 
1119 	/* If this is the temp buffer, we need to commit fully */
1120 	if (this_cpu_read(trace_buffered_event) == event) {
1121 		/* Length is in event->array[0] */
1122 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1123 		/* Release the temp buffer */
1124 		this_cpu_dec(trace_buffered_event_cnt);
1125 		/* ring_buffer_unlock_commit() enables preemption */
1126 		preempt_enable_notrace();
1127 	} else
1128 		ring_buffer_unlock_commit(buffer);
1129 }
1130 
1131 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1132 		       const char *str, int size)
1133 {
1134 	struct ring_buffer_event *event;
1135 	struct trace_buffer *buffer;
1136 	struct print_entry *entry;
1137 	unsigned int trace_ctx;
1138 	int alloc;
1139 
1140 	if (!(tr->trace_flags & TRACE_ITER(PRINTK)))
1141 		return 0;
1142 
1143 	if (unlikely(tracing_selftest_running && tr == &global_trace))
1144 		return 0;
1145 
1146 	if (unlikely(tracing_disabled))
1147 		return 0;
1148 
1149 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1150 
1151 	trace_ctx = tracing_gen_ctx();
1152 	buffer = tr->array_buffer.buffer;
1153 	guard(ring_buffer_nest)(buffer);
1154 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1155 					    trace_ctx);
1156 	if (!event)
1157 		return 0;
1158 
1159 	entry = ring_buffer_event_data(event);
1160 	entry->ip = ip;
1161 
1162 	memcpy(&entry->buf, str, size);
1163 
1164 	/* Add a newline if necessary */
1165 	if (entry->buf[size - 1] != '\n') {
1166 		entry->buf[size] = '\n';
1167 		entry->buf[size + 1] = '\0';
1168 	} else
1169 		entry->buf[size] = '\0';
1170 
1171 	__buffer_unlock_commit(buffer, event);
1172 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1173 	return size;
1174 }
1175 EXPORT_SYMBOL_GPL(__trace_array_puts);
1176 
1177 /**
1178  * __trace_puts - write a constant string into the trace buffer.
1179  * @ip:	   The address of the caller
1180  * @str:   The constant string to write
1181  * @size:  The size of the string.
1182  */
1183 int __trace_puts(unsigned long ip, const char *str, int size)
1184 {
1185 	return __trace_array_puts(printk_trace, ip, str, size);
1186 }
1187 EXPORT_SYMBOL_GPL(__trace_puts);
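/*
 * Illustrative usage (editor's note): kernel code normally reaches
 * __trace_puts()/__trace_bputs() through the trace_puts() macro:
 *
 *	trace_puts("reached the interesting spot\n");
 *
 * The macro uses the binary-pointer variant (__trace_bputs) for string
 * literals and falls back to __trace_puts() otherwise.
 */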
1188 
1189 /**
1190  * __trace_bputs - write the pointer to a constant string into trace buffer
1191  * @ip:	   The address of the caller
1192  * @str:   The constant string to write to the buffer to
1193  */
1194 int __trace_bputs(unsigned long ip, const char *str)
1195 {
1196 	struct trace_array *tr = READ_ONCE(printk_trace);
1197 	struct ring_buffer_event *event;
1198 	struct trace_buffer *buffer;
1199 	struct bputs_entry *entry;
1200 	unsigned int trace_ctx;
1201 	int size = sizeof(struct bputs_entry);
1202 
1203 	if (!printk_binsafe(tr))
1204 		return __trace_puts(ip, str, strlen(str));
1205 
1206 	if (!(tr->trace_flags & TRACE_ITER(PRINTK)))
1207 		return 0;
1208 
1209 	if (unlikely(tracing_selftest_running || tracing_disabled))
1210 		return 0;
1211 
1212 	trace_ctx = tracing_gen_ctx();
1213 	buffer = tr->array_buffer.buffer;
1214 
1215 	guard(ring_buffer_nest)(buffer);
1216 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1217 					    trace_ctx);
1218 	if (!event)
1219 		return 0;
1220 
1221 	entry = ring_buffer_event_data(event);
1222 	entry->ip			= ip;
1223 	entry->str			= str;
1224 
1225 	__buffer_unlock_commit(buffer, event);
1226 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1227 
1228 	return 1;
1229 }
1230 EXPORT_SYMBOL_GPL(__trace_bputs);
1231 
1232 #ifdef CONFIG_TRACER_SNAPSHOT
1233 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1234 					   void *cond_data)
1235 {
1236 	struct tracer *tracer = tr->current_trace;
1237 	unsigned long flags;
1238 
1239 	if (in_nmi()) {
1240 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1241 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1242 		return;
1243 	}
1244 
1245 	if (!tr->allocated_snapshot) {
1246 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1247 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1248 		tracer_tracing_off(tr);
1249 		return;
1250 	}
1251 
1252 	/* Note, snapshot can not be used when the tracer uses it */
1253 	if (tracer->use_max_tr) {
1254 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1255 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1256 		return;
1257 	}
1258 
1259 	if (tr->mapped) {
1260 		trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
1261 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1262 		return;
1263 	}
1264 
1265 	local_irq_save(flags);
1266 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1267 	local_irq_restore(flags);
1268 }
1269 
1270 void tracing_snapshot_instance(struct trace_array *tr)
1271 {
1272 	tracing_snapshot_instance_cond(tr, NULL);
1273 }
1274 
1275 /**
1276  * tracing_snapshot - take a snapshot of the current buffer.
1277  *
1278  * This causes a swap between the snapshot buffer and the current live
1279  * tracing buffer. You can use this to take snapshots of the live
1280  * trace when some condition is triggered, but continue to trace.
1281  *
1282  * Note, make sure to allocate the snapshot with either
1283  * a tracing_snapshot_alloc(), or by doing it manually
1284  * with: echo 1 > /sys/kernel/tracing/snapshot
1285  *
1286  * If the snapshot buffer is not allocated, it will stop tracing.
1287  * Basically making a permanent snapshot.
1288  */
1289 void tracing_snapshot(void)
1290 {
1291 	struct trace_array *tr = &global_trace;
1292 
1293 	tracing_snapshot_instance(tr);
1294 }
1295 EXPORT_SYMBOL_GPL(tracing_snapshot);
1296 
1297 /**
1298  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1299  * @tr:		The tracing instance to snapshot
1300  * @cond_data:	The data to be tested conditionally, and possibly saved
1301  *
1302  * This is the same as tracing_snapshot() except that the snapshot is
1303  * conditional - the snapshot will only happen if the
1304  * cond_snapshot.update() implementation receiving the cond_data
1305  * returns true, which means that the trace array's cond_snapshot
1306  * update() operation used the cond_data to determine whether the
1307  * snapshot should be taken, and if it was, presumably saved it along
1308  * with the snapshot.
1309  */
1310 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1311 {
1312 	tracing_snapshot_instance_cond(tr, cond_data);
1313 }
1314 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1315 
1316 /**
1317  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1318  * @tr:		The tracing instance
1319  *
1320  * When the user enables a conditional snapshot using
1321  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1322  * with the snapshot.  This accessor is used to retrieve it.
1323  *
1324  * Should not be called from cond_snapshot.update(), since it takes
1325  * the tr->max_lock lock, which the code calling
1326  * cond_snapshot.update() has already done.
1327  *
1328  * Returns the cond_data associated with the trace array's snapshot.
1329  */
1330 void *tracing_cond_snapshot_data(struct trace_array *tr)
1331 {
1332 	void *cond_data = NULL;
1333 
1334 	local_irq_disable();
1335 	arch_spin_lock(&tr->max_lock);
1336 
1337 	if (tr->cond_snapshot)
1338 		cond_data = tr->cond_snapshot->cond_data;
1339 
1340 	arch_spin_unlock(&tr->max_lock);
1341 	local_irq_enable();
1342 
1343 	return cond_data;
1344 }
1345 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1346 
1347 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1348 					struct array_buffer *size_buf, int cpu_id);
1349 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1350 
1351 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1352 {
1353 	int order;
1354 	int ret;
1355 
1356 	if (!tr->allocated_snapshot) {
1357 
1358 		/* Make the snapshot buffer have the same order as main buffer */
1359 		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1360 		ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1361 		if (ret < 0)
1362 			return ret;
1363 
1364 		/* allocate spare buffer */
1365 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1366 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1367 		if (ret < 0)
1368 			return ret;
1369 
1370 		tr->allocated_snapshot = true;
1371 	}
1372 
1373 	return 0;
1374 }
1375 
1376 static void free_snapshot(struct trace_array *tr)
1377 {
1378 	/*
1379 	 * We don't free the ring buffer; instead, we resize it because
1380 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1381 	 * we want to preserve it.
1382 	 */
1383 	ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1384 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1385 	set_buffer_entries(&tr->max_buffer, 1);
1386 	tracing_reset_online_cpus(&tr->max_buffer);
1387 	tr->allocated_snapshot = false;
1388 }
1389 
1390 static int tracing_arm_snapshot_locked(struct trace_array *tr)
1391 {
1392 	int ret;
1393 
1394 	lockdep_assert_held(&trace_types_lock);
1395 
1396 	spin_lock(&tr->snapshot_trigger_lock);
1397 	if (tr->snapshot == UINT_MAX || tr->mapped) {
1398 		spin_unlock(&tr->snapshot_trigger_lock);
1399 		return -EBUSY;
1400 	}
1401 
1402 	tr->snapshot++;
1403 	spin_unlock(&tr->snapshot_trigger_lock);
1404 
1405 	ret = tracing_alloc_snapshot_instance(tr);
1406 	if (ret) {
1407 		spin_lock(&tr->snapshot_trigger_lock);
1408 		tr->snapshot--;
1409 		spin_unlock(&tr->snapshot_trigger_lock);
1410 	}
1411 
1412 	return ret;
1413 }
1414 
1415 int tracing_arm_snapshot(struct trace_array *tr)
1416 {
1417 	guard(mutex)(&trace_types_lock);
1418 	return tracing_arm_snapshot_locked(tr);
1419 }
1420 
1421 void tracing_disarm_snapshot(struct trace_array *tr)
1422 {
1423 	spin_lock(&tr->snapshot_trigger_lock);
1424 	if (!WARN_ON(!tr->snapshot))
1425 		tr->snapshot--;
1426 	spin_unlock(&tr->snapshot_trigger_lock);
1427 }
1428 
1429 /**
1430  * tracing_alloc_snapshot - allocate snapshot buffer.
1431  *
1432  * This only allocates the snapshot buffer if it isn't already
1433  * allocated - it doesn't also take a snapshot.
1434  *
1435  * This is meant to be used in cases where the snapshot buffer needs
1436  * to be set up for events that can't sleep but need to be able to
1437  * trigger a snapshot.
1438  */
1439 int tracing_alloc_snapshot(void)
1440 {
1441 	struct trace_array *tr = &global_trace;
1442 	int ret;
1443 
1444 	ret = tracing_alloc_snapshot_instance(tr);
1445 	WARN_ON(ret < 0);
1446 
1447 	return ret;
1448 }
1449 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1450 
1451 /**
1452  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1453  *
1454  * This is similar to tracing_snapshot(), but it will allocate the
1455  * snapshot buffer if it isn't already allocated. Use this only
1456  * where it is safe to sleep, as the allocation may sleep.
1457  *
1458  * This causes a swap between the snapshot buffer and the current live
1459  * tracing buffer. You can use this to take snapshots of the live
1460  * trace when some condition is triggered, but continue to trace.
1461  */
1462 void tracing_snapshot_alloc(void)
1463 {
1464 	int ret;
1465 
1466 	ret = tracing_alloc_snapshot();
1467 	if (ret < 0)
1468 		return;
1469 
1470 	tracing_snapshot();
1471 }
1472 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
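/*
 * Illustrative sketch (editor's example, hypothetical trigger): allocate
 * the snapshot buffer from sleepable context, then snapshot the live
 * trace when a problem is detected.
 */
static void example_use_snapshot(bool problem_detected)
{
	/* Safe to sleep here: make sure the spare buffer exists. */
	tracing_alloc_snapshot();

	if (problem_detected)
		tracing_snapshot();	/* swap the live buffer into the snapshot */
}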
1473 
1474 /**
1475  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1476  * @tr:		The tracing instance
1477  * @cond_data:	User data to associate with the snapshot
1478  * @update:	Implementation of the cond_snapshot update function
1479  *
1480  * Check whether the conditional snapshot for the given instance has
1481  * already been enabled, or if the current tracer is already using a
1482  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1483  * save the cond_data and update function inside.
1484  *
1485  * Returns 0 if successful, error otherwise.
1486  */
1487 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1488 				 cond_update_fn_t update)
1489 {
1490 	struct cond_snapshot *cond_snapshot __free(kfree) =
1491 		kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1492 	int ret;
1493 
1494 	if (!cond_snapshot)
1495 		return -ENOMEM;
1496 
1497 	cond_snapshot->cond_data = cond_data;
1498 	cond_snapshot->update = update;
1499 
1500 	guard(mutex)(&trace_types_lock);
1501 
1502 	if (tr->current_trace->use_max_tr)
1503 		return -EBUSY;
1504 
1505 	/*
1506 	 * The cond_snapshot can only change to NULL without the
1507 	 * trace_types_lock. We don't care if we race with it going
1508 	 * to NULL, but we want to make sure that it's not set to
1509 	 * something other than NULL when we get here, which we can
1510 	 * do safely with only holding the trace_types_lock and not
1511 	 * having to take the max_lock.
1512 	 */
1513 	if (tr->cond_snapshot)
1514 		return -EBUSY;
1515 
1516 	ret = tracing_arm_snapshot_locked(tr);
1517 	if (ret)
1518 		return ret;
1519 
1520 	local_irq_disable();
1521 	arch_spin_lock(&tr->max_lock);
1522 	tr->cond_snapshot = no_free_ptr(cond_snapshot);
1523 	arch_spin_unlock(&tr->max_lock);
1524 	local_irq_enable();
1525 
1526 	return 0;
1527 }
1528 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
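/*
 * Illustrative sketch (editor's example, hypothetical names): wiring up
 * a conditional snapshot. The update() callback decides, based on the
 * cond_data it is handed, whether tracing_snapshot_cond() should swap
 * the buffers.
 */
static bool example_snapshot_update(struct trace_array *tr, void *cond_data)
{
	unsigned long *threshold = cond_data;

	/* Only snapshot when the monitored value is non-zero. */
	return READ_ONCE(*threshold) != 0;
}

/*
 * Typical call sequence (error handling omitted):
 *	tracing_snapshot_cond_enable(tr, &example_value, example_snapshot_update);
 *	...
 *	tracing_snapshot_cond(tr, &example_value);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */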
1529 
1530 /**
1531  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1532  * @tr:		The tracing instance
1533  *
1534  * Check whether the conditional snapshot for the given instance is
1535  * enabled; if so, free the cond_snapshot associated with it,
1536  * otherwise return -EINVAL.
1537  *
1538  * Returns 0 if successful, error otherwise.
1539  */
1540 int tracing_snapshot_cond_disable(struct trace_array *tr)
1541 {
1542 	int ret = 0;
1543 
1544 	local_irq_disable();
1545 	arch_spin_lock(&tr->max_lock);
1546 
1547 	if (!tr->cond_snapshot)
1548 		ret = -EINVAL;
1549 	else {
1550 		kfree(tr->cond_snapshot);
1551 		tr->cond_snapshot = NULL;
1552 	}
1553 
1554 	arch_spin_unlock(&tr->max_lock);
1555 	local_irq_enable();
1556 
1557 	tracing_disarm_snapshot(tr);
1558 
1559 	return ret;
1560 }
1561 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1562 #else
1563 void tracing_snapshot(void)
1564 {
1565 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1566 }
1567 EXPORT_SYMBOL_GPL(tracing_snapshot);
1568 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1569 {
1570 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1571 }
1572 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1573 int tracing_alloc_snapshot(void)
1574 {
1575 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1576 	return -ENODEV;
1577 }
1578 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1579 void tracing_snapshot_alloc(void)
1580 {
1581 	/* Give warning */
1582 	tracing_snapshot();
1583 }
1584 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1585 void *tracing_cond_snapshot_data(struct trace_array *tr)
1586 {
1587 	return NULL;
1588 }
1589 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1590 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1591 {
1592 	return -ENODEV;
1593 }
1594 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1595 int tracing_snapshot_cond_disable(struct trace_array *tr)
1596 {
1597 	return false;
1598 }
1599 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1600 #define free_snapshot(tr)	do { } while (0)
1601 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1602 #endif /* CONFIG_TRACER_SNAPSHOT */
1603 
1604 void tracer_tracing_off(struct trace_array *tr)
1605 {
1606 	if (tr->array_buffer.buffer)
1607 		ring_buffer_record_off(tr->array_buffer.buffer);
1608 	/*
1609 	 * This flag is looked at when buffers haven't been allocated
1610 	 * yet, or by some tracers (like irqsoff) that just want to
1611 	 * know if the ring buffer has been disabled, but it can handle
1612 	 * races where it gets disabled but we still do a record.
1613 	 * As the check is in the fast path of the tracers, it is more
1614 	 * important to be fast than accurate.
1615 	 */
1616 	tr->buffer_disabled = 1;
1617 }
1618 
1619 /**
1620  * tracer_tracing_disable() - temporarily disable writes to the buffer
1621  * @tr: The trace array to disable its buffer for
1622  *
1623  * Expects tracer_tracing_enable() to re-enable tracing.
1624  * The difference between this and tracer_tracing_off() is that this
1625  * is a counter and can nest, whereas tracer_tracing_off() can
1626  * be called multiple times and a single tracer_tracing_on() will
1627  * enable it.
1628  */
1629 void tracer_tracing_disable(struct trace_array *tr)
1630 {
1631 	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1632 		return;
1633 
1634 	ring_buffer_record_disable(tr->array_buffer.buffer);
1635 }
1636 
1637 /**
1638  * tracer_tracing_enable() - counterpart of tracer_tracing_disable()
1639  * @tr: The trace array that had tracer_tracing_disable() called on it
1640  *
1641  * This is called after tracer_tracing_disable() has been called on @tr,
1642  * when it's safe to re-enable tracing.
1643  */
1644 void tracer_tracing_enable(struct trace_array *tr)
1645 {
1646 	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1647 		return;
1648 
1649 	ring_buffer_record_enable(tr->array_buffer.buffer);
1650 }
1651 
1652 /**
1653  * tracing_off - turn off tracing buffers
1654  *
1655  * This function stops the tracing buffers from recording data.
1656  * It does not disable any overhead the tracers themselves may
1657  * be causing. This function simply causes all recording to
1658  * the ring buffers to fail.
1659  */
1660 void tracing_off(void)
1661 {
1662 	tracer_tracing_off(&global_trace);
1663 }
1664 EXPORT_SYMBOL_GPL(tracing_off);
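/*
 * Illustrative usage (editor's note): a classic debugging pattern is to
 * call tracing_off() from a suspected code path so the ring buffer stops
 * recording at the moment of interest and can be inspected afterwards
 * via /sys/kernel/tracing/trace:
 *
 *	if (suspicious_condition)
 *		tracing_off();
 *
 * where "suspicious_condition" stands for whatever hypothetical condition
 * is being chased. tracing_on() (or "echo 1 > tracing_on") re-enables
 * recording.
 */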
1665 
1666 void disable_trace_on_warning(void)
1667 {
1668 	if (__disable_trace_on_warning) {
1669 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1670 			"Disabling tracing due to warning\n");
1671 		tracing_off();
1672 	}
1673 }
1674 
1675 /**
1676  * tracer_tracing_is_on - show real state of ring buffer enabled
1677  * @tr : the trace array to know if ring buffer is enabled
1678  *
1679  * Shows real state of the ring buffer if it is enabled or not.
1680  */
1681 bool tracer_tracing_is_on(struct trace_array *tr)
1682 {
1683 	if (tr->array_buffer.buffer)
1684 		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1685 	return !tr->buffer_disabled;
1686 }
1687 
1688 /**
1689  * tracing_is_on - show state of ring buffers enabled
1690  */
1691 int tracing_is_on(void)
1692 {
1693 	return tracer_tracing_is_on(&global_trace);
1694 }
1695 EXPORT_SYMBOL_GPL(tracing_is_on);
1696 
1697 static int __init set_buf_size(char *str)
1698 {
1699 	unsigned long buf_size;
1700 
1701 	if (!str)
1702 		return 0;
1703 	buf_size = memparse(str, &str);
1704 	/*
1705 	 * nr_entries cannot be zero and the startup
1706 	 * tests require some buffer space. Therefore
1707 	 * ensure we have at least 4096 bytes of buffer.
1708 	 */
1709 	trace_buf_size = max(4096UL, buf_size);
1710 	return 1;
1711 }
1712 __setup("trace_buf_size=", set_buf_size);
1713 
1714 static int __init set_tracing_thresh(char *str)
1715 {
1716 	unsigned long threshold;
1717 	int ret;
1718 
1719 	if (!str)
1720 		return 0;
1721 	ret = kstrtoul(str, 0, &threshold);
1722 	if (ret < 0)
1723 		return 0;
1724 	tracing_thresh = threshold * 1000;
1725 	return 1;
1726 }
1727 __setup("tracing_thresh=", set_tracing_thresh);
1728 
1729 unsigned long nsecs_to_usecs(unsigned long nsecs)
1730 {
1731 	return nsecs / 1000;
1732 }
1733 
1734 /*
1735  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1736  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1737  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1738  * of strings in the order that the evals (enum) were defined.
1739  */
1740 #undef C
1741 #define C(a, b) b
1742 
1743 /* These must match the bit positions in trace_iterator_flags */
1744 static const char *trace_options[] = {
1745 	TRACE_FLAGS
1746 	NULL
1747 };
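/*
 * Illustration of the C() trick above (hypothetical expansion, not real
 * code): if TRACE_FLAGS were
 *
 *	#define TRACE_FLAGS C(PRINT_PARENT, "print-parent"), C(SYM_OFFSET, "sym-offset"),
 *
 * then with "#define C(a, b) b" the array above expands to
 *
 *	{ "print-parent", "sym-offset", NULL }
 *
 * while trace.h can redefine C(a, b) differently to build the matching
 * enum bit positions from the same list.
 */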
1748 
1749 static struct {
1750 	u64 (*func)(void);
1751 	const char *name;
1752 	int in_ns;		/* is this clock in nanoseconds? */
1753 } trace_clocks[] = {
1754 	{ trace_clock_local,		"local",	1 },
1755 	{ trace_clock_global,		"global",	1 },
1756 	{ trace_clock_counter,		"counter",	0 },
1757 	{ trace_clock_jiffies,		"uptime",	0 },
1758 	{ trace_clock,			"perf",		1 },
1759 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1760 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1761 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1762 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1763 	ARCH_TRACE_CLOCKS
1764 };
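/*
 * Usage note (sketch): the "name" strings above are what user space writes
 * to the tracefs trace_clock file to pick a clock, e.g.
 *
 *	echo mono > /sys/kernel/tracing/trace_clock
 *
 * and the in_ns flag records whether that clock's timestamps are in
 * nanoseconds (see trace_clock_in_ns() below).
 */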
1765 
1766 bool trace_clock_in_ns(struct trace_array *tr)
1767 {
1768 	if (trace_clocks[tr->clock_id].in_ns)
1769 		return true;
1770 
1771 	return false;
1772 }
1773 
1774 /*
1775  * trace_parser_get_init - gets the buffer for trace parser
1776  */
1777 int trace_parser_get_init(struct trace_parser *parser, int size)
1778 {
1779 	memset(parser, 0, sizeof(*parser));
1780 
1781 	parser->buffer = kmalloc(size, GFP_KERNEL);
1782 	if (!parser->buffer)
1783 		return 1;
1784 
1785 	parser->size = size;
1786 	return 0;
1787 }
1788 
1789 /*
1790  * trace_parser_put - frees the buffer for trace parser
1791  */
1792 void trace_parser_put(struct trace_parser *parser)
1793 {
1794 	kfree(parser->buffer);
1795 	parser->buffer = NULL;
1796 }
1797 
1798 /*
1799  * trace_get_user - reads the user input string separated by space
1800  * (matched by isspace(ch))
1801  *
1802  * For each string found the 'struct trace_parser' is updated,
1803  * and the function returns.
1804  *
1805  * Returns number of bytes read.
1806  *
1807  * See kernel/trace/trace.h for 'struct trace_parser' details.
1808  */
1809 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1810 	size_t cnt, loff_t *ppos)
1811 {
1812 	char ch;
1813 	size_t read = 0;
1814 	ssize_t ret;
1815 
1816 	if (!*ppos)
1817 		trace_parser_clear(parser);
1818 
1819 	ret = get_user(ch, ubuf++);
1820 	if (ret)
1821 		goto fail;
1822 
1823 	read++;
1824 	cnt--;
1825 
1826 	/*
1827 	 * The parser is not finished with the last write,
1828 	 * continue reading the user input without skipping spaces.
1829 	 */
1830 	if (!parser->cont) {
1831 		/* skip white space */
1832 		while (cnt && isspace(ch)) {
1833 			ret = get_user(ch, ubuf++);
1834 			if (ret)
1835 				goto fail;
1836 			read++;
1837 			cnt--;
1838 		}
1839 
1840 		parser->idx = 0;
1841 
1842 		/* only spaces were written */
1843 		if (isspace(ch) || !ch) {
1844 			*ppos += read;
1845 			return read;
1846 		}
1847 	}
1848 
1849 	/* read the non-space input */
1850 	while (cnt && !isspace(ch) && ch) {
1851 		if (parser->idx < parser->size - 1)
1852 			parser->buffer[parser->idx++] = ch;
1853 		else {
1854 			ret = -EINVAL;
1855 			goto fail;
1856 		}
1857 
1858 		ret = get_user(ch, ubuf++);
1859 		if (ret)
1860 			goto fail;
1861 		read++;
1862 		cnt--;
1863 	}
1864 
1865 	/* We either got finished input or we have to wait for another call. */
1866 	if (isspace(ch) || !ch) {
1867 		parser->buffer[parser->idx] = 0;
1868 		parser->cont = false;
1869 	} else if (parser->idx < parser->size - 1) {
1870 		parser->cont = true;
1871 		parser->buffer[parser->idx++] = ch;
1872 		/* Make sure the parsed string always terminates with '\0'. */
1873 		parser->buffer[parser->idx] = 0;
1874 	} else {
1875 		ret = -EINVAL;
1876 		goto fail;
1877 	}
1878 
1879 	*ppos += read;
1880 	return read;
1881 fail:
1882 	trace_parser_fail(parser);
1883 	return ret;
1884 }
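/*
 * Behaviour sketch (illustrative): trace_get_user() hands back one
 * whitespace-separated token per invocation, so a write of "func1  func2\n"
 * to a tracefs file that uses it typically results in two calls that leave
 * "func1" and then "func2" in parser->buffer, with parser->cont set when a
 * token is split across two writes.
 */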
1885 
1886 /* TODO add a seq_buf_to_buffer() */
1887 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1888 {
1889 	int len;
1890 
1891 	if (trace_seq_used(s) <= s->readpos)
1892 		return -EBUSY;
1893 
1894 	len = trace_seq_used(s) - s->readpos;
1895 	if (cnt > len)
1896 		cnt = len;
1897 	memcpy(buf, s->buffer + s->readpos, cnt);
1898 
1899 	s->readpos += cnt;
1900 	return cnt;
1901 }
1902 
1903 unsigned long __read_mostly	tracing_thresh;
1904 
1905 #ifdef CONFIG_TRACER_MAX_TRACE
1906 static const struct file_operations tracing_max_lat_fops;
1907 
1908 #ifdef LATENCY_FS_NOTIFY
1909 
1910 static struct workqueue_struct *fsnotify_wq;
1911 
1912 static void latency_fsnotify_workfn(struct work_struct *work)
1913 {
1914 	struct trace_array *tr = container_of(work, struct trace_array,
1915 					      fsnotify_work);
1916 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1917 }
1918 
1919 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1920 {
1921 	struct trace_array *tr = container_of(iwork, struct trace_array,
1922 					      fsnotify_irqwork);
1923 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1924 }
1925 
1926 static void trace_create_maxlat_file(struct trace_array *tr,
1927 				     struct dentry *d_tracer)
1928 {
1929 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1930 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1931 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1932 					      TRACE_MODE_WRITE,
1933 					      d_tracer, tr,
1934 					      &tracing_max_lat_fops);
1935 }
1936 
1937 __init static int latency_fsnotify_init(void)
1938 {
1939 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1940 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1941 	if (!fsnotify_wq) {
1942 		pr_err("Unable to allocate tr_max_lat_wq\n");
1943 		return -ENOMEM;
1944 	}
1945 	return 0;
1946 }
1947 
1948 late_initcall_sync(latency_fsnotify_init);
1949 
1950 void latency_fsnotify(struct trace_array *tr)
1951 {
1952 	if (!fsnotify_wq)
1953 		return;
1954 	/*
1955 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1956 	 * possible that we are called from __schedule() or do_idle(), which
1957 	 * could cause a deadlock.
1958 	 */
1959 	irq_work_queue(&tr->fsnotify_irqwork);
1960 }
1961 
1962 #else /* !LATENCY_FS_NOTIFY */
1963 
1964 #define trace_create_maxlat_file(tr, d_tracer)				\
1965 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1966 			  d_tracer, tr, &tracing_max_lat_fops)
1967 
1968 #endif
1969 
1970 /*
1971  * Copy the new maximum trace into the separate maximum-trace
1972  * structure. (this way the maximum trace is permanently saved,
1973  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1974  */
1975 static void
1976 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1977 {
1978 	struct array_buffer *trace_buf = &tr->array_buffer;
1979 	struct array_buffer *max_buf = &tr->max_buffer;
1980 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1981 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1982 
1983 	max_buf->cpu = cpu;
1984 	max_buf->time_start = data->preempt_timestamp;
1985 
1986 	max_data->saved_latency = tr->max_latency;
1987 	max_data->critical_start = data->critical_start;
1988 	max_data->critical_end = data->critical_end;
1989 
1990 	strscpy(max_data->comm, tsk->comm);
1991 	max_data->pid = tsk->pid;
1992 	/*
1993 	 * If tsk == current, then use current_uid(), as that does not use
1994 	 * RCU. The irq tracer can be called out of RCU scope.
1995 	 */
1996 	if (tsk == current)
1997 		max_data->uid = current_uid();
1998 	else
1999 		max_data->uid = task_uid(tsk);
2000 
2001 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
2002 	max_data->policy = tsk->policy;
2003 	max_data->rt_priority = tsk->rt_priority;
2004 
2005 	/* record this task's comm */
2006 	tracing_record_cmdline(tsk);
2007 	latency_fsnotify(tr);
2008 }
2009 
2010 /**
2011  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
2012  * @tr: tracer
2013  * @tsk: the task with the latency
2014  * @cpu: The cpu that initiated the trace.
2015  * @cond_data: User data associated with a conditional snapshot
2016  *
2017  * Flip the buffers between the @tr and the max_tr and record information
2018  * about which task was the cause of this latency.
2019  */
2020 void
2021 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
2022 	      void *cond_data)
2023 {
2024 	if (tr->stop_count)
2025 		return;
2026 
2027 	WARN_ON_ONCE(!irqs_disabled());
2028 
2029 	if (!tr->allocated_snapshot) {
2030 		/* Only the nop tracer should hit this when disabling */
2031 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
2032 		return;
2033 	}
2034 
2035 	arch_spin_lock(&tr->max_lock);
2036 
2037 	/* Inherit the recordable setting from array_buffer */
2038 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
2039 		ring_buffer_record_on(tr->max_buffer.buffer);
2040 	else
2041 		ring_buffer_record_off(tr->max_buffer.buffer);
2042 
2043 #ifdef CONFIG_TRACER_SNAPSHOT
2044 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
2045 		arch_spin_unlock(&tr->max_lock);
2046 		return;
2047 	}
2048 #endif
2049 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
2050 
2051 	__update_max_tr(tr, tsk, cpu);
2052 
2053 	arch_spin_unlock(&tr->max_lock);
2054 
2055 	/* Any waiters on the old snapshot buffer need to wake up */
2056 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
2057 }
2058 
2059 /**
2060  * update_max_tr_single - only copy one trace over, and reset the rest
2061  * @tr: tracer
2062  * @tsk: task with the latency
2063  * @cpu: the cpu of the buffer to copy.
2064  *
2065  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
2066  */
2067 void
2068 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
2069 {
2070 	int ret;
2071 
2072 	if (tr->stop_count)
2073 		return;
2074 
2075 	WARN_ON_ONCE(!irqs_disabled());
2076 	if (!tr->allocated_snapshot) {
2077 		/* Only the nop tracer should hit this when disabling */
2078 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
2079 		return;
2080 	}
2081 
2082 	arch_spin_lock(&tr->max_lock);
2083 
2084 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
2085 
2086 	if (ret == -EBUSY) {
2087 		/*
2088 		 * We failed to swap the buffer due to a commit taking
2089 		 * place on this CPU. We fail to record, but we reset
2090 		 * the max trace buffer (no one writes directly to it)
2091 		 * and flag that it failed.
2092 		 * Another reason is that a resize is in progress.
2093 		 */
2094 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
2095 			"Failed to swap buffers due to commit or resize in progress\n");
2096 	}
2097 
2098 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
2099 
2100 	__update_max_tr(tr, tsk, cpu);
2101 	arch_spin_unlock(&tr->max_lock);
2102 }
2103 
2104 #endif /* CONFIG_TRACER_MAX_TRACE */
2105 
2106 struct pipe_wait {
2107 	struct trace_iterator		*iter;
2108 	int				wait_index;
2109 };
2110 
2111 static bool wait_pipe_cond(void *data)
2112 {
2113 	struct pipe_wait *pwait = data;
2114 	struct trace_iterator *iter = pwait->iter;
2115 
2116 	if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2117 		return true;
2118 
2119 	return iter->closed;
2120 }
2121 
2122 static int wait_on_pipe(struct trace_iterator *iter, int full)
2123 {
2124 	struct pipe_wait pwait;
2125 	int ret;
2126 
2127 	/* Iterators are static, they should be filled or empty */
2128 	if (trace_buffer_iter(iter, iter->cpu_file))
2129 		return 0;
2130 
2131 	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2132 	pwait.iter = iter;
2133 
2134 	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2135 			       wait_pipe_cond, &pwait);
2136 
2137 #ifdef CONFIG_TRACER_MAX_TRACE
2138 	/*
2139 	 * Make sure this is still the snapshot buffer, as if a snapshot were
2140 	 * to happen, this would now be the main buffer.
2141 	 */
2142 	if (iter->snapshot)
2143 		iter->array_buffer = &iter->tr->max_buffer;
2144 #endif
2145 	return ret;
2146 }
2147 
2148 #ifdef CONFIG_FTRACE_STARTUP_TEST
2149 static bool selftests_can_run;
2150 
2151 struct trace_selftests {
2152 	struct list_head		list;
2153 	struct tracer			*type;
2154 };
2155 
2156 static LIST_HEAD(postponed_selftests);
2157 
2158 static int save_selftest(struct tracer *type)
2159 {
2160 	struct trace_selftests *selftest;
2161 
2162 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2163 	if (!selftest)
2164 		return -ENOMEM;
2165 
2166 	selftest->type = type;
2167 	list_add(&selftest->list, &postponed_selftests);
2168 	return 0;
2169 }
2170 
2171 static int run_tracer_selftest(struct tracer *type)
2172 {
2173 	struct trace_array *tr = &global_trace;
2174 	struct tracer_flags *saved_flags = tr->current_trace_flags;
2175 	struct tracer *saved_tracer = tr->current_trace;
2176 	int ret;
2177 
2178 	if (!type->selftest || tracing_selftest_disabled)
2179 		return 0;
2180 
2181 	/*
2182 	 * If a tracer registers early in boot up (before scheduling is
2183 	 * initialized and such), then do not run its selftests yet.
2184 	 * Instead, run it a little later in the boot process.
2185 	 */
2186 	if (!selftests_can_run)
2187 		return save_selftest(type);
2188 
2189 	if (!tracing_is_on()) {
2190 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2191 			type->name);
2192 		return 0;
2193 	}
2194 
2195 	/*
2196 	 * Run a selftest on this tracer.
2197 	 * Here we reset the trace buffer, and set the current
2198 	 * tracer to be this tracer. The tracer can then run some
2199 	 * internal tracing to verify that everything is in order.
2200 	 * If we fail, we do not register this tracer.
2201 	 */
2202 	tracing_reset_online_cpus(&tr->array_buffer);
2203 
2204 	tr->current_trace = type;
2205 	tr->current_trace_flags = type->flags ? : type->default_flags;
2206 
2207 #ifdef CONFIG_TRACER_MAX_TRACE
2208 	if (type->use_max_tr) {
2209 		/* If we expanded the buffers, make sure the max is expanded too */
2210 		if (tr->ring_buffer_expanded)
2211 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2212 					   RING_BUFFER_ALL_CPUS);
2213 		tr->allocated_snapshot = true;
2214 	}
2215 #endif
2216 
2217 	/* the test is responsible for initializing and enabling */
2218 	pr_info("Testing tracer %s: ", type->name);
2219 	ret = type->selftest(type, tr);
2220 	/* the test is responsible for resetting too */
2221 	tr->current_trace = saved_tracer;
2222 	tr->current_trace_flags = saved_flags;
2223 	if (ret) {
2224 		printk(KERN_CONT "FAILED!\n");
2225 		/* Add the warning after printing 'FAILED' */
2226 		WARN_ON(1);
2227 		return -1;
2228 	}
2229 	/* Only reset on passing, to avoid touching corrupted buffers */
2230 	tracing_reset_online_cpus(&tr->array_buffer);
2231 
2232 #ifdef CONFIG_TRACER_MAX_TRACE
2233 	if (type->use_max_tr) {
2234 		tr->allocated_snapshot = false;
2235 
2236 		/* Shrink the max buffer again */
2237 		if (tr->ring_buffer_expanded)
2238 			ring_buffer_resize(tr->max_buffer.buffer, 1,
2239 					   RING_BUFFER_ALL_CPUS);
2240 	}
2241 #endif
2242 
2243 	printk(KERN_CONT "PASSED\n");
2244 	return 0;
2245 }
2246 
2247 static int do_run_tracer_selftest(struct tracer *type)
2248 {
2249 	int ret;
2250 
2251 	/*
2252 	 * Tests can take a long time, especially if they are run one after the
2253 	 * other, as does happen during bootup when all the tracers are
2254 	 * registered. This could cause the soft lockup watchdog to trigger.
2255 	 */
2256 	cond_resched();
2257 
2258 	tracing_selftest_running = true;
2259 	ret = run_tracer_selftest(type);
2260 	tracing_selftest_running = false;
2261 
2262 	return ret;
2263 }
2264 
2265 static __init int init_trace_selftests(void)
2266 {
2267 	struct trace_selftests *p, *n;
2268 	struct tracer *t, **last;
2269 	int ret;
2270 
2271 	selftests_can_run = true;
2272 
2273 	guard(mutex)(&trace_types_lock);
2274 
2275 	if (list_empty(&postponed_selftests))
2276 		return 0;
2277 
2278 	pr_info("Running postponed tracer tests:\n");
2279 
2280 	tracing_selftest_running = true;
2281 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2282 		/* This loop can take minutes when sanitizers are enabled, so
2283 		 * let's make sure we allow RCU processing.
2284 		 */
2285 		cond_resched();
2286 		ret = run_tracer_selftest(p->type);
2287 		/* If the test fails, then warn and remove from available_tracers */
2288 		if (ret < 0) {
2289 			WARN(1, "tracer: %s failed selftest, disabling\n",
2290 			     p->type->name);
2291 			last = &trace_types;
2292 			for (t = trace_types; t; t = t->next) {
2293 				if (t == p->type) {
2294 					*last = t->next;
2295 					break;
2296 				}
2297 				last = &t->next;
2298 			}
2299 		}
2300 		list_del(&p->list);
2301 		kfree(p);
2302 	}
2303 	tracing_selftest_running = false;
2304 
2305 	return 0;
2306 }
2307 core_initcall(init_trace_selftests);
2308 #else
2309 static inline int do_run_tracer_selftest(struct tracer *type)
2310 {
2311 	return 0;
2312 }
2313 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2314 
2315 static int add_tracer(struct trace_array *tr, struct tracer *t);
2316 
2317 static void __init apply_trace_boot_options(void);
2318 
2319 static void free_tracers(struct trace_array *tr)
2320 {
2321 	struct tracers *t, *n;
2322 
2323 	lockdep_assert_held(&trace_types_lock);
2324 
2325 	list_for_each_entry_safe(t, n, &tr->tracers, list) {
2326 		list_del(&t->list);
2327 		kfree(t->flags);
2328 		kfree(t);
2329 	}
2330 }
2331 
2332 /**
2333  * register_tracer - register a tracer with the ftrace system.
2334  * @type: the plugin for the tracer
2335  *
2336  * Register a new plugin tracer.
2337  */
2338 int __init register_tracer(struct tracer *type)
2339 {
2340 	struct trace_array *tr;
2341 	struct tracer *t;
2342 	int ret = 0;
2343 
2344 	if (!type->name) {
2345 		pr_info("Tracer must have a name\n");
2346 		return -1;
2347 	}
2348 
2349 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2350 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2351 		return -1;
2352 	}
2353 
2354 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2355 		pr_warn("Can not register tracer %s due to lockdown\n",
2356 			   type->name);
2357 		return -EPERM;
2358 	}
2359 
2360 	mutex_lock(&trace_types_lock);
2361 
2362 	for (t = trace_types; t; t = t->next) {
2363 		if (strcmp(type->name, t->name) == 0) {
2364 			/* already found */
2365 			pr_info("Tracer %s already registered\n",
2366 				type->name);
2367 			ret = -1;
2368 			goto out;
2369 		}
2370 	}
2371 
2372 	/* store the tracer for __set_tracer_option */
2373 	if (type->flags)
2374 		type->flags->trace = type;
2375 
2376 	ret = do_run_tracer_selftest(type);
2377 	if (ret < 0)
2378 		goto out;
2379 
2380 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2381 		ret = add_tracer(tr, type);
2382 		if (ret < 0) {
2383 			/* The tracer will still exist but without options */
2384 			pr_warn("Failed to create tracer options for %s\n", type->name);
2385 			break;
2386 		}
2387 	}
2388 
2389 	type->next = trace_types;
2390 	trace_types = type;
2391 
2392  out:
2393 	mutex_unlock(&trace_types_lock);
2394 
2395 	if (ret || !default_bootup_tracer)
2396 		return ret;
2397 
2398 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2399 		return 0;
2400 
2401 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2402 	/* Do we want this tracer to start on bootup? */
2403 	WARN_ON(tracing_set_tracer(&global_trace, type->name) < 0);
2404 	default_bootup_tracer = NULL;
2405 
2406 	apply_trace_boot_options();
2407 
2408 	/* disable other selftests, since this will break it. */
2409 	disable_tracing_selftest("running a tracer");
2410 
2411 	return 0;
2412 }
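/*
 * Registration sketch (illustrative only; the names are made up): a minimal
 * tracer plugin built into the kernel registers itself like
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_init,
 *		.reset	= example_reset,
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 *
 * Real tracers (see e.g. trace_nop.c) additionally fill in selftest, flags
 * and start/stop callbacks.
 */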
2413 
2414 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2415 {
2416 	struct trace_buffer *buffer = buf->buffer;
2417 
2418 	if (!buffer)
2419 		return;
2420 
2421 	ring_buffer_record_disable(buffer);
2422 
2423 	/* Make sure all commits have finished */
2424 	synchronize_rcu();
2425 	ring_buffer_reset_cpu(buffer, cpu);
2426 
2427 	ring_buffer_record_enable(buffer);
2428 }
2429 
2430 void tracing_reset_online_cpus(struct array_buffer *buf)
2431 {
2432 	struct trace_buffer *buffer = buf->buffer;
2433 
2434 	if (!buffer)
2435 		return;
2436 
2437 	ring_buffer_record_disable(buffer);
2438 
2439 	/* Make sure all commits have finished */
2440 	synchronize_rcu();
2441 
2442 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2443 
2444 	ring_buffer_reset_online_cpus(buffer);
2445 
2446 	ring_buffer_record_enable(buffer);
2447 }
2448 
2449 static void tracing_reset_all_cpus(struct array_buffer *buf)
2450 {
2451 	struct trace_buffer *buffer = buf->buffer;
2452 
2453 	if (!buffer)
2454 		return;
2455 
2456 	ring_buffer_record_disable(buffer);
2457 
2458 	/* Make sure all commits have finished */
2459 	synchronize_rcu();
2460 
2461 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2462 
2463 	ring_buffer_reset(buffer);
2464 
2465 	ring_buffer_record_enable(buffer);
2466 }
2467 
2468 /* Must have trace_types_lock held */
2469 void tracing_reset_all_online_cpus_unlocked(void)
2470 {
2471 	struct trace_array *tr;
2472 
2473 	lockdep_assert_held(&trace_types_lock);
2474 
2475 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2476 		if (!tr->clear_trace)
2477 			continue;
2478 		tr->clear_trace = false;
2479 		tracing_reset_online_cpus(&tr->array_buffer);
2480 #ifdef CONFIG_TRACER_MAX_TRACE
2481 		tracing_reset_online_cpus(&tr->max_buffer);
2482 #endif
2483 	}
2484 }
2485 
2486 void tracing_reset_all_online_cpus(void)
2487 {
2488 	guard(mutex)(&trace_types_lock);
2489 	tracing_reset_all_online_cpus_unlocked();
2490 }
2491 
2492 int is_tracing_stopped(void)
2493 {
2494 	return global_trace.stop_count;
2495 }
2496 
2497 static void tracing_start_tr(struct trace_array *tr)
2498 {
2499 	struct trace_buffer *buffer;
2500 
2501 	if (tracing_disabled)
2502 		return;
2503 
2504 	guard(raw_spinlock_irqsave)(&tr->start_lock);
2505 	if (--tr->stop_count) {
2506 		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2507 			/* Someone screwed up their debugging */
2508 			tr->stop_count = 0;
2509 		}
2510 		return;
2511 	}
2512 
2513 	/* Prevent the buffers from switching */
2514 	arch_spin_lock(&tr->max_lock);
2515 
2516 	buffer = tr->array_buffer.buffer;
2517 	if (buffer)
2518 		ring_buffer_record_enable(buffer);
2519 
2520 #ifdef CONFIG_TRACER_MAX_TRACE
2521 	buffer = tr->max_buffer.buffer;
2522 	if (buffer)
2523 		ring_buffer_record_enable(buffer);
2524 #endif
2525 
2526 	arch_spin_unlock(&tr->max_lock);
2527 }
2528 
2529 /**
2530  * tracing_start - quick start of the tracer
2531  *
2532  * If tracing is enabled but was stopped by tracing_stop,
2533  * this will start the tracer back up.
2534  */
2535 void tracing_start(void)
2536 
2537 {
2538 	return tracing_start_tr(&global_trace);
2539 }
2540 
2541 static void tracing_stop_tr(struct trace_array *tr)
2542 {
2543 	struct trace_buffer *buffer;
2544 
2545 	guard(raw_spinlock_irqsave)(&tr->start_lock);
2546 	if (tr->stop_count++)
2547 		return;
2548 
2549 	/* Prevent the buffers from switching */
2550 	arch_spin_lock(&tr->max_lock);
2551 
2552 	buffer = tr->array_buffer.buffer;
2553 	if (buffer)
2554 		ring_buffer_record_disable(buffer);
2555 
2556 #ifdef CONFIG_TRACER_MAX_TRACE
2557 	buffer = tr->max_buffer.buffer;
2558 	if (buffer)
2559 		ring_buffer_record_disable(buffer);
2560 #endif
2561 
2562 	arch_spin_unlock(&tr->max_lock);
2563 }
2564 
2565 /**
2566  * tracing_stop - quick stop of the tracer
2567  *
2568  * Lightweight way to stop tracing. Use in conjunction with
2569  * tracing_start.
2570  */
2571 void tracing_stop(void)
2572 {
2573 	return tracing_stop_tr(&global_trace);
2574 }
2575 
2576 /*
2577  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2578  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2579  * simplifies those functions and keeps them in sync.
2580  */
2581 enum print_line_t trace_handle_return(struct trace_seq *s)
2582 {
2583 	return trace_seq_has_overflowed(s) ?
2584 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2585 }
2586 EXPORT_SYMBOL_GPL(trace_handle_return);
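/*
 * Typical use (sketch): event output callbacks build their text into
 * iter->seq and then end with
 *
 *	return trace_handle_return(&iter->seq);
 *
 * so that an overflowing trace_seq is reported as TRACE_TYPE_PARTIAL_LINE.
 */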
2587 
2588 static unsigned short migration_disable_value(void)
2589 {
2590 #if defined(CONFIG_SMP)
2591 	return current->migration_disabled;
2592 #else
2593 	return 0;
2594 #endif
2595 }
2596 
2597 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2598 {
2599 	unsigned int trace_flags = irqs_status;
2600 	unsigned int pc;
2601 
2602 	pc = preempt_count();
2603 
2604 	if (pc & NMI_MASK)
2605 		trace_flags |= TRACE_FLAG_NMI;
2606 	if (pc & HARDIRQ_MASK)
2607 		trace_flags |= TRACE_FLAG_HARDIRQ;
2608 	if (in_serving_softirq())
2609 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2610 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2611 		trace_flags |= TRACE_FLAG_BH_OFF;
2612 
2613 	if (tif_need_resched())
2614 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2615 	if (test_preempt_need_resched())
2616 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2617 	if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
2618 		trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
2619 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2620 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2621 }
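/*
 * Layout of the value returned above (derived from the code, for reference
 * only):
 *
 *	bits  0-3 : preempt count (clamped to 0xf)
 *	bits  4-7 : migration-disable depth (clamped to 0xf)
 *	bits 16+  : TRACE_FLAG_* bits (NMI, HARDIRQ, SOFTIRQ, ...)
 */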
2622 
2623 struct ring_buffer_event *
2624 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2625 			  int type,
2626 			  unsigned long len,
2627 			  unsigned int trace_ctx)
2628 {
2629 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2630 }
2631 
2632 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2633 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2634 static int trace_buffered_event_ref;
2635 
2636 /**
2637  * trace_buffered_event_enable - enable buffering events
2638  *
2639  * When events are being filtered, it is quicker to use a temporary
2640  * buffer to write the event data into if there's a likely chance
2641  * that it will not be committed. The discard of the ring buffer
2642  * is not as fast as committing, and is much slower than copying
2643  * a commit.
2644  *
2645  * When an event is to be filtered, allocate per cpu buffers to
2646  * write the event data into, and if the event is filtered and discarded
2647  * it is simply dropped, otherwise, the entire data is to be committed
2648  * in one shot.
2649  */
2650 void trace_buffered_event_enable(void)
2651 {
2652 	struct ring_buffer_event *event;
2653 	struct page *page;
2654 	int cpu;
2655 
2656 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2657 
2658 	if (trace_buffered_event_ref++)
2659 		return;
2660 
2661 	for_each_tracing_cpu(cpu) {
2662 		page = alloc_pages_node(cpu_to_node(cpu),
2663 					GFP_KERNEL | __GFP_NORETRY, 0);
2664 		/* This is just an optimization and can handle failures */
2665 		if (!page) {
2666 			pr_err("Failed to allocate event buffer\n");
2667 			break;
2668 		}
2669 
2670 		event = page_address(page);
2671 		memset(event, 0, sizeof(*event));
2672 
2673 		per_cpu(trace_buffered_event, cpu) = event;
2674 
2675 		scoped_guard(preempt,) {
2676 			if (cpu == smp_processor_id() &&
2677 			    __this_cpu_read(trace_buffered_event) !=
2678 			    per_cpu(trace_buffered_event, cpu))
2679 				WARN_ON_ONCE(1);
2680 		}
2681 	}
2682 }
2683 
2684 static void enable_trace_buffered_event(void *data)
2685 {
2686 	this_cpu_dec(trace_buffered_event_cnt);
2687 }
2688 
2689 static void disable_trace_buffered_event(void *data)
2690 {
2691 	this_cpu_inc(trace_buffered_event_cnt);
2692 }
2693 
2694 /**
2695  * trace_buffered_event_disable - disable buffering events
2696  *
2697  * When a filter is removed, it is faster to not use the buffered
2698  * events, and to commit directly into the ring buffer. Free up
2699  * the temp buffers when there are no more users. This requires
2700  * special synchronization with current events.
2701  */
2702 void trace_buffered_event_disable(void)
2703 {
2704 	int cpu;
2705 
2706 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2707 
2708 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2709 		return;
2710 
2711 	if (--trace_buffered_event_ref)
2712 		return;
2713 
2714 	/* For each CPU, set the buffer as used. */
2715 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2716 			 NULL, true);
2717 
2718 	/* Wait for all current users to finish */
2719 	synchronize_rcu();
2720 
2721 	for_each_tracing_cpu(cpu) {
2722 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2723 		per_cpu(trace_buffered_event, cpu) = NULL;
2724 	}
2725 
2726 	/*
2727 	 * Wait for all CPUs that potentially started checking if they can use
2728 	 * their event buffer only after the previous synchronize_rcu() call and
2729 	 * they still read a valid pointer from trace_buffered_event. It must be
2730 	 * ensured they don't see cleared trace_buffered_event_cnt else they
2731 	 * could wrongly decide to use the pointed-to buffer which is now freed.
2732 	 */
2733 	synchronize_rcu();
2734 
2735 	/* For each CPU, relinquish the buffer */
2736 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2737 			 true);
2738 }
2739 
2740 static struct trace_buffer *temp_buffer;
2741 
2742 struct ring_buffer_event *
2743 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2744 			  struct trace_event_file *trace_file,
2745 			  int type, unsigned long len,
2746 			  unsigned int trace_ctx)
2747 {
2748 	struct ring_buffer_event *entry;
2749 	struct trace_array *tr = trace_file->tr;
2750 	int val;
2751 
2752 	*current_rb = tr->array_buffer.buffer;
2753 
2754 	if (!tr->no_filter_buffering_ref &&
2755 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2756 		preempt_disable_notrace();
2757 		/*
2758 		 * Filtering is on, so try to use the per cpu buffer first.
2759 		 * This buffer will simulate a ring_buffer_event,
2760 		 * where the type_len is zero and the array[0] will
2761 		 * hold the full length.
2762 		 * (see include/linux/ring_buffer.h for details on
2763 		 *  how the ring_buffer_event is structured).
2764 		 *
2765 		 * Using a temp buffer during filtering and copying it
2766 		 * on a matched filter is quicker than writing directly
2767 		 * into the ring buffer and then discarding it when
2768 		 * it doesn't match. That is because the discard
2769 		 * requires several atomic operations to get right.
2770 		 * Copying on match and doing nothing on a failed match
2771 		 * is still quicker than no copy on match, but having
2772 		 * to discard out of the ring buffer on a failed match.
2773 		 */
2774 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2775 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2776 
2777 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2778 
2779 			/*
2780 			 * Preemption is disabled, but interrupts and NMIs
2781 			 * can still come in now. If that happens after
2782 			 * the above increment, then it will have to go
2783 			 * back to the old method of allocating the event
2784 			 * on the ring buffer, and if the filter fails, it
2785 			 * will have to call ring_buffer_discard_commit()
2786 			 * to remove it.
2787 			 *
2788 			 * Need to also check the unlikely case that the
2789 			 * length is bigger than the temp buffer size.
2790 			 * If that happens, then the reserve is pretty much
2791 			 * guaranteed to fail, as the ring buffer currently
2792 			 * only allows events less than a page. But that may
2793 			 * change in the future, so let the ring buffer reserve
2794 			 * handle the failure in that case.
2795 			 */
2796 			if (val == 1 && likely(len <= max_len)) {
2797 				trace_event_setup(entry, type, trace_ctx);
2798 				entry->array[0] = len;
2799 				/* Return with preemption disabled */
2800 				return entry;
2801 			}
2802 			this_cpu_dec(trace_buffered_event_cnt);
2803 		}
2804 		/* __trace_buffer_lock_reserve() disables preemption */
2805 		preempt_enable_notrace();
2806 	}
2807 
2808 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2809 					    trace_ctx);
2810 	/*
2811 	 * If tracing is off, but we have triggers enabled
2812 	 * we still need to look at the event data. Use the temp_buffer
2813 	 * to store the trace event for the trigger to use. It's recursion
2814 	 * safe and will not be recorded anywhere.
2815 	 */
2816 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2817 		*current_rb = temp_buffer;
2818 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2819 						    trace_ctx);
2820 	}
2821 	return entry;
2822 }
2823 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2824 
2825 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2826 static DEFINE_MUTEX(tracepoint_printk_mutex);
2827 
2828 static void output_printk(struct trace_event_buffer *fbuffer)
2829 {
2830 	struct trace_event_call *event_call;
2831 	struct trace_event_file *file;
2832 	struct trace_event *event;
2833 	unsigned long flags;
2834 	struct trace_iterator *iter = tracepoint_print_iter;
2835 
2836 	/* We should never get here if iter is NULL */
2837 	if (WARN_ON_ONCE(!iter))
2838 		return;
2839 
2840 	event_call = fbuffer->trace_file->event_call;
2841 	if (!event_call || !event_call->event.funcs ||
2842 	    !event_call->event.funcs->trace)
2843 		return;
2844 
2845 	file = fbuffer->trace_file;
2846 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2847 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2848 	     !filter_match_preds(file->filter, fbuffer->entry)))
2849 		return;
2850 
2851 	event = &fbuffer->trace_file->event_call->event;
2852 
2853 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2854 	trace_seq_init(&iter->seq);
2855 	iter->ent = fbuffer->entry;
2856 	event_call->event.funcs->trace(iter, 0, event);
2857 	trace_seq_putc(&iter->seq, 0);
2858 	printk("%s", iter->seq.buffer);
2859 
2860 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2861 }
2862 
2863 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
2864 			     void *buffer, size_t *lenp,
2865 			     loff_t *ppos)
2866 {
2867 	int save_tracepoint_printk;
2868 	int ret;
2869 
2870 	guard(mutex)(&tracepoint_printk_mutex);
2871 	save_tracepoint_printk = tracepoint_printk;
2872 
2873 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2874 
2875 	/*
2876 	 * This will force exiting early, as tracepoint_printk
2877 	 * is always zero when tracepoint_print_iter is not allocated.
2878 	 */
2879 	if (!tracepoint_print_iter)
2880 		tracepoint_printk = 0;
2881 
2882 	if (save_tracepoint_printk == tracepoint_printk)
2883 		return ret;
2884 
2885 	if (tracepoint_printk)
2886 		static_key_enable(&tracepoint_printk_key.key);
2887 	else
2888 		static_key_disable(&tracepoint_printk_key.key);
2889 
2890 	return ret;
2891 }
2892 
2893 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2894 {
2895 	enum event_trigger_type tt = ETT_NONE;
2896 	struct trace_event_file *file = fbuffer->trace_file;
2897 
2898 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2899 			fbuffer->entry, &tt))
2900 		goto discard;
2901 
2902 	if (static_key_false(&tracepoint_printk_key.key))
2903 		output_printk(fbuffer);
2904 
2905 	if (static_branch_unlikely(&trace_event_exports_enabled))
2906 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2907 
2908 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2909 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2910 
2911 discard:
2912 	if (tt)
2913 		event_triggers_post_call(file, tt);
2914 
2915 }
2916 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2917 
2918 /*
2919  * Skip 3:
2920  *
2921  *   trace_buffer_unlock_commit_regs()
2922  *   trace_event_buffer_commit()
2923  *   trace_event_raw_event_xxx()
2924  */
2925 # define STACK_SKIP 3
2926 
2927 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2928 				     struct trace_buffer *buffer,
2929 				     struct ring_buffer_event *event,
2930 				     unsigned int trace_ctx,
2931 				     struct pt_regs *regs)
2932 {
2933 	__buffer_unlock_commit(buffer, event);
2934 
2935 	/*
2936 	 * If regs is not set, then skip the necessary functions.
2937 	 * Note, we can still get here via blktrace, wakeup tracer
2938 	 * and mmiotrace, but that's ok if they lose a function or
2939 	 * two. They are not that meaningful.
2940 	 */
2941 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2942 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2943 }
2944 
2945 /*
2946  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2947  */
2948 void
2949 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2950 				   struct ring_buffer_event *event)
2951 {
2952 	__buffer_unlock_commit(buffer, event);
2953 }
2954 
2955 void
2956 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2957 	       parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs)
2958 {
2959 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2960 	struct ring_buffer_event *event;
2961 	struct ftrace_entry *entry;
2962 	int size = sizeof(*entry);
2963 
2964 	size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long);
2965 
2966 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size,
2967 					    trace_ctx);
2968 	if (!event)
2969 		return;
2970 	entry	= ring_buffer_event_data(event);
2971 	entry->ip			= ip;
2972 	entry->parent_ip		= parent_ip;
2973 
2974 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
2975 	if (fregs) {
2976 		for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
2977 			entry->args[i] = ftrace_regs_get_argument(fregs, i);
2978 	}
2979 #endif
2980 
2981 	if (static_branch_unlikely(&trace_function_exports_enabled))
2982 		ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2983 	__buffer_unlock_commit(buffer, event);
2984 }
2985 
2986 #ifdef CONFIG_STACKTRACE
2987 
2988 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2989 #define FTRACE_KSTACK_NESTING	4
2990 
2991 #define FTRACE_KSTACK_ENTRIES	(SZ_4K / FTRACE_KSTACK_NESTING)
2992 
2993 struct ftrace_stack {
2994 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2995 };
2996 
2997 
2998 struct ftrace_stacks {
2999 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
3000 };
3001 
3002 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3003 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3004 
3005 static void __ftrace_trace_stack(struct trace_array *tr,
3006 				 struct trace_buffer *buffer,
3007 				 unsigned int trace_ctx,
3008 				 int skip, struct pt_regs *regs)
3009 {
3010 	struct ring_buffer_event *event;
3011 	unsigned int size, nr_entries;
3012 	struct ftrace_stack *fstack;
3013 	struct stack_entry *entry;
3014 	int stackidx;
3015 	int bit;
3016 
3017 	bit = trace_test_and_set_recursion(_THIS_IP_, _RET_IP_, TRACE_EVENT_START);
3018 	if (bit < 0)
3019 		return;
3020 
3021 	/*
3022 	 * Add one, for this function and the call to save_stack_trace().
3023 	 * If regs is set, then these functions will not be in the way.
3024 	 */
3025 #ifndef CONFIG_UNWINDER_ORC
3026 	if (!regs)
3027 		skip++;
3028 #endif
3029 
3030 	guard(preempt_notrace)();
3031 
3032 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3033 
3034 	/* This should never happen. If it does, yell once and skip */
3035 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3036 		goto out;
3037 
3038 	/*
3039 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3040 	 * interrupt will either see the value pre increment or post
3041 	 * increment. If the interrupt happens pre increment it will have
3042 	 * restored the counter when it returns.  We just need a barrier to
3043 	 * keep gcc from moving things around.
3044 	 */
3045 	barrier();
3046 
3047 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3048 	size = ARRAY_SIZE(fstack->calls);
3049 
3050 	if (regs) {
3051 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3052 						   size, skip);
3053 	} else {
3054 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3055 	}
3056 
3057 #ifdef CONFIG_DYNAMIC_FTRACE
3058 	/* Mark entry of stack trace as trampoline code */
3059 	if (tr->ops && tr->ops->trampoline) {
3060 		unsigned long tramp_start = tr->ops->trampoline;
3061 		unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
3062 		unsigned long *calls = fstack->calls;
3063 
3064 		for (int i = 0; i < nr_entries; i++) {
3065 			if (calls[i] >= tramp_start && calls[i] < tramp_end)
3066 				calls[i] = FTRACE_TRAMPOLINE_MARKER;
3067 		}
3068 	}
3069 #endif
3070 
3071 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3072 				    struct_size(entry, caller, nr_entries),
3073 				    trace_ctx);
3074 	if (!event)
3075 		goto out;
3076 	entry = ring_buffer_event_data(event);
3077 
3078 	entry->size = nr_entries;
3079 	memcpy(&entry->caller, fstack->calls,
3080 	       flex_array_size(entry, caller, nr_entries));
3081 
3082 	__buffer_unlock_commit(buffer, event);
3083 
3084  out:
3085 	/* Again, don't let gcc optimize things here */
3086 	barrier();
3087 	__this_cpu_dec(ftrace_stack_reserve);
3088 	trace_clear_recursion(bit);
3089 }
3090 
3091 static inline void ftrace_trace_stack(struct trace_array *tr,
3092 				      struct trace_buffer *buffer,
3093 				      unsigned int trace_ctx,
3094 				      int skip, struct pt_regs *regs)
3095 {
3096 	if (!(tr->trace_flags & TRACE_ITER(STACKTRACE)))
3097 		return;
3098 
3099 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
3100 }
3101 
3102 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3103 		   int skip)
3104 {
3105 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3106 
3107 	if (rcu_is_watching()) {
3108 		__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3109 		return;
3110 	}
3111 
3112 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3113 		return;
3114 
3115 	/*
3116 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3117 	 * but if the above rcu_is_watching() failed, then the NMI
3118 	 * triggered someplace critical, and ct_irq_enter() should
3119 	 * not be called from NMI.
3120 	 */
3121 	if (unlikely(in_nmi()))
3122 		return;
3123 
3124 	ct_irq_enter_irqson();
3125 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3126 	ct_irq_exit_irqson();
3127 }
3128 
3129 /**
3130  * trace_dump_stack - record a stack back trace in the trace buffer
3131  * @skip: Number of functions to skip (helper handlers)
3132  */
3133 void trace_dump_stack(int skip)
3134 {
3135 	if (tracing_disabled || tracing_selftest_running)
3136 		return;
3137 
3138 #ifndef CONFIG_UNWINDER_ORC
3139 	/* Skip 1 to skip this function. */
3140 	skip++;
3141 #endif
3142 	__ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
3143 				tracing_gen_ctx(), skip, NULL);
3144 }
3145 EXPORT_SYMBOL_GPL(trace_dump_stack);
3146 
3147 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3148 static DEFINE_PER_CPU(int, user_stack_count);
3149 
3150 static void
3151 ftrace_trace_userstack(struct trace_array *tr,
3152 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3153 {
3154 	struct ring_buffer_event *event;
3155 	struct userstack_entry *entry;
3156 
3157 	if (!(tr->trace_flags & TRACE_ITER(USERSTACKTRACE)))
3158 		return;
3159 
3160 	/*
3161 	 * NMIs cannot handle page faults, even with fixups.
3162 	 * Saving the user stack can (and often does) fault.
3163 	 */
3164 	if (unlikely(in_nmi()))
3165 		return;
3166 
3167 	/*
3168 	 * prevent recursion, since the user stack tracing may
3169 	 * trigger other kernel events.
3170 	 */
3171 	guard(preempt)();
3172 	if (__this_cpu_read(user_stack_count))
3173 		return;
3174 
3175 	__this_cpu_inc(user_stack_count);
3176 
3177 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3178 					    sizeof(*entry), trace_ctx);
3179 	if (!event)
3180 		goto out_drop_count;
3181 	entry	= ring_buffer_event_data(event);
3182 
3183 	entry->tgid		= current->tgid;
3184 	memset(&entry->caller, 0, sizeof(entry->caller));
3185 
3186 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3187 	__buffer_unlock_commit(buffer, event);
3188 
3189  out_drop_count:
3190 	__this_cpu_dec(user_stack_count);
3191 }
3192 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3193 static void ftrace_trace_userstack(struct trace_array *tr,
3194 				   struct trace_buffer *buffer,
3195 				   unsigned int trace_ctx)
3196 {
3197 }
3198 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3199 
3200 #endif /* CONFIG_STACKTRACE */
3201 
3202 static inline void
3203 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3204 			  unsigned long long delta)
3205 {
3206 	entry->bottom_delta_ts = delta & U32_MAX;
3207 	entry->top_delta_ts = (delta >> 32);
3208 }
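/*
 * The helper above splits a 64-bit delta across the two 32-bit fields of
 * struct func_repeats_entry; a reader recombines it as (sketch):
 *
 *	u64 delta = ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
 */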
3209 
3210 void trace_last_func_repeats(struct trace_array *tr,
3211 			     struct trace_func_repeats *last_info,
3212 			     unsigned int trace_ctx)
3213 {
3214 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3215 	struct func_repeats_entry *entry;
3216 	struct ring_buffer_event *event;
3217 	u64 delta;
3218 
3219 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3220 					    sizeof(*entry), trace_ctx);
3221 	if (!event)
3222 		return;
3223 
3224 	delta = ring_buffer_event_time_stamp(buffer, event) -
3225 		last_info->ts_last_call;
3226 
3227 	entry = ring_buffer_event_data(event);
3228 	entry->ip = last_info->ip;
3229 	entry->parent_ip = last_info->parent_ip;
3230 	entry->count = last_info->count;
3231 	func_repeats_set_delta_ts(entry, delta);
3232 
3233 	__buffer_unlock_commit(buffer, event);
3234 }
3235 
3236 /* created for use with alloc_percpu */
3237 struct trace_buffer_struct {
3238 	int nesting;
3239 	char buffer[4][TRACE_BUF_SIZE];
3240 };
3241 
3242 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3243 
3244 /*
3245  * This allows for lockless recording.  If we're nested too deeply, then
3246  * this returns NULL.
3247  */
3248 static char *get_trace_buf(void)
3249 {
3250 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3251 
3252 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3253 		return NULL;
3254 
3255 	buffer->nesting++;
3256 
3257 	/* Interrupts must see nesting incremented before we use the buffer */
3258 	barrier();
3259 	return &buffer->buffer[buffer->nesting - 1][0];
3260 }
3261 
3262 static void put_trace_buf(void)
3263 {
3264 	/* Don't let the decrement of nesting leak before this */
3265 	barrier();
3266 	this_cpu_dec(trace_percpu_buffer->nesting);
3267 }
3268 
3269 static int alloc_percpu_trace_buffer(void)
3270 {
3271 	struct trace_buffer_struct __percpu *buffers;
3272 
3273 	if (trace_percpu_buffer)
3274 		return 0;
3275 
3276 	buffers = alloc_percpu(struct trace_buffer_struct);
3277 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3278 		return -ENOMEM;
3279 
3280 	trace_percpu_buffer = buffers;
3281 	return 0;
3282 }
3283 
3284 static int buffers_allocated;
3285 
3286 void trace_printk_init_buffers(void)
3287 {
3288 	if (buffers_allocated)
3289 		return;
3290 
3291 	if (alloc_percpu_trace_buffer())
3292 		return;
3293 
3294 	/* trace_printk() is for debug use only. Don't use it in production. */
3295 
3296 	pr_warn("\n");
3297 	pr_warn("**********************************************************\n");
3298 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3299 	pr_warn("**                                                      **\n");
3300 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3301 	pr_warn("**                                                      **\n");
3302 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3303 	pr_warn("** unsafe for production use.                           **\n");
3304 	pr_warn("**                                                      **\n");
3305 	pr_warn("** If you see this message and you are not debugging    **\n");
3306 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3307 	pr_warn("**                                                      **\n");
3308 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3309 	pr_warn("**********************************************************\n");
3310 
3311 	/* Expand the buffers to set size */
3312 	tracing_update_buffers(&global_trace);
3313 
3314 	buffers_allocated = 1;
3315 
3316 	/*
3317 	 * trace_printk_init_buffers() can be called by modules.
3318 	 * If that happens, then we need to start cmdline recording
3319 	 * directly here. If the global_trace.buffer is already
3320 	 * allocated here, then this was called by module code.
3321 	 */
3322 	if (global_trace.array_buffer.buffer)
3323 		tracing_start_cmdline_record();
3324 }
3325 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3326 
3327 void trace_printk_start_comm(void)
3328 {
3329 	/* Start tracing comms if trace printk is set */
3330 	if (!buffers_allocated)
3331 		return;
3332 	tracing_start_cmdline_record();
3333 }
3334 
3335 static void trace_printk_start_stop_comm(int enabled)
3336 {
3337 	if (!buffers_allocated)
3338 		return;
3339 
3340 	if (enabled)
3341 		tracing_start_cmdline_record();
3342 	else
3343 		tracing_stop_cmdline_record();
3344 }
3345 
3346 /**
3347  * trace_vbprintk - write binary msg to tracing buffer
3348  * @ip:    The address of the caller
3349  * @fmt:   The string format to write to the buffer
3350  * @args:  Arguments for @fmt
3351  */
3352 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3353 {
3354 	struct ring_buffer_event *event;
3355 	struct trace_buffer *buffer;
3356 	struct trace_array *tr = READ_ONCE(printk_trace);
3357 	struct bprint_entry *entry;
3358 	unsigned int trace_ctx;
3359 	char *tbuffer;
3360 	int len = 0, size;
3361 
3362 	if (!printk_binsafe(tr))
3363 		return trace_vprintk(ip, fmt, args);
3364 
3365 	if (unlikely(tracing_selftest_running || tracing_disabled))
3366 		return 0;
3367 
3368 	/* Don't pollute graph traces with trace_vprintk internals */
3369 	pause_graph_tracing();
3370 
3371 	trace_ctx = tracing_gen_ctx();
3372 	guard(preempt_notrace)();
3373 
3374 	tbuffer = get_trace_buf();
3375 	if (!tbuffer) {
3376 		len = 0;
3377 		goto out_nobuffer;
3378 	}
3379 
3380 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3381 
3382 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3383 		goto out_put;
3384 
3385 	size = sizeof(*entry) + sizeof(u32) * len;
3386 	buffer = tr->array_buffer.buffer;
3387 	scoped_guard(ring_buffer_nest, buffer) {
3388 		event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3389 						    trace_ctx);
3390 		if (!event)
3391 			goto out_put;
3392 		entry = ring_buffer_event_data(event);
3393 		entry->ip			= ip;
3394 		entry->fmt			= fmt;
3395 
3396 		memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3397 		__buffer_unlock_commit(buffer, event);
3398 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3399 	}
3400 out_put:
3401 	put_trace_buf();
3402 
3403 out_nobuffer:
3404 	unpause_graph_tracing();
3405 
3406 	return len;
3407 }
3408 EXPORT_SYMBOL_GPL(trace_vbprintk);
3409 
3410 static __printf(3, 0)
3411 int __trace_array_vprintk(struct trace_buffer *buffer,
3412 			  unsigned long ip, const char *fmt, va_list args)
3413 {
3414 	struct ring_buffer_event *event;
3415 	int len = 0, size;
3416 	struct print_entry *entry;
3417 	unsigned int trace_ctx;
3418 	char *tbuffer;
3419 
3420 	if (tracing_disabled)
3421 		return 0;
3422 
3423 	/* Don't pollute graph traces with trace_vprintk internals */
3424 	pause_graph_tracing();
3425 
3426 	trace_ctx = tracing_gen_ctx();
3427 	guard(preempt_notrace)();
3428 
3429 
3430 	tbuffer = get_trace_buf();
3431 	if (!tbuffer) {
3432 		len = 0;
3433 		goto out_nobuffer;
3434 	}
3435 
3436 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3437 
3438 	size = sizeof(*entry) + len + 1;
3439 	scoped_guard(ring_buffer_nest, buffer) {
3440 		event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3441 						    trace_ctx);
3442 		if (!event)
3443 			goto out;
3444 		entry = ring_buffer_event_data(event);
3445 		entry->ip = ip;
3446 
3447 		memcpy(&entry->buf, tbuffer, len + 1);
3448 		__buffer_unlock_commit(buffer, event);
3449 		ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL);
3450 	}
3451 out:
3452 	put_trace_buf();
3453 
3454 out_nobuffer:
3455 	unpause_graph_tracing();
3456 
3457 	return len;
3458 }
3459 
3460 int trace_array_vprintk(struct trace_array *tr,
3461 			unsigned long ip, const char *fmt, va_list args)
3462 {
3463 	if (tracing_selftest_running && tr == &global_trace)
3464 		return 0;
3465 
3466 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3467 }
3468 
3469 /**
3470  * trace_array_printk - Print a message to a specific instance
3471  * @tr: The instance trace_array descriptor
3472  * @ip: The instruction pointer that this is called from.
3473  * @fmt: The format to print (printf format)
3474  *
3475  * If a subsystem sets up its own instance, they have the right to
3476  * printk strings into their tracing instance buffer using this
3477  * function. Note, this function will not write into the top level
3478  * buffer (use trace_printk() for that), as writing into the top level
3479  * buffer should only have events that can be individually disabled.
3480  * trace_printk() is only used for debugging a kernel, and should not
3481  * be ever incorporated in normal use.
3482  *
3483  * trace_array_printk() can be used, as it will not add noise to the
3484  * top level tracing buffer.
3485  *
3486  * Note, trace_array_init_printk() must be called on @tr before this
3487  * can be used.
3488  */
3489 int trace_array_printk(struct trace_array *tr,
3490 		       unsigned long ip, const char *fmt, ...)
3491 {
3492 	int ret;
3493 	va_list ap;
3494 
3495 	if (!tr)
3496 		return -ENOENT;
3497 
3498 	/* This is only allowed for created instances */
3499 	if (tr == &global_trace)
3500 		return 0;
3501 
3502 	if (!(tr->trace_flags & TRACE_ITER(PRINTK)))
3503 		return 0;
3504 
3505 	va_start(ap, fmt);
3506 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3507 	va_end(ap);
3508 	return ret;
3509 }
3510 EXPORT_SYMBOL_GPL(trace_array_printk);
3511 
3512 /**
3513  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3514  * @tr: The trace array to initialize the buffers for
3515  *
3516  * As trace_array_printk() only writes into instances, they are OK to
3517  * have in the kernel (unlike trace_printk()). This needs to be called
3518  * before trace_array_printk() can be used on a trace_array.
3519  */
3520 int trace_array_init_printk(struct trace_array *tr)
3521 {
3522 	if (!tr)
3523 		return -ENOENT;
3524 
3525 	/* This is only allowed for created instances */
3526 	if (tr == &global_trace)
3527 		return -EINVAL;
3528 
3529 	return alloc_percpu_trace_buffer();
3530 }
3531 EXPORT_SYMBOL_GPL(trace_array_init_printk);
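/*
 * Illustrative usage sketch (not part of this file): a subsystem that
 * owns a tracing instance prepares the percpu printk buffers once with
 * trace_array_init_printk() and then logs into its own buffer. The
 * instance lookup assumes trace_array_get_by_name(); adjust to however
 * the caller obtained its trace_array (and pair with trace_array_put()
 * when done):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_subsys", NULL);
 *	if (!tr || trace_array_init_printk(tr))
 *		return -ENOMEM;
 *
 *	trace_array_printk(tr, _THIS_IP_, "request %d took %llu ns\n",
 *			   req_id, delta_ns);
 */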
3532 
3533 int trace_array_printk_buf(struct trace_buffer *buffer,
3534 			   unsigned long ip, const char *fmt, ...)
3535 {
3536 	int ret;
3537 	va_list ap;
3538 
3539 	if (!(printk_trace->trace_flags & TRACE_ITER(PRINTK)))
3540 		return 0;
3541 
3542 	va_start(ap, fmt);
3543 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3544 	va_end(ap);
3545 	return ret;
3546 }
3547 
3548 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3549 {
3550 	return trace_array_vprintk(printk_trace, ip, fmt, args);
3551 }
3552 EXPORT_SYMBOL_GPL(trace_vprintk);
3553 
3554 static void trace_iterator_increment(struct trace_iterator *iter)
3555 {
3556 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3557 
3558 	iter->idx++;
3559 	if (buf_iter)
3560 		ring_buffer_iter_advance(buf_iter);
3561 }
3562 
3563 static struct trace_entry *
3564 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3565 		unsigned long *lost_events)
3566 {
3567 	struct ring_buffer_event *event;
3568 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3569 
3570 	if (buf_iter) {
3571 		event = ring_buffer_iter_peek(buf_iter, ts);
3572 		if (lost_events)
3573 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3574 				(unsigned long)-1 : 0;
3575 	} else {
3576 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3577 					 lost_events);
3578 	}
3579 
3580 	if (event) {
3581 		iter->ent_size = ring_buffer_event_length(event);
3582 		return ring_buffer_event_data(event);
3583 	}
3584 	iter->ent_size = 0;
3585 	return NULL;
3586 }
3587 
3588 static struct trace_entry *
3589 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3590 		  unsigned long *missing_events, u64 *ent_ts)
3591 {
3592 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3593 	struct trace_entry *ent, *next = NULL;
3594 	unsigned long lost_events = 0, next_lost = 0;
3595 	int cpu_file = iter->cpu_file;
3596 	u64 next_ts = 0, ts;
3597 	int next_cpu = -1;
3598 	int next_size = 0;
3599 	int cpu;
3600 
3601 	/*
3602 	 * If we are in a per_cpu trace file, don't bother iterating over
3603 	 * all CPUs; peek at that CPU directly.
3604 	 */
3605 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3606 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3607 			return NULL;
3608 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3609 		if (ent_cpu)
3610 			*ent_cpu = cpu_file;
3611 
3612 		return ent;
3613 	}
3614 
3615 	for_each_tracing_cpu(cpu) {
3616 
3617 		if (ring_buffer_empty_cpu(buffer, cpu))
3618 			continue;
3619 
3620 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3621 
3622 		/*
3623 		 * Pick the entry with the smallest timestamp:
3624 		 */
3625 		if (ent && (!next || ts < next_ts)) {
3626 			next = ent;
3627 			next_cpu = cpu;
3628 			next_ts = ts;
3629 			next_lost = lost_events;
3630 			next_size = iter->ent_size;
3631 		}
3632 	}
3633 
3634 	iter->ent_size = next_size;
3635 
3636 	if (ent_cpu)
3637 		*ent_cpu = next_cpu;
3638 
3639 	if (ent_ts)
3640 		*ent_ts = next_ts;
3641 
3642 	if (missing_events)
3643 		*missing_events = next_lost;
3644 
3645 	return next;
3646 }
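/*
 * Illustrative note: the loop above is a simple k-way merge across the
 * per-CPU buffers. If, say, CPU0's head entry has ts=2000, CPU1's has
 * ts=1500 and CPU2 is empty, the CPU1 entry is returned with
 * *ent_cpu == 1 because it carries the smallest timestamp; repeated
 * calls therefore stream the combined buffers in time order.
 */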
3647 
3648 #define STATIC_FMT_BUF_SIZE	128
3649 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3650 
3651 char *trace_iter_expand_format(struct trace_iterator *iter)
3652 {
3653 	char *tmp;
3654 
3655 	/*
3656 	 * iter->tr is NULL when used with tp_printk, which makes
3657 	 * this get called where it is not safe to call krealloc().
3658 	 */
3659 	if (!iter->tr || iter->fmt == static_fmt_buf)
3660 		return NULL;
3661 
3662 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3663 		       GFP_KERNEL);
3664 	if (tmp) {
3665 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3666 		iter->fmt = tmp;
3667 	}
3668 
3669 	return tmp;
3670 }
3671 
3672 /* Returns true if the string is safe to dereference from an event */
3673 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3674 {
3675 	unsigned long addr = (unsigned long)str;
3676 	struct trace_event *trace_event;
3677 	struct trace_event_call *event;
3678 
3679 	/* OK if part of the event data */
3680 	if ((addr >= (unsigned long)iter->ent) &&
3681 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3682 		return true;
3683 
3684 	/* OK if part of the temp seq buffer */
3685 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3686 	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3687 		return true;
3688 
3689 	/* Core rodata cannot be freed */
3690 	if (is_kernel_rodata(addr))
3691 		return true;
3692 
3693 	if (trace_is_tracepoint_string(str))
3694 		return true;
3695 
3696 	/*
3697 	 * Now this could be a module event, referencing core module
3698 	 * data, which is OK.
3699 	 */
3700 	if (!iter->ent)
3701 		return false;
3702 
3703 	trace_event = ftrace_find_event(iter->ent->type);
3704 	if (!trace_event)
3705 		return false;
3706 
3707 	event = container_of(trace_event, struct trace_event_call, event);
3708 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3709 		return false;
3710 
3711 	/* Would rather have rodata, but this will suffice */
3712 	if (within_module_core(addr, event->module))
3713 		return true;
3714 
3715 	return false;
3716 }
3717 
3718 /**
3719  * ignore_event - Check dereferenced fields while writing to the seq buffer
3720  * @iter: The iterator that holds the seq buffer and the event being printed
3721  *
3722  * At boot up, test_event_printk() will flag any event that dereferences
3723  * a string with "%s" that does not exist in the ring buffer. It may still
3724  * be valid, as the string may point to a static string in the kernel
3725  * rodata that never gets freed. But if the string pointer is pointing
3726  * to something that was allocated, there's a chance that it can be freed
3727  * by the time the user reads the trace. This would cause a bad memory
3728  * access by the kernel and possibly crash the system.
3729  *
3730  * This function will check if the event has any fields flagged as needing
3731  * to be checked at runtime and perform those checks.
3732  *
3733  * If it is found that a field is unsafe, it will write into the @iter->seq
3734  * a message stating what was found to be unsafe.
3735  *
3736  * @return: true if the event is unsafe and should be ignored,
3737  *          false otherwise.
3738  */
3739 bool ignore_event(struct trace_iterator *iter)
3740 {
3741 	struct ftrace_event_field *field;
3742 	struct trace_event *trace_event;
3743 	struct trace_event_call *event;
3744 	struct list_head *head;
3745 	struct trace_seq *seq;
3746 	const void *ptr;
3747 
3748 	trace_event = ftrace_find_event(iter->ent->type);
3749 
3750 	seq = &iter->seq;
3751 
3752 	if (!trace_event) {
3753 		trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
3754 		return true;
3755 	}
3756 
3757 	event = container_of(trace_event, struct trace_event_call, event);
3758 	if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
3759 		return false;
3760 
3761 	head = trace_get_fields(event);
3762 	if (!head) {
3763 		trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
3764 				 trace_event_name(event));
3765 		return true;
3766 	}
3767 
3768 	/* Offsets are from the iter->ent that points to the raw event */
3769 	ptr = iter->ent;
3770 
3771 	list_for_each_entry(field, head, link) {
3772 		const char *str;
3773 		bool good;
3774 
3775 		if (!field->needs_test)
3776 			continue;
3777 
3778 		str = *(const char **)(ptr + field->offset);
3779 
3780 		good = trace_safe_str(iter, str);
3781 
3782 		/*
3783 		 * If you hit this warning, it is likely that the
3784 		 * trace event in question used %s on a string that
3785 		 * was saved at the time of the event, but may not be
3786 		 * around when the trace is read. Use __string(),
3787 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3788 		 * instead. See samples/trace_events/trace-events-sample.h
3789 		 * for reference.
3790 		 */
3791 		if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
3792 			      trace_event_name(event), field->name)) {
3793 			trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
3794 					 trace_event_name(event), field->name);
3795 			return true;
3796 		}
3797 	}
3798 	return false;
3799 }
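/*
 * Illustrative sketch (not part of this file): the safe pattern that
 * the warning above points to. Copying the string into the event with
 * __string()/__assign_str()/__get_str() keeps the data inside the ring
 * buffer, so trace_safe_str() never has to guess about its lifetime.
 * (Older kernels also passed the source string to __assign_str(); see
 * samples/trace_events/trace-events-sample.h.)
 *
 *	TRACE_EVENT(sample_event,
 *		TP_PROTO(const char *name, int value),
 *		TP_ARGS(name, value),
 *		TP_STRUCT__entry(
 *			__string(name, name)
 *			__field(int, value)
 *		),
 *		TP_fast_assign(
 *			__assign_str(name);
 *			__entry->value = value;
 *		),
 *		TP_printk("name=%s value=%d", __get_str(name), __entry->value)
 *	);
 */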
3800 
3801 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3802 {
3803 	const char *p, *new_fmt;
3804 	char *q;
3805 
3806 	if (WARN_ON_ONCE(!fmt))
3807 		return fmt;
3808 
3809 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER(HASH_PTR))
3810 		return fmt;
3811 
3812 	p = fmt;
3813 	new_fmt = q = iter->fmt;
3814 	while (*p) {
3815 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3816 			if (!trace_iter_expand_format(iter))
3817 				return fmt;
3818 
3819 			q += iter->fmt - new_fmt;
3820 			new_fmt = iter->fmt;
3821 		}
3822 
3823 		*q++ = *p++;
3824 
3825 		/* Replace %p with %px */
3826 		if (p[-1] == '%') {
3827 			if (p[0] == '%') {
3828 				*q++ = *p++;
3829 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3830 				*q++ = *p++;
3831 				*q++ = 'x';
3832 			}
3833 		}
3834 	}
3835 	*q = '\0';
3836 
3837 	return new_fmt;
3838 }
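/*
 * Worked example (illustrative): with hash-ptr disabled, the rewrite
 * above turns
 *
 *	"dev=%s ptr=%p pct=100%% priv=%pK"
 * into
 *	"dev=%s ptr=%px pct=100%% priv=%pK"
 *
 * Only a bare "%p" (not followed by an alphanumeric extension such as
 * 'K' or 'S', and not part of an escaped "%%") gets the 'x' appended,
 * so real addresses are shown without disturbing other conversions.
 */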
3839 
3840 #define STATIC_TEMP_BUF_SIZE	128
3841 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3842 
3843 /* Find the next real entry, without updating the iterator itself */
3844 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3845 					  int *ent_cpu, u64 *ent_ts)
3846 {
3847 	/* __find_next_entry will reset ent_size */
3848 	int ent_size = iter->ent_size;
3849 	struct trace_entry *entry;
3850 
3851 	/*
3852 	 * If called from ftrace_dump(), then the iter->temp buffer
3853 	 * will be the static_temp_buf and not created from kmalloc.
3854 	 * If the entry size is greater than the buffer, we can
3855 	 * If the entry size is greater than the buffer, we cannot
3856 	 * save it. Just return NULL in that case. This is only
3857 	 * used to add markers when two consecutive events' time
3858 	 * stamps have a large delta. See trace_print_lat_context().
3859 	if (iter->temp == static_temp_buf &&
3860 	    STATIC_TEMP_BUF_SIZE < ent_size)
3861 		return NULL;
3862 
3863 	/*
3864 	 * The __find_next_entry() may call peek_next_entry(), which may
3865 	 * call ring_buffer_peek() that may make the contents of iter->ent
3866 	 * undefined. Need to copy iter->ent now.
3867 	 */
3868 	if (iter->ent && iter->ent != iter->temp) {
3869 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3870 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3871 			void *temp;
3872 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3873 			if (!temp)
3874 				return NULL;
3875 			kfree(iter->temp);
3876 			iter->temp = temp;
3877 			iter->temp_size = iter->ent_size;
3878 		}
3879 		memcpy(iter->temp, iter->ent, iter->ent_size);
3880 		iter->ent = iter->temp;
3881 	}
3882 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3883 	/* Put back the original ent_size */
3884 	iter->ent_size = ent_size;
3885 
3886 	return entry;
3887 }
3888 
3889 /* Find the next real entry, and increment the iterator to the next entry */
3890 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3891 {
3892 	iter->ent = __find_next_entry(iter, &iter->cpu,
3893 				      &iter->lost_events, &iter->ts);
3894 
3895 	if (iter->ent)
3896 		trace_iterator_increment(iter);
3897 
3898 	return iter->ent ? iter : NULL;
3899 }
3900 
3901 static void trace_consume(struct trace_iterator *iter)
3902 {
3903 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3904 			    &iter->lost_events);
3905 }
3906 
3907 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3908 {
3909 	struct trace_iterator *iter = m->private;
3910 	int i = (int)*pos;
3911 	void *ent;
3912 
3913 	WARN_ON_ONCE(iter->leftover);
3914 
3915 	(*pos)++;
3916 
3917 	/* can't go backwards */
3918 	if (iter->idx > i)
3919 		return NULL;
3920 
3921 	if (iter->idx < 0)
3922 		ent = trace_find_next_entry_inc(iter);
3923 	else
3924 		ent = iter;
3925 
3926 	while (ent && iter->idx < i)
3927 		ent = trace_find_next_entry_inc(iter);
3928 
3929 	iter->pos = *pos;
3930 
3931 	return ent;
3932 }
3933 
3934 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3935 {
3936 	struct ring_buffer_iter *buf_iter;
3937 	unsigned long entries = 0;
3938 	u64 ts;
3939 
3940 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3941 
3942 	buf_iter = trace_buffer_iter(iter, cpu);
3943 	if (!buf_iter)
3944 		return;
3945 
3946 	ring_buffer_iter_reset(buf_iter);
3947 
3948 	/*
3949 	 * With the max latency tracers, it is possible that a reset
3950 	 * never took place on a CPU. This is evident by the timestamp
3951 	 * being before the start of the buffer.
3952 	 */
3953 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
3954 		if (ts >= iter->array_buffer->time_start)
3955 			break;
3956 		entries++;
3957 		ring_buffer_iter_advance(buf_iter);
3958 		/* This could be a big loop */
3959 		cond_resched();
3960 	}
3961 
3962 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3963 }
3964 
3965 /*
3966  * The current tracer is copied to avoid taking a global lock
3967  * all around.
3968  */
3969 static void *s_start(struct seq_file *m, loff_t *pos)
3970 {
3971 	struct trace_iterator *iter = m->private;
3972 	struct trace_array *tr = iter->tr;
3973 	int cpu_file = iter->cpu_file;
3974 	void *p = NULL;
3975 	loff_t l = 0;
3976 	int cpu;
3977 
3978 	mutex_lock(&trace_types_lock);
3979 	if (unlikely(tr->current_trace != iter->trace)) {
3980 		/* Close iter->trace before switching to the new current tracer */
3981 		if (iter->trace->close)
3982 			iter->trace->close(iter);
3983 		iter->trace = tr->current_trace;
3984 		/* Reopen the new current tracer */
3985 		if (iter->trace->open)
3986 			iter->trace->open(iter);
3987 	}
3988 	mutex_unlock(&trace_types_lock);
3989 
3990 #ifdef CONFIG_TRACER_MAX_TRACE
3991 	if (iter->snapshot && iter->trace->use_max_tr)
3992 		return ERR_PTR(-EBUSY);
3993 #endif
3994 
3995 	if (*pos != iter->pos) {
3996 		iter->ent = NULL;
3997 		iter->cpu = 0;
3998 		iter->idx = -1;
3999 
4000 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4001 			for_each_tracing_cpu(cpu)
4002 				tracing_iter_reset(iter, cpu);
4003 		} else
4004 			tracing_iter_reset(iter, cpu_file);
4005 
4006 		iter->leftover = 0;
4007 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4008 			;
4009 
4010 	} else {
4011 		/*
4012 		 * If we overflowed the seq_file before, then we want
4013 		 * to just reuse the trace_seq buffer again.
4014 		 */
4015 		if (iter->leftover)
4016 			p = iter;
4017 		else {
4018 			l = *pos - 1;
4019 			p = s_next(m, p, &l);
4020 		}
4021 	}
4022 
4023 	trace_event_read_lock();
4024 	trace_access_lock(cpu_file);
4025 	return p;
4026 }
4027 
4028 static void s_stop(struct seq_file *m, void *p)
4029 {
4030 	struct trace_iterator *iter = m->private;
4031 
4032 #ifdef CONFIG_TRACER_MAX_TRACE
4033 	if (iter->snapshot && iter->trace->use_max_tr)
4034 		return;
4035 #endif
4036 
4037 	trace_access_unlock(iter->cpu_file);
4038 	trace_event_read_unlock();
4039 }
4040 
4041 static void
4042 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4043 		      unsigned long *entries, int cpu)
4044 {
4045 	unsigned long count;
4046 
4047 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4048 	/*
4049 	 * If this buffer has skipped entries, then we hold all
4050 	 * entries for the trace and we need to ignore the
4051 	 * ones before the time stamp.
4052 	 */
4053 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4054 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4055 		/* total is the same as the entries */
4056 		*total = count;
4057 	} else
4058 		*total = count +
4059 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4060 	*entries = count;
4061 }
4062 
4063 static void
4064 get_total_entries(struct array_buffer *buf,
4065 		  unsigned long *total, unsigned long *entries)
4066 {
4067 	unsigned long t, e;
4068 	int cpu;
4069 
4070 	*total = 0;
4071 	*entries = 0;
4072 
4073 	for_each_tracing_cpu(cpu) {
4074 		get_total_entries_cpu(buf, &t, &e, cpu);
4075 		*total += t;
4076 		*entries += e;
4077 	}
4078 }
4079 
4080 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4081 {
4082 	unsigned long total, entries;
4083 
4084 	if (!tr)
4085 		tr = &global_trace;
4086 
4087 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4088 
4089 	return entries;
4090 }
4091 
4092 unsigned long trace_total_entries(struct trace_array *tr)
4093 {
4094 	unsigned long total, entries;
4095 
4096 	if (!tr)
4097 		tr = &global_trace;
4098 
4099 	get_total_entries(&tr->array_buffer, &total, &entries);
4100 
4101 	return entries;
4102 }
4103 
4104 static void print_lat_help_header(struct seq_file *m)
4105 {
4106 	seq_puts(m, "#                    _------=> CPU#            \n"
4107 		    "#                   / _-----=> irqs-off/BH-disabled\n"
4108 		    "#                  | / _----=> need-resched    \n"
4109 		    "#                  || / _---=> hardirq/softirq \n"
4110 		    "#                  ||| / _--=> preempt-depth   \n"
4111 		    "#                  |||| / _-=> migrate-disable \n"
4112 		    "#                  ||||| /     delay           \n"
4113 		    "#  cmd     pid     |||||| time  |   caller     \n"
4114 		    "#     \\   /        ||||||  \\    |    /       \n");
4115 }
4116 
4117 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4118 {
4119 	unsigned long total;
4120 	unsigned long entries;
4121 
4122 	get_total_entries(buf, &total, &entries);
4123 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4124 		   entries, total, num_online_cpus());
4125 	seq_puts(m, "#\n");
4126 }
4127 
4128 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4129 				   unsigned int flags)
4130 {
4131 	bool tgid = flags & TRACE_ITER(RECORD_TGID);
4132 
4133 	print_event_info(buf, m);
4134 
4135 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4136 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4137 }
4138 
4139 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4140 				       unsigned int flags)
4141 {
4142 	bool tgid = flags & TRACE_ITER(RECORD_TGID);
4143 	static const char space[] = "            ";
4144 	int prec = tgid ? 12 : 2;
4145 
4146 	print_event_info(buf, m);
4147 
4148 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4149 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4150 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4151 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4152 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4153 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4154 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4155 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4156 }
4157 
4158 void
4159 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4160 {
4161 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4162 	struct array_buffer *buf = iter->array_buffer;
4163 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4164 	struct tracer *type = iter->trace;
4165 	unsigned long entries;
4166 	unsigned long total;
4167 	const char *name = type->name;
4168 
4169 	get_total_entries(buf, &total, &entries);
4170 
4171 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4172 		   name, init_utsname()->release);
4173 	seq_puts(m, "# -----------------------------------"
4174 		 "---------------------------------\n");
4175 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4176 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4177 		   nsecs_to_usecs(data->saved_latency),
4178 		   entries,
4179 		   total,
4180 		   buf->cpu,
4181 		   preempt_model_str(),
4182 		   /* These are reserved for later use */
4183 		   0, 0, 0, 0);
4184 #ifdef CONFIG_SMP
4185 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4186 #else
4187 	seq_puts(m, ")\n");
4188 #endif
4189 	seq_puts(m, "#    -----------------\n");
4190 	seq_printf(m, "#    | task: %.16s-%d "
4191 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4192 		   data->comm, data->pid,
4193 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4194 		   data->policy, data->rt_priority);
4195 	seq_puts(m, "#    -----------------\n");
4196 
4197 	if (data->critical_start) {
4198 		seq_puts(m, "#  => started at: ");
4199 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4200 		trace_print_seq(m, &iter->seq);
4201 		seq_puts(m, "\n#  => ended at:   ");
4202 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4203 		trace_print_seq(m, &iter->seq);
4204 		seq_puts(m, "\n#\n");
4205 	}
4206 
4207 	seq_puts(m, "#\n");
4208 }
4209 
4210 static void test_cpu_buff_start(struct trace_iterator *iter)
4211 {
4212 	struct trace_seq *s = &iter->seq;
4213 	struct trace_array *tr = iter->tr;
4214 
4215 	if (!(tr->trace_flags & TRACE_ITER(ANNOTATE)))
4216 		return;
4217 
4218 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4219 		return;
4220 
4221 	if (cpumask_available(iter->started) &&
4222 	    cpumask_test_cpu(iter->cpu, iter->started))
4223 		return;
4224 
4225 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4226 		return;
4227 
4228 	if (cpumask_available(iter->started))
4229 		cpumask_set_cpu(iter->cpu, iter->started);
4230 
4231 	/* Don't print started cpu buffer for the first entry of the trace */
4232 	if (iter->idx > 1)
4233 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4234 				iter->cpu);
4235 }
4236 
4237 #ifdef CONFIG_FTRACE_SYSCALLS
4238 static bool is_syscall_event(struct trace_event *event)
4239 {
4240 	return (event->funcs == &enter_syscall_print_funcs) ||
4241 	       (event->funcs == &exit_syscall_print_funcs);
4242 
4243 }
4244 #define syscall_buf_size CONFIG_TRACE_SYSCALL_BUF_SIZE_DEFAULT
4245 #else
4246 static inline bool is_syscall_event(struct trace_event *event)
4247 {
4248 	return false;
4249 }
4250 #define syscall_buf_size 0
4251 #endif /* CONFIG_FTRACE_SYSCALLS */
4252 
4253 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4254 {
4255 	struct trace_array *tr = iter->tr;
4256 	struct trace_seq *s = &iter->seq;
4257 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4258 	struct trace_entry *entry;
4259 	struct trace_event *event;
4260 
4261 	entry = iter->ent;
4262 
4263 	test_cpu_buff_start(iter);
4264 
4265 	event = ftrace_find_event(entry->type);
4266 
4267 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
4268 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4269 			trace_print_lat_context(iter);
4270 		else
4271 			trace_print_context(iter);
4272 	}
4273 
4274 	if (trace_seq_has_overflowed(s))
4275 		return TRACE_TYPE_PARTIAL_LINE;
4276 
4277 	if (event) {
4278 		if (tr->trace_flags & TRACE_ITER(FIELDS))
4279 			return print_event_fields(iter, event);
4280 		/*
4281 		 * For TRACE_EVENT() events, the print_fmt is not
4282 		 * safe to use if the array has delta offsets.
4283 		 * Force printing via the fields.
4284 		 */
4285 		if (tr->text_delta) {
4286 			/* ftrace and system call events are still OK */
4287 			if ((event->type > __TRACE_LAST_TYPE) &&
4288 			    !is_syscall_event(event))
4289 				return print_event_fields(iter, event);
4290 		}
4291 		return event->funcs->trace(iter, sym_flags, event);
4292 	}
4293 
4294 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4295 
4296 	return trace_handle_return(s);
4297 }
4298 
4299 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4300 {
4301 	struct trace_array *tr = iter->tr;
4302 	struct trace_seq *s = &iter->seq;
4303 	struct trace_entry *entry;
4304 	struct trace_event *event;
4305 
4306 	entry = iter->ent;
4307 
4308 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO))
4309 		trace_seq_printf(s, "%d %d %llu ",
4310 				 entry->pid, iter->cpu, iter->ts);
4311 
4312 	if (trace_seq_has_overflowed(s))
4313 		return TRACE_TYPE_PARTIAL_LINE;
4314 
4315 	event = ftrace_find_event(entry->type);
4316 	if (event)
4317 		return event->funcs->raw(iter, 0, event);
4318 
4319 	trace_seq_printf(s, "%d ?\n", entry->type);
4320 
4321 	return trace_handle_return(s);
4322 }
4323 
4324 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4325 {
4326 	struct trace_array *tr = iter->tr;
4327 	struct trace_seq *s = &iter->seq;
4328 	unsigned char newline = '\n';
4329 	struct trace_entry *entry;
4330 	struct trace_event *event;
4331 
4332 	entry = iter->ent;
4333 
4334 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
4335 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4336 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4337 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4338 		if (trace_seq_has_overflowed(s))
4339 			return TRACE_TYPE_PARTIAL_LINE;
4340 	}
4341 
4342 	event = ftrace_find_event(entry->type);
4343 	if (event) {
4344 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4345 		if (ret != TRACE_TYPE_HANDLED)
4346 			return ret;
4347 	}
4348 
4349 	SEQ_PUT_FIELD(s, newline);
4350 
4351 	return trace_handle_return(s);
4352 }
4353 
4354 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4355 {
4356 	struct trace_array *tr = iter->tr;
4357 	struct trace_seq *s = &iter->seq;
4358 	struct trace_entry *entry;
4359 	struct trace_event *event;
4360 
4361 	entry = iter->ent;
4362 
4363 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
4364 		SEQ_PUT_FIELD(s, entry->pid);
4365 		SEQ_PUT_FIELD(s, iter->cpu);
4366 		SEQ_PUT_FIELD(s, iter->ts);
4367 		if (trace_seq_has_overflowed(s))
4368 			return TRACE_TYPE_PARTIAL_LINE;
4369 	}
4370 
4371 	event = ftrace_find_event(entry->type);
4372 	return event ? event->funcs->binary(iter, 0, event) :
4373 		TRACE_TYPE_HANDLED;
4374 }
4375 
4376 int trace_empty(struct trace_iterator *iter)
4377 {
4378 	struct ring_buffer_iter *buf_iter;
4379 	int cpu;
4380 
4381 	/* If we are looking at one CPU buffer, only check that one */
4382 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4383 		cpu = iter->cpu_file;
4384 		buf_iter = trace_buffer_iter(iter, cpu);
4385 		if (buf_iter) {
4386 			if (!ring_buffer_iter_empty(buf_iter))
4387 				return 0;
4388 		} else {
4389 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4390 				return 0;
4391 		}
4392 		return 1;
4393 	}
4394 
4395 	for_each_tracing_cpu(cpu) {
4396 		buf_iter = trace_buffer_iter(iter, cpu);
4397 		if (buf_iter) {
4398 			if (!ring_buffer_iter_empty(buf_iter))
4399 				return 0;
4400 		} else {
4401 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4402 				return 0;
4403 		}
4404 	}
4405 
4406 	return 1;
4407 }
4408 
4409 /*  Called with trace_event_read_lock() held. */
4410 enum print_line_t print_trace_line(struct trace_iterator *iter)
4411 {
4412 	struct trace_array *tr = iter->tr;
4413 	unsigned long trace_flags = tr->trace_flags;
4414 	enum print_line_t ret;
4415 
4416 	if (iter->lost_events) {
4417 		if (iter->lost_events == (unsigned long)-1)
4418 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4419 					 iter->cpu);
4420 		else
4421 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4422 					 iter->cpu, iter->lost_events);
4423 		if (trace_seq_has_overflowed(&iter->seq))
4424 			return TRACE_TYPE_PARTIAL_LINE;
4425 	}
4426 
4427 	if (iter->trace && iter->trace->print_line) {
4428 		ret = iter->trace->print_line(iter);
4429 		if (ret != TRACE_TYPE_UNHANDLED)
4430 			return ret;
4431 	}
4432 
4433 	if (iter->ent->type == TRACE_BPUTS &&
4434 			trace_flags & TRACE_ITER(PRINTK) &&
4435 			trace_flags & TRACE_ITER(PRINTK_MSGONLY))
4436 		return trace_print_bputs_msg_only(iter);
4437 
4438 	if (iter->ent->type == TRACE_BPRINT &&
4439 			trace_flags & TRACE_ITER(PRINTK) &&
4440 			trace_flags & TRACE_ITER(PRINTK_MSGONLY))
4441 		return trace_print_bprintk_msg_only(iter);
4442 
4443 	if (iter->ent->type == TRACE_PRINT &&
4444 			trace_flags & TRACE_ITER(PRINTK) &&
4445 			trace_flags & TRACE_ITER(PRINTK_MSGONLY))
4446 		return trace_print_printk_msg_only(iter);
4447 
4448 	if (trace_flags & TRACE_ITER(BIN))
4449 		return print_bin_fmt(iter);
4450 
4451 	if (trace_flags & TRACE_ITER(HEX))
4452 		return print_hex_fmt(iter);
4453 
4454 	if (trace_flags & TRACE_ITER(RAW))
4455 		return print_raw_fmt(iter);
4456 
4457 	return print_trace_fmt(iter);
4458 }
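/*
 * Illustrative note on the dispatch above: a line is emitted by the
 * first matching formatter, in this order: the lost-events banner, the
 * tracer's own ->print_line(), the printk-msgonly shortcuts for
 * TRACE_BPUTS/TRACE_BPRINT/TRACE_PRINT, then the bin, hex and raw
 * options, and finally the default print_trace_fmt(). For example,
 * with both "hex" and "raw" set in trace_options, hex wins because it
 * is tested first.
 */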
4459 
4460 void trace_latency_header(struct seq_file *m)
4461 {
4462 	struct trace_iterator *iter = m->private;
4463 	struct trace_array *tr = iter->tr;
4464 
4465 	/* print nothing if the buffers are empty */
4466 	if (trace_empty(iter))
4467 		return;
4468 
4469 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4470 		print_trace_header(m, iter);
4471 
4472 	if (!(tr->trace_flags & TRACE_ITER(VERBOSE)))
4473 		print_lat_help_header(m);
4474 }
4475 
4476 void trace_default_header(struct seq_file *m)
4477 {
4478 	struct trace_iterator *iter = m->private;
4479 	struct trace_array *tr = iter->tr;
4480 	unsigned long trace_flags = tr->trace_flags;
4481 
4482 	if (!(trace_flags & TRACE_ITER(CONTEXT_INFO)))
4483 		return;
4484 
4485 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4486 		/* print nothing if the buffers are empty */
4487 		if (trace_empty(iter))
4488 			return;
4489 		print_trace_header(m, iter);
4490 		if (!(trace_flags & TRACE_ITER(VERBOSE)))
4491 			print_lat_help_header(m);
4492 	} else {
4493 		if (!(trace_flags & TRACE_ITER(VERBOSE))) {
4494 			if (trace_flags & TRACE_ITER(IRQ_INFO))
4495 				print_func_help_header_irq(iter->array_buffer,
4496 							   m, trace_flags);
4497 			else
4498 				print_func_help_header(iter->array_buffer, m,
4499 						       trace_flags);
4500 		}
4501 	}
4502 }
4503 
4504 static void test_ftrace_alive(struct seq_file *m)
4505 {
4506 	if (!ftrace_is_dead())
4507 		return;
4508 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4509 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4510 }
4511 
4512 #ifdef CONFIG_TRACER_MAX_TRACE
4513 static void show_snapshot_main_help(struct seq_file *m)
4514 {
4515 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4516 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4517 		    "#                      Takes a snapshot of the main buffer.\n"
4518 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4519 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4520 		    "#                       is not a '0' or '1')\n");
4521 }
4522 
4523 static void show_snapshot_percpu_help(struct seq_file *m)
4524 {
4525 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4526 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4527 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4528 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4529 #else
4530 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4531 		    "#                     Must use main snapshot file to allocate.\n");
4532 #endif
4533 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4534 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4535 		    "#                       is not a '0' or '1')\n");
4536 }
4537 
4538 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4539 {
4540 	if (iter->tr->allocated_snapshot)
4541 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4542 	else
4543 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4544 
4545 	seq_puts(m, "# Snapshot commands:\n");
4546 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4547 		show_snapshot_main_help(m);
4548 	else
4549 		show_snapshot_percpu_help(m);
4550 }
4551 #else
4552 /* Should never be called */
4553 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4554 #endif
4555 
4556 static int s_show(struct seq_file *m, void *v)
4557 {
4558 	struct trace_iterator *iter = v;
4559 	int ret;
4560 
4561 	if (iter->ent == NULL) {
4562 		if (iter->tr) {
4563 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4564 			seq_puts(m, "#\n");
4565 			test_ftrace_alive(m);
4566 		}
4567 		if (iter->snapshot && trace_empty(iter))
4568 			print_snapshot_help(m, iter);
4569 		else if (iter->trace && iter->trace->print_header)
4570 			iter->trace->print_header(m);
4571 		else
4572 			trace_default_header(m);
4573 
4574 	} else if (iter->leftover) {
4575 		/*
4576 		 * If we filled the seq_file buffer earlier, we
4577 		 * want to just show it now.
4578 		 */
4579 		ret = trace_print_seq(m, &iter->seq);
4580 
4581 		/* ret should this time be zero, but you never know */
4582 		iter->leftover = ret;
4583 
4584 	} else {
4585 		ret = print_trace_line(iter);
4586 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4587 			iter->seq.full = 0;
4588 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4589 		}
4590 		ret = trace_print_seq(m, &iter->seq);
4591 		/*
4592 		 * If we overflow the seq_file buffer, then it will
4593 		 * ask us for this data again at start up.
4594 		 * Use that instead.
4595 		 *  ret is 0 if seq_file write succeeded.
4596 		 *        -1 otherwise.
4597 		 */
4598 		iter->leftover = ret;
4599 	}
4600 
4601 	return 0;
4602 }
4603 
4604 /*
4605  * Should be used after trace_array_get(); trace_types_lock
4606  * ensures that i_cdev was already initialized.
4607  */
4608 static inline int tracing_get_cpu(struct inode *inode)
4609 {
4610 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4611 		return (long)inode->i_cdev - 1;
4612 	return RING_BUFFER_ALL_CPUS;
4613 }
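/*
 * Illustrative note (assumption based on trace_create_cpu_file(),
 * which stores "cpu + 1" in i_cdev): a per-CPU file for CPU 2 yields
 * (long)i_cdev - 1 == 2 here, while the top-level files leave i_cdev
 * NULL and fall back to RING_BUFFER_ALL_CPUS.
 */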
4614 
4615 static const struct seq_operations tracer_seq_ops = {
4616 	.start		= s_start,
4617 	.next		= s_next,
4618 	.stop		= s_stop,
4619 	.show		= s_show,
4620 };
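/*
 * Illustrative note: these callbacks implement the standard seq_file
 * protocol for reading the "trace" file. For each read() the core
 * calls s_start(), then alternates s_show()/s_next() until the seq
 * buffer fills or the trace runs dry, and finishes with s_stop();
 * iter->leftover carries a partially printed line over to the next
 * read() call.
 */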
4621 
4622 /*
4623  * Note, as iter itself can be allocated and freed in different
4624  * ways, this function is only used to free its content, and not
4625  * the iterator itself. The only requirement for all the allocations
4626  * is that they must zero all fields (kzalloc), as freeing works with
4627  * either allocated content or NULL.
4628  */
4629 static void free_trace_iter_content(struct trace_iterator *iter)
4630 {
4631 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
4632 	if (iter->fmt != static_fmt_buf)
4633 		kfree(iter->fmt);
4634 
4635 	kfree(iter->temp);
4636 	kfree(iter->buffer_iter);
4637 	mutex_destroy(&iter->mutex);
4638 	free_cpumask_var(iter->started);
4639 }
4640 
4641 static struct trace_iterator *
4642 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4643 {
4644 	struct trace_array *tr = inode->i_private;
4645 	struct trace_iterator *iter;
4646 	int cpu;
4647 
4648 	if (tracing_disabled)
4649 		return ERR_PTR(-ENODEV);
4650 
4651 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4652 	if (!iter)
4653 		return ERR_PTR(-ENOMEM);
4654 
4655 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4656 				    GFP_KERNEL);
4657 	if (!iter->buffer_iter)
4658 		goto release;
4659 
4660 	/*
4661 	 * trace_find_next_entry() may need to save off iter->ent.
4662 	 * It will place it into the iter->temp buffer. As most
4663 	 * events are less than 128, allocate a buffer of that size.
4664 	 * If one is greater, then trace_find_next_entry() will
4665 	 * allocate a new buffer to adjust for the bigger iter->ent.
4666 	 * It's not critical if it fails to get allocated here.
4667 	 */
4668 	iter->temp = kmalloc(128, GFP_KERNEL);
4669 	if (iter->temp)
4670 		iter->temp_size = 128;
4671 
4672 	/*
4673 	 * trace_event_printf() may need to modify the given format
4674 	 * string to replace %p with %px so that it shows the real address
4675 	 * instead of a hash value. However, that is only needed for event
4676 	 * tracing; other tracers may not need it. Defer the allocation
4677 	 * until it is needed.
4678 	 */
4679 	iter->fmt = NULL;
4680 	iter->fmt_size = 0;
4681 
4682 	mutex_lock(&trace_types_lock);
4683 	iter->trace = tr->current_trace;
4684 
4685 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4686 		goto fail;
4687 
4688 	iter->tr = tr;
4689 
4690 #ifdef CONFIG_TRACER_MAX_TRACE
4691 	/* Currently only the top directory has a snapshot */
4692 	if (tr->current_trace->print_max || snapshot)
4693 		iter->array_buffer = &tr->max_buffer;
4694 	else
4695 #endif
4696 		iter->array_buffer = &tr->array_buffer;
4697 	iter->snapshot = snapshot;
4698 	iter->pos = -1;
4699 	iter->cpu_file = tracing_get_cpu(inode);
4700 	mutex_init(&iter->mutex);
4701 
4702 	/* Notify the tracer early; before we stop tracing. */
4703 	if (iter->trace->open)
4704 		iter->trace->open(iter);
4705 
4706 	/* Annotate start of buffers if we had overruns */
4707 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4708 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4709 
4710 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4711 	if (trace_clocks[tr->clock_id].in_ns)
4712 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4713 
4714 	/*
4715 	 * If pause-on-trace is enabled, then stop the trace while
4716 	 * dumping, unless this is the "snapshot" file
4717 	 */
4718 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER(PAUSE_ON_TRACE))) {
4719 		iter->iter_flags |= TRACE_FILE_PAUSE;
4720 		tracing_stop_tr(tr);
4721 	}
4722 
4723 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4724 		for_each_tracing_cpu(cpu) {
4725 			iter->buffer_iter[cpu] =
4726 				ring_buffer_read_start(iter->array_buffer->buffer,
4727 						       cpu, GFP_KERNEL);
4728 			tracing_iter_reset(iter, cpu);
4729 		}
4730 	} else {
4731 		cpu = iter->cpu_file;
4732 		iter->buffer_iter[cpu] =
4733 			ring_buffer_read_start(iter->array_buffer->buffer,
4734 					       cpu, GFP_KERNEL);
4735 		tracing_iter_reset(iter, cpu);
4736 	}
4737 
4738 	mutex_unlock(&trace_types_lock);
4739 
4740 	return iter;
4741 
4742  fail:
4743 	mutex_unlock(&trace_types_lock);
4744 	free_trace_iter_content(iter);
4745 release:
4746 	seq_release_private(inode, file);
4747 	return ERR_PTR(-ENOMEM);
4748 }
4749 
4750 int tracing_open_generic(struct inode *inode, struct file *filp)
4751 {
4752 	int ret;
4753 
4754 	ret = tracing_check_open_get_tr(NULL);
4755 	if (ret)
4756 		return ret;
4757 
4758 	filp->private_data = inode->i_private;
4759 	return 0;
4760 }
4761 
4762 bool tracing_is_disabled(void)
4763 {
4764 	return (tracing_disabled) ? true: false;
4765 }
4766 
4767 /*
4768  * Open and update trace_array ref count.
4769  * Must have the current trace_array passed to it.
4770  */
4771 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4772 {
4773 	struct trace_array *tr = inode->i_private;
4774 	int ret;
4775 
4776 	ret = tracing_check_open_get_tr(tr);
4777 	if (ret)
4778 		return ret;
4779 
4780 	filp->private_data = inode->i_private;
4781 
4782 	return 0;
4783 }
4784 
4785 /*
4786  * The private pointer of the inode is the trace_event_file.
4787  * Update the tr ref count associated to it.
4788  */
4789 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4790 {
4791 	struct trace_event_file *file = inode->i_private;
4792 	int ret;
4793 
4794 	ret = tracing_check_open_get_tr(file->tr);
4795 	if (ret)
4796 		return ret;
4797 
4798 	guard(mutex)(&event_mutex);
4799 
4800 	/* Fail if the file is marked for removal */
4801 	if (file->flags & EVENT_FILE_FL_FREED) {
4802 		trace_array_put(file->tr);
4803 		return -ENODEV;
4804 	} else {
4805 		event_file_get(file);
4806 	}
4807 
4808 	filp->private_data = inode->i_private;
4809 
4810 	return 0;
4811 }
4812 
4813 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4814 {
4815 	struct trace_event_file *file = inode->i_private;
4816 
4817 	trace_array_put(file->tr);
4818 	event_file_put(file);
4819 
4820 	return 0;
4821 }
4822 
4823 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4824 {
4825 	tracing_release_file_tr(inode, filp);
4826 	return single_release(inode, filp);
4827 }
4828 
4829 static int tracing_release(struct inode *inode, struct file *file)
4830 {
4831 	struct trace_array *tr = inode->i_private;
4832 	struct seq_file *m = file->private_data;
4833 	struct trace_iterator *iter;
4834 	int cpu;
4835 
4836 	if (!(file->f_mode & FMODE_READ)) {
4837 		trace_array_put(tr);
4838 		return 0;
4839 	}
4840 
4841 	/* Writes do not use seq_file */
4842 	iter = m->private;
4843 	mutex_lock(&trace_types_lock);
4844 
4845 	for_each_tracing_cpu(cpu) {
4846 		if (iter->buffer_iter[cpu])
4847 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4848 	}
4849 
4850 	if (iter->trace && iter->trace->close)
4851 		iter->trace->close(iter);
4852 
4853 	if (iter->iter_flags & TRACE_FILE_PAUSE)
4854 		/* reenable tracing if it was previously enabled */
4855 		tracing_start_tr(tr);
4856 
4857 	__trace_array_put(tr);
4858 
4859 	mutex_unlock(&trace_types_lock);
4860 
4861 	free_trace_iter_content(iter);
4862 	seq_release_private(inode, file);
4863 
4864 	return 0;
4865 }
4866 
4867 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4868 {
4869 	struct trace_array *tr = inode->i_private;
4870 
4871 	trace_array_put(tr);
4872 	return 0;
4873 }
4874 
4875 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4876 {
4877 	struct trace_array *tr = inode->i_private;
4878 
4879 	trace_array_put(tr);
4880 
4881 	return single_release(inode, file);
4882 }
4883 
4884 static int tracing_open(struct inode *inode, struct file *file)
4885 {
4886 	struct trace_array *tr = inode->i_private;
4887 	struct trace_iterator *iter;
4888 	int ret;
4889 
4890 	ret = tracing_check_open_get_tr(tr);
4891 	if (ret)
4892 		return ret;
4893 
4894 	/* If this file was open for write, then erase contents */
4895 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4896 		int cpu = tracing_get_cpu(inode);
4897 		struct array_buffer *trace_buf = &tr->array_buffer;
4898 
4899 #ifdef CONFIG_TRACER_MAX_TRACE
4900 		if (tr->current_trace->print_max)
4901 			trace_buf = &tr->max_buffer;
4902 #endif
4903 
4904 		if (cpu == RING_BUFFER_ALL_CPUS)
4905 			tracing_reset_online_cpus(trace_buf);
4906 		else
4907 			tracing_reset_cpu(trace_buf, cpu);
4908 	}
4909 
4910 	if (file->f_mode & FMODE_READ) {
4911 		iter = __tracing_open(inode, file, false);
4912 		if (IS_ERR(iter))
4913 			ret = PTR_ERR(iter);
4914 		else if (tr->trace_flags & TRACE_ITER(LATENCY_FMT))
4915 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4916 	}
4917 
4918 	if (ret < 0)
4919 		trace_array_put(tr);
4920 
4921 	return ret;
4922 }
4923 
4924 /*
4925  * Some tracers are not suitable for instance buffers.
4926  * A tracer is always available for the global array (toplevel)
4927  * or if it explicitly states that it is.
4928  */
4929 static bool
4930 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4931 {
4932 #ifdef CONFIG_TRACER_SNAPSHOT
4933 	/* arrays with mapped buffer range do not have snapshots */
4934 	if (tr->range_addr_start && t->use_max_tr)
4935 		return false;
4936 #endif
4937 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4938 }
4939 
4940 /* Find the next tracer that this trace array may use */
4941 static struct tracer *
4942 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4943 {
4944 	while (t && !trace_ok_for_array(t, tr))
4945 		t = t->next;
4946 
4947 	return t;
4948 }
4949 
4950 static void *
4951 t_next(struct seq_file *m, void *v, loff_t *pos)
4952 {
4953 	struct trace_array *tr = m->private;
4954 	struct tracer *t = v;
4955 
4956 	(*pos)++;
4957 
4958 	if (t)
4959 		t = get_tracer_for_array(tr, t->next);
4960 
4961 	return t;
4962 }
4963 
4964 static void *t_start(struct seq_file *m, loff_t *pos)
4965 {
4966 	struct trace_array *tr = m->private;
4967 	struct tracer *t;
4968 	loff_t l = 0;
4969 
4970 	mutex_lock(&trace_types_lock);
4971 
4972 	t = get_tracer_for_array(tr, trace_types);
4973 	for (; t && l < *pos; t = t_next(m, t, &l))
4974 			;
4975 
4976 	return t;
4977 }
4978 
4979 static void t_stop(struct seq_file *m, void *p)
4980 {
4981 	mutex_unlock(&trace_types_lock);
4982 }
4983 
4984 static int t_show(struct seq_file *m, void *v)
4985 {
4986 	struct tracer *t = v;
4987 
4988 	if (!t)
4989 		return 0;
4990 
4991 	seq_puts(m, t->name);
4992 	if (t->next)
4993 		seq_putc(m, ' ');
4994 	else
4995 		seq_putc(m, '\n');
4996 
4997 	return 0;
4998 }
4999 
5000 static const struct seq_operations show_traces_seq_ops = {
5001 	.start		= t_start,
5002 	.next		= t_next,
5003 	.stop		= t_stop,
5004 	.show		= t_show,
5005 };
5006 
5007 static int show_traces_open(struct inode *inode, struct file *file)
5008 {
5009 	struct trace_array *tr = inode->i_private;
5010 	struct seq_file *m;
5011 	int ret;
5012 
5013 	ret = tracing_check_open_get_tr(tr);
5014 	if (ret)
5015 		return ret;
5016 
5017 	ret = seq_open(file, &show_traces_seq_ops);
5018 	if (ret) {
5019 		trace_array_put(tr);
5020 		return ret;
5021 	}
5022 
5023 	m = file->private_data;
5024 	m->private = tr;
5025 
5026 	return 0;
5027 }
5028 
5029 static int tracing_seq_release(struct inode *inode, struct file *file)
5030 {
5031 	struct trace_array *tr = inode->i_private;
5032 
5033 	trace_array_put(tr);
5034 	return seq_release(inode, file);
5035 }
5036 
5037 static ssize_t
5038 tracing_write_stub(struct file *filp, const char __user *ubuf,
5039 		   size_t count, loff_t *ppos)
5040 {
5041 	return count;
5042 }
5043 
5044 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5045 {
5046 	int ret;
5047 
5048 	if (file->f_mode & FMODE_READ)
5049 		ret = seq_lseek(file, offset, whence);
5050 	else
5051 		file->f_pos = ret = 0;
5052 
5053 	return ret;
5054 }
5055 
5056 static const struct file_operations tracing_fops = {
5057 	.open		= tracing_open,
5058 	.read		= seq_read,
5059 	.read_iter	= seq_read_iter,
5060 	.splice_read	= copy_splice_read,
5061 	.write		= tracing_write_stub,
5062 	.llseek		= tracing_lseek,
5063 	.release	= tracing_release,
5064 };
5065 
5066 static const struct file_operations show_traces_fops = {
5067 	.open		= show_traces_open,
5068 	.read		= seq_read,
5069 	.llseek		= seq_lseek,
5070 	.release	= tracing_seq_release,
5071 };
5072 
5073 static ssize_t
5074 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5075 		     size_t count, loff_t *ppos)
5076 {
5077 	struct trace_array *tr = file_inode(filp)->i_private;
5078 	char *mask_str __free(kfree) = NULL;
5079 	int len;
5080 
5081 	len = snprintf(NULL, 0, "%*pb\n",
5082 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5083 	mask_str = kmalloc(len, GFP_KERNEL);
5084 	if (!mask_str)
5085 		return -ENOMEM;
5086 
5087 	len = snprintf(mask_str, len, "%*pb\n",
5088 		       cpumask_pr_args(tr->tracing_cpumask));
5089 	if (len >= count)
5090 		return -EINVAL;
5091 
5092 	return simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5093 }
5094 
5095 int tracing_set_cpumask(struct trace_array *tr,
5096 			cpumask_var_t tracing_cpumask_new)
5097 {
5098 	int cpu;
5099 
5100 	if (!tr)
5101 		return -EINVAL;
5102 
5103 	local_irq_disable();
5104 	arch_spin_lock(&tr->max_lock);
5105 	for_each_tracing_cpu(cpu) {
5106 		/*
5107 		 * Increase/decrease the disabled counter if we are
5108 		 * about to flip a bit in the cpumask:
5109 		 */
5110 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5111 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5112 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5113 #ifdef CONFIG_TRACER_MAX_TRACE
5114 			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5115 #endif
5116 		}
5117 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5118 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5119 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5120 #ifdef CONFIG_TRACER_MAX_TRACE
5121 			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5122 #endif
5123 		}
5124 	}
5125 	arch_spin_unlock(&tr->max_lock);
5126 	local_irq_enable();
5127 
5128 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5129 
5130 	return 0;
5131 }
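/*
 * Illustrative usage (not part of this file): user space reaches this
 * through the "tracing_cpumask" file, e.g.
 *
 *	# echo 3 > /sys/kernel/tracing/tracing_cpumask
 *
 * restricts recording to CPUs 0 and 1; per-CPU recording is disabled
 * or re-enabled above as the individual bits flip.
 */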
5132 
5133 static ssize_t
5134 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5135 		      size_t count, loff_t *ppos)
5136 {
5137 	struct trace_array *tr = file_inode(filp)->i_private;
5138 	cpumask_var_t tracing_cpumask_new;
5139 	int err;
5140 
5141 	if (count == 0 || count > KMALLOC_MAX_SIZE)
5142 		return -EINVAL;
5143 
5144 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5145 		return -ENOMEM;
5146 
5147 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5148 	if (err)
5149 		goto err_free;
5150 
5151 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5152 	if (err)
5153 		goto err_free;
5154 
5155 	free_cpumask_var(tracing_cpumask_new);
5156 
5157 	return count;
5158 
5159 err_free:
5160 	free_cpumask_var(tracing_cpumask_new);
5161 
5162 	return err;
5163 }
5164 
5165 static const struct file_operations tracing_cpumask_fops = {
5166 	.open		= tracing_open_generic_tr,
5167 	.read		= tracing_cpumask_read,
5168 	.write		= tracing_cpumask_write,
5169 	.release	= tracing_release_generic_tr,
5170 	.llseek		= generic_file_llseek,
5171 };
5172 
5173 static int tracing_trace_options_show(struct seq_file *m, void *v)
5174 {
5175 	struct tracer_opt *trace_opts;
5176 	struct trace_array *tr = m->private;
5177 	struct tracer_flags *flags;
5178 	u32 tracer_flags;
5179 	int i;
5180 
5181 	guard(mutex)(&trace_types_lock);
5182 
5183 	for (i = 0; trace_options[i]; i++) {
5184 		if (tr->trace_flags & (1ULL << i))
5185 			seq_printf(m, "%s\n", trace_options[i]);
5186 		else
5187 			seq_printf(m, "no%s\n", trace_options[i]);
5188 	}
5189 
5190 	flags = tr->current_trace_flags;
5191 	if (!flags || !flags->opts)
5192 		return 0;
5193 
5194 	tracer_flags = flags->val;
5195 	trace_opts = flags->opts;
5196 
5197 	for (i = 0; trace_opts[i].name; i++) {
5198 		if (tracer_flags & trace_opts[i].bit)
5199 			seq_printf(m, "%s\n", trace_opts[i].name);
5200 		else
5201 			seq_printf(m, "no%s\n", trace_opts[i].name);
5202 	}
5203 
5204 	return 0;
5205 }
5206 
5207 static int __set_tracer_option(struct trace_array *tr,
5208 			       struct tracer_flags *tracer_flags,
5209 			       struct tracer_opt *opts, int neg)
5210 {
5211 	struct tracer *trace = tracer_flags->trace;
5212 	int ret = 0;
5213 
5214 	if (trace->set_flag)
5215 		ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5216 	if (ret)
5217 		return ret;
5218 
5219 	if (neg)
5220 		tracer_flags->val &= ~opts->bit;
5221 	else
5222 		tracer_flags->val |= opts->bit;
5223 	return 0;
5224 }
5225 
5226 /* Try to assign a tracer specific option */
5227 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5228 {
5229 	struct tracer_flags *tracer_flags = tr->current_trace_flags;
5230 	struct tracer_opt *opts = NULL;
5231 	int i;
5232 
5233 	if (!tracer_flags || !tracer_flags->opts)
5234 		return 0;
5235 
5236 	for (i = 0; tracer_flags->opts[i].name; i++) {
5237 		opts = &tracer_flags->opts[i];
5238 
5239 		if (strcmp(cmp, opts->name) == 0)
5240 			return __set_tracer_option(tr, tracer_flags, opts, neg);
5241 	}
5242 
5243 	return -EINVAL;
5244 }
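
/*
 * Example (illustrative): tracer-specific options handled here appear in
 * the same "trace_options" file as the core flags and take the same "no"
 * prefix to negate them, e.g. for the function_graph tracer something like:
 *
 *	# echo funcgraph-proc > /sys/kernel/tracing/trace_options
 *	# echo nofuncgraph-proc > /sys/kernel/tracing/trace_options
 *
 * The exact option names depend on the current tracer; they are listed by
 * tracing_trace_options_show() above when the file is read.
 */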
5245 
5246 /* Some tracers require overwrite to stay enabled */
5247 int trace_keep_overwrite(struct tracer *tracer, u64 mask, int set)
5248 {
5249 	if (tracer->enabled && (mask & TRACE_ITER(OVERWRITE)) && !set)
5250 		return -1;
5251 
5252 	return 0;
5253 }
5254 
5255 int set_tracer_flag(struct trace_array *tr, u64 mask, int enabled)
5256 {
5257 	switch (mask) {
5258 	case TRACE_ITER(RECORD_TGID):
5259 	case TRACE_ITER(RECORD_CMD):
5260 	case TRACE_ITER(TRACE_PRINTK):
5261 	case TRACE_ITER(COPY_MARKER):
5262 		lockdep_assert_held(&event_mutex);
5263 	}
5264 
5265 	/* do nothing if flag is already set */
5266 	if (!!(tr->trace_flags & mask) == !!enabled)
5267 		return 0;
5268 
5269 	/* Give the tracer a chance to approve the change */
5270 	if (tr->current_trace->flag_changed)
5271 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5272 			return -EINVAL;
5273 
5274 	switch (mask) {
5275 	case TRACE_ITER(TRACE_PRINTK):
5276 		if (enabled) {
5277 			update_printk_trace(tr);
5278 		} else {
5279 			/*
5280 			 * The global_trace cannot clear this.
5281 			 * Its flag only gets cleared if another instance sets it.
5282 			 */
5283 			if (printk_trace == &global_trace)
5284 				return -EINVAL;
5285 			/*
5286 			 * An instance must always have it set.
5287 			 * An instance must always have it set;
5288 			 * by default, that's the global_trace instance.
5289 			if (printk_trace == tr)
5290 				update_printk_trace(&global_trace);
5291 		}
5292 		break;
5293 
5294 	case TRACE_ITER(COPY_MARKER):
5295 		update_marker_trace(tr, enabled);
5296 		/* update_marker_trace updates the tr->trace_flags */
5297 		return 0;
5298 	}
5299 
5300 	if (enabled)
5301 		tr->trace_flags |= mask;
5302 	else
5303 		tr->trace_flags &= ~mask;
5304 
5305 	switch (mask) {
5306 	case TRACE_ITER(RECORD_CMD):
5307 		trace_event_enable_cmd_record(enabled);
5308 		break;
5309 
5310 	case TRACE_ITER(RECORD_TGID):
5311 
5312 		if (trace_alloc_tgid_map() < 0) {
5313 			tr->trace_flags &= ~TRACE_ITER(RECORD_TGID);
5314 			return -ENOMEM;
5315 		}
5316 
5317 		trace_event_enable_tgid_record(enabled);
5318 		break;
5319 
5320 	case TRACE_ITER(EVENT_FORK):
5321 		trace_event_follow_fork(tr, enabled);
5322 		break;
5323 
5324 	case TRACE_ITER(FUNC_FORK):
5325 		ftrace_pid_follow_fork(tr, enabled);
5326 		break;
5327 
5328 	case TRACE_ITER(OVERWRITE):
5329 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5330 #ifdef CONFIG_TRACER_MAX_TRACE
5331 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5332 #endif
5333 		break;
5334 
5335 	case TRACE_ITER(PRINTK):
5336 		trace_printk_start_stop_comm(enabled);
5337 		trace_printk_control(enabled);
5338 		break;
5339 
5340 #if defined(CONFIG_FUNCTION_PROFILER) && defined(CONFIG_FUNCTION_GRAPH_TRACER)
5341 	case TRACE_GRAPH_GRAPH_TIME:
5342 		ftrace_graph_graph_time_control(enabled);
5343 		break;
5344 #endif
5345 	}
5346 
5347 	return 0;
5348 }
5349 
5350 int trace_set_options(struct trace_array *tr, char *option)
5351 {
5352 	char *cmp;
5353 	int neg = 0;
5354 	int ret;
5355 	size_t orig_len = strlen(option);
5356 	int len;
5357 
5358 	cmp = strstrip(option);
5359 
5360 	len = str_has_prefix(cmp, "no");
5361 	if (len)
5362 		neg = 1;
5363 
5364 	cmp += len;
5365 
5366 	mutex_lock(&event_mutex);
5367 	mutex_lock(&trace_types_lock);
5368 
5369 	ret = match_string(trace_options, -1, cmp);
5370 	/* If no option could be set, test the specific tracer options */
5371 	if (ret < 0)
5372 		ret = set_tracer_option(tr, cmp, neg);
5373 	else
5374 		ret = set_tracer_flag(tr, 1ULL << ret, !neg);
5375 
5376 	mutex_unlock(&trace_types_lock);
5377 	mutex_unlock(&event_mutex);
5378 
5379 	/*
5380 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5381 	 * turn it back into a space.
5382 	 */
5383 	if (orig_len > strlen(option))
5384 		option[strlen(option)] = ' ';
5385 
5386 	return ret;
5387 }
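
/*
 * Example (illustrative): trace_set_options() handles one option string at
 * a time, with an optional "no" prefix to clear a flag, e.g.:
 *
 *	# echo sym-offset > /sys/kernel/tracing/trace_options
 *	# echo noprint-parent > /sys/kernel/tracing/trace_options
 *
 * If the name does not match a core flag in trace_options[], the current
 * tracer's private options are tried via set_tracer_option().
 */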
5388 
5389 static void __init apply_trace_boot_options(void)
5390 {
5391 	char *buf = trace_boot_options_buf;
5392 	char *option;
5393 
5394 	while (true) {
5395 		option = strsep(&buf, ",");
5396 
5397 		if (!option)
5398 			break;
5399 
5400 		if (*option)
5401 			trace_set_options(&global_trace, option);
5402 
5403 		/* Put back the comma to allow this to be called again */
5404 		if (buf)
5405 			*(buf - 1) = ',';
5406 	}
5407 }
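
/*
 * Example (illustrative): the boot-time option buffer walked above is
 * typically filled from the "trace_options=" kernel command line parameter,
 * which takes a comma-separated list in the same format as the
 * trace_options file, e.g.:
 *
 *	trace_options=sym-offset,noirq-info
 *
 * Each entry is handed to trace_set_options() for the global_trace instance.
 */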
5408 
5409 static ssize_t
5410 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5411 			size_t cnt, loff_t *ppos)
5412 {
5413 	struct seq_file *m = filp->private_data;
5414 	struct trace_array *tr = m->private;
5415 	char buf[64];
5416 	int ret;
5417 
5418 	if (cnt >= sizeof(buf))
5419 		return -EINVAL;
5420 
5421 	if (copy_from_user(buf, ubuf, cnt))
5422 		return -EFAULT;
5423 
5424 	buf[cnt] = 0;
5425 
5426 	ret = trace_set_options(tr, buf);
5427 	if (ret < 0)
5428 		return ret;
5429 
5430 	*ppos += cnt;
5431 
5432 	return cnt;
5433 }
5434 
5435 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5436 {
5437 	struct trace_array *tr = inode->i_private;
5438 	int ret;
5439 
5440 	ret = tracing_check_open_get_tr(tr);
5441 	if (ret)
5442 		return ret;
5443 
5444 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5445 	if (ret < 0)
5446 		trace_array_put(tr);
5447 
5448 	return ret;
5449 }
5450 
5451 static const struct file_operations tracing_iter_fops = {
5452 	.open		= tracing_trace_options_open,
5453 	.read		= seq_read,
5454 	.llseek		= seq_lseek,
5455 	.release	= tracing_single_release_tr,
5456 	.write		= tracing_trace_options_write,
5457 };
5458 
5459 static const char readme_msg[] =
5460 	"tracing mini-HOWTO:\n\n"
5461 	"By default tracefs removes all OTH file permission bits.\n"
5462 	"When mounting tracefs an optional group id can be specified\n"
5463 	"which adds the group to every directory and file in tracefs:\n\n"
5464 	"\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
5465 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5466 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5467 	" Important files:\n"
5468 	"  trace\t\t\t- The static contents of the buffer\n"
5469 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5470 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5471 	"  current_tracer\t- function and latency tracers\n"
5472 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5473 	"  error_log\t- error log for failed commands (that support it)\n"
5474 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5475 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5476 	"  trace_clock\t\t- change the clock used to order events\n"
5477 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5478 	"      global:   Synced across CPUs but slows tracing down.\n"
5479 	"     counter:   Not a clock, but just an increment\n"
5480 	"      uptime:   Jiffy counter from time of boot\n"
5481 	"        perf:   Same clock that perf events use\n"
5482 #ifdef CONFIG_X86_64
5483 	"     x86-tsc:   TSC cycle counter\n"
5484 #endif
5485 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5486 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5487 	"    absolute:   Absolute (standalone) timestamp\n"
5488 	"\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5489 	"\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
5490 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5491 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5492 	"\t\t\t  Remove sub-buffer with rmdir\n"
5493 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5494 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5495 	"\t\t\t  option name\n"
5496 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5497 #ifdef CONFIG_DYNAMIC_FTRACE
5498 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5499 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5500 	"\t\t\t  functions\n"
5501 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5502 	"\t     modules: Can select a group via module\n"
5503 	"\t      Format: :mod:<module-name>\n"
5504 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5505 	"\t    triggers: a command to perform when function is hit\n"
5506 	"\t      Format: <function>:<trigger>[:count]\n"
5507 	"\t     trigger: traceon, traceoff\n"
5508 	"\t\t      enable_event:<system>:<event>\n"
5509 	"\t\t      disable_event:<system>:<event>\n"
5510 #ifdef CONFIG_STACKTRACE
5511 	"\t\t      stacktrace\n"
5512 #endif
5513 #ifdef CONFIG_TRACER_SNAPSHOT
5514 	"\t\t      snapshot\n"
5515 #endif
5516 	"\t\t      dump\n"
5517 	"\t\t      cpudump\n"
5518 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5519 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5520 	"\t     The first one will disable tracing every time do_fault is hit\n"
5521 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5522 	"\t       The first time do_trap is hit and it disables tracing, the\n"
5523 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5524 	"\t       the counter will not decrement. It only decrements when the\n"
5525 	"\t       trigger did work\n"
5526 	"\t     To remove trigger without count:\n"
5527 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5528 	"\t     To remove trigger with a count:\n"
5529 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5530 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5531 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5532 	"\t    modules: Can select a group via module command :mod:\n"
5533 	"\t    Does not accept triggers\n"
5534 #endif /* CONFIG_DYNAMIC_FTRACE */
5535 #ifdef CONFIG_FUNCTION_TRACER
5536 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5537 	"\t\t    (function)\n"
5538 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5539 	"\t\t    (function)\n"
5540 #endif
5541 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5542 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5543 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5544 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5545 #endif
5546 #ifdef CONFIG_TRACER_SNAPSHOT
5547 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5548 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5549 	"\t\t\t  information\n"
5550 #endif
5551 #ifdef CONFIG_STACK_TRACER
5552 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5553 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5554 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5555 	"\t\t\t  new trace)\n"
5556 #ifdef CONFIG_DYNAMIC_FTRACE
5557 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5558 	"\t\t\t  traces\n"
5559 #endif
5560 #endif /* CONFIG_STACK_TRACER */
5561 #ifdef CONFIG_DYNAMIC_EVENTS
5562 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5563 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5564 #endif
5565 #ifdef CONFIG_KPROBE_EVENTS
5566 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5567 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5568 #endif
5569 #ifdef CONFIG_UPROBE_EVENTS
5570 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5571 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5572 #endif
5573 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5574     defined(CONFIG_FPROBE_EVENTS)
5575 	"\t  accepts: event-definitions (one definition per line)\n"
5576 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5577 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5578 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5579 #endif
5580 #ifdef CONFIG_FPROBE_EVENTS
5581 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5582 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5583 #endif
5584 #ifdef CONFIG_HIST_TRIGGERS
5585 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5586 #endif
5587 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5588 	"\t           -:[<group>/][<event>]\n"
5589 #ifdef CONFIG_KPROBE_EVENTS
5590 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5591   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5592 #endif
5593 #ifdef CONFIG_UPROBE_EVENTS
5594   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5595 #endif
5596 	"\t     args: <name>=fetcharg[:type]\n"
5597 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5598 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5599 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5600 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5601 	"\t           <argname>[->field[->field|.field...]],\n"
5602 #endif
5603 #else
5604 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5605 #endif
5606 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5607 	"\t     kernel return probes support: $retval, $arg<N>, $comm\n"
5608 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5609 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5610 	"\t           symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
5611 #ifdef CONFIG_HIST_TRIGGERS
5612 	"\t    field: <stype> <name>;\n"
5613 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5614 	"\t           [unsigned] char/int/long\n"
5615 #endif
5616 	"\t    efield: For event probes ('e' types), the field is one of the fields\n"
5617 	"\t            of the <attached-group>/<attached-event>.\n"
5618 #endif
5619 	"  set_event\t\t- Enables events by name written into it\n"
5620 	"\t\t\t  Can enable module events via: :mod:<module>\n"
5621 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5622 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5623 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5624 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5625 	"\t\t\t  events\n"
5626 	"      filter\t\t- If set, only events passing filter are traced\n"
5627 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5628 	"\t\t\t  <event>:\n"
5629 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5630 	"      filter\t\t- If set, only events passing filter are traced\n"
5631 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5632 	"\t    Format: <trigger>[:count][if <filter>]\n"
5633 	"\t   trigger: traceon, traceoff\n"
5634 	"\t            enable_event:<system>:<event>\n"
5635 	"\t            disable_event:<system>:<event>\n"
5636 #ifdef CONFIG_HIST_TRIGGERS
5637 	"\t            enable_hist:<system>:<event>\n"
5638 	"\t            disable_hist:<system>:<event>\n"
5639 #endif
5640 #ifdef CONFIG_STACKTRACE
5641 	"\t\t    stacktrace\n"
5642 #endif
5643 #ifdef CONFIG_TRACER_SNAPSHOT
5644 	"\t\t    snapshot\n"
5645 #endif
5646 #ifdef CONFIG_HIST_TRIGGERS
5647 	"\t\t    hist (see below)\n"
5648 #endif
5649 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5650 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5651 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5652 	"\t                  events/block/block_unplug/trigger\n"
5653 	"\t   The first disables tracing every time block_unplug is hit.\n"
5654 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5655 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5656 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5657 	"\t   Like function triggers, the counter is only decremented if it\n"
5658 	"\t    enabled or disabled tracing.\n"
5659 	"\t   To remove a trigger without a count:\n"
5660 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5661 	"\t   To remove a trigger with a count:\n"
5662 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5663 	"\t   Filters can be ignored when removing a trigger.\n"
5664 #ifdef CONFIG_HIST_TRIGGERS
5665 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5666 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5667 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5668 	"\t            [:values=<field1[,field2,...]>]\n"
5669 	"\t            [:sort=<field1[,field2,...]>]\n"
5670 	"\t            [:size=#entries]\n"
5671 	"\t            [:pause][:continue][:clear]\n"
5672 	"\t            [:name=histname1]\n"
5673 	"\t            [:nohitcount]\n"
5674 	"\t            [:<handler>.<action>]\n"
5675 	"\t            [if <filter>]\n\n"
5676 	"\t    Note, special fields can be used as well:\n"
5677 	"\t            common_timestamp - to record current timestamp\n"
5678 	"\t            common_cpu - to record the CPU the event happened on\n"
5679 	"\n"
5680 	"\t    A hist trigger variable can be:\n"
5681 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
5682 	"\t        - a reference to another variable e.g. y=$x,\n"
5683 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5684 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5685 	"\n"
5686 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5687 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5688 	"\t    variable reference, field or numeric literal.\n"
5689 	"\n"
5690 	"\t    When a matching event is hit, an entry is added to a hash\n"
5691 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5692 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5693 	"\t    correspond to fields in the event's format description.  Keys\n"
5694 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
5695 	"\t    Compound keys consisting of up to two fields can be specified\n"
5696 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5697 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5698 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5699 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5700 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5701 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5702 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5703 	"\t    its histogram data will be shared with other triggers of the\n"
5704 	"\t    same name, and trigger hits will update this common data.\n\n"
5705 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5706 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5707 	"\t    triggers attached to an event, there will be a table for each\n"
5708 	"\t    trigger in the output.  The table displayed for a named\n"
5709 	"\t    trigger will be the same as any other instance having the\n"
5710 	"\t    same name.  The default format used to display a given field\n"
5711 	"\t    can be modified by appending any of the following modifiers\n"
5712 	"\t    to the field name, as applicable:\n\n"
5713 	"\t            .hex        display a number as a hex value\n"
5714 	"\t            .sym        display an address as a symbol\n"
5715 	"\t            .sym-offset display an address as a symbol and offset\n"
5716 	"\t            .execname   display a common_pid as a program name\n"
5717 	"\t            .syscall    display a syscall id as a syscall name\n"
5718 	"\t            .log2       display log2 value rather than raw number\n"
5719 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5720 	"\t            .usecs      display a common_timestamp in microseconds\n"
5721 	"\t            .percent    display a number as a percentage value\n"
5722 	"\t            .graph      display a bar-graph of a value\n\n"
5723 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5724 	"\t    trigger or to start a hist trigger but not log any events\n"
5725 	"\t    until told to do so.  'continue' can be used to start or\n"
5726 	"\t    restart a paused hist trigger.\n\n"
5727 	"\t    The 'clear' parameter will clear the contents of a running\n"
5728 	"\t    hist trigger and leave its current paused/active state\n"
5729 	"\t    unchanged.\n\n"
5730 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5731 	"\t    raw hitcount in the histogram.\n\n"
5732 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5733 	"\t    have one event conditionally start and stop another event's\n"
5734 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5735 	"\t    the enable_event and disable_event triggers.\n\n"
5736 	"\t    Hist trigger handlers and actions are executed whenever\n"
5737 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5738 	"\t        <handler>.<action>\n\n"
5739 	"\t    The available handlers are:\n\n"
5740 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5741 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5742 	"\t        onchange(var)            - invoke action if var changes\n\n"
5743 	"\t    The available actions are:\n\n"
5744 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5745 	"\t        save(field,...)                      - save current event fields\n"
5746 #ifdef CONFIG_TRACER_SNAPSHOT
5747 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5748 #endif
5749 #ifdef CONFIG_SYNTH_EVENTS
5750 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5751 	"\t  Write into this file to define/undefine new synthetic events.\n"
5752 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5753 #endif
5754 #endif
5755 ;
5756 
5757 static ssize_t
5758 tracing_readme_read(struct file *filp, char __user *ubuf,
5759 		       size_t cnt, loff_t *ppos)
5760 {
5761 	return simple_read_from_buffer(ubuf, cnt, ppos,
5762 					readme_msg, strlen(readme_msg));
5763 }
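
/*
 * Example (illustrative): the readme_msg text above is exposed read-only as
 * the tracefs "README" file, so the mini-HOWTO can be viewed with
 * something like:
 *
 *	# cat /sys/kernel/tracing/README | less
 */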
5764 
5765 static const struct file_operations tracing_readme_fops = {
5766 	.open		= tracing_open_generic,
5767 	.read		= tracing_readme_read,
5768 	.llseek		= generic_file_llseek,
5769 };
5770 
5771 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5772 static union trace_eval_map_item *
5773 update_eval_map(union trace_eval_map_item *ptr)
5774 {
5775 	if (!ptr->map.eval_string) {
5776 		if (ptr->tail.next) {
5777 			ptr = ptr->tail.next;
5778 			/* Set ptr to the next real item (skip head) */
5779 			ptr++;
5780 		} else
5781 			return NULL;
5782 	}
5783 	return ptr;
5784 }
5785 
5786 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5787 {
5788 	union trace_eval_map_item *ptr = v;
5789 
5790 	/*
5791 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5792 	 * This really should never happen.
5793 	 */
5794 	(*pos)++;
5795 	ptr = update_eval_map(ptr);
5796 	if (WARN_ON_ONCE(!ptr))
5797 		return NULL;
5798 
5799 	ptr++;
5800 	ptr = update_eval_map(ptr);
5801 
5802 	return ptr;
5803 }
5804 
5805 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5806 {
5807 	union trace_eval_map_item *v;
5808 	loff_t l = 0;
5809 
5810 	mutex_lock(&trace_eval_mutex);
5811 
5812 	v = trace_eval_maps;
5813 	if (v)
5814 		v++;
5815 
5816 	while (v && l < *pos) {
5817 		v = eval_map_next(m, v, &l);
5818 	}
5819 
5820 	return v;
5821 }
5822 
5823 static void eval_map_stop(struct seq_file *m, void *v)
5824 {
5825 	mutex_unlock(&trace_eval_mutex);
5826 }
5827 
5828 static int eval_map_show(struct seq_file *m, void *v)
5829 {
5830 	union trace_eval_map_item *ptr = v;
5831 
5832 	seq_printf(m, "%s %ld (%s)\n",
5833 		   ptr->map.eval_string, ptr->map.eval_value,
5834 		   ptr->map.system);
5835 
5836 	return 0;
5837 }
5838 
5839 static const struct seq_operations tracing_eval_map_seq_ops = {
5840 	.start		= eval_map_start,
5841 	.next		= eval_map_next,
5842 	.stop		= eval_map_stop,
5843 	.show		= eval_map_show,
5844 };
5845 
5846 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5847 {
5848 	int ret;
5849 
5850 	ret = tracing_check_open_get_tr(NULL);
5851 	if (ret)
5852 		return ret;
5853 
5854 	return seq_open(filp, &tracing_eval_map_seq_ops);
5855 }
5856 
5857 static const struct file_operations tracing_eval_map_fops = {
5858 	.open		= tracing_eval_map_open,
5859 	.read		= seq_read,
5860 	.llseek		= seq_lseek,
5861 	.release	= seq_release,
5862 };
5863 
5864 static inline union trace_eval_map_item *
5865 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5866 {
5867 	/* Return tail of array given the head */
5868 	return ptr + ptr->head.length + 1;
5869 }
5870 
5871 static void
5872 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5873 			   int len)
5874 {
5875 	struct trace_eval_map **stop;
5876 	struct trace_eval_map **map;
5877 	union trace_eval_map_item *map_array;
5878 	union trace_eval_map_item *ptr;
5879 
5880 	stop = start + len;
5881 
5882 	/*
5883 	 * The trace_eval_maps contains the map plus a head and tail item,
5884 	 * where the head holds the module and length of array, and the
5885 	 * tail holds a pointer to the next list.
5886 	 */
5887 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5888 	if (!map_array) {
5889 		pr_warn("Unable to allocate trace eval mapping\n");
5890 		return;
5891 	}
5892 
5893 	guard(mutex)(&trace_eval_mutex);
5894 
5895 	if (!trace_eval_maps)
5896 		trace_eval_maps = map_array;
5897 	else {
5898 		ptr = trace_eval_maps;
5899 		for (;;) {
5900 			ptr = trace_eval_jmp_to_tail(ptr);
5901 			if (!ptr->tail.next)
5902 				break;
5903 			ptr = ptr->tail.next;
5904 
5905 		}
5906 		ptr->tail.next = map_array;
5907 	}
5908 	map_array->head.mod = mod;
5909 	map_array->head.length = len;
5910 	map_array++;
5911 
5912 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5913 		map_array->map = **map;
5914 		map_array++;
5915 	}
5916 	memset(map_array, 0, sizeof(*map_array));
5917 }
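
/*
 * Illustrative layout (sketch) of one map_array chunk built above, for a
 * module with len == 3 eval maps:
 *
 *	[0] head: { .mod = mod, .length = 3 }
 *	[1] map:  first trace_eval_map copied from *start
 *	[2] map:  second map
 *	[3] map:  third map
 *	[4] tail: zeroed; .tail.next later points at the next chunk
 *
 * trace_eval_jmp_to_tail() relies on this shape: the head, then "length"
 * map entries, followed by the tail item.
 */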
5918 
5919 static void trace_create_eval_file(struct dentry *d_tracer)
5920 {
5921 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5922 			  NULL, &tracing_eval_map_fops);
5923 }
5924 
5925 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5926 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5927 static inline void trace_insert_eval_map_file(struct module *mod,
5928 			      struct trace_eval_map **start, int len) { }
5929 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5930 
5931 static void
5932 trace_event_update_with_eval_map(struct module *mod,
5933 				 struct trace_eval_map **start,
5934 				 int len)
5935 {
5936 	struct trace_eval_map **map;
5937 
5938 	/* Always run sanitizer only if btf_type_tag attr exists. */
5939 	if (len <= 0) {
5940 		if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) &&
5941 		      IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) &&
5942 		      __has_attribute(btf_type_tag)))
5943 			return;
5944 	}
5945 
5946 	map = start;
5947 
5948 	trace_event_update_all(map, len);
5949 
5950 	if (len <= 0)
5951 		return;
5952 
5953 	trace_insert_eval_map_file(mod, start, len);
5954 }
5955 
5956 static ssize_t
5957 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5958 		       size_t cnt, loff_t *ppos)
5959 {
5960 	struct trace_array *tr = filp->private_data;
5961 	char buf[MAX_TRACER_SIZE+2];
5962 	int r;
5963 
5964 	scoped_guard(mutex, &trace_types_lock) {
5965 		r = sprintf(buf, "%s\n", tr->current_trace->name);
5966 	}
5967 
5968 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5969 }
5970 
5971 int tracer_init(struct tracer *t, struct trace_array *tr)
5972 {
5973 	tracing_reset_online_cpus(&tr->array_buffer);
5974 	return t->init(tr);
5975 }
5976 
5977 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5978 {
5979 	int cpu;
5980 
5981 	for_each_tracing_cpu(cpu)
5982 		per_cpu_ptr(buf->data, cpu)->entries = val;
5983 }
5984 
5985 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5986 {
5987 	if (cpu == RING_BUFFER_ALL_CPUS) {
5988 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5989 	} else {
5990 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5991 	}
5992 }
5993 
5994 #ifdef CONFIG_TRACER_MAX_TRACE
5995 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5996 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5997 					struct array_buffer *size_buf, int cpu_id)
5998 {
5999 	int cpu, ret = 0;
6000 
6001 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6002 		for_each_tracing_cpu(cpu) {
6003 			ret = ring_buffer_resize(trace_buf->buffer,
6004 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6005 			if (ret < 0)
6006 				break;
6007 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6008 				per_cpu_ptr(size_buf->data, cpu)->entries;
6009 		}
6010 	} else {
6011 		ret = ring_buffer_resize(trace_buf->buffer,
6012 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6013 		if (ret == 0)
6014 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6015 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6016 	}
6017 
6018 	return ret;
6019 }
6020 #endif /* CONFIG_TRACER_MAX_TRACE */
6021 
6022 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6023 					unsigned long size, int cpu)
6024 {
6025 	int ret;
6026 
6027 	/*
6028 	 * If kernel or user changes the size of the ring buffer
6029 	 * we use the size that was given, and we can forget about
6030 	 * expanding it later.
6031 	 */
6032 	trace_set_ring_buffer_expanded(tr);
6033 
6034 	/* May be called before buffers are initialized */
6035 	if (!tr->array_buffer.buffer)
6036 		return 0;
6037 
6038 	/* Do not allow tracing while resizing ring buffer */
6039 	tracing_stop_tr(tr);
6040 
6041 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6042 	if (ret < 0)
6043 		goto out_start;
6044 
6045 #ifdef CONFIG_TRACER_MAX_TRACE
6046 	if (!tr->allocated_snapshot)
6047 		goto out;
6048 
6049 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6050 	if (ret < 0) {
6051 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6052 						     &tr->array_buffer, cpu);
6053 		if (r < 0) {
6054 			/*
6055 			 * AARGH! We are left with different
6056 			 * size max buffer!!!!
6057 			 * The max buffer is our "snapshot" buffer.
6058 			 * When a tracer needs a snapshot (one of the
6059 			 * latency tracers), it swaps the max buffer
6060 			 * with the saved snapshot. We succeeded in
6061 			 * updating the size of the main buffer, but failed to
6062 			 * update the size of the max buffer. But when we tried
6063 			 * to reset the main buffer to the original size, we
6064 			 * failed there too. This is very unlikely to
6065 			 * happen, but if it does, warn and kill all
6066 			 * tracing.
6067 			 */
6068 			WARN_ON(1);
6069 			tracing_disabled = 1;
6070 		}
6071 		goto out_start;
6072 	}
6073 
6074 	update_buffer_entries(&tr->max_buffer, cpu);
6075 
6076  out:
6077 #endif /* CONFIG_TRACER_MAX_TRACE */
6078 
6079 	update_buffer_entries(&tr->array_buffer, cpu);
6080  out_start:
6081 	tracing_start_tr(tr);
6082 	return ret;
6083 }
6084 
6085 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6086 				  unsigned long size, int cpu_id)
6087 {
6088 	guard(mutex)(&trace_types_lock);
6089 
6090 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6091 		/* make sure this cpu is enabled in the mask */
6092 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
6093 			return -EINVAL;
6094 	}
6095 
6096 	return __tracing_resize_ring_buffer(tr, size, cpu_id);
6097 }
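
/*
 * Example (illustrative): this resize path backs the "buffer_size_kb"
 * files, so from user space a resize looks roughly like:
 *
 *	# echo 4096 > /sys/kernel/tracing/buffer_size_kb		(all CPUs)
 *	# echo 1024 > /sys/kernel/tracing/per_cpu/cpu1/buffer_size_kb
 *
 * The second form passes a single cpu_id instead of RING_BUFFER_ALL_CPUS.
 */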
6098 
6099 struct trace_mod_entry {
6100 	unsigned long	mod_addr;
6101 	char		mod_name[MODULE_NAME_LEN];
6102 };
6103 
6104 struct trace_scratch {
6105 	unsigned int		clock_id;
6106 	unsigned long		text_addr;
6107 	unsigned long		nr_entries;
6108 	struct trace_mod_entry	entries[];
6109 };
6110 
6111 static DEFINE_MUTEX(scratch_mutex);
6112 
6113 static int cmp_mod_entry(const void *key, const void *pivot)
6114 {
6115 	unsigned long addr = (unsigned long)key;
6116 	const struct trace_mod_entry *ent = pivot;
6117 
6118 	if (addr >= ent[0].mod_addr && addr < ent[1].mod_addr)
6119 		return 0;
6120 	else
6121 		return addr - ent->mod_addr;
6122 }
6123 
6124 /**
6125  * trace_adjust_address() - Adjust a previous boot address to the current address.
6126  * @tr: Persistent ring buffer's trace_array.
6127  * @addr: Address in @tr which is adjusted.
6128  */
6129 unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
6130 {
6131 	struct trace_module_delta *module_delta;
6132 	struct trace_scratch *tscratch;
6133 	struct trace_mod_entry *entry;
6134 	unsigned long raddr;
6135 	int idx = 0, nr_entries;
6136 
6137 	/* If we don't have last boot delta, return the address */
6138 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6139 		return addr;
6140 
6141 	/* tr->module_delta must be protected by rcu. */
6142 	guard(rcu)();
6143 	tscratch = tr->scratch;
6144 	/* if there is no tscratch, module_delta must be NULL. */
6145 	module_delta = READ_ONCE(tr->module_delta);
6146 	if (!module_delta || !tscratch->nr_entries ||
6147 	    tscratch->entries[0].mod_addr > addr) {
6148 		raddr = addr + tr->text_delta;
6149 		return __is_kernel(raddr) || is_kernel_core_data(raddr) ||
6150 			is_kernel_rodata(raddr) ? raddr : addr;
6151 	}
6152 
6153 	/* Note that entries must be sorted. */
6154 	nr_entries = tscratch->nr_entries;
6155 	if (nr_entries == 1 ||
6156 	    tscratch->entries[nr_entries - 1].mod_addr < addr)
6157 		idx = nr_entries - 1;
6158 	else {
6159 		entry = __inline_bsearch((void *)addr,
6160 				tscratch->entries,
6161 				nr_entries - 1,
6162 				sizeof(tscratch->entries[0]),
6163 				cmp_mod_entry);
6164 		if (entry)
6165 			idx = entry - tscratch->entries;
6166 	}
6167 
6168 	return addr + module_delta->delta[idx];
6169 }
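
/*
 * Worked example (hypothetical addresses): suppose a module's text was at
 * 0xffffffffc0200000 in the previous boot (recorded in tscratch->entries)
 * and sits at 0xffffffffc0380000 now, so module_delta->delta[idx] holds
 * +0x180000. A function address 0xffffffffc0201234 read out of the
 * persistent buffer is then reported as 0xffffffffc0381234, which matches
 * the symbol's location in the current boot.
 */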
6170 
6171 #ifdef CONFIG_MODULES
6172 static int save_mod(struct module *mod, void *data)
6173 {
6174 	struct trace_array *tr = data;
6175 	struct trace_scratch *tscratch;
6176 	struct trace_mod_entry *entry;
6177 	unsigned int size;
6178 
6179 	tscratch = tr->scratch;
6180 	if (!tscratch)
6181 		return -1;
6182 	size = tr->scratch_size;
6183 
6184 	if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size)
6185 		return -1;
6186 
6187 	entry = &tscratch->entries[tscratch->nr_entries];
6188 
6189 	tscratch->nr_entries++;
6190 
6191 	entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
6192 	strscpy(entry->mod_name, mod->name);
6193 
6194 	return 0;
6195 }
6196 #else
6197 static int save_mod(struct module *mod, void *data)
6198 {
6199 	return 0;
6200 }
6201 #endif
6202 
6203 static void update_last_data(struct trace_array *tr)
6204 {
6205 	struct trace_module_delta *module_delta;
6206 	struct trace_scratch *tscratch;
6207 
6208 	if (!(tr->flags & TRACE_ARRAY_FL_BOOT))
6209 		return;
6210 
6211 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6212 		return;
6213 
6214 	/* Only if the buffer has previous boot data, clear and update it. */
6215 	tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;
6216 
6217 	/* Reset the module list and reload them */
6218 	if (tr->scratch) {
6219 		struct trace_scratch *tscratch = tr->scratch;
6220 
6221 		tscratch->clock_id = tr->clock_id;
6222 		memset(tscratch->entries, 0,
6223 		       flex_array_size(tscratch, entries, tscratch->nr_entries));
6224 		tscratch->nr_entries = 0;
6225 
6226 		guard(mutex)(&scratch_mutex);
6227 		module_for_each_mod(save_mod, tr);
6228 	}
6229 
6230 	/*
6231 	 * Need to clear all CPU buffers as there cannot be events
6232 	 * from the previous boot mixed with events from this boot,
6233 	 * as that would cause a confusing trace. Need to clear all
6234 	 * CPU buffers, even for those that may currently be offline.
6235 	 */
6236 	tracing_reset_all_cpus(&tr->array_buffer);
6237 
6238 	/* Using current data now */
6239 	tr->text_delta = 0;
6240 
6241 	if (!tr->scratch)
6242 		return;
6243 
6244 	tscratch = tr->scratch;
6245 	module_delta = READ_ONCE(tr->module_delta);
6246 	WRITE_ONCE(tr->module_delta, NULL);
6247 	kfree_rcu(module_delta, rcu);
6248 
6249 	/* Set the persistent ring buffer meta data to this address */
6250 	tscratch->text_addr = (unsigned long)_text;
6251 }
6252 
6253 /**
6254  * tracing_update_buffers - used by tracing facility to expand ring buffers
6255  * @tr: The tracing instance
6256  *
6257  * To save memory when tracing is never used on a system that has it
6258  * configured in, the ring buffers are initially set to a minimum size.
6259  * Once a user starts to use the tracing facility, they need to grow
6260  * to their default size.
6261  *
6262  * This function is to be called when a tracer is about to be used.
6263  */
6264 int tracing_update_buffers(struct trace_array *tr)
6265 {
6266 	int ret = 0;
6267 
6268 	guard(mutex)(&trace_types_lock);
6269 
6270 	update_last_data(tr);
6271 
6272 	if (!tr->ring_buffer_expanded)
6273 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6274 						RING_BUFFER_ALL_CPUS);
6275 	return ret;
6276 }
6277 
6278 /*
6279  * Used to clear out the tracer before deletion of an instance.
6280  * Must have trace_types_lock held.
6281  */
6282 static void tracing_set_nop(struct trace_array *tr)
6283 {
6284 	if (tr->current_trace == &nop_trace)
6285 		return;
6286 
6287 	tr->current_trace->enabled--;
6288 
6289 	if (tr->current_trace->reset)
6290 		tr->current_trace->reset(tr);
6291 
6292 	tr->current_trace = &nop_trace;
6293 	tr->current_trace_flags = nop_trace.flags;
6294 }
6295 
6296 static bool tracer_options_updated;
6297 
6298 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6299 {
6300 	struct tracer *trace = NULL;
6301 	struct tracers *t;
6302 #ifdef CONFIG_TRACER_MAX_TRACE
6303 	bool had_max_tr;
6304 #endif
6305 	int ret;
6306 
6307 	guard(mutex)(&trace_types_lock);
6308 
6309 	update_last_data(tr);
6310 
6311 	if (!tr->ring_buffer_expanded) {
6312 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6313 						RING_BUFFER_ALL_CPUS);
6314 		if (ret < 0)
6315 			return ret;
6316 		ret = 0;
6317 	}
6318 
6319 	list_for_each_entry(t, &tr->tracers, list) {
6320 		if (strcmp(t->tracer->name, buf) == 0) {
6321 			trace = t->tracer;
6322 			break;
6323 		}
6324 	}
6325 	if (!trace)
6326 		return -EINVAL;
6327 
6328 	if (trace == tr->current_trace)
6329 		return 0;
6330 
6331 #ifdef CONFIG_TRACER_SNAPSHOT
6332 	if (trace->use_max_tr) {
6333 		local_irq_disable();
6334 		arch_spin_lock(&tr->max_lock);
6335 		ret = tr->cond_snapshot ? -EBUSY : 0;
6336 		arch_spin_unlock(&tr->max_lock);
6337 		local_irq_enable();
6338 		if (ret)
6339 			return ret;
6340 	}
6341 #endif
6342 	/* Some tracers won't work on kernel command line */
6343 	if (system_state < SYSTEM_RUNNING && trace->noboot) {
6344 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6345 			trace->name);
6346 		return -EINVAL;
6347 	}
6348 
6349 	/* Some tracers are only allowed for the top level buffer */
6350 	if (!trace_ok_for_array(trace, tr))
6351 		return -EINVAL;
6352 
6353 	/* If trace pipe files are being read, we can't change the tracer */
6354 	if (tr->trace_ref)
6355 		return -EBUSY;
6356 
6357 	trace_branch_disable();
6358 
6359 	tr->current_trace->enabled--;
6360 
6361 	if (tr->current_trace->reset)
6362 		tr->current_trace->reset(tr);
6363 
6364 #ifdef CONFIG_TRACER_MAX_TRACE
6365 	had_max_tr = tr->current_trace->use_max_tr;
6366 
6367 	/* Current trace needs to be nop_trace before synchronize_rcu */
6368 	tr->current_trace = &nop_trace;
6369 	tr->current_trace_flags = nop_trace.flags;
6370 
6371 	if (had_max_tr && !trace->use_max_tr) {
6372 		/*
6373 		 * We need to make sure that the update_max_tr sees that
6374 		 * current_trace changed to nop_trace to keep it from
6375 		 * swapping the buffers after we resize it.
6376 		 * update_max_tr() is called with interrupts disabled,
6377 		 * so a synchronize_rcu() is sufficient.
6378 		 */
6379 		synchronize_rcu();
6380 		free_snapshot(tr);
6381 		tracing_disarm_snapshot(tr);
6382 	}
6383 
6384 	if (!had_max_tr && trace->use_max_tr) {
6385 		ret = tracing_arm_snapshot_locked(tr);
6386 		if (ret)
6387 			return ret;
6388 	}
6389 #else
6390 	tr->current_trace = &nop_trace;
6391 #endif
6392 
6393 	tr->current_trace_flags = t->flags ? : t->tracer->flags;
6394 
6395 	if (trace->init) {
6396 		ret = tracer_init(trace, tr);
6397 		if (ret) {
6398 #ifdef CONFIG_TRACER_MAX_TRACE
6399 			if (trace->use_max_tr)
6400 				tracing_disarm_snapshot(tr);
6401 #endif
6402 			tr->current_trace_flags = nop_trace.flags;
6403 			return ret;
6404 		}
6405 	}
6406 
6407 	tr->current_trace = trace;
6408 	tr->current_trace->enabled++;
6409 	trace_branch_enable(tr);
6410 
6411 	return 0;
6412 }
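
/*
 * Example (illustrative): tracing_set_tracer() is reached when writing the
 * "current_tracer" file, e.g.:
 *
 *	# cat /sys/kernel/tracing/available_tracers
 *	function_graph function nop		(example output)
 *	# echo function_graph > /sys/kernel/tracing/current_tracer
 *	# echo nop > /sys/kernel/tracing/current_tracer
 *
 * Writing "nop" is the usual way to turn the current tracer back off.
 */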
6413 
6414 static ssize_t
6415 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6416 			size_t cnt, loff_t *ppos)
6417 {
6418 	struct trace_array *tr = filp->private_data;
6419 	char buf[MAX_TRACER_SIZE+1];
6420 	char *name;
6421 	size_t ret;
6422 	int err;
6423 
6424 	ret = cnt;
6425 
6426 	if (cnt > MAX_TRACER_SIZE)
6427 		cnt = MAX_TRACER_SIZE;
6428 
6429 	if (copy_from_user(buf, ubuf, cnt))
6430 		return -EFAULT;
6431 
6432 	buf[cnt] = 0;
6433 
6434 	name = strim(buf);
6435 
6436 	err = tracing_set_tracer(tr, name);
6437 	if (err)
6438 		return err;
6439 
6440 	*ppos += ret;
6441 
6442 	return ret;
6443 }
6444 
6445 static ssize_t
6446 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6447 		   size_t cnt, loff_t *ppos)
6448 {
6449 	char buf[64];
6450 	int r;
6451 
6452 	r = snprintf(buf, sizeof(buf), "%ld\n",
6453 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6454 	if (r > sizeof(buf))
6455 		r = sizeof(buf);
6456 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6457 }
6458 
6459 static ssize_t
6460 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6461 		    size_t cnt, loff_t *ppos)
6462 {
6463 	unsigned long val;
6464 	int ret;
6465 
6466 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6467 	if (ret)
6468 		return ret;
6469 
6470 	*ptr = val * 1000;
6471 
6472 	return cnt;
6473 }
6474 
6475 static ssize_t
6476 tracing_thresh_read(struct file *filp, char __user *ubuf,
6477 		    size_t cnt, loff_t *ppos)
6478 {
6479 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6480 }
6481 
6482 static ssize_t
6483 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6484 		     size_t cnt, loff_t *ppos)
6485 {
6486 	struct trace_array *tr = filp->private_data;
6487 	int ret;
6488 
6489 	guard(mutex)(&trace_types_lock);
6490 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6491 	if (ret < 0)
6492 		return ret;
6493 
6494 	if (tr->current_trace->update_thresh) {
6495 		ret = tr->current_trace->update_thresh(tr);
6496 		if (ret < 0)
6497 			return ret;
6498 	}
6499 
6500 	return cnt;
6501 }
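
/*
 * Example (illustrative): tracing_thresh is exposed via the "tracing_thresh"
 * file and, like the other nsecs helpers above, is written in microseconds
 * but stored in nanoseconds, e.g.:
 *
 *	# echo 100 > /sys/kernel/tracing/tracing_thresh	(100 usecs)
 *
 * Latency tracers that honor a threshold only record traces longer than
 * this value; writing 0 disables the threshold.
 */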
6502 
6503 #ifdef CONFIG_TRACER_MAX_TRACE
6504 
6505 static ssize_t
6506 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6507 		     size_t cnt, loff_t *ppos)
6508 {
6509 	struct trace_array *tr = filp->private_data;
6510 
6511 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6512 }
6513 
6514 static ssize_t
6515 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6516 		      size_t cnt, loff_t *ppos)
6517 {
6518 	struct trace_array *tr = filp->private_data;
6519 
6520 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6521 }
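
/*
 * Example (illustrative): these handlers back the per-instance
 * "tracing_max_latency" file used by the latency tracers, e.g.:
 *
 *	# cat /sys/kernel/tracing/tracing_max_latency		(usecs)
 *	# echo 0 > /sys/kernel/tracing/tracing_max_latency	(reset max)
 *
 * Resetting it to 0 lets the next latency measurement record a new maximum.
 */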
6522 
6523 #endif
6524 
6525 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6526 {
6527 	if (cpu == RING_BUFFER_ALL_CPUS) {
6528 		if (cpumask_empty(tr->pipe_cpumask)) {
6529 			cpumask_setall(tr->pipe_cpumask);
6530 			return 0;
6531 		}
6532 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6533 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6534 		return 0;
6535 	}
6536 	return -EBUSY;
6537 }
6538 
6539 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6540 {
6541 	if (cpu == RING_BUFFER_ALL_CPUS) {
6542 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6543 		cpumask_clear(tr->pipe_cpumask);
6544 	} else {
6545 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6546 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6547 	}
6548 }
6549 
6550 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6551 {
6552 	struct trace_array *tr = inode->i_private;
6553 	struct trace_iterator *iter;
6554 	int cpu;
6555 	int ret;
6556 
6557 	ret = tracing_check_open_get_tr(tr);
6558 	if (ret)
6559 		return ret;
6560 
6561 	guard(mutex)(&trace_types_lock);
6562 	cpu = tracing_get_cpu(inode);
6563 	ret = open_pipe_on_cpu(tr, cpu);
6564 	if (ret)
6565 		goto fail_pipe_on_cpu;
6566 
6567 	/* create a buffer to store the information to pass to userspace */
6568 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6569 	if (!iter) {
6570 		ret = -ENOMEM;
6571 		goto fail_alloc_iter;
6572 	}
6573 
6574 	trace_seq_init(&iter->seq);
6575 	iter->trace = tr->current_trace;
6576 
6577 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6578 		ret = -ENOMEM;
6579 		goto fail;
6580 	}
6581 
6582 	/* trace pipe does not show start of buffer */
6583 	cpumask_setall(iter->started);
6584 
6585 	if (tr->trace_flags & TRACE_ITER(LATENCY_FMT))
6586 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6587 
6588 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6589 	if (trace_clocks[tr->clock_id].in_ns)
6590 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6591 
6592 	iter->tr = tr;
6593 	iter->array_buffer = &tr->array_buffer;
6594 	iter->cpu_file = cpu;
6595 	mutex_init(&iter->mutex);
6596 	filp->private_data = iter;
6597 
6598 	if (iter->trace->pipe_open)
6599 		iter->trace->pipe_open(iter);
6600 
6601 	nonseekable_open(inode, filp);
6602 
6603 	tr->trace_ref++;
6604 
6605 	return ret;
6606 
6607 fail:
6608 	kfree(iter);
6609 fail_alloc_iter:
6610 	close_pipe_on_cpu(tr, cpu);
6611 fail_pipe_on_cpu:
6612 	__trace_array_put(tr);
6613 	return ret;
6614 }
6615 
6616 static int tracing_release_pipe(struct inode *inode, struct file *file)
6617 {
6618 	struct trace_iterator *iter = file->private_data;
6619 	struct trace_array *tr = inode->i_private;
6620 
6621 	scoped_guard(mutex, &trace_types_lock) {
6622 		tr->trace_ref--;
6623 
6624 		if (iter->trace->pipe_close)
6625 			iter->trace->pipe_close(iter);
6626 		close_pipe_on_cpu(tr, iter->cpu_file);
6627 	}
6628 
6629 	free_trace_iter_content(iter);
6630 	kfree(iter);
6631 
6632 	trace_array_put(tr);
6633 
6634 	return 0;
6635 }
6636 
6637 static __poll_t
6638 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6639 {
6640 	struct trace_array *tr = iter->tr;
6641 
6642 	/* Iterators are static, they should be filled or empty */
6643 	if (trace_buffer_iter(iter, iter->cpu_file))
6644 		return EPOLLIN | EPOLLRDNORM;
6645 
6646 	if (tr->trace_flags & TRACE_ITER(BLOCK))
6647 		/*
6648 		 * Always select as readable when in blocking mode
6649 		 */
6650 		return EPOLLIN | EPOLLRDNORM;
6651 	else
6652 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6653 					     filp, poll_table, iter->tr->buffer_percent);
6654 }
6655 
6656 static __poll_t
6657 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6658 {
6659 	struct trace_iterator *iter = filp->private_data;
6660 
6661 	return trace_poll(iter, filp, poll_table);
6662 }
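
/*
 * Illustrative user-space sketch (not kernel code; names and sizes are
 * examples): waiting for trace_pipe data with poll(), which ends up in
 * trace_poll() above:
 *
 *	int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *
 *	while (poll(&pfd, 1, -1) > 0) {
 *		char buf[4096];
 *		ssize_t n = read(fd, buf, sizeof(buf));
 *
 *		if (n <= 0)
 *			break;
 *		write(STDOUT_FILENO, buf, n);
 *	}
 *
 * With the "block" trace option set, poll() reports the file as readable
 * immediately; otherwise it waits until the buffer reaches buffer_percent.
 */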
6663 
6664 /* Must be called with iter->mutex held. */
6665 static int tracing_wait_pipe(struct file *filp)
6666 {
6667 	struct trace_iterator *iter = filp->private_data;
6668 	int ret;
6669 
6670 	while (trace_empty(iter)) {
6671 
6672 		if ((filp->f_flags & O_NONBLOCK)) {
6673 			return -EAGAIN;
6674 		}
6675 
6676 		/*
6677 		 * We block until we read something and tracing is disabled.
6678 		 * We still block if tracing is disabled, but we have never
6679 		 * read anything. This allows a user to cat this file, and
6680 		 * then enable tracing. But after we have read something,
6681 		 * we give an EOF when tracing is again disabled.
6682 		 *
6683 		 * iter->pos will be 0 if we haven't read anything.
6684 		 */
6685 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6686 			break;
6687 
6688 		mutex_unlock(&iter->mutex);
6689 
6690 		ret = wait_on_pipe(iter, 0);
6691 
6692 		mutex_lock(&iter->mutex);
6693 
6694 		if (ret)
6695 			return ret;
6696 	}
6697 
6698 	return 1;
6699 }
6700 
6701 static bool update_last_data_if_empty(struct trace_array *tr)
6702 {
6703 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6704 		return false;
6705 
6706 	if (!ring_buffer_empty(tr->array_buffer.buffer))
6707 		return false;
6708 
6709 	/*
6710 	 * If the buffer contains the last boot data and all per-cpu
6711 	 * buffers are empty, reset it from the kernel side.
6712 	 */
6713 	update_last_data(tr);
6714 	return true;
6715 }
6716 
6717 /*
6718  * Consumer reader.
6719  */
6720 static ssize_t
6721 tracing_read_pipe(struct file *filp, char __user *ubuf,
6722 		  size_t cnt, loff_t *ppos)
6723 {
6724 	struct trace_iterator *iter = filp->private_data;
6725 	ssize_t sret;
6726 
6727 	/*
6728 	 * Avoid more than one consumer on a single file descriptor
6729 	 * This is just a matter of trace coherency; the ring buffer itself
6730 	 * is protected.
6731 	 */
6732 	guard(mutex)(&iter->mutex);
6733 
6734 	/* return any leftover data */
6735 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6736 	if (sret != -EBUSY)
6737 		return sret;
6738 
6739 	trace_seq_init(&iter->seq);
6740 
6741 	if (iter->trace->read) {
6742 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6743 		if (sret)
6744 			return sret;
6745 	}
6746 
6747 waitagain:
6748 	if (update_last_data_if_empty(iter->tr))
6749 		return 0;
6750 
6751 	sret = tracing_wait_pipe(filp);
6752 	if (sret <= 0)
6753 		return sret;
6754 
6755 	/* stop when tracing is finished */
6756 	if (trace_empty(iter))
6757 		return 0;
6758 
6759 	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6760 		cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6761 
6762 	/* reset all but tr, trace, and overruns */
6763 	trace_iterator_reset(iter);
6764 	cpumask_clear(iter->started);
6765 	trace_seq_init(&iter->seq);
6766 
6767 	trace_event_read_lock();
6768 	trace_access_lock(iter->cpu_file);
6769 	while (trace_find_next_entry_inc(iter) != NULL) {
6770 		enum print_line_t ret;
6771 		int save_len = iter->seq.seq.len;
6772 
6773 		ret = print_trace_line(iter);
6774 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6775 			/*
6776 			 * If one print_trace_line() fills the entire trace_seq in one shot,
6777 			 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6778 			 * In this case, consume the event; otherwise the loop will peek
6779 			 * this event again next time, resulting in an infinite loop.
6780 			 */
6781 			if (save_len == 0) {
6782 				iter->seq.full = 0;
6783 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6784 				trace_consume(iter);
6785 				break;
6786 			}
6787 
6788 			/* In other cases, don't print partial lines */
6789 			iter->seq.seq.len = save_len;
6790 			break;
6791 		}
6792 		if (ret != TRACE_TYPE_NO_CONSUME)
6793 			trace_consume(iter);
6794 
6795 		if (trace_seq_used(&iter->seq) >= cnt)
6796 			break;
6797 
6798 		/*
6799 		 * The full flag being set means we reached the trace_seq buffer
6800 		 * size and should have left via the partial-output condition above.
6801 		 * One of the trace_seq_* functions is not used properly.
6802 		 */
6803 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6804 			  iter->ent->type);
6805 	}
6806 	trace_access_unlock(iter->cpu_file);
6807 	trace_event_read_unlock();
6808 
6809 	/* Now copy what we have to the user */
6810 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6811 	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6812 		trace_seq_init(&iter->seq);
6813 
6814 	/*
6815 	 * If there was nothing to send to user space, despite having consumed
6816 	 * trace entries, go back and wait for more entries.
6817 	 */
6818 	if (sret == -EBUSY)
6819 		goto waitagain;
6820 
6821 	return sret;
6822 }
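
/*
 * Illustrative usage, assuming the usual tracefs mount point: this
 * consumer-read handler backs the "trace_pipe" file, so a read such as
 *
 *	cat /sys/kernel/tracing/trace_pipe
 *
 * blocks until events are available, removes them from the ring buffer
 * as they are printed, and returns EOF only once tracing is disabled
 * after something has been read.
 */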
6823 
tracing_spd_release_pipe(struct splice_pipe_desc * spd,unsigned int idx)6824 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6825 				     unsigned int idx)
6826 {
6827 	__free_page(spd->pages[idx]);
6828 }
6829 
6830 static size_t
tracing_fill_pipe_page(size_t rem,struct trace_iterator * iter)6831 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6832 {
6833 	size_t count;
6834 	int save_len;
6835 	int ret;
6836 
6837 	/* Seq buffer is page-sized, exactly what we need. */
6838 	for (;;) {
6839 		save_len = iter->seq.seq.len;
6840 		ret = print_trace_line(iter);
6841 
6842 		if (trace_seq_has_overflowed(&iter->seq)) {
6843 			iter->seq.seq.len = save_len;
6844 			break;
6845 		}
6846 
6847 		/*
6848 		 * This should not be hit, because TRACE_TYPE_PARTIAL_LINE should
6849 		 * only be returned if the iter->seq overflowed, which was handled
6850 		 * above. But check it anyway to be safe.
6851 		 */
6852 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6853 			iter->seq.seq.len = save_len;
6854 			break;
6855 		}
6856 
6857 		count = trace_seq_used(&iter->seq) - save_len;
6858 		if (rem < count) {
6859 			rem = 0;
6860 			iter->seq.seq.len = save_len;
6861 			break;
6862 		}
6863 
6864 		if (ret != TRACE_TYPE_NO_CONSUME)
6865 			trace_consume(iter);
6866 		rem -= count;
6867 		if (!trace_find_next_entry_inc(iter))	{
6868 			rem = 0;
6869 			iter->ent = NULL;
6870 			break;
6871 		}
6872 	}
6873 
6874 	return rem;
6875 }
6876 
tracing_splice_read_pipe(struct file * filp,loff_t * ppos,struct pipe_inode_info * pipe,size_t len,unsigned int flags)6877 static ssize_t tracing_splice_read_pipe(struct file *filp,
6878 					loff_t *ppos,
6879 					struct pipe_inode_info *pipe,
6880 					size_t len,
6881 					unsigned int flags)
6882 {
6883 	struct page *pages_def[PIPE_DEF_BUFFERS];
6884 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6885 	struct trace_iterator *iter = filp->private_data;
6886 	struct splice_pipe_desc spd = {
6887 		.pages		= pages_def,
6888 		.partial	= partial_def,
6889 		.nr_pages	= 0, /* This gets updated below. */
6890 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6891 		.ops		= &default_pipe_buf_ops,
6892 		.spd_release	= tracing_spd_release_pipe,
6893 	};
6894 	ssize_t ret;
6895 	size_t rem;
6896 	unsigned int i;
6897 
6898 	if (splice_grow_spd(pipe, &spd))
6899 		return -ENOMEM;
6900 
6901 	mutex_lock(&iter->mutex);
6902 
6903 	if (iter->trace->splice_read) {
6904 		ret = iter->trace->splice_read(iter, filp,
6905 					       ppos, pipe, len, flags);
6906 		if (ret)
6907 			goto out_err;
6908 	}
6909 
6910 	ret = tracing_wait_pipe(filp);
6911 	if (ret <= 0)
6912 		goto out_err;
6913 
6914 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6915 		ret = -EFAULT;
6916 		goto out_err;
6917 	}
6918 
6919 	trace_event_read_lock();
6920 	trace_access_lock(iter->cpu_file);
6921 
6922 	/* Fill as many pages as possible. */
6923 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6924 		spd.pages[i] = alloc_page(GFP_KERNEL);
6925 		if (!spd.pages[i])
6926 			break;
6927 
6928 		rem = tracing_fill_pipe_page(rem, iter);
6929 
6930 		/* Copy the data into the page, so we can start over. */
6931 		ret = trace_seq_to_buffer(&iter->seq,
6932 					  page_address(spd.pages[i]),
6933 					  min((size_t)trace_seq_used(&iter->seq),
6934 						  (size_t)PAGE_SIZE));
6935 		if (ret < 0) {
6936 			__free_page(spd.pages[i]);
6937 			break;
6938 		}
6939 		spd.partial[i].offset = 0;
6940 		spd.partial[i].len = ret;
6941 
6942 		trace_seq_init(&iter->seq);
6943 	}
6944 
6945 	trace_access_unlock(iter->cpu_file);
6946 	trace_event_read_unlock();
6947 	mutex_unlock(&iter->mutex);
6948 
6949 	spd.nr_pages = i;
6950 
6951 	if (i)
6952 		ret = splice_to_pipe(pipe, &spd);
6953 	else
6954 		ret = 0;
6955 out:
6956 	splice_shrink_spd(&spd);
6957 	return ret;
6958 
6959 out_err:
6960 	mutex_unlock(&iter->mutex);
6961 	goto out;
6962 }
6963 
6964 static ssize_t
tracing_syscall_buf_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6965 tracing_syscall_buf_read(struct file *filp, char __user *ubuf,
6966 			 size_t cnt, loff_t *ppos)
6967 {
6968 	struct inode *inode = file_inode(filp);
6969 	struct trace_array *tr = inode->i_private;
6970 	char buf[64];
6971 	int r;
6972 
6973 	r = snprintf(buf, 64, "%d\n", tr->syscall_buf_sz);
6974 
6975 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6976 }
6977 
6978 static ssize_t
tracing_syscall_buf_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6979 tracing_syscall_buf_write(struct file *filp, const char __user *ubuf,
6980 			  size_t cnt, loff_t *ppos)
6981 {
6982 	struct inode *inode = file_inode(filp);
6983 	struct trace_array *tr = inode->i_private;
6984 	unsigned long val;
6985 	int ret;
6986 
6987 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6988 	if (ret)
6989 		return ret;
6990 
6991 	if (val > SYSCALL_FAULT_USER_MAX)
6992 		val = SYSCALL_FAULT_USER_MAX;
6993 
6994 	tr->syscall_buf_sz = val;
6995 
6996 	*ppos += cnt;
6997 
6998 	return cnt;
6999 }
7000 
7001 static ssize_t
tracing_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)7002 tracing_entries_read(struct file *filp, char __user *ubuf,
7003 		     size_t cnt, loff_t *ppos)
7004 {
7005 	struct inode *inode = file_inode(filp);
7006 	struct trace_array *tr = inode->i_private;
7007 	int cpu = tracing_get_cpu(inode);
7008 	char buf[64];
7009 	int r = 0;
7010 	ssize_t ret;
7011 
7012 	mutex_lock(&trace_types_lock);
7013 
7014 	if (cpu == RING_BUFFER_ALL_CPUS) {
7015 		int cpu, buf_size_same;
7016 		unsigned long size;
7017 
7018 		size = 0;
7019 		buf_size_same = 1;
7020 		/* check if all cpu sizes are same */
7021 		for_each_tracing_cpu(cpu) {
7022 			/* fill in the size from first enabled cpu */
7023 			if (size == 0)
7024 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7025 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7026 				buf_size_same = 0;
7027 				break;
7028 			}
7029 		}
7030 
7031 		if (buf_size_same) {
7032 			if (!tr->ring_buffer_expanded)
7033 				r = sprintf(buf, "%lu (expanded: %lu)\n",
7034 					    size >> 10,
7035 					    trace_buf_size >> 10);
7036 			else
7037 				r = sprintf(buf, "%lu\n", size >> 10);
7038 		} else
7039 			r = sprintf(buf, "X\n");
7040 	} else
7041 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7042 
7043 	mutex_unlock(&trace_types_lock);
7044 
7045 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7046 	return ret;
7047 }
7048 
7049 static ssize_t
tracing_entries_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7050 tracing_entries_write(struct file *filp, const char __user *ubuf,
7051 		      size_t cnt, loff_t *ppos)
7052 {
7053 	struct inode *inode = file_inode(filp);
7054 	struct trace_array *tr = inode->i_private;
7055 	unsigned long val;
7056 	int ret;
7057 
7058 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7059 	if (ret)
7060 		return ret;
7061 
7062 	/* must have at least 1 entry */
7063 	if (!val)
7064 		return -EINVAL;
7065 
7066 	/* value is in KB */
7067 	val <<= 10;
7068 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7069 	if (ret < 0)
7070 		return ret;
7071 
7072 	*ppos += cnt;
7073 
7074 	return cnt;
7075 }
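
/*
 * Illustrative usage (tracefs path assumed): these handlers back the
 * per-instance "buffer_size_kb" file and the per-CPU
 * "per_cpu/cpuN/buffer_size_kb" files. The value written is in KiB
 * per CPU, e.g.:
 *
 *	echo 2048 > /sys/kernel/tracing/buffer_size_kb
 *
 * resizes every per-CPU ring buffer of the instance to 2048 KiB.
 */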
7076 
7077 static ssize_t
tracing_total_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)7078 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7079 				size_t cnt, loff_t *ppos)
7080 {
7081 	struct trace_array *tr = filp->private_data;
7082 	char buf[64];
7083 	int r, cpu;
7084 	unsigned long size = 0, expanded_size = 0;
7085 
7086 	mutex_lock(&trace_types_lock);
7087 	for_each_tracing_cpu(cpu) {
7088 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7089 		if (!tr->ring_buffer_expanded)
7090 			expanded_size += trace_buf_size >> 10;
7091 	}
7092 	if (tr->ring_buffer_expanded)
7093 		r = sprintf(buf, "%lu\n", size);
7094 	else
7095 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7096 	mutex_unlock(&trace_types_lock);
7097 
7098 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7099 }
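
/*
 * Illustrative usage (file name as commonly exposed in tracefs): this
 * read handler backs "buffer_total_size_kb", reporting the sum of all
 * per-CPU buffer sizes, optionally with the "(expanded: ...)" suffix
 * while the boot-time minimal buffer is still in place.
 */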
7100 
7101 #define LAST_BOOT_HEADER ((void *)1)
7102 
l_next(struct seq_file * m,void * v,loff_t * pos)7103 static void *l_next(struct seq_file *m, void *v, loff_t *pos)
7104 {
7105 	struct trace_array *tr = m->private;
7106 	struct trace_scratch *tscratch = tr->scratch;
7107 	unsigned int index = *pos;
7108 
7109 	(*pos)++;
7110 
7111 	if (*pos == 1)
7112 		return LAST_BOOT_HEADER;
7113 
7114 	/* Only show offsets of the last boot data */
7115 	if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7116 		return NULL;
7117 
7118 	/* *pos 0 is for the header, 1 is for the first module */
7119 	index--;
7120 
7121 	if (index >= tscratch->nr_entries)
7122 		return NULL;
7123 
7124 	return &tscratch->entries[index];
7125 }
7126 
l_start(struct seq_file * m,loff_t * pos)7127 static void *l_start(struct seq_file *m, loff_t *pos)
7128 {
7129 	mutex_lock(&scratch_mutex);
7130 
7131 	return l_next(m, NULL, pos);
7132 }
7133 
l_stop(struct seq_file * m,void * p)7134 static void l_stop(struct seq_file *m, void *p)
7135 {
7136 	mutex_unlock(&scratch_mutex);
7137 }
7138 
show_last_boot_header(struct seq_file * m,struct trace_array * tr)7139 static void show_last_boot_header(struct seq_file *m, struct trace_array *tr)
7140 {
7141 	struct trace_scratch *tscratch = tr->scratch;
7142 
7143 	/*
7144 	 * Do not leak KASLR address. This only shows the KASLR address of
7145 	 * the last boot. When the ring buffer is started, the LAST_BOOT
7146 	 * flag gets cleared, and this should only report "current".
7147 	 * Otherwise it shows the KASLR address from the previous boot which
7148 	 * should not be the same as the current boot.
7149 	 */
7150 	if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7151 		seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr);
7152 	else
7153 		seq_puts(m, "# Current\n");
7154 }
7155 
l_show(struct seq_file * m,void * v)7156 static int l_show(struct seq_file *m, void *v)
7157 {
7158 	struct trace_array *tr = m->private;
7159 	struct trace_mod_entry *entry = v;
7160 
7161 	if (v == LAST_BOOT_HEADER) {
7162 		show_last_boot_header(m, tr);
7163 		return 0;
7164 	}
7165 
7166 	seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name);
7167 	return 0;
7168 }
7169 
7170 static const struct seq_operations last_boot_seq_ops = {
7171 	.start		= l_start,
7172 	.next		= l_next,
7173 	.stop		= l_stop,
7174 	.show		= l_show,
7175 };
7176 
tracing_last_boot_open(struct inode * inode,struct file * file)7177 static int tracing_last_boot_open(struct inode *inode, struct file *file)
7178 {
7179 	struct trace_array *tr = inode->i_private;
7180 	struct seq_file *m;
7181 	int ret;
7182 
7183 	ret = tracing_check_open_get_tr(tr);
7184 	if (ret)
7185 		return ret;
7186 
7187 	ret = seq_open(file, &last_boot_seq_ops);
7188 	if (ret) {
7189 		trace_array_put(tr);
7190 		return ret;
7191 	}
7192 
7193 	m = file->private_data;
7194 	m->private = tr;
7195 
7196 	return 0;
7197 }
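
/*
 * Illustrative output of the seq_file set up here (exposed as the
 * last-boot info file in tracefs; exact file name assumed): the header
 * line is either "# Current" or the previous boot's kernel text
 * address, followed by one line per module recorded in the scratch
 * area, formatted as:
 *
 *	<text addr>	[kernel]
 *	<mod addr>	<module name>
 */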
7198 
tracing_buffer_meta_open(struct inode * inode,struct file * filp)7199 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
7200 {
7201 	struct trace_array *tr = inode->i_private;
7202 	int cpu = tracing_get_cpu(inode);
7203 	int ret;
7204 
7205 	ret = tracing_check_open_get_tr(tr);
7206 	if (ret)
7207 		return ret;
7208 
7209 	ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
7210 	if (ret < 0)
7211 		__trace_array_put(tr);
7212 	return ret;
7213 }
7214 
7215 static ssize_t
tracing_free_buffer_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7216 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7217 			  size_t cnt, loff_t *ppos)
7218 {
7219 	/*
7220 	 * There is no need to read what the user has written; this function
7221 	 * only exists so that writing (e.g. with "echo") does not return an error
7222 	 */
7223 
7224 	*ppos += cnt;
7225 
7226 	return cnt;
7227 }
7228 
7229 static int
tracing_free_buffer_release(struct inode * inode,struct file * filp)7230 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7231 {
7232 	struct trace_array *tr = inode->i_private;
7233 
7234 	/* disable tracing ? */
7235 	if (tr->trace_flags & TRACE_ITER(STOP_ON_FREE))
7236 		tracer_tracing_off(tr);
7237 	/* resize the ring buffer to 0 */
7238 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7239 
7240 	trace_array_put(tr);
7241 
7242 	return 0;
7243 }
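
/*
 * Illustrative usage (tracefs path assumed): these handlers back the
 * "free_buffer" file. Writing anything and closing the file shrinks
 * the ring buffer to zero, and if the stop-on-free trace option is
 * set, tracing is turned off first:
 *
 *	echo > /sys/kernel/tracing/free_buffer
 */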
7244 
7245 #define TRACE_MARKER_MAX_SIZE		4096
7246 
write_marker_to_buffer(struct trace_array * tr,const char * buf,size_t cnt,unsigned long ip)7247 static ssize_t write_marker_to_buffer(struct trace_array *tr, const char *buf,
7248 				      size_t cnt, unsigned long ip)
7249 {
7250 	struct ring_buffer_event *event;
7251 	enum event_trigger_type tt = ETT_NONE;
7252 	struct trace_buffer *buffer;
7253 	struct print_entry *entry;
7254 	int meta_size;
7255 	ssize_t written;
7256 	size_t size;
7257 
7258 	meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
7259  again:
7260 	size = cnt + meta_size;
7261 
7262 	buffer = tr->array_buffer.buffer;
7263 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7264 					    tracing_gen_ctx());
7265 	if (unlikely(!event)) {
7266 		/*
7267 		 * If the size was greater than what was allowed, then
7268 		 * make it smaller and try again.
7269 		 */
7270 		if (size > ring_buffer_max_event_size(buffer)) {
7271 			cnt = ring_buffer_max_event_size(buffer) - meta_size;
7272 			/* The above should only happen once */
7273 			if (WARN_ON_ONCE(cnt + meta_size == size))
7274 				return -EBADF;
7275 			goto again;
7276 		}
7277 
7278 		/* Ring buffer disabled, return as if not open for write */
7279 		return -EBADF;
7280 	}
7281 
7282 	entry = ring_buffer_event_data(event);
7283 	entry->ip = ip;
7284 	memcpy(&entry->buf, buf, cnt);
7285 	written = cnt;
7286 
7287 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7288 		/* do not add \n before testing triggers, but add \0 */
7289 		entry->buf[cnt] = '\0';
7290 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7291 	}
7292 
7293 	if (entry->buf[cnt - 1] != '\n') {
7294 		entry->buf[cnt] = '\n';
7295 		entry->buf[cnt + 1] = '\0';
7296 	} else
7297 		entry->buf[cnt] = '\0';
7298 
7299 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7300 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7301 	__buffer_unlock_commit(buffer, event);
7302 
7303 	if (tt)
7304 		event_triggers_post_call(tr->trace_marker_file, tt);
7305 
7306 	return written;
7307 }
7308 
7309 struct trace_user_buf {
7310 	char		*buf;
7311 };
7312 
7313 static DEFINE_MUTEX(trace_user_buffer_mutex);
7314 static struct trace_user_buf_info *trace_user_buffer;
7315 
7316 /**
7317  * trace_user_fault_destroy - free up allocated memory of a trace user buffer
7318  * @tinfo: The descriptor to free up
7319  *
7320  * Frees any data allocated in the trace info descriptor.
7321  */
trace_user_fault_destroy(struct trace_user_buf_info * tinfo)7322 void trace_user_fault_destroy(struct trace_user_buf_info *tinfo)
7323 {
7324 	char *buf;
7325 	int cpu;
7326 
7327 	if (!tinfo || !tinfo->tbuf)
7328 		return;
7329 
7330 	for_each_possible_cpu(cpu) {
7331 		buf = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
7332 		kfree(buf);
7333 	}
7334 	free_percpu(tinfo->tbuf);
7335 }
7336 
user_fault_buffer_enable(struct trace_user_buf_info * tinfo,size_t size)7337 static int user_fault_buffer_enable(struct trace_user_buf_info *tinfo, size_t size)
7338 {
7339 	char *buf;
7340 	int cpu;
7341 
7342 	lockdep_assert_held(&trace_user_buffer_mutex);
7343 
7344 	tinfo->tbuf = alloc_percpu(struct trace_user_buf);
7345 	if (!tinfo->tbuf)
7346 		return -ENOMEM;
7347 
7348 	tinfo->ref = 1;
7349 	tinfo->size = size;
7350 
7351 	/* Clear each buffer in case of error */
7352 	for_each_possible_cpu(cpu) {
7353 		per_cpu_ptr(tinfo->tbuf, cpu)->buf = NULL;
7354 	}
7355 
7356 	for_each_possible_cpu(cpu) {
7357 		buf = kmalloc_node(size, GFP_KERNEL,
7358 				   cpu_to_node(cpu));
7359 		if (!buf)
7360 			return -ENOMEM;
7361 		per_cpu_ptr(tinfo->tbuf, cpu)->buf = buf;
7362 	}
7363 
7364 	return 0;
7365 }
7366 
7367 /* For internal use. Free and reinitialize */
user_buffer_free(struct trace_user_buf_info ** tinfo)7368 static void user_buffer_free(struct trace_user_buf_info **tinfo)
7369 {
7370 	lockdep_assert_held(&trace_user_buffer_mutex);
7371 
7372 	trace_user_fault_destroy(*tinfo);
7373 	kfree(*tinfo);
7374 	*tinfo = NULL;
7375 }
7376 
7377 /* For internal use. Initialize and allocate */
user_buffer_init(struct trace_user_buf_info ** tinfo,size_t size)7378 static int user_buffer_init(struct trace_user_buf_info **tinfo, size_t size)
7379 {
7380 	bool alloc = false;
7381 	int ret;
7382 
7383 	lockdep_assert_held(&trace_user_buffer_mutex);
7384 
7385 	if (!*tinfo) {
7386 		alloc = true;
7387 		*tinfo = kzalloc(sizeof(**tinfo), GFP_KERNEL);
7388 		if (!*tinfo)
7389 			return -ENOMEM;
7390 	}
7391 
7392 	ret = user_fault_buffer_enable(*tinfo, size);
7393 	if (ret < 0 && alloc)
7394 		user_buffer_free(tinfo);
7395 
7396 	return ret;
7397 }
7398 
7399 /* For internal use, dereference and free if necessary */
user_buffer_put(struct trace_user_buf_info ** tinfo)7400 static void user_buffer_put(struct trace_user_buf_info **tinfo)
7401 {
7402 	guard(mutex)(&trace_user_buffer_mutex);
7403 
7404 	if (WARN_ON_ONCE(!*tinfo || !(*tinfo)->ref))
7405 		return;
7406 
7407 	if (--(*tinfo)->ref)
7408 		return;
7409 
7410 	user_buffer_free(tinfo);
7411 }
7412 
7413 /**
7414  * trace_user_fault_init - Allocate or reference a per CPU buffer
7415  * @tinfo: A pointer to the trace buffer descriptor
7416  * @size: The size to allocate for each per CPU buffer
7417  *
7418  * Create a per CPU buffer that can be used to copy from user space
7419  * in a task context. trace_user_fault_read() must be called with
7420  * preemption disabled; it re-enables preemption while copying the
7421  * user space data into the buffer. If any schedule switch occurs
7422  * during the copy, it retries until the copy completes without one,
7423  * ensuring the buffer content is still valid.
7424  *
7425  * Returns 0 on success, negative on failure.
7426  */
trace_user_fault_init(struct trace_user_buf_info * tinfo,size_t size)7427 int trace_user_fault_init(struct trace_user_buf_info *tinfo, size_t size)
7428 {
7429 	int ret;
7430 
7431 	if (!tinfo)
7432 		return -EINVAL;
7433 
7434 	guard(mutex)(&trace_user_buffer_mutex);
7435 
7436 	ret = user_buffer_init(&tinfo, size);
7437 	if (ret < 0)
7438 		trace_user_fault_destroy(tinfo);
7439 
7440 	return ret;
7441 }
7442 
7443 /**
7444  * trace_user_fault_get - up the ref count for the user buffer
7445  * @tinfo: A pointer to the trace buffer descriptor
7446  *
7447  * Ups the ref count of the trace buffer.
7448  *
7449  * Returns the new ref count.
7450  */
trace_user_fault_get(struct trace_user_buf_info * tinfo)7451 int trace_user_fault_get(struct trace_user_buf_info *tinfo)
7452 {
7453 	if (!tinfo)
7454 		return -1;
7455 
7456 	guard(mutex)(&trace_user_buffer_mutex);
7457 
7458 	tinfo->ref++;
7459 	return tinfo->ref;
7460 }
7461 
7462 /**
7463  * trace_user_fault_put - dereference a per cpu trace buffer
7464  * @tinfo: The @tinfo that was passed to trace_user_fault_get()
7465  *
7466  * Decrement the ref count of @tinfo.
7467  *
7468  * Returns the new refcount (negative on error).
7469  */
trace_user_fault_put(struct trace_user_buf_info * tinfo)7470 int trace_user_fault_put(struct trace_user_buf_info *tinfo)
7471 {
7472 	guard(mutex)(&trace_user_buffer_mutex);
7473 
7474 	if (WARN_ON_ONCE(!tinfo || !tinfo->ref))
7475 		return -1;
7476 
7477 	--tinfo->ref;
7478 	return tinfo->ref;
7479 }
7480 
7481 /**
7482  * trace_user_fault_read - Read user space into a per CPU buffer
7483  * @tinfo: The @tinfo set up by trace_user_fault_init()
7484  * @ptr: The user space pointer to read
7485  * @size: The size of user space to read.
7486  * @copy_func: Optional function to use to copy from user space
7487  * @data: Data to pass to copy_func if it was supplied
7488  *
7489  * Preemption must be disabled when this is called, and must not
7490  * be enabled while using the returned buffer.
7491  * This does the copying from user space into a per CPU buffer.
7492  *
7493  * The @size must not be greater than the size passed in to
7494  * trace_user_fault_init().
7495  *
7496  * If @copy_func is NULL, trace_user_fault_read() will use copy_from_user(),
7497  * otherwise it will call @copy_func. It will call @copy_func with:
7498  *
7499  *   buffer: the per CPU buffer of the @tinfo.
7500  *   ptr: The pointer @ptr to user space to read
7501  *   size: The @size of the ptr to read
7502  *   data: The @data parameter
7503  *
7504  * It is expected that @copy_func will return 0 on success and non zero
7505  * if there was a fault.
7506  *
7507  * Returns a pointer to the buffer with the content read from @ptr.
7508  *   Preemption must remain disabled while the caller accesses the
7509  *   buffer returned by this function.
7510  * Returns NULL if there was a fault, or the size passed in is
7511  *   greater than the size passed to trace_user_fault_init().
7512  */
trace_user_fault_read(struct trace_user_buf_info * tinfo,const char __user * ptr,size_t size,trace_user_buf_copy copy_func,void * data)7513 char *trace_user_fault_read(struct trace_user_buf_info *tinfo,
7514 			     const char __user *ptr, size_t size,
7515 			     trace_user_buf_copy copy_func, void *data)
7516 {
7517 	int cpu = smp_processor_id();
7518 	char *buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
7519 	unsigned int cnt;
7520 	int trys = 0;
7521 	int ret;
7522 
7523 	lockdep_assert_preemption_disabled();
7524 
7525 	/*
7526 	 * It's up to the caller to not try to copy more than it said
7527 	 * it would.
7528 	 */
7529 	if (size > tinfo->size)
7530 		return NULL;
7531 
7532 	/*
7533 	 * This acts similarly to a seqcount. The per CPU context switch count is
7534 	 * recorded, migration is disabled and preemption is enabled. The
7535 	 * user space memory is then copied into the per CPU buffer.
7536 	 * Preemption is disabled again, and if the per CPU context switch count
7537 	 * is still the same, it means the buffer has not been corrupted.
7538 	 * If the count is different, it is assumed the buffer is corrupted
7539 	 * and reading must be tried again.
7540 	 */
7541 
7542 	do {
7543 		/*
7544 		 * If for some reason, copy_from_user() always causes a context
7545 		 * switch, this would then cause an infinite loop.
7546 		 * If this task is preempted by another user space task, it
7547 		 * will cause this task to try again. But just in case something
7548 		 * changes where the copying from user space causes another task
7549 		 * to run, prevent this from going into an infinite loop.
7550 		 * 100 tries should be plenty.
7551 		 */
7552 		if (WARN_ONCE(trys++ > 100, "Error: Too many tries to read user space"))
7553 			return NULL;
7554 
7555 		/* Read the current CPU context switch counter */
7556 		cnt = nr_context_switches_cpu(cpu);
7557 
7558 		/*
7559 		 * Preemption is going to be enabled, but this task must
7560 		 * remain on this CPU.
7561 		 */
7562 		migrate_disable();
7563 
7564 		/*
7565 		 * Now preemption is being enabled and another task can come in
7566 		 * and use the same buffer and corrupt our data.
7567 		 */
7568 		preempt_enable_notrace();
7569 
7570 		/* Make sure preemption is enabled here */
7571 		lockdep_assert_preemption_enabled();
7572 
7573 		if (copy_func) {
7574 			ret = copy_func(buffer, ptr, size, data);
7575 		} else {
7576 			ret = __copy_from_user(buffer, ptr, size);
7577 		}
7578 
7579 		preempt_disable_notrace();
7580 		migrate_enable();
7581 
7582 		/* if it faulted, no need to test if the buffer was corrupted */
7583 		if (ret)
7584 			return NULL;
7585 
7586 		/*
7587 		 * Preemption is disabled again, now check the per CPU context
7588 		 * switch counter. If it doesn't match, then another user space
7589 		 * process may have scheduled in and corrupted our buffer. In that
7590 		 * case the copying must be retried.
7591 		 */
7592 	} while (nr_context_switches_cpu(cpu) != cnt);
7593 
7594 	return buffer;
7595 }
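
/*
 * Minimal call-pattern sketch for the helpers above (tracing_mark_write()
 * below is the in-tree user):
 *
 *	guard(preempt_notrace)();
 *	buf = trace_user_fault_read(tinfo, ubuf, len, NULL, NULL);
 *	if (!buf)
 *		return -EFAULT;
 *	...use buf; preemption must stay disabled while it is accessed...
 */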
7596 
7597 static ssize_t
tracing_mark_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7598 tracing_mark_write(struct file *filp, const char __user *ubuf,
7599 					size_t cnt, loff_t *fpos)
7600 {
7601 	struct trace_array *tr = filp->private_data;
7602 	ssize_t written = -ENODEV;
7603 	unsigned long ip;
7604 	char *buf;
7605 
7606 	if (tracing_disabled)
7607 		return -EINVAL;
7608 
7609 	if (!(tr->trace_flags & TRACE_ITER(MARKERS)))
7610 		return -EINVAL;
7611 
7612 	if ((ssize_t)cnt < 0)
7613 		return -EINVAL;
7614 
7615 	if (cnt > TRACE_MARKER_MAX_SIZE)
7616 		cnt = TRACE_MARKER_MAX_SIZE;
7617 
7618 	/* Must have preemption disabled while having access to the buffer */
7619 	guard(preempt_notrace)();
7620 
7621 	buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL);
7622 	if (!buf)
7623 		return -EFAULT;
7624 
7625 	/* The selftests expect this function to be the IP address */
7626 	ip = _THIS_IP_;
7627 
7628 	/* The global trace_marker can go to multiple instances */
7629 	if (tr == &global_trace) {
7630 		guard(rcu)();
7631 		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7632 			written = write_marker_to_buffer(tr, buf, cnt, ip);
7633 			if (written < 0)
7634 				break;
7635 		}
7636 	} else {
7637 		written = write_marker_to_buffer(tr, buf, cnt, ip);
7638 	}
7639 
7640 	return written;
7641 }
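
/*
 * Illustrative usage (tracefs path assumed): this handler backs the
 * "trace_marker" file, so user space can annotate the trace with:
 *
 *	echo "hit the slow path" > /sys/kernel/tracing/trace_marker
 *
 * which shows up in the trace output as a "tracing_mark_write:" line.
 */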
7642 
write_raw_marker_to_buffer(struct trace_array * tr,const char * buf,size_t cnt)7643 static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
7644 					  const char *buf, size_t cnt)
7645 {
7646 	struct ring_buffer_event *event;
7647 	struct trace_buffer *buffer;
7648 	struct raw_data_entry *entry;
7649 	ssize_t written;
7650 	size_t size;
7651 
7652 	/* cnt includes both the entry->id and the data behind it. */
7653 	size = struct_offset(entry, id) + cnt;
7654 
7655 	buffer = tr->array_buffer.buffer;
7656 
7657 	if (size > ring_buffer_max_event_size(buffer))
7658 		return -EINVAL;
7659 
7660 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7661 					    tracing_gen_ctx());
7662 	if (!event)
7663 		/* Ring buffer disabled, return as if not open for write */
7664 		return -EBADF;
7665 
7666 	entry = ring_buffer_event_data(event);
7667 	unsafe_memcpy(&entry->id, buf, cnt,
7668 		      "id and content already reserved on ring buffer. "
7669 		      "'buf' includes the 'id' and the data. "
7670 		      "'entry' was allocated with cnt from 'id'.");
7671 	written = cnt;
7672 
7673 	__buffer_unlock_commit(buffer, event);
7674 
7675 	return written;
7676 }
7677 
7678 static ssize_t
tracing_mark_raw_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7679 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7680 					size_t cnt, loff_t *fpos)
7681 {
7682 	struct trace_array *tr = filp->private_data;
7683 	ssize_t written = -ENODEV;
7684 	char *buf;
7685 
7686 	if (tracing_disabled)
7687 		return -EINVAL;
7688 
7689 	if (!(tr->trace_flags & TRACE_ITER(MARKERS)))
7690 		return -EINVAL;
7691 
7692 	/* The marker must at least have a tag id */
7693 	if (cnt < sizeof(unsigned int))
7694 		return -EINVAL;
7695 
7696 	/* raw write is all or nothing */
7697 	if (cnt > TRACE_MARKER_MAX_SIZE)
7698 		return -EINVAL;
7699 
7700 	/* Must have preemption disabled while having access to the buffer */
7701 	guard(preempt_notrace)();
7702 
7703 	buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL);
7704 	if (!buf)
7705 		return -EFAULT;
7706 
7707 	/* The global trace_marker_raw can go to multiple instances */
7708 	if (tr == &global_trace) {
7709 		guard(rcu)();
7710 		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7711 			written = write_raw_marker_to_buffer(tr, buf, cnt);
7712 			if (written < 0)
7713 				break;
7714 		}
7715 	} else {
7716 		written = write_raw_marker_to_buffer(tr, buf, cnt);
7717 	}
7718 
7719 	return written;
7720 }
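
/*
 * Illustrative usage: this handler backs the "trace_marker_raw" file.
 * A write must start with a 4-byte id followed by the binary payload,
 * all submitted in a single write, e.g. (sketch only):
 *
 *	struct { unsigned int id; char data[8]; } raw = {
 *		.id = 42, .data = "payload",
 *	};
 *	write(fd, &raw, sizeof(raw));
 */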
7721 
tracing_mark_open(struct inode * inode,struct file * filp)7722 static int tracing_mark_open(struct inode *inode, struct file *filp)
7723 {
7724 	int ret;
7725 
7726 	scoped_guard(mutex, &trace_user_buffer_mutex) {
7727 		if (!trace_user_buffer) {
7728 			ret = user_buffer_init(&trace_user_buffer, TRACE_MARKER_MAX_SIZE);
7729 			if (ret < 0)
7730 				return ret;
7731 		} else {
7732 			trace_user_buffer->ref++;
7733 		}
7734 	}
7735 
7736 	stream_open(inode, filp);
7737 	ret = tracing_open_generic_tr(inode, filp);
7738 	if (ret < 0)
7739 		user_buffer_put(&trace_user_buffer);
7740 	return ret;
7741 }
7742 
tracing_mark_release(struct inode * inode,struct file * file)7743 static int tracing_mark_release(struct inode *inode, struct file *file)
7744 {
7745 	user_buffer_put(&trace_user_buffer);
7746 	return tracing_release_generic_tr(inode, file);
7747 }
7748 
tracing_clock_show(struct seq_file * m,void * v)7749 static int tracing_clock_show(struct seq_file *m, void *v)
7750 {
7751 	struct trace_array *tr = m->private;
7752 	int i;
7753 
7754 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7755 		seq_printf(m,
7756 			"%s%s%s%s", i ? " " : "",
7757 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7758 			i == tr->clock_id ? "]" : "");
7759 	seq_putc(m, '\n');
7760 
7761 	return 0;
7762 }
7763 
tracing_set_clock(struct trace_array * tr,const char * clockstr)7764 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7765 {
7766 	int i;
7767 
7768 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7769 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7770 			break;
7771 	}
7772 	if (i == ARRAY_SIZE(trace_clocks))
7773 		return -EINVAL;
7774 
7775 	guard(mutex)(&trace_types_lock);
7776 
7777 	tr->clock_id = i;
7778 
7779 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7780 
7781 	/*
7782 	 * New clock may not be consistent with the previous clock.
7783 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7784 	 */
7785 	tracing_reset_online_cpus(&tr->array_buffer);
7786 
7787 #ifdef CONFIG_TRACER_MAX_TRACE
7788 	if (tr->max_buffer.buffer)
7789 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7790 	tracing_reset_online_cpus(&tr->max_buffer);
7791 #endif
7792 
7793 	if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
7794 		struct trace_scratch *tscratch = tr->scratch;
7795 
7796 		tscratch->clock_id = i;
7797 	}
7798 
7799 	return 0;
7800 }
7801 
tracing_clock_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7802 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7803 				   size_t cnt, loff_t *fpos)
7804 {
7805 	struct seq_file *m = filp->private_data;
7806 	struct trace_array *tr = m->private;
7807 	char buf[64];
7808 	const char *clockstr;
7809 	int ret;
7810 
7811 	if (cnt >= sizeof(buf))
7812 		return -EINVAL;
7813 
7814 	if (copy_from_user(buf, ubuf, cnt))
7815 		return -EFAULT;
7816 
7817 	buf[cnt] = 0;
7818 
7819 	clockstr = strstrip(buf);
7820 
7821 	ret = tracing_set_clock(tr, clockstr);
7822 	if (ret)
7823 		return ret;
7824 
7825 	*fpos += cnt;
7826 
7827 	return cnt;
7828 }
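
/*
 * Illustrative usage (tracefs path assumed): these handlers back the
 * "trace_clock" file. Reading it lists the available clocks with the
 * current one in brackets; writing a name switches clocks and, as
 * noted above, resets the buffers:
 *
 *	cat /sys/kernel/tracing/trace_clock
 *	echo mono > /sys/kernel/tracing/trace_clock
 */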
7829 
tracing_clock_open(struct inode * inode,struct file * file)7830 static int tracing_clock_open(struct inode *inode, struct file *file)
7831 {
7832 	struct trace_array *tr = inode->i_private;
7833 	int ret;
7834 
7835 	ret = tracing_check_open_get_tr(tr);
7836 	if (ret)
7837 		return ret;
7838 
7839 	ret = single_open(file, tracing_clock_show, inode->i_private);
7840 	if (ret < 0)
7841 		trace_array_put(tr);
7842 
7843 	return ret;
7844 }
7845 
tracing_time_stamp_mode_show(struct seq_file * m,void * v)7846 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7847 {
7848 	struct trace_array *tr = m->private;
7849 
7850 	guard(mutex)(&trace_types_lock);
7851 
7852 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7853 		seq_puts(m, "delta [absolute]\n");
7854 	else
7855 		seq_puts(m, "[delta] absolute\n");
7856 
7857 	return 0;
7858 }
7859 
tracing_time_stamp_mode_open(struct inode * inode,struct file * file)7860 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7861 {
7862 	struct trace_array *tr = inode->i_private;
7863 	int ret;
7864 
7865 	ret = tracing_check_open_get_tr(tr);
7866 	if (ret)
7867 		return ret;
7868 
7869 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7870 	if (ret < 0)
7871 		trace_array_put(tr);
7872 
7873 	return ret;
7874 }
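
/*
 * Illustrative usage (file name assumed to be "timestamp_mode" in
 * tracefs): reading it reports whether the ring buffer records delta
 * or absolute timestamps, with the active mode in brackets, e.g.
 * "[delta] absolute".
 */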
7875 
tracing_event_time_stamp(struct trace_buffer * buffer,struct ring_buffer_event * rbe)7876 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7877 {
7878 	if (rbe == this_cpu_read(trace_buffered_event))
7879 		return ring_buffer_time_stamp(buffer);
7880 
7881 	return ring_buffer_event_time_stamp(buffer, rbe);
7882 }
7883 
7884 /*
7885  * Enable or disable use of the per CPU trace_buffered_event when possible.
7886  */
tracing_set_filter_buffering(struct trace_array * tr,bool set)7887 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7888 {
7889 	guard(mutex)(&trace_types_lock);
7890 
7891 	if (set && tr->no_filter_buffering_ref++)
7892 		return 0;
7893 
7894 	if (!set) {
7895 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref))
7896 			return -EINVAL;
7897 
7898 		--tr->no_filter_buffering_ref;
7899 	}
7900 
7901 	return 0;
7902 }
7903 
7904 struct ftrace_buffer_info {
7905 	struct trace_iterator	iter;
7906 	void			*spare;
7907 	unsigned int		spare_cpu;
7908 	unsigned int		spare_size;
7909 	unsigned int		read;
7910 };
7911 
7912 #ifdef CONFIG_TRACER_SNAPSHOT
tracing_snapshot_open(struct inode * inode,struct file * file)7913 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7914 {
7915 	struct trace_array *tr = inode->i_private;
7916 	struct trace_iterator *iter;
7917 	struct seq_file *m;
7918 	int ret;
7919 
7920 	ret = tracing_check_open_get_tr(tr);
7921 	if (ret)
7922 		return ret;
7923 
7924 	if (file->f_mode & FMODE_READ) {
7925 		iter = __tracing_open(inode, file, true);
7926 		if (IS_ERR(iter))
7927 			ret = PTR_ERR(iter);
7928 	} else {
7929 		/* Writes still need the seq_file to hold the private data */
7930 		ret = -ENOMEM;
7931 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7932 		if (!m)
7933 			goto out;
7934 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7935 		if (!iter) {
7936 			kfree(m);
7937 			goto out;
7938 		}
7939 		ret = 0;
7940 
7941 		iter->tr = tr;
7942 		iter->array_buffer = &tr->max_buffer;
7943 		iter->cpu_file = tracing_get_cpu(inode);
7944 		m->private = iter;
7945 		file->private_data = m;
7946 	}
7947 out:
7948 	if (ret < 0)
7949 		trace_array_put(tr);
7950 
7951 	return ret;
7952 }
7953 
tracing_swap_cpu_buffer(void * tr)7954 static void tracing_swap_cpu_buffer(void *tr)
7955 {
7956 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7957 }
7958 
7959 static ssize_t
tracing_snapshot_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7960 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7961 		       loff_t *ppos)
7962 {
7963 	struct seq_file *m = filp->private_data;
7964 	struct trace_iterator *iter = m->private;
7965 	struct trace_array *tr = iter->tr;
7966 	unsigned long val;
7967 	int ret;
7968 
7969 	ret = tracing_update_buffers(tr);
7970 	if (ret < 0)
7971 		return ret;
7972 
7973 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7974 	if (ret)
7975 		return ret;
7976 
7977 	guard(mutex)(&trace_types_lock);
7978 
7979 	if (tr->current_trace->use_max_tr)
7980 		return -EBUSY;
7981 
7982 	local_irq_disable();
7983 	arch_spin_lock(&tr->max_lock);
7984 	if (tr->cond_snapshot)
7985 		ret = -EBUSY;
7986 	arch_spin_unlock(&tr->max_lock);
7987 	local_irq_enable();
7988 	if (ret)
7989 		return ret;
7990 
7991 	switch (val) {
7992 	case 0:
7993 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7994 			return -EINVAL;
7995 		if (tr->allocated_snapshot)
7996 			free_snapshot(tr);
7997 		break;
7998 	case 1:
7999 /* Only allow per-cpu swap if the ring buffer supports it */
8000 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
8001 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
8002 			return -EINVAL;
8003 #endif
8004 		if (tr->allocated_snapshot)
8005 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
8006 					&tr->array_buffer, iter->cpu_file);
8007 
8008 		ret = tracing_arm_snapshot_locked(tr);
8009 		if (ret)
8010 			return ret;
8011 
8012 		/* Now, we're going to swap */
8013 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
8014 			local_irq_disable();
8015 			update_max_tr(tr, current, smp_processor_id(), NULL);
8016 			local_irq_enable();
8017 		} else {
8018 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
8019 						 (void *)tr, 1);
8020 		}
8021 		tracing_disarm_snapshot(tr);
8022 		break;
8023 	default:
8024 		if (tr->allocated_snapshot) {
8025 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
8026 				tracing_reset_online_cpus(&tr->max_buffer);
8027 			else
8028 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
8029 		}
8030 		break;
8031 	}
8032 
8033 	if (ret >= 0) {
8034 		*ppos += cnt;
8035 		ret = cnt;
8036 	}
8037 
8038 	return ret;
8039 }
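
/*
 * Illustrative usage (tracefs path assumed): this write handler backs
 * the "snapshot" file, matching the switch above:
 *
 *	echo 0 > snapshot	# free the snapshot buffer
 *	echo 1 > snapshot	# allocate if needed and take a snapshot
 *	echo 2 > snapshot	# clear the snapshot contents (any value > 1)
 *	cat snapshot		# read the snapshotted trace
 */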
8040 
tracing_snapshot_release(struct inode * inode,struct file * file)8041 static int tracing_snapshot_release(struct inode *inode, struct file *file)
8042 {
8043 	struct seq_file *m = file->private_data;
8044 	int ret;
8045 
8046 	ret = tracing_release(inode, file);
8047 
8048 	if (file->f_mode & FMODE_READ)
8049 		return ret;
8050 
8051 	/* If write only, the seq_file is just a stub */
8052 	if (m)
8053 		kfree(m->private);
8054 	kfree(m);
8055 
8056 	return 0;
8057 }
8058 
8059 static int tracing_buffers_open(struct inode *inode, struct file *filp);
8060 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
8061 				    size_t count, loff_t *ppos);
8062 static int tracing_buffers_release(struct inode *inode, struct file *file);
8063 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8064 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
8065 
snapshot_raw_open(struct inode * inode,struct file * filp)8066 static int snapshot_raw_open(struct inode *inode, struct file *filp)
8067 {
8068 	struct ftrace_buffer_info *info;
8069 	int ret;
8070 
8071 	/* The following checks for tracefs lockdown */
8072 	ret = tracing_buffers_open(inode, filp);
8073 	if (ret < 0)
8074 		return ret;
8075 
8076 	info = filp->private_data;
8077 
8078 	if (info->iter.trace->use_max_tr) {
8079 		tracing_buffers_release(inode, filp);
8080 		return -EBUSY;
8081 	}
8082 
8083 	info->iter.snapshot = true;
8084 	info->iter.array_buffer = &info->iter.tr->max_buffer;
8085 
8086 	return ret;
8087 }
8088 
8089 #endif /* CONFIG_TRACER_SNAPSHOT */
8090 
8091 
8092 static const struct file_operations tracing_thresh_fops = {
8093 	.open		= tracing_open_generic,
8094 	.read		= tracing_thresh_read,
8095 	.write		= tracing_thresh_write,
8096 	.llseek		= generic_file_llseek,
8097 };
8098 
8099 #ifdef CONFIG_TRACER_MAX_TRACE
8100 static const struct file_operations tracing_max_lat_fops = {
8101 	.open		= tracing_open_generic_tr,
8102 	.read		= tracing_max_lat_read,
8103 	.write		= tracing_max_lat_write,
8104 	.llseek		= generic_file_llseek,
8105 	.release	= tracing_release_generic_tr,
8106 };
8107 #endif
8108 
8109 static const struct file_operations set_tracer_fops = {
8110 	.open		= tracing_open_generic_tr,
8111 	.read		= tracing_set_trace_read,
8112 	.write		= tracing_set_trace_write,
8113 	.llseek		= generic_file_llseek,
8114 	.release	= tracing_release_generic_tr,
8115 };
8116 
8117 static const struct file_operations tracing_pipe_fops = {
8118 	.open		= tracing_open_pipe,
8119 	.poll		= tracing_poll_pipe,
8120 	.read		= tracing_read_pipe,
8121 	.splice_read	= tracing_splice_read_pipe,
8122 	.release	= tracing_release_pipe,
8123 };
8124 
8125 static const struct file_operations tracing_entries_fops = {
8126 	.open		= tracing_open_generic_tr,
8127 	.read		= tracing_entries_read,
8128 	.write		= tracing_entries_write,
8129 	.llseek		= generic_file_llseek,
8130 	.release	= tracing_release_generic_tr,
8131 };
8132 
8133 static const struct file_operations tracing_syscall_buf_fops = {
8134 	.open		= tracing_open_generic_tr,
8135 	.read		= tracing_syscall_buf_read,
8136 	.write		= tracing_syscall_buf_write,
8137 	.llseek		= generic_file_llseek,
8138 	.release	= tracing_release_generic_tr,
8139 };
8140 
8141 static const struct file_operations tracing_buffer_meta_fops = {
8142 	.open		= tracing_buffer_meta_open,
8143 	.read		= seq_read,
8144 	.llseek		= seq_lseek,
8145 	.release	= tracing_seq_release,
8146 };
8147 
8148 static const struct file_operations tracing_total_entries_fops = {
8149 	.open		= tracing_open_generic_tr,
8150 	.read		= tracing_total_entries_read,
8151 	.llseek		= generic_file_llseek,
8152 	.release	= tracing_release_generic_tr,
8153 };
8154 
8155 static const struct file_operations tracing_free_buffer_fops = {
8156 	.open		= tracing_open_generic_tr,
8157 	.write		= tracing_free_buffer_write,
8158 	.release	= tracing_free_buffer_release,
8159 };
8160 
8161 static const struct file_operations tracing_mark_fops = {
8162 	.open		= tracing_mark_open,
8163 	.write		= tracing_mark_write,
8164 	.release	= tracing_mark_release,
8165 };
8166 
8167 static const struct file_operations tracing_mark_raw_fops = {
8168 	.open		= tracing_mark_open,
8169 	.write		= tracing_mark_raw_write,
8170 	.release	= tracing_mark_release,
8171 };
8172 
8173 static const struct file_operations trace_clock_fops = {
8174 	.open		= tracing_clock_open,
8175 	.read		= seq_read,
8176 	.llseek		= seq_lseek,
8177 	.release	= tracing_single_release_tr,
8178 	.write		= tracing_clock_write,
8179 };
8180 
8181 static const struct file_operations trace_time_stamp_mode_fops = {
8182 	.open		= tracing_time_stamp_mode_open,
8183 	.read		= seq_read,
8184 	.llseek		= seq_lseek,
8185 	.release	= tracing_single_release_tr,
8186 };
8187 
8188 static const struct file_operations last_boot_fops = {
8189 	.open		= tracing_last_boot_open,
8190 	.read		= seq_read,
8191 	.llseek		= seq_lseek,
8192 	.release	= tracing_seq_release,
8193 };
8194 
8195 #ifdef CONFIG_TRACER_SNAPSHOT
8196 static const struct file_operations snapshot_fops = {
8197 	.open		= tracing_snapshot_open,
8198 	.read		= seq_read,
8199 	.write		= tracing_snapshot_write,
8200 	.llseek		= tracing_lseek,
8201 	.release	= tracing_snapshot_release,
8202 };
8203 
8204 static const struct file_operations snapshot_raw_fops = {
8205 	.open		= snapshot_raw_open,
8206 	.read		= tracing_buffers_read,
8207 	.release	= tracing_buffers_release,
8208 	.splice_read	= tracing_buffers_splice_read,
8209 };
8210 
8211 #endif /* CONFIG_TRACER_SNAPSHOT */
8212 
8213 /*
8214  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
8215  * @filp: The active open file structure
8216  * @ubuf: The userspace provided buffer containing the value to write
8217  * @cnt: The number of bytes to read from @ubuf
8218  * @ppos: The current "file" position
8219  *
8220  * This function implements the write interface for a struct trace_min_max_param.
8221  * The filp->private_data must point to a trace_min_max_param structure that
8222  * defines where to write the value, the min and the max acceptable values,
8223  * and a lock to protect the write.
8224  */
8225 static ssize_t
trace_min_max_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)8226 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
8227 {
8228 	struct trace_min_max_param *param = filp->private_data;
8229 	u64 val;
8230 	int err;
8231 
8232 	if (!param)
8233 		return -EFAULT;
8234 
8235 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
8236 	if (err)
8237 		return err;
8238 
8239 	if (param->lock)
8240 		mutex_lock(param->lock);
8241 
8242 	if (param->min && val < *param->min)
8243 		err = -EINVAL;
8244 
8245 	if (param->max && val > *param->max)
8246 		err = -EINVAL;
8247 
8248 	if (!err)
8249 		*param->val = val;
8250 
8251 	if (param->lock)
8252 		mutex_unlock(param->lock);
8253 
8254 	if (err)
8255 		return err;
8256 
8257 	return cnt;
8258 }
8259 
8260 /*
8261  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
8262  * @filp: The active open file structure
8263  * @ubuf: The userspace provided buffer to read value into
8264  * @cnt: The maximum number of bytes to read
8265  * @ppos: The current "file" position
8266  *
8267  * This function implements the read interface for a struct trace_min_max_param.
8268  * The filp->private_data must point to a trace_min_max_param struct with valid
8269  * data.
8270  */
8271 static ssize_t
trace_min_max_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8272 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
8273 {
8274 	struct trace_min_max_param *param = filp->private_data;
8275 	char buf[U64_STR_SIZE];
8276 	int len;
8277 	u64 val;
8278 
8279 	if (!param)
8280 		return -EFAULT;
8281 
8282 	val = *param->val;
8283 
8284 	if (cnt > sizeof(buf))
8285 		cnt = sizeof(buf);
8286 
8287 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
8288 
8289 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
8290 }
8291 
8292 const struct file_operations trace_min_max_fops = {
8293 	.open		= tracing_open_generic,
8294 	.read		= trace_min_max_read,
8295 	.write		= trace_min_max_write,
8296 };
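
/*
 * Sketch of how a user of trace_min_max_fops might wire up a knob,
 * assuming the trace_min_max_param fields used above (val/min/max/lock);
 * the names here are purely illustrative:
 *
 *	static u64 my_val, my_min = 1, my_max = 1000;
 *	static struct trace_min_max_param my_param = {
 *		.lock	= &my_mutex,
 *		.val	= &my_val,
 *		.min	= &my_min,
 *		.max	= &my_max,
 *	};
 *	trace_create_file("my_knob", TRACE_MODE_WRITE, parent,
 *			  &my_param, &trace_min_max_fops);
 */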
8297 
8298 #define TRACING_LOG_ERRS_MAX	8
8299 #define TRACING_LOG_LOC_MAX	128
8300 
8301 #define CMD_PREFIX "  Command: "
8302 
8303 struct err_info {
8304 	const char	**errs;	/* ptr to loc-specific array of err strings */
8305 	u8		type;	/* index into errs -> specific err string */
8306 	u16		pos;	/* caret position */
8307 	u64		ts;
8308 };
8309 
8310 struct tracing_log_err {
8311 	struct list_head	list;
8312 	struct err_info		info;
8313 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
8314 	char			*cmd;                     /* what caused err */
8315 };
8316 
8317 static DEFINE_MUTEX(tracing_err_log_lock);
8318 
alloc_tracing_log_err(int len)8319 static struct tracing_log_err *alloc_tracing_log_err(int len)
8320 {
8321 	struct tracing_log_err *err;
8322 
8323 	err = kzalloc(sizeof(*err), GFP_KERNEL);
8324 	if (!err)
8325 		return ERR_PTR(-ENOMEM);
8326 
8327 	err->cmd = kzalloc(len, GFP_KERNEL);
8328 	if (!err->cmd) {
8329 		kfree(err);
8330 		return ERR_PTR(-ENOMEM);
8331 	}
8332 
8333 	return err;
8334 }
8335 
free_tracing_log_err(struct tracing_log_err * err)8336 static void free_tracing_log_err(struct tracing_log_err *err)
8337 {
8338 	kfree(err->cmd);
8339 	kfree(err);
8340 }
8341 
get_tracing_log_err(struct trace_array * tr,int len)8342 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
8343 						   int len)
8344 {
8345 	struct tracing_log_err *err;
8346 	char *cmd;
8347 
8348 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
8349 		err = alloc_tracing_log_err(len);
8350 		if (PTR_ERR(err) != -ENOMEM)
8351 			tr->n_err_log_entries++;
8352 
8353 		return err;
8354 	}
8355 	cmd = kzalloc(len, GFP_KERNEL);
8356 	if (!cmd)
8357 		return ERR_PTR(-ENOMEM);
8358 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8359 	kfree(err->cmd);
8360 	err->cmd = cmd;
8361 	list_del(&err->list);
8362 
8363 	return err;
8364 }
8365 
8366 /**
8367  * err_pos - find the position of a string within a command for error careting
8368  * @cmd: The tracing command that caused the error
8369  * @str: The string to position the caret at within @cmd
8370  *
8371  * Finds the position of the first occurrence of @str within @cmd.  The
8372  * return value can be passed to tracing_log_err() for caret placement
8373  * within @cmd.
8374  *
8375  * Returns the index within @cmd of the first occurrence of @str or 0
8376  * if @str was not found.
8377  */
err_pos(char * cmd,const char * str)8378 unsigned int err_pos(char *cmd, const char *str)
8379 {
8380 	char *found;
8381 
8382 	if (WARN_ON(!strlen(cmd)))
8383 		return 0;
8384 
8385 	found = strstr(cmd, str);
8386 	if (found)
8387 		return found - cmd;
8388 
8389 	return 0;
8390 }
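
/*
 * For example, err_pos("snapshot:bar", "bar") returns 9, the offset of
 * "bar" within the command, which callers pass to tracing_log_err() to
 * place the caret.
 */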
8391 
8392 /**
8393  * tracing_log_err - write an error to the tracing error log
8394  * @tr: The associated trace array for the error (NULL for top level array)
8395  * @loc: A string describing where the error occurred
8396  * @cmd: The tracing command that caused the error
8397  * @errs: The array of loc-specific static error strings
8398  * @type: The index into errs[], which produces the specific static err string
8399  * @pos: The position the caret should be placed in the cmd
8400  *
8401  * Writes an error into tracing/error_log of the form:
8402  *
8403  * <loc>: error: <text>
8404  *   Command: <cmd>
8405  *              ^
8406  *
8407  * tracing/error_log is a small log file containing the last
8408  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
8409  * unless there has been a tracing error, and the error log can be
8410  * cleared and have its memory freed by writing the empty string in
8411  * truncation mode to it i.e. echo > tracing/error_log.
8412  *
8413  * NOTE: the @errs array along with the @type param are used to
8414  * produce a static error string - this string is not copied and saved
8415  * when the error is logged - only a pointer to it is saved.  See
8416  * existing callers for examples of how static strings are typically
8417  * defined for use with tracing_log_err().
8418  */
tracing_log_err(struct trace_array * tr,const char * loc,const char * cmd,const char ** errs,u8 type,u16 pos)8419 void tracing_log_err(struct trace_array *tr,
8420 		     const char *loc, const char *cmd,
8421 		     const char **errs, u8 type, u16 pos)
8422 {
8423 	struct tracing_log_err *err;
8424 	int len = 0;
8425 
8426 	if (!tr)
8427 		tr = &global_trace;
8428 
8429 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8430 
8431 	guard(mutex)(&tracing_err_log_lock);
8432 
8433 	err = get_tracing_log_err(tr, len);
8434 	if (PTR_ERR(err) == -ENOMEM)
8435 		return;
8436 
8437 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8438 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8439 
8440 	err->info.errs = errs;
8441 	err->info.type = type;
8442 	err->info.pos = pos;
8443 	err->info.ts = local_clock();
8444 
8445 	list_add_tail(&err->list, &tr->err_log);
8446 }
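
/*
 * Illustrative usage (tracefs path assumed): the entries logged here
 * are read back from "error_log", and the log is cleared by truncating
 * it, exactly as described above:
 *
 *	cat /sys/kernel/tracing/error_log
 *	echo > /sys/kernel/tracing/error_log
 */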
8447 
clear_tracing_err_log(struct trace_array * tr)8448 static void clear_tracing_err_log(struct trace_array *tr)
8449 {
8450 	struct tracing_log_err *err, *next;
8451 
8452 	guard(mutex)(&tracing_err_log_lock);
8453 
8454 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
8455 		list_del(&err->list);
8456 		free_tracing_log_err(err);
8457 	}
8458 
8459 	tr->n_err_log_entries = 0;
8460 }
8461 
tracing_err_log_seq_start(struct seq_file * m,loff_t * pos)8462 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8463 {
8464 	struct trace_array *tr = m->private;
8465 
8466 	mutex_lock(&tracing_err_log_lock);
8467 
8468 	return seq_list_start(&tr->err_log, *pos);
8469 }
8470 
tracing_err_log_seq_next(struct seq_file * m,void * v,loff_t * pos)8471 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8472 {
8473 	struct trace_array *tr = m->private;
8474 
8475 	return seq_list_next(v, &tr->err_log, pos);
8476 }
8477 
tracing_err_log_seq_stop(struct seq_file * m,void * v)8478 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8479 {
8480 	mutex_unlock(&tracing_err_log_lock);
8481 }
8482 
tracing_err_log_show_pos(struct seq_file * m,u16 pos)8483 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8484 {
8485 	u16 i;
8486 
8487 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8488 		seq_putc(m, ' ');
8489 	for (i = 0; i < pos; i++)
8490 		seq_putc(m, ' ');
8491 	seq_puts(m, "^\n");
8492 }
8493 
tracing_err_log_seq_show(struct seq_file * m,void * v)8494 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8495 {
8496 	struct tracing_log_err *err = v;
8497 
8498 	if (err) {
8499 		const char *err_text = err->info.errs[err->info.type];
8500 		u64 sec = err->info.ts;
8501 		u32 nsec;
8502 
8503 		nsec = do_div(sec, NSEC_PER_SEC);
8504 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8505 			   err->loc, err_text);
8506 		seq_printf(m, "%s", err->cmd);
8507 		tracing_err_log_show_pos(m, err->info.pos);
8508 	}
8509 
8510 	return 0;
8511 }
8512 
8513 static const struct seq_operations tracing_err_log_seq_ops = {
8514 	.start  = tracing_err_log_seq_start,
8515 	.next   = tracing_err_log_seq_next,
8516 	.stop   = tracing_err_log_seq_stop,
8517 	.show   = tracing_err_log_seq_show
8518 };
8519 
tracing_err_log_open(struct inode * inode,struct file * file)8520 static int tracing_err_log_open(struct inode *inode, struct file *file)
8521 {
8522 	struct trace_array *tr = inode->i_private;
8523 	int ret = 0;
8524 
8525 	ret = tracing_check_open_get_tr(tr);
8526 	if (ret)
8527 		return ret;
8528 
8529 	/* If this file was opened for write, then erase contents */
8530 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8531 		clear_tracing_err_log(tr);
8532 
8533 	if (file->f_mode & FMODE_READ) {
8534 		ret = seq_open(file, &tracing_err_log_seq_ops);
8535 		if (!ret) {
8536 			struct seq_file *m = file->private_data;
8537 			m->private = tr;
8538 		} else {
8539 			trace_array_put(tr);
8540 		}
8541 	}
8542 	return ret;
8543 }
8544 
tracing_err_log_write(struct file * file,const char __user * buffer,size_t count,loff_t * ppos)8545 static ssize_t tracing_err_log_write(struct file *file,
8546 				     const char __user *buffer,
8547 				     size_t count, loff_t *ppos)
8548 {
8549 	return count;
8550 }
8551 
tracing_err_log_release(struct inode * inode,struct file * file)8552 static int tracing_err_log_release(struct inode *inode, struct file *file)
8553 {
8554 	struct trace_array *tr = inode->i_private;
8555 
8556 	trace_array_put(tr);
8557 
8558 	if (file->f_mode & FMODE_READ)
8559 		seq_release(inode, file);
8560 
8561 	return 0;
8562 }
8563 
8564 static const struct file_operations tracing_err_log_fops = {
8565 	.open           = tracing_err_log_open,
8566 	.write		= tracing_err_log_write,
8567 	.read           = seq_read,
8568 	.llseek         = tracing_lseek,
8569 	.release        = tracing_err_log_release,
8570 };
8571 
tracing_buffers_open(struct inode * inode,struct file * filp)8572 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8573 {
8574 	struct trace_array *tr = inode->i_private;
8575 	struct ftrace_buffer_info *info;
8576 	int ret;
8577 
8578 	ret = tracing_check_open_get_tr(tr);
8579 	if (ret)
8580 		return ret;
8581 
8582 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
8583 	if (!info) {
8584 		trace_array_put(tr);
8585 		return -ENOMEM;
8586 	}
8587 
8588 	mutex_lock(&trace_types_lock);
8589 
8590 	info->iter.tr		= tr;
8591 	info->iter.cpu_file	= tracing_get_cpu(inode);
8592 	info->iter.trace	= tr->current_trace;
8593 	info->iter.array_buffer = &tr->array_buffer;
8594 	info->spare		= NULL;
8595 	/* Force reading ring buffer for first read */
8596 	info->read		= (unsigned int)-1;
8597 
8598 	filp->private_data = info;
8599 
8600 	tr->trace_ref++;
8601 
8602 	mutex_unlock(&trace_types_lock);
8603 
8604 	ret = nonseekable_open(inode, filp);
8605 	if (ret < 0)
8606 		trace_array_put(tr);
8607 
8608 	return ret;
8609 }
8610 
8611 static __poll_t
tracing_buffers_poll(struct file * filp,poll_table * poll_table)8612 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8613 {
8614 	struct ftrace_buffer_info *info = filp->private_data;
8615 	struct trace_iterator *iter = &info->iter;
8616 
8617 	return trace_poll(iter, filp, poll_table);
8618 }
8619 
8620 static ssize_t
8621 tracing_buffers_read(struct file *filp, char __user *ubuf,
8622 		     size_t count, loff_t *ppos)
8623 {
8624 	struct ftrace_buffer_info *info = filp->private_data;
8625 	struct trace_iterator *iter = &info->iter;
8626 	void *trace_data;
8627 	int page_size;
8628 	ssize_t ret = 0;
8629 	ssize_t size;
8630 
8631 	if (!count)
8632 		return 0;
8633 
8634 #ifdef CONFIG_TRACER_MAX_TRACE
8635 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8636 		return -EBUSY;
8637 #endif
8638 
8639 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8640 
8641 	/* Make sure the spare matches the current sub buffer size */
8642 	if (info->spare) {
8643 		if (page_size != info->spare_size) {
8644 			ring_buffer_free_read_page(iter->array_buffer->buffer,
8645 						   info->spare_cpu, info->spare);
8646 			info->spare = NULL;
8647 		}
8648 	}
8649 
8650 	if (!info->spare) {
8651 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8652 							  iter->cpu_file);
8653 		if (IS_ERR(info->spare)) {
8654 			ret = PTR_ERR(info->spare);
8655 			info->spare = NULL;
8656 		} else {
8657 			info->spare_cpu = iter->cpu_file;
8658 			info->spare_size = page_size;
8659 		}
8660 	}
8661 	if (!info->spare)
8662 		return ret;
8663 
8664 	/* Do we have previous read data to read? */
8665 	if (info->read < page_size)
8666 		goto read;
8667 
8668  again:
8669 	trace_access_lock(iter->cpu_file);
8670 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8671 				    info->spare,
8672 				    count,
8673 				    iter->cpu_file, 0);
8674 	trace_access_unlock(iter->cpu_file);
8675 
8676 	if (ret < 0) {
8677 		if (trace_empty(iter) && !iter->closed) {
8678 			if (update_last_data_if_empty(iter->tr))
8679 				return 0;
8680 
8681 			if ((filp->f_flags & O_NONBLOCK))
8682 				return -EAGAIN;
8683 
8684 			ret = wait_on_pipe(iter, 0);
8685 			if (ret)
8686 				return ret;
8687 
8688 			goto again;
8689 		}
8690 		return 0;
8691 	}
8692 
8693 	info->read = 0;
8694  read:
8695 	size = page_size - info->read;
8696 	if (size > count)
8697 		size = count;
8698 	trace_data = ring_buffer_read_page_data(info->spare);
8699 	ret = copy_to_user(ubuf, trace_data + info->read, size);
8700 	if (ret == size)
8701 		return -EFAULT;
8702 
8703 	size -= ret;
8704 
8705 	*ppos += size;
8706 	info->read += size;
8707 
8708 	return size;
8709 }
8710 
8711 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
8712 {
8713 	struct ftrace_buffer_info *info = file->private_data;
8714 	struct trace_iterator *iter = &info->iter;
8715 
8716 	iter->closed = true;
8717 	/* Make sure the waiters see the new wait_index */
8718 	(void)atomic_fetch_inc_release(&iter->wait_index);
8719 
8720 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8721 
8722 	return 0;
8723 }
8724 
8725 static int tracing_buffers_release(struct inode *inode, struct file *file)
8726 {
8727 	struct ftrace_buffer_info *info = file->private_data;
8728 	struct trace_iterator *iter = &info->iter;
8729 
8730 	guard(mutex)(&trace_types_lock);
8731 
8732 	iter->tr->trace_ref--;
8733 
8734 	__trace_array_put(iter->tr);
8735 
8736 	if (info->spare)
8737 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8738 					   info->spare_cpu, info->spare);
8739 	kvfree(info);
8740 
8741 	return 0;
8742 }
8743 
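/*
 * A buffer_ref is a reference-counted handle to a ring buffer read page.
 * It lets a page handed to splice_to_pipe() stay allocated until every
 * pipe buffer that points at it has been released.
 */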
8744 struct buffer_ref {
8745 	struct trace_buffer	*buffer;
8746 	void			*page;
8747 	int			cpu;
8748 	refcount_t		refcount;
8749 };
8750 
8751 static void buffer_ref_release(struct buffer_ref *ref)
8752 {
8753 	if (!refcount_dec_and_test(&ref->refcount))
8754 		return;
8755 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8756 	kfree(ref);
8757 }
8758 
8759 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8760 				    struct pipe_buffer *buf)
8761 {
8762 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8763 
8764 	buffer_ref_release(ref);
8765 	buf->private = 0;
8766 }
8767 
8768 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8769 				struct pipe_buffer *buf)
8770 {
8771 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8772 
8773 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8774 		return false;
8775 
8776 	refcount_inc(&ref->refcount);
8777 	return true;
8778 }
8779 
8780 /* Pipe buffer operations for a buffer. */
8781 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8782 	.release		= buffer_pipe_buf_release,
8783 	.get			= buffer_pipe_buf_get,
8784 };
8785 
8786 /*
8787  * Callback from splice_to_pipe(); releases any pages left at the end
8788  * of the spd in case we errored out while filling the pipe.
8789  */
8790 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8791 {
8792 	struct buffer_ref *ref =
8793 		(struct buffer_ref *)spd->partial[i].private;
8794 
8795 	buffer_ref_release(ref);
8796 	spd->partial[i].private = 0;
8797 }
8798 
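/*
 * Splice whole sub-buffer pages from the per-CPU ring buffer into a pipe
 * without copying. Each page is wrapped in a buffer_ref so it is only
 * returned to the ring buffer once the pipe is done with it.
 */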
8799 static ssize_t
8800 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8801 			    struct pipe_inode_info *pipe, size_t len,
8802 			    unsigned int flags)
8803 {
8804 	struct ftrace_buffer_info *info = file->private_data;
8805 	struct trace_iterator *iter = &info->iter;
8806 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8807 	struct page *pages_def[PIPE_DEF_BUFFERS];
8808 	struct splice_pipe_desc spd = {
8809 		.pages		= pages_def,
8810 		.partial	= partial_def,
8811 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8812 		.ops		= &buffer_pipe_buf_ops,
8813 		.spd_release	= buffer_spd_release,
8814 	};
8815 	struct buffer_ref *ref;
8816 	bool woken = false;
8817 	int page_size;
8818 	int entries, i;
8819 	ssize_t ret = 0;
8820 
8821 #ifdef CONFIG_TRACER_MAX_TRACE
8822 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8823 		return -EBUSY;
8824 #endif
8825 
8826 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8827 	if (*ppos & (page_size - 1))
8828 		return -EINVAL;
8829 
8830 	if (len & (page_size - 1)) {
8831 		if (len < page_size)
8832 			return -EINVAL;
8833 		len &= (~(page_size - 1));
8834 	}
8835 
8836 	if (splice_grow_spd(pipe, &spd))
8837 		return -ENOMEM;
8838 
8839  again:
8840 	trace_access_lock(iter->cpu_file);
8841 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8842 
8843 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8844 		struct page *page;
8845 		int r;
8846 
8847 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8848 		if (!ref) {
8849 			ret = -ENOMEM;
8850 			break;
8851 		}
8852 
8853 		refcount_set(&ref->refcount, 1);
8854 		ref->buffer = iter->array_buffer->buffer;
8855 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8856 		if (IS_ERR(ref->page)) {
8857 			ret = PTR_ERR(ref->page);
8858 			ref->page = NULL;
8859 			kfree(ref);
8860 			break;
8861 		}
8862 		ref->cpu = iter->cpu_file;
8863 
8864 		r = ring_buffer_read_page(ref->buffer, ref->page,
8865 					  len, iter->cpu_file, 1);
8866 		if (r < 0) {
8867 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8868 						   ref->page);
8869 			kfree(ref);
8870 			break;
8871 		}
8872 
8873 		page = virt_to_page(ring_buffer_read_page_data(ref->page));
8874 
8875 		spd.pages[i] = page;
8876 		spd.partial[i].len = page_size;
8877 		spd.partial[i].offset = 0;
8878 		spd.partial[i].private = (unsigned long)ref;
8879 		spd.nr_pages++;
8880 		*ppos += page_size;
8881 
8882 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8883 	}
8884 
8885 	trace_access_unlock(iter->cpu_file);
8886 	spd.nr_pages = i;
8887 
8888 	/* did we read anything? */
8889 	if (!spd.nr_pages) {
8890 
8891 		if (ret)
8892 			goto out;
8893 
8894 		if (woken)
8895 			goto out;
8896 
8897 		ret = -EAGAIN;
8898 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8899 			goto out;
8900 
8901 		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8902 		if (ret)
8903 			goto out;
8904 
8905 		/* No need to wait after waking up when tracing is off */
8906 		if (!tracer_tracing_is_on(iter->tr))
8907 			goto out;
8908 
8909 		/* Iterate one more time to collect any new data, then exit */
8910 		woken = true;
8911 
8912 		goto again;
8913 	}
8914 
8915 	ret = splice_to_pipe(pipe, &spd);
8916 out:
8917 	splice_shrink_spd(&spd);
8918 
8919 	return ret;
8920 }
8921 
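/*
 * ioctl handler for trace_pipe_raw. TRACE_MMAP_IOCTL_GET_READER advances
 * the reader page of a user space mapped buffer (waiting for data unless
 * the file is non-blocking); any other non-zero cmd is rejected.
 */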
8922 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8923 {
8924 	struct ftrace_buffer_info *info = file->private_data;
8925 	struct trace_iterator *iter = &info->iter;
8926 	int err;
8927 
8928 	if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
8929 		if (!(file->f_flags & O_NONBLOCK)) {
8930 			err = ring_buffer_wait(iter->array_buffer->buffer,
8931 					       iter->cpu_file,
8932 					       iter->tr->buffer_percent,
8933 					       NULL, NULL);
8934 			if (err)
8935 				return err;
8936 		}
8937 
8938 		return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8939 						  iter->cpu_file);
8940 	} else if (cmd) {
8941 		return -ENOTTY;
8942 	}
8943 
8944 	/*
8945 	 * An ioctl call with cmd 0 to the ring buffer file will wake up all
8946 	 * waiters
8947 	 */
8948 	guard(mutex)(&trace_types_lock);
8949 
8950 	/* Make sure the waiters see the new wait_index */
8951 	(void)atomic_fetch_inc_release(&iter->wait_index);
8952 
8953 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8954 
8955 	return 0;
8956 }
8957 
8958 #ifdef CONFIG_TRACER_MAX_TRACE
8959 static int get_snapshot_map(struct trace_array *tr)
8960 {
8961 	int err = 0;
8962 
8963 	/*
8964 	 * Called with mmap_lock held. lockdep would be unhappy if we were to
8965 	 * take trace_types_lock here. Instead use the specific
8966 	 * snapshot_trigger_lock.
8967 	 */
8968 	spin_lock(&tr->snapshot_trigger_lock);
8969 
8970 	if (tr->snapshot || tr->mapped == UINT_MAX)
8971 		err = -EBUSY;
8972 	else
8973 		tr->mapped++;
8974 
8975 	spin_unlock(&tr->snapshot_trigger_lock);
8976 
8977 	/* Wait for update_max_tr() to observe iter->tr->mapped */
8978 	if (tr->mapped == 1)
8979 		synchronize_rcu();
8980 
8981 	return err;
8982 
8983 }
8984 static void put_snapshot_map(struct trace_array *tr)
8985 {
8986 	spin_lock(&tr->snapshot_trigger_lock);
8987 	if (!WARN_ON(!tr->mapped))
8988 		tr->mapped--;
8989 	spin_unlock(&tr->snapshot_trigger_lock);
8990 }
8991 #else
8992 static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
8993 static inline void put_snapshot_map(struct trace_array *tr) { }
8994 #endif
8995 
8996 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8997 {
8998 	struct ftrace_buffer_info *info = vma->vm_file->private_data;
8999 	struct trace_iterator *iter = &info->iter;
9000 
9001 	WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
9002 	put_snapshot_map(iter->tr);
9003 }
9004 
9005 static int tracing_buffers_may_split(struct vm_area_struct *vma, unsigned long addr)
9006 {
9007 	/*
9008 	 * Trace buffer mappings require the complete buffer including
9009 	 * the meta page. Partial mappings are not supported.
9010 	 */
9011 	return -EINVAL;
9012 }
9013 
9014 static const struct vm_operations_struct tracing_buffers_vmops = {
9015 	.close		= tracing_buffers_mmap_close,
9016 	.may_split      = tracing_buffers_may_split,
9017 };
9018 
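/*
 * mmap handler for trace_pipe_raw: maps the per-CPU ring buffer into
 * user space. get_snapshot_map() marks the buffer as mapped so snapshot
 * swaps are blocked for as long as the mapping exists.
 */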
9019 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
9020 {
9021 	struct ftrace_buffer_info *info = filp->private_data;
9022 	struct trace_iterator *iter = &info->iter;
9023 	int ret = 0;
9024 
9025 	/* memmap'ed and backup buffers are not supported for user space mmap */
9026 	if (iter->tr->flags & (TRACE_ARRAY_FL_MEMMAP | TRACE_ARRAY_FL_VMALLOC))
9027 		return -ENODEV;
9028 
9029 	ret = get_snapshot_map(iter->tr);
9030 	if (ret)
9031 		return ret;
9032 
9033 	ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
9034 	if (ret)
9035 		put_snapshot_map(iter->tr);
9036 
9037 	vma->vm_ops = &tracing_buffers_vmops;
9038 
9039 	return ret;
9040 }
9041 
9042 static const struct file_operations tracing_buffers_fops = {
9043 	.open		= tracing_buffers_open,
9044 	.read		= tracing_buffers_read,
9045 	.poll		= tracing_buffers_poll,
9046 	.release	= tracing_buffers_release,
9047 	.flush		= tracing_buffers_flush,
9048 	.splice_read	= tracing_buffers_splice_read,
9049 	.unlocked_ioctl = tracing_buffers_ioctl,
9050 	.mmap		= tracing_buffers_mmap,
9051 };
9052 
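/*
 * Reader for the per_cpu/cpuX/stats file: reports entries, overruns,
 * bytes, dropped and read events, and the oldest/current timestamps for
 * one CPU's ring buffer.
 */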
9053 static ssize_t
9054 tracing_stats_read(struct file *filp, char __user *ubuf,
9055 		   size_t count, loff_t *ppos)
9056 {
9057 	struct inode *inode = file_inode(filp);
9058 	struct trace_array *tr = inode->i_private;
9059 	struct array_buffer *trace_buf = &tr->array_buffer;
9060 	int cpu = tracing_get_cpu(inode);
9061 	struct trace_seq *s;
9062 	unsigned long cnt;
9063 	unsigned long long t;
9064 	unsigned long usec_rem;
9065 
9066 	s = kmalloc(sizeof(*s), GFP_KERNEL);
9067 	if (!s)
9068 		return -ENOMEM;
9069 
9070 	trace_seq_init(s);
9071 
9072 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
9073 	trace_seq_printf(s, "entries: %ld\n", cnt);
9074 
9075 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
9076 	trace_seq_printf(s, "overrun: %ld\n", cnt);
9077 
9078 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
9079 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
9080 
9081 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
9082 	trace_seq_printf(s, "bytes: %ld\n", cnt);
9083 
9084 	if (trace_clocks[tr->clock_id].in_ns) {
9085 		/* local or global for trace_clock */
9086 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
9087 		usec_rem = do_div(t, USEC_PER_SEC);
9088 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
9089 								t, usec_rem);
9090 
9091 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
9092 		usec_rem = do_div(t, USEC_PER_SEC);
9093 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
9094 	} else {
9095 		/* counter or tsc mode for trace_clock */
9096 		trace_seq_printf(s, "oldest event ts: %llu\n",
9097 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
9098 
9099 		trace_seq_printf(s, "now ts: %llu\n",
9100 				ring_buffer_time_stamp(trace_buf->buffer));
9101 	}
9102 
9103 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
9104 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
9105 
9106 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
9107 	trace_seq_printf(s, "read events: %ld\n", cnt);
9108 
9109 	count = simple_read_from_buffer(ubuf, count, ppos,
9110 					s->buffer, trace_seq_used(s));
9111 
9112 	kfree(s);
9113 
9114 	return count;
9115 }
9116 
9117 static const struct file_operations tracing_stats_fops = {
9118 	.open		= tracing_open_generic_tr,
9119 	.read		= tracing_stats_read,
9120 	.llseek		= generic_file_llseek,
9121 	.release	= tracing_release_generic_tr,
9122 };
9123 
9124 #ifdef CONFIG_DYNAMIC_FTRACE
9125 
9126 static ssize_t
9127 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
9128 		  size_t cnt, loff_t *ppos)
9129 {
9130 	ssize_t ret;
9131 	char *buf;
9132 	int r;
9133 
9134 	/* 512 should be plenty to hold the amount needed */
9135 #define DYN_INFO_BUF_SIZE	512
9136 
9137 	buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
9138 	if (!buf)
9139 		return -ENOMEM;
9140 
9141 	r = scnprintf(buf, DYN_INFO_BUF_SIZE,
9142 		      "%ld pages:%ld groups: %ld\n"
9143 		      "ftrace boot update time = %llu (ns)\n"
9144 		      "ftrace module total update time = %llu (ns)\n",
9145 		      ftrace_update_tot_cnt,
9146 		      ftrace_number_of_pages,
9147 		      ftrace_number_of_groups,
9148 		      ftrace_update_time,
9149 		      ftrace_total_mod_time);
9150 
9151 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9152 	kfree(buf);
9153 	return ret;
9154 }
9155 
9156 static const struct file_operations tracing_dyn_info_fops = {
9157 	.open		= tracing_open_generic,
9158 	.read		= tracing_read_dyn_info,
9159 	.llseek		= generic_file_llseek,
9160 };
9161 #endif /* CONFIG_DYNAMIC_FTRACE */
9162 
9163 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
9164 static void
9165 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
9166 		struct trace_array *tr, struct ftrace_probe_ops *ops,
9167 		void *data)
9168 {
9169 	tracing_snapshot_instance(tr);
9170 }
9171 
9172 static void
9173 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
9174 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
9175 		      void *data)
9176 {
9177 	struct ftrace_func_mapper *mapper = data;
9178 	long *count = NULL;
9179 
9180 	if (mapper)
9181 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
9182 
9183 	if (count) {
9184 
9185 		if (*count <= 0)
9186 			return;
9187 
9188 		(*count)--;
9189 	}
9190 
9191 	tracing_snapshot_instance(tr);
9192 }
9193 
9194 static int
9195 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
9196 		      struct ftrace_probe_ops *ops, void *data)
9197 {
9198 	struct ftrace_func_mapper *mapper = data;
9199 	long *count = NULL;
9200 
9201 	seq_printf(m, "%ps:", (void *)ip);
9202 
9203 	seq_puts(m, "snapshot");
9204 
9205 	if (mapper)
9206 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
9207 
9208 	if (count)
9209 		seq_printf(m, ":count=%ld\n", *count);
9210 	else
9211 		seq_puts(m, ":unlimited\n");
9212 
9213 	return 0;
9214 }
9215 
9216 static int
9217 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
9218 		     unsigned long ip, void *init_data, void **data)
9219 {
9220 	struct ftrace_func_mapper *mapper = *data;
9221 
9222 	if (!mapper) {
9223 		mapper = allocate_ftrace_func_mapper();
9224 		if (!mapper)
9225 			return -ENOMEM;
9226 		*data = mapper;
9227 	}
9228 
9229 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
9230 }
9231 
9232 static void
9233 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
9234 		     unsigned long ip, void *data)
9235 {
9236 	struct ftrace_func_mapper *mapper = data;
9237 
9238 	if (!ip) {
9239 		if (!mapper)
9240 			return;
9241 		free_ftrace_func_mapper(mapper, NULL);
9242 		return;
9243 	}
9244 
9245 	ftrace_func_mapper_remove_ip(mapper, ip);
9246 }
9247 
9248 static struct ftrace_probe_ops snapshot_probe_ops = {
9249 	.func			= ftrace_snapshot,
9250 	.print			= ftrace_snapshot_print,
9251 };
9252 
9253 static struct ftrace_probe_ops snapshot_count_probe_ops = {
9254 	.func			= ftrace_count_snapshot,
9255 	.print			= ftrace_snapshot_print,
9256 	.init			= ftrace_snapshot_init,
9257 	.free			= ftrace_snapshot_free,
9258 };
9259 
9260 static int
9261 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
9262 			       char *glob, char *cmd, char *param, int enable)
9263 {
9264 	struct ftrace_probe_ops *ops;
9265 	void *count = (void *)-1;
9266 	char *number;
9267 	int ret;
9268 
9269 	if (!tr)
9270 		return -ENODEV;
9271 
9272 	/* hash funcs only work with set_ftrace_filter */
9273 	if (!enable)
9274 		return -EINVAL;
9275 
9276 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
9277 
9278 	if (glob[0] == '!') {
9279 		ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
9280 		if (!ret)
9281 			tracing_disarm_snapshot(tr);
9282 
9283 		return ret;
9284 	}
9285 
9286 	if (!param)
9287 		goto out_reg;
9288 
9289 	number = strsep(&param, ":");
9290 
9291 	if (!strlen(number))
9292 		goto out_reg;
9293 
9294 	/*
9295 	 * We use the callback data field (which is a pointer)
9296 	 * as our counter.
9297 	 */
9298 	ret = kstrtoul(number, 0, (unsigned long *)&count);
9299 	if (ret)
9300 		return ret;
9301 
9302  out_reg:
9303 	ret = tracing_arm_snapshot(tr);
9304 	if (ret < 0)
9305 		return ret;
9306 
9307 	ret = register_ftrace_function_probe(glob, tr, ops, count);
9308 	if (ret < 0)
9309 		tracing_disarm_snapshot(tr);
9310 
9311 	return ret < 0 ? ret : 0;
9312 }
9313 
9314 static struct ftrace_func_command ftrace_snapshot_cmd = {
9315 	.name			= "snapshot",
9316 	.func			= ftrace_trace_snapshot_callback,
9317 };
9318 
9319 static __init int register_snapshot_cmd(void)
9320 {
9321 	return register_ftrace_command(&ftrace_snapshot_cmd);
9322 }
9323 #else
9324 static inline __init int register_snapshot_cmd(void) { return 0; }
9325 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
9326 
9327 static struct dentry *tracing_get_dentry(struct trace_array *tr)
9328 {
9329 	/* Top directory uses NULL as the parent */
9330 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
9331 		return NULL;
9332 
9333 	if (WARN_ON(!tr->dir))
9334 		return ERR_PTR(-ENODEV);
9335 
9336 	/* All sub buffers have a descriptor */
9337 	return tr->dir;
9338 }
9339 
9340 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
9341 {
9342 	struct dentry *d_tracer;
9343 
9344 	if (tr->percpu_dir)
9345 		return tr->percpu_dir;
9346 
9347 	d_tracer = tracing_get_dentry(tr);
9348 	if (IS_ERR(d_tracer))
9349 		return NULL;
9350 
9351 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
9352 
9353 	MEM_FAIL(!tr->percpu_dir,
9354 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
9355 
9356 	return tr->percpu_dir;
9357 }
9358 
9359 static struct dentry *
9360 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
9361 		      void *data, long cpu, const struct file_operations *fops)
9362 {
9363 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
9364 
9365 	if (ret) /* See tracing_get_cpu() */
9366 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
9367 	return ret;
9368 }
9369 
9370 static void
9371 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
9372 {
9373 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
9374 	struct dentry *d_cpu;
9375 	char cpu_dir[30]; /* 30 characters should be more than enough */
9376 
9377 	if (!d_percpu)
9378 		return;
9379 
9380 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
9381 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
9382 	if (!d_cpu) {
9383 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
9384 		return;
9385 	}
9386 
9387 	/* per cpu trace_pipe */
9388 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
9389 				tr, cpu, &tracing_pipe_fops);
9390 
9391 	/* per cpu trace */
9392 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
9393 				tr, cpu, &tracing_fops);
9394 
9395 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
9396 				tr, cpu, &tracing_buffers_fops);
9397 
9398 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
9399 				tr, cpu, &tracing_stats_fops);
9400 
9401 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
9402 				tr, cpu, &tracing_entries_fops);
9403 
9404 	if (tr->range_addr_start)
9405 		trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
9406 				      tr, cpu, &tracing_buffer_meta_fops);
9407 #ifdef CONFIG_TRACER_SNAPSHOT
9408 	if (!tr->range_addr_start) {
9409 		trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
9410 				      tr, cpu, &snapshot_fops);
9411 
9412 		trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
9413 				      tr, cpu, &snapshot_raw_fops);
9414 	}
9415 #endif
9416 }
9417 
9418 #ifdef CONFIG_FTRACE_SELFTEST
9419 /* Let selftest have access to static functions in this file */
9420 #include "trace_selftest.c"
9421 #endif
9422 
9423 static ssize_t
9424 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
9425 			loff_t *ppos)
9426 {
9427 	struct trace_option_dentry *topt = filp->private_data;
9428 	char *buf;
9429 
9430 	if (topt->flags->val & topt->opt->bit)
9431 		buf = "1\n";
9432 	else
9433 		buf = "0\n";
9434 
9435 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9436 }
9437 
9438 static ssize_t
9439 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
9440 			 loff_t *ppos)
9441 {
9442 	struct trace_option_dentry *topt = filp->private_data;
9443 	unsigned long val;
9444 	int ret;
9445 
9446 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9447 	if (ret)
9448 		return ret;
9449 
9450 	if (val != 0 && val != 1)
9451 		return -EINVAL;
9452 
9453 	if (!!(topt->flags->val & topt->opt->bit) != val) {
9454 		guard(mutex)(&trace_types_lock);
9455 		ret = __set_tracer_option(topt->tr, topt->flags,
9456 					  topt->opt, !val);
9457 		if (ret)
9458 			return ret;
9459 	}
9460 
9461 	*ppos += cnt;
9462 
9463 	return cnt;
9464 }
9465 
9466 static int tracing_open_options(struct inode *inode, struct file *filp)
9467 {
9468 	struct trace_option_dentry *topt = inode->i_private;
9469 	int ret;
9470 
9471 	ret = tracing_check_open_get_tr(topt->tr);
9472 	if (ret)
9473 		return ret;
9474 
9475 	filp->private_data = inode->i_private;
9476 	return 0;
9477 }
9478 
9479 static int tracing_release_options(struct inode *inode, struct file *file)
9480 {
9481 	struct trace_option_dentry *topt = file->private_data;
9482 
9483 	trace_array_put(topt->tr);
9484 	return 0;
9485 }
9486 
9487 static const struct file_operations trace_options_fops = {
9488 	.open = tracing_open_options,
9489 	.read = trace_options_read,
9490 	.write = trace_options_write,
9491 	.llseek	= generic_file_llseek,
9492 	.release = tracing_release_options,
9493 };
9494 
9495 /*
9496  * In order to pass in both the trace_array descriptor as well as the index
9497  * to the flag that the trace option file represents, the trace_array
9498  * has a character array of trace_flags_index[], which holds the index
9499  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9500  * The address of this character array is passed to the flag option file
9501  * read/write callbacks.
9502  *
9503  * In order to extract both the index and the trace_array descriptor,
9504  * get_tr_index() uses the following algorithm.
9505  *
9506  *   idx = *ptr;
9507  *
9508  * As the pointer itself contains the address of the index (remember
9509  * index[1] == 1).
9510  *
9511  * Then to get the trace_array descriptor, by subtracting that index
9512  * from the ptr, we get to the start of the index itself.
9513  *
9514  *   ptr - idx == &index[0]
9515  *
9516  * Then a simple container_of() from that pointer gets us to the
9517  * trace_array descriptor.
9518  */
9519 static void get_tr_index(void *data, struct trace_array **ptr,
9520 			 unsigned int *pindex)
9521 {
9522 	*pindex = *(unsigned char *)data;
9523 
9524 	*ptr = container_of(data - *pindex, struct trace_array,
9525 			    trace_flags_index);
9526 }
9527 
9528 static ssize_t
9529 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9530 			loff_t *ppos)
9531 {
9532 	void *tr_index = filp->private_data;
9533 	struct trace_array *tr;
9534 	unsigned int index;
9535 	char *buf;
9536 
9537 	get_tr_index(tr_index, &tr, &index);
9538 
9539 	if (tr->trace_flags & (1ULL << index))
9540 		buf = "1\n";
9541 	else
9542 		buf = "0\n";
9543 
9544 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9545 }
9546 
9547 static ssize_t
9548 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9549 			 loff_t *ppos)
9550 {
9551 	void *tr_index = filp->private_data;
9552 	struct trace_array *tr;
9553 	unsigned int index;
9554 	unsigned long val;
9555 	int ret;
9556 
9557 	get_tr_index(tr_index, &tr, &index);
9558 
9559 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9560 	if (ret)
9561 		return ret;
9562 
9563 	if (val != 0 && val != 1)
9564 		return -EINVAL;
9565 
9566 	mutex_lock(&event_mutex);
9567 	mutex_lock(&trace_types_lock);
9568 	ret = set_tracer_flag(tr, 1ULL << index, val);
9569 	mutex_unlock(&trace_types_lock);
9570 	mutex_unlock(&event_mutex);
9571 
9572 	if (ret < 0)
9573 		return ret;
9574 
9575 	*ppos += cnt;
9576 
9577 	return cnt;
9578 }
9579 
9580 static const struct file_operations trace_options_core_fops = {
9581 	.open = tracing_open_generic,
9582 	.read = trace_options_core_read,
9583 	.write = trace_options_core_write,
9584 	.llseek = generic_file_llseek,
9585 };
9586 
9587 struct dentry *trace_create_file(const char *name,
9588 				 umode_t mode,
9589 				 struct dentry *parent,
9590 				 void *data,
9591 				 const struct file_operations *fops)
9592 {
9593 	struct dentry *ret;
9594 
9595 	ret = tracefs_create_file(name, mode, parent, data, fops);
9596 	if (!ret)
9597 		pr_warn("Could not create tracefs '%s' entry\n", name);
9598 
9599 	return ret;
9600 }
9601 
9602 
9603 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9604 {
9605 	struct dentry *d_tracer;
9606 
9607 	if (tr->options)
9608 		return tr->options;
9609 
9610 	d_tracer = tracing_get_dentry(tr);
9611 	if (IS_ERR(d_tracer))
9612 		return NULL;
9613 
9614 	tr->options = tracefs_create_dir("options", d_tracer);
9615 	if (!tr->options) {
9616 		pr_warn("Could not create tracefs directory 'options'\n");
9617 		return NULL;
9618 	}
9619 
9620 	return tr->options;
9621 }
9622 
9623 static void
9624 create_trace_option_file(struct trace_array *tr,
9625 			 struct trace_option_dentry *topt,
9626 			 struct tracer_flags *flags,
9627 			 struct tracer_opt *opt)
9628 {
9629 	struct dentry *t_options;
9630 
9631 	t_options = trace_options_init_dentry(tr);
9632 	if (!t_options)
9633 		return;
9634 
9635 	topt->flags = flags;
9636 	topt->opt = opt;
9637 	topt->tr = tr;
9638 
9639 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9640 					t_options, topt, &trace_options_fops);
9641 }
9642 
9643 static int
9644 create_trace_option_files(struct trace_array *tr, struct tracer *tracer,
9645 			  struct tracer_flags *flags)
9646 {
9647 	struct trace_option_dentry *topts;
9648 	struct trace_options *tr_topts;
9649 	struct tracer_opt *opts;
9650 	int cnt;
9651 
9652 	if (!flags || !flags->opts)
9653 		return 0;
9654 
9655 	opts = flags->opts;
9656 
9657 	for (cnt = 0; opts[cnt].name; cnt++)
9658 		;
9659 
9660 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9661 	if (!topts)
9662 		return 0;
9663 
9664 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9665 			    GFP_KERNEL);
9666 	if (!tr_topts) {
9667 		kfree(topts);
9668 		return -ENOMEM;
9669 	}
9670 
9671 	tr->topts = tr_topts;
9672 	tr->topts[tr->nr_topts].tracer = tracer;
9673 	tr->topts[tr->nr_topts].topts = topts;
9674 	tr->nr_topts++;
9675 
9676 	for (cnt = 0; opts[cnt].name; cnt++) {
9677 		create_trace_option_file(tr, &topts[cnt], flags,
9678 					 &opts[cnt]);
9679 		MEM_FAIL(topts[cnt].entry == NULL,
9680 			  "Failed to create trace option: %s",
9681 			  opts[cnt].name);
9682 	}
9683 	return 0;
9684 }
9685 
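/*
 * Return the tracer flag value that the top level instance (global_trace)
 * holds for @tracer, or -1 if the tracer has no per-instance flags or is
 * not registered there.
 */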
9686 static int get_global_flags_val(struct tracer *tracer)
9687 {
9688 	struct tracers *t;
9689 
9690 	list_for_each_entry(t, &global_trace.tracers, list) {
9691 		if (t->tracer != tracer)
9692 			continue;
9693 		if (!t->flags)
9694 			return -1;
9695 		return t->flags->val;
9696 	}
9697 	return -1;
9698 }
9699 
9700 static int add_tracer_options(struct trace_array *tr, struct tracers *t)
9701 {
9702 	struct tracer *tracer = t->tracer;
9703 	struct tracer_flags *flags = t->flags ?: tracer->flags;
9704 
9705 	if (!flags)
9706 		return 0;
9707 
9708 	/* Only add tracer options after update_tracer_options() finishes */
9709 	if (!tracer_options_updated)
9710 		return 0;
9711 
9712 	return create_trace_option_files(tr, tracer, flags);
9713 }
9714 
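/*
 * Register @tracer with the instance @tr: add it to tr->tracers, give it
 * a private copy of the tracer's default_flags when the flags are
 * per-instance, and create its option files via add_tracer_options().
 */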
9715 static int add_tracer(struct trace_array *tr, struct tracer *tracer)
9716 {
9717 	struct tracer_flags *flags;
9718 	struct tracers *t;
9719 	int ret;
9720 
9721 	/* Only enable if the directory has been created already. */
9722 	if (!tr->dir && !(tr->flags & TRACE_ARRAY_FL_GLOBAL))
9723 		return 0;
9724 
9725 	/*
9726 	 * If this is an instance, only create flags for tracers
9727 	 * the instance may have.
9728 	 */
9729 	if (!trace_ok_for_array(tracer, tr))
9730 		return 0;
9731 
9732 	t = kmalloc(sizeof(*t), GFP_KERNEL);
9733 	if (!t)
9734 		return -ENOMEM;
9735 
9736 	t->tracer = tracer;
9737 	t->flags = NULL;
9738 	list_add(&t->list, &tr->tracers);
9739 
9740 	flags = tracer->flags;
9741 	if (!flags) {
9742 		if (!tracer->default_flags)
9743 			return 0;
9744 
9745 		/*
9746 		 * If the tracer defines default flags, it means the flags are
9747 		 * per trace instance.
9748 		 */
9749 		flags = kmalloc(sizeof(*flags), GFP_KERNEL);
9750 		if (!flags)
9751 			return -ENOMEM;
9752 
9753 		*flags = *tracer->default_flags;
9754 		flags->trace = tracer;
9755 
9756 		t->flags = flags;
9757 
9758 		/* If this is an instance, inherit the global_trace flags */
9759 		if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL)) {
9760 			int val = get_global_flags_val(tracer);
9761 			if (!WARN_ON_ONCE(val < 0))
9762 				flags->val = val;
9763 		}
9764 	}
9765 
9766 	ret = add_tracer_options(tr, t);
9767 	if (ret < 0) {
9768 		list_del(&t->list);
9769 		kfree(t->flags);
9770 		kfree(t);
9771 	}
9772 
9773 	return ret;
9774 }
9775 
9776 static struct dentry *
9777 create_trace_option_core_file(struct trace_array *tr,
9778 			      const char *option, long index)
9779 {
9780 	struct dentry *t_options;
9781 
9782 	t_options = trace_options_init_dentry(tr);
9783 	if (!t_options)
9784 		return NULL;
9785 
9786 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9787 				 (void *)&tr->trace_flags_index[index],
9788 				 &trace_options_core_fops);
9789 }
9790 
9791 static void create_trace_options_dir(struct trace_array *tr)
9792 {
9793 	struct dentry *t_options;
9794 	bool top_level = tr == &global_trace;
9795 	int i;
9796 
9797 	t_options = trace_options_init_dentry(tr);
9798 	if (!t_options)
9799 		return;
9800 
9801 	for (i = 0; trace_options[i]; i++) {
9802 		if (top_level ||
9803 		    !((1ULL << i) & TOP_LEVEL_TRACE_FLAGS)) {
9804 			create_trace_option_core_file(tr, trace_options[i], i);
9805 		}
9806 	}
9807 }
9808 
9809 static ssize_t
9810 rb_simple_read(struct file *filp, char __user *ubuf,
9811 	       size_t cnt, loff_t *ppos)
9812 {
9813 	struct trace_array *tr = filp->private_data;
9814 	char buf[64];
9815 	int r;
9816 
9817 	r = tracer_tracing_is_on(tr);
9818 	r = sprintf(buf, "%d\n", r);
9819 
9820 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9821 }
9822 
9823 static ssize_t
9824 rb_simple_write(struct file *filp, const char __user *ubuf,
9825 		size_t cnt, loff_t *ppos)
9826 {
9827 	struct trace_array *tr = filp->private_data;
9828 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9829 	unsigned long val;
9830 	int ret;
9831 
9832 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9833 	if (ret)
9834 		return ret;
9835 
9836 	if (buffer) {
9837 		guard(mutex)(&trace_types_lock);
9838 		if (!!val == tracer_tracing_is_on(tr)) {
9839 			val = 0; /* do nothing */
9840 		} else if (val) {
9841 			tracer_tracing_on(tr);
9842 			if (tr->current_trace->start)
9843 				tr->current_trace->start(tr);
9844 		} else {
9845 			tracer_tracing_off(tr);
9846 			if (tr->current_trace->stop)
9847 				tr->current_trace->stop(tr);
9848 			/* Wake up any waiters */
9849 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9850 		}
9851 	}
9852 
9853 	(*ppos)++;
9854 
9855 	return cnt;
9856 }
9857 
9858 static const struct file_operations rb_simple_fops = {
9859 	.open		= tracing_open_generic_tr,
9860 	.read		= rb_simple_read,
9861 	.write		= rb_simple_write,
9862 	.release	= tracing_release_generic_tr,
9863 	.llseek		= default_llseek,
9864 };
9865 
9866 static ssize_t
9867 buffer_percent_read(struct file *filp, char __user *ubuf,
9868 		    size_t cnt, loff_t *ppos)
9869 {
9870 	struct trace_array *tr = filp->private_data;
9871 	char buf[64];
9872 	int r;
9873 
9874 	r = tr->buffer_percent;
9875 	r = sprintf(buf, "%d\n", r);
9876 
9877 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9878 }
9879 
9880 static ssize_t
9881 buffer_percent_write(struct file *filp, const char __user *ubuf,
9882 		     size_t cnt, loff_t *ppos)
9883 {
9884 	struct trace_array *tr = filp->private_data;
9885 	unsigned long val;
9886 	int ret;
9887 
9888 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9889 	if (ret)
9890 		return ret;
9891 
9892 	if (val > 100)
9893 		return -EINVAL;
9894 
9895 	tr->buffer_percent = val;
9896 
9897 	(*ppos)++;
9898 
9899 	return cnt;
9900 }
9901 
9902 static const struct file_operations buffer_percent_fops = {
9903 	.open		= tracing_open_generic_tr,
9904 	.read		= buffer_percent_read,
9905 	.write		= buffer_percent_write,
9906 	.release	= tracing_release_generic_tr,
9907 	.llseek		= default_llseek,
9908 };
9909 
9910 static ssize_t
9911 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9912 {
9913 	struct trace_array *tr = filp->private_data;
9914 	size_t size;
9915 	char buf[64];
9916 	int order;
9917 	int r;
9918 
9919 	order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9920 	size = (PAGE_SIZE << order) / 1024;
9921 
9922 	r = sprintf(buf, "%zd\n", size);
9923 
9924 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9925 }
9926 
9927 static ssize_t
9928 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9929 			 size_t cnt, loff_t *ppos)
9930 {
9931 	struct trace_array *tr = filp->private_data;
9932 	unsigned long val;
9933 	int old_order;
9934 	int order;
9935 	int pages;
9936 	int ret;
9937 
9938 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9939 	if (ret)
9940 		return ret;
9941 
9942 	val *= 1024; /* value passed in is in KB */
9943 
9944 	pages = DIV_ROUND_UP(val, PAGE_SIZE);
9945 	order = fls(pages - 1);
9946 
9947 	/* limit between 1 and 128 system pages */
9948 	if (order < 0 || order > 7)
9949 		return -EINVAL;
9950 
9951 	/* Do not allow tracing while changing the order of the ring buffer */
9952 	tracing_stop_tr(tr);
9953 
9954 	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9955 	if (old_order == order)
9956 		goto out;
9957 
9958 	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9959 	if (ret)
9960 		goto out;
9961 
9962 #ifdef CONFIG_TRACER_MAX_TRACE
9963 
9964 	if (!tr->allocated_snapshot)
9965 		goto out_max;
9966 
9967 	ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9968 	if (ret) {
9969 		/* Put back the old order */
9970 		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9971 		if (WARN_ON_ONCE(cnt)) {
9972 			/*
9973 			 * AARGH! We are left with different orders!
9974 			 * The max buffer is our "snapshot" buffer.
9975 			 * When a tracer needs a snapshot (one of the
9976 			 * latency tracers), it swaps the max buffer
9977 			 * with the saved snapshot. We succeeded in updating
9978 			 * the order of the main buffer, but failed to
9979 			 * update the order of the max buffer. But when we tried
9980 			 * to reset the main buffer to the original size, we
9981 			 * failed there too. This is very unlikely to
9982 			 * happen, but if it does, warn and kill all
9983 			 * tracing.
9984 			 */
9985 			tracing_disabled = 1;
9986 		}
9987 		goto out;
9988 	}
9989  out_max:
9990 #endif
9991 	(*ppos)++;
9992  out:
9993 	if (ret)
9994 		cnt = ret;
9995 	tracing_start_tr(tr);
9996 	return cnt;
9997 }
9998 
9999 static const struct file_operations buffer_subbuf_size_fops = {
10000 	.open		= tracing_open_generic_tr,
10001 	.read		= buffer_subbuf_size_read,
10002 	.write		= buffer_subbuf_size_write,
10003 	.release	= tracing_release_generic_tr,
10004 	.llseek		= default_llseek,
10005 };
10006 
10007 static struct dentry *trace_instance_dir;
10008 
10009 static void
10010 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
10011 
10012 #ifdef CONFIG_MODULES
10013 static int make_mod_delta(struct module *mod, void *data)
10014 {
10015 	struct trace_module_delta *module_delta;
10016 	struct trace_scratch *tscratch;
10017 	struct trace_mod_entry *entry;
10018 	struct trace_array *tr = data;
10019 	int i;
10020 
10021 	tscratch = tr->scratch;
10022 	module_delta = READ_ONCE(tr->module_delta);
10023 	for (i = 0; i < tscratch->nr_entries; i++) {
10024 		entry = &tscratch->entries[i];
10025 		if (strcmp(mod->name, entry->mod_name))
10026 			continue;
10027 		if (mod->state == MODULE_STATE_GOING)
10028 			module_delta->delta[i] = 0;
10029 		else
10030 			module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base
10031 						 - entry->mod_addr;
10032 		break;
10033 	}
10034 	return 0;
10035 }
10036 #else
10037 static int make_mod_delta(struct module *mod, void *data)
10038 {
10039 	return 0;
10040 }
10041 #endif
10042 
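/* sort_r() comparator: order trace_mod_entry records by module address */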
10043 static int mod_addr_comp(const void *a, const void *b, const void *data)
10044 {
10045 	const struct trace_mod_entry *e1 = a;
10046 	const struct trace_mod_entry *e2 = b;
10047 
10048 	return e1->mod_addr > e2->mod_addr ? 1 : -1;
10049 }
10050 
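/*
 * Adopt the persistent scratch area of a boot-mapped ring buffer:
 * validate the stored module names, sort the entries by address,
 * compute text deltas against the previous boot and restore its
 * trace clock. On any inconsistency the scratch data is reset.
 */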
10051 static void setup_trace_scratch(struct trace_array *tr,
10052 				struct trace_scratch *tscratch, unsigned int size)
10053 {
10054 	struct trace_module_delta *module_delta;
10055 	struct trace_mod_entry *entry;
10056 	int i, nr_entries;
10057 
10058 	if (!tscratch)
10059 		return;
10060 
10061 	tr->scratch = tscratch;
10062 	tr->scratch_size = size;
10063 
10064 	if (tscratch->text_addr)
10065 		tr->text_delta = (unsigned long)_text - tscratch->text_addr;
10066 
10067 	if (struct_size(tscratch, entries, tscratch->nr_entries) > size)
10068 		goto reset;
10069 
10070 	/* Check if each module name is a valid string */
10071 	for (i = 0; i < tscratch->nr_entries; i++) {
10072 		int n;
10073 
10074 		entry = &tscratch->entries[i];
10075 
10076 		for (n = 0; n < MODULE_NAME_LEN; n++) {
10077 			if (entry->mod_name[n] == '\0')
10078 				break;
10079 			if (!isprint(entry->mod_name[n]))
10080 				goto reset;
10081 		}
10082 		if (n == MODULE_NAME_LEN)
10083 			goto reset;
10084 	}
10085 
10086 	/* Sort the entries so that we can find the appropriate module from an address. */
10087 	nr_entries = tscratch->nr_entries;
10088 	sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
10089 	       mod_addr_comp, NULL, NULL);
10090 
10091 	if (IS_ENABLED(CONFIG_MODULES)) {
10092 		module_delta = kzalloc(struct_size(module_delta, delta, nr_entries), GFP_KERNEL);
10093 		if (!module_delta) {
10094 			pr_info("module_delta allocation failed. Not able to decode module address.");
10095 			goto reset;
10096 		}
10097 		init_rcu_head(&module_delta->rcu);
10098 	} else
10099 		module_delta = NULL;
10100 	WRITE_ONCE(tr->module_delta, module_delta);
10101 
10102 	/* Scan modules to compute the text delta for each module. */
10103 	module_for_each_mod(make_mod_delta, tr);
10104 
10105 	/* Set trace_clock to the same as the previous boot. */
10106 	if (tscratch->clock_id != tr->clock_id) {
10107 		if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) ||
10108 		    tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
10109 			pr_info("the previous trace_clock info is not valid.");
10110 			goto reset;
10111 		}
10112 	}
10113 	return;
10114  reset:
10115 	/* Invalid trace modules */
10116 	memset(tscratch, 0, size);
10117 }
10118 
10119 static int
10120 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
10121 {
10122 	enum ring_buffer_flags rb_flags;
10123 	struct trace_scratch *tscratch;
10124 	unsigned int scratch_size = 0;
10125 
10126 	rb_flags = tr->trace_flags & TRACE_ITER(OVERWRITE) ? RB_FL_OVERWRITE : 0;
10127 
10128 	buf->tr = tr;
10129 
10130 	if (tr->range_addr_start && tr->range_addr_size) {
10131 		/* Add scratch buffer to handle 128 modules */
10132 		buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
10133 						      tr->range_addr_start,
10134 						      tr->range_addr_size,
10135 						      struct_size(tscratch, entries, 128));
10136 
10137 		tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size);
10138 		setup_trace_scratch(tr, tscratch, scratch_size);
10139 
10140 		/*
10141 		 * This is basically the same as a mapped buffer,
10142 		 * with the same restrictions.
10143 		 */
10144 		tr->mapped++;
10145 	} else {
10146 		buf->buffer = ring_buffer_alloc(size, rb_flags);
10147 	}
10148 	if (!buf->buffer)
10149 		return -ENOMEM;
10150 
10151 	buf->data = alloc_percpu(struct trace_array_cpu);
10152 	if (!buf->data) {
10153 		ring_buffer_free(buf->buffer);
10154 		buf->buffer = NULL;
10155 		return -ENOMEM;
10156 	}
10157 
10158 	/* Allocate the first page for all buffers */
10159 	set_buffer_entries(&tr->array_buffer,
10160 			   ring_buffer_size(tr->array_buffer.buffer, 0));
10161 
10162 	return 0;
10163 }
10164 
10165 static void free_trace_buffer(struct array_buffer *buf)
10166 {
10167 	if (buf->buffer) {
10168 		ring_buffer_free(buf->buffer);
10169 		buf->buffer = NULL;
10170 		free_percpu(buf->data);
10171 		buf->data = NULL;
10172 	}
10173 }
10174 
10175 static int allocate_trace_buffers(struct trace_array *tr, int size)
10176 {
10177 	int ret;
10178 
10179 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
10180 	if (ret)
10181 		return ret;
10182 
10183 #ifdef CONFIG_TRACER_MAX_TRACE
10184 	/* Fixed memory-mapped buffer trace arrays do not have snapshot buffers */
10185 	if (tr->range_addr_start)
10186 		return 0;
10187 
10188 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
10189 				    allocate_snapshot ? size : 1);
10190 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
10191 		free_trace_buffer(&tr->array_buffer);
10192 		return -ENOMEM;
10193 	}
10194 	tr->allocated_snapshot = allocate_snapshot;
10195 
10196 	allocate_snapshot = false;
10197 #endif
10198 
10199 	return 0;
10200 }
10201 
10202 static void free_trace_buffers(struct trace_array *tr)
10203 {
10204 	if (!tr)
10205 		return;
10206 
10207 	free_trace_buffer(&tr->array_buffer);
10208 	kfree(tr->module_delta);
10209 
10210 #ifdef CONFIG_TRACER_MAX_TRACE
10211 	free_trace_buffer(&tr->max_buffer);
10212 #endif
10213 }
10214 
10215 static void init_trace_flags_index(struct trace_array *tr)
10216 {
10217 	int i;
10218 
10219 	/* Used by the trace options files */
10220 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
10221 		tr->trace_flags_index[i] = i;
10222 }
10223 
10224 static int __update_tracer(struct trace_array *tr)
10225 {
10226 	struct tracer *t;
10227 	int ret = 0;
10228 
10229 	for (t = trace_types; t && !ret; t = t->next)
10230 		ret = add_tracer(tr, t);
10231 
10232 	return ret;
10233 }
10234 
10235 static __init int __update_tracer_options(struct trace_array *tr)
10236 {
10237 	struct tracers *t;
10238 	int ret = 0;
10239 
10240 	list_for_each_entry(t, &tr->tracers, list) {
10241 		ret = add_tracer_options(tr, t);
10242 		if (ret < 0)
10243 			break;
10244 	}
10245 
10246 	return ret;
10247 }
10248 
10249 static __init void update_tracer_options(void)
10250 {
10251 	struct trace_array *tr;
10252 
10253 	guard(mutex)(&trace_types_lock);
10254 	tracer_options_updated = true;
10255 	list_for_each_entry(tr, &ftrace_trace_arrays, list)
10256 		__update_tracer_options(tr);
10257 }
10258 
10259 /* Must have trace_types_lock held */
10260 struct trace_array *trace_array_find(const char *instance)
10261 {
10262 	struct trace_array *tr, *found = NULL;
10263 
10264 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10265 		if (tr->name && strcmp(tr->name, instance) == 0) {
10266 			found = tr;
10267 			break;
10268 		}
10269 	}
10270 
10271 	return found;
10272 }
10273 
10274 struct trace_array *trace_array_find_get(const char *instance)
10275 {
10276 	struct trace_array *tr;
10277 
10278 	guard(mutex)(&trace_types_lock);
10279 	tr = trace_array_find(instance);
10280 	if (tr)
10281 		tr->ref++;
10282 
10283 	return tr;
10284 }
10285 
10286 static int trace_array_create_dir(struct trace_array *tr)
10287 {
10288 	int ret;
10289 
10290 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
10291 	if (!tr->dir)
10292 		return -EINVAL;
10293 
10294 	ret = event_trace_add_tracer(tr->dir, tr);
10295 	if (ret) {
10296 		tracefs_remove(tr->dir);
10297 		return ret;
10298 	}
10299 
10300 	init_tracer_tracefs(tr, tr->dir);
10301 	ret = __update_tracer(tr);
10302 	if (ret) {
10303 		event_trace_del_tracer(tr);
10304 		tracefs_remove(tr->dir);
10305 		return ret;
10306 	}
10307 	return 0;
10308 }
10309 
10310 static struct trace_array *
10311 trace_array_create_systems(const char *name, const char *systems,
10312 			   unsigned long range_addr_start,
10313 			   unsigned long range_addr_size)
10314 {
10315 	struct trace_array *tr;
10316 	int ret;
10317 
10318 	ret = -ENOMEM;
10319 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
10320 	if (!tr)
10321 		return ERR_PTR(ret);
10322 
10323 	tr->name = kstrdup(name, GFP_KERNEL);
10324 	if (!tr->name)
10325 		goto out_free_tr;
10326 
10327 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
10328 		goto out_free_tr;
10329 
10330 	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
10331 		goto out_free_tr;
10332 
10333 	if (systems) {
10334 		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
10335 		if (!tr->system_names)
10336 			goto out_free_tr;
10337 	}
10338 
10339 	/* Only for boot up memory mapped ring buffers */
10340 	tr->range_addr_start = range_addr_start;
10341 	tr->range_addr_size = range_addr_size;
10342 
10343 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
10344 
10345 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
10346 
10347 	raw_spin_lock_init(&tr->start_lock);
10348 
10349 	tr->syscall_buf_sz = global_trace.syscall_buf_sz;
10350 
10351 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10352 #ifdef CONFIG_TRACER_MAX_TRACE
10353 	spin_lock_init(&tr->snapshot_trigger_lock);
10354 #endif
10355 	tr->current_trace = &nop_trace;
10356 	tr->current_trace_flags = nop_trace.flags;
10357 
10358 	INIT_LIST_HEAD(&tr->systems);
10359 	INIT_LIST_HEAD(&tr->events);
10360 	INIT_LIST_HEAD(&tr->hist_vars);
10361 	INIT_LIST_HEAD(&tr->err_log);
10362 	INIT_LIST_HEAD(&tr->tracers);
10363 	INIT_LIST_HEAD(&tr->marker_list);
10364 
10365 #ifdef CONFIG_MODULES
10366 	INIT_LIST_HEAD(&tr->mod_events);
10367 #endif
10368 
10369 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
10370 		goto out_free_tr;
10371 
10372 	/* The ring buffer is expanded by default */
10373 	trace_set_ring_buffer_expanded(tr);
10374 
10375 	if (ftrace_allocate_ftrace_ops(tr) < 0)
10376 		goto out_free_tr;
10377 
10378 	ftrace_init_trace_array(tr);
10379 
10380 	init_trace_flags_index(tr);
10381 
10382 	if (trace_instance_dir) {
10383 		ret = trace_array_create_dir(tr);
10384 		if (ret)
10385 			goto out_free_tr;
10386 	} else
10387 		__trace_early_add_events(tr);
10388 
10389 	list_add(&tr->list, &ftrace_trace_arrays);
10390 
10391 	tr->ref++;
10392 
10393 	return tr;
10394 
10395  out_free_tr:
10396 	ftrace_free_ftrace_ops(tr);
10397 	free_trace_buffers(tr);
10398 	free_cpumask_var(tr->pipe_cpumask);
10399 	free_cpumask_var(tr->tracing_cpumask);
10400 	kfree_const(tr->system_names);
10401 	kfree(tr->range_name);
10402 	kfree(tr->name);
10403 	kfree(tr);
10404 
10405 	return ERR_PTR(ret);
10406 }
10407 
10408 static struct trace_array *trace_array_create(const char *name)
10409 {
10410 	return trace_array_create_systems(name, NULL, 0, 0);
10411 }
10412 
10413 static int instance_mkdir(const char *name)
10414 {
10415 	struct trace_array *tr;
10416 	int ret;
10417 
10418 	guard(mutex)(&event_mutex);
10419 	guard(mutex)(&trace_types_lock);
10420 
10421 	ret = -EEXIST;
10422 	if (trace_array_find(name))
10423 		return -EEXIST;
10424 
10425 	tr = trace_array_create(name);
10426 
10427 	ret = PTR_ERR_OR_ZERO(tr);
10428 
10429 	return ret;
10430 }
10431 
10432 #ifdef CONFIG_MMU
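/*
 * Map a physical memory range into a new kernel virtual area so it can
 * be used as a memory mapped ring buffer. Returns the virtual start
 * address, or 0 on failure.
 */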
10433 static u64 map_pages(unsigned long start, unsigned long size)
10434 {
10435 	unsigned long vmap_start, vmap_end;
10436 	struct vm_struct *area;
10437 	int ret;
10438 
10439 	area = get_vm_area(size, VM_IOREMAP);
10440 	if (!area)
10441 		return 0;
10442 
10443 	vmap_start = (unsigned long) area->addr;
10444 	vmap_end = vmap_start + size;
10445 
10446 	ret = vmap_page_range(vmap_start, vmap_end,
10447 			      start, pgprot_nx(PAGE_KERNEL));
10448 	if (ret < 0) {
10449 		free_vm_area(area);
10450 		return 0;
10451 	}
10452 
10453 	return (u64)vmap_start;
10454 }
10455 #else
map_pages(unsigned long start,unsigned long size)10456 static inline u64 map_pages(unsigned long start, unsigned long size)
10457 {
10458 	return 0;
10459 }
10460 #endif
10461 
10462 /**
10463  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
10464  * @name: The name of the trace array to be looked up/created.
10465  * @systems: A list of systems to create event directories for (NULL for all)
10466  *
10467  * Returns a pointer to the trace array with the given name, or
10468  * NULL if it cannot be created.
10469  *
10470  * NOTE: This function increments the reference counter associated with the
10471  * trace array returned. This makes sure it cannot be freed while in use.
10472  * Use trace_array_put() once the trace array is no longer needed.
10473  * If the trace_array is to be freed, trace_array_destroy() needs to
10474  * be called after the trace_array_put(), or simply let user space delete
10475  * it from the tracefs instances directory. But until the
10476  * trace_array_put() is called, user space can not delete it.
10477  *
10478  */
trace_array_get_by_name(const char * name,const char * systems)10479 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
10480 {
10481 	struct trace_array *tr;
10482 
10483 	guard(mutex)(&event_mutex);
10484 	guard(mutex)(&trace_types_lock);
10485 
10486 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10487 		if (tr->name && strcmp(tr->name, name) == 0) {
10488 			tr->ref++;
10489 			return tr;
10490 		}
10491 	}
10492 
10493 	tr = trace_array_create_systems(name, systems, 0, 0);
10494 
10495 	if (IS_ERR(tr))
10496 		tr = NULL;
10497 	else
10498 		tr->ref++;
10499 
10500 	return tr;
10501 }
10502 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
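/*
 * A minimal usage sketch for an in-kernel caller (the instance name
 * "sample_instance" is only illustrative):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("sample_instance", NULL);
 *	if (!tr)
 *		return -ENODEV;
 *	...
 *	trace_array_put(tr);
 *
 * followed, only if the instance should be removed again, by a call to
 * trace_array_destroy(tr), as described in the comment above. The reference
 * taken here keeps the instance from being deleted underneath the caller.
 */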
10503 
__remove_instance(struct trace_array * tr)10504 static int __remove_instance(struct trace_array *tr)
10505 {
10506 	int i;
10507 
10508 	/* Reference counter for a newly created trace array = 1. */
10509 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
10510 		return -EBUSY;
10511 
10512 	list_del(&tr->list);
10513 
10514 	/* Disable all the flags that were enabled coming in */
10515 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
10516 		if ((1ULL << i) & ZEROED_TRACE_FLAGS)
10517 			set_tracer_flag(tr, 1ULL << i, 0);
10518 	}
10519 
10520 	if (printk_trace == tr)
10521 		update_printk_trace(&global_trace);
10522 
10523 	if (update_marker_trace(tr, 0))
10524 		synchronize_rcu();
10525 
10526 	tracing_set_nop(tr);
10527 	clear_ftrace_function_probes(tr);
10528 	event_trace_del_tracer(tr);
10529 	ftrace_clear_pids(tr);
10530 	ftrace_destroy_function_files(tr);
10531 	tracefs_remove(tr->dir);
10532 	free_percpu(tr->last_func_repeats);
10533 	free_trace_buffers(tr);
10534 	clear_tracing_err_log(tr);
10535 	free_tracers(tr);
10536 
10537 	if (tr->range_name) {
10538 		reserve_mem_release_by_name(tr->range_name);
10539 		kfree(tr->range_name);
10540 	}
10541 	if (tr->flags & TRACE_ARRAY_FL_VMALLOC)
10542 		vfree((void *)tr->range_addr_start);
10543 
10544 	for (i = 0; i < tr->nr_topts; i++) {
10545 		kfree(tr->topts[i].topts);
10546 	}
10547 	kfree(tr->topts);
10548 
10549 	free_cpumask_var(tr->pipe_cpumask);
10550 	free_cpumask_var(tr->tracing_cpumask);
10551 	kfree_const(tr->system_names);
10552 	kfree(tr->name);
10553 	kfree(tr);
10554 
10555 	return 0;
10556 }
10557 
trace_array_destroy(struct trace_array * this_tr)10558 int trace_array_destroy(struct trace_array *this_tr)
10559 {
10560 	struct trace_array *tr;
10561 
10562 	if (!this_tr)
10563 		return -EINVAL;
10564 
10565 	guard(mutex)(&event_mutex);
10566 	guard(mutex)(&trace_types_lock);
10567 
10569 	/* Make sure the trace array exists before destroying it. */
10570 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10571 		if (tr == this_tr)
10572 			return __remove_instance(tr);
10573 	}
10574 
10575 	return -ENODEV;
10576 }
10577 EXPORT_SYMBOL_GPL(trace_array_destroy);
10578 
instance_rmdir(const char * name)10579 static int instance_rmdir(const char *name)
10580 {
10581 	struct trace_array *tr;
10582 
10583 	guard(mutex)(&event_mutex);
10584 	guard(mutex)(&trace_types_lock);
10585 
10586 	tr = trace_array_find(name);
10587 	if (!tr)
10588 		return -ENODEV;
10589 
10590 	return __remove_instance(tr);
10591 }
10592 
create_trace_instances(struct dentry * d_tracer)10593 static __init void create_trace_instances(struct dentry *d_tracer)
10594 {
10595 	struct trace_array *tr;
10596 
10597 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
10598 							 instance_mkdir,
10599 							 instance_rmdir);
10600 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
10601 		return;
10602 
10603 	guard(mutex)(&event_mutex);
10604 	guard(mutex)(&trace_types_lock);
10605 
10606 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10607 		if (!tr->name)
10608 			continue;
10609 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
10610 			     "Failed to create instance directory\n"))
10611 			return;
10612 	}
10613 }
10614 
10615 static void
init_tracer_tracefs(struct trace_array * tr,struct dentry * d_tracer)10616 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
10617 {
10618 	int cpu;
10619 
10620 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
10621 			tr, &show_traces_fops);
10622 
10623 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
10624 			tr, &set_tracer_fops);
10625 
10626 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
10627 			  tr, &tracing_cpumask_fops);
10628 
10629 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
10630 			  tr, &tracing_iter_fops);
10631 
10632 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
10633 			  tr, &tracing_fops);
10634 
10635 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
10636 			  tr, &tracing_pipe_fops);
10637 
10638 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
10639 			  tr, &tracing_entries_fops);
10640 
10641 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
10642 			  tr, &tracing_total_entries_fops);
10643 
10644 	trace_create_file("free_buffer", 0200, d_tracer,
10645 			  tr, &tracing_free_buffer_fops);
10646 
10647 	trace_create_file("trace_marker", 0220, d_tracer,
10648 			  tr, &tracing_mark_fops);
10649 
10650 	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
10651 
10652 	trace_create_file("trace_marker_raw", 0220, d_tracer,
10653 			  tr, &tracing_mark_raw_fops);
10654 
10655 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
10656 			  &trace_clock_fops);
10657 
10658 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
10659 			  tr, &rb_simple_fops);
10660 
10661 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
10662 			  &trace_time_stamp_mode_fops);
10663 
10664 	tr->buffer_percent = 50;
10665 
10666 	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
10667 			tr, &buffer_percent_fops);
10668 
10669 	trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
10670 			  tr, &buffer_subbuf_size_fops);
10671 
10672 	trace_create_file("syscall_user_buf_size", TRACE_MODE_WRITE, d_tracer,
10673 			 tr, &tracing_syscall_buf_fops);
10674 
10675 	create_trace_options_dir(tr);
10676 
10677 #ifdef CONFIG_TRACER_MAX_TRACE
10678 	trace_create_maxlat_file(tr, d_tracer);
10679 #endif
10680 
10681 	if (ftrace_create_function_files(tr, d_tracer))
10682 		MEM_FAIL(1, "Could not allocate function filter files");
10683 
10684 	if (tr->range_addr_start) {
10685 		trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
10686 				  tr, &last_boot_fops);
10687 #ifdef CONFIG_TRACER_SNAPSHOT
10688 	} else {
10689 		trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
10690 				  tr, &snapshot_fops);
10691 #endif
10692 	}
10693 
10694 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
10695 			  tr, &tracing_err_log_fops);
10696 
10697 	for_each_tracing_cpu(cpu)
10698 		tracing_init_tracefs_percpu(tr, cpu);
10699 
10700 	ftrace_init_tracefs(tr, d_tracer);
10701 }
10702 
10703 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
trace_automount(struct dentry * mntpt,void * ignore)10704 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
10705 {
10706 	struct vfsmount *mnt;
10707 	struct file_system_type *type;
10708 	struct fs_context *fc;
10709 	int ret;
10710 
10711 	/*
10712 	 * To maintain backward compatibility for tools that mount
10713 	 * debugfs to get to the tracing facility, tracefs is automatically
10714 	 * mounted to the debugfs/tracing directory.
10715 	 */
10716 	type = get_fs_type("tracefs");
10717 	if (!type)
10718 		return NULL;
10719 
10720 	fc = fs_context_for_submount(type, mntpt);
10721 	put_filesystem(type);
10722 	if (IS_ERR(fc))
10723 		return ERR_CAST(fc);
10724 
10725 	pr_warn("NOTICE: Automounting of tracing to debugfs is deprecated and will be removed in 2030\n");
10726 
10727 	ret = vfs_parse_fs_string(fc, "source", "tracefs");
10728 	if (!ret)
10729 		mnt = fc_mount(fc);
10730 	else
10731 		mnt = ERR_PTR(ret);
10732 
10733 	put_fs_context(fc);
10734 	return mnt;
10735 }
10736 #endif
10737 
10738 /**
10739  * tracing_init_dentry - initialize top level trace array
10740  *
10741  * This is called when creating files or directories in the tracing
10742  * directory. It is called via fs_initcall() by any of the boot up code
10743  * and expects to return the dentry of the top level tracing directory.
10744  */
tracing_init_dentry(void)10745 int tracing_init_dentry(void)
10746 {
10747 	struct trace_array *tr = &global_trace;
10748 
10749 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10750 		pr_warn("Tracing disabled due to lockdown\n");
10751 		return -EPERM;
10752 	}
10753 
10754 	/* The top level trace array uses NULL as parent */
10755 	if (tr->dir)
10756 		return 0;
10757 
10758 	if (WARN_ON(!tracefs_initialized()))
10759 		return -ENODEV;
10760 
10761 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
10762 	/*
10763 	 * As there may still be users that expect the tracing
10764 	 * files to exist in debugfs/tracing, we must automount
10765 	 * the tracefs file system there, so older tools still
10766 	 * work with the newer kernel.
10767 	 */
10768 	tr->dir = debugfs_create_automount("tracing", NULL,
10769 					   trace_automount, NULL);
10770 #endif
10771 
10772 	return 0;
10773 }
10774 
10775 extern struct trace_eval_map *__start_ftrace_eval_maps[];
10776 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
10777 
10778 static struct workqueue_struct *eval_map_wq __initdata;
10779 static struct work_struct eval_map_work __initdata;
10780 static struct work_struct tracerfs_init_work __initdata;
10781 
eval_map_work_func(struct work_struct * work)10782 static void __init eval_map_work_func(struct work_struct *work)
10783 {
10784 	int len;
10785 
10786 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10787 	trace_event_update_with_eval_map(NULL, __start_ftrace_eval_maps, len);
10788 }
10789 
trace_eval_init(void)10790 static int __init trace_eval_init(void)
10791 {
10792 	INIT_WORK(&eval_map_work, eval_map_work_func);
10793 
10794 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
10795 	if (!eval_map_wq) {
10796 		pr_err("Unable to allocate eval_map_wq\n");
10797 		/* Do work here */
10798 		eval_map_work_func(&eval_map_work);
10799 		return -ENOMEM;
10800 	}
10801 
10802 	queue_work(eval_map_wq, &eval_map_work);
10803 	return 0;
10804 }
10805 
10806 subsys_initcall(trace_eval_init);
10807 
trace_eval_sync(void)10808 static int __init trace_eval_sync(void)
10809 {
10810 	/* Make sure the eval map updates are finished */
10811 	if (eval_map_wq)
10812 		destroy_workqueue(eval_map_wq);
10813 	return 0;
10814 }
10815 
10816 late_initcall_sync(trace_eval_sync);
10817 
10818 
10819 #ifdef CONFIG_MODULES
10820 
module_exists(const char * module)10821 bool module_exists(const char *module)
10822 {
10823 	/* All modules have the symbol __this_module */
10824 	static const char this_mod[] = "__this_module";
10825 	char modname[MODULE_NAME_LEN + sizeof(this_mod) + 2];
10826 	unsigned long val;
10827 	int n;
10828 
10829 	n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
10830 
10831 	if (n > sizeof(modname) - 1)
10832 		return false;
10833 
10834 	val = module_kallsyms_lookup_name(modname);
10835 	return val != 0;
10836 }
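/*
 * module_kallsyms_lookup_name() accepts the "module:symbol" form, so for a
 * hypothetical module "foo" the string built above is "foo:__this_module";
 * a non-zero address means that module is currently loaded.
 */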
10837 
trace_module_add_evals(struct module * mod)10838 static void trace_module_add_evals(struct module *mod)
10839 {
10840 	/*
10841 	 * Modules with bad taint do not have events created, do
10842 	 * not bother with enums either.
10843 	 */
10844 	if (trace_module_has_bad_taint(mod))
10845 		return;
10846 
10847 	/* Even if there are no trace_evals, this is needed to sanitize field types. */
10848 	trace_event_update_with_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10849 }
10850 
10851 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
trace_module_remove_evals(struct module * mod)10852 static void trace_module_remove_evals(struct module *mod)
10853 {
10854 	union trace_eval_map_item *map;
10855 	union trace_eval_map_item **last = &trace_eval_maps;
10856 
10857 	if (!mod->num_trace_evals)
10858 		return;
10859 
10860 	guard(mutex)(&trace_eval_mutex);
10861 
10862 	map = trace_eval_maps;
10863 
10864 	while (map) {
10865 		if (map->head.mod == mod)
10866 			break;
10867 		map = trace_eval_jmp_to_tail(map);
10868 		last = &map->tail.next;
10869 		map = map->tail.next;
10870 	}
10871 	if (!map)
10872 		return;
10873 
10874 	*last = trace_eval_jmp_to_tail(map)->tail.next;
10875 	kfree(map);
10876 }
10877 #else
trace_module_remove_evals(struct module * mod)10878 static inline void trace_module_remove_evals(struct module *mod) { }
10879 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10880 
trace_module_record(struct module * mod,bool add)10881 static void trace_module_record(struct module *mod, bool add)
10882 {
10883 	struct trace_array *tr;
10884 	unsigned long flags;
10885 
10886 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10887 		flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT);
10888 		/* Update any persistent trace array that has already been started */
10889 		if (flags == TRACE_ARRAY_FL_BOOT && add) {
10890 			guard(mutex)(&scratch_mutex);
10891 			save_mod(mod, tr);
10892 		} else if (flags & TRACE_ARRAY_FL_LAST_BOOT) {
10893 			/* Update delta if the module was loaded in a previous boot */
10894 			make_mod_delta(mod, tr);
10895 		}
10896 	}
10897 }
10898 
trace_module_notify(struct notifier_block * self,unsigned long val,void * data)10899 static int trace_module_notify(struct notifier_block *self,
10900 			       unsigned long val, void *data)
10901 {
10902 	struct module *mod = data;
10903 
10904 	switch (val) {
10905 	case MODULE_STATE_COMING:
10906 		trace_module_add_evals(mod);
10907 		trace_module_record(mod, true);
10908 		break;
10909 	case MODULE_STATE_GOING:
10910 		trace_module_remove_evals(mod);
10911 		trace_module_record(mod, false);
10912 		break;
10913 	}
10914 
10915 	return NOTIFY_OK;
10916 }
10917 
10918 static struct notifier_block trace_module_nb = {
10919 	.notifier_call = trace_module_notify,
10920 	.priority = 0,
10921 };
10922 #endif /* CONFIG_MODULES */
10923 
tracer_init_tracefs_work_func(struct work_struct * work)10924 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10925 {
10926 
10927 	event_trace_init();
10928 
10929 	init_tracer_tracefs(&global_trace, NULL);
10930 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
10931 
10932 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10933 			&global_trace, &tracing_thresh_fops);
10934 
10935 	trace_create_file("README", TRACE_MODE_READ, NULL,
10936 			NULL, &tracing_readme_fops);
10937 
10938 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10939 			NULL, &tracing_saved_cmdlines_fops);
10940 
10941 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10942 			  NULL, &tracing_saved_cmdlines_size_fops);
10943 
10944 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10945 			NULL, &tracing_saved_tgids_fops);
10946 
10947 	trace_create_eval_file(NULL);
10948 
10949 #ifdef CONFIG_MODULES
10950 	register_module_notifier(&trace_module_nb);
10951 #endif
10952 
10953 #ifdef CONFIG_DYNAMIC_FTRACE
10954 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10955 			NULL, &tracing_dyn_info_fops);
10956 #endif
10957 
10958 	create_trace_instances(NULL);
10959 
10960 	update_tracer_options();
10961 }
10962 
tracer_init_tracefs(void)10963 static __init int tracer_init_tracefs(void)
10964 {
10965 	int ret;
10966 
10967 	trace_access_lock_init();
10968 
10969 	ret = tracing_init_dentry();
10970 	if (ret)
10971 		return 0;
10972 
10973 	if (eval_map_wq) {
10974 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10975 		queue_work(eval_map_wq, &tracerfs_init_work);
10976 	} else {
10977 		tracer_init_tracefs_work_func(NULL);
10978 	}
10979 
10980 	if (rv_init_interface())
10981 		pr_err("RV: Error while creating the RV interface\n");
10982 
10983 	return 0;
10984 }
10985 
10986 fs_initcall(tracer_init_tracefs);
10987 
10988 static int trace_die_panic_handler(struct notifier_block *self,
10989 				unsigned long ev, void *unused);
10990 
10991 static struct notifier_block trace_panic_notifier = {
10992 	.notifier_call = trace_die_panic_handler,
10993 	.priority = INT_MAX - 1,
10994 };
10995 
10996 static struct notifier_block trace_die_notifier = {
10997 	.notifier_call = trace_die_panic_handler,
10998 	.priority = INT_MAX - 1,
10999 };
11000 
11001 /*
11002  * The idea is to execute the following die/panic callback early, in order
11003  * to avoid showing irrelevant information in the trace (like other panic
11004  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
11005  * warnings get disabled (to prevent potential log flooding).
11006  */
trace_die_panic_handler(struct notifier_block * self,unsigned long ev,void * unused)11007 static int trace_die_panic_handler(struct notifier_block *self,
11008 				unsigned long ev, void *unused)
11009 {
11010 	if (!ftrace_dump_on_oops_enabled())
11011 		return NOTIFY_DONE;
11012 
11013 	/* The die notifier requires DIE_OOPS to trigger */
11014 	if (self == &trace_die_notifier && ev != DIE_OOPS)
11015 		return NOTIFY_DONE;
11016 
11017 	ftrace_dump(DUMP_PARAM);
11018 
11019 	return NOTIFY_DONE;
11020 }
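/*
 * Whether the dump runs at all is controlled by the ftrace_dump_on_oops
 * setting (the boot parameter, and usually also the kernel.ftrace_dump_on_oops
 * sysctl); ftrace_dump_on_oops_enabled() above returns false when it is left
 * at 0, so an oops or panic then passes through without dumping.
 */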
11021 
11022 /*
11023  * printk is set to a max of 1024; we really don't need it that big.
11024  * Nothing should be printing 1000 characters anyway.
11025  */
11026 #define TRACE_MAX_PRINT		1000
11027 
11028 /*
11029  * Define here KERN_TRACE so that we have one place to modify
11030  * it if we decide to change what log level the ftrace dump
11031  * should be at.
11032  */
11033 #define KERN_TRACE		KERN_EMERG
11034 
11035 void
trace_printk_seq(struct trace_seq * s)11036 trace_printk_seq(struct trace_seq *s)
11037 {
11038 	/* Probably should print a warning here. */
11039 	if (s->seq.len >= TRACE_MAX_PRINT)
11040 		s->seq.len = TRACE_MAX_PRINT;
11041 
11042 	/*
11043 	 * More paranoid code. Although the buffer size is set to
11044 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
11045 	 * an extra layer of protection.
11046 	 */
11047 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
11048 		s->seq.len = s->seq.size - 1;
11049 
11050 	/* Should be zero terminated, but we are paranoid. */
11051 	s->buffer[s->seq.len] = 0;
11052 
11053 	printk(KERN_TRACE "%s", s->buffer);
11054 
11055 	trace_seq_init(s);
11056 }
11057 
trace_init_iter(struct trace_iterator * iter,struct trace_array * tr)11058 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
11059 {
11060 	iter->tr = tr;
11061 	iter->trace = iter->tr->current_trace;
11062 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
11063 	iter->array_buffer = &tr->array_buffer;
11064 
11065 	if (iter->trace && iter->trace->open)
11066 		iter->trace->open(iter);
11067 
11068 	/* Annotate start of buffers if we had overruns */
11069 	if (ring_buffer_overruns(iter->array_buffer->buffer))
11070 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
11071 
11072 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
11073 	if (trace_clocks[iter->tr->clock_id].in_ns)
11074 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
11075 
11076 	/* Can not use kmalloc for iter.temp and iter.fmt */
11077 	iter->temp = static_temp_buf;
11078 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
11079 	iter->fmt = static_fmt_buf;
11080 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
11081 }
11082 
trace_init_global_iter(struct trace_iterator * iter)11083 void trace_init_global_iter(struct trace_iterator *iter)
11084 {
11085 	trace_init_iter(iter, &global_trace);
11086 }
11087 
ftrace_dump_one(struct trace_array * tr,enum ftrace_dump_mode dump_mode)11088 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
11089 {
11090 	/* use static because iter can be a bit big for the stack */
11091 	static struct trace_iterator iter;
11092 	unsigned int old_userobj;
11093 	unsigned long flags;
11094 	int cnt = 0;
11095 
11096 	/*
11097 	 * Always turn off tracing when we dump.
11098 	 * We don't need to show trace output of what happens
11099 	 * between multiple crashes.
11100 	 *
11101 	 * If the user does a sysrq-z, then they can re-enable
11102 	 * tracing with echo 1 > tracing_on.
11103 	 */
11104 	tracer_tracing_off(tr);
11105 
11106 	local_irq_save(flags);
11107 
11108 	/* Simulate the iterator */
11109 	trace_init_iter(&iter, tr);
11110 
11111 	/* While dumping, do not allow the buffer to be enabled */
11112 	tracer_tracing_disable(tr);
11113 
11114 	old_userobj = tr->trace_flags & TRACE_ITER(SYM_USEROBJ);
11115 
11116 	/* don't look at user memory in panic mode */
11117 	tr->trace_flags &= ~TRACE_ITER(SYM_USEROBJ);
11118 
11119 	if (dump_mode == DUMP_ORIG)
11120 		iter.cpu_file = raw_smp_processor_id();
11121 	else
11122 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
11123 
11124 	if (tr == &global_trace)
11125 		printk(KERN_TRACE "Dumping ftrace buffer:\n");
11126 	else
11127 		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
11128 
11129 	/* Did function tracer already get disabled? */
11130 	if (ftrace_is_dead()) {
11131 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
11132 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
11133 	}
11134 
11135 	/*
11136 	 * We need to stop all tracing on all CPUs to read
11137 	 * the next buffer. This is a bit expensive, but it is
11138 	 * not done often. We read everything we can,
11139 	 * and then release the locks again.
11140 	 */
11141 
11142 	while (!trace_empty(&iter)) {
11143 
11144 		if (!cnt)
11145 			printk(KERN_TRACE "---------------------------------\n");
11146 
11147 		cnt++;
11148 
11149 		trace_iterator_reset(&iter);
11150 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
11151 
11152 		if (trace_find_next_entry_inc(&iter) != NULL) {
11153 			int ret;
11154 
11155 			ret = print_trace_line(&iter);
11156 			if (ret != TRACE_TYPE_NO_CONSUME)
11157 				trace_consume(&iter);
11158 
11159 			trace_printk_seq(&iter.seq);
11160 		}
11161 		touch_nmi_watchdog();
11162 	}
11163 
11164 	if (!cnt)
11165 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
11166 	else
11167 		printk(KERN_TRACE "---------------------------------\n");
11168 
11169 	tr->trace_flags |= old_userobj;
11170 
11171 	tracer_tracing_enable(tr);
11172 	local_irq_restore(flags);
11173 }
11174 
ftrace_dump_by_param(void)11175 static void ftrace_dump_by_param(void)
11176 {
11177 	bool first_param = true;
11178 	char dump_param[MAX_TRACER_SIZE];
11179 	char *buf, *token, *inst_name;
11180 	struct trace_array *tr;
11181 
11182 	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
11183 	buf = dump_param;
11184 
11185 	while ((token = strsep(&buf, ",")) != NULL) {
11186 		if (first_param) {
11187 			first_param = false;
11188 			if (!strcmp("0", token))
11189 				continue;
11190 			else if (!strcmp("1", token)) {
11191 				ftrace_dump_one(&global_trace, DUMP_ALL);
11192 				continue;
11193 			}
11194 			else if (!strcmp("2", token) ||
11195 			  !strcmp("orig_cpu", token)) {
11196 				ftrace_dump_one(&global_trace, DUMP_ORIG);
11197 				continue;
11198 			}
11199 		}
11200 
11201 		inst_name = strsep(&token, "=");
11202 		tr = trace_array_find(inst_name);
11203 		if (!tr) {
11204 			printk(KERN_TRACE "Instance %s not found\n", inst_name);
11205 			continue;
11206 		}
11207 
11208 		if (token && (!strcmp("2", token) ||
11209 			  !strcmp("orig_cpu", token)))
11210 			ftrace_dump_one(tr, DUMP_ORIG);
11211 		else
11212 			ftrace_dump_one(tr, DUMP_ALL);
11213 	}
11214 }
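/*
 * The string parsed above comes from the ftrace_dump_on_oops parameter.
 * Going by the parsing, something like
 *
 *	ftrace_dump_on_oops=1,foo=orig_cpu
 *
 * (with "foo" a hypothetical instance name) dumps all CPUs of the global
 * buffer and only the originating CPU of the "foo" instance.
 */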
11215 
ftrace_dump(enum ftrace_dump_mode oops_dump_mode)11216 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
11217 {
11218 	static atomic_t dump_running;
11219 
11220 	/* Only allow one dump user at a time. */
11221 	if (atomic_inc_return(&dump_running) != 1) {
11222 		atomic_dec(&dump_running);
11223 		return;
11224 	}
11225 
11226 	switch (oops_dump_mode) {
11227 	case DUMP_ALL:
11228 		ftrace_dump_one(&global_trace, DUMP_ALL);
11229 		break;
11230 	case DUMP_ORIG:
11231 		ftrace_dump_one(&global_trace, DUMP_ORIG);
11232 		break;
11233 	case DUMP_PARAM:
11234 		ftrace_dump_by_param();
11235 		break;
11236 	case DUMP_NONE:
11237 		break;
11238 	default:
11239 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
11240 		ftrace_dump_one(&global_trace, DUMP_ALL);
11241 	}
11242 
11243 	atomic_dec(&dump_running);
11244 }
11245 EXPORT_SYMBOL_GPL(ftrace_dump);
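/*
 * ftrace_dump() is exported so other kernel code can dump the buffers on a
 * fatal error, e.g. an illustrative ftrace_dump(DUMP_ALL) call from a
 * driver's crash path; the dump_running counter above makes concurrent
 * callers simply return instead of interleaving their output.
 */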
11246 
11247 #define WRITE_BUFSIZE  4096
11248 
trace_parse_run_command(struct file * file,const char __user * buffer,size_t count,loff_t * ppos,int (* createfn)(const char *))11249 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
11250 				size_t count, loff_t *ppos,
11251 				int (*createfn)(const char *))
11252 {
11253 	char *kbuf __free(kfree) = NULL;
11254 	char *buf, *tmp;
11255 	int ret = 0;
11256 	size_t done = 0;
11257 	size_t size;
11258 
11259 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
11260 	if (!kbuf)
11261 		return -ENOMEM;
11262 
11263 	while (done < count) {
11264 		size = count - done;
11265 
11266 		if (size >= WRITE_BUFSIZE)
11267 			size = WRITE_BUFSIZE - 1;
11268 
11269 		if (copy_from_user(kbuf, buffer + done, size))
11270 			return -EFAULT;
11271 
11272 		kbuf[size] = '\0';
11273 		buf = kbuf;
11274 		do {
11275 			tmp = strchr(buf, '\n');
11276 			if (tmp) {
11277 				*tmp = '\0';
11278 				size = tmp - buf + 1;
11279 			} else {
11280 				size = strlen(buf);
11281 				if (done + size < count) {
11282 					if (buf != kbuf)
11283 						break;
11284 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
11285 					pr_warn("Line length is too long: Should be less than %d\n",
11286 						WRITE_BUFSIZE - 2);
11287 					return -EINVAL;
11288 				}
11289 			}
11290 			done += size;
11291 
11292 			/* Remove comments */
11293 			tmp = strchr(buf, '#');
11294 
11295 			if (tmp)
11296 				*tmp = '\0';
11297 
11298 			ret = createfn(buf);
11299 			if (ret)
11300 				return ret;
11301 			buf += size;
11302 
11303 		} while (done < count);
11304 	}
11305 	return done;
11306 }
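/*
 * This helper is what the write() handlers of the dynamic event files are
 * built on: each newline-terminated line of the user buffer (with anything
 * after '#' dropped) is handed to @createfn. For instance, a write such as
 *
 *	echo 'p:my_probe vfs_read' >> /sys/kernel/tracing/kprobe_events
 *
 * (an illustrative probe definition) ends up calling the kprobe_events
 * create function once for that single line.
 */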
11307 
11308 #ifdef CONFIG_TRACER_MAX_TRACE
tr_needs_alloc_snapshot(const char * name)11309 __init static bool tr_needs_alloc_snapshot(const char *name)
11310 {
11311 	char *test;
11312 	int len = strlen(name);
11313 	bool ret;
11314 
11315 	if (!boot_snapshot_index)
11316 		return false;
11317 
11318 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
11319 	    boot_snapshot_info[len] == '\t')
11320 		return true;
11321 
11322 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
11323 	if (!test)
11324 		return false;
11325 
11326 	sprintf(test, "\t%s\t", name);
11327 	ret = strstr(boot_snapshot_info, test) != NULL;
11328 	kfree(test);
11329 	return ret;
11330 }
11331 
do_allocate_snapshot(const char * name)11332 __init static void do_allocate_snapshot(const char *name)
11333 {
11334 	if (!tr_needs_alloc_snapshot(name))
11335 		return;
11336 
11337 	/*
11338 	 * When allocate_snapshot is set, the next call to
11339 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
11340 	 * will allocate the snapshot buffer. That will also clear
11341 	 * this flag.
11342 	 */
11343 	allocate_snapshot = true;
11344 }
11345 #else
do_allocate_snapshot(const char * name)11346 static inline void do_allocate_snapshot(const char *name) { }
11347 #endif
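/*
 * In practice this pairs with the ftrace_boot_snapshot= (or alloc_snapshot=)
 * kernel parameter: booting with, say, ftrace_boot_snapshot=foo (where "foo"
 * is an instance also set up via trace_instance=) records "foo\t" in
 * boot_snapshot_info, which tr_needs_alloc_snapshot() matches so that the
 * instance gets a snapshot buffer allocated when it is created below.
 */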
11348 
backup_instance_area(const char * backup,unsigned long * addr,phys_addr_t * size)11349 __init static int backup_instance_area(const char *backup,
11350 				       unsigned long *addr, phys_addr_t *size)
11351 {
11352 	struct trace_array *backup_tr;
11353 	void *allocated_vaddr = NULL;
11354 
11355 	backup_tr = trace_array_get_by_name(backup, NULL);
11356 	if (!backup_tr) {
11357 		pr_warn("Tracing: Instance %s is not found.\n", backup);
11358 		return -ENOENT;
11359 	}
11360 
11361 	if (!(backup_tr->flags & TRACE_ARRAY_FL_BOOT)) {
11362 		pr_warn("Tracing: Instance %s is not boot mapped.\n", backup);
11363 		trace_array_put(backup_tr);
11364 		return -EINVAL;
11365 	}
11366 
11367 	*size = backup_tr->range_addr_size;
11368 
11369 	allocated_vaddr = vzalloc(*size);
11370 	if (!allocated_vaddr) {
11371 		pr_warn("Tracing: Failed to allocate memory for copying instance %s (size 0x%lx)\n",
11372 			backup, (unsigned long)*size);
11373 		trace_array_put(backup_tr);
11374 		return -ENOMEM;
11375 	}
11376 
11377 	memcpy(allocated_vaddr,
11378 		(void *)backup_tr->range_addr_start, (size_t)*size);
11379 	*addr = (unsigned long)allocated_vaddr;
11380 
11381 	trace_array_put(backup_tr);
11382 	return 0;
11383 }
11384 
enable_instances(void)11385 __init static void enable_instances(void)
11386 {
11387 	struct trace_array *tr;
11388 	bool memmap_area = false;
11389 	char *curr_str;
11390 	char *name;
11391 	char *str;
11392 	char *tok;
11393 
11394 	/* A tab is always appended */
11395 	boot_instance_info[boot_instance_index - 1] = '\0';
11396 	str = boot_instance_info;
11397 
11398 	while ((curr_str = strsep(&str, "\t"))) {
11399 		phys_addr_t start = 0;
11400 		phys_addr_t size = 0;
11401 		unsigned long addr = 0;
11402 		bool traceprintk = false;
11403 		bool traceoff = false;
11404 		char *flag_delim;
11405 		char *addr_delim;
11406 		char *rname __free(kfree) = NULL;
11407 		char *backup;
11408 
11409 		tok = strsep(&curr_str, ",");
11410 
11411 		name = strsep(&tok, "=");
11412 		backup = tok;
11413 
11414 		flag_delim = strchr(name, '^');
11415 		addr_delim = strchr(name, '@');
11416 
11417 		if (addr_delim)
11418 			*addr_delim++ = '\0';
11419 
11420 		if (flag_delim)
11421 			*flag_delim++ = '\0';
11422 
11423 		if (backup) {
11424 			if (backup_instance_area(backup, &addr, &size) < 0)
11425 				continue;
11426 		}
11427 
11428 		if (flag_delim) {
11429 			char *flag;
11430 
11431 			while ((flag = strsep(&flag_delim, "^"))) {
11432 				if (strcmp(flag, "traceoff") == 0) {
11433 					traceoff = true;
11434 				} else if ((strcmp(flag, "printk") == 0) ||
11435 					   (strcmp(flag, "traceprintk") == 0) ||
11436 					   (strcmp(flag, "trace_printk") == 0)) {
11437 					traceprintk = true;
11438 				} else {
11439 					pr_info("Tracing: Invalid instance flag '%s' for %s\n",
11440 						flag, name);
11441 				}
11442 			}
11443 		}
11444 
11445 		tok = addr_delim;
11446 		if (tok && isdigit(*tok)) {
11447 			start = memparse(tok, &tok);
11448 			if (!start) {
11449 				pr_warn("Tracing: Invalid boot instance address for %s\n",
11450 					name);
11451 				continue;
11452 			}
11453 			if (*tok != ':') {
11454 				pr_warn("Tracing: No size specified for instance %s\n", name);
11455 				continue;
11456 			}
11457 			tok++;
11458 			size = memparse(tok, &tok);
11459 			if (!size) {
11460 				pr_warn("Tracing: Invalid boot instance size for %s\n",
11461 					name);
11462 				continue;
11463 			}
11464 			memmap_area = true;
11465 		} else if (tok) {
11466 			if (!reserve_mem_find_by_name(tok, &start, &size)) {
11467 				start = 0;
11468 				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
11469 				continue;
11470 			}
11471 			rname = kstrdup(tok, GFP_KERNEL);
11472 		}
11473 
11474 		if (start) {
11475 			/* Start and size must be page aligned */
11476 			if (start & ~PAGE_MASK) {
11477 				pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start);
11478 				continue;
11479 			}
11480 			if (size & ~PAGE_MASK) {
11481 				pr_warn("Tracing: mapping size %pa is not page aligned\n", &size);
11482 				continue;
11483 			}
11484 
11485 			if (memmap_area)
11486 				addr = map_pages(start, size);
11487 			else
11488 				addr = (unsigned long)phys_to_virt(start);
11489 			if (addr) {
11490 				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
11491 					name, &start, (unsigned long)size);
11492 			} else {
11493 				pr_warn("Tracing: Failed to map boot instance %s\n", name);
11494 				continue;
11495 			}
11496 		} else {
11497 			/* Only non-mapped buffers have snapshot buffers */
11498 			if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
11499 				do_allocate_snapshot(name);
11500 		}
11501 
11502 		tr = trace_array_create_systems(name, NULL, addr, size);
11503 		if (IS_ERR(tr)) {
11504 			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
11505 			continue;
11506 		}
11507 
11508 		if (traceoff)
11509 			tracer_tracing_off(tr);
11510 
11511 		if (traceprintk)
11512 			update_printk_trace(tr);
11513 
11514 		/*
11515 		 * memmap'd buffers can not be freed.
11516 		 */
11517 		if (memmap_area) {
11518 			tr->flags |= TRACE_ARRAY_FL_MEMMAP;
11519 			tr->ref++;
11520 		}
11521 
11522 		/*
11523 		 * Backup buffers can be freed but need vfree().
11524 		 */
11525 		if (backup)
11526 			tr->flags |= TRACE_ARRAY_FL_VMALLOC;
11527 
11528 		if (start || backup) {
11529 			tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
11530 			tr->range_name = no_free_ptr(rname);
11531 		}
11532 
11533 		while ((tok = strsep(&curr_str, ","))) {
11534 			early_enable_events(tr, tok, true);
11535 		}
11536 	}
11537 }
11538 
tracer_alloc_buffers(void)11539 __init static int tracer_alloc_buffers(void)
11540 {
11541 	int ring_buf_size;
11542 	int ret = -ENOMEM;
11543 
11544 
11545 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
11546 		pr_warn("Tracing disabled due to lockdown\n");
11547 		return -EPERM;
11548 	}
11549 
11550 	/*
11551 	 * Make sure we don't accidentally add more trace options
11552 	 * than we have bits for.
11553 	 */
11554 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
11555 
11556 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
11557 		return -ENOMEM;
11558 
11559 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
11560 		goto out_free_buffer_mask;
11561 
11562 	/* Only allocate trace_printk buffers if a trace_printk exists */
11563 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
11564 		/* Must be called before global_trace.buffer is allocated */
11565 		trace_printk_init_buffers();
11566 
11567 	/* To save memory, keep the ring buffer size to its minimum */
11568 	if (global_trace.ring_buffer_expanded)
11569 		ring_buf_size = trace_buf_size;
11570 	else
11571 		ring_buf_size = 1;
11572 
11573 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
11574 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
11575 
11576 	raw_spin_lock_init(&global_trace.start_lock);
11577 
11578 	/*
11579 	 * The prepare callback allocates some memory for the ring buffer. We
11580 	 * don't free the buffer if the CPU goes down. If we were to free
11581 	 * the buffer, then the user would lose any trace that was in the
11582 	 * buffer. The memory will be removed once the "instance" is removed.
11583 	 */
11584 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
11585 				      "trace/RB:prepare", trace_rb_cpu_prepare,
11586 				      NULL);
11587 	if (ret < 0)
11588 		goto out_free_cpumask;
11589 	/* Used for event triggers */
11590 	ret = -ENOMEM;
11591 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
11592 	if (!temp_buffer)
11593 		goto out_rm_hp_state;
11594 
11595 	if (trace_create_savedcmd() < 0)
11596 		goto out_free_temp_buffer;
11597 
11598 	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
11599 		goto out_free_savedcmd;
11600 
11601 	/* TODO: make the number of buffers hot pluggable with CPUs */
11602 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
11603 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
11604 		goto out_free_pipe_cpumask;
11605 	}
11606 	if (global_trace.buffer_disabled)
11607 		tracing_off();
11608 
11609 	if (trace_boot_clock) {
11610 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
11611 		if (ret < 0)
11612 			pr_warn("Trace clock %s not defined, going back to default\n",
11613 				trace_boot_clock);
11614 	}
11615 
11616 	/*
11617 	 * register_tracer() might reference current_trace, so it
11618 	 * needs to be set before we register anything. This is
11619 	 * just a bootstrap of current_trace anyway.
11620 	 */
11621 	global_trace.current_trace = &nop_trace;
11622 	global_trace.current_trace_flags = nop_trace.flags;
11623 
11624 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
11625 #ifdef CONFIG_TRACER_MAX_TRACE
11626 	spin_lock_init(&global_trace.snapshot_trigger_lock);
11627 #endif
11628 	ftrace_init_global_array_ops(&global_trace);
11629 
11630 #ifdef CONFIG_MODULES
11631 	INIT_LIST_HEAD(&global_trace.mod_events);
11632 #endif
11633 
11634 	init_trace_flags_index(&global_trace);
11635 
11636 	INIT_LIST_HEAD(&global_trace.tracers);
11637 
11638 	/* All seems OK, enable tracing */
11639 	tracing_disabled = 0;
11640 
11641 	atomic_notifier_chain_register(&panic_notifier_list,
11642 				       &trace_panic_notifier);
11643 
11644 	register_die_notifier(&trace_die_notifier);
11645 
11646 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
11647 
11648 	global_trace.syscall_buf_sz = syscall_buf_size;
11649 
11650 	INIT_LIST_HEAD(&global_trace.systems);
11651 	INIT_LIST_HEAD(&global_trace.events);
11652 	INIT_LIST_HEAD(&global_trace.hist_vars);
11653 	INIT_LIST_HEAD(&global_trace.err_log);
11654 	list_add(&global_trace.marker_list, &marker_copies);
11655 	list_add(&global_trace.list, &ftrace_trace_arrays);
11656 
11657 	register_tracer(&nop_trace);
11658 
11659 	/* Function tracing may start here (via kernel command line) */
11660 	init_function_trace();
11661 
11662 	apply_trace_boot_options();
11663 
11664 	register_snapshot_cmd();
11665 
11666 	return 0;
11667 
11668 out_free_pipe_cpumask:
11669 	free_cpumask_var(global_trace.pipe_cpumask);
11670 out_free_savedcmd:
11671 	trace_free_saved_cmdlines_buffer();
11672 out_free_temp_buffer:
11673 	ring_buffer_free(temp_buffer);
11674 out_rm_hp_state:
11675 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
11676 out_free_cpumask:
11677 	free_cpumask_var(global_trace.tracing_cpumask);
11678 out_free_buffer_mask:
11679 	free_cpumask_var(tracing_buffer_mask);
11680 	return ret;
11681 }
11682 
11683 #ifdef CONFIG_FUNCTION_TRACER
11684 /* Used to set module cached ftrace filtering at boot up */
trace_get_global_array(void)11685 struct trace_array *trace_get_global_array(void)
11686 {
11687 	return &global_trace;
11688 }
11689 #endif
11690 
ftrace_boot_snapshot(void)11691 void __init ftrace_boot_snapshot(void)
11692 {
11693 #ifdef CONFIG_TRACER_MAX_TRACE
11694 	struct trace_array *tr;
11695 
11696 	if (!snapshot_at_boot)
11697 		return;
11698 
11699 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
11700 		if (!tr->allocated_snapshot)
11701 			continue;
11702 
11703 		tracing_snapshot_instance(tr);
11704 		trace_array_puts(tr, "** Boot snapshot taken **\n");
11705 	}
11706 #endif
11707 }
11708 
early_trace_init(void)11709 void __init early_trace_init(void)
11710 {
11711 	if (tracepoint_printk) {
11712 		tracepoint_print_iter =
11713 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
11714 		if (MEM_FAIL(!tracepoint_print_iter,
11715 			     "Failed to allocate trace iterator\n"))
11716 			tracepoint_printk = 0;
11717 		else
11718 			static_key_enable(&tracepoint_printk_key.key);
11719 	}
11720 	tracer_alloc_buffers();
11721 
11722 	init_events();
11723 }
11724 
trace_init(void)11725 void __init trace_init(void)
11726 {
11727 	trace_event_init();
11728 
11729 	if (boot_instance_index)
11730 		enable_instances();
11731 }
11732 
clear_boot_tracer(void)11733 __init static void clear_boot_tracer(void)
11734 {
11735 	/*
11736 	 * The buffer holding the default bootup tracer name is in
11737 	 * an init section. This function is called at late_initcall
11738 	 * time. If we never found the boot tracer, clear the pointer
11739 	 * to prevent a later registration from accessing the buffer
11740 	 * that is about to be freed.
11741 	 */
11742 	if (!default_bootup_tracer)
11743 		return;
11744 
11745 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
11746 	       default_bootup_tracer);
11747 	default_bootup_tracer = NULL;
11748 }
11749 
11750 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
tracing_set_default_clock(void)11751 __init static void tracing_set_default_clock(void)
11752 {
11753 	/* sched_clock_stable() is determined in late_initcall */
11754 	if (!trace_boot_clock && !sched_clock_stable()) {
11755 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
11756 			pr_warn("Can not set tracing clock due to lockdown\n");
11757 			return;
11758 		}
11759 
11760 		printk(KERN_WARNING
11761 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
11762 		       "If you want to keep using the local clock, then add:\n"
11763 		       "  \"trace_clock=local\"\n"
11764 		       "on the kernel command line\n");
11765 		tracing_set_clock(&global_trace, "global");
11766 	}
11767 }
11768 #else
tracing_set_default_clock(void)11769 static inline void tracing_set_default_clock(void) { }
11770 #endif
11771 
late_trace_init(void)11772 __init static int late_trace_init(void)
11773 {
11774 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
11775 		static_key_disable(&tracepoint_printk_key.key);
11776 		tracepoint_printk = 0;
11777 	}
11778 
11779 	if (traceoff_after_boot)
11780 		tracing_off();
11781 
11782 	tracing_set_default_clock();
11783 	clear_boot_tracer();
11784 	return 0;
11785 }
11786 
11787 late_initcall_sync(late_trace_init);
11788