xref: /linux/kernel/trace/trace.c (revision 1b5d4661c7ee7937d062a00bd336761a237870b4)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/cleanup.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 #include <linux/sort.h>
53 #include <linux/io.h> /* vmap_page_range() */
54 #include <linux/fs_context.h>
55 
56 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
57 
58 #include "trace.h"
59 #include "trace_output.h"
60 
61 #ifdef CONFIG_FTRACE_STARTUP_TEST
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will lurk into the ring-buffer to count the
65  * entries inserted during the selftest although some concurrent
66  * insertions into the ring-buffer such as trace_printk could occurred
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70 
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76 
disable_tracing_selftest(const char * reason)77 void __init disable_tracing_selftest(const char *reason)
78 {
79 	if (!tracing_selftest_disabled) {
80 		tracing_selftest_disabled = true;
81 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
82 	}
83 }
84 #else
85 #define tracing_selftest_running	0
86 #define tracing_selftest_disabled	0
87 #endif
88 
89 /* Pipe tracepoints to printk */
90 static struct trace_iterator *tracepoint_print_iter;
91 int tracepoint_printk;
92 static bool tracepoint_printk_stop_on_boot __initdata;
93 static bool traceoff_after_boot __initdata;
94 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
95 
96 /* For tracers that don't implement custom flags */
97 static struct tracer_opt dummy_tracer_opt[] = {
98 	{ }
99 };
100 
101 static int
dummy_set_flag(struct trace_array * tr,u32 old_flags,u32 bit,int set)102 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
103 {
104 	return 0;
105 }
106 
107 /*
108  * To prevent the comm cache from being overwritten when no
109  * tracing is active, only save the comm when a trace event
110  * occurred.
111  */
112 DEFINE_PER_CPU(bool, trace_taskinfo_save);
113 
114 /*
115  * Kill all tracing for good (never come back).
116  * It is initialized to 1 but will turn to zero if the initialization
117  * of the tracer is successful. But that is the only place that sets
118  * this back to zero.
119  */
120 static int tracing_disabled = 1;
121 
122 cpumask_var_t __read_mostly	tracing_buffer_mask;
123 
124 #define MAX_TRACER_SIZE		100
125 /*
126  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
127  *
128  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
129  * is set, then ftrace_dump is called. This will output the contents
130  * of the ftrace buffers to the console.  This is very useful for
131  * capturing traces that lead to crashes and outputing it to a
132  * serial console.
133  *
134  * It is default off, but you can enable it with either specifying
135  * "ftrace_dump_on_oops" in the kernel command line, or setting
136  * /proc/sys/kernel/ftrace_dump_on_oops
137  * Set 1 if you want to dump buffers of all CPUs
138  * Set 2 if you want to dump the buffer of the CPU that triggered oops
139  * Set instance name if you want to dump the specific trace instance
140  * Multiple instance dump is also supported, and instances are seperated
141  * by commas.
142  */
143 /* Set to string format zero to disable by default */
144 char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
145 
146 /* When set, tracing will stop when a WARN*() is hit */
147 static int __disable_trace_on_warning;
148 
149 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
150 			     void *buffer, size_t *lenp, loff_t *ppos);
151 static const struct ctl_table trace_sysctl_table[] = {
152 	{
153 		.procname	= "ftrace_dump_on_oops",
154 		.data		= &ftrace_dump_on_oops,
155 		.maxlen		= MAX_TRACER_SIZE,
156 		.mode		= 0644,
157 		.proc_handler	= proc_dostring,
158 	},
159 	{
160 		.procname	= "traceoff_on_warning",
161 		.data		= &__disable_trace_on_warning,
162 		.maxlen		= sizeof(__disable_trace_on_warning),
163 		.mode		= 0644,
164 		.proc_handler	= proc_dointvec,
165 	},
166 	{
167 		.procname	= "tracepoint_printk",
168 		.data		= &tracepoint_printk,
169 		.maxlen		= sizeof(tracepoint_printk),
170 		.mode		= 0644,
171 		.proc_handler	= tracepoint_printk_sysctl,
172 	},
173 };
174 
init_trace_sysctls(void)175 static int __init init_trace_sysctls(void)
176 {
177 	register_sysctl_init("kernel", trace_sysctl_table);
178 	return 0;
179 }
180 subsys_initcall(init_trace_sysctls);
181 
182 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
183 /* Map of enums to their values, for "eval_map" file */
184 struct trace_eval_map_head {
185 	struct module			*mod;
186 	unsigned long			length;
187 };
188 
189 union trace_eval_map_item;
190 
191 struct trace_eval_map_tail {
192 	/*
193 	 * "end" is first and points to NULL as it must be different
194 	 * than "mod" or "eval_string"
195 	 */
196 	union trace_eval_map_item	*next;
197 	const char			*end;	/* points to NULL */
198 };
199 
200 static DEFINE_MUTEX(trace_eval_mutex);
201 
202 /*
203  * The trace_eval_maps are saved in an array with two extra elements,
204  * one at the beginning, and one at the end. The beginning item contains
205  * the count of the saved maps (head.length), and the module they
206  * belong to if not built in (head.mod). The ending item contains a
207  * pointer to the next array of saved eval_map items.
208  */
209 union trace_eval_map_item {
210 	struct trace_eval_map		map;
211 	struct trace_eval_map_head	head;
212 	struct trace_eval_map_tail	tail;
213 };
214 
215 static union trace_eval_map_item *trace_eval_maps;
216 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
217 
218 int tracing_set_tracer(struct trace_array *tr, const char *buf);
219 static void ftrace_trace_userstack(struct trace_array *tr,
220 				   struct trace_buffer *buffer,
221 				   unsigned int trace_ctx);
222 
223 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
224 static char *default_bootup_tracer;
225 
226 static bool allocate_snapshot;
227 static bool snapshot_at_boot;
228 
229 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
230 static int boot_instance_index;
231 
232 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
233 static int boot_snapshot_index;
234 
set_cmdline_ftrace(char * str)235 static int __init set_cmdline_ftrace(char *str)
236 {
237 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
238 	default_bootup_tracer = bootup_tracer_buf;
239 	/* We are using ftrace early, expand it */
240 	trace_set_ring_buffer_expanded(NULL);
241 	return 1;
242 }
243 __setup("ftrace=", set_cmdline_ftrace);
244 
ftrace_dump_on_oops_enabled(void)245 int ftrace_dump_on_oops_enabled(void)
246 {
247 	if (!strcmp("0", ftrace_dump_on_oops))
248 		return 0;
249 	else
250 		return 1;
251 }
252 
set_ftrace_dump_on_oops(char * str)253 static int __init set_ftrace_dump_on_oops(char *str)
254 {
255 	if (!*str) {
256 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
257 		return 1;
258 	}
259 
260 	if (*str == ',') {
261 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
262 		strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
263 		return 1;
264 	}
265 
266 	if (*str++ == '=') {
267 		strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
268 		return 1;
269 	}
270 
271 	return 0;
272 }
273 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
274 
stop_trace_on_warning(char * str)275 static int __init stop_trace_on_warning(char *str)
276 {
277 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
278 		__disable_trace_on_warning = 1;
279 	return 1;
280 }
281 __setup("traceoff_on_warning", stop_trace_on_warning);
282 
boot_alloc_snapshot(char * str)283 static int __init boot_alloc_snapshot(char *str)
284 {
285 	char *slot = boot_snapshot_info + boot_snapshot_index;
286 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
287 	int ret;
288 
289 	if (str[0] == '=') {
290 		str++;
291 		if (strlen(str) >= left)
292 			return -1;
293 
294 		ret = snprintf(slot, left, "%s\t", str);
295 		boot_snapshot_index += ret;
296 	} else {
297 		allocate_snapshot = true;
298 		/* We also need the main ring buffer expanded */
299 		trace_set_ring_buffer_expanded(NULL);
300 	}
301 	return 1;
302 }
303 __setup("alloc_snapshot", boot_alloc_snapshot);
304 
305 
boot_snapshot(char * str)306 static int __init boot_snapshot(char *str)
307 {
308 	snapshot_at_boot = true;
309 	boot_alloc_snapshot(str);
310 	return 1;
311 }
312 __setup("ftrace_boot_snapshot", boot_snapshot);
313 
314 
boot_instance(char * str)315 static int __init boot_instance(char *str)
316 {
317 	char *slot = boot_instance_info + boot_instance_index;
318 	int left = sizeof(boot_instance_info) - boot_instance_index;
319 	int ret;
320 
321 	if (strlen(str) >= left)
322 		return -1;
323 
324 	ret = snprintf(slot, left, "%s\t", str);
325 	boot_instance_index += ret;
326 
327 	return 1;
328 }
329 __setup("trace_instance=", boot_instance);
330 
331 
332 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
333 
set_trace_boot_options(char * str)334 static int __init set_trace_boot_options(char *str)
335 {
336 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
337 	return 1;
338 }
339 __setup("trace_options=", set_trace_boot_options);
340 
341 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
342 static char *trace_boot_clock __initdata;
343 
set_trace_boot_clock(char * str)344 static int __init set_trace_boot_clock(char *str)
345 {
346 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
347 	trace_boot_clock = trace_boot_clock_buf;
348 	return 1;
349 }
350 __setup("trace_clock=", set_trace_boot_clock);
351 
set_tracepoint_printk(char * str)352 static int __init set_tracepoint_printk(char *str)
353 {
354 	/* Ignore the "tp_printk_stop_on_boot" param */
355 	if (*str == '_')
356 		return 0;
357 
358 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
359 		tracepoint_printk = 1;
360 	return 1;
361 }
362 __setup("tp_printk", set_tracepoint_printk);
363 
set_tracepoint_printk_stop(char * str)364 static int __init set_tracepoint_printk_stop(char *str)
365 {
366 	tracepoint_printk_stop_on_boot = true;
367 	return 1;
368 }
369 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
370 
set_traceoff_after_boot(char * str)371 static int __init set_traceoff_after_boot(char *str)
372 {
373 	traceoff_after_boot = true;
374 	return 1;
375 }
376 __setup("traceoff_after_boot", set_traceoff_after_boot);
377 
ns2usecs(u64 nsec)378 unsigned long long ns2usecs(u64 nsec)
379 {
380 	nsec += 500;
381 	do_div(nsec, 1000);
382 	return nsec;
383 }
384 
385 static void
trace_process_export(struct trace_export * export,struct ring_buffer_event * event,int flag)386 trace_process_export(struct trace_export *export,
387 	       struct ring_buffer_event *event, int flag)
388 {
389 	struct trace_entry *entry;
390 	unsigned int size = 0;
391 
392 	if (export->flags & flag) {
393 		entry = ring_buffer_event_data(event);
394 		size = ring_buffer_event_length(event);
395 		export->write(export, entry, size);
396 	}
397 }
398 
399 static DEFINE_MUTEX(ftrace_export_lock);
400 
401 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
402 
403 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
404 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
405 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
406 
ftrace_exports_enable(struct trace_export * export)407 static inline void ftrace_exports_enable(struct trace_export *export)
408 {
409 	if (export->flags & TRACE_EXPORT_FUNCTION)
410 		static_branch_inc(&trace_function_exports_enabled);
411 
412 	if (export->flags & TRACE_EXPORT_EVENT)
413 		static_branch_inc(&trace_event_exports_enabled);
414 
415 	if (export->flags & TRACE_EXPORT_MARKER)
416 		static_branch_inc(&trace_marker_exports_enabled);
417 }
418 
ftrace_exports_disable(struct trace_export * export)419 static inline void ftrace_exports_disable(struct trace_export *export)
420 {
421 	if (export->flags & TRACE_EXPORT_FUNCTION)
422 		static_branch_dec(&trace_function_exports_enabled);
423 
424 	if (export->flags & TRACE_EXPORT_EVENT)
425 		static_branch_dec(&trace_event_exports_enabled);
426 
427 	if (export->flags & TRACE_EXPORT_MARKER)
428 		static_branch_dec(&trace_marker_exports_enabled);
429 }
430 
ftrace_exports(struct ring_buffer_event * event,int flag)431 static void ftrace_exports(struct ring_buffer_event *event, int flag)
432 {
433 	struct trace_export *export;
434 
435 	guard(preempt_notrace)();
436 
437 	export = rcu_dereference_raw_check(ftrace_exports_list);
438 	while (export) {
439 		trace_process_export(export, event, flag);
440 		export = rcu_dereference_raw_check(export->next);
441 	}
442 }
443 
444 static inline void
add_trace_export(struct trace_export ** list,struct trace_export * export)445 add_trace_export(struct trace_export **list, struct trace_export *export)
446 {
447 	rcu_assign_pointer(export->next, *list);
448 	/*
449 	 * We are entering export into the list but another
450 	 * CPU might be walking that list. We need to make sure
451 	 * the export->next pointer is valid before another CPU sees
452 	 * the export pointer included into the list.
453 	 */
454 	rcu_assign_pointer(*list, export);
455 }
456 
457 static inline int
rm_trace_export(struct trace_export ** list,struct trace_export * export)458 rm_trace_export(struct trace_export **list, struct trace_export *export)
459 {
460 	struct trace_export **p;
461 
462 	for (p = list; *p != NULL; p = &(*p)->next)
463 		if (*p == export)
464 			break;
465 
466 	if (*p != export)
467 		return -1;
468 
469 	rcu_assign_pointer(*p, (*p)->next);
470 
471 	return 0;
472 }
473 
474 static inline void
add_ftrace_export(struct trace_export ** list,struct trace_export * export)475 add_ftrace_export(struct trace_export **list, struct trace_export *export)
476 {
477 	ftrace_exports_enable(export);
478 
479 	add_trace_export(list, export);
480 }
481 
482 static inline int
rm_ftrace_export(struct trace_export ** list,struct trace_export * export)483 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
484 {
485 	int ret;
486 
487 	ret = rm_trace_export(list, export);
488 	ftrace_exports_disable(export);
489 
490 	return ret;
491 }
492 
register_ftrace_export(struct trace_export * export)493 int register_ftrace_export(struct trace_export *export)
494 {
495 	if (WARN_ON_ONCE(!export->write))
496 		return -1;
497 
498 	guard(mutex)(&ftrace_export_lock);
499 
500 	add_ftrace_export(&ftrace_exports_list, export);
501 
502 	return 0;
503 }
504 EXPORT_SYMBOL_GPL(register_ftrace_export);
505 
unregister_ftrace_export(struct trace_export * export)506 int unregister_ftrace_export(struct trace_export *export)
507 {
508 	guard(mutex)(&ftrace_export_lock);
509 	return rm_ftrace_export(&ftrace_exports_list, export);
510 }
511 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
512 
513 /* trace_flags holds trace_options default values */
514 #define TRACE_DEFAULT_FLAGS						\
515 	(FUNCTION_DEFAULT_FLAGS |					\
516 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
517 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
518 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
519 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
520 	 TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK |		\
521 	 TRACE_ITER_COPY_MARKER)
522 
523 /* trace_options that are only supported by global_trace */
524 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
525 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
526 
527 /* trace_flags that are default zero for instances */
528 #define ZEROED_TRACE_FLAGS \
529 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK | \
530 	 TRACE_ITER_COPY_MARKER)
531 
532 /*
533  * The global_trace is the descriptor that holds the top-level tracing
534  * buffers for the live tracing.
535  */
536 static struct trace_array global_trace = {
537 	.trace_flags = TRACE_DEFAULT_FLAGS,
538 };
539 
540 static struct trace_array *printk_trace = &global_trace;
541 
542 /* List of trace_arrays interested in the top level trace_marker */
543 static LIST_HEAD(marker_copies);
544 
printk_binsafe(struct trace_array * tr)545 static __always_inline bool printk_binsafe(struct trace_array *tr)
546 {
547 	/*
548 	 * The binary format of traceprintk can cause a crash if used
549 	 * by a buffer from another boot. Force the use of the
550 	 * non binary version of trace_printk if the trace_printk
551 	 * buffer is a boot mapped ring buffer.
552 	 */
553 	return !(tr->flags & TRACE_ARRAY_FL_BOOT);
554 }
555 
update_printk_trace(struct trace_array * tr)556 static void update_printk_trace(struct trace_array *tr)
557 {
558 	if (printk_trace == tr)
559 		return;
560 
561 	printk_trace->trace_flags &= ~TRACE_ITER_TRACE_PRINTK;
562 	printk_trace = tr;
563 	tr->trace_flags |= TRACE_ITER_TRACE_PRINTK;
564 }
565 
566 /* Returns true if the status of tr changed */
update_marker_trace(struct trace_array * tr,int enabled)567 static bool update_marker_trace(struct trace_array *tr, int enabled)
568 {
569 	lockdep_assert_held(&event_mutex);
570 
571 	if (enabled) {
572 		if (!list_empty(&tr->marker_list))
573 			return false;
574 
575 		list_add_rcu(&tr->marker_list, &marker_copies);
576 		tr->trace_flags |= TRACE_ITER_COPY_MARKER;
577 		return true;
578 	}
579 
580 	if (list_empty(&tr->marker_list))
581 		return false;
582 
583 	list_del_init(&tr->marker_list);
584 	tr->trace_flags &= ~TRACE_ITER_COPY_MARKER;
585 	return true;
586 }
587 
trace_set_ring_buffer_expanded(struct trace_array * tr)588 void trace_set_ring_buffer_expanded(struct trace_array *tr)
589 {
590 	if (!tr)
591 		tr = &global_trace;
592 	tr->ring_buffer_expanded = true;
593 }
594 
595 LIST_HEAD(ftrace_trace_arrays);
596 
trace_array_get(struct trace_array * this_tr)597 int trace_array_get(struct trace_array *this_tr)
598 {
599 	struct trace_array *tr;
600 
601 	guard(mutex)(&trace_types_lock);
602 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
603 		if (tr == this_tr) {
604 			tr->ref++;
605 			return 0;
606 		}
607 	}
608 
609 	return -ENODEV;
610 }
611 
__trace_array_put(struct trace_array * this_tr)612 static void __trace_array_put(struct trace_array *this_tr)
613 {
614 	WARN_ON(!this_tr->ref);
615 	this_tr->ref--;
616 }
617 
618 /**
619  * trace_array_put - Decrement the reference counter for this trace array.
620  * @this_tr : pointer to the trace array
621  *
622  * NOTE: Use this when we no longer need the trace array returned by
623  * trace_array_get_by_name(). This ensures the trace array can be later
624  * destroyed.
625  *
626  */
trace_array_put(struct trace_array * this_tr)627 void trace_array_put(struct trace_array *this_tr)
628 {
629 	if (!this_tr)
630 		return;
631 
632 	guard(mutex)(&trace_types_lock);
633 	__trace_array_put(this_tr);
634 }
635 EXPORT_SYMBOL_GPL(trace_array_put);
636 
tracing_check_open_get_tr(struct trace_array * tr)637 int tracing_check_open_get_tr(struct trace_array *tr)
638 {
639 	int ret;
640 
641 	ret = security_locked_down(LOCKDOWN_TRACEFS);
642 	if (ret)
643 		return ret;
644 
645 	if (tracing_disabled)
646 		return -ENODEV;
647 
648 	if (tr && trace_array_get(tr) < 0)
649 		return -ENODEV;
650 
651 	return 0;
652 }
653 
654 /**
655  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
656  * @filtered_pids: The list of pids to check
657  * @search_pid: The PID to find in @filtered_pids
658  *
659  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
660  */
661 bool
trace_find_filtered_pid(struct trace_pid_list * filtered_pids,pid_t search_pid)662 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
663 {
664 	return trace_pid_list_is_set(filtered_pids, search_pid);
665 }
666 
667 /**
668  * trace_ignore_this_task - should a task be ignored for tracing
669  * @filtered_pids: The list of pids to check
670  * @filtered_no_pids: The list of pids not to be traced
671  * @task: The task that should be ignored if not filtered
672  *
673  * Checks if @task should be traced or not from @filtered_pids.
674  * Returns true if @task should *NOT* be traced.
675  * Returns false if @task should be traced.
676  */
677 bool
trace_ignore_this_task(struct trace_pid_list * filtered_pids,struct trace_pid_list * filtered_no_pids,struct task_struct * task)678 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
679 		       struct trace_pid_list *filtered_no_pids,
680 		       struct task_struct *task)
681 {
682 	/*
683 	 * If filtered_no_pids is not empty, and the task's pid is listed
684 	 * in filtered_no_pids, then return true.
685 	 * Otherwise, if filtered_pids is empty, that means we can
686 	 * trace all tasks. If it has content, then only trace pids
687 	 * within filtered_pids.
688 	 */
689 
690 	return (filtered_pids &&
691 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
692 		(filtered_no_pids &&
693 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
694 }
695 
696 /**
697  * trace_filter_add_remove_task - Add or remove a task from a pid_list
698  * @pid_list: The list to modify
699  * @self: The current task for fork or NULL for exit
700  * @task: The task to add or remove
701  *
702  * If adding a task, if @self is defined, the task is only added if @self
703  * is also included in @pid_list. This happens on fork and tasks should
704  * only be added when the parent is listed. If @self is NULL, then the
705  * @task pid will be removed from the list, which would happen on exit
706  * of a task.
707  */
trace_filter_add_remove_task(struct trace_pid_list * pid_list,struct task_struct * self,struct task_struct * task)708 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
709 				  struct task_struct *self,
710 				  struct task_struct *task)
711 {
712 	if (!pid_list)
713 		return;
714 
715 	/* For forks, we only add if the forking task is listed */
716 	if (self) {
717 		if (!trace_find_filtered_pid(pid_list, self->pid))
718 			return;
719 	}
720 
721 	/* "self" is set for forks, and NULL for exits */
722 	if (self)
723 		trace_pid_list_set(pid_list, task->pid);
724 	else
725 		trace_pid_list_clear(pid_list, task->pid);
726 }
727 
728 /**
729  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
730  * @pid_list: The pid list to show
731  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
732  * @pos: The position of the file
733  *
734  * This is used by the seq_file "next" operation to iterate the pids
735  * listed in a trace_pid_list structure.
736  *
737  * Returns the pid+1 as we want to display pid of zero, but NULL would
738  * stop the iteration.
739  */
trace_pid_next(struct trace_pid_list * pid_list,void * v,loff_t * pos)740 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
741 {
742 	long pid = (unsigned long)v;
743 	unsigned int next;
744 
745 	(*pos)++;
746 
747 	/* pid already is +1 of the actual previous bit */
748 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
749 		return NULL;
750 
751 	pid = next;
752 
753 	/* Return pid + 1 to allow zero to be represented */
754 	return (void *)(pid + 1);
755 }
756 
757 /**
758  * trace_pid_start - Used for seq_file to start reading pid lists
759  * @pid_list: The pid list to show
760  * @pos: The position of the file
761  *
762  * This is used by seq_file "start" operation to start the iteration
763  * of listing pids.
764  *
765  * Returns the pid+1 as we want to display pid of zero, but NULL would
766  * stop the iteration.
767  */
trace_pid_start(struct trace_pid_list * pid_list,loff_t * pos)768 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
769 {
770 	unsigned long pid;
771 	unsigned int first;
772 	loff_t l = 0;
773 
774 	if (trace_pid_list_first(pid_list, &first) < 0)
775 		return NULL;
776 
777 	pid = first;
778 
779 	/* Return pid + 1 so that zero can be the exit value */
780 	for (pid++; pid && l < *pos;
781 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
782 		;
783 	return (void *)pid;
784 }
785 
786 /**
787  * trace_pid_show - show the current pid in seq_file processing
788  * @m: The seq_file structure to write into
789  * @v: A void pointer of the pid (+1) value to display
790  *
791  * Can be directly used by seq_file operations to display the current
792  * pid value.
793  */
trace_pid_show(struct seq_file * m,void * v)794 int trace_pid_show(struct seq_file *m, void *v)
795 {
796 	unsigned long pid = (unsigned long)v - 1;
797 
798 	seq_printf(m, "%lu\n", pid);
799 	return 0;
800 }
801 
802 /* 128 should be much more than enough */
803 #define PID_BUF_SIZE		127
804 
trace_pid_write(struct trace_pid_list * filtered_pids,struct trace_pid_list ** new_pid_list,const char __user * ubuf,size_t cnt)805 int trace_pid_write(struct trace_pid_list *filtered_pids,
806 		    struct trace_pid_list **new_pid_list,
807 		    const char __user *ubuf, size_t cnt)
808 {
809 	struct trace_pid_list *pid_list;
810 	struct trace_parser parser;
811 	unsigned long val;
812 	int nr_pids = 0;
813 	ssize_t read = 0;
814 	ssize_t ret;
815 	loff_t pos;
816 	pid_t pid;
817 
818 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
819 		return -ENOMEM;
820 
821 	/*
822 	 * Always recreate a new array. The write is an all or nothing
823 	 * operation. Always create a new array when adding new pids by
824 	 * the user. If the operation fails, then the current list is
825 	 * not modified.
826 	 */
827 	pid_list = trace_pid_list_alloc();
828 	if (!pid_list) {
829 		trace_parser_put(&parser);
830 		return -ENOMEM;
831 	}
832 
833 	if (filtered_pids) {
834 		/* copy the current bits to the new max */
835 		ret = trace_pid_list_first(filtered_pids, &pid);
836 		while (!ret) {
837 			ret = trace_pid_list_set(pid_list, pid);
838 			if (ret < 0)
839 				goto out;
840 
841 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
842 			nr_pids++;
843 		}
844 	}
845 
846 	ret = 0;
847 	while (cnt > 0) {
848 
849 		pos = 0;
850 
851 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
852 		if (ret < 0)
853 			break;
854 
855 		read += ret;
856 		ubuf += ret;
857 		cnt -= ret;
858 
859 		if (!trace_parser_loaded(&parser))
860 			break;
861 
862 		ret = -EINVAL;
863 		if (kstrtoul(parser.buffer, 0, &val))
864 			break;
865 
866 		pid = (pid_t)val;
867 
868 		if (trace_pid_list_set(pid_list, pid) < 0) {
869 			ret = -1;
870 			break;
871 		}
872 		nr_pids++;
873 
874 		trace_parser_clear(&parser);
875 		ret = 0;
876 	}
877  out:
878 	trace_parser_put(&parser);
879 
880 	if (ret < 0) {
881 		trace_pid_list_free(pid_list);
882 		return ret;
883 	}
884 
885 	if (!nr_pids) {
886 		/* Cleared the list of pids */
887 		trace_pid_list_free(pid_list);
888 		pid_list = NULL;
889 	}
890 
891 	*new_pid_list = pid_list;
892 
893 	return read;
894 }
895 
buffer_ftrace_now(struct array_buffer * buf,int cpu)896 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
897 {
898 	u64 ts;
899 
900 	/* Early boot up does not have a buffer yet */
901 	if (!buf->buffer)
902 		return trace_clock_local();
903 
904 	ts = ring_buffer_time_stamp(buf->buffer);
905 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
906 
907 	return ts;
908 }
909 
ftrace_now(int cpu)910 u64 ftrace_now(int cpu)
911 {
912 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
913 }
914 
915 /**
916  * tracing_is_enabled - Show if global_trace has been enabled
917  *
918  * Shows if the global trace has been enabled or not. It uses the
919  * mirror flag "buffer_disabled" to be used in fast paths such as for
920  * the irqsoff tracer. But it may be inaccurate due to races. If you
921  * need to know the accurate state, use tracing_is_on() which is a little
922  * slower, but accurate.
923  */
tracing_is_enabled(void)924 int tracing_is_enabled(void)
925 {
926 	/*
927 	 * For quick access (irqsoff uses this in fast path), just
928 	 * return the mirror variable of the state of the ring buffer.
929 	 * It's a little racy, but we don't really care.
930 	 */
931 	return !global_trace.buffer_disabled;
932 }
933 
934 /*
935  * trace_buf_size is the size in bytes that is allocated
936  * for a buffer. Note, the number of bytes is always rounded
937  * to page size.
938  *
939  * This number is purposely set to a low number of 16384.
940  * If the dump on oops happens, it will be much appreciated
941  * to not have to wait for all that output. Anyway this can be
942  * boot time and run time configurable.
943  */
944 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
945 
946 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
947 
948 /* trace_types holds a link list of available tracers. */
949 static struct tracer		*trace_types __read_mostly;
950 
951 /*
952  * trace_types_lock is used to protect the trace_types list.
953  */
954 DEFINE_MUTEX(trace_types_lock);
955 
956 /*
957  * serialize the access of the ring buffer
958  *
959  * ring buffer serializes readers, but it is low level protection.
960  * The validity of the events (which returns by ring_buffer_peek() ..etc)
961  * are not protected by ring buffer.
962  *
963  * The content of events may become garbage if we allow other process consumes
964  * these events concurrently:
965  *   A) the page of the consumed events may become a normal page
966  *      (not reader page) in ring buffer, and this page will be rewritten
967  *      by events producer.
968  *   B) The page of the consumed events may become a page for splice_read,
969  *      and this page will be returned to system.
970  *
971  * These primitives allow multi process access to different cpu ring buffer
972  * concurrently.
973  *
974  * These primitives don't distinguish read-only and read-consume access.
975  * Multi read-only access are also serialized.
976  */
977 
978 #ifdef CONFIG_SMP
979 static DECLARE_RWSEM(all_cpu_access_lock);
980 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
981 
trace_access_lock(int cpu)982 static inline void trace_access_lock(int cpu)
983 {
984 	if (cpu == RING_BUFFER_ALL_CPUS) {
985 		/* gain it for accessing the whole ring buffer. */
986 		down_write(&all_cpu_access_lock);
987 	} else {
988 		/* gain it for accessing a cpu ring buffer. */
989 
990 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
991 		down_read(&all_cpu_access_lock);
992 
993 		/* Secondly block other access to this @cpu ring buffer. */
994 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
995 	}
996 }
997 
trace_access_unlock(int cpu)998 static inline void trace_access_unlock(int cpu)
999 {
1000 	if (cpu == RING_BUFFER_ALL_CPUS) {
1001 		up_write(&all_cpu_access_lock);
1002 	} else {
1003 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
1004 		up_read(&all_cpu_access_lock);
1005 	}
1006 }
1007 
trace_access_lock_init(void)1008 static inline void trace_access_lock_init(void)
1009 {
1010 	int cpu;
1011 
1012 	for_each_possible_cpu(cpu)
1013 		mutex_init(&per_cpu(cpu_access_lock, cpu));
1014 }
1015 
1016 #else
1017 
1018 static DEFINE_MUTEX(access_lock);
1019 
trace_access_lock(int cpu)1020 static inline void trace_access_lock(int cpu)
1021 {
1022 	(void)cpu;
1023 	mutex_lock(&access_lock);
1024 }
1025 
trace_access_unlock(int cpu)1026 static inline void trace_access_unlock(int cpu)
1027 {
1028 	(void)cpu;
1029 	mutex_unlock(&access_lock);
1030 }
1031 
trace_access_lock_init(void)1032 static inline void trace_access_lock_init(void)
1033 {
1034 }
1035 
1036 #endif
1037 
1038 #ifdef CONFIG_STACKTRACE
1039 static void __ftrace_trace_stack(struct trace_array *tr,
1040 				 struct trace_buffer *buffer,
1041 				 unsigned int trace_ctx,
1042 				 int skip, struct pt_regs *regs);
1043 static inline void ftrace_trace_stack(struct trace_array *tr,
1044 				      struct trace_buffer *buffer,
1045 				      unsigned int trace_ctx,
1046 				      int skip, struct pt_regs *regs);
1047 
1048 #else
__ftrace_trace_stack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx,int skip,struct pt_regs * regs)1049 static inline void __ftrace_trace_stack(struct trace_array *tr,
1050 					struct trace_buffer *buffer,
1051 					unsigned int trace_ctx,
1052 					int skip, struct pt_regs *regs)
1053 {
1054 }
ftrace_trace_stack(struct trace_array * tr,struct trace_buffer * buffer,unsigned long trace_ctx,int skip,struct pt_regs * regs)1055 static inline void ftrace_trace_stack(struct trace_array *tr,
1056 				      struct trace_buffer *buffer,
1057 				      unsigned long trace_ctx,
1058 				      int skip, struct pt_regs *regs)
1059 {
1060 }
1061 
1062 #endif
1063 
1064 static __always_inline void
trace_event_setup(struct ring_buffer_event * event,int type,unsigned int trace_ctx)1065 trace_event_setup(struct ring_buffer_event *event,
1066 		  int type, unsigned int trace_ctx)
1067 {
1068 	struct trace_entry *ent = ring_buffer_event_data(event);
1069 
1070 	tracing_generic_entry_update(ent, type, trace_ctx);
1071 }
1072 
1073 static __always_inline struct ring_buffer_event *
__trace_buffer_lock_reserve(struct trace_buffer * buffer,int type,unsigned long len,unsigned int trace_ctx)1074 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
1075 			  int type,
1076 			  unsigned long len,
1077 			  unsigned int trace_ctx)
1078 {
1079 	struct ring_buffer_event *event;
1080 
1081 	event = ring_buffer_lock_reserve(buffer, len);
1082 	if (event != NULL)
1083 		trace_event_setup(event, type, trace_ctx);
1084 
1085 	return event;
1086 }
1087 
tracer_tracing_on(struct trace_array * tr)1088 void tracer_tracing_on(struct trace_array *tr)
1089 {
1090 	if (tr->array_buffer.buffer)
1091 		ring_buffer_record_on(tr->array_buffer.buffer);
1092 	/*
1093 	 * This flag is looked at when buffers haven't been allocated
1094 	 * yet, or by some tracers (like irqsoff), that just want to
1095 	 * know if the ring buffer has been disabled, but it can handle
1096 	 * races of where it gets disabled but we still do a record.
1097 	 * As the check is in the fast path of the tracers, it is more
1098 	 * important to be fast than accurate.
1099 	 */
1100 	tr->buffer_disabled = 0;
1101 }
1102 
1103 /**
1104  * tracing_on - enable tracing buffers
1105  *
1106  * This function enables tracing buffers that may have been
1107  * disabled with tracing_off.
1108  */
tracing_on(void)1109 void tracing_on(void)
1110 {
1111 	tracer_tracing_on(&global_trace);
1112 }
1113 EXPORT_SYMBOL_GPL(tracing_on);
1114 
1115 
1116 static __always_inline void
__buffer_unlock_commit(struct trace_buffer * buffer,struct ring_buffer_event * event)1117 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1118 {
1119 	__this_cpu_write(trace_taskinfo_save, true);
1120 
1121 	/* If this is the temp buffer, we need to commit fully */
1122 	if (this_cpu_read(trace_buffered_event) == event) {
1123 		/* Length is in event->array[0] */
1124 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1125 		/* Release the temp buffer */
1126 		this_cpu_dec(trace_buffered_event_cnt);
1127 		/* ring_buffer_unlock_commit() enables preemption */
1128 		preempt_enable_notrace();
1129 	} else
1130 		ring_buffer_unlock_commit(buffer);
1131 }
1132 
__trace_array_puts(struct trace_array * tr,unsigned long ip,const char * str,int size)1133 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1134 		       const char *str, int size)
1135 {
1136 	struct ring_buffer_event *event;
1137 	struct trace_buffer *buffer;
1138 	struct print_entry *entry;
1139 	unsigned int trace_ctx;
1140 	int alloc;
1141 
1142 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1143 		return 0;
1144 
1145 	if (unlikely(tracing_selftest_running && tr == &global_trace))
1146 		return 0;
1147 
1148 	if (unlikely(tracing_disabled))
1149 		return 0;
1150 
1151 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1152 
1153 	trace_ctx = tracing_gen_ctx();
1154 	buffer = tr->array_buffer.buffer;
1155 	guard(ring_buffer_nest)(buffer);
1156 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1157 					    trace_ctx);
1158 	if (!event)
1159 		return 0;
1160 
1161 	entry = ring_buffer_event_data(event);
1162 	entry->ip = ip;
1163 
1164 	memcpy(&entry->buf, str, size);
1165 
1166 	/* Add a newline if necessary */
1167 	if (entry->buf[size - 1] != '\n') {
1168 		entry->buf[size] = '\n';
1169 		entry->buf[size + 1] = '\0';
1170 	} else
1171 		entry->buf[size] = '\0';
1172 
1173 	__buffer_unlock_commit(buffer, event);
1174 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1175 	return size;
1176 }
1177 EXPORT_SYMBOL_GPL(__trace_array_puts);
1178 
1179 /**
1180  * __trace_puts - write a constant string into the trace buffer.
1181  * @ip:	   The address of the caller
1182  * @str:   The constant string to write
1183  * @size:  The size of the string.
1184  */
__trace_puts(unsigned long ip,const char * str,int size)1185 int __trace_puts(unsigned long ip, const char *str, int size)
1186 {
1187 	return __trace_array_puts(printk_trace, ip, str, size);
1188 }
1189 EXPORT_SYMBOL_GPL(__trace_puts);
1190 
1191 /**
1192  * __trace_bputs - write the pointer to a constant string into trace buffer
1193  * @ip:	   The address of the caller
1194  * @str:   The constant string to write to the buffer to
1195  */
__trace_bputs(unsigned long ip,const char * str)1196 int __trace_bputs(unsigned long ip, const char *str)
1197 {
1198 	struct trace_array *tr = READ_ONCE(printk_trace);
1199 	struct ring_buffer_event *event;
1200 	struct trace_buffer *buffer;
1201 	struct bputs_entry *entry;
1202 	unsigned int trace_ctx;
1203 	int size = sizeof(struct bputs_entry);
1204 
1205 	if (!printk_binsafe(tr))
1206 		return __trace_puts(ip, str, strlen(str));
1207 
1208 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1209 		return 0;
1210 
1211 	if (unlikely(tracing_selftest_running || tracing_disabled))
1212 		return 0;
1213 
1214 	trace_ctx = tracing_gen_ctx();
1215 	buffer = tr->array_buffer.buffer;
1216 
1217 	guard(ring_buffer_nest)(buffer);
1218 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1219 					    trace_ctx);
1220 	if (!event)
1221 		return 0;
1222 
1223 	entry = ring_buffer_event_data(event);
1224 	entry->ip			= ip;
1225 	entry->str			= str;
1226 
1227 	__buffer_unlock_commit(buffer, event);
1228 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1229 
1230 	return 1;
1231 }
1232 EXPORT_SYMBOL_GPL(__trace_bputs);
1233 
1234 #ifdef CONFIG_TRACER_SNAPSHOT
tracing_snapshot_instance_cond(struct trace_array * tr,void * cond_data)1235 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1236 					   void *cond_data)
1237 {
1238 	struct tracer *tracer = tr->current_trace;
1239 	unsigned long flags;
1240 
1241 	if (in_nmi()) {
1242 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1243 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1244 		return;
1245 	}
1246 
1247 	if (!tr->allocated_snapshot) {
1248 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1249 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1250 		tracer_tracing_off(tr);
1251 		return;
1252 	}
1253 
1254 	/* Note, snapshot can not be used when the tracer uses it */
1255 	if (tracer->use_max_tr) {
1256 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1257 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1258 		return;
1259 	}
1260 
1261 	if (tr->mapped) {
1262 		trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
1263 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1264 		return;
1265 	}
1266 
1267 	local_irq_save(flags);
1268 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1269 	local_irq_restore(flags);
1270 }
1271 
tracing_snapshot_instance(struct trace_array * tr)1272 void tracing_snapshot_instance(struct trace_array *tr)
1273 {
1274 	tracing_snapshot_instance_cond(tr, NULL);
1275 }
1276 
1277 /**
1278  * tracing_snapshot - take a snapshot of the current buffer.
1279  *
1280  * This causes a swap between the snapshot buffer and the current live
1281  * tracing buffer. You can use this to take snapshots of the live
1282  * trace when some condition is triggered, but continue to trace.
1283  *
1284  * Note, make sure to allocate the snapshot with either
1285  * a tracing_snapshot_alloc(), or by doing it manually
1286  * with: echo 1 > /sys/kernel/tracing/snapshot
1287  *
1288  * If the snapshot buffer is not allocated, it will stop tracing.
1289  * Basically making a permanent snapshot.
1290  */
tracing_snapshot(void)1291 void tracing_snapshot(void)
1292 {
1293 	struct trace_array *tr = &global_trace;
1294 
1295 	tracing_snapshot_instance(tr);
1296 }
1297 EXPORT_SYMBOL_GPL(tracing_snapshot);
1298 
1299 /**
1300  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1301  * @tr:		The tracing instance to snapshot
1302  * @cond_data:	The data to be tested conditionally, and possibly saved
1303  *
1304  * This is the same as tracing_snapshot() except that the snapshot is
1305  * conditional - the snapshot will only happen if the
1306  * cond_snapshot.update() implementation receiving the cond_data
1307  * returns true, which means that the trace array's cond_snapshot
1308  * update() operation used the cond_data to determine whether the
1309  * snapshot should be taken, and if it was, presumably saved it along
1310  * with the snapshot.
1311  */
tracing_snapshot_cond(struct trace_array * tr,void * cond_data)1312 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1313 {
1314 	tracing_snapshot_instance_cond(tr, cond_data);
1315 }
1316 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1317 
1318 /**
1319  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1320  * @tr:		The tracing instance
1321  *
1322  * When the user enables a conditional snapshot using
1323  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1324  * with the snapshot.  This accessor is used to retrieve it.
1325  *
1326  * Should not be called from cond_snapshot.update(), since it takes
1327  * the tr->max_lock lock, which the code calling
1328  * cond_snapshot.update() has already done.
1329  *
1330  * Returns the cond_data associated with the trace array's snapshot.
1331  */
tracing_cond_snapshot_data(struct trace_array * tr)1332 void *tracing_cond_snapshot_data(struct trace_array *tr)
1333 {
1334 	void *cond_data = NULL;
1335 
1336 	local_irq_disable();
1337 	arch_spin_lock(&tr->max_lock);
1338 
1339 	if (tr->cond_snapshot)
1340 		cond_data = tr->cond_snapshot->cond_data;
1341 
1342 	arch_spin_unlock(&tr->max_lock);
1343 	local_irq_enable();
1344 
1345 	return cond_data;
1346 }
1347 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1348 
1349 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1350 					struct array_buffer *size_buf, int cpu_id);
1351 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1352 
tracing_alloc_snapshot_instance(struct trace_array * tr)1353 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1354 {
1355 	int order;
1356 	int ret;
1357 
1358 	if (!tr->allocated_snapshot) {
1359 
1360 		/* Make the snapshot buffer have the same order as main buffer */
1361 		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1362 		ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1363 		if (ret < 0)
1364 			return ret;
1365 
1366 		/* allocate spare buffer */
1367 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1368 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1369 		if (ret < 0)
1370 			return ret;
1371 
1372 		tr->allocated_snapshot = true;
1373 	}
1374 
1375 	return 0;
1376 }
1377 
free_snapshot(struct trace_array * tr)1378 static void free_snapshot(struct trace_array *tr)
1379 {
1380 	/*
1381 	 * We don't free the ring buffer. instead, resize it because
1382 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
1383 	 * we want preserve it.
1384 	 */
1385 	ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1386 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1387 	set_buffer_entries(&tr->max_buffer, 1);
1388 	tracing_reset_online_cpus(&tr->max_buffer);
1389 	tr->allocated_snapshot = false;
1390 }
1391 
tracing_arm_snapshot_locked(struct trace_array * tr)1392 static int tracing_arm_snapshot_locked(struct trace_array *tr)
1393 {
1394 	int ret;
1395 
1396 	lockdep_assert_held(&trace_types_lock);
1397 
1398 	spin_lock(&tr->snapshot_trigger_lock);
1399 	if (tr->snapshot == UINT_MAX || tr->mapped) {
1400 		spin_unlock(&tr->snapshot_trigger_lock);
1401 		return -EBUSY;
1402 	}
1403 
1404 	tr->snapshot++;
1405 	spin_unlock(&tr->snapshot_trigger_lock);
1406 
1407 	ret = tracing_alloc_snapshot_instance(tr);
1408 	if (ret) {
1409 		spin_lock(&tr->snapshot_trigger_lock);
1410 		tr->snapshot--;
1411 		spin_unlock(&tr->snapshot_trigger_lock);
1412 	}
1413 
1414 	return ret;
1415 }
1416 
tracing_arm_snapshot(struct trace_array * tr)1417 int tracing_arm_snapshot(struct trace_array *tr)
1418 {
1419 	guard(mutex)(&trace_types_lock);
1420 	return tracing_arm_snapshot_locked(tr);
1421 }
1422 
tracing_disarm_snapshot(struct trace_array * tr)1423 void tracing_disarm_snapshot(struct trace_array *tr)
1424 {
1425 	spin_lock(&tr->snapshot_trigger_lock);
1426 	if (!WARN_ON(!tr->snapshot))
1427 		tr->snapshot--;
1428 	spin_unlock(&tr->snapshot_trigger_lock);
1429 }
1430 
1431 /**
1432  * tracing_alloc_snapshot - allocate snapshot buffer.
1433  *
1434  * This only allocates the snapshot buffer if it isn't already
1435  * allocated - it doesn't also take a snapshot.
1436  *
1437  * This is meant to be used in cases where the snapshot buffer needs
1438  * to be set up for events that can't sleep but need to be able to
1439  * trigger a snapshot.
1440  */
tracing_alloc_snapshot(void)1441 int tracing_alloc_snapshot(void)
1442 {
1443 	struct trace_array *tr = &global_trace;
1444 	int ret;
1445 
1446 	ret = tracing_alloc_snapshot_instance(tr);
1447 	WARN_ON(ret < 0);
1448 
1449 	return ret;
1450 }
1451 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1452 
1453 /**
1454  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1455  *
1456  * This is similar to tracing_snapshot(), but it will allocate the
1457  * snapshot buffer if it isn't already allocated. Use this only
1458  * where it is safe to sleep, as the allocation may sleep.
1459  *
1460  * This causes a swap between the snapshot buffer and the current live
1461  * tracing buffer. You can use this to take snapshots of the live
1462  * trace when some condition is triggered, but continue to trace.
1463  */
tracing_snapshot_alloc(void)1464 void tracing_snapshot_alloc(void)
1465 {
1466 	int ret;
1467 
1468 	ret = tracing_alloc_snapshot();
1469 	if (ret < 0)
1470 		return;
1471 
1472 	tracing_snapshot();
1473 }
1474 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1475 
1476 /**
1477  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1478  * @tr:		The tracing instance
1479  * @cond_data:	User data to associate with the snapshot
1480  * @update:	Implementation of the cond_snapshot update function
1481  *
1482  * Check whether the conditional snapshot for the given instance has
1483  * already been enabled, or if the current tracer is already using a
1484  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1485  * save the cond_data and update function inside.
1486  *
1487  * Returns 0 if successful, error otherwise.
1488  */
tracing_snapshot_cond_enable(struct trace_array * tr,void * cond_data,cond_update_fn_t update)1489 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1490 				 cond_update_fn_t update)
1491 {
1492 	struct cond_snapshot *cond_snapshot __free(kfree) =
1493 		kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1494 	int ret;
1495 
1496 	if (!cond_snapshot)
1497 		return -ENOMEM;
1498 
1499 	cond_snapshot->cond_data = cond_data;
1500 	cond_snapshot->update = update;
1501 
1502 	guard(mutex)(&trace_types_lock);
1503 
1504 	if (tr->current_trace->use_max_tr)
1505 		return -EBUSY;
1506 
1507 	/*
1508 	 * The cond_snapshot can only change to NULL without the
1509 	 * trace_types_lock. We don't care if we race with it going
1510 	 * to NULL, but we want to make sure that it's not set to
1511 	 * something other than NULL when we get here, which we can
1512 	 * do safely with only holding the trace_types_lock and not
1513 	 * having to take the max_lock.
1514 	 */
1515 	if (tr->cond_snapshot)
1516 		return -EBUSY;
1517 
1518 	ret = tracing_arm_snapshot_locked(tr);
1519 	if (ret)
1520 		return ret;
1521 
1522 	local_irq_disable();
1523 	arch_spin_lock(&tr->max_lock);
1524 	tr->cond_snapshot = no_free_ptr(cond_snapshot);
1525 	arch_spin_unlock(&tr->max_lock);
1526 	local_irq_enable();
1527 
1528 	return 0;
1529 }
1530 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1531 
1532 /**
1533  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1534  * @tr:		The tracing instance
1535  *
1536  * Check whether the conditional snapshot for the given instance is
1537  * enabled; if so, free the cond_snapshot associated with it,
1538  * otherwise return -EINVAL.
1539  *
1540  * Returns 0 if successful, error otherwise.
1541  */
tracing_snapshot_cond_disable(struct trace_array * tr)1542 int tracing_snapshot_cond_disable(struct trace_array *tr)
1543 {
1544 	int ret = 0;
1545 
1546 	local_irq_disable();
1547 	arch_spin_lock(&tr->max_lock);
1548 
1549 	if (!tr->cond_snapshot)
1550 		ret = -EINVAL;
1551 	else {
1552 		kfree(tr->cond_snapshot);
1553 		tr->cond_snapshot = NULL;
1554 	}
1555 
1556 	arch_spin_unlock(&tr->max_lock);
1557 	local_irq_enable();
1558 
1559 	tracing_disarm_snapshot(tr);
1560 
1561 	return ret;
1562 }
1563 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1564 #else
tracing_snapshot(void)1565 void tracing_snapshot(void)
1566 {
1567 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1568 }
1569 EXPORT_SYMBOL_GPL(tracing_snapshot);
tracing_snapshot_cond(struct trace_array * tr,void * cond_data)1570 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1571 {
1572 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1573 }
1574 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
tracing_alloc_snapshot(void)1575 int tracing_alloc_snapshot(void)
1576 {
1577 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1578 	return -ENODEV;
1579 }
1580 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
tracing_snapshot_alloc(void)1581 void tracing_snapshot_alloc(void)
1582 {
1583 	/* Give warning */
1584 	tracing_snapshot();
1585 }
1586 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
tracing_cond_snapshot_data(struct trace_array * tr)1587 void *tracing_cond_snapshot_data(struct trace_array *tr)
1588 {
1589 	return NULL;
1590 }
1591 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
tracing_snapshot_cond_enable(struct trace_array * tr,void * cond_data,cond_update_fn_t update)1592 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1593 {
1594 	return -ENODEV;
1595 }
1596 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
tracing_snapshot_cond_disable(struct trace_array * tr)1597 int tracing_snapshot_cond_disable(struct trace_array *tr)
1598 {
1599 	return false;
1600 }
1601 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1602 #define free_snapshot(tr)	do { } while (0)
1603 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1604 #endif /* CONFIG_TRACER_SNAPSHOT */
1605 
tracer_tracing_off(struct trace_array * tr)1606 void tracer_tracing_off(struct trace_array *tr)
1607 {
1608 	if (tr->array_buffer.buffer)
1609 		ring_buffer_record_off(tr->array_buffer.buffer);
1610 	/*
1611 	 * This flag is looked at when buffers haven't been allocated
1612 	 * yet, or by some tracers (like irqsoff), that just want to
1613 	 * know if the ring buffer has been disabled, but it can handle
1614 	 * races of where it gets disabled but we still do a record.
1615 	 * As the check is in the fast path of the tracers, it is more
1616 	 * important to be fast than accurate.
1617 	 */
1618 	tr->buffer_disabled = 1;
1619 }
1620 
1621 /**
1622  * tracer_tracing_disable() - temporary disable the buffer from write
1623  * @tr: The trace array to disable its buffer for
1624  *
1625  * Expects trace_tracing_enable() to re-enable tracing.
1626  * The difference between this and tracer_tracing_off() is that this
1627  * is a counter and can nest, whereas, tracer_tracing_off() can
1628  * be called multiple times and a single trace_tracing_on() will
1629  * enable it.
1630  */
tracer_tracing_disable(struct trace_array * tr)1631 void tracer_tracing_disable(struct trace_array *tr)
1632 {
1633 	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1634 		return;
1635 
1636 	ring_buffer_record_disable(tr->array_buffer.buffer);
1637 }
1638 
1639 /**
1640  * tracer_tracing_enable() - counter part of tracer_tracing_disable()
1641  * @tr: The trace array that had tracer_tracincg_disable() called on it
1642  *
1643  * This is called after tracer_tracing_disable() has been called on @tr,
1644  * when it's safe to re-enable tracing.
1645  */
tracer_tracing_enable(struct trace_array * tr)1646 void tracer_tracing_enable(struct trace_array *tr)
1647 {
1648 	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1649 		return;
1650 
1651 	ring_buffer_record_enable(tr->array_buffer.buffer);
1652 }
1653 
1654 /**
1655  * tracing_off - turn off tracing buffers
1656  *
1657  * This function stops the tracing buffers from recording data.
1658  * It does not disable any overhead the tracers themselves may
1659  * be causing. This function simply causes all recording to
1660  * the ring buffers to fail.
1661  */
tracing_off(void)1662 void tracing_off(void)
1663 {
1664 	tracer_tracing_off(&global_trace);
1665 }
1666 EXPORT_SYMBOL_GPL(tracing_off);
1667 
disable_trace_on_warning(void)1668 void disable_trace_on_warning(void)
1669 {
1670 	if (__disable_trace_on_warning) {
1671 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1672 			"Disabling tracing due to warning\n");
1673 		tracing_off();
1674 	}
1675 }
1676 
1677 /**
1678  * tracer_tracing_is_on - show real state of ring buffer enabled
1679  * @tr : the trace array to know if ring buffer is enabled
1680  *
1681  * Shows real state of the ring buffer if it is enabled or not.
1682  */
tracer_tracing_is_on(struct trace_array * tr)1683 bool tracer_tracing_is_on(struct trace_array *tr)
1684 {
1685 	if (tr->array_buffer.buffer)
1686 		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1687 	return !tr->buffer_disabled;
1688 }
1689 
1690 /**
1691  * tracing_is_on - show state of ring buffers enabled
1692  */
tracing_is_on(void)1693 int tracing_is_on(void)
1694 {
1695 	return tracer_tracing_is_on(&global_trace);
1696 }
1697 EXPORT_SYMBOL_GPL(tracing_is_on);
1698 
set_buf_size(char * str)1699 static int __init set_buf_size(char *str)
1700 {
1701 	unsigned long buf_size;
1702 
1703 	if (!str)
1704 		return 0;
1705 	buf_size = memparse(str, &str);
1706 	/*
1707 	 * nr_entries can not be zero and the startup
1708 	 * tests require some buffer space. Therefore
1709 	 * ensure we have at least 4096 bytes of buffer.
1710 	 */
1711 	trace_buf_size = max(4096UL, buf_size);
1712 	return 1;
1713 }
1714 __setup("trace_buf_size=", set_buf_size);
1715 
set_tracing_thresh(char * str)1716 static int __init set_tracing_thresh(char *str)
1717 {
1718 	unsigned long threshold;
1719 	int ret;
1720 
1721 	if (!str)
1722 		return 0;
1723 	ret = kstrtoul(str, 0, &threshold);
1724 	if (ret < 0)
1725 		return 0;
1726 	tracing_thresh = threshold * 1000;
1727 	return 1;
1728 }
1729 __setup("tracing_thresh=", set_tracing_thresh);
1730 
nsecs_to_usecs(unsigned long nsecs)1731 unsigned long nsecs_to_usecs(unsigned long nsecs)
1732 {
1733 	return nsecs / 1000;
1734 }
1735 
1736 /*
1737  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1738  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1739  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1740  * of strings in the order that the evals (enum) were defined.
1741  */
1742 #undef C
1743 #define C(a, b) b
1744 
1745 /* These must match the bit positions in trace_iterator_flags */
1746 static const char *trace_options[] = {
1747 	TRACE_FLAGS
1748 	NULL
1749 };
1750 
1751 static struct {
1752 	u64 (*func)(void);
1753 	const char *name;
1754 	int in_ns;		/* is this clock in nanoseconds? */
1755 } trace_clocks[] = {
1756 	{ trace_clock_local,		"local",	1 },
1757 	{ trace_clock_global,		"global",	1 },
1758 	{ trace_clock_counter,		"counter",	0 },
1759 	{ trace_clock_jiffies,		"uptime",	0 },
1760 	{ trace_clock,			"perf",		1 },
1761 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1762 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1763 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1764 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1765 	ARCH_TRACE_CLOCKS
1766 };
1767 
trace_clock_in_ns(struct trace_array * tr)1768 bool trace_clock_in_ns(struct trace_array *tr)
1769 {
1770 	if (trace_clocks[tr->clock_id].in_ns)
1771 		return true;
1772 
1773 	return false;
1774 }
1775 
1776 /*
1777  * trace_parser_get_init - gets the buffer for trace parser
1778  */
trace_parser_get_init(struct trace_parser * parser,int size)1779 int trace_parser_get_init(struct trace_parser *parser, int size)
1780 {
1781 	memset(parser, 0, sizeof(*parser));
1782 
1783 	parser->buffer = kmalloc(size, GFP_KERNEL);
1784 	if (!parser->buffer)
1785 		return 1;
1786 
1787 	parser->size = size;
1788 	return 0;
1789 }
1790 
1791 /*
1792  * trace_parser_put - frees the buffer for trace parser
1793  */
trace_parser_put(struct trace_parser * parser)1794 void trace_parser_put(struct trace_parser *parser)
1795 {
1796 	kfree(parser->buffer);
1797 	parser->buffer = NULL;
1798 }
1799 
1800 /*
1801  * trace_get_user - reads the user input string separated by  space
1802  * (matched by isspace(ch))
1803  *
1804  * For each string found the 'struct trace_parser' is updated,
1805  * and the function returns.
1806  *
1807  * Returns number of bytes read.
1808  *
1809  * See kernel/trace/trace.h for 'struct trace_parser' details.
1810  */
trace_get_user(struct trace_parser * parser,const char __user * ubuf,size_t cnt,loff_t * ppos)1811 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1812 	size_t cnt, loff_t *ppos)
1813 {
1814 	char ch;
1815 	size_t read = 0;
1816 	ssize_t ret;
1817 
1818 	if (!*ppos)
1819 		trace_parser_clear(parser);
1820 
1821 	ret = get_user(ch, ubuf++);
1822 	if (ret)
1823 		goto fail;
1824 
1825 	read++;
1826 	cnt--;
1827 
1828 	/*
1829 	 * The parser is not finished with the last write,
1830 	 * continue reading the user input without skipping spaces.
1831 	 */
1832 	if (!parser->cont) {
1833 		/* skip white space */
1834 		while (cnt && isspace(ch)) {
1835 			ret = get_user(ch, ubuf++);
1836 			if (ret)
1837 				goto fail;
1838 			read++;
1839 			cnt--;
1840 		}
1841 
1842 		parser->idx = 0;
1843 
1844 		/* only spaces were written */
1845 		if (isspace(ch) || !ch) {
1846 			*ppos += read;
1847 			return read;
1848 		}
1849 	}
1850 
1851 	/* read the non-space input */
1852 	while (cnt && !isspace(ch) && ch) {
1853 		if (parser->idx < parser->size - 1)
1854 			parser->buffer[parser->idx++] = ch;
1855 		else {
1856 			ret = -EINVAL;
1857 			goto fail;
1858 		}
1859 
1860 		ret = get_user(ch, ubuf++);
1861 		if (ret)
1862 			goto fail;
1863 		read++;
1864 		cnt--;
1865 	}
1866 
1867 	/* We either got finished input or we have to wait for another call. */
1868 	if (isspace(ch) || !ch) {
1869 		parser->buffer[parser->idx] = 0;
1870 		parser->cont = false;
1871 	} else if (parser->idx < parser->size - 1) {
1872 		parser->cont = true;
1873 		parser->buffer[parser->idx++] = ch;
1874 		/* Make sure the parsed string always terminates with '\0'. */
1875 		parser->buffer[parser->idx] = 0;
1876 	} else {
1877 		ret = -EINVAL;
1878 		goto fail;
1879 	}
1880 
1881 	*ppos += read;
1882 	return read;
1883 fail:
1884 	trace_parser_fail(parser);
1885 	return ret;
1886 }
1887 
1888 /* TODO add a seq_buf_to_buffer() */
trace_seq_to_buffer(struct trace_seq * s,void * buf,size_t cnt)1889 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1890 {
1891 	int len;
1892 
1893 	if (trace_seq_used(s) <= s->readpos)
1894 		return -EBUSY;
1895 
1896 	len = trace_seq_used(s) - s->readpos;
1897 	if (cnt > len)
1898 		cnt = len;
1899 	memcpy(buf, s->buffer + s->readpos, cnt);
1900 
1901 	s->readpos += cnt;
1902 	return cnt;
1903 }
1904 
1905 unsigned long __read_mostly	tracing_thresh;
1906 
1907 #ifdef CONFIG_TRACER_MAX_TRACE
1908 static const struct file_operations tracing_max_lat_fops;
1909 
1910 #ifdef LATENCY_FS_NOTIFY
1911 
1912 static struct workqueue_struct *fsnotify_wq;
1913 
latency_fsnotify_workfn(struct work_struct * work)1914 static void latency_fsnotify_workfn(struct work_struct *work)
1915 {
1916 	struct trace_array *tr = container_of(work, struct trace_array,
1917 					      fsnotify_work);
1918 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1919 }
1920 
latency_fsnotify_workfn_irq(struct irq_work * iwork)1921 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1922 {
1923 	struct trace_array *tr = container_of(iwork, struct trace_array,
1924 					      fsnotify_irqwork);
1925 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1926 }
1927 
trace_create_maxlat_file(struct trace_array * tr,struct dentry * d_tracer)1928 static void trace_create_maxlat_file(struct trace_array *tr,
1929 				     struct dentry *d_tracer)
1930 {
1931 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1932 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1933 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1934 					      TRACE_MODE_WRITE,
1935 					      d_tracer, tr,
1936 					      &tracing_max_lat_fops);
1937 }
1938 
latency_fsnotify_init(void)1939 __init static int latency_fsnotify_init(void)
1940 {
1941 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1942 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1943 	if (!fsnotify_wq) {
1944 		pr_err("Unable to allocate tr_max_lat_wq\n");
1945 		return -ENOMEM;
1946 	}
1947 	return 0;
1948 }
1949 
1950 late_initcall_sync(latency_fsnotify_init);
1951 
latency_fsnotify(struct trace_array * tr)1952 void latency_fsnotify(struct trace_array *tr)
1953 {
1954 	if (!fsnotify_wq)
1955 		return;
1956 	/*
1957 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1958 	 * possible that we are called from __schedule() or do_idle(), which
1959 	 * could cause a deadlock.
1960 	 */
1961 	irq_work_queue(&tr->fsnotify_irqwork);
1962 }
1963 
1964 #else /* !LATENCY_FS_NOTIFY */
1965 
1966 #define trace_create_maxlat_file(tr, d_tracer)				\
1967 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1968 			  d_tracer, tr, &tracing_max_lat_fops)
1969 
1970 #endif
1971 
1972 /*
1973  * Copy the new maximum trace into the separate maximum-trace
1974  * structure. (this way the maximum trace is permanently saved,
1975  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1976  */
1977 static void
__update_max_tr(struct trace_array * tr,struct task_struct * tsk,int cpu)1978 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1979 {
1980 	struct array_buffer *trace_buf = &tr->array_buffer;
1981 	struct array_buffer *max_buf = &tr->max_buffer;
1982 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1983 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1984 
1985 	max_buf->cpu = cpu;
1986 	max_buf->time_start = data->preempt_timestamp;
1987 
1988 	max_data->saved_latency = tr->max_latency;
1989 	max_data->critical_start = data->critical_start;
1990 	max_data->critical_end = data->critical_end;
1991 
1992 	strscpy(max_data->comm, tsk->comm);
1993 	max_data->pid = tsk->pid;
1994 	/*
1995 	 * If tsk == current, then use current_uid(), as that does not use
1996 	 * RCU. The irq tracer can be called out of RCU scope.
1997 	 */
1998 	if (tsk == current)
1999 		max_data->uid = current_uid();
2000 	else
2001 		max_data->uid = task_uid(tsk);
2002 
2003 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
2004 	max_data->policy = tsk->policy;
2005 	max_data->rt_priority = tsk->rt_priority;
2006 
2007 	/* record this tasks comm */
2008 	tracing_record_cmdline(tsk);
2009 	latency_fsnotify(tr);
2010 }
2011 
2012 /**
2013  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
2014  * @tr: tracer
2015  * @tsk: the task with the latency
2016  * @cpu: The cpu that initiated the trace.
2017  * @cond_data: User data associated with a conditional snapshot
2018  *
2019  * Flip the buffers between the @tr and the max_tr and record information
2020  * about which task was the cause of this latency.
2021  */
2022 void
update_max_tr(struct trace_array * tr,struct task_struct * tsk,int cpu,void * cond_data)2023 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
2024 	      void *cond_data)
2025 {
2026 	if (tr->stop_count)
2027 		return;
2028 
2029 	WARN_ON_ONCE(!irqs_disabled());
2030 
2031 	if (!tr->allocated_snapshot) {
2032 		/* Only the nop tracer should hit this when disabling */
2033 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
2034 		return;
2035 	}
2036 
2037 	arch_spin_lock(&tr->max_lock);
2038 
2039 	/* Inherit the recordable setting from array_buffer */
2040 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
2041 		ring_buffer_record_on(tr->max_buffer.buffer);
2042 	else
2043 		ring_buffer_record_off(tr->max_buffer.buffer);
2044 
2045 #ifdef CONFIG_TRACER_SNAPSHOT
2046 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
2047 		arch_spin_unlock(&tr->max_lock);
2048 		return;
2049 	}
2050 #endif
2051 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
2052 
2053 	__update_max_tr(tr, tsk, cpu);
2054 
2055 	arch_spin_unlock(&tr->max_lock);
2056 
2057 	/* Any waiters on the old snapshot buffer need to wake up */
2058 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
2059 }
2060 
2061 /**
2062  * update_max_tr_single - only copy one trace over, and reset the rest
2063  * @tr: tracer
2064  * @tsk: task with the latency
2065  * @cpu: the cpu of the buffer to copy.
2066  *
2067  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
2068  */
2069 void
update_max_tr_single(struct trace_array * tr,struct task_struct * tsk,int cpu)2070 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
2071 {
2072 	int ret;
2073 
2074 	if (tr->stop_count)
2075 		return;
2076 
2077 	WARN_ON_ONCE(!irqs_disabled());
2078 	if (!tr->allocated_snapshot) {
2079 		/* Only the nop tracer should hit this when disabling */
2080 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
2081 		return;
2082 	}
2083 
2084 	arch_spin_lock(&tr->max_lock);
2085 
2086 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
2087 
2088 	if (ret == -EBUSY) {
2089 		/*
2090 		 * We failed to swap the buffer due to a commit taking
2091 		 * place on this CPU. We fail to record, but we reset
2092 		 * the max trace buffer (no one writes directly to it)
2093 		 * and flag that it failed.
2094 		 * Another reason is resize is in progress.
2095 		 */
2096 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
2097 			"Failed to swap buffers due to commit or resize in progress\n");
2098 	}
2099 
2100 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
2101 
2102 	__update_max_tr(tr, tsk, cpu);
2103 	arch_spin_unlock(&tr->max_lock);
2104 }
2105 
2106 #endif /* CONFIG_TRACER_MAX_TRACE */
2107 
2108 struct pipe_wait {
2109 	struct trace_iterator		*iter;
2110 	int				wait_index;
2111 };
2112 
wait_pipe_cond(void * data)2113 static bool wait_pipe_cond(void *data)
2114 {
2115 	struct pipe_wait *pwait = data;
2116 	struct trace_iterator *iter = pwait->iter;
2117 
2118 	if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2119 		return true;
2120 
2121 	return iter->closed;
2122 }
2123 
wait_on_pipe(struct trace_iterator * iter,int full)2124 static int wait_on_pipe(struct trace_iterator *iter, int full)
2125 {
2126 	struct pipe_wait pwait;
2127 	int ret;
2128 
2129 	/* Iterators are static, they should be filled or empty */
2130 	if (trace_buffer_iter(iter, iter->cpu_file))
2131 		return 0;
2132 
2133 	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2134 	pwait.iter = iter;
2135 
2136 	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2137 			       wait_pipe_cond, &pwait);
2138 
2139 #ifdef CONFIG_TRACER_MAX_TRACE
2140 	/*
2141 	 * Make sure this is still the snapshot buffer, as if a snapshot were
2142 	 * to happen, this would now be the main buffer.
2143 	 */
2144 	if (iter->snapshot)
2145 		iter->array_buffer = &iter->tr->max_buffer;
2146 #endif
2147 	return ret;
2148 }
2149 
2150 #ifdef CONFIG_FTRACE_STARTUP_TEST
2151 static bool selftests_can_run;
2152 
2153 struct trace_selftests {
2154 	struct list_head		list;
2155 	struct tracer			*type;
2156 };
2157 
2158 static LIST_HEAD(postponed_selftests);
2159 
save_selftest(struct tracer * type)2160 static int save_selftest(struct tracer *type)
2161 {
2162 	struct trace_selftests *selftest;
2163 
2164 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2165 	if (!selftest)
2166 		return -ENOMEM;
2167 
2168 	selftest->type = type;
2169 	list_add(&selftest->list, &postponed_selftests);
2170 	return 0;
2171 }
2172 
run_tracer_selftest(struct tracer * type)2173 static int run_tracer_selftest(struct tracer *type)
2174 {
2175 	struct trace_array *tr = &global_trace;
2176 	struct tracer *saved_tracer = tr->current_trace;
2177 	int ret;
2178 
2179 	if (!type->selftest || tracing_selftest_disabled)
2180 		return 0;
2181 
2182 	/*
2183 	 * If a tracer registers early in boot up (before scheduling is
2184 	 * initialized and such), then do not run its selftests yet.
2185 	 * Instead, run it a little later in the boot process.
2186 	 */
2187 	if (!selftests_can_run)
2188 		return save_selftest(type);
2189 
2190 	if (!tracing_is_on()) {
2191 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2192 			type->name);
2193 		return 0;
2194 	}
2195 
2196 	/*
2197 	 * Run a selftest on this tracer.
2198 	 * Here we reset the trace buffer, and set the current
2199 	 * tracer to be this tracer. The tracer can then run some
2200 	 * internal tracing to verify that everything is in order.
2201 	 * If we fail, we do not register this tracer.
2202 	 */
2203 	tracing_reset_online_cpus(&tr->array_buffer);
2204 
2205 	tr->current_trace = type;
2206 
2207 #ifdef CONFIG_TRACER_MAX_TRACE
2208 	if (type->use_max_tr) {
2209 		/* If we expanded the buffers, make sure the max is expanded too */
2210 		if (tr->ring_buffer_expanded)
2211 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2212 					   RING_BUFFER_ALL_CPUS);
2213 		tr->allocated_snapshot = true;
2214 	}
2215 #endif
2216 
2217 	/* the test is responsible for initializing and enabling */
2218 	pr_info("Testing tracer %s: ", type->name);
2219 	ret = type->selftest(type, tr);
2220 	/* the test is responsible for resetting too */
2221 	tr->current_trace = saved_tracer;
2222 	if (ret) {
2223 		printk(KERN_CONT "FAILED!\n");
2224 		/* Add the warning after printing 'FAILED' */
2225 		WARN_ON(1);
2226 		return -1;
2227 	}
2228 	/* Only reset on passing, to avoid touching corrupted buffers */
2229 	tracing_reset_online_cpus(&tr->array_buffer);
2230 
2231 #ifdef CONFIG_TRACER_MAX_TRACE
2232 	if (type->use_max_tr) {
2233 		tr->allocated_snapshot = false;
2234 
2235 		/* Shrink the max buffer again */
2236 		if (tr->ring_buffer_expanded)
2237 			ring_buffer_resize(tr->max_buffer.buffer, 1,
2238 					   RING_BUFFER_ALL_CPUS);
2239 	}
2240 #endif
2241 
2242 	printk(KERN_CONT "PASSED\n");
2243 	return 0;
2244 }
2245 
do_run_tracer_selftest(struct tracer * type)2246 static int do_run_tracer_selftest(struct tracer *type)
2247 {
2248 	int ret;
2249 
2250 	/*
2251 	 * Tests can take a long time, especially if they are run one after the
2252 	 * other, as does happen during bootup when all the tracers are
2253 	 * registered. This could cause the soft lockup watchdog to trigger.
2254 	 */
2255 	cond_resched();
2256 
2257 	tracing_selftest_running = true;
2258 	ret = run_tracer_selftest(type);
2259 	tracing_selftest_running = false;
2260 
2261 	return ret;
2262 }
2263 
init_trace_selftests(void)2264 static __init int init_trace_selftests(void)
2265 {
2266 	struct trace_selftests *p, *n;
2267 	struct tracer *t, **last;
2268 	int ret;
2269 
2270 	selftests_can_run = true;
2271 
2272 	guard(mutex)(&trace_types_lock);
2273 
2274 	if (list_empty(&postponed_selftests))
2275 		return 0;
2276 
2277 	pr_info("Running postponed tracer tests:\n");
2278 
2279 	tracing_selftest_running = true;
2280 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2281 		/* This loop can take minutes when sanitizers are enabled, so
2282 		 * lets make sure we allow RCU processing.
2283 		 */
2284 		cond_resched();
2285 		ret = run_tracer_selftest(p->type);
2286 		/* If the test fails, then warn and remove from available_tracers */
2287 		if (ret < 0) {
2288 			WARN(1, "tracer: %s failed selftest, disabling\n",
2289 			     p->type->name);
2290 			last = &trace_types;
2291 			for (t = trace_types; t; t = t->next) {
2292 				if (t == p->type) {
2293 					*last = t->next;
2294 					break;
2295 				}
2296 				last = &t->next;
2297 			}
2298 		}
2299 		list_del(&p->list);
2300 		kfree(p);
2301 	}
2302 	tracing_selftest_running = false;
2303 
2304 	return 0;
2305 }
2306 core_initcall(init_trace_selftests);
2307 #else
do_run_tracer_selftest(struct tracer * type)2308 static inline int do_run_tracer_selftest(struct tracer *type)
2309 {
2310 	return 0;
2311 }
2312 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2313 
2314 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2315 
2316 static void __init apply_trace_boot_options(void);
2317 
2318 /**
2319  * register_tracer - register a tracer with the ftrace system.
2320  * @type: the plugin for the tracer
2321  *
2322  * Register a new plugin tracer.
2323  */
register_tracer(struct tracer * type)2324 int __init register_tracer(struct tracer *type)
2325 {
2326 	struct tracer *t;
2327 	int ret = 0;
2328 
2329 	if (!type->name) {
2330 		pr_info("Tracer must have a name\n");
2331 		return -1;
2332 	}
2333 
2334 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2335 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2336 		return -1;
2337 	}
2338 
2339 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2340 		pr_warn("Can not register tracer %s due to lockdown\n",
2341 			   type->name);
2342 		return -EPERM;
2343 	}
2344 
2345 	mutex_lock(&trace_types_lock);
2346 
2347 	for (t = trace_types; t; t = t->next) {
2348 		if (strcmp(type->name, t->name) == 0) {
2349 			/* already found */
2350 			pr_info("Tracer %s already registered\n",
2351 				type->name);
2352 			ret = -1;
2353 			goto out;
2354 		}
2355 	}
2356 
2357 	if (!type->set_flag)
2358 		type->set_flag = &dummy_set_flag;
2359 	if (!type->flags) {
2360 		/*allocate a dummy tracer_flags*/
2361 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2362 		if (!type->flags) {
2363 			ret = -ENOMEM;
2364 			goto out;
2365 		}
2366 		type->flags->val = 0;
2367 		type->flags->opts = dummy_tracer_opt;
2368 	} else
2369 		if (!type->flags->opts)
2370 			type->flags->opts = dummy_tracer_opt;
2371 
2372 	/* store the tracer for __set_tracer_option */
2373 	type->flags->trace = type;
2374 
2375 	ret = do_run_tracer_selftest(type);
2376 	if (ret < 0)
2377 		goto out;
2378 
2379 	type->next = trace_types;
2380 	trace_types = type;
2381 	add_tracer_options(&global_trace, type);
2382 
2383  out:
2384 	mutex_unlock(&trace_types_lock);
2385 
2386 	if (ret || !default_bootup_tracer)
2387 		return ret;
2388 
2389 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2390 		return 0;
2391 
2392 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2393 	/* Do we want this tracer to start on bootup? */
2394 	tracing_set_tracer(&global_trace, type->name);
2395 	default_bootup_tracer = NULL;
2396 
2397 	apply_trace_boot_options();
2398 
2399 	/* disable other selftests, since this will break it. */
2400 	disable_tracing_selftest("running a tracer");
2401 
2402 	return 0;
2403 }
2404 
tracing_reset_cpu(struct array_buffer * buf,int cpu)2405 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2406 {
2407 	struct trace_buffer *buffer = buf->buffer;
2408 
2409 	if (!buffer)
2410 		return;
2411 
2412 	ring_buffer_record_disable(buffer);
2413 
2414 	/* Make sure all commits have finished */
2415 	synchronize_rcu();
2416 	ring_buffer_reset_cpu(buffer, cpu);
2417 
2418 	ring_buffer_record_enable(buffer);
2419 }
2420 
tracing_reset_online_cpus(struct array_buffer * buf)2421 void tracing_reset_online_cpus(struct array_buffer *buf)
2422 {
2423 	struct trace_buffer *buffer = buf->buffer;
2424 
2425 	if (!buffer)
2426 		return;
2427 
2428 	ring_buffer_record_disable(buffer);
2429 
2430 	/* Make sure all commits have finished */
2431 	synchronize_rcu();
2432 
2433 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2434 
2435 	ring_buffer_reset_online_cpus(buffer);
2436 
2437 	ring_buffer_record_enable(buffer);
2438 }
2439 
tracing_reset_all_cpus(struct array_buffer * buf)2440 static void tracing_reset_all_cpus(struct array_buffer *buf)
2441 {
2442 	struct trace_buffer *buffer = buf->buffer;
2443 
2444 	if (!buffer)
2445 		return;
2446 
2447 	ring_buffer_record_disable(buffer);
2448 
2449 	/* Make sure all commits have finished */
2450 	synchronize_rcu();
2451 
2452 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2453 
2454 	ring_buffer_reset(buffer);
2455 
2456 	ring_buffer_record_enable(buffer);
2457 }
2458 
2459 /* Must have trace_types_lock held */
tracing_reset_all_online_cpus_unlocked(void)2460 void tracing_reset_all_online_cpus_unlocked(void)
2461 {
2462 	struct trace_array *tr;
2463 
2464 	lockdep_assert_held(&trace_types_lock);
2465 
2466 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2467 		if (!tr->clear_trace)
2468 			continue;
2469 		tr->clear_trace = false;
2470 		tracing_reset_online_cpus(&tr->array_buffer);
2471 #ifdef CONFIG_TRACER_MAX_TRACE
2472 		tracing_reset_online_cpus(&tr->max_buffer);
2473 #endif
2474 	}
2475 }
2476 
tracing_reset_all_online_cpus(void)2477 void tracing_reset_all_online_cpus(void)
2478 {
2479 	guard(mutex)(&trace_types_lock);
2480 	tracing_reset_all_online_cpus_unlocked();
2481 }
2482 
is_tracing_stopped(void)2483 int is_tracing_stopped(void)
2484 {
2485 	return global_trace.stop_count;
2486 }
2487 
tracing_start_tr(struct trace_array * tr)2488 static void tracing_start_tr(struct trace_array *tr)
2489 {
2490 	struct trace_buffer *buffer;
2491 
2492 	if (tracing_disabled)
2493 		return;
2494 
2495 	guard(raw_spinlock_irqsave)(&tr->start_lock);
2496 	if (--tr->stop_count) {
2497 		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2498 			/* Someone screwed up their debugging */
2499 			tr->stop_count = 0;
2500 		}
2501 		return;
2502 	}
2503 
2504 	/* Prevent the buffers from switching */
2505 	arch_spin_lock(&tr->max_lock);
2506 
2507 	buffer = tr->array_buffer.buffer;
2508 	if (buffer)
2509 		ring_buffer_record_enable(buffer);
2510 
2511 #ifdef CONFIG_TRACER_MAX_TRACE
2512 	buffer = tr->max_buffer.buffer;
2513 	if (buffer)
2514 		ring_buffer_record_enable(buffer);
2515 #endif
2516 
2517 	arch_spin_unlock(&tr->max_lock);
2518 }
2519 
2520 /**
2521  * tracing_start - quick start of the tracer
2522  *
2523  * If tracing is enabled but was stopped by tracing_stop,
2524  * this will start the tracer back up.
2525  */
tracing_start(void)2526 void tracing_start(void)
2527 
2528 {
2529 	return tracing_start_tr(&global_trace);
2530 }
2531 
tracing_stop_tr(struct trace_array * tr)2532 static void tracing_stop_tr(struct trace_array *tr)
2533 {
2534 	struct trace_buffer *buffer;
2535 
2536 	guard(raw_spinlock_irqsave)(&tr->start_lock);
2537 	if (tr->stop_count++)
2538 		return;
2539 
2540 	/* Prevent the buffers from switching */
2541 	arch_spin_lock(&tr->max_lock);
2542 
2543 	buffer = tr->array_buffer.buffer;
2544 	if (buffer)
2545 		ring_buffer_record_disable(buffer);
2546 
2547 #ifdef CONFIG_TRACER_MAX_TRACE
2548 	buffer = tr->max_buffer.buffer;
2549 	if (buffer)
2550 		ring_buffer_record_disable(buffer);
2551 #endif
2552 
2553 	arch_spin_unlock(&tr->max_lock);
2554 }
2555 
2556 /**
2557  * tracing_stop - quick stop of the tracer
2558  *
2559  * Light weight way to stop tracing. Use in conjunction with
2560  * tracing_start.
2561  */
tracing_stop(void)2562 void tracing_stop(void)
2563 {
2564 	return tracing_stop_tr(&global_trace);
2565 }
2566 
2567 /*
2568  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2569  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2570  * simplifies those functions and keeps them in sync.
2571  */
trace_handle_return(struct trace_seq * s)2572 enum print_line_t trace_handle_return(struct trace_seq *s)
2573 {
2574 	return trace_seq_has_overflowed(s) ?
2575 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2576 }
2577 EXPORT_SYMBOL_GPL(trace_handle_return);
2578 
migration_disable_value(void)2579 static unsigned short migration_disable_value(void)
2580 {
2581 #if defined(CONFIG_SMP)
2582 	return current->migration_disabled;
2583 #else
2584 	return 0;
2585 #endif
2586 }
2587 
tracing_gen_ctx_irq_test(unsigned int irqs_status)2588 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2589 {
2590 	unsigned int trace_flags = irqs_status;
2591 	unsigned int pc;
2592 
2593 	pc = preempt_count();
2594 
2595 	if (pc & NMI_MASK)
2596 		trace_flags |= TRACE_FLAG_NMI;
2597 	if (pc & HARDIRQ_MASK)
2598 		trace_flags |= TRACE_FLAG_HARDIRQ;
2599 	if (in_serving_softirq())
2600 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2601 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2602 		trace_flags |= TRACE_FLAG_BH_OFF;
2603 
2604 	if (tif_need_resched())
2605 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2606 	if (test_preempt_need_resched())
2607 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2608 	if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
2609 		trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
2610 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2611 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2612 }
2613 
2614 struct ring_buffer_event *
trace_buffer_lock_reserve(struct trace_buffer * buffer,int type,unsigned long len,unsigned int trace_ctx)2615 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2616 			  int type,
2617 			  unsigned long len,
2618 			  unsigned int trace_ctx)
2619 {
2620 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2621 }
2622 
2623 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2624 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2625 static int trace_buffered_event_ref;
2626 
2627 /**
2628  * trace_buffered_event_enable - enable buffering events
2629  *
2630  * When events are being filtered, it is quicker to use a temporary
2631  * buffer to write the event data into if there's a likely chance
2632  * that it will not be committed. The discard of the ring buffer
2633  * is not as fast as committing, and is much slower than copying
2634  * a commit.
2635  *
2636  * When an event is to be filtered, allocate per cpu buffers to
2637  * write the event data into, and if the event is filtered and discarded
2638  * it is simply dropped, otherwise, the entire data is to be committed
2639  * in one shot.
2640  */
trace_buffered_event_enable(void)2641 void trace_buffered_event_enable(void)
2642 {
2643 	struct ring_buffer_event *event;
2644 	struct page *page;
2645 	int cpu;
2646 
2647 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2648 
2649 	if (trace_buffered_event_ref++)
2650 		return;
2651 
2652 	for_each_tracing_cpu(cpu) {
2653 		page = alloc_pages_node(cpu_to_node(cpu),
2654 					GFP_KERNEL | __GFP_NORETRY, 0);
2655 		/* This is just an optimization and can handle failures */
2656 		if (!page) {
2657 			pr_err("Failed to allocate event buffer\n");
2658 			break;
2659 		}
2660 
2661 		event = page_address(page);
2662 		memset(event, 0, sizeof(*event));
2663 
2664 		per_cpu(trace_buffered_event, cpu) = event;
2665 
2666 		scoped_guard(preempt,) {
2667 			if (cpu == smp_processor_id() &&
2668 			    __this_cpu_read(trace_buffered_event) !=
2669 			    per_cpu(trace_buffered_event, cpu))
2670 				WARN_ON_ONCE(1);
2671 		}
2672 	}
2673 }
2674 
enable_trace_buffered_event(void * data)2675 static void enable_trace_buffered_event(void *data)
2676 {
2677 	this_cpu_dec(trace_buffered_event_cnt);
2678 }
2679 
disable_trace_buffered_event(void * data)2680 static void disable_trace_buffered_event(void *data)
2681 {
2682 	this_cpu_inc(trace_buffered_event_cnt);
2683 }
2684 
2685 /**
2686  * trace_buffered_event_disable - disable buffering events
2687  *
2688  * When a filter is removed, it is faster to not use the buffered
2689  * events, and to commit directly into the ring buffer. Free up
2690  * the temp buffers when there are no more users. This requires
2691  * special synchronization with current events.
2692  */
trace_buffered_event_disable(void)2693 void trace_buffered_event_disable(void)
2694 {
2695 	int cpu;
2696 
2697 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2698 
2699 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2700 		return;
2701 
2702 	if (--trace_buffered_event_ref)
2703 		return;
2704 
2705 	/* For each CPU, set the buffer as used. */
2706 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2707 			 NULL, true);
2708 
2709 	/* Wait for all current users to finish */
2710 	synchronize_rcu();
2711 
2712 	for_each_tracing_cpu(cpu) {
2713 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2714 		per_cpu(trace_buffered_event, cpu) = NULL;
2715 	}
2716 
2717 	/*
2718 	 * Wait for all CPUs that potentially started checking if they can use
2719 	 * their event buffer only after the previous synchronize_rcu() call and
2720 	 * they still read a valid pointer from trace_buffered_event. It must be
2721 	 * ensured they don't see cleared trace_buffered_event_cnt else they
2722 	 * could wrongly decide to use the pointed-to buffer which is now freed.
2723 	 */
2724 	synchronize_rcu();
2725 
2726 	/* For each CPU, relinquish the buffer */
2727 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2728 			 true);
2729 }
2730 
2731 static struct trace_buffer *temp_buffer;
2732 
2733 struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct trace_buffer ** current_rb,struct trace_event_file * trace_file,int type,unsigned long len,unsigned int trace_ctx)2734 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2735 			  struct trace_event_file *trace_file,
2736 			  int type, unsigned long len,
2737 			  unsigned int trace_ctx)
2738 {
2739 	struct ring_buffer_event *entry;
2740 	struct trace_array *tr = trace_file->tr;
2741 	int val;
2742 
2743 	*current_rb = tr->array_buffer.buffer;
2744 
2745 	if (!tr->no_filter_buffering_ref &&
2746 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2747 		preempt_disable_notrace();
2748 		/*
2749 		 * Filtering is on, so try to use the per cpu buffer first.
2750 		 * This buffer will simulate a ring_buffer_event,
2751 		 * where the type_len is zero and the array[0] will
2752 		 * hold the full length.
2753 		 * (see include/linux/ring-buffer.h for details on
2754 		 *  how the ring_buffer_event is structured).
2755 		 *
2756 		 * Using a temp buffer during filtering and copying it
2757 		 * on a matched filter is quicker than writing directly
2758 		 * into the ring buffer and then discarding it when
2759 		 * it doesn't match. That is because the discard
2760 		 * requires several atomic operations to get right.
2761 		 * Copying on match and doing nothing on a failed match
2762 		 * is still quicker than no copy on match, but having
2763 		 * to discard out of the ring buffer on a failed match.
2764 		 */
2765 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2766 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2767 
2768 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2769 
2770 			/*
2771 			 * Preemption is disabled, but interrupts and NMIs
2772 			 * can still come in now. If that happens after
2773 			 * the above increment, then it will have to go
2774 			 * back to the old method of allocating the event
2775 			 * on the ring buffer, and if the filter fails, it
2776 			 * will have to call ring_buffer_discard_commit()
2777 			 * to remove it.
2778 			 *
2779 			 * Need to also check the unlikely case that the
2780 			 * length is bigger than the temp buffer size.
2781 			 * If that happens, then the reserve is pretty much
2782 			 * guaranteed to fail, as the ring buffer currently
2783 			 * only allows events less than a page. But that may
2784 			 * change in the future, so let the ring buffer reserve
2785 			 * handle the failure in that case.
2786 			 */
2787 			if (val == 1 && likely(len <= max_len)) {
2788 				trace_event_setup(entry, type, trace_ctx);
2789 				entry->array[0] = len;
2790 				/* Return with preemption disabled */
2791 				return entry;
2792 			}
2793 			this_cpu_dec(trace_buffered_event_cnt);
2794 		}
2795 		/* __trace_buffer_lock_reserve() disables preemption */
2796 		preempt_enable_notrace();
2797 	}
2798 
2799 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2800 					    trace_ctx);
2801 	/*
2802 	 * If tracing is off, but we have triggers enabled
2803 	 * we still need to look at the event data. Use the temp_buffer
2804 	 * to store the trace event for the trigger to use. It's recursive
2805 	 * safe and will not be recorded anywhere.
2806 	 */
2807 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2808 		*current_rb = temp_buffer;
2809 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2810 						    trace_ctx);
2811 	}
2812 	return entry;
2813 }
2814 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2815 
2816 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2817 static DEFINE_MUTEX(tracepoint_printk_mutex);
2818 
output_printk(struct trace_event_buffer * fbuffer)2819 static void output_printk(struct trace_event_buffer *fbuffer)
2820 {
2821 	struct trace_event_call *event_call;
2822 	struct trace_event_file *file;
2823 	struct trace_event *event;
2824 	unsigned long flags;
2825 	struct trace_iterator *iter = tracepoint_print_iter;
2826 
2827 	/* We should never get here if iter is NULL */
2828 	if (WARN_ON_ONCE(!iter))
2829 		return;
2830 
2831 	event_call = fbuffer->trace_file->event_call;
2832 	if (!event_call || !event_call->event.funcs ||
2833 	    !event_call->event.funcs->trace)
2834 		return;
2835 
2836 	file = fbuffer->trace_file;
2837 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2838 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2839 	     !filter_match_preds(file->filter, fbuffer->entry)))
2840 		return;
2841 
2842 	event = &fbuffer->trace_file->event_call->event;
2843 
2844 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2845 	trace_seq_init(&iter->seq);
2846 	iter->ent = fbuffer->entry;
2847 	event_call->event.funcs->trace(iter, 0, event);
2848 	trace_seq_putc(&iter->seq, 0);
2849 	printk("%s", iter->seq.buffer);
2850 
2851 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2852 }
2853 
tracepoint_printk_sysctl(const struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)2854 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
2855 			     void *buffer, size_t *lenp,
2856 			     loff_t *ppos)
2857 {
2858 	int save_tracepoint_printk;
2859 	int ret;
2860 
2861 	guard(mutex)(&tracepoint_printk_mutex);
2862 	save_tracepoint_printk = tracepoint_printk;
2863 
2864 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2865 
2866 	/*
2867 	 * This will force exiting early, as tracepoint_printk
2868 	 * is always zero when tracepoint_printk_iter is not allocated
2869 	 */
2870 	if (!tracepoint_print_iter)
2871 		tracepoint_printk = 0;
2872 
2873 	if (save_tracepoint_printk == tracepoint_printk)
2874 		return ret;
2875 
2876 	if (tracepoint_printk)
2877 		static_key_enable(&tracepoint_printk_key.key);
2878 	else
2879 		static_key_disable(&tracepoint_printk_key.key);
2880 
2881 	return ret;
2882 }
2883 
trace_event_buffer_commit(struct trace_event_buffer * fbuffer)2884 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2885 {
2886 	enum event_trigger_type tt = ETT_NONE;
2887 	struct trace_event_file *file = fbuffer->trace_file;
2888 
2889 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2890 			fbuffer->entry, &tt))
2891 		goto discard;
2892 
2893 	if (static_key_false(&tracepoint_printk_key.key))
2894 		output_printk(fbuffer);
2895 
2896 	if (static_branch_unlikely(&trace_event_exports_enabled))
2897 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2898 
2899 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2900 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2901 
2902 discard:
2903 	if (tt)
2904 		event_triggers_post_call(file, tt);
2905 
2906 }
2907 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2908 
2909 /*
2910  * Skip 3:
2911  *
2912  *   trace_buffer_unlock_commit_regs()
2913  *   trace_event_buffer_commit()
2914  *   trace_event_raw_event_xxx()
2915  */
2916 # define STACK_SKIP 3
2917 
trace_buffer_unlock_commit_regs(struct trace_array * tr,struct trace_buffer * buffer,struct ring_buffer_event * event,unsigned int trace_ctx,struct pt_regs * regs)2918 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2919 				     struct trace_buffer *buffer,
2920 				     struct ring_buffer_event *event,
2921 				     unsigned int trace_ctx,
2922 				     struct pt_regs *regs)
2923 {
2924 	__buffer_unlock_commit(buffer, event);
2925 
2926 	/*
2927 	 * If regs is not set, then skip the necessary functions.
2928 	 * Note, we can still get here via blktrace, wakeup tracer
2929 	 * and mmiotrace, but that's ok if they lose a function or
2930 	 * two. They are not that meaningful.
2931 	 */
2932 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2933 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2934 }
2935 
2936 /*
2937  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2938  */
2939 void
trace_buffer_unlock_commit_nostack(struct trace_buffer * buffer,struct ring_buffer_event * event)2940 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2941 				   struct ring_buffer_event *event)
2942 {
2943 	__buffer_unlock_commit(buffer, event);
2944 }
2945 
2946 void
trace_function(struct trace_array * tr,unsigned long ip,unsigned long parent_ip,unsigned int trace_ctx,struct ftrace_regs * fregs)2947 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2948 	       parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs)
2949 {
2950 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2951 	struct ring_buffer_event *event;
2952 	struct ftrace_entry *entry;
2953 	int size = sizeof(*entry);
2954 
2955 	size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long);
2956 
2957 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size,
2958 					    trace_ctx);
2959 	if (!event)
2960 		return;
2961 	entry	= ring_buffer_event_data(event);
2962 	entry->ip			= ip;
2963 	entry->parent_ip		= parent_ip;
2964 
2965 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
2966 	if (fregs) {
2967 		for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
2968 			entry->args[i] = ftrace_regs_get_argument(fregs, i);
2969 	}
2970 #endif
2971 
2972 	if (static_branch_unlikely(&trace_function_exports_enabled))
2973 		ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2974 	__buffer_unlock_commit(buffer, event);
2975 }
2976 
2977 #ifdef CONFIG_STACKTRACE
2978 
2979 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2980 #define FTRACE_KSTACK_NESTING	4
2981 
2982 #define FTRACE_KSTACK_ENTRIES	(SZ_4K / FTRACE_KSTACK_NESTING)
2983 
2984 struct ftrace_stack {
2985 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2986 };
2987 
2988 
2989 struct ftrace_stacks {
2990 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2991 };
2992 
2993 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2994 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2995 
__ftrace_trace_stack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx,int skip,struct pt_regs * regs)2996 static void __ftrace_trace_stack(struct trace_array *tr,
2997 				 struct trace_buffer *buffer,
2998 				 unsigned int trace_ctx,
2999 				 int skip, struct pt_regs *regs)
3000 {
3001 	struct ring_buffer_event *event;
3002 	unsigned int size, nr_entries;
3003 	struct ftrace_stack *fstack;
3004 	struct stack_entry *entry;
3005 	int stackidx;
3006 
3007 	/*
3008 	 * Add one, for this function and the call to save_stack_trace()
3009 	 * If regs is set, then these functions will not be in the way.
3010 	 */
3011 #ifndef CONFIG_UNWINDER_ORC
3012 	if (!regs)
3013 		skip++;
3014 #endif
3015 
3016 	guard(preempt_notrace)();
3017 
3018 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3019 
3020 	/* This should never happen. If it does, yell once and skip */
3021 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3022 		goto out;
3023 
3024 	/*
3025 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3026 	 * interrupt will either see the value pre increment or post
3027 	 * increment. If the interrupt happens pre increment it will have
3028 	 * restored the counter when it returns.  We just need a barrier to
3029 	 * keep gcc from moving things around.
3030 	 */
3031 	barrier();
3032 
3033 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3034 	size = ARRAY_SIZE(fstack->calls);
3035 
3036 	if (regs) {
3037 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3038 						   size, skip);
3039 	} else {
3040 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3041 	}
3042 
3043 #ifdef CONFIG_DYNAMIC_FTRACE
3044 	/* Mark entry of stack trace as trampoline code */
3045 	if (tr->ops && tr->ops->trampoline) {
3046 		unsigned long tramp_start = tr->ops->trampoline;
3047 		unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
3048 		unsigned long *calls = fstack->calls;
3049 
3050 		for (int i = 0; i < nr_entries; i++) {
3051 			if (calls[i] >= tramp_start && calls[i] < tramp_end)
3052 				calls[i] = FTRACE_TRAMPOLINE_MARKER;
3053 		}
3054 	}
3055 #endif
3056 
3057 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3058 				    struct_size(entry, caller, nr_entries),
3059 				    trace_ctx);
3060 	if (!event)
3061 		goto out;
3062 	entry = ring_buffer_event_data(event);
3063 
3064 	entry->size = nr_entries;
3065 	memcpy(&entry->caller, fstack->calls,
3066 	       flex_array_size(entry, caller, nr_entries));
3067 
3068 	__buffer_unlock_commit(buffer, event);
3069 
3070  out:
3071 	/* Again, don't let gcc optimize things here */
3072 	barrier();
3073 	__this_cpu_dec(ftrace_stack_reserve);
3074 }
3075 
ftrace_trace_stack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx,int skip,struct pt_regs * regs)3076 static inline void ftrace_trace_stack(struct trace_array *tr,
3077 				      struct trace_buffer *buffer,
3078 				      unsigned int trace_ctx,
3079 				      int skip, struct pt_regs *regs)
3080 {
3081 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3082 		return;
3083 
3084 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
3085 }
3086 
__trace_stack(struct trace_array * tr,unsigned int trace_ctx,int skip)3087 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3088 		   int skip)
3089 {
3090 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3091 
3092 	if (rcu_is_watching()) {
3093 		__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3094 		return;
3095 	}
3096 
3097 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3098 		return;
3099 
3100 	/*
3101 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3102 	 * but if the above rcu_is_watching() failed, then the NMI
3103 	 * triggered someplace critical, and ct_irq_enter() should
3104 	 * not be called from NMI.
3105 	 */
3106 	if (unlikely(in_nmi()))
3107 		return;
3108 
3109 	ct_irq_enter_irqson();
3110 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3111 	ct_irq_exit_irqson();
3112 }
3113 
3114 /**
3115  * trace_dump_stack - record a stack back trace in the trace buffer
3116  * @skip: Number of functions to skip (helper handlers)
3117  */
trace_dump_stack(int skip)3118 void trace_dump_stack(int skip)
3119 {
3120 	if (tracing_disabled || tracing_selftest_running)
3121 		return;
3122 
3123 #ifndef CONFIG_UNWINDER_ORC
3124 	/* Skip 1 to skip this function. */
3125 	skip++;
3126 #endif
3127 	__ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
3128 				tracing_gen_ctx(), skip, NULL);
3129 }
3130 EXPORT_SYMBOL_GPL(trace_dump_stack);
3131 
3132 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3133 static DEFINE_PER_CPU(int, user_stack_count);
3134 
3135 static void
ftrace_trace_userstack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx)3136 ftrace_trace_userstack(struct trace_array *tr,
3137 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3138 {
3139 	struct ring_buffer_event *event;
3140 	struct userstack_entry *entry;
3141 
3142 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3143 		return;
3144 
3145 	/*
3146 	 * NMIs can not handle page faults, even with fix ups.
3147 	 * The save user stack can (and often does) fault.
3148 	 */
3149 	if (unlikely(in_nmi()))
3150 		return;
3151 
3152 	/*
3153 	 * prevent recursion, since the user stack tracing may
3154 	 * trigger other kernel events.
3155 	 */
3156 	guard(preempt)();
3157 	if (__this_cpu_read(user_stack_count))
3158 		return;
3159 
3160 	__this_cpu_inc(user_stack_count);
3161 
3162 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3163 					    sizeof(*entry), trace_ctx);
3164 	if (!event)
3165 		goto out_drop_count;
3166 	entry	= ring_buffer_event_data(event);
3167 
3168 	entry->tgid		= current->tgid;
3169 	memset(&entry->caller, 0, sizeof(entry->caller));
3170 
3171 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3172 	__buffer_unlock_commit(buffer, event);
3173 
3174  out_drop_count:
3175 	__this_cpu_dec(user_stack_count);
3176 }
3177 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
ftrace_trace_userstack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx)3178 static void ftrace_trace_userstack(struct trace_array *tr,
3179 				   struct trace_buffer *buffer,
3180 				   unsigned int trace_ctx)
3181 {
3182 }
3183 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3184 
3185 #endif /* CONFIG_STACKTRACE */
3186 
3187 static inline void
func_repeats_set_delta_ts(struct func_repeats_entry * entry,unsigned long long delta)3188 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3189 			  unsigned long long delta)
3190 {
3191 	entry->bottom_delta_ts = delta & U32_MAX;
3192 	entry->top_delta_ts = (delta >> 32);
3193 }
3194 
trace_last_func_repeats(struct trace_array * tr,struct trace_func_repeats * last_info,unsigned int trace_ctx)3195 void trace_last_func_repeats(struct trace_array *tr,
3196 			     struct trace_func_repeats *last_info,
3197 			     unsigned int trace_ctx)
3198 {
3199 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3200 	struct func_repeats_entry *entry;
3201 	struct ring_buffer_event *event;
3202 	u64 delta;
3203 
3204 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3205 					    sizeof(*entry), trace_ctx);
3206 	if (!event)
3207 		return;
3208 
3209 	delta = ring_buffer_event_time_stamp(buffer, event) -
3210 		last_info->ts_last_call;
3211 
3212 	entry = ring_buffer_event_data(event);
3213 	entry->ip = last_info->ip;
3214 	entry->parent_ip = last_info->parent_ip;
3215 	entry->count = last_info->count;
3216 	func_repeats_set_delta_ts(entry, delta);
3217 
3218 	__buffer_unlock_commit(buffer, event);
3219 }
3220 
3221 /* created for use with alloc_percpu */
3222 struct trace_buffer_struct {
3223 	int nesting;
3224 	char buffer[4][TRACE_BUF_SIZE];
3225 };
3226 
3227 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3228 
3229 /*
3230  * This allows for lockless recording.  If we're nested too deeply, then
3231  * this returns NULL.
3232  */
get_trace_buf(void)3233 static char *get_trace_buf(void)
3234 {
3235 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3236 
3237 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3238 		return NULL;
3239 
3240 	buffer->nesting++;
3241 
3242 	/* Interrupts must see nesting incremented before we use the buffer */
3243 	barrier();
3244 	return &buffer->buffer[buffer->nesting - 1][0];
3245 }
3246 
put_trace_buf(void)3247 static void put_trace_buf(void)
3248 {
3249 	/* Don't let the decrement of nesting leak before this */
3250 	barrier();
3251 	this_cpu_dec(trace_percpu_buffer->nesting);
3252 }
3253 
alloc_percpu_trace_buffer(void)3254 static int alloc_percpu_trace_buffer(void)
3255 {
3256 	struct trace_buffer_struct __percpu *buffers;
3257 
3258 	if (trace_percpu_buffer)
3259 		return 0;
3260 
3261 	buffers = alloc_percpu(struct trace_buffer_struct);
3262 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3263 		return -ENOMEM;
3264 
3265 	trace_percpu_buffer = buffers;
3266 	return 0;
3267 }
3268 
3269 static int buffers_allocated;
3270 
trace_printk_init_buffers(void)3271 void trace_printk_init_buffers(void)
3272 {
3273 	if (buffers_allocated)
3274 		return;
3275 
3276 	if (alloc_percpu_trace_buffer())
3277 		return;
3278 
3279 	/* trace_printk() is for debug use only. Don't use it in production. */
3280 
3281 	pr_warn("\n");
3282 	pr_warn("**********************************************************\n");
3283 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3284 	pr_warn("**                                                      **\n");
3285 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3286 	pr_warn("**                                                      **\n");
3287 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3288 	pr_warn("** unsafe for production use.                           **\n");
3289 	pr_warn("**                                                      **\n");
3290 	pr_warn("** If you see this message and you are not debugging    **\n");
3291 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3292 	pr_warn("**                                                      **\n");
3293 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3294 	pr_warn("**********************************************************\n");
3295 
3296 	/* Expand the buffers to set size */
3297 	tracing_update_buffers(&global_trace);
3298 
3299 	buffers_allocated = 1;
3300 
3301 	/*
3302 	 * trace_printk_init_buffers() can be called by modules.
3303 	 * If that happens, then we need to start cmdline recording
3304 	 * directly here. If the global_trace.buffer is already
3305 	 * allocated here, then this was called by module code.
3306 	 */
3307 	if (global_trace.array_buffer.buffer)
3308 		tracing_start_cmdline_record();
3309 }
3310 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3311 
trace_printk_start_comm(void)3312 void trace_printk_start_comm(void)
3313 {
3314 	/* Start tracing comms if trace printk is set */
3315 	if (!buffers_allocated)
3316 		return;
3317 	tracing_start_cmdline_record();
3318 }
3319 
trace_printk_start_stop_comm(int enabled)3320 static void trace_printk_start_stop_comm(int enabled)
3321 {
3322 	if (!buffers_allocated)
3323 		return;
3324 
3325 	if (enabled)
3326 		tracing_start_cmdline_record();
3327 	else
3328 		tracing_stop_cmdline_record();
3329 }
3330 
3331 /**
3332  * trace_vbprintk - write binary msg to tracing buffer
3333  * @ip:    The address of the caller
3334  * @fmt:   The string format to write to the buffer
3335  * @args:  Arguments for @fmt
3336  */
trace_vbprintk(unsigned long ip,const char * fmt,va_list args)3337 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3338 {
3339 	struct ring_buffer_event *event;
3340 	struct trace_buffer *buffer;
3341 	struct trace_array *tr = READ_ONCE(printk_trace);
3342 	struct bprint_entry *entry;
3343 	unsigned int trace_ctx;
3344 	char *tbuffer;
3345 	int len = 0, size;
3346 
3347 	if (!printk_binsafe(tr))
3348 		return trace_vprintk(ip, fmt, args);
3349 
3350 	if (unlikely(tracing_selftest_running || tracing_disabled))
3351 		return 0;
3352 
3353 	/* Don't pollute graph traces with trace_vprintk internals */
3354 	pause_graph_tracing();
3355 
3356 	trace_ctx = tracing_gen_ctx();
3357 	guard(preempt_notrace)();
3358 
3359 	tbuffer = get_trace_buf();
3360 	if (!tbuffer) {
3361 		len = 0;
3362 		goto out_nobuffer;
3363 	}
3364 
3365 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3366 
3367 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3368 		goto out_put;
3369 
3370 	size = sizeof(*entry) + sizeof(u32) * len;
3371 	buffer = tr->array_buffer.buffer;
3372 	scoped_guard(ring_buffer_nest, buffer) {
3373 		event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3374 						    trace_ctx);
3375 		if (!event)
3376 			goto out_put;
3377 		entry = ring_buffer_event_data(event);
3378 		entry->ip			= ip;
3379 		entry->fmt			= fmt;
3380 
3381 		memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3382 		__buffer_unlock_commit(buffer, event);
3383 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3384 	}
3385 out_put:
3386 	put_trace_buf();
3387 
3388 out_nobuffer:
3389 	unpause_graph_tracing();
3390 
3391 	return len;
3392 }
3393 EXPORT_SYMBOL_GPL(trace_vbprintk);
3394 
3395 static __printf(3, 0)
__trace_array_vprintk(struct trace_buffer * buffer,unsigned long ip,const char * fmt,va_list args)3396 int __trace_array_vprintk(struct trace_buffer *buffer,
3397 			  unsigned long ip, const char *fmt, va_list args)
3398 {
3399 	struct ring_buffer_event *event;
3400 	int len = 0, size;
3401 	struct print_entry *entry;
3402 	unsigned int trace_ctx;
3403 	char *tbuffer;
3404 
3405 	if (tracing_disabled)
3406 		return 0;
3407 
3408 	/* Don't pollute graph traces with trace_vprintk internals */
3409 	pause_graph_tracing();
3410 
3411 	trace_ctx = tracing_gen_ctx();
3412 	guard(preempt_notrace)();
3413 
3414 
3415 	tbuffer = get_trace_buf();
3416 	if (!tbuffer) {
3417 		len = 0;
3418 		goto out_nobuffer;
3419 	}
3420 
3421 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3422 
3423 	size = sizeof(*entry) + len + 1;
3424 	scoped_guard(ring_buffer_nest, buffer) {
3425 		event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3426 						    trace_ctx);
3427 		if (!event)
3428 			goto out;
3429 		entry = ring_buffer_event_data(event);
3430 		entry->ip = ip;
3431 
3432 		memcpy(&entry->buf, tbuffer, len + 1);
3433 		__buffer_unlock_commit(buffer, event);
3434 		ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL);
3435 	}
3436 out:
3437 	put_trace_buf();
3438 
3439 out_nobuffer:
3440 	unpause_graph_tracing();
3441 
3442 	return len;
3443 }
3444 
trace_array_vprintk(struct trace_array * tr,unsigned long ip,const char * fmt,va_list args)3445 int trace_array_vprintk(struct trace_array *tr,
3446 			unsigned long ip, const char *fmt, va_list args)
3447 {
3448 	if (tracing_selftest_running && tr == &global_trace)
3449 		return 0;
3450 
3451 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3452 }
3453 
3454 /**
3455  * trace_array_printk - Print a message to a specific instance
3456  * @tr: The instance trace_array descriptor
3457  * @ip: The instruction pointer that this is called from.
3458  * @fmt: The format to print (printf format)
3459  *
3460  * If a subsystem sets up its own instance, they have the right to
3461  * printk strings into their tracing instance buffer using this
3462  * function. Note, this function will not write into the top level
3463  * buffer (use trace_printk() for that), as writing into the top level
3464  * buffer should only have events that can be individually disabled.
3465  * trace_printk() is only used for debugging a kernel, and should not
3466  * be ever incorporated in normal use.
3467  *
3468  * trace_array_printk() can be used, as it will not add noise to the
3469  * top level tracing buffer.
3470  *
3471  * Note, trace_array_init_printk() must be called on @tr before this
3472  * can be used.
3473  */
trace_array_printk(struct trace_array * tr,unsigned long ip,const char * fmt,...)3474 int trace_array_printk(struct trace_array *tr,
3475 		       unsigned long ip, const char *fmt, ...)
3476 {
3477 	int ret;
3478 	va_list ap;
3479 
3480 	if (!tr)
3481 		return -ENOENT;
3482 
3483 	/* This is only allowed for created instances */
3484 	if (tr == &global_trace)
3485 		return 0;
3486 
3487 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3488 		return 0;
3489 
3490 	va_start(ap, fmt);
3491 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3492 	va_end(ap);
3493 	return ret;
3494 }
3495 EXPORT_SYMBOL_GPL(trace_array_printk);
3496 
3497 /**
3498  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3499  * @tr: The trace array to initialize the buffers for
3500  *
3501  * As trace_array_printk() only writes into instances, they are OK to
3502  * have in the kernel (unlike trace_printk()). This needs to be called
3503  * before trace_array_printk() can be used on a trace_array.
3504  */
trace_array_init_printk(struct trace_array * tr)3505 int trace_array_init_printk(struct trace_array *tr)
3506 {
3507 	if (!tr)
3508 		return -ENOENT;
3509 
3510 	/* This is only allowed for created instances */
3511 	if (tr == &global_trace)
3512 		return -EINVAL;
3513 
3514 	return alloc_percpu_trace_buffer();
3515 }
3516 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3517 
trace_array_printk_buf(struct trace_buffer * buffer,unsigned long ip,const char * fmt,...)3518 int trace_array_printk_buf(struct trace_buffer *buffer,
3519 			   unsigned long ip, const char *fmt, ...)
3520 {
3521 	int ret;
3522 	va_list ap;
3523 
3524 	if (!(printk_trace->trace_flags & TRACE_ITER_PRINTK))
3525 		return 0;
3526 
3527 	va_start(ap, fmt);
3528 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3529 	va_end(ap);
3530 	return ret;
3531 }
3532 
trace_vprintk(unsigned long ip,const char * fmt,va_list args)3533 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3534 {
3535 	return trace_array_vprintk(printk_trace, ip, fmt, args);
3536 }
3537 EXPORT_SYMBOL_GPL(trace_vprintk);
3538 
trace_iterator_increment(struct trace_iterator * iter)3539 static void trace_iterator_increment(struct trace_iterator *iter)
3540 {
3541 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3542 
3543 	iter->idx++;
3544 	if (buf_iter)
3545 		ring_buffer_iter_advance(buf_iter);
3546 }
3547 
3548 static struct trace_entry *
peek_next_entry(struct trace_iterator * iter,int cpu,u64 * ts,unsigned long * lost_events)3549 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3550 		unsigned long *lost_events)
3551 {
3552 	struct ring_buffer_event *event;
3553 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3554 
3555 	if (buf_iter) {
3556 		event = ring_buffer_iter_peek(buf_iter, ts);
3557 		if (lost_events)
3558 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3559 				(unsigned long)-1 : 0;
3560 	} else {
3561 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3562 					 lost_events);
3563 	}
3564 
3565 	if (event) {
3566 		iter->ent_size = ring_buffer_event_length(event);
3567 		return ring_buffer_event_data(event);
3568 	}
3569 	iter->ent_size = 0;
3570 	return NULL;
3571 }
3572 
3573 static struct trace_entry *
__find_next_entry(struct trace_iterator * iter,int * ent_cpu,unsigned long * missing_events,u64 * ent_ts)3574 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3575 		  unsigned long *missing_events, u64 *ent_ts)
3576 {
3577 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3578 	struct trace_entry *ent, *next = NULL;
3579 	unsigned long lost_events = 0, next_lost = 0;
3580 	int cpu_file = iter->cpu_file;
3581 	u64 next_ts = 0, ts;
3582 	int next_cpu = -1;
3583 	int next_size = 0;
3584 	int cpu;
3585 
3586 	/*
3587 	 * If we are in a per_cpu trace file, don't bother by iterating over
3588 	 * all cpu and peek directly.
3589 	 */
3590 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3591 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3592 			return NULL;
3593 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3594 		if (ent_cpu)
3595 			*ent_cpu = cpu_file;
3596 
3597 		return ent;
3598 	}
3599 
3600 	for_each_tracing_cpu(cpu) {
3601 
3602 		if (ring_buffer_empty_cpu(buffer, cpu))
3603 			continue;
3604 
3605 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3606 
3607 		/*
3608 		 * Pick the entry with the smallest timestamp:
3609 		 */
3610 		if (ent && (!next || ts < next_ts)) {
3611 			next = ent;
3612 			next_cpu = cpu;
3613 			next_ts = ts;
3614 			next_lost = lost_events;
3615 			next_size = iter->ent_size;
3616 		}
3617 	}
3618 
3619 	iter->ent_size = next_size;
3620 
3621 	if (ent_cpu)
3622 		*ent_cpu = next_cpu;
3623 
3624 	if (ent_ts)
3625 		*ent_ts = next_ts;
3626 
3627 	if (missing_events)
3628 		*missing_events = next_lost;
3629 
3630 	return next;
3631 }
3632 
3633 #define STATIC_FMT_BUF_SIZE	128
3634 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3635 
trace_iter_expand_format(struct trace_iterator * iter)3636 char *trace_iter_expand_format(struct trace_iterator *iter)
3637 {
3638 	char *tmp;
3639 
3640 	/*
3641 	 * iter->tr is NULL when used with tp_printk, which makes
3642 	 * this get called where it is not safe to call krealloc().
3643 	 */
3644 	if (!iter->tr || iter->fmt == static_fmt_buf)
3645 		return NULL;
3646 
3647 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3648 		       GFP_KERNEL);
3649 	if (tmp) {
3650 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3651 		iter->fmt = tmp;
3652 	}
3653 
3654 	return tmp;
3655 }
3656 
3657 /* Returns true if the string is safe to dereference from an event */
trace_safe_str(struct trace_iterator * iter,const char * str)3658 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3659 {
3660 	unsigned long addr = (unsigned long)str;
3661 	struct trace_event *trace_event;
3662 	struct trace_event_call *event;
3663 
3664 	/* OK if part of the event data */
3665 	if ((addr >= (unsigned long)iter->ent) &&
3666 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3667 		return true;
3668 
3669 	/* OK if part of the temp seq buffer */
3670 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3671 	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3672 		return true;
3673 
3674 	/* Core rodata can not be freed */
3675 	if (is_kernel_rodata(addr))
3676 		return true;
3677 
3678 	if (trace_is_tracepoint_string(str))
3679 		return true;
3680 
3681 	/*
3682 	 * Now this could be a module event, referencing core module
3683 	 * data, which is OK.
3684 	 */
3685 	if (!iter->ent)
3686 		return false;
3687 
3688 	trace_event = ftrace_find_event(iter->ent->type);
3689 	if (!trace_event)
3690 		return false;
3691 
3692 	event = container_of(trace_event, struct trace_event_call, event);
3693 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3694 		return false;
3695 
3696 	/* Would rather have rodata, but this will suffice */
3697 	if (within_module_core(addr, event->module))
3698 		return true;
3699 
3700 	return false;
3701 }
3702 
3703 /**
3704  * ignore_event - Check dereferenced fields while writing to the seq buffer
3705  * @iter: The iterator that holds the seq buffer and the event being printed
3706  *
3707  * At boot up, test_event_printk() will flag any event that dereferences
3708  * a string with "%s" that does exist in the ring buffer. It may still
3709  * be valid, as the string may point to a static string in the kernel
3710  * rodata that never gets freed. But if the string pointer is pointing
3711  * to something that was allocated, there's a chance that it can be freed
3712  * by the time the user reads the trace. This would cause a bad memory
3713  * access by the kernel and possibly crash the system.
3714  *
3715  * This function will check if the event has any fields flagged as needing
3716  * to be checked at runtime and perform those checks.
3717  *
3718  * If it is found that a field is unsafe, it will write into the @iter->seq
3719  * a message stating what was found to be unsafe.
3720  *
3721  * @return: true if the event is unsafe and should be ignored,
3722  *          false otherwise.
3723  */
ignore_event(struct trace_iterator * iter)3724 bool ignore_event(struct trace_iterator *iter)
3725 {
3726 	struct ftrace_event_field *field;
3727 	struct trace_event *trace_event;
3728 	struct trace_event_call *event;
3729 	struct list_head *head;
3730 	struct trace_seq *seq;
3731 	const void *ptr;
3732 
3733 	trace_event = ftrace_find_event(iter->ent->type);
3734 
3735 	seq = &iter->seq;
3736 
3737 	if (!trace_event) {
3738 		trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
3739 		return true;
3740 	}
3741 
3742 	event = container_of(trace_event, struct trace_event_call, event);
3743 	if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
3744 		return false;
3745 
3746 	head = trace_get_fields(event);
3747 	if (!head) {
3748 		trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
3749 				 trace_event_name(event));
3750 		return true;
3751 	}
3752 
3753 	/* Offsets are from the iter->ent that points to the raw event */
3754 	ptr = iter->ent;
3755 
3756 	list_for_each_entry(field, head, link) {
3757 		const char *str;
3758 		bool good;
3759 
3760 		if (!field->needs_test)
3761 			continue;
3762 
3763 		str = *(const char **)(ptr + field->offset);
3764 
3765 		good = trace_safe_str(iter, str);
3766 
3767 		/*
3768 		 * If you hit this warning, it is likely that the
3769 		 * trace event in question used %s on a string that
3770 		 * was saved at the time of the event, but may not be
3771 		 * around when the trace is read. Use __string(),
3772 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3773 		 * instead. See samples/trace_events/trace-events-sample.h
3774 		 * for reference.
3775 		 */
3776 		if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
3777 			      trace_event_name(event), field->name)) {
3778 			trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
3779 					 trace_event_name(event), field->name);
3780 			return true;
3781 		}
3782 	}
3783 	return false;
3784 }
3785 
trace_event_format(struct trace_iterator * iter,const char * fmt)3786 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3787 {
3788 	const char *p, *new_fmt;
3789 	char *q;
3790 
3791 	if (WARN_ON_ONCE(!fmt))
3792 		return fmt;
3793 
3794 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3795 		return fmt;
3796 
3797 	p = fmt;
3798 	new_fmt = q = iter->fmt;
3799 	while (*p) {
3800 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3801 			if (!trace_iter_expand_format(iter))
3802 				return fmt;
3803 
3804 			q += iter->fmt - new_fmt;
3805 			new_fmt = iter->fmt;
3806 		}
3807 
3808 		*q++ = *p++;
3809 
3810 		/* Replace %p with %px */
3811 		if (p[-1] == '%') {
3812 			if (p[0] == '%') {
3813 				*q++ = *p++;
3814 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3815 				*q++ = *p++;
3816 				*q++ = 'x';
3817 			}
3818 		}
3819 	}
3820 	*q = '\0';
3821 
3822 	return new_fmt;
3823 }
3824 
3825 #define STATIC_TEMP_BUF_SIZE	128
3826 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3827 
3828 /* Find the next real entry, without updating the iterator itself */
trace_find_next_entry(struct trace_iterator * iter,int * ent_cpu,u64 * ent_ts)3829 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3830 					  int *ent_cpu, u64 *ent_ts)
3831 {
3832 	/* __find_next_entry will reset ent_size */
3833 	int ent_size = iter->ent_size;
3834 	struct trace_entry *entry;
3835 
3836 	/*
3837 	 * If called from ftrace_dump(), then the iter->temp buffer
3838 	 * will be the static_temp_buf and not created from kmalloc.
3839 	 * If the entry size is greater than the buffer, we can
3840 	 * not save it. Just return NULL in that case. This is only
3841 	 * used to add markers when two consecutive events' time
3842 	 * stamps have a large delta. See trace_print_lat_context()
3843 	 */
3844 	if (iter->temp == static_temp_buf &&
3845 	    STATIC_TEMP_BUF_SIZE < ent_size)
3846 		return NULL;
3847 
3848 	/*
3849 	 * The __find_next_entry() may call peek_next_entry(), which may
3850 	 * call ring_buffer_peek() that may make the contents of iter->ent
3851 	 * undefined. Need to copy iter->ent now.
3852 	 */
3853 	if (iter->ent && iter->ent != iter->temp) {
3854 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3855 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3856 			void *temp;
3857 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3858 			if (!temp)
3859 				return NULL;
3860 			kfree(iter->temp);
3861 			iter->temp = temp;
3862 			iter->temp_size = iter->ent_size;
3863 		}
3864 		memcpy(iter->temp, iter->ent, iter->ent_size);
3865 		iter->ent = iter->temp;
3866 	}
3867 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3868 	/* Put back the original ent_size */
3869 	iter->ent_size = ent_size;
3870 
3871 	return entry;
3872 }
3873 
3874 /* Find the next real entry, and increment the iterator to the next entry */
trace_find_next_entry_inc(struct trace_iterator * iter)3875 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3876 {
3877 	iter->ent = __find_next_entry(iter, &iter->cpu,
3878 				      &iter->lost_events, &iter->ts);
3879 
3880 	if (iter->ent)
3881 		trace_iterator_increment(iter);
3882 
3883 	return iter->ent ? iter : NULL;
3884 }
3885 
trace_consume(struct trace_iterator * iter)3886 static void trace_consume(struct trace_iterator *iter)
3887 {
3888 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3889 			    &iter->lost_events);
3890 }
3891 
s_next(struct seq_file * m,void * v,loff_t * pos)3892 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3893 {
3894 	struct trace_iterator *iter = m->private;
3895 	int i = (int)*pos;
3896 	void *ent;
3897 
3898 	WARN_ON_ONCE(iter->leftover);
3899 
3900 	(*pos)++;
3901 
3902 	/* can't go backwards */
3903 	if (iter->idx > i)
3904 		return NULL;
3905 
3906 	if (iter->idx < 0)
3907 		ent = trace_find_next_entry_inc(iter);
3908 	else
3909 		ent = iter;
3910 
3911 	while (ent && iter->idx < i)
3912 		ent = trace_find_next_entry_inc(iter);
3913 
3914 	iter->pos = *pos;
3915 
3916 	return ent;
3917 }
3918 
tracing_iter_reset(struct trace_iterator * iter,int cpu)3919 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3920 {
3921 	struct ring_buffer_iter *buf_iter;
3922 	unsigned long entries = 0;
3923 	u64 ts;
3924 
3925 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3926 
3927 	buf_iter = trace_buffer_iter(iter, cpu);
3928 	if (!buf_iter)
3929 		return;
3930 
3931 	ring_buffer_iter_reset(buf_iter);
3932 
3933 	/*
3934 	 * We could have the case with the max latency tracers
3935 	 * that a reset never took place on a cpu. This is evident
3936 	 * by the timestamp being before the start of the buffer.
3937 	 */
3938 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
3939 		if (ts >= iter->array_buffer->time_start)
3940 			break;
3941 		entries++;
3942 		ring_buffer_iter_advance(buf_iter);
3943 		/* This could be a big loop */
3944 		cond_resched();
3945 	}
3946 
3947 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3948 }
3949 
3950 /*
3951  * The current tracer is copied to avoid a global locking
3952  * all around.
3953  */
s_start(struct seq_file * m,loff_t * pos)3954 static void *s_start(struct seq_file *m, loff_t *pos)
3955 {
3956 	struct trace_iterator *iter = m->private;
3957 	struct trace_array *tr = iter->tr;
3958 	int cpu_file = iter->cpu_file;
3959 	void *p = NULL;
3960 	loff_t l = 0;
3961 	int cpu;
3962 
3963 	mutex_lock(&trace_types_lock);
3964 	if (unlikely(tr->current_trace != iter->trace)) {
3965 		/* Close iter->trace before switching to the new current tracer */
3966 		if (iter->trace->close)
3967 			iter->trace->close(iter);
3968 		iter->trace = tr->current_trace;
3969 		/* Reopen the new current tracer */
3970 		if (iter->trace->open)
3971 			iter->trace->open(iter);
3972 	}
3973 	mutex_unlock(&trace_types_lock);
3974 
3975 #ifdef CONFIG_TRACER_MAX_TRACE
3976 	if (iter->snapshot && iter->trace->use_max_tr)
3977 		return ERR_PTR(-EBUSY);
3978 #endif
3979 
3980 	if (*pos != iter->pos) {
3981 		iter->ent = NULL;
3982 		iter->cpu = 0;
3983 		iter->idx = -1;
3984 
3985 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3986 			for_each_tracing_cpu(cpu)
3987 				tracing_iter_reset(iter, cpu);
3988 		} else
3989 			tracing_iter_reset(iter, cpu_file);
3990 
3991 		iter->leftover = 0;
3992 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3993 			;
3994 
3995 	} else {
3996 		/*
3997 		 * If we overflowed the seq_file before, then we want
3998 		 * to just reuse the trace_seq buffer again.
3999 		 */
4000 		if (iter->leftover)
4001 			p = iter;
4002 		else {
4003 			l = *pos - 1;
4004 			p = s_next(m, p, &l);
4005 		}
4006 	}
4007 
4008 	trace_event_read_lock();
4009 	trace_access_lock(cpu_file);
4010 	return p;
4011 }
4012 
s_stop(struct seq_file * m,void * p)4013 static void s_stop(struct seq_file *m, void *p)
4014 {
4015 	struct trace_iterator *iter = m->private;
4016 
4017 #ifdef CONFIG_TRACER_MAX_TRACE
4018 	if (iter->snapshot && iter->trace->use_max_tr)
4019 		return;
4020 #endif
4021 
4022 	trace_access_unlock(iter->cpu_file);
4023 	trace_event_read_unlock();
4024 }
4025 
4026 static void
get_total_entries_cpu(struct array_buffer * buf,unsigned long * total,unsigned long * entries,int cpu)4027 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4028 		      unsigned long *entries, int cpu)
4029 {
4030 	unsigned long count;
4031 
4032 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4033 	/*
4034 	 * If this buffer has skipped entries, then we hold all
4035 	 * entries for the trace and we need to ignore the
4036 	 * ones before the time stamp.
4037 	 */
4038 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4039 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4040 		/* total is the same as the entries */
4041 		*total = count;
4042 	} else
4043 		*total = count +
4044 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4045 	*entries = count;
4046 }
4047 
4048 static void
get_total_entries(struct array_buffer * buf,unsigned long * total,unsigned long * entries)4049 get_total_entries(struct array_buffer *buf,
4050 		  unsigned long *total, unsigned long *entries)
4051 {
4052 	unsigned long t, e;
4053 	int cpu;
4054 
4055 	*total = 0;
4056 	*entries = 0;
4057 
4058 	for_each_tracing_cpu(cpu) {
4059 		get_total_entries_cpu(buf, &t, &e, cpu);
4060 		*total += t;
4061 		*entries += e;
4062 	}
4063 }
4064 
trace_total_entries_cpu(struct trace_array * tr,int cpu)4065 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4066 {
4067 	unsigned long total, entries;
4068 
4069 	if (!tr)
4070 		tr = &global_trace;
4071 
4072 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4073 
4074 	return entries;
4075 }
4076 
trace_total_entries(struct trace_array * tr)4077 unsigned long trace_total_entries(struct trace_array *tr)
4078 {
4079 	unsigned long total, entries;
4080 
4081 	if (!tr)
4082 		tr = &global_trace;
4083 
4084 	get_total_entries(&tr->array_buffer, &total, &entries);
4085 
4086 	return entries;
4087 }
4088 
print_lat_help_header(struct seq_file * m)4089 static void print_lat_help_header(struct seq_file *m)
4090 {
4091 	seq_puts(m, "#                    _------=> CPU#            \n"
4092 		    "#                   / _-----=> irqs-off/BH-disabled\n"
4093 		    "#                  | / _----=> need-resched    \n"
4094 		    "#                  || / _---=> hardirq/softirq \n"
4095 		    "#                  ||| / _--=> preempt-depth   \n"
4096 		    "#                  |||| / _-=> migrate-disable \n"
4097 		    "#                  ||||| /     delay           \n"
4098 		    "#  cmd     pid     |||||| time  |   caller     \n"
4099 		    "#     \\   /        ||||||  \\    |    /       \n");
4100 }
4101 
print_event_info(struct array_buffer * buf,struct seq_file * m)4102 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4103 {
4104 	unsigned long total;
4105 	unsigned long entries;
4106 
4107 	get_total_entries(buf, &total, &entries);
4108 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4109 		   entries, total, num_online_cpus());
4110 	seq_puts(m, "#\n");
4111 }
4112 
print_func_help_header(struct array_buffer * buf,struct seq_file * m,unsigned int flags)4113 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4114 				   unsigned int flags)
4115 {
4116 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4117 
4118 	print_event_info(buf, m);
4119 
4120 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4121 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4122 }
4123 
print_func_help_header_irq(struct array_buffer * buf,struct seq_file * m,unsigned int flags)4124 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4125 				       unsigned int flags)
4126 {
4127 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4128 	static const char space[] = "            ";
4129 	int prec = tgid ? 12 : 2;
4130 
4131 	print_event_info(buf, m);
4132 
4133 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4134 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4135 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4136 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4137 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4138 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4139 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4140 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4141 }
4142 
4143 void
print_trace_header(struct seq_file * m,struct trace_iterator * iter)4144 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4145 {
4146 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4147 	struct array_buffer *buf = iter->array_buffer;
4148 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4149 	struct tracer *type = iter->trace;
4150 	unsigned long entries;
4151 	unsigned long total;
4152 	const char *name = type->name;
4153 
4154 	get_total_entries(buf, &total, &entries);
4155 
4156 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4157 		   name, init_utsname()->release);
4158 	seq_puts(m, "# -----------------------------------"
4159 		 "---------------------------------\n");
4160 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4161 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4162 		   nsecs_to_usecs(data->saved_latency),
4163 		   entries,
4164 		   total,
4165 		   buf->cpu,
4166 		   preempt_model_str(),
4167 		   /* These are reserved for later use */
4168 		   0, 0, 0, 0);
4169 #ifdef CONFIG_SMP
4170 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4171 #else
4172 	seq_puts(m, ")\n");
4173 #endif
4174 	seq_puts(m, "#    -----------------\n");
4175 	seq_printf(m, "#    | task: %.16s-%d "
4176 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4177 		   data->comm, data->pid,
4178 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4179 		   data->policy, data->rt_priority);
4180 	seq_puts(m, "#    -----------------\n");
4181 
4182 	if (data->critical_start) {
4183 		seq_puts(m, "#  => started at: ");
4184 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4185 		trace_print_seq(m, &iter->seq);
4186 		seq_puts(m, "\n#  => ended at:   ");
4187 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4188 		trace_print_seq(m, &iter->seq);
4189 		seq_puts(m, "\n#\n");
4190 	}
4191 
4192 	seq_puts(m, "#\n");
4193 }
4194 
test_cpu_buff_start(struct trace_iterator * iter)4195 static void test_cpu_buff_start(struct trace_iterator *iter)
4196 {
4197 	struct trace_seq *s = &iter->seq;
4198 	struct trace_array *tr = iter->tr;
4199 
4200 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4201 		return;
4202 
4203 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4204 		return;
4205 
4206 	if (cpumask_available(iter->started) &&
4207 	    cpumask_test_cpu(iter->cpu, iter->started))
4208 		return;
4209 
4210 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4211 		return;
4212 
4213 	if (cpumask_available(iter->started))
4214 		cpumask_set_cpu(iter->cpu, iter->started);
4215 
4216 	/* Don't print started cpu buffer for the first entry of the trace */
4217 	if (iter->idx > 1)
4218 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4219 				iter->cpu);
4220 }
4221 
print_trace_fmt(struct trace_iterator * iter)4222 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4223 {
4224 	struct trace_array *tr = iter->tr;
4225 	struct trace_seq *s = &iter->seq;
4226 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4227 	struct trace_entry *entry;
4228 	struct trace_event *event;
4229 
4230 	entry = iter->ent;
4231 
4232 	test_cpu_buff_start(iter);
4233 
4234 	event = ftrace_find_event(entry->type);
4235 
4236 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4237 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4238 			trace_print_lat_context(iter);
4239 		else
4240 			trace_print_context(iter);
4241 	}
4242 
4243 	if (trace_seq_has_overflowed(s))
4244 		return TRACE_TYPE_PARTIAL_LINE;
4245 
4246 	if (event) {
4247 		if (tr->trace_flags & TRACE_ITER_FIELDS)
4248 			return print_event_fields(iter, event);
4249 		/*
4250 		 * For TRACE_EVENT() events, the print_fmt is not
4251 		 * safe to use if the array has delta offsets
4252 		 * Force printing via the fields.
4253 		 */
4254 		if ((tr->text_delta) &&
4255 		    event->type > __TRACE_LAST_TYPE)
4256 			return print_event_fields(iter, event);
4257 
4258 		return event->funcs->trace(iter, sym_flags, event);
4259 	}
4260 
4261 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4262 
4263 	return trace_handle_return(s);
4264 }
4265 
print_raw_fmt(struct trace_iterator * iter)4266 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4267 {
4268 	struct trace_array *tr = iter->tr;
4269 	struct trace_seq *s = &iter->seq;
4270 	struct trace_entry *entry;
4271 	struct trace_event *event;
4272 
4273 	entry = iter->ent;
4274 
4275 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4276 		trace_seq_printf(s, "%d %d %llu ",
4277 				 entry->pid, iter->cpu, iter->ts);
4278 
4279 	if (trace_seq_has_overflowed(s))
4280 		return TRACE_TYPE_PARTIAL_LINE;
4281 
4282 	event = ftrace_find_event(entry->type);
4283 	if (event)
4284 		return event->funcs->raw(iter, 0, event);
4285 
4286 	trace_seq_printf(s, "%d ?\n", entry->type);
4287 
4288 	return trace_handle_return(s);
4289 }
4290 
print_hex_fmt(struct trace_iterator * iter)4291 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4292 {
4293 	struct trace_array *tr = iter->tr;
4294 	struct trace_seq *s = &iter->seq;
4295 	unsigned char newline = '\n';
4296 	struct trace_entry *entry;
4297 	struct trace_event *event;
4298 
4299 	entry = iter->ent;
4300 
4301 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4302 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4303 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4304 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4305 		if (trace_seq_has_overflowed(s))
4306 			return TRACE_TYPE_PARTIAL_LINE;
4307 	}
4308 
4309 	event = ftrace_find_event(entry->type);
4310 	if (event) {
4311 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4312 		if (ret != TRACE_TYPE_HANDLED)
4313 			return ret;
4314 	}
4315 
4316 	SEQ_PUT_FIELD(s, newline);
4317 
4318 	return trace_handle_return(s);
4319 }
4320 
print_bin_fmt(struct trace_iterator * iter)4321 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4322 {
4323 	struct trace_array *tr = iter->tr;
4324 	struct trace_seq *s = &iter->seq;
4325 	struct trace_entry *entry;
4326 	struct trace_event *event;
4327 
4328 	entry = iter->ent;
4329 
4330 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4331 		SEQ_PUT_FIELD(s, entry->pid);
4332 		SEQ_PUT_FIELD(s, iter->cpu);
4333 		SEQ_PUT_FIELD(s, iter->ts);
4334 		if (trace_seq_has_overflowed(s))
4335 			return TRACE_TYPE_PARTIAL_LINE;
4336 	}
4337 
4338 	event = ftrace_find_event(entry->type);
4339 	return event ? event->funcs->binary(iter, 0, event) :
4340 		TRACE_TYPE_HANDLED;
4341 }
4342 
trace_empty(struct trace_iterator * iter)4343 int trace_empty(struct trace_iterator *iter)
4344 {
4345 	struct ring_buffer_iter *buf_iter;
4346 	int cpu;
4347 
4348 	/* If we are looking at one CPU buffer, only check that one */
4349 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4350 		cpu = iter->cpu_file;
4351 		buf_iter = trace_buffer_iter(iter, cpu);
4352 		if (buf_iter) {
4353 			if (!ring_buffer_iter_empty(buf_iter))
4354 				return 0;
4355 		} else {
4356 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4357 				return 0;
4358 		}
4359 		return 1;
4360 	}
4361 
4362 	for_each_tracing_cpu(cpu) {
4363 		buf_iter = trace_buffer_iter(iter, cpu);
4364 		if (buf_iter) {
4365 			if (!ring_buffer_iter_empty(buf_iter))
4366 				return 0;
4367 		} else {
4368 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4369 				return 0;
4370 		}
4371 	}
4372 
4373 	return 1;
4374 }
4375 
4376 /*  Called with trace_event_read_lock() held. */
print_trace_line(struct trace_iterator * iter)4377 enum print_line_t print_trace_line(struct trace_iterator *iter)
4378 {
4379 	struct trace_array *tr = iter->tr;
4380 	unsigned long trace_flags = tr->trace_flags;
4381 	enum print_line_t ret;
4382 
4383 	if (iter->lost_events) {
4384 		if (iter->lost_events == (unsigned long)-1)
4385 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4386 					 iter->cpu);
4387 		else
4388 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4389 					 iter->cpu, iter->lost_events);
4390 		if (trace_seq_has_overflowed(&iter->seq))
4391 			return TRACE_TYPE_PARTIAL_LINE;
4392 	}
4393 
4394 	if (iter->trace && iter->trace->print_line) {
4395 		ret = iter->trace->print_line(iter);
4396 		if (ret != TRACE_TYPE_UNHANDLED)
4397 			return ret;
4398 	}
4399 
4400 	if (iter->ent->type == TRACE_BPUTS &&
4401 			trace_flags & TRACE_ITER_PRINTK &&
4402 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4403 		return trace_print_bputs_msg_only(iter);
4404 
4405 	if (iter->ent->type == TRACE_BPRINT &&
4406 			trace_flags & TRACE_ITER_PRINTK &&
4407 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4408 		return trace_print_bprintk_msg_only(iter);
4409 
4410 	if (iter->ent->type == TRACE_PRINT &&
4411 			trace_flags & TRACE_ITER_PRINTK &&
4412 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4413 		return trace_print_printk_msg_only(iter);
4414 
4415 	if (trace_flags & TRACE_ITER_BIN)
4416 		return print_bin_fmt(iter);
4417 
4418 	if (trace_flags & TRACE_ITER_HEX)
4419 		return print_hex_fmt(iter);
4420 
4421 	if (trace_flags & TRACE_ITER_RAW)
4422 		return print_raw_fmt(iter);
4423 
4424 	return print_trace_fmt(iter);
4425 }
4426 
trace_latency_header(struct seq_file * m)4427 void trace_latency_header(struct seq_file *m)
4428 {
4429 	struct trace_iterator *iter = m->private;
4430 	struct trace_array *tr = iter->tr;
4431 
4432 	/* print nothing if the buffers are empty */
4433 	if (trace_empty(iter))
4434 		return;
4435 
4436 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4437 		print_trace_header(m, iter);
4438 
4439 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4440 		print_lat_help_header(m);
4441 }
4442 
trace_default_header(struct seq_file * m)4443 void trace_default_header(struct seq_file *m)
4444 {
4445 	struct trace_iterator *iter = m->private;
4446 	struct trace_array *tr = iter->tr;
4447 	unsigned long trace_flags = tr->trace_flags;
4448 
4449 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4450 		return;
4451 
4452 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4453 		/* print nothing if the buffers are empty */
4454 		if (trace_empty(iter))
4455 			return;
4456 		print_trace_header(m, iter);
4457 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4458 			print_lat_help_header(m);
4459 	} else {
4460 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4461 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4462 				print_func_help_header_irq(iter->array_buffer,
4463 							   m, trace_flags);
4464 			else
4465 				print_func_help_header(iter->array_buffer, m,
4466 						       trace_flags);
4467 		}
4468 	}
4469 }
4470 
test_ftrace_alive(struct seq_file * m)4471 static void test_ftrace_alive(struct seq_file *m)
4472 {
4473 	if (!ftrace_is_dead())
4474 		return;
4475 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4476 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4477 }
4478 
4479 #ifdef CONFIG_TRACER_MAX_TRACE
show_snapshot_main_help(struct seq_file * m)4480 static void show_snapshot_main_help(struct seq_file *m)
4481 {
4482 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4483 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4484 		    "#                      Takes a snapshot of the main buffer.\n"
4485 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4486 		    "#                      (Doesn't have to be '2' works with any number that\n"
4487 		    "#                       is not a '0' or '1')\n");
4488 }
4489 
show_snapshot_percpu_help(struct seq_file * m)4490 static void show_snapshot_percpu_help(struct seq_file *m)
4491 {
4492 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4493 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4494 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4495 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4496 #else
4497 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4498 		    "#                     Must use main snapshot file to allocate.\n");
4499 #endif
4500 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4501 		    "#                      (Doesn't have to be '2' works with any number that\n"
4502 		    "#                       is not a '0' or '1')\n");
4503 }
4504 
print_snapshot_help(struct seq_file * m,struct trace_iterator * iter)4505 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4506 {
4507 	if (iter->tr->allocated_snapshot)
4508 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4509 	else
4510 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4511 
4512 	seq_puts(m, "# Snapshot commands:\n");
4513 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4514 		show_snapshot_main_help(m);
4515 	else
4516 		show_snapshot_percpu_help(m);
4517 }
4518 #else
4519 /* Should never be called */
print_snapshot_help(struct seq_file * m,struct trace_iterator * iter)4520 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4521 #endif
4522 
s_show(struct seq_file * m,void * v)4523 static int s_show(struct seq_file *m, void *v)
4524 {
4525 	struct trace_iterator *iter = v;
4526 	int ret;
4527 
4528 	if (iter->ent == NULL) {
4529 		if (iter->tr) {
4530 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4531 			seq_puts(m, "#\n");
4532 			test_ftrace_alive(m);
4533 		}
4534 		if (iter->snapshot && trace_empty(iter))
4535 			print_snapshot_help(m, iter);
4536 		else if (iter->trace && iter->trace->print_header)
4537 			iter->trace->print_header(m);
4538 		else
4539 			trace_default_header(m);
4540 
4541 	} else if (iter->leftover) {
4542 		/*
4543 		 * If we filled the seq_file buffer earlier, we
4544 		 * want to just show it now.
4545 		 */
4546 		ret = trace_print_seq(m, &iter->seq);
4547 
4548 		/* ret should this time be zero, but you never know */
4549 		iter->leftover = ret;
4550 
4551 	} else {
4552 		ret = print_trace_line(iter);
4553 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4554 			iter->seq.full = 0;
4555 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4556 		}
4557 		ret = trace_print_seq(m, &iter->seq);
4558 		/*
4559 		 * If we overflow the seq_file buffer, then it will
4560 		 * ask us for this data again at start up.
4561 		 * Use that instead.
4562 		 *  ret is 0 if seq_file write succeeded.
4563 		 *        -1 otherwise.
4564 		 */
4565 		iter->leftover = ret;
4566 	}
4567 
4568 	return 0;
4569 }
4570 
4571 /*
4572  * Should be used after trace_array_get(), trace_types_lock
4573  * ensures that i_cdev was already initialized.
4574  */
tracing_get_cpu(struct inode * inode)4575 static inline int tracing_get_cpu(struct inode *inode)
4576 {
4577 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4578 		return (long)inode->i_cdev - 1;
4579 	return RING_BUFFER_ALL_CPUS;
4580 }
4581 
4582 static const struct seq_operations tracer_seq_ops = {
4583 	.start		= s_start,
4584 	.next		= s_next,
4585 	.stop		= s_stop,
4586 	.show		= s_show,
4587 };
4588 
4589 /*
4590  * Note, as iter itself can be allocated and freed in different
4591  * ways, this function is only used to free its content, and not
4592  * the iterator itself. The only requirement to all the allocations
4593  * is that it must zero all fields (kzalloc), as freeing works with
4594  * ethier allocated content or NULL.
4595  */
free_trace_iter_content(struct trace_iterator * iter)4596 static void free_trace_iter_content(struct trace_iterator *iter)
4597 {
4598 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
4599 	if (iter->fmt != static_fmt_buf)
4600 		kfree(iter->fmt);
4601 
4602 	kfree(iter->temp);
4603 	kfree(iter->buffer_iter);
4604 	mutex_destroy(&iter->mutex);
4605 	free_cpumask_var(iter->started);
4606 }
4607 
4608 static struct trace_iterator *
__tracing_open(struct inode * inode,struct file * file,bool snapshot)4609 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4610 {
4611 	struct trace_array *tr = inode->i_private;
4612 	struct trace_iterator *iter;
4613 	int cpu;
4614 
4615 	if (tracing_disabled)
4616 		return ERR_PTR(-ENODEV);
4617 
4618 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4619 	if (!iter)
4620 		return ERR_PTR(-ENOMEM);
4621 
4622 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4623 				    GFP_KERNEL);
4624 	if (!iter->buffer_iter)
4625 		goto release;
4626 
4627 	/*
4628 	 * trace_find_next_entry() may need to save off iter->ent.
4629 	 * It will place it into the iter->temp buffer. As most
4630 	 * events are less than 128, allocate a buffer of that size.
4631 	 * If one is greater, then trace_find_next_entry() will
4632 	 * allocate a new buffer to adjust for the bigger iter->ent.
4633 	 * It's not critical if it fails to get allocated here.
4634 	 */
4635 	iter->temp = kmalloc(128, GFP_KERNEL);
4636 	if (iter->temp)
4637 		iter->temp_size = 128;
4638 
4639 	/*
4640 	 * trace_event_printf() may need to modify given format
4641 	 * string to replace %p with %px so that it shows real address
4642 	 * instead of hash value. However, that is only for the event
4643 	 * tracing, other tracer may not need. Defer the allocation
4644 	 * until it is needed.
4645 	 */
4646 	iter->fmt = NULL;
4647 	iter->fmt_size = 0;
4648 
4649 	mutex_lock(&trace_types_lock);
4650 	iter->trace = tr->current_trace;
4651 
4652 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4653 		goto fail;
4654 
4655 	iter->tr = tr;
4656 
4657 #ifdef CONFIG_TRACER_MAX_TRACE
4658 	/* Currently only the top directory has a snapshot */
4659 	if (tr->current_trace->print_max || snapshot)
4660 		iter->array_buffer = &tr->max_buffer;
4661 	else
4662 #endif
4663 		iter->array_buffer = &tr->array_buffer;
4664 	iter->snapshot = snapshot;
4665 	iter->pos = -1;
4666 	iter->cpu_file = tracing_get_cpu(inode);
4667 	mutex_init(&iter->mutex);
4668 
4669 	/* Notify the tracer early; before we stop tracing. */
4670 	if (iter->trace->open)
4671 		iter->trace->open(iter);
4672 
4673 	/* Annotate start of buffers if we had overruns */
4674 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4675 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4676 
4677 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4678 	if (trace_clocks[tr->clock_id].in_ns)
4679 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4680 
4681 	/*
4682 	 * If pause-on-trace is enabled, then stop the trace while
4683 	 * dumping, unless this is the "snapshot" file
4684 	 */
4685 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4686 		tracing_stop_tr(tr);
4687 
4688 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4689 		for_each_tracing_cpu(cpu) {
4690 			iter->buffer_iter[cpu] =
4691 				ring_buffer_read_start(iter->array_buffer->buffer,
4692 						       cpu, GFP_KERNEL);
4693 			tracing_iter_reset(iter, cpu);
4694 		}
4695 	} else {
4696 		cpu = iter->cpu_file;
4697 		iter->buffer_iter[cpu] =
4698 			ring_buffer_read_start(iter->array_buffer->buffer,
4699 					       cpu, GFP_KERNEL);
4700 		tracing_iter_reset(iter, cpu);
4701 	}
4702 
4703 	mutex_unlock(&trace_types_lock);
4704 
4705 	return iter;
4706 
4707  fail:
4708 	mutex_unlock(&trace_types_lock);
4709 	free_trace_iter_content(iter);
4710 release:
4711 	seq_release_private(inode, file);
4712 	return ERR_PTR(-ENOMEM);
4713 }
4714 
tracing_open_generic(struct inode * inode,struct file * filp)4715 int tracing_open_generic(struct inode *inode, struct file *filp)
4716 {
4717 	int ret;
4718 
4719 	ret = tracing_check_open_get_tr(NULL);
4720 	if (ret)
4721 		return ret;
4722 
4723 	filp->private_data = inode->i_private;
4724 	return 0;
4725 }
4726 
tracing_is_disabled(void)4727 bool tracing_is_disabled(void)
4728 {
4729 	return (tracing_disabled) ? true: false;
4730 }
4731 
4732 /*
4733  * Open and update trace_array ref count.
4734  * Must have the current trace_array passed to it.
4735  */
tracing_open_generic_tr(struct inode * inode,struct file * filp)4736 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4737 {
4738 	struct trace_array *tr = inode->i_private;
4739 	int ret;
4740 
4741 	ret = tracing_check_open_get_tr(tr);
4742 	if (ret)
4743 		return ret;
4744 
4745 	filp->private_data = inode->i_private;
4746 
4747 	return 0;
4748 }
4749 
4750 /*
4751  * The private pointer of the inode is the trace_event_file.
4752  * Update the tr ref count associated to it.
4753  */
tracing_open_file_tr(struct inode * inode,struct file * filp)4754 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4755 {
4756 	struct trace_event_file *file = inode->i_private;
4757 	int ret;
4758 
4759 	ret = tracing_check_open_get_tr(file->tr);
4760 	if (ret)
4761 		return ret;
4762 
4763 	guard(mutex)(&event_mutex);
4764 
4765 	/* Fail if the file is marked for removal */
4766 	if (file->flags & EVENT_FILE_FL_FREED) {
4767 		trace_array_put(file->tr);
4768 		return -ENODEV;
4769 	} else {
4770 		event_file_get(file);
4771 	}
4772 
4773 	filp->private_data = inode->i_private;
4774 
4775 	return 0;
4776 }
4777 
tracing_release_file_tr(struct inode * inode,struct file * filp)4778 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4779 {
4780 	struct trace_event_file *file = inode->i_private;
4781 
4782 	trace_array_put(file->tr);
4783 	event_file_put(file);
4784 
4785 	return 0;
4786 }
4787 
tracing_single_release_file_tr(struct inode * inode,struct file * filp)4788 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4789 {
4790 	tracing_release_file_tr(inode, filp);
4791 	return single_release(inode, filp);
4792 }
4793 
tracing_mark_open(struct inode * inode,struct file * filp)4794 static int tracing_mark_open(struct inode *inode, struct file *filp)
4795 {
4796 	stream_open(inode, filp);
4797 	return tracing_open_generic_tr(inode, filp);
4798 }
4799 
tracing_release(struct inode * inode,struct file * file)4800 static int tracing_release(struct inode *inode, struct file *file)
4801 {
4802 	struct trace_array *tr = inode->i_private;
4803 	struct seq_file *m = file->private_data;
4804 	struct trace_iterator *iter;
4805 	int cpu;
4806 
4807 	if (!(file->f_mode & FMODE_READ)) {
4808 		trace_array_put(tr);
4809 		return 0;
4810 	}
4811 
4812 	/* Writes do not use seq_file */
4813 	iter = m->private;
4814 	mutex_lock(&trace_types_lock);
4815 
4816 	for_each_tracing_cpu(cpu) {
4817 		if (iter->buffer_iter[cpu])
4818 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4819 	}
4820 
4821 	if (iter->trace && iter->trace->close)
4822 		iter->trace->close(iter);
4823 
4824 	if (!iter->snapshot && tr->stop_count)
4825 		/* reenable tracing if it was previously enabled */
4826 		tracing_start_tr(tr);
4827 
4828 	__trace_array_put(tr);
4829 
4830 	mutex_unlock(&trace_types_lock);
4831 
4832 	free_trace_iter_content(iter);
4833 	seq_release_private(inode, file);
4834 
4835 	return 0;
4836 }
4837 
tracing_release_generic_tr(struct inode * inode,struct file * file)4838 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4839 {
4840 	struct trace_array *tr = inode->i_private;
4841 
4842 	trace_array_put(tr);
4843 	return 0;
4844 }
4845 
tracing_single_release_tr(struct inode * inode,struct file * file)4846 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4847 {
4848 	struct trace_array *tr = inode->i_private;
4849 
4850 	trace_array_put(tr);
4851 
4852 	return single_release(inode, file);
4853 }
4854 
tracing_open(struct inode * inode,struct file * file)4855 static int tracing_open(struct inode *inode, struct file *file)
4856 {
4857 	struct trace_array *tr = inode->i_private;
4858 	struct trace_iterator *iter;
4859 	int ret;
4860 
4861 	ret = tracing_check_open_get_tr(tr);
4862 	if (ret)
4863 		return ret;
4864 
4865 	/* If this file was open for write, then erase contents */
4866 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4867 		int cpu = tracing_get_cpu(inode);
4868 		struct array_buffer *trace_buf = &tr->array_buffer;
4869 
4870 #ifdef CONFIG_TRACER_MAX_TRACE
4871 		if (tr->current_trace->print_max)
4872 			trace_buf = &tr->max_buffer;
4873 #endif
4874 
4875 		if (cpu == RING_BUFFER_ALL_CPUS)
4876 			tracing_reset_online_cpus(trace_buf);
4877 		else
4878 			tracing_reset_cpu(trace_buf, cpu);
4879 	}
4880 
4881 	if (file->f_mode & FMODE_READ) {
4882 		iter = __tracing_open(inode, file, false);
4883 		if (IS_ERR(iter))
4884 			ret = PTR_ERR(iter);
4885 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4886 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4887 	}
4888 
4889 	if (ret < 0)
4890 		trace_array_put(tr);
4891 
4892 	return ret;
4893 }
4894 
4895 /*
4896  * Some tracers are not suitable for instance buffers.
4897  * A tracer is always available for the global array (toplevel)
4898  * or if it explicitly states that it is.
4899  */
4900 static bool
trace_ok_for_array(struct tracer * t,struct trace_array * tr)4901 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4902 {
4903 #ifdef CONFIG_TRACER_SNAPSHOT
4904 	/* arrays with mapped buffer range do not have snapshots */
4905 	if (tr->range_addr_start && t->use_max_tr)
4906 		return false;
4907 #endif
4908 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4909 }
4910 
4911 /* Find the next tracer that this trace array may use */
4912 static struct tracer *
get_tracer_for_array(struct trace_array * tr,struct tracer * t)4913 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4914 {
4915 	while (t && !trace_ok_for_array(t, tr))
4916 		t = t->next;
4917 
4918 	return t;
4919 }
4920 
4921 static void *
t_next(struct seq_file * m,void * v,loff_t * pos)4922 t_next(struct seq_file *m, void *v, loff_t *pos)
4923 {
4924 	struct trace_array *tr = m->private;
4925 	struct tracer *t = v;
4926 
4927 	(*pos)++;
4928 
4929 	if (t)
4930 		t = get_tracer_for_array(tr, t->next);
4931 
4932 	return t;
4933 }
4934 
t_start(struct seq_file * m,loff_t * pos)4935 static void *t_start(struct seq_file *m, loff_t *pos)
4936 {
4937 	struct trace_array *tr = m->private;
4938 	struct tracer *t;
4939 	loff_t l = 0;
4940 
4941 	mutex_lock(&trace_types_lock);
4942 
4943 	t = get_tracer_for_array(tr, trace_types);
4944 	for (; t && l < *pos; t = t_next(m, t, &l))
4945 			;
4946 
4947 	return t;
4948 }
4949 
t_stop(struct seq_file * m,void * p)4950 static void t_stop(struct seq_file *m, void *p)
4951 {
4952 	mutex_unlock(&trace_types_lock);
4953 }
4954 
t_show(struct seq_file * m,void * v)4955 static int t_show(struct seq_file *m, void *v)
4956 {
4957 	struct tracer *t = v;
4958 
4959 	if (!t)
4960 		return 0;
4961 
4962 	seq_puts(m, t->name);
4963 	if (t->next)
4964 		seq_putc(m, ' ');
4965 	else
4966 		seq_putc(m, '\n');
4967 
4968 	return 0;
4969 }
4970 
4971 static const struct seq_operations show_traces_seq_ops = {
4972 	.start		= t_start,
4973 	.next		= t_next,
4974 	.stop		= t_stop,
4975 	.show		= t_show,
4976 };
4977 
show_traces_open(struct inode * inode,struct file * file)4978 static int show_traces_open(struct inode *inode, struct file *file)
4979 {
4980 	struct trace_array *tr = inode->i_private;
4981 	struct seq_file *m;
4982 	int ret;
4983 
4984 	ret = tracing_check_open_get_tr(tr);
4985 	if (ret)
4986 		return ret;
4987 
4988 	ret = seq_open(file, &show_traces_seq_ops);
4989 	if (ret) {
4990 		trace_array_put(tr);
4991 		return ret;
4992 	}
4993 
4994 	m = file->private_data;
4995 	m->private = tr;
4996 
4997 	return 0;
4998 }
4999 
tracing_seq_release(struct inode * inode,struct file * file)5000 static int tracing_seq_release(struct inode *inode, struct file *file)
5001 {
5002 	struct trace_array *tr = inode->i_private;
5003 
5004 	trace_array_put(tr);
5005 	return seq_release(inode, file);
5006 }
5007 
5008 static ssize_t
tracing_write_stub(struct file * filp,const char __user * ubuf,size_t count,loff_t * ppos)5009 tracing_write_stub(struct file *filp, const char __user *ubuf,
5010 		   size_t count, loff_t *ppos)
5011 {
5012 	return count;
5013 }
5014 
tracing_lseek(struct file * file,loff_t offset,int whence)5015 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5016 {
5017 	int ret;
5018 
5019 	if (file->f_mode & FMODE_READ)
5020 		ret = seq_lseek(file, offset, whence);
5021 	else
5022 		file->f_pos = ret = 0;
5023 
5024 	return ret;
5025 }
5026 
5027 static const struct file_operations tracing_fops = {
5028 	.open		= tracing_open,
5029 	.read		= seq_read,
5030 	.read_iter	= seq_read_iter,
5031 	.splice_read	= copy_splice_read,
5032 	.write		= tracing_write_stub,
5033 	.llseek		= tracing_lseek,
5034 	.release	= tracing_release,
5035 };
5036 
5037 static const struct file_operations show_traces_fops = {
5038 	.open		= show_traces_open,
5039 	.read		= seq_read,
5040 	.llseek		= seq_lseek,
5041 	.release	= tracing_seq_release,
5042 };
5043 
5044 static ssize_t
tracing_cpumask_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)5045 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5046 		     size_t count, loff_t *ppos)
5047 {
5048 	struct trace_array *tr = file_inode(filp)->i_private;
5049 	char *mask_str __free(kfree) = NULL;
5050 	int len;
5051 
5052 	len = snprintf(NULL, 0, "%*pb\n",
5053 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5054 	mask_str = kmalloc(len, GFP_KERNEL);
5055 	if (!mask_str)
5056 		return -ENOMEM;
5057 
5058 	len = snprintf(mask_str, len, "%*pb\n",
5059 		       cpumask_pr_args(tr->tracing_cpumask));
5060 	if (len >= count)
5061 		return -EINVAL;
5062 
5063 	return simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5064 }
5065 
tracing_set_cpumask(struct trace_array * tr,cpumask_var_t tracing_cpumask_new)5066 int tracing_set_cpumask(struct trace_array *tr,
5067 			cpumask_var_t tracing_cpumask_new)
5068 {
5069 	int cpu;
5070 
5071 	if (!tr)
5072 		return -EINVAL;
5073 
5074 	local_irq_disable();
5075 	arch_spin_lock(&tr->max_lock);
5076 	for_each_tracing_cpu(cpu) {
5077 		/*
5078 		 * Increase/decrease the disabled counter if we are
5079 		 * about to flip a bit in the cpumask:
5080 		 */
5081 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5082 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5083 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5084 #ifdef CONFIG_TRACER_MAX_TRACE
5085 			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5086 #endif
5087 		}
5088 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5089 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5090 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5091 #ifdef CONFIG_TRACER_MAX_TRACE
5092 			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5093 #endif
5094 		}
5095 	}
5096 	arch_spin_unlock(&tr->max_lock);
5097 	local_irq_enable();
5098 
5099 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5100 
5101 	return 0;
5102 }
5103 
5104 static ssize_t
tracing_cpumask_write(struct file * filp,const char __user * ubuf,size_t count,loff_t * ppos)5105 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5106 		      size_t count, loff_t *ppos)
5107 {
5108 	struct trace_array *tr = file_inode(filp)->i_private;
5109 	cpumask_var_t tracing_cpumask_new;
5110 	int err;
5111 
5112 	if (count == 0 || count > KMALLOC_MAX_SIZE)
5113 		return -EINVAL;
5114 
5115 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5116 		return -ENOMEM;
5117 
5118 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5119 	if (err)
5120 		goto err_free;
5121 
5122 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5123 	if (err)
5124 		goto err_free;
5125 
5126 	free_cpumask_var(tracing_cpumask_new);
5127 
5128 	return count;
5129 
5130 err_free:
5131 	free_cpumask_var(tracing_cpumask_new);
5132 
5133 	return err;
5134 }
5135 
5136 static const struct file_operations tracing_cpumask_fops = {
5137 	.open		= tracing_open_generic_tr,
5138 	.read		= tracing_cpumask_read,
5139 	.write		= tracing_cpumask_write,
5140 	.release	= tracing_release_generic_tr,
5141 	.llseek		= generic_file_llseek,
5142 };
5143 
tracing_trace_options_show(struct seq_file * m,void * v)5144 static int tracing_trace_options_show(struct seq_file *m, void *v)
5145 {
5146 	struct tracer_opt *trace_opts;
5147 	struct trace_array *tr = m->private;
5148 	u32 tracer_flags;
5149 	int i;
5150 
5151 	guard(mutex)(&trace_types_lock);
5152 
5153 	tracer_flags = tr->current_trace->flags->val;
5154 	trace_opts = tr->current_trace->flags->opts;
5155 
5156 	for (i = 0; trace_options[i]; i++) {
5157 		if (tr->trace_flags & (1 << i))
5158 			seq_printf(m, "%s\n", trace_options[i]);
5159 		else
5160 			seq_printf(m, "no%s\n", trace_options[i]);
5161 	}
5162 
5163 	for (i = 0; trace_opts[i].name; i++) {
5164 		if (tracer_flags & trace_opts[i].bit)
5165 			seq_printf(m, "%s\n", trace_opts[i].name);
5166 		else
5167 			seq_printf(m, "no%s\n", trace_opts[i].name);
5168 	}
5169 
5170 	return 0;
5171 }
5172 
__set_tracer_option(struct trace_array * tr,struct tracer_flags * tracer_flags,struct tracer_opt * opts,int neg)5173 static int __set_tracer_option(struct trace_array *tr,
5174 			       struct tracer_flags *tracer_flags,
5175 			       struct tracer_opt *opts, int neg)
5176 {
5177 	struct tracer *trace = tracer_flags->trace;
5178 	int ret;
5179 
5180 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5181 	if (ret)
5182 		return ret;
5183 
5184 	if (neg)
5185 		tracer_flags->val &= ~opts->bit;
5186 	else
5187 		tracer_flags->val |= opts->bit;
5188 	return 0;
5189 }
5190 
5191 /* Try to assign a tracer specific option */
set_tracer_option(struct trace_array * tr,char * cmp,int neg)5192 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5193 {
5194 	struct tracer *trace = tr->current_trace;
5195 	struct tracer_flags *tracer_flags = trace->flags;
5196 	struct tracer_opt *opts = NULL;
5197 	int i;
5198 
5199 	for (i = 0; tracer_flags->opts[i].name; i++) {
5200 		opts = &tracer_flags->opts[i];
5201 
5202 		if (strcmp(cmp, opts->name) == 0)
5203 			return __set_tracer_option(tr, trace->flags, opts, neg);
5204 	}
5205 
5206 	return -EINVAL;
5207 }
5208 
5209 /* Some tracers require overwrite to stay enabled */
trace_keep_overwrite(struct tracer * tracer,u32 mask,int set)5210 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5211 {
5212 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5213 		return -1;
5214 
5215 	return 0;
5216 }
5217 
set_tracer_flag(struct trace_array * tr,unsigned int mask,int enabled)5218 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5219 {
5220 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5221 	    (mask == TRACE_ITER_RECORD_CMD) ||
5222 	    (mask == TRACE_ITER_TRACE_PRINTK) ||
5223 	    (mask == TRACE_ITER_COPY_MARKER))
5224 		lockdep_assert_held(&event_mutex);
5225 
5226 	/* do nothing if flag is already set */
5227 	if (!!(tr->trace_flags & mask) == !!enabled)
5228 		return 0;
5229 
5230 	/* Give the tracer a chance to approve the change */
5231 	if (tr->current_trace->flag_changed)
5232 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5233 			return -EINVAL;
5234 
5235 	if (mask == TRACE_ITER_TRACE_PRINTK) {
5236 		if (enabled) {
5237 			update_printk_trace(tr);
5238 		} else {
5239 			/*
5240 			 * The global_trace cannot clear this.
5241 			 * It's flag only gets cleared if another instance sets it.
5242 			 */
5243 			if (printk_trace == &global_trace)
5244 				return -EINVAL;
5245 			/*
5246 			 * An instance must always have it set.
5247 			 * by default, that's the global_trace instane.
5248 			 */
5249 			if (printk_trace == tr)
5250 				update_printk_trace(&global_trace);
5251 		}
5252 	}
5253 
5254 	if (mask == TRACE_ITER_COPY_MARKER)
5255 		update_marker_trace(tr, enabled);
5256 
5257 	if (enabled)
5258 		tr->trace_flags |= mask;
5259 	else
5260 		tr->trace_flags &= ~mask;
5261 
5262 	if (mask == TRACE_ITER_RECORD_CMD)
5263 		trace_event_enable_cmd_record(enabled);
5264 
5265 	if (mask == TRACE_ITER_RECORD_TGID) {
5266 
5267 		if (trace_alloc_tgid_map() < 0) {
5268 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5269 			return -ENOMEM;
5270 		}
5271 
5272 		trace_event_enable_tgid_record(enabled);
5273 	}
5274 
5275 	if (mask == TRACE_ITER_EVENT_FORK)
5276 		trace_event_follow_fork(tr, enabled);
5277 
5278 	if (mask == TRACE_ITER_FUNC_FORK)
5279 		ftrace_pid_follow_fork(tr, enabled);
5280 
5281 	if (mask == TRACE_ITER_OVERWRITE) {
5282 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5283 #ifdef CONFIG_TRACER_MAX_TRACE
5284 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5285 #endif
5286 	}
5287 
5288 	if (mask == TRACE_ITER_PRINTK) {
5289 		trace_printk_start_stop_comm(enabled);
5290 		trace_printk_control(enabled);
5291 	}
5292 
5293 	return 0;
5294 }
5295 
trace_set_options(struct trace_array * tr,char * option)5296 int trace_set_options(struct trace_array *tr, char *option)
5297 {
5298 	char *cmp;
5299 	int neg = 0;
5300 	int ret;
5301 	size_t orig_len = strlen(option);
5302 	int len;
5303 
5304 	cmp = strstrip(option);
5305 
5306 	len = str_has_prefix(cmp, "no");
5307 	if (len)
5308 		neg = 1;
5309 
5310 	cmp += len;
5311 
5312 	mutex_lock(&event_mutex);
5313 	mutex_lock(&trace_types_lock);
5314 
5315 	ret = match_string(trace_options, -1, cmp);
5316 	/* If no option could be set, test the specific tracer options */
5317 	if (ret < 0)
5318 		ret = set_tracer_option(tr, cmp, neg);
5319 	else
5320 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5321 
5322 	mutex_unlock(&trace_types_lock);
5323 	mutex_unlock(&event_mutex);
5324 
5325 	/*
5326 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5327 	 * turn it back into a space.
5328 	 */
5329 	if (orig_len > strlen(option))
5330 		option[strlen(option)] = ' ';
5331 
5332 	return ret;
5333 }
5334 
apply_trace_boot_options(void)5335 static void __init apply_trace_boot_options(void)
5336 {
5337 	char *buf = trace_boot_options_buf;
5338 	char *option;
5339 
5340 	while (true) {
5341 		option = strsep(&buf, ",");
5342 
5343 		if (!option)
5344 			break;
5345 
5346 		if (*option)
5347 			trace_set_options(&global_trace, option);
5348 
5349 		/* Put back the comma to allow this to be called again */
5350 		if (buf)
5351 			*(buf - 1) = ',';
5352 	}
5353 }
5354 
5355 static ssize_t
tracing_trace_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)5356 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5357 			size_t cnt, loff_t *ppos)
5358 {
5359 	struct seq_file *m = filp->private_data;
5360 	struct trace_array *tr = m->private;
5361 	char buf[64];
5362 	int ret;
5363 
5364 	if (cnt >= sizeof(buf))
5365 		return -EINVAL;
5366 
5367 	if (copy_from_user(buf, ubuf, cnt))
5368 		return -EFAULT;
5369 
5370 	buf[cnt] = 0;
5371 
5372 	ret = trace_set_options(tr, buf);
5373 	if (ret < 0)
5374 		return ret;
5375 
5376 	*ppos += cnt;
5377 
5378 	return cnt;
5379 }
5380 
tracing_trace_options_open(struct inode * inode,struct file * file)5381 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5382 {
5383 	struct trace_array *tr = inode->i_private;
5384 	int ret;
5385 
5386 	ret = tracing_check_open_get_tr(tr);
5387 	if (ret)
5388 		return ret;
5389 
5390 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5391 	if (ret < 0)
5392 		trace_array_put(tr);
5393 
5394 	return ret;
5395 }
5396 
5397 static const struct file_operations tracing_iter_fops = {
5398 	.open		= tracing_trace_options_open,
5399 	.read		= seq_read,
5400 	.llseek		= seq_lseek,
5401 	.release	= tracing_single_release_tr,
5402 	.write		= tracing_trace_options_write,
5403 };
5404 
5405 static const char readme_msg[] =
5406 	"tracing mini-HOWTO:\n\n"
5407 	"By default tracefs removes all OTH file permission bits.\n"
5408 	"When mounting tracefs an optional group id can be specified\n"
5409 	"which adds the group to every directory and file in tracefs:\n\n"
5410 	"\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
5411 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5412 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5413 	" Important files:\n"
5414 	"  trace\t\t\t- The static contents of the buffer\n"
5415 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5416 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5417 	"  current_tracer\t- function and latency tracers\n"
5418 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5419 	"  error_log\t- error log for failed commands (that support it)\n"
5420 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5421 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5422 	"  trace_clock\t\t- change the clock used to order events\n"
5423 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5424 	"      global:   Synced across CPUs but slows tracing down.\n"
5425 	"     counter:   Not a clock, but just an increment\n"
5426 	"      uptime:   Jiffy counter from time of boot\n"
5427 	"        perf:   Same clock that perf events use\n"
5428 #ifdef CONFIG_X86_64
5429 	"     x86-tsc:   TSC cycle counter\n"
5430 #endif
5431 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5432 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5433 	"    absolute:   Absolute (standalone) timestamp\n"
5434 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5435 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5436 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5437 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5438 	"\t\t\t  Remove sub-buffer with rmdir\n"
5439 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5440 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5441 	"\t\t\t  option name\n"
5442 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5443 #ifdef CONFIG_DYNAMIC_FTRACE
5444 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5445 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5446 	"\t\t\t  functions\n"
5447 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5448 	"\t     modules: Can select a group via module\n"
5449 	"\t      Format: :mod:<module-name>\n"
5450 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5451 	"\t    triggers: a command to perform when function is hit\n"
5452 	"\t      Format: <function>:<trigger>[:count]\n"
5453 	"\t     trigger: traceon, traceoff\n"
5454 	"\t\t      enable_event:<system>:<event>\n"
5455 	"\t\t      disable_event:<system>:<event>\n"
5456 #ifdef CONFIG_STACKTRACE
5457 	"\t\t      stacktrace\n"
5458 #endif
5459 #ifdef CONFIG_TRACER_SNAPSHOT
5460 	"\t\t      snapshot\n"
5461 #endif
5462 	"\t\t      dump\n"
5463 	"\t\t      cpudump\n"
5464 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5465 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5466 	"\t     The first one will disable tracing every time do_fault is hit\n"
5467 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5468 	"\t       The first time do trap is hit and it disables tracing, the\n"
5469 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5470 	"\t       the counter will not decrement. It only decrements when the\n"
5471 	"\t       trigger did work\n"
5472 	"\t     To remove trigger without count:\n"
5473 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5474 	"\t     To remove trigger with a count:\n"
5475 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5476 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5477 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5478 	"\t    modules: Can select a group via module command :mod:\n"
5479 	"\t    Does not accept triggers\n"
5480 #endif /* CONFIG_DYNAMIC_FTRACE */
5481 #ifdef CONFIG_FUNCTION_TRACER
5482 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5483 	"\t\t    (function)\n"
5484 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5485 	"\t\t    (function)\n"
5486 #endif
5487 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5488 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5489 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5490 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5491 #endif
5492 #ifdef CONFIG_TRACER_SNAPSHOT
5493 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5494 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5495 	"\t\t\t  information\n"
5496 #endif
5497 #ifdef CONFIG_STACK_TRACER
5498 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5499 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5500 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5501 	"\t\t\t  new trace)\n"
5502 #ifdef CONFIG_DYNAMIC_FTRACE
5503 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5504 	"\t\t\t  traces\n"
5505 #endif
5506 #endif /* CONFIG_STACK_TRACER */
5507 #ifdef CONFIG_DYNAMIC_EVENTS
5508 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5509 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5510 #endif
5511 #ifdef CONFIG_KPROBE_EVENTS
5512 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5513 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5514 #endif
5515 #ifdef CONFIG_UPROBE_EVENTS
5516 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5517 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5518 #endif
5519 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5520     defined(CONFIG_FPROBE_EVENTS)
5521 	"\t  accepts: event-definitions (one definition per line)\n"
5522 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5523 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5524 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5525 #endif
5526 #ifdef CONFIG_FPROBE_EVENTS
5527 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5528 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5529 #endif
5530 #ifdef CONFIG_HIST_TRIGGERS
5531 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5532 #endif
5533 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5534 	"\t           -:[<group>/][<event>]\n"
5535 #ifdef CONFIG_KPROBE_EVENTS
5536 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5537   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5538 #endif
5539 #ifdef CONFIG_UPROBE_EVENTS
5540   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5541 #endif
5542 	"\t     args: <name>=fetcharg[:type]\n"
5543 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5544 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5545 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5546 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5547 	"\t           <argname>[->field[->field|.field...]],\n"
5548 #endif
5549 #else
5550 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5551 #endif
5552 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5553 	"\t     kernel return probes support: $retval, $arg<N>, $comm\n"
5554 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5555 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5556 	"\t           symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
5557 #ifdef CONFIG_HIST_TRIGGERS
5558 	"\t    field: <stype> <name>;\n"
5559 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5560 	"\t           [unsigned] char/int/long\n"
5561 #endif
5562 	"\t    efield: For event probes ('e' types), the field is on of the fields\n"
5563 	"\t            of the <attached-group>/<attached-event>.\n"
5564 #endif
5565 	"  set_event\t\t- Enables events by name written into it\n"
5566 	"\t\t\t  Can enable module events via: :mod:<module>\n"
5567 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5568 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5569 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5570 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5571 	"\t\t\t  events\n"
5572 	"      filter\t\t- If set, only events passing filter are traced\n"
5573 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5574 	"\t\t\t  <event>:\n"
5575 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5576 	"      filter\t\t- If set, only events passing filter are traced\n"
5577 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5578 	"\t    Format: <trigger>[:count][if <filter>]\n"
5579 	"\t   trigger: traceon, traceoff\n"
5580 	"\t            enable_event:<system>:<event>\n"
5581 	"\t            disable_event:<system>:<event>\n"
5582 #ifdef CONFIG_HIST_TRIGGERS
5583 	"\t            enable_hist:<system>:<event>\n"
5584 	"\t            disable_hist:<system>:<event>\n"
5585 #endif
5586 #ifdef CONFIG_STACKTRACE
5587 	"\t\t    stacktrace\n"
5588 #endif
5589 #ifdef CONFIG_TRACER_SNAPSHOT
5590 	"\t\t    snapshot\n"
5591 #endif
5592 #ifdef CONFIG_HIST_TRIGGERS
5593 	"\t\t    hist (see below)\n"
5594 #endif
5595 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5596 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5597 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5598 	"\t                  events/block/block_unplug/trigger\n"
5599 	"\t   The first disables tracing every time block_unplug is hit.\n"
5600 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5601 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5602 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5603 	"\t   Like function triggers, the counter is only decremented if it\n"
5604 	"\t    enabled or disabled tracing.\n"
5605 	"\t   To remove a trigger without a count:\n"
5606 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5607 	"\t   To remove a trigger with a count:\n"
5608 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5609 	"\t   Filters can be ignored when removing a trigger.\n"
5610 #ifdef CONFIG_HIST_TRIGGERS
5611 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5612 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5613 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5614 	"\t            [:values=<field1[,field2,...]>]\n"
5615 	"\t            [:sort=<field1[,field2,...]>]\n"
5616 	"\t            [:size=#entries]\n"
5617 	"\t            [:pause][:continue][:clear]\n"
5618 	"\t            [:name=histname1]\n"
5619 	"\t            [:nohitcount]\n"
5620 	"\t            [:<handler>.<action>]\n"
5621 	"\t            [if <filter>]\n\n"
5622 	"\t    Note, special fields can be used as well:\n"
5623 	"\t            common_timestamp - to record current timestamp\n"
5624 	"\t            common_cpu - to record the CPU the event happened on\n"
5625 	"\n"
5626 	"\t    A hist trigger variable can be:\n"
5627 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
5628 	"\t        - a reference to another variable e.g. y=$x,\n"
5629 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5630 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5631 	"\n"
5632 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5633 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5634 	"\t    variable reference, field or numeric literal.\n"
5635 	"\n"
5636 	"\t    When a matching event is hit, an entry is added to a hash\n"
5637 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5638 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5639 	"\t    correspond to fields in the event's format description.  Keys\n"
5640 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
5641 	"\t    Compound keys consisting of up to two fields can be specified\n"
5642 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5643 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5644 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5645 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5646 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5647 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5648 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5649 	"\t    its histogram data will be shared with other triggers of the\n"
5650 	"\t    same name, and trigger hits will update this common data.\n\n"
5651 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5652 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5653 	"\t    triggers attached to an event, there will be a table for each\n"
5654 	"\t    trigger in the output.  The table displayed for a named\n"
5655 	"\t    trigger will be the same as any other instance having the\n"
5656 	"\t    same name.  The default format used to display a given field\n"
5657 	"\t    can be modified by appending any of the following modifiers\n"
5658 	"\t    to the field name, as applicable:\n\n"
5659 	"\t            .hex        display a number as a hex value\n"
5660 	"\t            .sym        display an address as a symbol\n"
5661 	"\t            .sym-offset display an address as a symbol and offset\n"
5662 	"\t            .execname   display a common_pid as a program name\n"
5663 	"\t            .syscall    display a syscall id as a syscall name\n"
5664 	"\t            .log2       display log2 value rather than raw number\n"
5665 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5666 	"\t            .usecs      display a common_timestamp in microseconds\n"
5667 	"\t            .percent    display a number of percentage value\n"
5668 	"\t            .graph      display a bar-graph of a value\n\n"
5669 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5670 	"\t    trigger or to start a hist trigger but not log any events\n"
5671 	"\t    until told to do so.  'continue' can be used to start or\n"
5672 	"\t    restart a paused hist trigger.\n\n"
5673 	"\t    The 'clear' parameter will clear the contents of a running\n"
5674 	"\t    hist trigger and leave its current paused/active state\n"
5675 	"\t    unchanged.\n\n"
5676 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5677 	"\t    raw hitcount in the histogram.\n\n"
5678 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5679 	"\t    have one event conditionally start and stop another event's\n"
5680 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5681 	"\t    the enable_event and disable_event triggers.\n\n"
5682 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5683 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5684 	"\t        <handler>.<action>\n\n"
5685 	"\t    The available handlers are:\n\n"
5686 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5687 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5688 	"\t        onchange(var)            - invoke action if var changes\n\n"
5689 	"\t    The available actions are:\n\n"
5690 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5691 	"\t        save(field,...)                      - save current event fields\n"
5692 #ifdef CONFIG_TRACER_SNAPSHOT
5693 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5694 #endif
5695 #ifdef CONFIG_SYNTH_EVENTS
5696 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5697 	"\t  Write into this file to define/undefine new synthetic events.\n"
5698 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5699 #endif
5700 #endif
5701 ;
5702 
5703 static ssize_t
tracing_readme_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5704 tracing_readme_read(struct file *filp, char __user *ubuf,
5705 		       size_t cnt, loff_t *ppos)
5706 {
5707 	return simple_read_from_buffer(ubuf, cnt, ppos,
5708 					readme_msg, strlen(readme_msg));
5709 }
5710 
5711 static const struct file_operations tracing_readme_fops = {
5712 	.open		= tracing_open_generic,
5713 	.read		= tracing_readme_read,
5714 	.llseek		= generic_file_llseek,
5715 };
5716 
5717 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5718 static union trace_eval_map_item *
update_eval_map(union trace_eval_map_item * ptr)5719 update_eval_map(union trace_eval_map_item *ptr)
5720 {
5721 	if (!ptr->map.eval_string) {
5722 		if (ptr->tail.next) {
5723 			ptr = ptr->tail.next;
5724 			/* Set ptr to the next real item (skip head) */
5725 			ptr++;
5726 		} else
5727 			return NULL;
5728 	}
5729 	return ptr;
5730 }
5731 
eval_map_next(struct seq_file * m,void * v,loff_t * pos)5732 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5733 {
5734 	union trace_eval_map_item *ptr = v;
5735 
5736 	/*
5737 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5738 	 * This really should never happen.
5739 	 */
5740 	(*pos)++;
5741 	ptr = update_eval_map(ptr);
5742 	if (WARN_ON_ONCE(!ptr))
5743 		return NULL;
5744 
5745 	ptr++;
5746 	ptr = update_eval_map(ptr);
5747 
5748 	return ptr;
5749 }
5750 
eval_map_start(struct seq_file * m,loff_t * pos)5751 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5752 {
5753 	union trace_eval_map_item *v;
5754 	loff_t l = 0;
5755 
5756 	mutex_lock(&trace_eval_mutex);
5757 
5758 	v = trace_eval_maps;
5759 	if (v)
5760 		v++;
5761 
5762 	while (v && l < *pos) {
5763 		v = eval_map_next(m, v, &l);
5764 	}
5765 
5766 	return v;
5767 }
5768 
eval_map_stop(struct seq_file * m,void * v)5769 static void eval_map_stop(struct seq_file *m, void *v)
5770 {
5771 	mutex_unlock(&trace_eval_mutex);
5772 }
5773 
eval_map_show(struct seq_file * m,void * v)5774 static int eval_map_show(struct seq_file *m, void *v)
5775 {
5776 	union trace_eval_map_item *ptr = v;
5777 
5778 	seq_printf(m, "%s %ld (%s)\n",
5779 		   ptr->map.eval_string, ptr->map.eval_value,
5780 		   ptr->map.system);
5781 
5782 	return 0;
5783 }
5784 
5785 static const struct seq_operations tracing_eval_map_seq_ops = {
5786 	.start		= eval_map_start,
5787 	.next		= eval_map_next,
5788 	.stop		= eval_map_stop,
5789 	.show		= eval_map_show,
5790 };
5791 
tracing_eval_map_open(struct inode * inode,struct file * filp)5792 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5793 {
5794 	int ret;
5795 
5796 	ret = tracing_check_open_get_tr(NULL);
5797 	if (ret)
5798 		return ret;
5799 
5800 	return seq_open(filp, &tracing_eval_map_seq_ops);
5801 }
5802 
5803 static const struct file_operations tracing_eval_map_fops = {
5804 	.open		= tracing_eval_map_open,
5805 	.read		= seq_read,
5806 	.llseek		= seq_lseek,
5807 	.release	= seq_release,
5808 };
5809 
5810 static inline union trace_eval_map_item *
trace_eval_jmp_to_tail(union trace_eval_map_item * ptr)5811 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5812 {
5813 	/* Return tail of array given the head */
5814 	return ptr + ptr->head.length + 1;
5815 }
5816 
5817 static void
trace_insert_eval_map_file(struct module * mod,struct trace_eval_map ** start,int len)5818 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5819 			   int len)
5820 {
5821 	struct trace_eval_map **stop;
5822 	struct trace_eval_map **map;
5823 	union trace_eval_map_item *map_array;
5824 	union trace_eval_map_item *ptr;
5825 
5826 	stop = start + len;
5827 
5828 	/*
5829 	 * The trace_eval_maps contains the map plus a head and tail item,
5830 	 * where the head holds the module and length of array, and the
5831 	 * tail holds a pointer to the next list.
5832 	 */
5833 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5834 	if (!map_array) {
5835 		pr_warn("Unable to allocate trace eval mapping\n");
5836 		return;
5837 	}
5838 
5839 	guard(mutex)(&trace_eval_mutex);
5840 
5841 	if (!trace_eval_maps)
5842 		trace_eval_maps = map_array;
5843 	else {
5844 		ptr = trace_eval_maps;
5845 		for (;;) {
5846 			ptr = trace_eval_jmp_to_tail(ptr);
5847 			if (!ptr->tail.next)
5848 				break;
5849 			ptr = ptr->tail.next;
5850 
5851 		}
5852 		ptr->tail.next = map_array;
5853 	}
5854 	map_array->head.mod = mod;
5855 	map_array->head.length = len;
5856 	map_array++;
5857 
5858 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5859 		map_array->map = **map;
5860 		map_array++;
5861 	}
5862 	memset(map_array, 0, sizeof(*map_array));
5863 }
5864 
trace_create_eval_file(struct dentry * d_tracer)5865 static void trace_create_eval_file(struct dentry *d_tracer)
5866 {
5867 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5868 			  NULL, &tracing_eval_map_fops);
5869 }
5870 
5871 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
trace_create_eval_file(struct dentry * d_tracer)5872 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
trace_insert_eval_map_file(struct module * mod,struct trace_eval_map ** start,int len)5873 static inline void trace_insert_eval_map_file(struct module *mod,
5874 			      struct trace_eval_map **start, int len) { }
5875 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5876 
5877 static void
trace_event_update_with_eval_map(struct module * mod,struct trace_eval_map ** start,int len)5878 trace_event_update_with_eval_map(struct module *mod,
5879 				 struct trace_eval_map **start,
5880 				 int len)
5881 {
5882 	struct trace_eval_map **map;
5883 
5884 	/* Always run sanitizer only if btf_type_tag attr exists. */
5885 	if (len <= 0) {
5886 		if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) &&
5887 		      IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) &&
5888 		      __has_attribute(btf_type_tag)))
5889 			return;
5890 	}
5891 
5892 	map = start;
5893 
5894 	trace_event_update_all(map, len);
5895 
5896 	if (len <= 0)
5897 		return;
5898 
5899 	trace_insert_eval_map_file(mod, start, len);
5900 }
5901 
5902 static ssize_t
tracing_set_trace_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5903 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5904 		       size_t cnt, loff_t *ppos)
5905 {
5906 	struct trace_array *tr = filp->private_data;
5907 	char buf[MAX_TRACER_SIZE+2];
5908 	int r;
5909 
5910 	scoped_guard(mutex, &trace_types_lock) {
5911 		r = sprintf(buf, "%s\n", tr->current_trace->name);
5912 	}
5913 
5914 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5915 }
5916 
tracer_init(struct tracer * t,struct trace_array * tr)5917 int tracer_init(struct tracer *t, struct trace_array *tr)
5918 {
5919 	tracing_reset_online_cpus(&tr->array_buffer);
5920 	return t->init(tr);
5921 }
5922 
set_buffer_entries(struct array_buffer * buf,unsigned long val)5923 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5924 {
5925 	int cpu;
5926 
5927 	for_each_tracing_cpu(cpu)
5928 		per_cpu_ptr(buf->data, cpu)->entries = val;
5929 }
5930 
update_buffer_entries(struct array_buffer * buf,int cpu)5931 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5932 {
5933 	if (cpu == RING_BUFFER_ALL_CPUS) {
5934 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5935 	} else {
5936 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5937 	}
5938 }
5939 
5940 #ifdef CONFIG_TRACER_MAX_TRACE
5941 /* resize @tr's buffer to the size of @size_tr's entries */
resize_buffer_duplicate_size(struct array_buffer * trace_buf,struct array_buffer * size_buf,int cpu_id)5942 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5943 					struct array_buffer *size_buf, int cpu_id)
5944 {
5945 	int cpu, ret = 0;
5946 
5947 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5948 		for_each_tracing_cpu(cpu) {
5949 			ret = ring_buffer_resize(trace_buf->buffer,
5950 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5951 			if (ret < 0)
5952 				break;
5953 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5954 				per_cpu_ptr(size_buf->data, cpu)->entries;
5955 		}
5956 	} else {
5957 		ret = ring_buffer_resize(trace_buf->buffer,
5958 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5959 		if (ret == 0)
5960 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5961 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5962 	}
5963 
5964 	return ret;
5965 }
5966 #endif /* CONFIG_TRACER_MAX_TRACE */
5967 
__tracing_resize_ring_buffer(struct trace_array * tr,unsigned long size,int cpu)5968 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5969 					unsigned long size, int cpu)
5970 {
5971 	int ret;
5972 
5973 	/*
5974 	 * If kernel or user changes the size of the ring buffer
5975 	 * we use the size that was given, and we can forget about
5976 	 * expanding it later.
5977 	 */
5978 	trace_set_ring_buffer_expanded(tr);
5979 
5980 	/* May be called before buffers are initialized */
5981 	if (!tr->array_buffer.buffer)
5982 		return 0;
5983 
5984 	/* Do not allow tracing while resizing ring buffer */
5985 	tracing_stop_tr(tr);
5986 
5987 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5988 	if (ret < 0)
5989 		goto out_start;
5990 
5991 #ifdef CONFIG_TRACER_MAX_TRACE
5992 	if (!tr->allocated_snapshot)
5993 		goto out;
5994 
5995 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5996 	if (ret < 0) {
5997 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
5998 						     &tr->array_buffer, cpu);
5999 		if (r < 0) {
6000 			/*
6001 			 * AARGH! We are left with different
6002 			 * size max buffer!!!!
6003 			 * The max buffer is our "snapshot" buffer.
6004 			 * When a tracer needs a snapshot (one of the
6005 			 * latency tracers), it swaps the max buffer
6006 			 * with the saved snap shot. We succeeded to
6007 			 * update the size of the main buffer, but failed to
6008 			 * update the size of the max buffer. But when we tried
6009 			 * to reset the main buffer to the original size, we
6010 			 * failed there too. This is very unlikely to
6011 			 * happen, but if it does, warn and kill all
6012 			 * tracing.
6013 			 */
6014 			WARN_ON(1);
6015 			tracing_disabled = 1;
6016 		}
6017 		goto out_start;
6018 	}
6019 
6020 	update_buffer_entries(&tr->max_buffer, cpu);
6021 
6022  out:
6023 #endif /* CONFIG_TRACER_MAX_TRACE */
6024 
6025 	update_buffer_entries(&tr->array_buffer, cpu);
6026  out_start:
6027 	tracing_start_tr(tr);
6028 	return ret;
6029 }
6030 
tracing_resize_ring_buffer(struct trace_array * tr,unsigned long size,int cpu_id)6031 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6032 				  unsigned long size, int cpu_id)
6033 {
6034 	guard(mutex)(&trace_types_lock);
6035 
6036 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6037 		/* make sure, this cpu is enabled in the mask */
6038 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
6039 			return -EINVAL;
6040 	}
6041 
6042 	return __tracing_resize_ring_buffer(tr, size, cpu_id);
6043 }
6044 
6045 struct trace_mod_entry {
6046 	unsigned long	mod_addr;
6047 	char		mod_name[MODULE_NAME_LEN];
6048 };
6049 
6050 struct trace_scratch {
6051 	unsigned int		clock_id;
6052 	unsigned long		text_addr;
6053 	unsigned long		nr_entries;
6054 	struct trace_mod_entry	entries[];
6055 };
6056 
6057 static DEFINE_MUTEX(scratch_mutex);
6058 
cmp_mod_entry(const void * key,const void * pivot)6059 static int cmp_mod_entry(const void *key, const void *pivot)
6060 {
6061 	unsigned long addr = (unsigned long)key;
6062 	const struct trace_mod_entry *ent = pivot;
6063 
6064 	if (addr >= ent[0].mod_addr && addr < ent[1].mod_addr)
6065 		return 0;
6066 	else
6067 		return addr - ent->mod_addr;
6068 }
6069 
6070 /**
6071  * trace_adjust_address() - Adjust prev boot address to current address.
6072  * @tr: Persistent ring buffer's trace_array.
6073  * @addr: Address in @tr which is adjusted.
6074  */
trace_adjust_address(struct trace_array * tr,unsigned long addr)6075 unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
6076 {
6077 	struct trace_module_delta *module_delta;
6078 	struct trace_scratch *tscratch;
6079 	struct trace_mod_entry *entry;
6080 	unsigned long raddr;
6081 	int idx = 0, nr_entries;
6082 
6083 	/* If we don't have last boot delta, return the address */
6084 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6085 		return addr;
6086 
6087 	/* tr->module_delta must be protected by rcu. */
6088 	guard(rcu)();
6089 	tscratch = tr->scratch;
6090 	/* if there is no tscrach, module_delta must be NULL. */
6091 	module_delta = READ_ONCE(tr->module_delta);
6092 	if (!module_delta || !tscratch->nr_entries ||
6093 	    tscratch->entries[0].mod_addr > addr) {
6094 		raddr = addr + tr->text_delta;
6095 		return __is_kernel(raddr) || is_kernel_core_data(raddr) ||
6096 			is_kernel_rodata(raddr) ? raddr : addr;
6097 	}
6098 
6099 	/* Note that entries must be sorted. */
6100 	nr_entries = tscratch->nr_entries;
6101 	if (nr_entries == 1 ||
6102 	    tscratch->entries[nr_entries - 1].mod_addr < addr)
6103 		idx = nr_entries - 1;
6104 	else {
6105 		entry = __inline_bsearch((void *)addr,
6106 				tscratch->entries,
6107 				nr_entries - 1,
6108 				sizeof(tscratch->entries[0]),
6109 				cmp_mod_entry);
6110 		if (entry)
6111 			idx = entry - tscratch->entries;
6112 	}
6113 
6114 	return addr + module_delta->delta[idx];
6115 }
6116 
6117 #ifdef CONFIG_MODULES
save_mod(struct module * mod,void * data)6118 static int save_mod(struct module *mod, void *data)
6119 {
6120 	struct trace_array *tr = data;
6121 	struct trace_scratch *tscratch;
6122 	struct trace_mod_entry *entry;
6123 	unsigned int size;
6124 
6125 	tscratch = tr->scratch;
6126 	if (!tscratch)
6127 		return -1;
6128 	size = tr->scratch_size;
6129 
6130 	if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size)
6131 		return -1;
6132 
6133 	entry = &tscratch->entries[tscratch->nr_entries];
6134 
6135 	tscratch->nr_entries++;
6136 
6137 	entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
6138 	strscpy(entry->mod_name, mod->name);
6139 
6140 	return 0;
6141 }
6142 #else
save_mod(struct module * mod,void * data)6143 static int save_mod(struct module *mod, void *data)
6144 {
6145 	return 0;
6146 }
6147 #endif
6148 
update_last_data(struct trace_array * tr)6149 static void update_last_data(struct trace_array *tr)
6150 {
6151 	struct trace_module_delta *module_delta;
6152 	struct trace_scratch *tscratch;
6153 
6154 	if (!(tr->flags & TRACE_ARRAY_FL_BOOT))
6155 		return;
6156 
6157 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6158 		return;
6159 
6160 	/* Only if the buffer has previous boot data clear and update it. */
6161 	tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;
6162 
6163 	/* Reset the module list and reload them */
6164 	if (tr->scratch) {
6165 		struct trace_scratch *tscratch = tr->scratch;
6166 
6167 		tscratch->clock_id = tr->clock_id;
6168 		memset(tscratch->entries, 0,
6169 		       flex_array_size(tscratch, entries, tscratch->nr_entries));
6170 		tscratch->nr_entries = 0;
6171 
6172 		guard(mutex)(&scratch_mutex);
6173 		module_for_each_mod(save_mod, tr);
6174 	}
6175 
6176 	/*
6177 	 * Need to clear all CPU buffers as there cannot be events
6178 	 * from the previous boot mixed with events with this boot
6179 	 * as that will cause a confusing trace. Need to clear all
6180 	 * CPU buffers, even for those that may currently be offline.
6181 	 */
6182 	tracing_reset_all_cpus(&tr->array_buffer);
6183 
6184 	/* Using current data now */
6185 	tr->text_delta = 0;
6186 
6187 	if (!tr->scratch)
6188 		return;
6189 
6190 	tscratch = tr->scratch;
6191 	module_delta = READ_ONCE(tr->module_delta);
6192 	WRITE_ONCE(tr->module_delta, NULL);
6193 	kfree_rcu(module_delta, rcu);
6194 
6195 	/* Set the persistent ring buffer meta data to this address */
6196 	tscratch->text_addr = (unsigned long)_text;
6197 }
6198 
6199 /**
6200  * tracing_update_buffers - used by tracing facility to expand ring buffers
6201  * @tr: The tracing instance
6202  *
6203  * To save on memory when the tracing is never used on a system with it
6204  * configured in. The ring buffers are set to a minimum size. But once
6205  * a user starts to use the tracing facility, then they need to grow
6206  * to their default size.
6207  *
6208  * This function is to be called when a tracer is about to be used.
6209  */
tracing_update_buffers(struct trace_array * tr)6210 int tracing_update_buffers(struct trace_array *tr)
6211 {
6212 	int ret = 0;
6213 
6214 	guard(mutex)(&trace_types_lock);
6215 
6216 	update_last_data(tr);
6217 
6218 	if (!tr->ring_buffer_expanded)
6219 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6220 						RING_BUFFER_ALL_CPUS);
6221 	return ret;
6222 }
6223 
6224 struct trace_option_dentry;
6225 
6226 static void
6227 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6228 
6229 /*
6230  * Used to clear out the tracer before deletion of an instance.
6231  * Must have trace_types_lock held.
6232  */
tracing_set_nop(struct trace_array * tr)6233 static void tracing_set_nop(struct trace_array *tr)
6234 {
6235 	if (tr->current_trace == &nop_trace)
6236 		return;
6237 
6238 	tr->current_trace->enabled--;
6239 
6240 	if (tr->current_trace->reset)
6241 		tr->current_trace->reset(tr);
6242 
6243 	tr->current_trace = &nop_trace;
6244 }
6245 
6246 static bool tracer_options_updated;
6247 
add_tracer_options(struct trace_array * tr,struct tracer * t)6248 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6249 {
6250 	/* Only enable if the directory has been created already. */
6251 	if (!tr->dir && !(tr->flags & TRACE_ARRAY_FL_GLOBAL))
6252 		return;
6253 
6254 	/* Only create trace option files after update_tracer_options finish */
6255 	if (!tracer_options_updated)
6256 		return;
6257 
6258 	create_trace_option_files(tr, t);
6259 }
6260 
tracing_set_tracer(struct trace_array * tr,const char * buf)6261 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6262 {
6263 	struct tracer *t;
6264 #ifdef CONFIG_TRACER_MAX_TRACE
6265 	bool had_max_tr;
6266 #endif
6267 	int ret;
6268 
6269 	guard(mutex)(&trace_types_lock);
6270 
6271 	update_last_data(tr);
6272 
6273 	if (!tr->ring_buffer_expanded) {
6274 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6275 						RING_BUFFER_ALL_CPUS);
6276 		if (ret < 0)
6277 			return ret;
6278 		ret = 0;
6279 	}
6280 
6281 	for (t = trace_types; t; t = t->next) {
6282 		if (strcmp(t->name, buf) == 0)
6283 			break;
6284 	}
6285 	if (!t)
6286 		return -EINVAL;
6287 
6288 	if (t == tr->current_trace)
6289 		return 0;
6290 
6291 #ifdef CONFIG_TRACER_SNAPSHOT
6292 	if (t->use_max_tr) {
6293 		local_irq_disable();
6294 		arch_spin_lock(&tr->max_lock);
6295 		ret = tr->cond_snapshot ? -EBUSY : 0;
6296 		arch_spin_unlock(&tr->max_lock);
6297 		local_irq_enable();
6298 		if (ret)
6299 			return ret;
6300 	}
6301 #endif
6302 	/* Some tracers won't work on kernel command line */
6303 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6304 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6305 			t->name);
6306 		return -EINVAL;
6307 	}
6308 
6309 	/* Some tracers are only allowed for the top level buffer */
6310 	if (!trace_ok_for_array(t, tr))
6311 		return -EINVAL;
6312 
6313 	/* If trace pipe files are being read, we can't change the tracer */
6314 	if (tr->trace_ref)
6315 		return -EBUSY;
6316 
6317 	trace_branch_disable();
6318 
6319 	tr->current_trace->enabled--;
6320 
6321 	if (tr->current_trace->reset)
6322 		tr->current_trace->reset(tr);
6323 
6324 #ifdef CONFIG_TRACER_MAX_TRACE
6325 	had_max_tr = tr->current_trace->use_max_tr;
6326 
6327 	/* Current trace needs to be nop_trace before synchronize_rcu */
6328 	tr->current_trace = &nop_trace;
6329 
6330 	if (had_max_tr && !t->use_max_tr) {
6331 		/*
6332 		 * We need to make sure that the update_max_tr sees that
6333 		 * current_trace changed to nop_trace to keep it from
6334 		 * swapping the buffers after we resize it.
6335 		 * The update_max_tr is called from interrupts disabled
6336 		 * so a synchronized_sched() is sufficient.
6337 		 */
6338 		synchronize_rcu();
6339 		free_snapshot(tr);
6340 		tracing_disarm_snapshot(tr);
6341 	}
6342 
6343 	if (!had_max_tr && t->use_max_tr) {
6344 		ret = tracing_arm_snapshot_locked(tr);
6345 		if (ret)
6346 			return ret;
6347 	}
6348 #else
6349 	tr->current_trace = &nop_trace;
6350 #endif
6351 
6352 	if (t->init) {
6353 		ret = tracer_init(t, tr);
6354 		if (ret) {
6355 #ifdef CONFIG_TRACER_MAX_TRACE
6356 			if (t->use_max_tr)
6357 				tracing_disarm_snapshot(tr);
6358 #endif
6359 			return ret;
6360 		}
6361 	}
6362 
6363 	tr->current_trace = t;
6364 	tr->current_trace->enabled++;
6365 	trace_branch_enable(tr);
6366 
6367 	return 0;
6368 }
6369 
6370 static ssize_t
tracing_set_trace_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6371 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6372 			size_t cnt, loff_t *ppos)
6373 {
6374 	struct trace_array *tr = filp->private_data;
6375 	char buf[MAX_TRACER_SIZE+1];
6376 	char *name;
6377 	size_t ret;
6378 	int err;
6379 
6380 	ret = cnt;
6381 
6382 	if (cnt > MAX_TRACER_SIZE)
6383 		cnt = MAX_TRACER_SIZE;
6384 
6385 	if (copy_from_user(buf, ubuf, cnt))
6386 		return -EFAULT;
6387 
6388 	buf[cnt] = 0;
6389 
6390 	name = strim(buf);
6391 
6392 	err = tracing_set_tracer(tr, name);
6393 	if (err)
6394 		return err;
6395 
6396 	*ppos += ret;
6397 
6398 	return ret;
6399 }
6400 
6401 static ssize_t
tracing_nsecs_read(unsigned long * ptr,char __user * ubuf,size_t cnt,loff_t * ppos)6402 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6403 		   size_t cnt, loff_t *ppos)
6404 {
6405 	char buf[64];
6406 	int r;
6407 
6408 	r = snprintf(buf, sizeof(buf), "%ld\n",
6409 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6410 	if (r > sizeof(buf))
6411 		r = sizeof(buf);
6412 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6413 }
6414 
6415 static ssize_t
tracing_nsecs_write(unsigned long * ptr,const char __user * ubuf,size_t cnt,loff_t * ppos)6416 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6417 		    size_t cnt, loff_t *ppos)
6418 {
6419 	unsigned long val;
6420 	int ret;
6421 
6422 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6423 	if (ret)
6424 		return ret;
6425 
6426 	*ptr = val * 1000;
6427 
6428 	return cnt;
6429 }
6430 
6431 static ssize_t
tracing_thresh_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6432 tracing_thresh_read(struct file *filp, char __user *ubuf,
6433 		    size_t cnt, loff_t *ppos)
6434 {
6435 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6436 }
6437 
6438 static ssize_t
tracing_thresh_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6439 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6440 		     size_t cnt, loff_t *ppos)
6441 {
6442 	struct trace_array *tr = filp->private_data;
6443 	int ret;
6444 
6445 	guard(mutex)(&trace_types_lock);
6446 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6447 	if (ret < 0)
6448 		return ret;
6449 
6450 	if (tr->current_trace->update_thresh) {
6451 		ret = tr->current_trace->update_thresh(tr);
6452 		if (ret < 0)
6453 			return ret;
6454 	}
6455 
6456 	return cnt;
6457 }
6458 
6459 #ifdef CONFIG_TRACER_MAX_TRACE
6460 
6461 static ssize_t
tracing_max_lat_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6462 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6463 		     size_t cnt, loff_t *ppos)
6464 {
6465 	struct trace_array *tr = filp->private_data;
6466 
6467 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6468 }
6469 
6470 static ssize_t
tracing_max_lat_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6471 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6472 		      size_t cnt, loff_t *ppos)
6473 {
6474 	struct trace_array *tr = filp->private_data;
6475 
6476 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6477 }
6478 
6479 #endif
6480 
open_pipe_on_cpu(struct trace_array * tr,int cpu)6481 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6482 {
6483 	if (cpu == RING_BUFFER_ALL_CPUS) {
6484 		if (cpumask_empty(tr->pipe_cpumask)) {
6485 			cpumask_setall(tr->pipe_cpumask);
6486 			return 0;
6487 		}
6488 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6489 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6490 		return 0;
6491 	}
6492 	return -EBUSY;
6493 }
6494 
close_pipe_on_cpu(struct trace_array * tr,int cpu)6495 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6496 {
6497 	if (cpu == RING_BUFFER_ALL_CPUS) {
6498 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6499 		cpumask_clear(tr->pipe_cpumask);
6500 	} else {
6501 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6502 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6503 	}
6504 }
6505 
tracing_open_pipe(struct inode * inode,struct file * filp)6506 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6507 {
6508 	struct trace_array *tr = inode->i_private;
6509 	struct trace_iterator *iter;
6510 	int cpu;
6511 	int ret;
6512 
6513 	ret = tracing_check_open_get_tr(tr);
6514 	if (ret)
6515 		return ret;
6516 
6517 	guard(mutex)(&trace_types_lock);
6518 	cpu = tracing_get_cpu(inode);
6519 	ret = open_pipe_on_cpu(tr, cpu);
6520 	if (ret)
6521 		goto fail_pipe_on_cpu;
6522 
6523 	/* create a buffer to store the information to pass to userspace */
6524 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6525 	if (!iter) {
6526 		ret = -ENOMEM;
6527 		goto fail_alloc_iter;
6528 	}
6529 
6530 	trace_seq_init(&iter->seq);
6531 	iter->trace = tr->current_trace;
6532 
6533 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6534 		ret = -ENOMEM;
6535 		goto fail;
6536 	}
6537 
6538 	/* trace pipe does not show start of buffer */
6539 	cpumask_setall(iter->started);
6540 
6541 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6542 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6543 
6544 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6545 	if (trace_clocks[tr->clock_id].in_ns)
6546 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6547 
6548 	iter->tr = tr;
6549 	iter->array_buffer = &tr->array_buffer;
6550 	iter->cpu_file = cpu;
6551 	mutex_init(&iter->mutex);
6552 	filp->private_data = iter;
6553 
6554 	if (iter->trace->pipe_open)
6555 		iter->trace->pipe_open(iter);
6556 
6557 	nonseekable_open(inode, filp);
6558 
6559 	tr->trace_ref++;
6560 
6561 	return ret;
6562 
6563 fail:
6564 	kfree(iter);
6565 fail_alloc_iter:
6566 	close_pipe_on_cpu(tr, cpu);
6567 fail_pipe_on_cpu:
6568 	__trace_array_put(tr);
6569 	return ret;
6570 }
6571 
tracing_release_pipe(struct inode * inode,struct file * file)6572 static int tracing_release_pipe(struct inode *inode, struct file *file)
6573 {
6574 	struct trace_iterator *iter = file->private_data;
6575 	struct trace_array *tr = inode->i_private;
6576 
6577 	scoped_guard(mutex, &trace_types_lock) {
6578 		tr->trace_ref--;
6579 
6580 		if (iter->trace->pipe_close)
6581 			iter->trace->pipe_close(iter);
6582 		close_pipe_on_cpu(tr, iter->cpu_file);
6583 	}
6584 
6585 	free_trace_iter_content(iter);
6586 	kfree(iter);
6587 
6588 	trace_array_put(tr);
6589 
6590 	return 0;
6591 }
6592 
6593 static __poll_t
trace_poll(struct trace_iterator * iter,struct file * filp,poll_table * poll_table)6594 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6595 {
6596 	struct trace_array *tr = iter->tr;
6597 
6598 	/* Iterators are static, they should be filled or empty */
6599 	if (trace_buffer_iter(iter, iter->cpu_file))
6600 		return EPOLLIN | EPOLLRDNORM;
6601 
6602 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6603 		/*
6604 		 * Always select as readable when in blocking mode
6605 		 */
6606 		return EPOLLIN | EPOLLRDNORM;
6607 	else
6608 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6609 					     filp, poll_table, iter->tr->buffer_percent);
6610 }
6611 
6612 static __poll_t
tracing_poll_pipe(struct file * filp,poll_table * poll_table)6613 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6614 {
6615 	struct trace_iterator *iter = filp->private_data;
6616 
6617 	return trace_poll(iter, filp, poll_table);
6618 }
6619 
6620 /* Must be called with iter->mutex held. */
tracing_wait_pipe(struct file * filp)6621 static int tracing_wait_pipe(struct file *filp)
6622 {
6623 	struct trace_iterator *iter = filp->private_data;
6624 	int ret;
6625 
6626 	while (trace_empty(iter)) {
6627 
6628 		if ((filp->f_flags & O_NONBLOCK)) {
6629 			return -EAGAIN;
6630 		}
6631 
6632 		/*
6633 		 * We block until we read something and tracing is disabled.
6634 		 * We still block if tracing is disabled, but we have never
6635 		 * read anything. This allows a user to cat this file, and
6636 		 * then enable tracing. But after we have read something,
6637 		 * we give an EOF when tracing is again disabled.
6638 		 *
6639 		 * iter->pos will be 0 if we haven't read anything.
6640 		 */
6641 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6642 			break;
6643 
6644 		mutex_unlock(&iter->mutex);
6645 
6646 		ret = wait_on_pipe(iter, 0);
6647 
6648 		mutex_lock(&iter->mutex);
6649 
6650 		if (ret)
6651 			return ret;
6652 	}
6653 
6654 	return 1;
6655 }
6656 
update_last_data_if_empty(struct trace_array * tr)6657 static bool update_last_data_if_empty(struct trace_array *tr)
6658 {
6659 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6660 		return false;
6661 
6662 	if (!ring_buffer_empty(tr->array_buffer.buffer))
6663 		return false;
6664 
6665 	/*
6666 	 * If the buffer contains the last boot data and all per-cpu
6667 	 * buffers are empty, reset it from the kernel side.
6668 	 */
6669 	update_last_data(tr);
6670 	return true;
6671 }
6672 
6673 /*
6674  * Consumer reader.
6675  */
6676 static ssize_t
tracing_read_pipe(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6677 tracing_read_pipe(struct file *filp, char __user *ubuf,
6678 		  size_t cnt, loff_t *ppos)
6679 {
6680 	struct trace_iterator *iter = filp->private_data;
6681 	ssize_t sret;
6682 
6683 	/*
6684 	 * Avoid more than one consumer on a single file descriptor
6685 	 * This is just a matter of traces coherency, the ring buffer itself
6686 	 * is protected.
6687 	 */
6688 	guard(mutex)(&iter->mutex);
6689 
6690 	/* return any leftover data */
6691 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6692 	if (sret != -EBUSY)
6693 		return sret;
6694 
6695 	trace_seq_init(&iter->seq);
6696 
6697 	if (iter->trace->read) {
6698 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6699 		if (sret)
6700 			return sret;
6701 	}
6702 
6703 waitagain:
6704 	if (update_last_data_if_empty(iter->tr))
6705 		return 0;
6706 
6707 	sret = tracing_wait_pipe(filp);
6708 	if (sret <= 0)
6709 		return sret;
6710 
6711 	/* stop when tracing is finished */
6712 	if (trace_empty(iter))
6713 		return 0;
6714 
6715 	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6716 		cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6717 
6718 	/* reset all but tr, trace, and overruns */
6719 	trace_iterator_reset(iter);
6720 	cpumask_clear(iter->started);
6721 	trace_seq_init(&iter->seq);
6722 
6723 	trace_event_read_lock();
6724 	trace_access_lock(iter->cpu_file);
6725 	while (trace_find_next_entry_inc(iter) != NULL) {
6726 		enum print_line_t ret;
6727 		int save_len = iter->seq.seq.len;
6728 
6729 		ret = print_trace_line(iter);
6730 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6731 			/*
6732 			 * If one print_trace_line() fills entire trace_seq in one shot,
6733 			 * trace_seq_to_user() will returns -EBUSY because save_len == 0,
6734 			 * In this case, we need to consume it, otherwise, loop will peek
6735 			 * this event next time, resulting in an infinite loop.
6736 			 */
6737 			if (save_len == 0) {
6738 				iter->seq.full = 0;
6739 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6740 				trace_consume(iter);
6741 				break;
6742 			}
6743 
6744 			/* In other cases, don't print partial lines */
6745 			iter->seq.seq.len = save_len;
6746 			break;
6747 		}
6748 		if (ret != TRACE_TYPE_NO_CONSUME)
6749 			trace_consume(iter);
6750 
6751 		if (trace_seq_used(&iter->seq) >= cnt)
6752 			break;
6753 
6754 		/*
6755 		 * Setting the full flag means we reached the trace_seq buffer
6756 		 * size and we should leave by partial output condition above.
6757 		 * One of the trace_seq_* functions is not used properly.
6758 		 */
6759 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6760 			  iter->ent->type);
6761 	}
6762 	trace_access_unlock(iter->cpu_file);
6763 	trace_event_read_unlock();
6764 
6765 	/* Now copy what we have to the user */
6766 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6767 	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6768 		trace_seq_init(&iter->seq);
6769 
6770 	/*
6771 	 * If there was nothing to send to user, in spite of consuming trace
6772 	 * entries, go back to wait for more entries.
6773 	 */
6774 	if (sret == -EBUSY)
6775 		goto waitagain;
6776 
6777 	return sret;
6778 }
6779 
tracing_spd_release_pipe(struct splice_pipe_desc * spd,unsigned int idx)6780 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6781 				     unsigned int idx)
6782 {
6783 	__free_page(spd->pages[idx]);
6784 }
6785 
6786 static size_t
tracing_fill_pipe_page(size_t rem,struct trace_iterator * iter)6787 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6788 {
6789 	size_t count;
6790 	int save_len;
6791 	int ret;
6792 
6793 	/* Seq buffer is page-sized, exactly what we need. */
6794 	for (;;) {
6795 		save_len = iter->seq.seq.len;
6796 		ret = print_trace_line(iter);
6797 
6798 		if (trace_seq_has_overflowed(&iter->seq)) {
6799 			iter->seq.seq.len = save_len;
6800 			break;
6801 		}
6802 
6803 		/*
6804 		 * This should not be hit, because it should only
6805 		 * be set if the iter->seq overflowed. But check it
6806 		 * anyway to be safe.
6807 		 */
6808 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6809 			iter->seq.seq.len = save_len;
6810 			break;
6811 		}
6812 
6813 		count = trace_seq_used(&iter->seq) - save_len;
6814 		if (rem < count) {
6815 			rem = 0;
6816 			iter->seq.seq.len = save_len;
6817 			break;
6818 		}
6819 
6820 		if (ret != TRACE_TYPE_NO_CONSUME)
6821 			trace_consume(iter);
6822 		rem -= count;
6823 		if (!trace_find_next_entry_inc(iter))	{
6824 			rem = 0;
6825 			iter->ent = NULL;
6826 			break;
6827 		}
6828 	}
6829 
6830 	return rem;
6831 }
6832 
tracing_splice_read_pipe(struct file * filp,loff_t * ppos,struct pipe_inode_info * pipe,size_t len,unsigned int flags)6833 static ssize_t tracing_splice_read_pipe(struct file *filp,
6834 					loff_t *ppos,
6835 					struct pipe_inode_info *pipe,
6836 					size_t len,
6837 					unsigned int flags)
6838 {
6839 	struct page *pages_def[PIPE_DEF_BUFFERS];
6840 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6841 	struct trace_iterator *iter = filp->private_data;
6842 	struct splice_pipe_desc spd = {
6843 		.pages		= pages_def,
6844 		.partial	= partial_def,
6845 		.nr_pages	= 0, /* This gets updated below. */
6846 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6847 		.ops		= &default_pipe_buf_ops,
6848 		.spd_release	= tracing_spd_release_pipe,
6849 	};
6850 	ssize_t ret;
6851 	size_t rem;
6852 	unsigned int i;
6853 
6854 	if (splice_grow_spd(pipe, &spd))
6855 		return -ENOMEM;
6856 
6857 	mutex_lock(&iter->mutex);
6858 
6859 	if (iter->trace->splice_read) {
6860 		ret = iter->trace->splice_read(iter, filp,
6861 					       ppos, pipe, len, flags);
6862 		if (ret)
6863 			goto out_err;
6864 	}
6865 
6866 	ret = tracing_wait_pipe(filp);
6867 	if (ret <= 0)
6868 		goto out_err;
6869 
6870 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6871 		ret = -EFAULT;
6872 		goto out_err;
6873 	}
6874 
6875 	trace_event_read_lock();
6876 	trace_access_lock(iter->cpu_file);
6877 
6878 	/* Fill as many pages as possible. */
6879 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6880 		spd.pages[i] = alloc_page(GFP_KERNEL);
6881 		if (!spd.pages[i])
6882 			break;
6883 
6884 		rem = tracing_fill_pipe_page(rem, iter);
6885 
6886 		/* Copy the data into the page, so we can start over. */
6887 		ret = trace_seq_to_buffer(&iter->seq,
6888 					  page_address(spd.pages[i]),
6889 					  min((size_t)trace_seq_used(&iter->seq),
6890 						  (size_t)PAGE_SIZE));
6891 		if (ret < 0) {
6892 			__free_page(spd.pages[i]);
6893 			break;
6894 		}
6895 		spd.partial[i].offset = 0;
6896 		spd.partial[i].len = ret;
6897 
6898 		trace_seq_init(&iter->seq);
6899 	}
6900 
6901 	trace_access_unlock(iter->cpu_file);
6902 	trace_event_read_unlock();
6903 	mutex_unlock(&iter->mutex);
6904 
6905 	spd.nr_pages = i;
6906 
6907 	if (i)
6908 		ret = splice_to_pipe(pipe, &spd);
6909 	else
6910 		ret = 0;
6911 out:
6912 	splice_shrink_spd(&spd);
6913 	return ret;
6914 
6915 out_err:
6916 	mutex_unlock(&iter->mutex);
6917 	goto out;
6918 }
6919 
6920 static ssize_t
tracing_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6921 tracing_entries_read(struct file *filp, char __user *ubuf,
6922 		     size_t cnt, loff_t *ppos)
6923 {
6924 	struct inode *inode = file_inode(filp);
6925 	struct trace_array *tr = inode->i_private;
6926 	int cpu = tracing_get_cpu(inode);
6927 	char buf[64];
6928 	int r = 0;
6929 	ssize_t ret;
6930 
6931 	mutex_lock(&trace_types_lock);
6932 
6933 	if (cpu == RING_BUFFER_ALL_CPUS) {
6934 		int cpu, buf_size_same;
6935 		unsigned long size;
6936 
6937 		size = 0;
6938 		buf_size_same = 1;
6939 		/* check if all cpu sizes are same */
6940 		for_each_tracing_cpu(cpu) {
6941 			/* fill in the size from first enabled cpu */
6942 			if (size == 0)
6943 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6944 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6945 				buf_size_same = 0;
6946 				break;
6947 			}
6948 		}
6949 
6950 		if (buf_size_same) {
6951 			if (!tr->ring_buffer_expanded)
6952 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6953 					    size >> 10,
6954 					    trace_buf_size >> 10);
6955 			else
6956 				r = sprintf(buf, "%lu\n", size >> 10);
6957 		} else
6958 			r = sprintf(buf, "X\n");
6959 	} else
6960 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6961 
6962 	mutex_unlock(&trace_types_lock);
6963 
6964 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6965 	return ret;
6966 }
6967 
6968 static ssize_t
tracing_entries_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6969 tracing_entries_write(struct file *filp, const char __user *ubuf,
6970 		      size_t cnt, loff_t *ppos)
6971 {
6972 	struct inode *inode = file_inode(filp);
6973 	struct trace_array *tr = inode->i_private;
6974 	unsigned long val;
6975 	int ret;
6976 
6977 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6978 	if (ret)
6979 		return ret;
6980 
6981 	/* must have at least 1 entry */
6982 	if (!val)
6983 		return -EINVAL;
6984 
6985 	/* value is in KB */
6986 	val <<= 10;
6987 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6988 	if (ret < 0)
6989 		return ret;
6990 
6991 	*ppos += cnt;
6992 
6993 	return cnt;
6994 }
6995 
6996 static ssize_t
tracing_total_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6997 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6998 				size_t cnt, loff_t *ppos)
6999 {
7000 	struct trace_array *tr = filp->private_data;
7001 	char buf[64];
7002 	int r, cpu;
7003 	unsigned long size = 0, expanded_size = 0;
7004 
7005 	mutex_lock(&trace_types_lock);
7006 	for_each_tracing_cpu(cpu) {
7007 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7008 		if (!tr->ring_buffer_expanded)
7009 			expanded_size += trace_buf_size >> 10;
7010 	}
7011 	if (tr->ring_buffer_expanded)
7012 		r = sprintf(buf, "%lu\n", size);
7013 	else
7014 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7015 	mutex_unlock(&trace_types_lock);
7016 
7017 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7018 }
7019 
7020 #define LAST_BOOT_HEADER ((void *)1)
7021 
l_next(struct seq_file * m,void * v,loff_t * pos)7022 static void *l_next(struct seq_file *m, void *v, loff_t *pos)
7023 {
7024 	struct trace_array *tr = m->private;
7025 	struct trace_scratch *tscratch = tr->scratch;
7026 	unsigned int index = *pos;
7027 
7028 	(*pos)++;
7029 
7030 	if (*pos == 1)
7031 		return LAST_BOOT_HEADER;
7032 
7033 	/* Only show offsets of the last boot data */
7034 	if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7035 		return NULL;
7036 
7037 	/* *pos 0 is for the header, 1 is for the first module */
7038 	index--;
7039 
7040 	if (index >= tscratch->nr_entries)
7041 		return NULL;
7042 
7043 	return &tscratch->entries[index];
7044 }
7045 
l_start(struct seq_file * m,loff_t * pos)7046 static void *l_start(struct seq_file *m, loff_t *pos)
7047 {
7048 	mutex_lock(&scratch_mutex);
7049 
7050 	return l_next(m, NULL, pos);
7051 }
7052 
l_stop(struct seq_file * m,void * p)7053 static void l_stop(struct seq_file *m, void *p)
7054 {
7055 	mutex_unlock(&scratch_mutex);
7056 }
7057 
show_last_boot_header(struct seq_file * m,struct trace_array * tr)7058 static void show_last_boot_header(struct seq_file *m, struct trace_array *tr)
7059 {
7060 	struct trace_scratch *tscratch = tr->scratch;
7061 
7062 	/*
7063 	 * Do not leak KASLR address. This only shows the KASLR address of
7064 	 * the last boot. When the ring buffer is started, the LAST_BOOT
7065 	 * flag gets cleared, and this should only report "current".
7066 	 * Otherwise it shows the KASLR address from the previous boot which
7067 	 * should not be the same as the current boot.
7068 	 */
7069 	if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7070 		seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr);
7071 	else
7072 		seq_puts(m, "# Current\n");
7073 }
7074 
l_show(struct seq_file * m,void * v)7075 static int l_show(struct seq_file *m, void *v)
7076 {
7077 	struct trace_array *tr = m->private;
7078 	struct trace_mod_entry *entry = v;
7079 
7080 	if (v == LAST_BOOT_HEADER) {
7081 		show_last_boot_header(m, tr);
7082 		return 0;
7083 	}
7084 
7085 	seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name);
7086 	return 0;
7087 }
7088 
7089 static const struct seq_operations last_boot_seq_ops = {
7090 	.start		= l_start,
7091 	.next		= l_next,
7092 	.stop		= l_stop,
7093 	.show		= l_show,
7094 };
7095 
tracing_last_boot_open(struct inode * inode,struct file * file)7096 static int tracing_last_boot_open(struct inode *inode, struct file *file)
7097 {
7098 	struct trace_array *tr = inode->i_private;
7099 	struct seq_file *m;
7100 	int ret;
7101 
7102 	ret = tracing_check_open_get_tr(tr);
7103 	if (ret)
7104 		return ret;
7105 
7106 	ret = seq_open(file, &last_boot_seq_ops);
7107 	if (ret) {
7108 		trace_array_put(tr);
7109 		return ret;
7110 	}
7111 
7112 	m = file->private_data;
7113 	m->private = tr;
7114 
7115 	return 0;
7116 }
7117 
tracing_buffer_meta_open(struct inode * inode,struct file * filp)7118 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
7119 {
7120 	struct trace_array *tr = inode->i_private;
7121 	int cpu = tracing_get_cpu(inode);
7122 	int ret;
7123 
7124 	ret = tracing_check_open_get_tr(tr);
7125 	if (ret)
7126 		return ret;
7127 
7128 	ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
7129 	if (ret < 0)
7130 		__trace_array_put(tr);
7131 	return ret;
7132 }
7133 
7134 static ssize_t
tracing_free_buffer_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7135 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7136 			  size_t cnt, loff_t *ppos)
7137 {
7138 	/*
7139 	 * There is no need to read what the user has written, this function
7140 	 * is just to make sure that there is no error when "echo" is used
7141 	 */
7142 
7143 	*ppos += cnt;
7144 
7145 	return cnt;
7146 }
7147 
7148 static int
tracing_free_buffer_release(struct inode * inode,struct file * filp)7149 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7150 {
7151 	struct trace_array *tr = inode->i_private;
7152 
7153 	/* disable tracing ? */
7154 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7155 		tracer_tracing_off(tr);
7156 	/* resize the ring buffer to 0 */
7157 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7158 
7159 	trace_array_put(tr);
7160 
7161 	return 0;
7162 }
7163 
7164 #define TRACE_MARKER_MAX_SIZE		4096
7165 
write_marker_to_buffer(struct trace_array * tr,const char __user * ubuf,size_t cnt,unsigned long ip)7166 static ssize_t write_marker_to_buffer(struct trace_array *tr, const char __user *ubuf,
7167 				      size_t cnt, unsigned long ip)
7168 {
7169 	struct ring_buffer_event *event;
7170 	enum event_trigger_type tt = ETT_NONE;
7171 	struct trace_buffer *buffer;
7172 	struct print_entry *entry;
7173 	int meta_size;
7174 	ssize_t written;
7175 	size_t size;
7176 	int len;
7177 
7178 /* Used in tracing_mark_raw_write() as well */
7179 #define FAULTED_STR "<faulted>"
7180 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7181 
7182 	meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
7183  again:
7184 	size = cnt + meta_size;
7185 
7186 	/* If less than "<faulted>", then make sure we can still add that */
7187 	if (cnt < FAULTED_SIZE)
7188 		size += FAULTED_SIZE - cnt;
7189 
7190 	buffer = tr->array_buffer.buffer;
7191 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7192 					    tracing_gen_ctx());
7193 	if (unlikely(!event)) {
7194 		/*
7195 		 * If the size was greater than what was allowed, then
7196 		 * make it smaller and try again.
7197 		 */
7198 		if (size > ring_buffer_max_event_size(buffer)) {
7199 			/* cnt < FAULTED size should never be bigger than max */
7200 			if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
7201 				return -EBADF;
7202 			cnt = ring_buffer_max_event_size(buffer) - meta_size;
7203 			/* The above should only happen once */
7204 			if (WARN_ON_ONCE(cnt + meta_size == size))
7205 				return -EBADF;
7206 			goto again;
7207 		}
7208 
7209 		/* Ring buffer disabled, return as if not open for write */
7210 		return -EBADF;
7211 	}
7212 
7213 	entry = ring_buffer_event_data(event);
7214 	entry->ip = ip;
7215 
7216 	len = copy_from_user_nofault(&entry->buf, ubuf, cnt);
7217 	if (len) {
7218 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7219 		cnt = FAULTED_SIZE;
7220 		written = -EFAULT;
7221 	} else
7222 		written = cnt;
7223 
7224 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7225 		/* do not add \n before testing triggers, but add \0 */
7226 		entry->buf[cnt] = '\0';
7227 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7228 	}
7229 
7230 	if (entry->buf[cnt - 1] != '\n') {
7231 		entry->buf[cnt] = '\n';
7232 		entry->buf[cnt + 1] = '\0';
7233 	} else
7234 		entry->buf[cnt] = '\0';
7235 
7236 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7237 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7238 	__buffer_unlock_commit(buffer, event);
7239 
7240 	if (tt)
7241 		event_triggers_post_call(tr->trace_marker_file, tt);
7242 
7243 	return written;
7244 }
7245 
7246 static ssize_t
tracing_mark_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7247 tracing_mark_write(struct file *filp, const char __user *ubuf,
7248 					size_t cnt, loff_t *fpos)
7249 {
7250 	struct trace_array *tr = filp->private_data;
7251 	ssize_t written = -ENODEV;
7252 	unsigned long ip;
7253 
7254 	if (tracing_disabled)
7255 		return -EINVAL;
7256 
7257 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7258 		return -EINVAL;
7259 
7260 	if ((ssize_t)cnt < 0)
7261 		return -EINVAL;
7262 
7263 	if (cnt > TRACE_MARKER_MAX_SIZE)
7264 		cnt = TRACE_MARKER_MAX_SIZE;
7265 
7266 	/* The selftests expect this function to be the IP address */
7267 	ip = _THIS_IP_;
7268 
7269 	/* The global trace_marker can go to multiple instances */
7270 	if (tr == &global_trace) {
7271 		guard(rcu)();
7272 		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7273 			written = write_marker_to_buffer(tr, ubuf, cnt, ip);
7274 			if (written < 0)
7275 				break;
7276 		}
7277 	} else {
7278 		written = write_marker_to_buffer(tr, ubuf, cnt, ip);
7279 	}
7280 
7281 	return written;
7282 }
7283 
write_raw_marker_to_buffer(struct trace_array * tr,const char __user * ubuf,size_t cnt)7284 static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
7285 					  const char __user *ubuf, size_t cnt)
7286 {
7287 	struct ring_buffer_event *event;
7288 	struct trace_buffer *buffer;
7289 	struct raw_data_entry *entry;
7290 	ssize_t written;
7291 	int size;
7292 	int len;
7293 
7294 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7295 
7296 	size = sizeof(*entry) + cnt;
7297 	if (cnt < FAULT_SIZE_ID)
7298 		size += FAULT_SIZE_ID - cnt;
7299 
7300 	buffer = tr->array_buffer.buffer;
7301 
7302 	if (size > ring_buffer_max_event_size(buffer))
7303 		return -EINVAL;
7304 
7305 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7306 					    tracing_gen_ctx());
7307 	if (!event)
7308 		/* Ring buffer disabled, return as if not open for write */
7309 		return -EBADF;
7310 
7311 	entry = ring_buffer_event_data(event);
7312 
7313 	len = copy_from_user_nofault(&entry->id, ubuf, cnt);
7314 	if (len) {
7315 		entry->id = -1;
7316 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7317 		written = -EFAULT;
7318 	} else
7319 		written = cnt;
7320 
7321 	__buffer_unlock_commit(buffer, event);
7322 
7323 	return written;
7324 }
7325 
7326 static ssize_t
tracing_mark_raw_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7327 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7328 					size_t cnt, loff_t *fpos)
7329 {
7330 	struct trace_array *tr = filp->private_data;
7331 	ssize_t written = -ENODEV;
7332 
7333 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7334 
7335 	if (tracing_disabled)
7336 		return -EINVAL;
7337 
7338 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7339 		return -EINVAL;
7340 
7341 	/* The marker must at least have a tag id */
7342 	if (cnt < sizeof(unsigned int))
7343 		return -EINVAL;
7344 
7345 	/* The global trace_marker_raw can go to multiple instances */
7346 	if (tr == &global_trace) {
7347 		guard(rcu)();
7348 		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7349 			written = write_raw_marker_to_buffer(tr, ubuf, cnt);
7350 			if (written < 0)
7351 				break;
7352 		}
7353 	} else {
7354 		written = write_raw_marker_to_buffer(tr, ubuf, cnt);
7355 	}
7356 
7357 	return written;
7358 }
7359 
tracing_clock_show(struct seq_file * m,void * v)7360 static int tracing_clock_show(struct seq_file *m, void *v)
7361 {
7362 	struct trace_array *tr = m->private;
7363 	int i;
7364 
7365 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7366 		seq_printf(m,
7367 			"%s%s%s%s", i ? " " : "",
7368 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7369 			i == tr->clock_id ? "]" : "");
7370 	seq_putc(m, '\n');
7371 
7372 	return 0;
7373 }
7374 
tracing_set_clock(struct trace_array * tr,const char * clockstr)7375 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7376 {
7377 	int i;
7378 
7379 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7380 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7381 			break;
7382 	}
7383 	if (i == ARRAY_SIZE(trace_clocks))
7384 		return -EINVAL;
7385 
7386 	guard(mutex)(&trace_types_lock);
7387 
7388 	tr->clock_id = i;
7389 
7390 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7391 
7392 	/*
7393 	 * New clock may not be consistent with the previous clock.
7394 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7395 	 */
7396 	tracing_reset_online_cpus(&tr->array_buffer);
7397 
7398 #ifdef CONFIG_TRACER_MAX_TRACE
7399 	if (tr->max_buffer.buffer)
7400 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7401 	tracing_reset_online_cpus(&tr->max_buffer);
7402 #endif
7403 
7404 	if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
7405 		struct trace_scratch *tscratch = tr->scratch;
7406 
7407 		tscratch->clock_id = i;
7408 	}
7409 
7410 	return 0;
7411 }
7412 
tracing_clock_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7413 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7414 				   size_t cnt, loff_t *fpos)
7415 {
7416 	struct seq_file *m = filp->private_data;
7417 	struct trace_array *tr = m->private;
7418 	char buf[64];
7419 	const char *clockstr;
7420 	int ret;
7421 
7422 	if (cnt >= sizeof(buf))
7423 		return -EINVAL;
7424 
7425 	if (copy_from_user(buf, ubuf, cnt))
7426 		return -EFAULT;
7427 
7428 	buf[cnt] = 0;
7429 
7430 	clockstr = strstrip(buf);
7431 
7432 	ret = tracing_set_clock(tr, clockstr);
7433 	if (ret)
7434 		return ret;
7435 
7436 	*fpos += cnt;
7437 
7438 	return cnt;
7439 }
7440 
tracing_clock_open(struct inode * inode,struct file * file)7441 static int tracing_clock_open(struct inode *inode, struct file *file)
7442 {
7443 	struct trace_array *tr = inode->i_private;
7444 	int ret;
7445 
7446 	ret = tracing_check_open_get_tr(tr);
7447 	if (ret)
7448 		return ret;
7449 
7450 	ret = single_open(file, tracing_clock_show, inode->i_private);
7451 	if (ret < 0)
7452 		trace_array_put(tr);
7453 
7454 	return ret;
7455 }
7456 
tracing_time_stamp_mode_show(struct seq_file * m,void * v)7457 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7458 {
7459 	struct trace_array *tr = m->private;
7460 
7461 	guard(mutex)(&trace_types_lock);
7462 
7463 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7464 		seq_puts(m, "delta [absolute]\n");
7465 	else
7466 		seq_puts(m, "[delta] absolute\n");
7467 
7468 	return 0;
7469 }
7470 
tracing_time_stamp_mode_open(struct inode * inode,struct file * file)7471 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7472 {
7473 	struct trace_array *tr = inode->i_private;
7474 	int ret;
7475 
7476 	ret = tracing_check_open_get_tr(tr);
7477 	if (ret)
7478 		return ret;
7479 
7480 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7481 	if (ret < 0)
7482 		trace_array_put(tr);
7483 
7484 	return ret;
7485 }
7486 
tracing_event_time_stamp(struct trace_buffer * buffer,struct ring_buffer_event * rbe)7487 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7488 {
7489 	if (rbe == this_cpu_read(trace_buffered_event))
7490 		return ring_buffer_time_stamp(buffer);
7491 
7492 	return ring_buffer_event_time_stamp(buffer, rbe);
7493 }
7494 
7495 /*
7496  * Set or disable using the per CPU trace_buffer_event when possible.
7497  */
tracing_set_filter_buffering(struct trace_array * tr,bool set)7498 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7499 {
7500 	guard(mutex)(&trace_types_lock);
7501 
7502 	if (set && tr->no_filter_buffering_ref++)
7503 		return 0;
7504 
7505 	if (!set) {
7506 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref))
7507 			return -EINVAL;
7508 
7509 		--tr->no_filter_buffering_ref;
7510 	}
7511 
7512 	return 0;
7513 }
7514 
7515 struct ftrace_buffer_info {
7516 	struct trace_iterator	iter;
7517 	void			*spare;
7518 	unsigned int		spare_cpu;
7519 	unsigned int		spare_size;
7520 	unsigned int		read;
7521 };
7522 
7523 #ifdef CONFIG_TRACER_SNAPSHOT
tracing_snapshot_open(struct inode * inode,struct file * file)7524 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7525 {
7526 	struct trace_array *tr = inode->i_private;
7527 	struct trace_iterator *iter;
7528 	struct seq_file *m;
7529 	int ret;
7530 
7531 	ret = tracing_check_open_get_tr(tr);
7532 	if (ret)
7533 		return ret;
7534 
7535 	if (file->f_mode & FMODE_READ) {
7536 		iter = __tracing_open(inode, file, true);
7537 		if (IS_ERR(iter))
7538 			ret = PTR_ERR(iter);
7539 	} else {
7540 		/* Writes still need the seq_file to hold the private data */
7541 		ret = -ENOMEM;
7542 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7543 		if (!m)
7544 			goto out;
7545 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7546 		if (!iter) {
7547 			kfree(m);
7548 			goto out;
7549 		}
7550 		ret = 0;
7551 
7552 		iter->tr = tr;
7553 		iter->array_buffer = &tr->max_buffer;
7554 		iter->cpu_file = tracing_get_cpu(inode);
7555 		m->private = iter;
7556 		file->private_data = m;
7557 	}
7558 out:
7559 	if (ret < 0)
7560 		trace_array_put(tr);
7561 
7562 	return ret;
7563 }
7564 
tracing_swap_cpu_buffer(void * tr)7565 static void tracing_swap_cpu_buffer(void *tr)
7566 {
7567 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7568 }
7569 
7570 static ssize_t
tracing_snapshot_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7571 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7572 		       loff_t *ppos)
7573 {
7574 	struct seq_file *m = filp->private_data;
7575 	struct trace_iterator *iter = m->private;
7576 	struct trace_array *tr = iter->tr;
7577 	unsigned long val;
7578 	int ret;
7579 
7580 	ret = tracing_update_buffers(tr);
7581 	if (ret < 0)
7582 		return ret;
7583 
7584 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7585 	if (ret)
7586 		return ret;
7587 
7588 	guard(mutex)(&trace_types_lock);
7589 
7590 	if (tr->current_trace->use_max_tr)
7591 		return -EBUSY;
7592 
7593 	local_irq_disable();
7594 	arch_spin_lock(&tr->max_lock);
7595 	if (tr->cond_snapshot)
7596 		ret = -EBUSY;
7597 	arch_spin_unlock(&tr->max_lock);
7598 	local_irq_enable();
7599 	if (ret)
7600 		return ret;
7601 
7602 	switch (val) {
7603 	case 0:
7604 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7605 			return -EINVAL;
7606 		if (tr->allocated_snapshot)
7607 			free_snapshot(tr);
7608 		break;
7609 	case 1:
7610 /* Only allow per-cpu swap if the ring buffer supports it */
7611 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7612 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7613 			return -EINVAL;
7614 #endif
7615 		if (tr->allocated_snapshot)
7616 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7617 					&tr->array_buffer, iter->cpu_file);
7618 
7619 		ret = tracing_arm_snapshot_locked(tr);
7620 		if (ret)
7621 			return ret;
7622 
7623 		/* Now, we're going to swap */
7624 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7625 			local_irq_disable();
7626 			update_max_tr(tr, current, smp_processor_id(), NULL);
7627 			local_irq_enable();
7628 		} else {
7629 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7630 						 (void *)tr, 1);
7631 		}
7632 		tracing_disarm_snapshot(tr);
7633 		break;
7634 	default:
7635 		if (tr->allocated_snapshot) {
7636 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7637 				tracing_reset_online_cpus(&tr->max_buffer);
7638 			else
7639 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7640 		}
7641 		break;
7642 	}
7643 
7644 	if (ret >= 0) {
7645 		*ppos += cnt;
7646 		ret = cnt;
7647 	}
7648 
7649 	return ret;
7650 }
7651 
tracing_snapshot_release(struct inode * inode,struct file * file)7652 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7653 {
7654 	struct seq_file *m = file->private_data;
7655 	int ret;
7656 
7657 	ret = tracing_release(inode, file);
7658 
7659 	if (file->f_mode & FMODE_READ)
7660 		return ret;
7661 
7662 	/* If write only, the seq_file is just a stub */
7663 	if (m)
7664 		kfree(m->private);
7665 	kfree(m);
7666 
7667 	return 0;
7668 }
7669 
7670 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7671 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7672 				    size_t count, loff_t *ppos);
7673 static int tracing_buffers_release(struct inode *inode, struct file *file);
7674 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7675 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7676 
snapshot_raw_open(struct inode * inode,struct file * filp)7677 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7678 {
7679 	struct ftrace_buffer_info *info;
7680 	int ret;
7681 
7682 	/* The following checks for tracefs lockdown */
7683 	ret = tracing_buffers_open(inode, filp);
7684 	if (ret < 0)
7685 		return ret;
7686 
7687 	info = filp->private_data;
7688 
7689 	if (info->iter.trace->use_max_tr) {
7690 		tracing_buffers_release(inode, filp);
7691 		return -EBUSY;
7692 	}
7693 
7694 	info->iter.snapshot = true;
7695 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7696 
7697 	return ret;
7698 }
7699 
7700 #endif /* CONFIG_TRACER_SNAPSHOT */
7701 
7702 
7703 static const struct file_operations tracing_thresh_fops = {
7704 	.open		= tracing_open_generic,
7705 	.read		= tracing_thresh_read,
7706 	.write		= tracing_thresh_write,
7707 	.llseek		= generic_file_llseek,
7708 };
7709 
7710 #ifdef CONFIG_TRACER_MAX_TRACE
7711 static const struct file_operations tracing_max_lat_fops = {
7712 	.open		= tracing_open_generic_tr,
7713 	.read		= tracing_max_lat_read,
7714 	.write		= tracing_max_lat_write,
7715 	.llseek		= generic_file_llseek,
7716 	.release	= tracing_release_generic_tr,
7717 };
7718 #endif
7719 
7720 static const struct file_operations set_tracer_fops = {
7721 	.open		= tracing_open_generic_tr,
7722 	.read		= tracing_set_trace_read,
7723 	.write		= tracing_set_trace_write,
7724 	.llseek		= generic_file_llseek,
7725 	.release	= tracing_release_generic_tr,
7726 };
7727 
7728 static const struct file_operations tracing_pipe_fops = {
7729 	.open		= tracing_open_pipe,
7730 	.poll		= tracing_poll_pipe,
7731 	.read		= tracing_read_pipe,
7732 	.splice_read	= tracing_splice_read_pipe,
7733 	.release	= tracing_release_pipe,
7734 };
7735 
7736 static const struct file_operations tracing_entries_fops = {
7737 	.open		= tracing_open_generic_tr,
7738 	.read		= tracing_entries_read,
7739 	.write		= tracing_entries_write,
7740 	.llseek		= generic_file_llseek,
7741 	.release	= tracing_release_generic_tr,
7742 };
7743 
7744 static const struct file_operations tracing_buffer_meta_fops = {
7745 	.open		= tracing_buffer_meta_open,
7746 	.read		= seq_read,
7747 	.llseek		= seq_lseek,
7748 	.release	= tracing_seq_release,
7749 };
7750 
7751 static const struct file_operations tracing_total_entries_fops = {
7752 	.open		= tracing_open_generic_tr,
7753 	.read		= tracing_total_entries_read,
7754 	.llseek		= generic_file_llseek,
7755 	.release	= tracing_release_generic_tr,
7756 };
7757 
7758 static const struct file_operations tracing_free_buffer_fops = {
7759 	.open		= tracing_open_generic_tr,
7760 	.write		= tracing_free_buffer_write,
7761 	.release	= tracing_free_buffer_release,
7762 };
7763 
7764 static const struct file_operations tracing_mark_fops = {
7765 	.open		= tracing_mark_open,
7766 	.write		= tracing_mark_write,
7767 	.release	= tracing_release_generic_tr,
7768 };
7769 
7770 static const struct file_operations tracing_mark_raw_fops = {
7771 	.open		= tracing_mark_open,
7772 	.write		= tracing_mark_raw_write,
7773 	.release	= tracing_release_generic_tr,
7774 };
7775 
7776 static const struct file_operations trace_clock_fops = {
7777 	.open		= tracing_clock_open,
7778 	.read		= seq_read,
7779 	.llseek		= seq_lseek,
7780 	.release	= tracing_single_release_tr,
7781 	.write		= tracing_clock_write,
7782 };
7783 
7784 static const struct file_operations trace_time_stamp_mode_fops = {
7785 	.open		= tracing_time_stamp_mode_open,
7786 	.read		= seq_read,
7787 	.llseek		= seq_lseek,
7788 	.release	= tracing_single_release_tr,
7789 };
7790 
7791 static const struct file_operations last_boot_fops = {
7792 	.open		= tracing_last_boot_open,
7793 	.read		= seq_read,
7794 	.llseek		= seq_lseek,
7795 	.release	= tracing_seq_release,
7796 };
7797 
7798 #ifdef CONFIG_TRACER_SNAPSHOT
7799 static const struct file_operations snapshot_fops = {
7800 	.open		= tracing_snapshot_open,
7801 	.read		= seq_read,
7802 	.write		= tracing_snapshot_write,
7803 	.llseek		= tracing_lseek,
7804 	.release	= tracing_snapshot_release,
7805 };
7806 
7807 static const struct file_operations snapshot_raw_fops = {
7808 	.open		= snapshot_raw_open,
7809 	.read		= tracing_buffers_read,
7810 	.release	= tracing_buffers_release,
7811 	.splice_read	= tracing_buffers_splice_read,
7812 };
7813 
7814 #endif /* CONFIG_TRACER_SNAPSHOT */
7815 
7816 /*
7817  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7818  * @filp: The active open file structure
7819  * @ubuf: The userspace provided buffer to read value into
7820  * @cnt: The maximum number of bytes to read
7821  * @ppos: The current "file" position
7822  *
7823  * This function implements the write interface for a struct trace_min_max_param.
7824  * The filp->private_data must point to a trace_min_max_param structure that
7825  * defines where to write the value, the min and the max acceptable values,
7826  * and a lock to protect the write.
7827  */
7828 static ssize_t
trace_min_max_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7829 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7830 {
7831 	struct trace_min_max_param *param = filp->private_data;
7832 	u64 val;
7833 	int err;
7834 
7835 	if (!param)
7836 		return -EFAULT;
7837 
7838 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7839 	if (err)
7840 		return err;
7841 
7842 	if (param->lock)
7843 		mutex_lock(param->lock);
7844 
7845 	if (param->min && val < *param->min)
7846 		err = -EINVAL;
7847 
7848 	if (param->max && val > *param->max)
7849 		err = -EINVAL;
7850 
7851 	if (!err)
7852 		*param->val = val;
7853 
7854 	if (param->lock)
7855 		mutex_unlock(param->lock);
7856 
7857 	if (err)
7858 		return err;
7859 
7860 	return cnt;
7861 }
7862 
7863 /*
7864  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7865  * @filp: The active open file structure
7866  * @ubuf: The userspace provided buffer to read value into
7867  * @cnt: The maximum number of bytes to read
7868  * @ppos: The current "file" position
7869  *
7870  * This function implements the read interface for a struct trace_min_max_param.
7871  * The filp->private_data must point to a trace_min_max_param struct with valid
7872  * data.
7873  */
7874 static ssize_t
trace_min_max_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)7875 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7876 {
7877 	struct trace_min_max_param *param = filp->private_data;
7878 	char buf[U64_STR_SIZE];
7879 	int len;
7880 	u64 val;
7881 
7882 	if (!param)
7883 		return -EFAULT;
7884 
7885 	val = *param->val;
7886 
7887 	if (cnt > sizeof(buf))
7888 		cnt = sizeof(buf);
7889 
7890 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7891 
7892 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7893 }
7894 
7895 const struct file_operations trace_min_max_fops = {
7896 	.open		= tracing_open_generic,
7897 	.read		= trace_min_max_read,
7898 	.write		= trace_min_max_write,
7899 };
7900 
7901 #define TRACING_LOG_ERRS_MAX	8
7902 #define TRACING_LOG_LOC_MAX	128
7903 
7904 #define CMD_PREFIX "  Command: "
7905 
7906 struct err_info {
7907 	const char	**errs;	/* ptr to loc-specific array of err strings */
7908 	u8		type;	/* index into errs -> specific err string */
7909 	u16		pos;	/* caret position */
7910 	u64		ts;
7911 };
7912 
7913 struct tracing_log_err {
7914 	struct list_head	list;
7915 	struct err_info		info;
7916 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7917 	char			*cmd;                     /* what caused err */
7918 };
7919 
7920 static DEFINE_MUTEX(tracing_err_log_lock);
7921 
alloc_tracing_log_err(int len)7922 static struct tracing_log_err *alloc_tracing_log_err(int len)
7923 {
7924 	struct tracing_log_err *err;
7925 
7926 	err = kzalloc(sizeof(*err), GFP_KERNEL);
7927 	if (!err)
7928 		return ERR_PTR(-ENOMEM);
7929 
7930 	err->cmd = kzalloc(len, GFP_KERNEL);
7931 	if (!err->cmd) {
7932 		kfree(err);
7933 		return ERR_PTR(-ENOMEM);
7934 	}
7935 
7936 	return err;
7937 }
7938 
free_tracing_log_err(struct tracing_log_err * err)7939 static void free_tracing_log_err(struct tracing_log_err *err)
7940 {
7941 	kfree(err->cmd);
7942 	kfree(err);
7943 }
7944 
get_tracing_log_err(struct trace_array * tr,int len)7945 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7946 						   int len)
7947 {
7948 	struct tracing_log_err *err;
7949 	char *cmd;
7950 
7951 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7952 		err = alloc_tracing_log_err(len);
7953 		if (PTR_ERR(err) != -ENOMEM)
7954 			tr->n_err_log_entries++;
7955 
7956 		return err;
7957 	}
7958 	cmd = kzalloc(len, GFP_KERNEL);
7959 	if (!cmd)
7960 		return ERR_PTR(-ENOMEM);
7961 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7962 	kfree(err->cmd);
7963 	err->cmd = cmd;
7964 	list_del(&err->list);
7965 
7966 	return err;
7967 }
7968 
7969 /**
7970  * err_pos - find the position of a string within a command for error careting
7971  * @cmd: The tracing command that caused the error
7972  * @str: The string to position the caret at within @cmd
7973  *
7974  * Finds the position of the first occurrence of @str within @cmd.  The
7975  * return value can be passed to tracing_log_err() for caret placement
7976  * within @cmd.
7977  *
7978  * Returns the index within @cmd of the first occurrence of @str or 0
7979  * if @str was not found.
7980  */
err_pos(char * cmd,const char * str)7981 unsigned int err_pos(char *cmd, const char *str)
7982 {
7983 	char *found;
7984 
7985 	if (WARN_ON(!strlen(cmd)))
7986 		return 0;
7987 
7988 	found = strstr(cmd, str);
7989 	if (found)
7990 		return found - cmd;
7991 
7992 	return 0;
7993 }
7994 
7995 /**
7996  * tracing_log_err - write an error to the tracing error log
7997  * @tr: The associated trace array for the error (NULL for top level array)
7998  * @loc: A string describing where the error occurred
7999  * @cmd: The tracing command that caused the error
8000  * @errs: The array of loc-specific static error strings
8001  * @type: The index into errs[], which produces the specific static err string
8002  * @pos: The position the caret should be placed in the cmd
8003  *
8004  * Writes an error into tracing/error_log of the form:
8005  *
8006  * <loc>: error: <text>
8007  *   Command: <cmd>
8008  *              ^
8009  *
8010  * tracing/error_log is a small log file containing the last
8011  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
8012  * unless there has been a tracing error, and the error log can be
8013  * cleared and have its memory freed by writing the empty string in
8014  * truncation mode to it i.e. echo > tracing/error_log.
8015  *
8016  * NOTE: the @errs array along with the @type param are used to
8017  * produce a static error string - this string is not copied and saved
8018  * when the error is logged - only a pointer to it is saved.  See
8019  * existing callers for examples of how static strings are typically
8020  * defined for use with tracing_log_err().
8021  */
tracing_log_err(struct trace_array * tr,const char * loc,const char * cmd,const char ** errs,u8 type,u16 pos)8022 void tracing_log_err(struct trace_array *tr,
8023 		     const char *loc, const char *cmd,
8024 		     const char **errs, u8 type, u16 pos)
8025 {
8026 	struct tracing_log_err *err;
8027 	int len = 0;
8028 
8029 	if (!tr)
8030 		tr = &global_trace;
8031 
8032 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8033 
8034 	guard(mutex)(&tracing_err_log_lock);
8035 
8036 	err = get_tracing_log_err(tr, len);
8037 	if (PTR_ERR(err) == -ENOMEM)
8038 		return;
8039 
8040 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8041 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8042 
8043 	err->info.errs = errs;
8044 	err->info.type = type;
8045 	err->info.pos = pos;
8046 	err->info.ts = local_clock();
8047 
8048 	list_add_tail(&err->list, &tr->err_log);
8049 }
8050 
clear_tracing_err_log(struct trace_array * tr)8051 static void clear_tracing_err_log(struct trace_array *tr)
8052 {
8053 	struct tracing_log_err *err, *next;
8054 
8055 	guard(mutex)(&tracing_err_log_lock);
8056 
8057 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
8058 		list_del(&err->list);
8059 		free_tracing_log_err(err);
8060 	}
8061 
8062 	tr->n_err_log_entries = 0;
8063 }
8064 
tracing_err_log_seq_start(struct seq_file * m,loff_t * pos)8065 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8066 {
8067 	struct trace_array *tr = m->private;
8068 
8069 	mutex_lock(&tracing_err_log_lock);
8070 
8071 	return seq_list_start(&tr->err_log, *pos);
8072 }
8073 
tracing_err_log_seq_next(struct seq_file * m,void * v,loff_t * pos)8074 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8075 {
8076 	struct trace_array *tr = m->private;
8077 
8078 	return seq_list_next(v, &tr->err_log, pos);
8079 }
8080 
tracing_err_log_seq_stop(struct seq_file * m,void * v)8081 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8082 {
8083 	mutex_unlock(&tracing_err_log_lock);
8084 }
8085 
tracing_err_log_show_pos(struct seq_file * m,u16 pos)8086 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8087 {
8088 	u16 i;
8089 
8090 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8091 		seq_putc(m, ' ');
8092 	for (i = 0; i < pos; i++)
8093 		seq_putc(m, ' ');
8094 	seq_puts(m, "^\n");
8095 }
8096 
tracing_err_log_seq_show(struct seq_file * m,void * v)8097 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8098 {
8099 	struct tracing_log_err *err = v;
8100 
8101 	if (err) {
8102 		const char *err_text = err->info.errs[err->info.type];
8103 		u64 sec = err->info.ts;
8104 		u32 nsec;
8105 
8106 		nsec = do_div(sec, NSEC_PER_SEC);
8107 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8108 			   err->loc, err_text);
8109 		seq_printf(m, "%s", err->cmd);
8110 		tracing_err_log_show_pos(m, err->info.pos);
8111 	}
8112 
8113 	return 0;
8114 }
8115 
8116 static const struct seq_operations tracing_err_log_seq_ops = {
8117 	.start  = tracing_err_log_seq_start,
8118 	.next   = tracing_err_log_seq_next,
8119 	.stop   = tracing_err_log_seq_stop,
8120 	.show   = tracing_err_log_seq_show
8121 };
8122 
tracing_err_log_open(struct inode * inode,struct file * file)8123 static int tracing_err_log_open(struct inode *inode, struct file *file)
8124 {
8125 	struct trace_array *tr = inode->i_private;
8126 	int ret = 0;
8127 
8128 	ret = tracing_check_open_get_tr(tr);
8129 	if (ret)
8130 		return ret;
8131 
8132 	/* If this file was opened for write, then erase contents */
8133 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8134 		clear_tracing_err_log(tr);
8135 
8136 	if (file->f_mode & FMODE_READ) {
8137 		ret = seq_open(file, &tracing_err_log_seq_ops);
8138 		if (!ret) {
8139 			struct seq_file *m = file->private_data;
8140 			m->private = tr;
8141 		} else {
8142 			trace_array_put(tr);
8143 		}
8144 	}
8145 	return ret;
8146 }
8147 
tracing_err_log_write(struct file * file,const char __user * buffer,size_t count,loff_t * ppos)8148 static ssize_t tracing_err_log_write(struct file *file,
8149 				     const char __user *buffer,
8150 				     size_t count, loff_t *ppos)
8151 {
8152 	return count;
8153 }
8154 
tracing_err_log_release(struct inode * inode,struct file * file)8155 static int tracing_err_log_release(struct inode *inode, struct file *file)
8156 {
8157 	struct trace_array *tr = inode->i_private;
8158 
8159 	trace_array_put(tr);
8160 
8161 	if (file->f_mode & FMODE_READ)
8162 		seq_release(inode, file);
8163 
8164 	return 0;
8165 }
8166 
8167 static const struct file_operations tracing_err_log_fops = {
8168 	.open           = tracing_err_log_open,
8169 	.write		= tracing_err_log_write,
8170 	.read           = seq_read,
8171 	.llseek         = tracing_lseek,
8172 	.release        = tracing_err_log_release,
8173 };
8174 
tracing_buffers_open(struct inode * inode,struct file * filp)8175 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8176 {
8177 	struct trace_array *tr = inode->i_private;
8178 	struct ftrace_buffer_info *info;
8179 	int ret;
8180 
8181 	ret = tracing_check_open_get_tr(tr);
8182 	if (ret)
8183 		return ret;
8184 
8185 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
8186 	if (!info) {
8187 		trace_array_put(tr);
8188 		return -ENOMEM;
8189 	}
8190 
8191 	mutex_lock(&trace_types_lock);
8192 
8193 	info->iter.tr		= tr;
8194 	info->iter.cpu_file	= tracing_get_cpu(inode);
8195 	info->iter.trace	= tr->current_trace;
8196 	info->iter.array_buffer = &tr->array_buffer;
8197 	info->spare		= NULL;
8198 	/* Force reading ring buffer for first read */
8199 	info->read		= (unsigned int)-1;
8200 
8201 	filp->private_data = info;
8202 
8203 	tr->trace_ref++;
8204 
8205 	mutex_unlock(&trace_types_lock);
8206 
8207 	ret = nonseekable_open(inode, filp);
8208 	if (ret < 0)
8209 		trace_array_put(tr);
8210 
8211 	return ret;
8212 }
8213 
8214 static __poll_t
tracing_buffers_poll(struct file * filp,poll_table * poll_table)8215 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8216 {
8217 	struct ftrace_buffer_info *info = filp->private_data;
8218 	struct trace_iterator *iter = &info->iter;
8219 
8220 	return trace_poll(iter, filp, poll_table);
8221 }
8222 
8223 static ssize_t
tracing_buffers_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)8224 tracing_buffers_read(struct file *filp, char __user *ubuf,
8225 		     size_t count, loff_t *ppos)
8226 {
8227 	struct ftrace_buffer_info *info = filp->private_data;
8228 	struct trace_iterator *iter = &info->iter;
8229 	void *trace_data;
8230 	int page_size;
8231 	ssize_t ret = 0;
8232 	ssize_t size;
8233 
8234 	if (!count)
8235 		return 0;
8236 
8237 #ifdef CONFIG_TRACER_MAX_TRACE
8238 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8239 		return -EBUSY;
8240 #endif
8241 
8242 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8243 
8244 	/* Make sure the spare matches the current sub buffer size */
8245 	if (info->spare) {
8246 		if (page_size != info->spare_size) {
8247 			ring_buffer_free_read_page(iter->array_buffer->buffer,
8248 						   info->spare_cpu, info->spare);
8249 			info->spare = NULL;
8250 		}
8251 	}
8252 
8253 	if (!info->spare) {
8254 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8255 							  iter->cpu_file);
8256 		if (IS_ERR(info->spare)) {
8257 			ret = PTR_ERR(info->spare);
8258 			info->spare = NULL;
8259 		} else {
8260 			info->spare_cpu = iter->cpu_file;
8261 			info->spare_size = page_size;
8262 		}
8263 	}
8264 	if (!info->spare)
8265 		return ret;
8266 
8267 	/* Do we have previous read data to read? */
8268 	if (info->read < page_size)
8269 		goto read;
8270 
8271  again:
8272 	trace_access_lock(iter->cpu_file);
8273 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8274 				    info->spare,
8275 				    count,
8276 				    iter->cpu_file, 0);
8277 	trace_access_unlock(iter->cpu_file);
8278 
8279 	if (ret < 0) {
8280 		if (trace_empty(iter) && !iter->closed) {
8281 			if (update_last_data_if_empty(iter->tr))
8282 				return 0;
8283 
8284 			if ((filp->f_flags & O_NONBLOCK))
8285 				return -EAGAIN;
8286 
8287 			ret = wait_on_pipe(iter, 0);
8288 			if (ret)
8289 				return ret;
8290 
8291 			goto again;
8292 		}
8293 		return 0;
8294 	}
8295 
8296 	info->read = 0;
8297  read:
8298 	size = page_size - info->read;
8299 	if (size > count)
8300 		size = count;
8301 	trace_data = ring_buffer_read_page_data(info->spare);
8302 	ret = copy_to_user(ubuf, trace_data + info->read, size);
8303 	if (ret == size)
8304 		return -EFAULT;
8305 
8306 	size -= ret;
8307 
8308 	*ppos += size;
8309 	info->read += size;
8310 
8311 	return size;
8312 }
8313 
tracing_buffers_flush(struct file * file,fl_owner_t id)8314 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
8315 {
8316 	struct ftrace_buffer_info *info = file->private_data;
8317 	struct trace_iterator *iter = &info->iter;
8318 
8319 	iter->closed = true;
8320 	/* Make sure the waiters see the new wait_index */
8321 	(void)atomic_fetch_inc_release(&iter->wait_index);
8322 
8323 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8324 
8325 	return 0;
8326 }
8327 
tracing_buffers_release(struct inode * inode,struct file * file)8328 static int tracing_buffers_release(struct inode *inode, struct file *file)
8329 {
8330 	struct ftrace_buffer_info *info = file->private_data;
8331 	struct trace_iterator *iter = &info->iter;
8332 
8333 	guard(mutex)(&trace_types_lock);
8334 
8335 	iter->tr->trace_ref--;
8336 
8337 	__trace_array_put(iter->tr);
8338 
8339 	if (info->spare)
8340 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8341 					   info->spare_cpu, info->spare);
8342 	kvfree(info);
8343 
8344 	return 0;
8345 }
8346 
8347 struct buffer_ref {
8348 	struct trace_buffer	*buffer;
8349 	void			*page;
8350 	int			cpu;
8351 	refcount_t		refcount;
8352 };
8353 
buffer_ref_release(struct buffer_ref * ref)8354 static void buffer_ref_release(struct buffer_ref *ref)
8355 {
8356 	if (!refcount_dec_and_test(&ref->refcount))
8357 		return;
8358 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8359 	kfree(ref);
8360 }
8361 
buffer_pipe_buf_release(struct pipe_inode_info * pipe,struct pipe_buffer * buf)8362 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8363 				    struct pipe_buffer *buf)
8364 {
8365 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8366 
8367 	buffer_ref_release(ref);
8368 	buf->private = 0;
8369 }
8370 
buffer_pipe_buf_get(struct pipe_inode_info * pipe,struct pipe_buffer * buf)8371 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8372 				struct pipe_buffer *buf)
8373 {
8374 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8375 
8376 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8377 		return false;
8378 
8379 	refcount_inc(&ref->refcount);
8380 	return true;
8381 }
8382 
8383 /* Pipe buffer operations for a buffer. */
8384 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8385 	.release		= buffer_pipe_buf_release,
8386 	.get			= buffer_pipe_buf_get,
8387 };
8388 
8389 /*
8390  * Callback from splice_to_pipe(), if we need to release some pages
8391  * at the end of the spd in case we error'ed out in filling the pipe.
8392  */
buffer_spd_release(struct splice_pipe_desc * spd,unsigned int i)8393 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8394 {
8395 	struct buffer_ref *ref =
8396 		(struct buffer_ref *)spd->partial[i].private;
8397 
8398 	buffer_ref_release(ref);
8399 	spd->partial[i].private = 0;
8400 }
8401 
8402 static ssize_t
tracing_buffers_splice_read(struct file * file,loff_t * ppos,struct pipe_inode_info * pipe,size_t len,unsigned int flags)8403 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8404 			    struct pipe_inode_info *pipe, size_t len,
8405 			    unsigned int flags)
8406 {
8407 	struct ftrace_buffer_info *info = file->private_data;
8408 	struct trace_iterator *iter = &info->iter;
8409 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8410 	struct page *pages_def[PIPE_DEF_BUFFERS];
8411 	struct splice_pipe_desc spd = {
8412 		.pages		= pages_def,
8413 		.partial	= partial_def,
8414 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8415 		.ops		= &buffer_pipe_buf_ops,
8416 		.spd_release	= buffer_spd_release,
8417 	};
8418 	struct buffer_ref *ref;
8419 	bool woken = false;
8420 	int page_size;
8421 	int entries, i;
8422 	ssize_t ret = 0;
8423 
8424 #ifdef CONFIG_TRACER_MAX_TRACE
8425 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8426 		return -EBUSY;
8427 #endif
8428 
8429 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8430 	if (*ppos & (page_size - 1))
8431 		return -EINVAL;
8432 
8433 	if (len & (page_size - 1)) {
8434 		if (len < page_size)
8435 			return -EINVAL;
8436 		len &= (~(page_size - 1));
8437 	}
8438 
8439 	if (splice_grow_spd(pipe, &spd))
8440 		return -ENOMEM;
8441 
8442  again:
8443 	trace_access_lock(iter->cpu_file);
8444 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8445 
8446 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8447 		struct page *page;
8448 		int r;
8449 
8450 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8451 		if (!ref) {
8452 			ret = -ENOMEM;
8453 			break;
8454 		}
8455 
8456 		refcount_set(&ref->refcount, 1);
8457 		ref->buffer = iter->array_buffer->buffer;
8458 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8459 		if (IS_ERR(ref->page)) {
8460 			ret = PTR_ERR(ref->page);
8461 			ref->page = NULL;
8462 			kfree(ref);
8463 			break;
8464 		}
8465 		ref->cpu = iter->cpu_file;
8466 
8467 		r = ring_buffer_read_page(ref->buffer, ref->page,
8468 					  len, iter->cpu_file, 1);
8469 		if (r < 0) {
8470 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8471 						   ref->page);
8472 			kfree(ref);
8473 			break;
8474 		}
8475 
8476 		page = virt_to_page(ring_buffer_read_page_data(ref->page));
8477 
8478 		spd.pages[i] = page;
8479 		spd.partial[i].len = page_size;
8480 		spd.partial[i].offset = 0;
8481 		spd.partial[i].private = (unsigned long)ref;
8482 		spd.nr_pages++;
8483 		*ppos += page_size;
8484 
8485 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8486 	}
8487 
8488 	trace_access_unlock(iter->cpu_file);
8489 	spd.nr_pages = i;
8490 
8491 	/* did we read anything? */
8492 	if (!spd.nr_pages) {
8493 
8494 		if (ret)
8495 			goto out;
8496 
8497 		if (woken)
8498 			goto out;
8499 
8500 		ret = -EAGAIN;
8501 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8502 			goto out;
8503 
8504 		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8505 		if (ret)
8506 			goto out;
8507 
8508 		/* No need to wait after waking up when tracing is off */
8509 		if (!tracer_tracing_is_on(iter->tr))
8510 			goto out;
8511 
8512 		/* Iterate one more time to collect any new data then exit */
8513 		woken = true;
8514 
8515 		goto again;
8516 	}
8517 
8518 	ret = splice_to_pipe(pipe, &spd);
8519 out:
8520 	splice_shrink_spd(&spd);
8521 
8522 	return ret;
8523 }
8524 
tracing_buffers_ioctl(struct file * file,unsigned int cmd,unsigned long arg)8525 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8526 {
8527 	struct ftrace_buffer_info *info = file->private_data;
8528 	struct trace_iterator *iter = &info->iter;
8529 	int err;
8530 
8531 	if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
8532 		if (!(file->f_flags & O_NONBLOCK)) {
8533 			err = ring_buffer_wait(iter->array_buffer->buffer,
8534 					       iter->cpu_file,
8535 					       iter->tr->buffer_percent,
8536 					       NULL, NULL);
8537 			if (err)
8538 				return err;
8539 		}
8540 
8541 		return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8542 						  iter->cpu_file);
8543 	} else if (cmd) {
8544 		return -ENOTTY;
8545 	}
8546 
8547 	/*
8548 	 * An ioctl call with cmd 0 to the ring buffer file will wake up all
8549 	 * waiters
8550 	 */
8551 	guard(mutex)(&trace_types_lock);
8552 
8553 	/* Make sure the waiters see the new wait_index */
8554 	(void)atomic_fetch_inc_release(&iter->wait_index);
8555 
8556 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8557 
8558 	return 0;
8559 }
8560 
8561 #ifdef CONFIG_TRACER_MAX_TRACE
get_snapshot_map(struct trace_array * tr)8562 static int get_snapshot_map(struct trace_array *tr)
8563 {
8564 	int err = 0;
8565 
8566 	/*
8567 	 * Called with mmap_lock held. lockdep would be unhappy if we would now
8568 	 * take trace_types_lock. Instead use the specific
8569 	 * snapshot_trigger_lock.
8570 	 */
8571 	spin_lock(&tr->snapshot_trigger_lock);
8572 
8573 	if (tr->snapshot || tr->mapped == UINT_MAX)
8574 		err = -EBUSY;
8575 	else
8576 		tr->mapped++;
8577 
8578 	spin_unlock(&tr->snapshot_trigger_lock);
8579 
8580 	/* Wait for update_max_tr() to observe iter->tr->mapped */
8581 	if (tr->mapped == 1)
8582 		synchronize_rcu();
8583 
8584 	return err;
8585 
8586 }
put_snapshot_map(struct trace_array * tr)8587 static void put_snapshot_map(struct trace_array *tr)
8588 {
8589 	spin_lock(&tr->snapshot_trigger_lock);
8590 	if (!WARN_ON(!tr->mapped))
8591 		tr->mapped--;
8592 	spin_unlock(&tr->snapshot_trigger_lock);
8593 }
8594 #else
get_snapshot_map(struct trace_array * tr)8595 static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
put_snapshot_map(struct trace_array * tr)8596 static inline void put_snapshot_map(struct trace_array *tr) { }
8597 #endif
8598 
tracing_buffers_mmap_close(struct vm_area_struct * vma)8599 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8600 {
8601 	struct ftrace_buffer_info *info = vma->vm_file->private_data;
8602 	struct trace_iterator *iter = &info->iter;
8603 
8604 	WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
8605 	put_snapshot_map(iter->tr);
8606 }
8607 
8608 static const struct vm_operations_struct tracing_buffers_vmops = {
8609 	.close		= tracing_buffers_mmap_close,
8610 };
8611 
tracing_buffers_mmap(struct file * filp,struct vm_area_struct * vma)8612 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
8613 {
8614 	struct ftrace_buffer_info *info = filp->private_data;
8615 	struct trace_iterator *iter = &info->iter;
8616 	int ret = 0;
8617 
8618 	/* A memmap'ed buffer is not supported for user space mmap */
8619 	if (iter->tr->flags & TRACE_ARRAY_FL_MEMMAP)
8620 		return -ENODEV;
8621 
8622 	ret = get_snapshot_map(iter->tr);
8623 	if (ret)
8624 		return ret;
8625 
8626 	ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
8627 	if (ret)
8628 		put_snapshot_map(iter->tr);
8629 
8630 	vma->vm_ops = &tracing_buffers_vmops;
8631 
8632 	return ret;
8633 }
8634 
8635 static const struct file_operations tracing_buffers_fops = {
8636 	.open		= tracing_buffers_open,
8637 	.read		= tracing_buffers_read,
8638 	.poll		= tracing_buffers_poll,
8639 	.release	= tracing_buffers_release,
8640 	.flush		= tracing_buffers_flush,
8641 	.splice_read	= tracing_buffers_splice_read,
8642 	.unlocked_ioctl = tracing_buffers_ioctl,
8643 	.mmap		= tracing_buffers_mmap,
8644 };
8645 
8646 static ssize_t
tracing_stats_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)8647 tracing_stats_read(struct file *filp, char __user *ubuf,
8648 		   size_t count, loff_t *ppos)
8649 {
8650 	struct inode *inode = file_inode(filp);
8651 	struct trace_array *tr = inode->i_private;
8652 	struct array_buffer *trace_buf = &tr->array_buffer;
8653 	int cpu = tracing_get_cpu(inode);
8654 	struct trace_seq *s;
8655 	unsigned long cnt;
8656 	unsigned long long t;
8657 	unsigned long usec_rem;
8658 
8659 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8660 	if (!s)
8661 		return -ENOMEM;
8662 
8663 	trace_seq_init(s);
8664 
8665 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8666 	trace_seq_printf(s, "entries: %ld\n", cnt);
8667 
8668 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8669 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8670 
8671 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8672 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8673 
8674 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8675 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8676 
8677 	if (trace_clocks[tr->clock_id].in_ns) {
8678 		/* local or global for trace_clock */
8679 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8680 		usec_rem = do_div(t, USEC_PER_SEC);
8681 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8682 								t, usec_rem);
8683 
8684 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8685 		usec_rem = do_div(t, USEC_PER_SEC);
8686 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8687 	} else {
8688 		/* counter or tsc mode for trace_clock */
8689 		trace_seq_printf(s, "oldest event ts: %llu\n",
8690 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8691 
8692 		trace_seq_printf(s, "now ts: %llu\n",
8693 				ring_buffer_time_stamp(trace_buf->buffer));
8694 	}
8695 
8696 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8697 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8698 
8699 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8700 	trace_seq_printf(s, "read events: %ld\n", cnt);
8701 
8702 	count = simple_read_from_buffer(ubuf, count, ppos,
8703 					s->buffer, trace_seq_used(s));
8704 
8705 	kfree(s);
8706 
8707 	return count;
8708 }
8709 
8710 static const struct file_operations tracing_stats_fops = {
8711 	.open		= tracing_open_generic_tr,
8712 	.read		= tracing_stats_read,
8713 	.llseek		= generic_file_llseek,
8714 	.release	= tracing_release_generic_tr,
8715 };
8716 
8717 #ifdef CONFIG_DYNAMIC_FTRACE
8718 
8719 static ssize_t
tracing_read_dyn_info(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8720 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8721 		  size_t cnt, loff_t *ppos)
8722 {
8723 	ssize_t ret;
8724 	char *buf;
8725 	int r;
8726 
8727 	/* 512 should be plenty to hold the amount needed */
8728 #define DYN_INFO_BUF_SIZE	512
8729 
8730 	buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
8731 	if (!buf)
8732 		return -ENOMEM;
8733 
8734 	r = scnprintf(buf, DYN_INFO_BUF_SIZE,
8735 		      "%ld pages:%ld groups: %ld\n"
8736 		      "ftrace boot update time = %llu (ns)\n"
8737 		      "ftrace module total update time = %llu (ns)\n",
8738 		      ftrace_update_tot_cnt,
8739 		      ftrace_number_of_pages,
8740 		      ftrace_number_of_groups,
8741 		      ftrace_update_time,
8742 		      ftrace_total_mod_time);
8743 
8744 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8745 	kfree(buf);
8746 	return ret;
8747 }
8748 
8749 static const struct file_operations tracing_dyn_info_fops = {
8750 	.open		= tracing_open_generic,
8751 	.read		= tracing_read_dyn_info,
8752 	.llseek		= generic_file_llseek,
8753 };
8754 #endif /* CONFIG_DYNAMIC_FTRACE */
8755 
8756 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8757 static void
ftrace_snapshot(unsigned long ip,unsigned long parent_ip,struct trace_array * tr,struct ftrace_probe_ops * ops,void * data)8758 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8759 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8760 		void *data)
8761 {
8762 	tracing_snapshot_instance(tr);
8763 }
8764 
8765 static void
ftrace_count_snapshot(unsigned long ip,unsigned long parent_ip,struct trace_array * tr,struct ftrace_probe_ops * ops,void * data)8766 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8767 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8768 		      void *data)
8769 {
8770 	struct ftrace_func_mapper *mapper = data;
8771 	long *count = NULL;
8772 
8773 	if (mapper)
8774 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8775 
8776 	if (count) {
8777 
8778 		if (*count <= 0)
8779 			return;
8780 
8781 		(*count)--;
8782 	}
8783 
8784 	tracing_snapshot_instance(tr);
8785 }
8786 
8787 static int
ftrace_snapshot_print(struct seq_file * m,unsigned long ip,struct ftrace_probe_ops * ops,void * data)8788 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8789 		      struct ftrace_probe_ops *ops, void *data)
8790 {
8791 	struct ftrace_func_mapper *mapper = data;
8792 	long *count = NULL;
8793 
8794 	seq_printf(m, "%ps:", (void *)ip);
8795 
8796 	seq_puts(m, "snapshot");
8797 
8798 	if (mapper)
8799 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8800 
8801 	if (count)
8802 		seq_printf(m, ":count=%ld\n", *count);
8803 	else
8804 		seq_puts(m, ":unlimited\n");
8805 
8806 	return 0;
8807 }
8808 
8809 static int
ftrace_snapshot_init(struct ftrace_probe_ops * ops,struct trace_array * tr,unsigned long ip,void * init_data,void ** data)8810 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8811 		     unsigned long ip, void *init_data, void **data)
8812 {
8813 	struct ftrace_func_mapper *mapper = *data;
8814 
8815 	if (!mapper) {
8816 		mapper = allocate_ftrace_func_mapper();
8817 		if (!mapper)
8818 			return -ENOMEM;
8819 		*data = mapper;
8820 	}
8821 
8822 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8823 }
8824 
8825 static void
ftrace_snapshot_free(struct ftrace_probe_ops * ops,struct trace_array * tr,unsigned long ip,void * data)8826 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8827 		     unsigned long ip, void *data)
8828 {
8829 	struct ftrace_func_mapper *mapper = data;
8830 
8831 	if (!ip) {
8832 		if (!mapper)
8833 			return;
8834 		free_ftrace_func_mapper(mapper, NULL);
8835 		return;
8836 	}
8837 
8838 	ftrace_func_mapper_remove_ip(mapper, ip);
8839 }
8840 
8841 static struct ftrace_probe_ops snapshot_probe_ops = {
8842 	.func			= ftrace_snapshot,
8843 	.print			= ftrace_snapshot_print,
8844 };
8845 
8846 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8847 	.func			= ftrace_count_snapshot,
8848 	.print			= ftrace_snapshot_print,
8849 	.init			= ftrace_snapshot_init,
8850 	.free			= ftrace_snapshot_free,
8851 };
8852 
8853 static int
ftrace_trace_snapshot_callback(struct trace_array * tr,struct ftrace_hash * hash,char * glob,char * cmd,char * param,int enable)8854 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8855 			       char *glob, char *cmd, char *param, int enable)
8856 {
8857 	struct ftrace_probe_ops *ops;
8858 	void *count = (void *)-1;
8859 	char *number;
8860 	int ret;
8861 
8862 	if (!tr)
8863 		return -ENODEV;
8864 
8865 	/* hash funcs only work with set_ftrace_filter */
8866 	if (!enable)
8867 		return -EINVAL;
8868 
8869 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8870 
8871 	if (glob[0] == '!') {
8872 		ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
8873 		if (!ret)
8874 			tracing_disarm_snapshot(tr);
8875 
8876 		return ret;
8877 	}
8878 
8879 	if (!param)
8880 		goto out_reg;
8881 
8882 	number = strsep(&param, ":");
8883 
8884 	if (!strlen(number))
8885 		goto out_reg;
8886 
8887 	/*
8888 	 * We use the callback data field (which is a pointer)
8889 	 * as our counter.
8890 	 */
8891 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8892 	if (ret)
8893 		return ret;
8894 
8895  out_reg:
8896 	ret = tracing_arm_snapshot(tr);
8897 	if (ret < 0)
8898 		return ret;
8899 
8900 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8901 	if (ret < 0)
8902 		tracing_disarm_snapshot(tr);
8903 
8904 	return ret < 0 ? ret : 0;
8905 }
8906 
8907 static struct ftrace_func_command ftrace_snapshot_cmd = {
8908 	.name			= "snapshot",
8909 	.func			= ftrace_trace_snapshot_callback,
8910 };
8911 
register_snapshot_cmd(void)8912 static __init int register_snapshot_cmd(void)
8913 {
8914 	return register_ftrace_command(&ftrace_snapshot_cmd);
8915 }
8916 #else
register_snapshot_cmd(void)8917 static inline __init int register_snapshot_cmd(void) { return 0; }
8918 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8919 
tracing_get_dentry(struct trace_array * tr)8920 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8921 {
8922 	/* Top directory uses NULL as the parent */
8923 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8924 		return NULL;
8925 
8926 	if (WARN_ON(!tr->dir))
8927 		return ERR_PTR(-ENODEV);
8928 
8929 	/* All sub buffers have a descriptor */
8930 	return tr->dir;
8931 }
8932 
tracing_dentry_percpu(struct trace_array * tr,int cpu)8933 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8934 {
8935 	struct dentry *d_tracer;
8936 
8937 	if (tr->percpu_dir)
8938 		return tr->percpu_dir;
8939 
8940 	d_tracer = tracing_get_dentry(tr);
8941 	if (IS_ERR(d_tracer))
8942 		return NULL;
8943 
8944 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8945 
8946 	MEM_FAIL(!tr->percpu_dir,
8947 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8948 
8949 	return tr->percpu_dir;
8950 }
8951 
8952 static struct dentry *
trace_create_cpu_file(const char * name,umode_t mode,struct dentry * parent,void * data,long cpu,const struct file_operations * fops)8953 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8954 		      void *data, long cpu, const struct file_operations *fops)
8955 {
8956 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8957 
8958 	if (ret) /* See tracing_get_cpu() */
8959 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8960 	return ret;
8961 }
8962 
8963 static void
tracing_init_tracefs_percpu(struct trace_array * tr,long cpu)8964 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8965 {
8966 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8967 	struct dentry *d_cpu;
8968 	char cpu_dir[30]; /* 30 characters should be more than enough */
8969 
8970 	if (!d_percpu)
8971 		return;
8972 
8973 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8974 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8975 	if (!d_cpu) {
8976 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8977 		return;
8978 	}
8979 
8980 	/* per cpu trace_pipe */
8981 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8982 				tr, cpu, &tracing_pipe_fops);
8983 
8984 	/* per cpu trace */
8985 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8986 				tr, cpu, &tracing_fops);
8987 
8988 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8989 				tr, cpu, &tracing_buffers_fops);
8990 
8991 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8992 				tr, cpu, &tracing_stats_fops);
8993 
8994 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8995 				tr, cpu, &tracing_entries_fops);
8996 
8997 	if (tr->range_addr_start)
8998 		trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
8999 				      tr, cpu, &tracing_buffer_meta_fops);
9000 #ifdef CONFIG_TRACER_SNAPSHOT
9001 	if (!tr->range_addr_start) {
9002 		trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
9003 				      tr, cpu, &snapshot_fops);
9004 
9005 		trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
9006 				      tr, cpu, &snapshot_raw_fops);
9007 	}
9008 #endif
9009 }
9010 
9011 #ifdef CONFIG_FTRACE_SELFTEST
9012 /* Let selftest have access to static functions in this file */
9013 #include "trace_selftest.c"
9014 #endif
9015 
9016 static ssize_t
trace_options_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9017 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
9018 			loff_t *ppos)
9019 {
9020 	struct trace_option_dentry *topt = filp->private_data;
9021 	char *buf;
9022 
9023 	if (topt->flags->val & topt->opt->bit)
9024 		buf = "1\n";
9025 	else
9026 		buf = "0\n";
9027 
9028 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9029 }
9030 
9031 static ssize_t
trace_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9032 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
9033 			 loff_t *ppos)
9034 {
9035 	struct trace_option_dentry *topt = filp->private_data;
9036 	unsigned long val;
9037 	int ret;
9038 
9039 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9040 	if (ret)
9041 		return ret;
9042 
9043 	if (val != 0 && val != 1)
9044 		return -EINVAL;
9045 
9046 	if (!!(topt->flags->val & topt->opt->bit) != val) {
9047 		guard(mutex)(&trace_types_lock);
9048 		ret = __set_tracer_option(topt->tr, topt->flags,
9049 					  topt->opt, !val);
9050 		if (ret)
9051 			return ret;
9052 	}
9053 
9054 	*ppos += cnt;
9055 
9056 	return cnt;
9057 }
9058 
tracing_open_options(struct inode * inode,struct file * filp)9059 static int tracing_open_options(struct inode *inode, struct file *filp)
9060 {
9061 	struct trace_option_dentry *topt = inode->i_private;
9062 	int ret;
9063 
9064 	ret = tracing_check_open_get_tr(topt->tr);
9065 	if (ret)
9066 		return ret;
9067 
9068 	filp->private_data = inode->i_private;
9069 	return 0;
9070 }
9071 
tracing_release_options(struct inode * inode,struct file * file)9072 static int tracing_release_options(struct inode *inode, struct file *file)
9073 {
9074 	struct trace_option_dentry *topt = file->private_data;
9075 
9076 	trace_array_put(topt->tr);
9077 	return 0;
9078 }
9079 
9080 static const struct file_operations trace_options_fops = {
9081 	.open = tracing_open_options,
9082 	.read = trace_options_read,
9083 	.write = trace_options_write,
9084 	.llseek	= generic_file_llseek,
9085 	.release = tracing_release_options,
9086 };
9087 
9088 /*
9089  * In order to pass in both the trace_array descriptor as well as the index
9090  * to the flag that the trace option file represents, the trace_array
9091  * has a character array of trace_flags_index[], which holds the index
9092  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9093  * The address of this character array is passed to the flag option file
9094  * read/write callbacks.
9095  *
9096  * In order to extract both the index and the trace_array descriptor,
9097  * get_tr_index() uses the following algorithm.
9098  *
9099  *   idx = *ptr;
9100  *
9101  * As the pointer itself contains the address of the index (remember
9102  * index[1] == 1).
9103  *
9104  * Then to get the trace_array descriptor, by subtracting that index
9105  * from the ptr, we get to the start of the index itself.
9106  *
9107  *   ptr - idx == &index[0]
9108  *
9109  * Then a simple container_of() from that pointer gets us to the
9110  * trace_array descriptor.
9111  */
get_tr_index(void * data,struct trace_array ** ptr,unsigned int * pindex)9112 static void get_tr_index(void *data, struct trace_array **ptr,
9113 			 unsigned int *pindex)
9114 {
9115 	*pindex = *(unsigned char *)data;
9116 
9117 	*ptr = container_of(data - *pindex, struct trace_array,
9118 			    trace_flags_index);
9119 }
9120 
9121 static ssize_t
trace_options_core_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9122 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9123 			loff_t *ppos)
9124 {
9125 	void *tr_index = filp->private_data;
9126 	struct trace_array *tr;
9127 	unsigned int index;
9128 	char *buf;
9129 
9130 	get_tr_index(tr_index, &tr, &index);
9131 
9132 	if (tr->trace_flags & (1 << index))
9133 		buf = "1\n";
9134 	else
9135 		buf = "0\n";
9136 
9137 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9138 }
9139 
9140 static ssize_t
trace_options_core_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9141 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9142 			 loff_t *ppos)
9143 {
9144 	void *tr_index = filp->private_data;
9145 	struct trace_array *tr;
9146 	unsigned int index;
9147 	unsigned long val;
9148 	int ret;
9149 
9150 	get_tr_index(tr_index, &tr, &index);
9151 
9152 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9153 	if (ret)
9154 		return ret;
9155 
9156 	if (val != 0 && val != 1)
9157 		return -EINVAL;
9158 
9159 	mutex_lock(&event_mutex);
9160 	mutex_lock(&trace_types_lock);
9161 	ret = set_tracer_flag(tr, 1 << index, val);
9162 	mutex_unlock(&trace_types_lock);
9163 	mutex_unlock(&event_mutex);
9164 
9165 	if (ret < 0)
9166 		return ret;
9167 
9168 	*ppos += cnt;
9169 
9170 	return cnt;
9171 }
9172 
9173 static const struct file_operations trace_options_core_fops = {
9174 	.open = tracing_open_generic,
9175 	.read = trace_options_core_read,
9176 	.write = trace_options_core_write,
9177 	.llseek = generic_file_llseek,
9178 };
9179 
trace_create_file(const char * name,umode_t mode,struct dentry * parent,void * data,const struct file_operations * fops)9180 struct dentry *trace_create_file(const char *name,
9181 				 umode_t mode,
9182 				 struct dentry *parent,
9183 				 void *data,
9184 				 const struct file_operations *fops)
9185 {
9186 	struct dentry *ret;
9187 
9188 	ret = tracefs_create_file(name, mode, parent, data, fops);
9189 	if (!ret)
9190 		pr_warn("Could not create tracefs '%s' entry\n", name);
9191 
9192 	return ret;
9193 }
9194 
9195 
trace_options_init_dentry(struct trace_array * tr)9196 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9197 {
9198 	struct dentry *d_tracer;
9199 
9200 	if (tr->options)
9201 		return tr->options;
9202 
9203 	d_tracer = tracing_get_dentry(tr);
9204 	if (IS_ERR(d_tracer))
9205 		return NULL;
9206 
9207 	tr->options = tracefs_create_dir("options", d_tracer);
9208 	if (!tr->options) {
9209 		pr_warn("Could not create tracefs directory 'options'\n");
9210 		return NULL;
9211 	}
9212 
9213 	return tr->options;
9214 }
9215 
9216 static void
create_trace_option_file(struct trace_array * tr,struct trace_option_dentry * topt,struct tracer_flags * flags,struct tracer_opt * opt)9217 create_trace_option_file(struct trace_array *tr,
9218 			 struct trace_option_dentry *topt,
9219 			 struct tracer_flags *flags,
9220 			 struct tracer_opt *opt)
9221 {
9222 	struct dentry *t_options;
9223 
9224 	t_options = trace_options_init_dentry(tr);
9225 	if (!t_options)
9226 		return;
9227 
9228 	topt->flags = flags;
9229 	topt->opt = opt;
9230 	topt->tr = tr;
9231 
9232 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9233 					t_options, topt, &trace_options_fops);
9234 
9235 }
9236 
9237 static void
create_trace_option_files(struct trace_array * tr,struct tracer * tracer)9238 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9239 {
9240 	struct trace_option_dentry *topts;
9241 	struct trace_options *tr_topts;
9242 	struct tracer_flags *flags;
9243 	struct tracer_opt *opts;
9244 	int cnt;
9245 	int i;
9246 
9247 	if (!tracer)
9248 		return;
9249 
9250 	flags = tracer->flags;
9251 
9252 	if (!flags || !flags->opts)
9253 		return;
9254 
9255 	/*
9256 	 * If this is an instance, only create flags for tracers
9257 	 * the instance may have.
9258 	 */
9259 	if (!trace_ok_for_array(tracer, tr))
9260 		return;
9261 
9262 	for (i = 0; i < tr->nr_topts; i++) {
9263 		/* Make sure there's no duplicate flags. */
9264 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9265 			return;
9266 	}
9267 
9268 	opts = flags->opts;
9269 
9270 	for (cnt = 0; opts[cnt].name; cnt++)
9271 		;
9272 
9273 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9274 	if (!topts)
9275 		return;
9276 
9277 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9278 			    GFP_KERNEL);
9279 	if (!tr_topts) {
9280 		kfree(topts);
9281 		return;
9282 	}
9283 
9284 	tr->topts = tr_topts;
9285 	tr->topts[tr->nr_topts].tracer = tracer;
9286 	tr->topts[tr->nr_topts].topts = topts;
9287 	tr->nr_topts++;
9288 
9289 	for (cnt = 0; opts[cnt].name; cnt++) {
9290 		create_trace_option_file(tr, &topts[cnt], flags,
9291 					 &opts[cnt]);
9292 		MEM_FAIL(topts[cnt].entry == NULL,
9293 			  "Failed to create trace option: %s",
9294 			  opts[cnt].name);
9295 	}
9296 }
9297 
9298 static struct dentry *
create_trace_option_core_file(struct trace_array * tr,const char * option,long index)9299 create_trace_option_core_file(struct trace_array *tr,
9300 			      const char *option, long index)
9301 {
9302 	struct dentry *t_options;
9303 
9304 	t_options = trace_options_init_dentry(tr);
9305 	if (!t_options)
9306 		return NULL;
9307 
9308 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9309 				 (void *)&tr->trace_flags_index[index],
9310 				 &trace_options_core_fops);
9311 }
9312 
create_trace_options_dir(struct trace_array * tr)9313 static void create_trace_options_dir(struct trace_array *tr)
9314 {
9315 	struct dentry *t_options;
9316 	bool top_level = tr == &global_trace;
9317 	int i;
9318 
9319 	t_options = trace_options_init_dentry(tr);
9320 	if (!t_options)
9321 		return;
9322 
9323 	for (i = 0; trace_options[i]; i++) {
9324 		if (top_level ||
9325 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9326 			create_trace_option_core_file(tr, trace_options[i], i);
9327 	}
9328 }
9329 
9330 static ssize_t
rb_simple_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9331 rb_simple_read(struct file *filp, char __user *ubuf,
9332 	       size_t cnt, loff_t *ppos)
9333 {
9334 	struct trace_array *tr = filp->private_data;
9335 	char buf[64];
9336 	int r;
9337 
9338 	r = tracer_tracing_is_on(tr);
9339 	r = sprintf(buf, "%d\n", r);
9340 
9341 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9342 }
9343 
9344 static ssize_t
rb_simple_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9345 rb_simple_write(struct file *filp, const char __user *ubuf,
9346 		size_t cnt, loff_t *ppos)
9347 {
9348 	struct trace_array *tr = filp->private_data;
9349 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9350 	unsigned long val;
9351 	int ret;
9352 
9353 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9354 	if (ret)
9355 		return ret;
9356 
9357 	if (buffer) {
9358 		guard(mutex)(&trace_types_lock);
9359 		if (!!val == tracer_tracing_is_on(tr)) {
9360 			val = 0; /* do nothing */
9361 		} else if (val) {
9362 			tracer_tracing_on(tr);
9363 			if (tr->current_trace->start)
9364 				tr->current_trace->start(tr);
9365 		} else {
9366 			tracer_tracing_off(tr);
9367 			if (tr->current_trace->stop)
9368 				tr->current_trace->stop(tr);
9369 			/* Wake up any waiters */
9370 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9371 		}
9372 	}
9373 
9374 	(*ppos)++;
9375 
9376 	return cnt;
9377 }
9378 
9379 static const struct file_operations rb_simple_fops = {
9380 	.open		= tracing_open_generic_tr,
9381 	.read		= rb_simple_read,
9382 	.write		= rb_simple_write,
9383 	.release	= tracing_release_generic_tr,
9384 	.llseek		= default_llseek,
9385 };
9386 
9387 static ssize_t
buffer_percent_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9388 buffer_percent_read(struct file *filp, char __user *ubuf,
9389 		    size_t cnt, loff_t *ppos)
9390 {
9391 	struct trace_array *tr = filp->private_data;
9392 	char buf[64];
9393 	int r;
9394 
9395 	r = tr->buffer_percent;
9396 	r = sprintf(buf, "%d\n", r);
9397 
9398 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9399 }
9400 
9401 static ssize_t
buffer_percent_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9402 buffer_percent_write(struct file *filp, const char __user *ubuf,
9403 		     size_t cnt, loff_t *ppos)
9404 {
9405 	struct trace_array *tr = filp->private_data;
9406 	unsigned long val;
9407 	int ret;
9408 
9409 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9410 	if (ret)
9411 		return ret;
9412 
9413 	if (val > 100)
9414 		return -EINVAL;
9415 
9416 	tr->buffer_percent = val;
9417 
9418 	(*ppos)++;
9419 
9420 	return cnt;
9421 }
9422 
9423 static const struct file_operations buffer_percent_fops = {
9424 	.open		= tracing_open_generic_tr,
9425 	.read		= buffer_percent_read,
9426 	.write		= buffer_percent_write,
9427 	.release	= tracing_release_generic_tr,
9428 	.llseek		= default_llseek,
9429 };
9430 
9431 static ssize_t
buffer_subbuf_size_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9432 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9433 {
9434 	struct trace_array *tr = filp->private_data;
9435 	size_t size;
9436 	char buf[64];
9437 	int order;
9438 	int r;
9439 
9440 	order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9441 	size = (PAGE_SIZE << order) / 1024;
9442 
9443 	r = sprintf(buf, "%zd\n", size);
9444 
9445 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9446 }
9447 
9448 static ssize_t
buffer_subbuf_size_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9449 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9450 			 size_t cnt, loff_t *ppos)
9451 {
9452 	struct trace_array *tr = filp->private_data;
9453 	unsigned long val;
9454 	int old_order;
9455 	int order;
9456 	int pages;
9457 	int ret;
9458 
9459 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9460 	if (ret)
9461 		return ret;
9462 
9463 	val *= 1024; /* value passed in is in KB */
9464 
9465 	pages = DIV_ROUND_UP(val, PAGE_SIZE);
9466 	order = fls(pages - 1);
9467 
9468 	/* limit between 1 and 128 system pages */
9469 	if (order < 0 || order > 7)
9470 		return -EINVAL;
9471 
9472 	/* Do not allow tracing while changing the order of the ring buffer */
9473 	tracing_stop_tr(tr);
9474 
9475 	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9476 	if (old_order == order)
9477 		goto out;
9478 
9479 	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9480 	if (ret)
9481 		goto out;
9482 
9483 #ifdef CONFIG_TRACER_MAX_TRACE
9484 
9485 	if (!tr->allocated_snapshot)
9486 		goto out_max;
9487 
9488 	ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9489 	if (ret) {
9490 		/* Put back the old order */
9491 		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9492 		if (WARN_ON_ONCE(cnt)) {
9493 			/*
9494 			 * AARGH! We are left with different orders!
9495 			 * The max buffer is our "snapshot" buffer.
9496 			 * When a tracer needs a snapshot (one of the
9497 			 * latency tracers), it swaps the max buffer
9498 			 * with the saved snap shot. We succeeded to
9499 			 * update the order of the main buffer, but failed to
9500 			 * update the order of the max buffer. But when we tried
9501 			 * to reset the main buffer to the original size, we
9502 			 * failed there too. This is very unlikely to
9503 			 * happen, but if it does, warn and kill all
9504 			 * tracing.
9505 			 */
9506 			tracing_disabled = 1;
9507 		}
9508 		goto out;
9509 	}
9510  out_max:
9511 #endif
9512 	(*ppos)++;
9513  out:
9514 	if (ret)
9515 		cnt = ret;
9516 	tracing_start_tr(tr);
9517 	return cnt;
9518 }
9519 
9520 static const struct file_operations buffer_subbuf_size_fops = {
9521 	.open		= tracing_open_generic_tr,
9522 	.read		= buffer_subbuf_size_read,
9523 	.write		= buffer_subbuf_size_write,
9524 	.release	= tracing_release_generic_tr,
9525 	.llseek		= default_llseek,
9526 };
9527 
9528 static struct dentry *trace_instance_dir;
9529 
9530 static void
9531 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9532 
9533 #ifdef CONFIG_MODULES
make_mod_delta(struct module * mod,void * data)9534 static int make_mod_delta(struct module *mod, void *data)
9535 {
9536 	struct trace_module_delta *module_delta;
9537 	struct trace_scratch *tscratch;
9538 	struct trace_mod_entry *entry;
9539 	struct trace_array *tr = data;
9540 	int i;
9541 
9542 	tscratch = tr->scratch;
9543 	module_delta = READ_ONCE(tr->module_delta);
9544 	for (i = 0; i < tscratch->nr_entries; i++) {
9545 		entry = &tscratch->entries[i];
9546 		if (strcmp(mod->name, entry->mod_name))
9547 			continue;
9548 		if (mod->state == MODULE_STATE_GOING)
9549 			module_delta->delta[i] = 0;
9550 		else
9551 			module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base
9552 						 - entry->mod_addr;
9553 		break;
9554 	}
9555 	return 0;
9556 }
9557 #else
make_mod_delta(struct module * mod,void * data)9558 static int make_mod_delta(struct module *mod, void *data)
9559 {
9560 	return 0;
9561 }
9562 #endif
9563 
mod_addr_comp(const void * a,const void * b,const void * data)9564 static int mod_addr_comp(const void *a, const void *b, const void *data)
9565 {
9566 	const struct trace_mod_entry *e1 = a;
9567 	const struct trace_mod_entry *e2 = b;
9568 
9569 	return e1->mod_addr > e2->mod_addr ? 1 : -1;
9570 }
9571 
setup_trace_scratch(struct trace_array * tr,struct trace_scratch * tscratch,unsigned int size)9572 static void setup_trace_scratch(struct trace_array *tr,
9573 				struct trace_scratch *tscratch, unsigned int size)
9574 {
9575 	struct trace_module_delta *module_delta;
9576 	struct trace_mod_entry *entry;
9577 	int i, nr_entries;
9578 
9579 	if (!tscratch)
9580 		return;
9581 
9582 	tr->scratch = tscratch;
9583 	tr->scratch_size = size;
9584 
9585 	if (tscratch->text_addr)
9586 		tr->text_delta = (unsigned long)_text - tscratch->text_addr;
9587 
9588 	if (struct_size(tscratch, entries, tscratch->nr_entries) > size)
9589 		goto reset;
9590 
9591 	/* Check if each module name is a valid string */
9592 	for (i = 0; i < tscratch->nr_entries; i++) {
9593 		int n;
9594 
9595 		entry = &tscratch->entries[i];
9596 
9597 		for (n = 0; n < MODULE_NAME_LEN; n++) {
9598 			if (entry->mod_name[n] == '\0')
9599 				break;
9600 			if (!isprint(entry->mod_name[n]))
9601 				goto reset;
9602 		}
9603 		if (n == MODULE_NAME_LEN)
9604 			goto reset;
9605 	}
9606 
9607 	/* Sort the entries so that we can find appropriate module from address. */
9608 	nr_entries = tscratch->nr_entries;
9609 	sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
9610 	       mod_addr_comp, NULL, NULL);
9611 
9612 	if (IS_ENABLED(CONFIG_MODULES)) {
9613 		module_delta = kzalloc(struct_size(module_delta, delta, nr_entries), GFP_KERNEL);
9614 		if (!module_delta) {
9615 			pr_info("module_delta allocation failed. Not able to decode module address.");
9616 			goto reset;
9617 		}
9618 		init_rcu_head(&module_delta->rcu);
9619 	} else
9620 		module_delta = NULL;
9621 	WRITE_ONCE(tr->module_delta, module_delta);
9622 
9623 	/* Scan modules to make text delta for modules. */
9624 	module_for_each_mod(make_mod_delta, tr);
9625 
9626 	/* Set trace_clock as the same of the previous boot. */
9627 	if (tscratch->clock_id != tr->clock_id) {
9628 		if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) ||
9629 		    tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
9630 			pr_info("the previous trace_clock info is not valid.");
9631 			goto reset;
9632 		}
9633 	}
9634 	return;
9635  reset:
9636 	/* Invalid trace modules */
9637 	memset(tscratch, 0, size);
9638 }
9639 
9640 static int
allocate_trace_buffer(struct trace_array * tr,struct array_buffer * buf,int size)9641 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9642 {
9643 	enum ring_buffer_flags rb_flags;
9644 	struct trace_scratch *tscratch;
9645 	unsigned int scratch_size = 0;
9646 
9647 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9648 
9649 	buf->tr = tr;
9650 
9651 	if (tr->range_addr_start && tr->range_addr_size) {
9652 		/* Add scratch buffer to handle 128 modules */
9653 		buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
9654 						      tr->range_addr_start,
9655 						      tr->range_addr_size,
9656 						      struct_size(tscratch, entries, 128));
9657 
9658 		tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size);
9659 		setup_trace_scratch(tr, tscratch, scratch_size);
9660 
9661 		/*
9662 		 * This is basically the same as a mapped buffer,
9663 		 * with the same restrictions.
9664 		 */
9665 		tr->mapped++;
9666 	} else {
9667 		buf->buffer = ring_buffer_alloc(size, rb_flags);
9668 	}
9669 	if (!buf->buffer)
9670 		return -ENOMEM;
9671 
9672 	buf->data = alloc_percpu(struct trace_array_cpu);
9673 	if (!buf->data) {
9674 		ring_buffer_free(buf->buffer);
9675 		buf->buffer = NULL;
9676 		return -ENOMEM;
9677 	}
9678 
9679 	/* Allocate the first page for all buffers */
9680 	set_buffer_entries(&tr->array_buffer,
9681 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9682 
9683 	return 0;
9684 }
9685 
free_trace_buffer(struct array_buffer * buf)9686 static void free_trace_buffer(struct array_buffer *buf)
9687 {
9688 	if (buf->buffer) {
9689 		ring_buffer_free(buf->buffer);
9690 		buf->buffer = NULL;
9691 		free_percpu(buf->data);
9692 		buf->data = NULL;
9693 	}
9694 }
9695 
allocate_trace_buffers(struct trace_array * tr,int size)9696 static int allocate_trace_buffers(struct trace_array *tr, int size)
9697 {
9698 	int ret;
9699 
9700 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9701 	if (ret)
9702 		return ret;
9703 
9704 #ifdef CONFIG_TRACER_MAX_TRACE
9705 	/* Fix mapped buffer trace arrays do not have snapshot buffers */
9706 	if (tr->range_addr_start)
9707 		return 0;
9708 
9709 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9710 				    allocate_snapshot ? size : 1);
9711 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9712 		free_trace_buffer(&tr->array_buffer);
9713 		return -ENOMEM;
9714 	}
9715 	tr->allocated_snapshot = allocate_snapshot;
9716 
9717 	allocate_snapshot = false;
9718 #endif
9719 
9720 	return 0;
9721 }
9722 
free_trace_buffers(struct trace_array * tr)9723 static void free_trace_buffers(struct trace_array *tr)
9724 {
9725 	if (!tr)
9726 		return;
9727 
9728 	free_trace_buffer(&tr->array_buffer);
9729 	kfree(tr->module_delta);
9730 
9731 #ifdef CONFIG_TRACER_MAX_TRACE
9732 	free_trace_buffer(&tr->max_buffer);
9733 #endif
9734 }
9735 
init_trace_flags_index(struct trace_array * tr)9736 static void init_trace_flags_index(struct trace_array *tr)
9737 {
9738 	int i;
9739 
9740 	/* Used by the trace options files */
9741 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9742 		tr->trace_flags_index[i] = i;
9743 }
9744 
__update_tracer_options(struct trace_array * tr)9745 static void __update_tracer_options(struct trace_array *tr)
9746 {
9747 	struct tracer *t;
9748 
9749 	for (t = trace_types; t; t = t->next)
9750 		add_tracer_options(tr, t);
9751 }
9752 
update_tracer_options(struct trace_array * tr)9753 static void update_tracer_options(struct trace_array *tr)
9754 {
9755 	guard(mutex)(&trace_types_lock);
9756 	tracer_options_updated = true;
9757 	__update_tracer_options(tr);
9758 }
9759 
9760 /* Must have trace_types_lock held */
trace_array_find(const char * instance)9761 struct trace_array *trace_array_find(const char *instance)
9762 {
9763 	struct trace_array *tr, *found = NULL;
9764 
9765 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9766 		if (tr->name && strcmp(tr->name, instance) == 0) {
9767 			found = tr;
9768 			break;
9769 		}
9770 	}
9771 
9772 	return found;
9773 }
9774 
trace_array_find_get(const char * instance)9775 struct trace_array *trace_array_find_get(const char *instance)
9776 {
9777 	struct trace_array *tr;
9778 
9779 	guard(mutex)(&trace_types_lock);
9780 	tr = trace_array_find(instance);
9781 	if (tr)
9782 		tr->ref++;
9783 
9784 	return tr;
9785 }
9786 
trace_array_create_dir(struct trace_array * tr)9787 static int trace_array_create_dir(struct trace_array *tr)
9788 {
9789 	int ret;
9790 
9791 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9792 	if (!tr->dir)
9793 		return -EINVAL;
9794 
9795 	ret = event_trace_add_tracer(tr->dir, tr);
9796 	if (ret) {
9797 		tracefs_remove(tr->dir);
9798 		return ret;
9799 	}
9800 
9801 	init_tracer_tracefs(tr, tr->dir);
9802 	__update_tracer_options(tr);
9803 
9804 	return ret;
9805 }
9806 
9807 static struct trace_array *
trace_array_create_systems(const char * name,const char * systems,unsigned long range_addr_start,unsigned long range_addr_size)9808 trace_array_create_systems(const char *name, const char *systems,
9809 			   unsigned long range_addr_start,
9810 			   unsigned long range_addr_size)
9811 {
9812 	struct trace_array *tr;
9813 	int ret;
9814 
9815 	ret = -ENOMEM;
9816 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9817 	if (!tr)
9818 		return ERR_PTR(ret);
9819 
9820 	tr->name = kstrdup(name, GFP_KERNEL);
9821 	if (!tr->name)
9822 		goto out_free_tr;
9823 
9824 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9825 		goto out_free_tr;
9826 
9827 	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9828 		goto out_free_tr;
9829 
9830 	if (systems) {
9831 		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9832 		if (!tr->system_names)
9833 			goto out_free_tr;
9834 	}
9835 
9836 	/* Only for boot up memory mapped ring buffers */
9837 	tr->range_addr_start = range_addr_start;
9838 	tr->range_addr_size = range_addr_size;
9839 
9840 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9841 
9842 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9843 
9844 	raw_spin_lock_init(&tr->start_lock);
9845 
9846 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9847 #ifdef CONFIG_TRACER_MAX_TRACE
9848 	spin_lock_init(&tr->snapshot_trigger_lock);
9849 #endif
9850 	tr->current_trace = &nop_trace;
9851 
9852 	INIT_LIST_HEAD(&tr->systems);
9853 	INIT_LIST_HEAD(&tr->events);
9854 	INIT_LIST_HEAD(&tr->hist_vars);
9855 	INIT_LIST_HEAD(&tr->err_log);
9856 	INIT_LIST_HEAD(&tr->marker_list);
9857 
9858 #ifdef CONFIG_MODULES
9859 	INIT_LIST_HEAD(&tr->mod_events);
9860 #endif
9861 
9862 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9863 		goto out_free_tr;
9864 
9865 	/* The ring buffer is defaultly expanded */
9866 	trace_set_ring_buffer_expanded(tr);
9867 
9868 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9869 		goto out_free_tr;
9870 
9871 	ftrace_init_trace_array(tr);
9872 
9873 	init_trace_flags_index(tr);
9874 
9875 	if (trace_instance_dir) {
9876 		ret = trace_array_create_dir(tr);
9877 		if (ret)
9878 			goto out_free_tr;
9879 	} else
9880 		__trace_early_add_events(tr);
9881 
9882 	list_add(&tr->list, &ftrace_trace_arrays);
9883 
9884 	tr->ref++;
9885 
9886 	return tr;
9887 
9888  out_free_tr:
9889 	ftrace_free_ftrace_ops(tr);
9890 	free_trace_buffers(tr);
9891 	free_cpumask_var(tr->pipe_cpumask);
9892 	free_cpumask_var(tr->tracing_cpumask);
9893 	kfree_const(tr->system_names);
9894 	kfree(tr->range_name);
9895 	kfree(tr->name);
9896 	kfree(tr);
9897 
9898 	return ERR_PTR(ret);
9899 }
9900 
trace_array_create(const char * name)9901 static struct trace_array *trace_array_create(const char *name)
9902 {
9903 	return trace_array_create_systems(name, NULL, 0, 0);
9904 }
9905 
instance_mkdir(const char * name)9906 static int instance_mkdir(const char *name)
9907 {
9908 	struct trace_array *tr;
9909 	int ret;
9910 
9911 	guard(mutex)(&event_mutex);
9912 	guard(mutex)(&trace_types_lock);
9913 
9914 	ret = -EEXIST;
9915 	if (trace_array_find(name))
9916 		return -EEXIST;
9917 
9918 	tr = trace_array_create(name);
9919 
9920 	ret = PTR_ERR_OR_ZERO(tr);
9921 
9922 	return ret;
9923 }
9924 
9925 #ifdef CONFIG_MMU
map_pages(unsigned long start,unsigned long size)9926 static u64 map_pages(unsigned long start, unsigned long size)
9927 {
9928 	unsigned long vmap_start, vmap_end;
9929 	struct vm_struct *area;
9930 	int ret;
9931 
9932 	area = get_vm_area(size, VM_IOREMAP);
9933 	if (!area)
9934 		return 0;
9935 
9936 	vmap_start = (unsigned long) area->addr;
9937 	vmap_end = vmap_start + size;
9938 
9939 	ret = vmap_page_range(vmap_start, vmap_end,
9940 			      start, pgprot_nx(PAGE_KERNEL));
9941 	if (ret < 0) {
9942 		free_vm_area(area);
9943 		return 0;
9944 	}
9945 
9946 	return (u64)vmap_start;
9947 }
9948 #else
map_pages(unsigned long start,unsigned long size)9949 static inline u64 map_pages(unsigned long start, unsigned long size)
9950 {
9951 	return 0;
9952 }
9953 #endif
9954 
9955 /**
9956  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9957  * @name: The name of the trace array to be looked up/created.
9958  * @systems: A list of systems to create event directories for (NULL for all)
9959  *
9960  * Returns pointer to trace array with given name.
9961  * NULL, if it cannot be created.
9962  *
9963  * NOTE: This function increments the reference counter associated with the
9964  * trace array returned. This makes sure it cannot be freed while in use.
9965  * Use trace_array_put() once the trace array is no longer needed.
9966  * If the trace_array is to be freed, trace_array_destroy() needs to
9967  * be called after the trace_array_put(), or simply let user space delete
9968  * it from the tracefs instances directory. But until the
9969  * trace_array_put() is called, user space can not delete it.
9970  *
9971  */
trace_array_get_by_name(const char * name,const char * systems)9972 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9973 {
9974 	struct trace_array *tr;
9975 
9976 	guard(mutex)(&event_mutex);
9977 	guard(mutex)(&trace_types_lock);
9978 
9979 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9980 		if (tr->name && strcmp(tr->name, name) == 0) {
9981 			tr->ref++;
9982 			return tr;
9983 		}
9984 	}
9985 
9986 	tr = trace_array_create_systems(name, systems, 0, 0);
9987 
9988 	if (IS_ERR(tr))
9989 		tr = NULL;
9990 	else
9991 		tr->ref++;
9992 
9993 	return tr;
9994 }
9995 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9996 
__remove_instance(struct trace_array * tr)9997 static int __remove_instance(struct trace_array *tr)
9998 {
9999 	int i;
10000 
10001 	/* Reference counter for a newly created trace array = 1. */
10002 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
10003 		return -EBUSY;
10004 
10005 	list_del(&tr->list);
10006 
10007 	/* Disable all the flags that were enabled coming in */
10008 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
10009 		if ((1 << i) & ZEROED_TRACE_FLAGS)
10010 			set_tracer_flag(tr, 1 << i, 0);
10011 	}
10012 
10013 	if (printk_trace == tr)
10014 		update_printk_trace(&global_trace);
10015 
10016 	if (update_marker_trace(tr, 0))
10017 		synchronize_rcu();
10018 
10019 	tracing_set_nop(tr);
10020 	clear_ftrace_function_probes(tr);
10021 	event_trace_del_tracer(tr);
10022 	ftrace_clear_pids(tr);
10023 	ftrace_destroy_function_files(tr);
10024 	tracefs_remove(tr->dir);
10025 	free_percpu(tr->last_func_repeats);
10026 	free_trace_buffers(tr);
10027 	clear_tracing_err_log(tr);
10028 
10029 	if (tr->range_name) {
10030 		reserve_mem_release_by_name(tr->range_name);
10031 		kfree(tr->range_name);
10032 	}
10033 
10034 	for (i = 0; i < tr->nr_topts; i++) {
10035 		kfree(tr->topts[i].topts);
10036 	}
10037 	kfree(tr->topts);
10038 
10039 	free_cpumask_var(tr->pipe_cpumask);
10040 	free_cpumask_var(tr->tracing_cpumask);
10041 	kfree_const(tr->system_names);
10042 	kfree(tr->name);
10043 	kfree(tr);
10044 
10045 	return 0;
10046 }
10047 
trace_array_destroy(struct trace_array * this_tr)10048 int trace_array_destroy(struct trace_array *this_tr)
10049 {
10050 	struct trace_array *tr;
10051 
10052 	if (!this_tr)
10053 		return -EINVAL;
10054 
10055 	guard(mutex)(&event_mutex);
10056 	guard(mutex)(&trace_types_lock);
10057 
10058 
10059 	/* Making sure trace array exists before destroying it. */
10060 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10061 		if (tr == this_tr)
10062 			return __remove_instance(tr);
10063 	}
10064 
10065 	return -ENODEV;
10066 }
10067 EXPORT_SYMBOL_GPL(trace_array_destroy);
10068 
instance_rmdir(const char * name)10069 static int instance_rmdir(const char *name)
10070 {
10071 	struct trace_array *tr;
10072 
10073 	guard(mutex)(&event_mutex);
10074 	guard(mutex)(&trace_types_lock);
10075 
10076 	tr = trace_array_find(name);
10077 	if (!tr)
10078 		return -ENODEV;
10079 
10080 	return __remove_instance(tr);
10081 }
10082 
create_trace_instances(struct dentry * d_tracer)10083 static __init void create_trace_instances(struct dentry *d_tracer)
10084 {
10085 	struct trace_array *tr;
10086 
10087 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
10088 							 instance_mkdir,
10089 							 instance_rmdir);
10090 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
10091 		return;
10092 
10093 	guard(mutex)(&event_mutex);
10094 	guard(mutex)(&trace_types_lock);
10095 
10096 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10097 		if (!tr->name)
10098 			continue;
10099 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
10100 			     "Failed to create instance directory\n"))
10101 			return;
10102 	}
10103 }
10104 
10105 static void
init_tracer_tracefs(struct trace_array * tr,struct dentry * d_tracer)10106 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
10107 {
10108 	int cpu;
10109 
10110 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
10111 			tr, &show_traces_fops);
10112 
10113 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
10114 			tr, &set_tracer_fops);
10115 
10116 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
10117 			  tr, &tracing_cpumask_fops);
10118 
10119 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
10120 			  tr, &tracing_iter_fops);
10121 
10122 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
10123 			  tr, &tracing_fops);
10124 
10125 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
10126 			  tr, &tracing_pipe_fops);
10127 
10128 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
10129 			  tr, &tracing_entries_fops);
10130 
10131 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
10132 			  tr, &tracing_total_entries_fops);
10133 
10134 	trace_create_file("free_buffer", 0200, d_tracer,
10135 			  tr, &tracing_free_buffer_fops);
10136 
10137 	trace_create_file("trace_marker", 0220, d_tracer,
10138 			  tr, &tracing_mark_fops);
10139 
10140 	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
10141 
10142 	trace_create_file("trace_marker_raw", 0220, d_tracer,
10143 			  tr, &tracing_mark_raw_fops);
10144 
10145 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
10146 			  &trace_clock_fops);
10147 
10148 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
10149 			  tr, &rb_simple_fops);
10150 
10151 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
10152 			  &trace_time_stamp_mode_fops);
10153 
10154 	tr->buffer_percent = 50;
10155 
10156 	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
10157 			tr, &buffer_percent_fops);
10158 
10159 	trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
10160 			  tr, &buffer_subbuf_size_fops);
10161 
10162 	create_trace_options_dir(tr);
10163 
10164 #ifdef CONFIG_TRACER_MAX_TRACE
10165 	trace_create_maxlat_file(tr, d_tracer);
10166 #endif
10167 
10168 	if (ftrace_create_function_files(tr, d_tracer))
10169 		MEM_FAIL(1, "Could not allocate function filter files");
10170 
10171 	if (tr->range_addr_start) {
10172 		trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
10173 				  tr, &last_boot_fops);
10174 #ifdef CONFIG_TRACER_SNAPSHOT
10175 	} else {
10176 		trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
10177 				  tr, &snapshot_fops);
10178 #endif
10179 	}
10180 
10181 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
10182 			  tr, &tracing_err_log_fops);
10183 
10184 	for_each_tracing_cpu(cpu)
10185 		tracing_init_tracefs_percpu(tr, cpu);
10186 
10187 	ftrace_init_tracefs(tr, d_tracer);
10188 }
10189 
10190 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
trace_automount(struct dentry * mntpt,void * ingore)10191 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
10192 {
10193 	struct vfsmount *mnt;
10194 	struct file_system_type *type;
10195 	struct fs_context *fc;
10196 	int ret;
10197 
10198 	/*
10199 	 * To maintain backward compatibility for tools that mount
10200 	 * debugfs to get to the tracing facility, tracefs is automatically
10201 	 * mounted to the debugfs/tracing directory.
10202 	 */
10203 	type = get_fs_type("tracefs");
10204 	if (!type)
10205 		return NULL;
10206 
10207 	fc = fs_context_for_submount(type, mntpt);
10208 	put_filesystem(type);
10209 	if (IS_ERR(fc))
10210 		return ERR_CAST(fc);
10211 
10212 	pr_warn("NOTICE: Automounting of tracing to debugfs is deprecated and will be removed in 2030\n");
10213 
10214 	ret = vfs_parse_fs_string(fc, "source",
10215 				  "tracefs", strlen("tracefs"));
10216 	if (!ret)
10217 		mnt = fc_mount(fc);
10218 	else
10219 		mnt = ERR_PTR(ret);
10220 
10221 	put_fs_context(fc);
10222 	return mnt;
10223 }
10224 #endif
10225 
10226 /**
10227  * tracing_init_dentry - initialize top level trace array
10228  *
10229  * This is called when creating files or directories in the tracing
10230  * directory. It is called via fs_initcall() by any of the boot up code
10231  * and expects to return the dentry of the top level tracing directory.
10232  */
tracing_init_dentry(void)10233 int tracing_init_dentry(void)
10234 {
10235 	struct trace_array *tr = &global_trace;
10236 
10237 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10238 		pr_warn("Tracing disabled due to lockdown\n");
10239 		return -EPERM;
10240 	}
10241 
10242 	/* The top level trace array uses  NULL as parent */
10243 	if (tr->dir)
10244 		return 0;
10245 
10246 	if (WARN_ON(!tracefs_initialized()))
10247 		return -ENODEV;
10248 
10249 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
10250 	/*
10251 	 * As there may still be users that expect the tracing
10252 	 * files to exist in debugfs/tracing, we must automount
10253 	 * the tracefs file system there, so older tools still
10254 	 * work with the newer kernel.
10255 	 */
10256 	tr->dir = debugfs_create_automount("tracing", NULL,
10257 					   trace_automount, NULL);
10258 #endif
10259 
10260 	return 0;
10261 }
10262 
10263 extern struct trace_eval_map *__start_ftrace_eval_maps[];
10264 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
10265 
10266 static struct workqueue_struct *eval_map_wq __initdata;
10267 static struct work_struct eval_map_work __initdata;
10268 static struct work_struct tracerfs_init_work __initdata;
10269 
eval_map_work_func(struct work_struct * work)10270 static void __init eval_map_work_func(struct work_struct *work)
10271 {
10272 	int len;
10273 
10274 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10275 	trace_event_update_with_eval_map(NULL, __start_ftrace_eval_maps, len);
10276 }
10277 
trace_eval_init(void)10278 static int __init trace_eval_init(void)
10279 {
10280 	INIT_WORK(&eval_map_work, eval_map_work_func);
10281 
10282 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
10283 	if (!eval_map_wq) {
10284 		pr_err("Unable to allocate eval_map_wq\n");
10285 		/* Do work here */
10286 		eval_map_work_func(&eval_map_work);
10287 		return -ENOMEM;
10288 	}
10289 
10290 	queue_work(eval_map_wq, &eval_map_work);
10291 	return 0;
10292 }
10293 
10294 subsys_initcall(trace_eval_init);
10295 
trace_eval_sync(void)10296 static int __init trace_eval_sync(void)
10297 {
10298 	/* Make sure the eval map updates are finished */
10299 	if (eval_map_wq)
10300 		destroy_workqueue(eval_map_wq);
10301 	return 0;
10302 }
10303 
10304 late_initcall_sync(trace_eval_sync);
10305 
10306 
10307 #ifdef CONFIG_MODULES
10308 
module_exists(const char * module)10309 bool module_exists(const char *module)
10310 {
10311 	/* All modules have the symbol __this_module */
10312 	static const char this_mod[] = "__this_module";
10313 	char modname[MODULE_NAME_LEN + sizeof(this_mod) + 2];
10314 	unsigned long val;
10315 	int n;
10316 
10317 	n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
10318 
10319 	if (n > sizeof(modname) - 1)
10320 		return false;
10321 
10322 	val = module_kallsyms_lookup_name(modname);
10323 	return val != 0;
10324 }
10325 
trace_module_add_evals(struct module * mod)10326 static void trace_module_add_evals(struct module *mod)
10327 {
10328 	/*
10329 	 * Modules with bad taint do not have events created, do
10330 	 * not bother with enums either.
10331 	 */
10332 	if (trace_module_has_bad_taint(mod))
10333 		return;
10334 
10335 	/* Even if no trace_evals, this need to sanitize field types. */
10336 	trace_event_update_with_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10337 }
10338 
10339 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
trace_module_remove_evals(struct module * mod)10340 static void trace_module_remove_evals(struct module *mod)
10341 {
10342 	union trace_eval_map_item *map;
10343 	union trace_eval_map_item **last = &trace_eval_maps;
10344 
10345 	if (!mod->num_trace_evals)
10346 		return;
10347 
10348 	guard(mutex)(&trace_eval_mutex);
10349 
10350 	map = trace_eval_maps;
10351 
10352 	while (map) {
10353 		if (map->head.mod == mod)
10354 			break;
10355 		map = trace_eval_jmp_to_tail(map);
10356 		last = &map->tail.next;
10357 		map = map->tail.next;
10358 	}
10359 	if (!map)
10360 		return;
10361 
10362 	*last = trace_eval_jmp_to_tail(map)->tail.next;
10363 	kfree(map);
10364 }
10365 #else
trace_module_remove_evals(struct module * mod)10366 static inline void trace_module_remove_evals(struct module *mod) { }
10367 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10368 
trace_module_record(struct module * mod,bool add)10369 static void trace_module_record(struct module *mod, bool add)
10370 {
10371 	struct trace_array *tr;
10372 	unsigned long flags;
10373 
10374 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10375 		flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT);
10376 		/* Update any persistent trace array that has already been started */
10377 		if (flags == TRACE_ARRAY_FL_BOOT && add) {
10378 			guard(mutex)(&scratch_mutex);
10379 			save_mod(mod, tr);
10380 		} else if (flags & TRACE_ARRAY_FL_LAST_BOOT) {
10381 			/* Update delta if the module loaded in previous boot */
10382 			make_mod_delta(mod, tr);
10383 		}
10384 	}
10385 }
10386 
trace_module_notify(struct notifier_block * self,unsigned long val,void * data)10387 static int trace_module_notify(struct notifier_block *self,
10388 			       unsigned long val, void *data)
10389 {
10390 	struct module *mod = data;
10391 
10392 	switch (val) {
10393 	case MODULE_STATE_COMING:
10394 		trace_module_add_evals(mod);
10395 		trace_module_record(mod, true);
10396 		break;
10397 	case MODULE_STATE_GOING:
10398 		trace_module_remove_evals(mod);
10399 		trace_module_record(mod, false);
10400 		break;
10401 	}
10402 
10403 	return NOTIFY_OK;
10404 }
10405 
10406 static struct notifier_block trace_module_nb = {
10407 	.notifier_call = trace_module_notify,
10408 	.priority = 0,
10409 };
10410 #endif /* CONFIG_MODULES */
10411 
tracer_init_tracefs_work_func(struct work_struct * work)10412 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10413 {
10414 
10415 	event_trace_init();
10416 
10417 	init_tracer_tracefs(&global_trace, NULL);
10418 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
10419 
10420 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10421 			&global_trace, &tracing_thresh_fops);
10422 
10423 	trace_create_file("README", TRACE_MODE_READ, NULL,
10424 			NULL, &tracing_readme_fops);
10425 
10426 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10427 			NULL, &tracing_saved_cmdlines_fops);
10428 
10429 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10430 			  NULL, &tracing_saved_cmdlines_size_fops);
10431 
10432 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10433 			NULL, &tracing_saved_tgids_fops);
10434 
10435 	trace_create_eval_file(NULL);
10436 
10437 #ifdef CONFIG_MODULES
10438 	register_module_notifier(&trace_module_nb);
10439 #endif
10440 
10441 #ifdef CONFIG_DYNAMIC_FTRACE
10442 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10443 			NULL, &tracing_dyn_info_fops);
10444 #endif
10445 
10446 	create_trace_instances(NULL);
10447 
10448 	update_tracer_options(&global_trace);
10449 }
10450 
tracer_init_tracefs(void)10451 static __init int tracer_init_tracefs(void)
10452 {
10453 	int ret;
10454 
10455 	trace_access_lock_init();
10456 
10457 	ret = tracing_init_dentry();
10458 	if (ret)
10459 		return 0;
10460 
10461 	if (eval_map_wq) {
10462 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10463 		queue_work(eval_map_wq, &tracerfs_init_work);
10464 	} else {
10465 		tracer_init_tracefs_work_func(NULL);
10466 	}
10467 
10468 	rv_init_interface();
10469 
10470 	return 0;
10471 }
10472 
10473 fs_initcall(tracer_init_tracefs);
10474 
10475 static int trace_die_panic_handler(struct notifier_block *self,
10476 				unsigned long ev, void *unused);
10477 
10478 static struct notifier_block trace_panic_notifier = {
10479 	.notifier_call = trace_die_panic_handler,
10480 	.priority = INT_MAX - 1,
10481 };
10482 
10483 static struct notifier_block trace_die_notifier = {
10484 	.notifier_call = trace_die_panic_handler,
10485 	.priority = INT_MAX - 1,
10486 };
10487 
10488 /*
10489  * The idea is to execute the following die/panic callback early, in order
10490  * to avoid showing irrelevant information in the trace (like other panic
10491  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10492  * warnings get disabled (to prevent potential log flooding).
10493  */
trace_die_panic_handler(struct notifier_block * self,unsigned long ev,void * unused)10494 static int trace_die_panic_handler(struct notifier_block *self,
10495 				unsigned long ev, void *unused)
10496 {
10497 	if (!ftrace_dump_on_oops_enabled())
10498 		return NOTIFY_DONE;
10499 
10500 	/* The die notifier requires DIE_OOPS to trigger */
10501 	if (self == &trace_die_notifier && ev != DIE_OOPS)
10502 		return NOTIFY_DONE;
10503 
10504 	ftrace_dump(DUMP_PARAM);
10505 
10506 	return NOTIFY_DONE;
10507 }
10508 
10509 /*
10510  * printk is set to max of 1024, we really don't need it that big.
10511  * Nothing should be printing 1000 characters anyway.
10512  */
10513 #define TRACE_MAX_PRINT		1000
10514 
10515 /*
10516  * Define here KERN_TRACE so that we have one place to modify
10517  * it if we decide to change what log level the ftrace dump
10518  * should be at.
10519  */
10520 #define KERN_TRACE		KERN_EMERG
10521 
10522 void
trace_printk_seq(struct trace_seq * s)10523 trace_printk_seq(struct trace_seq *s)
10524 {
10525 	/* Probably should print a warning here. */
10526 	if (s->seq.len >= TRACE_MAX_PRINT)
10527 		s->seq.len = TRACE_MAX_PRINT;
10528 
10529 	/*
10530 	 * More paranoid code. Although the buffer size is set to
10531 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10532 	 * an extra layer of protection.
10533 	 */
10534 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10535 		s->seq.len = s->seq.size - 1;
10536 
10537 	/* should be zero ended, but we are paranoid. */
10538 	s->buffer[s->seq.len] = 0;
10539 
10540 	printk(KERN_TRACE "%s", s->buffer);
10541 
10542 	trace_seq_init(s);
10543 }
10544 
trace_init_iter(struct trace_iterator * iter,struct trace_array * tr)10545 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
10546 {
10547 	iter->tr = tr;
10548 	iter->trace = iter->tr->current_trace;
10549 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
10550 	iter->array_buffer = &tr->array_buffer;
10551 
10552 	if (iter->trace && iter->trace->open)
10553 		iter->trace->open(iter);
10554 
10555 	/* Annotate start of buffers if we had overruns */
10556 	if (ring_buffer_overruns(iter->array_buffer->buffer))
10557 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
10558 
10559 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
10560 	if (trace_clocks[iter->tr->clock_id].in_ns)
10561 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10562 
10563 	/* Can not use kmalloc for iter.temp and iter.fmt */
10564 	iter->temp = static_temp_buf;
10565 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
10566 	iter->fmt = static_fmt_buf;
10567 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
10568 }
10569 
trace_init_global_iter(struct trace_iterator * iter)10570 void trace_init_global_iter(struct trace_iterator *iter)
10571 {
10572 	trace_init_iter(iter, &global_trace);
10573 }
10574 
ftrace_dump_one(struct trace_array * tr,enum ftrace_dump_mode dump_mode)10575 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
10576 {
10577 	/* use static because iter can be a bit big for the stack */
10578 	static struct trace_iterator iter;
10579 	unsigned int old_userobj;
10580 	unsigned long flags;
10581 	int cnt = 0;
10582 
10583 	/*
10584 	 * Always turn off tracing when we dump.
10585 	 * We don't need to show trace output of what happens
10586 	 * between multiple crashes.
10587 	 *
10588 	 * If the user does a sysrq-z, then they can re-enable
10589 	 * tracing with echo 1 > tracing_on.
10590 	 */
10591 	tracer_tracing_off(tr);
10592 
10593 	local_irq_save(flags);
10594 
10595 	/* Simulate the iterator */
10596 	trace_init_iter(&iter, tr);
10597 
10598 	/* While dumping, do not allow the buffer to be enable */
10599 	tracer_tracing_disable(tr);
10600 
10601 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10602 
10603 	/* don't look at user memory in panic mode */
10604 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10605 
10606 	if (dump_mode == DUMP_ORIG)
10607 		iter.cpu_file = raw_smp_processor_id();
10608 	else
10609 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10610 
10611 	if (tr == &global_trace)
10612 		printk(KERN_TRACE "Dumping ftrace buffer:\n");
10613 	else
10614 		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
10615 
10616 	/* Did function tracer already get disabled? */
10617 	if (ftrace_is_dead()) {
10618 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10619 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10620 	}
10621 
10622 	/*
10623 	 * We need to stop all tracing on all CPUS to read
10624 	 * the next buffer. This is a bit expensive, but is
10625 	 * not done often. We fill all what we can read,
10626 	 * and then release the locks again.
10627 	 */
10628 
10629 	while (!trace_empty(&iter)) {
10630 
10631 		if (!cnt)
10632 			printk(KERN_TRACE "---------------------------------\n");
10633 
10634 		cnt++;
10635 
10636 		trace_iterator_reset(&iter);
10637 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
10638 
10639 		if (trace_find_next_entry_inc(&iter) != NULL) {
10640 			int ret;
10641 
10642 			ret = print_trace_line(&iter);
10643 			if (ret != TRACE_TYPE_NO_CONSUME)
10644 				trace_consume(&iter);
10645 
10646 			trace_printk_seq(&iter.seq);
10647 		}
10648 		touch_nmi_watchdog();
10649 	}
10650 
10651 	if (!cnt)
10652 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
10653 	else
10654 		printk(KERN_TRACE "---------------------------------\n");
10655 
10656 	tr->trace_flags |= old_userobj;
10657 
10658 	tracer_tracing_enable(tr);
10659 	local_irq_restore(flags);
10660 }
10661 
ftrace_dump_by_param(void)10662 static void ftrace_dump_by_param(void)
10663 {
10664 	bool first_param = true;
10665 	char dump_param[MAX_TRACER_SIZE];
10666 	char *buf, *token, *inst_name;
10667 	struct trace_array *tr;
10668 
10669 	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
10670 	buf = dump_param;
10671 
10672 	while ((token = strsep(&buf, ",")) != NULL) {
10673 		if (first_param) {
10674 			first_param = false;
10675 			if (!strcmp("0", token))
10676 				continue;
10677 			else if (!strcmp("1", token)) {
10678 				ftrace_dump_one(&global_trace, DUMP_ALL);
10679 				continue;
10680 			}
10681 			else if (!strcmp("2", token) ||
10682 			  !strcmp("orig_cpu", token)) {
10683 				ftrace_dump_one(&global_trace, DUMP_ORIG);
10684 				continue;
10685 			}
10686 		}
10687 
10688 		inst_name = strsep(&token, "=");
10689 		tr = trace_array_find(inst_name);
10690 		if (!tr) {
10691 			printk(KERN_TRACE "Instance %s not found\n", inst_name);
10692 			continue;
10693 		}
10694 
10695 		if (token && (!strcmp("2", token) ||
10696 			  !strcmp("orig_cpu", token)))
10697 			ftrace_dump_one(tr, DUMP_ORIG);
10698 		else
10699 			ftrace_dump_one(tr, DUMP_ALL);
10700 	}
10701 }
10702 
ftrace_dump(enum ftrace_dump_mode oops_dump_mode)10703 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10704 {
10705 	static atomic_t dump_running;
10706 
10707 	/* Only allow one dump user at a time. */
10708 	if (atomic_inc_return(&dump_running) != 1) {
10709 		atomic_dec(&dump_running);
10710 		return;
10711 	}
10712 
10713 	switch (oops_dump_mode) {
10714 	case DUMP_ALL:
10715 		ftrace_dump_one(&global_trace, DUMP_ALL);
10716 		break;
10717 	case DUMP_ORIG:
10718 		ftrace_dump_one(&global_trace, DUMP_ORIG);
10719 		break;
10720 	case DUMP_PARAM:
10721 		ftrace_dump_by_param();
10722 		break;
10723 	case DUMP_NONE:
10724 		break;
10725 	default:
10726 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10727 		ftrace_dump_one(&global_trace, DUMP_ALL);
10728 	}
10729 
10730 	atomic_dec(&dump_running);
10731 }
10732 EXPORT_SYMBOL_GPL(ftrace_dump);
10733 
10734 #define WRITE_BUFSIZE  4096
10735 
trace_parse_run_command(struct file * file,const char __user * buffer,size_t count,loff_t * ppos,int (* createfn)(const char *))10736 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10737 				size_t count, loff_t *ppos,
10738 				int (*createfn)(const char *))
10739 {
10740 	char *kbuf __free(kfree) = NULL;
10741 	char *buf, *tmp;
10742 	int ret = 0;
10743 	size_t done = 0;
10744 	size_t size;
10745 
10746 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10747 	if (!kbuf)
10748 		return -ENOMEM;
10749 
10750 	while (done < count) {
10751 		size = count - done;
10752 
10753 		if (size >= WRITE_BUFSIZE)
10754 			size = WRITE_BUFSIZE - 1;
10755 
10756 		if (copy_from_user(kbuf, buffer + done, size))
10757 			return -EFAULT;
10758 
10759 		kbuf[size] = '\0';
10760 		buf = kbuf;
10761 		do {
10762 			tmp = strchr(buf, '\n');
10763 			if (tmp) {
10764 				*tmp = '\0';
10765 				size = tmp - buf + 1;
10766 			} else {
10767 				size = strlen(buf);
10768 				if (done + size < count) {
10769 					if (buf != kbuf)
10770 						break;
10771 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10772 					pr_warn("Line length is too long: Should be less than %d\n",
10773 						WRITE_BUFSIZE - 2);
10774 					return -EINVAL;
10775 				}
10776 			}
10777 			done += size;
10778 
10779 			/* Remove comments */
10780 			tmp = strchr(buf, '#');
10781 
10782 			if (tmp)
10783 				*tmp = '\0';
10784 
10785 			ret = createfn(buf);
10786 			if (ret)
10787 				return ret;
10788 			buf += size;
10789 
10790 		} while (done < count);
10791 	}
10792 	return done;
10793 }
10794 
10795 #ifdef CONFIG_TRACER_MAX_TRACE
tr_needs_alloc_snapshot(const char * name)10796 __init static bool tr_needs_alloc_snapshot(const char *name)
10797 {
10798 	char *test;
10799 	int len = strlen(name);
10800 	bool ret;
10801 
10802 	if (!boot_snapshot_index)
10803 		return false;
10804 
10805 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10806 	    boot_snapshot_info[len] == '\t')
10807 		return true;
10808 
10809 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10810 	if (!test)
10811 		return false;
10812 
10813 	sprintf(test, "\t%s\t", name);
10814 	ret = strstr(boot_snapshot_info, test) == NULL;
10815 	kfree(test);
10816 	return ret;
10817 }
10818 
do_allocate_snapshot(const char * name)10819 __init static void do_allocate_snapshot(const char *name)
10820 {
10821 	if (!tr_needs_alloc_snapshot(name))
10822 		return;
10823 
10824 	/*
10825 	 * When allocate_snapshot is set, the next call to
10826 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
10827 	 * will allocate the snapshot buffer. That will alse clear
10828 	 * this flag.
10829 	 */
10830 	allocate_snapshot = true;
10831 }
10832 #else
do_allocate_snapshot(const char * name)10833 static inline void do_allocate_snapshot(const char *name) { }
10834 #endif
10835 
enable_instances(void)10836 __init static void enable_instances(void)
10837 {
10838 	struct trace_array *tr;
10839 	bool memmap_area = false;
10840 	char *curr_str;
10841 	char *name;
10842 	char *str;
10843 	char *tok;
10844 
10845 	/* A tab is always appended */
10846 	boot_instance_info[boot_instance_index - 1] = '\0';
10847 	str = boot_instance_info;
10848 
10849 	while ((curr_str = strsep(&str, "\t"))) {
10850 		phys_addr_t start = 0;
10851 		phys_addr_t size = 0;
10852 		unsigned long addr = 0;
10853 		bool traceprintk = false;
10854 		bool traceoff = false;
10855 		char *flag_delim;
10856 		char *addr_delim;
10857 		char *rname __free(kfree) = NULL;
10858 
10859 		tok = strsep(&curr_str, ",");
10860 
10861 		flag_delim = strchr(tok, '^');
10862 		addr_delim = strchr(tok, '@');
10863 
10864 		if (addr_delim)
10865 			*addr_delim++ = '\0';
10866 
10867 		if (flag_delim)
10868 			*flag_delim++ = '\0';
10869 
10870 		name = tok;
10871 
10872 		if (flag_delim) {
10873 			char *flag;
10874 
10875 			while ((flag = strsep(&flag_delim, "^"))) {
10876 				if (strcmp(flag, "traceoff") == 0) {
10877 					traceoff = true;
10878 				} else if ((strcmp(flag, "printk") == 0) ||
10879 					   (strcmp(flag, "traceprintk") == 0) ||
10880 					   (strcmp(flag, "trace_printk") == 0)) {
10881 					traceprintk = true;
10882 				} else {
10883 					pr_info("Tracing: Invalid instance flag '%s' for %s\n",
10884 						flag, name);
10885 				}
10886 			}
10887 		}
10888 
10889 		tok = addr_delim;
10890 		if (tok && isdigit(*tok)) {
10891 			start = memparse(tok, &tok);
10892 			if (!start) {
10893 				pr_warn("Tracing: Invalid boot instance address for %s\n",
10894 					name);
10895 				continue;
10896 			}
10897 			if (*tok != ':') {
10898 				pr_warn("Tracing: No size specified for instance %s\n", name);
10899 				continue;
10900 			}
10901 			tok++;
10902 			size = memparse(tok, &tok);
10903 			if (!size) {
10904 				pr_warn("Tracing: Invalid boot instance size for %s\n",
10905 					name);
10906 				continue;
10907 			}
10908 			memmap_area = true;
10909 		} else if (tok) {
10910 			if (!reserve_mem_find_by_name(tok, &start, &size)) {
10911 				start = 0;
10912 				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
10913 				continue;
10914 			}
10915 			rname = kstrdup(tok, GFP_KERNEL);
10916 		}
10917 
10918 		if (start) {
10919 			/* Start and size must be page aligned */
10920 			if (start & ~PAGE_MASK) {
10921 				pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start);
10922 				continue;
10923 			}
10924 			if (size & ~PAGE_MASK) {
10925 				pr_warn("Tracing: mapping size %pa is not page aligned\n", &size);
10926 				continue;
10927 			}
10928 
10929 			if (memmap_area)
10930 				addr = map_pages(start, size);
10931 			else
10932 				addr = (unsigned long)phys_to_virt(start);
10933 			if (addr) {
10934 				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
10935 					name, &start, (unsigned long)size);
10936 			} else {
10937 				pr_warn("Tracing: Failed to map boot instance %s\n", name);
10938 				continue;
10939 			}
10940 		} else {
10941 			/* Only non mapped buffers have snapshot buffers */
10942 			if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10943 				do_allocate_snapshot(name);
10944 		}
10945 
10946 		tr = trace_array_create_systems(name, NULL, addr, size);
10947 		if (IS_ERR(tr)) {
10948 			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
10949 			continue;
10950 		}
10951 
10952 		if (traceoff)
10953 			tracer_tracing_off(tr);
10954 
10955 		if (traceprintk)
10956 			update_printk_trace(tr);
10957 
10958 		/*
10959 		 * memmap'd buffers can not be freed.
10960 		 */
10961 		if (memmap_area) {
10962 			tr->flags |= TRACE_ARRAY_FL_MEMMAP;
10963 			tr->ref++;
10964 		}
10965 
10966 		if (start) {
10967 			tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
10968 			tr->range_name = no_free_ptr(rname);
10969 		}
10970 
10971 		while ((tok = strsep(&curr_str, ","))) {
10972 			early_enable_events(tr, tok, true);
10973 		}
10974 	}
10975 }
10976 
tracer_alloc_buffers(void)10977 __init static int tracer_alloc_buffers(void)
10978 {
10979 	int ring_buf_size;
10980 	int ret = -ENOMEM;
10981 
10982 
10983 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10984 		pr_warn("Tracing disabled due to lockdown\n");
10985 		return -EPERM;
10986 	}
10987 
10988 	/*
10989 	 * Make sure we don't accidentally add more trace options
10990 	 * than we have bits for.
10991 	 */
10992 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10993 
10994 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10995 		return -ENOMEM;
10996 
10997 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10998 		goto out_free_buffer_mask;
10999 
11000 	/* Only allocate trace_printk buffers if a trace_printk exists */
11001 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
11002 		/* Must be called before global_trace.buffer is allocated */
11003 		trace_printk_init_buffers();
11004 
11005 	/* To save memory, keep the ring buffer size to its minimum */
11006 	if (global_trace.ring_buffer_expanded)
11007 		ring_buf_size = trace_buf_size;
11008 	else
11009 		ring_buf_size = 1;
11010 
11011 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
11012 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
11013 
11014 	raw_spin_lock_init(&global_trace.start_lock);
11015 
11016 	/*
11017 	 * The prepare callbacks allocates some memory for the ring buffer. We
11018 	 * don't free the buffer if the CPU goes down. If we were to free
11019 	 * the buffer, then the user would lose any trace that was in the
11020 	 * buffer. The memory will be removed once the "instance" is removed.
11021 	 */
11022 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
11023 				      "trace/RB:prepare", trace_rb_cpu_prepare,
11024 				      NULL);
11025 	if (ret < 0)
11026 		goto out_free_cpumask;
11027 	/* Used for event triggers */
11028 	ret = -ENOMEM;
11029 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
11030 	if (!temp_buffer)
11031 		goto out_rm_hp_state;
11032 
11033 	if (trace_create_savedcmd() < 0)
11034 		goto out_free_temp_buffer;
11035 
11036 	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
11037 		goto out_free_savedcmd;
11038 
11039 	/* TODO: make the number of buffers hot pluggable with CPUS */
11040 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
11041 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
11042 		goto out_free_pipe_cpumask;
11043 	}
11044 	if (global_trace.buffer_disabled)
11045 		tracing_off();
11046 
11047 	if (trace_boot_clock) {
11048 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
11049 		if (ret < 0)
11050 			pr_warn("Trace clock %s not defined, going back to default\n",
11051 				trace_boot_clock);
11052 	}
11053 
11054 	/*
11055 	 * register_tracer() might reference current_trace, so it
11056 	 * needs to be set before we register anything. This is
11057 	 * just a bootstrap of current_trace anyway.
11058 	 */
11059 	global_trace.current_trace = &nop_trace;
11060 
11061 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
11062 #ifdef CONFIG_TRACER_MAX_TRACE
11063 	spin_lock_init(&global_trace.snapshot_trigger_lock);
11064 #endif
11065 	ftrace_init_global_array_ops(&global_trace);
11066 
11067 #ifdef CONFIG_MODULES
11068 	INIT_LIST_HEAD(&global_trace.mod_events);
11069 #endif
11070 
11071 	init_trace_flags_index(&global_trace);
11072 
11073 	register_tracer(&nop_trace);
11074 
11075 	/* Function tracing may start here (via kernel command line) */
11076 	init_function_trace();
11077 
11078 	/* All seems OK, enable tracing */
11079 	tracing_disabled = 0;
11080 
11081 	atomic_notifier_chain_register(&panic_notifier_list,
11082 				       &trace_panic_notifier);
11083 
11084 	register_die_notifier(&trace_die_notifier);
11085 
11086 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
11087 
11088 	INIT_LIST_HEAD(&global_trace.systems);
11089 	INIT_LIST_HEAD(&global_trace.events);
11090 	INIT_LIST_HEAD(&global_trace.hist_vars);
11091 	INIT_LIST_HEAD(&global_trace.err_log);
11092 	list_add(&global_trace.marker_list, &marker_copies);
11093 	list_add(&global_trace.list, &ftrace_trace_arrays);
11094 
11095 	apply_trace_boot_options();
11096 
11097 	register_snapshot_cmd();
11098 
11099 	return 0;
11100 
11101 out_free_pipe_cpumask:
11102 	free_cpumask_var(global_trace.pipe_cpumask);
11103 out_free_savedcmd:
11104 	trace_free_saved_cmdlines_buffer();
11105 out_free_temp_buffer:
11106 	ring_buffer_free(temp_buffer);
11107 out_rm_hp_state:
11108 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
11109 out_free_cpumask:
11110 	free_cpumask_var(global_trace.tracing_cpumask);
11111 out_free_buffer_mask:
11112 	free_cpumask_var(tracing_buffer_mask);
11113 	return ret;
11114 }
11115 
11116 #ifdef CONFIG_FUNCTION_TRACER
11117 /* Used to set module cached ftrace filtering at boot up */
trace_get_global_array(void)11118 __init struct trace_array *trace_get_global_array(void)
11119 {
11120 	return &global_trace;
11121 }
11122 #endif
11123 
ftrace_boot_snapshot(void)11124 void __init ftrace_boot_snapshot(void)
11125 {
11126 #ifdef CONFIG_TRACER_MAX_TRACE
11127 	struct trace_array *tr;
11128 
11129 	if (!snapshot_at_boot)
11130 		return;
11131 
11132 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
11133 		if (!tr->allocated_snapshot)
11134 			continue;
11135 
11136 		tracing_snapshot_instance(tr);
11137 		trace_array_puts(tr, "** Boot snapshot taken **\n");
11138 	}
11139 #endif
11140 }
11141 
early_trace_init(void)11142 void __init early_trace_init(void)
11143 {
11144 	if (tracepoint_printk) {
11145 		tracepoint_print_iter =
11146 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
11147 		if (MEM_FAIL(!tracepoint_print_iter,
11148 			     "Failed to allocate trace iterator\n"))
11149 			tracepoint_printk = 0;
11150 		else
11151 			static_key_enable(&tracepoint_printk_key.key);
11152 	}
11153 	tracer_alloc_buffers();
11154 
11155 	init_events();
11156 }
11157 
trace_init(void)11158 void __init trace_init(void)
11159 {
11160 	trace_event_init();
11161 
11162 	if (boot_instance_index)
11163 		enable_instances();
11164 }
11165 
clear_boot_tracer(void)11166 __init static void clear_boot_tracer(void)
11167 {
11168 	/*
11169 	 * The default tracer at boot buffer is an init section.
11170 	 * This function is called in lateinit. If we did not
11171 	 * find the boot tracer, then clear it out, to prevent
11172 	 * later registration from accessing the buffer that is
11173 	 * about to be freed.
11174 	 */
11175 	if (!default_bootup_tracer)
11176 		return;
11177 
11178 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
11179 	       default_bootup_tracer);
11180 	default_bootup_tracer = NULL;
11181 }
11182 
11183 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
tracing_set_default_clock(void)11184 __init static void tracing_set_default_clock(void)
11185 {
11186 	/* sched_clock_stable() is determined in late_initcall */
11187 	if (!trace_boot_clock && !sched_clock_stable()) {
11188 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
11189 			pr_warn("Can not set tracing clock due to lockdown\n");
11190 			return;
11191 		}
11192 
11193 		printk(KERN_WARNING
11194 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
11195 		       "If you want to keep using the local clock, then add:\n"
11196 		       "  \"trace_clock=local\"\n"
11197 		       "on the kernel command line\n");
11198 		tracing_set_clock(&global_trace, "global");
11199 	}
11200 }
11201 #else
tracing_set_default_clock(void)11202 static inline void tracing_set_default_clock(void) { }
11203 #endif
11204 
late_trace_init(void)11205 __init static int late_trace_init(void)
11206 {
11207 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
11208 		static_key_disable(&tracepoint_printk_key.key);
11209 		tracepoint_printk = 0;
11210 	}
11211 
11212 	if (traceoff_after_boot)
11213 		tracing_off();
11214 
11215 	tracing_set_default_clock();
11216 	clear_boot_tracer();
11217 	return 0;
11218 }
11219 
11220 late_initcall_sync(late_trace_init);
11221