xref: /linux/kernel/trace/trace.c (revision d6f38c12396397e48092ad9e8a4d7be4de51b942)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/cleanup.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 #include <linux/sort.h>
53 #include <linux/io.h> /* vmap_page_range() */
54 #include <linux/fs_context.h>
55 
56 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
57 
58 #include "trace.h"
59 #include "trace_output.h"
60 
61 #ifdef CONFIG_FTRACE_STARTUP_TEST
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will lurk into the ring-buffer to count the
65  * entries inserted during the selftest although some concurrent
66  * insertions into the ring-buffer such as trace_printk could occurred
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70 
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76 
disable_tracing_selftest(const char * reason)77 void __init disable_tracing_selftest(const char *reason)
78 {
79 	if (!tracing_selftest_disabled) {
80 		tracing_selftest_disabled = true;
81 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
82 	}
83 }
84 #else
85 #define tracing_selftest_running	0
86 #define tracing_selftest_disabled	0
87 #endif
88 
89 /* Pipe tracepoints to printk */
90 static struct trace_iterator *tracepoint_print_iter;
91 int tracepoint_printk;
92 static bool tracepoint_printk_stop_on_boot __initdata;
93 static bool traceoff_after_boot __initdata;
94 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
95 
96 /* For tracers that don't implement custom flags */
97 static struct tracer_opt dummy_tracer_opt[] = {
98 	{ }
99 };
100 
101 static int
dummy_set_flag(struct trace_array * tr,u32 old_flags,u32 bit,int set)102 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
103 {
104 	return 0;
105 }
106 
107 /*
108  * To prevent the comm cache from being overwritten when no
109  * tracing is active, only save the comm when a trace event
110  * occurred.
111  */
112 DEFINE_PER_CPU(bool, trace_taskinfo_save);
113 
114 /*
115  * Kill all tracing for good (never come back).
116  * It is initialized to 1 but will turn to zero if the initialization
117  * of the tracer is successful. But that is the only place that sets
118  * this back to zero.
119  */
120 static int tracing_disabled = 1;
121 
122 cpumask_var_t __read_mostly	tracing_buffer_mask;
123 
124 #define MAX_TRACER_SIZE		100
125 /*
126  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
127  *
128  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
129  * is set, then ftrace_dump is called. This will output the contents
130  * of the ftrace buffers to the console.  This is very useful for
131  * capturing traces that lead to crashes and outputing it to a
132  * serial console.
133  *
134  * It is default off, but you can enable it with either specifying
135  * "ftrace_dump_on_oops" in the kernel command line, or setting
136  * /proc/sys/kernel/ftrace_dump_on_oops
137  * Set 1 if you want to dump buffers of all CPUs
138  * Set 2 if you want to dump the buffer of the CPU that triggered oops
139  * Set instance name if you want to dump the specific trace instance
140  * Multiple instance dump is also supported, and instances are seperated
141  * by commas.
142  */
143 /* Set to string format zero to disable by default */
144 char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
145 
146 /* When set, tracing will stop when a WARN*() is hit */
147 static int __disable_trace_on_warning;
148 
149 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
150 			     void *buffer, size_t *lenp, loff_t *ppos);
151 static const struct ctl_table trace_sysctl_table[] = {
152 	{
153 		.procname	= "ftrace_dump_on_oops",
154 		.data		= &ftrace_dump_on_oops,
155 		.maxlen		= MAX_TRACER_SIZE,
156 		.mode		= 0644,
157 		.proc_handler	= proc_dostring,
158 	},
159 	{
160 		.procname	= "traceoff_on_warning",
161 		.data		= &__disable_trace_on_warning,
162 		.maxlen		= sizeof(__disable_trace_on_warning),
163 		.mode		= 0644,
164 		.proc_handler	= proc_dointvec,
165 	},
166 	{
167 		.procname	= "tracepoint_printk",
168 		.data		= &tracepoint_printk,
169 		.maxlen		= sizeof(tracepoint_printk),
170 		.mode		= 0644,
171 		.proc_handler	= tracepoint_printk_sysctl,
172 	},
173 };
174 
init_trace_sysctls(void)175 static int __init init_trace_sysctls(void)
176 {
177 	register_sysctl_init("kernel", trace_sysctl_table);
178 	return 0;
179 }
180 subsys_initcall(init_trace_sysctls);
181 
182 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
183 /* Map of enums to their values, for "eval_map" file */
184 struct trace_eval_map_head {
185 	struct module			*mod;
186 	unsigned long			length;
187 };
188 
189 union trace_eval_map_item;
190 
191 struct trace_eval_map_tail {
192 	/*
193 	 * "end" is first and points to NULL as it must be different
194 	 * than "mod" or "eval_string"
195 	 */
196 	union trace_eval_map_item	*next;
197 	const char			*end;	/* points to NULL */
198 };
199 
200 static DEFINE_MUTEX(trace_eval_mutex);
201 
202 /*
203  * The trace_eval_maps are saved in an array with two extra elements,
204  * one at the beginning, and one at the end. The beginning item contains
205  * the count of the saved maps (head.length), and the module they
206  * belong to if not built in (head.mod). The ending item contains a
207  * pointer to the next array of saved eval_map items.
208  */
209 union trace_eval_map_item {
210 	struct trace_eval_map		map;
211 	struct trace_eval_map_head	head;
212 	struct trace_eval_map_tail	tail;
213 };
214 
215 static union trace_eval_map_item *trace_eval_maps;
216 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
217 
218 int tracing_set_tracer(struct trace_array *tr, const char *buf);
219 static void ftrace_trace_userstack(struct trace_array *tr,
220 				   struct trace_buffer *buffer,
221 				   unsigned int trace_ctx);
222 
223 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
224 static char *default_bootup_tracer;
225 
226 static bool allocate_snapshot;
227 static bool snapshot_at_boot;
228 
229 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
230 static int boot_instance_index;
231 
232 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
233 static int boot_snapshot_index;
234 
set_cmdline_ftrace(char * str)235 static int __init set_cmdline_ftrace(char *str)
236 {
237 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
238 	default_bootup_tracer = bootup_tracer_buf;
239 	/* We are using ftrace early, expand it */
240 	trace_set_ring_buffer_expanded(NULL);
241 	return 1;
242 }
243 __setup("ftrace=", set_cmdline_ftrace);
244 
ftrace_dump_on_oops_enabled(void)245 int ftrace_dump_on_oops_enabled(void)
246 {
247 	if (!strcmp("0", ftrace_dump_on_oops))
248 		return 0;
249 	else
250 		return 1;
251 }
252 
set_ftrace_dump_on_oops(char * str)253 static int __init set_ftrace_dump_on_oops(char *str)
254 {
255 	if (!*str) {
256 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
257 		return 1;
258 	}
259 
260 	if (*str == ',') {
261 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
262 		strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
263 		return 1;
264 	}
265 
266 	if (*str++ == '=') {
267 		strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
268 		return 1;
269 	}
270 
271 	return 0;
272 }
273 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
274 
stop_trace_on_warning(char * str)275 static int __init stop_trace_on_warning(char *str)
276 {
277 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
278 		__disable_trace_on_warning = 1;
279 	return 1;
280 }
281 __setup("traceoff_on_warning", stop_trace_on_warning);
282 
boot_alloc_snapshot(char * str)283 static int __init boot_alloc_snapshot(char *str)
284 {
285 	char *slot = boot_snapshot_info + boot_snapshot_index;
286 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
287 	int ret;
288 
289 	if (str[0] == '=') {
290 		str++;
291 		if (strlen(str) >= left)
292 			return -1;
293 
294 		ret = snprintf(slot, left, "%s\t", str);
295 		boot_snapshot_index += ret;
296 	} else {
297 		allocate_snapshot = true;
298 		/* We also need the main ring buffer expanded */
299 		trace_set_ring_buffer_expanded(NULL);
300 	}
301 	return 1;
302 }
303 __setup("alloc_snapshot", boot_alloc_snapshot);
304 
305 
boot_snapshot(char * str)306 static int __init boot_snapshot(char *str)
307 {
308 	snapshot_at_boot = true;
309 	boot_alloc_snapshot(str);
310 	return 1;
311 }
312 __setup("ftrace_boot_snapshot", boot_snapshot);
313 
314 
boot_instance(char * str)315 static int __init boot_instance(char *str)
316 {
317 	char *slot = boot_instance_info + boot_instance_index;
318 	int left = sizeof(boot_instance_info) - boot_instance_index;
319 	int ret;
320 
321 	if (strlen(str) >= left)
322 		return -1;
323 
324 	ret = snprintf(slot, left, "%s\t", str);
325 	boot_instance_index += ret;
326 
327 	return 1;
328 }
329 __setup("trace_instance=", boot_instance);
330 
331 
332 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
333 
set_trace_boot_options(char * str)334 static int __init set_trace_boot_options(char *str)
335 {
336 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
337 	return 1;
338 }
339 __setup("trace_options=", set_trace_boot_options);
340 
341 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
342 static char *trace_boot_clock __initdata;
343 
set_trace_boot_clock(char * str)344 static int __init set_trace_boot_clock(char *str)
345 {
346 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
347 	trace_boot_clock = trace_boot_clock_buf;
348 	return 1;
349 }
350 __setup("trace_clock=", set_trace_boot_clock);
351 
set_tracepoint_printk(char * str)352 static int __init set_tracepoint_printk(char *str)
353 {
354 	/* Ignore the "tp_printk_stop_on_boot" param */
355 	if (*str == '_')
356 		return 0;
357 
358 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
359 		tracepoint_printk = 1;
360 	return 1;
361 }
362 __setup("tp_printk", set_tracepoint_printk);
363 
set_tracepoint_printk_stop(char * str)364 static int __init set_tracepoint_printk_stop(char *str)
365 {
366 	tracepoint_printk_stop_on_boot = true;
367 	return 1;
368 }
369 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
370 
set_traceoff_after_boot(char * str)371 static int __init set_traceoff_after_boot(char *str)
372 {
373 	traceoff_after_boot = true;
374 	return 1;
375 }
376 __setup("traceoff_after_boot", set_traceoff_after_boot);
377 
ns2usecs(u64 nsec)378 unsigned long long ns2usecs(u64 nsec)
379 {
380 	nsec += 500;
381 	do_div(nsec, 1000);
382 	return nsec;
383 }
384 
385 static void
trace_process_export(struct trace_export * export,struct ring_buffer_event * event,int flag)386 trace_process_export(struct trace_export *export,
387 	       struct ring_buffer_event *event, int flag)
388 {
389 	struct trace_entry *entry;
390 	unsigned int size = 0;
391 
392 	if (export->flags & flag) {
393 		entry = ring_buffer_event_data(event);
394 		size = ring_buffer_event_length(event);
395 		export->write(export, entry, size);
396 	}
397 }
398 
399 static DEFINE_MUTEX(ftrace_export_lock);
400 
401 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
402 
403 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
404 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
405 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
406 
ftrace_exports_enable(struct trace_export * export)407 static inline void ftrace_exports_enable(struct trace_export *export)
408 {
409 	if (export->flags & TRACE_EXPORT_FUNCTION)
410 		static_branch_inc(&trace_function_exports_enabled);
411 
412 	if (export->flags & TRACE_EXPORT_EVENT)
413 		static_branch_inc(&trace_event_exports_enabled);
414 
415 	if (export->flags & TRACE_EXPORT_MARKER)
416 		static_branch_inc(&trace_marker_exports_enabled);
417 }
418 
ftrace_exports_disable(struct trace_export * export)419 static inline void ftrace_exports_disable(struct trace_export *export)
420 {
421 	if (export->flags & TRACE_EXPORT_FUNCTION)
422 		static_branch_dec(&trace_function_exports_enabled);
423 
424 	if (export->flags & TRACE_EXPORT_EVENT)
425 		static_branch_dec(&trace_event_exports_enabled);
426 
427 	if (export->flags & TRACE_EXPORT_MARKER)
428 		static_branch_dec(&trace_marker_exports_enabled);
429 }
430 
ftrace_exports(struct ring_buffer_event * event,int flag)431 static void ftrace_exports(struct ring_buffer_event *event, int flag)
432 {
433 	struct trace_export *export;
434 
435 	preempt_disable_notrace();
436 
437 	export = rcu_dereference_raw_check(ftrace_exports_list);
438 	while (export) {
439 		trace_process_export(export, event, flag);
440 		export = rcu_dereference_raw_check(export->next);
441 	}
442 
443 	preempt_enable_notrace();
444 }
445 
446 static inline void
add_trace_export(struct trace_export ** list,struct trace_export * export)447 add_trace_export(struct trace_export **list, struct trace_export *export)
448 {
449 	rcu_assign_pointer(export->next, *list);
450 	/*
451 	 * We are entering export into the list but another
452 	 * CPU might be walking that list. We need to make sure
453 	 * the export->next pointer is valid before another CPU sees
454 	 * the export pointer included into the list.
455 	 */
456 	rcu_assign_pointer(*list, export);
457 }
458 
459 static inline int
rm_trace_export(struct trace_export ** list,struct trace_export * export)460 rm_trace_export(struct trace_export **list, struct trace_export *export)
461 {
462 	struct trace_export **p;
463 
464 	for (p = list; *p != NULL; p = &(*p)->next)
465 		if (*p == export)
466 			break;
467 
468 	if (*p != export)
469 		return -1;
470 
471 	rcu_assign_pointer(*p, (*p)->next);
472 
473 	return 0;
474 }
475 
476 static inline void
add_ftrace_export(struct trace_export ** list,struct trace_export * export)477 add_ftrace_export(struct trace_export **list, struct trace_export *export)
478 {
479 	ftrace_exports_enable(export);
480 
481 	add_trace_export(list, export);
482 }
483 
484 static inline int
rm_ftrace_export(struct trace_export ** list,struct trace_export * export)485 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
486 {
487 	int ret;
488 
489 	ret = rm_trace_export(list, export);
490 	ftrace_exports_disable(export);
491 
492 	return ret;
493 }
494 
register_ftrace_export(struct trace_export * export)495 int register_ftrace_export(struct trace_export *export)
496 {
497 	if (WARN_ON_ONCE(!export->write))
498 		return -1;
499 
500 	mutex_lock(&ftrace_export_lock);
501 
502 	add_ftrace_export(&ftrace_exports_list, export);
503 
504 	mutex_unlock(&ftrace_export_lock);
505 
506 	return 0;
507 }
508 EXPORT_SYMBOL_GPL(register_ftrace_export);
509 
unregister_ftrace_export(struct trace_export * export)510 int unregister_ftrace_export(struct trace_export *export)
511 {
512 	int ret;
513 
514 	mutex_lock(&ftrace_export_lock);
515 
516 	ret = rm_ftrace_export(&ftrace_exports_list, export);
517 
518 	mutex_unlock(&ftrace_export_lock);
519 
520 	return ret;
521 }
522 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
523 
524 /* trace_flags holds trace_options default values */
525 #define TRACE_DEFAULT_FLAGS						\
526 	(FUNCTION_DEFAULT_FLAGS |					\
527 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
528 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
529 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
530 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
531 	 TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK |		\
532 	 TRACE_ITER_COPY_MARKER)
533 
534 /* trace_options that are only supported by global_trace */
535 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
536 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
537 
538 /* trace_flags that are default zero for instances */
539 #define ZEROED_TRACE_FLAGS \
540 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK | \
541 	 TRACE_ITER_COPY_MARKER)
542 
543 /*
544  * The global_trace is the descriptor that holds the top-level tracing
545  * buffers for the live tracing.
546  */
547 static struct trace_array global_trace = {
548 	.trace_flags = TRACE_DEFAULT_FLAGS,
549 };
550 
551 static struct trace_array *printk_trace = &global_trace;
552 
553 /* List of trace_arrays interested in the top level trace_marker */
554 static LIST_HEAD(marker_copies);
555 
printk_binsafe(struct trace_array * tr)556 static __always_inline bool printk_binsafe(struct trace_array *tr)
557 {
558 	/*
559 	 * The binary format of traceprintk can cause a crash if used
560 	 * by a buffer from another boot. Force the use of the
561 	 * non binary version of trace_printk if the trace_printk
562 	 * buffer is a boot mapped ring buffer.
563 	 */
564 	return !(tr->flags & TRACE_ARRAY_FL_BOOT);
565 }
566 
update_printk_trace(struct trace_array * tr)567 static void update_printk_trace(struct trace_array *tr)
568 {
569 	if (printk_trace == tr)
570 		return;
571 
572 	printk_trace->trace_flags &= ~TRACE_ITER_TRACE_PRINTK;
573 	printk_trace = tr;
574 	tr->trace_flags |= TRACE_ITER_TRACE_PRINTK;
575 }
576 
577 /* Returns true if the status of tr changed */
update_marker_trace(struct trace_array * tr,int enabled)578 static bool update_marker_trace(struct trace_array *tr, int enabled)
579 {
580 	lockdep_assert_held(&event_mutex);
581 
582 	if (enabled) {
583 		if (!list_empty(&tr->marker_list))
584 			return false;
585 
586 		list_add_rcu(&tr->marker_list, &marker_copies);
587 		tr->trace_flags |= TRACE_ITER_COPY_MARKER;
588 		return true;
589 	}
590 
591 	if (list_empty(&tr->marker_list))
592 		return false;
593 
594 	list_del_init(&tr->marker_list);
595 	tr->trace_flags &= ~TRACE_ITER_COPY_MARKER;
596 	return true;
597 }
598 
trace_set_ring_buffer_expanded(struct trace_array * tr)599 void trace_set_ring_buffer_expanded(struct trace_array *tr)
600 {
601 	if (!tr)
602 		tr = &global_trace;
603 	tr->ring_buffer_expanded = true;
604 }
605 
606 LIST_HEAD(ftrace_trace_arrays);
607 
trace_array_get(struct trace_array * this_tr)608 int trace_array_get(struct trace_array *this_tr)
609 {
610 	struct trace_array *tr;
611 
612 	guard(mutex)(&trace_types_lock);
613 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
614 		if (tr == this_tr) {
615 			tr->ref++;
616 			return 0;
617 		}
618 	}
619 
620 	return -ENODEV;
621 }
622 
__trace_array_put(struct trace_array * this_tr)623 static void __trace_array_put(struct trace_array *this_tr)
624 {
625 	WARN_ON(!this_tr->ref);
626 	this_tr->ref--;
627 }
628 
629 /**
630  * trace_array_put - Decrement the reference counter for this trace array.
631  * @this_tr : pointer to the trace array
632  *
633  * NOTE: Use this when we no longer need the trace array returned by
634  * trace_array_get_by_name(). This ensures the trace array can be later
635  * destroyed.
636  *
637  */
trace_array_put(struct trace_array * this_tr)638 void trace_array_put(struct trace_array *this_tr)
639 {
640 	if (!this_tr)
641 		return;
642 
643 	mutex_lock(&trace_types_lock);
644 	__trace_array_put(this_tr);
645 	mutex_unlock(&trace_types_lock);
646 }
647 EXPORT_SYMBOL_GPL(trace_array_put);
648 
tracing_check_open_get_tr(struct trace_array * tr)649 int tracing_check_open_get_tr(struct trace_array *tr)
650 {
651 	int ret;
652 
653 	ret = security_locked_down(LOCKDOWN_TRACEFS);
654 	if (ret)
655 		return ret;
656 
657 	if (tracing_disabled)
658 		return -ENODEV;
659 
660 	if (tr && trace_array_get(tr) < 0)
661 		return -ENODEV;
662 
663 	return 0;
664 }
665 
666 /**
667  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
668  * @filtered_pids: The list of pids to check
669  * @search_pid: The PID to find in @filtered_pids
670  *
671  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
672  */
673 bool
trace_find_filtered_pid(struct trace_pid_list * filtered_pids,pid_t search_pid)674 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
675 {
676 	return trace_pid_list_is_set(filtered_pids, search_pid);
677 }
678 
679 /**
680  * trace_ignore_this_task - should a task be ignored for tracing
681  * @filtered_pids: The list of pids to check
682  * @filtered_no_pids: The list of pids not to be traced
683  * @task: The task that should be ignored if not filtered
684  *
685  * Checks if @task should be traced or not from @filtered_pids.
686  * Returns true if @task should *NOT* be traced.
687  * Returns false if @task should be traced.
688  */
689 bool
trace_ignore_this_task(struct trace_pid_list * filtered_pids,struct trace_pid_list * filtered_no_pids,struct task_struct * task)690 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
691 		       struct trace_pid_list *filtered_no_pids,
692 		       struct task_struct *task)
693 {
694 	/*
695 	 * If filtered_no_pids is not empty, and the task's pid is listed
696 	 * in filtered_no_pids, then return true.
697 	 * Otherwise, if filtered_pids is empty, that means we can
698 	 * trace all tasks. If it has content, then only trace pids
699 	 * within filtered_pids.
700 	 */
701 
702 	return (filtered_pids &&
703 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
704 		(filtered_no_pids &&
705 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
706 }
707 
708 /**
709  * trace_filter_add_remove_task - Add or remove a task from a pid_list
710  * @pid_list: The list to modify
711  * @self: The current task for fork or NULL for exit
712  * @task: The task to add or remove
713  *
714  * If adding a task, if @self is defined, the task is only added if @self
715  * is also included in @pid_list. This happens on fork and tasks should
716  * only be added when the parent is listed. If @self is NULL, then the
717  * @task pid will be removed from the list, which would happen on exit
718  * of a task.
719  */
trace_filter_add_remove_task(struct trace_pid_list * pid_list,struct task_struct * self,struct task_struct * task)720 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
721 				  struct task_struct *self,
722 				  struct task_struct *task)
723 {
724 	if (!pid_list)
725 		return;
726 
727 	/* For forks, we only add if the forking task is listed */
728 	if (self) {
729 		if (!trace_find_filtered_pid(pid_list, self->pid))
730 			return;
731 	}
732 
733 	/* "self" is set for forks, and NULL for exits */
734 	if (self)
735 		trace_pid_list_set(pid_list, task->pid);
736 	else
737 		trace_pid_list_clear(pid_list, task->pid);
738 }
739 
740 /**
741  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
742  * @pid_list: The pid list to show
743  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
744  * @pos: The position of the file
745  *
746  * This is used by the seq_file "next" operation to iterate the pids
747  * listed in a trace_pid_list structure.
748  *
749  * Returns the pid+1 as we want to display pid of zero, but NULL would
750  * stop the iteration.
751  */
trace_pid_next(struct trace_pid_list * pid_list,void * v,loff_t * pos)752 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
753 {
754 	long pid = (unsigned long)v;
755 	unsigned int next;
756 
757 	(*pos)++;
758 
759 	/* pid already is +1 of the actual previous bit */
760 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
761 		return NULL;
762 
763 	pid = next;
764 
765 	/* Return pid + 1 to allow zero to be represented */
766 	return (void *)(pid + 1);
767 }
768 
769 /**
770  * trace_pid_start - Used for seq_file to start reading pid lists
771  * @pid_list: The pid list to show
772  * @pos: The position of the file
773  *
774  * This is used by seq_file "start" operation to start the iteration
775  * of listing pids.
776  *
777  * Returns the pid+1 as we want to display pid of zero, but NULL would
778  * stop the iteration.
779  */
trace_pid_start(struct trace_pid_list * pid_list,loff_t * pos)780 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
781 {
782 	unsigned long pid;
783 	unsigned int first;
784 	loff_t l = 0;
785 
786 	if (trace_pid_list_first(pid_list, &first) < 0)
787 		return NULL;
788 
789 	pid = first;
790 
791 	/* Return pid + 1 so that zero can be the exit value */
792 	for (pid++; pid && l < *pos;
793 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
794 		;
795 	return (void *)pid;
796 }
797 
798 /**
799  * trace_pid_show - show the current pid in seq_file processing
800  * @m: The seq_file structure to write into
801  * @v: A void pointer of the pid (+1) value to display
802  *
803  * Can be directly used by seq_file operations to display the current
804  * pid value.
805  */
trace_pid_show(struct seq_file * m,void * v)806 int trace_pid_show(struct seq_file *m, void *v)
807 {
808 	unsigned long pid = (unsigned long)v - 1;
809 
810 	seq_printf(m, "%lu\n", pid);
811 	return 0;
812 }
813 
814 /* 128 should be much more than enough */
815 #define PID_BUF_SIZE		127
816 
trace_pid_write(struct trace_pid_list * filtered_pids,struct trace_pid_list ** new_pid_list,const char __user * ubuf,size_t cnt)817 int trace_pid_write(struct trace_pid_list *filtered_pids,
818 		    struct trace_pid_list **new_pid_list,
819 		    const char __user *ubuf, size_t cnt)
820 {
821 	struct trace_pid_list *pid_list;
822 	struct trace_parser parser;
823 	unsigned long val;
824 	int nr_pids = 0;
825 	ssize_t read = 0;
826 	ssize_t ret;
827 	loff_t pos;
828 	pid_t pid;
829 
830 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
831 		return -ENOMEM;
832 
833 	/*
834 	 * Always recreate a new array. The write is an all or nothing
835 	 * operation. Always create a new array when adding new pids by
836 	 * the user. If the operation fails, then the current list is
837 	 * not modified.
838 	 */
839 	pid_list = trace_pid_list_alloc();
840 	if (!pid_list) {
841 		trace_parser_put(&parser);
842 		return -ENOMEM;
843 	}
844 
845 	if (filtered_pids) {
846 		/* copy the current bits to the new max */
847 		ret = trace_pid_list_first(filtered_pids, &pid);
848 		while (!ret) {
849 			trace_pid_list_set(pid_list, pid);
850 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
851 			nr_pids++;
852 		}
853 	}
854 
855 	ret = 0;
856 	while (cnt > 0) {
857 
858 		pos = 0;
859 
860 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
861 		if (ret < 0)
862 			break;
863 
864 		read += ret;
865 		ubuf += ret;
866 		cnt -= ret;
867 
868 		if (!trace_parser_loaded(&parser))
869 			break;
870 
871 		ret = -EINVAL;
872 		if (kstrtoul(parser.buffer, 0, &val))
873 			break;
874 
875 		pid = (pid_t)val;
876 
877 		if (trace_pid_list_set(pid_list, pid) < 0) {
878 			ret = -1;
879 			break;
880 		}
881 		nr_pids++;
882 
883 		trace_parser_clear(&parser);
884 		ret = 0;
885 	}
886 	trace_parser_put(&parser);
887 
888 	if (ret < 0) {
889 		trace_pid_list_free(pid_list);
890 		return ret;
891 	}
892 
893 	if (!nr_pids) {
894 		/* Cleared the list of pids */
895 		trace_pid_list_free(pid_list);
896 		pid_list = NULL;
897 	}
898 
899 	*new_pid_list = pid_list;
900 
901 	return read;
902 }
903 
buffer_ftrace_now(struct array_buffer * buf,int cpu)904 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
905 {
906 	u64 ts;
907 
908 	/* Early boot up does not have a buffer yet */
909 	if (!buf->buffer)
910 		return trace_clock_local();
911 
912 	ts = ring_buffer_time_stamp(buf->buffer);
913 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
914 
915 	return ts;
916 }
917 
ftrace_now(int cpu)918 u64 ftrace_now(int cpu)
919 {
920 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
921 }
922 
923 /**
924  * tracing_is_enabled - Show if global_trace has been enabled
925  *
926  * Shows if the global trace has been enabled or not. It uses the
927  * mirror flag "buffer_disabled" to be used in fast paths such as for
928  * the irqsoff tracer. But it may be inaccurate due to races. If you
929  * need to know the accurate state, use tracing_is_on() which is a little
930  * slower, but accurate.
931  */
tracing_is_enabled(void)932 int tracing_is_enabled(void)
933 {
934 	/*
935 	 * For quick access (irqsoff uses this in fast path), just
936 	 * return the mirror variable of the state of the ring buffer.
937 	 * It's a little racy, but we don't really care.
938 	 */
939 	return !global_trace.buffer_disabled;
940 }
941 
942 /*
943  * trace_buf_size is the size in bytes that is allocated
944  * for a buffer. Note, the number of bytes is always rounded
945  * to page size.
946  *
947  * This number is purposely set to a low number of 16384.
948  * If the dump on oops happens, it will be much appreciated
949  * to not have to wait for all that output. Anyway this can be
950  * boot time and run time configurable.
951  */
952 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
953 
954 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
955 
956 /* trace_types holds a link list of available tracers. */
957 static struct tracer		*trace_types __read_mostly;
958 
959 /*
960  * trace_types_lock is used to protect the trace_types list.
961  */
962 DEFINE_MUTEX(trace_types_lock);
963 
964 /*
965  * serialize the access of the ring buffer
966  *
967  * ring buffer serializes readers, but it is low level protection.
968  * The validity of the events (which returns by ring_buffer_peek() ..etc)
969  * are not protected by ring buffer.
970  *
971  * The content of events may become garbage if we allow other process consumes
972  * these events concurrently:
973  *   A) the page of the consumed events may become a normal page
974  *      (not reader page) in ring buffer, and this page will be rewritten
975  *      by events producer.
976  *   B) The page of the consumed events may become a page for splice_read,
977  *      and this page will be returned to system.
978  *
979  * These primitives allow multi process access to different cpu ring buffer
980  * concurrently.
981  *
982  * These primitives don't distinguish read-only and read-consume access.
983  * Multi read-only access are also serialized.
984  */
985 
986 #ifdef CONFIG_SMP
987 static DECLARE_RWSEM(all_cpu_access_lock);
988 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
989 
trace_access_lock(int cpu)990 static inline void trace_access_lock(int cpu)
991 {
992 	if (cpu == RING_BUFFER_ALL_CPUS) {
993 		/* gain it for accessing the whole ring buffer. */
994 		down_write(&all_cpu_access_lock);
995 	} else {
996 		/* gain it for accessing a cpu ring buffer. */
997 
998 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
999 		down_read(&all_cpu_access_lock);
1000 
1001 		/* Secondly block other access to this @cpu ring buffer. */
1002 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
1003 	}
1004 }
1005 
trace_access_unlock(int cpu)1006 static inline void trace_access_unlock(int cpu)
1007 {
1008 	if (cpu == RING_BUFFER_ALL_CPUS) {
1009 		up_write(&all_cpu_access_lock);
1010 	} else {
1011 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
1012 		up_read(&all_cpu_access_lock);
1013 	}
1014 }
1015 
trace_access_lock_init(void)1016 static inline void trace_access_lock_init(void)
1017 {
1018 	int cpu;
1019 
1020 	for_each_possible_cpu(cpu)
1021 		mutex_init(&per_cpu(cpu_access_lock, cpu));
1022 }
1023 
1024 #else
1025 
1026 static DEFINE_MUTEX(access_lock);
1027 
trace_access_lock(int cpu)1028 static inline void trace_access_lock(int cpu)
1029 {
1030 	(void)cpu;
1031 	mutex_lock(&access_lock);
1032 }
1033 
trace_access_unlock(int cpu)1034 static inline void trace_access_unlock(int cpu)
1035 {
1036 	(void)cpu;
1037 	mutex_unlock(&access_lock);
1038 }
1039 
trace_access_lock_init(void)1040 static inline void trace_access_lock_init(void)
1041 {
1042 }
1043 
1044 #endif
1045 
1046 #ifdef CONFIG_STACKTRACE
1047 static void __ftrace_trace_stack(struct trace_array *tr,
1048 				 struct trace_buffer *buffer,
1049 				 unsigned int trace_ctx,
1050 				 int skip, struct pt_regs *regs);
1051 static inline void ftrace_trace_stack(struct trace_array *tr,
1052 				      struct trace_buffer *buffer,
1053 				      unsigned int trace_ctx,
1054 				      int skip, struct pt_regs *regs);
1055 
1056 #else
__ftrace_trace_stack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx,int skip,struct pt_regs * regs)1057 static inline void __ftrace_trace_stack(struct trace_array *tr,
1058 					struct trace_buffer *buffer,
1059 					unsigned int trace_ctx,
1060 					int skip, struct pt_regs *regs)
1061 {
1062 }
ftrace_trace_stack(struct trace_array * tr,struct trace_buffer * buffer,unsigned long trace_ctx,int skip,struct pt_regs * regs)1063 static inline void ftrace_trace_stack(struct trace_array *tr,
1064 				      struct trace_buffer *buffer,
1065 				      unsigned long trace_ctx,
1066 				      int skip, struct pt_regs *regs)
1067 {
1068 }
1069 
1070 #endif
1071 
1072 static __always_inline void
trace_event_setup(struct ring_buffer_event * event,int type,unsigned int trace_ctx)1073 trace_event_setup(struct ring_buffer_event *event,
1074 		  int type, unsigned int trace_ctx)
1075 {
1076 	struct trace_entry *ent = ring_buffer_event_data(event);
1077 
1078 	tracing_generic_entry_update(ent, type, trace_ctx);
1079 }
1080 
1081 static __always_inline struct ring_buffer_event *
__trace_buffer_lock_reserve(struct trace_buffer * buffer,int type,unsigned long len,unsigned int trace_ctx)1082 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
1083 			  int type,
1084 			  unsigned long len,
1085 			  unsigned int trace_ctx)
1086 {
1087 	struct ring_buffer_event *event;
1088 
1089 	event = ring_buffer_lock_reserve(buffer, len);
1090 	if (event != NULL)
1091 		trace_event_setup(event, type, trace_ctx);
1092 
1093 	return event;
1094 }
1095 
tracer_tracing_on(struct trace_array * tr)1096 void tracer_tracing_on(struct trace_array *tr)
1097 {
1098 	if (tr->array_buffer.buffer)
1099 		ring_buffer_record_on(tr->array_buffer.buffer);
1100 	/*
1101 	 * This flag is looked at when buffers haven't been allocated
1102 	 * yet, or by some tracers (like irqsoff), that just want to
1103 	 * know if the ring buffer has been disabled, but it can handle
1104 	 * races of where it gets disabled but we still do a record.
1105 	 * As the check is in the fast path of the tracers, it is more
1106 	 * important to be fast than accurate.
1107 	 */
1108 	tr->buffer_disabled = 0;
1109 }
1110 
1111 /**
1112  * tracing_on - enable tracing buffers
1113  *
1114  * This function enables tracing buffers that may have been
1115  * disabled with tracing_off.
1116  */
tracing_on(void)1117 void tracing_on(void)
1118 {
1119 	tracer_tracing_on(&global_trace);
1120 }
1121 EXPORT_SYMBOL_GPL(tracing_on);
1122 
1123 
1124 static __always_inline void
__buffer_unlock_commit(struct trace_buffer * buffer,struct ring_buffer_event * event)1125 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1126 {
1127 	__this_cpu_write(trace_taskinfo_save, true);
1128 
1129 	/* If this is the temp buffer, we need to commit fully */
1130 	if (this_cpu_read(trace_buffered_event) == event) {
1131 		/* Length is in event->array[0] */
1132 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1133 		/* Release the temp buffer */
1134 		this_cpu_dec(trace_buffered_event_cnt);
1135 		/* ring_buffer_unlock_commit() enables preemption */
1136 		preempt_enable_notrace();
1137 	} else
1138 		ring_buffer_unlock_commit(buffer);
1139 }
1140 
__trace_array_puts(struct trace_array * tr,unsigned long ip,const char * str,int size)1141 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1142 		       const char *str, int size)
1143 {
1144 	struct ring_buffer_event *event;
1145 	struct trace_buffer *buffer;
1146 	struct print_entry *entry;
1147 	unsigned int trace_ctx;
1148 	int alloc;
1149 
1150 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1151 		return 0;
1152 
1153 	if (unlikely(tracing_selftest_running && tr == &global_trace))
1154 		return 0;
1155 
1156 	if (unlikely(tracing_disabled))
1157 		return 0;
1158 
1159 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1160 
1161 	trace_ctx = tracing_gen_ctx();
1162 	buffer = tr->array_buffer.buffer;
1163 	ring_buffer_nest_start(buffer);
1164 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1165 					    trace_ctx);
1166 	if (!event) {
1167 		size = 0;
1168 		goto out;
1169 	}
1170 
1171 	entry = ring_buffer_event_data(event);
1172 	entry->ip = ip;
1173 
1174 	memcpy(&entry->buf, str, size);
1175 
1176 	/* Add a newline if necessary */
1177 	if (entry->buf[size - 1] != '\n') {
1178 		entry->buf[size] = '\n';
1179 		entry->buf[size + 1] = '\0';
1180 	} else
1181 		entry->buf[size] = '\0';
1182 
1183 	__buffer_unlock_commit(buffer, event);
1184 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1185  out:
1186 	ring_buffer_nest_end(buffer);
1187 	return size;
1188 }
1189 EXPORT_SYMBOL_GPL(__trace_array_puts);
1190 
1191 /**
1192  * __trace_puts - write a constant string into the trace buffer.
1193  * @ip:	   The address of the caller
1194  * @str:   The constant string to write
1195  * @size:  The size of the string.
1196  */
__trace_puts(unsigned long ip,const char * str,int size)1197 int __trace_puts(unsigned long ip, const char *str, int size)
1198 {
1199 	return __trace_array_puts(printk_trace, ip, str, size);
1200 }
1201 EXPORT_SYMBOL_GPL(__trace_puts);
1202 
1203 /**
1204  * __trace_bputs - write the pointer to a constant string into trace buffer
1205  * @ip:	   The address of the caller
1206  * @str:   The constant string to write to the buffer to
1207  */
__trace_bputs(unsigned long ip,const char * str)1208 int __trace_bputs(unsigned long ip, const char *str)
1209 {
1210 	struct trace_array *tr = READ_ONCE(printk_trace);
1211 	struct ring_buffer_event *event;
1212 	struct trace_buffer *buffer;
1213 	struct bputs_entry *entry;
1214 	unsigned int trace_ctx;
1215 	int size = sizeof(struct bputs_entry);
1216 	int ret = 0;
1217 
1218 	if (!printk_binsafe(tr))
1219 		return __trace_puts(ip, str, strlen(str));
1220 
1221 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1222 		return 0;
1223 
1224 	if (unlikely(tracing_selftest_running || tracing_disabled))
1225 		return 0;
1226 
1227 	trace_ctx = tracing_gen_ctx();
1228 	buffer = tr->array_buffer.buffer;
1229 
1230 	ring_buffer_nest_start(buffer);
1231 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1232 					    trace_ctx);
1233 	if (!event)
1234 		goto out;
1235 
1236 	entry = ring_buffer_event_data(event);
1237 	entry->ip			= ip;
1238 	entry->str			= str;
1239 
1240 	__buffer_unlock_commit(buffer, event);
1241 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1242 
1243 	ret = 1;
1244  out:
1245 	ring_buffer_nest_end(buffer);
1246 	return ret;
1247 }
1248 EXPORT_SYMBOL_GPL(__trace_bputs);
1249 
1250 #ifdef CONFIG_TRACER_SNAPSHOT
tracing_snapshot_instance_cond(struct trace_array * tr,void * cond_data)1251 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1252 					   void *cond_data)
1253 {
1254 	struct tracer *tracer = tr->current_trace;
1255 	unsigned long flags;
1256 
1257 	if (in_nmi()) {
1258 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1259 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1260 		return;
1261 	}
1262 
1263 	if (!tr->allocated_snapshot) {
1264 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1265 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1266 		tracer_tracing_off(tr);
1267 		return;
1268 	}
1269 
1270 	/* Note, snapshot can not be used when the tracer uses it */
1271 	if (tracer->use_max_tr) {
1272 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1273 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1274 		return;
1275 	}
1276 
1277 	if (tr->mapped) {
1278 		trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
1279 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1280 		return;
1281 	}
1282 
1283 	local_irq_save(flags);
1284 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1285 	local_irq_restore(flags);
1286 }
1287 
tracing_snapshot_instance(struct trace_array * tr)1288 void tracing_snapshot_instance(struct trace_array *tr)
1289 {
1290 	tracing_snapshot_instance_cond(tr, NULL);
1291 }
1292 
1293 /**
1294  * tracing_snapshot - take a snapshot of the current buffer.
1295  *
1296  * This causes a swap between the snapshot buffer and the current live
1297  * tracing buffer. You can use this to take snapshots of the live
1298  * trace when some condition is triggered, but continue to trace.
1299  *
1300  * Note, make sure to allocate the snapshot with either
1301  * a tracing_snapshot_alloc(), or by doing it manually
1302  * with: echo 1 > /sys/kernel/tracing/snapshot
1303  *
1304  * If the snapshot buffer is not allocated, it will stop tracing.
1305  * Basically making a permanent snapshot.
1306  */
tracing_snapshot(void)1307 void tracing_snapshot(void)
1308 {
1309 	struct trace_array *tr = &global_trace;
1310 
1311 	tracing_snapshot_instance(tr);
1312 }
1313 EXPORT_SYMBOL_GPL(tracing_snapshot);
1314 
1315 /**
1316  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1317  * @tr:		The tracing instance to snapshot
1318  * @cond_data:	The data to be tested conditionally, and possibly saved
1319  *
1320  * This is the same as tracing_snapshot() except that the snapshot is
1321  * conditional - the snapshot will only happen if the
1322  * cond_snapshot.update() implementation receiving the cond_data
1323  * returns true, which means that the trace array's cond_snapshot
1324  * update() operation used the cond_data to determine whether the
1325  * snapshot should be taken, and if it was, presumably saved it along
1326  * with the snapshot.
1327  */
tracing_snapshot_cond(struct trace_array * tr,void * cond_data)1328 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1329 {
1330 	tracing_snapshot_instance_cond(tr, cond_data);
1331 }
1332 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1333 
1334 /**
1335  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1336  * @tr:		The tracing instance
1337  *
1338  * When the user enables a conditional snapshot using
1339  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1340  * with the snapshot.  This accessor is used to retrieve it.
1341  *
1342  * Should not be called from cond_snapshot.update(), since it takes
1343  * the tr->max_lock lock, which the code calling
1344  * cond_snapshot.update() has already done.
1345  *
1346  * Returns the cond_data associated with the trace array's snapshot.
1347  */
tracing_cond_snapshot_data(struct trace_array * tr)1348 void *tracing_cond_snapshot_data(struct trace_array *tr)
1349 {
1350 	void *cond_data = NULL;
1351 
1352 	local_irq_disable();
1353 	arch_spin_lock(&tr->max_lock);
1354 
1355 	if (tr->cond_snapshot)
1356 		cond_data = tr->cond_snapshot->cond_data;
1357 
1358 	arch_spin_unlock(&tr->max_lock);
1359 	local_irq_enable();
1360 
1361 	return cond_data;
1362 }
1363 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1364 
1365 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1366 					struct array_buffer *size_buf, int cpu_id);
1367 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1368 
tracing_alloc_snapshot_instance(struct trace_array * tr)1369 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1370 {
1371 	int order;
1372 	int ret;
1373 
1374 	if (!tr->allocated_snapshot) {
1375 
1376 		/* Make the snapshot buffer have the same order as main buffer */
1377 		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1378 		ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1379 		if (ret < 0)
1380 			return ret;
1381 
1382 		/* allocate spare buffer */
1383 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1384 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1385 		if (ret < 0)
1386 			return ret;
1387 
1388 		tr->allocated_snapshot = true;
1389 	}
1390 
1391 	return 0;
1392 }
1393 
free_snapshot(struct trace_array * tr)1394 static void free_snapshot(struct trace_array *tr)
1395 {
1396 	/*
1397 	 * We don't free the ring buffer. instead, resize it because
1398 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
1399 	 * we want preserve it.
1400 	 */
1401 	ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1402 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1403 	set_buffer_entries(&tr->max_buffer, 1);
1404 	tracing_reset_online_cpus(&tr->max_buffer);
1405 	tr->allocated_snapshot = false;
1406 }
1407 
tracing_arm_snapshot_locked(struct trace_array * tr)1408 static int tracing_arm_snapshot_locked(struct trace_array *tr)
1409 {
1410 	int ret;
1411 
1412 	lockdep_assert_held(&trace_types_lock);
1413 
1414 	spin_lock(&tr->snapshot_trigger_lock);
1415 	if (tr->snapshot == UINT_MAX || tr->mapped) {
1416 		spin_unlock(&tr->snapshot_trigger_lock);
1417 		return -EBUSY;
1418 	}
1419 
1420 	tr->snapshot++;
1421 	spin_unlock(&tr->snapshot_trigger_lock);
1422 
1423 	ret = tracing_alloc_snapshot_instance(tr);
1424 	if (ret) {
1425 		spin_lock(&tr->snapshot_trigger_lock);
1426 		tr->snapshot--;
1427 		spin_unlock(&tr->snapshot_trigger_lock);
1428 	}
1429 
1430 	return ret;
1431 }
1432 
tracing_arm_snapshot(struct trace_array * tr)1433 int tracing_arm_snapshot(struct trace_array *tr)
1434 {
1435 	int ret;
1436 
1437 	mutex_lock(&trace_types_lock);
1438 	ret = tracing_arm_snapshot_locked(tr);
1439 	mutex_unlock(&trace_types_lock);
1440 
1441 	return ret;
1442 }
1443 
tracing_disarm_snapshot(struct trace_array * tr)1444 void tracing_disarm_snapshot(struct trace_array *tr)
1445 {
1446 	spin_lock(&tr->snapshot_trigger_lock);
1447 	if (!WARN_ON(!tr->snapshot))
1448 		tr->snapshot--;
1449 	spin_unlock(&tr->snapshot_trigger_lock);
1450 }
1451 
1452 /**
1453  * tracing_alloc_snapshot - allocate snapshot buffer.
1454  *
1455  * This only allocates the snapshot buffer if it isn't already
1456  * allocated - it doesn't also take a snapshot.
1457  *
1458  * This is meant to be used in cases where the snapshot buffer needs
1459  * to be set up for events that can't sleep but need to be able to
1460  * trigger a snapshot.
1461  */
tracing_alloc_snapshot(void)1462 int tracing_alloc_snapshot(void)
1463 {
1464 	struct trace_array *tr = &global_trace;
1465 	int ret;
1466 
1467 	ret = tracing_alloc_snapshot_instance(tr);
1468 	WARN_ON(ret < 0);
1469 
1470 	return ret;
1471 }
1472 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1473 
1474 /**
1475  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1476  *
1477  * This is similar to tracing_snapshot(), but it will allocate the
1478  * snapshot buffer if it isn't already allocated. Use this only
1479  * where it is safe to sleep, as the allocation may sleep.
1480  *
1481  * This causes a swap between the snapshot buffer and the current live
1482  * tracing buffer. You can use this to take snapshots of the live
1483  * trace when some condition is triggered, but continue to trace.
1484  */
tracing_snapshot_alloc(void)1485 void tracing_snapshot_alloc(void)
1486 {
1487 	int ret;
1488 
1489 	ret = tracing_alloc_snapshot();
1490 	if (ret < 0)
1491 		return;
1492 
1493 	tracing_snapshot();
1494 }
1495 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1496 
1497 /**
1498  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1499  * @tr:		The tracing instance
1500  * @cond_data:	User data to associate with the snapshot
1501  * @update:	Implementation of the cond_snapshot update function
1502  *
1503  * Check whether the conditional snapshot for the given instance has
1504  * already been enabled, or if the current tracer is already using a
1505  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1506  * save the cond_data and update function inside.
1507  *
1508  * Returns 0 if successful, error otherwise.
1509  */
tracing_snapshot_cond_enable(struct trace_array * tr,void * cond_data,cond_update_fn_t update)1510 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1511 				 cond_update_fn_t update)
1512 {
1513 	struct cond_snapshot *cond_snapshot __free(kfree) =
1514 		kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1515 	int ret;
1516 
1517 	if (!cond_snapshot)
1518 		return -ENOMEM;
1519 
1520 	cond_snapshot->cond_data = cond_data;
1521 	cond_snapshot->update = update;
1522 
1523 	guard(mutex)(&trace_types_lock);
1524 
1525 	if (tr->current_trace->use_max_tr)
1526 		return -EBUSY;
1527 
1528 	/*
1529 	 * The cond_snapshot can only change to NULL without the
1530 	 * trace_types_lock. We don't care if we race with it going
1531 	 * to NULL, but we want to make sure that it's not set to
1532 	 * something other than NULL when we get here, which we can
1533 	 * do safely with only holding the trace_types_lock and not
1534 	 * having to take the max_lock.
1535 	 */
1536 	if (tr->cond_snapshot)
1537 		return -EBUSY;
1538 
1539 	ret = tracing_arm_snapshot_locked(tr);
1540 	if (ret)
1541 		return ret;
1542 
1543 	local_irq_disable();
1544 	arch_spin_lock(&tr->max_lock);
1545 	tr->cond_snapshot = no_free_ptr(cond_snapshot);
1546 	arch_spin_unlock(&tr->max_lock);
1547 	local_irq_enable();
1548 
1549 	return 0;
1550 }
1551 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1552 
1553 /**
1554  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1555  * @tr:		The tracing instance
1556  *
1557  * Check whether the conditional snapshot for the given instance is
1558  * enabled; if so, free the cond_snapshot associated with it,
1559  * otherwise return -EINVAL.
1560  *
1561  * Returns 0 if successful, error otherwise.
1562  */
tracing_snapshot_cond_disable(struct trace_array * tr)1563 int tracing_snapshot_cond_disable(struct trace_array *tr)
1564 {
1565 	int ret = 0;
1566 
1567 	local_irq_disable();
1568 	arch_spin_lock(&tr->max_lock);
1569 
1570 	if (!tr->cond_snapshot)
1571 		ret = -EINVAL;
1572 	else {
1573 		kfree(tr->cond_snapshot);
1574 		tr->cond_snapshot = NULL;
1575 	}
1576 
1577 	arch_spin_unlock(&tr->max_lock);
1578 	local_irq_enable();
1579 
1580 	tracing_disarm_snapshot(tr);
1581 
1582 	return ret;
1583 }
1584 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1585 #else
tracing_snapshot(void)1586 void tracing_snapshot(void)
1587 {
1588 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1589 }
1590 EXPORT_SYMBOL_GPL(tracing_snapshot);
tracing_snapshot_cond(struct trace_array * tr,void * cond_data)1591 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1592 {
1593 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1594 }
1595 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
tracing_alloc_snapshot(void)1596 int tracing_alloc_snapshot(void)
1597 {
1598 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1599 	return -ENODEV;
1600 }
1601 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
tracing_snapshot_alloc(void)1602 void tracing_snapshot_alloc(void)
1603 {
1604 	/* Give warning */
1605 	tracing_snapshot();
1606 }
1607 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
tracing_cond_snapshot_data(struct trace_array * tr)1608 void *tracing_cond_snapshot_data(struct trace_array *tr)
1609 {
1610 	return NULL;
1611 }
1612 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
tracing_snapshot_cond_enable(struct trace_array * tr,void * cond_data,cond_update_fn_t update)1613 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1614 {
1615 	return -ENODEV;
1616 }
1617 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
tracing_snapshot_cond_disable(struct trace_array * tr)1618 int tracing_snapshot_cond_disable(struct trace_array *tr)
1619 {
1620 	return false;
1621 }
1622 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1623 #define free_snapshot(tr)	do { } while (0)
1624 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1625 #endif /* CONFIG_TRACER_SNAPSHOT */
1626 
tracer_tracing_off(struct trace_array * tr)1627 void tracer_tracing_off(struct trace_array *tr)
1628 {
1629 	if (tr->array_buffer.buffer)
1630 		ring_buffer_record_off(tr->array_buffer.buffer);
1631 	/*
1632 	 * This flag is looked at when buffers haven't been allocated
1633 	 * yet, or by some tracers (like irqsoff), that just want to
1634 	 * know if the ring buffer has been disabled, but it can handle
1635 	 * races of where it gets disabled but we still do a record.
1636 	 * As the check is in the fast path of the tracers, it is more
1637 	 * important to be fast than accurate.
1638 	 */
1639 	tr->buffer_disabled = 1;
1640 }
1641 
1642 /**
1643  * tracer_tracing_disable() - temporary disable the buffer from write
1644  * @tr: The trace array to disable its buffer for
1645  *
1646  * Expects trace_tracing_enable() to re-enable tracing.
1647  * The difference between this and tracer_tracing_off() is that this
1648  * is a counter and can nest, whereas, tracer_tracing_off() can
1649  * be called multiple times and a single trace_tracing_on() will
1650  * enable it.
1651  */
tracer_tracing_disable(struct trace_array * tr)1652 void tracer_tracing_disable(struct trace_array *tr)
1653 {
1654 	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1655 		return;
1656 
1657 	ring_buffer_record_disable(tr->array_buffer.buffer);
1658 }
1659 
1660 /**
1661  * tracer_tracing_enable() - counter part of tracer_tracing_disable()
1662  * @tr: The trace array that had tracer_tracincg_disable() called on it
1663  *
1664  * This is called after tracer_tracing_disable() has been called on @tr,
1665  * when it's safe to re-enable tracing.
1666  */
tracer_tracing_enable(struct trace_array * tr)1667 void tracer_tracing_enable(struct trace_array *tr)
1668 {
1669 	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1670 		return;
1671 
1672 	ring_buffer_record_enable(tr->array_buffer.buffer);
1673 }
1674 
1675 /**
1676  * tracing_off - turn off tracing buffers
1677  *
1678  * This function stops the tracing buffers from recording data.
1679  * It does not disable any overhead the tracers themselves may
1680  * be causing. This function simply causes all recording to
1681  * the ring buffers to fail.
1682  */
tracing_off(void)1683 void tracing_off(void)
1684 {
1685 	tracer_tracing_off(&global_trace);
1686 }
1687 EXPORT_SYMBOL_GPL(tracing_off);
1688 
disable_trace_on_warning(void)1689 void disable_trace_on_warning(void)
1690 {
1691 	if (__disable_trace_on_warning) {
1692 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1693 			"Disabling tracing due to warning\n");
1694 		tracing_off();
1695 	}
1696 }
1697 
1698 /**
1699  * tracer_tracing_is_on - show real state of ring buffer enabled
1700  * @tr : the trace array to know if ring buffer is enabled
1701  *
1702  * Shows real state of the ring buffer if it is enabled or not.
1703  */
tracer_tracing_is_on(struct trace_array * tr)1704 bool tracer_tracing_is_on(struct trace_array *tr)
1705 {
1706 	if (tr->array_buffer.buffer)
1707 		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1708 	return !tr->buffer_disabled;
1709 }
1710 
1711 /**
1712  * tracing_is_on - show state of ring buffers enabled
1713  */
tracing_is_on(void)1714 int tracing_is_on(void)
1715 {
1716 	return tracer_tracing_is_on(&global_trace);
1717 }
1718 EXPORT_SYMBOL_GPL(tracing_is_on);
1719 
set_buf_size(char * str)1720 static int __init set_buf_size(char *str)
1721 {
1722 	unsigned long buf_size;
1723 
1724 	if (!str)
1725 		return 0;
1726 	buf_size = memparse(str, &str);
1727 	/*
1728 	 * nr_entries can not be zero and the startup
1729 	 * tests require some buffer space. Therefore
1730 	 * ensure we have at least 4096 bytes of buffer.
1731 	 */
1732 	trace_buf_size = max(4096UL, buf_size);
1733 	return 1;
1734 }
1735 __setup("trace_buf_size=", set_buf_size);
1736 
set_tracing_thresh(char * str)1737 static int __init set_tracing_thresh(char *str)
1738 {
1739 	unsigned long threshold;
1740 	int ret;
1741 
1742 	if (!str)
1743 		return 0;
1744 	ret = kstrtoul(str, 0, &threshold);
1745 	if (ret < 0)
1746 		return 0;
1747 	tracing_thresh = threshold * 1000;
1748 	return 1;
1749 }
1750 __setup("tracing_thresh=", set_tracing_thresh);
1751 
nsecs_to_usecs(unsigned long nsecs)1752 unsigned long nsecs_to_usecs(unsigned long nsecs)
1753 {
1754 	return nsecs / 1000;
1755 }
1756 
1757 /*
1758  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1759  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1760  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1761  * of strings in the order that the evals (enum) were defined.
1762  */
1763 #undef C
1764 #define C(a, b) b
1765 
1766 /* These must match the bit positions in trace_iterator_flags */
1767 static const char *trace_options[] = {
1768 	TRACE_FLAGS
1769 	NULL
1770 };
1771 
1772 static struct {
1773 	u64 (*func)(void);
1774 	const char *name;
1775 	int in_ns;		/* is this clock in nanoseconds? */
1776 } trace_clocks[] = {
1777 	{ trace_clock_local,		"local",	1 },
1778 	{ trace_clock_global,		"global",	1 },
1779 	{ trace_clock_counter,		"counter",	0 },
1780 	{ trace_clock_jiffies,		"uptime",	0 },
1781 	{ trace_clock,			"perf",		1 },
1782 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1783 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1784 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1785 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1786 	ARCH_TRACE_CLOCKS
1787 };
1788 
trace_clock_in_ns(struct trace_array * tr)1789 bool trace_clock_in_ns(struct trace_array *tr)
1790 {
1791 	if (trace_clocks[tr->clock_id].in_ns)
1792 		return true;
1793 
1794 	return false;
1795 }
1796 
1797 /*
1798  * trace_parser_get_init - gets the buffer for trace parser
1799  */
trace_parser_get_init(struct trace_parser * parser,int size)1800 int trace_parser_get_init(struct trace_parser *parser, int size)
1801 {
1802 	memset(parser, 0, sizeof(*parser));
1803 
1804 	parser->buffer = kmalloc(size, GFP_KERNEL);
1805 	if (!parser->buffer)
1806 		return 1;
1807 
1808 	parser->size = size;
1809 	return 0;
1810 }
1811 
1812 /*
1813  * trace_parser_put - frees the buffer for trace parser
1814  */
trace_parser_put(struct trace_parser * parser)1815 void trace_parser_put(struct trace_parser *parser)
1816 {
1817 	kfree(parser->buffer);
1818 	parser->buffer = NULL;
1819 }
1820 
1821 /*
1822  * trace_get_user - reads the user input string separated by  space
1823  * (matched by isspace(ch))
1824  *
1825  * For each string found the 'struct trace_parser' is updated,
1826  * and the function returns.
1827  *
1828  * Returns number of bytes read.
1829  *
1830  * See kernel/trace/trace.h for 'struct trace_parser' details.
1831  */
trace_get_user(struct trace_parser * parser,const char __user * ubuf,size_t cnt,loff_t * ppos)1832 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1833 	size_t cnt, loff_t *ppos)
1834 {
1835 	char ch;
1836 	size_t read = 0;
1837 	ssize_t ret;
1838 
1839 	if (!*ppos)
1840 		trace_parser_clear(parser);
1841 
1842 	ret = get_user(ch, ubuf++);
1843 	if (ret)
1844 		goto out;
1845 
1846 	read++;
1847 	cnt--;
1848 
1849 	/*
1850 	 * The parser is not finished with the last write,
1851 	 * continue reading the user input without skipping spaces.
1852 	 */
1853 	if (!parser->cont) {
1854 		/* skip white space */
1855 		while (cnt && isspace(ch)) {
1856 			ret = get_user(ch, ubuf++);
1857 			if (ret)
1858 				goto out;
1859 			read++;
1860 			cnt--;
1861 		}
1862 
1863 		parser->idx = 0;
1864 
1865 		/* only spaces were written */
1866 		if (isspace(ch) || !ch) {
1867 			*ppos += read;
1868 			ret = read;
1869 			goto out;
1870 		}
1871 	}
1872 
1873 	/* read the non-space input */
1874 	while (cnt && !isspace(ch) && ch) {
1875 		if (parser->idx < parser->size - 1)
1876 			parser->buffer[parser->idx++] = ch;
1877 		else {
1878 			ret = -EINVAL;
1879 			goto out;
1880 		}
1881 		ret = get_user(ch, ubuf++);
1882 		if (ret)
1883 			goto out;
1884 		read++;
1885 		cnt--;
1886 	}
1887 
1888 	/* We either got finished input or we have to wait for another call. */
1889 	if (isspace(ch) || !ch) {
1890 		parser->buffer[parser->idx] = 0;
1891 		parser->cont = false;
1892 	} else if (parser->idx < parser->size - 1) {
1893 		parser->cont = true;
1894 		parser->buffer[parser->idx++] = ch;
1895 		/* Make sure the parsed string always terminates with '\0'. */
1896 		parser->buffer[parser->idx] = 0;
1897 	} else {
1898 		ret = -EINVAL;
1899 		goto out;
1900 	}
1901 
1902 	*ppos += read;
1903 	ret = read;
1904 
1905 out:
1906 	return ret;
1907 }
1908 
1909 /* TODO add a seq_buf_to_buffer() */
trace_seq_to_buffer(struct trace_seq * s,void * buf,size_t cnt)1910 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1911 {
1912 	int len;
1913 
1914 	if (trace_seq_used(s) <= s->readpos)
1915 		return -EBUSY;
1916 
1917 	len = trace_seq_used(s) - s->readpos;
1918 	if (cnt > len)
1919 		cnt = len;
1920 	memcpy(buf, s->buffer + s->readpos, cnt);
1921 
1922 	s->readpos += cnt;
1923 	return cnt;
1924 }
1925 
1926 unsigned long __read_mostly	tracing_thresh;
1927 
1928 #ifdef CONFIG_TRACER_MAX_TRACE
1929 static const struct file_operations tracing_max_lat_fops;
1930 
1931 #ifdef LATENCY_FS_NOTIFY
1932 
1933 static struct workqueue_struct *fsnotify_wq;
1934 
latency_fsnotify_workfn(struct work_struct * work)1935 static void latency_fsnotify_workfn(struct work_struct *work)
1936 {
1937 	struct trace_array *tr = container_of(work, struct trace_array,
1938 					      fsnotify_work);
1939 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1940 }
1941 
latency_fsnotify_workfn_irq(struct irq_work * iwork)1942 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1943 {
1944 	struct trace_array *tr = container_of(iwork, struct trace_array,
1945 					      fsnotify_irqwork);
1946 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1947 }
1948 
trace_create_maxlat_file(struct trace_array * tr,struct dentry * d_tracer)1949 static void trace_create_maxlat_file(struct trace_array *tr,
1950 				     struct dentry *d_tracer)
1951 {
1952 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1953 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1954 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1955 					      TRACE_MODE_WRITE,
1956 					      d_tracer, tr,
1957 					      &tracing_max_lat_fops);
1958 }
1959 
latency_fsnotify_init(void)1960 __init static int latency_fsnotify_init(void)
1961 {
1962 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1963 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1964 	if (!fsnotify_wq) {
1965 		pr_err("Unable to allocate tr_max_lat_wq\n");
1966 		return -ENOMEM;
1967 	}
1968 	return 0;
1969 }
1970 
1971 late_initcall_sync(latency_fsnotify_init);
1972 
latency_fsnotify(struct trace_array * tr)1973 void latency_fsnotify(struct trace_array *tr)
1974 {
1975 	if (!fsnotify_wq)
1976 		return;
1977 	/*
1978 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1979 	 * possible that we are called from __schedule() or do_idle(), which
1980 	 * could cause a deadlock.
1981 	 */
1982 	irq_work_queue(&tr->fsnotify_irqwork);
1983 }
1984 
1985 #else /* !LATENCY_FS_NOTIFY */
1986 
1987 #define trace_create_maxlat_file(tr, d_tracer)				\
1988 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1989 			  d_tracer, tr, &tracing_max_lat_fops)
1990 
1991 #endif
1992 
1993 /*
1994  * Copy the new maximum trace into the separate maximum-trace
1995  * structure. (this way the maximum trace is permanently saved,
1996  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1997  */
1998 static void
__update_max_tr(struct trace_array * tr,struct task_struct * tsk,int cpu)1999 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
2000 {
2001 	struct array_buffer *trace_buf = &tr->array_buffer;
2002 	struct array_buffer *max_buf = &tr->max_buffer;
2003 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
2004 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
2005 
2006 	max_buf->cpu = cpu;
2007 	max_buf->time_start = data->preempt_timestamp;
2008 
2009 	max_data->saved_latency = tr->max_latency;
2010 	max_data->critical_start = data->critical_start;
2011 	max_data->critical_end = data->critical_end;
2012 
2013 	strscpy(max_data->comm, tsk->comm);
2014 	max_data->pid = tsk->pid;
2015 	/*
2016 	 * If tsk == current, then use current_uid(), as that does not use
2017 	 * RCU. The irq tracer can be called out of RCU scope.
2018 	 */
2019 	if (tsk == current)
2020 		max_data->uid = current_uid();
2021 	else
2022 		max_data->uid = task_uid(tsk);
2023 
2024 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
2025 	max_data->policy = tsk->policy;
2026 	max_data->rt_priority = tsk->rt_priority;
2027 
2028 	/* record this tasks comm */
2029 	tracing_record_cmdline(tsk);
2030 	latency_fsnotify(tr);
2031 }
2032 
2033 /**
2034  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
2035  * @tr: tracer
2036  * @tsk: the task with the latency
2037  * @cpu: The cpu that initiated the trace.
2038  * @cond_data: User data associated with a conditional snapshot
2039  *
2040  * Flip the buffers between the @tr and the max_tr and record information
2041  * about which task was the cause of this latency.
2042  */
2043 void
update_max_tr(struct trace_array * tr,struct task_struct * tsk,int cpu,void * cond_data)2044 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
2045 	      void *cond_data)
2046 {
2047 	if (tr->stop_count)
2048 		return;
2049 
2050 	WARN_ON_ONCE(!irqs_disabled());
2051 
2052 	if (!tr->allocated_snapshot) {
2053 		/* Only the nop tracer should hit this when disabling */
2054 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
2055 		return;
2056 	}
2057 
2058 	arch_spin_lock(&tr->max_lock);
2059 
2060 	/* Inherit the recordable setting from array_buffer */
2061 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
2062 		ring_buffer_record_on(tr->max_buffer.buffer);
2063 	else
2064 		ring_buffer_record_off(tr->max_buffer.buffer);
2065 
2066 #ifdef CONFIG_TRACER_SNAPSHOT
2067 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
2068 		arch_spin_unlock(&tr->max_lock);
2069 		return;
2070 	}
2071 #endif
2072 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
2073 
2074 	__update_max_tr(tr, tsk, cpu);
2075 
2076 	arch_spin_unlock(&tr->max_lock);
2077 
2078 	/* Any waiters on the old snapshot buffer need to wake up */
2079 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
2080 }
2081 
2082 /**
2083  * update_max_tr_single - only copy one trace over, and reset the rest
2084  * @tr: tracer
2085  * @tsk: task with the latency
2086  * @cpu: the cpu of the buffer to copy.
2087  *
2088  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
2089  */
2090 void
update_max_tr_single(struct trace_array * tr,struct task_struct * tsk,int cpu)2091 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
2092 {
2093 	int ret;
2094 
2095 	if (tr->stop_count)
2096 		return;
2097 
2098 	WARN_ON_ONCE(!irqs_disabled());
2099 	if (!tr->allocated_snapshot) {
2100 		/* Only the nop tracer should hit this when disabling */
2101 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
2102 		return;
2103 	}
2104 
2105 	arch_spin_lock(&tr->max_lock);
2106 
2107 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
2108 
2109 	if (ret == -EBUSY) {
2110 		/*
2111 		 * We failed to swap the buffer due to a commit taking
2112 		 * place on this CPU. We fail to record, but we reset
2113 		 * the max trace buffer (no one writes directly to it)
2114 		 * and flag that it failed.
2115 		 * Another reason is resize is in progress.
2116 		 */
2117 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
2118 			"Failed to swap buffers due to commit or resize in progress\n");
2119 	}
2120 
2121 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
2122 
2123 	__update_max_tr(tr, tsk, cpu);
2124 	arch_spin_unlock(&tr->max_lock);
2125 }
2126 
2127 #endif /* CONFIG_TRACER_MAX_TRACE */
2128 
2129 struct pipe_wait {
2130 	struct trace_iterator		*iter;
2131 	int				wait_index;
2132 };
2133 
wait_pipe_cond(void * data)2134 static bool wait_pipe_cond(void *data)
2135 {
2136 	struct pipe_wait *pwait = data;
2137 	struct trace_iterator *iter = pwait->iter;
2138 
2139 	if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2140 		return true;
2141 
2142 	return iter->closed;
2143 }
2144 
wait_on_pipe(struct trace_iterator * iter,int full)2145 static int wait_on_pipe(struct trace_iterator *iter, int full)
2146 {
2147 	struct pipe_wait pwait;
2148 	int ret;
2149 
2150 	/* Iterators are static, they should be filled or empty */
2151 	if (trace_buffer_iter(iter, iter->cpu_file))
2152 		return 0;
2153 
2154 	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2155 	pwait.iter = iter;
2156 
2157 	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2158 			       wait_pipe_cond, &pwait);
2159 
2160 #ifdef CONFIG_TRACER_MAX_TRACE
2161 	/*
2162 	 * Make sure this is still the snapshot buffer, as if a snapshot were
2163 	 * to happen, this would now be the main buffer.
2164 	 */
2165 	if (iter->snapshot)
2166 		iter->array_buffer = &iter->tr->max_buffer;
2167 #endif
2168 	return ret;
2169 }
2170 
2171 #ifdef CONFIG_FTRACE_STARTUP_TEST
2172 static bool selftests_can_run;
2173 
2174 struct trace_selftests {
2175 	struct list_head		list;
2176 	struct tracer			*type;
2177 };
2178 
2179 static LIST_HEAD(postponed_selftests);
2180 
save_selftest(struct tracer * type)2181 static int save_selftest(struct tracer *type)
2182 {
2183 	struct trace_selftests *selftest;
2184 
2185 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2186 	if (!selftest)
2187 		return -ENOMEM;
2188 
2189 	selftest->type = type;
2190 	list_add(&selftest->list, &postponed_selftests);
2191 	return 0;
2192 }
2193 
run_tracer_selftest(struct tracer * type)2194 static int run_tracer_selftest(struct tracer *type)
2195 {
2196 	struct trace_array *tr = &global_trace;
2197 	struct tracer *saved_tracer = tr->current_trace;
2198 	int ret;
2199 
2200 	if (!type->selftest || tracing_selftest_disabled)
2201 		return 0;
2202 
2203 	/*
2204 	 * If a tracer registers early in boot up (before scheduling is
2205 	 * initialized and such), then do not run its selftests yet.
2206 	 * Instead, run it a little later in the boot process.
2207 	 */
2208 	if (!selftests_can_run)
2209 		return save_selftest(type);
2210 
2211 	if (!tracing_is_on()) {
2212 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2213 			type->name);
2214 		return 0;
2215 	}
2216 
2217 	/*
2218 	 * Run a selftest on this tracer.
2219 	 * Here we reset the trace buffer, and set the current
2220 	 * tracer to be this tracer. The tracer can then run some
2221 	 * internal tracing to verify that everything is in order.
2222 	 * If we fail, we do not register this tracer.
2223 	 */
2224 	tracing_reset_online_cpus(&tr->array_buffer);
2225 
2226 	tr->current_trace = type;
2227 
2228 #ifdef CONFIG_TRACER_MAX_TRACE
2229 	if (type->use_max_tr) {
2230 		/* If we expanded the buffers, make sure the max is expanded too */
2231 		if (tr->ring_buffer_expanded)
2232 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2233 					   RING_BUFFER_ALL_CPUS);
2234 		tr->allocated_snapshot = true;
2235 	}
2236 #endif
2237 
2238 	/* the test is responsible for initializing and enabling */
2239 	pr_info("Testing tracer %s: ", type->name);
2240 	ret = type->selftest(type, tr);
2241 	/* the test is responsible for resetting too */
2242 	tr->current_trace = saved_tracer;
2243 	if (ret) {
2244 		printk(KERN_CONT "FAILED!\n");
2245 		/* Add the warning after printing 'FAILED' */
2246 		WARN_ON(1);
2247 		return -1;
2248 	}
2249 	/* Only reset on passing, to avoid touching corrupted buffers */
2250 	tracing_reset_online_cpus(&tr->array_buffer);
2251 
2252 #ifdef CONFIG_TRACER_MAX_TRACE
2253 	if (type->use_max_tr) {
2254 		tr->allocated_snapshot = false;
2255 
2256 		/* Shrink the max buffer again */
2257 		if (tr->ring_buffer_expanded)
2258 			ring_buffer_resize(tr->max_buffer.buffer, 1,
2259 					   RING_BUFFER_ALL_CPUS);
2260 	}
2261 #endif
2262 
2263 	printk(KERN_CONT "PASSED\n");
2264 	return 0;
2265 }
2266 
do_run_tracer_selftest(struct tracer * type)2267 static int do_run_tracer_selftest(struct tracer *type)
2268 {
2269 	int ret;
2270 
2271 	/*
2272 	 * Tests can take a long time, especially if they are run one after the
2273 	 * other, as does happen during bootup when all the tracers are
2274 	 * registered. This could cause the soft lockup watchdog to trigger.
2275 	 */
2276 	cond_resched();
2277 
2278 	tracing_selftest_running = true;
2279 	ret = run_tracer_selftest(type);
2280 	tracing_selftest_running = false;
2281 
2282 	return ret;
2283 }
2284 
init_trace_selftests(void)2285 static __init int init_trace_selftests(void)
2286 {
2287 	struct trace_selftests *p, *n;
2288 	struct tracer *t, **last;
2289 	int ret;
2290 
2291 	selftests_can_run = true;
2292 
2293 	guard(mutex)(&trace_types_lock);
2294 
2295 	if (list_empty(&postponed_selftests))
2296 		return 0;
2297 
2298 	pr_info("Running postponed tracer tests:\n");
2299 
2300 	tracing_selftest_running = true;
2301 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2302 		/* This loop can take minutes when sanitizers are enabled, so
2303 		 * lets make sure we allow RCU processing.
2304 		 */
2305 		cond_resched();
2306 		ret = run_tracer_selftest(p->type);
2307 		/* If the test fails, then warn and remove from available_tracers */
2308 		if (ret < 0) {
2309 			WARN(1, "tracer: %s failed selftest, disabling\n",
2310 			     p->type->name);
2311 			last = &trace_types;
2312 			for (t = trace_types; t; t = t->next) {
2313 				if (t == p->type) {
2314 					*last = t->next;
2315 					break;
2316 				}
2317 				last = &t->next;
2318 			}
2319 		}
2320 		list_del(&p->list);
2321 		kfree(p);
2322 	}
2323 	tracing_selftest_running = false;
2324 
2325 	return 0;
2326 }
2327 core_initcall(init_trace_selftests);
2328 #else
do_run_tracer_selftest(struct tracer * type)2329 static inline int do_run_tracer_selftest(struct tracer *type)
2330 {
2331 	return 0;
2332 }
2333 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2334 
2335 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2336 
2337 static void __init apply_trace_boot_options(void);
2338 
2339 /**
2340  * register_tracer - register a tracer with the ftrace system.
2341  * @type: the plugin for the tracer
2342  *
2343  * Register a new plugin tracer.
2344  */
register_tracer(struct tracer * type)2345 int __init register_tracer(struct tracer *type)
2346 {
2347 	struct tracer *t;
2348 	int ret = 0;
2349 
2350 	if (!type->name) {
2351 		pr_info("Tracer must have a name\n");
2352 		return -1;
2353 	}
2354 
2355 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2356 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2357 		return -1;
2358 	}
2359 
2360 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2361 		pr_warn("Can not register tracer %s due to lockdown\n",
2362 			   type->name);
2363 		return -EPERM;
2364 	}
2365 
2366 	mutex_lock(&trace_types_lock);
2367 
2368 	for (t = trace_types; t; t = t->next) {
2369 		if (strcmp(type->name, t->name) == 0) {
2370 			/* already found */
2371 			pr_info("Tracer %s already registered\n",
2372 				type->name);
2373 			ret = -1;
2374 			goto out;
2375 		}
2376 	}
2377 
2378 	if (!type->set_flag)
2379 		type->set_flag = &dummy_set_flag;
2380 	if (!type->flags) {
2381 		/*allocate a dummy tracer_flags*/
2382 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2383 		if (!type->flags) {
2384 			ret = -ENOMEM;
2385 			goto out;
2386 		}
2387 		type->flags->val = 0;
2388 		type->flags->opts = dummy_tracer_opt;
2389 	} else
2390 		if (!type->flags->opts)
2391 			type->flags->opts = dummy_tracer_opt;
2392 
2393 	/* store the tracer for __set_tracer_option */
2394 	type->flags->trace = type;
2395 
2396 	ret = do_run_tracer_selftest(type);
2397 	if (ret < 0)
2398 		goto out;
2399 
2400 	type->next = trace_types;
2401 	trace_types = type;
2402 	add_tracer_options(&global_trace, type);
2403 
2404  out:
2405 	mutex_unlock(&trace_types_lock);
2406 
2407 	if (ret || !default_bootup_tracer)
2408 		goto out_unlock;
2409 
2410 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2411 		goto out_unlock;
2412 
2413 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2414 	/* Do we want this tracer to start on bootup? */
2415 	tracing_set_tracer(&global_trace, type->name);
2416 	default_bootup_tracer = NULL;
2417 
2418 	apply_trace_boot_options();
2419 
2420 	/* disable other selftests, since this will break it. */
2421 	disable_tracing_selftest("running a tracer");
2422 
2423  out_unlock:
2424 	return ret;
2425 }
2426 
tracing_reset_cpu(struct array_buffer * buf,int cpu)2427 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2428 {
2429 	struct trace_buffer *buffer = buf->buffer;
2430 
2431 	if (!buffer)
2432 		return;
2433 
2434 	ring_buffer_record_disable(buffer);
2435 
2436 	/* Make sure all commits have finished */
2437 	synchronize_rcu();
2438 	ring_buffer_reset_cpu(buffer, cpu);
2439 
2440 	ring_buffer_record_enable(buffer);
2441 }
2442 
tracing_reset_online_cpus(struct array_buffer * buf)2443 void tracing_reset_online_cpus(struct array_buffer *buf)
2444 {
2445 	struct trace_buffer *buffer = buf->buffer;
2446 
2447 	if (!buffer)
2448 		return;
2449 
2450 	ring_buffer_record_disable(buffer);
2451 
2452 	/* Make sure all commits have finished */
2453 	synchronize_rcu();
2454 
2455 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2456 
2457 	ring_buffer_reset_online_cpus(buffer);
2458 
2459 	ring_buffer_record_enable(buffer);
2460 }
2461 
tracing_reset_all_cpus(struct array_buffer * buf)2462 static void tracing_reset_all_cpus(struct array_buffer *buf)
2463 {
2464 	struct trace_buffer *buffer = buf->buffer;
2465 
2466 	if (!buffer)
2467 		return;
2468 
2469 	ring_buffer_record_disable(buffer);
2470 
2471 	/* Make sure all commits have finished */
2472 	synchronize_rcu();
2473 
2474 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2475 
2476 	ring_buffer_reset(buffer);
2477 
2478 	ring_buffer_record_enable(buffer);
2479 }
2480 
2481 /* Must have trace_types_lock held */
tracing_reset_all_online_cpus_unlocked(void)2482 void tracing_reset_all_online_cpus_unlocked(void)
2483 {
2484 	struct trace_array *tr;
2485 
2486 	lockdep_assert_held(&trace_types_lock);
2487 
2488 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2489 		if (!tr->clear_trace)
2490 			continue;
2491 		tr->clear_trace = false;
2492 		tracing_reset_online_cpus(&tr->array_buffer);
2493 #ifdef CONFIG_TRACER_MAX_TRACE
2494 		tracing_reset_online_cpus(&tr->max_buffer);
2495 #endif
2496 	}
2497 }
2498 
tracing_reset_all_online_cpus(void)2499 void tracing_reset_all_online_cpus(void)
2500 {
2501 	mutex_lock(&trace_types_lock);
2502 	tracing_reset_all_online_cpus_unlocked();
2503 	mutex_unlock(&trace_types_lock);
2504 }
2505 
is_tracing_stopped(void)2506 int is_tracing_stopped(void)
2507 {
2508 	return global_trace.stop_count;
2509 }
2510 
tracing_start_tr(struct trace_array * tr)2511 static void tracing_start_tr(struct trace_array *tr)
2512 {
2513 	struct trace_buffer *buffer;
2514 	unsigned long flags;
2515 
2516 	if (tracing_disabled)
2517 		return;
2518 
2519 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2520 	if (--tr->stop_count) {
2521 		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2522 			/* Someone screwed up their debugging */
2523 			tr->stop_count = 0;
2524 		}
2525 		goto out;
2526 	}
2527 
2528 	/* Prevent the buffers from switching */
2529 	arch_spin_lock(&tr->max_lock);
2530 
2531 	buffer = tr->array_buffer.buffer;
2532 	if (buffer)
2533 		ring_buffer_record_enable(buffer);
2534 
2535 #ifdef CONFIG_TRACER_MAX_TRACE
2536 	buffer = tr->max_buffer.buffer;
2537 	if (buffer)
2538 		ring_buffer_record_enable(buffer);
2539 #endif
2540 
2541 	arch_spin_unlock(&tr->max_lock);
2542 
2543  out:
2544 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2545 }
2546 
2547 /**
2548  * tracing_start - quick start of the tracer
2549  *
2550  * If tracing is enabled but was stopped by tracing_stop,
2551  * this will start the tracer back up.
2552  */
tracing_start(void)2553 void tracing_start(void)
2554 
2555 {
2556 	return tracing_start_tr(&global_trace);
2557 }
2558 
tracing_stop_tr(struct trace_array * tr)2559 static void tracing_stop_tr(struct trace_array *tr)
2560 {
2561 	struct trace_buffer *buffer;
2562 	unsigned long flags;
2563 
2564 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2565 	if (tr->stop_count++)
2566 		goto out;
2567 
2568 	/* Prevent the buffers from switching */
2569 	arch_spin_lock(&tr->max_lock);
2570 
2571 	buffer = tr->array_buffer.buffer;
2572 	if (buffer)
2573 		ring_buffer_record_disable(buffer);
2574 
2575 #ifdef CONFIG_TRACER_MAX_TRACE
2576 	buffer = tr->max_buffer.buffer;
2577 	if (buffer)
2578 		ring_buffer_record_disable(buffer);
2579 #endif
2580 
2581 	arch_spin_unlock(&tr->max_lock);
2582 
2583  out:
2584 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2585 }
2586 
2587 /**
2588  * tracing_stop - quick stop of the tracer
2589  *
2590  * Light weight way to stop tracing. Use in conjunction with
2591  * tracing_start.
2592  */
tracing_stop(void)2593 void tracing_stop(void)
2594 {
2595 	return tracing_stop_tr(&global_trace);
2596 }
2597 
2598 /*
2599  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2600  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2601  * simplifies those functions and keeps them in sync.
2602  */
trace_handle_return(struct trace_seq * s)2603 enum print_line_t trace_handle_return(struct trace_seq *s)
2604 {
2605 	return trace_seq_has_overflowed(s) ?
2606 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2607 }
2608 EXPORT_SYMBOL_GPL(trace_handle_return);
2609 
migration_disable_value(void)2610 static unsigned short migration_disable_value(void)
2611 {
2612 #if defined(CONFIG_SMP)
2613 	return current->migration_disabled;
2614 #else
2615 	return 0;
2616 #endif
2617 }
2618 
tracing_gen_ctx_irq_test(unsigned int irqs_status)2619 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2620 {
2621 	unsigned int trace_flags = irqs_status;
2622 	unsigned int pc;
2623 
2624 	pc = preempt_count();
2625 
2626 	if (pc & NMI_MASK)
2627 		trace_flags |= TRACE_FLAG_NMI;
2628 	if (pc & HARDIRQ_MASK)
2629 		trace_flags |= TRACE_FLAG_HARDIRQ;
2630 	if (in_serving_softirq())
2631 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2632 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2633 		trace_flags |= TRACE_FLAG_BH_OFF;
2634 
2635 	if (tif_need_resched())
2636 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2637 	if (test_preempt_need_resched())
2638 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2639 	if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
2640 		trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
2641 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2642 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2643 }
2644 
2645 struct ring_buffer_event *
trace_buffer_lock_reserve(struct trace_buffer * buffer,int type,unsigned long len,unsigned int trace_ctx)2646 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2647 			  int type,
2648 			  unsigned long len,
2649 			  unsigned int trace_ctx)
2650 {
2651 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2652 }
2653 
2654 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2655 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2656 static int trace_buffered_event_ref;
2657 
2658 /**
2659  * trace_buffered_event_enable - enable buffering events
2660  *
2661  * When events are being filtered, it is quicker to use a temporary
2662  * buffer to write the event data into if there's a likely chance
2663  * that it will not be committed. The discard of the ring buffer
2664  * is not as fast as committing, and is much slower than copying
2665  * a commit.
2666  *
2667  * When an event is to be filtered, allocate per cpu buffers to
2668  * write the event data into, and if the event is filtered and discarded
2669  * it is simply dropped, otherwise, the entire data is to be committed
2670  * in one shot.
2671  */
trace_buffered_event_enable(void)2672 void trace_buffered_event_enable(void)
2673 {
2674 	struct ring_buffer_event *event;
2675 	struct page *page;
2676 	int cpu;
2677 
2678 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2679 
2680 	if (trace_buffered_event_ref++)
2681 		return;
2682 
2683 	for_each_tracing_cpu(cpu) {
2684 		page = alloc_pages_node(cpu_to_node(cpu),
2685 					GFP_KERNEL | __GFP_NORETRY, 0);
2686 		/* This is just an optimization and can handle failures */
2687 		if (!page) {
2688 			pr_err("Failed to allocate event buffer\n");
2689 			break;
2690 		}
2691 
2692 		event = page_address(page);
2693 		memset(event, 0, sizeof(*event));
2694 
2695 		per_cpu(trace_buffered_event, cpu) = event;
2696 
2697 		preempt_disable();
2698 		if (cpu == smp_processor_id() &&
2699 		    __this_cpu_read(trace_buffered_event) !=
2700 		    per_cpu(trace_buffered_event, cpu))
2701 			WARN_ON_ONCE(1);
2702 		preempt_enable();
2703 	}
2704 }
2705 
enable_trace_buffered_event(void * data)2706 static void enable_trace_buffered_event(void *data)
2707 {
2708 	this_cpu_dec(trace_buffered_event_cnt);
2709 }
2710 
disable_trace_buffered_event(void * data)2711 static void disable_trace_buffered_event(void *data)
2712 {
2713 	this_cpu_inc(trace_buffered_event_cnt);
2714 }
2715 
2716 /**
2717  * trace_buffered_event_disable - disable buffering events
2718  *
2719  * When a filter is removed, it is faster to not use the buffered
2720  * events, and to commit directly into the ring buffer. Free up
2721  * the temp buffers when there are no more users. This requires
2722  * special synchronization with current events.
2723  */
trace_buffered_event_disable(void)2724 void trace_buffered_event_disable(void)
2725 {
2726 	int cpu;
2727 
2728 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2729 
2730 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2731 		return;
2732 
2733 	if (--trace_buffered_event_ref)
2734 		return;
2735 
2736 	/* For each CPU, set the buffer as used. */
2737 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2738 			 NULL, true);
2739 
2740 	/* Wait for all current users to finish */
2741 	synchronize_rcu();
2742 
2743 	for_each_tracing_cpu(cpu) {
2744 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2745 		per_cpu(trace_buffered_event, cpu) = NULL;
2746 	}
2747 
2748 	/*
2749 	 * Wait for all CPUs that potentially started checking if they can use
2750 	 * their event buffer only after the previous synchronize_rcu() call and
2751 	 * they still read a valid pointer from trace_buffered_event. It must be
2752 	 * ensured they don't see cleared trace_buffered_event_cnt else they
2753 	 * could wrongly decide to use the pointed-to buffer which is now freed.
2754 	 */
2755 	synchronize_rcu();
2756 
2757 	/* For each CPU, relinquish the buffer */
2758 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2759 			 true);
2760 }
2761 
2762 static struct trace_buffer *temp_buffer;
2763 
2764 struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct trace_buffer ** current_rb,struct trace_event_file * trace_file,int type,unsigned long len,unsigned int trace_ctx)2765 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2766 			  struct trace_event_file *trace_file,
2767 			  int type, unsigned long len,
2768 			  unsigned int trace_ctx)
2769 {
2770 	struct ring_buffer_event *entry;
2771 	struct trace_array *tr = trace_file->tr;
2772 	int val;
2773 
2774 	*current_rb = tr->array_buffer.buffer;
2775 
2776 	if (!tr->no_filter_buffering_ref &&
2777 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2778 		preempt_disable_notrace();
2779 		/*
2780 		 * Filtering is on, so try to use the per cpu buffer first.
2781 		 * This buffer will simulate a ring_buffer_event,
2782 		 * where the type_len is zero and the array[0] will
2783 		 * hold the full length.
2784 		 * (see include/linux/ring-buffer.h for details on
2785 		 *  how the ring_buffer_event is structured).
2786 		 *
2787 		 * Using a temp buffer during filtering and copying it
2788 		 * on a matched filter is quicker than writing directly
2789 		 * into the ring buffer and then discarding it when
2790 		 * it doesn't match. That is because the discard
2791 		 * requires several atomic operations to get right.
2792 		 * Copying on match and doing nothing on a failed match
2793 		 * is still quicker than no copy on match, but having
2794 		 * to discard out of the ring buffer on a failed match.
2795 		 */
2796 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2797 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2798 
2799 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2800 
2801 			/*
2802 			 * Preemption is disabled, but interrupts and NMIs
2803 			 * can still come in now. If that happens after
2804 			 * the above increment, then it will have to go
2805 			 * back to the old method of allocating the event
2806 			 * on the ring buffer, and if the filter fails, it
2807 			 * will have to call ring_buffer_discard_commit()
2808 			 * to remove it.
2809 			 *
2810 			 * Need to also check the unlikely case that the
2811 			 * length is bigger than the temp buffer size.
2812 			 * If that happens, then the reserve is pretty much
2813 			 * guaranteed to fail, as the ring buffer currently
2814 			 * only allows events less than a page. But that may
2815 			 * change in the future, so let the ring buffer reserve
2816 			 * handle the failure in that case.
2817 			 */
2818 			if (val == 1 && likely(len <= max_len)) {
2819 				trace_event_setup(entry, type, trace_ctx);
2820 				entry->array[0] = len;
2821 				/* Return with preemption disabled */
2822 				return entry;
2823 			}
2824 			this_cpu_dec(trace_buffered_event_cnt);
2825 		}
2826 		/* __trace_buffer_lock_reserve() disables preemption */
2827 		preempt_enable_notrace();
2828 	}
2829 
2830 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2831 					    trace_ctx);
2832 	/*
2833 	 * If tracing is off, but we have triggers enabled
2834 	 * we still need to look at the event data. Use the temp_buffer
2835 	 * to store the trace event for the trigger to use. It's recursive
2836 	 * safe and will not be recorded anywhere.
2837 	 */
2838 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2839 		*current_rb = temp_buffer;
2840 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2841 						    trace_ctx);
2842 	}
2843 	return entry;
2844 }
2845 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2846 
2847 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2848 static DEFINE_MUTEX(tracepoint_printk_mutex);
2849 
output_printk(struct trace_event_buffer * fbuffer)2850 static void output_printk(struct trace_event_buffer *fbuffer)
2851 {
2852 	struct trace_event_call *event_call;
2853 	struct trace_event_file *file;
2854 	struct trace_event *event;
2855 	unsigned long flags;
2856 	struct trace_iterator *iter = tracepoint_print_iter;
2857 
2858 	/* We should never get here if iter is NULL */
2859 	if (WARN_ON_ONCE(!iter))
2860 		return;
2861 
2862 	event_call = fbuffer->trace_file->event_call;
2863 	if (!event_call || !event_call->event.funcs ||
2864 	    !event_call->event.funcs->trace)
2865 		return;
2866 
2867 	file = fbuffer->trace_file;
2868 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2869 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2870 	     !filter_match_preds(file->filter, fbuffer->entry)))
2871 		return;
2872 
2873 	event = &fbuffer->trace_file->event_call->event;
2874 
2875 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2876 	trace_seq_init(&iter->seq);
2877 	iter->ent = fbuffer->entry;
2878 	event_call->event.funcs->trace(iter, 0, event);
2879 	trace_seq_putc(&iter->seq, 0);
2880 	printk("%s", iter->seq.buffer);
2881 
2882 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2883 }
2884 
tracepoint_printk_sysctl(const struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)2885 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
2886 			     void *buffer, size_t *lenp,
2887 			     loff_t *ppos)
2888 {
2889 	int save_tracepoint_printk;
2890 	int ret;
2891 
2892 	guard(mutex)(&tracepoint_printk_mutex);
2893 	save_tracepoint_printk = tracepoint_printk;
2894 
2895 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2896 
2897 	/*
2898 	 * This will force exiting early, as tracepoint_printk
2899 	 * is always zero when tracepoint_printk_iter is not allocated
2900 	 */
2901 	if (!tracepoint_print_iter)
2902 		tracepoint_printk = 0;
2903 
2904 	if (save_tracepoint_printk == tracepoint_printk)
2905 		return ret;
2906 
2907 	if (tracepoint_printk)
2908 		static_key_enable(&tracepoint_printk_key.key);
2909 	else
2910 		static_key_disable(&tracepoint_printk_key.key);
2911 
2912 	return ret;
2913 }
2914 
trace_event_buffer_commit(struct trace_event_buffer * fbuffer)2915 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2916 {
2917 	enum event_trigger_type tt = ETT_NONE;
2918 	struct trace_event_file *file = fbuffer->trace_file;
2919 
2920 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2921 			fbuffer->entry, &tt))
2922 		goto discard;
2923 
2924 	if (static_key_false(&tracepoint_printk_key.key))
2925 		output_printk(fbuffer);
2926 
2927 	if (static_branch_unlikely(&trace_event_exports_enabled))
2928 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2929 
2930 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2931 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2932 
2933 discard:
2934 	if (tt)
2935 		event_triggers_post_call(file, tt);
2936 
2937 }
2938 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2939 
2940 /*
2941  * Skip 3:
2942  *
2943  *   trace_buffer_unlock_commit_regs()
2944  *   trace_event_buffer_commit()
2945  *   trace_event_raw_event_xxx()
2946  */
2947 # define STACK_SKIP 3
2948 
trace_buffer_unlock_commit_regs(struct trace_array * tr,struct trace_buffer * buffer,struct ring_buffer_event * event,unsigned int trace_ctx,struct pt_regs * regs)2949 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2950 				     struct trace_buffer *buffer,
2951 				     struct ring_buffer_event *event,
2952 				     unsigned int trace_ctx,
2953 				     struct pt_regs *regs)
2954 {
2955 	__buffer_unlock_commit(buffer, event);
2956 
2957 	/*
2958 	 * If regs is not set, then skip the necessary functions.
2959 	 * Note, we can still get here via blktrace, wakeup tracer
2960 	 * and mmiotrace, but that's ok if they lose a function or
2961 	 * two. They are not that meaningful.
2962 	 */
2963 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2964 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2965 }
2966 
2967 /*
2968  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2969  */
2970 void
trace_buffer_unlock_commit_nostack(struct trace_buffer * buffer,struct ring_buffer_event * event)2971 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2972 				   struct ring_buffer_event *event)
2973 {
2974 	__buffer_unlock_commit(buffer, event);
2975 }
2976 
2977 void
trace_function(struct trace_array * tr,unsigned long ip,unsigned long parent_ip,unsigned int trace_ctx,struct ftrace_regs * fregs)2978 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2979 	       parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs)
2980 {
2981 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2982 	struct ring_buffer_event *event;
2983 	struct ftrace_entry *entry;
2984 	int size = sizeof(*entry);
2985 
2986 	size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long);
2987 
2988 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size,
2989 					    trace_ctx);
2990 	if (!event)
2991 		return;
2992 	entry	= ring_buffer_event_data(event);
2993 	entry->ip			= ip;
2994 	entry->parent_ip		= parent_ip;
2995 
2996 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
2997 	if (fregs) {
2998 		for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
2999 			entry->args[i] = ftrace_regs_get_argument(fregs, i);
3000 	}
3001 #endif
3002 
3003 	if (static_branch_unlikely(&trace_function_exports_enabled))
3004 		ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3005 	__buffer_unlock_commit(buffer, event);
3006 }
3007 
3008 #ifdef CONFIG_STACKTRACE
3009 
3010 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3011 #define FTRACE_KSTACK_NESTING	4
3012 
3013 #define FTRACE_KSTACK_ENTRIES	(SZ_4K / FTRACE_KSTACK_NESTING)
3014 
3015 struct ftrace_stack {
3016 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
3017 };
3018 
3019 
3020 struct ftrace_stacks {
3021 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
3022 };
3023 
3024 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3025 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3026 
__ftrace_trace_stack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx,int skip,struct pt_regs * regs)3027 static void __ftrace_trace_stack(struct trace_array *tr,
3028 				 struct trace_buffer *buffer,
3029 				 unsigned int trace_ctx,
3030 				 int skip, struct pt_regs *regs)
3031 {
3032 	struct ring_buffer_event *event;
3033 	unsigned int size, nr_entries;
3034 	struct ftrace_stack *fstack;
3035 	struct stack_entry *entry;
3036 	int stackidx;
3037 
3038 	/*
3039 	 * Add one, for this function and the call to save_stack_trace()
3040 	 * If regs is set, then these functions will not be in the way.
3041 	 */
3042 #ifndef CONFIG_UNWINDER_ORC
3043 	if (!regs)
3044 		skip++;
3045 #endif
3046 
3047 	preempt_disable_notrace();
3048 
3049 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3050 
3051 	/* This should never happen. If it does, yell once and skip */
3052 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3053 		goto out;
3054 
3055 	/*
3056 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3057 	 * interrupt will either see the value pre increment or post
3058 	 * increment. If the interrupt happens pre increment it will have
3059 	 * restored the counter when it returns.  We just need a barrier to
3060 	 * keep gcc from moving things around.
3061 	 */
3062 	barrier();
3063 
3064 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3065 	size = ARRAY_SIZE(fstack->calls);
3066 
3067 	if (regs) {
3068 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3069 						   size, skip);
3070 	} else {
3071 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3072 	}
3073 
3074 #ifdef CONFIG_DYNAMIC_FTRACE
3075 	/* Mark entry of stack trace as trampoline code */
3076 	if (tr->ops && tr->ops->trampoline) {
3077 		unsigned long tramp_start = tr->ops->trampoline;
3078 		unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
3079 		unsigned long *calls = fstack->calls;
3080 
3081 		for (int i = 0; i < nr_entries; i++) {
3082 			if (calls[i] >= tramp_start && calls[i] < tramp_end)
3083 				calls[i] = FTRACE_TRAMPOLINE_MARKER;
3084 		}
3085 	}
3086 #endif
3087 
3088 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3089 				    struct_size(entry, caller, nr_entries),
3090 				    trace_ctx);
3091 	if (!event)
3092 		goto out;
3093 	entry = ring_buffer_event_data(event);
3094 
3095 	entry->size = nr_entries;
3096 	memcpy(&entry->caller, fstack->calls,
3097 	       flex_array_size(entry, caller, nr_entries));
3098 
3099 	__buffer_unlock_commit(buffer, event);
3100 
3101  out:
3102 	/* Again, don't let gcc optimize things here */
3103 	barrier();
3104 	__this_cpu_dec(ftrace_stack_reserve);
3105 	preempt_enable_notrace();
3106 
3107 }
3108 
ftrace_trace_stack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx,int skip,struct pt_regs * regs)3109 static inline void ftrace_trace_stack(struct trace_array *tr,
3110 				      struct trace_buffer *buffer,
3111 				      unsigned int trace_ctx,
3112 				      int skip, struct pt_regs *regs)
3113 {
3114 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3115 		return;
3116 
3117 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
3118 }
3119 
__trace_stack(struct trace_array * tr,unsigned int trace_ctx,int skip)3120 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3121 		   int skip)
3122 {
3123 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3124 
3125 	if (rcu_is_watching()) {
3126 		__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3127 		return;
3128 	}
3129 
3130 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3131 		return;
3132 
3133 	/*
3134 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3135 	 * but if the above rcu_is_watching() failed, then the NMI
3136 	 * triggered someplace critical, and ct_irq_enter() should
3137 	 * not be called from NMI.
3138 	 */
3139 	if (unlikely(in_nmi()))
3140 		return;
3141 
3142 	ct_irq_enter_irqson();
3143 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3144 	ct_irq_exit_irqson();
3145 }
3146 
3147 /**
3148  * trace_dump_stack - record a stack back trace in the trace buffer
3149  * @skip: Number of functions to skip (helper handlers)
3150  */
trace_dump_stack(int skip)3151 void trace_dump_stack(int skip)
3152 {
3153 	if (tracing_disabled || tracing_selftest_running)
3154 		return;
3155 
3156 #ifndef CONFIG_UNWINDER_ORC
3157 	/* Skip 1 to skip this function. */
3158 	skip++;
3159 #endif
3160 	__ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
3161 				tracing_gen_ctx(), skip, NULL);
3162 }
3163 EXPORT_SYMBOL_GPL(trace_dump_stack);
3164 
3165 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3166 static DEFINE_PER_CPU(int, user_stack_count);
3167 
3168 static void
ftrace_trace_userstack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx)3169 ftrace_trace_userstack(struct trace_array *tr,
3170 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3171 {
3172 	struct ring_buffer_event *event;
3173 	struct userstack_entry *entry;
3174 
3175 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3176 		return;
3177 
3178 	/*
3179 	 * NMIs can not handle page faults, even with fix ups.
3180 	 * The save user stack can (and often does) fault.
3181 	 */
3182 	if (unlikely(in_nmi()))
3183 		return;
3184 
3185 	/*
3186 	 * prevent recursion, since the user stack tracing may
3187 	 * trigger other kernel events.
3188 	 */
3189 	preempt_disable();
3190 	if (__this_cpu_read(user_stack_count))
3191 		goto out;
3192 
3193 	__this_cpu_inc(user_stack_count);
3194 
3195 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3196 					    sizeof(*entry), trace_ctx);
3197 	if (!event)
3198 		goto out_drop_count;
3199 	entry	= ring_buffer_event_data(event);
3200 
3201 	entry->tgid		= current->tgid;
3202 	memset(&entry->caller, 0, sizeof(entry->caller));
3203 
3204 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3205 	__buffer_unlock_commit(buffer, event);
3206 
3207  out_drop_count:
3208 	__this_cpu_dec(user_stack_count);
3209  out:
3210 	preempt_enable();
3211 }
3212 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
ftrace_trace_userstack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx)3213 static void ftrace_trace_userstack(struct trace_array *tr,
3214 				   struct trace_buffer *buffer,
3215 				   unsigned int trace_ctx)
3216 {
3217 }
3218 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3219 
3220 #endif /* CONFIG_STACKTRACE */
3221 
3222 static inline void
func_repeats_set_delta_ts(struct func_repeats_entry * entry,unsigned long long delta)3223 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3224 			  unsigned long long delta)
3225 {
3226 	entry->bottom_delta_ts = delta & U32_MAX;
3227 	entry->top_delta_ts = (delta >> 32);
3228 }
3229 
trace_last_func_repeats(struct trace_array * tr,struct trace_func_repeats * last_info,unsigned int trace_ctx)3230 void trace_last_func_repeats(struct trace_array *tr,
3231 			     struct trace_func_repeats *last_info,
3232 			     unsigned int trace_ctx)
3233 {
3234 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3235 	struct func_repeats_entry *entry;
3236 	struct ring_buffer_event *event;
3237 	u64 delta;
3238 
3239 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3240 					    sizeof(*entry), trace_ctx);
3241 	if (!event)
3242 		return;
3243 
3244 	delta = ring_buffer_event_time_stamp(buffer, event) -
3245 		last_info->ts_last_call;
3246 
3247 	entry = ring_buffer_event_data(event);
3248 	entry->ip = last_info->ip;
3249 	entry->parent_ip = last_info->parent_ip;
3250 	entry->count = last_info->count;
3251 	func_repeats_set_delta_ts(entry, delta);
3252 
3253 	__buffer_unlock_commit(buffer, event);
3254 }
3255 
3256 /* created for use with alloc_percpu */
3257 struct trace_buffer_struct {
3258 	int nesting;
3259 	char buffer[4][TRACE_BUF_SIZE];
3260 };
3261 
3262 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3263 
3264 /*
3265  * This allows for lockless recording.  If we're nested too deeply, then
3266  * this returns NULL.
3267  */
get_trace_buf(void)3268 static char *get_trace_buf(void)
3269 {
3270 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3271 
3272 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3273 		return NULL;
3274 
3275 	buffer->nesting++;
3276 
3277 	/* Interrupts must see nesting incremented before we use the buffer */
3278 	barrier();
3279 	return &buffer->buffer[buffer->nesting - 1][0];
3280 }
3281 
put_trace_buf(void)3282 static void put_trace_buf(void)
3283 {
3284 	/* Don't let the decrement of nesting leak before this */
3285 	barrier();
3286 	this_cpu_dec(trace_percpu_buffer->nesting);
3287 }
3288 
alloc_percpu_trace_buffer(void)3289 static int alloc_percpu_trace_buffer(void)
3290 {
3291 	struct trace_buffer_struct __percpu *buffers;
3292 
3293 	if (trace_percpu_buffer)
3294 		return 0;
3295 
3296 	buffers = alloc_percpu(struct trace_buffer_struct);
3297 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3298 		return -ENOMEM;
3299 
3300 	trace_percpu_buffer = buffers;
3301 	return 0;
3302 }
3303 
3304 static int buffers_allocated;
3305 
trace_printk_init_buffers(void)3306 void trace_printk_init_buffers(void)
3307 {
3308 	if (buffers_allocated)
3309 		return;
3310 
3311 	if (alloc_percpu_trace_buffer())
3312 		return;
3313 
3314 	/* trace_printk() is for debug use only. Don't use it in production. */
3315 
3316 	pr_warn("\n");
3317 	pr_warn("**********************************************************\n");
3318 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3319 	pr_warn("**                                                      **\n");
3320 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3321 	pr_warn("**                                                      **\n");
3322 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3323 	pr_warn("** unsafe for production use.                           **\n");
3324 	pr_warn("**                                                      **\n");
3325 	pr_warn("** If you see this message and you are not debugging    **\n");
3326 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3327 	pr_warn("**                                                      **\n");
3328 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3329 	pr_warn("**********************************************************\n");
3330 
3331 	/* Expand the buffers to set size */
3332 	tracing_update_buffers(&global_trace);
3333 
3334 	buffers_allocated = 1;
3335 
3336 	/*
3337 	 * trace_printk_init_buffers() can be called by modules.
3338 	 * If that happens, then we need to start cmdline recording
3339 	 * directly here. If the global_trace.buffer is already
3340 	 * allocated here, then this was called by module code.
3341 	 */
3342 	if (global_trace.array_buffer.buffer)
3343 		tracing_start_cmdline_record();
3344 }
3345 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3346 
trace_printk_start_comm(void)3347 void trace_printk_start_comm(void)
3348 {
3349 	/* Start tracing comms if trace printk is set */
3350 	if (!buffers_allocated)
3351 		return;
3352 	tracing_start_cmdline_record();
3353 }
3354 
trace_printk_start_stop_comm(int enabled)3355 static void trace_printk_start_stop_comm(int enabled)
3356 {
3357 	if (!buffers_allocated)
3358 		return;
3359 
3360 	if (enabled)
3361 		tracing_start_cmdline_record();
3362 	else
3363 		tracing_stop_cmdline_record();
3364 }
3365 
3366 /**
3367  * trace_vbprintk - write binary msg to tracing buffer
3368  * @ip:    The address of the caller
3369  * @fmt:   The string format to write to the buffer
3370  * @args:  Arguments for @fmt
3371  */
trace_vbprintk(unsigned long ip,const char * fmt,va_list args)3372 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3373 {
3374 	struct ring_buffer_event *event;
3375 	struct trace_buffer *buffer;
3376 	struct trace_array *tr = READ_ONCE(printk_trace);
3377 	struct bprint_entry *entry;
3378 	unsigned int trace_ctx;
3379 	char *tbuffer;
3380 	int len = 0, size;
3381 
3382 	if (!printk_binsafe(tr))
3383 		return trace_vprintk(ip, fmt, args);
3384 
3385 	if (unlikely(tracing_selftest_running || tracing_disabled))
3386 		return 0;
3387 
3388 	/* Don't pollute graph traces with trace_vprintk internals */
3389 	pause_graph_tracing();
3390 
3391 	trace_ctx = tracing_gen_ctx();
3392 	preempt_disable_notrace();
3393 
3394 	tbuffer = get_trace_buf();
3395 	if (!tbuffer) {
3396 		len = 0;
3397 		goto out_nobuffer;
3398 	}
3399 
3400 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3401 
3402 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3403 		goto out_put;
3404 
3405 	size = sizeof(*entry) + sizeof(u32) * len;
3406 	buffer = tr->array_buffer.buffer;
3407 	ring_buffer_nest_start(buffer);
3408 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3409 					    trace_ctx);
3410 	if (!event)
3411 		goto out;
3412 	entry = ring_buffer_event_data(event);
3413 	entry->ip			= ip;
3414 	entry->fmt			= fmt;
3415 
3416 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3417 	__buffer_unlock_commit(buffer, event);
3418 	ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3419 
3420 out:
3421 	ring_buffer_nest_end(buffer);
3422 out_put:
3423 	put_trace_buf();
3424 
3425 out_nobuffer:
3426 	preempt_enable_notrace();
3427 	unpause_graph_tracing();
3428 
3429 	return len;
3430 }
3431 EXPORT_SYMBOL_GPL(trace_vbprintk);
3432 
3433 static __printf(3, 0)
__trace_array_vprintk(struct trace_buffer * buffer,unsigned long ip,const char * fmt,va_list args)3434 int __trace_array_vprintk(struct trace_buffer *buffer,
3435 			  unsigned long ip, const char *fmt, va_list args)
3436 {
3437 	struct ring_buffer_event *event;
3438 	int len = 0, size;
3439 	struct print_entry *entry;
3440 	unsigned int trace_ctx;
3441 	char *tbuffer;
3442 
3443 	if (tracing_disabled)
3444 		return 0;
3445 
3446 	/* Don't pollute graph traces with trace_vprintk internals */
3447 	pause_graph_tracing();
3448 
3449 	trace_ctx = tracing_gen_ctx();
3450 	preempt_disable_notrace();
3451 
3452 
3453 	tbuffer = get_trace_buf();
3454 	if (!tbuffer) {
3455 		len = 0;
3456 		goto out_nobuffer;
3457 	}
3458 
3459 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3460 
3461 	size = sizeof(*entry) + len + 1;
3462 	ring_buffer_nest_start(buffer);
3463 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3464 					    trace_ctx);
3465 	if (!event)
3466 		goto out;
3467 	entry = ring_buffer_event_data(event);
3468 	entry->ip = ip;
3469 
3470 	memcpy(&entry->buf, tbuffer, len + 1);
3471 	__buffer_unlock_commit(buffer, event);
3472 	ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL);
3473 
3474 out:
3475 	ring_buffer_nest_end(buffer);
3476 	put_trace_buf();
3477 
3478 out_nobuffer:
3479 	preempt_enable_notrace();
3480 	unpause_graph_tracing();
3481 
3482 	return len;
3483 }
3484 
trace_array_vprintk(struct trace_array * tr,unsigned long ip,const char * fmt,va_list args)3485 int trace_array_vprintk(struct trace_array *tr,
3486 			unsigned long ip, const char *fmt, va_list args)
3487 {
3488 	if (tracing_selftest_running && tr == &global_trace)
3489 		return 0;
3490 
3491 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3492 }
3493 
3494 /**
3495  * trace_array_printk - Print a message to a specific instance
3496  * @tr: The instance trace_array descriptor
3497  * @ip: The instruction pointer that this is called from.
3498  * @fmt: The format to print (printf format)
3499  *
3500  * If a subsystem sets up its own instance, they have the right to
3501  * printk strings into their tracing instance buffer using this
3502  * function. Note, this function will not write into the top level
3503  * buffer (use trace_printk() for that), as writing into the top level
3504  * buffer should only have events that can be individually disabled.
3505  * trace_printk() is only used for debugging a kernel, and should not
3506  * be ever incorporated in normal use.
3507  *
3508  * trace_array_printk() can be used, as it will not add noise to the
3509  * top level tracing buffer.
3510  *
3511  * Note, trace_array_init_printk() must be called on @tr before this
3512  * can be used.
3513  */
trace_array_printk(struct trace_array * tr,unsigned long ip,const char * fmt,...)3514 int trace_array_printk(struct trace_array *tr,
3515 		       unsigned long ip, const char *fmt, ...)
3516 {
3517 	int ret;
3518 	va_list ap;
3519 
3520 	if (!tr)
3521 		return -ENOENT;
3522 
3523 	/* This is only allowed for created instances */
3524 	if (tr == &global_trace)
3525 		return 0;
3526 
3527 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3528 		return 0;
3529 
3530 	va_start(ap, fmt);
3531 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3532 	va_end(ap);
3533 	return ret;
3534 }
3535 EXPORT_SYMBOL_GPL(trace_array_printk);
3536 
3537 /**
3538  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3539  * @tr: The trace array to initialize the buffers for
3540  *
3541  * As trace_array_printk() only writes into instances, they are OK to
3542  * have in the kernel (unlike trace_printk()). This needs to be called
3543  * before trace_array_printk() can be used on a trace_array.
3544  */
trace_array_init_printk(struct trace_array * tr)3545 int trace_array_init_printk(struct trace_array *tr)
3546 {
3547 	if (!tr)
3548 		return -ENOENT;
3549 
3550 	/* This is only allowed for created instances */
3551 	if (tr == &global_trace)
3552 		return -EINVAL;
3553 
3554 	return alloc_percpu_trace_buffer();
3555 }
3556 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3557 
trace_array_printk_buf(struct trace_buffer * buffer,unsigned long ip,const char * fmt,...)3558 int trace_array_printk_buf(struct trace_buffer *buffer,
3559 			   unsigned long ip, const char *fmt, ...)
3560 {
3561 	int ret;
3562 	va_list ap;
3563 
3564 	if (!(printk_trace->trace_flags & TRACE_ITER_PRINTK))
3565 		return 0;
3566 
3567 	va_start(ap, fmt);
3568 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3569 	va_end(ap);
3570 	return ret;
3571 }
3572 
trace_vprintk(unsigned long ip,const char * fmt,va_list args)3573 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3574 {
3575 	return trace_array_vprintk(printk_trace, ip, fmt, args);
3576 }
3577 EXPORT_SYMBOL_GPL(trace_vprintk);
3578 
trace_iterator_increment(struct trace_iterator * iter)3579 static void trace_iterator_increment(struct trace_iterator *iter)
3580 {
3581 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3582 
3583 	iter->idx++;
3584 	if (buf_iter)
3585 		ring_buffer_iter_advance(buf_iter);
3586 }
3587 
3588 static struct trace_entry *
peek_next_entry(struct trace_iterator * iter,int cpu,u64 * ts,unsigned long * lost_events)3589 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3590 		unsigned long *lost_events)
3591 {
3592 	struct ring_buffer_event *event;
3593 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3594 
3595 	if (buf_iter) {
3596 		event = ring_buffer_iter_peek(buf_iter, ts);
3597 		if (lost_events)
3598 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3599 				(unsigned long)-1 : 0;
3600 	} else {
3601 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3602 					 lost_events);
3603 	}
3604 
3605 	if (event) {
3606 		iter->ent_size = ring_buffer_event_length(event);
3607 		return ring_buffer_event_data(event);
3608 	}
3609 	iter->ent_size = 0;
3610 	return NULL;
3611 }
3612 
3613 static struct trace_entry *
__find_next_entry(struct trace_iterator * iter,int * ent_cpu,unsigned long * missing_events,u64 * ent_ts)3614 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3615 		  unsigned long *missing_events, u64 *ent_ts)
3616 {
3617 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3618 	struct trace_entry *ent, *next = NULL;
3619 	unsigned long lost_events = 0, next_lost = 0;
3620 	int cpu_file = iter->cpu_file;
3621 	u64 next_ts = 0, ts;
3622 	int next_cpu = -1;
3623 	int next_size = 0;
3624 	int cpu;
3625 
3626 	/*
3627 	 * If we are in a per_cpu trace file, don't bother by iterating over
3628 	 * all cpu and peek directly.
3629 	 */
3630 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3631 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3632 			return NULL;
3633 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3634 		if (ent_cpu)
3635 			*ent_cpu = cpu_file;
3636 
3637 		return ent;
3638 	}
3639 
3640 	for_each_tracing_cpu(cpu) {
3641 
3642 		if (ring_buffer_empty_cpu(buffer, cpu))
3643 			continue;
3644 
3645 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3646 
3647 		/*
3648 		 * Pick the entry with the smallest timestamp:
3649 		 */
3650 		if (ent && (!next || ts < next_ts)) {
3651 			next = ent;
3652 			next_cpu = cpu;
3653 			next_ts = ts;
3654 			next_lost = lost_events;
3655 			next_size = iter->ent_size;
3656 		}
3657 	}
3658 
3659 	iter->ent_size = next_size;
3660 
3661 	if (ent_cpu)
3662 		*ent_cpu = next_cpu;
3663 
3664 	if (ent_ts)
3665 		*ent_ts = next_ts;
3666 
3667 	if (missing_events)
3668 		*missing_events = next_lost;
3669 
3670 	return next;
3671 }
3672 
3673 #define STATIC_FMT_BUF_SIZE	128
3674 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3675 
trace_iter_expand_format(struct trace_iterator * iter)3676 char *trace_iter_expand_format(struct trace_iterator *iter)
3677 {
3678 	char *tmp;
3679 
3680 	/*
3681 	 * iter->tr is NULL when used with tp_printk, which makes
3682 	 * this get called where it is not safe to call krealloc().
3683 	 */
3684 	if (!iter->tr || iter->fmt == static_fmt_buf)
3685 		return NULL;
3686 
3687 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3688 		       GFP_KERNEL);
3689 	if (tmp) {
3690 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3691 		iter->fmt = tmp;
3692 	}
3693 
3694 	return tmp;
3695 }
3696 
3697 /* Returns true if the string is safe to dereference from an event */
trace_safe_str(struct trace_iterator * iter,const char * str)3698 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3699 {
3700 	unsigned long addr = (unsigned long)str;
3701 	struct trace_event *trace_event;
3702 	struct trace_event_call *event;
3703 
3704 	/* OK if part of the event data */
3705 	if ((addr >= (unsigned long)iter->ent) &&
3706 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3707 		return true;
3708 
3709 	/* OK if part of the temp seq buffer */
3710 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3711 	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3712 		return true;
3713 
3714 	/* Core rodata can not be freed */
3715 	if (is_kernel_rodata(addr))
3716 		return true;
3717 
3718 	if (trace_is_tracepoint_string(str))
3719 		return true;
3720 
3721 	/*
3722 	 * Now this could be a module event, referencing core module
3723 	 * data, which is OK.
3724 	 */
3725 	if (!iter->ent)
3726 		return false;
3727 
3728 	trace_event = ftrace_find_event(iter->ent->type);
3729 	if (!trace_event)
3730 		return false;
3731 
3732 	event = container_of(trace_event, struct trace_event_call, event);
3733 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3734 		return false;
3735 
3736 	/* Would rather have rodata, but this will suffice */
3737 	if (within_module_core(addr, event->module))
3738 		return true;
3739 
3740 	return false;
3741 }
3742 
3743 /**
3744  * ignore_event - Check dereferenced fields while writing to the seq buffer
3745  * @iter: The iterator that holds the seq buffer and the event being printed
3746  *
3747  * At boot up, test_event_printk() will flag any event that dereferences
3748  * a string with "%s" that does exist in the ring buffer. It may still
3749  * be valid, as the string may point to a static string in the kernel
3750  * rodata that never gets freed. But if the string pointer is pointing
3751  * to something that was allocated, there's a chance that it can be freed
3752  * by the time the user reads the trace. This would cause a bad memory
3753  * access by the kernel and possibly crash the system.
3754  *
3755  * This function will check if the event has any fields flagged as needing
3756  * to be checked at runtime and perform those checks.
3757  *
3758  * If it is found that a field is unsafe, it will write into the @iter->seq
3759  * a message stating what was found to be unsafe.
3760  *
3761  * @return: true if the event is unsafe and should be ignored,
3762  *          false otherwise.
3763  */
ignore_event(struct trace_iterator * iter)3764 bool ignore_event(struct trace_iterator *iter)
3765 {
3766 	struct ftrace_event_field *field;
3767 	struct trace_event *trace_event;
3768 	struct trace_event_call *event;
3769 	struct list_head *head;
3770 	struct trace_seq *seq;
3771 	const void *ptr;
3772 
3773 	trace_event = ftrace_find_event(iter->ent->type);
3774 
3775 	seq = &iter->seq;
3776 
3777 	if (!trace_event) {
3778 		trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
3779 		return true;
3780 	}
3781 
3782 	event = container_of(trace_event, struct trace_event_call, event);
3783 	if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
3784 		return false;
3785 
3786 	head = trace_get_fields(event);
3787 	if (!head) {
3788 		trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
3789 				 trace_event_name(event));
3790 		return true;
3791 	}
3792 
3793 	/* Offsets are from the iter->ent that points to the raw event */
3794 	ptr = iter->ent;
3795 
3796 	list_for_each_entry(field, head, link) {
3797 		const char *str;
3798 		bool good;
3799 
3800 		if (!field->needs_test)
3801 			continue;
3802 
3803 		str = *(const char **)(ptr + field->offset);
3804 
3805 		good = trace_safe_str(iter, str);
3806 
3807 		/*
3808 		 * If you hit this warning, it is likely that the
3809 		 * trace event in question used %s on a string that
3810 		 * was saved at the time of the event, but may not be
3811 		 * around when the trace is read. Use __string(),
3812 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3813 		 * instead. See samples/trace_events/trace-events-sample.h
3814 		 * for reference.
3815 		 */
3816 		if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
3817 			      trace_event_name(event), field->name)) {
3818 			trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
3819 					 trace_event_name(event), field->name);
3820 			return true;
3821 		}
3822 	}
3823 	return false;
3824 }
3825 
trace_event_format(struct trace_iterator * iter,const char * fmt)3826 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3827 {
3828 	const char *p, *new_fmt;
3829 	char *q;
3830 
3831 	if (WARN_ON_ONCE(!fmt))
3832 		return fmt;
3833 
3834 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3835 		return fmt;
3836 
3837 	p = fmt;
3838 	new_fmt = q = iter->fmt;
3839 	while (*p) {
3840 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3841 			if (!trace_iter_expand_format(iter))
3842 				return fmt;
3843 
3844 			q += iter->fmt - new_fmt;
3845 			new_fmt = iter->fmt;
3846 		}
3847 
3848 		*q++ = *p++;
3849 
3850 		/* Replace %p with %px */
3851 		if (p[-1] == '%') {
3852 			if (p[0] == '%') {
3853 				*q++ = *p++;
3854 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3855 				*q++ = *p++;
3856 				*q++ = 'x';
3857 			}
3858 		}
3859 	}
3860 	*q = '\0';
3861 
3862 	return new_fmt;
3863 }
3864 
3865 #define STATIC_TEMP_BUF_SIZE	128
3866 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3867 
3868 /* Find the next real entry, without updating the iterator itself */
trace_find_next_entry(struct trace_iterator * iter,int * ent_cpu,u64 * ent_ts)3869 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3870 					  int *ent_cpu, u64 *ent_ts)
3871 {
3872 	/* __find_next_entry will reset ent_size */
3873 	int ent_size = iter->ent_size;
3874 	struct trace_entry *entry;
3875 
3876 	/*
3877 	 * If called from ftrace_dump(), then the iter->temp buffer
3878 	 * will be the static_temp_buf and not created from kmalloc.
3879 	 * If the entry size is greater than the buffer, we can
3880 	 * not save it. Just return NULL in that case. This is only
3881 	 * used to add markers when two consecutive events' time
3882 	 * stamps have a large delta. See trace_print_lat_context()
3883 	 */
3884 	if (iter->temp == static_temp_buf &&
3885 	    STATIC_TEMP_BUF_SIZE < ent_size)
3886 		return NULL;
3887 
3888 	/*
3889 	 * The __find_next_entry() may call peek_next_entry(), which may
3890 	 * call ring_buffer_peek() that may make the contents of iter->ent
3891 	 * undefined. Need to copy iter->ent now.
3892 	 */
3893 	if (iter->ent && iter->ent != iter->temp) {
3894 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3895 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3896 			void *temp;
3897 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3898 			if (!temp)
3899 				return NULL;
3900 			kfree(iter->temp);
3901 			iter->temp = temp;
3902 			iter->temp_size = iter->ent_size;
3903 		}
3904 		memcpy(iter->temp, iter->ent, iter->ent_size);
3905 		iter->ent = iter->temp;
3906 	}
3907 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3908 	/* Put back the original ent_size */
3909 	iter->ent_size = ent_size;
3910 
3911 	return entry;
3912 }
3913 
3914 /* Find the next real entry, and increment the iterator to the next entry */
trace_find_next_entry_inc(struct trace_iterator * iter)3915 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3916 {
3917 	iter->ent = __find_next_entry(iter, &iter->cpu,
3918 				      &iter->lost_events, &iter->ts);
3919 
3920 	if (iter->ent)
3921 		trace_iterator_increment(iter);
3922 
3923 	return iter->ent ? iter : NULL;
3924 }
3925 
trace_consume(struct trace_iterator * iter)3926 static void trace_consume(struct trace_iterator *iter)
3927 {
3928 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3929 			    &iter->lost_events);
3930 }
3931 
s_next(struct seq_file * m,void * v,loff_t * pos)3932 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3933 {
3934 	struct trace_iterator *iter = m->private;
3935 	int i = (int)*pos;
3936 	void *ent;
3937 
3938 	WARN_ON_ONCE(iter->leftover);
3939 
3940 	(*pos)++;
3941 
3942 	/* can't go backwards */
3943 	if (iter->idx > i)
3944 		return NULL;
3945 
3946 	if (iter->idx < 0)
3947 		ent = trace_find_next_entry_inc(iter);
3948 	else
3949 		ent = iter;
3950 
3951 	while (ent && iter->idx < i)
3952 		ent = trace_find_next_entry_inc(iter);
3953 
3954 	iter->pos = *pos;
3955 
3956 	return ent;
3957 }
3958 
tracing_iter_reset(struct trace_iterator * iter,int cpu)3959 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3960 {
3961 	struct ring_buffer_iter *buf_iter;
3962 	unsigned long entries = 0;
3963 	u64 ts;
3964 
3965 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3966 
3967 	buf_iter = trace_buffer_iter(iter, cpu);
3968 	if (!buf_iter)
3969 		return;
3970 
3971 	ring_buffer_iter_reset(buf_iter);
3972 
3973 	/*
3974 	 * We could have the case with the max latency tracers
3975 	 * that a reset never took place on a cpu. This is evident
3976 	 * by the timestamp being before the start of the buffer.
3977 	 */
3978 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
3979 		if (ts >= iter->array_buffer->time_start)
3980 			break;
3981 		entries++;
3982 		ring_buffer_iter_advance(buf_iter);
3983 		/* This could be a big loop */
3984 		cond_resched();
3985 	}
3986 
3987 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3988 }
3989 
3990 /*
3991  * The current tracer is copied to avoid a global locking
3992  * all around.
3993  */
s_start(struct seq_file * m,loff_t * pos)3994 static void *s_start(struct seq_file *m, loff_t *pos)
3995 {
3996 	struct trace_iterator *iter = m->private;
3997 	struct trace_array *tr = iter->tr;
3998 	int cpu_file = iter->cpu_file;
3999 	void *p = NULL;
4000 	loff_t l = 0;
4001 	int cpu;
4002 
4003 	mutex_lock(&trace_types_lock);
4004 	if (unlikely(tr->current_trace != iter->trace)) {
4005 		/* Close iter->trace before switching to the new current tracer */
4006 		if (iter->trace->close)
4007 			iter->trace->close(iter);
4008 		iter->trace = tr->current_trace;
4009 		/* Reopen the new current tracer */
4010 		if (iter->trace->open)
4011 			iter->trace->open(iter);
4012 	}
4013 	mutex_unlock(&trace_types_lock);
4014 
4015 #ifdef CONFIG_TRACER_MAX_TRACE
4016 	if (iter->snapshot && iter->trace->use_max_tr)
4017 		return ERR_PTR(-EBUSY);
4018 #endif
4019 
4020 	if (*pos != iter->pos) {
4021 		iter->ent = NULL;
4022 		iter->cpu = 0;
4023 		iter->idx = -1;
4024 
4025 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4026 			for_each_tracing_cpu(cpu)
4027 				tracing_iter_reset(iter, cpu);
4028 		} else
4029 			tracing_iter_reset(iter, cpu_file);
4030 
4031 		iter->leftover = 0;
4032 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4033 			;
4034 
4035 	} else {
4036 		/*
4037 		 * If we overflowed the seq_file before, then we want
4038 		 * to just reuse the trace_seq buffer again.
4039 		 */
4040 		if (iter->leftover)
4041 			p = iter;
4042 		else {
4043 			l = *pos - 1;
4044 			p = s_next(m, p, &l);
4045 		}
4046 	}
4047 
4048 	trace_event_read_lock();
4049 	trace_access_lock(cpu_file);
4050 	return p;
4051 }
4052 
s_stop(struct seq_file * m,void * p)4053 static void s_stop(struct seq_file *m, void *p)
4054 {
4055 	struct trace_iterator *iter = m->private;
4056 
4057 #ifdef CONFIG_TRACER_MAX_TRACE
4058 	if (iter->snapshot && iter->trace->use_max_tr)
4059 		return;
4060 #endif
4061 
4062 	trace_access_unlock(iter->cpu_file);
4063 	trace_event_read_unlock();
4064 }
4065 
4066 static void
get_total_entries_cpu(struct array_buffer * buf,unsigned long * total,unsigned long * entries,int cpu)4067 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4068 		      unsigned long *entries, int cpu)
4069 {
4070 	unsigned long count;
4071 
4072 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4073 	/*
4074 	 * If this buffer has skipped entries, then we hold all
4075 	 * entries for the trace and we need to ignore the
4076 	 * ones before the time stamp.
4077 	 */
4078 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4079 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4080 		/* total is the same as the entries */
4081 		*total = count;
4082 	} else
4083 		*total = count +
4084 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4085 	*entries = count;
4086 }
4087 
4088 static void
get_total_entries(struct array_buffer * buf,unsigned long * total,unsigned long * entries)4089 get_total_entries(struct array_buffer *buf,
4090 		  unsigned long *total, unsigned long *entries)
4091 {
4092 	unsigned long t, e;
4093 	int cpu;
4094 
4095 	*total = 0;
4096 	*entries = 0;
4097 
4098 	for_each_tracing_cpu(cpu) {
4099 		get_total_entries_cpu(buf, &t, &e, cpu);
4100 		*total += t;
4101 		*entries += e;
4102 	}
4103 }
4104 
trace_total_entries_cpu(struct trace_array * tr,int cpu)4105 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4106 {
4107 	unsigned long total, entries;
4108 
4109 	if (!tr)
4110 		tr = &global_trace;
4111 
4112 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4113 
4114 	return entries;
4115 }
4116 
trace_total_entries(struct trace_array * tr)4117 unsigned long trace_total_entries(struct trace_array *tr)
4118 {
4119 	unsigned long total, entries;
4120 
4121 	if (!tr)
4122 		tr = &global_trace;
4123 
4124 	get_total_entries(&tr->array_buffer, &total, &entries);
4125 
4126 	return entries;
4127 }
4128 
print_lat_help_header(struct seq_file * m)4129 static void print_lat_help_header(struct seq_file *m)
4130 {
4131 	seq_puts(m, "#                    _------=> CPU#            \n"
4132 		    "#                   / _-----=> irqs-off/BH-disabled\n"
4133 		    "#                  | / _----=> need-resched    \n"
4134 		    "#                  || / _---=> hardirq/softirq \n"
4135 		    "#                  ||| / _--=> preempt-depth   \n"
4136 		    "#                  |||| / _-=> migrate-disable \n"
4137 		    "#                  ||||| /     delay           \n"
4138 		    "#  cmd     pid     |||||| time  |   caller     \n"
4139 		    "#     \\   /        ||||||  \\    |    /       \n");
4140 }
4141 
print_event_info(struct array_buffer * buf,struct seq_file * m)4142 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4143 {
4144 	unsigned long total;
4145 	unsigned long entries;
4146 
4147 	get_total_entries(buf, &total, &entries);
4148 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4149 		   entries, total, num_online_cpus());
4150 	seq_puts(m, "#\n");
4151 }
4152 
print_func_help_header(struct array_buffer * buf,struct seq_file * m,unsigned int flags)4153 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4154 				   unsigned int flags)
4155 {
4156 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4157 
4158 	print_event_info(buf, m);
4159 
4160 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4161 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4162 }
4163 
print_func_help_header_irq(struct array_buffer * buf,struct seq_file * m,unsigned int flags)4164 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4165 				       unsigned int flags)
4166 {
4167 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4168 	static const char space[] = "            ";
4169 	int prec = tgid ? 12 : 2;
4170 
4171 	print_event_info(buf, m);
4172 
4173 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4174 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4175 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4176 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4177 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4178 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4179 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4180 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4181 }
4182 
4183 void
print_trace_header(struct seq_file * m,struct trace_iterator * iter)4184 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4185 {
4186 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4187 	struct array_buffer *buf = iter->array_buffer;
4188 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4189 	struct tracer *type = iter->trace;
4190 	unsigned long entries;
4191 	unsigned long total;
4192 	const char *name = type->name;
4193 
4194 	get_total_entries(buf, &total, &entries);
4195 
4196 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4197 		   name, init_utsname()->release);
4198 	seq_puts(m, "# -----------------------------------"
4199 		 "---------------------------------\n");
4200 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4201 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4202 		   nsecs_to_usecs(data->saved_latency),
4203 		   entries,
4204 		   total,
4205 		   buf->cpu,
4206 		   preempt_model_str(),
4207 		   /* These are reserved for later use */
4208 		   0, 0, 0, 0);
4209 #ifdef CONFIG_SMP
4210 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4211 #else
4212 	seq_puts(m, ")\n");
4213 #endif
4214 	seq_puts(m, "#    -----------------\n");
4215 	seq_printf(m, "#    | task: %.16s-%d "
4216 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4217 		   data->comm, data->pid,
4218 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4219 		   data->policy, data->rt_priority);
4220 	seq_puts(m, "#    -----------------\n");
4221 
4222 	if (data->critical_start) {
4223 		seq_puts(m, "#  => started at: ");
4224 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4225 		trace_print_seq(m, &iter->seq);
4226 		seq_puts(m, "\n#  => ended at:   ");
4227 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4228 		trace_print_seq(m, &iter->seq);
4229 		seq_puts(m, "\n#\n");
4230 	}
4231 
4232 	seq_puts(m, "#\n");
4233 }
4234 
test_cpu_buff_start(struct trace_iterator * iter)4235 static void test_cpu_buff_start(struct trace_iterator *iter)
4236 {
4237 	struct trace_seq *s = &iter->seq;
4238 	struct trace_array *tr = iter->tr;
4239 
4240 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4241 		return;
4242 
4243 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4244 		return;
4245 
4246 	if (cpumask_available(iter->started) &&
4247 	    cpumask_test_cpu(iter->cpu, iter->started))
4248 		return;
4249 
4250 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4251 		return;
4252 
4253 	if (cpumask_available(iter->started))
4254 		cpumask_set_cpu(iter->cpu, iter->started);
4255 
4256 	/* Don't print started cpu buffer for the first entry of the trace */
4257 	if (iter->idx > 1)
4258 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4259 				iter->cpu);
4260 }
4261 
print_trace_fmt(struct trace_iterator * iter)4262 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4263 {
4264 	struct trace_array *tr = iter->tr;
4265 	struct trace_seq *s = &iter->seq;
4266 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4267 	struct trace_entry *entry;
4268 	struct trace_event *event;
4269 
4270 	entry = iter->ent;
4271 
4272 	test_cpu_buff_start(iter);
4273 
4274 	event = ftrace_find_event(entry->type);
4275 
4276 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4277 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4278 			trace_print_lat_context(iter);
4279 		else
4280 			trace_print_context(iter);
4281 	}
4282 
4283 	if (trace_seq_has_overflowed(s))
4284 		return TRACE_TYPE_PARTIAL_LINE;
4285 
4286 	if (event) {
4287 		if (tr->trace_flags & TRACE_ITER_FIELDS)
4288 			return print_event_fields(iter, event);
4289 		/*
4290 		 * For TRACE_EVENT() events, the print_fmt is not
4291 		 * safe to use if the array has delta offsets
4292 		 * Force printing via the fields.
4293 		 */
4294 		if ((tr->text_delta) &&
4295 		    event->type > __TRACE_LAST_TYPE)
4296 			return print_event_fields(iter, event);
4297 
4298 		return event->funcs->trace(iter, sym_flags, event);
4299 	}
4300 
4301 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4302 
4303 	return trace_handle_return(s);
4304 }
4305 
print_raw_fmt(struct trace_iterator * iter)4306 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4307 {
4308 	struct trace_array *tr = iter->tr;
4309 	struct trace_seq *s = &iter->seq;
4310 	struct trace_entry *entry;
4311 	struct trace_event *event;
4312 
4313 	entry = iter->ent;
4314 
4315 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4316 		trace_seq_printf(s, "%d %d %llu ",
4317 				 entry->pid, iter->cpu, iter->ts);
4318 
4319 	if (trace_seq_has_overflowed(s))
4320 		return TRACE_TYPE_PARTIAL_LINE;
4321 
4322 	event = ftrace_find_event(entry->type);
4323 	if (event)
4324 		return event->funcs->raw(iter, 0, event);
4325 
4326 	trace_seq_printf(s, "%d ?\n", entry->type);
4327 
4328 	return trace_handle_return(s);
4329 }
4330 
print_hex_fmt(struct trace_iterator * iter)4331 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4332 {
4333 	struct trace_array *tr = iter->tr;
4334 	struct trace_seq *s = &iter->seq;
4335 	unsigned char newline = '\n';
4336 	struct trace_entry *entry;
4337 	struct trace_event *event;
4338 
4339 	entry = iter->ent;
4340 
4341 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4342 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4343 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4344 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4345 		if (trace_seq_has_overflowed(s))
4346 			return TRACE_TYPE_PARTIAL_LINE;
4347 	}
4348 
4349 	event = ftrace_find_event(entry->type);
4350 	if (event) {
4351 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4352 		if (ret != TRACE_TYPE_HANDLED)
4353 			return ret;
4354 	}
4355 
4356 	SEQ_PUT_FIELD(s, newline);
4357 
4358 	return trace_handle_return(s);
4359 }
4360 
print_bin_fmt(struct trace_iterator * iter)4361 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4362 {
4363 	struct trace_array *tr = iter->tr;
4364 	struct trace_seq *s = &iter->seq;
4365 	struct trace_entry *entry;
4366 	struct trace_event *event;
4367 
4368 	entry = iter->ent;
4369 
4370 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4371 		SEQ_PUT_FIELD(s, entry->pid);
4372 		SEQ_PUT_FIELD(s, iter->cpu);
4373 		SEQ_PUT_FIELD(s, iter->ts);
4374 		if (trace_seq_has_overflowed(s))
4375 			return TRACE_TYPE_PARTIAL_LINE;
4376 	}
4377 
4378 	event = ftrace_find_event(entry->type);
4379 	return event ? event->funcs->binary(iter, 0, event) :
4380 		TRACE_TYPE_HANDLED;
4381 }
4382 
trace_empty(struct trace_iterator * iter)4383 int trace_empty(struct trace_iterator *iter)
4384 {
4385 	struct ring_buffer_iter *buf_iter;
4386 	int cpu;
4387 
4388 	/* If we are looking at one CPU buffer, only check that one */
4389 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4390 		cpu = iter->cpu_file;
4391 		buf_iter = trace_buffer_iter(iter, cpu);
4392 		if (buf_iter) {
4393 			if (!ring_buffer_iter_empty(buf_iter))
4394 				return 0;
4395 		} else {
4396 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4397 				return 0;
4398 		}
4399 		return 1;
4400 	}
4401 
4402 	for_each_tracing_cpu(cpu) {
4403 		buf_iter = trace_buffer_iter(iter, cpu);
4404 		if (buf_iter) {
4405 			if (!ring_buffer_iter_empty(buf_iter))
4406 				return 0;
4407 		} else {
4408 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4409 				return 0;
4410 		}
4411 	}
4412 
4413 	return 1;
4414 }
4415 
4416 /*  Called with trace_event_read_lock() held. */
print_trace_line(struct trace_iterator * iter)4417 enum print_line_t print_trace_line(struct trace_iterator *iter)
4418 {
4419 	struct trace_array *tr = iter->tr;
4420 	unsigned long trace_flags = tr->trace_flags;
4421 	enum print_line_t ret;
4422 
4423 	if (iter->lost_events) {
4424 		if (iter->lost_events == (unsigned long)-1)
4425 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4426 					 iter->cpu);
4427 		else
4428 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4429 					 iter->cpu, iter->lost_events);
4430 		if (trace_seq_has_overflowed(&iter->seq))
4431 			return TRACE_TYPE_PARTIAL_LINE;
4432 	}
4433 
4434 	if (iter->trace && iter->trace->print_line) {
4435 		ret = iter->trace->print_line(iter);
4436 		if (ret != TRACE_TYPE_UNHANDLED)
4437 			return ret;
4438 	}
4439 
4440 	if (iter->ent->type == TRACE_BPUTS &&
4441 			trace_flags & TRACE_ITER_PRINTK &&
4442 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4443 		return trace_print_bputs_msg_only(iter);
4444 
4445 	if (iter->ent->type == TRACE_BPRINT &&
4446 			trace_flags & TRACE_ITER_PRINTK &&
4447 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4448 		return trace_print_bprintk_msg_only(iter);
4449 
4450 	if (iter->ent->type == TRACE_PRINT &&
4451 			trace_flags & TRACE_ITER_PRINTK &&
4452 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4453 		return trace_print_printk_msg_only(iter);
4454 
4455 	if (trace_flags & TRACE_ITER_BIN)
4456 		return print_bin_fmt(iter);
4457 
4458 	if (trace_flags & TRACE_ITER_HEX)
4459 		return print_hex_fmt(iter);
4460 
4461 	if (trace_flags & TRACE_ITER_RAW)
4462 		return print_raw_fmt(iter);
4463 
4464 	return print_trace_fmt(iter);
4465 }
4466 
trace_latency_header(struct seq_file * m)4467 void trace_latency_header(struct seq_file *m)
4468 {
4469 	struct trace_iterator *iter = m->private;
4470 	struct trace_array *tr = iter->tr;
4471 
4472 	/* print nothing if the buffers are empty */
4473 	if (trace_empty(iter))
4474 		return;
4475 
4476 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4477 		print_trace_header(m, iter);
4478 
4479 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4480 		print_lat_help_header(m);
4481 }
4482 
trace_default_header(struct seq_file * m)4483 void trace_default_header(struct seq_file *m)
4484 {
4485 	struct trace_iterator *iter = m->private;
4486 	struct trace_array *tr = iter->tr;
4487 	unsigned long trace_flags = tr->trace_flags;
4488 
4489 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4490 		return;
4491 
4492 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4493 		/* print nothing if the buffers are empty */
4494 		if (trace_empty(iter))
4495 			return;
4496 		print_trace_header(m, iter);
4497 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4498 			print_lat_help_header(m);
4499 	} else {
4500 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4501 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4502 				print_func_help_header_irq(iter->array_buffer,
4503 							   m, trace_flags);
4504 			else
4505 				print_func_help_header(iter->array_buffer, m,
4506 						       trace_flags);
4507 		}
4508 	}
4509 }
4510 
test_ftrace_alive(struct seq_file * m)4511 static void test_ftrace_alive(struct seq_file *m)
4512 {
4513 	if (!ftrace_is_dead())
4514 		return;
4515 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4516 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4517 }
4518 
4519 #ifdef CONFIG_TRACER_MAX_TRACE
show_snapshot_main_help(struct seq_file * m)4520 static void show_snapshot_main_help(struct seq_file *m)
4521 {
4522 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4523 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4524 		    "#                      Takes a snapshot of the main buffer.\n"
4525 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4526 		    "#                      (Doesn't have to be '2' works with any number that\n"
4527 		    "#                       is not a '0' or '1')\n");
4528 }
4529 
show_snapshot_percpu_help(struct seq_file * m)4530 static void show_snapshot_percpu_help(struct seq_file *m)
4531 {
4532 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4533 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4534 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4535 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4536 #else
4537 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4538 		    "#                     Must use main snapshot file to allocate.\n");
4539 #endif
4540 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4541 		    "#                      (Doesn't have to be '2' works with any number that\n"
4542 		    "#                       is not a '0' or '1')\n");
4543 }
4544 
print_snapshot_help(struct seq_file * m,struct trace_iterator * iter)4545 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4546 {
4547 	if (iter->tr->allocated_snapshot)
4548 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4549 	else
4550 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4551 
4552 	seq_puts(m, "# Snapshot commands:\n");
4553 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4554 		show_snapshot_main_help(m);
4555 	else
4556 		show_snapshot_percpu_help(m);
4557 }
4558 #else
4559 /* Should never be called */
print_snapshot_help(struct seq_file * m,struct trace_iterator * iter)4560 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4561 #endif
4562 
s_show(struct seq_file * m,void * v)4563 static int s_show(struct seq_file *m, void *v)
4564 {
4565 	struct trace_iterator *iter = v;
4566 	int ret;
4567 
4568 	if (iter->ent == NULL) {
4569 		if (iter->tr) {
4570 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4571 			seq_puts(m, "#\n");
4572 			test_ftrace_alive(m);
4573 		}
4574 		if (iter->snapshot && trace_empty(iter))
4575 			print_snapshot_help(m, iter);
4576 		else if (iter->trace && iter->trace->print_header)
4577 			iter->trace->print_header(m);
4578 		else
4579 			trace_default_header(m);
4580 
4581 	} else if (iter->leftover) {
4582 		/*
4583 		 * If we filled the seq_file buffer earlier, we
4584 		 * want to just show it now.
4585 		 */
4586 		ret = trace_print_seq(m, &iter->seq);
4587 
4588 		/* ret should this time be zero, but you never know */
4589 		iter->leftover = ret;
4590 
4591 	} else {
4592 		ret = print_trace_line(iter);
4593 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4594 			iter->seq.full = 0;
4595 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4596 		}
4597 		ret = trace_print_seq(m, &iter->seq);
4598 		/*
4599 		 * If we overflow the seq_file buffer, then it will
4600 		 * ask us for this data again at start up.
4601 		 * Use that instead.
4602 		 *  ret is 0 if seq_file write succeeded.
4603 		 *        -1 otherwise.
4604 		 */
4605 		iter->leftover = ret;
4606 	}
4607 
4608 	return 0;
4609 }
4610 
4611 /*
4612  * Should be used after trace_array_get(), trace_types_lock
4613  * ensures that i_cdev was already initialized.
4614  */
tracing_get_cpu(struct inode * inode)4615 static inline int tracing_get_cpu(struct inode *inode)
4616 {
4617 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4618 		return (long)inode->i_cdev - 1;
4619 	return RING_BUFFER_ALL_CPUS;
4620 }
4621 
4622 static const struct seq_operations tracer_seq_ops = {
4623 	.start		= s_start,
4624 	.next		= s_next,
4625 	.stop		= s_stop,
4626 	.show		= s_show,
4627 };
4628 
4629 /*
4630  * Note, as iter itself can be allocated and freed in different
4631  * ways, this function is only used to free its content, and not
4632  * the iterator itself. The only requirement to all the allocations
4633  * is that it must zero all fields (kzalloc), as freeing works with
4634  * ethier allocated content or NULL.
4635  */
free_trace_iter_content(struct trace_iterator * iter)4636 static void free_trace_iter_content(struct trace_iterator *iter)
4637 {
4638 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
4639 	if (iter->fmt != static_fmt_buf)
4640 		kfree(iter->fmt);
4641 
4642 	kfree(iter->temp);
4643 	kfree(iter->buffer_iter);
4644 	mutex_destroy(&iter->mutex);
4645 	free_cpumask_var(iter->started);
4646 }
4647 
4648 static struct trace_iterator *
__tracing_open(struct inode * inode,struct file * file,bool snapshot)4649 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4650 {
4651 	struct trace_array *tr = inode->i_private;
4652 	struct trace_iterator *iter;
4653 	int cpu;
4654 
4655 	if (tracing_disabled)
4656 		return ERR_PTR(-ENODEV);
4657 
4658 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4659 	if (!iter)
4660 		return ERR_PTR(-ENOMEM);
4661 
4662 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4663 				    GFP_KERNEL);
4664 	if (!iter->buffer_iter)
4665 		goto release;
4666 
4667 	/*
4668 	 * trace_find_next_entry() may need to save off iter->ent.
4669 	 * It will place it into the iter->temp buffer. As most
4670 	 * events are less than 128, allocate a buffer of that size.
4671 	 * If one is greater, then trace_find_next_entry() will
4672 	 * allocate a new buffer to adjust for the bigger iter->ent.
4673 	 * It's not critical if it fails to get allocated here.
4674 	 */
4675 	iter->temp = kmalloc(128, GFP_KERNEL);
4676 	if (iter->temp)
4677 		iter->temp_size = 128;
4678 
4679 	/*
4680 	 * trace_event_printf() may need to modify given format
4681 	 * string to replace %p with %px so that it shows real address
4682 	 * instead of hash value. However, that is only for the event
4683 	 * tracing, other tracer may not need. Defer the allocation
4684 	 * until it is needed.
4685 	 */
4686 	iter->fmt = NULL;
4687 	iter->fmt_size = 0;
4688 
4689 	mutex_lock(&trace_types_lock);
4690 	iter->trace = tr->current_trace;
4691 
4692 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4693 		goto fail;
4694 
4695 	iter->tr = tr;
4696 
4697 #ifdef CONFIG_TRACER_MAX_TRACE
4698 	/* Currently only the top directory has a snapshot */
4699 	if (tr->current_trace->print_max || snapshot)
4700 		iter->array_buffer = &tr->max_buffer;
4701 	else
4702 #endif
4703 		iter->array_buffer = &tr->array_buffer;
4704 	iter->snapshot = snapshot;
4705 	iter->pos = -1;
4706 	iter->cpu_file = tracing_get_cpu(inode);
4707 	mutex_init(&iter->mutex);
4708 
4709 	/* Notify the tracer early; before we stop tracing. */
4710 	if (iter->trace->open)
4711 		iter->trace->open(iter);
4712 
4713 	/* Annotate start of buffers if we had overruns */
4714 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4715 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4716 
4717 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4718 	if (trace_clocks[tr->clock_id].in_ns)
4719 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4720 
4721 	/*
4722 	 * If pause-on-trace is enabled, then stop the trace while
4723 	 * dumping, unless this is the "snapshot" file
4724 	 */
4725 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4726 		tracing_stop_tr(tr);
4727 
4728 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4729 		for_each_tracing_cpu(cpu) {
4730 			iter->buffer_iter[cpu] =
4731 				ring_buffer_read_start(iter->array_buffer->buffer,
4732 						       cpu, GFP_KERNEL);
4733 			tracing_iter_reset(iter, cpu);
4734 		}
4735 	} else {
4736 		cpu = iter->cpu_file;
4737 		iter->buffer_iter[cpu] =
4738 			ring_buffer_read_start(iter->array_buffer->buffer,
4739 					       cpu, GFP_KERNEL);
4740 		tracing_iter_reset(iter, cpu);
4741 	}
4742 
4743 	mutex_unlock(&trace_types_lock);
4744 
4745 	return iter;
4746 
4747  fail:
4748 	mutex_unlock(&trace_types_lock);
4749 	free_trace_iter_content(iter);
4750 release:
4751 	seq_release_private(inode, file);
4752 	return ERR_PTR(-ENOMEM);
4753 }
4754 
tracing_open_generic(struct inode * inode,struct file * filp)4755 int tracing_open_generic(struct inode *inode, struct file *filp)
4756 {
4757 	int ret;
4758 
4759 	ret = tracing_check_open_get_tr(NULL);
4760 	if (ret)
4761 		return ret;
4762 
4763 	filp->private_data = inode->i_private;
4764 	return 0;
4765 }
4766 
tracing_is_disabled(void)4767 bool tracing_is_disabled(void)
4768 {
4769 	return (tracing_disabled) ? true: false;
4770 }
4771 
4772 /*
4773  * Open and update trace_array ref count.
4774  * Must have the current trace_array passed to it.
4775  */
tracing_open_generic_tr(struct inode * inode,struct file * filp)4776 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4777 {
4778 	struct trace_array *tr = inode->i_private;
4779 	int ret;
4780 
4781 	ret = tracing_check_open_get_tr(tr);
4782 	if (ret)
4783 		return ret;
4784 
4785 	filp->private_data = inode->i_private;
4786 
4787 	return 0;
4788 }
4789 
4790 /*
4791  * The private pointer of the inode is the trace_event_file.
4792  * Update the tr ref count associated to it.
4793  */
tracing_open_file_tr(struct inode * inode,struct file * filp)4794 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4795 {
4796 	struct trace_event_file *file = inode->i_private;
4797 	int ret;
4798 
4799 	ret = tracing_check_open_get_tr(file->tr);
4800 	if (ret)
4801 		return ret;
4802 
4803 	mutex_lock(&event_mutex);
4804 
4805 	/* Fail if the file is marked for removal */
4806 	if (file->flags & EVENT_FILE_FL_FREED) {
4807 		trace_array_put(file->tr);
4808 		ret = -ENODEV;
4809 	} else {
4810 		event_file_get(file);
4811 	}
4812 
4813 	mutex_unlock(&event_mutex);
4814 	if (ret)
4815 		return ret;
4816 
4817 	filp->private_data = inode->i_private;
4818 
4819 	return 0;
4820 }
4821 
tracing_release_file_tr(struct inode * inode,struct file * filp)4822 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4823 {
4824 	struct trace_event_file *file = inode->i_private;
4825 
4826 	trace_array_put(file->tr);
4827 	event_file_put(file);
4828 
4829 	return 0;
4830 }
4831 
tracing_single_release_file_tr(struct inode * inode,struct file * filp)4832 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4833 {
4834 	tracing_release_file_tr(inode, filp);
4835 	return single_release(inode, filp);
4836 }
4837 
tracing_mark_open(struct inode * inode,struct file * filp)4838 static int tracing_mark_open(struct inode *inode, struct file *filp)
4839 {
4840 	stream_open(inode, filp);
4841 	return tracing_open_generic_tr(inode, filp);
4842 }
4843 
tracing_release(struct inode * inode,struct file * file)4844 static int tracing_release(struct inode *inode, struct file *file)
4845 {
4846 	struct trace_array *tr = inode->i_private;
4847 	struct seq_file *m = file->private_data;
4848 	struct trace_iterator *iter;
4849 	int cpu;
4850 
4851 	if (!(file->f_mode & FMODE_READ)) {
4852 		trace_array_put(tr);
4853 		return 0;
4854 	}
4855 
4856 	/* Writes do not use seq_file */
4857 	iter = m->private;
4858 	mutex_lock(&trace_types_lock);
4859 
4860 	for_each_tracing_cpu(cpu) {
4861 		if (iter->buffer_iter[cpu])
4862 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4863 	}
4864 
4865 	if (iter->trace && iter->trace->close)
4866 		iter->trace->close(iter);
4867 
4868 	if (!iter->snapshot && tr->stop_count)
4869 		/* reenable tracing if it was previously enabled */
4870 		tracing_start_tr(tr);
4871 
4872 	__trace_array_put(tr);
4873 
4874 	mutex_unlock(&trace_types_lock);
4875 
4876 	free_trace_iter_content(iter);
4877 	seq_release_private(inode, file);
4878 
4879 	return 0;
4880 }
4881 
tracing_release_generic_tr(struct inode * inode,struct file * file)4882 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4883 {
4884 	struct trace_array *tr = inode->i_private;
4885 
4886 	trace_array_put(tr);
4887 	return 0;
4888 }
4889 
tracing_single_release_tr(struct inode * inode,struct file * file)4890 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4891 {
4892 	struct trace_array *tr = inode->i_private;
4893 
4894 	trace_array_put(tr);
4895 
4896 	return single_release(inode, file);
4897 }
4898 
tracing_open(struct inode * inode,struct file * file)4899 static int tracing_open(struct inode *inode, struct file *file)
4900 {
4901 	struct trace_array *tr = inode->i_private;
4902 	struct trace_iterator *iter;
4903 	int ret;
4904 
4905 	ret = tracing_check_open_get_tr(tr);
4906 	if (ret)
4907 		return ret;
4908 
4909 	/* If this file was open for write, then erase contents */
4910 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4911 		int cpu = tracing_get_cpu(inode);
4912 		struct array_buffer *trace_buf = &tr->array_buffer;
4913 
4914 #ifdef CONFIG_TRACER_MAX_TRACE
4915 		if (tr->current_trace->print_max)
4916 			trace_buf = &tr->max_buffer;
4917 #endif
4918 
4919 		if (cpu == RING_BUFFER_ALL_CPUS)
4920 			tracing_reset_online_cpus(trace_buf);
4921 		else
4922 			tracing_reset_cpu(trace_buf, cpu);
4923 	}
4924 
4925 	if (file->f_mode & FMODE_READ) {
4926 		iter = __tracing_open(inode, file, false);
4927 		if (IS_ERR(iter))
4928 			ret = PTR_ERR(iter);
4929 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4930 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4931 	}
4932 
4933 	if (ret < 0)
4934 		trace_array_put(tr);
4935 
4936 	return ret;
4937 }
4938 
4939 /*
4940  * Some tracers are not suitable for instance buffers.
4941  * A tracer is always available for the global array (toplevel)
4942  * or if it explicitly states that it is.
4943  */
4944 static bool
trace_ok_for_array(struct tracer * t,struct trace_array * tr)4945 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4946 {
4947 #ifdef CONFIG_TRACER_SNAPSHOT
4948 	/* arrays with mapped buffer range do not have snapshots */
4949 	if (tr->range_addr_start && t->use_max_tr)
4950 		return false;
4951 #endif
4952 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4953 }
4954 
4955 /* Find the next tracer that this trace array may use */
4956 static struct tracer *
get_tracer_for_array(struct trace_array * tr,struct tracer * t)4957 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4958 {
4959 	while (t && !trace_ok_for_array(t, tr))
4960 		t = t->next;
4961 
4962 	return t;
4963 }
4964 
4965 static void *
t_next(struct seq_file * m,void * v,loff_t * pos)4966 t_next(struct seq_file *m, void *v, loff_t *pos)
4967 {
4968 	struct trace_array *tr = m->private;
4969 	struct tracer *t = v;
4970 
4971 	(*pos)++;
4972 
4973 	if (t)
4974 		t = get_tracer_for_array(tr, t->next);
4975 
4976 	return t;
4977 }
4978 
t_start(struct seq_file * m,loff_t * pos)4979 static void *t_start(struct seq_file *m, loff_t *pos)
4980 {
4981 	struct trace_array *tr = m->private;
4982 	struct tracer *t;
4983 	loff_t l = 0;
4984 
4985 	mutex_lock(&trace_types_lock);
4986 
4987 	t = get_tracer_for_array(tr, trace_types);
4988 	for (; t && l < *pos; t = t_next(m, t, &l))
4989 			;
4990 
4991 	return t;
4992 }
4993 
t_stop(struct seq_file * m,void * p)4994 static void t_stop(struct seq_file *m, void *p)
4995 {
4996 	mutex_unlock(&trace_types_lock);
4997 }
4998 
t_show(struct seq_file * m,void * v)4999 static int t_show(struct seq_file *m, void *v)
5000 {
5001 	struct tracer *t = v;
5002 
5003 	if (!t)
5004 		return 0;
5005 
5006 	seq_puts(m, t->name);
5007 	if (t->next)
5008 		seq_putc(m, ' ');
5009 	else
5010 		seq_putc(m, '\n');
5011 
5012 	return 0;
5013 }
5014 
5015 static const struct seq_operations show_traces_seq_ops = {
5016 	.start		= t_start,
5017 	.next		= t_next,
5018 	.stop		= t_stop,
5019 	.show		= t_show,
5020 };
5021 
show_traces_open(struct inode * inode,struct file * file)5022 static int show_traces_open(struct inode *inode, struct file *file)
5023 {
5024 	struct trace_array *tr = inode->i_private;
5025 	struct seq_file *m;
5026 	int ret;
5027 
5028 	ret = tracing_check_open_get_tr(tr);
5029 	if (ret)
5030 		return ret;
5031 
5032 	ret = seq_open(file, &show_traces_seq_ops);
5033 	if (ret) {
5034 		trace_array_put(tr);
5035 		return ret;
5036 	}
5037 
5038 	m = file->private_data;
5039 	m->private = tr;
5040 
5041 	return 0;
5042 }
5043 
tracing_seq_release(struct inode * inode,struct file * file)5044 static int tracing_seq_release(struct inode *inode, struct file *file)
5045 {
5046 	struct trace_array *tr = inode->i_private;
5047 
5048 	trace_array_put(tr);
5049 	return seq_release(inode, file);
5050 }
5051 
5052 static ssize_t
tracing_write_stub(struct file * filp,const char __user * ubuf,size_t count,loff_t * ppos)5053 tracing_write_stub(struct file *filp, const char __user *ubuf,
5054 		   size_t count, loff_t *ppos)
5055 {
5056 	return count;
5057 }
5058 
tracing_lseek(struct file * file,loff_t offset,int whence)5059 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5060 {
5061 	int ret;
5062 
5063 	if (file->f_mode & FMODE_READ)
5064 		ret = seq_lseek(file, offset, whence);
5065 	else
5066 		file->f_pos = ret = 0;
5067 
5068 	return ret;
5069 }
5070 
5071 static const struct file_operations tracing_fops = {
5072 	.open		= tracing_open,
5073 	.read		= seq_read,
5074 	.read_iter	= seq_read_iter,
5075 	.splice_read	= copy_splice_read,
5076 	.write		= tracing_write_stub,
5077 	.llseek		= tracing_lseek,
5078 	.release	= tracing_release,
5079 };
5080 
5081 static const struct file_operations show_traces_fops = {
5082 	.open		= show_traces_open,
5083 	.read		= seq_read,
5084 	.llseek		= seq_lseek,
5085 	.release	= tracing_seq_release,
5086 };
5087 
5088 static ssize_t
tracing_cpumask_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)5089 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5090 		     size_t count, loff_t *ppos)
5091 {
5092 	struct trace_array *tr = file_inode(filp)->i_private;
5093 	char *mask_str;
5094 	int len;
5095 
5096 	len = snprintf(NULL, 0, "%*pb\n",
5097 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5098 	mask_str = kmalloc(len, GFP_KERNEL);
5099 	if (!mask_str)
5100 		return -ENOMEM;
5101 
5102 	len = snprintf(mask_str, len, "%*pb\n",
5103 		       cpumask_pr_args(tr->tracing_cpumask));
5104 	if (len >= count) {
5105 		count = -EINVAL;
5106 		goto out_err;
5107 	}
5108 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5109 
5110 out_err:
5111 	kfree(mask_str);
5112 
5113 	return count;
5114 }
5115 
tracing_set_cpumask(struct trace_array * tr,cpumask_var_t tracing_cpumask_new)5116 int tracing_set_cpumask(struct trace_array *tr,
5117 			cpumask_var_t tracing_cpumask_new)
5118 {
5119 	int cpu;
5120 
5121 	if (!tr)
5122 		return -EINVAL;
5123 
5124 	local_irq_disable();
5125 	arch_spin_lock(&tr->max_lock);
5126 	for_each_tracing_cpu(cpu) {
5127 		/*
5128 		 * Increase/decrease the disabled counter if we are
5129 		 * about to flip a bit in the cpumask:
5130 		 */
5131 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5132 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5133 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5134 #ifdef CONFIG_TRACER_MAX_TRACE
5135 			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5136 #endif
5137 		}
5138 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5139 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5140 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5141 #ifdef CONFIG_TRACER_MAX_TRACE
5142 			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5143 #endif
5144 		}
5145 	}
5146 	arch_spin_unlock(&tr->max_lock);
5147 	local_irq_enable();
5148 
5149 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5150 
5151 	return 0;
5152 }
5153 
5154 static ssize_t
tracing_cpumask_write(struct file * filp,const char __user * ubuf,size_t count,loff_t * ppos)5155 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5156 		      size_t count, loff_t *ppos)
5157 {
5158 	struct trace_array *tr = file_inode(filp)->i_private;
5159 	cpumask_var_t tracing_cpumask_new;
5160 	int err;
5161 
5162 	if (count == 0 || count > KMALLOC_MAX_SIZE)
5163 		return -EINVAL;
5164 
5165 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5166 		return -ENOMEM;
5167 
5168 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5169 	if (err)
5170 		goto err_free;
5171 
5172 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5173 	if (err)
5174 		goto err_free;
5175 
5176 	free_cpumask_var(tracing_cpumask_new);
5177 
5178 	return count;
5179 
5180 err_free:
5181 	free_cpumask_var(tracing_cpumask_new);
5182 
5183 	return err;
5184 }
5185 
5186 static const struct file_operations tracing_cpumask_fops = {
5187 	.open		= tracing_open_generic_tr,
5188 	.read		= tracing_cpumask_read,
5189 	.write		= tracing_cpumask_write,
5190 	.release	= tracing_release_generic_tr,
5191 	.llseek		= generic_file_llseek,
5192 };
5193 
tracing_trace_options_show(struct seq_file * m,void * v)5194 static int tracing_trace_options_show(struct seq_file *m, void *v)
5195 {
5196 	struct tracer_opt *trace_opts;
5197 	struct trace_array *tr = m->private;
5198 	u32 tracer_flags;
5199 	int i;
5200 
5201 	guard(mutex)(&trace_types_lock);
5202 
5203 	tracer_flags = tr->current_trace->flags->val;
5204 	trace_opts = tr->current_trace->flags->opts;
5205 
5206 	for (i = 0; trace_options[i]; i++) {
5207 		if (tr->trace_flags & (1 << i))
5208 			seq_printf(m, "%s\n", trace_options[i]);
5209 		else
5210 			seq_printf(m, "no%s\n", trace_options[i]);
5211 	}
5212 
5213 	for (i = 0; trace_opts[i].name; i++) {
5214 		if (tracer_flags & trace_opts[i].bit)
5215 			seq_printf(m, "%s\n", trace_opts[i].name);
5216 		else
5217 			seq_printf(m, "no%s\n", trace_opts[i].name);
5218 	}
5219 
5220 	return 0;
5221 }
5222 
__set_tracer_option(struct trace_array * tr,struct tracer_flags * tracer_flags,struct tracer_opt * opts,int neg)5223 static int __set_tracer_option(struct trace_array *tr,
5224 			       struct tracer_flags *tracer_flags,
5225 			       struct tracer_opt *opts, int neg)
5226 {
5227 	struct tracer *trace = tracer_flags->trace;
5228 	int ret;
5229 
5230 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5231 	if (ret)
5232 		return ret;
5233 
5234 	if (neg)
5235 		tracer_flags->val &= ~opts->bit;
5236 	else
5237 		tracer_flags->val |= opts->bit;
5238 	return 0;
5239 }
5240 
5241 /* Try to assign a tracer specific option */
set_tracer_option(struct trace_array * tr,char * cmp,int neg)5242 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5243 {
5244 	struct tracer *trace = tr->current_trace;
5245 	struct tracer_flags *tracer_flags = trace->flags;
5246 	struct tracer_opt *opts = NULL;
5247 	int i;
5248 
5249 	for (i = 0; tracer_flags->opts[i].name; i++) {
5250 		opts = &tracer_flags->opts[i];
5251 
5252 		if (strcmp(cmp, opts->name) == 0)
5253 			return __set_tracer_option(tr, trace->flags, opts, neg);
5254 	}
5255 
5256 	return -EINVAL;
5257 }
5258 
5259 /* Some tracers require overwrite to stay enabled */
trace_keep_overwrite(struct tracer * tracer,u32 mask,int set)5260 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5261 {
5262 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5263 		return -1;
5264 
5265 	return 0;
5266 }
5267 
set_tracer_flag(struct trace_array * tr,unsigned int mask,int enabled)5268 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5269 {
5270 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5271 	    (mask == TRACE_ITER_RECORD_CMD) ||
5272 	    (mask == TRACE_ITER_TRACE_PRINTK) ||
5273 	    (mask == TRACE_ITER_COPY_MARKER))
5274 		lockdep_assert_held(&event_mutex);
5275 
5276 	/* do nothing if flag is already set */
5277 	if (!!(tr->trace_flags & mask) == !!enabled)
5278 		return 0;
5279 
5280 	/* Give the tracer a chance to approve the change */
5281 	if (tr->current_trace->flag_changed)
5282 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5283 			return -EINVAL;
5284 
5285 	if (mask == TRACE_ITER_TRACE_PRINTK) {
5286 		if (enabled) {
5287 			update_printk_trace(tr);
5288 		} else {
5289 			/*
5290 			 * The global_trace cannot clear this.
5291 			 * It's flag only gets cleared if another instance sets it.
5292 			 */
5293 			if (printk_trace == &global_trace)
5294 				return -EINVAL;
5295 			/*
5296 			 * An instance must always have it set.
5297 			 * by default, that's the global_trace instane.
5298 			 */
5299 			if (printk_trace == tr)
5300 				update_printk_trace(&global_trace);
5301 		}
5302 	}
5303 
5304 	if (mask == TRACE_ITER_COPY_MARKER)
5305 		update_marker_trace(tr, enabled);
5306 
5307 	if (enabled)
5308 		tr->trace_flags |= mask;
5309 	else
5310 		tr->trace_flags &= ~mask;
5311 
5312 	if (mask == TRACE_ITER_RECORD_CMD)
5313 		trace_event_enable_cmd_record(enabled);
5314 
5315 	if (mask == TRACE_ITER_RECORD_TGID) {
5316 
5317 		if (trace_alloc_tgid_map() < 0) {
5318 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5319 			return -ENOMEM;
5320 		}
5321 
5322 		trace_event_enable_tgid_record(enabled);
5323 	}
5324 
5325 	if (mask == TRACE_ITER_EVENT_FORK)
5326 		trace_event_follow_fork(tr, enabled);
5327 
5328 	if (mask == TRACE_ITER_FUNC_FORK)
5329 		ftrace_pid_follow_fork(tr, enabled);
5330 
5331 	if (mask == TRACE_ITER_OVERWRITE) {
5332 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5333 #ifdef CONFIG_TRACER_MAX_TRACE
5334 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5335 #endif
5336 	}
5337 
5338 	if (mask == TRACE_ITER_PRINTK) {
5339 		trace_printk_start_stop_comm(enabled);
5340 		trace_printk_control(enabled);
5341 	}
5342 
5343 	return 0;
5344 }
5345 
trace_set_options(struct trace_array * tr,char * option)5346 int trace_set_options(struct trace_array *tr, char *option)
5347 {
5348 	char *cmp;
5349 	int neg = 0;
5350 	int ret;
5351 	size_t orig_len = strlen(option);
5352 	int len;
5353 
5354 	cmp = strstrip(option);
5355 
5356 	len = str_has_prefix(cmp, "no");
5357 	if (len)
5358 		neg = 1;
5359 
5360 	cmp += len;
5361 
5362 	mutex_lock(&event_mutex);
5363 	mutex_lock(&trace_types_lock);
5364 
5365 	ret = match_string(trace_options, -1, cmp);
5366 	/* If no option could be set, test the specific tracer options */
5367 	if (ret < 0)
5368 		ret = set_tracer_option(tr, cmp, neg);
5369 	else
5370 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5371 
5372 	mutex_unlock(&trace_types_lock);
5373 	mutex_unlock(&event_mutex);
5374 
5375 	/*
5376 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5377 	 * turn it back into a space.
5378 	 */
5379 	if (orig_len > strlen(option))
5380 		option[strlen(option)] = ' ';
5381 
5382 	return ret;
5383 }
5384 
apply_trace_boot_options(void)5385 static void __init apply_trace_boot_options(void)
5386 {
5387 	char *buf = trace_boot_options_buf;
5388 	char *option;
5389 
5390 	while (true) {
5391 		option = strsep(&buf, ",");
5392 
5393 		if (!option)
5394 			break;
5395 
5396 		if (*option)
5397 			trace_set_options(&global_trace, option);
5398 
5399 		/* Put back the comma to allow this to be called again */
5400 		if (buf)
5401 			*(buf - 1) = ',';
5402 	}
5403 }
5404 
5405 static ssize_t
tracing_trace_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)5406 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5407 			size_t cnt, loff_t *ppos)
5408 {
5409 	struct seq_file *m = filp->private_data;
5410 	struct trace_array *tr = m->private;
5411 	char buf[64];
5412 	int ret;
5413 
5414 	if (cnt >= sizeof(buf))
5415 		return -EINVAL;
5416 
5417 	if (copy_from_user(buf, ubuf, cnt))
5418 		return -EFAULT;
5419 
5420 	buf[cnt] = 0;
5421 
5422 	ret = trace_set_options(tr, buf);
5423 	if (ret < 0)
5424 		return ret;
5425 
5426 	*ppos += cnt;
5427 
5428 	return cnt;
5429 }
5430 
tracing_trace_options_open(struct inode * inode,struct file * file)5431 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5432 {
5433 	struct trace_array *tr = inode->i_private;
5434 	int ret;
5435 
5436 	ret = tracing_check_open_get_tr(tr);
5437 	if (ret)
5438 		return ret;
5439 
5440 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5441 	if (ret < 0)
5442 		trace_array_put(tr);
5443 
5444 	return ret;
5445 }
5446 
5447 static const struct file_operations tracing_iter_fops = {
5448 	.open		= tracing_trace_options_open,
5449 	.read		= seq_read,
5450 	.llseek		= seq_lseek,
5451 	.release	= tracing_single_release_tr,
5452 	.write		= tracing_trace_options_write,
5453 };
5454 
5455 static const char readme_msg[] =
5456 	"tracing mini-HOWTO:\n\n"
5457 	"By default tracefs removes all OTH file permission bits.\n"
5458 	"When mounting tracefs an optional group id can be specified\n"
5459 	"which adds the group to every directory and file in tracefs:\n\n"
5460 	"\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
5461 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5462 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5463 	" Important files:\n"
5464 	"  trace\t\t\t- The static contents of the buffer\n"
5465 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5466 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5467 	"  current_tracer\t- function and latency tracers\n"
5468 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5469 	"  error_log\t- error log for failed commands (that support it)\n"
5470 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5471 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5472 	"  trace_clock\t\t- change the clock used to order events\n"
5473 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5474 	"      global:   Synced across CPUs but slows tracing down.\n"
5475 	"     counter:   Not a clock, but just an increment\n"
5476 	"      uptime:   Jiffy counter from time of boot\n"
5477 	"        perf:   Same clock that perf events use\n"
5478 #ifdef CONFIG_X86_64
5479 	"     x86-tsc:   TSC cycle counter\n"
5480 #endif
5481 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5482 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5483 	"    absolute:   Absolute (standalone) timestamp\n"
5484 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5485 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5486 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5487 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5488 	"\t\t\t  Remove sub-buffer with rmdir\n"
5489 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5490 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5491 	"\t\t\t  option name\n"
5492 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5493 #ifdef CONFIG_DYNAMIC_FTRACE
5494 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5495 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5496 	"\t\t\t  functions\n"
5497 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5498 	"\t     modules: Can select a group via module\n"
5499 	"\t      Format: :mod:<module-name>\n"
5500 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5501 	"\t    triggers: a command to perform when function is hit\n"
5502 	"\t      Format: <function>:<trigger>[:count]\n"
5503 	"\t     trigger: traceon, traceoff\n"
5504 	"\t\t      enable_event:<system>:<event>\n"
5505 	"\t\t      disable_event:<system>:<event>\n"
5506 #ifdef CONFIG_STACKTRACE
5507 	"\t\t      stacktrace\n"
5508 #endif
5509 #ifdef CONFIG_TRACER_SNAPSHOT
5510 	"\t\t      snapshot\n"
5511 #endif
5512 	"\t\t      dump\n"
5513 	"\t\t      cpudump\n"
5514 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5515 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5516 	"\t     The first one will disable tracing every time do_fault is hit\n"
5517 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5518 	"\t       The first time do trap is hit and it disables tracing, the\n"
5519 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5520 	"\t       the counter will not decrement. It only decrements when the\n"
5521 	"\t       trigger did work\n"
5522 	"\t     To remove trigger without count:\n"
5523 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5524 	"\t     To remove trigger with a count:\n"
5525 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5526 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5527 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5528 	"\t    modules: Can select a group via module command :mod:\n"
5529 	"\t    Does not accept triggers\n"
5530 #endif /* CONFIG_DYNAMIC_FTRACE */
5531 #ifdef CONFIG_FUNCTION_TRACER
5532 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5533 	"\t\t    (function)\n"
5534 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5535 	"\t\t    (function)\n"
5536 #endif
5537 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5538 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5539 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5540 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5541 #endif
5542 #ifdef CONFIG_TRACER_SNAPSHOT
5543 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5544 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5545 	"\t\t\t  information\n"
5546 #endif
5547 #ifdef CONFIG_STACK_TRACER
5548 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5549 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5550 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5551 	"\t\t\t  new trace)\n"
5552 #ifdef CONFIG_DYNAMIC_FTRACE
5553 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5554 	"\t\t\t  traces\n"
5555 #endif
5556 #endif /* CONFIG_STACK_TRACER */
5557 #ifdef CONFIG_DYNAMIC_EVENTS
5558 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5559 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5560 #endif
5561 #ifdef CONFIG_KPROBE_EVENTS
5562 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5563 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5564 #endif
5565 #ifdef CONFIG_UPROBE_EVENTS
5566 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5567 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5568 #endif
5569 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5570     defined(CONFIG_FPROBE_EVENTS)
5571 	"\t  accepts: event-definitions (one definition per line)\n"
5572 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5573 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5574 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5575 #endif
5576 #ifdef CONFIG_FPROBE_EVENTS
5577 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5578 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5579 #endif
5580 #ifdef CONFIG_HIST_TRIGGERS
5581 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5582 #endif
5583 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5584 	"\t           -:[<group>/][<event>]\n"
5585 #ifdef CONFIG_KPROBE_EVENTS
5586 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5587   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5588 #endif
5589 #ifdef CONFIG_UPROBE_EVENTS
5590   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5591 #endif
5592 	"\t     args: <name>=fetcharg[:type]\n"
5593 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5594 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5595 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5596 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5597 	"\t           <argname>[->field[->field|.field...]],\n"
5598 #endif
5599 #else
5600 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5601 #endif
5602 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5603 	"\t     kernel return probes support: $retval, $arg<N>, $comm\n"
5604 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5605 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5606 	"\t           symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
5607 #ifdef CONFIG_HIST_TRIGGERS
5608 	"\t    field: <stype> <name>;\n"
5609 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5610 	"\t           [unsigned] char/int/long\n"
5611 #endif
5612 	"\t    efield: For event probes ('e' types), the field is on of the fields\n"
5613 	"\t            of the <attached-group>/<attached-event>.\n"
5614 #endif
5615 	"  set_event\t\t- Enables events by name written into it\n"
5616 	"\t\t\t  Can enable module events via: :mod:<module>\n"
5617 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5618 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5619 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5620 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5621 	"\t\t\t  events\n"
5622 	"      filter\t\t- If set, only events passing filter are traced\n"
5623 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5624 	"\t\t\t  <event>:\n"
5625 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5626 	"      filter\t\t- If set, only events passing filter are traced\n"
5627 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5628 	"\t    Format: <trigger>[:count][if <filter>]\n"
5629 	"\t   trigger: traceon, traceoff\n"
5630 	"\t            enable_event:<system>:<event>\n"
5631 	"\t            disable_event:<system>:<event>\n"
5632 #ifdef CONFIG_HIST_TRIGGERS
5633 	"\t            enable_hist:<system>:<event>\n"
5634 	"\t            disable_hist:<system>:<event>\n"
5635 #endif
5636 #ifdef CONFIG_STACKTRACE
5637 	"\t\t    stacktrace\n"
5638 #endif
5639 #ifdef CONFIG_TRACER_SNAPSHOT
5640 	"\t\t    snapshot\n"
5641 #endif
5642 #ifdef CONFIG_HIST_TRIGGERS
5643 	"\t\t    hist (see below)\n"
5644 #endif
5645 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5646 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5647 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5648 	"\t                  events/block/block_unplug/trigger\n"
5649 	"\t   The first disables tracing every time block_unplug is hit.\n"
5650 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5651 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5652 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5653 	"\t   Like function triggers, the counter is only decremented if it\n"
5654 	"\t    enabled or disabled tracing.\n"
5655 	"\t   To remove a trigger without a count:\n"
5656 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5657 	"\t   To remove a trigger with a count:\n"
5658 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5659 	"\t   Filters can be ignored when removing a trigger.\n"
5660 #ifdef CONFIG_HIST_TRIGGERS
5661 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5662 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5663 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5664 	"\t            [:values=<field1[,field2,...]>]\n"
5665 	"\t            [:sort=<field1[,field2,...]>]\n"
5666 	"\t            [:size=#entries]\n"
5667 	"\t            [:pause][:continue][:clear]\n"
5668 	"\t            [:name=histname1]\n"
5669 	"\t            [:nohitcount]\n"
5670 	"\t            [:<handler>.<action>]\n"
5671 	"\t            [if <filter>]\n\n"
5672 	"\t    Note, special fields can be used as well:\n"
5673 	"\t            common_timestamp - to record current timestamp\n"
5674 	"\t            common_cpu - to record the CPU the event happened on\n"
5675 	"\n"
5676 	"\t    A hist trigger variable can be:\n"
5677 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
5678 	"\t        - a reference to another variable e.g. y=$x,\n"
5679 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5680 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5681 	"\n"
5682 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5683 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5684 	"\t    variable reference, field or numeric literal.\n"
5685 	"\n"
5686 	"\t    When a matching event is hit, an entry is added to a hash\n"
5687 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5688 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5689 	"\t    correspond to fields in the event's format description.  Keys\n"
5690 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
5691 	"\t    Compound keys consisting of up to two fields can be specified\n"
5692 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5693 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5694 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5695 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5696 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5697 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5698 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5699 	"\t    its histogram data will be shared with other triggers of the\n"
5700 	"\t    same name, and trigger hits will update this common data.\n\n"
5701 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5702 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5703 	"\t    triggers attached to an event, there will be a table for each\n"
5704 	"\t    trigger in the output.  The table displayed for a named\n"
5705 	"\t    trigger will be the same as any other instance having the\n"
5706 	"\t    same name.  The default format used to display a given field\n"
5707 	"\t    can be modified by appending any of the following modifiers\n"
5708 	"\t    to the field name, as applicable:\n\n"
5709 	"\t            .hex        display a number as a hex value\n"
5710 	"\t            .sym        display an address as a symbol\n"
5711 	"\t            .sym-offset display an address as a symbol and offset\n"
5712 	"\t            .execname   display a common_pid as a program name\n"
5713 	"\t            .syscall    display a syscall id as a syscall name\n"
5714 	"\t            .log2       display log2 value rather than raw number\n"
5715 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5716 	"\t            .usecs      display a common_timestamp in microseconds\n"
5717 	"\t            .percent    display a number of percentage value\n"
5718 	"\t            .graph      display a bar-graph of a value\n\n"
5719 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5720 	"\t    trigger or to start a hist trigger but not log any events\n"
5721 	"\t    until told to do so.  'continue' can be used to start or\n"
5722 	"\t    restart a paused hist trigger.\n\n"
5723 	"\t    The 'clear' parameter will clear the contents of a running\n"
5724 	"\t    hist trigger and leave its current paused/active state\n"
5725 	"\t    unchanged.\n\n"
5726 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5727 	"\t    raw hitcount in the histogram.\n\n"
5728 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5729 	"\t    have one event conditionally start and stop another event's\n"
5730 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5731 	"\t    the enable_event and disable_event triggers.\n\n"
5732 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5733 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5734 	"\t        <handler>.<action>\n\n"
5735 	"\t    The available handlers are:\n\n"
5736 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5737 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5738 	"\t        onchange(var)            - invoke action if var changes\n\n"
5739 	"\t    The available actions are:\n\n"
5740 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5741 	"\t        save(field,...)                      - save current event fields\n"
5742 #ifdef CONFIG_TRACER_SNAPSHOT
5743 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5744 #endif
5745 #ifdef CONFIG_SYNTH_EVENTS
5746 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5747 	"\t  Write into this file to define/undefine new synthetic events.\n"
5748 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5749 #endif
5750 #endif
5751 ;
5752 
5753 static ssize_t
tracing_readme_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5754 tracing_readme_read(struct file *filp, char __user *ubuf,
5755 		       size_t cnt, loff_t *ppos)
5756 {
5757 	return simple_read_from_buffer(ubuf, cnt, ppos,
5758 					readme_msg, strlen(readme_msg));
5759 }
5760 
5761 static const struct file_operations tracing_readme_fops = {
5762 	.open		= tracing_open_generic,
5763 	.read		= tracing_readme_read,
5764 	.llseek		= generic_file_llseek,
5765 };
5766 
5767 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5768 static union trace_eval_map_item *
update_eval_map(union trace_eval_map_item * ptr)5769 update_eval_map(union trace_eval_map_item *ptr)
5770 {
5771 	if (!ptr->map.eval_string) {
5772 		if (ptr->tail.next) {
5773 			ptr = ptr->tail.next;
5774 			/* Set ptr to the next real item (skip head) */
5775 			ptr++;
5776 		} else
5777 			return NULL;
5778 	}
5779 	return ptr;
5780 }
5781 
eval_map_next(struct seq_file * m,void * v,loff_t * pos)5782 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5783 {
5784 	union trace_eval_map_item *ptr = v;
5785 
5786 	/*
5787 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5788 	 * This really should never happen.
5789 	 */
5790 	(*pos)++;
5791 	ptr = update_eval_map(ptr);
5792 	if (WARN_ON_ONCE(!ptr))
5793 		return NULL;
5794 
5795 	ptr++;
5796 	ptr = update_eval_map(ptr);
5797 
5798 	return ptr;
5799 }
5800 
eval_map_start(struct seq_file * m,loff_t * pos)5801 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5802 {
5803 	union trace_eval_map_item *v;
5804 	loff_t l = 0;
5805 
5806 	mutex_lock(&trace_eval_mutex);
5807 
5808 	v = trace_eval_maps;
5809 	if (v)
5810 		v++;
5811 
5812 	while (v && l < *pos) {
5813 		v = eval_map_next(m, v, &l);
5814 	}
5815 
5816 	return v;
5817 }
5818 
eval_map_stop(struct seq_file * m,void * v)5819 static void eval_map_stop(struct seq_file *m, void *v)
5820 {
5821 	mutex_unlock(&trace_eval_mutex);
5822 }
5823 
eval_map_show(struct seq_file * m,void * v)5824 static int eval_map_show(struct seq_file *m, void *v)
5825 {
5826 	union trace_eval_map_item *ptr = v;
5827 
5828 	seq_printf(m, "%s %ld (%s)\n",
5829 		   ptr->map.eval_string, ptr->map.eval_value,
5830 		   ptr->map.system);
5831 
5832 	return 0;
5833 }
5834 
5835 static const struct seq_operations tracing_eval_map_seq_ops = {
5836 	.start		= eval_map_start,
5837 	.next		= eval_map_next,
5838 	.stop		= eval_map_stop,
5839 	.show		= eval_map_show,
5840 };
5841 
tracing_eval_map_open(struct inode * inode,struct file * filp)5842 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5843 {
5844 	int ret;
5845 
5846 	ret = tracing_check_open_get_tr(NULL);
5847 	if (ret)
5848 		return ret;
5849 
5850 	return seq_open(filp, &tracing_eval_map_seq_ops);
5851 }
5852 
5853 static const struct file_operations tracing_eval_map_fops = {
5854 	.open		= tracing_eval_map_open,
5855 	.read		= seq_read,
5856 	.llseek		= seq_lseek,
5857 	.release	= seq_release,
5858 };
5859 
5860 static inline union trace_eval_map_item *
trace_eval_jmp_to_tail(union trace_eval_map_item * ptr)5861 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5862 {
5863 	/* Return tail of array given the head */
5864 	return ptr + ptr->head.length + 1;
5865 }
5866 
5867 static void
trace_insert_eval_map_file(struct module * mod,struct trace_eval_map ** start,int len)5868 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5869 			   int len)
5870 {
5871 	struct trace_eval_map **stop;
5872 	struct trace_eval_map **map;
5873 	union trace_eval_map_item *map_array;
5874 	union trace_eval_map_item *ptr;
5875 
5876 	stop = start + len;
5877 
5878 	/*
5879 	 * The trace_eval_maps contains the map plus a head and tail item,
5880 	 * where the head holds the module and length of array, and the
5881 	 * tail holds a pointer to the next list.
5882 	 */
5883 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5884 	if (!map_array) {
5885 		pr_warn("Unable to allocate trace eval mapping\n");
5886 		return;
5887 	}
5888 
5889 	guard(mutex)(&trace_eval_mutex);
5890 
5891 	if (!trace_eval_maps)
5892 		trace_eval_maps = map_array;
5893 	else {
5894 		ptr = trace_eval_maps;
5895 		for (;;) {
5896 			ptr = trace_eval_jmp_to_tail(ptr);
5897 			if (!ptr->tail.next)
5898 				break;
5899 			ptr = ptr->tail.next;
5900 
5901 		}
5902 		ptr->tail.next = map_array;
5903 	}
5904 	map_array->head.mod = mod;
5905 	map_array->head.length = len;
5906 	map_array++;
5907 
5908 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5909 		map_array->map = **map;
5910 		map_array++;
5911 	}
5912 	memset(map_array, 0, sizeof(*map_array));
5913 }
5914 
trace_create_eval_file(struct dentry * d_tracer)5915 static void trace_create_eval_file(struct dentry *d_tracer)
5916 {
5917 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5918 			  NULL, &tracing_eval_map_fops);
5919 }
5920 
5921 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
trace_create_eval_file(struct dentry * d_tracer)5922 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
trace_insert_eval_map_file(struct module * mod,struct trace_eval_map ** start,int len)5923 static inline void trace_insert_eval_map_file(struct module *mod,
5924 			      struct trace_eval_map **start, int len) { }
5925 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5926 
5927 static void
trace_event_update_with_eval_map(struct module * mod,struct trace_eval_map ** start,int len)5928 trace_event_update_with_eval_map(struct module *mod,
5929 				 struct trace_eval_map **start,
5930 				 int len)
5931 {
5932 	struct trace_eval_map **map;
5933 
5934 	/* Always run sanitizer only if btf_type_tag attr exists. */
5935 	if (len <= 0) {
5936 		if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) &&
5937 		      IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) &&
5938 		      __has_attribute(btf_type_tag)))
5939 			return;
5940 	}
5941 
5942 	map = start;
5943 
5944 	trace_event_update_all(map, len);
5945 
5946 	if (len <= 0)
5947 		return;
5948 
5949 	trace_insert_eval_map_file(mod, start, len);
5950 }
5951 
5952 static ssize_t
tracing_set_trace_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5953 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5954 		       size_t cnt, loff_t *ppos)
5955 {
5956 	struct trace_array *tr = filp->private_data;
5957 	char buf[MAX_TRACER_SIZE+2];
5958 	int r;
5959 
5960 	mutex_lock(&trace_types_lock);
5961 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5962 	mutex_unlock(&trace_types_lock);
5963 
5964 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5965 }
5966 
tracer_init(struct tracer * t,struct trace_array * tr)5967 int tracer_init(struct tracer *t, struct trace_array *tr)
5968 {
5969 	tracing_reset_online_cpus(&tr->array_buffer);
5970 	return t->init(tr);
5971 }
5972 
set_buffer_entries(struct array_buffer * buf,unsigned long val)5973 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5974 {
5975 	int cpu;
5976 
5977 	for_each_tracing_cpu(cpu)
5978 		per_cpu_ptr(buf->data, cpu)->entries = val;
5979 }
5980 
update_buffer_entries(struct array_buffer * buf,int cpu)5981 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5982 {
5983 	if (cpu == RING_BUFFER_ALL_CPUS) {
5984 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5985 	} else {
5986 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5987 	}
5988 }
5989 
5990 #ifdef CONFIG_TRACER_MAX_TRACE
5991 /* resize @tr's buffer to the size of @size_tr's entries */
resize_buffer_duplicate_size(struct array_buffer * trace_buf,struct array_buffer * size_buf,int cpu_id)5992 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5993 					struct array_buffer *size_buf, int cpu_id)
5994 {
5995 	int cpu, ret = 0;
5996 
5997 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5998 		for_each_tracing_cpu(cpu) {
5999 			ret = ring_buffer_resize(trace_buf->buffer,
6000 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6001 			if (ret < 0)
6002 				break;
6003 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6004 				per_cpu_ptr(size_buf->data, cpu)->entries;
6005 		}
6006 	} else {
6007 		ret = ring_buffer_resize(trace_buf->buffer,
6008 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6009 		if (ret == 0)
6010 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6011 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6012 	}
6013 
6014 	return ret;
6015 }
6016 #endif /* CONFIG_TRACER_MAX_TRACE */
6017 
__tracing_resize_ring_buffer(struct trace_array * tr,unsigned long size,int cpu)6018 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6019 					unsigned long size, int cpu)
6020 {
6021 	int ret;
6022 
6023 	/*
6024 	 * If kernel or user changes the size of the ring buffer
6025 	 * we use the size that was given, and we can forget about
6026 	 * expanding it later.
6027 	 */
6028 	trace_set_ring_buffer_expanded(tr);
6029 
6030 	/* May be called before buffers are initialized */
6031 	if (!tr->array_buffer.buffer)
6032 		return 0;
6033 
6034 	/* Do not allow tracing while resizing ring buffer */
6035 	tracing_stop_tr(tr);
6036 
6037 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6038 	if (ret < 0)
6039 		goto out_start;
6040 
6041 #ifdef CONFIG_TRACER_MAX_TRACE
6042 	if (!tr->allocated_snapshot)
6043 		goto out;
6044 
6045 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6046 	if (ret < 0) {
6047 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6048 						     &tr->array_buffer, cpu);
6049 		if (r < 0) {
6050 			/*
6051 			 * AARGH! We are left with different
6052 			 * size max buffer!!!!
6053 			 * The max buffer is our "snapshot" buffer.
6054 			 * When a tracer needs a snapshot (one of the
6055 			 * latency tracers), it swaps the max buffer
6056 			 * with the saved snap shot. We succeeded to
6057 			 * update the size of the main buffer, but failed to
6058 			 * update the size of the max buffer. But when we tried
6059 			 * to reset the main buffer to the original size, we
6060 			 * failed there too. This is very unlikely to
6061 			 * happen, but if it does, warn and kill all
6062 			 * tracing.
6063 			 */
6064 			WARN_ON(1);
6065 			tracing_disabled = 1;
6066 		}
6067 		goto out_start;
6068 	}
6069 
6070 	update_buffer_entries(&tr->max_buffer, cpu);
6071 
6072  out:
6073 #endif /* CONFIG_TRACER_MAX_TRACE */
6074 
6075 	update_buffer_entries(&tr->array_buffer, cpu);
6076  out_start:
6077 	tracing_start_tr(tr);
6078 	return ret;
6079 }
6080 
tracing_resize_ring_buffer(struct trace_array * tr,unsigned long size,int cpu_id)6081 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6082 				  unsigned long size, int cpu_id)
6083 {
6084 	guard(mutex)(&trace_types_lock);
6085 
6086 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6087 		/* make sure, this cpu is enabled in the mask */
6088 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
6089 			return -EINVAL;
6090 	}
6091 
6092 	return __tracing_resize_ring_buffer(tr, size, cpu_id);
6093 }
6094 
6095 struct trace_mod_entry {
6096 	unsigned long	mod_addr;
6097 	char		mod_name[MODULE_NAME_LEN];
6098 };
6099 
6100 struct trace_scratch {
6101 	unsigned int		clock_id;
6102 	unsigned long		text_addr;
6103 	unsigned long		nr_entries;
6104 	struct trace_mod_entry	entries[];
6105 };
6106 
6107 static DEFINE_MUTEX(scratch_mutex);
6108 
cmp_mod_entry(const void * key,const void * pivot)6109 static int cmp_mod_entry(const void *key, const void *pivot)
6110 {
6111 	unsigned long addr = (unsigned long)key;
6112 	const struct trace_mod_entry *ent = pivot;
6113 
6114 	if (addr >= ent[0].mod_addr && addr < ent[1].mod_addr)
6115 		return 0;
6116 	else
6117 		return addr - ent->mod_addr;
6118 }
6119 
6120 /**
6121  * trace_adjust_address() - Adjust prev boot address to current address.
6122  * @tr: Persistent ring buffer's trace_array.
6123  * @addr: Address in @tr which is adjusted.
6124  */
trace_adjust_address(struct trace_array * tr,unsigned long addr)6125 unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
6126 {
6127 	struct trace_module_delta *module_delta;
6128 	struct trace_scratch *tscratch;
6129 	struct trace_mod_entry *entry;
6130 	unsigned long raddr;
6131 	int idx = 0, nr_entries;
6132 
6133 	/* If we don't have last boot delta, return the address */
6134 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6135 		return addr;
6136 
6137 	/* tr->module_delta must be protected by rcu. */
6138 	guard(rcu)();
6139 	tscratch = tr->scratch;
6140 	/* if there is no tscrach, module_delta must be NULL. */
6141 	module_delta = READ_ONCE(tr->module_delta);
6142 	if (!module_delta || !tscratch->nr_entries ||
6143 	    tscratch->entries[0].mod_addr > addr) {
6144 		raddr = addr + tr->text_delta;
6145 		return __is_kernel(raddr) || is_kernel_core_data(raddr) ||
6146 			is_kernel_rodata(raddr) ? raddr : addr;
6147 	}
6148 
6149 	/* Note that entries must be sorted. */
6150 	nr_entries = tscratch->nr_entries;
6151 	if (nr_entries == 1 ||
6152 	    tscratch->entries[nr_entries - 1].mod_addr < addr)
6153 		idx = nr_entries - 1;
6154 	else {
6155 		entry = __inline_bsearch((void *)addr,
6156 				tscratch->entries,
6157 				nr_entries - 1,
6158 				sizeof(tscratch->entries[0]),
6159 				cmp_mod_entry);
6160 		if (entry)
6161 			idx = entry - tscratch->entries;
6162 	}
6163 
6164 	return addr + module_delta->delta[idx];
6165 }
6166 
6167 #ifdef CONFIG_MODULES
save_mod(struct module * mod,void * data)6168 static int save_mod(struct module *mod, void *data)
6169 {
6170 	struct trace_array *tr = data;
6171 	struct trace_scratch *tscratch;
6172 	struct trace_mod_entry *entry;
6173 	unsigned int size;
6174 
6175 	tscratch = tr->scratch;
6176 	if (!tscratch)
6177 		return -1;
6178 	size = tr->scratch_size;
6179 
6180 	if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size)
6181 		return -1;
6182 
6183 	entry = &tscratch->entries[tscratch->nr_entries];
6184 
6185 	tscratch->nr_entries++;
6186 
6187 	entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
6188 	strscpy(entry->mod_name, mod->name);
6189 
6190 	return 0;
6191 }
6192 #else
save_mod(struct module * mod,void * data)6193 static int save_mod(struct module *mod, void *data)
6194 {
6195 	return 0;
6196 }
6197 #endif
6198 
update_last_data(struct trace_array * tr)6199 static void update_last_data(struct trace_array *tr)
6200 {
6201 	struct trace_module_delta *module_delta;
6202 	struct trace_scratch *tscratch;
6203 
6204 	if (!(tr->flags & TRACE_ARRAY_FL_BOOT))
6205 		return;
6206 
6207 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6208 		return;
6209 
6210 	/* Only if the buffer has previous boot data clear and update it. */
6211 	tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;
6212 
6213 	/* Reset the module list and reload them */
6214 	if (tr->scratch) {
6215 		struct trace_scratch *tscratch = tr->scratch;
6216 
6217 		tscratch->clock_id = tr->clock_id;
6218 		memset(tscratch->entries, 0,
6219 		       flex_array_size(tscratch, entries, tscratch->nr_entries));
6220 		tscratch->nr_entries = 0;
6221 
6222 		guard(mutex)(&scratch_mutex);
6223 		module_for_each_mod(save_mod, tr);
6224 	}
6225 
6226 	/*
6227 	 * Need to clear all CPU buffers as there cannot be events
6228 	 * from the previous boot mixed with events with this boot
6229 	 * as that will cause a confusing trace. Need to clear all
6230 	 * CPU buffers, even for those that may currently be offline.
6231 	 */
6232 	tracing_reset_all_cpus(&tr->array_buffer);
6233 
6234 	/* Using current data now */
6235 	tr->text_delta = 0;
6236 
6237 	if (!tr->scratch)
6238 		return;
6239 
6240 	tscratch = tr->scratch;
6241 	module_delta = READ_ONCE(tr->module_delta);
6242 	WRITE_ONCE(tr->module_delta, NULL);
6243 	kfree_rcu(module_delta, rcu);
6244 
6245 	/* Set the persistent ring buffer meta data to this address */
6246 	tscratch->text_addr = (unsigned long)_text;
6247 }
6248 
6249 /**
6250  * tracing_update_buffers - used by tracing facility to expand ring buffers
6251  * @tr: The tracing instance
6252  *
6253  * To save on memory when the tracing is never used on a system with it
6254  * configured in. The ring buffers are set to a minimum size. But once
6255  * a user starts to use the tracing facility, then they need to grow
6256  * to their default size.
6257  *
6258  * This function is to be called when a tracer is about to be used.
6259  */
tracing_update_buffers(struct trace_array * tr)6260 int tracing_update_buffers(struct trace_array *tr)
6261 {
6262 	int ret = 0;
6263 
6264 	mutex_lock(&trace_types_lock);
6265 
6266 	update_last_data(tr);
6267 
6268 	if (!tr->ring_buffer_expanded)
6269 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6270 						RING_BUFFER_ALL_CPUS);
6271 	mutex_unlock(&trace_types_lock);
6272 
6273 	return ret;
6274 }
6275 
6276 struct trace_option_dentry;
6277 
6278 static void
6279 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6280 
6281 /*
6282  * Used to clear out the tracer before deletion of an instance.
6283  * Must have trace_types_lock held.
6284  */
tracing_set_nop(struct trace_array * tr)6285 static void tracing_set_nop(struct trace_array *tr)
6286 {
6287 	if (tr->current_trace == &nop_trace)
6288 		return;
6289 
6290 	tr->current_trace->enabled--;
6291 
6292 	if (tr->current_trace->reset)
6293 		tr->current_trace->reset(tr);
6294 
6295 	tr->current_trace = &nop_trace;
6296 }
6297 
6298 static bool tracer_options_updated;
6299 
add_tracer_options(struct trace_array * tr,struct tracer * t)6300 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6301 {
6302 	/* Only enable if the directory has been created already. */
6303 	if (!tr->dir && !(tr->flags & TRACE_ARRAY_FL_GLOBAL))
6304 		return;
6305 
6306 	/* Only create trace option files after update_tracer_options finish */
6307 	if (!tracer_options_updated)
6308 		return;
6309 
6310 	create_trace_option_files(tr, t);
6311 }
6312 
tracing_set_tracer(struct trace_array * tr,const char * buf)6313 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6314 {
6315 	struct tracer *t;
6316 #ifdef CONFIG_TRACER_MAX_TRACE
6317 	bool had_max_tr;
6318 #endif
6319 	int ret;
6320 
6321 	guard(mutex)(&trace_types_lock);
6322 
6323 	update_last_data(tr);
6324 
6325 	if (!tr->ring_buffer_expanded) {
6326 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6327 						RING_BUFFER_ALL_CPUS);
6328 		if (ret < 0)
6329 			return ret;
6330 		ret = 0;
6331 	}
6332 
6333 	for (t = trace_types; t; t = t->next) {
6334 		if (strcmp(t->name, buf) == 0)
6335 			break;
6336 	}
6337 	if (!t)
6338 		return -EINVAL;
6339 
6340 	if (t == tr->current_trace)
6341 		return 0;
6342 
6343 #ifdef CONFIG_TRACER_SNAPSHOT
6344 	if (t->use_max_tr) {
6345 		local_irq_disable();
6346 		arch_spin_lock(&tr->max_lock);
6347 		ret = tr->cond_snapshot ? -EBUSY : 0;
6348 		arch_spin_unlock(&tr->max_lock);
6349 		local_irq_enable();
6350 		if (ret)
6351 			return ret;
6352 	}
6353 #endif
6354 	/* Some tracers won't work on kernel command line */
6355 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6356 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6357 			t->name);
6358 		return -EINVAL;
6359 	}
6360 
6361 	/* Some tracers are only allowed for the top level buffer */
6362 	if (!trace_ok_for_array(t, tr))
6363 		return -EINVAL;
6364 
6365 	/* If trace pipe files are being read, we can't change the tracer */
6366 	if (tr->trace_ref)
6367 		return -EBUSY;
6368 
6369 	trace_branch_disable();
6370 
6371 	tr->current_trace->enabled--;
6372 
6373 	if (tr->current_trace->reset)
6374 		tr->current_trace->reset(tr);
6375 
6376 #ifdef CONFIG_TRACER_MAX_TRACE
6377 	had_max_tr = tr->current_trace->use_max_tr;
6378 
6379 	/* Current trace needs to be nop_trace before synchronize_rcu */
6380 	tr->current_trace = &nop_trace;
6381 
6382 	if (had_max_tr && !t->use_max_tr) {
6383 		/*
6384 		 * We need to make sure that the update_max_tr sees that
6385 		 * current_trace changed to nop_trace to keep it from
6386 		 * swapping the buffers after we resize it.
6387 		 * The update_max_tr is called from interrupts disabled
6388 		 * so a synchronized_sched() is sufficient.
6389 		 */
6390 		synchronize_rcu();
6391 		free_snapshot(tr);
6392 		tracing_disarm_snapshot(tr);
6393 	}
6394 
6395 	if (!had_max_tr && t->use_max_tr) {
6396 		ret = tracing_arm_snapshot_locked(tr);
6397 		if (ret)
6398 			return ret;
6399 	}
6400 #else
6401 	tr->current_trace = &nop_trace;
6402 #endif
6403 
6404 	if (t->init) {
6405 		ret = tracer_init(t, tr);
6406 		if (ret) {
6407 #ifdef CONFIG_TRACER_MAX_TRACE
6408 			if (t->use_max_tr)
6409 				tracing_disarm_snapshot(tr);
6410 #endif
6411 			return ret;
6412 		}
6413 	}
6414 
6415 	tr->current_trace = t;
6416 	tr->current_trace->enabled++;
6417 	trace_branch_enable(tr);
6418 
6419 	return 0;
6420 }
6421 
6422 static ssize_t
tracing_set_trace_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6423 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6424 			size_t cnt, loff_t *ppos)
6425 {
6426 	struct trace_array *tr = filp->private_data;
6427 	char buf[MAX_TRACER_SIZE+1];
6428 	char *name;
6429 	size_t ret;
6430 	int err;
6431 
6432 	ret = cnt;
6433 
6434 	if (cnt > MAX_TRACER_SIZE)
6435 		cnt = MAX_TRACER_SIZE;
6436 
6437 	if (copy_from_user(buf, ubuf, cnt))
6438 		return -EFAULT;
6439 
6440 	buf[cnt] = 0;
6441 
6442 	name = strim(buf);
6443 
6444 	err = tracing_set_tracer(tr, name);
6445 	if (err)
6446 		return err;
6447 
6448 	*ppos += ret;
6449 
6450 	return ret;
6451 }
6452 
6453 static ssize_t
tracing_nsecs_read(unsigned long * ptr,char __user * ubuf,size_t cnt,loff_t * ppos)6454 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6455 		   size_t cnt, loff_t *ppos)
6456 {
6457 	char buf[64];
6458 	int r;
6459 
6460 	r = snprintf(buf, sizeof(buf), "%ld\n",
6461 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6462 	if (r > sizeof(buf))
6463 		r = sizeof(buf);
6464 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6465 }
6466 
6467 static ssize_t
tracing_nsecs_write(unsigned long * ptr,const char __user * ubuf,size_t cnt,loff_t * ppos)6468 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6469 		    size_t cnt, loff_t *ppos)
6470 {
6471 	unsigned long val;
6472 	int ret;
6473 
6474 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6475 	if (ret)
6476 		return ret;
6477 
6478 	*ptr = val * 1000;
6479 
6480 	return cnt;
6481 }
6482 
6483 static ssize_t
tracing_thresh_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6484 tracing_thresh_read(struct file *filp, char __user *ubuf,
6485 		    size_t cnt, loff_t *ppos)
6486 {
6487 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6488 }
6489 
6490 static ssize_t
tracing_thresh_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6491 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6492 		     size_t cnt, loff_t *ppos)
6493 {
6494 	struct trace_array *tr = filp->private_data;
6495 	int ret;
6496 
6497 	guard(mutex)(&trace_types_lock);
6498 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6499 	if (ret < 0)
6500 		return ret;
6501 
6502 	if (tr->current_trace->update_thresh) {
6503 		ret = tr->current_trace->update_thresh(tr);
6504 		if (ret < 0)
6505 			return ret;
6506 	}
6507 
6508 	return cnt;
6509 }
6510 
6511 #ifdef CONFIG_TRACER_MAX_TRACE
6512 
6513 static ssize_t
tracing_max_lat_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6514 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6515 		     size_t cnt, loff_t *ppos)
6516 {
6517 	struct trace_array *tr = filp->private_data;
6518 
6519 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6520 }
6521 
6522 static ssize_t
tracing_max_lat_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6523 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6524 		      size_t cnt, loff_t *ppos)
6525 {
6526 	struct trace_array *tr = filp->private_data;
6527 
6528 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6529 }
6530 
6531 #endif
6532 
open_pipe_on_cpu(struct trace_array * tr,int cpu)6533 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6534 {
6535 	if (cpu == RING_BUFFER_ALL_CPUS) {
6536 		if (cpumask_empty(tr->pipe_cpumask)) {
6537 			cpumask_setall(tr->pipe_cpumask);
6538 			return 0;
6539 		}
6540 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6541 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6542 		return 0;
6543 	}
6544 	return -EBUSY;
6545 }
6546 
close_pipe_on_cpu(struct trace_array * tr,int cpu)6547 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6548 {
6549 	if (cpu == RING_BUFFER_ALL_CPUS) {
6550 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6551 		cpumask_clear(tr->pipe_cpumask);
6552 	} else {
6553 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6554 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6555 	}
6556 }
6557 
tracing_open_pipe(struct inode * inode,struct file * filp)6558 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6559 {
6560 	struct trace_array *tr = inode->i_private;
6561 	struct trace_iterator *iter;
6562 	int cpu;
6563 	int ret;
6564 
6565 	ret = tracing_check_open_get_tr(tr);
6566 	if (ret)
6567 		return ret;
6568 
6569 	mutex_lock(&trace_types_lock);
6570 	cpu = tracing_get_cpu(inode);
6571 	ret = open_pipe_on_cpu(tr, cpu);
6572 	if (ret)
6573 		goto fail_pipe_on_cpu;
6574 
6575 	/* create a buffer to store the information to pass to userspace */
6576 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6577 	if (!iter) {
6578 		ret = -ENOMEM;
6579 		goto fail_alloc_iter;
6580 	}
6581 
6582 	trace_seq_init(&iter->seq);
6583 	iter->trace = tr->current_trace;
6584 
6585 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6586 		ret = -ENOMEM;
6587 		goto fail;
6588 	}
6589 
6590 	/* trace pipe does not show start of buffer */
6591 	cpumask_setall(iter->started);
6592 
6593 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6594 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6595 
6596 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6597 	if (trace_clocks[tr->clock_id].in_ns)
6598 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6599 
6600 	iter->tr = tr;
6601 	iter->array_buffer = &tr->array_buffer;
6602 	iter->cpu_file = cpu;
6603 	mutex_init(&iter->mutex);
6604 	filp->private_data = iter;
6605 
6606 	if (iter->trace->pipe_open)
6607 		iter->trace->pipe_open(iter);
6608 
6609 	nonseekable_open(inode, filp);
6610 
6611 	tr->trace_ref++;
6612 
6613 	mutex_unlock(&trace_types_lock);
6614 	return ret;
6615 
6616 fail:
6617 	kfree(iter);
6618 fail_alloc_iter:
6619 	close_pipe_on_cpu(tr, cpu);
6620 fail_pipe_on_cpu:
6621 	__trace_array_put(tr);
6622 	mutex_unlock(&trace_types_lock);
6623 	return ret;
6624 }
6625 
tracing_release_pipe(struct inode * inode,struct file * file)6626 static int tracing_release_pipe(struct inode *inode, struct file *file)
6627 {
6628 	struct trace_iterator *iter = file->private_data;
6629 	struct trace_array *tr = inode->i_private;
6630 
6631 	mutex_lock(&trace_types_lock);
6632 
6633 	tr->trace_ref--;
6634 
6635 	if (iter->trace->pipe_close)
6636 		iter->trace->pipe_close(iter);
6637 	close_pipe_on_cpu(tr, iter->cpu_file);
6638 	mutex_unlock(&trace_types_lock);
6639 
6640 	free_trace_iter_content(iter);
6641 	kfree(iter);
6642 
6643 	trace_array_put(tr);
6644 
6645 	return 0;
6646 }
6647 
6648 static __poll_t
trace_poll(struct trace_iterator * iter,struct file * filp,poll_table * poll_table)6649 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6650 {
6651 	struct trace_array *tr = iter->tr;
6652 
6653 	/* Iterators are static, they should be filled or empty */
6654 	if (trace_buffer_iter(iter, iter->cpu_file))
6655 		return EPOLLIN | EPOLLRDNORM;
6656 
6657 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6658 		/*
6659 		 * Always select as readable when in blocking mode
6660 		 */
6661 		return EPOLLIN | EPOLLRDNORM;
6662 	else
6663 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6664 					     filp, poll_table, iter->tr->buffer_percent);
6665 }
6666 
6667 static __poll_t
tracing_poll_pipe(struct file * filp,poll_table * poll_table)6668 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6669 {
6670 	struct trace_iterator *iter = filp->private_data;
6671 
6672 	return trace_poll(iter, filp, poll_table);
6673 }
6674 
6675 /* Must be called with iter->mutex held. */
tracing_wait_pipe(struct file * filp)6676 static int tracing_wait_pipe(struct file *filp)
6677 {
6678 	struct trace_iterator *iter = filp->private_data;
6679 	int ret;
6680 
6681 	while (trace_empty(iter)) {
6682 
6683 		if ((filp->f_flags & O_NONBLOCK)) {
6684 			return -EAGAIN;
6685 		}
6686 
6687 		/*
6688 		 * We block until we read something and tracing is disabled.
6689 		 * We still block if tracing is disabled, but we have never
6690 		 * read anything. This allows a user to cat this file, and
6691 		 * then enable tracing. But after we have read something,
6692 		 * we give an EOF when tracing is again disabled.
6693 		 *
6694 		 * iter->pos will be 0 if we haven't read anything.
6695 		 */
6696 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6697 			break;
6698 
6699 		mutex_unlock(&iter->mutex);
6700 
6701 		ret = wait_on_pipe(iter, 0);
6702 
6703 		mutex_lock(&iter->mutex);
6704 
6705 		if (ret)
6706 			return ret;
6707 	}
6708 
6709 	return 1;
6710 }
6711 
update_last_data_if_empty(struct trace_array * tr)6712 static bool update_last_data_if_empty(struct trace_array *tr)
6713 {
6714 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6715 		return false;
6716 
6717 	if (!ring_buffer_empty(tr->array_buffer.buffer))
6718 		return false;
6719 
6720 	/*
6721 	 * If the buffer contains the last boot data and all per-cpu
6722 	 * buffers are empty, reset it from the kernel side.
6723 	 */
6724 	update_last_data(tr);
6725 	return true;
6726 }
6727 
6728 /*
6729  * Consumer reader.
6730  */
6731 static ssize_t
tracing_read_pipe(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6732 tracing_read_pipe(struct file *filp, char __user *ubuf,
6733 		  size_t cnt, loff_t *ppos)
6734 {
6735 	struct trace_iterator *iter = filp->private_data;
6736 	ssize_t sret;
6737 
6738 	/*
6739 	 * Avoid more than one consumer on a single file descriptor
6740 	 * This is just a matter of traces coherency, the ring buffer itself
6741 	 * is protected.
6742 	 */
6743 	guard(mutex)(&iter->mutex);
6744 
6745 	/* return any leftover data */
6746 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6747 	if (sret != -EBUSY)
6748 		return sret;
6749 
6750 	trace_seq_init(&iter->seq);
6751 
6752 	if (iter->trace->read) {
6753 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6754 		if (sret)
6755 			return sret;
6756 	}
6757 
6758 waitagain:
6759 	if (update_last_data_if_empty(iter->tr))
6760 		return 0;
6761 
6762 	sret = tracing_wait_pipe(filp);
6763 	if (sret <= 0)
6764 		return sret;
6765 
6766 	/* stop when tracing is finished */
6767 	if (trace_empty(iter))
6768 		return 0;
6769 
6770 	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6771 		cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6772 
6773 	/* reset all but tr, trace, and overruns */
6774 	trace_iterator_reset(iter);
6775 	cpumask_clear(iter->started);
6776 	trace_seq_init(&iter->seq);
6777 
6778 	trace_event_read_lock();
6779 	trace_access_lock(iter->cpu_file);
6780 	while (trace_find_next_entry_inc(iter) != NULL) {
6781 		enum print_line_t ret;
6782 		int save_len = iter->seq.seq.len;
6783 
6784 		ret = print_trace_line(iter);
6785 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6786 			/*
6787 			 * If one print_trace_line() fills entire trace_seq in one shot,
6788 			 * trace_seq_to_user() will returns -EBUSY because save_len == 0,
6789 			 * In this case, we need to consume it, otherwise, loop will peek
6790 			 * this event next time, resulting in an infinite loop.
6791 			 */
6792 			if (save_len == 0) {
6793 				iter->seq.full = 0;
6794 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6795 				trace_consume(iter);
6796 				break;
6797 			}
6798 
6799 			/* In other cases, don't print partial lines */
6800 			iter->seq.seq.len = save_len;
6801 			break;
6802 		}
6803 		if (ret != TRACE_TYPE_NO_CONSUME)
6804 			trace_consume(iter);
6805 
6806 		if (trace_seq_used(&iter->seq) >= cnt)
6807 			break;
6808 
6809 		/*
6810 		 * Setting the full flag means we reached the trace_seq buffer
6811 		 * size and we should leave by partial output condition above.
6812 		 * One of the trace_seq_* functions is not used properly.
6813 		 */
6814 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6815 			  iter->ent->type);
6816 	}
6817 	trace_access_unlock(iter->cpu_file);
6818 	trace_event_read_unlock();
6819 
6820 	/* Now copy what we have to the user */
6821 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6822 	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6823 		trace_seq_init(&iter->seq);
6824 
6825 	/*
6826 	 * If there was nothing to send to user, in spite of consuming trace
6827 	 * entries, go back to wait for more entries.
6828 	 */
6829 	if (sret == -EBUSY)
6830 		goto waitagain;
6831 
6832 	return sret;
6833 }
6834 
tracing_spd_release_pipe(struct splice_pipe_desc * spd,unsigned int idx)6835 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6836 				     unsigned int idx)
6837 {
6838 	__free_page(spd->pages[idx]);
6839 }
6840 
6841 static size_t
tracing_fill_pipe_page(size_t rem,struct trace_iterator * iter)6842 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6843 {
6844 	size_t count;
6845 	int save_len;
6846 	int ret;
6847 
6848 	/* Seq buffer is page-sized, exactly what we need. */
6849 	for (;;) {
6850 		save_len = iter->seq.seq.len;
6851 		ret = print_trace_line(iter);
6852 
6853 		if (trace_seq_has_overflowed(&iter->seq)) {
6854 			iter->seq.seq.len = save_len;
6855 			break;
6856 		}
6857 
6858 		/*
6859 		 * This should not be hit, because it should only
6860 		 * be set if the iter->seq overflowed. But check it
6861 		 * anyway to be safe.
6862 		 */
6863 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6864 			iter->seq.seq.len = save_len;
6865 			break;
6866 		}
6867 
6868 		count = trace_seq_used(&iter->seq) - save_len;
6869 		if (rem < count) {
6870 			rem = 0;
6871 			iter->seq.seq.len = save_len;
6872 			break;
6873 		}
6874 
6875 		if (ret != TRACE_TYPE_NO_CONSUME)
6876 			trace_consume(iter);
6877 		rem -= count;
6878 		if (!trace_find_next_entry_inc(iter))	{
6879 			rem = 0;
6880 			iter->ent = NULL;
6881 			break;
6882 		}
6883 	}
6884 
6885 	return rem;
6886 }
6887 
tracing_splice_read_pipe(struct file * filp,loff_t * ppos,struct pipe_inode_info * pipe,size_t len,unsigned int flags)6888 static ssize_t tracing_splice_read_pipe(struct file *filp,
6889 					loff_t *ppos,
6890 					struct pipe_inode_info *pipe,
6891 					size_t len,
6892 					unsigned int flags)
6893 {
6894 	struct page *pages_def[PIPE_DEF_BUFFERS];
6895 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6896 	struct trace_iterator *iter = filp->private_data;
6897 	struct splice_pipe_desc spd = {
6898 		.pages		= pages_def,
6899 		.partial	= partial_def,
6900 		.nr_pages	= 0, /* This gets updated below. */
6901 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6902 		.ops		= &default_pipe_buf_ops,
6903 		.spd_release	= tracing_spd_release_pipe,
6904 	};
6905 	ssize_t ret;
6906 	size_t rem;
6907 	unsigned int i;
6908 
6909 	if (splice_grow_spd(pipe, &spd))
6910 		return -ENOMEM;
6911 
6912 	mutex_lock(&iter->mutex);
6913 
6914 	if (iter->trace->splice_read) {
6915 		ret = iter->trace->splice_read(iter, filp,
6916 					       ppos, pipe, len, flags);
6917 		if (ret)
6918 			goto out_err;
6919 	}
6920 
6921 	ret = tracing_wait_pipe(filp);
6922 	if (ret <= 0)
6923 		goto out_err;
6924 
6925 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6926 		ret = -EFAULT;
6927 		goto out_err;
6928 	}
6929 
6930 	trace_event_read_lock();
6931 	trace_access_lock(iter->cpu_file);
6932 
6933 	/* Fill as many pages as possible. */
6934 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6935 		spd.pages[i] = alloc_page(GFP_KERNEL);
6936 		if (!spd.pages[i])
6937 			break;
6938 
6939 		rem = tracing_fill_pipe_page(rem, iter);
6940 
6941 		/* Copy the data into the page, so we can start over. */
6942 		ret = trace_seq_to_buffer(&iter->seq,
6943 					  page_address(spd.pages[i]),
6944 					  min((size_t)trace_seq_used(&iter->seq),
6945 						  (size_t)PAGE_SIZE));
6946 		if (ret < 0) {
6947 			__free_page(spd.pages[i]);
6948 			break;
6949 		}
6950 		spd.partial[i].offset = 0;
6951 		spd.partial[i].len = ret;
6952 
6953 		trace_seq_init(&iter->seq);
6954 	}
6955 
6956 	trace_access_unlock(iter->cpu_file);
6957 	trace_event_read_unlock();
6958 	mutex_unlock(&iter->mutex);
6959 
6960 	spd.nr_pages = i;
6961 
6962 	if (i)
6963 		ret = splice_to_pipe(pipe, &spd);
6964 	else
6965 		ret = 0;
6966 out:
6967 	splice_shrink_spd(&spd);
6968 	return ret;
6969 
6970 out_err:
6971 	mutex_unlock(&iter->mutex);
6972 	goto out;
6973 }
6974 
6975 static ssize_t
tracing_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6976 tracing_entries_read(struct file *filp, char __user *ubuf,
6977 		     size_t cnt, loff_t *ppos)
6978 {
6979 	struct inode *inode = file_inode(filp);
6980 	struct trace_array *tr = inode->i_private;
6981 	int cpu = tracing_get_cpu(inode);
6982 	char buf[64];
6983 	int r = 0;
6984 	ssize_t ret;
6985 
6986 	mutex_lock(&trace_types_lock);
6987 
6988 	if (cpu == RING_BUFFER_ALL_CPUS) {
6989 		int cpu, buf_size_same;
6990 		unsigned long size;
6991 
6992 		size = 0;
6993 		buf_size_same = 1;
6994 		/* check if all cpu sizes are same */
6995 		for_each_tracing_cpu(cpu) {
6996 			/* fill in the size from first enabled cpu */
6997 			if (size == 0)
6998 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6999 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7000 				buf_size_same = 0;
7001 				break;
7002 			}
7003 		}
7004 
7005 		if (buf_size_same) {
7006 			if (!tr->ring_buffer_expanded)
7007 				r = sprintf(buf, "%lu (expanded: %lu)\n",
7008 					    size >> 10,
7009 					    trace_buf_size >> 10);
7010 			else
7011 				r = sprintf(buf, "%lu\n", size >> 10);
7012 		} else
7013 			r = sprintf(buf, "X\n");
7014 	} else
7015 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7016 
7017 	mutex_unlock(&trace_types_lock);
7018 
7019 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7020 	return ret;
7021 }
7022 
7023 static ssize_t
tracing_entries_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7024 tracing_entries_write(struct file *filp, const char __user *ubuf,
7025 		      size_t cnt, loff_t *ppos)
7026 {
7027 	struct inode *inode = file_inode(filp);
7028 	struct trace_array *tr = inode->i_private;
7029 	unsigned long val;
7030 	int ret;
7031 
7032 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7033 	if (ret)
7034 		return ret;
7035 
7036 	/* must have at least 1 entry */
7037 	if (!val)
7038 		return -EINVAL;
7039 
7040 	/* value is in KB */
7041 	val <<= 10;
7042 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7043 	if (ret < 0)
7044 		return ret;
7045 
7046 	*ppos += cnt;
7047 
7048 	return cnt;
7049 }
7050 
7051 static ssize_t
tracing_total_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)7052 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7053 				size_t cnt, loff_t *ppos)
7054 {
7055 	struct trace_array *tr = filp->private_data;
7056 	char buf[64];
7057 	int r, cpu;
7058 	unsigned long size = 0, expanded_size = 0;
7059 
7060 	mutex_lock(&trace_types_lock);
7061 	for_each_tracing_cpu(cpu) {
7062 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7063 		if (!tr->ring_buffer_expanded)
7064 			expanded_size += trace_buf_size >> 10;
7065 	}
7066 	if (tr->ring_buffer_expanded)
7067 		r = sprintf(buf, "%lu\n", size);
7068 	else
7069 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7070 	mutex_unlock(&trace_types_lock);
7071 
7072 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7073 }
7074 
7075 #define LAST_BOOT_HEADER ((void *)1)
7076 
l_next(struct seq_file * m,void * v,loff_t * pos)7077 static void *l_next(struct seq_file *m, void *v, loff_t *pos)
7078 {
7079 	struct trace_array *tr = m->private;
7080 	struct trace_scratch *tscratch = tr->scratch;
7081 	unsigned int index = *pos;
7082 
7083 	(*pos)++;
7084 
7085 	if (*pos == 1)
7086 		return LAST_BOOT_HEADER;
7087 
7088 	/* Only show offsets of the last boot data */
7089 	if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7090 		return NULL;
7091 
7092 	/* *pos 0 is for the header, 1 is for the first module */
7093 	index--;
7094 
7095 	if (index >= tscratch->nr_entries)
7096 		return NULL;
7097 
7098 	return &tscratch->entries[index];
7099 }
7100 
l_start(struct seq_file * m,loff_t * pos)7101 static void *l_start(struct seq_file *m, loff_t *pos)
7102 {
7103 	mutex_lock(&scratch_mutex);
7104 
7105 	return l_next(m, NULL, pos);
7106 }
7107 
l_stop(struct seq_file * m,void * p)7108 static void l_stop(struct seq_file *m, void *p)
7109 {
7110 	mutex_unlock(&scratch_mutex);
7111 }
7112 
show_last_boot_header(struct seq_file * m,struct trace_array * tr)7113 static void show_last_boot_header(struct seq_file *m, struct trace_array *tr)
7114 {
7115 	struct trace_scratch *tscratch = tr->scratch;
7116 
7117 	/*
7118 	 * Do not leak KASLR address. This only shows the KASLR address of
7119 	 * the last boot. When the ring buffer is started, the LAST_BOOT
7120 	 * flag gets cleared, and this should only report "current".
7121 	 * Otherwise it shows the KASLR address from the previous boot which
7122 	 * should not be the same as the current boot.
7123 	 */
7124 	if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7125 		seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr);
7126 	else
7127 		seq_puts(m, "# Current\n");
7128 }
7129 
l_show(struct seq_file * m,void * v)7130 static int l_show(struct seq_file *m, void *v)
7131 {
7132 	struct trace_array *tr = m->private;
7133 	struct trace_mod_entry *entry = v;
7134 
7135 	if (v == LAST_BOOT_HEADER) {
7136 		show_last_boot_header(m, tr);
7137 		return 0;
7138 	}
7139 
7140 	seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name);
7141 	return 0;
7142 }
7143 
7144 static const struct seq_operations last_boot_seq_ops = {
7145 	.start		= l_start,
7146 	.next		= l_next,
7147 	.stop		= l_stop,
7148 	.show		= l_show,
7149 };
7150 
tracing_last_boot_open(struct inode * inode,struct file * file)7151 static int tracing_last_boot_open(struct inode *inode, struct file *file)
7152 {
7153 	struct trace_array *tr = inode->i_private;
7154 	struct seq_file *m;
7155 	int ret;
7156 
7157 	ret = tracing_check_open_get_tr(tr);
7158 	if (ret)
7159 		return ret;
7160 
7161 	ret = seq_open(file, &last_boot_seq_ops);
7162 	if (ret) {
7163 		trace_array_put(tr);
7164 		return ret;
7165 	}
7166 
7167 	m = file->private_data;
7168 	m->private = tr;
7169 
7170 	return 0;
7171 }
7172 
tracing_buffer_meta_open(struct inode * inode,struct file * filp)7173 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
7174 {
7175 	struct trace_array *tr = inode->i_private;
7176 	int cpu = tracing_get_cpu(inode);
7177 	int ret;
7178 
7179 	ret = tracing_check_open_get_tr(tr);
7180 	if (ret)
7181 		return ret;
7182 
7183 	ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
7184 	if (ret < 0)
7185 		__trace_array_put(tr);
7186 	return ret;
7187 }
7188 
7189 static ssize_t
tracing_free_buffer_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7190 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7191 			  size_t cnt, loff_t *ppos)
7192 {
7193 	/*
7194 	 * There is no need to read what the user has written, this function
7195 	 * is just to make sure that there is no error when "echo" is used
7196 	 */
7197 
7198 	*ppos += cnt;
7199 
7200 	return cnt;
7201 }
7202 
7203 static int
tracing_free_buffer_release(struct inode * inode,struct file * filp)7204 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7205 {
7206 	struct trace_array *tr = inode->i_private;
7207 
7208 	/* disable tracing ? */
7209 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7210 		tracer_tracing_off(tr);
7211 	/* resize the ring buffer to 0 */
7212 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7213 
7214 	trace_array_put(tr);
7215 
7216 	return 0;
7217 }
7218 
7219 #define TRACE_MARKER_MAX_SIZE		4096
7220 
write_marker_to_buffer(struct trace_array * tr,const char __user * ubuf,size_t cnt,unsigned long ip)7221 static ssize_t write_marker_to_buffer(struct trace_array *tr, const char __user *ubuf,
7222 				      size_t cnt, unsigned long ip)
7223 {
7224 	struct ring_buffer_event *event;
7225 	enum event_trigger_type tt = ETT_NONE;
7226 	struct trace_buffer *buffer;
7227 	struct print_entry *entry;
7228 	int meta_size;
7229 	ssize_t written;
7230 	size_t size;
7231 	int len;
7232 
7233 /* Used in tracing_mark_raw_write() as well */
7234 #define FAULTED_STR "<faulted>"
7235 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7236 
7237 	meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
7238  again:
7239 	size = cnt + meta_size;
7240 
7241 	/* If less than "<faulted>", then make sure we can still add that */
7242 	if (cnt < FAULTED_SIZE)
7243 		size += FAULTED_SIZE - cnt;
7244 
7245 	buffer = tr->array_buffer.buffer;
7246 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7247 					    tracing_gen_ctx());
7248 	if (unlikely(!event)) {
7249 		/*
7250 		 * If the size was greater than what was allowed, then
7251 		 * make it smaller and try again.
7252 		 */
7253 		if (size > ring_buffer_max_event_size(buffer)) {
7254 			/* cnt < FAULTED size should never be bigger than max */
7255 			if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
7256 				return -EBADF;
7257 			cnt = ring_buffer_max_event_size(buffer) - meta_size;
7258 			/* The above should only happen once */
7259 			if (WARN_ON_ONCE(cnt + meta_size == size))
7260 				return -EBADF;
7261 			goto again;
7262 		}
7263 
7264 		/* Ring buffer disabled, return as if not open for write */
7265 		return -EBADF;
7266 	}
7267 
7268 	entry = ring_buffer_event_data(event);
7269 	entry->ip = ip;
7270 
7271 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7272 	if (len) {
7273 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7274 		cnt = FAULTED_SIZE;
7275 		written = -EFAULT;
7276 	} else
7277 		written = cnt;
7278 
7279 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7280 		/* do not add \n before testing triggers, but add \0 */
7281 		entry->buf[cnt] = '\0';
7282 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7283 	}
7284 
7285 	if (entry->buf[cnt - 1] != '\n') {
7286 		entry->buf[cnt] = '\n';
7287 		entry->buf[cnt + 1] = '\0';
7288 	} else
7289 		entry->buf[cnt] = '\0';
7290 
7291 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7292 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7293 	__buffer_unlock_commit(buffer, event);
7294 
7295 	if (tt)
7296 		event_triggers_post_call(tr->trace_marker_file, tt);
7297 
7298 	return written;
7299 }
7300 
7301 static ssize_t
tracing_mark_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7302 tracing_mark_write(struct file *filp, const char __user *ubuf,
7303 					size_t cnt, loff_t *fpos)
7304 {
7305 	struct trace_array *tr = filp->private_data;
7306 	ssize_t written = -ENODEV;
7307 	unsigned long ip;
7308 
7309 	if (tracing_disabled)
7310 		return -EINVAL;
7311 
7312 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7313 		return -EINVAL;
7314 
7315 	if ((ssize_t)cnt < 0)
7316 		return -EINVAL;
7317 
7318 	if (cnt > TRACE_MARKER_MAX_SIZE)
7319 		cnt = TRACE_MARKER_MAX_SIZE;
7320 
7321 	/* The selftests expect this function to be the IP address */
7322 	ip = _THIS_IP_;
7323 
7324 	/* The global trace_marker can go to multiple instances */
7325 	if (tr == &global_trace) {
7326 		guard(rcu)();
7327 		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7328 			written = write_marker_to_buffer(tr, ubuf, cnt, ip);
7329 			if (written < 0)
7330 				break;
7331 		}
7332 	} else {
7333 		written = write_marker_to_buffer(tr, ubuf, cnt, ip);
7334 	}
7335 
7336 	return written;
7337 }
7338 
write_raw_marker_to_buffer(struct trace_array * tr,const char __user * ubuf,size_t cnt)7339 static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
7340 					  const char __user *ubuf, size_t cnt)
7341 {
7342 	struct ring_buffer_event *event;
7343 	struct trace_buffer *buffer;
7344 	struct raw_data_entry *entry;
7345 	ssize_t written;
7346 	int size;
7347 	int len;
7348 
7349 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7350 
7351 	size = sizeof(*entry) + cnt;
7352 	if (cnt < FAULT_SIZE_ID)
7353 		size += FAULT_SIZE_ID - cnt;
7354 
7355 	buffer = tr->array_buffer.buffer;
7356 
7357 	if (size > ring_buffer_max_event_size(buffer))
7358 		return -EINVAL;
7359 
7360 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7361 					    tracing_gen_ctx());
7362 	if (!event)
7363 		/* Ring buffer disabled, return as if not open for write */
7364 		return -EBADF;
7365 
7366 	entry = ring_buffer_event_data(event);
7367 
7368 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7369 	if (len) {
7370 		entry->id = -1;
7371 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7372 		written = -EFAULT;
7373 	} else
7374 		written = cnt;
7375 
7376 	__buffer_unlock_commit(buffer, event);
7377 
7378 	return written;
7379 }
7380 
7381 static ssize_t
tracing_mark_raw_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7382 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7383 					size_t cnt, loff_t *fpos)
7384 {
7385 	struct trace_array *tr = filp->private_data;
7386 	ssize_t written = -ENODEV;
7387 
7388 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7389 
7390 	if (tracing_disabled)
7391 		return -EINVAL;
7392 
7393 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7394 		return -EINVAL;
7395 
7396 	/* The marker must at least have a tag id */
7397 	if (cnt < sizeof(unsigned int))
7398 		return -EINVAL;
7399 
7400 	/* The global trace_marker_raw can go to multiple instances */
7401 	if (tr == &global_trace) {
7402 		guard(rcu)();
7403 		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7404 			written = write_raw_marker_to_buffer(tr, ubuf, cnt);
7405 			if (written < 0)
7406 				break;
7407 		}
7408 	} else {
7409 		written = write_raw_marker_to_buffer(tr, ubuf, cnt);
7410 	}
7411 
7412 	return written;
7413 }
7414 
tracing_clock_show(struct seq_file * m,void * v)7415 static int tracing_clock_show(struct seq_file *m, void *v)
7416 {
7417 	struct trace_array *tr = m->private;
7418 	int i;
7419 
7420 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7421 		seq_printf(m,
7422 			"%s%s%s%s", i ? " " : "",
7423 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7424 			i == tr->clock_id ? "]" : "");
7425 	seq_putc(m, '\n');
7426 
7427 	return 0;
7428 }
7429 
tracing_set_clock(struct trace_array * tr,const char * clockstr)7430 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7431 {
7432 	int i;
7433 
7434 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7435 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7436 			break;
7437 	}
7438 	if (i == ARRAY_SIZE(trace_clocks))
7439 		return -EINVAL;
7440 
7441 	mutex_lock(&trace_types_lock);
7442 
7443 	tr->clock_id = i;
7444 
7445 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7446 
7447 	/*
7448 	 * New clock may not be consistent with the previous clock.
7449 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7450 	 */
7451 	tracing_reset_online_cpus(&tr->array_buffer);
7452 
7453 #ifdef CONFIG_TRACER_MAX_TRACE
7454 	if (tr->max_buffer.buffer)
7455 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7456 	tracing_reset_online_cpus(&tr->max_buffer);
7457 #endif
7458 
7459 	if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
7460 		struct trace_scratch *tscratch = tr->scratch;
7461 
7462 		tscratch->clock_id = i;
7463 	}
7464 
7465 	mutex_unlock(&trace_types_lock);
7466 
7467 	return 0;
7468 }
7469 
tracing_clock_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7470 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7471 				   size_t cnt, loff_t *fpos)
7472 {
7473 	struct seq_file *m = filp->private_data;
7474 	struct trace_array *tr = m->private;
7475 	char buf[64];
7476 	const char *clockstr;
7477 	int ret;
7478 
7479 	if (cnt >= sizeof(buf))
7480 		return -EINVAL;
7481 
7482 	if (copy_from_user(buf, ubuf, cnt))
7483 		return -EFAULT;
7484 
7485 	buf[cnt] = 0;
7486 
7487 	clockstr = strstrip(buf);
7488 
7489 	ret = tracing_set_clock(tr, clockstr);
7490 	if (ret)
7491 		return ret;
7492 
7493 	*fpos += cnt;
7494 
7495 	return cnt;
7496 }
7497 
tracing_clock_open(struct inode * inode,struct file * file)7498 static int tracing_clock_open(struct inode *inode, struct file *file)
7499 {
7500 	struct trace_array *tr = inode->i_private;
7501 	int ret;
7502 
7503 	ret = tracing_check_open_get_tr(tr);
7504 	if (ret)
7505 		return ret;
7506 
7507 	ret = single_open(file, tracing_clock_show, inode->i_private);
7508 	if (ret < 0)
7509 		trace_array_put(tr);
7510 
7511 	return ret;
7512 }
7513 
tracing_time_stamp_mode_show(struct seq_file * m,void * v)7514 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7515 {
7516 	struct trace_array *tr = m->private;
7517 
7518 	mutex_lock(&trace_types_lock);
7519 
7520 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7521 		seq_puts(m, "delta [absolute]\n");
7522 	else
7523 		seq_puts(m, "[delta] absolute\n");
7524 
7525 	mutex_unlock(&trace_types_lock);
7526 
7527 	return 0;
7528 }
7529 
tracing_time_stamp_mode_open(struct inode * inode,struct file * file)7530 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7531 {
7532 	struct trace_array *tr = inode->i_private;
7533 	int ret;
7534 
7535 	ret = tracing_check_open_get_tr(tr);
7536 	if (ret)
7537 		return ret;
7538 
7539 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7540 	if (ret < 0)
7541 		trace_array_put(tr);
7542 
7543 	return ret;
7544 }
7545 
tracing_event_time_stamp(struct trace_buffer * buffer,struct ring_buffer_event * rbe)7546 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7547 {
7548 	if (rbe == this_cpu_read(trace_buffered_event))
7549 		return ring_buffer_time_stamp(buffer);
7550 
7551 	return ring_buffer_event_time_stamp(buffer, rbe);
7552 }
7553 
7554 /*
7555  * Set or disable using the per CPU trace_buffer_event when possible.
7556  */
tracing_set_filter_buffering(struct trace_array * tr,bool set)7557 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7558 {
7559 	guard(mutex)(&trace_types_lock);
7560 
7561 	if (set && tr->no_filter_buffering_ref++)
7562 		return 0;
7563 
7564 	if (!set) {
7565 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref))
7566 			return -EINVAL;
7567 
7568 		--tr->no_filter_buffering_ref;
7569 	}
7570 
7571 	return 0;
7572 }
7573 
7574 struct ftrace_buffer_info {
7575 	struct trace_iterator	iter;
7576 	void			*spare;
7577 	unsigned int		spare_cpu;
7578 	unsigned int		spare_size;
7579 	unsigned int		read;
7580 };
7581 
7582 #ifdef CONFIG_TRACER_SNAPSHOT
tracing_snapshot_open(struct inode * inode,struct file * file)7583 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7584 {
7585 	struct trace_array *tr = inode->i_private;
7586 	struct trace_iterator *iter;
7587 	struct seq_file *m;
7588 	int ret;
7589 
7590 	ret = tracing_check_open_get_tr(tr);
7591 	if (ret)
7592 		return ret;
7593 
7594 	if (file->f_mode & FMODE_READ) {
7595 		iter = __tracing_open(inode, file, true);
7596 		if (IS_ERR(iter))
7597 			ret = PTR_ERR(iter);
7598 	} else {
7599 		/* Writes still need the seq_file to hold the private data */
7600 		ret = -ENOMEM;
7601 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7602 		if (!m)
7603 			goto out;
7604 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7605 		if (!iter) {
7606 			kfree(m);
7607 			goto out;
7608 		}
7609 		ret = 0;
7610 
7611 		iter->tr = tr;
7612 		iter->array_buffer = &tr->max_buffer;
7613 		iter->cpu_file = tracing_get_cpu(inode);
7614 		m->private = iter;
7615 		file->private_data = m;
7616 	}
7617 out:
7618 	if (ret < 0)
7619 		trace_array_put(tr);
7620 
7621 	return ret;
7622 }
7623 
tracing_swap_cpu_buffer(void * tr)7624 static void tracing_swap_cpu_buffer(void *tr)
7625 {
7626 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7627 }
7628 
7629 static ssize_t
tracing_snapshot_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7630 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7631 		       loff_t *ppos)
7632 {
7633 	struct seq_file *m = filp->private_data;
7634 	struct trace_iterator *iter = m->private;
7635 	struct trace_array *tr = iter->tr;
7636 	unsigned long val;
7637 	int ret;
7638 
7639 	ret = tracing_update_buffers(tr);
7640 	if (ret < 0)
7641 		return ret;
7642 
7643 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7644 	if (ret)
7645 		return ret;
7646 
7647 	guard(mutex)(&trace_types_lock);
7648 
7649 	if (tr->current_trace->use_max_tr)
7650 		return -EBUSY;
7651 
7652 	local_irq_disable();
7653 	arch_spin_lock(&tr->max_lock);
7654 	if (tr->cond_snapshot)
7655 		ret = -EBUSY;
7656 	arch_spin_unlock(&tr->max_lock);
7657 	local_irq_enable();
7658 	if (ret)
7659 		return ret;
7660 
7661 	switch (val) {
7662 	case 0:
7663 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7664 			return -EINVAL;
7665 		if (tr->allocated_snapshot)
7666 			free_snapshot(tr);
7667 		break;
7668 	case 1:
7669 /* Only allow per-cpu swap if the ring buffer supports it */
7670 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7671 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7672 			return -EINVAL;
7673 #endif
7674 		if (tr->allocated_snapshot)
7675 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7676 					&tr->array_buffer, iter->cpu_file);
7677 
7678 		ret = tracing_arm_snapshot_locked(tr);
7679 		if (ret)
7680 			return ret;
7681 
7682 		/* Now, we're going to swap */
7683 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7684 			local_irq_disable();
7685 			update_max_tr(tr, current, smp_processor_id(), NULL);
7686 			local_irq_enable();
7687 		} else {
7688 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7689 						 (void *)tr, 1);
7690 		}
7691 		tracing_disarm_snapshot(tr);
7692 		break;
7693 	default:
7694 		if (tr->allocated_snapshot) {
7695 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7696 				tracing_reset_online_cpus(&tr->max_buffer);
7697 			else
7698 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7699 		}
7700 		break;
7701 	}
7702 
7703 	if (ret >= 0) {
7704 		*ppos += cnt;
7705 		ret = cnt;
7706 	}
7707 
7708 	return ret;
7709 }
7710 
tracing_snapshot_release(struct inode * inode,struct file * file)7711 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7712 {
7713 	struct seq_file *m = file->private_data;
7714 	int ret;
7715 
7716 	ret = tracing_release(inode, file);
7717 
7718 	if (file->f_mode & FMODE_READ)
7719 		return ret;
7720 
7721 	/* If write only, the seq_file is just a stub */
7722 	if (m)
7723 		kfree(m->private);
7724 	kfree(m);
7725 
7726 	return 0;
7727 }
7728 
7729 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7730 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7731 				    size_t count, loff_t *ppos);
7732 static int tracing_buffers_release(struct inode *inode, struct file *file);
7733 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7734 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7735 
snapshot_raw_open(struct inode * inode,struct file * filp)7736 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7737 {
7738 	struct ftrace_buffer_info *info;
7739 	int ret;
7740 
7741 	/* The following checks for tracefs lockdown */
7742 	ret = tracing_buffers_open(inode, filp);
7743 	if (ret < 0)
7744 		return ret;
7745 
7746 	info = filp->private_data;
7747 
7748 	if (info->iter.trace->use_max_tr) {
7749 		tracing_buffers_release(inode, filp);
7750 		return -EBUSY;
7751 	}
7752 
7753 	info->iter.snapshot = true;
7754 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7755 
7756 	return ret;
7757 }
7758 
7759 #endif /* CONFIG_TRACER_SNAPSHOT */
7760 
7761 
7762 static const struct file_operations tracing_thresh_fops = {
7763 	.open		= tracing_open_generic,
7764 	.read		= tracing_thresh_read,
7765 	.write		= tracing_thresh_write,
7766 	.llseek		= generic_file_llseek,
7767 };
7768 
7769 #ifdef CONFIG_TRACER_MAX_TRACE
7770 static const struct file_operations tracing_max_lat_fops = {
7771 	.open		= tracing_open_generic_tr,
7772 	.read		= tracing_max_lat_read,
7773 	.write		= tracing_max_lat_write,
7774 	.llseek		= generic_file_llseek,
7775 	.release	= tracing_release_generic_tr,
7776 };
7777 #endif
7778 
7779 static const struct file_operations set_tracer_fops = {
7780 	.open		= tracing_open_generic_tr,
7781 	.read		= tracing_set_trace_read,
7782 	.write		= tracing_set_trace_write,
7783 	.llseek		= generic_file_llseek,
7784 	.release	= tracing_release_generic_tr,
7785 };
7786 
7787 static const struct file_operations tracing_pipe_fops = {
7788 	.open		= tracing_open_pipe,
7789 	.poll		= tracing_poll_pipe,
7790 	.read		= tracing_read_pipe,
7791 	.splice_read	= tracing_splice_read_pipe,
7792 	.release	= tracing_release_pipe,
7793 };
7794 
7795 static const struct file_operations tracing_entries_fops = {
7796 	.open		= tracing_open_generic_tr,
7797 	.read		= tracing_entries_read,
7798 	.write		= tracing_entries_write,
7799 	.llseek		= generic_file_llseek,
7800 	.release	= tracing_release_generic_tr,
7801 };
7802 
7803 static const struct file_operations tracing_buffer_meta_fops = {
7804 	.open		= tracing_buffer_meta_open,
7805 	.read		= seq_read,
7806 	.llseek		= seq_lseek,
7807 	.release	= tracing_seq_release,
7808 };
7809 
7810 static const struct file_operations tracing_total_entries_fops = {
7811 	.open		= tracing_open_generic_tr,
7812 	.read		= tracing_total_entries_read,
7813 	.llseek		= generic_file_llseek,
7814 	.release	= tracing_release_generic_tr,
7815 };
7816 
7817 static const struct file_operations tracing_free_buffer_fops = {
7818 	.open		= tracing_open_generic_tr,
7819 	.write		= tracing_free_buffer_write,
7820 	.release	= tracing_free_buffer_release,
7821 };
7822 
7823 static const struct file_operations tracing_mark_fops = {
7824 	.open		= tracing_mark_open,
7825 	.write		= tracing_mark_write,
7826 	.release	= tracing_release_generic_tr,
7827 };
7828 
7829 static const struct file_operations tracing_mark_raw_fops = {
7830 	.open		= tracing_mark_open,
7831 	.write		= tracing_mark_raw_write,
7832 	.release	= tracing_release_generic_tr,
7833 };
7834 
7835 static const struct file_operations trace_clock_fops = {
7836 	.open		= tracing_clock_open,
7837 	.read		= seq_read,
7838 	.llseek		= seq_lseek,
7839 	.release	= tracing_single_release_tr,
7840 	.write		= tracing_clock_write,
7841 };
7842 
7843 static const struct file_operations trace_time_stamp_mode_fops = {
7844 	.open		= tracing_time_stamp_mode_open,
7845 	.read		= seq_read,
7846 	.llseek		= seq_lseek,
7847 	.release	= tracing_single_release_tr,
7848 };
7849 
7850 static const struct file_operations last_boot_fops = {
7851 	.open		= tracing_last_boot_open,
7852 	.read		= seq_read,
7853 	.llseek		= seq_lseek,
7854 	.release	= tracing_seq_release,
7855 };
7856 
7857 #ifdef CONFIG_TRACER_SNAPSHOT
7858 static const struct file_operations snapshot_fops = {
7859 	.open		= tracing_snapshot_open,
7860 	.read		= seq_read,
7861 	.write		= tracing_snapshot_write,
7862 	.llseek		= tracing_lseek,
7863 	.release	= tracing_snapshot_release,
7864 };
7865 
7866 static const struct file_operations snapshot_raw_fops = {
7867 	.open		= snapshot_raw_open,
7868 	.read		= tracing_buffers_read,
7869 	.release	= tracing_buffers_release,
7870 	.splice_read	= tracing_buffers_splice_read,
7871 };
7872 
7873 #endif /* CONFIG_TRACER_SNAPSHOT */
7874 
7875 /*
7876  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7877  * @filp: The active open file structure
7878  * @ubuf: The userspace provided buffer to read value into
7879  * @cnt: The maximum number of bytes to read
7880  * @ppos: The current "file" position
7881  *
7882  * This function implements the write interface for a struct trace_min_max_param.
7883  * The filp->private_data must point to a trace_min_max_param structure that
7884  * defines where to write the value, the min and the max acceptable values,
7885  * and a lock to protect the write.
7886  */
7887 static ssize_t
trace_min_max_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7888 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7889 {
7890 	struct trace_min_max_param *param = filp->private_data;
7891 	u64 val;
7892 	int err;
7893 
7894 	if (!param)
7895 		return -EFAULT;
7896 
7897 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7898 	if (err)
7899 		return err;
7900 
7901 	if (param->lock)
7902 		mutex_lock(param->lock);
7903 
7904 	if (param->min && val < *param->min)
7905 		err = -EINVAL;
7906 
7907 	if (param->max && val > *param->max)
7908 		err = -EINVAL;
7909 
7910 	if (!err)
7911 		*param->val = val;
7912 
7913 	if (param->lock)
7914 		mutex_unlock(param->lock);
7915 
7916 	if (err)
7917 		return err;
7918 
7919 	return cnt;
7920 }
7921 
7922 /*
7923  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7924  * @filp: The active open file structure
7925  * @ubuf: The userspace provided buffer to read value into
7926  * @cnt: The maximum number of bytes to read
7927  * @ppos: The current "file" position
7928  *
7929  * This function implements the read interface for a struct trace_min_max_param.
7930  * The filp->private_data must point to a trace_min_max_param struct with valid
7931  * data.
7932  */
7933 static ssize_t
trace_min_max_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)7934 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7935 {
7936 	struct trace_min_max_param *param = filp->private_data;
7937 	char buf[U64_STR_SIZE];
7938 	int len;
7939 	u64 val;
7940 
7941 	if (!param)
7942 		return -EFAULT;
7943 
7944 	val = *param->val;
7945 
7946 	if (cnt > sizeof(buf))
7947 		cnt = sizeof(buf);
7948 
7949 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7950 
7951 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7952 }
7953 
7954 const struct file_operations trace_min_max_fops = {
7955 	.open		= tracing_open_generic,
7956 	.read		= trace_min_max_read,
7957 	.write		= trace_min_max_write,
7958 };
7959 
7960 #define TRACING_LOG_ERRS_MAX	8
7961 #define TRACING_LOG_LOC_MAX	128
7962 
7963 #define CMD_PREFIX "  Command: "
7964 
7965 struct err_info {
7966 	const char	**errs;	/* ptr to loc-specific array of err strings */
7967 	u8		type;	/* index into errs -> specific err string */
7968 	u16		pos;	/* caret position */
7969 	u64		ts;
7970 };
7971 
7972 struct tracing_log_err {
7973 	struct list_head	list;
7974 	struct err_info		info;
7975 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7976 	char			*cmd;                     /* what caused err */
7977 };
7978 
7979 static DEFINE_MUTEX(tracing_err_log_lock);
7980 
alloc_tracing_log_err(int len)7981 static struct tracing_log_err *alloc_tracing_log_err(int len)
7982 {
7983 	struct tracing_log_err *err;
7984 
7985 	err = kzalloc(sizeof(*err), GFP_KERNEL);
7986 	if (!err)
7987 		return ERR_PTR(-ENOMEM);
7988 
7989 	err->cmd = kzalloc(len, GFP_KERNEL);
7990 	if (!err->cmd) {
7991 		kfree(err);
7992 		return ERR_PTR(-ENOMEM);
7993 	}
7994 
7995 	return err;
7996 }
7997 
free_tracing_log_err(struct tracing_log_err * err)7998 static void free_tracing_log_err(struct tracing_log_err *err)
7999 {
8000 	kfree(err->cmd);
8001 	kfree(err);
8002 }
8003 
get_tracing_log_err(struct trace_array * tr,int len)8004 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
8005 						   int len)
8006 {
8007 	struct tracing_log_err *err;
8008 	char *cmd;
8009 
8010 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
8011 		err = alloc_tracing_log_err(len);
8012 		if (PTR_ERR(err) != -ENOMEM)
8013 			tr->n_err_log_entries++;
8014 
8015 		return err;
8016 	}
8017 	cmd = kzalloc(len, GFP_KERNEL);
8018 	if (!cmd)
8019 		return ERR_PTR(-ENOMEM);
8020 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8021 	kfree(err->cmd);
8022 	err->cmd = cmd;
8023 	list_del(&err->list);
8024 
8025 	return err;
8026 }
8027 
8028 /**
8029  * err_pos - find the position of a string within a command for error careting
8030  * @cmd: The tracing command that caused the error
8031  * @str: The string to position the caret at within @cmd
8032  *
8033  * Finds the position of the first occurrence of @str within @cmd.  The
8034  * return value can be passed to tracing_log_err() for caret placement
8035  * within @cmd.
8036  *
8037  * Returns the index within @cmd of the first occurrence of @str or 0
8038  * if @str was not found.
8039  */
err_pos(char * cmd,const char * str)8040 unsigned int err_pos(char *cmd, const char *str)
8041 {
8042 	char *found;
8043 
8044 	if (WARN_ON(!strlen(cmd)))
8045 		return 0;
8046 
8047 	found = strstr(cmd, str);
8048 	if (found)
8049 		return found - cmd;
8050 
8051 	return 0;
8052 }
8053 
8054 /**
8055  * tracing_log_err - write an error to the tracing error log
8056  * @tr: The associated trace array for the error (NULL for top level array)
8057  * @loc: A string describing where the error occurred
8058  * @cmd: The tracing command that caused the error
8059  * @errs: The array of loc-specific static error strings
8060  * @type: The index into errs[], which produces the specific static err string
8061  * @pos: The position the caret should be placed in the cmd
8062  *
8063  * Writes an error into tracing/error_log of the form:
8064  *
8065  * <loc>: error: <text>
8066  *   Command: <cmd>
8067  *              ^
8068  *
8069  * tracing/error_log is a small log file containing the last
8070  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
8071  * unless there has been a tracing error, and the error log can be
8072  * cleared and have its memory freed by writing the empty string in
8073  * truncation mode to it i.e. echo > tracing/error_log.
8074  *
8075  * NOTE: the @errs array along with the @type param are used to
8076  * produce a static error string - this string is not copied and saved
8077  * when the error is logged - only a pointer to it is saved.  See
8078  * existing callers for examples of how static strings are typically
8079  * defined for use with tracing_log_err().
8080  */
tracing_log_err(struct trace_array * tr,const char * loc,const char * cmd,const char ** errs,u8 type,u16 pos)8081 void tracing_log_err(struct trace_array *tr,
8082 		     const char *loc, const char *cmd,
8083 		     const char **errs, u8 type, u16 pos)
8084 {
8085 	struct tracing_log_err *err;
8086 	int len = 0;
8087 
8088 	if (!tr)
8089 		tr = &global_trace;
8090 
8091 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8092 
8093 	guard(mutex)(&tracing_err_log_lock);
8094 
8095 	err = get_tracing_log_err(tr, len);
8096 	if (PTR_ERR(err) == -ENOMEM)
8097 		return;
8098 
8099 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8100 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8101 
8102 	err->info.errs = errs;
8103 	err->info.type = type;
8104 	err->info.pos = pos;
8105 	err->info.ts = local_clock();
8106 
8107 	list_add_tail(&err->list, &tr->err_log);
8108 }
8109 
clear_tracing_err_log(struct trace_array * tr)8110 static void clear_tracing_err_log(struct trace_array *tr)
8111 {
8112 	struct tracing_log_err *err, *next;
8113 
8114 	mutex_lock(&tracing_err_log_lock);
8115 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
8116 		list_del(&err->list);
8117 		free_tracing_log_err(err);
8118 	}
8119 
8120 	tr->n_err_log_entries = 0;
8121 	mutex_unlock(&tracing_err_log_lock);
8122 }
8123 
tracing_err_log_seq_start(struct seq_file * m,loff_t * pos)8124 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8125 {
8126 	struct trace_array *tr = m->private;
8127 
8128 	mutex_lock(&tracing_err_log_lock);
8129 
8130 	return seq_list_start(&tr->err_log, *pos);
8131 }
8132 
tracing_err_log_seq_next(struct seq_file * m,void * v,loff_t * pos)8133 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8134 {
8135 	struct trace_array *tr = m->private;
8136 
8137 	return seq_list_next(v, &tr->err_log, pos);
8138 }
8139 
tracing_err_log_seq_stop(struct seq_file * m,void * v)8140 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8141 {
8142 	mutex_unlock(&tracing_err_log_lock);
8143 }
8144 
tracing_err_log_show_pos(struct seq_file * m,u16 pos)8145 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8146 {
8147 	u16 i;
8148 
8149 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8150 		seq_putc(m, ' ');
8151 	for (i = 0; i < pos; i++)
8152 		seq_putc(m, ' ');
8153 	seq_puts(m, "^\n");
8154 }
8155 
tracing_err_log_seq_show(struct seq_file * m,void * v)8156 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8157 {
8158 	struct tracing_log_err *err = v;
8159 
8160 	if (err) {
8161 		const char *err_text = err->info.errs[err->info.type];
8162 		u64 sec = err->info.ts;
8163 		u32 nsec;
8164 
8165 		nsec = do_div(sec, NSEC_PER_SEC);
8166 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8167 			   err->loc, err_text);
8168 		seq_printf(m, "%s", err->cmd);
8169 		tracing_err_log_show_pos(m, err->info.pos);
8170 	}
8171 
8172 	return 0;
8173 }
8174 
8175 static const struct seq_operations tracing_err_log_seq_ops = {
8176 	.start  = tracing_err_log_seq_start,
8177 	.next   = tracing_err_log_seq_next,
8178 	.stop   = tracing_err_log_seq_stop,
8179 	.show   = tracing_err_log_seq_show
8180 };
8181 
tracing_err_log_open(struct inode * inode,struct file * file)8182 static int tracing_err_log_open(struct inode *inode, struct file *file)
8183 {
8184 	struct trace_array *tr = inode->i_private;
8185 	int ret = 0;
8186 
8187 	ret = tracing_check_open_get_tr(tr);
8188 	if (ret)
8189 		return ret;
8190 
8191 	/* If this file was opened for write, then erase contents */
8192 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8193 		clear_tracing_err_log(tr);
8194 
8195 	if (file->f_mode & FMODE_READ) {
8196 		ret = seq_open(file, &tracing_err_log_seq_ops);
8197 		if (!ret) {
8198 			struct seq_file *m = file->private_data;
8199 			m->private = tr;
8200 		} else {
8201 			trace_array_put(tr);
8202 		}
8203 	}
8204 	return ret;
8205 }
8206 
tracing_err_log_write(struct file * file,const char __user * buffer,size_t count,loff_t * ppos)8207 static ssize_t tracing_err_log_write(struct file *file,
8208 				     const char __user *buffer,
8209 				     size_t count, loff_t *ppos)
8210 {
8211 	return count;
8212 }
8213 
tracing_err_log_release(struct inode * inode,struct file * file)8214 static int tracing_err_log_release(struct inode *inode, struct file *file)
8215 {
8216 	struct trace_array *tr = inode->i_private;
8217 
8218 	trace_array_put(tr);
8219 
8220 	if (file->f_mode & FMODE_READ)
8221 		seq_release(inode, file);
8222 
8223 	return 0;
8224 }
8225 
8226 static const struct file_operations tracing_err_log_fops = {
8227 	.open           = tracing_err_log_open,
8228 	.write		= tracing_err_log_write,
8229 	.read           = seq_read,
8230 	.llseek         = tracing_lseek,
8231 	.release        = tracing_err_log_release,
8232 };
8233 
tracing_buffers_open(struct inode * inode,struct file * filp)8234 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8235 {
8236 	struct trace_array *tr = inode->i_private;
8237 	struct ftrace_buffer_info *info;
8238 	int ret;
8239 
8240 	ret = tracing_check_open_get_tr(tr);
8241 	if (ret)
8242 		return ret;
8243 
8244 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
8245 	if (!info) {
8246 		trace_array_put(tr);
8247 		return -ENOMEM;
8248 	}
8249 
8250 	mutex_lock(&trace_types_lock);
8251 
8252 	info->iter.tr		= tr;
8253 	info->iter.cpu_file	= tracing_get_cpu(inode);
8254 	info->iter.trace	= tr->current_trace;
8255 	info->iter.array_buffer = &tr->array_buffer;
8256 	info->spare		= NULL;
8257 	/* Force reading ring buffer for first read */
8258 	info->read		= (unsigned int)-1;
8259 
8260 	filp->private_data = info;
8261 
8262 	tr->trace_ref++;
8263 
8264 	mutex_unlock(&trace_types_lock);
8265 
8266 	ret = nonseekable_open(inode, filp);
8267 	if (ret < 0)
8268 		trace_array_put(tr);
8269 
8270 	return ret;
8271 }
8272 
8273 static __poll_t
tracing_buffers_poll(struct file * filp,poll_table * poll_table)8274 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8275 {
8276 	struct ftrace_buffer_info *info = filp->private_data;
8277 	struct trace_iterator *iter = &info->iter;
8278 
8279 	return trace_poll(iter, filp, poll_table);
8280 }
8281 
8282 static ssize_t
tracing_buffers_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)8283 tracing_buffers_read(struct file *filp, char __user *ubuf,
8284 		     size_t count, loff_t *ppos)
8285 {
8286 	struct ftrace_buffer_info *info = filp->private_data;
8287 	struct trace_iterator *iter = &info->iter;
8288 	void *trace_data;
8289 	int page_size;
8290 	ssize_t ret = 0;
8291 	ssize_t size;
8292 
8293 	if (!count)
8294 		return 0;
8295 
8296 #ifdef CONFIG_TRACER_MAX_TRACE
8297 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8298 		return -EBUSY;
8299 #endif
8300 
8301 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8302 
8303 	/* Make sure the spare matches the current sub buffer size */
8304 	if (info->spare) {
8305 		if (page_size != info->spare_size) {
8306 			ring_buffer_free_read_page(iter->array_buffer->buffer,
8307 						   info->spare_cpu, info->spare);
8308 			info->spare = NULL;
8309 		}
8310 	}
8311 
8312 	if (!info->spare) {
8313 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8314 							  iter->cpu_file);
8315 		if (IS_ERR(info->spare)) {
8316 			ret = PTR_ERR(info->spare);
8317 			info->spare = NULL;
8318 		} else {
8319 			info->spare_cpu = iter->cpu_file;
8320 			info->spare_size = page_size;
8321 		}
8322 	}
8323 	if (!info->spare)
8324 		return ret;
8325 
8326 	/* Do we have previous read data to read? */
8327 	if (info->read < page_size)
8328 		goto read;
8329 
8330  again:
8331 	trace_access_lock(iter->cpu_file);
8332 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8333 				    info->spare,
8334 				    count,
8335 				    iter->cpu_file, 0);
8336 	trace_access_unlock(iter->cpu_file);
8337 
8338 	if (ret < 0) {
8339 		if (trace_empty(iter) && !iter->closed) {
8340 			if (update_last_data_if_empty(iter->tr))
8341 				return 0;
8342 
8343 			if ((filp->f_flags & O_NONBLOCK))
8344 				return -EAGAIN;
8345 
8346 			ret = wait_on_pipe(iter, 0);
8347 			if (ret)
8348 				return ret;
8349 
8350 			goto again;
8351 		}
8352 		return 0;
8353 	}
8354 
8355 	info->read = 0;
8356  read:
8357 	size = page_size - info->read;
8358 	if (size > count)
8359 		size = count;
8360 	trace_data = ring_buffer_read_page_data(info->spare);
8361 	ret = copy_to_user(ubuf, trace_data + info->read, size);
8362 	if (ret == size)
8363 		return -EFAULT;
8364 
8365 	size -= ret;
8366 
8367 	*ppos += size;
8368 	info->read += size;
8369 
8370 	return size;
8371 }
8372 
tracing_buffers_flush(struct file * file,fl_owner_t id)8373 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
8374 {
8375 	struct ftrace_buffer_info *info = file->private_data;
8376 	struct trace_iterator *iter = &info->iter;
8377 
8378 	iter->closed = true;
8379 	/* Make sure the waiters see the new wait_index */
8380 	(void)atomic_fetch_inc_release(&iter->wait_index);
8381 
8382 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8383 
8384 	return 0;
8385 }
8386 
tracing_buffers_release(struct inode * inode,struct file * file)8387 static int tracing_buffers_release(struct inode *inode, struct file *file)
8388 {
8389 	struct ftrace_buffer_info *info = file->private_data;
8390 	struct trace_iterator *iter = &info->iter;
8391 
8392 	mutex_lock(&trace_types_lock);
8393 
8394 	iter->tr->trace_ref--;
8395 
8396 	__trace_array_put(iter->tr);
8397 
8398 	if (info->spare)
8399 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8400 					   info->spare_cpu, info->spare);
8401 	kvfree(info);
8402 
8403 	mutex_unlock(&trace_types_lock);
8404 
8405 	return 0;
8406 }
8407 
8408 struct buffer_ref {
8409 	struct trace_buffer	*buffer;
8410 	void			*page;
8411 	int			cpu;
8412 	refcount_t		refcount;
8413 };
8414 
buffer_ref_release(struct buffer_ref * ref)8415 static void buffer_ref_release(struct buffer_ref *ref)
8416 {
8417 	if (!refcount_dec_and_test(&ref->refcount))
8418 		return;
8419 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8420 	kfree(ref);
8421 }
8422 
buffer_pipe_buf_release(struct pipe_inode_info * pipe,struct pipe_buffer * buf)8423 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8424 				    struct pipe_buffer *buf)
8425 {
8426 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8427 
8428 	buffer_ref_release(ref);
8429 	buf->private = 0;
8430 }
8431 
buffer_pipe_buf_get(struct pipe_inode_info * pipe,struct pipe_buffer * buf)8432 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8433 				struct pipe_buffer *buf)
8434 {
8435 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8436 
8437 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8438 		return false;
8439 
8440 	refcount_inc(&ref->refcount);
8441 	return true;
8442 }
8443 
8444 /* Pipe buffer operations for a buffer. */
8445 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8446 	.release		= buffer_pipe_buf_release,
8447 	.get			= buffer_pipe_buf_get,
8448 };
8449 
8450 /*
8451  * Callback from splice_to_pipe(), if we need to release some pages
8452  * at the end of the spd in case we error'ed out in filling the pipe.
8453  */
buffer_spd_release(struct splice_pipe_desc * spd,unsigned int i)8454 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8455 {
8456 	struct buffer_ref *ref =
8457 		(struct buffer_ref *)spd->partial[i].private;
8458 
8459 	buffer_ref_release(ref);
8460 	spd->partial[i].private = 0;
8461 }
8462 
8463 static ssize_t
tracing_buffers_splice_read(struct file * file,loff_t * ppos,struct pipe_inode_info * pipe,size_t len,unsigned int flags)8464 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8465 			    struct pipe_inode_info *pipe, size_t len,
8466 			    unsigned int flags)
8467 {
8468 	struct ftrace_buffer_info *info = file->private_data;
8469 	struct trace_iterator *iter = &info->iter;
8470 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8471 	struct page *pages_def[PIPE_DEF_BUFFERS];
8472 	struct splice_pipe_desc spd = {
8473 		.pages		= pages_def,
8474 		.partial	= partial_def,
8475 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8476 		.ops		= &buffer_pipe_buf_ops,
8477 		.spd_release	= buffer_spd_release,
8478 	};
8479 	struct buffer_ref *ref;
8480 	bool woken = false;
8481 	int page_size;
8482 	int entries, i;
8483 	ssize_t ret = 0;
8484 
8485 #ifdef CONFIG_TRACER_MAX_TRACE
8486 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8487 		return -EBUSY;
8488 #endif
8489 
8490 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8491 	if (*ppos & (page_size - 1))
8492 		return -EINVAL;
8493 
8494 	if (len & (page_size - 1)) {
8495 		if (len < page_size)
8496 			return -EINVAL;
8497 		len &= (~(page_size - 1));
8498 	}
8499 
8500 	if (splice_grow_spd(pipe, &spd))
8501 		return -ENOMEM;
8502 
8503  again:
8504 	trace_access_lock(iter->cpu_file);
8505 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8506 
8507 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8508 		struct page *page;
8509 		int r;
8510 
8511 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8512 		if (!ref) {
8513 			ret = -ENOMEM;
8514 			break;
8515 		}
8516 
8517 		refcount_set(&ref->refcount, 1);
8518 		ref->buffer = iter->array_buffer->buffer;
8519 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8520 		if (IS_ERR(ref->page)) {
8521 			ret = PTR_ERR(ref->page);
8522 			ref->page = NULL;
8523 			kfree(ref);
8524 			break;
8525 		}
8526 		ref->cpu = iter->cpu_file;
8527 
8528 		r = ring_buffer_read_page(ref->buffer, ref->page,
8529 					  len, iter->cpu_file, 1);
8530 		if (r < 0) {
8531 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8532 						   ref->page);
8533 			kfree(ref);
8534 			break;
8535 		}
8536 
8537 		page = virt_to_page(ring_buffer_read_page_data(ref->page));
8538 
8539 		spd.pages[i] = page;
8540 		spd.partial[i].len = page_size;
8541 		spd.partial[i].offset = 0;
8542 		spd.partial[i].private = (unsigned long)ref;
8543 		spd.nr_pages++;
8544 		*ppos += page_size;
8545 
8546 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8547 	}
8548 
8549 	trace_access_unlock(iter->cpu_file);
8550 	spd.nr_pages = i;
8551 
8552 	/* did we read anything? */
8553 	if (!spd.nr_pages) {
8554 
8555 		if (ret)
8556 			goto out;
8557 
8558 		if (woken)
8559 			goto out;
8560 
8561 		ret = -EAGAIN;
8562 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8563 			goto out;
8564 
8565 		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8566 		if (ret)
8567 			goto out;
8568 
8569 		/* No need to wait after waking up when tracing is off */
8570 		if (!tracer_tracing_is_on(iter->tr))
8571 			goto out;
8572 
8573 		/* Iterate one more time to collect any new data then exit */
8574 		woken = true;
8575 
8576 		goto again;
8577 	}
8578 
8579 	ret = splice_to_pipe(pipe, &spd);
8580 out:
8581 	splice_shrink_spd(&spd);
8582 
8583 	return ret;
8584 }
8585 
tracing_buffers_ioctl(struct file * file,unsigned int cmd,unsigned long arg)8586 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8587 {
8588 	struct ftrace_buffer_info *info = file->private_data;
8589 	struct trace_iterator *iter = &info->iter;
8590 	int err;
8591 
8592 	if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
8593 		if (!(file->f_flags & O_NONBLOCK)) {
8594 			err = ring_buffer_wait(iter->array_buffer->buffer,
8595 					       iter->cpu_file,
8596 					       iter->tr->buffer_percent,
8597 					       NULL, NULL);
8598 			if (err)
8599 				return err;
8600 		}
8601 
8602 		return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8603 						  iter->cpu_file);
8604 	} else if (cmd) {
8605 		return -ENOTTY;
8606 	}
8607 
8608 	/*
8609 	 * An ioctl call with cmd 0 to the ring buffer file will wake up all
8610 	 * waiters
8611 	 */
8612 	mutex_lock(&trace_types_lock);
8613 
8614 	/* Make sure the waiters see the new wait_index */
8615 	(void)atomic_fetch_inc_release(&iter->wait_index);
8616 
8617 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8618 
8619 	mutex_unlock(&trace_types_lock);
8620 	return 0;
8621 }
8622 
8623 #ifdef CONFIG_TRACER_MAX_TRACE
get_snapshot_map(struct trace_array * tr)8624 static int get_snapshot_map(struct trace_array *tr)
8625 {
8626 	int err = 0;
8627 
8628 	/*
8629 	 * Called with mmap_lock held. lockdep would be unhappy if we would now
8630 	 * take trace_types_lock. Instead use the specific
8631 	 * snapshot_trigger_lock.
8632 	 */
8633 	spin_lock(&tr->snapshot_trigger_lock);
8634 
8635 	if (tr->snapshot || tr->mapped == UINT_MAX)
8636 		err = -EBUSY;
8637 	else
8638 		tr->mapped++;
8639 
8640 	spin_unlock(&tr->snapshot_trigger_lock);
8641 
8642 	/* Wait for update_max_tr() to observe iter->tr->mapped */
8643 	if (tr->mapped == 1)
8644 		synchronize_rcu();
8645 
8646 	return err;
8647 
8648 }
put_snapshot_map(struct trace_array * tr)8649 static void put_snapshot_map(struct trace_array *tr)
8650 {
8651 	spin_lock(&tr->snapshot_trigger_lock);
8652 	if (!WARN_ON(!tr->mapped))
8653 		tr->mapped--;
8654 	spin_unlock(&tr->snapshot_trigger_lock);
8655 }
8656 #else
get_snapshot_map(struct trace_array * tr)8657 static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
put_snapshot_map(struct trace_array * tr)8658 static inline void put_snapshot_map(struct trace_array *tr) { }
8659 #endif
8660 
tracing_buffers_mmap_close(struct vm_area_struct * vma)8661 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8662 {
8663 	struct ftrace_buffer_info *info = vma->vm_file->private_data;
8664 	struct trace_iterator *iter = &info->iter;
8665 
8666 	WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
8667 	put_snapshot_map(iter->tr);
8668 }
8669 
8670 static const struct vm_operations_struct tracing_buffers_vmops = {
8671 	.close		= tracing_buffers_mmap_close,
8672 };
8673 
tracing_buffers_mmap(struct file * filp,struct vm_area_struct * vma)8674 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
8675 {
8676 	struct ftrace_buffer_info *info = filp->private_data;
8677 	struct trace_iterator *iter = &info->iter;
8678 	int ret = 0;
8679 
8680 	/* A memmap'ed buffer is not supported for user space mmap */
8681 	if (iter->tr->flags & TRACE_ARRAY_FL_MEMMAP)
8682 		return -ENODEV;
8683 
8684 	ret = get_snapshot_map(iter->tr);
8685 	if (ret)
8686 		return ret;
8687 
8688 	ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
8689 	if (ret)
8690 		put_snapshot_map(iter->tr);
8691 
8692 	vma->vm_ops = &tracing_buffers_vmops;
8693 
8694 	return ret;
8695 }
8696 
8697 static const struct file_operations tracing_buffers_fops = {
8698 	.open		= tracing_buffers_open,
8699 	.read		= tracing_buffers_read,
8700 	.poll		= tracing_buffers_poll,
8701 	.release	= tracing_buffers_release,
8702 	.flush		= tracing_buffers_flush,
8703 	.splice_read	= tracing_buffers_splice_read,
8704 	.unlocked_ioctl = tracing_buffers_ioctl,
8705 	.mmap		= tracing_buffers_mmap,
8706 };
8707 
8708 static ssize_t
tracing_stats_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)8709 tracing_stats_read(struct file *filp, char __user *ubuf,
8710 		   size_t count, loff_t *ppos)
8711 {
8712 	struct inode *inode = file_inode(filp);
8713 	struct trace_array *tr = inode->i_private;
8714 	struct array_buffer *trace_buf = &tr->array_buffer;
8715 	int cpu = tracing_get_cpu(inode);
8716 	struct trace_seq *s;
8717 	unsigned long cnt;
8718 	unsigned long long t;
8719 	unsigned long usec_rem;
8720 
8721 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8722 	if (!s)
8723 		return -ENOMEM;
8724 
8725 	trace_seq_init(s);
8726 
8727 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8728 	trace_seq_printf(s, "entries: %ld\n", cnt);
8729 
8730 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8731 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8732 
8733 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8734 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8735 
8736 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8737 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8738 
8739 	if (trace_clocks[tr->clock_id].in_ns) {
8740 		/* local or global for trace_clock */
8741 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8742 		usec_rem = do_div(t, USEC_PER_SEC);
8743 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8744 								t, usec_rem);
8745 
8746 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8747 		usec_rem = do_div(t, USEC_PER_SEC);
8748 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8749 	} else {
8750 		/* counter or tsc mode for trace_clock */
8751 		trace_seq_printf(s, "oldest event ts: %llu\n",
8752 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8753 
8754 		trace_seq_printf(s, "now ts: %llu\n",
8755 				ring_buffer_time_stamp(trace_buf->buffer));
8756 	}
8757 
8758 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8759 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8760 
8761 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8762 	trace_seq_printf(s, "read events: %ld\n", cnt);
8763 
8764 	count = simple_read_from_buffer(ubuf, count, ppos,
8765 					s->buffer, trace_seq_used(s));
8766 
8767 	kfree(s);
8768 
8769 	return count;
8770 }
8771 
8772 static const struct file_operations tracing_stats_fops = {
8773 	.open		= tracing_open_generic_tr,
8774 	.read		= tracing_stats_read,
8775 	.llseek		= generic_file_llseek,
8776 	.release	= tracing_release_generic_tr,
8777 };
8778 
8779 #ifdef CONFIG_DYNAMIC_FTRACE
8780 
8781 static ssize_t
tracing_read_dyn_info(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8782 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8783 		  size_t cnt, loff_t *ppos)
8784 {
8785 	ssize_t ret;
8786 	char *buf;
8787 	int r;
8788 
8789 	/* 512 should be plenty to hold the amount needed */
8790 #define DYN_INFO_BUF_SIZE	512
8791 
8792 	buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
8793 	if (!buf)
8794 		return -ENOMEM;
8795 
8796 	r = scnprintf(buf, DYN_INFO_BUF_SIZE,
8797 		      "%ld pages:%ld groups: %ld\n"
8798 		      "ftrace boot update time = %llu (ns)\n"
8799 		      "ftrace module total update time = %llu (ns)\n",
8800 		      ftrace_update_tot_cnt,
8801 		      ftrace_number_of_pages,
8802 		      ftrace_number_of_groups,
8803 		      ftrace_update_time,
8804 		      ftrace_total_mod_time);
8805 
8806 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8807 	kfree(buf);
8808 	return ret;
8809 }
8810 
8811 static const struct file_operations tracing_dyn_info_fops = {
8812 	.open		= tracing_open_generic,
8813 	.read		= tracing_read_dyn_info,
8814 	.llseek		= generic_file_llseek,
8815 };
8816 #endif /* CONFIG_DYNAMIC_FTRACE */
8817 
8818 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8819 static void
ftrace_snapshot(unsigned long ip,unsigned long parent_ip,struct trace_array * tr,struct ftrace_probe_ops * ops,void * data)8820 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8821 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8822 		void *data)
8823 {
8824 	tracing_snapshot_instance(tr);
8825 }
8826 
8827 static void
ftrace_count_snapshot(unsigned long ip,unsigned long parent_ip,struct trace_array * tr,struct ftrace_probe_ops * ops,void * data)8828 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8829 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8830 		      void *data)
8831 {
8832 	struct ftrace_func_mapper *mapper = data;
8833 	long *count = NULL;
8834 
8835 	if (mapper)
8836 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8837 
8838 	if (count) {
8839 
8840 		if (*count <= 0)
8841 			return;
8842 
8843 		(*count)--;
8844 	}
8845 
8846 	tracing_snapshot_instance(tr);
8847 }
8848 
8849 static int
ftrace_snapshot_print(struct seq_file * m,unsigned long ip,struct ftrace_probe_ops * ops,void * data)8850 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8851 		      struct ftrace_probe_ops *ops, void *data)
8852 {
8853 	struct ftrace_func_mapper *mapper = data;
8854 	long *count = NULL;
8855 
8856 	seq_printf(m, "%ps:", (void *)ip);
8857 
8858 	seq_puts(m, "snapshot");
8859 
8860 	if (mapper)
8861 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8862 
8863 	if (count)
8864 		seq_printf(m, ":count=%ld\n", *count);
8865 	else
8866 		seq_puts(m, ":unlimited\n");
8867 
8868 	return 0;
8869 }
8870 
8871 static int
ftrace_snapshot_init(struct ftrace_probe_ops * ops,struct trace_array * tr,unsigned long ip,void * init_data,void ** data)8872 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8873 		     unsigned long ip, void *init_data, void **data)
8874 {
8875 	struct ftrace_func_mapper *mapper = *data;
8876 
8877 	if (!mapper) {
8878 		mapper = allocate_ftrace_func_mapper();
8879 		if (!mapper)
8880 			return -ENOMEM;
8881 		*data = mapper;
8882 	}
8883 
8884 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8885 }
8886 
8887 static void
ftrace_snapshot_free(struct ftrace_probe_ops * ops,struct trace_array * tr,unsigned long ip,void * data)8888 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8889 		     unsigned long ip, void *data)
8890 {
8891 	struct ftrace_func_mapper *mapper = data;
8892 
8893 	if (!ip) {
8894 		if (!mapper)
8895 			return;
8896 		free_ftrace_func_mapper(mapper, NULL);
8897 		return;
8898 	}
8899 
8900 	ftrace_func_mapper_remove_ip(mapper, ip);
8901 }
8902 
8903 static struct ftrace_probe_ops snapshot_probe_ops = {
8904 	.func			= ftrace_snapshot,
8905 	.print			= ftrace_snapshot_print,
8906 };
8907 
8908 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8909 	.func			= ftrace_count_snapshot,
8910 	.print			= ftrace_snapshot_print,
8911 	.init			= ftrace_snapshot_init,
8912 	.free			= ftrace_snapshot_free,
8913 };
8914 
8915 static int
ftrace_trace_snapshot_callback(struct trace_array * tr,struct ftrace_hash * hash,char * glob,char * cmd,char * param,int enable)8916 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8917 			       char *glob, char *cmd, char *param, int enable)
8918 {
8919 	struct ftrace_probe_ops *ops;
8920 	void *count = (void *)-1;
8921 	char *number;
8922 	int ret;
8923 
8924 	if (!tr)
8925 		return -ENODEV;
8926 
8927 	/* hash funcs only work with set_ftrace_filter */
8928 	if (!enable)
8929 		return -EINVAL;
8930 
8931 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8932 
8933 	if (glob[0] == '!') {
8934 		ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
8935 		if (!ret)
8936 			tracing_disarm_snapshot(tr);
8937 
8938 		return ret;
8939 	}
8940 
8941 	if (!param)
8942 		goto out_reg;
8943 
8944 	number = strsep(&param, ":");
8945 
8946 	if (!strlen(number))
8947 		goto out_reg;
8948 
8949 	/*
8950 	 * We use the callback data field (which is a pointer)
8951 	 * as our counter.
8952 	 */
8953 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8954 	if (ret)
8955 		return ret;
8956 
8957  out_reg:
8958 	ret = tracing_arm_snapshot(tr);
8959 	if (ret < 0)
8960 		goto out;
8961 
8962 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8963 	if (ret < 0)
8964 		tracing_disarm_snapshot(tr);
8965  out:
8966 	return ret < 0 ? ret : 0;
8967 }
8968 
8969 static struct ftrace_func_command ftrace_snapshot_cmd = {
8970 	.name			= "snapshot",
8971 	.func			= ftrace_trace_snapshot_callback,
8972 };
8973 
register_snapshot_cmd(void)8974 static __init int register_snapshot_cmd(void)
8975 {
8976 	return register_ftrace_command(&ftrace_snapshot_cmd);
8977 }
8978 #else
register_snapshot_cmd(void)8979 static inline __init int register_snapshot_cmd(void) { return 0; }
8980 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8981 
tracing_get_dentry(struct trace_array * tr)8982 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8983 {
8984 	/* Top directory uses NULL as the parent */
8985 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8986 		return NULL;
8987 
8988 	if (WARN_ON(!tr->dir))
8989 		return ERR_PTR(-ENODEV);
8990 
8991 	/* All sub buffers have a descriptor */
8992 	return tr->dir;
8993 }
8994 
tracing_dentry_percpu(struct trace_array * tr,int cpu)8995 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8996 {
8997 	struct dentry *d_tracer;
8998 
8999 	if (tr->percpu_dir)
9000 		return tr->percpu_dir;
9001 
9002 	d_tracer = tracing_get_dentry(tr);
9003 	if (IS_ERR(d_tracer))
9004 		return NULL;
9005 
9006 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
9007 
9008 	MEM_FAIL(!tr->percpu_dir,
9009 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
9010 
9011 	return tr->percpu_dir;
9012 }
9013 
9014 static struct dentry *
trace_create_cpu_file(const char * name,umode_t mode,struct dentry * parent,void * data,long cpu,const struct file_operations * fops)9015 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
9016 		      void *data, long cpu, const struct file_operations *fops)
9017 {
9018 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
9019 
9020 	if (ret) /* See tracing_get_cpu() */
9021 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
9022 	return ret;
9023 }
9024 
9025 static void
tracing_init_tracefs_percpu(struct trace_array * tr,long cpu)9026 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
9027 {
9028 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
9029 	struct dentry *d_cpu;
9030 	char cpu_dir[30]; /* 30 characters should be more than enough */
9031 
9032 	if (!d_percpu)
9033 		return;
9034 
9035 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
9036 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
9037 	if (!d_cpu) {
9038 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
9039 		return;
9040 	}
9041 
9042 	/* per cpu trace_pipe */
9043 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
9044 				tr, cpu, &tracing_pipe_fops);
9045 
9046 	/* per cpu trace */
9047 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
9048 				tr, cpu, &tracing_fops);
9049 
9050 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
9051 				tr, cpu, &tracing_buffers_fops);
9052 
9053 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
9054 				tr, cpu, &tracing_stats_fops);
9055 
9056 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
9057 				tr, cpu, &tracing_entries_fops);
9058 
9059 	if (tr->range_addr_start)
9060 		trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
9061 				      tr, cpu, &tracing_buffer_meta_fops);
9062 #ifdef CONFIG_TRACER_SNAPSHOT
9063 	if (!tr->range_addr_start) {
9064 		trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
9065 				      tr, cpu, &snapshot_fops);
9066 
9067 		trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
9068 				      tr, cpu, &snapshot_raw_fops);
9069 	}
9070 #endif
9071 }
9072 
9073 #ifdef CONFIG_FTRACE_SELFTEST
9074 /* Let selftest have access to static functions in this file */
9075 #include "trace_selftest.c"
9076 #endif
9077 
9078 static ssize_t
trace_options_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9079 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
9080 			loff_t *ppos)
9081 {
9082 	struct trace_option_dentry *topt = filp->private_data;
9083 	char *buf;
9084 
9085 	if (topt->flags->val & topt->opt->bit)
9086 		buf = "1\n";
9087 	else
9088 		buf = "0\n";
9089 
9090 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9091 }
9092 
9093 static ssize_t
trace_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9094 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
9095 			 loff_t *ppos)
9096 {
9097 	struct trace_option_dentry *topt = filp->private_data;
9098 	unsigned long val;
9099 	int ret;
9100 
9101 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9102 	if (ret)
9103 		return ret;
9104 
9105 	if (val != 0 && val != 1)
9106 		return -EINVAL;
9107 
9108 	if (!!(topt->flags->val & topt->opt->bit) != val) {
9109 		mutex_lock(&trace_types_lock);
9110 		ret = __set_tracer_option(topt->tr, topt->flags,
9111 					  topt->opt, !val);
9112 		mutex_unlock(&trace_types_lock);
9113 		if (ret)
9114 			return ret;
9115 	}
9116 
9117 	*ppos += cnt;
9118 
9119 	return cnt;
9120 }
9121 
tracing_open_options(struct inode * inode,struct file * filp)9122 static int tracing_open_options(struct inode *inode, struct file *filp)
9123 {
9124 	struct trace_option_dentry *topt = inode->i_private;
9125 	int ret;
9126 
9127 	ret = tracing_check_open_get_tr(topt->tr);
9128 	if (ret)
9129 		return ret;
9130 
9131 	filp->private_data = inode->i_private;
9132 	return 0;
9133 }
9134 
tracing_release_options(struct inode * inode,struct file * file)9135 static int tracing_release_options(struct inode *inode, struct file *file)
9136 {
9137 	struct trace_option_dentry *topt = file->private_data;
9138 
9139 	trace_array_put(topt->tr);
9140 	return 0;
9141 }
9142 
9143 static const struct file_operations trace_options_fops = {
9144 	.open = tracing_open_options,
9145 	.read = trace_options_read,
9146 	.write = trace_options_write,
9147 	.llseek	= generic_file_llseek,
9148 	.release = tracing_release_options,
9149 };
9150 
9151 /*
9152  * In order to pass in both the trace_array descriptor as well as the index
9153  * to the flag that the trace option file represents, the trace_array
9154  * has a character array of trace_flags_index[], which holds the index
9155  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9156  * The address of this character array is passed to the flag option file
9157  * read/write callbacks.
9158  *
9159  * In order to extract both the index and the trace_array descriptor,
9160  * get_tr_index() uses the following algorithm.
9161  *
9162  *   idx = *ptr;
9163  *
9164  * As the pointer itself contains the address of the index (remember
9165  * index[1] == 1).
9166  *
9167  * Then to get the trace_array descriptor, by subtracting that index
9168  * from the ptr, we get to the start of the index itself.
9169  *
9170  *   ptr - idx == &index[0]
9171  *
9172  * Then a simple container_of() from that pointer gets us to the
9173  * trace_array descriptor.
9174  */
get_tr_index(void * data,struct trace_array ** ptr,unsigned int * pindex)9175 static void get_tr_index(void *data, struct trace_array **ptr,
9176 			 unsigned int *pindex)
9177 {
9178 	*pindex = *(unsigned char *)data;
9179 
9180 	*ptr = container_of(data - *pindex, struct trace_array,
9181 			    trace_flags_index);
9182 }
9183 
9184 static ssize_t
trace_options_core_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9185 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9186 			loff_t *ppos)
9187 {
9188 	void *tr_index = filp->private_data;
9189 	struct trace_array *tr;
9190 	unsigned int index;
9191 	char *buf;
9192 
9193 	get_tr_index(tr_index, &tr, &index);
9194 
9195 	if (tr->trace_flags & (1 << index))
9196 		buf = "1\n";
9197 	else
9198 		buf = "0\n";
9199 
9200 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9201 }
9202 
9203 static ssize_t
trace_options_core_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9204 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9205 			 loff_t *ppos)
9206 {
9207 	void *tr_index = filp->private_data;
9208 	struct trace_array *tr;
9209 	unsigned int index;
9210 	unsigned long val;
9211 	int ret;
9212 
9213 	get_tr_index(tr_index, &tr, &index);
9214 
9215 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9216 	if (ret)
9217 		return ret;
9218 
9219 	if (val != 0 && val != 1)
9220 		return -EINVAL;
9221 
9222 	mutex_lock(&event_mutex);
9223 	mutex_lock(&trace_types_lock);
9224 	ret = set_tracer_flag(tr, 1 << index, val);
9225 	mutex_unlock(&trace_types_lock);
9226 	mutex_unlock(&event_mutex);
9227 
9228 	if (ret < 0)
9229 		return ret;
9230 
9231 	*ppos += cnt;
9232 
9233 	return cnt;
9234 }
9235 
9236 static const struct file_operations trace_options_core_fops = {
9237 	.open = tracing_open_generic,
9238 	.read = trace_options_core_read,
9239 	.write = trace_options_core_write,
9240 	.llseek = generic_file_llseek,
9241 };
9242 
trace_create_file(const char * name,umode_t mode,struct dentry * parent,void * data,const struct file_operations * fops)9243 struct dentry *trace_create_file(const char *name,
9244 				 umode_t mode,
9245 				 struct dentry *parent,
9246 				 void *data,
9247 				 const struct file_operations *fops)
9248 {
9249 	struct dentry *ret;
9250 
9251 	ret = tracefs_create_file(name, mode, parent, data, fops);
9252 	if (!ret)
9253 		pr_warn("Could not create tracefs '%s' entry\n", name);
9254 
9255 	return ret;
9256 }
9257 
9258 
trace_options_init_dentry(struct trace_array * tr)9259 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9260 {
9261 	struct dentry *d_tracer;
9262 
9263 	if (tr->options)
9264 		return tr->options;
9265 
9266 	d_tracer = tracing_get_dentry(tr);
9267 	if (IS_ERR(d_tracer))
9268 		return NULL;
9269 
9270 	tr->options = tracefs_create_dir("options", d_tracer);
9271 	if (!tr->options) {
9272 		pr_warn("Could not create tracefs directory 'options'\n");
9273 		return NULL;
9274 	}
9275 
9276 	return tr->options;
9277 }
9278 
9279 static void
create_trace_option_file(struct trace_array * tr,struct trace_option_dentry * topt,struct tracer_flags * flags,struct tracer_opt * opt)9280 create_trace_option_file(struct trace_array *tr,
9281 			 struct trace_option_dentry *topt,
9282 			 struct tracer_flags *flags,
9283 			 struct tracer_opt *opt)
9284 {
9285 	struct dentry *t_options;
9286 
9287 	t_options = trace_options_init_dentry(tr);
9288 	if (!t_options)
9289 		return;
9290 
9291 	topt->flags = flags;
9292 	topt->opt = opt;
9293 	topt->tr = tr;
9294 
9295 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9296 					t_options, topt, &trace_options_fops);
9297 
9298 }
9299 
9300 static void
create_trace_option_files(struct trace_array * tr,struct tracer * tracer)9301 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9302 {
9303 	struct trace_option_dentry *topts;
9304 	struct trace_options *tr_topts;
9305 	struct tracer_flags *flags;
9306 	struct tracer_opt *opts;
9307 	int cnt;
9308 	int i;
9309 
9310 	if (!tracer)
9311 		return;
9312 
9313 	flags = tracer->flags;
9314 
9315 	if (!flags || !flags->opts)
9316 		return;
9317 
9318 	/*
9319 	 * If this is an instance, only create flags for tracers
9320 	 * the instance may have.
9321 	 */
9322 	if (!trace_ok_for_array(tracer, tr))
9323 		return;
9324 
9325 	for (i = 0; i < tr->nr_topts; i++) {
9326 		/* Make sure there's no duplicate flags. */
9327 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9328 			return;
9329 	}
9330 
9331 	opts = flags->opts;
9332 
9333 	for (cnt = 0; opts[cnt].name; cnt++)
9334 		;
9335 
9336 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9337 	if (!topts)
9338 		return;
9339 
9340 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9341 			    GFP_KERNEL);
9342 	if (!tr_topts) {
9343 		kfree(topts);
9344 		return;
9345 	}
9346 
9347 	tr->topts = tr_topts;
9348 	tr->topts[tr->nr_topts].tracer = tracer;
9349 	tr->topts[tr->nr_topts].topts = topts;
9350 	tr->nr_topts++;
9351 
9352 	for (cnt = 0; opts[cnt].name; cnt++) {
9353 		create_trace_option_file(tr, &topts[cnt], flags,
9354 					 &opts[cnt]);
9355 		MEM_FAIL(topts[cnt].entry == NULL,
9356 			  "Failed to create trace option: %s",
9357 			  opts[cnt].name);
9358 	}
9359 }
9360 
9361 static struct dentry *
create_trace_option_core_file(struct trace_array * tr,const char * option,long index)9362 create_trace_option_core_file(struct trace_array *tr,
9363 			      const char *option, long index)
9364 {
9365 	struct dentry *t_options;
9366 
9367 	t_options = trace_options_init_dentry(tr);
9368 	if (!t_options)
9369 		return NULL;
9370 
9371 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9372 				 (void *)&tr->trace_flags_index[index],
9373 				 &trace_options_core_fops);
9374 }
9375 
create_trace_options_dir(struct trace_array * tr)9376 static void create_trace_options_dir(struct trace_array *tr)
9377 {
9378 	struct dentry *t_options;
9379 	bool top_level = tr == &global_trace;
9380 	int i;
9381 
9382 	t_options = trace_options_init_dentry(tr);
9383 	if (!t_options)
9384 		return;
9385 
9386 	for (i = 0; trace_options[i]; i++) {
9387 		if (top_level ||
9388 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9389 			create_trace_option_core_file(tr, trace_options[i], i);
9390 	}
9391 }
9392 
9393 static ssize_t
rb_simple_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9394 rb_simple_read(struct file *filp, char __user *ubuf,
9395 	       size_t cnt, loff_t *ppos)
9396 {
9397 	struct trace_array *tr = filp->private_data;
9398 	char buf[64];
9399 	int r;
9400 
9401 	r = tracer_tracing_is_on(tr);
9402 	r = sprintf(buf, "%d\n", r);
9403 
9404 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9405 }
9406 
9407 static ssize_t
rb_simple_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9408 rb_simple_write(struct file *filp, const char __user *ubuf,
9409 		size_t cnt, loff_t *ppos)
9410 {
9411 	struct trace_array *tr = filp->private_data;
9412 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9413 	unsigned long val;
9414 	int ret;
9415 
9416 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9417 	if (ret)
9418 		return ret;
9419 
9420 	if (buffer) {
9421 		mutex_lock(&trace_types_lock);
9422 		if (!!val == tracer_tracing_is_on(tr)) {
9423 			val = 0; /* do nothing */
9424 		} else if (val) {
9425 			tracer_tracing_on(tr);
9426 			if (tr->current_trace->start)
9427 				tr->current_trace->start(tr);
9428 		} else {
9429 			tracer_tracing_off(tr);
9430 			if (tr->current_trace->stop)
9431 				tr->current_trace->stop(tr);
9432 			/* Wake up any waiters */
9433 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9434 		}
9435 		mutex_unlock(&trace_types_lock);
9436 	}
9437 
9438 	(*ppos)++;
9439 
9440 	return cnt;
9441 }
9442 
9443 static const struct file_operations rb_simple_fops = {
9444 	.open		= tracing_open_generic_tr,
9445 	.read		= rb_simple_read,
9446 	.write		= rb_simple_write,
9447 	.release	= tracing_release_generic_tr,
9448 	.llseek		= default_llseek,
9449 };
9450 
9451 static ssize_t
buffer_percent_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9452 buffer_percent_read(struct file *filp, char __user *ubuf,
9453 		    size_t cnt, loff_t *ppos)
9454 {
9455 	struct trace_array *tr = filp->private_data;
9456 	char buf[64];
9457 	int r;
9458 
9459 	r = tr->buffer_percent;
9460 	r = sprintf(buf, "%d\n", r);
9461 
9462 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9463 }
9464 
9465 static ssize_t
buffer_percent_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9466 buffer_percent_write(struct file *filp, const char __user *ubuf,
9467 		     size_t cnt, loff_t *ppos)
9468 {
9469 	struct trace_array *tr = filp->private_data;
9470 	unsigned long val;
9471 	int ret;
9472 
9473 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9474 	if (ret)
9475 		return ret;
9476 
9477 	if (val > 100)
9478 		return -EINVAL;
9479 
9480 	tr->buffer_percent = val;
9481 
9482 	(*ppos)++;
9483 
9484 	return cnt;
9485 }
9486 
9487 static const struct file_operations buffer_percent_fops = {
9488 	.open		= tracing_open_generic_tr,
9489 	.read		= buffer_percent_read,
9490 	.write		= buffer_percent_write,
9491 	.release	= tracing_release_generic_tr,
9492 	.llseek		= default_llseek,
9493 };
9494 
9495 static ssize_t
buffer_subbuf_size_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9496 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9497 {
9498 	struct trace_array *tr = filp->private_data;
9499 	size_t size;
9500 	char buf[64];
9501 	int order;
9502 	int r;
9503 
9504 	order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9505 	size = (PAGE_SIZE << order) / 1024;
9506 
9507 	r = sprintf(buf, "%zd\n", size);
9508 
9509 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9510 }
9511 
9512 static ssize_t
buffer_subbuf_size_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9513 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9514 			 size_t cnt, loff_t *ppos)
9515 {
9516 	struct trace_array *tr = filp->private_data;
9517 	unsigned long val;
9518 	int old_order;
9519 	int order;
9520 	int pages;
9521 	int ret;
9522 
9523 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9524 	if (ret)
9525 		return ret;
9526 
9527 	val *= 1024; /* value passed in is in KB */
9528 
9529 	pages = DIV_ROUND_UP(val, PAGE_SIZE);
9530 	order = fls(pages - 1);
9531 
9532 	/* limit between 1 and 128 system pages */
9533 	if (order < 0 || order > 7)
9534 		return -EINVAL;
9535 
9536 	/* Do not allow tracing while changing the order of the ring buffer */
9537 	tracing_stop_tr(tr);
9538 
9539 	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9540 	if (old_order == order)
9541 		goto out;
9542 
9543 	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9544 	if (ret)
9545 		goto out;
9546 
9547 #ifdef CONFIG_TRACER_MAX_TRACE
9548 
9549 	if (!tr->allocated_snapshot)
9550 		goto out_max;
9551 
9552 	ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9553 	if (ret) {
9554 		/* Put back the old order */
9555 		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9556 		if (WARN_ON_ONCE(cnt)) {
9557 			/*
9558 			 * AARGH! We are left with different orders!
9559 			 * The max buffer is our "snapshot" buffer.
9560 			 * When a tracer needs a snapshot (one of the
9561 			 * latency tracers), it swaps the max buffer
9562 			 * with the saved snap shot. We succeeded to
9563 			 * update the order of the main buffer, but failed to
9564 			 * update the order of the max buffer. But when we tried
9565 			 * to reset the main buffer to the original size, we
9566 			 * failed there too. This is very unlikely to
9567 			 * happen, but if it does, warn and kill all
9568 			 * tracing.
9569 			 */
9570 			tracing_disabled = 1;
9571 		}
9572 		goto out;
9573 	}
9574  out_max:
9575 #endif
9576 	(*ppos)++;
9577  out:
9578 	if (ret)
9579 		cnt = ret;
9580 	tracing_start_tr(tr);
9581 	return cnt;
9582 }
9583 
9584 static const struct file_operations buffer_subbuf_size_fops = {
9585 	.open		= tracing_open_generic_tr,
9586 	.read		= buffer_subbuf_size_read,
9587 	.write		= buffer_subbuf_size_write,
9588 	.release	= tracing_release_generic_tr,
9589 	.llseek		= default_llseek,
9590 };
9591 
9592 static struct dentry *trace_instance_dir;
9593 
9594 static void
9595 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9596 
9597 #ifdef CONFIG_MODULES
make_mod_delta(struct module * mod,void * data)9598 static int make_mod_delta(struct module *mod, void *data)
9599 {
9600 	struct trace_module_delta *module_delta;
9601 	struct trace_scratch *tscratch;
9602 	struct trace_mod_entry *entry;
9603 	struct trace_array *tr = data;
9604 	int i;
9605 
9606 	tscratch = tr->scratch;
9607 	module_delta = READ_ONCE(tr->module_delta);
9608 	for (i = 0; i < tscratch->nr_entries; i++) {
9609 		entry = &tscratch->entries[i];
9610 		if (strcmp(mod->name, entry->mod_name))
9611 			continue;
9612 		if (mod->state == MODULE_STATE_GOING)
9613 			module_delta->delta[i] = 0;
9614 		else
9615 			module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base
9616 						 - entry->mod_addr;
9617 		break;
9618 	}
9619 	return 0;
9620 }
9621 #else
make_mod_delta(struct module * mod,void * data)9622 static int make_mod_delta(struct module *mod, void *data)
9623 {
9624 	return 0;
9625 }
9626 #endif
9627 
mod_addr_comp(const void * a,const void * b,const void * data)9628 static int mod_addr_comp(const void *a, const void *b, const void *data)
9629 {
9630 	const struct trace_mod_entry *e1 = a;
9631 	const struct trace_mod_entry *e2 = b;
9632 
9633 	return e1->mod_addr > e2->mod_addr ? 1 : -1;
9634 }
9635 
setup_trace_scratch(struct trace_array * tr,struct trace_scratch * tscratch,unsigned int size)9636 static void setup_trace_scratch(struct trace_array *tr,
9637 				struct trace_scratch *tscratch, unsigned int size)
9638 {
9639 	struct trace_module_delta *module_delta;
9640 	struct trace_mod_entry *entry;
9641 	int i, nr_entries;
9642 
9643 	if (!tscratch)
9644 		return;
9645 
9646 	tr->scratch = tscratch;
9647 	tr->scratch_size = size;
9648 
9649 	if (tscratch->text_addr)
9650 		tr->text_delta = (unsigned long)_text - tscratch->text_addr;
9651 
9652 	if (struct_size(tscratch, entries, tscratch->nr_entries) > size)
9653 		goto reset;
9654 
9655 	/* Check if each module name is a valid string */
9656 	for (i = 0; i < tscratch->nr_entries; i++) {
9657 		int n;
9658 
9659 		entry = &tscratch->entries[i];
9660 
9661 		for (n = 0; n < MODULE_NAME_LEN; n++) {
9662 			if (entry->mod_name[n] == '\0')
9663 				break;
9664 			if (!isprint(entry->mod_name[n]))
9665 				goto reset;
9666 		}
9667 		if (n == MODULE_NAME_LEN)
9668 			goto reset;
9669 	}
9670 
9671 	/* Sort the entries so that we can find appropriate module from address. */
9672 	nr_entries = tscratch->nr_entries;
9673 	sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
9674 	       mod_addr_comp, NULL, NULL);
9675 
9676 	if (IS_ENABLED(CONFIG_MODULES)) {
9677 		module_delta = kzalloc(struct_size(module_delta, delta, nr_entries), GFP_KERNEL);
9678 		if (!module_delta) {
9679 			pr_info("module_delta allocation failed. Not able to decode module address.");
9680 			goto reset;
9681 		}
9682 		init_rcu_head(&module_delta->rcu);
9683 	} else
9684 		module_delta = NULL;
9685 	WRITE_ONCE(tr->module_delta, module_delta);
9686 
9687 	/* Scan modules to make text delta for modules. */
9688 	module_for_each_mod(make_mod_delta, tr);
9689 
9690 	/* Set trace_clock as the same of the previous boot. */
9691 	if (tscratch->clock_id != tr->clock_id) {
9692 		if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) ||
9693 		    tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
9694 			pr_info("the previous trace_clock info is not valid.");
9695 			goto reset;
9696 		}
9697 	}
9698 	return;
9699  reset:
9700 	/* Invalid trace modules */
9701 	memset(tscratch, 0, size);
9702 }
9703 
9704 static int
allocate_trace_buffer(struct trace_array * tr,struct array_buffer * buf,int size)9705 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9706 {
9707 	enum ring_buffer_flags rb_flags;
9708 	struct trace_scratch *tscratch;
9709 	unsigned int scratch_size = 0;
9710 
9711 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9712 
9713 	buf->tr = tr;
9714 
9715 	if (tr->range_addr_start && tr->range_addr_size) {
9716 		/* Add scratch buffer to handle 128 modules */
9717 		buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
9718 						      tr->range_addr_start,
9719 						      tr->range_addr_size,
9720 						      struct_size(tscratch, entries, 128));
9721 
9722 		tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size);
9723 		setup_trace_scratch(tr, tscratch, scratch_size);
9724 
9725 		/*
9726 		 * This is basically the same as a mapped buffer,
9727 		 * with the same restrictions.
9728 		 */
9729 		tr->mapped++;
9730 	} else {
9731 		buf->buffer = ring_buffer_alloc(size, rb_flags);
9732 	}
9733 	if (!buf->buffer)
9734 		return -ENOMEM;
9735 
9736 	buf->data = alloc_percpu(struct trace_array_cpu);
9737 	if (!buf->data) {
9738 		ring_buffer_free(buf->buffer);
9739 		buf->buffer = NULL;
9740 		return -ENOMEM;
9741 	}
9742 
9743 	/* Allocate the first page for all buffers */
9744 	set_buffer_entries(&tr->array_buffer,
9745 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9746 
9747 	return 0;
9748 }
9749 
free_trace_buffer(struct array_buffer * buf)9750 static void free_trace_buffer(struct array_buffer *buf)
9751 {
9752 	if (buf->buffer) {
9753 		ring_buffer_free(buf->buffer);
9754 		buf->buffer = NULL;
9755 		free_percpu(buf->data);
9756 		buf->data = NULL;
9757 	}
9758 }
9759 
allocate_trace_buffers(struct trace_array * tr,int size)9760 static int allocate_trace_buffers(struct trace_array *tr, int size)
9761 {
9762 	int ret;
9763 
9764 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9765 	if (ret)
9766 		return ret;
9767 
9768 #ifdef CONFIG_TRACER_MAX_TRACE
9769 	/* Fix mapped buffer trace arrays do not have snapshot buffers */
9770 	if (tr->range_addr_start)
9771 		return 0;
9772 
9773 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9774 				    allocate_snapshot ? size : 1);
9775 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9776 		free_trace_buffer(&tr->array_buffer);
9777 		return -ENOMEM;
9778 	}
9779 	tr->allocated_snapshot = allocate_snapshot;
9780 
9781 	allocate_snapshot = false;
9782 #endif
9783 
9784 	return 0;
9785 }
9786 
free_trace_buffers(struct trace_array * tr)9787 static void free_trace_buffers(struct trace_array *tr)
9788 {
9789 	if (!tr)
9790 		return;
9791 
9792 	free_trace_buffer(&tr->array_buffer);
9793 	kfree(tr->module_delta);
9794 
9795 #ifdef CONFIG_TRACER_MAX_TRACE
9796 	free_trace_buffer(&tr->max_buffer);
9797 #endif
9798 }
9799 
init_trace_flags_index(struct trace_array * tr)9800 static void init_trace_flags_index(struct trace_array *tr)
9801 {
9802 	int i;
9803 
9804 	/* Used by the trace options files */
9805 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9806 		tr->trace_flags_index[i] = i;
9807 }
9808 
__update_tracer_options(struct trace_array * tr)9809 static void __update_tracer_options(struct trace_array *tr)
9810 {
9811 	struct tracer *t;
9812 
9813 	for (t = trace_types; t; t = t->next)
9814 		add_tracer_options(tr, t);
9815 }
9816 
update_tracer_options(struct trace_array * tr)9817 static void update_tracer_options(struct trace_array *tr)
9818 {
9819 	mutex_lock(&trace_types_lock);
9820 	tracer_options_updated = true;
9821 	__update_tracer_options(tr);
9822 	mutex_unlock(&trace_types_lock);
9823 }
9824 
9825 /* Must have trace_types_lock held */
trace_array_find(const char * instance)9826 struct trace_array *trace_array_find(const char *instance)
9827 {
9828 	struct trace_array *tr, *found = NULL;
9829 
9830 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9831 		if (tr->name && strcmp(tr->name, instance) == 0) {
9832 			found = tr;
9833 			break;
9834 		}
9835 	}
9836 
9837 	return found;
9838 }
9839 
trace_array_find_get(const char * instance)9840 struct trace_array *trace_array_find_get(const char *instance)
9841 {
9842 	struct trace_array *tr;
9843 
9844 	mutex_lock(&trace_types_lock);
9845 	tr = trace_array_find(instance);
9846 	if (tr)
9847 		tr->ref++;
9848 	mutex_unlock(&trace_types_lock);
9849 
9850 	return tr;
9851 }
9852 
trace_array_create_dir(struct trace_array * tr)9853 static int trace_array_create_dir(struct trace_array *tr)
9854 {
9855 	int ret;
9856 
9857 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9858 	if (!tr->dir)
9859 		return -EINVAL;
9860 
9861 	ret = event_trace_add_tracer(tr->dir, tr);
9862 	if (ret) {
9863 		tracefs_remove(tr->dir);
9864 		return ret;
9865 	}
9866 
9867 	init_tracer_tracefs(tr, tr->dir);
9868 	__update_tracer_options(tr);
9869 
9870 	return ret;
9871 }
9872 
9873 static struct trace_array *
trace_array_create_systems(const char * name,const char * systems,unsigned long range_addr_start,unsigned long range_addr_size)9874 trace_array_create_systems(const char *name, const char *systems,
9875 			   unsigned long range_addr_start,
9876 			   unsigned long range_addr_size)
9877 {
9878 	struct trace_array *tr;
9879 	int ret;
9880 
9881 	ret = -ENOMEM;
9882 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9883 	if (!tr)
9884 		return ERR_PTR(ret);
9885 
9886 	tr->name = kstrdup(name, GFP_KERNEL);
9887 	if (!tr->name)
9888 		goto out_free_tr;
9889 
9890 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9891 		goto out_free_tr;
9892 
9893 	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9894 		goto out_free_tr;
9895 
9896 	if (systems) {
9897 		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9898 		if (!tr->system_names)
9899 			goto out_free_tr;
9900 	}
9901 
9902 	/* Only for boot up memory mapped ring buffers */
9903 	tr->range_addr_start = range_addr_start;
9904 	tr->range_addr_size = range_addr_size;
9905 
9906 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9907 
9908 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9909 
9910 	raw_spin_lock_init(&tr->start_lock);
9911 
9912 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9913 #ifdef CONFIG_TRACER_MAX_TRACE
9914 	spin_lock_init(&tr->snapshot_trigger_lock);
9915 #endif
9916 	tr->current_trace = &nop_trace;
9917 
9918 	INIT_LIST_HEAD(&tr->systems);
9919 	INIT_LIST_HEAD(&tr->events);
9920 	INIT_LIST_HEAD(&tr->hist_vars);
9921 	INIT_LIST_HEAD(&tr->err_log);
9922 	INIT_LIST_HEAD(&tr->marker_list);
9923 
9924 #ifdef CONFIG_MODULES
9925 	INIT_LIST_HEAD(&tr->mod_events);
9926 #endif
9927 
9928 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9929 		goto out_free_tr;
9930 
9931 	/* The ring buffer is defaultly expanded */
9932 	trace_set_ring_buffer_expanded(tr);
9933 
9934 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9935 		goto out_free_tr;
9936 
9937 	ftrace_init_trace_array(tr);
9938 
9939 	init_trace_flags_index(tr);
9940 
9941 	if (trace_instance_dir) {
9942 		ret = trace_array_create_dir(tr);
9943 		if (ret)
9944 			goto out_free_tr;
9945 	} else
9946 		__trace_early_add_events(tr);
9947 
9948 	list_add(&tr->list, &ftrace_trace_arrays);
9949 
9950 	tr->ref++;
9951 
9952 	return tr;
9953 
9954  out_free_tr:
9955 	ftrace_free_ftrace_ops(tr);
9956 	free_trace_buffers(tr);
9957 	free_cpumask_var(tr->pipe_cpumask);
9958 	free_cpumask_var(tr->tracing_cpumask);
9959 	kfree_const(tr->system_names);
9960 	kfree(tr->range_name);
9961 	kfree(tr->name);
9962 	kfree(tr);
9963 
9964 	return ERR_PTR(ret);
9965 }
9966 
trace_array_create(const char * name)9967 static struct trace_array *trace_array_create(const char *name)
9968 {
9969 	return trace_array_create_systems(name, NULL, 0, 0);
9970 }
9971 
instance_mkdir(const char * name)9972 static int instance_mkdir(const char *name)
9973 {
9974 	struct trace_array *tr;
9975 	int ret;
9976 
9977 	guard(mutex)(&event_mutex);
9978 	guard(mutex)(&trace_types_lock);
9979 
9980 	ret = -EEXIST;
9981 	if (trace_array_find(name))
9982 		return -EEXIST;
9983 
9984 	tr = trace_array_create(name);
9985 
9986 	ret = PTR_ERR_OR_ZERO(tr);
9987 
9988 	return ret;
9989 }
9990 
9991 #ifdef CONFIG_MMU
map_pages(unsigned long start,unsigned long size)9992 static u64 map_pages(unsigned long start, unsigned long size)
9993 {
9994 	unsigned long vmap_start, vmap_end;
9995 	struct vm_struct *area;
9996 	int ret;
9997 
9998 	area = get_vm_area(size, VM_IOREMAP);
9999 	if (!area)
10000 		return 0;
10001 
10002 	vmap_start = (unsigned long) area->addr;
10003 	vmap_end = vmap_start + size;
10004 
10005 	ret = vmap_page_range(vmap_start, vmap_end,
10006 			      start, pgprot_nx(PAGE_KERNEL));
10007 	if (ret < 0) {
10008 		free_vm_area(area);
10009 		return 0;
10010 	}
10011 
10012 	return (u64)vmap_start;
10013 }
10014 #else
map_pages(unsigned long start,unsigned long size)10015 static inline u64 map_pages(unsigned long start, unsigned long size)
10016 {
10017 	return 0;
10018 }
10019 #endif
10020 
10021 /**
10022  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
10023  * @name: The name of the trace array to be looked up/created.
10024  * @systems: A list of systems to create event directories for (NULL for all)
10025  *
10026  * Returns pointer to trace array with given name.
10027  * NULL, if it cannot be created.
10028  *
10029  * NOTE: This function increments the reference counter associated with the
10030  * trace array returned. This makes sure it cannot be freed while in use.
10031  * Use trace_array_put() once the trace array is no longer needed.
10032  * If the trace_array is to be freed, trace_array_destroy() needs to
10033  * be called after the trace_array_put(), or simply let user space delete
10034  * it from the tracefs instances directory. But until the
10035  * trace_array_put() is called, user space can not delete it.
10036  *
10037  */
trace_array_get_by_name(const char * name,const char * systems)10038 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
10039 {
10040 	struct trace_array *tr;
10041 
10042 	guard(mutex)(&event_mutex);
10043 	guard(mutex)(&trace_types_lock);
10044 
10045 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10046 		if (tr->name && strcmp(tr->name, name) == 0) {
10047 			tr->ref++;
10048 			return tr;
10049 		}
10050 	}
10051 
10052 	tr = trace_array_create_systems(name, systems, 0, 0);
10053 
10054 	if (IS_ERR(tr))
10055 		tr = NULL;
10056 	else
10057 		tr->ref++;
10058 
10059 	return tr;
10060 }
10061 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
10062 
__remove_instance(struct trace_array * tr)10063 static int __remove_instance(struct trace_array *tr)
10064 {
10065 	int i;
10066 
10067 	/* Reference counter for a newly created trace array = 1. */
10068 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
10069 		return -EBUSY;
10070 
10071 	list_del(&tr->list);
10072 
10073 	/* Disable all the flags that were enabled coming in */
10074 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
10075 		if ((1 << i) & ZEROED_TRACE_FLAGS)
10076 			set_tracer_flag(tr, 1 << i, 0);
10077 	}
10078 
10079 	if (printk_trace == tr)
10080 		update_printk_trace(&global_trace);
10081 
10082 	if (update_marker_trace(tr, 0))
10083 		synchronize_rcu();
10084 
10085 	tracing_set_nop(tr);
10086 	clear_ftrace_function_probes(tr);
10087 	event_trace_del_tracer(tr);
10088 	ftrace_clear_pids(tr);
10089 	ftrace_destroy_function_files(tr);
10090 	tracefs_remove(tr->dir);
10091 	free_percpu(tr->last_func_repeats);
10092 	free_trace_buffers(tr);
10093 	clear_tracing_err_log(tr);
10094 
10095 	if (tr->range_name) {
10096 		reserve_mem_release_by_name(tr->range_name);
10097 		kfree(tr->range_name);
10098 	}
10099 
10100 	for (i = 0; i < tr->nr_topts; i++) {
10101 		kfree(tr->topts[i].topts);
10102 	}
10103 	kfree(tr->topts);
10104 
10105 	free_cpumask_var(tr->pipe_cpumask);
10106 	free_cpumask_var(tr->tracing_cpumask);
10107 	kfree_const(tr->system_names);
10108 	kfree(tr->name);
10109 	kfree(tr);
10110 
10111 	return 0;
10112 }
10113 
trace_array_destroy(struct trace_array * this_tr)10114 int trace_array_destroy(struct trace_array *this_tr)
10115 {
10116 	struct trace_array *tr;
10117 
10118 	if (!this_tr)
10119 		return -EINVAL;
10120 
10121 	guard(mutex)(&event_mutex);
10122 	guard(mutex)(&trace_types_lock);
10123 
10124 
10125 	/* Making sure trace array exists before destroying it. */
10126 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10127 		if (tr == this_tr)
10128 			return __remove_instance(tr);
10129 	}
10130 
10131 	return -ENODEV;
10132 }
10133 EXPORT_SYMBOL_GPL(trace_array_destroy);
10134 
instance_rmdir(const char * name)10135 static int instance_rmdir(const char *name)
10136 {
10137 	struct trace_array *tr;
10138 
10139 	guard(mutex)(&event_mutex);
10140 	guard(mutex)(&trace_types_lock);
10141 
10142 	tr = trace_array_find(name);
10143 	if (!tr)
10144 		return -ENODEV;
10145 
10146 	return __remove_instance(tr);
10147 }
10148 
create_trace_instances(struct dentry * d_tracer)10149 static __init void create_trace_instances(struct dentry *d_tracer)
10150 {
10151 	struct trace_array *tr;
10152 
10153 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
10154 							 instance_mkdir,
10155 							 instance_rmdir);
10156 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
10157 		return;
10158 
10159 	guard(mutex)(&event_mutex);
10160 	guard(mutex)(&trace_types_lock);
10161 
10162 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10163 		if (!tr->name)
10164 			continue;
10165 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
10166 			     "Failed to create instance directory\n"))
10167 			return;
10168 	}
10169 }
10170 
10171 static void
init_tracer_tracefs(struct trace_array * tr,struct dentry * d_tracer)10172 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
10173 {
10174 	int cpu;
10175 
10176 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
10177 			tr, &show_traces_fops);
10178 
10179 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
10180 			tr, &set_tracer_fops);
10181 
10182 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
10183 			  tr, &tracing_cpumask_fops);
10184 
10185 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
10186 			  tr, &tracing_iter_fops);
10187 
10188 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
10189 			  tr, &tracing_fops);
10190 
10191 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
10192 			  tr, &tracing_pipe_fops);
10193 
10194 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
10195 			  tr, &tracing_entries_fops);
10196 
10197 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
10198 			  tr, &tracing_total_entries_fops);
10199 
10200 	trace_create_file("free_buffer", 0200, d_tracer,
10201 			  tr, &tracing_free_buffer_fops);
10202 
10203 	trace_create_file("trace_marker", 0220, d_tracer,
10204 			  tr, &tracing_mark_fops);
10205 
10206 	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
10207 
10208 	trace_create_file("trace_marker_raw", 0220, d_tracer,
10209 			  tr, &tracing_mark_raw_fops);
10210 
10211 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
10212 			  &trace_clock_fops);
10213 
10214 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
10215 			  tr, &rb_simple_fops);
10216 
10217 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
10218 			  &trace_time_stamp_mode_fops);
10219 
10220 	tr->buffer_percent = 50;
10221 
10222 	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
10223 			tr, &buffer_percent_fops);
10224 
10225 	trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
10226 			  tr, &buffer_subbuf_size_fops);
10227 
10228 	create_trace_options_dir(tr);
10229 
10230 #ifdef CONFIG_TRACER_MAX_TRACE
10231 	trace_create_maxlat_file(tr, d_tracer);
10232 #endif
10233 
10234 	if (ftrace_create_function_files(tr, d_tracer))
10235 		MEM_FAIL(1, "Could not allocate function filter files");
10236 
10237 	if (tr->range_addr_start) {
10238 		trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
10239 				  tr, &last_boot_fops);
10240 #ifdef CONFIG_TRACER_SNAPSHOT
10241 	} else {
10242 		trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
10243 				  tr, &snapshot_fops);
10244 #endif
10245 	}
10246 
10247 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
10248 			  tr, &tracing_err_log_fops);
10249 
10250 	for_each_tracing_cpu(cpu)
10251 		tracing_init_tracefs_percpu(tr, cpu);
10252 
10253 	ftrace_init_tracefs(tr, d_tracer);
10254 }
10255 
10256 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
trace_automount(struct dentry * mntpt,void * ingore)10257 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
10258 {
10259 	struct vfsmount *mnt;
10260 	struct file_system_type *type;
10261 	struct fs_context *fc;
10262 	int ret;
10263 
10264 	/*
10265 	 * To maintain backward compatibility for tools that mount
10266 	 * debugfs to get to the tracing facility, tracefs is automatically
10267 	 * mounted to the debugfs/tracing directory.
10268 	 */
10269 	type = get_fs_type("tracefs");
10270 	if (!type)
10271 		return NULL;
10272 
10273 	fc = fs_context_for_submount(type, mntpt);
10274 	put_filesystem(type);
10275 	if (IS_ERR(fc))
10276 		return ERR_CAST(fc);
10277 
10278 	pr_warn("NOTICE: Automounting of tracing to debugfs is deprecated and will be removed in 2030\n");
10279 
10280 	ret = vfs_parse_fs_string(fc, "source",
10281 				  "tracefs", strlen("tracefs"));
10282 	if (!ret)
10283 		mnt = fc_mount(fc);
10284 	else
10285 		mnt = ERR_PTR(ret);
10286 
10287 	put_fs_context(fc);
10288 	return mnt;
10289 }
10290 #endif
10291 
10292 /**
10293  * tracing_init_dentry - initialize top level trace array
10294  *
10295  * This is called when creating files or directories in the tracing
10296  * directory. It is called via fs_initcall() by any of the boot up code
10297  * and expects to return the dentry of the top level tracing directory.
10298  */
tracing_init_dentry(void)10299 int tracing_init_dentry(void)
10300 {
10301 	struct trace_array *tr = &global_trace;
10302 
10303 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10304 		pr_warn("Tracing disabled due to lockdown\n");
10305 		return -EPERM;
10306 	}
10307 
10308 	/* The top level trace array uses  NULL as parent */
10309 	if (tr->dir)
10310 		return 0;
10311 
10312 	if (WARN_ON(!tracefs_initialized()))
10313 		return -ENODEV;
10314 
10315 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
10316 	/*
10317 	 * As there may still be users that expect the tracing
10318 	 * files to exist in debugfs/tracing, we must automount
10319 	 * the tracefs file system there, so older tools still
10320 	 * work with the newer kernel.
10321 	 */
10322 	tr->dir = debugfs_create_automount("tracing", NULL,
10323 					   trace_automount, NULL);
10324 #endif
10325 
10326 	return 0;
10327 }
10328 
10329 extern struct trace_eval_map *__start_ftrace_eval_maps[];
10330 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
10331 
10332 static struct workqueue_struct *eval_map_wq __initdata;
10333 static struct work_struct eval_map_work __initdata;
10334 static struct work_struct tracerfs_init_work __initdata;
10335 
eval_map_work_func(struct work_struct * work)10336 static void __init eval_map_work_func(struct work_struct *work)
10337 {
10338 	int len;
10339 
10340 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10341 	trace_event_update_with_eval_map(NULL, __start_ftrace_eval_maps, len);
10342 }
10343 
trace_eval_init(void)10344 static int __init trace_eval_init(void)
10345 {
10346 	INIT_WORK(&eval_map_work, eval_map_work_func);
10347 
10348 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
10349 	if (!eval_map_wq) {
10350 		pr_err("Unable to allocate eval_map_wq\n");
10351 		/* Do work here */
10352 		eval_map_work_func(&eval_map_work);
10353 		return -ENOMEM;
10354 	}
10355 
10356 	queue_work(eval_map_wq, &eval_map_work);
10357 	return 0;
10358 }
10359 
10360 subsys_initcall(trace_eval_init);
10361 
trace_eval_sync(void)10362 static int __init trace_eval_sync(void)
10363 {
10364 	/* Make sure the eval map updates are finished */
10365 	if (eval_map_wq)
10366 		destroy_workqueue(eval_map_wq);
10367 	return 0;
10368 }
10369 
10370 late_initcall_sync(trace_eval_sync);
10371 
10372 
10373 #ifdef CONFIG_MODULES
10374 
module_exists(const char * module)10375 bool module_exists(const char *module)
10376 {
10377 	/* All modules have the symbol __this_module */
10378 	static const char this_mod[] = "__this_module";
10379 	char modname[MAX_PARAM_PREFIX_LEN + sizeof(this_mod) + 2];
10380 	unsigned long val;
10381 	int n;
10382 
10383 	n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
10384 
10385 	if (n > sizeof(modname) - 1)
10386 		return false;
10387 
10388 	val = module_kallsyms_lookup_name(modname);
10389 	return val != 0;
10390 }
10391 
trace_module_add_evals(struct module * mod)10392 static void trace_module_add_evals(struct module *mod)
10393 {
10394 	/*
10395 	 * Modules with bad taint do not have events created, do
10396 	 * not bother with enums either.
10397 	 */
10398 	if (trace_module_has_bad_taint(mod))
10399 		return;
10400 
10401 	/* Even if no trace_evals, this need to sanitize field types. */
10402 	trace_event_update_with_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10403 }
10404 
10405 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
trace_module_remove_evals(struct module * mod)10406 static void trace_module_remove_evals(struct module *mod)
10407 {
10408 	union trace_eval_map_item *map;
10409 	union trace_eval_map_item **last = &trace_eval_maps;
10410 
10411 	if (!mod->num_trace_evals)
10412 		return;
10413 
10414 	guard(mutex)(&trace_eval_mutex);
10415 
10416 	map = trace_eval_maps;
10417 
10418 	while (map) {
10419 		if (map->head.mod == mod)
10420 			break;
10421 		map = trace_eval_jmp_to_tail(map);
10422 		last = &map->tail.next;
10423 		map = map->tail.next;
10424 	}
10425 	if (!map)
10426 		return;
10427 
10428 	*last = trace_eval_jmp_to_tail(map)->tail.next;
10429 	kfree(map);
10430 }
10431 #else
trace_module_remove_evals(struct module * mod)10432 static inline void trace_module_remove_evals(struct module *mod) { }
10433 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10434 
trace_module_record(struct module * mod,bool add)10435 static void trace_module_record(struct module *mod, bool add)
10436 {
10437 	struct trace_array *tr;
10438 	unsigned long flags;
10439 
10440 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10441 		flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT);
10442 		/* Update any persistent trace array that has already been started */
10443 		if (flags == TRACE_ARRAY_FL_BOOT && add) {
10444 			guard(mutex)(&scratch_mutex);
10445 			save_mod(mod, tr);
10446 		} else if (flags & TRACE_ARRAY_FL_LAST_BOOT) {
10447 			/* Update delta if the module loaded in previous boot */
10448 			make_mod_delta(mod, tr);
10449 		}
10450 	}
10451 }
10452 
trace_module_notify(struct notifier_block * self,unsigned long val,void * data)10453 static int trace_module_notify(struct notifier_block *self,
10454 			       unsigned long val, void *data)
10455 {
10456 	struct module *mod = data;
10457 
10458 	switch (val) {
10459 	case MODULE_STATE_COMING:
10460 		trace_module_add_evals(mod);
10461 		trace_module_record(mod, true);
10462 		break;
10463 	case MODULE_STATE_GOING:
10464 		trace_module_remove_evals(mod);
10465 		trace_module_record(mod, false);
10466 		break;
10467 	}
10468 
10469 	return NOTIFY_OK;
10470 }
10471 
10472 static struct notifier_block trace_module_nb = {
10473 	.notifier_call = trace_module_notify,
10474 	.priority = 0,
10475 };
10476 #endif /* CONFIG_MODULES */
10477 
tracer_init_tracefs_work_func(struct work_struct * work)10478 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10479 {
10480 
10481 	event_trace_init();
10482 
10483 	init_tracer_tracefs(&global_trace, NULL);
10484 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
10485 
10486 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10487 			&global_trace, &tracing_thresh_fops);
10488 
10489 	trace_create_file("README", TRACE_MODE_READ, NULL,
10490 			NULL, &tracing_readme_fops);
10491 
10492 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10493 			NULL, &tracing_saved_cmdlines_fops);
10494 
10495 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10496 			  NULL, &tracing_saved_cmdlines_size_fops);
10497 
10498 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10499 			NULL, &tracing_saved_tgids_fops);
10500 
10501 	trace_create_eval_file(NULL);
10502 
10503 #ifdef CONFIG_MODULES
10504 	register_module_notifier(&trace_module_nb);
10505 #endif
10506 
10507 #ifdef CONFIG_DYNAMIC_FTRACE
10508 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10509 			NULL, &tracing_dyn_info_fops);
10510 #endif
10511 
10512 	create_trace_instances(NULL);
10513 
10514 	update_tracer_options(&global_trace);
10515 }
10516 
tracer_init_tracefs(void)10517 static __init int tracer_init_tracefs(void)
10518 {
10519 	int ret;
10520 
10521 	trace_access_lock_init();
10522 
10523 	ret = tracing_init_dentry();
10524 	if (ret)
10525 		return 0;
10526 
10527 	if (eval_map_wq) {
10528 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10529 		queue_work(eval_map_wq, &tracerfs_init_work);
10530 	} else {
10531 		tracer_init_tracefs_work_func(NULL);
10532 	}
10533 
10534 	rv_init_interface();
10535 
10536 	return 0;
10537 }
10538 
10539 fs_initcall(tracer_init_tracefs);
10540 
10541 static int trace_die_panic_handler(struct notifier_block *self,
10542 				unsigned long ev, void *unused);
10543 
10544 static struct notifier_block trace_panic_notifier = {
10545 	.notifier_call = trace_die_panic_handler,
10546 	.priority = INT_MAX - 1,
10547 };
10548 
10549 static struct notifier_block trace_die_notifier = {
10550 	.notifier_call = trace_die_panic_handler,
10551 	.priority = INT_MAX - 1,
10552 };
10553 
10554 /*
10555  * The idea is to execute the following die/panic callback early, in order
10556  * to avoid showing irrelevant information in the trace (like other panic
10557  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10558  * warnings get disabled (to prevent potential log flooding).
10559  */
trace_die_panic_handler(struct notifier_block * self,unsigned long ev,void * unused)10560 static int trace_die_panic_handler(struct notifier_block *self,
10561 				unsigned long ev, void *unused)
10562 {
10563 	if (!ftrace_dump_on_oops_enabled())
10564 		return NOTIFY_DONE;
10565 
10566 	/* The die notifier requires DIE_OOPS to trigger */
10567 	if (self == &trace_die_notifier && ev != DIE_OOPS)
10568 		return NOTIFY_DONE;
10569 
10570 	ftrace_dump(DUMP_PARAM);
10571 
10572 	return NOTIFY_DONE;
10573 }
10574 
10575 /*
10576  * printk is set to max of 1024, we really don't need it that big.
10577  * Nothing should be printing 1000 characters anyway.
10578  */
10579 #define TRACE_MAX_PRINT		1000
10580 
10581 /*
10582  * Define here KERN_TRACE so that we have one place to modify
10583  * it if we decide to change what log level the ftrace dump
10584  * should be at.
10585  */
10586 #define KERN_TRACE		KERN_EMERG
10587 
10588 void
trace_printk_seq(struct trace_seq * s)10589 trace_printk_seq(struct trace_seq *s)
10590 {
10591 	/* Probably should print a warning here. */
10592 	if (s->seq.len >= TRACE_MAX_PRINT)
10593 		s->seq.len = TRACE_MAX_PRINT;
10594 
10595 	/*
10596 	 * More paranoid code. Although the buffer size is set to
10597 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10598 	 * an extra layer of protection.
10599 	 */
10600 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10601 		s->seq.len = s->seq.size - 1;
10602 
10603 	/* should be zero ended, but we are paranoid. */
10604 	s->buffer[s->seq.len] = 0;
10605 
10606 	printk(KERN_TRACE "%s", s->buffer);
10607 
10608 	trace_seq_init(s);
10609 }
10610 
trace_init_iter(struct trace_iterator * iter,struct trace_array * tr)10611 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
10612 {
10613 	iter->tr = tr;
10614 	iter->trace = iter->tr->current_trace;
10615 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
10616 	iter->array_buffer = &tr->array_buffer;
10617 
10618 	if (iter->trace && iter->trace->open)
10619 		iter->trace->open(iter);
10620 
10621 	/* Annotate start of buffers if we had overruns */
10622 	if (ring_buffer_overruns(iter->array_buffer->buffer))
10623 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
10624 
10625 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
10626 	if (trace_clocks[iter->tr->clock_id].in_ns)
10627 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10628 
10629 	/* Can not use kmalloc for iter.temp and iter.fmt */
10630 	iter->temp = static_temp_buf;
10631 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
10632 	iter->fmt = static_fmt_buf;
10633 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
10634 }
10635 
trace_init_global_iter(struct trace_iterator * iter)10636 void trace_init_global_iter(struct trace_iterator *iter)
10637 {
10638 	trace_init_iter(iter, &global_trace);
10639 }
10640 
ftrace_dump_one(struct trace_array * tr,enum ftrace_dump_mode dump_mode)10641 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
10642 {
10643 	/* use static because iter can be a bit big for the stack */
10644 	static struct trace_iterator iter;
10645 	unsigned int old_userobj;
10646 	unsigned long flags;
10647 	int cnt = 0;
10648 
10649 	/*
10650 	 * Always turn off tracing when we dump.
10651 	 * We don't need to show trace output of what happens
10652 	 * between multiple crashes.
10653 	 *
10654 	 * If the user does a sysrq-z, then they can re-enable
10655 	 * tracing with echo 1 > tracing_on.
10656 	 */
10657 	tracer_tracing_off(tr);
10658 
10659 	local_irq_save(flags);
10660 
10661 	/* Simulate the iterator */
10662 	trace_init_iter(&iter, tr);
10663 
10664 	/* While dumping, do not allow the buffer to be enable */
10665 	tracer_tracing_disable(tr);
10666 
10667 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10668 
10669 	/* don't look at user memory in panic mode */
10670 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10671 
10672 	if (dump_mode == DUMP_ORIG)
10673 		iter.cpu_file = raw_smp_processor_id();
10674 	else
10675 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10676 
10677 	if (tr == &global_trace)
10678 		printk(KERN_TRACE "Dumping ftrace buffer:\n");
10679 	else
10680 		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
10681 
10682 	/* Did function tracer already get disabled? */
10683 	if (ftrace_is_dead()) {
10684 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10685 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10686 	}
10687 
10688 	/*
10689 	 * We need to stop all tracing on all CPUS to read
10690 	 * the next buffer. This is a bit expensive, but is
10691 	 * not done often. We fill all what we can read,
10692 	 * and then release the locks again.
10693 	 */
10694 
10695 	while (!trace_empty(&iter)) {
10696 
10697 		if (!cnt)
10698 			printk(KERN_TRACE "---------------------------------\n");
10699 
10700 		cnt++;
10701 
10702 		trace_iterator_reset(&iter);
10703 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
10704 
10705 		if (trace_find_next_entry_inc(&iter) != NULL) {
10706 			int ret;
10707 
10708 			ret = print_trace_line(&iter);
10709 			if (ret != TRACE_TYPE_NO_CONSUME)
10710 				trace_consume(&iter);
10711 		}
10712 		touch_nmi_watchdog();
10713 
10714 		trace_printk_seq(&iter.seq);
10715 	}
10716 
10717 	if (!cnt)
10718 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
10719 	else
10720 		printk(KERN_TRACE "---------------------------------\n");
10721 
10722 	tr->trace_flags |= old_userobj;
10723 
10724 	tracer_tracing_enable(tr);
10725 	local_irq_restore(flags);
10726 }
10727 
ftrace_dump_by_param(void)10728 static void ftrace_dump_by_param(void)
10729 {
10730 	bool first_param = true;
10731 	char dump_param[MAX_TRACER_SIZE];
10732 	char *buf, *token, *inst_name;
10733 	struct trace_array *tr;
10734 
10735 	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
10736 	buf = dump_param;
10737 
10738 	while ((token = strsep(&buf, ",")) != NULL) {
10739 		if (first_param) {
10740 			first_param = false;
10741 			if (!strcmp("0", token))
10742 				continue;
10743 			else if (!strcmp("1", token)) {
10744 				ftrace_dump_one(&global_trace, DUMP_ALL);
10745 				continue;
10746 			}
10747 			else if (!strcmp("2", token) ||
10748 			  !strcmp("orig_cpu", token)) {
10749 				ftrace_dump_one(&global_trace, DUMP_ORIG);
10750 				continue;
10751 			}
10752 		}
10753 
10754 		inst_name = strsep(&token, "=");
10755 		tr = trace_array_find(inst_name);
10756 		if (!tr) {
10757 			printk(KERN_TRACE "Instance %s not found\n", inst_name);
10758 			continue;
10759 		}
10760 
10761 		if (token && (!strcmp("2", token) ||
10762 			  !strcmp("orig_cpu", token)))
10763 			ftrace_dump_one(tr, DUMP_ORIG);
10764 		else
10765 			ftrace_dump_one(tr, DUMP_ALL);
10766 	}
10767 }
10768 
ftrace_dump(enum ftrace_dump_mode oops_dump_mode)10769 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10770 {
10771 	static atomic_t dump_running;
10772 
10773 	/* Only allow one dump user at a time. */
10774 	if (atomic_inc_return(&dump_running) != 1) {
10775 		atomic_dec(&dump_running);
10776 		return;
10777 	}
10778 
10779 	switch (oops_dump_mode) {
10780 	case DUMP_ALL:
10781 		ftrace_dump_one(&global_trace, DUMP_ALL);
10782 		break;
10783 	case DUMP_ORIG:
10784 		ftrace_dump_one(&global_trace, DUMP_ORIG);
10785 		break;
10786 	case DUMP_PARAM:
10787 		ftrace_dump_by_param();
10788 		break;
10789 	case DUMP_NONE:
10790 		break;
10791 	default:
10792 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10793 		ftrace_dump_one(&global_trace, DUMP_ALL);
10794 	}
10795 
10796 	atomic_dec(&dump_running);
10797 }
10798 EXPORT_SYMBOL_GPL(ftrace_dump);
10799 
10800 #define WRITE_BUFSIZE  4096
10801 
trace_parse_run_command(struct file * file,const char __user * buffer,size_t count,loff_t * ppos,int (* createfn)(const char *))10802 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10803 				size_t count, loff_t *ppos,
10804 				int (*createfn)(const char *))
10805 {
10806 	char *kbuf, *buf, *tmp;
10807 	int ret = 0;
10808 	size_t done = 0;
10809 	size_t size;
10810 
10811 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10812 	if (!kbuf)
10813 		return -ENOMEM;
10814 
10815 	while (done < count) {
10816 		size = count - done;
10817 
10818 		if (size >= WRITE_BUFSIZE)
10819 			size = WRITE_BUFSIZE - 1;
10820 
10821 		if (copy_from_user(kbuf, buffer + done, size)) {
10822 			ret = -EFAULT;
10823 			goto out;
10824 		}
10825 		kbuf[size] = '\0';
10826 		buf = kbuf;
10827 		do {
10828 			tmp = strchr(buf, '\n');
10829 			if (tmp) {
10830 				*tmp = '\0';
10831 				size = tmp - buf + 1;
10832 			} else {
10833 				size = strlen(buf);
10834 				if (done + size < count) {
10835 					if (buf != kbuf)
10836 						break;
10837 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10838 					pr_warn("Line length is too long: Should be less than %d\n",
10839 						WRITE_BUFSIZE - 2);
10840 					ret = -EINVAL;
10841 					goto out;
10842 				}
10843 			}
10844 			done += size;
10845 
10846 			/* Remove comments */
10847 			tmp = strchr(buf, '#');
10848 
10849 			if (tmp)
10850 				*tmp = '\0';
10851 
10852 			ret = createfn(buf);
10853 			if (ret)
10854 				goto out;
10855 			buf += size;
10856 
10857 		} while (done < count);
10858 	}
10859 	ret = done;
10860 
10861 out:
10862 	kfree(kbuf);
10863 
10864 	return ret;
10865 }
10866 
10867 #ifdef CONFIG_TRACER_MAX_TRACE
tr_needs_alloc_snapshot(const char * name)10868 __init static bool tr_needs_alloc_snapshot(const char *name)
10869 {
10870 	char *test;
10871 	int len = strlen(name);
10872 	bool ret;
10873 
10874 	if (!boot_snapshot_index)
10875 		return false;
10876 
10877 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10878 	    boot_snapshot_info[len] == '\t')
10879 		return true;
10880 
10881 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10882 	if (!test)
10883 		return false;
10884 
10885 	sprintf(test, "\t%s\t", name);
10886 	ret = strstr(boot_snapshot_info, test) == NULL;
10887 	kfree(test);
10888 	return ret;
10889 }
10890 
do_allocate_snapshot(const char * name)10891 __init static void do_allocate_snapshot(const char *name)
10892 {
10893 	if (!tr_needs_alloc_snapshot(name))
10894 		return;
10895 
10896 	/*
10897 	 * When allocate_snapshot is set, the next call to
10898 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
10899 	 * will allocate the snapshot buffer. That will alse clear
10900 	 * this flag.
10901 	 */
10902 	allocate_snapshot = true;
10903 }
10904 #else
do_allocate_snapshot(const char * name)10905 static inline void do_allocate_snapshot(const char *name) { }
10906 #endif
10907 
enable_instances(void)10908 __init static void enable_instances(void)
10909 {
10910 	struct trace_array *tr;
10911 	bool memmap_area = false;
10912 	char *curr_str;
10913 	char *name;
10914 	char *str;
10915 	char *tok;
10916 
10917 	/* A tab is always appended */
10918 	boot_instance_info[boot_instance_index - 1] = '\0';
10919 	str = boot_instance_info;
10920 
10921 	while ((curr_str = strsep(&str, "\t"))) {
10922 		phys_addr_t start = 0;
10923 		phys_addr_t size = 0;
10924 		unsigned long addr = 0;
10925 		bool traceprintk = false;
10926 		bool traceoff = false;
10927 		char *flag_delim;
10928 		char *addr_delim;
10929 		char *rname __free(kfree) = NULL;
10930 
10931 		tok = strsep(&curr_str, ",");
10932 
10933 		flag_delim = strchr(tok, '^');
10934 		addr_delim = strchr(tok, '@');
10935 
10936 		if (addr_delim)
10937 			*addr_delim++ = '\0';
10938 
10939 		if (flag_delim)
10940 			*flag_delim++ = '\0';
10941 
10942 		name = tok;
10943 
10944 		if (flag_delim) {
10945 			char *flag;
10946 
10947 			while ((flag = strsep(&flag_delim, "^"))) {
10948 				if (strcmp(flag, "traceoff") == 0) {
10949 					traceoff = true;
10950 				} else if ((strcmp(flag, "printk") == 0) ||
10951 					   (strcmp(flag, "traceprintk") == 0) ||
10952 					   (strcmp(flag, "trace_printk") == 0)) {
10953 					traceprintk = true;
10954 				} else {
10955 					pr_info("Tracing: Invalid instance flag '%s' for %s\n",
10956 						flag, name);
10957 				}
10958 			}
10959 		}
10960 
10961 		tok = addr_delim;
10962 		if (tok && isdigit(*tok)) {
10963 			start = memparse(tok, &tok);
10964 			if (!start) {
10965 				pr_warn("Tracing: Invalid boot instance address for %s\n",
10966 					name);
10967 				continue;
10968 			}
10969 			if (*tok != ':') {
10970 				pr_warn("Tracing: No size specified for instance %s\n", name);
10971 				continue;
10972 			}
10973 			tok++;
10974 			size = memparse(tok, &tok);
10975 			if (!size) {
10976 				pr_warn("Tracing: Invalid boot instance size for %s\n",
10977 					name);
10978 				continue;
10979 			}
10980 			memmap_area = true;
10981 		} else if (tok) {
10982 			if (!reserve_mem_find_by_name(tok, &start, &size)) {
10983 				start = 0;
10984 				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
10985 				continue;
10986 			}
10987 			rname = kstrdup(tok, GFP_KERNEL);
10988 		}
10989 
10990 		if (start) {
10991 			/* Start and size must be page aligned */
10992 			if (start & ~PAGE_MASK) {
10993 				pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start);
10994 				continue;
10995 			}
10996 			if (size & ~PAGE_MASK) {
10997 				pr_warn("Tracing: mapping size %pa is not page aligned\n", &size);
10998 				continue;
10999 			}
11000 
11001 			if (memmap_area)
11002 				addr = map_pages(start, size);
11003 			else
11004 				addr = (unsigned long)phys_to_virt(start);
11005 			if (addr) {
11006 				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
11007 					name, &start, (unsigned long)size);
11008 			} else {
11009 				pr_warn("Tracing: Failed to map boot instance %s\n", name);
11010 				continue;
11011 			}
11012 		} else {
11013 			/* Only non mapped buffers have snapshot buffers */
11014 			if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
11015 				do_allocate_snapshot(name);
11016 		}
11017 
11018 		tr = trace_array_create_systems(name, NULL, addr, size);
11019 		if (IS_ERR(tr)) {
11020 			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
11021 			continue;
11022 		}
11023 
11024 		if (traceoff)
11025 			tracer_tracing_off(tr);
11026 
11027 		if (traceprintk)
11028 			update_printk_trace(tr);
11029 
11030 		/*
11031 		 * memmap'd buffers can not be freed.
11032 		 */
11033 		if (memmap_area) {
11034 			tr->flags |= TRACE_ARRAY_FL_MEMMAP;
11035 			tr->ref++;
11036 		}
11037 
11038 		if (start) {
11039 			tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
11040 			tr->range_name = no_free_ptr(rname);
11041 		}
11042 
11043 		while ((tok = strsep(&curr_str, ","))) {
11044 			early_enable_events(tr, tok, true);
11045 		}
11046 	}
11047 }
11048 
tracer_alloc_buffers(void)11049 __init static int tracer_alloc_buffers(void)
11050 {
11051 	int ring_buf_size;
11052 	int ret = -ENOMEM;
11053 
11054 
11055 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
11056 		pr_warn("Tracing disabled due to lockdown\n");
11057 		return -EPERM;
11058 	}
11059 
11060 	/*
11061 	 * Make sure we don't accidentally add more trace options
11062 	 * than we have bits for.
11063 	 */
11064 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
11065 
11066 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
11067 		goto out;
11068 
11069 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
11070 		goto out_free_buffer_mask;
11071 
11072 	/* Only allocate trace_printk buffers if a trace_printk exists */
11073 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
11074 		/* Must be called before global_trace.buffer is allocated */
11075 		trace_printk_init_buffers();
11076 
11077 	/* To save memory, keep the ring buffer size to its minimum */
11078 	if (global_trace.ring_buffer_expanded)
11079 		ring_buf_size = trace_buf_size;
11080 	else
11081 		ring_buf_size = 1;
11082 
11083 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
11084 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
11085 
11086 	raw_spin_lock_init(&global_trace.start_lock);
11087 
11088 	/*
11089 	 * The prepare callbacks allocates some memory for the ring buffer. We
11090 	 * don't free the buffer if the CPU goes down. If we were to free
11091 	 * the buffer, then the user would lose any trace that was in the
11092 	 * buffer. The memory will be removed once the "instance" is removed.
11093 	 */
11094 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
11095 				      "trace/RB:prepare", trace_rb_cpu_prepare,
11096 				      NULL);
11097 	if (ret < 0)
11098 		goto out_free_cpumask;
11099 	/* Used for event triggers */
11100 	ret = -ENOMEM;
11101 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
11102 	if (!temp_buffer)
11103 		goto out_rm_hp_state;
11104 
11105 	if (trace_create_savedcmd() < 0)
11106 		goto out_free_temp_buffer;
11107 
11108 	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
11109 		goto out_free_savedcmd;
11110 
11111 	/* TODO: make the number of buffers hot pluggable with CPUS */
11112 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
11113 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
11114 		goto out_free_pipe_cpumask;
11115 	}
11116 	if (global_trace.buffer_disabled)
11117 		tracing_off();
11118 
11119 	if (trace_boot_clock) {
11120 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
11121 		if (ret < 0)
11122 			pr_warn("Trace clock %s not defined, going back to default\n",
11123 				trace_boot_clock);
11124 	}
11125 
11126 	/*
11127 	 * register_tracer() might reference current_trace, so it
11128 	 * needs to be set before we register anything. This is
11129 	 * just a bootstrap of current_trace anyway.
11130 	 */
11131 	global_trace.current_trace = &nop_trace;
11132 
11133 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
11134 #ifdef CONFIG_TRACER_MAX_TRACE
11135 	spin_lock_init(&global_trace.snapshot_trigger_lock);
11136 #endif
11137 	ftrace_init_global_array_ops(&global_trace);
11138 
11139 #ifdef CONFIG_MODULES
11140 	INIT_LIST_HEAD(&global_trace.mod_events);
11141 #endif
11142 
11143 	init_trace_flags_index(&global_trace);
11144 
11145 	register_tracer(&nop_trace);
11146 
11147 	/* Function tracing may start here (via kernel command line) */
11148 	init_function_trace();
11149 
11150 	/* All seems OK, enable tracing */
11151 	tracing_disabled = 0;
11152 
11153 	atomic_notifier_chain_register(&panic_notifier_list,
11154 				       &trace_panic_notifier);
11155 
11156 	register_die_notifier(&trace_die_notifier);
11157 
11158 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
11159 
11160 	INIT_LIST_HEAD(&global_trace.systems);
11161 	INIT_LIST_HEAD(&global_trace.events);
11162 	INIT_LIST_HEAD(&global_trace.hist_vars);
11163 	INIT_LIST_HEAD(&global_trace.err_log);
11164 	list_add(&global_trace.marker_list, &marker_copies);
11165 	list_add(&global_trace.list, &ftrace_trace_arrays);
11166 
11167 	apply_trace_boot_options();
11168 
11169 	register_snapshot_cmd();
11170 
11171 	return 0;
11172 
11173 out_free_pipe_cpumask:
11174 	free_cpumask_var(global_trace.pipe_cpumask);
11175 out_free_savedcmd:
11176 	trace_free_saved_cmdlines_buffer();
11177 out_free_temp_buffer:
11178 	ring_buffer_free(temp_buffer);
11179 out_rm_hp_state:
11180 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
11181 out_free_cpumask:
11182 	free_cpumask_var(global_trace.tracing_cpumask);
11183 out_free_buffer_mask:
11184 	free_cpumask_var(tracing_buffer_mask);
11185 out:
11186 	return ret;
11187 }
11188 
11189 #ifdef CONFIG_FUNCTION_TRACER
11190 /* Used to set module cached ftrace filtering at boot up */
trace_get_global_array(void)11191 __init struct trace_array *trace_get_global_array(void)
11192 {
11193 	return &global_trace;
11194 }
11195 #endif
11196 
ftrace_boot_snapshot(void)11197 void __init ftrace_boot_snapshot(void)
11198 {
11199 #ifdef CONFIG_TRACER_MAX_TRACE
11200 	struct trace_array *tr;
11201 
11202 	if (!snapshot_at_boot)
11203 		return;
11204 
11205 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
11206 		if (!tr->allocated_snapshot)
11207 			continue;
11208 
11209 		tracing_snapshot_instance(tr);
11210 		trace_array_puts(tr, "** Boot snapshot taken **\n");
11211 	}
11212 #endif
11213 }
11214 
early_trace_init(void)11215 void __init early_trace_init(void)
11216 {
11217 	if (tracepoint_printk) {
11218 		tracepoint_print_iter =
11219 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
11220 		if (MEM_FAIL(!tracepoint_print_iter,
11221 			     "Failed to allocate trace iterator\n"))
11222 			tracepoint_printk = 0;
11223 		else
11224 			static_key_enable(&tracepoint_printk_key.key);
11225 	}
11226 	tracer_alloc_buffers();
11227 
11228 	init_events();
11229 }
11230 
trace_init(void)11231 void __init trace_init(void)
11232 {
11233 	trace_event_init();
11234 
11235 	if (boot_instance_index)
11236 		enable_instances();
11237 }
11238 
clear_boot_tracer(void)11239 __init static void clear_boot_tracer(void)
11240 {
11241 	/*
11242 	 * The default tracer at boot buffer is an init section.
11243 	 * This function is called in lateinit. If we did not
11244 	 * find the boot tracer, then clear it out, to prevent
11245 	 * later registration from accessing the buffer that is
11246 	 * about to be freed.
11247 	 */
11248 	if (!default_bootup_tracer)
11249 		return;
11250 
11251 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
11252 	       default_bootup_tracer);
11253 	default_bootup_tracer = NULL;
11254 }
11255 
11256 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
tracing_set_default_clock(void)11257 __init static void tracing_set_default_clock(void)
11258 {
11259 	/* sched_clock_stable() is determined in late_initcall */
11260 	if (!trace_boot_clock && !sched_clock_stable()) {
11261 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
11262 			pr_warn("Can not set tracing clock due to lockdown\n");
11263 			return;
11264 		}
11265 
11266 		printk(KERN_WARNING
11267 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
11268 		       "If you want to keep using the local clock, then add:\n"
11269 		       "  \"trace_clock=local\"\n"
11270 		       "on the kernel command line\n");
11271 		tracing_set_clock(&global_trace, "global");
11272 	}
11273 }
11274 #else
tracing_set_default_clock(void)11275 static inline void tracing_set_default_clock(void) { }
11276 #endif
11277 
late_trace_init(void)11278 __init static int late_trace_init(void)
11279 {
11280 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
11281 		static_key_disable(&tracepoint_printk_key.key);
11282 		tracepoint_printk = 0;
11283 	}
11284 
11285 	if (traceoff_after_boot)
11286 		tracing_off();
11287 
11288 	tracing_set_default_clock();
11289 	clear_boot_tracer();
11290 	return 0;
11291 }
11292 
11293 late_initcall_sync(late_trace_init);
11294