xref: /linux/kernel/trace/trace.c (revision 67029a49db6c1f21106a1b5fcdd0ea234a6e0711)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/cleanup.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 #include <linux/sort.h>
53 #include <linux/io.h> /* vmap_page_range() */
54 #include <linux/fs_context.h>
55 
56 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
57 
58 #include "trace.h"
59 #include "trace_output.h"
60 
61 #ifdef CONFIG_FTRACE_STARTUP_TEST
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will peek into the ring-buffer to count the
65  * entries inserted during the selftest, although concurrent
66  * insertions into the ring-buffer, such as trace_printk, could occur
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70 
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76 
77 void __init disable_tracing_selftest(const char *reason)
78 {
79 	if (!tracing_selftest_disabled) {
80 		tracing_selftest_disabled = true;
81 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
82 	}
83 }
84 #else
85 #define tracing_selftest_running	0
86 #define tracing_selftest_disabled	0
87 #endif
88 
89 /* Pipe tracepoints to printk */
90 static struct trace_iterator *tracepoint_print_iter;
91 int tracepoint_printk;
92 static bool tracepoint_printk_stop_on_boot __initdata;
93 static bool traceoff_after_boot __initdata;
94 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
95 
96 /* For tracers that don't implement custom flags */
97 static struct tracer_opt dummy_tracer_opt[] = {
98 	{ }
99 };
100 
101 static int
102 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
103 {
104 	return 0;
105 }
106 
107 /*
108  * To prevent the comm cache from being overwritten when no
109  * tracing is active, only save the comm when a trace event
110  * occurred.
111  */
112 DEFINE_PER_CPU(bool, trace_taskinfo_save);
113 
114 /*
115  * Kill all tracing for good (never come back).
116  * It is initialized to 1 but will turn to zero if the initialization
117  * of the tracer is successful. But that is the only place that sets
118  * this back to zero.
119  */
120 static int tracing_disabled = 1;
121 
122 cpumask_var_t __read_mostly	tracing_buffer_mask;
123 
124 #define MAX_TRACER_SIZE		100
125 /*
126  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
127  *
128  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
129  * is set, then ftrace_dump is called. This will output the contents
130  * of the ftrace buffers to the console.  This is very useful for
131  * capturing traces that lead to crashes and outputting them to a
132  * serial console.
133  *
134  * It is off by default, but you can enable it either by specifying
135  * "ftrace_dump_on_oops" on the kernel command line, or by setting
136  * /proc/sys/kernel/ftrace_dump_on_oops.
137  * Set 1 if you want to dump the buffers of all CPUs
138  * Set 2 if you want to dump only the buffer of the CPU that triggered the oops
139  * Set an instance name if you want to dump a specific trace instance
140  * Dumping multiple instances is also supported; instances are separated
141  * by commas.
142  */
143 /* Set to string format zero to disable by default */
144 char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
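
/*
 * Illustrative settings for the knob above (editor's note, not part of the
 * original file). The instance names "foo" and "bar" are hypothetical:
 *
 *	ftrace_dump_on_oops				(command line; same as "1")
 *	ftrace_dump_on_oops=2				(only the CPU that triggered the oops)
 *	ftrace_dump_on_oops=foo,bar			(dump the trace instances "foo" and "bar")
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops	(at run time)
 */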
145 
146 /* When set, tracing will stop when a WARN*() is hit */
147 static int __disable_trace_on_warning;
148 
149 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
150 			     void *buffer, size_t *lenp, loff_t *ppos);
151 static const struct ctl_table trace_sysctl_table[] = {
152 	{
153 		.procname	= "ftrace_dump_on_oops",
154 		.data		= &ftrace_dump_on_oops,
155 		.maxlen		= MAX_TRACER_SIZE,
156 		.mode		= 0644,
157 		.proc_handler	= proc_dostring,
158 	},
159 	{
160 		.procname	= "traceoff_on_warning",
161 		.data		= &__disable_trace_on_warning,
162 		.maxlen		= sizeof(__disable_trace_on_warning),
163 		.mode		= 0644,
164 		.proc_handler	= proc_dointvec,
165 	},
166 	{
167 		.procname	= "tracepoint_printk",
168 		.data		= &tracepoint_printk,
169 		.maxlen		= sizeof(tracepoint_printk),
170 		.mode		= 0644,
171 		.proc_handler	= tracepoint_printk_sysctl,
172 	},
173 };
174 
175 static int __init init_trace_sysctls(void)
176 {
177 	register_sysctl_init("kernel", trace_sysctl_table);
178 	return 0;
179 }
180 subsys_initcall(init_trace_sysctls);
181 
182 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
183 /* Map of enums to their values, for "eval_map" file */
184 struct trace_eval_map_head {
185 	struct module			*mod;
186 	unsigned long			length;
187 };
188 
189 union trace_eval_map_item;
190 
191 struct trace_eval_map_tail {
192 	/*
193 	 * "end" is first and points to NULL as it must be different
194 	 * than "mod" or "eval_string"
195 	 */
196 	union trace_eval_map_item	*next;
197 	const char			*end;	/* points to NULL */
198 };
199 
200 static DEFINE_MUTEX(trace_eval_mutex);
201 
202 /*
203  * The trace_eval_maps are saved in an array with two extra elements,
204  * one at the beginning, and one at the end. The beginning item contains
205  * the count of the saved maps (head.length), and the module they
206  * belong to if not built in (head.mod). The ending item contains a
207  * pointer to the next array of saved eval_map items.
208  */
209 union trace_eval_map_item {
210 	struct trace_eval_map		map;
211 	struct trace_eval_map_head	head;
212 	struct trace_eval_map_tail	tail;
213 };
214 
215 static union trace_eval_map_item *trace_eval_maps;
216 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
217 
218 int tracing_set_tracer(struct trace_array *tr, const char *buf);
219 static void ftrace_trace_userstack(struct trace_array *tr,
220 				   struct trace_buffer *buffer,
221 				   unsigned int trace_ctx);
222 
223 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
224 static char *default_bootup_tracer;
225 
226 static bool allocate_snapshot;
227 static bool snapshot_at_boot;
228 
229 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
230 static int boot_instance_index;
231 
232 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
233 static int boot_snapshot_index;
234 
235 static int __init set_cmdline_ftrace(char *str)
236 {
237 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
238 	default_bootup_tracer = bootup_tracer_buf;
239 	/* We are using ftrace early, expand it */
240 	trace_set_ring_buffer_expanded(NULL);
241 	return 1;
242 }
243 __setup("ftrace=", set_cmdline_ftrace);
244 
245 int ftrace_dump_on_oops_enabled(void)
246 {
247 	if (!strcmp("0", ftrace_dump_on_oops))
248 		return 0;
249 	else
250 		return 1;
251 }
252 
253 static int __init set_ftrace_dump_on_oops(char *str)
254 {
255 	if (!*str) {
256 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
257 		return 1;
258 	}
259 
260 	if (*str == ',') {
261 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
262 		strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
263 		return 1;
264 	}
265 
266 	if (*str++ == '=') {
267 		strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
268 		return 1;
269 	}
270 
271 	return 0;
272 }
273 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
274 
275 static int __init stop_trace_on_warning(char *str)
276 {
277 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
278 		__disable_trace_on_warning = 1;
279 	return 1;
280 }
281 __setup("traceoff_on_warning", stop_trace_on_warning);
282 
283 static int __init boot_alloc_snapshot(char *str)
284 {
285 	char *slot = boot_snapshot_info + boot_snapshot_index;
286 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
287 	int ret;
288 
289 	if (str[0] == '=') {
290 		str++;
291 		if (strlen(str) >= left)
292 			return -1;
293 
294 		ret = snprintf(slot, left, "%s\t", str);
295 		boot_snapshot_index += ret;
296 	} else {
297 		allocate_snapshot = true;
298 		/* We also need the main ring buffer expanded */
299 		trace_set_ring_buffer_expanded(NULL);
300 	}
301 	return 1;
302 }
303 __setup("alloc_snapshot", boot_alloc_snapshot);
304 
305 
306 static int __init boot_snapshot(char *str)
307 {
308 	snapshot_at_boot = true;
309 	boot_alloc_snapshot(str);
310 	return 1;
311 }
312 __setup("ftrace_boot_snapshot", boot_snapshot);
313 
314 
315 static int __init boot_instance(char *str)
316 {
317 	char *slot = boot_instance_info + boot_instance_index;
318 	int left = sizeof(boot_instance_info) - boot_instance_index;
319 	int ret;
320 
321 	if (strlen(str) >= left)
322 		return -1;
323 
324 	ret = snprintf(slot, left, "%s\t", str);
325 	boot_instance_index += ret;
326 
327 	return 1;
328 }
329 __setup("trace_instance=", boot_instance);
330 
331 
332 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
333 
334 static int __init set_trace_boot_options(char *str)
335 {
336 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
337 	return 1;
338 }
339 __setup("trace_options=", set_trace_boot_options);
340 
341 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
342 static char *trace_boot_clock __initdata;
343 
344 static int __init set_trace_boot_clock(char *str)
345 {
346 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
347 	trace_boot_clock = trace_boot_clock_buf;
348 	return 1;
349 }
350 __setup("trace_clock=", set_trace_boot_clock);
351 
352 static int __init set_tracepoint_printk(char *str)
353 {
354 	/* Ignore the "tp_printk_stop_on_boot" param */
355 	if (*str == '_')
356 		return 0;
357 
358 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
359 		tracepoint_printk = 1;
360 	return 1;
361 }
362 __setup("tp_printk", set_tracepoint_printk);
363 
364 static int __init set_tracepoint_printk_stop(char *str)
365 {
366 	tracepoint_printk_stop_on_boot = true;
367 	return 1;
368 }
369 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
370 
371 static int __init set_traceoff_after_boot(char *str)
372 {
373 	traceoff_after_boot = true;
374 	return 1;
375 }
376 __setup("traceoff_after_boot", set_traceoff_after_boot);
377 
378 unsigned long long ns2usecs(u64 nsec)
379 {
380 	nsec += 500;
381 	do_div(nsec, 1000);
382 	return nsec;
383 }
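
/*
 * Worked example (editor's note): ns2usecs() rounds to the nearest
 * microsecond, so ns2usecs(1499) == 1 and ns2usecs(1500) == 2, because
 * do_div() divides (nsec + 500) by 1000.
 */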
384 
385 static void
386 trace_process_export(struct trace_export *export,
387 	       struct ring_buffer_event *event, int flag)
388 {
389 	struct trace_entry *entry;
390 	unsigned int size = 0;
391 
392 	if (export->flags & flag) {
393 		entry = ring_buffer_event_data(event);
394 		size = ring_buffer_event_length(event);
395 		export->write(export, entry, size);
396 	}
397 }
398 
399 static DEFINE_MUTEX(ftrace_export_lock);
400 
401 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
402 
403 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
404 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
405 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
406 
407 static inline void ftrace_exports_enable(struct trace_export *export)
408 {
409 	if (export->flags & TRACE_EXPORT_FUNCTION)
410 		static_branch_inc(&trace_function_exports_enabled);
411 
412 	if (export->flags & TRACE_EXPORT_EVENT)
413 		static_branch_inc(&trace_event_exports_enabled);
414 
415 	if (export->flags & TRACE_EXPORT_MARKER)
416 		static_branch_inc(&trace_marker_exports_enabled);
417 }
418 
419 static inline void ftrace_exports_disable(struct trace_export *export)
420 {
421 	if (export->flags & TRACE_EXPORT_FUNCTION)
422 		static_branch_dec(&trace_function_exports_enabled);
423 
424 	if (export->flags & TRACE_EXPORT_EVENT)
425 		static_branch_dec(&trace_event_exports_enabled);
426 
427 	if (export->flags & TRACE_EXPORT_MARKER)
428 		static_branch_dec(&trace_marker_exports_enabled);
429 }
430 
431 static void ftrace_exports(struct ring_buffer_event *event, int flag)
432 {
433 	struct trace_export *export;
434 
435 	guard(preempt_notrace)();
436 
437 	export = rcu_dereference_raw_check(ftrace_exports_list);
438 	while (export) {
439 		trace_process_export(export, event, flag);
440 		export = rcu_dereference_raw_check(export->next);
441 	}
442 }
443 
444 static inline void
445 add_trace_export(struct trace_export **list, struct trace_export *export)
446 {
447 	rcu_assign_pointer(export->next, *list);
448 	/*
449 	 * We are adding the export to the list, but another
450 	 * CPU might be walking that list. We need to make sure
451 	 * the export->next pointer is valid before another CPU sees
452 	 * the export pointer inserted into the list.
453 	 */
454 	rcu_assign_pointer(*list, export);
455 }
456 
457 static inline int
458 rm_trace_export(struct trace_export **list, struct trace_export *export)
459 {
460 	struct trace_export **p;
461 
462 	for (p = list; *p != NULL; p = &(*p)->next)
463 		if (*p == export)
464 			break;
465 
466 	if (*p != export)
467 		return -1;
468 
469 	rcu_assign_pointer(*p, (*p)->next);
470 
471 	return 0;
472 }
473 
474 static inline void
475 add_ftrace_export(struct trace_export **list, struct trace_export *export)
476 {
477 	ftrace_exports_enable(export);
478 
479 	add_trace_export(list, export);
480 }
481 
482 static inline int
483 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
484 {
485 	int ret;
486 
487 	ret = rm_trace_export(list, export);
488 	ftrace_exports_disable(export);
489 
490 	return ret;
491 }
492 
493 int register_ftrace_export(struct trace_export *export)
494 {
495 	if (WARN_ON_ONCE(!export->write))
496 		return -1;
497 
498 	guard(mutex)(&ftrace_export_lock);
499 
500 	add_ftrace_export(&ftrace_exports_list, export);
501 
502 	return 0;
503 }
504 EXPORT_SYMBOL_GPL(register_ftrace_export);
505 
506 int unregister_ftrace_export(struct trace_export *export)
507 {
508 	guard(mutex)(&ftrace_export_lock);
509 	return rm_ftrace_export(&ftrace_exports_list, export);
510 }
511 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
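
/*
 * Minimal usage sketch of the trace_export API above (editor's example,
 * not part of the original file). The names are hypothetical and the
 * write() signature is assumed to match include/linux/trace.h; the point
 * is only the register/unregister pairing shown in this file.
 */
#if 0	/* illustrative only */
static void my_export_write(struct trace_export *export, const void *entry,
			    unsigned int size)
{
	/* e.g. copy the raw trace entry out to a device or firmware log */
}

static struct trace_export my_export = {
	.write	= my_export_write,
	.flags	= TRACE_EXPORT_EVENT,
};

static int __init my_export_init(void)
{
	return register_ftrace_export(&my_export);
}

static void __exit my_export_exit(void)
{
	unregister_ftrace_export(&my_export);
}

module_init(my_export_init);
module_exit(my_export_exit);
#endif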
512 
513 /* trace_flags holds trace_options default values */
514 #define TRACE_DEFAULT_FLAGS						\
515 	(FUNCTION_DEFAULT_FLAGS |					\
516 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
517 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
518 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
519 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
520 	 TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK |		\
521 	 TRACE_ITER_COPY_MARKER)
522 
523 /* trace_options that are only supported by global_trace */
524 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
525 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
526 
527 /* trace_flags that are default zero for instances */
528 #define ZEROED_TRACE_FLAGS \
529 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK | \
530 	 TRACE_ITER_COPY_MARKER)
531 
532 /*
533  * The global_trace is the descriptor that holds the top-level tracing
534  * buffers for the live tracing.
535  */
536 static struct trace_array global_trace = {
537 	.trace_flags = TRACE_DEFAULT_FLAGS,
538 };
539 
540 static struct trace_array *printk_trace = &global_trace;
541 
542 /* List of trace_arrays interested in the top level trace_marker */
543 static LIST_HEAD(marker_copies);
544 
545 static __always_inline bool printk_binsafe(struct trace_array *tr)
546 {
547 	/*
548 	 * The binary format of trace_printk can cause a crash if used
549 	 * by a buffer from another boot. Force the use of the
550 	 * non-binary version of trace_printk if the trace_printk
551 	 * buffer is a boot-mapped ring buffer.
552 	 */
553 	return !(tr->flags & TRACE_ARRAY_FL_BOOT);
554 }
555 
556 static void update_printk_trace(struct trace_array *tr)
557 {
558 	if (printk_trace == tr)
559 		return;
560 
561 	printk_trace->trace_flags &= ~TRACE_ITER_TRACE_PRINTK;
562 	printk_trace = tr;
563 	tr->trace_flags |= TRACE_ITER_TRACE_PRINTK;
564 }
565 
566 /* Returns true if the status of tr changed */
567 static bool update_marker_trace(struct trace_array *tr, int enabled)
568 {
569 	lockdep_assert_held(&event_mutex);
570 
571 	if (enabled) {
572 		if (!list_empty(&tr->marker_list))
573 			return false;
574 
575 		list_add_rcu(&tr->marker_list, &marker_copies);
576 		tr->trace_flags |= TRACE_ITER_COPY_MARKER;
577 		return true;
578 	}
579 
580 	if (list_empty(&tr->marker_list))
581 		return false;
582 
583 	list_del_init(&tr->marker_list);
584 	tr->trace_flags &= ~TRACE_ITER_COPY_MARKER;
585 	return true;
586 }
587 
588 void trace_set_ring_buffer_expanded(struct trace_array *tr)
589 {
590 	if (!tr)
591 		tr = &global_trace;
592 	tr->ring_buffer_expanded = true;
593 }
594 
595 LIST_HEAD(ftrace_trace_arrays);
596 
597 int trace_array_get(struct trace_array *this_tr)
598 {
599 	struct trace_array *tr;
600 
601 	guard(mutex)(&trace_types_lock);
602 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
603 		if (tr == this_tr) {
604 			tr->ref++;
605 			return 0;
606 		}
607 	}
608 
609 	return -ENODEV;
610 }
611 
612 static void __trace_array_put(struct trace_array *this_tr)
613 {
614 	WARN_ON(!this_tr->ref);
615 	this_tr->ref--;
616 }
617 
618 /**
619  * trace_array_put - Decrement the reference counter for this trace array.
620  * @this_tr : pointer to the trace array
621  *
622  * NOTE: Use this when we no longer need the trace array returned by
623  * trace_array_get_by_name(). This ensures the trace array can be later
624  * destroyed.
625  *
626  */
627 void trace_array_put(struct trace_array *this_tr)
628 {
629 	if (!this_tr)
630 		return;
631 
632 	guard(mutex)(&trace_types_lock);
633 	__trace_array_put(this_tr);
634 }
635 EXPORT_SYMBOL_GPL(trace_array_put);
636 
637 int tracing_check_open_get_tr(struct trace_array *tr)
638 {
639 	int ret;
640 
641 	ret = security_locked_down(LOCKDOWN_TRACEFS);
642 	if (ret)
643 		return ret;
644 
645 	if (tracing_disabled)
646 		return -ENODEV;
647 
648 	if (tr && trace_array_get(tr) < 0)
649 		return -ENODEV;
650 
651 	return 0;
652 }
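
/*
 * Usage sketch (editor's example): a tracefs open handler typically takes a
 * reference with tracing_check_open_get_tr() and the matching release
 * handler drops it with trace_array_put(). The function names below are
 * hypothetical.
 */
#if 0	/* illustrative only */
static int example_open(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	filp->private_data = tr;
	return 0;
}

static int example_release(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;

	trace_array_put(tr);
	return 0;
}
#endif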
653 
654 /**
655  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
656  * @filtered_pids: The list of pids to check
657  * @search_pid: The PID to find in @filtered_pids
658  *
659  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
660  */
661 bool
662 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
663 {
664 	return trace_pid_list_is_set(filtered_pids, search_pid);
665 }
666 
667 /**
668  * trace_ignore_this_task - should a task be ignored for tracing
669  * @filtered_pids: The list of pids to check
670  * @filtered_no_pids: The list of pids not to be traced
671  * @task: The task that should be ignored if not filtered
672  *
673  * Checks if @task should be traced or not from @filtered_pids.
674  * Returns true if @task should *NOT* be traced.
675  * Returns false if @task should be traced.
676  */
677 bool
678 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
679 		       struct trace_pid_list *filtered_no_pids,
680 		       struct task_struct *task)
681 {
682 	/*
683 	 * If filtered_no_pids is not empty, and the task's pid is listed
684 	 * in filtered_no_pids, then return true.
685 	 * Otherwise, if filtered_pids is empty, that means we can
686 	 * trace all tasks. If it has content, then only trace pids
687 	 * within filtered_pids.
688 	 */
689 
690 	return (filtered_pids &&
691 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
692 		(filtered_no_pids &&
693 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
694 }
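
/*
 * Usage sketch (editor's example): a tracepoint probe consults both lists
 * and bails out early for ignored tasks. The variable names are
 * hypothetical.
 */
#if 0	/* illustrative only */
	if (trace_ignore_this_task(pid_list, no_pid_list, task))
		return;		/* task is filtered out; do not record */
#endif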
695 
696 /**
697  * trace_filter_add_remove_task - Add or remove a task from a pid_list
698  * @pid_list: The list to modify
699  * @self: The current task for fork or NULL for exit
700  * @task: The task to add or remove
701  *
702  * If adding a task, if @self is defined, the task is only added if @self
703  * is also included in @pid_list. This happens on fork and tasks should
704  * only be added when the parent is listed. If @self is NULL, then the
705  * @task pid will be removed from the list, which would happen on exit
706  * of a task.
707  */
708 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
709 				  struct task_struct *self,
710 				  struct task_struct *task)
711 {
712 	if (!pid_list)
713 		return;
714 
715 	/* For forks, we only add if the forking task is listed */
716 	if (self) {
717 		if (!trace_find_filtered_pid(pid_list, self->pid))
718 			return;
719 	}
720 
721 	/* "self" is set for forks, and NULL for exits */
722 	if (self)
723 		trace_pid_list_set(pid_list, task->pid);
724 	else
725 		trace_pid_list_clear(pid_list, task->pid);
726 }
727 
728 /**
729  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
730  * @pid_list: The pid list to show
731  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
732  * @pos: The position of the file
733  *
734  * This is used by the seq_file "next" operation to iterate the pids
735  * listed in a trace_pid_list structure.
736  *
737  * Returns the pid+1 as we want to display pid of zero, but NULL would
738  * stop the iteration.
739  */
740 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
741 {
742 	long pid = (unsigned long)v;
743 	unsigned int next;
744 
745 	(*pos)++;
746 
747 	/* pid already is +1 of the actual previous bit */
748 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
749 		return NULL;
750 
751 	pid = next;
752 
753 	/* Return pid + 1 to allow zero to be represented */
754 	return (void *)(pid + 1);
755 }
756 
757 /**
758  * trace_pid_start - Used for seq_file to start reading pid lists
759  * @pid_list: The pid list to show
760  * @pos: The position of the file
761  *
762  * This is used by seq_file "start" operation to start the iteration
763  * of listing pids.
764  *
765  * Returns the pid+1 as we want to display pid of zero, but NULL would
766  * stop the iteration.
767  */
768 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
769 {
770 	unsigned long pid;
771 	unsigned int first;
772 	loff_t l = 0;
773 
774 	if (trace_pid_list_first(pid_list, &first) < 0)
775 		return NULL;
776 
777 	pid = first;
778 
779 	/* Return pid + 1 so that zero can be the exit value */
780 	for (pid++; pid && l < *pos;
781 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
782 		;
783 	return (void *)pid;
784 }
785 
786 /**
787  * trace_pid_show - show the current pid in seq_file processing
788  * @m: The seq_file structure to write into
789  * @v: A void pointer of the pid (+1) value to display
790  *
791  * Can be directly used by seq_file operations to display the current
792  * pid value.
793  */
794 int trace_pid_show(struct seq_file *m, void *v)
795 {
796 	unsigned long pid = (unsigned long)v - 1;
797 
798 	seq_printf(m, "%lu\n", pid);
799 	return 0;
800 }
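
/*
 * Usage sketch (editor's example): the three helpers above are meant to be
 * wrapped by seq_file callbacks. The wrappers below are hypothetical and
 * omit the locking (e.g. RCU) that real callers take around the iteration;
 * they only show how a pid list stored in m->private would be walked.
 */
#if 0	/* illustrative only */
static void *example_pid_seq_start(struct seq_file *m, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;

	return trace_pid_start(pid_list, pos);
}

static void *example_pid_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;

	return trace_pid_next(pid_list, v, pos);
}

static void example_pid_seq_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations example_pid_seq_ops = {
	.start	= example_pid_seq_start,
	.next	= example_pid_seq_next,
	.stop	= example_pid_seq_stop,
	.show	= trace_pid_show,
};
#endif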
801 
802 /* 128 should be much more than enough */
803 #define PID_BUF_SIZE		127
804 
805 int trace_pid_write(struct trace_pid_list *filtered_pids,
806 		    struct trace_pid_list **new_pid_list,
807 		    const char __user *ubuf, size_t cnt)
808 {
809 	struct trace_pid_list *pid_list;
810 	struct trace_parser parser;
811 	unsigned long val;
812 	int nr_pids = 0;
813 	ssize_t read = 0;
814 	ssize_t ret;
815 	loff_t pos;
816 	pid_t pid;
817 
818 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
819 		return -ENOMEM;
820 
821 	/*
822 	 * Always recreate a new array. The write is an all or nothing
823 	 * operation. Always create a new array when adding new pids by
824 	 * the user. If the operation fails, then the current list is
825 	 * not modified.
826 	 */
827 	pid_list = trace_pid_list_alloc();
828 	if (!pid_list) {
829 		trace_parser_put(&parser);
830 		return -ENOMEM;
831 	}
832 
833 	if (filtered_pids) {
834 		/* copy the current bits to the new max */
835 		ret = trace_pid_list_first(filtered_pids, &pid);
836 		while (!ret) {
837 			ret = trace_pid_list_set(pid_list, pid);
838 			if (ret < 0)
839 				goto out;
840 
841 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
842 			nr_pids++;
843 		}
844 	}
845 
846 	ret = 0;
847 	while (cnt > 0) {
848 
849 		pos = 0;
850 
851 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
852 		if (ret < 0)
853 			break;
854 
855 		read += ret;
856 		ubuf += ret;
857 		cnt -= ret;
858 
859 		if (!trace_parser_loaded(&parser))
860 			break;
861 
862 		ret = -EINVAL;
863 		if (kstrtoul(parser.buffer, 0, &val))
864 			break;
865 
866 		pid = (pid_t)val;
867 
868 		if (trace_pid_list_set(pid_list, pid) < 0) {
869 			ret = -1;
870 			break;
871 		}
872 		nr_pids++;
873 
874 		trace_parser_clear(&parser);
875 		ret = 0;
876 	}
877  out:
878 	trace_parser_put(&parser);
879 
880 	if (ret < 0) {
881 		trace_pid_list_free(pid_list);
882 		return ret;
883 	}
884 
885 	if (!nr_pids) {
886 		/* Cleared the list of pids */
887 		trace_pid_list_free(pid_list);
888 		pid_list = NULL;
889 	}
890 
891 	*new_pid_list = pid_list;
892 
893 	return read;
894 }
895 
896 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
897 {
898 	u64 ts;
899 
900 	/* Early boot up does not have a buffer yet */
901 	if (!buf->buffer)
902 		return trace_clock_local();
903 
904 	ts = ring_buffer_time_stamp(buf->buffer);
905 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
906 
907 	return ts;
908 }
909 
910 u64 ftrace_now(int cpu)
911 {
912 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
913 }
914 
915 /**
916  * tracing_is_enabled - Show if global_trace has been enabled
917  *
918  * Shows if the global trace has been enabled or not. It uses the
919  * mirror flag "buffer_disabled", which is used in fast paths such as
920  * the irqsoff tracer. But it may be inaccurate due to races. If you
921  * need to know the accurate state, use tracing_is_on() which is a little
922  * slower, but accurate.
923  */
924 int tracing_is_enabled(void)
925 {
926 	/*
927 	 * For quick access (irqsoff uses this in fast path), just
928 	 * return the mirror variable of the state of the ring buffer.
929 	 * It's a little racy, but we don't really care.
930 	 */
931 	return !global_trace.buffer_disabled;
932 }
933 
934 /*
935  * trace_buf_size is the size in bytes that is allocated
936  * for a buffer. Note, the number of bytes is always rounded
937  * to page size.
938  *
939  * This number is purposely set to a low value of 16384.
940  * If a dump on oops happens, it is much appreciated
941  * not to have to wait for all that output. In any case, this is
942  * configurable at both boot time and run time.
943  */
944 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
945 
946 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
947 
948 /* trace_types holds a link list of available tracers. */
949 static struct tracer		*trace_types __read_mostly;
950 
951 /*
952  * trace_types_lock is used to protect the trace_types list.
953  */
954 DEFINE_MUTEX(trace_types_lock);
955 
956 /*
957  * serialize access to the ring buffer
958  *
959  * The ring buffer serializes readers, but that is only low-level protection.
960  * The validity of the events (returned by ring_buffer_peek() etc.)
961  * is not protected by the ring buffer.
962  *
963  * The content of events may become garbage if we allow other processes to
964  * consume these events concurrently:
965  *   A) the page of the consumed events may become a normal page
966  *      (not a reader page) in the ring buffer, and this page will be
967  *      rewritten by the event producer.
968  *   B) the page of the consumed events may become a page for splice_read,
969  *      and this page will be returned to the system.
970  *
971  * These primitives allow multiple processes to access different per-CPU
972  * ring buffers concurrently.
973  *
974  * These primitives don't distinguish read-only and read-consume access.
975  * Multiple read-only accesses are also serialized.
976  */
977 
978 #ifdef CONFIG_SMP
979 static DECLARE_RWSEM(all_cpu_access_lock);
980 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
981 
982 static inline void trace_access_lock(int cpu)
983 {
984 	if (cpu == RING_BUFFER_ALL_CPUS) {
985 		/* gain it for accessing the whole ring buffer. */
986 		down_write(&all_cpu_access_lock);
987 	} else {
988 		/* gain it for accessing a cpu ring buffer. */
989 
990 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
991 		down_read(&all_cpu_access_lock);
992 
993 		/* Secondly block other access to this @cpu ring buffer. */
994 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
995 	}
996 }
997 
998 static inline void trace_access_unlock(int cpu)
999 {
1000 	if (cpu == RING_BUFFER_ALL_CPUS) {
1001 		up_write(&all_cpu_access_lock);
1002 	} else {
1003 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
1004 		up_read(&all_cpu_access_lock);
1005 	}
1006 }
1007 
1008 static inline void trace_access_lock_init(void)
1009 {
1010 	int cpu;
1011 
1012 	for_each_possible_cpu(cpu)
1013 		mutex_init(&per_cpu(cpu_access_lock, cpu));
1014 }
1015 
1016 #else
1017 
1018 static DEFINE_MUTEX(access_lock);
1019 
1020 static inline void trace_access_lock(int cpu)
1021 {
1022 	(void)cpu;
1023 	mutex_lock(&access_lock);
1024 }
1025 
1026 static inline void trace_access_unlock(int cpu)
1027 {
1028 	(void)cpu;
1029 	mutex_unlock(&access_lock);
1030 }
1031 
1032 static inline void trace_access_lock_init(void)
1033 {
1034 }
1035 
1036 #endif
1037 
1038 #ifdef CONFIG_STACKTRACE
1039 static void __ftrace_trace_stack(struct trace_array *tr,
1040 				 struct trace_buffer *buffer,
1041 				 unsigned int trace_ctx,
1042 				 int skip, struct pt_regs *regs);
1043 static inline void ftrace_trace_stack(struct trace_array *tr,
1044 				      struct trace_buffer *buffer,
1045 				      unsigned int trace_ctx,
1046 				      int skip, struct pt_regs *regs);
1047 
1048 #else
1049 static inline void __ftrace_trace_stack(struct trace_array *tr,
1050 					struct trace_buffer *buffer,
1051 					unsigned int trace_ctx,
1052 					int skip, struct pt_regs *regs)
1053 {
1054 }
1055 static inline void ftrace_trace_stack(struct trace_array *tr,
1056 				      struct trace_buffer *buffer,
1057 				      unsigned long trace_ctx,
1058 				      int skip, struct pt_regs *regs)
1059 {
1060 }
1061 
1062 #endif
1063 
1064 static __always_inline void
1065 trace_event_setup(struct ring_buffer_event *event,
1066 		  int type, unsigned int trace_ctx)
1067 {
1068 	struct trace_entry *ent = ring_buffer_event_data(event);
1069 
1070 	tracing_generic_entry_update(ent, type, trace_ctx);
1071 }
1072 
1073 static __always_inline struct ring_buffer_event *
1074 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
1075 			  int type,
1076 			  unsigned long len,
1077 			  unsigned int trace_ctx)
1078 {
1079 	struct ring_buffer_event *event;
1080 
1081 	event = ring_buffer_lock_reserve(buffer, len);
1082 	if (event != NULL)
1083 		trace_event_setup(event, type, trace_ctx);
1084 
1085 	return event;
1086 }
1087 
1088 void tracer_tracing_on(struct trace_array *tr)
1089 {
1090 	if (tr->array_buffer.buffer)
1091 		ring_buffer_record_on(tr->array_buffer.buffer);
1092 	/*
1093 	 * This flag is looked at when buffers haven't been allocated
1094 	 * yet, or by some tracers (like irqsoff), that just want to
1095 	 * know if the ring buffer has been disabled, but it can handle
1096 	 * races where it gets disabled but we still do a record.
1097 	 * As the check is in the fast path of the tracers, it is more
1098 	 * important to be fast than accurate.
1099 	 */
1100 	tr->buffer_disabled = 0;
1101 }
1102 
1103 /**
1104  * tracing_on - enable tracing buffers
1105  *
1106  * This function enables tracing buffers that may have been
1107  * disabled with tracing_off.
1108  */
1109 void tracing_on(void)
1110 {
1111 	tracer_tracing_on(&global_trace);
1112 }
1113 EXPORT_SYMBOL_GPL(tracing_on);
1114 
1115 
1116 static __always_inline void
1117 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1118 {
1119 	__this_cpu_write(trace_taskinfo_save, true);
1120 
1121 	/* If this is the temp buffer, we need to commit fully */
1122 	if (this_cpu_read(trace_buffered_event) == event) {
1123 		/* Length is in event->array[0] */
1124 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1125 		/* Release the temp buffer */
1126 		this_cpu_dec(trace_buffered_event_cnt);
1127 		/* ring_buffer_unlock_commit() enables preemption */
1128 		preempt_enable_notrace();
1129 	} else
1130 		ring_buffer_unlock_commit(buffer);
1131 }
1132 
1133 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1134 		       const char *str, int size)
1135 {
1136 	struct ring_buffer_event *event;
1137 	struct trace_buffer *buffer;
1138 	struct print_entry *entry;
1139 	unsigned int trace_ctx;
1140 	int alloc;
1141 
1142 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1143 		return 0;
1144 
1145 	if (unlikely(tracing_selftest_running && tr == &global_trace))
1146 		return 0;
1147 
1148 	if (unlikely(tracing_disabled))
1149 		return 0;
1150 
1151 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1152 
1153 	trace_ctx = tracing_gen_ctx();
1154 	buffer = tr->array_buffer.buffer;
1155 	guard(ring_buffer_nest)(buffer);
1156 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1157 					    trace_ctx);
1158 	if (!event)
1159 		return 0;
1160 
1161 	entry = ring_buffer_event_data(event);
1162 	entry->ip = ip;
1163 
1164 	memcpy(&entry->buf, str, size);
1165 
1166 	/* Add a newline if necessary */
1167 	if (entry->buf[size - 1] != '\n') {
1168 		entry->buf[size] = '\n';
1169 		entry->buf[size + 1] = '\0';
1170 	} else
1171 		entry->buf[size] = '\0';
1172 
1173 	__buffer_unlock_commit(buffer, event);
1174 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1175 	return size;
1176 }
1177 EXPORT_SYMBOL_GPL(__trace_array_puts);
1178 
1179 /**
1180  * __trace_puts - write a constant string into the trace buffer.
1181  * @ip:	   The address of the caller
1182  * @str:   The constant string to write
1183  * @size:  The size of the string.
1184  */
1185 int __trace_puts(unsigned long ip, const char *str, int size)
1186 {
1187 	return __trace_array_puts(printk_trace, ip, str, size);
1188 }
1189 EXPORT_SYMBOL_GPL(__trace_puts);
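
/*
 * Usage note (editor's sketch): callers normally go through the trace_puts()
 * macro rather than calling __trace_puts() directly, e.g.:
 *
 *	trace_puts("hit the slow path\n");
 *
 * The macro supplies the caller address and string size, and may route
 * constant strings through __trace_bputs() instead.
 */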
1190 
1191 /**
1192  * __trace_bputs - write the pointer to a constant string into trace buffer
1193  * @ip:	   The address of the caller
1194  * @str:   The constant string whose pointer is written to the buffer
1195  */
1196 int __trace_bputs(unsigned long ip, const char *str)
1197 {
1198 	struct trace_array *tr = READ_ONCE(printk_trace);
1199 	struct ring_buffer_event *event;
1200 	struct trace_buffer *buffer;
1201 	struct bputs_entry *entry;
1202 	unsigned int trace_ctx;
1203 	int size = sizeof(struct bputs_entry);
1204 
1205 	if (!printk_binsafe(tr))
1206 		return __trace_puts(ip, str, strlen(str));
1207 
1208 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1209 		return 0;
1210 
1211 	if (unlikely(tracing_selftest_running || tracing_disabled))
1212 		return 0;
1213 
1214 	trace_ctx = tracing_gen_ctx();
1215 	buffer = tr->array_buffer.buffer;
1216 
1217 	guard(ring_buffer_nest)(buffer);
1218 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1219 					    trace_ctx);
1220 	if (!event)
1221 		return 0;
1222 
1223 	entry = ring_buffer_event_data(event);
1224 	entry->ip			= ip;
1225 	entry->str			= str;
1226 
1227 	__buffer_unlock_commit(buffer, event);
1228 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1229 
1230 	return 1;
1231 }
1232 EXPORT_SYMBOL_GPL(__trace_bputs);
1233 
1234 #ifdef CONFIG_TRACER_SNAPSHOT
1235 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1236 					   void *cond_data)
1237 {
1238 	struct tracer *tracer = tr->current_trace;
1239 	unsigned long flags;
1240 
1241 	if (in_nmi()) {
1242 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1243 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1244 		return;
1245 	}
1246 
1247 	if (!tr->allocated_snapshot) {
1248 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1249 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1250 		tracer_tracing_off(tr);
1251 		return;
1252 	}
1253 
1254 	/* Note, snapshot can not be used when the tracer uses it */
1255 	if (tracer->use_max_tr) {
1256 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1257 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1258 		return;
1259 	}
1260 
1261 	if (tr->mapped) {
1262 		trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
1263 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1264 		return;
1265 	}
1266 
1267 	local_irq_save(flags);
1268 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1269 	local_irq_restore(flags);
1270 }
1271 
1272 void tracing_snapshot_instance(struct trace_array *tr)
1273 {
1274 	tracing_snapshot_instance_cond(tr, NULL);
1275 }
1276 
1277 /**
1278  * tracing_snapshot - take a snapshot of the current buffer.
1279  *
1280  * This causes a swap between the snapshot buffer and the current live
1281  * tracing buffer. You can use this to take snapshots of the live
1282  * trace when some condition is triggered, but continue to trace.
1283  *
1284  * Note, make sure to allocate the snapshot with either
1285  * a tracing_snapshot_alloc(), or by doing it manually
1286  * with: echo 1 > /sys/kernel/tracing/snapshot
1287  *
1288  * If the snapshot buffer is not allocated, it will stop tracing.
1289  * Basically making a permanent snapshot.
1290  */
1291 void tracing_snapshot(void)
1292 {
1293 	struct trace_array *tr = &global_trace;
1294 
1295 	tracing_snapshot_instance(tr);
1296 }
1297 EXPORT_SYMBOL_GPL(tracing_snapshot);
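
/*
 * Usage sketch (editor's example): a caller that can sleep allocates the
 * snapshot buffer once, then takes snapshots from contexts that must not
 * sleep. The function names below are hypothetical.
 */
#if 0	/* illustrative only */
static int __init example_setup(void)
{
	return tracing_alloc_snapshot();	/* may sleep */
}

static void example_trigger(void)
{
	tracing_snapshot();	/* swap the live buffer with the spare one */
}
#endif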
1298 
1299 /**
1300  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1301  * @tr:		The tracing instance to snapshot
1302  * @cond_data:	The data to be tested conditionally, and possibly saved
1303  *
1304  * This is the same as tracing_snapshot() except that the snapshot is
1305  * conditional - the snapshot will only happen if the
1306  * cond_snapshot.update() implementation receiving the cond_data
1307  * returns true, which means that the trace array's cond_snapshot
1308  * update() operation used the cond_data to determine whether the
1309  * snapshot should be taken, and if it was, presumably saved it along
1310  * with the snapshot.
1311  */
1312 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1313 {
1314 	tracing_snapshot_instance_cond(tr, cond_data);
1315 }
1316 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1317 
1318 /**
1319  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1320  * @tr:		The tracing instance
1321  *
1322  * When the user enables a conditional snapshot using
1323  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1324  * with the snapshot.  This accessor is used to retrieve it.
1325  *
1326  * Should not be called from cond_snapshot.update(), since it takes
1327  * the tr->max_lock lock, which the code calling
1328  * cond_snapshot.update() has already taken.
1329  *
1330  * Returns the cond_data associated with the trace array's snapshot.
1331  */
1332 void *tracing_cond_snapshot_data(struct trace_array *tr)
1333 {
1334 	void *cond_data = NULL;
1335 
1336 	local_irq_disable();
1337 	arch_spin_lock(&tr->max_lock);
1338 
1339 	if (tr->cond_snapshot)
1340 		cond_data = tr->cond_snapshot->cond_data;
1341 
1342 	arch_spin_unlock(&tr->max_lock);
1343 	local_irq_enable();
1344 
1345 	return cond_data;
1346 }
1347 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1348 
1349 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1350 					struct array_buffer *size_buf, int cpu_id);
1351 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1352 
1353 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1354 {
1355 	int order;
1356 	int ret;
1357 
1358 	if (!tr->allocated_snapshot) {
1359 
1360 		/* Make the snapshot buffer have the same order as main buffer */
1361 		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1362 		ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1363 		if (ret < 0)
1364 			return ret;
1365 
1366 		/* allocate spare buffer */
1367 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1368 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1369 		if (ret < 0)
1370 			return ret;
1371 
1372 		tr->allocated_snapshot = true;
1373 	}
1374 
1375 	return 0;
1376 }
1377 
1378 static void free_snapshot(struct trace_array *tr)
1379 {
1380 	/*
1381 	 * We don't free the ring buffer; instead, we resize it because
1382 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1383 	 * we want to preserve it.
1384 	 */
1385 	ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1386 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1387 	set_buffer_entries(&tr->max_buffer, 1);
1388 	tracing_reset_online_cpus(&tr->max_buffer);
1389 	tr->allocated_snapshot = false;
1390 }
1391 
1392 static int tracing_arm_snapshot_locked(struct trace_array *tr)
1393 {
1394 	int ret;
1395 
1396 	lockdep_assert_held(&trace_types_lock);
1397 
1398 	spin_lock(&tr->snapshot_trigger_lock);
1399 	if (tr->snapshot == UINT_MAX || tr->mapped) {
1400 		spin_unlock(&tr->snapshot_trigger_lock);
1401 		return -EBUSY;
1402 	}
1403 
1404 	tr->snapshot++;
1405 	spin_unlock(&tr->snapshot_trigger_lock);
1406 
1407 	ret = tracing_alloc_snapshot_instance(tr);
1408 	if (ret) {
1409 		spin_lock(&tr->snapshot_trigger_lock);
1410 		tr->snapshot--;
1411 		spin_unlock(&tr->snapshot_trigger_lock);
1412 	}
1413 
1414 	return ret;
1415 }
1416 
1417 int tracing_arm_snapshot(struct trace_array *tr)
1418 {
1419 	guard(mutex)(&trace_types_lock);
1420 	return tracing_arm_snapshot_locked(tr);
1421 }
1422 
1423 void tracing_disarm_snapshot(struct trace_array *tr)
1424 {
1425 	spin_lock(&tr->snapshot_trigger_lock);
1426 	if (!WARN_ON(!tr->snapshot))
1427 		tr->snapshot--;
1428 	spin_unlock(&tr->snapshot_trigger_lock);
1429 }
1430 
1431 /**
1432  * tracing_alloc_snapshot - allocate snapshot buffer.
1433  *
1434  * This only allocates the snapshot buffer if it isn't already
1435  * allocated - it doesn't also take a snapshot.
1436  *
1437  * This is meant to be used in cases where the snapshot buffer needs
1438  * to be set up for events that can't sleep but need to be able to
1439  * trigger a snapshot.
1440  */
1441 int tracing_alloc_snapshot(void)
1442 {
1443 	struct trace_array *tr = &global_trace;
1444 	int ret;
1445 
1446 	ret = tracing_alloc_snapshot_instance(tr);
1447 	WARN_ON(ret < 0);
1448 
1449 	return ret;
1450 }
1451 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1452 
1453 /**
1454  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1455  *
1456  * This is similar to tracing_snapshot(), but it will allocate the
1457  * snapshot buffer if it isn't already allocated. Use this only
1458  * where it is safe to sleep, as the allocation may sleep.
1459  *
1460  * This causes a swap between the snapshot buffer and the current live
1461  * tracing buffer. You can use this to take snapshots of the live
1462  * trace when some condition is triggered, but continue to trace.
1463  */
1464 void tracing_snapshot_alloc(void)
1465 {
1466 	int ret;
1467 
1468 	ret = tracing_alloc_snapshot();
1469 	if (ret < 0)
1470 		return;
1471 
1472 	tracing_snapshot();
1473 }
1474 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1475 
1476 /**
1477  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1478  * @tr:		The tracing instance
1479  * @cond_data:	User data to associate with the snapshot
1480  * @update:	Implementation of the cond_snapshot update function
1481  *
1482  * Check whether the conditional snapshot for the given instance has
1483  * already been enabled, or if the current tracer is already using a
1484  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1485  * save the cond_data and update function inside.
1486  *
1487  * Returns 0 if successful, error otherwise.
1488  */
1489 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1490 				 cond_update_fn_t update)
1491 {
1492 	struct cond_snapshot *cond_snapshot __free(kfree) =
1493 		kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1494 	int ret;
1495 
1496 	if (!cond_snapshot)
1497 		return -ENOMEM;
1498 
1499 	cond_snapshot->cond_data = cond_data;
1500 	cond_snapshot->update = update;
1501 
1502 	guard(mutex)(&trace_types_lock);
1503 
1504 	if (tr->current_trace->use_max_tr)
1505 		return -EBUSY;
1506 
1507 	/*
1508 	 * The cond_snapshot can only change to NULL without the
1509 	 * trace_types_lock. We don't care if we race with it going
1510 	 * to NULL, but we want to make sure that it's not set to
1511 	 * something other than NULL when we get here, which we can
1512 	 * do safely with only holding the trace_types_lock and not
1513 	 * having to take the max_lock.
1514 	 */
1515 	if (tr->cond_snapshot)
1516 		return -EBUSY;
1517 
1518 	ret = tracing_arm_snapshot_locked(tr);
1519 	if (ret)
1520 		return ret;
1521 
1522 	local_irq_disable();
1523 	arch_spin_lock(&tr->max_lock);
1524 	tr->cond_snapshot = no_free_ptr(cond_snapshot);
1525 	arch_spin_unlock(&tr->max_lock);
1526 	local_irq_enable();
1527 
1528 	return 0;
1529 }
1530 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
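
/*
 * Usage sketch (editor's example): a conditional snapshot registers an
 * update() callback that decides whether the snapshot is taken. The names
 * below are hypothetical and the cond_update_fn_t signature is assumed
 * from kernel/trace/trace.h.
 */
#if 0	/* illustrative only */
static bool example_update(struct trace_array *tr, void *cond_data)
{
	unsigned long *threshold = cond_data;

	/* only snapshot when the measured value exceeds the threshold */
	return example_measured_value() > *threshold;	/* hypothetical helper */
}

static unsigned long example_threshold = 100;

static int example_enable(struct trace_array *tr)
{
	return tracing_snapshot_cond_enable(tr, &example_threshold,
					    example_update);
}
#endif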
1531 
1532 /**
1533  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1534  * @tr:		The tracing instance
1535  *
1536  * Check whether the conditional snapshot for the given instance is
1537  * enabled; if so, free the cond_snapshot associated with it,
1538  * otherwise return -EINVAL.
1539  *
1540  * Returns 0 if successful, error otherwise.
1541  */
1542 int tracing_snapshot_cond_disable(struct trace_array *tr)
1543 {
1544 	int ret = 0;
1545 
1546 	local_irq_disable();
1547 	arch_spin_lock(&tr->max_lock);
1548 
1549 	if (!tr->cond_snapshot)
1550 		ret = -EINVAL;
1551 	else {
1552 		kfree(tr->cond_snapshot);
1553 		tr->cond_snapshot = NULL;
1554 	}
1555 
1556 	arch_spin_unlock(&tr->max_lock);
1557 	local_irq_enable();
1558 
1559 	tracing_disarm_snapshot(tr);
1560 
1561 	return ret;
1562 }
1563 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1564 #else
1565 void tracing_snapshot(void)
1566 {
1567 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1568 }
1569 EXPORT_SYMBOL_GPL(tracing_snapshot);
1570 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1571 {
1572 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1573 }
1574 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1575 int tracing_alloc_snapshot(void)
1576 {
1577 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1578 	return -ENODEV;
1579 }
1580 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1581 void tracing_snapshot_alloc(void)
1582 {
1583 	/* Give warning */
1584 	tracing_snapshot();
1585 }
1586 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1587 void *tracing_cond_snapshot_data(struct trace_array *tr)
1588 {
1589 	return NULL;
1590 }
1591 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1592 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1593 {
1594 	return -ENODEV;
1595 }
1596 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1597 int tracing_snapshot_cond_disable(struct trace_array *tr)
1598 {
1599 	return false;
1600 }
1601 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1602 #define free_snapshot(tr)	do { } while (0)
1603 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1604 #endif /* CONFIG_TRACER_SNAPSHOT */
1605 
1606 void tracer_tracing_off(struct trace_array *tr)
1607 {
1608 	if (tr->array_buffer.buffer)
1609 		ring_buffer_record_off(tr->array_buffer.buffer);
1610 	/*
1611 	 * This flag is looked at when buffers haven't been allocated
1612 	 * yet, or by some tracers (like irqsoff), that just want to
1613 	 * know if the ring buffer has been disabled, but it can handle
1614 	 * races where it gets disabled but we still do a record.
1615 	 * As the check is in the fast path of the tracers, it is more
1616 	 * important to be fast than accurate.
1617 	 */
1618 	tr->buffer_disabled = 1;
1619 }
1620 
1621 /**
1622  * tracer_tracing_disable() - temporarily disable writes to the buffer
1623  * @tr: The trace array whose buffer is to be disabled
1624  *
1625  * Expects tracer_tracing_enable() to re-enable tracing.
1626  * The difference between this and tracer_tracing_off() is that this
1627  * is a counter and can nest, whereas tracer_tracing_off() can
1628  * be called multiple times and a single tracer_tracing_on() will
1629  * enable it.
1630  */
1631 void tracer_tracing_disable(struct trace_array *tr)
1632 {
1633 	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1634 		return;
1635 
1636 	ring_buffer_record_disable(tr->array_buffer.buffer);
1637 }
1638 
1639 /**
1640  * tracer_tracing_enable() - counterpart of tracer_tracing_disable()
1641  * @tr: The trace array that had tracer_tracing_disable() called on it
1642  *
1643  * This is called after tracer_tracing_disable() has been called on @tr,
1644  * when it's safe to re-enable tracing.
1645  */
1646 void tracer_tracing_enable(struct trace_array *tr)
1647 {
1648 	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1649 		return;
1650 
1651 	ring_buffer_record_enable(tr->array_buffer.buffer);
1652 }
1653 
1654 /**
1655  * tracing_off - turn off tracing buffers
1656  *
1657  * This function stops the tracing buffers from recording data.
1658  * It does not disable any overhead the tracers themselves may
1659  * be causing. This function simply causes all recording to
1660  * the ring buffers to fail.
1661  */
1662 void tracing_off(void)
1663 {
1664 	tracer_tracing_off(&global_trace);
1665 }
1666 EXPORT_SYMBOL_GPL(tracing_off);
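
/*
 * Usage note (editor's sketch): tracing_on()/tracing_off() are often used to
 * bracket a region of interest so the ring buffer stops recording right
 * after the problem occurs, e.g.:
 *
 *	tracing_on();
 *	do_suspect_work();	// hypothetical
 *	tracing_off();
 */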
1667 
1668 void disable_trace_on_warning(void)
1669 {
1670 	if (__disable_trace_on_warning) {
1671 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1672 			"Disabling tracing due to warning\n");
1673 		tracing_off();
1674 	}
1675 }
1676 
1677 /**
1678  * tracer_tracing_is_on - show real state of ring buffer enabled
1679  * @tr : the trace array to know if ring buffer is enabled
1680  *
1681  * Shows real state of the ring buffer if it is enabled or not.
1682  */
1683 bool tracer_tracing_is_on(struct trace_array *tr)
1684 {
1685 	if (tr->array_buffer.buffer)
1686 		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1687 	return !tr->buffer_disabled;
1688 }
1689 
1690 /**
1691  * tracing_is_on - show state of ring buffers enabled
1692  */
1693 int tracing_is_on(void)
1694 {
1695 	return tracer_tracing_is_on(&global_trace);
1696 }
1697 EXPORT_SYMBOL_GPL(tracing_is_on);
1698 
1699 static int __init set_buf_size(char *str)
1700 {
1701 	unsigned long buf_size;
1702 
1703 	if (!str)
1704 		return 0;
1705 	buf_size = memparse(str, &str);
1706 	/*
1707 	 * nr_entries cannot be zero and the startup
1708 	 * tests require some buffer space. Therefore
1709 	 * ensure we have at least 4096 bytes of buffer.
1710 	 */
1711 	trace_buf_size = max(4096UL, buf_size);
1712 	return 1;
1713 }
1714 __setup("trace_buf_size=", set_buf_size);
1715 
set_tracing_thresh(char * str)1716 static int __init set_tracing_thresh(char *str)
1717 {
1718 	unsigned long threshold;
1719 	int ret;
1720 
1721 	if (!str)
1722 		return 0;
1723 	ret = kstrtoul(str, 0, &threshold);
1724 	if (ret < 0)
1725 		return 0;
1726 	tracing_thresh = threshold * 1000;
1727 	return 1;
1728 }
1729 __setup("tracing_thresh=", set_tracing_thresh);
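
/*
 * Editorial note (not part of the original source): example boot-time use of
 * the two parameters parsed above.  trace_buf_size accepts memparse()
 * suffixes (K, M, G) and sets the per-CPU ring buffer size; tracing_thresh
 * is given in microseconds and stored internally in nanoseconds:
 *
 *     trace_buf_size=16M tracing_thresh=500
 */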
1730 
nsecs_to_usecs(unsigned long nsecs)1731 unsigned long nsecs_to_usecs(unsigned long nsecs)
1732 {
1733 	return nsecs / 1000;
1734 }
1735 
1736 /*
1737  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1738  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1739  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1740  * of strings in the order that the evals (enum) were defined.
1741  */
1742 #undef C
1743 #define C(a, b) b
1744 
1745 /* These must match the bit positions in trace_iterator_flags */
1746 static const char *trace_options[] = {
1747 	TRACE_FLAGS
1748 	NULL
1749 };
1750 
1751 static struct {
1752 	u64 (*func)(void);
1753 	const char *name;
1754 	int in_ns;		/* is this clock in nanoseconds? */
1755 } trace_clocks[] = {
1756 	{ trace_clock_local,		"local",	1 },
1757 	{ trace_clock_global,		"global",	1 },
1758 	{ trace_clock_counter,		"counter",	0 },
1759 	{ trace_clock_jiffies,		"uptime",	0 },
1760 	{ trace_clock,			"perf",		1 },
1761 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1762 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1763 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1764 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1765 	ARCH_TRACE_CLOCKS
1766 };
1767 
trace_clock_in_ns(struct trace_array * tr)1768 bool trace_clock_in_ns(struct trace_array *tr)
1769 {
1770 	if (trace_clocks[tr->clock_id].in_ns)
1771 		return true;
1772 
1773 	return false;
1774 }
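
/*
 * Editorial note (not part of the original source): the trace_clocks[] table
 * above backs the tracefs "trace_clock" file.  A typical way to switch the
 * timestamp source, e.g. to the fast monotonic clock, is:
 *
 *     # cat /sys/kernel/tracing/trace_clock
 *     [local] global counter uptime perf mono mono_raw boot tai
 *     # echo mono > /sys/kernel/tracing/trace_clock
 *
 * Clocks with in_ns == 0 ("counter" and "uptime") report raw counts rather
 * than nanosecond timestamps.
 */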
1775 
1776 /*
1777  * trace_parser_get_init - allocate and initialize the buffer for the trace parser
1778  */
trace_parser_get_init(struct trace_parser * parser,int size)1779 int trace_parser_get_init(struct trace_parser *parser, int size)
1780 {
1781 	memset(parser, 0, sizeof(*parser));
1782 
1783 	parser->buffer = kmalloc(size, GFP_KERNEL);
1784 	if (!parser->buffer)
1785 		return 1;
1786 
1787 	parser->size = size;
1788 	return 0;
1789 }
1790 
1791 /*
1792  * trace_parser_put - frees the trace parser's buffer
1793  */
trace_parser_put(struct trace_parser * parser)1794 void trace_parser_put(struct trace_parser *parser)
1795 {
1796 	kfree(parser->buffer);
1797 	parser->buffer = NULL;
1798 }
1799 
1800 /*
1801  * trace_get_user - reads a whitespace-separated token of user input
1802  * (whitespace matched by isspace(ch))
1803  *
1804  * For each token found, the 'struct trace_parser' is updated
1805  * and the function returns.
1806  *
1807  * Returns the number of bytes read.
1808  *
1809  * See kernel/trace/trace.h for 'struct trace_parser' details.
1810  */
trace_get_user(struct trace_parser * parser,const char __user * ubuf,size_t cnt,loff_t * ppos)1811 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1812 	size_t cnt, loff_t *ppos)
1813 {
1814 	char ch;
1815 	size_t read = 0;
1816 	ssize_t ret;
1817 
1818 	if (!*ppos)
1819 		trace_parser_clear(parser);
1820 
1821 	ret = get_user(ch, ubuf++);
1822 	if (ret)
1823 		goto fail;
1824 
1825 	read++;
1826 	cnt--;
1827 
1828 	/*
1829 	 * The parser is not finished with the last write,
1830 	 * continue reading the user input without skipping spaces.
1831 	 */
1832 	if (!parser->cont) {
1833 		/* skip white space */
1834 		while (cnt && isspace(ch)) {
1835 			ret = get_user(ch, ubuf++);
1836 			if (ret)
1837 				goto fail;
1838 			read++;
1839 			cnt--;
1840 		}
1841 
1842 		parser->idx = 0;
1843 
1844 		/* only spaces were written */
1845 		if (isspace(ch) || !ch) {
1846 			*ppos += read;
1847 			return read;
1848 		}
1849 	}
1850 
1851 	/* read the non-space input */
1852 	while (cnt && !isspace(ch) && ch) {
1853 		if (parser->idx < parser->size - 1)
1854 			parser->buffer[parser->idx++] = ch;
1855 		else {
1856 			ret = -EINVAL;
1857 			goto fail;
1858 		}
1859 
1860 		ret = get_user(ch, ubuf++);
1861 		if (ret)
1862 			goto fail;
1863 		read++;
1864 		cnt--;
1865 	}
1866 
1867 	/* We either got finished input or we have to wait for another call. */
1868 	if (isspace(ch) || !ch) {
1869 		parser->buffer[parser->idx] = 0;
1870 		parser->cont = false;
1871 	} else if (parser->idx < parser->size - 1) {
1872 		parser->cont = true;
1873 		parser->buffer[parser->idx++] = ch;
1874 		/* Make sure the parsed string always terminates with '\0'. */
1875 		parser->buffer[parser->idx] = 0;
1876 	} else {
1877 		ret = -EINVAL;
1878 		goto fail;
1879 	}
1880 
1881 	*ppos += read;
1882 	return read;
1883 fail:
1884 	trace_parser_fail(parser);
1885 	return ret;
1886 }
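
/*
 * Editorial usage sketch (not part of the original source): the pattern a
 * tracefs ->write() handler typically follows with the parser above.  The
 * handler name is hypothetical, and the parsed token is only logged here;
 * a real handler would act on parser.buffer instead.
 */
static __maybe_unused ssize_t example_token_write(struct file *filp,
						  const char __user *ubuf,
						  size_t cnt, loff_t *ppos)
{
	struct trace_parser parser;
	ssize_t read;

	if (trace_parser_get_init(&parser, 64))
		return -ENOMEM;

	/* trace_get_user() copies one whitespace-separated token per call. */
	read = trace_get_user(&parser, ubuf, cnt, ppos);
	if (read > 0 && trace_parser_loaded(&parser)) {
		/* parser.buffer now holds a NUL-terminated token. */
		pr_debug("parsed token: %s\n", parser.buffer);
	}

	trace_parser_put(&parser);
	return read;
}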
1887 
1888 /* TODO add a seq_buf_to_buffer() */
trace_seq_to_buffer(struct trace_seq * s,void * buf,size_t cnt)1889 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1890 {
1891 	int len;
1892 
1893 	if (trace_seq_used(s) <= s->readpos)
1894 		return -EBUSY;
1895 
1896 	len = trace_seq_used(s) - s->readpos;
1897 	if (cnt > len)
1898 		cnt = len;
1899 	memcpy(buf, s->buffer + s->readpos, cnt);
1900 
1901 	s->readpos += cnt;
1902 	return cnt;
1903 }
1904 
1905 unsigned long __read_mostly	tracing_thresh;
1906 
1907 #ifdef CONFIG_TRACER_MAX_TRACE
1908 static const struct file_operations tracing_max_lat_fops;
1909 
1910 #ifdef LATENCY_FS_NOTIFY
1911 
1912 static struct workqueue_struct *fsnotify_wq;
1913 
latency_fsnotify_workfn(struct work_struct * work)1914 static void latency_fsnotify_workfn(struct work_struct *work)
1915 {
1916 	struct trace_array *tr = container_of(work, struct trace_array,
1917 					      fsnotify_work);
1918 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1919 }
1920 
latency_fsnotify_workfn_irq(struct irq_work * iwork)1921 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1922 {
1923 	struct trace_array *tr = container_of(iwork, struct trace_array,
1924 					      fsnotify_irqwork);
1925 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1926 }
1927 
trace_create_maxlat_file(struct trace_array * tr,struct dentry * d_tracer)1928 static void trace_create_maxlat_file(struct trace_array *tr,
1929 				     struct dentry *d_tracer)
1930 {
1931 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1932 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1933 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1934 					      TRACE_MODE_WRITE,
1935 					      d_tracer, tr,
1936 					      &tracing_max_lat_fops);
1937 }
1938 
latency_fsnotify_init(void)1939 __init static int latency_fsnotify_init(void)
1940 {
1941 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1942 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1943 	if (!fsnotify_wq) {
1944 		pr_err("Unable to allocate tr_max_lat_wq\n");
1945 		return -ENOMEM;
1946 	}
1947 	return 0;
1948 }
1949 
1950 late_initcall_sync(latency_fsnotify_init);
1951 
latency_fsnotify(struct trace_array * tr)1952 void latency_fsnotify(struct trace_array *tr)
1953 {
1954 	if (!fsnotify_wq)
1955 		return;
1956 	/*
1957 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1958 	 * possible that we are called from __schedule() or do_idle(), which
1959 	 * could cause a deadlock.
1960 	 */
1961 	irq_work_queue(&tr->fsnotify_irqwork);
1962 }
1963 
1964 #else /* !LATENCY_FS_NOTIFY */
1965 
1966 #define trace_create_maxlat_file(tr, d_tracer)				\
1967 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1968 			  d_tracer, tr, &tracing_max_lat_fops)
1969 
1970 #endif
1971 
1972 /*
1973  * Copy the new maximum trace into the separate maximum-trace
1974  * structure. (This way the maximum trace is permanently saved,
1975  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1976  */
1977 static void
__update_max_tr(struct trace_array * tr,struct task_struct * tsk,int cpu)1978 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1979 {
1980 	struct array_buffer *trace_buf = &tr->array_buffer;
1981 	struct array_buffer *max_buf = &tr->max_buffer;
1982 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1983 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1984 
1985 	max_buf->cpu = cpu;
1986 	max_buf->time_start = data->preempt_timestamp;
1987 
1988 	max_data->saved_latency = tr->max_latency;
1989 	max_data->critical_start = data->critical_start;
1990 	max_data->critical_end = data->critical_end;
1991 
1992 	strscpy(max_data->comm, tsk->comm);
1993 	max_data->pid = tsk->pid;
1994 	/*
1995 	 * If tsk == current, then use current_uid(), as that does not use
1996 	 * RCU. The irq tracer can be called out of RCU scope.
1997 	 */
1998 	if (tsk == current)
1999 		max_data->uid = current_uid();
2000 	else
2001 		max_data->uid = task_uid(tsk);
2002 
2003 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
2004 	max_data->policy = tsk->policy;
2005 	max_data->rt_priority = tsk->rt_priority;
2006 
2007 	/* record this task's comm */
2008 	tracing_record_cmdline(tsk);
2009 	latency_fsnotify(tr);
2010 }
2011 
2012 /**
2013  * update_max_tr - snapshot all trace buffers of @tr into max_tr
2014  * @tr: the trace array with the latency
2015  * @tsk: the task with the latency
2016  * @cpu: The cpu that initiated the trace.
2017  * @cond_data: User data associated with a conditional snapshot
2018  *
2019  * Flip the buffers between the @tr and the max_tr and record information
2020  * about which task was the cause of this latency.
2021  */
2022 void
update_max_tr(struct trace_array * tr,struct task_struct * tsk,int cpu,void * cond_data)2023 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
2024 	      void *cond_data)
2025 {
2026 	if (tr->stop_count)
2027 		return;
2028 
2029 	WARN_ON_ONCE(!irqs_disabled());
2030 
2031 	if (!tr->allocated_snapshot) {
2032 		/* Only the nop tracer should hit this when disabling */
2033 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
2034 		return;
2035 	}
2036 
2037 	arch_spin_lock(&tr->max_lock);
2038 
2039 	/* Inherit the recordable setting from array_buffer */
2040 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
2041 		ring_buffer_record_on(tr->max_buffer.buffer);
2042 	else
2043 		ring_buffer_record_off(tr->max_buffer.buffer);
2044 
2045 #ifdef CONFIG_TRACER_SNAPSHOT
2046 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
2047 		arch_spin_unlock(&tr->max_lock);
2048 		return;
2049 	}
2050 #endif
2051 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
2052 
2053 	__update_max_tr(tr, tsk, cpu);
2054 
2055 	arch_spin_unlock(&tr->max_lock);
2056 
2057 	/* Any waiters on the old snapshot buffer need to wake up */
2058 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
2059 }
2060 
2061 /**
2062  * update_max_tr_single - only copy one CPU's trace buffer over to max_tr
2063  * @tr: the trace array with the latency
2064  * @tsk: task with the latency
2065  * @cpu: the cpu of the buffer to copy.
2066  *
2067  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
2068  */
2069 void
update_max_tr_single(struct trace_array * tr,struct task_struct * tsk,int cpu)2070 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
2071 {
2072 	int ret;
2073 
2074 	if (tr->stop_count)
2075 		return;
2076 
2077 	WARN_ON_ONCE(!irqs_disabled());
2078 	if (!tr->allocated_snapshot) {
2079 		/* Only the nop tracer should hit this when disabling */
2080 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
2081 		return;
2082 	}
2083 
2084 	arch_spin_lock(&tr->max_lock);
2085 
2086 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
2087 
2088 	if (ret == -EBUSY) {
2089 		/*
2090 		 * We failed to swap the buffer due to a commit taking
2091 		 * place on this CPU, or because a buffer resize is in
2092 		 * progress. We fail to record, but we reset the max
2093 		 * trace buffer (no one writes directly to it) and flag
2094 		 * that it failed.
2095 		 */
2096 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
2097 			"Failed to swap buffers due to commit or resize in progress\n");
2098 	}
2099 
2100 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
2101 
2102 	__update_max_tr(tr, tsk, cpu);
2103 	arch_spin_unlock(&tr->max_lock);
2104 }
2105 
2106 #endif /* CONFIG_TRACER_MAX_TRACE */
2107 
2108 struct pipe_wait {
2109 	struct trace_iterator		*iter;
2110 	int				wait_index;
2111 };
2112 
wait_pipe_cond(void * data)2113 static bool wait_pipe_cond(void *data)
2114 {
2115 	struct pipe_wait *pwait = data;
2116 	struct trace_iterator *iter = pwait->iter;
2117 
2118 	if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2119 		return true;
2120 
2121 	return iter->closed;
2122 }
2123 
wait_on_pipe(struct trace_iterator * iter,int full)2124 static int wait_on_pipe(struct trace_iterator *iter, int full)
2125 {
2126 	struct pipe_wait pwait;
2127 	int ret;
2128 
2129 	/* Iterators are static, they should be filled or empty */
2130 	if (trace_buffer_iter(iter, iter->cpu_file))
2131 		return 0;
2132 
2133 	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2134 	pwait.iter = iter;
2135 
2136 	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2137 			       wait_pipe_cond, &pwait);
2138 
2139 #ifdef CONFIG_TRACER_MAX_TRACE
2140 	/*
2141 	 * Make sure this is still the snapshot buffer, as if a snapshot were
2142 	 * to happen, this would now be the main buffer.
2143 	 */
2144 	if (iter->snapshot)
2145 		iter->array_buffer = &iter->tr->max_buffer;
2146 #endif
2147 	return ret;
2148 }
2149 
2150 #ifdef CONFIG_FTRACE_STARTUP_TEST
2151 static bool selftests_can_run;
2152 
2153 struct trace_selftests {
2154 	struct list_head		list;
2155 	struct tracer			*type;
2156 };
2157 
2158 static LIST_HEAD(postponed_selftests);
2159 
save_selftest(struct tracer * type)2160 static int save_selftest(struct tracer *type)
2161 {
2162 	struct trace_selftests *selftest;
2163 
2164 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2165 	if (!selftest)
2166 		return -ENOMEM;
2167 
2168 	selftest->type = type;
2169 	list_add(&selftest->list, &postponed_selftests);
2170 	return 0;
2171 }
2172 
run_tracer_selftest(struct tracer * type)2173 static int run_tracer_selftest(struct tracer *type)
2174 {
2175 	struct trace_array *tr = &global_trace;
2176 	struct tracer *saved_tracer = tr->current_trace;
2177 	int ret;
2178 
2179 	if (!type->selftest || tracing_selftest_disabled)
2180 		return 0;
2181 
2182 	/*
2183 	 * If a tracer registers early in boot up (before scheduling is
2184 	 * initialized and such), then do not run its selftests yet.
2185 	 * Instead, run it a little later in the boot process.
2186 	 */
2187 	if (!selftests_can_run)
2188 		return save_selftest(type);
2189 
2190 	if (!tracing_is_on()) {
2191 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2192 			type->name);
2193 		return 0;
2194 	}
2195 
2196 	/*
2197 	 * Run a selftest on this tracer.
2198 	 * Here we reset the trace buffer, and set the current
2199 	 * tracer to be this tracer. The tracer can then run some
2200 	 * internal tracing to verify that everything is in order.
2201 	 * If we fail, we do not register this tracer.
2202 	 */
2203 	tracing_reset_online_cpus(&tr->array_buffer);
2204 
2205 	tr->current_trace = type;
2206 
2207 #ifdef CONFIG_TRACER_MAX_TRACE
2208 	if (type->use_max_tr) {
2209 		/* If we expanded the buffers, make sure the max is expanded too */
2210 		if (tr->ring_buffer_expanded)
2211 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2212 					   RING_BUFFER_ALL_CPUS);
2213 		tr->allocated_snapshot = true;
2214 	}
2215 #endif
2216 
2217 	/* the test is responsible for initializing and enabling */
2218 	pr_info("Testing tracer %s: ", type->name);
2219 	ret = type->selftest(type, tr);
2220 	/* the test is responsible for resetting too */
2221 	tr->current_trace = saved_tracer;
2222 	if (ret) {
2223 		printk(KERN_CONT "FAILED!\n");
2224 		/* Add the warning after printing 'FAILED' */
2225 		WARN_ON(1);
2226 		return -1;
2227 	}
2228 	/* Only reset on passing, to avoid touching corrupted buffers */
2229 	tracing_reset_online_cpus(&tr->array_buffer);
2230 
2231 #ifdef CONFIG_TRACER_MAX_TRACE
2232 	if (type->use_max_tr) {
2233 		tr->allocated_snapshot = false;
2234 
2235 		/* Shrink the max buffer again */
2236 		if (tr->ring_buffer_expanded)
2237 			ring_buffer_resize(tr->max_buffer.buffer, 1,
2238 					   RING_BUFFER_ALL_CPUS);
2239 	}
2240 #endif
2241 
2242 	printk(KERN_CONT "PASSED\n");
2243 	return 0;
2244 }
2245 
do_run_tracer_selftest(struct tracer * type)2246 static int do_run_tracer_selftest(struct tracer *type)
2247 {
2248 	int ret;
2249 
2250 	/*
2251 	 * Tests can take a long time, especially if they are run one after the
2252 	 * other, as does happen during bootup when all the tracers are
2253 	 * registered. This could cause the soft lockup watchdog to trigger.
2254 	 */
2255 	cond_resched();
2256 
2257 	tracing_selftest_running = true;
2258 	ret = run_tracer_selftest(type);
2259 	tracing_selftest_running = false;
2260 
2261 	return ret;
2262 }
2263 
init_trace_selftests(void)2264 static __init int init_trace_selftests(void)
2265 {
2266 	struct trace_selftests *p, *n;
2267 	struct tracer *t, **last;
2268 	int ret;
2269 
2270 	selftests_can_run = true;
2271 
2272 	guard(mutex)(&trace_types_lock);
2273 
2274 	if (list_empty(&postponed_selftests))
2275 		return 0;
2276 
2277 	pr_info("Running postponed tracer tests:\n");
2278 
2279 	tracing_selftest_running = true;
2280 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2281 		/* This loop can take minutes when sanitizers are enabled, so
2282 		 * let's make sure we allow RCU processing.
2283 		 */
2284 		cond_resched();
2285 		ret = run_tracer_selftest(p->type);
2286 		/* If the test fails, then warn and remove from available_tracers */
2287 		if (ret < 0) {
2288 			WARN(1, "tracer: %s failed selftest, disabling\n",
2289 			     p->type->name);
2290 			last = &trace_types;
2291 			for (t = trace_types; t; t = t->next) {
2292 				if (t == p->type) {
2293 					*last = t->next;
2294 					break;
2295 				}
2296 				last = &t->next;
2297 			}
2298 		}
2299 		list_del(&p->list);
2300 		kfree(p);
2301 	}
2302 	tracing_selftest_running = false;
2303 
2304 	return 0;
2305 }
2306 core_initcall(init_trace_selftests);
2307 #else
do_run_tracer_selftest(struct tracer * type)2308 static inline int do_run_tracer_selftest(struct tracer *type)
2309 {
2310 	return 0;
2311 }
2312 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2313 
2314 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2315 
2316 static void __init apply_trace_boot_options(void);
2317 
2318 /**
2319  * register_tracer - register a tracer with the ftrace system.
2320  * @type: the plugin for the tracer
2321  *
2322  * Register a new plugin tracer.
2323  */
register_tracer(struct tracer * type)2324 int __init register_tracer(struct tracer *type)
2325 {
2326 	struct tracer *t;
2327 	int ret = 0;
2328 
2329 	if (!type->name) {
2330 		pr_info("Tracer must have a name\n");
2331 		return -1;
2332 	}
2333 
2334 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2335 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2336 		return -1;
2337 	}
2338 
2339 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2340 		pr_warn("Can not register tracer %s due to lockdown\n",
2341 			   type->name);
2342 		return -EPERM;
2343 	}
2344 
2345 	mutex_lock(&trace_types_lock);
2346 
2347 	for (t = trace_types; t; t = t->next) {
2348 		if (strcmp(type->name, t->name) == 0) {
2349 			/* already found */
2350 			pr_info("Tracer %s already registered\n",
2351 				type->name);
2352 			ret = -1;
2353 			goto out;
2354 		}
2355 	}
2356 
2357 	if (!type->set_flag)
2358 		type->set_flag = &dummy_set_flag;
2359 	if (!type->flags) {
2360 		/* allocate a dummy tracer_flags */
2361 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2362 		if (!type->flags) {
2363 			ret = -ENOMEM;
2364 			goto out;
2365 		}
2366 		type->flags->val = 0;
2367 		type->flags->opts = dummy_tracer_opt;
2368 	} else
2369 		if (!type->flags->opts)
2370 			type->flags->opts = dummy_tracer_opt;
2371 
2372 	/* store the tracer for __set_tracer_option */
2373 	type->flags->trace = type;
2374 
2375 	ret = do_run_tracer_selftest(type);
2376 	if (ret < 0)
2377 		goto out;
2378 
2379 	type->next = trace_types;
2380 	trace_types = type;
2381 	add_tracer_options(&global_trace, type);
2382 
2383  out:
2384 	mutex_unlock(&trace_types_lock);
2385 
2386 	if (ret || !default_bootup_tracer)
2387 		return ret;
2388 
2389 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2390 		return 0;
2391 
2392 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2393 	/* Do we want this tracer to start on bootup? */
2394 	tracing_set_tracer(&global_trace, type->name);
2395 	default_bootup_tracer = NULL;
2396 
2397 	apply_trace_boot_options();
2398 
2399 	/* Disable other selftests, since running this tracer will break them. */
2400 	disable_tracing_selftest("running a tracer");
2401 
2402 	return 0;
2403 }
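
/*
 * Editorial sketch (not part of the original source): the minimal shape of a
 * built-in tracer plugin registered through register_tracer() above.  The
 * "example" tracer and its callbacks are hypothetical; real tracers usually
 * also provide fields such as ->selftest, ->flags and ->print_line.
 */
static int example_tracer_init(struct trace_array *tr)
{
	/* Arm whatever hooks the tracer needs; nothing to do in this sketch. */
	return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
	/* Undo what ->init() set up. */
}

static struct tracer example_tracer __read_mostly = {
	.name	= "example",
	.init	= example_tracer_init,
	.reset	= example_tracer_reset,
};

static __init int example_tracer_register(void)
{
	return register_tracer(&example_tracer);
}
core_initcall(example_tracer_register);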
2404 
tracing_reset_cpu(struct array_buffer * buf,int cpu)2405 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2406 {
2407 	struct trace_buffer *buffer = buf->buffer;
2408 
2409 	if (!buffer)
2410 		return;
2411 
2412 	ring_buffer_record_disable(buffer);
2413 
2414 	/* Make sure all commits have finished */
2415 	synchronize_rcu();
2416 	ring_buffer_reset_cpu(buffer, cpu);
2417 
2418 	ring_buffer_record_enable(buffer);
2419 }
2420 
tracing_reset_online_cpus(struct array_buffer * buf)2421 void tracing_reset_online_cpus(struct array_buffer *buf)
2422 {
2423 	struct trace_buffer *buffer = buf->buffer;
2424 
2425 	if (!buffer)
2426 		return;
2427 
2428 	ring_buffer_record_disable(buffer);
2429 
2430 	/* Make sure all commits have finished */
2431 	synchronize_rcu();
2432 
2433 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2434 
2435 	ring_buffer_reset_online_cpus(buffer);
2436 
2437 	ring_buffer_record_enable(buffer);
2438 }
2439 
tracing_reset_all_cpus(struct array_buffer * buf)2440 static void tracing_reset_all_cpus(struct array_buffer *buf)
2441 {
2442 	struct trace_buffer *buffer = buf->buffer;
2443 
2444 	if (!buffer)
2445 		return;
2446 
2447 	ring_buffer_record_disable(buffer);
2448 
2449 	/* Make sure all commits have finished */
2450 	synchronize_rcu();
2451 
2452 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2453 
2454 	ring_buffer_reset(buffer);
2455 
2456 	ring_buffer_record_enable(buffer);
2457 }
2458 
2459 /* Must have trace_types_lock held */
tracing_reset_all_online_cpus_unlocked(void)2460 void tracing_reset_all_online_cpus_unlocked(void)
2461 {
2462 	struct trace_array *tr;
2463 
2464 	lockdep_assert_held(&trace_types_lock);
2465 
2466 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2467 		if (!tr->clear_trace)
2468 			continue;
2469 		tr->clear_trace = false;
2470 		tracing_reset_online_cpus(&tr->array_buffer);
2471 #ifdef CONFIG_TRACER_MAX_TRACE
2472 		tracing_reset_online_cpus(&tr->max_buffer);
2473 #endif
2474 	}
2475 }
2476 
tracing_reset_all_online_cpus(void)2477 void tracing_reset_all_online_cpus(void)
2478 {
2479 	guard(mutex)(&trace_types_lock);
2480 	tracing_reset_all_online_cpus_unlocked();
2481 }
2482 
is_tracing_stopped(void)2483 int is_tracing_stopped(void)
2484 {
2485 	return global_trace.stop_count;
2486 }
2487 
tracing_start_tr(struct trace_array * tr)2488 static void tracing_start_tr(struct trace_array *tr)
2489 {
2490 	struct trace_buffer *buffer;
2491 
2492 	if (tracing_disabled)
2493 		return;
2494 
2495 	guard(raw_spinlock_irqsave)(&tr->start_lock);
2496 	if (--tr->stop_count) {
2497 		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2498 			/* Someone screwed up their debugging */
2499 			tr->stop_count = 0;
2500 		}
2501 		return;
2502 	}
2503 
2504 	/* Prevent the buffers from switching */
2505 	arch_spin_lock(&tr->max_lock);
2506 
2507 	buffer = tr->array_buffer.buffer;
2508 	if (buffer)
2509 		ring_buffer_record_enable(buffer);
2510 
2511 #ifdef CONFIG_TRACER_MAX_TRACE
2512 	buffer = tr->max_buffer.buffer;
2513 	if (buffer)
2514 		ring_buffer_record_enable(buffer);
2515 #endif
2516 
2517 	arch_spin_unlock(&tr->max_lock);
2518 }
2519 
2520 /**
2521  * tracing_start - quick start of the tracer
2522  *
2523  * If tracing is enabled but was stopped by tracing_stop,
2524  * this will start the tracer back up.
2525  */
tracing_start(void)2526 void tracing_start(void)
2527 
2528 {
2529 	return tracing_start_tr(&global_trace);
2530 }
2531 
tracing_stop_tr(struct trace_array * tr)2532 static void tracing_stop_tr(struct trace_array *tr)
2533 {
2534 	struct trace_buffer *buffer;
2535 
2536 	guard(raw_spinlock_irqsave)(&tr->start_lock);
2537 	if (tr->stop_count++)
2538 		return;
2539 
2540 	/* Prevent the buffers from switching */
2541 	arch_spin_lock(&tr->max_lock);
2542 
2543 	buffer = tr->array_buffer.buffer;
2544 	if (buffer)
2545 		ring_buffer_record_disable(buffer);
2546 
2547 #ifdef CONFIG_TRACER_MAX_TRACE
2548 	buffer = tr->max_buffer.buffer;
2549 	if (buffer)
2550 		ring_buffer_record_disable(buffer);
2551 #endif
2552 
2553 	arch_spin_unlock(&tr->max_lock);
2554 }
2555 
2556 /**
2557  * tracing_stop - quick stop of the tracer
2558  *
2559  * Lightweight way to stop tracing. Use in conjunction with
2560  * tracing_start.
2561  */
tracing_stop(void)2562 void tracing_stop(void)
2563 {
2564 	return tracing_stop_tr(&global_trace);
2565 }
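
/*
 * Editorial usage sketch (not part of the original source): pairing the two
 * helpers above around a section whose events should not be recorded.
 * Unlike tracing_off()/tracing_on(), the stop/start pair nests by way of the
 * stop_count counter.  The helper and its work callback are hypothetical.
 */
static __maybe_unused void example_run_untraced(void (*work)(void))
{
	tracing_stop();
	work();			/* events generated here are not recorded */
	tracing_start();
}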
2566 
2567 /*
2568  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2569  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2570  * simplifies those functions and keeps them in sync.
2571  */
trace_handle_return(struct trace_seq * s)2572 enum print_line_t trace_handle_return(struct trace_seq *s)
2573 {
2574 	return trace_seq_has_overflowed(s) ?
2575 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2576 }
2577 EXPORT_SYMBOL_GPL(trace_handle_return);
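
/*
 * Editorial usage sketch (not part of the original source): how an event's
 * output callback typically ends with trace_handle_return().  The callback
 * and its fixed output string are hypothetical.
 */
static __maybe_unused enum print_line_t
example_event_output(struct trace_iterator *iter, int flags,
		     struct trace_event *event)
{
	trace_seq_puts(&iter->seq, "example event\n");

	/* PARTIAL_LINE if the seq buffer overflowed, HANDLED otherwise. */
	return trace_handle_return(&iter->seq);
}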
2578 
migration_disable_value(void)2579 static unsigned short migration_disable_value(void)
2580 {
2581 #if defined(CONFIG_SMP)
2582 	return current->migration_disabled;
2583 #else
2584 	return 0;
2585 #endif
2586 }
2587 
tracing_gen_ctx_irq_test(unsigned int irqs_status)2588 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2589 {
2590 	unsigned int trace_flags = irqs_status;
2591 	unsigned int pc;
2592 
2593 	pc = preempt_count();
2594 
2595 	if (pc & NMI_MASK)
2596 		trace_flags |= TRACE_FLAG_NMI;
2597 	if (pc & HARDIRQ_MASK)
2598 		trace_flags |= TRACE_FLAG_HARDIRQ;
2599 	if (in_serving_softirq())
2600 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2601 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2602 		trace_flags |= TRACE_FLAG_BH_OFF;
2603 
2604 	if (tif_need_resched())
2605 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2606 	if (test_preempt_need_resched())
2607 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2608 	if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
2609 		trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
2610 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2611 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2612 }
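
/*
 * Editorial note (not part of the original source): the value returned above
 * packs the tracing context into a single unsigned int, roughly:
 *
 *   bits  0- 3  preempt_count() & 0xff, clamped to 0xf
 *   bits  4- 7  migration_disable_value(), clamped to 0xf
 *   bits  8-15  unused
 *   bits 16-31  TRACE_FLAG_* bits (NMI/hardirq/softirq/BH-off/resched state)
 */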
2613 
2614 struct ring_buffer_event *
trace_buffer_lock_reserve(struct trace_buffer * buffer,int type,unsigned long len,unsigned int trace_ctx)2615 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2616 			  int type,
2617 			  unsigned long len,
2618 			  unsigned int trace_ctx)
2619 {
2620 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2621 }
2622 
2623 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2624 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2625 static int trace_buffered_event_ref;
2626 
2627 /**
2628  * trace_buffered_event_enable - enable buffering events
2629  *
2630  * When events are being filtered, it is quicker to use a temporary
2631  * buffer to write the event data into if there's a likely chance
2632  * that it will not be committed. Discarding an event from the ring
2633  * buffer is not as fast as committing it, and is much slower than
2634  * copying the data and then committing.
2635  *
2636  * When an event is to be filtered, allocate per-CPU buffers to
2637  * write the event data into; if the event is filtered and discarded
2638  * it is simply dropped, otherwise the entire data is committed
2639  * in one shot.
2640  */
trace_buffered_event_enable(void)2641 void trace_buffered_event_enable(void)
2642 {
2643 	struct ring_buffer_event *event;
2644 	struct page *page;
2645 	int cpu;
2646 
2647 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2648 
2649 	if (trace_buffered_event_ref++)
2650 		return;
2651 
2652 	for_each_tracing_cpu(cpu) {
2653 		page = alloc_pages_node(cpu_to_node(cpu),
2654 					GFP_KERNEL | __GFP_NORETRY, 0);
2655 		/* This is just an optimization and can handle failures */
2656 		if (!page) {
2657 			pr_err("Failed to allocate event buffer\n");
2658 			break;
2659 		}
2660 
2661 		event = page_address(page);
2662 		memset(event, 0, sizeof(*event));
2663 
2664 		per_cpu(trace_buffered_event, cpu) = event;
2665 
2666 		scoped_guard(preempt,) {
2667 			if (cpu == smp_processor_id() &&
2668 			    __this_cpu_read(trace_buffered_event) !=
2669 			    per_cpu(trace_buffered_event, cpu))
2670 				WARN_ON_ONCE(1);
2671 		}
2672 	}
2673 }
2674 
enable_trace_buffered_event(void * data)2675 static void enable_trace_buffered_event(void *data)
2676 {
2677 	this_cpu_dec(trace_buffered_event_cnt);
2678 }
2679 
disable_trace_buffered_event(void * data)2680 static void disable_trace_buffered_event(void *data)
2681 {
2682 	this_cpu_inc(trace_buffered_event_cnt);
2683 }
2684 
2685 /**
2686  * trace_buffered_event_disable - disable buffering events
2687  *
2688  * When a filter is removed, it is faster to not use the buffered
2689  * events, and to commit directly into the ring buffer. Free up
2690  * the temp buffers when there are no more users. This requires
2691  * special synchronization with current events.
2692  */
trace_buffered_event_disable(void)2693 void trace_buffered_event_disable(void)
2694 {
2695 	int cpu;
2696 
2697 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2698 
2699 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2700 		return;
2701 
2702 	if (--trace_buffered_event_ref)
2703 		return;
2704 
2705 	/* For each CPU, set the buffer as used. */
2706 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2707 			 NULL, true);
2708 
2709 	/* Wait for all current users to finish */
2710 	synchronize_rcu();
2711 
2712 	for_each_tracing_cpu(cpu) {
2713 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2714 		per_cpu(trace_buffered_event, cpu) = NULL;
2715 	}
2716 
2717 	/*
2718 	 * Wait for all CPUs that may have started checking whether they can
2719 	 * use their event buffer only after the previous synchronize_rcu()
2720 	 * call and that still read a valid pointer from trace_buffered_event.
2721 	 * They must not see the cleared trace_buffered_event_cnt, else they
2722 	 * could wrongly decide to use the pointed-to buffer, which is now freed.
2723 	 */
2724 	synchronize_rcu();
2725 
2726 	/* For each CPU, relinquish the buffer */
2727 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2728 			 true);
2729 }
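
/*
 * Editorial usage sketch (not part of the original source): how the event
 * filter code pairs the two refcounted calls above while reworking filters.
 * The helper and its callback are hypothetical; event_mutex must be held,
 * as the WARN_ON_ONCE() checks in both functions require.
 */
static __maybe_unused void example_update_filters_buffered(void (*update)(void))
{
	guard(mutex)(&event_mutex);

	trace_buffered_event_enable();
	update();		/* filter rework runs with buffering enabled */
	trace_buffered_event_disable();
}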
2730 
2731 static struct trace_buffer *temp_buffer;
2732 
2733 struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct trace_buffer ** current_rb,struct trace_event_file * trace_file,int type,unsigned long len,unsigned int trace_ctx)2734 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2735 			  struct trace_event_file *trace_file,
2736 			  int type, unsigned long len,
2737 			  unsigned int trace_ctx)
2738 {
2739 	struct ring_buffer_event *entry;
2740 	struct trace_array *tr = trace_file->tr;
2741 	int val;
2742 
2743 	*current_rb = tr->array_buffer.buffer;
2744 
2745 	if (!tr->no_filter_buffering_ref &&
2746 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2747 		preempt_disable_notrace();
2748 		/*
2749 		 * Filtering is on, so try to use the per cpu buffer first.
2750 		 * This buffer will simulate a ring_buffer_event,
2751 		 * where the type_len is zero and the array[0] will
2752 		 * hold the full length.
2753 		 * (see include/linux/ring_buffer.h for details on
2754 		 *  how the ring_buffer_event is structured).
2755 		 *
2756 		 * Using a temp buffer during filtering and copying it
2757 		 * on a matched filter is quicker than writing directly
2758 		 * into the ring buffer and then discarding it when
2759 		 * it doesn't match. That is because the discard
2760 		 * requires several atomic operations to get right.
2761 		 * Copying on match and doing nothing on a failed match
2762 		 * is still quicker than no copy on match, but having
2763 		 * to discard out of the ring buffer on a failed match.
2764 		 */
2765 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2766 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2767 
2768 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2769 
2770 			/*
2771 			 * Preemption is disabled, but interrupts and NMIs
2772 			 * can still come in now. If that happens after
2773 			 * the above increment, then it will have to go
2774 			 * back to the old method of allocating the event
2775 			 * on the ring buffer, and if the filter fails, it
2776 			 * will have to call ring_buffer_discard_commit()
2777 			 * to remove it.
2778 			 *
2779 			 * Need to also check the unlikely case that the
2780 			 * length is bigger than the temp buffer size.
2781 			 * If that happens, then the reserve is pretty much
2782 			 * guaranteed to fail, as the ring buffer currently
2783 			 * only allows events less than a page. But that may
2784 			 * change in the future, so let the ring buffer reserve
2785 			 * handle the failure in that case.
2786 			 */
2787 			if (val == 1 && likely(len <= max_len)) {
2788 				trace_event_setup(entry, type, trace_ctx);
2789 				entry->array[0] = len;
2790 				/* Return with preemption disabled */
2791 				return entry;
2792 			}
2793 			this_cpu_dec(trace_buffered_event_cnt);
2794 		}
2795 		/* __trace_buffer_lock_reserve() disables preemption */
2796 		preempt_enable_notrace();
2797 	}
2798 
2799 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2800 					    trace_ctx);
2801 	/*
2802 	 * If tracing is off, but we have triggers enabled,
2803 	 * we still need to look at the event data. Use the temp_buffer
2804 	 * to store the trace event for the trigger to use. It's recursion
2805 	 * safe and will not be recorded anywhere.
2806 	 */
2807 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2808 		*current_rb = temp_buffer;
2809 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2810 						    trace_ctx);
2811 	}
2812 	return entry;
2813 }
2814 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2815 
2816 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2817 static DEFINE_MUTEX(tracepoint_printk_mutex);
2818 
output_printk(struct trace_event_buffer * fbuffer)2819 static void output_printk(struct trace_event_buffer *fbuffer)
2820 {
2821 	struct trace_event_call *event_call;
2822 	struct trace_event_file *file;
2823 	struct trace_event *event;
2824 	unsigned long flags;
2825 	struct trace_iterator *iter = tracepoint_print_iter;
2826 
2827 	/* We should never get here if iter is NULL */
2828 	if (WARN_ON_ONCE(!iter))
2829 		return;
2830 
2831 	event_call = fbuffer->trace_file->event_call;
2832 	if (!event_call || !event_call->event.funcs ||
2833 	    !event_call->event.funcs->trace)
2834 		return;
2835 
2836 	file = fbuffer->trace_file;
2837 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2838 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2839 	     !filter_match_preds(file->filter, fbuffer->entry)))
2840 		return;
2841 
2842 	event = &fbuffer->trace_file->event_call->event;
2843 
2844 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2845 	trace_seq_init(&iter->seq);
2846 	iter->ent = fbuffer->entry;
2847 	event_call->event.funcs->trace(iter, 0, event);
2848 	trace_seq_putc(&iter->seq, 0);
2849 	printk("%s", iter->seq.buffer);
2850 
2851 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2852 }
2853 
tracepoint_printk_sysctl(const struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)2854 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
2855 			     void *buffer, size_t *lenp,
2856 			     loff_t *ppos)
2857 {
2858 	int save_tracepoint_printk;
2859 	int ret;
2860 
2861 	guard(mutex)(&tracepoint_printk_mutex);
2862 	save_tracepoint_printk = tracepoint_printk;
2863 
2864 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2865 
2866 	/*
2867 	 * This will force exiting early, as tracepoint_printk
2868 	 * is always zero when tracepoint_print_iter is not allocated
2869 	 */
2870 	if (!tracepoint_print_iter)
2871 		tracepoint_printk = 0;
2872 
2873 	if (save_tracepoint_printk == tracepoint_printk)
2874 		return ret;
2875 
2876 	if (tracepoint_printk)
2877 		static_key_enable(&tracepoint_printk_key.key);
2878 	else
2879 		static_key_disable(&tracepoint_printk_key.key);
2880 
2881 	return ret;
2882 }
2883 
trace_event_buffer_commit(struct trace_event_buffer * fbuffer)2884 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2885 {
2886 	enum event_trigger_type tt = ETT_NONE;
2887 	struct trace_event_file *file = fbuffer->trace_file;
2888 
2889 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2890 			fbuffer->entry, &tt))
2891 		goto discard;
2892 
2893 	if (static_key_false(&tracepoint_printk_key.key))
2894 		output_printk(fbuffer);
2895 
2896 	if (static_branch_unlikely(&trace_event_exports_enabled))
2897 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2898 
2899 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2900 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2901 
2902 discard:
2903 	if (tt)
2904 		event_triggers_post_call(file, tt);
2905 
2906 }
2907 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2908 
2909 /*
2910  * Skip 3:
2911  *
2912  *   trace_buffer_unlock_commit_regs()
2913  *   trace_event_buffer_commit()
2914  *   trace_event_raw_event_xxx()
2915  */
2916 # define STACK_SKIP 3
2917 
trace_buffer_unlock_commit_regs(struct trace_array * tr,struct trace_buffer * buffer,struct ring_buffer_event * event,unsigned int trace_ctx,struct pt_regs * regs)2918 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2919 				     struct trace_buffer *buffer,
2920 				     struct ring_buffer_event *event,
2921 				     unsigned int trace_ctx,
2922 				     struct pt_regs *regs)
2923 {
2924 	__buffer_unlock_commit(buffer, event);
2925 
2926 	/*
2927 	 * If regs is not set, then skip the necessary functions.
2928 	 * Note, we can still get here via blktrace, wakeup tracer
2929 	 * and mmiotrace, but that's ok if they lose a function or
2930 	 * two. They are not that meaningful.
2931 	 */
2932 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2933 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2934 }
2935 
2936 /*
2937  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2938  */
2939 void
trace_buffer_unlock_commit_nostack(struct trace_buffer * buffer,struct ring_buffer_event * event)2940 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2941 				   struct ring_buffer_event *event)
2942 {
2943 	__buffer_unlock_commit(buffer, event);
2944 }
2945 
2946 void
trace_function(struct trace_array * tr,unsigned long ip,unsigned long parent_ip,unsigned int trace_ctx,struct ftrace_regs * fregs)2947 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2948 	       parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs)
2949 {
2950 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2951 	struct ring_buffer_event *event;
2952 	struct ftrace_entry *entry;
2953 	int size = sizeof(*entry);
2954 
2955 	size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long);
2956 
2957 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size,
2958 					    trace_ctx);
2959 	if (!event)
2960 		return;
2961 	entry	= ring_buffer_event_data(event);
2962 	entry->ip			= ip;
2963 	entry->parent_ip		= parent_ip;
2964 
2965 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
2966 	if (fregs) {
2967 		for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
2968 			entry->args[i] = ftrace_regs_get_argument(fregs, i);
2969 	}
2970 #endif
2971 
2972 	if (static_branch_unlikely(&trace_function_exports_enabled))
2973 		ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2974 	__buffer_unlock_commit(buffer, event);
2975 }
2976 
2977 #ifdef CONFIG_STACKTRACE
2978 
2979 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2980 #define FTRACE_KSTACK_NESTING	4
2981 
2982 #define FTRACE_KSTACK_ENTRIES	(SZ_4K / FTRACE_KSTACK_NESTING)
2983 
2984 struct ftrace_stack {
2985 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2986 };
2987 
2988 
2989 struct ftrace_stacks {
2990 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2991 };
2992 
2993 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2994 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2995 
__ftrace_trace_stack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx,int skip,struct pt_regs * regs)2996 static void __ftrace_trace_stack(struct trace_array *tr,
2997 				 struct trace_buffer *buffer,
2998 				 unsigned int trace_ctx,
2999 				 int skip, struct pt_regs *regs)
3000 {
3001 	struct ring_buffer_event *event;
3002 	unsigned int size, nr_entries;
3003 	struct ftrace_stack *fstack;
3004 	struct stack_entry *entry;
3005 	int stackidx;
3006 
3007 	/*
3008 	 * Add one, for this function and the call to stack_trace_save().
3009 	 * If regs is set, then these functions will not be in the way.
3010 	 */
3011 #ifndef CONFIG_UNWINDER_ORC
3012 	if (!regs)
3013 		skip++;
3014 #endif
3015 
3016 	guard(preempt_notrace)();
3017 
3018 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3019 
3020 	/* This should never happen. If it does, yell once and skip */
3021 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3022 		goto out;
3023 
3024 	/*
3025 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3026 	 * interrupt will either see the value pre increment or post
3027 	 * increment. If the interrupt happens pre increment it will have
3028 	 * restored the counter when it returns.  We just need a barrier to
3029 	 * keep gcc from moving things around.
3030 	 */
3031 	barrier();
3032 
3033 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3034 	size = ARRAY_SIZE(fstack->calls);
3035 
3036 	if (regs) {
3037 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3038 						   size, skip);
3039 	} else {
3040 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3041 	}
3042 
3043 #ifdef CONFIG_DYNAMIC_FTRACE
3044 	/* Mark entry of stack trace as trampoline code */
3045 	if (tr->ops && tr->ops->trampoline) {
3046 		unsigned long tramp_start = tr->ops->trampoline;
3047 		unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
3048 		unsigned long *calls = fstack->calls;
3049 
3050 		for (int i = 0; i < nr_entries; i++) {
3051 			if (calls[i] >= tramp_start && calls[i] < tramp_end)
3052 				calls[i] = FTRACE_TRAMPOLINE_MARKER;
3053 		}
3054 	}
3055 #endif
3056 
3057 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3058 				    struct_size(entry, caller, nr_entries),
3059 				    trace_ctx);
3060 	if (!event)
3061 		goto out;
3062 	entry = ring_buffer_event_data(event);
3063 
3064 	entry->size = nr_entries;
3065 	memcpy(&entry->caller, fstack->calls,
3066 	       flex_array_size(entry, caller, nr_entries));
3067 
3068 	__buffer_unlock_commit(buffer, event);
3069 
3070  out:
3071 	/* Again, don't let gcc optimize things here */
3072 	barrier();
3073 	__this_cpu_dec(ftrace_stack_reserve);
3074 }
3075 
ftrace_trace_stack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx,int skip,struct pt_regs * regs)3076 static inline void ftrace_trace_stack(struct trace_array *tr,
3077 				      struct trace_buffer *buffer,
3078 				      unsigned int trace_ctx,
3079 				      int skip, struct pt_regs *regs)
3080 {
3081 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3082 		return;
3083 
3084 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
3085 }
3086 
__trace_stack(struct trace_array * tr,unsigned int trace_ctx,int skip)3087 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3088 		   int skip)
3089 {
3090 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3091 
3092 	if (rcu_is_watching()) {
3093 		__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3094 		return;
3095 	}
3096 
3097 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3098 		return;
3099 
3100 	/*
3101 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3102 	 * but if the above rcu_is_watching() failed, then the NMI
3103 	 * triggered someplace critical, and ct_irq_enter() should
3104 	 * not be called from NMI.
3105 	 */
3106 	if (unlikely(in_nmi()))
3107 		return;
3108 
3109 	ct_irq_enter_irqson();
3110 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3111 	ct_irq_exit_irqson();
3112 }
3113 
3114 /**
3115  * trace_dump_stack - record a stack back trace in the trace buffer
3116  * @skip: Number of functions to skip (helper handlers)
3117  */
trace_dump_stack(int skip)3118 void trace_dump_stack(int skip)
3119 {
3120 	if (tracing_disabled || tracing_selftest_running)
3121 		return;
3122 
3123 #ifndef CONFIG_UNWINDER_ORC
3124 	/* Skip 1 to skip this function. */
3125 	skip++;
3126 #endif
3127 	__ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
3128 				tracing_gen_ctx(), skip, NULL);
3129 }
3130 EXPORT_SYMBOL_GPL(trace_dump_stack);
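
/*
 * Editorial usage sketch (not part of the original source): recording a
 * backtrace of the current call chain into the trace buffer from arbitrary
 * kernel code.  The helper name and its condition are hypothetical.
 */
static __maybe_unused void example_trace_unexpected_path(bool unexpected)
{
	if (!unexpected)
		return;

	trace_printk("unexpected code path, dumping stack\n");
	/* 0: do not skip any callers beyond trace_dump_stack() itself */
	trace_dump_stack(0);
}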
3131 
3132 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3133 static DEFINE_PER_CPU(int, user_stack_count);
3134 
3135 static void
ftrace_trace_userstack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx)3136 ftrace_trace_userstack(struct trace_array *tr,
3137 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3138 {
3139 	struct ring_buffer_event *event;
3140 	struct userstack_entry *entry;
3141 
3142 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3143 		return;
3144 
3145 	/*
3146 	 * NMIs cannot handle page faults, even with fixups.
3147 	 * Saving the user stack can (and often does) fault.
3148 	 */
3149 	if (unlikely(in_nmi()))
3150 		return;
3151 
3152 	/*
3153 	 * Prevent recursion, since the user stack tracing may
3154 	 * trigger other kernel events.
3155 	 */
3156 	guard(preempt)();
3157 	if (__this_cpu_read(user_stack_count))
3158 		return;
3159 
3160 	__this_cpu_inc(user_stack_count);
3161 
3162 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3163 					    sizeof(*entry), trace_ctx);
3164 	if (!event)
3165 		goto out_drop_count;
3166 	entry	= ring_buffer_event_data(event);
3167 
3168 	entry->tgid		= current->tgid;
3169 	memset(&entry->caller, 0, sizeof(entry->caller));
3170 
3171 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3172 	__buffer_unlock_commit(buffer, event);
3173 
3174  out_drop_count:
3175 	__this_cpu_dec(user_stack_count);
3176 }
3177 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
ftrace_trace_userstack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx)3178 static void ftrace_trace_userstack(struct trace_array *tr,
3179 				   struct trace_buffer *buffer,
3180 				   unsigned int trace_ctx)
3181 {
3182 }
3183 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3184 
3185 #endif /* CONFIG_STACKTRACE */
3186 
3187 static inline void
func_repeats_set_delta_ts(struct func_repeats_entry * entry,unsigned long long delta)3188 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3189 			  unsigned long long delta)
3190 {
3191 	entry->bottom_delta_ts = delta & U32_MAX;
3192 	entry->top_delta_ts = (delta >> 32);
3193 }
3194 
trace_last_func_repeats(struct trace_array * tr,struct trace_func_repeats * last_info,unsigned int trace_ctx)3195 void trace_last_func_repeats(struct trace_array *tr,
3196 			     struct trace_func_repeats *last_info,
3197 			     unsigned int trace_ctx)
3198 {
3199 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3200 	struct func_repeats_entry *entry;
3201 	struct ring_buffer_event *event;
3202 	u64 delta;
3203 
3204 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3205 					    sizeof(*entry), trace_ctx);
3206 	if (!event)
3207 		return;
3208 
3209 	delta = ring_buffer_event_time_stamp(buffer, event) -
3210 		last_info->ts_last_call;
3211 
3212 	entry = ring_buffer_event_data(event);
3213 	entry->ip = last_info->ip;
3214 	entry->parent_ip = last_info->parent_ip;
3215 	entry->count = last_info->count;
3216 	func_repeats_set_delta_ts(entry, delta);
3217 
3218 	__buffer_unlock_commit(buffer, event);
3219 }
3220 
3221 /* created for use with alloc_percpu */
3222 struct trace_buffer_struct {
3223 	int nesting;
3224 	char buffer[4][TRACE_BUF_SIZE];
3225 };
3226 
3227 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3228 
3229 /*
3230  * This allows for lockless recording.  If we're nested too deeply, then
3231  * this returns NULL.
3232  */
get_trace_buf(void)3233 static char *get_trace_buf(void)
3234 {
3235 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3236 
3237 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3238 		return NULL;
3239 
3240 	buffer->nesting++;
3241 
3242 	/* Interrupts must see nesting incremented before we use the buffer */
3243 	barrier();
3244 	return &buffer->buffer[buffer->nesting - 1][0];
3245 }
3246 
put_trace_buf(void)3247 static void put_trace_buf(void)
3248 {
3249 	/* Don't let the decrement of nesting leak before this */
3250 	barrier();
3251 	this_cpu_dec(trace_percpu_buffer->nesting);
3252 }
3253 
alloc_percpu_trace_buffer(void)3254 static int alloc_percpu_trace_buffer(void)
3255 {
3256 	struct trace_buffer_struct __percpu *buffers;
3257 
3258 	if (trace_percpu_buffer)
3259 		return 0;
3260 
3261 	buffers = alloc_percpu(struct trace_buffer_struct);
3262 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3263 		return -ENOMEM;
3264 
3265 	trace_percpu_buffer = buffers;
3266 	return 0;
3267 }
3268 
3269 static int buffers_allocated;
3270 
trace_printk_init_buffers(void)3271 void trace_printk_init_buffers(void)
3272 {
3273 	if (buffers_allocated)
3274 		return;
3275 
3276 	if (alloc_percpu_trace_buffer())
3277 		return;
3278 
3279 	/* trace_printk() is for debug use only. Don't use it in production. */
3280 
3281 	pr_warn("\n");
3282 	pr_warn("**********************************************************\n");
3283 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3284 	pr_warn("**                                                      **\n");
3285 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3286 	pr_warn("**                                                      **\n");
3287 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3288 	pr_warn("** unsafe for production use.                           **\n");
3289 	pr_warn("**                                                      **\n");
3290 	pr_warn("** If you see this message and you are not debugging    **\n");
3291 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3292 	pr_warn("**                                                      **\n");
3293 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3294 	pr_warn("**********************************************************\n");
3295 
3296 	/* Expand the buffers to set size */
3297 	tracing_update_buffers(&global_trace);
3298 
3299 	buffers_allocated = 1;
3300 
3301 	/*
3302 	 * trace_printk_init_buffers() can be called by modules.
3303 	 * If that happens, then we need to start cmdline recording
3304 	 * directly here. If the global_trace.buffer is already
3305 	 * allocated here, then this was called by module code.
3306 	 */
3307 	if (global_trace.array_buffer.buffer)
3308 		tracing_start_cmdline_record();
3309 }
3310 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3311 
trace_printk_start_comm(void)3312 void trace_printk_start_comm(void)
3313 {
3314 	/* Start tracing comms if trace printk is set */
3315 	if (!buffers_allocated)
3316 		return;
3317 	tracing_start_cmdline_record();
3318 }
3319 
trace_printk_start_stop_comm(int enabled)3320 static void trace_printk_start_stop_comm(int enabled)
3321 {
3322 	if (!buffers_allocated)
3323 		return;
3324 
3325 	if (enabled)
3326 		tracing_start_cmdline_record();
3327 	else
3328 		tracing_stop_cmdline_record();
3329 }
3330 
3331 /**
3332  * trace_vbprintk - write binary msg to tracing buffer
3333  * @ip:    The address of the caller
3334  * @fmt:   The string format to write to the buffer
3335  * @args:  Arguments for @fmt
3336  */
trace_vbprintk(unsigned long ip,const char * fmt,va_list args)3337 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3338 {
3339 	struct ring_buffer_event *event;
3340 	struct trace_buffer *buffer;
3341 	struct trace_array *tr = READ_ONCE(printk_trace);
3342 	struct bprint_entry *entry;
3343 	unsigned int trace_ctx;
3344 	char *tbuffer;
3345 	int len = 0, size;
3346 
3347 	if (!printk_binsafe(tr))
3348 		return trace_vprintk(ip, fmt, args);
3349 
3350 	if (unlikely(tracing_selftest_running || tracing_disabled))
3351 		return 0;
3352 
3353 	/* Don't pollute graph traces with trace_vprintk internals */
3354 	pause_graph_tracing();
3355 
3356 	trace_ctx = tracing_gen_ctx();
3357 	guard(preempt_notrace)();
3358 
3359 	tbuffer = get_trace_buf();
3360 	if (!tbuffer) {
3361 		len = 0;
3362 		goto out_nobuffer;
3363 	}
3364 
3365 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3366 
3367 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3368 		goto out_put;
3369 
3370 	size = sizeof(*entry) + sizeof(u32) * len;
3371 	buffer = tr->array_buffer.buffer;
3372 	scoped_guard(ring_buffer_nest, buffer) {
3373 		event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3374 						    trace_ctx);
3375 		if (!event)
3376 			goto out_put;
3377 		entry = ring_buffer_event_data(event);
3378 		entry->ip			= ip;
3379 		entry->fmt			= fmt;
3380 
3381 		memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3382 		__buffer_unlock_commit(buffer, event);
3383 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3384 	}
3385 out_put:
3386 	put_trace_buf();
3387 
3388 out_nobuffer:
3389 	unpause_graph_tracing();
3390 
3391 	return len;
3392 }
3393 EXPORT_SYMBOL_GPL(trace_vbprintk);
3394 
3395 static __printf(3, 0)
__trace_array_vprintk(struct trace_buffer * buffer,unsigned long ip,const char * fmt,va_list args)3396 int __trace_array_vprintk(struct trace_buffer *buffer,
3397 			  unsigned long ip, const char *fmt, va_list args)
3398 {
3399 	struct ring_buffer_event *event;
3400 	int len = 0, size;
3401 	struct print_entry *entry;
3402 	unsigned int trace_ctx;
3403 	char *tbuffer;
3404 
3405 	if (tracing_disabled)
3406 		return 0;
3407 
3408 	/* Don't pollute graph traces with trace_vprintk internals */
3409 	pause_graph_tracing();
3410 
3411 	trace_ctx = tracing_gen_ctx();
3412 	guard(preempt_notrace)();
3413 
3414 
3415 	tbuffer = get_trace_buf();
3416 	if (!tbuffer) {
3417 		len = 0;
3418 		goto out_nobuffer;
3419 	}
3420 
3421 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3422 
3423 	size = sizeof(*entry) + len + 1;
3424 	scoped_guard(ring_buffer_nest, buffer) {
3425 		event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3426 						    trace_ctx);
3427 		if (!event)
3428 			goto out;
3429 		entry = ring_buffer_event_data(event);
3430 		entry->ip = ip;
3431 
3432 		memcpy(&entry->buf, tbuffer, len + 1);
3433 		__buffer_unlock_commit(buffer, event);
3434 		ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL);
3435 	}
3436 out:
3437 	put_trace_buf();
3438 
3439 out_nobuffer:
3440 	unpause_graph_tracing();
3441 
3442 	return len;
3443 }
3444 
trace_array_vprintk(struct trace_array * tr,unsigned long ip,const char * fmt,va_list args)3445 int trace_array_vprintk(struct trace_array *tr,
3446 			unsigned long ip, const char *fmt, va_list args)
3447 {
3448 	if (tracing_selftest_running && tr == &global_trace)
3449 		return 0;
3450 
3451 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3452 }
3453 
3454 /**
3455  * trace_array_printk - Print a message to a specific instance
3456  * @tr: The instance trace_array descriptor
3457  * @ip: The instruction pointer that this is called from.
3458  * @fmt: The format to print (printf format)
3459  *
3460  * If a subsystem sets up its own instance, it may printk strings
3461  * into its own tracing instance buffer using this
3462  * function. Note, this function will not write into the top level
3463  * buffer (use trace_printk() for that), as writing into the top level
3464  * buffer should only have events that can be individually disabled.
3465  * trace_printk() is only used for debugging a kernel, and should not
3466  * trace_printk() is only used for debugging a kernel, and should
3467  * never be incorporated into normal use.
3468  * trace_array_printk() can be used, as it will not add noise to the
3469  * top level tracing buffer.
3470  *
3471  * Note, trace_array_init_printk() must be called on @tr before this
3472  * can be used.
3473  */
3474 int trace_array_printk(struct trace_array *tr,
3475 		       unsigned long ip, const char *fmt, ...)
3476 {
3477 	int ret;
3478 	va_list ap;
3479 
3480 	if (!tr)
3481 		return -ENOENT;
3482 
3483 	/* This is only allowed for created instances */
3484 	if (tr == &global_trace)
3485 		return 0;
3486 
3487 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3488 		return 0;
3489 
3490 	va_start(ap, fmt);
3491 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3492 	va_end(ap);
3493 	return ret;
3494 }
3495 EXPORT_SYMBOL_GPL(trace_array_printk);
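
/*
 * Usage sketch (the instance name "my_subsys" and the variable "state"
 * are made up for illustration): a subsystem that owns an instance can
 * log into it with something like:
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_subsys", NULL);
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "state=%d\n", state);
 *
 * The reference taken by trace_array_get_by_name() is dropped later with
 * trace_array_put().
 */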
3496 
3497 /**
3498  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3499  * @tr: The trace array to initialize the buffers for
3500  *
3501  * As trace_array_printk() only writes into instances, they are OK to
3502  * have in the kernel (unlike trace_printk()). This needs to be called
3503  * before trace_array_printk() can be used on a trace_array.
3504  */
3505 int trace_array_init_printk(struct trace_array *tr)
3506 {
3507 	if (!tr)
3508 		return -ENOENT;
3509 
3510 	/* This is only allowed for created instances */
3511 	if (tr == &global_trace)
3512 		return -EINVAL;
3513 
3514 	return alloc_percpu_trace_buffer();
3515 }
3516 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3517 
3518 int trace_array_printk_buf(struct trace_buffer *buffer,
3519 			   unsigned long ip, const char *fmt, ...)
3520 {
3521 	int ret;
3522 	va_list ap;
3523 
3524 	if (!(printk_trace->trace_flags & TRACE_ITER_PRINTK))
3525 		return 0;
3526 
3527 	va_start(ap, fmt);
3528 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3529 	va_end(ap);
3530 	return ret;
3531 }
3532 
3533 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3534 {
3535 	return trace_array_vprintk(printk_trace, ip, fmt, args);
3536 }
3537 EXPORT_SYMBOL_GPL(trace_vprintk);
3538 
3539 static void trace_iterator_increment(struct trace_iterator *iter)
3540 {
3541 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3542 
3543 	iter->idx++;
3544 	if (buf_iter)
3545 		ring_buffer_iter_advance(buf_iter);
3546 }
3547 
3548 static struct trace_entry *
3549 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3550 		unsigned long *lost_events)
3551 {
3552 	struct ring_buffer_event *event;
3553 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3554 
3555 	if (buf_iter) {
3556 		event = ring_buffer_iter_peek(buf_iter, ts);
3557 		if (lost_events)
3558 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3559 				(unsigned long)-1 : 0;
3560 	} else {
3561 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3562 					 lost_events);
3563 	}
3564 
3565 	if (event) {
3566 		iter->ent_size = ring_buffer_event_length(event);
3567 		return ring_buffer_event_data(event);
3568 	}
3569 	iter->ent_size = 0;
3570 	return NULL;
3571 }
3572 
3573 static struct trace_entry *
3574 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3575 		  unsigned long *missing_events, u64 *ent_ts)
3576 {
3577 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3578 	struct trace_entry *ent, *next = NULL;
3579 	unsigned long lost_events = 0, next_lost = 0;
3580 	int cpu_file = iter->cpu_file;
3581 	u64 next_ts = 0, ts;
3582 	int next_cpu = -1;
3583 	int next_size = 0;
3584 	int cpu;
3585 
3586 	/*
3587 	 * If we are in a per_cpu trace file, don't bother iterating over
3588 	 * all CPUs; peek at that CPU directly.
3589 	 */
3590 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3591 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3592 			return NULL;
3593 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3594 		if (ent_cpu)
3595 			*ent_cpu = cpu_file;
3596 
3597 		return ent;
3598 	}
3599 
3600 	for_each_tracing_cpu(cpu) {
3601 
3602 		if (ring_buffer_empty_cpu(buffer, cpu))
3603 			continue;
3604 
3605 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3606 
3607 		/*
3608 		 * Pick the entry with the smallest timestamp:
3609 		 */
3610 		if (ent && (!next || ts < next_ts)) {
3611 			next = ent;
3612 			next_cpu = cpu;
3613 			next_ts = ts;
3614 			next_lost = lost_events;
3615 			next_size = iter->ent_size;
3616 		}
3617 	}
3618 
3619 	iter->ent_size = next_size;
3620 
3621 	if (ent_cpu)
3622 		*ent_cpu = next_cpu;
3623 
3624 	if (ent_ts)
3625 		*ent_ts = next_ts;
3626 
3627 	if (missing_events)
3628 		*missing_events = next_lost;
3629 
3630 	return next;
3631 }
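
/*
 * In the RING_BUFFER_ALL_CPUS case this is effectively one step of an
 * N-way merge: every call re-peeks each non-empty per-CPU buffer and
 * returns the entry with the smallest timestamp, so a full read comes
 * out globally time-ordered without ever sorting the buffers.
 */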
3632 
3633 #define STATIC_FMT_BUF_SIZE	128
3634 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3635 
3636 char *trace_iter_expand_format(struct trace_iterator *iter)
3637 {
3638 	char *tmp;
3639 
3640 	/*
3641 	 * iter->tr is NULL when used with tp_printk, which makes
3642 	 * this get called where it is not safe to call krealloc().
3643 	 */
3644 	if (!iter->tr || iter->fmt == static_fmt_buf)
3645 		return NULL;
3646 
3647 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3648 		       GFP_KERNEL);
3649 	if (tmp) {
3650 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3651 		iter->fmt = tmp;
3652 	}
3653 
3654 	return tmp;
3655 }
3656 
3657 /* Returns true if the string is safe to dereference from an event */
3658 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3659 {
3660 	unsigned long addr = (unsigned long)str;
3661 	struct trace_event *trace_event;
3662 	struct trace_event_call *event;
3663 
3664 	/* OK if part of the event data */
3665 	if ((addr >= (unsigned long)iter->ent) &&
3666 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3667 		return true;
3668 
3669 	/* OK if part of the temp seq buffer */
3670 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3671 	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3672 		return true;
3673 
3674 	/* Core rodata can not be freed */
3675 	if (is_kernel_rodata(addr))
3676 		return true;
3677 
3678 	if (trace_is_tracepoint_string(str))
3679 		return true;
3680 
3681 	/*
3682 	 * Now this could be a module event, referencing core module
3683 	 * data, which is OK.
3684 	 */
3685 	if (!iter->ent)
3686 		return false;
3687 
3688 	trace_event = ftrace_find_event(iter->ent->type);
3689 	if (!trace_event)
3690 		return false;
3691 
3692 	event = container_of(trace_event, struct trace_event_call, event);
3693 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3694 		return false;
3695 
3696 	/* Would rather have rodata, but this will suffice */
3697 	if (within_module_core(addr, event->module))
3698 		return true;
3699 
3700 	return false;
3701 }
3702 
3703 /**
3704  * ignore_event - Check dereferenced fields while writing to the seq buffer
3705  * @iter: The iterator that holds the seq buffer and the event being printed
3706  *
3707  * At boot up, test_event_printk() will flag any event that dereferences
3708  * a string with "%s" that does exist in the ring buffer. It may still
3709  * a string with "%s" that does not exist in the ring buffer. It may still
3710  * rodata that never gets freed. But if the string pointer is pointing
3711  * to something that was allocated, there's a chance that it can be freed
3712  * by the time the user reads the trace. This would cause a bad memory
3713  * access by the kernel and possibly crash the system.
3714  *
3715  * This function will check if the event has any fields flagged as needing
3716  * to be checked at runtime and perform those checks.
3717  *
3718  * If it is found that a field is unsafe, it will write into the @iter->seq
3719  * a message stating what was found to be unsafe.
3720  *
3721  * Return: true if the event is unsafe and should be ignored,
3722  *         false otherwise.
3723  */
3724 bool ignore_event(struct trace_iterator *iter)
3725 {
3726 	struct ftrace_event_field *field;
3727 	struct trace_event *trace_event;
3728 	struct trace_event_call *event;
3729 	struct list_head *head;
3730 	struct trace_seq *seq;
3731 	const void *ptr;
3732 
3733 	trace_event = ftrace_find_event(iter->ent->type);
3734 
3735 	seq = &iter->seq;
3736 
3737 	if (!trace_event) {
3738 		trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
3739 		return true;
3740 	}
3741 
3742 	event = container_of(trace_event, struct trace_event_call, event);
3743 	if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
3744 		return false;
3745 
3746 	head = trace_get_fields(event);
3747 	if (!head) {
3748 		trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
3749 				 trace_event_name(event));
3750 		return true;
3751 	}
3752 
3753 	/* Offsets are from the iter->ent that points to the raw event */
3754 	ptr = iter->ent;
3755 
3756 	list_for_each_entry(field, head, link) {
3757 		const char *str;
3758 		bool good;
3759 
3760 		if (!field->needs_test)
3761 			continue;
3762 
3763 		str = *(const char **)(ptr + field->offset);
3764 
3765 		good = trace_safe_str(iter, str);
3766 
3767 		/*
3768 		 * If you hit this warning, it is likely that the
3769 		 * trace event in question used %s on a string that
3770 		 * was saved at the time of the event, but may not be
3771 		 * around when the trace is read. Use __string(),
3772 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3773 		 * instead. See samples/trace_events/trace-events-sample.h
3774 		 * for reference.
3775 		 */
3776 		if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
3777 			      trace_event_name(event), field->name)) {
3778 			trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
3779 					 trace_event_name(event), field->name);
3780 			return true;
3781 		}
3782 	}
3783 	return false;
3784 }
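
/*
 * For reference, the safe pattern that the warning in ignore_event()
 * points at looks roughly like this inside a TRACE_EVENT() definition
 * (sketch only; the field name is made up, and __assign_str() took a
 * second argument on older kernels):
 *
 *	TP_STRUCT__entry(
 *		__string(name, src)
 *	),
 *	TP_fast_assign(
 *		__assign_str(name);
 *	),
 *	TP_printk("%s", __get_str(name))
 *
 * This copies the string into the event record itself, so trace_safe_str()
 * always finds it inside iter->ent.
 */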
3785 
3786 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3787 {
3788 	const char *p, *new_fmt;
3789 	char *q;
3790 
3791 	if (WARN_ON_ONCE(!fmt))
3792 		return fmt;
3793 
3794 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3795 		return fmt;
3796 
3797 	p = fmt;
3798 	new_fmt = q = iter->fmt;
3799 	while (*p) {
3800 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3801 			if (!trace_iter_expand_format(iter))
3802 				return fmt;
3803 
3804 			q += iter->fmt - new_fmt;
3805 			new_fmt = iter->fmt;
3806 		}
3807 
3808 		*q++ = *p++;
3809 
3810 		/* Replace %p with %px */
3811 		if (p[-1] == '%') {
3812 			if (p[0] == '%') {
3813 				*q++ = *p++;
3814 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3815 				*q++ = *p++;
3816 				*q++ = 'x';
3817 			}
3818 		}
3819 	}
3820 	*q = '\0';
3821 
3822 	return new_fmt;
3823 }
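
/*
 * Example of the rewrite above (illustrative): with hash-ptr disabled, a
 * format such as "ptr=%p count=%d" is copied into iter->fmt as
 * "ptr=%px count=%d", while extended specifiers like "%pS" (alphanumeric
 * after the 'p') and escaped "%%p" are left untouched.
 */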
3824 
3825 #define STATIC_TEMP_BUF_SIZE	128
3826 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3827 
3828 /* Find the next real entry, without updating the iterator itself */
3829 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3830 					  int *ent_cpu, u64 *ent_ts)
3831 {
3832 	/* __find_next_entry will reset ent_size */
3833 	int ent_size = iter->ent_size;
3834 	struct trace_entry *entry;
3835 
3836 	/*
3837 	 * If called from ftrace_dump(), then the iter->temp buffer
3838 	 * will be the static_temp_buf and not created from kmalloc.
3839 	 * If the entry size is greater than the buffer, we can
3840 	 * not save it. Just return NULL in that case. This is only
3841 	 * used to add markers when two consecutive events' time
3842 	 * stamps have a large delta. See trace_print_lat_context()
3843 	 */
3844 	if (iter->temp == static_temp_buf &&
3845 	    STATIC_TEMP_BUF_SIZE < ent_size)
3846 		return NULL;
3847 
3848 	/*
3849 	 * The __find_next_entry() may call peek_next_entry(), which may
3850 	 * call ring_buffer_peek() that may make the contents of iter->ent
3851 	 * undefined. Need to copy iter->ent now.
3852 	 */
3853 	if (iter->ent && iter->ent != iter->temp) {
3854 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3855 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3856 			void *temp;
3857 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3858 			if (!temp)
3859 				return NULL;
3860 			kfree(iter->temp);
3861 			iter->temp = temp;
3862 			iter->temp_size = iter->ent_size;
3863 		}
3864 		memcpy(iter->temp, iter->ent, iter->ent_size);
3865 		iter->ent = iter->temp;
3866 	}
3867 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3868 	/* Put back the original ent_size */
3869 	iter->ent_size = ent_size;
3870 
3871 	return entry;
3872 }
3873 
3874 /* Find the next real entry, and increment the iterator to the next entry */
3875 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3876 {
3877 	iter->ent = __find_next_entry(iter, &iter->cpu,
3878 				      &iter->lost_events, &iter->ts);
3879 
3880 	if (iter->ent)
3881 		trace_iterator_increment(iter);
3882 
3883 	return iter->ent ? iter : NULL;
3884 }
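
/*
 * Typical consumer loop (sketch; s_next() below advances this way on
 * behalf of a seq_file read, and s_show() then prints each entry with
 * trace_event_read_lock() held):
 *
 *	while (trace_find_next_entry_inc(iter))
 *		print_trace_line(iter);
 *
 * The return value is the iterator itself (or NULL when the buffers are
 * exhausted), and the entry is then read from iter->ent, iter->cpu and
 * iter->ts.
 */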
3885 
3886 static void trace_consume(struct trace_iterator *iter)
3887 {
3888 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3889 			    &iter->lost_events);
3890 }
3891 
3892 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3893 {
3894 	struct trace_iterator *iter = m->private;
3895 	int i = (int)*pos;
3896 	void *ent;
3897 
3898 	WARN_ON_ONCE(iter->leftover);
3899 
3900 	(*pos)++;
3901 
3902 	/* can't go backwards */
3903 	if (iter->idx > i)
3904 		return NULL;
3905 
3906 	if (iter->idx < 0)
3907 		ent = trace_find_next_entry_inc(iter);
3908 	else
3909 		ent = iter;
3910 
3911 	while (ent && iter->idx < i)
3912 		ent = trace_find_next_entry_inc(iter);
3913 
3914 	iter->pos = *pos;
3915 
3916 	return ent;
3917 }
3918 
3919 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3920 {
3921 	struct ring_buffer_iter *buf_iter;
3922 	unsigned long entries = 0;
3923 	u64 ts;
3924 
3925 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3926 
3927 	buf_iter = trace_buffer_iter(iter, cpu);
3928 	if (!buf_iter)
3929 		return;
3930 
3931 	ring_buffer_iter_reset(buf_iter);
3932 
3933 	/*
3934 	 * With the max latency tracers, we could have the case that
3935 	 * a reset never took place on a CPU. This is evident from
3936 	 * the timestamp being before the start of the buffer.
3937 	 */
3938 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
3939 		if (ts >= iter->array_buffer->time_start)
3940 			break;
3941 		entries++;
3942 		ring_buffer_iter_advance(buf_iter);
3943 		/* This could be a big loop */
3944 		cond_resched();
3945 	}
3946 
3947 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3948 }
3949 
3950 /*
3951  * The current tracer is copied to avoid global locking
3952  * all around.
3953  */
3954 static void *s_start(struct seq_file *m, loff_t *pos)
3955 {
3956 	struct trace_iterator *iter = m->private;
3957 	struct trace_array *tr = iter->tr;
3958 	int cpu_file = iter->cpu_file;
3959 	void *p = NULL;
3960 	loff_t l = 0;
3961 	int cpu;
3962 
3963 	mutex_lock(&trace_types_lock);
3964 	if (unlikely(tr->current_trace != iter->trace)) {
3965 		/* Close iter->trace before switching to the new current tracer */
3966 		if (iter->trace->close)
3967 			iter->trace->close(iter);
3968 		iter->trace = tr->current_trace;
3969 		/* Reopen the new current tracer */
3970 		if (iter->trace->open)
3971 			iter->trace->open(iter);
3972 	}
3973 	mutex_unlock(&trace_types_lock);
3974 
3975 #ifdef CONFIG_TRACER_MAX_TRACE
3976 	if (iter->snapshot && iter->trace->use_max_tr)
3977 		return ERR_PTR(-EBUSY);
3978 #endif
3979 
3980 	if (*pos != iter->pos) {
3981 		iter->ent = NULL;
3982 		iter->cpu = 0;
3983 		iter->idx = -1;
3984 
3985 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3986 			for_each_tracing_cpu(cpu)
3987 				tracing_iter_reset(iter, cpu);
3988 		} else
3989 			tracing_iter_reset(iter, cpu_file);
3990 
3991 		iter->leftover = 0;
3992 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3993 			;
3994 
3995 	} else {
3996 		/*
3997 		 * If we overflowed the seq_file before, then we want
3998 		 * to just reuse the trace_seq buffer again.
3999 		 */
4000 		if (iter->leftover)
4001 			p = iter;
4002 		else {
4003 			l = *pos - 1;
4004 			p = s_next(m, p, &l);
4005 		}
4006 	}
4007 
4008 	trace_event_read_lock();
4009 	trace_access_lock(cpu_file);
4010 	return p;
4011 }
4012 
4013 static void s_stop(struct seq_file *m, void *p)
4014 {
4015 	struct trace_iterator *iter = m->private;
4016 
4017 #ifdef CONFIG_TRACER_MAX_TRACE
4018 	if (iter->snapshot && iter->trace->use_max_tr)
4019 		return;
4020 #endif
4021 
4022 	trace_access_unlock(iter->cpu_file);
4023 	trace_event_read_unlock();
4024 }
4025 
4026 static void
4027 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4028 		      unsigned long *entries, int cpu)
4029 {
4030 	unsigned long count;
4031 
4032 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4033 	/*
4034 	 * If this buffer has skipped entries, then we hold all
4035 	 * entries for the trace and we need to ignore the
4036 	 * ones before the time stamp.
4037 	 */
4038 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4039 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4040 		/* total is the same as the entries */
4041 		*total = count;
4042 	} else
4043 		*total = count +
4044 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4045 	*entries = count;
4046 }
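
/*
 * Worked example with made-up numbers: if a CPU buffer currently holds
 * 600 entries and the ring buffer reports 400 overwritten (overrun)
 * events, this sets *entries = 600 and *total = 1000, which is what the
 * "entries-in-buffer/entries-written" header line reports.
 */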
4047 
4048 static void
4049 get_total_entries(struct array_buffer *buf,
4050 		  unsigned long *total, unsigned long *entries)
4051 {
4052 	unsigned long t, e;
4053 	int cpu;
4054 
4055 	*total = 0;
4056 	*entries = 0;
4057 
4058 	for_each_tracing_cpu(cpu) {
4059 		get_total_entries_cpu(buf, &t, &e, cpu);
4060 		*total += t;
4061 		*entries += e;
4062 	}
4063 }
4064 
4065 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4066 {
4067 	unsigned long total, entries;
4068 
4069 	if (!tr)
4070 		tr = &global_trace;
4071 
4072 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4073 
4074 	return entries;
4075 }
4076 
4077 unsigned long trace_total_entries(struct trace_array *tr)
4078 {
4079 	unsigned long total, entries;
4080 
4081 	if (!tr)
4082 		tr = &global_trace;
4083 
4084 	get_total_entries(&tr->array_buffer, &total, &entries);
4085 
4086 	return entries;
4087 }
4088 
4089 static void print_lat_help_header(struct seq_file *m)
4090 {
4091 	seq_puts(m, "#                    _------=> CPU#            \n"
4092 		    "#                   / _-----=> irqs-off/BH-disabled\n"
4093 		    "#                  | / _----=> need-resched    \n"
4094 		    "#                  || / _---=> hardirq/softirq \n"
4095 		    "#                  ||| / _--=> preempt-depth   \n"
4096 		    "#                  |||| / _-=> migrate-disable \n"
4097 		    "#                  ||||| /     delay           \n"
4098 		    "#  cmd     pid     |||||| time  |   caller     \n"
4099 		    "#     \\   /        ||||||  \\    |    /       \n");
4100 }
4101 
4102 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4103 {
4104 	unsigned long total;
4105 	unsigned long entries;
4106 
4107 	get_total_entries(buf, &total, &entries);
4108 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4109 		   entries, total, num_online_cpus());
4110 	seq_puts(m, "#\n");
4111 }
4112 
4113 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4114 				   unsigned int flags)
4115 {
4116 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4117 
4118 	print_event_info(buf, m);
4119 
4120 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4121 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4122 }
4123 
4124 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4125 				       unsigned int flags)
4126 {
4127 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4128 	static const char space[] = "            ";
4129 	int prec = tgid ? 12 : 2;
4130 
4131 	print_event_info(buf, m);
4132 
4133 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4134 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4135 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4136 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4137 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4138 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4139 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4140 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4141 }
4142 
4143 void
4144 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4145 {
4146 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4147 	struct array_buffer *buf = iter->array_buffer;
4148 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4149 	struct tracer *type = iter->trace;
4150 	unsigned long entries;
4151 	unsigned long total;
4152 	const char *name = type->name;
4153 
4154 	get_total_entries(buf, &total, &entries);
4155 
4156 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4157 		   name, init_utsname()->release);
4158 	seq_puts(m, "# -----------------------------------"
4159 		 "---------------------------------\n");
4160 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4161 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4162 		   nsecs_to_usecs(data->saved_latency),
4163 		   entries,
4164 		   total,
4165 		   buf->cpu,
4166 		   preempt_model_str(),
4167 		   /* These are reserved for later use */
4168 		   0, 0, 0, 0);
4169 #ifdef CONFIG_SMP
4170 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4171 #else
4172 	seq_puts(m, ")\n");
4173 #endif
4174 	seq_puts(m, "#    -----------------\n");
4175 	seq_printf(m, "#    | task: %.16s-%d "
4176 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4177 		   data->comm, data->pid,
4178 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4179 		   data->policy, data->rt_priority);
4180 	seq_puts(m, "#    -----------------\n");
4181 
4182 	if (data->critical_start) {
4183 		seq_puts(m, "#  => started at: ");
4184 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4185 		trace_print_seq(m, &iter->seq);
4186 		seq_puts(m, "\n#  => ended at:   ");
4187 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4188 		trace_print_seq(m, &iter->seq);
4189 		seq_puts(m, "\n#\n");
4190 	}
4191 
4192 	seq_puts(m, "#\n");
4193 }
4194 
4195 static void test_cpu_buff_start(struct trace_iterator *iter)
4196 {
4197 	struct trace_seq *s = &iter->seq;
4198 	struct trace_array *tr = iter->tr;
4199 
4200 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4201 		return;
4202 
4203 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4204 		return;
4205 
4206 	if (cpumask_available(iter->started) &&
4207 	    cpumask_test_cpu(iter->cpu, iter->started))
4208 		return;
4209 
4210 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4211 		return;
4212 
4213 	if (cpumask_available(iter->started))
4214 		cpumask_set_cpu(iter->cpu, iter->started);
4215 
4216 	/* Don't print started cpu buffer for the first entry of the trace */
4217 	if (iter->idx > 1)
4218 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4219 				iter->cpu);
4220 }
4221 
4222 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4223 {
4224 	struct trace_array *tr = iter->tr;
4225 	struct trace_seq *s = &iter->seq;
4226 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4227 	struct trace_entry *entry;
4228 	struct trace_event *event;
4229 
4230 	entry = iter->ent;
4231 
4232 	test_cpu_buff_start(iter);
4233 
4234 	event = ftrace_find_event(entry->type);
4235 
4236 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4237 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4238 			trace_print_lat_context(iter);
4239 		else
4240 			trace_print_context(iter);
4241 	}
4242 
4243 	if (trace_seq_has_overflowed(s))
4244 		return TRACE_TYPE_PARTIAL_LINE;
4245 
4246 	if (event) {
4247 		if (tr->trace_flags & TRACE_ITER_FIELDS)
4248 			return print_event_fields(iter, event);
4249 		/*
4250 		 * For TRACE_EVENT() events, the print_fmt is not
4251 		 * safe to use if the array has delta offsets.
4252 		 * Force printing via the fields.
4253 		 */
4254 		if ((tr->text_delta) &&
4255 		    event->type > __TRACE_LAST_TYPE)
4256 			return print_event_fields(iter, event);
4257 
4258 		return event->funcs->trace(iter, sym_flags, event);
4259 	}
4260 
4261 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4262 
4263 	return trace_handle_return(s);
4264 }
4265 
4266 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4267 {
4268 	struct trace_array *tr = iter->tr;
4269 	struct trace_seq *s = &iter->seq;
4270 	struct trace_entry *entry;
4271 	struct trace_event *event;
4272 
4273 	entry = iter->ent;
4274 
4275 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4276 		trace_seq_printf(s, "%d %d %llu ",
4277 				 entry->pid, iter->cpu, iter->ts);
4278 
4279 	if (trace_seq_has_overflowed(s))
4280 		return TRACE_TYPE_PARTIAL_LINE;
4281 
4282 	event = ftrace_find_event(entry->type);
4283 	if (event)
4284 		return event->funcs->raw(iter, 0, event);
4285 
4286 	trace_seq_printf(s, "%d ?\n", entry->type);
4287 
4288 	return trace_handle_return(s);
4289 }
4290 
4291 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4292 {
4293 	struct trace_array *tr = iter->tr;
4294 	struct trace_seq *s = &iter->seq;
4295 	unsigned char newline = '\n';
4296 	struct trace_entry *entry;
4297 	struct trace_event *event;
4298 
4299 	entry = iter->ent;
4300 
4301 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4302 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4303 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4304 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4305 		if (trace_seq_has_overflowed(s))
4306 			return TRACE_TYPE_PARTIAL_LINE;
4307 	}
4308 
4309 	event = ftrace_find_event(entry->type);
4310 	if (event) {
4311 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4312 		if (ret != TRACE_TYPE_HANDLED)
4313 			return ret;
4314 	}
4315 
4316 	SEQ_PUT_FIELD(s, newline);
4317 
4318 	return trace_handle_return(s);
4319 }
4320 
4321 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4322 {
4323 	struct trace_array *tr = iter->tr;
4324 	struct trace_seq *s = &iter->seq;
4325 	struct trace_entry *entry;
4326 	struct trace_event *event;
4327 
4328 	entry = iter->ent;
4329 
4330 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4331 		SEQ_PUT_FIELD(s, entry->pid);
4332 		SEQ_PUT_FIELD(s, iter->cpu);
4333 		SEQ_PUT_FIELD(s, iter->ts);
4334 		if (trace_seq_has_overflowed(s))
4335 			return TRACE_TYPE_PARTIAL_LINE;
4336 	}
4337 
4338 	event = ftrace_find_event(entry->type);
4339 	return event ? event->funcs->binary(iter, 0, event) :
4340 		TRACE_TYPE_HANDLED;
4341 }
4342 
4343 int trace_empty(struct trace_iterator *iter)
4344 {
4345 	struct ring_buffer_iter *buf_iter;
4346 	int cpu;
4347 
4348 	/* If we are looking at one CPU buffer, only check that one */
4349 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4350 		cpu = iter->cpu_file;
4351 		buf_iter = trace_buffer_iter(iter, cpu);
4352 		if (buf_iter) {
4353 			if (!ring_buffer_iter_empty(buf_iter))
4354 				return 0;
4355 		} else {
4356 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4357 				return 0;
4358 		}
4359 		return 1;
4360 	}
4361 
4362 	for_each_tracing_cpu(cpu) {
4363 		buf_iter = trace_buffer_iter(iter, cpu);
4364 		if (buf_iter) {
4365 			if (!ring_buffer_iter_empty(buf_iter))
4366 				return 0;
4367 		} else {
4368 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4369 				return 0;
4370 		}
4371 	}
4372 
4373 	return 1;
4374 }
4375 
4376 /*  Called with trace_event_read_lock() held. */
4377 enum print_line_t print_trace_line(struct trace_iterator *iter)
4378 {
4379 	struct trace_array *tr = iter->tr;
4380 	unsigned long trace_flags = tr->trace_flags;
4381 	enum print_line_t ret;
4382 
4383 	if (iter->lost_events) {
4384 		if (iter->lost_events == (unsigned long)-1)
4385 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4386 					 iter->cpu);
4387 		else
4388 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4389 					 iter->cpu, iter->lost_events);
4390 		if (trace_seq_has_overflowed(&iter->seq))
4391 			return TRACE_TYPE_PARTIAL_LINE;
4392 	}
4393 
4394 	if (iter->trace && iter->trace->print_line) {
4395 		ret = iter->trace->print_line(iter);
4396 		if (ret != TRACE_TYPE_UNHANDLED)
4397 			return ret;
4398 	}
4399 
4400 	if (iter->ent->type == TRACE_BPUTS &&
4401 			trace_flags & TRACE_ITER_PRINTK &&
4402 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4403 		return trace_print_bputs_msg_only(iter);
4404 
4405 	if (iter->ent->type == TRACE_BPRINT &&
4406 			trace_flags & TRACE_ITER_PRINTK &&
4407 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4408 		return trace_print_bprintk_msg_only(iter);
4409 
4410 	if (iter->ent->type == TRACE_PRINT &&
4411 			trace_flags & TRACE_ITER_PRINTK &&
4412 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4413 		return trace_print_printk_msg_only(iter);
4414 
4415 	if (trace_flags & TRACE_ITER_BIN)
4416 		return print_bin_fmt(iter);
4417 
4418 	if (trace_flags & TRACE_ITER_HEX)
4419 		return print_hex_fmt(iter);
4420 
4421 	if (trace_flags & TRACE_ITER_RAW)
4422 		return print_raw_fmt(iter);
4423 
4424 	return print_trace_fmt(iter);
4425 }
4426 
4427 void trace_latency_header(struct seq_file *m)
4428 {
4429 	struct trace_iterator *iter = m->private;
4430 	struct trace_array *tr = iter->tr;
4431 
4432 	/* print nothing if the buffers are empty */
4433 	if (trace_empty(iter))
4434 		return;
4435 
4436 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4437 		print_trace_header(m, iter);
4438 
4439 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4440 		print_lat_help_header(m);
4441 }
4442 
4443 void trace_default_header(struct seq_file *m)
4444 {
4445 	struct trace_iterator *iter = m->private;
4446 	struct trace_array *tr = iter->tr;
4447 	unsigned long trace_flags = tr->trace_flags;
4448 
4449 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4450 		return;
4451 
4452 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4453 		/* print nothing if the buffers are empty */
4454 		if (trace_empty(iter))
4455 			return;
4456 		print_trace_header(m, iter);
4457 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4458 			print_lat_help_header(m);
4459 	} else {
4460 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4461 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4462 				print_func_help_header_irq(iter->array_buffer,
4463 							   m, trace_flags);
4464 			else
4465 				print_func_help_header(iter->array_buffer, m,
4466 						       trace_flags);
4467 		}
4468 	}
4469 }
4470 
4471 static void test_ftrace_alive(struct seq_file *m)
4472 {
4473 	if (!ftrace_is_dead())
4474 		return;
4475 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4476 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4477 }
4478 
4479 #ifdef CONFIG_TRACER_MAX_TRACE
4480 static void show_snapshot_main_help(struct seq_file *m)
4481 {
4482 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4483 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4484 		    "#                      Takes a snapshot of the main buffer.\n"
4485 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4486 		    "#                      (Doesn't have to be '2', works with any number that\n"
4487 		    "#                       is not a '0' or '1')\n");
4488 }
4489 
4490 static void show_snapshot_percpu_help(struct seq_file *m)
4491 {
4492 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4493 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4494 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4495 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4496 #else
4497 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4498 		    "#                     Must use main snapshot file to allocate.\n");
4499 #endif
4500 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4501 		    "#                      (Doesn't have to be '2', works with any number that\n"
4502 		    "#                       is not a '0' or '1')\n");
4503 }
4504 
4505 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4506 {
4507 	if (iter->tr->allocated_snapshot)
4508 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4509 	else
4510 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4511 
4512 	seq_puts(m, "# Snapshot commands:\n");
4513 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4514 		show_snapshot_main_help(m);
4515 	else
4516 		show_snapshot_percpu_help(m);
4517 }
4518 #else
4519 /* Should never be called */
4520 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4521 #endif
4522 
4523 static int s_show(struct seq_file *m, void *v)
4524 {
4525 	struct trace_iterator *iter = v;
4526 	int ret;
4527 
4528 	if (iter->ent == NULL) {
4529 		if (iter->tr) {
4530 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4531 			seq_puts(m, "#\n");
4532 			test_ftrace_alive(m);
4533 		}
4534 		if (iter->snapshot && trace_empty(iter))
4535 			print_snapshot_help(m, iter);
4536 		else if (iter->trace && iter->trace->print_header)
4537 			iter->trace->print_header(m);
4538 		else
4539 			trace_default_header(m);
4540 
4541 	} else if (iter->leftover) {
4542 		/*
4543 		 * If we filled the seq_file buffer earlier, we
4544 		 * want to just show it now.
4545 		 */
4546 		ret = trace_print_seq(m, &iter->seq);
4547 
4548 		/* ret should this time be zero, but you never know */
4549 		iter->leftover = ret;
4550 
4551 	} else {
4552 		ret = print_trace_line(iter);
4553 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4554 			iter->seq.full = 0;
4555 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4556 		}
4557 		ret = trace_print_seq(m, &iter->seq);
4558 		/*
4559 		 * If we overflow the seq_file buffer, then it will
4560 		 * ask us for this data again at start up.
4561 		 * Use that instead.
4562 		 *  ret is 0 if seq_file write succeeded.
4563 		 *        -1 otherwise.
4564 		 */
4565 		iter->leftover = ret;
4566 	}
4567 
4568 	return 0;
4569 }
4570 
4571 /*
4572  * Should be used after trace_array_get(), trace_types_lock
4573  * ensures that i_cdev was already initialized.
4574  */
4575 static inline int tracing_get_cpu(struct inode *inode)
4576 {
4577 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4578 		return (long)inode->i_cdev - 1;
4579 	return RING_BUFFER_ALL_CPUS;
4580 }
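
/*
 * The encoding relied on here (inferred from the "- 1" above): per-CPU
 * files store "cpu + 1" in i_cdev when they are created, so the NULL
 * i_cdev of a non-per-CPU file naturally decodes to RING_BUFFER_ALL_CPUS.
 */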
4581 
4582 static const struct seq_operations tracer_seq_ops = {
4583 	.start		= s_start,
4584 	.next		= s_next,
4585 	.stop		= s_stop,
4586 	.show		= s_show,
4587 };
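
/*
 * Rough call order of the operations above during a read of the "trace"
 * file (standard seq_file behaviour, shown for orientation only):
 *
 *	p = s_start(m, &pos);
 *	while (p && !seq_has_overflowed(m)) {
 *		s_show(m, p);
 *		p = s_next(m, p, &pos);
 *	}
 *	s_stop(m, p);
 *
 * s_start() and s_stop() take and drop the access locks, so s_show()
 * always runs with trace_event_read_lock() held.
 */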
4588 
4589 /*
4590  * Note, as iter itself can be allocated and freed in different
4591  * ways, this function is only used to free its content, and not
4592  * the iterator itself. The only requirement on all the allocations
4593  * is that they zero all fields (kzalloc), as freeing works with
4594  * either allocated content or NULL.
4595  */
4596 static void free_trace_iter_content(struct trace_iterator *iter)
4597 {
4598 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
4599 	if (iter->fmt != static_fmt_buf)
4600 		kfree(iter->fmt);
4601 
4602 	kfree(iter->temp);
4603 	kfree(iter->buffer_iter);
4604 	mutex_destroy(&iter->mutex);
4605 	free_cpumask_var(iter->started);
4606 }
4607 
4608 static struct trace_iterator *
4609 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4610 {
4611 	struct trace_array *tr = inode->i_private;
4612 	struct trace_iterator *iter;
4613 	int cpu;
4614 
4615 	if (tracing_disabled)
4616 		return ERR_PTR(-ENODEV);
4617 
4618 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4619 	if (!iter)
4620 		return ERR_PTR(-ENOMEM);
4621 
4622 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4623 				    GFP_KERNEL);
4624 	if (!iter->buffer_iter)
4625 		goto release;
4626 
4627 	/*
4628 	 * trace_find_next_entry() may need to save off iter->ent.
4629 	 * It will place it into the iter->temp buffer. As most
4630 	 * events are less than 128 bytes, allocate a buffer of that size.
4631 	 * If one is greater, then trace_find_next_entry() will
4632 	 * allocate a new buffer to adjust for the bigger iter->ent.
4633 	 * It's not critical if it fails to get allocated here.
4634 	 */
4635 	iter->temp = kmalloc(128, GFP_KERNEL);
4636 	if (iter->temp)
4637 		iter->temp_size = 128;
4638 
4639 	/*
4640 	 * trace_event_printf() may need to modify the given format
4641 	 * string to replace %p with %px so that it shows the real address
4642 	 * instead of a hashed value. However, that is only needed for event
4643 	 * tracing; other tracers may not need it. Defer the allocation
4644 	 * until it is needed.
4645 	 */
4646 	iter->fmt = NULL;
4647 	iter->fmt_size = 0;
4648 
4649 	mutex_lock(&trace_types_lock);
4650 	iter->trace = tr->current_trace;
4651 
4652 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4653 		goto fail;
4654 
4655 	iter->tr = tr;
4656 
4657 #ifdef CONFIG_TRACER_MAX_TRACE
4658 	/* Currently only the top directory has a snapshot */
4659 	if (tr->current_trace->print_max || snapshot)
4660 		iter->array_buffer = &tr->max_buffer;
4661 	else
4662 #endif
4663 		iter->array_buffer = &tr->array_buffer;
4664 	iter->snapshot = snapshot;
4665 	iter->pos = -1;
4666 	iter->cpu_file = tracing_get_cpu(inode);
4667 	mutex_init(&iter->mutex);
4668 
4669 	/* Notify the tracer early; before we stop tracing. */
4670 	if (iter->trace->open)
4671 		iter->trace->open(iter);
4672 
4673 	/* Annotate start of buffers if we had overruns */
4674 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4675 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4676 
4677 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4678 	if (trace_clocks[tr->clock_id].in_ns)
4679 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4680 
4681 	/*
4682 	 * If pause-on-trace is enabled, then stop the trace while
4683 	 * dumping, unless this is the "snapshot" file
4684 	 */
4685 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4686 		tracing_stop_tr(tr);
4687 
4688 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4689 		for_each_tracing_cpu(cpu) {
4690 			iter->buffer_iter[cpu] =
4691 				ring_buffer_read_start(iter->array_buffer->buffer,
4692 						       cpu, GFP_KERNEL);
4693 			tracing_iter_reset(iter, cpu);
4694 		}
4695 	} else {
4696 		cpu = iter->cpu_file;
4697 		iter->buffer_iter[cpu] =
4698 			ring_buffer_read_start(iter->array_buffer->buffer,
4699 					       cpu, GFP_KERNEL);
4700 		tracing_iter_reset(iter, cpu);
4701 	}
4702 
4703 	mutex_unlock(&trace_types_lock);
4704 
4705 	return iter;
4706 
4707  fail:
4708 	mutex_unlock(&trace_types_lock);
4709 	free_trace_iter_content(iter);
4710 release:
4711 	seq_release_private(inode, file);
4712 	return ERR_PTR(-ENOMEM);
4713 }
4714 
4715 int tracing_open_generic(struct inode *inode, struct file *filp)
4716 {
4717 	int ret;
4718 
4719 	ret = tracing_check_open_get_tr(NULL);
4720 	if (ret)
4721 		return ret;
4722 
4723 	filp->private_data = inode->i_private;
4724 	return 0;
4725 }
4726 
4727 bool tracing_is_disabled(void)
4728 {
4729 	return (tracing_disabled) ? true: false;
4730 	return tracing_disabled;
4731 
4732 /*
4733  * Open and update trace_array ref count.
4734  * Must have the current trace_array passed to it.
4735  */
4736 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4737 {
4738 	struct trace_array *tr = inode->i_private;
4739 	int ret;
4740 
4741 	ret = tracing_check_open_get_tr(tr);
4742 	if (ret)
4743 		return ret;
4744 
4745 	filp->private_data = inode->i_private;
4746 
4747 	return 0;
4748 }
4749 
4750 /*
4751  * The private pointer of the inode is the trace_event_file.
4752  * Update the tr ref count associated to it.
4753  */
4754 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4755 {
4756 	struct trace_event_file *file = inode->i_private;
4757 	int ret;
4758 
4759 	ret = tracing_check_open_get_tr(file->tr);
4760 	if (ret)
4761 		return ret;
4762 
4763 	guard(mutex)(&event_mutex);
4764 
4765 	/* Fail if the file is marked for removal */
4766 	if (file->flags & EVENT_FILE_FL_FREED) {
4767 		trace_array_put(file->tr);
4768 		return -ENODEV;
4769 	} else {
4770 		event_file_get(file);
4771 	}
4772 
4773 	filp->private_data = inode->i_private;
4774 
4775 	return 0;
4776 }
4777 
4778 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4779 {
4780 	struct trace_event_file *file = inode->i_private;
4781 
4782 	trace_array_put(file->tr);
4783 	event_file_put(file);
4784 
4785 	return 0;
4786 }
4787 
4788 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4789 {
4790 	tracing_release_file_tr(inode, filp);
4791 	return single_release(inode, filp);
4792 }
4793 
4794 static int tracing_release(struct inode *inode, struct file *file)
4795 {
4796 	struct trace_array *tr = inode->i_private;
4797 	struct seq_file *m = file->private_data;
4798 	struct trace_iterator *iter;
4799 	int cpu;
4800 
4801 	if (!(file->f_mode & FMODE_READ)) {
4802 		trace_array_put(tr);
4803 		return 0;
4804 	}
4805 
4806 	/* Writes do not use seq_file */
4807 	iter = m->private;
4808 	mutex_lock(&trace_types_lock);
4809 
4810 	for_each_tracing_cpu(cpu) {
4811 		if (iter->buffer_iter[cpu])
4812 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4813 	}
4814 
4815 	if (iter->trace && iter->trace->close)
4816 		iter->trace->close(iter);
4817 
4818 	if (!iter->snapshot && tr->stop_count)
4819 		/* reenable tracing if it was previously enabled */
4820 		tracing_start_tr(tr);
4821 
4822 	__trace_array_put(tr);
4823 
4824 	mutex_unlock(&trace_types_lock);
4825 
4826 	free_trace_iter_content(iter);
4827 	seq_release_private(inode, file);
4828 
4829 	return 0;
4830 }
4831 
4832 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4833 {
4834 	struct trace_array *tr = inode->i_private;
4835 
4836 	trace_array_put(tr);
4837 	return 0;
4838 }
4839 
4840 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4841 {
4842 	struct trace_array *tr = inode->i_private;
4843 
4844 	trace_array_put(tr);
4845 
4846 	return single_release(inode, file);
4847 }
4848 
4849 static int tracing_open(struct inode *inode, struct file *file)
4850 {
4851 	struct trace_array *tr = inode->i_private;
4852 	struct trace_iterator *iter;
4853 	int ret;
4854 
4855 	ret = tracing_check_open_get_tr(tr);
4856 	if (ret)
4857 		return ret;
4858 
4859 	/* If this file was open for write, then erase contents */
4860 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4861 		int cpu = tracing_get_cpu(inode);
4862 		struct array_buffer *trace_buf = &tr->array_buffer;
4863 
4864 #ifdef CONFIG_TRACER_MAX_TRACE
4865 		if (tr->current_trace->print_max)
4866 			trace_buf = &tr->max_buffer;
4867 #endif
4868 
4869 		if (cpu == RING_BUFFER_ALL_CPUS)
4870 			tracing_reset_online_cpus(trace_buf);
4871 		else
4872 			tracing_reset_cpu(trace_buf, cpu);
4873 	}
4874 
4875 	if (file->f_mode & FMODE_READ) {
4876 		iter = __tracing_open(inode, file, false);
4877 		if (IS_ERR(iter))
4878 			ret = PTR_ERR(iter);
4879 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4880 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4881 	}
4882 
4883 	if (ret < 0)
4884 		trace_array_put(tr);
4885 
4886 	return ret;
4887 }
4888 
4889 /*
4890  * Some tracers are not suitable for instance buffers.
4891  * A tracer is always available for the global array (toplevel)
4892  * or if it explicitly states that it is.
4893  */
4894 static bool
4895 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4896 {
4897 #ifdef CONFIG_TRACER_SNAPSHOT
4898 	/* arrays with mapped buffer range do not have snapshots */
4899 	if (tr->range_addr_start && t->use_max_tr)
4900 		return false;
4901 #endif
4902 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4903 }
4904 
4905 /* Find the next tracer that this trace array may use */
4906 static struct tracer *
4907 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4908 {
4909 	while (t && !trace_ok_for_array(t, tr))
4910 		t = t->next;
4911 
4912 	return t;
4913 }
4914 
4915 static void *
4916 t_next(struct seq_file *m, void *v, loff_t *pos)
4917 {
4918 	struct trace_array *tr = m->private;
4919 	struct tracer *t = v;
4920 
4921 	(*pos)++;
4922 
4923 	if (t)
4924 		t = get_tracer_for_array(tr, t->next);
4925 
4926 	return t;
4927 }
4928 
4929 static void *t_start(struct seq_file *m, loff_t *pos)
4930 {
4931 	struct trace_array *tr = m->private;
4932 	struct tracer *t;
4933 	loff_t l = 0;
4934 
4935 	mutex_lock(&trace_types_lock);
4936 
4937 	t = get_tracer_for_array(tr, trace_types);
4938 	for (; t && l < *pos; t = t_next(m, t, &l))
4939 			;
4940 
4941 	return t;
4942 }
4943 
4944 static void t_stop(struct seq_file *m, void *p)
4945 {
4946 	mutex_unlock(&trace_types_lock);
4947 }
4948 
4949 static int t_show(struct seq_file *m, void *v)
4950 {
4951 	struct tracer *t = v;
4952 
4953 	if (!t)
4954 		return 0;
4955 
4956 	seq_puts(m, t->name);
4957 	if (t->next)
4958 		seq_putc(m, ' ');
4959 	else
4960 		seq_putc(m, '\n');
4961 
4962 	return 0;
4963 }
4964 
4965 static const struct seq_operations show_traces_seq_ops = {
4966 	.start		= t_start,
4967 	.next		= t_next,
4968 	.stop		= t_stop,
4969 	.show		= t_show,
4970 };
4971 
4972 static int show_traces_open(struct inode *inode, struct file *file)
4973 {
4974 	struct trace_array *tr = inode->i_private;
4975 	struct seq_file *m;
4976 	int ret;
4977 
4978 	ret = tracing_check_open_get_tr(tr);
4979 	if (ret)
4980 		return ret;
4981 
4982 	ret = seq_open(file, &show_traces_seq_ops);
4983 	if (ret) {
4984 		trace_array_put(tr);
4985 		return ret;
4986 	}
4987 
4988 	m = file->private_data;
4989 	m->private = tr;
4990 
4991 	return 0;
4992 }
4993 
4994 static int tracing_seq_release(struct inode *inode, struct file *file)
4995 {
4996 	struct trace_array *tr = inode->i_private;
4997 
4998 	trace_array_put(tr);
4999 	return seq_release(inode, file);
5000 }
5001 
5002 static ssize_t
5003 tracing_write_stub(struct file *filp, const char __user *ubuf,
5004 		   size_t count, loff_t *ppos)
5005 {
5006 	return count;
5007 }
5008 
5009 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5010 {
5011 	int ret;
5012 
5013 	if (file->f_mode & FMODE_READ)
5014 		ret = seq_lseek(file, offset, whence);
5015 	else
5016 		file->f_pos = ret = 0;
5017 
5018 	return ret;
5019 }
5020 
5021 static const struct file_operations tracing_fops = {
5022 	.open		= tracing_open,
5023 	.read		= seq_read,
5024 	.read_iter	= seq_read_iter,
5025 	.splice_read	= copy_splice_read,
5026 	.write		= tracing_write_stub,
5027 	.llseek		= tracing_lseek,
5028 	.release	= tracing_release,
5029 };
5030 
5031 static const struct file_operations show_traces_fops = {
5032 	.open		= show_traces_open,
5033 	.read		= seq_read,
5034 	.llseek		= seq_lseek,
5035 	.release	= tracing_seq_release,
5036 };
5037 
5038 static ssize_t
5039 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5040 		     size_t count, loff_t *ppos)
5041 {
5042 	struct trace_array *tr = file_inode(filp)->i_private;
5043 	char *mask_str __free(kfree) = NULL;
5044 	int len;
5045 
5046 	len = snprintf(NULL, 0, "%*pb\n",
5047 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5048 	mask_str = kmalloc(len, GFP_KERNEL);
5049 	if (!mask_str)
5050 		return -ENOMEM;
5051 
5052 	len = snprintf(mask_str, len, "%*pb\n",
5053 		       cpumask_pr_args(tr->tracing_cpumask));
5054 	if (len >= count)
5055 		return -EINVAL;
5056 
5057 	return simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5058 }
5059 
5060 int tracing_set_cpumask(struct trace_array *tr,
5061 			cpumask_var_t tracing_cpumask_new)
5062 {
5063 	int cpu;
5064 
5065 	if (!tr)
5066 		return -EINVAL;
5067 
5068 	local_irq_disable();
5069 	arch_spin_lock(&tr->max_lock);
5070 	for_each_tracing_cpu(cpu) {
5071 		/*
5072 		 * Increase/decrease the disabled counter if we are
5073 		 * about to flip a bit in the cpumask:
5074 		 */
5075 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5076 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5077 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5078 #ifdef CONFIG_TRACER_MAX_TRACE
5079 			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5080 #endif
5081 		}
5082 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5083 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5084 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5085 #ifdef CONFIG_TRACER_MAX_TRACE
5086 			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5087 #endif
5088 		}
5089 	}
5090 	arch_spin_unlock(&tr->max_lock);
5091 	local_irq_enable();
5092 
5093 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5094 
5095 	return 0;
5096 }
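
/*
 * Kernel-internal usage sketch (restricting an instance to CPUs 0-1; the
 * trace_array pointer "tr" is assumed valid):
 *
 *	cpumask_var_t new_mask;
 *
 *	if (zalloc_cpumask_var(&new_mask, GFP_KERNEL)) {
 *		cpumask_set_cpu(0, new_mask);
 *		cpumask_set_cpu(1, new_mask);
 *		tracing_set_cpumask(tr, new_mask);
 *		free_cpumask_var(new_mask);
 *	}
 *
 * Freeing new_mask afterwards is fine because tracing_set_cpumask()
 * copies it into tr->tracing_cpumask. From user space the same effect
 * comes from writing a hex mask to the tracing_cpumask file, e.g.
 * "echo 3 > tracing_cpumask".
 */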
5097 
5098 static ssize_t
5099 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5100 		      size_t count, loff_t *ppos)
5101 {
5102 	struct trace_array *tr = file_inode(filp)->i_private;
5103 	cpumask_var_t tracing_cpumask_new;
5104 	int err;
5105 
5106 	if (count == 0 || count > KMALLOC_MAX_SIZE)
5107 		return -EINVAL;
5108 
5109 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5110 		return -ENOMEM;
5111 
5112 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5113 	if (err)
5114 		goto err_free;
5115 
5116 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5117 	if (err)
5118 		goto err_free;
5119 
5120 	free_cpumask_var(tracing_cpumask_new);
5121 
5122 	return count;
5123 
5124 err_free:
5125 	free_cpumask_var(tracing_cpumask_new);
5126 
5127 	return err;
5128 }
5129 
5130 static const struct file_operations tracing_cpumask_fops = {
5131 	.open		= tracing_open_generic_tr,
5132 	.read		= tracing_cpumask_read,
5133 	.write		= tracing_cpumask_write,
5134 	.release	= tracing_release_generic_tr,
5135 	.llseek		= generic_file_llseek,
5136 };
5137 
5138 static int tracing_trace_options_show(struct seq_file *m, void *v)
5139 {
5140 	struct tracer_opt *trace_opts;
5141 	struct trace_array *tr = m->private;
5142 	u32 tracer_flags;
5143 	int i;
5144 
5145 	guard(mutex)(&trace_types_lock);
5146 
5147 	tracer_flags = tr->current_trace->flags->val;
5148 	trace_opts = tr->current_trace->flags->opts;
5149 
5150 	for (i = 0; trace_options[i]; i++) {
5151 		if (tr->trace_flags & (1 << i))
5152 			seq_printf(m, "%s\n", trace_options[i]);
5153 		else
5154 			seq_printf(m, "no%s\n", trace_options[i]);
5155 	}
5156 
5157 	for (i = 0; trace_opts[i].name; i++) {
5158 		if (tracer_flags & trace_opts[i].bit)
5159 			seq_printf(m, "%s\n", trace_opts[i].name);
5160 		else
5161 			seq_printf(m, "no%s\n", trace_opts[i].name);
5162 	}
5163 
5164 	return 0;
5165 }
5166 
5167 static int __set_tracer_option(struct trace_array *tr,
5168 			       struct tracer_flags *tracer_flags,
5169 			       struct tracer_opt *opts, int neg)
5170 {
5171 	struct tracer *trace = tracer_flags->trace;
5172 	int ret;
5173 
5174 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5175 	if (ret)
5176 		return ret;
5177 
5178 	if (neg)
5179 		tracer_flags->val &= ~opts->bit;
5180 	else
5181 		tracer_flags->val |= opts->bit;
5182 	return 0;
5183 }
5184 
5185 /* Try to assign a tracer specific option */
5186 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5187 {
5188 	struct tracer *trace = tr->current_trace;
5189 	struct tracer_flags *tracer_flags = trace->flags;
5190 	struct tracer_opt *opts = NULL;
5191 	int i;
5192 
5193 	for (i = 0; tracer_flags->opts[i].name; i++) {
5194 		opts = &tracer_flags->opts[i];
5195 
5196 		if (strcmp(cmp, opts->name) == 0)
5197 			return __set_tracer_option(tr, trace->flags, opts, neg);
5198 	}
5199 
5200 	return -EINVAL;
5201 }
5202 
5203 /* Some tracers require overwrite to stay enabled */
trace_keep_overwrite(struct tracer * tracer,u32 mask,int set)5204 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5205 {
5206 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5207 		return -1;
5208 
5209 	return 0;
5210 }
5211 
set_tracer_flag(struct trace_array * tr,unsigned int mask,int enabled)5212 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5213 {
5214 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5215 	    (mask == TRACE_ITER_RECORD_CMD) ||
5216 	    (mask == TRACE_ITER_TRACE_PRINTK) ||
5217 	    (mask == TRACE_ITER_COPY_MARKER))
5218 		lockdep_assert_held(&event_mutex);
5219 
5220 	/* do nothing if flag is already set */
5221 	if (!!(tr->trace_flags & mask) == !!enabled)
5222 		return 0;
5223 
5224 	/* Give the tracer a chance to approve the change */
5225 	if (tr->current_trace->flag_changed)
5226 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5227 			return -EINVAL;
5228 
5229 	if (mask == TRACE_ITER_TRACE_PRINTK) {
5230 		if (enabled) {
5231 			update_printk_trace(tr);
5232 		} else {
5233 			/*
5234 			 * The global_trace cannot clear this.
5235 			 * Its flag only gets cleared if another instance sets it.
5236 			 */
5237 			if (printk_trace == &global_trace)
5238 				return -EINVAL;
5239 			/*
5240 			 * An instance must always have it set.
5241 			 * By default, that's the global_trace instance.
5242 			 */
5243 			if (printk_trace == tr)
5244 				update_printk_trace(&global_trace);
5245 		}
5246 	}
5247 
5248 	if (mask == TRACE_ITER_COPY_MARKER)
5249 		update_marker_trace(tr, enabled);
5250 
5251 	if (enabled)
5252 		tr->trace_flags |= mask;
5253 	else
5254 		tr->trace_flags &= ~mask;
5255 
5256 	if (mask == TRACE_ITER_RECORD_CMD)
5257 		trace_event_enable_cmd_record(enabled);
5258 
5259 	if (mask == TRACE_ITER_RECORD_TGID) {
5260 
5261 		if (trace_alloc_tgid_map() < 0) {
5262 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5263 			return -ENOMEM;
5264 		}
5265 
5266 		trace_event_enable_tgid_record(enabled);
5267 	}
5268 
5269 	if (mask == TRACE_ITER_EVENT_FORK)
5270 		trace_event_follow_fork(tr, enabled);
5271 
5272 	if (mask == TRACE_ITER_FUNC_FORK)
5273 		ftrace_pid_follow_fork(tr, enabled);
5274 
5275 	if (mask == TRACE_ITER_OVERWRITE) {
5276 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5277 #ifdef CONFIG_TRACER_MAX_TRACE
5278 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5279 #endif
5280 	}
5281 
5282 	if (mask == TRACE_ITER_PRINTK) {
5283 		trace_printk_start_stop_comm(enabled);
5284 		trace_printk_control(enabled);
5285 	}
5286 
5287 	return 0;
5288 }
5289 
trace_set_options(struct trace_array * tr,char * option)5290 int trace_set_options(struct trace_array *tr, char *option)
5291 {
5292 	char *cmp;
5293 	int neg = 0;
5294 	int ret;
5295 	size_t orig_len = strlen(option);
5296 	int len;
5297 
5298 	cmp = strstrip(option);
5299 
5300 	len = str_has_prefix(cmp, "no");
5301 	if (len)
5302 		neg = 1;
5303 
5304 	cmp += len;
5305 
5306 	mutex_lock(&event_mutex);
5307 	mutex_lock(&trace_types_lock);
5308 
5309 	ret = match_string(trace_options, -1, cmp);
5310 	/* If no option could be set, test the specific tracer options */
5311 	if (ret < 0)
5312 		ret = set_tracer_option(tr, cmp, neg);
5313 	else
5314 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5315 
5316 	mutex_unlock(&trace_types_lock);
5317 	mutex_unlock(&event_mutex);
5318 
5319 	/*
5320 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5321 	 * turn it back into a space.
5322 	 */
5323 	if (orig_len > strlen(option))
5324 		option[strlen(option)] = ' ';
5325 
5326 	return ret;
5327 }
5328 
apply_trace_boot_options(void)5329 static void __init apply_trace_boot_options(void)
5330 {
5331 	char *buf = trace_boot_options_buf;
5332 	char *option;
5333 
5334 	while (true) {
5335 		option = strsep(&buf, ",");
5336 
5337 		if (!option)
5338 			break;
5339 
5340 		if (*option)
5341 			trace_set_options(&global_trace, option);
5342 
5343 		/* Put back the comma to allow this to be called again */
5344 		if (buf)
5345 			*(buf - 1) = ',';
5346 	}
5347 }
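/*
 * Example kernel command line consumed by the loop above (illustrative
 * option names):
 *
 *	trace_options=sym-offset,noprint-parent,stacktrace
 *
 * Each comma-separated entry is handed to trace_set_options(); the comma
 * is restored afterwards so the buffer could be parsed again later.
 */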
5348 
5349 static ssize_t
tracing_trace_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)5350 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5351 			size_t cnt, loff_t *ppos)
5352 {
5353 	struct seq_file *m = filp->private_data;
5354 	struct trace_array *tr = m->private;
5355 	char buf[64];
5356 	int ret;
5357 
5358 	if (cnt >= sizeof(buf))
5359 		return -EINVAL;
5360 
5361 	if (copy_from_user(buf, ubuf, cnt))
5362 		return -EFAULT;
5363 
5364 	buf[cnt] = 0;
5365 
5366 	ret = trace_set_options(tr, buf);
5367 	if (ret < 0)
5368 		return ret;
5369 
5370 	*ppos += cnt;
5371 
5372 	return cnt;
5373 }
5374 
tracing_trace_options_open(struct inode * inode,struct file * file)5375 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5376 {
5377 	struct trace_array *tr = inode->i_private;
5378 	int ret;
5379 
5380 	ret = tracing_check_open_get_tr(tr);
5381 	if (ret)
5382 		return ret;
5383 
5384 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5385 	if (ret < 0)
5386 		trace_array_put(tr);
5387 
5388 	return ret;
5389 }
5390 
5391 static const struct file_operations tracing_iter_fops = {
5392 	.open		= tracing_trace_options_open,
5393 	.read		= seq_read,
5394 	.llseek		= seq_lseek,
5395 	.release	= tracing_single_release_tr,
5396 	.write		= tracing_trace_options_write,
5397 };
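/*
 * The "trace_options" file handled above takes one option name per
 * write; prefixing "no" clears it, matching the "no%s" lines printed by
 * tracing_trace_options_show().  Illustrative usage (option names are
 * examples only):
 *
 *	# echo sym-offset > /sys/kernel/tracing/trace_options
 *	# echo noprint-parent > /sys/kernel/tracing/trace_options
 */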
5398 
5399 static const char readme_msg[] =
5400 	"tracing mini-HOWTO:\n\n"
5401 	"By default tracefs removes all OTH file permission bits.\n"
5402 	"When mounting tracefs an optional group id can be specified\n"
5403 	"which adds the group to every directory and file in tracefs:\n\n"
5404 	"\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
5405 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5406 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5407 	" Important files:\n"
5408 	"  trace\t\t\t- The static contents of the buffer\n"
5409 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5410 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5411 	"  current_tracer\t- function and latency tracers\n"
5412 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5413 	"  error_log\t- error log for failed commands (that support it)\n"
5414 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5415 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5416 	"  trace_clock\t\t- change the clock used to order events\n"
5417 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5418 	"      global:   Synced across CPUs but slows tracing down.\n"
5419 	"     counter:   Not a clock, but just an increment\n"
5420 	"      uptime:   Jiffy counter from time of boot\n"
5421 	"        perf:   Same clock that perf events use\n"
5422 #ifdef CONFIG_X86_64
5423 	"     x86-tsc:   TSC cycle counter\n"
5424 #endif
5425 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5426 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5427 	"    absolute:   Absolute (standalone) timestamp\n"
5428 	"\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5429 	"\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
5430 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5431 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5432 	"\t\t\t  Remove sub-buffer with rmdir\n"
5433 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5434 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5435 	"\t\t\t  option name\n"
5436 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5437 #ifdef CONFIG_DYNAMIC_FTRACE
5438 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5439 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5440 	"\t\t\t  functions\n"
5441 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5442 	"\t     modules: Can select a group via module\n"
5443 	"\t      Format: :mod:<module-name>\n"
5444 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5445 	"\t    triggers: a command to perform when function is hit\n"
5446 	"\t      Format: <function>:<trigger>[:count]\n"
5447 	"\t     trigger: traceon, traceoff\n"
5448 	"\t\t      enable_event:<system>:<event>\n"
5449 	"\t\t      disable_event:<system>:<event>\n"
5450 #ifdef CONFIG_STACKTRACE
5451 	"\t\t      stacktrace\n"
5452 #endif
5453 #ifdef CONFIG_TRACER_SNAPSHOT
5454 	"\t\t      snapshot\n"
5455 #endif
5456 	"\t\t      dump\n"
5457 	"\t\t      cpudump\n"
5458 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5459 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5460 	"\t     The first one will disable tracing every time do_fault is hit\n"
5461 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5462 	"\t       The first time do_trap is hit and it disables tracing, the\n"
5463 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5464 	"\t       the counter will not decrement. It only decrements when the\n"
5465 	"\t       trigger did work\n"
5466 	"\t     To remove trigger without count:\n"
5467 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5468 	"\t     To remove trigger with a count:\n"
5469 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5470 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5471 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5472 	"\t    modules: Can select a group via module command :mod:\n"
5473 	"\t    Does not accept triggers\n"
5474 #endif /* CONFIG_DYNAMIC_FTRACE */
5475 #ifdef CONFIG_FUNCTION_TRACER
5476 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5477 	"\t\t    (function)\n"
5478 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5479 	"\t\t    (function)\n"
5480 #endif
5481 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5482 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5483 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5484 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5485 #endif
5486 #ifdef CONFIG_TRACER_SNAPSHOT
5487 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5488 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5489 	"\t\t\t  information\n"
5490 #endif
5491 #ifdef CONFIG_STACK_TRACER
5492 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5493 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5494 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5495 	"\t\t\t  new trace)\n"
5496 #ifdef CONFIG_DYNAMIC_FTRACE
5497 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5498 	"\t\t\t  traces\n"
5499 #endif
5500 #endif /* CONFIG_STACK_TRACER */
5501 #ifdef CONFIG_DYNAMIC_EVENTS
5502 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5503 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5504 #endif
5505 #ifdef CONFIG_KPROBE_EVENTS
5506 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5507 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5508 #endif
5509 #ifdef CONFIG_UPROBE_EVENTS
5510 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5511 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5512 #endif
5513 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5514     defined(CONFIG_FPROBE_EVENTS)
5515 	"\t  accepts: event-definitions (one definition per line)\n"
5516 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5517 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5518 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5519 #endif
5520 #ifdef CONFIG_FPROBE_EVENTS
5521 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5522 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5523 #endif
5524 #ifdef CONFIG_HIST_TRIGGERS
5525 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5526 #endif
5527 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5528 	"\t           -:[<group>/][<event>]\n"
5529 #ifdef CONFIG_KPROBE_EVENTS
5530 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5531   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5532 #endif
5533 #ifdef CONFIG_UPROBE_EVENTS
5534   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5535 #endif
5536 	"\t     args: <name>=fetcharg[:type]\n"
5537 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5538 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5539 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5540 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5541 	"\t           <argname>[->field[->field|.field...]],\n"
5542 #endif
5543 #else
5544 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5545 #endif
5546 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5547 	"\t     kernel return probes support: $retval, $arg<N>, $comm\n"
5548 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5549 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5550 	"\t           symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
5551 #ifdef CONFIG_HIST_TRIGGERS
5552 	"\t    field: <stype> <name>;\n"
5553 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5554 	"\t           [unsigned] char/int/long\n"
5555 #endif
5556 	"\t    efield: For event probes ('e' types), the field is one of the fields\n"
5557 	"\t            of the <attached-group>/<attached-event>.\n"
5558 #endif
5559 	"  set_event\t\t- Enables events by name written into it\n"
5560 	"\t\t\t  Can enable module events via: :mod:<module>\n"
5561 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5562 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5563 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5564 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5565 	"\t\t\t  events\n"
5566 	"      filter\t\t- If set, only events passing filter are traced\n"
5567 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5568 	"\t\t\t  <event>:\n"
5569 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5570 	"      filter\t\t- If set, only events passing filter are traced\n"
5571 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5572 	"\t    Format: <trigger>[:count][if <filter>]\n"
5573 	"\t   trigger: traceon, traceoff\n"
5574 	"\t            enable_event:<system>:<event>\n"
5575 	"\t            disable_event:<system>:<event>\n"
5576 #ifdef CONFIG_HIST_TRIGGERS
5577 	"\t            enable_hist:<system>:<event>\n"
5578 	"\t            disable_hist:<system>:<event>\n"
5579 #endif
5580 #ifdef CONFIG_STACKTRACE
5581 	"\t\t    stacktrace\n"
5582 #endif
5583 #ifdef CONFIG_TRACER_SNAPSHOT
5584 	"\t\t    snapshot\n"
5585 #endif
5586 #ifdef CONFIG_HIST_TRIGGERS
5587 	"\t\t    hist (see below)\n"
5588 #endif
5589 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5590 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5591 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5592 	"\t                  events/block/block_unplug/trigger\n"
5593 	"\t   The first disables tracing every time block_unplug is hit.\n"
5594 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5595 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5596 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5597 	"\t   Like function triggers, the counter is only decremented if it\n"
5598 	"\t    enabled or disabled tracing.\n"
5599 	"\t   To remove a trigger without a count:\n"
5600 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5601 	"\t   To remove a trigger with a count:\n"
5602 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5603 	"\t   Filters can be ignored when removing a trigger.\n"
5604 #ifdef CONFIG_HIST_TRIGGERS
5605 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5606 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5607 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5608 	"\t            [:values=<field1[,field2,...]>]\n"
5609 	"\t            [:sort=<field1[,field2,...]>]\n"
5610 	"\t            [:size=#entries]\n"
5611 	"\t            [:pause][:continue][:clear]\n"
5612 	"\t            [:name=histname1]\n"
5613 	"\t            [:nohitcount]\n"
5614 	"\t            [:<handler>.<action>]\n"
5615 	"\t            [if <filter>]\n\n"
5616 	"\t    Note, special fields can be used as well:\n"
5617 	"\t            common_timestamp - to record current timestamp\n"
5618 	"\t            common_cpu - to record the CPU the event happened on\n"
5619 	"\n"
5620 	"\t    A hist trigger variable can be:\n"
5621 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
5622 	"\t        - a reference to another variable e.g. y=$x,\n"
5623 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5624 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5625 	"\n"
5626 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5627 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5628 	"\t    variable reference, field or numeric literal.\n"
5629 	"\n"
5630 	"\t    When a matching event is hit, an entry is added to a hash\n"
5631 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5632 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5633 	"\t    correspond to fields in the event's format description.  Keys\n"
5634 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
5635 	"\t    Compound keys consisting of up to two fields can be specified\n"
5636 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5637 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5638 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5639 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5640 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5641 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5642 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5643 	"\t    its histogram data will be shared with other triggers of the\n"
5644 	"\t    same name, and trigger hits will update this common data.\n\n"
5645 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5646 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5647 	"\t    triggers attached to an event, there will be a table for each\n"
5648 	"\t    trigger in the output.  The table displayed for a named\n"
5649 	"\t    trigger will be the same as any other instance having the\n"
5650 	"\t    same name.  The default format used to display a given field\n"
5651 	"\t    can be modified by appending any of the following modifiers\n"
5652 	"\t    to the field name, as applicable:\n\n"
5653 	"\t            .hex        display a number as a hex value\n"
5654 	"\t            .sym        display an address as a symbol\n"
5655 	"\t            .sym-offset display an address as a symbol and offset\n"
5656 	"\t            .execname   display a common_pid as a program name\n"
5657 	"\t            .syscall    display a syscall id as a syscall name\n"
5658 	"\t            .log2       display log2 value rather than raw number\n"
5659 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5660 	"\t            .usecs      display a common_timestamp in microseconds\n"
5661 	"\t            .percent    display a number of percentage value\n"
5662 	"\t            .graph      display a bar-graph of a value\n\n"
5663 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5664 	"\t    trigger or to start a hist trigger but not log any events\n"
5665 	"\t    until told to do so.  'continue' can be used to start or\n"
5666 	"\t    restart a paused hist trigger.\n\n"
5667 	"\t    The 'clear' parameter will clear the contents of a running\n"
5668 	"\t    hist trigger and leave its current paused/active state\n"
5669 	"\t    unchanged.\n\n"
5670 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5671 	"\t    raw hitcount in the histogram.\n\n"
5672 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5673 	"\t    have one event conditionally start and stop another event's\n"
5674 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5675 	"\t    the enable_event and disable_event triggers.\n\n"
5676 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5677 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5678 	"\t        <handler>.<action>\n\n"
5679 	"\t    The available handlers are:\n\n"
5680 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5681 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5682 	"\t        onchange(var)            - invoke action if var changes\n\n"
5683 	"\t    The available actions are:\n\n"
5684 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5685 	"\t        save(field,...)                      - save current event fields\n"
5686 #ifdef CONFIG_TRACER_SNAPSHOT
5687 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5688 #endif
5689 #ifdef CONFIG_SYNTH_EVENTS
5690 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5691 	"\t  Write into this file to define/undefine new synthetic events.\n"
5692 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5693 #endif
5694 #endif
5695 ;
5696 
5697 static ssize_t
tracing_readme_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5698 tracing_readme_read(struct file *filp, char __user *ubuf,
5699 		       size_t cnt, loff_t *ppos)
5700 {
5701 	return simple_read_from_buffer(ubuf, cnt, ppos,
5702 					readme_msg, strlen(readme_msg));
5703 }
5704 
5705 static const struct file_operations tracing_readme_fops = {
5706 	.open		= tracing_open_generic,
5707 	.read		= tracing_readme_read,
5708 	.llseek		= generic_file_llseek,
5709 };
5710 
5711 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5712 static union trace_eval_map_item *
update_eval_map(union trace_eval_map_item * ptr)5713 update_eval_map(union trace_eval_map_item *ptr)
5714 {
5715 	if (!ptr->map.eval_string) {
5716 		if (ptr->tail.next) {
5717 			ptr = ptr->tail.next;
5718 			/* Set ptr to the next real item (skip head) */
5719 			ptr++;
5720 		} else
5721 			return NULL;
5722 	}
5723 	return ptr;
5724 }
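/*
 * The eval maps are stored as arrays chained together: each array starts
 * with a "head" item (module and length), is followed by the real map
 * entries, and ends with a "tail" item whose ->next points to the next
 * array.  update_eval_map() above skips those head/tail bookkeeping items
 * so the seq_file iteration only ever sees real entries.
 */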
5725 
eval_map_next(struct seq_file * m,void * v,loff_t * pos)5726 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5727 {
5728 	union trace_eval_map_item *ptr = v;
5729 
5730 	/*
5731 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5732 	 * This really should never happen.
5733 	 */
5734 	(*pos)++;
5735 	ptr = update_eval_map(ptr);
5736 	if (WARN_ON_ONCE(!ptr))
5737 		return NULL;
5738 
5739 	ptr++;
5740 	ptr = update_eval_map(ptr);
5741 
5742 	return ptr;
5743 }
5744 
eval_map_start(struct seq_file * m,loff_t * pos)5745 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5746 {
5747 	union trace_eval_map_item *v;
5748 	loff_t l = 0;
5749 
5750 	mutex_lock(&trace_eval_mutex);
5751 
5752 	v = trace_eval_maps;
5753 	if (v)
5754 		v++;
5755 
5756 	while (v && l < *pos) {
5757 		v = eval_map_next(m, v, &l);
5758 	}
5759 
5760 	return v;
5761 }
5762 
eval_map_stop(struct seq_file * m,void * v)5763 static void eval_map_stop(struct seq_file *m, void *v)
5764 {
5765 	mutex_unlock(&trace_eval_mutex);
5766 }
5767 
eval_map_show(struct seq_file * m,void * v)5768 static int eval_map_show(struct seq_file *m, void *v)
5769 {
5770 	union trace_eval_map_item *ptr = v;
5771 
5772 	seq_printf(m, "%s %ld (%s)\n",
5773 		   ptr->map.eval_string, ptr->map.eval_value,
5774 		   ptr->map.system);
5775 
5776 	return 0;
5777 }
5778 
5779 static const struct seq_operations tracing_eval_map_seq_ops = {
5780 	.start		= eval_map_start,
5781 	.next		= eval_map_next,
5782 	.stop		= eval_map_stop,
5783 	.show		= eval_map_show,
5784 };
5785 
tracing_eval_map_open(struct inode * inode,struct file * filp)5786 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5787 {
5788 	int ret;
5789 
5790 	ret = tracing_check_open_get_tr(NULL);
5791 	if (ret)
5792 		return ret;
5793 
5794 	return seq_open(filp, &tracing_eval_map_seq_ops);
5795 }
5796 
5797 static const struct file_operations tracing_eval_map_fops = {
5798 	.open		= tracing_eval_map_open,
5799 	.read		= seq_read,
5800 	.llseek		= seq_lseek,
5801 	.release	= seq_release,
5802 };
5803 
5804 static inline union trace_eval_map_item *
trace_eval_jmp_to_tail(union trace_eval_map_item * ptr)5805 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5806 {
5807 	/* Return tail of array given the head */
5808 	return ptr + ptr->head.length + 1;
5809 }
5810 
5811 static void
trace_insert_eval_map_file(struct module * mod,struct trace_eval_map ** start,int len)5812 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5813 			   int len)
5814 {
5815 	struct trace_eval_map **stop;
5816 	struct trace_eval_map **map;
5817 	union trace_eval_map_item *map_array;
5818 	union trace_eval_map_item *ptr;
5819 
5820 	stop = start + len;
5821 
5822 	/*
5823 	 * The trace_eval_maps contains the map plus a head and tail item,
5824 	 * where the head holds the module and length of array, and the
5825 	 * tail holds a pointer to the next list.
5826 	 */
5827 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5828 	if (!map_array) {
5829 		pr_warn("Unable to allocate trace eval mapping\n");
5830 		return;
5831 	}
5832 
5833 	guard(mutex)(&trace_eval_mutex);
5834 
5835 	if (!trace_eval_maps)
5836 		trace_eval_maps = map_array;
5837 	else {
5838 		ptr = trace_eval_maps;
5839 		for (;;) {
5840 			ptr = trace_eval_jmp_to_tail(ptr);
5841 			if (!ptr->tail.next)
5842 				break;
5843 			ptr = ptr->tail.next;
5844 
5845 		}
5846 		ptr->tail.next = map_array;
5847 	}
5848 	map_array->head.mod = mod;
5849 	map_array->head.length = len;
5850 	map_array++;
5851 
5852 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5853 		map_array->map = **map;
5854 		map_array++;
5855 	}
5856 	memset(map_array, 0, sizeof(*map_array));
5857 }
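/*
 * Resulting layout of one chained array for len == 3 (illustrative):
 *
 *	[ head: mod, length ][ map 0 ][ map 1 ][ map 2 ][ tail: next ]
 *
 * The zeroed item written by the final memset() acts as the tail; its
 * ->tail.next is filled in when the next module's maps are appended.
 */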
5858 
trace_create_eval_file(struct dentry * d_tracer)5859 static void trace_create_eval_file(struct dentry *d_tracer)
5860 {
5861 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5862 			  NULL, &tracing_eval_map_fops);
5863 }
5864 
5865 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
trace_create_eval_file(struct dentry * d_tracer)5866 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
trace_insert_eval_map_file(struct module * mod,struct trace_eval_map ** start,int len)5867 static inline void trace_insert_eval_map_file(struct module *mod,
5868 			      struct trace_eval_map **start, int len) { }
5869 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5870 
5871 static void
trace_event_update_with_eval_map(struct module * mod,struct trace_eval_map ** start,int len)5872 trace_event_update_with_eval_map(struct module *mod,
5873 				 struct trace_eval_map **start,
5874 				 int len)
5875 {
5876 	struct trace_eval_map **map;
5877 
5878 	/* Always run sanitizer only if btf_type_tag attr exists. */
5879 	if (len <= 0) {
5880 		if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) &&
5881 		      IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) &&
5882 		      __has_attribute(btf_type_tag)))
5883 			return;
5884 	}
5885 
5886 	map = start;
5887 
5888 	trace_event_update_all(map, len);
5889 
5890 	if (len <= 0)
5891 		return;
5892 
5893 	trace_insert_eval_map_file(mod, start, len);
5894 }
5895 
5896 static ssize_t
tracing_set_trace_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5897 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5898 		       size_t cnt, loff_t *ppos)
5899 {
5900 	struct trace_array *tr = filp->private_data;
5901 	char buf[MAX_TRACER_SIZE+2];
5902 	int r;
5903 
5904 	scoped_guard(mutex, &trace_types_lock) {
5905 		r = sprintf(buf, "%s\n", tr->current_trace->name);
5906 	}
5907 
5908 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5909 }
5910 
tracer_init(struct tracer * t,struct trace_array * tr)5911 int tracer_init(struct tracer *t, struct trace_array *tr)
5912 {
5913 	tracing_reset_online_cpus(&tr->array_buffer);
5914 	return t->init(tr);
5915 }
5916 
set_buffer_entries(struct array_buffer * buf,unsigned long val)5917 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5918 {
5919 	int cpu;
5920 
5921 	for_each_tracing_cpu(cpu)
5922 		per_cpu_ptr(buf->data, cpu)->entries = val;
5923 }
5924 
update_buffer_entries(struct array_buffer * buf,int cpu)5925 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5926 {
5927 	if (cpu == RING_BUFFER_ALL_CPUS) {
5928 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5929 	} else {
5930 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5931 	}
5932 }
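/*
 * The per-CPU ->entries value mirrors the size the ring buffer was last
 * resized to; it is what the buffer_size_kb files report back to
 * userspace.
 */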
5933 
5934 #ifdef CONFIG_TRACER_MAX_TRACE
5935 /* resize @trace_buf's buffer to the size of @size_buf's entries */
resize_buffer_duplicate_size(struct array_buffer * trace_buf,struct array_buffer * size_buf,int cpu_id)5936 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5937 					struct array_buffer *size_buf, int cpu_id)
5938 {
5939 	int cpu, ret = 0;
5940 
5941 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5942 		for_each_tracing_cpu(cpu) {
5943 			ret = ring_buffer_resize(trace_buf->buffer,
5944 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5945 			if (ret < 0)
5946 				break;
5947 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5948 				per_cpu_ptr(size_buf->data, cpu)->entries;
5949 		}
5950 	} else {
5951 		ret = ring_buffer_resize(trace_buf->buffer,
5952 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5953 		if (ret == 0)
5954 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5955 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5956 	}
5957 
5958 	return ret;
5959 }
5960 #endif /* CONFIG_TRACER_MAX_TRACE */
5961 
__tracing_resize_ring_buffer(struct trace_array * tr,unsigned long size,int cpu)5962 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5963 					unsigned long size, int cpu)
5964 {
5965 	int ret;
5966 
5967 	/*
5968 	 * If kernel or user changes the size of the ring buffer
5969 	 * we use the size that was given, and we can forget about
5970 	 * expanding it later.
5971 	 */
5972 	trace_set_ring_buffer_expanded(tr);
5973 
5974 	/* May be called before buffers are initialized */
5975 	if (!tr->array_buffer.buffer)
5976 		return 0;
5977 
5978 	/* Do not allow tracing while resizing ring buffer */
5979 	tracing_stop_tr(tr);
5980 
5981 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5982 	if (ret < 0)
5983 		goto out_start;
5984 
5985 #ifdef CONFIG_TRACER_MAX_TRACE
5986 	if (!tr->allocated_snapshot)
5987 		goto out;
5988 
5989 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5990 	if (ret < 0) {
5991 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
5992 						     &tr->array_buffer, cpu);
5993 		if (r < 0) {
5994 			/*
5995 			 * AARGH! We are left with different
5996 			 * size max buffer!!!!
5997 			 * The max buffer is our "snapshot" buffer.
5998 			 * When a tracer needs a snapshot (one of the
5999 			 * latency tracers), it swaps the max buffer
6000 			 * with the saved snap shot. We succeeded to
6001 			 * with the saved snapshot. We succeeded in updating
6002 			 * the size of the main buffer, but failed to
6003 			 * to reset the main buffer to the original size, we
6004 			 * failed there too. This is very unlikely to
6005 			 * happen, but if it does, warn and kill all
6006 			 * tracing.
6007 			 */
6008 			WARN_ON(1);
6009 			tracing_disabled = 1;
6010 		}
6011 		goto out_start;
6012 	}
6013 
6014 	update_buffer_entries(&tr->max_buffer, cpu);
6015 
6016  out:
6017 #endif /* CONFIG_TRACER_MAX_TRACE */
6018 
6019 	update_buffer_entries(&tr->array_buffer, cpu);
6020  out_start:
6021 	tracing_start_tr(tr);
6022 	return ret;
6023 }
6024 
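/*
 * User-visible resizing goes through the "buffer_size_kb" files, e.g.
 * (illustrative, sizes in kilobytes):
 *
 *	# echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *	# echo 1024 > /sys/kernel/tracing/per_cpu/cpu0/buffer_size_kb
 *
 * Both paths end up here with either RING_BUFFER_ALL_CPUS or a specific
 * CPU id.
 */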
tracing_resize_ring_buffer(struct trace_array * tr,unsigned long size,int cpu_id)6025 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6026 				  unsigned long size, int cpu_id)
6027 {
6028 	guard(mutex)(&trace_types_lock);
6029 
6030 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6031 		/* make sure, this cpu is enabled in the mask */
6032 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
6033 			return -EINVAL;
6034 	}
6035 
6036 	return __tracing_resize_ring_buffer(tr, size, cpu_id);
6037 }
6038 
6039 struct trace_mod_entry {
6040 	unsigned long	mod_addr;
6041 	char		mod_name[MODULE_NAME_LEN];
6042 };
6043 
6044 struct trace_scratch {
6045 	unsigned int		clock_id;
6046 	unsigned long		text_addr;
6047 	unsigned long		nr_entries;
6048 	struct trace_mod_entry	entries[];
6049 };
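/*
 * For persistent ("previous boot") buffers, the scratch area records the
 * kernel text address and the load address of each module at the time
 * the data was written.  On the next boot, trace_adjust_address() uses
 * these entries (sorted by mod_addr) to translate stale pointers from
 * the previous boot into addresses valid for the current kernel.
 */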
6050 
6051 static DEFINE_MUTEX(scratch_mutex);
6052 
cmp_mod_entry(const void * key,const void * pivot)6053 static int cmp_mod_entry(const void *key, const void *pivot)
6054 {
6055 	unsigned long addr = (unsigned long)key;
6056 	const struct trace_mod_entry *ent = pivot;
6057 
6058 	if (addr >= ent[0].mod_addr && addr < ent[1].mod_addr)
6059 		return 0;
6060 	else
6061 		return addr - ent->mod_addr;
6062 }
6063 
6064 /**
6065  * trace_adjust_address() - Adjust a previous boot address to a current address.
6066  * @tr: Persistent ring buffer's trace_array.
6067  * @addr: Address in @tr which is adjusted.
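 *
 * Return: the previous-boot address translated for the current boot, or
 *         @addr itself when no translation applies.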
6068  */
trace_adjust_address(struct trace_array * tr,unsigned long addr)6069 unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
6070 {
6071 	struct trace_module_delta *module_delta;
6072 	struct trace_scratch *tscratch;
6073 	struct trace_mod_entry *entry;
6074 	unsigned long raddr;
6075 	int idx = 0, nr_entries;
6076 
6077 	/* If we don't have last boot delta, return the address */
6078 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6079 		return addr;
6080 
6081 	/* tr->module_delta must be protected by rcu. */
6082 	guard(rcu)();
6083 	tscratch = tr->scratch;
6084 	/* If there is no tscratch, module_delta must be NULL. */
6085 	module_delta = READ_ONCE(tr->module_delta);
6086 	if (!module_delta || !tscratch->nr_entries ||
6087 	    tscratch->entries[0].mod_addr > addr) {
6088 		raddr = addr + tr->text_delta;
6089 		return __is_kernel(raddr) || is_kernel_core_data(raddr) ||
6090 			is_kernel_rodata(raddr) ? raddr : addr;
6091 	}
6092 
6093 	/* Note that entries must be sorted. */
6094 	nr_entries = tscratch->nr_entries;
6095 	if (nr_entries == 1 ||
6096 	    tscratch->entries[nr_entries - 1].mod_addr < addr)
6097 		idx = nr_entries - 1;
6098 	else {
6099 		entry = __inline_bsearch((void *)addr,
6100 				tscratch->entries,
6101 				nr_entries - 1,
6102 				sizeof(tscratch->entries[0]),
6103 				cmp_mod_entry);
6104 		if (entry)
6105 			idx = entry - tscratch->entries;
6106 	}
6107 
6108 	return addr + module_delta->delta[idx];
6109 }
6110 
6111 #ifdef CONFIG_MODULES
save_mod(struct module * mod,void * data)6112 static int save_mod(struct module *mod, void *data)
6113 {
6114 	struct trace_array *tr = data;
6115 	struct trace_scratch *tscratch;
6116 	struct trace_mod_entry *entry;
6117 	unsigned int size;
6118 
6119 	tscratch = tr->scratch;
6120 	if (!tscratch)
6121 		return -1;
6122 	size = tr->scratch_size;
6123 
6124 	if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size)
6125 		return -1;
6126 
6127 	entry = &tscratch->entries[tscratch->nr_entries];
6128 
6129 	tscratch->nr_entries++;
6130 
6131 	entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
6132 	strscpy(entry->mod_name, mod->name);
6133 
6134 	return 0;
6135 }
6136 #else
save_mod(struct module * mod,void * data)6137 static int save_mod(struct module *mod, void *data)
6138 {
6139 	return 0;
6140 }
6141 #endif
6142 
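/*
 * Once the previous-boot data in a persistent buffer is being replaced,
 * switch the instance over to current-boot data: clear the LAST_BOOT
 * flag, rebuild the module list in the scratch area, wipe all CPU
 * buffers and drop the old module delta table.
 */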
update_last_data(struct trace_array * tr)6143 static void update_last_data(struct trace_array *tr)
6144 {
6145 	struct trace_module_delta *module_delta;
6146 	struct trace_scratch *tscratch;
6147 
6148 	if (!(tr->flags & TRACE_ARRAY_FL_BOOT))
6149 		return;
6150 
6151 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6152 		return;
6153 
6154 	/* Only clear and update the buffer if it has previous boot data. */
6155 	tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;
6156 
6157 	/* Reset the module list and reload them */
6158 	if (tr->scratch) {
6159 		struct trace_scratch *tscratch = tr->scratch;
6160 
6161 		tscratch->clock_id = tr->clock_id;
6162 		memset(tscratch->entries, 0,
6163 		       flex_array_size(tscratch, entries, tscratch->nr_entries));
6164 		tscratch->nr_entries = 0;
6165 
6166 		guard(mutex)(&scratch_mutex);
6167 		module_for_each_mod(save_mod, tr);
6168 	}
6169 
6170 	/*
6171 	 * Need to clear all CPU buffers as there cannot be events
6172 	 * from the previous boot mixed with events from this boot,
6173 	 * as that would produce a confusing trace. This includes the
6174 	 * buffers of CPUs that may currently be offline.
6175 	 */
6176 	tracing_reset_all_cpus(&tr->array_buffer);
6177 
6178 	/* Using current data now */
6179 	tr->text_delta = 0;
6180 
6181 	if (!tr->scratch)
6182 		return;
6183 
6184 	tscratch = tr->scratch;
6185 	module_delta = READ_ONCE(tr->module_delta);
6186 	WRITE_ONCE(tr->module_delta, NULL);
6187 	kfree_rcu(module_delta, rcu);
6188 
6189 	/* Set the persistent ring buffer meta data to this address */
6190 	tscratch->text_addr = (unsigned long)_text;
6191 }
6192 
6193 /**
6194  * tracing_update_buffers - used by tracing facility to expand ring buffers
6195  * @tr: The tracing instance
6196  *
6197  * To save memory when tracing is never used on a system that has it
6198  * configured in, the ring buffers are initially set to a minimum size.
6199  * Once a user starts to use the tracing facility, they need to grow
6200  * to their default size.
6201  *
6202  * This function is to be called when a tracer is about to be used.
6203  */
tracing_update_buffers(struct trace_array * tr)6204 int tracing_update_buffers(struct trace_array *tr)
6205 {
6206 	int ret = 0;
6207 
6208 	guard(mutex)(&trace_types_lock);
6209 
6210 	update_last_data(tr);
6211 
6212 	if (!tr->ring_buffer_expanded)
6213 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6214 						RING_BUFFER_ALL_CPUS);
6215 	return ret;
6216 }
6217 
6218 struct trace_option_dentry;
6219 
6220 static void
6221 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6222 
6223 /*
6224  * Used to clear out the tracer before deletion of an instance.
6225  * Must have trace_types_lock held.
6226  */
tracing_set_nop(struct trace_array * tr)6227 static void tracing_set_nop(struct trace_array *tr)
6228 {
6229 	if (tr->current_trace == &nop_trace)
6230 		return;
6231 
6232 	tr->current_trace->enabled--;
6233 
6234 	if (tr->current_trace->reset)
6235 		tr->current_trace->reset(tr);
6236 
6237 	tr->current_trace = &nop_trace;
6238 }
6239 
6240 static bool tracer_options_updated;
6241 
add_tracer_options(struct trace_array * tr,struct tracer * t)6242 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6243 {
6244 	/* Only enable if the directory has been created already. */
6245 	if (!tr->dir && !(tr->flags & TRACE_ARRAY_FL_GLOBAL))
6246 		return;
6247 
6248 	/* Only create trace option files after update_tracer_options finish */
6249 	if (!tracer_options_updated)
6250 		return;
6251 
6252 	create_trace_option_files(tr, t);
6253 }
6254 
tracing_set_tracer(struct trace_array * tr,const char * buf)6255 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6256 {
6257 	struct tracer *t;
6258 #ifdef CONFIG_TRACER_MAX_TRACE
6259 	bool had_max_tr;
6260 #endif
6261 	int ret;
6262 
6263 	guard(mutex)(&trace_types_lock);
6264 
6265 	update_last_data(tr);
6266 
6267 	if (!tr->ring_buffer_expanded) {
6268 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6269 						RING_BUFFER_ALL_CPUS);
6270 		if (ret < 0)
6271 			return ret;
6272 		ret = 0;
6273 	}
6274 
6275 	for (t = trace_types; t; t = t->next) {
6276 		if (strcmp(t->name, buf) == 0)
6277 			break;
6278 	}
6279 	if (!t)
6280 		return -EINVAL;
6281 
6282 	if (t == tr->current_trace)
6283 		return 0;
6284 
6285 #ifdef CONFIG_TRACER_SNAPSHOT
6286 	if (t->use_max_tr) {
6287 		local_irq_disable();
6288 		arch_spin_lock(&tr->max_lock);
6289 		ret = tr->cond_snapshot ? -EBUSY : 0;
6290 		arch_spin_unlock(&tr->max_lock);
6291 		local_irq_enable();
6292 		if (ret)
6293 			return ret;
6294 	}
6295 #endif
6296 	/* Some tracers won't work on kernel command line */
6297 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6298 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6299 			t->name);
6300 		return -EINVAL;
6301 	}
6302 
6303 	/* Some tracers are only allowed for the top level buffer */
6304 	if (!trace_ok_for_array(t, tr))
6305 		return -EINVAL;
6306 
6307 	/* If trace pipe files are being read, we can't change the tracer */
6308 	if (tr->trace_ref)
6309 		return -EBUSY;
6310 
6311 	trace_branch_disable();
6312 
6313 	tr->current_trace->enabled--;
6314 
6315 	if (tr->current_trace->reset)
6316 		tr->current_trace->reset(tr);
6317 
6318 #ifdef CONFIG_TRACER_MAX_TRACE
6319 	had_max_tr = tr->current_trace->use_max_tr;
6320 
6321 	/* Current trace needs to be nop_trace before synchronize_rcu */
6322 	tr->current_trace = &nop_trace;
6323 
6324 	if (had_max_tr && !t->use_max_tr) {
6325 		/*
6326 		 * We need to make sure that the update_max_tr sees that
6327 		 * current_trace changed to nop_trace to keep it from
6328 		 * swapping the buffers after we resize it.
6329 		 * update_max_tr() is called with interrupts disabled,
6330 		 * so a synchronize_rcu() is sufficient.
6331 		 */
6332 		synchronize_rcu();
6333 		free_snapshot(tr);
6334 		tracing_disarm_snapshot(tr);
6335 	}
6336 
6337 	if (!had_max_tr && t->use_max_tr) {
6338 		ret = tracing_arm_snapshot_locked(tr);
6339 		if (ret)
6340 			return ret;
6341 	}
6342 #else
6343 	tr->current_trace = &nop_trace;
6344 #endif
6345 
6346 	if (t->init) {
6347 		ret = tracer_init(t, tr);
6348 		if (ret) {
6349 #ifdef CONFIG_TRACER_MAX_TRACE
6350 			if (t->use_max_tr)
6351 				tracing_disarm_snapshot(tr);
6352 #endif
6353 			return ret;
6354 		}
6355 	}
6356 
6357 	tr->current_trace = t;
6358 	tr->current_trace->enabled++;
6359 	trace_branch_enable(tr);
6360 
6361 	return 0;
6362 }
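/*
 * Userspace switches tracers by writing a name listed in
 * "available_tracers" into "current_tracer", e.g. (illustrative):
 *
 *	# cat /sys/kernel/tracing/available_tracers
 *	# echo function_graph > /sys/kernel/tracing/current_tracer
 *	# echo nop > /sys/kernel/tracing/current_tracer
 *
 * tracing_set_trace_write() below strips trailing whitespace and calls
 * tracing_set_tracer().
 */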
6363 
6364 static ssize_t
tracing_set_trace_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6365 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6366 			size_t cnt, loff_t *ppos)
6367 {
6368 	struct trace_array *tr = filp->private_data;
6369 	char buf[MAX_TRACER_SIZE+1];
6370 	char *name;
6371 	size_t ret;
6372 	int err;
6373 
6374 	ret = cnt;
6375 
6376 	if (cnt > MAX_TRACER_SIZE)
6377 		cnt = MAX_TRACER_SIZE;
6378 
6379 	if (copy_from_user(buf, ubuf, cnt))
6380 		return -EFAULT;
6381 
6382 	buf[cnt] = 0;
6383 
6384 	name = strim(buf);
6385 
6386 	err = tracing_set_tracer(tr, name);
6387 	if (err)
6388 		return err;
6389 
6390 	*ppos += ret;
6391 
6392 	return ret;
6393 }
6394 
6395 static ssize_t
tracing_nsecs_read(unsigned long * ptr,char __user * ubuf,size_t cnt,loff_t * ppos)6396 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6397 		   size_t cnt, loff_t *ppos)
6398 {
6399 	char buf[64];
6400 	int r;
6401 
6402 	r = snprintf(buf, sizeof(buf), "%ld\n",
6403 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6404 	if (r > sizeof(buf))
6405 		r = sizeof(buf);
6406 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6407 }
6408 
6409 static ssize_t
tracing_nsecs_write(unsigned long * ptr,const char __user * ubuf,size_t cnt,loff_t * ppos)6410 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6411 		    size_t cnt, loff_t *ppos)
6412 {
6413 	unsigned long val;
6414 	int ret;
6415 
6416 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6417 	if (ret)
6418 		return ret;
6419 
6420 	*ptr = val * 1000;
6421 
6422 	return cnt;
6423 }
6424 
6425 static ssize_t
tracing_thresh_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6426 tracing_thresh_read(struct file *filp, char __user *ubuf,
6427 		    size_t cnt, loff_t *ppos)
6428 {
6429 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6430 }
6431 
6432 static ssize_t
tracing_thresh_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6433 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6434 		     size_t cnt, loff_t *ppos)
6435 {
6436 	struct trace_array *tr = filp->private_data;
6437 	int ret;
6438 
6439 	guard(mutex)(&trace_types_lock);
6440 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6441 	if (ret < 0)
6442 		return ret;
6443 
6444 	if (tr->current_trace->update_thresh) {
6445 		ret = tr->current_trace->update_thresh(tr);
6446 		if (ret < 0)
6447 			return ret;
6448 	}
6449 
6450 	return cnt;
6451 }
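/*
 * tracing_thresh is written in microseconds and stored in nanoseconds
 * (see tracing_nsecs_write() above).  Illustrative usage with a latency
 * tracer:
 *
 *	# echo 100 > /sys/kernel/tracing/tracing_thresh
 *	# echo wakeup > /sys/kernel/tracing/current_tracer
 */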
6452 
6453 #ifdef CONFIG_TRACER_MAX_TRACE
6454 
6455 static ssize_t
tracing_max_lat_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6456 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6457 		     size_t cnt, loff_t *ppos)
6458 {
6459 	struct trace_array *tr = filp->private_data;
6460 
6461 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6462 }
6463 
6464 static ssize_t
tracing_max_lat_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6465 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6466 		      size_t cnt, loff_t *ppos)
6467 {
6468 	struct trace_array *tr = filp->private_data;
6469 
6470 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6471 }
6472 
6473 #endif
6474 
open_pipe_on_cpu(struct trace_array * tr,int cpu)6475 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6476 {
6477 	if (cpu == RING_BUFFER_ALL_CPUS) {
6478 		if (cpumask_empty(tr->pipe_cpumask)) {
6479 			cpumask_setall(tr->pipe_cpumask);
6480 			return 0;
6481 		}
6482 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6483 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6484 		return 0;
6485 	}
6486 	return -EBUSY;
6487 }
6488 
close_pipe_on_cpu(struct trace_array * tr,int cpu)6489 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6490 {
6491 	if (cpu == RING_BUFFER_ALL_CPUS) {
6492 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6493 		cpumask_clear(tr->pipe_cpumask);
6494 	} else {
6495 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6496 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6497 	}
6498 }
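/*
 * tr->pipe_cpumask enforces exclusive trace_pipe readers: opening the
 * top-level trace_pipe claims every CPU, while per_cpu/cpuN/trace_pipe
 * claims only that CPU.  A second reader that would overlap an existing
 * one gets -EBUSY from open_pipe_on_cpu().
 */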
6499 
tracing_open_pipe(struct inode * inode,struct file * filp)6500 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6501 {
6502 	struct trace_array *tr = inode->i_private;
6503 	struct trace_iterator *iter;
6504 	int cpu;
6505 	int ret;
6506 
6507 	ret = tracing_check_open_get_tr(tr);
6508 	if (ret)
6509 		return ret;
6510 
6511 	guard(mutex)(&trace_types_lock);
6512 	cpu = tracing_get_cpu(inode);
6513 	ret = open_pipe_on_cpu(tr, cpu);
6514 	if (ret)
6515 		goto fail_pipe_on_cpu;
6516 
6517 	/* create a buffer to store the information to pass to userspace */
6518 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6519 	if (!iter) {
6520 		ret = -ENOMEM;
6521 		goto fail_alloc_iter;
6522 	}
6523 
6524 	trace_seq_init(&iter->seq);
6525 	iter->trace = tr->current_trace;
6526 
6527 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6528 		ret = -ENOMEM;
6529 		goto fail;
6530 	}
6531 
6532 	/* trace pipe does not show start of buffer */
6533 	cpumask_setall(iter->started);
6534 
6535 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6536 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6537 
6538 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6539 	if (trace_clocks[tr->clock_id].in_ns)
6540 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6541 
6542 	iter->tr = tr;
6543 	iter->array_buffer = &tr->array_buffer;
6544 	iter->cpu_file = cpu;
6545 	mutex_init(&iter->mutex);
6546 	filp->private_data = iter;
6547 
6548 	if (iter->trace->pipe_open)
6549 		iter->trace->pipe_open(iter);
6550 
6551 	nonseekable_open(inode, filp);
6552 
6553 	tr->trace_ref++;
6554 
6555 	return ret;
6556 
6557 fail:
6558 	kfree(iter);
6559 fail_alloc_iter:
6560 	close_pipe_on_cpu(tr, cpu);
6561 fail_pipe_on_cpu:
6562 	__trace_array_put(tr);
6563 	return ret;
6564 }
6565 
tracing_release_pipe(struct inode * inode,struct file * file)6566 static int tracing_release_pipe(struct inode *inode, struct file *file)
6567 {
6568 	struct trace_iterator *iter = file->private_data;
6569 	struct trace_array *tr = inode->i_private;
6570 
6571 	scoped_guard(mutex, &trace_types_lock) {
6572 		tr->trace_ref--;
6573 
6574 		if (iter->trace->pipe_close)
6575 			iter->trace->pipe_close(iter);
6576 		close_pipe_on_cpu(tr, iter->cpu_file);
6577 	}
6578 
6579 	free_trace_iter_content(iter);
6580 	kfree(iter);
6581 
6582 	trace_array_put(tr);
6583 
6584 	return 0;
6585 }
6586 
6587 static __poll_t
trace_poll(struct trace_iterator * iter,struct file * filp,poll_table * poll_table)6588 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6589 {
6590 	struct trace_array *tr = iter->tr;
6591 
6592 	/* Iterators are static, they should be filled or empty */
6593 	if (trace_buffer_iter(iter, iter->cpu_file))
6594 		return EPOLLIN | EPOLLRDNORM;
6595 
6596 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6597 		/*
6598 		 * Always select as readable when in blocking mode
6599 		 */
6600 		return EPOLLIN | EPOLLRDNORM;
6601 	else
6602 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6603 					     filp, poll_table, iter->tr->buffer_percent);
6604 }
6605 
6606 static __poll_t
tracing_poll_pipe(struct file * filp,poll_table * poll_table)6607 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6608 {
6609 	struct trace_iterator *iter = filp->private_data;
6610 
6611 	return trace_poll(iter, filp, poll_table);
6612 }
6613 
6614 /* Must be called with iter->mutex held. */
tracing_wait_pipe(struct file * filp)6615 static int tracing_wait_pipe(struct file *filp)
6616 {
6617 	struct trace_iterator *iter = filp->private_data;
6618 	int ret;
6619 
6620 	while (trace_empty(iter)) {
6621 
6622 		if ((filp->f_flags & O_NONBLOCK)) {
6623 			return -EAGAIN;
6624 		}
6625 
6626 		/*
6627 		 * We block until we read something and tracing is disabled.
6628 		 * We still block if tracing is disabled, but we have never
6629 		 * read anything. This allows a user to cat this file, and
6630 		 * then enable tracing. But after we have read something,
6631 		 * we give an EOF when tracing is again disabled.
6632 		 *
6633 		 * iter->pos will be 0 if we haven't read anything.
6634 		 */
6635 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6636 			break;
6637 
6638 		mutex_unlock(&iter->mutex);
6639 
6640 		ret = wait_on_pipe(iter, 0);
6641 
6642 		mutex_lock(&iter->mutex);
6643 
6644 		if (ret)
6645 			return ret;
6646 	}
6647 
6648 	return 1;
6649 }
6650 
update_last_data_if_empty(struct trace_array * tr)6651 static bool update_last_data_if_empty(struct trace_array *tr)
6652 {
6653 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6654 		return false;
6655 
6656 	if (!ring_buffer_empty(tr->array_buffer.buffer))
6657 		return false;
6658 
6659 	/*
6660 	 * If the buffer contains the last boot data and all per-cpu
6661 	 * buffers are empty, reset it from the kernel side.
6662 	 */
6663 	update_last_data(tr);
6664 	return true;
6665 }
6666 
6667 /*
6668  * Consumer reader.
6669  */
6670 static ssize_t
tracing_read_pipe(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6671 tracing_read_pipe(struct file *filp, char __user *ubuf,
6672 		  size_t cnt, loff_t *ppos)
6673 {
6674 	struct trace_iterator *iter = filp->private_data;
6675 	ssize_t sret;
6676 
6677 	/*
6678 	 * Avoid more than one consumer on a single file descriptor.
6679 	 * This is just a matter of trace coherency; the ring buffer itself
6680 	 * is protected.
6681 	 */
6682 	guard(mutex)(&iter->mutex);
6683 
6684 	/* return any leftover data */
6685 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6686 	if (sret != -EBUSY)
6687 		return sret;
6688 
6689 	trace_seq_init(&iter->seq);
6690 
6691 	if (iter->trace->read) {
6692 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6693 		if (sret)
6694 			return sret;
6695 	}
6696 
6697 waitagain:
6698 	if (update_last_data_if_empty(iter->tr))
6699 		return 0;
6700 
6701 	sret = tracing_wait_pipe(filp);
6702 	if (sret <= 0)
6703 		return sret;
6704 
6705 	/* stop when tracing is finished */
6706 	if (trace_empty(iter))
6707 		return 0;
6708 
6709 	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6710 		cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6711 
6712 	/* reset all but tr, trace, and overruns */
6713 	trace_iterator_reset(iter);
6714 	cpumask_clear(iter->started);
6715 	trace_seq_init(&iter->seq);
6716 
6717 	trace_event_read_lock();
6718 	trace_access_lock(iter->cpu_file);
6719 	while (trace_find_next_entry_inc(iter) != NULL) {
6720 		enum print_line_t ret;
6721 		int save_len = iter->seq.seq.len;
6722 
6723 		ret = print_trace_line(iter);
6724 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6725 			/*
6726 			 * If one print_trace_line() fills the entire trace_seq in one shot,
6727 			 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6728 			 * In this case, we need to consume it; otherwise the loop will peek
6729 			 * at this event next time, resulting in an infinite loop.
6730 			 */
6731 			if (save_len == 0) {
6732 				iter->seq.full = 0;
6733 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6734 				trace_consume(iter);
6735 				break;
6736 			}
6737 
6738 			/* In other cases, don't print partial lines */
6739 			iter->seq.seq.len = save_len;
6740 			break;
6741 		}
6742 		if (ret != TRACE_TYPE_NO_CONSUME)
6743 			trace_consume(iter);
6744 
6745 		if (trace_seq_used(&iter->seq) >= cnt)
6746 			break;
6747 
6748 		/*
6749 		 * Setting the full flag means we reached the trace_seq buffer
6750 		 * size and we should leave by partial output condition above.
6751 		 * One of the trace_seq_* functions is not used properly.
6752 		 */
6753 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6754 			  iter->ent->type);
6755 	}
6756 	trace_access_unlock(iter->cpu_file);
6757 	trace_event_read_unlock();
6758 
6759 	/* Now copy what we have to the user */
6760 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6761 	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6762 		trace_seq_init(&iter->seq);
6763 
6764 	/*
6765 	 * If there was nothing to send to user, in spite of consuming trace
6766 	 * entries, go back to wait for more entries.
6767 	 */
6768 	if (sret == -EBUSY)
6769 		goto waitagain;
6770 
6771 	return sret;
6772 }
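/*
 * trace_pipe is a consuming read: entries returned by the function above
 * are removed from the ring buffer.  Illustrative usage:
 *
 *	# cat /sys/kernel/tracing/trace_pipe > /tmp/trace.txt &
 *	# echo 1 > /sys/kernel/tracing/tracing_on
 *
 * The read blocks (unless the file is opened O_NONBLOCK) until data is
 * available, per tracing_wait_pipe() above.
 */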
6773 
tracing_spd_release_pipe(struct splice_pipe_desc * spd,unsigned int idx)6774 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6775 				     unsigned int idx)
6776 {
6777 	__free_page(spd->pages[idx]);
6778 }
6779 
6780 static size_t
tracing_fill_pipe_page(size_t rem,struct trace_iterator * iter)6781 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6782 {
6783 	size_t count;
6784 	int save_len;
6785 	int ret;
6786 
6787 	/* Seq buffer is page-sized, exactly what we need. */
6788 	for (;;) {
6789 		save_len = iter->seq.seq.len;
6790 		ret = print_trace_line(iter);
6791 
6792 		if (trace_seq_has_overflowed(&iter->seq)) {
6793 			iter->seq.seq.len = save_len;
6794 			break;
6795 		}
6796 
6797 		/*
6798 		 * This should not be hit, because TRACE_TYPE_PARTIAL_LINE should
6799 		 * only be returned if iter->seq overflowed. But check it
6800 		 * anyway to be safe.
6801 		 */
6802 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6803 			iter->seq.seq.len = save_len;
6804 			break;
6805 		}
6806 
6807 		count = trace_seq_used(&iter->seq) - save_len;
6808 		if (rem < count) {
6809 			rem = 0;
6810 			iter->seq.seq.len = save_len;
6811 			break;
6812 		}
6813 
6814 		if (ret != TRACE_TYPE_NO_CONSUME)
6815 			trace_consume(iter);
6816 		rem -= count;
6817 		if (!trace_find_next_entry_inc(iter))	{
6818 			rem = 0;
6819 			iter->ent = NULL;
6820 			break;
6821 		}
6822 	}
6823 
6824 	return rem;
6825 }
6826 
tracing_splice_read_pipe(struct file * filp,loff_t * ppos,struct pipe_inode_info * pipe,size_t len,unsigned int flags)6827 static ssize_t tracing_splice_read_pipe(struct file *filp,
6828 					loff_t *ppos,
6829 					struct pipe_inode_info *pipe,
6830 					size_t len,
6831 					unsigned int flags)
6832 {
6833 	struct page *pages_def[PIPE_DEF_BUFFERS];
6834 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6835 	struct trace_iterator *iter = filp->private_data;
6836 	struct splice_pipe_desc spd = {
6837 		.pages		= pages_def,
6838 		.partial	= partial_def,
6839 		.nr_pages	= 0, /* This gets updated below. */
6840 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6841 		.ops		= &default_pipe_buf_ops,
6842 		.spd_release	= tracing_spd_release_pipe,
6843 	};
6844 	ssize_t ret;
6845 	size_t rem;
6846 	unsigned int i;
6847 
6848 	if (splice_grow_spd(pipe, &spd))
6849 		return -ENOMEM;
6850 
6851 	mutex_lock(&iter->mutex);
6852 
6853 	if (iter->trace->splice_read) {
6854 		ret = iter->trace->splice_read(iter, filp,
6855 					       ppos, pipe, len, flags);
6856 		if (ret)
6857 			goto out_err;
6858 	}
6859 
6860 	ret = tracing_wait_pipe(filp);
6861 	if (ret <= 0)
6862 		goto out_err;
6863 
6864 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6865 		ret = -EFAULT;
6866 		goto out_err;
6867 	}
6868 
6869 	trace_event_read_lock();
6870 	trace_access_lock(iter->cpu_file);
6871 
6872 	/* Fill as many pages as possible. */
6873 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6874 		spd.pages[i] = alloc_page(GFP_KERNEL);
6875 		if (!spd.pages[i])
6876 			break;
6877 
6878 		rem = tracing_fill_pipe_page(rem, iter);
6879 
6880 		/* Copy the data into the page, so we can start over. */
6881 		ret = trace_seq_to_buffer(&iter->seq,
6882 					  page_address(spd.pages[i]),
6883 					  min((size_t)trace_seq_used(&iter->seq),
6884 						  (size_t)PAGE_SIZE));
6885 		if (ret < 0) {
6886 			__free_page(spd.pages[i]);
6887 			break;
6888 		}
6889 		spd.partial[i].offset = 0;
6890 		spd.partial[i].len = ret;
6891 
6892 		trace_seq_init(&iter->seq);
6893 	}
6894 
6895 	trace_access_unlock(iter->cpu_file);
6896 	trace_event_read_unlock();
6897 	mutex_unlock(&iter->mutex);
6898 
6899 	spd.nr_pages = i;
6900 
6901 	if (i)
6902 		ret = splice_to_pipe(pipe, &spd);
6903 	else
6904 		ret = 0;
6905 out:
6906 	splice_shrink_spd(&spd);
6907 	return ret;
6908 
6909 out_err:
6910 	mutex_unlock(&iter->mutex);
6911 	goto out;
6912 }
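/*
 * Illustrative only: splice(2) with trace_pipe as the input ends up in
 * tracing_splice_read_pipe() above. The output side of splice() must be a
 * pipe, so a typical consumer looks like the sketch below (path assumes
 * tracefs is mounted at /sys/kernel/tracing).
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int splice_some_trace(int out_pipe_wr)
 *	{
 *		int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
 *		ssize_t n;
 *
 *		if (fd < 0)
 *			return -1;
 *		// Move up to 64K of formatted trace data into the pipe
 *		// without bouncing it through a user space buffer.
 *		n = splice(fd, NULL, out_pipe_wr, NULL, 65536, 0);
 *		close(fd);
 *		return n < 0 ? -1 : 0;
 *	}
 */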
6913 
6914 static ssize_t
tracing_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6915 tracing_entries_read(struct file *filp, char __user *ubuf,
6916 		     size_t cnt, loff_t *ppos)
6917 {
6918 	struct inode *inode = file_inode(filp);
6919 	struct trace_array *tr = inode->i_private;
6920 	int cpu = tracing_get_cpu(inode);
6921 	char buf[64];
6922 	int r = 0;
6923 	ssize_t ret;
6924 
6925 	mutex_lock(&trace_types_lock);
6926 
6927 	if (cpu == RING_BUFFER_ALL_CPUS) {
6928 		int cpu, buf_size_same;
6929 		unsigned long size;
6930 
6931 		size = 0;
6932 		buf_size_same = 1;
6933 		/* check if all cpu sizes are same */
6934 		for_each_tracing_cpu(cpu) {
6935 			/* fill in the size from first enabled cpu */
6936 			if (size == 0)
6937 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6938 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6939 				buf_size_same = 0;
6940 				break;
6941 			}
6942 		}
6943 
6944 		if (buf_size_same) {
6945 			if (!tr->ring_buffer_expanded)
6946 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6947 					    size >> 10,
6948 					    trace_buf_size >> 10);
6949 			else
6950 				r = sprintf(buf, "%lu\n", size >> 10);
6951 		} else
6952 			r = sprintf(buf, "X\n");
6953 	} else
6954 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6955 
6956 	mutex_unlock(&trace_types_lock);
6957 
6958 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6959 	return ret;
6960 }
6961 
6962 static ssize_t
tracing_entries_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6963 tracing_entries_write(struct file *filp, const char __user *ubuf,
6964 		      size_t cnt, loff_t *ppos)
6965 {
6966 	struct inode *inode = file_inode(filp);
6967 	struct trace_array *tr = inode->i_private;
6968 	unsigned long val;
6969 	int ret;
6970 
6971 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6972 	if (ret)
6973 		return ret;
6974 
6975 	/* must have at least 1 entry */
6976 	if (!val)
6977 		return -EINVAL;
6978 
6979 	/* value is in KB */
6980 	val <<= 10;
6981 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6982 	if (ret < 0)
6983 		return ret;
6984 
6985 	*ppos += cnt;
6986 
6987 	return cnt;
6988 }
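/*
 * Illustrative only: the value written above is interpreted in KB and is
 * applied either to every CPU (the top level buffer_size_kb file) or to a
 * single CPU (per_cpu/cpuN/buffer_size_kb), depending on which inode the
 * file was opened through. A user space sketch, assuming tracefs is mounted
 * at /sys/kernel/tracing:
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int set_buffer_size_kb(const char *kb_str)
 *	{
 *		int fd = open("/sys/kernel/tracing/buffer_size_kb", O_WRONLY);
 *		int ret;
 *
 *		if (fd < 0)
 *			return -1;
 *		// e.g. kb_str = "4096" resizes each per-CPU buffer to 4 MB.
 *		ret = write(fd, kb_str, strlen(kb_str)) < 0 ? -1 : 0;
 *		close(fd);
 *		return ret;
 *	}
 */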
6989 
6990 static ssize_t
tracing_total_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6991 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6992 				size_t cnt, loff_t *ppos)
6993 {
6994 	struct trace_array *tr = filp->private_data;
6995 	char buf[64];
6996 	int r, cpu;
6997 	unsigned long size = 0, expanded_size = 0;
6998 
6999 	mutex_lock(&trace_types_lock);
7000 	for_each_tracing_cpu(cpu) {
7001 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7002 		if (!tr->ring_buffer_expanded)
7003 			expanded_size += trace_buf_size >> 10;
7004 	}
7005 	if (tr->ring_buffer_expanded)
7006 		r = sprintf(buf, "%lu\n", size);
7007 	else
7008 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7009 	mutex_unlock(&trace_types_lock);
7010 
7011 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7012 }
7013 
7014 #define LAST_BOOT_HEADER ((void *)1)
7015 
l_next(struct seq_file * m,void * v,loff_t * pos)7016 static void *l_next(struct seq_file *m, void *v, loff_t *pos)
7017 {
7018 	struct trace_array *tr = m->private;
7019 	struct trace_scratch *tscratch = tr->scratch;
7020 	unsigned int index = *pos;
7021 
7022 	(*pos)++;
7023 
7024 	if (*pos == 1)
7025 		return LAST_BOOT_HEADER;
7026 
7027 	/* Only show offsets of the last boot data */
7028 	if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7029 		return NULL;
7030 
7031 	/* *pos 0 is for the header, 1 is for the first module */
7032 	index--;
7033 
7034 	if (index >= tscratch->nr_entries)
7035 		return NULL;
7036 
7037 	return &tscratch->entries[index];
7038 }
7039 
l_start(struct seq_file * m,loff_t * pos)7040 static void *l_start(struct seq_file *m, loff_t *pos)
7041 {
7042 	mutex_lock(&scratch_mutex);
7043 
7044 	return l_next(m, NULL, pos);
7045 }
7046 
l_stop(struct seq_file * m,void * p)7047 static void l_stop(struct seq_file *m, void *p)
7048 {
7049 	mutex_unlock(&scratch_mutex);
7050 }
7051 
show_last_boot_header(struct seq_file * m,struct trace_array * tr)7052 static void show_last_boot_header(struct seq_file *m, struct trace_array *tr)
7053 {
7054 	struct trace_scratch *tscratch = tr->scratch;
7055 
7056 	/*
7057 	 * Do not leak the KASLR address. This only shows the KASLR address of
7058 	 * the last boot. When the ring buffer is started, the LAST_BOOT
7059 	 * flag gets cleared, and this should only report "current".
7060 	 * Otherwise it shows the KASLR address from the previous boot, which
7061 	 * should not be the same as that of the current boot.
7062 	 */
7063 	if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7064 		seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr);
7065 	else
7066 		seq_puts(m, "# Current\n");
7067 }
7068 
l_show(struct seq_file * m,void * v)7069 static int l_show(struct seq_file *m, void *v)
7070 {
7071 	struct trace_array *tr = m->private;
7072 	struct trace_mod_entry *entry = v;
7073 
7074 	if (v == LAST_BOOT_HEADER) {
7075 		show_last_boot_header(m, tr);
7076 		return 0;
7077 	}
7078 
7079 	seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name);
7080 	return 0;
7081 }
7082 
7083 static const struct seq_operations last_boot_seq_ops = {
7084 	.start		= l_start,
7085 	.next		= l_next,
7086 	.stop		= l_stop,
7087 	.show		= l_show,
7088 };
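/*
 * Illustrative only: with the seq_ops above, the last-boot file renders one
 * header line for the kernel followed by one line per recorded module, e.g.
 * (addresses here are made up; "# Current" is shown instead when there is
 * no last-boot data):
 *
 *	ffffffff81000000	[kernel]
 *	ffffffffc0a00000	ext4
 *	ffffffffc09c0000	crc32c_generic
 */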
7089 
tracing_last_boot_open(struct inode * inode,struct file * file)7090 static int tracing_last_boot_open(struct inode *inode, struct file *file)
7091 {
7092 	struct trace_array *tr = inode->i_private;
7093 	struct seq_file *m;
7094 	int ret;
7095 
7096 	ret = tracing_check_open_get_tr(tr);
7097 	if (ret)
7098 		return ret;
7099 
7100 	ret = seq_open(file, &last_boot_seq_ops);
7101 	if (ret) {
7102 		trace_array_put(tr);
7103 		return ret;
7104 	}
7105 
7106 	m = file->private_data;
7107 	m->private = tr;
7108 
7109 	return 0;
7110 }
7111 
tracing_buffer_meta_open(struct inode * inode,struct file * filp)7112 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
7113 {
7114 	struct trace_array *tr = inode->i_private;
7115 	int cpu = tracing_get_cpu(inode);
7116 	int ret;
7117 
7118 	ret = tracing_check_open_get_tr(tr);
7119 	if (ret)
7120 		return ret;
7121 
7122 	ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
7123 	if (ret < 0)
7124 		__trace_array_put(tr);
7125 	return ret;
7126 }
7127 
7128 static ssize_t
tracing_free_buffer_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7129 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7130 			  size_t cnt, loff_t *ppos)
7131 {
7132 	/*
7133 	 * There is no need to read what the user has written; this function
7134 	 * exists just to make sure that "echo" does not return an error.
7135 	 */
7136 
7137 	*ppos += cnt;
7138 
7139 	return cnt;
7140 }
7141 
7142 static int
tracing_free_buffer_release(struct inode * inode,struct file * filp)7143 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7144 {
7145 	struct trace_array *tr = inode->i_private;
7146 
7147 	/* disable tracing ? */
7148 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7149 		tracer_tracing_off(tr);
7150 	/* resize the ring buffer to 0 */
7151 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7152 
7153 	trace_array_put(tr);
7154 
7155 	return 0;
7156 }
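/*
 * Illustrative only: the free_buffer file implemented above accepts any
 * write (so "echo > free_buffer" works) and does the real work on release:
 * tracing is optionally stopped (if TRACE_ITER_STOP_ON_FREE is set) and the
 * ring buffer is resized to zero, releasing its memory. A user space
 * sketch, assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int free_trace_buffer(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/free_buffer", O_WRONLY);
 *
 *		if (fd < 0)
 *			return -1;
 *		write(fd, "1", 1);	// content is ignored
 *		return close(fd);	// buffer is freed on release
 *	}
 */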
7157 
7158 #define TRACE_MARKER_MAX_SIZE		4096
7159 
write_marker_to_buffer(struct trace_array * tr,const char * buf,size_t cnt,unsigned long ip)7160 static ssize_t write_marker_to_buffer(struct trace_array *tr, const char *buf,
7161 				      size_t cnt, unsigned long ip)
7162 {
7163 	struct ring_buffer_event *event;
7164 	enum event_trigger_type tt = ETT_NONE;
7165 	struct trace_buffer *buffer;
7166 	struct print_entry *entry;
7167 	int meta_size;
7168 	ssize_t written;
7169 	size_t size;
7170 
7171 	meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
7172  again:
7173 	size = cnt + meta_size;
7174 
7175 	buffer = tr->array_buffer.buffer;
7176 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7177 					    tracing_gen_ctx());
7178 	if (unlikely(!event)) {
7179 		/*
7180 		 * If the size was greater than what was allowed, then
7181 		 * make it smaller and try again.
7182 		 */
7183 		if (size > ring_buffer_max_event_size(buffer)) {
7184 			cnt = ring_buffer_max_event_size(buffer) - meta_size;
7185 			/* The above should only happen once */
7186 			if (WARN_ON_ONCE(cnt + meta_size == size))
7187 				return -EBADF;
7188 			goto again;
7189 		}
7190 
7191 		/* Ring buffer disabled, return as if not open for write */
7192 		return -EBADF;
7193 	}
7194 
7195 	entry = ring_buffer_event_data(event);
7196 	entry->ip = ip;
7197 	memcpy(&entry->buf, buf, cnt);
7198 	written = cnt;
7199 
7200 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7201 		/* do not add \n before testing triggers, but add \0 */
7202 		entry->buf[cnt] = '\0';
7203 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7204 	}
7205 
7206 	if (entry->buf[cnt - 1] != '\n') {
7207 		entry->buf[cnt] = '\n';
7208 		entry->buf[cnt + 1] = '\0';
7209 	} else
7210 		entry->buf[cnt] = '\0';
7211 
7212 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7213 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7214 	__buffer_unlock_commit(buffer, event);
7215 
7216 	if (tt)
7217 		event_triggers_post_call(tr->trace_marker_file, tt);
7218 
7219 	return written;
7220 }
7221 
7222 struct trace_user_buf {
7223 	char		*buf;
7224 };
7225 
7226 struct trace_user_buf_info {
7227 	struct trace_user_buf __percpu	*tbuf;
7228 	int				ref;
7229 };
7230 
7231 
7232 static DEFINE_MUTEX(trace_user_buffer_mutex);
7233 static struct trace_user_buf_info *trace_user_buffer;
7234 
trace_user_fault_buffer_free(struct trace_user_buf_info * tinfo)7235 static void trace_user_fault_buffer_free(struct trace_user_buf_info *tinfo)
7236 {
7237 	char *buf;
7238 	int cpu;
7239 
7240 	for_each_possible_cpu(cpu) {
7241 		buf = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
7242 		kfree(buf);
7243 	}
7244 	free_percpu(tinfo->tbuf);
7245 	kfree(tinfo);
7246 }
7247 
trace_user_fault_buffer_enable(void)7248 static int trace_user_fault_buffer_enable(void)
7249 {
7250 	struct trace_user_buf_info *tinfo;
7251 	char *buf;
7252 	int cpu;
7253 
7254 	guard(mutex)(&trace_user_buffer_mutex);
7255 
7256 	if (trace_user_buffer) {
7257 		trace_user_buffer->ref++;
7258 		return 0;
7259 	}
7260 
7261 	tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
7262 	if (!tinfo)
7263 		return -ENOMEM;
7264 
7265 	tinfo->tbuf = alloc_percpu(struct trace_user_buf);
7266 	if (!tinfo->tbuf) {
7267 		kfree(tinfo);
7268 		return -ENOMEM;
7269 	}
7270 
7271 	tinfo->ref = 1;
7272 
7273 	/* Clear each buffer pointer so the error path can free safely */
7274 	for_each_possible_cpu(cpu) {
7275 		per_cpu_ptr(tinfo->tbuf, cpu)->buf = NULL;
7276 	}
7277 
7278 	for_each_possible_cpu(cpu) {
7279 		buf = kmalloc_node(TRACE_MARKER_MAX_SIZE, GFP_KERNEL,
7280 				   cpu_to_node(cpu));
7281 		if (!buf) {
7282 			trace_user_fault_buffer_free(tinfo);
7283 			return -ENOMEM;
7284 		}
7285 		per_cpu_ptr(tinfo->tbuf, cpu)->buf = buf;
7286 	}
7287 
7288 	trace_user_buffer = tinfo;
7289 
7290 	return 0;
7291 }
7292 
trace_user_fault_buffer_disable(void)7293 static void trace_user_fault_buffer_disable(void)
7294 {
7295 	struct trace_user_buf_info *tinfo;
7296 
7297 	guard(mutex)(&trace_user_buffer_mutex);
7298 
7299 	tinfo = trace_user_buffer;
7300 
7301 	if (WARN_ON_ONCE(!tinfo))
7302 		return;
7303 
7304 	if (--tinfo->ref)
7305 		return;
7306 
7307 	trace_user_fault_buffer_free(tinfo);
7308 	trace_user_buffer = NULL;
7309 }
7310 
7311 /* Must be called with preemption disabled */
trace_user_fault_read(struct trace_user_buf_info * tinfo,const char __user * ptr,size_t size,size_t * read_size)7312 static char *trace_user_fault_read(struct trace_user_buf_info *tinfo,
7313 				   const char __user *ptr, size_t size,
7314 				   size_t *read_size)
7315 {
7316 	int cpu = smp_processor_id();
7317 	char *buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
7318 	unsigned int cnt;
7319 	int trys = 0;
7320 	int ret;
7321 
7322 	if (size > TRACE_MARKER_MAX_SIZE)
7323 		size = TRACE_MARKER_MAX_SIZE;
7324 	*read_size = 0;
7325 
7326 	/*
7327 	 * This acts similarly to a seqcount. The per-CPU context switch count
7328 	 * is recorded, migration is disabled and preemption is enabled. The
7329 	 * user space memory is then copied into the per-CPU buffer.
7330 	 * Preemption is disabled again, and if the per-CPU context switch count
7331 	 * is still the same, the buffer has not been corrupted.
7332 	 * If the count is different, the buffer is assumed to be corrupted
7333 	 * and the read must be tried again.
7334 	 */
7335 
7336 	do {
7337 		/*
7338 		 * If, for some reason, copy_from_user() always caused a context
7339 		 * switch, this would turn into an infinite loop.
7340 		 * If this task is preempted by another user space task, it
7341 		 * will simply cause this task to try again. But just in case
7342 		 * something changes where copying from user space always causes
7343 		 * another task to run, prevent this from looping forever.
7344 		 * 100 tries should be plenty.
7345 		 */
7346 		if (WARN_ONCE(trys++ > 100, "Error: Too many tries to read user space"))
7347 			return NULL;
7348 
7349 		/* Read the current CPU context switch counter */
7350 		cnt = nr_context_switches_cpu(cpu);
7351 
7352 		/*
7353 		 * Preemption is going to be enabled, but this task must
7354 		 * remain on this CPU.
7355 		 */
7356 		migrate_disable();
7357 
7358 		/*
7359 		 * Now preemption is being enabled and another task can come in,
7360 		 * use the same buffer and corrupt our data.
7361 		 */
7362 		preempt_enable_notrace();
7363 
7364 		ret = __copy_from_user(buffer, ptr, size);
7365 
7366 		preempt_disable_notrace();
7367 		migrate_enable();
7368 
7369 		/* if it faulted, no need to test if the buffer was corrupted */
7370 		if (ret)
7371 			return NULL;
7372 
7373 		/*
7374 		 * Preemption is disabled again; now check the per-CPU context
7375 		 * switch counter. If it doesn't match, then another user space
7376 		 * process may have scheduled in and corrupted our buffer. In that
7377 		 * case the copy must be retried.
7378 		 */
7379 	} while (nr_context_switches_cpu(cpu) != cnt);
7380 
7381 	*read_size = size;
7382 	return buffer;
7383 }
7384 
7385 static ssize_t
tracing_mark_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7386 tracing_mark_write(struct file *filp, const char __user *ubuf,
7387 					size_t cnt, loff_t *fpos)
7388 {
7389 	struct trace_array *tr = filp->private_data;
7390 	ssize_t written = -ENODEV;
7391 	unsigned long ip;
7392 	size_t size;
7393 	char *buf;
7394 
7395 	if (tracing_disabled)
7396 		return -EINVAL;
7397 
7398 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7399 		return -EINVAL;
7400 
7401 	if ((ssize_t)cnt < 0)
7402 		return -EINVAL;
7403 
7404 	if (cnt > TRACE_MARKER_MAX_SIZE)
7405 		cnt = TRACE_MARKER_MAX_SIZE;
7406 
7407 	/* Must have preemption disabled while having access to the buffer */
7408 	guard(preempt_notrace)();
7409 
7410 	buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, &size);
7411 	if (!buf)
7412 		return -EFAULT;
7413 
7414 	if (cnt > size)
7415 		cnt = size;
7416 
7417 	/* The selftests expect the recorded IP address to be within this function */
7418 	ip = _THIS_IP_;
7419 
7420 	/* The global trace_marker can go to multiple instances */
7421 	if (tr == &global_trace) {
7422 		guard(rcu)();
7423 		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7424 			written = write_marker_to_buffer(tr, buf, cnt, ip);
7425 			if (written < 0)
7426 				break;
7427 		}
7428 	} else {
7429 		written = write_marker_to_buffer(tr, buf, cnt, ip);
7430 	}
7431 
7432 	return written;
7433 }
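/*
 * Illustrative only: a minimal user space sketch of driving the trace_marker
 * write path above, assuming tracefs is mounted at /sys/kernel/tracing.
 * Each write() becomes one TRACE_PRINT event; writes larger than
 * TRACE_MARKER_MAX_SIZE are truncated.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int trace_mark(const char *msg)
 *	{
 *		int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *		int ret;
 *
 *		if (fd < 0)
 *			return -1;
 *		ret = write(fd, msg, strlen(msg)) < 0 ? -1 : 0;
 *		close(fd);
 *		return ret;
 *	}
 */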
7434 
write_raw_marker_to_buffer(struct trace_array * tr,const char * buf,size_t cnt)7435 static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
7436 					  const char *buf, size_t cnt)
7437 {
7438 	struct ring_buffer_event *event;
7439 	struct trace_buffer *buffer;
7440 	struct raw_data_entry *entry;
7441 	ssize_t written;
7442 	size_t size;
7443 
7444 	/* cnt includes both the entry->id and the data behind it. */
7445 	size = struct_size(entry, buf, cnt - sizeof(entry->id));
7446 
7447 	buffer = tr->array_buffer.buffer;
7448 
7449 	if (size > ring_buffer_max_event_size(buffer))
7450 		return -EINVAL;
7451 
7452 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7453 					    tracing_gen_ctx());
7454 	if (!event)
7455 		/* Ring buffer disabled, return as if not open for write */
7456 		return -EBADF;
7457 
7458 	entry = ring_buffer_event_data(event);
7459 	unsafe_memcpy(&entry->id, buf, cnt,
7460 		      "id and content already reserved on ring buffer"
7461 		      "'buf' includes the 'id' and the data."
7462 		      "'entry' was allocated with cnt from 'id'.");
7463 	written = cnt;
7464 
7465 	__buffer_unlock_commit(buffer, event);
7466 
7467 	return written;
7468 }
7469 
7470 static ssize_t
tracing_mark_raw_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7471 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7472 					size_t cnt, loff_t *fpos)
7473 {
7474 	struct trace_array *tr = filp->private_data;
7475 	ssize_t written = -ENODEV;
7476 	size_t size;
7477 	char *buf;
7478 
7479 	if (tracing_disabled)
7480 		return -EINVAL;
7481 
7482 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7483 		return -EINVAL;
7484 
7485 	/* The marker must at least have a tag id */
7486 	if (cnt < sizeof(unsigned int))
7487 		return -EINVAL;
7488 
7489 	/* Must have preemption disabled while having access to the buffer */
7490 	guard(preempt_notrace)();
7491 
7492 	buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, &size);
7493 	if (!buf)
7494 		return -EFAULT;
7495 
7496 	/* raw write is all or nothing */
7497 	if (cnt > size)
7498 		return -EINVAL;
7499 
7500 	/* The global trace_marker_raw can go to multiple instances */
7501 	if (tr == &global_trace) {
7502 		guard(rcu)();
7503 		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7504 			written = write_raw_marker_to_buffer(tr, buf, cnt);
7505 			if (written < 0)
7506 				break;
7507 		}
7508 	} else {
7509 		written = write_raw_marker_to_buffer(tr, buf, cnt);
7510 	}
7511 
7512 	return written;
7513 }
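/*
 * Illustrative only: the raw marker expects the first sizeof(unsigned int)
 * bytes of the write to be an application-defined id, followed by opaque
 * payload, and the write is all or nothing. A user space sketch (the id and
 * payload layout here are hypothetical), assuming tracefs is mounted at
 * /sys/kernel/tracing:
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int trace_mark_raw(unsigned int id, const void *data, size_t len)
 *	{
 *		char buf[sizeof(id) + 64];
 *		int fd, ret;
 *
 *		if (len > sizeof(buf) - sizeof(id))
 *			return -1;
 *		memcpy(buf, &id, sizeof(id));
 *		memcpy(buf + sizeof(id), data, len);
 *
 *		fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *		if (fd < 0)
 *			return -1;
 *		ret = write(fd, buf, sizeof(id) + len) < 0 ? -1 : 0;
 *		close(fd);
 *		return ret;
 *	}
 */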
7514 
tracing_mark_open(struct inode * inode,struct file * filp)7515 static int tracing_mark_open(struct inode *inode, struct file *filp)
7516 {
7517 	int ret;
7518 
7519 	ret = trace_user_fault_buffer_enable();
7520 	if (ret < 0)
7521 		return ret;
7522 
7523 	stream_open(inode, filp);
7524 	ret = tracing_open_generic_tr(inode, filp);
7525 	if (ret < 0)
7526 		trace_user_fault_buffer_disable();
7527 	return ret;
7528 }
7529 
tracing_mark_release(struct inode * inode,struct file * file)7530 static int tracing_mark_release(struct inode *inode, struct file *file)
7531 {
7532 	trace_user_fault_buffer_disable();
7533 	return tracing_release_generic_tr(inode, file);
7534 }
7535 
tracing_clock_show(struct seq_file * m,void * v)7536 static int tracing_clock_show(struct seq_file *m, void *v)
7537 {
7538 	struct trace_array *tr = m->private;
7539 	int i;
7540 
7541 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7542 		seq_printf(m,
7543 			"%s%s%s%s", i ? " " : "",
7544 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7545 			i == tr->clock_id ? "]" : "");
7546 	seq_putc(m, '\n');
7547 
7548 	return 0;
7549 }
7550 
tracing_set_clock(struct trace_array * tr,const char * clockstr)7551 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7552 {
7553 	int i;
7554 
7555 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7556 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7557 			break;
7558 	}
7559 	if (i == ARRAY_SIZE(trace_clocks))
7560 		return -EINVAL;
7561 
7562 	guard(mutex)(&trace_types_lock);
7563 
7564 	tr->clock_id = i;
7565 
7566 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7567 
7568 	/*
7569 	 * New clock may not be consistent with the previous clock.
7570 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7571 	 */
7572 	tracing_reset_online_cpus(&tr->array_buffer);
7573 
7574 #ifdef CONFIG_TRACER_MAX_TRACE
7575 	if (tr->max_buffer.buffer)
7576 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7577 	tracing_reset_online_cpus(&tr->max_buffer);
7578 #endif
7579 
7580 	if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
7581 		struct trace_scratch *tscratch = tr->scratch;
7582 
7583 		tscratch->clock_id = i;
7584 	}
7585 
7586 	return 0;
7587 }
7588 
tracing_clock_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7589 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7590 				   size_t cnt, loff_t *fpos)
7591 {
7592 	struct seq_file *m = filp->private_data;
7593 	struct trace_array *tr = m->private;
7594 	char buf[64];
7595 	const char *clockstr;
7596 	int ret;
7597 
7598 	if (cnt >= sizeof(buf))
7599 		return -EINVAL;
7600 
7601 	if (copy_from_user(buf, ubuf, cnt))
7602 		return -EFAULT;
7603 
7604 	buf[cnt] = 0;
7605 
7606 	clockstr = strstrip(buf);
7607 
7608 	ret = tracing_set_clock(tr, clockstr);
7609 	if (ret)
7610 		return ret;
7611 
7612 	*fpos += cnt;
7613 
7614 	return cnt;
7615 }
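/*
 * Illustrative only: changing the clock resets the buffers (see
 * tracing_set_clock() above), so it is normally done before tracing starts.
 * A user space sketch, assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int set_trace_clock(const char *name)	// e.g. "global" or "mono"
 *	{
 *		int fd = open("/sys/kernel/tracing/trace_clock", O_WRONLY);
 *		int ret;
 *
 *		if (fd < 0)
 *			return -1;
 *		ret = write(fd, name, strlen(name)) < 0 ? -1 : 0;
 *		close(fd);
 *		return ret;
 *	}
 *
 * Reading the file back shows all available clocks with the current one in
 * brackets, e.g. "local [global] counter ...".
 */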
7616 
tracing_clock_open(struct inode * inode,struct file * file)7617 static int tracing_clock_open(struct inode *inode, struct file *file)
7618 {
7619 	struct trace_array *tr = inode->i_private;
7620 	int ret;
7621 
7622 	ret = tracing_check_open_get_tr(tr);
7623 	if (ret)
7624 		return ret;
7625 
7626 	ret = single_open(file, tracing_clock_show, inode->i_private);
7627 	if (ret < 0)
7628 		trace_array_put(tr);
7629 
7630 	return ret;
7631 }
7632 
tracing_time_stamp_mode_show(struct seq_file * m,void * v)7633 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7634 {
7635 	struct trace_array *tr = m->private;
7636 
7637 	guard(mutex)(&trace_types_lock);
7638 
7639 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7640 		seq_puts(m, "delta [absolute]\n");
7641 	else
7642 		seq_puts(m, "[delta] absolute\n");
7643 
7644 	return 0;
7645 }
7646 
tracing_time_stamp_mode_open(struct inode * inode,struct file * file)7647 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7648 {
7649 	struct trace_array *tr = inode->i_private;
7650 	int ret;
7651 
7652 	ret = tracing_check_open_get_tr(tr);
7653 	if (ret)
7654 		return ret;
7655 
7656 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7657 	if (ret < 0)
7658 		trace_array_put(tr);
7659 
7660 	return ret;
7661 }
7662 
tracing_event_time_stamp(struct trace_buffer * buffer,struct ring_buffer_event * rbe)7663 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7664 {
7665 	if (rbe == this_cpu_read(trace_buffered_event))
7666 		return ring_buffer_time_stamp(buffer);
7667 
7668 	return ring_buffer_event_time_stamp(buffer, rbe);
7669 }
7670 
7671 /*
7672  * Set or disable using the per-CPU trace_buffered_event when possible.
7673  */
tracing_set_filter_buffering(struct trace_array * tr,bool set)7674 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7675 {
7676 	guard(mutex)(&trace_types_lock);
7677 
7678 	if (set && tr->no_filter_buffering_ref++)
7679 		return 0;
7680 
7681 	if (!set) {
7682 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref))
7683 			return -EINVAL;
7684 
7685 		--tr->no_filter_buffering_ref;
7686 	}
7687 
7688 	return 0;
7689 }
7690 
7691 struct ftrace_buffer_info {
7692 	struct trace_iterator	iter;
7693 	void			*spare;
7694 	unsigned int		spare_cpu;
7695 	unsigned int		spare_size;
7696 	unsigned int		read;
7697 };
7698 
7699 #ifdef CONFIG_TRACER_SNAPSHOT
tracing_snapshot_open(struct inode * inode,struct file * file)7700 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7701 {
7702 	struct trace_array *tr = inode->i_private;
7703 	struct trace_iterator *iter;
7704 	struct seq_file *m;
7705 	int ret;
7706 
7707 	ret = tracing_check_open_get_tr(tr);
7708 	if (ret)
7709 		return ret;
7710 
7711 	if (file->f_mode & FMODE_READ) {
7712 		iter = __tracing_open(inode, file, true);
7713 		if (IS_ERR(iter))
7714 			ret = PTR_ERR(iter);
7715 	} else {
7716 		/* Writes still need the seq_file to hold the private data */
7717 		ret = -ENOMEM;
7718 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7719 		if (!m)
7720 			goto out;
7721 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7722 		if (!iter) {
7723 			kfree(m);
7724 			goto out;
7725 		}
7726 		ret = 0;
7727 
7728 		iter->tr = tr;
7729 		iter->array_buffer = &tr->max_buffer;
7730 		iter->cpu_file = tracing_get_cpu(inode);
7731 		m->private = iter;
7732 		file->private_data = m;
7733 	}
7734 out:
7735 	if (ret < 0)
7736 		trace_array_put(tr);
7737 
7738 	return ret;
7739 }
7740 
tracing_swap_cpu_buffer(void * tr)7741 static void tracing_swap_cpu_buffer(void *tr)
7742 {
7743 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7744 }
7745 
7746 static ssize_t
tracing_snapshot_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7747 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7748 		       loff_t *ppos)
7749 {
7750 	struct seq_file *m = filp->private_data;
7751 	struct trace_iterator *iter = m->private;
7752 	struct trace_array *tr = iter->tr;
7753 	unsigned long val;
7754 	int ret;
7755 
7756 	ret = tracing_update_buffers(tr);
7757 	if (ret < 0)
7758 		return ret;
7759 
7760 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7761 	if (ret)
7762 		return ret;
7763 
7764 	guard(mutex)(&trace_types_lock);
7765 
7766 	if (tr->current_trace->use_max_tr)
7767 		return -EBUSY;
7768 
7769 	local_irq_disable();
7770 	arch_spin_lock(&tr->max_lock);
7771 	if (tr->cond_snapshot)
7772 		ret = -EBUSY;
7773 	arch_spin_unlock(&tr->max_lock);
7774 	local_irq_enable();
7775 	if (ret)
7776 		return ret;
7777 
7778 	switch (val) {
7779 	case 0:
7780 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7781 			return -EINVAL;
7782 		if (tr->allocated_snapshot)
7783 			free_snapshot(tr);
7784 		break;
7785 	case 1:
7786 /* Only allow per-cpu swap if the ring buffer supports it */
7787 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7788 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7789 			return -EINVAL;
7790 #endif
7791 		if (tr->allocated_snapshot)
7792 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7793 					&tr->array_buffer, iter->cpu_file);
7794 
7795 		ret = tracing_arm_snapshot_locked(tr);
7796 		if (ret)
7797 			return ret;
7798 
7799 		/* Now, we're going to swap */
7800 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7801 			local_irq_disable();
7802 			update_max_tr(tr, current, smp_processor_id(), NULL);
7803 			local_irq_enable();
7804 		} else {
7805 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7806 						 (void *)tr, 1);
7807 		}
7808 		tracing_disarm_snapshot(tr);
7809 		break;
7810 	default:
7811 		if (tr->allocated_snapshot) {
7812 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7813 				tracing_reset_online_cpus(&tr->max_buffer);
7814 			else
7815 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7816 		}
7817 		break;
7818 	}
7819 
7820 	if (ret >= 0) {
7821 		*ppos += cnt;
7822 		ret = cnt;
7823 	}
7824 
7825 	return ret;
7826 }
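/*
 * Illustrative only: the values handled by the switch above map onto the
 * user space interface of the snapshot file (assuming tracefs is mounted
 * at /sys/kernel/tracing):
 *
 *	// "0" frees the allocated snapshot buffer (all-CPU file only),
 *	// "1" allocates it if needed and swaps it with the live buffer,
 *	// any other value just clears the snapshot buffer contents.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int take_snapshot(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
 *		int ret;
 *
 *		if (fd < 0)
 *			return -1;
 *		ret = write(fd, "1", 1) < 0 ? -1 : 0;
 *		close(fd);
 *		return ret;
 *	}
 */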
7827 
tracing_snapshot_release(struct inode * inode,struct file * file)7828 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7829 {
7830 	struct seq_file *m = file->private_data;
7831 	int ret;
7832 
7833 	ret = tracing_release(inode, file);
7834 
7835 	if (file->f_mode & FMODE_READ)
7836 		return ret;
7837 
7838 	/* If write only, the seq_file is just a stub */
7839 	if (m)
7840 		kfree(m->private);
7841 	kfree(m);
7842 
7843 	return 0;
7844 }
7845 
7846 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7847 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7848 				    size_t count, loff_t *ppos);
7849 static int tracing_buffers_release(struct inode *inode, struct file *file);
7850 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7851 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7852 
snapshot_raw_open(struct inode * inode,struct file * filp)7853 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7854 {
7855 	struct ftrace_buffer_info *info;
7856 	int ret;
7857 
7858 	/* The following checks for tracefs lockdown */
7859 	ret = tracing_buffers_open(inode, filp);
7860 	if (ret < 0)
7861 		return ret;
7862 
7863 	info = filp->private_data;
7864 
7865 	if (info->iter.trace->use_max_tr) {
7866 		tracing_buffers_release(inode, filp);
7867 		return -EBUSY;
7868 	}
7869 
7870 	info->iter.snapshot = true;
7871 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7872 
7873 	return ret;
7874 }
7875 
7876 #endif /* CONFIG_TRACER_SNAPSHOT */
7877 
7878 
7879 static const struct file_operations tracing_thresh_fops = {
7880 	.open		= tracing_open_generic,
7881 	.read		= tracing_thresh_read,
7882 	.write		= tracing_thresh_write,
7883 	.llseek		= generic_file_llseek,
7884 };
7885 
7886 #ifdef CONFIG_TRACER_MAX_TRACE
7887 static const struct file_operations tracing_max_lat_fops = {
7888 	.open		= tracing_open_generic_tr,
7889 	.read		= tracing_max_lat_read,
7890 	.write		= tracing_max_lat_write,
7891 	.llseek		= generic_file_llseek,
7892 	.release	= tracing_release_generic_tr,
7893 };
7894 #endif
7895 
7896 static const struct file_operations set_tracer_fops = {
7897 	.open		= tracing_open_generic_tr,
7898 	.read		= tracing_set_trace_read,
7899 	.write		= tracing_set_trace_write,
7900 	.llseek		= generic_file_llseek,
7901 	.release	= tracing_release_generic_tr,
7902 };
7903 
7904 static const struct file_operations tracing_pipe_fops = {
7905 	.open		= tracing_open_pipe,
7906 	.poll		= tracing_poll_pipe,
7907 	.read		= tracing_read_pipe,
7908 	.splice_read	= tracing_splice_read_pipe,
7909 	.release	= tracing_release_pipe,
7910 };
7911 
7912 static const struct file_operations tracing_entries_fops = {
7913 	.open		= tracing_open_generic_tr,
7914 	.read		= tracing_entries_read,
7915 	.write		= tracing_entries_write,
7916 	.llseek		= generic_file_llseek,
7917 	.release	= tracing_release_generic_tr,
7918 };
7919 
7920 static const struct file_operations tracing_buffer_meta_fops = {
7921 	.open		= tracing_buffer_meta_open,
7922 	.read		= seq_read,
7923 	.llseek		= seq_lseek,
7924 	.release	= tracing_seq_release,
7925 };
7926 
7927 static const struct file_operations tracing_total_entries_fops = {
7928 	.open		= tracing_open_generic_tr,
7929 	.read		= tracing_total_entries_read,
7930 	.llseek		= generic_file_llseek,
7931 	.release	= tracing_release_generic_tr,
7932 };
7933 
7934 static const struct file_operations tracing_free_buffer_fops = {
7935 	.open		= tracing_open_generic_tr,
7936 	.write		= tracing_free_buffer_write,
7937 	.release	= tracing_free_buffer_release,
7938 };
7939 
7940 static const struct file_operations tracing_mark_fops = {
7941 	.open		= tracing_mark_open,
7942 	.write		= tracing_mark_write,
7943 	.release	= tracing_mark_release,
7944 };
7945 
7946 static const struct file_operations tracing_mark_raw_fops = {
7947 	.open		= tracing_mark_open,
7948 	.write		= tracing_mark_raw_write,
7949 	.release	= tracing_mark_release,
7950 };
7951 
7952 static const struct file_operations trace_clock_fops = {
7953 	.open		= tracing_clock_open,
7954 	.read		= seq_read,
7955 	.llseek		= seq_lseek,
7956 	.release	= tracing_single_release_tr,
7957 	.write		= tracing_clock_write,
7958 };
7959 
7960 static const struct file_operations trace_time_stamp_mode_fops = {
7961 	.open		= tracing_time_stamp_mode_open,
7962 	.read		= seq_read,
7963 	.llseek		= seq_lseek,
7964 	.release	= tracing_single_release_tr,
7965 };
7966 
7967 static const struct file_operations last_boot_fops = {
7968 	.open		= tracing_last_boot_open,
7969 	.read		= seq_read,
7970 	.llseek		= seq_lseek,
7971 	.release	= tracing_seq_release,
7972 };
7973 
7974 #ifdef CONFIG_TRACER_SNAPSHOT
7975 static const struct file_operations snapshot_fops = {
7976 	.open		= tracing_snapshot_open,
7977 	.read		= seq_read,
7978 	.write		= tracing_snapshot_write,
7979 	.llseek		= tracing_lseek,
7980 	.release	= tracing_snapshot_release,
7981 };
7982 
7983 static const struct file_operations snapshot_raw_fops = {
7984 	.open		= snapshot_raw_open,
7985 	.read		= tracing_buffers_read,
7986 	.release	= tracing_buffers_release,
7987 	.splice_read	= tracing_buffers_splice_read,
7988 };
7989 
7990 #endif /* CONFIG_TRACER_SNAPSHOT */
7991 
7992 /*
7993  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7994  * @filp: The active open file structure
7995  * @ubuf: The userspace provided buffer containing the value to write
7996  * @cnt: The number of bytes to read from @ubuf
7997  * @ppos: The current "file" position
7998  *
7999  * This function implements the write interface for a struct trace_min_max_param.
8000  * The filp->private_data must point to a trace_min_max_param structure that
8001  * defines where to write the value, the min and the max acceptable values,
8002  * and a lock to protect the write.
8003  */
8004 static ssize_t
trace_min_max_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)8005 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
8006 {
8007 	struct trace_min_max_param *param = filp->private_data;
8008 	u64 val;
8009 	int err;
8010 
8011 	if (!param)
8012 		return -EFAULT;
8013 
8014 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
8015 	if (err)
8016 		return err;
8017 
8018 	if (param->lock)
8019 		mutex_lock(param->lock);
8020 
8021 	if (param->min && val < *param->min)
8022 		err = -EINVAL;
8023 
8024 	if (param->max && val > *param->max)
8025 		err = -EINVAL;
8026 
8027 	if (!err)
8028 		*param->val = val;
8029 
8030 	if (param->lock)
8031 		mutex_unlock(param->lock);
8032 
8033 	if (err)
8034 		return err;
8035 
8036 	return cnt;
8037 }
8038 
8039 /*
8040  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
8041  * @filp: The active open file structure
8042  * @ubuf: The userspace provided buffer to read value into
8043  * @cnt: The maximum number of bytes to read
8044  * @ppos: The current "file" position
8045  *
8046  * This function implements the read interface for a struct trace_min_max_param.
8047  * The filp->private_data must point to a trace_min_max_param struct with valid
8048  * data.
8049  */
8050 static ssize_t
trace_min_max_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8051 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
8052 {
8053 	struct trace_min_max_param *param = filp->private_data;
8054 	char buf[U64_STR_SIZE];
8055 	int len;
8056 	u64 val;
8057 
8058 	if (!param)
8059 		return -EFAULT;
8060 
8061 	val = *param->val;
8062 
8063 	if (cnt > sizeof(buf))
8064 		cnt = sizeof(buf);
8065 
8066 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
8067 
8068 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
8069 }
8070 
8071 const struct file_operations trace_min_max_fops = {
8072 	.open		= tracing_open_generic,
8073 	.read		= trace_min_max_read,
8074 	.write		= trace_min_max_write,
8075 };
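/*
 * Illustrative only: a sketch of how a caller might wire a u64 knob to
 * trace_min_max_fops. The names below (my_knob_*) are hypothetical; the
 * field layout follows the accessors in trace_min_max_read()/write() above.
 *
 *	static u64 my_knob_val = 10;
 *	static u64 my_knob_min = 1;
 *	static u64 my_knob_max = 100;
 *	static DEFINE_MUTEX(my_knob_lock);
 *
 *	static struct trace_min_max_param my_knob = {
 *		.lock	= &my_knob_lock,
 *		.val	= &my_knob_val,
 *		.min	= &my_knob_min,
 *		.max	= &my_knob_max,
 *	};
 *
 *	// Typically created under a tracefs directory (parent being the
 *	// instance's tracefs dentry):
 *	// tracefs_create_file("my_knob", TRACE_MODE_WRITE, parent,
 *	//		       &my_knob, &trace_min_max_fops);
 */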
8076 
8077 #define TRACING_LOG_ERRS_MAX	8
8078 #define TRACING_LOG_LOC_MAX	128
8079 
8080 #define CMD_PREFIX "  Command: "
8081 
8082 struct err_info {
8083 	const char	**errs;	/* ptr to loc-specific array of err strings */
8084 	u8		type;	/* index into errs -> specific err string */
8085 	u16		pos;	/* caret position */
8086 	u64		ts;
8087 };
8088 
8089 struct tracing_log_err {
8090 	struct list_head	list;
8091 	struct err_info		info;
8092 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
8093 	char			*cmd;                     /* what caused err */
8094 };
8095 
8096 static DEFINE_MUTEX(tracing_err_log_lock);
8097 
alloc_tracing_log_err(int len)8098 static struct tracing_log_err *alloc_tracing_log_err(int len)
8099 {
8100 	struct tracing_log_err *err;
8101 
8102 	err = kzalloc(sizeof(*err), GFP_KERNEL);
8103 	if (!err)
8104 		return ERR_PTR(-ENOMEM);
8105 
8106 	err->cmd = kzalloc(len, GFP_KERNEL);
8107 	if (!err->cmd) {
8108 		kfree(err);
8109 		return ERR_PTR(-ENOMEM);
8110 	}
8111 
8112 	return err;
8113 }
8114 
free_tracing_log_err(struct tracing_log_err * err)8115 static void free_tracing_log_err(struct tracing_log_err *err)
8116 {
8117 	kfree(err->cmd);
8118 	kfree(err);
8119 }
8120 
get_tracing_log_err(struct trace_array * tr,int len)8121 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
8122 						   int len)
8123 {
8124 	struct tracing_log_err *err;
8125 	char *cmd;
8126 
8127 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
8128 		err = alloc_tracing_log_err(len);
8129 		if (PTR_ERR(err) != -ENOMEM)
8130 			tr->n_err_log_entries++;
8131 
8132 		return err;
8133 	}
8134 	cmd = kzalloc(len, GFP_KERNEL);
8135 	if (!cmd)
8136 		return ERR_PTR(-ENOMEM);
8137 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8138 	kfree(err->cmd);
8139 	err->cmd = cmd;
8140 	list_del(&err->list);
8141 
8142 	return err;
8143 }
8144 
8145 /**
8146  * err_pos - find the position of a string within a command for error careting
8147  * @cmd: The tracing command that caused the error
8148  * @str: The string to position the caret at within @cmd
8149  *
8150  * Finds the position of the first occurrence of @str within @cmd.  The
8151  * return value can be passed to tracing_log_err() for caret placement
8152  * within @cmd.
8153  *
8154  * Returns the index within @cmd of the first occurrence of @str or 0
8155  * if @str was not found.
8156  */
err_pos(char * cmd,const char * str)8157 unsigned int err_pos(char *cmd, const char *str)
8158 {
8159 	char *found;
8160 
8161 	if (WARN_ON(!strlen(cmd)))
8162 		return 0;
8163 
8164 	found = strstr(cmd, str);
8165 	if (found)
8166 		return found - cmd;
8167 
8168 	return 0;
8169 }
8170 
8171 /**
8172  * tracing_log_err - write an error to the tracing error log
8173  * @tr: The associated trace array for the error (NULL for top level array)
8174  * @loc: A string describing where the error occurred
8175  * @cmd: The tracing command that caused the error
8176  * @errs: The array of loc-specific static error strings
8177  * @type: The index into errs[], which produces the specific static err string
8178  * @pos: The position the caret should be placed in the cmd
8179  *
8180  * Writes an error into tracing/error_log of the form:
8181  *
8182  * <loc>: error: <text>
8183  *   Command: <cmd>
8184  *              ^
8185  *
8186  * tracing/error_log is a small log file containing the last
8187  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
8188  * unless there has been a tracing error, and the error log can be
8189  * cleared and have its memory freed by writing the empty string in
8190  * truncation mode to it i.e. echo > tracing/error_log.
8191  *
8192  * NOTE: the @errs array along with the @type param are used to
8193  * produce a static error string - this string is not copied and saved
8194  * when the error is logged - only a pointer to it is saved.  See
8195  * existing callers for examples of how static strings are typically
8196  * defined for use with tracing_log_err().
8197  */
tracing_log_err(struct trace_array * tr,const char * loc,const char * cmd,const char ** errs,u8 type,u16 pos)8198 void tracing_log_err(struct trace_array *tr,
8199 		     const char *loc, const char *cmd,
8200 		     const char **errs, u8 type, u16 pos)
8201 {
8202 	struct tracing_log_err *err;
8203 	int len = 0;
8204 
8205 	if (!tr)
8206 		tr = &global_trace;
8207 
8208 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8209 
8210 	guard(mutex)(&tracing_err_log_lock);
8211 
8212 	err = get_tracing_log_err(tr, len);
8213 	if (PTR_ERR(err) == -ENOMEM)
8214 		return;
8215 
8216 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8217 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8218 
8219 	err->info.errs = errs;
8220 	err->info.type = type;
8221 	err->info.pos = pos;
8222 	err->info.ts = local_clock();
8223 
8224 	list_add_tail(&err->list, &tr->err_log);
8225 }
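/*
 * Illustrative only: a sketch of a typical caller, following the NOTE above.
 * The command text and error strings here are hypothetical; real callers
 * keep @errs as a static array so the saved pointer stays valid.
 *
 *	static const char *my_cmd_errs[] = {
 *		"Unknown keyword",
 *		"Missing argument",
 *	};
 *
 *	static void my_cmd_report_unknown(struct trace_array *tr,
 *					  char *cmd, const char *token)
 *	{
 *		// Index 0 selects "Unknown keyword"; the caret is placed
 *		// under the offending token within the echoed command.
 *		tracing_log_err(tr, "my_cmd", cmd, my_cmd_errs, 0,
 *				err_pos(cmd, token));
 *	}
 */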
8226 
clear_tracing_err_log(struct trace_array * tr)8227 static void clear_tracing_err_log(struct trace_array *tr)
8228 {
8229 	struct tracing_log_err *err, *next;
8230 
8231 	guard(mutex)(&tracing_err_log_lock);
8232 
8233 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
8234 		list_del(&err->list);
8235 		free_tracing_log_err(err);
8236 	}
8237 
8238 	tr->n_err_log_entries = 0;
8239 }
8240 
tracing_err_log_seq_start(struct seq_file * m,loff_t * pos)8241 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8242 {
8243 	struct trace_array *tr = m->private;
8244 
8245 	mutex_lock(&tracing_err_log_lock);
8246 
8247 	return seq_list_start(&tr->err_log, *pos);
8248 }
8249 
tracing_err_log_seq_next(struct seq_file * m,void * v,loff_t * pos)8250 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8251 {
8252 	struct trace_array *tr = m->private;
8253 
8254 	return seq_list_next(v, &tr->err_log, pos);
8255 }
8256 
tracing_err_log_seq_stop(struct seq_file * m,void * v)8257 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8258 {
8259 	mutex_unlock(&tracing_err_log_lock);
8260 }
8261 
tracing_err_log_show_pos(struct seq_file * m,u16 pos)8262 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8263 {
8264 	u16 i;
8265 
8266 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8267 		seq_putc(m, ' ');
8268 	for (i = 0; i < pos; i++)
8269 		seq_putc(m, ' ');
8270 	seq_puts(m, "^\n");
8271 }
8272 
tracing_err_log_seq_show(struct seq_file * m,void * v)8273 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8274 {
8275 	struct tracing_log_err *err = v;
8276 
8277 	if (err) {
8278 		const char *err_text = err->info.errs[err->info.type];
8279 		u64 sec = err->info.ts;
8280 		u32 nsec;
8281 
8282 		nsec = do_div(sec, NSEC_PER_SEC);
8283 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8284 			   err->loc, err_text);
8285 		seq_printf(m, "%s", err->cmd);
8286 		tracing_err_log_show_pos(m, err->info.pos);
8287 	}
8288 
8289 	return 0;
8290 }
8291 
8292 static const struct seq_operations tracing_err_log_seq_ops = {
8293 	.start  = tracing_err_log_seq_start,
8294 	.next   = tracing_err_log_seq_next,
8295 	.stop   = tracing_err_log_seq_stop,
8296 	.show   = tracing_err_log_seq_show
8297 };
8298 
tracing_err_log_open(struct inode * inode,struct file * file)8299 static int tracing_err_log_open(struct inode *inode, struct file *file)
8300 {
8301 	struct trace_array *tr = inode->i_private;
8302 	int ret = 0;
8303 
8304 	ret = tracing_check_open_get_tr(tr);
8305 	if (ret)
8306 		return ret;
8307 
8308 	/* If this file was opened for write, then erase contents */
8309 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8310 		clear_tracing_err_log(tr);
8311 
8312 	if (file->f_mode & FMODE_READ) {
8313 		ret = seq_open(file, &tracing_err_log_seq_ops);
8314 		if (!ret) {
8315 			struct seq_file *m = file->private_data;
8316 			m->private = tr;
8317 		} else {
8318 			trace_array_put(tr);
8319 		}
8320 	}
8321 	return ret;
8322 }
8323 
tracing_err_log_write(struct file * file,const char __user * buffer,size_t count,loff_t * ppos)8324 static ssize_t tracing_err_log_write(struct file *file,
8325 				     const char __user *buffer,
8326 				     size_t count, loff_t *ppos)
8327 {
8328 	return count;
8329 }
8330 
tracing_err_log_release(struct inode * inode,struct file * file)8331 static int tracing_err_log_release(struct inode *inode, struct file *file)
8332 {
8333 	struct trace_array *tr = inode->i_private;
8334 
8335 	trace_array_put(tr);
8336 
8337 	if (file->f_mode & FMODE_READ)
8338 		seq_release(inode, file);
8339 
8340 	return 0;
8341 }
8342 
8343 static const struct file_operations tracing_err_log_fops = {
8344 	.open           = tracing_err_log_open,
8345 	.write		= tracing_err_log_write,
8346 	.read           = seq_read,
8347 	.llseek         = tracing_lseek,
8348 	.release        = tracing_err_log_release,
8349 };
8350 
tracing_buffers_open(struct inode * inode,struct file * filp)8351 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8352 {
8353 	struct trace_array *tr = inode->i_private;
8354 	struct ftrace_buffer_info *info;
8355 	int ret;
8356 
8357 	ret = tracing_check_open_get_tr(tr);
8358 	if (ret)
8359 		return ret;
8360 
8361 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
8362 	if (!info) {
8363 		trace_array_put(tr);
8364 		return -ENOMEM;
8365 	}
8366 
8367 	mutex_lock(&trace_types_lock);
8368 
8369 	info->iter.tr		= tr;
8370 	info->iter.cpu_file	= tracing_get_cpu(inode);
8371 	info->iter.trace	= tr->current_trace;
8372 	info->iter.array_buffer = &tr->array_buffer;
8373 	info->spare		= NULL;
8374 	/* Force reading ring buffer for first read */
8375 	info->read		= (unsigned int)-1;
8376 
8377 	filp->private_data = info;
8378 
8379 	tr->trace_ref++;
8380 
8381 	mutex_unlock(&trace_types_lock);
8382 
8383 	ret = nonseekable_open(inode, filp);
8384 	if (ret < 0)
8385 		trace_array_put(tr);
8386 
8387 	return ret;
8388 }
8389 
8390 static __poll_t
tracing_buffers_poll(struct file * filp,poll_table * poll_table)8391 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8392 {
8393 	struct ftrace_buffer_info *info = filp->private_data;
8394 	struct trace_iterator *iter = &info->iter;
8395 
8396 	return trace_poll(iter, filp, poll_table);
8397 }
8398 
8399 static ssize_t
tracing_buffers_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)8400 tracing_buffers_read(struct file *filp, char __user *ubuf,
8401 		     size_t count, loff_t *ppos)
8402 {
8403 	struct ftrace_buffer_info *info = filp->private_data;
8404 	struct trace_iterator *iter = &info->iter;
8405 	void *trace_data;
8406 	int page_size;
8407 	ssize_t ret = 0;
8408 	ssize_t size;
8409 
8410 	if (!count)
8411 		return 0;
8412 
8413 #ifdef CONFIG_TRACER_MAX_TRACE
8414 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8415 		return -EBUSY;
8416 #endif
8417 
8418 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8419 
8420 	/* Make sure the spare matches the current sub buffer size */
8421 	if (info->spare) {
8422 		if (page_size != info->spare_size) {
8423 			ring_buffer_free_read_page(iter->array_buffer->buffer,
8424 						   info->spare_cpu, info->spare);
8425 			info->spare = NULL;
8426 		}
8427 	}
8428 
8429 	if (!info->spare) {
8430 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8431 							  iter->cpu_file);
8432 		if (IS_ERR(info->spare)) {
8433 			ret = PTR_ERR(info->spare);
8434 			info->spare = NULL;
8435 		} else {
8436 			info->spare_cpu = iter->cpu_file;
8437 			info->spare_size = page_size;
8438 		}
8439 	}
8440 	if (!info->spare)
8441 		return ret;
8442 
8443 	/* Do we have previous read data to read? */
8444 	if (info->read < page_size)
8445 		goto read;
8446 
8447  again:
8448 	trace_access_lock(iter->cpu_file);
8449 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8450 				    info->spare,
8451 				    count,
8452 				    iter->cpu_file, 0);
8453 	trace_access_unlock(iter->cpu_file);
8454 
8455 	if (ret < 0) {
8456 		if (trace_empty(iter) && !iter->closed) {
8457 			if (update_last_data_if_empty(iter->tr))
8458 				return 0;
8459 
8460 			if ((filp->f_flags & O_NONBLOCK))
8461 				return -EAGAIN;
8462 
8463 			ret = wait_on_pipe(iter, 0);
8464 			if (ret)
8465 				return ret;
8466 
8467 			goto again;
8468 		}
8469 		return 0;
8470 	}
8471 
8472 	info->read = 0;
8473  read:
8474 	size = page_size - info->read;
8475 	if (size > count)
8476 		size = count;
8477 	trace_data = ring_buffer_read_page_data(info->spare);
8478 	ret = copy_to_user(ubuf, trace_data + info->read, size);
8479 	if (ret == size)
8480 		return -EFAULT;
8481 
8482 	size -= ret;
8483 
8484 	*ppos += size;
8485 	info->read += size;
8486 
8487 	return size;
8488 }
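/*
 * Illustrative only: the per-CPU trace_pipe_raw files read whole ring buffer
 * sub-buffers in binary form, one sub-buffer (page_size above) per read.
 * Decoding the page format is left to tooling such as libtraceevent; below
 * is a user space sketch that just drains cpu0, assuming tracefs is mounted
 * at /sys/kernel/tracing and the default 4K sub-buffer size.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int drain_cpu0_raw(int out_fd)
 *	{
 *		char page[4096];
 *		ssize_t n;
 *		int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *			      O_RDONLY | O_NONBLOCK);
 *
 *		if (fd < 0)
 *			return -1;
 *		while ((n = read(fd, page, sizeof(page))) > 0)
 *			write(out_fd, page, n);
 *		close(fd);
 *		return 0;
 *	}
 */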
8489 
tracing_buffers_flush(struct file * file,fl_owner_t id)8490 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
8491 {
8492 	struct ftrace_buffer_info *info = file->private_data;
8493 	struct trace_iterator *iter = &info->iter;
8494 
8495 	iter->closed = true;
8496 	/* Make sure the waiters see the new wait_index */
8497 	(void)atomic_fetch_inc_release(&iter->wait_index);
8498 
8499 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8500 
8501 	return 0;
8502 }
8503 
tracing_buffers_release(struct inode * inode,struct file * file)8504 static int tracing_buffers_release(struct inode *inode, struct file *file)
8505 {
8506 	struct ftrace_buffer_info *info = file->private_data;
8507 	struct trace_iterator *iter = &info->iter;
8508 
8509 	guard(mutex)(&trace_types_lock);
8510 
8511 	iter->tr->trace_ref--;
8512 
8513 	__trace_array_put(iter->tr);
8514 
8515 	if (info->spare)
8516 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8517 					   info->spare_cpu, info->spare);
8518 	kvfree(info);
8519 
8520 	return 0;
8521 }
8522 
8523 struct buffer_ref {
8524 	struct trace_buffer	*buffer;
8525 	void			*page;
8526 	int			cpu;
8527 	refcount_t		refcount;
8528 };
8529 
buffer_ref_release(struct buffer_ref * ref)8530 static void buffer_ref_release(struct buffer_ref *ref)
8531 {
8532 	if (!refcount_dec_and_test(&ref->refcount))
8533 		return;
8534 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8535 	kfree(ref);
8536 }
8537 
buffer_pipe_buf_release(struct pipe_inode_info * pipe,struct pipe_buffer * buf)8538 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8539 				    struct pipe_buffer *buf)
8540 {
8541 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8542 
8543 	buffer_ref_release(ref);
8544 	buf->private = 0;
8545 }
8546 
buffer_pipe_buf_get(struct pipe_inode_info * pipe,struct pipe_buffer * buf)8547 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8548 				struct pipe_buffer *buf)
8549 {
8550 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8551 
8552 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8553 		return false;
8554 
8555 	refcount_inc(&ref->refcount);
8556 	return true;
8557 }
8558 
8559 /* Pipe buffer operations for a buffer. */
8560 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8561 	.release		= buffer_pipe_buf_release,
8562 	.get			= buffer_pipe_buf_get,
8563 };
8564 
8565 /*
8566  * Callback from splice_to_pipe(), used to release pages at the end of the
8567  * spd in case we errored out while filling the pipe.
8568  */
buffer_spd_release(struct splice_pipe_desc * spd,unsigned int i)8569 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8570 {
8571 	struct buffer_ref *ref =
8572 		(struct buffer_ref *)spd->partial[i].private;
8573 
8574 	buffer_ref_release(ref);
8575 	spd->partial[i].private = 0;
8576 }
8577 
8578 static ssize_t
tracing_buffers_splice_read(struct file * file,loff_t * ppos,struct pipe_inode_info * pipe,size_t len,unsigned int flags)8579 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8580 			    struct pipe_inode_info *pipe, size_t len,
8581 			    unsigned int flags)
8582 {
8583 	struct ftrace_buffer_info *info = file->private_data;
8584 	struct trace_iterator *iter = &info->iter;
8585 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8586 	struct page *pages_def[PIPE_DEF_BUFFERS];
8587 	struct splice_pipe_desc spd = {
8588 		.pages		= pages_def,
8589 		.partial	= partial_def,
8590 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8591 		.ops		= &buffer_pipe_buf_ops,
8592 		.spd_release	= buffer_spd_release,
8593 	};
8594 	struct buffer_ref *ref;
8595 	bool woken = false;
8596 	int page_size;
8597 	int entries, i;
8598 	ssize_t ret = 0;
8599 
8600 #ifdef CONFIG_TRACER_MAX_TRACE
8601 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8602 		return -EBUSY;
8603 #endif
8604 
8605 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8606 	if (*ppos & (page_size - 1))
8607 		return -EINVAL;
8608 
8609 	if (len & (page_size - 1)) {
8610 		if (len < page_size)
8611 			return -EINVAL;
8612 		len &= (~(page_size - 1));
8613 	}
8614 
8615 	if (splice_grow_spd(pipe, &spd))
8616 		return -ENOMEM;
8617 
8618  again:
8619 	trace_access_lock(iter->cpu_file);
8620 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8621 
8622 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8623 		struct page *page;
8624 		int r;
8625 
8626 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8627 		if (!ref) {
8628 			ret = -ENOMEM;
8629 			break;
8630 		}
8631 
8632 		refcount_set(&ref->refcount, 1);
8633 		ref->buffer = iter->array_buffer->buffer;
8634 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8635 		if (IS_ERR(ref->page)) {
8636 			ret = PTR_ERR(ref->page);
8637 			ref->page = NULL;
8638 			kfree(ref);
8639 			break;
8640 		}
8641 		ref->cpu = iter->cpu_file;
8642 
8643 		r = ring_buffer_read_page(ref->buffer, ref->page,
8644 					  len, iter->cpu_file, 1);
8645 		if (r < 0) {
8646 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8647 						   ref->page);
8648 			kfree(ref);
8649 			break;
8650 		}
8651 
8652 		page = virt_to_page(ring_buffer_read_page_data(ref->page));
8653 
8654 		spd.pages[i] = page;
8655 		spd.partial[i].len = page_size;
8656 		spd.partial[i].offset = 0;
8657 		spd.partial[i].private = (unsigned long)ref;
8658 		spd.nr_pages++;
8659 		*ppos += page_size;
8660 
8661 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8662 	}
8663 
8664 	trace_access_unlock(iter->cpu_file);
8665 	spd.nr_pages = i;
8666 
8667 	/* did we read anything? */
8668 	if (!spd.nr_pages) {
8669 
8670 		if (ret)
8671 			goto out;
8672 
8673 		if (woken)
8674 			goto out;
8675 
8676 		ret = -EAGAIN;
8677 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8678 			goto out;
8679 
8680 		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8681 		if (ret)
8682 			goto out;
8683 
8684 		/* No need to wait after waking up when tracing is off */
8685 		if (!tracer_tracing_is_on(iter->tr))
8686 			goto out;
8687 
8688 		/* Iterate one more time to collect any new data, then exit */
8689 		woken = true;
8690 
8691 		goto again;
8692 	}
8693 
8694 	ret = splice_to_pipe(pipe, &spd);
8695 out:
8696 	splice_shrink_spd(&spd);
8697 
8698 	return ret;
8699 }
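/*
 * Rough sketch of the intended userspace usage of this splice path
 * (illustrative only; the paths, sizes and output fd are assumptions,
 * not taken from this file):
 *
 *	int tfd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		       O_RDONLY);
 *	int pfd[2];
 *
 *	pipe(pfd);
 *	// Move whole sub-buffers into the pipe without copying them
 *	// through user memory, then drain the pipe to some output fd.
 *	splice(tfd, NULL, pfd[1], NULL, 64 * 1024, SPLICE_F_NONBLOCK);
 *	splice(pfd[0], NULL, out_fd, NULL, 64 * 1024, 0);
 */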
8700 
tracing_buffers_ioctl(struct file * file,unsigned int cmd,unsigned long arg)8701 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8702 {
8703 	struct ftrace_buffer_info *info = file->private_data;
8704 	struct trace_iterator *iter = &info->iter;
8705 	int err;
8706 
8707 	if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
8708 		if (!(file->f_flags & O_NONBLOCK)) {
8709 			err = ring_buffer_wait(iter->array_buffer->buffer,
8710 					       iter->cpu_file,
8711 					       iter->tr->buffer_percent,
8712 					       NULL, NULL);
8713 			if (err)
8714 				return err;
8715 		}
8716 
8717 		return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8718 						  iter->cpu_file);
8719 	} else if (cmd) {
8720 		return -ENOTTY;
8721 	}
8722 
8723 	/*
8724 	 * An ioctl call with cmd 0 to the ring buffer file will wake up all
8725 	 * waiters
8726 	 */
8727 	guard(mutex)(&trace_types_lock);
8728 
8729 	/* Make sure the waiters see the new wait_index */
8730 	(void)atomic_fetch_inc_release(&iter->wait_index);
8731 
8732 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8733 
8734 	return 0;
8735 }
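/*
 * Userspace sketch (illustrative; assumes the mmap reader ABI declared in
 * <linux/trace_mmap.h>): a reader that has mmap()ed trace_pipe_raw asks
 * for the next reader page with
 *
 *	ioctl(fd, TRACE_MMAP_IOCTL_GET_READER);
 *
 * while any program can wake all blocked readers of the file with a plain
 * ioctl(fd, 0).
 */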
8736 
8737 #ifdef CONFIG_TRACER_MAX_TRACE
get_snapshot_map(struct trace_array * tr)8738 static int get_snapshot_map(struct trace_array *tr)
8739 {
8740 	int err = 0;
8741 
8742 	/*
8743 	 * Called with mmap_lock held. lockdep would be unhappy if we were to
8744 	 * take trace_types_lock here. Instead use the specific
8745 	 * snapshot_trigger_lock.
8746 	 */
8747 	spin_lock(&tr->snapshot_trigger_lock);
8748 
8749 	if (tr->snapshot || tr->mapped == UINT_MAX)
8750 		err = -EBUSY;
8751 	else
8752 		tr->mapped++;
8753 
8754 	spin_unlock(&tr->snapshot_trigger_lock);
8755 
8756 	/* Wait for update_max_tr() to observe iter->tr->mapped */
8757 	if (tr->mapped == 1)
8758 		synchronize_rcu();
8759 
8760 	return err;
8761 
8762 }
put_snapshot_map(struct trace_array * tr)8763 static void put_snapshot_map(struct trace_array *tr)
8764 {
8765 	spin_lock(&tr->snapshot_trigger_lock);
8766 	if (!WARN_ON(!tr->mapped))
8767 		tr->mapped--;
8768 	spin_unlock(&tr->snapshot_trigger_lock);
8769 }
8770 #else
get_snapshot_map(struct trace_array * tr)8771 static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
put_snapshot_map(struct trace_array * tr)8772 static inline void put_snapshot_map(struct trace_array *tr) { }
8773 #endif
8774 
tracing_buffers_mmap_close(struct vm_area_struct * vma)8775 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8776 {
8777 	struct ftrace_buffer_info *info = vma->vm_file->private_data;
8778 	struct trace_iterator *iter = &info->iter;
8779 
8780 	WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
8781 	put_snapshot_map(iter->tr);
8782 }
8783 
8784 static const struct vm_operations_struct tracing_buffers_vmops = {
8785 	.close		= tracing_buffers_mmap_close,
8786 };
8787 
tracing_buffers_mmap(struct file * filp,struct vm_area_struct * vma)8788 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
8789 {
8790 	struct ftrace_buffer_info *info = filp->private_data;
8791 	struct trace_iterator *iter = &info->iter;
8792 	int ret = 0;
8793 
8794 	/* A memmap'ed (boot-mapped) buffer does not support user-space mmap */
8795 	if (iter->tr->flags & TRACE_ARRAY_FL_MEMMAP)
8796 		return -ENODEV;
8797 
8798 	ret = get_snapshot_map(iter->tr);
8799 	if (ret)
8800 		return ret;
8801 
8802 	ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
8803 	if (ret)
8804 		put_snapshot_map(iter->tr);
8805 
8806 	vma->vm_ops = &tracing_buffers_vmops;
8807 
8808 	return ret;
8809 }
8810 
8811 static const struct file_operations tracing_buffers_fops = {
8812 	.open		= tracing_buffers_open,
8813 	.read		= tracing_buffers_read,
8814 	.poll		= tracing_buffers_poll,
8815 	.release	= tracing_buffers_release,
8816 	.flush		= tracing_buffers_flush,
8817 	.splice_read	= tracing_buffers_splice_read,
8818 	.unlocked_ioctl = tracing_buffers_ioctl,
8819 	.mmap		= tracing_buffers_mmap,
8820 };
8821 
8822 static ssize_t
tracing_stats_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)8823 tracing_stats_read(struct file *filp, char __user *ubuf,
8824 		   size_t count, loff_t *ppos)
8825 {
8826 	struct inode *inode = file_inode(filp);
8827 	struct trace_array *tr = inode->i_private;
8828 	struct array_buffer *trace_buf = &tr->array_buffer;
8829 	int cpu = tracing_get_cpu(inode);
8830 	struct trace_seq *s;
8831 	unsigned long cnt;
8832 	unsigned long long t;
8833 	unsigned long usec_rem;
8834 
8835 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8836 	if (!s)
8837 		return -ENOMEM;
8838 
8839 	trace_seq_init(s);
8840 
8841 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8842 	trace_seq_printf(s, "entries: %ld\n", cnt);
8843 
8844 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8845 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8846 
8847 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8848 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8849 
8850 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8851 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8852 
8853 	if (trace_clocks[tr->clock_id].in_ns) {
8854 		/* local or global for trace_clock */
8855 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8856 		usec_rem = do_div(t, USEC_PER_SEC);
8857 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8858 								t, usec_rem);
8859 
8860 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8861 		usec_rem = do_div(t, USEC_PER_SEC);
8862 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8863 	} else {
8864 		/* counter or tsc mode for trace_clock */
8865 		trace_seq_printf(s, "oldest event ts: %llu\n",
8866 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8867 
8868 		trace_seq_printf(s, "now ts: %llu\n",
8869 				ring_buffer_time_stamp(trace_buf->buffer));
8870 	}
8871 
8872 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8873 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8874 
8875 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8876 	trace_seq_printf(s, "read events: %ld\n", cnt);
8877 
8878 	count = simple_read_from_buffer(ubuf, count, ppos,
8879 					s->buffer, trace_seq_used(s));
8880 
8881 	kfree(s);
8882 
8883 	return count;
8884 }
8885 
8886 static const struct file_operations tracing_stats_fops = {
8887 	.open		= tracing_open_generic_tr,
8888 	.read		= tracing_stats_read,
8889 	.llseek		= generic_file_llseek,
8890 	.release	= tracing_release_generic_tr,
8891 };
8892 
8893 #ifdef CONFIG_DYNAMIC_FTRACE
8894 
8895 static ssize_t
tracing_read_dyn_info(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8896 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8897 		  size_t cnt, loff_t *ppos)
8898 {
8899 	ssize_t ret;
8900 	char *buf;
8901 	int r;
8902 
8903 	/* 512 should be plenty to hold the amount needed */
8904 #define DYN_INFO_BUF_SIZE	512
8905 
8906 	buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
8907 	if (!buf)
8908 		return -ENOMEM;
8909 
8910 	r = scnprintf(buf, DYN_INFO_BUF_SIZE,
8911 		      "%ld pages:%ld groups: %ld\n"
8912 		      "ftrace boot update time = %llu (ns)\n"
8913 		      "ftrace module total update time = %llu (ns)\n",
8914 		      ftrace_update_tot_cnt,
8915 		      ftrace_number_of_pages,
8916 		      ftrace_number_of_groups,
8917 		      ftrace_update_time,
8918 		      ftrace_total_mod_time);
8919 
8920 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8921 	kfree(buf);
8922 	return ret;
8923 }
8924 
8925 static const struct file_operations tracing_dyn_info_fops = {
8926 	.open		= tracing_open_generic,
8927 	.read		= tracing_read_dyn_info,
8928 	.llseek		= generic_file_llseek,
8929 };
8930 #endif /* CONFIG_DYNAMIC_FTRACE */
8931 
8932 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8933 static void
ftrace_snapshot(unsigned long ip,unsigned long parent_ip,struct trace_array * tr,struct ftrace_probe_ops * ops,void * data)8934 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8935 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8936 		void *data)
8937 {
8938 	tracing_snapshot_instance(tr);
8939 }
8940 
8941 static void
ftrace_count_snapshot(unsigned long ip,unsigned long parent_ip,struct trace_array * tr,struct ftrace_probe_ops * ops,void * data)8942 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8943 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8944 		      void *data)
8945 {
8946 	struct ftrace_func_mapper *mapper = data;
8947 	long *count = NULL;
8948 
8949 	if (mapper)
8950 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8951 
8952 	if (count) {
8953 
8954 		if (*count <= 0)
8955 			return;
8956 
8957 		(*count)--;
8958 	}
8959 
8960 	tracing_snapshot_instance(tr);
8961 }
8962 
8963 static int
ftrace_snapshot_print(struct seq_file * m,unsigned long ip,struct ftrace_probe_ops * ops,void * data)8964 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8965 		      struct ftrace_probe_ops *ops, void *data)
8966 {
8967 	struct ftrace_func_mapper *mapper = data;
8968 	long *count = NULL;
8969 
8970 	seq_printf(m, "%ps:", (void *)ip);
8971 
8972 	seq_puts(m, "snapshot");
8973 
8974 	if (mapper)
8975 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8976 
8977 	if (count)
8978 		seq_printf(m, ":count=%ld\n", *count);
8979 	else
8980 		seq_puts(m, ":unlimited\n");
8981 
8982 	return 0;
8983 }
8984 
8985 static int
ftrace_snapshot_init(struct ftrace_probe_ops * ops,struct trace_array * tr,unsigned long ip,void * init_data,void ** data)8986 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8987 		     unsigned long ip, void *init_data, void **data)
8988 {
8989 	struct ftrace_func_mapper *mapper = *data;
8990 
8991 	if (!mapper) {
8992 		mapper = allocate_ftrace_func_mapper();
8993 		if (!mapper)
8994 			return -ENOMEM;
8995 		*data = mapper;
8996 	}
8997 
8998 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8999 }
9000 
9001 static void
ftrace_snapshot_free(struct ftrace_probe_ops * ops,struct trace_array * tr,unsigned long ip,void * data)9002 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
9003 		     unsigned long ip, void *data)
9004 {
9005 	struct ftrace_func_mapper *mapper = data;
9006 
9007 	if (!ip) {
9008 		if (!mapper)
9009 			return;
9010 		free_ftrace_func_mapper(mapper, NULL);
9011 		return;
9012 	}
9013 
9014 	ftrace_func_mapper_remove_ip(mapper, ip);
9015 }
9016 
9017 static struct ftrace_probe_ops snapshot_probe_ops = {
9018 	.func			= ftrace_snapshot,
9019 	.print			= ftrace_snapshot_print,
9020 };
9021 
9022 static struct ftrace_probe_ops snapshot_count_probe_ops = {
9023 	.func			= ftrace_count_snapshot,
9024 	.print			= ftrace_snapshot_print,
9025 	.init			= ftrace_snapshot_init,
9026 	.free			= ftrace_snapshot_free,
9027 };
9028 
9029 static int
ftrace_trace_snapshot_callback(struct trace_array * tr,struct ftrace_hash * hash,char * glob,char * cmd,char * param,int enable)9030 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
9031 			       char *glob, char *cmd, char *param, int enable)
9032 {
9033 	struct ftrace_probe_ops *ops;
9034 	void *count = (void *)-1;
9035 	char *number;
9036 	int ret;
9037 
9038 	if (!tr)
9039 		return -ENODEV;
9040 
9041 	/* hash funcs only work with set_ftrace_filter */
9042 	if (!enable)
9043 		return -EINVAL;
9044 
9045 	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
9046 
9047 	if (glob[0] == '!') {
9048 		ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
9049 		if (!ret)
9050 			tracing_disarm_snapshot(tr);
9051 
9052 		return ret;
9053 	}
9054 
9055 	if (!param)
9056 		goto out_reg;
9057 
9058 	number = strsep(&param, ":");
9059 
9060 	if (!strlen(number))
9061 		goto out_reg;
9062 
9063 	/*
9064 	 * We use the callback data field (which is a pointer)
9065 	 * as our counter.
9066 	 */
9067 	ret = kstrtoul(number, 0, (unsigned long *)&count);
9068 	if (ret)
9069 		return ret;
9070 
9071  out_reg:
9072 	ret = tracing_arm_snapshot(tr);
9073 	if (ret < 0)
9074 		return ret;
9075 
9076 	ret = register_ftrace_function_probe(glob, tr, ops, count);
9077 	if (ret < 0)
9078 		tracing_disarm_snapshot(tr);
9079 
9080 	return ret < 0 ? ret : 0;
9081 }
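/*
 * This is the handler behind the "snapshot" command of set_ftrace_filter.
 * Typical usage (the function name is only an example):
 *
 *	echo 'schedule:snapshot' > set_ftrace_filter	# snapshot on every hit
 *	echo 'schedule:snapshot:1' > set_ftrace_filter	# first hit only
 *
 * and prefixing the pattern with '!' removes the probe again.
 */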
9082 
9083 static struct ftrace_func_command ftrace_snapshot_cmd = {
9084 	.name			= "snapshot",
9085 	.func			= ftrace_trace_snapshot_callback,
9086 };
9087 
register_snapshot_cmd(void)9088 static __init int register_snapshot_cmd(void)
9089 {
9090 	return register_ftrace_command(&ftrace_snapshot_cmd);
9091 }
9092 #else
register_snapshot_cmd(void)9093 static inline __init int register_snapshot_cmd(void) { return 0; }
9094 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
9095 
tracing_get_dentry(struct trace_array * tr)9096 static struct dentry *tracing_get_dentry(struct trace_array *tr)
9097 {
9098 	/* Top directory uses NULL as the parent */
9099 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
9100 		return NULL;
9101 
9102 	if (WARN_ON(!tr->dir))
9103 		return ERR_PTR(-ENODEV);
9104 
9105 	/* All sub buffers have a descriptor */
9106 	return tr->dir;
9107 }
9108 
tracing_dentry_percpu(struct trace_array * tr,int cpu)9109 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
9110 {
9111 	struct dentry *d_tracer;
9112 
9113 	if (tr->percpu_dir)
9114 		return tr->percpu_dir;
9115 
9116 	d_tracer = tracing_get_dentry(tr);
9117 	if (IS_ERR(d_tracer))
9118 		return NULL;
9119 
9120 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
9121 
9122 	MEM_FAIL(!tr->percpu_dir,
9123 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
9124 
9125 	return tr->percpu_dir;
9126 }
9127 
9128 static struct dentry *
trace_create_cpu_file(const char * name,umode_t mode,struct dentry * parent,void * data,long cpu,const struct file_operations * fops)9129 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
9130 		      void *data, long cpu, const struct file_operations *fops)
9131 {
9132 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
9133 
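	/*
	 * Store the CPU number as cpu + 1 so that a NULL ->i_cdev can still
	 * mean "all CPUs"; tracing_get_cpu() undoes the +1 when the file is
	 * used (e.g. cpu 0 is stored as (void *)1).
	 */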
9134 	if (ret) /* See tracing_get_cpu() */
9135 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
9136 	return ret;
9137 }
9138 
9139 static void
tracing_init_tracefs_percpu(struct trace_array * tr,long cpu)9140 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
9141 {
9142 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
9143 	struct dentry *d_cpu;
9144 	char cpu_dir[30]; /* 30 characters should be more than enough */
9145 
9146 	if (!d_percpu)
9147 		return;
9148 
9149 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
9150 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
9151 	if (!d_cpu) {
9152 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
9153 		return;
9154 	}
9155 
9156 	/* per cpu trace_pipe */
9157 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
9158 				tr, cpu, &tracing_pipe_fops);
9159 
9160 	/* per cpu trace */
9161 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
9162 				tr, cpu, &tracing_fops);
9163 
9164 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
9165 				tr, cpu, &tracing_buffers_fops);
9166 
9167 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
9168 				tr, cpu, &tracing_stats_fops);
9169 
9170 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
9171 				tr, cpu, &tracing_entries_fops);
9172 
9173 	if (tr->range_addr_start)
9174 		trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
9175 				      tr, cpu, &tracing_buffer_meta_fops);
9176 #ifdef CONFIG_TRACER_SNAPSHOT
9177 	if (!tr->range_addr_start) {
9178 		trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
9179 				      tr, cpu, &snapshot_fops);
9180 
9181 		trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
9182 				      tr, cpu, &snapshot_raw_fops);
9183 	}
9184 #endif
9185 }
9186 
9187 #ifdef CONFIG_FTRACE_SELFTEST
9188 /* Let selftest have access to static functions in this file */
9189 #include "trace_selftest.c"
9190 #endif
9191 
9192 static ssize_t
trace_options_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9193 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
9194 			loff_t *ppos)
9195 {
9196 	struct trace_option_dentry *topt = filp->private_data;
9197 	char *buf;
9198 
9199 	if (topt->flags->val & topt->opt->bit)
9200 		buf = "1\n";
9201 	else
9202 		buf = "0\n";
9203 
9204 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9205 }
9206 
9207 static ssize_t
trace_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9208 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
9209 			 loff_t *ppos)
9210 {
9211 	struct trace_option_dentry *topt = filp->private_data;
9212 	unsigned long val;
9213 	int ret;
9214 
9215 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9216 	if (ret)
9217 		return ret;
9218 
9219 	if (val != 0 && val != 1)
9220 		return -EINVAL;
9221 
9222 	if (!!(topt->flags->val & topt->opt->bit) != val) {
9223 		guard(mutex)(&trace_types_lock);
9224 		ret = __set_tracer_option(topt->tr, topt->flags,
9225 					  topt->opt, !val);
9226 		if (ret)
9227 			return ret;
9228 	}
9229 
9230 	*ppos += cnt;
9231 
9232 	return cnt;
9233 }
9234 
tracing_open_options(struct inode * inode,struct file * filp)9235 static int tracing_open_options(struct inode *inode, struct file *filp)
9236 {
9237 	struct trace_option_dentry *topt = inode->i_private;
9238 	int ret;
9239 
9240 	ret = tracing_check_open_get_tr(topt->tr);
9241 	if (ret)
9242 		return ret;
9243 
9244 	filp->private_data = inode->i_private;
9245 	return 0;
9246 }
9247 
tracing_release_options(struct inode * inode,struct file * file)9248 static int tracing_release_options(struct inode *inode, struct file *file)
9249 {
9250 	struct trace_option_dentry *topt = file->private_data;
9251 
9252 	trace_array_put(topt->tr);
9253 	return 0;
9254 }
9255 
9256 static const struct file_operations trace_options_fops = {
9257 	.open = tracing_open_options,
9258 	.read = trace_options_read,
9259 	.write = trace_options_write,
9260 	.llseek	= generic_file_llseek,
9261 	.release = tracing_release_options,
9262 };
9263 
9264 /*
9265  * In order to pass in both the trace_array descriptor as well as the index
9266  * to the flag that the trace option file represents, the trace_array
9267  * has a character array of trace_flags_index[], which holds the index
9268  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9269  * The address of this character array is passed to the flag option file
9270  * read/write callbacks.
9271  *
9272  * In order to extract both the index and the trace_array descriptor,
9273  * get_tr_index() uses the following algorithm.
9274  *
9275  *   idx = *ptr;
9276  *
9277  * As the pointer itself contains the address of the index (remember
9278  * As the pointer points into the index array, the value it reads back is
9279  * that slot's own position (remember, index[1] == 1).
9280  *
9281  * Then, to get the trace_array descriptor, subtracting that index from
9282  * the ptr takes us back to the start of the index array itself.
9283  *   ptr - idx == &index[0]
9284  *
9285  * Then a simple container_of() from that pointer gets us to the
9286  * trace_array descriptor.
9287  */
get_tr_index(void * data,struct trace_array ** ptr,unsigned int * pindex)9288 static void get_tr_index(void *data, struct trace_array **ptr,
9289 			 unsigned int *pindex)
9290 {
9291 	*pindex = *(unsigned char *)data;
9292 
9293 	*ptr = container_of(data - *pindex, struct trace_array,
9294 			    trace_flags_index);
9295 }
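/*
 * A worked example (values chosen for illustration): the option file for
 * flag bit 5 is handed &tr->trace_flags_index[5] as its data, and that
 * byte holds the value 5.  Recovering both pieces then looks like:
 *
 *	unsigned int idx = *(unsigned char *)data;	// idx == 5
 *	struct trace_array *tr =
 *		container_of(data - idx, struct trace_array,
 *			     trace_flags_index);
 *
 * i.e. stepping back idx bytes lands on &trace_flags_index[0], whose
 * enclosing structure is the trace_array, exactly as get_tr_index() does.
 */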
9296 
9297 static ssize_t
trace_options_core_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9298 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9299 			loff_t *ppos)
9300 {
9301 	void *tr_index = filp->private_data;
9302 	struct trace_array *tr;
9303 	unsigned int index;
9304 	char *buf;
9305 
9306 	get_tr_index(tr_index, &tr, &index);
9307 
9308 	if (tr->trace_flags & (1 << index))
9309 		buf = "1\n";
9310 	else
9311 		buf = "0\n";
9312 
9313 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9314 }
9315 
9316 static ssize_t
trace_options_core_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9317 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9318 			 loff_t *ppos)
9319 {
9320 	void *tr_index = filp->private_data;
9321 	struct trace_array *tr;
9322 	unsigned int index;
9323 	unsigned long val;
9324 	int ret;
9325 
9326 	get_tr_index(tr_index, &tr, &index);
9327 
9328 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9329 	if (ret)
9330 		return ret;
9331 
9332 	if (val != 0 && val != 1)
9333 		return -EINVAL;
9334 
9335 	mutex_lock(&event_mutex);
9336 	mutex_lock(&trace_types_lock);
9337 	ret = set_tracer_flag(tr, 1 << index, val);
9338 	mutex_unlock(&trace_types_lock);
9339 	mutex_unlock(&event_mutex);
9340 
9341 	if (ret < 0)
9342 		return ret;
9343 
9344 	*ppos += cnt;
9345 
9346 	return cnt;
9347 }
9348 
9349 static const struct file_operations trace_options_core_fops = {
9350 	.open = tracing_open_generic,
9351 	.read = trace_options_core_read,
9352 	.write = trace_options_core_write,
9353 	.llseek = generic_file_llseek,
9354 };
9355 
trace_create_file(const char * name,umode_t mode,struct dentry * parent,void * data,const struct file_operations * fops)9356 struct dentry *trace_create_file(const char *name,
9357 				 umode_t mode,
9358 				 struct dentry *parent,
9359 				 void *data,
9360 				 const struct file_operations *fops)
9361 {
9362 	struct dentry *ret;
9363 
9364 	ret = tracefs_create_file(name, mode, parent, data, fops);
9365 	if (!ret)
9366 		pr_warn("Could not create tracefs '%s' entry\n", name);
9367 
9368 	return ret;
9369 }
9370 
9371 
trace_options_init_dentry(struct trace_array * tr)9372 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9373 {
9374 	struct dentry *d_tracer;
9375 
9376 	if (tr->options)
9377 		return tr->options;
9378 
9379 	d_tracer = tracing_get_dentry(tr);
9380 	if (IS_ERR(d_tracer))
9381 		return NULL;
9382 
9383 	tr->options = tracefs_create_dir("options", d_tracer);
9384 	if (!tr->options) {
9385 		pr_warn("Could not create tracefs directory 'options'\n");
9386 		return NULL;
9387 	}
9388 
9389 	return tr->options;
9390 }
9391 
9392 static void
create_trace_option_file(struct trace_array * tr,struct trace_option_dentry * topt,struct tracer_flags * flags,struct tracer_opt * opt)9393 create_trace_option_file(struct trace_array *tr,
9394 			 struct trace_option_dentry *topt,
9395 			 struct tracer_flags *flags,
9396 			 struct tracer_opt *opt)
9397 {
9398 	struct dentry *t_options;
9399 
9400 	t_options = trace_options_init_dentry(tr);
9401 	if (!t_options)
9402 		return;
9403 
9404 	topt->flags = flags;
9405 	topt->opt = opt;
9406 	topt->tr = tr;
9407 
9408 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9409 					t_options, topt, &trace_options_fops);
9410 
9411 }
9412 
9413 static void
create_trace_option_files(struct trace_array * tr,struct tracer * tracer)9414 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9415 {
9416 	struct trace_option_dentry *topts;
9417 	struct trace_options *tr_topts;
9418 	struct tracer_flags *flags;
9419 	struct tracer_opt *opts;
9420 	int cnt;
9421 	int i;
9422 
9423 	if (!tracer)
9424 		return;
9425 
9426 	flags = tracer->flags;
9427 
9428 	if (!flags || !flags->opts)
9429 		return;
9430 
9431 	/*
9432 	 * If this is an instance, only create flags for tracers
9433 	 * the instance may have.
9434 	 */
9435 	if (!trace_ok_for_array(tracer, tr))
9436 		return;
9437 
9438 	for (i = 0; i < tr->nr_topts; i++) {
9439 		/* Make sure there are no duplicate flags. */
9440 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9441 			return;
9442 	}
9443 
9444 	opts = flags->opts;
9445 
9446 	for (cnt = 0; opts[cnt].name; cnt++)
9447 		;
9448 
9449 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9450 	if (!topts)
9451 		return;
9452 
9453 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9454 			    GFP_KERNEL);
9455 	if (!tr_topts) {
9456 		kfree(topts);
9457 		return;
9458 	}
9459 
9460 	tr->topts = tr_topts;
9461 	tr->topts[tr->nr_topts].tracer = tracer;
9462 	tr->topts[tr->nr_topts].topts = topts;
9463 	tr->nr_topts++;
9464 
9465 	for (cnt = 0; opts[cnt].name; cnt++) {
9466 		create_trace_option_file(tr, &topts[cnt], flags,
9467 					 &opts[cnt]);
9468 		MEM_FAIL(topts[cnt].entry == NULL,
9469 			  "Failed to create trace option: %s",
9470 			  opts[cnt].name);
9471 	}
9472 }
9473 
9474 static struct dentry *
create_trace_option_core_file(struct trace_array * tr,const char * option,long index)9475 create_trace_option_core_file(struct trace_array *tr,
9476 			      const char *option, long index)
9477 {
9478 	struct dentry *t_options;
9479 
9480 	t_options = trace_options_init_dentry(tr);
9481 	if (!t_options)
9482 		return NULL;
9483 
9484 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9485 				 (void *)&tr->trace_flags_index[index],
9486 				 &trace_options_core_fops);
9487 }
9488 
create_trace_options_dir(struct trace_array * tr)9489 static void create_trace_options_dir(struct trace_array *tr)
9490 {
9491 	struct dentry *t_options;
9492 	bool top_level = tr == &global_trace;
9493 	int i;
9494 
9495 	t_options = trace_options_init_dentry(tr);
9496 	if (!t_options)
9497 		return;
9498 
9499 	for (i = 0; trace_options[i]; i++) {
9500 		if (top_level ||
9501 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9502 			create_trace_option_core_file(tr, trace_options[i], i);
9503 	}
9504 }
9505 
9506 static ssize_t
rb_simple_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9507 rb_simple_read(struct file *filp, char __user *ubuf,
9508 	       size_t cnt, loff_t *ppos)
9509 {
9510 	struct trace_array *tr = filp->private_data;
9511 	char buf[64];
9512 	int r;
9513 
9514 	r = tracer_tracing_is_on(tr);
9515 	r = sprintf(buf, "%d\n", r);
9516 
9517 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9518 }
9519 
9520 static ssize_t
rb_simple_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9521 rb_simple_write(struct file *filp, const char __user *ubuf,
9522 		size_t cnt, loff_t *ppos)
9523 {
9524 	struct trace_array *tr = filp->private_data;
9525 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9526 	unsigned long val;
9527 	int ret;
9528 
9529 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9530 	if (ret)
9531 		return ret;
9532 
9533 	if (buffer) {
9534 		guard(mutex)(&trace_types_lock);
9535 		if (!!val == tracer_tracing_is_on(tr)) {
9536 			val = 0; /* do nothing */
9537 		} else if (val) {
9538 			tracer_tracing_on(tr);
9539 			if (tr->current_trace->start)
9540 				tr->current_trace->start(tr);
9541 		} else {
9542 			tracer_tracing_off(tr);
9543 			if (tr->current_trace->stop)
9544 				tr->current_trace->stop(tr);
9545 			/* Wake up any waiters */
9546 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9547 		}
9548 	}
9549 
9550 	(*ppos)++;
9551 
9552 	return cnt;
9553 }
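/*
 * This pair backs the "tracing_on" control file: e.g. writing 0 turns
 * recording off and wakes any blocked readers, writing 1 turns it back
 * on; the currently selected tracer stays in place either way.
 */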
9554 
9555 static const struct file_operations rb_simple_fops = {
9556 	.open		= tracing_open_generic_tr,
9557 	.read		= rb_simple_read,
9558 	.write		= rb_simple_write,
9559 	.release	= tracing_release_generic_tr,
9560 	.llseek		= default_llseek,
9561 };
9562 
9563 static ssize_t
buffer_percent_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9564 buffer_percent_read(struct file *filp, char __user *ubuf,
9565 		    size_t cnt, loff_t *ppos)
9566 {
9567 	struct trace_array *tr = filp->private_data;
9568 	char buf[64];
9569 	int r;
9570 
9571 	r = tr->buffer_percent;
9572 	r = sprintf(buf, "%d\n", r);
9573 
9574 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9575 }
9576 
9577 static ssize_t
buffer_percent_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9578 buffer_percent_write(struct file *filp, const char __user *ubuf,
9579 		     size_t cnt, loff_t *ppos)
9580 {
9581 	struct trace_array *tr = filp->private_data;
9582 	unsigned long val;
9583 	int ret;
9584 
9585 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9586 	if (ret)
9587 		return ret;
9588 
9589 	if (val > 100)
9590 		return -EINVAL;
9591 
9592 	tr->buffer_percent = val;
9593 
9594 	(*ppos)++;
9595 
9596 	return cnt;
9597 }
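/*
 * buffer_percent is the wake-up watermark used by blocked readers (see
 * the wait_on_pipe()/ring_buffer_wait() callers above): 0 wakes a reader
 * as soon as any data is present, 100 only once the buffer is full.
 */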
9598 
9599 static const struct file_operations buffer_percent_fops = {
9600 	.open		= tracing_open_generic_tr,
9601 	.read		= buffer_percent_read,
9602 	.write		= buffer_percent_write,
9603 	.release	= tracing_release_generic_tr,
9604 	.llseek		= default_llseek,
9605 };
9606 
9607 static ssize_t
buffer_subbuf_size_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9608 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9609 {
9610 	struct trace_array *tr = filp->private_data;
9611 	size_t size;
9612 	char buf[64];
9613 	int order;
9614 	int r;
9615 
9616 	order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9617 	size = (PAGE_SIZE << order) / 1024;
9618 
9619 	r = sprintf(buf, "%zd\n", size);
9620 
9621 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9622 }
9623 
9624 static ssize_t
buffer_subbuf_size_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9625 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9626 			 size_t cnt, loff_t *ppos)
9627 {
9628 	struct trace_array *tr = filp->private_data;
9629 	unsigned long val;
9630 	int old_order;
9631 	int order;
9632 	int pages;
9633 	int ret;
9634 
9635 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9636 	if (ret)
9637 		return ret;
9638 
9639 	val *= 1024; /* value passed in is in KB */
9640 
9641 	pages = DIV_ROUND_UP(val, PAGE_SIZE);
9642 	order = fls(pages - 1);
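	/*
	 * e.g. with 4K pages, writing 8 gives pages = 2 and order = 1 (one
	 * 8K sub-buffer), while writing 4 gives order = 0, the default.
	 */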
9643 
9644 	/* limit between 1 and 128 system pages */
9645 	if (order < 0 || order > 7)
9646 		return -EINVAL;
9647 
9648 	/* Do not allow tracing while changing the order of the ring buffer */
9649 	tracing_stop_tr(tr);
9650 
9651 	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9652 	if (old_order == order)
9653 		goto out;
9654 
9655 	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9656 	if (ret)
9657 		goto out;
9658 
9659 #ifdef CONFIG_TRACER_MAX_TRACE
9660 
9661 	if (!tr->allocated_snapshot)
9662 		goto out_max;
9663 
9664 	ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9665 	if (ret) {
9666 		/* Put back the old order */
9667 		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9668 		if (WARN_ON_ONCE(cnt)) {
9669 			/*
9670 			 * AARGH! We are left with different orders!
9671 			 * The max buffer is our "snapshot" buffer.
9672 			 * When a tracer needs a snapshot (one of the
9673 			 * latency tracers), it swaps the max buffer
9674 			 * with the saved snapshot. We succeeded in
9675 			 * updating the order of the main buffer, but failed to
9676 			 * update the order of the max buffer. But when we tried
9677 			 * to reset the main buffer to the original size, we
9678 			 * failed there too. This is very unlikely to
9679 			 * happen, but if it does, warn and kill all
9680 			 * tracing.
9681 			 */
9682 			tracing_disabled = 1;
9683 		}
9684 		goto out;
9685 	}
9686  out_max:
9687 #endif
9688 	(*ppos)++;
9689  out:
9690 	if (ret)
9691 		cnt = ret;
9692 	tracing_start_tr(tr);
9693 	return cnt;
9694 }
9695 
9696 static const struct file_operations buffer_subbuf_size_fops = {
9697 	.open		= tracing_open_generic_tr,
9698 	.read		= buffer_subbuf_size_read,
9699 	.write		= buffer_subbuf_size_write,
9700 	.release	= tracing_release_generic_tr,
9701 	.llseek		= default_llseek,
9702 };
9703 
9704 static struct dentry *trace_instance_dir;
9705 
9706 static void
9707 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9708 
9709 #ifdef CONFIG_MODULES
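/*
 * The per-module delta translates addresses recorded by a previous boot
 * (kept in the persistent scratch area) into this boot's load addresses;
 * a module that is going away gets a delta of 0, leaving its old
 * addresses untranslated.
 */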
make_mod_delta(struct module * mod,void * data)9710 static int make_mod_delta(struct module *mod, void *data)
9711 {
9712 	struct trace_module_delta *module_delta;
9713 	struct trace_scratch *tscratch;
9714 	struct trace_mod_entry *entry;
9715 	struct trace_array *tr = data;
9716 	int i;
9717 
9718 	tscratch = tr->scratch;
9719 	module_delta = READ_ONCE(tr->module_delta);
9720 	for (i = 0; i < tscratch->nr_entries; i++) {
9721 		entry = &tscratch->entries[i];
9722 		if (strcmp(mod->name, entry->mod_name))
9723 			continue;
9724 		if (mod->state == MODULE_STATE_GOING)
9725 			module_delta->delta[i] = 0;
9726 		else
9727 			module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base
9728 						 - entry->mod_addr;
9729 		break;
9730 	}
9731 	return 0;
9732 }
9733 #else
make_mod_delta(struct module * mod,void * data)9734 static int make_mod_delta(struct module *mod, void *data)
9735 {
9736 	return 0;
9737 }
9738 #endif
9739 
mod_addr_comp(const void * a,const void * b,const void * data)9740 static int mod_addr_comp(const void *a, const void *b, const void *data)
9741 {
9742 	const struct trace_mod_entry *e1 = a;
9743 	const struct trace_mod_entry *e2 = b;
9744 
9745 	return e1->mod_addr > e2->mod_addr ? 1 : -1;
9746 }
9747 
setup_trace_scratch(struct trace_array * tr,struct trace_scratch * tscratch,unsigned int size)9748 static void setup_trace_scratch(struct trace_array *tr,
9749 				struct trace_scratch *tscratch, unsigned int size)
9750 {
9751 	struct trace_module_delta *module_delta;
9752 	struct trace_mod_entry *entry;
9753 	int i, nr_entries;
9754 
9755 	if (!tscratch)
9756 		return;
9757 
9758 	tr->scratch = tscratch;
9759 	tr->scratch_size = size;
9760 
9761 	if (tscratch->text_addr)
9762 		tr->text_delta = (unsigned long)_text - tscratch->text_addr;
9763 
9764 	if (struct_size(tscratch, entries, tscratch->nr_entries) > size)
9765 		goto reset;
9766 
9767 	/* Check if each module name is a valid string */
9768 	for (i = 0; i < tscratch->nr_entries; i++) {
9769 		int n;
9770 
9771 		entry = &tscratch->entries[i];
9772 
9773 		for (n = 0; n < MODULE_NAME_LEN; n++) {
9774 			if (entry->mod_name[n] == '\0')
9775 				break;
9776 			if (!isprint(entry->mod_name[n]))
9777 				goto reset;
9778 		}
9779 		if (n == MODULE_NAME_LEN)
9780 			goto reset;
9781 	}
9782 
9783 	/* Sort the entries so that we can find the appropriate module from an address. */
9784 	nr_entries = tscratch->nr_entries;
9785 	sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
9786 	       mod_addr_comp, NULL, NULL);
9787 
9788 	if (IS_ENABLED(CONFIG_MODULES)) {
9789 		module_delta = kzalloc(struct_size(module_delta, delta, nr_entries), GFP_KERNEL);
9790 		if (!module_delta) {
9791 			pr_info("module_delta allocation failed. Not able to decode module address.\n");
9792 			goto reset;
9793 		}
9794 		init_rcu_head(&module_delta->rcu);
9795 	} else
9796 		module_delta = NULL;
9797 	WRITE_ONCE(tr->module_delta, module_delta);
9798 
9799 	/* Scan modules to make text delta for modules. */
9800 	module_for_each_mod(make_mod_delta, tr);
9801 
9802 	/* Set trace_clock as the same of the previous boot. */
9803 	if (tscratch->clock_id != tr->clock_id) {
9804 		if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) ||
9805 		    tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
9806 			pr_info("The previous trace_clock info is not valid.\n");
9807 			goto reset;
9808 		}
9809 	}
9810 	return;
9811  reset:
9812 	/* Invalid module table: wipe the scratch area */
9813 	memset(tscratch, 0, size);
9814 }
9815 
9816 static int
allocate_trace_buffer(struct trace_array * tr,struct array_buffer * buf,int size)9817 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9818 {
9819 	enum ring_buffer_flags rb_flags;
9820 	struct trace_scratch *tscratch;
9821 	unsigned int scratch_size = 0;
9822 
9823 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9824 
9825 	buf->tr = tr;
9826 
9827 	if (tr->range_addr_start && tr->range_addr_size) {
9828 		/* Add scratch buffer to handle 128 modules */
9829 		buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
9830 						      tr->range_addr_start,
9831 						      tr->range_addr_size,
9832 						      struct_size(tscratch, entries, 128));
9833 
9834 		tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size);
9835 		setup_trace_scratch(tr, tscratch, scratch_size);
9836 
9837 		/*
9838 		 * This is basically the same as a mapped buffer,
9839 		 * with the same restrictions.
9840 		 */
9841 		tr->mapped++;
9842 	} else {
9843 		buf->buffer = ring_buffer_alloc(size, rb_flags);
9844 	}
9845 	if (!buf->buffer)
9846 		return -ENOMEM;
9847 
9848 	buf->data = alloc_percpu(struct trace_array_cpu);
9849 	if (!buf->data) {
9850 		ring_buffer_free(buf->buffer);
9851 		buf->buffer = NULL;
9852 		return -ENOMEM;
9853 	}
9854 
9855 	/* Allocate the first page for all buffers */
9856 	set_buffer_entries(&tr->array_buffer,
9857 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9858 
9859 	return 0;
9860 }
9861 
free_trace_buffer(struct array_buffer * buf)9862 static void free_trace_buffer(struct array_buffer *buf)
9863 {
9864 	if (buf->buffer) {
9865 		ring_buffer_free(buf->buffer);
9866 		buf->buffer = NULL;
9867 		free_percpu(buf->data);
9868 		buf->data = NULL;
9869 	}
9870 }
9871 
allocate_trace_buffers(struct trace_array * tr,int size)9872 static int allocate_trace_buffers(struct trace_array *tr, int size)
9873 {
9874 	int ret;
9875 
9876 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9877 	if (ret)
9878 		return ret;
9879 
9880 #ifdef CONFIG_TRACER_MAX_TRACE
9881 	/* Trace arrays with a fixed memory-mapped buffer do not get snapshot buffers */
9882 	if (tr->range_addr_start)
9883 		return 0;
9884 
9885 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9886 				    allocate_snapshot ? size : 1);
9887 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9888 		free_trace_buffer(&tr->array_buffer);
9889 		return -ENOMEM;
9890 	}
9891 	tr->allocated_snapshot = allocate_snapshot;
9892 
9893 	allocate_snapshot = false;
9894 #endif
9895 
9896 	return 0;
9897 }
9898 
free_trace_buffers(struct trace_array * tr)9899 static void free_trace_buffers(struct trace_array *tr)
9900 {
9901 	if (!tr)
9902 		return;
9903 
9904 	free_trace_buffer(&tr->array_buffer);
9905 	kfree(tr->module_delta);
9906 
9907 #ifdef CONFIG_TRACER_MAX_TRACE
9908 	free_trace_buffer(&tr->max_buffer);
9909 #endif
9910 }
9911 
init_trace_flags_index(struct trace_array * tr)9912 static void init_trace_flags_index(struct trace_array *tr)
9913 {
9914 	int i;
9915 
9916 	/* Used by the trace options files */
9917 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9918 		tr->trace_flags_index[i] = i;
9919 }
9920 
__update_tracer_options(struct trace_array * tr)9921 static void __update_tracer_options(struct trace_array *tr)
9922 {
9923 	struct tracer *t;
9924 
9925 	for (t = trace_types; t; t = t->next)
9926 		add_tracer_options(tr, t);
9927 }
9928 
update_tracer_options(struct trace_array * tr)9929 static void update_tracer_options(struct trace_array *tr)
9930 {
9931 	guard(mutex)(&trace_types_lock);
9932 	tracer_options_updated = true;
9933 	__update_tracer_options(tr);
9934 }
9935 
9936 /* Must have trace_types_lock held */
trace_array_find(const char * instance)9937 struct trace_array *trace_array_find(const char *instance)
9938 {
9939 	struct trace_array *tr, *found = NULL;
9940 
9941 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9942 		if (tr->name && strcmp(tr->name, instance) == 0) {
9943 			found = tr;
9944 			break;
9945 		}
9946 	}
9947 
9948 	return found;
9949 }
9950 
trace_array_find_get(const char * instance)9951 struct trace_array *trace_array_find_get(const char *instance)
9952 {
9953 	struct trace_array *tr;
9954 
9955 	guard(mutex)(&trace_types_lock);
9956 	tr = trace_array_find(instance);
9957 	if (tr)
9958 		tr->ref++;
9959 
9960 	return tr;
9961 }
9962 
trace_array_create_dir(struct trace_array * tr)9963 static int trace_array_create_dir(struct trace_array *tr)
9964 {
9965 	int ret;
9966 
9967 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9968 	if (!tr->dir)
9969 		return -EINVAL;
9970 
9971 	ret = event_trace_add_tracer(tr->dir, tr);
9972 	if (ret) {
9973 		tracefs_remove(tr->dir);
9974 		return ret;
9975 	}
9976 
9977 	init_tracer_tracefs(tr, tr->dir);
9978 	__update_tracer_options(tr);
9979 
9980 	return ret;
9981 }
9982 
9983 static struct trace_array *
trace_array_create_systems(const char * name,const char * systems,unsigned long range_addr_start,unsigned long range_addr_size)9984 trace_array_create_systems(const char *name, const char *systems,
9985 			   unsigned long range_addr_start,
9986 			   unsigned long range_addr_size)
9987 {
9988 	struct trace_array *tr;
9989 	int ret;
9990 
9991 	ret = -ENOMEM;
9992 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9993 	if (!tr)
9994 		return ERR_PTR(ret);
9995 
9996 	tr->name = kstrdup(name, GFP_KERNEL);
9997 	if (!tr->name)
9998 		goto out_free_tr;
9999 
10000 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
10001 		goto out_free_tr;
10002 
10003 	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
10004 		goto out_free_tr;
10005 
10006 	if (systems) {
10007 		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
10008 		if (!tr->system_names)
10009 			goto out_free_tr;
10010 	}
10011 
10012 	/* Only for boot-up memory-mapped ring buffers */
10013 	tr->range_addr_start = range_addr_start;
10014 	tr->range_addr_size = range_addr_size;
10015 
10016 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
10017 
10018 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
10019 
10020 	raw_spin_lock_init(&tr->start_lock);
10021 
10022 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10023 #ifdef CONFIG_TRACER_MAX_TRACE
10024 	spin_lock_init(&tr->snapshot_trigger_lock);
10025 #endif
10026 	tr->current_trace = &nop_trace;
10027 
10028 	INIT_LIST_HEAD(&tr->systems);
10029 	INIT_LIST_HEAD(&tr->events);
10030 	INIT_LIST_HEAD(&tr->hist_vars);
10031 	INIT_LIST_HEAD(&tr->err_log);
10032 	INIT_LIST_HEAD(&tr->marker_list);
10033 
10034 #ifdef CONFIG_MODULES
10035 	INIT_LIST_HEAD(&tr->mod_events);
10036 #endif
10037 
10038 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
10039 		goto out_free_tr;
10040 
10041 	/* The ring buffer is expanded by default */
10042 	trace_set_ring_buffer_expanded(tr);
10043 
10044 	if (ftrace_allocate_ftrace_ops(tr) < 0)
10045 		goto out_free_tr;
10046 
10047 	ftrace_init_trace_array(tr);
10048 
10049 	init_trace_flags_index(tr);
10050 
10051 	if (trace_instance_dir) {
10052 		ret = trace_array_create_dir(tr);
10053 		if (ret)
10054 			goto out_free_tr;
10055 	} else
10056 		__trace_early_add_events(tr);
10057 
10058 	list_add(&tr->list, &ftrace_trace_arrays);
10059 
10060 	tr->ref++;
10061 
10062 	return tr;
10063 
10064  out_free_tr:
10065 	ftrace_free_ftrace_ops(tr);
10066 	free_trace_buffers(tr);
10067 	free_cpumask_var(tr->pipe_cpumask);
10068 	free_cpumask_var(tr->tracing_cpumask);
10069 	kfree_const(tr->system_names);
10070 	kfree(tr->range_name);
10071 	kfree(tr->name);
10072 	kfree(tr);
10073 
10074 	return ERR_PTR(ret);
10075 }
10076 
trace_array_create(const char * name)10077 static struct trace_array *trace_array_create(const char *name)
10078 {
10079 	return trace_array_create_systems(name, NULL, 0, 0);
10080 }
10081 
instance_mkdir(const char * name)10082 static int instance_mkdir(const char *name)
10083 {
10084 	struct trace_array *tr;
10085 	int ret;
10086 
10087 	guard(mutex)(&event_mutex);
10088 	guard(mutex)(&trace_types_lock);
10089 
10090 	ret = -EEXIST;
10091 	if (trace_array_find(name))
10092 		return -EEXIST;
10093 
10094 	tr = trace_array_create(name);
10095 
10096 	ret = PTR_ERR_OR_ZERO(tr);
10097 
10098 	return ret;
10099 }
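/*
 * This backs directory creation under the tracefs "instances" directory,
 * e.g.:
 *
 *	mkdir /sys/kernel/tracing/instances/foo
 *
 * The new instance gets its own ring buffers and its own copies of most
 * control files via init_tracer_tracefs() below.
 */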
10100 
10101 #ifdef CONFIG_MMU
map_pages(unsigned long start,unsigned long size)10102 static u64 map_pages(unsigned long start, unsigned long size)
10103 {
10104 	unsigned long vmap_start, vmap_end;
10105 	struct vm_struct *area;
10106 	int ret;
10107 
10108 	area = get_vm_area(size, VM_IOREMAP);
10109 	if (!area)
10110 		return 0;
10111 
10112 	vmap_start = (unsigned long) area->addr;
10113 	vmap_end = vmap_start + size;
10114 
10115 	ret = vmap_page_range(vmap_start, vmap_end,
10116 			      start, pgprot_nx(PAGE_KERNEL));
10117 	if (ret < 0) {
10118 		free_vm_area(area);
10119 		return 0;
10120 	}
10121 
10122 	return (u64)vmap_start;
10123 }
10124 #else
map_pages(unsigned long start,unsigned long size)10125 static inline u64 map_pages(unsigned long start, unsigned long size)
10126 {
10127 	return 0;
10128 }
10129 #endif
10130 
10131 /**
10132  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
10133  * @name: The name of the trace array to be looked up/created.
10134  * @systems: A list of systems to create event directories for (NULL for all)
10135  *
10136  * Returns a pointer to the trace array with the given name, or
10137  * NULL if it cannot be created.
10138  *
10139  * NOTE: This function increments the reference counter associated with the
10140  * trace array returned. This makes sure it cannot be freed while in use.
10141  * Use trace_array_put() once the trace array is no longer needed.
10142  * If the trace_array is to be freed, trace_array_destroy() needs to
10143  * be called after the trace_array_put(), or simply let user space delete
10144  * it from the tracefs instances directory. But until the
10145  * trace_array_put() is called, user space can not delete it.
10146  * trace_array_put() is called, user space cannot delete it.
10147  */
trace_array_get_by_name(const char * name,const char * systems)10148 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
10149 {
10150 	struct trace_array *tr;
10151 
10152 	guard(mutex)(&event_mutex);
10153 	guard(mutex)(&trace_types_lock);
10154 
10155 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10156 		if (tr->name && strcmp(tr->name, name) == 0) {
10157 			tr->ref++;
10158 			return tr;
10159 		}
10160 	}
10161 
10162 	tr = trace_array_create_systems(name, systems, 0, 0);
10163 
10164 	if (IS_ERR(tr))
10165 		tr = NULL;
10166 	else
10167 		tr->ref++;
10168 
10169 	return tr;
10170 }
10171 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
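/*
 * Minimal in-kernel usage sketch (illustrative; the instance name and the
 * decision to destroy it afterwards are assumptions):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my-instance", NULL);
 *	if (!tr)
 *		return -ENOMEM;
 *	... record into tr ...
 *	trace_array_put(tr);
 *	trace_array_destroy(tr);	// only if the instance should go away
 */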
10172 
__remove_instance(struct trace_array * tr)10173 static int __remove_instance(struct trace_array *tr)
10174 {
10175 	int i;
10176 
10177 	/* Reference counter for a newly created trace array = 1. */
10178 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
10179 		return -EBUSY;
10180 
10181 	list_del(&tr->list);
10182 
10183 	/* Disable all the flags that were enabled coming in */
10184 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
10185 		if ((1 << i) & ZEROED_TRACE_FLAGS)
10186 			set_tracer_flag(tr, 1 << i, 0);
10187 	}
10188 
10189 	if (printk_trace == tr)
10190 		update_printk_trace(&global_trace);
10191 
10192 	if (update_marker_trace(tr, 0))
10193 		synchronize_rcu();
10194 
10195 	tracing_set_nop(tr);
10196 	clear_ftrace_function_probes(tr);
10197 	event_trace_del_tracer(tr);
10198 	ftrace_clear_pids(tr);
10199 	ftrace_destroy_function_files(tr);
10200 	tracefs_remove(tr->dir);
10201 	free_percpu(tr->last_func_repeats);
10202 	free_trace_buffers(tr);
10203 	clear_tracing_err_log(tr);
10204 
10205 	if (tr->range_name) {
10206 		reserve_mem_release_by_name(tr->range_name);
10207 		kfree(tr->range_name);
10208 	}
10209 
10210 	for (i = 0; i < tr->nr_topts; i++) {
10211 		kfree(tr->topts[i].topts);
10212 	}
10213 	kfree(tr->topts);
10214 
10215 	free_cpumask_var(tr->pipe_cpumask);
10216 	free_cpumask_var(tr->tracing_cpumask);
10217 	kfree_const(tr->system_names);
10218 	kfree(tr->name);
10219 	kfree(tr);
10220 
10221 	return 0;
10222 }
10223 
trace_array_destroy(struct trace_array * this_tr)10224 int trace_array_destroy(struct trace_array *this_tr)
10225 {
10226 	struct trace_array *tr;
10227 
10228 	if (!this_tr)
10229 		return -EINVAL;
10230 
10231 	guard(mutex)(&event_mutex);
10232 	guard(mutex)(&trace_types_lock);
10233 
10234 
10235 	/* Make sure the trace array exists before destroying it. */
10236 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10237 		if (tr == this_tr)
10238 			return __remove_instance(tr);
10239 	}
10240 
10241 	return -ENODEV;
10242 }
10243 EXPORT_SYMBOL_GPL(trace_array_destroy);
10244 
instance_rmdir(const char * name)10245 static int instance_rmdir(const char *name)
10246 {
10247 	struct trace_array *tr;
10248 
10249 	guard(mutex)(&event_mutex);
10250 	guard(mutex)(&trace_types_lock);
10251 
10252 	tr = trace_array_find(name);
10253 	if (!tr)
10254 		return -ENODEV;
10255 
10256 	return __remove_instance(tr);
10257 }
10258 
create_trace_instances(struct dentry * d_tracer)10259 static __init void create_trace_instances(struct dentry *d_tracer)
10260 {
10261 	struct trace_array *tr;
10262 
10263 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
10264 							 instance_mkdir,
10265 							 instance_rmdir);
10266 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
10267 		return;
10268 
10269 	guard(mutex)(&event_mutex);
10270 	guard(mutex)(&trace_types_lock);
10271 
10272 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10273 		if (!tr->name)
10274 			continue;
10275 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
10276 			     "Failed to create instance directory\n"))
10277 			return;
10278 	}
10279 }
10280 
10281 static void
init_tracer_tracefs(struct trace_array * tr,struct dentry * d_tracer)10282 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
10283 {
10284 	int cpu;
10285 
10286 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
10287 			tr, &show_traces_fops);
10288 
10289 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
10290 			tr, &set_tracer_fops);
10291 
10292 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
10293 			  tr, &tracing_cpumask_fops);
10294 
10295 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
10296 			  tr, &tracing_iter_fops);
10297 
10298 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
10299 			  tr, &tracing_fops);
10300 
10301 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
10302 			  tr, &tracing_pipe_fops);
10303 
10304 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
10305 			  tr, &tracing_entries_fops);
10306 
10307 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
10308 			  tr, &tracing_total_entries_fops);
10309 
10310 	trace_create_file("free_buffer", 0200, d_tracer,
10311 			  tr, &tracing_free_buffer_fops);
10312 
10313 	trace_create_file("trace_marker", 0220, d_tracer,
10314 			  tr, &tracing_mark_fops);
10315 
10316 	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
10317 
10318 	trace_create_file("trace_marker_raw", 0220, d_tracer,
10319 			  tr, &tracing_mark_raw_fops);
10320 
10321 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
10322 			  &trace_clock_fops);
10323 
10324 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
10325 			  tr, &rb_simple_fops);
10326 
10327 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
10328 			  &trace_time_stamp_mode_fops);
10329 
10330 	tr->buffer_percent = 50;
10331 
10332 	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
10333 			tr, &buffer_percent_fops);
10334 
10335 	trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
10336 			  tr, &buffer_subbuf_size_fops);
10337 
10338 	create_trace_options_dir(tr);
10339 
10340 #ifdef CONFIG_TRACER_MAX_TRACE
10341 	trace_create_maxlat_file(tr, d_tracer);
10342 #endif
10343 
10344 	if (ftrace_create_function_files(tr, d_tracer))
10345 		MEM_FAIL(1, "Could not allocate function filter files");
10346 
10347 	if (tr->range_addr_start) {
10348 		trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
10349 				  tr, &last_boot_fops);
10350 #ifdef CONFIG_TRACER_SNAPSHOT
10351 	} else {
10352 		trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
10353 				  tr, &snapshot_fops);
10354 #endif
10355 	}
10356 
10357 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
10358 			  tr, &tracing_err_log_fops);
10359 
10360 	for_each_tracing_cpu(cpu)
10361 		tracing_init_tracefs_percpu(tr, cpu);
10362 
10363 	ftrace_init_tracefs(tr, d_tracer);
10364 }
10365 
10366 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
trace_automount(struct dentry * mntpt,void * ignore)10367 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
10368 {
10369 	struct vfsmount *mnt;
10370 	struct file_system_type *type;
10371 	struct fs_context *fc;
10372 	int ret;
10373 
10374 	/*
10375 	 * To maintain backward compatibility for tools that mount
10376 	 * debugfs to get to the tracing facility, tracefs is automatically
10377 	 * mounted to the debugfs/tracing directory.
10378 	 */
10379 	type = get_fs_type("tracefs");
10380 	if (!type)
10381 		return NULL;
10382 
10383 	fc = fs_context_for_submount(type, mntpt);
10384 	put_filesystem(type);
10385 	if (IS_ERR(fc))
10386 		return ERR_CAST(fc);
10387 
10388 	pr_warn("NOTICE: Automounting of tracing to debugfs is deprecated and will be removed in 2030\n");
10389 
10390 	ret = vfs_parse_fs_string(fc, "source", "tracefs");
10391 	if (!ret)
10392 		mnt = fc_mount(fc);
10393 	else
10394 		mnt = ERR_PTR(ret);
10395 
10396 	put_fs_context(fc);
10397 	return mnt;
10398 }
10399 #endif
10400 
10401 /**
10402  * tracing_init_dentry - initialize top level trace array
10403  *
10404  * This is called when creating files or directories in the tracing
10405  * directory. It is called via fs_initcall() by any of the boot up code
10406  * and expects to return the dentry of the top level tracing directory.
10407  */
tracing_init_dentry(void)10408 int tracing_init_dentry(void)
10409 {
10410 	struct trace_array *tr = &global_trace;
10411 
10412 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10413 		pr_warn("Tracing disabled due to lockdown\n");
10414 		return -EPERM;
10415 	}
10416 
10417 	/* The top-level trace array uses NULL as its parent */
10418 	if (tr->dir)
10419 		return 0;
10420 
10421 	if (WARN_ON(!tracefs_initialized()))
10422 		return -ENODEV;
10423 
10424 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
10425 	/*
10426 	 * As there may still be users that expect the tracing
10427 	 * files to exist in debugfs/tracing, we must automount
10428 	 * the tracefs file system there, so older tools still
10429 	 * work with the newer kernel.
10430 	 */
10431 	tr->dir = debugfs_create_automount("tracing", NULL,
10432 					   trace_automount, NULL);
10433 #endif
10434 
10435 	return 0;
10436 }
10437 
10438 extern struct trace_eval_map *__start_ftrace_eval_maps[];
10439 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
10440 
10441 static struct workqueue_struct *eval_map_wq __initdata;
10442 static struct work_struct eval_map_work __initdata;
10443 static struct work_struct tracerfs_init_work __initdata;
10444 
eval_map_work_func(struct work_struct * work)10445 static void __init eval_map_work_func(struct work_struct *work)
10446 {
10447 	int len;
10448 
10449 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10450 	trace_event_update_with_eval_map(NULL, __start_ftrace_eval_maps, len);
10451 }
10452 
trace_eval_init(void)10453 static int __init trace_eval_init(void)
10454 {
10455 	INIT_WORK(&eval_map_work, eval_map_work_func);
10456 
10457 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
10458 	if (!eval_map_wq) {
10459 		pr_err("Unable to allocate eval_map_wq\n");
10460 		/* Fall back to doing the eval map update synchronously */
10461 		eval_map_work_func(&eval_map_work);
10462 		return -ENOMEM;
10463 	}
10464 
10465 	queue_work(eval_map_wq, &eval_map_work);
10466 	return 0;
10467 }
10468 
10469 subsys_initcall(trace_eval_init);
10470 
10471 static int __init trace_eval_sync(void)
10472 {
10473 	/* Make sure the eval map updates are finished */
10474 	if (eval_map_wq)
10475 		destroy_workqueue(eval_map_wq);
10476 	return 0;
10477 }
10478 
10479 late_initcall_sync(trace_eval_sync);
10480 
10481 
10482 #ifdef CONFIG_MODULES
10483 
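/*
 * Sketch of the trick used below: every loaded module exports a
 * "__this_module" symbol, so probing for a module "foo" (name purely
 * illustrative) looks up "foo:__this_module" in kallsyms and treats a
 * non-zero address as "module is loaded".
 */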
10484 bool module_exists(const char *module)
10485 {
10486 	/* All modules have the symbol __this_module */
10487 	static const char this_mod[] = "__this_module";
10488 	char modname[MODULE_NAME_LEN + sizeof(this_mod) + 2];
10489 	unsigned long val;
10490 	int n;
10491 
10492 	n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
10493 
10494 	if (n > sizeof(modname) - 1)
10495 		return false;
10496 
10497 	val = module_kallsyms_lookup_name(modname);
10498 	return val != 0;
10499 }
10500 
10501 static void trace_module_add_evals(struct module *mod)
10502 {
10503 	/*
10504 	 * Modules with bad taint do not have events created, so do
10505 	 * not bother with their enums either.
10506 	 */
10507 	if (trace_module_has_bad_taint(mod))
10508 		return;
10509 
10510 	/* Even if there are no trace_evals, this needs to sanitize field types. */
10511 	trace_event_update_with_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10512 }
10513 
10514 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
10515 static void trace_module_remove_evals(struct module *mod)
10516 {
10517 	union trace_eval_map_item *map;
10518 	union trace_eval_map_item **last = &trace_eval_maps;
10519 
10520 	if (!mod->num_trace_evals)
10521 		return;
10522 
10523 	guard(mutex)(&trace_eval_mutex);
10524 
10525 	map = trace_eval_maps;
10526 
10527 	while (map) {
10528 		if (map->head.mod == mod)
10529 			break;
10530 		map = trace_eval_jmp_to_tail(map);
10531 		last = &map->tail.next;
10532 		map = map->tail.next;
10533 	}
10534 	if (!map)
10535 		return;
10536 
10537 	*last = trace_eval_jmp_to_tail(map)->tail.next;
10538 	kfree(map);
10539 }
10540 #else
10541 static inline void trace_module_remove_evals(struct module *mod) { }
10542 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10543 
10544 static void trace_module_record(struct module *mod, bool add)
10545 {
10546 	struct trace_array *tr;
10547 	unsigned long flags;
10548 
10549 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10550 		flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT);
10551 		/* Update any persistent trace array that has already been started */
10552 		if (flags == TRACE_ARRAY_FL_BOOT && add) {
10553 			guard(mutex)(&scratch_mutex);
10554 			save_mod(mod, tr);
10555 		} else if (flags & TRACE_ARRAY_FL_LAST_BOOT) {
10556 			/* Update delta if the module loaded in previous boot */
10557 			make_mod_delta(mod, tr);
10558 		}
10559 	}
10560 }
10561 
10562 static int trace_module_notify(struct notifier_block *self,
10563 			       unsigned long val, void *data)
10564 {
10565 	struct module *mod = data;
10566 
10567 	switch (val) {
10568 	case MODULE_STATE_COMING:
10569 		trace_module_add_evals(mod);
10570 		trace_module_record(mod, true);
10571 		break;
10572 	case MODULE_STATE_GOING:
10573 		trace_module_remove_evals(mod);
10574 		trace_module_record(mod, false);
10575 		break;
10576 	}
10577 
10578 	return NOTIFY_OK;
10579 }
10580 
10581 static struct notifier_block trace_module_nb = {
10582 	.notifier_call = trace_module_notify,
10583 	.priority = 0,
10584 };
10585 #endif /* CONFIG_MODULES */
10586 
10587 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10588 {
10589 
10590 	event_trace_init();
10591 
10592 	init_tracer_tracefs(&global_trace, NULL);
10593 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
10594 
10595 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10596 			&global_trace, &tracing_thresh_fops);
10597 
10598 	trace_create_file("README", TRACE_MODE_READ, NULL,
10599 			NULL, &tracing_readme_fops);
10600 
10601 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10602 			NULL, &tracing_saved_cmdlines_fops);
10603 
10604 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10605 			  NULL, &tracing_saved_cmdlines_size_fops);
10606 
10607 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10608 			NULL, &tracing_saved_tgids_fops);
10609 
10610 	trace_create_eval_file(NULL);
10611 
10612 #ifdef CONFIG_MODULES
10613 	register_module_notifier(&trace_module_nb);
10614 #endif
10615 
10616 #ifdef CONFIG_DYNAMIC_FTRACE
10617 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10618 			NULL, &tracing_dyn_info_fops);
10619 #endif
10620 
10621 	create_trace_instances(NULL);
10622 
10623 	update_tracer_options(&global_trace);
10624 }
10625 
10626 static __init int tracer_init_tracefs(void)
10627 {
10628 	int ret;
10629 
10630 	trace_access_lock_init();
10631 
10632 	ret = tracing_init_dentry();
10633 	if (ret)
10634 		return 0;
10635 
10636 	if (eval_map_wq) {
10637 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10638 		queue_work(eval_map_wq, &tracerfs_init_work);
10639 	} else {
10640 		tracer_init_tracefs_work_func(NULL);
10641 	}
10642 
10643 	rv_init_interface();
10644 
10645 	return 0;
10646 }
10647 
10648 fs_initcall(tracer_init_tracefs);
10649 
10650 static int trace_die_panic_handler(struct notifier_block *self,
10651 				unsigned long ev, void *unused);
10652 
10653 static struct notifier_block trace_panic_notifier = {
10654 	.notifier_call = trace_die_panic_handler,
10655 	.priority = INT_MAX - 1,
10656 };
10657 
10658 static struct notifier_block trace_die_notifier = {
10659 	.notifier_call = trace_die_panic_handler,
10660 	.priority = INT_MAX - 1,
10661 };
10662 
10663 /*
10664  * The idea is to execute the following die/panic callback early, in order
10665  * to avoid showing irrelevant information in the trace (like other panic
10666  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10667  * warnings get disabled (to prevent potential log flooding).
10668  */
10669 static int trace_die_panic_handler(struct notifier_block *self,
10670 				unsigned long ev, void *unused)
10671 {
10672 	if (!ftrace_dump_on_oops_enabled())
10673 		return NOTIFY_DONE;
10674 
10675 	/* The die notifier requires DIE_OOPS to trigger */
10676 	if (self == &trace_die_notifier && ev != DIE_OOPS)
10677 		return NOTIFY_DONE;
10678 
10679 	ftrace_dump(DUMP_PARAM);
10680 
10681 	return NOTIFY_DONE;
10682 }
10683 
10684 /*
10685  * printk is set to a max of 1024; we really don't need it that big.
10686  * Nothing should be printing 1000 characters anyway.
10687  */
10688 #define TRACE_MAX_PRINT		1000
10689 
10690 /*
10691  * Define here KERN_TRACE so that we have one place to modify
10692  * it if we decide to change what log level the ftrace dump
10693  * should be at.
10694  */
10695 #define KERN_TRACE		KERN_EMERG
10696 
10697 void
10698 trace_printk_seq(struct trace_seq *s)
10699 {
10700 	/* Probably should print a warning here. */
10701 	if (s->seq.len >= TRACE_MAX_PRINT)
10702 		s->seq.len = TRACE_MAX_PRINT;
10703 
10704 	/*
10705 	 * More paranoid code. Although the buffer size is set to
10706 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10707 	 * an extra layer of protection.
10708 	 */
10709 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10710 		s->seq.len = s->seq.size - 1;
10711 
10712 	/* It should already be zero terminated, but we are paranoid. */
10713 	s->buffer[s->seq.len] = 0;
10714 
10715 	printk(KERN_TRACE "%s", s->buffer);
10716 
10717 	trace_seq_init(s);
10718 }
10719 
10720 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
10721 {
10722 	iter->tr = tr;
10723 	iter->trace = iter->tr->current_trace;
10724 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
10725 	iter->array_buffer = &tr->array_buffer;
10726 
10727 	if (iter->trace && iter->trace->open)
10728 		iter->trace->open(iter);
10729 
10730 	/* Annotate start of buffers if we had overruns */
10731 	if (ring_buffer_overruns(iter->array_buffer->buffer))
10732 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
10733 
10734 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
10735 	if (trace_clocks[iter->tr->clock_id].in_ns)
10736 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10737 
10738 	/* Can not use kmalloc for iter.temp and iter.fmt */
10739 	iter->temp = static_temp_buf;
10740 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
10741 	iter->fmt = static_fmt_buf;
10742 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
10743 }
10744 
10745 void trace_init_global_iter(struct trace_iterator *iter)
10746 {
10747 	trace_init_iter(iter, &global_trace);
10748 }
10749 
10750 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
10751 {
10752 	/* use static because iter can be a bit big for the stack */
10753 	static struct trace_iterator iter;
10754 	unsigned int old_userobj;
10755 	unsigned long flags;
10756 	int cnt = 0;
10757 
10758 	/*
10759 	 * Always turn off tracing when we dump.
10760 	 * We don't need to show trace output of what happens
10761 	 * between multiple crashes.
10762 	 *
10763 	 * If the user does a sysrq-z, then they can re-enable
10764 	 * tracing with echo 1 > tracing_on.
10765 	 */
10766 	tracer_tracing_off(tr);
10767 
10768 	local_irq_save(flags);
10769 
10770 	/* Simulate the iterator */
10771 	trace_init_iter(&iter, tr);
10772 
10773 	/* While dumping, do not allow the buffer to be enabled */
10774 	tracer_tracing_disable(tr);
10775 
10776 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10777 
10778 	/* don't look at user memory in panic mode */
10779 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10780 
10781 	if (dump_mode == DUMP_ORIG)
10782 		iter.cpu_file = raw_smp_processor_id();
10783 	else
10784 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10785 
10786 	if (tr == &global_trace)
10787 		printk(KERN_TRACE "Dumping ftrace buffer:\n");
10788 	else
10789 		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
10790 
10791 	/* Did function tracer already get disabled? */
10792 	if (ftrace_is_dead()) {
10793 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10794 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10795 	}
10796 
10797 	/*
10798 	 * We need to stop all tracing on all CPUs to read
10799 	 * the next buffer. This is a bit expensive, but is
10800 	 * not done often. We read everything that we can,
10801 	 * and then release the locks again.
10802 	 */
10803 
10804 	while (!trace_empty(&iter)) {
10805 
10806 		if (!cnt)
10807 			printk(KERN_TRACE "---------------------------------\n");
10808 
10809 		cnt++;
10810 
10811 		trace_iterator_reset(&iter);
10812 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
10813 
10814 		if (trace_find_next_entry_inc(&iter) != NULL) {
10815 			int ret;
10816 
10817 			ret = print_trace_line(&iter);
10818 			if (ret != TRACE_TYPE_NO_CONSUME)
10819 				trace_consume(&iter);
10820 
10821 			trace_printk_seq(&iter.seq);
10822 		}
10823 		touch_nmi_watchdog();
10824 	}
10825 
10826 	if (!cnt)
10827 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
10828 	else
10829 		printk(KERN_TRACE "---------------------------------\n");
10830 
10831 	tr->trace_flags |= old_userobj;
10832 
10833 	tracer_tracing_enable(tr);
10834 	local_irq_restore(flags);
10835 }
10836 
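/*
 * A worked example of the string parsed below (instance names are
 * illustrative and must refer to existing instances): a kernel command
 * line of ftrace_dump_on_oops=1,foo,bar=orig_cpu dumps the global buffer
 * for all CPUs, the "foo" instance for all CPUs, and the "bar" instance
 * only for the CPU that triggered the oops.
 */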
10837 static void ftrace_dump_by_param(void)
10838 {
10839 	bool first_param = true;
10840 	char dump_param[MAX_TRACER_SIZE];
10841 	char *buf, *token, *inst_name;
10842 	struct trace_array *tr;
10843 
10844 	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
10845 	buf = dump_param;
10846 
10847 	while ((token = strsep(&buf, ",")) != NULL) {
10848 		if (first_param) {
10849 			first_param = false;
10850 			if (!strcmp("0", token))
10851 				continue;
10852 			else if (!strcmp("1", token)) {
10853 				ftrace_dump_one(&global_trace, DUMP_ALL);
10854 				continue;
10855 			}
10856 			else if (!strcmp("2", token) ||
10857 			  !strcmp("orig_cpu", token)) {
10858 				ftrace_dump_one(&global_trace, DUMP_ORIG);
10859 				continue;
10860 			}
10861 		}
10862 
10863 		inst_name = strsep(&token, "=");
10864 		tr = trace_array_find(inst_name);
10865 		if (!tr) {
10866 			printk(KERN_TRACE "Instance %s not found\n", inst_name);
10867 			continue;
10868 		}
10869 
10870 		if (token && (!strcmp("2", token) ||
10871 			  !strcmp("orig_cpu", token)))
10872 			ftrace_dump_one(tr, DUMP_ORIG);
10873 		else
10874 			ftrace_dump_one(tr, DUMP_ALL);
10875 	}
10876 }
10877 
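/*
 * Summary of the dump modes handled below: DUMP_ALL dumps the global
 * buffer for every CPU, DUMP_ORIG only for the CPU that triggered the
 * oops, DUMP_PARAM defers to whatever the ftrace_dump_on_oops parameter
 * selected, and DUMP_NONE does nothing.
 */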
10878 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10879 {
10880 	static atomic_t dump_running;
10881 
10882 	/* Only allow one dump user at a time. */
10883 	if (atomic_inc_return(&dump_running) != 1) {
10884 		atomic_dec(&dump_running);
10885 		return;
10886 	}
10887 
10888 	switch (oops_dump_mode) {
10889 	case DUMP_ALL:
10890 		ftrace_dump_one(&global_trace, DUMP_ALL);
10891 		break;
10892 	case DUMP_ORIG:
10893 		ftrace_dump_one(&global_trace, DUMP_ORIG);
10894 		break;
10895 	case DUMP_PARAM:
10896 		ftrace_dump_by_param();
10897 		break;
10898 	case DUMP_NONE:
10899 		break;
10900 	default:
10901 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10902 		ftrace_dump_one(&global_trace, DUMP_ALL);
10903 	}
10904 
10905 	atomic_dec(&dump_running);
10906 }
10907 EXPORT_SYMBOL_GPL(ftrace_dump);
10908 
10909 #define WRITE_BUFSIZE  4096
10910 
10911 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10912 				size_t count, loff_t *ppos,
10913 				int (*createfn)(const char *))
10914 {
10915 	char *kbuf __free(kfree) = NULL;
10916 	char *buf, *tmp;
10917 	int ret = 0;
10918 	size_t done = 0;
10919 	size_t size;
10920 
10921 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10922 	if (!kbuf)
10923 		return -ENOMEM;
10924 
10925 	while (done < count) {
10926 		size = count - done;
10927 
10928 		if (size >= WRITE_BUFSIZE)
10929 			size = WRITE_BUFSIZE - 1;
10930 
10931 		if (copy_from_user(kbuf, buffer + done, size))
10932 			return -EFAULT;
10933 
10934 		kbuf[size] = '\0';
10935 		buf = kbuf;
10936 		do {
10937 			tmp = strchr(buf, '\n');
10938 			if (tmp) {
10939 				*tmp = '\0';
10940 				size = tmp - buf + 1;
10941 			} else {
10942 				size = strlen(buf);
10943 				if (done + size < count) {
10944 					if (buf != kbuf)
10945 						break;
10946 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10947 					pr_warn("Line length is too long: Should be less than %d\n",
10948 						WRITE_BUFSIZE - 2);
10949 					return -EINVAL;
10950 				}
10951 			}
10952 			done += size;
10953 
10954 			/* Remove comments */
10955 			tmp = strchr(buf, '#');
10956 
10957 			if (tmp)
10958 				*tmp = '\0';
10959 
10960 			ret = createfn(buf);
10961 			if (ret)
10962 				return ret;
10963 			buf += size;
10964 
10965 		} while (done < count);
10966 	}
10967 	return done;
10968 }
10969 
10970 #ifdef CONFIG_TRACER_MAX_TRACE
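/*
 * As used here, boot_snapshot_info is a '\t'-terminated list of instance
 * names (e.g. "foo\tbar\t"): an instance needs a snapshot buffer when its
 * name appears either at the very start of that string or embedded as
 * "\t<name>\t".
 */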
10971 __init static bool tr_needs_alloc_snapshot(const char *name)
10972 {
10973 	char *test;
10974 	int len = strlen(name);
10975 	bool ret;
10976 
10977 	if (!boot_snapshot_index)
10978 		return false;
10979 
10980 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10981 	    boot_snapshot_info[len] == '\t')
10982 		return true;
10983 
10984 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10985 	if (!test)
10986 		return false;
10987 
10988 	sprintf(test, "\t%s\t", name);
10989 	ret = strstr(boot_snapshot_info, test) != NULL;
10990 	kfree(test);
10991 	return ret;
10992 }
10993 
10994 __init static void do_allocate_snapshot(const char *name)
10995 {
10996 	if (!tr_needs_alloc_snapshot(name))
10997 		return;
10998 
10999 	/*
11000 	 * When allocate_snapshot is set, the next call to
11001 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
11002 	 * will allocate the snapshot buffer. That will also clear
11003 	 * this flag.
11004 	 */
11005 	allocate_snapshot = true;
11006 }
11007 #else
11008 static inline void do_allocate_snapshot(const char *name) { }
11009 #endif
11010 
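/*
 * A worked example of one boot-time instance description parsed below
 * (name, address and size are illustrative): something like
 * trace_instance=foo^traceoff^traceprintk@0x1000000:2M,sched creates an
 * instance "foo" mapped over 2MB of physical memory at 0x1000000, starts
 * it with tracing off, makes it the trace_printk() destination, and
 * enables its "sched" events; "bar@myreserve" would instead look up a
 * reserve_mem region named "myreserve".
 */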
11011 __init static void enable_instances(void)
11012 {
11013 	struct trace_array *tr;
11014 	bool memmap_area = false;
11015 	char *curr_str;
11016 	char *name;
11017 	char *str;
11018 	char *tok;
11019 
11020 	/* A tab is always appended */
11021 	boot_instance_info[boot_instance_index - 1] = '\0';
11022 	str = boot_instance_info;
11023 
11024 	while ((curr_str = strsep(&str, "\t"))) {
11025 		phys_addr_t start = 0;
11026 		phys_addr_t size = 0;
11027 		unsigned long addr = 0;
11028 		bool traceprintk = false;
11029 		bool traceoff = false;
11030 		char *flag_delim;
11031 		char *addr_delim;
11032 		char *rname __free(kfree) = NULL;
11033 
11034 		tok = strsep(&curr_str, ",");
11035 
11036 		flag_delim = strchr(tok, '^');
11037 		addr_delim = strchr(tok, '@');
11038 
11039 		if (addr_delim)
11040 			*addr_delim++ = '\0';
11041 
11042 		if (flag_delim)
11043 			*flag_delim++ = '\0';
11044 
11045 		name = tok;
11046 
11047 		if (flag_delim) {
11048 			char *flag;
11049 
11050 			while ((flag = strsep(&flag_delim, "^"))) {
11051 				if (strcmp(flag, "traceoff") == 0) {
11052 					traceoff = true;
11053 				} else if ((strcmp(flag, "printk") == 0) ||
11054 					   (strcmp(flag, "traceprintk") == 0) ||
11055 					   (strcmp(flag, "trace_printk") == 0)) {
11056 					traceprintk = true;
11057 				} else {
11058 					pr_info("Tracing: Invalid instance flag '%s' for %s\n",
11059 						flag, name);
11060 				}
11061 			}
11062 		}
11063 
11064 		tok = addr_delim;
11065 		if (tok && isdigit(*tok)) {
11066 			start = memparse(tok, &tok);
11067 			if (!start) {
11068 				pr_warn("Tracing: Invalid boot instance address for %s\n",
11069 					name);
11070 				continue;
11071 			}
11072 			if (*tok != ':') {
11073 				pr_warn("Tracing: No size specified for instance %s\n", name);
11074 				continue;
11075 			}
11076 			tok++;
11077 			size = memparse(tok, &tok);
11078 			if (!size) {
11079 				pr_warn("Tracing: Invalid boot instance size for %s\n",
11080 					name);
11081 				continue;
11082 			}
11083 			memmap_area = true;
11084 		} else if (tok) {
11085 			if (!reserve_mem_find_by_name(tok, &start, &size)) {
11086 				start = 0;
11087 				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
11088 				continue;
11089 			}
11090 			rname = kstrdup(tok, GFP_KERNEL);
11091 		}
11092 
11093 		if (start) {
11094 			/* Start and size must be page aligned */
11095 			if (start & ~PAGE_MASK) {
11096 				pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start);
11097 				continue;
11098 			}
11099 			if (size & ~PAGE_MASK) {
11100 				pr_warn("Tracing: mapping size %pa is not page aligned\n", &size);
11101 				continue;
11102 			}
11103 
11104 			if (memmap_area)
11105 				addr = map_pages(start, size);
11106 			else
11107 				addr = (unsigned long)phys_to_virt(start);
11108 			if (addr) {
11109 				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
11110 					name, &start, (unsigned long)size);
11111 			} else {
11112 				pr_warn("Tracing: Failed to map boot instance %s\n", name);
11113 				continue;
11114 			}
11115 		} else {
11116 		/* Only non-mapped buffers have snapshot buffers */
11117 			if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
11118 				do_allocate_snapshot(name);
11119 		}
11120 
11121 		tr = trace_array_create_systems(name, NULL, addr, size);
11122 		if (IS_ERR(tr)) {
11123 			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
11124 			continue;
11125 		}
11126 
11127 		if (traceoff)
11128 			tracer_tracing_off(tr);
11129 
11130 		if (traceprintk)
11131 			update_printk_trace(tr);
11132 
11133 		/*
11134 		 * memmap'd buffers can not be freed.
11135 		 */
11136 		if (memmap_area) {
11137 			tr->flags |= TRACE_ARRAY_FL_MEMMAP;
11138 			tr->ref++;
11139 		}
11140 
11141 		if (start) {
11142 			tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
11143 			tr->range_name = no_free_ptr(rname);
11144 		}
11145 
11146 		while ((tok = strsep(&curr_str, ","))) {
11147 			early_enable_events(tr, tok, true);
11148 		}
11149 	}
11150 }
11151 
11152 __init static int tracer_alloc_buffers(void)
11153 {
11154 	int ring_buf_size;
11155 	int ret = -ENOMEM;
11156 
11157 
11158 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
11159 		pr_warn("Tracing disabled due to lockdown\n");
11160 		return -EPERM;
11161 	}
11162 
11163 	/*
11164 	 * Make sure we don't accidentally add more trace options
11165 	 * than we have bits for.
11166 	 */
11167 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
11168 
11169 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
11170 		return -ENOMEM;
11171 
11172 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
11173 		goto out_free_buffer_mask;
11174 
11175 	/* Only allocate trace_printk buffers if a trace_printk exists */
11176 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
11177 		/* Must be called before global_trace.buffer is allocated */
11178 		trace_printk_init_buffers();
11179 
11180 	/* To save memory, keep the ring buffer size to its minimum */
11181 	if (global_trace.ring_buffer_expanded)
11182 		ring_buf_size = trace_buf_size;
11183 	else
11184 		ring_buf_size = 1;
11185 
11186 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
11187 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
11188 
11189 	raw_spin_lock_init(&global_trace.start_lock);
11190 
11191 	/*
11192 	 * The prepare callback allocates some memory for the ring buffer. We
11193 	 * don't free the buffer if the CPU goes down. If we were to free
11194 	 * the buffer, then the user would lose any trace that was in the
11195 	 * buffer. The memory will be removed once the "instance" is removed.
11196 	 */
11197 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
11198 				      "trace/RB:prepare", trace_rb_cpu_prepare,
11199 				      NULL);
11200 	if (ret < 0)
11201 		goto out_free_cpumask;
11202 	/* Used for event triggers */
11203 	ret = -ENOMEM;
11204 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
11205 	if (!temp_buffer)
11206 		goto out_rm_hp_state;
11207 
11208 	if (trace_create_savedcmd() < 0)
11209 		goto out_free_temp_buffer;
11210 
11211 	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
11212 		goto out_free_savedcmd;
11213 
11214 	/* TODO: make the number of buffers hot pluggable with CPUS */
11215 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
11216 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
11217 		goto out_free_pipe_cpumask;
11218 	}
11219 	if (global_trace.buffer_disabled)
11220 		tracing_off();
11221 
11222 	if (trace_boot_clock) {
11223 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
11224 		if (ret < 0)
11225 			pr_warn("Trace clock %s not defined, going back to default\n",
11226 				trace_boot_clock);
11227 	}
11228 
11229 	/*
11230 	 * register_tracer() might reference current_trace, so it
11231 	 * needs to be set before we register anything. This is
11232 	 * just a bootstrap of current_trace anyway.
11233 	 */
11234 	global_trace.current_trace = &nop_trace;
11235 
11236 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
11237 #ifdef CONFIG_TRACER_MAX_TRACE
11238 	spin_lock_init(&global_trace.snapshot_trigger_lock);
11239 #endif
11240 	ftrace_init_global_array_ops(&global_trace);
11241 
11242 #ifdef CONFIG_MODULES
11243 	INIT_LIST_HEAD(&global_trace.mod_events);
11244 #endif
11245 
11246 	init_trace_flags_index(&global_trace);
11247 
11248 	register_tracer(&nop_trace);
11249 
11250 	/* Function tracing may start here (via kernel command line) */
11251 	init_function_trace();
11252 
11253 	/* All seems OK, enable tracing */
11254 	tracing_disabled = 0;
11255 
11256 	atomic_notifier_chain_register(&panic_notifier_list,
11257 				       &trace_panic_notifier);
11258 
11259 	register_die_notifier(&trace_die_notifier);
11260 
11261 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
11262 
11263 	INIT_LIST_HEAD(&global_trace.systems);
11264 	INIT_LIST_HEAD(&global_trace.events);
11265 	INIT_LIST_HEAD(&global_trace.hist_vars);
11266 	INIT_LIST_HEAD(&global_trace.err_log);
11267 	list_add(&global_trace.marker_list, &marker_copies);
11268 	list_add(&global_trace.list, &ftrace_trace_arrays);
11269 
11270 	apply_trace_boot_options();
11271 
11272 	register_snapshot_cmd();
11273 
11274 	return 0;
11275 
11276 out_free_pipe_cpumask:
11277 	free_cpumask_var(global_trace.pipe_cpumask);
11278 out_free_savedcmd:
11279 	trace_free_saved_cmdlines_buffer();
11280 out_free_temp_buffer:
11281 	ring_buffer_free(temp_buffer);
11282 out_rm_hp_state:
11283 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
11284 out_free_cpumask:
11285 	free_cpumask_var(global_trace.tracing_cpumask);
11286 out_free_buffer_mask:
11287 	free_cpumask_var(tracing_buffer_mask);
11288 	return ret;
11289 }
11290 
11291 #ifdef CONFIG_FUNCTION_TRACER
11292 /* Used to set module cached ftrace filtering at boot up */
11293 __init struct trace_array *trace_get_global_array(void)
11294 {
11295 	return &global_trace;
11296 }
11297 #endif
11298 
11299 void __init ftrace_boot_snapshot(void)
11300 {
11301 #ifdef CONFIG_TRACER_MAX_TRACE
11302 	struct trace_array *tr;
11303 
11304 	if (!snapshot_at_boot)
11305 		return;
11306 
11307 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
11308 		if (!tr->allocated_snapshot)
11309 			continue;
11310 
11311 		tracing_snapshot_instance(tr);
11312 		trace_array_puts(tr, "** Boot snapshot taken **\n");
11313 	}
11314 #endif
11315 }
11316 
11317 void __init early_trace_init(void)
11318 {
11319 	if (tracepoint_printk) {
11320 		tracepoint_print_iter =
11321 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
11322 		if (MEM_FAIL(!tracepoint_print_iter,
11323 			     "Failed to allocate trace iterator\n"))
11324 			tracepoint_printk = 0;
11325 		else
11326 			static_key_enable(&tracepoint_printk_key.key);
11327 	}
11328 	tracer_alloc_buffers();
11329 
11330 	init_events();
11331 }
11332 
11333 void __init trace_init(void)
11334 {
11335 	trace_event_init();
11336 
11337 	if (boot_instance_index)
11338 		enable_instances();
11339 }
11340 
11341 __init static void clear_boot_tracer(void)
11342 {
11343 	/*
11344 	 * The default bootup tracer name points into an init section.
11345 	 * This function is called at late init. If the boot tracer
11346 	 * was not found and registered by now, clear the pointer to
11347 	 * prevent a later registration from accessing memory that is
11348 	 * about to be freed.
11349 	 */
11350 	if (!default_bootup_tracer)
11351 		return;
11352 
11353 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
11354 	       default_bootup_tracer);
11355 	default_bootup_tracer = NULL;
11356 }
11357 
11358 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
11359 __init static void tracing_set_default_clock(void)
11360 {
11361 	/* sched_clock_stable() is determined in late_initcall */
11362 	if (!trace_boot_clock && !sched_clock_stable()) {
11363 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
11364 			pr_warn("Can not set tracing clock due to lockdown\n");
11365 			return;
11366 		}
11367 
11368 		printk(KERN_WARNING
11369 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
11370 		       "If you want to keep using the local clock, then add:\n"
11371 		       "  \"trace_clock=local\"\n"
11372 		       "on the kernel command line\n");
11373 		tracing_set_clock(&global_trace, "global");
11374 	}
11375 }
11376 #else
11377 static inline void tracing_set_default_clock(void) { }
11378 #endif
11379 
11380 __init static int late_trace_init(void)
11381 {
11382 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
11383 		static_key_disable(&tracepoint_printk_key.key);
11384 		tracepoint_printk = 0;
11385 	}
11386 
11387 	if (traceoff_after_boot)
11388 		tracing_off();
11389 
11390 	tracing_set_default_clock();
11391 	clear_boot_tracer();
11392 	return 0;
11393 }
11394 
11395 late_initcall_sync(late_trace_init);
11396