xref: /linux/kernel/trace/trace.c (revision fa4820b893843f7ad5e1b5c446a92426c5c946ce)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/syscalls.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/cleanup.h>
31 #include <linux/vmalloc.h>
32 #include <linux/ftrace.h>
33 #include <linux/module.h>
34 #include <linux/percpu.h>
35 #include <linux/splice.h>
36 #include <linux/kdebug.h>
37 #include <linux/string.h>
38 #include <linux/mount.h>
39 #include <linux/rwsem.h>
40 #include <linux/slab.h>
41 #include <linux/ctype.h>
42 #include <linux/init.h>
43 #include <linux/panic_notifier.h>
44 #include <linux/poll.h>
45 #include <linux/nmi.h>
46 #include <linux/fs.h>
47 #include <linux/trace.h>
48 #include <linux/sched/clock.h>
49 #include <linux/sched/rt.h>
50 #include <linux/fsnotify.h>
51 #include <linux/irq_work.h>
52 #include <linux/workqueue.h>
53 #include <linux/sort.h>
54 #include <linux/io.h> /* vmap_page_range() */
55 #include <linux/fs_context.h>
56 
57 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
58 
59 #include "trace.h"
60 #include "trace_output.h"
61 
62 #ifdef CONFIG_FTRACE_STARTUP_TEST
63 /*
64  * We need to change this state when a selftest is running.
65  * A selftest will lurk into the ring-buffer to count the
66  * entries inserted during the selftest although some concurrent
67  * insertions into the ring-buffer such as trace_printk could occurred
68  * at the same time, giving false positive or negative results.
69  */
70 bool __read_mostly tracing_selftest_running;
71 
72 /*
73  * If boot-time tracing including tracers/events via kernel cmdline
74  * is running, we do not want to run SELFTEST.
75  */
76 bool __read_mostly tracing_selftest_disabled;
77 
78 void __init disable_tracing_selftest(const char *reason)
79 {
80 	if (!tracing_selftest_disabled) {
81 		tracing_selftest_disabled = true;
82 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
83 	}
84 }
85 #else
86 #define tracing_selftest_disabled	0
87 #endif
88 
89 /* Pipe tracepoints to printk */
90 static struct trace_iterator *tracepoint_print_iter;
91 int tracepoint_printk;
92 static bool tracepoint_printk_stop_on_boot __initdata;
93 static bool traceoff_after_boot __initdata;
94 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
95 
96 /* Store tracers and their flags per instance */
97 struct tracers {
98 	struct list_head	list;
99 	struct tracer		*tracer;
100 	struct tracer_flags	*flags;
101 };
102 
103 /*
104  * To prevent the comm cache from being overwritten when no
105  * tracing is active, only save the comm when a trace event
106  * occurred.
107  */
108 DEFINE_PER_CPU(bool, trace_taskinfo_save);
109 
110 /*
111  * Kill all tracing for good (never come back).
112  * It is initialized to 1 but will turn to zero if the initialization
113  * of the tracer is successful. But that is the only place that sets
114  * this back to zero.
115  */
116 int tracing_disabled = 1;
117 
118 cpumask_var_t __read_mostly	tracing_buffer_mask;
119 
120 #define MAX_TRACER_SIZE		100
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputting it to a
128  * serial console.
129  *
130  * It is default off, but you can enable it with either specifying
131  * "ftrace_dump_on_oops" in the kernel command line, or setting
132  * /proc/sys/kernel/ftrace_dump_on_oops
133  * Set 1 if you want to dump buffers of all CPUs
134  * Set 2 if you want to dump the buffer of the CPU that triggered oops
135  * Set instance name if you want to dump the specific trace instance
136  * Multiple instance dump is also supported, and instances are separated
137  * by commas.
138  */
139 /* Set to string format zero to disable by default */
140 static char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
141 
142 /* When set, tracing will stop when a WARN*() is hit */
143 static int __disable_trace_on_warning;
144 
145 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
146 			     void *buffer, size_t *lenp, loff_t *ppos);
147 static const struct ctl_table trace_sysctl_table[] = {
148 	{
149 		.procname	= "ftrace_dump_on_oops",
150 		.data		= &ftrace_dump_on_oops,
151 		.maxlen		= MAX_TRACER_SIZE,
152 		.mode		= 0644,
153 		.proc_handler	= proc_dostring,
154 	},
155 	{
156 		.procname	= "traceoff_on_warning",
157 		.data		= &__disable_trace_on_warning,
158 		.maxlen		= sizeof(__disable_trace_on_warning),
159 		.mode		= 0644,
160 		.proc_handler	= proc_dointvec,
161 	},
162 	{
163 		.procname	= "tracepoint_printk",
164 		.data		= &tracepoint_printk,
165 		.maxlen		= sizeof(tracepoint_printk),
166 		.mode		= 0644,
167 		.proc_handler	= tracepoint_printk_sysctl,
168 	},
169 };
170 
171 static int __init init_trace_sysctls(void)
172 {
173 	register_sysctl_init("kernel", trace_sysctl_table);
174 	return 0;
175 }
176 subsys_initcall(init_trace_sysctls);
177 
178 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
179 /* Map of enums to their values, for "eval_map" file */
180 struct trace_eval_map_head {
181 	struct module			*mod;
182 	unsigned long			length;
183 };
184 
185 union trace_eval_map_item;
186 
187 struct trace_eval_map_tail {
188 	/*
189 	 * "end" is first and points to NULL as it must be different
190 	 * than "mod" or "eval_string"
191 	 */
192 	union trace_eval_map_item	*next;
193 	const char			*end;	/* points to NULL */
194 };
195 
196 static DEFINE_MUTEX(trace_eval_mutex);
197 
198 /*
199  * The trace_eval_maps are saved in an array with two extra elements,
200  * one at the beginning, and one at the end. The beginning item contains
201  * the count of the saved maps (head.length), and the module they
202  * belong to if not built in (head.mod). The ending item contains a
203  * pointer to the next array of saved eval_map items.
204  */
205 union trace_eval_map_item {
206 	struct trace_eval_map		map;
207 	struct trace_eval_map_head	head;
208 	struct trace_eval_map_tail	tail;
209 };
210 
211 static union trace_eval_map_item *trace_eval_maps;
212 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
213 
214 int tracing_set_tracer(struct trace_array *tr, const char *buf);
215 static void ftrace_trace_userstack(struct trace_array *tr,
216 				   struct trace_buffer *buffer,
217 				   unsigned int trace_ctx);
218 
219 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
220 static char *default_bootup_tracer;
221 
222 static bool allocate_snapshot;
223 static bool snapshot_at_boot;
224 
225 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
226 static int boot_instance_index;
227 
228 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
229 static int boot_snapshot_index;
230 
231 static int __init set_cmdline_ftrace(char *str)
232 {
233 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
234 	default_bootup_tracer = bootup_tracer_buf;
235 	/* We are using ftrace early, expand it */
236 	trace_set_ring_buffer_expanded(NULL);
237 	return 1;
238 }
239 __setup("ftrace=", set_cmdline_ftrace);
240 
241 int ftrace_dump_on_oops_enabled(void)
242 {
243 	if (!strcmp("0", ftrace_dump_on_oops))
244 		return 0;
245 	else
246 		return 1;
247 }
248 
249 static int __init set_ftrace_dump_on_oops(char *str)
250 {
251 	if (!*str) {
252 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
253 		return 1;
254 	}
255 
256 	if (*str == ',') {
257 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
258 		strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
259 		return 1;
260 	}
261 
262 	if (*str++ == '=') {
263 		strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
264 		return 1;
265 	}
266 
267 	return 0;
268 }
269 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
270 
271 static int __init stop_trace_on_warning(char *str)
272 {
273 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
274 		__disable_trace_on_warning = 1;
275 	return 1;
276 }
277 __setup("traceoff_on_warning", stop_trace_on_warning);
278 
279 static int __init boot_alloc_snapshot(char *str)
280 {
281 	char *slot = boot_snapshot_info + boot_snapshot_index;
282 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
283 	int ret;
284 
285 	if (str[0] == '=') {
286 		str++;
287 		if (strlen(str) >= left)
288 			return -1;
289 
290 		ret = snprintf(slot, left, "%s\t", str);
291 		boot_snapshot_index += ret;
292 	} else {
293 		allocate_snapshot = true;
294 		/* We also need the main ring buffer expanded */
295 		trace_set_ring_buffer_expanded(NULL);
296 	}
297 	return 1;
298 }
299 __setup("alloc_snapshot", boot_alloc_snapshot);
300 
301 
302 static int __init boot_snapshot(char *str)
303 {
304 	snapshot_at_boot = true;
305 	boot_alloc_snapshot(str);
306 	return 1;
307 }
308 __setup("ftrace_boot_snapshot", boot_snapshot);
309 
310 
311 static int __init boot_instance(char *str)
312 {
313 	char *slot = boot_instance_info + boot_instance_index;
314 	int left = sizeof(boot_instance_info) - boot_instance_index;
315 	int ret;
316 
317 	if (strlen(str) >= left)
318 		return -1;
319 
320 	ret = snprintf(slot, left, "%s\t", str);
321 	boot_instance_index += ret;
322 
323 	return 1;
324 }
325 __setup("trace_instance=", boot_instance);
326 
327 
328 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
329 
330 static int __init set_trace_boot_options(char *str)
331 {
332 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
333 	return 1;
334 }
335 __setup("trace_options=", set_trace_boot_options);
336 
337 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
338 static char *trace_boot_clock __initdata;
339 
340 static int __init set_trace_boot_clock(char *str)
341 {
342 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
343 	trace_boot_clock = trace_boot_clock_buf;
344 	return 1;
345 }
346 __setup("trace_clock=", set_trace_boot_clock);
347 
348 static int __init set_tracepoint_printk(char *str)
349 {
350 	/* Ignore the "tp_printk_stop_on_boot" param */
351 	if (*str == '_')
352 		return 0;
353 
354 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
355 		tracepoint_printk = 1;
356 	return 1;
357 }
358 __setup("tp_printk", set_tracepoint_printk);
359 
360 static int __init set_tracepoint_printk_stop(char *str)
361 {
362 	tracepoint_printk_stop_on_boot = true;
363 	return 1;
364 }
365 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
366 
367 static int __init set_traceoff_after_boot(char *str)
368 {
369 	traceoff_after_boot = true;
370 	return 1;
371 }
372 __setup("traceoff_after_boot", set_traceoff_after_boot);
373 
374 unsigned long long ns2usecs(u64 nsec)
375 {
376 	nsec += 500;
377 	do_div(nsec, 1000);
378 	return nsec;
379 }
380 
381 static void
382 trace_process_export(struct trace_export *export,
383 	       struct ring_buffer_event *event, int flag)
384 {
385 	struct trace_entry *entry;
386 	unsigned int size = 0;
387 
388 	if (export->flags & flag) {
389 		entry = ring_buffer_event_data(event);
390 		size = ring_buffer_event_length(event);
391 		export->write(export, entry, size);
392 	}
393 }
394 
395 static DEFINE_MUTEX(ftrace_export_lock);
396 
397 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
398 
399 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
400 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
401 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
402 
403 static inline void ftrace_exports_enable(struct trace_export *export)
404 {
405 	if (export->flags & TRACE_EXPORT_FUNCTION)
406 		static_branch_inc(&trace_function_exports_enabled);
407 
408 	if (export->flags & TRACE_EXPORT_EVENT)
409 		static_branch_inc(&trace_event_exports_enabled);
410 
411 	if (export->flags & TRACE_EXPORT_MARKER)
412 		static_branch_inc(&trace_marker_exports_enabled);
413 }
414 
415 static inline void ftrace_exports_disable(struct trace_export *export)
416 {
417 	if (export->flags & TRACE_EXPORT_FUNCTION)
418 		static_branch_dec(&trace_function_exports_enabled);
419 
420 	if (export->flags & TRACE_EXPORT_EVENT)
421 		static_branch_dec(&trace_event_exports_enabled);
422 
423 	if (export->flags & TRACE_EXPORT_MARKER)
424 		static_branch_dec(&trace_marker_exports_enabled);
425 }
426 
427 static void ftrace_exports(struct ring_buffer_event *event, int flag)
428 {
429 	struct trace_export *export;
430 
431 	guard(preempt_notrace)();
432 
433 	export = rcu_dereference_raw_check(ftrace_exports_list);
434 	while (export) {
435 		trace_process_export(export, event, flag);
436 		export = rcu_dereference_raw_check(export->next);
437 	}
438 }
439 
440 static inline void
441 add_trace_export(struct trace_export **list, struct trace_export *export)
442 {
443 	rcu_assign_pointer(export->next, *list);
444 	/*
445 	 * We are entering export into the list but another
446 	 * CPU might be walking that list. We need to make sure
447 	 * the export->next pointer is valid before another CPU sees
448 	 * the export pointer included into the list.
449 	 */
450 	rcu_assign_pointer(*list, export);
451 }
452 
453 static inline int
454 rm_trace_export(struct trace_export **list, struct trace_export *export)
455 {
456 	struct trace_export **p;
457 
458 	for (p = list; *p != NULL; p = &(*p)->next)
459 		if (*p == export)
460 			break;
461 
462 	if (*p != export)
463 		return -1;
464 
465 	rcu_assign_pointer(*p, (*p)->next);
466 
467 	return 0;
468 }
469 
470 static inline void
471 add_ftrace_export(struct trace_export **list, struct trace_export *export)
472 {
473 	ftrace_exports_enable(export);
474 
475 	add_trace_export(list, export);
476 }
477 
478 static inline int
479 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
480 {
481 	int ret;
482 
483 	ret = rm_trace_export(list, export);
484 	ftrace_exports_disable(export);
485 
486 	return ret;
487 }
488 
489 int register_ftrace_export(struct trace_export *export)
490 {
491 	if (WARN_ON_ONCE(!export->write))
492 		return -1;
493 
494 	guard(mutex)(&ftrace_export_lock);
495 
496 	add_ftrace_export(&ftrace_exports_list, export);
497 
498 	return 0;
499 }
500 EXPORT_SYMBOL_GPL(register_ftrace_export);
501 
502 int unregister_ftrace_export(struct trace_export *export)
503 {
504 	guard(mutex)(&ftrace_export_lock);
505 	return rm_ftrace_export(&ftrace_exports_list, export);
506 }
507 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
508 
509 /* trace_flags holds trace_options default values */
510 #define TRACE_DEFAULT_FLAGS						\
511 	(FUNCTION_DEFAULT_FLAGS | FPROFILE_DEFAULT_FLAGS |		\
512 	 TRACE_ITER(PRINT_PARENT) | TRACE_ITER(PRINTK) |		\
513 	 TRACE_ITER(ANNOTATE) | TRACE_ITER(CONTEXT_INFO) |		\
514 	 TRACE_ITER(RECORD_CMD) | TRACE_ITER(OVERWRITE) |		\
515 	 TRACE_ITER(IRQ_INFO) | TRACE_ITER(MARKERS) |			\
516 	 TRACE_ITER(HASH_PTR) | TRACE_ITER(TRACE_PRINTK) |		\
517 	 TRACE_ITER(COPY_MARKER))
518 
519 /* trace_options that are only supported by global_trace */
520 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER(PRINTK) |			\
521 	       TRACE_ITER(PRINTK_MSGONLY) | TRACE_ITER(RECORD_CMD) |	\
522 	       TRACE_ITER(PROF_TEXT_OFFSET) | FPROFILE_DEFAULT_FLAGS)
523 
524 /* trace_flags that are default zero for instances */
525 #define ZEROED_TRACE_FLAGS \
526 	(TRACE_ITER(EVENT_FORK) | TRACE_ITER(FUNC_FORK) | TRACE_ITER(TRACE_PRINTK) | \
527 	 TRACE_ITER(COPY_MARKER))
528 
529 /*
530  * The global_trace is the descriptor that holds the top-level tracing
531  * buffers for the live tracing.
532  */
533 static struct trace_array global_trace = {
534 	.trace_flags = TRACE_DEFAULT_FLAGS,
535 };
536 
537 struct trace_array *printk_trace = &global_trace;
538 
539 /* List of trace_arrays interested in the top level trace_marker */
540 static LIST_HEAD(marker_copies);
541 
542 static void update_printk_trace(struct trace_array *tr)
543 {
544 	if (printk_trace == tr)
545 		return;
546 
547 	printk_trace->trace_flags &= ~TRACE_ITER(TRACE_PRINTK);
548 	printk_trace = tr;
549 	tr->trace_flags |= TRACE_ITER(TRACE_PRINTK);
550 }
551 
552 /* Returns true if the status of tr changed */
553 static bool update_marker_trace(struct trace_array *tr, int enabled)
554 {
555 	lockdep_assert_held(&event_mutex);
556 
557 	if (enabled) {
558 		if (!list_empty(&tr->marker_list))
559 			return false;
560 
561 		list_add_rcu(&tr->marker_list, &marker_copies);
562 		tr->trace_flags |= TRACE_ITER(COPY_MARKER);
563 		return true;
564 	}
565 
566 	if (list_empty(&tr->marker_list))
567 		return false;
568 
569 	list_del_init(&tr->marker_list);
570 	tr->trace_flags &= ~TRACE_ITER(COPY_MARKER);
571 	return true;
572 }
573 
574 void trace_set_ring_buffer_expanded(struct trace_array *tr)
575 {
576 	if (!tr)
577 		tr = &global_trace;
578 	tr->ring_buffer_expanded = true;
579 }
580 
581 LIST_HEAD(ftrace_trace_arrays);
582 
583 int trace_array_get(struct trace_array *this_tr)
584 {
585 	struct trace_array *tr;
586 
587 	guard(mutex)(&trace_types_lock);
588 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
589 		if (tr == this_tr) {
590 			tr->ref++;
591 			return 0;
592 		}
593 	}
594 
595 	return -ENODEV;
596 }
597 
598 static void __trace_array_put(struct trace_array *this_tr)
599 {
600 	WARN_ON(!this_tr->ref);
601 	this_tr->ref--;
602 }
603 
604 /**
605  * trace_array_put - Decrement the reference counter for this trace array.
606  * @this_tr : pointer to the trace array
607  *
608  * NOTE: Use this when we no longer need the trace array returned by
609  * trace_array_get_by_name(). This ensures the trace array can be later
610  * destroyed.
611  *
612  */
613 void trace_array_put(struct trace_array *this_tr)
614 {
615 	if (!this_tr)
616 		return;
617 
618 	guard(mutex)(&trace_types_lock);
619 	__trace_array_put(this_tr);
620 }
621 EXPORT_SYMBOL_GPL(trace_array_put);
622 
623 int tracing_check_open_get_tr(struct trace_array *tr)
624 {
625 	int ret;
626 
627 	ret = security_locked_down(LOCKDOWN_TRACEFS);
628 	if (ret)
629 		return ret;
630 
631 	if (tracing_disabled)
632 		return -ENODEV;
633 
634 	if (tr && trace_array_get(tr) < 0)
635 		return -ENODEV;
636 
637 	return 0;
638 }
639 
640 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
641 {
642 	u64 ts;
643 
644 	/* Early boot up does not have a buffer yet */
645 	if (!buf->buffer)
646 		return trace_clock_local();
647 
648 	ts = ring_buffer_time_stamp(buf->buffer);
649 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
650 
651 	return ts;
652 }
653 
654 u64 ftrace_now(int cpu)
655 {
656 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
657 }
658 
659 /**
660  * tracing_is_enabled - Show if global_trace has been enabled
661  *
662  * Shows if the global trace has been enabled or not. It uses the
663  * mirror flag "buffer_disabled" to be used in fast paths such as for
664  * the irqsoff tracer. But it may be inaccurate due to races. If you
665  * need to know the accurate state, use tracing_is_on() which is a little
666  * slower, but accurate.
667  */
668 int tracing_is_enabled(void)
669 {
670 	/*
671 	 * For quick access (irqsoff uses this in fast path), just
672 	 * return the mirror variable of the state of the ring buffer.
673 	 * It's a little racy, but we don't really care.
674 	 */
675 	return !global_trace.buffer_disabled;
676 }
677 
678 /*
679  * trace_buf_size is the size in bytes that is allocated
680  * for a buffer. Note, the number of bytes is always rounded
681  * to page size.
682  *
683  * This number is purposely set to a low number of 16384.
684  * If the dump on oops happens, it will be much appreciated
685  * to not have to wait for all that output. Anyway this can be
686  * boot time and run time configurable.
687  */
688 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
689 
690 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
691 
692 /* trace_types holds a link list of available tracers. */
693 static struct tracer		*trace_types __read_mostly;
694 
695 /*
696  * trace_types_lock is used to protect the trace_types list.
697  */
698 DEFINE_MUTEX(trace_types_lock);
699 
700 /*
701  * serialize the access of the ring buffer
702  *
703  * ring buffer serializes readers, but it is low level protection.
704  * The validity of the events (which returns by ring_buffer_peek() ..etc)
705  * are not protected by ring buffer.
706  *
707  * The content of events may become garbage if we allow other process consumes
708  * these events concurrently:
709  *   A) the page of the consumed events may become a normal page
710  *      (not reader page) in ring buffer, and this page will be rewritten
711  *      by events producer.
712  *   B) The page of the consumed events may become a page for splice_read,
713  *      and this page will be returned to system.
714  *
715  * These primitives allow multi process access to different cpu ring buffer
716  * concurrently.
717  *
718  * These primitives don't distinguish read-only and read-consume access.
719  * Multi read-only access are also serialized.
720  */
721 
722 #ifdef CONFIG_SMP
723 static DECLARE_RWSEM(all_cpu_access_lock);
724 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
725 
726 static inline void trace_access_lock(int cpu)
727 {
728 	if (cpu == RING_BUFFER_ALL_CPUS) {
729 		/* gain it for accessing the whole ring buffer. */
730 		down_write(&all_cpu_access_lock);
731 	} else {
732 		/* gain it for accessing a cpu ring buffer. */
733 
734 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
735 		down_read(&all_cpu_access_lock);
736 
737 		/* Secondly block other access to this @cpu ring buffer. */
738 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
739 	}
740 }
741 
742 static inline void trace_access_unlock(int cpu)
743 {
744 	if (cpu == RING_BUFFER_ALL_CPUS) {
745 		up_write(&all_cpu_access_lock);
746 	} else {
747 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
748 		up_read(&all_cpu_access_lock);
749 	}
750 }
751 
752 static inline void trace_access_lock_init(void)
753 {
754 	int cpu;
755 
756 	for_each_possible_cpu(cpu)
757 		mutex_init(&per_cpu(cpu_access_lock, cpu));
758 }
759 
760 #else
761 
762 static DEFINE_MUTEX(access_lock);
763 
764 static inline void trace_access_lock(int cpu)
765 {
766 	(void)cpu;
767 	mutex_lock(&access_lock);
768 }
769 
770 static inline void trace_access_unlock(int cpu)
771 {
772 	(void)cpu;
773 	mutex_unlock(&access_lock);
774 }
775 
776 static inline void trace_access_lock_init(void)
777 {
778 }
779 
780 #endif
781 
782 void tracer_tracing_on(struct trace_array *tr)
783 {
784 	if (tr->array_buffer.buffer)
785 		ring_buffer_record_on(tr->array_buffer.buffer);
786 	/*
787 	 * This flag is looked at when buffers haven't been allocated
788 	 * yet, or by some tracers (like irqsoff), that just want to
789 	 * know if the ring buffer has been disabled, but it can handle
790 	 * races of where it gets disabled but we still do a record.
791 	 * As the check is in the fast path of the tracers, it is more
792 	 * important to be fast than accurate.
793 	 */
794 	tr->buffer_disabled = 0;
795 }
796 
797 /**
798  * tracing_on - enable tracing buffers
799  *
800  * This function enables tracing buffers that may have been
801  * disabled with tracing_off.
802  */
803 void tracing_on(void)
804 {
805 	tracer_tracing_on(&global_trace);
806 }
807 EXPORT_SYMBOL_GPL(tracing_on);
808 
809 #ifdef CONFIG_TRACER_SNAPSHOT
810 static void tracing_snapshot_instance_cond(struct trace_array *tr,
811 					   void *cond_data)
812 {
813 	unsigned long flags;
814 
815 	if (in_nmi()) {
816 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
817 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
818 		return;
819 	}
820 
821 	if (!tr->allocated_snapshot) {
822 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
823 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
824 		tracer_tracing_off(tr);
825 		return;
826 	}
827 
828 	if (tr->mapped) {
829 		trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
830 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
831 		return;
832 	}
833 
834 	/* Note, snapshot can not be used when the tracer uses it */
835 	if (tracer_uses_snapshot(tr->current_trace)) {
836 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
837 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
838 		return;
839 	}
840 
841 	local_irq_save(flags);
842 	update_max_tr(tr, current, smp_processor_id(), cond_data);
843 	local_irq_restore(flags);
844 }
845 
846 void tracing_snapshot_instance(struct trace_array *tr)
847 {
848 	tracing_snapshot_instance_cond(tr, NULL);
849 }
850 
851 /**
852  * tracing_snapshot - take a snapshot of the current buffer.
853  *
854  * This causes a swap between the snapshot buffer and the current live
855  * tracing buffer. You can use this to take snapshots of the live
856  * trace when some condition is triggered, but continue to trace.
857  *
858  * Note, make sure to allocate the snapshot with either
859  * a tracing_snapshot_alloc(), or by doing it manually
860  * with: echo 1 > /sys/kernel/tracing/snapshot
861  *
862  * If the snapshot buffer is not allocated, it will stop tracing.
863  * Basically making a permanent snapshot.
864  */
865 void tracing_snapshot(void)
866 {
867 	struct trace_array *tr = &global_trace;
868 
869 	tracing_snapshot_instance(tr);
870 }
871 EXPORT_SYMBOL_GPL(tracing_snapshot);
872 
873 /**
874  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
875  * @tr:		The tracing instance to snapshot
876  * @cond_data:	The data to be tested conditionally, and possibly saved
877  *
878  * This is the same as tracing_snapshot() except that the snapshot is
879  * conditional - the snapshot will only happen if the
880  * cond_snapshot.update() implementation receiving the cond_data
881  * returns true, which means that the trace array's cond_snapshot
882  * update() operation used the cond_data to determine whether the
883  * snapshot should be taken, and if it was, presumably saved it along
884  * with the snapshot.
885  */
886 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
887 {
888 	tracing_snapshot_instance_cond(tr, cond_data);
889 }
890 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
891 
892 /**
893  * tracing_cond_snapshot_data - get the user data associated with a snapshot
894  * @tr:		The tracing instance
895  *
896  * When the user enables a conditional snapshot using
897  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
898  * with the snapshot.  This accessor is used to retrieve it.
899  *
900  * Should not be called from cond_snapshot.update(), since it takes
901  * the tr->max_lock lock, which the code calling
902  * cond_snapshot.update() has already done.
903  *
904  * Returns the cond_data associated with the trace array's snapshot.
905  */
906 void *tracing_cond_snapshot_data(struct trace_array *tr)
907 {
908 	void *cond_data = NULL;
909 
910 	local_irq_disable();
911 	arch_spin_lock(&tr->max_lock);
912 
913 	if (tr->cond_snapshot)
914 		cond_data = tr->cond_snapshot->cond_data;
915 
916 	arch_spin_unlock(&tr->max_lock);
917 	local_irq_enable();
918 
919 	return cond_data;
920 }
921 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
922 
923 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
924 					struct array_buffer *size_buf, int cpu_id);
925 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
926 
927 int tracing_alloc_snapshot_instance(struct trace_array *tr)
928 {
929 	int order;
930 	int ret;
931 
932 	if (!tr->allocated_snapshot) {
933 
934 		/* Make the snapshot buffer have the same order as main buffer */
935 		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
936 		ret = ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, order);
937 		if (ret < 0)
938 			return ret;
939 
940 		/* allocate spare buffer */
941 		ret = resize_buffer_duplicate_size(&tr->snapshot_buffer,
942 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
943 		if (ret < 0)
944 			return ret;
945 
946 		tr->allocated_snapshot = true;
947 	}
948 
949 	return 0;
950 }
951 
952 static void free_snapshot(struct trace_array *tr)
953 {
954 	/*
955 	 * We don't free the ring buffer. instead, resize it because
956 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
957 	 * we want preserve it.
958 	 */
959 	ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, 0);
960 	ring_buffer_resize(tr->snapshot_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
961 	set_buffer_entries(&tr->snapshot_buffer, 1);
962 	tracing_reset_online_cpus(&tr->snapshot_buffer);
963 	tr->allocated_snapshot = false;
964 }
965 
966 static int tracing_arm_snapshot_locked(struct trace_array *tr)
967 {
968 	int ret;
969 
970 	lockdep_assert_held(&trace_types_lock);
971 
972 	spin_lock(&tr->snapshot_trigger_lock);
973 	if (tr->snapshot == UINT_MAX || tr->mapped) {
974 		spin_unlock(&tr->snapshot_trigger_lock);
975 		return -EBUSY;
976 	}
977 
978 	tr->snapshot++;
979 	spin_unlock(&tr->snapshot_trigger_lock);
980 
981 	ret = tracing_alloc_snapshot_instance(tr);
982 	if (ret) {
983 		spin_lock(&tr->snapshot_trigger_lock);
984 		tr->snapshot--;
985 		spin_unlock(&tr->snapshot_trigger_lock);
986 	}
987 
988 	return ret;
989 }
990 
991 int tracing_arm_snapshot(struct trace_array *tr)
992 {
993 	guard(mutex)(&trace_types_lock);
994 	return tracing_arm_snapshot_locked(tr);
995 }
996 
997 void tracing_disarm_snapshot(struct trace_array *tr)
998 {
999 	spin_lock(&tr->snapshot_trigger_lock);
1000 	if (!WARN_ON(!tr->snapshot))
1001 		tr->snapshot--;
1002 	spin_unlock(&tr->snapshot_trigger_lock);
1003 }
1004 
1005 /**
1006  * tracing_alloc_snapshot - allocate snapshot buffer.
1007  *
1008  * This only allocates the snapshot buffer if it isn't already
1009  * allocated - it doesn't also take a snapshot.
1010  *
1011  * This is meant to be used in cases where the snapshot buffer needs
1012  * to be set up for events that can't sleep but need to be able to
1013  * trigger a snapshot.
1014  */
1015 int tracing_alloc_snapshot(void)
1016 {
1017 	struct trace_array *tr = &global_trace;
1018 	int ret;
1019 
1020 	ret = tracing_alloc_snapshot_instance(tr);
1021 	WARN_ON(ret < 0);
1022 
1023 	return ret;
1024 }
1025 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1026 
1027 /**
1028  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1029  *
1030  * This is similar to tracing_snapshot(), but it will allocate the
1031  * snapshot buffer if it isn't already allocated. Use this only
1032  * where it is safe to sleep, as the allocation may sleep.
1033  *
1034  * This causes a swap between the snapshot buffer and the current live
1035  * tracing buffer. You can use this to take snapshots of the live
1036  * trace when some condition is triggered, but continue to trace.
1037  */
1038 void tracing_snapshot_alloc(void)
1039 {
1040 	int ret;
1041 
1042 	ret = tracing_alloc_snapshot();
1043 	if (ret < 0)
1044 		return;
1045 
1046 	tracing_snapshot();
1047 }
1048 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1049 
1050 /**
1051  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1052  * @tr:		The tracing instance
1053  * @cond_data:	User data to associate with the snapshot
1054  * @update:	Implementation of the cond_snapshot update function
1055  *
1056  * Check whether the conditional snapshot for the given instance has
1057  * already been enabled, or if the current tracer is already using a
1058  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1059  * save the cond_data and update function inside.
1060  *
1061  * Returns 0 if successful, error otherwise.
1062  */
1063 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1064 				 cond_update_fn_t update)
1065 {
1066 	struct cond_snapshot *cond_snapshot __free(kfree) =
1067 		kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1068 	int ret;
1069 
1070 	if (!cond_snapshot)
1071 		return -ENOMEM;
1072 
1073 	cond_snapshot->cond_data = cond_data;
1074 	cond_snapshot->update = update;
1075 
1076 	guard(mutex)(&trace_types_lock);
1077 
1078 	if (tracer_uses_snapshot(tr->current_trace))
1079 		return -EBUSY;
1080 
1081 	/*
1082 	 * The cond_snapshot can only change to NULL without the
1083 	 * trace_types_lock. We don't care if we race with it going
1084 	 * to NULL, but we want to make sure that it's not set to
1085 	 * something other than NULL when we get here, which we can
1086 	 * do safely with only holding the trace_types_lock and not
1087 	 * having to take the max_lock.
1088 	 */
1089 	if (tr->cond_snapshot)
1090 		return -EBUSY;
1091 
1092 	ret = tracing_arm_snapshot_locked(tr);
1093 	if (ret)
1094 		return ret;
1095 
1096 	local_irq_disable();
1097 	arch_spin_lock(&tr->max_lock);
1098 	tr->cond_snapshot = no_free_ptr(cond_snapshot);
1099 	arch_spin_unlock(&tr->max_lock);
1100 	local_irq_enable();
1101 
1102 	return 0;
1103 }
1104 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1105 
1106 /**
1107  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1108  * @tr:		The tracing instance
1109  *
1110  * Check whether the conditional snapshot for the given instance is
1111  * enabled; if so, free the cond_snapshot associated with it,
1112  * otherwise return -EINVAL.
1113  *
1114  * Returns 0 if successful, error otherwise.
1115  */
1116 int tracing_snapshot_cond_disable(struct trace_array *tr)
1117 {
1118 	int ret = 0;
1119 
1120 	local_irq_disable();
1121 	arch_spin_lock(&tr->max_lock);
1122 
1123 	if (!tr->cond_snapshot)
1124 		ret = -EINVAL;
1125 	else {
1126 		kfree(tr->cond_snapshot);
1127 		tr->cond_snapshot = NULL;
1128 	}
1129 
1130 	arch_spin_unlock(&tr->max_lock);
1131 	local_irq_enable();
1132 
1133 	tracing_disarm_snapshot(tr);
1134 
1135 	return ret;
1136 }
1137 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1138 #else
1139 void tracing_snapshot(void)
1140 {
1141 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1142 }
1143 EXPORT_SYMBOL_GPL(tracing_snapshot);
1144 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1145 {
1146 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1147 }
1148 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1149 int tracing_alloc_snapshot(void)
1150 {
1151 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1152 	return -ENODEV;
1153 }
1154 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1155 void tracing_snapshot_alloc(void)
1156 {
1157 	/* Give warning */
1158 	tracing_snapshot();
1159 }
1160 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1161 void *tracing_cond_snapshot_data(struct trace_array *tr)
1162 {
1163 	return NULL;
1164 }
1165 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1166 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1167 {
1168 	return -ENODEV;
1169 }
1170 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1171 int tracing_snapshot_cond_disable(struct trace_array *tr)
1172 {
1173 	return false;
1174 }
1175 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1176 #define free_snapshot(tr)	do { } while (0)
1177 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1178 #endif /* CONFIG_TRACER_SNAPSHOT */
1179 
1180 void tracer_tracing_off(struct trace_array *tr)
1181 {
1182 	if (tr->array_buffer.buffer)
1183 		ring_buffer_record_off(tr->array_buffer.buffer);
1184 	/*
1185 	 * This flag is looked at when buffers haven't been allocated
1186 	 * yet, or by some tracers (like irqsoff), that just want to
1187 	 * know if the ring buffer has been disabled, but it can handle
1188 	 * races of where it gets disabled but we still do a record.
1189 	 * As the check is in the fast path of the tracers, it is more
1190 	 * important to be fast than accurate.
1191 	 */
1192 	tr->buffer_disabled = 1;
1193 }
1194 
1195 /**
1196  * tracer_tracing_disable() - temporary disable the buffer from write
1197  * @tr: The trace array to disable its buffer for
1198  *
1199  * Expects trace_tracing_enable() to re-enable tracing.
1200  * The difference between this and tracer_tracing_off() is that this
1201  * is a counter and can nest, whereas, tracer_tracing_off() can
1202  * be called multiple times and a single trace_tracing_on() will
1203  * enable it.
1204  */
1205 void tracer_tracing_disable(struct trace_array *tr)
1206 {
1207 	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1208 		return;
1209 
1210 	ring_buffer_record_disable(tr->array_buffer.buffer);
1211 }
1212 
1213 /**
1214  * tracer_tracing_enable() - counter part of tracer_tracing_disable()
1215  * @tr: The trace array that had tracer_tracincg_disable() called on it
1216  *
1217  * This is called after tracer_tracing_disable() has been called on @tr,
1218  * when it's safe to re-enable tracing.
1219  */
1220 void tracer_tracing_enable(struct trace_array *tr)
1221 {
1222 	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1223 		return;
1224 
1225 	ring_buffer_record_enable(tr->array_buffer.buffer);
1226 }
1227 
1228 /**
1229  * tracing_off - turn off tracing buffers
1230  *
1231  * This function stops the tracing buffers from recording data.
1232  * It does not disable any overhead the tracers themselves may
1233  * be causing. This function simply causes all recording to
1234  * the ring buffers to fail.
1235  */
1236 void tracing_off(void)
1237 {
1238 	tracer_tracing_off(&global_trace);
1239 }
1240 EXPORT_SYMBOL_GPL(tracing_off);
1241 
1242 void disable_trace_on_warning(void)
1243 {
1244 	if (__disable_trace_on_warning) {
1245 		struct trace_array *tr = READ_ONCE(printk_trace);
1246 
1247 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1248 			"Disabling tracing due to warning\n");
1249 		tracing_off();
1250 
1251 		/* Disable trace_printk() buffer too */
1252 		if (tr != &global_trace) {
1253 			trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
1254 					       "Disabling tracing due to warning\n");
1255 			tracer_tracing_off(tr);
1256 		}
1257 	}
1258 }
1259 
1260 /**
1261  * tracer_tracing_is_on - show real state of ring buffer enabled
1262  * @tr : the trace array to know if ring buffer is enabled
1263  *
1264  * Shows real state of the ring buffer if it is enabled or not.
1265  */
1266 bool tracer_tracing_is_on(struct trace_array *tr)
1267 {
1268 	if (tr->array_buffer.buffer)
1269 		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1270 	return !tr->buffer_disabled;
1271 }
1272 
1273 /**
1274  * tracing_is_on - show state of ring buffers enabled
1275  */
1276 int tracing_is_on(void)
1277 {
1278 	return tracer_tracing_is_on(&global_trace);
1279 }
1280 EXPORT_SYMBOL_GPL(tracing_is_on);
1281 
1282 static int __init set_buf_size(char *str)
1283 {
1284 	unsigned long buf_size;
1285 
1286 	if (!str)
1287 		return 0;
1288 	buf_size = memparse(str, &str);
1289 	/*
1290 	 * nr_entries can not be zero and the startup
1291 	 * tests require some buffer space. Therefore
1292 	 * ensure we have at least 4096 bytes of buffer.
1293 	 */
1294 	trace_buf_size = max(4096UL, buf_size);
1295 	return 1;
1296 }
1297 __setup("trace_buf_size=", set_buf_size);
1298 
1299 static int __init set_tracing_thresh(char *str)
1300 {
1301 	unsigned long threshold;
1302 	int ret;
1303 
1304 	if (!str)
1305 		return 0;
1306 	ret = kstrtoul(str, 0, &threshold);
1307 	if (ret < 0)
1308 		return 0;
1309 	tracing_thresh = threshold * 1000;
1310 	return 1;
1311 }
1312 __setup("tracing_thresh=", set_tracing_thresh);
1313 
1314 unsigned long nsecs_to_usecs(unsigned long nsecs)
1315 {
1316 	return nsecs / 1000;
1317 }
1318 
1319 /*
1320  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1321  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1322  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1323  * of strings in the order that the evals (enum) were defined.
1324  */
1325 #undef C
1326 #define C(a, b) b
1327 
1328 /* These must match the bit positions in trace_iterator_flags */
1329 static const char *trace_options[] = {
1330 	TRACE_FLAGS
1331 	NULL
1332 };
1333 
1334 static struct {
1335 	u64 (*func)(void);
1336 	const char *name;
1337 	int in_ns;		/* is this clock in nanoseconds? */
1338 } trace_clocks[] = {
1339 	{ trace_clock_local,		"local",	1 },
1340 	{ trace_clock_global,		"global",	1 },
1341 	{ trace_clock_counter,		"counter",	0 },
1342 	{ trace_clock_jiffies,		"uptime",	0 },
1343 	{ trace_clock,			"perf",		1 },
1344 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1345 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1346 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1347 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1348 	ARCH_TRACE_CLOCKS
1349 };
1350 
1351 bool trace_clock_in_ns(struct trace_array *tr)
1352 {
1353 	if (trace_clocks[tr->clock_id].in_ns)
1354 		return true;
1355 
1356 	return false;
1357 }
1358 
1359 /*
1360  * trace_parser_get_init - gets the buffer for trace parser
1361  */
1362 int trace_parser_get_init(struct trace_parser *parser, int size)
1363 {
1364 	memset(parser, 0, sizeof(*parser));
1365 
1366 	parser->buffer = kmalloc(size, GFP_KERNEL);
1367 	if (!parser->buffer)
1368 		return 1;
1369 
1370 	parser->size = size;
1371 	return 0;
1372 }
1373 
1374 /*
1375  * trace_parser_put - frees the buffer for trace parser
1376  */
1377 void trace_parser_put(struct trace_parser *parser)
1378 {
1379 	kfree(parser->buffer);
1380 	parser->buffer = NULL;
1381 }
1382 
1383 /*
1384  * trace_get_user - reads the user input string separated by  space
1385  * (matched by isspace(ch))
1386  *
1387  * For each string found the 'struct trace_parser' is updated,
1388  * and the function returns.
1389  *
1390  * Returns number of bytes read.
1391  *
1392  * See kernel/trace/trace.h for 'struct trace_parser' details.
1393  */
1394 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1395 	size_t cnt, loff_t *ppos)
1396 {
1397 	char ch;
1398 	size_t read = 0;
1399 	ssize_t ret;
1400 
1401 	if (!*ppos)
1402 		trace_parser_clear(parser);
1403 
1404 	ret = get_user(ch, ubuf++);
1405 	if (ret)
1406 		goto fail;
1407 
1408 	read++;
1409 	cnt--;
1410 
1411 	/*
1412 	 * The parser is not finished with the last write,
1413 	 * continue reading the user input without skipping spaces.
1414 	 */
1415 	if (!parser->cont) {
1416 		/* skip white space */
1417 		while (cnt && isspace(ch)) {
1418 			ret = get_user(ch, ubuf++);
1419 			if (ret)
1420 				goto fail;
1421 			read++;
1422 			cnt--;
1423 		}
1424 
1425 		parser->idx = 0;
1426 
1427 		/* only spaces were written */
1428 		if (isspace(ch) || !ch) {
1429 			*ppos += read;
1430 			return read;
1431 		}
1432 	}
1433 
1434 	/* read the non-space input */
1435 	while (cnt && !isspace(ch) && ch) {
1436 		if (parser->idx < parser->size - 1)
1437 			parser->buffer[parser->idx++] = ch;
1438 		else {
1439 			ret = -EINVAL;
1440 			goto fail;
1441 		}
1442 
1443 		ret = get_user(ch, ubuf++);
1444 		if (ret)
1445 			goto fail;
1446 		read++;
1447 		cnt--;
1448 	}
1449 
1450 	/* We either got finished input or we have to wait for another call. */
1451 	if (isspace(ch) || !ch) {
1452 		parser->buffer[parser->idx] = 0;
1453 		parser->cont = false;
1454 	} else if (parser->idx < parser->size - 1) {
1455 		parser->cont = true;
1456 		parser->buffer[parser->idx++] = ch;
1457 		/* Make sure the parsed string always terminates with '\0'. */
1458 		parser->buffer[parser->idx] = 0;
1459 	} else {
1460 		ret = -EINVAL;
1461 		goto fail;
1462 	}
1463 
1464 	*ppos += read;
1465 	return read;
1466 fail:
1467 	trace_parser_fail(parser);
1468 	return ret;
1469 }
1470 
1471 /* TODO add a seq_buf_to_buffer() */
1472 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1473 {
1474 	int len;
1475 
1476 	if (trace_seq_used(s) <= s->readpos)
1477 		return -EBUSY;
1478 
1479 	len = trace_seq_used(s) - s->readpos;
1480 	if (cnt > len)
1481 		cnt = len;
1482 	memcpy(buf, s->buffer + s->readpos, cnt);
1483 
1484 	s->readpos += cnt;
1485 	return cnt;
1486 }
1487 
1488 unsigned long __read_mostly	tracing_thresh;
1489 
1490 #ifdef CONFIG_TRACER_MAX_TRACE
1491 #ifdef LATENCY_FS_NOTIFY
1492 static struct workqueue_struct *fsnotify_wq;
1493 
1494 static void latency_fsnotify_workfn(struct work_struct *work)
1495 {
1496 	struct trace_array *tr = container_of(work, struct trace_array,
1497 					      fsnotify_work);
1498 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1499 }
1500 
1501 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1502 {
1503 	struct trace_array *tr = container_of(iwork, struct trace_array,
1504 					      fsnotify_irqwork);
1505 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1506 }
1507 
1508 __init static int latency_fsnotify_init(void)
1509 {
1510 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1511 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1512 	if (!fsnotify_wq) {
1513 		pr_err("Unable to allocate tr_max_lat_wq\n");
1514 		return -ENOMEM;
1515 	}
1516 	return 0;
1517 }
1518 
1519 late_initcall_sync(latency_fsnotify_init);
1520 
1521 void latency_fsnotify(struct trace_array *tr)
1522 {
1523 	if (!fsnotify_wq)
1524 		return;
1525 	/*
1526 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1527 	 * possible that we are called from __schedule() or do_idle(), which
1528 	 * could cause a deadlock.
1529 	 */
1530 	irq_work_queue(&tr->fsnotify_irqwork);
1531 }
1532 #endif /* !LATENCY_FS_NOTIFY */
1533 
1534 static const struct file_operations tracing_max_lat_fops;
1535 
1536 static void trace_create_maxlat_file(struct trace_array *tr,
1537 				     struct dentry *d_tracer)
1538 {
1539 #ifdef LATENCY_FS_NOTIFY
1540 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1541 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1542 #endif
1543 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1544 					      TRACE_MODE_WRITE,
1545 					      d_tracer, tr,
1546 					      &tracing_max_lat_fops);
1547 }
1548 
1549 /*
1550  * Copy the new maximum trace into the separate maximum-trace
1551  * structure. (this way the maximum trace is permanently saved,
1552  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1553  */
1554 static void
1555 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1556 {
1557 	struct array_buffer *trace_buf = &tr->array_buffer;
1558 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1559 	struct array_buffer *max_buf = &tr->snapshot_buffer;
1560 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1561 
1562 	max_buf->cpu = cpu;
1563 	max_buf->time_start = data->preempt_timestamp;
1564 
1565 	max_data->saved_latency = tr->max_latency;
1566 	max_data->critical_start = data->critical_start;
1567 	max_data->critical_end = data->critical_end;
1568 
1569 	strscpy(max_data->comm, tsk->comm);
1570 	max_data->pid = tsk->pid;
1571 	/*
1572 	 * If tsk == current, then use current_uid(), as that does not use
1573 	 * RCU. The irq tracer can be called out of RCU scope.
1574 	 */
1575 	if (tsk == current)
1576 		max_data->uid = current_uid();
1577 	else
1578 		max_data->uid = task_uid(tsk);
1579 
1580 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1581 	max_data->policy = tsk->policy;
1582 	max_data->rt_priority = tsk->rt_priority;
1583 
1584 	/* record this tasks comm */
1585 	tracing_record_cmdline(tsk);
1586 	latency_fsnotify(tr);
1587 }
1588 #else
1589 static inline void trace_create_maxlat_file(struct trace_array *tr,
1590 					    struct dentry *d_tracer) { }
1591 static inline void __update_max_tr(struct trace_array *tr,
1592 				   struct task_struct *tsk, int cpu) { }
1593 #endif /* CONFIG_TRACER_MAX_TRACE */
1594 
1595 #ifdef CONFIG_TRACER_SNAPSHOT
1596 /**
1597  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1598  * @tr: tracer
1599  * @tsk: the task with the latency
1600  * @cpu: The cpu that initiated the trace.
1601  * @cond_data: User data associated with a conditional snapshot
1602  *
1603  * Flip the buffers between the @tr and the max_tr and record information
1604  * about which task was the cause of this latency.
1605  */
1606 void
1607 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1608 	      void *cond_data)
1609 {
1610 	if (tr->stop_count)
1611 		return;
1612 
1613 	WARN_ON_ONCE(!irqs_disabled());
1614 
1615 	if (!tr->allocated_snapshot) {
1616 		/* Only the nop tracer should hit this when disabling */
1617 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1618 		return;
1619 	}
1620 
1621 	arch_spin_lock(&tr->max_lock);
1622 
1623 	/* Inherit the recordable setting from array_buffer */
1624 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1625 		ring_buffer_record_on(tr->snapshot_buffer.buffer);
1626 	else
1627 		ring_buffer_record_off(tr->snapshot_buffer.buffer);
1628 
1629 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1630 		arch_spin_unlock(&tr->max_lock);
1631 		return;
1632 	}
1633 
1634 	swap(tr->array_buffer.buffer, tr->snapshot_buffer.buffer);
1635 
1636 	__update_max_tr(tr, tsk, cpu);
1637 
1638 	arch_spin_unlock(&tr->max_lock);
1639 
1640 	/* Any waiters on the old snapshot buffer need to wake up */
1641 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1642 }
1643 
1644 /**
1645  * update_max_tr_single - only copy one trace over, and reset the rest
1646  * @tr: tracer
1647  * @tsk: task with the latency
1648  * @cpu: the cpu of the buffer to copy.
1649  *
1650  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1651  */
1652 void
1653 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1654 {
1655 	int ret;
1656 
1657 	if (tr->stop_count)
1658 		return;
1659 
1660 	WARN_ON_ONCE(!irqs_disabled());
1661 	if (!tr->allocated_snapshot) {
1662 		/* Only the nop tracer should hit this when disabling */
1663 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1664 		return;
1665 	}
1666 
1667 	arch_spin_lock(&tr->max_lock);
1668 
1669 	ret = ring_buffer_swap_cpu(tr->snapshot_buffer.buffer, tr->array_buffer.buffer, cpu);
1670 
1671 	if (ret == -EBUSY) {
1672 		/*
1673 		 * We failed to swap the buffer due to a commit taking
1674 		 * place on this CPU. We fail to record, but we reset
1675 		 * the max trace buffer (no one writes directly to it)
1676 		 * and flag that it failed.
1677 		 * Another reason is resize is in progress.
1678 		 */
1679 		trace_array_printk_buf(tr->snapshot_buffer.buffer, _THIS_IP_,
1680 			"Failed to swap buffers due to commit or resize in progress\n");
1681 	}
1682 
1683 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1684 
1685 	__update_max_tr(tr, tsk, cpu);
1686 	arch_spin_unlock(&tr->max_lock);
1687 }
1688 #endif /* CONFIG_TRACER_SNAPSHOT */
1689 
1690 struct pipe_wait {
1691 	struct trace_iterator		*iter;
1692 	int				wait_index;
1693 };
1694 
1695 static bool wait_pipe_cond(void *data)
1696 {
1697 	struct pipe_wait *pwait = data;
1698 	struct trace_iterator *iter = pwait->iter;
1699 
1700 	if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
1701 		return true;
1702 
1703 	return iter->closed;
1704 }
1705 
1706 static int wait_on_pipe(struct trace_iterator *iter, int full)
1707 {
1708 	struct pipe_wait pwait;
1709 	int ret;
1710 
1711 	/* Iterators are static, they should be filled or empty */
1712 	if (trace_buffer_iter(iter, iter->cpu_file))
1713 		return 0;
1714 
1715 	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
1716 	pwait.iter = iter;
1717 
1718 	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
1719 			       wait_pipe_cond, &pwait);
1720 
1721 #ifdef CONFIG_TRACER_SNAPSHOT
1722 	/*
1723 	 * Make sure this is still the snapshot buffer, as if a snapshot were
1724 	 * to happen, this would now be the main buffer.
1725 	 */
1726 	if (iter->snapshot)
1727 		iter->array_buffer = &iter->tr->snapshot_buffer;
1728 #endif
1729 	return ret;
1730 }
1731 
1732 #ifdef CONFIG_FTRACE_STARTUP_TEST
1733 static bool selftests_can_run;
1734 
1735 struct trace_selftests {
1736 	struct list_head		list;
1737 	struct tracer			*type;
1738 };
1739 
1740 static LIST_HEAD(postponed_selftests);
1741 
1742 static int save_selftest(struct tracer *type)
1743 {
1744 	struct trace_selftests *selftest;
1745 
1746 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1747 	if (!selftest)
1748 		return -ENOMEM;
1749 
1750 	selftest->type = type;
1751 	list_add(&selftest->list, &postponed_selftests);
1752 	return 0;
1753 }
1754 
1755 static int run_tracer_selftest(struct tracer *type)
1756 {
1757 	struct trace_array *tr = &global_trace;
1758 	struct tracer_flags *saved_flags = tr->current_trace_flags;
1759 	struct tracer *saved_tracer = tr->current_trace;
1760 	int ret;
1761 
1762 	if (!type->selftest || tracing_selftest_disabled)
1763 		return 0;
1764 
1765 	/*
1766 	 * If a tracer registers early in boot up (before scheduling is
1767 	 * initialized and such), then do not run its selftests yet.
1768 	 * Instead, run it a little later in the boot process.
1769 	 */
1770 	if (!selftests_can_run)
1771 		return save_selftest(type);
1772 
1773 	if (!tracing_is_on()) {
1774 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1775 			type->name);
1776 		return 0;
1777 	}
1778 
1779 	/*
1780 	 * Run a selftest on this tracer.
1781 	 * Here we reset the trace buffer, and set the current
1782 	 * tracer to be this tracer. The tracer can then run some
1783 	 * internal tracing to verify that everything is in order.
1784 	 * If we fail, we do not register this tracer.
1785 	 */
1786 	tracing_reset_online_cpus(&tr->array_buffer);
1787 
1788 	tr->current_trace = type;
1789 	tr->current_trace_flags = type->flags ? : type->default_flags;
1790 
1791 #ifdef CONFIG_TRACER_MAX_TRACE
1792 	if (tracer_uses_snapshot(type)) {
1793 		/* If we expanded the buffers, make sure the max is expanded too */
1794 		if (tr->ring_buffer_expanded)
1795 			ring_buffer_resize(tr->snapshot_buffer.buffer, trace_buf_size,
1796 					   RING_BUFFER_ALL_CPUS);
1797 		tr->allocated_snapshot = true;
1798 	}
1799 #endif
1800 
1801 	/* the test is responsible for initializing and enabling */
1802 	pr_info("Testing tracer %s: ", type->name);
1803 	ret = type->selftest(type, tr);
1804 	/* the test is responsible for resetting too */
1805 	tr->current_trace = saved_tracer;
1806 	tr->current_trace_flags = saved_flags;
1807 	if (ret) {
1808 		printk(KERN_CONT "FAILED!\n");
1809 		/* Add the warning after printing 'FAILED' */
1810 		WARN_ON(1);
1811 		return -1;
1812 	}
1813 	/* Only reset on passing, to avoid touching corrupted buffers */
1814 	tracing_reset_online_cpus(&tr->array_buffer);
1815 
1816 #ifdef CONFIG_TRACER_MAX_TRACE
1817 	if (tracer_uses_snapshot(type)) {
1818 		tr->allocated_snapshot = false;
1819 
1820 		/* Shrink the max buffer again */
1821 		if (tr->ring_buffer_expanded)
1822 			ring_buffer_resize(tr->snapshot_buffer.buffer, 1,
1823 					   RING_BUFFER_ALL_CPUS);
1824 	}
1825 #endif
1826 
1827 	printk(KERN_CONT "PASSED\n");
1828 	return 0;
1829 }
1830 
1831 static int do_run_tracer_selftest(struct tracer *type)
1832 {
1833 	int ret;
1834 
1835 	/*
1836 	 * Tests can take a long time, especially if they are run one after the
1837 	 * other, as does happen during bootup when all the tracers are
1838 	 * registered. This could cause the soft lockup watchdog to trigger.
1839 	 */
1840 	cond_resched();
1841 
1842 	tracing_selftest_running = true;
1843 	ret = run_tracer_selftest(type);
1844 	tracing_selftest_running = false;
1845 
1846 	return ret;
1847 }
1848 
1849 static __init int init_trace_selftests(void)
1850 {
1851 	struct trace_selftests *p, *n;
1852 	struct tracer *t, **last;
1853 	int ret;
1854 
1855 	selftests_can_run = true;
1856 
1857 	guard(mutex)(&trace_types_lock);
1858 
1859 	if (list_empty(&postponed_selftests))
1860 		return 0;
1861 
1862 	pr_info("Running postponed tracer tests:\n");
1863 
1864 	tracing_selftest_running = true;
1865 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1866 		/* This loop can take minutes when sanitizers are enabled, so
1867 		 * lets make sure we allow RCU processing.
1868 		 */
1869 		cond_resched();
1870 		ret = run_tracer_selftest(p->type);
1871 		/* If the test fails, then warn and remove from available_tracers */
1872 		if (ret < 0) {
1873 			WARN(1, "tracer: %s failed selftest, disabling\n",
1874 			     p->type->name);
1875 			last = &trace_types;
1876 			for (t = trace_types; t; t = t->next) {
1877 				if (t == p->type) {
1878 					*last = t->next;
1879 					break;
1880 				}
1881 				last = &t->next;
1882 			}
1883 		}
1884 		list_del(&p->list);
1885 		kfree(p);
1886 	}
1887 	tracing_selftest_running = false;
1888 
1889 	return 0;
1890 }
1891 core_initcall(init_trace_selftests);
1892 #else
1893 static inline int do_run_tracer_selftest(struct tracer *type)
1894 {
1895 	return 0;
1896 }
1897 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1898 
1899 static int add_tracer(struct trace_array *tr, struct tracer *t);
1900 
1901 static void __init apply_trace_boot_options(void);
1902 
1903 static void free_tracers(struct trace_array *tr)
1904 {
1905 	struct tracers *t, *n;
1906 
1907 	lockdep_assert_held(&trace_types_lock);
1908 
1909 	list_for_each_entry_safe(t, n, &tr->tracers, list) {
1910 		list_del(&t->list);
1911 		kfree(t->flags);
1912 		kfree(t);
1913 	}
1914 }
1915 
1916 /**
1917  * register_tracer - register a tracer with the ftrace system.
1918  * @type: the plugin for the tracer
1919  *
1920  * Register a new plugin tracer.
1921  */
1922 int __init register_tracer(struct tracer *type)
1923 {
1924 	struct trace_array *tr;
1925 	struct tracer *t;
1926 	int ret = 0;
1927 
1928 	if (!type->name) {
1929 		pr_info("Tracer must have a name\n");
1930 		return -1;
1931 	}
1932 
1933 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1934 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1935 		return -1;
1936 	}
1937 
1938 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
1939 		pr_warn("Can not register tracer %s due to lockdown\n",
1940 			   type->name);
1941 		return -EPERM;
1942 	}
1943 
1944 	mutex_lock(&trace_types_lock);
1945 
1946 	for (t = trace_types; t; t = t->next) {
1947 		if (strcmp(type->name, t->name) == 0) {
1948 			/* already found */
1949 			pr_info("Tracer %s already registered\n",
1950 				type->name);
1951 			ret = -1;
1952 			goto out;
1953 		}
1954 	}
1955 
1956 	/* store the tracer for __set_tracer_option */
1957 	if (type->flags)
1958 		type->flags->trace = type;
1959 
1960 	ret = do_run_tracer_selftest(type);
1961 	if (ret < 0)
1962 		goto out;
1963 
1964 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1965 		ret = add_tracer(tr, type);
1966 		if (ret < 0) {
1967 			/* The tracer will still exist but without options */
1968 			pr_warn("Failed to create tracer options for %s\n", type->name);
1969 			break;
1970 		}
1971 	}
1972 
1973 	type->next = trace_types;
1974 	trace_types = type;
1975 
1976  out:
1977 	mutex_unlock(&trace_types_lock);
1978 
1979 	if (ret || !default_bootup_tracer)
1980 		return ret;
1981 
1982 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1983 		return 0;
1984 
1985 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1986 	/* Do we want this tracer to start on bootup? */
1987 	WARN_ON(tracing_set_tracer(&global_trace, type->name) < 0);
1988 	default_bootup_tracer = NULL;
1989 
1990 	apply_trace_boot_options();
1991 
1992 	/* disable other selftests, since this will break it. */
1993 	disable_tracing_selftest("running a tracer");
1994 
1995 	return 0;
1996 }
1997 
1998 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
1999 {
2000 	struct trace_buffer *buffer = buf->buffer;
2001 
2002 	if (!buffer)
2003 		return;
2004 
2005 	ring_buffer_record_disable(buffer);
2006 
2007 	/* Make sure all commits have finished */
2008 	synchronize_rcu();
2009 	ring_buffer_reset_cpu(buffer, cpu);
2010 
2011 	ring_buffer_record_enable(buffer);
2012 }
2013 
2014 void tracing_reset_online_cpus(struct array_buffer *buf)
2015 {
2016 	struct trace_buffer *buffer = buf->buffer;
2017 
2018 	if (!buffer)
2019 		return;
2020 
2021 	ring_buffer_record_disable(buffer);
2022 
2023 	/* Make sure all commits have finished */
2024 	synchronize_rcu();
2025 
2026 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2027 
2028 	ring_buffer_reset_online_cpus(buffer);
2029 
2030 	ring_buffer_record_enable(buffer);
2031 }
2032 
2033 static void tracing_reset_all_cpus(struct array_buffer *buf)
2034 {
2035 	struct trace_buffer *buffer = buf->buffer;
2036 
2037 	if (!buffer)
2038 		return;
2039 
2040 	ring_buffer_record_disable(buffer);
2041 
2042 	/* Make sure all commits have finished */
2043 	synchronize_rcu();
2044 
2045 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2046 
2047 	ring_buffer_reset(buffer);
2048 
2049 	ring_buffer_record_enable(buffer);
2050 }
2051 
2052 /* Must have trace_types_lock held */
2053 void tracing_reset_all_online_cpus_unlocked(void)
2054 {
2055 	struct trace_array *tr;
2056 
2057 	lockdep_assert_held(&trace_types_lock);
2058 
2059 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2060 		if (!tr->clear_trace)
2061 			continue;
2062 		tr->clear_trace = false;
2063 		tracing_reset_online_cpus(&tr->array_buffer);
2064 #ifdef CONFIG_TRACER_SNAPSHOT
2065 		tracing_reset_online_cpus(&tr->snapshot_buffer);
2066 #endif
2067 	}
2068 }
2069 
2070 void tracing_reset_all_online_cpus(void)
2071 {
2072 	guard(mutex)(&trace_types_lock);
2073 	tracing_reset_all_online_cpus_unlocked();
2074 }
2075 
2076 int is_tracing_stopped(void)
2077 {
2078 	return global_trace.stop_count;
2079 }
2080 
2081 static void tracing_start_tr(struct trace_array *tr)
2082 {
2083 	struct trace_buffer *buffer;
2084 
2085 	if (tracing_disabled)
2086 		return;
2087 
2088 	guard(raw_spinlock_irqsave)(&tr->start_lock);
2089 	if (--tr->stop_count) {
2090 		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2091 			/* Someone screwed up their debugging */
2092 			tr->stop_count = 0;
2093 		}
2094 		return;
2095 	}
2096 
2097 	/* Prevent the buffers from switching */
2098 	arch_spin_lock(&tr->max_lock);
2099 
2100 	buffer = tr->array_buffer.buffer;
2101 	if (buffer)
2102 		ring_buffer_record_enable(buffer);
2103 
2104 #ifdef CONFIG_TRACER_SNAPSHOT
2105 	buffer = tr->snapshot_buffer.buffer;
2106 	if (buffer)
2107 		ring_buffer_record_enable(buffer);
2108 #endif
2109 
2110 	arch_spin_unlock(&tr->max_lock);
2111 }
2112 
2113 /**
2114  * tracing_start - quick start of the tracer
2115  *
2116  * If tracing is enabled but was stopped by tracing_stop,
2117  * this will start the tracer back up.
2118  */
2119 void tracing_start(void)
2120 
2121 {
2122 	return tracing_start_tr(&global_trace);
2123 }
2124 
2125 static void tracing_stop_tr(struct trace_array *tr)
2126 {
2127 	struct trace_buffer *buffer;
2128 
2129 	guard(raw_spinlock_irqsave)(&tr->start_lock);
2130 	if (tr->stop_count++)
2131 		return;
2132 
2133 	/* Prevent the buffers from switching */
2134 	arch_spin_lock(&tr->max_lock);
2135 
2136 	buffer = tr->array_buffer.buffer;
2137 	if (buffer)
2138 		ring_buffer_record_disable(buffer);
2139 
2140 #ifdef CONFIG_TRACER_SNAPSHOT
2141 	buffer = tr->snapshot_buffer.buffer;
2142 	if (buffer)
2143 		ring_buffer_record_disable(buffer);
2144 #endif
2145 
2146 	arch_spin_unlock(&tr->max_lock);
2147 }
2148 
2149 /**
2150  * tracing_stop - quick stop of the tracer
2151  *
2152  * Light weight way to stop tracing. Use in conjunction with
2153  * tracing_start.
2154  */
2155 void tracing_stop(void)
2156 {
2157 	return tracing_stop_tr(&global_trace);
2158 }
2159 
2160 /*
2161  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2162  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2163  * simplifies those functions and keeps them in sync.
2164  */
2165 enum print_line_t trace_handle_return(struct trace_seq *s)
2166 {
2167 	return trace_seq_has_overflowed(s) ?
2168 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2169 }
2170 EXPORT_SYMBOL_GPL(trace_handle_return);
2171 
2172 static unsigned short migration_disable_value(void)
2173 {
2174 #if defined(CONFIG_SMP)
2175 	return current->migration_disabled;
2176 #else
2177 	return 0;
2178 #endif
2179 }
2180 
2181 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2182 {
2183 	unsigned int trace_flags = irqs_status;
2184 	unsigned int pc;
2185 
2186 	pc = preempt_count();
2187 
2188 	if (pc & NMI_MASK)
2189 		trace_flags |= TRACE_FLAG_NMI;
2190 	if (pc & HARDIRQ_MASK)
2191 		trace_flags |= TRACE_FLAG_HARDIRQ;
2192 	if (in_serving_softirq())
2193 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2194 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2195 		trace_flags |= TRACE_FLAG_BH_OFF;
2196 
2197 	if (tif_need_resched())
2198 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2199 	if (test_preempt_need_resched())
2200 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2201 	if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
2202 		trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
2203 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2204 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2205 }
2206 
2207 struct ring_buffer_event *
2208 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2209 			  int type,
2210 			  unsigned long len,
2211 			  unsigned int trace_ctx)
2212 {
2213 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2214 }
2215 
2216 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2217 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2218 static int trace_buffered_event_ref;
2219 
2220 /**
2221  * trace_buffered_event_enable - enable buffering events
2222  *
2223  * When events are being filtered, it is quicker to use a temporary
2224  * buffer to write the event data into if there's a likely chance
2225  * that it will not be committed. The discard of the ring buffer
2226  * is not as fast as committing, and is much slower than copying
2227  * a commit.
2228  *
2229  * When an event is to be filtered, allocate per cpu buffers to
2230  * write the event data into, and if the event is filtered and discarded
2231  * it is simply dropped, otherwise, the entire data is to be committed
2232  * in one shot.
2233  */
2234 void trace_buffered_event_enable(void)
2235 {
2236 	struct ring_buffer_event *event;
2237 	struct page *page;
2238 	int cpu;
2239 
2240 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2241 
2242 	if (trace_buffered_event_ref++)
2243 		return;
2244 
2245 	for_each_tracing_cpu(cpu) {
2246 		page = alloc_pages_node(cpu_to_node(cpu),
2247 					GFP_KERNEL | __GFP_NORETRY, 0);
2248 		/* This is just an optimization and can handle failures */
2249 		if (!page) {
2250 			pr_err("Failed to allocate event buffer\n");
2251 			break;
2252 		}
2253 
2254 		event = page_address(page);
2255 		memset(event, 0, sizeof(*event));
2256 
2257 		per_cpu(trace_buffered_event, cpu) = event;
2258 
2259 		scoped_guard(preempt,) {
2260 			if (cpu == smp_processor_id() &&
2261 			    __this_cpu_read(trace_buffered_event) !=
2262 			    per_cpu(trace_buffered_event, cpu))
2263 				WARN_ON_ONCE(1);
2264 		}
2265 	}
2266 }
2267 
2268 static void enable_trace_buffered_event(void *data)
2269 {
2270 	this_cpu_dec(trace_buffered_event_cnt);
2271 }
2272 
2273 static void disable_trace_buffered_event(void *data)
2274 {
2275 	this_cpu_inc(trace_buffered_event_cnt);
2276 }
2277 
2278 /**
2279  * trace_buffered_event_disable - disable buffering events
2280  *
2281  * When a filter is removed, it is faster to not use the buffered
2282  * events, and to commit directly into the ring buffer. Free up
2283  * the temp buffers when there are no more users. This requires
2284  * special synchronization with current events.
2285  */
2286 void trace_buffered_event_disable(void)
2287 {
2288 	int cpu;
2289 
2290 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2291 
2292 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2293 		return;
2294 
2295 	if (--trace_buffered_event_ref)
2296 		return;
2297 
2298 	/* For each CPU, set the buffer as used. */
2299 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2300 			 NULL, true);
2301 
2302 	/* Wait for all current users to finish */
2303 	synchronize_rcu();
2304 
2305 	for_each_tracing_cpu(cpu) {
2306 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2307 		per_cpu(trace_buffered_event, cpu) = NULL;
2308 	}
2309 
2310 	/*
2311 	 * Wait for all CPUs that potentially started checking if they can use
2312 	 * their event buffer only after the previous synchronize_rcu() call and
2313 	 * they still read a valid pointer from trace_buffered_event. It must be
2314 	 * ensured they don't see cleared trace_buffered_event_cnt else they
2315 	 * could wrongly decide to use the pointed-to buffer which is now freed.
2316 	 */
2317 	synchronize_rcu();
2318 
2319 	/* For each CPU, relinquish the buffer */
2320 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2321 			 true);
2322 }
2323 
2324 static struct trace_buffer *temp_buffer;
2325 
2326 struct ring_buffer_event *
2327 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2328 			  struct trace_event_file *trace_file,
2329 			  int type, unsigned long len,
2330 			  unsigned int trace_ctx)
2331 {
2332 	struct ring_buffer_event *entry;
2333 	struct trace_array *tr = trace_file->tr;
2334 	int val;
2335 
2336 	*current_rb = tr->array_buffer.buffer;
2337 
2338 	if (!tr->no_filter_buffering_ref &&
2339 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2340 		preempt_disable_notrace();
2341 		/*
2342 		 * Filtering is on, so try to use the per cpu buffer first.
2343 		 * This buffer will simulate a ring_buffer_event,
2344 		 * where the type_len is zero and the array[0] will
2345 		 * hold the full length.
2346 		 * (see include/linux/ring-buffer.h for details on
2347 		 *  how the ring_buffer_event is structured).
2348 		 *
2349 		 * Using a temp buffer during filtering and copying it
2350 		 * on a matched filter is quicker than writing directly
2351 		 * into the ring buffer and then discarding it when
2352 		 * it doesn't match. That is because the discard
2353 		 * requires several atomic operations to get right.
2354 		 * Copying on match and doing nothing on a failed match
2355 		 * is still quicker than no copy on match, but having
2356 		 * to discard out of the ring buffer on a failed match.
2357 		 */
2358 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2359 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2360 
2361 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2362 
2363 			/*
2364 			 * Preemption is disabled, but interrupts and NMIs
2365 			 * can still come in now. If that happens after
2366 			 * the above increment, then it will have to go
2367 			 * back to the old method of allocating the event
2368 			 * on the ring buffer, and if the filter fails, it
2369 			 * will have to call ring_buffer_discard_commit()
2370 			 * to remove it.
2371 			 *
2372 			 * Need to also check the unlikely case that the
2373 			 * length is bigger than the temp buffer size.
2374 			 * If that happens, then the reserve is pretty much
2375 			 * guaranteed to fail, as the ring buffer currently
2376 			 * only allows events less than a page. But that may
2377 			 * change in the future, so let the ring buffer reserve
2378 			 * handle the failure in that case.
2379 			 */
2380 			if (val == 1 && likely(len <= max_len)) {
2381 				trace_event_setup(entry, type, trace_ctx);
2382 				entry->array[0] = len;
2383 				/* Return with preemption disabled */
2384 				return entry;
2385 			}
2386 			this_cpu_dec(trace_buffered_event_cnt);
2387 		}
2388 		/* __trace_buffer_lock_reserve() disables preemption */
2389 		preempt_enable_notrace();
2390 	}
2391 
2392 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2393 					    trace_ctx);
2394 	/*
2395 	 * If tracing is off, but we have triggers enabled
2396 	 * we still need to look at the event data. Use the temp_buffer
2397 	 * to store the trace event for the trigger to use. It's recursive
2398 	 * safe and will not be recorded anywhere.
2399 	 */
2400 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2401 		*current_rb = temp_buffer;
2402 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2403 						    trace_ctx);
2404 	}
2405 	return entry;
2406 }
2407 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2408 
2409 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2410 static DEFINE_MUTEX(tracepoint_printk_mutex);
2411 
2412 static void output_printk(struct trace_event_buffer *fbuffer)
2413 {
2414 	struct trace_event_call *event_call;
2415 	struct trace_event_file *file;
2416 	struct trace_event *event;
2417 	unsigned long flags;
2418 	struct trace_iterator *iter = tracepoint_print_iter;
2419 
2420 	/* We should never get here if iter is NULL */
2421 	if (WARN_ON_ONCE(!iter))
2422 		return;
2423 
2424 	event_call = fbuffer->trace_file->event_call;
2425 	if (!event_call || !event_call->event.funcs ||
2426 	    !event_call->event.funcs->trace)
2427 		return;
2428 
2429 	file = fbuffer->trace_file;
2430 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2431 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2432 	     !filter_match_preds(file->filter, fbuffer->entry)))
2433 		return;
2434 
2435 	event = &fbuffer->trace_file->event_call->event;
2436 
2437 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2438 	trace_seq_init(&iter->seq);
2439 	iter->ent = fbuffer->entry;
2440 	event_call->event.funcs->trace(iter, 0, event);
2441 	trace_seq_putc(&iter->seq, 0);
2442 	printk("%s", iter->seq.buffer);
2443 
2444 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2445 }
2446 
2447 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
2448 			     void *buffer, size_t *lenp,
2449 			     loff_t *ppos)
2450 {
2451 	int save_tracepoint_printk;
2452 	int ret;
2453 
2454 	guard(mutex)(&tracepoint_printk_mutex);
2455 	save_tracepoint_printk = tracepoint_printk;
2456 
2457 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2458 
2459 	/*
2460 	 * This will force exiting early, as tracepoint_printk
2461 	 * is always zero when tracepoint_printk_iter is not allocated
2462 	 */
2463 	if (!tracepoint_print_iter)
2464 		tracepoint_printk = 0;
2465 
2466 	if (save_tracepoint_printk == tracepoint_printk)
2467 		return ret;
2468 
2469 	if (tracepoint_printk)
2470 		static_key_enable(&tracepoint_printk_key.key);
2471 	else
2472 		static_key_disable(&tracepoint_printk_key.key);
2473 
2474 	return ret;
2475 }
2476 
2477 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2478 {
2479 	enum event_trigger_type tt = ETT_NONE;
2480 	struct trace_event_file *file = fbuffer->trace_file;
2481 
2482 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2483 			fbuffer->entry, &tt))
2484 		goto discard;
2485 
2486 	if (static_key_false(&tracepoint_printk_key.key))
2487 		output_printk(fbuffer);
2488 
2489 	if (static_branch_unlikely(&trace_event_exports_enabled))
2490 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2491 
2492 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2493 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2494 
2495 discard:
2496 	if (tt)
2497 		event_triggers_post_call(file, tt);
2498 
2499 }
2500 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2501 
2502 /*
2503  * Skip 3:
2504  *
2505  *   trace_buffer_unlock_commit_regs()
2506  *   trace_event_buffer_commit()
2507  *   trace_event_raw_event_xxx()
2508  */
2509 # define STACK_SKIP 3
2510 
2511 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2512 				     struct trace_buffer *buffer,
2513 				     struct ring_buffer_event *event,
2514 				     unsigned int trace_ctx,
2515 				     struct pt_regs *regs)
2516 {
2517 	__buffer_unlock_commit(buffer, event);
2518 
2519 	/*
2520 	 * If regs is not set, then skip the necessary functions.
2521 	 * Note, we can still get here via blktrace, wakeup tracer
2522 	 * and mmiotrace, but that's ok if they lose a function or
2523 	 * two. They are not that meaningful.
2524 	 */
2525 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2526 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2527 }
2528 
2529 /*
2530  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2531  */
2532 void
2533 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2534 				   struct ring_buffer_event *event)
2535 {
2536 	__buffer_unlock_commit(buffer, event);
2537 }
2538 
2539 void
2540 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2541 	       parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs)
2542 {
2543 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2544 	struct ring_buffer_event *event;
2545 	struct ftrace_entry *entry;
2546 	int size = sizeof(*entry);
2547 
2548 	size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long);
2549 
2550 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size,
2551 					    trace_ctx);
2552 	if (!event)
2553 		return;
2554 	entry	= ring_buffer_event_data(event);
2555 	entry->ip			= ip;
2556 	entry->parent_ip		= parent_ip;
2557 
2558 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
2559 	if (fregs) {
2560 		for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
2561 			entry->args[i] = ftrace_regs_get_argument(fregs, i);
2562 	}
2563 #endif
2564 
2565 	if (static_branch_unlikely(&trace_function_exports_enabled))
2566 		ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2567 	__buffer_unlock_commit(buffer, event);
2568 }
2569 
2570 #ifdef CONFIG_STACKTRACE
2571 
2572 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2573 #define FTRACE_KSTACK_NESTING	4
2574 
2575 #define FTRACE_KSTACK_ENTRIES	(SZ_4K / FTRACE_KSTACK_NESTING)
2576 
2577 struct ftrace_stack {
2578 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2579 };
2580 
2581 
2582 struct ftrace_stacks {
2583 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2584 };
2585 
2586 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2587 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2588 
2589 void __ftrace_trace_stack(struct trace_array *tr,
2590 			  struct trace_buffer *buffer,
2591 			  unsigned int trace_ctx,
2592 			  int skip, struct pt_regs *regs)
2593 {
2594 	struct ring_buffer_event *event;
2595 	unsigned int size, nr_entries;
2596 	struct ftrace_stack *fstack;
2597 	struct stack_entry *entry;
2598 	int stackidx;
2599 	int bit;
2600 
2601 	bit = trace_test_and_set_recursion(_THIS_IP_, _RET_IP_, TRACE_EVENT_START);
2602 	if (bit < 0)
2603 		return;
2604 
2605 	/*
2606 	 * Add one, for this function and the call to save_stack_trace()
2607 	 * If regs is set, then these functions will not be in the way.
2608 	 */
2609 #ifndef CONFIG_UNWINDER_ORC
2610 	if (!regs)
2611 		skip++;
2612 #endif
2613 
2614 	guard(preempt_notrace)();
2615 
2616 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2617 
2618 	/* This should never happen. If it does, yell once and skip */
2619 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2620 		goto out;
2621 
2622 	/*
2623 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2624 	 * interrupt will either see the value pre increment or post
2625 	 * increment. If the interrupt happens pre increment it will have
2626 	 * restored the counter when it returns.  We just need a barrier to
2627 	 * keep gcc from moving things around.
2628 	 */
2629 	barrier();
2630 
2631 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2632 	size = ARRAY_SIZE(fstack->calls);
2633 
2634 	if (regs) {
2635 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2636 						   size, skip);
2637 	} else {
2638 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2639 	}
2640 
2641 #ifdef CONFIG_DYNAMIC_FTRACE
2642 	/* Mark entry of stack trace as trampoline code */
2643 	if (tr->ops && tr->ops->trampoline) {
2644 		unsigned long tramp_start = tr->ops->trampoline;
2645 		unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
2646 		unsigned long *calls = fstack->calls;
2647 
2648 		for (int i = 0; i < nr_entries; i++) {
2649 			if (calls[i] >= tramp_start && calls[i] < tramp_end)
2650 				calls[i] = FTRACE_TRAMPOLINE_MARKER;
2651 		}
2652 	}
2653 #endif
2654 
2655 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2656 				    struct_size(entry, caller, nr_entries),
2657 				    trace_ctx);
2658 	if (!event)
2659 		goto out;
2660 	entry = ring_buffer_event_data(event);
2661 
2662 	entry->size = nr_entries;
2663 	memcpy(&entry->caller, fstack->calls,
2664 	       flex_array_size(entry, caller, nr_entries));
2665 
2666 	__buffer_unlock_commit(buffer, event);
2667 
2668  out:
2669 	/* Again, don't let gcc optimize things here */
2670 	barrier();
2671 	__this_cpu_dec(ftrace_stack_reserve);
2672 	trace_clear_recursion(bit);
2673 }
2674 
2675 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
2676 		   int skip)
2677 {
2678 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2679 
2680 	if (rcu_is_watching()) {
2681 		__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
2682 		return;
2683 	}
2684 
2685 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
2686 		return;
2687 
2688 	/*
2689 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
2690 	 * but if the above rcu_is_watching() failed, then the NMI
2691 	 * triggered someplace critical, and ct_irq_enter() should
2692 	 * not be called from NMI.
2693 	 */
2694 	if (unlikely(in_nmi()))
2695 		return;
2696 
2697 	ct_irq_enter_irqson();
2698 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
2699 	ct_irq_exit_irqson();
2700 }
2701 
2702 /**
2703  * trace_dump_stack - record a stack back trace in the trace buffer
2704  * @skip: Number of functions to skip (helper handlers)
2705  */
2706 void trace_dump_stack(int skip)
2707 {
2708 	if (tracing_disabled || tracing_selftest_running)
2709 		return;
2710 
2711 #ifndef CONFIG_UNWINDER_ORC
2712 	/* Skip 1 to skip this function. */
2713 	skip++;
2714 #endif
2715 	__ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
2716 				tracing_gen_ctx(), skip, NULL);
2717 }
2718 EXPORT_SYMBOL_GPL(trace_dump_stack);
2719 
2720 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
2721 static DEFINE_PER_CPU(int, user_stack_count);
2722 
2723 static void
2724 ftrace_trace_userstack(struct trace_array *tr,
2725 		       struct trace_buffer *buffer, unsigned int trace_ctx)
2726 {
2727 	struct ring_buffer_event *event;
2728 	struct userstack_entry *entry;
2729 
2730 	if (!(tr->trace_flags & TRACE_ITER(USERSTACKTRACE)))
2731 		return;
2732 
2733 	/*
2734 	 * NMIs can not handle page faults, even with fix ups.
2735 	 * The save user stack can (and often does) fault.
2736 	 */
2737 	if (unlikely(in_nmi()))
2738 		return;
2739 
2740 	/*
2741 	 * prevent recursion, since the user stack tracing may
2742 	 * trigger other kernel events.
2743 	 */
2744 	guard(preempt)();
2745 	if (__this_cpu_read(user_stack_count))
2746 		return;
2747 
2748 	__this_cpu_inc(user_stack_count);
2749 
2750 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2751 					    sizeof(*entry), trace_ctx);
2752 	if (!event)
2753 		goto out_drop_count;
2754 	entry	= ring_buffer_event_data(event);
2755 
2756 	entry->tgid		= current->tgid;
2757 	memset(&entry->caller, 0, sizeof(entry->caller));
2758 
2759 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
2760 	__buffer_unlock_commit(buffer, event);
2761 
2762  out_drop_count:
2763 	__this_cpu_dec(user_stack_count);
2764 }
2765 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
2766 static void ftrace_trace_userstack(struct trace_array *tr,
2767 				   struct trace_buffer *buffer,
2768 				   unsigned int trace_ctx)
2769 {
2770 }
2771 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
2772 
2773 #endif /* CONFIG_STACKTRACE */
2774 
2775 static inline void
2776 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
2777 			  unsigned long long delta)
2778 {
2779 	entry->bottom_delta_ts = delta & U32_MAX;
2780 	entry->top_delta_ts = (delta >> 32);
2781 }
2782 
2783 void trace_last_func_repeats(struct trace_array *tr,
2784 			     struct trace_func_repeats *last_info,
2785 			     unsigned int trace_ctx)
2786 {
2787 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2788 	struct func_repeats_entry *entry;
2789 	struct ring_buffer_event *event;
2790 	u64 delta;
2791 
2792 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
2793 					    sizeof(*entry), trace_ctx);
2794 	if (!event)
2795 		return;
2796 
2797 	delta = ring_buffer_event_time_stamp(buffer, event) -
2798 		last_info->ts_last_call;
2799 
2800 	entry = ring_buffer_event_data(event);
2801 	entry->ip = last_info->ip;
2802 	entry->parent_ip = last_info->parent_ip;
2803 	entry->count = last_info->count;
2804 	func_repeats_set_delta_ts(entry, delta);
2805 
2806 	__buffer_unlock_commit(buffer, event);
2807 }
2808 
2809 static void trace_iterator_increment(struct trace_iterator *iter)
2810 {
2811 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2812 
2813 	iter->idx++;
2814 	if (buf_iter)
2815 		ring_buffer_iter_advance(buf_iter);
2816 }
2817 
2818 static struct trace_entry *
2819 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2820 		unsigned long *lost_events)
2821 {
2822 	struct ring_buffer_event *event;
2823 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2824 
2825 	if (buf_iter) {
2826 		event = ring_buffer_iter_peek(buf_iter, ts);
2827 		if (lost_events)
2828 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
2829 				(unsigned long)-1 : 0;
2830 	} else {
2831 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
2832 					 lost_events);
2833 	}
2834 
2835 	if (event) {
2836 		iter->ent_size = ring_buffer_event_length(event);
2837 		return ring_buffer_event_data(event);
2838 	}
2839 	iter->ent_size = 0;
2840 	return NULL;
2841 }
2842 
2843 static struct trace_entry *
2844 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2845 		  unsigned long *missing_events, u64 *ent_ts)
2846 {
2847 	struct trace_buffer *buffer = iter->array_buffer->buffer;
2848 	struct trace_entry *ent, *next = NULL;
2849 	unsigned long lost_events = 0, next_lost = 0;
2850 	int cpu_file = iter->cpu_file;
2851 	u64 next_ts = 0, ts;
2852 	int next_cpu = -1;
2853 	int next_size = 0;
2854 	int cpu;
2855 
2856 	/*
2857 	 * If we are in a per_cpu trace file, don't bother by iterating over
2858 	 * all cpu and peek directly.
2859 	 */
2860 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
2861 		if (ring_buffer_empty_cpu(buffer, cpu_file))
2862 			return NULL;
2863 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2864 		if (ent_cpu)
2865 			*ent_cpu = cpu_file;
2866 
2867 		return ent;
2868 	}
2869 
2870 	for_each_tracing_cpu(cpu) {
2871 
2872 		if (ring_buffer_empty_cpu(buffer, cpu))
2873 			continue;
2874 
2875 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2876 
2877 		/*
2878 		 * Pick the entry with the smallest timestamp:
2879 		 */
2880 		if (ent && (!next || ts < next_ts)) {
2881 			next = ent;
2882 			next_cpu = cpu;
2883 			next_ts = ts;
2884 			next_lost = lost_events;
2885 			next_size = iter->ent_size;
2886 		}
2887 	}
2888 
2889 	iter->ent_size = next_size;
2890 
2891 	if (ent_cpu)
2892 		*ent_cpu = next_cpu;
2893 
2894 	if (ent_ts)
2895 		*ent_ts = next_ts;
2896 
2897 	if (missing_events)
2898 		*missing_events = next_lost;
2899 
2900 	return next;
2901 }
2902 
2903 #define STATIC_FMT_BUF_SIZE	128
2904 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
2905 
2906 char *trace_iter_expand_format(struct trace_iterator *iter)
2907 {
2908 	char *tmp;
2909 
2910 	/*
2911 	 * iter->tr is NULL when used with tp_printk, which makes
2912 	 * this get called where it is not safe to call krealloc().
2913 	 */
2914 	if (!iter->tr || iter->fmt == static_fmt_buf)
2915 		return NULL;
2916 
2917 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
2918 		       GFP_KERNEL);
2919 	if (tmp) {
2920 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
2921 		iter->fmt = tmp;
2922 	}
2923 
2924 	return tmp;
2925 }
2926 
2927 /* Returns true if the string is safe to dereference from an event */
2928 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
2929 {
2930 	unsigned long addr = (unsigned long)str;
2931 	struct trace_event *trace_event;
2932 	struct trace_event_call *event;
2933 
2934 	/* OK if part of the event data */
2935 	if ((addr >= (unsigned long)iter->ent) &&
2936 	    (addr < (unsigned long)iter->ent + iter->ent_size))
2937 		return true;
2938 
2939 	/* OK if part of the temp seq buffer */
2940 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
2941 	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
2942 		return true;
2943 
2944 	/* Core rodata can not be freed */
2945 	if (is_kernel_rodata(addr))
2946 		return true;
2947 
2948 	if (trace_is_tracepoint_string(str))
2949 		return true;
2950 
2951 	/*
2952 	 * Now this could be a module event, referencing core module
2953 	 * data, which is OK.
2954 	 */
2955 	if (!iter->ent)
2956 		return false;
2957 
2958 	trace_event = ftrace_find_event(iter->ent->type);
2959 	if (!trace_event)
2960 		return false;
2961 
2962 	event = container_of(trace_event, struct trace_event_call, event);
2963 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
2964 		return false;
2965 
2966 	/* Would rather have rodata, but this will suffice */
2967 	if (within_module_core(addr, event->module))
2968 		return true;
2969 
2970 	return false;
2971 }
2972 
2973 /**
2974  * ignore_event - Check dereferenced fields while writing to the seq buffer
2975  * @iter: The iterator that holds the seq buffer and the event being printed
2976  *
2977  * At boot up, test_event_printk() will flag any event that dereferences
2978  * a string with "%s" that does exist in the ring buffer. It may still
2979  * be valid, as the string may point to a static string in the kernel
2980  * rodata that never gets freed. But if the string pointer is pointing
2981  * to something that was allocated, there's a chance that it can be freed
2982  * by the time the user reads the trace. This would cause a bad memory
2983  * access by the kernel and possibly crash the system.
2984  *
2985  * This function will check if the event has any fields flagged as needing
2986  * to be checked at runtime and perform those checks.
2987  *
2988  * If it is found that a field is unsafe, it will write into the @iter->seq
2989  * a message stating what was found to be unsafe.
2990  *
2991  * @return: true if the event is unsafe and should be ignored,
2992  *          false otherwise.
2993  */
2994 bool ignore_event(struct trace_iterator *iter)
2995 {
2996 	struct ftrace_event_field *field;
2997 	struct trace_event *trace_event;
2998 	struct trace_event_call *event;
2999 	struct list_head *head;
3000 	struct trace_seq *seq;
3001 	const void *ptr;
3002 
3003 	trace_event = ftrace_find_event(iter->ent->type);
3004 
3005 	seq = &iter->seq;
3006 
3007 	if (!trace_event) {
3008 		trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
3009 		return true;
3010 	}
3011 
3012 	event = container_of(trace_event, struct trace_event_call, event);
3013 	if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
3014 		return false;
3015 
3016 	head = trace_get_fields(event);
3017 	if (!head) {
3018 		trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
3019 				 trace_event_name(event));
3020 		return true;
3021 	}
3022 
3023 	/* Offsets are from the iter->ent that points to the raw event */
3024 	ptr = iter->ent;
3025 
3026 	list_for_each_entry(field, head, link) {
3027 		const char *str;
3028 		bool good;
3029 
3030 		if (!field->needs_test)
3031 			continue;
3032 
3033 		str = *(const char **)(ptr + field->offset);
3034 
3035 		good = trace_safe_str(iter, str);
3036 
3037 		/*
3038 		 * If you hit this warning, it is likely that the
3039 		 * trace event in question used %s on a string that
3040 		 * was saved at the time of the event, but may not be
3041 		 * around when the trace is read. Use __string(),
3042 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3043 		 * instead. See samples/trace_events/trace-events-sample.h
3044 		 * for reference.
3045 		 */
3046 		if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
3047 			      trace_event_name(event), field->name)) {
3048 			trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
3049 					 trace_event_name(event), field->name);
3050 			return true;
3051 		}
3052 	}
3053 	return false;
3054 }
3055 
3056 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3057 {
3058 	const char *p, *new_fmt;
3059 	char *q;
3060 
3061 	if (WARN_ON_ONCE(!fmt))
3062 		return fmt;
3063 
3064 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER(HASH_PTR))
3065 		return fmt;
3066 
3067 	p = fmt;
3068 	new_fmt = q = iter->fmt;
3069 	while (*p) {
3070 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3071 			if (!trace_iter_expand_format(iter))
3072 				return fmt;
3073 
3074 			q += iter->fmt - new_fmt;
3075 			new_fmt = iter->fmt;
3076 		}
3077 
3078 		*q++ = *p++;
3079 
3080 		/* Replace %p with %px */
3081 		if (p[-1] == '%') {
3082 			if (p[0] == '%') {
3083 				*q++ = *p++;
3084 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3085 				*q++ = *p++;
3086 				*q++ = 'x';
3087 			}
3088 		}
3089 	}
3090 	*q = '\0';
3091 
3092 	return new_fmt;
3093 }
3094 
3095 #define STATIC_TEMP_BUF_SIZE	128
3096 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3097 
3098 /* Find the next real entry, without updating the iterator itself */
3099 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3100 					  int *ent_cpu, u64 *ent_ts)
3101 {
3102 	/* __find_next_entry will reset ent_size */
3103 	int ent_size = iter->ent_size;
3104 	struct trace_entry *entry;
3105 
3106 	/*
3107 	 * If called from ftrace_dump(), then the iter->temp buffer
3108 	 * will be the static_temp_buf and not created from kmalloc.
3109 	 * If the entry size is greater than the buffer, we can
3110 	 * not save it. Just return NULL in that case. This is only
3111 	 * used to add markers when two consecutive events' time
3112 	 * stamps have a large delta. See trace_print_lat_context()
3113 	 */
3114 	if (iter->temp == static_temp_buf &&
3115 	    STATIC_TEMP_BUF_SIZE < ent_size)
3116 		return NULL;
3117 
3118 	/*
3119 	 * The __find_next_entry() may call peek_next_entry(), which may
3120 	 * call ring_buffer_peek() that may make the contents of iter->ent
3121 	 * undefined. Need to copy iter->ent now.
3122 	 */
3123 	if (iter->ent && iter->ent != iter->temp) {
3124 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3125 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3126 			void *temp;
3127 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3128 			if (!temp)
3129 				return NULL;
3130 			kfree(iter->temp);
3131 			iter->temp = temp;
3132 			iter->temp_size = iter->ent_size;
3133 		}
3134 		memcpy(iter->temp, iter->ent, iter->ent_size);
3135 		iter->ent = iter->temp;
3136 	}
3137 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3138 	/* Put back the original ent_size */
3139 	iter->ent_size = ent_size;
3140 
3141 	return entry;
3142 }
3143 
3144 /* Find the next real entry, and increment the iterator to the next entry */
3145 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3146 {
3147 	iter->ent = __find_next_entry(iter, &iter->cpu,
3148 				      &iter->lost_events, &iter->ts);
3149 
3150 	if (iter->ent)
3151 		trace_iterator_increment(iter);
3152 
3153 	return iter->ent ? iter : NULL;
3154 }
3155 
3156 static void trace_consume(struct trace_iterator *iter)
3157 {
3158 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3159 			    &iter->lost_events);
3160 }
3161 
3162 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3163 {
3164 	struct trace_iterator *iter = m->private;
3165 	int i = (int)*pos;
3166 	void *ent;
3167 
3168 	WARN_ON_ONCE(iter->leftover);
3169 
3170 	(*pos)++;
3171 
3172 	/* can't go backwards */
3173 	if (iter->idx > i)
3174 		return NULL;
3175 
3176 	if (iter->idx < 0)
3177 		ent = trace_find_next_entry_inc(iter);
3178 	else
3179 		ent = iter;
3180 
3181 	while (ent && iter->idx < i)
3182 		ent = trace_find_next_entry_inc(iter);
3183 
3184 	iter->pos = *pos;
3185 
3186 	return ent;
3187 }
3188 
3189 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3190 {
3191 	struct ring_buffer_iter *buf_iter;
3192 	unsigned long entries = 0;
3193 	u64 ts;
3194 
3195 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3196 
3197 	buf_iter = trace_buffer_iter(iter, cpu);
3198 	if (!buf_iter)
3199 		return;
3200 
3201 	ring_buffer_iter_reset(buf_iter);
3202 
3203 	/*
3204 	 * We could have the case with the max latency tracers
3205 	 * that a reset never took place on a cpu. This is evident
3206 	 * by the timestamp being before the start of the buffer.
3207 	 */
3208 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
3209 		if (ts >= iter->array_buffer->time_start)
3210 			break;
3211 		entries++;
3212 		ring_buffer_iter_advance(buf_iter);
3213 		/* This could be a big loop */
3214 		cond_resched();
3215 	}
3216 
3217 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3218 }
3219 
3220 /*
3221  * The current tracer is copied to avoid a global locking
3222  * all around.
3223  */
3224 static void *s_start(struct seq_file *m, loff_t *pos)
3225 {
3226 	struct trace_iterator *iter = m->private;
3227 	struct trace_array *tr = iter->tr;
3228 	int cpu_file = iter->cpu_file;
3229 	void *p = NULL;
3230 	loff_t l = 0;
3231 	int cpu;
3232 
3233 	mutex_lock(&trace_types_lock);
3234 	if (unlikely(tr->current_trace != iter->trace)) {
3235 		/* Close iter->trace before switching to the new current tracer */
3236 		if (iter->trace->close)
3237 			iter->trace->close(iter);
3238 		iter->trace = tr->current_trace;
3239 		/* Reopen the new current tracer */
3240 		if (iter->trace->open)
3241 			iter->trace->open(iter);
3242 	}
3243 	mutex_unlock(&trace_types_lock);
3244 
3245 	if (iter->snapshot && tracer_uses_snapshot(iter->trace))
3246 		return ERR_PTR(-EBUSY);
3247 
3248 	if (*pos != iter->pos) {
3249 		iter->ent = NULL;
3250 		iter->cpu = 0;
3251 		iter->idx = -1;
3252 
3253 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3254 			for_each_tracing_cpu(cpu)
3255 				tracing_iter_reset(iter, cpu);
3256 		} else
3257 			tracing_iter_reset(iter, cpu_file);
3258 
3259 		iter->leftover = 0;
3260 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3261 			;
3262 
3263 	} else {
3264 		/*
3265 		 * If we overflowed the seq_file before, then we want
3266 		 * to just reuse the trace_seq buffer again.
3267 		 */
3268 		if (iter->leftover)
3269 			p = iter;
3270 		else {
3271 			l = *pos - 1;
3272 			p = s_next(m, p, &l);
3273 		}
3274 	}
3275 
3276 	trace_event_read_lock();
3277 	trace_access_lock(cpu_file);
3278 	return p;
3279 }
3280 
3281 static void s_stop(struct seq_file *m, void *p)
3282 {
3283 	struct trace_iterator *iter = m->private;
3284 
3285 	if (iter->snapshot && tracer_uses_snapshot(iter->trace))
3286 		return;
3287 
3288 	trace_access_unlock(iter->cpu_file);
3289 	trace_event_read_unlock();
3290 }
3291 
3292 static void
3293 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3294 		      unsigned long *entries, int cpu)
3295 {
3296 	unsigned long count;
3297 
3298 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
3299 	/*
3300 	 * If this buffer has skipped entries, then we hold all
3301 	 * entries for the trace and we need to ignore the
3302 	 * ones before the time stamp.
3303 	 */
3304 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3305 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3306 		/* total is the same as the entries */
3307 		*total = count;
3308 	} else
3309 		*total = count +
3310 			ring_buffer_overrun_cpu(buf->buffer, cpu);
3311 	*entries = count;
3312 }
3313 
3314 static void
3315 get_total_entries(struct array_buffer *buf,
3316 		  unsigned long *total, unsigned long *entries)
3317 {
3318 	unsigned long t, e;
3319 	int cpu;
3320 
3321 	*total = 0;
3322 	*entries = 0;
3323 
3324 	for_each_tracing_cpu(cpu) {
3325 		get_total_entries_cpu(buf, &t, &e, cpu);
3326 		*total += t;
3327 		*entries += e;
3328 	}
3329 }
3330 
3331 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3332 {
3333 	unsigned long total, entries;
3334 
3335 	if (!tr)
3336 		tr = &global_trace;
3337 
3338 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3339 
3340 	return entries;
3341 }
3342 
3343 unsigned long trace_total_entries(struct trace_array *tr)
3344 {
3345 	unsigned long total, entries;
3346 
3347 	if (!tr)
3348 		tr = &global_trace;
3349 
3350 	get_total_entries(&tr->array_buffer, &total, &entries);
3351 
3352 	return entries;
3353 }
3354 
3355 static void print_lat_help_header(struct seq_file *m)
3356 {
3357 	seq_puts(m, "#                    _------=> CPU#            \n"
3358 		    "#                   / _-----=> irqs-off/BH-disabled\n"
3359 		    "#                  | / _----=> need-resched    \n"
3360 		    "#                  || / _---=> hardirq/softirq \n"
3361 		    "#                  ||| / _--=> preempt-depth   \n"
3362 		    "#                  |||| / _-=> migrate-disable \n"
3363 		    "#                  ||||| /     delay           \n"
3364 		    "#  cmd     pid     |||||| time  |   caller     \n"
3365 		    "#     \\   /        ||||||  \\    |    /       \n");
3366 }
3367 
3368 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3369 {
3370 	unsigned long total;
3371 	unsigned long entries;
3372 
3373 	get_total_entries(buf, &total, &entries);
3374 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3375 		   entries, total, num_online_cpus());
3376 	seq_puts(m, "#\n");
3377 }
3378 
3379 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3380 				   unsigned int flags)
3381 {
3382 	bool tgid = flags & TRACE_ITER(RECORD_TGID);
3383 
3384 	print_event_info(buf, m);
3385 
3386 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
3387 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
3388 }
3389 
3390 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3391 				       unsigned int flags)
3392 {
3393 	bool tgid = flags & TRACE_ITER(RECORD_TGID);
3394 	static const char space[] = "            ";
3395 	int prec = tgid ? 12 : 2;
3396 
3397 	print_event_info(buf, m);
3398 
3399 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
3400 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
3401 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
3402 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
3403 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
3404 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
3405 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
3406 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
3407 }
3408 
3409 void
3410 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3411 {
3412 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3413 	struct array_buffer *buf = iter->array_buffer;
3414 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3415 	struct tracer *type = iter->trace;
3416 	unsigned long entries;
3417 	unsigned long total;
3418 	const char *name = type->name;
3419 
3420 	get_total_entries(buf, &total, &entries);
3421 
3422 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3423 		   name, init_utsname()->release);
3424 	seq_puts(m, "# -----------------------------------"
3425 		 "---------------------------------\n");
3426 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3427 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3428 		   nsecs_to_usecs(data->saved_latency),
3429 		   entries,
3430 		   total,
3431 		   buf->cpu,
3432 		   preempt_model_str(),
3433 		   /* These are reserved for later use */
3434 		   0, 0, 0, 0);
3435 #ifdef CONFIG_SMP
3436 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3437 #else
3438 	seq_puts(m, ")\n");
3439 #endif
3440 	seq_puts(m, "#    -----------------\n");
3441 	seq_printf(m, "#    | task: %.16s-%d "
3442 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3443 		   data->comm, data->pid,
3444 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3445 		   data->policy, data->rt_priority);
3446 	seq_puts(m, "#    -----------------\n");
3447 
3448 	if (data->critical_start) {
3449 		seq_puts(m, "#  => started at: ");
3450 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3451 		trace_print_seq(m, &iter->seq);
3452 		seq_puts(m, "\n#  => ended at:   ");
3453 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3454 		trace_print_seq(m, &iter->seq);
3455 		seq_puts(m, "\n#\n");
3456 	}
3457 
3458 	seq_puts(m, "#\n");
3459 }
3460 
3461 static void test_cpu_buff_start(struct trace_iterator *iter)
3462 {
3463 	struct trace_seq *s = &iter->seq;
3464 	struct trace_array *tr = iter->tr;
3465 
3466 	if (!(tr->trace_flags & TRACE_ITER(ANNOTATE)))
3467 		return;
3468 
3469 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3470 		return;
3471 
3472 	if (cpumask_available(iter->started) &&
3473 	    cpumask_test_cpu(iter->cpu, iter->started))
3474 		return;
3475 
3476 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3477 		return;
3478 
3479 	if (cpumask_available(iter->started))
3480 		cpumask_set_cpu(iter->cpu, iter->started);
3481 
3482 	/* Don't print started cpu buffer for the first entry of the trace */
3483 	if (iter->idx > 1)
3484 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3485 				iter->cpu);
3486 }
3487 
3488 #ifdef CONFIG_FTRACE_SYSCALLS
3489 static bool is_syscall_event(struct trace_event *event)
3490 {
3491 	return (event->funcs == &enter_syscall_print_funcs) ||
3492 	       (event->funcs == &exit_syscall_print_funcs);
3493 
3494 }
3495 #define syscall_buf_size CONFIG_TRACE_SYSCALL_BUF_SIZE_DEFAULT
3496 #else
3497 static inline bool is_syscall_event(struct trace_event *event)
3498 {
3499 	return false;
3500 }
3501 #define syscall_buf_size 0
3502 #endif /* CONFIG_FTRACE_SYSCALLS */
3503 
3504 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3505 {
3506 	struct trace_array *tr = iter->tr;
3507 	struct trace_seq *s = &iter->seq;
3508 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3509 	struct trace_entry *entry;
3510 	struct trace_event *event;
3511 
3512 	entry = iter->ent;
3513 
3514 	test_cpu_buff_start(iter);
3515 
3516 	event = ftrace_find_event(entry->type);
3517 
3518 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
3519 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3520 			trace_print_lat_context(iter);
3521 		else
3522 			trace_print_context(iter);
3523 	}
3524 
3525 	if (trace_seq_has_overflowed(s))
3526 		return TRACE_TYPE_PARTIAL_LINE;
3527 
3528 	if (event) {
3529 		if (tr->trace_flags & TRACE_ITER(FIELDS))
3530 			return print_event_fields(iter, event);
3531 		/*
3532 		 * For TRACE_EVENT() events, the print_fmt is not
3533 		 * safe to use if the array has delta offsets
3534 		 * Force printing via the fields.
3535 		 */
3536 		if ((tr->text_delta)) {
3537 			/* ftrace and system call events are still OK */
3538 			if ((event->type > __TRACE_LAST_TYPE) &&
3539 			    !is_syscall_event(event))
3540 				return print_event_fields(iter, event);
3541 		}
3542 		return event->funcs->trace(iter, sym_flags, event);
3543 	}
3544 
3545 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3546 
3547 	return trace_handle_return(s);
3548 }
3549 
3550 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3551 {
3552 	struct trace_array *tr = iter->tr;
3553 	struct trace_seq *s = &iter->seq;
3554 	struct trace_entry *entry;
3555 	struct trace_event *event;
3556 
3557 	entry = iter->ent;
3558 
3559 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO))
3560 		trace_seq_printf(s, "%d %d %llu ",
3561 				 entry->pid, iter->cpu, iter->ts);
3562 
3563 	if (trace_seq_has_overflowed(s))
3564 		return TRACE_TYPE_PARTIAL_LINE;
3565 
3566 	event = ftrace_find_event(entry->type);
3567 	if (event)
3568 		return event->funcs->raw(iter, 0, event);
3569 
3570 	trace_seq_printf(s, "%d ?\n", entry->type);
3571 
3572 	return trace_handle_return(s);
3573 }
3574 
3575 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3576 {
3577 	struct trace_array *tr = iter->tr;
3578 	struct trace_seq *s = &iter->seq;
3579 	unsigned char newline = '\n';
3580 	struct trace_entry *entry;
3581 	struct trace_event *event;
3582 
3583 	entry = iter->ent;
3584 
3585 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
3586 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3587 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3588 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3589 		if (trace_seq_has_overflowed(s))
3590 			return TRACE_TYPE_PARTIAL_LINE;
3591 	}
3592 
3593 	event = ftrace_find_event(entry->type);
3594 	if (event) {
3595 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3596 		if (ret != TRACE_TYPE_HANDLED)
3597 			return ret;
3598 	}
3599 
3600 	SEQ_PUT_FIELD(s, newline);
3601 
3602 	return trace_handle_return(s);
3603 }
3604 
3605 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3606 {
3607 	struct trace_array *tr = iter->tr;
3608 	struct trace_seq *s = &iter->seq;
3609 	struct trace_entry *entry;
3610 	struct trace_event *event;
3611 
3612 	entry = iter->ent;
3613 
3614 	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
3615 		SEQ_PUT_FIELD(s, entry->pid);
3616 		SEQ_PUT_FIELD(s, iter->cpu);
3617 		SEQ_PUT_FIELD(s, iter->ts);
3618 		if (trace_seq_has_overflowed(s))
3619 			return TRACE_TYPE_PARTIAL_LINE;
3620 	}
3621 
3622 	event = ftrace_find_event(entry->type);
3623 	return event ? event->funcs->binary(iter, 0, event) :
3624 		TRACE_TYPE_HANDLED;
3625 }
3626 
3627 int trace_empty(struct trace_iterator *iter)
3628 {
3629 	struct ring_buffer_iter *buf_iter;
3630 	int cpu;
3631 
3632 	/* If we are looking at one CPU buffer, only check that one */
3633 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3634 		cpu = iter->cpu_file;
3635 		buf_iter = trace_buffer_iter(iter, cpu);
3636 		if (buf_iter) {
3637 			if (!ring_buffer_iter_empty(buf_iter))
3638 				return 0;
3639 		} else {
3640 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
3641 				return 0;
3642 		}
3643 		return 1;
3644 	}
3645 
3646 	for_each_tracing_cpu(cpu) {
3647 		buf_iter = trace_buffer_iter(iter, cpu);
3648 		if (buf_iter) {
3649 			if (!ring_buffer_iter_empty(buf_iter))
3650 				return 0;
3651 		} else {
3652 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
3653 				return 0;
3654 		}
3655 	}
3656 
3657 	return 1;
3658 }
3659 
3660 /*  Called with trace_event_read_lock() held. */
3661 enum print_line_t print_trace_line(struct trace_iterator *iter)
3662 {
3663 	struct trace_array *tr = iter->tr;
3664 	unsigned long trace_flags = tr->trace_flags;
3665 	enum print_line_t ret;
3666 
3667 	if (iter->lost_events) {
3668 		if (iter->lost_events == (unsigned long)-1)
3669 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
3670 					 iter->cpu);
3671 		else
3672 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3673 					 iter->cpu, iter->lost_events);
3674 		if (trace_seq_has_overflowed(&iter->seq))
3675 			return TRACE_TYPE_PARTIAL_LINE;
3676 	}
3677 
3678 	if (iter->trace && iter->trace->print_line) {
3679 		ret = iter->trace->print_line(iter);
3680 		if (ret != TRACE_TYPE_UNHANDLED)
3681 			return ret;
3682 	}
3683 
3684 	if (iter->ent->type == TRACE_BPUTS &&
3685 			trace_flags & TRACE_ITER(PRINTK) &&
3686 			trace_flags & TRACE_ITER(PRINTK_MSGONLY))
3687 		return trace_print_bputs_msg_only(iter);
3688 
3689 	if (iter->ent->type == TRACE_BPRINT &&
3690 			trace_flags & TRACE_ITER(PRINTK) &&
3691 			trace_flags & TRACE_ITER(PRINTK_MSGONLY))
3692 		return trace_print_bprintk_msg_only(iter);
3693 
3694 	if (iter->ent->type == TRACE_PRINT &&
3695 			trace_flags & TRACE_ITER(PRINTK) &&
3696 			trace_flags & TRACE_ITER(PRINTK_MSGONLY))
3697 		return trace_print_printk_msg_only(iter);
3698 
3699 	if (trace_flags & TRACE_ITER(BIN))
3700 		return print_bin_fmt(iter);
3701 
3702 	if (trace_flags & TRACE_ITER(HEX))
3703 		return print_hex_fmt(iter);
3704 
3705 	if (trace_flags & TRACE_ITER(RAW))
3706 		return print_raw_fmt(iter);
3707 
3708 	return print_trace_fmt(iter);
3709 }
3710 
3711 void trace_latency_header(struct seq_file *m)
3712 {
3713 	struct trace_iterator *iter = m->private;
3714 	struct trace_array *tr = iter->tr;
3715 
3716 	/* print nothing if the buffers are empty */
3717 	if (trace_empty(iter))
3718 		return;
3719 
3720 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3721 		print_trace_header(m, iter);
3722 
3723 	if (!(tr->trace_flags & TRACE_ITER(VERBOSE)))
3724 		print_lat_help_header(m);
3725 }
3726 
3727 void trace_default_header(struct seq_file *m)
3728 {
3729 	struct trace_iterator *iter = m->private;
3730 	struct trace_array *tr = iter->tr;
3731 	unsigned long trace_flags = tr->trace_flags;
3732 
3733 	if (!(trace_flags & TRACE_ITER(CONTEXT_INFO)))
3734 		return;
3735 
3736 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3737 		/* print nothing if the buffers are empty */
3738 		if (trace_empty(iter))
3739 			return;
3740 		print_trace_header(m, iter);
3741 		if (!(trace_flags & TRACE_ITER(VERBOSE)))
3742 			print_lat_help_header(m);
3743 	} else {
3744 		if (!(trace_flags & TRACE_ITER(VERBOSE))) {
3745 			if (trace_flags & TRACE_ITER(IRQ_INFO))
3746 				print_func_help_header_irq(iter->array_buffer,
3747 							   m, trace_flags);
3748 			else
3749 				print_func_help_header(iter->array_buffer, m,
3750 						       trace_flags);
3751 		}
3752 	}
3753 }
3754 
3755 static void test_ftrace_alive(struct seq_file *m)
3756 {
3757 	if (!ftrace_is_dead())
3758 		return;
3759 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3760 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
3761 }
3762 
3763 #ifdef CONFIG_TRACER_SNAPSHOT
3764 static void show_snapshot_main_help(struct seq_file *m)
3765 {
3766 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3767 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3768 		    "#                      Takes a snapshot of the main buffer.\n"
3769 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3770 		    "#                      (Doesn't have to be '2' works with any number that\n"
3771 		    "#                       is not a '0' or '1')\n");
3772 }
3773 
3774 static void show_snapshot_percpu_help(struct seq_file *m)
3775 {
3776 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3777 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3778 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3779 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3780 #else
3781 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3782 		    "#                     Must use main snapshot file to allocate.\n");
3783 #endif
3784 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3785 		    "#                      (Doesn't have to be '2' works with any number that\n"
3786 		    "#                       is not a '0' or '1')\n");
3787 }
3788 
3789 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3790 {
3791 	if (iter->tr->allocated_snapshot)
3792 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3793 	else
3794 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3795 
3796 	seq_puts(m, "# Snapshot commands:\n");
3797 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3798 		show_snapshot_main_help(m);
3799 	else
3800 		show_snapshot_percpu_help(m);
3801 }
3802 #else
3803 /* Should never be called */
3804 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3805 #endif
3806 
3807 static int s_show(struct seq_file *m, void *v)
3808 {
3809 	struct trace_iterator *iter = v;
3810 	int ret;
3811 
3812 	if (iter->ent == NULL) {
3813 		if (iter->tr) {
3814 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
3815 			seq_puts(m, "#\n");
3816 			test_ftrace_alive(m);
3817 		}
3818 		if (iter->snapshot && trace_empty(iter))
3819 			print_snapshot_help(m, iter);
3820 		else if (iter->trace && iter->trace->print_header)
3821 			iter->trace->print_header(m);
3822 		else
3823 			trace_default_header(m);
3824 
3825 	} else if (iter->leftover) {
3826 		/*
3827 		 * If we filled the seq_file buffer earlier, we
3828 		 * want to just show it now.
3829 		 */
3830 		ret = trace_print_seq(m, &iter->seq);
3831 
3832 		/* ret should this time be zero, but you never know */
3833 		iter->leftover = ret;
3834 
3835 	} else {
3836 		ret = print_trace_line(iter);
3837 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
3838 			iter->seq.full = 0;
3839 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
3840 		}
3841 		ret = trace_print_seq(m, &iter->seq);
3842 		/*
3843 		 * If we overflow the seq_file buffer, then it will
3844 		 * ask us for this data again at start up.
3845 		 * Use that instead.
3846 		 *  ret is 0 if seq_file write succeeded.
3847 		 *        -1 otherwise.
3848 		 */
3849 		iter->leftover = ret;
3850 	}
3851 
3852 	return 0;
3853 }
3854 
3855 /*
3856  * Should be used after trace_array_get(), trace_types_lock
3857  * ensures that i_cdev was already initialized.
3858  */
3859 static inline int tracing_get_cpu(struct inode *inode)
3860 {
3861 	if (inode->i_cdev) /* See trace_create_cpu_file() */
3862 		return (long)inode->i_cdev - 1;
3863 	return RING_BUFFER_ALL_CPUS;
3864 }
3865 
3866 static const struct seq_operations tracer_seq_ops = {
3867 	.start		= s_start,
3868 	.next		= s_next,
3869 	.stop		= s_stop,
3870 	.show		= s_show,
3871 };
3872 
3873 /*
3874  * Note, as iter itself can be allocated and freed in different
3875  * ways, this function is only used to free its content, and not
3876  * the iterator itself. The only requirement to all the allocations
3877  * is that it must zero all fields (kzalloc), as freeing works with
3878  * ethier allocated content or NULL.
3879  */
3880 static void free_trace_iter_content(struct trace_iterator *iter)
3881 {
3882 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
3883 	if (iter->fmt != static_fmt_buf)
3884 		kfree(iter->fmt);
3885 
3886 	kfree(iter->temp);
3887 	kfree(iter->buffer_iter);
3888 	mutex_destroy(&iter->mutex);
3889 	free_cpumask_var(iter->started);
3890 }
3891 
3892 static struct trace_iterator *
3893 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3894 {
3895 	struct trace_array *tr = inode->i_private;
3896 	struct trace_iterator *iter;
3897 	int cpu;
3898 
3899 	if (tracing_disabled)
3900 		return ERR_PTR(-ENODEV);
3901 
3902 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3903 	if (!iter)
3904 		return ERR_PTR(-ENOMEM);
3905 
3906 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3907 				    GFP_KERNEL);
3908 	if (!iter->buffer_iter)
3909 		goto release;
3910 
3911 	/*
3912 	 * trace_find_next_entry() may need to save off iter->ent.
3913 	 * It will place it into the iter->temp buffer. As most
3914 	 * events are less than 128, allocate a buffer of that size.
3915 	 * If one is greater, then trace_find_next_entry() will
3916 	 * allocate a new buffer to adjust for the bigger iter->ent.
3917 	 * It's not critical if it fails to get allocated here.
3918 	 */
3919 	iter->temp = kmalloc(128, GFP_KERNEL);
3920 	if (iter->temp)
3921 		iter->temp_size = 128;
3922 
3923 	/*
3924 	 * trace_event_printf() may need to modify given format
3925 	 * string to replace %p with %px so that it shows real address
3926 	 * instead of hash value. However, that is only for the event
3927 	 * tracing, other tracer may not need. Defer the allocation
3928 	 * until it is needed.
3929 	 */
3930 	iter->fmt = NULL;
3931 	iter->fmt_size = 0;
3932 
3933 	mutex_lock(&trace_types_lock);
3934 	iter->trace = tr->current_trace;
3935 
3936 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3937 		goto fail;
3938 
3939 	iter->tr = tr;
3940 
3941 #ifdef CONFIG_TRACER_SNAPSHOT
3942 	/* Currently only the top directory has a snapshot */
3943 	if (tr->current_trace->print_max || snapshot)
3944 		iter->array_buffer = &tr->snapshot_buffer;
3945 	else
3946 #endif
3947 		iter->array_buffer = &tr->array_buffer;
3948 	iter->snapshot = snapshot;
3949 	iter->pos = -1;
3950 	iter->cpu_file = tracing_get_cpu(inode);
3951 	mutex_init(&iter->mutex);
3952 
3953 	/* Notify the tracer early; before we stop tracing. */
3954 	if (iter->trace->open)
3955 		iter->trace->open(iter);
3956 
3957 	/* Annotate start of buffers if we had overruns */
3958 	if (ring_buffer_overruns(iter->array_buffer->buffer))
3959 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
3960 
3961 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
3962 	if (trace_clocks[tr->clock_id].in_ns)
3963 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3964 
3965 	/*
3966 	 * If pause-on-trace is enabled, then stop the trace while
3967 	 * dumping, unless this is the "snapshot" file
3968 	 */
3969 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER(PAUSE_ON_TRACE))) {
3970 		iter->iter_flags |= TRACE_FILE_PAUSE;
3971 		tracing_stop_tr(tr);
3972 	}
3973 
3974 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3975 		for_each_tracing_cpu(cpu) {
3976 			iter->buffer_iter[cpu] =
3977 				ring_buffer_read_start(iter->array_buffer->buffer,
3978 						       cpu, GFP_KERNEL);
3979 			tracing_iter_reset(iter, cpu);
3980 		}
3981 	} else {
3982 		cpu = iter->cpu_file;
3983 		iter->buffer_iter[cpu] =
3984 			ring_buffer_read_start(iter->array_buffer->buffer,
3985 					       cpu, GFP_KERNEL);
3986 		tracing_iter_reset(iter, cpu);
3987 	}
3988 
3989 	mutex_unlock(&trace_types_lock);
3990 
3991 	return iter;
3992 
3993  fail:
3994 	mutex_unlock(&trace_types_lock);
3995 	free_trace_iter_content(iter);
3996 release:
3997 	seq_release_private(inode, file);
3998 	return ERR_PTR(-ENOMEM);
3999 }
4000 
4001 int tracing_open_generic(struct inode *inode, struct file *filp)
4002 {
4003 	int ret;
4004 
4005 	ret = tracing_check_open_get_tr(NULL);
4006 	if (ret)
4007 		return ret;
4008 
4009 	filp->private_data = inode->i_private;
4010 	return 0;
4011 }
4012 
4013 /*
4014  * Open and update trace_array ref count.
4015  * Must have the current trace_array passed to it.
4016  */
4017 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4018 {
4019 	struct trace_array *tr = inode->i_private;
4020 	int ret;
4021 
4022 	ret = tracing_check_open_get_tr(tr);
4023 	if (ret)
4024 		return ret;
4025 
4026 	filp->private_data = inode->i_private;
4027 
4028 	return 0;
4029 }
4030 
4031 /*
4032  * The private pointer of the inode is the trace_event_file.
4033  * Update the tr ref count associated to it.
4034  */
4035 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4036 {
4037 	struct trace_event_file *file = inode->i_private;
4038 	int ret;
4039 
4040 	ret = tracing_check_open_get_tr(file->tr);
4041 	if (ret)
4042 		return ret;
4043 
4044 	guard(mutex)(&event_mutex);
4045 
4046 	/* Fail if the file is marked for removal */
4047 	if (file->flags & EVENT_FILE_FL_FREED) {
4048 		trace_array_put(file->tr);
4049 		return -ENODEV;
4050 	} else {
4051 		event_file_get(file);
4052 	}
4053 
4054 	filp->private_data = inode->i_private;
4055 
4056 	return 0;
4057 }
4058 
4059 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4060 {
4061 	struct trace_event_file *file = inode->i_private;
4062 
4063 	trace_array_put(file->tr);
4064 	event_file_put(file);
4065 
4066 	return 0;
4067 }
4068 
4069 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4070 {
4071 	tracing_release_file_tr(inode, filp);
4072 	return single_release(inode, filp);
4073 }
4074 
4075 static int tracing_release(struct inode *inode, struct file *file)
4076 {
4077 	struct trace_array *tr = inode->i_private;
4078 	struct seq_file *m = file->private_data;
4079 	struct trace_iterator *iter;
4080 	int cpu;
4081 
4082 	if (!(file->f_mode & FMODE_READ)) {
4083 		trace_array_put(tr);
4084 		return 0;
4085 	}
4086 
4087 	/* Writes do not use seq_file */
4088 	iter = m->private;
4089 	mutex_lock(&trace_types_lock);
4090 
4091 	for_each_tracing_cpu(cpu) {
4092 		if (iter->buffer_iter[cpu])
4093 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4094 	}
4095 
4096 	if (iter->trace && iter->trace->close)
4097 		iter->trace->close(iter);
4098 
4099 	if (iter->iter_flags & TRACE_FILE_PAUSE)
4100 		/* reenable tracing if it was previously enabled */
4101 		tracing_start_tr(tr);
4102 
4103 	__trace_array_put(tr);
4104 
4105 	mutex_unlock(&trace_types_lock);
4106 
4107 	free_trace_iter_content(iter);
4108 	seq_release_private(inode, file);
4109 
4110 	return 0;
4111 }
4112 
4113 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4114 {
4115 	struct trace_array *tr = inode->i_private;
4116 
4117 	trace_array_put(tr);
4118 	return 0;
4119 }
4120 
4121 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4122 {
4123 	struct trace_array *tr = inode->i_private;
4124 
4125 	trace_array_put(tr);
4126 
4127 	return single_release(inode, file);
4128 }
4129 
4130 static bool update_last_data_if_empty(struct trace_array *tr);
4131 
4132 static int tracing_open(struct inode *inode, struct file *file)
4133 {
4134 	struct trace_array *tr = inode->i_private;
4135 	struct trace_iterator *iter;
4136 	int ret;
4137 
4138 	ret = tracing_check_open_get_tr(tr);
4139 	if (ret)
4140 		return ret;
4141 
4142 	/* If this file was open for write, then erase contents */
4143 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4144 		int cpu = tracing_get_cpu(inode);
4145 		struct array_buffer *trace_buf = &tr->array_buffer;
4146 
4147 #ifdef CONFIG_TRACER_MAX_TRACE
4148 		if (tr->current_trace->print_max)
4149 			trace_buf = &tr->snapshot_buffer;
4150 #endif
4151 
4152 		if (cpu == RING_BUFFER_ALL_CPUS)
4153 			tracing_reset_online_cpus(trace_buf);
4154 		else
4155 			tracing_reset_cpu(trace_buf, cpu);
4156 
4157 		update_last_data_if_empty(tr);
4158 	}
4159 
4160 	if (file->f_mode & FMODE_READ) {
4161 		iter = __tracing_open(inode, file, false);
4162 		if (IS_ERR(iter))
4163 			ret = PTR_ERR(iter);
4164 		else if (tr->trace_flags & TRACE_ITER(LATENCY_FMT))
4165 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4166 	}
4167 
4168 	if (ret < 0)
4169 		trace_array_put(tr);
4170 
4171 	return ret;
4172 }
4173 
4174 /*
4175  * Some tracers are not suitable for instance buffers.
4176  * A tracer is always available for the global array (toplevel)
4177  * or if it explicitly states that it is.
4178  */
4179 static bool
4180 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4181 {
4182 	/* arrays with mapped buffer range do not have snapshots */
4183 	if (tr->range_addr_start && tracer_uses_snapshot(t))
4184 		return false;
4185 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4186 }
4187 
4188 /* Find the next tracer that this trace array may use */
4189 static struct tracer *
4190 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4191 {
4192 	while (t && !trace_ok_for_array(t, tr))
4193 		t = t->next;
4194 
4195 	return t;
4196 }
4197 
4198 static void *
4199 t_next(struct seq_file *m, void *v, loff_t *pos)
4200 {
4201 	struct trace_array *tr = m->private;
4202 	struct tracer *t = v;
4203 
4204 	(*pos)++;
4205 
4206 	if (t)
4207 		t = get_tracer_for_array(tr, t->next);
4208 
4209 	return t;
4210 }
4211 
4212 static void *t_start(struct seq_file *m, loff_t *pos)
4213 {
4214 	struct trace_array *tr = m->private;
4215 	struct tracer *t;
4216 	loff_t l = 0;
4217 
4218 	mutex_lock(&trace_types_lock);
4219 
4220 	t = get_tracer_for_array(tr, trace_types);
4221 	for (; t && l < *pos; t = t_next(m, t, &l))
4222 			;
4223 
4224 	return t;
4225 }
4226 
4227 static void t_stop(struct seq_file *m, void *p)
4228 {
4229 	mutex_unlock(&trace_types_lock);
4230 }
4231 
4232 static int t_show(struct seq_file *m, void *v)
4233 {
4234 	struct tracer *t = v;
4235 
4236 	if (!t)
4237 		return 0;
4238 
4239 	seq_puts(m, t->name);
4240 	if (t->next)
4241 		seq_putc(m, ' ');
4242 	else
4243 		seq_putc(m, '\n');
4244 
4245 	return 0;
4246 }
4247 
4248 static const struct seq_operations show_traces_seq_ops = {
4249 	.start		= t_start,
4250 	.next		= t_next,
4251 	.stop		= t_stop,
4252 	.show		= t_show,
4253 };
4254 
4255 static int show_traces_open(struct inode *inode, struct file *file)
4256 {
4257 	struct trace_array *tr = inode->i_private;
4258 	struct seq_file *m;
4259 	int ret;
4260 
4261 	ret = tracing_check_open_get_tr(tr);
4262 	if (ret)
4263 		return ret;
4264 
4265 	ret = seq_open(file, &show_traces_seq_ops);
4266 	if (ret) {
4267 		trace_array_put(tr);
4268 		return ret;
4269 	}
4270 
4271 	m = file->private_data;
4272 	m->private = tr;
4273 
4274 	return 0;
4275 }
4276 
4277 static int tracing_seq_release(struct inode *inode, struct file *file)
4278 {
4279 	struct trace_array *tr = inode->i_private;
4280 
4281 	trace_array_put(tr);
4282 	return seq_release(inode, file);
4283 }
4284 
4285 static ssize_t
4286 tracing_write_stub(struct file *filp, const char __user *ubuf,
4287 		   size_t count, loff_t *ppos)
4288 {
4289 	return count;
4290 }
4291 
4292 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4293 {
4294 	int ret;
4295 
4296 	if (file->f_mode & FMODE_READ)
4297 		ret = seq_lseek(file, offset, whence);
4298 	else
4299 		file->f_pos = ret = 0;
4300 
4301 	return ret;
4302 }
4303 
4304 static const struct file_operations tracing_fops = {
4305 	.open		= tracing_open,
4306 	.read		= seq_read,
4307 	.read_iter	= seq_read_iter,
4308 	.splice_read	= copy_splice_read,
4309 	.write		= tracing_write_stub,
4310 	.llseek		= tracing_lseek,
4311 	.release	= tracing_release,
4312 };
4313 
4314 static const struct file_operations show_traces_fops = {
4315 	.open		= show_traces_open,
4316 	.read		= seq_read,
4317 	.llseek		= seq_lseek,
4318 	.release	= tracing_seq_release,
4319 };
4320 
4321 static ssize_t
4322 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4323 		     size_t count, loff_t *ppos)
4324 {
4325 	struct trace_array *tr = file_inode(filp)->i_private;
4326 	char *mask_str __free(kfree) = NULL;
4327 	int len;
4328 
4329 	len = snprintf(NULL, 0, "%*pb\n",
4330 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4331 	mask_str = kmalloc(len, GFP_KERNEL);
4332 	if (!mask_str)
4333 		return -ENOMEM;
4334 
4335 	len = snprintf(mask_str, len, "%*pb\n",
4336 		       cpumask_pr_args(tr->tracing_cpumask));
4337 	if (len >= count)
4338 		return -EINVAL;
4339 
4340 	return simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4341 }
4342 
4343 int tracing_set_cpumask(struct trace_array *tr,
4344 			cpumask_var_t tracing_cpumask_new)
4345 {
4346 	int cpu;
4347 
4348 	if (!tr)
4349 		return -EINVAL;
4350 
4351 	local_irq_disable();
4352 	arch_spin_lock(&tr->max_lock);
4353 	for_each_tracing_cpu(cpu) {
4354 		/*
4355 		 * Increase/decrease the disabled counter if we are
4356 		 * about to flip a bit in the cpumask:
4357 		 */
4358 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4359 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4360 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4361 #ifdef CONFIG_TRACER_SNAPSHOT
4362 			ring_buffer_record_disable_cpu(tr->snapshot_buffer.buffer, cpu);
4363 #endif
4364 		}
4365 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4366 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4367 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4368 #ifdef CONFIG_TRACER_SNAPSHOT
4369 			ring_buffer_record_enable_cpu(tr->snapshot_buffer.buffer, cpu);
4370 #endif
4371 		}
4372 	}
4373 	arch_spin_unlock(&tr->max_lock);
4374 	local_irq_enable();
4375 
4376 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4377 
4378 	return 0;
4379 }
4380 
4381 static ssize_t
4382 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4383 		      size_t count, loff_t *ppos)
4384 {
4385 	struct trace_array *tr = file_inode(filp)->i_private;
4386 	cpumask_var_t tracing_cpumask_new;
4387 	int err;
4388 
4389 	if (count == 0 || count > KMALLOC_MAX_SIZE)
4390 		return -EINVAL;
4391 
4392 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4393 		return -ENOMEM;
4394 
4395 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4396 	if (err)
4397 		goto err_free;
4398 
4399 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
4400 	if (err)
4401 		goto err_free;
4402 
4403 	free_cpumask_var(tracing_cpumask_new);
4404 
4405 	return count;
4406 
4407 err_free:
4408 	free_cpumask_var(tracing_cpumask_new);
4409 
4410 	return err;
4411 }
4412 
4413 static const struct file_operations tracing_cpumask_fops = {
4414 	.open		= tracing_open_generic_tr,
4415 	.read		= tracing_cpumask_read,
4416 	.write		= tracing_cpumask_write,
4417 	.release	= tracing_release_generic_tr,
4418 	.llseek		= generic_file_llseek,
4419 };
4420 
4421 static int tracing_trace_options_show(struct seq_file *m, void *v)
4422 {
4423 	struct tracer_opt *trace_opts;
4424 	struct trace_array *tr = m->private;
4425 	struct tracer_flags *flags;
4426 	u32 tracer_flags;
4427 	int i;
4428 
4429 	guard(mutex)(&trace_types_lock);
4430 
4431 	for (i = 0; trace_options[i]; i++) {
4432 		if (tr->trace_flags & (1ULL << i))
4433 			seq_printf(m, "%s\n", trace_options[i]);
4434 		else
4435 			seq_printf(m, "no%s\n", trace_options[i]);
4436 	}
4437 
4438 	flags = tr->current_trace_flags;
4439 	if (!flags || !flags->opts)
4440 		return 0;
4441 
4442 	tracer_flags = flags->val;
4443 	trace_opts = flags->opts;
4444 
4445 	for (i = 0; trace_opts[i].name; i++) {
4446 		if (tracer_flags & trace_opts[i].bit)
4447 			seq_printf(m, "%s\n", trace_opts[i].name);
4448 		else
4449 			seq_printf(m, "no%s\n", trace_opts[i].name);
4450 	}
4451 
4452 	return 0;
4453 }
4454 
4455 static int __set_tracer_option(struct trace_array *tr,
4456 			       struct tracer_flags *tracer_flags,
4457 			       struct tracer_opt *opts, int neg)
4458 {
4459 	struct tracer *trace = tracer_flags->trace;
4460 	int ret = 0;
4461 
4462 	if (trace->set_flag)
4463 		ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4464 	if (ret)
4465 		return ret;
4466 
4467 	if (neg)
4468 		tracer_flags->val &= ~opts->bit;
4469 	else
4470 		tracer_flags->val |= opts->bit;
4471 	return 0;
4472 }
4473 
4474 /* Try to assign a tracer specific option */
4475 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4476 {
4477 	struct tracer_flags *tracer_flags = tr->current_trace_flags;
4478 	struct tracer_opt *opts = NULL;
4479 	int i;
4480 
4481 	if (!tracer_flags || !tracer_flags->opts)
4482 		return 0;
4483 
4484 	for (i = 0; tracer_flags->opts[i].name; i++) {
4485 		opts = &tracer_flags->opts[i];
4486 
4487 		if (strcmp(cmp, opts->name) == 0)
4488 			return __set_tracer_option(tr, tracer_flags, opts, neg);
4489 	}
4490 
4491 	return -EINVAL;
4492 }
4493 
4494 /* Some tracers require overwrite to stay enabled */
4495 int trace_keep_overwrite(struct tracer *tracer, u64 mask, int set)
4496 {
4497 	if (tracer->enabled && (mask & TRACE_ITER(OVERWRITE)) && !set)
4498 		return -1;
4499 
4500 	return 0;
4501 }
4502 
4503 int set_tracer_flag(struct trace_array *tr, u64 mask, int enabled)
4504 {
4505 	switch (mask) {
4506 	case TRACE_ITER(RECORD_TGID):
4507 	case TRACE_ITER(RECORD_CMD):
4508 	case TRACE_ITER(TRACE_PRINTK):
4509 	case TRACE_ITER(COPY_MARKER):
4510 		lockdep_assert_held(&event_mutex);
4511 	}
4512 
4513 	/* do nothing if flag is already set */
4514 	if (!!(tr->trace_flags & mask) == !!enabled)
4515 		return 0;
4516 
4517 	/* Give the tracer a chance to approve the change */
4518 	if (tr->current_trace->flag_changed)
4519 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4520 			return -EINVAL;
4521 
4522 	switch (mask) {
4523 	case TRACE_ITER(TRACE_PRINTK):
4524 		if (enabled) {
4525 			update_printk_trace(tr);
4526 		} else {
4527 			/*
4528 			 * The global_trace cannot clear this.
4529 			 * It's flag only gets cleared if another instance sets it.
4530 			 */
4531 			if (printk_trace == &global_trace)
4532 				return -EINVAL;
4533 			/*
4534 			 * An instance must always have it set.
4535 			 * by default, that's the global_trace instance.
4536 			 */
4537 			if (printk_trace == tr)
4538 				update_printk_trace(&global_trace);
4539 		}
4540 		break;
4541 
4542 	case TRACE_ITER(COPY_MARKER):
4543 		update_marker_trace(tr, enabled);
4544 		/* update_marker_trace updates the tr->trace_flags */
4545 		return 0;
4546 	}
4547 
4548 	if (enabled)
4549 		tr->trace_flags |= mask;
4550 	else
4551 		tr->trace_flags &= ~mask;
4552 
4553 	switch (mask) {
4554 	case TRACE_ITER(RECORD_CMD):
4555 		trace_event_enable_cmd_record(enabled);
4556 		break;
4557 
4558 	case TRACE_ITER(RECORD_TGID):
4559 
4560 		if (trace_alloc_tgid_map() < 0) {
4561 			tr->trace_flags &= ~TRACE_ITER(RECORD_TGID);
4562 			return -ENOMEM;
4563 		}
4564 
4565 		trace_event_enable_tgid_record(enabled);
4566 		break;
4567 
4568 	case TRACE_ITER(EVENT_FORK):
4569 		trace_event_follow_fork(tr, enabled);
4570 		break;
4571 
4572 	case TRACE_ITER(FUNC_FORK):
4573 		ftrace_pid_follow_fork(tr, enabled);
4574 		break;
4575 
4576 	case TRACE_ITER(OVERWRITE):
4577 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4578 #ifdef CONFIG_TRACER_SNAPSHOT
4579 		ring_buffer_change_overwrite(tr->snapshot_buffer.buffer, enabled);
4580 #endif
4581 		break;
4582 
4583 	case TRACE_ITER(PRINTK):
4584 		trace_printk_start_stop_comm(enabled);
4585 		trace_printk_control(enabled);
4586 		break;
4587 
4588 #if defined(CONFIG_FUNCTION_PROFILER) && defined(CONFIG_FUNCTION_GRAPH_TRACER)
4589 	case TRACE_GRAPH_GRAPH_TIME:
4590 		ftrace_graph_graph_time_control(enabled);
4591 		break;
4592 #endif
4593 	}
4594 
4595 	return 0;
4596 }
4597 
4598 int trace_set_options(struct trace_array *tr, char *option)
4599 {
4600 	char *cmp;
4601 	int neg = 0;
4602 	int ret;
4603 	size_t orig_len = strlen(option);
4604 	int len;
4605 
4606 	cmp = strstrip(option);
4607 
4608 	len = str_has_prefix(cmp, "no");
4609 	if (len)
4610 		neg = 1;
4611 
4612 	cmp += len;
4613 
4614 	mutex_lock(&event_mutex);
4615 	mutex_lock(&trace_types_lock);
4616 
4617 	ret = match_string(trace_options, -1, cmp);
4618 	/* If no option could be set, test the specific tracer options */
4619 	if (ret < 0)
4620 		ret = set_tracer_option(tr, cmp, neg);
4621 	else
4622 		ret = set_tracer_flag(tr, 1ULL << ret, !neg);
4623 
4624 	mutex_unlock(&trace_types_lock);
4625 	mutex_unlock(&event_mutex);
4626 
4627 	/*
4628 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4629 	 * turn it back into a space.
4630 	 */
4631 	if (orig_len > strlen(option))
4632 		option[strlen(option)] = ' ';
4633 
4634 	return ret;
4635 }
4636 
4637 static void __init apply_trace_boot_options(void)
4638 {
4639 	char *buf = trace_boot_options_buf;
4640 	char *option;
4641 
4642 	while (true) {
4643 		option = strsep(&buf, ",");
4644 
4645 		if (!option)
4646 			break;
4647 
4648 		if (*option)
4649 			trace_set_options(&global_trace, option);
4650 
4651 		/* Put back the comma to allow this to be called again */
4652 		if (buf)
4653 			*(buf - 1) = ',';
4654 	}
4655 }
4656 
4657 static ssize_t
4658 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4659 			size_t cnt, loff_t *ppos)
4660 {
4661 	struct seq_file *m = filp->private_data;
4662 	struct trace_array *tr = m->private;
4663 	char buf[64];
4664 	int ret;
4665 
4666 	if (cnt >= sizeof(buf))
4667 		return -EINVAL;
4668 
4669 	if (copy_from_user(buf, ubuf, cnt))
4670 		return -EFAULT;
4671 
4672 	buf[cnt] = 0;
4673 
4674 	ret = trace_set_options(tr, buf);
4675 	if (ret < 0)
4676 		return ret;
4677 
4678 	*ppos += cnt;
4679 
4680 	return cnt;
4681 }
4682 
4683 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4684 {
4685 	struct trace_array *tr = inode->i_private;
4686 	int ret;
4687 
4688 	ret = tracing_check_open_get_tr(tr);
4689 	if (ret)
4690 		return ret;
4691 
4692 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4693 	if (ret < 0)
4694 		trace_array_put(tr);
4695 
4696 	return ret;
4697 }
4698 
4699 static const struct file_operations tracing_iter_fops = {
4700 	.open		= tracing_trace_options_open,
4701 	.read		= seq_read,
4702 	.llseek		= seq_lseek,
4703 	.release	= tracing_single_release_tr,
4704 	.write		= tracing_trace_options_write,
4705 };
4706 
4707 static const char readme_msg[] =
4708 	"tracing mini-HOWTO:\n\n"
4709 	"By default tracefs removes all OTH file permission bits.\n"
4710 	"When mounting tracefs an optional group id can be specified\n"
4711 	"which adds the group to every directory and file in tracefs:\n\n"
4712 	"\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
4713 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4714 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4715 	" Important files:\n"
4716 	"  trace\t\t\t- The static contents of the buffer\n"
4717 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4718 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4719 	"  current_tracer\t- function and latency tracers\n"
4720 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4721 	"  error_log\t- error log for failed commands (that support it)\n"
4722 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4723 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4724 	"  trace_clock\t\t- change the clock used to order events\n"
4725 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4726 	"      global:   Synced across CPUs but slows tracing down.\n"
4727 	"     counter:   Not a clock, but just an increment\n"
4728 	"      uptime:   Jiffy counter from time of boot\n"
4729 	"        perf:   Same clock that perf events use\n"
4730 #ifdef CONFIG_X86_64
4731 	"     x86-tsc:   TSC cycle counter\n"
4732 #endif
4733 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
4734 	"       delta:   Delta difference against a buffer-wide timestamp\n"
4735 	"    absolute:   Absolute (standalone) timestamp\n"
4736 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4737 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4738 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4739 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4740 	"\t\t\t  Remove sub-buffer with rmdir\n"
4741 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4742 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
4743 	"\t\t\t  option name\n"
4744 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4745 #ifdef CONFIG_DYNAMIC_FTRACE
4746 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4747 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4748 	"\t\t\t  functions\n"
4749 	"\t     accepts: func_full_name or glob-matching-pattern\n"
4750 	"\t     modules: Can select a group via module\n"
4751 	"\t      Format: :mod:<module-name>\n"
4752 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4753 	"\t    triggers: a command to perform when function is hit\n"
4754 	"\t      Format: <function>:<trigger>[:count]\n"
4755 	"\t     trigger: traceon, traceoff\n"
4756 	"\t\t      enable_event:<system>:<event>\n"
4757 	"\t\t      disable_event:<system>:<event>\n"
4758 #ifdef CONFIG_STACKTRACE
4759 	"\t\t      stacktrace\n"
4760 #endif
4761 #ifdef CONFIG_TRACER_SNAPSHOT
4762 	"\t\t      snapshot\n"
4763 #endif
4764 	"\t\t      dump\n"
4765 	"\t\t      cpudump\n"
4766 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4767 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4768 	"\t     The first one will disable tracing every time do_fault is hit\n"
4769 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4770 	"\t       The first time do trap is hit and it disables tracing, the\n"
4771 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4772 	"\t       the counter will not decrement. It only decrements when the\n"
4773 	"\t       trigger did work\n"
4774 	"\t     To remove trigger without count:\n"
4775 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
4776 	"\t     To remove trigger with a count:\n"
4777 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4778 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4779 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4780 	"\t    modules: Can select a group via module command :mod:\n"
4781 	"\t    Does not accept triggers\n"
4782 #endif /* CONFIG_DYNAMIC_FTRACE */
4783 #ifdef CONFIG_FUNCTION_TRACER
4784 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4785 	"\t\t    (function)\n"
4786 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
4787 	"\t\t    (function)\n"
4788 #endif
4789 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4790 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4791 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4792 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4793 #endif
4794 #ifdef CONFIG_TRACER_SNAPSHOT
4795 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4796 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4797 	"\t\t\t  information\n"
4798 #endif
4799 #ifdef CONFIG_STACK_TRACER
4800 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4801 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4802 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4803 	"\t\t\t  new trace)\n"
4804 #ifdef CONFIG_DYNAMIC_FTRACE
4805 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4806 	"\t\t\t  traces\n"
4807 #endif
4808 #endif /* CONFIG_STACK_TRACER */
4809 #ifdef CONFIG_DYNAMIC_EVENTS
4810 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
4811 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4812 #endif
4813 #ifdef CONFIG_KPROBE_EVENTS
4814 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
4815 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4816 #endif
4817 #ifdef CONFIG_UPROBE_EVENTS
4818 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
4819 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4820 #endif
4821 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
4822     defined(CONFIG_FPROBE_EVENTS)
4823 	"\t  accepts: event-definitions (one definition per line)\n"
4824 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4825 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
4826 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
4827 #endif
4828 #ifdef CONFIG_FPROBE_EVENTS
4829 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
4830 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
4831 #endif
4832 #ifdef CONFIG_HIST_TRIGGERS
4833 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
4834 #endif
4835 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
4836 	"\t           -:[<group>/][<event>]\n"
4837 #ifdef CONFIG_KPROBE_EVENTS
4838 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4839   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
4840 #endif
4841 #ifdef CONFIG_UPROBE_EVENTS
4842   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
4843 #endif
4844 	"\t     args: <name>=fetcharg[:type]\n"
4845 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
4846 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4847 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
4848 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
4849 	"\t           <argname>[->field[->field|.field...]],\n"
4850 #endif
4851 #else
4852 	"\t           $stack<index>, $stack, $retval, $comm,\n"
4853 #endif
4854 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
4855 	"\t     kernel return probes support: $retval, $arg<N>, $comm\n"
4856 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
4857 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
4858 	"\t           symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
4859 #ifdef CONFIG_HIST_TRIGGERS
4860 	"\t    field: <stype> <name>;\n"
4861 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4862 	"\t           [unsigned] char/int/long\n"
4863 #endif
4864 	"\t    efield: For event probes ('e' types), the field is on of the fields\n"
4865 	"\t            of the <attached-group>/<attached-event>.\n"
4866 #endif
4867 	"  set_event\t\t- Enables events by name written into it\n"
4868 	"\t\t\t  Can enable module events via: :mod:<module>\n"
4869 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4870 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4871 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4872 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4873 	"\t\t\t  events\n"
4874 	"      filter\t\t- If set, only events passing filter are traced\n"
4875 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4876 	"\t\t\t  <event>:\n"
4877 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4878 	"      filter\t\t- If set, only events passing filter are traced\n"
4879 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4880 	"\t    Format: <trigger>[:count][if <filter>]\n"
4881 	"\t   trigger: traceon, traceoff\n"
4882 	"\t            enable_event:<system>:<event>\n"
4883 	"\t            disable_event:<system>:<event>\n"
4884 #ifdef CONFIG_HIST_TRIGGERS
4885 	"\t            enable_hist:<system>:<event>\n"
4886 	"\t            disable_hist:<system>:<event>\n"
4887 #endif
4888 #ifdef CONFIG_STACKTRACE
4889 	"\t\t    stacktrace\n"
4890 #endif
4891 #ifdef CONFIG_TRACER_SNAPSHOT
4892 	"\t\t    snapshot\n"
4893 #endif
4894 #ifdef CONFIG_HIST_TRIGGERS
4895 	"\t\t    hist (see below)\n"
4896 #endif
4897 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4898 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4899 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4900 	"\t                  events/block/block_unplug/trigger\n"
4901 	"\t   The first disables tracing every time block_unplug is hit.\n"
4902 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4903 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4904 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4905 	"\t   Like function triggers, the counter is only decremented if it\n"
4906 	"\t    enabled or disabled tracing.\n"
4907 	"\t   To remove a trigger without a count:\n"
4908 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
4909 	"\t   To remove a trigger with a count:\n"
4910 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
4911 	"\t   Filters can be ignored when removing a trigger.\n"
4912 #ifdef CONFIG_HIST_TRIGGERS
4913 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4914 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
4915 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
4916 	"\t            [:values=<field1[,field2,...]>]\n"
4917 	"\t            [:sort=<field1[,field2,...]>]\n"
4918 	"\t            [:size=#entries]\n"
4919 	"\t            [:pause][:continue][:clear]\n"
4920 	"\t            [:name=histname1]\n"
4921 	"\t            [:nohitcount]\n"
4922 	"\t            [:<handler>.<action>]\n"
4923 	"\t            [if <filter>]\n\n"
4924 	"\t    Note, special fields can be used as well:\n"
4925 	"\t            common_timestamp - to record current timestamp\n"
4926 	"\t            common_cpu - to record the CPU the event happened on\n"
4927 	"\n"
4928 	"\t    A hist trigger variable can be:\n"
4929 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
4930 	"\t        - a reference to another variable e.g. y=$x,\n"
4931 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
4932 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
4933 	"\n"
4934 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
4935 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
4936 	"\t    variable reference, field or numeric literal.\n"
4937 	"\n"
4938 	"\t    When a matching event is hit, an entry is added to a hash\n"
4939 	"\t    table using the key(s) and value(s) named, and the value of a\n"
4940 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
4941 	"\t    correspond to fields in the event's format description.  Keys\n"
4942 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
4943 	"\t    Compound keys consisting of up to two fields can be specified\n"
4944 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4945 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
4946 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
4947 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
4948 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
4949 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
4950 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
4951 	"\t    its histogram data will be shared with other triggers of the\n"
4952 	"\t    same name, and trigger hits will update this common data.\n\n"
4953 	"\t    Reading the 'hist' file for the event will dump the hash\n"
4954 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
4955 	"\t    triggers attached to an event, there will be a table for each\n"
4956 	"\t    trigger in the output.  The table displayed for a named\n"
4957 	"\t    trigger will be the same as any other instance having the\n"
4958 	"\t    same name.  The default format used to display a given field\n"
4959 	"\t    can be modified by appending any of the following modifiers\n"
4960 	"\t    to the field name, as applicable:\n\n"
4961 	"\t            .hex        display a number as a hex value\n"
4962 	"\t            .sym        display an address as a symbol\n"
4963 	"\t            .sym-offset display an address as a symbol and offset\n"
4964 	"\t            .execname   display a common_pid as a program name\n"
4965 	"\t            .syscall    display a syscall id as a syscall name\n"
4966 	"\t            .log2       display log2 value rather than raw number\n"
4967 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
4968 	"\t            .usecs      display a common_timestamp in microseconds\n"
4969 	"\t            .percent    display a number of percentage value\n"
4970 	"\t            .graph      display a bar-graph of a value\n\n"
4971 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
4972 	"\t    trigger or to start a hist trigger but not log any events\n"
4973 	"\t    until told to do so.  'continue' can be used to start or\n"
4974 	"\t    restart a paused hist trigger.\n\n"
4975 	"\t    The 'clear' parameter will clear the contents of a running\n"
4976 	"\t    hist trigger and leave its current paused/active state\n"
4977 	"\t    unchanged.\n\n"
4978 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
4979 	"\t    raw hitcount in the histogram.\n\n"
4980 	"\t    The enable_hist and disable_hist triggers can be used to\n"
4981 	"\t    have one event conditionally start and stop another event's\n"
4982 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
4983 	"\t    the enable_event and disable_event triggers.\n\n"
4984 	"\t    Hist trigger handlers and actions are executed whenever a\n"
4985 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
4986 	"\t        <handler>.<action>\n\n"
4987 	"\t    The available handlers are:\n\n"
4988 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
4989 	"\t        onmax(var)               - invoke if var exceeds current max\n"
4990 	"\t        onchange(var)            - invoke action if var changes\n\n"
4991 	"\t    The available actions are:\n\n"
4992 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
4993 	"\t        save(field,...)                      - save current event fields\n"
4994 #ifdef CONFIG_TRACER_SNAPSHOT
4995 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
4996 #endif
4997 #ifdef CONFIG_SYNTH_EVENTS
4998 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
4999 	"\t  Write into this file to define/undefine new synthetic events.\n"
5000 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5001 #endif
5002 #endif
5003 ;
5004 
5005 static ssize_t
5006 tracing_readme_read(struct file *filp, char __user *ubuf,
5007 		       size_t cnt, loff_t *ppos)
5008 {
5009 	return simple_read_from_buffer(ubuf, cnt, ppos,
5010 					readme_msg, strlen(readme_msg));
5011 }
5012 
5013 static const struct file_operations tracing_readme_fops = {
5014 	.open		= tracing_open_generic,
5015 	.read		= tracing_readme_read,
5016 	.llseek		= generic_file_llseek,
5017 };
5018 
5019 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5020 static union trace_eval_map_item *
5021 update_eval_map(union trace_eval_map_item *ptr)
5022 {
5023 	if (!ptr->map.eval_string) {
5024 		if (ptr->tail.next) {
5025 			ptr = ptr->tail.next;
5026 			/* Set ptr to the next real item (skip head) */
5027 			ptr++;
5028 		} else
5029 			return NULL;
5030 	}
5031 	return ptr;
5032 }
5033 
5034 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5035 {
5036 	union trace_eval_map_item *ptr = v;
5037 
5038 	/*
5039 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5040 	 * This really should never happen.
5041 	 */
5042 	(*pos)++;
5043 	ptr = update_eval_map(ptr);
5044 	if (WARN_ON_ONCE(!ptr))
5045 		return NULL;
5046 
5047 	ptr++;
5048 	ptr = update_eval_map(ptr);
5049 
5050 	return ptr;
5051 }
5052 
5053 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5054 {
5055 	union trace_eval_map_item *v;
5056 	loff_t l = 0;
5057 
5058 	mutex_lock(&trace_eval_mutex);
5059 
5060 	v = trace_eval_maps;
5061 	if (v)
5062 		v++;
5063 
5064 	while (v && l < *pos) {
5065 		v = eval_map_next(m, v, &l);
5066 	}
5067 
5068 	return v;
5069 }
5070 
5071 static void eval_map_stop(struct seq_file *m, void *v)
5072 {
5073 	mutex_unlock(&trace_eval_mutex);
5074 }
5075 
5076 static int eval_map_show(struct seq_file *m, void *v)
5077 {
5078 	union trace_eval_map_item *ptr = v;
5079 
5080 	seq_printf(m, "%s %ld (%s)\n",
5081 		   ptr->map.eval_string, ptr->map.eval_value,
5082 		   ptr->map.system);
5083 
5084 	return 0;
5085 }
5086 
5087 static const struct seq_operations tracing_eval_map_seq_ops = {
5088 	.start		= eval_map_start,
5089 	.next		= eval_map_next,
5090 	.stop		= eval_map_stop,
5091 	.show		= eval_map_show,
5092 };
5093 
5094 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5095 {
5096 	int ret;
5097 
5098 	ret = tracing_check_open_get_tr(NULL);
5099 	if (ret)
5100 		return ret;
5101 
5102 	return seq_open(filp, &tracing_eval_map_seq_ops);
5103 }
5104 
5105 static const struct file_operations tracing_eval_map_fops = {
5106 	.open		= tracing_eval_map_open,
5107 	.read		= seq_read,
5108 	.llseek		= seq_lseek,
5109 	.release	= seq_release,
5110 };
5111 
5112 static inline union trace_eval_map_item *
5113 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5114 {
5115 	/* Return tail of array given the head */
5116 	return ptr + ptr->head.length + 1;
5117 }
5118 
5119 static void
5120 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5121 			   int len)
5122 {
5123 	struct trace_eval_map **stop;
5124 	struct trace_eval_map **map;
5125 	union trace_eval_map_item *map_array;
5126 	union trace_eval_map_item *ptr;
5127 
5128 	stop = start + len;
5129 
5130 	/*
5131 	 * The trace_eval_maps contains the map plus a head and tail item,
5132 	 * where the head holds the module and length of array, and the
5133 	 * tail holds a pointer to the next list.
5134 	 */
5135 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5136 	if (!map_array) {
5137 		pr_warn("Unable to allocate trace eval mapping\n");
5138 		return;
5139 	}
5140 
5141 	guard(mutex)(&trace_eval_mutex);
5142 
5143 	if (!trace_eval_maps)
5144 		trace_eval_maps = map_array;
5145 	else {
5146 		ptr = trace_eval_maps;
5147 		for (;;) {
5148 			ptr = trace_eval_jmp_to_tail(ptr);
5149 			if (!ptr->tail.next)
5150 				break;
5151 			ptr = ptr->tail.next;
5152 
5153 		}
5154 		ptr->tail.next = map_array;
5155 	}
5156 	map_array->head.mod = mod;
5157 	map_array->head.length = len;
5158 	map_array++;
5159 
5160 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5161 		map_array->map = **map;
5162 		map_array++;
5163 	}
5164 	memset(map_array, 0, sizeof(*map_array));
5165 }
5166 
5167 static void trace_create_eval_file(struct dentry *d_tracer)
5168 {
5169 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5170 			  NULL, &tracing_eval_map_fops);
5171 }
5172 
5173 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5174 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5175 static inline void trace_insert_eval_map_file(struct module *mod,
5176 			      struct trace_eval_map **start, int len) { }
5177 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5178 
5179 static void
5180 trace_event_update_with_eval_map(struct module *mod,
5181 				 struct trace_eval_map **start,
5182 				 int len)
5183 {
5184 	struct trace_eval_map **map;
5185 
5186 	/* Always run sanitizer only if btf_type_tag attr exists. */
5187 	if (len <= 0) {
5188 		if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) &&
5189 		      IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) &&
5190 		      __has_attribute(btf_type_tag)))
5191 			return;
5192 	}
5193 
5194 	map = start;
5195 
5196 	trace_event_update_all(map, len);
5197 
5198 	if (len <= 0)
5199 		return;
5200 
5201 	trace_insert_eval_map_file(mod, start, len);
5202 }
5203 
5204 static ssize_t
5205 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5206 		       size_t cnt, loff_t *ppos)
5207 {
5208 	struct trace_array *tr = filp->private_data;
5209 	char buf[MAX_TRACER_SIZE+2];
5210 	int r;
5211 
5212 	scoped_guard(mutex, &trace_types_lock) {
5213 		r = sprintf(buf, "%s\n", tr->current_trace->name);
5214 	}
5215 
5216 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5217 }
5218 
5219 int tracer_init(struct tracer *t, struct trace_array *tr)
5220 {
5221 	tracing_reset_online_cpus(&tr->array_buffer);
5222 	update_last_data_if_empty(tr);
5223 	return t->init(tr);
5224 }
5225 
5226 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5227 {
5228 	int cpu;
5229 
5230 	for_each_tracing_cpu(cpu)
5231 		per_cpu_ptr(buf->data, cpu)->entries = val;
5232 }
5233 
5234 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5235 {
5236 	if (cpu == RING_BUFFER_ALL_CPUS) {
5237 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5238 	} else {
5239 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5240 	}
5241 }
5242 
5243 #ifdef CONFIG_TRACER_SNAPSHOT
5244 /* resize @tr's buffer to the size of @size_tr's entries */
5245 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5246 					struct array_buffer *size_buf, int cpu_id)
5247 {
5248 	int cpu, ret = 0;
5249 
5250 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5251 		for_each_tracing_cpu(cpu) {
5252 			ret = ring_buffer_resize(trace_buf->buffer,
5253 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5254 			if (ret < 0)
5255 				break;
5256 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5257 				per_cpu_ptr(size_buf->data, cpu)->entries;
5258 		}
5259 	} else {
5260 		ret = ring_buffer_resize(trace_buf->buffer,
5261 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5262 		if (ret == 0)
5263 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5264 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5265 	}
5266 
5267 	return ret;
5268 }
5269 #endif /* CONFIG_TRACER_SNAPSHOT */
5270 
5271 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5272 					unsigned long size, int cpu)
5273 {
5274 	int ret;
5275 
5276 	/*
5277 	 * If kernel or user changes the size of the ring buffer
5278 	 * we use the size that was given, and we can forget about
5279 	 * expanding it later.
5280 	 */
5281 	trace_set_ring_buffer_expanded(tr);
5282 
5283 	/* May be called before buffers are initialized */
5284 	if (!tr->array_buffer.buffer)
5285 		return 0;
5286 
5287 	/* Do not allow tracing while resizing ring buffer */
5288 	tracing_stop_tr(tr);
5289 
5290 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5291 	if (ret < 0)
5292 		goto out_start;
5293 
5294 #ifdef CONFIG_TRACER_SNAPSHOT
5295 	if (!tr->allocated_snapshot)
5296 		goto out;
5297 
5298 	ret = ring_buffer_resize(tr->snapshot_buffer.buffer, size, cpu);
5299 	if (ret < 0) {
5300 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
5301 						     &tr->array_buffer, cpu);
5302 		if (r < 0) {
5303 			/*
5304 			 * AARGH! We are left with different
5305 			 * size max buffer!!!!
5306 			 * The max buffer is our "snapshot" buffer.
5307 			 * When a tracer needs a snapshot (one of the
5308 			 * latency tracers), it swaps the max buffer
5309 			 * with the saved snap shot. We succeeded to
5310 			 * update the size of the main buffer, but failed to
5311 			 * update the size of the max buffer. But when we tried
5312 			 * to reset the main buffer to the original size, we
5313 			 * failed there too. This is very unlikely to
5314 			 * happen, but if it does, warn and kill all
5315 			 * tracing.
5316 			 */
5317 			WARN_ON(1);
5318 			tracing_disabled = 1;
5319 		}
5320 		goto out_start;
5321 	}
5322 
5323 	update_buffer_entries(&tr->snapshot_buffer, cpu);
5324 
5325  out:
5326 #endif /* CONFIG_TRACER_SNAPSHOT */
5327 
5328 	update_buffer_entries(&tr->array_buffer, cpu);
5329  out_start:
5330 	tracing_start_tr(tr);
5331 	return ret;
5332 }
5333 
5334 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5335 				  unsigned long size, int cpu_id)
5336 {
5337 	guard(mutex)(&trace_types_lock);
5338 
5339 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5340 		/* make sure, this cpu is enabled in the mask */
5341 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
5342 			return -EINVAL;
5343 	}
5344 
5345 	return __tracing_resize_ring_buffer(tr, size, cpu_id);
5346 }
5347 
5348 struct trace_mod_entry {
5349 	unsigned long	mod_addr;
5350 	char		mod_name[MODULE_NAME_LEN];
5351 };
5352 
5353 struct trace_scratch {
5354 	unsigned int		clock_id;
5355 	unsigned long		text_addr;
5356 	unsigned long		nr_entries;
5357 	struct trace_mod_entry	entries[];
5358 };
5359 
5360 static DEFINE_MUTEX(scratch_mutex);
5361 
5362 static int cmp_mod_entry(const void *key, const void *pivot)
5363 {
5364 	unsigned long addr = (unsigned long)key;
5365 	const struct trace_mod_entry *ent = pivot;
5366 
5367 	if (addr < ent[0].mod_addr)
5368 		return -1;
5369 
5370 	return addr >= ent[1].mod_addr;
5371 }
5372 
5373 /**
5374  * trace_adjust_address() - Adjust prev boot address to current address.
5375  * @tr: Persistent ring buffer's trace_array.
5376  * @addr: Address in @tr which is adjusted.
5377  */
5378 unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
5379 {
5380 	struct trace_module_delta *module_delta;
5381 	struct trace_scratch *tscratch;
5382 	struct trace_mod_entry *entry;
5383 	unsigned long raddr;
5384 	int idx = 0, nr_entries;
5385 
5386 	/* If we don't have last boot delta, return the address */
5387 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
5388 		return addr;
5389 
5390 	/* tr->module_delta must be protected by rcu. */
5391 	guard(rcu)();
5392 	tscratch = tr->scratch;
5393 	/* if there is no tscrach, module_delta must be NULL. */
5394 	module_delta = READ_ONCE(tr->module_delta);
5395 	if (!module_delta || !tscratch->nr_entries ||
5396 	    tscratch->entries[0].mod_addr > addr) {
5397 		raddr = addr + tr->text_delta;
5398 		return __is_kernel(raddr) || is_kernel_core_data(raddr) ||
5399 			is_kernel_rodata(raddr) ? raddr : addr;
5400 	}
5401 
5402 	/* Note that entries must be sorted. */
5403 	nr_entries = tscratch->nr_entries;
5404 	if (nr_entries == 1 ||
5405 	    tscratch->entries[nr_entries - 1].mod_addr < addr)
5406 		idx = nr_entries - 1;
5407 	else {
5408 		entry = __inline_bsearch((void *)addr,
5409 				tscratch->entries,
5410 				nr_entries - 1,
5411 				sizeof(tscratch->entries[0]),
5412 				cmp_mod_entry);
5413 		if (entry)
5414 			idx = entry - tscratch->entries;
5415 	}
5416 
5417 	return addr + module_delta->delta[idx];
5418 }
5419 
5420 #ifdef CONFIG_MODULES
5421 static int save_mod(struct module *mod, void *data)
5422 {
5423 	struct trace_array *tr = data;
5424 	struct trace_scratch *tscratch;
5425 	struct trace_mod_entry *entry;
5426 	unsigned int size;
5427 
5428 	tscratch = tr->scratch;
5429 	if (!tscratch)
5430 		return -1;
5431 	size = tr->scratch_size;
5432 
5433 	if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size)
5434 		return -1;
5435 
5436 	entry = &tscratch->entries[tscratch->nr_entries];
5437 
5438 	tscratch->nr_entries++;
5439 
5440 	entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
5441 	strscpy(entry->mod_name, mod->name);
5442 
5443 	return 0;
5444 }
5445 #else
5446 static int save_mod(struct module *mod, void *data)
5447 {
5448 	return 0;
5449 }
5450 #endif
5451 
5452 static void update_last_data(struct trace_array *tr)
5453 {
5454 	struct trace_module_delta *module_delta;
5455 	struct trace_scratch *tscratch;
5456 
5457 	if (!(tr->flags & TRACE_ARRAY_FL_BOOT))
5458 		return;
5459 
5460 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
5461 		return;
5462 
5463 	/* Only if the buffer has previous boot data clear and update it. */
5464 	tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;
5465 
5466 	/* Reset the module list and reload them */
5467 	if (tr->scratch) {
5468 		struct trace_scratch *tscratch = tr->scratch;
5469 
5470 		tscratch->clock_id = tr->clock_id;
5471 		memset(tscratch->entries, 0,
5472 		       flex_array_size(tscratch, entries, tscratch->nr_entries));
5473 		tscratch->nr_entries = 0;
5474 
5475 		guard(mutex)(&scratch_mutex);
5476 		module_for_each_mod(save_mod, tr);
5477 	}
5478 
5479 	/*
5480 	 * Need to clear all CPU buffers as there cannot be events
5481 	 * from the previous boot mixed with events with this boot
5482 	 * as that will cause a confusing trace. Need to clear all
5483 	 * CPU buffers, even for those that may currently be offline.
5484 	 */
5485 	tracing_reset_all_cpus(&tr->array_buffer);
5486 
5487 	/* Using current data now */
5488 	tr->text_delta = 0;
5489 
5490 	if (!tr->scratch)
5491 		return;
5492 
5493 	tscratch = tr->scratch;
5494 	module_delta = READ_ONCE(tr->module_delta);
5495 	WRITE_ONCE(tr->module_delta, NULL);
5496 	kfree_rcu(module_delta, rcu);
5497 
5498 	/* Set the persistent ring buffer meta data to this address */
5499 	tscratch->text_addr = (unsigned long)_text;
5500 }
5501 
5502 /**
5503  * tracing_update_buffers - used by tracing facility to expand ring buffers
5504  * @tr: The tracing instance
5505  *
5506  * To save on memory when the tracing is never used on a system with it
5507  * configured in. The ring buffers are set to a minimum size. But once
5508  * a user starts to use the tracing facility, then they need to grow
5509  * to their default size.
5510  *
5511  * This function is to be called when a tracer is about to be used.
5512  */
5513 int tracing_update_buffers(struct trace_array *tr)
5514 {
5515 	int ret = 0;
5516 
5517 	if (!tr)
5518 		tr = &global_trace;
5519 
5520 	guard(mutex)(&trace_types_lock);
5521 
5522 	update_last_data(tr);
5523 
5524 	if (!tr->ring_buffer_expanded)
5525 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5526 						RING_BUFFER_ALL_CPUS);
5527 	return ret;
5528 }
5529 
5530 /*
5531  * Used to clear out the tracer before deletion of an instance.
5532  * Must have trace_types_lock held.
5533  */
5534 static void tracing_set_nop(struct trace_array *tr)
5535 {
5536 	if (tr->current_trace == &nop_trace)
5537 		return;
5538 
5539 	tr->current_trace->enabled--;
5540 
5541 	if (tr->current_trace->reset)
5542 		tr->current_trace->reset(tr);
5543 
5544 	tr->current_trace = &nop_trace;
5545 	tr->current_trace_flags = nop_trace.flags;
5546 }
5547 
5548 static bool tracer_options_updated;
5549 
5550 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5551 {
5552 	struct tracer *trace = NULL;
5553 	struct tracers *t;
5554 	bool had_max_tr;
5555 	int ret;
5556 
5557 	guard(mutex)(&trace_types_lock);
5558 
5559 	update_last_data(tr);
5560 
5561 	if (!tr->ring_buffer_expanded) {
5562 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5563 						RING_BUFFER_ALL_CPUS);
5564 		if (ret < 0)
5565 			return ret;
5566 		ret = 0;
5567 	}
5568 
5569 	list_for_each_entry(t, &tr->tracers, list) {
5570 		if (strcmp(t->tracer->name, buf) == 0) {
5571 			trace = t->tracer;
5572 			break;
5573 		}
5574 	}
5575 	if (!trace)
5576 		return -EINVAL;
5577 
5578 	if (trace == tr->current_trace)
5579 		return 0;
5580 
5581 #ifdef CONFIG_TRACER_SNAPSHOT
5582 	if (tracer_uses_snapshot(trace)) {
5583 		local_irq_disable();
5584 		arch_spin_lock(&tr->max_lock);
5585 		ret = tr->cond_snapshot ? -EBUSY : 0;
5586 		arch_spin_unlock(&tr->max_lock);
5587 		local_irq_enable();
5588 		if (ret)
5589 			return ret;
5590 	}
5591 #endif
5592 	/* Some tracers won't work on kernel command line */
5593 	if (system_state < SYSTEM_RUNNING && trace->noboot) {
5594 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5595 			trace->name);
5596 		return -EINVAL;
5597 	}
5598 
5599 	/* Some tracers are only allowed for the top level buffer */
5600 	if (!trace_ok_for_array(trace, tr))
5601 		return -EINVAL;
5602 
5603 	/* If trace pipe files are being read, we can't change the tracer */
5604 	if (tr->trace_ref)
5605 		return -EBUSY;
5606 
5607 	trace_branch_disable();
5608 
5609 	tr->current_trace->enabled--;
5610 
5611 	if (tr->current_trace->reset)
5612 		tr->current_trace->reset(tr);
5613 
5614 	had_max_tr = tracer_uses_snapshot(tr->current_trace);
5615 
5616 	/* Current trace needs to be nop_trace before synchronize_rcu */
5617 	tr->current_trace = &nop_trace;
5618 	tr->current_trace_flags = nop_trace.flags;
5619 
5620 	if (had_max_tr && !tracer_uses_snapshot(trace)) {
5621 		/*
5622 		 * We need to make sure that the update_max_tr sees that
5623 		 * current_trace changed to nop_trace to keep it from
5624 		 * swapping the buffers after we resize it.
5625 		 * The update_max_tr is called from interrupts disabled
5626 		 * so a synchronized_sched() is sufficient.
5627 		 */
5628 		synchronize_rcu();
5629 		free_snapshot(tr);
5630 		tracing_disarm_snapshot(tr);
5631 	}
5632 
5633 	if (!had_max_tr && tracer_uses_snapshot(trace)) {
5634 		ret = tracing_arm_snapshot_locked(tr);
5635 		if (ret)
5636 			return ret;
5637 	}
5638 
5639 	tr->current_trace_flags = t->flags ? : t->tracer->flags;
5640 
5641 	if (trace->init) {
5642 		ret = tracer_init(trace, tr);
5643 		if (ret) {
5644 			if (tracer_uses_snapshot(trace))
5645 				tracing_disarm_snapshot(tr);
5646 			tr->current_trace_flags = nop_trace.flags;
5647 			return ret;
5648 		}
5649 	}
5650 
5651 	tr->current_trace = trace;
5652 	tr->current_trace->enabled++;
5653 	trace_branch_enable(tr);
5654 
5655 	return 0;
5656 }
5657 
5658 static ssize_t
5659 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5660 			size_t cnt, loff_t *ppos)
5661 {
5662 	struct trace_array *tr = filp->private_data;
5663 	char buf[MAX_TRACER_SIZE+1];
5664 	char *name;
5665 	size_t ret;
5666 	int err;
5667 
5668 	ret = cnt;
5669 
5670 	if (cnt > MAX_TRACER_SIZE)
5671 		cnt = MAX_TRACER_SIZE;
5672 
5673 	if (copy_from_user(buf, ubuf, cnt))
5674 		return -EFAULT;
5675 
5676 	buf[cnt] = 0;
5677 
5678 	name = strim(buf);
5679 
5680 	err = tracing_set_tracer(tr, name);
5681 	if (err)
5682 		return err;
5683 
5684 	*ppos += ret;
5685 
5686 	return ret;
5687 }
5688 
5689 static ssize_t
5690 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5691 		   size_t cnt, loff_t *ppos)
5692 {
5693 	char buf[64];
5694 	int r;
5695 
5696 	r = snprintf(buf, sizeof(buf), "%ld\n",
5697 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5698 	if (r > sizeof(buf))
5699 		r = sizeof(buf);
5700 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5701 }
5702 
5703 static ssize_t
5704 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5705 		    size_t cnt, loff_t *ppos)
5706 {
5707 	unsigned long val;
5708 	int ret;
5709 
5710 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5711 	if (ret)
5712 		return ret;
5713 
5714 	*ptr = val * 1000;
5715 
5716 	return cnt;
5717 }
5718 
5719 static ssize_t
5720 tracing_thresh_read(struct file *filp, char __user *ubuf,
5721 		    size_t cnt, loff_t *ppos)
5722 {
5723 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5724 }
5725 
5726 static ssize_t
5727 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5728 		     size_t cnt, loff_t *ppos)
5729 {
5730 	struct trace_array *tr = filp->private_data;
5731 	int ret;
5732 
5733 	guard(mutex)(&trace_types_lock);
5734 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5735 	if (ret < 0)
5736 		return ret;
5737 
5738 	if (tr->current_trace->update_thresh) {
5739 		ret = tr->current_trace->update_thresh(tr);
5740 		if (ret < 0)
5741 			return ret;
5742 	}
5743 
5744 	return cnt;
5745 }
5746 
5747 #ifdef CONFIG_TRACER_MAX_TRACE
5748 
5749 static ssize_t
5750 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5751 		     size_t cnt, loff_t *ppos)
5752 {
5753 	struct trace_array *tr = filp->private_data;
5754 
5755 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
5756 }
5757 
5758 static ssize_t
5759 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5760 		      size_t cnt, loff_t *ppos)
5761 {
5762 	struct trace_array *tr = filp->private_data;
5763 
5764 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
5765 }
5766 
5767 #endif
5768 
5769 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
5770 {
5771 	if (cpu == RING_BUFFER_ALL_CPUS) {
5772 		if (cpumask_empty(tr->pipe_cpumask)) {
5773 			cpumask_setall(tr->pipe_cpumask);
5774 			return 0;
5775 		}
5776 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
5777 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
5778 		return 0;
5779 	}
5780 	return -EBUSY;
5781 }
5782 
5783 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
5784 {
5785 	if (cpu == RING_BUFFER_ALL_CPUS) {
5786 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
5787 		cpumask_clear(tr->pipe_cpumask);
5788 	} else {
5789 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
5790 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
5791 	}
5792 }
5793 
5794 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5795 {
5796 	struct trace_array *tr = inode->i_private;
5797 	struct trace_iterator *iter;
5798 	int cpu;
5799 	int ret;
5800 
5801 	ret = tracing_check_open_get_tr(tr);
5802 	if (ret)
5803 		return ret;
5804 
5805 	guard(mutex)(&trace_types_lock);
5806 	cpu = tracing_get_cpu(inode);
5807 	ret = open_pipe_on_cpu(tr, cpu);
5808 	if (ret)
5809 		goto fail_pipe_on_cpu;
5810 
5811 	/* create a buffer to store the information to pass to userspace */
5812 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5813 	if (!iter) {
5814 		ret = -ENOMEM;
5815 		goto fail_alloc_iter;
5816 	}
5817 
5818 	trace_seq_init(&iter->seq);
5819 	iter->trace = tr->current_trace;
5820 
5821 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5822 		ret = -ENOMEM;
5823 		goto fail;
5824 	}
5825 
5826 	/* trace pipe does not show start of buffer */
5827 	cpumask_setall(iter->started);
5828 
5829 	if (tr->trace_flags & TRACE_ITER(LATENCY_FMT))
5830 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
5831 
5832 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
5833 	if (trace_clocks[tr->clock_id].in_ns)
5834 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5835 
5836 	iter->tr = tr;
5837 	iter->array_buffer = &tr->array_buffer;
5838 	iter->cpu_file = cpu;
5839 	mutex_init(&iter->mutex);
5840 	filp->private_data = iter;
5841 
5842 	if (iter->trace->pipe_open)
5843 		iter->trace->pipe_open(iter);
5844 
5845 	nonseekable_open(inode, filp);
5846 
5847 	tr->trace_ref++;
5848 
5849 	return ret;
5850 
5851 fail:
5852 	kfree(iter);
5853 fail_alloc_iter:
5854 	close_pipe_on_cpu(tr, cpu);
5855 fail_pipe_on_cpu:
5856 	__trace_array_put(tr);
5857 	return ret;
5858 }
5859 
5860 static int tracing_release_pipe(struct inode *inode, struct file *file)
5861 {
5862 	struct trace_iterator *iter = file->private_data;
5863 	struct trace_array *tr = inode->i_private;
5864 
5865 	scoped_guard(mutex, &trace_types_lock) {
5866 		tr->trace_ref--;
5867 
5868 		if (iter->trace->pipe_close)
5869 			iter->trace->pipe_close(iter);
5870 		close_pipe_on_cpu(tr, iter->cpu_file);
5871 	}
5872 
5873 	free_trace_iter_content(iter);
5874 	kfree(iter);
5875 
5876 	trace_array_put(tr);
5877 
5878 	return 0;
5879 }
5880 
5881 static __poll_t
5882 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5883 {
5884 	struct trace_array *tr = iter->tr;
5885 
5886 	/* Iterators are static, they should be filled or empty */
5887 	if (trace_buffer_iter(iter, iter->cpu_file))
5888 		return EPOLLIN | EPOLLRDNORM;
5889 
5890 	if (tr->trace_flags & TRACE_ITER(BLOCK))
5891 		/*
5892 		 * Always select as readable when in blocking mode
5893 		 */
5894 		return EPOLLIN | EPOLLRDNORM;
5895 	else
5896 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
5897 					     filp, poll_table, iter->tr->buffer_percent);
5898 }
5899 
5900 static __poll_t
5901 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5902 {
5903 	struct trace_iterator *iter = filp->private_data;
5904 
5905 	return trace_poll(iter, filp, poll_table);
5906 }
5907 
5908 /* Must be called with iter->mutex held. */
5909 static int tracing_wait_pipe(struct file *filp)
5910 {
5911 	struct trace_iterator *iter = filp->private_data;
5912 	int ret;
5913 
5914 	while (trace_empty(iter)) {
5915 
5916 		if ((filp->f_flags & O_NONBLOCK)) {
5917 			return -EAGAIN;
5918 		}
5919 
5920 		/*
5921 		 * We block until we read something and tracing is disabled.
5922 		 * We still block if tracing is disabled, but we have never
5923 		 * read anything. This allows a user to cat this file, and
5924 		 * then enable tracing. But after we have read something,
5925 		 * we give an EOF when tracing is again disabled.
5926 		 *
5927 		 * iter->pos will be 0 if we haven't read anything.
5928 		 */
5929 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5930 			break;
5931 
5932 		mutex_unlock(&iter->mutex);
5933 
5934 		ret = wait_on_pipe(iter, 0);
5935 
5936 		mutex_lock(&iter->mutex);
5937 
5938 		if (ret)
5939 			return ret;
5940 	}
5941 
5942 	return 1;
5943 }
5944 
5945 static bool update_last_data_if_empty(struct trace_array *tr)
5946 {
5947 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
5948 		return false;
5949 
5950 	if (!ring_buffer_empty(tr->array_buffer.buffer))
5951 		return false;
5952 
5953 	/*
5954 	 * If the buffer contains the last boot data and all per-cpu
5955 	 * buffers are empty, reset it from the kernel side.
5956 	 */
5957 	update_last_data(tr);
5958 	return true;
5959 }
5960 
5961 /*
5962  * Consumer reader.
5963  */
5964 static ssize_t
5965 tracing_read_pipe(struct file *filp, char __user *ubuf,
5966 		  size_t cnt, loff_t *ppos)
5967 {
5968 	struct trace_iterator *iter = filp->private_data;
5969 	ssize_t sret;
5970 
5971 	/*
5972 	 * Avoid more than one consumer on a single file descriptor
5973 	 * This is just a matter of traces coherency, the ring buffer itself
5974 	 * is protected.
5975 	 */
5976 	guard(mutex)(&iter->mutex);
5977 
5978 	/* return any leftover data */
5979 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5980 	if (sret != -EBUSY)
5981 		return sret;
5982 
5983 	trace_seq_init(&iter->seq);
5984 
5985 	if (iter->trace->read) {
5986 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5987 		if (sret)
5988 			return sret;
5989 	}
5990 
5991 waitagain:
5992 	if (update_last_data_if_empty(iter->tr))
5993 		return 0;
5994 
5995 	sret = tracing_wait_pipe(filp);
5996 	if (sret <= 0)
5997 		return sret;
5998 
5999 	/* stop when tracing is finished */
6000 	if (trace_empty(iter))
6001 		return 0;
6002 
6003 	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6004 		cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6005 
6006 	/* reset all but tr, trace, and overruns */
6007 	trace_iterator_reset(iter);
6008 	cpumask_clear(iter->started);
6009 	trace_seq_init(&iter->seq);
6010 
6011 	trace_event_read_lock();
6012 	trace_access_lock(iter->cpu_file);
6013 	while (trace_find_next_entry_inc(iter) != NULL) {
6014 		enum print_line_t ret;
6015 		int save_len = iter->seq.seq.len;
6016 
6017 		ret = print_trace_line(iter);
6018 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6019 			/*
6020 			 * If one print_trace_line() fills entire trace_seq in one shot,
6021 			 * trace_seq_to_user() will returns -EBUSY because save_len == 0,
6022 			 * In this case, we need to consume it, otherwise, loop will peek
6023 			 * this event next time, resulting in an infinite loop.
6024 			 */
6025 			if (save_len == 0) {
6026 				iter->seq.full = 0;
6027 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6028 				trace_consume(iter);
6029 				break;
6030 			}
6031 
6032 			/* In other cases, don't print partial lines */
6033 			iter->seq.seq.len = save_len;
6034 			break;
6035 		}
6036 		if (ret != TRACE_TYPE_NO_CONSUME)
6037 			trace_consume(iter);
6038 
6039 		if (trace_seq_used(&iter->seq) >= cnt)
6040 			break;
6041 
6042 		/*
6043 		 * Setting the full flag means we reached the trace_seq buffer
6044 		 * size and we should leave by partial output condition above.
6045 		 * One of the trace_seq_* functions is not used properly.
6046 		 */
6047 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6048 			  iter->ent->type);
6049 	}
6050 	trace_access_unlock(iter->cpu_file);
6051 	trace_event_read_unlock();
6052 
6053 	/* Now copy what we have to the user */
6054 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6055 	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6056 		trace_seq_init(&iter->seq);
6057 
6058 	/*
6059 	 * If there was nothing to send to user, in spite of consuming trace
6060 	 * entries, go back to wait for more entries.
6061 	 */
6062 	if (sret == -EBUSY)
6063 		goto waitagain;
6064 
6065 	return sret;
6066 }
6067 
6068 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6069 				     unsigned int idx)
6070 {
6071 	__free_page(spd->pages[idx]);
6072 }
6073 
6074 static size_t
6075 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6076 {
6077 	size_t count;
6078 	int save_len;
6079 	int ret;
6080 
6081 	/* Seq buffer is page-sized, exactly what we need. */
6082 	for (;;) {
6083 		save_len = iter->seq.seq.len;
6084 		ret = print_trace_line(iter);
6085 
6086 		if (trace_seq_has_overflowed(&iter->seq)) {
6087 			iter->seq.seq.len = save_len;
6088 			break;
6089 		}
6090 
6091 		/*
6092 		 * This should not be hit, because it should only
6093 		 * be set if the iter->seq overflowed. But check it
6094 		 * anyway to be safe.
6095 		 */
6096 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6097 			iter->seq.seq.len = save_len;
6098 			break;
6099 		}
6100 
6101 		count = trace_seq_used(&iter->seq) - save_len;
6102 		if (rem < count) {
6103 			rem = 0;
6104 			iter->seq.seq.len = save_len;
6105 			break;
6106 		}
6107 
6108 		if (ret != TRACE_TYPE_NO_CONSUME)
6109 			trace_consume(iter);
6110 		rem -= count;
6111 		if (!trace_find_next_entry_inc(iter))	{
6112 			rem = 0;
6113 			iter->ent = NULL;
6114 			break;
6115 		}
6116 	}
6117 
6118 	return rem;
6119 }
6120 
6121 static ssize_t tracing_splice_read_pipe(struct file *filp,
6122 					loff_t *ppos,
6123 					struct pipe_inode_info *pipe,
6124 					size_t len,
6125 					unsigned int flags)
6126 {
6127 	struct page *pages_def[PIPE_DEF_BUFFERS];
6128 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6129 	struct trace_iterator *iter = filp->private_data;
6130 	struct splice_pipe_desc spd = {
6131 		.pages		= pages_def,
6132 		.partial	= partial_def,
6133 		.nr_pages	= 0, /* This gets updated below. */
6134 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6135 		.ops		= &default_pipe_buf_ops,
6136 		.spd_release	= tracing_spd_release_pipe,
6137 	};
6138 	ssize_t ret;
6139 	size_t rem;
6140 	unsigned int i;
6141 
6142 	if (splice_grow_spd(pipe, &spd))
6143 		return -ENOMEM;
6144 
6145 	mutex_lock(&iter->mutex);
6146 
6147 	if (iter->trace->splice_read) {
6148 		ret = iter->trace->splice_read(iter, filp,
6149 					       ppos, pipe, len, flags);
6150 		if (ret)
6151 			goto out_err;
6152 	}
6153 
6154 	ret = tracing_wait_pipe(filp);
6155 	if (ret <= 0)
6156 		goto out_err;
6157 
6158 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6159 		ret = -EFAULT;
6160 		goto out_err;
6161 	}
6162 
6163 	trace_event_read_lock();
6164 	trace_access_lock(iter->cpu_file);
6165 
6166 	/* Fill as many pages as possible. */
6167 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6168 		spd.pages[i] = alloc_page(GFP_KERNEL);
6169 		if (!spd.pages[i])
6170 			break;
6171 
6172 		rem = tracing_fill_pipe_page(rem, iter);
6173 
6174 		/* Copy the data into the page, so we can start over. */
6175 		ret = trace_seq_to_buffer(&iter->seq,
6176 					  page_address(spd.pages[i]),
6177 					  min((size_t)trace_seq_used(&iter->seq),
6178 						  (size_t)PAGE_SIZE));
6179 		if (ret < 0) {
6180 			__free_page(spd.pages[i]);
6181 			break;
6182 		}
6183 		spd.partial[i].offset = 0;
6184 		spd.partial[i].len = ret;
6185 
6186 		trace_seq_init(&iter->seq);
6187 	}
6188 
6189 	trace_access_unlock(iter->cpu_file);
6190 	trace_event_read_unlock();
6191 	mutex_unlock(&iter->mutex);
6192 
6193 	spd.nr_pages = i;
6194 
6195 	if (i)
6196 		ret = splice_to_pipe(pipe, &spd);
6197 	else
6198 		ret = 0;
6199 out:
6200 	splice_shrink_spd(&spd);
6201 	return ret;
6202 
6203 out_err:
6204 	mutex_unlock(&iter->mutex);
6205 	goto out;
6206 }
6207 
6208 static ssize_t
6209 tracing_syscall_buf_read(struct file *filp, char __user *ubuf,
6210 			 size_t cnt, loff_t *ppos)
6211 {
6212 	struct inode *inode = file_inode(filp);
6213 	struct trace_array *tr = inode->i_private;
6214 	char buf[64];
6215 	int r;
6216 
6217 	r = snprintf(buf, 64, "%d\n", tr->syscall_buf_sz);
6218 
6219 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6220 }
6221 
6222 static ssize_t
6223 tracing_syscall_buf_write(struct file *filp, const char __user *ubuf,
6224 			  size_t cnt, loff_t *ppos)
6225 {
6226 	struct inode *inode = file_inode(filp);
6227 	struct trace_array *tr = inode->i_private;
6228 	unsigned long val;
6229 	int ret;
6230 
6231 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6232 	if (ret)
6233 		return ret;
6234 
6235 	if (val > SYSCALL_FAULT_USER_MAX)
6236 		val = SYSCALL_FAULT_USER_MAX;
6237 
6238 	tr->syscall_buf_sz = val;
6239 
6240 	*ppos += cnt;
6241 
6242 	return cnt;
6243 }
6244 
6245 static ssize_t
6246 tracing_entries_read(struct file *filp, char __user *ubuf,
6247 		     size_t cnt, loff_t *ppos)
6248 {
6249 	struct inode *inode = file_inode(filp);
6250 	struct trace_array *tr = inode->i_private;
6251 	int cpu = tracing_get_cpu(inode);
6252 	char buf[64];
6253 	int r = 0;
6254 	ssize_t ret;
6255 
6256 	mutex_lock(&trace_types_lock);
6257 
6258 	if (cpu == RING_BUFFER_ALL_CPUS) {
6259 		int cpu, buf_size_same;
6260 		unsigned long size;
6261 
6262 		size = 0;
6263 		buf_size_same = 1;
6264 		/* check if all cpu sizes are same */
6265 		for_each_tracing_cpu(cpu) {
6266 			/* fill in the size from first enabled cpu */
6267 			if (size == 0)
6268 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6269 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6270 				buf_size_same = 0;
6271 				break;
6272 			}
6273 		}
6274 
6275 		if (buf_size_same) {
6276 			if (!tr->ring_buffer_expanded)
6277 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6278 					    size >> 10,
6279 					    trace_buf_size >> 10);
6280 			else
6281 				r = sprintf(buf, "%lu\n", size >> 10);
6282 		} else
6283 			r = sprintf(buf, "X\n");
6284 	} else
6285 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6286 
6287 	mutex_unlock(&trace_types_lock);
6288 
6289 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6290 	return ret;
6291 }
6292 
6293 static ssize_t
6294 tracing_entries_write(struct file *filp, const char __user *ubuf,
6295 		      size_t cnt, loff_t *ppos)
6296 {
6297 	struct inode *inode = file_inode(filp);
6298 	struct trace_array *tr = inode->i_private;
6299 	unsigned long val;
6300 	int ret;
6301 
6302 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6303 	if (ret)
6304 		return ret;
6305 
6306 	/* must have at least 1 entry */
6307 	if (!val)
6308 		return -EINVAL;
6309 
6310 	/* value is in KB */
6311 	val <<= 10;
6312 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6313 	if (ret < 0)
6314 		return ret;
6315 
6316 	*ppos += cnt;
6317 
6318 	return cnt;
6319 }
6320 
6321 static ssize_t
6322 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6323 				size_t cnt, loff_t *ppos)
6324 {
6325 	struct trace_array *tr = filp->private_data;
6326 	char buf[64];
6327 	int r, cpu;
6328 	unsigned long size = 0, expanded_size = 0;
6329 
6330 	mutex_lock(&trace_types_lock);
6331 	for_each_tracing_cpu(cpu) {
6332 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6333 		if (!tr->ring_buffer_expanded)
6334 			expanded_size += trace_buf_size >> 10;
6335 	}
6336 	if (tr->ring_buffer_expanded)
6337 		r = sprintf(buf, "%lu\n", size);
6338 	else
6339 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6340 	mutex_unlock(&trace_types_lock);
6341 
6342 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6343 }
6344 
6345 #define LAST_BOOT_HEADER ((void *)1)
6346 
6347 static void *l_next(struct seq_file *m, void *v, loff_t *pos)
6348 {
6349 	struct trace_array *tr = m->private;
6350 	struct trace_scratch *tscratch = tr->scratch;
6351 	unsigned int index = *pos;
6352 
6353 	(*pos)++;
6354 
6355 	if (*pos == 1)
6356 		return LAST_BOOT_HEADER;
6357 
6358 	/* Only show offsets of the last boot data */
6359 	if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6360 		return NULL;
6361 
6362 	/* *pos 0 is for the header, 1 is for the first module */
6363 	index--;
6364 
6365 	if (index >= tscratch->nr_entries)
6366 		return NULL;
6367 
6368 	return &tscratch->entries[index];
6369 }
6370 
6371 static void *l_start(struct seq_file *m, loff_t *pos)
6372 {
6373 	mutex_lock(&scratch_mutex);
6374 
6375 	return l_next(m, NULL, pos);
6376 }
6377 
6378 static void l_stop(struct seq_file *m, void *p)
6379 {
6380 	mutex_unlock(&scratch_mutex);
6381 }
6382 
6383 static void show_last_boot_header(struct seq_file *m, struct trace_array *tr)
6384 {
6385 	struct trace_scratch *tscratch = tr->scratch;
6386 
6387 	/*
6388 	 * Do not leak KASLR address. This only shows the KASLR address of
6389 	 * the last boot. When the ring buffer is started, the LAST_BOOT
6390 	 * flag gets cleared, and this should only report "current".
6391 	 * Otherwise it shows the KASLR address from the previous boot which
6392 	 * should not be the same as the current boot.
6393 	 */
6394 	if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6395 		seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr);
6396 	else
6397 		seq_puts(m, "# Current\n");
6398 }
6399 
6400 static int l_show(struct seq_file *m, void *v)
6401 {
6402 	struct trace_array *tr = m->private;
6403 	struct trace_mod_entry *entry = v;
6404 
6405 	if (v == LAST_BOOT_HEADER) {
6406 		show_last_boot_header(m, tr);
6407 		return 0;
6408 	}
6409 
6410 	seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name);
6411 	return 0;
6412 }
6413 
6414 static const struct seq_operations last_boot_seq_ops = {
6415 	.start		= l_start,
6416 	.next		= l_next,
6417 	.stop		= l_stop,
6418 	.show		= l_show,
6419 };
6420 
6421 static int tracing_last_boot_open(struct inode *inode, struct file *file)
6422 {
6423 	struct trace_array *tr = inode->i_private;
6424 	struct seq_file *m;
6425 	int ret;
6426 
6427 	ret = tracing_check_open_get_tr(tr);
6428 	if (ret)
6429 		return ret;
6430 
6431 	ret = seq_open(file, &last_boot_seq_ops);
6432 	if (ret) {
6433 		trace_array_put(tr);
6434 		return ret;
6435 	}
6436 
6437 	m = file->private_data;
6438 	m->private = tr;
6439 
6440 	return 0;
6441 }
6442 
6443 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
6444 {
6445 	struct trace_array *tr = inode->i_private;
6446 	int cpu = tracing_get_cpu(inode);
6447 	int ret;
6448 
6449 	ret = tracing_check_open_get_tr(tr);
6450 	if (ret)
6451 		return ret;
6452 
6453 	ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
6454 	if (ret < 0)
6455 		__trace_array_put(tr);
6456 	return ret;
6457 }
6458 
6459 static ssize_t
6460 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6461 			  size_t cnt, loff_t *ppos)
6462 {
6463 	/*
6464 	 * There is no need to read what the user has written, this function
6465 	 * is just to make sure that there is no error when "echo" is used
6466 	 */
6467 
6468 	*ppos += cnt;
6469 
6470 	return cnt;
6471 }
6472 
6473 static int
6474 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6475 {
6476 	struct trace_array *tr = inode->i_private;
6477 
6478 	/* disable tracing ? */
6479 	if (tr->trace_flags & TRACE_ITER(STOP_ON_FREE))
6480 		tracer_tracing_off(tr);
6481 	/* resize the ring buffer to 0 */
6482 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6483 
6484 	trace_array_put(tr);
6485 
6486 	return 0;
6487 }
6488 
6489 #define TRACE_MARKER_MAX_SIZE		4096
6490 
6491 static ssize_t write_marker_to_buffer(struct trace_array *tr, const char *buf,
6492 				      size_t cnt, unsigned long ip)
6493 {
6494 	struct ring_buffer_event *event;
6495 	enum event_trigger_type tt = ETT_NONE;
6496 	struct trace_buffer *buffer;
6497 	struct print_entry *entry;
6498 	int meta_size;
6499 	ssize_t written;
6500 	size_t size;
6501 
6502 	meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
6503  again:
6504 	size = cnt + meta_size;
6505 
6506 	buffer = tr->array_buffer.buffer;
6507 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6508 					    tracing_gen_ctx());
6509 	if (unlikely(!event)) {
6510 		/*
6511 		 * If the size was greater than what was allowed, then
6512 		 * make it smaller and try again.
6513 		 */
6514 		if (size > ring_buffer_max_event_size(buffer)) {
6515 			cnt = ring_buffer_max_event_size(buffer) - meta_size;
6516 			/* The above should only happen once */
6517 			if (WARN_ON_ONCE(cnt + meta_size == size))
6518 				return -EBADF;
6519 			goto again;
6520 		}
6521 
6522 		/* Ring buffer disabled, return as if not open for write */
6523 		return -EBADF;
6524 	}
6525 
6526 	entry = ring_buffer_event_data(event);
6527 	entry->ip = ip;
6528 	memcpy(&entry->buf, buf, cnt);
6529 	written = cnt;
6530 
6531 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6532 		/* do not add \n before testing triggers, but add \0 */
6533 		entry->buf[cnt] = '\0';
6534 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
6535 	}
6536 
6537 	if (entry->buf[cnt - 1] != '\n') {
6538 		entry->buf[cnt] = '\n';
6539 		entry->buf[cnt + 1] = '\0';
6540 	} else
6541 		entry->buf[cnt] = '\0';
6542 
6543 	if (static_branch_unlikely(&trace_marker_exports_enabled))
6544 		ftrace_exports(event, TRACE_EXPORT_MARKER);
6545 	__buffer_unlock_commit(buffer, event);
6546 
6547 	if (tt)
6548 		event_triggers_post_call(tr->trace_marker_file, tt);
6549 
6550 	return written;
6551 }
6552 
6553 struct trace_user_buf {
6554 	char		*buf;
6555 };
6556 
6557 static DEFINE_MUTEX(trace_user_buffer_mutex);
6558 static struct trace_user_buf_info *trace_user_buffer;
6559 
6560 /**
6561  * trace_user_fault_destroy - free up allocated memory of a trace user buffer
6562  * @tinfo: The descriptor to free up
6563  *
6564  * Frees any data allocated in the trace info dsecriptor.
6565  */
6566 void trace_user_fault_destroy(struct trace_user_buf_info *tinfo)
6567 {
6568 	char *buf;
6569 	int cpu;
6570 
6571 	if (!tinfo || !tinfo->tbuf)
6572 		return;
6573 
6574 	for_each_possible_cpu(cpu) {
6575 		buf = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
6576 		kfree(buf);
6577 	}
6578 	free_percpu(tinfo->tbuf);
6579 }
6580 
6581 static int user_fault_buffer_enable(struct trace_user_buf_info *tinfo, size_t size)
6582 {
6583 	char *buf;
6584 	int cpu;
6585 
6586 	lockdep_assert_held(&trace_user_buffer_mutex);
6587 
6588 	tinfo->tbuf = alloc_percpu(struct trace_user_buf);
6589 	if (!tinfo->tbuf)
6590 		return -ENOMEM;
6591 
6592 	tinfo->ref = 1;
6593 	tinfo->size = size;
6594 
6595 	/* Clear each buffer in case of error */
6596 	for_each_possible_cpu(cpu) {
6597 		per_cpu_ptr(tinfo->tbuf, cpu)->buf = NULL;
6598 	}
6599 
6600 	for_each_possible_cpu(cpu) {
6601 		buf = kmalloc_node(size, GFP_KERNEL,
6602 				   cpu_to_node(cpu));
6603 		if (!buf)
6604 			return -ENOMEM;
6605 		per_cpu_ptr(tinfo->tbuf, cpu)->buf = buf;
6606 	}
6607 
6608 	return 0;
6609 }
6610 
6611 /* For internal use. Free and reinitialize */
6612 static void user_buffer_free(struct trace_user_buf_info **tinfo)
6613 {
6614 	lockdep_assert_held(&trace_user_buffer_mutex);
6615 
6616 	trace_user_fault_destroy(*tinfo);
6617 	kfree(*tinfo);
6618 	*tinfo = NULL;
6619 }
6620 
6621 /* For internal use. Initialize and allocate */
6622 static int user_buffer_init(struct trace_user_buf_info **tinfo, size_t size)
6623 {
6624 	bool alloc = false;
6625 	int ret;
6626 
6627 	lockdep_assert_held(&trace_user_buffer_mutex);
6628 
6629 	if (!*tinfo) {
6630 		alloc = true;
6631 		*tinfo = kzalloc(sizeof(**tinfo), GFP_KERNEL);
6632 		if (!*tinfo)
6633 			return -ENOMEM;
6634 	}
6635 
6636 	ret = user_fault_buffer_enable(*tinfo, size);
6637 	if (ret < 0 && alloc)
6638 		user_buffer_free(tinfo);
6639 
6640 	return ret;
6641 }
6642 
6643 /* For internal use, derefrence and free if necessary */
6644 static void user_buffer_put(struct trace_user_buf_info **tinfo)
6645 {
6646 	guard(mutex)(&trace_user_buffer_mutex);
6647 
6648 	if (WARN_ON_ONCE(!*tinfo || !(*tinfo)->ref))
6649 		return;
6650 
6651 	if (--(*tinfo)->ref)
6652 		return;
6653 
6654 	user_buffer_free(tinfo);
6655 }
6656 
6657 /**
6658  * trace_user_fault_init - Allocated or reference a per CPU buffer
6659  * @tinfo: A pointer to the trace buffer descriptor
6660  * @size: The size to allocate each per CPU buffer
6661  *
6662  * Create a per CPU buffer that can be used to copy from user space
6663  * in a task context. When calling trace_user_fault_read(), preemption
6664  * must be disabled, and it will enable preemption and copy user
6665  * space data to the buffer. If any schedule switches occur, it will
6666  * retry until it succeeds without a schedule switch knowing the buffer
6667  * is still valid.
6668  *
6669  * Returns 0 on success, negative on failure.
6670  */
6671 int trace_user_fault_init(struct trace_user_buf_info *tinfo, size_t size)
6672 {
6673 	int ret;
6674 
6675 	if (!tinfo)
6676 		return -EINVAL;
6677 
6678 	guard(mutex)(&trace_user_buffer_mutex);
6679 
6680 	ret = user_buffer_init(&tinfo, size);
6681 	if (ret < 0)
6682 		trace_user_fault_destroy(tinfo);
6683 
6684 	return ret;
6685 }
6686 
6687 /**
6688  * trace_user_fault_get - up the ref count for the user buffer
6689  * @tinfo: A pointer to a pointer to the trace buffer descriptor
6690  *
6691  * Ups the ref count of the trace buffer.
6692  *
6693  * Returns the new ref count.
6694  */
6695 int trace_user_fault_get(struct trace_user_buf_info *tinfo)
6696 {
6697 	if (!tinfo)
6698 		return -1;
6699 
6700 	guard(mutex)(&trace_user_buffer_mutex);
6701 
6702 	tinfo->ref++;
6703 	return tinfo->ref;
6704 }
6705 
6706 /**
6707  * trace_user_fault_put - dereference a per cpu trace buffer
6708  * @tinfo: The @tinfo that was passed to trace_user_fault_get()
6709  *
6710  * Decrement the ref count of @tinfo.
6711  *
6712  * Returns the new refcount (negative on error).
6713  */
6714 int trace_user_fault_put(struct trace_user_buf_info *tinfo)
6715 {
6716 	guard(mutex)(&trace_user_buffer_mutex);
6717 
6718 	if (WARN_ON_ONCE(!tinfo || !tinfo->ref))
6719 		return -1;
6720 
6721 	--tinfo->ref;
6722 	return tinfo->ref;
6723 }
6724 
6725 /**
6726  * trace_user_fault_read - Read user space into a per CPU buffer
6727  * @tinfo: The @tinfo allocated by trace_user_fault_get()
6728  * @ptr: The user space pointer to read
6729  * @size: The size of user space to read.
6730  * @copy_func: Optional function to use to copy from user space
6731  * @data: Data to pass to copy_func if it was supplied
6732  *
6733  * Preemption must be disabled when this is called, and must not
6734  * be enabled while using the returned buffer.
6735  * This does the copying from user space into a per CPU buffer.
6736  *
6737  * The @size must not be greater than the size passed in to
6738  * trace_user_fault_init().
6739  *
6740  * If @copy_func is NULL, trace_user_fault_read() will use copy_from_user(),
6741  * otherwise it will call @copy_func. It will call @copy_func with:
6742  *
6743  *   buffer: the per CPU buffer of the @tinfo.
6744  *   ptr: The pointer @ptr to user space to read
6745  *   size: The @size of the ptr to read
6746  *   data: The @data parameter
6747  *
6748  * It is expected that @copy_func will return 0 on success and non zero
6749  * if there was a fault.
6750  *
6751  * Returns a pointer to the buffer with the content read from @ptr.
6752  *   Preemption must remain disabled while the caller accesses the
6753  *   buffer returned by this function.
6754  * Returns NULL if there was a fault, or the size passed in is
6755  *   greater than the size passed to trace_user_fault_init().
6756  */
6757 char *trace_user_fault_read(struct trace_user_buf_info *tinfo,
6758 			     const char __user *ptr, size_t size,
6759 			     trace_user_buf_copy copy_func, void *data)
6760 {
6761 	int cpu = smp_processor_id();
6762 	char *buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
6763 	unsigned int cnt;
6764 	int trys = 0;
6765 	int ret;
6766 
6767 	lockdep_assert_preemption_disabled();
6768 
6769 	/*
6770 	 * It's up to the caller to not try to copy more than it said
6771 	 * it would.
6772 	 */
6773 	if (size > tinfo->size)
6774 		return NULL;
6775 
6776 	/*
6777 	 * This acts similar to a seqcount. The per CPU context switches are
6778 	 * recorded, migration is disabled and preemption is enabled. The
6779 	 * read of the user space memory is copied into the per CPU buffer.
6780 	 * Preemption is disabled again, and if the per CPU context switches count
6781 	 * is still the same, it means the buffer has not been corrupted.
6782 	 * If the count is different, it is assumed the buffer is corrupted
6783 	 * and reading must be tried again.
6784 	 */
6785 
6786 	do {
6787 		/*
6788 		 * If for some reason, copy_from_user() always causes a context
6789 		 * switch, this would then cause an infinite loop.
6790 		 * If this task is preempted by another user space task, it
6791 		 * will cause this task to try again. But just in case something
6792 		 * changes where the copying from user space causes another task
6793 		 * to run, prevent this from going into an infinite loop.
6794 		 * 100 tries should be plenty.
6795 		 */
6796 		if (WARN_ONCE(trys++ > 100, "Error: Too many tries to read user space"))
6797 			return NULL;
6798 
6799 		/* Read the current CPU context switch counter */
6800 		cnt = nr_context_switches_cpu(cpu);
6801 
6802 		/*
6803 		 * Preemption is going to be enabled, but this task must
6804 		 * remain on this CPU.
6805 		 */
6806 		migrate_disable();
6807 
6808 		/*
6809 		 * Now preemption is being enabled and another task can come in
6810 		 * and use the same buffer and corrupt our data.
6811 		 */
6812 		preempt_enable_notrace();
6813 
6814 		/* Make sure preemption is enabled here */
6815 		lockdep_assert_preemption_enabled();
6816 
6817 		if (copy_func) {
6818 			ret = copy_func(buffer, ptr, size, data);
6819 		} else {
6820 			ret = __copy_from_user(buffer, ptr, size);
6821 		}
6822 
6823 		preempt_disable_notrace();
6824 		migrate_enable();
6825 
6826 		/* if it faulted, no need to test if the buffer was corrupted */
6827 		if (ret)
6828 			return NULL;
6829 
6830 		/*
6831 		 * Preemption is disabled again, now check the per CPU context
6832 		 * switch counter. If it doesn't match, then another user space
6833 		 * process may have schedule in and corrupted our buffer. In that
6834 		 * case the copying must be retried.
6835 		 */
6836 	} while (nr_context_switches_cpu(cpu) != cnt);
6837 
6838 	return buffer;
6839 }
6840 
6841 static ssize_t
6842 tracing_mark_write(struct file *filp, const char __user *ubuf,
6843 					size_t cnt, loff_t *fpos)
6844 {
6845 	struct trace_array *tr = filp->private_data;
6846 	ssize_t written = -ENODEV;
6847 	unsigned long ip;
6848 	char *buf;
6849 
6850 	if (unlikely(tracing_disabled))
6851 		return -EINVAL;
6852 
6853 	if (!(tr->trace_flags & TRACE_ITER(MARKERS)))
6854 		return -EINVAL;
6855 
6856 	if ((ssize_t)cnt < 0)
6857 		return -EINVAL;
6858 
6859 	if (cnt > TRACE_MARKER_MAX_SIZE)
6860 		cnt = TRACE_MARKER_MAX_SIZE;
6861 
6862 	/* Must have preemption disabled while having access to the buffer */
6863 	guard(preempt_notrace)();
6864 
6865 	buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL);
6866 	if (!buf)
6867 		return -EFAULT;
6868 
6869 	/* The selftests expect this function to be the IP address */
6870 	ip = _THIS_IP_;
6871 
6872 	/* The global trace_marker can go to multiple instances */
6873 	if (tr == &global_trace) {
6874 		guard(rcu)();
6875 		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
6876 			written = write_marker_to_buffer(tr, buf, cnt, ip);
6877 			if (written < 0)
6878 				break;
6879 		}
6880 	} else {
6881 		written = write_marker_to_buffer(tr, buf, cnt, ip);
6882 	}
6883 
6884 	return written;
6885 }
6886 
6887 static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
6888 					  const char *buf, size_t cnt)
6889 {
6890 	struct ring_buffer_event *event;
6891 	struct trace_buffer *buffer;
6892 	struct raw_data_entry *entry;
6893 	ssize_t written;
6894 	size_t size;
6895 
6896 	/* cnt includes both the entry->id and the data behind it. */
6897 	size = struct_offset(entry, id) + cnt;
6898 
6899 	buffer = tr->array_buffer.buffer;
6900 
6901 	if (size > ring_buffer_max_event_size(buffer))
6902 		return -EINVAL;
6903 
6904 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6905 					    tracing_gen_ctx());
6906 	if (!event)
6907 		/* Ring buffer disabled, return as if not open for write */
6908 		return -EBADF;
6909 
6910 	entry = ring_buffer_event_data(event);
6911 	unsafe_memcpy(&entry->id, buf, cnt,
6912 		      "id and content already reserved on ring buffer"
6913 		      "'buf' includes the 'id' and the data."
6914 		      "'entry' was allocated with cnt from 'id'.");
6915 	written = cnt;
6916 
6917 	__buffer_unlock_commit(buffer, event);
6918 
6919 	return written;
6920 }
6921 
6922 static ssize_t
6923 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6924 					size_t cnt, loff_t *fpos)
6925 {
6926 	struct trace_array *tr = filp->private_data;
6927 	ssize_t written = -ENODEV;
6928 	char *buf;
6929 
6930 	if (unlikely(tracing_disabled))
6931 		return -EINVAL;
6932 
6933 	if (!(tr->trace_flags & TRACE_ITER(MARKERS)))
6934 		return -EINVAL;
6935 
6936 	/* The marker must at least have a tag id */
6937 	if (cnt < sizeof(unsigned int))
6938 		return -EINVAL;
6939 
6940 	/* raw write is all or nothing */
6941 	if (cnt > TRACE_MARKER_MAX_SIZE)
6942 		return -EINVAL;
6943 
6944 	/* Must have preemption disabled while having access to the buffer */
6945 	guard(preempt_notrace)();
6946 
6947 	buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL);
6948 	if (!buf)
6949 		return -EFAULT;
6950 
6951 	/* The global trace_marker_raw can go to multiple instances */
6952 	if (tr == &global_trace) {
6953 		guard(rcu)();
6954 		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
6955 			written = write_raw_marker_to_buffer(tr, buf, cnt);
6956 			if (written < 0)
6957 				break;
6958 		}
6959 	} else {
6960 		written = write_raw_marker_to_buffer(tr, buf, cnt);
6961 	}
6962 
6963 	return written;
6964 }
6965 
6966 static int tracing_mark_open(struct inode *inode, struct file *filp)
6967 {
6968 	int ret;
6969 
6970 	scoped_guard(mutex, &trace_user_buffer_mutex) {
6971 		if (!trace_user_buffer) {
6972 			ret = user_buffer_init(&trace_user_buffer, TRACE_MARKER_MAX_SIZE);
6973 			if (ret < 0)
6974 				return ret;
6975 		} else {
6976 			trace_user_buffer->ref++;
6977 		}
6978 	}
6979 
6980 	stream_open(inode, filp);
6981 	ret = tracing_open_generic_tr(inode, filp);
6982 	if (ret < 0)
6983 		user_buffer_put(&trace_user_buffer);
6984 	return ret;
6985 }
6986 
6987 static int tracing_mark_release(struct inode *inode, struct file *file)
6988 {
6989 	user_buffer_put(&trace_user_buffer);
6990 	return tracing_release_generic_tr(inode, file);
6991 }
6992 
6993 static int tracing_clock_show(struct seq_file *m, void *v)
6994 {
6995 	struct trace_array *tr = m->private;
6996 	int i;
6997 
6998 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6999 		seq_printf(m,
7000 			"%s%s%s%s", i ? " " : "",
7001 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7002 			i == tr->clock_id ? "]" : "");
7003 	seq_putc(m, '\n');
7004 
7005 	return 0;
7006 }
7007 
7008 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7009 {
7010 	int i;
7011 
7012 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7013 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7014 			break;
7015 	}
7016 	if (i == ARRAY_SIZE(trace_clocks))
7017 		return -EINVAL;
7018 
7019 	guard(mutex)(&trace_types_lock);
7020 
7021 	tr->clock_id = i;
7022 
7023 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7024 
7025 	/*
7026 	 * New clock may not be consistent with the previous clock.
7027 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7028 	 */
7029 	tracing_reset_online_cpus(&tr->array_buffer);
7030 
7031 #ifdef CONFIG_TRACER_SNAPSHOT
7032 	if (tr->snapshot_buffer.buffer)
7033 		ring_buffer_set_clock(tr->snapshot_buffer.buffer, trace_clocks[i].func);
7034 	tracing_reset_online_cpus(&tr->snapshot_buffer);
7035 #endif
7036 	update_last_data_if_empty(tr);
7037 
7038 	if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
7039 		struct trace_scratch *tscratch = tr->scratch;
7040 
7041 		tscratch->clock_id = i;
7042 	}
7043 
7044 	return 0;
7045 }
7046 
7047 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7048 				   size_t cnt, loff_t *fpos)
7049 {
7050 	struct seq_file *m = filp->private_data;
7051 	struct trace_array *tr = m->private;
7052 	char buf[64];
7053 	const char *clockstr;
7054 	int ret;
7055 
7056 	if (cnt >= sizeof(buf))
7057 		return -EINVAL;
7058 
7059 	if (copy_from_user(buf, ubuf, cnt))
7060 		return -EFAULT;
7061 
7062 	buf[cnt] = 0;
7063 
7064 	clockstr = strstrip(buf);
7065 
7066 	ret = tracing_set_clock(tr, clockstr);
7067 	if (ret)
7068 		return ret;
7069 
7070 	*fpos += cnt;
7071 
7072 	return cnt;
7073 }
7074 
7075 static int tracing_clock_open(struct inode *inode, struct file *file)
7076 {
7077 	struct trace_array *tr = inode->i_private;
7078 	int ret;
7079 
7080 	ret = tracing_check_open_get_tr(tr);
7081 	if (ret)
7082 		return ret;
7083 
7084 	ret = single_open(file, tracing_clock_show, inode->i_private);
7085 	if (ret < 0)
7086 		trace_array_put(tr);
7087 
7088 	return ret;
7089 }
7090 
7091 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7092 {
7093 	struct trace_array *tr = m->private;
7094 
7095 	guard(mutex)(&trace_types_lock);
7096 
7097 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7098 		seq_puts(m, "delta [absolute]\n");
7099 	else
7100 		seq_puts(m, "[delta] absolute\n");
7101 
7102 	return 0;
7103 }
7104 
7105 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7106 {
7107 	struct trace_array *tr = inode->i_private;
7108 	int ret;
7109 
7110 	ret = tracing_check_open_get_tr(tr);
7111 	if (ret)
7112 		return ret;
7113 
7114 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7115 	if (ret < 0)
7116 		trace_array_put(tr);
7117 
7118 	return ret;
7119 }
7120 
7121 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7122 {
7123 	if (rbe == this_cpu_read(trace_buffered_event))
7124 		return ring_buffer_time_stamp(buffer);
7125 
7126 	return ring_buffer_event_time_stamp(buffer, rbe);
7127 }
7128 
7129 struct ftrace_buffer_info {
7130 	struct trace_iterator	iter;
7131 	void			*spare;
7132 	unsigned int		spare_cpu;
7133 	unsigned int		spare_size;
7134 	unsigned int		read;
7135 };
7136 
7137 #ifdef CONFIG_TRACER_SNAPSHOT
7138 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7139 {
7140 	struct trace_array *tr = inode->i_private;
7141 	struct trace_iterator *iter;
7142 	struct seq_file *m;
7143 	int ret;
7144 
7145 	ret = tracing_check_open_get_tr(tr);
7146 	if (ret)
7147 		return ret;
7148 
7149 	if (file->f_mode & FMODE_READ) {
7150 		iter = __tracing_open(inode, file, true);
7151 		if (IS_ERR(iter))
7152 			ret = PTR_ERR(iter);
7153 	} else {
7154 		/* Writes still need the seq_file to hold the private data */
7155 		ret = -ENOMEM;
7156 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7157 		if (!m)
7158 			goto out;
7159 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7160 		if (!iter) {
7161 			kfree(m);
7162 			goto out;
7163 		}
7164 		ret = 0;
7165 
7166 		iter->tr = tr;
7167 		iter->array_buffer = &tr->snapshot_buffer;
7168 		iter->cpu_file = tracing_get_cpu(inode);
7169 		m->private = iter;
7170 		file->private_data = m;
7171 	}
7172 out:
7173 	if (ret < 0)
7174 		trace_array_put(tr);
7175 
7176 	return ret;
7177 }
7178 
7179 static void tracing_swap_cpu_buffer(void *tr)
7180 {
7181 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7182 }
7183 
7184 static ssize_t
7185 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7186 		       loff_t *ppos)
7187 {
7188 	struct seq_file *m = filp->private_data;
7189 	struct trace_iterator *iter = m->private;
7190 	struct trace_array *tr = iter->tr;
7191 	unsigned long val;
7192 	int ret;
7193 
7194 	ret = tracing_update_buffers(tr);
7195 	if (ret < 0)
7196 		return ret;
7197 
7198 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7199 	if (ret)
7200 		return ret;
7201 
7202 	guard(mutex)(&trace_types_lock);
7203 
7204 	if (tracer_uses_snapshot(tr->current_trace))
7205 		return -EBUSY;
7206 
7207 	local_irq_disable();
7208 	arch_spin_lock(&tr->max_lock);
7209 	if (tr->cond_snapshot)
7210 		ret = -EBUSY;
7211 	arch_spin_unlock(&tr->max_lock);
7212 	local_irq_enable();
7213 	if (ret)
7214 		return ret;
7215 
7216 	switch (val) {
7217 	case 0:
7218 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7219 			return -EINVAL;
7220 		if (tr->allocated_snapshot)
7221 			free_snapshot(tr);
7222 		break;
7223 	case 1:
7224 /* Only allow per-cpu swap if the ring buffer supports it */
7225 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7226 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7227 			return -EINVAL;
7228 #endif
7229 		if (tr->allocated_snapshot)
7230 			ret = resize_buffer_duplicate_size(&tr->snapshot_buffer,
7231 					&tr->array_buffer, iter->cpu_file);
7232 
7233 		ret = tracing_arm_snapshot_locked(tr);
7234 		if (ret)
7235 			return ret;
7236 
7237 		/* Now, we're going to swap */
7238 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7239 			local_irq_disable();
7240 			update_max_tr(tr, current, smp_processor_id(), NULL);
7241 			local_irq_enable();
7242 		} else {
7243 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7244 						 (void *)tr, 1);
7245 		}
7246 		tracing_disarm_snapshot(tr);
7247 		break;
7248 	default:
7249 		if (tr->allocated_snapshot) {
7250 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7251 				tracing_reset_online_cpus(&tr->snapshot_buffer);
7252 			else
7253 				tracing_reset_cpu(&tr->snapshot_buffer, iter->cpu_file);
7254 		}
7255 		break;
7256 	}
7257 
7258 	if (ret >= 0) {
7259 		*ppos += cnt;
7260 		ret = cnt;
7261 	}
7262 
7263 	return ret;
7264 }
7265 
7266 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7267 {
7268 	struct seq_file *m = file->private_data;
7269 	int ret;
7270 
7271 	ret = tracing_release(inode, file);
7272 
7273 	if (file->f_mode & FMODE_READ)
7274 		return ret;
7275 
7276 	/* If write only, the seq_file is just a stub */
7277 	if (m)
7278 		kfree(m->private);
7279 	kfree(m);
7280 
7281 	return 0;
7282 }
7283 
7284 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7285 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7286 				    size_t count, loff_t *ppos);
7287 static int tracing_buffers_release(struct inode *inode, struct file *file);
7288 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7289 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7290 
7291 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7292 {
7293 	struct ftrace_buffer_info *info;
7294 	int ret;
7295 
7296 	/* The following checks for tracefs lockdown */
7297 	ret = tracing_buffers_open(inode, filp);
7298 	if (ret < 0)
7299 		return ret;
7300 
7301 	info = filp->private_data;
7302 
7303 	if (tracer_uses_snapshot(info->iter.trace)) {
7304 		tracing_buffers_release(inode, filp);
7305 		return -EBUSY;
7306 	}
7307 
7308 	info->iter.snapshot = true;
7309 	info->iter.array_buffer = &info->iter.tr->snapshot_buffer;
7310 
7311 	return ret;
7312 }
7313 
7314 #endif /* CONFIG_TRACER_SNAPSHOT */
7315 
7316 
7317 static const struct file_operations tracing_thresh_fops = {
7318 	.open		= tracing_open_generic,
7319 	.read		= tracing_thresh_read,
7320 	.write		= tracing_thresh_write,
7321 	.llseek		= generic_file_llseek,
7322 };
7323 
7324 #ifdef CONFIG_TRACER_MAX_TRACE
7325 static const struct file_operations tracing_max_lat_fops = {
7326 	.open		= tracing_open_generic_tr,
7327 	.read		= tracing_max_lat_read,
7328 	.write		= tracing_max_lat_write,
7329 	.llseek		= generic_file_llseek,
7330 	.release	= tracing_release_generic_tr,
7331 };
7332 #endif
7333 
7334 static const struct file_operations set_tracer_fops = {
7335 	.open		= tracing_open_generic_tr,
7336 	.read		= tracing_set_trace_read,
7337 	.write		= tracing_set_trace_write,
7338 	.llseek		= generic_file_llseek,
7339 	.release	= tracing_release_generic_tr,
7340 };
7341 
7342 static const struct file_operations tracing_pipe_fops = {
7343 	.open		= tracing_open_pipe,
7344 	.poll		= tracing_poll_pipe,
7345 	.read		= tracing_read_pipe,
7346 	.splice_read	= tracing_splice_read_pipe,
7347 	.release	= tracing_release_pipe,
7348 };
7349 
7350 static const struct file_operations tracing_entries_fops = {
7351 	.open		= tracing_open_generic_tr,
7352 	.read		= tracing_entries_read,
7353 	.write		= tracing_entries_write,
7354 	.llseek		= generic_file_llseek,
7355 	.release	= tracing_release_generic_tr,
7356 };
7357 
7358 static const struct file_operations tracing_syscall_buf_fops = {
7359 	.open		= tracing_open_generic_tr,
7360 	.read		= tracing_syscall_buf_read,
7361 	.write		= tracing_syscall_buf_write,
7362 	.llseek		= generic_file_llseek,
7363 	.release	= tracing_release_generic_tr,
7364 };
7365 
7366 static const struct file_operations tracing_buffer_meta_fops = {
7367 	.open		= tracing_buffer_meta_open,
7368 	.read		= seq_read,
7369 	.llseek		= seq_lseek,
7370 	.release	= tracing_seq_release,
7371 };
7372 
7373 static const struct file_operations tracing_total_entries_fops = {
7374 	.open		= tracing_open_generic_tr,
7375 	.read		= tracing_total_entries_read,
7376 	.llseek		= generic_file_llseek,
7377 	.release	= tracing_release_generic_tr,
7378 };
7379 
7380 static const struct file_operations tracing_free_buffer_fops = {
7381 	.open		= tracing_open_generic_tr,
7382 	.write		= tracing_free_buffer_write,
7383 	.release	= tracing_free_buffer_release,
7384 };
7385 
7386 static const struct file_operations tracing_mark_fops = {
7387 	.open		= tracing_mark_open,
7388 	.write		= tracing_mark_write,
7389 	.release	= tracing_mark_release,
7390 };
7391 
7392 static const struct file_operations tracing_mark_raw_fops = {
7393 	.open		= tracing_mark_open,
7394 	.write		= tracing_mark_raw_write,
7395 	.release	= tracing_mark_release,
7396 };
7397 
7398 static const struct file_operations trace_clock_fops = {
7399 	.open		= tracing_clock_open,
7400 	.read		= seq_read,
7401 	.llseek		= seq_lseek,
7402 	.release	= tracing_single_release_tr,
7403 	.write		= tracing_clock_write,
7404 };
7405 
7406 static const struct file_operations trace_time_stamp_mode_fops = {
7407 	.open		= tracing_time_stamp_mode_open,
7408 	.read		= seq_read,
7409 	.llseek		= seq_lseek,
7410 	.release	= tracing_single_release_tr,
7411 };
7412 
7413 static const struct file_operations last_boot_fops = {
7414 	.open		= tracing_last_boot_open,
7415 	.read		= seq_read,
7416 	.llseek		= seq_lseek,
7417 	.release	= tracing_seq_release,
7418 };
7419 
7420 #ifdef CONFIG_TRACER_SNAPSHOT
7421 static const struct file_operations snapshot_fops = {
7422 	.open		= tracing_snapshot_open,
7423 	.read		= seq_read,
7424 	.write		= tracing_snapshot_write,
7425 	.llseek		= tracing_lseek,
7426 	.release	= tracing_snapshot_release,
7427 };
7428 
7429 static const struct file_operations snapshot_raw_fops = {
7430 	.open		= snapshot_raw_open,
7431 	.read		= tracing_buffers_read,
7432 	.release	= tracing_buffers_release,
7433 	.splice_read	= tracing_buffers_splice_read,
7434 };
7435 
7436 #endif /* CONFIG_TRACER_SNAPSHOT */
7437 
7438 /*
7439  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7440  * @filp: The active open file structure
7441  * @ubuf: The userspace provided buffer to read value into
7442  * @cnt: The maximum number of bytes to read
7443  * @ppos: The current "file" position
7444  *
7445  * This function implements the write interface for a struct trace_min_max_param.
7446  * The filp->private_data must point to a trace_min_max_param structure that
7447  * defines where to write the value, the min and the max acceptable values,
7448  * and a lock to protect the write.
7449  */
7450 static ssize_t
7451 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7452 {
7453 	struct trace_min_max_param *param = filp->private_data;
7454 	u64 val;
7455 	int err;
7456 
7457 	if (!param)
7458 		return -EFAULT;
7459 
7460 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7461 	if (err)
7462 		return err;
7463 
7464 	if (param->lock)
7465 		mutex_lock(param->lock);
7466 
7467 	if (param->min && val < *param->min)
7468 		err = -EINVAL;
7469 
7470 	if (param->max && val > *param->max)
7471 		err = -EINVAL;
7472 
7473 	if (!err)
7474 		*param->val = val;
7475 
7476 	if (param->lock)
7477 		mutex_unlock(param->lock);
7478 
7479 	if (err)
7480 		return err;
7481 
7482 	return cnt;
7483 }
7484 
7485 /*
7486  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7487  * @filp: The active open file structure
7488  * @ubuf: The userspace provided buffer to read value into
7489  * @cnt: The maximum number of bytes to read
7490  * @ppos: The current "file" position
7491  *
7492  * This function implements the read interface for a struct trace_min_max_param.
7493  * The filp->private_data must point to a trace_min_max_param struct with valid
7494  * data.
7495  */
7496 static ssize_t
7497 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7498 {
7499 	struct trace_min_max_param *param = filp->private_data;
7500 	char buf[U64_STR_SIZE];
7501 	int len;
7502 	u64 val;
7503 
7504 	if (!param)
7505 		return -EFAULT;
7506 
7507 	val = *param->val;
7508 
7509 	if (cnt > sizeof(buf))
7510 		cnt = sizeof(buf);
7511 
7512 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7513 
7514 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7515 }
7516 
7517 const struct file_operations trace_min_max_fops = {
7518 	.open		= tracing_open_generic,
7519 	.read		= trace_min_max_read,
7520 	.write		= trace_min_max_write,
7521 };
7522 
7523 #define TRACING_LOG_ERRS_MAX	8
7524 #define TRACING_LOG_LOC_MAX	128
7525 
7526 #define CMD_PREFIX "  Command: "
7527 
7528 struct err_info {
7529 	const char	**errs;	/* ptr to loc-specific array of err strings */
7530 	u8		type;	/* index into errs -> specific err string */
7531 	u16		pos;	/* caret position */
7532 	u64		ts;
7533 };
7534 
7535 struct tracing_log_err {
7536 	struct list_head	list;
7537 	struct err_info		info;
7538 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7539 	char			*cmd;                     /* what caused err */
7540 };
7541 
7542 static DEFINE_MUTEX(tracing_err_log_lock);
7543 
7544 static struct tracing_log_err *alloc_tracing_log_err(int len)
7545 {
7546 	struct tracing_log_err *err;
7547 
7548 	err = kzalloc(sizeof(*err), GFP_KERNEL);
7549 	if (!err)
7550 		return ERR_PTR(-ENOMEM);
7551 
7552 	err->cmd = kzalloc(len, GFP_KERNEL);
7553 	if (!err->cmd) {
7554 		kfree(err);
7555 		return ERR_PTR(-ENOMEM);
7556 	}
7557 
7558 	return err;
7559 }
7560 
7561 static void free_tracing_log_err(struct tracing_log_err *err)
7562 {
7563 	kfree(err->cmd);
7564 	kfree(err);
7565 }
7566 
7567 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7568 						   int len)
7569 {
7570 	struct tracing_log_err *err;
7571 	char *cmd;
7572 
7573 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7574 		err = alloc_tracing_log_err(len);
7575 		if (PTR_ERR(err) != -ENOMEM)
7576 			tr->n_err_log_entries++;
7577 
7578 		return err;
7579 	}
7580 	cmd = kzalloc(len, GFP_KERNEL);
7581 	if (!cmd)
7582 		return ERR_PTR(-ENOMEM);
7583 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7584 	kfree(err->cmd);
7585 	err->cmd = cmd;
7586 	list_del(&err->list);
7587 
7588 	return err;
7589 }
7590 
7591 /**
7592  * err_pos - find the position of a string within a command for error careting
7593  * @cmd: The tracing command that caused the error
7594  * @str: The string to position the caret at within @cmd
7595  *
7596  * Finds the position of the first occurrence of @str within @cmd.  The
7597  * return value can be passed to tracing_log_err() for caret placement
7598  * within @cmd.
7599  *
7600  * Returns the index within @cmd of the first occurrence of @str or 0
7601  * if @str was not found.
7602  */
7603 unsigned int err_pos(char *cmd, const char *str)
7604 {
7605 	char *found;
7606 
7607 	if (WARN_ON(!strlen(cmd)))
7608 		return 0;
7609 
7610 	found = strstr(cmd, str);
7611 	if (found)
7612 		return found - cmd;
7613 
7614 	return 0;
7615 }
7616 
7617 /**
7618  * tracing_log_err - write an error to the tracing error log
7619  * @tr: The associated trace array for the error (NULL for top level array)
7620  * @loc: A string describing where the error occurred
7621  * @cmd: The tracing command that caused the error
7622  * @errs: The array of loc-specific static error strings
7623  * @type: The index into errs[], which produces the specific static err string
7624  * @pos: The position the caret should be placed in the cmd
7625  *
7626  * Writes an error into tracing/error_log of the form:
7627  *
7628  * <loc>: error: <text>
7629  *   Command: <cmd>
7630  *              ^
7631  *
7632  * tracing/error_log is a small log file containing the last
7633  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7634  * unless there has been a tracing error, and the error log can be
7635  * cleared and have its memory freed by writing the empty string in
7636  * truncation mode to it i.e. echo > tracing/error_log.
7637  *
7638  * NOTE: the @errs array along with the @type param are used to
7639  * produce a static error string - this string is not copied and saved
7640  * when the error is logged - only a pointer to it is saved.  See
7641  * existing callers for examples of how static strings are typically
7642  * defined for use with tracing_log_err().
7643  */
7644 void tracing_log_err(struct trace_array *tr,
7645 		     const char *loc, const char *cmd,
7646 		     const char **errs, u8 type, u16 pos)
7647 {
7648 	struct tracing_log_err *err;
7649 	int len = 0;
7650 
7651 	if (!tr)
7652 		tr = &global_trace;
7653 
7654 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7655 
7656 	guard(mutex)(&tracing_err_log_lock);
7657 
7658 	err = get_tracing_log_err(tr, len);
7659 	if (PTR_ERR(err) == -ENOMEM)
7660 		return;
7661 
7662 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7663 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7664 
7665 	err->info.errs = errs;
7666 	err->info.type = type;
7667 	err->info.pos = pos;
7668 	err->info.ts = local_clock();
7669 
7670 	list_add_tail(&err->list, &tr->err_log);
7671 }
7672 
7673 static void clear_tracing_err_log(struct trace_array *tr)
7674 {
7675 	struct tracing_log_err *err, *next;
7676 
7677 	guard(mutex)(&tracing_err_log_lock);
7678 
7679 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7680 		list_del(&err->list);
7681 		free_tracing_log_err(err);
7682 	}
7683 
7684 	tr->n_err_log_entries = 0;
7685 }
7686 
7687 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7688 {
7689 	struct trace_array *tr = m->private;
7690 
7691 	mutex_lock(&tracing_err_log_lock);
7692 
7693 	return seq_list_start(&tr->err_log, *pos);
7694 }
7695 
7696 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7697 {
7698 	struct trace_array *tr = m->private;
7699 
7700 	return seq_list_next(v, &tr->err_log, pos);
7701 }
7702 
7703 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7704 {
7705 	mutex_unlock(&tracing_err_log_lock);
7706 }
7707 
7708 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7709 {
7710 	u16 i;
7711 
7712 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7713 		seq_putc(m, ' ');
7714 	for (i = 0; i < pos; i++)
7715 		seq_putc(m, ' ');
7716 	seq_puts(m, "^\n");
7717 }
7718 
7719 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7720 {
7721 	struct tracing_log_err *err = v;
7722 
7723 	if (err) {
7724 		const char *err_text = err->info.errs[err->info.type];
7725 		u64 sec = err->info.ts;
7726 		u32 nsec;
7727 
7728 		nsec = do_div(sec, NSEC_PER_SEC);
7729 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7730 			   err->loc, err_text);
7731 		seq_printf(m, "%s", err->cmd);
7732 		tracing_err_log_show_pos(m, err->info.pos);
7733 	}
7734 
7735 	return 0;
7736 }
7737 
7738 static const struct seq_operations tracing_err_log_seq_ops = {
7739 	.start  = tracing_err_log_seq_start,
7740 	.next   = tracing_err_log_seq_next,
7741 	.stop   = tracing_err_log_seq_stop,
7742 	.show   = tracing_err_log_seq_show
7743 };
7744 
7745 static int tracing_err_log_open(struct inode *inode, struct file *file)
7746 {
7747 	struct trace_array *tr = inode->i_private;
7748 	int ret = 0;
7749 
7750 	ret = tracing_check_open_get_tr(tr);
7751 	if (ret)
7752 		return ret;
7753 
7754 	/* If this file was opened for write, then erase contents */
7755 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7756 		clear_tracing_err_log(tr);
7757 
7758 	if (file->f_mode & FMODE_READ) {
7759 		ret = seq_open(file, &tracing_err_log_seq_ops);
7760 		if (!ret) {
7761 			struct seq_file *m = file->private_data;
7762 			m->private = tr;
7763 		} else {
7764 			trace_array_put(tr);
7765 		}
7766 	}
7767 	return ret;
7768 }
7769 
7770 static ssize_t tracing_err_log_write(struct file *file,
7771 				     const char __user *buffer,
7772 				     size_t count, loff_t *ppos)
7773 {
7774 	return count;
7775 }
7776 
7777 static int tracing_err_log_release(struct inode *inode, struct file *file)
7778 {
7779 	struct trace_array *tr = inode->i_private;
7780 
7781 	trace_array_put(tr);
7782 
7783 	if (file->f_mode & FMODE_READ)
7784 		seq_release(inode, file);
7785 
7786 	return 0;
7787 }
7788 
7789 static const struct file_operations tracing_err_log_fops = {
7790 	.open           = tracing_err_log_open,
7791 	.write		= tracing_err_log_write,
7792 	.read           = seq_read,
7793 	.llseek         = tracing_lseek,
7794 	.release        = tracing_err_log_release,
7795 };
7796 
7797 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7798 {
7799 	struct trace_array *tr = inode->i_private;
7800 	struct ftrace_buffer_info *info;
7801 	int ret;
7802 
7803 	ret = tracing_check_open_get_tr(tr);
7804 	if (ret)
7805 		return ret;
7806 
7807 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
7808 	if (!info) {
7809 		trace_array_put(tr);
7810 		return -ENOMEM;
7811 	}
7812 
7813 	mutex_lock(&trace_types_lock);
7814 
7815 	info->iter.tr		= tr;
7816 	info->iter.cpu_file	= tracing_get_cpu(inode);
7817 	info->iter.trace	= tr->current_trace;
7818 	info->iter.array_buffer = &tr->array_buffer;
7819 	info->spare		= NULL;
7820 	/* Force reading ring buffer for first read */
7821 	info->read		= (unsigned int)-1;
7822 
7823 	filp->private_data = info;
7824 
7825 	tr->trace_ref++;
7826 
7827 	mutex_unlock(&trace_types_lock);
7828 
7829 	ret = nonseekable_open(inode, filp);
7830 	if (ret < 0)
7831 		trace_array_put(tr);
7832 
7833 	return ret;
7834 }
7835 
7836 static __poll_t
7837 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7838 {
7839 	struct ftrace_buffer_info *info = filp->private_data;
7840 	struct trace_iterator *iter = &info->iter;
7841 
7842 	return trace_poll(iter, filp, poll_table);
7843 }
7844 
7845 static ssize_t
7846 tracing_buffers_read(struct file *filp, char __user *ubuf,
7847 		     size_t count, loff_t *ppos)
7848 {
7849 	struct ftrace_buffer_info *info = filp->private_data;
7850 	struct trace_iterator *iter = &info->iter;
7851 	void *trace_data;
7852 	int page_size;
7853 	ssize_t ret = 0;
7854 	ssize_t size;
7855 
7856 	if (!count)
7857 		return 0;
7858 
7859 	if (iter->snapshot && tracer_uses_snapshot(iter->tr->current_trace))
7860 		return -EBUSY;
7861 
7862 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
7863 
7864 	/* Make sure the spare matches the current sub buffer size */
7865 	if (info->spare) {
7866 		if (page_size != info->spare_size) {
7867 			ring_buffer_free_read_page(iter->array_buffer->buffer,
7868 						   info->spare_cpu, info->spare);
7869 			info->spare = NULL;
7870 		}
7871 	}
7872 
7873 	if (!info->spare) {
7874 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7875 							  iter->cpu_file);
7876 		if (IS_ERR(info->spare)) {
7877 			ret = PTR_ERR(info->spare);
7878 			info->spare = NULL;
7879 		} else {
7880 			info->spare_cpu = iter->cpu_file;
7881 			info->spare_size = page_size;
7882 		}
7883 	}
7884 	if (!info->spare)
7885 		return ret;
7886 
7887 	/* Do we have previous read data to read? */
7888 	if (info->read < page_size)
7889 		goto read;
7890 
7891  again:
7892 	trace_access_lock(iter->cpu_file);
7893 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
7894 				    info->spare,
7895 				    count,
7896 				    iter->cpu_file, 0);
7897 	trace_access_unlock(iter->cpu_file);
7898 
7899 	if (ret < 0) {
7900 		if (trace_empty(iter) && !iter->closed) {
7901 			if (update_last_data_if_empty(iter->tr))
7902 				return 0;
7903 
7904 			if ((filp->f_flags & O_NONBLOCK))
7905 				return -EAGAIN;
7906 
7907 			ret = wait_on_pipe(iter, 0);
7908 			if (ret)
7909 				return ret;
7910 
7911 			goto again;
7912 		}
7913 		return 0;
7914 	}
7915 
7916 	info->read = 0;
7917  read:
7918 	size = page_size - info->read;
7919 	if (size > count)
7920 		size = count;
7921 	trace_data = ring_buffer_read_page_data(info->spare);
7922 	ret = copy_to_user(ubuf, trace_data + info->read, size);
7923 	if (ret == size)
7924 		return -EFAULT;
7925 
7926 	size -= ret;
7927 
7928 	*ppos += size;
7929 	info->read += size;
7930 
7931 	return size;
7932 }
7933 
7934 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
7935 {
7936 	struct ftrace_buffer_info *info = file->private_data;
7937 	struct trace_iterator *iter = &info->iter;
7938 
7939 	iter->closed = true;
7940 	/* Make sure the waiters see the new wait_index */
7941 	(void)atomic_fetch_inc_release(&iter->wait_index);
7942 
7943 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
7944 
7945 	return 0;
7946 }
7947 
7948 static int tracing_buffers_release(struct inode *inode, struct file *file)
7949 {
7950 	struct ftrace_buffer_info *info = file->private_data;
7951 	struct trace_iterator *iter = &info->iter;
7952 
7953 	guard(mutex)(&trace_types_lock);
7954 
7955 	iter->tr->trace_ref--;
7956 
7957 	__trace_array_put(iter->tr);
7958 
7959 	if (info->spare)
7960 		ring_buffer_free_read_page(iter->array_buffer->buffer,
7961 					   info->spare_cpu, info->spare);
7962 	kvfree(info);
7963 
7964 	return 0;
7965 }
7966 
7967 struct buffer_ref {
7968 	struct trace_buffer	*buffer;
7969 	void			*page;
7970 	int			cpu;
7971 	refcount_t		refcount;
7972 };
7973 
7974 static void buffer_ref_release(struct buffer_ref *ref)
7975 {
7976 	if (!refcount_dec_and_test(&ref->refcount))
7977 		return;
7978 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7979 	kfree(ref);
7980 }
7981 
7982 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7983 				    struct pipe_buffer *buf)
7984 {
7985 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7986 
7987 	buffer_ref_release(ref);
7988 	buf->private = 0;
7989 }
7990 
7991 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7992 				struct pipe_buffer *buf)
7993 {
7994 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7995 
7996 	if (refcount_read(&ref->refcount) > INT_MAX/2)
7997 		return false;
7998 
7999 	refcount_inc(&ref->refcount);
8000 	return true;
8001 }
8002 
8003 /* Pipe buffer operations for a buffer. */
8004 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8005 	.release		= buffer_pipe_buf_release,
8006 	.get			= buffer_pipe_buf_get,
8007 };
8008 
8009 /*
8010  * Callback from splice_to_pipe(), if we need to release some pages
8011  * at the end of the spd in case we error'ed out in filling the pipe.
8012  */
8013 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8014 {
8015 	struct buffer_ref *ref =
8016 		(struct buffer_ref *)spd->partial[i].private;
8017 
8018 	buffer_ref_release(ref);
8019 	spd->partial[i].private = 0;
8020 }
8021 
8022 static ssize_t
8023 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8024 			    struct pipe_inode_info *pipe, size_t len,
8025 			    unsigned int flags)
8026 {
8027 	struct ftrace_buffer_info *info = file->private_data;
8028 	struct trace_iterator *iter = &info->iter;
8029 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8030 	struct page *pages_def[PIPE_DEF_BUFFERS];
8031 	struct splice_pipe_desc spd = {
8032 		.pages		= pages_def,
8033 		.partial	= partial_def,
8034 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8035 		.ops		= &buffer_pipe_buf_ops,
8036 		.spd_release	= buffer_spd_release,
8037 	};
8038 	struct buffer_ref *ref;
8039 	bool woken = false;
8040 	int page_size;
8041 	int entries, i;
8042 	ssize_t ret = 0;
8043 
8044 	if (iter->snapshot && tracer_uses_snapshot(iter->tr->current_trace))
8045 		return -EBUSY;
8046 
8047 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8048 	if (*ppos & (page_size - 1))
8049 		return -EINVAL;
8050 
8051 	if (len & (page_size - 1)) {
8052 		if (len < page_size)
8053 			return -EINVAL;
8054 		len &= (~(page_size - 1));
8055 	}
8056 
8057 	if (splice_grow_spd(pipe, &spd))
8058 		return -ENOMEM;
8059 
8060  again:
8061 	trace_access_lock(iter->cpu_file);
8062 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8063 
8064 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8065 		struct page *page;
8066 		int r;
8067 
8068 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8069 		if (!ref) {
8070 			ret = -ENOMEM;
8071 			break;
8072 		}
8073 
8074 		refcount_set(&ref->refcount, 1);
8075 		ref->buffer = iter->array_buffer->buffer;
8076 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8077 		if (IS_ERR(ref->page)) {
8078 			ret = PTR_ERR(ref->page);
8079 			ref->page = NULL;
8080 			kfree(ref);
8081 			break;
8082 		}
8083 		ref->cpu = iter->cpu_file;
8084 
8085 		r = ring_buffer_read_page(ref->buffer, ref->page,
8086 					  len, iter->cpu_file, 1);
8087 		if (r < 0) {
8088 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8089 						   ref->page);
8090 			kfree(ref);
8091 			break;
8092 		}
8093 
8094 		page = virt_to_page(ring_buffer_read_page_data(ref->page));
8095 
8096 		spd.pages[i] = page;
8097 		spd.partial[i].len = page_size;
8098 		spd.partial[i].offset = 0;
8099 		spd.partial[i].private = (unsigned long)ref;
8100 		spd.nr_pages++;
8101 		*ppos += page_size;
8102 
8103 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8104 	}
8105 
8106 	trace_access_unlock(iter->cpu_file);
8107 	spd.nr_pages = i;
8108 
8109 	/* did we read anything? */
8110 	if (!spd.nr_pages) {
8111 
8112 		if (ret)
8113 			goto out;
8114 
8115 		if (woken)
8116 			goto out;
8117 
8118 		ret = -EAGAIN;
8119 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8120 			goto out;
8121 
8122 		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8123 		if (ret)
8124 			goto out;
8125 
8126 		/* No need to wait after waking up when tracing is off */
8127 		if (!tracer_tracing_is_on(iter->tr))
8128 			goto out;
8129 
8130 		/* Iterate one more time to collect any new data then exit */
8131 		woken = true;
8132 
8133 		goto again;
8134 	}
8135 
8136 	ret = splice_to_pipe(pipe, &spd);
8137 out:
8138 	splice_shrink_spd(&spd);
8139 
8140 	return ret;
8141 }
8142 
8143 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8144 {
8145 	struct ftrace_buffer_info *info = file->private_data;
8146 	struct trace_iterator *iter = &info->iter;
8147 	int err;
8148 
8149 	if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
8150 		if (!(file->f_flags & O_NONBLOCK)) {
8151 			err = ring_buffer_wait(iter->array_buffer->buffer,
8152 					       iter->cpu_file,
8153 					       iter->tr->buffer_percent,
8154 					       NULL, NULL);
8155 			if (err)
8156 				return err;
8157 		}
8158 
8159 		return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8160 						  iter->cpu_file);
8161 	} else if (cmd) {
8162 		return -ENOTTY;
8163 	}
8164 
8165 	/*
8166 	 * An ioctl call with cmd 0 to the ring buffer file will wake up all
8167 	 * waiters
8168 	 */
8169 	guard(mutex)(&trace_types_lock);
8170 
8171 	/* Make sure the waiters see the new wait_index */
8172 	(void)atomic_fetch_inc_release(&iter->wait_index);
8173 
8174 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8175 
8176 	return 0;
8177 }
8178 
8179 #ifdef CONFIG_TRACER_SNAPSHOT
8180 static int get_snapshot_map(struct trace_array *tr)
8181 {
8182 	int err = 0;
8183 
8184 	/*
8185 	 * Called with mmap_lock held. lockdep would be unhappy if we would now
8186 	 * take trace_types_lock. Instead use the specific
8187 	 * snapshot_trigger_lock.
8188 	 */
8189 	spin_lock(&tr->snapshot_trigger_lock);
8190 
8191 	if (tr->snapshot || tr->mapped == UINT_MAX)
8192 		err = -EBUSY;
8193 	else
8194 		tr->mapped++;
8195 
8196 	spin_unlock(&tr->snapshot_trigger_lock);
8197 
8198 	/* Wait for update_max_tr() to observe iter->tr->mapped */
8199 	if (tr->mapped == 1)
8200 		synchronize_rcu();
8201 
8202 	return err;
8203 
8204 }
8205 static void put_snapshot_map(struct trace_array *tr)
8206 {
8207 	spin_lock(&tr->snapshot_trigger_lock);
8208 	if (!WARN_ON(!tr->mapped))
8209 		tr->mapped--;
8210 	spin_unlock(&tr->snapshot_trigger_lock);
8211 }
8212 #else
8213 static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
8214 static inline void put_snapshot_map(struct trace_array *tr) { }
8215 #endif
8216 
8217 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8218 {
8219 	struct ftrace_buffer_info *info = vma->vm_file->private_data;
8220 	struct trace_iterator *iter = &info->iter;
8221 
8222 	WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
8223 	put_snapshot_map(iter->tr);
8224 }
8225 
8226 static int tracing_buffers_may_split(struct vm_area_struct *vma, unsigned long addr)
8227 {
8228 	/*
8229 	 * Trace buffer mappings require the complete buffer including
8230 	 * the meta page. Partial mappings are not supported.
8231 	 */
8232 	return -EINVAL;
8233 }
8234 
8235 static const struct vm_operations_struct tracing_buffers_vmops = {
8236 	.close		= tracing_buffers_mmap_close,
8237 	.may_split      = tracing_buffers_may_split,
8238 };
8239 
8240 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
8241 {
8242 	struct ftrace_buffer_info *info = filp->private_data;
8243 	struct trace_iterator *iter = &info->iter;
8244 	int ret = 0;
8245 
8246 	/* A memmap'ed and backup buffers are not supported for user space mmap */
8247 	if (iter->tr->flags & (TRACE_ARRAY_FL_MEMMAP | TRACE_ARRAY_FL_VMALLOC))
8248 		return -ENODEV;
8249 
8250 	ret = get_snapshot_map(iter->tr);
8251 	if (ret)
8252 		return ret;
8253 
8254 	ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
8255 	if (ret)
8256 		put_snapshot_map(iter->tr);
8257 
8258 	vma->vm_ops = &tracing_buffers_vmops;
8259 
8260 	return ret;
8261 }
8262 
8263 static const struct file_operations tracing_buffers_fops = {
8264 	.open		= tracing_buffers_open,
8265 	.read		= tracing_buffers_read,
8266 	.poll		= tracing_buffers_poll,
8267 	.release	= tracing_buffers_release,
8268 	.flush		= tracing_buffers_flush,
8269 	.splice_read	= tracing_buffers_splice_read,
8270 	.unlocked_ioctl = tracing_buffers_ioctl,
8271 	.mmap		= tracing_buffers_mmap,
8272 };
8273 
8274 static ssize_t
8275 tracing_stats_read(struct file *filp, char __user *ubuf,
8276 		   size_t count, loff_t *ppos)
8277 {
8278 	struct inode *inode = file_inode(filp);
8279 	struct trace_array *tr = inode->i_private;
8280 	struct array_buffer *trace_buf = &tr->array_buffer;
8281 	int cpu = tracing_get_cpu(inode);
8282 	struct trace_seq *s;
8283 	unsigned long cnt;
8284 	unsigned long long t;
8285 	unsigned long usec_rem;
8286 
8287 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8288 	if (!s)
8289 		return -ENOMEM;
8290 
8291 	trace_seq_init(s);
8292 
8293 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8294 	trace_seq_printf(s, "entries: %ld\n", cnt);
8295 
8296 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8297 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8298 
8299 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8300 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8301 
8302 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8303 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8304 
8305 	if (trace_clocks[tr->clock_id].in_ns) {
8306 		/* local or global for trace_clock */
8307 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8308 		usec_rem = do_div(t, USEC_PER_SEC);
8309 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8310 								t, usec_rem);
8311 
8312 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8313 		usec_rem = do_div(t, USEC_PER_SEC);
8314 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8315 	} else {
8316 		/* counter or tsc mode for trace_clock */
8317 		trace_seq_printf(s, "oldest event ts: %llu\n",
8318 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8319 
8320 		trace_seq_printf(s, "now ts: %llu\n",
8321 				ring_buffer_time_stamp(trace_buf->buffer));
8322 	}
8323 
8324 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8325 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8326 
8327 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8328 	trace_seq_printf(s, "read events: %ld\n", cnt);
8329 
8330 	count = simple_read_from_buffer(ubuf, count, ppos,
8331 					s->buffer, trace_seq_used(s));
8332 
8333 	kfree(s);
8334 
8335 	return count;
8336 }
8337 
8338 static const struct file_operations tracing_stats_fops = {
8339 	.open		= tracing_open_generic_tr,
8340 	.read		= tracing_stats_read,
8341 	.llseek		= generic_file_llseek,
8342 	.release	= tracing_release_generic_tr,
8343 };
8344 
8345 #ifdef CONFIG_DYNAMIC_FTRACE
8346 
8347 static ssize_t
8348 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8349 		  size_t cnt, loff_t *ppos)
8350 {
8351 	ssize_t ret;
8352 	char *buf;
8353 	int r;
8354 
8355 	/* 512 should be plenty to hold the amount needed */
8356 #define DYN_INFO_BUF_SIZE	512
8357 
8358 	buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
8359 	if (!buf)
8360 		return -ENOMEM;
8361 
8362 	r = scnprintf(buf, DYN_INFO_BUF_SIZE,
8363 		      "%ld pages:%ld groups: %ld\n"
8364 		      "ftrace boot update time = %llu (ns)\n"
8365 		      "ftrace module total update time = %llu (ns)\n",
8366 		      ftrace_update_tot_cnt,
8367 		      ftrace_number_of_pages,
8368 		      ftrace_number_of_groups,
8369 		      ftrace_update_time,
8370 		      ftrace_total_mod_time);
8371 
8372 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8373 	kfree(buf);
8374 	return ret;
8375 }
8376 
8377 static const struct file_operations tracing_dyn_info_fops = {
8378 	.open		= tracing_open_generic,
8379 	.read		= tracing_read_dyn_info,
8380 	.llseek		= generic_file_llseek,
8381 };
8382 #endif /* CONFIG_DYNAMIC_FTRACE */
8383 
8384 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8385 static void
8386 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8387 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8388 		void *data)
8389 {
8390 	tracing_snapshot_instance(tr);
8391 }
8392 
8393 static void
8394 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8395 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8396 		      void *data)
8397 {
8398 	struct ftrace_func_mapper *mapper = data;
8399 	long *count = NULL;
8400 
8401 	if (mapper)
8402 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8403 
8404 	if (count) {
8405 
8406 		if (*count <= 0)
8407 			return;
8408 
8409 		(*count)--;
8410 	}
8411 
8412 	tracing_snapshot_instance(tr);
8413 }
8414 
8415 static int
8416 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8417 		      struct ftrace_probe_ops *ops, void *data)
8418 {
8419 	struct ftrace_func_mapper *mapper = data;
8420 	long *count = NULL;
8421 
8422 	seq_printf(m, "%ps:", (void *)ip);
8423 
8424 	seq_puts(m, "snapshot");
8425 
8426 	if (mapper)
8427 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8428 
8429 	if (count)
8430 		seq_printf(m, ":count=%ld\n", *count);
8431 	else
8432 		seq_puts(m, ":unlimited\n");
8433 
8434 	return 0;
8435 }
8436 
8437 static int
8438 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8439 		     unsigned long ip, void *init_data, void **data)
8440 {
8441 	struct ftrace_func_mapper *mapper = *data;
8442 
8443 	if (!mapper) {
8444 		mapper = allocate_ftrace_func_mapper();
8445 		if (!mapper)
8446 			return -ENOMEM;
8447 		*data = mapper;
8448 	}
8449 
8450 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8451 }
8452 
8453 static void
8454 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8455 		     unsigned long ip, void *data)
8456 {
8457 	struct ftrace_func_mapper *mapper = data;
8458 
8459 	if (!ip) {
8460 		if (!mapper)
8461 			return;
8462 		free_ftrace_func_mapper(mapper, NULL);
8463 		return;
8464 	}
8465 
8466 	ftrace_func_mapper_remove_ip(mapper, ip);
8467 }
8468 
8469 static struct ftrace_probe_ops snapshot_probe_ops = {
8470 	.func			= ftrace_snapshot,
8471 	.print			= ftrace_snapshot_print,
8472 };
8473 
8474 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8475 	.func			= ftrace_count_snapshot,
8476 	.print			= ftrace_snapshot_print,
8477 	.init			= ftrace_snapshot_init,
8478 	.free			= ftrace_snapshot_free,
8479 };
8480 
8481 static int
8482 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8483 			       char *glob, char *cmd, char *param, int enable)
8484 {
8485 	struct ftrace_probe_ops *ops;
8486 	void *count = (void *)-1;
8487 	char *number;
8488 	int ret;
8489 
8490 	if (!tr)
8491 		return -ENODEV;
8492 
8493 	/* hash funcs only work with set_ftrace_filter */
8494 	if (!enable)
8495 		return -EINVAL;
8496 
8497 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8498 
8499 	if (glob[0] == '!') {
8500 		ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
8501 		if (!ret)
8502 			tracing_disarm_snapshot(tr);
8503 
8504 		return ret;
8505 	}
8506 
8507 	if (!param)
8508 		goto out_reg;
8509 
8510 	number = strsep(&param, ":");
8511 
8512 	if (!strlen(number))
8513 		goto out_reg;
8514 
8515 	/*
8516 	 * We use the callback data field (which is a pointer)
8517 	 * as our counter.
8518 	 */
8519 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8520 	if (ret)
8521 		return ret;
8522 
8523  out_reg:
8524 	ret = tracing_arm_snapshot(tr);
8525 	if (ret < 0)
8526 		return ret;
8527 
8528 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8529 	if (ret < 0)
8530 		tracing_disarm_snapshot(tr);
8531 
8532 	return ret < 0 ? ret : 0;
8533 }
8534 
8535 static struct ftrace_func_command ftrace_snapshot_cmd = {
8536 	.name			= "snapshot",
8537 	.func			= ftrace_trace_snapshot_callback,
8538 };
8539 
8540 static __init int register_snapshot_cmd(void)
8541 {
8542 	return register_ftrace_command(&ftrace_snapshot_cmd);
8543 }
8544 #else
8545 static inline __init int register_snapshot_cmd(void) { return 0; }
8546 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8547 
8548 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8549 {
8550 	/* Top directory uses NULL as the parent */
8551 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8552 		return NULL;
8553 
8554 	if (WARN_ON(!tr->dir))
8555 		return ERR_PTR(-ENODEV);
8556 
8557 	/* All sub buffers have a descriptor */
8558 	return tr->dir;
8559 }
8560 
8561 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8562 {
8563 	struct dentry *d_tracer;
8564 
8565 	if (tr->percpu_dir)
8566 		return tr->percpu_dir;
8567 
8568 	d_tracer = tracing_get_dentry(tr);
8569 	if (IS_ERR(d_tracer))
8570 		return NULL;
8571 
8572 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8573 
8574 	MEM_FAIL(!tr->percpu_dir,
8575 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8576 
8577 	return tr->percpu_dir;
8578 }
8579 
8580 static struct dentry *
8581 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8582 		      void *data, long cpu, const struct file_operations *fops)
8583 {
8584 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8585 
8586 	if (ret) /* See tracing_get_cpu() */
8587 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8588 	return ret;
8589 }
8590 
8591 static void
8592 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8593 {
8594 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8595 	struct dentry *d_cpu;
8596 	char cpu_dir[30]; /* 30 characters should be more than enough */
8597 
8598 	if (!d_percpu)
8599 		return;
8600 
8601 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8602 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8603 	if (!d_cpu) {
8604 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8605 		return;
8606 	}
8607 
8608 	/* per cpu trace_pipe */
8609 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8610 				tr, cpu, &tracing_pipe_fops);
8611 
8612 	/* per cpu trace */
8613 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8614 				tr, cpu, &tracing_fops);
8615 
8616 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8617 				tr, cpu, &tracing_buffers_fops);
8618 
8619 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8620 				tr, cpu, &tracing_stats_fops);
8621 
8622 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_WRITE, d_cpu,
8623 				tr, cpu, &tracing_entries_fops);
8624 
8625 	if (tr->range_addr_start)
8626 		trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
8627 				      tr, cpu, &tracing_buffer_meta_fops);
8628 #ifdef CONFIG_TRACER_SNAPSHOT
8629 	if (!tr->range_addr_start) {
8630 		trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8631 				      tr, cpu, &snapshot_fops);
8632 
8633 		trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8634 				      tr, cpu, &snapshot_raw_fops);
8635 	}
8636 #endif
8637 }
8638 
8639 #ifdef CONFIG_FTRACE_SELFTEST
8640 /* Let selftest have access to static functions in this file */
8641 #include "trace_selftest.c"
8642 #endif
8643 
8644 static ssize_t
8645 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8646 			loff_t *ppos)
8647 {
8648 	struct trace_option_dentry *topt = filp->private_data;
8649 	char *buf;
8650 
8651 	if (topt->flags->val & topt->opt->bit)
8652 		buf = "1\n";
8653 	else
8654 		buf = "0\n";
8655 
8656 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8657 }
8658 
8659 static ssize_t
8660 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8661 			 loff_t *ppos)
8662 {
8663 	struct trace_option_dentry *topt = filp->private_data;
8664 	unsigned long val;
8665 	int ret;
8666 
8667 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8668 	if (ret)
8669 		return ret;
8670 
8671 	if (val != 0 && val != 1)
8672 		return -EINVAL;
8673 
8674 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8675 		guard(mutex)(&trace_types_lock);
8676 		ret = __set_tracer_option(topt->tr, topt->flags,
8677 					  topt->opt, !val);
8678 		if (ret)
8679 			return ret;
8680 	}
8681 
8682 	*ppos += cnt;
8683 
8684 	return cnt;
8685 }
8686 
8687 static int tracing_open_options(struct inode *inode, struct file *filp)
8688 {
8689 	struct trace_option_dentry *topt = inode->i_private;
8690 	int ret;
8691 
8692 	ret = tracing_check_open_get_tr(topt->tr);
8693 	if (ret)
8694 		return ret;
8695 
8696 	filp->private_data = inode->i_private;
8697 	return 0;
8698 }
8699 
8700 static int tracing_release_options(struct inode *inode, struct file *file)
8701 {
8702 	struct trace_option_dentry *topt = file->private_data;
8703 
8704 	trace_array_put(topt->tr);
8705 	return 0;
8706 }
8707 
8708 static const struct file_operations trace_options_fops = {
8709 	.open = tracing_open_options,
8710 	.read = trace_options_read,
8711 	.write = trace_options_write,
8712 	.llseek	= generic_file_llseek,
8713 	.release = tracing_release_options,
8714 };
8715 
8716 /*
8717  * In order to pass in both the trace_array descriptor as well as the index
8718  * to the flag that the trace option file represents, the trace_array
8719  * has a character array of trace_flags_index[], which holds the index
8720  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8721  * The address of this character array is passed to the flag option file
8722  * read/write callbacks.
8723  *
8724  * In order to extract both the index and the trace_array descriptor,
8725  * get_tr_index() uses the following algorithm.
8726  *
8727  *   idx = *ptr;
8728  *
8729  * As the pointer itself contains the address of the index (remember
8730  * index[1] == 1).
8731  *
8732  * Then to get the trace_array descriptor, by subtracting that index
8733  * from the ptr, we get to the start of the index itself.
8734  *
8735  *   ptr - idx == &index[0]
8736  *
8737  * Then a simple container_of() from that pointer gets us to the
8738  * trace_array descriptor.
8739  */
8740 static void get_tr_index(void *data, struct trace_array **ptr,
8741 			 unsigned int *pindex)
8742 {
8743 	*pindex = *(unsigned char *)data;
8744 
8745 	*ptr = container_of(data - *pindex, struct trace_array,
8746 			    trace_flags_index);
8747 }
8748 
8749 static ssize_t
8750 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8751 			loff_t *ppos)
8752 {
8753 	void *tr_index = filp->private_data;
8754 	struct trace_array *tr;
8755 	unsigned int index;
8756 	char *buf;
8757 
8758 	get_tr_index(tr_index, &tr, &index);
8759 
8760 	if (tr->trace_flags & (1ULL << index))
8761 		buf = "1\n";
8762 	else
8763 		buf = "0\n";
8764 
8765 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8766 }
8767 
8768 static ssize_t
8769 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8770 			 loff_t *ppos)
8771 {
8772 	void *tr_index = filp->private_data;
8773 	struct trace_array *tr;
8774 	unsigned int index;
8775 	unsigned long val;
8776 	int ret;
8777 
8778 	get_tr_index(tr_index, &tr, &index);
8779 
8780 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8781 	if (ret)
8782 		return ret;
8783 
8784 	if (val != 0 && val != 1)
8785 		return -EINVAL;
8786 
8787 	mutex_lock(&event_mutex);
8788 	mutex_lock(&trace_types_lock);
8789 	ret = set_tracer_flag(tr, 1ULL << index, val);
8790 	mutex_unlock(&trace_types_lock);
8791 	mutex_unlock(&event_mutex);
8792 
8793 	if (ret < 0)
8794 		return ret;
8795 
8796 	*ppos += cnt;
8797 
8798 	return cnt;
8799 }
8800 
8801 static const struct file_operations trace_options_core_fops = {
8802 	.open = tracing_open_generic,
8803 	.read = trace_options_core_read,
8804 	.write = trace_options_core_write,
8805 	.llseek = generic_file_llseek,
8806 };
8807 
8808 struct dentry *trace_create_file(const char *name,
8809 				 umode_t mode,
8810 				 struct dentry *parent,
8811 				 void *data,
8812 				 const struct file_operations *fops)
8813 {
8814 	struct dentry *ret;
8815 
8816 	ret = tracefs_create_file(name, mode, parent, data, fops);
8817 	if (!ret)
8818 		pr_warn("Could not create tracefs '%s' entry\n", name);
8819 
8820 	return ret;
8821 }
8822 
8823 
8824 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8825 {
8826 	struct dentry *d_tracer;
8827 
8828 	if (tr->options)
8829 		return tr->options;
8830 
8831 	d_tracer = tracing_get_dentry(tr);
8832 	if (IS_ERR(d_tracer))
8833 		return NULL;
8834 
8835 	tr->options = tracefs_create_dir("options", d_tracer);
8836 	if (!tr->options) {
8837 		pr_warn("Could not create tracefs directory 'options'\n");
8838 		return NULL;
8839 	}
8840 
8841 	return tr->options;
8842 }
8843 
8844 static void
8845 create_trace_option_file(struct trace_array *tr,
8846 			 struct trace_option_dentry *topt,
8847 			 struct tracer_flags *flags,
8848 			 struct tracer_opt *opt)
8849 {
8850 	struct dentry *t_options;
8851 
8852 	t_options = trace_options_init_dentry(tr);
8853 	if (!t_options)
8854 		return;
8855 
8856 	topt->flags = flags;
8857 	topt->opt = opt;
8858 	topt->tr = tr;
8859 
8860 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8861 					t_options, topt, &trace_options_fops);
8862 }
8863 
8864 static int
8865 create_trace_option_files(struct trace_array *tr, struct tracer *tracer,
8866 			  struct tracer_flags *flags)
8867 {
8868 	struct trace_option_dentry *topts;
8869 	struct trace_options *tr_topts;
8870 	struct tracer_opt *opts;
8871 	int cnt;
8872 
8873 	if (!flags || !flags->opts)
8874 		return 0;
8875 
8876 	opts = flags->opts;
8877 
8878 	for (cnt = 0; opts[cnt].name; cnt++)
8879 		;
8880 
8881 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8882 	if (!topts)
8883 		return 0;
8884 
8885 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8886 			    GFP_KERNEL);
8887 	if (!tr_topts) {
8888 		kfree(topts);
8889 		return -ENOMEM;
8890 	}
8891 
8892 	tr->topts = tr_topts;
8893 	tr->topts[tr->nr_topts].tracer = tracer;
8894 	tr->topts[tr->nr_topts].topts = topts;
8895 	tr->nr_topts++;
8896 
8897 	for (cnt = 0; opts[cnt].name; cnt++) {
8898 		create_trace_option_file(tr, &topts[cnt], flags,
8899 					 &opts[cnt]);
8900 		MEM_FAIL(topts[cnt].entry == NULL,
8901 			  "Failed to create trace option: %s",
8902 			  opts[cnt].name);
8903 	}
8904 	return 0;
8905 }
8906 
8907 static int get_global_flags_val(struct tracer *tracer)
8908 {
8909 	struct tracers *t;
8910 
8911 	list_for_each_entry(t, &global_trace.tracers, list) {
8912 		if (t->tracer != tracer)
8913 			continue;
8914 		if (!t->flags)
8915 			return -1;
8916 		return t->flags->val;
8917 	}
8918 	return -1;
8919 }
8920 
8921 static int add_tracer_options(struct trace_array *tr, struct tracers *t)
8922 {
8923 	struct tracer *tracer = t->tracer;
8924 	struct tracer_flags *flags = t->flags ?: tracer->flags;
8925 
8926 	if (!flags)
8927 		return 0;
8928 
8929 	/* Only add tracer options after update_tracer_options finish */
8930 	if (!tracer_options_updated)
8931 		return 0;
8932 
8933 	return create_trace_option_files(tr, tracer, flags);
8934 }
8935 
8936 static int add_tracer(struct trace_array *tr, struct tracer *tracer)
8937 {
8938 	struct tracer_flags *flags;
8939 	struct tracers *t;
8940 	int ret;
8941 
8942 	/* Only enable if the directory has been created already. */
8943 	if (!tr->dir && !(tr->flags & TRACE_ARRAY_FL_GLOBAL))
8944 		return 0;
8945 
8946 	/*
8947 	 * If this is an instance, only create flags for tracers
8948 	 * the instance may have.
8949 	 */
8950 	if (!trace_ok_for_array(tracer, tr))
8951 		return 0;
8952 
8953 	t = kmalloc(sizeof(*t), GFP_KERNEL);
8954 	if (!t)
8955 		return -ENOMEM;
8956 
8957 	t->tracer = tracer;
8958 	t->flags = NULL;
8959 	list_add(&t->list, &tr->tracers);
8960 
8961 	flags = tracer->flags;
8962 	if (!flags) {
8963 		if (!tracer->default_flags)
8964 			return 0;
8965 
8966 		/*
8967 		 * If the tracer defines default flags, it means the flags are
8968 		 * per trace instance.
8969 		 */
8970 		flags = kmalloc(sizeof(*flags), GFP_KERNEL);
8971 		if (!flags)
8972 			return -ENOMEM;
8973 
8974 		*flags = *tracer->default_flags;
8975 		flags->trace = tracer;
8976 
8977 		t->flags = flags;
8978 
8979 		/* If this is an instance, inherit the global_trace flags */
8980 		if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL)) {
8981 			int val = get_global_flags_val(tracer);
8982 			if (!WARN_ON_ONCE(val < 0))
8983 				flags->val = val;
8984 		}
8985 	}
8986 
8987 	ret = add_tracer_options(tr, t);
8988 	if (ret < 0) {
8989 		list_del(&t->list);
8990 		kfree(t->flags);
8991 		kfree(t);
8992 	}
8993 
8994 	return ret;
8995 }
8996 
8997 static struct dentry *
8998 create_trace_option_core_file(struct trace_array *tr,
8999 			      const char *option, long index)
9000 {
9001 	struct dentry *t_options;
9002 
9003 	t_options = trace_options_init_dentry(tr);
9004 	if (!t_options)
9005 		return NULL;
9006 
9007 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9008 				 (void *)&tr->trace_flags_index[index],
9009 				 &trace_options_core_fops);
9010 }
9011 
9012 static void create_trace_options_dir(struct trace_array *tr)
9013 {
9014 	struct dentry *t_options;
9015 	bool top_level = tr == &global_trace;
9016 	int i;
9017 
9018 	t_options = trace_options_init_dentry(tr);
9019 	if (!t_options)
9020 		return;
9021 
9022 	for (i = 0; trace_options[i]; i++) {
9023 		if (top_level ||
9024 		    !((1ULL << i) & TOP_LEVEL_TRACE_FLAGS)) {
9025 			create_trace_option_core_file(tr, trace_options[i], i);
9026 		}
9027 	}
9028 }
9029 
9030 static ssize_t
9031 rb_simple_read(struct file *filp, char __user *ubuf,
9032 	       size_t cnt, loff_t *ppos)
9033 {
9034 	struct trace_array *tr = filp->private_data;
9035 	char buf[64];
9036 	int r;
9037 
9038 	r = tracer_tracing_is_on(tr);
9039 	r = sprintf(buf, "%d\n", r);
9040 
9041 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9042 }
9043 
9044 static ssize_t
9045 rb_simple_write(struct file *filp, const char __user *ubuf,
9046 		size_t cnt, loff_t *ppos)
9047 {
9048 	struct trace_array *tr = filp->private_data;
9049 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9050 	unsigned long val;
9051 	int ret;
9052 
9053 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9054 	if (ret)
9055 		return ret;
9056 
9057 	if (buffer) {
9058 		guard(mutex)(&trace_types_lock);
9059 		if (!!val == tracer_tracing_is_on(tr)) {
9060 			val = 0; /* do nothing */
9061 		} else if (val) {
9062 			tracer_tracing_on(tr);
9063 			if (tr->current_trace->start)
9064 				tr->current_trace->start(tr);
9065 		} else {
9066 			tracer_tracing_off(tr);
9067 			if (tr->current_trace->stop)
9068 				tr->current_trace->stop(tr);
9069 			/* Wake up any waiters */
9070 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9071 		}
9072 	}
9073 
9074 	(*ppos)++;
9075 
9076 	return cnt;
9077 }
9078 
9079 static const struct file_operations rb_simple_fops = {
9080 	.open		= tracing_open_generic_tr,
9081 	.read		= rb_simple_read,
9082 	.write		= rb_simple_write,
9083 	.release	= tracing_release_generic_tr,
9084 	.llseek		= default_llseek,
9085 };
9086 
9087 static ssize_t
9088 buffer_percent_read(struct file *filp, char __user *ubuf,
9089 		    size_t cnt, loff_t *ppos)
9090 {
9091 	struct trace_array *tr = filp->private_data;
9092 	char buf[64];
9093 	int r;
9094 
9095 	r = tr->buffer_percent;
9096 	r = sprintf(buf, "%d\n", r);
9097 
9098 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9099 }
9100 
9101 static ssize_t
9102 buffer_percent_write(struct file *filp, const char __user *ubuf,
9103 		     size_t cnt, loff_t *ppos)
9104 {
9105 	struct trace_array *tr = filp->private_data;
9106 	unsigned long val;
9107 	int ret;
9108 
9109 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9110 	if (ret)
9111 		return ret;
9112 
9113 	if (val > 100)
9114 		return -EINVAL;
9115 
9116 	tr->buffer_percent = val;
9117 
9118 	(*ppos)++;
9119 
9120 	return cnt;
9121 }
9122 
9123 static const struct file_operations buffer_percent_fops = {
9124 	.open		= tracing_open_generic_tr,
9125 	.read		= buffer_percent_read,
9126 	.write		= buffer_percent_write,
9127 	.release	= tracing_release_generic_tr,
9128 	.llseek		= default_llseek,
9129 };
9130 
9131 static ssize_t
9132 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9133 {
9134 	struct trace_array *tr = filp->private_data;
9135 	size_t size;
9136 	char buf[64];
9137 	int order;
9138 	int r;
9139 
9140 	order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9141 	size = (PAGE_SIZE << order) / 1024;
9142 
9143 	r = sprintf(buf, "%zd\n", size);
9144 
9145 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9146 }
9147 
9148 static ssize_t
9149 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9150 			 size_t cnt, loff_t *ppos)
9151 {
9152 	struct trace_array *tr = filp->private_data;
9153 	unsigned long val;
9154 	int old_order;
9155 	int order;
9156 	int pages;
9157 	int ret;
9158 
9159 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9160 	if (ret)
9161 		return ret;
9162 
9163 	val *= 1024; /* value passed in is in KB */
9164 
9165 	pages = DIV_ROUND_UP(val, PAGE_SIZE);
9166 	order = fls(pages - 1);
9167 
9168 	/* limit between 1 and 128 system pages */
9169 	if (order < 0 || order > 7)
9170 		return -EINVAL;
9171 
9172 	/* Do not allow tracing while changing the order of the ring buffer */
9173 	tracing_stop_tr(tr);
9174 
9175 	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9176 	if (old_order == order)
9177 		goto out;
9178 
9179 	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9180 	if (ret)
9181 		goto out;
9182 
9183 #ifdef CONFIG_TRACER_SNAPSHOT
9184 
9185 	if (!tr->allocated_snapshot)
9186 		goto out_max;
9187 
9188 	ret = ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, order);
9189 	if (ret) {
9190 		/* Put back the old order */
9191 		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9192 		if (WARN_ON_ONCE(cnt)) {
9193 			/*
9194 			 * AARGH! We are left with different orders!
9195 			 * The max buffer is our "snapshot" buffer.
9196 			 * When a tracer needs a snapshot (one of the
9197 			 * latency tracers), it swaps the max buffer
9198 			 * with the saved snap shot. We succeeded to
9199 			 * update the order of the main buffer, but failed to
9200 			 * update the order of the max buffer. But when we tried
9201 			 * to reset the main buffer to the original size, we
9202 			 * failed there too. This is very unlikely to
9203 			 * happen, but if it does, warn and kill all
9204 			 * tracing.
9205 			 */
9206 			tracing_disabled = 1;
9207 		}
9208 		goto out;
9209 	}
9210  out_max:
9211 #endif
9212 	(*ppos)++;
9213  out:
9214 	if (ret)
9215 		cnt = ret;
9216 	tracing_start_tr(tr);
9217 	return cnt;
9218 }
9219 
9220 static const struct file_operations buffer_subbuf_size_fops = {
9221 	.open		= tracing_open_generic_tr,
9222 	.read		= buffer_subbuf_size_read,
9223 	.write		= buffer_subbuf_size_write,
9224 	.release	= tracing_release_generic_tr,
9225 	.llseek		= default_llseek,
9226 };
9227 
9228 static struct dentry *trace_instance_dir;
9229 
9230 static void
9231 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9232 
9233 #ifdef CONFIG_MODULES
9234 static int make_mod_delta(struct module *mod, void *data)
9235 {
9236 	struct trace_module_delta *module_delta;
9237 	struct trace_scratch *tscratch;
9238 	struct trace_mod_entry *entry;
9239 	struct trace_array *tr = data;
9240 	int i;
9241 
9242 	tscratch = tr->scratch;
9243 	module_delta = READ_ONCE(tr->module_delta);
9244 	for (i = 0; i < tscratch->nr_entries; i++) {
9245 		entry = &tscratch->entries[i];
9246 		if (strcmp(mod->name, entry->mod_name))
9247 			continue;
9248 		if (mod->state == MODULE_STATE_GOING)
9249 			module_delta->delta[i] = 0;
9250 		else
9251 			module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base
9252 						 - entry->mod_addr;
9253 		break;
9254 	}
9255 	return 0;
9256 }
9257 #else
9258 static int make_mod_delta(struct module *mod, void *data)
9259 {
9260 	return 0;
9261 }
9262 #endif
9263 
9264 static int mod_addr_comp(const void *a, const void *b, const void *data)
9265 {
9266 	const struct trace_mod_entry *e1 = a;
9267 	const struct trace_mod_entry *e2 = b;
9268 
9269 	return e1->mod_addr > e2->mod_addr ? 1 : -1;
9270 }
9271 
9272 static void setup_trace_scratch(struct trace_array *tr,
9273 				struct trace_scratch *tscratch, unsigned int size)
9274 {
9275 	struct trace_module_delta *module_delta;
9276 	struct trace_mod_entry *entry;
9277 	int i, nr_entries;
9278 
9279 	if (!tscratch)
9280 		return;
9281 
9282 	tr->scratch = tscratch;
9283 	tr->scratch_size = size;
9284 
9285 	if (tscratch->text_addr)
9286 		tr->text_delta = (unsigned long)_text - tscratch->text_addr;
9287 
9288 	if (struct_size(tscratch, entries, tscratch->nr_entries) > size)
9289 		goto reset;
9290 
9291 	/* Check if each module name is a valid string */
9292 	for (i = 0; i < tscratch->nr_entries; i++) {
9293 		int n;
9294 
9295 		entry = &tscratch->entries[i];
9296 
9297 		for (n = 0; n < MODULE_NAME_LEN; n++) {
9298 			if (entry->mod_name[n] == '\0')
9299 				break;
9300 			if (!isprint(entry->mod_name[n]))
9301 				goto reset;
9302 		}
9303 		if (n == MODULE_NAME_LEN)
9304 			goto reset;
9305 	}
9306 
9307 	/* Sort the entries so that we can find appropriate module from address. */
9308 	nr_entries = tscratch->nr_entries;
9309 	sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
9310 	       mod_addr_comp, NULL, NULL);
9311 
9312 	if (IS_ENABLED(CONFIG_MODULES)) {
9313 		module_delta = kzalloc(struct_size(module_delta, delta, nr_entries), GFP_KERNEL);
9314 		if (!module_delta) {
9315 			pr_info("module_delta allocation failed. Not able to decode module address.");
9316 			goto reset;
9317 		}
9318 		init_rcu_head(&module_delta->rcu);
9319 	} else
9320 		module_delta = NULL;
9321 	WRITE_ONCE(tr->module_delta, module_delta);
9322 
9323 	/* Scan modules to make text delta for modules. */
9324 	module_for_each_mod(make_mod_delta, tr);
9325 
9326 	/* Set trace_clock as the same of the previous boot. */
9327 	if (tscratch->clock_id != tr->clock_id) {
9328 		if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) ||
9329 		    tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
9330 			pr_info("the previous trace_clock info is not valid.");
9331 			goto reset;
9332 		}
9333 	}
9334 	return;
9335  reset:
9336 	/* Invalid trace modules */
9337 	memset(tscratch, 0, size);
9338 }
9339 
9340 static int
9341 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9342 {
9343 	enum ring_buffer_flags rb_flags;
9344 	struct trace_scratch *tscratch;
9345 	unsigned int scratch_size = 0;
9346 
9347 	rb_flags = tr->trace_flags & TRACE_ITER(OVERWRITE) ? RB_FL_OVERWRITE : 0;
9348 
9349 	buf->tr = tr;
9350 
9351 	if (tr->range_addr_start && tr->range_addr_size) {
9352 		/* Add scratch buffer to handle 128 modules */
9353 		buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
9354 						      tr->range_addr_start,
9355 						      tr->range_addr_size,
9356 						      struct_size(tscratch, entries, 128));
9357 
9358 		tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size);
9359 		setup_trace_scratch(tr, tscratch, scratch_size);
9360 
9361 		/*
9362 		 * This is basically the same as a mapped buffer,
9363 		 * with the same restrictions.
9364 		 */
9365 		tr->mapped++;
9366 	} else {
9367 		buf->buffer = ring_buffer_alloc(size, rb_flags);
9368 	}
9369 	if (!buf->buffer)
9370 		return -ENOMEM;
9371 
9372 	buf->data = alloc_percpu(struct trace_array_cpu);
9373 	if (!buf->data) {
9374 		ring_buffer_free(buf->buffer);
9375 		buf->buffer = NULL;
9376 		return -ENOMEM;
9377 	}
9378 
9379 	/* Allocate the first page for all buffers */
9380 	set_buffer_entries(&tr->array_buffer,
9381 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9382 
9383 	return 0;
9384 }
9385 
9386 static void free_trace_buffer(struct array_buffer *buf)
9387 {
9388 	if (buf->buffer) {
9389 		ring_buffer_free(buf->buffer);
9390 		buf->buffer = NULL;
9391 		free_percpu(buf->data);
9392 		buf->data = NULL;
9393 	}
9394 }
9395 
9396 static int allocate_trace_buffers(struct trace_array *tr, int size)
9397 {
9398 	int ret;
9399 
9400 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9401 	if (ret)
9402 		return ret;
9403 
9404 #ifdef CONFIG_TRACER_SNAPSHOT
9405 	/* Fix mapped buffer trace arrays do not have snapshot buffers */
9406 	if (tr->range_addr_start)
9407 		return 0;
9408 
9409 	ret = allocate_trace_buffer(tr, &tr->snapshot_buffer,
9410 				    allocate_snapshot ? size : 1);
9411 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9412 		free_trace_buffer(&tr->array_buffer);
9413 		return -ENOMEM;
9414 	}
9415 	tr->allocated_snapshot = allocate_snapshot;
9416 
9417 	allocate_snapshot = false;
9418 #endif
9419 
9420 	return 0;
9421 }
9422 
9423 static void free_trace_buffers(struct trace_array *tr)
9424 {
9425 	if (!tr)
9426 		return;
9427 
9428 	free_trace_buffer(&tr->array_buffer);
9429 	kfree(tr->module_delta);
9430 
9431 #ifdef CONFIG_TRACER_SNAPSHOT
9432 	free_trace_buffer(&tr->snapshot_buffer);
9433 #endif
9434 }
9435 
9436 static void init_trace_flags_index(struct trace_array *tr)
9437 {
9438 	int i;
9439 
9440 	/* Used by the trace options files */
9441 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9442 		tr->trace_flags_index[i] = i;
9443 }
9444 
9445 static int __update_tracer(struct trace_array *tr)
9446 {
9447 	struct tracer *t;
9448 	int ret = 0;
9449 
9450 	for (t = trace_types; t && !ret; t = t->next)
9451 		ret = add_tracer(tr, t);
9452 
9453 	return ret;
9454 }
9455 
9456 static __init int __update_tracer_options(struct trace_array *tr)
9457 {
9458 	struct tracers *t;
9459 	int ret = 0;
9460 
9461 	list_for_each_entry(t, &tr->tracers, list) {
9462 		ret = add_tracer_options(tr, t);
9463 		if (ret < 0)
9464 			break;
9465 	}
9466 
9467 	return ret;
9468 }
9469 
9470 static __init void update_tracer_options(void)
9471 {
9472 	struct trace_array *tr;
9473 
9474 	guard(mutex)(&trace_types_lock);
9475 	tracer_options_updated = true;
9476 	list_for_each_entry(tr, &ftrace_trace_arrays, list)
9477 		__update_tracer_options(tr);
9478 }
9479 
9480 /* Must have trace_types_lock held */
9481 struct trace_array *trace_array_find(const char *instance)
9482 {
9483 	struct trace_array *tr, *found = NULL;
9484 
9485 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9486 		if (tr->name && strcmp(tr->name, instance) == 0) {
9487 			found = tr;
9488 			break;
9489 		}
9490 	}
9491 
9492 	return found;
9493 }
9494 
9495 struct trace_array *trace_array_find_get(const char *instance)
9496 {
9497 	struct trace_array *tr;
9498 
9499 	guard(mutex)(&trace_types_lock);
9500 	tr = trace_array_find(instance);
9501 	if (tr)
9502 		tr->ref++;
9503 
9504 	return tr;
9505 }
9506 
9507 static int trace_array_create_dir(struct trace_array *tr)
9508 {
9509 	int ret;
9510 
9511 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9512 	if (!tr->dir)
9513 		return -EINVAL;
9514 
9515 	ret = event_trace_add_tracer(tr->dir, tr);
9516 	if (ret) {
9517 		tracefs_remove(tr->dir);
9518 		return ret;
9519 	}
9520 
9521 	init_tracer_tracefs(tr, tr->dir);
9522 	ret = __update_tracer(tr);
9523 	if (ret) {
9524 		event_trace_del_tracer(tr);
9525 		tracefs_remove(tr->dir);
9526 		return ret;
9527 	}
9528 	return 0;
9529 }
9530 
9531 static struct trace_array *
9532 trace_array_create_systems(const char *name, const char *systems,
9533 			   unsigned long range_addr_start,
9534 			   unsigned long range_addr_size)
9535 {
9536 	struct trace_array *tr;
9537 	int ret;
9538 
9539 	ret = -ENOMEM;
9540 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9541 	if (!tr)
9542 		return ERR_PTR(ret);
9543 
9544 	tr->name = kstrdup(name, GFP_KERNEL);
9545 	if (!tr->name)
9546 		goto out_free_tr;
9547 
9548 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9549 		goto out_free_tr;
9550 
9551 	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9552 		goto out_free_tr;
9553 
9554 	if (systems) {
9555 		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9556 		if (!tr->system_names)
9557 			goto out_free_tr;
9558 	}
9559 
9560 	/* Only for boot up memory mapped ring buffers */
9561 	tr->range_addr_start = range_addr_start;
9562 	tr->range_addr_size = range_addr_size;
9563 
9564 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9565 
9566 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9567 
9568 	raw_spin_lock_init(&tr->start_lock);
9569 
9570 	tr->syscall_buf_sz = global_trace.syscall_buf_sz;
9571 
9572 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9573 #ifdef CONFIG_TRACER_SNAPSHOT
9574 	spin_lock_init(&tr->snapshot_trigger_lock);
9575 #endif
9576 	tr->current_trace = &nop_trace;
9577 	tr->current_trace_flags = nop_trace.flags;
9578 
9579 	INIT_LIST_HEAD(&tr->systems);
9580 	INIT_LIST_HEAD(&tr->events);
9581 	INIT_LIST_HEAD(&tr->hist_vars);
9582 	INIT_LIST_HEAD(&tr->err_log);
9583 	INIT_LIST_HEAD(&tr->tracers);
9584 	INIT_LIST_HEAD(&tr->marker_list);
9585 
9586 #ifdef CONFIG_MODULES
9587 	INIT_LIST_HEAD(&tr->mod_events);
9588 #endif
9589 
9590 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9591 		goto out_free_tr;
9592 
9593 	/* The ring buffer is defaultly expanded */
9594 	trace_set_ring_buffer_expanded(tr);
9595 
9596 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9597 		goto out_free_tr;
9598 
9599 	ftrace_init_trace_array(tr);
9600 
9601 	init_trace_flags_index(tr);
9602 
9603 	if (trace_instance_dir) {
9604 		ret = trace_array_create_dir(tr);
9605 		if (ret)
9606 			goto out_free_tr;
9607 	} else
9608 		__trace_early_add_events(tr);
9609 
9610 	list_add(&tr->list, &ftrace_trace_arrays);
9611 
9612 	tr->ref++;
9613 
9614 	return tr;
9615 
9616  out_free_tr:
9617 	ftrace_free_ftrace_ops(tr);
9618 	free_trace_buffers(tr);
9619 	free_cpumask_var(tr->pipe_cpumask);
9620 	free_cpumask_var(tr->tracing_cpumask);
9621 	kfree_const(tr->system_names);
9622 	kfree(tr->range_name);
9623 	kfree(tr->name);
9624 	kfree(tr);
9625 
9626 	return ERR_PTR(ret);
9627 }
9628 
9629 static struct trace_array *trace_array_create(const char *name)
9630 {
9631 	return trace_array_create_systems(name, NULL, 0, 0);
9632 }
9633 
9634 static int instance_mkdir(const char *name)
9635 {
9636 	struct trace_array *tr;
9637 	int ret;
9638 
9639 	guard(mutex)(&event_mutex);
9640 	guard(mutex)(&trace_types_lock);
9641 
9642 	ret = -EEXIST;
9643 	if (trace_array_find(name))
9644 		return -EEXIST;
9645 
9646 	tr = trace_array_create(name);
9647 
9648 	ret = PTR_ERR_OR_ZERO(tr);
9649 
9650 	return ret;
9651 }
9652 
9653 #ifdef CONFIG_MMU
9654 static u64 map_pages(unsigned long start, unsigned long size)
9655 {
9656 	unsigned long vmap_start, vmap_end;
9657 	struct vm_struct *area;
9658 	int ret;
9659 
9660 	area = get_vm_area(size, VM_IOREMAP);
9661 	if (!area)
9662 		return 0;
9663 
9664 	vmap_start = (unsigned long) area->addr;
9665 	vmap_end = vmap_start + size;
9666 
9667 	ret = vmap_page_range(vmap_start, vmap_end,
9668 			      start, pgprot_nx(PAGE_KERNEL));
9669 	if (ret < 0) {
9670 		free_vm_area(area);
9671 		return 0;
9672 	}
9673 
9674 	return (u64)vmap_start;
9675 }
9676 #else
9677 static inline u64 map_pages(unsigned long start, unsigned long size)
9678 {
9679 	return 0;
9680 }
9681 #endif
9682 
9683 /**
9684  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9685  * @name: The name of the trace array to be looked up/created.
9686  * @systems: A list of systems to create event directories for (NULL for all)
9687  *
9688  * Returns pointer to trace array with given name.
9689  * NULL, if it cannot be created.
9690  *
9691  * NOTE: This function increments the reference counter associated with the
9692  * trace array returned. This makes sure it cannot be freed while in use.
9693  * Use trace_array_put() once the trace array is no longer needed.
9694  * If the trace_array is to be freed, trace_array_destroy() needs to
9695  * be called after the trace_array_put(), or simply let user space delete
9696  * it from the tracefs instances directory. But until the
9697  * trace_array_put() is called, user space can not delete it.
9698  *
9699  */
9700 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9701 {
9702 	struct trace_array *tr;
9703 
9704 	guard(mutex)(&event_mutex);
9705 	guard(mutex)(&trace_types_lock);
9706 
9707 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9708 		if (tr->name && strcmp(tr->name, name) == 0) {
9709 			tr->ref++;
9710 			return tr;
9711 		}
9712 	}
9713 
9714 	tr = trace_array_create_systems(name, systems, 0, 0);
9715 
9716 	if (IS_ERR(tr))
9717 		tr = NULL;
9718 	else
9719 		tr->ref++;
9720 
9721 	return tr;
9722 }
9723 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9724 
9725 static int __remove_instance(struct trace_array *tr)
9726 {
9727 	int i;
9728 
9729 	/* Reference counter for a newly created trace array = 1. */
9730 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9731 		return -EBUSY;
9732 
9733 	list_del(&tr->list);
9734 
9735 	/* Disable all the flags that were enabled coming in */
9736 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9737 		if ((1ULL << i) & ZEROED_TRACE_FLAGS)
9738 			set_tracer_flag(tr, 1ULL << i, 0);
9739 	}
9740 
9741 	if (printk_trace == tr)
9742 		update_printk_trace(&global_trace);
9743 
9744 	if (update_marker_trace(tr, 0))
9745 		synchronize_rcu();
9746 
9747 	tracing_set_nop(tr);
9748 	clear_ftrace_function_probes(tr);
9749 	event_trace_del_tracer(tr);
9750 	ftrace_clear_pids(tr);
9751 	ftrace_destroy_function_files(tr);
9752 	tracefs_remove(tr->dir);
9753 	free_percpu(tr->last_func_repeats);
9754 	free_trace_buffers(tr);
9755 	clear_tracing_err_log(tr);
9756 	free_tracers(tr);
9757 
9758 	if (tr->range_name) {
9759 		reserve_mem_release_by_name(tr->range_name);
9760 		kfree(tr->range_name);
9761 	}
9762 	if (tr->flags & TRACE_ARRAY_FL_VMALLOC)
9763 		vfree((void *)tr->range_addr_start);
9764 
9765 	for (i = 0; i < tr->nr_topts; i++) {
9766 		kfree(tr->topts[i].topts);
9767 	}
9768 	kfree(tr->topts);
9769 
9770 	free_cpumask_var(tr->pipe_cpumask);
9771 	free_cpumask_var(tr->tracing_cpumask);
9772 	kfree_const(tr->system_names);
9773 	kfree(tr->name);
9774 	kfree(tr);
9775 
9776 	return 0;
9777 }
9778 
9779 int trace_array_destroy(struct trace_array *this_tr)
9780 {
9781 	struct trace_array *tr;
9782 
9783 	if (!this_tr)
9784 		return -EINVAL;
9785 
9786 	guard(mutex)(&event_mutex);
9787 	guard(mutex)(&trace_types_lock);
9788 
9789 
9790 	/* Making sure trace array exists before destroying it. */
9791 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9792 		if (tr == this_tr)
9793 			return __remove_instance(tr);
9794 	}
9795 
9796 	return -ENODEV;
9797 }
9798 EXPORT_SYMBOL_GPL(trace_array_destroy);
9799 
9800 static int instance_rmdir(const char *name)
9801 {
9802 	struct trace_array *tr;
9803 
9804 	guard(mutex)(&event_mutex);
9805 	guard(mutex)(&trace_types_lock);
9806 
9807 	tr = trace_array_find(name);
9808 	if (!tr)
9809 		return -ENODEV;
9810 
9811 	return __remove_instance(tr);
9812 }
9813 
9814 static __init void create_trace_instances(struct dentry *d_tracer)
9815 {
9816 	struct trace_array *tr;
9817 
9818 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9819 							 instance_mkdir,
9820 							 instance_rmdir);
9821 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9822 		return;
9823 
9824 	guard(mutex)(&event_mutex);
9825 	guard(mutex)(&trace_types_lock);
9826 
9827 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9828 		if (!tr->name)
9829 			continue;
9830 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9831 			     "Failed to create instance directory\n"))
9832 			return;
9833 	}
9834 }
9835 
9836 static void
9837 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9838 {
9839 	int cpu;
9840 
9841 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9842 			tr, &show_traces_fops);
9843 
9844 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9845 			tr, &set_tracer_fops);
9846 
9847 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9848 			  tr, &tracing_cpumask_fops);
9849 
9850 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9851 			  tr, &tracing_iter_fops);
9852 
9853 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9854 			  tr, &tracing_fops);
9855 
9856 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9857 			  tr, &tracing_pipe_fops);
9858 
9859 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9860 			  tr, &tracing_entries_fops);
9861 
9862 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9863 			  tr, &tracing_total_entries_fops);
9864 
9865 	trace_create_file("free_buffer", 0200, d_tracer,
9866 			  tr, &tracing_free_buffer_fops);
9867 
9868 	trace_create_file("trace_marker", 0220, d_tracer,
9869 			  tr, &tracing_mark_fops);
9870 
9871 	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
9872 
9873 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9874 			  tr, &tracing_mark_raw_fops);
9875 
9876 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9877 			  &trace_clock_fops);
9878 
9879 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9880 			  tr, &rb_simple_fops);
9881 
9882 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9883 			  &trace_time_stamp_mode_fops);
9884 
9885 	tr->buffer_percent = 50;
9886 
9887 	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9888 			tr, &buffer_percent_fops);
9889 
9890 	trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
9891 			  tr, &buffer_subbuf_size_fops);
9892 
9893 	trace_create_file("syscall_user_buf_size", TRACE_MODE_WRITE, d_tracer,
9894 			 tr, &tracing_syscall_buf_fops);
9895 
9896 	create_trace_options_dir(tr);
9897 
9898 	trace_create_maxlat_file(tr, d_tracer);
9899 
9900 	if (ftrace_create_function_files(tr, d_tracer))
9901 		MEM_FAIL(1, "Could not allocate function filter files");
9902 
9903 	if (tr->range_addr_start) {
9904 		trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
9905 				  tr, &last_boot_fops);
9906 #ifdef CONFIG_TRACER_SNAPSHOT
9907 	} else {
9908 		trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9909 				  tr, &snapshot_fops);
9910 #endif
9911 	}
9912 
9913 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9914 			  tr, &tracing_err_log_fops);
9915 
9916 	for_each_tracing_cpu(cpu)
9917 		tracing_init_tracefs_percpu(tr, cpu);
9918 
9919 	ftrace_init_tracefs(tr, d_tracer);
9920 }
9921 
9922 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
9923 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9924 {
9925 	struct vfsmount *mnt;
9926 	struct file_system_type *type;
9927 	struct fs_context *fc;
9928 	int ret;
9929 
9930 	/*
9931 	 * To maintain backward compatibility for tools that mount
9932 	 * debugfs to get to the tracing facility, tracefs is automatically
9933 	 * mounted to the debugfs/tracing directory.
9934 	 */
9935 	type = get_fs_type("tracefs");
9936 	if (!type)
9937 		return NULL;
9938 
9939 	fc = fs_context_for_submount(type, mntpt);
9940 	put_filesystem(type);
9941 	if (IS_ERR(fc))
9942 		return ERR_CAST(fc);
9943 
9944 	pr_warn("NOTICE: Automounting of tracing to debugfs is deprecated and will be removed in 2030\n");
9945 
9946 	ret = vfs_parse_fs_string(fc, "source", "tracefs");
9947 	if (!ret)
9948 		mnt = fc_mount(fc);
9949 	else
9950 		mnt = ERR_PTR(ret);
9951 
9952 	put_fs_context(fc);
9953 	return mnt;
9954 }
9955 #endif
9956 
9957 /**
9958  * tracing_init_dentry - initialize top level trace array
9959  *
9960  * This is called when creating files or directories in the tracing
9961  * directory. It is called via fs_initcall() by any of the boot up code
9962  * and expects to return the dentry of the top level tracing directory.
9963  */
9964 int tracing_init_dentry(void)
9965 {
9966 	struct trace_array *tr = &global_trace;
9967 
9968 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9969 		pr_warn("Tracing disabled due to lockdown\n");
9970 		return -EPERM;
9971 	}
9972 
9973 	/* The top level trace array uses  NULL as parent */
9974 	if (tr->dir)
9975 		return 0;
9976 
9977 	if (WARN_ON(!tracefs_initialized()))
9978 		return -ENODEV;
9979 
9980 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
9981 	/*
9982 	 * As there may still be users that expect the tracing
9983 	 * files to exist in debugfs/tracing, we must automount
9984 	 * the tracefs file system there, so older tools still
9985 	 * work with the newer kernel.
9986 	 */
9987 	tr->dir = debugfs_create_automount("tracing", NULL,
9988 					   trace_automount, NULL);
9989 #endif
9990 
9991 	return 0;
9992 }
9993 
9994 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9995 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9996 
9997 struct workqueue_struct *trace_init_wq __initdata;
9998 static struct work_struct eval_map_work __initdata;
9999 static struct work_struct tracerfs_init_work __initdata;
10000 
10001 static void __init eval_map_work_func(struct work_struct *work)
10002 {
10003 	int len;
10004 
10005 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10006 	trace_event_update_with_eval_map(NULL, __start_ftrace_eval_maps, len);
10007 }
10008 
10009 static int __init trace_eval_init(void)
10010 {
10011 	INIT_WORK(&eval_map_work, eval_map_work_func);
10012 
10013 	trace_init_wq = alloc_workqueue("trace_init_wq", WQ_UNBOUND, 0);
10014 	if (!trace_init_wq) {
10015 		pr_err("Unable to allocate trace_init_wq\n");
10016 		/* Do work here */
10017 		eval_map_work_func(&eval_map_work);
10018 		return -ENOMEM;
10019 	}
10020 
10021 	queue_work(trace_init_wq, &eval_map_work);
10022 	return 0;
10023 }
10024 
10025 subsys_initcall(trace_eval_init);
10026 
10027 static int __init trace_eval_sync(void)
10028 {
10029 	/* Make sure the eval map updates are finished */
10030 	if (trace_init_wq)
10031 		destroy_workqueue(trace_init_wq);
10032 	return 0;
10033 }
10034 
10035 late_initcall_sync(trace_eval_sync);
10036 
10037 
10038 #ifdef CONFIG_MODULES
10039 
10040 bool module_exists(const char *module)
10041 {
10042 	/* All modules have the symbol __this_module */
10043 	static const char this_mod[] = "__this_module";
10044 	char modname[MODULE_NAME_LEN + sizeof(this_mod) + 2];
10045 	unsigned long val;
10046 	int n;
10047 
10048 	n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
10049 
10050 	if (n > sizeof(modname) - 1)
10051 		return false;
10052 
10053 	val = module_kallsyms_lookup_name(modname);
10054 	return val != 0;
10055 }
10056 
10057 static void trace_module_add_evals(struct module *mod)
10058 {
10059 	/*
10060 	 * Modules with bad taint do not have events created, do
10061 	 * not bother with enums either.
10062 	 */
10063 	if (trace_module_has_bad_taint(mod))
10064 		return;
10065 
10066 	/* Even if no trace_evals, this need to sanitize field types. */
10067 	trace_event_update_with_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10068 }
10069 
10070 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
10071 static void trace_module_remove_evals(struct module *mod)
10072 {
10073 	union trace_eval_map_item *map;
10074 	union trace_eval_map_item **last = &trace_eval_maps;
10075 
10076 	if (!mod->num_trace_evals)
10077 		return;
10078 
10079 	guard(mutex)(&trace_eval_mutex);
10080 
10081 	map = trace_eval_maps;
10082 
10083 	while (map) {
10084 		if (map->head.mod == mod)
10085 			break;
10086 		map = trace_eval_jmp_to_tail(map);
10087 		last = &map->tail.next;
10088 		map = map->tail.next;
10089 	}
10090 	if (!map)
10091 		return;
10092 
10093 	*last = trace_eval_jmp_to_tail(map)->tail.next;
10094 	kfree(map);
10095 }
10096 #else
10097 static inline void trace_module_remove_evals(struct module *mod) { }
10098 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10099 
10100 static void trace_module_record(struct module *mod, bool add)
10101 {
10102 	struct trace_array *tr;
10103 	unsigned long flags;
10104 
10105 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10106 		flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT);
10107 		/* Update any persistent trace array that has already been started */
10108 		if (flags == TRACE_ARRAY_FL_BOOT && add) {
10109 			guard(mutex)(&scratch_mutex);
10110 			save_mod(mod, tr);
10111 		} else if (flags & TRACE_ARRAY_FL_LAST_BOOT) {
10112 			/* Update delta if the module loaded in previous boot */
10113 			make_mod_delta(mod, tr);
10114 		}
10115 	}
10116 }
10117 
10118 static int trace_module_notify(struct notifier_block *self,
10119 			       unsigned long val, void *data)
10120 {
10121 	struct module *mod = data;
10122 
10123 	switch (val) {
10124 	case MODULE_STATE_COMING:
10125 		trace_module_add_evals(mod);
10126 		trace_module_record(mod, true);
10127 		break;
10128 	case MODULE_STATE_GOING:
10129 		trace_module_remove_evals(mod);
10130 		trace_module_record(mod, false);
10131 		break;
10132 	}
10133 
10134 	return NOTIFY_OK;
10135 }
10136 
10137 static struct notifier_block trace_module_nb = {
10138 	.notifier_call = trace_module_notify,
10139 	.priority = 0,
10140 };
10141 #endif /* CONFIG_MODULES */
10142 
10143 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10144 {
10145 
10146 	event_trace_init();
10147 
10148 	init_tracer_tracefs(&global_trace, NULL);
10149 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
10150 
10151 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10152 			&global_trace, &tracing_thresh_fops);
10153 
10154 	trace_create_file("README", TRACE_MODE_READ, NULL,
10155 			NULL, &tracing_readme_fops);
10156 
10157 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10158 			NULL, &tracing_saved_cmdlines_fops);
10159 
10160 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10161 			  NULL, &tracing_saved_cmdlines_size_fops);
10162 
10163 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10164 			NULL, &tracing_saved_tgids_fops);
10165 
10166 	trace_create_eval_file(NULL);
10167 
10168 #ifdef CONFIG_MODULES
10169 	register_module_notifier(&trace_module_nb);
10170 #endif
10171 
10172 #ifdef CONFIG_DYNAMIC_FTRACE
10173 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10174 			NULL, &tracing_dyn_info_fops);
10175 #endif
10176 
10177 	create_trace_instances(NULL);
10178 
10179 	update_tracer_options();
10180 }
10181 
10182 static __init int tracer_init_tracefs(void)
10183 {
10184 	int ret;
10185 
10186 	trace_access_lock_init();
10187 
10188 	ret = tracing_init_dentry();
10189 	if (ret)
10190 		return 0;
10191 
10192 	if (trace_init_wq) {
10193 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10194 		queue_work(trace_init_wq, &tracerfs_init_work);
10195 	} else {
10196 		tracer_init_tracefs_work_func(NULL);
10197 	}
10198 
10199 	if (rv_init_interface())
10200 		pr_err("RV: Error while creating the RV interface\n");
10201 
10202 	return 0;
10203 }
10204 
10205 fs_initcall(tracer_init_tracefs);
10206 
10207 static int trace_die_panic_handler(struct notifier_block *self,
10208 				unsigned long ev, void *unused);
10209 
10210 static struct notifier_block trace_panic_notifier = {
10211 	.notifier_call = trace_die_panic_handler,
10212 	.priority = INT_MAX - 1,
10213 };
10214 
10215 static struct notifier_block trace_die_notifier = {
10216 	.notifier_call = trace_die_panic_handler,
10217 	.priority = INT_MAX - 1,
10218 };
10219 
10220 /*
10221  * The idea is to execute the following die/panic callback early, in order
10222  * to avoid showing irrelevant information in the trace (like other panic
10223  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10224  * warnings get disabled (to prevent potential log flooding).
10225  */
10226 static int trace_die_panic_handler(struct notifier_block *self,
10227 				unsigned long ev, void *unused)
10228 {
10229 	if (!ftrace_dump_on_oops_enabled())
10230 		return NOTIFY_DONE;
10231 
10232 	/* The die notifier requires DIE_OOPS to trigger */
10233 	if (self == &trace_die_notifier && ev != DIE_OOPS)
10234 		return NOTIFY_DONE;
10235 
10236 	ftrace_dump(DUMP_PARAM);
10237 
10238 	return NOTIFY_DONE;
10239 }
10240 
10241 /*
10242  * printk is set to max of 1024, we really don't need it that big.
10243  * Nothing should be printing 1000 characters anyway.
10244  */
10245 #define TRACE_MAX_PRINT		1000
10246 
10247 /*
10248  * Define here KERN_TRACE so that we have one place to modify
10249  * it if we decide to change what log level the ftrace dump
10250  * should be at.
10251  */
10252 #define KERN_TRACE		KERN_EMERG
10253 
10254 void
10255 trace_printk_seq(struct trace_seq *s)
10256 {
10257 	/* Probably should print a warning here. */
10258 	if (s->seq.len >= TRACE_MAX_PRINT)
10259 		s->seq.len = TRACE_MAX_PRINT;
10260 
10261 	/*
10262 	 * More paranoid code. Although the buffer size is set to
10263 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10264 	 * an extra layer of protection.
10265 	 */
10266 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10267 		s->seq.len = s->seq.size - 1;
10268 
10269 	/* should be zero ended, but we are paranoid. */
10270 	s->buffer[s->seq.len] = 0;
10271 
10272 	printk(KERN_TRACE "%s", s->buffer);
10273 
10274 	trace_seq_init(s);
10275 }
10276 
10277 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
10278 {
10279 	iter->tr = tr;
10280 	iter->trace = iter->tr->current_trace;
10281 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
10282 	iter->array_buffer = &tr->array_buffer;
10283 
10284 	if (iter->trace && iter->trace->open)
10285 		iter->trace->open(iter);
10286 
10287 	/* Annotate start of buffers if we had overruns */
10288 	if (ring_buffer_overruns(iter->array_buffer->buffer))
10289 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
10290 
10291 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
10292 	if (trace_clocks[iter->tr->clock_id].in_ns)
10293 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10294 
10295 	/* Can not use kmalloc for iter.temp and iter.fmt */
10296 	iter->temp = static_temp_buf;
10297 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
10298 	iter->fmt = static_fmt_buf;
10299 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
10300 }
10301 
10302 void trace_init_global_iter(struct trace_iterator *iter)
10303 {
10304 	trace_init_iter(iter, &global_trace);
10305 }
10306 
10307 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
10308 {
10309 	/* use static because iter can be a bit big for the stack */
10310 	static struct trace_iterator iter;
10311 	unsigned int old_userobj;
10312 	unsigned long flags;
10313 	int cnt = 0;
10314 
10315 	/*
10316 	 * Always turn off tracing when we dump.
10317 	 * We don't need to show trace output of what happens
10318 	 * between multiple crashes.
10319 	 *
10320 	 * If the user does a sysrq-z, then they can re-enable
10321 	 * tracing with echo 1 > tracing_on.
10322 	 */
10323 	tracer_tracing_off(tr);
10324 
10325 	local_irq_save(flags);
10326 
10327 	/* Simulate the iterator */
10328 	trace_init_iter(&iter, tr);
10329 
10330 	/* While dumping, do not allow the buffer to be enable */
10331 	tracer_tracing_disable(tr);
10332 
10333 	old_userobj = tr->trace_flags & TRACE_ITER(SYM_USEROBJ);
10334 
10335 	/* don't look at user memory in panic mode */
10336 	tr->trace_flags &= ~TRACE_ITER(SYM_USEROBJ);
10337 
10338 	if (dump_mode == DUMP_ORIG)
10339 		iter.cpu_file = raw_smp_processor_id();
10340 	else
10341 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10342 
10343 	if (tr == &global_trace)
10344 		printk(KERN_TRACE "Dumping ftrace buffer:\n");
10345 	else
10346 		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
10347 
10348 	/* Did function tracer already get disabled? */
10349 	if (ftrace_is_dead()) {
10350 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10351 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10352 	}
10353 
10354 	/*
10355 	 * We need to stop all tracing on all CPUS to read
10356 	 * the next buffer. This is a bit expensive, but is
10357 	 * not done often. We fill all what we can read,
10358 	 * and then release the locks again.
10359 	 */
10360 
10361 	while (!trace_empty(&iter)) {
10362 
10363 		if (!cnt)
10364 			printk(KERN_TRACE "---------------------------------\n");
10365 
10366 		cnt++;
10367 
10368 		trace_iterator_reset(&iter);
10369 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
10370 
10371 		if (trace_find_next_entry_inc(&iter) != NULL) {
10372 			int ret;
10373 
10374 			ret = print_trace_line(&iter);
10375 			if (ret != TRACE_TYPE_NO_CONSUME)
10376 				trace_consume(&iter);
10377 
10378 			trace_printk_seq(&iter.seq);
10379 		}
10380 		touch_nmi_watchdog();
10381 	}
10382 
10383 	if (!cnt)
10384 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
10385 	else
10386 		printk(KERN_TRACE "---------------------------------\n");
10387 
10388 	tr->trace_flags |= old_userobj;
10389 
10390 	tracer_tracing_enable(tr);
10391 	local_irq_restore(flags);
10392 }
10393 
10394 static void ftrace_dump_by_param(void)
10395 {
10396 	bool first_param = true;
10397 	char dump_param[MAX_TRACER_SIZE];
10398 	char *buf, *token, *inst_name;
10399 	struct trace_array *tr;
10400 
10401 	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
10402 	buf = dump_param;
10403 
10404 	while ((token = strsep(&buf, ",")) != NULL) {
10405 		if (first_param) {
10406 			first_param = false;
10407 			if (!strcmp("0", token))
10408 				continue;
10409 			else if (!strcmp("1", token)) {
10410 				ftrace_dump_one(&global_trace, DUMP_ALL);
10411 				continue;
10412 			}
10413 			else if (!strcmp("2", token) ||
10414 			  !strcmp("orig_cpu", token)) {
10415 				ftrace_dump_one(&global_trace, DUMP_ORIG);
10416 				continue;
10417 			}
10418 		}
10419 
10420 		inst_name = strsep(&token, "=");
10421 		tr = trace_array_find(inst_name);
10422 		if (!tr) {
10423 			printk(KERN_TRACE "Instance %s not found\n", inst_name);
10424 			continue;
10425 		}
10426 
10427 		if (token && (!strcmp("2", token) ||
10428 			  !strcmp("orig_cpu", token)))
10429 			ftrace_dump_one(tr, DUMP_ORIG);
10430 		else
10431 			ftrace_dump_one(tr, DUMP_ALL);
10432 	}
10433 }
10434 
10435 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10436 {
10437 	static atomic_t dump_running;
10438 
10439 	/* Only allow one dump user at a time. */
10440 	if (atomic_inc_return(&dump_running) != 1) {
10441 		atomic_dec(&dump_running);
10442 		return;
10443 	}
10444 
10445 	switch (oops_dump_mode) {
10446 	case DUMP_ALL:
10447 		ftrace_dump_one(&global_trace, DUMP_ALL);
10448 		break;
10449 	case DUMP_ORIG:
10450 		ftrace_dump_one(&global_trace, DUMP_ORIG);
10451 		break;
10452 	case DUMP_PARAM:
10453 		ftrace_dump_by_param();
10454 		break;
10455 	case DUMP_NONE:
10456 		break;
10457 	default:
10458 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10459 		ftrace_dump_one(&global_trace, DUMP_ALL);
10460 	}
10461 
10462 	atomic_dec(&dump_running);
10463 }
10464 EXPORT_SYMBOL_GPL(ftrace_dump);
10465 
10466 #define WRITE_BUFSIZE  4096
10467 
10468 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10469 				size_t count, loff_t *ppos,
10470 				int (*createfn)(const char *))
10471 {
10472 	char *kbuf __free(kfree) = NULL;
10473 	char *buf, *tmp;
10474 	int ret = 0;
10475 	size_t done = 0;
10476 	size_t size;
10477 
10478 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10479 	if (!kbuf)
10480 		return -ENOMEM;
10481 
10482 	while (done < count) {
10483 		size = count - done;
10484 
10485 		if (size >= WRITE_BUFSIZE)
10486 			size = WRITE_BUFSIZE - 1;
10487 
10488 		if (copy_from_user(kbuf, buffer + done, size))
10489 			return -EFAULT;
10490 
10491 		kbuf[size] = '\0';
10492 		buf = kbuf;
10493 		do {
10494 			tmp = strchr(buf, '\n');
10495 			if (tmp) {
10496 				*tmp = '\0';
10497 				size = tmp - buf + 1;
10498 			} else {
10499 				size = strlen(buf);
10500 				if (done + size < count) {
10501 					if (buf != kbuf)
10502 						break;
10503 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10504 					pr_warn("Line length is too long: Should be less than %d\n",
10505 						WRITE_BUFSIZE - 2);
10506 					return -EINVAL;
10507 				}
10508 			}
10509 			done += size;
10510 
10511 			/* Remove comments */
10512 			tmp = strchr(buf, '#');
10513 
10514 			if (tmp)
10515 				*tmp = '\0';
10516 
10517 			ret = createfn(buf);
10518 			if (ret)
10519 				return ret;
10520 			buf += size;
10521 
10522 		} while (done < count);
10523 	}
10524 	return done;
10525 }
10526 
10527 #ifdef CONFIG_TRACER_SNAPSHOT
10528 __init static bool tr_needs_alloc_snapshot(const char *name)
10529 {
10530 	char *test;
10531 	int len = strlen(name);
10532 	bool ret;
10533 
10534 	if (!boot_snapshot_index)
10535 		return false;
10536 
10537 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10538 	    boot_snapshot_info[len] == '\t')
10539 		return true;
10540 
10541 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10542 	if (!test)
10543 		return false;
10544 
10545 	sprintf(test, "\t%s\t", name);
10546 	ret = strstr(boot_snapshot_info, test) == NULL;
10547 	kfree(test);
10548 	return ret;
10549 }
10550 
10551 __init static void do_allocate_snapshot(const char *name)
10552 {
10553 	if (!tr_needs_alloc_snapshot(name))
10554 		return;
10555 
10556 	/*
10557 	 * When allocate_snapshot is set, the next call to
10558 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
10559 	 * will allocate the snapshot buffer. That will also clear
10560 	 * this flag.
10561 	 */
10562 	allocate_snapshot = true;
10563 }
10564 #else
10565 static inline void do_allocate_snapshot(const char *name) { }
10566 #endif
10567 
10568 __init static int backup_instance_area(const char *backup,
10569 				       unsigned long *addr, phys_addr_t *size)
10570 {
10571 	struct trace_array *backup_tr;
10572 	void *allocated_vaddr = NULL;
10573 
10574 	backup_tr = trace_array_get_by_name(backup, NULL);
10575 	if (!backup_tr) {
10576 		pr_warn("Tracing: Instance %s is not found.\n", backup);
10577 		return -ENOENT;
10578 	}
10579 
10580 	if (!(backup_tr->flags & TRACE_ARRAY_FL_BOOT)) {
10581 		pr_warn("Tracing: Instance %s is not boot mapped.\n", backup);
10582 		trace_array_put(backup_tr);
10583 		return -EINVAL;
10584 	}
10585 
10586 	*size = backup_tr->range_addr_size;
10587 
10588 	allocated_vaddr = vzalloc(*size);
10589 	if (!allocated_vaddr) {
10590 		pr_warn("Tracing: Failed to allocate memory for copying instance %s (size 0x%lx)\n",
10591 			backup, (unsigned long)*size);
10592 		trace_array_put(backup_tr);
10593 		return -ENOMEM;
10594 	}
10595 
10596 	memcpy(allocated_vaddr,
10597 		(void *)backup_tr->range_addr_start, (size_t)*size);
10598 	*addr = (unsigned long)allocated_vaddr;
10599 
10600 	trace_array_put(backup_tr);
10601 	return 0;
10602 }
10603 
10604 __init static void enable_instances(void)
10605 {
10606 	struct trace_array *tr;
10607 	bool memmap_area = false;
10608 	char *curr_str;
10609 	char *name;
10610 	char *str;
10611 	char *tok;
10612 
10613 	/* A tab is always appended */
10614 	boot_instance_info[boot_instance_index - 1] = '\0';
10615 	str = boot_instance_info;
10616 
10617 	while ((curr_str = strsep(&str, "\t"))) {
10618 		phys_addr_t start = 0;
10619 		phys_addr_t size = 0;
10620 		unsigned long addr = 0;
10621 		bool traceprintk = false;
10622 		bool traceoff = false;
10623 		char *flag_delim;
10624 		char *addr_delim;
10625 		char *rname __free(kfree) = NULL;
10626 		char *backup;
10627 
10628 		tok = strsep(&curr_str, ",");
10629 
10630 		name = strsep(&tok, "=");
10631 		backup = tok;
10632 
10633 		flag_delim = strchr(name, '^');
10634 		addr_delim = strchr(name, '@');
10635 
10636 		if (addr_delim)
10637 			*addr_delim++ = '\0';
10638 
10639 		if (flag_delim)
10640 			*flag_delim++ = '\0';
10641 
10642 		if (backup) {
10643 			if (backup_instance_area(backup, &addr, &size) < 0)
10644 				continue;
10645 		}
10646 
10647 		if (flag_delim) {
10648 			char *flag;
10649 
10650 			while ((flag = strsep(&flag_delim, "^"))) {
10651 				if (strcmp(flag, "traceoff") == 0) {
10652 					traceoff = true;
10653 				} else if ((strcmp(flag, "printk") == 0) ||
10654 					   (strcmp(flag, "traceprintk") == 0) ||
10655 					   (strcmp(flag, "trace_printk") == 0)) {
10656 					traceprintk = true;
10657 				} else {
10658 					pr_info("Tracing: Invalid instance flag '%s' for %s\n",
10659 						flag, name);
10660 				}
10661 			}
10662 		}
10663 
10664 		tok = addr_delim;
10665 		if (tok && isdigit(*tok)) {
10666 			start = memparse(tok, &tok);
10667 			if (!start) {
10668 				pr_warn("Tracing: Invalid boot instance address for %s\n",
10669 					name);
10670 				continue;
10671 			}
10672 			if (*tok != ':') {
10673 				pr_warn("Tracing: No size specified for instance %s\n", name);
10674 				continue;
10675 			}
10676 			tok++;
10677 			size = memparse(tok, &tok);
10678 			if (!size) {
10679 				pr_warn("Tracing: Invalid boot instance size for %s\n",
10680 					name);
10681 				continue;
10682 			}
10683 			memmap_area = true;
10684 		} else if (tok) {
10685 			if (!reserve_mem_find_by_name(tok, &start, &size)) {
10686 				start = 0;
10687 				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
10688 				continue;
10689 			}
10690 			rname = kstrdup(tok, GFP_KERNEL);
10691 		}
10692 
10693 		if (start) {
10694 			/* Start and size must be page aligned */
10695 			if (start & ~PAGE_MASK) {
10696 				pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start);
10697 				continue;
10698 			}
10699 			if (size & ~PAGE_MASK) {
10700 				pr_warn("Tracing: mapping size %pa is not page aligned\n", &size);
10701 				continue;
10702 			}
10703 
10704 			if (memmap_area)
10705 				addr = map_pages(start, size);
10706 			else
10707 				addr = (unsigned long)phys_to_virt(start);
10708 			if (addr) {
10709 				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
10710 					name, &start, (unsigned long)size);
10711 			} else {
10712 				pr_warn("Tracing: Failed to map boot instance %s\n", name);
10713 				continue;
10714 			}
10715 		} else {
10716 			/* Only non mapped buffers have snapshot buffers */
10717 			if (IS_ENABLED(CONFIG_TRACER_SNAPSHOT))
10718 				do_allocate_snapshot(name);
10719 		}
10720 
10721 		tr = trace_array_create_systems(name, NULL, addr, size);
10722 		if (IS_ERR(tr)) {
10723 			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
10724 			continue;
10725 		}
10726 
10727 		if (traceoff)
10728 			tracer_tracing_off(tr);
10729 
10730 		if (traceprintk)
10731 			update_printk_trace(tr);
10732 
10733 		/*
10734 		 * memmap'd buffers can not be freed.
10735 		 */
10736 		if (memmap_area) {
10737 			tr->flags |= TRACE_ARRAY_FL_MEMMAP;
10738 			tr->ref++;
10739 		}
10740 
10741 		/*
10742 		 * Backup buffers can be freed but need vfree().
10743 		 */
10744 		if (backup)
10745 			tr->flags |= TRACE_ARRAY_FL_VMALLOC;
10746 
10747 		if (start || backup) {
10748 			tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
10749 			tr->range_name = no_free_ptr(rname);
10750 		}
10751 
10752 		while ((tok = strsep(&curr_str, ","))) {
10753 			early_enable_events(tr, tok, true);
10754 		}
10755 	}
10756 }
10757 
10758 __init static int tracer_alloc_buffers(void)
10759 {
10760 	int ring_buf_size;
10761 	int ret = -ENOMEM;
10762 
10763 
10764 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10765 		pr_warn("Tracing disabled due to lockdown\n");
10766 		return -EPERM;
10767 	}
10768 
10769 	/*
10770 	 * Make sure we don't accidentally add more trace options
10771 	 * than we have bits for.
10772 	 */
10773 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10774 
10775 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10776 		return -ENOMEM;
10777 
10778 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10779 		goto out_free_buffer_mask;
10780 
10781 	/* Only allocate trace_printk buffers if a trace_printk exists */
10782 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10783 		/* Must be called before global_trace.buffer is allocated */
10784 		trace_printk_init_buffers();
10785 
10786 	/* To save memory, keep the ring buffer size to its minimum */
10787 	if (global_trace.ring_buffer_expanded)
10788 		ring_buf_size = trace_buf_size;
10789 	else
10790 		ring_buf_size = 1;
10791 
10792 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10793 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10794 
10795 	raw_spin_lock_init(&global_trace.start_lock);
10796 
10797 	/*
10798 	 * The prepare callbacks allocates some memory for the ring buffer. We
10799 	 * don't free the buffer if the CPU goes down. If we were to free
10800 	 * the buffer, then the user would lose any trace that was in the
10801 	 * buffer. The memory will be removed once the "instance" is removed.
10802 	 */
10803 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10804 				      "trace/RB:prepare", trace_rb_cpu_prepare,
10805 				      NULL);
10806 	if (ret < 0)
10807 		goto out_free_cpumask;
10808 	/* Used for event triggers */
10809 	ret = -ENOMEM;
10810 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10811 	if (!temp_buffer)
10812 		goto out_rm_hp_state;
10813 
10814 	if (trace_create_savedcmd() < 0)
10815 		goto out_free_temp_buffer;
10816 
10817 	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10818 		goto out_free_savedcmd;
10819 
10820 	/* TODO: make the number of buffers hot pluggable with CPUS */
10821 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10822 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10823 		goto out_free_pipe_cpumask;
10824 	}
10825 	if (global_trace.buffer_disabled)
10826 		tracing_off();
10827 
10828 	if (trace_boot_clock) {
10829 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10830 		if (ret < 0)
10831 			pr_warn("Trace clock %s not defined, going back to default\n",
10832 				trace_boot_clock);
10833 	}
10834 
10835 	/*
10836 	 * register_tracer() might reference current_trace, so it
10837 	 * needs to be set before we register anything. This is
10838 	 * just a bootstrap of current_trace anyway.
10839 	 */
10840 	global_trace.current_trace = &nop_trace;
10841 	global_trace.current_trace_flags = nop_trace.flags;
10842 
10843 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10844 #ifdef CONFIG_TRACER_SNAPSHOT
10845 	spin_lock_init(&global_trace.snapshot_trigger_lock);
10846 #endif
10847 	ftrace_init_global_array_ops(&global_trace);
10848 
10849 #ifdef CONFIG_MODULES
10850 	INIT_LIST_HEAD(&global_trace.mod_events);
10851 #endif
10852 
10853 	init_trace_flags_index(&global_trace);
10854 
10855 	INIT_LIST_HEAD(&global_trace.tracers);
10856 
10857 	/* All seems OK, enable tracing */
10858 	tracing_disabled = 0;
10859 
10860 	atomic_notifier_chain_register(&panic_notifier_list,
10861 				       &trace_panic_notifier);
10862 
10863 	register_die_notifier(&trace_die_notifier);
10864 
10865 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10866 
10867 	global_trace.syscall_buf_sz = syscall_buf_size;
10868 
10869 	INIT_LIST_HEAD(&global_trace.systems);
10870 	INIT_LIST_HEAD(&global_trace.events);
10871 	INIT_LIST_HEAD(&global_trace.hist_vars);
10872 	INIT_LIST_HEAD(&global_trace.err_log);
10873 	list_add(&global_trace.marker_list, &marker_copies);
10874 	list_add(&global_trace.list, &ftrace_trace_arrays);
10875 
10876 	register_tracer(&nop_trace);
10877 
10878 	/* Function tracing may start here (via kernel command line) */
10879 	init_function_trace();
10880 
10881 	apply_trace_boot_options();
10882 
10883 	register_snapshot_cmd();
10884 
10885 	return 0;
10886 
10887 out_free_pipe_cpumask:
10888 	free_cpumask_var(global_trace.pipe_cpumask);
10889 out_free_savedcmd:
10890 	trace_free_saved_cmdlines_buffer();
10891 out_free_temp_buffer:
10892 	ring_buffer_free(temp_buffer);
10893 out_rm_hp_state:
10894 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10895 out_free_cpumask:
10896 	free_cpumask_var(global_trace.tracing_cpumask);
10897 out_free_buffer_mask:
10898 	free_cpumask_var(tracing_buffer_mask);
10899 	return ret;
10900 }
10901 
10902 #ifdef CONFIG_FUNCTION_TRACER
10903 /* Used to set module cached ftrace filtering at boot up */
10904 struct trace_array *trace_get_global_array(void)
10905 {
10906 	return &global_trace;
10907 }
10908 #endif
10909 
10910 void __init ftrace_boot_snapshot(void)
10911 {
10912 #ifdef CONFIG_TRACER_SNAPSHOT
10913 	struct trace_array *tr;
10914 
10915 	if (!snapshot_at_boot)
10916 		return;
10917 
10918 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10919 		if (!tr->allocated_snapshot)
10920 			continue;
10921 
10922 		tracing_snapshot_instance(tr);
10923 		trace_array_puts(tr, "** Boot snapshot taken **\n");
10924 	}
10925 #endif
10926 }
10927 
10928 void __init early_trace_init(void)
10929 {
10930 	if (tracepoint_printk) {
10931 		tracepoint_print_iter =
10932 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10933 		if (MEM_FAIL(!tracepoint_print_iter,
10934 			     "Failed to allocate trace iterator\n"))
10935 			tracepoint_printk = 0;
10936 		else
10937 			static_key_enable(&tracepoint_printk_key.key);
10938 	}
10939 	tracer_alloc_buffers();
10940 
10941 	init_events();
10942 }
10943 
10944 void __init trace_init(void)
10945 {
10946 	trace_event_init();
10947 
10948 	if (boot_instance_index)
10949 		enable_instances();
10950 }
10951 
10952 __init static void clear_boot_tracer(void)
10953 {
10954 	/*
10955 	 * The default tracer at boot buffer is an init section.
10956 	 * This function is called in lateinit. If we did not
10957 	 * find the boot tracer, then clear it out, to prevent
10958 	 * later registration from accessing the buffer that is
10959 	 * about to be freed.
10960 	 */
10961 	if (!default_bootup_tracer)
10962 		return;
10963 
10964 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10965 	       default_bootup_tracer);
10966 	default_bootup_tracer = NULL;
10967 }
10968 
10969 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10970 __init static void tracing_set_default_clock(void)
10971 {
10972 	/* sched_clock_stable() is determined in late_initcall */
10973 	if (!trace_boot_clock && !sched_clock_stable()) {
10974 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
10975 			pr_warn("Can not set tracing clock due to lockdown\n");
10976 			return;
10977 		}
10978 
10979 		printk(KERN_WARNING
10980 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10981 		       "If you want to keep using the local clock, then add:\n"
10982 		       "  \"trace_clock=local\"\n"
10983 		       "on the kernel command line\n");
10984 		tracing_set_clock(&global_trace, "global");
10985 	}
10986 }
10987 #else
10988 static inline void tracing_set_default_clock(void) { }
10989 #endif
10990 
10991 __init static int late_trace_init(void)
10992 {
10993 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10994 		static_key_disable(&tracepoint_printk_key.key);
10995 		tracepoint_printk = 0;
10996 	}
10997 
10998 	if (traceoff_after_boot)
10999 		tracing_off();
11000 
11001 	tracing_set_default_clock();
11002 	clear_boot_tracer();
11003 	return 0;
11004 }
11005 
11006 late_initcall_sync(late_trace_init);
11007