xref: /linux/kernel/trace/trace.c (revision 852947be66b826c3d0ba328e19a3559fdf7ac726)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/cleanup.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 #include <linux/sort.h>
53 #include <linux/io.h> /* vmap_page_range() */
54 #include <linux/fs_context.h>
55 
56 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
57 
58 #include "trace.h"
59 #include "trace_output.h"
60 
61 #ifdef CONFIG_FTRACE_STARTUP_TEST
/*
 * We need to change this state when a selftest is running.
 * A selftest will look into the ring-buffer to count the
 * entries inserted during the selftest, although concurrent
 * insertions into the ring-buffer (such as trace_printk) could occur
 * at the same time, giving false positive or negative results.
 */
69 static bool __read_mostly tracing_selftest_running;
70 
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76 
77 void __init disable_tracing_selftest(const char *reason)
78 {
79 	if (!tracing_selftest_disabled) {
80 		tracing_selftest_disabled = true;
81 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
82 	}
83 }
84 #else
85 #define tracing_selftest_running	0
86 #define tracing_selftest_disabled	0
87 #endif
88 
89 /* Pipe tracepoints to printk */
90 static struct trace_iterator *tracepoint_print_iter;
91 int tracepoint_printk;
92 static bool tracepoint_printk_stop_on_boot __initdata;
93 static bool traceoff_after_boot __initdata;
94 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
95 
96 /* For tracers that don't implement custom flags */
97 static struct tracer_opt dummy_tracer_opt[] = {
98 	{ }
99 };
100 
/*
 * Placeholder set_flag handler: every argument is ignored and 0 is
 * always returned.
 */
static int
dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
{
	return 0;
}
106 
107 /*
108  * To prevent the comm cache from being overwritten when no
109  * tracing is active, only save the comm when a trace event
110  * occurred.
111  */
112 DEFINE_PER_CPU(bool, trace_taskinfo_save);
113 
114 /*
115  * Kill all tracing for good (never come back).
116  * It is initialized to 1 but will turn to zero if the initialization
117  * of the tracer is successful. But that is the only place that sets
118  * this back to zero.
119  */
120 static int tracing_disabled = 1;
121 
122 cpumask_var_t __read_mostly	tracing_buffer_mask;
123 
124 #define MAX_TRACER_SIZE		100
125 /*
126  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
127  *
128  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
129  * is set, then ftrace_dump is called. This will output the contents
130  * of the ftrace buffers to the console.  This is very useful for
 * capturing traces that lead to crashes and outputting them to a
 * serial console.
133  *
134  * It is default off, but you can enable it with either specifying
135  * "ftrace_dump_on_oops" in the kernel command line, or setting
136  * /proc/sys/kernel/ftrace_dump_on_oops
137  * Set 1 if you want to dump buffers of all CPUs
138  * Set 2 if you want to dump the buffer of the CPU that triggered oops
139  * Set instance name if you want to dump the specific trace instance
 * Multiple instance dump is also supported, and instances are separated
 * by commas.
142  */
143 /* Set to string format zero to disable by default */
144 char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
145 
146 /* When set, tracing will stop when a WARN*() is hit */
147 static int __disable_trace_on_warning;
148 
149 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
150 			     void *buffer, size_t *lenp, loff_t *ppos);
/*
 * sysctl entries owned by the tracing core. init_trace_sysctls()
 * registers this table under "kernel".
 */
static const struct ctl_table trace_sysctl_table[] = {
	{
		/* String stored in ftrace_dump_on_oops[], at most MAX_TRACER_SIZE bytes */
		.procname	= "ftrace_dump_on_oops",
		.data		= &ftrace_dump_on_oops,
		.maxlen		= MAX_TRACER_SIZE,
		.mode		= 0644,
		.proc_handler	= proc_dostring,
	},
	{
		/* Integer backing __disable_trace_on_warning */
		.procname	= "traceoff_on_warning",
		.data		= &__disable_trace_on_warning,
		.maxlen		= sizeof(__disable_trace_on_warning),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* Integer backing tracepoint_printk, with a dedicated handler */
		.procname	= "tracepoint_printk",
		.data		= &tracepoint_printk,
		.maxlen		= sizeof(tracepoint_printk),
		.mode		= 0644,
		.proc_handler	= tracepoint_printk_sysctl,
	},
};
174 
/* Register trace_sysctl_table under "kernel"; run as a subsys initcall. */
static int __init init_trace_sysctls(void)
{
	register_sysctl_init("kernel", trace_sysctl_table);
	return 0;
}
subsys_initcall(init_trace_sysctls);
181 
182 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
183 /* Map of enums to their values, for "eval_map" file */
/* First element of a saved eval map array (see union trace_eval_map_item) */
struct trace_eval_map_head {
	struct module			*mod;		/* owning module, if not built in */
	unsigned long			length;		/* count of the saved maps */
};
188 
189 union trace_eval_map_item;
190 
/* Last element of a saved eval map array (see union trace_eval_map_item) */
struct trace_eval_map_tail {
	/*
	 * "end" is first and points to NULL as it must be different
	 * than "mod" or "eval_string"
	 *
	 * NOTE(review): in this definition "next" is declared before
	 * "end"; confirm the wording above against struct trace_eval_map.
	 */
	union trace_eval_map_item	*next;	/* next array of saved items */
	const char			*end;	/* points to NULL */
};
199 
200 static DEFINE_MUTEX(trace_eval_mutex);
201 
202 /*
203  * The trace_eval_maps are saved in an array with two extra elements,
204  * one at the beginning, and one at the end. The beginning item contains
205  * the count of the saved maps (head.length), and the module they
206  * belong to if not built in (head.mod). The ending item contains a
207  * pointer to the next array of saved eval_map items.
208  */
/* One slot of a saved eval map array; which member is valid depends on position */
union trace_eval_map_item {
	struct trace_eval_map		map;	/* a regular entry */
	struct trace_eval_map_head	head;	/* the extra element at the beginning */
	struct trace_eval_map_tail	tail;	/* the extra element at the end */
};
214 
215 static union trace_eval_map_item *trace_eval_maps;
216 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
217 
218 int tracing_set_tracer(struct trace_array *tr, const char *buf);
219 static void ftrace_trace_userstack(struct trace_array *tr,
220 				   struct trace_buffer *buffer,
221 				   unsigned int trace_ctx);
222 
223 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
224 static char *default_bootup_tracer;
225 
226 static bool allocate_snapshot;
227 static bool snapshot_at_boot;
228 
229 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
230 static int boot_instance_index;
231 
232 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
233 static int boot_snapshot_index;
234 
235 static int __init set_cmdline_ftrace(char *str)
236 {
237 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
238 	default_bootup_tracer = bootup_tracer_buf;
239 	/* We are using ftrace early, expand it */
240 	trace_set_ring_buffer_expanded(NULL);
241 	return 1;
242 }
243 __setup("ftrace=", set_cmdline_ftrace);
244 
245 int ftrace_dump_on_oops_enabled(void)
246 {
247 	if (!strcmp("0", ftrace_dump_on_oops))
248 		return 0;
249 	else
250 		return 1;
251 }
252 
253 static int __init set_ftrace_dump_on_oops(char *str)
254 {
255 	if (!*str) {
256 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
257 		return 1;
258 	}
259 
260 	if (*str == ',') {
261 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
262 		strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
263 		return 1;
264 	}
265 
266 	if (*str++ == '=') {
267 		strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
268 		return 1;
269 	}
270 
271 	return 0;
272 }
273 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
274 
275 static int __init stop_trace_on_warning(char *str)
276 {
277 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
278 		__disable_trace_on_warning = 1;
279 	return 1;
280 }
281 __setup("traceoff_on_warning", stop_trace_on_warning);
282 
283 static int __init boot_alloc_snapshot(char *str)
284 {
285 	char *slot = boot_snapshot_info + boot_snapshot_index;
286 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
287 	int ret;
288 
289 	if (str[0] == '=') {
290 		str++;
291 		if (strlen(str) >= left)
292 			return -1;
293 
294 		ret = snprintf(slot, left, "%s\t", str);
295 		boot_snapshot_index += ret;
296 	} else {
297 		allocate_snapshot = true;
298 		/* We also need the main ring buffer expanded */
299 		trace_set_ring_buffer_expanded(NULL);
300 	}
301 	return 1;
302 }
303 __setup("alloc_snapshot", boot_alloc_snapshot);
304 
305 
306 static int __init boot_snapshot(char *str)
307 {
308 	snapshot_at_boot = true;
309 	boot_alloc_snapshot(str);
310 	return 1;
311 }
312 __setup("ftrace_boot_snapshot", boot_snapshot);
313 
314 
315 static int __init boot_instance(char *str)
316 {
317 	char *slot = boot_instance_info + boot_instance_index;
318 	int left = sizeof(boot_instance_info) - boot_instance_index;
319 	int ret;
320 
321 	if (strlen(str) >= left)
322 		return -1;
323 
324 	ret = snprintf(slot, left, "%s\t", str);
325 	boot_instance_index += ret;
326 
327 	return 1;
328 }
329 __setup("trace_instance=", boot_instance);
330 
331 
332 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
333 
334 static int __init set_trace_boot_options(char *str)
335 {
336 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
337 	return 1;
338 }
339 __setup("trace_options=", set_trace_boot_options);
340 
341 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
342 static char *trace_boot_clock __initdata;
343 
344 static int __init set_trace_boot_clock(char *str)
345 {
346 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
347 	trace_boot_clock = trace_boot_clock_buf;
348 	return 1;
349 }
350 __setup("trace_clock=", set_trace_boot_clock);
351 
352 static int __init set_tracepoint_printk(char *str)
353 {
354 	/* Ignore the "tp_printk_stop_on_boot" param */
355 	if (*str == '_')
356 		return 0;
357 
358 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
359 		tracepoint_printk = 1;
360 	return 1;
361 }
362 __setup("tp_printk", set_tracepoint_printk);
363 
/* "tp_printk_stop_on_boot" boot parameter: @str is ignored, the flag is set. */
static int __init set_tracepoint_printk_stop(char *str)
{
	tracepoint_printk_stop_on_boot = true;
	return 1;
}
__setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
370 
/* "traceoff_after_boot" boot parameter: @str is ignored, the flag is set. */
static int __init set_traceoff_after_boot(char *str)
{
	traceoff_after_boot = true;
	return 1;
}
__setup("traceoff_after_boot", set_traceoff_after_boot);
377 
378 unsigned long long ns2usecs(u64 nsec)
379 {
380 	nsec += 500;
381 	do_div(nsec, 1000);
382 	return nsec;
383 }
384 
385 static void
386 trace_process_export(struct trace_export *export,
387 	       struct ring_buffer_event *event, int flag)
388 {
389 	struct trace_entry *entry;
390 	unsigned int size = 0;
391 
392 	if (export->flags & flag) {
393 		entry = ring_buffer_event_data(event);
394 		size = ring_buffer_event_length(event);
395 		export->write(export, entry, size);
396 	}
397 }
398 
399 static DEFINE_MUTEX(ftrace_export_lock);
400 
401 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
402 
403 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
404 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
405 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
406 
407 static inline void ftrace_exports_enable(struct trace_export *export)
408 {
409 	if (export->flags & TRACE_EXPORT_FUNCTION)
410 		static_branch_inc(&trace_function_exports_enabled);
411 
412 	if (export->flags & TRACE_EXPORT_EVENT)
413 		static_branch_inc(&trace_event_exports_enabled);
414 
415 	if (export->flags & TRACE_EXPORT_MARKER)
416 		static_branch_inc(&trace_marker_exports_enabled);
417 }
418 
419 static inline void ftrace_exports_disable(struct trace_export *export)
420 {
421 	if (export->flags & TRACE_EXPORT_FUNCTION)
422 		static_branch_dec(&trace_function_exports_enabled);
423 
424 	if (export->flags & TRACE_EXPORT_EVENT)
425 		static_branch_dec(&trace_event_exports_enabled);
426 
427 	if (export->flags & TRACE_EXPORT_MARKER)
428 		static_branch_dec(&trace_marker_exports_enabled);
429 }
430 
431 static void ftrace_exports(struct ring_buffer_event *event, int flag)
432 {
433 	struct trace_export *export;
434 
435 	guard(preempt_notrace)();
436 
437 	export = rcu_dereference_raw_check(ftrace_exports_list);
438 	while (export) {
439 		trace_process_export(export, event, flag);
440 		export = rcu_dereference_raw_check(export->next);
441 	}
442 }
443 
/* Push @export onto the front of the list whose head pointer is @list. */
static inline void
add_trace_export(struct trace_export **list, struct trace_export *export)
{
	/* Link the new entry to the current head first ... */
	rcu_assign_pointer(export->next, *list);
	/*
	 * We are entering export into the list but another
	 * CPU might be walking that list. We need to make sure
	 * the export->next pointer is valid before another CPU sees
	 * the export pointer included into the list.
	 */
	/* ... and only then publish it as the new head. */
	rcu_assign_pointer(*list, export);
}
456 
457 static inline int
458 rm_trace_export(struct trace_export **list, struct trace_export *export)
459 {
460 	struct trace_export **p;
461 
462 	for (p = list; *p != NULL; p = &(*p)->next)
463 		if (*p == export)
464 			break;
465 
466 	if (*p != export)
467 		return -1;
468 
469 	rcu_assign_pointer(*p, (*p)->next);
470 
471 	return 0;
472 }
473 
/* Bump the static keys for @export's flags, then link it into @list. */
static inline void
add_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	ftrace_exports_enable(export);

	add_trace_export(list, export);
}
481 
/*
 * Unlink @export from @list and drop the static keys taken for it.
 *
 * The keys are only dropped when @export was actually found on the
 * list: an export that is not on the list has no matching
 * ftrace_exports_enable() to undo, so decrementing unconditionally
 * would unbalance the counters.
 *
 * Returns 0 on success, -1 when @export is not on the list.
 */
static inline int
rm_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	int ret;

	ret = rm_trace_export(list, export);
	if (!ret)
		ftrace_exports_disable(export);

	return ret;
}
492 
/*
 * Add @export to ftrace_exports_list under ftrace_export_lock.
 * Returns 0 on success, or -1 (with a one-time warning) when @export
 * has no write() callback.
 */
int register_ftrace_export(struct trace_export *export)
{
	if (WARN_ON_ONCE(!export->write))
		return -1;

	/* Held until the function returns */
	guard(mutex)(&ftrace_export_lock);

	add_ftrace_export(&ftrace_exports_list, export);

	return 0;
}
EXPORT_SYMBOL_GPL(register_ftrace_export);
505 
/*
 * Remove @export from ftrace_exports_list under ftrace_export_lock.
 * Returns whatever rm_ftrace_export() returns.
 */
int unregister_ftrace_export(struct trace_export *export)
{
	guard(mutex)(&ftrace_export_lock);
	return rm_ftrace_export(&ftrace_exports_list, export);
}
EXPORT_SYMBOL_GPL(unregister_ftrace_export);
512 
513 /* trace_flags holds trace_options default values */
514 #define TRACE_DEFAULT_FLAGS						\
515 	(FUNCTION_DEFAULT_FLAGS |					\
516 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
517 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
518 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
519 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
520 	 TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK |		\
521 	 TRACE_ITER_COPY_MARKER)
522 
523 /* trace_options that are only supported by global_trace */
524 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
525 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
526 
527 /* trace_flags that are default zero for instances */
528 #define ZEROED_TRACE_FLAGS \
529 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK | \
530 	 TRACE_ITER_COPY_MARKER)
531 
532 /*
533  * The global_trace is the descriptor that holds the top-level tracing
534  * buffers for the live tracing.
535  */
536 static struct trace_array global_trace = {
537 	.trace_flags = TRACE_DEFAULT_FLAGS,
538 };
539 
540 static struct trace_array *printk_trace = &global_trace;
541 
542 /* List of trace_arrays interested in the top level trace_marker */
543 static LIST_HEAD(marker_copies);
544 
545 static __always_inline bool printk_binsafe(struct trace_array *tr)
546 {
547 	/*
548 	 * The binary format of traceprintk can cause a crash if used
549 	 * by a buffer from another boot. Force the use of the
550 	 * non binary version of trace_printk if the trace_printk
551 	 * buffer is a boot mapped ring buffer.
552 	 */
553 	return !(tr->flags & TRACE_ARRAY_FL_BOOT);
554 }
555 
556 static void update_printk_trace(struct trace_array *tr)
557 {
558 	if (printk_trace == tr)
559 		return;
560 
561 	printk_trace->trace_flags &= ~TRACE_ITER_TRACE_PRINTK;
562 	printk_trace = tr;
563 	tr->trace_flags |= TRACE_ITER_TRACE_PRINTK;
564 }
565 
566 /* Returns true if the status of tr changed */
567 static bool update_marker_trace(struct trace_array *tr, int enabled)
568 {
569 	lockdep_assert_held(&event_mutex);
570 
571 	if (enabled) {
572 		if (!list_empty(&tr->marker_list))
573 			return false;
574 
575 		list_add_rcu(&tr->marker_list, &marker_copies);
576 		tr->trace_flags |= TRACE_ITER_COPY_MARKER;
577 		return true;
578 	}
579 
580 	if (list_empty(&tr->marker_list))
581 		return false;
582 
583 	list_del_init(&tr->marker_list);
584 	tr->trace_flags &= ~TRACE_ITER_COPY_MARKER;
585 	return true;
586 }
587 
588 void trace_set_ring_buffer_expanded(struct trace_array *tr)
589 {
590 	if (!tr)
591 		tr = &global_trace;
592 	tr->ring_buffer_expanded = true;
593 }
594 
595 LIST_HEAD(ftrace_trace_arrays);
596 
597 int trace_array_get(struct trace_array *this_tr)
598 {
599 	struct trace_array *tr;
600 
601 	guard(mutex)(&trace_types_lock);
602 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
603 		if (tr == this_tr) {
604 			tr->ref++;
605 			return 0;
606 		}
607 	}
608 
609 	return -ENODEV;
610 }
611 
/*
 * Drop one reference on @this_tr, warning if the count is already zero.
 * No lock is taken here; trace_array_put() calls this with
 * trace_types_lock held.
 */
static void __trace_array_put(struct trace_array *this_tr)
{
	WARN_ON(!this_tr->ref);
	this_tr->ref--;
}
617 
618 /**
619  * trace_array_put - Decrement the reference counter for this trace array.
620  * @this_tr : pointer to the trace array
621  *
622  * NOTE: Use this when we no longer need the trace array returned by
623  * trace_array_get_by_name(). This ensures the trace array can be later
624  * destroyed.
625  *
626  */
627 void trace_array_put(struct trace_array *this_tr)
628 {
629 	if (!this_tr)
630 		return;
631 
632 	guard(mutex)(&trace_types_lock);
633 	__trace_array_put(this_tr);
634 }
635 EXPORT_SYMBOL_GPL(trace_array_put);
636 
637 int tracing_check_open_get_tr(struct trace_array *tr)
638 {
639 	int ret;
640 
641 	ret = security_locked_down(LOCKDOWN_TRACEFS);
642 	if (ret)
643 		return ret;
644 
645 	if (tracing_disabled)
646 		return -ENODEV;
647 
648 	if (tr && trace_array_get(tr) < 0)
649 		return -ENODEV;
650 
651 	return 0;
652 }
653 
/**
 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
 * @filtered_pids: The list of pids to check
 * @search_pid: The PID to find in @filtered_pids
 *
 * Thin wrapper around trace_pid_list_is_set().
 *
 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
 */
bool
trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
{
	return trace_pid_list_is_set(filtered_pids, search_pid);
}
666 
667 /**
668  * trace_ignore_this_task - should a task be ignored for tracing
669  * @filtered_pids: The list of pids to check
670  * @filtered_no_pids: The list of pids not to be traced
671  * @task: The task that should be ignored if not filtered
672  *
673  * Checks if @task should be traced or not from @filtered_pids.
674  * Returns true if @task should *NOT* be traced.
675  * Returns false if @task should be traced.
676  */
677 bool
678 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
679 		       struct trace_pid_list *filtered_no_pids,
680 		       struct task_struct *task)
681 {
682 	/*
683 	 * If filtered_no_pids is not empty, and the task's pid is listed
684 	 * in filtered_no_pids, then return true.
685 	 * Otherwise, if filtered_pids is empty, that means we can
686 	 * trace all tasks. If it has content, then only trace pids
687 	 * within filtered_pids.
688 	 */
689 
690 	return (filtered_pids &&
691 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
692 		(filtered_no_pids &&
693 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
694 }
695 
696 /**
697  * trace_filter_add_remove_task - Add or remove a task from a pid_list
698  * @pid_list: The list to modify
699  * @self: The current task for fork or NULL for exit
700  * @task: The task to add or remove
701  *
702  * If adding a task, if @self is defined, the task is only added if @self
703  * is also included in @pid_list. This happens on fork and tasks should
704  * only be added when the parent is listed. If @self is NULL, then the
705  * @task pid will be removed from the list, which would happen on exit
706  * of a task.
707  */
708 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
709 				  struct task_struct *self,
710 				  struct task_struct *task)
711 {
712 	if (!pid_list)
713 		return;
714 
715 	/* For forks, we only add if the forking task is listed */
716 	if (self) {
717 		if (!trace_find_filtered_pid(pid_list, self->pid))
718 			return;
719 	}
720 
721 	/* "self" is set for forks, and NULL for exits */
722 	if (self)
723 		trace_pid_list_set(pid_list, task->pid);
724 	else
725 		trace_pid_list_clear(pid_list, task->pid);
726 }
727 
728 /**
729  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
730  * @pid_list: The pid list to show
731  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
732  * @pos: The position of the file
733  *
734  * This is used by the seq_file "next" operation to iterate the pids
735  * listed in a trace_pid_list structure.
736  *
737  * Returns the pid+1 as we want to display pid of zero, but NULL would
738  * stop the iteration.
739  */
740 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
741 {
742 	long pid = (unsigned long)v;
743 	unsigned int next;
744 
745 	(*pos)++;
746 
747 	/* pid already is +1 of the actual previous bit */
748 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
749 		return NULL;
750 
751 	pid = next;
752 
753 	/* Return pid + 1 to allow zero to be represented */
754 	return (void *)(pid + 1);
755 }
756 
757 /**
758  * trace_pid_start - Used for seq_file to start reading pid lists
759  * @pid_list: The pid list to show
760  * @pos: The position of the file
761  *
762  * This is used by seq_file "start" operation to start the iteration
763  * of listing pids.
764  *
765  * Returns the pid+1 as we want to display pid of zero, but NULL would
766  * stop the iteration.
767  */
768 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
769 {
770 	unsigned long pid;
771 	unsigned int first;
772 	loff_t l = 0;
773 
774 	if (trace_pid_list_first(pid_list, &first) < 0)
775 		return NULL;
776 
777 	pid = first;
778 
779 	/* Return pid + 1 so that zero can be the exit value */
780 	for (pid++; pid && l < *pos;
781 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
782 		;
783 	return (void *)pid;
784 }
785 
/**
 * trace_pid_show - show the current pid in seq_file processing
 * @m: The seq_file structure to write into
 * @v: A void pointer of the pid (+1) value to display
 *
 * Can be directly used by seq_file operations to display the current
 * pid value. The +1 encoding of @v is undone before printing.
 *
 * Always returns 0.
 */
int trace_pid_show(struct seq_file *m, void *v)
{
	seq_printf(m, "%lu\n", (unsigned long)v - 1);

	return 0;
}
801 
802 /* 128 should be much more than enough */
803 #define PID_BUF_SIZE		127
804 
/*
 * trace_pid_write - build a new pid list from an existing list plus user input
 * @filtered_pids: existing list whose pids are copied into the new one, or NULL
 * @new_pid_list: where the resulting list is stored; set to NULL when the
 *                result contains no pids
 * @ubuf: user buffer that is parsed for pid numbers
 * @cnt: number of bytes available in @ubuf
 *
 * Returns the number of bytes consumed from @ubuf on success. On failure a
 * negative value is returned, the new list is freed and @new_pid_list is
 * left untouched.
 */
int trace_pid_write(struct trace_pid_list *filtered_pids,
		    struct trace_pid_list **new_pid_list,
		    const char __user *ubuf, size_t cnt)
{
	struct trace_pid_list *pid_list;
	struct trace_parser parser;
	unsigned long val;
	int nr_pids = 0;
	ssize_t read = 0;
	ssize_t ret;
	loff_t pos;
	pid_t pid;

	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
		return -ENOMEM;

	/*
	 * Always recreate a new array. The write is an all or nothing
	 * operation. Always create a new array when adding new pids by
	 * the user. If the operation fails, then the current list is
	 * not modified.
	 */
	pid_list = trace_pid_list_alloc();
	if (!pid_list) {
		trace_parser_put(&parser);
		return -ENOMEM;
	}

	if (filtered_pids) {
		/* copy the current bits to the new max */
		ret = trace_pid_list_first(filtered_pids, &pid);
		while (!ret) {
			ret = trace_pid_list_set(pid_list, pid);
			if (ret < 0)
				goto out;

			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
			nr_pids++;
		}
	}

	/*
	 * The copy loop above ends with a non-zero ret once there are no
	 * more pids to copy; that is not an error, so start clean.
	 */
	ret = 0;
	while (cnt > 0) {

		/* Each chunk is parsed from the start of the advanced ubuf */
		pos = 0;

		ret = trace_get_user(&parser, ubuf, cnt, &pos);
		if (ret < 0)
			break;

		read += ret;
		ubuf += ret;
		cnt -= ret;

		/* Nothing was loaded into the parser: stop without an error */
		if (!trace_parser_loaded(&parser))
			break;

		/* Preset the error for a token that is not a number */
		ret = -EINVAL;
		if (kstrtoul(parser.buffer, 0, &val))
			break;

		pid = (pid_t)val;

		if (trace_pid_list_set(pid_list, pid) < 0) {
			ret = -1;
			break;
		}
		nr_pids++;

		trace_parser_clear(&parser);
		ret = 0;
	}
 out:
	trace_parser_put(&parser);

	/* Any failure discards the new list; the caller's list is untouched */
	if (ret < 0) {
		trace_pid_list_free(pid_list);
		return ret;
	}

	if (!nr_pids) {
		/* Cleared the list of pids */
		trace_pid_list_free(pid_list);
		pid_list = NULL;
	}

	*new_pid_list = pid_list;

	return read;
}
895 
896 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
897 {
898 	u64 ts;
899 
900 	/* Early boot up does not have a buffer yet */
901 	if (!buf->buffer)
902 		return trace_clock_local();
903 
904 	ts = ring_buffer_time_stamp(buf->buffer);
905 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
906 
907 	return ts;
908 }
909 
/* Current timestamp of global_trace's array_buffer for @cpu. */
u64 ftrace_now(int cpu)
{
	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
}
914 
/**
 * tracing_is_enabled - Show if global_trace has been enabled
 *
 * Shows if the global trace has been enabled or not. It uses the
 * mirror flag "buffer_disabled" to be used in fast paths such as for
 * the irqsoff tracer. But it may be inaccurate due to races. If you
 * need to know the accurate state, use tracing_is_on() which is a little
 * slower, but accurate.
 *
 * Returns non-zero when global_trace.buffer_disabled is clear, and
 * zero otherwise.
 */
int tracing_is_enabled(void)
{
	/*
	 * For quick access (irqsoff uses this in fast path), just
	 * return the mirror variable of the state of the ring buffer.
	 * It's a little racy, but we don't really care.
	 */
	return !global_trace.buffer_disabled;
}
933 
934 /*
935  * trace_buf_size is the size in bytes that is allocated
936  * for a buffer. Note, the number of bytes is always rounded
937  * to page size.
938  *
939  * This number is purposely set to a low number of 16384.
940  * If the dump on oops happens, it will be much appreciated
941  * to not have to wait for all that output. Anyway this can be
942  * boot time and run time configurable.
943  */
944 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
945 
946 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
947 
948 /* trace_types holds a link list of available tracers. */
949 static struct tracer		*trace_types __read_mostly;
950 
951 /*
952  * trace_types_lock is used to protect the trace_types list.
953  */
954 DEFINE_MUTEX(trace_types_lock);
955 
956 /*
957  * serialize the access of the ring buffer
958  *
 * The ring buffer serializes readers, but that is only low level protection.
 * The validity of the events (as returned by ring_buffer_peek() etc.)
 * is not protected by the ring buffer.
 *
 * The content of events may become garbage if we allow other processes to
 * consume these events concurrently:
965  *   A) the page of the consumed events may become a normal page
966  *      (not reader page) in ring buffer, and this page will be rewritten
967  *      by events producer.
968  *   B) The page of the consumed events may become a page for splice_read,
969  *      and this page will be returned to system.
970  *
971  * These primitives allow multi process access to different cpu ring buffer
972  * concurrently.
973  *
974  * These primitives don't distinguish read-only and read-consume access.
975  * Multi read-only access are also serialized.
976  */
977 
978 #ifdef CONFIG_SMP
979 static DECLARE_RWSEM(all_cpu_access_lock);
980 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
981 
982 static inline void trace_access_lock(int cpu)
983 {
984 	if (cpu == RING_BUFFER_ALL_CPUS) {
985 		/* gain it for accessing the whole ring buffer. */
986 		down_write(&all_cpu_access_lock);
987 	} else {
988 		/* gain it for accessing a cpu ring buffer. */
989 
990 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
991 		down_read(&all_cpu_access_lock);
992 
993 		/* Secondly block other access to this @cpu ring buffer. */
994 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
995 	}
996 }
997 
998 static inline void trace_access_unlock(int cpu)
999 {
1000 	if (cpu == RING_BUFFER_ALL_CPUS) {
1001 		up_write(&all_cpu_access_lock);
1002 	} else {
1003 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
1004 		up_read(&all_cpu_access_lock);
1005 	}
1006 }
1007 
/* Initialize the per-cpu cpu_access_lock mutex of every possible CPU. */
static inline void trace_access_lock_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		mutex_init(&per_cpu(cpu_access_lock, cpu));
}
1015 
1016 #else
1017 
1018 static DEFINE_MUTEX(access_lock);
1019 
/* !CONFIG_SMP: a single mutex serializes every access; @cpu is unused. */
static inline void trace_access_lock(int cpu)
{
	(void)cpu;
	mutex_lock(&access_lock);
}
1025 
/* !CONFIG_SMP: release the single access mutex; @cpu is unused. */
static inline void trace_access_unlock(int cpu)
{
	(void)cpu;
	mutex_unlock(&access_lock);
}
1031 
/* !CONFIG_SMP: access_lock is defined with DEFINE_MUTEX(), nothing to set up. */
static inline void trace_access_lock_init(void)
{
}
1035 
1036 #endif
1037 
1038 #ifdef CONFIG_STACKTRACE
1039 static void __ftrace_trace_stack(struct trace_array *tr,
1040 				 struct trace_buffer *buffer,
1041 				 unsigned int trace_ctx,
1042 				 int skip, struct pt_regs *regs);
1043 static inline void ftrace_trace_stack(struct trace_array *tr,
1044 				      struct trace_buffer *buffer,
1045 				      unsigned int trace_ctx,
1046 				      int skip, struct pt_regs *regs);
1047 
1048 #else
/* !CONFIG_STACKTRACE: empty stub so that callers compile unchanged. */
static inline void __ftrace_trace_stack(struct trace_array *tr,
					struct trace_buffer *buffer,
					unsigned int trace_ctx,
					int skip, struct pt_regs *regs)
{
}
/*
 * !CONFIG_STACKTRACE: empty stub so that callers compile unchanged.
 * @trace_ctx is unsigned int, matching the CONFIG_STACKTRACE prototype
 * and the __ftrace_trace_stack() stub.
 */
static inline void ftrace_trace_stack(struct trace_array *tr,
				      struct trace_buffer *buffer,
				      unsigned int trace_ctx,
				      int skip, struct pt_regs *regs)
{
}
1061 
1062 #endif
1063 
1064 static __always_inline void
1065 trace_event_setup(struct ring_buffer_event *event,
1066 		  int type, unsigned int trace_ctx)
1067 {
1068 	struct trace_entry *ent = ring_buffer_event_data(event);
1069 
1070 	tracing_generic_entry_update(ent, type, trace_ctx);
1071 }
1072 
1073 static __always_inline struct ring_buffer_event *
1074 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
1075 			  int type,
1076 			  unsigned long len,
1077 			  unsigned int trace_ctx)
1078 {
1079 	struct ring_buffer_event *event;
1080 
1081 	event = ring_buffer_lock_reserve(buffer, len);
1082 	if (event != NULL)
1083 		trace_event_setup(event, type, trace_ctx);
1084 
1085 	return event;
1086 }
1087 
1088 void tracer_tracing_on(struct trace_array *tr)
1089 {
1090 	if (tr->array_buffer.buffer)
1091 		ring_buffer_record_on(tr->array_buffer.buffer);
1092 	/*
1093 	 * This flag is looked at when buffers haven't been allocated
1094 	 * yet, or by some tracers (like irqsoff), that just want to
1095 	 * know if the ring buffer has been disabled, but it can handle
1096 	 * races of where it gets disabled but we still do a record.
1097 	 * As the check is in the fast path of the tracers, it is more
1098 	 * important to be fast than accurate.
1099 	 */
1100 	tr->buffer_disabled = 0;
1101 }
1102 
1103 /**
1104  * tracing_on - enable tracing buffers
1105  *
1106  * This function enables tracing buffers that may have been
1107  * disabled with tracing_off.
1108  */
1109 void tracing_on(void)
1110 {
1111 	tracer_tracing_on(&global_trace);
1112 }
1113 EXPORT_SYMBOL_GPL(tracing_on);
1114 
1115 
1116 static __always_inline void
1117 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1118 {
1119 	__this_cpu_write(trace_taskinfo_save, true);
1120 
1121 	/* If this is the temp buffer, we need to commit fully */
1122 	if (this_cpu_read(trace_buffered_event) == event) {
1123 		/* Length is in event->array[0] */
1124 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1125 		/* Release the temp buffer */
1126 		this_cpu_dec(trace_buffered_event_cnt);
1127 		/* ring_buffer_unlock_commit() enables preemption */
1128 		preempt_enable_notrace();
1129 	} else
1130 		ring_buffer_unlock_commit(buffer);
1131 }
1132 
1133 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1134 		       const char *str, int size)
1135 {
1136 	struct ring_buffer_event *event;
1137 	struct trace_buffer *buffer;
1138 	struct print_entry *entry;
1139 	unsigned int trace_ctx;
1140 	int alloc;
1141 
1142 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1143 		return 0;
1144 
1145 	if (unlikely(tracing_selftest_running && tr == &global_trace))
1146 		return 0;
1147 
1148 	if (unlikely(tracing_disabled))
1149 		return 0;
1150 
1151 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1152 
1153 	trace_ctx = tracing_gen_ctx();
1154 	buffer = tr->array_buffer.buffer;
1155 	guard(ring_buffer_nest)(buffer);
1156 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1157 					    trace_ctx);
1158 	if (!event)
1159 		return 0;
1160 
1161 	entry = ring_buffer_event_data(event);
1162 	entry->ip = ip;
1163 
1164 	memcpy(&entry->buf, str, size);
1165 
1166 	/* Add a newline if necessary */
1167 	if (entry->buf[size - 1] != '\n') {
1168 		entry->buf[size] = '\n';
1169 		entry->buf[size + 1] = '\0';
1170 	} else
1171 		entry->buf[size] = '\0';
1172 
1173 	__buffer_unlock_commit(buffer, event);
1174 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1175 	return size;
1176 }
1177 EXPORT_SYMBOL_GPL(__trace_array_puts);
1178 
1179 /**
1180  * __trace_puts - write a constant string into the trace buffer.
1181  * @ip:	   The address of the caller
1182  * @str:   The constant string to write
1183  * @size:  The size of the string.
1184  */
1185 int __trace_puts(unsigned long ip, const char *str, int size)
1186 {
1187 	return __trace_array_puts(printk_trace, ip, str, size);
1188 }
1189 EXPORT_SYMBOL_GPL(__trace_puts);
1190 
1191 /**
1192  * __trace_bputs - write the pointer to a constant string into trace buffer
1193  * @ip:	   The address of the caller
1194  * @str:   The constant string to write to the buffer to
1195  */
1196 int __trace_bputs(unsigned long ip, const char *str)
1197 {
1198 	struct trace_array *tr = READ_ONCE(printk_trace);
1199 	struct ring_buffer_event *event;
1200 	struct trace_buffer *buffer;
1201 	struct bputs_entry *entry;
1202 	unsigned int trace_ctx;
1203 	int size = sizeof(struct bputs_entry);
1204 
1205 	if (!printk_binsafe(tr))
1206 		return __trace_puts(ip, str, strlen(str));
1207 
1208 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1209 		return 0;
1210 
1211 	if (unlikely(tracing_selftest_running || tracing_disabled))
1212 		return 0;
1213 
1214 	trace_ctx = tracing_gen_ctx();
1215 	buffer = tr->array_buffer.buffer;
1216 
1217 	guard(ring_buffer_nest)(buffer);
1218 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1219 					    trace_ctx);
1220 	if (!event)
1221 		return 0;
1222 
1223 	entry = ring_buffer_event_data(event);
1224 	entry->ip			= ip;
1225 	entry->str			= str;
1226 
1227 	__buffer_unlock_commit(buffer, event);
1228 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1229 
1230 	return 1;
1231 }
1232 EXPORT_SYMBOL_GPL(__trace_bputs);
1233 
1234 #ifdef CONFIG_TRACER_SNAPSHOT
1235 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1236 					   void *cond_data)
1237 {
1238 	struct tracer *tracer = tr->current_trace;
1239 	unsigned long flags;
1240 
1241 	if (in_nmi()) {
1242 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1243 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1244 		return;
1245 	}
1246 
1247 	if (!tr->allocated_snapshot) {
1248 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1249 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1250 		tracer_tracing_off(tr);
1251 		return;
1252 	}
1253 
1254 	/* Note, snapshot can not be used when the tracer uses it */
1255 	if (tracer->use_max_tr) {
1256 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1257 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1258 		return;
1259 	}
1260 
1261 	if (tr->mapped) {
1262 		trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
1263 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1264 		return;
1265 	}
1266 
1267 	local_irq_save(flags);
1268 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1269 	local_irq_restore(flags);
1270 }
1271 
1272 void tracing_snapshot_instance(struct trace_array *tr)
1273 {
1274 	tracing_snapshot_instance_cond(tr, NULL);
1275 }
1276 
1277 /**
1278  * tracing_snapshot - take a snapshot of the current buffer.
1279  *
1280  * This causes a swap between the snapshot buffer and the current live
1281  * tracing buffer. You can use this to take snapshots of the live
1282  * trace when some condition is triggered, but continue to trace.
1283  *
1284  * Note, make sure to allocate the snapshot with either
1285  * a tracing_snapshot_alloc(), or by doing it manually
1286  * with: echo 1 > /sys/kernel/tracing/snapshot
1287  *
1288  * If the snapshot buffer is not allocated, it will stop tracing.
1289  * Basically making a permanent snapshot.
1290  */
1291 void tracing_snapshot(void)
1292 {
1293 	struct trace_array *tr = &global_trace;
1294 
1295 	tracing_snapshot_instance(tr);
1296 }
1297 EXPORT_SYMBOL_GPL(tracing_snapshot);
1298 
/**
 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
 * @tr:		The tracing instance to snapshot
 * @cond_data:	The data to be tested conditionally, and possibly saved
 *
 * Works like tracing_snapshot(), but the snapshot is conditional: it is
 * taken only when the cond_snapshot.update() implementation, given
 * @cond_data, returns true. A true return means the update() operation
 * of the trace array's cond_snapshot used @cond_data to decide that the
 * snapshot should be taken, and presumably saved it along with the
 * snapshot.
 */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	tracing_snapshot_instance_cond(tr, cond_data);
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1317 
1318 /**
1319  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1320  * @tr:		The tracing instance
1321  *
1322  * When the user enables a conditional snapshot using
1323  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1324  * with the snapshot.  This accessor is used to retrieve it.
1325  *
1326  * Should not be called from cond_snapshot.update(), since it takes
1327  * the tr->max_lock lock, which the code calling
1328  * cond_snapshot.update() has already done.
1329  *
1330  * Returns the cond_data associated with the trace array's snapshot.
1331  */
1332 void *tracing_cond_snapshot_data(struct trace_array *tr)
1333 {
1334 	void *cond_data = NULL;
1335 
1336 	local_irq_disable();
1337 	arch_spin_lock(&tr->max_lock);
1338 
1339 	if (tr->cond_snapshot)
1340 		cond_data = tr->cond_snapshot->cond_data;
1341 
1342 	arch_spin_unlock(&tr->max_lock);
1343 	local_irq_enable();
1344 
1345 	return cond_data;
1346 }
1347 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1348 
/* Forward declarations of helpers used by the snapshot code below. */
static int
resize_buffer_duplicate_size(struct array_buffer *trace_buf,
			     struct array_buffer *size_buf, int cpu_id);
static void
set_buffer_entries(struct array_buffer *buf, unsigned long val);
1352 
1353 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1354 {
1355 	int order;
1356 	int ret;
1357 
1358 	if (!tr->allocated_snapshot) {
1359 
1360 		/* Make the snapshot buffer have the same order as main buffer */
1361 		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1362 		ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1363 		if (ret < 0)
1364 			return ret;
1365 
1366 		/* allocate spare buffer */
1367 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1368 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1369 		if (ret < 0)
1370 			return ret;
1371 
1372 		tr->allocated_snapshot = true;
1373 	}
1374 
1375 	return 0;
1376 }
1377 
1378 static void free_snapshot(struct trace_array *tr)
1379 {
1380 	/*
1381 	 * We don't free the ring buffer. instead, resize it because
1382 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
1383 	 * we want preserve it.
1384 	 */
1385 	ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1386 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1387 	set_buffer_entries(&tr->max_buffer, 1);
1388 	tracing_reset_online_cpus(&tr->max_buffer);
1389 	tr->allocated_snapshot = false;
1390 }
1391 
1392 static int tracing_arm_snapshot_locked(struct trace_array *tr)
1393 {
1394 	int ret;
1395 
1396 	lockdep_assert_held(&trace_types_lock);
1397 
1398 	spin_lock(&tr->snapshot_trigger_lock);
1399 	if (tr->snapshot == UINT_MAX || tr->mapped) {
1400 		spin_unlock(&tr->snapshot_trigger_lock);
1401 		return -EBUSY;
1402 	}
1403 
1404 	tr->snapshot++;
1405 	spin_unlock(&tr->snapshot_trigger_lock);
1406 
1407 	ret = tracing_alloc_snapshot_instance(tr);
1408 	if (ret) {
1409 		spin_lock(&tr->snapshot_trigger_lock);
1410 		tr->snapshot--;
1411 		spin_unlock(&tr->snapshot_trigger_lock);
1412 	}
1413 
1414 	return ret;
1415 }
1416 
1417 int tracing_arm_snapshot(struct trace_array *tr)
1418 {
1419 	guard(mutex)(&trace_types_lock);
1420 	return tracing_arm_snapshot_locked(tr);
1421 }
1422 
1423 void tracing_disarm_snapshot(struct trace_array *tr)
1424 {
1425 	spin_lock(&tr->snapshot_trigger_lock);
1426 	if (!WARN_ON(!tr->snapshot))
1427 		tr->snapshot--;
1428 	spin_unlock(&tr->snapshot_trigger_lock);
1429 }
1430 
1431 /**
1432  * tracing_alloc_snapshot - allocate snapshot buffer.
1433  *
1434  * This only allocates the snapshot buffer if it isn't already
1435  * allocated - it doesn't also take a snapshot.
1436  *
1437  * This is meant to be used in cases where the snapshot buffer needs
1438  * to be set up for events that can't sleep but need to be able to
1439  * trigger a snapshot.
1440  */
1441 int tracing_alloc_snapshot(void)
1442 {
1443 	struct trace_array *tr = &global_trace;
1444 	int ret;
1445 
1446 	ret = tracing_alloc_snapshot_instance(tr);
1447 	WARN_ON(ret < 0);
1448 
1449 	return ret;
1450 }
1451 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1452 
/**
 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
 *
 * Like tracing_snapshot(), except that the snapshot buffer is allocated
 * first if it isn't already. Only use this where sleeping is safe, as
 * the allocation may sleep.
 *
 * The snapshot buffer and the live tracing buffer are swapped. This
 * lets a snapshot of the live trace be taken when some condition is
 * triggered, while tracing carries on. Nothing is snapshotted if the
 * allocation fails.
 */
void tracing_snapshot_alloc(void)
{
	if (tracing_alloc_snapshot() >= 0)
		tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1475 
1476 /**
1477  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1478  * @tr:		The tracing instance
1479  * @cond_data:	User data to associate with the snapshot
1480  * @update:	Implementation of the cond_snapshot update function
1481  *
1482  * Check whether the conditional snapshot for the given instance has
1483  * already been enabled, or if the current tracer is already using a
1484  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1485  * save the cond_data and update function inside.
1486  *
1487  * Returns 0 if successful, error otherwise.
1488  */
1489 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1490 				 cond_update_fn_t update)
1491 {
1492 	struct cond_snapshot *cond_snapshot __free(kfree) =
1493 		kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1494 	int ret;
1495 
1496 	if (!cond_snapshot)
1497 		return -ENOMEM;
1498 
1499 	cond_snapshot->cond_data = cond_data;
1500 	cond_snapshot->update = update;
1501 
1502 	guard(mutex)(&trace_types_lock);
1503 
1504 	if (tr->current_trace->use_max_tr)
1505 		return -EBUSY;
1506 
1507 	/*
1508 	 * The cond_snapshot can only change to NULL without the
1509 	 * trace_types_lock. We don't care if we race with it going
1510 	 * to NULL, but we want to make sure that it's not set to
1511 	 * something other than NULL when we get here, which we can
1512 	 * do safely with only holding the trace_types_lock and not
1513 	 * having to take the max_lock.
1514 	 */
1515 	if (tr->cond_snapshot)
1516 		return -EBUSY;
1517 
1518 	ret = tracing_arm_snapshot_locked(tr);
1519 	if (ret)
1520 		return ret;
1521 
1522 	local_irq_disable();
1523 	arch_spin_lock(&tr->max_lock);
1524 	tr->cond_snapshot = no_free_ptr(cond_snapshot);
1525 	arch_spin_unlock(&tr->max_lock);
1526 	local_irq_enable();
1527 
1528 	return 0;
1529 }
1530 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1531 
1532 /**
1533  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1534  * @tr:		The tracing instance
1535  *
1536  * Check whether the conditional snapshot for the given instance is
1537  * enabled; if so, free the cond_snapshot associated with it,
1538  * otherwise return -EINVAL.
1539  *
1540  * Returns 0 if successful, error otherwise.
1541  */
1542 int tracing_snapshot_cond_disable(struct trace_array *tr)
1543 {
1544 	int ret = 0;
1545 
1546 	local_irq_disable();
1547 	arch_spin_lock(&tr->max_lock);
1548 
1549 	if (!tr->cond_snapshot)
1550 		ret = -EINVAL;
1551 	else {
1552 		kfree(tr->cond_snapshot);
1553 		tr->cond_snapshot = NULL;
1554 	}
1555 
1556 	arch_spin_unlock(&tr->max_lock);
1557 	local_irq_enable();
1558 
1559 	tracing_disarm_snapshot(tr);
1560 
1561 	return ret;
1562 }
1563 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1564 #else
/* Snapshot support is compiled out: warn once and do nothing. */
void tracing_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot);
/* Snapshot support is compiled out: warn once; both arguments are unused. */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1575 int tracing_alloc_snapshot(void)
1576 {
1577 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1578 	return -ENODEV;
1579 }
1580 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
/* Snapshot support is compiled out: defer to tracing_snapshot() for its warning. */
void tracing_snapshot_alloc(void)
{
	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1587 void *tracing_cond_snapshot_data(struct trace_array *tr)
1588 {
1589 	return NULL;
1590 }
1591 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1592 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1593 {
1594 	return -ENODEV;
1595 }
1596 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
/* Snapshot support is compiled out: nothing to disable, returns 0. */
int tracing_snapshot_cond_disable(struct trace_array *tr)
{
	return 0;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
/* Snapshot support is compiled out: freeing is a no-op, arming yields -EBUSY. */
#define free_snapshot(tr)	do { } while (0)
#define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1604 #endif /* CONFIG_TRACER_SNAPSHOT */
1605 
1606 void tracer_tracing_off(struct trace_array *tr)
1607 {
1608 	if (tr->array_buffer.buffer)
1609 		ring_buffer_record_off(tr->array_buffer.buffer);
1610 	/*
1611 	 * This flag is looked at when buffers haven't been allocated
1612 	 * yet, or by some tracers (like irqsoff), that just want to
1613 	 * know if the ring buffer has been disabled, but it can handle
1614 	 * races of where it gets disabled but we still do a record.
1615 	 * As the check is in the fast path of the tracers, it is more
1616 	 * important to be fast than accurate.
1617 	 */
1618 	tr->buffer_disabled = 1;
1619 }
1620 
1621 /**
1622  * tracer_tracing_disable() - temporary disable the buffer from write
1623  * @tr: The trace array to disable its buffer for
1624  *
1625  * Expects trace_tracing_enable() to re-enable tracing.
1626  * The difference between this and tracer_tracing_off() is that this
1627  * is a counter and can nest, whereas, tracer_tracing_off() can
1628  * be called multiple times and a single trace_tracing_on() will
1629  * enable it.
1630  */
1631 void tracer_tracing_disable(struct trace_array *tr)
1632 {
1633 	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1634 		return;
1635 
1636 	ring_buffer_record_disable(tr->array_buffer.buffer);
1637 }
1638 
1639 /**
1640  * tracer_tracing_enable() - counter part of tracer_tracing_disable()
1641  * @tr: The trace array that had tracer_tracincg_disable() called on it
1642  *
1643  * This is called after tracer_tracing_disable() has been called on @tr,
1644  * when it's safe to re-enable tracing.
1645  */
1646 void tracer_tracing_enable(struct trace_array *tr)
1647 {
1648 	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1649 		return;
1650 
1651 	ring_buffer_record_enable(tr->array_buffer.buffer);
1652 }
1653 
1654 /**
1655  * tracing_off - turn off tracing buffers
1656  *
1657  * This function stops the tracing buffers from recording data.
1658  * It does not disable any overhead the tracers themselves may
1659  * be causing. This function simply causes all recording to
1660  * the ring buffers to fail.
1661  */
1662 void tracing_off(void)
1663 {
1664 	tracer_tracing_off(&global_trace);
1665 }
1666 EXPORT_SYMBOL_GPL(tracing_off);
1667 
1668 void disable_trace_on_warning(void)
1669 {
1670 	if (__disable_trace_on_warning) {
1671 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1672 			"Disabling tracing due to warning\n");
1673 		tracing_off();
1674 	}
1675 }
1676 
1677 /**
1678  * tracer_tracing_is_on - show real state of ring buffer enabled
1679  * @tr : the trace array to know if ring buffer is enabled
1680  *
1681  * Shows real state of the ring buffer if it is enabled or not.
1682  */
1683 bool tracer_tracing_is_on(struct trace_array *tr)
1684 {
1685 	if (tr->array_buffer.buffer)
1686 		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1687 	return !tr->buffer_disabled;
1688 }
1689 
1690 /**
1691  * tracing_is_on - show state of ring buffers enabled
1692  */
1693 int tracing_is_on(void)
1694 {
1695 	return tracer_tracing_is_on(&global_trace);
1696 }
1697 EXPORT_SYMBOL_GPL(tracing_is_on);
1698 
1699 static int __init set_buf_size(char *str)
1700 {
1701 	unsigned long buf_size;
1702 
1703 	if (!str)
1704 		return 0;
1705 	buf_size = memparse(str, &str);
1706 	/*
1707 	 * nr_entries can not be zero and the startup
1708 	 * tests require some buffer space. Therefore
1709 	 * ensure we have at least 4096 bytes of buffer.
1710 	 */
1711 	trace_buf_size = max(4096UL, buf_size);
1712 	return 1;
1713 }
1714 __setup("trace_buf_size=", set_buf_size);
1715 
1716 static int __init set_tracing_thresh(char *str)
1717 {
1718 	unsigned long threshold;
1719 	int ret;
1720 
1721 	if (!str)
1722 		return 0;
1723 	ret = kstrtoul(str, 0, &threshold);
1724 	if (ret < 0)
1725 		return 0;
1726 	tracing_thresh = threshold * 1000;
1727 	return 1;
1728 }
1729 __setup("tracing_thresh=", set_tracing_thresh);
1730 
/* Convert nanoseconds to whole microseconds, discarding the remainder. */
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
	const unsigned long nsecs_per_usec = 1000;

	return nsecs / nsecs_per_usec;
}
1735 
1736 /*
1737  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1738  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1739  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1740  * of strings in the order that the evals (enum) were defined.
1741  */
1742 #undef C
1743 #define C(a, b) b
1744 
1745 /* These must match the bit positions in trace_iterator_flags */
1746 static const char *trace_options[] = {
1747 	TRACE_FLAGS
1748 	NULL
1749 };
1750 
1751 static struct {
1752 	u64 (*func)(void);
1753 	const char *name;
1754 	int in_ns;		/* is this clock in nanoseconds? */
1755 } trace_clocks[] = {
1756 	{ trace_clock_local,		"local",	1 },
1757 	{ trace_clock_global,		"global",	1 },
1758 	{ trace_clock_counter,		"counter",	0 },
1759 	{ trace_clock_jiffies,		"uptime",	0 },
1760 	{ trace_clock,			"perf",		1 },
1761 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1762 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1763 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1764 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1765 	ARCH_TRACE_CLOCKS
1766 };
1767 
1768 bool trace_clock_in_ns(struct trace_array *tr)
1769 {
1770 	if (trace_clocks[tr->clock_id].in_ns)
1771 		return true;
1772 
1773 	return false;
1774 }
1775 
1776 /*
1777  * trace_parser_get_init - gets the buffer for trace parser
1778  */
1779 int trace_parser_get_init(struct trace_parser *parser, int size)
1780 {
1781 	memset(parser, 0, sizeof(*parser));
1782 
1783 	parser->buffer = kmalloc(size, GFP_KERNEL);
1784 	if (!parser->buffer)
1785 		return 1;
1786 
1787 	parser->size = size;
1788 	return 0;
1789 }
1790 
1791 /*
1792  * trace_parser_put - frees the buffer for trace parser
1793  */
1794 void trace_parser_put(struct trace_parser *parser)
1795 {
1796 	kfree(parser->buffer);
1797 	parser->buffer = NULL;
1798 }
1799 
1800 /*
1801  * trace_get_user - reads the user input string separated by  space
1802  * (matched by isspace(ch))
1803  *
1804  * For each string found the 'struct trace_parser' is updated,
1805  * and the function returns.
1806  *
1807  * Returns number of bytes read.
1808  *
1809  * See kernel/trace/trace.h for 'struct trace_parser' details.
1810  */
1811 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1812 	size_t cnt, loff_t *ppos)
1813 {
1814 	char ch;
1815 	size_t read = 0;
1816 	ssize_t ret;
1817 
1818 	if (!*ppos)
1819 		trace_parser_clear(parser);
1820 
1821 	ret = get_user(ch, ubuf++);
1822 	if (ret)
1823 		goto fail;
1824 
1825 	read++;
1826 	cnt--;
1827 
1828 	/*
1829 	 * The parser is not finished with the last write,
1830 	 * continue reading the user input without skipping spaces.
1831 	 */
1832 	if (!parser->cont) {
1833 		/* skip white space */
1834 		while (cnt && isspace(ch)) {
1835 			ret = get_user(ch, ubuf++);
1836 			if (ret)
1837 				goto fail;
1838 			read++;
1839 			cnt--;
1840 		}
1841 
1842 		parser->idx = 0;
1843 
1844 		/* only spaces were written */
1845 		if (isspace(ch) || !ch) {
1846 			*ppos += read;
1847 			return read;
1848 		}
1849 	}
1850 
1851 	/* read the non-space input */
1852 	while (cnt && !isspace(ch) && ch) {
1853 		if (parser->idx < parser->size - 1)
1854 			parser->buffer[parser->idx++] = ch;
1855 		else {
1856 			ret = -EINVAL;
1857 			goto fail;
1858 		}
1859 
1860 		ret = get_user(ch, ubuf++);
1861 		if (ret)
1862 			goto fail;
1863 		read++;
1864 		cnt--;
1865 	}
1866 
1867 	/* We either got finished input or we have to wait for another call. */
1868 	if (isspace(ch) || !ch) {
1869 		parser->buffer[parser->idx] = 0;
1870 		parser->cont = false;
1871 	} else if (parser->idx < parser->size - 1) {
1872 		parser->cont = true;
1873 		parser->buffer[parser->idx++] = ch;
1874 		/* Make sure the parsed string always terminates with '\0'. */
1875 		parser->buffer[parser->idx] = 0;
1876 	} else {
1877 		ret = -EINVAL;
1878 		goto fail;
1879 	}
1880 
1881 	*ppos += read;
1882 	return read;
1883 fail:
1884 	trace_parser_fail(parser);
1885 	return ret;
1886 }
1887 
1888 /* TODO add a seq_buf_to_buffer() */
1889 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1890 {
1891 	int len;
1892 
1893 	if (trace_seq_used(s) <= s->readpos)
1894 		return -EBUSY;
1895 
1896 	len = trace_seq_used(s) - s->readpos;
1897 	if (cnt > len)
1898 		cnt = len;
1899 	memcpy(buf, s->buffer + s->readpos, cnt);
1900 
1901 	s->readpos += cnt;
1902 	return cnt;
1903 }
1904 
1905 unsigned long __read_mostly	tracing_thresh;
1906 
1907 #ifdef CONFIG_TRACER_MAX_TRACE
1908 static const struct file_operations tracing_max_lat_fops;
1909 
1910 #ifdef LATENCY_FS_NOTIFY
1911 
1912 static struct workqueue_struct *fsnotify_wq;
1913 
1914 static void latency_fsnotify_workfn(struct work_struct *work)
1915 {
1916 	struct trace_array *tr = container_of(work, struct trace_array,
1917 					      fsnotify_work);
1918 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1919 }
1920 
1921 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1922 {
1923 	struct trace_array *tr = container_of(iwork, struct trace_array,
1924 					      fsnotify_irqwork);
1925 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1926 }
1927 
1928 static void trace_create_maxlat_file(struct trace_array *tr,
1929 				     struct dentry *d_tracer)
1930 {
1931 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1932 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1933 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1934 					      TRACE_MODE_WRITE,
1935 					      d_tracer, tr,
1936 					      &tracing_max_lat_fops);
1937 }
1938 
1939 __init static int latency_fsnotify_init(void)
1940 {
1941 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1942 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1943 	if (!fsnotify_wq) {
1944 		pr_err("Unable to allocate tr_max_lat_wq\n");
1945 		return -ENOMEM;
1946 	}
1947 	return 0;
1948 }
1949 
1950 late_initcall_sync(latency_fsnotify_init);
1951 
1952 void latency_fsnotify(struct trace_array *tr)
1953 {
1954 	if (!fsnotify_wq)
1955 		return;
1956 	/*
1957 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1958 	 * possible that we are called from __schedule() or do_idle(), which
1959 	 * could cause a deadlock.
1960 	 */
1961 	irq_work_queue(&tr->fsnotify_irqwork);
1962 }
1963 
1964 #else /* !LATENCY_FS_NOTIFY */
1965 
/* Without LATENCY_FS_NOTIFY only the "tracing_max_latency" file is created. */
#define trace_create_maxlat_file(tr, d_tracer)				\
	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
			  d_tracer, tr, &tracing_max_lat_fops)
1969 
1970 #endif
1971 
1972 /*
1973  * Copy the new maximum trace into the separate maximum-trace
1974  * structure. (this way the maximum trace is permanently saved,
1975  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1976  */
1977 static void
1978 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1979 {
1980 	struct array_buffer *trace_buf = &tr->array_buffer;
1981 	struct array_buffer *max_buf = &tr->max_buffer;
1982 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1983 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1984 
1985 	max_buf->cpu = cpu;
1986 	max_buf->time_start = data->preempt_timestamp;
1987 
1988 	max_data->saved_latency = tr->max_latency;
1989 	max_data->critical_start = data->critical_start;
1990 	max_data->critical_end = data->critical_end;
1991 
1992 	strscpy(max_data->comm, tsk->comm);
1993 	max_data->pid = tsk->pid;
1994 	/*
1995 	 * If tsk == current, then use current_uid(), as that does not use
1996 	 * RCU. The irq tracer can be called out of RCU scope.
1997 	 */
1998 	if (tsk == current)
1999 		max_data->uid = current_uid();
2000 	else
2001 		max_data->uid = task_uid(tsk);
2002 
2003 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
2004 	max_data->policy = tsk->policy;
2005 	max_data->rt_priority = tsk->rt_priority;
2006 
2007 	/* record this tasks comm */
2008 	tracing_record_cmdline(tsk);
2009 	latency_fsnotify(tr);
2010 }
2011 
2012 /**
2013  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
2014  * @tr: tracer
2015  * @tsk: the task with the latency
2016  * @cpu: The cpu that initiated the trace.
2017  * @cond_data: User data associated with a conditional snapshot
2018  *
2019  * Flip the buffers between the @tr and the max_tr and record information
2020  * about which task was the cause of this latency.
2021  */
2022 void
2023 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
2024 	      void *cond_data)
2025 {
2026 	if (tr->stop_count)
2027 		return;
2028 
2029 	WARN_ON_ONCE(!irqs_disabled());
2030 
2031 	if (!tr->allocated_snapshot) {
2032 		/* Only the nop tracer should hit this when disabling */
2033 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
2034 		return;
2035 	}
2036 
2037 	arch_spin_lock(&tr->max_lock);
2038 
2039 	/* Inherit the recordable setting from array_buffer */
2040 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
2041 		ring_buffer_record_on(tr->max_buffer.buffer);
2042 	else
2043 		ring_buffer_record_off(tr->max_buffer.buffer);
2044 
2045 #ifdef CONFIG_TRACER_SNAPSHOT
2046 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
2047 		arch_spin_unlock(&tr->max_lock);
2048 		return;
2049 	}
2050 #endif
2051 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
2052 
2053 	__update_max_tr(tr, tsk, cpu);
2054 
2055 	arch_spin_unlock(&tr->max_lock);
2056 
2057 	/* Any waiters on the old snapshot buffer need to wake up */
2058 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
2059 }
2060 
2061 /**
2062  * update_max_tr_single - only copy one trace over, and reset the rest
2063  * @tr: tracer
2064  * @tsk: task with the latency
2065  * @cpu: the cpu of the buffer to copy.
2066  *
2067  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
2068  */
2069 void
2070 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
2071 {
2072 	int ret;
2073 
2074 	if (tr->stop_count)
2075 		return;
2076 
2077 	WARN_ON_ONCE(!irqs_disabled());
2078 	if (!tr->allocated_snapshot) {
2079 		/* Only the nop tracer should hit this when disabling */
2080 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
2081 		return;
2082 	}
2083 
2084 	arch_spin_lock(&tr->max_lock);
2085 
2086 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
2087 
2088 	if (ret == -EBUSY) {
2089 		/*
2090 		 * We failed to swap the buffer due to a commit taking
2091 		 * place on this CPU. We fail to record, but we reset
2092 		 * the max trace buffer (no one writes directly to it)
2093 		 * and flag that it failed.
2094 		 * Another reason is resize is in progress.
2095 		 */
2096 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
2097 			"Failed to swap buffers due to commit or resize in progress\n");
2098 	}
2099 
2100 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
2101 
2102 	__update_max_tr(tr, tsk, cpu);
2103 	arch_spin_unlock(&tr->max_lock);
2104 }
2105 
2106 #endif /* CONFIG_TRACER_MAX_TRACE */
2107 
2108 struct pipe_wait {
2109 	struct trace_iterator		*iter;
2110 	int				wait_index;
2111 };
2112 
2113 static bool wait_pipe_cond(void *data)
2114 {
2115 	struct pipe_wait *pwait = data;
2116 	struct trace_iterator *iter = pwait->iter;
2117 
2118 	if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2119 		return true;
2120 
2121 	return iter->closed;
2122 }
2123 
2124 static int wait_on_pipe(struct trace_iterator *iter, int full)
2125 {
2126 	struct pipe_wait pwait;
2127 	int ret;
2128 
2129 	/* Iterators are static, they should be filled or empty */
2130 	if (trace_buffer_iter(iter, iter->cpu_file))
2131 		return 0;
2132 
2133 	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2134 	pwait.iter = iter;
2135 
2136 	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2137 			       wait_pipe_cond, &pwait);
2138 
2139 #ifdef CONFIG_TRACER_MAX_TRACE
2140 	/*
2141 	 * Make sure this is still the snapshot buffer, as if a snapshot were
2142 	 * to happen, this would now be the main buffer.
2143 	 */
2144 	if (iter->snapshot)
2145 		iter->array_buffer = &iter->tr->max_buffer;
2146 #endif
2147 	return ret;
2148 }
2149 
2150 #ifdef CONFIG_FTRACE_STARTUP_TEST
2151 static bool selftests_can_run;
2152 
2153 struct trace_selftests {
2154 	struct list_head		list;
2155 	struct tracer			*type;
2156 };
2157 
2158 static LIST_HEAD(postponed_selftests);
2159 
2160 static int save_selftest(struct tracer *type)
2161 {
2162 	struct trace_selftests *selftest;
2163 
2164 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2165 	if (!selftest)
2166 		return -ENOMEM;
2167 
2168 	selftest->type = type;
2169 	list_add(&selftest->list, &postponed_selftests);
2170 	return 0;
2171 }
2172 
2173 static int run_tracer_selftest(struct tracer *type)
2174 {
2175 	struct trace_array *tr = &global_trace;
2176 	struct tracer *saved_tracer = tr->current_trace;
2177 	int ret;
2178 
2179 	if (!type->selftest || tracing_selftest_disabled)
2180 		return 0;
2181 
2182 	/*
2183 	 * If a tracer registers early in boot up (before scheduling is
2184 	 * initialized and such), then do not run its selftests yet.
2185 	 * Instead, run it a little later in the boot process.
2186 	 */
2187 	if (!selftests_can_run)
2188 		return save_selftest(type);
2189 
2190 	if (!tracing_is_on()) {
2191 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2192 			type->name);
2193 		return 0;
2194 	}
2195 
2196 	/*
2197 	 * Run a selftest on this tracer.
2198 	 * Here we reset the trace buffer, and set the current
2199 	 * tracer to be this tracer. The tracer can then run some
2200 	 * internal tracing to verify that everything is in order.
2201 	 * If we fail, we do not register this tracer.
2202 	 */
2203 	tracing_reset_online_cpus(&tr->array_buffer);
2204 
2205 	tr->current_trace = type;
2206 
2207 #ifdef CONFIG_TRACER_MAX_TRACE
2208 	if (type->use_max_tr) {
2209 		/* If we expanded the buffers, make sure the max is expanded too */
2210 		if (tr->ring_buffer_expanded)
2211 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2212 					   RING_BUFFER_ALL_CPUS);
2213 		tr->allocated_snapshot = true;
2214 	}
2215 #endif
2216 
2217 	/* the test is responsible for initializing and enabling */
2218 	pr_info("Testing tracer %s: ", type->name);
2219 	ret = type->selftest(type, tr);
2220 	/* the test is responsible for resetting too */
2221 	tr->current_trace = saved_tracer;
2222 	if (ret) {
2223 		printk(KERN_CONT "FAILED!\n");
2224 		/* Add the warning after printing 'FAILED' */
2225 		WARN_ON(1);
2226 		return -1;
2227 	}
2228 	/* Only reset on passing, to avoid touching corrupted buffers */
2229 	tracing_reset_online_cpus(&tr->array_buffer);
2230 
2231 #ifdef CONFIG_TRACER_MAX_TRACE
2232 	if (type->use_max_tr) {
2233 		tr->allocated_snapshot = false;
2234 
2235 		/* Shrink the max buffer again */
2236 		if (tr->ring_buffer_expanded)
2237 			ring_buffer_resize(tr->max_buffer.buffer, 1,
2238 					   RING_BUFFER_ALL_CPUS);
2239 	}
2240 #endif
2241 
2242 	printk(KERN_CONT "PASSED\n");
2243 	return 0;
2244 }
2245 
2246 static int do_run_tracer_selftest(struct tracer *type)
2247 {
2248 	int ret;
2249 
2250 	/*
2251 	 * Tests can take a long time, especially if they are run one after the
2252 	 * other, as does happen during bootup when all the tracers are
2253 	 * registered. This could cause the soft lockup watchdog to trigger.
2254 	 */
2255 	cond_resched();
2256 
2257 	tracing_selftest_running = true;
2258 	ret = run_tracer_selftest(type);
2259 	tracing_selftest_running = false;
2260 
2261 	return ret;
2262 }
2263 
2264 static __init int init_trace_selftests(void)
2265 {
2266 	struct trace_selftests *p, *n;
2267 	struct tracer *t, **last;
2268 	int ret;
2269 
2270 	selftests_can_run = true;
2271 
2272 	guard(mutex)(&trace_types_lock);
2273 
2274 	if (list_empty(&postponed_selftests))
2275 		return 0;
2276 
2277 	pr_info("Running postponed tracer tests:\n");
2278 
2279 	tracing_selftest_running = true;
2280 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2281 		/* This loop can take minutes when sanitizers are enabled, so
2282 		 * lets make sure we allow RCU processing.
2283 		 */
2284 		cond_resched();
2285 		ret = run_tracer_selftest(p->type);
2286 		/* If the test fails, then warn and remove from available_tracers */
2287 		if (ret < 0) {
2288 			WARN(1, "tracer: %s failed selftest, disabling\n",
2289 			     p->type->name);
2290 			last = &trace_types;
2291 			for (t = trace_types; t; t = t->next) {
2292 				if (t == p->type) {
2293 					*last = t->next;
2294 					break;
2295 				}
2296 				last = &t->next;
2297 			}
2298 		}
2299 		list_del(&p->list);
2300 		kfree(p);
2301 	}
2302 	tracing_selftest_running = false;
2303 
2304 	return 0;
2305 }
2306 core_initcall(init_trace_selftests);
2307 #else
/* Without CONFIG_FTRACE_STARTUP_TEST there is no selftest to run */
static inline int do_run_tracer_selftest(struct tracer *type)
{
	return 0;
}
2312 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2313 
2314 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2315 
2316 static void __init apply_trace_boot_options(void);
2317 
2318 /**
2319  * register_tracer - register a tracer with the ftrace system.
2320  * @type: the plugin for the tracer
2321  *
2322  * Register a new plugin tracer.
2323  */
2324 int __init register_tracer(struct tracer *type)
2325 {
2326 	struct tracer *t;
2327 	int ret = 0;
2328 
2329 	if (!type->name) {
2330 		pr_info("Tracer must have a name\n");
2331 		return -1;
2332 	}
2333 
2334 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2335 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2336 		return -1;
2337 	}
2338 
2339 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2340 		pr_warn("Can not register tracer %s due to lockdown\n",
2341 			   type->name);
2342 		return -EPERM;
2343 	}
2344 
2345 	mutex_lock(&trace_types_lock);
2346 
2347 	for (t = trace_types; t; t = t->next) {
2348 		if (strcmp(type->name, t->name) == 0) {
2349 			/* already found */
2350 			pr_info("Tracer %s already registered\n",
2351 				type->name);
2352 			ret = -1;
2353 			goto out;
2354 		}
2355 	}
2356 
2357 	if (!type->set_flag)
2358 		type->set_flag = &dummy_set_flag;
2359 	if (!type->flags) {
2360 		/*allocate a dummy tracer_flags*/
2361 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2362 		if (!type->flags) {
2363 			ret = -ENOMEM;
2364 			goto out;
2365 		}
2366 		type->flags->val = 0;
2367 		type->flags->opts = dummy_tracer_opt;
2368 	} else
2369 		if (!type->flags->opts)
2370 			type->flags->opts = dummy_tracer_opt;
2371 
2372 	/* store the tracer for __set_tracer_option */
2373 	type->flags->trace = type;
2374 
2375 	ret = do_run_tracer_selftest(type);
2376 	if (ret < 0)
2377 		goto out;
2378 
2379 	type->next = trace_types;
2380 	trace_types = type;
2381 	add_tracer_options(&global_trace, type);
2382 
2383  out:
2384 	mutex_unlock(&trace_types_lock);
2385 
2386 	if (ret || !default_bootup_tracer)
2387 		return ret;
2388 
2389 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2390 		return 0;
2391 
2392 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2393 	/* Do we want this tracer to start on bootup? */
2394 	tracing_set_tracer(&global_trace, type->name);
2395 	default_bootup_tracer = NULL;
2396 
2397 	apply_trace_boot_options();
2398 
2399 	/* disable other selftests, since this will break it. */
2400 	disable_tracing_selftest("running a tracer");
2401 
2402 	return 0;
2403 }
2404 
2405 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2406 {
2407 	struct trace_buffer *buffer = buf->buffer;
2408 
2409 	if (!buffer)
2410 		return;
2411 
2412 	ring_buffer_record_disable(buffer);
2413 
2414 	/* Make sure all commits have finished */
2415 	synchronize_rcu();
2416 	ring_buffer_reset_cpu(buffer, cpu);
2417 
2418 	ring_buffer_record_enable(buffer);
2419 }
2420 
2421 void tracing_reset_online_cpus(struct array_buffer *buf)
2422 {
2423 	struct trace_buffer *buffer = buf->buffer;
2424 
2425 	if (!buffer)
2426 		return;
2427 
2428 	ring_buffer_record_disable(buffer);
2429 
2430 	/* Make sure all commits have finished */
2431 	synchronize_rcu();
2432 
2433 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2434 
2435 	ring_buffer_reset_online_cpus(buffer);
2436 
2437 	ring_buffer_record_enable(buffer);
2438 }
2439 
2440 static void tracing_reset_all_cpus(struct array_buffer *buf)
2441 {
2442 	struct trace_buffer *buffer = buf->buffer;
2443 
2444 	if (!buffer)
2445 		return;
2446 
2447 	ring_buffer_record_disable(buffer);
2448 
2449 	/* Make sure all commits have finished */
2450 	synchronize_rcu();
2451 
2452 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2453 
2454 	ring_buffer_reset(buffer);
2455 
2456 	ring_buffer_record_enable(buffer);
2457 }
2458 
2459 /* Must have trace_types_lock held */
2460 void tracing_reset_all_online_cpus_unlocked(void)
2461 {
2462 	struct trace_array *tr;
2463 
2464 	lockdep_assert_held(&trace_types_lock);
2465 
2466 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2467 		if (!tr->clear_trace)
2468 			continue;
2469 		tr->clear_trace = false;
2470 		tracing_reset_online_cpus(&tr->array_buffer);
2471 #ifdef CONFIG_TRACER_MAX_TRACE
2472 		tracing_reset_online_cpus(&tr->max_buffer);
2473 #endif
2474 	}
2475 }
2476 
2477 void tracing_reset_all_online_cpus(void)
2478 {
2479 	guard(mutex)(&trace_types_lock);
2480 	tracing_reset_all_online_cpus_unlocked();
2481 }
2482 
2483 int is_tracing_stopped(void)
2484 {
2485 	return global_trace.stop_count;
2486 }
2487 
2488 static void tracing_start_tr(struct trace_array *tr)
2489 {
2490 	struct trace_buffer *buffer;
2491 
2492 	if (tracing_disabled)
2493 		return;
2494 
2495 	guard(raw_spinlock_irqsave)(&tr->start_lock);
2496 	if (--tr->stop_count) {
2497 		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2498 			/* Someone screwed up their debugging */
2499 			tr->stop_count = 0;
2500 		}
2501 		return;
2502 	}
2503 
2504 	/* Prevent the buffers from switching */
2505 	arch_spin_lock(&tr->max_lock);
2506 
2507 	buffer = tr->array_buffer.buffer;
2508 	if (buffer)
2509 		ring_buffer_record_enable(buffer);
2510 
2511 #ifdef CONFIG_TRACER_MAX_TRACE
2512 	buffer = tr->max_buffer.buffer;
2513 	if (buffer)
2514 		ring_buffer_record_enable(buffer);
2515 #endif
2516 
2517 	arch_spin_unlock(&tr->max_lock);
2518 }
2519 
2520 /**
2521  * tracing_start - quick start of the tracer
2522  *
2523  * If tracing is enabled but was stopped by tracing_stop,
2524  * this will start the tracer back up.
2525  */
2526 void tracing_start(void)
2527 
2528 {
2529 	return tracing_start_tr(&global_trace);
2530 }
2531 
2532 static void tracing_stop_tr(struct trace_array *tr)
2533 {
2534 	struct trace_buffer *buffer;
2535 
2536 	guard(raw_spinlock_irqsave)(&tr->start_lock);
2537 	if (tr->stop_count++)
2538 		return;
2539 
2540 	/* Prevent the buffers from switching */
2541 	arch_spin_lock(&tr->max_lock);
2542 
2543 	buffer = tr->array_buffer.buffer;
2544 	if (buffer)
2545 		ring_buffer_record_disable(buffer);
2546 
2547 #ifdef CONFIG_TRACER_MAX_TRACE
2548 	buffer = tr->max_buffer.buffer;
2549 	if (buffer)
2550 		ring_buffer_record_disable(buffer);
2551 #endif
2552 
2553 	arch_spin_unlock(&tr->max_lock);
2554 }
2555 
2556 /**
2557  * tracing_stop - quick stop of the tracer
2558  *
2559  * Light weight way to stop tracing. Use in conjunction with
2560  * tracing_start.
2561  */
2562 void tracing_stop(void)
2563 {
2564 	return tracing_stop_tr(&global_trace);
2565 }
2566 
2567 /*
2568  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2569  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2570  * simplifies those functions and keeps them in sync.
2571  */
2572 enum print_line_t trace_handle_return(struct trace_seq *s)
2573 {
2574 	return trace_seq_has_overflowed(s) ?
2575 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2576 }
2577 EXPORT_SYMBOL_GPL(trace_handle_return);
2578 
/* Returns current->migration_disabled on SMP builds, 0 otherwise */
static unsigned short migration_disable_value(void)
{
#ifdef CONFIG_SMP
	return current->migration_disabled;
#else
	return 0;
#endif
}
2587 
2588 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2589 {
2590 	unsigned int trace_flags = irqs_status;
2591 	unsigned int pc;
2592 
2593 	pc = preempt_count();
2594 
2595 	if (pc & NMI_MASK)
2596 		trace_flags |= TRACE_FLAG_NMI;
2597 	if (pc & HARDIRQ_MASK)
2598 		trace_flags |= TRACE_FLAG_HARDIRQ;
2599 	if (in_serving_softirq())
2600 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2601 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2602 		trace_flags |= TRACE_FLAG_BH_OFF;
2603 
2604 	if (tif_need_resched())
2605 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2606 	if (test_preempt_need_resched())
2607 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2608 	if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
2609 		trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
2610 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2611 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2612 }
2613 
/* Non-static wrapper around __trace_buffer_lock_reserve() */
struct ring_buffer_event *
trace_buffer_lock_reserve(struct trace_buffer *buffer, int type,
			  unsigned long len, unsigned int trace_ctx)
{
	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
}
2622 
2623 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2624 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2625 static int trace_buffered_event_ref;
2626 
2627 /**
2628  * trace_buffered_event_enable - enable buffering events
2629  *
2630  * When events are being filtered, it is quicker to use a temporary
2631  * buffer to write the event data into if there's a likely chance
2632  * that it will not be committed. The discard of the ring buffer
2633  * is not as fast as committing, and is much slower than copying
2634  * a commit.
2635  *
2636  * When an event is to be filtered, allocate per cpu buffers to
2637  * write the event data into, and if the event is filtered and discarded
2638  * it is simply dropped, otherwise, the entire data is to be committed
2639  * in one shot.
2640  */
2641 void trace_buffered_event_enable(void)
2642 {
2643 	struct ring_buffer_event *event;
2644 	struct page *page;
2645 	int cpu;
2646 
2647 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2648 
2649 	if (trace_buffered_event_ref++)
2650 		return;
2651 
2652 	for_each_tracing_cpu(cpu) {
2653 		page = alloc_pages_node(cpu_to_node(cpu),
2654 					GFP_KERNEL | __GFP_NORETRY, 0);
2655 		/* This is just an optimization and can handle failures */
2656 		if (!page) {
2657 			pr_err("Failed to allocate event buffer\n");
2658 			break;
2659 		}
2660 
2661 		event = page_address(page);
2662 		memset(event, 0, sizeof(*event));
2663 
2664 		per_cpu(trace_buffered_event, cpu) = event;
2665 
2666 		scoped_guard(preempt,) {
2667 			if (cpu == smp_processor_id() &&
2668 			    __this_cpu_read(trace_buffered_event) !=
2669 			    per_cpu(trace_buffered_event, cpu))
2670 				WARN_ON_ONCE(1);
2671 		}
2672 	}
2673 }
2674 
2675 static void enable_trace_buffered_event(void *data)
2676 {
2677 	this_cpu_dec(trace_buffered_event_cnt);
2678 }
2679 
2680 static void disable_trace_buffered_event(void *data)
2681 {
2682 	this_cpu_inc(trace_buffered_event_cnt);
2683 }
2684 
2685 /**
2686  * trace_buffered_event_disable - disable buffering events
2687  *
2688  * When a filter is removed, it is faster to not use the buffered
2689  * events, and to commit directly into the ring buffer. Free up
2690  * the temp buffers when there are no more users. This requires
2691  * special synchronization with current events.
2692  */
2693 void trace_buffered_event_disable(void)
2694 {
2695 	int cpu;
2696 
2697 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2698 
2699 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2700 		return;
2701 
2702 	if (--trace_buffered_event_ref)
2703 		return;
2704 
2705 	/* For each CPU, set the buffer as used. */
2706 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2707 			 NULL, true);
2708 
2709 	/* Wait for all current users to finish */
2710 	synchronize_rcu();
2711 
2712 	for_each_tracing_cpu(cpu) {
2713 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2714 		per_cpu(trace_buffered_event, cpu) = NULL;
2715 	}
2716 
2717 	/*
2718 	 * Wait for all CPUs that potentially started checking if they can use
2719 	 * their event buffer only after the previous synchronize_rcu() call and
2720 	 * they still read a valid pointer from trace_buffered_event. It must be
2721 	 * ensured they don't see cleared trace_buffered_event_cnt else they
2722 	 * could wrongly decide to use the pointed-to buffer which is now freed.
2723 	 */
2724 	synchronize_rcu();
2725 
2726 	/* For each CPU, relinquish the buffer */
2727 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2728 			 true);
2729 }
2730 
2731 static struct trace_buffer *temp_buffer;
2732 
2733 struct ring_buffer_event *
2734 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2735 			  struct trace_event_file *trace_file,
2736 			  int type, unsigned long len,
2737 			  unsigned int trace_ctx)
2738 {
2739 	struct ring_buffer_event *entry;
2740 	struct trace_array *tr = trace_file->tr;
2741 	int val;
2742 
2743 	*current_rb = tr->array_buffer.buffer;
2744 
2745 	if (!tr->no_filter_buffering_ref &&
2746 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2747 		preempt_disable_notrace();
2748 		/*
2749 		 * Filtering is on, so try to use the per cpu buffer first.
2750 		 * This buffer will simulate a ring_buffer_event,
2751 		 * where the type_len is zero and the array[0] will
2752 		 * hold the full length.
2753 		 * (see include/linux/ring-buffer.h for details on
2754 		 *  how the ring_buffer_event is structured).
2755 		 *
2756 		 * Using a temp buffer during filtering and copying it
2757 		 * on a matched filter is quicker than writing directly
2758 		 * into the ring buffer and then discarding it when
2759 		 * it doesn't match. That is because the discard
2760 		 * requires several atomic operations to get right.
2761 		 * Copying on match and doing nothing on a failed match
2762 		 * is still quicker than no copy on match, but having
2763 		 * to discard out of the ring buffer on a failed match.
2764 		 */
2765 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2766 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2767 
2768 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2769 
2770 			/*
2771 			 * Preemption is disabled, but interrupts and NMIs
2772 			 * can still come in now. If that happens after
2773 			 * the above increment, then it will have to go
2774 			 * back to the old method of allocating the event
2775 			 * on the ring buffer, and if the filter fails, it
2776 			 * will have to call ring_buffer_discard_commit()
2777 			 * to remove it.
2778 			 *
2779 			 * Need to also check the unlikely case that the
2780 			 * length is bigger than the temp buffer size.
2781 			 * If that happens, then the reserve is pretty much
2782 			 * guaranteed to fail, as the ring buffer currently
2783 			 * only allows events less than a page. But that may
2784 			 * change in the future, so let the ring buffer reserve
2785 			 * handle the failure in that case.
2786 			 */
2787 			if (val == 1 && likely(len <= max_len)) {
2788 				trace_event_setup(entry, type, trace_ctx);
2789 				entry->array[0] = len;
2790 				/* Return with preemption disabled */
2791 				return entry;
2792 			}
2793 			this_cpu_dec(trace_buffered_event_cnt);
2794 		}
2795 		/* __trace_buffer_lock_reserve() disables preemption */
2796 		preempt_enable_notrace();
2797 	}
2798 
2799 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2800 					    trace_ctx);
2801 	/*
2802 	 * If tracing is off, but we have triggers enabled
2803 	 * we still need to look at the event data. Use the temp_buffer
2804 	 * to store the trace event for the trigger to use. It's recursive
2805 	 * safe and will not be recorded anywhere.
2806 	 */
2807 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2808 		*current_rb = temp_buffer;
2809 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2810 						    trace_ctx);
2811 	}
2812 	return entry;
2813 }
2814 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2815 
2816 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2817 static DEFINE_MUTEX(tracepoint_printk_mutex);
2818 
2819 static void output_printk(struct trace_event_buffer *fbuffer)
2820 {
2821 	struct trace_event_call *event_call;
2822 	struct trace_event_file *file;
2823 	struct trace_event *event;
2824 	unsigned long flags;
2825 	struct trace_iterator *iter = tracepoint_print_iter;
2826 
2827 	/* We should never get here if iter is NULL */
2828 	if (WARN_ON_ONCE(!iter))
2829 		return;
2830 
2831 	event_call = fbuffer->trace_file->event_call;
2832 	if (!event_call || !event_call->event.funcs ||
2833 	    !event_call->event.funcs->trace)
2834 		return;
2835 
2836 	file = fbuffer->trace_file;
2837 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2838 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2839 	     !filter_match_preds(file->filter, fbuffer->entry)))
2840 		return;
2841 
2842 	event = &fbuffer->trace_file->event_call->event;
2843 
2844 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2845 	trace_seq_init(&iter->seq);
2846 	iter->ent = fbuffer->entry;
2847 	event_call->event.funcs->trace(iter, 0, event);
2848 	trace_seq_putc(&iter->seq, 0);
2849 	printk("%s", iter->seq.buffer);
2850 
2851 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2852 }
2853 
2854 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
2855 			     void *buffer, size_t *lenp,
2856 			     loff_t *ppos)
2857 {
2858 	int save_tracepoint_printk;
2859 	int ret;
2860 
2861 	guard(mutex)(&tracepoint_printk_mutex);
2862 	save_tracepoint_printk = tracepoint_printk;
2863 
2864 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2865 
2866 	/*
2867 	 * This will force exiting early, as tracepoint_printk
2868 	 * is always zero when tracepoint_printk_iter is not allocated
2869 	 */
2870 	if (!tracepoint_print_iter)
2871 		tracepoint_printk = 0;
2872 
2873 	if (save_tracepoint_printk == tracepoint_printk)
2874 		return ret;
2875 
2876 	if (tracepoint_printk)
2877 		static_key_enable(&tracepoint_printk_key.key);
2878 	else
2879 		static_key_disable(&tracepoint_printk_key.key);
2880 
2881 	return ret;
2882 }
2883 
2884 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2885 {
2886 	enum event_trigger_type tt = ETT_NONE;
2887 	struct trace_event_file *file = fbuffer->trace_file;
2888 
2889 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2890 			fbuffer->entry, &tt))
2891 		goto discard;
2892 
2893 	if (static_key_false(&tracepoint_printk_key.key))
2894 		output_printk(fbuffer);
2895 
2896 	if (static_branch_unlikely(&trace_event_exports_enabled))
2897 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2898 
2899 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2900 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2901 
2902 discard:
2903 	if (tt)
2904 		event_triggers_post_call(file, tt);
2905 
2906 }
2907 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2908 
/*
 * Number of callers to skip in a stack trace:
 *
 *   trace_buffer_unlock_commit_regs()
 *   trace_event_buffer_commit()
 *   trace_event_raw_event_xxx()
 */
#define STACK_SKIP 3

/*
 * Commit @event to @buffer, then record the kernel and user stack
 * traces through ftrace_trace_stack() and ftrace_trace_userstack().
 */
void trace_buffer_unlock_commit_regs(struct trace_array *tr,
				     struct trace_buffer *buffer,
				     struct ring_buffer_event *event,
				     unsigned int trace_ctx,
				     struct pt_regs *regs)
{
	int skip;

	__buffer_unlock_commit(buffer, event);

	/*
	 * Without regs, the functions listed above have to be skipped.
	 * Note that blktrace, the wakeup tracer and mmiotrace can get
	 * here too. It's ok if they lose a function or two, those are
	 * not that meaningful.
	 */
	skip = regs ? 0 : STACK_SKIP;

	ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
	ftrace_trace_userstack(tr, buffer, trace_ctx);
}
2935 
/*
 * Like trace_buffer_unlock_commit_regs(), except that no stack trace
 * is recorded: the event is only committed.
 */
void
trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
				   struct ring_buffer_event *event)
{
	__buffer_unlock_commit(buffer, event);
}
2945 
2946 void
2947 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2948 	       parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs)
2949 {
2950 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2951 	struct ring_buffer_event *event;
2952 	struct ftrace_entry *entry;
2953 	int size = sizeof(*entry);
2954 
2955 	size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long);
2956 
2957 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size,
2958 					    trace_ctx);
2959 	if (!event)
2960 		return;
2961 	entry	= ring_buffer_event_data(event);
2962 	entry->ip			= ip;
2963 	entry->parent_ip		= parent_ip;
2964 
2965 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
2966 	if (fregs) {
2967 		for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
2968 			entry->args[i] = ftrace_regs_get_argument(fregs, i);
2969 	}
2970 #endif
2971 
2972 	if (static_branch_unlikely(&trace_function_exports_enabled))
2973 		ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2974 	__buffer_unlock_commit(buffer, event);
2975 }
2976 
2977 #ifdef CONFIG_STACKTRACE
2978 
2979 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2980 #define FTRACE_KSTACK_NESTING	4
2981 
2982 #define FTRACE_KSTACK_ENTRIES	(SZ_4K / FTRACE_KSTACK_NESTING)
2983 
2984 struct ftrace_stack {
2985 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2986 };
2987 
2988 
2989 struct ftrace_stacks {
2990 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2991 };
2992 
2993 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2994 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2995 
2996 static void __ftrace_trace_stack(struct trace_array *tr,
2997 				 struct trace_buffer *buffer,
2998 				 unsigned int trace_ctx,
2999 				 int skip, struct pt_regs *regs)
3000 {
3001 	struct ring_buffer_event *event;
3002 	unsigned int size, nr_entries;
3003 	struct ftrace_stack *fstack;
3004 	struct stack_entry *entry;
3005 	int stackidx;
3006 
3007 	/*
3008 	 * Add one, for this function and the call to save_stack_trace()
3009 	 * If regs is set, then these functions will not be in the way.
3010 	 */
3011 #ifndef CONFIG_UNWINDER_ORC
3012 	if (!regs)
3013 		skip++;
3014 #endif
3015 
3016 	guard(preempt_notrace)();
3017 
3018 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3019 
3020 	/* This should never happen. If it does, yell once and skip */
3021 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3022 		goto out;
3023 
3024 	/*
3025 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3026 	 * interrupt will either see the value pre increment or post
3027 	 * increment. If the interrupt happens pre increment it will have
3028 	 * restored the counter when it returns.  We just need a barrier to
3029 	 * keep gcc from moving things around.
3030 	 */
3031 	barrier();
3032 
3033 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3034 	size = ARRAY_SIZE(fstack->calls);
3035 
3036 	if (regs) {
3037 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3038 						   size, skip);
3039 	} else {
3040 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3041 	}
3042 
3043 #ifdef CONFIG_DYNAMIC_FTRACE
3044 	/* Mark entry of stack trace as trampoline code */
3045 	if (tr->ops && tr->ops->trampoline) {
3046 		unsigned long tramp_start = tr->ops->trampoline;
3047 		unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
3048 		unsigned long *calls = fstack->calls;
3049 
3050 		for (int i = 0; i < nr_entries; i++) {
3051 			if (calls[i] >= tramp_start && calls[i] < tramp_end)
3052 				calls[i] = FTRACE_TRAMPOLINE_MARKER;
3053 		}
3054 	}
3055 #endif
3056 
3057 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3058 				    struct_size(entry, caller, nr_entries),
3059 				    trace_ctx);
3060 	if (!event)
3061 		goto out;
3062 	entry = ring_buffer_event_data(event);
3063 
3064 	entry->size = nr_entries;
3065 	memcpy(&entry->caller, fstack->calls,
3066 	       flex_array_size(entry, caller, nr_entries));
3067 
3068 	__buffer_unlock_commit(buffer, event);
3069 
3070  out:
3071 	/* Again, don't let gcc optimize things here */
3072 	barrier();
3073 	__this_cpu_dec(ftrace_stack_reserve);
3074 }
3075 
3076 static inline void ftrace_trace_stack(struct trace_array *tr,
3077 				      struct trace_buffer *buffer,
3078 				      unsigned int trace_ctx,
3079 				      int skip, struct pt_regs *regs)
3080 {
3081 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3082 		return;
3083 
3084 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
3085 }
3086 
3087 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3088 		   int skip)
3089 {
3090 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3091 
3092 	if (rcu_is_watching()) {
3093 		__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3094 		return;
3095 	}
3096 
3097 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3098 		return;
3099 
3100 	/*
3101 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3102 	 * but if the above rcu_is_watching() failed, then the NMI
3103 	 * triggered someplace critical, and ct_irq_enter() should
3104 	 * not be called from NMI.
3105 	 */
3106 	if (unlikely(in_nmi()))
3107 		return;
3108 
3109 	ct_irq_enter_irqson();
3110 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3111 	ct_irq_exit_irqson();
3112 }
3113 
3114 /**
3115  * trace_dump_stack - record a stack back trace in the trace buffer
3116  * @skip: Number of functions to skip (helper handlers)
3117  */
3118 void trace_dump_stack(int skip)
3119 {
3120 	if (tracing_disabled || tracing_selftest_running)
3121 		return;
3122 
3123 #ifndef CONFIG_UNWINDER_ORC
3124 	/* Skip 1 to skip this function. */
3125 	skip++;
3126 #endif
3127 	__ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
3128 				tracing_gen_ctx(), skip, NULL);
3129 }
3130 EXPORT_SYMBOL_GPL(trace_dump_stack);
3131 
#ifdef CONFIG_USER_STACKTRACE_SUPPORT
/* Per-CPU marker: non-zero while ftrace_trace_userstack() is recording on this CPU */
static DEFINE_PER_CPU(int, user_stack_count);

/*
 * ftrace_trace_userstack - record a user-space stack trace event
 * @tr: trace array whose trace_flags decide whether recording is enabled
 * @buffer: ring buffer the TRACE_USER_STACK event is reserved in
 * @trace_ctx: trace context value stored with the event
 *
 * Nothing is recorded when TRACE_ITER_USERSTACKTRACE is not set in
 * @tr->trace_flags, when called from NMI context, when a recording is
 * already in progress on this CPU, or when the event cannot be reserved.
 */
static void
ftrace_trace_userstack(struct trace_array *tr,
		       struct trace_buffer *buffer, unsigned int trace_ctx)
{
	struct ring_buffer_event *event;
	struct userstack_entry *entry;

	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
		return;

	/*
	 * NMIs can not handle page faults, even with fix ups.
	 * The save user stack can (and often does) fault.
	 */
	if (unlikely(in_nmi()))
		return;

	/*
	 * prevent recursion, since the user stack tracing may
	 * trigger other kernel events.
	 */
	guard(preempt)();
	if (__this_cpu_read(user_stack_count))
		return;

	__this_cpu_inc(user_stack_count);

	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
					    sizeof(*entry), trace_ctx);
	if (!event)
		goto out_drop_count;
	entry	= ring_buffer_event_data(event);

	entry->tgid		= current->tgid;
	/* Clear the caller array first so slots that are not filled in read as zero */
	memset(&entry->caller, 0, sizeof(entry->caller));

	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
	__buffer_unlock_commit(buffer, event);

 out_drop_count:
	/* Always undo the recursion marker taken above */
	__this_cpu_dec(user_stack_count);
}
#else /* CONFIG_USER_STACKTRACE_SUPPORT */
/* Without CONFIG_USER_STACKTRACE_SUPPORT there is nothing to record: empty stub */
static void ftrace_trace_userstack(struct trace_array *tr,
				   struct trace_buffer *buffer,
				   unsigned int trace_ctx)
{
}
#endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3184 
3185 #endif /* CONFIG_STACKTRACE */
3186 
3187 static inline void
3188 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3189 			  unsigned long long delta)
3190 {
3191 	entry->bottom_delta_ts = delta & U32_MAX;
3192 	entry->top_delta_ts = (delta >> 32);
3193 }
3194 
3195 void trace_last_func_repeats(struct trace_array *tr,
3196 			     struct trace_func_repeats *last_info,
3197 			     unsigned int trace_ctx)
3198 {
3199 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3200 	struct func_repeats_entry *entry;
3201 	struct ring_buffer_event *event;
3202 	u64 delta;
3203 
3204 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3205 					    sizeof(*entry), trace_ctx);
3206 	if (!event)
3207 		return;
3208 
3209 	delta = ring_buffer_event_time_stamp(buffer, event) -
3210 		last_info->ts_last_call;
3211 
3212 	entry = ring_buffer_event_data(event);
3213 	entry->ip = last_info->ip;
3214 	entry->parent_ip = last_info->parent_ip;
3215 	entry->count = last_info->count;
3216 	func_repeats_set_delta_ts(entry, delta);
3217 
3218 	__buffer_unlock_commit(buffer, event);
3219 }
3220 
/*
 * created for use with alloc_percpu
 *
 * Per-CPU scratch space handed out by get_trace_buf() and returned by
 * put_trace_buf().
 */
struct trace_buffer_struct {
	/* Number of buffer[] slots currently handed out on this CPU */
	int nesting;
	/* Four scratch buffers of TRACE_BUF_SIZE bytes, one per nesting level */
	char buffer[4][TRACE_BUF_SIZE];
};

/* Per-CPU scratch buffers; NULL until alloc_percpu_trace_buffer() succeeds */
static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3228 
/*
 * This allows for lockless recording.  If we're nested too deeply, then
 * this returns NULL.
 *
 * NULL is also returned when the per-CPU buffers have not been allocated.
 * On success the returned pointer is the start of the scratch buffer for
 * the current nesting level on this CPU, and the nesting count has been
 * incremented; put_trace_buf() decrements it again.
 */
static char *get_trace_buf(void)
{
	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);

	/* @buffer is only dereferenced if trace_percpu_buffer is non-NULL */
	if (!trace_percpu_buffer || buffer->nesting >= 4)
		return NULL;

	buffer->nesting++;

	/* Interrupts must see nesting incremented before we use the buffer */
	barrier();
	return &buffer->buffer[buffer->nesting - 1][0];
}
3246 
/*
 * Release the scratch buffer obtained from get_trace_buf() by dropping
 * this CPU's nesting count by one.
 */
static void put_trace_buf(void)
{
	/* Don't let the decrement of nesting leak before this */
	barrier();
	this_cpu_dec(trace_percpu_buffer->nesting);
}
3253 
3254 static int alloc_percpu_trace_buffer(void)
3255 {
3256 	struct trace_buffer_struct __percpu *buffers;
3257 
3258 	if (trace_percpu_buffer)
3259 		return 0;
3260 
3261 	buffers = alloc_percpu(struct trace_buffer_struct);
3262 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3263 		return -ENOMEM;
3264 
3265 	trace_percpu_buffer = buffers;
3266 	return 0;
3267 }
3268 
/* Set to 1 once trace_printk_init_buffers() has finished its setup */
static int buffers_allocated;

/*
 * trace_printk_init_buffers - one-time setup for trace_printk() users
 *
 * Allocates the per-CPU scratch buffers, prints a prominent warning banner,
 * expands the global_trace buffers via tracing_update_buffers() and marks
 * the setup as done. Later calls return immediately. If the per-CPU
 * allocation fails, nothing else is done and a later call will retry.
 */
void trace_printk_init_buffers(void)
{
	if (buffers_allocated)
		return;

	if (alloc_percpu_trace_buffer())
		return;

	/* trace_printk() is for debug use only. Don't use it in production. */

	pr_warn("\n");
	pr_warn("**********************************************************\n");
	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
	pr_warn("**                                                      **\n");
	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
	pr_warn("**                                                      **\n");
	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
	pr_warn("** unsafe for production use.                           **\n");
	pr_warn("**                                                      **\n");
	pr_warn("** If you see this message and you are not debugging    **\n");
	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
	pr_warn("**                                                      **\n");
	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
	pr_warn("**********************************************************\n");

	/* Expand the buffers to set size */
	tracing_update_buffers(&global_trace);

	buffers_allocated = 1;

	/*
	 * trace_printk_init_buffers() can be called by modules.
	 * If that happens, then we need to start cmdline recording
	 * directly here. If the global_trace.buffer is already
	 * allocated here, then this was called by module code.
	 */
	if (global_trace.array_buffer.buffer)
		tracing_start_cmdline_record();
}
EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3311 
3312 void trace_printk_start_comm(void)
3313 {
3314 	/* Start tracing comms if trace printk is set */
3315 	if (!buffers_allocated)
3316 		return;
3317 	tracing_start_cmdline_record();
3318 }
3319 
3320 static void trace_printk_start_stop_comm(int enabled)
3321 {
3322 	if (!buffers_allocated)
3323 		return;
3324 
3325 	if (enabled)
3326 		tracing_start_cmdline_record();
3327 	else
3328 		tracing_stop_cmdline_record();
3329 }
3330 
/**
 * trace_vbprintk - write binary msg to tracing buffer
 * @ip:    The address of the caller
 * @fmt:   The string format to write to the buffer
 * @args:  Arguments for @fmt
 *
 * The arguments are packed in binary form by vbin_printf() into a per-CPU
 * scratch buffer and then copied, together with @ip and the @fmt pointer,
 * into a TRACE_BPRINT event of the current printk trace array. If that
 * trace array is not printk_binsafe(), the call is forwarded to
 * trace_vprintk() instead.
 *
 * Return: 0 if tracing is disabled, a selftest is running or no scratch
 * buffer was available; otherwise the value returned by vbin_printf()
 * (a count of u32 words), even when the event itself could not be written.
 * In the fallback case, the return value of trace_vprintk().
 */
int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
{
	struct ring_buffer_event *event;
	struct trace_buffer *buffer;
	struct trace_array *tr = READ_ONCE(printk_trace);
	struct bprint_entry *entry;
	unsigned int trace_ctx;
	char *tbuffer;
	int len = 0, size;

	if (!printk_binsafe(tr))
		return trace_vprintk(ip, fmt, args);

	if (unlikely(tracing_selftest_running || tracing_disabled))
		return 0;

	/* Don't pollute graph traces with trace_vprintk internals */
	pause_graph_tracing();

	trace_ctx = tracing_gen_ctx();
	guard(preempt_notrace)();

	tbuffer = get_trace_buf();
	if (!tbuffer) {
		len = 0;
		goto out_nobuffer;
	}

	/* The scratch buffer size is passed in units of 32-bit words */
	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);

	/* Drop the message if it did not fit or could not be formatted */
	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
		goto out_put;

	size = sizeof(*entry) + sizeof(u32) * len;
	buffer = tr->array_buffer.buffer;
	scoped_guard(ring_buffer_nest, buffer) {
		event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
						    trace_ctx);
		if (!event)
			goto out_put;
		entry = ring_buffer_event_data(event);
		entry->ip			= ip;
		/* Only the format pointer is stored, not a copy of the string */
		entry->fmt			= fmt;

		memcpy(entry->buf, tbuffer, sizeof(u32) * len);
		__buffer_unlock_commit(buffer, event);
		/* NOTE(review): 6 is the number of frames to skip — presumably the printk call chain; confirm */
		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
	}
out_put:
	put_trace_buf();

out_nobuffer:
	unpause_graph_tracing();

	return len;
}
EXPORT_SYMBOL_GPL(trace_vbprintk);
3394 
/*
 * __trace_array_vprintk - format a message and write it as a TRACE_PRINT event
 * @buffer: ring buffer the event is written to
 * @ip: caller address stored with the event
 * @fmt: printf style format
 * @args: arguments for @fmt
 *
 * The message is formatted into a per-CPU scratch buffer (truncated to
 * TRACE_BUF_SIZE) and then copied, including its terminating NUL, into the
 * event.
 *
 * Returns 0 if tracing is disabled or no scratch buffer was available,
 * otherwise the formatted length reported by vscnprintf(), even when the
 * event itself could not be reserved.
 */
static __printf(3, 0)
int __trace_array_vprintk(struct trace_buffer *buffer,
			  unsigned long ip, const char *fmt, va_list args)
{
	struct ring_buffer_event *event;
	int len = 0, size;
	struct print_entry *entry;
	unsigned int trace_ctx;
	char *tbuffer;

	if (tracing_disabled)
		return 0;

	/* Don't pollute graph traces with trace_vprintk internals */
	pause_graph_tracing();

	trace_ctx = tracing_gen_ctx();
	guard(preempt_notrace)();


	tbuffer = get_trace_buf();
	if (!tbuffer) {
		len = 0;
		goto out_nobuffer;
	}

	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);

	/* Room for the entry header, the text and its terminating NUL */
	size = sizeof(*entry) + len + 1;
	scoped_guard(ring_buffer_nest, buffer) {
		event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
						    trace_ctx);
		if (!event)
			goto out;
		entry = ring_buffer_event_data(event);
		entry->ip = ip;

		memcpy(&entry->buf, tbuffer, len + 1);
		__buffer_unlock_commit(buffer, event);
		/*
		 * NOTE(review): the stack trace decision uses printk_trace's
		 * flags, which need not be the trace array @buffer belongs
		 * to — confirm this is intended.
		 */
		ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL);
	}
out:
	put_trace_buf();

out_nobuffer:
	unpause_graph_tracing();

	return len;
}
3444 
3445 int trace_array_vprintk(struct trace_array *tr,
3446 			unsigned long ip, const char *fmt, va_list args)
3447 {
3448 	if (tracing_selftest_running && tr == &global_trace)
3449 		return 0;
3450 
3451 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3452 }
3453 
3454 /**
3455  * trace_array_printk - Print a message to a specific instance
3456  * @tr: The instance trace_array descriptor
3457  * @ip: The instruction pointer that this is called from.
3458  * @fmt: The format to print (printf format)
3459  *
3460  * If a subsystem sets up its own instance, they have the right to
3461  * printk strings into their tracing instance buffer using this
3462  * function. Note, this function will not write into the top level
3463  * buffer (use trace_printk() for that), as writing into the top level
3464  * buffer should only have events that can be individually disabled.
3465  * trace_printk() is only used for debugging a kernel, and should not
3466  * be ever incorporated in normal use.
3467  *
3468  * trace_array_printk() can be used, as it will not add noise to the
3469  * top level tracing buffer.
3470  *
3471  * Note, trace_array_init_printk() must be called on @tr before this
3472  * can be used.
3473  */
3474 int trace_array_printk(struct trace_array *tr,
3475 		       unsigned long ip, const char *fmt, ...)
3476 {
3477 	int ret;
3478 	va_list ap;
3479 
3480 	if (!tr)
3481 		return -ENOENT;
3482 
3483 	/* This is only allowed for created instances */
3484 	if (tr == &global_trace)
3485 		return 0;
3486 
3487 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3488 		return 0;
3489 
3490 	va_start(ap, fmt);
3491 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3492 	va_end(ap);
3493 	return ret;
3494 }
3495 EXPORT_SYMBOL_GPL(trace_array_printk);
3496 
3497 /**
3498  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3499  * @tr: The trace array to initialize the buffers for
3500  *
3501  * As trace_array_printk() only writes into instances, they are OK to
3502  * have in the kernel (unlike trace_printk()). This needs to be called
3503  * before trace_array_printk() can be used on a trace_array.
3504  */
3505 int trace_array_init_printk(struct trace_array *tr)
3506 {
3507 	if (!tr)
3508 		return -ENOENT;
3509 
3510 	/* This is only allowed for created instances */
3511 	if (tr == &global_trace)
3512 		return -EINVAL;
3513 
3514 	return alloc_percpu_trace_buffer();
3515 }
3516 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3517 
3518 int trace_array_printk_buf(struct trace_buffer *buffer,
3519 			   unsigned long ip, const char *fmt, ...)
3520 {
3521 	int ret;
3522 	va_list ap;
3523 
3524 	if (!(printk_trace->trace_flags & TRACE_ITER_PRINTK))
3525 		return 0;
3526 
3527 	va_start(ap, fmt);
3528 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3529 	va_end(ap);
3530 	return ret;
3531 }
3532 
/*
 * trace_vprintk - va_list form of printing into the current printk trace array
 * @ip: caller address stored with the event
 * @fmt: printf style format
 * @args: arguments for @fmt
 *
 * Thin wrapper: returns whatever trace_array_vprintk() returns for
 * printk_trace.
 */
int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
{
	return trace_array_vprintk(printk_trace, ip, fmt, args);
}
EXPORT_SYMBOL_GPL(trace_vprintk);
3538 
3539 static void trace_iterator_increment(struct trace_iterator *iter)
3540 {
3541 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3542 
3543 	iter->idx++;
3544 	if (buf_iter)
3545 		ring_buffer_iter_advance(buf_iter);
3546 }
3547 
3548 static struct trace_entry *
3549 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3550 		unsigned long *lost_events)
3551 {
3552 	struct ring_buffer_event *event;
3553 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3554 
3555 	if (buf_iter) {
3556 		event = ring_buffer_iter_peek(buf_iter, ts);
3557 		if (lost_events)
3558 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3559 				(unsigned long)-1 : 0;
3560 	} else {
3561 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3562 					 lost_events);
3563 	}
3564 
3565 	if (event) {
3566 		iter->ent_size = ring_buffer_event_length(event);
3567 		return ring_buffer_event_data(event);
3568 	}
3569 	iter->ent_size = 0;
3570 	return NULL;
3571 }
3572 
/*
 * __find_next_entry - find the next entry to show, without advancing
 * @iter: the trace iterator
 * @ent_cpu: if not NULL, receives the CPU of the entry found (-1 if none
 *	was found while scanning all CPUs)
 * @missing_events: if not NULL, receives the lost events value for the
 *	entry found
 * @ent_ts: receives the timestamp of the entry found
 *
 * For a per-CPU trace file only that CPU's buffer is peeked. Otherwise
 * every tracing CPU is peeked and the entry with the smallest timestamp
 * wins. iter->ent_size is left describing the returned entry.
 *
 * Returns the entry found, or NULL if there is none.
 */
static struct trace_entry *
__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
		  unsigned long *missing_events, u64 *ent_ts)
{
	struct trace_buffer *buffer = iter->array_buffer->buffer;
	struct trace_entry *ent, *next = NULL;
	unsigned long lost_events = 0, next_lost = 0;
	int cpu_file = iter->cpu_file;
	u64 next_ts = 0, ts;
	int next_cpu = -1;
	int next_size = 0;
	int cpu;

	/*
	 * If we are in a per_cpu trace file, don't bother by iterating over
	 * all cpu and peek directly.
	 */
	if (cpu_file > RING_BUFFER_ALL_CPUS) {
		if (ring_buffer_empty_cpu(buffer, cpu_file))
			return NULL;
		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
		if (ent_cpu)
			*ent_cpu = cpu_file;

		return ent;
	}

	for_each_tracing_cpu(cpu) {

		if (ring_buffer_empty_cpu(buffer, cpu))
			continue;

		ent = peek_next_entry(iter, cpu, &ts, &lost_events);

		/*
		 * Pick the entry with the smallest timestamp:
		 */
		if (ent && (!next || ts < next_ts)) {
			next = ent;
			next_cpu = cpu;
			next_ts = ts;
			next_lost = lost_events;
			/* peek_next_entry() overwrites iter->ent_size on every call, so save it */
			next_size = iter->ent_size;
		}
	}

	/* Restore the size that belongs to the entry actually chosen */
	iter->ent_size = next_size;

	if (ent_cpu)
		*ent_cpu = next_cpu;

	if (ent_ts)
		*ent_ts = next_ts;

	if (missing_events)
		*missing_events = next_lost;

	return next;
}
3632 
3633 #define STATIC_FMT_BUF_SIZE	128
3634 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3635 
3636 char *trace_iter_expand_format(struct trace_iterator *iter)
3637 {
3638 	char *tmp;
3639 
3640 	/*
3641 	 * iter->tr is NULL when used with tp_printk, which makes
3642 	 * this get called where it is not safe to call krealloc().
3643 	 */
3644 	if (!iter->tr || iter->fmt == static_fmt_buf)
3645 		return NULL;
3646 
3647 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3648 		       GFP_KERNEL);
3649 	if (tmp) {
3650 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3651 		iter->fmt = tmp;
3652 	}
3653 
3654 	return tmp;
3655 }
3656 
3657 /* Returns true if the string is safe to dereference from an event */
3658 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3659 {
3660 	unsigned long addr = (unsigned long)str;
3661 	struct trace_event *trace_event;
3662 	struct trace_event_call *event;
3663 
3664 	/* OK if part of the event data */
3665 	if ((addr >= (unsigned long)iter->ent) &&
3666 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3667 		return true;
3668 
3669 	/* OK if part of the temp seq buffer */
3670 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3671 	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3672 		return true;
3673 
3674 	/* Core rodata can not be freed */
3675 	if (is_kernel_rodata(addr))
3676 		return true;
3677 
3678 	if (trace_is_tracepoint_string(str))
3679 		return true;
3680 
3681 	/*
3682 	 * Now this could be a module event, referencing core module
3683 	 * data, which is OK.
3684 	 */
3685 	if (!iter->ent)
3686 		return false;
3687 
3688 	trace_event = ftrace_find_event(iter->ent->type);
3689 	if (!trace_event)
3690 		return false;
3691 
3692 	event = container_of(trace_event, struct trace_event_call, event);
3693 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3694 		return false;
3695 
3696 	/* Would rather have rodata, but this will suffice */
3697 	if (within_module_core(addr, event->module))
3698 		return true;
3699 
3700 	return false;
3701 }
3702 
/**
 * ignore_event - Check dereferenced fields while writing to the seq buffer
 * @iter: The iterator that holds the seq buffer and the event being printed
 *
 * At boot up, test_event_printk() will flag any event that dereferences
 * a string with "%s" that does not exist in the ring buffer. It may still
 * be valid, as the string may point to a static string in the kernel
 * rodata that never gets freed. But if the string pointer is pointing
 * to something that was allocated, there's a chance that it can be freed
 * by the time the user reads the trace. This would cause a bad memory
 * access by the kernel and possibly crash the system.
 *
 * This function will check if the event has any fields flagged as needing
 * to be checked at runtime and perform those checks.
 *
 * If it is found that a field is unsafe, it will write into the @iter->seq
 * a message stating what was found to be unsafe. A message is also written
 * when the event or its field list cannot be found.
 *
 * Return: true if the event is unsafe and should be ignored,
 *         false otherwise.
 */
bool ignore_event(struct trace_iterator *iter)
{
	struct ftrace_event_field *field;
	struct trace_event *trace_event;
	struct trace_event_call *event;
	struct list_head *head;
	struct trace_seq *seq;
	const void *ptr;

	trace_event = ftrace_find_event(iter->ent->type);

	seq = &iter->seq;

	if (!trace_event) {
		trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
		return true;
	}

	event = container_of(trace_event, struct trace_event_call, event);
	/* Only events flagged for runtime string testing need the checks below */
	if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
		return false;

	head = trace_get_fields(event);
	if (!head) {
		trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
				 trace_event_name(event));
		return true;
	}

	/* Offsets are from the iter->ent that points to the raw event */
	ptr = iter->ent;

	list_for_each_entry(field, head, link) {
		const char *str;
		bool good;

		if (!field->needs_test)
			continue;

		/* The field holds a string pointer; fetch it from the raw event */
		str = *(const char **)(ptr + field->offset);

		good = trace_safe_str(iter, str);

		/*
		 * If you hit this warning, it is likely that the
		 * trace event in question used %s on a string that
		 * was saved at the time of the event, but may not be
		 * around when the trace is read. Use __string(),
		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
		 * instead. See samples/trace_events/trace-events-sample.h
		 * for reference.
		 */
		if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
			      trace_event_name(event), field->name)) {
			trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
					 trace_event_name(event), field->name);
			return true;
		}
	}
	return false;
}
3785 
3786 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3787 {
3788 	const char *p, *new_fmt;
3789 	char *q;
3790 
3791 	if (WARN_ON_ONCE(!fmt))
3792 		return fmt;
3793 
3794 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3795 		return fmt;
3796 
3797 	p = fmt;
3798 	new_fmt = q = iter->fmt;
3799 	while (*p) {
3800 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3801 			if (!trace_iter_expand_format(iter))
3802 				return fmt;
3803 
3804 			q += iter->fmt - new_fmt;
3805 			new_fmt = iter->fmt;
3806 		}
3807 
3808 		*q++ = *p++;
3809 
3810 		/* Replace %p with %px */
3811 		if (p[-1] == '%') {
3812 			if (p[0] == '%') {
3813 				*q++ = *p++;
3814 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3815 				*q++ = *p++;
3816 				*q++ = 'x';
3817 			}
3818 		}
3819 	}
3820 	*q = '\0';
3821 
3822 	return new_fmt;
3823 }
3824 
#define STATIC_TEMP_BUF_SIZE	128
/* Fixed size stand-in for iter->temp; see the ftrace_dump() note below */
static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);

/*
 * Find the next real entry, without updating the iterator itself
 *
 * The current entry is first copied into iter->temp (allocated or grown as
 * needed) and iter->ent is pointed at that copy. iter->ent_size is restored
 * to its value on entry before returning.
 *
 * Returns the next entry, or NULL if there is none, if the copy buffer
 * could not be allocated, or if the current entry does not fit into the
 * static temp buffer.
 */
struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
					  int *ent_cpu, u64 *ent_ts)
{
	/* __find_next_entry will reset ent_size */
	int ent_size = iter->ent_size;
	struct trace_entry *entry;

	/*
	 * If called from ftrace_dump(), then the iter->temp buffer
	 * will be the static_temp_buf and not created from kmalloc.
	 * If the entry size is greater than the buffer, we can
	 * not save it. Just return NULL in that case. This is only
	 * used to add markers when two consecutive events' time
	 * stamps have a large delta. See trace_print_lat_context()
	 */
	if (iter->temp == static_temp_buf &&
	    STATIC_TEMP_BUF_SIZE < ent_size)
		return NULL;

	/*
	 * The __find_next_entry() may call peek_next_entry(), which may
	 * call ring_buffer_peek() that may make the contents of iter->ent
	 * undefined. Need to copy iter->ent now.
	 */
	if (iter->ent && iter->ent != iter->temp) {
		/* (Re)allocate the copy buffer if it is missing or too small */
		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
			void *temp;
			temp = kmalloc(iter->ent_size, GFP_KERNEL);
			if (!temp)
				return NULL;
			kfree(iter->temp);
			iter->temp = temp;
			iter->temp_size = iter->ent_size;
		}
		memcpy(iter->temp, iter->ent, iter->ent_size);
		iter->ent = iter->temp;
	}
	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
	/* Put back the original ent_size */
	iter->ent_size = ent_size;

	return entry;
}
3873 
3874 /* Find the next real entry, and increment the iterator to the next entry */
3875 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3876 {
3877 	iter->ent = __find_next_entry(iter, &iter->cpu,
3878 				      &iter->lost_events, &iter->ts);
3879 
3880 	if (iter->ent)
3881 		trace_iterator_increment(iter);
3882 
3883 	return iter->ent ? iter : NULL;
3884 }
3885 
/*
 * Consume one event from the ring buffer of iter->cpu. iter->ts and
 * iter->lost_events are handed to ring_buffer_consume() as its outputs.
 */
static void trace_consume(struct trace_iterator *iter)
{
	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
			    &iter->lost_events);
}
3891 
/*
 * s_next - advance the trace iterator to the entry at position *pos
 * @m: seq_file whose private data is the trace iterator
 * @v: unused here
 * @pos: in: requested position; out: incremented by one
 *
 * Returns the iterator when an entry exists at the requested position,
 * NULL when the iterator is already past it or the entries ran out.
 */
static void *s_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_iterator *iter = m->private;
	int i = (int)*pos;
	void *ent;

	WARN_ON_ONCE(iter->leftover);

	(*pos)++;

	/* can't go backwards */
	if (iter->idx > i)
		return NULL;

	/* idx < 0 means nothing has been fetched yet: fetch the first entry */
	if (iter->idx < 0)
		ent = trace_find_next_entry_inc(iter);
	else
		ent = iter;

	/* Walk forward until the iterator sits on the requested index */
	while (ent && iter->idx < i)
		ent = trace_find_next_entry_inc(iter);

	iter->pos = *pos;

	return ent;
}
3918 
/*
 * tracing_iter_reset - rewind the ring buffer iterator of one CPU
 * @iter: the trace iterator
 * @cpu: CPU whose buffer iterator is reset
 *
 * After the reset, entries with a timestamp before the array buffer's
 * time_start are stepped over, and their number is stored in the per-CPU
 * skipped_entries counter. Without a buffer iterator for @cpu only the
 * counter is cleared.
 */
void tracing_iter_reset(struct trace_iterator *iter, int cpu)
{
	struct ring_buffer_iter *buf_iter;
	unsigned long entries = 0;
	u64 ts;

	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;

	buf_iter = trace_buffer_iter(iter, cpu);
	if (!buf_iter)
		return;

	ring_buffer_iter_reset(buf_iter);

	/*
	 * We could have the case with the max latency tracers
	 * that a reset never took place on a cpu. This is evident
	 * by the timestamp being before the start of the buffer.
	 */
	while (ring_buffer_iter_peek(buf_iter, &ts)) {
		if (ts >= iter->array_buffer->time_start)
			break;
		entries++;
		ring_buffer_iter_advance(buf_iter);
		/* This could be a big loop */
		cond_resched();
	}

	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
}
3949 
/*
 * The current tracer is copied to avoid a global locking
 * all around.
 *
 * s_start - seq_file start callback for reading the trace
 * @m: seq_file whose private data is the trace iterator
 * @pos: position the read starts at
 *
 * Switches iter->trace to the trace array's current tracer if it changed,
 * positions the iterator at *pos, and takes the event read lock and the
 * trace access lock, which s_stop() releases.
 *
 * Returns the iterator, NULL when there is no entry at *pos, or
 * ERR_PTR(-EBUSY) for a snapshot iterator whose tracer uses the max buffer
 * (in which case no locks are taken).
 */
static void *s_start(struct seq_file *m, loff_t *pos)
{
	struct trace_iterator *iter = m->private;
	struct trace_array *tr = iter->tr;
	int cpu_file = iter->cpu_file;
	void *p = NULL;
	loff_t l = 0;
	int cpu;

	mutex_lock(&trace_types_lock);
	if (unlikely(tr->current_trace != iter->trace)) {
		/* Close iter->trace before switching to the new current tracer */
		if (iter->trace->close)
			iter->trace->close(iter);
		iter->trace = tr->current_trace;
		/* Reopen the new current tracer */
		if (iter->trace->open)
			iter->trace->open(iter);
	}
	mutex_unlock(&trace_types_lock);

#ifdef CONFIG_TRACER_MAX_TRACE
	if (iter->snapshot && iter->trace->use_max_tr)
		return ERR_PTR(-EBUSY);
#endif

	if (*pos != iter->pos) {
		/* Not a continuation of the previous read: restart from the beginning */
		iter->ent = NULL;
		iter->cpu = 0;
		iter->idx = -1;

		if (cpu_file == RING_BUFFER_ALL_CPUS) {
			for_each_tracing_cpu(cpu)
				tracing_iter_reset(iter, cpu);
		} else
			tracing_iter_reset(iter, cpu_file);

		iter->leftover = 0;
		/* Step forward entry by entry until position *pos is reached */
		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
			;

	} else {
		/*
		 * If we overflowed the seq_file before, then we want
		 * to just reuse the trace_seq buffer again.
		 */
		if (iter->leftover)
			p = iter;
		else {
			l = *pos - 1;
			p = s_next(m, p, &l);
		}
	}

	trace_event_read_lock();
	trace_access_lock(cpu_file);
	return p;
}
4012 
/*
 * s_stop - seq_file stop callback, counterpart of s_start()
 * @m: seq_file whose private data is the trace iterator
 * @p: unused here
 *
 * Releases the locks taken at the end of s_start().
 */
static void s_stop(struct seq_file *m, void *p)
{
	struct trace_iterator *iter = m->private;

#ifdef CONFIG_TRACER_MAX_TRACE
	/* Same condition under which s_start() returned early without locking */
	if (iter->snapshot && iter->trace->use_max_tr)
		return;
#endif

	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();
}
4025 
4026 static void
4027 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4028 		      unsigned long *entries, int cpu)
4029 {
4030 	unsigned long count;
4031 
4032 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4033 	/*
4034 	 * If this buffer has skipped entries, then we hold all
4035 	 * entries for the trace and we need to ignore the
4036 	 * ones before the time stamp.
4037 	 */
4038 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4039 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4040 		/* total is the same as the entries */
4041 		*total = count;
4042 	} else
4043 		*total = count +
4044 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4045 	*entries = count;
4046 }
4047 
4048 static void
4049 get_total_entries(struct array_buffer *buf,
4050 		  unsigned long *total, unsigned long *entries)
4051 {
4052 	unsigned long t, e;
4053 	int cpu;
4054 
4055 	*total = 0;
4056 	*entries = 0;
4057 
4058 	for_each_tracing_cpu(cpu) {
4059 		get_total_entries_cpu(buf, &t, &e, cpu);
4060 		*total += t;
4061 		*entries += e;
4062 	}
4063 }
4064 
4065 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4066 {
4067 	unsigned long total, entries;
4068 
4069 	if (!tr)
4070 		tr = &global_trace;
4071 
4072 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4073 
4074 	return entries;
4075 }
4076 
4077 unsigned long trace_total_entries(struct trace_array *tr)
4078 {
4079 	unsigned long total, entries;
4080 
4081 	if (!tr)
4082 		tr = &global_trace;
4083 
4084 	get_total_entries(&tr->array_buffer, &total, &entries);
4085 
4086 	return entries;
4087 }
4088 
/*
 * Write the column legend of the latency trace format to @m. The text is
 * fixed; nothing is read from any trace state.
 */
static void print_lat_help_header(struct seq_file *m)
{
	seq_puts(m, "#                    _------=> CPU#            \n"
		    "#                   / _-----=> irqs-off/BH-disabled\n"
		    "#                  | / _----=> need-resched    \n"
		    "#                  || / _---=> hardirq/softirq \n"
		    "#                  ||| / _--=> preempt-depth   \n"
		    "#                  |||| / _-=> migrate-disable \n"
		    "#                  ||||| /     delay           \n"
		    "#  cmd     pid     |||||| time  |   caller     \n"
		    "#     \\   /        ||||||  \\    |    /       \n");
}
4101 
/*
 * Write the "entries-in-buffer/entries-written" summary line for @buf,
 * together with the number of online CPUs, followed by a "#" line, to @m.
 */
static void print_event_info(struct array_buffer *buf, struct seq_file *m)
{
	unsigned long total;
	unsigned long entries;

	get_total_entries(buf, &total, &entries);
	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
		   entries, total, num_online_cpus());
	seq_puts(m, "#\n");
}
4112 
/*
 * Write the event summary and the column header of the default trace
 * format to @m. A TGID column is included when TRACE_ITER_RECORD_TGID is
 * set in @flags.
 */
static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
				   unsigned int flags)
{
	bool tgid = flags & TRACE_ITER_RECORD_TGID;

	print_event_info(buf, m);

	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
}
4123 
/*
 * Write the event summary and the column header of the trace format that
 * includes the irq/preempt state columns to @m. A TGID column is included
 * when TRACE_ITER_RECORD_TGID is set in @flags.
 */
static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
				       unsigned int flags)
{
	bool tgid = flags & TRACE_ITER_RECORD_TGID;
	static const char space[] = "            ";
	/*
	 * Number of characters printed through each "%.*s" below: the full
	 * 12 character TGID column, or only 2 characters without it.
	 */
	int prec = tgid ? 12 : 2;

	print_event_info(buf, m);

	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
}
4142 
/*
 * print_trace_header - write the latency trace header to @m
 * @m: seq_file to write to
 * @iter: trace iterator describing the buffer and tracer being shown
 *
 * The header shows the tracer name and kernel release, the saved latency,
 * the entry counts, the CPU, the preemption model, the task data saved for
 * the buffer's CPU and, when a critical section start was recorded, where
 * it started and ended.
 */
void
print_trace_header(struct seq_file *m, struct trace_iterator *iter)
{
	/* NOTE(review): symbol flags come from global_trace, not from iter->tr — confirm this is intended */
	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
	struct array_buffer *buf = iter->array_buffer;
	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
	struct tracer *type = iter->trace;
	unsigned long entries;
	unsigned long total;
	const char *name = type->name;

	get_total_entries(buf, &total, &entries);

	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
		   name, init_utsname()->release);
	seq_puts(m, "# -----------------------------------"
		 "---------------------------------\n");
	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
		   nsecs_to_usecs(data->saved_latency),
		   entries,
		   total,
		   buf->cpu,
		   preempt_model_str(),
		   /* These are reserved for later use */
		   0, 0, 0, 0);
#ifdef CONFIG_SMP
	seq_printf(m, " #P:%d)\n", num_online_cpus());
#else
	seq_puts(m, ")\n");
#endif
	seq_puts(m, "#    -----------------\n");
	seq_printf(m, "#    | task: %.16s-%d "
		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
		   data->comm, data->pid,
		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
		   data->policy, data->rt_priority);
	seq_puts(m, "#    -----------------\n");

	if (data->critical_start) {
		/* Symbols are formatted into iter->seq first and then flushed to @m */
		seq_puts(m, "#  => started at: ");
		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
		trace_print_seq(m, &iter->seq);
		seq_puts(m, "\n#  => ended at:   ");
		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
		trace_print_seq(m, &iter->seq);
		seq_puts(m, "\n#\n");
	}

	seq_puts(m, "#\n");
}
4194 
4195 static void test_cpu_buff_start(struct trace_iterator *iter)
4196 {
4197 	struct trace_seq *s = &iter->seq;
4198 	struct trace_array *tr = iter->tr;
4199 
4200 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4201 		return;
4202 
4203 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4204 		return;
4205 
4206 	if (cpumask_available(iter->started) &&
4207 	    cpumask_test_cpu(iter->cpu, iter->started))
4208 		return;
4209 
4210 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4211 		return;
4212 
4213 	if (cpumask_available(iter->started))
4214 		cpumask_set_cpu(iter->cpu, iter->started);
4215 
4216 	/* Don't print started cpu buffer for the first entry of the trace */
4217 	if (iter->idx > 1)
4218 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4219 				iter->cpu);
4220 }
4221 
4222 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4223 {
4224 	struct trace_array *tr = iter->tr;
4225 	struct trace_seq *s = &iter->seq;
4226 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4227 	struct trace_entry *entry;
4228 	struct trace_event *event;
4229 
4230 	entry = iter->ent;
4231 
4232 	test_cpu_buff_start(iter);
4233 
4234 	event = ftrace_find_event(entry->type);
4235 
4236 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4237 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4238 			trace_print_lat_context(iter);
4239 		else
4240 			trace_print_context(iter);
4241 	}
4242 
4243 	if (trace_seq_has_overflowed(s))
4244 		return TRACE_TYPE_PARTIAL_LINE;
4245 
4246 	if (event) {
4247 		if (tr->trace_flags & TRACE_ITER_FIELDS)
4248 			return print_event_fields(iter, event);
4249 		/*
4250 		 * For TRACE_EVENT() events, the print_fmt is not
4251 		 * safe to use if the array has delta offsets
4252 		 * Force printing via the fields.
4253 		 */
4254 		if ((tr->text_delta) &&
4255 		    event->type > __TRACE_LAST_TYPE)
4256 			return print_event_fields(iter, event);
4257 
4258 		return event->funcs->trace(iter, sym_flags, event);
4259 	}
4260 
4261 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4262 
4263 	return trace_handle_return(s);
4264 }
4265 
4266 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4267 {
4268 	struct trace_array *tr = iter->tr;
4269 	struct trace_seq *s = &iter->seq;
4270 	struct trace_entry *entry;
4271 	struct trace_event *event;
4272 
4273 	entry = iter->ent;
4274 
4275 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4276 		trace_seq_printf(s, "%d %d %llu ",
4277 				 entry->pid, iter->cpu, iter->ts);
4278 
4279 	if (trace_seq_has_overflowed(s))
4280 		return TRACE_TYPE_PARTIAL_LINE;
4281 
4282 	event = ftrace_find_event(entry->type);
4283 	if (event)
4284 		return event->funcs->raw(iter, 0, event);
4285 
4286 	trace_seq_printf(s, "%d ?\n", entry->type);
4287 
4288 	return trace_handle_return(s);
4289 }
4290 
4291 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4292 {
4293 	struct trace_array *tr = iter->tr;
4294 	struct trace_seq *s = &iter->seq;
4295 	unsigned char newline = '\n';
4296 	struct trace_entry *entry;
4297 	struct trace_event *event;
4298 
4299 	entry = iter->ent;
4300 
4301 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4302 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4303 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4304 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4305 		if (trace_seq_has_overflowed(s))
4306 			return TRACE_TYPE_PARTIAL_LINE;
4307 	}
4308 
4309 	event = ftrace_find_event(entry->type);
4310 	if (event) {
4311 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4312 		if (ret != TRACE_TYPE_HANDLED)
4313 			return ret;
4314 	}
4315 
4316 	SEQ_PUT_FIELD(s, newline);
4317 
4318 	return trace_handle_return(s);
4319 }
4320 
4321 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4322 {
4323 	struct trace_array *tr = iter->tr;
4324 	struct trace_seq *s = &iter->seq;
4325 	struct trace_entry *entry;
4326 	struct trace_event *event;
4327 
4328 	entry = iter->ent;
4329 
4330 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4331 		SEQ_PUT_FIELD(s, entry->pid);
4332 		SEQ_PUT_FIELD(s, iter->cpu);
4333 		SEQ_PUT_FIELD(s, iter->ts);
4334 		if (trace_seq_has_overflowed(s))
4335 			return TRACE_TYPE_PARTIAL_LINE;
4336 	}
4337 
4338 	event = ftrace_find_event(entry->type);
4339 	return event ? event->funcs->binary(iter, 0, event) :
4340 		TRACE_TYPE_HANDLED;
4341 }
4342 
4343 int trace_empty(struct trace_iterator *iter)
4344 {
4345 	struct ring_buffer_iter *buf_iter;
4346 	int cpu;
4347 
4348 	/* If we are looking at one CPU buffer, only check that one */
4349 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4350 		cpu = iter->cpu_file;
4351 		buf_iter = trace_buffer_iter(iter, cpu);
4352 		if (buf_iter) {
4353 			if (!ring_buffer_iter_empty(buf_iter))
4354 				return 0;
4355 		} else {
4356 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4357 				return 0;
4358 		}
4359 		return 1;
4360 	}
4361 
4362 	for_each_tracing_cpu(cpu) {
4363 		buf_iter = trace_buffer_iter(iter, cpu);
4364 		if (buf_iter) {
4365 			if (!ring_buffer_iter_empty(buf_iter))
4366 				return 0;
4367 		} else {
4368 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4369 				return 0;
4370 		}
4371 	}
4372 
4373 	return 1;
4374 }
4375 
4376 /*  Called with trace_event_read_lock() held. */
4377 enum print_line_t print_trace_line(struct trace_iterator *iter)
4378 {
4379 	struct trace_array *tr = iter->tr;
4380 	unsigned long trace_flags = tr->trace_flags;
4381 	enum print_line_t ret;
4382 
4383 	if (iter->lost_events) {
4384 		if (iter->lost_events == (unsigned long)-1)
4385 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4386 					 iter->cpu);
4387 		else
4388 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4389 					 iter->cpu, iter->lost_events);
4390 		if (trace_seq_has_overflowed(&iter->seq))
4391 			return TRACE_TYPE_PARTIAL_LINE;
4392 	}
4393 
4394 	if (iter->trace && iter->trace->print_line) {
4395 		ret = iter->trace->print_line(iter);
4396 		if (ret != TRACE_TYPE_UNHANDLED)
4397 			return ret;
4398 	}
4399 
4400 	if (iter->ent->type == TRACE_BPUTS &&
4401 			trace_flags & TRACE_ITER_PRINTK &&
4402 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4403 		return trace_print_bputs_msg_only(iter);
4404 
4405 	if (iter->ent->type == TRACE_BPRINT &&
4406 			trace_flags & TRACE_ITER_PRINTK &&
4407 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4408 		return trace_print_bprintk_msg_only(iter);
4409 
4410 	if (iter->ent->type == TRACE_PRINT &&
4411 			trace_flags & TRACE_ITER_PRINTK &&
4412 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4413 		return trace_print_printk_msg_only(iter);
4414 
4415 	if (trace_flags & TRACE_ITER_BIN)
4416 		return print_bin_fmt(iter);
4417 
4418 	if (trace_flags & TRACE_ITER_HEX)
4419 		return print_hex_fmt(iter);
4420 
4421 	if (trace_flags & TRACE_ITER_RAW)
4422 		return print_raw_fmt(iter);
4423 
4424 	return print_trace_fmt(iter);
4425 }
4426 
4427 void trace_latency_header(struct seq_file *m)
4428 {
4429 	struct trace_iterator *iter = m->private;
4430 	struct trace_array *tr = iter->tr;
4431 
4432 	/* print nothing if the buffers are empty */
4433 	if (trace_empty(iter))
4434 		return;
4435 
4436 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4437 		print_trace_header(m, iter);
4438 
4439 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4440 		print_lat_help_header(m);
4441 }
4442 
4443 void trace_default_header(struct seq_file *m)
4444 {
4445 	struct trace_iterator *iter = m->private;
4446 	struct trace_array *tr = iter->tr;
4447 	unsigned long trace_flags = tr->trace_flags;
4448 
4449 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4450 		return;
4451 
4452 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4453 		/* print nothing if the buffers are empty */
4454 		if (trace_empty(iter))
4455 			return;
4456 		print_trace_header(m, iter);
4457 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4458 			print_lat_help_header(m);
4459 	} else {
4460 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4461 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4462 				print_func_help_header_irq(iter->array_buffer,
4463 							   m, trace_flags);
4464 			else
4465 				print_func_help_header(iter->array_buffer, m,
4466 						       trace_flags);
4467 		}
4468 	}
4469 }
4470 
/* Warn in the trace header when ftrace reports itself as dead. */
static void test_ftrace_alive(struct seq_file *m)
{
	if (ftrace_is_dead())
		seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
			    "#          MAY BE MISSING FUNCTION EVENTS\n");
}
4478 
4479 #ifdef CONFIG_TRACER_MAX_TRACE
/* Usage text for the top-level (all CPUs) snapshot file. */
static void show_snapshot_main_help(struct seq_file *m)
{
	static const char help[] =
		"# echo 0 > snapshot : Clears and frees snapshot buffer\n"
		"# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		"#                      Takes a snapshot of the main buffer.\n"
		"# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
		"#                      (Doesn't have to be '2' works with any number that\n"
		"#                       is not a '0' or '1')\n";

	seq_puts(m, help);
}
4489 
/*
 * Usage text for a per-CPU snapshot file. What "echo 1" does depends
 * on whether the kernel was built with CONFIG_RING_BUFFER_ALLOW_SWAP.
 */
static void show_snapshot_percpu_help(struct seq_file *m)
{
#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
	static const char take_help[] =
		"# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		"#                      Takes a snapshot of the main buffer for this cpu.\n";
#else
	static const char take_help[] =
		"# echo 1 > snapshot : Not supported with this kernel.\n"
		"#                     Must use main snapshot file to allocate.\n";
#endif
	static const char clear_help[] =
		"# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
		"#                      (Doesn't have to be '2' works with any number that\n"
		"#                       is not a '0' or '1')\n";

	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
	seq_puts(m, take_help);
	seq_puts(m, clear_help);
}
4504 
4505 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4506 {
4507 	if (iter->tr->allocated_snapshot)
4508 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4509 	else
4510 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4511 
4512 	seq_puts(m, "# Snapshot commands:\n");
4513 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4514 		show_snapshot_main_help(m);
4515 	else
4516 		show_snapshot_percpu_help(m);
4517 }
4518 #else
/*
 * Empty stand-in used when CONFIG_TRACER_MAX_TRACE is not set.
 * Should never be called.
 */
static inline void
print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
{
}
4521 #endif
4522 
4523 static int s_show(struct seq_file *m, void *v)
4524 {
4525 	struct trace_iterator *iter = v;
4526 	int ret;
4527 
4528 	if (iter->ent == NULL) {
4529 		if (iter->tr) {
4530 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4531 			seq_puts(m, "#\n");
4532 			test_ftrace_alive(m);
4533 		}
4534 		if (iter->snapshot && trace_empty(iter))
4535 			print_snapshot_help(m, iter);
4536 		else if (iter->trace && iter->trace->print_header)
4537 			iter->trace->print_header(m);
4538 		else
4539 			trace_default_header(m);
4540 
4541 	} else if (iter->leftover) {
4542 		/*
4543 		 * If we filled the seq_file buffer earlier, we
4544 		 * want to just show it now.
4545 		 */
4546 		ret = trace_print_seq(m, &iter->seq);
4547 
4548 		/* ret should this time be zero, but you never know */
4549 		iter->leftover = ret;
4550 
4551 	} else {
4552 		ret = print_trace_line(iter);
4553 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4554 			iter->seq.full = 0;
4555 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4556 		}
4557 		ret = trace_print_seq(m, &iter->seq);
4558 		/*
4559 		 * If we overflow the seq_file buffer, then it will
4560 		 * ask us for this data again at start up.
4561 		 * Use that instead.
4562 		 *  ret is 0 if seq_file write succeeded.
4563 		 *        -1 otherwise.
4564 		 */
4565 		iter->leftover = ret;
4566 	}
4567 
4568 	return 0;
4569 }
4570 
4571 /*
4572  * Should be used after trace_array_get(), trace_types_lock
4573  * ensures that i_cdev was already initialized.
4574  */
4575 static inline int tracing_get_cpu(struct inode *inode)
4576 {
4577 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4578 		return (long)inode->i_cdev - 1;
4579 	return RING_BUFFER_ALL_CPUS;
4580 }
4581 
4582 static const struct seq_operations tracer_seq_ops = {
4583 	.start		= s_start,
4584 	.next		= s_next,
4585 	.stop		= s_stop,
4586 	.show		= s_show,
4587 };
4588 
4589 /*
4590  * Note, as iter itself can be allocated and freed in different
4591  * ways, this function is only used to free its content, and not
4592  * the iterator itself. The only requirement to all the allocations
4593  * is that it must zero all fields (kzalloc), as freeing works with
4594  * ethier allocated content or NULL.
4595  */
4596 static void free_trace_iter_content(struct trace_iterator *iter)
4597 {
4598 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
4599 	if (iter->fmt != static_fmt_buf)
4600 		kfree(iter->fmt);
4601 
4602 	kfree(iter->temp);
4603 	kfree(iter->buffer_iter);
4604 	mutex_destroy(&iter->mutex);
4605 	free_cpumask_var(iter->started);
4606 }
4607 
4608 static struct trace_iterator *
4609 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4610 {
4611 	struct trace_array *tr = inode->i_private;
4612 	struct trace_iterator *iter;
4613 	int cpu;
4614 
4615 	if (tracing_disabled)
4616 		return ERR_PTR(-ENODEV);
4617 
4618 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4619 	if (!iter)
4620 		return ERR_PTR(-ENOMEM);
4621 
4622 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4623 				    GFP_KERNEL);
4624 	if (!iter->buffer_iter)
4625 		goto release;
4626 
4627 	/*
4628 	 * trace_find_next_entry() may need to save off iter->ent.
4629 	 * It will place it into the iter->temp buffer. As most
4630 	 * events are less than 128, allocate a buffer of that size.
4631 	 * If one is greater, then trace_find_next_entry() will
4632 	 * allocate a new buffer to adjust for the bigger iter->ent.
4633 	 * It's not critical if it fails to get allocated here.
4634 	 */
4635 	iter->temp = kmalloc(128, GFP_KERNEL);
4636 	if (iter->temp)
4637 		iter->temp_size = 128;
4638 
4639 	/*
4640 	 * trace_event_printf() may need to modify given format
4641 	 * string to replace %p with %px so that it shows real address
4642 	 * instead of hash value. However, that is only for the event
4643 	 * tracing, other tracer may not need. Defer the allocation
4644 	 * until it is needed.
4645 	 */
4646 	iter->fmt = NULL;
4647 	iter->fmt_size = 0;
4648 
4649 	mutex_lock(&trace_types_lock);
4650 	iter->trace = tr->current_trace;
4651 
4652 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4653 		goto fail;
4654 
4655 	iter->tr = tr;
4656 
4657 #ifdef CONFIG_TRACER_MAX_TRACE
4658 	/* Currently only the top directory has a snapshot */
4659 	if (tr->current_trace->print_max || snapshot)
4660 		iter->array_buffer = &tr->max_buffer;
4661 	else
4662 #endif
4663 		iter->array_buffer = &tr->array_buffer;
4664 	iter->snapshot = snapshot;
4665 	iter->pos = -1;
4666 	iter->cpu_file = tracing_get_cpu(inode);
4667 	mutex_init(&iter->mutex);
4668 
4669 	/* Notify the tracer early; before we stop tracing. */
4670 	if (iter->trace->open)
4671 		iter->trace->open(iter);
4672 
4673 	/* Annotate start of buffers if we had overruns */
4674 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4675 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4676 
4677 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4678 	if (trace_clocks[tr->clock_id].in_ns)
4679 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4680 
4681 	/*
4682 	 * If pause-on-trace is enabled, then stop the trace while
4683 	 * dumping, unless this is the "snapshot" file
4684 	 */
4685 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4686 		tracing_stop_tr(tr);
4687 
4688 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4689 		for_each_tracing_cpu(cpu) {
4690 			iter->buffer_iter[cpu] =
4691 				ring_buffer_read_start(iter->array_buffer->buffer,
4692 						       cpu, GFP_KERNEL);
4693 			tracing_iter_reset(iter, cpu);
4694 		}
4695 	} else {
4696 		cpu = iter->cpu_file;
4697 		iter->buffer_iter[cpu] =
4698 			ring_buffer_read_start(iter->array_buffer->buffer,
4699 					       cpu, GFP_KERNEL);
4700 		tracing_iter_reset(iter, cpu);
4701 	}
4702 
4703 	mutex_unlock(&trace_types_lock);
4704 
4705 	return iter;
4706 
4707  fail:
4708 	mutex_unlock(&trace_types_lock);
4709 	free_trace_iter_content(iter);
4710 release:
4711 	seq_release_private(inode, file);
4712 	return ERR_PTR(-ENOMEM);
4713 }
4714 
4715 int tracing_open_generic(struct inode *inode, struct file *filp)
4716 {
4717 	int ret;
4718 
4719 	ret = tracing_check_open_get_tr(NULL);
4720 	if (ret)
4721 		return ret;
4722 
4723 	filp->private_data = inode->i_private;
4724 	return 0;
4725 }
4726 
4727 bool tracing_is_disabled(void)
4728 {
4729 	return (tracing_disabled) ? true: false;
4730 }
4731 
4732 /*
4733  * Open and update trace_array ref count.
4734  * Must have the current trace_array passed to it.
4735  */
4736 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4737 {
4738 	struct trace_array *tr = inode->i_private;
4739 	int ret;
4740 
4741 	ret = tracing_check_open_get_tr(tr);
4742 	if (ret)
4743 		return ret;
4744 
4745 	filp->private_data = inode->i_private;
4746 
4747 	return 0;
4748 }
4749 
4750 /*
4751  * The private pointer of the inode is the trace_event_file.
4752  * Update the tr ref count associated to it.
4753  */
4754 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4755 {
4756 	struct trace_event_file *file = inode->i_private;
4757 	int ret;
4758 
4759 	ret = tracing_check_open_get_tr(file->tr);
4760 	if (ret)
4761 		return ret;
4762 
4763 	guard(mutex)(&event_mutex);
4764 
4765 	/* Fail if the file is marked for removal */
4766 	if (file->flags & EVENT_FILE_FL_FREED) {
4767 		trace_array_put(file->tr);
4768 		return -ENODEV;
4769 	} else {
4770 		event_file_get(file);
4771 	}
4772 
4773 	filp->private_data = inode->i_private;
4774 
4775 	return 0;
4776 }
4777 
4778 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4779 {
4780 	struct trace_event_file *file = inode->i_private;
4781 
4782 	trace_array_put(file->tr);
4783 	event_file_put(file);
4784 
4785 	return 0;
4786 }
4787 
/*
 * Release for single_open() based event files: drop the references
 * first, then let single_release() tear down the seq_file.
 */
int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
{
	(void)tracing_release_file_tr(inode, filp);

	return single_release(inode, filp);
}
4793 
4794 static int tracing_release(struct inode *inode, struct file *file)
4795 {
4796 	struct trace_array *tr = inode->i_private;
4797 	struct seq_file *m = file->private_data;
4798 	struct trace_iterator *iter;
4799 	int cpu;
4800 
4801 	if (!(file->f_mode & FMODE_READ)) {
4802 		trace_array_put(tr);
4803 		return 0;
4804 	}
4805 
4806 	/* Writes do not use seq_file */
4807 	iter = m->private;
4808 	mutex_lock(&trace_types_lock);
4809 
4810 	for_each_tracing_cpu(cpu) {
4811 		if (iter->buffer_iter[cpu])
4812 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4813 	}
4814 
4815 	if (iter->trace && iter->trace->close)
4816 		iter->trace->close(iter);
4817 
4818 	if (!iter->snapshot && tr->stop_count)
4819 		/* reenable tracing if it was previously enabled */
4820 		tracing_start_tr(tr);
4821 
4822 	__trace_array_put(tr);
4823 
4824 	mutex_unlock(&trace_types_lock);
4825 
4826 	free_trace_iter_content(iter);
4827 	seq_release_private(inode, file);
4828 
4829 	return 0;
4830 }
4831 
4832 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4833 {
4834 	struct trace_array *tr = inode->i_private;
4835 
4836 	trace_array_put(tr);
4837 	return 0;
4838 }
4839 
4840 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4841 {
4842 	struct trace_array *tr = inode->i_private;
4843 
4844 	trace_array_put(tr);
4845 
4846 	return single_release(inode, file);
4847 }
4848 
4849 static int tracing_open(struct inode *inode, struct file *file)
4850 {
4851 	struct trace_array *tr = inode->i_private;
4852 	struct trace_iterator *iter;
4853 	int ret;
4854 
4855 	ret = tracing_check_open_get_tr(tr);
4856 	if (ret)
4857 		return ret;
4858 
4859 	/* If this file was open for write, then erase contents */
4860 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4861 		int cpu = tracing_get_cpu(inode);
4862 		struct array_buffer *trace_buf = &tr->array_buffer;
4863 
4864 #ifdef CONFIG_TRACER_MAX_TRACE
4865 		if (tr->current_trace->print_max)
4866 			trace_buf = &tr->max_buffer;
4867 #endif
4868 
4869 		if (cpu == RING_BUFFER_ALL_CPUS)
4870 			tracing_reset_online_cpus(trace_buf);
4871 		else
4872 			tracing_reset_cpu(trace_buf, cpu);
4873 	}
4874 
4875 	if (file->f_mode & FMODE_READ) {
4876 		iter = __tracing_open(inode, file, false);
4877 		if (IS_ERR(iter))
4878 			ret = PTR_ERR(iter);
4879 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4880 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4881 	}
4882 
4883 	if (ret < 0)
4884 		trace_array_put(tr);
4885 
4886 	return ret;
4887 }
4888 
4889 /*
4890  * Some tracers are not suitable for instance buffers.
4891  * A tracer is always available for the global array (toplevel)
4892  * or if it explicitly states that it is.
4893  */
4894 static bool
4895 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4896 {
4897 #ifdef CONFIG_TRACER_SNAPSHOT
4898 	/* arrays with mapped buffer range do not have snapshots */
4899 	if (tr->range_addr_start && t->use_max_tr)
4900 		return false;
4901 #endif
4902 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4903 }
4904 
4905 /* Find the next tracer that this trace array may use */
4906 static struct tracer *
4907 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4908 {
4909 	while (t && !trace_ok_for_array(t, tr))
4910 		t = t->next;
4911 
4912 	return t;
4913 }
4914 
4915 static void *
4916 t_next(struct seq_file *m, void *v, loff_t *pos)
4917 {
4918 	struct trace_array *tr = m->private;
4919 	struct tracer *t = v;
4920 
4921 	(*pos)++;
4922 
4923 	if (t)
4924 		t = get_tracer_for_array(tr, t->next);
4925 
4926 	return t;
4927 }
4928 
4929 static void *t_start(struct seq_file *m, loff_t *pos)
4930 {
4931 	struct trace_array *tr = m->private;
4932 	struct tracer *t;
4933 	loff_t l = 0;
4934 
4935 	mutex_lock(&trace_types_lock);
4936 
4937 	t = get_tracer_for_array(tr, trace_types);
4938 	for (; t && l < *pos; t = t_next(m, t, &l))
4939 			;
4940 
4941 	return t;
4942 }
4943 
4944 static void t_stop(struct seq_file *m, void *p)
4945 {
4946 	mutex_unlock(&trace_types_lock);
4947 }
4948 
4949 static int t_show(struct seq_file *m, void *v)
4950 {
4951 	struct tracer *t = v;
4952 
4953 	if (!t)
4954 		return 0;
4955 
4956 	seq_puts(m, t->name);
4957 	if (t->next)
4958 		seq_putc(m, ' ');
4959 	else
4960 		seq_putc(m, '\n');
4961 
4962 	return 0;
4963 }
4964 
4965 static const struct seq_operations show_traces_seq_ops = {
4966 	.start		= t_start,
4967 	.next		= t_next,
4968 	.stop		= t_stop,
4969 	.show		= t_show,
4970 };
4971 
4972 static int show_traces_open(struct inode *inode, struct file *file)
4973 {
4974 	struct trace_array *tr = inode->i_private;
4975 	struct seq_file *m;
4976 	int ret;
4977 
4978 	ret = tracing_check_open_get_tr(tr);
4979 	if (ret)
4980 		return ret;
4981 
4982 	ret = seq_open(file, &show_traces_seq_ops);
4983 	if (ret) {
4984 		trace_array_put(tr);
4985 		return ret;
4986 	}
4987 
4988 	m = file->private_data;
4989 	m->private = tr;
4990 
4991 	return 0;
4992 }
4993 
4994 static int tracing_seq_release(struct inode *inode, struct file *file)
4995 {
4996 	struct trace_array *tr = inode->i_private;
4997 
4998 	trace_array_put(tr);
4999 	return seq_release(inode, file);
5000 }
5001 
5002 static ssize_t
5003 tracing_write_stub(struct file *filp, const char __user *ubuf,
5004 		   size_t count, loff_t *ppos)
5005 {
5006 	return count;
5007 }
5008 
5009 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5010 {
5011 	int ret;
5012 
5013 	if (file->f_mode & FMODE_READ)
5014 		ret = seq_lseek(file, offset, whence);
5015 	else
5016 		file->f_pos = ret = 0;
5017 
5018 	return ret;
5019 }
5020 
5021 static const struct file_operations tracing_fops = {
5022 	.open		= tracing_open,
5023 	.read		= seq_read,
5024 	.read_iter	= seq_read_iter,
5025 	.splice_read	= copy_splice_read,
5026 	.write		= tracing_write_stub,
5027 	.llseek		= tracing_lseek,
5028 	.release	= tracing_release,
5029 };
5030 
5031 static const struct file_operations show_traces_fops = {
5032 	.open		= show_traces_open,
5033 	.read		= seq_read,
5034 	.llseek		= seq_lseek,
5035 	.release	= tracing_seq_release,
5036 };
5037 
5038 static ssize_t
5039 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5040 		     size_t count, loff_t *ppos)
5041 {
5042 	struct trace_array *tr = file_inode(filp)->i_private;
5043 	char *mask_str __free(kfree) = NULL;
5044 	int len;
5045 
5046 	len = snprintf(NULL, 0, "%*pb\n",
5047 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5048 	mask_str = kmalloc(len, GFP_KERNEL);
5049 	if (!mask_str)
5050 		return -ENOMEM;
5051 
5052 	len = snprintf(mask_str, len, "%*pb\n",
5053 		       cpumask_pr_args(tr->tracing_cpumask));
5054 	if (len >= count)
5055 		return -EINVAL;
5056 
5057 	return simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5058 }
5059 
5060 int tracing_set_cpumask(struct trace_array *tr,
5061 			cpumask_var_t tracing_cpumask_new)
5062 {
5063 	int cpu;
5064 
5065 	if (!tr)
5066 		return -EINVAL;
5067 
5068 	local_irq_disable();
5069 	arch_spin_lock(&tr->max_lock);
5070 	for_each_tracing_cpu(cpu) {
5071 		/*
5072 		 * Increase/decrease the disabled counter if we are
5073 		 * about to flip a bit in the cpumask:
5074 		 */
5075 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5076 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5077 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5078 #ifdef CONFIG_TRACER_MAX_TRACE
5079 			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5080 #endif
5081 		}
5082 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5083 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5084 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5085 #ifdef CONFIG_TRACER_MAX_TRACE
5086 			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5087 #endif
5088 		}
5089 	}
5090 	arch_spin_unlock(&tr->max_lock);
5091 	local_irq_enable();
5092 
5093 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5094 
5095 	return 0;
5096 }
5097 
5098 static ssize_t
5099 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5100 		      size_t count, loff_t *ppos)
5101 {
5102 	struct trace_array *tr = file_inode(filp)->i_private;
5103 	cpumask_var_t tracing_cpumask_new;
5104 	int err;
5105 
5106 	if (count == 0 || count > KMALLOC_MAX_SIZE)
5107 		return -EINVAL;
5108 
5109 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5110 		return -ENOMEM;
5111 
5112 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5113 	if (err)
5114 		goto err_free;
5115 
5116 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5117 	if (err)
5118 		goto err_free;
5119 
5120 	free_cpumask_var(tracing_cpumask_new);
5121 
5122 	return count;
5123 
5124 err_free:
5125 	free_cpumask_var(tracing_cpumask_new);
5126 
5127 	return err;
5128 }
5129 
5130 static const struct file_operations tracing_cpumask_fops = {
5131 	.open		= tracing_open_generic_tr,
5132 	.read		= tracing_cpumask_read,
5133 	.write		= tracing_cpumask_write,
5134 	.release	= tracing_release_generic_tr,
5135 	.llseek		= generic_file_llseek,
5136 };
5137 
5138 static int tracing_trace_options_show(struct seq_file *m, void *v)
5139 {
5140 	struct tracer_opt *trace_opts;
5141 	struct trace_array *tr = m->private;
5142 	u32 tracer_flags;
5143 	int i;
5144 
5145 	guard(mutex)(&trace_types_lock);
5146 
5147 	tracer_flags = tr->current_trace->flags->val;
5148 	trace_opts = tr->current_trace->flags->opts;
5149 
5150 	for (i = 0; trace_options[i]; i++) {
5151 		if (tr->trace_flags & (1 << i))
5152 			seq_printf(m, "%s\n", trace_options[i]);
5153 		else
5154 			seq_printf(m, "no%s\n", trace_options[i]);
5155 	}
5156 
5157 	for (i = 0; trace_opts[i].name; i++) {
5158 		if (tracer_flags & trace_opts[i].bit)
5159 			seq_printf(m, "%s\n", trace_opts[i].name);
5160 		else
5161 			seq_printf(m, "no%s\n", trace_opts[i].name);
5162 	}
5163 
5164 	return 0;
5165 }
5166 
5167 static int __set_tracer_option(struct trace_array *tr,
5168 			       struct tracer_flags *tracer_flags,
5169 			       struct tracer_opt *opts, int neg)
5170 {
5171 	struct tracer *trace = tracer_flags->trace;
5172 	int ret;
5173 
5174 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5175 	if (ret)
5176 		return ret;
5177 
5178 	if (neg)
5179 		tracer_flags->val &= ~opts->bit;
5180 	else
5181 		tracer_flags->val |= opts->bit;
5182 	return 0;
5183 }
5184 
5185 /* Try to assign a tracer specific option */
5186 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5187 {
5188 	struct tracer *trace = tr->current_trace;
5189 	struct tracer_flags *tracer_flags = trace->flags;
5190 	struct tracer_opt *opts = NULL;
5191 	int i;
5192 
5193 	for (i = 0; tracer_flags->opts[i].name; i++) {
5194 		opts = &tracer_flags->opts[i];
5195 
5196 		if (strcmp(cmp, opts->name) == 0)
5197 			return __set_tracer_option(tr, trace->flags, opts, neg);
5198 	}
5199 
5200 	return -EINVAL;
5201 }
5202 
5203 /* Some tracers require overwrite to stay enabled */
5204 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5205 {
5206 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5207 		return -1;
5208 
5209 	return 0;
5210 }
5211 
5212 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5213 {
5214 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5215 	    (mask == TRACE_ITER_RECORD_CMD) ||
5216 	    (mask == TRACE_ITER_TRACE_PRINTK) ||
5217 	    (mask == TRACE_ITER_COPY_MARKER))
5218 		lockdep_assert_held(&event_mutex);
5219 
5220 	/* do nothing if flag is already set */
5221 	if (!!(tr->trace_flags & mask) == !!enabled)
5222 		return 0;
5223 
5224 	/* Give the tracer a chance to approve the change */
5225 	if (tr->current_trace->flag_changed)
5226 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5227 			return -EINVAL;
5228 
5229 	if (mask == TRACE_ITER_TRACE_PRINTK) {
5230 		if (enabled) {
5231 			update_printk_trace(tr);
5232 		} else {
5233 			/*
5234 			 * The global_trace cannot clear this.
5235 			 * It's flag only gets cleared if another instance sets it.
5236 			 */
5237 			if (printk_trace == &global_trace)
5238 				return -EINVAL;
5239 			/*
5240 			 * An instance must always have it set.
5241 			 * by default, that's the global_trace instane.
5242 			 */
5243 			if (printk_trace == tr)
5244 				update_printk_trace(&global_trace);
5245 		}
5246 	}
5247 
5248 	if (mask == TRACE_ITER_COPY_MARKER)
5249 		update_marker_trace(tr, enabled);
5250 
5251 	if (enabled)
5252 		tr->trace_flags |= mask;
5253 	else
5254 		tr->trace_flags &= ~mask;
5255 
5256 	if (mask == TRACE_ITER_RECORD_CMD)
5257 		trace_event_enable_cmd_record(enabled);
5258 
5259 	if (mask == TRACE_ITER_RECORD_TGID) {
5260 
5261 		if (trace_alloc_tgid_map() < 0) {
5262 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5263 			return -ENOMEM;
5264 		}
5265 
5266 		trace_event_enable_tgid_record(enabled);
5267 	}
5268 
5269 	if (mask == TRACE_ITER_EVENT_FORK)
5270 		trace_event_follow_fork(tr, enabled);
5271 
5272 	if (mask == TRACE_ITER_FUNC_FORK)
5273 		ftrace_pid_follow_fork(tr, enabled);
5274 
5275 	if (mask == TRACE_ITER_OVERWRITE) {
5276 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5277 #ifdef CONFIG_TRACER_MAX_TRACE
5278 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5279 #endif
5280 	}
5281 
5282 	if (mask == TRACE_ITER_PRINTK) {
5283 		trace_printk_start_stop_comm(enabled);
5284 		trace_printk_control(enabled);
5285 	}
5286 
5287 	return 0;
5288 }
5289 
5290 int trace_set_options(struct trace_array *tr, char *option)
5291 {
5292 	char *cmp;
5293 	int neg = 0;
5294 	int ret;
5295 	size_t orig_len = strlen(option);
5296 	int len;
5297 
5298 	cmp = strstrip(option);
5299 
5300 	len = str_has_prefix(cmp, "no");
5301 	if (len)
5302 		neg = 1;
5303 
5304 	cmp += len;
5305 
5306 	mutex_lock(&event_mutex);
5307 	mutex_lock(&trace_types_lock);
5308 
5309 	ret = match_string(trace_options, -1, cmp);
5310 	/* If no option could be set, test the specific tracer options */
5311 	if (ret < 0)
5312 		ret = set_tracer_option(tr, cmp, neg);
5313 	else
5314 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5315 
5316 	mutex_unlock(&trace_types_lock);
5317 	mutex_unlock(&event_mutex);
5318 
5319 	/*
5320 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5321 	 * turn it back into a space.
5322 	 */
5323 	if (orig_len > strlen(option))
5324 		option[strlen(option)] = ' ';
5325 
5326 	return ret;
5327 }
5328 
5329 static void __init apply_trace_boot_options(void)
5330 {
5331 	char *buf = trace_boot_options_buf;
5332 	char *option;
5333 
5334 	while (true) {
5335 		option = strsep(&buf, ",");
5336 
5337 		if (!option)
5338 			break;
5339 
5340 		if (*option)
5341 			trace_set_options(&global_trace, option);
5342 
5343 		/* Put back the comma to allow this to be called again */
5344 		if (buf)
5345 			*(buf - 1) = ',';
5346 	}
5347 }
5348 
5349 static ssize_t
5350 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5351 			size_t cnt, loff_t *ppos)
5352 {
5353 	struct seq_file *m = filp->private_data;
5354 	struct trace_array *tr = m->private;
5355 	char buf[64];
5356 	int ret;
5357 
5358 	if (cnt >= sizeof(buf))
5359 		return -EINVAL;
5360 
5361 	if (copy_from_user(buf, ubuf, cnt))
5362 		return -EFAULT;
5363 
5364 	buf[cnt] = 0;
5365 
5366 	ret = trace_set_options(tr, buf);
5367 	if (ret < 0)
5368 		return ret;
5369 
5370 	*ppos += cnt;
5371 
5372 	return cnt;
5373 }
5374 
5375 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5376 {
5377 	struct trace_array *tr = inode->i_private;
5378 	int ret;
5379 
5380 	ret = tracing_check_open_get_tr(tr);
5381 	if (ret)
5382 		return ret;
5383 
5384 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5385 	if (ret < 0)
5386 		trace_array_put(tr);
5387 
5388 	return ret;
5389 }
5390 
5391 static const struct file_operations tracing_iter_fops = {
5392 	.open		= tracing_trace_options_open,
5393 	.read		= seq_read,
5394 	.llseek		= seq_lseek,
5395 	.release	= tracing_single_release_tr,
5396 	.write		= tracing_trace_options_write,
5397 };
5398 
/*
 * Help text handed out verbatim by tracing_readme_read().
 *
 * The message is assembled from adjacent string literals. Sections are
 * wrapped in #ifdef CONFIG_* blocks, so only the text for options that are
 * enabled in the kernel configuration ends up in the final string.
 */
static const char readme_msg[] =
	"tracing mini-HOWTO:\n\n"
	"By default tracefs removes all OTH file permission bits.\n"
	"When mounting tracefs an optional group id can be specified\n"
	"which adds the group to every directory and file in tracefs:\n\n"
	"\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
	"# echo 0 > tracing_on : quick way to disable tracing\n"
	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
	" Important files:\n"
	"  trace\t\t\t- The static contents of the buffer\n"
	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
	"  current_tracer\t- function and latency tracers\n"
	"  available_tracers\t- list of configured tracers for current_tracer\n"
	"  error_log\t- error log for failed commands (that support it)\n"
	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
	"  trace_clock\t\t- change the clock used to order events\n"
	"       local:   Per cpu clock but may not be synced across CPUs\n"
	"      global:   Synced across CPUs but slows tracing down.\n"
	"     counter:   Not a clock, but just an increment\n"
	"      uptime:   Jiffy counter from time of boot\n"
	"        perf:   Same clock that perf events use\n"
#ifdef CONFIG_X86_64
	"     x86-tsc:   TSC cycle counter\n"
#endif
	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
	"       delta:   Delta difference against a buffer-wide timestamp\n"
	"    absolute:   Absolute (standalone) timestamp\n"
	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
	"  tracing_cpumask\t- Limit which CPUs to trace\n"
	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
	"\t\t\t  Remove sub-buffer with rmdir\n"
	"  trace_options\t\t- Set format or modify how tracing happens\n"
	"\t\t\t  Disable an option by prefixing 'no' to the\n"
	"\t\t\t  option name\n"
	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
#ifdef CONFIG_DYNAMIC_FTRACE
	"\n  available_filter_functions - list of functions that can be filtered on\n"
	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
	"\t\t\t  functions\n"
	"\t     accepts: func_full_name or glob-matching-pattern\n"
	"\t     modules: Can select a group via module\n"
	"\t      Format: :mod:<module-name>\n"
	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
	"\t    triggers: a command to perform when function is hit\n"
	"\t      Format: <function>:<trigger>[:count]\n"
	"\t     trigger: traceon, traceoff\n"
	"\t\t      enable_event:<system>:<event>\n"
	"\t\t      disable_event:<system>:<event>\n"
#ifdef CONFIG_STACKTRACE
	"\t\t      stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t\t      snapshot\n"
#endif
	"\t\t      dump\n"
	"\t\t      cpudump\n"
	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
	"\t     The first one will disable tracing every time do_fault is hit\n"
	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
	"\t       The first time do trap is hit and it disables tracing, the\n"
	"\t       counter will decrement to 2. If tracing is already disabled,\n"
	"\t       the counter will not decrement. It only decrements when the\n"
	"\t       trigger did work\n"
	"\t     To remove trigger without count:\n"
	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
	"\t     To remove trigger with a count:\n"
	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
	"\t    modules: Can select a group via module command :mod:\n"
	"\t    Does not accept triggers\n"
#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_FUNCTION_TRACER
	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
	"\t\t    (function)\n"
	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
	"\t\t    (function)\n"
#endif
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
	"\t\t\t  snapshot buffer. Read the contents for more\n"
	"\t\t\t  information\n"
#endif
#ifdef CONFIG_STACK_TRACER
	"  stack_trace\t\t- Shows the max stack trace when active\n"
	"  stack_max_size\t- Shows current max stack size that was traced\n"
	"\t\t\t  Write into this file to reset the max size (trigger a\n"
	"\t\t\t  new trace)\n"
#ifdef CONFIG_DYNAMIC_FTRACE
	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
	"\t\t\t  traces\n"
#endif
#endif /* CONFIG_STACK_TRACER */
#ifdef CONFIG_DYNAMIC_EVENTS
	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
	"\t\t\t  Write into this file to define/undefine new trace events.\n"
#endif
#ifdef CONFIG_KPROBE_EVENTS
	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
	"\t\t\t  Write into this file to define/undefine new trace events.\n"
#endif
#ifdef CONFIG_UPROBE_EVENTS
	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
	"\t\t\t  Write into this file to define/undefine new trace events.\n"
#endif
#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
    defined(CONFIG_FPROBE_EVENTS)
	"\t  accepts: event-definitions (one definition per line)\n"
#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
#endif
#ifdef CONFIG_FPROBE_EVENTS
	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
#endif
#ifdef CONFIG_HIST_TRIGGERS
	"\t           s:[synthetic/]<event> <field> [<field>]\n"
#endif
	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
	"\t           -:[<group>/][<event>]\n"
#ifdef CONFIG_KPROBE_EVENTS
	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
  "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
#endif
#ifdef CONFIG_UPROBE_EVENTS
  "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
#endif
	"\t     args: <name>=fetcharg[:type]\n"
	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
#ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
	"\t           <argname>[->field[->field|.field...]],\n"
#endif
#else
	"\t           $stack<index>, $stack, $retval, $comm,\n"
#endif
	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
	"\t     kernel return probes support: $retval, $arg<N>, $comm\n"
	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
	"\t           symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
#ifdef CONFIG_HIST_TRIGGERS
	"\t    field: <stype> <name>;\n"
	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
	"\t           [unsigned] char/int/long\n"
#endif
	"\t    efield: For event probes ('e' types), the field is on of the fields\n"
	"\t            of the <attached-group>/<attached-event>.\n"
#endif
	"  set_event\t\t- Enables events by name written into it\n"
	"\t\t\t  Can enable module events via: :mod:<module>\n"
	"  events/\t\t- Directory containing all trace event subsystems:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
	"\t\t\t  events\n"
	"      filter\t\t- If set, only events passing filter are traced\n"
	"  events/<system>/<event>/\t- Directory containing control files for\n"
	"\t\t\t  <event>:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
	"      filter\t\t- If set, only events passing filter are traced\n"
	"      trigger\t\t- If set, a command to perform when event is hit\n"
	"\t    Format: <trigger>[:count][if <filter>]\n"
	"\t   trigger: traceon, traceoff\n"
	"\t            enable_event:<system>:<event>\n"
	"\t            disable_event:<system>:<event>\n"
#ifdef CONFIG_HIST_TRIGGERS
	"\t            enable_hist:<system>:<event>\n"
	"\t            disable_hist:<system>:<event>\n"
#endif
#ifdef CONFIG_STACKTRACE
	"\t\t    stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t\t    snapshot\n"
#endif
#ifdef CONFIG_HIST_TRIGGERS
	"\t\t    hist (see below)\n"
#endif
	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
	"\t                  events/block/block_unplug/trigger\n"
	"\t   The first disables tracing every time block_unplug is hit.\n"
	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
	"\t   Like function triggers, the counter is only decremented if it\n"
	"\t    enabled or disabled tracing.\n"
	"\t   To remove a trigger without a count:\n"
	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
	"\t   To remove a trigger with a count:\n"
	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
	"\t   Filters can be ignored when removing a trigger.\n"
#ifdef CONFIG_HIST_TRIGGERS
	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
	"\t    Format: hist:keys=<field1[,field2,...]>\n"
	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
	"\t            [:values=<field1[,field2,...]>]\n"
	"\t            [:sort=<field1[,field2,...]>]\n"
	"\t            [:size=#entries]\n"
	"\t            [:pause][:continue][:clear]\n"
	"\t            [:name=histname1]\n"
	"\t            [:nohitcount]\n"
	"\t            [:<handler>.<action>]\n"
	"\t            [if <filter>]\n\n"
	"\t    Note, special fields can be used as well:\n"
	"\t            common_timestamp - to record current timestamp\n"
	"\t            common_cpu - to record the CPU the event happened on\n"
	"\n"
	"\t    A hist trigger variable can be:\n"
	"\t        - a reference to a field e.g. x=current_timestamp,\n"
	"\t        - a reference to another variable e.g. y=$x,\n"
	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
	"\n"
	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
	"\t    variable reference, field or numeric literal.\n"
	"\n"
	"\t    When a matching event is hit, an entry is added to a hash\n"
	"\t    table using the key(s) and value(s) named, and the value of a\n"
	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
	"\t    correspond to fields in the event's format description.  Keys\n"
	"\t    can be any field, or the special string 'common_stacktrace'.\n"
	"\t    Compound keys consisting of up to two fields can be specified\n"
	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
	"\t    fields.  Sort keys consisting of up to two fields can be\n"
	"\t    specified using the 'sort' keyword.  The sort direction can\n"
	"\t    be modified by appending '.descending' or '.ascending' to a\n"
	"\t    sort field.  The 'size' parameter can be used to specify more\n"
	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
	"\t    its histogram data will be shared with other triggers of the\n"
	"\t    same name, and trigger hits will update this common data.\n\n"
	"\t    Reading the 'hist' file for the event will dump the hash\n"
	"\t    table in its entirety to stdout.  If there are multiple hist\n"
	"\t    triggers attached to an event, there will be a table for each\n"
	"\t    trigger in the output.  The table displayed for a named\n"
	"\t    trigger will be the same as any other instance having the\n"
	"\t    same name.  The default format used to display a given field\n"
	"\t    can be modified by appending any of the following modifiers\n"
	"\t    to the field name, as applicable:\n\n"
	"\t            .hex        display a number as a hex value\n"
	"\t            .sym        display an address as a symbol\n"
	"\t            .sym-offset display an address as a symbol and offset\n"
	"\t            .execname   display a common_pid as a program name\n"
	"\t            .syscall    display a syscall id as a syscall name\n"
	"\t            .log2       display log2 value rather than raw number\n"
	"\t            .buckets=size  display values in groups of size rather than raw number\n"
	"\t            .usecs      display a common_timestamp in microseconds\n"
	"\t            .percent    display a number of percentage value\n"
	"\t            .graph      display a bar-graph of a value\n\n"
	"\t    The 'pause' parameter can be used to pause an existing hist\n"
	"\t    trigger or to start a hist trigger but not log any events\n"
	"\t    until told to do so.  'continue' can be used to start or\n"
	"\t    restart a paused hist trigger.\n\n"
	"\t    The 'clear' parameter will clear the contents of a running\n"
	"\t    hist trigger and leave its current paused/active state\n"
	"\t    unchanged.\n\n"
	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
	"\t    raw hitcount in the histogram.\n\n"
	"\t    The enable_hist and disable_hist triggers can be used to\n"
	"\t    have one event conditionally start and stop another event's\n"
	"\t    already-attached hist trigger.  The syntax is analogous to\n"
	"\t    the enable_event and disable_event triggers.\n\n"
	"\t    Hist trigger handlers and actions are executed whenever a\n"
	"\t    a histogram entry is added or updated.  They take the form:\n\n"
	"\t        <handler>.<action>\n\n"
	"\t    The available handlers are:\n\n"
	"\t        onmatch(matching.event)  - invoke on addition or update\n"
	"\t        onmax(var)               - invoke if var exceeds current max\n"
	"\t        onchange(var)            - invoke action if var changes\n\n"
	"\t    The available actions are:\n\n"
	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
	"\t        save(field,...)                      - save current event fields\n"
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t        snapshot()                           - snapshot the trace buffer\n\n"
#endif
#ifdef CONFIG_SYNTH_EVENTS
	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
	"\t  Write into this file to define/undefine new synthetic events.\n"
	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
#endif
#endif
;
5696 
5697 static ssize_t
5698 tracing_readme_read(struct file *filp, char __user *ubuf,
5699 		       size_t cnt, loff_t *ppos)
5700 {
5701 	return simple_read_from_buffer(ubuf, cnt, ppos,
5702 					readme_msg, strlen(readme_msg));
5703 }
5704 
5705 static const struct file_operations tracing_readme_fops = {
5706 	.open		= tracing_open_generic,
5707 	.read		= tracing_readme_read,
5708 	.llseek		= generic_file_llseek,
5709 };
5710 
5711 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5712 static union trace_eval_map_item *
5713 update_eval_map(union trace_eval_map_item *ptr)
5714 {
5715 	if (!ptr->map.eval_string) {
5716 		if (ptr->tail.next) {
5717 			ptr = ptr->tail.next;
5718 			/* Set ptr to the next real item (skip head) */
5719 			ptr++;
5720 		} else
5721 			return NULL;
5722 	}
5723 	return ptr;
5724 }
5725 
5726 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5727 {
5728 	union trace_eval_map_item *ptr = v;
5729 
5730 	/*
5731 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5732 	 * This really should never happen.
5733 	 */
5734 	(*pos)++;
5735 	ptr = update_eval_map(ptr);
5736 	if (WARN_ON_ONCE(!ptr))
5737 		return NULL;
5738 
5739 	ptr++;
5740 	ptr = update_eval_map(ptr);
5741 
5742 	return ptr;
5743 }
5744 
5745 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5746 {
5747 	union trace_eval_map_item *v;
5748 	loff_t l = 0;
5749 
5750 	mutex_lock(&trace_eval_mutex);
5751 
5752 	v = trace_eval_maps;
5753 	if (v)
5754 		v++;
5755 
5756 	while (v && l < *pos) {
5757 		v = eval_map_next(m, v, &l);
5758 	}
5759 
5760 	return v;
5761 }
5762 
/* seq_file ->stop callback: release the mutex taken in eval_map_start() */
static void eval_map_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&trace_eval_mutex);
}
5767 
5768 static int eval_map_show(struct seq_file *m, void *v)
5769 {
5770 	union trace_eval_map_item *ptr = v;
5771 
5772 	seq_printf(m, "%s %ld (%s)\n",
5773 		   ptr->map.eval_string, ptr->map.eval_value,
5774 		   ptr->map.system);
5775 
5776 	return 0;
5777 }
5778 
5779 static const struct seq_operations tracing_eval_map_seq_ops = {
5780 	.start		= eval_map_start,
5781 	.next		= eval_map_next,
5782 	.stop		= eval_map_stop,
5783 	.show		= eval_map_show,
5784 };
5785 
5786 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5787 {
5788 	int ret;
5789 
5790 	ret = tracing_check_open_get_tr(NULL);
5791 	if (ret)
5792 		return ret;
5793 
5794 	return seq_open(filp, &tracing_eval_map_seq_ops);
5795 }
5796 
5797 static const struct file_operations tracing_eval_map_fops = {
5798 	.open		= tracing_eval_map_open,
5799 	.read		= seq_read,
5800 	.llseek		= seq_lseek,
5801 	.release	= seq_release,
5802 };
5803 
5804 static inline union trace_eval_map_item *
5805 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5806 {
5807 	/* Return tail of array given the head */
5808 	return ptr + ptr->head.length + 1;
5809 }
5810 
5811 static void
5812 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5813 			   int len)
5814 {
5815 	struct trace_eval_map **stop;
5816 	struct trace_eval_map **map;
5817 	union trace_eval_map_item *map_array;
5818 	union trace_eval_map_item *ptr;
5819 
5820 	stop = start + len;
5821 
5822 	/*
5823 	 * The trace_eval_maps contains the map plus a head and tail item,
5824 	 * where the head holds the module and length of array, and the
5825 	 * tail holds a pointer to the next list.
5826 	 */
5827 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5828 	if (!map_array) {
5829 		pr_warn("Unable to allocate trace eval mapping\n");
5830 		return;
5831 	}
5832 
5833 	guard(mutex)(&trace_eval_mutex);
5834 
5835 	if (!trace_eval_maps)
5836 		trace_eval_maps = map_array;
5837 	else {
5838 		ptr = trace_eval_maps;
5839 		for (;;) {
5840 			ptr = trace_eval_jmp_to_tail(ptr);
5841 			if (!ptr->tail.next)
5842 				break;
5843 			ptr = ptr->tail.next;
5844 
5845 		}
5846 		ptr->tail.next = map_array;
5847 	}
5848 	map_array->head.mod = mod;
5849 	map_array->head.length = len;
5850 	map_array++;
5851 
5852 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5853 		map_array->map = **map;
5854 		map_array++;
5855 	}
5856 	memset(map_array, 0, sizeof(*map_array));
5857 }
5858 
/* Create the read-only "eval_map" file in the @d_tracer directory */
static void trace_create_eval_file(struct dentry *d_tracer)
{
	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
			  NULL, &tracing_eval_map_fops);
}
5864 
5865 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
/* No "eval_map" file without CONFIG_TRACE_EVAL_MAP_FILE */
static inline void trace_create_eval_file(struct dentry *d_tracer) { }
/* Nothing to record without CONFIG_TRACE_EVAL_MAP_FILE */
static inline void trace_insert_eval_map_file(struct module *mod,
			      struct trace_eval_map **start, int len) { }
5869 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5870 
5871 static void
5872 trace_event_update_with_eval_map(struct module *mod,
5873 				 struct trace_eval_map **start,
5874 				 int len)
5875 {
5876 	struct trace_eval_map **map;
5877 
5878 	/* Always run sanitizer only if btf_type_tag attr exists. */
5879 	if (len <= 0) {
5880 		if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) &&
5881 		      IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) &&
5882 		      __has_attribute(btf_type_tag)))
5883 			return;
5884 	}
5885 
5886 	map = start;
5887 
5888 	trace_event_update_all(map, len);
5889 
5890 	if (len <= 0)
5891 		return;
5892 
5893 	trace_insert_eval_map_file(mod, start, len);
5894 }
5895 
5896 static ssize_t
5897 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5898 		       size_t cnt, loff_t *ppos)
5899 {
5900 	struct trace_array *tr = filp->private_data;
5901 	char buf[MAX_TRACER_SIZE+2];
5902 	int r;
5903 
5904 	scoped_guard(mutex, &trace_types_lock) {
5905 		r = sprintf(buf, "%s\n", tr->current_trace->name);
5906 	}
5907 
5908 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5909 }
5910 
5911 int tracer_init(struct tracer *t, struct trace_array *tr)
5912 {
5913 	tracing_reset_online_cpus(&tr->array_buffer);
5914 	return t->init(tr);
5915 }
5916 
5917 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5918 {
5919 	int cpu;
5920 
5921 	for_each_tracing_cpu(cpu)
5922 		per_cpu_ptr(buf->data, cpu)->entries = val;
5923 }
5924 
5925 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5926 {
5927 	if (cpu == RING_BUFFER_ALL_CPUS) {
5928 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5929 	} else {
5930 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5931 	}
5932 }
5933 
5934 #ifdef CONFIG_TRACER_MAX_TRACE
5935 /* resize @tr's buffer to the size of @size_tr's entries */
5936 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5937 					struct array_buffer *size_buf, int cpu_id)
5938 {
5939 	int cpu, ret = 0;
5940 
5941 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5942 		for_each_tracing_cpu(cpu) {
5943 			ret = ring_buffer_resize(trace_buf->buffer,
5944 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5945 			if (ret < 0)
5946 				break;
5947 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5948 				per_cpu_ptr(size_buf->data, cpu)->entries;
5949 		}
5950 	} else {
5951 		ret = ring_buffer_resize(trace_buf->buffer,
5952 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5953 		if (ret == 0)
5954 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5955 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5956 	}
5957 
5958 	return ret;
5959 }
5960 #endif /* CONFIG_TRACER_MAX_TRACE */
5961 
/*
 * __tracing_resize_ring_buffer - resize the ring buffer(s) of a trace instance
 * @tr: trace instance to resize
 * @size: new size handed to ring_buffer_resize()
 * @cpu: CPU to resize, or RING_BUFFER_ALL_CPUS
 *
 * Tracing on @tr is stopped for the duration of the resize and restarted
 * before returning. With CONFIG_TRACER_MAX_TRACE and an allocated
 * snapshot, the max buffer is resized as well; if that fails the main
 * buffer is resized back to its previously recorded size.
 *
 * Returns 0 on success (also when the buffer does not exist yet) or the
 * negative error from ring_buffer_resize().
 */
static int __tracing_resize_ring_buffer(struct trace_array *tr,
					unsigned long size, int cpu)
{
	int ret;

	/*
	 * If kernel or user changes the size of the ring buffer
	 * we use the size that was given, and we can forget about
	 * expanding it later.
	 */
	trace_set_ring_buffer_expanded(tr);

	/* May be called before buffers are initialized */
	if (!tr->array_buffer.buffer)
		return 0;

	/* Do not allow tracing while resizing ring buffer */
	tracing_stop_tr(tr);

	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
	if (ret < 0)
		goto out_start;

#ifdef CONFIG_TRACER_MAX_TRACE
	/* No snapshot buffer allocated: only the main buffer needs updating */
	if (!tr->allocated_snapshot)
		goto out;

	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
	if (ret < 0) {
		/*
		 * The recorded entries of array_buffer have not been updated
		 * yet, so duplicating its own recorded sizes resizes the main
		 * ring buffer back to what it was before this call.
		 */
		int r = resize_buffer_duplicate_size(&tr->array_buffer,
						     &tr->array_buffer, cpu);
		if (r < 0) {
			/*
			 * AARGH! We are left with different
			 * size max buffer!!!!
			 * The max buffer is our "snapshot" buffer.
			 * When a tracer needs a snapshot (one of the
			 * latency tracers), it swaps the max buffer
			 * with the saved snap shot. We succeeded to
			 * update the size of the main buffer, but failed to
			 * update the size of the max buffer. But when we tried
			 * to reset the main buffer to the original size, we
			 * failed there too. This is very unlikely to
			 * happen, but if it does, warn and kill all
			 * tracing.
			 */
			WARN_ON(1);
			tracing_disabled = 1;
		}
		goto out_start;
	}

	update_buffer_entries(&tr->max_buffer, cpu);

 out:
#endif /* CONFIG_TRACER_MAX_TRACE */

	/* Record the new size of the main buffer */
	update_buffer_entries(&tr->array_buffer, cpu);
 out_start:
	tracing_start_tr(tr);
	return ret;
}
6024 
6025 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6026 				  unsigned long size, int cpu_id)
6027 {
6028 	guard(mutex)(&trace_types_lock);
6029 
6030 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6031 		/* make sure, this cpu is enabled in the mask */
6032 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
6033 			return -EINVAL;
6034 	}
6035 
6036 	return __tracing_resize_ring_buffer(tr, size, cpu_id);
6037 }
6038 
/*
 * One module recorded in the scratch area: the base address of the
 * module's text (as stored by save_mod()) and the module's name.
 */
struct trace_mod_entry {
	unsigned long	mod_addr;
	char		mod_name[MODULE_NAME_LEN];
};
6043 
/*
 * Layout of a trace instance's scratch area (tr->scratch).
 *
 * @clock_id:   copied from tr->clock_id
 * @text_addr:  NOTE(review): presumably a kernel text address saved for
 *              computing the text delta - the writer is outside this view
 * @nr_entries: number of valid elements in @entries
 * @entries:    module records; trace_adjust_address() expects them to be
 *              sorted by mod_addr
 */
struct trace_scratch {
	unsigned int		clock_id;
	unsigned long		text_addr;
	unsigned long		nr_entries;
	struct trace_mod_entry	entries[];
};

/*
 * NOTE(review): the users of this mutex are outside this view; presumably
 * it serialises updates to the scratch area - confirm.
 */
static DEFINE_MUTEX(scratch_mutex);
6052 
6053 static int cmp_mod_entry(const void *key, const void *pivot)
6054 {
6055 	unsigned long addr = (unsigned long)key;
6056 	const struct trace_mod_entry *ent = pivot;
6057 
6058 	if (addr >= ent[0].mod_addr && addr < ent[1].mod_addr)
6059 		return 0;
6060 	else
6061 		return addr - ent->mod_addr;
6062 }
6063 
6064 /**
6065  * trace_adjust_address() - Adjust prev boot address to current address.
6066  * @tr: Persistent ring buffer's trace_array.
6067  * @addr: Address in @tr which is adjusted.
6068  */
6069 unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
6070 {
6071 	struct trace_module_delta *module_delta;
6072 	struct trace_scratch *tscratch;
6073 	struct trace_mod_entry *entry;
6074 	unsigned long raddr;
6075 	int idx = 0, nr_entries;
6076 
6077 	/* If we don't have last boot delta, return the address */
6078 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6079 		return addr;
6080 
6081 	/* tr->module_delta must be protected by rcu. */
6082 	guard(rcu)();
6083 	tscratch = tr->scratch;
6084 	/* if there is no tscrach, module_delta must be NULL. */
6085 	module_delta = READ_ONCE(tr->module_delta);
6086 	if (!module_delta || !tscratch->nr_entries ||
6087 	    tscratch->entries[0].mod_addr > addr) {
6088 		raddr = addr + tr->text_delta;
6089 		return __is_kernel(raddr) || is_kernel_core_data(raddr) ||
6090 			is_kernel_rodata(raddr) ? raddr : addr;
6091 	}
6092 
6093 	/* Note that entries must be sorted. */
6094 	nr_entries = tscratch->nr_entries;
6095 	if (nr_entries == 1 ||
6096 	    tscratch->entries[nr_entries - 1].mod_addr < addr)
6097 		idx = nr_entries - 1;
6098 	else {
6099 		entry = __inline_bsearch((void *)addr,
6100 				tscratch->entries,
6101 				nr_entries - 1,
6102 				sizeof(tscratch->entries[0]),
6103 				cmp_mod_entry);
6104 		if (entry)
6105 			idx = entry - tscratch->entries;
6106 	}
6107 
6108 	return addr + module_delta->delta[idx];
6109 }
6110 
#ifdef CONFIG_MODULES
/*
 * Callback handed to module_for_each_mod(): append @mod's text base and
 * name to the scratch area of the trace array passed in @data.
 *
 * Returns 0 on success, -1 when there is no scratch area or when one more
 * entry would not fit into tr->scratch_size.
 */
static int save_mod(struct module *mod, void *data)
{
	struct trace_array *tr = data;
	struct trace_scratch *scratch = tr->scratch;
	struct trace_mod_entry *slot;
	unsigned int limit;

	if (!scratch)
		return -1;

	limit = tr->scratch_size;
	if (struct_size(scratch, entries, scratch->nr_entries + 1) > limit)
		return -1;

	/* Claim the next free slot, then fill it in */
	slot = &scratch->entries[scratch->nr_entries++];
	slot->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
	strscpy(slot->mod_name, mod->name);

	return 0;
}
#else
/* Without module support there is nothing to record. */
static int save_mod(struct module *mod, void *data)
{
	return 0;
}
#endif
6142 
6143 static void update_last_data(struct trace_array *tr)
6144 {
6145 	struct trace_module_delta *module_delta;
6146 	struct trace_scratch *tscratch;
6147 
6148 	if (!(tr->flags & TRACE_ARRAY_FL_BOOT))
6149 		return;
6150 
6151 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6152 		return;
6153 
6154 	/* Only if the buffer has previous boot data clear and update it. */
6155 	tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;
6156 
6157 	/* Reset the module list and reload them */
6158 	if (tr->scratch) {
6159 		struct trace_scratch *tscratch = tr->scratch;
6160 
6161 		tscratch->clock_id = tr->clock_id;
6162 		memset(tscratch->entries, 0,
6163 		       flex_array_size(tscratch, entries, tscratch->nr_entries));
6164 		tscratch->nr_entries = 0;
6165 
6166 		guard(mutex)(&scratch_mutex);
6167 		module_for_each_mod(save_mod, tr);
6168 	}
6169 
6170 	/*
6171 	 * Need to clear all CPU buffers as there cannot be events
6172 	 * from the previous boot mixed with events with this boot
6173 	 * as that will cause a confusing trace. Need to clear all
6174 	 * CPU buffers, even for those that may currently be offline.
6175 	 */
6176 	tracing_reset_all_cpus(&tr->array_buffer);
6177 
6178 	/* Using current data now */
6179 	tr->text_delta = 0;
6180 
6181 	if (!tr->scratch)
6182 		return;
6183 
6184 	tscratch = tr->scratch;
6185 	module_delta = READ_ONCE(tr->module_delta);
6186 	WRITE_ONCE(tr->module_delta, NULL);
6187 	kfree_rcu(module_delta, rcu);
6188 
6189 	/* Set the persistent ring buffer meta data to this address */
6190 	tscratch->text_addr = (unsigned long)_text;
6191 }
6192 
6193 /**
6194  * tracing_update_buffers - used by tracing facility to expand ring buffers
6195  * @tr: The tracing instance
6196  *
6197  * To save on memory when the tracing is never used on a system with it
6198  * configured in. The ring buffers are set to a minimum size. But once
6199  * a user starts to use the tracing facility, then they need to grow
6200  * to their default size.
6201  *
6202  * This function is to be called when a tracer is about to be used.
6203  */
6204 int tracing_update_buffers(struct trace_array *tr)
6205 {
6206 	int ret = 0;
6207 
6208 	guard(mutex)(&trace_types_lock);
6209 
6210 	update_last_data(tr);
6211 
6212 	if (!tr->ring_buffer_expanded)
6213 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6214 						RING_BUFFER_ALL_CPUS);
6215 	return ret;
6216 }
6217 
6218 struct trace_option_dentry;
6219 
6220 static void
6221 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6222 
6223 /*
6224  * Used to clear out the tracer before deletion of an instance.
6225  * Must have trace_types_lock held.
6226  */
6227 static void tracing_set_nop(struct trace_array *tr)
6228 {
6229 	if (tr->current_trace == &nop_trace)
6230 		return;
6231 
6232 	tr->current_trace->enabled--;
6233 
6234 	if (tr->current_trace->reset)
6235 		tr->current_trace->reset(tr);
6236 
6237 	tr->current_trace = &nop_trace;
6238 }
6239 
/* add_tracer_options() creates no option files while this is false. */
static bool tracer_options_updated;
6241 
6242 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6243 {
6244 	/* Only enable if the directory has been created already. */
6245 	if (!tr->dir && !(tr->flags & TRACE_ARRAY_FL_GLOBAL))
6246 		return;
6247 
6248 	/* Only create trace option files after update_tracer_options finish */
6249 	if (!tracer_options_updated)
6250 		return;
6251 
6252 	create_trace_option_files(tr, t);
6253 }
6254 
6255 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6256 {
6257 	struct tracer *t;
6258 #ifdef CONFIG_TRACER_MAX_TRACE
6259 	bool had_max_tr;
6260 #endif
6261 	int ret;
6262 
6263 	guard(mutex)(&trace_types_lock);
6264 
6265 	update_last_data(tr);
6266 
6267 	if (!tr->ring_buffer_expanded) {
6268 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6269 						RING_BUFFER_ALL_CPUS);
6270 		if (ret < 0)
6271 			return ret;
6272 		ret = 0;
6273 	}
6274 
6275 	for (t = trace_types; t; t = t->next) {
6276 		if (strcmp(t->name, buf) == 0)
6277 			break;
6278 	}
6279 	if (!t)
6280 		return -EINVAL;
6281 
6282 	if (t == tr->current_trace)
6283 		return 0;
6284 
6285 #ifdef CONFIG_TRACER_SNAPSHOT
6286 	if (t->use_max_tr) {
6287 		local_irq_disable();
6288 		arch_spin_lock(&tr->max_lock);
6289 		ret = tr->cond_snapshot ? -EBUSY : 0;
6290 		arch_spin_unlock(&tr->max_lock);
6291 		local_irq_enable();
6292 		if (ret)
6293 			return ret;
6294 	}
6295 #endif
6296 	/* Some tracers won't work on kernel command line */
6297 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6298 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6299 			t->name);
6300 		return -EINVAL;
6301 	}
6302 
6303 	/* Some tracers are only allowed for the top level buffer */
6304 	if (!trace_ok_for_array(t, tr))
6305 		return -EINVAL;
6306 
6307 	/* If trace pipe files are being read, we can't change the tracer */
6308 	if (tr->trace_ref)
6309 		return -EBUSY;
6310 
6311 	trace_branch_disable();
6312 
6313 	tr->current_trace->enabled--;
6314 
6315 	if (tr->current_trace->reset)
6316 		tr->current_trace->reset(tr);
6317 
6318 #ifdef CONFIG_TRACER_MAX_TRACE
6319 	had_max_tr = tr->current_trace->use_max_tr;
6320 
6321 	/* Current trace needs to be nop_trace before synchronize_rcu */
6322 	tr->current_trace = &nop_trace;
6323 
6324 	if (had_max_tr && !t->use_max_tr) {
6325 		/*
6326 		 * We need to make sure that the update_max_tr sees that
6327 		 * current_trace changed to nop_trace to keep it from
6328 		 * swapping the buffers after we resize it.
6329 		 * The update_max_tr is called from interrupts disabled
6330 		 * so a synchronized_sched() is sufficient.
6331 		 */
6332 		synchronize_rcu();
6333 		free_snapshot(tr);
6334 		tracing_disarm_snapshot(tr);
6335 	}
6336 
6337 	if (!had_max_tr && t->use_max_tr) {
6338 		ret = tracing_arm_snapshot_locked(tr);
6339 		if (ret)
6340 			return ret;
6341 	}
6342 #else
6343 	tr->current_trace = &nop_trace;
6344 #endif
6345 
6346 	if (t->init) {
6347 		ret = tracer_init(t, tr);
6348 		if (ret) {
6349 #ifdef CONFIG_TRACER_MAX_TRACE
6350 			if (t->use_max_tr)
6351 				tracing_disarm_snapshot(tr);
6352 #endif
6353 			return ret;
6354 		}
6355 	}
6356 
6357 	tr->current_trace = t;
6358 	tr->current_trace->enabled++;
6359 	trace_branch_enable(tr);
6360 
6361 	return 0;
6362 }
6363 
6364 static ssize_t
6365 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6366 			size_t cnt, loff_t *ppos)
6367 {
6368 	struct trace_array *tr = filp->private_data;
6369 	char buf[MAX_TRACER_SIZE+1];
6370 	char *name;
6371 	size_t ret;
6372 	int err;
6373 
6374 	ret = cnt;
6375 
6376 	if (cnt > MAX_TRACER_SIZE)
6377 		cnt = MAX_TRACER_SIZE;
6378 
6379 	if (copy_from_user(buf, ubuf, cnt))
6380 		return -EFAULT;
6381 
6382 	buf[cnt] = 0;
6383 
6384 	name = strim(buf);
6385 
6386 	err = tracing_set_tracer(tr, name);
6387 	if (err)
6388 		return err;
6389 
6390 	*ppos += ret;
6391 
6392 	return ret;
6393 }
6394 
6395 static ssize_t
6396 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6397 		   size_t cnt, loff_t *ppos)
6398 {
6399 	char buf[64];
6400 	int r;
6401 
6402 	r = snprintf(buf, sizeof(buf), "%ld\n",
6403 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6404 	if (r > sizeof(buf))
6405 		r = sizeof(buf);
6406 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6407 }
6408 
6409 static ssize_t
6410 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6411 		    size_t cnt, loff_t *ppos)
6412 {
6413 	unsigned long val;
6414 	int ret;
6415 
6416 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6417 	if (ret)
6418 		return ret;
6419 
6420 	*ptr = val * 1000;
6421 
6422 	return cnt;
6423 }
6424 
6425 static ssize_t
6426 tracing_thresh_read(struct file *filp, char __user *ubuf,
6427 		    size_t cnt, loff_t *ppos)
6428 {
6429 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6430 }
6431 
6432 static ssize_t
6433 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6434 		     size_t cnt, loff_t *ppos)
6435 {
6436 	struct trace_array *tr = filp->private_data;
6437 	int ret;
6438 
6439 	guard(mutex)(&trace_types_lock);
6440 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6441 	if (ret < 0)
6442 		return ret;
6443 
6444 	if (tr->current_trace->update_thresh) {
6445 		ret = tr->current_trace->update_thresh(tr);
6446 		if (ret < 0)
6447 			return ret;
6448 	}
6449 
6450 	return cnt;
6451 }
6452 
6453 #ifdef CONFIG_TRACER_MAX_TRACE
6454 
6455 static ssize_t
6456 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6457 		     size_t cnt, loff_t *ppos)
6458 {
6459 	struct trace_array *tr = filp->private_data;
6460 
6461 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6462 }
6463 
6464 static ssize_t
6465 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6466 		      size_t cnt, loff_t *ppos)
6467 {
6468 	struct trace_array *tr = filp->private_data;
6469 
6470 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6471 }
6472 
6473 #endif
6474 
6475 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6476 {
6477 	if (cpu == RING_BUFFER_ALL_CPUS) {
6478 		if (cpumask_empty(tr->pipe_cpumask)) {
6479 			cpumask_setall(tr->pipe_cpumask);
6480 			return 0;
6481 		}
6482 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6483 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6484 		return 0;
6485 	}
6486 	return -EBUSY;
6487 }
6488 
6489 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6490 {
6491 	if (cpu == RING_BUFFER_ALL_CPUS) {
6492 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6493 		cpumask_clear(tr->pipe_cpumask);
6494 	} else {
6495 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6496 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6497 	}
6498 }
6499 
6500 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6501 {
6502 	struct trace_array *tr = inode->i_private;
6503 	struct trace_iterator *iter;
6504 	int cpu;
6505 	int ret;
6506 
6507 	ret = tracing_check_open_get_tr(tr);
6508 	if (ret)
6509 		return ret;
6510 
6511 	guard(mutex)(&trace_types_lock);
6512 	cpu = tracing_get_cpu(inode);
6513 	ret = open_pipe_on_cpu(tr, cpu);
6514 	if (ret)
6515 		goto fail_pipe_on_cpu;
6516 
6517 	/* create a buffer to store the information to pass to userspace */
6518 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6519 	if (!iter) {
6520 		ret = -ENOMEM;
6521 		goto fail_alloc_iter;
6522 	}
6523 
6524 	trace_seq_init(&iter->seq);
6525 	iter->trace = tr->current_trace;
6526 
6527 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6528 		ret = -ENOMEM;
6529 		goto fail;
6530 	}
6531 
6532 	/* trace pipe does not show start of buffer */
6533 	cpumask_setall(iter->started);
6534 
6535 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6536 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6537 
6538 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6539 	if (trace_clocks[tr->clock_id].in_ns)
6540 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6541 
6542 	iter->tr = tr;
6543 	iter->array_buffer = &tr->array_buffer;
6544 	iter->cpu_file = cpu;
6545 	mutex_init(&iter->mutex);
6546 	filp->private_data = iter;
6547 
6548 	if (iter->trace->pipe_open)
6549 		iter->trace->pipe_open(iter);
6550 
6551 	nonseekable_open(inode, filp);
6552 
6553 	tr->trace_ref++;
6554 
6555 	return ret;
6556 
6557 fail:
6558 	kfree(iter);
6559 fail_alloc_iter:
6560 	close_pipe_on_cpu(tr, cpu);
6561 fail_pipe_on_cpu:
6562 	__trace_array_put(tr);
6563 	return ret;
6564 }
6565 
6566 static int tracing_release_pipe(struct inode *inode, struct file *file)
6567 {
6568 	struct trace_iterator *iter = file->private_data;
6569 	struct trace_array *tr = inode->i_private;
6570 
6571 	scoped_guard(mutex, &trace_types_lock) {
6572 		tr->trace_ref--;
6573 
6574 		if (iter->trace->pipe_close)
6575 			iter->trace->pipe_close(iter);
6576 		close_pipe_on_cpu(tr, iter->cpu_file);
6577 	}
6578 
6579 	free_trace_iter_content(iter);
6580 	kfree(iter);
6581 
6582 	trace_array_put(tr);
6583 
6584 	return 0;
6585 }
6586 
6587 static __poll_t
6588 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6589 {
6590 	struct trace_array *tr = iter->tr;
6591 
6592 	/* Iterators are static, they should be filled or empty */
6593 	if (trace_buffer_iter(iter, iter->cpu_file))
6594 		return EPOLLIN | EPOLLRDNORM;
6595 
6596 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6597 		/*
6598 		 * Always select as readable when in blocking mode
6599 		 */
6600 		return EPOLLIN | EPOLLRDNORM;
6601 	else
6602 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6603 					     filp, poll_table, iter->tr->buffer_percent);
6604 }
6605 
6606 static __poll_t
6607 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6608 {
6609 	struct trace_iterator *iter = filp->private_data;
6610 
6611 	return trace_poll(iter, filp, poll_table);
6612 }
6613 
6614 /* Must be called with iter->mutex held. */
6615 static int tracing_wait_pipe(struct file *filp)
6616 {
6617 	struct trace_iterator *iter = filp->private_data;
6618 	int ret;
6619 
6620 	while (trace_empty(iter)) {
6621 
6622 		if ((filp->f_flags & O_NONBLOCK)) {
6623 			return -EAGAIN;
6624 		}
6625 
6626 		/*
6627 		 * We block until we read something and tracing is disabled.
6628 		 * We still block if tracing is disabled, but we have never
6629 		 * read anything. This allows a user to cat this file, and
6630 		 * then enable tracing. But after we have read something,
6631 		 * we give an EOF when tracing is again disabled.
6632 		 *
6633 		 * iter->pos will be 0 if we haven't read anything.
6634 		 */
6635 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6636 			break;
6637 
6638 		mutex_unlock(&iter->mutex);
6639 
6640 		ret = wait_on_pipe(iter, 0);
6641 
6642 		mutex_lock(&iter->mutex);
6643 
6644 		if (ret)
6645 			return ret;
6646 	}
6647 
6648 	return 1;
6649 }
6650 
6651 static bool update_last_data_if_empty(struct trace_array *tr)
6652 {
6653 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6654 		return false;
6655 
6656 	if (!ring_buffer_empty(tr->array_buffer.buffer))
6657 		return false;
6658 
6659 	/*
6660 	 * If the buffer contains the last boot data and all per-cpu
6661 	 * buffers are empty, reset it from the kernel side.
6662 	 */
6663 	update_last_data(tr);
6664 	return true;
6665 }
6666 
6667 /*
6668  * Consumer reader.
6669  */
6670 static ssize_t
6671 tracing_read_pipe(struct file *filp, char __user *ubuf,
6672 		  size_t cnt, loff_t *ppos)
6673 {
6674 	struct trace_iterator *iter = filp->private_data;
6675 	ssize_t sret;
6676 
6677 	/*
6678 	 * Avoid more than one consumer on a single file descriptor
6679 	 * This is just a matter of traces coherency, the ring buffer itself
6680 	 * is protected.
6681 	 */
6682 	guard(mutex)(&iter->mutex);
6683 
6684 	/* return any leftover data */
6685 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6686 	if (sret != -EBUSY)
6687 		return sret;
6688 
6689 	trace_seq_init(&iter->seq);
6690 
6691 	if (iter->trace->read) {
6692 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6693 		if (sret)
6694 			return sret;
6695 	}
6696 
6697 waitagain:
6698 	if (update_last_data_if_empty(iter->tr))
6699 		return 0;
6700 
6701 	sret = tracing_wait_pipe(filp);
6702 	if (sret <= 0)
6703 		return sret;
6704 
6705 	/* stop when tracing is finished */
6706 	if (trace_empty(iter))
6707 		return 0;
6708 
6709 	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6710 		cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6711 
6712 	/* reset all but tr, trace, and overruns */
6713 	trace_iterator_reset(iter);
6714 	cpumask_clear(iter->started);
6715 	trace_seq_init(&iter->seq);
6716 
6717 	trace_event_read_lock();
6718 	trace_access_lock(iter->cpu_file);
6719 	while (trace_find_next_entry_inc(iter) != NULL) {
6720 		enum print_line_t ret;
6721 		int save_len = iter->seq.seq.len;
6722 
6723 		ret = print_trace_line(iter);
6724 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6725 			/*
6726 			 * If one print_trace_line() fills entire trace_seq in one shot,
6727 			 * trace_seq_to_user() will returns -EBUSY because save_len == 0,
6728 			 * In this case, we need to consume it, otherwise, loop will peek
6729 			 * this event next time, resulting in an infinite loop.
6730 			 */
6731 			if (save_len == 0) {
6732 				iter->seq.full = 0;
6733 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6734 				trace_consume(iter);
6735 				break;
6736 			}
6737 
6738 			/* In other cases, don't print partial lines */
6739 			iter->seq.seq.len = save_len;
6740 			break;
6741 		}
6742 		if (ret != TRACE_TYPE_NO_CONSUME)
6743 			trace_consume(iter);
6744 
6745 		if (trace_seq_used(&iter->seq) >= cnt)
6746 			break;
6747 
6748 		/*
6749 		 * Setting the full flag means we reached the trace_seq buffer
6750 		 * size and we should leave by partial output condition above.
6751 		 * One of the trace_seq_* functions is not used properly.
6752 		 */
6753 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6754 			  iter->ent->type);
6755 	}
6756 	trace_access_unlock(iter->cpu_file);
6757 	trace_event_read_unlock();
6758 
6759 	/* Now copy what we have to the user */
6760 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6761 	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6762 		trace_seq_init(&iter->seq);
6763 
6764 	/*
6765 	 * If there was nothing to send to user, in spite of consuming trace
6766 	 * entries, go back to wait for more entries.
6767 	 */
6768 	if (sret == -EBUSY)
6769 		goto waitagain;
6770 
6771 	return sret;
6772 }
6773 
6774 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6775 				     unsigned int idx)
6776 {
6777 	__free_page(spd->pages[idx]);
6778 }
6779 
6780 static size_t
6781 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6782 {
6783 	size_t count;
6784 	int save_len;
6785 	int ret;
6786 
6787 	/* Seq buffer is page-sized, exactly what we need. */
6788 	for (;;) {
6789 		save_len = iter->seq.seq.len;
6790 		ret = print_trace_line(iter);
6791 
6792 		if (trace_seq_has_overflowed(&iter->seq)) {
6793 			iter->seq.seq.len = save_len;
6794 			break;
6795 		}
6796 
6797 		/*
6798 		 * This should not be hit, because it should only
6799 		 * be set if the iter->seq overflowed. But check it
6800 		 * anyway to be safe.
6801 		 */
6802 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6803 			iter->seq.seq.len = save_len;
6804 			break;
6805 		}
6806 
6807 		count = trace_seq_used(&iter->seq) - save_len;
6808 		if (rem < count) {
6809 			rem = 0;
6810 			iter->seq.seq.len = save_len;
6811 			break;
6812 		}
6813 
6814 		if (ret != TRACE_TYPE_NO_CONSUME)
6815 			trace_consume(iter);
6816 		rem -= count;
6817 		if (!trace_find_next_entry_inc(iter))	{
6818 			rem = 0;
6819 			iter->ent = NULL;
6820 			break;
6821 		}
6822 	}
6823 
6824 	return rem;
6825 }
6826 
6827 static ssize_t tracing_splice_read_pipe(struct file *filp,
6828 					loff_t *ppos,
6829 					struct pipe_inode_info *pipe,
6830 					size_t len,
6831 					unsigned int flags)
6832 {
6833 	struct page *pages_def[PIPE_DEF_BUFFERS];
6834 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6835 	struct trace_iterator *iter = filp->private_data;
6836 	struct splice_pipe_desc spd = {
6837 		.pages		= pages_def,
6838 		.partial	= partial_def,
6839 		.nr_pages	= 0, /* This gets updated below. */
6840 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6841 		.ops		= &default_pipe_buf_ops,
6842 		.spd_release	= tracing_spd_release_pipe,
6843 	};
6844 	ssize_t ret;
6845 	size_t rem;
6846 	unsigned int i;
6847 
6848 	if (splice_grow_spd(pipe, &spd))
6849 		return -ENOMEM;
6850 
6851 	mutex_lock(&iter->mutex);
6852 
6853 	if (iter->trace->splice_read) {
6854 		ret = iter->trace->splice_read(iter, filp,
6855 					       ppos, pipe, len, flags);
6856 		if (ret)
6857 			goto out_err;
6858 	}
6859 
6860 	ret = tracing_wait_pipe(filp);
6861 	if (ret <= 0)
6862 		goto out_err;
6863 
6864 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6865 		ret = -EFAULT;
6866 		goto out_err;
6867 	}
6868 
6869 	trace_event_read_lock();
6870 	trace_access_lock(iter->cpu_file);
6871 
6872 	/* Fill as many pages as possible. */
6873 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6874 		spd.pages[i] = alloc_page(GFP_KERNEL);
6875 		if (!spd.pages[i])
6876 			break;
6877 
6878 		rem = tracing_fill_pipe_page(rem, iter);
6879 
6880 		/* Copy the data into the page, so we can start over. */
6881 		ret = trace_seq_to_buffer(&iter->seq,
6882 					  page_address(spd.pages[i]),
6883 					  min((size_t)trace_seq_used(&iter->seq),
6884 						  (size_t)PAGE_SIZE));
6885 		if (ret < 0) {
6886 			__free_page(spd.pages[i]);
6887 			break;
6888 		}
6889 		spd.partial[i].offset = 0;
6890 		spd.partial[i].len = ret;
6891 
6892 		trace_seq_init(&iter->seq);
6893 	}
6894 
6895 	trace_access_unlock(iter->cpu_file);
6896 	trace_event_read_unlock();
6897 	mutex_unlock(&iter->mutex);
6898 
6899 	spd.nr_pages = i;
6900 
6901 	if (i)
6902 		ret = splice_to_pipe(pipe, &spd);
6903 	else
6904 		ret = 0;
6905 out:
6906 	splice_shrink_spd(&spd);
6907 	return ret;
6908 
6909 out_err:
6910 	mutex_unlock(&iter->mutex);
6911 	goto out;
6912 }
6913 
6914 static ssize_t
6915 tracing_entries_read(struct file *filp, char __user *ubuf,
6916 		     size_t cnt, loff_t *ppos)
6917 {
6918 	struct inode *inode = file_inode(filp);
6919 	struct trace_array *tr = inode->i_private;
6920 	int cpu = tracing_get_cpu(inode);
6921 	char buf[64];
6922 	int r = 0;
6923 	ssize_t ret;
6924 
6925 	mutex_lock(&trace_types_lock);
6926 
6927 	if (cpu == RING_BUFFER_ALL_CPUS) {
6928 		int cpu, buf_size_same;
6929 		unsigned long size;
6930 
6931 		size = 0;
6932 		buf_size_same = 1;
6933 		/* check if all cpu sizes are same */
6934 		for_each_tracing_cpu(cpu) {
6935 			/* fill in the size from first enabled cpu */
6936 			if (size == 0)
6937 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6938 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6939 				buf_size_same = 0;
6940 				break;
6941 			}
6942 		}
6943 
6944 		if (buf_size_same) {
6945 			if (!tr->ring_buffer_expanded)
6946 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6947 					    size >> 10,
6948 					    trace_buf_size >> 10);
6949 			else
6950 				r = sprintf(buf, "%lu\n", size >> 10);
6951 		} else
6952 			r = sprintf(buf, "X\n");
6953 	} else
6954 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6955 
6956 	mutex_unlock(&trace_types_lock);
6957 
6958 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6959 	return ret;
6960 }
6961 
6962 static ssize_t
6963 tracing_entries_write(struct file *filp, const char __user *ubuf,
6964 		      size_t cnt, loff_t *ppos)
6965 {
6966 	struct inode *inode = file_inode(filp);
6967 	struct trace_array *tr = inode->i_private;
6968 	unsigned long val;
6969 	int ret;
6970 
6971 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6972 	if (ret)
6973 		return ret;
6974 
6975 	/* must have at least 1 entry */
6976 	if (!val)
6977 		return -EINVAL;
6978 
6979 	/* value is in KB */
6980 	val <<= 10;
6981 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6982 	if (ret < 0)
6983 		return ret;
6984 
6985 	*ppos += cnt;
6986 
6987 	return cnt;
6988 }
6989 
6990 static ssize_t
6991 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6992 				size_t cnt, loff_t *ppos)
6993 {
6994 	struct trace_array *tr = filp->private_data;
6995 	char buf[64];
6996 	int r, cpu;
6997 	unsigned long size = 0, expanded_size = 0;
6998 
6999 	mutex_lock(&trace_types_lock);
7000 	for_each_tracing_cpu(cpu) {
7001 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7002 		if (!tr->ring_buffer_expanded)
7003 			expanded_size += trace_buf_size >> 10;
7004 	}
7005 	if (tr->ring_buffer_expanded)
7006 		r = sprintf(buf, "%lu\n", size);
7007 	else
7008 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7009 	mutex_unlock(&trace_types_lock);
7010 
7011 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7012 }
7013 
/* Sentinel returned by l_next() for position 0; l_show() prints the header for it. */
#define LAST_BOOT_HEADER ((void *)1)
7015 
7016 static void *l_next(struct seq_file *m, void *v, loff_t *pos)
7017 {
7018 	struct trace_array *tr = m->private;
7019 	struct trace_scratch *tscratch = tr->scratch;
7020 	unsigned int index = *pos;
7021 
7022 	(*pos)++;
7023 
7024 	if (*pos == 1)
7025 		return LAST_BOOT_HEADER;
7026 
7027 	/* Only show offsets of the last boot data */
7028 	if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7029 		return NULL;
7030 
7031 	/* *pos 0 is for the header, 1 is for the first module */
7032 	index--;
7033 
7034 	if (index >= tscratch->nr_entries)
7035 		return NULL;
7036 
7037 	return &tscratch->entries[index];
7038 }
7039 
7040 static void *l_start(struct seq_file *m, loff_t *pos)
7041 {
7042 	mutex_lock(&scratch_mutex);
7043 
7044 	return l_next(m, NULL, pos);
7045 }
7046 
7047 static void l_stop(struct seq_file *m, void *p)
7048 {
7049 	mutex_unlock(&scratch_mutex);
7050 }
7051 
7052 static void show_last_boot_header(struct seq_file *m, struct trace_array *tr)
7053 {
7054 	struct trace_scratch *tscratch = tr->scratch;
7055 
7056 	/*
7057 	 * Do not leak KASLR address. This only shows the KASLR address of
7058 	 * the last boot. When the ring buffer is started, the LAST_BOOT
7059 	 * flag gets cleared, and this should only report "current".
7060 	 * Otherwise it shows the KASLR address from the previous boot which
7061 	 * should not be the same as the current boot.
7062 	 */
7063 	if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7064 		seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr);
7065 	else
7066 		seq_puts(m, "# Current\n");
7067 }
7068 
7069 static int l_show(struct seq_file *m, void *v)
7070 {
7071 	struct trace_array *tr = m->private;
7072 	struct trace_mod_entry *entry = v;
7073 
7074 	if (v == LAST_BOOT_HEADER) {
7075 		show_last_boot_header(m, tr);
7076 		return 0;
7077 	}
7078 
7079 	seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name);
7080 	return 0;
7081 }
7082 
/*
 * Iterator of the last_boot file: a header line followed by one line per
 * module entry. scratch_mutex is held from ->start() to ->stop().
 */
static const struct seq_operations last_boot_seq_ops = {
	.start		= l_start,
	.next		= l_next,
	.stop		= l_stop,
	.show		= l_show,
};
7089 
7090 static int tracing_last_boot_open(struct inode *inode, struct file *file)
7091 {
7092 	struct trace_array *tr = inode->i_private;
7093 	struct seq_file *m;
7094 	int ret;
7095 
7096 	ret = tracing_check_open_get_tr(tr);
7097 	if (ret)
7098 		return ret;
7099 
7100 	ret = seq_open(file, &last_boot_seq_ops);
7101 	if (ret) {
7102 		trace_array_put(tr);
7103 		return ret;
7104 	}
7105 
7106 	m = file->private_data;
7107 	m->private = tr;
7108 
7109 	return 0;
7110 }
7111 
7112 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
7113 {
7114 	struct trace_array *tr = inode->i_private;
7115 	int cpu = tracing_get_cpu(inode);
7116 	int ret;
7117 
7118 	ret = tracing_check_open_get_tr(tr);
7119 	if (ret)
7120 		return ret;
7121 
7122 	ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
7123 	if (ret < 0)
7124 		__trace_array_put(tr);
7125 	return ret;
7126 }
7127 
7128 static ssize_t
7129 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7130 			  size_t cnt, loff_t *ppos)
7131 {
7132 	/*
7133 	 * There is no need to read what the user has written, this function
7134 	 * is just to make sure that there is no error when "echo" is used
7135 	 */
7136 
7137 	*ppos += cnt;
7138 
7139 	return cnt;
7140 }
7141 
7142 static int
7143 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7144 {
7145 	struct trace_array *tr = inode->i_private;
7146 
7147 	/* disable tracing ? */
7148 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7149 		tracer_tracing_off(tr);
7150 	/* resize the ring buffer to 0 */
7151 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7152 
7153 	trace_array_put(tr);
7154 
7155 	return 0;
7156 }
7157 
/* Size of each per-CPU user buffer and upper bound of a single user read. */
#define TRACE_MARKER_MAX_SIZE		4096
7159 
7160 static ssize_t write_marker_to_buffer(struct trace_array *tr, const char *buf,
7161 				      size_t cnt, unsigned long ip)
7162 {
7163 	struct ring_buffer_event *event;
7164 	enum event_trigger_type tt = ETT_NONE;
7165 	struct trace_buffer *buffer;
7166 	struct print_entry *entry;
7167 	int meta_size;
7168 	ssize_t written;
7169 	size_t size;
7170 
7171 	meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
7172  again:
7173 	size = cnt + meta_size;
7174 
7175 	buffer = tr->array_buffer.buffer;
7176 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7177 					    tracing_gen_ctx());
7178 	if (unlikely(!event)) {
7179 		/*
7180 		 * If the size was greater than what was allowed, then
7181 		 * make it smaller and try again.
7182 		 */
7183 		if (size > ring_buffer_max_event_size(buffer)) {
7184 			cnt = ring_buffer_max_event_size(buffer) - meta_size;
7185 			/* The above should only happen once */
7186 			if (WARN_ON_ONCE(cnt + meta_size == size))
7187 				return -EBADF;
7188 			goto again;
7189 		}
7190 
7191 		/* Ring buffer disabled, return as if not open for write */
7192 		return -EBADF;
7193 	}
7194 
7195 	entry = ring_buffer_event_data(event);
7196 	entry->ip = ip;
7197 	memcpy(&entry->buf, buf, cnt);
7198 	written = cnt;
7199 
7200 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7201 		/* do not add \n before testing triggers, but add \0 */
7202 		entry->buf[cnt] = '\0';
7203 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7204 	}
7205 
7206 	if (entry->buf[cnt - 1] != '\n') {
7207 		entry->buf[cnt] = '\n';
7208 		entry->buf[cnt + 1] = '\0';
7209 	} else
7210 		entry->buf[cnt] = '\0';
7211 
7212 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7213 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7214 	__buffer_unlock_commit(buffer, event);
7215 
7216 	if (tt)
7217 		event_triggers_post_call(tr->trace_marker_file, tt);
7218 
7219 	return written;
7220 }
7221 
/* Per-CPU element of trace_user_buf_info: one kmalloc'ed buffer. */
struct trace_user_buf {
	char		*buf;	/* TRACE_MARKER_MAX_SIZE bytes, see trace_user_fault_buffer_enable() */
};
7225 
/* Reference counted set of per-CPU user buffers. */
struct trace_user_buf_info {
	struct trace_user_buf __percpu	*tbuf;	/* one trace_user_buf per possible CPU */
	int				ref;	/* users; changed under trace_user_buffer_mutex */
};
7230 
7231 
/*
 * trace_user_buffer is the shared buffer set, NULL while it has no users.
 * The enable/disable helpers create, reference and free it while holding
 * trace_user_buffer_mutex.
 */
static DEFINE_MUTEX(trace_user_buffer_mutex);
static struct trace_user_buf_info *trace_user_buffer;
7234 
7235 static void trace_user_fault_buffer_free(struct trace_user_buf_info *tinfo)
7236 {
7237 	char *buf;
7238 	int cpu;
7239 
7240 	for_each_possible_cpu(cpu) {
7241 		buf = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
7242 		kfree(buf);
7243 	}
7244 	free_percpu(tinfo->tbuf);
7245 	kfree(tinfo);
7246 }
7247 
7248 static int trace_user_fault_buffer_enable(void)
7249 {
7250 	struct trace_user_buf_info *tinfo;
7251 	char *buf;
7252 	int cpu;
7253 
7254 	guard(mutex)(&trace_user_buffer_mutex);
7255 
7256 	if (trace_user_buffer) {
7257 		trace_user_buffer->ref++;
7258 		return 0;
7259 	}
7260 
7261 	tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
7262 	if (!tinfo)
7263 		return -ENOMEM;
7264 
7265 	tinfo->tbuf = alloc_percpu(struct trace_user_buf);
7266 	if (!tinfo->tbuf) {
7267 		kfree(tinfo);
7268 		return -ENOMEM;
7269 	}
7270 
7271 	tinfo->ref = 1;
7272 
7273 	/* Clear each buffer in case of error */
7274 	for_each_possible_cpu(cpu) {
7275 		per_cpu_ptr(tinfo->tbuf, cpu)->buf = NULL;
7276 	}
7277 
7278 	for_each_possible_cpu(cpu) {
7279 		buf = kmalloc_node(TRACE_MARKER_MAX_SIZE, GFP_KERNEL,
7280 				   cpu_to_node(cpu));
7281 		if (!buf) {
7282 			trace_user_fault_buffer_free(tinfo);
7283 			return -ENOMEM;
7284 		}
7285 		per_cpu_ptr(tinfo->tbuf, cpu)->buf = buf;
7286 	}
7287 
7288 	trace_user_buffer = tinfo;
7289 
7290 	return 0;
7291 }
7292 
7293 static void trace_user_fault_buffer_disable(void)
7294 {
7295 	struct trace_user_buf_info *tinfo;
7296 
7297 	guard(mutex)(&trace_user_buffer_mutex);
7298 
7299 	tinfo = trace_user_buffer;
7300 
7301 	if (WARN_ON_ONCE(!tinfo))
7302 		return;
7303 
7304 	if (--tinfo->ref)
7305 		return;
7306 
7307 	trace_user_fault_buffer_free(tinfo);
7308 	trace_user_buffer = NULL;
7309 }
7310 
/*
 * Copy up to TRACE_MARKER_MAX_SIZE bytes from user space into the
 * current CPU's buffer of @tinfo.
 *
 * @tinfo:	descriptor holding the per-CPU buffers
 * @ptr:	user space address to read from
 * @size:	number of bytes requested (clamped to TRACE_MARKER_MAX_SIZE)
 * @read_size:	set to the number of bytes copied, or 0 on failure
 *
 * Returns the per-CPU buffer holding the data, or NULL if the copy
 * faulted or too many retries were needed.
 *
 * Must be called with preemption disabled. Preemption is temporarily
 * enabled around the user copy, and is disabled again on return.
 */
static char *trace_user_fault_read(struct trace_user_buf_info *tinfo,
				   const char __user *ptr, size_t size,
				   size_t *read_size)
{
	int cpu = smp_processor_id();
	char *buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
	unsigned int cnt;
	int trys = 0;
	int ret;

	if (size > TRACE_MARKER_MAX_SIZE)
		size = TRACE_MARKER_MAX_SIZE;
	*read_size = 0;

	/*
	 * This acts similar to a seqcount. The per CPU context switches are
	 * recorded, migration is disabled and preemption is enabled. The
	 * read of the user space memory is copied into the per CPU buffer.
	 * Preemption is disabled again, and if the per CPU context switches count
	 * is still the same, it means the buffer has not been corrupted.
	 * If the count is different, it is assumed the buffer is corrupted
	 * and reading must be tried again.
	 */

	do {
		/*
		 * If for some reason, copy_from_user() always causes a context
		 * switch, this would then cause an infinite loop.
		 * If this task is preempted by another user space task, it
		 * will cause this task to try again. But just in case something
		 * changes where the copying from user space causes another task
		 * to run, prevent this from going into an infinite loop.
		 * 100 tries should be plenty.
		 */
		if (WARN_ONCE(trys++ > 100, "Error: Too many tries to read user space"))
			return NULL;

		/* Read the current CPU context switch counter */
		cnt = nr_context_switches_cpu(cpu);

		/*
		 * Preemption is going to be enabled, but this task must
		 * remain on this CPU.
		 */
		migrate_disable();

		/*
		 * Now preemption is being enabled and another task can come in
		 * and use the same buffer and corrupt our data.
		 */
		preempt_enable_notrace();

		ret = __copy_from_user(buffer, ptr, size);

		preempt_disable_notrace();
		migrate_enable();

		/* if it faulted, no need to test if the buffer was corrupted */
		if (ret)
			return NULL;

		/*
		 * Preemption is disabled again, now check the per CPU context
		 * switch counter. If it doesn't match, then another user space
		 * process may have been scheduled in and corrupted our buffer.
		 * In that case the copying must be retried.
		 */
	} while (nr_context_switches_cpu(cpu) != cnt);

	*read_size = size;
	return buffer;
}
7384 
7385 static ssize_t
7386 tracing_mark_write(struct file *filp, const char __user *ubuf,
7387 					size_t cnt, loff_t *fpos)
7388 {
7389 	struct trace_array *tr = filp->private_data;
7390 	ssize_t written = -ENODEV;
7391 	unsigned long ip;
7392 	size_t size;
7393 	char *buf;
7394 
7395 	if (tracing_disabled)
7396 		return -EINVAL;
7397 
7398 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7399 		return -EINVAL;
7400 
7401 	if ((ssize_t)cnt < 0)
7402 		return -EINVAL;
7403 
7404 	if (cnt > TRACE_MARKER_MAX_SIZE)
7405 		cnt = TRACE_MARKER_MAX_SIZE;
7406 
7407 	/* Must have preemption disabled while having access to the buffer */
7408 	guard(preempt_notrace)();
7409 
7410 	buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, &size);
7411 	if (!buf)
7412 		return -EFAULT;
7413 
7414 	if (cnt > size)
7415 		cnt = size;
7416 
7417 	/* The selftests expect this function to be the IP address */
7418 	ip = _THIS_IP_;
7419 
7420 	/* The global trace_marker can go to multiple instances */
7421 	if (tr == &global_trace) {
7422 		guard(rcu)();
7423 		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7424 			written = write_marker_to_buffer(tr, buf, cnt, ip);
7425 			if (written < 0)
7426 				break;
7427 		}
7428 	} else {
7429 		written = write_marker_to_buffer(tr, buf, cnt, ip);
7430 	}
7431 
7432 	return written;
7433 }
7434 
7435 static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
7436 					  const char *buf, size_t cnt)
7437 {
7438 	struct ring_buffer_event *event;
7439 	struct trace_buffer *buffer;
7440 	struct raw_data_entry *entry;
7441 	ssize_t written;
7442 	size_t size;
7443 
7444 	size = sizeof(*entry) + cnt;
7445 
7446 	buffer = tr->array_buffer.buffer;
7447 
7448 	if (size > ring_buffer_max_event_size(buffer))
7449 		return -EINVAL;
7450 
7451 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7452 					    tracing_gen_ctx());
7453 	if (!event)
7454 		/* Ring buffer disabled, return as if not open for write */
7455 		return -EBADF;
7456 
7457 	entry = ring_buffer_event_data(event);
7458 	memcpy(&entry->id, buf, cnt);
7459 	written = cnt;
7460 
7461 	__buffer_unlock_commit(buffer, event);
7462 
7463 	return written;
7464 }
7465 
7466 static ssize_t
7467 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7468 					size_t cnt, loff_t *fpos)
7469 {
7470 	struct trace_array *tr = filp->private_data;
7471 	ssize_t written = -ENODEV;
7472 	size_t size;
7473 	char *buf;
7474 
7475 	if (tracing_disabled)
7476 		return -EINVAL;
7477 
7478 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7479 		return -EINVAL;
7480 
7481 	/* The marker must at least have a tag id */
7482 	if (cnt < sizeof(unsigned int))
7483 		return -EINVAL;
7484 
7485 	/* Must have preemption disabled while having access to the buffer */
7486 	guard(preempt_notrace)();
7487 
7488 	buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, &size);
7489 	if (!buf)
7490 		return -EFAULT;
7491 
7492 	/* raw write is all or nothing */
7493 	if (cnt > size)
7494 		return -EINVAL;
7495 
7496 	/* The global trace_marker_raw can go to multiple instances */
7497 	if (tr == &global_trace) {
7498 		guard(rcu)();
7499 		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7500 			written = write_raw_marker_to_buffer(tr, ubuf, cnt);
7501 			if (written < 0)
7502 				break;
7503 		}
7504 	} else {
7505 		written = write_raw_marker_to_buffer(tr, ubuf, cnt);
7506 	}
7507 
7508 	return written;
7509 }
7510 
/*
 * Open handler shared by the marker files. Takes a reference on the
 * per-CPU user-copy buffers before opening the file, and gives it back
 * if the open fails.
 */
static int tracing_mark_open(struct inode *inode, struct file *filp)
{
	int err;

	err = trace_user_fault_buffer_enable();
	if (err < 0)
		return err;

	stream_open(inode, filp);

	err = tracing_open_generic_tr(inode, filp);
	if (err >= 0)
		return err;

	/* The open failed, release the buffer reference taken above */
	trace_user_fault_buffer_disable();
	return err;
}
7525 
/*
 * Release handler shared by the marker files: drops the reference on the
 * per-CPU user-copy buffers taken in tracing_mark_open(), then does the
 * generic trace array release.
 */
static int tracing_mark_release(struct inode *inode, struct file *file)
{
	trace_user_fault_buffer_disable();
	return tracing_release_generic_tr(inode, file);
}
7531 
7532 static int tracing_clock_show(struct seq_file *m, void *v)
7533 {
7534 	struct trace_array *tr = m->private;
7535 	int i;
7536 
7537 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7538 		seq_printf(m,
7539 			"%s%s%s%s", i ? " " : "",
7540 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7541 			i == tr->clock_id ? "]" : "");
7542 	seq_putc(m, '\n');
7543 
7544 	return 0;
7545 }
7546 
7547 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7548 {
7549 	int i;
7550 
7551 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7552 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7553 			break;
7554 	}
7555 	if (i == ARRAY_SIZE(trace_clocks))
7556 		return -EINVAL;
7557 
7558 	guard(mutex)(&trace_types_lock);
7559 
7560 	tr->clock_id = i;
7561 
7562 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7563 
7564 	/*
7565 	 * New clock may not be consistent with the previous clock.
7566 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7567 	 */
7568 	tracing_reset_online_cpus(&tr->array_buffer);
7569 
7570 #ifdef CONFIG_TRACER_MAX_TRACE
7571 	if (tr->max_buffer.buffer)
7572 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7573 	tracing_reset_online_cpus(&tr->max_buffer);
7574 #endif
7575 
7576 	if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
7577 		struct trace_scratch *tscratch = tr->scratch;
7578 
7579 		tscratch->clock_id = i;
7580 	}
7581 
7582 	return 0;
7583 }
7584 
7585 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7586 				   size_t cnt, loff_t *fpos)
7587 {
7588 	struct seq_file *m = filp->private_data;
7589 	struct trace_array *tr = m->private;
7590 	char buf[64];
7591 	const char *clockstr;
7592 	int ret;
7593 
7594 	if (cnt >= sizeof(buf))
7595 		return -EINVAL;
7596 
7597 	if (copy_from_user(buf, ubuf, cnt))
7598 		return -EFAULT;
7599 
7600 	buf[cnt] = 0;
7601 
7602 	clockstr = strstrip(buf);
7603 
7604 	ret = tracing_set_clock(tr, clockstr);
7605 	if (ret)
7606 		return ret;
7607 
7608 	*fpos += cnt;
7609 
7610 	return cnt;
7611 }
7612 
7613 static int tracing_clock_open(struct inode *inode, struct file *file)
7614 {
7615 	struct trace_array *tr = inode->i_private;
7616 	int ret;
7617 
7618 	ret = tracing_check_open_get_tr(tr);
7619 	if (ret)
7620 		return ret;
7621 
7622 	ret = single_open(file, tracing_clock_show, inode->i_private);
7623 	if (ret < 0)
7624 		trace_array_put(tr);
7625 
7626 	return ret;
7627 }
7628 
7629 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7630 {
7631 	struct trace_array *tr = m->private;
7632 
7633 	guard(mutex)(&trace_types_lock);
7634 
7635 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7636 		seq_puts(m, "delta [absolute]\n");
7637 	else
7638 		seq_puts(m, "[delta] absolute\n");
7639 
7640 	return 0;
7641 }
7642 
7643 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7644 {
7645 	struct trace_array *tr = inode->i_private;
7646 	int ret;
7647 
7648 	ret = tracing_check_open_get_tr(tr);
7649 	if (ret)
7650 		return ret;
7651 
7652 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7653 	if (ret < 0)
7654 		trace_array_put(tr);
7655 
7656 	return ret;
7657 }
7658 
7659 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7660 {
7661 	if (rbe == this_cpu_read(trace_buffered_event))
7662 		return ring_buffer_time_stamp(buffer);
7663 
7664 	return ring_buffer_event_time_stamp(buffer, rbe);
7665 }
7666 
7667 /*
7668  * Set or disable using the per CPU trace_buffer_event when possible.
7669  */
7670 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7671 {
7672 	guard(mutex)(&trace_types_lock);
7673 
7674 	if (set && tr->no_filter_buffering_ref++)
7675 		return 0;
7676 
7677 	if (!set) {
7678 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref))
7679 			return -EINVAL;
7680 
7681 		--tr->no_filter_buffering_ref;
7682 	}
7683 
7684 	return 0;
7685 }
7686 
/* Per-open state of a file that reads the ring buffer in raw pages */
struct ftrace_buffer_info {
	struct trace_iterator	iter;		/* instance, CPU and buffer being read */
	void			*spare;		/* read page from the ring buffer, or NULL */
	unsigned int		spare_cpu;	/* CPU the spare page was allocated for */
	unsigned int		spare_size;	/* sub buffer size when spare was allocated */
	unsigned int		read;		/* starts as (unsigned int)-1 to force a first read */
};
7694 
7695 #ifdef CONFIG_TRACER_SNAPSHOT
7696 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7697 {
7698 	struct trace_array *tr = inode->i_private;
7699 	struct trace_iterator *iter;
7700 	struct seq_file *m;
7701 	int ret;
7702 
7703 	ret = tracing_check_open_get_tr(tr);
7704 	if (ret)
7705 		return ret;
7706 
7707 	if (file->f_mode & FMODE_READ) {
7708 		iter = __tracing_open(inode, file, true);
7709 		if (IS_ERR(iter))
7710 			ret = PTR_ERR(iter);
7711 	} else {
7712 		/* Writes still need the seq_file to hold the private data */
7713 		ret = -ENOMEM;
7714 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7715 		if (!m)
7716 			goto out;
7717 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7718 		if (!iter) {
7719 			kfree(m);
7720 			goto out;
7721 		}
7722 		ret = 0;
7723 
7724 		iter->tr = tr;
7725 		iter->array_buffer = &tr->max_buffer;
7726 		iter->cpu_file = tracing_get_cpu(inode);
7727 		m->private = iter;
7728 		file->private_data = m;
7729 	}
7730 out:
7731 	if (ret < 0)
7732 		trace_array_put(tr);
7733 
7734 	return ret;
7735 }
7736 
/*
 * Cross-CPU callback (see the smp_call_function_single() user): updates
 * the max/snapshot buffer of the trace array @tr for the CPU this runs on.
 */
static void tracing_swap_cpu_buffer(void *tr)
{
	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
}
7741 
7742 static ssize_t
7743 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7744 		       loff_t *ppos)
7745 {
7746 	struct seq_file *m = filp->private_data;
7747 	struct trace_iterator *iter = m->private;
7748 	struct trace_array *tr = iter->tr;
7749 	unsigned long val;
7750 	int ret;
7751 
7752 	ret = tracing_update_buffers(tr);
7753 	if (ret < 0)
7754 		return ret;
7755 
7756 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7757 	if (ret)
7758 		return ret;
7759 
7760 	guard(mutex)(&trace_types_lock);
7761 
7762 	if (tr->current_trace->use_max_tr)
7763 		return -EBUSY;
7764 
7765 	local_irq_disable();
7766 	arch_spin_lock(&tr->max_lock);
7767 	if (tr->cond_snapshot)
7768 		ret = -EBUSY;
7769 	arch_spin_unlock(&tr->max_lock);
7770 	local_irq_enable();
7771 	if (ret)
7772 		return ret;
7773 
7774 	switch (val) {
7775 	case 0:
7776 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7777 			return -EINVAL;
7778 		if (tr->allocated_snapshot)
7779 			free_snapshot(tr);
7780 		break;
7781 	case 1:
7782 /* Only allow per-cpu swap if the ring buffer supports it */
7783 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7784 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7785 			return -EINVAL;
7786 #endif
7787 		if (tr->allocated_snapshot)
7788 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7789 					&tr->array_buffer, iter->cpu_file);
7790 
7791 		ret = tracing_arm_snapshot_locked(tr);
7792 		if (ret)
7793 			return ret;
7794 
7795 		/* Now, we're going to swap */
7796 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7797 			local_irq_disable();
7798 			update_max_tr(tr, current, smp_processor_id(), NULL);
7799 			local_irq_enable();
7800 		} else {
7801 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7802 						 (void *)tr, 1);
7803 		}
7804 		tracing_disarm_snapshot(tr);
7805 		break;
7806 	default:
7807 		if (tr->allocated_snapshot) {
7808 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7809 				tracing_reset_online_cpus(&tr->max_buffer);
7810 			else
7811 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7812 		}
7813 		break;
7814 	}
7815 
7816 	if (ret >= 0) {
7817 		*ppos += cnt;
7818 		ret = cnt;
7819 	}
7820 
7821 	return ret;
7822 }
7823 
7824 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7825 {
7826 	struct seq_file *m = file->private_data;
7827 	int ret;
7828 
7829 	ret = tracing_release(inode, file);
7830 
7831 	if (file->f_mode & FMODE_READ)
7832 		return ret;
7833 
7834 	/* If write only, the seq_file is just a stub */
7835 	if (m)
7836 		kfree(m->private);
7837 	kfree(m);
7838 
7839 	return 0;
7840 }
7841 
/*
 * Forward declarations: the raw snapshot file below reuses these raw
 * buffer handlers, which are defined later in this file.
 */
static int tracing_buffers_open(struct inode *inode, struct file *filp);
static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
				    size_t count, loff_t *ppos);
static int tracing_buffers_release(struct inode *inode, struct file *file);
static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7848 
7849 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7850 {
7851 	struct ftrace_buffer_info *info;
7852 	int ret;
7853 
7854 	/* The following checks for tracefs lockdown */
7855 	ret = tracing_buffers_open(inode, filp);
7856 	if (ret < 0)
7857 		return ret;
7858 
7859 	info = filp->private_data;
7860 
7861 	if (info->iter.trace->use_max_tr) {
7862 		tracing_buffers_release(inode, filp);
7863 		return -EBUSY;
7864 	}
7865 
7866 	info->iter.snapshot = true;
7867 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7868 
7869 	return ret;
7870 }
7871 
7872 #endif /* CONFIG_TRACER_SNAPSHOT */
7873 
7874 
/* Readable, writable and seekable; uses the generic open and has no release */
static const struct file_operations tracing_thresh_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_thresh_read,
	.write		= tracing_thresh_write,
	.llseek		= generic_file_llseek,
};
7881 
7882 #ifdef CONFIG_TRACER_MAX_TRACE
/* Readable, writable and seekable; open/release go through the per-instance generic pair */
static const struct file_operations tracing_max_lat_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_max_lat_read,
	.write		= tracing_max_lat_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};
7890 #endif
7891 
/* Readable, writable and seekable; open/release go through the per-instance generic pair */
static const struct file_operations set_tracer_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_set_trace_read,
	.write		= tracing_set_trace_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};
7899 
/* Read-only stream with poll and splice support; no write or llseek handler */
static const struct file_operations tracing_pipe_fops = {
	.open		= tracing_open_pipe,
	.poll		= tracing_poll_pipe,
	.read		= tracing_read_pipe,
	.splice_read	= tracing_splice_read_pipe,
	.release	= tracing_release_pipe,
};
7907 
/* Readable, writable and seekable; open/release go through the per-instance generic pair */
static const struct file_operations tracing_entries_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_entries_read,
	.write		= tracing_entries_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};
7915 
/* Read-only seq_file based file; no write handler */
static const struct file_operations tracing_buffer_meta_fops = {
	.open		= tracing_buffer_meta_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_seq_release,
};
7922 
/* Read-only and seekable; open/release go through the per-instance generic pair */
static const struct file_operations tracing_total_entries_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_total_entries_read,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};
7929 
/* Write-only; has its own release handler and no read or llseek handler */
static const struct file_operations tracing_free_buffer_fops = {
	.open		= tracing_open_generic_tr,
	.write		= tracing_free_buffer_write,
	.release	= tracing_free_buffer_release,
};
7935 
/*
 * Write-only marker file. Open and release are paired so that the per-CPU
 * user-copy buffers are referenced for as long as the file is open.
 */
static const struct file_operations tracing_mark_fops = {
	.open		= tracing_mark_open,
	.write		= tracing_mark_write,
	.release	= tracing_mark_release,
};
7941 
/*
 * Write-only raw marker file. Shares open and release with the string
 * marker file, only the write handler differs.
 */
static const struct file_operations tracing_mark_raw_fops = {
	.open		= tracing_mark_open,
	.write		= tracing_mark_raw_write,
	.release	= tracing_mark_release,
};
7947 
/* seq_file based read side, plus a write handler that selects the clock */
static const struct file_operations trace_clock_fops = {
	.open		= tracing_clock_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_single_release_tr,
	.write		= tracing_clock_write,
};
7955 
/* Read-only seq_file based file; no write handler */
static const struct file_operations trace_time_stamp_mode_fops = {
	.open		= tracing_time_stamp_mode_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_single_release_tr,
};
7962 
/* Read-only seq_file based file; no write handler */
static const struct file_operations last_boot_fops = {
	.open		= tracing_last_boot_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_seq_release,
};
7969 
7970 #ifdef CONFIG_TRACER_SNAPSHOT
/* Snapshot file: read through seq_file, control through the write handler */
static const struct file_operations snapshot_fops = {
	.open		= tracing_snapshot_open,
	.read		= seq_read,
	.write		= tracing_snapshot_write,
	.llseek		= tracing_lseek,
	.release	= tracing_snapshot_release,
};
7978 
/* Raw snapshot file: own open handler, otherwise the raw buffer handlers */
static const struct file_operations snapshot_raw_fops = {
	.open		= snapshot_raw_open,
	.read		= tracing_buffers_read,
	.release	= tracing_buffers_release,
	.splice_read	= tracing_buffers_splice_read,
};
7985 
7986 #endif /* CONFIG_TRACER_SNAPSHOT */
7987 
7988 /*
7989  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7990  * @filp: The active open file structure
7991  * @ubuf: The userspace provided buffer to read value into
7992  * @cnt: The maximum number of bytes to read
7993  * @ppos: The current "file" position
7994  *
7995  * This function implements the write interface for a struct trace_min_max_param.
7996  * The filp->private_data must point to a trace_min_max_param structure that
7997  * defines where to write the value, the min and the max acceptable values,
7998  * and a lock to protect the write.
7999  */
8000 static ssize_t
8001 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
8002 {
8003 	struct trace_min_max_param *param = filp->private_data;
8004 	u64 val;
8005 	int err;
8006 
8007 	if (!param)
8008 		return -EFAULT;
8009 
8010 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
8011 	if (err)
8012 		return err;
8013 
8014 	if (param->lock)
8015 		mutex_lock(param->lock);
8016 
8017 	if (param->min && val < *param->min)
8018 		err = -EINVAL;
8019 
8020 	if (param->max && val > *param->max)
8021 		err = -EINVAL;
8022 
8023 	if (!err)
8024 		*param->val = val;
8025 
8026 	if (param->lock)
8027 		mutex_unlock(param->lock);
8028 
8029 	if (err)
8030 		return err;
8031 
8032 	return cnt;
8033 }
8034 
8035 /*
8036  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
8037  * @filp: The active open file structure
8038  * @ubuf: The userspace provided buffer to read value into
8039  * @cnt: The maximum number of bytes to read
8040  * @ppos: The current "file" position
8041  *
8042  * This function implements the read interface for a struct trace_min_max_param.
8043  * The filp->private_data must point to a trace_min_max_param struct with valid
8044  * data.
8045  */
8046 static ssize_t
8047 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
8048 {
8049 	struct trace_min_max_param *param = filp->private_data;
8050 	char buf[U64_STR_SIZE];
8051 	int len;
8052 	u64 val;
8053 
8054 	if (!param)
8055 		return -EFAULT;
8056 
8057 	val = *param->val;
8058 
8059 	if (cnt > sizeof(buf))
8060 		cnt = sizeof(buf);
8061 
8062 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
8063 
8064 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
8065 }
8066 
/*
 * File operations for a value described by a struct trace_min_max_param.
 * Not static, so it can be referenced from outside this file.
 */
const struct file_operations trace_min_max_fops = {
	.open		= tracing_open_generic,
	.read		= trace_min_max_read,
	.write		= trace_min_max_write,
};
8072 
/* Maximum number of entries kept in an instance's error log */
#define TRACING_LOG_ERRS_MAX	8
/* Size of the buffer holding the location string of an error */
#define TRACING_LOG_LOC_MAX	128

/* Printed in front of the command that caused an error */
#define CMD_PREFIX "  Command: "
8077 
/* What went wrong, where in the command, and when */
struct err_info {
	const char	**errs;	/* ptr to loc-specific array of err strings */
	u8		type;	/* index into errs -> specific err string */
	u16		pos;	/* caret position */
	u64		ts;	/* local_clock() value when the error was logged */
};
8084 
/* One entry of an instance's error log */
struct tracing_log_err {
	struct list_head	list;	/* link in the instance's err_log list */
	struct err_info		info;
	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
	char			*cmd;                     /* what caused err */
};
8091 
/* Held while the error log list of an instance is modified or walked */
static DEFINE_MUTEX(tracing_err_log_lock);
8093 
8094 static struct tracing_log_err *alloc_tracing_log_err(int len)
8095 {
8096 	struct tracing_log_err *err;
8097 
8098 	err = kzalloc(sizeof(*err), GFP_KERNEL);
8099 	if (!err)
8100 		return ERR_PTR(-ENOMEM);
8101 
8102 	err->cmd = kzalloc(len, GFP_KERNEL);
8103 	if (!err->cmd) {
8104 		kfree(err);
8105 		return ERR_PTR(-ENOMEM);
8106 	}
8107 
8108 	return err;
8109 }
8110 
/* Free an error log entry along with its command buffer */
static void free_tracing_log_err(struct tracing_log_err *err)
{
	kfree(err->cmd);
	kfree(err);
}
8116 
8117 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
8118 						   int len)
8119 {
8120 	struct tracing_log_err *err;
8121 	char *cmd;
8122 
8123 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
8124 		err = alloc_tracing_log_err(len);
8125 		if (PTR_ERR(err) != -ENOMEM)
8126 			tr->n_err_log_entries++;
8127 
8128 		return err;
8129 	}
8130 	cmd = kzalloc(len, GFP_KERNEL);
8131 	if (!cmd)
8132 		return ERR_PTR(-ENOMEM);
8133 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8134 	kfree(err->cmd);
8135 	err->cmd = cmd;
8136 	list_del(&err->list);
8137 
8138 	return err;
8139 }
8140 
8141 /**
8142  * err_pos - find the position of a string within a command for error careting
8143  * @cmd: The tracing command that caused the error
8144  * @str: The string to position the caret at within @cmd
8145  *
8146  * Finds the position of the first occurrence of @str within @cmd.  The
8147  * return value can be passed to tracing_log_err() for caret placement
8148  * within @cmd.
8149  *
8150  * Returns the index within @cmd of the first occurrence of @str or 0
8151  * if @str was not found.
8152  */
unsigned int err_pos(char *cmd, const char *str)
{
	char *match;

	/* An empty command is a caller bug, there is nothing to point at */
	if (WARN_ON(!strlen(cmd)))
		return 0;

	match = strstr(cmd, str);
	if (!match)
		return 0;

	return match - cmd;
}
8166 
8167 /**
8168  * tracing_log_err - write an error to the tracing error log
8169  * @tr: The associated trace array for the error (NULL for top level array)
8170  * @loc: A string describing where the error occurred
8171  * @cmd: The tracing command that caused the error
8172  * @errs: The array of loc-specific static error strings
8173  * @type: The index into errs[], which produces the specific static err string
8174  * @pos: The position the caret should be placed in the cmd
8175  *
8176  * Writes an error into tracing/error_log of the form:
8177  *
8178  * <loc>: error: <text>
8179  *   Command: <cmd>
8180  *              ^
8181  *
8182  * tracing/error_log is a small log file containing the last
8183  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
8184  * unless there has been a tracing error, and the error log can be
8185  * cleared and have its memory freed by writing the empty string in
8186  * truncation mode to it i.e. echo > tracing/error_log.
8187  *
8188  * NOTE: the @errs array along with the @type param are used to
8189  * produce a static error string - this string is not copied and saved
8190  * when the error is logged - only a pointer to it is saved.  See
8191  * existing callers for examples of how static strings are typically
8192  * defined for use with tracing_log_err().
8193  */
8194 void tracing_log_err(struct trace_array *tr,
8195 		     const char *loc, const char *cmd,
8196 		     const char **errs, u8 type, u16 pos)
8197 {
8198 	struct tracing_log_err *err;
8199 	int len = 0;
8200 
8201 	if (!tr)
8202 		tr = &global_trace;
8203 
8204 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8205 
8206 	guard(mutex)(&tracing_err_log_lock);
8207 
8208 	err = get_tracing_log_err(tr, len);
8209 	if (PTR_ERR(err) == -ENOMEM)
8210 		return;
8211 
8212 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8213 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8214 
8215 	err->info.errs = errs;
8216 	err->info.type = type;
8217 	err->info.pos = pos;
8218 	err->info.ts = local_clock();
8219 
8220 	list_add_tail(&err->list, &tr->err_log);
8221 }
8222 
8223 static void clear_tracing_err_log(struct trace_array *tr)
8224 {
8225 	struct tracing_log_err *err, *next;
8226 
8227 	guard(mutex)(&tracing_err_log_lock);
8228 
8229 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
8230 		list_del(&err->list);
8231 		free_tracing_log_err(err);
8232 	}
8233 
8234 	tr->n_err_log_entries = 0;
8235 }
8236 
/*
 * seq_file start: takes tracing_err_log_lock, which stays held until
 * tracing_err_log_seq_stop(), and returns the entry at position *pos.
 */
static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
{
	struct trace_array *tr = m->private;

	mutex_lock(&tracing_err_log_lock);

	return seq_list_start(&tr->err_log, *pos);
}
8245 
/* seq_file next: advance to the entry following @v in the error log list */
static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_array *tr = m->private;

	return seq_list_next(v, &tr->err_log, pos);
}
8252 
/* seq_file stop: drops the lock taken in tracing_err_log_seq_start() */
static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&tracing_err_log_lock);
}
8257 
8258 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8259 {
8260 	u16 i;
8261 
8262 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8263 		seq_putc(m, ' ');
8264 	for (i = 0; i < pos; i++)
8265 		seq_putc(m, ' ');
8266 	seq_puts(m, "^\n");
8267 }
8268 
8269 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8270 {
8271 	struct tracing_log_err *err = v;
8272 
8273 	if (err) {
8274 		const char *err_text = err->info.errs[err->info.type];
8275 		u64 sec = err->info.ts;
8276 		u32 nsec;
8277 
8278 		nsec = do_div(sec, NSEC_PER_SEC);
8279 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8280 			   err->loc, err_text);
8281 		seq_printf(m, "%s", err->cmd);
8282 		tracing_err_log_show_pos(m, err->info.pos);
8283 	}
8284 
8285 	return 0;
8286 }
8287 
/* seq_file iterator over the error log; start and stop bracket the locking */
static const struct seq_operations tracing_err_log_seq_ops = {
	.start  = tracing_err_log_seq_start,
	.next   = tracing_err_log_seq_next,
	.stop   = tracing_err_log_seq_stop,
	.show   = tracing_err_log_seq_show
};
8294 
8295 static int tracing_err_log_open(struct inode *inode, struct file *file)
8296 {
8297 	struct trace_array *tr = inode->i_private;
8298 	int ret = 0;
8299 
8300 	ret = tracing_check_open_get_tr(tr);
8301 	if (ret)
8302 		return ret;
8303 
8304 	/* If this file was opened for write, then erase contents */
8305 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8306 		clear_tracing_err_log(tr);
8307 
8308 	if (file->f_mode & FMODE_READ) {
8309 		ret = seq_open(file, &tracing_err_log_seq_ops);
8310 		if (!ret) {
8311 			struct seq_file *m = file->private_data;
8312 			m->private = tr;
8313 		} else {
8314 			trace_array_put(tr);
8315 		}
8316 	}
8317 	return ret;
8318 }
8319 
/*
 * Write handler for the error log file. The data is ignored and reported
 * as fully consumed; clearing the log happens at open time with O_TRUNC.
 */
static ssize_t tracing_err_log_write(struct file *file,
				     const char __user *buffer,
				     size_t count, loff_t *ppos)
{
	return count;
}
8326 
/*
 * Release handler for the error log file: drops the trace array reference
 * taken at open, and tears down the seq_file if one was set up for reading.
 */
static int tracing_err_log_release(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;

	trace_array_put(tr);

	/* seq_open() was only called for readers */
	if (file->f_mode & FMODE_READ)
		seq_release(inode, file);

	return 0;
}
8338 
/* Error log file: seq_file based read side, write handler that discards data */
static const struct file_operations tracing_err_log_fops = {
	.open           = tracing_err_log_open,
	.write		= tracing_err_log_write,
	.read           = seq_read,
	.llseek         = tracing_lseek,
	.release        = tracing_err_log_release,
};
8346 
8347 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8348 {
8349 	struct trace_array *tr = inode->i_private;
8350 	struct ftrace_buffer_info *info;
8351 	int ret;
8352 
8353 	ret = tracing_check_open_get_tr(tr);
8354 	if (ret)
8355 		return ret;
8356 
8357 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
8358 	if (!info) {
8359 		trace_array_put(tr);
8360 		return -ENOMEM;
8361 	}
8362 
8363 	mutex_lock(&trace_types_lock);
8364 
8365 	info->iter.tr		= tr;
8366 	info->iter.cpu_file	= tracing_get_cpu(inode);
8367 	info->iter.trace	= tr->current_trace;
8368 	info->iter.array_buffer = &tr->array_buffer;
8369 	info->spare		= NULL;
8370 	/* Force reading ring buffer for first read */
8371 	info->read		= (unsigned int)-1;
8372 
8373 	filp->private_data = info;
8374 
8375 	tr->trace_ref++;
8376 
8377 	mutex_unlock(&trace_types_lock);
8378 
8379 	ret = nonseekable_open(inode, filp);
8380 	if (ret < 0)
8381 		trace_array_put(tr);
8382 
8383 	return ret;
8384 }
8385 
8386 static __poll_t
8387 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8388 {
8389 	struct ftrace_buffer_info *info = filp->private_data;
8390 	struct trace_iterator *iter = &info->iter;
8391 
8392 	return trace_poll(iter, filp, poll_table);
8393 }
8394 
/*
 * Read handler for the raw ring buffer files.
 *
 * Data is pulled from the ring buffer into a spare read page kept in
 * info->spare and copied from there to @ubuf.  info->read records how much
 * of the spare page has already been handed to user space, so one page can
 * be consumed over several calls.
 *
 * Returns the number of bytes copied, 0 when there is nothing to return,
 * or a negative errno (-EBUSY, -EAGAIN, -EFAULT, or an error from the page
 * allocation or from wait_on_pipe()).
 */
static ssize_t
tracing_buffers_read(struct file *filp, char __user *ubuf,
		     size_t count, loff_t *ppos)
{
	struct ftrace_buffer_info *info = filp->private_data;
	struct trace_iterator *iter = &info->iter;
	void *trace_data;
	int page_size;
	ssize_t ret = 0;
	ssize_t size;

	if (!count)
		return 0;

#ifdef CONFIG_TRACER_MAX_TRACE
	/* Refuse snapshot reads while the current tracer uses the max buffer */
	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
		return -EBUSY;
#endif

	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);

	/* Make sure the spare matches the current sub buffer size */
	if (info->spare) {
		if (page_size != info->spare_size) {
			ring_buffer_free_read_page(iter->array_buffer->buffer,
						   info->spare_cpu, info->spare);
			info->spare = NULL;
		}
	}

	/* Allocate a spare page on first use or after it was dropped above */
	if (!info->spare) {
		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
							  iter->cpu_file);
		if (IS_ERR(info->spare)) {
			ret = PTR_ERR(info->spare);
			info->spare = NULL;
		} else {
			info->spare_cpu = iter->cpu_file;
			info->spare_size = page_size;
		}
	}
	/* Allocation failed: ret holds the error from PTR_ERR() */
	if (!info->spare)
		return ret;

	/* Do we have previous read data to read? */
	if (info->read < page_size)
		goto read;

 again:
	trace_access_lock(iter->cpu_file);
	ret = ring_buffer_read_page(iter->array_buffer->buffer,
				    info->spare,
				    count,
				    iter->cpu_file, 0);
	trace_access_unlock(iter->cpu_file);

	if (ret < 0) {
		/*
		 * Nothing could be read.  If the buffer is empty and the file
		 * has not been flushed (iter->closed), either bail out or
		 * wait for data and retry; otherwise report 0 bytes.
		 */
		if (trace_empty(iter) && !iter->closed) {
			if (update_last_data_if_empty(iter->tr))
				return 0;

			if ((filp->f_flags & O_NONBLOCK))
				return -EAGAIN;

			ret = wait_on_pipe(iter, 0);
			if (ret)
				return ret;

			goto again;
		}
		return 0;
	}

	/* A fresh page was read: start handing it out from the beginning */
	info->read = 0;
 read:
	/* Copy at most what is left of the spare page */
	size = page_size - info->read;
	if (size > count)
		size = count;
	trace_data = ring_buffer_read_page_data(info->spare);
	/* copy_to_user() returns the number of bytes it could NOT copy */
	ret = copy_to_user(ubuf, trace_data + info->read, size);
	if (ret == size)
		return -EFAULT;

	/* Account only for the bytes that actually reached user space */
	size -= ret;

	*ppos += size;
	info->read += size;

	return size;
}
8485 
8486 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
8487 {
8488 	struct ftrace_buffer_info *info = file->private_data;
8489 	struct trace_iterator *iter = &info->iter;
8490 
8491 	iter->closed = true;
8492 	/* Make sure the waiters see the new wait_index */
8493 	(void)atomic_fetch_inc_release(&iter->wait_index);
8494 
8495 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8496 
8497 	return 0;
8498 }
8499 
8500 static int tracing_buffers_release(struct inode *inode, struct file *file)
8501 {
8502 	struct ftrace_buffer_info *info = file->private_data;
8503 	struct trace_iterator *iter = &info->iter;
8504 
8505 	guard(mutex)(&trace_types_lock);
8506 
8507 	iter->tr->trace_ref--;
8508 
8509 	__trace_array_put(iter->tr);
8510 
8511 	if (info->spare)
8512 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8513 					   info->spare_cpu, info->spare);
8514 	kvfree(info);
8515 
8516 	return 0;
8517 }
8518 
/*
 * Reference-counted handle for a ring buffer read page that has been
 * handed to a pipe by tracing_buffers_splice_read().
 */
struct buffer_ref {
	struct trace_buffer	*buffer;	/* ring buffer the page was allocated from */
	void			*page;		/* read page from ring_buffer_alloc_read_page() */
	int			cpu;		/* CPU the page was allocated for */
	refcount_t		refcount;	/* page is freed when this drops to zero */
};
8525 
8526 static void buffer_ref_release(struct buffer_ref *ref)
8527 {
8528 	if (!refcount_dec_and_test(&ref->refcount))
8529 		return;
8530 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8531 	kfree(ref);
8532 }
8533 
8534 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8535 				    struct pipe_buffer *buf)
8536 {
8537 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8538 
8539 	buffer_ref_release(ref);
8540 	buf->private = 0;
8541 }
8542 
8543 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8544 				struct pipe_buffer *buf)
8545 {
8546 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8547 
8548 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8549 		return false;
8550 
8551 	refcount_inc(&ref->refcount);
8552 	return true;
8553 }
8554 
8555 /* Pipe buffer operations for a buffer. */
8556 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8557 	.release		= buffer_pipe_buf_release,
8558 	.get			= buffer_pipe_buf_get,
8559 };
8560 
8561 /*
8562  * Callback from splice_to_pipe(), if we need to release some pages
8563  * at the end of the spd in case we error'ed out in filling the pipe.
8564  */
8565 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8566 {
8567 	struct buffer_ref *ref =
8568 		(struct buffer_ref *)spd->partial[i].private;
8569 
8570 	buffer_ref_release(ref);
8571 	spd->partial[i].private = 0;
8572 }
8573 
/*
 * splice_read handler for the raw ring buffer files.
 *
 * Reads whole sub-buffer pages out of the ring buffer and hands them to
 * @pipe.  Each page is wrapped in a reference-counted buffer_ref that the
 * pipe buffer callbacks in buffer_pipe_buf_ops release again.
 *
 * *@ppos must be a multiple of the sub-buffer size; @len is rounded down
 * to a multiple of it and must be at least one sub-buffer.
 *
 * Returns the result of splice_to_pipe() when at least one page was read,
 * otherwise 0 or a negative errno.
 */
static ssize_t
tracing_buffers_splice_read(struct file *file, loff_t *ppos,
			    struct pipe_inode_info *pipe, size_t len,
			    unsigned int flags)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;
	struct partial_page partial_def[PIPE_DEF_BUFFERS];
	struct page *pages_def[PIPE_DEF_BUFFERS];
	struct splice_pipe_desc spd = {
		.pages		= pages_def,
		.partial	= partial_def,
		.nr_pages_max	= PIPE_DEF_BUFFERS,
		.ops		= &buffer_pipe_buf_ops,
		.spd_release	= buffer_spd_release,
	};
	struct buffer_ref *ref;
	bool woken = false;
	int page_size;
	int entries, i;
	ssize_t ret = 0;

#ifdef CONFIG_TRACER_MAX_TRACE
	/* Refuse snapshot reads while the current tracer uses the max buffer */
	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
		return -EBUSY;
#endif

	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
	/* The file position must sit on a sub-buffer boundary */
	if (*ppos & (page_size - 1))
		return -EINVAL;

	/* Round the length down to whole sub-buffers; need at least one */
	if (len & (page_size - 1)) {
		if (len < page_size)
			return -EINVAL;
		len &= (~(page_size - 1));
	}

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

 again:
	trace_access_lock(iter->cpu_file);
	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);

	/* Fill one spd slot per iteration while there is room, length and data */
	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
		struct page *page;
		int r;

		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
		if (!ref) {
			ret = -ENOMEM;
			break;
		}

		refcount_set(&ref->refcount, 1);
		ref->buffer = iter->array_buffer->buffer;
		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
		if (IS_ERR(ref->page)) {
			ret = PTR_ERR(ref->page);
			ref->page = NULL;
			kfree(ref);
			break;
		}
		ref->cpu = iter->cpu_file;

		r = ring_buffer_read_page(ref->buffer, ref->page,
					  len, iter->cpu_file, 1);
		if (r < 0) {
			/* Nothing was read into this page; ret is left untouched */
			ring_buffer_free_read_page(ref->buffer, ref->cpu,
						   ref->page);
			kfree(ref);
			break;
		}

		page = virt_to_page(ring_buffer_read_page_data(ref->page));

		spd.pages[i] = page;
		spd.partial[i].len = page_size;
		spd.partial[i].offset = 0;
		spd.partial[i].private = (unsigned long)ref;
		spd.nr_pages++;
		*ppos += page_size;

		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
	}

	trace_access_unlock(iter->cpu_file);
	/* i counts the slots filled in this pass */
	spd.nr_pages = i;

	/* did we read anything? */
	if (!spd.nr_pages) {

		/* An allocation error from the loop takes precedence */
		if (ret)
			goto out;

		/* Already waited once: give up and return 0 */
		if (woken)
			goto out;

		ret = -EAGAIN;
		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
			goto out;

		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
		if (ret)
			goto out;

		/* No need to wait after waking up when tracing is off */
		if (!tracer_tracing_is_on(iter->tr))
			goto out;

		/* Iterate one more time to collect any new data then exit */
		woken = true;

		goto again;
	}

	ret = splice_to_pipe(pipe, &spd);
out:
	splice_shrink_spd(&spd);

	return ret;
}
8696 
8697 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8698 {
8699 	struct ftrace_buffer_info *info = file->private_data;
8700 	struct trace_iterator *iter = &info->iter;
8701 	int err;
8702 
8703 	if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
8704 		if (!(file->f_flags & O_NONBLOCK)) {
8705 			err = ring_buffer_wait(iter->array_buffer->buffer,
8706 					       iter->cpu_file,
8707 					       iter->tr->buffer_percent,
8708 					       NULL, NULL);
8709 			if (err)
8710 				return err;
8711 		}
8712 
8713 		return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8714 						  iter->cpu_file);
8715 	} else if (cmd) {
8716 		return -ENOTTY;
8717 	}
8718 
8719 	/*
8720 	 * An ioctl call with cmd 0 to the ring buffer file will wake up all
8721 	 * waiters
8722 	 */
8723 	guard(mutex)(&trace_types_lock);
8724 
8725 	/* Make sure the waiters see the new wait_index */
8726 	(void)atomic_fetch_inc_release(&iter->wait_index);
8727 
8728 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8729 
8730 	return 0;
8731 }
8732 
8733 #ifdef CONFIG_TRACER_MAX_TRACE
8734 static int get_snapshot_map(struct trace_array *tr)
8735 {
8736 	int err = 0;
8737 
8738 	/*
8739 	 * Called with mmap_lock held. lockdep would be unhappy if we would now
8740 	 * take trace_types_lock. Instead use the specific
8741 	 * snapshot_trigger_lock.
8742 	 */
8743 	spin_lock(&tr->snapshot_trigger_lock);
8744 
8745 	if (tr->snapshot || tr->mapped == UINT_MAX)
8746 		err = -EBUSY;
8747 	else
8748 		tr->mapped++;
8749 
8750 	spin_unlock(&tr->snapshot_trigger_lock);
8751 
8752 	/* Wait for update_max_tr() to observe iter->tr->mapped */
8753 	if (tr->mapped == 1)
8754 		synchronize_rcu();
8755 
8756 	return err;
8757 
8758 }
8759 static void put_snapshot_map(struct trace_array *tr)
8760 {
8761 	spin_lock(&tr->snapshot_trigger_lock);
8762 	if (!WARN_ON(!tr->mapped))
8763 		tr->mapped--;
8764 	spin_unlock(&tr->snapshot_trigger_lock);
8765 }
8766 #else
/* Without CONFIG_TRACER_MAX_TRACE the mapping accounting is a no-op. */
static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
static inline void put_snapshot_map(struct trace_array *tr) { }
8769 #endif
8770 
8771 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8772 {
8773 	struct ftrace_buffer_info *info = vma->vm_file->private_data;
8774 	struct trace_iterator *iter = &info->iter;
8775 
8776 	WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
8777 	put_snapshot_map(iter->tr);
8778 }
8779 
/* VMA operations installed by tracing_buffers_mmap(). */
static const struct vm_operations_struct tracing_buffers_vmops = {
	.close		= tracing_buffers_mmap_close,
};
8783 
8784 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
8785 {
8786 	struct ftrace_buffer_info *info = filp->private_data;
8787 	struct trace_iterator *iter = &info->iter;
8788 	int ret = 0;
8789 
8790 	/* A memmap'ed buffer is not supported for user space mmap */
8791 	if (iter->tr->flags & TRACE_ARRAY_FL_MEMMAP)
8792 		return -ENODEV;
8793 
8794 	ret = get_snapshot_map(iter->tr);
8795 	if (ret)
8796 		return ret;
8797 
8798 	ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
8799 	if (ret)
8800 		put_snapshot_map(iter->tr);
8801 
8802 	vma->vm_ops = &tracing_buffers_vmops;
8803 
8804 	return ret;
8805 }
8806 
8807 static const struct file_operations tracing_buffers_fops = {
8808 	.open		= tracing_buffers_open,
8809 	.read		= tracing_buffers_read,
8810 	.poll		= tracing_buffers_poll,
8811 	.release	= tracing_buffers_release,
8812 	.flush		= tracing_buffers_flush,
8813 	.splice_read	= tracing_buffers_splice_read,
8814 	.unlocked_ioctl = tracing_buffers_ioctl,
8815 	.mmap		= tracing_buffers_mmap,
8816 };
8817 
8818 static ssize_t
8819 tracing_stats_read(struct file *filp, char __user *ubuf,
8820 		   size_t count, loff_t *ppos)
8821 {
8822 	struct inode *inode = file_inode(filp);
8823 	struct trace_array *tr = inode->i_private;
8824 	struct array_buffer *trace_buf = &tr->array_buffer;
8825 	int cpu = tracing_get_cpu(inode);
8826 	struct trace_seq *s;
8827 	unsigned long cnt;
8828 	unsigned long long t;
8829 	unsigned long usec_rem;
8830 
8831 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8832 	if (!s)
8833 		return -ENOMEM;
8834 
8835 	trace_seq_init(s);
8836 
8837 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8838 	trace_seq_printf(s, "entries: %ld\n", cnt);
8839 
8840 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8841 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8842 
8843 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8844 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8845 
8846 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8847 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8848 
8849 	if (trace_clocks[tr->clock_id].in_ns) {
8850 		/* local or global for trace_clock */
8851 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8852 		usec_rem = do_div(t, USEC_PER_SEC);
8853 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8854 								t, usec_rem);
8855 
8856 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8857 		usec_rem = do_div(t, USEC_PER_SEC);
8858 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8859 	} else {
8860 		/* counter or tsc mode for trace_clock */
8861 		trace_seq_printf(s, "oldest event ts: %llu\n",
8862 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8863 
8864 		trace_seq_printf(s, "now ts: %llu\n",
8865 				ring_buffer_time_stamp(trace_buf->buffer));
8866 	}
8867 
8868 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8869 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8870 
8871 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8872 	trace_seq_printf(s, "read events: %ld\n", cnt);
8873 
8874 	count = simple_read_from_buffer(ubuf, count, ppos,
8875 					s->buffer, trace_seq_used(s));
8876 
8877 	kfree(s);
8878 
8879 	return count;
8880 }
8881 
8882 static const struct file_operations tracing_stats_fops = {
8883 	.open		= tracing_open_generic_tr,
8884 	.read		= tracing_stats_read,
8885 	.llseek		= generic_file_llseek,
8886 	.release	= tracing_release_generic_tr,
8887 };
8888 
8889 #ifdef CONFIG_DYNAMIC_FTRACE
8890 
8891 static ssize_t
8892 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8893 		  size_t cnt, loff_t *ppos)
8894 {
8895 	ssize_t ret;
8896 	char *buf;
8897 	int r;
8898 
8899 	/* 512 should be plenty to hold the amount needed */
8900 #define DYN_INFO_BUF_SIZE	512
8901 
8902 	buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
8903 	if (!buf)
8904 		return -ENOMEM;
8905 
8906 	r = scnprintf(buf, DYN_INFO_BUF_SIZE,
8907 		      "%ld pages:%ld groups: %ld\n"
8908 		      "ftrace boot update time = %llu (ns)\n"
8909 		      "ftrace module total update time = %llu (ns)\n",
8910 		      ftrace_update_tot_cnt,
8911 		      ftrace_number_of_pages,
8912 		      ftrace_number_of_groups,
8913 		      ftrace_update_time,
8914 		      ftrace_total_mod_time);
8915 
8916 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8917 	kfree(buf);
8918 	return ret;
8919 }
8920 
8921 static const struct file_operations tracing_dyn_info_fops = {
8922 	.open		= tracing_open_generic,
8923 	.read		= tracing_read_dyn_info,
8924 	.llseek		= generic_file_llseek,
8925 };
8926 #endif /* CONFIG_DYNAMIC_FTRACE */
8927 
8928 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
/*
 * Function probe callback for the uncounted "snapshot" command: take a
 * snapshot of @tr every time the probed function is hit.
 */
static void
ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
		struct trace_array *tr, struct ftrace_probe_ops *ops,
		void *data)
{
	tracing_snapshot_instance(tr);
}
8936 
8937 static void
8938 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8939 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8940 		      void *data)
8941 {
8942 	struct ftrace_func_mapper *mapper = data;
8943 	long *count = NULL;
8944 
8945 	if (mapper)
8946 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8947 
8948 	if (count) {
8949 
8950 		if (*count <= 0)
8951 			return;
8952 
8953 		(*count)--;
8954 	}
8955 
8956 	tracing_snapshot_instance(tr);
8957 }
8958 
8959 static int
8960 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8961 		      struct ftrace_probe_ops *ops, void *data)
8962 {
8963 	struct ftrace_func_mapper *mapper = data;
8964 	long *count = NULL;
8965 
8966 	seq_printf(m, "%ps:", (void *)ip);
8967 
8968 	seq_puts(m, "snapshot");
8969 
8970 	if (mapper)
8971 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8972 
8973 	if (count)
8974 		seq_printf(m, ":count=%ld\n", *count);
8975 	else
8976 		seq_puts(m, ":unlimited\n");
8977 
8978 	return 0;
8979 }
8980 
8981 static int
8982 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8983 		     unsigned long ip, void *init_data, void **data)
8984 {
8985 	struct ftrace_func_mapper *mapper = *data;
8986 
8987 	if (!mapper) {
8988 		mapper = allocate_ftrace_func_mapper();
8989 		if (!mapper)
8990 			return -ENOMEM;
8991 		*data = mapper;
8992 	}
8993 
8994 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8995 }
8996 
8997 static void
8998 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8999 		     unsigned long ip, void *data)
9000 {
9001 	struct ftrace_func_mapper *mapper = data;
9002 
9003 	if (!ip) {
9004 		if (!mapper)
9005 			return;
9006 		free_ftrace_func_mapper(mapper, NULL);
9007 		return;
9008 	}
9009 
9010 	ftrace_func_mapper_remove_ip(mapper, ip);
9011 }
9012 
9013 static struct ftrace_probe_ops snapshot_probe_ops = {
9014 	.func			= ftrace_snapshot,
9015 	.print			= ftrace_snapshot_print,
9016 };
9017 
9018 static struct ftrace_probe_ops snapshot_count_probe_ops = {
9019 	.func			= ftrace_count_snapshot,
9020 	.print			= ftrace_snapshot_print,
9021 	.init			= ftrace_snapshot_init,
9022 	.free			= ftrace_snapshot_free,
9023 };
9024 
9025 static int
9026 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
9027 			       char *glob, char *cmd, char *param, int enable)
9028 {
9029 	struct ftrace_probe_ops *ops;
9030 	void *count = (void *)-1;
9031 	char *number;
9032 	int ret;
9033 
9034 	if (!tr)
9035 		return -ENODEV;
9036 
9037 	/* hash funcs only work with set_ftrace_filter */
9038 	if (!enable)
9039 		return -EINVAL;
9040 
9041 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
9042 
9043 	if (glob[0] == '!') {
9044 		ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
9045 		if (!ret)
9046 			tracing_disarm_snapshot(tr);
9047 
9048 		return ret;
9049 	}
9050 
9051 	if (!param)
9052 		goto out_reg;
9053 
9054 	number = strsep(&param, ":");
9055 
9056 	if (!strlen(number))
9057 		goto out_reg;
9058 
9059 	/*
9060 	 * We use the callback data field (which is a pointer)
9061 	 * as our counter.
9062 	 */
9063 	ret = kstrtoul(number, 0, (unsigned long *)&count);
9064 	if (ret)
9065 		return ret;
9066 
9067  out_reg:
9068 	ret = tracing_arm_snapshot(tr);
9069 	if (ret < 0)
9070 		return ret;
9071 
9072 	ret = register_ftrace_function_probe(glob, tr, ops, count);
9073 	if (ret < 0)
9074 		tracing_disarm_snapshot(tr);
9075 
9076 	return ret < 0 ? ret : 0;
9077 }
9078 
9079 static struct ftrace_func_command ftrace_snapshot_cmd = {
9080 	.name			= "snapshot",
9081 	.func			= ftrace_trace_snapshot_callback,
9082 };
9083 
/*
 * Register the "snapshot" function command with ftrace.
 * Returns the result of register_ftrace_command().
 */
static __init int register_snapshot_cmd(void)
{
	return register_ftrace_command(&ftrace_snapshot_cmd);
}
9088 #else
/* No snapshot command without both CONFIG_TRACER_SNAPSHOT and CONFIG_DYNAMIC_FTRACE. */
static inline __init int register_snapshot_cmd(void) { return 0; }
9090 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
9091 
9092 static struct dentry *tracing_get_dentry(struct trace_array *tr)
9093 {
9094 	/* Top directory uses NULL as the parent */
9095 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
9096 		return NULL;
9097 
9098 	if (WARN_ON(!tr->dir))
9099 		return ERR_PTR(-ENODEV);
9100 
9101 	/* All sub buffers have a descriptor */
9102 	return tr->dir;
9103 }
9104 
9105 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
9106 {
9107 	struct dentry *d_tracer;
9108 
9109 	if (tr->percpu_dir)
9110 		return tr->percpu_dir;
9111 
9112 	d_tracer = tracing_get_dentry(tr);
9113 	if (IS_ERR(d_tracer))
9114 		return NULL;
9115 
9116 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
9117 
9118 	MEM_FAIL(!tr->percpu_dir,
9119 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
9120 
9121 	return tr->percpu_dir;
9122 }
9123 
9124 static struct dentry *
9125 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
9126 		      void *data, long cpu, const struct file_operations *fops)
9127 {
9128 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
9129 
9130 	if (ret) /* See tracing_get_cpu() */
9131 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
9132 	return ret;
9133 }
9134 
9135 static void
9136 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
9137 {
9138 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
9139 	struct dentry *d_cpu;
9140 	char cpu_dir[30]; /* 30 characters should be more than enough */
9141 
9142 	if (!d_percpu)
9143 		return;
9144 
9145 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
9146 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
9147 	if (!d_cpu) {
9148 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
9149 		return;
9150 	}
9151 
9152 	/* per cpu trace_pipe */
9153 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
9154 				tr, cpu, &tracing_pipe_fops);
9155 
9156 	/* per cpu trace */
9157 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
9158 				tr, cpu, &tracing_fops);
9159 
9160 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
9161 				tr, cpu, &tracing_buffers_fops);
9162 
9163 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
9164 				tr, cpu, &tracing_stats_fops);
9165 
9166 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
9167 				tr, cpu, &tracing_entries_fops);
9168 
9169 	if (tr->range_addr_start)
9170 		trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
9171 				      tr, cpu, &tracing_buffer_meta_fops);
9172 #ifdef CONFIG_TRACER_SNAPSHOT
9173 	if (!tr->range_addr_start) {
9174 		trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
9175 				      tr, cpu, &snapshot_fops);
9176 
9177 		trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
9178 				      tr, cpu, &snapshot_raw_fops);
9179 	}
9180 #endif
9181 }
9182 
9183 #ifdef CONFIG_FTRACE_SELFTEST
9184 /* Let selftest have access to static functions in this file */
9185 #include "trace_selftest.c"
9186 #endif
9187 
9188 static ssize_t
9189 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
9190 			loff_t *ppos)
9191 {
9192 	struct trace_option_dentry *topt = filp->private_data;
9193 	char *buf;
9194 
9195 	if (topt->flags->val & topt->opt->bit)
9196 		buf = "1\n";
9197 	else
9198 		buf = "0\n";
9199 
9200 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9201 }
9202 
9203 static ssize_t
9204 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
9205 			 loff_t *ppos)
9206 {
9207 	struct trace_option_dentry *topt = filp->private_data;
9208 	unsigned long val;
9209 	int ret;
9210 
9211 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9212 	if (ret)
9213 		return ret;
9214 
9215 	if (val != 0 && val != 1)
9216 		return -EINVAL;
9217 
9218 	if (!!(topt->flags->val & topt->opt->bit) != val) {
9219 		guard(mutex)(&trace_types_lock);
9220 		ret = __set_tracer_option(topt->tr, topt->flags,
9221 					  topt->opt, !val);
9222 		if (ret)
9223 			return ret;
9224 	}
9225 
9226 	*ppos += cnt;
9227 
9228 	return cnt;
9229 }
9230 
9231 static int tracing_open_options(struct inode *inode, struct file *filp)
9232 {
9233 	struct trace_option_dentry *topt = inode->i_private;
9234 	int ret;
9235 
9236 	ret = tracing_check_open_get_tr(topt->tr);
9237 	if (ret)
9238 		return ret;
9239 
9240 	filp->private_data = inode->i_private;
9241 	return 0;
9242 }
9243 
/*
 * Release handler for a tracer specific option file: drop the trace array
 * reference taken in tracing_open_options().  Always returns 0.
 */
static int tracing_release_options(struct inode *inode, struct file *file)
{
	struct trace_option_dentry *topt = file->private_data;

	trace_array_put(topt->tr);
	return 0;
}
9251 
9252 static const struct file_operations trace_options_fops = {
9253 	.open = tracing_open_options,
9254 	.read = trace_options_read,
9255 	.write = trace_options_write,
9256 	.llseek	= generic_file_llseek,
9257 	.release = tracing_release_options,
9258 };
9259 
9260 /*
9261  * In order to pass in both the trace_array descriptor as well as the index
9262  * to the flag that the trace option file represents, the trace_array
9263  * has a character array of trace_flags_index[], which holds the index
9264  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9265  * The address of this character array is passed to the flag option file
9266  * read/write callbacks.
9267  *
9268  * In order to extract both the index and the trace_array descriptor,
9269  * get_tr_index() uses the following algorithm.
9270  *
9271  *   idx = *ptr;
9272  *
9273  * As the pointer itself contains the address of the index (remember
9274  * index[1] == 1).
9275  *
9276  * Then to get the trace_array descriptor, by subtracting that index
9277  * from the ptr, we get to the start of the index itself.
9278  *
9279  *   ptr - idx == &index[0]
9280  *
9281  * Then a simple container_of() from that pointer gets us to the
9282  * trace_array descriptor.
9283  */
9284 static void get_tr_index(void *data, struct trace_array **ptr,
9285 			 unsigned int *pindex)
9286 {
9287 	*pindex = *(unsigned char *)data;
9288 
9289 	*ptr = container_of(data - *pindex, struct trace_array,
9290 			    trace_flags_index);
9291 }
9292 
9293 static ssize_t
9294 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9295 			loff_t *ppos)
9296 {
9297 	void *tr_index = filp->private_data;
9298 	struct trace_array *tr;
9299 	unsigned int index;
9300 	char *buf;
9301 
9302 	get_tr_index(tr_index, &tr, &index);
9303 
9304 	if (tr->trace_flags & (1 << index))
9305 		buf = "1\n";
9306 	else
9307 		buf = "0\n";
9308 
9309 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9310 }
9311 
9312 static ssize_t
9313 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9314 			 loff_t *ppos)
9315 {
9316 	void *tr_index = filp->private_data;
9317 	struct trace_array *tr;
9318 	unsigned int index;
9319 	unsigned long val;
9320 	int ret;
9321 
9322 	get_tr_index(tr_index, &tr, &index);
9323 
9324 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9325 	if (ret)
9326 		return ret;
9327 
9328 	if (val != 0 && val != 1)
9329 		return -EINVAL;
9330 
9331 	mutex_lock(&event_mutex);
9332 	mutex_lock(&trace_types_lock);
9333 	ret = set_tracer_flag(tr, 1 << index, val);
9334 	mutex_unlock(&trace_types_lock);
9335 	mutex_unlock(&event_mutex);
9336 
9337 	if (ret < 0)
9338 		return ret;
9339 
9340 	*ppos += cnt;
9341 
9342 	return cnt;
9343 }
9344 
9345 static const struct file_operations trace_options_core_fops = {
9346 	.open = tracing_open_generic,
9347 	.read = trace_options_core_read,
9348 	.write = trace_options_core_write,
9349 	.llseek = generic_file_llseek,
9350 };
9351 
9352 struct dentry *trace_create_file(const char *name,
9353 				 umode_t mode,
9354 				 struct dentry *parent,
9355 				 void *data,
9356 				 const struct file_operations *fops)
9357 {
9358 	struct dentry *ret;
9359 
9360 	ret = tracefs_create_file(name, mode, parent, data, fops);
9361 	if (!ret)
9362 		pr_warn("Could not create tracefs '%s' entry\n", name);
9363 
9364 	return ret;
9365 }
9366 
9367 
9368 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9369 {
9370 	struct dentry *d_tracer;
9371 
9372 	if (tr->options)
9373 		return tr->options;
9374 
9375 	d_tracer = tracing_get_dentry(tr);
9376 	if (IS_ERR(d_tracer))
9377 		return NULL;
9378 
9379 	tr->options = tracefs_create_dir("options", d_tracer);
9380 	if (!tr->options) {
9381 		pr_warn("Could not create tracefs directory 'options'\n");
9382 		return NULL;
9383 	}
9384 
9385 	return tr->options;
9386 }
9387 
9388 static void
9389 create_trace_option_file(struct trace_array *tr,
9390 			 struct trace_option_dentry *topt,
9391 			 struct tracer_flags *flags,
9392 			 struct tracer_opt *opt)
9393 {
9394 	struct dentry *t_options;
9395 
9396 	t_options = trace_options_init_dentry(tr);
9397 	if (!t_options)
9398 		return;
9399 
9400 	topt->flags = flags;
9401 	topt->opt = opt;
9402 	topt->tr = tr;
9403 
9404 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9405 					t_options, topt, &trace_options_fops);
9406 
9407 }
9408 
9409 static void
9410 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9411 {
9412 	struct trace_option_dentry *topts;
9413 	struct trace_options *tr_topts;
9414 	struct tracer_flags *flags;
9415 	struct tracer_opt *opts;
9416 	int cnt;
9417 	int i;
9418 
9419 	if (!tracer)
9420 		return;
9421 
9422 	flags = tracer->flags;
9423 
9424 	if (!flags || !flags->opts)
9425 		return;
9426 
9427 	/*
9428 	 * If this is an instance, only create flags for tracers
9429 	 * the instance may have.
9430 	 */
9431 	if (!trace_ok_for_array(tracer, tr))
9432 		return;
9433 
9434 	for (i = 0; i < tr->nr_topts; i++) {
9435 		/* Make sure there's no duplicate flags. */
9436 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9437 			return;
9438 	}
9439 
9440 	opts = flags->opts;
9441 
9442 	for (cnt = 0; opts[cnt].name; cnt++)
9443 		;
9444 
9445 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9446 	if (!topts)
9447 		return;
9448 
9449 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9450 			    GFP_KERNEL);
9451 	if (!tr_topts) {
9452 		kfree(topts);
9453 		return;
9454 	}
9455 
9456 	tr->topts = tr_topts;
9457 	tr->topts[tr->nr_topts].tracer = tracer;
9458 	tr->topts[tr->nr_topts].topts = topts;
9459 	tr->nr_topts++;
9460 
9461 	for (cnt = 0; opts[cnt].name; cnt++) {
9462 		create_trace_option_file(tr, &topts[cnt], flags,
9463 					 &opts[cnt]);
9464 		MEM_FAIL(topts[cnt].entry == NULL,
9465 			  "Failed to create trace option: %s",
9466 			  opts[cnt].name);
9467 	}
9468 }
9469 
9470 static struct dentry *
9471 create_trace_option_core_file(struct trace_array *tr,
9472 			      const char *option, long index)
9473 {
9474 	struct dentry *t_options;
9475 
9476 	t_options = trace_options_init_dentry(tr);
9477 	if (!t_options)
9478 		return NULL;
9479 
9480 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9481 				 (void *)&tr->trace_flags_index[index],
9482 				 &trace_options_core_fops);
9483 }
9484 
9485 static void create_trace_options_dir(struct trace_array *tr)
9486 {
9487 	struct dentry *t_options;
9488 	bool top_level = tr == &global_trace;
9489 	int i;
9490 
9491 	t_options = trace_options_init_dentry(tr);
9492 	if (!t_options)
9493 		return;
9494 
9495 	for (i = 0; trace_options[i]; i++) {
9496 		if (top_level ||
9497 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9498 			create_trace_option_core_file(tr, trace_options[i], i);
9499 	}
9500 }
9501 
9502 static ssize_t
9503 rb_simple_read(struct file *filp, char __user *ubuf,
9504 	       size_t cnt, loff_t *ppos)
9505 {
9506 	struct trace_array *tr = filp->private_data;
9507 	char buf[64];
9508 	int r;
9509 
9510 	r = tracer_tracing_is_on(tr);
9511 	r = sprintf(buf, "%d\n", r);
9512 
9513 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9514 }
9515 
9516 static ssize_t
9517 rb_simple_write(struct file *filp, const char __user *ubuf,
9518 		size_t cnt, loff_t *ppos)
9519 {
9520 	struct trace_array *tr = filp->private_data;
9521 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9522 	unsigned long val;
9523 	int ret;
9524 
9525 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9526 	if (ret)
9527 		return ret;
9528 
9529 	if (buffer) {
9530 		guard(mutex)(&trace_types_lock);
9531 		if (!!val == tracer_tracing_is_on(tr)) {
9532 			val = 0; /* do nothing */
9533 		} else if (val) {
9534 			tracer_tracing_on(tr);
9535 			if (tr->current_trace->start)
9536 				tr->current_trace->start(tr);
9537 		} else {
9538 			tracer_tracing_off(tr);
9539 			if (tr->current_trace->stop)
9540 				tr->current_trace->stop(tr);
9541 			/* Wake up any waiters */
9542 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9543 		}
9544 	}
9545 
9546 	(*ppos)++;
9547 
9548 	return cnt;
9549 }
9550 
9551 static const struct file_operations rb_simple_fops = {
9552 	.open		= tracing_open_generic_tr,
9553 	.read		= rb_simple_read,
9554 	.write		= rb_simple_write,
9555 	.release	= tracing_release_generic_tr,
9556 	.llseek		= default_llseek,
9557 };
9558 
9559 static ssize_t
9560 buffer_percent_read(struct file *filp, char __user *ubuf,
9561 		    size_t cnt, loff_t *ppos)
9562 {
9563 	struct trace_array *tr = filp->private_data;
9564 	char buf[64];
9565 	int r;
9566 
9567 	r = tr->buffer_percent;
9568 	r = sprintf(buf, "%d\n", r);
9569 
9570 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9571 }
9572 
9573 static ssize_t
9574 buffer_percent_write(struct file *filp, const char __user *ubuf,
9575 		     size_t cnt, loff_t *ppos)
9576 {
9577 	struct trace_array *tr = filp->private_data;
9578 	unsigned long val;
9579 	int ret;
9580 
9581 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9582 	if (ret)
9583 		return ret;
9584 
9585 	if (val > 100)
9586 		return -EINVAL;
9587 
9588 	tr->buffer_percent = val;
9589 
9590 	(*ppos)++;
9591 
9592 	return cnt;
9593 }
9594 
9595 static const struct file_operations buffer_percent_fops = {
9596 	.open		= tracing_open_generic_tr,
9597 	.read		= buffer_percent_read,
9598 	.write		= buffer_percent_write,
9599 	.release	= tracing_release_generic_tr,
9600 	.llseek		= default_llseek,
9601 };
9602 
9603 static ssize_t
9604 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9605 {
9606 	struct trace_array *tr = filp->private_data;
9607 	size_t size;
9608 	char buf[64];
9609 	int order;
9610 	int r;
9611 
9612 	order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9613 	size = (PAGE_SIZE << order) / 1024;
9614 
9615 	r = sprintf(buf, "%zd\n", size);
9616 
9617 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9618 }
9619 
9620 static ssize_t
9621 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9622 			 size_t cnt, loff_t *ppos)
9623 {
9624 	struct trace_array *tr = filp->private_data;
9625 	unsigned long val;
9626 	int old_order;
9627 	int order;
9628 	int pages;
9629 	int ret;
9630 
9631 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9632 	if (ret)
9633 		return ret;
9634 
9635 	val *= 1024; /* value passed in is in KB */
9636 
9637 	pages = DIV_ROUND_UP(val, PAGE_SIZE);
9638 	order = fls(pages - 1);
9639 
9640 	/* limit between 1 and 128 system pages */
9641 	if (order < 0 || order > 7)
9642 		return -EINVAL;
9643 
9644 	/* Do not allow tracing while changing the order of the ring buffer */
9645 	tracing_stop_tr(tr);
9646 
9647 	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9648 	if (old_order == order)
9649 		goto out;
9650 
9651 	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9652 	if (ret)
9653 		goto out;
9654 
9655 #ifdef CONFIG_TRACER_MAX_TRACE
9656 
9657 	if (!tr->allocated_snapshot)
9658 		goto out_max;
9659 
9660 	ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9661 	if (ret) {
9662 		/* Put back the old order */
9663 		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9664 		if (WARN_ON_ONCE(cnt)) {
9665 			/*
9666 			 * AARGH! We are left with different orders!
9667 			 * The max buffer is our "snapshot" buffer.
9668 			 * When a tracer needs a snapshot (one of the
9669 			 * latency tracers), it swaps the max buffer
9670 			 * with the saved snap shot. We succeeded to
9671 			 * update the order of the main buffer, but failed to
9672 			 * update the order of the max buffer. But when we tried
9673 			 * to reset the main buffer to the original size, we
9674 			 * failed there too. This is very unlikely to
9675 			 * happen, but if it does, warn and kill all
9676 			 * tracing.
9677 			 */
9678 			tracing_disabled = 1;
9679 		}
9680 		goto out;
9681 	}
9682  out_max:
9683 #endif
9684 	(*ppos)++;
9685  out:
9686 	if (ret)
9687 		cnt = ret;
9688 	tracing_start_tr(tr);
9689 	return cnt;
9690 }
9691 
9692 static const struct file_operations buffer_subbuf_size_fops = {
9693 	.open		= tracing_open_generic_tr,
9694 	.read		= buffer_subbuf_size_read,
9695 	.write		= buffer_subbuf_size_write,
9696 	.release	= tracing_release_generic_tr,
9697 	.llseek		= default_llseek,
9698 };
9699 
9700 static struct dentry *trace_instance_dir;
9701 
9702 static void
9703 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9704 
#ifdef CONFIG_MODULES
/*
 * Update the module delta slot of the scratch entry whose name matches
 * @mod.  @data is the trace array.  The slot is cleared when the module
 * is going away; otherwise it is set to the difference between the
 * module's current text base and the address saved in the scratch
 * entry.  Only the first matching entry is updated.  Always returns 0.
 */
static int make_mod_delta(struct module *mod, void *data)
{
	struct trace_array *tr = data;
	struct trace_scratch *tscratch = tr->scratch;
	struct trace_module_delta *module_delta = READ_ONCE(tr->module_delta);
	int idx;

	for (idx = 0; idx < tscratch->nr_entries; idx++) {
		struct trace_mod_entry *entry = &tscratch->entries[idx];

		if (strcmp(mod->name, entry->mod_name) != 0)
			continue;

		if (mod->state == MODULE_STATE_GOING) {
			module_delta->delta[idx] = 0;
		} else {
			module_delta->delta[idx] =
				(unsigned long)mod->mem[MOD_TEXT].base
				- entry->mod_addr;
		}
		break;
	}

	return 0;
}
#else
/* Without module support there is nothing to compute. */
static int make_mod_delta(struct module *mod, void *data)
{
	return 0;
}
#endif
9735 
9736 static int mod_addr_comp(const void *a, const void *b, const void *data)
9737 {
9738 	const struct trace_mod_entry *e1 = a;
9739 	const struct trace_mod_entry *e2 = b;
9740 
9741 	return e1->mod_addr > e2->mod_addr ? 1 : -1;
9742 }
9743 
9744 static void setup_trace_scratch(struct trace_array *tr,
9745 				struct trace_scratch *tscratch, unsigned int size)
9746 {
9747 	struct trace_module_delta *module_delta;
9748 	struct trace_mod_entry *entry;
9749 	int i, nr_entries;
9750 
9751 	if (!tscratch)
9752 		return;
9753 
9754 	tr->scratch = tscratch;
9755 	tr->scratch_size = size;
9756 
9757 	if (tscratch->text_addr)
9758 		tr->text_delta = (unsigned long)_text - tscratch->text_addr;
9759 
9760 	if (struct_size(tscratch, entries, tscratch->nr_entries) > size)
9761 		goto reset;
9762 
9763 	/* Check if each module name is a valid string */
9764 	for (i = 0; i < tscratch->nr_entries; i++) {
9765 		int n;
9766 
9767 		entry = &tscratch->entries[i];
9768 
9769 		for (n = 0; n < MODULE_NAME_LEN; n++) {
9770 			if (entry->mod_name[n] == '\0')
9771 				break;
9772 			if (!isprint(entry->mod_name[n]))
9773 				goto reset;
9774 		}
9775 		if (n == MODULE_NAME_LEN)
9776 			goto reset;
9777 	}
9778 
9779 	/* Sort the entries so that we can find appropriate module from address. */
9780 	nr_entries = tscratch->nr_entries;
9781 	sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
9782 	       mod_addr_comp, NULL, NULL);
9783 
9784 	if (IS_ENABLED(CONFIG_MODULES)) {
9785 		module_delta = kzalloc(struct_size(module_delta, delta, nr_entries), GFP_KERNEL);
9786 		if (!module_delta) {
9787 			pr_info("module_delta allocation failed. Not able to decode module address.");
9788 			goto reset;
9789 		}
9790 		init_rcu_head(&module_delta->rcu);
9791 	} else
9792 		module_delta = NULL;
9793 	WRITE_ONCE(tr->module_delta, module_delta);
9794 
9795 	/* Scan modules to make text delta for modules. */
9796 	module_for_each_mod(make_mod_delta, tr);
9797 
9798 	/* Set trace_clock as the same of the previous boot. */
9799 	if (tscratch->clock_id != tr->clock_id) {
9800 		if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) ||
9801 		    tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
9802 			pr_info("the previous trace_clock info is not valid.");
9803 			goto reset;
9804 		}
9805 	}
9806 	return;
9807  reset:
9808 	/* Invalid trace modules */
9809 	memset(tscratch, 0, size);
9810 }
9811 
9812 static int
9813 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9814 {
9815 	enum ring_buffer_flags rb_flags;
9816 	struct trace_scratch *tscratch;
9817 	unsigned int scratch_size = 0;
9818 
9819 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9820 
9821 	buf->tr = tr;
9822 
9823 	if (tr->range_addr_start && tr->range_addr_size) {
9824 		/* Add scratch buffer to handle 128 modules */
9825 		buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
9826 						      tr->range_addr_start,
9827 						      tr->range_addr_size,
9828 						      struct_size(tscratch, entries, 128));
9829 
9830 		tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size);
9831 		setup_trace_scratch(tr, tscratch, scratch_size);
9832 
9833 		/*
9834 		 * This is basically the same as a mapped buffer,
9835 		 * with the same restrictions.
9836 		 */
9837 		tr->mapped++;
9838 	} else {
9839 		buf->buffer = ring_buffer_alloc(size, rb_flags);
9840 	}
9841 	if (!buf->buffer)
9842 		return -ENOMEM;
9843 
9844 	buf->data = alloc_percpu(struct trace_array_cpu);
9845 	if (!buf->data) {
9846 		ring_buffer_free(buf->buffer);
9847 		buf->buffer = NULL;
9848 		return -ENOMEM;
9849 	}
9850 
9851 	/* Allocate the first page for all buffers */
9852 	set_buffer_entries(&tr->array_buffer,
9853 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9854 
9855 	return 0;
9856 }
9857 
9858 static void free_trace_buffer(struct array_buffer *buf)
9859 {
9860 	if (buf->buffer) {
9861 		ring_buffer_free(buf->buffer);
9862 		buf->buffer = NULL;
9863 		free_percpu(buf->data);
9864 		buf->data = NULL;
9865 	}
9866 }
9867 
9868 static int allocate_trace_buffers(struct trace_array *tr, int size)
9869 {
9870 	int ret;
9871 
9872 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9873 	if (ret)
9874 		return ret;
9875 
9876 #ifdef CONFIG_TRACER_MAX_TRACE
9877 	/* Fix mapped buffer trace arrays do not have snapshot buffers */
9878 	if (tr->range_addr_start)
9879 		return 0;
9880 
9881 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9882 				    allocate_snapshot ? size : 1);
9883 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9884 		free_trace_buffer(&tr->array_buffer);
9885 		return -ENOMEM;
9886 	}
9887 	tr->allocated_snapshot = allocate_snapshot;
9888 
9889 	allocate_snapshot = false;
9890 #endif
9891 
9892 	return 0;
9893 }
9894 
9895 static void free_trace_buffers(struct trace_array *tr)
9896 {
9897 	if (!tr)
9898 		return;
9899 
9900 	free_trace_buffer(&tr->array_buffer);
9901 	kfree(tr->module_delta);
9902 
9903 #ifdef CONFIG_TRACER_MAX_TRACE
9904 	free_trace_buffer(&tr->max_buffer);
9905 #endif
9906 }
9907 
9908 static void init_trace_flags_index(struct trace_array *tr)
9909 {
9910 	int i;
9911 
9912 	/* Used by the trace options files */
9913 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9914 		tr->trace_flags_index[i] = i;
9915 }
9916 
9917 static void __update_tracer_options(struct trace_array *tr)
9918 {
9919 	struct tracer *t;
9920 
9921 	for (t = trace_types; t; t = t->next)
9922 		add_tracer_options(tr, t);
9923 }
9924 
9925 static void update_tracer_options(struct trace_array *tr)
9926 {
9927 	guard(mutex)(&trace_types_lock);
9928 	tracer_options_updated = true;
9929 	__update_tracer_options(tr);
9930 }
9931 
9932 /* Must have trace_types_lock held */
9933 struct trace_array *trace_array_find(const char *instance)
9934 {
9935 	struct trace_array *tr, *found = NULL;
9936 
9937 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9938 		if (tr->name && strcmp(tr->name, instance) == 0) {
9939 			found = tr;
9940 			break;
9941 		}
9942 	}
9943 
9944 	return found;
9945 }
9946 
9947 struct trace_array *trace_array_find_get(const char *instance)
9948 {
9949 	struct trace_array *tr;
9950 
9951 	guard(mutex)(&trace_types_lock);
9952 	tr = trace_array_find(instance);
9953 	if (tr)
9954 		tr->ref++;
9955 
9956 	return tr;
9957 }
9958 
9959 static int trace_array_create_dir(struct trace_array *tr)
9960 {
9961 	int ret;
9962 
9963 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9964 	if (!tr->dir)
9965 		return -EINVAL;
9966 
9967 	ret = event_trace_add_tracer(tr->dir, tr);
9968 	if (ret) {
9969 		tracefs_remove(tr->dir);
9970 		return ret;
9971 	}
9972 
9973 	init_tracer_tracefs(tr, tr->dir);
9974 	__update_tracer_options(tr);
9975 
9976 	return ret;
9977 }
9978 
9979 static struct trace_array *
9980 trace_array_create_systems(const char *name, const char *systems,
9981 			   unsigned long range_addr_start,
9982 			   unsigned long range_addr_size)
9983 {
9984 	struct trace_array *tr;
9985 	int ret;
9986 
9987 	ret = -ENOMEM;
9988 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9989 	if (!tr)
9990 		return ERR_PTR(ret);
9991 
9992 	tr->name = kstrdup(name, GFP_KERNEL);
9993 	if (!tr->name)
9994 		goto out_free_tr;
9995 
9996 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9997 		goto out_free_tr;
9998 
9999 	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
10000 		goto out_free_tr;
10001 
10002 	if (systems) {
10003 		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
10004 		if (!tr->system_names)
10005 			goto out_free_tr;
10006 	}
10007 
10008 	/* Only for boot up memory mapped ring buffers */
10009 	tr->range_addr_start = range_addr_start;
10010 	tr->range_addr_size = range_addr_size;
10011 
10012 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
10013 
10014 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
10015 
10016 	raw_spin_lock_init(&tr->start_lock);
10017 
10018 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10019 #ifdef CONFIG_TRACER_MAX_TRACE
10020 	spin_lock_init(&tr->snapshot_trigger_lock);
10021 #endif
10022 	tr->current_trace = &nop_trace;
10023 
10024 	INIT_LIST_HEAD(&tr->systems);
10025 	INIT_LIST_HEAD(&tr->events);
10026 	INIT_LIST_HEAD(&tr->hist_vars);
10027 	INIT_LIST_HEAD(&tr->err_log);
10028 	INIT_LIST_HEAD(&tr->marker_list);
10029 
10030 #ifdef CONFIG_MODULES
10031 	INIT_LIST_HEAD(&tr->mod_events);
10032 #endif
10033 
10034 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
10035 		goto out_free_tr;
10036 
10037 	/* The ring buffer is defaultly expanded */
10038 	trace_set_ring_buffer_expanded(tr);
10039 
10040 	if (ftrace_allocate_ftrace_ops(tr) < 0)
10041 		goto out_free_tr;
10042 
10043 	ftrace_init_trace_array(tr);
10044 
10045 	init_trace_flags_index(tr);
10046 
10047 	if (trace_instance_dir) {
10048 		ret = trace_array_create_dir(tr);
10049 		if (ret)
10050 			goto out_free_tr;
10051 	} else
10052 		__trace_early_add_events(tr);
10053 
10054 	list_add(&tr->list, &ftrace_trace_arrays);
10055 
10056 	tr->ref++;
10057 
10058 	return tr;
10059 
10060  out_free_tr:
10061 	ftrace_free_ftrace_ops(tr);
10062 	free_trace_buffers(tr);
10063 	free_cpumask_var(tr->pipe_cpumask);
10064 	free_cpumask_var(tr->tracing_cpumask);
10065 	kfree_const(tr->system_names);
10066 	kfree(tr->range_name);
10067 	kfree(tr->name);
10068 	kfree(tr);
10069 
10070 	return ERR_PTR(ret);
10071 }
10072 
10073 static struct trace_array *trace_array_create(const char *name)
10074 {
10075 	return trace_array_create_systems(name, NULL, 0, 0);
10076 }
10077 
10078 static int instance_mkdir(const char *name)
10079 {
10080 	struct trace_array *tr;
10081 	int ret;
10082 
10083 	guard(mutex)(&event_mutex);
10084 	guard(mutex)(&trace_types_lock);
10085 
10086 	ret = -EEXIST;
10087 	if (trace_array_find(name))
10088 		return -EEXIST;
10089 
10090 	tr = trace_array_create(name);
10091 
10092 	ret = PTR_ERR_OR_ZERO(tr);
10093 
10094 	return ret;
10095 }
10096 
10097 #ifdef CONFIG_MMU
10098 static u64 map_pages(unsigned long start, unsigned long size)
10099 {
10100 	unsigned long vmap_start, vmap_end;
10101 	struct vm_struct *area;
10102 	int ret;
10103 
10104 	area = get_vm_area(size, VM_IOREMAP);
10105 	if (!area)
10106 		return 0;
10107 
10108 	vmap_start = (unsigned long) area->addr;
10109 	vmap_end = vmap_start + size;
10110 
10111 	ret = vmap_page_range(vmap_start, vmap_end,
10112 			      start, pgprot_nx(PAGE_KERNEL));
10113 	if (ret < 0) {
10114 		free_vm_area(area);
10115 		return 0;
10116 	}
10117 
10118 	return (u64)vmap_start;
10119 }
10120 #else
10121 static inline u64 map_pages(unsigned long start, unsigned long size)
10122 {
10123 	return 0;
10124 }
10125 #endif
10126 
10127 /**
10128  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
10129  * @name: The name of the trace array to be looked up/created.
10130  * @systems: A list of systems to create event directories for (NULL for all)
10131  *
10132  * Returns pointer to trace array with given name.
10133  * NULL, if it cannot be created.
10134  *
10135  * NOTE: This function increments the reference counter associated with the
10136  * trace array returned. This makes sure it cannot be freed while in use.
10137  * Use trace_array_put() once the trace array is no longer needed.
10138  * If the trace_array is to be freed, trace_array_destroy() needs to
10139  * be called after the trace_array_put(), or simply let user space delete
10140  * it from the tracefs instances directory. But until the
10141  * trace_array_put() is called, user space can not delete it.
10142  *
10143  */
10144 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
10145 {
10146 	struct trace_array *tr;
10147 
10148 	guard(mutex)(&event_mutex);
10149 	guard(mutex)(&trace_types_lock);
10150 
10151 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10152 		if (tr->name && strcmp(tr->name, name) == 0) {
10153 			tr->ref++;
10154 			return tr;
10155 		}
10156 	}
10157 
10158 	tr = trace_array_create_systems(name, systems, 0, 0);
10159 
10160 	if (IS_ERR(tr))
10161 		tr = NULL;
10162 	else
10163 		tr->ref++;
10164 
10165 	return tr;
10166 }
10167 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
10168 
10169 static int __remove_instance(struct trace_array *tr)
10170 {
10171 	int i;
10172 
10173 	/* Reference counter for a newly created trace array = 1. */
10174 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
10175 		return -EBUSY;
10176 
10177 	list_del(&tr->list);
10178 
10179 	/* Disable all the flags that were enabled coming in */
10180 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
10181 		if ((1 << i) & ZEROED_TRACE_FLAGS)
10182 			set_tracer_flag(tr, 1 << i, 0);
10183 	}
10184 
10185 	if (printk_trace == tr)
10186 		update_printk_trace(&global_trace);
10187 
10188 	if (update_marker_trace(tr, 0))
10189 		synchronize_rcu();
10190 
10191 	tracing_set_nop(tr);
10192 	clear_ftrace_function_probes(tr);
10193 	event_trace_del_tracer(tr);
10194 	ftrace_clear_pids(tr);
10195 	ftrace_destroy_function_files(tr);
10196 	tracefs_remove(tr->dir);
10197 	free_percpu(tr->last_func_repeats);
10198 	free_trace_buffers(tr);
10199 	clear_tracing_err_log(tr);
10200 
10201 	if (tr->range_name) {
10202 		reserve_mem_release_by_name(tr->range_name);
10203 		kfree(tr->range_name);
10204 	}
10205 
10206 	for (i = 0; i < tr->nr_topts; i++) {
10207 		kfree(tr->topts[i].topts);
10208 	}
10209 	kfree(tr->topts);
10210 
10211 	free_cpumask_var(tr->pipe_cpumask);
10212 	free_cpumask_var(tr->tracing_cpumask);
10213 	kfree_const(tr->system_names);
10214 	kfree(tr->name);
10215 	kfree(tr);
10216 
10217 	return 0;
10218 }
10219 
10220 int trace_array_destroy(struct trace_array *this_tr)
10221 {
10222 	struct trace_array *tr;
10223 
10224 	if (!this_tr)
10225 		return -EINVAL;
10226 
10227 	guard(mutex)(&event_mutex);
10228 	guard(mutex)(&trace_types_lock);
10229 
10230 
10231 	/* Making sure trace array exists before destroying it. */
10232 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10233 		if (tr == this_tr)
10234 			return __remove_instance(tr);
10235 	}
10236 
10237 	return -ENODEV;
10238 }
10239 EXPORT_SYMBOL_GPL(trace_array_destroy);
10240 
10241 static int instance_rmdir(const char *name)
10242 {
10243 	struct trace_array *tr;
10244 
10245 	guard(mutex)(&event_mutex);
10246 	guard(mutex)(&trace_types_lock);
10247 
10248 	tr = trace_array_find(name);
10249 	if (!tr)
10250 		return -ENODEV;
10251 
10252 	return __remove_instance(tr);
10253 }
10254 
10255 static __init void create_trace_instances(struct dentry *d_tracer)
10256 {
10257 	struct trace_array *tr;
10258 
10259 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
10260 							 instance_mkdir,
10261 							 instance_rmdir);
10262 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
10263 		return;
10264 
10265 	guard(mutex)(&event_mutex);
10266 	guard(mutex)(&trace_types_lock);
10267 
10268 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10269 		if (!tr->name)
10270 			continue;
10271 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
10272 			     "Failed to create instance directory\n"))
10273 			return;
10274 	}
10275 }
10276 
10277 static void
10278 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
10279 {
10280 	int cpu;
10281 
10282 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
10283 			tr, &show_traces_fops);
10284 
10285 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
10286 			tr, &set_tracer_fops);
10287 
10288 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
10289 			  tr, &tracing_cpumask_fops);
10290 
10291 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
10292 			  tr, &tracing_iter_fops);
10293 
10294 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
10295 			  tr, &tracing_fops);
10296 
10297 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
10298 			  tr, &tracing_pipe_fops);
10299 
10300 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
10301 			  tr, &tracing_entries_fops);
10302 
10303 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
10304 			  tr, &tracing_total_entries_fops);
10305 
10306 	trace_create_file("free_buffer", 0200, d_tracer,
10307 			  tr, &tracing_free_buffer_fops);
10308 
10309 	trace_create_file("trace_marker", 0220, d_tracer,
10310 			  tr, &tracing_mark_fops);
10311 
10312 	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
10313 
10314 	trace_create_file("trace_marker_raw", 0220, d_tracer,
10315 			  tr, &tracing_mark_raw_fops);
10316 
10317 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
10318 			  &trace_clock_fops);
10319 
10320 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
10321 			  tr, &rb_simple_fops);
10322 
10323 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
10324 			  &trace_time_stamp_mode_fops);
10325 
10326 	tr->buffer_percent = 50;
10327 
10328 	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
10329 			tr, &buffer_percent_fops);
10330 
10331 	trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
10332 			  tr, &buffer_subbuf_size_fops);
10333 
10334 	create_trace_options_dir(tr);
10335 
10336 #ifdef CONFIG_TRACER_MAX_TRACE
10337 	trace_create_maxlat_file(tr, d_tracer);
10338 #endif
10339 
10340 	if (ftrace_create_function_files(tr, d_tracer))
10341 		MEM_FAIL(1, "Could not allocate function filter files");
10342 
10343 	if (tr->range_addr_start) {
10344 		trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
10345 				  tr, &last_boot_fops);
10346 #ifdef CONFIG_TRACER_SNAPSHOT
10347 	} else {
10348 		trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
10349 				  tr, &snapshot_fops);
10350 #endif
10351 	}
10352 
10353 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
10354 			  tr, &tracing_err_log_fops);
10355 
10356 	for_each_tracing_cpu(cpu)
10357 		tracing_init_tracefs_percpu(tr, cpu);
10358 
10359 	ftrace_init_tracefs(tr, d_tracer);
10360 }
10361 
#ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
/*
 * Automount callback: mounts tracefs on @mntpt.
 *
 * To maintain backward compatibility for tools that mount debugfs to
 * get to the tracing facility, tracefs is automatically mounted to the
 * debugfs/tracing directory.
 *
 * Returns the new mount, NULL when the tracefs file system type is not
 * found, or an ERR_PTR() on failure.  The second argument is unused.
 */
static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
{
	struct file_system_type *fs_type;
	struct fs_context *fc;
	struct vfsmount *mnt;
	int err;

	fs_type = get_fs_type("tracefs");
	if (!fs_type)
		return NULL;

	fc = fs_context_for_submount(fs_type, mntpt);
	put_filesystem(fs_type);
	if (IS_ERR(fc))
		return ERR_CAST(fc);

	pr_warn("NOTICE: Automounting of tracing to debugfs is deprecated and will be removed in 2030\n");

	err = vfs_parse_fs_string(fc, "source", "tracefs");
	if (err)
		mnt = ERR_PTR(err);
	else
		mnt = fc_mount(fc);

	put_fs_context(fc);
	return mnt;
}
#endif
10396 
10397 /**
10398  * tracing_init_dentry - initialize top level trace array
10399  *
10400  * This is called when creating files or directories in the tracing
10401  * directory. It is called via fs_initcall() by any of the boot up code
10402  * and expects to return the dentry of the top level tracing directory.
10403  */
10404 int tracing_init_dentry(void)
10405 {
10406 	struct trace_array *tr = &global_trace;
10407 
10408 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10409 		pr_warn("Tracing disabled due to lockdown\n");
10410 		return -EPERM;
10411 	}
10412 
10413 	/* The top level trace array uses  NULL as parent */
10414 	if (tr->dir)
10415 		return 0;
10416 
10417 	if (WARN_ON(!tracefs_initialized()))
10418 		return -ENODEV;
10419 
10420 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
10421 	/*
10422 	 * As there may still be users that expect the tracing
10423 	 * files to exist in debugfs/tracing, we must automount
10424 	 * the tracefs file system there, so older tools still
10425 	 * work with the newer kernel.
10426 	 */
10427 	tr->dir = debugfs_create_automount("tracing", NULL,
10428 					   trace_automount, NULL);
10429 #endif
10430 
10431 	return 0;
10432 }
10433 
10434 extern struct trace_eval_map *__start_ftrace_eval_maps[];
10435 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
10436 
10437 static struct workqueue_struct *eval_map_wq __initdata;
10438 static struct work_struct eval_map_work __initdata;
10439 static struct work_struct tracerfs_init_work __initdata;
10440 
10441 static void __init eval_map_work_func(struct work_struct *work)
10442 {
10443 	int len;
10444 
10445 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10446 	trace_event_update_with_eval_map(NULL, __start_ftrace_eval_maps, len);
10447 }
10448 
10449 static int __init trace_eval_init(void)
10450 {
10451 	INIT_WORK(&eval_map_work, eval_map_work_func);
10452 
10453 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
10454 	if (!eval_map_wq) {
10455 		pr_err("Unable to allocate eval_map_wq\n");
10456 		/* Do work here */
10457 		eval_map_work_func(&eval_map_work);
10458 		return -ENOMEM;
10459 	}
10460 
10461 	queue_work(eval_map_wq, &eval_map_work);
10462 	return 0;
10463 }
10464 
10465 subsys_initcall(trace_eval_init);
10466 
10467 static int __init trace_eval_sync(void)
10468 {
10469 	/* Make sure the eval map updates are finished */
10470 	if (eval_map_wq)
10471 		destroy_workqueue(eval_map_wq);
10472 	return 0;
10473 }
10474 
10475 late_initcall_sync(trace_eval_sync);
10476 
10477 
10478 #ifdef CONFIG_MODULES
10479 
10480 bool module_exists(const char *module)
10481 {
10482 	/* All modules have the symbol __this_module */
10483 	static const char this_mod[] = "__this_module";
10484 	char modname[MODULE_NAME_LEN + sizeof(this_mod) + 2];
10485 	unsigned long val;
10486 	int n;
10487 
10488 	n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
10489 
10490 	if (n > sizeof(modname) - 1)
10491 		return false;
10492 
10493 	val = module_kallsyms_lookup_name(modname);
10494 	return val != 0;
10495 }
10496 
10497 static void trace_module_add_evals(struct module *mod)
10498 {
10499 	/*
10500 	 * Modules with bad taint do not have events created, do
10501 	 * not bother with enums either.
10502 	 */
10503 	if (trace_module_has_bad_taint(mod))
10504 		return;
10505 
10506 	/* Even if no trace_evals, this need to sanitize field types. */
10507 	trace_event_update_with_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10508 }
10509 
#ifdef CONFIG_TRACE_EVAL_MAP_FILE
/*
 * Unlink the eval map block that belongs to @mod from the
 * trace_eval_maps chain and free it.  Does nothing when the module has
 * no eval maps or when no block of the chain belongs to it.
 */
static void trace_module_remove_evals(struct module *mod)
{
	union trace_eval_map_item **link = &trace_eval_maps;
	union trace_eval_map_item *map;

	if (!mod->num_trace_evals)
		return;

	guard(mutex)(&trace_eval_mutex);

	/* Walk block by block, remembering the pointer that leads to each. */
	for (map = trace_eval_maps; map; map = map->tail.next) {
		if (map->head.mod == mod)
			break;
		map = trace_eval_jmp_to_tail(map);
		link = &map->tail.next;
	}
	if (!map)
		return;

	*link = trace_eval_jmp_to_tail(map)->tail.next;
	kfree(map);
}
#else
static inline void trace_module_remove_evals(struct module *mod) { }
#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10539 
10540 static void trace_module_record(struct module *mod, bool add)
10541 {
10542 	struct trace_array *tr;
10543 	unsigned long flags;
10544 
10545 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10546 		flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT);
10547 		/* Update any persistent trace array that has already been started */
10548 		if (flags == TRACE_ARRAY_FL_BOOT && add) {
10549 			guard(mutex)(&scratch_mutex);
10550 			save_mod(mod, tr);
10551 		} else if (flags & TRACE_ARRAY_FL_LAST_BOOT) {
10552 			/* Update delta if the module loaded in previous boot */
10553 			make_mod_delta(mod, tr);
10554 		}
10555 	}
10556 }
10557 
10558 static int trace_module_notify(struct notifier_block *self,
10559 			       unsigned long val, void *data)
10560 {
10561 	struct module *mod = data;
10562 
10563 	switch (val) {
10564 	case MODULE_STATE_COMING:
10565 		trace_module_add_evals(mod);
10566 		trace_module_record(mod, true);
10567 		break;
10568 	case MODULE_STATE_GOING:
10569 		trace_module_remove_evals(mod);
10570 		trace_module_record(mod, false);
10571 		break;
10572 	}
10573 
10574 	return NOTIFY_OK;
10575 }
10576 
10577 static struct notifier_block trace_module_nb = {
10578 	.notifier_call = trace_module_notify,
10579 	.priority = 0,
10580 };
10581 #endif /* CONFIG_MODULES */
10582 
10583 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10584 {
10585 
10586 	event_trace_init();
10587 
10588 	init_tracer_tracefs(&global_trace, NULL);
10589 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
10590 
10591 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10592 			&global_trace, &tracing_thresh_fops);
10593 
10594 	trace_create_file("README", TRACE_MODE_READ, NULL,
10595 			NULL, &tracing_readme_fops);
10596 
10597 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10598 			NULL, &tracing_saved_cmdlines_fops);
10599 
10600 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10601 			  NULL, &tracing_saved_cmdlines_size_fops);
10602 
10603 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10604 			NULL, &tracing_saved_tgids_fops);
10605 
10606 	trace_create_eval_file(NULL);
10607 
10608 #ifdef CONFIG_MODULES
10609 	register_module_notifier(&trace_module_nb);
10610 #endif
10611 
10612 #ifdef CONFIG_DYNAMIC_FTRACE
10613 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10614 			NULL, &tracing_dyn_info_fops);
10615 #endif
10616 
10617 	create_trace_instances(NULL);
10618 
10619 	update_tracer_options(&global_trace);
10620 }
10621 
10622 static __init int tracer_init_tracefs(void)
10623 {
10624 	int ret;
10625 
10626 	trace_access_lock_init();
10627 
10628 	ret = tracing_init_dentry();
10629 	if (ret)
10630 		return 0;
10631 
10632 	if (eval_map_wq) {
10633 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10634 		queue_work(eval_map_wq, &tracerfs_init_work);
10635 	} else {
10636 		tracer_init_tracefs_work_func(NULL);
10637 	}
10638 
10639 	rv_init_interface();
10640 
10641 	return 0;
10642 }
10643 
10644 fs_initcall(tracer_init_tracefs);
10645 
10646 static int trace_die_panic_handler(struct notifier_block *self,
10647 				unsigned long ev, void *unused);
10648 
10649 static struct notifier_block trace_panic_notifier = {
10650 	.notifier_call = trace_die_panic_handler,
10651 	.priority = INT_MAX - 1,
10652 };
10653 
10654 static struct notifier_block trace_die_notifier = {
10655 	.notifier_call = trace_die_panic_handler,
10656 	.priority = INT_MAX - 1,
10657 };
10658 
10659 /*
10660  * The idea is to execute the following die/panic callback early, in order
10661  * to avoid showing irrelevant information in the trace (like other panic
10662  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10663  * warnings get disabled (to prevent potential log flooding).
10664  */
10665 static int trace_die_panic_handler(struct notifier_block *self,
10666 				unsigned long ev, void *unused)
10667 {
10668 	if (!ftrace_dump_on_oops_enabled())
10669 		return NOTIFY_DONE;
10670 
10671 	/* The die notifier requires DIE_OOPS to trigger */
10672 	if (self == &trace_die_notifier && ev != DIE_OOPS)
10673 		return NOTIFY_DONE;
10674 
10675 	ftrace_dump(DUMP_PARAM);
10676 
10677 	return NOTIFY_DONE;
10678 }
10679 
10680 /*
10681  * printk is set to max of 1024, we really don't need it that big.
10682  * Nothing should be printing 1000 characters anyway.
10683  */
10684 #define TRACE_MAX_PRINT		1000
10685 
10686 /*
10687  * Define here KERN_TRACE so that we have one place to modify
10688  * it if we decide to change what log level the ftrace dump
10689  * should be at.
10690  */
10691 #define KERN_TRACE		KERN_EMERG
10692 
10693 void
10694 trace_printk_seq(struct trace_seq *s)
10695 {
10696 	/* Probably should print a warning here. */
10697 	if (s->seq.len >= TRACE_MAX_PRINT)
10698 		s->seq.len = TRACE_MAX_PRINT;
10699 
10700 	/*
10701 	 * More paranoid code. Although the buffer size is set to
10702 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10703 	 * an extra layer of protection.
10704 	 */
10705 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10706 		s->seq.len = s->seq.size - 1;
10707 
10708 	/* should be zero ended, but we are paranoid. */
10709 	s->buffer[s->seq.len] = 0;
10710 
10711 	printk(KERN_TRACE "%s", s->buffer);
10712 
10713 	trace_seq_init(s);
10714 }
10715 
10716 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
10717 {
10718 	iter->tr = tr;
10719 	iter->trace = iter->tr->current_trace;
10720 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
10721 	iter->array_buffer = &tr->array_buffer;
10722 
10723 	if (iter->trace && iter->trace->open)
10724 		iter->trace->open(iter);
10725 
10726 	/* Annotate start of buffers if we had overruns */
10727 	if (ring_buffer_overruns(iter->array_buffer->buffer))
10728 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
10729 
10730 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
10731 	if (trace_clocks[iter->tr->clock_id].in_ns)
10732 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10733 
10734 	/* Can not use kmalloc for iter.temp and iter.fmt */
10735 	iter->temp = static_temp_buf;
10736 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
10737 	iter->fmt = static_fmt_buf;
10738 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
10739 }
10740 
10741 void trace_init_global_iter(struct trace_iterator *iter)
10742 {
10743 	trace_init_iter(iter, &global_trace);
10744 }
10745 
10746 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
10747 {
10748 	/* use static because iter can be a bit big for the stack */
10749 	static struct trace_iterator iter;
10750 	unsigned int old_userobj;
10751 	unsigned long flags;
10752 	int cnt = 0;
10753 
10754 	/*
10755 	 * Always turn off tracing when we dump.
10756 	 * We don't need to show trace output of what happens
10757 	 * between multiple crashes.
10758 	 *
10759 	 * If the user does a sysrq-z, then they can re-enable
10760 	 * tracing with echo 1 > tracing_on.
10761 	 */
10762 	tracer_tracing_off(tr);
10763 
10764 	local_irq_save(flags);
10765 
10766 	/* Simulate the iterator */
10767 	trace_init_iter(&iter, tr);
10768 
10769 	/* While dumping, do not allow the buffer to be enable */
10770 	tracer_tracing_disable(tr);
10771 
10772 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10773 
10774 	/* don't look at user memory in panic mode */
10775 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10776 
10777 	if (dump_mode == DUMP_ORIG)
10778 		iter.cpu_file = raw_smp_processor_id();
10779 	else
10780 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10781 
10782 	if (tr == &global_trace)
10783 		printk(KERN_TRACE "Dumping ftrace buffer:\n");
10784 	else
10785 		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
10786 
10787 	/* Did function tracer already get disabled? */
10788 	if (ftrace_is_dead()) {
10789 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10790 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10791 	}
10792 
10793 	/*
10794 	 * We need to stop all tracing on all CPUS to read
10795 	 * the next buffer. This is a bit expensive, but is
10796 	 * not done often. We fill all what we can read,
10797 	 * and then release the locks again.
10798 	 */
10799 
10800 	while (!trace_empty(&iter)) {
10801 
10802 		if (!cnt)
10803 			printk(KERN_TRACE "---------------------------------\n");
10804 
10805 		cnt++;
10806 
10807 		trace_iterator_reset(&iter);
10808 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
10809 
10810 		if (trace_find_next_entry_inc(&iter) != NULL) {
10811 			int ret;
10812 
10813 			ret = print_trace_line(&iter);
10814 			if (ret != TRACE_TYPE_NO_CONSUME)
10815 				trace_consume(&iter);
10816 
10817 			trace_printk_seq(&iter.seq);
10818 		}
10819 		touch_nmi_watchdog();
10820 	}
10821 
10822 	if (!cnt)
10823 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
10824 	else
10825 		printk(KERN_TRACE "---------------------------------\n");
10826 
10827 	tr->trace_flags |= old_userobj;
10828 
10829 	tracer_tracing_enable(tr);
10830 	local_irq_restore(flags);
10831 }
10832 
10833 static void ftrace_dump_by_param(void)
10834 {
10835 	bool first_param = true;
10836 	char dump_param[MAX_TRACER_SIZE];
10837 	char *buf, *token, *inst_name;
10838 	struct trace_array *tr;
10839 
10840 	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
10841 	buf = dump_param;
10842 
10843 	while ((token = strsep(&buf, ",")) != NULL) {
10844 		if (first_param) {
10845 			first_param = false;
10846 			if (!strcmp("0", token))
10847 				continue;
10848 			else if (!strcmp("1", token)) {
10849 				ftrace_dump_one(&global_trace, DUMP_ALL);
10850 				continue;
10851 			}
10852 			else if (!strcmp("2", token) ||
10853 			  !strcmp("orig_cpu", token)) {
10854 				ftrace_dump_one(&global_trace, DUMP_ORIG);
10855 				continue;
10856 			}
10857 		}
10858 
10859 		inst_name = strsep(&token, "=");
10860 		tr = trace_array_find(inst_name);
10861 		if (!tr) {
10862 			printk(KERN_TRACE "Instance %s not found\n", inst_name);
10863 			continue;
10864 		}
10865 
10866 		if (token && (!strcmp("2", token) ||
10867 			  !strcmp("orig_cpu", token)))
10868 			ftrace_dump_one(tr, DUMP_ORIG);
10869 		else
10870 			ftrace_dump_one(tr, DUMP_ALL);
10871 	}
10872 }
10873 
10874 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10875 {
10876 	static atomic_t dump_running;
10877 
10878 	/* Only allow one dump user at a time. */
10879 	if (atomic_inc_return(&dump_running) != 1) {
10880 		atomic_dec(&dump_running);
10881 		return;
10882 	}
10883 
10884 	switch (oops_dump_mode) {
10885 	case DUMP_ALL:
10886 		ftrace_dump_one(&global_trace, DUMP_ALL);
10887 		break;
10888 	case DUMP_ORIG:
10889 		ftrace_dump_one(&global_trace, DUMP_ORIG);
10890 		break;
10891 	case DUMP_PARAM:
10892 		ftrace_dump_by_param();
10893 		break;
10894 	case DUMP_NONE:
10895 		break;
10896 	default:
10897 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10898 		ftrace_dump_one(&global_trace, DUMP_ALL);
10899 	}
10900 
10901 	atomic_dec(&dump_running);
10902 }
10903 EXPORT_SYMBOL_GPL(ftrace_dump);
10904 
10905 #define WRITE_BUFSIZE  4096
10906 
10907 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10908 				size_t count, loff_t *ppos,
10909 				int (*createfn)(const char *))
10910 {
10911 	char *kbuf __free(kfree) = NULL;
10912 	char *buf, *tmp;
10913 	int ret = 0;
10914 	size_t done = 0;
10915 	size_t size;
10916 
10917 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10918 	if (!kbuf)
10919 		return -ENOMEM;
10920 
10921 	while (done < count) {
10922 		size = count - done;
10923 
10924 		if (size >= WRITE_BUFSIZE)
10925 			size = WRITE_BUFSIZE - 1;
10926 
10927 		if (copy_from_user(kbuf, buffer + done, size))
10928 			return -EFAULT;
10929 
10930 		kbuf[size] = '\0';
10931 		buf = kbuf;
10932 		do {
10933 			tmp = strchr(buf, '\n');
10934 			if (tmp) {
10935 				*tmp = '\0';
10936 				size = tmp - buf + 1;
10937 			} else {
10938 				size = strlen(buf);
10939 				if (done + size < count) {
10940 					if (buf != kbuf)
10941 						break;
10942 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10943 					pr_warn("Line length is too long: Should be less than %d\n",
10944 						WRITE_BUFSIZE - 2);
10945 					return -EINVAL;
10946 				}
10947 			}
10948 			done += size;
10949 
10950 			/* Remove comments */
10951 			tmp = strchr(buf, '#');
10952 
10953 			if (tmp)
10954 				*tmp = '\0';
10955 
10956 			ret = createfn(buf);
10957 			if (ret)
10958 				return ret;
10959 			buf += size;
10960 
10961 		} while (done < count);
10962 	}
10963 	return done;
10964 }
10965 
10966 #ifdef CONFIG_TRACER_MAX_TRACE
10967 __init static bool tr_needs_alloc_snapshot(const char *name)
10968 {
10969 	char *test;
10970 	int len = strlen(name);
10971 	bool ret;
10972 
10973 	if (!boot_snapshot_index)
10974 		return false;
10975 
10976 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10977 	    boot_snapshot_info[len] == '\t')
10978 		return true;
10979 
10980 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10981 	if (!test)
10982 		return false;
10983 
10984 	sprintf(test, "\t%s\t", name);
10985 	ret = strstr(boot_snapshot_info, test) == NULL;
10986 	kfree(test);
10987 	return ret;
10988 }
10989 
10990 __init static void do_allocate_snapshot(const char *name)
10991 {
10992 	if (!tr_needs_alloc_snapshot(name))
10993 		return;
10994 
10995 	/*
10996 	 * When allocate_snapshot is set, the next call to
10997 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
10998 	 * will allocate the snapshot buffer. That will alse clear
10999 	 * this flag.
11000 	 */
11001 	allocate_snapshot = true;
11002 }
11003 #else
/* Empty stub used when CONFIG_TRACER_MAX_TRACE is not set. */
static inline void do_allocate_snapshot(const char *name)
{
}
11005 #endif
11006 
11007 __init static void enable_instances(void)
11008 {
11009 	struct trace_array *tr;
11010 	bool memmap_area = false;
11011 	char *curr_str;
11012 	char *name;
11013 	char *str;
11014 	char *tok;
11015 
11016 	/* A tab is always appended */
11017 	boot_instance_info[boot_instance_index - 1] = '\0';
11018 	str = boot_instance_info;
11019 
11020 	while ((curr_str = strsep(&str, "\t"))) {
11021 		phys_addr_t start = 0;
11022 		phys_addr_t size = 0;
11023 		unsigned long addr = 0;
11024 		bool traceprintk = false;
11025 		bool traceoff = false;
11026 		char *flag_delim;
11027 		char *addr_delim;
11028 		char *rname __free(kfree) = NULL;
11029 
11030 		tok = strsep(&curr_str, ",");
11031 
11032 		flag_delim = strchr(tok, '^');
11033 		addr_delim = strchr(tok, '@');
11034 
11035 		if (addr_delim)
11036 			*addr_delim++ = '\0';
11037 
11038 		if (flag_delim)
11039 			*flag_delim++ = '\0';
11040 
11041 		name = tok;
11042 
11043 		if (flag_delim) {
11044 			char *flag;
11045 
11046 			while ((flag = strsep(&flag_delim, "^"))) {
11047 				if (strcmp(flag, "traceoff") == 0) {
11048 					traceoff = true;
11049 				} else if ((strcmp(flag, "printk") == 0) ||
11050 					   (strcmp(flag, "traceprintk") == 0) ||
11051 					   (strcmp(flag, "trace_printk") == 0)) {
11052 					traceprintk = true;
11053 				} else {
11054 					pr_info("Tracing: Invalid instance flag '%s' for %s\n",
11055 						flag, name);
11056 				}
11057 			}
11058 		}
11059 
11060 		tok = addr_delim;
11061 		if (tok && isdigit(*tok)) {
11062 			start = memparse(tok, &tok);
11063 			if (!start) {
11064 				pr_warn("Tracing: Invalid boot instance address for %s\n",
11065 					name);
11066 				continue;
11067 			}
11068 			if (*tok != ':') {
11069 				pr_warn("Tracing: No size specified for instance %s\n", name);
11070 				continue;
11071 			}
11072 			tok++;
11073 			size = memparse(tok, &tok);
11074 			if (!size) {
11075 				pr_warn("Tracing: Invalid boot instance size for %s\n",
11076 					name);
11077 				continue;
11078 			}
11079 			memmap_area = true;
11080 		} else if (tok) {
11081 			if (!reserve_mem_find_by_name(tok, &start, &size)) {
11082 				start = 0;
11083 				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
11084 				continue;
11085 			}
11086 			rname = kstrdup(tok, GFP_KERNEL);
11087 		}
11088 
11089 		if (start) {
11090 			/* Start and size must be page aligned */
11091 			if (start & ~PAGE_MASK) {
11092 				pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start);
11093 				continue;
11094 			}
11095 			if (size & ~PAGE_MASK) {
11096 				pr_warn("Tracing: mapping size %pa is not page aligned\n", &size);
11097 				continue;
11098 			}
11099 
11100 			if (memmap_area)
11101 				addr = map_pages(start, size);
11102 			else
11103 				addr = (unsigned long)phys_to_virt(start);
11104 			if (addr) {
11105 				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
11106 					name, &start, (unsigned long)size);
11107 			} else {
11108 				pr_warn("Tracing: Failed to map boot instance %s\n", name);
11109 				continue;
11110 			}
11111 		} else {
11112 			/* Only non mapped buffers have snapshot buffers */
11113 			if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
11114 				do_allocate_snapshot(name);
11115 		}
11116 
11117 		tr = trace_array_create_systems(name, NULL, addr, size);
11118 		if (IS_ERR(tr)) {
11119 			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
11120 			continue;
11121 		}
11122 
11123 		if (traceoff)
11124 			tracer_tracing_off(tr);
11125 
11126 		if (traceprintk)
11127 			update_printk_trace(tr);
11128 
11129 		/*
11130 		 * memmap'd buffers can not be freed.
11131 		 */
11132 		if (memmap_area) {
11133 			tr->flags |= TRACE_ARRAY_FL_MEMMAP;
11134 			tr->ref++;
11135 		}
11136 
11137 		if (start) {
11138 			tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
11139 			tr->range_name = no_free_ptr(rname);
11140 		}
11141 
11142 		while ((tok = strsep(&curr_str, ","))) {
11143 			early_enable_events(tr, tok, true);
11144 		}
11145 	}
11146 }
11147 
11148 __init static int tracer_alloc_buffers(void)
11149 {
11150 	int ring_buf_size;
11151 	int ret = -ENOMEM;
11152 
11153 
11154 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
11155 		pr_warn("Tracing disabled due to lockdown\n");
11156 		return -EPERM;
11157 	}
11158 
11159 	/*
11160 	 * Make sure we don't accidentally add more trace options
11161 	 * than we have bits for.
11162 	 */
11163 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
11164 
11165 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
11166 		return -ENOMEM;
11167 
11168 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
11169 		goto out_free_buffer_mask;
11170 
11171 	/* Only allocate trace_printk buffers if a trace_printk exists */
11172 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
11173 		/* Must be called before global_trace.buffer is allocated */
11174 		trace_printk_init_buffers();
11175 
11176 	/* To save memory, keep the ring buffer size to its minimum */
11177 	if (global_trace.ring_buffer_expanded)
11178 		ring_buf_size = trace_buf_size;
11179 	else
11180 		ring_buf_size = 1;
11181 
11182 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
11183 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
11184 
11185 	raw_spin_lock_init(&global_trace.start_lock);
11186 
11187 	/*
11188 	 * The prepare callbacks allocates some memory for the ring buffer. We
11189 	 * don't free the buffer if the CPU goes down. If we were to free
11190 	 * the buffer, then the user would lose any trace that was in the
11191 	 * buffer. The memory will be removed once the "instance" is removed.
11192 	 */
11193 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
11194 				      "trace/RB:prepare", trace_rb_cpu_prepare,
11195 				      NULL);
11196 	if (ret < 0)
11197 		goto out_free_cpumask;
11198 	/* Used for event triggers */
11199 	ret = -ENOMEM;
11200 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
11201 	if (!temp_buffer)
11202 		goto out_rm_hp_state;
11203 
11204 	if (trace_create_savedcmd() < 0)
11205 		goto out_free_temp_buffer;
11206 
11207 	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
11208 		goto out_free_savedcmd;
11209 
11210 	/* TODO: make the number of buffers hot pluggable with CPUS */
11211 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
11212 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
11213 		goto out_free_pipe_cpumask;
11214 	}
11215 	if (global_trace.buffer_disabled)
11216 		tracing_off();
11217 
11218 	if (trace_boot_clock) {
11219 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
11220 		if (ret < 0)
11221 			pr_warn("Trace clock %s not defined, going back to default\n",
11222 				trace_boot_clock);
11223 	}
11224 
11225 	/*
11226 	 * register_tracer() might reference current_trace, so it
11227 	 * needs to be set before we register anything. This is
11228 	 * just a bootstrap of current_trace anyway.
11229 	 */
11230 	global_trace.current_trace = &nop_trace;
11231 
11232 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
11233 #ifdef CONFIG_TRACER_MAX_TRACE
11234 	spin_lock_init(&global_trace.snapshot_trigger_lock);
11235 #endif
11236 	ftrace_init_global_array_ops(&global_trace);
11237 
11238 #ifdef CONFIG_MODULES
11239 	INIT_LIST_HEAD(&global_trace.mod_events);
11240 #endif
11241 
11242 	init_trace_flags_index(&global_trace);
11243 
11244 	register_tracer(&nop_trace);
11245 
11246 	/* Function tracing may start here (via kernel command line) */
11247 	init_function_trace();
11248 
11249 	/* All seems OK, enable tracing */
11250 	tracing_disabled = 0;
11251 
11252 	atomic_notifier_chain_register(&panic_notifier_list,
11253 				       &trace_panic_notifier);
11254 
11255 	register_die_notifier(&trace_die_notifier);
11256 
11257 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
11258 
11259 	INIT_LIST_HEAD(&global_trace.systems);
11260 	INIT_LIST_HEAD(&global_trace.events);
11261 	INIT_LIST_HEAD(&global_trace.hist_vars);
11262 	INIT_LIST_HEAD(&global_trace.err_log);
11263 	list_add(&global_trace.marker_list, &marker_copies);
11264 	list_add(&global_trace.list, &ftrace_trace_arrays);
11265 
11266 	apply_trace_boot_options();
11267 
11268 	register_snapshot_cmd();
11269 
11270 	return 0;
11271 
11272 out_free_pipe_cpumask:
11273 	free_cpumask_var(global_trace.pipe_cpumask);
11274 out_free_savedcmd:
11275 	trace_free_saved_cmdlines_buffer();
11276 out_free_temp_buffer:
11277 	ring_buffer_free(temp_buffer);
11278 out_rm_hp_state:
11279 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
11280 out_free_cpumask:
11281 	free_cpumask_var(global_trace.tracing_cpumask);
11282 out_free_buffer_mask:
11283 	free_cpumask_var(tracing_buffer_mask);
11284 	return ret;
11285 }
11286 
11287 #ifdef CONFIG_FUNCTION_TRACER
11288 /* Used to set module cached ftrace filtering at boot up */
11289 __init struct trace_array *trace_get_global_array(void)
11290 {
11291 	return &global_trace;
11292 }
11293 #endif
11294 
11295 void __init ftrace_boot_snapshot(void)
11296 {
11297 #ifdef CONFIG_TRACER_MAX_TRACE
11298 	struct trace_array *tr;
11299 
11300 	if (!snapshot_at_boot)
11301 		return;
11302 
11303 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
11304 		if (!tr->allocated_snapshot)
11305 			continue;
11306 
11307 		tracing_snapshot_instance(tr);
11308 		trace_array_puts(tr, "** Boot snapshot taken **\n");
11309 	}
11310 #endif
11311 }
11312 
11313 void __init early_trace_init(void)
11314 {
11315 	if (tracepoint_printk) {
11316 		tracepoint_print_iter =
11317 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
11318 		if (MEM_FAIL(!tracepoint_print_iter,
11319 			     "Failed to allocate trace iterator\n"))
11320 			tracepoint_printk = 0;
11321 		else
11322 			static_key_enable(&tracepoint_printk_key.key);
11323 	}
11324 	tracer_alloc_buffers();
11325 
11326 	init_events();
11327 }
11328 
11329 void __init trace_init(void)
11330 {
11331 	trace_event_init();
11332 
11333 	if (boot_instance_index)
11334 		enable_instances();
11335 }
11336 
11337 __init static void clear_boot_tracer(void)
11338 {
11339 	/*
11340 	 * The default tracer at boot buffer is an init section.
11341 	 * This function is called in lateinit. If we did not
11342 	 * find the boot tracer, then clear it out, to prevent
11343 	 * later registration from accessing the buffer that is
11344 	 * about to be freed.
11345 	 */
11346 	if (!default_bootup_tracer)
11347 		return;
11348 
11349 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
11350 	       default_bootup_tracer);
11351 	default_bootup_tracer = NULL;
11352 }
11353 
11354 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
11355 __init static void tracing_set_default_clock(void)
11356 {
11357 	/* sched_clock_stable() is determined in late_initcall */
11358 	if (!trace_boot_clock && !sched_clock_stable()) {
11359 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
11360 			pr_warn("Can not set tracing clock due to lockdown\n");
11361 			return;
11362 		}
11363 
11364 		printk(KERN_WARNING
11365 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
11366 		       "If you want to keep using the local clock, then add:\n"
11367 		       "  \"trace_clock=local\"\n"
11368 		       "on the kernel command line\n");
11369 		tracing_set_clock(&global_trace, "global");
11370 	}
11371 }
11372 #else
/* Empty stub used when CONFIG_HAVE_UNSTABLE_SCHED_CLOCK is not set. */
static inline void tracing_set_default_clock(void)
{
}
11374 #endif
11375 
11376 __init static int late_trace_init(void)
11377 {
11378 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
11379 		static_key_disable(&tracepoint_printk_key.key);
11380 		tracepoint_printk = 0;
11381 	}
11382 
11383 	if (traceoff_after_boot)
11384 		tracing_off();
11385 
11386 	tracing_set_default_clock();
11387 	clear_boot_tracer();
11388 	return 0;
11389 }
11390 
11391 late_initcall_sync(late_trace_init);
11392